update README
This commit is contained in:
101
README.md
101
README.md
@@ -46,3 +46,104 @@ few examples:
|
||||
|
||||
- keep our own local atom table containing all atom deserialized. A
|
||||
soft/hard limit can be set.
|
||||
|
||||
## How really? Is it serious?
|
||||
|
||||
In fact, a simple solution already exists, using the option `safe` or
|
||||
`used` when using
|
||||
[`binary_to_term/2`](https://www.erlang.org/doc/man/erlang.html#binary_to_term-2). It
|
||||
will protect you from creating non-existing atoms, but how many
|
||||
projects are using that?
|
||||
|
||||
- [`mojombo/bert.erl`](https://github.com/mojombo/bert.erl):
|
||||
https://github.com/mojombo/bert.erl/blob/master/src/bert.erl#L25
|
||||
|
||||
```erlang
|
||||
-spec decode(binary()) -> term().
|
||||
|
||||
decode(Bin) ->
|
||||
decode_term(binary_to_term(Bin)).
|
||||
|
||||
```
|
||||
|
||||
- [`mojombo/ernie`](https://github.com/mojombo/ernie):
|
||||
https://github.com/mojombo/ernie/blob/master/elib/ernie_server.erl#L178
|
||||
|
||||
```erlang
|
||||
receive_term(Request, State) ->
|
||||
Sock = Request#request.sock,
|
||||
case gen_tcp:recv(Sock, 0) of
|
||||
{ok, BinaryTerm} ->
|
||||
logger:debug("Got binary term: ~p~n", [BinaryTerm]),
|
||||
Term = binary_to_term(BinaryTerm),
|
||||
```
|
||||
|
||||
- [`sync/n2o`](https://github.com/synrc/n2o):
|
||||
https://github.com/synrc/n2o/blob/master/src/services/n2o_bert.erl#L8
|
||||
|
||||
```erlang
|
||||
encode(#ftp{}=FTP) -> term_to_binary(setelement(1,FTP,ftpack));
|
||||
encode(Term) -> term_to_binary(Term).
|
||||
decode(Bin) -> binary_to_term(Bin).
|
||||
```
|
||||
|
||||
- [`ferd/bertconf`](https://github.com/ferd/bertconf):
|
||||
https://github.com/ferd/bertconf/blob/master/src/bertconf_lib.erl#L10
|
||||
|
||||
```erlang
|
||||
decode(Bin) ->
|
||||
try validate(binary_to_term(Bin)) of
|
||||
Terms -> {ok, Terms}
|
||||
catch
|
||||
throw:Reason -> {error, Reason}
|
||||
end.
|
||||
```
|
||||
|
||||
- [`a13x/aberth`](https://github.com/a13x/aberth):
|
||||
https://github.com/a13x/aberth/blob/master/src/bert.erl#L25
|
||||
|
||||
```erlang
|
||||
-spec decode(binary()) -> term().
|
||||
|
||||
decode(Bin) ->
|
||||
decode_term(binary_to_term(Bin)).
|
||||
```
|
||||
|
||||
|
||||
- [`yuce/bert.erl`](https://github.com/yuce/bert.erl):
|
||||
https://github.com/yuce/bert.erl/blob/master/src/bert.erl#L24
|
||||
|
||||
```erlang
|
||||
-spec decode(binary()) -> term().
|
||||
decode(Bin) ->
|
||||
decode_term(binary_to_term(Bin)).
|
||||
```
|
||||
|
||||
- And probably many more like this search on
|
||||
[`searchcode.com`](https://searchcode.com/?lan=25&q=binary_to_term)
|
||||
or
|
||||
[`github.com`](https://github.com/search?q=binary_to_term+language%3AErlang&type=code&l=Erlang)
|
||||
suggest.
|
||||
|
||||
It's highly probable lot of those functions are hard to call, but it
|
||||
could be the case. In situation where unknown data are coming,
|
||||
`erlang:binary_to_term/1` and even `erlang:binary_to_term/2` should be
|
||||
avoided or carefully used.
|
||||
|
||||
## Why am I not aware of that?
|
||||
|
||||
Few articles[^erlef-atom-exhaustion][^paraxial-atom-dos] have been
|
||||
created in the past to explain these problems.
|
||||
|
||||
[^erlef-atom-exhaustion]: https://erlef.github.io/security-wg/secure_coding_and_deployment_hardening/atom_exhaustion.html
|
||||
[^paraxial-atom-dos]: https://paraxial.io/blog/atom-dos
|
||||
|
||||
## How to fix the root cause?
|
||||
|
||||
The problem is from atoms, at least one
|
||||
paper[^atom-garbage-collection] talked about that. Fixing the garbage
|
||||
collection issue could help a lot, but if it's not possible for many
|
||||
reason, using an high level implementation of ETF with some way to
|
||||
control what kind of data are coming might be an "okayish" solution.
|
||||
|
||||
[^atom-garbage-collection]: Atom garbage collection by Thomas Lindgren, https://dl.acm.org/doi/10.1145/1088361.1088369
|
||||
|
||||
@@ -265,6 +265,24 @@ decode_terms(<<Code, _/binary>>, _Opts, _Buffer) ->
|
||||
%% {ok, "ok"}
|
||||
%% '''
|
||||
%%
|
||||
%% Finally, a simple atom whitelist. If an atom (defined as binary) is
|
||||
%% not present in the list, it is returned as binary, else, it is
|
||||
%% converted as atoms.
|
||||
%%
|
||||
%% ```
|
||||
%% Binary = [1,2,3,[4,5,6,{1,2,3}, "test", atom]].
|
||||
%%
|
||||
%% % with an empty whitelist
|
||||
%% {ok,[1,2,3,[4,5,6,{1,2,3},"test",<<"atom">>]],<<>>}
|
||||
%% = berty:decode(Binary, #{ atoms => {whitelist, []}}).
|
||||
%% % Warning: ATOM_EXT is deprecated
|
||||
%% % Warning: Atom <<"atom">> not in whitelist
|
||||
%%
|
||||
%% {ok,[1,2,3,[4,5,6,{1,2,3},"test",atom]],<<>>}
|
||||
%% = berty:decode(Binary, #{ atoms => {whitelist, [<<"atom">>]}}).
|
||||
%% % Warning: ATOM_EXT is deprecated
|
||||
%% '''
|
||||
%%
|
||||
%% @end
|
||||
%%--------------------------------------------------------------------
|
||||
-spec decode_atoms(Binary, Opts) -> Return when
|
||||
@@ -280,6 +298,15 @@ decode_terms(<<Code, _/binary>>, _Opts, _Buffer) ->
|
||||
Return :: binary() | string() | atom().
|
||||
|
||||
% @TODO add utf8 support for atoms
|
||||
decode_atoms(Binary, #{ atoms := {whitelist, Whitelist} })
|
||||
when is_list(Whitelist) ->
|
||||
case lists:member(Binary, Whitelist) of
|
||||
true ->
|
||||
erlang:binary_to_atom(Binary);
|
||||
false ->
|
||||
?LOG_WARNING("Atom ~p not in whitelist", [Binary]),
|
||||
Binary
|
||||
end;
|
||||
decode_atoms(Binary, #{ atoms := create }) ->
|
||||
erlang:binary_to_atom(Binary);
|
||||
decode_atoms(Binary, #{ atoms := {create, Limit} })
|
||||
|
||||
Reference in New Issue
Block a user