update README

This commit is contained in:
niamtokik
2023-08-12 15:40:54 +00:00
parent a7676fe467
commit 8c11d044e5
2 changed files with 128 additions and 0 deletions

101
README.md
View File

@@ -46,3 +46,104 @@ few examples:
- keep our own local atom table containing all atom deserialized. A
soft/hard limit can be set.
## How really? Is it serious?
In fact, a simple solution already exists, using the option `safe` or
`used` when using
[`binary_to_term/2`](https://www.erlang.org/doc/man/erlang.html#binary_to_term-2). It
will protect you from creating non-existing atoms, but how many
projects are using that?
- [`mojombo/bert.erl`](https://github.com/mojombo/bert.erl):
https://github.com/mojombo/bert.erl/blob/master/src/bert.erl#L25
```erlang
-spec decode(binary()) -> term().
decode(Bin) ->
decode_term(binary_to_term(Bin)).
```
- [`mojombo/ernie`](https://github.com/mojombo/ernie):
https://github.com/mojombo/ernie/blob/master/elib/ernie_server.erl#L178
```erlang
receive_term(Request, State) ->
Sock = Request#request.sock,
case gen_tcp:recv(Sock, 0) of
{ok, BinaryTerm} ->
logger:debug("Got binary term: ~p~n", [BinaryTerm]),
Term = binary_to_term(BinaryTerm),
```
- [`sync/n2o`](https://github.com/synrc/n2o):
https://github.com/synrc/n2o/blob/master/src/services/n2o_bert.erl#L8
```erlang
encode(#ftp{}=FTP) -> term_to_binary(setelement(1,FTP,ftpack));
encode(Term) -> term_to_binary(Term).
decode(Bin) -> binary_to_term(Bin).
```
- [`ferd/bertconf`](https://github.com/ferd/bertconf):
https://github.com/ferd/bertconf/blob/master/src/bertconf_lib.erl#L10
```erlang
decode(Bin) ->
try validate(binary_to_term(Bin)) of
Terms -> {ok, Terms}
catch
throw:Reason -> {error, Reason}
end.
```
- [`a13x/aberth`](https://github.com/a13x/aberth):
https://github.com/a13x/aberth/blob/master/src/bert.erl#L25
```erlang
-spec decode(binary()) -> term().
decode(Bin) ->
decode_term(binary_to_term(Bin)).
```
- [`yuce/bert.erl`](https://github.com/yuce/bert.erl):
https://github.com/yuce/bert.erl/blob/master/src/bert.erl#L24
```erlang
-spec decode(binary()) -> term().
decode(Bin) ->
decode_term(binary_to_term(Bin)).
```
- And probably many more like this search on
[`searchcode.com`](https://searchcode.com/?lan=25&q=binary_to_term)
or
[`github.com`](https://github.com/search?q=binary_to_term+language%3AErlang&type=code&l=Erlang)
suggest.
It's highly probable lot of those functions are hard to call, but it
could be the case. In situation where unknown data are coming,
`erlang:binary_to_term/1` and even `erlang:binary_to_term/2` should be
avoided or carefully used.
## Why am I not aware of that?
Few articles[^erlef-atom-exhaustion][^paraxial-atom-dos] have been
created in the past to explain these problems.
[^erlef-atom-exhaustion]: https://erlef.github.io/security-wg/secure_coding_and_deployment_hardening/atom_exhaustion.html
[^paraxial-atom-dos]: https://paraxial.io/blog/atom-dos
## How to fix the root cause?
The problem is from atoms, at least one
paper[^atom-garbage-collection] talked about that. Fixing the garbage
collection issue could help a lot, but if it's not possible for many
reason, using an high level implementation of ETF with some way to
control what kind of data are coming might be an "okayish" solution.
[^atom-garbage-collection]: Atom garbage collection by Thomas Lindgren, https://dl.acm.org/doi/10.1145/1088361.1088369

View File

@@ -265,6 +265,24 @@ decode_terms(<<Code, _/binary>>, _Opts, _Buffer) ->
%% {ok, "ok"}
%% '''
%%
%% Finally, a simple atom whitelist. If an atom (defined as binary) is
%% not present in the list, it is returned as binary, else, it is
%% converted as atoms.
%%
%% ```
%% Binary = [1,2,3,[4,5,6,{1,2,3}, "test", atom]].
%%
%% % with an empty whitelist
%% {ok,[1,2,3,[4,5,6,{1,2,3},"test",<<"atom">>]],<<>>}
%% = berty:decode(Binary, #{ atoms => {whitelist, []}}).
%% % Warning: ATOM_EXT is deprecated
%% % Warning: Atom <<"atom">> not in whitelist
%%
%% {ok,[1,2,3,[4,5,6,{1,2,3},"test",atom]],<<>>}
%% = berty:decode(Binary, #{ atoms => {whitelist, [<<"atom">>]}}).
%% % Warning: ATOM_EXT is deprecated
%% '''
%%
%% @end
%%--------------------------------------------------------------------
-spec decode_atoms(Binary, Opts) -> Return when
@@ -280,6 +298,15 @@ decode_terms(<<Code, _/binary>>, _Opts, _Buffer) ->
Return :: binary() | string() | atom().
% @TODO add utf8 support for atoms
decode_atoms(Binary, #{ atoms := {whitelist, Whitelist} })
when is_list(Whitelist) ->
case lists:member(Binary, Whitelist) of
true ->
erlang:binary_to_atom(Binary);
false ->
?LOG_WARNING("Atom ~p not in whitelist", [Binary]),
Binary
end;
decode_atoms(Binary, #{ atoms := create }) ->
erlang:binary_to_atom(Binary);
decode_atoms(Binary, #{ atoms := {create, Limit} })