diff --git a/README.md b/README.md index c34a3a9..b6a88b0 100644 --- a/README.md +++ b/README.md @@ -46,3 +46,104 @@ few examples: - keep our own local atom table containing all atom deserialized. A soft/hard limit can be set. + +## How really? Is it serious? + +In fact, a simple solution already exists, using the option `safe` or +`used` when using +[`binary_to_term/2`](https://www.erlang.org/doc/man/erlang.html#binary_to_term-2). It +will protect you from creating non-existing atoms, but how many +projects are using that? + +- [`mojombo/bert.erl`](https://github.com/mojombo/bert.erl): + https://github.com/mojombo/bert.erl/blob/master/src/bert.erl#L25 + + ```erlang + -spec decode(binary()) -> term(). + + decode(Bin) -> + decode_term(binary_to_term(Bin)). + + ``` + +- [`mojombo/ernie`](https://github.com/mojombo/ernie): + https://github.com/mojombo/ernie/blob/master/elib/ernie_server.erl#L178 + + ```erlang + receive_term(Request, State) -> + Sock = Request#request.sock, + case gen_tcp:recv(Sock, 0) of + {ok, BinaryTerm} -> + logger:debug("Got binary term: ~p~n", [BinaryTerm]), + Term = binary_to_term(BinaryTerm), + ``` + +- [`sync/n2o`](https://github.com/synrc/n2o): + https://github.com/synrc/n2o/blob/master/src/services/n2o_bert.erl#L8 + + ```erlang + encode(#ftp{}=FTP) -> term_to_binary(setelement(1,FTP,ftpack)); + encode(Term) -> term_to_binary(Term). + decode(Bin) -> binary_to_term(Bin). + ``` + +- [`ferd/bertconf`](https://github.com/ferd/bertconf): + https://github.com/ferd/bertconf/blob/master/src/bertconf_lib.erl#L10 + + ```erlang + decode(Bin) -> + try validate(binary_to_term(Bin)) of + Terms -> {ok, Terms} + catch + throw:Reason -> {error, Reason} + end. + ``` + +- [`a13x/aberth`](https://github.com/a13x/aberth): + https://github.com/a13x/aberth/blob/master/src/bert.erl#L25 + + ```erlang + -spec decode(binary()) -> term(). + + decode(Bin) -> + decode_term(binary_to_term(Bin)). + ``` + + +- [`yuce/bert.erl`](https://github.com/yuce/bert.erl): + https://github.com/yuce/bert.erl/blob/master/src/bert.erl#L24 + + ```erlang + -spec decode(binary()) -> term(). + decode(Bin) -> + decode_term(binary_to_term(Bin)). + ``` + +- And probably many more like this search on + [`searchcode.com`](https://searchcode.com/?lan=25&q=binary_to_term) + or + [`github.com`](https://github.com/search?q=binary_to_term+language%3AErlang&type=code&l=Erlang) + suggest. + +It's highly probable lot of those functions are hard to call, but it +could be the case. In situation where unknown data are coming, +`erlang:binary_to_term/1` and even `erlang:binary_to_term/2` should be +avoided or carefully used. + +## Why am I not aware of that? + +Few articles[^erlef-atom-exhaustion][^paraxial-atom-dos] have been +created in the past to explain these problems. + +[^erlef-atom-exhaustion]: https://erlef.github.io/security-wg/secure_coding_and_deployment_hardening/atom_exhaustion.html +[^paraxial-atom-dos]: https://paraxial.io/blog/atom-dos + +## How to fix the root cause? + +The problem is from atoms, at least one +paper[^atom-garbage-collection] talked about that. Fixing the garbage +collection issue could help a lot, but if it's not possible for many +reason, using an high level implementation of ETF with some way to +control what kind of data are coming might be an "okayish" solution. + +[^atom-garbage-collection]: Atom garbage collection by Thomas Lindgren, https://dl.acm.org/doi/10.1145/1088361.1088369 diff --git a/src/berty.erl b/src/berty.erl index 05897b4..d7c96e1 100644 --- a/src/berty.erl +++ b/src/berty.erl @@ -265,6 +265,24 @@ decode_terms(<>, _Opts, _Buffer) -> %% {ok, "ok"} %% ''' %% +%% Finally, a simple atom whitelist. If an atom (defined as binary) is +%% not present in the list, it is returned as binary, else, it is +%% converted as atoms. +%% +%% ``` +%% Binary = [1,2,3,[4,5,6,{1,2,3}, "test", atom]]. +%% +%% % with an empty whitelist +%% {ok,[1,2,3,[4,5,6,{1,2,3},"test",<<"atom">>]],<<>>} +%% = berty:decode(Binary, #{ atoms => {whitelist, []}}). +%% % Warning: ATOM_EXT is deprecated +%% % Warning: Atom <<"atom">> not in whitelist +%% +%% {ok,[1,2,3,[4,5,6,{1,2,3},"test",atom]],<<>>} +%% = berty:decode(Binary, #{ atoms => {whitelist, [<<"atom">>]}}). +%% % Warning: ATOM_EXT is deprecated +%% ''' +%% %% @end %%-------------------------------------------------------------------- -spec decode_atoms(Binary, Opts) -> Return when @@ -280,6 +298,15 @@ decode_terms(<>, _Opts, _Buffer) -> Return :: binary() | string() | atom(). % @TODO add utf8 support for atoms +decode_atoms(Binary, #{ atoms := {whitelist, Whitelist} }) + when is_list(Whitelist) -> + case lists:member(Binary, Whitelist) of + true -> + erlang:binary_to_atom(Binary); + false -> + ?LOG_WARNING("Atom ~p not in whitelist", [Binary]), + Binary + end; decode_atoms(Binary, #{ atoms := create }) -> erlang:binary_to_atom(Binary); decode_atoms(Binary, #{ atoms := {create, Limit} })