diff --git a/FUTURE.md b/FUTURE.md new file mode 100644 index 0000000..e6d722d --- /dev/null +++ b/FUTURE.md @@ -0,0 +1,39 @@ +# Future implementations and Ideas + +## ETF Data Description + +Describe a term or ETF encoded data to show what kind of terms are +present in it. + +```erlang +{ok, {list_ext, 3, [ small_integer_ext + , small_integer_ext + , small_integer_ext] + }} + = berty:describe([1,2,3]). + +{ok, {tuple_ext, 2, [ atom_ext + , string_ext + ]}} + = berty:describe({ok, "test"}). +``` + +## ETF Data Analysis + +Take a list of terms or ETF encoded data to analyze them by creating a +summary of the different kind of terms present in them. + +``` +{ok, {tuple, 2, [[{atom_ext, 1},{atom_ext, 1}] + ,[{integer_ext, 1},{string_ext, 1} + ]]}} + = berty:analyze([{ok, 123},{error,"test}]). +``` + +## ETF Path + +An easy way to extract only one term, similar to xmlpath or jsonpath. + +## ETF Schema + +A schema to validate ETF data received. diff --git a/README.md b/README.md index d5759b6..85697a0 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,114 @@ A clean, safe and flexible implementation of BERT, a data-structure format inspired by Erlang ETF. +This project is in active development, and should not be used in +production yet. + +## Features + +Primary features: + + - [x] High level implementation of ETF in pure Erlang + - [ ] Atoms protection and limitation + - [ ] Fine grained filtering based on type + - [ ] Callback function or MFA + - [ ] Fallback to `binary_to_term` function on demand + - [ ] Drop terms on demande + - [ ] Term size limitation + - [ ] Custom options for term + - [ ] Property based testing + - [ ] BERT parser subset + - [ ] Depth type protection + - [ ] Fully documented + - [ ] +90% coverage + - [ ] 100% compatible with standard ETF + - [ ] 100% compatible with BERT + +Secondary features: + + - [ ] Global or fine grained statistics + - [ ] Profiling and benchmarking facilities + - [ ] Logging facilities + - [ ] Tracing facilities + - [ ] ETF path + - [ ] ETF schema + - [ ] Custom parser subset based on behaviors + - [ ] ETF as stream of data + - [ ] Usage example with ETF, BERT and/or custom parser + +## Usage + +Berty was created to easily replace `binary_to_term/1` and +`binary_to_term/2` built-in functions. In fact, the implementation is +transparent in many cases. The big idea is to protect your system from +outside, in particular atom and memory exhaution. + +```erlang +% create an atom from scratch +Atom = term_to_binary(test). + +% An atom is automatically converted as binary +{ok, <<"test">>} + = berty:decode(Atom). + +% different methods can be used to deal with atoms. +{ok, test} + = berty:decode(Atom, #{ atoms => {create, 0.2, warning} }). + +% Other terms are supported +Terms = term_to_binary([{ok,1.0,"test",<<>>}]), +{ok, [{ok,1.0,"test",<<>>}]} + = berty:decode(Terms). +``` + +More features are present, for example, dropping terms or creating +custom callbacks. + +```erlang +Lists = term_to_binary([1024,<<>>,"test"]). + +% let drop all integers +{ok, [<<>>, "test"]} + = berty:decode(Lists, #{ integer_ext => drop + , small_integer_ext => drop + }). + +% let create a custom callback +Callback = fun + (_Term, Rest) -> + {ok, doh, Rest} +end. +{ok, [doh, <<>>, "test"]} + = berty:decode(Lists, #{ integer_ext => {callback, Callback} + , small_integer_ext => {callback, Callback} + }). + +% let create another one. +Callback2 = fun + (Term, Rest) when 1024 =:= Term -> + logger:warning("catch term ~p", [1024]), + {ok, Term, Rest}; + (Term, Rest) -> {ok, Term, Rest} +end. + +{ok, [1024, <<>>, "test"]} + = berty:decode(Lists, #{ integer_ext => {callback, Callback2} + , small_integer_ext => {callback, Callback2} + }). +``` + +Those are simple examples, more features are present and will be +added. Here the most important functions: + + - `berty:decode/1`: standard BERT decoder with default options + - `berty:decode/2`: standard BERT decoder with custom options + - `berty:decode/3`: custom decoder with custom options + - `berty:encode/1`: standard BERT encoder with default options + - `berty:encode/2`: standard BERT encoder with custom options + - `berty:encode/3`: custom encoder with custom options + - `berty:binary_to_term/1`: wrapper around `binary_to_term/1` + - `berty:term_to_binary/1`: wrapper around `term_to_binary/1` + ## Build ```sh @@ -61,7 +169,7 @@ few examples: terms are deserialized, atoms can be (1) converted in existing atom (2) converted in binary or list (3) simply dropped or replaced with something to alert the VM this part of the data is dangerous. - + - keep our own local atom table containing all atom deserialized. A soft/hard limit can be set. @@ -78,15 +186,15 @@ projects are using that? ```erlang -spec decode(binary()) -> term(). - + decode(Bin) -> decode_term(binary_to_term(Bin)). - + ``` - [`mojombo/ernie`](https://github.com/mojombo/ernie): https://github.com/mojombo/ernie/blob/master/elib/ernie_server.erl#L178 - + ```erlang receive_term(Request, State) -> Sock = Request#request.sock, @@ -98,7 +206,7 @@ projects are using that? - [`sync/n2o`](https://github.com/synrc/n2o): https://github.com/synrc/n2o/blob/master/src/services/n2o_bert.erl#L8 - + ```erlang encode(#ftp{}=FTP) -> term_to_binary(setelement(1,FTP,ftpack)); encode(Term) -> term_to_binary(Term). @@ -119,14 +227,14 @@ projects are using that? - [`a13x/aberth`](https://github.com/a13x/aberth): https://github.com/a13x/aberth/blob/master/src/bert.erl#L25 - + ```erlang -spec decode(binary()) -> term(). - + decode(Bin) -> decode_term(binary_to_term(Bin)). ``` - + - [`yuce/bert.erl`](https://github.com/yuce/bert.erl): https://github.com/yuce/bert.erl/blob/master/src/bert.erl#L24 @@ -167,15 +275,15 @@ deal with atoms and divide the problem in half: 1. create fixed atom store containing only atoms from source code (Erlang release and project), this one can't be increased. - + 2. create a second atom store containing dynamically created atoms during runtime, this one can be increased. - + What I worry about is when dealing with mnesia. What could happen if someone create more than 2M unwanted atoms added in Mnesia or DETS? What kind of behavior the cluster will have? And how to fix that if it's critical. - + Unfortunately, I think it will totally break atom performance, but it could be an interesting project to learn how Erlang BEAM works under the hood. @@ -207,7 +315,7 @@ binary_to_term(<<131, 111, 4294967294:32/unsigned-integer, 0:8/integer, 255:8, 0 ``` Generating ETF payload with very long binaries can also have -an impact on CPUs, the following code can generate DoS and if many process +an impact on CPUs, the following code can generate DoS and if many process ```erlang % big payload, high cpu usage, no crash. @@ -259,20 +367,22 @@ feature. It might be great to have syntax to create ETF schema, a bit like protobuf[^protobuf], json schema[^json-schema], XML[^xml] (with -XLST[^xlst]) or ASN.1[^asn.1]. +XLST[^xlst]) or ASN.1[^asn.1]. In fact, when I started to find +something around this feature, I also found UBF[^ubf] project from Joe +Armstrong. ```erlang schema1() -> integer(). - + schema2() -> tuple([[atom(ok), integer()] ,[atom(error), string(1024)]). - + % fun ({ok, X}) when is_integer(X) -> true; % ({error, X) when is_list(X) andalso length(X) =< 1024 -> is_string(X); % (_) -> false. - + schema3() -> tuple( ``` @@ -296,6 +406,7 @@ Here the final representation. [^xml]: https://en.wikipedia.org/wiki/XML [^xlst]: https://en.wikipedia.org/wiki/XSLT [^asn.1]: https://en.wikipedia.org/wiki/ASN.1 +[^ubf]: https://ubf.github.io/ubf/ubf-user-guide.en.html ## What about an ETF path feature? @@ -384,3 +495,8 @@ each terms is it safe or not and with the risk(s). | `SMALL_TUPLE_EXT` | 104 | maybe | dynamic tuple length (8bits) | `STRING_EXT` | 107 | maybe | dynamic string length (16bits) | `V4_PORT_EXT` | 120 | no | atom exhaustion + +# Resources + + - [BERT-RPC Official](https://bert-rpc.org) [(archive)](https://web.archive.org/web/20160304092040/http://bert-rpc.org/) + - [BERT-RPC Google group](https://groups.google.com/g/bert-rpc) diff --git a/src/berty_bert.erl b/src/berty_bert.erl index c53d4b8..26d12ce 100644 --- a/src/berty_bert.erl +++ b/src/berty_bert.erl @@ -34,5 +34,3 @@ default_options() -> decode(Data) -> berty_etf:decode(Data, default_options()). - - diff --git a/src/berty_etf.erl b/src/berty_etf.erl index 7f9fe35..1230255 100644 --- a/src/berty_etf.erl +++ b/src/berty_etf.erl @@ -130,8 +130,8 @@ default_options() -> -type string_ext() :: enabled | disabled | callback_option(). -type v4_port_ext() :: enabled | disabled | cursed | callback_option(). --type atoms() :: create - | {create, number()} +-type atoms() :: create + | {create, number()} | {create, number(), warning} | as_string | as_binary @@ -183,11 +183,11 @@ default_options() -> %%-------------------------------------------------------------------- -spec decode(Data) -> Return when Data :: binary(), - Return :: {ok, term()} - | {ok, term(), binary()} + Return :: {ok, term()} + | {ok, term(), binary()} | {error, Reason}, Reason :: proplists:proplist(). - + decode(Data) -> decode(Data, default_options()). @@ -196,8 +196,8 @@ decode(Data) -> %%-------------------------------------------------------------------- -spec decode(Data, Opts) -> Return when Data :: binary(), - Return :: {ok, term()} - | {ok, term(), binary()} + Return :: {ok, term()} + | {ok, term(), binary()} | {error, Reason}, Opts :: options(), Reason :: proplists:proplist(). @@ -247,9 +247,9 @@ decode(small_integer_ext, < Params = [Integer, Rest], case Callback of - _ when is_function(Callback) -> + _ when is_function(Callback) -> apply(Callback, Params); - {Module, Function, Args} -> + {Module, Function, Args} -> apply(Module, Function, [Params|Args]) end; decode(small_integer_ext, <> @@ -560,7 +560,7 @@ decode(Parser, Rest, Opts, State) -> ,{opts, Opts}, {state, State}]}. -decode_test() -> +decode_test() -> [ decode_properties(integer, default_options()) , decode_properties(atom, default_options()) , decode_properties(float, default_options())