mirror of
https://github.com/ubf/ubf.git
synced 2026-04-26 14:48:32 +00:00
312 lines
10 KiB
Erlang
312 lines
10 KiB
Erlang
%%% The MIT License
|
|
%%%
|
|
%%% Copyright (C) 2011-2015 by Joseph Wayne Norton <norton@alum.mit.edu>
|
|
%%% Copyright (C) 2002 by Joe Armstrong
|
|
%%%
|
|
%%% Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
%%% of this software and associated documentation files (the "Software"), to deal
|
|
%%% in the Software without restriction, including without limitation the rights
|
|
%%% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
%%% copies of the Software, and to permit persons to whom the Software is
|
|
%%% furnished to do so, subject to the following conditions:
|
|
%%%
|
|
%%% The above copyright notice and this permission notice shall be included in
|
|
%%% all copies or substantial portions of the Software.
|
|
%%%
|
|
%%% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
%%% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
%%% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
%%% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
%%% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
%%% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
%%% THE SOFTWARE.
|
|
|
|
%%% @doc Low-level functions for encoding and decoding the UBF(a)
|
|
%%% protocol.
|
|
%%%
|
|
%%% UBF is a family of languages for transporting and describing
|
|
%%% complex data structures across a network. It has three
|
|
%%% components. In terms of a protocol stack, UBF(a) is a data
|
|
%%% transport format, roughly equivalent to well-formed XML.
|
|
%%%
|
|
%%% UBF(a) is the transport format, it was designed to be easy to
|
|
%%% parse and to be easy to write with a text editor. UBF(a) is based
|
|
%%% on a byte encoded virtual machine, 26 byte codes are
|
|
%%% reserved. Instead of allocating the byte codes from 0 we use the
|
|
%%% printable character codes to make the format easy to read.
|
|
%%%
|
|
|
|
-module(ubf).
|
|
-behaviour(contract_proto).
|
|
|
|
-export([proto_vsn/0, proto_driver/0, proto_packet_type/0]).
|
|
-export([encode/1, encode/2]).
|
|
-export([decode_init/0, decode_init/1, decode_init/2, decode/1, decode/2, decode/3]).
|
|
-export([deabstract/1]).
|
|
|
|
-import(lists, [foldl/3, reverse/1, map/2, seq/2, sort/1]).
|
|
|
|
-record(state, {
|
|
safe=false :: boolean(),
|
|
dict :: term() % FYI - dict:dict() vs. dict() Erlang/OTP 17 incompatibility
|
|
}).
|
|
|
|
|
|
%%---------------------------------------------------------------------
|
|
proto_vsn() -> 'ubf1.0'.
|
|
proto_driver() -> ubf_driver.
|
|
proto_packet_type() -> 0.
|
|
|
|
|
|
%%---------------------------------------------------------------------
|
|
decode_init() ->
|
|
decode_init(false).
|
|
|
|
decode_init(Safe) ->
|
|
decode_init(Safe, []).
|
|
|
|
decode_init(Safe, []) ->
|
|
State = #state{safe=Safe, dict=dict:new()},
|
|
{more, fun(I) -> decode1(I, [[]], State) end};
|
|
decode_init(Safe, String) when is_list(String) ->
|
|
State = #state{safe=Safe, dict=dict:new()},
|
|
decode1(String, [[]], State).
|
|
|
|
decode(String) ->
|
|
decode(String, ?MODULE).
|
|
|
|
decode(String, Mod) ->
|
|
decode(String, Mod, decode_init(false)).
|
|
|
|
decode(S, _Mod, {more, Fun}) ->
|
|
Fun(S).
|
|
|
|
decode1([$'|T], Stack, State) ->
|
|
get_stuff(T, $', [], Stack, State);
|
|
decode1([$~|T], [[Int|Stack]|S1], State) when is_integer(Int), Int >= 0 ->
|
|
collect_binary(Int, T, [], [Stack|S1], State);
|
|
decode1([$~|_T], _Stack, _State) ->
|
|
exit(tilde);
|
|
decode1([$%|T], Stack, State) ->
|
|
get_stuff(T, $%, [], Stack, State);
|
|
decode1([$"|T], Stack, State) ->
|
|
get_stuff(T, $", [], Stack, State);
|
|
decode1([$`|T], Stack, State) ->
|
|
get_stuff(T, $`, [], Stack, State);
|
|
decode1([$-|T], Stack, State) ->
|
|
collect_int(T, 0, '-', Stack, State);
|
|
decode1([H|T], Stack, State) when $0 =< H, H =< $9 ->
|
|
collect_int(T, H-$0, '+', Stack, State);
|
|
decode1([${|T], Stack, State) ->
|
|
decode1(T, [[]|Stack], State);
|
|
decode1([$}|T], [H|Stack], State) ->
|
|
decode1(T, push(list_to_tuple(reverse(H)),Stack), State);
|
|
decode1([$&|T], [[H1,H2|T1] | Stack], State) ->
|
|
decode1(T, [[[H1|H2]|T1]|Stack], State);
|
|
decode1([$#|T], Stack, State) ->
|
|
decode1(T, push([], Stack), State);
|
|
decode1([$$|T], [[X]], _State) ->
|
|
{done, X, T, undefined};
|
|
decode1([$>], Stack, State) ->
|
|
{more, fun(I) -> decode1([$>|I], Stack, State) end};
|
|
decode1([$>,Key|T], [[Val|R]|Stack], #state{dict=Dict}=State) ->
|
|
decode1(T, [R|Stack], State#state{dict=dict:store(Key,Val,Dict)});
|
|
decode1([H|T], Stack, #state{dict=Dict}=State) ->
|
|
case special(H) of
|
|
true ->
|
|
decode1(T, Stack, State);
|
|
false ->
|
|
decode1(T, push(dict:fetch(H, Dict), Stack), State)
|
|
end;
|
|
decode1([], Stack, State) ->
|
|
{more, fun(I) -> decode1(I, Stack, State) end};
|
|
decode1(_X, _Stack, _State) ->
|
|
exit(decode1).
|
|
|
|
get_stuff([$\\], Stop, L, Stack, State) ->
|
|
{more, fun(I) -> get_stuff([$\\|I], Stop, L, Stack, State) end};
|
|
get_stuff([$\\,H|T], Stop, L, Stack, State) ->
|
|
get_stuff(T, Stop, [H|L], Stack, State);
|
|
get_stuff([$'|T], $', L, Stack, #state{safe=Safe}=State) ->
|
|
RevL = reverse(L),
|
|
Atom = if Safe -> list_to_existing_atom(RevL); true -> list_to_atom(RevL) end,
|
|
decode1(T, push(Atom, Stack), State);
|
|
get_stuff([$"|T], $", L, Stack, State) ->
|
|
decode1(T, push({'#S',reverse(L)},Stack), State);
|
|
get_stuff([$`|T], $`, L, [[X|Top]|Stack], State) ->
|
|
decode1(T, push({'$TYPE', reverse(L), X}, [Top|Stack]), State);
|
|
get_stuff([$%|T], $%, _L, Stack, State) ->
|
|
decode1(T, Stack, State);
|
|
get_stuff([H|T], Stop, L, Stack, State) ->
|
|
get_stuff(T, Stop, [H|L], Stack, State);
|
|
get_stuff([], Stop, L, Stack, State) ->
|
|
{more, fun(I) -> get_stuff(I, Stop, L, Stack, State) end}.
|
|
|
|
collect_binary(0, T, L, Stack, State) ->
|
|
expect_tilde(T, push(list_to_binary(reverse(L)),Stack), State);
|
|
collect_binary(N, [H|T], L, Stack, State) ->
|
|
collect_binary(N-1, T, [H|L], Stack, State);
|
|
collect_binary(N, [], L, Stack, State) ->
|
|
{more, fun(I) -> collect_binary(N, I, L, Stack, State) end}.
|
|
|
|
expect_tilde([$~|T], Stack, State) ->
|
|
decode1(T, Stack, State);
|
|
expect_tilde([], Stack, State) ->
|
|
{more, fun(I) -> expect_tilde(I, Stack, State) end};
|
|
expect_tilde([H|_], _, _) ->
|
|
exit({expect_tilde, H}).
|
|
|
|
push(X, [Top|Rest]) ->
|
|
[[X|Top]|Rest];
|
|
push(_X, _Y) ->
|
|
exit(push).
|
|
|
|
special($ ) -> true;
|
|
special(${) -> true;
|
|
special($}) -> true;
|
|
special($,) -> true;
|
|
special($#) -> true;
|
|
special($&) -> true;
|
|
special($%) -> true;
|
|
special($>) -> true;
|
|
special($\n) -> true;
|
|
special($\r) -> true;
|
|
special($\t) -> true;
|
|
special($$) -> true;
|
|
special($") -> true;
|
|
special($') -> true;
|
|
special($~) -> true;
|
|
special(_) -> false.
|
|
|
|
special_chars() ->
|
|
" 0123456789{},~%#>\n\r\s\t\"'-&$".
|
|
|
|
collect_int([H|T], N, Sign, Stack, State) when $0 =< H, H =< $9 ->
|
|
collect_int(T, N*10 + H - $0, Sign, Stack, State);
|
|
collect_int([], N, Sign, Stack, State) ->
|
|
{more, fun(I) -> collect_int(I, N, Sign, Stack, State) end};
|
|
collect_int(T, N, '+', Stack, State) ->
|
|
decode1(T, push(N, Stack), State);
|
|
collect_int(T, N, '-', Stack, State) ->
|
|
decode1(T, push(-N, Stack), State).
|
|
|
|
|
|
%%---------------------------------------------------------------------
|
|
encode(X) ->
|
|
encode(X, ?MODULE).
|
|
|
|
encode(X, _Mod) ->
|
|
element(1, encode1(X, dict:new())).
|
|
|
|
encode1(X, Dict0) ->
|
|
{Dict1, L1} = initial_dict(X, Dict0),
|
|
case (catch do_encode(X, Dict1)) of
|
|
{'EXIT', _What} ->
|
|
exit(encode1);
|
|
L ->
|
|
{flatten([L1, L,$$]), Dict1}
|
|
end.
|
|
|
|
initial_dict(X, Dict0) ->
|
|
Free = seq(32,255) -- special_chars(),
|
|
Most = analyse(X),
|
|
load_dict(Most, Free, Dict0, []).
|
|
|
|
load_dict([{N,X}|T], [Key|T1], Dict0, L) when N > 0->
|
|
load_dict(T, T1, dict:store(X, Key, Dict0),
|
|
[encode_obj(X),">",Key|L]);
|
|
load_dict(_, _, Dict, L) ->
|
|
{Dict, L}.
|
|
|
|
analyse(T) ->
|
|
KV = dict:to_list(analyse(T, dict:new())),
|
|
%% The Range is the Number of things times its size. If the size
|
|
%% is greater than 0
|
|
KV1 = map(fun rank/1, KV),
|
|
reverse(sort(KV1)).
|
|
|
|
rank({X, K}) when is_atom(X) ->
|
|
case length(atom_to_list(X)) of
|
|
N when N > 1, K > 1 ->
|
|
{(N-1) * K, X};
|
|
_ ->
|
|
{0, X}
|
|
end;
|
|
rank({X, K}) when is_integer(X) ->
|
|
case length(integer_to_list(X)) of
|
|
N when N > 1, K > 1 ->
|
|
{(N-1) * K, X};
|
|
_ ->
|
|
{0, X}
|
|
end;
|
|
rank({X, _}) ->
|
|
{0, X}.
|
|
|
|
analyse({'#S', String}, Dict) ->
|
|
analyse(String, Dict);
|
|
analyse(T, Dict) when is_tuple(T) ->
|
|
foldl(fun analyse/2, Dict, tuple_to_list(T));
|
|
analyse(X, Dict) ->
|
|
case dict:find(X, Dict) of
|
|
{ok, Val} ->
|
|
dict:store(X, Val+1, Dict);
|
|
error ->
|
|
dict:store(X, 1, Dict)
|
|
end.
|
|
|
|
flatten(L) ->
|
|
binary_to_list(list_to_binary(L)).
|
|
|
|
encode_obj(X) when is_atom(X) -> encode_atom(X);
|
|
encode_obj(X) when is_integer(X) -> integer_to_list(X);
|
|
encode_obj(X) when is_binary(X) -> encode_binary(X).
|
|
|
|
encode_string(S) -> [$",add_string(S, $"), $"].
|
|
encode_atom(X) -> [$',add_string(atom_to_list(X), $'), $'].
|
|
encode_binary(X) -> [integer_to_list(byte_size(X)), $~,X,$~].
|
|
|
|
do_encode(X, Dict) when is_atom(X); is_integer(X); is_binary(X) ->
|
|
case dict:find(X, Dict) of
|
|
{ok, Y} ->
|
|
Y;
|
|
error ->
|
|
encode_obj(X)
|
|
end;
|
|
do_encode({'#S', String}, _Dict) ->
|
|
%% This *is* a string
|
|
encode_string(String);
|
|
do_encode([H|T], Dict) ->
|
|
S1 = do_encode(T, Dict),
|
|
S2 = do_encode(H, Dict),
|
|
[S1,S2,$&];
|
|
do_encode(T, Dict) when is_tuple(T) ->
|
|
S1 = encode_tuple(1, T, Dict),
|
|
[${,S1,$}];
|
|
do_encode([], _Dict) ->
|
|
$#.
|
|
|
|
encode_tuple(N, T, _Dict) when N > tuple_size(T) ->
|
|
"";
|
|
encode_tuple(N, T, Dict) ->
|
|
S1 = do_encode(element(N, T), Dict),
|
|
S2 = encode_tuple(N+1, T, Dict),
|
|
[S1,possible_comma(N, T),S2].
|
|
|
|
possible_comma(N, T) when N < tuple_size(T) -> $,;
|
|
possible_comma(_, _) -> [].
|
|
|
|
%% The ascii printables are in the range 32..126 inclusive
|
|
add_string([$\\|T], Quote) -> [$\\,$\\|add_string(T, Quote)];
|
|
add_string([Quote|T], Quote) -> [$\\,Quote|add_string(T, Quote)];
|
|
add_string([H|T], Quote) when H >= 0, H=< 255 -> [H|add_string(T, Quote)];
|
|
add_string([H|_], _Quote) -> exit({string_character,H});
|
|
add_string([], _) -> [].
|
|
|
|
|
|
%%---------------------------------------------------------------------
|
|
deabstract({'#S',S}) -> S;
|
|
deabstract(T) when is_tuple(T) ->
|
|
list_to_tuple(map(fun deabstract/1, tuple_to_list(T)));
|
|
deabstract([H|T]) -> [deabstract(H)|deabstract(T)];
|
|
deabstract(T) -> T.
|