mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-27 23:34:52 +00:00
Estimated hours taken: 3
Branches: main
Change the list constructor from `./2' to `[|]/2'. `./2' will
eventually become the module qualification operator.
library/parser.m:
library/io.m:
library/sparse_bitset.m:
library/std_util.m:
library/term_io.m:
compiler/mercury_to_mercury.m:
compiler/prog_io_dcg.m:
compiler/prog_io_goal.m:
compiler/prog_io_pragma.m:
compiler/prog_io_typeclass.m:
compiler/prog_io_util.m:
browser/interactive_query.m:
extras/moose/grammar.m:
extras/moose/moose.m:
extras/morphine/source/generate_call_site_cov.m:
extras/xml/xml.encoding.m:
samples/muz/higher_order.m:
tests/debugger/declarative/app.m:
tests/dppd/transpose_impl.m:
tests/hard_coded/ground_dd.m:
tests/hard_coded/split_c_files.m:
Change all references to `./2' to use `[|]/2' instead.
compiler/typecheck.m:
Handle `./2' as a special case in `report_error_undef_cons'.
Warn about module list not being imported if `[|]/2' is undefined.
compiler/llds_out.m:
util/mdemangle.c:
profiler/demangle.m:
Add name conversions for `[|]' (f_cons) and `[]' (f_nil).
NEWS:
doc/reference_manual.texi:
w3/tutorial/lists-n-things.m4:
Document the changes.
tests/debugger/{,declarative}/*.exp*:
Update test case results. For some tests the output changed
because they output lists in the non-pretty format. For others,
the output changed because the alphabetical ordering of the
constructors of type `list/1' changed, so the numbering of
the switch branches in the goal paths changed.
226 lines
6.0 KiB
Mathematica
226 lines
6.0 KiB
Mathematica
%---------------------------------------------------------------------------%
|
|
% Copyright (C) 2000-2001 The University of Melbourne.
|
|
% This file may only be copied under the terms of the GNU Library General
|
|
% Public License - see the file COPYING.LIB in the Mercury distribution.
|
|
%---------------------------------------------------------------------------%
|
|
%
|
|
% Main author: conway@cs.mu.oz.au.
|
|
%
|
|
%---------------------------------------------------------------------------%
|
|
:- module xml:encoding.
|
|
|
|
:- interface.
|
|
|
|
:- import_module parsing.
|
|
|
|
:- type ascii7 ---> ascii7.
|
|
|
|
:- instance encoding(ascii7).
|
|
|
|
:- type latin1 ---> latin1.
|
|
|
|
:- instance encoding(latin1).
|
|
|
|
:- type utf8 ---> utf8.
|
|
|
|
:- instance encoding(utf8).
|
|
|
|
:- implementation.
|
|
|
|
:- import_module unicode.
|
|
:- import_module char, int, list, require, string.
|
|
|
|
:- instance encoding(ascii7) where [
|
|
pred(decode/4) is decode_ascii7,
|
|
pred(encode/3) is encode_ascii7
|
|
].
|
|
|
|
:- pred decode_ascii7(ascii7, unicode, entity, entity).
|
|
:- mode decode_ascii7(in, out, in, out) is semidet.
|
|
|
|
decode_ascii7(_, U) -->
|
|
[U].
|
|
|
|
:- pred encode_ascii7(ascii7, list(unicode), string).
|
|
:- mode encode_ascii7(in, in, out) is det.
|
|
|
|
encode_ascii7(_, Us, Str) :-
|
|
unicodesToAscii7(Us, Cs, []),
|
|
string__from_char_list(Cs, Str).
|
|
|
|
:- pred unicodesToAscii7(list(unicode), list(char), list(char)).
|
|
:- mode unicodesToAscii7(in, out, in) is det.
|
|
|
|
unicodesToAscii7([]) --> [].
|
|
unicodesToAscii7([U|Us]) -->
|
|
( { U > 0x00, U < 0x80, char__to_int(C, U) } ->
|
|
[C],
|
|
unicodesToAscii7(Us)
|
|
;
|
|
{ format("unicodesToAscii7: couldn't convert U-%x to 7bit ascii",
|
|
[i(U)], Msg) },
|
|
{ error(Msg) }
|
|
).
|
|
|
|
:- instance encoding(latin1) where [
|
|
pred(decode/4) is decode_latin1,
|
|
pred(encode/3) is encode_latin1
|
|
].
|
|
|
|
:- pred decode_latin1(latin1, unicode, entity, entity).
|
|
:- mode decode_latin1(in, out, in, out) is semidet.
|
|
|
|
decode_latin1(_, U) -->
|
|
[U].
|
|
|
|
:- pred encode_latin1(latin1, list(unicode), string).
|
|
:- mode encode_latin1(in, in, out) is det.
|
|
|
|
encode_latin1(_, Us, Str) :-
|
|
unicodesToLatin1(Us, Cs, []),
|
|
string__from_char_list(Cs, Str).
|
|
|
|
:- pred unicodesToLatin1(list(unicode), list(char), list(char)).
|
|
:- mode unicodesToLatin1(in, out, in) is det.
|
|
|
|
unicodesToLatin1([]) --> [].
|
|
unicodesToLatin1([U|Us]) -->
|
|
( { char__to_int(C, U) } ->
|
|
[C],
|
|
unicodesToLatin1(Us)
|
|
;
|
|
{ format("unicodesToLatin1: couldn't convert U-%x to Latin-1",
|
|
[i(U)], Msg) },
|
|
{ error(Msg) }
|
|
).
|
|
|
|
:- instance encoding(utf8) where [
|
|
pred(decode/4) is decode_utf8,
|
|
pred(encode/3) is encode_utf8
|
|
].
|
|
|
|
:- pred decode_utf8(utf8, unicode, entity, entity).
|
|
:- mode decode_utf8(in, out, in, out) is semidet.
|
|
|
|
decode_utf8(_, U) -->
|
|
[U0],
|
|
( { U0 /\ 0x80 = 0 } ->
|
|
{ U = U0 }
|
|
; { U0 /\ 0x20 = 0 } ->
|
|
[U1],
|
|
{ U = ((U0 /\ 0x1F) << 6) \/ (U1 /\ 0x3F) }
|
|
; { U0 /\ 0x10 = 0 } ->
|
|
[U1], [U2],
|
|
{ U = ((U0 /\ 0x0F) << 12) \/ ((U1 /\ 0x3F) << 6) \/ (U2 /\ 0x3F) }
|
|
; { U0 /\ 0x08 = 0 } ->
|
|
[U1], [U2], [U3],
|
|
{ U = ((U0 /\ 0x07) << 18) \/ ((U1 /\ 0x3F) << 12) \/
|
|
((U2 /\ 0x3F) << 6) \/ (U3 /\ 0x3F) }
|
|
; { U0 /\ 0x04 = 0 } ->
|
|
[U1], [U2], [U3], [U4],
|
|
{ U = ((U0 /\ 0x03) << 24) \/ ((U1 /\ 0x3F) << 18) \/
|
|
((U2 /\ 0x3F) << 12) \/ ((U3 /\ 0x3F) << 6) \/ (U4 /\ 0x3F) }
|
|
; { U0 /\ 0x02 = 0 } ->
|
|
[U1], [U2], [U3], [U4], [U5],
|
|
{ U = ((U0 /\ 0x01) << 30) \/ ((U1 /\ 0x3F) << 24) \/
|
|
((U2 /\ 0x3F) << 18) \/ ((U3 /\ 0x3F) << 12) \/
|
|
((U4 /\ 0x3F) << 6) \/ (U5 /\ 0x3F) }
|
|
;
|
|
%{ error("decode_utf8: bad value!") }
|
|
{ fail }
|
|
).
|
|
|
|
:- pred encode_utf8(utf8, list(unicode), string).
|
|
:- mode encode_utf8(in, in, out) is det.
|
|
|
|
encode_utf8(_, Us, Str) :-
|
|
unicodesToUTF8(Us, Cs, []),
|
|
string__from_char_list(Cs, Str).
|
|
|
|
:- pred unicodesToUTF8(list(unicode), list(char), list(char)).
|
|
:- mode unicodesToUTF8(in, out, in) is det.
|
|
|
|
unicodesToUTF8([]) --> [].
|
|
unicodesToUTF8([U|Us]) -->
|
|
(
|
|
{ U > 0x00, U =< 0x7F },
|
|
{ char__to_int(C, U) }
|
|
->
|
|
[C]
|
|
;
|
|
{ U >= 0x80, U =< 0x07FF },
|
|
{ U0 = 0xC0 \/ (0x1F /\ (U >> 6)) },
|
|
{ U1 = 0x80 \/ (0x3F /\ U) },
|
|
{ char__to_int(C0, U0) },
|
|
{ char__to_int(C1, U1) }
|
|
->
|
|
[C0, C1]
|
|
;
|
|
{ U >= 0x0800, U =< 0xFFFF },
|
|
{ U0 = 0xE0 \/ (0x0F /\ (U >> 12)) },
|
|
{ U1 = 0x80 \/ (0x3F /\ (U >> 6)) },
|
|
{ U2 = 0x80 \/ (0x3F /\ U) },
|
|
{ char__to_int(C0, U0) },
|
|
{ char__to_int(C1, U1) },
|
|
{ char__to_int(C2, U2) }
|
|
->
|
|
[C0, C1, C2]
|
|
;
|
|
{ U >= 0x010000, U =< 0x1FFFFF },
|
|
{ U0 = 0xF0 \/ (0x07 /\ (U >> 18)) },
|
|
{ U1 = 0x80 \/ (0x3F /\ (U >> 12)) },
|
|
{ U2 = 0x80 \/ (0x3F /\ (U >> 6)) },
|
|
{ U3 = 0x80 \/ (0x3F /\ U) },
|
|
{ char__to_int(C0, U0) },
|
|
{ char__to_int(C1, U1) },
|
|
{ char__to_int(C2, U2) },
|
|
{ char__to_int(C3, U3) }
|
|
->
|
|
[C0, C1, C2, C3]
|
|
;
|
|
{ U >= 0x200000, U =< 0x03FFFFFF },
|
|
{ U0 = 0xF8 \/ (0x03 /\ (U >> 24)) },
|
|
{ U1 = 0x80 \/ (0x3F /\ (U >> 18)) },
|
|
{ U2 = 0x80 \/ (0x3F /\ (U >> 12)) },
|
|
{ U3 = 0x80 \/ (0x3F /\ (U >> 6)) },
|
|
{ U4 = 0x80 \/ (0x3F /\ U) },
|
|
{ char__to_int(C0, U0) },
|
|
{ char__to_int(C1, U1) },
|
|
{ char__to_int(C2, U2) },
|
|
{ char__to_int(C3, U3) },
|
|
{ char__to_int(C4, U4) }
|
|
->
|
|
[C0, C1, C2, C3, C4]
|
|
;
|
|
{ U >= 0x04000000, U =< 0x7FFFFFFF },
|
|
{ U0 = 0xFC \/ (0x01 /\ (U >> 30)) },
|
|
{ U1 = 0x80 \/ (0x3F /\ (U >> 24)) },
|
|
{ U2 = 0x80 \/ (0x3F /\ (U >> 18)) },
|
|
{ U3 = 0x80 \/ (0x3F /\ (U >> 12)) },
|
|
{ U4 = 0x80 \/ (0x3F /\ (U >> 6)) },
|
|
{ U5 = 0x80 \/ (0x3F /\ U) },
|
|
{ char__to_int(C0, U0) },
|
|
{ char__to_int(C1, U1) },
|
|
{ char__to_int(C2, U2) },
|
|
{ char__to_int(C3, U3) },
|
|
{ char__to_int(C4, U4) },
|
|
{ char__to_int(C5, U5) }
|
|
->
|
|
[C0, C1, C2, C3, C4, C5]
|
|
;
|
|
{ format("unicodesToUTF8: couldn't convert U-%x to UTF-8",
|
|
[i(U)], Msg) },
|
|
{ error(Msg) }
|
|
),
|
|
unicodesToUTF8(Us).
|
|
|
|
:- func [unicode | entity] = entity.
|
|
:- mode [out | out] = in is semidet.
|
|
|
|
[U | E] = E0 :-
|
|
E0^curr < E0^leng,
|
|
string__unsafe_index(E0^text, E0^curr, C),
|
|
char__to_int(C, U),
|
|
E = E0^curr := (E0^curr + 1).
|
|
|