Files
mercury/tests/hard_coded/string_char_list_ilseq.m
Peter Wang 9b25e167e1 Define behaviour of string.to_char_list (and rev) on ill-formed sequences.
library/string.m:
    Define string.to_char_list and string.to_rev_char_list to either
    replace code units in ill-formed sequences with U+FFFD or return
    unpaired surrogate code points.

    Use Mercury version of do_to_char_list instead of updating
    the foreign language implementations.

tests/hard_coded/Mmakefile:
tests/hard_coded/string_char_list_ilseq.exp:
tests/hard_coded/string_char_list_ilseq.exp2:
tests/hard_coded/string_char_list_ilseq.m:
    Add test case.
2019-10-24 09:14:46 +11:00

51 lines
1.5 KiB
Mathematica

%---------------------------------------------------------------------------%
% vim: ts=4 sw=4 et ft=mercury
%---------------------------------------------------------------------------%
%
% The .exp file is for backends using UTF-8 string encoding.
% The .exp2 file is for backends using UTF-16 string encoding.
%
%---------------------------------------------------------------------------%
:- module string_char_list_ilseq.
:- interface.
:- import_module io.
:- pred main(io::di, io::uo) is det.
%---------------------------------------------------------------------------%
:- implementation.
:- import_module char.
:- import_module int.
:- import_module list.
:- import_module string.
%---------------------------------------------------------------------------%
% Build a string that embeds both a complete emoji and a copy of it with
% the final code unit removed, then print the string's characters in
% forward and in reverse order.
main(!IO) :-
    Emoji = "😀",
    % Cutting off the last code unit leaves an incomplete sequence.
    NumCodeUnits = string.count_code_units(Emoji),
    Truncated = string.between(Emoji, 0, NumCodeUnits - 1),
    Input = "abc" ++ Emoji ++ Truncated ++ "xyz",

    Forward = string.to_char_list(Input),
    Backward = string.to_rev_char_list(Input),

    write_bracketed_chars("string.to_char_list\n[", Forward, "]\n\n", !IO),
    write_bracketed_chars("string.to_rev_char_list\n[", Backward, "]\n", !IO).

    % write_bracketed_chars(Opening, Chars, Closing, !IO):
    %
    % Write Opening, then the elements of Chars separated by ", " (each one
    % printed with write_char_or_hex), then Closing.
    %
:- pred write_bracketed_chars(string::in, list(char)::in, string::in,
    io::di, io::uo) is det.

write_bracketed_chars(Opening, Chars, Closing, !IO) :-
    io.write_string(Opening, !IO),
    io.write_list(Chars, ", ", write_char_or_hex, !IO),
    io.write_string(Closing, !IO).
:- pred write_char_or_hex(char::in, io::di, io::uo) is det.
write_char_or_hex(Char, !IO) :-
( if Char = '\ufffd' ; char.is_surrogate(Char) then
io.format("%#x", [i(char.to_int(Char))], !IO)
else
io.write_char(Char, !IO)
).