mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-16 18:03:36 +00:00
library/string.m:
Add predicate to test if a string is in UTF-8 or UTF-16,
depending on the target language.
NEWS:
Announce the addition.
tests/hard_coded/string_well_formed.exp:
tests/hard_coded/string_well_formed.m:
Add basic test case.
tests/hard_coded/string_well_formed_utf8.exp:
tests/hard_coded/string_well_formed_utf8.exp2:
tests/hard_coded/string_well_formed_utf8.exp3:
tests/hard_coded/string_well_formed_utf8.inp:
tests/hard_coded/string_well_formed_utf8.m:
Add more thorough test for UTF-8. The input file is from
https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
tests/hard_coded/Mmakefile:
Enable the tests.
77 lines
2.3 KiB
Mathematica
77 lines
2.3 KiB
Mathematica
%---------------------------------------------------------------------------%
|
|
% vim: ts=4 sw=4 et ft=mercury
|
|
%---------------------------------------------------------------------------%
|
|
%
|
|
% The .exp file is for targets using UTF-8 encoding.
|
|
% The .exp2 file is for the Java backend.
|
|
% The .exp3 file is for the C# backend.
|
|
%
|
|
% XXX The Java and C# backends currently differ in whether the null byte on
|
|
% line 71 is considered an error.
|
|
%
|
|
%---------------------------------------------------------------------------%
|
|
|
|
:- module string_well_formed_utf8.
|
|
:- interface.
|
|
|
|
:- import_module io.
|
|
|
|
:- pred main(io::di, io::uo) is det.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
%---------------------------------------------------------------------------%
|
|
|
|
:- implementation.
|
|
|
|
:- import_module char.
|
|
:- import_module list.
|
|
:- import_module string.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
main(!IO) :-
|
|
( if count_code_units("\U0001F600") = 4 then
|
|
io.write_string("string encoding is UTF-8\n", !IO)
|
|
else
|
|
io.write_string("string encoding is UTF-16\n", !IO)
|
|
),
|
|
loop([], AccResults, !IO),
|
|
list.reverse(AccResults, Results0),
|
|
list.remove_adjacent_dups(Results0, Results),
|
|
io.write_list(Results, "\n", io.write_string, !IO).
|
|
|
|
:- pred loop(list(string)::in, list(string)::out, io::di, io::uo) is det.
|
|
|
|
loop(!Acc, !IO) :-
|
|
io.get_line_number(LineNr, !IO),
|
|
io.read_line_as_string(RLAS, !IO),
|
|
(
|
|
RLAS = ok(Line),
|
|
check_line(LineNr, strip(Line), !Acc),
|
|
loop(!Acc, !IO)
|
|
;
|
|
RLAS = eof
|
|
;
|
|
RLAS = error(Error),
|
|
Msg = format("line %d: %s", [i(LineNr), s(io.error_message(Error))]),
|
|
!:Acc = [Msg | !.Acc],
|
|
loop(!Acc, !IO)
|
|
).
|
|
|
|
:- pred check_line(int::in, string::in, list(string)::in, list(string)::out)
|
|
is det.
|
|
|
|
check_line(LineNr, S, !Acc) :-
|
|
( if string.is_well_formed(S) then
|
|
( if string.all_match(is_ascii, S) then
|
|
Msg = ""
|
|
else if string.contains_char(S, '\uFFFD') then
|
|
Msg = format("line %d: contains replacement char", [i(LineNr)])
|
|
else
|
|
Msg = format("line %d: well-formed", [i(LineNr)])
|
|
)
|
|
else
|
|
Msg = format("line %d: not well-formed", [i(LineNr)])
|
|
),
|
|
!:Acc = [Msg | !.Acc].
|