Make string.append(out, out, in) work with ill-formed sequences.

library/string.m:
    Simplify string.append(out, out, in) and make it work sensibly in
    the presence of ill-formed code unit sequences, breaking the input
    string after each code point or code unit in an ill-formed sequence.

tests/hard_coded/Mmakefile:
tests/hard_coded/string_append_ooi_ilseq.exp:
tests/hard_coded/string_append_ooi_ilseq.exp2:
tests/hard_coded/string_append_ooi_ilseq.m:
    Add test case.
This commit is contained in:
Peter Wang
2019-10-22 11:54:03 +11:00
parent 30f287951c
commit cd899271c6
5 changed files with 118 additions and 50 deletions

View File

@@ -3642,62 +3642,21 @@ append_ioi(S1, S2, S3) :-
:- pred append_ooi(string::out, string::out, string::in) is multi.
append_ooi(S1, S2, S3) :-
S3Len = length(S3),
append_ooi_2(0, S3Len, S1, S2, S3).
Len3 = length(S3),
append_ooi_2(0, Len3, S1, S2, S3).
:- pred append_ooi_2(int::in, int::in, string::out, string::out,
string::in) is multi.
append_ooi_2(NextS1Len, S3Len, S1, S2, S3) :-
( if NextS1Len = S3Len then
append_ooi_3(NextS1Len, S3Len, S1, S2, S3)
else
(
append_ooi_3(NextS1Len, S3Len, S1, S2, S3)
;
unsafe_index_next(S3, NextS1Len, AdvS1Len, _),
append_ooi_2(AdvS1Len, S3Len, S1, S2, S3)
)
% Enumerate, on backtracking, the ways of splitting S3 into S1 and S2 with
% the split point at offset Start2 or later. Len3 is length(S3), computed
% once by the caller.
append_ooi_2(Start2, Len3, S1, S2, S3) :-
(
% First alternative: split at the current offset. S1 is taken from
% offsets 0 to Start2 of S3, and S2 from Start2 to Len3.
unsafe_between(S3, 0, Start2, S1),
unsafe_between(S3, Start2, Len3, S2)
;
% Second alternative: advance past whatever unsafe_index_next consumes at
% Start2 and enumerate the splits from the new offset. Per the commit
% message, that is one code point, or one code unit of an ill-formed
% sequence.
% NOTE(review): this relies on unsafe_index_next failing once Start2
% reaches Len3 to end the recursion -- confirm against its documentation.
unsafe_index_next(S3, Start2, NextStart2, _Char),
append_ooi_2(NextStart2, Len3, S1, S2, S3)
).
:- pred append_ooi_3(int::in, int::in, string::out,
string::out, string::in) is det.
:- pragma foreign_proc("C",
append_ooi_3(S1Len::in, S3Len::in, S1::out, S2::out, S3::in),
[will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
does_not_affect_liveness, may_not_duplicate, no_sharing],
"{
MR_allocate_aligned_string_msg(S1, S1Len, MR_ALLOC_ID);
MR_memcpy(S1, S3, S1Len);
S1[S1Len] = '\\0';
MR_allocate_aligned_string_msg(S2, S3Len - S1Len, MR_ALLOC_ID);
strcpy(S2, S3 + S1Len);
}").
:- pragma foreign_proc("C#",
append_ooi_3(S1Len::in, _S3Len::in, S1::out, S2::out, S3::in),
[will_not_call_mercury, promise_pure, thread_safe],
"
S1 = S3.Substring(0, S1Len);
S2 = S3.Substring(S1Len);
").
:- pragma foreign_proc("Java",
append_ooi_3(S1Len::in, _S3Len::in, S1::out, S2::out, S3::in),
[will_not_call_mercury, promise_pure, thread_safe],
"
S1 = S3.substring(0, S1Len);
S2 = S3.substring(S1Len);
").
:- pragma foreign_proc("Erlang",
append_ooi_3(S1Len::in, _S3Len::in, S1::out, S2::out, S3::in),
[will_not_call_mercury, promise_pure, thread_safe],
"
<< S1:S1Len/binary, S2/binary >> = S3
").
append_ooi_3(S1Len, _S3Len, S1, S2, S3) :-
split(S3, S1Len, S1, S2).
S1 ++ S2 = append(S1, S2).
%---------------------%

View File

@@ -353,6 +353,7 @@ ORDINARY_PROGS = \
string_append_iii \
string_append_ioi \
string_append_ooi \
string_append_ooi_ilseq \
string_builder_test \
string_case \
string_char_list_ilseq \

View File

@@ -0,0 +1,18 @@
L:
R: 😀 0xf0 0x9f 0x98 z
L: 😀
R: 0xf0 0x9f 0x98 z
L: 😀 0xf0
R: 0x9f 0x98 z
L: 😀 0xf0 0x9f
R: 0x98 z
L: 😀 0xf0 0x9f 0x98
R: z
L: 😀 0xf0 0x9f 0x98 z
R:

View File

@@ -0,0 +1,12 @@
L:
R: 😀 0xd83d z
L: 😀
R: 0xd83d z
L: 😀 0xd83d
R: z
L: 😀 0xd83d z
R:

View File

@@ -0,0 +1,78 @@
%---------------------------------------------------------------------------%
% vim: ts=4 sw=4 et ft=mercury
%---------------------------------------------------------------------------%
%
% The .exp file is for backends using UTF-8 string encoding.
% The .exp2 file is for backends using UTF-16 string encoding.
%
%---------------------------------------------------------------------------%
:- module string_append_ooi_ilseq.
:- interface.
:- import_module io.
:- pred main(io::di, io::uo) is cc_multi.
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
:- implementation.
:- import_module char.
:- import_module int.
:- import_module list.
:- import_module pair.
:- import_module solutions.
:- import_module string.
%---------------------------------------------------------------------------%
% Build the test input from three pieces: a complete emoji, the same emoji
% with its final code unit dropped, and a trailing "z". Then print every
% (L, R) pair that test_append_ooi produces for that input.
main(!IO) :-
    Emoji = "😀",
    NumCodeUnits = string.count_code_units(Emoji),
    % Dropping the last code unit leaves an incomplete encoding of the emoji.
    Truncated = string.between(Emoji, 0, NumCodeUnits - 1),
    Tail = Truncated ++ "z",
    Input = Emoji ++ Tail,
    unsorted_aggregate(test_append_ooi(Input), write_result, !IO).
% Nondeterministically return, as a pair, each split of Whole that
% string.append/3 produces when only its third argument is bound.
:- pred test_append_ooi(string::in, pair(string, string)::out) is multi.

test_append_ooi(Whole, Split) :-
    string.append(Prefix, Suffix, Whole),
    Split = Prefix - Suffix.
% Print one split as two labelled lines ("L: ..." then "R: ..."), each
% rendered by write_string_debug, followed by a blank line.
:- pred write_result(pair(string, string)::in, io::di, io::uo) is det.

write_result(Split, !IO) :-
    Split = Left - Right,

    io.write_string("L: ", !IO),
    write_string_debug(Left, !IO),
    io.write_string("\n", !IO),

    io.write_string("R: ", !IO),
    write_string_debug(Right, !IO),
    io.write_string("\n\n", !IO).
% Print Str in the debug format produced by write_string_debug_loop,
% starting from offset 0.
:- pred write_string_debug(string::in, io::di, io::uo) is det.

write_string_debug(Str, !IO) :-
    StartIndex = 0,
    write_string_debug_loop(Str, StartIndex, !IO).
% Walk Str from offset Pos using string.index_next, printing one item
% followed by a space for each step, and stop as soon as index_next fails.
:- pred write_string_debug_loop(string::in, int::in, io::di, io::uo) is det.

write_string_debug_loop(Str, Pos, !IO) :-
    ( if string.index_next(Str, Pos, NextPos, Ch) then
        write_code_point_debug(Str, Pos, Ch, !IO),
        io.write_char(' ', !IO),
        write_string_debug_loop(Str, NextPos, !IO)
    else
        true
    ).

% Print the item that index_next returned as Ch at offset Pos of Str:
%   - for U+FFFD, the code unit stored at Pos, in hex (presumably because
%     index_next reports undecodable code units as U+FFFD -- TODO confirm);
%   - for a surrogate, its code point value, in hex;
%   - for anything else, the character itself.
:- pred write_code_point_debug(string::in, int::in, char::in,
    io::di, io::uo) is det.

write_code_point_debug(Str, Pos, Ch, !IO) :-
    ( if Ch = '\ufffd' then
        string.unsafe_index_code_unit(Str, Pos, Unit),
        write_hex(Unit, !IO)
    else if is_surrogate(Ch) then
        write_hex(char.to_int(Ch), !IO)
    else
        io.write_char(Ch, !IO)
    ).
% Print Value using the "%#x" format specification.
:- pred write_hex(int::in, io::di, io::uo) is det.

write_hex(Value, !IO) :-
    Text = string.format("%#x", [i(Value)]),
    io.write_string(Text, !IO).