mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-15 09:23:44 +00:00
Make string.append(out, out, in) work with ill-formed sequences.
library/string.m:
Simplify string.append(out, out, in) and make it work sensibly in
the presence of ill-formed code unit sequences, breaking the input
string after each code point or code unit in an ill-formed sequence.
tests/hard_coded/Mmakefile:
tests/hard_coded/string_append_ooi_ilseq.exp:
tests/hard_coded/string_append_ooi_ilseq.exp2:
tests/hard_coded/string_append_ooi_ilseq.m:
Add test case.
This commit is contained in:
@@ -3642,62 +3642,21 @@ append_ioi(S1, S2, S3) :-
|
||||
:- pred append_ooi(string::out, string::out, string::in) is multi.
|
||||
|
||||
append_ooi(S1, S2, S3) :-
|
||||
S3Len = length(S3),
|
||||
append_ooi_2(0, S3Len, S1, S2, S3).
|
||||
Len3 = length(S3),
|
||||
append_ooi_2(0, Len3, S1, S2, S3).
|
||||
|
||||
:- pred append_ooi_2(int::in, int::in, string::out, string::out,
|
||||
string::in) is multi.
|
||||
|
||||
append_ooi_2(NextS1Len, S3Len, S1, S2, S3) :-
|
||||
( if NextS1Len = S3Len then
|
||||
append_ooi_3(NextS1Len, S3Len, S1, S2, S3)
|
||||
else
|
||||
(
|
||||
append_ooi_3(NextS1Len, S3Len, S1, S2, S3)
|
||||
;
|
||||
unsafe_index_next(S3, NextS1Len, AdvS1Len, _),
|
||||
append_ooi_2(AdvS1Len, S3Len, S1, S2, S3)
|
||||
)
|
||||
append_ooi_2(Start2, Len3, S1, S2, S3) :-
|
||||
(
|
||||
unsafe_between(S3, 0, Start2, S1),
|
||||
unsafe_between(S3, Start2, Len3, S2)
|
||||
;
|
||||
unsafe_index_next(S3, Start2, NextStart2, _Char),
|
||||
append_ooi_2(NextStart2, Len3, S1, S2, S3)
|
||||
).
|
||||
|
||||
:- pred append_ooi_3(int::in, int::in, string::out,
|
||||
string::out, string::in) is det.
|
||||
|
||||
:- pragma foreign_proc("C",
|
||||
append_ooi_3(S1Len::in, S3Len::in, S1::out, S2::out, S3::in),
|
||||
[will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
|
||||
does_not_affect_liveness, may_not_duplicate, no_sharing],
|
||||
"{
|
||||
MR_allocate_aligned_string_msg(S1, S1Len, MR_ALLOC_ID);
|
||||
MR_memcpy(S1, S3, S1Len);
|
||||
S1[S1Len] = '\\0';
|
||||
MR_allocate_aligned_string_msg(S2, S3Len - S1Len, MR_ALLOC_ID);
|
||||
strcpy(S2, S3 + S1Len);
|
||||
}").
|
||||
:- pragma foreign_proc("C#",
|
||||
append_ooi_3(S1Len::in, _S3Len::in, S1::out, S2::out, S3::in),
|
||||
[will_not_call_mercury, promise_pure, thread_safe],
|
||||
"
|
||||
S1 = S3.Substring(0, S1Len);
|
||||
S2 = S3.Substring(S1Len);
|
||||
").
|
||||
:- pragma foreign_proc("Java",
|
||||
append_ooi_3(S1Len::in, _S3Len::in, S1::out, S2::out, S3::in),
|
||||
[will_not_call_mercury, promise_pure, thread_safe],
|
||||
"
|
||||
S1 = S3.substring(0, S1Len);
|
||||
S2 = S3.substring(S1Len);
|
||||
").
|
||||
:- pragma foreign_proc("Erlang",
|
||||
append_ooi_3(S1Len::in, _S3Len::in, S1::out, S2::out, S3::in),
|
||||
[will_not_call_mercury, promise_pure, thread_safe],
|
||||
"
|
||||
<< S1:S1Len/binary, S2/binary >> = S3
|
||||
").
|
||||
|
||||
append_ooi_3(S1Len, _S3Len, S1, S2, S3) :-
|
||||
split(S3, S1Len, S1, S2).
|
||||
|
||||
S1 ++ S2 = append(S1, S2).
|
||||
|
||||
%---------------------%
|
||||
|
||||
@@ -353,6 +353,7 @@ ORDINARY_PROGS = \
|
||||
string_append_iii \
|
||||
string_append_ioi \
|
||||
string_append_ooi \
|
||||
string_append_ooi_ilseq \
|
||||
string_builder_test \
|
||||
string_case \
|
||||
string_char_list_ilseq \
|
||||
|
||||
18
tests/hard_coded/string_append_ooi_ilseq.exp
Normal file
18
tests/hard_coded/string_append_ooi_ilseq.exp
Normal file
@@ -0,0 +1,18 @@
|
||||
L:
|
||||
R: 😀 0xf0 0x9f 0x98 z
|
||||
|
||||
L: 😀
|
||||
R: 0xf0 0x9f 0x98 z
|
||||
|
||||
L: 😀 0xf0
|
||||
R: 0x9f 0x98 z
|
||||
|
||||
L: 😀 0xf0 0x9f
|
||||
R: 0x98 z
|
||||
|
||||
L: 😀 0xf0 0x9f 0x98
|
||||
R: z
|
||||
|
||||
L: 😀 0xf0 0x9f 0x98 z
|
||||
R:
|
||||
|
||||
12
tests/hard_coded/string_append_ooi_ilseq.exp2
Normal file
12
tests/hard_coded/string_append_ooi_ilseq.exp2
Normal file
@@ -0,0 +1,12 @@
|
||||
L:
|
||||
R: 😀 0xd83d z
|
||||
|
||||
L: 😀
|
||||
R: 0xd83d z
|
||||
|
||||
L: 😀 0xd83d
|
||||
R: z
|
||||
|
||||
L: 😀 0xd83d z
|
||||
R:
|
||||
|
||||
78
tests/hard_coded/string_append_ooi_ilseq.m
Normal file
78
tests/hard_coded/string_append_ooi_ilseq.m
Normal file
@@ -0,0 +1,78 @@
|
||||
%---------------------------------------------------------------------------%
|
||||
% vim: ts=4 sw=4 et ft=mercury
|
||||
%---------------------------------------------------------------------------%
|
||||
%
|
||||
% The .exp file is for backends using UTF-8 string encoding.
|
||||
% The .exp2 file is for backends using UTF-16 string encoding.
|
||||
%
|
||||
%---------------------------------------------------------------------------%
|
||||
|
||||
:- module string_append_ooi_ilseq.
|
||||
:- interface.
|
||||
|
||||
:- import_module io.
|
||||
|
||||
:- pred main(io::di, io::uo) is cc_multi.
|
||||
|
||||
%---------------------------------------------------------------------------%
|
||||
%---------------------------------------------------------------------------%
|
||||
|
||||
:- implementation.
|
||||
|
||||
:- import_module char.
|
||||
:- import_module int.
|
||||
:- import_module list.
|
||||
:- import_module pair.
|
||||
:- import_module solutions.
|
||||
:- import_module string.
|
||||
|
||||
%---------------------------------------------------------------------------%
|
||||
|
||||
% Build a test string out of three parts -- a complete code point, the same
% code point with its final code unit removed, and the letter z -- then
% print every solution of test_append_ooi for that string.
main(!IO) :-
|
||||
S0 = "😀",
|
||||
% S1 is S0 minus its last code unit, i.e. a truncated (ill-formed) sequence.
S1 = string.between(S0, 0, count_code_units(S0) - 1),
|
||||
S = S0 ++ S1 ++ "z",
|
||||
% write_result is called once for each L - R pair produced by
% test_append_ooi(S).
unsorted_aggregate(test_append_ooi(S), write_result, !IO).
|
||||
|
||||
% test_append_ooi(S, L - R):
% Wrap the (out, out, in) mode of string.append so that each way of
% splitting S into a left part L and a right part R is returned as a
% single pair value.
:- pred test_append_ooi(string::in, pair(string, string)::out) is multi.
|
||||
|
||||
test_append_ooi(S, L - R) :-
|
||||
string.append(L, R, S).
|
||||
|
||||
% write_result(L - R, !IO):
% Print one split as two labelled lines ("L: " and "R: "), each rendered
% with write_string_debug, followed by a blank line.
:- pred write_result(pair(string, string)::in, io::di, io::uo) is det.
|
||||
|
||||
write_result(L - R, !IO) :-
|
||||
io.write_string("L: ", !IO),
|
||||
write_string_debug(L, !IO),
|
||||
io.write_string("\n", !IO),
|
||||
io.write_string("R: ", !IO),
|
||||
write_string_debug(R, !IO),
|
||||
% The second newline produces the blank line separating results.
io.write_string("\n\n", !IO).
|
||||
|
||||
% write_string_debug(S, !IO):
% Print a debugging rendition of S by running write_string_debug_loop
% from offset 0.
:- pred write_string_debug(string::in, io::di, io::uo) is det.
|
||||
|
||||
write_string_debug(S, !IO) :-
|
||||
write_string_debug_loop(S, 0, !IO).
|
||||
|
||||
% write_string_debug_loop(S, Index, !IO):
% Step through S from Index with string.index_next, printing one item per
% step followed by a space, and stop when string.index_next fails.
% An item is printed as:
%   - the code unit at Index, in hex, if the character is U+FFFD;
%   - the character's integer value, in hex, if it is a surrogate;
%   - the character itself otherwise.
% NOTE(review): presumably string.index_next reports U+FFFD for a code unit
% that is part of an ill-formed sequence, which is why that case prints the
% raw code unit -- confirm against the string module documentation.
:- pred write_string_debug_loop(string::in, int::in, io::di, io::uo) is det.
|
||||
|
||||
write_string_debug_loop(S, Index, !IO) :-
|
||||
( if string.index_next(S, Index, NextIndex, Char) then
|
||||
( if Char = '\ufffd' then
|
||||
% Show the underlying code unit rather than the replacement character.
string.unsafe_index_code_unit(S, Index, CodeUnit),
|
||||
write_hex(CodeUnit, !IO)
|
||||
else if is_surrogate(Char) then
|
||||
write_hex(char.to_int(Char), !IO)
|
||||
else
|
||||
io.write_char(Char, !IO)
|
||||
),
|
||||
io.write_char(' ', !IO),
|
||||
write_string_debug_loop(S, NextIndex, !IO)
|
||||
else
|
||||
% No further character at Index: nothing more to print.
true
|
||||
).
|
||||
|
||||
% write_hex(I, !IO):
% Print I using the "%#x" format, i.e. in hexadecimal with a 0x prefix
% (as seen in the expected outputs, e.g. 0xf0).
:- pred write_hex(int::in, io::di, io::uo) is det.
|
||||
|
||||
write_hex(I, !IO) :-
|
||||
io.format("%#x", [i(I)], !IO).
|
||||
Reference in New Issue
Block a user