mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-15 17:33:38 +00:00
Make string.replace_all with empty pattern preserve ill-formed sequences.
library/string.m:
Define behaviour of string.replace_all on ill-formed code unit
sequences when the pattern is empty.
Implement that behaviour.
Use better variable names in documentation of string.replace and
string.replace_all.
tests/general/string_replace.exp:
tests/general/string_replace.exp2:
tests/general/string_replace.m:
Extend test case.
Update code style.
This commit is contained in:
@@ -1069,19 +1069,26 @@
|
||||
:- func rstrip_pred(pred(char)::in(pred(in) is semidet), string::in)
|
||||
= (string::out) is det.
|
||||
|
||||
% replace(String0, Search, Replace, String):
|
||||
% replace(String0, Pattern, Subst, String):
|
||||
%
|
||||
% Replace replaces the first occurrence of Search in String0
|
||||
% with Replace to give String. It fails if Search does not occur
|
||||
% in String0.
|
||||
% Replaces the first occurrence of Pattern in String0 with Subst to give
|
||||
% String. Fails if Pattern does not occur in String0.
|
||||
%
|
||||
:- pred replace(string::in, string::in, string::in, string::uo) is semidet.
|
||||
|
||||
% replace_all(String0, Search, Replace, String):
|
||||
% replace_all(String0, Pattern, Subst, String):
|
||||
%
|
||||
% Replaces any occurrences of Search in String0 with Replace to give
|
||||
% Replaces any occurrences of Pattern in String0 with Subst to give
|
||||
% String.
|
||||
%
|
||||
% If Pattern is the empty string then Subst is inserted at every point
|
||||
% in String0 except between two code units in an encoding of a code point.
|
||||
% For example, these are true:
|
||||
%
|
||||
% replace_all("", "", "|", "|")
|
||||
% replace_all("a", "", "|", "|a|")
|
||||
% replace_all("ab", "", "|", "|a|b|")
|
||||
%
|
||||
:- func replace_all(string::in, string::in, string::in) = (string::uo) is det.
|
||||
:- pred replace_all(string::in, string::in, string::in, string::uo) is det.
|
||||
|
||||
@@ -4718,10 +4725,7 @@ replace_all(S1, S2, S3) = S4 :-
|
||||
|
||||
replace_all(Str, Pat, Subst, Result) :-
|
||||
( if Pat = "" then
|
||||
% XXX ILSEQ foldl cannot handle ill-formed sequences.
|
||||
F = (func(C, L) = [char_to_string(C) ++ Subst | L]),
|
||||
Foldl = foldl(F, Str, []),
|
||||
Result = append_list([Subst | list.reverse(Foldl)])
|
||||
replace_all_empty_pat(Str, Subst, Result)
|
||||
else
|
||||
PatLength = length(Pat),
|
||||
replace_all_loop(Str, Pat, Subst, PatLength, 0, [], ReversedChunks),
|
||||
@@ -4729,6 +4733,50 @@ replace_all(Str, Pat, Subst, Result) :-
|
||||
Result = append_list(Chunks)
|
||||
).
|
||||
|
||||
:- pred replace_all_empty_pat(string::in, string::in, string::uo) is det.
|
||||
|
||||
replace_all_empty_pat(Str, Subst, Result) :-
|
||||
% This implementation is not the most efficient, but it is not expected
|
||||
% to be used much in practice.
|
||||
to_code_unit_list(Subst, SubstCodes),
|
||||
Codes0 = SubstCodes,
|
||||
replace_all_empty_pat_loop(Str, SubstCodes, length(Str), Codes0, Codes),
|
||||
( if from_code_unit_list_allow_ill_formed(Codes, ResultPrime) then
|
||||
Result = ResultPrime
|
||||
else
|
||||
unexpected($pred, "string.from_code_unit_list_allow_ill_formed failed")
|
||||
).
|
||||
|
||||
:- pred replace_all_empty_pat_loop(string::in, list(int)::in, int::in,
|
||||
list(int)::in, list(int)::out) is det.
|
||||
|
||||
replace_all_empty_pat_loop(Str, Subst, Index, Codes0, Codes) :-
|
||||
( if unsafe_prev_index(Str, Index, PrevIndex, Char) then
|
||||
char.to_int(Char, CharInt),
|
||||
( if CharInt =< 0x7f then
|
||||
% Fast path for single code unit code points.
|
||||
Codes1 = [CharInt | Codes0]
|
||||
else
|
||||
prepend_code_units(Str, PrevIndex, Index - 1, Codes0, Codes1)
|
||||
),
|
||||
Codes2 = Subst ++ Codes1,
|
||||
replace_all_empty_pat_loop(Str, Subst, PrevIndex, Codes2, Codes)
|
||||
else
|
||||
Codes = Codes0
|
||||
).
|
||||
|
||||
:- pred prepend_code_units(string::in, int::in, int::in,
|
||||
list(int)::in, list(int)::out) is det.
|
||||
|
||||
prepend_code_units(Str, FirstIndex, Index, Codes0, Codes) :-
|
||||
unsafe_index_code_unit(Str, Index, Code),
|
||||
Codes1 = [Code | Codes0],
|
||||
( if Index = FirstIndex then
|
||||
Codes = Codes1
|
||||
else
|
||||
prepend_code_units(Str, FirstIndex, Index - 1, Codes1, Codes)
|
||||
).
|
||||
|
||||
:- pred replace_all_loop(string::in, string::in, string::in,
|
||||
int::in, int::in, list(string)::in, list(string)::out) is det.
|
||||
|
||||
|
||||
@@ -8,6 +8,8 @@ string__replace("aaa bbbb ccccc aaa", "", "**", Result)
|
||||
"**aaa bbbb ccccc aaa"
|
||||
string__replace("aßξ啕ßξ啕𐀀.", "", "**", Result)
|
||||
"**aßξ啕ßξ啕𐀀."
|
||||
string__replace("[0x9f][0x98][0x80]😀", "", "**", Result)
|
||||
"**[0x9f][0x98][0x80]😀"
|
||||
string__replace("aaa bbbb ccccc aaa", "aaa", "", Result)
|
||||
" bbbb ccccc aaa"
|
||||
string__replace("aaa bbbb ccccc aaa", "cc", "**", Result)
|
||||
@@ -28,6 +30,8 @@ string__replace_all("aaa bbbb ccccc aaa", "", "**", Result)
|
||||
"**a**a**a** **b**b**b**b** **c**c**c**c**c** **a**a**a**"
|
||||
string__replace_all("aßξ啕ßξ啕𐀀.", "", "**", Result)
|
||||
"**a**ß**ξ**啕**ß**ξ**啕**𐀀**.**"
|
||||
string__replace_all("[0x9f][0x98][0x80]😀", "", "**", Result)
|
||||
"**[0x9f]**[0x98]**[0x80]**😀**"
|
||||
string__replace_all("aaa bbbb ccccc aaa", "aaa", "", Result)
|
||||
" bbbb ccccc "
|
||||
string__replace_all("aaa bbbb ccccc aaa", "cc", "**", Result)
|
||||
|
||||
44
tests/general/string_replace.exp2
Normal file
44
tests/general/string_replace.exp2
Normal file
@@ -0,0 +1,44 @@
|
||||
string__replace("", "a", "bc", Result)
|
||||
FAIL!
|
||||
string__replace("aaa bbbb ccccc aaa", "aab", "**", Result)
|
||||
FAIL!
|
||||
string__replace("aaa bbbb ccccc aaa", "aaaa", "**", Result)
|
||||
FAIL!
|
||||
string__replace("aaa bbbb ccccc aaa", "", "**", Result)
|
||||
"**aaa bbbb ccccc aaa"
|
||||
string__replace("aßξ啕ßξ啕𐀀.", "", "**", Result)
|
||||
"**aßξ啕ßξ啕𐀀."
|
||||
string__replace("[0xde00]😀", "", "**", Result)
|
||||
"**[0xde00]😀"
|
||||
string__replace("aaa bbbb ccccc aaa", "aaa", "", Result)
|
||||
" bbbb ccccc aaa"
|
||||
string__replace("aaa bbbb ccccc aaa", "cc", "**", Result)
|
||||
"aaa bbbb **ccc aaa"
|
||||
string__replace("aßξ啕ßξ啕𐀀.", "ßξ", "**", Result)
|
||||
"a**啕ßξ啕𐀀."
|
||||
string__replace("aßξ啕ßξ啕𐀀.", "ßξ", "★★", Result)
|
||||
"a★★啕ßξ啕𐀀."
|
||||
string__replace("aßξ啕ßξ啕𐀀.", "啕ßξ", "***", Result)
|
||||
"aßξ***啕𐀀."
|
||||
string__replace_all("", "a", "bc", Result)
|
||||
""
|
||||
string__replace_all("aaa bbbb ccccc aaa", "aab", "**", Result)
|
||||
"aaa bbbb ccccc aaa"
|
||||
string__replace_all("aaa bbbb ccccc aaa", "aaaa", "**", Result)
|
||||
"aaa bbbb ccccc aaa"
|
||||
string__replace_all("aaa bbbb ccccc aaa", "", "**", Result)
|
||||
"**a**a**a** **b**b**b**b** **c**c**c**c**c** **a**a**a**"
|
||||
string__replace_all("aßξ啕ßξ啕𐀀.", "", "**", Result)
|
||||
"**a**ß**ξ**啕**ß**ξ**啕**𐀀**.**"
|
||||
string__replace_all("[0xde00]😀", "", "**", Result)
|
||||
"**[0xde00]**😀**"
|
||||
string__replace_all("aaa bbbb ccccc aaa", "aaa", "", Result)
|
||||
" bbbb ccccc "
|
||||
string__replace_all("aaa bbbb ccccc aaa", "cc", "**", Result)
|
||||
"aaa bbbb ****c aaa"
|
||||
string__replace_all("aßξ啕ßξ啕𐀀.", "ßξ", "**", Result)
|
||||
"a**啕**啕𐀀."
|
||||
string__replace_all("aßξ啕ßξ啕𐀀.", "ßξ", "★★", Result)
|
||||
"a★★啕★★啕𐀀."
|
||||
string__replace_all("aßξ啕ßξ啕𐀀.", "啕ßξ", "***", Result)
|
||||
"aßξ***啕𐀀."
|
||||
@@ -1,6 +1,11 @@
|
||||
%---------------------------------------------------------------------------%
|
||||
% vim: ts=4 sw=4 et ft=mercury
|
||||
%---------------------------------------------------------------------------%
|
||||
%
|
||||
% The .exp file is for backends using UTF-8 string encoding.
|
||||
% The .exp2 file is for backends using UTF-16 string encoding.
|
||||
%
|
||||
%---------------------------------------------------------------------------%
|
||||
|
||||
:- module string_replace.
|
||||
|
||||
@@ -14,6 +19,8 @@
|
||||
|
||||
:- implementation.
|
||||
|
||||
:- import_module char.
|
||||
:- import_module int.
|
||||
:- import_module list.
|
||||
:- import_module string.
|
||||
|
||||
@@ -22,6 +29,8 @@
|
||||
main(!IO) :-
|
||||
Str = "aaa bbbb ccccc aaa",
|
||||
Str2 = "aßξ啕ßξ啕𐀀.",
|
||||
Smiley = "😀",
|
||||
Str3 = between(Smiley, 1, length(Smiley)) ++ Smiley,
|
||||
Tests = [
|
||||
% pattern not in string
|
||||
{"", "a", "bc"},
|
||||
@@ -31,6 +40,7 @@ main(!IO) :-
|
||||
% pattern is empty string
|
||||
{Str, "", "**"},
|
||||
{Str2, "", "**"},
|
||||
{Str3, "", "**"},
|
||||
|
||||
% pattern in string
|
||||
{Str, "aaa", ""},
|
||||
@@ -39,31 +49,58 @@ main(!IO) :-
|
||||
{Str2, "ßξ", "★★"},
|
||||
{Str2, "啕ßξ", "***"}
|
||||
],
|
||||
list__foldl(test_replace, Tests, !IO),
|
||||
list__foldl(test_replace_all, Tests, !IO).
|
||||
list.foldl(test_replace, Tests, !IO),
|
||||
list.foldl(test_replace_all, Tests, !IO).
|
||||
|
||||
:- pred test_replace({string, string, string}::in, io::di, io::uo) is det.
|
||||
|
||||
test_replace({Str, Pat, Subst}, !IO) :-
|
||||
io__write_string("string__replace(\"" ++ Str ++
|
||||
"\", \"" ++ Pat ++
|
||||
"\", \"" ++ Subst ++ "\", Result) \n\t", !IO),
|
||||
( string__replace(Str, Pat, Subst, Result) ->
|
||||
io__write(Result, !IO),
|
||||
io__nl(!IO)
|
||||
;
|
||||
io__write_string("FAIL!\n", !IO)
|
||||
io.write_string("string__replace(\"", !IO),
|
||||
write_string_debug(Str, !IO),
|
||||
io.write_string("\", \"", !IO),
|
||||
io.write_string(Pat, !IO),
|
||||
io.write_string("\", \"", !IO),
|
||||
io.write_string(Subst, !IO),
|
||||
io.write_string("\", Result) \n\t", !IO),
|
||||
( if string.replace(Str, Pat, Subst, Result) then
|
||||
io.write_string("\"", !IO),
|
||||
write_string_debug(Result, !IO),
|
||||
io.write_string("\"\n", !IO)
|
||||
else
|
||||
io.write_string("FAIL!\n", !IO)
|
||||
).
|
||||
|
||||
:- pred test_replace_all({string, string, string}::in, io::di, io::uo) is det.
|
||||
|
||||
test_replace_all({Str, Pat, Subst}, !IO) :-
|
||||
io__write_string("string__replace_all(\"" ++ Str ++
|
||||
"\", \"" ++ Pat ++
|
||||
"\", \"" ++ Subst ++ "\", Result) \n\t", !IO),
|
||||
string__replace_all(Str, Pat, Subst, Result),
|
||||
io__write(Result, !IO),
|
||||
io__nl(!IO).
|
||||
io.write_string("string__replace_all(\"", !IO),
|
||||
write_string_debug(Str, !IO),
|
||||
io.write_string("\", \"", !IO),
|
||||
io.write_string(Pat, !IO),
|
||||
io.write_string("\", \"", !IO),
|
||||
io.write_string(Subst, !IO),
|
||||
io.write_string("\", Result) \n\t", !IO),
|
||||
io.write_string("\"", !IO),
|
||||
string.replace_all(Str, Pat, Subst, Result),
|
||||
write_string_debug(Result, !IO),
|
||||
io.write_string("\"\n", !IO).
|
||||
|
||||
%---------------------------------------------------------------------------%
|
||||
%---------------------------------------------------------------------------%
|
||||
:- pred write_string_debug(string::in, io::di, io::uo) is det.
|
||||
|
||||
write_string_debug(S, !IO) :-
|
||||
write_string_debug_loop(S, 0, !IO).
|
||||
|
||||
:- pred write_string_debug_loop(string::in, int::in, io::di, io::uo) is det.
|
||||
|
||||
write_string_debug_loop(S, Index, !IO) :-
|
||||
( if string.index_next(S, Index, NextIndex, Char) then
|
||||
( if char.is_surrogate(Char) ; Char = '\ufffd' then
|
||||
unsafe_index_code_unit(S, Index, Code),
|
||||
io.format("[%#x]", [i(Code)], !IO)
|
||||
else
|
||||
io.write_char(Char, !IO)
|
||||
),
|
||||
write_string_debug_loop(S, NextIndex, !IO)
|
||||
else
|
||||
true
|
||||
).
|
||||
|
||||
Reference in New Issue
Block a user