<div dir="ltr">This looks fine.</div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Thu, Nov 7, 2019 at 10:44 AM Peter Wang <<a href="mailto:novalazy@gmail.com">novalazy@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">library/string.m:<br>
Define behaviour of string.replace_all on ill-formed code unit<br>
sequences when the pattern is empty.<br>
<br>
Implement that behaviour.<br>
<br>
tests/general/string_replace.exp:<br>
tests/general/string_replace.exp2:<br>
tests/general/string_replace.m:<br>
Extend test case.<br>
<br>
Update code style.<br>
---<br>
library/string.m | 61 ++++++++++++++++++++++---<br>
tests/general/string_replace.exp | 4 ++<br>
tests/general/string_replace.exp2 | 44 ++++++++++++++++++<br>
tests/general/string_replace.m | 75 +++++++++++++++++++++++--------<br>
4 files changed, 159 insertions(+), 25 deletions(-)<br>
create mode 100644 tests/general/string_replace.exp2<br>
<br>
diff --git a/library/string.m b/library/string.m<br>
index c65e857f8..7d6dda761 100644<br>
--- a/library/string.m<br>
+++ b/library/string.m<br>
@@ -1077,11 +1077,19 @@<br>
%<br>
:- pred replace(string::in, string::in, string::in, string::uo) is semidet.<br>
<br>
- % replace_all(String0, Search, Replace, String):<br>
+ % replace_all(String0, Pattern, Subst, String):<br>
%<br>
- % Replaces any occurrences of Search in String0 with Replace to give<br>
+ % Replaces any occurrences of Pattern in String0 with Subst to give<br>
% String.<br>
%<br>
+ % If Pattern is the empty string then Subst is inserted at every point<br>
+ % in String0 except between two code units in an encoding of a code point.<br>
+ % For example, these are true:<br>
+ %<br>
+ % replace_all("", "", "|", "|")<br>
+ % replace_all("a", "", "|", "|a|")<br>
+ % replace_all("ab", "", "|", "|a|b|")<br>
+ %<br>
:- func replace_all(string::in, string::in, string::in) = (string::uo) is det.<br>
:- pred replace_all(string::in, string::in, string::in, string::uo) is det.<br>
<br>
@@ -4718,10 +4726,7 @@ replace_all(S1, S2, S3) = S4 :-<br>
<br>
replace_all(Str, Pat, Subst, Result) :-<br>
( if Pat = "" then<br>
- % XXX ILSEQ foldl cannot handle ill-formed sequences.<br>
- F = (func(C, L) = [char_to_string(C) ++ Subst | L]),<br>
- Foldl = foldl(F, Str, []),<br>
- Result = append_list([Subst | list.reverse(Foldl)])<br>
+ replace_all_empty_pat(Str, Subst, Result)<br>
else<br>
PatLength = length(Pat),<br>
replace_all_loop(Str, Pat, Subst, PatLength, 0, [], ReversedChunks),<br>
@@ -4729,6 +4734,50 @@ replace_all(Str, Pat, Subst, Result) :-<br>
Result = append_list(Chunks)<br>
).<br>
<br>
+:- pred replace_all_empty_pat(string::in, string::in, string::uo) is det.<br>
+<br>
+replace_all_empty_pat(Str, Subst, Result) :-<br>
+ % This implementation is not the most efficient, but it is not expected<br>
+ % to be used much in practice.<br>
+ to_code_unit_list(Subst, SubstCodes),<br>
+ Codes0 = SubstCodes,<br>
+ replace_all_empty_pat_loop(Str, SubstCodes, length(Str), Codes0, Codes),<br>
+ ( if from_code_unit_list_allow_ill_formed(Codes, ResultPrime) then<br>
+ Result = ResultPrime<br>
+ else<br>
+ unexpected($pred, "string.from_code_unit_list_allow_ill_formed failed")<br>
+ ).<br>
+<br>
+:- pred replace_all_empty_pat_loop(string::in, list(int)::in, int::in,<br>
+ list(int)::in, list(int)::out) is det.<br>
+<br>
+replace_all_empty_pat_loop(Str, Subst, Index, Codes0, Codes) :-<br>
+ ( if unsafe_prev_index(Str, Index, PrevIndex, Char) then<br>
+ char.to_int(Char, CharInt),<br>
+ ( if CharInt =< 0x7f then<br>
+ % Fast path for single code unit code points.<br>
+ Codes1 = [CharInt | Codes0]<br>
+ else<br>
+ prepend_code_units(Str, PrevIndex, Index - 1, Codes0, Codes1)<br>
+ ),<br>
+ Codes2 = Subst ++ Codes1,<br>
+ replace_all_empty_pat_loop(Str, Subst, PrevIndex, Codes2, Codes)<br>
+ else<br>
+ Codes = Codes0<br>
+ ).<br>
+<br>
+:- pred prepend_code_units(string::in, int::in, int::in,<br>
+ list(int)::in, list(int)::out) is det.<br>
+<br>
+prepend_code_units(Str, FirstIndex, Index, Codes0, Codes) :-<br>
+ unsafe_index_code_unit(Str, Index, Code),<br>
+ Codes1 = [Code | Codes0],<br>
+ ( if Index = FirstIndex then<br>
+ Codes = Codes1<br>
+ else<br>
+ prepend_code_units(Str, FirstIndex, Index - 1, Codes1, Codes)<br>
+ ).<br>
+<br>
:- pred replace_all_loop(string::in, string::in, string::in,<br>
int::in, int::in, list(string)::in, list(string)::out) is det.<br>
<br>
diff --git a/tests/general/string_replace.exp b/tests/general/string_replace.exp<br>
index eab49079f..d633a7822 100644<br>
--- a/tests/general/string_replace.exp<br>
+++ b/tests/general/string_replace.exp<br>
@@ -8,6 +8,8 @@ string__replace("aaa bbbb ccccc aaa", "", "**", Result)<br>
"**aaa bbbb ccccc aaa"<br>
string__replace("aßξ啕ßξ啕𐀀.", "", "**", Result) <br>
"**aßξ啕ßξ啕𐀀."<br>
+string__replace("[0x9f][0x98][0x80]😀", "", "**", Result) <br>
+ "**[0x9f][0x98][0x80]😀"<br>
string__replace("aaa bbbb ccccc aaa", "aaa", "", Result) <br>
" bbbb ccccc aaa"<br>
string__replace("aaa bbbb ccccc aaa", "cc", "**", Result) <br>
@@ -28,6 +30,8 @@ string__replace_all("aaa bbbb ccccc aaa", "", "**", Result)<br>
"**a**a**a** **b**b**b**b** **c**c**c**c**c** **a**a**a**"<br>
string__replace_all("aßξ啕ßξ啕𐀀.", "", "**", Result) <br>
"**a**ß**ξ**啕**ß**ξ**啕**𐀀**.**"<br>
+string__replace_all("[0x9f][0x98][0x80]😀", "", "**", Result) <br>
+ "**[0x9f]**[0x98]**[0x80]**😀**"<br>
string__replace_all("aaa bbbb ccccc aaa", "aaa", "", Result) <br>
" bbbb ccccc "<br>
string__replace_all("aaa bbbb ccccc aaa", "cc", "**", Result) <br>
diff --git a/tests/general/string_replace.exp2 b/tests/general/string_replace.exp2<br>
new file mode 100644<br>
index 000000000..082aa9237<br>
--- /dev/null<br>
+++ b/tests/general/string_replace.exp2<br>
@@ -0,0 +1,44 @@<br>
+string__replace("", "a", "bc", Result) <br>
+ FAIL!<br>
+string__replace("aaa bbbb ccccc aaa", "aab", "**", Result) <br>
+ FAIL!<br>
+string__replace("aaa bbbb ccccc aaa", "aaaa", "**", Result) <br>
+ FAIL!<br>
+string__replace("aaa bbbb ccccc aaa", "", "**", Result) <br>
+ "**aaa bbbb ccccc aaa"<br>
+string__replace("aßξ啕ßξ啕𐀀.", "", "**", Result) <br>
+ "**aßξ啕ßξ啕𐀀."<br>
+string__replace("[0xde00]😀", "", "**", Result) <br>
+ "**[0xde00]😀"<br>
+string__replace("aaa bbbb ccccc aaa", "aaa", "", Result) <br>
+ " bbbb ccccc aaa"<br>
+string__replace("aaa bbbb ccccc aaa", "cc", "**", Result) <br>
+ "aaa bbbb **ccc aaa"<br>
+string__replace("aßξ啕ßξ啕𐀀.", "ßξ", "**", Result) <br>
+ "a**啕ßξ啕𐀀."<br>
+string__replace("aßξ啕ßξ啕𐀀.", "ßξ", "★★", Result) <br>
+ "a★★啕ßξ啕𐀀."<br>
+string__replace("aßξ啕ßξ啕𐀀.", "啕ßξ", "***", Result) <br>
+ "aßξ***啕𐀀."<br>
+string__replace_all("", "a", "bc", Result) <br>
+ ""<br>
+string__replace_all("aaa bbbb ccccc aaa", "aab", "**", Result) <br>
+ "aaa bbbb ccccc aaa"<br>
+string__replace_all("aaa bbbb ccccc aaa", "aaaa", "**", Result) <br>
+ "aaa bbbb ccccc aaa"<br>
+string__replace_all("aaa bbbb ccccc aaa", "", "**", Result) <br>
+ "**a**a**a** **b**b**b**b** **c**c**c**c**c** **a**a**a**"<br>
+string__replace_all("aßξ啕ßξ啕𐀀.", "", "**", Result) <br>
+ "**a**ß**ξ**啕**ß**ξ**啕**𐀀**.**"<br>
+string__replace_all("[0xde00]😀", "", "**", Result) <br>
+ "**[0xde00]**😀**"<br>
+string__replace_all("aaa bbbb ccccc aaa", "aaa", "", Result) <br>
+ " bbbb ccccc "<br>
+string__replace_all("aaa bbbb ccccc aaa", "cc", "**", Result) <br>
+ "aaa bbbb ****c aaa"<br>
+string__replace_all("aßξ啕ßξ啕𐀀.", "ßξ", "**", Result) <br>
+ "a**啕**啕𐀀."<br>
+string__replace_all("aßξ啕ßξ啕𐀀.", "ßξ", "★★", Result) <br>
+ "a★★啕★★啕𐀀."<br>
+string__replace_all("aßξ啕ßξ啕𐀀.", "啕ßξ", "***", Result) <br>
+ "aßξ***啕𐀀."<br>
diff --git a/tests/general/string_replace.m b/tests/general/string_replace.m<br>
index 2b905fe90..3dcfa2ae8 100644<br>
--- a/tests/general/string_replace.m<br>
+++ b/tests/general/string_replace.m<br>
@@ -1,6 +1,11 @@<br>
%---------------------------------------------------------------------------%<br>
% vim: ts=4 sw=4 et ft=mercury<br>
%---------------------------------------------------------------------------%<br>
+%<br>
+% The .exp file is for backends using UTF-8 string encoding.<br>
+% The .exp2 file is for backends using UTF-16 string encoding.<br>
+%<br>
+%---------------------------------------------------------------------------%<br>
<br>
:- module string_replace.<br>
<br>
@@ -14,6 +19,8 @@<br>
<br>
:- implementation.<br>
<br>
+:- import_module char.<br>
+:- import_module int.<br>
:- import_module list.<br>
:- import_module string.<br>
<br>
@@ -22,6 +29,8 @@<br>
main(!IO) :-<br>
Str = "aaa bbbb ccccc aaa",<br>
Str2 = "aßξ啕ßξ啕𐀀.",<br>
+ Smiley = "😀",<br>
+ Str3 = between(Smiley, 1, length(Smiley)) ++ Smiley,<br>
Tests = [<br>
% pattern not in string<br>
{"", "a", "bc"},<br>
@@ -31,6 +40,7 @@ main(!IO) :-<br>
% pattern is empty string<br>
{Str, "", "**"},<br>
{Str2, "", "**"},<br>
+ {Str3, "", "**"},<br>
<br>
% pattern in string<br>
{Str, "aaa", ""},<br>
@@ -39,31 +49,58 @@ main(!IO) :-<br>
{Str2, "ßξ", "★★"},<br>
{Str2, "啕ßξ", "***"}<br>
],<br>
- list__foldl(test_replace, Tests, !IO),<br>
- list__foldl(test_replace_all, Tests, !IO).<br>
+ list.foldl(test_replace, Tests, !IO),<br>
+ list.foldl(test_replace_all, Tests, !IO).<br>
<br>
:- pred test_replace({string, string, string}::in, io::di, io::uo) is det.<br>
<br>
test_replace({Str, Pat, Subst}, !IO) :-<br>
- io__write_string("string__replace(\"" ++ Str ++<br>
- "\", \"" ++ Pat ++<br>
- "\", \"" ++ Subst ++ "\", Result) \n\t", !IO),<br>
- ( string__replace(Str, Pat, Subst, Result) -><br>
- io__write(Result, !IO),<br>
- io__nl(!IO)<br>
- ;<br>
- io__write_string("FAIL!\n", !IO)<br>
+ io.write_string("string__replace(\"", !IO),<br>
+ write_string_debug(Str, !IO),<br>
+ io.write_string("\", \"", !IO),<br>
+ io.write_string(Pat, !IO),<br>
+ io.write_string("\", \"", !IO),<br>
+ io.write_string(Subst, !IO),<br>
+ io.write_string("\", Result) \n\t", !IO),<br>
+ ( if string.replace(Str, Pat, Subst, Result) then<br>
+ io.write_string("\"", !IO),<br>
+ write_string_debug(Result, !IO),<br>
+ io.write_string("\"\n", !IO)<br>
+ else<br>
+ io.write_string("FAIL!\n", !IO)<br>
).<br>
<br>
:- pred test_replace_all({string, string, string}::in, io::di, io::uo) is det.<br>
<br>
test_replace_all({Str, Pat, Subst}, !IO) :-<br>
- io__write_string("string__replace_all(\"" ++ Str ++<br>
- "\", \"" ++ Pat ++<br>
- "\", \"" ++ Subst ++ "\", Result) \n\t", !IO),<br>
- string__replace_all(Str, Pat, Subst, Result),<br>
- io__write(Result, !IO),<br>
- io__nl(!IO).<br>
-<br>
-%---------------------------------------------------------------------------%<br>
-%---------------------------------------------------------------------------%<br>
+ io.write_string("string__replace_all(\"", !IO),<br>
+ write_string_debug(Str, !IO),<br>
+ io.write_string("\", \"", !IO),<br>
+ io.write_string(Pat, !IO),<br>
+ io.write_string("\", \"", !IO),<br>
+ io.write_string(Subst, !IO),<br>
+ io.write_string("\", Result) \n\t", !IO),<br>
+ io.write_string("\"", !IO),<br>
+ string.replace_all(Str, Pat, Subst, Result),<br>
+ write_string_debug(Result, !IO),<br>
+ io.write_string("\"\n", !IO).<br>
+<br>
+:- pred write_string_debug(string::in, io::di, io::uo) is det.<br>
+<br>
+write_string_debug(S, !IO) :-<br>
+ write_string_debug_loop(S, 0, !IO).<br>
+<br>
+:- pred write_string_debug_loop(string::in, int::in, io::di, io::uo) is det.<br>
+<br>
+write_string_debug_loop(S, Index, !IO) :-<br>
+ ( if string.index_next(S, Index, NextIndex, Char) then<br>
+ ( if char.is_surrogate(Char) ; Char = '\ufffd' then<br>
+ unsafe_index_code_unit(S, Index, Code),<br>
+ io.format("[%#x]", [i(Code)], !IO)<br>
+ else<br>
+ io.write_char(Char, !IO)<br>
+ ),<br>
+ write_string_debug_loop(S, NextIndex, !IO)<br>
+ else<br>
+ true<br>
+ ).<br>
-- <br>
2.23.0<br>
<br>
_______________________________________________<br>
reviews mailing list<br>
<a href="mailto:reviews@lists.mercurylang.org" target="_blank">reviews@lists.mercurylang.org</a><br>
<a href="https://lists.mercurylang.org/listinfo/reviews" rel="noreferrer" target="_blank">https://lists.mercurylang.org/listinfo/reviews</a><br>
</blockquote></div>