[m-rev.] for review: Add string indexing predicates that indicate a code unit was replaced.

Peter Wang novalazy at gmail.com
Thu Nov 14 16:56:09 AEDT 2019


library/string.m:
    Add index_next_repl, unsafe_index_next_repl, prev_index_repl and
    unsafe_prev_index_repl predicates, which indicate whether the
    returned character is a replacement character that was substituted
    because an ill-formed code unit sequence was encountered.

    Add more pragma inlines for indexing predicates.

    Remove the may_not_duplicate attribute from the Erlang foreign_proc
    for unsafe_prev_index_repl_2 (formerly unsafe_prev_index_repl), as it
    would conflict with the pragma inline declaration. This requires the
    helper function do_unsafe_prev_index to be exported.

tests/hard_coded/string_append_ooi_ilseq.m:
tests/hard_coded/string_set_char_ilseq.m:
    Use index_next_repl in test cases.

NEWS:
    Announce additions.
---
 NEWS                                       |   4 +
 library/string.m                           | 236 ++++++++++++---------
 tests/hard_coded/string_append_ooi_ilseq.m |  20 +-
 tests/hard_coded/string_set_char_ilseq.m   |  18 +-
 4 files changed, 169 insertions(+), 109 deletions(-)

diff --git a/NEWS b/NEWS
index ef22ba3fd..f41c9fb8e 100644
--- a/NEWS
+++ b/NEWS
@@ -434,6 +434,10 @@ Changes to the Mercury standard library:
    - append_string_pieces/2
    - unsafe_append_string_pieces/2
    - unsafe_sub_string_search_start/4
+   - index_next_repl/5
+   - unsafe_index_next_repl/5
+   - prev_index_repl/5
+   - unsafe_prev_index_repl/5
 
   The following procedures in the string module have been deprecated:
 
diff --git a/library/string.m b/library/string.m
index a9fbfd693..a28c33792 100644
--- a/library/string.m
+++ b/library/string.m
@@ -262,6 +262,10 @@
 % Reading characters from strings.
 %
 
+:- type maybe_replaced
+    --->    not_replaced
+    ;       replaced_code_unit(uint8).
+
     % index(String, Index, Char):
     %
     % If `Index' is the initial code unit offset of a well-formed code unit
@@ -324,6 +328,17 @@
     %
 :- pred index_next(string::in, int::in, int::out, char::uo) is semidet.
 
+    % index_next_repl(String, Index, NextIndex, Char, MaybeReplaced):
+    %
+    % Like index_next/4 but `MaybeReplaced' is `replaced_code_unit(CodeUnit)'
+    % iff `Char' is U+FFFD but `String' does NOT contain an encoding of U+FFFD
+    % beginning at `Index'; `CodeUnit' is the code unit at `Index'.
+    % (`MaybeReplaced' is always `not_replaced' when strings are UTF-16
+    % encoded.)
+    %
+:- pred index_next_repl(string::in, int::in, int::out, char::uo,
+    maybe_replaced::out) is semidet.
+
     % unsafe_index_next(String, Index, NextIndex, Char):
     %
     % Like index_next/4 but does not check that `Index' is in range.
@@ -334,6 +349,17 @@
     %
 :- pred unsafe_index_next(string::in, int::in, int::out, char::uo) is semidet.
 
+    % unsafe_index_next_repl(String, Index, NextIndex, Char, MaybeReplaced):
+    %
+    % Like index_next_repl/5 but does not check that `Index' is in range.
+    % Fails if `Index' is equal to the length of `String'.
+    %
+    % WARNING: behavior is UNDEFINED if `Index' is out of range
+    % (negative, or greater than the length of `String').
+    %
+:- pred unsafe_index_next_repl(string::in, int::in, int::out, char::uo,
+    maybe_replaced::out) is semidet.
+
     % prev_index(String, Index, PrevIndex, Char):
     %
     % If `Index - 1' is the final code unit offset of a well-formed sequence in
@@ -350,6 +376,17 @@
     %
 :- pred prev_index(string::in, int::in, int::out, char::uo) is semidet.
 
+    % prev_index_repl(String, Index, PrevIndex, Char, MaybeReplaced):
+    %
+    % Like prev_index/4 but `MaybeReplaced' is `replaced_code_unit(CodeUnit)'
+    % iff `Char' is U+FFFD but `String' does NOT contain an encoding of U+FFFD
+    % ending at `Index - 1'; `CodeUnit' is the code unit at `Index - 1'.
+    % (`MaybeReplaced' is always `not_replaced' when strings are UTF-16
+    % encoded.)
+    %
+:- pred prev_index_repl(string::in, int::in, int::out, char::uo,
+    maybe_replaced::out) is semidet.
+
     % unsafe_prev_index(String, Index, PrevIndex, Char):
     %
     % Like prev_index/4 but does not check that `Index' is in range.
@@ -360,6 +397,17 @@
     %
 :- pred unsafe_prev_index(string::in, int::in, int::out, char::uo) is semidet.
 
+    % unsafe_prev_index_repl(String, Index, PrevIndex, Char, MaybeReplaced):
+    %
+    % Like prev_index_repl/5 but does not check that `Index' is in range.
+    % Fails if `Index' is zero.
+    %
+    % WARNING: behavior is UNDEFINED if `Index' is out of range
+    % (negative, or greater than the length of `String').
+    %
+:- pred unsafe_prev_index_repl(string::in, int::in, int::out, char::uo,
+    maybe_replaced::out) is semidet.
+
     % unsafe_index_code_unit(String, Index, CodeUnit):
     %
     % `CodeUnit' is the code unit in `String' at the offset `Index'.
@@ -1578,6 +1626,7 @@
 :- import_module string.format.
 :- import_module string.to_string.
 :- import_module term_io.
+:- import_module uint8.
 
 % Many routines in this module are implemented using foreign language code.
 
@@ -1989,13 +2038,13 @@ to_utf16_code_unit_list(String, CodeList) :-
 
 utf8_to_utf16_code_units_loop(String, Index, CodeList0, CodeList) :-
     ( if
-        unsafe_prev_index_repl(String, Index, PrevIndex, Char, IsReplaced)
+        unsafe_prev_index_repl(String, Index, PrevIndex, Char, MaybeReplaced)
     then
         (
-            IsReplaced = yes,
+            MaybeReplaced = replaced_code_unit(_),
             unexpected($pred, "ill-formed code unit sequence")
         ;
-            IsReplaced = no,
+            MaybeReplaced = not_replaced,
             ( if char.to_utf16(Char, CharCodes) then
                 CodeList1 = CharCodes ++ CodeList0
             else
@@ -2282,17 +2331,18 @@ duplicate_char(Char, Count, String) :-
 % so that the compiler can do loop invariant hoisting on calls to them
 % that occur in loops.
 
-% XXX ILSEQ
-% We should allow the possibility of working with strings containing ill-formed
-% sequences. That would require predicates that can return either a code point
-% when possible, or else code units from ill-formed sequences.
-
 :- pragma inline(index/3).
 :- pragma inline(det_index/3).
 :- pragma inline(index_next/4).
 :- pragma inline(index_next_repl/5).
+:- pragma inline(unsafe_index_next/4).
+:- pragma inline(unsafe_index_next_repl/5).
+:- pragma inline(unsafe_index_next_repl_2/5).
 :- pragma inline(prev_index/4).
 :- pragma inline(prev_index_repl/5).
+:- pragma inline(unsafe_prev_index/4).
+:- pragma inline(unsafe_prev_index_repl/5).
+:- pragma inline(unsafe_prev_index_repl_2/5).
 
 index(Str, Index, Char) :-
     Len = length(Str),
@@ -2368,42 +2418,39 @@ String ^ unsafe_elem(Index) = unsafe_index(String, Index).
 %---------------------%
 
 index_next(Str, Index, NextIndex, Char) :-
-    index_next_repl(Str, Index, NextIndex, Char, _IsReplaced).
-
-unsafe_index_next(Str, Index, NextIndex, Ch) :-
-    unsafe_index_next_repl(Str, Index, NextIndex, Ch, _IsReplaced).
-
-    % XXX ILSEQ Export something like this.
-    % index_next_repl(String, Index, NextIndex, Char, IsReplaced):
-    %
-    % Like index_next/4 but `IsReplaced' is `yes' iff `Char' is U+FFFD but
-    % `String' does NOT contain an encoding of U+FFFD beginning at `Index'.
-    % (`IsReplaced' is always `no' when strings are UTF-16 encoded.)
-    %
-:- pred index_next_repl(string::in, int::in, int::out, char::uo, bool::out)
-    is semidet.
+    index_next_repl(Str, Index, NextIndex, Char, _MaybeReplaced).
 
-index_next_repl(Str, Index, NextIndex, Char, IsReplaced) :-
+index_next_repl(Str, Index, NextIndex, Char, MaybeReplaced) :-
     Len = length(Str),
     ( if index_check(Index, Len) then
-        unsafe_index_next_repl(Str, Index, NextIndex, Char, IsReplaced)
+        unsafe_index_next_repl(Str, Index, NextIndex, Char, MaybeReplaced)
     else
         fail
     ).
 
-    % XXX ILSEQ Export something like this.
-    %
-:- pred unsafe_index_next_repl(string::in, int::in, int::out, char::uo,
-    bool::out) is semidet.
+unsafe_index_next(Str, Index, NextIndex, Ch) :-
+    unsafe_index_next_repl_2(Str, Index, NextIndex, Ch, _ReplacedCodeUnit).
+
+unsafe_index_next_repl(Str, Index, NextIndex, Ch, MaybeReplaced) :-
+    unsafe_index_next_repl_2(Str, Index, NextIndex, Ch, ReplacedCodeUnit),
+    ( if ReplacedCodeUnit = -1 then
+        MaybeReplaced = not_replaced
+    else
+        CodeUnit = uint8.cast_from_int(ReplacedCodeUnit),
+        MaybeReplaced = replaced_code_unit(CodeUnit)
+    ).
+
+:- pred unsafe_index_next_repl_2(string::in, int::in, int::out, char::uo,
+    int::out) is semidet.
 
 :- pragma foreign_proc("C",
-    unsafe_index_next_repl(Str::in, Index::in, NextIndex::out, Ch::uo,
-        IsReplaced::out),
+    unsafe_index_next_repl_2(Str::in, Index::in, NextIndex::out, Ch::uo,
+        ReplacedCodeUnit::out),
     [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
         does_not_affect_liveness, no_sharing],
 "
     Ch = Str[Index];
-    IsReplaced = MR_FALSE;
+    ReplacedCodeUnit = -1;
     if (MR_is_ascii(Ch)) {
         NextIndex = Index + 1;
         SUCCESS_INDICATOR = (Ch != 0);
@@ -2412,19 +2459,21 @@ index_next_repl(Str, Index, NextIndex, Char, IsReplaced) :-
         Ch = MR_utf8_get_next_mb(Str, &NextIndex);
         if (Ch < 0) {
             Ch = 0xfffd;
-            IsReplaced = MR_TRUE;
+            // Cast via unsigned char: plain char may be signed, and the
+            // code unit 0xff must not collide with the -1 sentinel.
+            ReplacedCodeUnit = (unsigned char) Str[Index];
             NextIndex = Index + 1;
         }
         SUCCESS_INDICATOR = MR_TRUE;
     }
 ").
 :- pragma foreign_proc("C#",
-    unsafe_index_next_repl(Str::in, Index::in, NextIndex::out, Ch::uo,
-        IsReplaced::out),
+    unsafe_index_next_repl_2(Str::in, Index::in, NextIndex::out, Ch::uo,
+        ReplacedCodeUnit::out),
     [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
         does_not_affect_liveness, no_sharing],
 "
-    IsReplaced = mr_bool.NO;
+    ReplacedCodeUnit = -1;
     try {
         Ch = System.Char.ConvertToUtf32(Str, Index);
         if (Ch <= 0xffff) {
@@ -2445,12 +2492,12 @@ index_next_repl(Str, Index, NextIndex, Char, IsReplaced) :-
     }
 ").
 :- pragma foreign_proc("Java",
-    unsafe_index_next_repl(Str::in, Index::in, NextIndex::out, Ch::uo,
-        IsReplaced::out),
+    unsafe_index_next_repl_2(Str::in, Index::in, NextIndex::out, Ch::uo,
+        ReplacedCodeUnit::out),
     [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
         does_not_affect_liveness, no_sharing],
 "
-    IsReplaced = bool.NO;
+    ReplacedCodeUnit = -1;
     try {
         Ch = Str.codePointAt(Index);
         NextIndex = Index + java.lang.Character.charCount(Ch);
@@ -2462,8 +2509,8 @@ index_next_repl(Str, Index, NextIndex, Char, IsReplaced) :-
     }
 ").
 :- pragma foreign_proc("Erlang",
-    unsafe_index_next_repl(Str::in, Index::in, NextIndex::out, Ch::uo,
-        IsReplaced::out),
+    unsafe_index_next_repl_2(Str::in, Index::in, NextIndex::out, Ch::uo,
+        ReplacedCodeUnit::out),
     [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
         does_not_affect_liveness, no_sharing],
 "
@@ -2480,11 +2527,11 @@ index_next_repl(Str, Index, NextIndex, Char, IsReplaced) :-
                 true ->
                     NextIndex = Index + 4
             end,
-            IsReplaced = {no},
+            ReplacedCodeUnit = -1,
             SUCCESS_INDICATOR = true;
         _ ->
             Ch = -1,
-            IsReplaced = {no},
+            ReplacedCodeUnit = -1,
             NextIndex = Index,
             SUCCESS_INDICATOR = false
     end
@@ -2493,36 +2540,38 @@ index_next_repl(Str, Index, NextIndex, Char, IsReplaced) :-
 %---------------------%
 
 prev_index(Str, Index, PrevIndex, Char) :-
-    prev_index_repl(Str, Index, PrevIndex, Char, _IsReplaced).
+    prev_index_repl(Str, Index, PrevIndex, Char, _MaybeReplaced).
 
-unsafe_prev_index(Str, Index, PrevIndex, Ch) :-
-    unsafe_prev_index_repl(Str, Index, PrevIndex, Ch, _IsReplaced).
-
-    % XXX ILSEQ Export something like this.
-    %
-:- pred prev_index_repl(string::in, int::in, int::out, char::uo, bool::out)
-    is semidet.
-
-prev_index_repl(Str, Index, PrevIndex, Char, IsReplaced) :-
+prev_index_repl(Str, Index, PrevIndex, Char, MaybeReplaced) :-
     Len = length(Str),
     ( if index_check(Index - 1, Len) then
-        unsafe_prev_index_repl(Str, Index, PrevIndex, Char, IsReplaced)
+        unsafe_prev_index_repl(Str, Index, PrevIndex, Char, MaybeReplaced)
     else
         fail
     ).
 
-    % XXX ILSEQ Export something like this.
-    %
-:- pred unsafe_prev_index_repl(string::in, int::in, int::out, char::uo,
-    bool::out) is semidet.
+unsafe_prev_index(Str, Index, PrevIndex, Ch) :-
+    unsafe_prev_index_repl_2(Str, Index, PrevIndex, Ch, _ReplacedCodeUnit).
+
+unsafe_prev_index_repl(Str, Index, PrevIndex, Ch, MaybeReplaced) :-
+    unsafe_prev_index_repl_2(Str, Index, PrevIndex, Ch, ReplacedCodeUnit),
+    ( if ReplacedCodeUnit = -1 then
+        MaybeReplaced = not_replaced
+    else
+        CodeUnit = uint8.cast_from_int(ReplacedCodeUnit),
+        MaybeReplaced = replaced_code_unit(CodeUnit)
+    ).
+
+:- pred unsafe_prev_index_repl_2(string::in, int::in, int::out, char::uo,
+    int::out) is semidet.
 
 :- pragma foreign_proc("C",
-    unsafe_prev_index_repl(Str::in, Index::in, PrevIndex::out, Ch::uo,
-        IsReplaced::out),
+    unsafe_prev_index_repl_2(Str::in, Index::in, PrevIndex::out, Ch::uo,
+        ReplacedCodeUnit::out),
     [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
         does_not_affect_liveness, no_sharing],
 "
-    IsReplaced = MR_FALSE;
+    ReplacedCodeUnit = -1;
     if (Index <= 0) {
         PrevIndex = Index;
         Ch = 0;
@@ -2537,7 +2586,9 @@ prev_index_repl(Str, Index, PrevIndex, Char, IsReplaced) :-
             // unaccounted for.
             if (Ch < 0 || PrevIndex + MR_utf8_width(Ch) != Index) {
                 Ch = 0xfffd;
-                IsReplaced = MR_TRUE;
+                // Cast via unsigned char: plain char may be signed, and
+                // the code unit 0xff must not collide with the -1 sentinel.
+                ReplacedCodeUnit = (unsigned char) Str[Index - 1];
                 PrevIndex = Index - 1;
             }
         }
@@ -2545,12 +2594,12 @@ prev_index_repl(Str, Index, PrevIndex, Char, IsReplaced) :-
     }
 ").
 :- pragma foreign_proc("C#",
-    unsafe_prev_index_repl(Str::in, Index::in, PrevIndex::out, Ch::uo,
-        IsReplaced::out),
+    unsafe_prev_index_repl_2(Str::in, Index::in, PrevIndex::out, Ch::uo,
+        ReplacedCodeUnit::out),
     [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
         does_not_affect_liveness, no_sharing],
 "
-    IsReplaced = mr_bool.NO;
+    ReplacedCodeUnit = -1;
     if (Index <= 0) {
         Ch = 0;
         PrevIndex = Index;
@@ -2579,12 +2628,12 @@ prev_index_repl(Str, Index, PrevIndex, Char, IsReplaced) :-
     }
 ").
 :- pragma foreign_proc("Java",
-    unsafe_prev_index_repl(Str::in, Index::in, PrevIndex::out, Ch::uo,
-        IsReplaced::out),
+    unsafe_prev_index_repl_2(Str::in, Index::in, PrevIndex::out, Ch::uo,
+        ReplacedCodeUnit::out),
     [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
         does_not_affect_liveness, no_sharing],
 "
-    IsReplaced = bool.NO;
+    ReplacedCodeUnit = -1;
     try {
         Ch = Str.codePointBefore(Index);
         PrevIndex = Index - java.lang.Character.charCount(Ch);
@@ -2596,17 +2645,21 @@ prev_index_repl(Str, Index, PrevIndex, Char, IsReplaced) :-
     }
 ").
 :- pragma foreign_proc("Erlang",
-    unsafe_prev_index_repl(Str::in, Index::in, PrevIndex::out, Ch::uo,
-        IsReplaced::out),
+    unsafe_prev_index_repl_2(Str::in, Index::in, PrevIndex::out, Ch::uo,
+        ReplacedCodeUnit::out),
     [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
-        does_not_affect_liveness, may_not_duplicate, no_sharing],
+        does_not_affect_liveness, no_sharing],
 "
     % XXX does not handle ill-formed sequences as described
-    {PrevIndex, Ch} = do_unsafe_prev_index(Str, Index - 1),
-    IsReplaced = {no},
+    {PrevIndex, Ch} = mercury__string:do_unsafe_prev_index(Str, Index - 1),
+    ReplacedCodeUnit = -1,
     SUCCESS_INDICATOR = (Ch =/= -1)
 ").
 
+:- pragma foreign_decl("Erlang", local, "
+-export([do_unsafe_prev_index/2]).
+").
+
 :- pragma foreign_code("Erlang", "
 do_unsafe_prev_index(Str, Index) ->
     if Index >= 0 ->
@@ -3319,8 +3372,7 @@ all_match(P, String) :-
     int::in) is semidet.
 
 all_match_loop(P, String, Cur) :-
-    ( if unsafe_index_next_repl(String, Cur, Next, Char, IsReplaced) then
-        IsReplaced = no,
+    ( if unsafe_index_next_repl(String, Cur, Next, Char, not_replaced) then
         P(Char),
         all_match_loop(P, String, Next)
     else
@@ -3372,11 +3424,8 @@ contains_char(String, Char) :-
 :- pred contains_char_loop(string::in, char::in, int::in) is semidet.
 
 contains_char_loop(Str, Char, I) :-
-    unsafe_index_next_repl(Str, I, J, IndexChar, IsReplaced),
-    ( if
-        IndexChar = Char,
-        IsReplaced = no
-    then
+    unsafe_index_next_repl(Str, I, J, IndexChar, not_replaced),
+    ( if IndexChar = Char then
         true
     else
         contains_char_loop(Str, Char, J)
@@ -3496,8 +3545,7 @@ prefix_length(P, S) = Index :-
 
 prefix_length_loop(P, S, I, Index) :-
     ( if
-        unsafe_index_next_repl(S, I, J, Char, IsReplaced),
-        IsReplaced = no,
+        unsafe_index_next_repl(S, I, J, Char, not_replaced),
         P(Char)
     then
         prefix_length_loop(P, S, J, Index)
@@ -3514,8 +3562,7 @@ suffix_length(P, S) = End - Index :-
 
 suffix_length_loop(P, S, I, Index) :-
     ( if
-        unsafe_prev_index_repl(S, I, J, Char, IsReplaced),
-        IsReplaced = no,
+        unsafe_prev_index_repl(S, I, J, Char, not_replaced),
         P(Char)
     then
         suffix_length_loop(P, S, J, Index)
@@ -4120,8 +4167,7 @@ first_char(Str::uo, First::in, Rest::in) :-
 :- mode first_char_rest_in(in, uo, in) is semidet.
 
 first_char_rest_in(Str, First, Rest) :-
-    index_next_repl(Str, 0, NextIndex, First0, IsReplaced),
-    IsReplaced = no,
+    index_next_repl(Str, 0, NextIndex, First0, not_replaced),
     not is_surrogate(First0),
     unsafe_promise_unique(First0, First),
     unsafe_compare_substrings((=), Str, NextIndex, Rest, 0, length(Rest)).
@@ -4131,8 +4177,7 @@ first_char_rest_in(Str, First, Rest) :-
 :- mode first_char_rest_out(in, uo, uo) is semidet.
 
 first_char_rest_out(Str, First, Rest) :-
-    index_next_repl(Str, 0, NextIndex, First0, IsReplaced),
-    IsReplaced = no,
+    index_next_repl(Str, 0, NextIndex, First0, not_replaced),
     not is_surrogate(First0),
     unsafe_promise_unique(First0, First),
     unsafe_between(Str, NextIndex, length(Str), Rest).
@@ -4325,8 +4370,7 @@ words_loop(SepP, String, WordStartPos, Words) :-
 
 skip_to_next_word_start(SepP, String, CurPos, NextWordStartPos) :-
     ( if
-        unsafe_index_next_repl(String, CurPos, NextPos, Char, IsReplaced),
-        IsReplaced = no,
+        unsafe_index_next_repl(String, CurPos, NextPos, Char, not_replaced),
         SepP(Char)
     then
         skip_to_next_word_start(SepP, String, NextPos, NextWordStartPos)
@@ -4341,8 +4385,11 @@ skip_to_next_word_start(SepP, String, CurPos, NextWordStartPos) :-
     string::in, int::in, int::out) is det.
 
 skip_to_word_end(SepP, String, CurPos, PastWordEndPos) :-
-    ( if unsafe_index_next_repl(String, CurPos, NextPos, Char, IsReplaced) then
-        ( if IsReplaced = no, SepP(Char) then
+    ( if unsafe_index_next_repl(String, CurPos, NextPos, Char, MaybeReplaced) then
+        ( if
+            MaybeReplaced = not_replaced,
+            SepP(Char)
+        then
             PastWordEndPos = CurPos
         else
             skip_to_word_end(SepP, String, NextPos, PastWordEndPos)
@@ -4367,9 +4414,9 @@ split_at_separator_loop(DelimP, Str, CurPos, PastSegEnd, !Segments) :-
     % Invariant: 0 =< CurPos =< length(Str).
     % PastSegEnd is one past the last index of the current segment.
     %
-    ( if unsafe_prev_index_repl(Str, CurPos, PrevPos, Char, IsReplaced) then
+    ( if unsafe_prev_index_repl(Str, CurPos, PrevPos, Char, MaybeReplaced) then
         ( if
-            IsReplaced = no,
+            MaybeReplaced = not_replaced,
             DelimP(Char)
         then
             % Chop here.
@@ -5509,8 +5556,7 @@ char_to_string(C) = S1 :-
 char_to_string(Char::in, String::uo) :-
     from_char_list([Char], String).
 char_to_string(Char::out, String::in) :-
-    index_next_repl(String, 0, NextIndex, Char, IsReplaced),
-    IsReplaced = no,
+    index_next_repl(String, 0, NextIndex, Char, not_replaced),
     length(String, NextIndex).
 
 from_char(Char) = char_to_string(Char).
diff --git a/tests/hard_coded/string_append_ooi_ilseq.m b/tests/hard_coded/string_append_ooi_ilseq.m
index e3a13fcad..e4199df9e 100644
--- a/tests/hard_coded/string_append_ooi_ilseq.m
+++ b/tests/hard_coded/string_append_ooi_ilseq.m
@@ -25,6 +25,7 @@
 :- import_module pair.
 :- import_module solutions.
 :- import_module string.
+:- import_module uint8.
 
 %---------------------------------------------------------------------------%
 
@@ -57,14 +58,17 @@ write_string_debug(S, !IO) :-
 :- pred write_string_debug_loop(string::in, int::in, io::di, io::uo) is det.
 
 write_string_debug_loop(S, Index, !IO) :-
-    ( if string.index_next(S, Index, NextIndex, Char) then
-        ( if Char = '\ufffd' then
-            string.unsafe_index_code_unit(S, Index, CodeUnit),
-            write_hex(CodeUnit, !IO)
-        else if is_surrogate(Char) then
-            write_hex(char.to_int(Char), !IO)
-        else
-            io.write_char(Char, !IO)
+    ( if string.index_next_repl(S, Index, NextIndex, Char, MaybeReplaced) then
+        (
+            MaybeReplaced = replaced_code_unit(CodeUnit),
+            write_hex(uint8.to_int(CodeUnit), !IO)
+        ;
+            MaybeReplaced = not_replaced,
+            ( if is_surrogate(Char) then
+                write_hex(char.to_int(Char), !IO)
+            else
+                io.write_char(Char, !IO)
+            )
         ),
         io.write_char(' ', !IO),
         write_string_debug_loop(S, NextIndex, !IO)
diff --git a/tests/hard_coded/string_set_char_ilseq.m b/tests/hard_coded/string_set_char_ilseq.m
index 99df95ff1..7ca68cc5e 100644
--- a/tests/hard_coded/string_set_char_ilseq.m
+++ b/tests/hard_coded/string_set_char_ilseq.m
@@ -23,6 +23,7 @@
 :- import_module int.
 :- import_module list.
 :- import_module string.
+:- import_module uint8.
 
 %---------------------------------------------------------------------------%
 
@@ -58,12 +59,17 @@ write_string_debug(S, !IO) :-
 :- pred write_string_debug_loop(string::in, int::in, io::di, io::uo) is det.
 
 write_string_debug_loop(S, Index, !IO) :-
-    ( if string.index_next(S, Index, NextIndex, Char) then
-        ( if char.is_surrogate(Char) ; Char = '\ufffd' then
-            string.unsafe_index_code_unit(S, Index, Code),
-            io.format("[%x]", [i(Code)], !IO)
-        else
-            io.write_char(Char, !IO)
+    ( if string.index_next_repl(S, Index, NextIndex, Char, MaybeReplaced) then
+        (
+            MaybeReplaced = replaced_code_unit(Code),
+            io.format("[%x]", [i(uint8.to_int(Code))], !IO)
+        ;
+            MaybeReplaced = not_replaced,
+            ( if char.is_surrogate(Char) then
+                io.format("[%x]", [i(char.to_int(Char))], !IO)
+            else
+                io.write_char(Char, !IO)
+            )
         ),
         write_string_debug_loop(S, NextIndex, !IO)
     else
-- 
2.23.0



More information about the reviews mailing list