[m-rev.] for review: Add string.compare_substrings and unsafe_compare_substrings.

Peter Wang novalazy at gmail.com
Wed Oct 23 15:02:05 AEDT 2019


library/string.m:
    Add the new predicates.

tests/hard_coded/Mmakefile:
tests/hard_coded/string_compare_substrings.exp:
tests/hard_coded/string_compare_substrings.m:
    Add test case.

NEWS:
    Announce additions.
---
 NEWS                                          |  2 +
 library/string.m                              | 65 ++++++++++++++
 tests/hard_coded/Mmakefile                    |  1 +
 .../hard_coded/string_compare_substrings.exp  |  1 +
 tests/hard_coded/string_compare_substrings.m  | 88 +++++++++++++++++++
 5 files changed, 157 insertions(+)
 create mode 100644 tests/hard_coded/string_compare_substrings.exp
 create mode 100644 tests/hard_coded/string_compare_substrings.m

diff --git a/NEWS b/NEWS
index ba69b8d16..536abd05b 100644
--- a/NEWS
+++ b/NEWS
@@ -427,6 +427,8 @@ Changes to the Mercury standard library:
    - det_remove_prefix/3
    - compare_ignore_case_ascii/3
    - to_rev_char_list/2
+   - compare_substrings/6
+   - unsafe_compare_substrings/6
 
   The following procedures in the string module have been deprecated:
 
diff --git a/library/string.m b/library/string.m
index ee624c32b..036f1438a 100644
--- a/library/string.m
+++ b/library/string.m
@@ -548,6 +548,30 @@
     %
 :- pred contains_char(string::in, char::in) is semidet.
 
+    % compare_substrings(Res, X, StartX, Y, StartY, Length):
+    %
+    % Compare two substrings by code unit order. The two substrings are
+    % the substring of `X' between `StartX' and `StartX + Length', and
+    % the substring of `Y' between `StartY' and `StartY + Length'.
+    % `StartX', `StartY' and `Length' are all in terms of code units.
+    %
+    % Fails if `StartX' or `StartX + Length' are not within [0, length(X)],
+    % or if `StartY' or `StartY + Length' are not within [0, length(Y)],
+    % or if `Length' is negative.
+    %
+:- pred compare_substrings(comparison_result::uo, string::in, int::in,
+    string::in, int::in, int::in) is semidet.
+
+    % unsafe_compare_substrings(Res, X, StartX, Y, StartY, Length):
+    %
+    % Same as compare_between/4 but without range checks.
+    % WARNING: if any of `StartX', `StartY', `StartX + Length' or
+    % `StartY + Length' are out of range, or if `Length' is negative,
+    % then the behaviour is UNDEFINED. Use with care!
+    %
+:- pred unsafe_compare_substrings(comparison_result::uo, string::in, int::in,
+    string::in, int::in, int::in) is det.
+
     % compare_ignore_case_ascii(Res, X, Y):
     %
     % Compare two strings by code point order, ignoring the case of letters
@@ -3289,6 +3313,47 @@ contains_char(Str, Char, I) :-
 
 %---------------------%
 
+compare_substrings(Res, X, StartX, Y, StartY, Length) :-
+    LengthX = length(X),
+    LengthY = length(Y),
+    ( if
+        Length >= 0,
+        StartX >= 0,
+        StartY >= 0,
+        StartX + Length =< LengthX,
+        StartY + Length =< LengthY
+    then
+        unsafe_compare_substrings(Res, X, StartX, Y, StartY, Length)
+    else
+        fail
+    ).
+
+unsafe_compare_substrings(Res, X, StartX, Y, StartY, Length) :-
+    unsafe_compare_substrings_loop(X, Y, StartX, StartY, Length, Res).
+
+:- pred unsafe_compare_substrings_loop(string::in, string::in,
+    int::in, int::in, int::in, comparison_result::uo) is det.
+
+unsafe_compare_substrings_loop(X, Y, IX, IY, Rem, Res) :-
+    ( if Rem = 0 then
+        Res = (=)
+    else
+        unsafe_index_code_unit(X, IX, CodeX),
+        unsafe_index_code_unit(Y, IY, CodeY),
+        compare(Res0, CodeX, CodeY),
+        (
+            Res0 = (=),
+            unsafe_compare_substrings_loop(X, Y, IX + 1, IY + 1, Rem - 1, Res)
+        ;
+            ( Res0 = (<)
+            ; Res0 = (>)
+            ),
+            Res = Res0
+        )
+    ).
+
+%---------------------%
+
 % XXX ILSEQ unsafe_index_next effectively truncates either or both strings
 % at the first ill-formed sequence.
 
diff --git a/tests/hard_coded/Mmakefile b/tests/hard_coded/Mmakefile
index 347a93c31..ee9f51179 100644
--- a/tests/hard_coded/Mmakefile
+++ b/tests/hard_coded/Mmakefile
@@ -361,6 +361,7 @@ ORDINARY_PROGS = \
 	string_code_unit \
 	string_codepoint \
 	string_codepoint_offset_ilseq \
+	string_compare_substrings \
 	string_count_codepoints_ilseq \
 	string_first_char \
 	string_fold_ilseq \
diff --git a/tests/hard_coded/string_compare_substrings.exp b/tests/hard_coded/string_compare_substrings.exp
new file mode 100644
index 000000000..70ff8e5af
--- /dev/null
+++ b/tests/hard_coded/string_compare_substrings.exp
@@ -0,0 +1 @@
+done.
diff --git a/tests/hard_coded/string_compare_substrings.m b/tests/hard_coded/string_compare_substrings.m
new file mode 100644
index 000000000..2c7cd81b3
--- /dev/null
+++ b/tests/hard_coded/string_compare_substrings.m
@@ -0,0 +1,88 @@
+%---------------------------------------------------------------------------%
+% vim: ts=4 sw=4 et ft=mercury
+%---------------------------------------------------------------------------%
+
+:- module string_compare_substrings.
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is det.
+
+%---------------------------------------------------------------------------%
+%---------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module int.
+:- import_module list.
+:- import_module solutions.
+:- import_module string.
+
+%---------------------------------------------------------------------------%
+
+main(!IO) :-
+    S0 = "😀",
+    S1 = string.between(S0, 0, count_code_units(S0) - 1),
+    X = "small" ++ S0 ++ "dog" ++ S1 ++ "cat",
+    Y = "big" ++ S0 ++ "cat" ++ S1 ++ "dog",
+    solutions(generate_params(X, Y), Params),
+    foldl(test_compare_substrings(X, Y), Params, !IO),
+    io.write_string("done.\n", !IO).
+
+:- pred generate_params(string::in, string::in, {int, int, int}::out) is nondet.
+
+generate_params(X, Y, {IX, IY, Len}) :-
+    list.member(IX, -1 .. length(X) + 1),
+    list.member(IY, -1 .. length(Y) + 1),
+    list.member(Len, -1 .. max(length(X), length(Y)) + 1).
+
+:- pred test_compare_substrings(string::in, string::in, {int, int, int}::in,
+    io::di, io::uo) is det.
+
+test_compare_substrings(X, Y, {IX, IY, Len}, !IO) :-
+    ( if compare_substrings(Rel, X, IX, Y, IY, Len) then
+        ( if ref_compare_substrings(RefRel, X, IX, Y, IY, Len) then
+            ( if Rel = RefRel then
+                true
+            else
+                io.write_string(
+                    "error: result does not match reference implementation\n",
+                    !IO)
+            )
+        else
+            io.write_string(
+                "error: succeeded but reference implementation failed\n", !IO)
+        )
+    else
+        ( if ref_compare_substrings(_RelRef, X, IX, Y, IY, Len) then
+            io.write_string(
+                "error: failed but reference implementation succeeded\n", !IO)
+        else
+            true
+        )
+    ).
+
+:- pred ref_compare_substrings(comparison_result::uo, string::in, int::in,
+    string::in, int::in, int::in) is semidet.
+
+ref_compare_substrings(Res, X, StartX, Y, StartY, Length) :-
+    strict_between(X, StartX, StartX + Length, SubX),
+    strict_between(Y, StartY, StartY + Length, SubY),
+    compare(Res, SubX, SubY),
+    /*
+    trace [io(!IO)] (
+        io.print_line({Res, SubX, SubY}, !IO)
+    ),
+    */
+    true.
+
+:- pred strict_between(string::in, int::in, int::in, string::out) is semidet.
+
+strict_between(Str, Start, End, SubStr) :-
+    % string.between adjusts offsets (unfortunately).
+    Start >= 0,
+    End >= Start,
+    Start =< length(Str),
+    End =< length(Str),
+    string.between(Str, Start, End, SubStr).
-- 
2.23.0



More information about the reviews mailing list