[m-rev.] for review: Add string.compare_substrings and unsafe_compare_substrings.

Mark Brown mark at mercurylang.org
Wed Oct 23 17:55:18 AEDT 2019


This looks fine.

On Wed, Oct 23, 2019 at 3:02 PM Peter Wang <novalazy at gmail.com> wrote:

> library/string.m:
>     Add the new predicates.
>
> tests/hard_coded/Mmakefile:
> tests/hard_coded/string_compare_substrings.exp:
> tests/hard_coded/string_compare_substrings.m:
>     Add test case.
>
> NEWS:
>     Announce additions.
> ---
>  NEWS                                          |  2 +
>  library/string.m                              | 65 ++++++++++++++
>  tests/hard_coded/Mmakefile                    |  1 +
>  .../hard_coded/string_compare_substrings.exp  |  1 +
>  tests/hard_coded/string_compare_substrings.m  | 88 +++++++++++++++++++
>  5 files changed, 157 insertions(+)
>  create mode 100644 tests/hard_coded/string_compare_substrings.exp
>  create mode 100644 tests/hard_coded/string_compare_substrings.m
>
> diff --git a/NEWS b/NEWS
> index ba69b8d16..536abd05b 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -427,6 +427,8 @@ Changes to the Mercury standard library:
>     - det_remove_prefix/3
>     - compare_ignore_case_ascii/3
>     - to_rev_char_list/2
> +   - compare_substrings/6
> +   - unsafe_compare_substrings/6
>
>    The following procedures in the string module have been deprecated:
>
> diff --git a/library/string.m b/library/string.m
> index ee624c32b..036f1438a 100644
> --- a/library/string.m
> +++ b/library/string.m
> @@ -548,6 +548,30 @@
>      %
>  :- pred contains_char(string::in, char::in) is semidet.
>
> +    % compare_substrings(Res, X, StartX, Y, StartY, Length):
> +    %
> +    % Compare two substrings by code unit order. The two substrings are
> +    % the substring of `X' between `StartX' and `StartX + Length', and
> +    % the substring of `Y' between `StartY' and `StartY + Length'.
> +    % `StartX', `StartY' and `Length' are all in terms of code units.
> +    %
> +    % Fails if `StartX' or `StartX + Length' are not within [0,
> length(X)],
> +    % or if `StartY' or `StartY + Length' are not within [0, length(Y)],
> +    % or if `Length' is negative.
> +    %
> +:- pred compare_substrings(comparison_result::uo, string::in, int::in,
> +    string::in, int::in, int::in) is semidet.
> +
> +    % unsafe_compare_substrings(Res, X, StartX, Y, StartY, Length):
> +    %
> +    % Same as compare_between/4 but without range checks.
> +    % WARNING: if any of `StartX', `StartY', `StartX + Length' or
> +    % `StartY + Length' are out of range, or if `Length' is negative,
> +    % then the behaviour is UNDEFINED. Use with care!
> +    %
> +:- pred unsafe_compare_substrings(comparison_result::uo, string::in,
> int::in,
> +    string::in, int::in, int::in) is det.
> +
>      % compare_ignore_case_ascii(Res, X, Y):
>      %
>      % Compare two strings by code point order, ignoring the case of
> letters
> @@ -3289,6 +3313,47 @@ contains_char(Str, Char, I) :-
>
>  %---------------------%
>
> +compare_substrings(Res, X, StartX, Y, StartY, Length) :-
> +    LengthX = length(X),
> +    LengthY = length(Y),
> +    ( if
> +        Length >= 0,
> +        StartX >= 0,
> +        StartY >= 0,
> +        StartX + Length =< LengthX,
> +        StartY + Length =< LengthY
> +    then
> +        unsafe_compare_substrings(Res, X, StartX, Y, StartY, Length)
> +    else
> +        fail
> +    ).
> +
> +unsafe_compare_substrings(Res, X, StartX, Y, StartY, Length) :-
> +    unsafe_compare_substrings_loop(X, Y, StartX, StartY, Length, Res).
> +
> +:- pred unsafe_compare_substrings_loop(string::in, string::in,
> +    int::in, int::in, int::in, comparison_result::uo) is det.
> +
> +unsafe_compare_substrings_loop(X, Y, IX, IY, Rem, Res) :-
> +    ( if Rem = 0 then
> +        Res = (=)
> +    else
> +        unsafe_index_code_unit(X, IX, CodeX),
> +        unsafe_index_code_unit(Y, IY, CodeY),
> +        compare(Res0, CodeX, CodeY),
> +        (
> +            Res0 = (=),
> +            unsafe_compare_substrings_loop(X, Y, IX + 1, IY + 1, Rem - 1,
> Res)
> +        ;
> +            ( Res0 = (<)
> +            ; Res0 = (>)
> +            ),
> +            Res = Res0
> +        )
> +    ).
> +
> +%---------------------%
> +
>  % XXX ILSEQ unsafe_index_next effectively truncates either or both strings
>  % at the first ill-formed sequence.
>
> diff --git a/tests/hard_coded/Mmakefile b/tests/hard_coded/Mmakefile
> index 347a93c31..ee9f51179 100644
> --- a/tests/hard_coded/Mmakefile
> +++ b/tests/hard_coded/Mmakefile
> @@ -361,6 +361,7 @@ ORDINARY_PROGS = \
>         string_code_unit \
>         string_codepoint \
>         string_codepoint_offset_ilseq \
> +       string_compare_substrings \
>         string_count_codepoints_ilseq \
>         string_first_char \
>         string_fold_ilseq \
> diff --git a/tests/hard_coded/string_compare_substrings.exp
> b/tests/hard_coded/string_compare_substrings.exp
> new file mode 100644
> index 000000000..70ff8e5af
> --- /dev/null
> +++ b/tests/hard_coded/string_compare_substrings.exp
> @@ -0,0 +1 @@
> +done.
> diff --git a/tests/hard_coded/string_compare_substrings.m
> b/tests/hard_coded/string_compare_substrings.m
> new file mode 100644
> index 000000000..2c7cd81b3
> --- /dev/null
> +++ b/tests/hard_coded/string_compare_substrings.m
> @@ -0,0 +1,88 @@
>
> +%---------------------------------------------------------------------------%
> +% vim: ts=4 sw=4 et ft=mercury
>
> +%---------------------------------------------------------------------------%
> +
> +:- module string_compare_substrings.
> +:- interface.
> +
> +:- import_module io.
> +
> +:- pred main(io::di, io::uo) is det.
> +
>
> +%---------------------------------------------------------------------------%
>
> +%---------------------------------------------------------------------------%
> +
> +:- implementation.
> +
> +:- import_module int.
> +:- import_module list.
> +:- import_module solutions.
> +:- import_module string.
> +
>
> +%---------------------------------------------------------------------------%
> +
> +main(!IO) :-
> +    S0 = "😀",
> +    S1 = string.between(S0, 0, count_code_units(S0) - 1),
> +    X = "small" ++ S0 ++ "dog" ++ S1 ++ "cat",
> +    Y = "big" ++ S0 ++ "cat" ++ S1 ++ "dog",
> +    solutions(generate_params(X, Y), Params),
> +    foldl(test_compare_substrings(X, Y), Params, !IO),
> +    io.write_string("done.\n", !IO).
> +
> +:- pred generate_params(string::in, string::in, {int, int, int}::out) is
> nondet.
> +
> +generate_params(X, Y, {IX, IY, Len}) :-
> +    list.member(IX, -1 .. length(X) + 1),
> +    list.member(IY, -1 .. length(Y) + 1),
> +    list.member(Len, -1 .. max(length(X), length(Y)) + 1).
> +
> +:- pred test_compare_substrings(string::in, string::in, {int, int,
> int}::in,
> +    io::di, io::uo) is det.
> +
> +test_compare_substrings(X, Y, {IX, IY, Len}, !IO) :-
> +    ( if compare_substrings(Rel, X, IX, Y, IY, Len) then
> +        ( if ref_compare_substrings(RefRel, X, IX, Y, IY, Len) then
> +            ( if Rel = RefRel then
> +                true
> +            else
> +                io.write_string(
> +                    "error: result does not match reference
> implementation\n",
> +                    !IO)
> +            )
> +        else
> +            io.write_string(
> +                "error: succeeded but reference implementation failed\n",
> !IO)
> +        )
> +    else
> +        ( if ref_compare_substrings(_RelRef, X, IX, Y, IY, Len) then
> +            io.write_string(
> +                "error: failed but reference implementation succeeded\n",
> !IO)
> +        else
> +            true
> +        )
> +    ).
> +
> +:- pred ref_compare_substrings(comparison_result::uo, string::in, int::in,
> +    string::in, int::in, int::in) is semidet.
> +
> +ref_compare_substrings(Res, X, StartX, Y, StartY, Length) :-
> +    strict_between(X, StartX, StartX + Length, SubX),
> +    strict_between(Y, StartY, StartY + Length, SubY),
> +    compare(Res, SubX, SubY),
> +    /*
> +    trace [io(!IO)] (
> +        io.print_line({Res, SubX, SubY}, !IO)
> +    ),
> +    */
> +    true.
> +
> +:- pred strict_between(string::in, int::in, int::in, string::out) is
> semidet.
> +
> +strict_between(Str, Start, End, SubStr) :-
> +    % string.between adjusts offsets (unfortunately).
> +    Start >= 0,
> +    End >= Start,
> +    Start =< length(Str),
> +    End =< length(Str),
> +    string.between(Str, Start, End, SubStr).
> --
> 2.23.0
>
> _______________________________________________
> reviews mailing list
> reviews at lists.mercurylang.org
> https://lists.mercurylang.org/listinfo/reviews
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20191023/2fd4d809/attachment-0001.html>


More information about the reviews mailing list