[m-rev.] for review: Add string.unsafe_sub_string_search_start.
Mark Brown
mark at mercurylang.org
Thu Nov 7 20:44:56 AEDT 2019
This looks fine.
On Thu, Nov 7, 2019 at 3:21 PM Peter Wang <novalazy at gmail.com> wrote:
>
> library/string.m:
> Add unsafe_sub_string_search_start/4.
>
> NEWS:
> Announce addition.
> ---
> NEWS | 1 +
> library/string.m | 116 ++++++++++++++++++++++++-----------------------
> 2 files changed, 60 insertions(+), 57 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 60c5deb16..6475045c4 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -433,6 +433,7 @@ Changes to the Mercury standard library:
> - nondet_append/3
> - append_string_pieces/2
> - unsafe_append_string_pieces/2
> + - unsafe_sub_string_search_start/4
>
> The following procedures in the string module have been deprecated:
>
> diff --git a/library/string.m b/library/string.m
> index 5f10270fe..8e7802e3c 100644
> --- a/library/string.m
> +++ b/library/string.m
> @@ -630,6 +630,16 @@
> :- pred sub_string_search_start(string::in, string::in, int::in, int::out)
> is semidet.
>
> + % unsafe_sub_string_search_start(String, SubString, BeginAt, Index):
> + %
> + % Same as sub_string_search_start/4 but does not check that `BeginAt'
> + % is in range.
> + % WARNING: if `BeginAt' is negative or greater than length(String)
> + % then the behaviour is UNDEFINED. Use with care!
> + %
> +:- pred unsafe_sub_string_search_start(string::in, string::in, int::in,
> + int::out) is semidet.
> +
> %---------------------------------------------------------------------------%
> %
> % Appending strings.
> @@ -3584,100 +3594,92 @@ suffix_length_loop(P, S, I, Index) :-
> sub_string_search(WholeString, Pattern, Index) :-
> sub_string_search_start(WholeString, Pattern, 0, Index).
>
> +sub_string_search_start(WholeString, Pattern, BeginAt, Index) :-
> + ( if
> + (
> + BeginAt = 0
> + ;
> + BeginAt > 0,
> + BeginAt =< length(WholeString)
> + )
> + then
> + unsafe_sub_string_search_start(WholeString, Pattern, BeginAt, Index)
> + else
> + fail
> + ).
> +
> :- pragma foreign_proc("C",
> - sub_string_search_start(WholeString::in, Pattern::in, BeginAt::in,
> + unsafe_sub_string_search_start(WholeString::in, Pattern::in, BeginAt::in,
> Index::out),
> [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail,
> does_not_affect_liveness, no_sharing],
> "{
> - char *match;
> - if ((MR_Unsigned) BeginAt > strlen(WholeString)) {
> - SUCCESS_INDICATOR = MR_FALSE;
> + char *match = strstr(WholeString + BeginAt, Pattern);
> + if (match) {
> + Index = match - WholeString;
> + SUCCESS_INDICATOR = MR_TRUE;
> } else {
> - match = strstr(WholeString + BeginAt, Pattern);
> - if (match) {
> - Index = match - WholeString;
> - SUCCESS_INDICATOR = MR_TRUE;
> - } else {
> - SUCCESS_INDICATOR = MR_FALSE;
> - }
> + SUCCESS_INDICATOR = MR_FALSE;
> }
> }").
> :- pragma foreign_proc("C#",
> - sub_string_search_start(WholeString::in, Pattern::in, BeginAt::in,
> + unsafe_sub_string_search_start(WholeString::in, Pattern::in, BeginAt::in,
> Index::out),
> [will_not_call_mercury, promise_pure, thread_safe],
> "{
> - if (BeginAt < 0 || BeginAt > WholeString.Length) {
> - Index = -1;
> - } else {
> - Index = WholeString.IndexOf(Pattern, BeginAt,
> - System.StringComparison.Ordinal);
> - }
> + Index = WholeString.IndexOf(Pattern, BeginAt,
> + System.StringComparison.Ordinal);
> SUCCESS_INDICATOR = (Index >= 0);
> }").
> :- pragma foreign_proc("Java",
> - sub_string_search_start(WholeString::in, Pattern::in, BeginAt::in,
> + unsafe_sub_string_search_start(WholeString::in, Pattern::in, BeginAt::in,
> Index::out),
> [will_not_call_mercury, promise_pure, thread_safe],
> "
> - // String.indexOf will check BeginAt > WholeString.Length
> - // so we don't need to do it first.
> - if (BeginAt < 0) {
> - Index = -1;
> - } else {
> - Index = WholeString.indexOf(Pattern, BeginAt);
> - }
> + Index = WholeString.indexOf(Pattern, BeginAt);
> SUCCESS_INDICATOR = (Index >= 0);
> ").
> :- pragma foreign_proc("Erlang",
> - sub_string_search_start(String::in, SubString::in, BeginAt::in,
> + unsafe_sub_string_search_start(String::in, SubString::in, BeginAt::in,
> Index::out),
> [will_not_call_mercury, promise_pure, thread_safe],
> "
> - case String of
> - <<_:BeginAt/binary, Haystack/binary>> ->
> - if
> - size(SubString) =:= 0 ->
> - Index = BeginAt;
> - true ->
> - case binary:match(Haystack, SubString) of
> - {FoundStart, FoundLength} ->
> - Index = BeginAt + FoundStart;
> - nomatch ->
> - Index = -1
> - end
> - end;
> - _ ->
> - Index = -1
> + <<_:BeginAt/binary, Haystack/binary>> = String,
> + if
> + size(SubString) =:= 0 ->
> + Index = BeginAt;
> + true ->
> + case binary:match(Haystack, SubString) of
> + {FoundStart, FoundLength} ->
> + Index = BeginAt + FoundStart;
> + nomatch ->
> + Index = -1
> + end
> end,
> SUCCESS_INDICATOR = (Index =/= -1)
> ").
>
> -sub_string_search_start(String, SubString, BeginAt, Index) :-
> - ( if BeginAt < 0 then
> - fail
> - else
> - Len = length(String),
> - SubLen = length(SubString),
> - LastStart = Len - SubLen,
> - sub_string_search_start_loop(String, SubString, BeginAt, LastStart,
> - SubLen, Index)
> - ).
> +unsafe_sub_string_search_start(String, SubString, BeginAt, Index) :-
> + Len = length(String),
> + SubLen = length(SubString),
> + LastStart = Len - SubLen,
> + unsafe_sub_string_search_start_loop(String, SubString, BeginAt, LastStart,
> + SubLen, Index).
>
> % Brute force string searching. For short Strings this is good;
> % for longer strings Boyer-Moore is much better.
> %
> -:- pred sub_string_search_start_loop(string::in, string::in, int::in, int::in,
> - int::in, int::out) is semidet.
> +:- pred unsafe_sub_string_search_start_loop(string::in, string::in, int::in,
> + int::in, int::in, int::out) is semidet.
>
> -sub_string_search_start_loop(String, SubString, I, LastI, SubLen, Index) :-
> +unsafe_sub_string_search_start_loop(String, SubString, I, LastI, SubLen, Index)
> + :-
> I =< LastI,
> ( if unsafe_compare_substrings((=), String, I, SubString, 0, SubLen) then
> Index = I
> else
> - sub_string_search_start_loop(String, SubString, I + 1, LastI, SubLen,
> - Index)
> + unsafe_sub_string_search_start_loop(String, SubString, I + 1, LastI,
> + SubLen, Index)
> ).
>
> %---------------------------------------------------------------------------%
> --
> 2.23.0
>
> _______________________________________________
> reviews mailing list
> reviews at lists.mercurylang.org
> https://lists.mercurylang.org/listinfo/reviews
More information about the reviews mailing list