[m-rev.] for review: Define string.sub_string_search_start for out-of-range starting offset.

Peter Wang novalazy at gmail.com
Thu Oct 31 13:41:04 AEDT 2019


library/string.m:
    Define sub_string_search_start to fail if the BeginAt parameter is
    negative or past the end of the string to search. The original C
    implementation did not check for an out-of-range starting offset,
    and could crash the program. The C implementation was later amended
    to fail instead, but not other implementations.

    Check for negative starting offset in non-C implementations of
    sub_string_search_start.

tests/hard_coded/string_sub_string_search.m:
    Extend test case.
---
 library/string.m                            | 37 ++++++++++++++++-----
 tests/hard_coded/string_sub_string_search.m | 10 ++++++
 2 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/library/string.m b/library/string.m
index 3a1646b81..2e319148b 100644
--- a/library/string.m
+++ b/library/string.m
@@ -601,6 +601,8 @@
     % `Index' is the code unit position in `String' where the first
     % occurrence of `SubString' occurs such that 'Index' is greater than or
     % equal to `BeginAt'. Indices start at zero.
+    % Fails if `BeginAt' is negative or greater than
+    % length(String) - length(SubString).
     %
 :- pred sub_string_search_start(string::in, string::in, int::in, int::out)
     is semidet.
@@ -3603,8 +3605,12 @@ sub_string_search_start_2(String, SubString, I, Length, SubLength) ->
         Index::out),
     [will_not_call_mercury, promise_pure, thread_safe],
 "{
-    Index = WholeString.IndexOf(Pattern, BeginAt,
-        System.StringComparison.Ordinal);
+    if (BeginAt < 0 || BeginAt > WholeString.Length) {
+        Index = -1;
+    } else {
+        Index = WholeString.IndexOf(Pattern, BeginAt,
+            System.StringComparison.Ordinal);
+    }
     SUCCESS_INDICATOR = (Index >= 0);
 }").
 :- pragma foreign_proc("Java",
@@ -3612,7 +3618,13 @@ sub_string_search_start_2(String, SubString, I, Length, SubLength) ->
         Index::out),
     [will_not_call_mercury, promise_pure, thread_safe],
 "
-    Index = WholeString.indexOf(Pattern, BeginAt);
+    // String.indexOf will check BeginAt > WholeString.Length
+    // so we don't need to do it first.
+    if (BeginAt < 0) {
+        Index = -1;
+    } else {
+        Index = WholeString.indexOf(Pattern, BeginAt);
+    }
     SUCCESS_INDICATOR = (Index >= 0);
 ").
 :- pragma foreign_proc("Erlang",
@@ -3620,14 +3632,23 @@ sub_string_search_start_2(String, SubString, I, Length, SubLength) ->
         Index::out),
     [will_not_call_mercury, promise_pure, thread_safe],
 "
-    Index = mercury__string:sub_string_search_start_2(String, SubString,
-        BeginAt, size(String), size(SubString)),
+    case BeginAt >= 0 of
+        true ->
+            Index = mercury__string:sub_string_search_start_2(String, SubString,
+                BeginAt, size(String), size(SubString));
+        false ->
+            Index = -1
+    end,
     SUCCESS_INDICATOR = (Index =/= -1)
 ").
 
 sub_string_search_start(String, SubString, BeginAt, Index) :-
-    sub_string_search_start_loop(String, SubString, BeginAt,
-        length(String), length(SubString), Index).
+    ( if BeginAt < 0 then
+        fail
+    else
+        sub_string_search_start_loop(String, SubString, BeginAt,
+            length(String), length(SubString), Index)
+    ).
 
     % Brute force string searching. For short Strings this is good;
     % for longer strings Boyer-Moore is much better.
@@ -3636,7 +3657,7 @@ sub_string_search_start(String, SubString, BeginAt, Index) :-
     int::in, int::out) is semidet.
 
 sub_string_search_start_loop(String, SubString, I, Len, SubLen, Index) :-
-    I < Len,
+    I =< Len - SubLen,
     ( if
         % XXX This is inefficient -- there is no (in, in, in) = in is semidet
         % mode of between, so this ends up calling the (in, in, in) = out
diff --git a/tests/hard_coded/string_sub_string_search.m b/tests/hard_coded/string_sub_string_search.m
index bd308f3a2..7e3c4e32b 100644
--- a/tests/hard_coded/string_sub_string_search.m
+++ b/tests/hard_coded/string_sub_string_search.m
@@ -26,8 +26,18 @@ main(!IO) :-
         string.sub_string_search("cat", "at", 1),
         string.sub_string_search("cat", "t", 2),
 
+        not string.sub_string_search_start("catcatcat", "cat", -1, _),
+        string.sub_string_search_start("catcatcat", "cat", 0, 0),
         string.sub_string_search_start("catcatcat", "cat", 1, 3),
+        string.sub_string_search_start("catcatcat", "cat", 2, 3),
+        string.sub_string_search_start("catcatcat", "cat", 3, 3),
+        string.sub_string_search_start("catcatcat", "cat", 4, 6),
+        string.sub_string_search_start("catcatcat", "cat", 5, 6),
+        string.sub_string_search_start("catcatcat", "cat", 6, 6),
+        not string.sub_string_search_start("catcatcat", "cat", 7, _),
+        not string.sub_string_search_start("catcatcat", "cat", 8, _),
         not string.sub_string_search_start("catcatcat", "cat", 9, _),
+        not string.sub_string_search_start("catcatcat", "cat", 10, _),
 
         string.sub_string_search("cαtcατcat", "cατ", length("cαt"))
     ->
-- 
2.23.0



More information about the reviews mailing list