[m-rev.] for review: Define string.between_codepoints more precisely and fix bug.

Peter Wang novalazy at gmail.com
Wed Oct 23 15:02:13 AEDT 2019


library/string.m:
    Define string.between_codepoints in terms of codepoint_offset.

    Fix behaviour in the case where
        Start < 0,
        End < 0,
        End > Start

tests/hard_coded/string_codepoint.exp:
tests/hard_coded/string_codepoint.exp2:
tests/hard_coded/string_codepoint.m:
    Extend test case.
---
 library/string.m                       | 25 ++++++++++++++++++-------
 tests/hard_coded/string_codepoint.exp  | 19 +++++++++++++++++++
 tests/hard_coded/string_codepoint.exp2 | 19 +++++++++++++++++++
 tests/hard_coded/string_codepoint.m    | 11 ++++++++---
 4 files changed, 64 insertions(+), 10 deletions(-)

diff --git a/library/string.m b/library/string.m
index c978476ab..16888d12f 100644
--- a/library/string.m
+++ b/library/string.m
@@ -756,11 +756,21 @@
     % between_codepoints(String, Start, End, Substring):
     %
     % `Substring' is the part of `String' between the code point positions
-    % `Start' and `End'.
-    % (If `Start' is out of the range [0, length of `String'], it is treated
-    % as if it were the nearest end-point of that range.
-    % If `End' is out of the range [`Start', length of `String'],
-    % it is treated as if it were the nearest end-point of that range.)
+    % `Start' and `End'. The result is equivalent to:
+    %
+    %   between(String, StartOffset, EndOffset, Substring)
+    %
+    % where:
+    %
+    %   StartOffset is from codepoint_offset(String, Start, StartOffset)
+    %     if Start is in [0, count_codepoints(String)],
+    %   StartOffset = 0 if Start < 0,
+    %   StartOffset = length(String) otherwise;
+    %
+    %   EndOffset is from codepoint_offset(String, End, EndOffset)
+    %     if End is in [0, count_codepoints(String)],
+    %   EndOffset = 0 if End < 0,
+    %   EndOffset = length(String) otherwise.
     %
 :- func between_codepoints(string::in, int::in, int::in)
     = (string::uo) is det.
@@ -4255,13 +4265,14 @@ between_codepoints(Str, Start, End, SubString) :-
     else
         StartOffset = length(Str)
     ),
-    ( if End =< Start then
-        EndOffset = StartOffset
+    ( if End < 0 then
+        EndOffset = 0
     else if codepoint_offset(Str, End, EndOffset0) then
         EndOffset = EndOffset0
     else
         EndOffset = length(Str)
     ),
+    % between/4 enforces EndOffset >= StartOffset.
     between(Str, StartOffset, EndOffset, SubString).
 
 %---------------------%
diff --git a/tests/hard_coded/string_codepoint.exp b/tests/hard_coded/string_codepoint.exp
index 6c1778654..43b1a1066 100644
--- a/tests/hard_coded/string_codepoint.exp
+++ b/tests/hard_coded/string_codepoint.exp
@@ -87,6 +87,17 @@ right_by_codepoint:
 啕𐀀.
 
 between_codepoints:
+between_codepoints(Str, -2, -2, "")
+between_codepoints(Str, -2, -1, "")
+between_codepoints(Str, -2, 0, "")
+between_codepoints(Str, -2, 1, "a")
+between_codepoints(Str, -2, 2, "aß")
+between_codepoints(Str, -2, 3, "aßξ")
+between_codepoints(Str, -2, 4, "aßξ啕")
+between_codepoints(Str, -2, 5, "aßξ啕𐀀")
+between_codepoints(Str, -2, 6, "aßξ啕𐀀.")
+between_codepoints(Str, -2, 7, "aßξ啕𐀀.")
+between_codepoints(Str, -1, -2, "")
 between_codepoints(Str, -1, -1, "")
 between_codepoints(Str, -1, 0, "")
 between_codepoints(Str, -1, 1, "a")
@@ -96,6 +107,7 @@ between_codepoints(Str, -1, 4, "aßξ啕")
 between_codepoints(Str, -1, 5, "aßξ啕𐀀")
 between_codepoints(Str, -1, 6, "aßξ啕𐀀.")
 between_codepoints(Str, -1, 7, "aßξ啕𐀀.")
+between_codepoints(Str, 0, -2, "")
 between_codepoints(Str, 0, -1, "")
 between_codepoints(Str, 0, 0, "")
 between_codepoints(Str, 0, 1, "a")
@@ -105,6 +117,7 @@ between_codepoints(Str, 0, 4, "aßξ啕")
 between_codepoints(Str, 0, 5, "aßξ啕𐀀")
 between_codepoints(Str, 0, 6, "aßξ啕𐀀.")
 between_codepoints(Str, 0, 7, "aßξ啕𐀀.")
+between_codepoints(Str, 1, -2, "")
 between_codepoints(Str, 1, -1, "")
 between_codepoints(Str, 1, 0, "")
 between_codepoints(Str, 1, 1, "")
@@ -114,6 +127,7 @@ between_codepoints(Str, 1, 4, "ßξ啕")
 between_codepoints(Str, 1, 5, "ßξ啕𐀀")
 between_codepoints(Str, 1, 6, "ßξ啕𐀀.")
 between_codepoints(Str, 1, 7, "ßξ啕𐀀.")
+between_codepoints(Str, 2, -2, "")
 between_codepoints(Str, 2, -1, "")
 between_codepoints(Str, 2, 0, "")
 between_codepoints(Str, 2, 1, "")
@@ -123,6 +137,7 @@ between_codepoints(Str, 2, 4, "ξ啕")
 between_codepoints(Str, 2, 5, "ξ啕𐀀")
 between_codepoints(Str, 2, 6, "ξ啕𐀀.")
 between_codepoints(Str, 2, 7, "ξ啕𐀀.")
+between_codepoints(Str, 3, -2, "")
 between_codepoints(Str, 3, -1, "")
 between_codepoints(Str, 3, 0, "")
 between_codepoints(Str, 3, 1, "")
@@ -132,6 +147,7 @@ between_codepoints(Str, 3, 4, "啕")
 between_codepoints(Str, 3, 5, "啕𐀀")
 between_codepoints(Str, 3, 6, "啕𐀀.")
 between_codepoints(Str, 3, 7, "啕𐀀.")
+between_codepoints(Str, 4, -2, "")
 between_codepoints(Str, 4, -1, "")
 between_codepoints(Str, 4, 0, "")
 between_codepoints(Str, 4, 1, "")
@@ -141,6 +157,7 @@ between_codepoints(Str, 4, 4, "")
 between_codepoints(Str, 4, 5, "𐀀")
 between_codepoints(Str, 4, 6, "𐀀.")
 between_codepoints(Str, 4, 7, "𐀀.")
+between_codepoints(Str, 5, -2, "")
 between_codepoints(Str, 5, -1, "")
 between_codepoints(Str, 5, 0, "")
 between_codepoints(Str, 5, 1, "")
@@ -150,6 +167,7 @@ between_codepoints(Str, 5, 4, "")
 between_codepoints(Str, 5, 5, "")
 between_codepoints(Str, 5, 6, ".")
 between_codepoints(Str, 5, 7, ".")
+between_codepoints(Str, 6, -2, "")
 between_codepoints(Str, 6, -1, "")
 between_codepoints(Str, 6, 0, "")
 between_codepoints(Str, 6, 1, "")
@@ -159,6 +177,7 @@ between_codepoints(Str, 6, 4, "")
 between_codepoints(Str, 6, 5, "")
 between_codepoints(Str, 6, 6, "")
 between_codepoints(Str, 6, 7, "")
+between_codepoints(Str, 7, -2, "")
 between_codepoints(Str, 7, -1, "")
 between_codepoints(Str, 7, 0, "")
 between_codepoints(Str, 7, 1, "")
diff --git a/tests/hard_coded/string_codepoint.exp2 b/tests/hard_coded/string_codepoint.exp2
index d20d59af8..1b320e7b1 100644
--- a/tests/hard_coded/string_codepoint.exp2
+++ b/tests/hard_coded/string_codepoint.exp2
@@ -87,6 +87,17 @@ right_by_codepoint:
 啕𐀀.
 
 between_codepoints:
+between_codepoints(Str, -2, -2, "")
+between_codepoints(Str, -2, -1, "")
+between_codepoints(Str, -2, 0, "")
+between_codepoints(Str, -2, 1, "a")
+between_codepoints(Str, -2, 2, "aß")
+between_codepoints(Str, -2, 3, "aßξ")
+between_codepoints(Str, -2, 4, "aßξ啕")
+between_codepoints(Str, -2, 5, "aßξ啕𐀀")
+between_codepoints(Str, -2, 6, "aßξ啕𐀀.")
+between_codepoints(Str, -2, 7, "aßξ啕𐀀.")
+between_codepoints(Str, -1, -2, "")
 between_codepoints(Str, -1, -1, "")
 between_codepoints(Str, -1, 0, "")
 between_codepoints(Str, -1, 1, "a")
@@ -96,6 +107,7 @@ between_codepoints(Str, -1, 4, "aßξ啕")
 between_codepoints(Str, -1, 5, "aßξ啕𐀀")
 between_codepoints(Str, -1, 6, "aßξ啕𐀀.")
 between_codepoints(Str, -1, 7, "aßξ啕𐀀.")
+between_codepoints(Str, 0, -2, "")
 between_codepoints(Str, 0, -1, "")
 between_codepoints(Str, 0, 0, "")
 between_codepoints(Str, 0, 1, "a")
@@ -105,6 +117,7 @@ between_codepoints(Str, 0, 4, "aßξ啕")
 between_codepoints(Str, 0, 5, "aßξ啕𐀀")
 between_codepoints(Str, 0, 6, "aßξ啕𐀀.")
 between_codepoints(Str, 0, 7, "aßξ啕𐀀.")
+between_codepoints(Str, 1, -2, "")
 between_codepoints(Str, 1, -1, "")
 between_codepoints(Str, 1, 0, "")
 between_codepoints(Str, 1, 1, "")
@@ -114,6 +127,7 @@ between_codepoints(Str, 1, 4, "ßξ啕")
 between_codepoints(Str, 1, 5, "ßξ啕𐀀")
 between_codepoints(Str, 1, 6, "ßξ啕𐀀.")
 between_codepoints(Str, 1, 7, "ßξ啕𐀀.")
+between_codepoints(Str, 2, -2, "")
 between_codepoints(Str, 2, -1, "")
 between_codepoints(Str, 2, 0, "")
 between_codepoints(Str, 2, 1, "")
@@ -123,6 +137,7 @@ between_codepoints(Str, 2, 4, "ξ啕")
 between_codepoints(Str, 2, 5, "ξ啕𐀀")
 between_codepoints(Str, 2, 6, "ξ啕𐀀.")
 between_codepoints(Str, 2, 7, "ξ啕𐀀.")
+between_codepoints(Str, 3, -2, "")
 between_codepoints(Str, 3, -1, "")
 between_codepoints(Str, 3, 0, "")
 between_codepoints(Str, 3, 1, "")
@@ -132,6 +147,7 @@ between_codepoints(Str, 3, 4, "啕")
 between_codepoints(Str, 3, 5, "啕𐀀")
 between_codepoints(Str, 3, 6, "啕𐀀.")
 between_codepoints(Str, 3, 7, "啕𐀀.")
+between_codepoints(Str, 4, -2, "")
 between_codepoints(Str, 4, -1, "")
 between_codepoints(Str, 4, 0, "")
 between_codepoints(Str, 4, 1, "")
@@ -141,6 +157,7 @@ between_codepoints(Str, 4, 4, "")
 between_codepoints(Str, 4, 5, "𐀀")
 between_codepoints(Str, 4, 6, "𐀀.")
 between_codepoints(Str, 4, 7, "𐀀.")
+between_codepoints(Str, 5, -2, "")
 between_codepoints(Str, 5, -1, "")
 between_codepoints(Str, 5, 0, "")
 between_codepoints(Str, 5, 1, "")
@@ -150,6 +167,7 @@ between_codepoints(Str, 5, 4, "")
 between_codepoints(Str, 5, 5, "")
 between_codepoints(Str, 5, 6, ".")
 between_codepoints(Str, 5, 7, ".")
+between_codepoints(Str, 6, -2, "")
 between_codepoints(Str, 6, -1, "")
 between_codepoints(Str, 6, 0, "")
 between_codepoints(Str, 6, 1, "")
@@ -159,6 +177,7 @@ between_codepoints(Str, 6, 4, "")
 between_codepoints(Str, 6, 5, "")
 between_codepoints(Str, 6, 6, "")
 between_codepoints(Str, 6, 7, "")
+between_codepoints(Str, 7, -2, "")
 between_codepoints(Str, 7, -1, "")
 between_codepoints(Str, 7, 0, "")
 between_codepoints(Str, 7, 1, "")
diff --git a/tests/hard_coded/string_codepoint.m b/tests/hard_coded/string_codepoint.m
index e41ae60d1..749a768a1 100644
--- a/tests/hard_coded/string_codepoint.m
+++ b/tests/hard_coded/string_codepoint.m
@@ -1,6 +1,11 @@
 %---------------------------------------------------------------------------%
 % vim: ts=4 sw=4 et ft=mercury
 %---------------------------------------------------------------------------%
+%
+% The .exp file is for backends using UTF-8 string encoding.
+% The .exp2 file is for backends using UTF-16 string encoding.
+%
+%---------------------------------------------------------------------------%
 
 :- module string_codepoint.
 :- interface.
@@ -102,7 +107,7 @@ main(!IO) :-
     io.nl(!IO),
 
     io.write_string("\nbetween_codepoints:\n", !IO),
-    Range = -1 .. (NumCodePoints + 1),
+    Range = -2 .. (NumCodePoints + 1),
     foldl(test_between_codepoints(Str, Range), Range, !IO),
     io.nl(!IO).
 
@@ -189,9 +194,9 @@ test_between_codepoints_2(Str, Start, End, !IO) :-
     ( SubString = SlowSubString ->
         true
     ;
-        io.write_string("slow_between_codepoints returned: ", !IO),
+        io.write_string("but slow_between_codepoints returned: \"", !IO),
         io.write_string(SlowSubString, !IO),
-        io.nl(!IO)
+        io.write_string("\"\n", !IO)
     ).
 
 :- pred slow_between_codepoints(string::in, int::in, int::in, string::out)
-- 
2.23.0



More information about the reviews mailing list