[m-rev.] diff: Fix string.between_codepoints range clamping.

Peter Wang novalazy at gmail.com
Sun Oct 4 19:01:45 AEDT 2015


Fix string.between_codepoints range clamping.

library/string.m:
    Fix string.between_codepoints so that the clamping of start/end
    points works as documented.

tests/hard_coded/string_codepoint.m
tests/hard_coded/string_codepoint.exp
tests/hard_coded/string_codepoint.exp2:
    Improve test case.

NEWS:
    Mention fix.

diff --git a/NEWS b/NEWS
index 4c02c07..68916c4 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,8 @@ This is a bug-fix release.
   reference array.sort_fix_2014 to ensure that you using the fixed version.
 * Fix the handling of nondet code by the auto-parallelisation analysis in
   mdprof_create_feedback.  (Bug #364)
+* Fix string.between_codepoints so that the clamping of start/end points
+  works as documented.
 
 NEWS for Mercury 14.01.1
 ------------------------
diff --git a/library/string.m b/library/string.m
index 03354cc..45b42ca 100644
--- a/library/string.m
+++ b/library/string.m
@@ -5633,12 +5633,16 @@ string.between_codepoints(Str, Start, End) = SubString :-
     string.between_codepoints(Str, Start, End, SubString).
 
 string.between_codepoints(Str, Start, End, SubString) :-
-    ( string.codepoint_offset(Str, Start, StartOffset0) ->
+    ( Start < 0 ->
+        StartOffset = 0
+    ; string.codepoint_offset(Str, Start, StartOffset0) ->
         StartOffset = StartOffset0
     ;
-        StartOffset = 0
+        StartOffset = string.length(Str)
     ),
-    ( string.codepoint_offset(Str, End, EndOffset0) ->
+    ( End =< Start ->
+        EndOffset = StartOffset
+    ; string.codepoint_offset(Str, End, EndOffset0) ->
         EndOffset = EndOffset0
     ;
         EndOffset = string.length(Str)
diff --git a/tests/hard_coded/string_codepoint.exp b/tests/hard_coded/string_codepoint.exp
index 82c494b..6c17786 100644
--- a/tests/hard_coded/string_codepoint.exp
+++ b/tests/hard_coded/string_codepoint.exp
@@ -87,4 +87,85 @@ right_by_codepoint:
 啕𐀀.
 
 between_codepoints:
-ξ啕
+between_codepoints(Str, -1, -1, "")
+between_codepoints(Str, -1, 0, "")
+between_codepoints(Str, -1, 1, "a")
+between_codepoints(Str, -1, 2, "aß")
+between_codepoints(Str, -1, 3, "aßξ")
+between_codepoints(Str, -1, 4, "aßξ啕")
+between_codepoints(Str, -1, 5, "aßξ啕𐀀")
+between_codepoints(Str, -1, 6, "aßξ啕𐀀.")
+between_codepoints(Str, -1, 7, "aßξ啕𐀀.")
+between_codepoints(Str, 0, -1, "")
+between_codepoints(Str, 0, 0, "")
+between_codepoints(Str, 0, 1, "a")
+between_codepoints(Str, 0, 2, "aß")
+between_codepoints(Str, 0, 3, "aßξ")
+between_codepoints(Str, 0, 4, "aßξ啕")
+between_codepoints(Str, 0, 5, "aßξ啕𐀀")
+between_codepoints(Str, 0, 6, "aßξ啕𐀀.")
+between_codepoints(Str, 0, 7, "aßξ啕𐀀.")
+between_codepoints(Str, 1, -1, "")
+between_codepoints(Str, 1, 0, "")
+between_codepoints(Str, 1, 1, "")
+between_codepoints(Str, 1, 2, "ß")
+between_codepoints(Str, 1, 3, "ßξ")
+between_codepoints(Str, 1, 4, "ßξ啕")
+between_codepoints(Str, 1, 5, "ßξ啕𐀀")
+between_codepoints(Str, 1, 6, "ßξ啕𐀀.")
+between_codepoints(Str, 1, 7, "ßξ啕𐀀.")
+between_codepoints(Str, 2, -1, "")
+between_codepoints(Str, 2, 0, "")
+between_codepoints(Str, 2, 1, "")
+between_codepoints(Str, 2, 2, "")
+between_codepoints(Str, 2, 3, "ξ")
+between_codepoints(Str, 2, 4, "ξ啕")
+between_codepoints(Str, 2, 5, "ξ啕𐀀")
+between_codepoints(Str, 2, 6, "ξ啕𐀀.")
+between_codepoints(Str, 2, 7, "ξ啕𐀀.")
+between_codepoints(Str, 3, -1, "")
+between_codepoints(Str, 3, 0, "")
+between_codepoints(Str, 3, 1, "")
+between_codepoints(Str, 3, 2, "")
+between_codepoints(Str, 3, 3, "")
+between_codepoints(Str, 3, 4, "啕")
+between_codepoints(Str, 3, 5, "啕𐀀")
+between_codepoints(Str, 3, 6, "啕𐀀.")
+between_codepoints(Str, 3, 7, "啕𐀀.")
+between_codepoints(Str, 4, -1, "")
+between_codepoints(Str, 4, 0, "")
+between_codepoints(Str, 4, 1, "")
+between_codepoints(Str, 4, 2, "")
+between_codepoints(Str, 4, 3, "")
+between_codepoints(Str, 4, 4, "")
+between_codepoints(Str, 4, 5, "𐀀")
+between_codepoints(Str, 4, 6, "𐀀.")
+between_codepoints(Str, 4, 7, "𐀀.")
+between_codepoints(Str, 5, -1, "")
+between_codepoints(Str, 5, 0, "")
+between_codepoints(Str, 5, 1, "")
+between_codepoints(Str, 5, 2, "")
+between_codepoints(Str, 5, 3, "")
+between_codepoints(Str, 5, 4, "")
+between_codepoints(Str, 5, 5, "")
+between_codepoints(Str, 5, 6, ".")
+between_codepoints(Str, 5, 7, ".")
+between_codepoints(Str, 6, -1, "")
+between_codepoints(Str, 6, 0, "")
+between_codepoints(Str, 6, 1, "")
+between_codepoints(Str, 6, 2, "")
+between_codepoints(Str, 6, 3, "")
+between_codepoints(Str, 6, 4, "")
+between_codepoints(Str, 6, 5, "")
+between_codepoints(Str, 6, 6, "")
+between_codepoints(Str, 6, 7, "")
+between_codepoints(Str, 7, -1, "")
+between_codepoints(Str, 7, 0, "")
+between_codepoints(Str, 7, 1, "")
+between_codepoints(Str, 7, 2, "")
+between_codepoints(Str, 7, 3, "")
+between_codepoints(Str, 7, 4, "")
+between_codepoints(Str, 7, 5, "")
+between_codepoints(Str, 7, 6, "")
+between_codepoints(Str, 7, 7, "")
+
diff --git a/tests/hard_coded/string_codepoint.exp2 b/tests/hard_coded/string_codepoint.exp2
index e2ef44e..d20d59a 100644
--- a/tests/hard_coded/string_codepoint.exp2
+++ b/tests/hard_coded/string_codepoint.exp2
@@ -87,4 +87,85 @@ right_by_codepoint:
 啕𐀀.
 
 between_codepoints:
-ξ啕
+between_codepoints(Str, -1, -1, "")
+between_codepoints(Str, -1, 0, "")
+between_codepoints(Str, -1, 1, "a")
+between_codepoints(Str, -1, 2, "aß")
+between_codepoints(Str, -1, 3, "aßξ")
+between_codepoints(Str, -1, 4, "aßξ啕")
+between_codepoints(Str, -1, 5, "aßξ啕𐀀")
+between_codepoints(Str, -1, 6, "aßξ啕𐀀.")
+between_codepoints(Str, -1, 7, "aßξ啕𐀀.")
+between_codepoints(Str, 0, -1, "")
+between_codepoints(Str, 0, 0, "")
+between_codepoints(Str, 0, 1, "a")
+between_codepoints(Str, 0, 2, "aß")
+between_codepoints(Str, 0, 3, "aßξ")
+between_codepoints(Str, 0, 4, "aßξ啕")
+between_codepoints(Str, 0, 5, "aßξ啕𐀀")
+between_codepoints(Str, 0, 6, "aßξ啕𐀀.")
+between_codepoints(Str, 0, 7, "aßξ啕𐀀.")
+between_codepoints(Str, 1, -1, "")
+between_codepoints(Str, 1, 0, "")
+between_codepoints(Str, 1, 1, "")
+between_codepoints(Str, 1, 2, "ß")
+between_codepoints(Str, 1, 3, "ßξ")
+between_codepoints(Str, 1, 4, "ßξ啕")
+between_codepoints(Str, 1, 5, "ßξ啕𐀀")
+between_codepoints(Str, 1, 6, "ßξ啕𐀀.")
+between_codepoints(Str, 1, 7, "ßξ啕𐀀.")
+between_codepoints(Str, 2, -1, "")
+between_codepoints(Str, 2, 0, "")
+between_codepoints(Str, 2, 1, "")
+between_codepoints(Str, 2, 2, "")
+between_codepoints(Str, 2, 3, "ξ")
+between_codepoints(Str, 2, 4, "ξ啕")
+between_codepoints(Str, 2, 5, "ξ啕𐀀")
+between_codepoints(Str, 2, 6, "ξ啕𐀀.")
+between_codepoints(Str, 2, 7, "ξ啕𐀀.")
+between_codepoints(Str, 3, -1, "")
+between_codepoints(Str, 3, 0, "")
+between_codepoints(Str, 3, 1, "")
+between_codepoints(Str, 3, 2, "")
+between_codepoints(Str, 3, 3, "")
+between_codepoints(Str, 3, 4, "啕")
+between_codepoints(Str, 3, 5, "啕𐀀")
+between_codepoints(Str, 3, 6, "啕𐀀.")
+between_codepoints(Str, 3, 7, "啕𐀀.")
+between_codepoints(Str, 4, -1, "")
+between_codepoints(Str, 4, 0, "")
+between_codepoints(Str, 4, 1, "")
+between_codepoints(Str, 4, 2, "")
+between_codepoints(Str, 4, 3, "")
+between_codepoints(Str, 4, 4, "")
+between_codepoints(Str, 4, 5, "𐀀")
+between_codepoints(Str, 4, 6, "𐀀.")
+between_codepoints(Str, 4, 7, "𐀀.")
+between_codepoints(Str, 5, -1, "")
+between_codepoints(Str, 5, 0, "")
+between_codepoints(Str, 5, 1, "")
+between_codepoints(Str, 5, 2, "")
+between_codepoints(Str, 5, 3, "")
+between_codepoints(Str, 5, 4, "")
+between_codepoints(Str, 5, 5, "")
+between_codepoints(Str, 5, 6, ".")
+between_codepoints(Str, 5, 7, ".")
+between_codepoints(Str, 6, -1, "")
+between_codepoints(Str, 6, 0, "")
+between_codepoints(Str, 6, 1, "")
+between_codepoints(Str, 6, 2, "")
+between_codepoints(Str, 6, 3, "")
+between_codepoints(Str, 6, 4, "")
+between_codepoints(Str, 6, 5, "")
+between_codepoints(Str, 6, 6, "")
+between_codepoints(Str, 6, 7, "")
+between_codepoints(Str, 7, -1, "")
+between_codepoints(Str, 7, 0, "")
+between_codepoints(Str, 7, 1, "")
+between_codepoints(Str, 7, 2, "")
+between_codepoints(Str, 7, 3, "")
+between_codepoints(Str, 7, 4, "")
+between_codepoints(Str, 7, 5, "")
+between_codepoints(Str, 7, 6, "")
+between_codepoints(Str, 7, 7, "")
+
diff --git a/tests/hard_coded/string_codepoint.m b/tests/hard_coded/string_codepoint.m
index a9d3bce..6cad624 100644
--- a/tests/hard_coded/string_codepoint.m
+++ b/tests/hard_coded/string_codepoint.m
@@ -12,6 +12,7 @@
 
 :- implementation.
 
+:- import_module int.
 :- import_module list.
 :- import_module string.
 
@@ -100,8 +101,8 @@ main(!IO) :-
     io.nl(!IO),
 
     io.write_string("\nbetween_codepoints:\n", !IO),
-    string.between_codepoints(Str, 2, 4, Sub),
-    io.write_string(Sub, !IO),
+    Range = -1 .. (NumCodePoints + 1),
+    foldl(test_between_codepoints(Str, Range), Range, !IO),
     io.nl(!IO).
 
 :- pred test_codepoint_offset(string::in, int::in, io::di, io::uo) is det.
@@ -169,5 +170,45 @@ test_split_by_codepoint(Str, Pos, !IO) :-
     io.format("split_by_codepoint(Str, %d, ""%s"", ""%s"")\n",
         [i(Pos), s(L), s(R)], !IO).
 
+:- pred test_between_codepoints(string::in, list(int)::in, int::in,
+    io::di, io::uo) is det.
+
+test_between_codepoints(Str, EndRange, Start, !IO) :-
+    foldl(test_between_codepoints_2(Str, Start), EndRange, !IO).
+
+:- pred test_between_codepoints_2(string::in, int::in, int::in,
+    io::di, io::uo) is det.
+
+test_between_codepoints_2(Str, Start, End, !IO) :-
+    string.between_codepoints(Str, Start, End, SubString),
+    io.format("between_codepoints(Str, %d, %d, ""%s"")\n",
+        [i(Start), i(End), s(SubString)], !IO),
+
+    slow_between_codepoints(Str, Start, End, SlowSubString),
+    ( SubString = SlowSubString ->
+        true
+    ;
+        io.write_string("slow_between_codepoints returned: ", !IO),
+        io.write_string(SlowSubString, !IO),
+        io.nl(!IO)
+    ).
+
+:- pred slow_between_codepoints(string::in, int::in, int::in, string::out)
+    is det.
+
+slow_between_codepoints(Str, Start, End, SubString) :-
+    Chars = to_char_list(Str),
+    NumCodePoints = length(Chars),
+    ClampStart = clamp(0, Start, NumCodePoints),
+    ClampEnd = clamp(ClampStart, End, NumCodePoints),
+    ClampLen = ClampEnd - ClampStart,
+    det_split_list(ClampStart, Chars, _, CharsRight),
+    det_split_list(ClampLen, CharsRight, CharsMid, _),
+    SubString = from_char_list(CharsMid).
+
+:- func clamp(int, int, int) = int.
+
+clamp(Min, X, Max) = max(Min, min(X, Max)).
+
 %-----------------------------------------------------------------------------%
 % vim: ft=mercury ts=8 sts=4 sw=4 et
-- 
1.8.4



More information about the reviews mailing list