[m-rev.] for review: Define string.between_codepoints more precisely and fix bug.
Peter Wang
novalazy at gmail.com
Wed Oct 23 15:02:13 AEDT 2019
library/string.m:
Define string.between_codepoints in terms of codepoint_offset.
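Fix behaviour in the case where Start < 0, End < 0 and End > Start.
(Previously the `End =< Start' test did not apply in that case, so
EndOffset fell through to length(Str) and the whole string was
returned instead of the empty string.)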
tests/hard_coded/string_codepoint.exp:
tests/hard_coded/string_codepoint.exp2:
tests/hard_coded/string_codepoint.m:
Extend test case.
---
library/string.m | 25 ++++++++++++++++++-------
tests/hard_coded/string_codepoint.exp | 19 +++++++++++++++++++
tests/hard_coded/string_codepoint.exp2 | 19 +++++++++++++++++++
tests/hard_coded/string_codepoint.m | 11 ++++++++---
4 files changed, 64 insertions(+), 10 deletions(-)
diff --git a/library/string.m b/library/string.m
index c978476ab..16888d12f 100644
--- a/library/string.m
+++ b/library/string.m
@@ -756,11 +756,21 @@
% between_codepoints(String, Start, End, Substring):
%
% `Substring' is the part of `String' between the code point positions
- % `Start' and `End'.
- % (If `Start' is out of the range [0, length of `String'], it is treated
- % as if it were the nearest end-point of that range.
- % If `End' is out of the range [`Start', length of `String'],
- % it is treated as if it were the nearest end-point of that range.)
+ % `Start' and `End'. The result is equivalent to:
+ %
+ % between(String, StartOffset, EndOffset, Substring)
+ %
+ % where:
+ %
+ % StartOffset is from codepoint_offset(String, Start, StartOffset)
+ % if Start is in [0, count_codepoints(String)],
+ % StartOffset = 0 if Start < 0,
+ % StartOffset = length(String) otherwise;
+ %
+ % EndOffset is from codepoint_offset(String, End, EndOffset)
+ % if End is in [0, count_codepoints(String)],
+ % EndOffset = 0 if End < 0,
+ % EndOffset = length(String) otherwise.
%
:- func between_codepoints(string::in, int::in, int::in)
= (string::uo) is det.
@@ -4255,13 +4265,14 @@ between_codepoints(Str, Start, End, SubString) :-
else
StartOffset = length(Str)
),
- ( if End =< Start then
- EndOffset = StartOffset
+ ( if End < 0 then
+ EndOffset = 0
else if codepoint_offset(Str, End, EndOffset0) then
EndOffset = EndOffset0
else
EndOffset = length(Str)
),
+ % between/4 enforces EndOffset >= StartOffset.
between(Str, StartOffset, EndOffset, SubString).
%---------------------%
diff --git a/tests/hard_coded/string_codepoint.exp b/tests/hard_coded/string_codepoint.exp
index 6c1778654..43b1a1066 100644
--- a/tests/hard_coded/string_codepoint.exp
+++ b/tests/hard_coded/string_codepoint.exp
@@ -87,6 +87,17 @@ right_by_codepoint:
επ.
between_codepoints:
+between_codepoints(Str, -2, -2, "")
+between_codepoints(Str, -2, -1, "")
+between_codepoints(Str, -2, 0, "")
+between_codepoints(Str, -2, 1, "a")
+between_codepoints(Str, -2, 2, "aΓ")
+between_codepoints(Str, -2, 3, "aΓΞΎ")
+between_codepoints(Str, -2, 4, "aΓΞΎε")
+between_codepoints(Str, -2, 5, "aΓΞΎεπ")
+between_codepoints(Str, -2, 6, "aΓΞΎεπ.")
+between_codepoints(Str, -2, 7, "aΓΞΎεπ.")
+between_codepoints(Str, -1, -2, "")
between_codepoints(Str, -1, -1, "")
between_codepoints(Str, -1, 0, "")
between_codepoints(Str, -1, 1, "a")
@@ -96,6 +107,7 @@ between_codepoints(Str, -1, 4, "aΓΞΎε")
between_codepoints(Str, -1, 5, "aΓΞΎεπ")
between_codepoints(Str, -1, 6, "aΓΞΎεπ.")
between_codepoints(Str, -1, 7, "aΓΞΎεπ.")
+between_codepoints(Str, 0, -2, "")
between_codepoints(Str, 0, -1, "")
between_codepoints(Str, 0, 0, "")
between_codepoints(Str, 0, 1, "a")
@@ -105,6 +117,7 @@ between_codepoints(Str, 0, 4, "aΓΞΎε")
between_codepoints(Str, 0, 5, "aΓΞΎεπ")
between_codepoints(Str, 0, 6, "aΓΞΎεπ.")
between_codepoints(Str, 0, 7, "aΓΞΎεπ.")
+between_codepoints(Str, 1, -2, "")
between_codepoints(Str, 1, -1, "")
between_codepoints(Str, 1, 0, "")
between_codepoints(Str, 1, 1, "")
@@ -114,6 +127,7 @@ between_codepoints(Str, 1, 4, "ΓΞΎε")
between_codepoints(Str, 1, 5, "ΓΞΎεπ")
between_codepoints(Str, 1, 6, "ΓΞΎεπ.")
between_codepoints(Str, 1, 7, "ΓΞΎεπ.")
+between_codepoints(Str, 2, -2, "")
between_codepoints(Str, 2, -1, "")
between_codepoints(Str, 2, 0, "")
between_codepoints(Str, 2, 1, "")
@@ -123,6 +137,7 @@ between_codepoints(Str, 2, 4, "ΞΎε")
between_codepoints(Str, 2, 5, "ΞΎεπ")
between_codepoints(Str, 2, 6, "ΞΎεπ.")
between_codepoints(Str, 2, 7, "ΞΎεπ.")
+between_codepoints(Str, 3, -2, "")
between_codepoints(Str, 3, -1, "")
between_codepoints(Str, 3, 0, "")
between_codepoints(Str, 3, 1, "")
@@ -132,6 +147,7 @@ between_codepoints(Str, 3, 4, "ε")
between_codepoints(Str, 3, 5, "επ")
between_codepoints(Str, 3, 6, "επ.")
between_codepoints(Str, 3, 7, "επ.")
+between_codepoints(Str, 4, -2, "")
between_codepoints(Str, 4, -1, "")
between_codepoints(Str, 4, 0, "")
between_codepoints(Str, 4, 1, "")
@@ -141,6 +157,7 @@ between_codepoints(Str, 4, 4, "")
between_codepoints(Str, 4, 5, "π")
between_codepoints(Str, 4, 6, "π.")
between_codepoints(Str, 4, 7, "π.")
+between_codepoints(Str, 5, -2, "")
between_codepoints(Str, 5, -1, "")
between_codepoints(Str, 5, 0, "")
between_codepoints(Str, 5, 1, "")
@@ -150,6 +167,7 @@ between_codepoints(Str, 5, 4, "")
between_codepoints(Str, 5, 5, "")
between_codepoints(Str, 5, 6, ".")
between_codepoints(Str, 5, 7, ".")
+between_codepoints(Str, 6, -2, "")
between_codepoints(Str, 6, -1, "")
between_codepoints(Str, 6, 0, "")
between_codepoints(Str, 6, 1, "")
@@ -159,6 +177,7 @@ between_codepoints(Str, 6, 4, "")
between_codepoints(Str, 6, 5, "")
between_codepoints(Str, 6, 6, "")
between_codepoints(Str, 6, 7, "")
+between_codepoints(Str, 7, -2, "")
between_codepoints(Str, 7, -1, "")
between_codepoints(Str, 7, 0, "")
between_codepoints(Str, 7, 1, "")
diff --git a/tests/hard_coded/string_codepoint.exp2 b/tests/hard_coded/string_codepoint.exp2
index d20d59af8..1b320e7b1 100644
--- a/tests/hard_coded/string_codepoint.exp2
+++ b/tests/hard_coded/string_codepoint.exp2
@@ -87,6 +87,17 @@ right_by_codepoint:
επ.
between_codepoints:
+between_codepoints(Str, -2, -2, "")
+between_codepoints(Str, -2, -1, "")
+between_codepoints(Str, -2, 0, "")
+between_codepoints(Str, -2, 1, "a")
+between_codepoints(Str, -2, 2, "aΓ")
+between_codepoints(Str, -2, 3, "aΓΞΎ")
+between_codepoints(Str, -2, 4, "aΓΞΎε")
+between_codepoints(Str, -2, 5, "aΓΞΎεπ")
+between_codepoints(Str, -2, 6, "aΓΞΎεπ.")
+between_codepoints(Str, -2, 7, "aΓΞΎεπ.")
+between_codepoints(Str, -1, -2, "")
between_codepoints(Str, -1, -1, "")
between_codepoints(Str, -1, 0, "")
between_codepoints(Str, -1, 1, "a")
@@ -96,6 +107,7 @@ between_codepoints(Str, -1, 4, "aΓΞΎε")
between_codepoints(Str, -1, 5, "aΓΞΎεπ")
between_codepoints(Str, -1, 6, "aΓΞΎεπ.")
between_codepoints(Str, -1, 7, "aΓΞΎεπ.")
+between_codepoints(Str, 0, -2, "")
between_codepoints(Str, 0, -1, "")
between_codepoints(Str, 0, 0, "")
between_codepoints(Str, 0, 1, "a")
@@ -105,6 +117,7 @@ between_codepoints(Str, 0, 4, "aΓΞΎε")
between_codepoints(Str, 0, 5, "aΓΞΎεπ")
between_codepoints(Str, 0, 6, "aΓΞΎεπ.")
between_codepoints(Str, 0, 7, "aΓΞΎεπ.")
+between_codepoints(Str, 1, -2, "")
between_codepoints(Str, 1, -1, "")
between_codepoints(Str, 1, 0, "")
between_codepoints(Str, 1, 1, "")
@@ -114,6 +127,7 @@ between_codepoints(Str, 1, 4, "ΓΞΎε")
between_codepoints(Str, 1, 5, "ΓΞΎεπ")
between_codepoints(Str, 1, 6, "ΓΞΎεπ.")
between_codepoints(Str, 1, 7, "ΓΞΎεπ.")
+between_codepoints(Str, 2, -2, "")
between_codepoints(Str, 2, -1, "")
between_codepoints(Str, 2, 0, "")
between_codepoints(Str, 2, 1, "")
@@ -123,6 +137,7 @@ between_codepoints(Str, 2, 4, "ΞΎε")
between_codepoints(Str, 2, 5, "ΞΎεπ")
between_codepoints(Str, 2, 6, "ΞΎεπ.")
between_codepoints(Str, 2, 7, "ΞΎεπ.")
+between_codepoints(Str, 3, -2, "")
between_codepoints(Str, 3, -1, "")
between_codepoints(Str, 3, 0, "")
between_codepoints(Str, 3, 1, "")
@@ -132,6 +147,7 @@ between_codepoints(Str, 3, 4, "ε")
between_codepoints(Str, 3, 5, "επ")
between_codepoints(Str, 3, 6, "επ.")
between_codepoints(Str, 3, 7, "επ.")
+between_codepoints(Str, 4, -2, "")
between_codepoints(Str, 4, -1, "")
between_codepoints(Str, 4, 0, "")
between_codepoints(Str, 4, 1, "")
@@ -141,6 +157,7 @@ between_codepoints(Str, 4, 4, "")
between_codepoints(Str, 4, 5, "π")
between_codepoints(Str, 4, 6, "π.")
between_codepoints(Str, 4, 7, "π.")
+between_codepoints(Str, 5, -2, "")
between_codepoints(Str, 5, -1, "")
between_codepoints(Str, 5, 0, "")
between_codepoints(Str, 5, 1, "")
@@ -150,6 +167,7 @@ between_codepoints(Str, 5, 4, "")
between_codepoints(Str, 5, 5, "")
between_codepoints(Str, 5, 6, ".")
between_codepoints(Str, 5, 7, ".")
+between_codepoints(Str, 6, -2, "")
between_codepoints(Str, 6, -1, "")
between_codepoints(Str, 6, 0, "")
between_codepoints(Str, 6, 1, "")
@@ -159,6 +177,7 @@ between_codepoints(Str, 6, 4, "")
between_codepoints(Str, 6, 5, "")
between_codepoints(Str, 6, 6, "")
between_codepoints(Str, 6, 7, "")
+between_codepoints(Str, 7, -2, "")
between_codepoints(Str, 7, -1, "")
between_codepoints(Str, 7, 0, "")
between_codepoints(Str, 7, 1, "")
diff --git a/tests/hard_coded/string_codepoint.m b/tests/hard_coded/string_codepoint.m
index e41ae60d1..749a768a1 100644
--- a/tests/hard_coded/string_codepoint.m
+++ b/tests/hard_coded/string_codepoint.m
@@ -1,6 +1,11 @@
%---------------------------------------------------------------------------%
% vim: ts=4 sw=4 et ft=mercury
%---------------------------------------------------------------------------%
+%
+% The .exp file is for backends using UTF-8 string encoding.
+% The .exp2 file is for backends using UTF-16 string encoding.
+%
+%---------------------------------------------------------------------------%
:- module string_codepoint.
:- interface.
@@ -102,7 +107,7 @@ main(!IO) :-
io.nl(!IO),
io.write_string("\nbetween_codepoints:\n", !IO),
- Range = -1 .. (NumCodePoints + 1),
+ Range = -2 .. (NumCodePoints + 1),
foldl(test_between_codepoints(Str, Range), Range, !IO),
io.nl(!IO).
@@ -189,9 +194,9 @@ test_between_codepoints_2(Str, Start, End, !IO) :-
( SubString = SlowSubString ->
true
;
- io.write_string("slow_between_codepoints returned: ", !IO),
+ io.write_string("but slow_between_codepoints returned: \"", !IO),
io.write_string(SlowSubString, !IO),
- io.nl(!IO)
+ io.write_string("\"\n", !IO)
).
:- pred slow_between_codepoints(string::in, int::in, int::in, string::out)
--
2.23.0
More information about the reviews mailing list