[m-rev.] for review: Define behaviour of string.char_to_string on edge cases.
Peter Wang
novalazy at gmail.com
Tue Oct 29 12:01:48 AEDT 2019
library/string.m:
Define behaviour of char_to_string when the string is not
well-formed or if the char is a surrogate code point.
Implement char_to_string/2 using multiple clauses
as the described behaviour doesn't match to_char_list/2.
tests/hard_coded/Mmakefile:
tests/hard_coded/char_to_string.exp:
tests/hard_coded/char_to_string.exp2:
tests/hard_coded/char_to_string.m:
Add test case.
---
library/string.m | 21 ++++--
tests/hard_coded/Mmakefile | 1 +
tests/hard_coded/char_to_string.exp | 11 +++
tests/hard_coded/char_to_string.exp2 | 11 +++
tests/hard_coded/char_to_string.m | 106 +++++++++++++++++++++++++++
5 files changed, 145 insertions(+), 5 deletions(-)
create mode 100644 tests/hard_coded/char_to_string.exp
create mode 100644 tests/hard_coded/char_to_string.exp2
create mode 100644 tests/hard_coded/char_to_string.m
diff --git a/library/string.m b/library/string.m
index 82bc84fa3..aff17b069 100644
--- a/library/string.m
+++ b/library/string.m
@@ -1353,7 +1353,15 @@
% char_to_string(Char, String):
%
- % Converts a character (code point) to a string, or vice versa.
+ % Converts a character to a string, or vice versa.
+ % True if `String' is the well-formed string that encodes the code point
+ % `Char'; or, if strings are UTF-16 encoded, `Char' is a surrogate code
+ % point and `String' is the string that contains only that surrogate code
+ % point. Otherwise, `char_to_string(Char, String)' is false.
+ %
+ % Throws an exception if `Char' is the null character or a code point that
+ % cannot be encoded in a string (namely, surrogate code points cannot be
+ % encoded in UTF-8 strings).
%
:- func char_to_string(char::in) = (string::uo) is det.
:- pred char_to_string(char, string).
@@ -5340,10 +5348,13 @@ det_to_float(FloatString) = Float :-
char_to_string(C) = S1 :-
char_to_string(C, S1).
-char_to_string(Char, String) :-
- % XXX ILSEQ Should fail when String is not a well-formed encoding of a
- % single code point.
- to_char_list(String, [Char]).
+:- pragma promise_equivalent_clauses(char_to_string/2).
+
+char_to_string(Char::in, String::uo) :-
+ from_char_list([Char], String).
+char_to_string(Char::out, String::in) :-
+ index_next_not_replaced(String, 0, NextIndex, Char),
+ length(String, NextIndex).
from_char(Char) = char_to_string(Char).
diff --git a/tests/hard_coded/Mmakefile b/tests/hard_coded/Mmakefile
index 048827e4a..826ea7d0d 100644
--- a/tests/hard_coded/Mmakefile
+++ b/tests/hard_coded/Mmakefile
@@ -54,6 +54,7 @@ ORDINARY_PROGS = \
cc_nondet_disj \
change_hunk_test \
char_signed \
+ char_to_string \
char_unicode \
checked_nondet_tailcall \
checked_nondet_tailcall_noinline \
diff --git a/tests/hard_coded/char_to_string.exp b/tests/hard_coded/char_to_string.exp
new file mode 100644
index 000000000..3bce46e39
--- /dev/null
+++ b/tests/hard_coded/char_to_string.exp
@@ -0,0 +1,11 @@
+exception: software_error("predicate `string.from_char_list\'/2: Unexpected: null character or surrogate code point in list")
+exception: software_error("predicate `string.from_char_list\'/2: Unexpected: null character or surrogate code point in list")
+exception: software_error("predicate `string.from_char_list\'/2: Unexpected: null character or surrogate code point in list")
+succeeded: 😀
+
+failed
+failed
+failed
+failed
+failed
+succeeded: 😀
diff --git a/tests/hard_coded/char_to_string.exp2 b/tests/hard_coded/char_to_string.exp2
new file mode 100644
index 000000000..716a1dfef
--- /dev/null
+++ b/tests/hard_coded/char_to_string.exp2
@@ -0,0 +1,11 @@
+exception: software_error("predicate `string.from_char_list\'/2: Unexpected: null character or surrogate code point in list")
+succeeded: 0xd83d
+succeeded: 0xde00
+succeeded: 😀
+
+failed
+failed
+failed
+succeeded: 0xd83d
+succeeded: 0xde00
+succeeded: 😀
diff --git a/tests/hard_coded/char_to_string.m b/tests/hard_coded/char_to_string.m
new file mode 100644
index 000000000..2027ce1c0
--- /dev/null
+++ b/tests/hard_coded/char_to_string.m
@@ -0,0 +1,106 @@
+%---------------------------------------------------------------------------%
+% vim: ts=4 sw=4 et ft=mercury
+%---------------------------------------------------------------------------%
+%
+% The .exp file is for backends using UTF-8 string encoding.
+% The .exp2 file is for backends using UTF-16 string encoding.
+%
+%---------------------------------------------------------------------------%
+
+:- module char_to_string.
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is cc_multi.
+
+%---------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module char.
+:- import_module list.
+:- import_module string.
+
+%---------------------------------------------------------------------------%
+
+main(!IO) :-
+ % null character
+ test_char_to_string_fwd(char.det_from_int(0), !IO),
+ % surrogate code points
+ test_char_to_string_fwd(char.det_from_int(0xd83d), !IO),
+ test_char_to_string_fwd(char.det_from_int(0xde00), !IO),
+ % non-BMP code point
+ test_char_to_string_fwd('😀', !IO),
+ io.nl(!IO),
+
+ S = "😀",
+ S0 = between(S, 0, 1),
+ S1 = between(S, 1, length(S)),
+
+ % empty string
+ test_char_to_string_rev("", !IO),
+ % string too long
+ test_char_to_string_rev(S0 ++ S, !IO),
+ test_char_to_string_rev(S ++ S0, !IO),
+ % ill-formed (unpaired surrogate in UTF-16 backends)
+ test_char_to_string_rev(S0, !IO),
+ test_char_to_string_rev(S1, !IO),
+ % non-BMP code point
+ test_char_to_string_rev(S, !IO).
+
+:- pred test_char_to_string_fwd(char::in, io::di, io::uo) is cc_multi.
+
+test_char_to_string_fwd(Char, !IO) :-
+ ( try []
+ char_to_string(Char, Str)
+ then
+ io.write_string("succeeded: ", !IO),
+ write_string_debug(Str, !IO),
+ io.nl(!IO)
+ catch_any Excp ->
+ io.write_string("exception: ", !IO),
+ io.write(Excp, !IO),
+ io.nl(!IO)
+ ).
+
+:- pred test_char_to_string_rev(string::in, io::di, io::uo) is cc_multi.
+
+test_char_to_string_rev(Str, !IO) :-
+ ( try []
+ char_to_string(Char, Str)
+ then
+ io.write_string("succeeded: ", !IO),
+ write_char_or_hex(Char, !IO),
+ io.nl(!IO)
+ else
+ io.write_string("failed\n", !IO)
+ catch_any Excp ->
+ io.write_string("exception: ", !IO),
+ io.write(Excp, !IO),
+ io.nl(!IO)
+ ).
+
+:- pred write_string_debug(string::in, io::di, io::uo) is det.
+
+write_string_debug(S, !IO) :-
+ write_string_debug_loop(S, 0, !IO).
+
+:- pred write_string_debug_loop(string::in, int::in, io::di, io::uo) is det.
+
+write_string_debug_loop(S, Index, !IO) :-
+ ( if string.index_next(S, Index, NextIndex, Char) then
+ write_char_or_hex(Char, !IO),
+ write_string_debug_loop(S, NextIndex, !IO)
+ else
+ true
+ ).
+
+:- pred write_char_or_hex(char::in, io::di, io::uo) is det.
+
+write_char_or_hex(Char, !IO) :-
+ ( if char.is_surrogate(Char) then
+ io.format("%#x", [i(char.to_int(Char))], !IO)
+ else
+ io.write_char(Char, !IO)
+ ).
--
2.23.0
More information about the reviews
mailing list