[m-rev.] for review: Define behaviour of string.char_to_string on edge cases.

Peter Wang novalazy at gmail.com
Tue Oct 29 12:01:48 AEDT 2019


library/string.m:
    Define behaviour of char_to_string when the string is not
    well-formed or if the char is a surrogate code point.

    Implement char_to_string/2 using multiple clauses
    as the described behaviour doesn't match to_char_list/2.

tests/hard_coded/Mmakefile:
tests/hard_coded/char_to_string.exp:
tests/hard_coded/char_to_string.exp2:
tests/hard_coded/char_to_string.m:
    Add test case.
---
 library/string.m                     |  21 ++++--
 tests/hard_coded/Mmakefile           |   1 +
 tests/hard_coded/char_to_string.exp  |  11 +++
 tests/hard_coded/char_to_string.exp2 |  11 +++
 tests/hard_coded/char_to_string.m    | 106 +++++++++++++++++++++++++++
 5 files changed, 145 insertions(+), 5 deletions(-)
 create mode 100644 tests/hard_coded/char_to_string.exp
 create mode 100644 tests/hard_coded/char_to_string.exp2
 create mode 100644 tests/hard_coded/char_to_string.m

diff --git a/library/string.m b/library/string.m
index 82bc84fa3..aff17b069 100644
--- a/library/string.m
+++ b/library/string.m
@@ -1353,7 +1353,15 @@
 
     % char_to_string(Char, String):
     %
-    % Converts a character (code point) to a string, or vice versa.
+    % Converts a character to a string, or vice versa.
+    % True if `String' is the well-formed string that encodes the code point
+    % `Char'; or, if strings are UTF-16 encoded, `Char' is a surrogate code
+    % point and `String' is the string that contains only that surrogate code
+    % point. Otherwise, `char_to_string(Char, String)' is false.
+    %
+    % Throws an exception if `Char' is the null character or a code point that
+    % cannot be encoded in a string (namely, surrogate code points cannot be
+    % encoded in UTF-8 strings).
     %
 :- func char_to_string(char::in) = (string::uo) is det.
 :- pred char_to_string(char, string).
@@ -5340,10 +5348,13 @@ det_to_float(FloatString) = Float :-
 char_to_string(C) = S1 :-
     char_to_string(C, S1).
 
-char_to_string(Char, String) :-
-    % XXX ILSEQ Should fail when String is not a well-formed encoding of a
-    % single code point.
-    to_char_list(String, [Char]).
+:- pragma promise_equivalent_clauses(char_to_string/2).
+
+char_to_string(Char::in, String::uo) :-
+    from_char_list([Char], String).
+char_to_string(Char::out, String::in) :-
+    index_next_not_replaced(String, 0, NextIndex, Char),
+    length(String, NextIndex).
 
 from_char(Char) = char_to_string(Char).
 
diff --git a/tests/hard_coded/Mmakefile b/tests/hard_coded/Mmakefile
index 048827e4a..826ea7d0d 100644
--- a/tests/hard_coded/Mmakefile
+++ b/tests/hard_coded/Mmakefile
@@ -54,6 +54,7 @@ ORDINARY_PROGS = \
 	cc_nondet_disj \
 	change_hunk_test \
 	char_signed \
+	char_to_string \
 	char_unicode \
 	checked_nondet_tailcall \
 	checked_nondet_tailcall_noinline \
diff --git a/tests/hard_coded/char_to_string.exp b/tests/hard_coded/char_to_string.exp
new file mode 100644
index 000000000..3bce46e39
--- /dev/null
+++ b/tests/hard_coded/char_to_string.exp
@@ -0,0 +1,11 @@
+exception: software_error("predicate `string.from_char_list\'/2: Unexpected: null character or surrogate code point in list")
+exception: software_error("predicate `string.from_char_list\'/2: Unexpected: null character or surrogate code point in list")
+exception: software_error("predicate `string.from_char_list\'/2: Unexpected: null character or surrogate code point in list")
+succeeded: 😀
+
+failed
+failed
+failed
+failed
+failed
+succeeded: 😀
diff --git a/tests/hard_coded/char_to_string.exp2 b/tests/hard_coded/char_to_string.exp2
new file mode 100644
index 000000000..716a1dfef
--- /dev/null
+++ b/tests/hard_coded/char_to_string.exp2
@@ -0,0 +1,11 @@
+exception: software_error("predicate `string.from_char_list\'/2: Unexpected: null character or surrogate code point in list")
+succeeded: 0xd83d
+succeeded: 0xde00
+succeeded: 😀
+
+failed
+failed
+failed
+succeeded: 0xd83d
+succeeded: 0xde00
+succeeded: 😀
diff --git a/tests/hard_coded/char_to_string.m b/tests/hard_coded/char_to_string.m
new file mode 100644
index 000000000..2027ce1c0
--- /dev/null
+++ b/tests/hard_coded/char_to_string.m
@@ -0,0 +1,106 @@
+%---------------------------------------------------------------------------%
+% vim: ts=4 sw=4 et ft=mercury
+%---------------------------------------------------------------------------%
+%
+% The .exp file is for backends using UTF-8 string encoding.
+% The .exp2 file is for backends using UTF-16 string encoding.
+%
+%---------------------------------------------------------------------------%
+
+:- module char_to_string.
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is cc_multi.
+
+%---------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module char.
+:- import_module list.
+:- import_module string.
+
+%---------------------------------------------------------------------------%
+
+main(!IO) :-
+    % null character
+    test_char_to_string_fwd(char.det_from_int(0), !IO),
+    % surrogate code points
+    test_char_to_string_fwd(char.det_from_int(0xd83d), !IO),
+    test_char_to_string_fwd(char.det_from_int(0xde00), !IO),
+    % non-BMP code point
+    test_char_to_string_fwd('😀', !IO),
+    io.nl(!IO),
+
+    S = "😀",
+    S0 = between(S, 0, 1),
+    S1 = between(S, 1, length(S)),
+
+    % empty string
+    test_char_to_string_rev("", !IO),
+    % string too long
+    test_char_to_string_rev(S0 ++ S, !IO),
+    test_char_to_string_rev(S ++ S0, !IO),
+    % ill-formed (unpaired surrogate in UTF-16 backends)
+    test_char_to_string_rev(S0, !IO),
+    test_char_to_string_rev(S1, !IO),
+    % non-BMP code point
+    test_char_to_string_rev(S, !IO).
+
+:- pred test_char_to_string_fwd(char::in, io::di, io::uo) is cc_multi.
+
+test_char_to_string_fwd(Char, !IO) :-
+    ( try []
+        char_to_string(Char, Str)
+    then
+        io.write_string("succeeded: ", !IO),
+        write_string_debug(Str, !IO),
+        io.nl(!IO)
+    catch_any Excp ->
+        io.write_string("exception: ", !IO),
+        io.write(Excp, !IO),
+        io.nl(!IO)
+    ).
+
+:- pred test_char_to_string_rev(string::in, io::di, io::uo) is cc_multi.
+
+test_char_to_string_rev(Str, !IO) :-
+    ( try []
+        char_to_string(Char, Str)
+    then
+        io.write_string("succeeded: ", !IO),
+        write_char_or_hex(Char, !IO),
+        io.nl(!IO)
+    else
+        io.write_string("failed\n", !IO)
+    catch_any Excp ->
+        io.write_string("exception: ", !IO),
+        io.write(Excp, !IO),
+        io.nl(!IO)
+    ).
+
+:- pred write_string_debug(string::in, io::di, io::uo) is det.
+
+write_string_debug(S, !IO) :-
+    write_string_debug_loop(S, 0, !IO).
+
+:- pred write_string_debug_loop(string::in, int::in, io::di, io::uo) is det.
+
+write_string_debug_loop(S, Index, !IO) :-
+    ( if string.index_next(S, Index, NextIndex, Char) then
+        write_char_or_hex(Char, !IO),
+        write_string_debug_loop(S, NextIndex, !IO)
+    else
+        true
+    ).
+
+:- pred write_char_or_hex(char::in, io::di, io::uo) is det.
+
+write_char_or_hex(Char, !IO) :-
+    ( if char.is_surrogate(Char) then
+        io.format("%#x", [i(char.to_int(Char))], !IO)
+    else
+        io.write_char(Char, !IO)
+    ).
-- 
2.23.0



More information about the reviews mailing list