[m-rev.] for review: Deprecate modes of string predicates that imply round-trippability.

Peter Wang novalazy at gmail.com
Wed Oct 23 12:54:31 AEDT 2019


Mark pointed out that to_char_list/2 having multiple modes implies the
ability to round-trip between a string and a list of chars,
which is not true if to_char_list replaces code units in ill-formed
sequences with U+FFFD; converting the list of chars back to a string
may produce a different string from the original input.

library/string.m:
    Deprecate reverse modes of to_char_list/2, to_rev_char_list/2,
    from_char_list/2 and char_to_string/2. Add commented out
    `obsolete_proc' pragmas to be enabled at a later date.

    Add comment about a future change to char_to_string.

    Implement char_to_string/2 without using the multi-moded
    to_char_list/2.

    Delete the unused Mercury implementation of string.append/3
    that depends on multi-moded to_char_list/2. The implementation is
    incorrect anyway in the presence of ill-formed code unit sequences.

compiler/old_type_constraints.m:
compiler/typecheck.m:
    Replace use of deprecated mode of char_to_string/2.

NEWS:
    Announce changes.
---
 NEWS                            |  7 +++++
 compiler/old_type_constraints.m |  3 +-
 compiler/typecheck.m            |  3 +-
 library/string.m                | 55 ++++++++++++++++++---------------
 4 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/NEWS b/NEWS
index a5e1c887a..ba69b8d16 100644
--- a/NEWS
+++ b/NEWS
@@ -428,6 +428,13 @@ Changes to the Mercury standard library:
    - compare_ignore_case_ascii/3
    - to_rev_char_list/2
 
+  The following procedures in the string module have been deprecated:
+
+   - to_char_list(uo, in)
+   - to_rev_char_list(uo, in)
+   - from_char_list(out, in)
+   - char_to_string(out, in)
+
 * The following predicates have been added to the map module:
 
    - foldl5/12
diff --git a/compiler/old_type_constraints.m b/compiler/old_type_constraints.m
index d79792ab6..104b91c11 100644
--- a/compiler/old_type_constraints.m
+++ b/compiler/old_type_constraints.m
@@ -1035,7 +1035,8 @@ builtin_atomic_type(uint32_const(_), builtin_type_int(int_type_uint32)).
 builtin_atomic_type(float_const(_), builtin_type_float).
 builtin_atomic_type(string_const(_), builtin_type_string).
 builtin_atomic_type(cons(unqualified(String), 0, _), builtin_type_char) :-
-    string.char_to_string(_, String).
+    string.index_next(String, 0, NextIndex, _Char),
+    string.length(String, NextIndex).
 builtin_atomic_type(impl_defined_const(Name), Type) :-
     (
         ( Name = "file"
diff --git a/compiler/typecheck.m b/compiler/typecheck.m
index 1b733a448..cee48fb2f 100644
--- a/compiler/typecheck.m
+++ b/compiler/typecheck.m
@@ -2799,7 +2799,8 @@ builtin_atomic_type(cons(unqualified(String), 0, _), "character") :-
     % XXX The parser should have a separate term.functor representation
     % for character constants, which should be converted to char_consts
     % during the term to item translation.
-    string.char_to_string(_, String).
+    string.index_next(String, 0, NextIndex, _Char),
+    string.length(String, NextIndex).
 builtin_atomic_type(impl_defined_const(Name), Type) :-
     (
         ( Name = "file"
diff --git a/library/string.m b/library/string.m
index c88d6f22f..ec893e9a7 100644
--- a/library/string.m
+++ b/library/string.m
@@ -136,6 +136,11 @@
     % NOTE: In the future we may also throw an exception if the list contains
     % a surrogate code point.
     %
+    % The reverse mode of to_char_list/2 is deprecated because the implied
+    % ability to round trip convert a string to a list then back to the same
+    % string does not hold in the presence of ill-formed code unit sequences.
+    %
+%:- pragma obsolete_proc(to_char_list(uo, in), [from_char_list/2]).
 :- func to_char_list(string) = list(char).
 :- pred to_char_list(string, list(char)).
 :- mode to_char_list(in, out) is det.
@@ -155,6 +160,11 @@
     % NOTE: In the future we may also throw an exception if the list contains
     % a surrogate code point.
     %
+    % The reverse mode of to_rev_char_list/2 is deprecated because the implied
+    % ability to round trip convert a string to a list then back to the same
+    % string does not hold in the presence of ill-formed code unit sequences.
+    %
+%:- pragma obsolete_proc(to_rev_char_list(uo, in), [from_rev_char_list/2]).
 :- func to_rev_char_list(string) = list(char).
 :- pred to_rev_char_list(string, list(char)).
 :- mode to_rev_char_list(in, out) is det.
@@ -166,6 +176,11 @@
     % NOTE: In the future we may also throw an exception if the list contains
     % a surrogate code point.
     %
+    % The reverse mode of from_char_list/2 is deprecated because the implied
+    % ability to round trip convert a string to a list then back to the same
+    % string does not hold in the presence of ill-formed code unit sequences.
+    %
+%:- pragma obsolete_proc(from_char_list(out, in), [to_char_list/2]).
 :- func from_char_list(list(char)::in) = (string::uo) is det.
 :- pred from_char_list(list(char), string).
 :- mode from_char_list(in, uo) is det.
@@ -1275,6 +1290,14 @@
     %
     % Converts a character (code point) to a string, or vice versa.
     %
+    % NOTE: In the future we may also throw an exception if Char is a
+    % surrogate code point.
+    %
+    % The reverse mode of char_to_string/2 is deprecated because the implied
+    % ability to round trip convert a string to a char then back to the same
+    % string does not hold in the presence of ill-formed code unit sequences.
+    %
+%:- pragma obsolete_proc(char_to_string(out, in)).
 :- func char_to_string(char::in) = (string::uo) is det.
 :- pred char_to_string(char, string).
 :- mode char_to_string(in, uo) is det.
@@ -3504,9 +3527,6 @@ append(S1::out, S2::out, S3::in) :-
     end
 ").
 
-append_iii(X, Y, Z) :-
-    mercury_append(X, Y, Z).
-
 :- pred append_ioi(string::in, string::uo, string::in) is semidet.
 
 :- pragma foreign_proc("C",
@@ -3554,9 +3574,6 @@ append_iii(X, Y, Z) :-
     }
 ").
 
-append_ioi(X, Y, Z) :-
-    mercury_append(X, Y, Z).
-
 :- pred append_iio(string::in, string::in, string::uo) is det.
 
 :- pragma foreign_proc("C",
@@ -3590,9 +3607,6 @@ append_ioi(X, Y, Z) :-
     S3 = list_to_binary([S1, S2])
 ").
 
-append_iio(X, Y, Z) :-
-    mercury_append(X, Y, Z).
-
 :- pred append_ooi(string::out, string::out, string::in) is multi.
 
 append_ooi(S1, S2, S3) :-
@@ -3652,20 +3666,6 @@ append_ooi_2(NextS1Len, S3Len, S1, S2, S3) :-
 append_ooi_3(S1Len, _S3Len, S1, S2, S3) :-
     split(S3, S1Len, S1, S2).
 
-    % XXX ILSEQ to_char_list cannot handle ill-formed sequences.
-    %
-:- pred mercury_append(string, string, string).
-:- mode mercury_append(in, in, in) is semidet.  % implied
-:- mode mercury_append(in, uo, in) is semidet.
-:- mode mercury_append(in, in, uo) is det.
-:- mode mercury_append(uo, uo, in) is multi.
-
-mercury_append(X, Y, Z) :-
-    to_char_list(X, XList),
-    to_char_list(Y, YList),
-    to_char_list(Z, ZList),
-    list.append(XList, YList, ZList).
-
 S1 ++ S2 = append(S1, S2).
 
 %---------------------%
@@ -5726,8 +5726,13 @@ det_to_float(FloatString) = Float :-
 char_to_string(C) = S1 :-
     char_to_string(C, S1).
 
-char_to_string(Char, String) :-
-    to_char_list(String, [Char]).
+:- pragma promise_equivalent_clauses(char_to_string/2).
+
+char_to_string(Char::in, String::uo) :-
+    from_char_list([Char], String).
+char_to_string(Char::out, String::in) :-
+    string.index_next(String, 0, NextIndex, Char),
+    string.length(String, NextIndex).
 
 from_char(Char) = char_to_string(Char).
 
-- 
2.23.0



More information about the reviews mailing list