[m-rev.] for review: fix java string/character literals

Peter Wang novalazy at gmail.com
Wed May 6 14:50:59 AEST 2009


Branches: main

Fix the following problems with the string and character literals output by
the Java backend, which arise because it uses routines originally designed
for C literals:

- Java string literals cannot be broken into multiple adjacent segments
  (the C idiom "part a" "part b" is not valid Java)

- the \a and \v escape sequences are not supported in Java

- Java has no trigraphs, so there is no need to avoid them


compiler/c_util.m:
        Add versions of existing predicates which take the output language as
        an argument.

        Fix the above problems for Java.

compiler/mlds_to_java.m:
        Use the new predicates.

diff --git a/compiler/c_util.m b/compiler/c_util.m
index a7f4c79..87bb25f 100644
--- a/compiler/c_util.m
+++ b/compiler/c_util.m
@@ -49,12 +49,23 @@
 % String and character handling
 %

+    % Chooses between C and Java literal syntax.
+    %
+:- type literal_language
+    --->    literal_c
+    ;       literal_java.
+
     % Print out a string suitably escaped for use as a C string literal.
     % This doesn't actually print out the enclosing double quotes --
     % that is the caller's responsibility.
     %
 :- pred output_quoted_string(string::in, io::di, io::uo) is det.

+    % As above, but for the specified language.
+    %
+:- pred output_quoted_string_lang(literal_language::in, string::in,
+    io::di, io::uo) is det.
+
     % output_quoted_multi_string is like list.foldl(output_quoted_string)
     % except that a null character will be written between each string
     % in the list.
@@ -62,6 +73,11 @@
 :- type multi_string == list(string).
 :- pred output_quoted_multi_string(multi_string::in, io::di, io::uo) is det.

+    % As above, but for the specified language.
+    %
+:- pred output_quoted_multi_string_lang(literal_language::in,
+    multi_string::in, io::di, io::uo) is det.
+
     % Print out a char suitably escaped for use as a C char literal.
     % This doesn't actually print out the enclosing single quotes --
     % that is the caller's responsibility.
@@ -206,24 +222,31 @@ reset_line_num(!IO) :-
 %

 output_quoted_string(S, !IO) :-
-    output_quoted_string(0, length(S), S, !IO).
+    output_quoted_string_lang(literal_c, S, !IO).

-output_quoted_multi_string([], !IO).
-output_quoted_multi_string([S | Ss], !IO) :-
-    output_quoted_string(S, !IO),
-    output_quoted_char(char.det_from_int(0), !IO),
-    output_quoted_multi_string(Ss, !IO).
+output_quoted_string_lang(Lang, S, !IO) :-
+    do_output_quoted_string(Lang, 0, length(S), S, !IO).

-:- pred output_quoted_string(int::in, int::in, string::in,
-    io::di, io::uo) is det.
+output_quoted_multi_string(Ss, !IO) :-
+    output_quoted_multi_string_lang(literal_c, Ss, !IO).
+
+output_quoted_multi_string_lang(_Lang, [], !IO).
+output_quoted_multi_string_lang(Lang, [S | Ss], !IO) :-
+    output_quoted_string_lang(Lang, S, !IO),
+    output_quoted_char_lang(Lang, char.det_from_int(0), !IO),
+    output_quoted_multi_string_lang(Lang, Ss, !IO).
+
+:- pred do_output_quoted_string(literal_language::in, int::in, int::in,
+    string::in, io::di, io::uo) is det.

-output_quoted_string(Cur, Len, S, !IO) :-
+do_output_quoted_string(Lang, Cur, Len, S, !IO) :-
     ( Cur < Len ->
         % Avoid a limitation in the MSVC compiler where string literals
-        % can be no longer then 2048 chars. However if you output the string
+        % can be no longer than 2048 chars. However if you output the string
         % in chunks, eg "part a" "part b" it will accept a string longer than
         % 2048 chars, go figure!
         (
+            Lang = literal_c,
             Cur \= 0,
             Cur mod 512 = 0
         ->
@@ -233,12 +256,13 @@ output_quoted_string(Cur, Len, S, !IO) :-
         ),

         string.unsafe_index(S, Cur, Char),
-        output_quoted_char(Char, !IO),
+        output_quoted_char_lang(Lang, Char, !IO),

         % Check for trigraph sequences in string literals. We break the
         % trigraph by breaking the string into multiple chunks. For example,
         % "??-" gets converted to "?" "?-".
         (
+            Lang = literal_c,
             Char = '?',
             Cur + 2 < Len
         ->
@@ -255,27 +279,44 @@ output_quoted_string(Cur, Len, S, !IO) :-
             true
         ),

-        output_quoted_string(Cur + 1, Len, S, !IO)
+        do_output_quoted_string(Lang, Cur + 1, Len, S, !IO)
     ;
         true
     ).

 output_quoted_char(Char, !IO) :-
-    EscapedCharStr = quote_char(Char),
+    output_quoted_char_lang(literal_c, Char, !IO).
+
+:- pred output_quoted_char_lang(literal_language::in, char::in, io::di, io::uo)
+    is det.
+
+output_quoted_char_lang(Lang, Char, !IO) :-
+    EscapedCharStr = quote_char_lang(Lang, Char),
     io.write_string(EscapedCharStr, !IO).

-quote_char(Char) = QuotedCharStr :-
-    quote_one_char(Char, [], RevQuotedCharStr),
+quote_char(Char) = quote_char_lang(literal_c, Char).
+
+:- func quote_char_lang(literal_language, char) = string.
+
+quote_char_lang(Lang, Char) = QuotedCharStr :-
+    quote_one_char(Lang, Char, [], RevQuotedCharStr),
     string.from_rev_char_list(RevQuotedCharStr, QuotedCharStr).

 quote_string(String) = QuotedString :-
-    string.foldl(quote_one_char, String, [], RevQuotedChars),
+    Lang = literal_c,
+    string.foldl(quote_one_char(Lang), String, [], RevQuotedChars),
     string.from_rev_char_list(RevQuotedChars, QuotedString).

-:- pred quote_one_char(char::in, list(char)::in, list(char)::out) is det.
+:- pred quote_one_char(literal_language::in, char::in,
+    list(char)::in, list(char)::out) is det.

-quote_one_char(Char, RevChars0, RevChars) :-
-    ( escape_special_char(Char, EscapeChar) ->
+quote_one_char(Lang, Char, RevChars0, RevChars) :-
+    (
+        Lang = literal_java,
+        java_escape_special_char(Char, RevEscapeChars)
+    ->
+        list.append(RevEscapeChars, RevChars0, RevChars)
+    ; escape_special_char(Char, EscapeChar) ->
         RevChars = [EscapeChar, '\\' | RevChars0]
     ; is_c_source_char(Char) ->
         RevChars = [Char | RevChars0]
@@ -286,6 +327,11 @@ quote_one_char(Char, RevChars0, RevChars) :-
         reverse_append(EscapeChars, RevChars0, RevChars)
     ).

+:- pred java_escape_special_char(char::in, list(char)::out) is semidet.
+
+java_escape_special_char('\a', ['7', '0', '0', '\\']).
+java_escape_special_char('\v', ['3', '1', '0', '\\']).
+
 :- pred escape_special_char(char::in, char::out) is semidet.

 escape_special_char('"', '"').
@@ -294,8 +340,8 @@ escape_special_char('\\', '\\').
 escape_special_char('\n', 'n').
 escape_special_char('\t', 't').
 escape_special_char('\b', 'b').
-escape_special_char('\a', 'a').
-escape_special_char('\v', 'v').
+escape_special_char('\a', 'a'). % not in Java
+escape_special_char('\v', 'v'). % not in Java
 escape_special_char('\r', 'r').
 escape_special_char('\f', 'f').

diff --git a/compiler/mlds_to_java.m b/compiler/mlds_to_java.m
index d18de80..0aa6c45 100644
--- a/compiler/mlds_to_java.m
+++ b/compiler/mlds_to_java.m
@@ -3637,12 +3637,12 @@ output_rval_const(mlconst_float(FloatVal), !IO) :-

 output_rval_const(mlconst_string(String), !IO) :-
     io.write_string("""", !IO),
-    c_util.output_quoted_string(String, !IO),
+    c_util.output_quoted_string_lang(literal_java, String, !IO),
     io.write_string("""", !IO).

 output_rval_const(mlconst_multi_string(String), !IO) :-
     io.write_string("""", !IO),
-    c_util.output_quoted_multi_string(String, !IO),
+    c_util.output_quoted_multi_string_lang(literal_java, String, !IO),
     io.write_string("""", !IO).

 output_rval_const(mlconst_named_const(NamedConst), !IO) :-
--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to:       mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions:          mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------



More information about the reviews mailing list