[m-rev.] diff: fix bug with carriage-return and gcc >= 2.96
Fergus Henderson
fjh at cs.mu.OZ.AU
Tue Aug 6 09:18:07 AEST 2002
Estimated hours taken: 2
Branches: main
Fix a bug where we were generating C code that contained special
characters in string literals. This generated code was relying on
the implementation-specific behaviour of GCC, and unfortunately
that behaviour changed in GCC versions 2.96 and later.
The symptom was that printing "\r\n" came out as "\n\n"
when using GCC versions >= 2.96.
compiler/c_util.m:
Change the code used to implement quote_char, quote_string,
and quote_multi_string so that these routines properly escape
all special characters, rather than just \" \\ \n \t and \b.
(This required changing the output argument type for quote_char
from a character to a string.)
Add output_quoted_char, for use by layout_out.m.
compiler/layout_out.m:
Use c_util__output_quoted_char, rather than duplicating the
logic in c_util.m.
tests/hard_coded/Mmakefile:
tests/hard_coded/special_char.m:
tests/hard_coded/special_char.exp:
Regression test.
tests/hard_coded/Mmakefile:
tests/hard_coded/null_char.m:
tests/hard_coded/null_char.exp:
tests/hard_coded/null_char.exp2:
Add a test of outputting strings containing null characters.
Note that currently we don't handle this correctly;
we ignore everything after the first null character.
So the ".exp2" file for this test case allows that output.
If/when this is fixed, the ".exp2" file for this
test case should be removed.
Workspace: /home/ceres/fjh/ws-ceres1/mercury
Index: compiler/c_util.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/c_util.m,v
retrieving revision 1.10
diff -u -d -r1.10 c_util.m
--- compiler/c_util.m 20 Mar 2002 12:35:52 -0000 1.10
+++ compiler/c_util.m 5 Aug 2002 21:54:42 -0000
@@ -49,6 +49,12 @@
io__state, io__state).
:- mode c_util__output_quoted_multi_string(in, in, di, uo) is det.
+ % Print out a char suitably escaped for use as a C char literal.
+ % This doesn't actually print out the enclosing single quotes --
+ % that is the caller's responsibility.
+:- pred c_util__output_quoted_char(char, io__state, io__state).
+:- mode c_util__output_quoted_char(in, di, uo) is det.
+
% Convert a string to a form that is suitably escaped for use as a
% C string literal. This doesn't actually add the enclosing double
% quotes -- that is the caller's responsibility.
@@ -56,10 +62,10 @@
:- mode c_util__quote_string(in, out) is det.
% Convert a character to a form that is suitably escaped for use as a
- % C character literal. This doesn't actually add the enclosing double
+ % C character literal. This doesn't actually add the enclosing single
% quotes -- that is the caller's responsibility.
-:- pred c_util__quote_char(char, char).
-:- mode c_util__quote_char(in, out) is semidet.
+:- pred c_util__quote_char(char, string).
+:- mode c_util__quote_char(in, out) is det.
%-----------------------------------------------------------------------------%
%
@@ -176,34 +182,80 @@
% we must use unsafe index, because we want to be able
% to access chars beyond the first NUL
{ string__unsafe_index(S, Cur, Char) },
- ( { char__to_int(Char, 0) } ->
- io__write_string("\\0")
- ; { c_util__quote_char(Char, QuoteChar) } ->
- io__write_char('\\'),
- io__write_char(QuoteChar)
- ;
- io__write_char(Char)
- ),
+ c_util__output_quoted_char(Char),
output_quoted_multi_string_2(Cur + 1, Len, S)
;
[]
).
+ % Write Char in escaped form: c_util__quote_char builds the escaped
+ % string and io__write_string emits it. No enclosing quotes are written.
+c_util__output_quoted_char(Char) -->
+ { c_util__quote_char(Char, EscapedChars) },
+ io__write_string(EscapedChars).
+
+ % Escape a single character by feeding it to c_util__quote_one_char
+ % with an empty accumulator; the result comes back reversed, so
+ % string__from_rev_char_list restores the order while building the string.
+c_util__quote_char(Char, QuotedChar) :-
+ c_util__quote_one_char(Char, [], RevQuotedChar),
+ string__from_rev_char_list(RevQuotedChar, QuotedChar).
+
c_util__quote_string(String, QuotedString) :-
- QuoteOneChar = (pred(Char::in, RevChars0::in, RevChars::out) is det :-
- ( c_util__quote_char(Char, QuoteChar) ->
- RevChars = [QuoteChar, '\\' | RevChars0]
- ;
- RevChars = [Char | RevChars0]
- )),
- string__foldl(QuoteOneChar, String, [], RevQuotedChars),
+ string__foldl(c_util__quote_one_char, String, [], RevQuotedChars),
string__from_rev_char_list(RevQuotedChars, QuotedString).
-c_util__quote_char('"', '"').
-c_util__quote_char('\\', '\\').
-c_util__quote_char('\n', 'n').
-c_util__quote_char('\t', 't').
-c_util__quote_char('\b', 'b').
+    % c_util__quote_one_char(Char, RevChars0, RevChars):
+    % Prepend the C-escaped form of Char, in reverse order, to the
+    % accumulator RevChars0, giving RevChars.
+    %   - Characters with a mnemonic escape (c_util__escape_special_char)
+    %     become a backslash followed by the mnemonic letter.
+    %   - Characters that may appear literally in C source are copied as-is.
+    %   - Everything else, including NUL, becomes a three-digit octal escape.
+:- pred c_util__quote_one_char(char::in, list(char)::in, list(char)::out)
+    is det.
+c_util__quote_one_char(Char, RevChars0, RevChars) :-
+    ( c_util__escape_special_char(Char, EscapeChar) ->
+        RevChars = [EscapeChar, '\\' | RevChars0]
+    ; c_util__is_c_source_char(Char) ->
+        RevChars = [Char | RevChars0]
+    ;
+        % NUL is deliberately not special-cased as the short form "\0":
+        % a C octal escape absorbs up to three digits, so NUL followed
+        % by the character '1' would be read as the single escape "\01".
+        % The fixed-width form "\000" from escape_any_char is unambiguous.
+        c_util__escape_any_char(Char, EscapeChars),
+        reverse_append(EscapeChars, RevChars0, RevChars)
+    ).
+
+    % c_util__escape_special_char(Char, EscapeChar):
+    % Succeeds iff Char has a single-letter C escape sequence, in which
+    % case backslash followed by EscapeChar denotes Char in both C string
+    % literals and C character literals. Fails for every other character.
+:- pred c_util__escape_special_char(char::in, char::out) is semidet.
+c_util__escape_special_char('"', '"').
+    % The single quote must be escaped too: the output of
+    % c_util__quote_char is also placed between single quotes to form a
+    % C character literal, and an unescaped quote there is invalid C.
+    % The escaped form is equally valid inside a string literal.
+c_util__escape_special_char('''', '''').
+c_util__escape_special_char('\\', '\\').
+c_util__escape_special_char('\n', 'n').
+c_util__escape_special_char('\t', 't').
+c_util__escape_special_char('\b', 'b').
+c_util__escape_special_char('\a', 'a').
+c_util__escape_special_char('\v', 'v').
+c_util__escape_special_char('\r', 'r').
+c_util__escape_special_char('\f', 'f').
+
+% This succeeds iff the specified character is allowed as an (unescaped)
+% character in standard-conforming C source code.
+% A character qualifies if char__is_alnum accepts it or if it occurs in
+% c_graphic_chars. c_graphic_chars also lists '"' and '\', but
+% c_util__quote_one_char tries c_util__escape_special_char before calling
+% this predicate, so those two characters are still escaped.
+:- pred c_util__is_c_source_char(char::in) is semidet.
+c_util__is_c_source_char(Char) :-
+ ( char__is_alnum(Char)
+ ; string__contains_char(c_graphic_chars, Char)
+ ).
+
+% This returns a string containing the space character and the 29 graphic
+% (punctuation) characters that the C standard includes in its basic
+% character sets. The letters (a-z A-Z) and digits (0-9) are left out,
+% and so are the control characters (tab, newline, etc.) that the
+% standard also places in those sets.
+:- func c_graphic_chars = string.
+c_graphic_chars = " !\"#%&'()*+,-./:;<=>?[\\]^_{|}~".
+
+
+ % reverse_append(Xs, Ys, Zs) <=> Zs = list__reverse(Xs) ++ Ys.
+ % Each element of Xs is pushed onto the front of the accumulator in turn,
+ % so the first element of Xs ends up deepest in Zs.
+:- pred reverse_append(list(T), list(T), list(T)).
+:- mode reverse_append(in, in, out) is det.
+reverse_append([], L, L).
+reverse_append([X|Xs], L0, L) :-
+ reverse_append(Xs, [X|L0], L).
+
+:- pred escape_any_char(char, list(char)).
+:- mode escape_any_char(in, out) is det.
+
+ % Convert a character to the corresponding C octal escape code.
+ % The result is a backslash followed by the character's code written
+ % in base 8 and left-padded with '0' to three digits (e.g. code 13
+ % gives \015). The list is in forward (reading) order, so a caller
+ % that accumulates characters in reverse has to reverse it, as the
+ % reverse_append call in c_util__quote_one_char does.
+escape_any_char(Char, EscapeCodeChars) :-
+ char__to_int(Char, Int),
+ string__int_to_base_string(Int, 8, OctalString0),
+ string__pad_left(OctalString0, '0', 3, OctalString),
+ EscapeCodeChars = ['\\' | string__to_char_list(OctalString)].
%-----------------------------------------------------------------------------%
Index: compiler/layout_out.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/layout_out.m,v
retrieving revision 1.12
diff -u -d -r1.12 layout_out.m
--- compiler/layout_out.m 26 Jul 2002 04:18:16 -0000 1.12
+++ compiler/layout_out.m 5 Aug 2002 10:05:55 -0000
@@ -966,14 +966,7 @@
),
{ string__unsafe_index(String, CurIndex, Char) },
io__write_char(''''),
- ( { char__to_int(Char, 0) } ->
- io__write_string("\\0")
- ; { c_util__quote_char(Char, QuoteChar) } ->
- io__write_char('\\'),
- io__write_char(QuoteChar)
- ;
- io__write_char(Char)
- ),
+ c_util__output_quoted_char(Char),
io__write_char(''''),
( { CurIndex < MaxIndex } ->
io__write_string(", "),
Index: tests/hard_coded/Mmakefile
===================================================================
RCS file: /home/mercury1/repository/tests/hard_coded/Mmakefile,v
retrieving revision 1.160
diff -u -d -r1.160 Mmakefile
--- tests/hard_coded/Mmakefile 30 Jul 2002 04:29:44 -0000 1.160
+++ tests/hard_coded/Mmakefile 5 Aug 2002 22:24:23 -0000
@@ -101,6 +101,7 @@
nondet_ctrl_vn \
nondet_copy_out \
nullary_ho_func \
+ null_char \
pprint_test \
pragma_c_code \
pragma_export \
@@ -127,6 +128,7 @@
shift_test \
solve_quadratic \
space \
+ special_char \
split_c_files \
string_alignment \
string_alignment_bug \
Index: tests/hard_coded/null_char.exp
===================================================================
RCS file: tests/hard_coded/null_char.exp
diff -N tests/hard_coded/null_char.exp
Binary files /dev/null and null_char.exp differ
Index: tests/hard_coded/null_char.exp2
===================================================================
RCS file: tests/hard_coded/null_char.exp2
diff -N tests/hard_coded/null_char.exp2
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ tests/hard_coded/null_char.exp2 5 Aug 2002 22:15:33 -0000
@@ -0,0 +1 @@
+text before null before some more
Index: tests/hard_coded/null_char.m
===================================================================
RCS file: tests/hard_coded/null_char.m
diff -N tests/hard_coded/null_char.m
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ tests/hard_coded/null_char.m 5 Aug 2002 23:12:54 -0000
@@ -0,0 +1,22 @@
+% Test output of strings containing null characters
+
+% XXX Note that currently we don't handle this correctly;
+% we ignore everything after the first null character.
+% So the ".exp2" file for this test case allows that output.
+% If/when this is fixed, the ".exp2" file for this
+% test case should be removed.
+
+:- module null_char.
+:- interface.
+:- import_module io.
+
+:- pred main(io__state::di, io__state::uo) is det.
+
+:- implementation.
+:- import_module string.
+
+main -->
+ print("text before null \0 text after null character\n"),
+ { Foo = "before\0&after", Bar = " some more\0&more" },
+ print(Foo ++ Bar),
+ print("\n").
Index: tests/hard_coded/special_char.exp
===================================================================
RCS file: tests/hard_coded/special_char.exp
diff -N tests/hard_coded/special_char.exp
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ tests/hard_coded/special_char.exp 5 Aug 2002 22:14:14 -0000
@@ -0,0 +1,4 @@
+Hello world
+
+
+?SÑ
Index: tests/hard_coded/special_char.m
===================================================================
RCS file: tests/hard_coded/special_char.m
diff -N tests/hard_coded/special_char.m
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ tests/hard_coded/special_char.m 5 Aug 2002 22:13:30 -0000
@@ -0,0 +1,18 @@
+% Test output of special characters such as \r
+
+:- module special_char.
+:- interface.
+:- import_module io.
+
+:- pred main(io__state::di, io__state::uo) is det.
+
+:- implementation.
+
+main -->
+ print("Hello world\r\n"),
+ print("\r\n"),
+ print("\a\b\v\f\t\n"),
+ print("\077"),
+ print("\0123"),
+ print("\0321"),
+ print("\n").
--
Fergus Henderson <fjh at cs.mu.oz.au> | "I have always known that the pursuit
The University of Melbourne | of excellence is a lethal habit"
WWW: <http://www.cs.mu.oz.au/~fjh> | -- the last words of T. S. Garp.
--------------------------------------------------------------------------
mercury-reviews mailing list
post: mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------
More information about the reviews
mailing list