[m-rev.] diff: fix bug with carriage-return and gcc >= 2.96

Fergus Henderson fjh at cs.mu.OZ.AU
Tue Aug 6 09:18:07 AEST 2002


Estimated hours taken: 2
Branches: main

Fix a bug where we were generating C code that contained unescaped
special characters (such as carriage return) in string literals.
This generated code was relying on the implementation-specific
behaviour of GCC, and unfortunately that behaviour changed in GCC
versions 2.96 and later.  The symptom was that printing "\r\n"
came out as "\n\n" when using GCC versions >= 2.96.

compiler/c_util.m:
	Change the code used to implement quote_char, quote_string,
	and output_quoted_multi_string so that these routines properly
	escape all special characters, rather than just \" \\ \n \t
	and \b.  (This required changing the output argument type for
	quote_char from a character to a string, and its determinism
	from semidet to det.)
	Add output_quoted_char, for use by layout_out.m.

compiler/layout_out.m:
	Use c_util__output_quoted_char, rather than duplicating the
	logic in c_util.m.

tests/hard_coded/Mmakefile:
tests/hard_coded/special_char.m:
tests/hard_coded/special_char.exp:
	Regression test.

tests/hard_coded/Mmakefile:
tests/hard_coded/null_char.m:
tests/hard_coded/null_char.exp:
tests/hard_coded/null_char.exp2:
	Add a test of outputting strings containing null characters.
	Note that currently we don't handle this correctly;
	we ignore everything after the first null character.
	So the ".exp2" file for this test case allows that output.
	If/when this is fixed, the ".exp2" file for this
	test case should be removed.

Workspace: /home/ceres/fjh/ws-ceres1/mercury
Index: compiler/c_util.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/c_util.m,v
retrieving revision 1.10
diff -u -d -r1.10 c_util.m
--- compiler/c_util.m	20 Mar 2002 12:35:52 -0000	1.10
+++ compiler/c_util.m	5 Aug 2002 21:54:42 -0000
@@ -49,6 +49,12 @@
 		io__state, io__state).
 :- mode c_util__output_quoted_multi_string(in, in, di, uo) is det.
 
+	% Print out a char suitably escaped for use as a C char literal.
+	% This doesn't actually print out the enclosing single quotes --
+	% that is the caller's responsibility.
+:- pred c_util__output_quoted_char(char, io__state, io__state).
+:- mode c_util__output_quoted_char(in, di, uo) is det.
+
 	% Convert a string to a form that is suitably escaped for use as a
 	% C string literal.  This doesn't actually add the enclosing double
 	% quotes -- that is the caller's responsibility.
@@ -56,10 +62,10 @@
 :- mode c_util__quote_string(in, out) is det.
 
 	% Convert a character to a form that is suitably escaped for use as a
-	% C character literal.  This doesn't actually add the enclosing double
+	% C character literal.  This doesn't actually add the enclosing single
 	% quotes -- that is the caller's responsibility.
-:- pred c_util__quote_char(char, char).
-:- mode c_util__quote_char(in, out) is semidet.
+:- pred c_util__quote_char(char, string).
+:- mode c_util__quote_char(in, out) is det.
 
 %-----------------------------------------------------------------------------%
 %
@@ -176,34 +182,80 @@
 			% we must use unsafe index, because we want to be able
 			% to access chars beyond the first NUL
 		{ string__unsafe_index(S, Cur, Char) },
-		( { char__to_int(Char, 0) } ->
-			io__write_string("\\0")
-		; { c_util__quote_char(Char, QuoteChar) } ->
-			io__write_char('\\'),
-			io__write_char(QuoteChar)
-		;
-			io__write_char(Char)
-		),
+		c_util__output_quoted_char(Char),
 		output_quoted_multi_string_2(Cur + 1, Len, S)
 	;
 		[]
 	).
 
+c_util__output_quoted_char(Char) -->
+	{ c_util__quote_char(Char, EscapedChars) },
+	io__write_string(EscapedChars).
+
+c_util__quote_char(Char, QuotedChar) :-
+	c_util__quote_one_char(Char, [], RevQuotedChar),
+	string__from_rev_char_list(RevQuotedChar, QuotedChar).
+
 c_util__quote_string(String, QuotedString) :-
-	QuoteOneChar = (pred(Char::in, RevChars0::in, RevChars::out) is det :-
-		( c_util__quote_char(Char, QuoteChar) ->
-			RevChars = [QuoteChar, '\\' | RevChars0]
-		;
-			RevChars = [Char | RevChars0]
-		)),
-	string__foldl(QuoteOneChar, String, [], RevQuotedChars),
+	string__foldl(c_util__quote_one_char, String, [], RevQuotedChars),
 	string__from_rev_char_list(RevQuotedChars, QuotedString).
 
-c_util__quote_char('"', '"').
-c_util__quote_char('\\', '\\').
-c_util__quote_char('\n', 'n').
-c_util__quote_char('\t', 't').
-c_util__quote_char('\b', 'b').
+:- pred c_util__quote_one_char(char::in, list(char)::in, list(char)::out)
+	is det.
+c_util__quote_one_char(Char, RevChars0, RevChars) :-
+	( c_util__escape_special_char(Char, EscapeChar) ->
+		RevChars = [EscapeChar, '\\' | RevChars0]
+	; c_util__is_c_source_char(Char) ->
+		RevChars = [Char | RevChars0]
+	; char__to_int(Char, 0) ->
+		RevChars = ['0', '\\' | RevChars0]
+	;
+		c_util__escape_any_char(Char, EscapeChars),
+		reverse_append(EscapeChars, RevChars0, RevChars)
+	).
+
+:- pred c_util__escape_special_char(char::in, char::out) is semidet.
+c_util__escape_special_char('"', '"').
+c_util__escape_special_char('\\', '\\').
+c_util__escape_special_char('\n', 'n').
+c_util__escape_special_char('\t', 't').
+c_util__escape_special_char('\b', 'b').
+c_util__escape_special_char('\a', 'a').
+c_util__escape_special_char('\v', 'v').
+c_util__escape_special_char('\r', 'r').
+c_util__escape_special_char('\f', 'f').
+
+% This succeeds iff the specified character is allowed as an (unescaped)
+% character in standard-conforming C source code.
+:- pred c_util__is_c_source_char(char::in) is semidet.
+c_util__is_c_source_char(Char) :-
+	( char__is_alnum(Char)
+	; string__contains_char(c_graphic_chars, Char)
+	).
+
+% This returns a string containing all the characters that the C standard
+% specifies as being included in the "basic execution character set",
+% except for the letters (a-z A-Z) and digits (0-9).
+:- func c_graphic_chars = string.
+c_graphic_chars = " !\"#%&'()*+,-./:;<=>?[\\]^_{|}~".
+
+
+	% reverse_append(Xs, Ys, Zs) <=> Zs = list__reverse(Xs) ++ Ys.
+:- pred reverse_append(list(T), list(T), list(T)).
+:- mode reverse_append(in, in, out) is det.
+reverse_append([], L, L).
+reverse_append([X|Xs], L0, L) :-
+	reverse_append(Xs, [X|L0], L).
+
+:- pred escape_any_char(char, list(char)).
+:- mode escape_any_char(in, out) is det.
+
+        % Convert a character to the corresponding C octal escape code.
+escape_any_char(Char, EscapeCodeChars) :-
+        char__to_int(Char, Int),
+        string__int_to_base_string(Int, 8, OctalString0),
+        string__pad_left(OctalString0, '0', 3, OctalString),
+        EscapeCodeChars = ['\\' | string__to_char_list(OctalString)].
 
 %-----------------------------------------------------------------------------%
 
Index: compiler/layout_out.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/layout_out.m,v
retrieving revision 1.12
diff -u -d -r1.12 layout_out.m
--- compiler/layout_out.m	26 Jul 2002 04:18:16 -0000	1.12
+++ compiler/layout_out.m	5 Aug 2002 10:05:55 -0000
@@ -966,14 +966,7 @@
 	),
 	{ string__unsafe_index(String, CurIndex, Char) },
 	io__write_char(''''),
-	( { char__to_int(Char, 0) } ->
-		io__write_string("\\0")
-	; { c_util__quote_char(Char, QuoteChar) } ->
-		io__write_char('\\'),
-		io__write_char(QuoteChar)
-	;
-		io__write_char(Char)
-	),
+	c_util__output_quoted_char(Char),
 	io__write_char(''''),
 	( { CurIndex < MaxIndex } ->
 		io__write_string(", "),
Index: tests/hard_coded/Mmakefile
===================================================================
RCS file: /home/mercury1/repository/tests/hard_coded/Mmakefile,v
retrieving revision 1.160
diff -u -d -r1.160 Mmakefile
--- tests/hard_coded/Mmakefile	30 Jul 2002 04:29:44 -0000	1.160
+++ tests/hard_coded/Mmakefile	5 Aug 2002 22:24:23 -0000
@@ -101,6 +101,7 @@
 	nondet_ctrl_vn \
 	nondet_copy_out \
 	nullary_ho_func \
+	null_char \
 	pprint_test \
 	pragma_c_code \
 	pragma_export \
@@ -127,6 +128,7 @@
 	shift_test \
 	solve_quadratic \
 	space \
+	special_char \
 	split_c_files \
 	string_alignment \
 	string_alignment_bug \
Index: tests/hard_coded/null_char.exp
===================================================================
RCS file: tests/hard_coded/null_char.exp
diff -N tests/hard_coded/null_char.exp
Binary files /dev/null and null_char.exp differ
Index: tests/hard_coded/null_char.exp2
===================================================================
RCS file: tests/hard_coded/null_char.exp2
diff -N tests/hard_coded/null_char.exp2
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ tests/hard_coded/null_char.exp2	5 Aug 2002 22:15:33 -0000
@@ -0,0 +1 @@
+text before null before some more
Index: tests/hard_coded/null_char.m
===================================================================
RCS file: tests/hard_coded/null_char.m
diff -N tests/hard_coded/null_char.m
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ tests/hard_coded/null_char.m	5 Aug 2002 23:12:54 -0000
@@ -0,0 +1,22 @@
+% Test output of strings containing null characters
+
+% XXX Note that currently we don't handle this correctly;
+% we ignore everything after the first null character.
+% So the ".exp2" file for this test case allows that output.
+% If/when this is fixed, the ".exp2" file for this
+% test case should be removed.
+
+:- module null_char.
+:- interface.
+:- import_module io.
+
+:- pred main(io__state::di, io__state::uo) is det.
+
+:- implementation.
+:- import_module string.
+
+main -->
+	print("text before null \0 text after null character\n"),
+	{ Foo = "before\0&after", Bar = " some more\0&more" },
+	print(Foo ++ Bar),
+	print("\n").
Index: tests/hard_coded/special_char.exp
===================================================================
RCS file: tests/hard_coded/special_char.exp
diff -N tests/hard_coded/special_char.exp
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ tests/hard_coded/special_char.exp	5 Aug 2002 22:14:14 -0000
@@ -0,0 +1,4 @@
+Hello world
+
+	
+?SÑ
Index: tests/hard_coded/special_char.m
===================================================================
RCS file: tests/hard_coded/special_char.m
diff -N tests/hard_coded/special_char.m
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ tests/hard_coded/special_char.m	5 Aug 2002 22:13:30 -0000
@@ -0,0 +1,18 @@
+% Test output of special characters such as \r
+
+:- module special_char.
+:- interface.
+:- import_module io.
+
+:- pred main(io__state::di, io__state::uo) is det.
+
+:- implementation.
+
+main -->
+	print("Hello world\r\n"),
+	print("\r\n"),
+	print("\a\b\v\f\t\n"),
+	print("\077"),
+	print("\0123"),
+	print("\0321"),
+	print("\n").

-- 
Fergus Henderson <fjh at cs.mu.oz.au>  |  "I have always known that the pursuit
The University of Melbourne         |  of excellence is a lethal habit"
WWW: <http://www.cs.mu.oz.au/~fjh>  |     -- the last words of T. S. Garp.
--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list