[m-rev.] diff: fix lexer two-char pushback bug

Fergus Henderson fjh at cs.mu.OZ.AU
Sat Feb 15 05:04:45 AEDT 2003


Estimated hours taken: 1
Branches: main

library/lexer.m:
	Fix a bug where the lexer relied on being able to push back two
	characters in a row, which is more than io__putback_char guarantees.
	This bug broke things in grade `il', which only supports exactly
	one byte of pushback.

	Unfortunately the fix required adding a new kind of token to the
	token type, which changes the interface.  The new token is only used
	internally within lexer.m -- it is expanded into an integer/1 token
	followed by the token for the `.' before the token list is returned.
	So hopefully this won't cause many backwards compatibility problems.

library/parser.m:
	Update to handle the new token.

Workspace: /home/fjh/ws/hermes
Index: library/lexer.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/lexer.m,v
retrieving revision 1.36
diff -u -d -r1.36 lexer.m
--- library/lexer.m	9 Jul 2002 01:30:18 -0000	1.36
+++ library/lexer.m	14 Feb 2003 17:47:57 -0000
@@ -38,7 +38,15 @@
 	;	junk(char)		% junk character in the input stream
 	;	error(string)		% some other invalid token
 	;	io_error(io__error)	% error reading from the input stream
-	;	eof.			% end-of-file
+	;	eof			% end-of-file
+	;	integer_dot(int).	% the lexer will never return this.
+					% The integer_dot/1 token is used
+					% internally in the lexer, to keep
+					% the grammar LL(1) so that only one
+					% character of pushback is needed.
+					% But the lexer will convert
+					% integer_dot/1 tokens to integer/1
+					% tokens before returning them.
 
 % For every token, we record the line number of the line on
 % which the token occurred.
@@ -158,6 +166,9 @@
 	string__append("I/O error: ", IO_ErrorMessage, String).
 lexer__token_to_string(error(Message), String) :-
 	string__append_list(["illegal token (", Message, ")"], String).
+lexer__token_to_string(integer_dot(Int), String) :-
+	string__int_to_string(Int, IntString),
+	string__append_list(["integer `", IntString, "'."], String).
 
 	% We build the tokens up as lists of characters in reverse order.
 	% When we get to the end of each token, we call
@@ -171,13 +182,25 @@
 
 lexer__get_token_list(Tokens) -->
 	lexer__get_token(Token, Context),
-	( { Token = eof } ->
+	lexer__get_token_list_2(Token, Context, Tokens).
+
+:- pred lexer__get_token_list_2(token, token_context, token_list,
+		io__state, io__state).
+:- mode lexer__get_token_list_2(in, in, out, di, uo) is det.
+lexer__get_token_list_2(Token0, Context0, Tokens) -->
+	( { Token0 = eof } ->
 		{ Tokens = token_nil }
-	; { Token = end ; Token = error(_) ; Token = io_error(_) } ->
-		{ Tokens = token_cons(Token, Context, token_nil) }
+	; { Token0 = end ; Token0 = error(_) ; Token0 = io_error(_) } ->
+		{ Tokens = token_cons(Token0, Context0, token_nil) }
+	; { Token0 = integer_dot(Int) } ->
+		lexer__get_context(Context1),
+		lexer__get_dot(Token1),
+		lexer__get_token_list_2(Token1, Context1, Tokens1),
+		{ Tokens = token_cons(integer(Int), Context0, Tokens1) }
 	;
-		{ Tokens = token_cons(Token, Context, Tokens1) },
-		lexer__get_token_list(Tokens1)
+		lexer__get_token(Token1, Context1),
+		lexer__get_token_list_2(Token1, Context1, Tokens1),
+		{ Tokens = token_cons(Token0, Context0, Tokens1) }
 	).
 
 lexer__string_get_token_list(String, Tokens) -->
@@ -1741,8 +1764,17 @@
 			lexer__get_float_decimals([Char, '.' | Chars], Token)
 		;
 			io__putback_char(Char),
-			io__putback_char('.'),
-			{ lexer__rev_char_list_to_int(Chars, 10, Token) }
+			% We can't putback the ".", because io__putback_char
+			% only guarantees one character of pushback.
+			% So instead, we return an `integer_dot' token;
+			% the main loop of lexer__get_token_list_2 will
+			% handle this appropriately.
+			{ lexer__rev_char_list_to_int(Chars, 10, Token0) },
+			{ Token0 = integer(Int) ->
+				Token = integer_dot(Int)
+			;
+				Token = Token0
+			}
 		)
 	).
 
Index: library/parser.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/parser.m,v
retrieving revision 1.39
diff -u -d -r1.39 parser.m
--- library/parser.m	12 Feb 2003 04:58:24 -0000	1.39
+++ library/parser.m	14 Feb 2003 17:47:57 -0000
@@ -866,6 +866,7 @@
 parser__could_start_term(error(_), no).
 parser__could_start_term(io_error(_), no).
 parser__could_start_term(eof, no).
+parser__could_start_term(integer_dot(_), no).
 
 %-----------------------------------------------------------------------------%
 
-- 
Fergus Henderson <fjh at cs.mu.oz.au>  |  "I have always known that the pursuit
The University of Melbourne         |  of excellence is a lethal habit"
WWW: <http://www.cs.mu.oz.au/~fjh>  |     -- the last words of T. S. Garp.
--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list