[m-rev.] diff: fix lexer two-char pushback bug
Fergus Henderson
fjh at cs.mu.OZ.AU
Sat Feb 15 05:04:45 AEDT 2003
Estimated hours taken: 1
Branches: main
library/lexer.m:
Fix a bug where the lexer relied on being able to push back two
characters in a row, which is more than io__putback_char guarantees.
This bug broke things in grade `il', which supports exactly
one byte of pushback.
Unfortunately, the fix required adding a new kind of token,
integer_dot/1, to the token type, which changes the interface.
The new token is used only internally within lexer.m -- it is
converted to an integer/1 token before being returned -- so
hopefully this won't cause many backwards compatibility problems.
library/parser.m:
Update parser__could_start_term to handle the new integer_dot/1 token.
Workspace: /home/fjh/ws/hermes
Index: library/lexer.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/lexer.m,v
retrieving revision 1.36
diff -u -d -r1.36 lexer.m
--- library/lexer.m 9 Jul 2002 01:30:18 -0000 1.36
+++ library/lexer.m 14 Feb 2003 17:47:57 -0000
@@ -38,7 +38,15 @@
; junk(char) % junk character in the input stream
; error(string) % some other invalid token
; io_error(io__error) % error reading from the input stream
- ; eof. % end-of-file
+ ; eof % end-of-file
+ ; integer_dot(int). % the lexer will never return this.
+ % The integer_dot/1 token is used
+ % internally in the lexer, to keep
+ % the grammar LL(1) so that only one
+ % character of pushback is needed.
+ % But the lexer will convert
+ % integer_dot/1 tokens to integer/1
+ % tokens before returning them.
% For every token, we record the line number of the line on
% which the token occurred.
@@ -158,6 +166,9 @@
string__append("I/O error: ", IO_ErrorMessage, String).
lexer__token_to_string(error(Message), String) :-
string__append_list(["illegal token (", Message, ")"], String).
+lexer__token_to_string(integer_dot(Int), String) :-
+ string__int_to_string(Int, IntString),
+ string__append_list(["integer `", IntString, "'."], String).
% We build the tokens up as lists of characters in reverse order.
% When we get to the end of each token, we call
@@ -171,13 +182,25 @@
lexer__get_token_list(Tokens) -->
lexer__get_token(Token, Context),
- ( { Token = eof } ->
+ lexer__get_token_list_2(Token, Context, Tokens).
+
+:- pred lexer__get_token_list_2(token, token_context, token_list,
+ io__state, io__state).
+:- mode lexer__get_token_list_2(in, in, out, di, uo) is det.
+lexer__get_token_list_2(Token0, Context0, Tokens) -->
+ ( { Token0 = eof } ->
{ Tokens = token_nil }
- ; { Token = end ; Token = error(_) ; Token = io_error(_) } ->
- { Tokens = token_cons(Token, Context, token_nil) }
+ ; { Token0 = end ; Token0 = error(_) ; Token0 = io_error(_) } ->
+ { Tokens = token_cons(Token0, Context0, token_nil) }
+ ; { Token0 = integer_dot(Int) } ->
+ lexer__get_context(Context1),
+ lexer__get_dot(Token1),
+ lexer__get_token_list_2(Token1, Context1, Tokens1),
+ { Tokens = token_cons(integer(Int), Context0, Tokens1) }
;
- { Tokens = token_cons(Token, Context, Tokens1) },
- lexer__get_token_list(Tokens1)
+ lexer__get_token(Token1, Context1),
+ lexer__get_token_list_2(Token1, Context1, Tokens1),
+ { Tokens = token_cons(Token0, Context0, Tokens1) }
).
lexer__string_get_token_list(String, Tokens) -->
@@ -1741,8 +1764,17 @@
lexer__get_float_decimals([Char, '.' | Chars], Token)
;
io__putback_char(Char),
- io__putback_char('.'),
- { lexer__rev_char_list_to_int(Chars, 10, Token) }
+ % We can't putback the ".", because io__putback_char
+ % only guarantees one character of pushback.
+ % So instead, we return an `integer_dot' token;
+ % the main loop of lexer__get_token_list_2 will
+ % handle this appropriately.
+ { lexer__rev_char_list_to_int(Chars, 10, Token0) },
+ { Token0 = integer(Int) ->
+ Token = integer_dot(Int)
+ ;
+ Token = Token0
+ }
)
).
Index: library/parser.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/parser.m,v
retrieving revision 1.39
diff -u -d -r1.39 parser.m
--- library/parser.m 12 Feb 2003 04:58:24 -0000 1.39
+++ library/parser.m 14 Feb 2003 17:47:57 -0000
@@ -866,6 +866,7 @@
parser__could_start_term(error(_), no).
parser__could_start_term(io_error(_), no).
parser__could_start_term(eof, no).
+parser__could_start_term(integer_dot(_), no).
%-----------------------------------------------------------------------------%
--
Fergus Henderson <fjh at cs.mu.oz.au> | "I have always known that the pursuit
The University of Melbourne | of excellence is a lethal habit"
WWW: <http://www.cs.mu.oz.au/~fjh> | -- the last words of T. S. Garp.
--------------------------------------------------------------------------
mercury-reviews mailing list
post: mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------
More information about the reviews
mailing list