[mercury-users] lex bug?
Peter Ross
pro at missioncriticalit.com
Wed Feb 25 23:22:36 AEDT 2004
The attached file parse.m appears to demonstrate a bug in the lex module:
it lexes the string "0+1" as
[integer(0), (+), eof]
while "0+1\n" lexes correctly as
[integer(0), (+), integer(1), eof]
Is this a bug or a misunderstanding on my part?
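In the meantime, a possible workaround (only a sketch, and untested: it
assumes the lexer merely needs a delimiter after the final token;
parse_with_newline is a made-up name and parse is the predicate from the
attached file) would be to put a newline back before lexing, which matches
the behaviour observed above for "0+1\n":

    :- pred parse_with_newline(string::in, list(token)::out) is det.

        % Untested sketch: append a trailing "\n" so the final integer is
        % followed by a delimiter before the string reaches the lexer.
        % The added space token is dropped by ignore(space) in parse.
    parse_with_newline(String, Tokens) :-
        string__append(String, "\n", Delimited),
        parse(Delimited, Tokens).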
--
Peter Ross
Software Engineer (Work) +32 2 757 10 15
Mission Critical (Mobile) +32 485 482 559
-------------- next part --------------
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
:- module parse.
:- interface.
:- import_module io.
:- pred main(io::di, io::uo) is det.
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
:- implementation.
:- import_module std_util, string, int, float, exception, list, require.
:- import_module lex.
:- type token
    --->    real(float)
    ;       integer(int)
    ;       ('+')
    ;       space
    ;       eof.
%-----------------------------------------------------------------------------%
main(!IO) :-
    io__read_line_as_string(Result, !IO),
    ( Result = ok(String) ->
        % Removing the trailing "\n" seems to break the parsing
        % of integers at the end of the string.
        parse(string__chomp(String), Expr),
        io__write(Expr, !IO),
        io__nl(!IO),
        main(!IO)
    ;
        true
    ).
%-----------------------------------------------------------------------------%
:- pred parse(string::in, list(token)::out) is det.

parse(String, Tokens) :-
    copy(String, UniqString),
    Lexer = lex__init(lexemes, read_from_string, ignore(space)),
    State0 = lex__start(Lexer, UniqString),
    tokenize(Tokens, State0, State),
    _ = lex__stop(State).
:- pred tokenize(list(token)::out, lexer_state(token, string)::di,
    lexer_state(token, string)::uo) is det.

tokenize(Tokens, !State) :-
    lex__read(Result, !State),
    (
        Result = ok(Token),
        tokenize(RemainingTokens, !State),
        Tokens = [Token | RemainingTokens]
    ;
        Result = error(ErrString, _ErrLineNumber),
        error(ErrString)
    ;
        Result = eof,
        Tokens = [eof]
    ).
%-----------------------------------------------------------------------------%
:- func lexemes = list(lexeme(token)).

lexemes = [
    ( nat        -> (func(Match) = integer(det_to_int(Match))) ),
    ( real       -> (func(Match) = real(det_string_to_float(Match))) ),
    ( "+"        -> return('+') ),
    ( whitespace -> return(space) )
].
:- func det_string_to_float(string) = float.

det_string_to_float(String) =
    ( if string__to_float(String, Float)
      then Float
      else throw("error in float conversion")
    ).
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%