[mercury-users] lex bug?

Peter Ross pro at missioncriticalit.com
Wed Feb 25 23:22:36 AEDT 2004


The attached file parse.m appears to demonstrate a bug: the string
"0+1" is lexed as

    [integer(0), (+), eof]

while "0+1\n" lexes correctly as 

    [integer(0), (+), integer(1), eof]

Is this a bug or a misunderstanding on my part?
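
If it does turn out to be a bug, one possible workaround (an untested
sketch) would be to append a dummy trailing character before lexing;
a space should do, since the space lexeme is ignored anyway:

    parse(string__chomp(String) ++ " ", Expr)

though that of course only masks whatever is going wrong at the end
of the input.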

-- 
Peter Ross		
Software Engineer                                (Work)   +32 2 757 10 15
Mission Critical                                 (Mobile) +32 485 482 559
-------------- next part --------------
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

:- module parse.

:- interface.

:- import_module io.

:- pred main(io::di, io::uo) is det.

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

:- implementation.

:- import_module std_util, string, int, float, exception, list, require.
:- import_module lex.

:- type token
	--->	real(float)
	;	integer(int)
	;	('+')
	;	space
	;	eof
	.
%-----------------------------------------------------------------------------%

main(!IO) :-
	io__read_line_as_string(Result, !IO),
	( Result = ok(String) ->
		% Removing the trailing "\n" seems to break the parsing
		% of integers at the end of the string.
		parse(string__chomp(String), Expr),

		io__write(Expr, !IO),
		io__nl(!IO),

		main(!IO)
	;
		true
	).

%-----------------------------------------------------------------------------%

:- pred parse(string::in, list(token)::out) is det.

parse(String, Tokens) :-
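	% lex__start consumes the source string with a unique (di) mode,
	% so make a unique copy of it first.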
	copy(String, UniqString),
	Lexer  = lex__init(lexemes, read_from_string, ignore(space)),
	State0 = lex__start(Lexer, UniqString),
	tokenize(Tokens, State0, State),
	_ = lex__stop(State).

:- pred tokenize(list(token)::out, lexer_state(token, string)::di,
		lexer_state(token, string)::uo) is det.

tokenize(Tokens, !State) :-
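	% Read tokens until we hit eof, collecting them in order and
	% terminating the list with the eof token; a lexing error aborts.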
	lex__read(Result, !State),
	( Result = ok(Token),
		tokenize(RemainingTokens, !State),
		Tokens = [Token | RemainingTokens]
	; Result = error(ErrString, _ErrLineNumber),
		error(ErrString)
	; Result = eof,
		Tokens = [eof]
	).

%-----------------------------------------------------------------------------%

:- func lexemes = list(lexeme(token)).

lexemes = [
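	% nat, real and whitespace are regexps exported by the lex module;
	% each lexeme pairs a regexp with the token value to construct.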
	( nat		-> (func(Match) = integer(det_to_int(Match))) ),
	( real		-> (func(Match) = real(det_string_to_float(Match))) ),

	( "+"		-> return('+') ),
	( whitespace	-> return(space) )
].

:- func det_string_to_float(string) = float.

det_string_to_float(String) =
    ( if   string__to_float(String, Float)
      then Float
      else throw("error in float conversion")
    ).

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

