[m-rev.] Small, but significant, change to lex
Ralph Becket
rafe at cs.mu.OZ.AU
Tue Jan 22 16:04:41 AEDT 2002
Estimated hours taken: 1.5
Branches: main
extras/lex/lex.m:
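Changed the lexing behaviour so that in ambiguous cases, i.e. when
two or more lexemes match the same (longest) string, the token
returned is that of the first such lexeme in the list passed to
init/[2,3]. Longer matches still take precedence over shorter ones;
only ties are now resolved by lexeme order. This brings lex.m in
line with the standard C lex.
Also included minor syntactic clean-ups.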
Index: lex.m
===================================================================
RCS file: /home/mercury1/repository/mercury/extras/lex/lex.m,v
retrieving revision 1.2
diff -u -r1.2 lex.m
--- lex.m 4 Oct 2001 07:46:04 -0000 1.2
+++ lex.m 22 Jan 2002 05:00:45 -0000
@@ -143,6 +143,10 @@
% Construct a lexer from which we can generate running
% instances.
%
+ % NOTE: If several lexemes match the same string only
+ % the token generated by the one closest to the start
+ % of the list of lexemes is returned.
+ %
:- func init(list(lexeme(Tok)), read_pred(Src)) = lexer(Tok, Src).
:- mode init(in, in(read_pred)) = out(lexer) is det.
@@ -150,6 +154,10 @@
% instances. If we construct a lexer with init/4, we
% can additionally ignore specific tokens.
%
+ % NOTE: If several lexemes match the same string only
+ % the token generated by the one closest to the start
+ % of the list of lexemes is returned.
+ %
:- func init(list(lexeme(Tok)), read_pred(Src), ignore_pred(Tok)) =
lexer(Tok, Src).
:- mode init(in, in(read_pred), in(ignore_pred)) = out(lexer) is det.
@@ -240,8 +248,8 @@
:- type winner(Token)
== maybe(pair(token_creator(Token), offset)).
:- inst winner
- ---> yes(pair(token_creator, ground))
- ; no.
+ ---> yes(pair(token_creator, ground))
+ ; no.
%------------------------------------------------------------------------------%
@@ -261,7 +269,8 @@
DontIgnoreAnything = ( pred(_::in) is semidet :- semidet_fail ).
init(Lexemes, BufReadPred, IgnorePred) =
- lexer(CompiledLexemes, IgnorePred, BufReadPred) :-
+ lexer(CompiledLexemes, IgnorePred, BufReadPred)
+ :-
CompiledLexemes = list__map(compile_lexeme, Lexemes).
%------------------------------------------------------------------------------%
@@ -436,6 +445,10 @@
%------------------------------------------------------------------------------%
+ % Note that in the case where two or more lexemes match the same
+ % string, the win is given to the earliest such lexeme in the list.
+ % This matches the behaviour of standard C lex.
+ %
:- pred advance_live_lexemes(char, offset,
list(live_lexeme(Token)), list(live_lexeme(Token)),
winner(Token), winner(Token)).
@@ -457,7 +470,11 @@
Winner1 = Winner0
;
IsAccepting = yes,
- Winner1 = yes(ATok - Offset)
+ Winner1 = ( if Winner0 = yes(_ATok0 - Offset0),
+ Offset = Offset0
+ then Winner0
+ else yes(ATok - Offset)
+ )
),
advance_live_lexemes(Char, Offset, Ls0, Ls1, Winner1, Winner),
Ls = [( L0 ^ state := State ) | Ls1]
@@ -520,7 +537,7 @@
P(Src0, Src),
State = args_lexer_state(Instance, Buf, Src).
-% -----------------------------------------------------------------------------%
+%------------------------------------------------------------------------------%
read_from_stdin(_Offset, Result) -->
io__read_char(IOResult),
@@ -529,14 +546,13 @@
; IOResult = error(_E), throw(IOResult)
}.
-% -----------------------------------------------------------------------------%
+%------------------------------------------------------------------------------%
read_from_string(Offset, Result, String, String) :-
( if Offset < string__length(String)
then Result = ok(string__unsafe_index(String, Offset))
else Result = eof
).
-
%------------------------------------------------------------------------------%
% The type of regular expressions.
--------------------------------------------------------------------------
mercury-reviews mailing list
post: mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------
More information about the reviews mailing list