[m-rev.] Small, but significant, change to lex

Ralph Becket rafe at cs.mu.OZ.AU
Tue Jan 22 16:04:41 AEDT 2002


Estimated hours taken: 1.5
Branches: main

extras/lex/lex.m:
	Changed the lexing behaviour so that when several lexemes match the
	same string, the token returned is that of the lexeme appearing
	earliest in the list passed to init/[2,3].  This brings lex.m in line
	with the standard C lex.

	Also included minor syntactic clean-ups.

Index: lex.m
===================================================================
RCS file: /home/mercury1/repository/mercury/extras/lex/lex.m,v
retrieving revision 1.2
diff -u -r1.2 lex.m
--- lex.m	4 Oct 2001 07:46:04 -0000	1.2
+++ lex.m	22 Jan 2002 05:00:45 -0000
@@ -143,6 +143,10 @@
     % Construct a lexer from which we can generate running
     % instances.
     %
+    % NOTE: If several lexemes match the same string, only
+    % the token generated by the one closest to the start
+    % of the list of lexemes is returned.
+    %
 :- func init(list(lexeme(Tok)), read_pred(Src)) = lexer(Tok, Src).
 :- mode init(in, in(read_pred)) = out(lexer) is det.
 
@@ -150,6 +154,10 @@
     % instances. If we construct a lexer with init/4, we
     % can additionally ignore specific tokens.
     %
+    % NOTE: If several lexemes match the same string, only
+    % the token generated by the one closest to the start
+    % of the list of lexemes is returned.
+    %
 :- func init(list(lexeme(Tok)), read_pred(Src), ignore_pred(Tok)) =
             lexer(Tok, Src).
 :- mode init(in, in(read_pred), in(ignore_pred)) = out(lexer) is det.
@@ -240,8 +248,8 @@
 :- type winner(Token)
     ==      maybe(pair(token_creator(Token), offset)).
 :- inst winner
-    ---> yes(pair(token_creator, ground))
-    ;    no.
+    --->    yes(pair(token_creator, ground))
+    ;       no.
 
 %------------------------------------------------------------------------------%
 
@@ -261,7 +269,8 @@
     DontIgnoreAnything = ( pred(_::in) is semidet :- semidet_fail ).
 
 init(Lexemes, BufReadPred, IgnorePred) =
-    lexer(CompiledLexemes, IgnorePred, BufReadPred) :-
+    lexer(CompiledLexemes, IgnorePred, BufReadPred)
+ :-
     CompiledLexemes = list__map(compile_lexeme, Lexemes).
 
 %------------------------------------------------------------------------------%
@@ -436,6 +445,10 @@
 
 %------------------------------------------------------------------------------%
 
+    % Note that in the case where two or more lexemes match the same
+    % string, the win is given to the earliest such lexeme in the list.
+    % This matches the behaviour of standard C lex.
+    %
 :- pred advance_live_lexemes(char, offset,
             list(live_lexeme(Token)), list(live_lexeme(Token)),
             winner(Token), winner(Token)).
@@ -457,7 +470,11 @@
             Winner1     = Winner0
         ;
             IsAccepting = yes,
-            Winner1     = yes(ATok - Offset)
+            Winner1     = ( if   Winner0 = yes(_ATok0 - Offset0),
+                                 Offset  = Offset0
+                            then Winner0
+                            else yes(ATok - Offset)
+                          )
         ),
         advance_live_lexemes(Char, Offset, Ls0, Ls1, Winner1, Winner),
         Ls = [( L0 ^ state := State ) | Ls1]
@@ -520,7 +537,7 @@
     P(Src0, Src),
     State = args_lexer_state(Instance, Buf, Src).
 
-% -----------------------------------------------------------------------------%
+%------------------------------------------------------------------------------%
 
 read_from_stdin(_Offset, Result) -->
     io__read_char(IOResult),
@@ -529,14 +546,13 @@
     ;   IOResult = error(_E),             throw(IOResult)
     }.
 
-% -----------------------------------------------------------------------------%
+%------------------------------------------------------------------------------%
 
 read_from_string(Offset, Result, String, String) :-
     ( if   Offset < string__length(String)
       then Result = ok(string__unsafe_index(String, Offset))
       else Result = eof
     ).
-
 
 %------------------------------------------------------------------------------%
 % The type of regular expressions.
--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list