[m-rev.] lex and moose changed

Holger Krug hkrug at rationalizer.com
Mon Jul 30 16:51:47 AEST 2001


For review and comments, especially by Ralph.

Changes concerning lex:

%----------------------------------------------------------------------------%
%
% 07/26/01 hkrug at rationalizer.com:
%    * the value contained in a token may now belong to an arbitrary
%      Mercury type, not just string as before
%    * lexemes may now contain a function that computes an arbitrary
%      token value from the matched string
%    * defining lexemes is now somewhat harder for the user, who has
%      to supply higher-order functions
%    * the advantage is that no postprocessing is needed to evaluate
%      the matched string, hence the interface to the consumer of the
%      tokens is cleaner
%
%----------------------------------------------------------------------------%
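For illustration, here is a minimal sketch of the new-style lexeme list
(adapted from the README and demo.m changes in the patch below;
`signed_int', `identifier', `whitespace' and `any' are regexp
combinators already provided by lex, and the token type is a
hypothetical example):

    :- type token
        --->    integer(int)
        ;       id(string)
        ;       punc.

    Lexemes = [
        lexeme( t( func(Match) = integer(string__det_to_int(Match)) ),
                signed_int ),
        lexeme( t( func(Match) = id(Match) ),
                identifier ),
        lexeme( noval(punc),
                any(";,.") ),
        lexeme( ignore,
                whitespace )
    ]

A `t(TokenCreator)' lexeme applies TokenCreator to the matched string,
so the consumer receives a fully evaluated token and no postprocessing
pass over the match is needed.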

Changes concerning moose:

%----------------------------------------------------------------------------%
%
% 07/24/01 hkrug at rationalizer.com:
%    * added option --unique-state/-u
%    * `parser_state' renamed to `lexer_state'
%    * the `lexer_state' is managed using di/uo modes if moose is
%      called with --unique-state
%    * predicates `write_types', `write_rule_num_type',
%      `write_state_num_type' and `write_action_type' added
%    * changed the type of rule numbers from `int' to the generated
%      discriminated union type `rule_num', allowing some procedures
%      to be declared `det' instead of `semidet'
%    * changed the type of state numbers from `int' to the generated
%      discriminated union type `state_num', allowing some procedures
%      to be declared `det' instead of `semidet'
%    * changed the definition of type `parsing_action', which now
%      includes not only the kind of action to be taken but also the
%      value specifying the concrete action
%    * removed obviously unused dump options from the usage message
%
%----------------------------------------------------------------------------%
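For reference, the generated types now have the following shape (taken
from the new `write_types' code in the patch below; the `rule_num' and
`state_num' constructors are generated per grammar, so the ones shown
here are merely hypothetical):

    :- type rule_num
        --->    rule0x1
        ;       rule0x2
        .

    :- type state_num
        --->    state0x0
        ;       state0x1
        .

    :- type parsing_action
        --->    reduce(rule_num)
        ;       shift(state_num)
        ;       accept
        ;       error
        .

Since `actions' now returns the rule or state inside the action term,
e.g. `reduce(rule0x1)', the dispatch predicates can switch on a
discriminated union and be declared `det' instead of `semidet'.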

Note: moose now depends on lex, because the following type forms the
interface between moose and its lexer:

:- type lex__lexer_result(Token)
    --->    ok(Token)                   % Token matched.
    ;       eof                         % End of input.
    ;       error(int).                 % No matches for string at this offset.
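
For example, a plain list of lexer results can serve as the lexer
state (a sketch following the updated samples/try_expr.m in the patch
below; without --unique-state, lexst_in/lexst_out are simply in/out):

    :- import_module lex, list.

    :- type token_list == list(lexer_result(token)).

    :- instance lexer_state(token_list) where [
        pred(get_token/3) is uncons
    ].

    :- pred uncons(lexer_result(T)::out,
                   list(lexer_result(T))::in,
                   list(lexer_result(T))::out) is det.

    uncons(eof, [], []).
    uncons(X, [X | Xs], Xs).

The generated parse/3 can then be called directly with such a list,
e.g. parse(Toks, Result, RemainingToks).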




-- 
Holger Krug
hkrug at rationalizer.com

-------------- next part --------------
Index: Mmakefile
===================================================================
RCS file: lex/Mmakefile,v
retrieving revision 1.1
retrieving revision 1.1.1.1
diff -u -r1.1 -r1.1.1.1
Index: README
===================================================================
RCS file: lex/README,v
retrieving revision 1.1
retrieving revision 1.5
diff -u -r1.1 -r1.5
--- README	2001/07/13 09:33:50	1.1
+++ README	2001/07/30 06:28:47	1.5
@@ -1,11 +1,29 @@
 lex 1.0 (very alpha)
 Fri Aug 25 17:54:28  2000
 Copyright (C) 2001 Ralph Becket <rbeck at microsoft.com>
-
     THIS FILE IS HEREBY CONTRIBUTED TO THE MERCURY PROJECT TO
     BE RELEASED UNDER WHATEVER LICENCE IS DEEMED APPROPRIATE
     BY THE ADMINISTRATORS OF THE MERCURY PROJECT.
-
+Thu Jul 26 07:45:47 UTC 2001
+Copyright (C) 2001 The Rationalizer Intelligent Software AG
+    The changes made by Rationalizer are contributed under the terms 
+    of the GNU Free Documentation License - see the file COPYING in the
+    Mercury Distribution.
+
+%----------------------------------------------------------------------------%
+%
+% 07/26/01 hkrug at rationalizer.com:
+%    * the value contained in a token now may belong to an arbitrary 
+%      Mercury type, not only string as before
+%    * lexemes now may contain a function to compute an arbitrary token
+%      value from the string matched
+%    * the lexeme definition by the user now is more difficult,
+%      because the user has to name higher order functions
+%    * the advantage is that no postprocessing is needed to evaluate
+%      the string, hence the interface to the consumer of the tokens is 
+%      cleaner
+%
+%----------------------------------------------------------------------------%
 
 
 This package defines a lexer for Mercury.  There is plenty of scope
@@ -23,20 +41,30 @@
 
     :- type token
         --->    comment
-        ;       id
-        ;       num.
+        ;       id(string)
+        ;       num(int).
 
 3. Set up a list of annotated_lexemes.
 
     Lexemes = [
-        lexeme(noval(comment),      (atom('%'), star(dot))),
-        lexeme(value(id),           identifier),
-        lexeme(ignore,              whitespace)
+        lexeme( noval(comment),
+                (atom('%'), star(dot))
+	      ),
+        lexeme( t( func(Match) = id(Match) ),
+	        identifier
+              ),
+        lexeme( ignore ,
+	        whitespace
+              )
     ]
 
-noval tokens are simply identified;
-value tokens are identified and returned with the string matched;
-ignore regexps are simply passed over.
+Lexemes are pairs. Their first entry is one of:
+* noval(FixedToken)
+* t(TokenCreator)
+* ignore
+where `FixedToken' is a fixed token value and `TokenCreator' is a function
+to compute tokens from strings, namely from the string matched by the
+regular expression forming the second part of the lexeme.
 
 4. Set up a lexer with an appropriate read predicate (see the buf module).
 
@@ -49,8 +77,7 @@
 6. Use it to lex the input stream.
 
     lex__read(Result, State0, State1),
-    ( Result = ok(NoValToken), ...
-    ; Result = ok(ValueToken, String), ...
+    ( Result = ok(Token), ...
     ; Result = error(OffsetInInputStream), ...
     ; Result = eof, ...
     )
===================================================================
RCS file: lex/lex.lexeme.m,v
retrieving revision 1.1
retrieving revision 1.5
diff -u -r1.1 -r1.5
--- lex.lexeme.m	2001/07/13 09:33:53	1.1
+++ lex.lexeme.m	2001/07/27 16:19:16	1.5
@@ -1,16 +1,21 @@
 %-----------------------------------------------------------------------------
 % lex.lexeme.m
-% Copyright (C) 2001 Ralph Becket <rbeck at microsoft.com>
 % Sat Aug 19 08:22:32 BST 2000
+% Copyright (C) 2001 Ralph Becket <rbeck at microsoft.com>
+%   THIS FILE IS HEREBY CONTRIBUTED TO THE MERCURY PROJECT TO
+%   BE RELEASED UNDER WHATEVER LICENCE IS DEEMED APPROPRIATE
+%   BY THE ADMINISTRATORS OF THE MERCURY PROJECT.
+% Thu Jul 26 07:45:47 UTC 2001
+% Copyright (C) 2001 The Rationalizer Intelligent Software AG
+%   The changes made by Rationalizer are contributed under the terms 
+%   of the GNU Lesser General Public License.
+%
 % vim: ts=4 sw=4 et tw=0 wm=0 ff=unix
 %
 % A lexeme combines a token with a regexp.  The lexer compiles
 % lexemes and returns the longest successul parse in the input
 % stream or an error if no match occurs.
 %
-%   THIS FILE IS HEREBY CONTRIBUTED TO THE MERCURY PROJECT TO
-%   BE RELEASED UNDER WHATEVER LICENCE IS DEEMED APPROPRIATE
-%   BY THE ADMINISTRATORS OF THE MERCURY PROJECT.
 %
 %----------------------------------------------------------------------------- %
 
@@ -26,6 +31,8 @@
                 clxm_state              :: state_no,
                 clxm_transition_map     :: transition_map
             ).
+:- inst compiled_lexeme(Inst) --->
+            lex__lexeme__compiled_lexeme(Inst,ground,ground).
 
 :- type transition_map
     --->    transition_map(
@@ -59,12 +66,12 @@
     % an accepting state_no.
     %
 :- pred next_state(compiled_lexeme(T), state_no, char, state_no, bool).
-:- mode next_state(in, in, in, out, out) is semidet.
+:- mode next_state(in(live_lexeme), in, in, out, out) is semidet.
 
     % Succeeds iff a compiled_lexeme is in an accepting state_no.
     %
-:- pred in_accepting_state(compiled_lexeme(T)).
-:- mode in_accepting_state(in) is semidet.
+:- pred in_accepting_state(live_lexeme(T)).
+:- mode in_accepting_state(in(live_lexeme)) is semidet.
 
 %----------------------------------------------------------------------------- %
 %----------------------------------------------------------------------------- %
@@ -159,7 +166,7 @@
 next_state(CLXM, CurrentState, Char, NextState, IsAccepting) :-
     Rows            = CLXM ^ clxm_transition_map ^ trm_rows,
     AcceptingStates = CLXM ^ clxm_transition_map ^ trm_accepting_states,
-    find_next_state(char__to_int(Char), Rows ^ elem(CurrentState), NextState),
+    lex__lexeme__find_next_state(char__to_int(Char), Rows ^ elem(CurrentState), NextState),
     IsAccepting     = bitmap__get(AcceptingStates, NextState).
 
 %------------------------------------------------------------------------------%
@@ -170,7 +177,7 @@
 find_next_state(Byte, ByteTransitions, State) :-
     Lo = array__min(ByteTransitions),
     Hi = array__max(ByteTransitions),
-    find_next_state_0(Lo, Hi, Byte, ByteTransitions, State).
+    lex__lexeme__find_next_state_0(Lo, Hi, Byte, ByteTransitions, State).
 
 
 
@@ -182,7 +189,7 @@
     ByteTransition = ByteTransitions ^ elem(Lo),
     ( if ByteTransition ^ btr_byte = Byte
       then State = ByteTransition ^ btr_state
-      else find_next_state_0(Lo + 1, Hi, Byte, ByteTransitions, State)
+      else lex__lexeme__find_next_state_0(Lo + 1, Hi, Byte, ByteTransitions, State)
     ).
 
 %------------------------------------------------------------------------------%
Index: lex.m
===================================================================
RCS file: lex/lex.m,v
retrieving revision 1.1
retrieving revision 1.5
diff -u -r1.1 -r1.5
--- lex.m	2001/07/13 09:33:53	1.1
+++ lex.m	2001/07/27 16:19:17	1.5
@@ -2,15 +2,19 @@
 % lex.m
 % Copyright (C) 2001 Ralph Becket <rbeck at microsoft.com>
 % Sun Aug 20 09:08:46 BST 2000
+%   THIS FILE IS HEREBY CONTRIBUTED TO THE MERCURY PROJECT TO
+%   BE RELEASED UNDER WHATEVER LICENCE IS DEEMED APPROPRIATE
+%   BY THE ADMINISTRATORS OF THE MERCURY PROJECT.
+% Thu Jul 26 07:45:47 UTC 2001
+% Copyright (C) 2001 The Rationalizer Intelligent Software AG
+%   The changes made by Rationalizer are contributed under the terms 
+%   of the GNU Lesser General Public License.
+%
 % vim: ts=4 sw=4 et tw=0 wm=0 ff=unix
 %
 % This module puts everything together, compiling a list of lexemes
 % into state machines and turning the input stream into a token stream.
 %
-%   THIS FILE IS HEREBY CONTRIBUTED TO THE MERCURY PROJECT TO
-%   BE RELEASED UNDER WHATEVER LICENCE IS DEEMED APPROPRIATE
-%   BY THE ADMINISTRATORS OF THE MERCURY PROJECT.
-%
 %----------------------------------------------------------------------------- %
 
 :- module lex.
@@ -24,31 +28,38 @@
 :- type annotated_lexeme(Token)
     ==      lexeme(annotated_token(Token)).
 
+:- type token_creator(Token) == (func(string)=Token).
+:- inst token_creator == (func(in)=out is det).
+
 :- type lexeme(Token)
     --->    lexeme(
                 lxm_token               :: Token,
                 lxm_regexp              :: regexp
             ).
+:- inst lexeme(Inst) ---> lex__lexeme(Inst,ground).
 
 :- type annotated_token(Token)
-    --->    noval(Token)                % Just return ok(Token) on success.
-    ;       value(Token)                % Return ok(Token, String) on success.
-    ;       ignore.                     % Just skip over these tokens.
+    --->    t(token_creator(Token))   % Return ok(TokenCreator(Match))
+                                      % on success.
+    ;       noval(Token)              % Just return ok(Match) on success
+    ;       ignore.                   % Just skip over these tokens.
+
+:- inst annotated_token ---> lex__t(token_creator)
+                        ;    lex__noval(ground)
+                        ;    lex__ignore.
 
 :- type lexer(Token, Source).
-:- inst lexer
-    ==      bound(lexer(ground, read_pred)).
+:- inst lexer  ---> lex__lexer(ground, lex__read_pred).
+
 
 :- type lexer_state(Token, Source).
 
 :- type lexer_result(Token)
-    --->    ok(Token)                   % Noval token matched.
-    ;       ok(Token, string)           % Value token matched.
+    --->    ok(Token)                   % Token matched.
     ;       eof                         % End of input.
     ;       error(int).                 % No matches for string at this offset.
 
 
-
 :- type offset
     ==      int.                        % Byte offset into the source data.
 
@@ -184,17 +195,22 @@
                 lexi_buf_state          :: buf_state(Source)
             ).
 :- inst lexer_instance
-    ==      bound(lexer_instance(ground, ground, ground, buf_state)).
-
-
+    --->      lex__lexer_instance(lex__live_lexeme_list, 
+                                  lex__live_lexeme_list, 
+	                          lex__winner, 
+                                  lex__buf__buf_state).
 
 :- type live_lexeme(Token)
     ==      compiled_lexeme(annotated_token(Token)).
+:- inst live_lexeme == compiled_lexeme(annotated_token).
+:- inst live_lexeme_list == list__list_skel(live_lexeme).
 
 
 
 :- type winner(Token)
     ==      maybe(pair(annotated_token(Token), offset)).
+:- inst winner ---> yes(bound(annotated_token - ground))
+               ;    no.
 
 %----------------------------------------------------------------------------- %
 
@@ -293,7 +309,7 @@
 :- pred process_any_winner(lexer_result(Tok), winner(Tok),
             lexer_instance(Tok, Src), lexer_instance(Tok, Src), 
             buf_state(Src), buf, buf, Src, Src).
-:- mode process_any_winner(out, in,
+:- mode process_any_winner(out, in(winner),
             in(lexer_instance), out(lexer_instance),
             in(buf_state), array_di, array_uo, di, uo) is det.
 
@@ -306,14 +322,15 @@
                         ^ lexi_current_winner := no)
                         ^ lexi_buf_state      := buf__commit(BufState1)),
     (
-        ATok     = noval(Token),
-        Result   = ok(Token),
+        ATok     = t(TokenCreator),
+        Match    = buf__string_to_cursor(BufState1, Buf),
+        Result   = ok(TokenCreator(Match)),
         Instance = Instance1,
         Buf      = Buf0,
         Src      = Src0
     ;
-        ATok     = value(Token),
-        Result   = ok(Token, buf__string_to_cursor(BufState1, Buf)),
+        ATok     = noval(Token),
+        Result   = ok(Token),
         Instance = Instance1,
         Buf      = Buf0,
         Src      = Src0
@@ -353,12 +370,13 @@
             % Return the token and set things up so that we return
             % eof next.
         (
+            ATok   = t(TokenCreator),
+            Match  = buf__string_to_cursor(BufState, Buf),
+            Result = ok(TokenCreator(Match))
+        ;
             ATok   = noval(Token),
             Result = ok(Token)
         ;
-            ATok   = value(Token),
-            Result = ok(Token, buf__string_to_cursor(BufState, Buf))
-        ;
             ATok   = ignore,
             Result = eof
         )
@@ -376,7 +394,9 @@
 :- pred advance_live_lexemes(char, offset,
             list(live_lexeme(Token)), list(live_lexeme(Token)),
             winner(Token), winner(Token)).
-:- mode advance_live_lexemes(in, in, in, out, in, out) is det.
+:- mode advance_live_lexemes(in, in, in(live_lexeme_list), 
+                             out(live_lexeme_list), 
+			     in(winner), out(winner)) is det.
 
 advance_live_lexemes(_Char, _Offset, [], [], Winner, Winner).
 
@@ -406,13 +426,15 @@
 
 :- pred live_lexeme_in_accepting_state(list(live_lexeme(Tok)),
                 annotated_token(Tok)).
-:- mode live_lexeme_in_accepting_state(in, out) is semidet.
+:- mode live_lexeme_in_accepting_state(in(live_lexeme_list), 
+	                  out(annotated_token)) is semidet.
 
 live_lexeme_in_accepting_state([L | Ls], Token) :-
-    ( if in_accepting_state(L)
+    ( if lex__lexeme__in_accepting_state(L)
       then Token = L ^ clxm_token
       else live_lexeme_in_accepting_state(Ls, Token)
     ).
+
 
 %----------------------------------------------------------------------------- %
 %----------------------------------------------------------------------------- %
===================================================================
RCS file: lex/samples/demo.m,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- samples/demo.m	2001/07/13 09:33:56	1.1
+++ samples/demo.m	2001/07/26 08:07:46	1.2
@@ -1,12 +1,17 @@
 %----------------------------------------------------------------------------- %
 % demo.m
-% Copyright (C) 2001 Ralph Becket <rbeck at microsoft.com>
 % Sun Aug 20 18:11:42 BST 2000
-% vim: ts=4 sw=4 et tw=0 wm=0 ff=unix ft=mercury
-%
+% Copyright (C) 2001 Ralph Becket <rbeck at microsoft.com>
 %   THIS FILE IS HEREBY CONTRIBUTED TO THE MERCURY PROJECT TO
 %   BE RELEASED UNDER WHATEVER LICENCE IS DEEMED APPROPRIATE
 %   BY THE ADMINISTRATORS OF THE MERCURY PROJECT.
+% Thu Jul 26 07:45:47 UTC 2001
+% Copyright (C) 2001 The Rationalizer Intelligent Software AG
+%   The changes made by Rationalizer are contributed under the terms 
+%   of the GNU General Public License - see the file COPYING in the
+%   Mercury Distribution.
+%
+% vim: ts=4 sw=4 et tw=0 wm=0 ff=unix ft=mercury
 %
 %----------------------------------------------------------------------------- %
 
@@ -66,48 +71,58 @@
 %----------------------------------------------------------------------------- %
 
 :- type token
-    --->    noun
-    ;       comment
-    ;       integer
-    ;       real
-    ;       verb
-    ;       conj
-    ;       prep
+    --->    noun(string)
+    ;       comment(string)
+    ;       integer(int)
+    ;       real(float)
+    ;       verb(string)
+    ;       conj(string)
+    ;       prep(string)
     ;       punc
     .
 
 :- func lexemes = list(annotated_lexeme(token)).
 
 lexemes = [
-    lexeme(value(comment),
-			(atom('%') >> junk)),
+%    lexeme(ignore, (atom('%') >> junk)),
+     lexeme( t(func(Match) = comment(Match)), (atom('%') >> junk)),
+%    lexeme( t(comment), (atom('%') >> junk)),
 
-    lexeme(value(integer),
+    lexeme( t(func(Match) = integer(string__det_to_int(Match))),
 			(signed_int)),
 
-    lexeme(value(real),
-            (real)),
+    lexeme( t(func(Match) = real(det_string_to_float(Match))), (real)),
 
-    lexeme(value(noun), str("cat")),
-    lexeme(value(noun), str("dog")),
-    lexeme(value(noun), str("rat")),
-    lexeme(value(noun), str("mat")),
+    lexeme( t(func(Match) = noun(Match)), str("cat")),
+    lexeme( t(func(Match) = noun(Match)), str("dog")),
+    lexeme( t(func(Match) = noun(Match)), str("rat")),
+    lexeme( t(func(Match) = noun(Match)), str("mat")),
 
-    lexeme(value(verb),
+    lexeme( t(func(Match) = verb(Match)),
 			(str("sat") \/ str("caught") \/ str("chased"))),
 
-    lexeme(value(conj),
+    lexeme( t(func(Match) = conj(Match)),
 			(str("and") \/ str("then"))),
 
-    lexeme(value(prep),
+    lexeme( t(func(Match) = prep(Match)),
 			(str("the") \/ str("it") \/ str("them") \/ str("to") \/ str("on"))),
 
     lexeme(noval(punc),
 			(any("~!@#$%^&*()_+`-={}|[]\\:"";'<>?,./"))),
 
-    lexeme(ignore,
-			whitespace)
+    lexeme(ignore, whitespace)
 ].
 
+:- func det_string_to_float(string) = float.
+
+:- import_module require.
+
+det_string_to_float(String) = Float :-
+	string__to_float(String,Float) 
+        ; 
+	error("Wrong regular expression chosen for token `real'").
+
 %----------------------------------------------------------------------------- %
 %----------------------------------------------------------------------------- %
+
+
-------------- next part --------------
Index: Mmakefile
===================================================================
RCS file: moose/Mmakefile,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- Mmakefile	2001/07/13 09:34:39	1.1
+++ Mmakefile	2001/07/24 15:26:31	1.2
@@ -6,6 +6,7 @@
 
 -include ../Mmake.params
 
+GRADE = asm_fast.gc
 # GRADE = asm_fast.gc.debug
 # GRADE = asm_fast.gc.prof
 
Index: README
===================================================================
RCS file: moose/README,v
retrieving revision 1.1
retrieving revision 1.4
diff -u -r1.1 -r1.4
--- README	2001/07/13 09:34:39	1.1
+++ README	2001/07/27 16:18:19	1.4
@@ -18,14 +18,22 @@
 
 - One Moose parser declaration, of the form
 
-	:- parse(<StartSymbol>, <EndToken>, <TokenType>, <Prefix>).
+	:- parse(<StartSymbol>, <TokenType>, <LexerResultType>, <Prefix>).
 
   Here <StartSymbol> is the name of the starting symbol for the grammar,
-  <EndToken> is the token that signifies end-of-file,
   <TokenType> is the name of the Mercury type for tokens in this grammar,
-  and <Prefix> is unused.  (<Prefix> is intended to be used as a prefix
-  to the generated predicate names, however it is currently
-  unimplemented).
+  <LexerResultType> is the name of a type which is defined as a shortcut for
+  `lex__lexer_result(<TokenType>)' and <Prefix> is unused.  (<Prefix> is 
+  intended to be used as a prefix to the generated predicate names, 
+  however it is currently unimplemented). moose defines the type
+  shortcut <LexerResultType> for your convenience.
+
+  The definition of `lex__lexer_result' is:
+
+  :- type lexer_result(Token)
+     --->    ok(Token)                % Token matched.
+     ;       eof                      % End of input.
+     ;       error(int).              % No matches for string at this offset.
 
 - One or more Moose rule declarations, of the form
 
@@ -69,52 +77,58 @@
         :- action(<Name>/<Arity>, <PredicateName>).
 
   Each action declaration will add a method called PredicateName
-  to the type class parser_state/1.  The method will have the same types
+  to the type class lexer_state/1.  The method will have the same types
   as the rule given by Name/Arity, plus a threaded in/out pair of
-  parser_state arguments.
+  lexer_state arguments.
 
   For example
         :- rule foo(int).
         :- action(foo/1, process_foo).
   will generate
-        :- typeclass parser_state(T) where [
+        :- typeclass lexer_state(T) where [
                 ... get_token and any other action methods ...
                 pred process_foo(int, T, T),
-                mode process_foo(in, in, out) is det
+                mode process_foo(in, lexst_in, lexst_out) is det
         ].
 
+  Here `lexst_in == di', `lexst_out == uo' if moose is called with
+  the option "--unique-state/-u". Otherwise `lexst_in == in' and 
+  `lexst_out == out'.
+
+
   Whenever the parser reduces using a rule, it will invoke the associated
-  action method for that rule (if there is one).  Since the parser_state
+  action method for that rule (if there is one).  Since the lexer_state
   is threaded through all action methods, it can be used to implement
   inherited attributes.  Actions can also modify the token stream in the
-  parser_state (see below).
+  lexer_state (see below).
 
 
 In order to use the Moose parser, you need to provide a lexer, which
-generates tokens of the type <TokenType> given in the Moose parse
-declaration.  To allow flexibility in implementing the lexer, the parser
-requests tokens using a type class.
+generates tokens of the type `lex__lexer_result(<TokenType>)' with
+<TokenType> as given in the Moose parse declaration. To allow
+flexibility in implementing the lexer, the parser requests tokens
+using a type class.
 
-The parser_state type class is the set of all operations that must be
-implemented in order to use the parser.  parser_state will contain at
+The lexer_state type class is the set of all operations that must be
+implemented in order to use the parser.  lexer_state will contain at
 least one method, called get_token.
 
-        :- typeclass parser_state(T) where [
-                pred get_token(token, T, T),
-                mode get_token(out, in, out) is semidet
-        ].
+	:- typeclass lexer_state(T) where [
+		pred get_token(<LexerResultType>, T, T),
+		mode get_token(out, lexst_in, lexst_out) is det
+	].
 
-get_token returns the next token in the token stream.  The parser state
+get_token returns the next token in the token stream.  The lexer state
 typically contains a list of tokens, from which the next token is
 retrieved (although other implementations are possible).  get_token may
 fail if there are no more tokens available.
 
-The other methods in parser_state will be dictated by the Moose action
+The other methods in lexer_state will be dictated by the Moose action
 declarations.  To use the Moose generated parser, simply call the
-generated parse predicate with an instance of the parser_state type
+generated parse predicate with an instance of the lexer_state type
 class.
 
 
 The samples directory contains some simple grammars, lexers
-and implementations of the parser_state.  
-
+and implementations of the lexer_state. Currently no examples
+are provided for the unique lexer_state.  
Index: TODO
===================================================================
RCS file: moose/TODO,v
retrieving revision 1.1
retrieving revision 1.4
diff -u -r1.1 -r1.4
--- TODO	2001/07/13 09:34:39	1.1
+++ TODO	2001/07/30 06:42:37	1.4
@@ -1,6 +1,4 @@
 To make moose more useful:
-	- allow the parsing state to be unique (so that it can contain
-	  the io:state, just for example).
 	- allow parsing actions to be semidet to allow the imposition
 	  of semantic conditions during parsing.
 	- add error productions to produce nice error messages.
@@ -15,6 +13,8 @@
 	- provide the ability to dump a list of the terminal symbols.
 
 	- make the various dumps go to files other than the .m file.
+        - replace types `rule_num' and `state_num' by int, use
+          instance declarations to assure predicates to be `det'.
 	
 Wish list:
 	- introduce new nonterminals for disjunctions in productions.
Index: grammar.m
===================================================================
RCS file: moose/grammar.m,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- grammar.m	2001/07/13 09:34:40	1.1
+++ grammar.m	2001/07/27 14:10:26	1.2
@@ -135,8 +135,8 @@
 :- pred add_clause(clauses, nonterminal, clause, clauses).
 :- mode add_clause(in, in, in, out) is det.
 
-:- pred construct_grammar(nonterminal, term, clauses, xforms, grammar).
-:- mode construct_grammar(in, in, in, in, out) is det.
+:- pred construct_grammar(nonterminal, clauses, xforms, grammar).
+:- mode construct_grammar(in, in, in, out) is det.
 
 :- pred compute_first(rules, first).
 :- mode compute_first(in, out) is det.
@@ -219,7 +219,7 @@
 
 %------------------------------------------------------------------------------%
 
-construct_grammar(Start, _End, AllClauses, XForms, Grammar) :-
+construct_grammar(Start, AllClauses, XForms, Grammar) :-
 	map__to_assoc_list(AllClauses, ClauseList),
 	Nont0 = 1,
 	start_rule(Start, StartRule),
Index: moose.m
===================================================================
RCS file: moose/moose.m,v
retrieving revision 1.1
retrieving revision 1.7
diff -u -r1.1 -r1.7
--- moose.m	2001/07/13 09:34:41	1.1
+++ moose.m	2001/07/27 16:13:14	1.7
@@ -1,9 +1,32 @@
 %----------------------------------------------------------------------------%
 % Copyright (C) 1998-2000 The University of Melbourne.
+% Copyright (C) 2001 The Rationalizer Intelligent Software AG
 % This file may only be copied under the terms of the GNU General
 % Public License - see the file COPYING in the Mercury Distribution.
 %----------------------------------------------------------------------------%
 
+%----------------------------------------------------------------------------%
+%
+% 07/24/01 hkrug at rationalizer.com:
+%    * added option --unique-state/-u
+%    * `parser_state' renamed to `lexer_state'
+%    * the `lexer_state' is managed using di/uo modes, if 
+%      called with --unique-state
+%    * predicates `write_types', `write_rule_num_type', 
+%      `write_state_num_type' and `write_action_type' added
+%    * changed type for rule number for `int' to generated discriminated 
+%      union type `rule_num' allowing some procedures to be declared
+%      `det' instead of `semidet'
+%    * changed type for state number for `int' to generated discriminated 
+%      union type `state_num' allowing some procedures to be declared
+%      `det' instead of `semidet'
+%    * changed definition of type `parsing_action' which now includes
+%      not only the kind of action to be taken but also the value
+%      specifying the concrete action
+%    * obviously unused dump options removed from usage message
+%
+%----------------------------------------------------------------------------%
+
 :- module moose.
 
 :- interface.
@@ -83,8 +106,9 @@
 :- type whereami
 	--->	(interface) ; (implementation) .
 
+% parser(WhereAmI, StartID, TokAtom, LexerTokAtom, PrefixAtom)
 :- type parser
-	--->	parser(whereami, nonterminal, term, string, string).
+	--->	parser(whereami, nonterminal, string, string, string).
 
 :- pred process(options::in, io__state::di, io__state::uo) is det.
 
@@ -126,7 +150,8 @@
 :- mode process_2(in, in, in, in, in, in, di, uo) is det.
 
 process_2(Options, Module, Parser, Decls0, Clauses0, XFormList) -->
-	{ lookup_bool_option(Options, verbose, Verbse) },
+	{ getopt__lookup_bool_option(Options, verbose, Verbse) },
+	{ getopt__lookup_bool_option(Options, unique_parsing_state, UniqueLS)},
 	( { Verbse = yes } -> report_stats ; [] ),
 
 	{ check_rule_decls(Decls0, Decls, DeclErrors) },
@@ -135,7 +160,8 @@
 	{ check_clauses(Clauses0, Decls, Clauses, ClauseErrors) },
 	foldl(write_error, ClauseErrors),
 
-	{ Parser = parser(WhereAmI, StartId, EndToken, TokenType, _Prefix) },
+	{ Parser = parser(WhereAmI, StartId, TokenType, LexerTokenType, 
+	                  _Prefix) },
 
 	{ check_useless(StartId, Clauses, Decls, UselessErrors) },
 	foldl(write_error, UselessErrors),
@@ -151,8 +177,10 @@
 	->
 		write_module(nolines, Module), nl,
 		{ lookup(Decls, StartId, StartDecl) },
-		write_parser(WhereAmI, StartId, StartDecl, TokenType),
-		write_action_type_class(WhereAmI, XFormList, Decls),
+		write_parser(WhereAmI, StartId, StartDecl, LexerTokenType, 
+                             UniqueLS),
+		write_action_type_class(WhereAmI, XFormList, Decls, 
+		                        TokenType, LexerTokenType),
 
 		stderr_stream(StdErr),
 		write_string(StdErr, "constructing grammar...\n"),
@@ -163,7 +191,7 @@
 			map__det_insert(Xf0, XfNt, XForm, Xf)
 		), XFormList, Xfns0, XForms) },
 
-		{ construct_grammar(StartId, EndToken, Clauses, XForms,
+		{ construct_grammar(StartId, Clauses, XForms,
 			Grammar) },
 		{ Grammar = grammar(Rules, _, Xfns, _, Index, First, _Follow) },
 		{ reaching(Rules, First, Reaching) },
@@ -176,6 +204,7 @@
 		{ shifts(C, Rules, First, Reaching, Shifts) },
 		{ actions(C, Rules, Lookaheads, Gotos, Shifts,
 			States, ActionTable, ActionErrs) },
+
 		foldl2((pred(Err::in, HasEs0::in, HasEs::out, di, uo) is det -->
 			(
 				{ Err = warning(Warning) },
@@ -211,11 +240,12 @@
 				)
 			)
 		), ActionErrs, no, _HasErrors),
-		write_action_table(ActionTable, TokenType, EndToken),
+		write_types(Rules, ActionTable),
+		write_action_table(ActionTable, LexerTokenType),
 		write_string(StdErr, "computing the goto table...\n"),
 		{ gotos(C, States, Gotos, GotoTable) },
 		write_goto_table(GotoTable, Decls),
-		write_reductions(Rules, TokenType, Xfns),
+		write_reductions(Rules, LexerTokenType, Xfns),
 		[]
 	;
 		[]
@@ -223,19 +253,22 @@
 
 %------------------------------------------------------------------------------%
 
-:- pred write_action_type_class(whereami, list(xform), rule_decls, 
-	io__state, io__state).
-:- mode write_action_type_class(in, in, in, di, uo) is det.
+:- pred write_action_type_class(whereami, list(xform), rule_decls,
+	string, string, io__state, io__state).
+:- mode write_action_type_class(in, in, in, in, in, di, uo) is det.
 
-write_action_type_class(Where, XForms, Decls) -->
+write_action_type_class(Where, XForms, Decls, TokenType, LexerTokenType) -->
 	( { Where = (interface) } ->
 		write_string(":- interface.\n\n")
 	;
 		[]
 	),
-	io__write_string(":- typeclass parser_state(T) where [\n"),
-	io__write_string("	pred get_token(token, T, T),\n"),
-	io__write_string("	mode get_token(out, in, out) is semidet"),
+	io__write_strings([
+":- import_module lex.\n\n",
+":- type ", LexerTokenType, " == lex__lexer_result(", TokenType, ").\n\n",
+":- typeclass lexer_state(T) where [\n",
+"         pred get_token(", LexerTokenType, ", T, T),\n",
+"         mode get_token(out, lexst_in, lexst_out) is det"]),
 	( { not XForms = [] } ->
 		io__write_string(",\n")
 	;
@@ -244,7 +277,8 @@
 	{ WriteIn = (pred(_Anything::in, di, uo) is det -->
 		io__write_string("in"))
 	},
-	{ WriteXForm = (pred(XForm::in, di, uo) is det -->
+	{ WriteXForm = (pred(XForm::in, di, uo) 
+		is det -->
 		{ XForm = xform(NT, MethodName) },
 		{ lookup(Decls, NT, RuleDecl) },
 		{ RuleDecl = rule(_NT, Types, VarSet, _Context) },
@@ -256,17 +290,99 @@
 		io__write_strings(["\tmode ", MethodName, "("]),
 		io__write_list(Types, ", ", WriteIn),
 		( { Types \= [] } -> io__write_string(", ") ; [] ),
-		io__write_string("in, out) is det")
-		)
+		io__write_string("lexst_in, lexst_out) is det")
+	    )
 	},
-	io__write_list(XForms, ",\n", WriteXForm),
-	io__write_string("\n].\n"),
+        io__write_list(XForms, ",\n", WriteXForm),
+	io__write_string("\n].\n\n"),
 	( { Where = (interface) } ->
 		write_string(":- implementation.\n\n")
 	;
 		[]
 	).
 
+
+%------------------------------------------------------------------------------%
+
+:- pred write_types(rules, actiontable, io__state, io__state).
+:- mode write_types(in, in, di, uo) is det.
+
+write_types(Rules, Table) -->
+	write_rule_num_type(Rules),
+	write_state_num_type(Table),
+	write_action_type.
+
+%------------------------------------------------------------------------------%
+
+:- pred write_rule_num_type(rules, io__state, io__state).
+:- mode write_rule_num_type(in, di, uo) is det.
+
+write_rule_num_type(Rules) -->
+	{ map__keys(Rules,RuleNumList) },
+	( 
+	    % the start rule does not define a constructor for the
+	    % `rule_num' type, because no reduction is done based
+	    % on it
+	    { RuleNumList = [ _StartRule, Rn1 | Rest ] }
+	->
+	    io__write_string(":- type rule_num\n"),
+            { format("rule0x%x", [i(Rn1)], RuleConst1) },
+	    io__write_strings(["        --->    ",RuleConst1,"\n"]),
+	    foldl((pred(Rn::in, di, uo) is det -->
+	             { format("rule0x%x", [i(Rn)], RuleConst) },
+		     io__write_strings(["        ;       ",
+		                        RuleConst,
+					"\n"])
+	          ), Rest), 
+	    io__write_string(  "        .\n\n")
+	;
+	    []
+	).
+
+%------------------------------------------------------------------------------%
+
+:- pred write_state_num_type(actiontable, io__state, io__state).
+:- mode write_state_num_type(in, di, uo) is det.
+
+write_state_num_type(Table) -->
+	{ map__keys(Table,StateNumList) },
+	( 
+	    { StateNumList = [ Sn1 | Rest ] }
+	->
+	    io__write_string(":- type state_num\n"),
+            { format("state0x%x", [i(Sn1)], StateConst1) },
+	    io__write_strings(["        --->     ",StateConst1,"\n"]),
+	    foldl((pred(Sn::in, di, uo) is det -->
+	             { format("state0x%x", [i(Sn)], StateConst) },
+		     io__write_strings(["        ;        ",
+		                        StateConst,
+					"\n"])
+	          ), Rest), 
+	    io__write_string(  "        .\n\n")
+	;
+	    []
+	).
+
+
+%------------------------------------------------------------------------------%
+
+% TODO: more expressive error type containing error number, error message
+%       or something similar
+
+:- pred write_action_type(io__state, io__state).
+:- mode write_action_type(di, uo) is det.
+
+write_action_type -->
+	io__write_strings([
+":- type parsing_action\n",
+"        --->  reduce(rule_num)\n",
+"        ;     shift(state_num)\n",
+"        ;     accept\n",
+"        ;     error\n",
+"        .\n",
+"\n"
+]).
+
 %------------------------------------------------------------------------------%
 
 :- pred write_rule(output_stream, int, rules, io__state, io__state).
@@ -406,13 +522,14 @@
 
 parser_term(functor(atom(":-"), [functor(atom("parse"), Args, _)], _),
 		_VarSet, WhereAmI, Decl) :-
-	Args = [StartIdTerm, EndTok, TokTerm, PrefixTerm],
+	Args = [StartIdTerm, TokTerm, LexerTokTerm, PrefixTerm],
 	StartIdTerm = functor(atom("/"), [functor(atom(Name), [], _),
 		functor(integer(Arity), _, _)], _),
 	StartId = Name / Arity,
 	TokTerm = functor(atom(TokAtom), [], _),
+	LexerTokTerm = functor(atom(LexerTokAtom), [], _),
 	PrefixTerm = functor(atom(PrefixAtom), [], _),
-	Decl = parser(WhereAmI, StartId, EndTok, TokAtom, PrefixAtom).
+	Decl = parser(WhereAmI, StartId, TokAtom, LexerTokAtom, PrefixAtom).
 
 :- pred xform_term(term, xform).
 :- mode xform_term(in, out) is semidet.
@@ -439,88 +556,93 @@
 	write_strings(StdErr, [
 		"usage: moose <options> file ...\n",
 		"	-h|--help		help\n",
-		"	-a|--dump-action	dump the action table\n",
-		"	-f|--dump-first		dump the FIRST sets\n",
-		"	-a|--dump-follow	dump the FOLLOW sets\n",
-		"	-a|--dump-goto		dump the goto table\n",
-		"	-a|--dump-items		dump the item sets\n",
-		"	-a|--dump-rules		dump the flattened rules\n"
+% currently not implemented:
+%		"	-a|--dump-action	dump the action table\n",
+%		"	-f|--dump-first		dump the FIRST sets\n",
+%		"	-F|--dump-follow	dump the FOLLOW sets\n",
+%		"	-g|--dump-goto		dump the goto table\n",
+%		"	-i|--dump-items		dump the item sets\n",
+%		"	-r|--dump-rules		dump the flattened rules\n",
+		"	-u|--unique-state	typeclass `lexer_state' with\n",
+		"	                        destructive input and unique output\n"
 	]).
 
 %------------------------------------------------------------------------------%
 
-:- pred write_action_table(actiontable, string, term, io__state, io__state).
-:- mode write_action_table(in, in, in, di, uo) is det.
+:- pred write_action_table(actiontable, string, io__state, io__state).
+:- mode write_action_table(in, in, di, uo) is det.
 
-write_action_table(Table, TT, End) -->
+write_action_table(Table, TT) -->
 	write_strings([
-		":- type parsing_action\n",
-		"	--->	shift\n",
-		"	;	reduce\n",
-		"	;	accept\n",
-		"	.\n",
-		"\n",
-		":- pred actions(int, ", TT, ", parsing_action, int).\n",
-		":- mode actions(in, in, out, out) is semidet.\n",
+		":- pred actions(state_num, ", TT, ", parsing_action).\n",
+		":- mode actions(in, in, out) is semidet.\n",
 		"\n"
 	]),
 	foldl((pred(State::in, StateActions::in, di,uo) is det -->
 		{ format("0x%x", [i(State)], SS) },
+		{ format("state0x%x", [i(State)], SSConst) },
 		write_strings([
-			"actions(", SS, ",Tok, Action, Value) :-\n",
-			"	actions", SS, "(Tok, Action, Value).\n",
+			"actions(", SSConst, ",Tok, Action) :-\n",
+			"	actions", SS, "(Tok, Action).\n",
 			"\n",
-			":- pred actions", SS, "(", TT, ", parsing_action, int).\n",
-			":- mode actions", SS, "(in, out, out) is semidet.\n",
+			":- pred actions", SS, "(", TT, ", parsing_action).\n",
+			":- mode actions", SS, "(in, out) is semidet.\n",
 			"\n"
 		]),
-		write_state_actions(SS, End, StateActions)
+		write_state_actions(SS, StateActions)
 	), Table).
 
-:- pred write_state_actions(string, term, (terminal -> action),
+:- pred write_state_actions(string, (terminal -> action),
 		io__state, io__state).
-:- mode write_state_actions(in, in, in, di, uo) is det.
+:- mode write_state_actions(in, in, di, uo) is det.
 
-write_state_actions(SS, End, StateActions) -->
+write_state_actions(SS, StateActions) -->
 	{ format("actions%s", [s(SS)], Name) },
 	foldl((pred(Terminal::in, Action::in, di, uo) is det -->
-		{ terminal_to_term(Terminal, End, Token) },
+		{ terminal_to_term(Terminal, Token) },
 		{ context_init(Ctxt) },
-		{ Term = functor(atom(Name),
-			[Token,
-			functor(atom(Kind), [], Ctxt),
-			functor(integer(Val), [], Ctxt)], Ctxt) },
-		(
-			{ Action = shift(Val) },
-			{ Kind = "shift" }
+		{ Term = functor(atom(Name), [Token, KindFunctor], Ctxt) }, 
+		{
+			Action = shift(Val),
+			format("state0x%x", [i(Val)], ValS),
+			KindFunctor = 
+		            functor(atom("shift"),
+			            [functor(atom(ValS), [], Ctxt)], 
+                                    Ctxt)
 		;
-			{ Action = reduce(Val) },
-			{ Kind = "reduce" }
+			Action = reduce(Val),
+			format("rule0x%x", [i(Val)], ValS),
+			KindFunctor = 
+		            functor(atom("reduce"),
+			            [functor(atom(ValS), [], Ctxt)], 
+                                    Ctxt)
 		;
-			{ Action = accept },
-			{ Kind = "accept" },
-			{ Val = 0 }
-		),
-		{ init(Varset) },
+			Action = accept,
+			KindFunctor = functor(atom("accept"), [], Ctxt)
+% TODO: add `error' action !		
+		},
+		{ varset__init(Varset) },
 		term_io__write_term_nl(Varset, Term)
 	), StateActions),
 	nl.
 
-:- pred terminal_to_term(terminal, term, term).
-:- mode terminal_to_term(in, in, out) is det.
+:- pred terminal_to_term(terminal, term).
+:- mode terminal_to_term(in, out) is det.
 
-terminal_to_term(epsilon, _, _) :-
+terminal_to_term(epsilon, _) :-
 	error("terminal_to_term: unexpected epsilon").
-terminal_to_term(Name/Arity, _, Term) :-
+terminal_to_term(Name/Arity, Term) :-
 	init(V0),
 	new_vars(V0, Arity, Vars, _),
 	context_init(Ctxt),
 	map((pred(Var::in, T::out) is det :-
 		T = variable(Var)
 	), Vars, Args),
-	Term = functor(atom(Name), Args, Ctxt).
-terminal_to_term(($), End, End).
-terminal_to_term((*), _, _) :-
+	Term = functor(atom("ok"), [
+                      functor(atom(Name), Args, Ctxt)
+	              ], Ctxt).
+terminal_to_term( ($), functor(atom("eof"),[],context_init) ).
+terminal_to_term((*), _) :-
 	error("terminal_to_term: unexpected hash").
 
 %------------------------------------------------------------------------------%
@@ -532,17 +654,18 @@
 	{ values(DeclTable, Decls) },
 	write_nonterminal_type(Decls),
 	write_strings([
-		":- pred gotos(int, nonterminal, int).\n",
+		":- pred gotos(state_num, nonterminal, state_num).\n",
 		":- mode gotos(in, in, out) is semidet.\n",
 		"\n"
 	]),
 	foldl((pred(State::in, StateActions::in, di,uo) is det -->
 		{ format("0x%x", [i(State)], SS) },
+		{ format("state0x%x", [i(State)], SSConst) },
 		write_strings([
-			"gotos(", SS, ", NT, NS) :-\n",
+			"gotos(", SSConst, ", NT, NS) :-\n",
 			"	gotos", SS, "(NT, NS).\n",
 			"\n",
-			":- pred gotos", SS, "(nonterminal, int).\n",
+			":- pred gotos", SS, "(nonterminal, state_num).\n",
 			":- mode gotos", SS, "(in, out) is semidet.\n",
 			"\n"
 		]),
@@ -579,8 +702,9 @@
 	foldl((pred(NT::in, NS::in, di, uo) is det -->
 		{ nonterminal_to_term(NT, Token) },
 		{ context_init(Ctxt) },
+		{ format("state0x%x", [i(NS)], NSConst) },
 		{ Term = functor(atom(Name),
-			[Token, functor(integer(NS), [], Ctxt)], Ctxt) },
+			[Token, functor(atom(NSConst), [], Ctxt)], Ctxt) },
 		{ init(Varset) },
 		term_io__write_term_nl(Varset, Term)
 	), StateActions),
@@ -602,11 +726,11 @@
 
 %------------------------------------------------------------------------------%
 
-:- pred write_parser(whereami, nonterminal, rule_decl, string,
+:- pred write_parser(whereami, nonterminal, rule_decl, string, bool,
 		io__state, io__state).
-:- mode write_parser(in, in, in, in, di, uo) is det.
+:- mode write_parser(in, in, in, in, in, di, uo) is det.
 
-write_parser(Where, NT, Decl, TT) -->
+write_parser(Where, NT, Decl, LexerTokenType, UniqueLS) -->
 	(
 		{ NT = StartName/StartArity }
 	;
@@ -630,73 +754,100 @@
 	),
 	write_element(nolines, ParseResultType),
 	nl,
-	write_strings([
-		":- import_module list.\n\n",
-		":- pred parse(P, presult, P) <= parser_state(P).\n",
-		":- mode parse(in, out, out) is det.\n",
-		"\n"
-	]),
+        io__write_string(":- import_module list.\n\n"),
+	( 
+	    { UniqueLS = yes } 
+	->
+	    io__write_strings([":- mode lexst_in == di.\n",
+                               ":- mode lexst_out == uo.\n\n"])
+	;
+	    io__write_strings([":- mode lexst_in == in.\n",
+                               ":- mode lexst_out == out.\n\n"])
+	),
+	io__write_string(":- pred parse(P, presult, P) <= lexer_state(P).\n"),
+	io__write_string(":- mode parse(lexst_in, out, lexst_out) is det.\n"),
+	io__write_string("\n"),
 	( { Where = (interface) } ->
 		write_string(":- implementation.\n\n")
 	;
 		[]
 	),
-	write_strings([
-"parse(Toks0, Result, Toks) :-\n",
-"	parse(Toks0, Toks, [0], [], Result).\n",
+	io__write_strings([
+":- inst non_error_lexer_token\n", 
+"	--->        lex__ok(ground) \n",
+"  	;           lex__eof.\n",
+"\n",
+"parse(Lex0, Result, Lex) :-\n",
+"     parse(Lex0, Lex, [state0x0], [], Result).\n",
 "\n",
-":- pred parse(P, P, statestack, symbolstack, presult) <= parser_state(P).\n",
-":- mode parse(in, out, in, in, out) is det.\n",
+":- pred parse(P, P, statestack, symbolstack,  presult) \n",
+"      <= lexer_state(P).\n",
+":- mode parse(lexst_in, lexst_out, in(statestack), in, out) is det.\n",
 "\n",
-"parse(Toks0, Toks, St0, Sy0, Res) :-\n",
+"parse(Lex0, Lex, St, Sy, Res) :-\n",
 "    (\n",
-"        St0 = [S0|_],\n",
+"        get_token(Tok, Lex0, Lex1),\n",
+"\n",
 "        ( \n",
-"            get_token(Tok, Toks0, Toks1)\n",
-"        ->\n",
-"            ( \n",
-"                actions(S0, Tok, What, Val)\n",
-"            ->\n",
-"                (\n",
-"                    What = shift,\n",
-"                    Sy1 = [t(Tok)|Sy0],\n",
-"                    St1 = [Val|St0],\n",
-"                    parse(Toks1, Toks, St1, Sy1, Res)\n",
-"                ;\n",
-"                    What = reduce,\n",
-"                    reduce(Val, St0, St1, Sy0, Sy1, Toks0, Toks2),\n",
-"                    parse(Toks2, Toks, St1, Sy1, Res)\n",
-"                ;\n",
-"                    What = accept,\n",
-"                        ( Sy0 = [n("
-        ]),
-        write_term(Varset, StartTerm),
-        write_strings([
+"	    Tok = error(_),\n",
+"            Res = error(""lexer error""),\n",
+"            Lex = Lex1\n",
+"	;\n",
+"	    Tok = ok(_),\n",
+"	    process_token(Lex1, Lex, St, Sy, Tok, Res)\n",
+"	;\n",
+"	    Tok = eof,\n",
+"	    process_token(Lex1, Lex, St, Sy, Tok, Res)\n",
+"        )\n",
+"    ).\n",
+"\n",
+":- pred process_token(P, P, statestack, symbolstack, \n",
+"	      ", LexerTokenType, ", presult)\n",
+"      <= lexer_state(P).\n",
+":- mode process_token(lexst_in, lexst_out, in(statestack), in,\n",
+"		      in(non_error_lexer_token), out) is det.\n",
+"\n",
+"process_token(Lex0, Lex, St0, Sy0, Tok, Res) :-\n",
+"    (\n",
+"        St0 = [ S0 | _ ],\n",
+"	actions(S0, Tok, What)\n",
+"    ->\n",
+"        (\n",
+"	    What = shift(SS),\n",
+"	    Sy1 = [t(Tok) | Sy0],\n",
+"	    St1 = [SS | St0],\n",
+"	    parse(Lex0, Lex, St1, Sy1, Res)\n",
+"	;\n",
+"	    What = reduce(Rule),\n",
+"	    reduce(Rule, St0, St1, Sy0, Sy1, Lex0, Lex1),\n",
+"	    process_token(Lex1, Lex, St1, Sy1, Tok, Res)\n",
+"	;\n",
+"	    What = accept,\n",
+"	    ( Sy0 = [n("
+            ]),
+            write_term(Varset, StartTerm),
+            write_strings([
 ")] ->\n",
 "                        Res = ("
-	]),
-	write_term(Varset, StartTerm),
-	write_strings([
+	    ]),
+	    write_term(Varset, StartTerm),
+	    write_strings([
 "),\n",
-"                        Toks = Toks1\n",
-"                    ;\n",
-"                        error(""parse: internal accept error"")\n",
-"                    )\n",
-"                )\n",
-"            ;\n",
-"                Res = error(""parse error""),\n",
-"                Toks = Toks0\n",
-"            )\n",
-"        ;\n",
-"            Res = error(""unexpected end of input""),\n",
-"            Toks = Toks0\n",
-"        )\n",
+"	      Lex = Lex0\n",
+"	    ;\n",
+"	      error(""parse: internal accept error"")\n",
+"	    )\n",
+"	;\n",
+"	    What = error,\n",
+"	    Res = error(""parse error: action table reports error""),\n",
+"	    Lex = Lex0\n",
+"	)\n",
 "    ;\n",
-"        St0 = [],\n",
-"        error(""parse: state stack underflow"")\n",
+"	Res = error(""parse error: no entry in action table""),\n",
+"	Lex = Lex0\n",
 "    ).\n",
 "\n"
-	]).
+        ]).
 
 :- pred mkstartargs(int, list(term), list(term), varset, varset).
 :- mode mkstartargs(in, in, out, in, out) is det.
@@ -722,15 +873,17 @@
 	write_strings([
 ":- import_module require, std_util.\n",
 "\n",
-":- type statestack == list(int).\n",
+":- type statestack == list(state_num).\n",
+":- inst statestack ---> [ ground | ground ].\n",
 ":- type symbolstack == list(stacksymbol).\n",
 ":- type stacksymbol\n",
 "	--->	n(nonterminal)\n",
 "	;	t(", TT, ").\n",
 "\n",
-":- pred reduce(int, statestack, statestack, symbolstack, symbolstack,\n",
-"		P, P) <= parser_state(P).\n",
-":- mode reduce(in, in, out, in, out, in, out) is det.\n",
+":- pred reduce(rule_num, statestack, statestack, symbolstack, symbolstack,\n",
+"		P, P) <= lexer_state(P).\n",
+":- mode reduce(in, in(statestack), out(statestack), in, out,\n",
+"               lexst_in, lexst_out) is det.\n",
 "\n",
 "reduce(RuleNum, States0, States, Symbols0, Symbols, Tokens0, Tokens) :-\n",
 "	(\n",
@@ -748,22 +901,26 @@
 "		error(""reduce: reduction failed"")\n",
 "	).\n",
 "\n",
-":- pred reduce0(int, statestack, statestack, symbolstack, symbolstack,\n",
-"		P, P) <= parser_state(P).\n",
-":- mode reduce0(in, in, out, in, out, in, out) is semidet.\n",
+":- pred reduce0(rule_num, statestack, statestack, symbolstack, symbolstack,\n",
+"		P, P) <= lexer_state(P).\n",
+":- mode reduce0(in, in(statestack), out(statestack), in, out,\n",
+"                lexst_in, lexst_out) is det.\n",
 "\n"
 	]),
 	foldl((pred(Rn::in, Rule::in, di, uo) is det -->
 		( { Rn \= 0 } ->
 		{ format("reduce0x%x", [i(Rn)], RedName) },
-		{ format("0x%x", [i(Rn)], RnS) },
+		{ format("rule0x%x", [i(Rn)], RnS) },
 		write_strings([
 "reduce0(", RnS, ", S0, S, T0, T, U0, U) :-\n",
 "	", RedName, "(S0, S, T0, T, U0, U).\n",
 "\n",
 ":- pred ", RedName, "(statestack, statestack, symbolstack, symbolstack,\n",
-"		P, P) <= parser_state(P).\n",
-":- mode ", RedName, "(in, out, in, out, in, out) is det.\n",
+"		P, P) <= lexer_state(P).\n",
+":- mode ", 
+RedName,
+"(in(statestack), out(statestack), in, out,\n",
+"	        lexst_in, lexst_out) is det.\n",
 "\n"
 		]),
 		{ Rule = rule(RNt, Head, _, Body, Actions, Varset0, _C) },
@@ -846,7 +1003,9 @@
 	context_init(Ctxt),
 	(
 		E0 = terminal(ET),
-		E = functor(atom("t"), [ET], Ctxt)
+		E = functor(atom("t"), [
+	                    functor(atom("ok"),[ET], Ctxt)
+                            ], Ctxt)
 	;
 		E0 = nonterminal(EN),
 		E = functor(atom("n"), [EN], Ctxt)
Index: options.m
===================================================================
RCS file: moose/options.m,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- options.m	2001/07/13 09:34:41	1.1
+++ options.m	2001/07/24 15:26:33	1.2
@@ -1,9 +1,17 @@
 %----------------------------------------------------------------------------%
 % Copyright (C) 1998-2000 The University of Melbourne.
+% Copyright (C) 2001 The Rationalizer Intelligent Software AG
 % This file may only be copied under the terms of the GNU General
 % Public License - see the file COPYING in the Mercury Distribution.
 %----------------------------------------------------------------------------%
 
+%----------------------------------------------------------------------------%
+%
+% 07/24/01 hkrug at rationalizer.com:
+%    * added option --unique-state/-u
+%
+%----------------------------------------------------------------------------%
+
 :- module options.
 
 :- interface.
@@ -23,6 +31,7 @@
 	;	dump_rules
 
 		% Output options
+	;	unique_parsing_state
 	.
 
 :- type options == option_table(option).
@@ -51,6 +60,7 @@
 short('g',	dump_goto).
 short('i',	dump_items).
 short('r',	dump_rules).
+short('u',	unique_parsing_state).
 
 :- pred long(string, option).
 :- mode long(in, out) is semidet.
@@ -63,6 +73,7 @@
 long("dump-goto",	dump_goto).
 long("dump-items",	dump_items).
 long("dump-rules",	dump_rules).
+long("unique-state",	unique_parsing_state).
 
 :- pred defaults(option, option_data).
 :- mode defaults(out, out) is nondet.
@@ -82,4 +93,4 @@
 defaults0(dump_goto,	bool(no)).
 defaults0(dump_items,	bool(no)).
 defaults0(dump_rules,	bool(no)).
-
+defaults0(unique_parsing_state,	bool(no)).
Index: samples/Mmakefile
===================================================================
RCS file: moose/samples/Mmakefile,v
retrieving revision 1.1
retrieving revision 1.3
diff -u -r1.1 -r1.3
--- samples/Mmakefile	2001/07/13 09:34:41	1.1
+++ samples/Mmakefile	2001/07/27 16:10:46	1.3
@@ -1,4 +1,16 @@
+# The location of lex
 
+LEX_DIR = ../../lex
+
+VPATH = $(LEX_DIR):$(MMAKE_VPATH)
+MCFLAGS = -I$(LEX_DIR) $(EXTRA_MCFLAGS)
+MLFLAGS = -R$(LEX_DIR) $(EXTRA_MLFLAGS) \
+          -L$(LEX_DIR)
+MLLIBS = -llex $(EXTRA_MLLIBS)
+C2INITARGS = $(LEX_DIR)/lex.init
+
+GRADE = asm_fast.gc
+
 .SUFFIXES: .m .moo
 
 default_target : all
@@ -15,4 +27,3 @@
 
 realclean:
 	rm -f alpha.m expr.m small.m cgram.m 
-
Index: samples/alpha.moo
===================================================================
RCS file: moose/samples/alpha.moo,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- samples/alpha.moo	2001/07/13 09:34:41	1.1
+++ samples/alpha.moo	2001/07/27 16:11:44	1.2
@@ -9,13 +9,12 @@
 	;	num(int)
 	;	('(')
 	;	(')')
-	;	('$')
 	.
 
-:- parse(exprn/1, ('$'), token, xx).
+:- parse(exprn/1, token, lexer_token, xx).
 
-:- pred scan(list(char), list(token)).
-:- mode scan(in, out) is semidet.
+:- pred scan(list(char), list(lexer_token)).
+:- mode scan(in, out) is det.
 
 :- implementation.
 
@@ -36,25 +35,25 @@
 	scan(Chars, [], Toks0),
 	list__reverse(Toks0, Toks).
 
-:- pred scan(list(char), list(token), list(token)).
-:- mode scan(in, in, out) is semidet.
+:- pred scan(list(char), list(lexer_token), list(lexer_token)).
+:- mode scan(in, in, out) is det.
 
-scan([], Toks, ['$'|Toks]).
+scan([], Toks, [eof | Toks]).
 scan([C|Cs], Toks0, Toks) :-
 	( char__is_whitespace(C) ->
 		scan(Cs, Toks0, Toks)
 	; char__is_digit(C) ->
 		takewhile(char__is_digit, [C|Cs], Digits, Rest),
 		string__from_char_list(Digits, NumStr),
-		string__to_int(NumStr, Num),
-		scan(Rest, [num(Num)|Toks0], Toks)
+		Num = string__det_to_int(NumStr),
+		scan(Rest, [ok(num(Num))|Toks0], Toks)
 	; C = ('+') ->
-		scan(Cs, ['+'|Toks0], Toks)
+		scan(Cs, [ok('+')|Toks0], Toks)
 	; C = ('(') ->
-		scan(Cs, ['('|Toks0], Toks)
+		scan(Cs, [ok('(')|Toks0], Toks)
 	; C = (')') ->
-		scan(Cs, [')'|Toks0], Toks)
+		scan(Cs, [ok(')')|Toks0], Toks)
 	;
-		fail
+	   scan(Cs, [error(0) | Toks0], Toks)
 	).
 
Index: samples/cgram.moo
===================================================================
RCS file: moose/samples/cgram.moo,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- samples/cgram.moo	2001/07/13 09:34:41	1.1
+++ samples/cgram.moo	2001/07/27 16:11:44	1.2
@@ -2,10 +2,10 @@
 
 :- interface.
 
-:- parse(file/0, ('$'), token, 'x').
+:- parse(file/0, token, lexer_token, 'x').
 
 :- type token --->
-('!'); ('!='); ('$'); ('%');
+('!'); ('!='); ('%');
 ('%='); ('&&'); ('&'); ('&=');
 ('('); (')'); ('*'); ('*=');
 ('+'); ('++'); ('+='); (',');
Index: samples/expr.moo
===================================================================
RCS file: moose/samples/expr.moo,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- samples/expr.moo	2001/07/13 09:34:42	1.1
+++ samples/expr.moo	2001/07/27 16:11:45	1.2
@@ -2,7 +2,7 @@
 
 :- interface.
 
-:- import_module char, int, list.
+:- import_module char, int.
 
 :- type token
 	--->	('+')
@@ -12,13 +12,12 @@
 	;	num(int)
 	;	('(')
 	;	(')')
-	;	('$')
 	.
 
-:- parse(exprn/1, ('$'), token, xx).
+:- parse(exprn/1, token, lexer_token, xx).
 
-:- pred scan(list(char), list(token)).
-:- mode scan(in, out) is semidet.
+:- pred scan(list(char), list(lexer_token)).
+:- mode scan(in, out) is det.
 
 :- implementation.
 
@@ -42,31 +41,49 @@
 	scan(Chars, [], Toks0),
 	list__reverse(Toks0, Toks).
 
-:- pred scan(list(char), list(token), list(token)).
-:- mode scan(in, in, out) is semidet.
+:- pred scan(list(char), list(lexer_token), list(lexer_token)).
+:- mode scan(in, in, out) is det.
 
-scan([], Toks, ['$'|Toks]).
-scan([C|Cs], Toks0, Toks) :-
-	( char__is_whitespace(C) ->
-		scan(Cs, Toks0, Toks)
-	; char__is_digit(C) ->
-		takewhile(char__is_digit, [C|Cs], Digits, Rest),
-		string__from_char_list(Digits, NumStr),
-		string__to_int(NumStr, Num),
-		scan(Rest, [num(Num)|Toks0], Toks)
-	; C = ('+') ->
-		scan(Cs, ['+'|Toks0], Toks)
-	; C = ('-') ->
-		scan(Cs, ['-'|Toks0], Toks)
-	; C = ('*') ->
-		scan(Cs, ['*'|Toks0], Toks)
-	; C = ('/') ->
-		scan(Cs, ['/'|Toks0], Toks)
-	; C = ('(') ->
-		scan(Cs, ['('|Toks0], Toks)
-	; C = (')') ->
-		scan(Cs, [')'|Toks0], Toks)
-	;
-		fail
-	).
+scan([], Toks, [eof | Toks]).
 
+scan([C | Cs], Toks0, Toks) :-
+    (
+        char__is_whitespace(C)
+    ->
+        scan(Cs, Toks0, Toks)
+    ;
+        char__is_digit(C)
+    ->
+        takewhile(char__is_digit, [C | Cs], Digits, Rest),
+        string__from_char_list(Digits, NumStr),
+        Num = string__det_to_int(NumStr),
+        scan(Rest, [ok(num(Num)) | Toks0], Toks)
+    ;
+        C = (+)
+    ->
+        scan(Cs, [ok(+) | Toks0], Toks)
+    ;
+        C = (-)
+    ->
+        scan(Cs, [ok(-) | Toks0], Toks)
+    ;
+        C = (*)
+    ->
+        scan(Cs, [ok(*) | Toks0], Toks)
+    ;
+        C = (/)
+    ->
+        scan(Cs, [ok(/) | Toks0], Toks)
+    ;
+        C = '('
+    ->
+        scan(Cs, [ok('(') | Toks0], Toks)
+    ;
+        C = ')'
+    ->
+        scan(Cs, [ok(')') | Toks0], Toks)
+    ;
+        (
+	   scan(Cs, [error(0) | Toks0], Toks)
+        )
+    ).
Index: samples/small.moo
===================================================================
RCS file: moose/samples/small.moo,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- samples/small.moo	2001/07/13 09:34:42	1.1
+++ samples/small.moo	2001/07/27 16:11:45	1.2
@@ -1,6 +1,6 @@
 :- module small.
 
-:- parse(program/1, ('$'), token, 'x').
+:- parse(program/1, token, lexer_token, 'x').
 
 :- rule program(list(defn)).
 program(Defs) --->
Index: samples/try_alpha.m
===================================================================
RCS file: moose/samples/try_alpha.m,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- samples/try_alpha.m	2001/07/13 09:34:42	1.1
+++ samples/try_alpha.m	2001/07/27 16:12:07	1.2
@@ -9,29 +9,29 @@
 
 :- implementation.
 
-:- import_module alpha, list.
+:- import_module alpha, list, lex.
 
-:- type token_list == list(token).
+:- type token_list == list(lexer_token).
 
-:- instance parser_state(token_list) where [
+:- instance lexer_state(token_list) where [
 	pred(get_token/3) is uncons
 ].
 
-:- pred uncons(T::out, list(T)::in, list(T)::out) is semidet.
+:- pred uncons(lexer_result(T)::out, 
+	       list(lexer_result(T))::in, 
+	       list(lexer_result(T))::out) is det.
 
+uncons(eof,[],[]).
 uncons(X, Xs, Xs0) :- Xs = [X | Xs0].
 
 main --> 
 	read_line(Res0),
 	(
 		{ Res0 = ok(Chars) },
-		( { scan(Chars, Toks) } ->
-			{ parse(Toks, Res, RemainingToks) },
-			write(Res), nl,
-			write(RemainingToks), nl
-		;
-			write_string("scanning error.\n")
-		),
+		{ scan(Chars, Toks) },
+		{ parse(Toks, Res, RemainingToks) },
+		write(Res), nl,
+		write(RemainingToks), nl,
 		main
 	;
 		{ Res0 = eof }
Index: samples/try_expr.m
===================================================================
RCS file: moose/samples/try_expr.m,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- samples/try_expr.m	2001/07/13 09:34:42	1.1
+++ samples/try_expr.m	2001/07/27 16:12:07	1.2
@@ -9,35 +9,34 @@
 
 :- implementation.
 
-:- import_module expr, list.
+:- import_module expr, list, lex.
 
-:- type token_list == list(token).
+:- type token_list == list(lexer_token).
 
-:- instance parser_state(token_list) where [
+:- instance lexer_state(token_list) where [
 	pred(get_token/3) is uncons
 ].
 
-:- pred uncons(T::out, list(T)::in, list(T)::out) is semidet.
+:- pred uncons(lexer_result(T)::out, 
+	       list(lexer_result(T))::in, 
+	       list(lexer_result(T))::out) is det.
 
+uncons(eof,[],[]).
 uncons(X, Xs, Xs0) :- Xs = [X | Xs0].
 
 main --> 
 	read_line(Res0),
 	(
-		{ Res0 = ok(Chars) },
-		( { scan(Chars, Toks) } ->
-			{ parse(Toks, Res, RemainingToks) },
-			write(Res), nl,
-			write(RemainingToks), nl
-		;
-			write_string("scanning error.\n")
-		),
-		main
+	    { Res0 = ok(Chars) },
+	    { scan(Chars, Toks) },
+	    { parse(Toks, Res, RemainingToks) },
+	    write(Res), nl,
+	    write(RemainingToks), nl,
+	    main
 	;
-		{ Res0 = eof }
+	    { Res0 = eof }
 	;
-		{ Res0 = error(Err) },
-		{ io__error_message(Err, Msg) },
-		write_string(Msg), nl
+	    { Res0 = error(Err) },
+	    { io__error_message(Err, Msg) },
+	    write_string(Msg), nl
 	).
-

