[m-rev.] Re: for review: parsing_utils improvements

Ian MacLarty maclarty at csse.unimelb.edu.au
Thu Oct 1 17:30:38 AEST 2009


On Thu, Oct 01, 2009 at 01:49:48PM +1000, Ralph Becket wrote:
> Ian MacLarty, Wednesday, 30 September 2009:
> > On Wed, Sep 30, 2009 at 12:12 PM, Ian MacLarty
> > <maclarty at csse.unimelb.edu.au> wrote:
> > > Here are some benchmarks I did with the sparql parser with an input of
> > > 4.8M (times are an average of 3 runs):
> > 
> > Correction: 48M (48 100 031 bytes).
> 
> Sorry, I thought I'd replied to this before: you've convinced me this is
> a good idea!

Here's an interdiff for review.  Note the whitespace bug fix.  It was
using char.is_whitespace for the first character, but then calling
skip_whitespace/3 which calls the user defined whitespace function.
I changed it so it calls char.is_whitespace for all characters which is
its documented behaviour.

diff -u library/parsing_utils.m library/parsing_utils.m
--- library/parsing_utils.m	29 Sep 2009 12:43:24 -0000
+++ library/parsing_utils.m	1 Oct 2009 17:15:52 -0000
@@ -7,18 +7,20 @@
 %---------------------------------------------------------------------------%
 % 
 % File: parsing_utils.m
-% Author: Ralph Becket <rafe at csse.unimelb.edu.au>
+% Authors: Ralph Becket <rafe at csse.unimelb.edu.au>, maclarty
 % Stability: low
 %
 % Utilities for recursive descent parsers.  Parsers take at least three
 % arguments: a source (src) containing the input string and a parser
 % state (ps) input/output pair tracking the current offset into the input.
 %
-% A new src and ps can be constructed by calling
-% new_src_and_ps(InputString, SkipWS, Src, !:PS) where the SkipWS function
-% is used by the primitive parsers to skip over any following whitespace
-% (providing a skipping function allows users to define comments as
-% whitespace).
+% Call parse(InputString, SkipWS, Parser, Result) to parse an input string
+% and return an error context and message if parsing failed.
+% The SkipWS function is used by the primitive parsers to skip over any
+% following whitespace (providing a skipping function allows users to define
+% comments as whitespace).
+% Alternatively a new src and ps can be constructed by calling
+% new_src_and_ps(InputString, SkipWS, Src, !:PS).
 % Parsing predicates are semidet and typically take the form
 % p(...parameters..., Src, Result, !PS).  A parser matching variable
 % assignments of the form `x = 42' might be defined like this:
@@ -68,6 +70,9 @@
 :- type parser_with_state(T, S) == pred(src, T, S, S, ps, ps).
 :- inst parser_with_state == ( pred(in, out, in, out, in, out) is semidet ).
 
+    % Functions of this type are used to skip whitespace in the primitive
+    % parsers provided by this module.
+    %
 :- type skip_whitespace_func == (func(src, ps) = ps).
 
 :- type parse_result(T)
@@ -83,9 +88,9 @@
     % If Parser succeeds then return ok with the parsed value,
     % otherwise return error.  If there were any calls to fail_with_message
     % without any subsequent progress being made, then the error message
-    % passed to fail_with_message will be returned in the error result.
-    % Otherwise no message is returned and the furthest position the parser
-    % got in the input string is returned.
+    % passed to the last call to fail_with_message will be returned in the
+    % error result.  Otherwise no message is returned and the furthest position
+    % the parser got in the input string is returned.
     %
 :- pred parse(string::in, skip_whitespace_func::in,
         parser(T)::in(parser), parse_result(T)::out) is cc_multi.
@@ -93,7 +98,7 @@
     % As above but using the default whitespace parser.
     %
 :- pred parse(string::in, parser(T)::in(parser), parse_result(T)::out)
-    is cc_multi.
+        is cc_multi.
 
     % Construct a new parser source and state from a string, also specifying
     % a function for skipping over whitespace (several primitive parsers
@@ -325,7 +330,7 @@
 :- implementation.
 
 :- import_module array.
-:- import_module store.
+:- import_module mutvar.
 
     % The parser "state" is just the offset into the input string.
     %
@@ -336,11 +341,18 @@
                 input_length        ::  int,
                 input_string        ::  string,
                 skip_ws_func        ::  func(src, ps) = ps,
-                error_info          ::  generic_mutvar(error_info, unit)
+
+                furthest_offset     ::  mutvar(int),
+                % This mutable records the progress of the parser
+                % through the input string.
+
+                last_fail_message   ::  mutvar(fail_message_info)
+                % This mutable is used to record messages passed to
+                % fail_with_message and their context.
             ).
 
-:- type error_info
-    --->    error_info(int, maybe(string)).
+:- type fail_message_info
+    --->    fail_message_info(int, maybe(string)).
 
 %-----------------------------------------------------------------------------%
 
@@ -351,16 +363,24 @@
     % This is pure, because it will always return the same results for
     % the same inputs (the mutable in Src cannot be accessed outside
     % of the promise_pure scope below).
-    promise_pure(
+    promise_pure (
         new_src_and_ps(InputString, SkipWS, Src, PS0),
         ( Parser(Src, Val, PS0, _) ->
             Result = ok(Val)
         ;
-            MutVar = Src ^ error_info,
-            semipure get_mutvar(MutVar, error_info(Offset, MaybeMsg)),
+            impure get_mutvar(Src ^ last_fail_message, Info),
+            impure get_mutvar(Src ^ furthest_offset, FurthestOffset),
+            Info = fail_message_info(MessageOffset, LastFailMsg),
+            ( MessageOffset < FurthestOffset ->
+                Msg = no,
+                Offset = FurthestOffset
+            ;
+                Msg = LastFailMsg,
+                Offset = MessageOffset
+            ),
             offset_to_line_number_and_position(src_to_line_numbers(Src),
                 Offset, Line, Col),
-            Result0 = error(MaybeMsg, Line, Col),
+            Result0 = error(Msg, Line, Col),
             % We make parse/4 cc_multi because declaratively 
             % parse(Str, SkipWS, Parser, error(MaybeMsg, Line, Col)) is true
             % for all MaybeMsg, Line and Col iff
@@ -381,8 +401,10 @@
 
 new_src_and_ps(InputString, SkipWS, Src, PS) :-
     promise_pure (
-        impure new_mutvar(error_info(0, no), MutVar),
-        Src = src(string.length(InputString), InputString, SkipWS, MutVar),
+        impure new_mutvar(fail_message_info(0, no), ErrorInfoMutVar),
+        impure new_mutvar(0, FurthestOffsetMutvar),
+        Src = src(string.length(InputString), InputString, SkipWS,  
+            FurthestOffsetMutvar, ErrorInfoMutVar),
         PS = 0
     ).
 
@@ -682,8 +704,9 @@
     % the parse/4 predicate which will always return the same results
     % for the same inputs.
     promise_pure (
-        Mutvar = Src ^ error_info,
-        impure set_mutvar(Mutvar, error_info(!.PS, yes(Msg))),
+        impure set_mutvar(Src ^ last_fail_message,
+            fail_message_info(!.PS, yes(Msg))),
+        impure set_mutvar(Src ^ furthest_offset, !.PS),
         ( semidet_fail ->
             dynamic_cast(0, Val) % unreachable
         ;
@@ -691,8 +714,8 @@
         )
     ).
 
-fail_with_message(Msg, Offset, Src, Val, !PS) :-
-    fail_with_message(Msg, Src, Val, Offset, _).
+fail_with_message(Msg, Offset, Src, Val, _, PS) :-
+    fail_with_message(Msg, Src, Val, Offset, PS).
 
 %-----------------------------------------------------------------------------%
 
@@ -701,7 +724,7 @@
         next_char(Src, C, !PS),
         char.is_whitespace(C)
       then
-        skip_whitespace(Src, !PS)
+        whitespace(Src, _, !PS)
       else
         semidet_true
     ).
@@ -721,14 +744,28 @@
 %---------------------------------------------------------------------------%
 
 keyword(IdChars, Keyword, Src, unit, !PS) :-
-    match_string(Keyword, Src, !PS),
-    not char_in_class(IdChars, Src, _, !.PS, _),
-    skip_whitespace(Src, !PS).
+    promise_pure (
+        Start = !.PS,
+        match_string(Keyword, Src, !PS),
+        not char_in_class(IdChars, Src, _, !.PS, _),
+        skip_whitespace(Src, !PS),
+        % Set progress to the beginning of the keyword so that if parsing
+        % fails the error context will point to the beginning of the
+        % keyword.
+        impure record_progress(Src, Start)
+    ).
 
 ikeyword(IdChars, Keyword, Src, unit, !PS) :-
-    imatch_string(Keyword, Src, !PS),
-    not char_in_class(IdChars, Src, _, !.PS, _),
-    skip_whitespace(Src, !PS).
+    promise_pure (
+        Start = !.PS,
+        imatch_string(Keyword, Src, !PS),
+        not char_in_class(IdChars, Src, _, !.PS, _),
+        skip_whitespace(Src, !PS),
+        % Set progress to the beginning of the keyword so that if parsing
+        % fails the error context will point to the beginning of the
+        % keyword.
+        impure record_progress(Src, Start)
+    ).
 
 %-----------------------------------------------------------------------------%
 
@@ -850,13 +887,15 @@
 
 %-----------------------------------------------------------------------------%
 
+    % Update the furthest_offset field if any progress has been made.
+    %
 :- impure pred record_progress(src::in, ps::in) is det.
 
 record_progress(Src, PS) :-
-    MutVar = Src ^ error_info,
-    semipure get_mutvar(MutVar, error_info(PS0, _)),
-    ( PS > PS0 ->
-        impure set_mutvar(MutVar, error_info(PS, no))
+    MutVar = Src ^ furthest_offset,
+    impure get_mutvar(MutVar, OS0),
+    ( PS > OS0 ->
+        impure set_mutvar(MutVar, PS)
     ;
         true
     ).
diff -u tests/general/test_parsing_utils.exp tests/general/test_parsing_utils.exp
--- tests/general/test_parsing_utils.exp	29 Sep 2009 12:43:44 -0000
+++ tests/general/test_parsing_utils.exp	1 Oct 2009 17:16:12 -0000
@@ -254,4 +254,7 @@
               ^
 expecting an operator
+1 + 3 mody 2 + f(3 + x)
+      ^
+expecting an operator
 1 + 1x
      ^
diff -u tests/general/test_parsing_utils.m tests/general/test_parsing_utils.m
--- tests/general/test_parsing_utils.m	29 Sep 2009 12:43:44 -0000
+++ tests/general/test_parsing_utils.m	1 Oct 2009 17:16:12 -0000
@@ -46,6 +46,7 @@
     test_err("abs(x ++ 3)", expr_top, !IO),
     test_err("abs (x))", expr_top, !IO),
     test_err("1 + 3 MoD 2 + f(3 + x)", expr_top, !IO),
+    test_err("1 + 3 mody 2 + f(3 + x)", expr_top, !IO),
     test_err("1 + 1x", expr_top, !IO),
     true.
 
--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to:       mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions:          mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------



More information about the reviews mailing list