[m-rev.] Re: for review: parsing_utils improvements
Ian MacLarty
maclarty at csse.unimelb.edu.au
Thu Oct 1 17:30:38 AEST 2009
On Thu, Oct 01, 2009 at 01:49:48PM +1000, Ralph Becket wrote:
> Ian MacLarty, Wednesday, 30 September 2009:
> > On Wed, Sep 30, 2009 at 12:12 PM, Ian MacLarty
> > <maclarty at csse.unimelb.edu.au> wrote:
> > > Here are some benchmarks I did with the sparql parser with an input of
> > > 4.8M (times are an average of 3 runs):
> >
> > Correction: 48M (48 100 031 bytes).
>
> Sorry, I thought I'd replied to this before: you've convinced me this is
> a good idea!
Here's an interdiff for review. Note the whitespace bug fix. It was
using char.is_whitespace for the first character, but then calling
skip_whitespace/3 which calls the user defined whitespace function.
I changed it so it calls char.is_whitespace for all characters which is
its documented behaviour.
diff -u library/parsing_utils.m library/parsing_utils.m
--- library/parsing_utils.m 29 Sep 2009 12:43:24 -0000
+++ library/parsing_utils.m 1 Oct 2009 17:15:52 -0000
@@ -7,18 +7,20 @@
%---------------------------------------------------------------------------%
%
% File: parsing_utils.m
-% Author: Ralph Becket <rafe at csse.unimelb.edu.au>
+% Authors: Ralph Becket <rafe at csse.unimelb.edu.au>, maclarty
% Stability: low
%
% Utilities for recursive descent parsers. Parsers take at least three
% arguments: a source (src) containing the input string and a parser
% state (ps) input/output pair tracking the current offset into the input.
%
-% A new src and ps can be constructed by calling
-% new_src_and_ps(InputString, SkipWS, Src, !:PS) where the SkipWS function
-% is used by the primitive parsers to skip over any following whitespace
-% (providing a skipping function allows users to define comments as
-% whitespace).
+% Call parse(InputString, SkipWS, Parser, Result) to parse an input string
+% and return an error context and message if parsing failed.
+% The SkipWS function is used by the primitive parsers to skip over any
+% following whitespace (providing a skipping function allows users to define
+% comments as whitespace).
+% Alternatively a new src and ps can be constructed by calling
+% new_src_and_ps(InputString, SkipWS, Src, !:PS).
% Parsing predicates are semidet and typically take the form
% p(...parameters..., Src, Result, !PS). A parser matching variable
% assignments of the form `x = 42' might be defined like this:
@@ -68,6 +70,9 @@
:- type parser_with_state(T, S) == pred(src, T, S, S, ps, ps).
:- inst parser_with_state == ( pred(in, out, in, out, in, out) is semidet ).
+ % Functions of this type are used to skip whitespace in the primitive
+ % parsers provided by this module.
+ %
:- type skip_whitespace_func == (func(src, ps) = ps).
:- type parse_result(T)
@@ -83,9 +88,9 @@
% If Parser succeeds then return ok with the parsed value,
% otherwise return error. If there were any calls to fail_with_message
% without any subsequent progress being made, then the error message
- % passed to fail_with_message will be returned in the error result.
- % Otherwise no message is returned and the furthest position the parser
- % got in the input string is returned.
+ % passed to the last call to fail_with_message will be returned in the
+ % error result. Otherwise no message is returned and the furthest position
+ % the parser got in the input string is returned.
%
:- pred parse(string::in, skip_whitespace_func::in,
parser(T)::in(parser), parse_result(T)::out) is cc_multi.
@@ -93,7 +98,7 @@
% As above but using the default whitespace parser.
%
:- pred parse(string::in, parser(T)::in(parser), parse_result(T)::out)
- is cc_multi.
+ is cc_multi.
% Construct a new parser source and state from a string, also specifying
% a function for skipping over whitespace (several primitive parsers
@@ -325,7 +330,7 @@
:- implementation.
:- import_module array.
-:- import_module store.
+:- import_module mutvar.
% The parser "state" is just the offset into the input string.
%
@@ -336,11 +341,18 @@
input_length :: int,
input_string :: string,
skip_ws_func :: func(src, ps) = ps,
- error_info :: generic_mutvar(error_info, unit)
+
+ furthest_offset :: mutvar(int),
+ % This mutable records the progress of the parser
+ % through the input string.
+
+ last_fail_message :: mutvar(fail_message_info)
+ % This mutable is used to record messages passed to
+ % fail_with_message and their context.
).
-:- type error_info
- ---> error_info(int, maybe(string)).
+:- type fail_message_info
+ ---> fail_message_info(int, maybe(string)).
%-----------------------------------------------------------------------------%
@@ -351,16 +363,24 @@
% This is pure, because it will always return the same results for
% the same inputs (the mutable in Src cannot be accessed outside
% of the promise_pure scope below).
- promise_pure(
+ promise_pure (
new_src_and_ps(InputString, SkipWS, Src, PS0),
( Parser(Src, Val, PS0, _) ->
Result = ok(Val)
;
- MutVar = Src ^ error_info,
- semipure get_mutvar(MutVar, error_info(Offset, MaybeMsg)),
+ impure get_mutvar(Src ^ last_fail_message, Info),
+ impure get_mutvar(Src ^ furthest_offset, FurthestOffset),
+ Info = fail_message_info(MessageOffset, LastFailMsg),
+ ( MessageOffset < FurthestOffset ->
+ Msg = no,
+ Offset = FurthestOffset
+ ;
+ Msg = LastFailMsg,
+ Offset = MessageOffset
+ ),
offset_to_line_number_and_position(src_to_line_numbers(Src),
Offset, Line, Col),
- Result0 = error(MaybeMsg, Line, Col),
+ Result0 = error(Msg, Line, Col),
% We make parse/4 cc_multi because declaratively
% parse(Str, SkipWS, Parser, error(MaybeMsg, Line, Col)) is true
% for all MaybeMsg, Line and Col iff
@@ -381,8 +401,10 @@
new_src_and_ps(InputString, SkipWS, Src, PS) :-
promise_pure (
- impure new_mutvar(error_info(0, no), MutVar),
- Src = src(string.length(InputString), InputString, SkipWS, MutVar),
+ impure new_mutvar(fail_message_info(0, no), ErrorInfoMutVar),
+ impure new_mutvar(0, FurthestOffsetMutvar),
+ Src = src(string.length(InputString), InputString, SkipWS,
+ FurthestOffsetMutvar, ErrorInfoMutVar),
PS = 0
).
@@ -682,8 +704,9 @@
% the parse/4 predicate which will always return the same results
% for the same inputs.
promise_pure (
- Mutvar = Src ^ error_info,
- impure set_mutvar(Mutvar, error_info(!.PS, yes(Msg))),
+ impure set_mutvar(Src ^ last_fail_message,
+ fail_message_info(!.PS, yes(Msg))),
+ impure set_mutvar(Src ^ furthest_offset, !.PS),
( semidet_fail ->
dynamic_cast(0, Val) % unreachable
;
@@ -691,8 +714,8 @@
)
).
-fail_with_message(Msg, Offset, Src, Val, !PS) :-
- fail_with_message(Msg, Src, Val, Offset, _).
+fail_with_message(Msg, Offset, Src, Val, _, PS) :-
+ fail_with_message(Msg, Src, Val, Offset, PS).
%-----------------------------------------------------------------------------%
@@ -701,7 +724,7 @@
next_char(Src, C, !PS),
char.is_whitespace(C)
then
- skip_whitespace(Src, !PS)
+ whitespace(Src, _, !PS)
else
semidet_true
).
@@ -721,14 +744,28 @@
%---------------------------------------------------------------------------%
keyword(IdChars, Keyword, Src, unit, !PS) :-
- match_string(Keyword, Src, !PS),
- not char_in_class(IdChars, Src, _, !.PS, _),
- skip_whitespace(Src, !PS).
+ promise_pure (
+ Start = !.PS,
+ match_string(Keyword, Src, !PS),
+ not char_in_class(IdChars, Src, _, !.PS, _),
+ skip_whitespace(Src, !PS),
+ % Set progress to the beginning of the keyword so that if parsing
+ % fails the error context will point to the beginning of the
+ % keyword.
+ impure record_progress(Src, Start)
+ ).
ikeyword(IdChars, Keyword, Src, unit, !PS) :-
- imatch_string(Keyword, Src, !PS),
- not char_in_class(IdChars, Src, _, !.PS, _),
- skip_whitespace(Src, !PS).
+ promise_pure (
+ Start = !.PS,
+ imatch_string(Keyword, Src, !PS),
+ not char_in_class(IdChars, Src, _, !.PS, _),
+ skip_whitespace(Src, !PS),
+ % Set progress to the beginning of the keyword so that if parsing
+ % fails the error context will point to the beginning of the
+ % keyword.
+ impure record_progress(Src, Start)
+ ).
%-----------------------------------------------------------------------------%
@@ -850,13 +887,15 @@
%-----------------------------------------------------------------------------%
+ % Update the furthest_offset field if any progress has been made.
+ %
:- impure pred record_progress(src::in, ps::in) is det.
record_progress(Src, PS) :-
- MutVar = Src ^ error_info,
- semipure get_mutvar(MutVar, error_info(PS0, _)),
- ( PS > PS0 ->
- impure set_mutvar(MutVar, error_info(PS, no))
+ MutVar = Src ^ furthest_offset,
+ impure get_mutvar(MutVar, OS0),
+ ( PS > OS0 ->
+ impure set_mutvar(MutVar, PS)
;
true
).
diff -u tests/general/test_parsing_utils.exp tests/general/test_parsing_utils.exp
--- tests/general/test_parsing_utils.exp 29 Sep 2009 12:43:44 -0000
+++ tests/general/test_parsing_utils.exp 1 Oct 2009 17:16:12 -0000
@@ -254,4 +254,7 @@
^
expecting an operator
+1 + 3 mody 2 + f(3 + x)
+ ^
+expecting an operator
1 + 1x
^
diff -u tests/general/test_parsing_utils.m tests/general/test_parsing_utils.m
--- tests/general/test_parsing_utils.m 29 Sep 2009 12:43:44 -0000
+++ tests/general/test_parsing_utils.m 1 Oct 2009 17:16:12 -0000
@@ -46,6 +46,7 @@
test_err("abs(x ++ 3)", expr_top, !IO),
test_err("abs (x))", expr_top, !IO),
test_err("1 + 3 MoD 2 + f(3 + x)", expr_top, !IO),
+ test_err("1 + 3 mody 2 + f(3 + x)", expr_top, !IO),
test_err("1 + 1x", expr_top, !IO),
true.
--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to: mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions: mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------
More information about the reviews
mailing list