[m-rev.] for review: parsing_utils improvements

Ian MacLarty maclarty at csse.unimelb.edu.au
Tue Sep 29 12:50:20 AEST 2009


For review by Ralph.

Make it easier to use parsing_utils to write parsers that return
useful error messages by recording the progress of the parser
in a mutable and adding a parse/4 predicate that will return the
furthest the parser got if it failed (often this is sufficient to
see where the problem is).

Also introduce a fail_with_message predicate that can be used in
conjunction with the new parse/4 predicate to return custom errors.

Also add a case-insensitive version of the keyword predicate.

library/parsing_utils.m:
    As above.

library/store.m:
    Add impure versions of new_mutvar, set_mutvar and get_mutvar
    that don't require a store argument.

tests/general/test_parsing_utils.exp:
tests/general/test_parsing_utils.m:
    Test the new features.

Index: library/parsing_utils.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/parsing_utils.m,v
retrieving revision 1.3
diff -u -r1.3 parsing_utils.m
--- library/parsing_utils.m	18 Aug 2009 06:45:16 -0000	1.3
+++ library/parsing_utils.m	29 Sep 2009 12:43:24 -0000
@@ -68,14 +68,40 @@
 :- type parser_with_state(T, S) == pred(src, T, S, S, ps, ps).
 :- inst parser_with_state == ( pred(in, out, in, out, in, out) is semidet ).
 
+:- type skip_whitespace_func == (func(src, ps) = ps).
+
+:- type parse_result(T)
+    --->    ok(T)
+    ;       error(
+                error_message :: maybe(string),
+                error_line    :: int,
+                error_col     :: int
+            ).
+
+    % parse(Input, SkipWS, Parser, Result).
+    % Try to parse Input using Parser and SkipWS to consume whitespace.
+    % If Parser succeeds then return ok with the parsed value,
+    % otherwise return error.  If there were any calls to fail_with_message
+    % without any subsequent progress being made, then the error message
+    % passed to fail_with_message will be returned in the error result.
+    % Otherwise no message is returned and the furthest position the parser
+    % got in the input string is returned.
+    %
+:- pred parse(string::in, skip_whitespace_func::in,
+        parser(T)::in(parser), parse_result(T)::out) is cc_multi.
+
+    % As above but using the default whitespace parser.
+    %
+:- pred parse(string::in, parser(T)::in(parser), parse_result(T)::out)
+    is cc_multi.
+
     % Construct a new parser source and state from a string, also specifying
     % a function for skipping over whitespace (several primitive parsers
     % use this function to consume whitespace after a token; this argument
     % allows the user to specify a function for, say, skipping over comments
     % as well).
     %
-:- pred new_src_and_ps(string::in,
-        (func(src, ps) = ps)::in(func(in, in) = out is det),
+:- pred new_src_and_ps(string::in, skip_whitespace_func::in,
         src::out, ps::out) is det.
 
     % Construct a new parser source and state from a string (the default
@@ -132,6 +158,12 @@
 :- pred keyword(string::in, string::in, src::in, unit::out,
         ps::in, ps::out) is semidet.
 
+    % ikeyword(IdChars, Keyword, Src, _, !PS)
+    % Case-insensitive version of keyword/6.
+    %
+:- pred ikeyword(string::in, string::in, src::in, unit::out,
+        ps::in, ps::out) is semidet.
+
     % identifier(Src, InitIdChars, IdChars, Identifier, !PS) matches the next
     % identifer (result in Identifier) comprising a char from InitIdChars
     % followed by zero or more chars from IdChars.  Any subsequent whitespace
@@ -229,6 +261,18 @@
 :- pred comma_separated_list(parser(T)::in(parser), src::in, list(T)::out,
         ps::in, ps::out) is semidet.
 
+    % Declaratively this predicate is equivalent to false.  Operationally
+    % it will record an error message that will be returned by parse/4
+    % if no further progress is made and then fail.
+    %
+:- pred fail_with_message(string::in, src::in, T::out, ps::in, ps::out)
+    is semidet.
+
+    % As above, but use the given offset for the context of the message.
+    %
+:- pred fail_with_message(string::in, int::in, src::in, T::out,
+    ps::in, ps::out) is semidet.
+
 % The following parser combinators are equivalent to the above, except that
 % a separate state argument is threaded through the computation (e.g., for
 % parsers that incrementally construct a symbol table).
@@ -281,8 +325,7 @@
 :- implementation.
 
 :- import_module array.
-
-
+:- import_module store.
 
     % The parser "state" is just the offset into the input string.
     %
@@ -290,11 +333,45 @@
 
 :- type src
     --->    src(
-                input_length    ::  int,
-                input_string    ::  string,
-                skip_ws_func    ::  func(src, ps) = ps
+                input_length        ::  int,
+                input_string        ::  string,
+                skip_ws_func        ::  func(src, ps) = ps,
+                error_info          ::  generic_mutvar(error_info, unit)
             ).
 
+:- type error_info
+    --->    error_info(int, maybe(string)).
+
+%-----------------------------------------------------------------------------%
+
+parse(InputString, Parser, Result) :-
+    parse(InputString, skip_whitespace, Parser, Result).
+
+parse(InputString, SkipWS, Parser, Result) :-
+    % This is pure, because it will always return the same results for
+    % the same inputs (the mutable in Src cannot be accessed outside
+    % of the promise_pure scope below).
+    promise_pure(
+        new_src_and_ps(InputString, SkipWS, Src, PS0),
+        ( Parser(Src, Val, PS0, _) ->
+            Result = ok(Val)
+        ;
+            MutVar = Src ^ error_info,
+            semipure get_mutvar(MutVar, error_info(Offset, MaybeMsg)),
+            offset_to_line_number_and_position(src_to_line_numbers(Src),
+                Offset, Line, Col),
+            Result0 = error(MaybeMsg, Line, Col),
+            % We make parse/4 cc_multi because declaratively 
+            % parse(Str, SkipWS, Parser, error(MaybeMsg, Line, Col)) is true
+            % for all MaybeMsg, Line and Col iff
+            %   new_src_and_ps(Str, SkipWS, Src, PS0),
+            %   Parser(Src, _, PS0, _)
+            % is false, but operationally MaybeMsg, Line and Col are
+            % restricted to one value each.
+            cc_multi_equal(Result0, Result)
+        )
+    ).
+
 %-----------------------------------------------------------------------------%
 
 new_src_and_ps(InputString, Src, PS) :-
@@ -303,8 +380,11 @@
 %-----------------------------------------------------------------------------%
 
 new_src_and_ps(InputString, SkipWS, Src, PS) :-
-    Src = src(string.length(InputString), InputString, SkipWS),
-    PS = 0.
+    promise_pure (
+        impure new_mutvar(error_info(0, no), MutVar),
+        Src = src(string.length(InputString), InputString, SkipWS, MutVar),
+        PS = 0
+    ).
 
 %-----------------------------------------------------------------------------%
 
@@ -397,10 +477,13 @@
 %-----------------------------------------------------------------------------%
 
 next_char(Src, Char, !PS) :-
-    current_offset(Src, Offset, !PS),
-    Offset < Src ^ input_length,
-    Char = Src ^ input_string ^ unsafe_elem(Offset),
-    !:PS = !.PS + 1.
+    promise_pure (
+        current_offset(Src, Offset, !PS),
+        Offset < Src ^ input_length,
+        Char = Src ^ input_string ^ unsafe_elem(Offset),
+        impure record_progress(Src, Offset),
+        !:PS = !.PS + 1
+    ).
 
 %-----------------------------------------------------------------------------%
 
@@ -411,9 +494,12 @@
 %-----------------------------------------------------------------------------%
 
 input_substring(Src, Start, EndPlusOne, Substring) :-
-    EndPlusOne =< Src ^ input_length,
-    Substring =
-        unsafe_substring(Src ^ input_string, Start, EndPlusOne - Start).
+    promise_pure (
+        EndPlusOne =< Src ^ input_length,
+        Substring =
+            unsafe_substring(Src ^ input_string, Start, EndPlusOne - Start),
+        impure record_progress(Src, Start)
+    ).
 
 %-----------------------------------------------------------------------------%
 
@@ -421,10 +507,12 @@
         ps::in, ps::out) is semidet.
 
 match_string(MatchStr, Src, PS, PS + N) :-
-    N = string.length(MatchStr),
-    PS + N =< Src ^ input_length,
-    match_string_2(N, 0, MatchStr, PS, Src ^ input_string).
-
+    promise_pure (
+        impure record_progress(Src, PS),
+        N = string.length(MatchStr),
+        PS + N =< Src ^ input_length,
+        match_string_2(N, 0, MatchStr, PS, Src ^ input_string)
+    ).
 
 :- pred match_string_2(int::in, int::in, string::in, int::in, string::in)
         is semidet.
@@ -437,6 +525,30 @@
         true
     ).
 
+:- pred imatch_string(string::in, src::in,
+        ps::in, ps::out) is semidet.
+
+imatch_string(MatchStr, Src, PS, PS + N) :-
+    promise_pure (
+        impure record_progress(Src, PS),
+        N = string.length(MatchStr),
+        PS + N =< Src ^ input_length,
+        imatch_string_2(N, 0, MatchStr, PS, Src ^ input_string)
+    ).
+
+:- pred imatch_string_2(int::in, int::in, string::in, int::in, string::in)
+        is semidet.
+
+imatch_string_2(N, I, MatchStr, Offset, Str) :-
+    ( if I < N then
+        char.to_upper(MatchStr ^ unsafe_elem(I), Chr1),
+        char.to_upper(Str ^ unsafe_elem(Offset + I), Chr2),
+        Chr1 = Chr2,
+        imatch_string_2(N, I + 1, MatchStr, Offset, Str)
+      else
+        true
+    ).
+
 %-----------------------------------------------------------------------------%
 %-----------------------------------------------------------------------------%
 % Utility predicates.
@@ -565,6 +677,25 @@
 
 %-----------------------------------------------------------------------------%
 
+fail_with_message(Msg, Src, Val, !PS) :-
+    % This is pure, because the mutable can only be accessed via
+    % the parse/4 predicate which will always return the same results
+    % for the same inputs.
+    promise_pure (
+        Mutvar = Src ^ error_info,
+        impure set_mutvar(Mutvar, error_info(!.PS, yes(Msg))),
+        ( semidet_fail ->
+            dynamic_cast(0, Val) % unreachable
+        ;
+            fail
+        )
+    ).
+
+fail_with_message(Msg, Offset, Src, Val, !PS) :-
+    fail_with_message(Msg, Src, Val, Offset, _).
+
+%-----------------------------------------------------------------------------%
+
 whitespace(Src, unit, !PS) :-
     ( if
         next_char(Src, C, !PS),
@@ -594,6 +725,11 @@
     not char_in_class(IdChars, Src, _, !.PS, _),
     skip_whitespace(Src, !PS).
 
+ikeyword(IdChars, Keyword, Src, unit, !PS) :-
+    imatch_string(Keyword, Src, !PS),
+    not char_in_class(IdChars, Src, _, !.PS, _),
+    skip_whitespace(Src, !PS).
+
 %-----------------------------------------------------------------------------%
 
 float_literal_as_string(Src, FloatStr, !PS) :-
@@ -713,4 +849,17 @@
     ).
 
 %-----------------------------------------------------------------------------%
+
+:- impure pred record_progress(src::in, ps::in) is det.
+
+record_progress(Src, PS) :-
+    MutVar = Src ^ error_info,
+    semipure get_mutvar(MutVar, error_info(PS0, _)),
+    ( PS > PS0 ->
+        impure set_mutvar(MutVar, error_info(PS, no))
+    ;
+        true
+    ).
+
+%-----------------------------------------------------------------------------%
 %-----------------------------------------------------------------------------%
Index: library/store.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/store.m,v
retrieving revision 1.66
diff -u -r1.66 store.m
--- library/store.m	17 Jun 2009 07:48:16 -0000	1.66
+++ library/store.m	29 Sep 2009 12:43:33 -0000
@@ -111,6 +111,16 @@
     generic_mutvar(T, S)::out, S::di, S::uo) is det <= store(S).
 
 %-----------------------------------------------------------------------------%
+% Impure manipulation of mutvars.
+%
+
+:- impure pred new_mutvar(T::in, generic_mutvar(T, S)::out) is det.
+
+:- impure pred set_mutvar(generic_mutvar(T, S)::in, T::in) is det.
+
+:- semipure pred get_mutvar(generic_mutvar(T, S)::in, T::out) is det.
+
+%-----------------------------------------------------------------------------%
 %
 % References
 %
@@ -315,52 +325,67 @@
 %
 % I wonder whether it is worth it?  Hmm, probably not.
 
+new_mutvar(Val, Mutvar, S0, S) :-
+    promise_pure (
+        impure new_mutvar(Val, Mutvar),
+        S = S0
+    ).
+
 :- pragma foreign_proc("C",
-    new_mutvar(Val::in, Mutvar::out, S0::di, S::uo),
-    [will_not_call_mercury, promise_pure, will_not_modify_trail],
+    new_mutvar(Val::in, Mutvar::out),
+    [will_not_call_mercury, will_not_modify_trail],
 "
     MR_offset_incr_hp_msg(Mutvar, MR_SIZE_SLOT_SIZE, MR_SIZE_SLOT_SIZE + 1,
         MR_PROC_LABEL, ""store.mutvar/2"");
     MR_define_size_slot(0, Mutvar, 1);
     * (MR_Word *) Mutvar = Val;
-    S = S0;
 ").
 
+get_mutvar(Mutvar, Val, S0, S) :-
+    promise_pure (
+        semipure get_mutvar(Mutvar, Val),
+        S = S0
+    ).
+
 :- pragma foreign_proc("C",
-    get_mutvar(Mutvar::in, Val::out, S0::di, S::uo),
-    [will_not_call_mercury, promise_pure, will_not_modify_trail],
+    get_mutvar(Mutvar::in, Val::out),
+    [will_not_call_mercury, promise_semipure, will_not_modify_trail],
 "
     Val = * (MR_Word *) Mutvar;
-    S = S0;
 ").
 
+set_mutvar(Mutvar, Val, S0, S) :-
+    promise_pure (
+        impure set_mutvar(Mutvar, Val),
+        S = S0
+    ).
+
 :- pragma foreign_proc("C",
-    set_mutvar(Mutvar::in, Val::in, S0::di, S::uo),
-    [will_not_call_mercury, promise_pure, will_not_modify_trail],
+    set_mutvar(Mutvar::in, Val::in),
+    [will_not_call_mercury, will_not_modify_trail],
 "
     * (MR_Word *) Mutvar = Val;
-    S = S0;
 ").
 
 :- pragma foreign_type(java, generic_mutvar(T, S), "mutvar.Mutvar").
 
 :- pragma foreign_proc("Java",
-    new_mutvar(Val::in, Mutvar::out, _S0::di, _S::uo),
-    [will_not_call_mercury, promise_pure],
+    new_mutvar(Val::in, Mutvar::out),
+    [will_not_call_mercury],
 "
     Mutvar = new mutvar.Mutvar(Val);
 ").
 
 :- pragma foreign_proc("Java",
-    get_mutvar(Mutvar::in, Val::out, _S0::di, _S::uo),
-    [will_not_call_mercury, promise_pure],
+    get_mutvar(Mutvar::in, Val::out),
+    [will_not_call_mercury],
 "
     Val = Mutvar.object;
 ").
 
 :- pragma foreign_proc("Java",
-    set_mutvar(Mutvar::in, Val::in, _S0::di, _S::uo),
-    [will_not_call_mercury, promise_pure],
+    set_mutvar(Mutvar::in, Val::in),
+    [will_not_call_mercury],
 "
     Mutvar.object = Val;
 ").
@@ -371,28 +396,25 @@
 :- pragma foreign_type("Erlang", generic_mutvar(T, S), "").
 
 :- pragma foreign_proc("Erlang",
-    new_mutvar(Val::in, Mutvar::out, S0::di, S::uo),
-    [will_not_call_mercury, promise_pure],
+    new_mutvar(Val::in, Mutvar::out),
+    [will_not_call_mercury],
 "
     Mutvar = ets:new(mutvar, [set, public]),
-    ets:insert(Mutvar, {value, Val}),
-    S = S0
+    ets:insert(Mutvar, {value, Val})
 ").
 
 :- pragma foreign_proc("Erlang",
-    get_mutvar(Mutvar::in, Val::out, S0::di, S::uo),
-    [will_not_call_mercury, promise_pure],
+    get_mutvar(Mutvar::in, Val::out),
+    [will_not_call_mercury],
 "
-    [{value, Val}] = ets:lookup(Mutvar, value),
-    S = S0
+    [{value, Val}] = ets:lookup(Mutvar, value)
 ").
 
 :- pragma foreign_proc("Erlang",
-    set_mutvar(Mutvar::in, Val::in, S0::di, S::uo),
-    [will_not_call_mercury, promise_pure],
+    set_mutvar(Mutvar::in, Val::in),
+    [will_not_call_mercury],
 "
-    ets:insert(Mutvar, {value, Val}),
-    S = S0
+    ets:insert(Mutvar, {value, Val})
 ").
 
 copy_mutvar(Mutvar, Copy, !S) :-
Index: tests/general/test_parsing_utils.exp
===================================================================
RCS file: /home/mercury1/repository/tests/general/test_parsing_utils.exp,v
retrieving revision 1.3
diff -u -r1.3 test_parsing_utils.exp
--- tests/general/test_parsing_utils.exp	18 Aug 2009 06:45:16 -0000	1.3
+++ tests/general/test_parsing_utils.exp	29 Sep 2009 12:43:44 -0000
@@ -240,3 +240,18 @@
 Line = 2, Pos = 10
 Line = 3, Pos = 1
 Line = 1, Pos = 1
+expecting an operator
+12 + x-pow(x + 3; y)
+                ^
+syntax error
+abs(x ++ 3)
+       ^
+expecting an operator
+abs (x))
+       ^
+unknown function: f
+1 + 3 MoD 2 + f(3 + x)
+              ^
+expecting an operator
+1 + 1x
+     ^
Index: tests/general/test_parsing_utils.m
===================================================================
RCS file: /home/mercury1/repository/tests/general/test_parsing_utils.m,v
retrieving revision 1.3
diff -u -r1.3 test_parsing_utils.m
--- tests/general/test_parsing_utils.m	18 Aug 2009 06:45:16 -0000	1.3
+++ tests/general/test_parsing_utils.m	29 Sep 2009 12:43:44 -0000
@@ -21,6 +21,7 @@
 
 :- implementation.
 
+:- import_module int.
 :- import_module list.
 :- import_module maybe.
 :- import_module parsing_utils.
@@ -40,7 +41,13 @@
     test_pos("123456789", 0, !IO),
     test_pos("123456789\n123456789\n\n", 19, !IO),
     test_pos("123456789\n123456789\n\n", 20, !IO),
-    test_pos("", 0, !IO).
+    test_pos("", 0, !IO),
+    test_err("12 + x-pow(x + 3; y)", expr_top, !IO),
+    test_err("abs(x ++ 3)", expr_top, !IO),
+    test_err("abs (x))", expr_top, !IO),
+    test_err("1 + 3 MoD 2 + f(3 + x)", expr_top, !IO),
+    test_err("1 + 1x", expr_top, !IO),
+    true.
 
 %-----------------------------------------------------------------------------%
 
@@ -377,4 +384,109 @@
     String = string.string(State).
 
 %-----------------------------------------------------------------------------%
+
+:- pred test_err(string::in, parser(expr)::in(parser), io::di, io::uo)
+    is cc_multi.
+
+test_err(Input, Parser, !IO) :-
+    parse(Input, Parser, Result),
+    (
+        Result = ok(Expr),
+        io.write(Expr, !IO),
+        io.nl(!IO)
+    ;
+        Result = error(MaybeMsg, LineNo, Col),
+        Lines = string.words_separator(unify('\n'), Input),
+        Line= list.det_index1(Lines, LineNo),
+        Spaces = string.from_char_list(list.duplicate(Col - 1, ' ')),
+        (
+            MaybeMsg = yes(Msg),
+            io.write_string(Msg ++ "\n", !IO)
+        ; 
+            MaybeMsg = no,
+            io.write_string("syntax error\n", !IO)
+        ),
+        io.write_string(Line ++ "\n", !IO),
+        io.write_string(Spaces ++ "^\n", !IO)
+    ).
+
+:- type expr
+    --->    op(op, expr, expr)
+    ;       function_application(string, list(expr))
+    ;       integer(int)
+    ;       variable(string).
+
+:- type op
+    --->    plus
+    ;       minus
+    ;       modulo.
+
+:- pred expr_top(src::in, expr::out, ps::in, ps::out) is semidet.
+
+expr_top(Src, Expr) -->
+    expr(Src, Expr),
+    eof(Src, _).
+
+:- pred expr(src::in, expr::out, ps::in, ps::out) is semidet.
+
+expr(Src, Expr) -->
+    term(Src, Term1),
+    ( op(Src, Op) ->
+        expr(Src, Expr2),
+        { Expr = op(Op, Term1, Expr2) }
+    ;
+        { Expr = Term1 }
+    ).
+
+:- pred term(src::in, expr::out, ps::in, ps::out) is semidet.
+
+term(Src, Term) -->
+    current_offset(Src, Start),
+    ( int_literal(Src, Int) ->
+        { Term = integer(Int) }
+    ;
+        id(Src, Id)
+    ->
+        ( punct("(", Src, _) ->
+            ( { known_function(Id) } ->
+                comma_separated_list(expr, Src, Args),
+                punct(")", Src, _),
+                { Term = function_application(Id, Args) }
+            ;
+                fail_with_message("unknown function: " ++ Id, Start, Src, Term)
+            )
+        ;
+            { Term = variable(Id) }
+        )
+    ;
+        { fail }
+    ).
+
+:- pred known_function(string::in) is semidet.
+
+known_function("abs").
+known_function("pow").
+
+:- pred op(src::in, op::out, ps::in, ps::out) is semidet.
+
+op(Src, Op) -->
+    ( punct("+", Src, _) ->
+        { Op = plus }
+    ; punct("-", Src, _) ->
+        { Op = minus }
+    ; ikeyword(id_chars, "mod", Src, _) ->
+        { Op = modulo }
+    ;
+        fail_with_message("expecting an operator", Src, Op)
+    ).
+
+:- pred id(src::in, string::out, ps::in, ps::out) is semidet.
+
+id(Src, Id) -->
+    identifier(id_chars, id_chars ++ "0123456789", Src, Id).
+
+:- func id_chars = string.
+
+id_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".
+
 %-----------------------------------------------------------------------------%
--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to:       mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions:          mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------



More information about the reviews mailing list