[m-rev.] for review: parsing_utils improvements
Ian MacLarty
maclarty at csse.unimelb.edu.au
Tue Sep 29 12:50:20 AEST 2009
For review by Ralph.
Make it easier to use parsing_utils to write parsers that return
useful error messages by recording the progress of the parser
in a mutable and adding a parse/4 predicate that will return the
furthest the parser got if it failed (often this is sufficient to
see where the problem is).
Also introduce a fail_with_message predicate that can be used in
conjunction with the new parse/4 predicate to return custom errors.
Also add a case-insensitive version of the keyword predicate.
library/parsing_utils.m:
As above.
library/store.m:
Add impure versions of new_mutvar, set_mutvar and get_mutvar
that don't require a store argument.
tests/general/test_parsing_utils.exp:
tests/general/test_parsing_utils.m:
Test the new features.
Index: library/parsing_utils.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/parsing_utils.m,v
retrieving revision 1.3
diff -u -r1.3 parsing_utils.m
--- library/parsing_utils.m 18 Aug 2009 06:45:16 -0000 1.3
+++ library/parsing_utils.m 29 Sep 2009 12:43:24 -0000
@@ -68,14 +68,40 @@
:- type parser_with_state(T, S) == pred(src, T, S, S, ps, ps).
:- inst parser_with_state == ( pred(in, out, in, out, in, out) is semidet ).
+:- type skip_whitespace_func == (func(src, ps) = ps).
+
+:- type parse_result(T)
+ ---> ok(T)
+ ; error(
+ error_message :: maybe(string),
+ error_line :: int,
+ error_col :: int
+ ).
+
+ % parse(Input, SkipWS, Parser, Result).
+ % Try to parse Input using Parser and SkipWS to consume whitespace.
+ % If Parser succeeds then return ok with the parsed value,
+ % otherwise return error. If there were any calls to fail_with_message
+ % without any subsequent progress being made, then the error message
+ % passed to fail_with_message will be returned in the error result.
+ % Otherwise no message is returned and the furthest position the parser
+ % got in the input string is returned.
+ %
+:- pred parse(string::in, skip_whitespace_func::in,
+ parser(T)::in(parser), parse_result(T)::out) is cc_multi.
+
+ % As above but using the default whitespace parser.
+ %
+:- pred parse(string::in, parser(T)::in(parser), parse_result(T)::out)
+ is cc_multi.
+
% Construct a new parser source and state from a string, also specifying
% a function for skipping over whitespace (several primitive parsers
% use this function to consume whitespace after a token; this argument
% allows the user to specify a function for, say, skipping over comments
% as well).
%
-:- pred new_src_and_ps(string::in,
- (func(src, ps) = ps)::in(func(in, in) = out is det),
+:- pred new_src_and_ps(string::in, skip_whitespace_func::in,
src::out, ps::out) is det.
% Construct a new parser source and state from a string (the default
@@ -132,6 +158,12 @@
:- pred keyword(string::in, string::in, src::in, unit::out,
ps::in, ps::out) is semidet.
+ % ikeyword(IdChars, Keyword, Src, _, !PS)
+ % Case-insensitive version of keyword/6.
+ %
+:- pred ikeyword(string::in, string::in, src::in, unit::out,
+ ps::in, ps::out) is semidet.
+
% identifier(Src, InitIdChars, IdChars, Identifier, !PS) matches the next
% identifer (result in Identifier) comprising a char from InitIdChars
% followed by zero or more chars from IdChars. Any subsequent whitespace
@@ -229,6 +261,18 @@
:- pred comma_separated_list(parser(T)::in(parser), src::in, list(T)::out,
ps::in, ps::out) is semidet.
+ % Declaratively this predicate is equivalent to false. Operationally
+ % it will record an error message that will be returned by parse/4
+ % if no further progress is made and then fail.
+ %
+:- pred fail_with_message(string::in, src::in, T::out, ps::in, ps::out)
+ is semidet.
+
+ % As above, but use the given offset for the context of the message.
+ %
+:- pred fail_with_message(string::in, int::in, src::in, T::out,
+ ps::in, ps::out) is semidet.
+
% The following parser combinators are equivalent to the above, except that
% a separate state argument is threaded through the computation (e.g., for
% parsers that incrementally construct a symbol table).
@@ -281,8 +325,7 @@
:- implementation.
:- import_module array.
-
-
+:- import_module store.
% The parser "state" is just the offset into the input string.
%
@@ -290,11 +333,45 @@
:- type src
---> src(
- input_length :: int,
- input_string :: string,
- skip_ws_func :: func(src, ps) = ps
+ input_length :: int,
+ input_string :: string,
+ skip_ws_func :: func(src, ps) = ps,
+ error_info :: generic_mutvar(error_info, unit)
).
+:- type error_info
+ ---> error_info(int, maybe(string)).
+
+%-----------------------------------------------------------------------------%
+
+parse(InputString, Parser, Result) :-
+ parse(InputString, skip_whitespace, Parser, Result).
+
+parse(InputString, SkipWS, Parser, Result) :-
+ % This is pure, because it will always return the same results for
+ % the same inputs (the mutable in Src cannot be accessed outside
+ % of the promise_pure scope below).
+ promise_pure(
+ new_src_and_ps(InputString, SkipWS, Src, PS0),
+ ( Parser(Src, Val, PS0, _) ->
+ Result = ok(Val)
+ ;
+ MutVar = Src ^ error_info,
+ semipure get_mutvar(MutVar, error_info(Offset, MaybeMsg)),
+ offset_to_line_number_and_position(src_to_line_numbers(Src),
+ Offset, Line, Col),
+ Result0 = error(MaybeMsg, Line, Col),
+ % We make parse/4 cc_multi because declaratively
+ % parse(Str, SkipWS, Parser, error(MaybeMsg, Line, Col)) is true
+ % for all MaybeMsg, Line and Col iff
+ % new_src_and_ps(Str, SkipWS, Src, PS0),
+ % Parser(Src, _, PS0, _)
+ % is false, but operationally MaybeMsg, Line and Col are
+ % restricted to one value each.
+ cc_multi_equal(Result0, Result)
+ )
+ ).
+
%-----------------------------------------------------------------------------%
new_src_and_ps(InputString, Src, PS) :-
@@ -303,8 +380,11 @@
%-----------------------------------------------------------------------------%
new_src_and_ps(InputString, SkipWS, Src, PS) :-
- Src = src(string.length(InputString), InputString, SkipWS),
- PS = 0.
+ promise_pure (
+ impure new_mutvar(error_info(0, no), MutVar),
+ Src = src(string.length(InputString), InputString, SkipWS, MutVar),
+ PS = 0
+ ).
%-----------------------------------------------------------------------------%
@@ -397,10 +477,13 @@
%-----------------------------------------------------------------------------%
next_char(Src, Char, !PS) :-
- current_offset(Src, Offset, !PS),
- Offset < Src ^ input_length,
- Char = Src ^ input_string ^ unsafe_elem(Offset),
- !:PS = !.PS + 1.
+ promise_pure (
+ current_offset(Src, Offset, !PS),
+ Offset < Src ^ input_length,
+ Char = Src ^ input_string ^ unsafe_elem(Offset),
+ impure record_progress(Src, Offset),
+ !:PS = !.PS + 1
+ ).
%-----------------------------------------------------------------------------%
@@ -411,9 +494,12 @@
%-----------------------------------------------------------------------------%
input_substring(Src, Start, EndPlusOne, Substring) :-
- EndPlusOne =< Src ^ input_length,
- Substring =
- unsafe_substring(Src ^ input_string, Start, EndPlusOne - Start).
+ promise_pure (
+ EndPlusOne =< Src ^ input_length,
+ Substring =
+ unsafe_substring(Src ^ input_string, Start, EndPlusOne - Start),
+ impure record_progress(Src, Start)
+ ).
%-----------------------------------------------------------------------------%
@@ -421,10 +507,12 @@
ps::in, ps::out) is semidet.
match_string(MatchStr, Src, PS, PS + N) :-
- N = string.length(MatchStr),
- PS + N =< Src ^ input_length,
- match_string_2(N, 0, MatchStr, PS, Src ^ input_string).
-
+ promise_pure (
+ impure record_progress(Src, PS),
+ N = string.length(MatchStr),
+ PS + N =< Src ^ input_length,
+ match_string_2(N, 0, MatchStr, PS, Src ^ input_string)
+ ).
:- pred match_string_2(int::in, int::in, string::in, int::in, string::in)
is semidet.
@@ -437,6 +525,30 @@
true
).
+:- pred imatch_string(string::in, src::in,
+ ps::in, ps::out) is semidet.
+
+imatch_string(MatchStr, Src, PS, PS + N) :-
+ promise_pure (
+ impure record_progress(Src, PS),
+ N = string.length(MatchStr),
+ PS + N =< Src ^ input_length,
+ imatch_string_2(N, 0, MatchStr, PS, Src ^ input_string)
+ ).
+
+:- pred imatch_string_2(int::in, int::in, string::in, int::in, string::in)
+ is semidet.
+
+imatch_string_2(N, I, MatchStr, Offset, Str) :-
+ ( if I < N then
+ char.to_upper(MatchStr ^ unsafe_elem(I), Chr1),
+ char.to_upper(Str ^ unsafe_elem(Offset + I), Chr2),
+ Chr1 = Chr2,
+ imatch_string_2(N, I + 1, MatchStr, Offset, Str)
+ else
+ true
+ ).
+
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
% Utility predicates.
@@ -565,6 +677,25 @@
%-----------------------------------------------------------------------------%
+fail_with_message(Msg, Src, Val, !PS) :-
+ % This is pure, because the mutable can only be accessed via
+ % the parse/4 predicate which will always return the same results
+ % for the same inputs.
+ promise_pure (
+ Mutvar = Src ^ error_info,
+ impure set_mutvar(Mutvar, error_info(!.PS, yes(Msg))),
+ ( semidet_fail ->
+ dynamic_cast(0, Val) % unreachable
+ ;
+ fail
+ )
+ ).
+
+fail_with_message(Msg, Offset, Src, Val, !PS) :-
+ fail_with_message(Msg, Src, Val, Offset, _).
+
+%-----------------------------------------------------------------------------%
+
whitespace(Src, unit, !PS) :-
( if
next_char(Src, C, !PS),
@@ -594,6 +725,11 @@
not char_in_class(IdChars, Src, _, !.PS, _),
skip_whitespace(Src, !PS).
+ikeyword(IdChars, Keyword, Src, unit, !PS) :-
+ imatch_string(Keyword, Src, !PS),
+ not char_in_class(IdChars, Src, _, !.PS, _),
+ skip_whitespace(Src, !PS).
+
%-----------------------------------------------------------------------------%
float_literal_as_string(Src, FloatStr, !PS) :-
@@ -713,4 +849,17 @@
).
%-----------------------------------------------------------------------------%
+
+:- impure pred record_progress(src::in, ps::in) is det.
+
+record_progress(Src, PS) :-
+ MutVar = Src ^ error_info,
+ semipure get_mutvar(MutVar, error_info(PS0, _)),
+ ( PS > PS0 ->
+ impure set_mutvar(MutVar, error_info(PS, no))
+ ;
+ true
+ ).
+
+%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
Index: library/store.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/store.m,v
retrieving revision 1.66
diff -u -r1.66 store.m
--- library/store.m 17 Jun 2009 07:48:16 -0000 1.66
+++ library/store.m 29 Sep 2009 12:43:33 -0000
@@ -111,6 +111,16 @@
generic_mutvar(T, S)::out, S::di, S::uo) is det <= store(S).
%-----------------------------------------------------------------------------%
+% Impure manipulation of mutvars.
+%
+
+:- impure pred new_mutvar(T::in, generic_mutvar(T, S)::out) is det.
+
+:- impure pred set_mutvar(generic_mutvar(T, S)::in, T::in) is det.
+
+:- semipure pred get_mutvar(generic_mutvar(T, S)::in, T::out) is det.
+
+%-----------------------------------------------------------------------------%
%
% References
%
@@ -315,52 +325,67 @@
%
% I wonder whether it is worth it? Hmm, probably not.
+new_mutvar(Val, Mutvar, S0, S) :-
+ promise_pure (
+ impure new_mutvar(Val, Mutvar),
+ S = S0
+ ).
+
:- pragma foreign_proc("C",
- new_mutvar(Val::in, Mutvar::out, S0::di, S::uo),
- [will_not_call_mercury, promise_pure, will_not_modify_trail],
+ new_mutvar(Val::in, Mutvar::out),
+ [will_not_call_mercury, will_not_modify_trail],
"
MR_offset_incr_hp_msg(Mutvar, MR_SIZE_SLOT_SIZE, MR_SIZE_SLOT_SIZE + 1,
MR_PROC_LABEL, ""store.mutvar/2"");
MR_define_size_slot(0, Mutvar, 1);
* (MR_Word *) Mutvar = Val;
- S = S0;
").
+get_mutvar(Mutvar, Val, S0, S) :-
+ promise_pure (
+ semipure get_mutvar(Mutvar, Val),
+ S = S0
+ ).
+
:- pragma foreign_proc("C",
- get_mutvar(Mutvar::in, Val::out, S0::di, S::uo),
- [will_not_call_mercury, promise_pure, will_not_modify_trail],
+ get_mutvar(Mutvar::in, Val::out),
+ [will_not_call_mercury, promise_semipure, will_not_modify_trail],
"
Val = * (MR_Word *) Mutvar;
- S = S0;
").
+set_mutvar(Mutvar, Val, S0, S) :-
+ promise_pure (
+ impure set_mutvar(Mutvar, Val),
+ S = S0
+ ).
+
:- pragma foreign_proc("C",
- set_mutvar(Mutvar::in, Val::in, S0::di, S::uo),
- [will_not_call_mercury, promise_pure, will_not_modify_trail],
+ set_mutvar(Mutvar::in, Val::in),
+ [will_not_call_mercury, will_not_modify_trail],
"
* (MR_Word *) Mutvar = Val;
- S = S0;
").
:- pragma foreign_type(java, generic_mutvar(T, S), "mutvar.Mutvar").
:- pragma foreign_proc("Java",
- new_mutvar(Val::in, Mutvar::out, _S0::di, _S::uo),
- [will_not_call_mercury, promise_pure],
+ new_mutvar(Val::in, Mutvar::out),
+ [will_not_call_mercury],
"
Mutvar = new mutvar.Mutvar(Val);
").
:- pragma foreign_proc("Java",
- get_mutvar(Mutvar::in, Val::out, _S0::di, _S::uo),
- [will_not_call_mercury, promise_pure],
+ get_mutvar(Mutvar::in, Val::out),
+ [will_not_call_mercury],
"
Val = Mutvar.object;
").
:- pragma foreign_proc("Java",
- set_mutvar(Mutvar::in, Val::in, _S0::di, _S::uo),
- [will_not_call_mercury, promise_pure],
+ set_mutvar(Mutvar::in, Val::in),
+ [will_not_call_mercury],
"
Mutvar.object = Val;
").
@@ -371,28 +396,25 @@
:- pragma foreign_type("Erlang", generic_mutvar(T, S), "").
:- pragma foreign_proc("Erlang",
- new_mutvar(Val::in, Mutvar::out, S0::di, S::uo),
- [will_not_call_mercury, promise_pure],
+ new_mutvar(Val::in, Mutvar::out),
+ [will_not_call_mercury],
"
Mutvar = ets:new(mutvar, [set, public]),
- ets:insert(Mutvar, {value, Val}),
- S = S0
+ ets:insert(Mutvar, {value, Val})
").
:- pragma foreign_proc("Erlang",
- get_mutvar(Mutvar::in, Val::out, S0::di, S::uo),
- [will_not_call_mercury, promise_pure],
+ get_mutvar(Mutvar::in, Val::out),
+ [will_not_call_mercury],
"
- [{value, Val}] = ets:lookup(Mutvar, value),
- S = S0
+ [{value, Val}] = ets:lookup(Mutvar, value)
").
:- pragma foreign_proc("Erlang",
- set_mutvar(Mutvar::in, Val::in, S0::di, S::uo),
- [will_not_call_mercury, promise_pure],
+ set_mutvar(Mutvar::in, Val::in),
+ [will_not_call_mercury],
"
- ets:insert(Mutvar, {value, Val}),
- S = S0
+ ets:insert(Mutvar, {value, Val})
").
copy_mutvar(Mutvar, Copy, !S) :-
Index: tests/general/test_parsing_utils.exp
===================================================================
RCS file: /home/mercury1/repository/tests/general/test_parsing_utils.exp,v
retrieving revision 1.3
diff -u -r1.3 test_parsing_utils.exp
--- tests/general/test_parsing_utils.exp 18 Aug 2009 06:45:16 -0000 1.3
+++ tests/general/test_parsing_utils.exp 29 Sep 2009 12:43:44 -0000
@@ -240,3 +240,18 @@
Line = 2, Pos = 10
Line = 3, Pos = 1
Line = 1, Pos = 1
+expecting an operator
+12 + x-pow(x + 3; y)
+ ^
+syntax error
+abs(x ++ 3)
+ ^
+expecting an operator
+abs (x))
+ ^
+unknown function: f
+1 + 3 MoD 2 + f(3 + x)
+ ^
+expecting an operator
+1 + 1x
+ ^
Index: tests/general/test_parsing_utils.m
===================================================================
RCS file: /home/mercury1/repository/tests/general/test_parsing_utils.m,v
retrieving revision 1.3
diff -u -r1.3 test_parsing_utils.m
--- tests/general/test_parsing_utils.m 18 Aug 2009 06:45:16 -0000 1.3
+++ tests/general/test_parsing_utils.m 29 Sep 2009 12:43:44 -0000
@@ -21,6 +21,7 @@
:- implementation.
+:- import_module int.
:- import_module list.
:- import_module maybe.
:- import_module parsing_utils.
@@ -40,7 +41,13 @@
test_pos("123456789", 0, !IO),
test_pos("123456789\n123456789\n\n", 19, !IO),
test_pos("123456789\n123456789\n\n", 20, !IO),
- test_pos("", 0, !IO).
+ test_pos("", 0, !IO),
+ test_err("12 + x-pow(x + 3; y)", expr_top, !IO),
+ test_err("abs(x ++ 3)", expr_top, !IO),
+ test_err("abs (x))", expr_top, !IO),
+ test_err("1 + 3 MoD 2 + f(3 + x)", expr_top, !IO),
+ test_err("1 + 1x", expr_top, !IO),
+ true.
%-----------------------------------------------------------------------------%
@@ -377,4 +384,109 @@
String = string.string(State).
%-----------------------------------------------------------------------------%
+
+:- pred test_err(string::in, parser(expr)::in(parser), io::di, io::uo)
+ is cc_multi.
+
+test_err(Input, Parser, !IO) :-
+ parse(Input, Parser, Result),
+ (
+ Result = ok(Expr),
+ io.write(Expr, !IO),
+ io.nl(!IO)
+ ;
+ Result = error(MaybeMsg, LineNo, Col),
+ Lines = string.words_separator(unify('\n'), Input),
+ Line= list.det_index1(Lines, LineNo),
+ Spaces = string.from_char_list(list.duplicate(Col - 1, ' ')),
+ (
+ MaybeMsg = yes(Msg),
+ io.write_string(Msg ++ "\n", !IO)
+ ;
+ MaybeMsg = no,
+ io.write_string("syntax error\n", !IO)
+ ),
+ io.write_string(Line ++ "\n", !IO),
+ io.write_string(Spaces ++ "^\n", !IO)
+ ).
+
+:- type expr
+ ---> op(op, expr, expr)
+ ; function_application(string, list(expr))
+ ; integer(int)
+ ; variable(string).
+
+:- type op
+ ---> plus
+ ; minus
+ ; modulo.
+
+:- pred expr_top(src::in, expr::out, ps::in, ps::out) is semidet.
+
+expr_top(Src, Expr) -->
+ expr(Src, Expr),
+ eof(Src, _).
+
+:- pred expr(src::in, expr::out, ps::in, ps::out) is semidet.
+
+expr(Src, Expr) -->
+ term(Src, Term1),
+ ( op(Src, Op) ->
+ expr(Src, Expr2),
+ { Expr = op(Op, Term1, Expr2) }
+ ;
+ { Expr = Term1 }
+ ).
+
+:- pred term(src::in, expr::out, ps::in, ps::out) is semidet.
+
+term(Src, Term) -->
+ current_offset(Src, Start),
+ ( int_literal(Src, Int) ->
+ { Term = integer(Int) }
+ ;
+ id(Src, Id)
+ ->
+ ( punct("(", Src, _) ->
+ ( { known_function(Id) } ->
+ comma_separated_list(expr, Src, Args),
+ punct(")", Src, _),
+ { Term = function_application(Id, Args) }
+ ;
+ fail_with_message("unknown function: " ++ Id, Start, Src, Term)
+ )
+ ;
+ { Term = variable(Id) }
+ )
+ ;
+ { fail }
+ ).
+
+:- pred known_function(string::in) is semidet.
+
+known_function("abs").
+known_function("pow").
+
+:- pred op(src::in, op::out, ps::in, ps::out) is semidet.
+
+op(Src, Op) -->
+ ( punct("+", Src, _) ->
+ { Op = plus }
+ ; punct("-", Src, _) ->
+ { Op = minus }
+ ; ikeyword(id_chars, "mod", Src, _) ->
+ { Op = modulo }
+ ;
+ fail_with_message("expecting an operator", Src, Op)
+ ).
+
+:- pred id(src::in, string::out, ps::in, ps::out) is semidet.
+
+id(Src, Id) -->
+ identifier(id_chars, id_chars ++ "0123456789", Src, Id).
+
+:- func id_chars = string.
+
+id_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_".
+
%-----------------------------------------------------------------------------%
--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to: mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions: mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------
More information about the reviews
mailing list