[m-rev.] For review: parsing_utils.m
Ralph Becket
rafe at csse.unimelb.edu.au
Wed Jan 28 11:44:07 AEDT 2009
Add parsing_utils.m to the library, providing support for recursive descent
parsers.
NEWS:
Report the addition of parsing_utils.m to the library.
library/library.m:
Include parsing_utils.m.
library/parsing_utils.m:
Added.
library/string.m:
Make string.to_int fail on overflow. Amend comments to reflect this.
tests/general/Mmakefile:
tests/general/test_parsing_utils.exp:
tests/general/test_parsing_utils.m:
Test case for parsing_utils.m.
Index: NEWS
===================================================================
RCS file: /home/mercury1/repository/mercury/NEWS,v
retrieving revision 1.499
diff -u -r1.499 NEWS
--- NEWS 12 Jan 2009 02:28:45 -0000 1.499
+++ NEWS 28 Jan 2009 00:14:17 -0000
@@ -30,6 +30,9 @@
Changes to the Mercury standard library:
+* A new module, parsing_utils, has been added to provide support for
+ implementing recursive descent parsers.
+
* We have added extra modes to many of the fold style predicates in the
library in order to better support (mostly-)unique accumulators.
Index: library/library.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/library.m,v
retrieving revision 1.120
diff -u -r1.120 library.m
--- library/library.m 9 Mar 2008 09:39:24 -0000 1.120
+++ library/library.m 27 Jan 2009 23:44:31 -0000
@@ -89,6 +89,7 @@
:- import_module ops.
:- import_module pair.
:- import_module parser.
+:- import_module parsing_utils.
:- import_module pprint.
:- import_module pqueue.
:- import_module pretty_printer.
Index: library/parsing_utils.m
===================================================================
RCS file: library/parsing_utils.m
diff -N library/parsing_utils.m
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ library/parsing_utils.m 28 Jan 2009 00:16:17 -0000
@@ -0,0 +1,483 @@
+%---------------------------------------------------------------------------%
+% vim: ft=mercury ts=4 sw=4 et wm=0 tw=0
+%---------------------------------------------------------------------------%
+% Copyright (C) 2009 The University of Melbourne.
+% This file may only be copied under the terms of the GNU Library General
+% Public License - see the file COPYING.LIB in the Mercury distribution.
+%---------------------------------------------------------------------------%
+%
+% File: parsing_utils.m
+% Author: Ralph Becket <rafe at csse.unimelb.edu.au>
+% Stability: low
+%
+% Utilities for recursive descent parsers. Parsers take at least three
+% arguments: a source (src) containing the input string and a parser
+% state (ps) input/output pair tracking the current offset into the input.
+%
+% A new src and ps can be constructed by calling
+% new_src_and_ps(InputString, Src, !:PS). Parsing predicates are semidet
+% and typically take the form p(...input arguments..., Src, Result, !PS).
+% A parser matching variable assignments of the form `x = 42' might be
+% defined like this:
+%
+% var_assignment(Src, {Var, Value}, !PS) :-
+% var(Src, Var, !PS),
+% punct(Src, "=", !PS),
+% expr(Src, Expr, !PS).
+%
+% where var/4 and expr/4 are parsers for variables and expressions
+% respectively and punct/4 is provided by this module for matching
+% punctuation.
+%
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+
+:- module parsing_utils.
+
+:- interface.
+
+:- import_module char.
+:- import_module float.
+:- import_module int.
+:- import_module list.
+:- import_module maybe.
+:- import_module string.
+:- import_module unit.
+
+
+
+ % The parser source (input string).
+ %
+:- type src.
+
+ % The parser "state", passed around in DCG arguments.
+ %
+:- type ps.
+
+ % This type and inst are useful for specifying "standard" parser
+ % signatures.
+ %
+:- type parser(T) == pred(src, T, ps, ps).
+:- inst parser == ( pred(in, out, in, out) is semidet ).
+
+ % Construct a new parser source and state from a string.
+ %
+:- pred new_src_and_ps(string::in, src::out, ps::out) is det.
+
+ % Obtain the current offset from the start of the input string
+ % (the first character in the input has offset 0).
+ %
+:- pred current_offset(src::in, int::out,
+ ps::in, ps::out) is det.
+
+ % input_substring(Src, StartOffset, EndOffsetPlusOne, Substring)
+ % Copy the substring from the input occupying the offsets
+ % [StartOffset, EndOffsetPlusOne).
+ %
+:- pred input_substring(src::in, int::in, int::in, string::out) is semidet.
+
+ % Read the next char.
+ %
+:- pred next_char(src::in, char::out,
+ ps::in, ps::out) is semidet.
+
+ % Match a char from the given string.
+ %
+:- pred char_in_class(string::in, src::in, char::out,
+ ps::in, ps::out) is semidet.
+
+ % Match a string exactly and any subsequent whitespace.
+ %
+:- pred punct(string::in, src::in, unit::out,
+ ps::in, ps::out) is semidet.
+
+ % keyword(Src, IdChars, Keyword, _, !PS) matches Keyword exactly (i.e., it
+ % must not be followed by any character in IdChars) and any subsequent
+ % whitespace.
+ %
+:- pred keyword(string::in, string::in, src::in, unit::out,
+ ps::in, ps::out) is semidet.
+
+ % identifier(Src, InitIdChars, IdChars, Identifier, !PS) matches the next
+ % identifer (result in Identifier) comprising a char from InitIdChars
+ % followed by zero or more chars from IdChars. Any subsequent whitespace
+ % is consumed.
+ %
+:- pred identifier(string::in, string::in, src::in, string::out,
+ ps::in, ps::out) is semidet.
+
+ % Consume any whitespace.
+ %
+:- pred whitespace(src::in, unit::out,
+ ps::in, ps::out) is semidet.
+
+ % Consume any input up to, and including, the next newline character
+ % marking the end of the current line.
+ %
+:- pred skip_to_eol(src::in, unit::out,
+ ps::in, ps::out) is semidet.
+
+ % Succeed if we have reached the end of the input.
+ %
+:- pred eof(src::in, unit::out,
+ ps::in, ps::out) is semidet.
+
+ % Parse a float literal matching [-][0-9]+[.][0-9]+([Ee][-][0-9]+)?
+ % followed by any whitespace. The float_literal_as_string version simply
+ % returns the matched string. The float_literal version uses
+ % string.to_float to convert the output of float_literal_as_string; this
+ % may return an approximate answer since not all floating point numbers
+ % can be perfectly represented as Mercury floats.
+ %
+:- pred float_literal_as_string(src::in, string::out,
+ ps::in, ps::out) is semidet.
+:- pred float_literal(src::in, float::out,
+ ps::in, ps::out) is semidet.
+
+ % Parse an int literal matching [-][0-9]+, not followed by [.][0-9]+,
+ % followed by any whitespace. The int_literal_as_string version simply
+ % returns the matched string. The int_literal version uses string.to_int
+ % to convert the output of int_literal_as_string; this may fail if the
+ % number in question cannot be represented as a Mercury int.
+ %
+:- pred int_literal_as_string(src::in, string::out,
+ ps::in, ps::out) is semidet.
+:- pred int_literal(src::in, int::out,
+ ps::in, ps::out) is semidet.
+
+ % Parse an string literal. The string argument is the quote character.
+ % A backslash (\) character in the string makes the next character
+ % literal (e.g., for embedding quotes). These 'escaped' characters
+ % are included as-is in the result, along with the preceding backslash.
+ % Any following whitespace is also consumed.
+ %
+:- pred string_literal(char::in, src::in, string::out,
+ ps::in, ps::out) is semidet.
+
+ % optional(P, Src, Result, !PS) returns Result = yes(X), if P(Src, X, !PS),
+ % or Result = no if P does not succeed.
+ %
+:- pred optional(parser(T)::in(parser), src::in, maybe(T)::out,
+ ps::in, ps::out) is semidet.
+
+ % zero_or_more(P, Src, Xs, !PS) returns the list of results Xs obtained
+ % by repeatedly applying P until P fails. The nth item in Xs is
+ % the result from the nth application of P.
+ %
+:- pred zero_or_more(parser(T)::in(parser), src::in, list(T)::out,
+ ps::in, ps::out) is semidet.
+
+ % one_or_more(P, Src, Xs, !PS) returns the list of results Xs obtained
+ % by repeatedly applying P until P fails. The nth item in Xs is
+ % the result from the nth application of P. P must succeed at
+ % least once.
+ %
+:- pred one_or_more(parser(T)::in(parser), src::in, list(T)::out,
+ ps::in, ps::out) is semidet.
+
+ % brackets(L, R, P, Src, X, !PS) is equivalent to
+ % punct(L, Src, _, !PS), P(Src, X, !PS), punct(R, Src, _, !PS).
+ %
+:- pred brackets(string::in, string::in, parser(T)::in(parser), src::in,
+ T::out, ps::in, ps::out) is semidet.
+
+ % separated_list(Separator, P, Src, Xs, !PS) is like
+ % zero_or_more(P, Src, Xs, !PS) except that successive applications of
+ % P must be separated by punct(Separator, Src, _, !PS).
+ %
+:- pred separated_list(string::in, parser(T)::in(parser), src::in,
+ list(T)::out, ps::in, ps::out) is semidet.
+
+ % comma_separated_list(P, Src, Xs) is the same as
+ % separated_list(",", P, Src, Xs).
+ %
+:- pred comma_separated_list(parser(T)::in(parser), src::in, list(T)::out,
+ ps::in, ps::out) is semidet.
+
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+
+:- implementation.
+
+
+
+ % The parser "state" is just the offset into the input string.
+ %
+:- type ps == int.
+
+:- type src
+ ---> src(
+ input_length :: int,
+ input_string :: string
+ ).
+
+%-----------------------------------------------------------------------------%
+
+new_src_and_ps(InputString, Src, PS) :-
+ Src = src(string.length(InputString), InputString),
+ PS = 0.
+
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+% Low-level predicates.
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+
+current_offset(_Src, Offset, !PS) :-
+ Offset = !.PS.
+
+%-----------------------------------------------------------------------------%
+
+eof(Src, unit, !PS) :-
+ current_offset(Src, Offset, !PS),
+ Offset = Src ^ input_length.
+
+%-----------------------------------------------------------------------------%
+
+next_char(Src, Char, !PS) :-
+ current_offset(Src, Offset, !PS),
+ Offset < Src ^ input_length,
+ Char = Src ^ input_string ^ unsafe_elem(Offset),
+ !:PS = !.PS + 1.
+
+%-----------------------------------------------------------------------------%
+
+char_in_class(CharClass, Src, Char, !PS) :-
+ next_char(Src, Char, !PS),
+ string.contains_char(CharClass, Char).
+
+%-----------------------------------------------------------------------------%
+
+input_substring(Src, Start, EndPlusOne, Substring) :-
+ EndPlusOne =< Src ^ input_length,
+ Substring =
+ unsafe_substring(Src ^ input_string, Start, EndPlusOne - Start).
+
+%-----------------------------------------------------------------------------%
+
+:- pred match_string(string::in, src::in,
+ ps::in, ps::out) is semidet.
+
+match_string(MatchStr, Src, PS, PS + N) :-
+ N = string.length(MatchStr),
+ PS + N =< Src ^ input_length,
+ match_string_2(N, 0, MatchStr, PS, Src ^ input_string).
+
+
+:- pred match_string_2(int::in, int::in, string::in, int::in, string::in)
+ is semidet.
+
+match_string_2(N, I, MatchStr, Offset, Str) :-
+ ( if I < N then
+ MatchStr ^ unsafe_elem(I) = Str ^ unsafe_elem(Offset + I),
+ match_string_2(N, I + 1, MatchStr, Offset, Str)
+ else
+ true
+ ).
+
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+% Utility predicates.
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+
+optional(P, Src, Result, !PS) :-
+ ( if P(Src, X, !PS) then
+ Result = yes(X)
+ else
+ Result = no,
+ semidet_true
+ ).
+
+%-----------------------------------------------------------------------------%
+
+zero_or_more(P, Src, Result, !PS) :-
+ ( if P(Src, X, !PS), zero_or_more(P, Src, Xs, !PS) then
+ Result = [X | Xs]
+ else
+ Result = [],
+ semidet_true
+ ).
+
+%-----------------------------------------------------------------------------%
+
+one_or_more(P, Src, Result, !PS) :-
+ P(Src, X, !PS),
+ zero_or_more(P, Src, Xs, !PS),
+ Result = [X | Xs].
+
+%-----------------------------------------------------------------------------%
+
+brackets(L, R, P, Src, Result, !PS) :-
+ punct(L, Src, _, !PS),
+ P(Src, Result, !PS),
+ punct(R, Src, _, !PS).
+
+%-----------------------------------------------------------------------------%
+
+separated_list(Separator, P, Src, Result, !PS) :-
+ CommaP = ( pred(CommaPSrc::in, CommaPX::out, !.PS::in, !:PS::out)
+ is semidet :-
+ punct(Separator, CommaPSrc, _, !PS),
+ P(CommaPSrc, CommaPX, !PS)
+ ),
+ P(Src, X, !PS),
+ zero_or_more(CommaP, Src, Xs, !PS),
+ Result = [X | Xs].
+
+%-----------------------------------------------------------------------------%
+
+comma_separated_list(P, Src, Result, !PS) :-
+ separated_list(",", P, Src, Result, !PS).
+
+%-----------------------------------------------------------------------------%
+
+whitespace(Src, unit, !PS) :-
+ ( if
+ next_char(Src, C, !PS),
+ char.is_whitespace(C)
+ then
+ whitespace(Src, _, !PS)
+ else
+ true
+ ).
+
+%-----------------------------------------------------------------------------%
+
+skip_to_eol(Src, unit, !PS) :-
+ next_char(Src, C, !PS),
+ ( if C = ('\n') then true else skip_to_eol(Src, _, !PS) ).
+
+%-----------------------------------------------------------------------------%
+
+punct(Punct, Src, unit, !PS) :-
+ match_string(Punct, Src, !PS),
+ whitespace(Src, _, !PS).
+
+%---------------------------------------------------------------------------%
+
+keyword(IdChars, Keyword, Src, unit, !PS) :-
+ match_string(Keyword, Src, !PS),
+ not char_in_class(IdChars, Src, _, !.PS, _),
+ whitespace(Src, _, !PS).
+
+%-----------------------------------------------------------------------------%
+
+float_literal_as_string(Src, FloatStr, !PS) :-
+ current_offset(Src, Start, !PS),
+ ( if next_char(Src, ('-'), !PS) then true else true ),
+ digits(10, Src, _, !PS),
+ next_char(Src, ('.'), !PS),
+ digits(10, Src, _, !PS),
+ ( if char_in_class("eE", Src, _, !PS) then
+ ( if next_char(Src, ('-'), !PS) then true else true ),
+ digits(10, Src, _, !PS)
+ else
+ true
+ ),
+ current_offset(Src, EndPlusOne, !PS),
+ whitespace(Src, _, !PS),
+ input_substring(Src, Start, EndPlusOne, FloatStr).
+
+%-----------------------------------------------------------------------------%
+
+float_literal(Src, Float, !PS) :-
+ float_literal_as_string(Src, FloatStr, !PS),
+ string.to_float(FloatStr, Float).
+
+%-----------------------------------------------------------------------------%
+
+int_literal_as_string(Src, IntStr, !PS) :-
+ current_offset(Src, Start, !PS),
+ optional(char_in_class("-"), Src, _, !PS),
+ digits(10, Src, _, !PS),
+ not (
+ next_char(Src, ('.'), !PS),
+ digits(10, Src, _, !.PS, _)
+ ),
+ current_offset(Src, EndPlusOne, !PS),
+ whitespace(Src, _, !PS),
+ input_substring(Src, Start, EndPlusOne, IntStr).
+
+%-----------------------------------------------------------------------------%
+
+int_literal(Src, Int, !PS) :-
+ int_literal_as_string(Src, IntStr, !PS),
+ string.to_int(IntStr, Int).
+
+%-----------------------------------------------------------------------------%
+
+:- pred digits(int::in, src::in, unit::out,
+ ps::in, ps::out) is semidet.
+
+digits(Base, Src, unit, !PS) :-
+ next_char(Src, C, !PS),
+ char.digit_to_int(C, D),
+ D < Base,
+ digits_2(Base, Src, _, !PS).
+
+
+:- pred digits_2(int::in, src::in, unit::out,
+ ps::in, ps::out) is semidet.
+
+digits_2(Base, Src, unit, !PS) :-
+ ( if
+ next_char(Src, C, !PS),
+ char.digit_to_int(C, D),
+ D < Base
+ then
+ digits_2(Base, Src, _, !PS)
+ else
+ true
+ ).
+
+%-----------------------------------------------------------------------------%
+
+string_literal(QuoteChar, Src, String, !PS) :-
+ current_offset(Src, Start, !PS),
+ next_char(Src, QuoteChar, !PS),
+ string_literal_2(Src, QuoteChar, _, !PS),
+ current_offset(Src, EndPlusOne, !PS),
+ whitespace(Src, _, !PS),
+ input_substring(Src, Start + 1, EndPlusOne - 1, String).
+
+%-----------------------------------------------------------------------------%
+
+:- pred string_literal_2(src::in, char::in, unit::out,
+ ps::in, ps::out) is semidet.
+
+string_literal_2(Src, QuoteChar, unit, !PS) :-
+ next_char(Src, C, !PS),
+ ( if C = QuoteChar then
+ true
+ else if C = ('\\') then
+ next_char(Src, _, !PS),
+ string_literal_2(Src, QuoteChar, _, !PS)
+ else
+ string_literal_2(Src, QuoteChar, _, !PS)
+ ).
+
+%-----------------------------------------------------------------------------%
+
+identifier(InitIdChars, IdChars, Src, Identifier, !PS) :-
+ current_offset(Src, Start, !PS),
+ char_in_class(InitIdChars, Src, _, !PS),
+ identifier_2(IdChars, Src, _, !PS),
+ current_offset(Src, EndPlusOne, !PS),
+ whitespace(Src, _, !PS),
+ input_substring(Src, Start, EndPlusOne, Identifier).
+
+%-----------------------------------------------------------------------------%
+
+:- pred identifier_2(string::in, src::in, unit::out,
+ ps::in, ps::out) is semidet.
+
+identifier_2(IdChars, Src, unit, !PS) :-
+ ( if char_in_class(IdChars, Src, _, !PS) then
+ identifier_2(IdChars, Src, _, !PS)
+ else
+ true
+ ).
+
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
Index: library/string.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/string.m,v
retrieving revision 1.276
diff -u -r1.276 string.m
--- library/string.m 3 Sep 2008 04:39:16 -0000 1.276
+++ library/string.m 28 Jan 2009 00:32:37 -0000
@@ -306,6 +306,7 @@
% As above, but fail instead of throwing an exception if the
% list contains a null character.
+ %
:- pred string.semidet_from_char_list(list(char)::in, string::uo) is semidet.
% Same as string.from_char_list, except that it reverses the order
@@ -323,12 +324,14 @@
% Converts a signed base 10 string to an int; throws an exception
% if the string argument does not match the regexp [+-]?[0-9]+
+ % or the number is not in the range [int.min_int+1, int.max_int].
%
:- func string.det_to_int(string) = int.
% Convert a string to an int. The string must contain only digits,
% optionally preceded by a plus or minus sign. If the string does
- % not match this syntax, string.to_int fails.
+ % not match this syntax or the number is not in the range
+ % [int.min_int+1, int.max_int], string.to_int fails.
%
:- pred string.to_int(string::in, int::out) is semidet.
@@ -336,13 +339,15 @@
% must contain one or more digits in the specified base, optionally
% preceded by a plus or minus sign. For bases > 10, digits 10 to 35
% are represented by the letters A-Z or a-z. If the string does not match
- % this syntax, the predicate fails.
+ % this syntax or the number is not in the range
+ % [int.min_int+1, int.max_int], the predicate fails.
%
:- pred string.base_string_to_int(int::in, string::in, int::out) is semidet.
% Converts a signed base N string to an int; throws an exception
% if the string argument is not precisely an optional sign followed by
- % a non-empty string of base N digits.
+ % a non-empty string of base N digits and the number is in the range
+ % [int.min_int+1, int.max_int].
%
:- func string.det_base_string_to_int(int, string) = int.
@@ -944,9 +949,11 @@
:- pred accumulate_int(int::in, char::in, int::in, int::out) is semidet.
-accumulate_int(Base, Char, N, (Base * N) + M) :-
+accumulate_int(Base, Char, N0, N) :-
char.digit_to_int(Char, M),
- M < Base.
+ M < Base,
+ N = (Base * N0) + M,
+ N0 =< N. % Fail on overflow.
% It is important to inline string.index and string.index_det.
% so that the compiler can do loop invariant hoisting
Index: tests/general/Mmakefile
===================================================================
RCS file: /home/mercury1/repository/tests/general/Mmakefile,v
retrieving revision 1.56
diff -u -r1.56 Mmakefile
--- tests/general/Mmakefile 1 Dec 2008 00:32:54 -0000 1.56
+++ tests/general/Mmakefile 27 Jan 2009 23:41:10 -0000
@@ -67,6 +67,7 @@
string_test \
string_test_2 \
string_to_float \
+ test_parsing_utils \
test_univ \
unreachable \
unsafe_uniqueness
Index: tests/general/test_parsing_utils.exp
===================================================================
RCS file: tests/general/test_parsing_utils.exp
diff -N tests/general/test_parsing_utils.exp
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ tests/general/test_parsing_utils.exp 27 Jan 2009 23:40:38 -0000
@@ -0,0 +1,216 @@
+pass: next_char on ""
+ failed as expected
+pass: next_char on "123"
+ returned '1' as expected
+ [1 chars consumed]
+pass: char_in_class("123") on ""
+ failed as expected
+pass: char_in_class("123") on "abc"
+ failed as expected
+pass: char_in_class("123") on "123"
+ returned '1' as expected
+ [1 chars consumed]
+pass: punct("!") on ""
+ failed as expected
+pass: punct("!") on "abc"
+ failed as expected
+pass: punct("!") on "*"
+ failed as expected
+pass: punct("!") on "!"
+ returned unit as expected
+ [1 chars consumed]
+pass: keyword("ABC", "ABC") on ""
+ failed as expected
+pass: keyword("ABC", "ABC") on "123"
+ failed as expected
+pass: keyword("ABC", "ABC") on "ABCA"
+ failed as expected
+pass: keyword("ABC", "ABC") on "ABC 123"
+ returned unit as expected
+ [4 chars consumed]
+pass: identifier("ABC", "ABCabc_") on ""
+ failed as expected
+pass: identifier("ABC", "ABCabc_") on "abc"
+ failed as expected
+pass: identifier("ABC", "ABCabc_") on "_"
+ failed as expected
+pass: identifier("ABC", "ABCabc_") on "A"
+ returned "A" as expected
+ [1 chars consumed]
+pass: identifier("ABC", "ABCabc_") on "Ab_c"
+ returned "Ab_c" as expected
+ [4 chars consumed]
+pass: identifier("ABC", "ABCabc_") on "*"
+ failed as expected
+pass: identifier("ABC", "ABCabc_") on "Abc !"
+ returned "Abc" as expected
+ [4 chars consumed]
+pass: whitespace on ""
+ returned unit as expected
+ [0 chars consumed]
+pass: whitespace on "123"
+ returned unit as expected
+ [0 chars consumed]
+pass: whitespace on " "
+ returned unit as expected
+ [3 chars consumed]
+pass: whitespace on " 123"
+ returned unit as expected
+ [3 chars consumed]
+pass: skip_to_eol on ""
+ failed as expected
+pass: skip_to_eol on "blah blah
+"
+ returned unit as expected
+ [10 chars consumed]
+pass: skip_to_eol on "blah blah
+123"
+ returned unit as expected
+ [10 chars consumed]
+pass: eof on "123"
+ failed as expected
+pass: eof on ""
+ returned unit as expected
+ [0 chars consumed]
+pass: float_literal_as_string on ""
+ failed as expected
+pass: float_literal_as_string on "abc"
+ failed as expected
+pass: float_literal_as_string on "123"
+ failed as expected
+pass: float_literal_as_string on "123.0 abc"
+ returned "123.0" as expected
+ [8 chars consumed]
+pass: float_literal_as_string on "123.0e1 abc"
+ returned "123.0e1" as expected
+ [10 chars consumed]
+pass: float_literal_as_string on "-123.0 abc"
+ returned "-123.0" as expected
+ [9 chars consumed]
+pass: float_literal_as_string on "-123.0e1 abc"
+ returned "-123.0e1" as expected
+ [11 chars consumed]
+pass: float_literal_as_string on "-123.0E-1 abc"
+ returned "-123.0E-1" as expected
+ [12 chars consumed]
+pass: float_literal on ""
+ failed as expected
+pass: float_literal on "abc"
+ failed as expected
+pass: float_literal on "123"
+ failed as expected
+pass: float_literal on "123.0 abc"
+ returned 123.0 as expected
+ [8 chars consumed]
+pass: float_literal on "123.0e1 abc"
+ returned 1230.0 as expected
+ [10 chars consumed]
+pass: float_literal on "-123.0 abc"
+ returned -123.0 as expected
+ [9 chars consumed]
+pass: float_literal on "-123.0e1 abc"
+ returned -1230.0 as expected
+ [11 chars consumed]
+pass: float_literal on "-123.0E-1 abc"
+ returned -12.3 as expected
+ [12 chars consumed]
+pass: int_literal_as_string on ""
+ failed as expected
+pass: int_literal_as_string on "abc"
+ failed as expected
+pass: int_literal_as_string on "123.0"
+ failed as expected
+pass: int_literal_as_string on "123 abc"
+ returned "123" as expected
+ [6 chars consumed]
+pass: int_literal_as_string on "-123 abc"
+ returned "-123" as expected
+ [7 chars consumed]
+pass: int_literal_as_string on "999999999999999999999 abc"
+ returned "999999999999999999999" as expected
+ [24 chars consumed]
+pass: int_literal on ""
+ failed as expected
+pass: int_literal on "abc"
+ failed as expected
+pass: int_literal on "123.0"
+ failed as expected
+pass: int_literal on "123 abc"
+ returned 123 as expected
+ [6 chars consumed]
+pass: int_literal on "-123 abc"
+ returned -123 as expected
+ [7 chars consumed]
+fail: int_literal on "999999999999999999999 abc"
+ failed as expected
+pass: string_literal('\"') on ""
+ failed as expected
+pass: string_literal('\"') on ""123" abc"
+ returned "123" as expected
+ [8 chars consumed]
+pass: string_literal('\"') on ""1\"2\"3" abc"
+ returned "1\\\"2\\\"3" as expected
+ [12 chars consumed]
+pass: string_literal('\'') on ""
+ failed as expected
+pass: string_literal('\'') on "'123' abc"
+ returned "123" as expected
+ [8 chars consumed]
+pass: string_literal('\'') on "'1\'2\'3' abc"
+ returned "1\\\'2\\\'3" as expected
+ [12 chars consumed]
+pass: optional(punct("!")) on ""
+ returned no as expected
+ [0 chars consumed]
+pass: optional(punct("!")) on "abc"
+ returned no as expected
+ [0 chars consumed]
+pass: optional(punct("!")) on "! "
+ returned yes(unit) as expected
+ [4 chars consumed]
+pass: zero_or_more(punct("!")) on ""
+ returned [] as expected
+ [0 chars consumed]
+pass: zero_or_more(punct("!")) on "abc"
+ returned [] as expected
+ [0 chars consumed]
+pass: zero_or_more(punct("!")) on "!!! abc"
+ returned [unit, unit, unit] as expected
+ [6 chars consumed]
+pass: one_or_more(punct("!")) on ""
+ failed as expected
+pass: one_or_more(punct("!")) on "abc"
+ failed as expected
+pass: one_or_more(punct("!")) on "!!! abc"
+ returned [unit, unit, unit] as expected
+ [6 chars consumed]
+pass: brackets("(", ")", punct("!")) on ""
+ failed as expected
+pass: brackets("(", ")", punct("!")) on "abc"
+ failed as expected
+pass: brackets("(", ")", punct("!")) on "(abc)"
+ failed as expected
+pass: brackets("(", ")", punct("!")) on "(!) abc"
+ returned unit as expected
+ [6 chars consumed]
+pass: separated_list("+", punct("!")) on ""
+ failed as expected
+pass: separated_list("+", punct("!")) on "abc"
+ failed as expected
+pass: separated_list("+", punct("!")) on "! abc"
+ returned [unit] as expected
+ [4 chars consumed]
+pass: separated_list("+", punct("!")) on "!+ ! + ! abc"
+ returned [unit, unit, unit] as expected
+ [11 chars consumed]
+pass: comma_separated_list(punct("!")) on ""
+ failed as expected
+pass: comma_separated_list(punct("!")) on "abc"
+ failed as expected
+pass: comma_separated_list(punct("!")) on "! abc"
+ returned [unit] as expected
+ [4 chars consumed]
+pass: comma_separated_list(punct("!")) on "!, ! , ! abc"
+ returned [unit, unit, unit] as expected
+ [11 chars consumed]
Index: tests/general/test_parsing_utils.m
===================================================================
RCS file: tests/general/test_parsing_utils.m
diff -N tests/general/test_parsing_utils.m
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ tests/general/test_parsing_utils.m 27 Jan 2009 23:40:38 -0000
@@ -0,0 +1,313 @@
+%-----------------------------------------------------------------------------%
+% test_parsing_utils.m
+% Ralph Becket <rafe at csse.unimelb.edu.au>
+% Tue Jan 27 13:44:59 EST 2009
+% vim: ft=mercury ts=4 sw=4 et wm=0 tw=0
+%
+%-----------------------------------------------------------------------------%
+
+:- module test_parsing_utils.
+
+:- interface.
+
+:- import_module io.
+
+
+
+:- pred main(io::di, io::uo) is cc_multi.
+
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module list.
+:- import_module maybe.
+:- import_module parsing_utils.
+:- import_module solutions.
+:- import_module string.
+
+%-----------------------------------------------------------------------------%
+
+main(!IO) :-
+ unsorted_aggregate(run_test, io.write_string, !IO).
+
+%-----------------------------------------------------------------------------%
+
+:- pred run_test(string::out) is multi.
+
+run_test(Result) :-
+ test_case(ParserName, Parser, TestString, ExpectedOutput),
+ new_src_and_ps(TestString, Src, PS0),
+ ( if Parser(Src, ActualOutput0, PS0, PS) then
+ ActualOutput = yes(ActualOutput0),
+ current_offset(Src, CurrentOffset0, PS, _),
+ CurrentOffset = yes(CurrentOffset0)
+ else
+ ActualOutput = no,
+ CurrentOffset = no
+ ),
+ (
+ ExpectedOutput = no,
+ ActualOutput = no,
+ Outcome = "failed as expected",
+ PassFail = "pass"
+ ;
+ ExpectedOutput = no,
+ ActualOutput = yes(ActualOutputString),
+ Outcome = "should have failed, but returned \"" ++
+ ActualOutputString ++ "\"",
+ PassFail = "fail"
+ ;
+ ExpectedOutput = yes(ExpectedOutputString),
+ ActualOutput = no,
+ Outcome = "failed, but should have returned \"" ++
+ ExpectedOutputString ++ "\"",
+ PassFail = "fail"
+ ;
+ ExpectedOutput = yes(ExpectedOutputString),
+ ActualOutput = yes(ActualOutputString),
+ Outcome0 = "returned " ++ ActualOutputString,
+ ( if ActualOutputString = ExpectedOutputString then
+ Outcome = Outcome0 ++ " as expected",
+ PassFail = "pass"
+ else
+ Outcome = Outcome0 ++ ", but should have returned " ++
+ ExpectedOutputString,
+ PassFail = "fail"
+ )
+ ),
+ Result = PassFail ++ ": " ++
+ ParserName ++ " on \"" ++ TestString ++ "\"\n\t" ++
+ Outcome ++
+ ( if CurrentOffset = yes(CO) then
+ string.format("\n\t[%d chars consumed]", [i(CO)])
+ else
+ ""
+ ) ++
+ "\n".
+
+%-----------------------------------------------------------------------------%
+
+:- pred test_case(
+ string::out,
+ pred(src, string, ps, ps)::out(pred(in, out, in, out) is semidet),
+ string::out,
+ maybe(string)::out)
+ is multi.
+
+test_case("next_char", stringify(next_char),
+ "", no).
+test_case("next_char", stringify(next_char),
+ "123", yes("'1'")).
+
+test_case("char_in_class(\"123\")", stringify(char_in_class("123")),
+ "", no).
+test_case("char_in_class(\"123\")", stringify(char_in_class("123")),
+ "abc", no).
+test_case("char_in_class(\"123\")", stringify(char_in_class("123")),
+ "123", yes("'1'")).
+
+test_case("punct(\"!\")", stringify(punct("!")),
+ "", no).
+test_case("punct(\"!\")", stringify(punct("!")),
+ "abc", no).
+test_case("punct(\"!\")", stringify(punct("!")),
+ "*", no).
+test_case("punct(\"!\")", stringify(punct("!")),
+ "!", yes("unit")).
+
+test_case("keyword(\"ABC\", \"ABC\")", stringify(keyword("ABC", "ABC")),
+ "", no).
+test_case("keyword(\"ABC\", \"ABC\")", stringify(keyword("ABC", "ABC")),
+ "123", no).
+test_case("keyword(\"ABC\", \"ABC\")", stringify(keyword("ABC", "ABC")),
+ "ABCA", no).
+test_case("keyword(\"ABC\", \"ABC\")", stringify(keyword("ABC", "ABC")),
+ "ABC 123", yes("unit")).
+
+test_case("identifier(\"ABC\", \"ABCabc_\")", stringify(identifier("ABC", "ABCabc_")),
+ "", no).
+test_case("identifier(\"ABC\", \"ABCabc_\")", stringify(identifier("ABC", "ABCabc_")),
+ "abc", no).
+test_case("identifier(\"ABC\", \"ABCabc_\")", stringify(identifier("ABC", "ABCabc_")),
+ "_", no).
+test_case("identifier(\"ABC\", \"ABCabc_\")", stringify(identifier("ABC", "ABCabc_")),
+ "A", yes("\"A\"")).
+test_case("identifier(\"ABC\", \"ABCabc_\")", stringify(identifier("ABC", "ABCabc_")),
+ "Ab_c", yes("\"Ab_c\"")).
+test_case("identifier(\"ABC\", \"ABCabc_\")", stringify(identifier("ABC", "ABCabc_")),
+ "*", no).
+test_case("identifier(\"ABC\", \"ABCabc_\")", stringify(identifier("ABC", "ABCabc_")),
+ "Abc !", yes("\"Abc\"")).
+
+test_case("whitespace", stringify(whitespace),
+ "", yes("unit")).
+test_case("whitespace", stringify(whitespace),
+ "123", yes("unit")).
+test_case("whitespace", stringify(whitespace),
+ " ", yes("unit")).
+test_case("whitespace", stringify(whitespace),
+ " 123", yes("unit")).
+
+test_case("skip_to_eol", stringify(skip_to_eol),
+ "", no).
+test_case("skip_to_eol", stringify(skip_to_eol),
+ "blah blah\n", yes("unit")).
+test_case("skip_to_eol", stringify(skip_to_eol),
+ "blah blah\n123", yes("unit")).
+
+test_case("eof", stringify(eof),
+ "123", no).
+test_case("eof", stringify(eof),
+ "", yes("unit")).
+
+test_case("float_literal_as_string", stringify(float_literal_as_string),
+ "", no).
+test_case("float_literal_as_string", stringify(float_literal_as_string),
+ "abc", no).
+test_case("float_literal_as_string", stringify(float_literal_as_string),
+ "123", no).
+test_case("float_literal_as_string", stringify(float_literal_as_string),
+ "123.0 abc", yes("\"123.0\"")).
+test_case("float_literal_as_string", stringify(float_literal_as_string),
+ "123.0e1 abc", yes("\"123.0e1\"")).
+test_case("float_literal_as_string", stringify(float_literal_as_string),
+ "-123.0 abc", yes("\"-123.0\"")).
+test_case("float_literal_as_string", stringify(float_literal_as_string),
+ "-123.0e1 abc", yes("\"-123.0e1\"")).
+test_case("float_literal_as_string", stringify(float_literal_as_string),
+ "-123.0E-1 abc", yes("\"-123.0E-1\"")).
+
+test_case("float_literal", stringify(float_literal),
+ "", no).
+test_case("float_literal", stringify(float_literal),
+ "abc", no).
+test_case("float_literal", stringify(float_literal),
+ "123", no).
+test_case("float_literal", stringify(float_literal),
+ "123.0 abc", yes("123.0")).
+test_case("float_literal", stringify(float_literal),
+ "123.0e1 abc", yes("1230.0")).
+test_case("float_literal", stringify(float_literal),
+ "-123.0 abc", yes("-123.0")).
+test_case("float_literal", stringify(float_literal),
+ "-123.0e1 abc", yes("-1230.0")).
+test_case("float_literal", stringify(float_literal),
+ "-123.0E-1 abc", yes("-12.3")).
+
+test_case("int_literal_as_string", stringify(int_literal_as_string),
+ "", no).
+test_case("int_literal_as_string", stringify(int_literal_as_string),
+ "abc", no).
+test_case("int_literal_as_string", stringify(int_literal_as_string),
+ "123.0", no).
+test_case("int_literal_as_string", stringify(int_literal_as_string),
+ "123 abc", yes("\"123\"")).
+test_case("int_literal_as_string", stringify(int_literal_as_string),
+ "-123 abc", yes("\"-123\"")).
+test_case("int_literal_as_string", stringify(int_literal_as_string),
+ "999999999999999999999 abc", yes("\"999999999999999999999\"")).
+
+test_case("int_literal", stringify(int_literal),
+ "", no).
+test_case("int_literal", stringify(int_literal),
+ "abc", no).
+test_case("int_literal", stringify(int_literal),
+ "123.0", no).
+test_case("int_literal", stringify(int_literal),
+ "123 abc", yes("123")).
+test_case("int_literal", stringify(int_literal),
+ "-123 abc", yes("-123")).
+test_case("int_literal", stringify(int_literal),
+ "999999999999999999999 abc", no).
+
+test_case("string_literal('\\\"')", stringify(string_literal('\"')),
+ "", no).
+test_case("string_literal('\\\"')", stringify(string_literal('\"')),
+ "\"123\" abc", yes("\"123\"")).
+test_case("string_literal('\\\"')", stringify(string_literal('\"')),
+ "\"1\\\"2\\\"3\" abc", yes("\"1\\\\\\\"2\\\\\\\"3\"")).
+test_case("string_literal('\\\'')", stringify(string_literal('\'')),
+ "", no).
+test_case("string_literal('\\\'')", stringify(string_literal('\'')),
+ "\'123\' abc", yes("\"123\"")).
+test_case("string_literal('\\\'')", stringify(string_literal('\'')),
+ "\'1\\\'2\\\'3\' abc", yes("\"1\\\\\\\'2\\\\\\\'3\"")).
+
+test_case("optional(punct(\"!\"))", stringify(optional(punct("!"))),
+ "", yes("no")).
+test_case("optional(punct(\"!\"))", stringify(optional(punct("!"))),
+ "abc", yes("no")).
+test_case("optional(punct(\"!\"))", stringify(optional(punct("!"))),
+ "! ", yes("yes(unit)")).
+
+test_case("zero_or_more(punct(\"!\"))", stringify(zero_or_more(punct("!"))),
+ "", yes("[]")).
+test_case("zero_or_more(punct(\"!\"))", stringify(zero_or_more(punct("!"))),
+ "abc", yes("[]")).
+test_case("zero_or_more(punct(\"!\"))", stringify(zero_or_more(punct("!"))),
+ "!!! abc", yes("[unit, unit, unit]")).
+
+test_case("one_or_more(punct(\"!\"))", stringify(one_or_more(punct("!"))),
+ "", no).
+test_case("one_or_more(punct(\"!\"))", stringify(one_or_more(punct("!"))),
+ "abc", no).
+test_case("one_or_more(punct(\"!\"))", stringify(one_or_more(punct("!"))),
+ "!!! abc", yes("[unit, unit, unit]")).
+
+test_case("brackets(\"(\", \")\", punct(\"!\"))",
+ stringify(brackets("(", ")", punct("!"))),
+ "", no).
+test_case("brackets(\"(\", \")\", punct(\"!\"))",
+ stringify(brackets("(", ")", punct("!"))),
+ "abc", no).
+test_case("brackets(\"(\", \")\", punct(\"!\"))",
+ stringify(brackets("(", ")", punct("!"))),
+ "(abc)", no).
+test_case("brackets(\"(\", \")\", punct(\"!\"))",
+ stringify(brackets("(", ")", punct("!"))),
+ "(!) abc", yes("unit")).
+
+test_case("separated_list(\"+\", punct(\"!\"))",
+ stringify(separated_list("+", punct("!"))),
+ "", no).
+test_case("separated_list(\"+\", punct(\"!\"))",
+ stringify(separated_list("+", punct("!"))),
+ "abc", no).
+test_case("separated_list(\"+\", punct(\"!\"))",
+ stringify(separated_list("+", punct("!"))),
+ "! abc", yes("[unit]")).
+test_case("separated_list(\"+\", punct(\"!\"))",
+ stringify(separated_list("+", punct("!"))),
+ "!+ ! + ! abc", yes("[unit, unit, unit]")).
+
+test_case("comma_separated_list(punct(\"!\"))",
+ stringify(comma_separated_list(punct("!"))),
+ "", no).
+test_case("comma_separated_list(punct(\"!\"))",
+ stringify(comma_separated_list(punct("!"))),
+ "abc", no).
+test_case("comma_separated_list(punct(\"!\"))",
+ stringify(comma_separated_list(punct("!"))),
+ "! abc", yes("[unit]")).
+test_case("comma_separated_list(punct(\"!\"))",
+ stringify(comma_separated_list(punct("!"))),
+ "!, ! , ! abc", yes("[unit, unit, unit]")).
+
+%-----------------------------------------------------------------------------%
+
+:- pred stringify(
+ pred(src, T, ps, ps)::in(pred(in, out, in, out) is semidet),
+ src::in,
+ string::out,
+ ps::in,
+ ps::out)
+ is semidet.
+
+stringify(P, Src, String, !PS) :-
+ P(Src, X, !PS),
+ String = string.string(X).
+
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to: mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions: mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------
More information about the reviews
mailing list