[m-rev.] New regex module in extras/lex

Ralph Becket rafe at cs.mu.OZ.AU
Fri Nov 29 15:04:53 AEDT 2002


I believe I have addressed all of Fergus' points.  Here's the interdiff:


diff -u README.regex README.regex
--- README.regex	26 Nov 2002 01:08:56 -0000
+++ README.regex	29 Nov 2002 03:55:19 -0000
@@ -1,7 +1,8 @@
 THE REGEX MODULE
 
-The regex/1 function converts standard string-type regular expression
-definitions into values of type regex.
+The regex/1 function converts conventional string-type regular expression
+definitions into values of type regex.  These regex values can be used
+for string matching and search-and-replace operations.
 
 EXAMPLE OF REGEXES
 
diff -u regex.m regex.m
--- regex.m	28 Nov 2002 02:52:50 -0000
+++ regex.m	29 Nov 2002 04:02:36 -0000
@@ -8,10 +8,22 @@
 % functionality using regular expressions defined as strings of the
 % form recognised by tools such as sed and grep.
 %
-% TODO
-% - Add <regex>{n[,m]} regexps.
-% - Add character classes (e.g. [:space:]) to sets.
-% - Add chomp function to string.m.
+% The regular expression language matched is a subset of POSIX 1003.2
+% with a few minor differences:
+% - bounds {[n][,[m]]} are not recognised;
+% - collating elements [.ab.] in character sets are not recognised;
+% - equivalence classes [=x=] in character sets are not recognised;
+% - character classes [:space:] in character sets are not recognised;
+% - special inter-character patterns such as ^, $, \<, \> are not recognised;
+% - to include a literal `-' in a character set, use the range `---' or
+% include `-' as one end-point of a range (e.g. [!--]);
+% - regex will complain if `-' appears other than as an end-point
+% of a range or the range delimiter in a character set;
+% - regex is a little more sensible about including `[' and `]' in character
+% sets - either character can appear as an end-point of a range;
+% - literal `)' must be backslash escaped, even in the absence of an `(';
+% - `.' matches any character except `\n';
+% - [^...] matches any character that is neither in ... nor `\n'.
 %
 %-----------------------------------------------------------------------------%
 
@@ -35,9 +47,10 @@
     % memoize this function for efficiency (it is cheaper to look up a
     % string in a hash table than to parse it and recompute the regex.)
     %
-    % A regex string obeys the following grammar
-    % (concatenation of <char> takes highest precedence,
-    % otherwise concatenation has lowest precedence):
+    % A regex string obeys the following grammar.  Note that alternation
+    % has lowest priority, followed by concatenation, followed by
+    % *, + and ?.  Hence "ab*" is equivalent to "a(b*)" and not "(ab)*"
+    % while "ab|cd" is equivalent to "(ab)|(cd)" and not "a(b|c)d".
     %
     % <regex> ::= <char>                % Single char
     %           |  <regex><regex>       % Concatenation
@@ -189,10 +202,10 @@
     % The possible semi-parsed regexes.
     %
 :- type re
-    --->    re(regexp)                      % An ordinary regex.
-    ;       chars(regexp)                   % A sequence of chars.
+    --->    re(regexp)                      % An ordinary regexp.
+    ;       char(regexp)                    % A single char regexp.
     ;       lpar                            % A left parenthesis.
-    ;       alt.                            % An alternation.
+    ;       alt(regexp).                    % An alternation.
 
 :- type chars == list(char).
 
@@ -216,7 +229,7 @@
     %
 compile_regex(S, C, res(REs)) =
     (      if C = ('.')  then res([re(dot) | REs])
-      else if C = ('|')  then res([alt | REs])
+      else if C = ('|')  then res(alt(S, REs))
       else if C = ('*')  then res(star(S, REs))
       else if C = ('+')  then res(plus(S, REs))
       else if C = ('?')  then res(opt(S, REs))
@@ -225,13 +238,13 @@
       else if C = ('[')  then set1(REs)
       else if C = (']')  then regex_error("`]' without opening `['", S)
       else if C = ('\\') then esc(REs)
-      else                    res(push_char(C, REs))
+      else                    res([char(re(C)) | REs])
     ).
 
     % esc: the current char has been \ escaped.
     %
 compile_regex(_, C, esc(REs)) =
-    res(push_char(C, REs)).
+    res([char(re(C)) | REs]).
 
     % set1: we have just seen the opening [.
     %
@@ -315,31 +328,19 @@
     regex_error("`[' without closing `]'", S).
 
 finish_regex(S, res(REs)) =
-    ( if   rpar(S, REs ++ [lpar]) = [re(RE)]
-      then RE
+    ( if   rpar(S, REs ++ [lpar]) = [RE]
+      then extract_regex(RE)
       else regex_error("`(' without closing `)'", S)
     ).
 
 %-----------------------------------------------------------------------------%
 
-    % Push a char regex.
-    %
-:- func push_char(char, list(re)) = list(re).
-
-push_char(C, REs) =
-    ( if   REs = [chars(Cs) | REs0]
-      then [chars(Cs ++ C)  | REs0]
-      else [chars(re(C))    | REs ]
-    ).
-
-%-----------------------------------------------------------------------------%
-
     % The *, + and ? regexes.
     %
 :- func star(string, list(re)) = list(re).
 
 star(S, REs) =
-    ( if   ( REs = [re(RE) | REs0] ; REs = [chars(RE) | REs0] )
+    ( if   ( REs = [re(RE) | REs0] ; REs = [char(RE) | REs0] )
       then [re(*(RE)) | REs0]
       else regex_error("`*' without preceding regex", S)
     ).
@@ -347,7 +348,7 @@
 :- func plus(string, list(re)) = list(re).
 
 plus(S, REs) =
-    ( if   ( REs = [re(RE) | REs0] ; REs = [chars(RE) | REs0] )
+    ( if   ( REs = [re(RE) | REs0] ; REs = [char(RE) | REs0] )
       then [re(+(RE)) | REs0]
       else regex_error("`+' without preceding regex", S)
     ).
@@ -355,63 +356,72 @@
 :- func opt(string, list(re)) = list(re).
 
 opt(S, REs) =
-    ( if   ( REs = [re(RE) | REs0] ; REs = [chars(RE) | REs0] )
+    ( if   ( REs = [re(RE) | REs0] ; REs = [char(RE) | REs0] )
       then [re(?(RE)) | REs0]
       else regex_error("`?' without preceding regex", S)
     ).
 
 %-----------------------------------------------------------------------------%
 
-    % Handle a closing parenthesis.
+    % Handle an alternation sign.
     %
-:- func rpar(string, list(re)) = list(re).
-
-rpar(S, REs) =
-    (      if REs = [chars(RE)              | REs0]
-      then    rpar(S, [re(RE)               | REs0])
+:- func alt(string, list(re)) = list(re).
 
-      else if REs = [alt, lpar              | REs0]
-      then    [nil                          | REs0]
+alt(S, REs) =
+    (      if REs =   [alt(_)                                         | _   ]
+      then    regex_error("`|' immediately following `|'", S)
 
-      else if REs = [RE_A, alt, lpar        | REs0]
-      then    [alt(nil, RE_A)               | REs0]
+      else if REs =   [lpar                                           | _   ]
+      then    regex_error("`|' immediately following `('", S)
 
-      else if REs = [RE_A, alt, RE_B        | REs0]
-      then    rpar(S, [alt(RE_B, RE_A)      | REs0])
+      else if REs =   [RE_B, alt(RE_A)                                | REs0]
+      then            [alt(RE_A or extract_regex(RE_B))               | REs0]
 
-      else if REs = [lpar                   | REs0]
-      then    [nil                          | REs0]
+      else if REs =   [RE, lpar                                       | REs0]
+      then            [alt(extract_regex(RE)), lpar                   | REs0]
 
-      else if REs = [RE, lpar               | REs0]
-      then    [RE                           | REs0]
+      else if REs =   [RE_B, RE_A                                     | REs0]
+      then    alt(S,  [re(extract_regex(RE_A) ++ extract_regex(RE_B)) | REs0])
 
-      else if REs = [RE_A, RE_B             | REs0]
-      then    rpar(S, [concat(RE_B, RE_A)   | REs0])
+      else if REs =   [RE]
+      then            [alt(extract_regex(RE))]
 
-      else    regex_error("`)' without opening `('", S)
+      else regex_error("`|' without preceding regex", S)
     ).
 
 %-----------------------------------------------------------------------------%
 
-    % Handle the alternation of two res.
+    % Handle a closing parenthesis.
     %
-:- func alt(re, re) = re.
+:- func rpar(string, list(re)) = list(re).
 
-alt(A, B) = re(extract_regex(A) or extract_regex(B)).
+rpar(S, REs) =
+    (      if REs =   [alt(_)                                         | _   ]
+      then    regex_error("`)' immediately following `|'", S)
 
-    % Handle the concatenation of two res.
-    %
-:- func concat(re, re) = re.
+      else if REs =   [RE_B, alt(RE_A)                                | REs0]
+      then    rpar(S, [re(RE_A or extract_regex(RE_B))                | REs0])
+
+      else if REs =   [lpar                                           | REs0]
+      then            [nil                                            | REs0]
+
+      else if REs =   [RE, lpar                                       | REs0]
+      then            [RE                                             | REs0]
+
+      else if REs =   [RE_B, RE_A                                     | REs0]
+      then    rpar(S, [re(extract_regex(RE_A) ++ extract_regex(RE_B)) | REs0])
 
-concat(A, B) = re(extract_regex(A) ++ extract_regex(B)).
+      else    regex_error("`)' without opening `('", S)
+    ).
 
+%-----------------------------------------------------------------------------%
 
 :- func extract_regex(re) = regexp.
 
-extract_regex(re(A))    = A.
-extract_regex(chars(A)) = A.
-extract_regex(alt)      = func_error("regex__extract_regex").
-extract_regex(lpar)     = func_error("regex__extract_regex").
+extract_regex(re(R))   = R.
+extract_regex(char(R)) = R.
+extract_regex(alt(_))  = func_error("regex__extract_regex").
+extract_regex(lpar)    = func_error("regex__extract_regex").
 
 %-----------------------------------------------------------------------------%
 
@@ -491,24 +501,24 @@
 
 matches(Regex, String) = Matches :-
     State   = start(Regex, unsafe_promise_unique(String)),
-    Matches = matches_2(length(String), State).
+    Matches = matches_2(length(String), -1, State).
 
 
-:- func matches_2(int, lexer_state) = list({string, int, int}).
-:- mode matches_2(in,  di)          = out is det.
+:- func matches_2(int, offset, lexer_state) = list({string, int, int}).
+:- mode matches_2(in,  in,     di)          = out is det.
 
-matches_2(Length, State0) = Matches :-
+matches_2(Length, LastEnd, State0) = Matches :-
     lex__offset_from_start(Start0, State0, State1),
     lex__read(Result, State1, State2),
+    lex__offset_from_start(End, State2, State3),
     (
         Result  = eof,
         Matches = []
     ;
         Result  = error(_, _),
-        Matches = matches_2(Length, State2)
+        Matches = matches_2(Length, End, State3)
     ;
         Result  = ok(Substring),
-        lex__offset_from_start(End, State2, State3),
         Start   = Start0,
         Count   = End - Start,
 
@@ -520,21 +530,37 @@
             % two matches for "foo" - "foo" and the notional null string
             % at the end.)
             %
+            % If we matched the empty string at the same point the
+            % last match ended, then we ignore this solution and
+            % move on.
+            %
         Matches =
-            [ {Substring, Start, Count} |
-              ( if End = Length then
-                    []
-                else if Count = 0 then
-                  ( if lex__read_char(ok(_), State3, State4) then
-                      matches_2(Length, State4)
+            ( if Count = 0, Start = LastEnd then
+
+                    % This is an empty match at the same point as the end
+                    % of our last match.  We have to ignore it and move on.
+                    % 
+                ( if   lex__read_char(ok(_), State3, State4)
+                  then matches_2(Length, End, State4)
+                  else []
+                )
+
+              else
+
+                [ {Substring, Start, Count} |
+                  ( if End = Length then
+                        []
+                    else if Count = 0 then
+                        ( if   lex__read_char(ok(_), State3, State4)
+                          then matches_2(Length, End, State4)
+                          else []
+                        )
                     else
-                      []
+                      matches_2(Length, End, State3)
                   )
-                else
-                  matches_2(Length, State3)
-              )
-            ]
-        ).
+                ]
+            )
+    ).
 
 %-----------------------------------------------------------------------------%
 
diff -u tests/test_regex.exp tests/test_regex.exp
--- tests/test_regex.exp	26 Nov 2002 08:45:41 -0000
+++ tests/test_regex.exp	29 Nov 2002 03:12:37 -0000
@@ -8,9 +8,9 @@
 replace_all with `<>'   : "<>br<>c<>d<>br<>"
 change_first to `<&>'   : "<a>bracadabra"
 change_all to `<&>'     : "<a>br<a>c<a>d<a>br<a>"
-left match              : {"a", 0, 1}
-right match             : {"a", 10, 1}
-first match             : {"a", 0, 1}
+left_match              : {"a", 0, 1}
+right_match             : {"a", 10, 1}
+first_match             : {"a", 0, 1}
 
 > "xabracadabra"
 all matches             : [{"a", 1, 1}, {"a", 4, 1}, {"a", 6, 1}, {"a", 8, 1}, {"a", 11, 1}]
@@ -18,8 +18,8 @@
 replace_all with `<>'   : "x<>br<>c<>d<>br<>"
 change_first to `<&>'   : "x<a>bracadabra"
 change_all to `<&>'     : "x<a>br<a>c<a>d<a>br<a>"
-right match             : {"a", 11, 1}
-first match             : {"a", 1, 1}
+right_match             : {"a", 11, 1}
+first_match             : {"a", 1, 1}
 
 > "abracadabrax"
 all matches             : [{"a", 0, 1}, {"a", 3, 1}, {"a", 5, 1}, {"a", 7, 1}, {"a", 10, 1}]
@@ -27,8 +27,8 @@
 replace_all with `<>'   : "<>br<>c<>d<>br<>x"
 change_first to `<&>'   : "<a>bracadabrax"
 change_all to `<&>'     : "<a>br<a>c<a>d<a>br<a>x"
-left match              : {"a", 0, 1}
-first match             : {"a", 0, 1}
+left_match              : {"a", 0, 1}
+first_match             : {"a", 0, 1}
 
 > "foo"
 all matches             : []
@@ -42,8 +42,8 @@
 replace_all with `<>'   : "<>racad<>ra"
 change_first to `<&>'   : "<ab>racadabra"
 change_all to `<&>'     : "<ab>racad<ab>ra"
-left match              : {"ab", 0, 2}
-first match             : {"ab", 0, 2}
+left_match              : {"ab", 0, 2}
+first_match             : {"ab", 0, 2}
 
 > "xabracadabra"
 all matches             : [{"ab", 1, 2}, {"ab", 8, 2}]
@@ -51,7 +51,7 @@
 replace_all with `<>'   : "x<>racad<>ra"
 change_first to `<&>'   : "x<ab>racadabra"
 change_all to `<&>'     : "x<ab>racad<ab>ra"
-first match             : {"ab", 1, 2}
+first_match             : {"ab", 1, 2}
 
 > "abracadabrax"
 all matches             : [{"ab", 0, 2}, {"ab", 7, 2}]
@@ -59,8 +59,8 @@
 replace_all with `<>'   : "<>racad<>rax"
 change_first to `<&>'   : "<ab>racadabrax"
 change_all to `<&>'     : "<ab>racad<ab>rax"
-left match              : {"ab", 0, 2}
-first match             : {"ab", 0, 2}
+left_match              : {"ab", 0, 2}
+first_match             : {"ab", 0, 2}
 
 > "foo"
 all matches             : []
@@ -74,8 +74,8 @@
 replace_all with `<>'   : "<>rac<><>ra"
 change_first to `<&>'   : "<ab>racadabra"
 change_all to `<&>'     : "<ab>rac<ad><ab>ra"
-left match              : {"ab", 0, 2}
-first match             : {"ab", 0, 2}
+left_match              : {"ab", 0, 2}
+first_match             : {"ab", 0, 2}
 
 > "xabracadabra"
 all matches             : [{"ab", 1, 2}, {"ad", 6, 2}, {"ab", 8, 2}]
@@ -83,7 +83,7 @@
 replace_all with `<>'   : "x<>rac<><>ra"
 change_first to `<&>'   : "x<ab>racadabra"
 change_all to `<&>'     : "x<ab>rac<ad><ab>ra"
-first match             : {"ab", 1, 2}
+first_match             : {"ab", 1, 2}
 
 > "abracadabrax"
 all matches             : [{"ab", 0, 2}, {"ad", 5, 2}, {"ab", 7, 2}]
@@ -91,8 +91,8 @@
 replace_all with `<>'   : "<>rac<><>rax"
 change_first to `<&>'   : "<ab>racadabrax"
 change_all to `<&>'     : "<ab>rac<ad><ab>rax"
-left match              : {"ab", 0, 2}
-first match             : {"ab", 0, 2}
+left_match              : {"ab", 0, 2}
+first_match             : {"ab", 0, 2}
 
 > "foo"
 all matches             : []
@@ -101,34 +101,34 @@
 * Matching against "a*"
 
 > "aardvark"
-all matches             : [{"aa", 0, 2}, {"", 2, 0}, {"", 3, 0}, {"", 4, 0}, {"a", 5, 1}, {"", 6, 0}, {"", 7, 0}, {"", 8, 0}]
+all matches             : [{"aa", 0, 2}, {"", 3, 0}, {"", 4, 0}, {"a", 5, 1}, {"", 7, 0}, {"", 8, 0}]
 replace_first with `<>' : "<>rdvark"
-replace_all with `<>'   : "<><>r<>d<>v<><>r<>k<>"
+replace_all with `<>'   : "<>r<>d<>v<>r<>k<>"
 change_first to `<&>'   : "<aa>rdvark"
-change_all to `<&>'     : "<aa><>r<>d<>v<a><>r<>k<>"
-left match              : {"aa", 0, 2}
-right match             : {"", 8, 0}
-first match             : {"aa", 0, 2}
+change_all to `<&>'     : "<aa>r<>d<>v<a>r<>k<>"
+left_match              : {"aa", 0, 2}
+right_match             : {"", 8, 0}
+first_match             : {"aa", 0, 2}
 
 > "xaardvark"
-all matches             : [{"", 0, 0}, {"aa", 1, 2}, {"", 3, 0}, {"", 4, 0}, {"", 5, 0}, {"a", 6, 1}, {"", 7, 0}, {"", 8, 0}, {"", 9, 0}]
+all matches             : [{"", 0, 0}, {"aa", 1, 2}, {"", 4, 0}, {"", 5, 0}, {"a", 6, 1}, {"", 8, 0}, {"", 9, 0}]
 replace_first with `<>' : "<>xaardvark"
-replace_all with `<>'   : "<>x<><>r<>d<>v<><>r<>k<>"
+replace_all with `<>'   : "<>x<>r<>d<>v<>r<>k<>"
 change_first to `<&>'   : "<>xaardvark"
-change_all to `<&>'     : "<>x<aa><>r<>d<>v<a><>r<>k<>"
-left match              : {"", 0, 0}
-right match             : {"", 9, 0}
-first match             : {"", 0, 0}
+change_all to `<&>'     : "<>x<aa>r<>d<>v<a>r<>k<>"
+left_match              : {"", 0, 0}
+right_match             : {"", 9, 0}
+first_match             : {"", 0, 0}
 
 > "aardvarkx"
-all matches             : [{"aa", 0, 2}, {"", 2, 0}, {"", 3, 0}, {"", 4, 0}, {"a", 5, 1}, {"", 6, 0}, {"", 7, 0}, {"", 8, 0}, {"", 9, 0}]
+all matches             : [{"aa", 0, 2}, {"", 3, 0}, {"", 4, 0}, {"a", 5, 1}, {"", 7, 0}, {"", 8, 0}, {"", 9, 0}]
 replace_first with `<>' : "<>rdvarkx"
-replace_all with `<>'   : "<><>r<>d<>v<><>r<>k<>x<>"
+replace_all with `<>'   : "<>r<>d<>v<>r<>k<>x<>"
 change_first to `<&>'   : "<aa>rdvarkx"
-change_all to `<&>'     : "<aa><>r<>d<>v<a><>r<>k<>x<>"
-left match              : {"aa", 0, 2}
-right match             : {"", 9, 0}
-first match             : {"aa", 0, 2}
+change_all to `<&>'     : "<aa>r<>d<>v<a>r<>k<>x<>"
+left_match              : {"aa", 0, 2}
+right_match             : {"", 9, 0}
+first_match             : {"aa", 0, 2}
 
 > "foo"
 all matches             : [{"", 0, 0}, {"", 1, 0}, {"", 2, 0}, {"", 3, 0}]
@@ -136,52 +136,41 @@
 replace_all with `<>'   : "<>f<>o<>o<>"
 change_first to `<&>'   : "<>foo"
 change_all to `<&>'     : "<>f<>o<>o<>"
-left match              : {"", 0, 0}
-right match             : {"", 3, 0}
-first match             : {"", 0, 0}
+left_match              : {"", 0, 0}
+right_match             : {"", 3, 0}
+first_match             : {"", 0, 0}
 
 
 * Matching against "aa*"
 
 > "aardvark"
-all matches             : [{"aa", 0, 2}, {"", 2, 0}, {"", 3, 0}, {"", 4, 0}, {"", 5, 0}, {"", 6, 0}, {"", 7, 0}, {"", 8, 0}]
+all matches             : [{"aa", 0, 2}, {"a", 5, 1}]
 replace_first with `<>' : "<>rdvark"
-replace_all with `<>'   : "<><>r<>d<>v<>a<>r<>k<>"
+replace_all with `<>'   : "<>rdv<>rk"
 change_first to `<&>'   : "<aa>rdvark"
-change_all to `<&>'     : "<aa><>r<>d<>v<>a<>r<>k<>"
-left match              : {"aa", 0, 2}
-right match             : {"", 8, 0}
-first match             : {"aa", 0, 2}
+change_all to `<&>'     : "<aa>rdv<a>rk"
+left_match              : {"aa", 0, 2}
+first_match             : {"aa", 0, 2}
 
 > "xaardvark"
-all matches             : [{"", 0, 0}, {"aa", 1, 2}, {"", 3, 0}, {"", 4, 0}, {"", 5, 0}, {"", 6, 0}, {"", 7, 0}, {"", 8, 0}, {"", 9, 0}]
-replace_first with `<>' : "<>xaardvark"
-replace_all with `<>'   : "<>x<><>r<>d<>v<>a<>r<>k<>"
-change_first to `<&>'   : "<>xaardvark"
-change_all to `<&>'     : "<>x<aa><>r<>d<>v<>a<>r<>k<>"
-left match              : {"", 0, 0}
-right match             : {"", 9, 0}
-first match             : {"", 0, 0}
+all matches             : [{"aa", 1, 2}, {"a", 6, 1}]
+replace_first with `<>' : "x<>rdvark"
+replace_all with `<>'   : "x<>rdv<>rk"
+change_first to `<&>'   : "x<aa>rdvark"
+change_all to `<&>'     : "x<aa>rdv<a>rk"
+first_match             : {"aa", 1, 2}
 
 > "aardvarkx"
-all matches             : [{"aa", 0, 2}, {"", 2, 0}, {"", 3, 0}, {"", 4, 0}, {"", 5, 0}, {"", 6, 0}, {"", 7, 0}, {"", 8, 0}, {"", 9, 0}]
+all matches             : [{"aa", 0, 2}, {"a", 5, 1}]
 replace_first with `<>' : "<>rdvarkx"
-replace_all with `<>'   : "<><>r<>d<>v<>a<>r<>k<>x<>"
+replace_all with `<>'   : "<>rdv<>rkx"
 change_first to `<&>'   : "<aa>rdvarkx"
-change_all to `<&>'     : "<aa><>r<>d<>v<>a<>r<>k<>x<>"
-left match              : {"aa", 0, 2}
-right match             : {"", 9, 0}
-first match             : {"aa", 0, 2}
+change_all to `<&>'     : "<aa>rdv<a>rkx"
+left_match              : {"aa", 0, 2}
+first_match             : {"aa", 0, 2}
 
 > "foo"
-all matches             : [{"", 0, 0}, {"", 1, 0}, {"", 2, 0}, {"", 3, 0}]
-replace_first with `<>' : "<>foo"
-replace_all with `<>'   : "<>f<>o<>o<>"
-change_first to `<&>'   : "<>foo"
-change_all to `<&>'     : "<>f<>o<>o<>"
-left match              : {"", 0, 0}
-right match             : {"", 3, 0}
-first match             : {"", 0, 0}
+all matches             : []
 
 
 * Matching against "a+"
@@ -192,8 +181,8 @@
 replace_all with `<>'   : "<>rdv<>rk"
 change_first to `<&>'   : "<aa>rdvark"
 change_all to `<&>'     : "<aa>rdv<a>rk"
-left match              : {"aa", 0, 2}
-first match             : {"aa", 0, 2}
+left_match              : {"aa", 0, 2}
+first_match             : {"aa", 0, 2}
 
 > "xaardvark"
 all matches             : [{"aa", 1, 2}, {"a", 6, 1}]
@@ -201,7 +190,7 @@
 replace_all with `<>'   : "x<>rdv<>rk"
 change_first to `<&>'   : "x<aa>rdvark"
 change_all to `<&>'     : "x<aa>rdv<a>rk"
-first match             : {"aa", 1, 2}
+first_match             : {"aa", 1, 2}
 
 > "aardvarkx"
 all matches             : [{"aa", 0, 2}, {"a", 5, 1}]
@@ -209,8 +198,8 @@
 replace_all with `<>'   : "<>rdv<>rkx"
 change_first to `<&>'   : "<aa>rdvarkx"
 change_all to `<&>'     : "<aa>rdv<a>rkx"
-left match              : {"aa", 0, 2}
-first match             : {"aa", 0, 2}
+left_match              : {"aa", 0, 2}
+first_match             : {"aa", 0, 2}
 
 > "foo"
 all matches             : []
@@ -224,8 +213,8 @@
 replace_all with `<>'   : "<>rdvark"
 change_first to `<&>'   : "<aa>rdvark"
 change_all to `<&>'     : "<aa>rdvark"
-left match              : {"aa", 0, 2}
-first match             : {"aa", 0, 2}
+left_match              : {"aa", 0, 2}
+first_match             : {"aa", 0, 2}
 
 > "xaardvark"
 all matches             : [{"aa", 1, 2}]
@@ -233,7 +222,7 @@
 replace_all with `<>'   : "x<>rdvark"
 change_first to `<&>'   : "x<aa>rdvark"
 change_all to `<&>'     : "x<aa>rdvark"
-first match             : {"aa", 1, 2}
+first_match             : {"aa", 1, 2}
 
 > "aardvarkx"
 all matches             : [{"aa", 0, 2}]
@@ -241,8 +230,8 @@
 replace_all with `<>'   : "<>rdvarkx"
 change_first to `<&>'   : "<aa>rdvarkx"
 change_all to `<&>'     : "<aa>rdvarkx"
-left match              : {"aa", 0, 2}
-first match             : {"aa", 0, 2}
+left_match              : {"aa", 0, 2}
+first_match             : {"aa", 0, 2}
 
 > "foo"
 all matches             : []
@@ -251,34 +240,34 @@
 * Matching against "a?"
 
 > "aardvark"
-all matches             : [{"a", 0, 1}, {"a", 1, 1}, {"", 2, 0}, {"", 3, 0}, {"", 4, 0}, {"a", 5, 1}, {"", 6, 0}, {"", 7, 0}, {"", 8, 0}]
+all matches             : [{"a", 0, 1}, {"a", 1, 1}, {"", 3, 0}, {"", 4, 0}, {"a", 5, 1}, {"", 7, 0}, {"", 8, 0}]
 replace_first with `<>' : "<>ardvark"
-replace_all with `<>'   : "<><><>r<>d<>v<><>r<>k<>"
+replace_all with `<>'   : "<><>r<>d<>v<>r<>k<>"
 change_first to `<&>'   : "<a>ardvark"
-change_all to `<&>'     : "<a><a><>r<>d<>v<a><>r<>k<>"
-left match              : {"a", 0, 1}
-right match             : {"", 8, 0}
-first match             : {"a", 0, 1}
+change_all to `<&>'     : "<a><a>r<>d<>v<a>r<>k<>"
+left_match              : {"a", 0, 1}
+right_match             : {"", 8, 0}
+first_match             : {"a", 0, 1}
 
 > "xaardvark"
-all matches             : [{"", 0, 0}, {"a", 1, 1}, {"a", 2, 1}, {"", 3, 0}, {"", 4, 0}, {"", 5, 0}, {"a", 6, 1}, {"", 7, 0}, {"", 8, 0}, {"", 9, 0}]
+all matches             : [{"", 0, 0}, {"a", 1, 1}, {"a", 2, 1}, {"", 4, 0}, {"", 5, 0}, {"a", 6, 1}, {"", 8, 0}, {"", 9, 0}]
 replace_first with `<>' : "<>xaardvark"
-replace_all with `<>'   : "<>x<><><>r<>d<>v<><>r<>k<>"
+replace_all with `<>'   : "<>x<><>r<>d<>v<>r<>k<>"
 change_first to `<&>'   : "<>xaardvark"
-change_all to `<&>'     : "<>x<a><a><>r<>d<>v<a><>r<>k<>"
-left match              : {"", 0, 0}
-right match             : {"", 9, 0}
-first match             : {"", 0, 0}
+change_all to `<&>'     : "<>x<a><a>r<>d<>v<a>r<>k<>"
+left_match              : {"", 0, 0}
+right_match             : {"", 9, 0}
+first_match             : {"", 0, 0}
 
 > "aardvarkx"
-all matches             : [{"a", 0, 1}, {"a", 1, 1}, {"", 2, 0}, {"", 3, 0}, {"", 4, 0}, {"a", 5, 1}, {"", 6, 0}, {"", 7, 0}, {"", 8, 0}, {"", 9, 0}]
+all matches             : [{"a", 0, 1}, {"a", 1, 1}, {"", 3, 0}, {"", 4, 0}, {"a", 5, 1}, {"", 7, 0}, {"", 8, 0}, {"", 9, 0}]
 replace_first with `<>' : "<>ardvarkx"
-replace_all with `<>'   : "<><><>r<>d<>v<><>r<>k<>x<>"
+replace_all with `<>'   : "<><>r<>d<>v<>r<>k<>x<>"
 change_first to `<&>'   : "<a>ardvarkx"
-change_all to `<&>'     : "<a><a><>r<>d<>v<a><>r<>k<>x<>"
-left match              : {"a", 0, 1}
-right match             : {"", 9, 0}
-first match             : {"a", 0, 1}
+change_all to `<&>'     : "<a><a>r<>d<>v<a>r<>k<>x<>"
+left_match              : {"a", 0, 1}
+right_match             : {"", 9, 0}
+first_match             : {"a", 0, 1}
 
 > "foo"
 all matches             : [{"", 0, 0}, {"", 1, 0}, {"", 2, 0}, {"", 3, 0}]
@@ -286,52 +275,41 @@
 replace_all with `<>'   : "<>f<>o<>o<>"
 change_first to `<&>'   : "<>foo"
 change_all to `<&>'     : "<>f<>o<>o<>"
-left match              : {"", 0, 0}
-right match             : {"", 3, 0}
-first match             : {"", 0, 0}
+left_match              : {"", 0, 0}
+right_match             : {"", 3, 0}
+first_match             : {"", 0, 0}
 
 
 * Matching against "aa?"
 
 > "aardvark"
-all matches             : [{"aa", 0, 2}, {"", 2, 0}, {"", 3, 0}, {"", 4, 0}, {"", 5, 0}, {"", 6, 0}, {"", 7, 0}, {"", 8, 0}]
+all matches             : [{"aa", 0, 2}, {"a", 5, 1}]
 replace_first with `<>' : "<>rdvark"
-replace_all with `<>'   : "<><>r<>d<>v<>a<>r<>k<>"
+replace_all with `<>'   : "<>rdv<>rk"
 change_first to `<&>'   : "<aa>rdvark"
-change_all to `<&>'     : "<aa><>r<>d<>v<>a<>r<>k<>"
-left match              : {"aa", 0, 2}
-right match             : {"", 8, 0}
-first match             : {"aa", 0, 2}
+change_all to `<&>'     : "<aa>rdv<a>rk"
+left_match              : {"aa", 0, 2}
+first_match             : {"aa", 0, 2}
 
 > "xaardvark"
-all matches             : [{"", 0, 0}, {"aa", 1, 2}, {"", 3, 0}, {"", 4, 0}, {"", 5, 0}, {"", 6, 0}, {"", 7, 0}, {"", 8, 0}, {"", 9, 0}]
-replace_first with `<>' : "<>xaardvark"
-replace_all with `<>'   : "<>x<><>r<>d<>v<>a<>r<>k<>"
-change_first to `<&>'   : "<>xaardvark"
-change_all to `<&>'     : "<>x<aa><>r<>d<>v<>a<>r<>k<>"
-left match              : {"", 0, 0}
-right match             : {"", 9, 0}
-first match             : {"", 0, 0}
+all matches             : [{"aa", 1, 2}, {"a", 6, 1}]
+replace_first with `<>' : "x<>rdvark"
+replace_all with `<>'   : "x<>rdv<>rk"
+change_first to `<&>'   : "x<aa>rdvark"
+change_all to `<&>'     : "x<aa>rdv<a>rk"
+first_match             : {"aa", 1, 2}
 
 > "aardvarkx"
-all matches             : [{"aa", 0, 2}, {"", 2, 0}, {"", 3, 0}, {"", 4, 0}, {"", 5, 0}, {"", 6, 0}, {"", 7, 0}, {"", 8, 0}, {"", 9, 0}]
+all matches             : [{"aa", 0, 2}, {"a", 5, 1}]
 replace_first with `<>' : "<>rdvarkx"
-replace_all with `<>'   : "<><>r<>d<>v<>a<>r<>k<>x<>"
+replace_all with `<>'   : "<>rdv<>rkx"
 change_first to `<&>'   : "<aa>rdvarkx"
-change_all to `<&>'     : "<aa><>r<>d<>v<>a<>r<>k<>x<>"
-left match              : {"aa", 0, 2}
-right match             : {"", 9, 0}
-first match             : {"aa", 0, 2}
+change_all to `<&>'     : "<aa>rdv<a>rkx"
+left_match              : {"aa", 0, 2}
+first_match             : {"aa", 0, 2}
 
 > "foo"
-all matches             : [{"", 0, 0}, {"", 1, 0}, {"", 2, 0}, {"", 3, 0}]
-replace_first with `<>' : "<>foo"
-replace_all with `<>'   : "<>f<>o<>o<>"
-change_first to `<&>'   : "<>foo"
-change_all to `<&>'     : "<>f<>o<>o<>"
-left match              : {"", 0, 0}
-right match             : {"", 3, 0}
-first match             : {"", 0, 0}
+all matches             : []
 
 
 * Matching against "(ab|ad)+"
@@ -342,8 +320,8 @@
 replace_all with `<>'   : "<>rac<>ra"
 change_first to `<&>'   : "<ab>racadabra"
 change_all to `<&>'     : "<ab>rac<adab>ra"
-left match              : {"ab", 0, 2}
-first match             : {"ab", 0, 2}
+left_match              : {"ab", 0, 2}
+first_match             : {"ab", 0, 2}
 
 > "xabracadabra"
 all matches             : [{"ab", 1, 2}, {"adab", 6, 4}]
@@ -351,7 +329,7 @@
 replace_all with `<>'   : "x<>rac<>ra"
 change_first to `<&>'   : "x<ab>racadabra"
 change_all to `<&>'     : "x<ab>rac<adab>ra"
-first match             : {"ab", 1, 2}
+first_match             : {"ab", 1, 2}
 
 > "abracadabrax"
 all matches             : [{"ab", 0, 2}, {"adab", 5, 4}]
@@ -359,8 +337,8 @@
 replace_all with `<>'   : "<>rac<>rax"
 change_first to `<&>'   : "<ab>racadabrax"
 change_all to `<&>'     : "<ab>rac<adab>rax"
-left match              : {"ab", 0, 2}
-first match             : {"ab", 0, 2}
+left_match              : {"ab", 0, 2}
+first_match             : {"ab", 0, 2}
 
 > "foo"
 all matches             : []
@@ -374,9 +352,9 @@
 replace_all with `<>'   : "<><>r<><><><><><>r<>"
 change_first to `<&>'   : "<a>bracadabra"
 change_all to `<&>'     : "<a><b>r<a><c><a><d><a><b>r<a>"
-left match              : {"a", 0, 1}
-right match             : {"a", 10, 1}
-first match             : {"a", 0, 1}
+left_match              : {"a", 0, 1}
+right_match             : {"a", 10, 1}
+first_match             : {"a", 0, 1}
 
 > "xabracadabra"
 all matches             : [{"a", 1, 1}, {"b", 2, 1}, {"a", 4, 1}, {"c", 5, 1}, {"a", 6, 1}, {"d", 7, 1}, {"a", 8, 1}, {"b", 9, 1}, {"a", 11, 1}]
@@ -384,8 +362,8 @@
 replace_all with `<>'   : "x<><>r<><><><><><>r<>"
 change_first to `<&>'   : "x<a>bracadabra"
 change_all to `<&>'     : "x<a><b>r<a><c><a><d><a><b>r<a>"
-right match             : {"a", 11, 1}
-first match             : {"a", 1, 1}
+right_match             : {"a", 11, 1}
+first_match             : {"a", 1, 1}
 
 > "abracadabrax"
 all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"a", 3, 1}, {"c", 4, 1}, {"a", 5, 1}, {"d", 6, 1}, {"a", 7, 1}, {"b", 8, 1}, {"a", 10, 1}]
@@ -393,8 +371,8 @@
 replace_all with `<>'   : "<><>r<><><><><><>r<>x"
 change_first to `<&>'   : "<a>bracadabrax"
 change_all to `<&>'     : "<a><b>r<a><c><a><d><a><b>r<a>x"
-left match              : {"a", 0, 1}
-first match             : {"a", 0, 1}
+left_match              : {"a", 0, 1}
+first_match             : {"a", 0, 1}
 
 > "foo"
 all matches             : []
@@ -408,9 +386,9 @@
 replace_all with `<>'   : "<><>r<><><><><><>r<>"
 change_first to `<&>'   : "<a>bracadabra"
 change_all to `<&>'     : "<a><b>r<a><c><a><d><a><b>r<a>"
-left match              : {"a", 0, 1}
-right match             : {"a", 10, 1}
-first match             : {"a", 0, 1}
+left_match              : {"a", 0, 1}
+right_match             : {"a", 10, 1}
+first_match             : {"a", 0, 1}
 
 > "xabracadabra"
 all matches             : [{"a", 1, 1}, {"b", 2, 1}, {"a", 4, 1}, {"c", 5, 1}, {"a", 6, 1}, {"d", 7, 1}, {"a", 8, 1}, {"b", 9, 1}, {"a", 11, 1}]
@@ -418,8 +396,8 @@
 replace_all with `<>'   : "x<><>r<><><><><><>r<>"
 change_first to `<&>'   : "x<a>bracadabra"
 change_all to `<&>'     : "x<a><b>r<a><c><a><d><a><b>r<a>"
-right match             : {"a", 11, 1}
-first match             : {"a", 1, 1}
+right_match             : {"a", 11, 1}
+first_match             : {"a", 1, 1}
 
 > "abracadabrax"
 all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"a", 3, 1}, {"c", 4, 1}, {"a", 5, 1}, {"d", 6, 1}, {"a", 7, 1}, {"b", 8, 1}, {"a", 10, 1}]
@@ -427,8 +405,8 @@
 replace_all with `<>'   : "<><>r<><><><><><>r<>x"
 change_first to `<&>'   : "<a>bracadabrax"
 change_all to `<&>'     : "<a><b>r<a><c><a><d><a><b>r<a>x"
-left match              : {"a", 0, 1}
-first match             : {"a", 0, 1}
+left_match              : {"a", 0, 1}
+first_match             : {"a", 0, 1}
 
 > "foo"
 all matches             : []
@@ -442,8 +420,8 @@
 replace_all with `<>'   : "<>foo["
 change_first to `<&>'   : "<]>foo["
 change_all to `<&>'     : "<]>foo["
-left match              : {"]", 0, 1}
-first match             : {"]", 0, 1}
+left_match              : {"]", 0, 1}
+first_match             : {"]", 0, 1}
 
 > "[foo]"
 all matches             : [{"]", 4, 1}]
@@ -451,8 +429,8 @@
 replace_all with `<>'   : "[foo<>"
 change_first to `<&>'   : "[foo<]>"
 change_all to `<&>'     : "[foo<]>"
-right match             : {"]", 4, 1}
-first match             : {"]", 4, 1}
+right_match             : {"]", 4, 1}
+first_match             : {"]", 4, 1}
 
 > "foo"
 all matches             : []
@@ -466,9 +444,9 @@
 replace_all with `<>'   : "<>foo<>"
 change_first to `<&>'   : "<]>foo["
 change_all to `<&>'     : "<]>foo<[>"
-left match              : {"]", 0, 1}
-right match             : {"[", 4, 1}
-first match             : {"]", 0, 1}
+left_match              : {"]", 0, 1}
+right_match             : {"[", 4, 1}
+first_match             : {"]", 0, 1}
 
 > "[foo]"
 all matches             : [{"[", 0, 1}, {"]", 4, 1}]
@@ -476,9 +454,9 @@
 replace_all with `<>'   : "<>foo<>"
 change_first to `<&>'   : "<[>foo]"
 change_all to `<&>'     : "<[>foo<]>"
-left match              : {"[", 0, 1}
-right match             : {"]", 4, 1}
-first match             : {"[", 0, 1}
+left_match              : {"[", 0, 1}
+right_match             : {"]", 4, 1}
+first_match             : {"[", 0, 1}
 
 > "foo"
 all matches             : []
@@ -492,8 +470,8 @@
 replace_all with `<>'   : "]foo<>"
 change_first to `<&>'   : "]foo<[>"
 change_all to `<&>'     : "]foo<[>"
-right match             : {"[", 4, 1}
-first match             : {"[", 4, 1}
+right_match             : {"[", 4, 1}
+first_match             : {"[", 4, 1}
 
 > "[foo]"
 all matches             : [{"[", 0, 1}]
@@ -501,8 +479,8 @@
 replace_all with `<>'   : "<>foo]"
 change_first to `<&>'   : "<[>foo]"
 change_all to `<&>'     : "<[>foo]"
-left match              : {"[", 0, 1}
-first match             : {"[", 0, 1}
+left_match              : {"[", 0, 1}
+first_match             : {"[", 0, 1}
 
 > "foo"
 all matches             : []
@@ -516,7 +494,7 @@
 replace_all with `<>'   : "ab<>acadab<>a"
 change_first to `<&>'   : "ab<r>acadabra"
 change_all to `<&>'     : "ab<r>acadab<r>a"
-first match             : {"r", 2, 1}
+first_match             : {"r", 2, 1}
 
 > "xabracadabra"
 all matches             : [{"x", 0, 1}, {"r", 3, 1}, {"r", 10, 1}]
@@ -524,8 +502,8 @@
 replace_all with `<>'   : "<>ab<>acadab<>a"
 change_first to `<&>'   : "<x>abracadabra"
 change_all to `<&>'     : "<x>ab<r>acadab<r>a"
-left match              : {"x", 0, 1}
-first match             : {"x", 0, 1}
+left_match              : {"x", 0, 1}
+first_match             : {"x", 0, 1}
 
 > "abracadabrax"
 all matches             : [{"r", 2, 1}, {"r", 9, 1}, {"x", 11, 1}]
@@ -533,8 +511,8 @@
 replace_all with `<>'   : "ab<>acadab<>a<>"
 change_first to `<&>'   : "ab<r>acadabrax"
 change_all to `<&>'     : "ab<r>acadab<r>a<x>"
-right match             : {"x", 11, 1}
-first match             : {"r", 2, 1}
+right_match             : {"x", 11, 1}
+first_match             : {"r", 2, 1}
 
 > "foo"
 all matches             : [{"f", 0, 1}, {"o", 1, 1}, {"o", 2, 1}]
@@ -542,9 +520,9 @@
 replace_all with `<>'   : "<><><>"
 change_first to `<&>'   : "<f>oo"
 change_all to `<&>'     : "<f><o><o>"
-left match              : {"f", 0, 1}
-right match             : {"o", 2, 1}
-first match             : {"f", 0, 1}
+left_match              : {"f", 0, 1}
+right_match             : {"o", 2, 1}
+first_match             : {"f", 0, 1}
 
 
 * Matching against "[^ab-d]"
@@ -555,7 +533,7 @@
 replace_all with `<>'   : "ab<>acadab<>a"
 change_first to `<&>'   : "ab<r>acadabra"
 change_all to `<&>'     : "ab<r>acadab<r>a"
-first match             : {"r", 2, 1}
+first_match             : {"r", 2, 1}
 
 > "xabracadabra"
 all matches             : [{"x", 0, 1}, {"r", 3, 1}, {"r", 10, 1}]
@@ -563,8 +541,8 @@
 replace_all with `<>'   : "<>ab<>acadab<>a"
 change_first to `<&>'   : "<x>abracadabra"
 change_all to `<&>'     : "<x>ab<r>acadab<r>a"
-left match              : {"x", 0, 1}
-first match             : {"x", 0, 1}
+left_match              : {"x", 0, 1}
+first_match             : {"x", 0, 1}
 
 > "abracadabrax"
 all matches             : [{"r", 2, 1}, {"r", 9, 1}, {"x", 11, 1}]
@@ -572,8 +550,8 @@
 replace_all with `<>'   : "ab<>acadab<>a<>"
 change_first to `<&>'   : "ab<r>acadabrax"
 change_all to `<&>'     : "ab<r>acadab<r>a<x>"
-right match             : {"x", 11, 1}
-first match             : {"r", 2, 1}
+right_match             : {"x", 11, 1}
+first_match             : {"r", 2, 1}
 
 > "foo"
 all matches             : [{"f", 0, 1}, {"o", 1, 1}, {"o", 2, 1}]
@@ -581,9 +559,9 @@
 replace_all with `<>'   : "<><><>"
 change_first to `<&>'   : "<f>oo"
 change_all to `<&>'     : "<f><o><o>"
-left match              : {"f", 0, 1}
-right match             : {"o", 2, 1}
-first match             : {"f", 0, 1}
+left_match              : {"f", 0, 1}
+right_match             : {"o", 2, 1}
+first_match             : {"f", 0, 1}
 
 
 * Matching against "[^]]"
@@ -594,8 +572,8 @@
 replace_all with `<>'   : "]<><><><>"
 change_first to `<&>'   : "]<f>oo["
 change_all to `<&>'     : "]<f><o><o><[>"
-right match             : {"[", 4, 1}
-first match             : {"f", 1, 1}
+right_match             : {"[", 4, 1}
+first_match             : {"f", 1, 1}
 
 > "[foo]"
 all matches             : [{"[", 0, 1}, {"f", 1, 1}, {"o", 2, 1}, {"o", 3, 1}]
@@ -603,8 +581,8 @@
 replace_all with `<>'   : "<><><><>]"
 change_first to `<&>'   : "<[>foo]"
 change_all to `<&>'     : "<[><f><o><o>]"
-left match              : {"[", 0, 1}
-first match             : {"[", 0, 1}
+left_match              : {"[", 0, 1}
+first_match             : {"[", 0, 1}
 
 > "foo"
 all matches             : [{"f", 0, 1}, {"o", 1, 1}, {"o", 2, 1}]
@@ -612,9 +590,9 @@
 replace_all with `<>'   : "<><><>"
 change_first to `<&>'   : "<f>oo"
 change_all to `<&>'     : "<f><o><o>"
-left match              : {"f", 0, 1}
-right match             : {"o", 2, 1}
-first match             : {"f", 0, 1}
+left_match              : {"f", 0, 1}
+right_match             : {"o", 2, 1}
+first_match             : {"f", 0, 1}
 
 
 * Matching against "[^[-]]"
@@ -625,7 +603,7 @@
 replace_all with `<>'   : "]<><><>["
 change_first to `<&>'   : "]<f>oo["
 change_all to `<&>'     : "]<f><o><o>["
-first match             : {"f", 1, 1}
+first_match             : {"f", 1, 1}
 
 > "[foo]"
 all matches             : [{"f", 1, 1}, {"o", 2, 1}, {"o", 3, 1}]
@@ -633,7 +611,7 @@
 replace_all with `<>'   : "[<><><>]"
 change_first to `<&>'   : "[<f>oo]"
 change_all to `<&>'     : "[<f><o><o>]"
-first match             : {"f", 1, 1}
+first_match             : {"f", 1, 1}
 
 > "foo"
 all matches             : [{"f", 0, 1}, {"o", 1, 1}, {"o", 2, 1}]
@@ -641,9 +619,9 @@
 replace_all with `<>'   : "<><><>"
 change_first to `<&>'   : "<f>oo"
 change_all to `<&>'     : "<f><o><o>"
-left match              : {"f", 0, 1}
-right match             : {"o", 2, 1}
-first match             : {"f", 0, 1}
+left_match              : {"f", 0, 1}
+right_match             : {"o", 2, 1}
+first_match             : {"f", 0, 1}
 
 
 * Matching against ".*"
@@ -654,10 +632,10 @@
 replace_all with `<>'   : "<>"
 change_first to `<&>'   : "<abracadabra>"
 change_all to `<&>'     : "<abracadabra>"
-exact match
-left match              : {"abracadabra", 0, 11}
-right match             : {"abracadabra", 0, 11}
-first match             : {"abracadabra", 0, 11}
+exact_match
+left_match              : {"abracadabra", 0, 11}
+right_match             : {"abracadabra", 0, 11}
+first_match             : {"abracadabra", 0, 11}
 
 > "xabracadabra"
 all matches             : [{"xabracadabra", 0, 12}]
@@ -665,10 +643,10 @@
 replace_all with `<>'   : "<>"
 change_first to `<&>'   : "<xabracadabra>"
 change_all to `<&>'     : "<xabracadabra>"
-exact match
-left match              : {"xabracadabra", 0, 12}
-right match             : {"xabracadabra", 0, 12}
-first match             : {"xabracadabra", 0, 12}
+exact_match
+left_match              : {"xabracadabra", 0, 12}
+right_match             : {"xabracadabra", 0, 12}
+first_match             : {"xabracadabra", 0, 12}
 
 > "abracadabrax"
 all matches             : [{"abracadabrax", 0, 12}]
@@ -676,10 +654,10 @@
 replace_all with `<>'   : "<>"
 change_first to `<&>'   : "<abracadabrax>"
 change_all to `<&>'     : "<abracadabrax>"
-exact match
-left match              : {"abracadabrax", 0, 12}
-right match             : {"abracadabrax", 0, 12}
-first match             : {"abracadabrax", 0, 12}
+exact_match
+left_match              : {"abracadabrax", 0, 12}
+right_match             : {"abracadabrax", 0, 12}
+first_match             : {"abracadabrax", 0, 12}
 
 > "foo"
 all matches             : [{"foo", 0, 3}]
@@ -687,10 +665,10 @@
 replace_all with `<>'   : "<>"
 change_first to `<&>'   : "<foo>"
 change_all to `<&>'     : "<foo>"
-exact match
-left match              : {"foo", 0, 3}
-right match             : {"foo", 0, 3}
-first match             : {"foo", 0, 3}
+exact_match
+left_match              : {"foo", 0, 3}
+right_match             : {"foo", 0, 3}
+first_match             : {"foo", 0, 3}
 
 
 * Matching against "."
@@ -701,9 +679,9 @@
 replace_all with `<>'   : "<><><><><><><><><><><>"
 change_first to `<&>'   : "<a>bracadabra"
 change_all to `<&>'     : "<a><b><r><a><c><a><d><a><b><r><a>"
-left match              : {"a", 0, 1}
-right match             : {"a", 10, 1}
-first match             : {"a", 0, 1}
+left_match              : {"a", 0, 1}
+right_match             : {"a", 10, 1}
+first_match             : {"a", 0, 1}
 
 > "xabracadabra"
 all matches             : [{"x", 0, 1}, {"a", 1, 1}, {"b", 2, 1}, {"r", 3, 1}, {"a", 4, 1}, {"c", 5, 1}, {"a", 6, 1}, {"d", 7, 1}, {"a", 8, 1}, {"b", 9, 1}, {"r", 10, 1}, {"a", 11, 1}]
@@ -711,9 +689,9 @@
 replace_all with `<>'   : "<><><><><><><><><><><><>"
 change_first to `<&>'   : "<x>abracadabra"
 change_all to `<&>'     : "<x><a><b><r><a><c><a><d><a><b><r><a>"
-left match              : {"x", 0, 1}
-right match             : {"a", 11, 1}
-first match             : {"x", 0, 1}
+left_match              : {"x", 0, 1}
+right_match             : {"a", 11, 1}
+first_match             : {"x", 0, 1}
 
 > "abracadabrax"
 all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"r", 2, 1}, {"a", 3, 1}, {"c", 4, 1}, {"a", 5, 1}, {"d", 6, 1}, {"a", 7, 1}, {"b", 8, 1}, {"r", 9, 1}, {"a", 10, 1}, {"x", 11, 1}]
@@ -721,9 +699,9 @@
 replace_all with `<>'   : "<><><><><><><><><><><><>"
 change_first to `<&>'   : "<a>bracadabrax"
 change_all to `<&>'     : "<a><b><r><a><c><a><d><a><b><r><a><x>"
-left match              : {"a", 0, 1}
-right match             : {"x", 11, 1}
-first match             : {"a", 0, 1}
+left_match              : {"a", 0, 1}
+right_match             : {"x", 11, 1}
+first_match             : {"a", 0, 1}
 
 > "foo"
 all matches             : [{"f", 0, 1}, {"o", 1, 1}, {"o", 2, 1}]
@@ -734,3 +712,207 @@
-left match              : {"f", 0, 1}
-right match             : {"o", 2, 1}
-first match             : {"f", 0, 1}
+left_match              : {"f", 0, 1}
+right_match             : {"o", 2, 1}
+first_match             : {"f", 0, 1}
+
+
+* Matching against "a|b"
+
+> "abracadabra"
+all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"a", 3, 1}, {"a", 5, 1}, {"a", 7, 1}, {"b", 8, 1}, {"a", 10, 1}]
+replace_first with `<>' : "<>bracadabra"
+replace_all with `<>'   : "<><>r<>c<>d<><>r<>"
+change_first to `<&>'   : "<a>bracadabra"
+change_all to `<&>'     : "<a><b>r<a>c<a>d<a><b>r<a>"
+left_match              : {"a", 0, 1}
+right_match             : {"a", 10, 1}
+first_match             : {"a", 0, 1}
+
+> "xabracadabra"
+all matches             : [{"a", 1, 1}, {"b", 2, 1}, {"a", 4, 1}, {"a", 6, 1}, {"a", 8, 1}, {"b", 9, 1}, {"a", 11, 1}]
+replace_first with `<>' : "x<>bracadabra"
+replace_all with `<>'   : "x<><>r<>c<>d<><>r<>"
+change_first to `<&>'   : "x<a>bracadabra"
+change_all to `<&>'     : "x<a><b>r<a>c<a>d<a><b>r<a>"
+right_match             : {"a", 11, 1}
+first_match             : {"a", 1, 1}
+
+> "abracadabrax"
+all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"a", 3, 1}, {"a", 5, 1}, {"a", 7, 1}, {"b", 8, 1}, {"a", 10, 1}]
+replace_first with `<>' : "<>bracadabrax"
+replace_all with `<>'   : "<><>r<>c<>d<><>r<>x"
+change_first to `<&>'   : "<a>bracadabrax"
+change_all to `<&>'     : "<a><b>r<a>c<a>d<a><b>r<a>x"
+left_match              : {"a", 0, 1}
+first_match             : {"a", 0, 1}
+
+> "foo"
+all matches             : []
+
+
+* Matching against "(a|b)"
+
+> "abracadabra"
+all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"a", 3, 1}, {"a", 5, 1}, {"a", 7, 1}, {"b", 8, 1}, {"a", 10, 1}]
+replace_first with `<>' : "<>bracadabra"
+replace_all with `<>'   : "<><>r<>c<>d<><>r<>"
+change_first to `<&>'   : "<a>bracadabra"
+change_all to `<&>'     : "<a><b>r<a>c<a>d<a><b>r<a>"
+left_match              : {"a", 0, 1}
+right_match             : {"a", 10, 1}
+first_match             : {"a", 0, 1}
+
+> "xabracadabra"
+all matches             : [{"a", 1, 1}, {"b", 2, 1}, {"a", 4, 1}, {"a", 6, 1}, {"a", 8, 1}, {"b", 9, 1}, {"a", 11, 1}]
+replace_first with `<>' : "x<>bracadabra"
+replace_all with `<>'   : "x<><>r<>c<>d<><>r<>"
+change_first to `<&>'   : "x<a>bracadabra"
+change_all to `<&>'     : "x<a><b>r<a>c<a>d<a><b>r<a>"
+right_match             : {"a", 11, 1}
+first_match             : {"a", 1, 1}
+
+> "abracadabrax"
+all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"a", 3, 1}, {"a", 5, 1}, {"a", 7, 1}, {"b", 8, 1}, {"a", 10, 1}]
+replace_first with `<>' : "<>bracadabrax"
+replace_all with `<>'   : "<><>r<>c<>d<><>r<>x"
+change_first to `<&>'   : "<a>bracadabrax"
+change_all to `<&>'     : "<a><b>r<a>c<a>d<a><b>r<a>x"
+left_match              : {"a", 0, 1}
+first_match             : {"a", 0, 1}
+
+> "foo"
+all matches             : []
+
+
+* Matching against "a|(b|c)"
+
+> "abracadabra"
+all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"a", 3, 1}, {"c", 4, 1}, {"a", 5, 1}, {"a", 7, 1}, {"b", 8, 1}, {"a", 10, 1}]
+replace_first with `<>' : "<>bracadabra"
+replace_all with `<>'   : "<><>r<><><>d<><>r<>"
+change_first to `<&>'   : "<a>bracadabra"
+change_all to `<&>'     : "<a><b>r<a><c><a>d<a><b>r<a>"
+left_match              : {"a", 0, 1}
+right_match             : {"a", 10, 1}
+first_match             : {"a", 0, 1}
+
+> "xabracadabra"
+all matches             : [{"a", 1, 1}, {"b", 2, 1}, {"a", 4, 1}, {"c", 5, 1}, {"a", 6, 1}, {"a", 8, 1}, {"b", 9, 1}, {"a", 11, 1}]
+replace_first with `<>' : "x<>bracadabra"
+replace_all with `<>'   : "x<><>r<><><>d<><>r<>"
+change_first to `<&>'   : "x<a>bracadabra"
+change_all to `<&>'     : "x<a><b>r<a><c><a>d<a><b>r<a>"
+right_match             : {"a", 11, 1}
+first_match             : {"a", 1, 1}
+
+> "abracadabrax"
+all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"a", 3, 1}, {"c", 4, 1}, {"a", 5, 1}, {"a", 7, 1}, {"b", 8, 1}, {"a", 10, 1}]
+replace_first with `<>' : "<>bracadabrax"
+replace_all with `<>'   : "<><>r<><><>d<><>r<>x"
+change_first to `<&>'   : "<a>bracadabrax"
+change_all to `<&>'     : "<a><b>r<a><c><a>d<a><b>r<a>x"
+left_match              : {"a", 0, 1}
+first_match             : {"a", 0, 1}
+
+> "foo"
+all matches             : []
+
+
+* Matching against "(a|(b|c))"
+
+> "abracadabra"
+all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"a", 3, 1}, {"c", 4, 1}, {"a", 5, 1}, {"a", 7, 1}, {"b", 8, 1}, {"a", 10, 1}]
+replace_first with `<>' : "<>bracadabra"
+replace_all with `<>'   : "<><>r<><><>d<><>r<>"
+change_first to `<&>'   : "<a>bracadabra"
+change_all to `<&>'     : "<a><b>r<a><c><a>d<a><b>r<a>"
+left_match              : {"a", 0, 1}
+right_match             : {"a", 10, 1}
+first_match             : {"a", 0, 1}
+
+> "xabracadabra"
+all matches             : [{"a", 1, 1}, {"b", 2, 1}, {"a", 4, 1}, {"c", 5, 1}, {"a", 6, 1}, {"a", 8, 1}, {"b", 9, 1}, {"a", 11, 1}]
+replace_first with `<>' : "x<>bracadabra"
+replace_all with `<>'   : "x<><>r<><><>d<><>r<>"
+change_first to `<&>'   : "x<a>bracadabra"
+change_all to `<&>'     : "x<a><b>r<a><c><a>d<a><b>r<a>"
+right_match             : {"a", 11, 1}
+first_match             : {"a", 1, 1}
+
+> "abracadabrax"
+all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"a", 3, 1}, {"c", 4, 1}, {"a", 5, 1}, {"a", 7, 1}, {"b", 8, 1}, {"a", 10, 1}]
+replace_first with `<>' : "<>bracadabrax"
+replace_all with `<>'   : "<><>r<><><>d<><>r<>x"
+change_first to `<&>'   : "<a>bracadabrax"
+change_all to `<&>'     : "<a><b>r<a><c><a>d<a><b>r<a>x"
+left_match              : {"a", 0, 1}
+first_match             : {"a", 0, 1}
+
+> "foo"
+all matches             : []
+
+
+* Matching against "a|b+"
+
+> "abbracadabra"
+all matches             : [{"a", 0, 1}, {"bb", 1, 2}, {"a", 4, 1}, {"a", 6, 1}, {"a", 8, 1}, {"b", 9, 1}, {"a", 11, 1}]
+replace_first with `<>' : "<>bbracadabra"
+replace_all with `<>'   : "<><>r<>c<>d<><>r<>"
+change_first to `<&>'   : "<a>bbracadabra"
+change_all to `<&>'     : "<a><bb>r<a>c<a>d<a><b>r<a>"
+left_match              : {"a", 0, 1}
+right_match             : {"a", 11, 1}
+first_match             : {"a", 0, 1}
+
+> "xabbracadabra"
+all matches             : [{"a", 1, 1}, {"bb", 2, 2}, {"a", 5, 1}, {"a", 7, 1}, {"a", 9, 1}, {"b", 10, 1}, {"a", 12, 1}]
+replace_first with `<>' : "x<>bbracadabra"
+replace_all with `<>'   : "x<><>r<>c<>d<><>r<>"
+change_first to `<&>'   : "x<a>bbracadabra"
+change_all to `<&>'     : "x<a><bb>r<a>c<a>d<a><b>r<a>"
+right_match             : {"a", 12, 1}
+first_match             : {"a", 1, 1}
+
+> "abbracadabrax"
+all matches             : [{"a", 0, 1}, {"bb", 1, 2}, {"a", 4, 1}, {"a", 6, 1}, {"a", 8, 1}, {"b", 9, 1}, {"a", 11, 1}]
+replace_first with `<>' : "<>bbracadabrax"
+replace_all with `<>'   : "<><>r<>c<>d<><>r<>x"
+change_first to `<&>'   : "<a>bbracadabrax"
+change_all to `<&>'     : "<a><bb>r<a>c<a>d<a><b>r<a>x"
+left_match              : {"a", 0, 1}
+first_match             : {"a", 0, 1}
+
+> "foo"
+all matches             : []
+
+
+* Matching against "a+|b"
+
+> "abbraacadabra"
+all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"b", 2, 1}, {"aa", 4, 2}, {"a", 7, 1}, {"a", 9, 1}, {"b", 10, 1}, {"a", 12, 1}]
+replace_first with `<>' : "<>bbraacadabra"
+replace_all with `<>'   : "<><><>r<>c<>d<><>r<>"
+change_first to `<&>'   : "<a>bbraacadabra"
+change_all to `<&>'     : "<a><b><b>r<aa>c<a>d<a><b>r<a>"
+left_match              : {"a", 0, 1}
+right_match             : {"a", 12, 1}
+first_match             : {"a", 0, 1}
+
+> "xabbraacadabra"
+all matches             : [{"a", 1, 1}, {"b", 2, 1}, {"b", 3, 1}, {"aa", 5, 2}, {"a", 8, 1}, {"a", 10, 1}, {"b", 11, 1}, {"a", 13, 1}]
+replace_first with `<>' : "x<>bbraacadabra"
+replace_all with `<>'   : "x<><><>r<>c<>d<><>r<>"
+change_first to `<&>'   : "x<a>bbraacadabra"
+change_all to `<&>'     : "x<a><b><b>r<aa>c<a>d<a><b>r<a>"
+right_match             : {"a", 13, 1}
+first_match             : {"a", 1, 1}
+
+> "abbraacadabrax"
+all matches             : [{"a", 0, 1}, {"b", 1, 1}, {"b", 2, 1}, {"aa", 4, 2}, {"a", 7, 1}, {"a", 9, 1}, {"b", 10, 1}, {"a", 12, 1}]
+replace_first with `<>' : "<>bbraacadabrax"
+replace_all with `<>'   : "<><><>r<>c<>d<><>r<>x"
+change_first to `<&>'   : "<a>bbraacadabrax"
+change_all to `<&>'     : "<a><b><b>r<aa>c<a>d<a><b>r<a>x"
+left_match              : {"a", 0, 1}
+first_match             : {"a", 0, 1}
+
+> "foo"
+all matches             : []
diff -u tests/test_regex.in tests/test_regex.in
--- tests/test_regex.in	28 Nov 2002 02:14:06 -0000
+++ tests/test_regex.in	29 Nov 2002 03:12:04 -0000
@@ -121,0 +121,36 @@
+
+set_regex("a|b").
+try_match("abracadabra").
+try_match("xabracadabra").
+try_match("abracadabrax").
+try_match("foo").
+
+set_regex("(a|b)").
+try_match("abracadabra").
+try_match("xabracadabra").
+try_match("abracadabrax").
+try_match("foo").
+
+set_regex("a|(b|c)").
+try_match("abracadabra").
+try_match("xabracadabra").
+try_match("abracadabrax").
+try_match("foo").
+
+set_regex("(a|(b|c))").
+try_match("abracadabra").
+try_match("xabracadabra").
+try_match("abracadabrax").
+try_match("foo").
+
+set_regex("a|b+").
+try_match("abbracadabra").
+try_match("xabbracadabra").
+try_match("abbracadabrax").
+try_match("foo").
+
+set_regex("a+|b").
+try_match("abbraacadabra").
+try_match("xabbraacadabra").
+try_match("abbraacadabrax").
+try_match("foo").
diff -u tests/test_regex.m tests/test_regex.m
--- tests/test_regex.m	28 Nov 2002 02:38:37 -0000
+++ tests/test_regex.m	29 Nov 2002 03:09:16 -0000
@@ -77,21 +77,21 @@
               else true
             ),
             ( if exact_match(R, S) then
-                io__format("exact match\n", [], !IO)
+                io__format("exact_match\n", [], !IO)
               else true
             ),
             ( if left_match(R, S, LSub, LS, LC) then
-                io__format("left match              : {\"%s\", %d, %d}\n",
+                io__format("left_match              : {\"%s\", %d, %d}\n",
                         [s(LSub), i(LS), i(LC)], !IO)
               else true
             ),
             ( if right_match(R, S, RSub, RS, RC) then
-                io__format("right match             : {\"%s\", %d, %d}\n",
+                io__format("right_match             : {\"%s\", %d, %d}\n",
                         [s(RSub), i(RS), i(RC)], !IO)
               else true
             ),
             ( if first_match(R, S, FSub, FS, FC) then
-                io__format("first match             : {\"%s\", %d, %d}\n",
+                io__format("first_match             : {\"%s\", %d, %d}\n",
                         [s(FSub), i(FS), i(FC)], !IO)
               else true
             ),
only in patch2:
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ tests/cmp_regex_gawk	29 Nov 2002 03:16:18 -0000
@@ -0,0 +1,72 @@
+#! /usr/bin/gawk -f
+
+# This is a gawk program that attempts to mirror what test_regex does
+# for global search and replace.  It scans the files test_regex.in
+# and test_regex.exp and tests for differences between the results
+# obtained by gawk and those obtained by regex.  It exits with a non-zero
+# return code if any differences are detected.
+
+BEGIN {
+	IN     = "test_regex.in"
+	EXP    = "test_regex.exp"
+	failed = 0	# exit status; set to 1 when regex and gawk disagree
+
+	while((getline lineIN < IN) > 0) {	# > 0: stop on EOF (0) and on error (-1)
+
+		if(lineIN ~ "set_regex") {
+			regexp = strip(lineIN)
+		}
+		else if(lineIN ~ "try_match") {
+			# Compute gawk's own global substitution for this subject string.
+			string = strip(lineIN)
+			gawk   = string
+			gsub(regexp, "<&>", gawk)
+
+			while((getline lineEXP < EXP) > 0) {
+				# Advance through EXP to the result recorded for this try_match.
+				if(lineEXP ~ "change_all") {
+					regex = strip(lineEXP)
+					if(regex != gawk) {
+						print "pattern \"" regexp "\""
+						print "string  \"" string "\""
+						print "regex   \"" regex "\""
+						print "gawk    \"" gawk "\""
+						print ""
+
+						failed = 1
+					}
+
+					break
+				}
+				else if(lineEXP ~ "^all matches *: \\[\\]$") {
+					if(gawk != string) {
+						print "pattern \"" regexp "\""
+						print "string  \"" string "\""
+						print "regex finds no match"
+						print "gawk    \"" gawk "\""
+						print ""
+
+						failed = 1
+					}
+					# Skip the rest of this record, up to the next blank line.
+					while((getline lineEXP < EXP) > 0) {
+						if(lineEXP == "") { break }
+					}
+
+					break
+				}
+			}
+		}
+	}
+
+	exit failed
+}
+
+	# Remove the outermost level of quotation from a string.
+	# Returns the text between the first and the last `"' of l, unescaped.
+function strip(l) {
+	sub(/^[^\"]*\"/, "",   l)	# drop everything up to and including the first `"'
+	sub(/\"[^\"]*$/, "",   l)	# drop the last `"' and everything after it
+	gsub(/\\\\/,     "\\", l)	# collapse each doubled backslash to a single one
+	return l
+}
--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list