[m-rev.] For review: updates to string.m
Ralph Becket
rafe at cs.mu.OZ.AU
Fri Jun 28 11:59:01 AEST 2002
Some updates and additions to string.m.
Estimated hours taken: 4
Branches: main
library/string.m:
Added pred string__suffix/2.
Added funcs string__fold[lr]_substring/5.
Added preds string__fold[lr]_substring/6.
Added func string__foldr/3.
Added pred string__foldr/4.
Added func string__words/1.
Recoded string__prefix for efficiency.
Recoded string__sub_string_search_2 for efficiency.
Minor cosmetic changes to separator comments.
NEWS:
Mention the above changes.
Index: NEWS
===================================================================
RCS file: /home/mercury1/repository/mercury/NEWS,v
retrieving revision 1.259
diff -u -r1.259 NEWS
--- NEWS 30 May 2002 12:54:51 -0000 1.259
+++ NEWS 28 Jun 2002 01:58:09 -0000
@@ -181,6 +181,9 @@
to update a constant string will cause a crash. Fixing this properly
will be a lot of work, so for now we have just removed the modes.
+* We've added string__suffix, string__words/1, string__foldr,
+ string__foldl_substring and string__foldr_substring.
+
* The exception module has a new predicate `try_store', which is
like `try_io', but which works with stores rather than io__states.
Index: string.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/string.m,v
retrieving revision 1.172
diff -u -r1.172 string.m
--- string.m 24 Jun 2002 11:09:06 -0000 1.172
+++ string.m 27 Jun 2002 01:59:02 -0000
@@ -63,6 +63,12 @@
% string__prefix(String, Prefix) is true iff Prefix is a
% prefix of String. Same as string__append(Prefix, _, String).
+:- pred string__suffix(string, string).
+:- mode string__suffix(in, in) is semidet.
+:- mode string__suffix(in, out) is multi.
+ % string__suffix(String, Suffix) is true iff Suffix is a
+ % suffix of String. Same as string__append(_, Suffix, String).
+
:- func string__char_to_string(char) = string.
:- pred string__char_to_string(char, string).
:- mode string__char_to_string(in, out) is det.
@@ -309,6 +315,48 @@
% list__foldl(Closure, Chars, Acc0, Acc)
% but is implemented more efficiently.)
+:- func string__foldl_substring(func(char, T) = T, string, int, int, T) = T.
+:- pred string__foldl_substring(pred(char, T, T), string, int, int, T, T).
+:- mode string__foldl_substring(pred(in, in, out) is det, in, in, in,
+ in, out) is det.
+:- mode string__foldl_substring(pred(in, di, uo) is det, in, in, in,
+ di, uo) is det.
+:- mode string__foldl_substring(pred(in, in, out) is semidet, in, in, in,
+ in, out) is semidet.
+:- mode string__foldl_substring(pred(in, in, out) is nondet, in, in, in,
+ in, out) is nondet.
+:- mode string__foldl_substring(pred(in, in, out) is multi, in, in, in,
+ in, out) is multi.
+% string__foldl_substring(Closure, String, Start, Count, Acc0, Acc)
+% is equivalent to string__foldl(Closure, SubString, Acc0, Acc)
+% where SubString = string__substring(String, Start, Count).
+
+:- func string__foldr(func(char, T) = T, string, T) = T.
+:- pred string__foldr(pred(char, T, T), string, T, T).
+:- mode string__foldr(pred(in, in, out) is det, in, in, out) is det.
+:- mode string__foldr(pred(in, di, uo) is det, in, di, uo) is det.
+:- mode string__foldr(pred(in, in, out) is semidet, in, in, out) is semidet.
+:- mode string__foldr(pred(in, in, out) is nondet, in, in, out) is nondet.
+:- mode string__foldr(pred(in, in, out) is multi, in, in, out) is multi.
+% string__foldr(Closure, String, Acc0, Acc):
+% As string__foldl/4, except that processing proceeds right-to-left.
+
+:- func string__foldr_substring(func(char, T) = T, string, int, int, T) = T.
+:- pred string__foldr_substring(pred(char, T, T), string, int, int, T, T).
+:- mode string__foldr_substring(pred(in, in, out) is det, in, in, in,
+ in, out) is det.
+:- mode string__foldr_substring(pred(in, di, uo) is det, in, in, in,
+ di, uo) is det.
+:- mode string__foldr_substring(pred(in, in, out) is semidet, in, in, in,
+ in, out) is semidet.
+:- mode string__foldr_substring(pred(in, in, out) is nondet, in, in, in,
+ in, out) is nondet.
+:- mode string__foldr_substring(pred(in, in, out) is multi, in, in, in,
+ in, out) is multi.
+% string__foldr_substring(Closure, String, Start, Count, Acc0, Acc)
+% is equivalent to string__foldr(Closure, SubString, Acc0, Acc)
+% where SubString = string__unsafe_substring(String, Start, Count).
+
:- func string__words(pred(char), string) = list(string).
:- mode string__words(pred(in) is semidet, in) = out is det.
% string__words(SepP, String) returns the list of
@@ -319,6 +367,9 @@
% string__words(char__is_whitespace, " the cat sat on the mat") =
% ["the", "cat", "sat", "on", "the", "mat"]
+:- func string__words(string) = list(string).
+% string__words(String) = string__words(char__is_whitespace, String).
+
:- pred string__split(string, int, string, string).
:- mode string__split(in, in, out, out) is det.
% string__split(String, Count, LeftSubstring, RightSubstring):
@@ -554,13 +605,13 @@
string__index(String, 0, Char),
Len = string__length(String),
( if Char = ('-') then
- string__foldl2(accumulate_int(Base), String, 1, Len, 0, N),
+ foldl_substring(accumulate_int(Base), String, 1, Len - 1, 0, N),
Int = -N
else if Char = ('+') then
- string__foldl2(accumulate_int(Base), String, 1, Len, 0, N),
+ foldl_substring(accumulate_int(Base), String, 1, Len - 1, 0, N),
Int = N
else
- string__foldl2(accumulate_int(Base), String, 0, Len, 0, N),
+ foldl_substring(accumulate_int(Base), String, 0, Len, 0, N),
Int = N
).
@@ -589,30 +640,38 @@
string__foldl(Closure, String, Acc0, Acc) :-
string__length(String, Length),
- string__foldl2(Closure, String, 0, Length, Acc0, Acc).
+ string__foldl_substring(Closure, String, 0, Length, Acc0, Acc).
-:- pred string__foldl2(pred(char, T, T), string, int, int, T, T).
-:- mode string__foldl2(pred(in, in, out) is det, in, in, in, in, out) is det.
-:- mode string__foldl2(pred(in, di, uo) is det, in, in, in, di, uo) is det.
-:- mode string__foldl2(pred(in, in, out) is semidet, in, in, in, in, out)
- is semidet.
-:- mode string__foldl2(pred(in, in, out) is nondet, in, in, in, in, out)
- is nondet.
-:- mode string__foldl2(pred(in, in, out) is multi, in, in, in, in, out)
- is multi.
-
-string__foldl2(Closure, String, N, Max, Acc0, Acc) :-
- (
- N >= Max
- ->
- Acc = Acc0
- ;
- string__unsafe_index(String, N, Char),
- call(Closure, Char, Acc0, Acc1),
- N1 is N + 1,
- string__foldl2(Closure, String, N1, Max, Acc1, Acc)
+string__foldl_substring(Closure, String, I, Count, Acc0, Acc) :-
+ ( if 0 < Count then
+ Closure(string__unsafe_index(String, I), Acc0, Acc1),
+ string__foldl_substring(Closure, String, I + 1, Count - 1,
+ Acc1, Acc)
+ else
+ Acc = Acc0
).
+string__foldr(F, String, Acc0) = Acc :-
+ Closure = ( pred(X::in, Y::in, Z::out) is det :- Z = F(X, Y)),
+ string__foldr(Closure, String, Acc0, Acc).
+
+string__foldr_substring(F, String, Start, Count, Acc0) = Acc :-
+ Closure = ( pred(X::in, Y::in, Z::out) is det :- Z = F(X, Y) ),
+ string__foldr_substring(Closure, String, Start, Count, Acc0, Acc).
+
+string__foldr(Closure, String, Acc0, Acc) :-
+ string__foldr_substring(Closure, String, 0, length(String), Acc0, Acc).
+
+string__foldr_substring(Closure, String, I, Count, Acc0, Acc) :-
+ ( if 0 < Count then
+ Closure(string__unsafe_index(String, I + Count - 1),
+ Acc0, Acc1),
+ string__foldr_substring(Closure, String, I, Count - 1,
+ Acc1, Acc )
+ else
+ Acc = Acc0
+ ).
+
string__left(String, Count, LeftString) :-
string__split(String, Count, LeftString, _RightString).
@@ -627,8 +686,73 @@
string__to_char_list(C, LC),
list__remove_suffix(LA, LB, LC).
-string__prefix(String, Prefix) :-
- string__append(Prefix, _, String).
+:- pragma promise_pure(string__prefix/2).
+
+string__prefix(String::in, Prefix::in) :-
+ Len = length(String),
+ PreLen = length(Prefix),
+ PreLen =< Len,
+ prefix_2_iii(String, Prefix, PreLen - 1).
+
+:- pred prefix_2_iii(string, string, int).
+:- mode prefix_2_iii(in, in, in) is semidet.
+
+prefix_2_iii(String, Prefix, I) :-
+ ( if 0 =< I then
+ (String `unsafe_index` I) =
+ (Prefix `unsafe_index` I) `with_type` char,
+ prefix_2_iii(String, Prefix, I - 1)
+ else
+ true
+ ).
+
+string__prefix(String::in, Prefix::out) :-
+ Len = length(String),
+ prefix_2_ioii(String, Prefix, 0, Len).
+
+:- pred prefix_2_ioii(string, string, int, int).
+:- mode prefix_2_ioii(in, out, in, in) is multi.
+
+prefix_2_ioii(String, Prefix, PreLen, _Len) :-
+ Prefix = unsafe_substring(String, 0, PreLen).
+
+prefix_2_ioii(String, Prefix, PreLen, Len) :-
+ PreLen < Len,
+ prefix_2_ioii(String, Prefix, PreLen + 1, Len).
+
+:- pragma promise_pure(string__suffix/2).
+
+string__suffix(String::in, Suffix::in) :-
+ Len = length(String),
+ PreLen = length(Suffix),
+ PreLen =< Len,
+ suffix_2_iiii(String, Suffix, Len - PreLen, Len).
+
+:- pred suffix_2_iiii(string, string, int, int).
+:- mode suffix_2_iiii(in, in, in, in) is semidet.
+
+suffix_2_iiii(String, Suffix, I, Len) :-
+ ( if I < Len then
+ (String `unsafe_index` I) =
+ (Suffix `unsafe_index` I) `with_type` char,
+ suffix_2_iiii(String, Suffix, I + 1, Len)
+ else
+ true
+ ).
+
+string__suffix(String::in, Suffix::out) :-
+ Len = length(String),
+ suffix_2_ioii(String, Suffix, 0, Len).
+
+:- pred suffix_2_ioii(string, string, int, int).
+:- mode suffix_2_ioii(in, out, in, in) is multi.
+
+suffix_2_ioii(String, Suffix, SufLen, Len) :-
+ Suffix = unsafe_substring(String, Len - SufLen, SufLen).
+
+suffix_2_ioii(String, Suffix, SufLen, Len) :-
+ SufLen < Len,
+ suffix_2_ioii(String, Suffix, SufLen + 1, Len).
string__char_to_string(Char, String) :-
string__to_char_list(String, [Char]).
@@ -1171,18 +1295,22 @@
}").
string__sub_string_search(String, SubString, Index) :-
- string__sub_string_search_2(String, SubString, 0, Index).
+ sub_string_search_2(String, SubString, 0, length(String), Index).
-:- pred sub_string_search_2(string::in, string::in,
- int::in, int::out) is semidet.
+ % Brute force string searching. For short Strings this is
+ % good; for longer strings Boyer-Moore is much better.
+ %
+:- pred sub_string_search_2(string::in, string::in, int::in, int::in,
+ int::out) is semidet.
- % XXX This is very inefficient.
-sub_string_search_2(String, SubString, CurrentIndex, Index) :-
- ( string__prefix(String, SubString) ->
- Index = CurrentIndex
- ;
- string__first_char(String, _, Rest),
- sub_string_search_2(Rest, SubString, CurrentIndex + 1, Index)
+sub_string_search_2(String, SubString, I, Length, Index) :-
+ ( if
+ I < Length,
+ string__prefix(String, SubString)
+ then
+ Index = I
+ else
+ sub_string_search_2(String, SubString, I + 1, Length, Index)
).
%-----------------------------------------------------------------------------%
@@ -2644,6 +2772,10 @@
P = ( pred(X::in, Y::in, Z::out) is det :- Z = F(X, Y) ),
string__foldl(P, S, A, B).
+string__foldl_substring(F, S, Start, Count, A) = B :-
+ P = ( pred(X::in, Y::in, Z::out) is det :- Z = F(X, Y) ),
+ string__foldl_substring(P, S, Start, Count, A, B).
+
string__left(S1, N) = S2 :-
string__left(S1, N, S2).
@@ -2662,13 +2794,13 @@
string__format(S1, PT) = S2 :-
string__format(S1, PT, S2).
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
string__words(SepP, String) = Words :-
I = preceding_boundary(isnt(SepP), String, string__length(String) - 1),
Words = words_2(SepP, String, I, []).
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
:- func words_2(pred(char), string, int, list(string)) = list(string).
:- mode words_2(pred(in) is semidet, in, in, in) = out is det.
@@ -2684,7 +2816,11 @@
Words = words_2(SepP, String, PrevWordEnd, [Word | Words0])
).
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
+
+string__words(String) = string__words(char__is_whitespace, String).
+
+%------------------------------------------------------------------------------%
% preceding_boundary(SepP, String, I) returns the largest index J =< I
% in String of the char that is SepP and min(-1, I) if there is no
@@ -2704,15 +2840,15 @@
preceding_boundary(SepP, String, I - 1)
).
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
S1 ++ S2 = string__append(S1, S2).
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
string__det_to_int(S) = string__det_base_string_to_int(10, S).
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
string__det_base_string_to_int(Base, S) = N :-
( if string__base_string_to_int(Base, S, N0) then
@@ -2721,9 +2857,9 @@
error("string__det_base_string_to_int/2: conversion failed")
).
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
:- end_module string.
-% ---------------------------------------------------------------------------- %
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
+%------------------------------------------------------------------------------%
--------------------------------------------------------------------------
mercury-reviews mailing list
post: mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------
More information about the reviews
mailing list