[m-rev.] For review: updates to string.m

Ralph Becket rafe at cs.mu.OZ.AU
Fri Jun 28 11:59:01 AEST 2002


Some updates and additions to string.m.

Estimated hours taken: 4
Branches: main

library/string.m:
	Added pred string__suffix/2.
	Added funcs string__fold[lr]_substring/5.
	Added preds string__fold[lr]_substring/6.
	Added func string__foldr/3.
	Added pred string__foldr/4.
	Added func string__words/1.
	Recoded string__prefix for efficiency.
	Recoded string__sub_string_search_2 for efficiency.
	Minor cosmetic changes to separator comments.

NEWS:
	Mention the above changes.

Index: NEWS
===================================================================
RCS file: /home/mercury1/repository/mercury/NEWS,v
retrieving revision 1.259
diff -u -r1.259 NEWS
--- NEWS	30 May 2002 12:54:51 -0000	1.259
+++ NEWS	28 Jun 2002 01:58:09 -0000
@@ -181,6 +181,9 @@
   to update a constant string will cause a crash. Fixing this properly
   will be a lot of work, so for now we have just removed the modes.
 
+* We've added string__suffix, string__words/1, string__foldr,
+  string__foldl_substring and string__foldr_substring.
+
 * The exception module has a new predicate `try_store', which is
   like `try_io', but which works with stores rather than io__states.
 
Index: string.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/string.m,v
retrieving revision 1.172
diff -u -r1.172 string.m
--- string.m	24 Jun 2002 11:09:06 -0000	1.172
+++ string.m	27 Jun 2002 01:59:02 -0000
@@ -63,6 +63,12 @@
 	% string__prefix(String, Prefix) is true iff Prefix is a
 	% prefix of String.  Same as string__append(Prefix, _, String).
 
+:- pred string__suffix(string, string).
+:- mode string__suffix(in, in) is semidet.
+:- mode string__suffix(in, out) is multi.
+	% string__suffix(String, Suffix) is true iff Suffix is a
+	% suffix of String.  Same as string__append(_, Suffix, String).
+
 :- func string__char_to_string(char) = string.
 :- pred string__char_to_string(char, string).
 :- mode string__char_to_string(in, out) is det.
@@ -309,6 +315,48 @@
 %		list__foldl(Closure, Chars, Acc0, Acc)
 %	but is implemented more efficiently.)
 
+:- func string__foldl_substring(func(char, T) = T, string, int, int, T) = T.
+:- pred string__foldl_substring(pred(char, T, T), string, int, int, T, T).
+:- mode string__foldl_substring(pred(in, in, out) is det, in, in, in,
+		in, out) is det.
+:- mode string__foldl_substring(pred(in, di, uo) is det, in, in, in,
+		di, uo) is det.
+:- mode string__foldl_substring(pred(in, in, out) is semidet, in, in, in,
+		in, out) is semidet.
+:- mode string__foldl_substring(pred(in, in, out) is nondet, in, in, in,
+		in, out) is nondet.
+:- mode string__foldl_substring(pred(in, in, out) is multi, in, in, in,
+		in, out) is multi.
+%	string__foldl_substring(Closure, String, Start, Count, Acc0, Acc)
+%	is equivalent to string__foldl(Closure, SubString, Acc0, Acc)
+%	where SubString = string__substring(String, Start, Count).
+
+:- func string__foldr(func(char, T) = T, string, T) = T.
+:- pred string__foldr(pred(char, T, T), string, T, T).
+:- mode string__foldr(pred(in, in, out) is det, in, in, out) is det.
+:- mode string__foldr(pred(in, di, uo) is det, in, di, uo) is det.
+:- mode string__foldr(pred(in, in, out) is semidet, in, in, out) is semidet.
+:- mode string__foldr(pred(in, in, out) is nondet, in, in, out) is nondet.
+:- mode string__foldr(pred(in, in, out) is multi, in, in, out) is multi.
+%	string__foldr(Closure, String, Acc0, Acc):
+%	As string__foldl/4, except that processing proceeds right-to-left.
+
+:- func string__foldr_substring(func(char, T) = T, string, int, int, T) = T.
+:- pred string__foldr_substring(pred(char, T, T), string, int, int, T, T).
+:- mode string__foldr_substring(pred(in, in, out) is det, in, in, in,
+		in, out) is det.
+:- mode string__foldr_substring(pred(in, di, uo) is det, in, in, in,
+		di, uo) is det.
+:- mode string__foldr_substring(pred(in, in, out) is semidet, in, in, in,
+		in, out) is semidet.
+:- mode string__foldr_substring(pred(in, in, out) is nondet, in, in, in,
+		in, out) is nondet.
+:- mode string__foldr_substring(pred(in, in, out) is multi, in, in, in,
+		in, out) is multi.
+%	string__foldr_substring(Closure, String, Start, Count, Acc0, Acc)
+%	is equivalent to string__foldr(Closure, SubString, Acc0, Acc)
+%	where SubString = string__unsafe_substring(String, Start, Count).
+
 :- func string__words(pred(char), string) = list(string).
 :- mode string__words(pred(in) is semidet, in) = out is det.
 %	string__words(SepP, String) returns the list of
@@ -319,6 +367,9 @@
 %	string__words(char__is_whitespace, " the cat  sat on the  mat") =
 %		["the", "cat", "sat", "on", "the", "mat"]
 
+:- func string__words(string) = list(string).
+%	string__words(String) = string__words(char__is_whitespace, String).
+
 :- pred string__split(string, int, string, string).
 :- mode string__split(in, in, out, out) is det.
 %	string__split(String, Count, LeftSubstring, RightSubstring):
@@ -554,13 +605,13 @@
 	string__index(String, 0, Char),
 	Len = string__length(String),
 	(      if Char = ('-') then
-		string__foldl2(accumulate_int(Base), String, 1, Len, 0, N),
+		foldl_substring(accumulate_int(Base), String, 1, Len - 1, 0, N),
 		Int = -N
 	  else if Char = ('+') then
-		string__foldl2(accumulate_int(Base), String, 1, Len, 0, N),
+		foldl_substring(accumulate_int(Base), String, 1, Len - 1, 0, N),
 		Int = N
 	  else
-		string__foldl2(accumulate_int(Base), String, 0, Len, 0, N),
+		foldl_substring(accumulate_int(Base), String, 0, Len, 0, N),
 		Int = N
 	).
 
@@ -589,30 +640,38 @@
 
 string__foldl(Closure, String, Acc0, Acc) :-
 	string__length(String, Length),
-	string__foldl2(Closure, String, 0, Length, Acc0, Acc).
+	string__foldl_substring(Closure, String, 0, Length, Acc0, Acc).
 
-:- pred string__foldl2(pred(char, T, T), string, int, int, T, T).
-:- mode string__foldl2(pred(in, in, out) is det, in, in, in, in, out) is det.
-:- mode string__foldl2(pred(in, di, uo) is det, in, in, in, di, uo) is det.
-:- mode string__foldl2(pred(in, in, out) is semidet, in, in, in, in, out)
-		is semidet.
-:- mode string__foldl2(pred(in, in, out) is nondet, in, in, in, in, out)
-		is nondet.
-:- mode string__foldl2(pred(in, in, out) is multi, in, in, in, in, out)
-		is multi.
-
-string__foldl2(Closure, String, N, Max, Acc0, Acc) :-
-	(
-		N >= Max
-	->
-		Acc = Acc0
-	;
-		string__unsafe_index(String, N, Char),
-		call(Closure, Char, Acc0, Acc1),
-		N1 is N + 1,
-		string__foldl2(Closure, String, N1, Max, Acc1, Acc)
+string__foldl_substring(Closure, String, I, Count, Acc0, Acc) :-
+	( if 0 < Count then
+		Closure(string__unsafe_index(String, I), Acc0, Acc1),
+		string__foldl_substring(Closure, String, I + 1, Count - 1,
+			Acc1, Acc)
+	  else
+	  	Acc = Acc0
 	).
 
+string__foldr(F, String, Acc0) = Acc :-
+	Closure = ( pred(X::in, Y::in, Z::out) is det :- Z = F(X, Y)),
+	string__foldr(Closure, String, Acc0, Acc).
+
+string__foldr_substring(F, String, Start, Count, Acc0) = Acc :-
+	Closure = ( pred(X::in, Y::in, Z::out) is det :- Z = F(X, Y) ),
+	string__foldr_substring(Closure, String, Start, Count, Acc0, Acc).
+
+string__foldr(Closure, String, Acc0, Acc) :-
+	string__foldr_substring(Closure, String, 0, length(String), Acc0, Acc).
+
+string__foldr_substring(Closure, String, I, Count, Acc0, Acc) :-
+	( if 0 < Count then
+		Closure(string__unsafe_index(String, I + Count - 1),
+			Acc0, Acc1),
+		string__foldr_substring(Closure, String, I, Count - 1,
+			Acc1, Acc )
+	  else
+	  	Acc = Acc0
+	).
+  		
 string__left(String, Count, LeftString) :-
 	string__split(String, Count, LeftString, _RightString).
 
@@ -627,8 +686,73 @@
 	string__to_char_list(C, LC),
 	list__remove_suffix(LA, LB, LC).
 
-string__prefix(String, Prefix) :-
-	string__append(Prefix, _, String).
+:- pragma promise_pure(string__prefix/2).
+
+string__prefix(String::in, Prefix::in) :-
+	Len    = length(String),
+	PreLen = length(Prefix),
+	PreLen =< Len,
+	prefix_2_iii(String, Prefix, PreLen - 1).
+
+:- pred prefix_2_iii(string, string, int).
+:- mode prefix_2_iii(in, in, in) is semidet.
+
+prefix_2_iii(String, Prefix, I) :-
+	( if 0 =< I then
+		(String `unsafe_index` I) =
+			(Prefix `unsafe_index` I) `with_type` char,
+		prefix_2_iii(String, Prefix, I - 1)
+	  else
+	  	true
+	).
+
+string__prefix(String::in, Prefix::out) :-
+	Len = length(String),
+	prefix_2_ioii(String, Prefix, 0, Len).
+
+:- pred prefix_2_ioii(string, string, int, int).
+:- mode prefix_2_ioii(in, out, in, in) is multi.
+
+prefix_2_ioii(String, Prefix, PreLen, _Len) :-
+	Prefix = unsafe_substring(String, 0, PreLen).
+
+prefix_2_ioii(String, Prefix, PreLen, Len) :-
+	PreLen < Len,
+	prefix_2_ioii(String, Prefix, PreLen + 1, Len).
+
+:- pragma promise_pure(string__suffix/2).
+
+string__suffix(String::in, Suffix::in) :-
+	Len    = length(String),
+	PreLen = length(Suffix),
+	PreLen =< Len,
+	suffix_2_iiii(String, Suffix, Len - PreLen, Len).
+
+:- pred suffix_2_iiii(string, string, int, int).
+:- mode suffix_2_iiii(in, in, in, in) is semidet.
+
+suffix_2_iiii(String, Suffix, I, Len) :-
+	( if I < Len then
+		(String `unsafe_index` I) =
+			(Suffix `unsafe_index` I) `with_type` char,
+		suffix_2_iiii(String, Suffix, I + 1, Len)
+	  else
+	  	true
+	).
+
+string__suffix(String::in, Suffix::out) :-
+	Len = length(String),
+	suffix_2_ioii(String, Suffix, 0, Len).
+
+:- pred suffix_2_ioii(string, string, int, int).
+:- mode suffix_2_ioii(in, out, in, in) is multi.
+
+suffix_2_ioii(String, Suffix, SufLen, Len) :-
+	Suffix = unsafe_substring(String, Len - SufLen, SufLen).
+
+suffix_2_ioii(String, Suffix, SufLen, Len) :-
+	SufLen < Len,
+	suffix_2_ioii(String, Suffix, SufLen + 1, Len).
 
 string__char_to_string(Char, String) :-
 	string__to_char_list(String, [Char]).
@@ -1171,18 +1295,22 @@
 }").
 
 string__sub_string_search(String, SubString, Index) :-
-	string__sub_string_search_2(String, SubString, 0, Index).
+	sub_string_search_2(String, SubString, 0, length(String), Index).
 
-:- pred sub_string_search_2(string::in, string::in,
-		int::in, int::out) is semidet.
+	% Brute force string searching.  For short Strings this is
+	% good; for longer strings Boyer-Moore is much better.
+	%
+:- pred sub_string_search_2(string::in, string::in, int::in, int::in,
+		int::out) is semidet.
 
-	% XXX This is very inefficient.
-sub_string_search_2(String, SubString, CurrentIndex, Index) :-
-	( string__prefix(String, SubString) ->
-		Index = CurrentIndex
-	;
-		string__first_char(String, _, Rest),
-		sub_string_search_2(Rest, SubString, CurrentIndex + 1, Index)
+sub_string_search_2(String, SubString, I, Length, Index) :-
+	( if
+		I < Length,
+		string__prefix(String, SubString)
+	  then
+	  	Index = I
+	  else
+	  	sub_string_search_2(String, SubString, I + 1, Length, Index)
 	).
 
 %-----------------------------------------------------------------------------%
@@ -2644,6 +2772,10 @@
 	P = ( pred(X::in, Y::in, Z::out) is det :- Z = F(X, Y) ),
 	string__foldl(P, S, A, B).
 
+string__foldl_substring(F, S, Start, Count, A) = B :-
+	P = ( pred(X::in, Y::in, Z::out) is det :- Z = F(X, Y) ),
+	string__foldl_substring(P, S, Start, Count, A, B).
+
 string__left(S1, N) = S2 :-
 	string__left(S1, N, S2).
 
@@ -2662,13 +2794,13 @@
 string__format(S1, PT) = S2 :-
 	string__format(S1, PT, S2).
 
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
 
 string__words(SepP, String) = Words :-
 	I = preceding_boundary(isnt(SepP), String, string__length(String) - 1),
 	Words = words_2(SepP, String, I, []).
 
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
 
 :- func words_2(pred(char), string, int, list(string)) = list(string).
 :- mode words_2(pred(in) is semidet, in, in, in) = out is det.
@@ -2684,7 +2816,11 @@
 		Words = words_2(SepP, String, PrevWordEnd, [Word | Words0])
 	).
 
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
+
+string__words(String) = string__words(char__is_whitespace, String).
+
+%------------------------------------------------------------------------------%
 
 	% preceding_boundary(SepP, String, I) returns the largest index J =< I
 	% in String of the char that is SepP and min(-1, I) if there is no
@@ -2704,15 +2840,15 @@
 		preceding_boundary(SepP, String, I - 1)
 	).
 
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
 
 S1 ++ S2 = string__append(S1, S2).
 
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
 
 string__det_to_int(S) = string__det_base_string_to_int(10, S).
 
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
 
 string__det_base_string_to_int(Base, S) = N :-
 	( if string__base_string_to_int(Base, S, N0) then
@@ -2721,9 +2857,9 @@
 	  	error("string__det_base_string_to_int/2: conversion failed")
 	).
 
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
 
 :- end_module string.
 
-% ---------------------------------------------------------------------------- %
-% ---------------------------------------------------------------------------- %
+%------------------------------------------------------------------------------%
+%------------------------------------------------------------------------------%
--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list