[m-rev.] for review: improve string__replace and string__replace_all

Peter Ross pro at missioncriticalit.com
Fri Jun 25 22:03:39 AEST 2004


Hi,

For anyone to review.

One question what should the result of 
    string__replace("aaa bbbb ccccc aaa", "", "**", Result) 
be?

Currently it is inconsistent between replace and replace_all.


===================================================================


Improve the time, space and stack usage of string__replace and
string__replace_all.

library/string.m:
	Reimplement string__replace_all and string__replace using
	string__sub_string_search.
	Make string__replace_all tail recursive.
	Add a new version of string__sub_string_search which takes an
	index to begin searching from.
	Remove an infinite loop in string__sub_string_search_2.

	
tests/general/Mmakefile:
tests/general/string_replace.exp:
tests/general/string_replace.m:
	Test string__replace and string__replace_all.


Index: library/string.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/string.m,v
retrieving revision 1.215
diff -u -r1.215 string.m
--- library/string.m	18 Mar 2004 03:51:01 -0000	1.215
+++ library/string.m	25 Jun 2004 11:49:33 -0000
@@ -520,6 +520,13 @@
 :- pred string__sub_string_search(string, string, int).
 :- mode string__sub_string_search(in, in, out) is semidet.
 
+	% string__sub_string_search(String, SubString, BeginAt, Index).
+	% `Index' is the position in `String' where the first occurrence of
+	% `SubString' occurs such that 'Index' is greater than or equal to
+	% `BeginAt'.  Indices start at zero,
+:- pred string__sub_string_search(string, string, int, int).
+:- mode string__sub_string_search(in, in, in, out) is semidet.
+
 	% A function similar to sprintf() in C.
 	%
 	% For example,
@@ -589,83 +596,42 @@
 :- import_module bool, integer, std_util, int, float, array, require.
 :- use_module term_io, type_desc, rtti_implementation.
 
-string__replace(String, SubString0, SubString1, StringOut) :-
-	string__to_char_list(String, CharList),
-	string__to_char_list(SubString0, SubCharList0),
-	find_sub_charlist(CharList, SubCharList0, Before, After),
-	string__to_char_list(SubString1, SubCharList1),
-	list__append(Before, SubCharList1, Before0),
-	list__append(Before0, After, CharListOut),
-	string__from_char_list(CharListOut, StringOut).
-
-string__replace_all(String, SubString0, SubString1, StringOut) :-
-	string__to_char_list(String, CharList),
-	string__to_char_list(SubString0, SubCharList0),
-	string__to_char_list(SubString1, SubCharList1),
-	find_all_sub_charlist(CharList, SubCharList0, SubCharList1,
-		CharListOut),
-	string__from_char_list(CharListOut, StringOut).
-
-	% find_all_sub_charlist replaces any occurences of the second list of
-	% characters (in order) in the first list of characters with the second
-	% list of characters.
-:- pred find_all_sub_charlist(list(char)::in, list(char)::in, list(char)::in,
-	list(char)::out) is det.
-
-find_all_sub_charlist(CharList, SubCharList0, SubCharList1, CharList0) :-
-		% find the first occurence
-	( find_sub_charlist(CharList, SubCharList0, BeforeList, AfterList) ->
-		( AfterList = [] ->
-			% at the end
-			list__append(BeforeList, SubCharList1, CharList0)
-		;
-			% recursively find the rest of the occurences
-			find_all_sub_charlist(AfterList, SubCharList0,
-				SubCharList1, AfterList0),
-			list__append(BeforeList, SubCharList1, BeforeList0),
-			list__append(BeforeList0, AfterList0, CharList0)
-		)
-	;
-		% no occurences left
-		CharList0 = CharList
-	).
-
-	% find_sub_charlist(List, SubList, Before, After) is true iff SubList
-	% is a sublist of List, and Before is the list of characters before
-	% SubList in List, and After is the list after it.
-:- pred find_sub_charlist(list(char)::in, list(char)::in, list(char)::out,
-	list(char)::out) is semidet.
+string__replace(Str, Pat, Subst, Result) :-
+	sub_string_search(Str, Pat, Index),
 
-find_sub_charlist(CharList, [], [], CharList).
-find_sub_charlist([C | CharList], [S | SubCharList], Before, After) :-
-	(
-		C = S
-	->
-		(
-			find_rest_of_sub_charlist(CharList, SubCharList, After0)
-		->
-			Before = [],
-			After = After0
-		;
-			find_sub_charlist(CharList, [S | SubCharList], Before0,
-				After0),
-			Before = [C | Before0],
-			After = After0
-
-		)
-	;
-		find_sub_charlist(CharList, [S | SubCharList], Before0, After),
-		Before = [C | Before0]
-	).
-
-	% find_rest_of_sub_charlist(List, SubList, After) is true iff List
-	% begins with all the characters in SubList in order, and end with
-	% After.
-:- pred find_rest_of_sub_charlist(list(char)::in, list(char)::in,
-	list(char)::out) is semidet.
+	Initial = string__unsafe_substring(Str, 0, Index),
+
+	BeginAt = Index + string__length(Pat),
+	Length = string__length(Str) - BeginAt,
+	Final = string__unsafe_substring(Str, BeginAt, Length),
+
+	Result = string__append_list([Initial, Subst, Final]).
+
+string__replace_all(Str, Pat, Subst, Result) :-
+	( Pat = "" ->
+		copy(Str, Result)
+	;
+		PatLength = string__length(Pat),
+		ReversedChunks = replace_all(Str, Pat, Subst, PatLength, 0, []),
+		Chunks = list__reverse(ReversedChunks),
+		Result = string__append_list(Chunks)
+	).
+
+:- func string__replace_all(string, string, string,
+			int, int, list(string)) = list(string).
 
-find_rest_of_sub_charlist(CharList, SubCharList, After) :-
-	list__append(SubCharList, After, CharList).
+string__replace_all(Str, Pat, Subst, PatLength, BeginAt, Result0) = Result :-
+	( sub_string_search(Str, Pat, BeginAt, Index) ->
+		Length = Index - BeginAt,
+		Initial = string__unsafe_substring(Str, BeginAt, Length),
+		Start = Index + PatLength,
+		Result = string__replace_all(Str, Pat, Subst,
+			PatLength, Start, [Subst, Initial | Result0])
+	;
+		Length = string__length(Str) - BeginAt,
+		EndString = string__unsafe_substring(Str, BeginAt, Length),
+		Result = [EndString | Result0]
+	).
 
 string__to_int(String, Int) :-
 	string__base_string_to_int(10, String, Int).
@@ -1265,12 +1231,15 @@
 
 %-----------------------------------------------------------------------------%
 
+string__sub_string_search(WholeString, Pattern, Index) :-
+	sub_string_search(WholeString, Pattern, 0, Index).
+
 :- pragma foreign_proc("C",
-	string__sub_string_search(WholeString::in, SubString::in, Index::out),
+	sub_string_search(WholeString::in, Pattern::in, BeginAt::in, Index::out),
 	[will_not_call_mercury, promise_pure, thread_safe],
 "{
 	char *match;
-	match = strstr(WholeString, SubString);
+	match = strstr(WholeString + BeginAt, Pattern);
 	if (match) {
 		Index = match - WholeString;
 		SUCCESS_INDICATOR = MR_TRUE;
@@ -1280,16 +1249,16 @@
 }").
 
 :- pragma foreign_proc("C#",
-	string__sub_string_search(WholeString::in, SubString::in, Index::out),
+	sub_string_search(WholeString::in, Pattern::in, BeginAt::in, Index::out),
 	[will_not_call_mercury, promise_pure, thread_safe],
 "{
-	Index = WholeString.IndexOf(SubString);
+	Index = WholeString.IndexOf(Pattern, BeginAt);
 	SUCCESS_INDICATOR = (Index >= 0);
 }").
 
 % This is only used if there is no matching foreign_proc definition
-string__sub_string_search(String, SubString, Index) :-
-	sub_string_search_2(String, SubString, 0, length(String),
+sub_string_search(String, SubString, BeginAt, Index) :-
+	sub_string_search_2(String, SubString, BeginAt, length(String),
 		length(SubString), Index).
 
 	% Brute force string searching. For short Strings this is
@@ -1298,14 +1267,14 @@
 	int::out) is semidet.
 
 sub_string_search_2(String, SubString, I, Length, SubLength, Index) :-
+	I < Length,
 	(
-		I < Length,
 		% XXX This is inefficient --
 		% there is no (in, in, in) = in is semidet mode of
-		% string__substring, so this ends up calling the
+		% substring, so this ends up calling the
 		% (in, in, in) = out mode and then doing the unification.
 		% This will create a lot of unnecessary garbage.
-		string__substring(String, I, SubLength) = SubString
+		substring(String, I, SubLength) = SubString
 	->
 		Index = I
 	;
Index: tests/general/Mmakefile
===================================================================
RCS file: /home/mercury1/repository/tests/general/Mmakefile,v
retrieving revision 1.50
diff -u -r1.50 Mmakefile
--- tests/general/Mmakefile	12 Jan 2003 22:33:08 -0000	1.50
+++ tests/general/Mmakefile	25 Jun 2004 11:49:33 -0000
@@ -60,6 +60,7 @@
 		string_format_test \
 		string_format_test_2 \
 		string_format_test_3 \
+		string_replace \
 		string_test \
 		string_test_2 \
 		string_to_float \
Index: tests/general/string_replace.exp
===================================================================
RCS file: tests/general/string_replace.exp
diff -N tests/general/string_replace.exp
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ tests/general/string_replace.exp	25 Jun 2004 11:49:33 -0000
@@ -0,0 +1,24 @@
+string__replace("", "a", "bc", Result) 
+	FAIL!
+string__replace("aaa bbbb ccccc aaa", "aab", "**", Result) 
+	FAIL!
+string__replace("aaa bbbb ccccc aaa", "aaaa", "**", Result) 
+	FAIL!
+string__replace("aaa bbbb ccccc aaa", "", "**", Result) 
+	"**aaa bbbb ccccc aaa"
+string__replace("aaa bbbb ccccc aaa", "aaa", "", Result) 
+	" bbbb ccccc aaa"
+string__replace("aaa bbbb ccccc aaa", "cc", "**", Result) 
+	"aaa bbbb **ccc aaa"
+string__replace_all("", "a", "bc", Result) 
+	""
+string__replace_all("aaa bbbb ccccc aaa", "aab", "**", Result) 
+	"aaa bbbb ccccc aaa"
+string__replace_all("aaa bbbb ccccc aaa", "aaaa", "**", Result) 
+	"aaa bbbb ccccc aaa"
+string__replace_all("aaa bbbb ccccc aaa", "", "**", Result) 
+	"aaa bbbb ccccc aaa"
+string__replace_all("aaa bbbb ccccc aaa", "aaa", "", Result) 
+	" bbbb ccccc "
+string__replace_all("aaa bbbb ccccc aaa", "cc", "**", Result) 
+	"aaa bbbb ****c aaa"
Index: tests/general/string_replace.m
===================================================================
RCS file: tests/general/string_replace.m
diff -N tests/general/string_replace.m
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ tests/general/string_replace.m	25 Jun 2004 11:49:33 -0000
@@ -0,0 +1,59 @@
+%------------------------------------------------------------------------------%
+
+:- module string_replace.
+
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is det.
+
+%------------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module list, string.
+
+%------------------------------------------------------------------------------%
+
+main(!IO) :-
+	Str = "aaa bbbb ccccc aaa",
+	Tests = [
+		{"", "a", "bc"},
+
+		{Str, "aab", "**"},
+		{Str, "aaaa", "**"},
+		{Str, "", "**"},
+
+		{Str, "aaa", ""},
+		{Str, "cc", "**"}
+	],
+	list__foldl(test_replace, Tests, !IO),
+	list__foldl(test_replace_all, Tests, !IO).
+
+:- pred test_replace({string, string, string}::in, io::di, io::uo) is det.
+
+test_replace({Str, Pat, Subst}, !IO) :-
+	io__write_string("string__replace(\"" ++ Str ++
+			"\", \"" ++ Pat ++
+			"\", \"" ++ Subst ++ "\", Result) \n\t", !IO),
+	( string__replace(Str, Pat, Subst, Result) ->
+		io__write(Result, !IO),
+		io__nl(!IO)
+	;
+		io__write_string("FAIL!\n", !IO)
+	).
+
+:- pred test_replace_all({string, string, string}::in, io::di, io::uo) is det.
+
+test_replace_all({Str, Pat, Subst}, !IO) :-
+	io__write_string("string__replace_all(\"" ++ Str ++
+			"\", \"" ++ Pat ++
+			"\", \"" ++ Subst ++ "\", Result) \n\t", !IO),
+	string__replace_all(Str, Pat, Subst, Result),
+	io__write(Result, !IO),
+	io__nl(!IO).
+
+
+%------------------------------------------------------------------------------%
+%------------------------------------------------------------------------------%


-- 
Software Engineer                                (Work)   +32 2 757 10 15
Mission Critical                                 (Mobile) +32 485 482 559
--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list