[m-rev.] for review: mercury implementation of string.m

Simon Taylor stayl at cs.mu.OZ.AU
Fri Jun 14 02:12:57 AEST 2002


On 13-Jun-2002, Peter Ross <pro at missioncriticalit.com> wrote:
> Estimated hours taken: 4
> Branches: main
> 
> library/string.m:
> 	Provide a mercury implementation of every foreign_proc,
> 	calling private_builtin__sorry for those which are difficult
> 	to implement in mercury.
> 	Remove the unused reverse mode of string__to_int_list.
 
> Index: string.m
> ===================================================================
> @@ -743,6 +742,56 @@
>  	Str[size] = '\\0';
>  }").
>  
> +:- pragma foreign_proc("MC++", string__to_char_list(Str::in, CharList::uo),
> +		[will_not_call_mercury, promise_pure, thread_safe], "{
> +        MR_Integer length, i; 
> +        MR_Word tmp;
> +        MR_Word prev;
> +
> +        length = Str->get_Length();
> +      
> +        MR_list_nil(prev);
> +
> +        for (i = length - 1; i >= 0; i--) {
> +		MR_list_cons(tmp, __box(Str->get_Chars(i)), prev);

You should get someone else (probably Tyson) to review the Managed C++
part of the code, but the reference to `__box' here rings alarm bells.

> +string__to_char_list(Str::in, CharList::uo) :-
> +	( string__first_char(Str, First, Rest) ->
> +		string__to_char_list(Rest, CharList0),
> +		CharList = [First | CharList0]
> +	;
> +		CharList = []
> +	).

Don't use string__first_char, it's horrendously inefficient.
Base the algorithm on string__unsafe_index instead.

> @@ -1116,6 +1165,20 @@
>  	Index = WholeString->IndexOf(SubString);
>  }").
>  
> +string__sub_string_search(String, SubString, Index) :-
> +	string__sub_string_search_2(String, SubString, 0, Index).
> +
> +:- pred sub_string_search_2(string::in, string::in,
> +		int::in, int::out) is semidet.
> +
> +sub_string_search_2(String, SubString, CurrentIndex, Index) :-
> +	( string__prefix(String, SubString) ->
> +		Index = CurrentIndex
> +	;
> +		string__first_char(String, _, Rest),
> +		sub_string_search_2(Rest, SubString, CurrentIndex + 1, Index)
> +	).

Same here.

>  /*
>  :- pred string__to_int_list(string, list(int)).
>  :- mode string__to_int_list(in, out) is det.
> -:- mode string__to_int_list(out, in) is det.
>  */
> -
>  :- pragma foreign_proc("C",
>  	string__to_int_list(Str::in, IntList::out),
>  		[will_not_call_mercury, promise_pure, thread_safe], "{
> @@ -1727,45 +1824,6 @@
>  			MR_PROC_LABEL);
>  	}
>  }").
> -
> -:- pragma foreign_proc("C",
> -	string__to_int_list(Str::out, IntList::in),
> -		[will_not_call_mercury, promise_pure, thread_safe], "{
> -		/* mode (out, in) is det */
> -	MR_Word int_list_ptr;
> -	size_t size;
> -	MR_Word str_ptr;
> -/*
> -** loop to calculate list length + sizeof(MR_Word) in `size' using list in
> -** `int_list_ptr'
> -*/
> -	size = sizeof(MR_Word);
> -	int_list_ptr = IntList;
> -	while (! MR_list_is_empty(int_list_ptr)) {
> -		size++;
> -		int_list_ptr = MR_list_tail(int_list_ptr);
> -	}

> +string__to_int_list(String, IntList) :-
> +	string__to_char_list(String, CharList),
> +	IntList = list__map(char__to_int, CharList).

Why not just remove the foreign implementation altogether?
  
> @@ -1817,6 +1859,13 @@
>  		[will_not_call_mercury, promise_pure, thread_safe], "
>  	SUCCESS_INDICATOR = (Str->IndexOf(Ch) != -1);
>  ").
> +string__contains_char(String, Char) :-
> +	string__first_char(String, FirstChar, RestOfString),
> +	( FirstChar = Char ->
> +		true
> +	;
> +		string__contains_char(RestOfString, Char)
> +	).

Again, don't use string__first_char.

> @@ -1853,6 +1902,13 @@
>  		Ch = Str->get_Chars(Index);
>  	}
>  ").
> +string__index(Str, Index, Char) :-
> +	string__first_char(Str, First, Rest),
> +	( Index = 0 ->
> +		Char = First
> +	;
> +		string__index(Rest, Index - 1, Char)
> +	).

It would be better to call `sorry' here. If string__index
doesn't work, string__first_char isn't likely to.
  
> @@ -1866,6 +1922,9 @@
>  		[will_not_call_mercury, promise_pure, thread_safe], "
>  	Ch = Str->get_Chars(Index);
>  ").
> +string__unsafe_index(_, _, _) :-
> +	% This version is only for if there is not a foreign_proc version.
> +	private_builtin__sorry("string__unsafe_index").

This should default to using string__index.
 
> @@ -2021,6 +2087,22 @@
>  	Length = Str->get_Length();
>  ").
>  
> +:- pragma promise_pure(string__length/2).
> +string__length(Str::in, Len::uo) :-
> +	string__length_2(Str, Len).
> +string__length(Str0::ui, Len::uo) :-
> +	copy(Str0, Str),
> +	string__length_2(Str, Len).
> +
> +:- pred string__length_2(string::in, int::uo) is det.
> +string__length_2(Str, Length) :-
> +	( string__first_char(Str, _First, Rest) ->
> +		string__length(Rest, Length0),
> +		Length = Length0 + 1
> +	;
> +		Length = 0
> +	).

It would be better to just call `sorry' here.

> +string__split(Str, Count, Left, Right) :-
> +	( Count =< 0 ->
> +		Left = "",
> +		Right = Str
> +	;
> +		string__to_char_list(Str, List),
> +		Len = list__length(List),

string__length is likely to be faster than list__length, unless
deforestation can merge the call to list__length into the call to
string__to_char_list.
  
Simon.
--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list