[m-dev.] for review: string__fmt

Peter Ross peter.ross at miscrit.be
Tue Aug 8 00:40:55 AEST 2000


Hi,

For review by anyone.


===================================================================


Estimated hours taken: 8

library/string.m:
    Implement a new predicate string__fmt.  It provides almost the same
    functionality as string__format, but uses a lot less memory and is
    more compatible with the C version of sprintf seeing as that is what
    it calls to do all of its formatting.


Index: library/string.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/string.m,v
retrieving revision 1.122
diff -u -r1.122 string.m
--- library/string.m	2000/04/27 10:24:25	1.122
+++ library/string.m	2000/08/07 14:32:20
@@ -109,12 +109,11 @@
 
 :- pred string__to_char_list(string, list(char)).
 :- mode string__to_char_list(in, out) is det.
+:- mode string__to_char_list(out, in) is det.
 
 :- pred string__from_char_list(list(char), string).
 :- mode string__from_char_list(in, out) is det.
-:- mode string__from_char_list(out, in) is semidet.
-	% XXX second mode should be det too
-	% (but this turns out to be tricky to implement)
+:- mode string__from_char_list(out, in) is det.
 
 :- pred string__from_rev_char_list(list(char), string).
 :- mode string__from_rev_char_list(in, out) is det.
@@ -341,8 +340,16 @@
 %		It is always better to include a `.' to remove ambiguity.  This
 %		interpretation is non-standard and may change.
 %
-%		Numbers are truncated by a precision value, not rounded off.
 
+:- pred string__fmt(string::in,
+		list(string__poly_type)::in, string::out) is det.
+% Has almost equivalent behavour to string__format except that it
+% doesn't handle * modifier for precision and width.
+% It also rounds numbers by precision value, not truncates.
+% The main advantage of using this version is that it uses *much* less
+% memory then string__format and I am a lot more confident of its
+% correctness.
+
 %------------------------------------------------------------------------------%
 
 :- type string__poly_type --->
@@ -576,16 +583,68 @@
 		string__int_to_base_string_2(NegN1, Base, Str1),
 		string__append(Str1, DigitString, Str)
 	).
+
+string__from_char_list(CharList, Str) :-
+	string__to_char_list(Str, CharList).
+
+/*-----------------------------------------------------------------------*/
+
+/*
+:- pred string__to_char_list(string, list(char)).
+:- mode string__to_char_list(in, out) is det.
+:- mode string__to_char_list(out, in) is det.
+*/
+
+:- pragma c_code(string__to_char_list(Str::in, CharList::out),
+		[will_not_call_mercury, thread_safe], "{
+	const char *p = Str + strlen(Str);
+	CharList = MR_list_empty_msg(MR_PROC_LABEL);
+	while (p > Str) {
+		p--;
+		CharList = MR_list_cons_msg((UnsignedChar) *p, CharList,
+			MR_PROC_LABEL);
+	}
+}").
+
+:- pragma c_code(string__to_char_list(Str::out, CharList::in),
+		[will_not_call_mercury, thread_safe], "{
+		/* mode (out, in) is det */
+	Word char_list_ptr;
+	size_t size;
+	Word str_ptr;
+/*
+** loop to calculate list length + sizeof(Word) in `size' using list in
+** `char_list_ptr'
+*/
+	size = sizeof(Word);
+	char_list_ptr = CharList;
+	while (! MR_list_is_empty(char_list_ptr)) {
+		size++;
+		char_list_ptr = MR_list_tail(char_list_ptr);
+	}
+/*
+** allocate (length + 1) bytes of heap space for string
+** i.e. (length + 1 + sizeof(Word) - 1) / sizeof(Word) words
+*/
+	incr_hp_atomic_msg(str_ptr, size / sizeof(Word),
+		MR_PROC_LABEL, ""string:string/0"");
+	Str = (char *) str_ptr;
+/*
+** loop to copy the characters from the char_list to the string
+*/
+	size = 0;
+	char_list_ptr = CharList;
+	while (! MR_list_is_empty(char_list_ptr)) {
+		Str[size++] = MR_list_head(char_list_ptr);
+		char_list_ptr = MR_list_tail(char_list_ptr);
+	}
+/*
+** null terminate the string
+*/
+	Str[size] = '\\0';
+}").
 
-% NB: it would be more efficient to do this directly (using pragma c_code)
-string__to_char_list(String, CharList) :-
-	string__to_int_list(String, IntList),
-	string__int_list_to_char_list(IntList, CharList).
-
-% NB: it would be more efficient to do this directly (using pragma c_code)
-string__from_char_list(CharList, String) :-
-	string__char_list_to_int_list(CharList, IntList),
-	string__to_int_list(String, IntList).
+/*-----------------------------------------------------------------------*/
 
 %
 % We could implement from_rev_char_list using list__reverse and from_char_list,
@@ -755,6 +814,41 @@
 
 %-----------------------------------------------------------------------------%
 
+	% Implementation of append_list that uses C as this minimises the
+	% amount of garbage created.
+:- func fast_append_list(list(string)) = string.
+:- pragma c_code(fast_append_list(Strs::in) = (Str::out),
+		[will_not_call_mercury, thread_safe], "{
+	Word	list = Strs;
+	Word	tmp;
+	size_t	len = 0;
+
+		/* Determine the total len of all strings */
+	while (!MR_list_is_empty(list)) {
+		len += strlen((char *) MR_list_head(list));
+		list = MR_list_tail(list);
+	}
+
+		/* Allocate enough word aligned memory for the string */
+	incr_hp_atomic_msg(tmp, (len + sizeof(Word)) / sizeof(Word),
+			MR_PROC_LABEL, ""string:string/0"");
+	Str = (String) tmp;
+
+		/* Copy the strings into the new memory */
+	len = 0;
+	list = Strs;
+	while (!MR_list_is_empty(list)) {
+		strcpy((char *) Str + len, (char *) MR_list_head(list));
+		len += strlen((char *) MR_list_head(list));
+		list = MR_list_tail(list);
+	}
+
+		/* Set the last character to the null char */
+	*(Str + len) = '\\0';
+}").
+
+%-----------------------------------------------------------------------------%
+
 	% Note - string__hash is also defined in code/imp.h
 	% The two definitions must be kept identical.
 
@@ -1553,6 +1647,322 @@
 :- pred string__special_precision_and_width(int).
 :- mode string__special_precision_and_width(out) is det.
 string__special_precision_and_width(-1).
+
+%-----------------------------------------------------------------------------%
+
+	% This predicate has been optimised to produce the least memory
+	% possible.  Which is very useful for programs which do a lot of
+	% formatted IO.
+string__fmt(FormatString, PolyList, String) :-
+	(
+		format_string(Specifiers, PolyList, [],
+				to_char_list(FormatString), [])
+	->
+		specifiers_to_string(Specifiers, PolyList, Strings),
+		String = fast_append_list(Strings)
+	;
+		error("string__fmt: format string invalid.")
+	).
+
+:- type specifier
+	--->	conv(list(char))	% a list of chars which contains
+					% one conversion specifier.
+	;	string(list(char)).	% a list of chars which may only
+					% contain the "%%" conversion
+					% specifier.
+
+:- pred specifiers_to_string(list(specifier)::in, list(string__poly_type)::in,
+		list(string)::out) is det.
+
+specifiers_to_string([], _Polys, []).
+specifiers_to_string([conv(FormatChars) | Specs], Poly0s, Strings) :-
+	string__from_char_list(FormatChars, FormatString),
+	( Poly0s = [Poly | Polys] ->
+		specifiers_to_string(Specs, Polys, Strings0),
+		(
+			Poly = i(Int),
+			SubString = int_to_string(FormatString, Int)
+		;
+			Poly = f(Float),
+			SubString = float_to_string(FormatString, Float)
+		;
+			Poly = c(Char),
+			SubString = char_to_string(FormatString, Char)
+		;
+			Poly = s(Str),
+			SubString = string_to_string(FormatString, Str)
+		),
+		Strings = [SubString | Strings0]
+		% string__append(SubString, String0, String)
+	;
+		error("specifiers_to_string")
+	).
+specifiers_to_string([string(Chars) | Specs], Polys, Strings) :-
+	specifiers_to_string(Specs, Polys, Strings0),
+		% We need to use sprintf to print the string as it may
+		% contain the "%%" string.
+	Strings = [string_to_string(string__from_char_list(Chars), "") |
+			Strings0].
+
+	%
+	% Parse a format string.
+	%
+:- pred format_string(list(specifier)::out, list(string__poly_type)::in,
+		list(string__poly_type)::out,
+		list(char)::in, list(char)::out) is semidet.
+
+format_string(Results, PolyTypes0, PolyTypes) -->
+	=(Init),
+	( other(PolyTypes0) ->
+		=(Final),
+		format_string_2(Result0s, PolyTypes0, PolyTypes),
+		{ list__remove_suffix(Init, Final, Prefix) },
+		{ Results = [string(Prefix) | Result0s] }
+	;
+		format_string_2(Results, PolyTypes0, PolyTypes)
+	).
+
+	%
+	% Parse a string beginning with a '%' that is a conversion
+	% specification.
+	%
+:- pred format_string_2(list(specifier)::out, list(string__poly_type)::in,
+		list(string__poly_type)::out,
+		list(char)::in, list(char)::out) is semidet.
+
+format_string_2(Results, PolyTypes0, PolyTypes) -->
+	=(Init),
+	( ['%'] ->
+		conversion_specification_and_other(PolyTypes0),
+		=(Final),
+		{ PolyTypes0 = [_ | PolyTypes1] },
+		format_string_2(Result0s, PolyTypes1, PolyTypes),
+		{ list__remove_suffix(Init, Final, Prefix) },
+		{ Results = [conv(Prefix) | Result0s] }
+	;
+		{ Results = [] },
+		{ PolyTypes = PolyTypes0 }
+	).
+
+	%
+	% Parse a conversion specification followed by any characters
+	% which don't belong to a conversion specification.
+	%
+:- pred conversion_specification_and_other(list(string__poly_type)::in,
+		list(char)::in, list(char)::out) is semidet.
+
+conversion_specification_and_other(PolyTypes0) -->
+	conversion_specification(PolyTypes0),
+	{ PolyTypes0 = [_ | PolyTypes] },
+	other(PolyTypes).
+
+	%
+	% Each conversion specification is introduced by the character
+	% '%',  and ends with a conversion specifier.  In between there
+	% may be (in this order)  zero  or more  flags,  an optional
+	% minimum field width, an optional precision and an optional
+	% length modifier.
+	% Note we *don't* parse the '%' character as a valid conversion
+	% specifier, this ensures that if we parse a conversion
+	% specification we should consume one of the string__poly_type
+	% tokens.
+	%
+:- pred conversion_specification(list(string__poly_type)::in,
+		list(char)::in, list(char)::out) is semidet.
+
+conversion_specification(PolyTypes0) -->
+	zero_or_more_occurences(flag_mod),
+	zero_or_one_occurence(width_mod),
+	zero_or_one_occurence(prec_mod),
+	zero_or_one_occurence(length_mod),
+	spec(PolyTypes0).
+
+	%
+	% Do we have a flag?
+	%
+:- pred flag_mod(list(char)::in, list(char)::out) is semidet.
+
+flag_mod --> ['#'].
+flag_mod --> ['0'].
+flag_mod --> ['-'].
+flag_mod --> [' '].
+flag_mod --> ['+'].
+
+	%
+	% Do we have a minimum field width?
+	%
+:- pred width_mod(list(char)::in, list(char)::out) is semidet.
+
+width_mod --> 
+	non_zero_digit,
+	zero_or_more_occurences(digit).
+
+	%
+	% Do we have a precision?
+	%
+:- pred prec_mod(list(char)::in, list(char)::out) is semidet.
+
+prec_mod --> 
+	['.'],
+	non_zero_digit,
+	zero_or_more_occurences(digit).
+
+	%
+	% Do we have a length modifier?
+	%
+:- pred length_mod(list(char)::in, list(char)::out) is semidet.
+
+length_mod --> ['h'], ( ['h'] -> [] ; [] ).
+length_mod --> ['l'], ( ['l'] -> [] ; [] ).
+length_mod --> ['L'].
+length_mod --> ['j'].
+length_mod --> ['z'].
+length_mod --> ['t'].
+
+	%
+	% Do we have a valid conversion specifier?
+	% We check to ensure that the specifier also matches the type
+	% from the input list.
+	% Note we *don't* parse the '%' character as a valid conversion
+	% specifier, this ensures that if we parse a conversion
+	% specification we should consume one of the string__poly_type
+	% tokens.
+	%
+:- pred spec(list(string__poly_type)::in,
+		list(char)::in, list(char)::out) is semidet.
+
+	% valid integer conversion specifiers
+spec([P | _Ps]) --> { P = i(_) }, ['d'].
+spec([P | _Ps]) --> { P = i(_) }, ['i'].
+spec([P | _Ps]) --> { P = i(_) }, ['o'].
+spec([P | _Ps]) --> { P = i(_) }, ['u'].
+spec([P | _Ps]) --> { P = i(_) }, ['x'].
+spec([P | _Ps]) --> { P = i(_) }, ['X'].
+
+	% valid float conversion specifiers
+spec([P | _Ps]) --> { P = f(_) }, ['e'].
+spec([P | _Ps]) --> { P = f(_) }, ['E'].
+spec([P | _Ps]) --> { P = f(_) }, ['f'].
+spec([P | _Ps]) --> { P = f(_) }, ['F'].
+spec([P | _Ps]) --> { P = f(_) }, ['g'].
+spec([P | _Ps]) --> { P = f(_) }, ['G'].
+% spec([P | _Ps]) --> { P = f(_) }, ['a'].
+% spec([P | _Ps]) --> { P = f(_) }, ['A'].
+
+	% valid char conversion specifiers
+spec([P | _Ps]) --> { P = c(_) }, ['c'].
+
+	% valid string conversion specifiers
+spec([P | _Ps]) --> { P = s(_) }, ['s'].
+
+	%
+	% Parse a string which doesn't contain any conversion
+	% specifications.  The format string for "%%" doesn't count as a
+	% conversion specification.
+	%
+:- pred other(list(string__poly_type)::in,
+		list(char)::in, list(char)::out) is semidet.
+
+other(PolyTypes) -->
+	( [Char], { Char \= '%' } ->
+		other(PolyTypes)
+	;
+		( ['%'], \+ conversion_specification(PolyTypes) ->
+			other(PolyTypes)
+		;
+			[]
+		)
+	).
+
+
+	% Zero or more occurences of the string parsed by the ho pred.
+:- pred zero_or_more_occurences(pred(list(T), list(T)), list(T), list(T)).
+:- mode zero_or_more_occurences(pred(in, out) is semidet, in, out) is det.
+
+zero_or_more_occurences(P) -->
+	( P ->
+		zero_or_more_occurences(P)
+	;
+		[]
+	).
+
+	% At most one occurence of the string parsed by the ho pred.
+:- pred zero_or_one_occurence(pred(list(T), list(T)), list(T), list(T)).
+:- mode zero_or_one_occurence(pred(in, out) is semidet, in, out) is det.
+
+zero_or_one_occurence(P) -->
+	( P ->
+		[]
+	;
+		[]
+	).
+
+	% A digit in the range [1-9]
+:- pred non_zero_digit(list(char)::in, list(char)::out) is semidet.
+
+non_zero_digit -->
+	[ Char ],
+	{ char__is_digit(Char) },
+	{ Char \= '0' }.
+
+	% A digit in the range [0-9]
+:- pred digit(list(char)::in, list(char)::out) is semidet.
+
+digit -->
+	[ Char ],
+	{ char__is_digit(Char) }.
+
+	% Create a string from a float using the format string.
+:- func float_to_string(string, float) = string.
+:- pragma c_code(float_to_string(FormatStr::in, Val::in) = (Str::out),
+		[will_not_call_mercury, thread_safe], "{
+	char buf[500];
+	Word tmp;
+	sprintf(buf, FormatStr, Val);
+	incr_hp_atomic_msg(tmp, (strlen(buf) + sizeof(Word)) / sizeof(Word),
+		MR_PROC_LABEL, ""string:string/0"");
+	Str = (char *)tmp;
+	strcpy(Str, buf);
+}").
+
+	% Create a string from a int using the format string.
+:- func int_to_string(string, int) = string.
+:- pragma c_code(int_to_string(FormatStr::in, Val::in) = (Str::out),
+		[will_not_call_mercury, thread_safe], "{
+	char buf[500];
+	Word tmp;
+	sprintf(buf, FormatStr, Val);
+	incr_hp_atomic_msg(tmp, (strlen(buf) + sizeof(Word)) / sizeof(Word),
+		MR_PROC_LABEL, ""string:string/0"");
+	Str = (char *)tmp;
+	strcpy(Str, buf);
+}").
+
+	% Create a string from a string using the format string.
+:- func string_to_string(string, string) = string.
+:- pragma c_code(string_to_string(FormatStr::in, Val::in) = (Str::out),
+		[will_not_call_mercury, thread_safe], "{
+	char buf[500];
+	Word tmp;
+	sprintf(buf, FormatStr, Val);
+	incr_hp_atomic_msg(tmp, (strlen(buf) + sizeof(Word)) / sizeof(Word),
+		MR_PROC_LABEL, ""string:string/0"");
+	Str = (char *)tmp;
+	strcpy(Str, buf);
+}").
+
+	% Create a string from a char using the format string.
+:- func char_to_string(string, char) = string.
+:- pragma c_code(char_to_string(FormatStr::in, Val::in) = (Str::out),
+		[will_not_call_mercury, thread_safe], "{
+	char buf[500];
+	Word tmp;
+	sprintf(buf, FormatStr, Val);
+	incr_hp_atomic_msg(tmp, (strlen(buf) + sizeof(Word)) / sizeof(Word),
+		MR_PROC_LABEL, ""string:string/0"");
+	Str = (char *)tmp;
+	strcpy(Str, buf);
+}").
 
 %-----------------------------------------------------------------------------%
 

--------------------------------------------------------------------------
mercury-developers mailing list
Post messages to:       mercury-developers at cs.mu.oz.au
Administrative Queries: owner-mercury-developers at cs.mu.oz.au
Subscriptions:          mercury-developers-request at cs.mu.oz.au
--------------------------------------------------------------------------



More information about the developers mailing list