[m-dev.] for review: string__fmt
Peter Ross
peter.ross at miscrit.be
Tue Aug 8 00:40:55 AEST 2000
Hi,
For review by anyone.
===================================================================
Estimated hours taken: 8
library/string.m:
Implement a new predicate string__fmt. It provides almost the same
functionality as string__format, but uses a lot less memory and is
more compatible with the C version of sprintf seeing as that is what
it calls to do all of its formatting.
Index: library/string.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/string.m,v
retrieving revision 1.122
diff -u -r1.122 string.m
--- library/string.m 2000/04/27 10:24:25 1.122
+++ library/string.m 2000/08/07 14:32:20
@@ -109,12 +109,11 @@
:- pred string__to_char_list(string, list(char)).
:- mode string__to_char_list(in, out) is det.
+:- mode string__to_char_list(out, in) is det.
:- pred string__from_char_list(list(char), string).
:- mode string__from_char_list(in, out) is det.
-:- mode string__from_char_list(out, in) is semidet.
- % XXX second mode should be det too
- % (but this turns out to be tricky to implement)
+:- mode string__from_char_list(out, in) is det.
:- pred string__from_rev_char_list(list(char), string).
:- mode string__from_rev_char_list(in, out) is det.
@@ -341,8 +340,16 @@
% It is always better to include a `.' to remove ambiguity. This
% interpretation is non-standard and may change.
%
-% Numbers are truncated by a precision value, not rounded off.
+:- pred string__fmt(string::in,
+ list(string__poly_type)::in, string::out) is det.
+% Has almost equivalent behavour to string__format except that it
+% doesn't handle * modifier for precision and width.
+% It also rounds numbers by precision value, not truncates.
+% The main advantage of using this version is that it uses *much* less
+% memory then string__format and I am a lot more confident of its
+% correctness.
+
%------------------------------------------------------------------------------%
:- type string__poly_type --->
@@ -576,16 +583,68 @@
string__int_to_base_string_2(NegN1, Base, Str1),
string__append(Str1, DigitString, Str)
).
+
+string__from_char_list(CharList, Str) :-
+ string__to_char_list(Str, CharList).
+
+/*-----------------------------------------------------------------------*/
+
+/*
+:- pred string__to_char_list(string, list(char)).
+:- mode string__to_char_list(in, out) is det.
+:- mode string__to_char_list(out, in) is det.
+*/
+
+:- pragma c_code(string__to_char_list(Str::in, CharList::out),
+ [will_not_call_mercury, thread_safe], "{
+ const char *p = Str + strlen(Str);
+ CharList = MR_list_empty_msg(MR_PROC_LABEL);
+ while (p > Str) {
+ p--;
+ CharList = MR_list_cons_msg((UnsignedChar) *p, CharList,
+ MR_PROC_LABEL);
+ }
+}").
+
+:- pragma c_code(string__to_char_list(Str::out, CharList::in),
+ [will_not_call_mercury, thread_safe], "{
+ /* mode (out, in) is det */
+ Word char_list_ptr;
+ size_t size;
+ Word str_ptr;
+/*
+** loop to calculate list length + sizeof(Word) in `size' using list in
+** `char_list_ptr'
+*/
+ size = sizeof(Word);
+ char_list_ptr = CharList;
+ while (! MR_list_is_empty(char_list_ptr)) {
+ size++;
+ char_list_ptr = MR_list_tail(char_list_ptr);
+ }
+/*
+** allocate (length + 1) bytes of heap space for string
+** i.e. (length + 1 + sizeof(Word) - 1) / sizeof(Word) words
+*/
+ incr_hp_atomic_msg(str_ptr, size / sizeof(Word),
+ MR_PROC_LABEL, ""string:string/0"");
+ Str = (char *) str_ptr;
+/*
+** loop to copy the characters from the char_list to the string
+*/
+ size = 0;
+ char_list_ptr = CharList;
+ while (! MR_list_is_empty(char_list_ptr)) {
+ Str[size++] = MR_list_head(char_list_ptr);
+ char_list_ptr = MR_list_tail(char_list_ptr);
+ }
+/*
+** null terminate the string
+*/
+ Str[size] = '\\0';
+}").
-% NB: it would be more efficient to do this directly (using pragma c_code)
-string__to_char_list(String, CharList) :-
- string__to_int_list(String, IntList),
- string__int_list_to_char_list(IntList, CharList).
-
-% NB: it would be more efficient to do this directly (using pragma c_code)
-string__from_char_list(CharList, String) :-
- string__char_list_to_int_list(CharList, IntList),
- string__to_int_list(String, IntList).
+/*-----------------------------------------------------------------------*/
%
% We could implement from_rev_char_list using list__reverse and from_char_list,
@@ -755,6 +814,41 @@
%-----------------------------------------------------------------------------%
+ % Implementation of append_list that uses C as this minimises the
+ % amount of garbage created.
+:- func fast_append_list(list(string)) = string.
+:- pragma c_code(fast_append_list(Strs::in) = (Str::out),
+ [will_not_call_mercury, thread_safe], "{
+ Word list = Strs;
+ Word tmp;
+ size_t len = 0;
+
+ /* Determine the total len of all strings */
+ while (!MR_list_is_empty(list)) {
+ len += strlen((char *) MR_list_head(list));
+ list = MR_list_tail(list);
+ }
+
+ /* Allocate enough word aligned memory for the string */
+ incr_hp_atomic_msg(tmp, (len + sizeof(Word)) / sizeof(Word),
+ MR_PROC_LABEL, ""string:string/0"");
+ Str = (String) tmp;
+
+ /* Copy the strings into the new memory */
+ len = 0;
+ list = Strs;
+ while (!MR_list_is_empty(list)) {
+ strcpy((char *) Str + len, (char *) MR_list_head(list));
+ len += strlen((char *) MR_list_head(list));
+ list = MR_list_tail(list);
+ }
+
+ /* Set the last character to the null char */
+ *(Str + len) = '\\0';
+}").
+
+%-----------------------------------------------------------------------------%
+
% Note - string__hash is also defined in code/imp.h
% The two definitions must be kept identical.
@@ -1553,6 +1647,322 @@
:- pred string__special_precision_and_width(int).
:- mode string__special_precision_and_width(out) is det.
string__special_precision_and_width(-1).
+
+%-----------------------------------------------------------------------------%
+
+ % This predicate has been optimised to produce the least memory
+ % possible. Which is very useful for programs which do a lot of
+ % formatted IO.
+string__fmt(FormatString, PolyList, String) :-
+ (
+ format_string(Specifiers, PolyList, [],
+ to_char_list(FormatString), [])
+ ->
+ specifiers_to_string(Specifiers, PolyList, Strings),
+ String = fast_append_list(Strings)
+ ;
+ error("string__fmt: format string invalid.")
+ ).
+
+:- type specifier
+ ---> conv(list(char)) % a list of chars which contains
+ % one conversion specifier.
+ ; string(list(char)). % a list of chars which may only
+ % contain the "%%" conversion
+ % specifier.
+
+:- pred specifiers_to_string(list(specifier)::in, list(string__poly_type)::in,
+ list(string)::out) is det.
+
+specifiers_to_string([], _Polys, []).
+specifiers_to_string([conv(FormatChars) | Specs], Poly0s, Strings) :-
+ string__from_char_list(FormatChars, FormatString),
+ ( Poly0s = [Poly | Polys] ->
+ specifiers_to_string(Specs, Polys, Strings0),
+ (
+ Poly = i(Int),
+ SubString = int_to_string(FormatString, Int)
+ ;
+ Poly = f(Float),
+ SubString = float_to_string(FormatString, Float)
+ ;
+ Poly = c(Char),
+ SubString = char_to_string(FormatString, Char)
+ ;
+ Poly = s(Str),
+ SubString = string_to_string(FormatString, Str)
+ ),
+ Strings = [SubString | Strings0]
+ % string__append(SubString, String0, String)
+ ;
+ error("specifiers_to_string")
+ ).
+specifiers_to_string([string(Chars) | Specs], Polys, Strings) :-
+ specifiers_to_string(Specs, Polys, Strings0),
+ % We need to use sprintf to print the string as it may
+ % contain the "%%" string.
+ Strings = [string_to_string(string__from_char_list(Chars), "") |
+ Strings0].
+
+ %
+ % Parse a format string.
+ %
+:- pred format_string(list(specifier)::out, list(string__poly_type)::in,
+ list(string__poly_type)::out,
+ list(char)::in, list(char)::out) is semidet.
+
+format_string(Results, PolyTypes0, PolyTypes) -->
+ =(Init),
+ ( other(PolyTypes0) ->
+ =(Final),
+ format_string_2(Result0s, PolyTypes0, PolyTypes),
+ { list__remove_suffix(Init, Final, Prefix) },
+ { Results = [string(Prefix) | Result0s] }
+ ;
+ format_string_2(Results, PolyTypes0, PolyTypes)
+ ).
+
+ %
+ % Parse a string beginning with a '%' that is a conversion
+ % specification.
+ %
+:- pred format_string_2(list(specifier)::out, list(string__poly_type)::in,
+ list(string__poly_type)::out,
+ list(char)::in, list(char)::out) is semidet.
+
+format_string_2(Results, PolyTypes0, PolyTypes) -->
+ =(Init),
+ ( ['%'] ->
+ conversion_specification_and_other(PolyTypes0),
+ =(Final),
+ { PolyTypes0 = [_ | PolyTypes1] },
+ format_string_2(Result0s, PolyTypes1, PolyTypes),
+ { list__remove_suffix(Init, Final, Prefix) },
+ { Results = [conv(Prefix) | Result0s] }
+ ;
+ { Results = [] },
+ { PolyTypes = PolyTypes0 }
+ ).
+
+ %
+ % Parse a conversion specification followed by any characters
+ % which don't belong to a conversion specification.
+ %
+:- pred conversion_specification_and_other(list(string__poly_type)::in,
+ list(char)::in, list(char)::out) is semidet.
+
+conversion_specification_and_other(PolyTypes0) -->
+ conversion_specification(PolyTypes0),
+ { PolyTypes0 = [_ | PolyTypes] },
+ other(PolyTypes).
+
+ %
+ % Each conversion specification is introduced by the character
+ % '%', and ends with a conversion specifier. In between there
+ % may be (in this order) zero or more flags, an optional
+ % minimum field width, an optional precision and an optional
+ % length modifier.
+ % Note we *don't* parse the '%' character as a valid conversion
+ % specifier, this ensures that if we parse a conversion
+ % specification we should consume one of the string__poly_type
+ % tokens.
+ %
+:- pred conversion_specification(list(string__poly_type)::in,
+ list(char)::in, list(char)::out) is semidet.
+
+conversion_specification(PolyTypes0) -->
+ zero_or_more_occurences(flag_mod),
+ zero_or_one_occurence(width_mod),
+ zero_or_one_occurence(prec_mod),
+ zero_or_one_occurence(length_mod),
+ spec(PolyTypes0).
+
+ %
+ % Do we have a flag?
+ %
+:- pred flag_mod(list(char)::in, list(char)::out) is semidet.
+
+flag_mod --> ['#'].
+flag_mod --> ['0'].
+flag_mod --> ['-'].
+flag_mod --> [' '].
+flag_mod --> ['+'].
+
+ %
+ % Do we have a minimum field width?
+ %
+:- pred width_mod(list(char)::in, list(char)::out) is semidet.
+
+width_mod -->
+ non_zero_digit,
+ zero_or_more_occurences(digit).
+
+ %
+ % Do we have a precision?
+ %
+:- pred prec_mod(list(char)::in, list(char)::out) is semidet.
+
+prec_mod -->
+ ['.'],
+ non_zero_digit,
+ zero_or_more_occurences(digit).
+
+ %
+ % Do we have a length modifier?
+ %
+:- pred length_mod(list(char)::in, list(char)::out) is semidet.
+
+length_mod --> ['h'], ( ['h'] -> [] ; [] ).
+length_mod --> ['l'], ( ['l'] -> [] ; [] ).
+length_mod --> ['L'].
+length_mod --> ['j'].
+length_mod --> ['z'].
+length_mod --> ['t'].
+
+ %
+ % Do we have a valid conversion specifier?
+ % We check to ensure that the specifier also matches the type
+ % from the input list.
+ % Note we *don't* parse the '%' character as a valid conversion
+ % specifier, this ensures that if we parse a conversion
+ % specification we should consume one of the string__poly_type
+ % tokens.
+ %
+:- pred spec(list(string__poly_type)::in,
+ list(char)::in, list(char)::out) is semidet.
+
+ % valid integer conversion specifiers
+spec([P | _Ps]) --> { P = i(_) }, ['d'].
+spec([P | _Ps]) --> { P = i(_) }, ['i'].
+spec([P | _Ps]) --> { P = i(_) }, ['o'].
+spec([P | _Ps]) --> { P = i(_) }, ['u'].
+spec([P | _Ps]) --> { P = i(_) }, ['x'].
+spec([P | _Ps]) --> { P = i(_) }, ['X'].
+
+ % valid float conversion specifiers
+spec([P | _Ps]) --> { P = f(_) }, ['e'].
+spec([P | _Ps]) --> { P = f(_) }, ['E'].
+spec([P | _Ps]) --> { P = f(_) }, ['f'].
+spec([P | _Ps]) --> { P = f(_) }, ['F'].
+spec([P | _Ps]) --> { P = f(_) }, ['g'].
+spec([P | _Ps]) --> { P = f(_) }, ['G'].
+% spec([P | _Ps]) --> { P = f(_) }, ['a'].
+% spec([P | _Ps]) --> { P = f(_) }, ['A'].
+
+ % valid char conversion specifiers
+spec([P | _Ps]) --> { P = c(_) }, ['c'].
+
+ % valid string conversion specifiers
+spec([P | _Ps]) --> { P = s(_) }, ['s'].
+
+ %
+ % Parse a string which doesn't contain any conversion
+ % specifications. The format string for "%%" doesn't count as a
+ % conversion specification.
+ %
+:- pred other(list(string__poly_type)::in,
+ list(char)::in, list(char)::out) is semidet.
+
+other(PolyTypes) -->
+ ( [Char], { Char \= '%' } ->
+ other(PolyTypes)
+ ;
+ ( ['%'], \+ conversion_specification(PolyTypes) ->
+ other(PolyTypes)
+ ;
+ []
+ )
+ ).
+
+
+ % Zero or more occurences of the string parsed by the ho pred.
+:- pred zero_or_more_occurences(pred(list(T), list(T)), list(T), list(T)).
+:- mode zero_or_more_occurences(pred(in, out) is semidet, in, out) is det.
+
+zero_or_more_occurences(P) -->
+ ( P ->
+ zero_or_more_occurences(P)
+ ;
+ []
+ ).
+
+ % At most one occurence of the string parsed by the ho pred.
+:- pred zero_or_one_occurence(pred(list(T), list(T)), list(T), list(T)).
+:- mode zero_or_one_occurence(pred(in, out) is semidet, in, out) is det.
+
+zero_or_one_occurence(P) -->
+ ( P ->
+ []
+ ;
+ []
+ ).
+
+ % A digit in the range [1-9]
+:- pred non_zero_digit(list(char)::in, list(char)::out) is semidet.
+
+non_zero_digit -->
+ [ Char ],
+ { char__is_digit(Char) },
+ { Char \= '0' }.
+
+ % A digit in the range [0-9]
+:- pred digit(list(char)::in, list(char)::out) is semidet.
+
+digit -->
+ [ Char ],
+ { char__is_digit(Char) }.
+
+ % Create a string from a float using the format string.
+:- func float_to_string(string, float) = string.
+:- pragma c_code(float_to_string(FormatStr::in, Val::in) = (Str::out),
+ [will_not_call_mercury, thread_safe], "{
+ char buf[500];
+ Word tmp;
+ sprintf(buf, FormatStr, Val);
+ incr_hp_atomic_msg(tmp, (strlen(buf) + sizeof(Word)) / sizeof(Word),
+ MR_PROC_LABEL, ""string:string/0"");
+ Str = (char *)tmp;
+ strcpy(Str, buf);
+}").
+
+ % Create a string from a int using the format string.
+:- func int_to_string(string, int) = string.
+:- pragma c_code(int_to_string(FormatStr::in, Val::in) = (Str::out),
+ [will_not_call_mercury, thread_safe], "{
+ char buf[500];
+ Word tmp;
+ sprintf(buf, FormatStr, Val);
+ incr_hp_atomic_msg(tmp, (strlen(buf) + sizeof(Word)) / sizeof(Word),
+ MR_PROC_LABEL, ""string:string/0"");
+ Str = (char *)tmp;
+ strcpy(Str, buf);
+}").
+
+ % Create a string from a string using the format string.
+:- func string_to_string(string, string) = string.
+:- pragma c_code(string_to_string(FormatStr::in, Val::in) = (Str::out),
+ [will_not_call_mercury, thread_safe], "{
+ char buf[500];
+ Word tmp;
+ sprintf(buf, FormatStr, Val);
+ incr_hp_atomic_msg(tmp, (strlen(buf) + sizeof(Word)) / sizeof(Word),
+ MR_PROC_LABEL, ""string:string/0"");
+ Str = (char *)tmp;
+ strcpy(Str, buf);
+}").
+
+ % Create a string from a char using the format string.
+:- func char_to_string(string, char) = string.
+:- pragma c_code(char_to_string(FormatStr::in, Val::in) = (Str::out),
+ [will_not_call_mercury, thread_safe], "{
+ char buf[500];
+ Word tmp;
+ sprintf(buf, FormatStr, Val);
+ incr_hp_atomic_msg(tmp, (strlen(buf) + sizeof(Word)) / sizeof(Word),
+ MR_PROC_LABEL, ""string:string/0"");
+ Str = (char *)tmp;
+ strcpy(Str, buf);
+}").
%-----------------------------------------------------------------------------%
--------------------------------------------------------------------------
mercury-developers mailing list
Post messages to: mercury-developers at cs.mu.oz.au
Administrative Queries: owner-mercury-developers at cs.mu.oz.au
Subscriptions: mercury-developers-request at cs.mu.oz.au
--------------------------------------------------------------------------
More information about the developers
mailing list