[m-dev.] for review: string__fmt
Peter Ross
peter.ross at miscrit.be
Thu Aug 10 00:58:05 AEST 2000
Hi,
Here is the new and improved version of string__format. If you review
it now you will get a free set of steak knives.
I have included the new code for string__format at the start of the
diff, as the unified diff mixes it up with the deletion of the old
string format.
===================================================================
Estimated hours taken: 24
Reimplement string__format so that it uses less memory and it's
behaviour is closer to the C standard.
library/string.m:
Reimplement string__format. The new implementation parses the
format string building a control structure. The control structure
is then traversed to generate the output.
Implement string__to_char_list in C.
Implement string__append_list in C as this results in no garbage
being generated due to the creation of the intermediate strings,
which can be quite a significant saving.
Remove the string__append_list(out, in) mode as it generates an
infinite number of solutions.
Use MR_allocate_aligned_string_msg for all memory allocations.
runtime/mercury_string.h:
Add a new macro MR_allocate_aligned_string_msg which allocates
word aligned space for storage of a string in.
runtime/mercury_string.c:
Define a new function MR_make_string which provides sprintf like
functionality for creating MR_Strings. This function is safe from
buffer overflows providing the vsnprintf function is available.
runtime/Mmakefile:
Add mercury_string.c
configure.in:
Check for the vsnprintf function.
runtime/mercury_conf.h.in:
Define HAVE_VSNPRINTF.
===================================================================
% This predicate has been optimised to produce the least memory
% possible -- memory usage is a significant problem for programs
% which do a lot of formatted IO.
string__format(FormatString, PolyList, String) :-
(
format_string(Specifiers, PolyList, [],
to_char_list(FormatString), [])
->
String = string__append_list(
list__map(specifier_to_string, Specifiers))
;
error("string__format: format string invalid.")
).
:- type specifier
---> conv(
flags :: list(char),
width :: maybe(list(char)),
precision :: maybe(list(char)),
spec :: spec
)
; string(list(char)).
%
% A format string is parsed into alternate sections.
% We alternate between the list of characters which don't
% represent a conversion specifier and those that do.
%
:- pred format_string(list(specifier)::out,
list(string__poly_type)::in, list(string__poly_type)::out,
list(char)::in, list(char)::out) is det.
format_string(Results, PolyTypes0, PolyTypes) -->
other(NonConversionSpecChars),
( conversion_specification(ConversionSpec, PolyTypes0, PolyTypes1) ->
format_string(Results0, PolyTypes1, PolyTypes),
{ Results = [string(NonConversionSpecChars),
ConversionSpec | Results0] }
;
{ Results = [string(NonConversionSpecChars)] },
{ PolyTypes = PolyTypes0 }
).
%
% Parse a string which doesn't contain any conversion
% specifications.
%
:- pred other(list(char)::out, list(char)::in, list(char)::out) is det.
other(Result) -->
( [Char], { Char \= '%' } ->
other(Result0),
{ Result = [Char | Result0] }
;
{ Result = [] }
).
%
% Each conversion specification is introduced by the character
% '%', and ends with a conversion specifier. In between there
% may be (in this order) zero or more flags, an optional
% minimum field width, and an optional precision.
%
:- pred conversion_specification(specifier::out,
list(string__poly_type)::in, list(string__poly_type)::out,
list(char)::in, list(char)::out) is semidet.
conversion_specification(Specificier, PolyTypes0, PolyTypes) -->
['%'],
flags(Flags),
optional(width, MaybeWidth, PolyTypes0, PolyTypes1),
optional(prec, MaybePrec, PolyTypes1, PolyTypes2),
( spec(Spec, PolyTypes2, PolyTypes3) ->
{ Specificier = conv(Flags, MaybeWidth, MaybePrec, Spec) },
{ PolyTypes = PolyTypes3 }
;
{ error("string__format: invalid conversion specifier.") }
).
:- pred optional(pred(T, U, U, V, V), maybe(T), U, U, V, V).
:- mode optional(pred(out, in, out, in, out) is semidet, out, in, out,
in, out) is det.
optional(P, MaybeOutput, Init, Final) -->
( P(Output, Init, Final0) ->
{ MaybeOutput = yes(Output) },
{ Final = Final0 }
;
{ MaybeOutput = no },
{ Final = Init }
).
:- pred flags(list(char)::out, list(char)::in, list(char)::out) is semidet.
flags(Result) -->
( [Char], { flag(Char) } ->
flags(Result0),
{ Result = [Char | Result0] }
;
{ Result = [] }
).
%
% Is it a valid flag character?
%
:- pred flag(char::in) is semidet.
flag('#').
flag('0').
flag('-').
flag(' ').
flag('+').
%
% Do we have a minimum field width?
%
:- pred width(list(char)::out,
list(string__poly_type)::in, list(string__poly_type)::out,
list(char)::in, list(char)::out) is semidet.
width(Width, PolyTypes0, PolyTypes) -->
( ['*'] ->
{ PolyTypes0 = [i(Width0) | PolyTypes1] ->
% XXX maybe better done in C.
Width = to_char_list(int_to_string(Width0)),
PolyTypes = PolyTypes1
;
error("string__format: `*' width modifer not associated with an integer.")
}
;
=(Init),
non_zero_digit,
zero_or_more_occurences(digit),
=(Final),
{ list__remove_suffix(Init, Final, Width) },
{ PolyTypes = PolyTypes0 }
).
%
% Do we have a precision?
%
:- pred prec(list(char)::out,
list(string__poly_type)::in, list(string__poly_type)::out,
list(char)::in, list(char)::out) is semidet.
prec(Prec, PolyTypes0, PolyTypes) -->
['.'],
( ['*'] ->
{ PolyTypes0 = [i(Prec0) | PolyTypes1] ->
% XXX Best done in C
Prec = to_char_list(int_to_string(Prec0)),
PolyTypes = PolyTypes1
;
error("string__format: `*' precision modifer not associated with an integer.")
}
;
=(Init),
digit,
zero_or_more_occurences(digit),
=(Final),
{ list__remove_suffix(Init, Final, Prec) },
{ PolyTypes = PolyTypes0 }
).
:- type spec
% valid integer specifiers
---> d(int)
; i(int)
; o(int)
; u(int)
; x(int)
; cX(int)
; p(int)
% valid float specifiers
; e(float)
; cE(float)
; f(float)
; cF(float)
; g(float)
; cG(float)
% valid char specifiers
; c(char)
% valid string specifiers
; s(string)
% specifier representing "%%"
; percent
.
%
% Do we have a valid conversion specifier?
% We check to ensure that the specifier also matches the type
% from the input list.
%
:- pred spec(spec::out,
list(string__poly_type)::in, list(string__poly_type)::out,
list(char)::in, list(char)::out) is semidet.
% valid integer conversion specifiers
spec(d(Int), [i(Int) | Ps], Ps) --> ['d'].
spec(i(Int), [i(Int) | Ps], Ps) --> ['i'].
spec(o(Int), [i(Int) | Ps], Ps) --> ['o'].
spec(u(Int), [i(Int) | Ps], Ps) --> ['u'].
spec(x(Int), [i(Int) | Ps], Ps) --> ['x'].
spec(cX(Int), [i(Int) | Ps], Ps) --> ['X'].
spec(p(Int), [i(Int) | Ps], Ps) --> ['p'].
% valid float conversion specifiers
spec(e(Float), [f(Float) | Ps], Ps) --> ['e'].
spec(cE(Float), [f(Float) | Ps], Ps) --> ['E'].
spec(f(Float), [f(Float) | Ps], Ps) --> ['f'].
spec(cF(Float), [f(Float) | Ps], Ps) --> ['F'].
spec(g(Float), [f(Float) | Ps], Ps) --> ['g'].
spec(cG(Float), [f(Float) | Ps], Ps) --> ['G'].
% valid char conversion specifiers
spec(c(Char), [c(Char) | Ps], Ps) --> ['c'].
% valid string conversion specifiers
spec(s(Str), [s(Str) | Ps], Ps) --> ['s'].
% conversion specifier representing the "%" sign
spec(percent, Ps, Ps) --> ['%'].
% A digit in the range [1-9]
:- pred non_zero_digit(list(char)::in, list(char)::out) is semidet.
non_zero_digit -->
[ Char ],
{ char__is_digit(Char) },
{ Char \= '0' }.
% A digit in the range [0-9]
:- pred digit(list(char)::in, list(char)::out) is semidet.
digit -->
[ Char ],
{ char__is_digit(Char) }.
% Zero or more occurences of the string parsed by the ho pred.
:- pred zero_or_more_occurences(pred(list(T), list(T)), list(T), list(T)).
:- mode zero_or_more_occurences(pred(in, out) is semidet, in, out) is det.
zero_or_more_occurences(P) -->
( P ->
zero_or_more_occurences(P)
;
[]
).
:- func specifier_to_string(specifier) = string.
specifier_to_string(conv(Flags, Width, Prec, Spec)) = String :-
(
% valid int conversion specifiers
Spec = d(Int),
String = format_int(
format_string(Flags, Width, Prec, "ld"), Int)
;
Spec = i(Int),
String = format_int(
format_string(Flags, Width, Prec, "li"), Int)
;
Spec = o(Int),
String = format_int(
format_string(Flags, Width, Prec, "lo"), Int)
;
Spec = u(Int),
String = format_int(
format_string(Flags, Width, Prec, "lu"), Int)
;
Spec = x(Int),
String = format_int(
format_string(Flags, Width, Prec, "lx"), Int)
;
Spec = cX(Int),
String = format_int(
format_string(Flags, Width, Prec, "lX"), Int)
;
Spec = p(Int),
String = format_int(
format_string(Flags, Width, Prec, "lp"), Int)
;
% valid float conversion specifiers
Spec = e(Float),
String = format_float(
format_string(Flags, Width, Prec, "Le"), Float)
;
Spec = cE(Float),
String = format_float(
format_string(Flags, Width, Prec, "LE"), Float)
;
Spec = f(Float),
String = format_float(
format_string(Flags, Width, Prec, "Lf"), Float)
;
Spec = cF(Float),
String = format_float(
format_string(Flags, Width, Prec, "LF"), Float)
;
Spec = g(Float),
String = format_float(
format_string(Flags, Width, Prec, "Lg"), Float)
;
Spec = cG(Float),
String = format_float(
format_string(Flags, Width, Prec, "LG"), Float)
;
% valid char conversion Specifiers
Spec = c(Char),
String = format_char(
format_string(Flags, Width, Prec, "c"), Char)
;
% valid string conversion Spec = ifiers
Spec = s(Str),
String = format_string(
format_string(Flags, Width, Prec, "s"), Str)
;
% conversion specifier representing the "%" sign
Spec = percent,
String = "%"
).
specifier_to_string(string(Chars)) = from_char_list(Chars).
% Construct a format string suitable to passing to sprintf.
:- func format_string(list(char), maybe(list(char)),
maybe(list(char)), string) = string.
format_string(Flags, MaybeWidth, MaybePrec, Spec) = String :-
(
MaybeWidth = yes(Width)
;
MaybeWidth = no,
Width = []
),
(
MaybePrec = yes(Prec0),
Prec = ['.' | Prec0]
;
MaybePrec = no,
Prec = []
),
String = string__append_list(["%", from_char_list(Flags),
from_char_list(Width),
from_char_list(Prec), Spec]).
% Create a string from a float using the format string.
% Note is is the responsibility of the caller to ensure that the
% format string is valid.
:- func format_float(string, float) = string.
:- pragma c_code(format_float(FormatStr::in, Val::in) = (Str::out),
[will_not_call_mercury, thread_safe], "{
Str = MR_make_string(MR_PROC_LABEL, FormatStr, (long double) Val);
}").
% Create a string from a int using the format string.
% Note is is the responsibility of the caller to ensure that the
% format string is valid.
:- func format_int(string, int) = string.
:- pragma c_code(format_int(FormatStr::in, Val::in) = (Str::out),
[will_not_call_mercury, thread_safe], "{
Str = MR_make_string(MR_PROC_LABEL, FormatStr, (long) Val);
}").
% Create a string from a string using the format string.
% Note is is the responsibility of the caller to ensure that the
% format string is valid.
:- func format_string(string, string) = string.
:- pragma c_code(format_string(FormatStr::in, Val::in) = (Str::out),
[will_not_call_mercury, thread_safe], "{
Str = MR_make_string(MR_PROC_LABEL, FormatStr, Val);
}").
% Create a string from a char using the format string.
% Note is is the responsibility of the caller to ensure that the
% format string is valid.
:- func format_char(string, char) = string.
:- pragma c_code(format_char(FormatStr::in, Val::in) = (Str::out),
[will_not_call_mercury, thread_safe], "{
Str = MR_make_string(MR_PROC_LABEL, FormatStr, Val);
}").
===================================================================
Index: configure.in
===================================================================
RCS file: /home/mercury1/repository/mercury/configure.in,v
retrieving revision 1.214
diff -u -r1.214 configure.in
--- configure.in 2000/07/17 13:51:03 1.214
+++ configure.in 2000/08/09 14:46:45
@@ -297,6 +297,7 @@
esac
AC_HAVE_FUNCS(sysconf getpagesize memalign mprotect sigaction setitimer)
AC_HAVE_FUNCS(strerror memmove fileno fdopen fstat)
+AC_HAVE_FUNCS(vsnprintf)
#-----------------------------------------------------------------------------#
AC_CHECK_HEADER(unistd.h, HAVE_UNISTD_H=1)
if test "$HAVE_UNISTD_H" = 1; then
Index: library/string.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/string.m,v
retrieving revision 1.124
diff -u -r1.124 string.m
--- library/string.m 2000/08/08 09:30:02 1.124
+++ library/string.m 2000/08/09 14:46:53
@@ -109,12 +109,11 @@
:- pred string__to_char_list(string, list(char)).
:- mode string__to_char_list(in, out) is det.
+:- mode string__to_char_list(out, in) is det.
:- pred string__from_char_list(list(char), string).
:- mode string__from_char_list(in, out) is det.
-:- mode string__from_char_list(out, in) is semidet.
- % XXX second mode should be det too
- % (but this turns out to be tricky to implement)
+:- mode string__from_char_list(out, in) is det.
:- pred string__from_rev_char_list(list(char), string).
:- mode string__from_rev_char_list(in, out) is det.
@@ -273,7 +272,6 @@
:- pred string__append_list(list(string), string).
:- mode string__append_list(in, out) is det.
-:- mode string__append_list(out, in) is multi.
% Append a list of strings together.
:- pred string__hash(string, int).
@@ -299,9 +297,8 @@
% will return
% String = "Square-root of 2 = 1.41\n".
%
-% All the normal options available in C are supported, ie Flags [0+-# ],
-% a field width (or *), '.', precision (could be a '*'), and a length
-% modifier (currently ignored).
+% The following options available in C are supported: flags [0+-# ],
+% a field width (or *), and a precision (could be a ".*").
%
% Valid conversion character types are {dioxXucsfeEgGp%}. %n is not
% supported. string__format will not return the length of the string.
@@ -331,18 +328,18 @@
% directly after the '%'.
%
% Note:
-% %#.0e, %#.0E won't print a '.' before the 'e' ('#' ignored).
+% %#.0e, %#.0E now prints a '.' before the 'e'.
%
% Asking for more precision than a float actually has will
% result in potentially misleading output.
%
-% If a width or precision is specified, without a `.', a number
-% is assumed to be a width and a `*' is assumed to be a precision.
-% It is always better to include a `.' to remove ambiguity. This
-% interpretation is non-standard and may change.
+% Numbers are now rounded by precision value, not
+% truncated as previously.
%
-% Numbers are truncated by a precision value, not rounded off.
+% The implementation uses the sprintf() function, so the
+% actual output will depend on the C standard library.
+
%------------------------------------------------------------------------------%
:- type string__poly_type --->
@@ -576,17 +573,67 @@
string__int_to_base_string_2(NegN1, Base, Str1),
string__append(Str1, DigitString, Str)
).
+
+string__from_char_list(CharList, Str) :-
+ string__to_char_list(Str, CharList).
+
+/*-----------------------------------------------------------------------*/
+
+/*
+:- pred string__to_char_list(string, list(char)).
+:- mode string__to_char_list(in, out) is det.
+:- mode string__to_char_list(out, in) is det.
+*/
+
+:- pragma c_code(string__to_char_list(Str::in, CharList::out),
+ [will_not_call_mercury, thread_safe], "{
+ MR_ConstString p = Str + strlen(Str);
+ CharList = MR_list_empty_msg(MR_PROC_LABEL);
+ while (p > Str) {
+ p--;
+ CharList = MR_list_cons_msg((MR_UnsignedChar) *p, CharList,
+ MR_PROC_LABEL);
+ }
+}").
-% NB: it would be more efficient to do this directly (using pragma c_code)
-string__to_char_list(String, CharList) :-
- string__to_int_list(String, IntList),
- string__int_list_to_char_list(IntList, CharList).
-
-% NB: it would be more efficient to do this directly (using pragma c_code)
-string__from_char_list(CharList, String) :-
- string__char_list_to_int_list(CharList, IntList),
- string__to_int_list(String, IntList).
+:- pragma c_code(string__to_char_list(Str::out, CharList::in),
+ [will_not_call_mercury, thread_safe], "{
+ /* mode (out, in) is det */
+ MR_Word char_list_ptr;
+ size_t size;
+/*
+** loop to calculate list length + sizeof(MR_Word) in `size' using list in
+** `char_list_ptr'
+*/
+ size = sizeof(MR_Word);
+ char_list_ptr = CharList;
+ while (! MR_list_is_empty(char_list_ptr)) {
+ size++;
+ char_list_ptr = MR_list_tail(char_list_ptr);
+ }
+/*
+** allocate (length + 1) bytes of heap space for string
+** i.e. (length + 1 + sizeof(MR_Word) - 1) / sizeof(MR_Word) words
+*/
+ MR_allocate_aligned_string_msg(Str, size, MR_PROC_LABEL);
+/*
+** loop to copy the characters from the char_list to the string
+*/
+ size = 0;
+ char_list_ptr = CharList;
+ while (! MR_list_is_empty(char_list_ptr)) {
+ Str[size++] = MR_list_head(char_list_ptr);
+ char_list_ptr = MR_list_tail(char_list_ptr);
+ }
+/*
+** null terminate the string
+*/
+ Str[size] = '\\0';
+}").
+
+/*-----------------------------------------------------------------------*/
+
%
% We could implement from_rev_char_list using list__reverse and from_char_list,
% but the optimized implementation in C below is there for efficiency since
@@ -597,7 +644,6 @@
{
MR_Word list_ptr;
MR_Word size, len;
- MR_Word str_ptr;
/*
** loop to calculate list length + sizeof(MR_Word) in `size' using list in
** `list_ptr' and separately count the length of the string
@@ -614,10 +660,8 @@
** allocate (length + 1) bytes of heap space for string
** i.e. (length + 1 + sizeof(MR_Word) - 1) / sizeof(MR_Word) words
*/
- incr_hp_atomic_msg(str_ptr, size / sizeof(MR_Word),
- MR_PROC_LABEL,
- ""string:string/0"");
- Str = (MR_String) str_ptr;
+ MR_allocate_aligned_string_msg(Str, size, MR_PROC_LABEL);
+
/*
** set size to be the offset of the end of the string
** (ie the \\0) and null terminate the string.
@@ -747,12 +791,41 @@
string__first_char(String, Char, String1),
string__duplicate_char(Char, Count1, String1)
).
+
+%-----------------------------------------------------------------------------%
+
+string__append_list(Lists, string__append_list(Lists)).
+
+ % Implementation of append_list that uses C as this minimises the
+ % amount of garbage created.
+:- pragma c_code(string__append_list(Strs::in) = (Str::out),
+ [will_not_call_mercury, thread_safe], "{
+ MR_Word list = Strs;
+ MR_Word tmp;
+ size_t len = 0;
+
+ /* Determine the total len of all strings */
+ while (!MR_list_is_empty(list)) {
+ len += strlen((MR_String) MR_list_head(list));
+ list = MR_list_tail(list);
+ }
-string__append_list([], "").
-string__append_list([S | Ss], L) :-
- string__append_list(Ss, L0),
- string__append(S, L0, L).
+ /* Allocate enough word aligned memory for the string */
+ MR_allocate_aligned_string_msg(Str, len, MR_PROC_LABEL);
+ /* Copy the strings into the new memory */
+ len = 0;
+ list = Strs;
+ while (!MR_list_is_empty(list)) {
+ strcpy((MR_String) Str + len, (MR_String) MR_list_head(list));
+ len += strlen((MR_String) MR_list_head(list));
+ list = MR_list_tail(list);
+ }
+
+ /* Set the last character to the null char */
+ Str[len] = '\\0';
+}").
+
%-----------------------------------------------------------------------------%
% Note - string__hash is also defined in code/imp.h
@@ -807,752 +880,394 @@
).
%-----------------------------------------------------------------------------%
-
-string__format(Format_string, Poly_list, Out_string ) :-
- string__to_char_list(Format_string, Format_char_list),
- string__format_2(Format_char_list, Poly_list, Out_string) .
-
-:- pred string__format_2(list(char), list(string__poly_type), string).
-:- mode string__format_2(in, in, out) is det.
-%
-% string__format_2(stream, char_f, vars, IO, IO).
-% The main function, with different input types.
-%
-% Accumulator recursion is not used, as it would involve adding a
-% short string to the end of a long string many times, which I understand
-% is not efficient. Instead, many short strings are added to the front
-% of a (long) and growing string.
-%
-string__format_2([], Vars_in, Result) :-
- ( Vars_in = [] ->
- Result = ""
- ;
- error(
- "string__format: argument list has more elements than format string")
- ).
-string__format_2([Achar|As], Vars_in, Ostring) :-
- (
- Achar = '%'
- ->
- (
- As = ['%' | Ass]
- ->
- string__format_2(Ass, Vars_in, Temp_out),
- string__first_char(Ostring, '%', Temp_out)
- ;
- (
- string__format_top_convert_variable(As, Vars_in,
- As_out, Vars_out, String_1)
- ->
- string__format_2(As_out, Vars_out, String_2),
- string__append(String_1, String_2, Ostring)
- ;
- error(
- "string__format: argument list has fewer elements than format string")
- )
- )
- ;
- string__format_2(As, Vars_in, Temp_out),
- string__first_char(Ostring, Achar, Temp_out)
- ).
-:- pred string__format_top_convert_variable(list(char),
- list(string__poly_type), list(char),
- list(string__poly_type), string).
-:- mode string__format_top_convert_variable(in, in, out, out, out) is semidet.
-%
-% string__format_top_convert_variable(formated string in, var in, formatted
-% string out, var out, Out string)
-% Return a string of the formatted variable.
-%
-string__format_top_convert_variable(['%'|Bs], [], Bs, [], "%").
- % Above rule should not be executed... defensive rule.
-string__format_top_convert_variable(F_chars0, [Var0|Vars_list0],
- F_chars, Vars_list, OutString ) :-
- string__format_takewhile1(F_chars0, [Conv_char_0|F_chars],
- Fmt_info),
- % Seperate formatting info from formatting string.
- % in, out, out
- string__format_get_optional_args(Fmt_info, Flags, Width0,
- Precision0, Conv_modify),
- % Parse formatting info.
- % in, out, out, out, out.
- string__format_read_star(Vars_list0, Width0, Precision0, Vars_list,
- Width1, Precision1),
- % Do something if a precision or width was '*'
- % in, in, in, out, out, out
- string__format_mod_conv_char(Precision1, Var0, Conv_char_0,
- Conv_char_1, Precision),
- % Modify(?) conversion character.
- % in, in, in, out, out
- string__format_do_mod_char(Conv_modify, Conv_char_1, Conv_char_2),
- % Interperate input conversion modifiers.
- % in, in, out
- string__format_do_conversion(Conv_char_2, Var0, Precision,
- Flags, Move_i0, OutString0),
- % Actually convert a Variable to a string
- % in, in, in, in, out, out
- string__format_add_sign(OutString0, Flags, Var0, Move_i0,
- Move_i1, OutString1),
- % Adds an optional '+' or ' ' to string.
- % in, in, in, in, out, out
- string__format_pad_width(OutString1, Width1, Flags, Move_i1,
- OutString).
- % Ensures that the string is at least width.
- % in, in, in, out, in
-
-%
-% Change conversion character.
-%
-% Ideally the outer "->" symbols could be removed, the last case given
-% a guard, and the compiler accept this as det, rather than non-det.
-%
-:- pred string__format_mod_conv_char(maybe(int), string__poly_type, char,
- char, maybe(int)).
-:- mode string__format_mod_conv_char(in, in, in, out, out) is det.
-string__format_mod_conv_char(Precision0, Poly_var, Conversion_in,
- Conversion_out, Precision) :-
- ( Precision0 = yes(Prec0) ->
- Prec = Prec0
- ;
- Prec = 0
- ),
- ( Conversion_in = 'i' ->
- Conversion_out = 'd', % %d = %i
- Precision = Precision0
- ;
- Conversion_in = 'g' -> %g is either %e of %f
- (Poly_var = f(F) ->
- Ft = float__abs(F),
- int__pow(10, Prec, P),
- int__to_float(P, Pe),
- (
- Ft > 0.0001,
- Pe > Ft
- ->
- Conversion_out = 'f',
- Precision = yes(0)
- ;
- Conversion_out = 'e',
- Precision = Precision0
- )
- ;
- error("string__format: %g without a f(Float).")
- )
- ;
- Conversion_in = 'G' -> %G is either %E of %f
- (Poly_var = f(F) ->
- Ft = float__abs(F),
- int__pow(10, Prec, P),
- int__to_float(P, Pe),
- (
- Ft > 0.0001,
- Pe > Ft
- ->
- Conversion_out = 'f',
- Precision = yes(0)
- ;
- Conversion_out = 'E',
- Precision = Precision0
- )
- ;
- error("string__format: %G without a f(Float).")
+ % This predicate has been optimised to produce the least memory
+ % possible -- memory usage is a significant problem for programs
+ % which do a lot of formatted IO.
+string__format(FormatString, PolyList, String) :-
+ (
+ format_string(Specifiers, PolyList, [],
+ to_char_list(FormatString), [])
+ ->
+ String = string__append_list(
+ list__map(specifier_to_string, Specifiers))
+ ;
+ error("string__format: format string invalid.")
+ ).
+
+:- type specifier
+ ---> conv(
+ flags :: list(char),
+ width :: maybe(list(char)),
+ precision :: maybe(list(char)),
+ spec :: spec
)
- ;
- Conversion_out = Conversion_in,
- Precision = Precision0
- ).
-
-% This function glances at the input-modification flags, only applicable
-% with a more complicated type system
-%
-% Another function that would be better off as a switch.
-%
-:- pred string__format_do_mod_char(char, char, char).
-:- mode string__format_do_mod_char(in, in, out) is det.
-string__format_do_mod_char(Char_mods, C_in, C_out) :-
- (
- Char_mods = 'h'
- ->
- C_out = C_in
- ;
- Char_mods = 'l'
- ->
- C_out = C_in
- ;
- Char_mods = 'L'
- ->
- C_out = C_in
- ;
- C_out = C_in
- ).
+ ; string(list(char)).
-%
-% Change Width or Precision value, if '*' was spcified
-%
-:- pred string__format_read_star(list(string__poly_type), int, int,
- list(string__poly_type), maybe(int), maybe(int)).
-:- mode string__format_read_star(in, in, in, out, out, out) is semidet.
-string__format_read_star(Polys_in, Int_width, Int_precis, Polys_out,
- Width, Precision) :-
- (
- string__special_precision_and_width(Int_width)
- ->
- Polys_in = [ i(Width0) | Poly_temp],
- Width = yes(Width0)
- ;
- ( string__default_precision_and_width(Int_width) ->
- Width = no
- ;
- Width = yes(Int_width)
- ),
- Polys_in = Poly_temp
- ),
- (
- string__special_precision_and_width(Int_precis)
- ->
- Poly_temp = [ i(Precision0) | Polys_out],
- Precision = yes(Precision0)
- ;
- ( string__default_precision_and_width(Int_precis) ->
- Precision = no
+ %
+ % A format string is parsed into alternate sections.
+ % We alternate between the list of characters which don't
+ % represent a conversion specifier and those that do.
+ %
+:- pred format_string(list(specifier)::out,
+ list(string__poly_type)::in, list(string__poly_type)::out,
+ list(char)::in, list(char)::out) is det.
+
+format_string(Results, PolyTypes0, PolyTypes) -->
+ other(NonConversionSpecChars),
+ ( conversion_specification(ConversionSpec, PolyTypes0, PolyTypes1) ->
+ format_string(Results0, PolyTypes1, PolyTypes),
+ { Results = [string(NonConversionSpecChars),
+ ConversionSpec | Results0] }
+ ;
+ { Results = [string(NonConversionSpecChars)] },
+ { PolyTypes = PolyTypes0 }
+ ).
+
+ %
+ % Parse a string which doesn't contain any conversion
+ % specifications.
+ %
+:- pred other(list(char)::out, list(char)::in, list(char)::out) is det.
+
+other(Result) -->
+ ( [Char], { Char \= '%' } ->
+ other(Result0),
+ { Result = [Char | Result0] }
+ ;
+ { Result = [] }
+ ).
+
+ %
+ % Each conversion specification is introduced by the character
+ % '%', and ends with a conversion specifier. In between there
+ % may be (in this order) zero or more flags, an optional
+ % minimum field width, and an optional precision.
+ %
+:- pred conversion_specification(specifier::out,
+ list(string__poly_type)::in, list(string__poly_type)::out,
+ list(char)::in, list(char)::out) is semidet.
+
+conversion_specification(Specificier, PolyTypes0, PolyTypes) -->
+ ['%'],
+ flags(Flags),
+ optional(width, MaybeWidth, PolyTypes0, PolyTypes1),
+ optional(prec, MaybePrec, PolyTypes1, PolyTypes2),
+ ( spec(Spec, PolyTypes2, PolyTypes3) ->
+ { Specificier = conv(Flags, MaybeWidth, MaybePrec, Spec) },
+ { PolyTypes = PolyTypes3 }
+ ;
+ { error("string__format: invalid conversion specifier.") }
+ ).
+
+:- pred optional(pred(T, U, U, V, V), maybe(T), U, U, V, V).
+:- mode optional(pred(out, in, out, in, out) is semidet, out, in, out,
+ in, out) is det.
+
+optional(P, MaybeOutput, Init, Final) -->
+ ( P(Output, Init, Final0) ->
+ { MaybeOutput = yes(Output) },
+ { Final = Final0 }
+ ;
+ { MaybeOutput = no },
+ { Final = Init }
+ ).
+
+:- pred flags(list(char)::out, list(char)::in, list(char)::out) is semidet.
+
+flags(Result) -->
+ ( [Char], { flag(Char) } ->
+ flags(Result0),
+ { Result = [Char | Result0] }
+ ;
+ { Result = [] }
+ ).
+
+ %
+ % Is it a valid flag character?
+ %
+:- pred flag(char::in) is semidet.
+
+flag('#').
+flag('0').
+flag('-').
+flag(' ').
+flag('+').
+
+ %
+ % Do we have a minimum field width?
+ %
+:- pred width(list(char)::out,
+ list(string__poly_type)::in, list(string__poly_type)::out,
+ list(char)::in, list(char)::out) is semidet.
+
+width(Width, PolyTypes0, PolyTypes) -->
+ ( ['*'] ->
+ { PolyTypes0 = [i(Width0) | PolyTypes1] ->
+ % XXX maybe better done in C.
+ Width = to_char_list(int_to_string(Width0)),
+ PolyTypes = PolyTypes1
;
- Precision = yes(Int_precis)
- ),
- Polys_out = Poly_temp
- ).
-
-%
-% This function did the variable conversion to string.
-% Now done by string__do_conversion_0/6.
-%
-%
-% Mv_width records the length of the prefix in front of the number,
-% so that it is more easy to insert width and precision padding and
-% optional signs, in the correct place.
-%
-:- pred string__format_do_conversion(char, string__poly_type, maybe(int),
- list(char), int, string).
-:- mode string__format_do_conversion(in, in, in, in, out, out)
- is det.
-string__format_do_conversion(Conversion, Poly_t, Precision, Flags, Mv_width,
- Ostring) :-
- (
- string__do_conversion_0(Conversion, Poly_t, Ostring0,
- Precision, Flags, Mv_width0)
- ->
- Mv_width = Mv_width0,
- Ostring = Ostring0
- ;
- string__do_conversion_fail(Conversion)
- ).
-
-:- pred string__do_conversion_0(char, string__poly_type, string, maybe(int),
- list(char), int).
-:- mode string__do_conversion_0(in, in, out, in, in, out) is semidet.
-string__do_conversion_0(Conversion, Poly_t, Ostring, Precision, Flags,
- Mv_width) :-
- (
- Conversion = 'd',
- Poly_t = i(I),
- string__int_to_string(I, S),
- string__format_int_precision(S, Ostring, Precision, _),
- (
- I < 0
- ->
- Mv_width is 1
- ;
- Mv_width is 0
- )
- ;
- Conversion = 'o',
- Poly_t = i(I),
- ( I = 0 ->
- S = "0",
- string__format_int_precision(S, Ostring, Precision, _),
- Pfix_len = 0
- ;
- string__int_to_base_string(I, 8, S),
- string__format_int_precision(S, SS, Precision, _),
- ( list__member('#', Flags) ->
- string__first_char(Ostring, '0', SS),
- Pfix_len = 1
- ;
- Ostring = SS,
- Pfix_len = 0
- )
- ),
- ( I < 0 -> Mv_width is Pfix_len + 1 ; Mv_width is Pfix_len )
- ;
- Conversion = 'x' ,
- Poly_t = i(I),
- ( I = 0 ->
- SS = "0",
- Pfix_len = 0,
- string__format_int_precision(SS, Ostring, Precision, _)
- ;
- string__int_to_base_string(I, 16, S0),
- string__to_lower(S0, S),
- string__format_int_precision(S, SS, Precision, _),
- (
- list__member('#', Flags)
- ->
- string__append("0x", SS, Ostring),
- Pfix_len = 2
- ;
- Ostring = SS,
- Pfix_len = 0
- )
- ),
- ( I < 0 -> Mv_width is Pfix_len + 1 ; Mv_width is Pfix_len )
- ;
- Conversion = 'X',
- Poly_t = i(I),
- ( I = 0 ->
- SS = "0",
- Pfix_len = 0,
- string__format_int_precision(SS, Ostring, Precision, _)
- ;
- string__int_to_base_string(I, 16, Otemp),
- string__to_upper(Otemp, S),
- string__format_int_precision(S, SS, Precision, _),
- ( list__member('#', Flags) ->
- string__append("0X", SS, Ostring),
- Pfix_len = 2
- ;
- SS = Ostring,
- Pfix_len = 0
- )
- ),
- ( I < 0 -> Mv_width is Pfix_len + 1 ; Mv_width is Pfix_len )
- ;
- Conversion = 'u' ,
- Poly_t = i(I),
- int__abs(I, J),
- string__int_to_string(J, S),
- string__format_int_precision(S, Ostring, Precision, Mvt),
- Mv_width = Mvt
- ;
- Conversion = 'c' ,
- Poly_t = c(C),
- string__char_to_string(C, Ostring),
- Mv_width = 0
- ;
- Conversion = 's' ,
- Poly_t = s(S),
- ( Precision = yes(Prec) ->
- string__split(S, Prec, Ostring, _)
- ;
- S = Ostring
- ),
- Mv_width = 0
- ;
- Conversion = 'f' ,
- Poly_t = f(F),
- string__float_to_f_string(F, Fstring),
- string__format_calc_prec(Fstring, Ostring, Precision),
- (F < 0.0 -> Mv_width = 1 ; Mv_width = 0)
- ;
- Conversion = 'e',
- Poly_t = f(F),
- string__format_calc_exp(F, Ostring, Precision, 0),
- (F < 0.0 -> Mv_width = 1 ; Mv_width = 0)
- ;
- Conversion = 'E' ,
- Poly_t = f(F),
- string__format_calc_exp(F, Otemp, Precision, 0),
- string__to_upper(Otemp, Ostring),
- (F < 0.0 -> Mv_width = 1 ; Mv_width = 0)
- ;
- Conversion = 'p' ,
- Poly_t = i(I),
- string__int_to_string(I, Ostring),
- ((I < 0) -> Mv_width = 1 ; Mv_width = 0)
- ).
-
-:- pred string__do_conversion_fail(char).
-:- mode string__do_conversion_fail(in) is erroneous.
-string__do_conversion_fail(Conversion) :-
- string__format("%s `%%%c', without a correct poly-variable.",
- [s("string__format: statement has used type"), c(Conversion)],
- Error_message),
- error(Error_message).
-
-%
-% Use precision information to modify string. - for integers
-%
-:- pred string__format_int_precision(string, string, maybe(int), int).
-:- mode string__format_int_precision(in, out, in, out) is semidet.
-string__format_int_precision(S, Ostring, Precision, Added_width) :-
- ( Precision = yes(Prec0) ->
- Prec = Prec0
- ;
- Prec = 0
- ),
- string__length(S, L),
- ( string__first_char(S, '-', _) ->
- Xzeros is Prec - L + 1
+ error("string__format: `*' width modifer not associated with an integer.")
+ }
;
- Xzeros is Prec - L
- ),
- Added_width = Xzeros,
- ( Xzeros > 0 ->
- string__duplicate_char('0', Xzeros, Pfix),
- string__first_char(S, C, Rest),
- (
- C \= ('-'),
- C \= ('+')
- ->
- string__append(Pfix, S, Ostring)
+ =(Init),
+ non_zero_digit,
+ zero_or_more_occurences(digit),
+ =(Final),
+
+ { list__remove_suffix(Init, Final, Width) },
+ { PolyTypes = PolyTypes0 }
+ ).
+
+ %
+ % Do we have a precision?
+ %
+:- pred prec(list(char)::out,
+ list(string__poly_type)::in, list(string__poly_type)::out,
+ list(char)::in, list(char)::out) is semidet.
+
+prec(Prec, PolyTypes0, PolyTypes) -->
+ ['.'],
+ ( ['*'] ->
+ { PolyTypes0 = [i(Prec0) | PolyTypes1] ->
+ % XXX Best done in C
+ Prec = to_char_list(int_to_string(Prec0)),
+ PolyTypes = PolyTypes1
;
- string__append(Pfix, Rest, Temps),
- string__first_char(Ostring, C, Temps)
- )
+ error("string__format: `*' precision modifer not associated with an integer.")
+ }
;
- Ostring = S
- ).
+ =(Init),
+ digit,
+ zero_or_more_occurences(digit),
+ =(Final),
+
+ { list__remove_suffix(Init, Final, Prec) },
+ { PolyTypes = PolyTypes0 }
+ ).
+
+:- type spec
+ % valid integer specifiers
+ ---> d(int)
+ ; i(int)
+ ; o(int)
+ ; u(int)
+ ; x(int)
+ ; cX(int)
+ ; p(int)
+
+ % valid float specifiers
+ ; e(float)
+ ; cE(float)
+ ; f(float)
+ ; cF(float)
+ ; g(float)
+ ; cG(float)
+
+ % valid char specifiers
+ ; c(char)
+
+ % valid string specifiers
+ ; s(string)
+
+ % specifier representing "%%"
+ ; percent
+ .
+
+ %
+ % Do we have a valid conversion specifier?
+ % We check to ensure that the specifier also matches the type
+ % from the input list.
+ %
+:- pred spec(spec::out,
+ list(string__poly_type)::in, list(string__poly_type)::out,
+ list(char)::in, list(char)::out) is semidet.
+
+ % valid integer conversion specifiers
+spec(d(Int), [i(Int) | Ps], Ps) --> ['d'].
+spec(i(Int), [i(Int) | Ps], Ps) --> ['i'].
+spec(o(Int), [i(Int) | Ps], Ps) --> ['o'].
+spec(u(Int), [i(Int) | Ps], Ps) --> ['u'].
+spec(x(Int), [i(Int) | Ps], Ps) --> ['x'].
+spec(cX(Int), [i(Int) | Ps], Ps) --> ['X'].
+spec(p(Int), [i(Int) | Ps], Ps) --> ['p'].
+
+ % valid float conversion specifiers
+spec(e(Float), [f(Float) | Ps], Ps) --> ['e'].
+spec(cE(Float), [f(Float) | Ps], Ps) --> ['E'].
+spec(f(Float), [f(Float) | Ps], Ps) --> ['f'].
+spec(cF(Float), [f(Float) | Ps], Ps) --> ['F'].
+spec(g(Float), [f(Float) | Ps], Ps) --> ['g'].
+spec(cG(Float), [f(Float) | Ps], Ps) --> ['G'].
+
+ % valid char conversion specifiers
+spec(c(Char), [c(Char) | Ps], Ps) --> ['c'].
+
+ % valid string conversion specifiers
+spec(s(Str), [s(Str) | Ps], Ps) --> ['s'].
+
+ % conversion specifier representing the "%" sign
+spec(percent, Ps, Ps) --> ['%'].
+
+ % A digit in the range [1-9]
+:- pred non_zero_digit(list(char)::in, list(char)::out) is semidet.
+
+non_zero_digit -->
+ [ Char ],
+ { char__is_digit(Char) },
+ { Char \= '0' }.
+
+ % A digit in the range [0-9]
+:- pred digit(list(char)::in, list(char)::out) is semidet.
+
+digit -->
+ [ Char ],
+ { char__is_digit(Char) }.
+
+
+ % Zero or more occurences of the string parsed by the ho pred.
+:- pred zero_or_more_occurences(pred(list(T), list(T)), list(T), list(T)).
+:- mode zero_or_more_occurences(pred(in, out) is semidet, in, out) is det.
+
+zero_or_more_occurences(P) -->
+ ( P ->
+ zero_or_more_occurences(P)
+ ;
+ []
+ ).
+
+:- func specifier_to_string(specifier) = string.
+
+specifier_to_string(conv(Flags, Width, Prec, Spec)) = String :-
+ (
+ % valid int conversion specifiers
+ Spec = d(Int),
+ String = format_int(
+ format_string(Flags, Width, Prec, "ld"), Int)
+ ;
+ Spec = i(Int),
+ String = format_int(
+ format_string(Flags, Width, Prec, "li"), Int)
+ ;
+ Spec = o(Int),
+ String = format_int(
+ format_string(Flags, Width, Prec, "lo"), Int)
+ ;
+ Spec = u(Int),
+ String = format_int(
+ format_string(Flags, Width, Prec, "lu"), Int)
+ ;
+ Spec = x(Int),
+ String = format_int(
+ format_string(Flags, Width, Prec, "lx"), Int)
+ ;
+ Spec = cX(Int),
+ String = format_int(
+ format_string(Flags, Width, Prec, "lX"), Int)
+ ;
+ Spec = p(Int),
+ String = format_int(
+ format_string(Flags, Width, Prec, "lp"), Int)
+ ;
+ % valid float conversion specifiers
+ Spec = e(Float),
+ String = format_float(
+ format_string(Flags, Width, Prec, "Le"), Float)
+ ;
+ Spec = cE(Float),
+ String = format_float(
+ format_string(Flags, Width, Prec, "LE"), Float)
+ ;
+ Spec = f(Float),
+ String = format_float(
+ format_string(Flags, Width, Prec, "Lf"), Float)
+ ;
+ Spec = cF(Float),
+ String = format_float(
+ format_string(Flags, Width, Prec, "LF"), Float)
+ ;
+ Spec = g(Float),
+ String = format_float(
+ format_string(Flags, Width, Prec, "Lg"), Float)
+ ;
+ Spec = cG(Float),
+ String = format_float(
+ format_string(Flags, Width, Prec, "LG"), Float)
+ ;
+ % valid char conversion Specifiers
+ Spec = c(Char),
+ String = format_char(
+ format_string(Flags, Width, Prec, "c"), Char)
+ ;
+ % valid string conversion Spec = ifiers
+ Spec = s(Str),
+ String = format_string(
+ format_string(Flags, Width, Prec, "s"), Str)
+ ;
+ % conversion specifier representing the "%" sign
+ Spec = percent,
+ String = "%"
+ ).
+specifier_to_string(string(Chars)) = from_char_list(Chars).
+
+ % Construct a format string suitable to passing to sprintf.
+:- func format_string(list(char), maybe(list(char)),
+ maybe(list(char)), string) = string.
-% Function to calculate exponent for a %e conversion of a float
-%
-:- pred string__format_calc_exp(float, string, maybe(int), int).
-:- mode string__format_calc_exp(in, out, in, in) is det.
-string__format_calc_exp(F, Fstring, Precision, Exp) :-
- ( F < 0.0 ->
- Tf is 0.0 - F,
- string__format_calc_exp(Tf, Tst, Precision, Exp),
- string__first_char(Fstring, '-', Tst)
- ; F > 0.0, F < 1.0 ->
- Texp is Exp - 1,
- FF is 10.0 * F,
- string__format_calc_exp(FF, Fstring, Precision, Texp)
- ; F >= 10.0 ->
- Texp is Exp + 1,
- FF is F / 10.0,
- string__format_calc_exp(FF, Fstring, Precision, Texp)
- ;
- string__float_to_f_string(F, Fs),
- string__format_calc_prec(Fs, Fs2, Precision),
- string__int_to_string(Exp, Exps),
- ( Exp < 0 ->
- string__append("e", Exps, TFstring),
- string__append(Fs2, TFstring, Fstring)
- ;
- string__append("e+", Exps, TFstring),
- string__append(Fs2, TFstring, Fstring)
- )
- ).
-
-%
-% This precision output-modification predicate handles floats.
-%
-:- pred string__format_calc_prec(string, string, maybe(int)).
-:- mode string__format_calc_prec(in, out, in) is det.
-string__format_calc_prec(Istring0, Ostring, Precision) :-
- (
- Precision = yes(Prec0)
- ->
- Prec = Prec0
- ;
- Prec = 15
- ),
- (
- string__find_index(Istring0, '.', Index)
- ->
- TargetLength1 is Prec + Index,
- Istring1 = Istring0
- ;
- string__length(Istring0, TargetLength0),
- TargetLength1 is TargetLength0 + 1 + Prec,
- string__append(Istring0, ".", Istring1)
-
- % This branch should never be called if mercury is implemented
- % in ansi-C, according to Kernighan and Ritchie p244, as a
- % float converted to a string using sprintf should always have
- % a decimal point. (where specified precision != 0.
- % string__float_to_string doesn't specify a precision to be
- % used.)
- %
- % Unfortunately, this branch is called.
- % Often.
- ),
+format_string(Flags, MaybeWidth, MaybePrec, Spec) = String :-
(
- Prec = 0
- ->
- % Forget the '.'.
- TargetLength is TargetLength1 - 1
+ MaybeWidth = yes(Width)
;
- TargetLength = TargetLength1
+ MaybeWidth = no,
+ Width = []
),
(
- string__length(Istring1, Length),
- Length < TargetLength
- ->
- % Ensure that there are "enough" chars in Istring.
- string__duplicate_char('0', Prec, Suffix),
- string__append(Istring1, Suffix, Istring)
+ MaybePrec = yes(Prec0),
+ Prec = ['.' | Prec0]
;
- Istring = Istring1
+ MaybePrec = no,
+ Prec = []
),
- string__split(Istring, TargetLength, Ostring, _).
-
-% string__find_index is a funky little predicate to find the first
-% occurrence of a particular character in a string.
-
-:- pred string__find_index(string, char, int).
-:- mode string__find_index(in, in, out) is semidet.
-string__find_index(Str, C, Index) :-
- string__to_char_list(Str, List),
- string__find_index_2(List, C, Index).
-
-:- pred string__find_index_2(list(char), char, int).
-:- mode string__find_index_2(in, in, out) is semidet.
-string__find_index_2([], _C, _Index) :- fail.
-string__find_index_2([X|Xs], C, Index) :-
- (
- X = C
- ->
- Index = 1
- ;
- string__find_index_2(Xs, C, Index0),
- Index is Index0 + 1
- ).
-
-%string__find_index(A, Ch, Check, Ret) :-
-% (
-% string__length(A, Len),
-% Len < Check
-% ->
-% fail
-% ;
-% string__index(A, Check, Ch)
-% ->
-% Ret = Check
-% ;
-% Check2 is Check + 1,
-% string__find_index(A, Ch, Check2, Ret)
-% ).
-%
-
-% Add a '+' or ' ' sign, if it is needed in this output.
-%
-:- pred string__format_add_sign(string, list(char), string__poly_type,
- int, int, string).
-:- mode string__format_add_sign(in, in, in, in, out, out) is det.
-% Mvw is the prefix-length in front of the number.
-string__format_add_sign(Istring, Flags, _V, MoveWidth0, Movewidth, Ostring) :-
- MoveWidth1 is MoveWidth0 - 1,
- (
- string__index(Istring, MoveWidth1, '-')
- ->
- Ostring = Istring,
- Movewidth = MoveWidth0
- ;
- string__split(Istring, MoveWidth0, Lstring, Rstring),
- (
- list__member(('+'), Flags)
- ->
- string__append("+", Rstring, Astring),
- string__append(Lstring, Astring, Ostring),
- Movewidth is MoveWidth0 + 1
- ;
- list__member(' ', Flags)
- ->
- string__append(" ", Rstring, Astring),
- string__append(Lstring, Astring, Ostring),
- Movewidth is MoveWidth0 + 1
- ;
- Ostring = Istring,
- Movewidth = MoveWidth0
- )
- ).
-
-%
-% This function pads some characters to the left or right of a string that is
-% shorter than it's width.
-%
-:- pred string__format_pad_width(string, maybe(int), list(char), int, string).
-:- mode string__format_pad_width(in, in, in, in, out) is det.
-% (String in, width, flags, #Moveables, Output string).
-string__format_pad_width(Istring, Width0, Flags, Mv_cs, Out_string) :-
- string__length(Istring, Len),
- (
- Width0 = yes(Width),
- Len < Width
- ->
- % time for some FLAG tests
- Xspace is Width - Len,
- (
- list__member('0', Flags)
- ->
- Pad_char = '0'
- ;
- Pad_char = ' '
- ),
- string__duplicate_char(Pad_char, Xspace, Pad_string),
- (
- list__member('-', Flags)
- ->
- string__append(Istring, Pad_string, Out_string)
- ;
- list__member('0', Flags)
- ->
- string__split(Istring, Mv_cs, B4, After),
- string__append(Pad_string, After, Astring),
- string__append(B4, Astring, Out_string)
- ;
- string__append(Pad_string, Istring, Out_string)
- )
- ;
- Out_string = Istring
- ).
-
-:- pred string__format_get_optional_args(list(char), list(char), int, int,
- char).
-:- mode string__format_get_optional_args(in, out, out, out, out) is det.
-% string__format_get_optional_args(format info, flags, width, precision, modifier)
-% format is assumed to be in ANSI C format.
-% p243-4 of Kernighan & Ritchie 2nd Ed. 1988
-% "Parse" format informtion.
-%
-% A function to do some basic parsing on the optional printf arguments.
-%
-% The ites make this det. It would be nicer to see a det switch on A, but the
-% determinism checker does not `see' the equity tests that are hidden one layer
-% further down.
-%
-string__format_get_optional_args([], Flags, Width, Precision, Mods) :-
- Flags = [],
- Width = 0,
- string__default_precision_and_width(Precision),
- Mods = ' '.
-string__format_get_optional_args([A|As], Flags, Width, Precision, Mods) :-
- (
- (A = (-) ; A = (+) ; A = ' ' ; A = '0' ; A = '#' )
- ->
- string__format_get_optional_args(As, Oflags, Width, Precision,
- Mods),
- UFlags = [A | Oflags],
- list__sort_and_remove_dups(UFlags, Flags)
- ;
- (
- ( A = (.) ; A = '1' ; A = '2' ; A = '3' ; A = '4' ;
- A = '5' ; A = '6' ; A = '7' ; A = '8' ; A = '9' )
- ->
- string__format_string_to_ints([A|As], Bs, Numl1, Numl2, yes),
- string__format_int_from_char_list(Numl1, Width),
- string__format_int_from_char_list(Numl2, Prec),
- string__format_get_optional_args(Bs, Flags, _, Ptemp, Mods),
- (Numl2 = [] ->
- Precision = Ptemp
- ;
- Precision = Prec
- )
- ;
- ( ( A = 'h' ; A = 'l' ; A = 'L' )
- ->
- Mods = A,
- string__format_get_optional_args(As, Flags, Width,
- Precision, _)
- ;
- ( A = ('*')
- ->
- string__format_get_optional_args(As, Flags, W, P, Mods),
- (
- As = [(.)|_]
- ->
- Precision = P,
- string__special_precision_and_width(Width)
- ;
- Width = W,
- string__special_precision_and_width(Precision)
- )
-% (
-% string__default_precision_and_width(P)
-% ->
-% string__special_precision_and_width(Precision)
-% ;
-% Precision = P
-% ),
-% string__special_precision_and_width(Width)
- ;
- error("string__format: Unrecognised formatting information\n")
- )
- ))) .
-
-:- pred string__format_takewhile1(list(char), list(char), list(char)).
-:- mode string__format_takewhile1(in, out, out) is det.
-% string__format_takewhile(formatted string in, out, format info).
-% A HACK. Would be much nicer with a proper string__takewhile.
-% Looses the format info from the front of the first argument,
-% puts this in the last argument, while the second is the
-% remainder of the string.
-%
-% XXXXXX
-%
-string__format_takewhile1([], [], []).
-string__format_takewhile1([A|As], Rem, Finf) :-
- (
- ( A = 'd' ; A = 'i' ; A = 'o' ; A = 'x' ; A = 'X' ; A = 'u' ;
- A = 's' ; A = '%' ; A = 'c' ; A = 'f' ; A = 'e' ; A = 'E' ;
- A = 'g' ; A = 'G' ; A = 'p')
- ->
- Rem = [A|As],
- Finf = []
- ;
- string__format_takewhile1(As, Rem, F),
- Finf = [A|F]
- ).
-
-:- pred string__format_string_to_ints(list(char), list(char), list(char),
- list(char), bool).
-:- mode string__format_string_to_ints(in, out, out, out, in) is det.
-% (String in, out, Number1, Number2, seen '.' yet?)
-% Takes in a char list and splits off the rational number at the
-% start of the list. This is split into 2 parts - an int and a
-% fraction.
-%
-string__format_string_to_ints([], [], [], [], _).
-string__format_string_to_ints([A|As], Bs, Int1, Int2, Bool) :-
- (char__is_digit(A) ->
- ( Bool = yes ->
- string__format_string_to_ints(As, Bs, I1, Int2, yes),
- Int1 = [A|I1]
- ;
- string__format_string_to_ints(As, Bs, Int1, I2, no),
- Int2 = [A|I2]
- )
- ;
- ( A = ('.') ->
- string__format_string_to_ints(As, Bs, Int1, Int2, no)
- ;
- Bs = [A|As],
- Int1 = [],
- Int2 = []
- )
- ).
+ String = string__append_list(["%", from_char_list(Flags),
+ from_char_list(Width),
+ from_char_list(Prec), Spec]).
+
+ % Create a string from a float using the format string.
+ % Note is is the responsibility of the caller to ensure that the
+ % format string is valid.
+:- func format_float(string, float) = string.
+:- pragma c_code(format_float(FormatStr::in, Val::in) = (Str::out),
+ [will_not_call_mercury, thread_safe], "{
+ Str = MR_make_string(MR_PROC_LABEL, FormatStr, (long double) Val);
+}").
-:- pred string__format_int_from_char_list(list(char), int).
-:- mode string__format_int_from_char_list(in, out) is det.
-% Convert a char_list to an int
-%
-string__format_int_from_char_list([], 0).
-string__format_int_from_char_list([L|Ls], I) :-
- (
- string__from_char_list([L|Ls], S),
- string__to_int(S, I_0)
- ->
- I = I_0
- ;
- I = 0
- ).
+ % Create a string from a int using the format string.
+ % Note is is the responsibility of the caller to ensure that the
+ % format string is valid.
+:- func format_int(string, int) = string.
+:- pragma c_code(format_int(FormatStr::in, Val::in) = (Str::out),
+ [will_not_call_mercury, thread_safe], "{
+ Str = MR_make_string(MR_PROC_LABEL, FormatStr, (long) Val);
+}").
-:- pred string__default_precision_and_width(int).
-:- mode string__default_precision_and_width(out) is det.
-string__default_precision_and_width(-15).
+ % Create a string from a string using the format string.
+ % Note is is the responsibility of the caller to ensure that the
+ % format string is valid.
+:- func format_string(string, string) = string.
+:- pragma c_code(format_string(FormatStr::in, Val::in) = (Str::out),
+ [will_not_call_mercury, thread_safe], "{
+ Str = MR_make_string(MR_PROC_LABEL, FormatStr, Val);
+}").
-:- pred string__special_precision_and_width(int).
-:- mode string__special_precision_and_width(out) is det.
-string__special_precision_and_width(-1).
+ % Create a string from a char using the format string.
+ % Note is is the responsibility of the caller to ensure that the
+ % format string is valid.
+:- func format_char(string, char) = string.
+:- pragma c_code(format_char(FormatStr::in, Val::in) = (Str::out),
+ [will_not_call_mercury, thread_safe], "{
+ Str = MR_make_string(MR_PROC_LABEL, FormatStr, Val);
+}").
%-----------------------------------------------------------------------------%
@@ -1568,12 +1283,8 @@
:- pragma c_code(string__float_to_string(FloatVal::in, FloatString::out),
[will_not_call_mercury, thread_safe], "{
char buf[500];
- MR_Word tmp;
sprintf(buf, ""%#.15g"", FloatVal);
- incr_hp_atomic_msg(tmp,
- (strlen(buf) + sizeof(MR_Word)) / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- FloatString = (MR_String) tmp;
+ MR_allocate_aligned_string_msg(FloatString, strlen(buf), MR_PROC_LABEL);
strcpy(FloatString, buf);
}").
@@ -1585,12 +1296,8 @@
:- pragma c_code(string__float_to_f_string(FloatVal::in, FloatString::out),
[will_not_call_mercury, thread_safe], "{
char buf[500];
- MR_Word tmp;
sprintf(buf, ""%.15f"", FloatVal);
- incr_hp_atomic_msg(tmp,
- (strlen(buf) + sizeof(MR_Word)) / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- FloatString = (MR_String) tmp;
+ MR_allocate_aligned_string_msg(FloatString, strlen(buf), MR_PROC_LABEL);
strcpy(FloatString, buf);
}").
@@ -1643,9 +1350,8 @@
** allocate (length + 1) bytes of heap space for string
** i.e. (length + 1 + sizeof(MR_Word) - 1) / sizeof(MR_Word) words
*/
- incr_hp_atomic_msg(str_ptr, size / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- Str = (MR_String) str_ptr;
+ MR_allocate_aligned_string_msg(Str, size, MR_PROC_LABEL);
+
/*
** loop to copy the characters from the int_list to the string
*/
@@ -1733,7 +1439,6 @@
*/
:- pragma c_code(string__append(S1::in, S2::out, S3::in),
[will_not_call_mercury, thread_safe], "{
- MR_Word tmp;
size_t len_1, len_2, len_3;
len_1 = strlen(S1);
@@ -1746,10 +1451,7 @@
** We need to make a copy to ensure that the pointer is
** word-aligned.
*/
- incr_hp_atomic_msg(tmp,
- (len_2 + sizeof(MR_Word)) / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- S2 = (MR_String) tmp;
+ MR_allocate_aligned_string_msg(S2, len_2, MR_PROC_LABEL);
strcpy(S2, S3 + len_1);
SUCCESS_INDICATOR = TRUE;
}
@@ -1761,13 +1463,9 @@
:- pragma c_code(string__append(S1::in, S2::in, S3::out),
[will_not_call_mercury, thread_safe], "{
size_t len_1, len_2;
- MR_Word tmp;
len_1 = strlen(S1);
len_2 = strlen(S2);
- incr_hp_atomic_msg(tmp,
- (len_1 + len_2 + sizeof(MR_Word)) / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- S3 = (MR_String) tmp;
+ MR_allocate_aligned_string_msg(S3, len_1 + len_2, MR_PROC_LABEL);
strcpy(S3, S1);
strcpy(S3 + len_1, S2);
}").
@@ -1788,19 +1486,12 @@
LOCALS->count++;
"),
common_code("
- MR_Word temp;
-
- incr_hp_atomic_msg(temp,
- (LOCALS->count + sizeof(MR_Word)) / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- S1 = (MR_String) temp;
+ MR_allocate_aligned_string_msg(S1, LOCALS->count,
+ MR_PROC_LABEL);
memcpy(S1, LOCALS->s, LOCALS->count);
S1[LOCALS->count] = '\\0';
- incr_hp_atomic_msg(temp,
- (LOCALS->len - LOCALS->count + sizeof(MR_Word))
- / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- S2 = (MR_String) temp;
+ MR_allocate_aligned_string_msg(S2, LOCALS->len - LOCALS->count,
+ MR_PROC_LABEL);
strcpy(S2, LOCALS->s + LOCALS->count);
if (LOCALS->count < LOCALS->len) {
@@ -1833,10 +1524,7 @@
len = strlen(Str);
if (Start > len) Start = len;
if (Count > len - Start) Count = len - Start;
- incr_hp_atomic_msg(tmp,
- (Count + sizeof(MR_Word)) / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- SubString = (MR_String) tmp;
+ MR_allocate_aligned_string_msg(SubString, Count, MR_PROC_LABEL);
memcpy(SubString, Str + Start, Count);
SubString[Count] = '\\0';
}
@@ -1854,10 +1542,7 @@
[will_not_call_mercury, thread_safe],
"{
MR_Integer len;
- MR_Word tmp;
- incr_hp_atomic_msg(tmp, (Count + sizeof(MR_Word)) / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- SubString = (MR_String) tmp;
+ MR_allocate_aligned_string_msg(SubString, Count, MR_PROC_LABEL);
memcpy(SubString, Str + Start, Count);
SubString[Count] = '\\0';
}").
@@ -1883,20 +1568,15 @@
} else {
len = strlen(Str);
if (Count > len) Count = len;
- incr_hp_atomic_msg(tmp,
- (Count + sizeof(MR_Word)) / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- Left = (MR_String) tmp;
+ MR_allocate_aligned_string_msg(Left, Count, MR_PROC_LABEL);
memcpy(Left, Str, Count);
Left[Count] = '\\0';
/*
** We need to make a copy to ensure that the pointer is
** word-aligned.
*/
- incr_hp_atomic_msg(tmp,
- (len - Count + sizeof(MR_Word)) / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- Right = (MR_String) tmp;
+ MR_allocate_aligned_string_msg(Right, len - Count,
+ MR_PROC_LABEL);
strcpy(Right, Str + Count);
}
}").
@@ -1941,7 +1621,6 @@
*/
:- pragma c_code(string__first_char(Str::in, First::in, Rest::out),
[will_not_call_mercury, thread_safe], "{
- MR_Word tmp;
if (Str[0] != First || First == '\\0') {
SUCCESS_INDICATOR = FALSE;
} else {
@@ -1950,10 +1629,8 @@
** We need to make a copy to ensure that the pointer is
** word-aligned.
*/
- incr_hp_atomic_msg(tmp,
- (strlen(Str) + sizeof(MR_Word)) / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- Rest = (MR_String) tmp;
+ MR_allocate_aligned_string_msg(Rest, strlen(Str),
+ MR_PROC_LABEL);
strcpy(Rest, Str);
SUCCESS_INDICATOR = TRUE;
}
@@ -1964,7 +1641,6 @@
*/
:- pragma c_code(string__first_char(Str::in, First::out, Rest::out),
[will_not_call_mercury, thread_safe], "{
- MR_Word tmp;
First = Str[0];
if (First == '\\0') {
SUCCESS_INDICATOR = FALSE;
@@ -1974,10 +1650,8 @@
** We need to make a copy to ensure that the pointer is
** word-aligned.
*/
- incr_hp_atomic_msg(tmp,
- (strlen(Str) + sizeof(MR_Word)) / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- Rest = (MR_String) tmp;
+ MR_allocate_aligned_string_msg(Rest, strlen(Str),
+ MR_PROC_LABEL);
strcpy(Rest, Str);
SUCCESS_INDICATOR = TRUE;
}
@@ -1989,10 +1663,7 @@
:- pragma c_code(string__first_char(Str::out, First::in, Rest::in),
[will_not_call_mercury, thread_safe], "{
size_t len = strlen(Rest) + 1;
- MR_Word tmp;
- incr_hp_atomic_msg(tmp, (len + sizeof(MR_Word)) / sizeof(MR_Word),
- MR_PROC_LABEL, ""string:string/0"");
- Str = (MR_String) tmp;
+ MR_allocate_aligned_string_msg(Str, len, MR_PROC_LABEL);
Str[0] = First;
strcpy(Str + 1, Rest);
}").
@@ -2135,9 +1806,6 @@
string__unsafe_substring(S1, N1, N2) = S2 :-
string__unsafe_substring(S1, N1, N2, S2).
-
-string__append_list(S1s) = S2 :-
- string__append_list(S1s, S2).
string__hash(S) = N :-
string__hash(S, N).
Index: runtime/Mmakefile
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/Mmakefile,v
retrieving revision 1.59
diff -u -r1.59 Mmakefile
--- runtime/Mmakefile 2000/07/22 11:23:21 1.59
+++ runtime/Mmakefile 2000/08/09 14:46:53
@@ -140,6 +140,7 @@
mercury_signal.c \
mercury_stack_trace.c \
mercury_stacks.c \
+ mercury_string.c \
mercury_tabling.c \
mercury_thread.c \
mercury_timing.c \
Index: runtime/mercury_conf.h.in
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_conf.h.in,v
retrieving revision 1.28
diff -u -r1.28 mercury_conf.h.in
--- runtime/mercury_conf.h.in 2000/08/03 06:18:35 1.28
+++ runtime/mercury_conf.h.in 2000/08/09 14:46:53
@@ -139,6 +139,7 @@
** The following macros are defined iff the corresponding function or
** system call is available:
**
+** HAVE_VSNPRINTF we have the vsnprintf() function.
** HAVE_SYSCONF we have the sysconf() system call.
** HAVE_SIGACTION we have the sigaction() system call.
** HAVE_GETPAGESIZE we have the getpagesize() system call.
@@ -158,6 +159,7 @@
** rather than a function, so you should use
** #if defined(fileno) || defined(HAVE_FILENO)
*/
+#undef HAVE_VSNPRINTF
#undef HAVE_SYSCONF
#undef HAVE_SIGACTION
#undef HAVE_GETPAGESIZE
Index: runtime/mercury_string.c
===================================================================
RCS file: mercury_string.c
diff -N mercury_string.c
--- /dev/null Sat Aug 7 21:45:41 1999
+++ mercury_string.c Thu Aug 10 00:46:53 2000
@@ -0,0 +1,59 @@
+/*
+** Copyright (C) 2000 The University of Melbourne.
+** This file may only be copied under the terms of the GNU Library General
+** Public License - see the file COPYING.LIB in the Mercury distribution.
+*/
+
+/* mercury_string.c - string handling */
+
+#include "mercury_imp.h"
+#include "mercury_string.h"
+
+MR_String
+MR_make_string(MR_Code *proclabel, const char *fmt, ...) {
+ va_list ap;
+ MR_String result;
+ int n;
+
+#ifdef HAVE_VSNPRINTF
+ /* Guess that 100 bytes should be sufficient */
+ int size = 100;
+ char *p;
+
+ p = MR_NEW_ARRAY(char, size);
+
+ while (1) {
+ /* Try to print in the allocated space. */
+ va_start(ap, fmt);
+ n = vsnprintf(p, size, fmt, ap);
+ va_end(ap);
+
+ /* If that worked, return the string. */
+ if (n > -1 && n < size)
+ continue;
+
+ /* Else try again with more space. */
+ if (n > -1) /* glibc 2.1 */
+ /* precisely what is needed */
+ size = n+1;
+ else /* glibc 2.0 */
+ size *= 2; /* twice the old size */
+
+ MR_RESIZE_ARRAY(p, char, size);
+ }
+#else
+ /* It is possible for this buffer to overflow */
+ /* and then bad things may happen */
+ char p[40960];
+
+ va_start(ap, fmt);
+ n = vsprintf(p, fmt, ap);
+ va_end(ap);
+#endif
+
+ MR_allocate_aligned_string_msg(result, strlen(p),
+ proclabel);
+ strcpy((char *) result, p);
+ MR_free(p);
+ return result;
+}
Index: runtime/mercury_string.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_string.h,v
retrieving revision 1.16
diff -u -r1.16 mercury_string.h
--- runtime/mercury_string.h 2000/08/03 06:18:57 1.16
+++ runtime/mercury_string.h 2000/08/09 14:46:54
@@ -10,6 +10,7 @@
#define MERCURY_STRING_H
#include <string.h> /* for strcmp() etc. */
+#include <stdarg.h>
#include "mercury_heap.h" /* for incr_hp_atomic */
@@ -63,7 +64,7 @@
*/
#define MR_make_aligned_string(ptr, string) \
do { \
- if (MR_tag((MR_Word) (string)) != 0) { \
+ if (MR_tag((MR_Word) (string)) != 0) { \
MR_make_aligned_string_copy((ptr), (string)); \
} else { \
(ptr) = (string); \
@@ -82,17 +83,42 @@
*/
#define MR_make_aligned_string_copy(ptr, string) \
do { \
- MR_Word make_aligned_string_tmp; \
+ MR_Word make_aligned_string_tmp; \
char * make_aligned_string_ptr; \
\
incr_hp_atomic(make_aligned_string_tmp, \
- (strlen(string) + sizeof(MR_Word)) / sizeof(MR_Word)); \
+ (strlen(string) + sizeof(MR_Word)) / sizeof(MR_Word)); \
make_aligned_string_ptr = \
(char *) make_aligned_string_tmp; \
strcpy(make_aligned_string_ptr, (string)); \
(ptr) = make_aligned_string_ptr; \
} while(0)
+
+/* void MR_allocate_aligned_string_msg(MR_ConstString &ptr, size_t len,
+** Code *proclabel, const char *type);
+** Allocate enough word aligned memory to hold len characters. Also
+** record for memory profiling purposes the location, proclabel, of the
+** allocation if profiling is enabled.
+**
+** BEWARE: this may modify `hp', so it must only be called from
+** places where `hp' is valid. If calling it from inside a C function,
+** rather than inside Mercury code, you may need to call
+** save/restore_transient_hp().
+*/
+#define MR_allocate_aligned_string_msg(ptr, len, proclabel) \
+ do { \
+ MR_Word make_aligned_string_tmp; \
+ char * make_aligned_string_ptr; \
+ \
+ incr_hp_atomic_msg(make_aligned_string_tmp, \
+ ((len) + sizeof(MR_Word)) / sizeof(MR_Word), \
+ proclabel, "string:string/0"); \
+ make_aligned_string_ptr = \
+ (char *) make_aligned_string_tmp; \
+ (ptr) = (MR_String) make_aligned_string_ptr; \
+ } while(0)
+
/*
** do_hash_string(int & hash, MR_Word string):
** Given a Mercury string `string', set `hash' to the hash value
@@ -141,5 +167,12 @@
int hash_string_result; \
MR_do_hash_string(hash_string_result, s); \
return hash_string_result;
+
+/*
+** Return an MR_String which has been created using the format string,
+** fmt, passed to sprintf. If memory profiling is turned on, record the
+** allocation as coming from proclabel.
+*/
+MR_String MR_make_string(MR_Code *proclabel, const char *fmt, ...);
#endif /* not MERCURY_STRING_H */
--------------------------------------------------------------------------
mercury-developers mailing list
Post messages to: mercury-developers at cs.mu.oz.au
Administrative Queries: owner-mercury-developers at cs.mu.oz.au
Subscriptions: mercury-developers-request at cs.mu.oz.au
--------------------------------------------------------------------------
More information about the developers
mailing list