diff: char & string additions

Fergus Henderson fjh at cs.mu.oz.au
Fri May 16 20:43:07 AEST 1997


Tom, can you please review this one?

Export a few new predicates from the library, after moving them around
and renaming them, since they seem to be quite useful.

library/char.m:
	Add char__digit_to_int, char__int_to_digit, and char__det_int_to_digit.
	(These are renamed and exported versions of the old
	predicates string__digit_to_char and string__digit_to_char_det
	from `string.m'.)

library/string.m:
	Add string__from_rev_char_list.
	(This is a renamed version of lexer__rev_char_list_to_string.)
	Use char__int_to_digit etc.

library/lexer.m:
	Use string__from_rev_char_list.

cvs diff: Diffing .
Index: char.m
===================================================================
RCS file: /home/staff/zs/imp/mercury/library/char.m,v
retrieving revision 1.19
diff -u -r1.19 char.m
--- char.m	1997/02/19 05:32:55	1.19
+++ char.m	1997/05/16 10:03:27
@@ -104,10 +104,30 @@
 :- mode char__is_hex_digit(in) is semidet.
 	% True iff the character is a hexadecimal digit (0-9, a-f, A-F).
 
+:- pred char__digit_to_int(char, int).
+:- mode char__digit_to_int(in, out) is semidet.
+	% Succeeds if char is a decimal digit (0-9) or letter (a-z or A-Z).
+	% Returns the character's value as a digit (0-9 or 10-35).
+
+:- pred char__int_to_digit(int, char).
+:- mode char__int_to_digit(in, out) is semidet.
+:- mode char__int_to_digit(out, in) is semidet.
+	% char__int_to_digit(Int, DigitChar):
+	% True iff `Int' is an integer in the range 0-35 and
+	% `DigitChar' is a decimal digit or uppercase letter
+	% whose value as a digit is `Int'.
+
+:- pred char__det_int_to_digit(int, char).
+:- mode char__det_int_to_digit(in, out) is det.
+	% Returns a decimal digit or uppercase letter corresponding to the
+	% value.
+	% Calls error/1 if the integer is not in the range 0-35.
+
 %-----------------------------------------------------------------------------%
 %-----------------------------------------------------------------------------%
 
 :- implementation.
+:- import_module require.
 
 char__is_whitespace(' ').
 char__is_whitespace('\t').
@@ -299,6 +319,59 @@
 char__is_hex_digit('D').
 char__is_hex_digit('E').
 char__is_hex_digit('F').
+
+%-----------------------------------------------------------------------------%
+
+char__det_int_to_digit(Int, Digit) :-
+	( char__int_to_digit(Int, Digit1) ->
+		Digit = Digit1
+	;
+		error("char__int_to_digit failed")
+	).
+
+char__int_to_digit(0, '0').
+char__int_to_digit(1, '1').
+char__int_to_digit(2, '2').
+char__int_to_digit(3, '3').
+char__int_to_digit(4, '4').
+char__int_to_digit(5, '5').
+char__int_to_digit(6, '6').
+char__int_to_digit(7, '7').
+char__int_to_digit(8, '8').
+char__int_to_digit(9, '9').
+char__int_to_digit(10, 'A').
+char__int_to_digit(11, 'B').
+char__int_to_digit(12, 'C').
+char__int_to_digit(13, 'D').
+char__int_to_digit(14, 'E').
+char__int_to_digit(15, 'F').
+char__int_to_digit(16, 'G').
+char__int_to_digit(17, 'H').
+char__int_to_digit(18, 'I').
+char__int_to_digit(19, 'J').
+char__int_to_digit(20, 'K').
+char__int_to_digit(21, 'L').
+char__int_to_digit(22, 'M').
+char__int_to_digit(23, 'N').
+char__int_to_digit(24, 'O').
+char__int_to_digit(25, 'P').
+char__int_to_digit(26, 'Q').
+char__int_to_digit(27, 'R').
+char__int_to_digit(28, 'S').
+char__int_to_digit(29, 'T').
+char__int_to_digit(30, 'U').
+char__int_to_digit(31, 'V').
+char__int_to_digit(32, 'W').
+char__int_to_digit(33, 'X').
+char__int_to_digit(34, 'Y').
+char__int_to_digit(35, 'Z').
+
+char__digit_to_int(Digit, Int) :-
+	( char__lower_upper(Digit, Upper) ->
+		char__int_to_digit(Int, Upper)
+	;
+		char__int_to_digit(Int, Digit)
+	).
 
 %-----------------------------------------------------------------------------%
 
cvs diff: lazy_list.m is a new entry, no comparison available
Index: lexer.m
===================================================================
RCS file: /home/staff/zs/imp/mercury/library/lexer.m,v
retrieving revision 1.22
diff -u -r1.22 lexer.m
--- lexer.m	1997/04/30 10:37:49	1.22
+++ lexer.m	1997/05/16 10:12:44
@@ -987,52 +987,7 @@
 :- pred lexer__rev_char_list_to_string(list(character), string).
 :- mode lexer__rev_char_list_to_string(in, out) is det.
 
-% lexer__rev_char_list_to_string was originally implemented like this:
-%
-%lexer__rev_char_list_to_string(RevChars, String) :-
-%       list__reverse(RevChars, Chars),
-%       string__from_char_list(Chars, String).
-%
-% The optimized implementation in C below is there for efficiency since
-% it improves the overall speed of parsing by about 7%.
-
-:- pragma(c_code, lexer__rev_char_list_to_string(Chars::in, Str::out), "
-{
-	Word list_ptr;
-	Word size, len;
-	Word str_ptr;
-/*
-** loop to calculate list length + sizeof(Word) in `size' using list in
-** `list_ptr' and separately count the length of the string
-*/
-	size = sizeof(Word);
-	len = 1;
-	list_ptr = Chars;
-	while (!list_is_empty(list_ptr)) {
-		size++;
-		len++;
-		list_ptr = list_tail(list_ptr);
-	}
-/*
-** allocate (length + 1) bytes of heap space for string
-** i.e. (length + 1 + sizeof(Word) - 1) / sizeof(Word) words
-*/
-	incr_hp_atomic(str_ptr, size / sizeof(Word));
-	Str = (char *) str_ptr;
-/*
-** set size to be the offset of the end of the string
-** (ie the \\0) and null terminate the string.
-*/
-	Str[--len] = '\\0';
-/*
-** loop to copy the characters from the list_ptr to the string
-** in reverse order.
-*/
-	list_ptr = Chars;
-	while (!list_is_empty(list_ptr)) {
-		Str[--len] = (char) list_head(list_ptr);
-		list_ptr = list_tail(list_ptr);
-	}
-}").
+lexer__rev_char_list_to_string(RevChars, String) :-
+       string__from_rev_char_list(RevChars, String).
 
 %-----------------------------------------------------------------------------%
cvs diff: mostly_uniq_array.m is a new entry, no comparison available
Index: string.m
===================================================================
RCS file: /home/staff/zs/imp/mercury/library/string.m,v
retrieving revision 1.88
diff -u -r1.88 string.m
--- string.m	1997/02/19 05:33:01	1.88
+++ string.m	1997/05/16 10:11:30
@@ -117,6 +117,11 @@
 	% XXX second mode should be det too
 	% (but this turns out to be tricky to implement)
 
+:- pred string__from_rev_char_list(list(character), string).
+:- mode string__from_rev_char_list(in, out) is det.
+%	Same as string__from_char_list, except that it reverses the order
+%	of the characters.
+
 :- pred string__to_int(string, int).
 :- mode string__to_int(in, out) is semidet.
 % 	Convert a string to an int.  The string must contain only digits,
@@ -412,12 +417,11 @@
 :- mode string__base_string_to_int_2(in, in, in, out) is semidet.
 
 string__base_string_to_int_2(Base, String, Int0, Int) :-
-	( string__first_char(String, Char, String1) ->
-		char__to_upper(Char, UpperChar),
-		string__digit_to_char(Digit, UpperChar),
-		Digit < Base,
+	( string__first_char(String, DigitChar, String1) ->
+		char__digit_to_int(DigitChar, DigitValue),
+		DigitValue < Base,
 		Int1 is Base * Int0,
-		Int2 is Int1 + Digit,
+		Int2 is Int1 + DigitValue,
 		string__base_string_to_int_2(Base, String1, Int2, Int) 
 	;
 		Int = Int0
@@ -490,70 +494,17 @@
 	(
 		N < Base
 	->
-		string__digit_to_char_det(N, DigitChar),
+		char__det_int_to_digit(N, DigitChar),
 		string__char_to_string(DigitChar, Str)
 	;
 		N10 is N mod Base,
 		N1 is N // Base,
-		string__digit_to_char_det(N10, DigitChar),
+		char__det_int_to_digit(N10, DigitChar),
 		string__char_to_string(DigitChar, DigitString),
 		string__int_to_base_string_2(N1, Base, Str1),
 		string__append(Str1, DigitString, Str)
 	).
 
-:- pred string__digit_to_char_det(int, character).
-:- mode string__digit_to_char_det(in, out) is det.
-
-string__digit_to_char_det(Digit, Char) :-
-	( string__digit_to_char(Digit, Char0) ->
-		Char = Char0
-	;
-		error("string__digit_to_char failed")
-	).
-
-% Simple-minded, but extremely portable.
-
-:- pred string__digit_to_char(int, character).
-:- mode string__digit_to_char(in, out) is semidet.
-:- mode string__digit_to_char(out, in) is semidet.
-
-string__digit_to_char(0, '0').
-string__digit_to_char(1, '1').
-string__digit_to_char(2, '2').
-string__digit_to_char(3, '3').
-string__digit_to_char(4, '4').
-string__digit_to_char(5, '5').
-string__digit_to_char(6, '6').
-string__digit_to_char(7, '7').
-string__digit_to_char(8, '8').
-string__digit_to_char(9, '9').
-string__digit_to_char(10, 'A').
-string__digit_to_char(11, 'B').
-string__digit_to_char(12, 'C').
-string__digit_to_char(13, 'D').
-string__digit_to_char(14, 'E').
-string__digit_to_char(15, 'F').
-string__digit_to_char(16, 'G').
-string__digit_to_char(17, 'H').
-string__digit_to_char(18, 'I').
-string__digit_to_char(19, 'J').
-string__digit_to_char(20, 'K').
-string__digit_to_char(21, 'L').
-string__digit_to_char(22, 'M').
-string__digit_to_char(23, 'N').
-string__digit_to_char(24, 'O').
-string__digit_to_char(25, 'P').
-string__digit_to_char(26, 'Q').
-string__digit_to_char(27, 'R').
-string__digit_to_char(28, 'S').
-string__digit_to_char(29, 'T').
-string__digit_to_char(30, 'U').
-string__digit_to_char(31, 'V').
-string__digit_to_char(32, 'W').
-string__digit_to_char(33, 'X').
-string__digit_to_char(34, 'Y').
-string__digit_to_char(35, 'Z').
-
 % NB: it would be more efficient to do this directly (using pragma c_code)
 string__to_char_list(String, CharList) :-
 	string__to_int_list(String, IntList),
@@ -563,6 +514,50 @@
 string__from_char_list(CharList, String) :-
 	string__char_list_to_int_list(CharList, IntList),
 	string__to_int_list(String, IntList).
+
+%
+% We could implement from_rev_char_list using list__reverse and from_char_list,
+% but the optimized implementation in C below is there for efficiency since
+% it improves the overall speed of parsing by about 7%.
+%
+:- pragma(c_code, string__from_rev_char_list(Chars::in, Str::out), "
+{
+	Word list_ptr;
+	Word size, len;
+	Word str_ptr;
+/*
+** loop to calculate list length + sizeof(Word) in `size' using list in
+** `list_ptr' and separately count the length of the string
+*/
+	size = sizeof(Word);
+	len = 1;
+	list_ptr = Chars;
+	while (!list_is_empty(list_ptr)) {
+		size++;
+		len++;
+		list_ptr = list_tail(list_ptr);
+	}
+/*
+** allocate (length + 1) bytes of heap space for string
+** i.e. (length + 1 + sizeof(Word) - 1) / sizeof(Word) words
+*/
+	incr_hp_atomic(str_ptr, size / sizeof(Word));
+	Str = (char *) str_ptr;
+/*
+** set len to be the offset of the end of the string
+** (ie the \\0) and null terminate the string.
+*/
+	Str[--len] = '\\0';
+/*
+** loop to copy the characters from the list_ptr to the string
+** in reverse order.
+*/
+	list_ptr = Chars;
+	while (!list_is_empty(list_ptr)) {
+		Str[--len] = (char) list_head(list_ptr);
+		list_ptr = list_tail(list_ptr);
+	}
+}").
 
 :- pred string__int_list_to_char_list(list(int), list(character)).
 :- mode string__int_list_to_char_list(in, out) is det.

-- 
Fergus Henderson <fjh at cs.mu.oz.au>   |  "I have always known that the pursuit
WWW: <http://www.cs.mu.oz.au/~fjh>   |  of excellence is a lethal habit"
PGP: finger fjh at 128.250.37.3         |     -- the last words of T. S. Garp.



More information about the developers mailing list