[m-rev.] Fast string library
Ralph Becket
rbeck at microsoft.com
Tue May 15 05:39:19 AEST 2001
This is a string ADT that supports O(1) concatenation and substring
extraction at the expense of O(n) (very) worst case indexing.
I'm not sure where this one should go and/or whether it's worth
adding all the standard string operations (shouldn't be too hard.)
Any opinions would be gratefully received.
%-----------------------------------------------------------------------
-------%
% str.m
% Copyright (C) 2001 Ralph Becket <rbeck at microsoft.com>
% Mon May 14 16:16:08 BST 2001
% vim: ts=4 sw=4 et tw=0 wm=0 ff=unix ft=mercury
%
% RELEASED TO THE MERCURY PROJECT FOR DISTRIBUTION UNDER
% WHATEVER LICENCE IS DEEMED APPROPRIATE BY THE PROJECT
% MANAGEMENT.
%
%
% String library supporting fast concatenation and substring
% extraction at the cost of slower indexing. The worst case
% situation is a long string of left- or right-nested
% concatenations or substrings of single chars.
%
%-----------------------------------------------------------------------
-------%
:- module str.
:- interface.
:- import_module string, int, char, float.
:- type str.
:- func str(string) = str.
:- func from_char(char) = str.
:- func from_int(int) = str.
:- func from_float(float) = str.
% null = str("").
%
:- func null = str.
% O(N) pathological
case.
%
:- func to_string(str) = string.
% Str ^ length is the number of chars in a str.
% O(1)
%
:- func length(str) = int.
% Decide whether two strs are equal.
% O(N + M)
%
:- pred equal(str, str).
:- mode equal(in, in) is semidet.
% Str ^ elem(Index) is the char at Index in Str.
% If Index is out of range then an exception is
% raised. The first char in Str has index 0.
% O(N) pathological
case.
%
:- func elem(int, str) = char.
% Concatenation of strs.
% O(1)
%
:- func str ++ str = str.
% substr(Index, Length, Str) returns a str consisting of the
% chars starting at Index in Str up to either the end of Str
% or for Length chars, whichever is the shorter. An exception
% is raised if Index is not valid for Str.
% O(1)
%
:- func substr(int, int, str) = str.
% leftstr(Length, Str) = substr(0, Length, Str)
% O(1)
%
:- func leftstr(int, str) = str.
% rightstr(Index, Str) = substr(Index, Str ^ length, Str)
% O(1)
%
:- func rightstr(int, str) = str.
% XXX This could and should be recoded for O(N) performance.
% O(N^2) pathological
case.
:- func foldl(func(char, T) = T, str, T) = T.
% foldl_subrange(Lo, Hi, Fn, Str, T) is the same as foldl except
% that only chars with indices Lo =< I < Hi are considered.
%
:- func foldl_subrange(int, int, func(char, T) = T, str, T) = T.
% XXX This could and should be recoded for O(N) performance.
% O(N^2) pathological
case.
:- func foldr(func(char, T) = T, str, T) = T.
% foldl_subrange(Lo, Hi, Fn, Str, T) is the same as foldl except
% that only chars with indices Hi >= I >= Lo are considered.
%
:- func foldr_subrange(int, int, func(char, T) = T, str, T) = T.
% copies(Str, N) concatenates N copies of Str.
% O(log N)
%
:- func copies(str, int) = str.
% Some strs can become complicated, making indexing operations
% more costly. This function reduces a str to its simplest
% possible form.
% O(N) pathological
case.
%
:- func simplify(str) = str.
%-----------------------------------------------------------------------
-------%
%-----------------------------------------------------------------------
-------%
:- implementation.
:- import_module list, exception.
:- type str
---> str(int, string) % length.
; sub(int, int, str) % index, length.
; cat(int, str, str). % length.
%-----------------------------------------------------------------------
-------%
str(String) = str(string__length(String), String).
from_char(C) = str(string__format("%c", [c(C)])).
from_int(I) = str(string__format("%d", [i(I)])).
from_float(F) = str(string__format("%f", [f(F)])).
null = str("").
%-----------------------------------------------------------------------
-------%
to_string(Str) = string__append_list(to_strings(Str)).
:- func to_strings(str) = list(string).
to_strings(str(_, String)) = [String].
to_strings(sub(I, N, Str)) = sub_to_strings(I, N, Str).
to_strings(cat(_, StrA, StrB)) = to_strings(StrA) ++ to_strings(StrB).
:- func sub_to_strings(int, int, str) = list(string).
sub_to_strings(I, N, str(_, String)) = [string__substring(String, I,
N)].
sub_to_strings(I, N, sub(J, _, Str)) = sub_to_strings(I + J, N,
Str).
sub_to_strings(I, N, cat(_, StrA, StrB)) = Strings :-
M = StrA ^ length,
Strings =
( if I > M
then sub_to_strings(I - M, N, StrB)
else if I + N =< M
then sub_to_strings(I, N, StrA)
else sub_to_strings(I, (M - I), StrA) ++
sub_to_strings(0, N - (M - I), StrB)
).
%-----------------------------------------------------------------------
-------%
length(str(N, _)) = N.
length(sub(_, N, _)) = N.
length(cat(N, _, _)) = N.
%-----------------------------------------------------------------------
-------%
elem(I, str(_, String)) = string__index_det(String, I).
elem(I, sub(J, _, Str)) = Str ^ elem(I + J).
elem(I, cat(_, StrA, StrB)) = C :-
N = StrA ^ length,
C = ( if I < N then StrA ^ elem(I) else StrB ^ elem(I - N) ).
%-----------------------------------------------------------------------
-------%
StrA ++ StrB = cat(StrA ^ length + StrB ^ length, StrA, StrB).
%-----------------------------------------------------------------------
-------%
substr(I, N, Str) =
( if 0 =< I, I < Str ^ length
then sub(I, int__min(N, Str ^ length - I), Str)
else throw("str__substr/3: index out of range")
).
leftstr(N, Str) = substr(0, N, Str).
rightstr(I, Str) = substr(I, Str ^ length, Str).
%-----------------------------------------------------------------------
-------%
equal(StrA, StrB) :-
StrA ^ length = StrB ^ length,
to_string(StrA) = to_string(StrB).
%-----------------------------------------------------------------------
-------%
copies(Str0, N) = Str :-
( if N `rem` 2 = 1
then Str = Str0 ++ copies(Str0, N -
1)
else Str1 = copies(Str0, N // 2), Str = Str1 ++ Str1
).
%-----------------------------------------------------------------------
-------%
simplify(Str) = str(to_string(Str)).
%-----------------------------------------------------------------------
-------%
foldl(Fn, Str, X) = foldl_subrange(0, Str ^ length, Fn, Str, X).
:- func foldl_subrange(int, int, func(char, T) = T, str, T) = T.
foldl_subrange(I, Hi, Fn, Str, X) =
( if I < Hi
then foldl_subrange(I + 1, Hi, Fn, Str, Fn(Str ^ elem(I), X))
else X
).
%-----------------------------------------------------------------------
-------%
foldr(Fn, Str, X) = foldr_subrange(0, Str ^ length - 1, Fn, Str, X).
:- func foldr_subrange(int, int, func(char, T) = T, str, T) = T.
foldr_subrange(Lo, I, Fn, Str, X) =
( if Lo =< I
then foldr_subrange(Lo, I - 1, Fn, Str, Fn(Str ^ elem(I), X))
else X
).
%-----------------------------------------------------------------------
-------%
%-----------------------------------------------------------------------
-------%
--------------------------------------------------------------------------
mercury-reviews mailing list
post: mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------
More information about the reviews
mailing list