[m-rev.] Fast string library

Ralph Becket rbeck at microsoft.com
Tue May 15 05:39:19 AEST 2001


This is a string ADT that supports O(1) concatenation and substring
extraction at the expense of O(n) (very) worst case indexing.

I'm not sure where this one should go and/or whether it's worth
adding all the standard string operations (shouldn't be too hard.)

Any opinions would be gratefully received.

%-----------------------------------------------------------------------
-------%
% str.m
% Copyright (C) 2001 Ralph Becket <rbeck at microsoft.com>
% Mon May 14 16:16:08 BST 2001
% vim: ts=4 sw=4 et tw=0 wm=0 ff=unix ft=mercury
%
% RELEASED TO THE MERCURY PROJECT FOR DISTRIBUTION UNDER
% WHATEVER LICENCE IS DEEMED APPROPRIATE BY THE PROJECT
% MANAGEMENT.
%
%
% String library supporting fast concatenation and substring
% extraction at the cost of slower indexing.  The worst case
% situation is a long string of left- or right-nested
% concatenations or substrings of single chars.
%
%-----------------------------------------------------------------------
-------%

:- module str.

:- interface.

:- import_module string, int, char, float.



:- type str.



:- func str(string)       = str.

:- func from_char(char)   = str.
:- func from_int(int)     = str.
:- func from_float(float) = str.

    % null = str("").
    %
:- func null = str.

    %                                               O(N) pathological
case.
    %
:- func to_string(str) = string.

    % Str ^ length is the number of chars in a str.
    %                                               O(1)
    %
:- func length(str) = int.

    % Decide whether two strs are equal.
    %                                               O(N + M)
    %
:- pred equal(str, str).
:- mode equal(in, in) is semidet.

    % Str ^ elem(Index) is the char at Index in Str.
    % If Index is out of range then an exception is
    % raised.  The first char in Str has index 0.
    %                                               O(N) pathological
case.
    %
:- func elem(int, str) = char.

    % Concatenation of strs.
    %                                               O(1)
    %
:- func str ++ str = str.

    % substr(Index, Length, Str) returns a str consisting of the
    % chars starting at Index in Str up to either the end of Str
    % or for Length chars, whichever is the shorter.  An exception
    % is raised if Index is not valid for Str.
    %                                               O(1)
    %
:- func substr(int, int, str) = str.

    % leftstr(Length, Str) = substr(0, Length, Str)
    %                                               O(1)
    %
:- func leftstr(int, str) = str.

    % rightstr(Index, Str) = substr(Index, Str ^ length, Str)
    %                                               O(1)
    %
:- func rightstr(int, str) = str.

    % XXX This could and should be recoded for O(N) performance.
    %                                               O(N^2) pathological
case.
:- func foldl(func(char, T) = T, str, T) = T.

    % foldl_subrange(Lo, Hi, Fn, Str, T) is the same as foldl except
    % that only chars with indices Lo =< I < Hi are considered.
    %
:- func foldl_subrange(int, int, func(char, T) = T, str, T) = T.

    % XXX This could and should be recoded for O(N) performance.
    %                                               O(N^2) pathological
case.
:- func foldr(func(char, T) = T, str, T) = T.

    % foldl_subrange(Lo, Hi, Fn, Str, T) is the same as foldl except
    % that only chars with indices Hi >= I >= Lo are considered.
    %
:- func foldr_subrange(int, int, func(char, T) = T, str, T) = T.

    % copies(Str, N) concatenates N copies of Str.
    %                                               O(log N)
    %
:- func copies(str, int) = str.

    % Some strs can become complicated, making indexing operations
    % more costly.  This function reduces a str to its simplest
    % possible form.
    %                                               O(N) pathological
case.
    %
:- func simplify(str) = str.

%-----------------------------------------------------------------------
-------%
%-----------------------------------------------------------------------
-------%

:- implementation.

:- import_module list, exception.



:- type str
    --->    str(int, string)            % length.
    ;       sub(int, int, str)          % index, length.
    ;       cat(int, str, str).         % length.

%-----------------------------------------------------------------------
-------%

str(String)   = str(string__length(String), String).

from_char(C)  = str(string__format("%c", [c(C)])).

from_int(I)   = str(string__format("%d", [i(I)])).

from_float(F) = str(string__format("%f", [f(F)])).

null          = str("").

%-----------------------------------------------------------------------
-------%

to_string(Str) = string__append_list(to_strings(Str)).



:- func to_strings(str) = list(string).

to_strings(str(_, String))     = [String].
to_strings(sub(I, N, Str))     = sub_to_strings(I, N, Str).
to_strings(cat(_, StrA, StrB)) = to_strings(StrA) ++ to_strings(StrB).



:- func sub_to_strings(int, int, str) = list(string).

sub_to_strings(I, N, str(_, String))     = [string__substring(String, I,
N)].
sub_to_strings(I, N, sub(J, _, Str))     = sub_to_strings(I + J, N,
Str).
sub_to_strings(I, N, cat(_, StrA, StrB)) = Strings :-
    M       = StrA ^ length,
    Strings =
        ( if I > M
          then sub_to_strings(I - M, N, StrB)
          else if I + N =< M
          then sub_to_strings(I, N, StrA)
          else sub_to_strings(I,     (M - I), StrA) ++
               sub_to_strings(0, N - (M - I), StrB)
        ).

%-----------------------------------------------------------------------
-------%

length(str(N, _))    = N.
length(sub(_, N, _)) = N.
length(cat(N, _, _)) = N.

%-----------------------------------------------------------------------
-------%

elem(I, str(_, String))     = string__index_det(String, I).

elem(I, sub(J, _, Str))     = Str ^ elem(I + J).

elem(I, cat(_, StrA, StrB)) = C :-
    N = StrA ^ length,
    C = ( if I < N then StrA ^ elem(I) else StrB ^ elem(I - N) ).

%-----------------------------------------------------------------------
-------%

StrA ++ StrB = cat(StrA ^ length + StrB ^ length, StrA, StrB).

%-----------------------------------------------------------------------
-------%

substr(I, N, Str) =
    ( if 0 =< I, I < Str ^ length
      then sub(I, int__min(N, Str ^ length - I), Str)
      else throw("str__substr/3: index out of range")
    ).

leftstr(N, Str) = substr(0, N, Str).

rightstr(I, Str) = substr(I, Str ^ length, Str).

%-----------------------------------------------------------------------
-------%

equal(StrA, StrB) :-
    StrA ^ length = StrB ^ length,
    to_string(StrA) = to_string(StrB).

%-----------------------------------------------------------------------
-------%

copies(Str0, N) = Str :-
    ( if N `rem` 2 = 1
      then                              Str = Str0 ++ copies(Str0, N -
1)
      else Str1 = copies(Str0, N // 2), Str = Str1 ++ Str1
    ).

%-----------------------------------------------------------------------
-------%

simplify(Str) = str(to_string(Str)).

%-----------------------------------------------------------------------
-------%

foldl(Fn, Str, X) = foldl_subrange(0, Str ^ length, Fn, Str, X).



:- func foldl_subrange(int, int, func(char, T) = T, str, T) = T.

foldl_subrange(I, Hi, Fn, Str, X) =
    ( if I < Hi
      then foldl_subrange(I + 1, Hi, Fn, Str, Fn(Str ^ elem(I), X))
      else X
    ).

%-----------------------------------------------------------------------
-------%

foldr(Fn, Str, X) = foldr_subrange(0, Str ^ length - 1, Fn, Str, X).



:- func foldr_subrange(int, int, func(char, T) = T, str, T) = T.

foldr_subrange(Lo, I, Fn, Str, X) =
    ( if Lo =< I
      then foldr_subrange(Lo, I - 1, Fn, Str, Fn(Str ^ elem(I), X))
      else X
    ).

%-----------------------------------------------------------------------
-------%
%-----------------------------------------------------------------------
-------%
--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list