[m-rev.] for review: Add string.append_string_pieces.

Mark Brown mark at mercurylang.org
Thu Oct 31 18:51:17 AEDT 2019


This looks good.

On Thu, Oct 31, 2019 at 5:18 PM Peter Wang <novalazy at gmail.com> wrote:

> library/string.m:
>     Add append_string_pieces/2 predicate.
>
> library/io.m:
>     Add a comment about a potential future change.
>
> tests/hard_coded/Mmakefile:
> tests/hard_coded/string_append_pieces.exp:
> tests/hard_coded/string_append_pieces.m:
>     Add test case.
>
> NEWS:
>     Announce addition.
> ---
>  NEWS                                      |   1 +
>  library/io.m                              |   3 +
>  library/string.m                          | 181 ++++++++++++++++++++++
>  tests/hard_coded/Mmakefile                |   1 +
>  tests/hard_coded/string_append_pieces.exp |  11 ++
>  tests/hard_coded/string_append_pieces.m   |  59 +++++++
>  6 files changed, 256 insertions(+)
>  create mode 100644 tests/hard_coded/string_append_pieces.exp
>  create mode 100644 tests/hard_coded/string_append_pieces.m
>
> diff --git a/NEWS b/NEWS
> index b8827d9b1..570d22b85 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -430,6 +430,7 @@ Changes to the Mercury standard library:
>     - compare_substrings/6
>     - unsafe_compare_substrings/6
>     - nondet_append/3
> +   - append_string_pieces/2
>
>    The following procedures in the string module have been deprecated:
>
> diff --git a/library/io.m b/library/io.m
> index b8a8cabad..54d7ce133 100644
> --- a/library/io.m
> +++ b/library/io.m
> @@ -5155,6 +5155,9 @@ file_id(FileName, Result, !IO) :-
>      % XXX It would be better to use a char_array type rather than array(char).
>      % This is because on the Java and IL backends indexing into an array whose
>      % element type is known statically requires less overhead.
> +    %
> +    % It may be possible to merge with string.string_buffer.
> +    %
>  :- type buffer
>      --->    buffer(array(char)).
>
> diff --git a/library/string.m b/library/string.m
> index b0930a767..bcd7b78bc 100644
> --- a/library/string.m
> +++ b/library/string.m
> @@ -665,6 +665,24 @@
>      %
>  :- func join_list(string::in, list(string)::in) = (string::uo) is det.
>
>
> +%---------------------------------------------------------------------------%
> +%
> +% Making strings from smaller pieces.
> +%
> +
> +:- type string_piece
> +    --->    string(string)
> +    ;       substring(string, int, int).    % string, start, end offset
> +
> +    % append_string_pieces(Pieces, String):
> +    %
> +    % Append together the strings and substrings in `Pieces' into a string.
> +    % Throws an exception if `Pieces' contains an element
> +    % `substring(S, Start, End)' where `Start' or `End' are not within
> +    % the range [0, length(S)], or if `Start' > `End'.
> +    %
> +:- pred append_string_pieces(list(string_piece)::in, string::uo) is det.
> +
>
>  %---------------------------------------------------------------------------%
>  %
>  % Splitting up strings.
> @@ -3951,6 +3969,169 @@ join_list(Sep, [H | T]) = H ++ join_list_loop(Sep, T).
>  join_list_loop(_, []) = "".
>  join_list_loop(Sep, [H | T]) = Sep ++ H ++ join_list_loop(Sep, T).
>
>
> +%---------------------------------------------------------------------------%
> +%
> +% Making strings from smaller pieces.
> +%
> +
> +:- type string_buffer
> +    --->    string_buffer(string).
> +
> +:- pragma foreign_type("C", string_buffer, "char *",
> +    [can_pass_as_mercury_type]).
> +:- pragma foreign_type("C#", string_buffer, "char[]").
> +:- pragma foreign_type("Java", string_buffer, "java.lang.StringBuilder").
> +
> +:- pred alloc_buffer(int::in, string_buffer::uo) is det.
> +
> +:- pragma foreign_proc("C",
> +    alloc_buffer(Size::in, Buffer::uo),
> +    [will_not_call_mercury, promise_pure, thread_safe,
> +        does_not_affect_liveness, no_sharing],
> +"
> +    MR_allocate_aligned_string_msg(Buffer, Size, MR_ALLOC_ID);
> +    Buffer[Size] = '\\0';
> +").
> +:- pragma foreign_proc("C#",
> +    alloc_buffer(Size::in, Buffer::uo),
> +    [will_not_call_mercury, promise_pure, thread_safe],
> +"
> +    Buffer = new char[Size];
> +").
> +:- pragma foreign_proc("Java",
> +    alloc_buffer(Size::in, Buffer::uo),
> +    [will_not_call_mercury, promise_pure, thread_safe],
> +"
> +    Buffer = new java.lang.StringBuilder(Size);
> +").
> +
> +alloc_buffer(_Size, Buffer) :-
> +    Buffer = string_buffer("").
> +
> +:- pred buffer_to_string(string_buffer::di, string::uo) is det.
> +
> +:- pragma foreign_proc("C",
> +    buffer_to_string(Buffer::di, Str::uo),
> +    [will_not_call_mercury, promise_pure, thread_safe,
> +        does_not_affect_liveness],
> +"
> +    Str = Buffer;
> +").
> +:- pragma foreign_proc("C#",
> +    buffer_to_string(Buffer::di, Str::uo),
> +    [will_not_call_mercury, promise_pure, thread_safe],
> +"
> +    Str = new string(Buffer);
> +").
> +:- pragma foreign_proc("Java",
> +    buffer_to_string(Buffer::di, Str::uo),
> +    [will_not_call_mercury, promise_pure, thread_safe],
> +"
> +    Str = Buffer.toString();
> +").
> +
> +buffer_to_string(Buffer, Str) :-
> +    Buffer = string_buffer(Str).
> +
> +:- pred copy_into_buffer(string_buffer::di, string_buffer::uo,
> +    int::in, int::out, string::in, int::in, int::in) is det.
> +
> +:- pragma foreign_proc("C",
> +    copy_into_buffer(Dest0::di, Dest::uo, DestOffset0::in, DestOffset::out,
> +        Src::in, SrcStart::in, SrcEnd::in),
> +    [will_not_call_mercury, promise_pure, thread_safe,
> +        does_not_affect_liveness],
> +"
> +    size_t count;
> +
> +    MR_CHECK_EXPR_TYPE(Dest0, char *);
> +    MR_CHECK_EXPR_TYPE(Dest, char *);
> +
> +    count = SrcEnd - SrcStart;
> +    Dest = Dest0;
> +    MR_memcpy(Dest + DestOffset0, Src + SrcStart, count);
> +    DestOffset = DestOffset0 + count;
> +").
> +:- pragma foreign_proc("C#",
> +    copy_into_buffer(Dest0::di, Dest::uo, DestOffset0::in, DestOffset::out,
> +        Src::in, SrcStart::in, SrcEnd::in),
> +    [will_not_call_mercury, promise_pure, thread_safe],
> +"
> +    int count = SrcEnd - SrcStart;
> +    Dest = Dest0;
> +    Src.CopyTo(SrcStart, Dest, DestOffset0, count);
> +    DestOffset = DestOffset0 + count;
> +").
> +:- pragma foreign_proc("Java",
> +    copy_into_buffer(Dest0::di, Dest::uo, DestOffset0::in, DestOffset::out,
> +        Src::in, SrcStart::in, SrcEnd::in),
> +    [will_not_call_mercury, promise_pure, thread_safe],
> +"
> +    // The Java implementation does not actually use the dest offsets.
> +    Dest = Dest0;
> +    Dest.append(Src, SrcStart, SrcEnd);
> +    DestOffset = DestOffset0 + (SrcEnd - SrcStart);
> +").
> +
> +copy_into_buffer(Dest0, Dest, DestOffset0, DestOffset, Src, SrcStart, SrcEnd)
> +        :-
> +    Dest0 = string_buffer(Buffer0),
> +    Buffer = Buffer0 ++ unsafe_between(Src, SrcStart, SrcEnd),
> +    DestOffset = DestOffset0 + (SrcEnd - SrcStart),
> +    Dest = string_buffer(Buffer).
> +
> +%---------------------%
> +
> +append_string_pieces(Pieces, String) :-
> +    check_pieces_and_sum_length($pred, Pieces, 0, BufferLen),
> +    alloc_buffer(BufferLen, Buffer0),
> +    list.foldl2(copy_piece_into_buffer, Pieces, 0, End, Buffer0, Buffer),
> +    expect(unify(End, BufferLen), $pred, "End != BufferLen"),
> +    buffer_to_string(Buffer, String).
> +
> +:- pred check_pieces_and_sum_length(string::in, list(string_piece)::in,
> +    int::in, int::out) is det.
> +
> +check_pieces_and_sum_length(PredName, Pieces, Len0, Len) :-
> +    (
> +        Pieces = [],
> +        Len = Len0
> +    ;
> +        Pieces = [Piece | TailPieces],
> +        (
> +            Piece = string(Str),
> +            PieceLen = length(Str)
> +        ;
> +            Piece = substring(BaseStr, Start, End),
> +            BaseLen = length(BaseStr),
> +            ( if
> +                Start >= 0,
> +                Start =< BaseLen,
> +                End >= Start,
> +                End =< BaseLen
> +            then
> +                PieceLen = End - Start
> +            else
> +                unexpected(PredName, "substring index out of range")
> +            )
> +        ),
> +        Len1 = Len0 + PieceLen,
> +        check_pieces_and_sum_length(PredName, TailPieces, Len1, Len)
> +    ).
> +
> +:- pred copy_piece_into_buffer(string_piece::in, int::in, int::out,
> +    string_buffer::di, string_buffer::uo) is det.
> +
> +copy_piece_into_buffer(Piece, !DestOffset, !DestBuffer) :-
> +    (
> +        Piece = string(Src),
> +        SrcStart = 0,
> +        SrcEnd = length(Src)
> +    ;
> +        Piece = substring(Src, SrcStart, SrcEnd)
> +    ),
> +    copy_into_buffer(!DestBuffer, !DestOffset, Src, SrcStart, SrcEnd).
> +
>
>  %---------------------------------------------------------------------------%
>  %
>  % Splitting up strings.
> diff --git a/tests/hard_coded/Mmakefile b/tests/hard_coded/Mmakefile
> index 826ea7d0d..16fdf0eb7 100644
> --- a/tests/hard_coded/Mmakefile
> +++ b/tests/hard_coded/Mmakefile
> @@ -355,6 +355,7 @@ ORDINARY_PROGS = \
>         string_append_ioi \
>         string_append_ooi \
>         string_append_ooi_ilseq \
> +       string_append_pieces \
>         string_builder_test \
>         string_case \
>         string_char_list_ilseq \
> diff --git a/tests/hard_coded/string_append_pieces.exp b/tests/hard_coded/string_append_pieces.exp
> new file mode 100644
> index 000000000..8e2a865d5
> --- /dev/null
> +++ b/tests/hard_coded/string_append_pieces.exp
> @@ -0,0 +1,11 @@
> +""
> +""
> +""
> +""
> +"ab"
> +"cool!😀"
> +software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
> +software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
> +software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
> +software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
> +software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
> diff --git a/tests/hard_coded/string_append_pieces.m b/tests/hard_coded/string_append_pieces.m
> new file mode 100644
> index 000000000..f5397e03f
> --- /dev/null
> +++ b/tests/hard_coded/string_append_pieces.m
> @@ -0,0 +1,59 @@
>
> +%---------------------------------------------------------------------------%
> +% vim: ts=4 sw=4 et ft=mercury
>
> +%---------------------------------------------------------------------------%
> +
> +:- module string_append_pieces.
> +:- interface.
> +
> +:- import_module io.
> +
> +:- pred main(io::di, io::uo) is cc_multi.
> +
>
> +%---------------------------------------------------------------------------%
> +
> +:- implementation.
> +
> +:- import_module exception.
> +:- import_module int.
> +:- import_module list.
> +:- import_module string.
> +
>
> +%---------------------------------------------------------------------------%
> +
> +main(!IO) :-
> +    foldl(test_append_string_pieces, test_cases, !IO).
> +
> +:- func test_cases = list(list(string_piece)).
> +
> +test_cases = [
> +    [],
> +    [string("")],
> +    [substring("", 0, 0)],
> +    [substring("ok", 2, 2)],
> +    [substring("axx", 0, 1), substring("xbx", 1, 2)],
> +    [
> +        string("c"),
> +        substring("whoops!", 2, 4),
> +        string("l!"),
> +        substring("😀😀😀", length("😀"), 2 * length("😀"))
> +    ],
> +    [substring("bad", -1, 0)],
> +    [substring("bad", 4, 3)],
> +    [substring("bad", 0, -1)],
> +    [substring("bad", 0, 4)],
> +    [substring("bad", 3, 2)]
> +].
> +
> +:- pred test_append_string_pieces(list(string_piece)::in, io::di, io::uo)
> +    is cc_multi.
> +
> +test_append_string_pieces(Pieces, !IO) :-
> +    ( try []
> +        append_string_pieces(Pieces, Str)
> +    then
> +        io.write_string("""", !IO),
> +        io.write_string(Str, !IO),
> +        io.write_string("""\n", !IO)
> +    catch_any Excp ->
> +        io.print_line(Excp, !IO)
> +    ).
> --
> 2.23.0
>
> _______________________________________________
> reviews mailing list
> reviews at lists.mercurylang.org
> https://lists.mercurylang.org/listinfo/reviews
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20191031/5541dbe0/attachment-0001.html>


More information about the reviews mailing list