[m-rev.] for review: Add string.append_string_pieces.
Mark Brown
mark at mercurylang.org
Thu Oct 31 18:51:17 AEDT 2019
This looks good.
On Thu, Oct 31, 2019 at 5:18 PM Peter Wang <novalazy at gmail.com> wrote:
> library/string.m:
> Add append_string_pieces/2 predicate.
>
> library/io.m:
> Add a comment about a potential future change.
>
> tests/hard_coded/Mmakefile:
> tests/hard_coded/string_append_pieces.exp:
> tests/hard_coded/string_append_pieces.m:
> Add test case.
>
> NEWS:
> Announce addition.
> ---
> NEWS | 1 +
> library/io.m | 3 +
> library/string.m | 181 ++++++++++++++++++++++
> tests/hard_coded/Mmakefile | 1 +
> tests/hard_coded/string_append_pieces.exp | 11 ++
> tests/hard_coded/string_append_pieces.m | 59 +++++++
> 6 files changed, 256 insertions(+)
> create mode 100644 tests/hard_coded/string_append_pieces.exp
> create mode 100644 tests/hard_coded/string_append_pieces.m
>
> diff --git a/NEWS b/NEWS
> index b8827d9b1..570d22b85 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -430,6 +430,7 @@ Changes to the Mercury standard library:
> - compare_substrings/6
> - unsafe_compare_substrings/6
> - nondet_append/3
> + - append_string_pieces/2
>
> The following procedures in the string module have been deprecated:
>
> diff --git a/library/io.m b/library/io.m
> index b8a8cabad..54d7ce133 100644
> --- a/library/io.m
> +++ b/library/io.m
> @@ -5155,6 +5155,9 @@ file_id(FileName, Result, !IO) :-
> % XXX It would be better to use a char_array type rather than array(char).
> % This is because on the Java and IL backends indexing into an array whose
> % element type is known statically requires less overhead.
> + %
> + % It may be possible to merge with string.string_buffer.
> + %
> :- type buffer
> ---> buffer(array(char)).
>
> diff --git a/library/string.m b/library/string.m
> index b0930a767..bcd7b78bc 100644
> --- a/library/string.m
> +++ b/library/string.m
> @@ -665,6 +665,24 @@
> %
> :- func join_list(string::in, list(string)::in) = (string::uo) is det.
>
>
> +%---------------------------------------------------------------------------%
> +%
> +% Making strings from smaller pieces.
> +%
> +
> +:- type string_piece
> + ---> string(string)
> + ; substring(string, int, int). % string, start, end offset
> +
> + % append_string_pieces(Pieces, String):
> + %
> + % Append together the strings and substrings in `Pieces' into a string.
> + % Throws an exception if `Pieces' contains an element
> + % `substring(S, Start, End)' where `Start' or `End' are not within
> + % the range [0, length(S)], or if `Start' > `End'.
> + %
> +:- pred append_string_pieces(list(string_piece)::in, string::uo) is det.
> +
>
> %---------------------------------------------------------------------------%
> %
> % Splitting up strings.
> @@ -3951,6 +3969,169 @@ join_list(Sep, [H | T]) = H ++ join_list_loop(Sep,
> T).
> join_list_loop(_, []) = "".
> join_list_loop(Sep, [H | T]) = Sep ++ H ++ join_list_loop(Sep, T).
>
>
> +%---------------------------------------------------------------------------%
> +%
> +% Making strings from smaller pieces.
> +%
> +
> +:- type string_buffer
> + ---> string_buffer(string).
> +
> +:- pragma foreign_type("C", string_buffer, "char *",
> + [can_pass_as_mercury_type]).
> +:- pragma foreign_type("C#", string_buffer, "char[]").
> +:- pragma foreign_type("Java", string_buffer, "java.lang.StringBuilder").
> +
> +:- pred alloc_buffer(int::in, string_buffer::uo) is det.
> +
> +:- pragma foreign_proc("C",
> + alloc_buffer(Size::in, Buffer::uo),
> + [will_not_call_mercury, promise_pure, thread_safe,
> + does_not_affect_liveness, no_sharing],
> +"
> + MR_allocate_aligned_string_msg(Buffer, Size, MR_ALLOC_ID);
> + Buffer[Size] = '\\0';
> +").
> +:- pragma foreign_proc("C#",
> + alloc_buffer(Size::in, Buffer::uo),
> + [will_not_call_mercury, promise_pure, thread_safe],
> +"
> + Buffer = new char[Size];
> +").
> +:- pragma foreign_proc("Java",
> + alloc_buffer(Size::in, Buffer::uo),
> + [will_not_call_mercury, promise_pure, thread_safe],
> +"
> + Buffer = new java.lang.StringBuilder(Size);
> +").
> +
> +alloc_buffer(_Size, Buffer) :-
> + Buffer = string_buffer("").
> +
> +:- pred buffer_to_string(string_buffer::di, string::uo) is det.
> +
> +:- pragma foreign_proc("C",
> + buffer_to_string(Buffer::di, Str::uo),
> + [will_not_call_mercury, promise_pure, thread_safe,
> + does_not_affect_liveness],
> +"
> + Str = Buffer;
> +").
> +:- pragma foreign_proc("C#",
> + buffer_to_string(Buffer::di, Str::uo),
> + [will_not_call_mercury, promise_pure, thread_safe],
> +"
> + Str = new string(Buffer);
> +").
> +:- pragma foreign_proc("Java",
> + buffer_to_string(Buffer::di, Str::uo),
> + [will_not_call_mercury, promise_pure, thread_safe],
> +"
> + Str = Buffer.toString();
> +").
> +
> +buffer_to_string(Buffer, Str) :-
> + Buffer = string_buffer(Str).
> +
> +:- pred copy_into_buffer(string_buffer::di, string_buffer::uo,
> + int::in, int::out, string::in, int::in, int::in) is det.
> +
> +:- pragma foreign_proc("C",
> + copy_into_buffer(Dest0::di, Dest::uo, DestOffset0::in,
> DestOffset::out,
> + Src::in, SrcStart::in, SrcEnd::in),
> + [will_not_call_mercury, promise_pure, thread_safe,
> + does_not_affect_liveness],
> +"
> + size_t count;
> +
> + MR_CHECK_EXPR_TYPE(Dest0, char *);
> + MR_CHECK_EXPR_TYPE(Dest, char *);
> +
> + count = SrcEnd - SrcStart;
> + Dest = Dest0;
> + MR_memcpy(Dest + DestOffset0, Src + SrcStart, count);
> + DestOffset = DestOffset0 + count;
> +").
> +:- pragma foreign_proc("C#",
> + copy_into_buffer(Dest0::di, Dest::uo, DestOffset0::in,
> DestOffset::out,
> + Src::in, SrcStart::in, SrcEnd::in),
> + [will_not_call_mercury, promise_pure, thread_safe],
> +"
> + int count = SrcEnd - SrcStart;
> + Dest = Dest0;
> + Src.CopyTo(SrcStart, Dest, DestOffset0, count);
> + DestOffset = DestOffset0 + count;
> +").
> +:- pragma foreign_proc("Java",
> + copy_into_buffer(Dest0::di, Dest::uo, DestOffset0::in,
> DestOffset::out,
> + Src::in, SrcStart::in, SrcEnd::in),
> + [will_not_call_mercury, promise_pure, thread_safe],
> +"
> + // The Java implementation does not actually use the dest offsets.
> + Dest = Dest0;
> + Dest.append(Src, SrcStart, SrcEnd);
> + DestOffset = DestOffset0 + (SrcEnd - SrcStart);
> +").
> +
> +copy_into_buffer(Dest0, Dest, DestOffset0, DestOffset, Src, SrcStart,
> SrcEnd)
> + :-
> + Dest0 = string_buffer(Buffer0),
> + Buffer = Buffer0 ++ unsafe_between(Src, SrcStart, SrcEnd),
> + DestOffset = DestOffset0 + (SrcEnd - SrcStart),
> + Dest = string_buffer(Buffer).
> +
> +%---------------------%
> +
> +append_string_pieces(Pieces, String) :-
> + check_pieces_and_sum_length($pred, Pieces, 0, BufferLen),
> + alloc_buffer(BufferLen, Buffer0),
> + list.foldl2(copy_piece_into_buffer, Pieces, 0, End, Buffer0, Buffer),
> + expect(unify(End, BufferLen), $pred, "End != BufferLen"),
> + buffer_to_string(Buffer, String).
> +
> +:- pred check_pieces_and_sum_length(string::in, list(string_piece)::in,
> + int::in, int::out) is det.
> +
> +check_pieces_and_sum_length(PredName, Pieces, Len0, Len) :-
> + (
> + Pieces = [],
> + Len = Len0
> + ;
> + Pieces = [Piece | TailPieces],
> + (
> + Piece = string(Str),
> + PieceLen = length(Str)
> + ;
> + Piece = substring(BaseStr, Start, End),
> + BaseLen = length(BaseStr),
> + ( if
> + Start >= 0,
> + Start =< BaseLen,
> + End >= Start,
> + End =< BaseLen
> + then
> + PieceLen = End - Start
> + else
> + unexpected(PredName, "substring index out of range")
> + )
> + ),
> + Len1 = Len0 + PieceLen,
> + check_pieces_and_sum_length(PredName, TailPieces, Len1, Len)
> + ).
> +
> +:- pred copy_piece_into_buffer(string_piece::in, int::in, int::out,
> + string_buffer::di, string_buffer::uo) is det.
> +
> +copy_piece_into_buffer(Piece, !DestOffset, !DestBuffer) :-
> + (
> + Piece = string(Src),
> + SrcStart = 0,
> + SrcEnd = length(Src)
> + ;
> + Piece = substring(Src, SrcStart, SrcEnd)
> + ),
> + copy_into_buffer(!DestBuffer, !DestOffset, Src, SrcStart, SrcEnd).
> +
>
> %---------------------------------------------------------------------------%
> %
> % Splitting up strings.
> diff --git a/tests/hard_coded/Mmakefile b/tests/hard_coded/Mmakefile
> index 826ea7d0d..16fdf0eb7 100644
> --- a/tests/hard_coded/Mmakefile
> +++ b/tests/hard_coded/Mmakefile
> @@ -355,6 +355,7 @@ ORDINARY_PROGS = \
> string_append_ioi \
> string_append_ooi \
> string_append_ooi_ilseq \
> + string_append_pieces \
> string_builder_test \
> string_case \
> string_char_list_ilseq \
> diff --git a/tests/hard_coded/string_append_pieces.exp
> b/tests/hard_coded/string_append_pieces.exp
> new file mode 100644
> index 000000000..8e2a865d5
> --- /dev/null
> +++ b/tests/hard_coded/string_append_pieces.exp
> @@ -0,0 +1,11 @@
> +""
> +""
> +""
> +""
> +"ab"
> +"cool!😀"
> +software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
> +software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
> +software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
> +software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
> +software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
> diff --git a/tests/hard_coded/string_append_pieces.m
> b/tests/hard_coded/string_append_pieces.m
> new file mode 100644
> index 000000000..f5397e03f
> --- /dev/null
> +++ b/tests/hard_coded/string_append_pieces.m
> @@ -0,0 +1,59 @@
>
> +%---------------------------------------------------------------------------%
> +% vim: ts=4 sw=4 et ft=mercury
>
> +%---------------------------------------------------------------------------%
> +
> +:- module string_append_pieces.
> +:- interface.
> +
> +:- import_module io.
> +
> +:- pred main(io::di, io::uo) is cc_multi.
> +
>
> +%---------------------------------------------------------------------------%
> +
> +:- implementation.
> +
> +:- import_module exception.
> +:- import_module int.
> +:- import_module list.
> +:- import_module string.
> +
>
> +%---------------------------------------------------------------------------%
> +
> +main(!IO) :-
> + foldl(test_append_string_pieces, test_cases, !IO).
> +
> +:- func test_cases = list(list(string_piece)).
> +
> +test_cases = [
> + [],
> + [string("")],
> + [substring("", 0, 0)],
> + [substring("ok", 2, 2)],
> + [substring("axx", 0, 1), substring("xbx", 1, 2)],
> + [
> + string("c"),
> + substring("whoops!", 2, 4),
> + string("l!"),
> + substring("😀😀😀", length("😀"), 2 * length("😀"))
> + ],
> + [substring("bad", -1, 0)],
> + [substring("bad", 4, 3)],
> + [substring("bad", 0, -1)],
> + [substring("bad", 0, 4)],
> + [substring("bad", 3, 2)]
> +].
> +
> +:- pred test_append_string_pieces(list(string_piece)::in, io::di, io::uo)
> + is cc_multi.
> +
> +test_append_string_pieces(Pieces, !IO) :-
> + ( try []
> + append_string_pieces(Pieces, Str)
> + then
> + io.write_string("""", !IO),
> + io.write_string(Str, !IO),
> + io.write_string("""\n", !IO)
> + catch_any Excp ->
> + io.print_line(Excp, !IO)
> + ).
> --
> 2.23.0
>
> _______________________________________________
> reviews mailing list
> reviews at lists.mercurylang.org
> https://lists.mercurylang.org/listinfo/reviews
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20191031/5541dbe0/attachment-0001.html>
More information about the reviews mailing list