[m-rev.] for review: Add string.append_string_pieces.

Peter Wang novalazy at gmail.com
Thu Oct 31 17:18:01 AEDT 2019


library/string.m:
    Add the string_piece type and the append_string_pieces/2 predicate.

library/io.m:
    Add a comment about a potential future change.

tests/hard_coded/Mmakefile:
tests/hard_coded/string_append_pieces.exp:
tests/hard_coded/string_append_pieces.m:
    Add test case.

NEWS:
    Announce addition.
---
 NEWS                                      |   1 +
 library/io.m                              |   3 +
 library/string.m                          | 181 ++++++++++++++++++++++
 tests/hard_coded/Mmakefile                |   1 +
 tests/hard_coded/string_append_pieces.exp |  11 ++
 tests/hard_coded/string_append_pieces.m   |  59 +++++++
 6 files changed, 256 insertions(+)
 create mode 100644 tests/hard_coded/string_append_pieces.exp
 create mode 100644 tests/hard_coded/string_append_pieces.m

diff --git a/NEWS b/NEWS
index b8827d9b1..570d22b85 100644
--- a/NEWS
+++ b/NEWS
@@ -430,6 +430,7 @@ Changes to the Mercury standard library:
    - compare_substrings/6
    - unsafe_compare_substrings/6
    - nondet_append/3
+   - append_string_pieces/2
 
   The following procedures in the string module have been deprecated:
 
diff --git a/library/io.m b/library/io.m
index b8a8cabad..54d7ce133 100644
--- a/library/io.m
+++ b/library/io.m
@@ -5155,6 +5155,9 @@ file_id(FileName, Result, !IO) :-
     % XXX It would be better to use a char_array type rather than array(char).
     % This is because on the Java and IL backends indexing into an array whose
     % element type is known statically requires less overhead.
+    %
+    % It may be possible to merge this type with string.string_buffer.
+    %
 :- type buffer
     --->    buffer(array(char)).
 
diff --git a/library/string.m b/library/string.m
index b0930a767..bcd7b78bc 100644
--- a/library/string.m
+++ b/library/string.m
@@ -665,6 +665,24 @@
     %
 :- func join_list(string::in, list(string)::in) = (string::uo) is det.
 
+%---------------------------------------------------------------------------%
+%
+% Making strings from smaller pieces.
+%
+
+:- type string_piece
+    --->    string(string)
+    ;       substring(string, int, int).    % string, start, end offset
+
+    % append_string_pieces(Pieces, String):
+    %
+    % Append together the strings and substrings in `Pieces' into a string.
+    % Throws an exception if `Pieces' contains an element
+    % `substring(S, Start, End)' where `Start' or `End' is not within
+    % the range [0, length(S)], or if `Start' > `End'.
+    %
+:- pred append_string_pieces(list(string_piece)::in, string::uo) is det.
+
 %---------------------------------------------------------------------------%
 %
 % Splitting up strings.
@@ -3951,6 +3969,169 @@ join_list(Sep, [H | T]) = H ++ join_list_loop(Sep, T).
 join_list_loop(_, []) = "".
 join_list_loop(Sep, [H | T]) = Sep ++ H ++ join_list_loop(Sep, T).
 
+%---------------------------------------------------------------------------%
+%
+% Making strings from smaller pieces.
+%
+
+:- type string_buffer
+    --->    string_buffer(string).
+
+:- pragma foreign_type("C", string_buffer, "char *",
+    [can_pass_as_mercury_type]).
+:- pragma foreign_type("C#", string_buffer, "char[]").
+:- pragma foreign_type("Java", string_buffer, "java.lang.StringBuilder").
+
+:- pred alloc_buffer(int::in, string_buffer::uo) is det.
+
+:- pragma foreign_proc("C",
+    alloc_buffer(Size::in, Buffer::uo),
+    [will_not_call_mercury, promise_pure, thread_safe,
+        does_not_affect_liveness, no_sharing],
+"
+    MR_allocate_aligned_string_msg(Buffer, Size, MR_ALLOC_ID);
+    Buffer[Size] = '\\0';
+").
+:- pragma foreign_proc("C#",
+    alloc_buffer(Size::in, Buffer::uo),
+    [will_not_call_mercury, promise_pure, thread_safe],
+"
+    Buffer = new char[Size];
+").
+:- pragma foreign_proc("Java",
+    alloc_buffer(Size::in, Buffer::uo),
+    [will_not_call_mercury, promise_pure, thread_safe],
+"
+    Buffer = new java.lang.StringBuilder(Size);
+").
+
+alloc_buffer(_Size, Buffer) :-
+    Buffer = string_buffer("").
+
+:- pred buffer_to_string(string_buffer::di, string::uo) is det.
+
+:- pragma foreign_proc("C",
+    buffer_to_string(Buffer::di, Str::uo),
+    [will_not_call_mercury, promise_pure, thread_safe,
+        does_not_affect_liveness],
+"
+    Str = Buffer;
+").
+:- pragma foreign_proc("C#",
+    buffer_to_string(Buffer::di, Str::uo),
+    [will_not_call_mercury, promise_pure, thread_safe],
+"
+    Str = new string(Buffer);
+").
+:- pragma foreign_proc("Java",
+    buffer_to_string(Buffer::di, Str::uo),
+    [will_not_call_mercury, promise_pure, thread_safe],
+"
+    Str = Buffer.toString();
+").
+
+buffer_to_string(Buffer, Str) :-
+    Buffer = string_buffer(Str).
+
+:- pred copy_into_buffer(string_buffer::di, string_buffer::uo,
+    int::in, int::out, string::in, int::in, int::in) is det.
+
+:- pragma foreign_proc("C",
+    copy_into_buffer(Dest0::di, Dest::uo, DestOffset0::in, DestOffset::out,
+        Src::in, SrcStart::in, SrcEnd::in),
+    [will_not_call_mercury, promise_pure, thread_safe,
+        does_not_affect_liveness],
+"
+    size_t count;
+
+    MR_CHECK_EXPR_TYPE(Dest0, char *);
+    MR_CHECK_EXPR_TYPE(Dest, char *);
+
+    count = SrcEnd - SrcStart;
+    Dest = Dest0;
+    MR_memcpy(Dest + DestOffset0, Src + SrcStart, count);
+    DestOffset = DestOffset0 + count;
+").
+:- pragma foreign_proc("C#",
+    copy_into_buffer(Dest0::di, Dest::uo, DestOffset0::in, DestOffset::out,
+        Src::in, SrcStart::in, SrcEnd::in),
+    [will_not_call_mercury, promise_pure, thread_safe],
+"
+    int count = SrcEnd - SrcStart;
+    Dest = Dest0;
+    Src.CopyTo(SrcStart, Dest, DestOffset0, count);
+    DestOffset = DestOffset0 + count;
+").
+:- pragma foreign_proc("Java",
+    copy_into_buffer(Dest0::di, Dest::uo, DestOffset0::in, DestOffset::out,
+        Src::in, SrcStart::in, SrcEnd::in),
+    [will_not_call_mercury, promise_pure, thread_safe],
+"
+    // The Java implementation does not actually use the dest offsets.
+    Dest = Dest0;
+    Dest.append(Src, SrcStart, SrcEnd);
+    DestOffset = DestOffset0 + (SrcEnd - SrcStart);
+").
+
+copy_into_buffer(Dest0, Dest, DestOffset0, DestOffset, Src, SrcStart, SrcEnd)
+        :-
+    Dest0 = string_buffer(Buffer0),
+    Buffer = Buffer0 ++ unsafe_between(Src, SrcStart, SrcEnd),
+    DestOffset = DestOffset0 + (SrcEnd - SrcStart),
+    Dest = string_buffer(Buffer).
+
+%---------------------%
+
+append_string_pieces(Pieces, String) :-
+    check_pieces_and_sum_length($pred, Pieces, 0, BufferLen),
+    alloc_buffer(BufferLen, Buffer0),
+    list.foldl2(copy_piece_into_buffer, Pieces, 0, End, Buffer0, Buffer),
+    expect(unify(End, BufferLen), $pred, "End != BufferLen"),
+    buffer_to_string(Buffer, String).
+
+:- pred check_pieces_and_sum_length(string::in, list(string_piece)::in,
+    int::in, int::out) is det.
+
+check_pieces_and_sum_length(PredName, Pieces, Len0, Len) :-
+    (
+        Pieces = [],
+        Len = Len0
+    ;
+        Pieces = [Piece | TailPieces],
+        (
+            Piece = string(Str),
+            PieceLen = length(Str)
+        ;
+            Piece = substring(BaseStr, Start, End),
+            BaseLen = length(BaseStr),
+            ( if
+                Start >= 0,
+                Start =< BaseLen,
+                End >= Start,
+                End =< BaseLen
+            then
+                PieceLen = End - Start
+            else
+                unexpected(PredName, "substring index out of range")
+            )
+        ),
+        Len1 = Len0 + PieceLen,
+        check_pieces_and_sum_length(PredName, TailPieces, Len1, Len)
+    ).
+
+:- pred copy_piece_into_buffer(string_piece::in, int::in, int::out,
+    string_buffer::di, string_buffer::uo) is det.
+
+copy_piece_into_buffer(Piece, !DestOffset, !DestBuffer) :-
+    (
+        Piece = string(Src),
+        SrcStart = 0,
+        SrcEnd = length(Src)
+    ;
+        Piece = substring(Src, SrcStart, SrcEnd)
+    ),
+    copy_into_buffer(!DestBuffer, !DestOffset, Src, SrcStart, SrcEnd).
+
 %---------------------------------------------------------------------------%
 %
 % Splitting up strings.
diff --git a/tests/hard_coded/Mmakefile b/tests/hard_coded/Mmakefile
index 826ea7d0d..16fdf0eb7 100644
--- a/tests/hard_coded/Mmakefile
+++ b/tests/hard_coded/Mmakefile
@@ -355,6 +355,7 @@ ORDINARY_PROGS = \
 	string_append_ioi \
 	string_append_ooi \
 	string_append_ooi_ilseq \
+	string_append_pieces \
 	string_builder_test \
 	string_case \
 	string_char_list_ilseq \
diff --git a/tests/hard_coded/string_append_pieces.exp b/tests/hard_coded/string_append_pieces.exp
new file mode 100644
index 000000000..8e2a865d5
--- /dev/null
+++ b/tests/hard_coded/string_append_pieces.exp
@@ -0,0 +1,11 @@
+""
+""
+""
+""
+"ab"
+"cool!😀"
+software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
+software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
+software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
+software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
+software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
diff --git a/tests/hard_coded/string_append_pieces.m b/tests/hard_coded/string_append_pieces.m
new file mode 100644
index 000000000..f5397e03f
--- /dev/null
+++ b/tests/hard_coded/string_append_pieces.m
@@ -0,0 +1,59 @@
+%---------------------------------------------------------------------------%
+% vim: ts=4 sw=4 et ft=mercury
+%---------------------------------------------------------------------------%
+
+:- module string_append_pieces.
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is cc_multi.
+
+%---------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module exception.
+:- import_module int.
+:- import_module list.
+:- import_module string.
+
+%---------------------------------------------------------------------------%
+
+main(!IO) :-
+    foldl(test_append_string_pieces, test_cases, !IO).
+
+:- func test_cases = list(list(string_piece)).
+
+test_cases = [
+    [],
+    [string("")],
+    [substring("", 0, 0)],
+    [substring("ok", 2, 2)],
+    [substring("axx", 0, 1), substring("xbx", 1, 2)],
+    [
+        string("c"),
+        substring("whoops!", 2, 4),
+        string("l!"),
+        substring("😀😀😀", length("😀"), 2 * length("😀"))
+    ],
+    [substring("bad", -1, 0)],
+    [substring("bad", 4, 3)],
+    [substring("bad", 0, -1)],
+    [substring("bad", 0, 4)],
+    [substring("bad", 3, 2)]
+].
+
+:- pred test_append_string_pieces(list(string_piece)::in, io::di, io::uo)
+    is cc_multi.
+
+test_append_string_pieces(Pieces, !IO) :-
+    ( try []
+        append_string_pieces(Pieces, Str)
+    then
+        io.write_string("""", !IO),
+        io.write_string(Str, !IO),
+        io.write_string("""\n", !IO)
+    catch_any Excp ->
+        io.print_line(Excp, !IO)
+    ).
-- 
2.23.0



More information about the reviews mailing list