[m-rev.] for review: Add string.append_string_pieces.
Peter Wang
novalazy at gmail.com
Thu Oct 31 17:18:01 AEDT 2019
library/string.m:
Add string_piece type and append_string_pieces/2 predicate.
library/io.m:
Add a comment about a potential future change.
tests/hard_coded/Mmakefile:
tests/hard_coded/string_append_pieces.exp:
tests/hard_coded/string_append_pieces.m:
Add test case.
NEWS:
Announce addition.
---
NEWS | 1 +
library/io.m | 3 +
library/string.m | 181 ++++++++++++++++++++++
tests/hard_coded/Mmakefile | 1 +
tests/hard_coded/string_append_pieces.exp | 11 ++
tests/hard_coded/string_append_pieces.m | 59 +++++++
6 files changed, 256 insertions(+)
create mode 100644 tests/hard_coded/string_append_pieces.exp
create mode 100644 tests/hard_coded/string_append_pieces.m
diff --git a/NEWS b/NEWS
index b8827d9b1..570d22b85 100644
--- a/NEWS
+++ b/NEWS
@@ -430,6 +430,7 @@ Changes to the Mercury standard library:
- compare_substrings/6
- unsafe_compare_substrings/6
- nondet_append/3
+ - append_string_pieces/2
The following procedures in the string module have been deprecated:
diff --git a/library/io.m b/library/io.m
index b8a8cabad..54d7ce133 100644
--- a/library/io.m
+++ b/library/io.m
@@ -5155,6 +5155,9 @@ file_id(FileName, Result, !IO) :-
% XXX It would be better to use a char_array type rather than array(char).
% This is because on the Java and IL backends indexing into an array whose
% element type is known statically requires less overhead.
+ %
+ % It may be possible to merge this type with string.string_buffer.
+ %
:- type buffer
---> buffer(array(char)).
diff --git a/library/string.m b/library/string.m
index b0930a767..bcd7b78bc 100644
--- a/library/string.m
+++ b/library/string.m
@@ -665,6 +665,24 @@
%
:- func join_list(string::in, list(string)::in) = (string::uo) is det.
+%---------------------------------------------------------------------------%
+%
+% Making strings from smaller pieces.
+%
+
+:- type string_piece
+ ---> string(string)
+ ; substring(string, int, int). % string, start, end offset
+
+ % append_string_pieces(Pieces, String):
+ %
+ % Append together the strings and substrings in `Pieces' into a string.
+ % Throws an exception if `Pieces' contains an element
+ % `substring(S, Start, End)' where `Start' or `End' is not within
+ % the range [0, length(S)], or where `Start' > `End'.
+ %
+:- pred append_string_pieces(list(string_piece)::in, string::uo) is det.
+
%---------------------------------------------------------------------------%
%
% Splitting up strings.
@@ -3951,6 +3969,169 @@ join_list(Sep, [H | T]) = H ++ join_list_loop(Sep, T).
join_list_loop(_, []) = "".
join_list_loop(Sep, [H | T]) = Sep ++ H ++ join_list_loop(Sep, T).
+%---------------------------------------------------------------------------%
+%
+% Making strings from smaller pieces.
+%
+
+:- type string_buffer
+ ---> string_buffer(string).
+
+:- pragma foreign_type("C", string_buffer, "char *",
+ [can_pass_as_mercury_type]).
+:- pragma foreign_type("C#", string_buffer, "char[]").
+:- pragma foreign_type("Java", string_buffer, "java.lang.StringBuilder").
+
+:- pred alloc_buffer(int::in, string_buffer::uo) is det.
+
+:- pragma foreign_proc("C",
+ alloc_buffer(Size::in, Buffer::uo),
+ [will_not_call_mercury, promise_pure, thread_safe,
+ does_not_affect_liveness, no_sharing],
+"
+ MR_allocate_aligned_string_msg(Buffer, Size, MR_ALLOC_ID);
+ Buffer[Size] = '\\0';
+").
+:- pragma foreign_proc("C#",
+ alloc_buffer(Size::in, Buffer::uo),
+ [will_not_call_mercury, promise_pure, thread_safe],
+"
+ Buffer = new char[Size];
+").
+:- pragma foreign_proc("Java",
+ alloc_buffer(Size::in, Buffer::uo),
+ [will_not_call_mercury, promise_pure, thread_safe],
+"
+ Buffer = new java.lang.StringBuilder(Size);
+").
+
+alloc_buffer(_Size, Buffer) :-
+ Buffer = string_buffer("").
+
+:- pred buffer_to_string(string_buffer::di, string::uo) is det.
+
+:- pragma foreign_proc("C",
+ buffer_to_string(Buffer::di, Str::uo),
+ [will_not_call_mercury, promise_pure, thread_safe,
+ does_not_affect_liveness],
+"
+ Str = Buffer;
+").
+:- pragma foreign_proc("C#",
+ buffer_to_string(Buffer::di, Str::uo),
+ [will_not_call_mercury, promise_pure, thread_safe],
+"
+ Str = new string(Buffer);
+").
+:- pragma foreign_proc("Java",
+ buffer_to_string(Buffer::di, Str::uo),
+ [will_not_call_mercury, promise_pure, thread_safe],
+"
+ Str = Buffer.toString();
+").
+
+buffer_to_string(Buffer, Str) :-
+ Buffer = string_buffer(Str).
+
+:- pred copy_into_buffer(string_buffer::di, string_buffer::uo,
+ int::in, int::out, string::in, int::in, int::in) is det.
+
+:- pragma foreign_proc("C",
+ copy_into_buffer(Dest0::di, Dest::uo, DestOffset0::in, DestOffset::out,
+ Src::in, SrcStart::in, SrcEnd::in),
+ [will_not_call_mercury, promise_pure, thread_safe,
+ does_not_affect_liveness],
+"
+ size_t count;
+
+ MR_CHECK_EXPR_TYPE(Dest0, char *);
+ MR_CHECK_EXPR_TYPE(Dest, char *);
+
+ count = SrcEnd - SrcStart;
+ Dest = Dest0;
+ MR_memcpy(Dest + DestOffset0, Src + SrcStart, count);
+ DestOffset = DestOffset0 + count;
+").
+:- pragma foreign_proc("C#",
+ copy_into_buffer(Dest0::di, Dest::uo, DestOffset0::in, DestOffset::out,
+ Src::in, SrcStart::in, SrcEnd::in),
+ [will_not_call_mercury, promise_pure, thread_safe],
+"
+ int count = SrcEnd - SrcStart;
+ Dest = Dest0;
+ Src.CopyTo(SrcStart, Dest, DestOffset0, count);
+ DestOffset = DestOffset0 + count;
+").
+:- pragma foreign_proc("Java",
+ copy_into_buffer(Dest0::di, Dest::uo, DestOffset0::in, DestOffset::out,
+ Src::in, SrcStart::in, SrcEnd::in),
+ [will_not_call_mercury, promise_pure, thread_safe],
+"
+ // The Java implementation does not actually use the dest offsets.
+ Dest = Dest0;
+ Dest.append(Src, SrcStart, SrcEnd);
+ DestOffset = DestOffset0 + (SrcEnd - SrcStart);
+").
+
+copy_into_buffer(Dest0, Dest, DestOffset0, DestOffset, Src, SrcStart, SrcEnd)
+ :-
+ Dest0 = string_buffer(Buffer0),
+ Buffer = Buffer0 ++ unsafe_between(Src, SrcStart, SrcEnd),
+ DestOffset = DestOffset0 + (SrcEnd - SrcStart),
+ Dest = string_buffer(Buffer).
+
+%---------------------%
+
+append_string_pieces(Pieces, String) :-
+ check_pieces_and_sum_length($pred, Pieces, 0, BufferLen),
+ alloc_buffer(BufferLen, Buffer0),
+ list.foldl2(copy_piece_into_buffer, Pieces, 0, End, Buffer0, Buffer),
+ expect(unify(End, BufferLen), $pred, "End != BufferLen"),
+ buffer_to_string(Buffer, String).
+
+:- pred check_pieces_and_sum_length(string::in, list(string_piece)::in,
+ int::in, int::out) is det.
+
+check_pieces_and_sum_length(PredName, Pieces, Len0, Len) :-
+ (
+ Pieces = [],
+ Len = Len0
+ ;
+ Pieces = [Piece | TailPieces],
+ (
+ Piece = string(Str),
+ PieceLen = length(Str)
+ ;
+ Piece = substring(BaseStr, Start, End),
+ BaseLen = length(BaseStr),
+ ( if
+ Start >= 0,
+ Start =< BaseLen,
+ End >= Start,
+ End =< BaseLen
+ then
+ PieceLen = End - Start
+ else
+ unexpected(PredName, "substring index out of range")
+ )
+ ),
+ Len1 = Len0 + PieceLen,
+ check_pieces_and_sum_length(PredName, TailPieces, Len1, Len)
+ ).
+
+:- pred copy_piece_into_buffer(string_piece::in, int::in, int::out,
+ string_buffer::di, string_buffer::uo) is det.
+
+copy_piece_into_buffer(Piece, !DestOffset, !DestBuffer) :-
+ (
+ Piece = string(Src),
+ SrcStart = 0,
+ SrcEnd = length(Src)
+ ;
+ Piece = substring(Src, SrcStart, SrcEnd)
+ ),
+ copy_into_buffer(!DestBuffer, !DestOffset, Src, SrcStart, SrcEnd).
+
%---------------------------------------------------------------------------%
%
% Splitting up strings.
diff --git a/tests/hard_coded/Mmakefile b/tests/hard_coded/Mmakefile
index 826ea7d0d..16fdf0eb7 100644
--- a/tests/hard_coded/Mmakefile
+++ b/tests/hard_coded/Mmakefile
@@ -355,6 +355,7 @@ ORDINARY_PROGS = \
string_append_ioi \
string_append_ooi \
string_append_ooi_ilseq \
+ string_append_pieces \
string_builder_test \
string_case \
string_char_list_ilseq \
diff --git a/tests/hard_coded/string_append_pieces.exp b/tests/hard_coded/string_append_pieces.exp
new file mode 100644
index 000000000..8e2a865d5
--- /dev/null
+++ b/tests/hard_coded/string_append_pieces.exp
@@ -0,0 +1,11 @@
+""
+""
+""
+""
+"ab"
+"cool!😀"
+software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
+software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
+software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
+software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
+software_error("predicate `string.append_string_pieces\'/2: Unexpected: substring index out of range")
diff --git a/tests/hard_coded/string_append_pieces.m b/tests/hard_coded/string_append_pieces.m
new file mode 100644
index 000000000..f5397e03f
--- /dev/null
+++ b/tests/hard_coded/string_append_pieces.m
@@ -0,0 +1,59 @@
+%---------------------------------------------------------------------------%
+% vim: ts=4 sw=4 et ft=mercury
+%---------------------------------------------------------------------------%
+
+:- module string_append_pieces.
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is cc_multi.
+
+%---------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module exception.
+:- import_module int.
+:- import_module list.
+:- import_module string.
+
+%---------------------------------------------------------------------------%
+
+main(!IO) :-
+ foldl(test_append_string_pieces, test_cases, !IO).
+
+:- func test_cases = list(list(string_piece)).
+
+test_cases = [
+ [],
+ [string("")],
+ [substring("", 0, 0)],
+ [substring("ok", 2, 2)],
+ [substring("axx", 0, 1), substring("xbx", 1, 2)],
+ [
+ string("c"),
+ substring("whoops!", 2, 4),
+ string("l!"),
+ substring("😀😀😀", length("😀"), 2 * length("😀"))
+ ],
+ [substring("bad", -1, 0)],
+ [substring("bad", 4, 3)],
+ [substring("bad", 0, -1)],
+ [substring("bad", 0, 4)],
+ [substring("bad", 3, 2)]
+].
+
+:- pred test_append_string_pieces(list(string_piece)::in, io::di, io::uo)
+ is cc_multi.
+
+test_append_string_pieces(Pieces, !IO) :-
+ ( try []
+ append_string_pieces(Pieces, Str)
+ then
+ io.write_string("""", !IO),
+ io.write_string(Str, !IO),
+ io.write_string("""\n", !IO)
+ catch_any Excp ->
+ io.print_line(Excp, !IO)
+ ).
--
2.23.0
More information about the reviews
mailing list