[m-rev.] for review: add write_binary_utf8_string
Julien Fischer
jfischer at opturion.com
Thu Apr 7 10:38:02 AEST 2022
For review by Peter.
--------------------------
Add write_binary_utf8_string.
Add predicates for writing the UTF-8 encoding of strings to binary output
streams.
library/io.m:
library/io.primitives_write.m:
Add the new predicates.
NEWS:
Announce the additions.
tests/hard_coded/Mmakefile:
tests/hard_coded/write_binary_utf8.{m,exp}:
Add a test of the new predicates.
Julien.
diff --git a/NEWS b/NEWS
index 3e03928..507eebd 100644
--- a/NEWS
+++ b/NEWS
@@ -68,6 +68,11 @@ The following obsolete predicates have been removed:
### Changes to the `io` module
+* The following predicates have been added:
+
+ - pred `write_binary_utf8_string/3`
+ - pred `write_binary_utf8_string/4`
+
* The following obsolete predicates have been removed:
- pred `see/3` (replacement: `prolog.see/3`)
diff --git a/library/io.m b/library/io.m
index 29dff2b..54667b5 100644
--- a/library/io.m
+++ b/library/io.m
@@ -837,6 +837,16 @@
:- pred write_binary_uint64_be(io.binary_output_stream::in, uint64::in,
io::di, io::uo) is det.
+%---------------------%
+
+ % Write the UTF-8 encoding of a string to the current binary output stream
+ % or the specified binary output stream. If the given string is not
+ % well-formed, then the behaviour is implementation dependent.
+ %
+:- pred write_binary_utf8_string(string::in, io::di, io::uo) is det.
+:- pred write_binary_utf8_string(io.binary_output_stream::in, string::in,
+ io::di, io::uo) is det.
+
%---------------------------------------------------------------------------%
%
% Text input predicates.
@@ -3356,6 +3366,16 @@ write_binary_uint64_be(binary_output_stream(Stream), UInt64, !IO) :-
do_write_binary_uint64_be(Stream, UInt64, Error, !IO),
throw_on_output_error(Error, !IO).
+%---------------------%
+
+write_binary_utf8_string(String, !IO) :-
+ binary_output_stream(Stream, !IO),
+ write_binary_utf8_string(Stream, String, !IO).
+
+write_binary_utf8_string(binary_output_stream(Stream), String, !IO) :-
+ do_write_binary_utf8_string(Stream, String, Error, !IO),
+ throw_on_output_error(Error, !IO).
+
%---------------------------------------------------------------------------%
%
% Text input predicates.
diff --git a/library/io.primitives_write.m b/library/io.primitives_write.m
index 962d301..389690a 100644
--- a/library/io.primitives_write.m
+++ b/library/io.primitives_write.m
@@ -92,6 +92,9 @@
:- pred do_write_binary_uint64_be(stream::in, uint64::in, system_error::out,
io::di, io::uo) is det.
+:- pred do_write_binary_utf8_string(stream::in, string::in, system_error::out,
+ io::di, io::uo) is det.
+
%---------------------------------------------------------------------------%
:- implementation.
@@ -1147,6 +1150,50 @@ do_write_float(Stream, Float, Error, !IO) :-
").
%---------------------------------------------------------------------------%
+
+:- pragma foreign_proc("C",
+ do_write_binary_utf8_string(Stream::in, String::in, Error::out,
+ _IO0::di, _IO::uo),
+ [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+ size_t len = strlen(String); /* bytes to write, excluding the NUL */
+ if (MR_WRITE(*Stream, (unsigned char *) String, len) != len) {
+ Error = errno; /* short write: report the failure */
+ } else {
+ Error = 0;
+ }
+").
+
+:- pragma foreign_proc("C#",
+ do_write_binary_utf8_string(Stream::in, String::in, Error::out,
+ _IO0::di, _IO::uo),
+ [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+ byte[] bytes = mercury.io__stream_ops.text_encoding.GetBytes(String);
+ try {
+ Stream.stream.Write(bytes, 0, bytes.Length);
+ Error = null;
+ } catch (System.Exception e) {
+ Error = e;
+ }
+").
+
+:- pragma foreign_proc("Java",
+ do_write_binary_utf8_string(Stream::in, String::in, Error::out,
+ _IO0::di, _IO::uo),
+ [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+ byte[] bytes = String.getBytes(java.nio.charset.StandardCharsets.UTF_8);
+ try {
+ ((jmercury.io__stream_ops.MR_BinaryOutputFile) Stream).write(
+ bytes, 0, bytes.length);
+ Error = null;
+ } catch (java.io.IOException e) {
+ Error = e;
+ }
+").
+
+%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
:- pragma foreign_decl("C", "
diff --git a/tests/hard_coded/Mmakefile b/tests/hard_coded/Mmakefile
index 77a30ac..a205a89 100644
--- a/tests/hard_coded/Mmakefile
+++ b/tests/hard_coded/Mmakefile
@@ -504,6 +504,7 @@ ORDINARY_PROGS = \
write_binary_int8 \
write_binary_multibyte_int \
write_binary_uint8 \
+ write_binary_utf8 \
write_float_special \
write_reg1 \
write_reg2 \
diff --git a/tests/hard_coded/write_binary_utf8.exp b/tests/hard_coded/write_binary_utf8.exp
index e69de29..69ce34f 100644
--- a/tests/hard_coded/write_binary_utf8.exp
+++ b/tests/hard_coded/write_binary_utf8.exp
@@ -0,0 +1,14 @@
+1: 0x61
+2: 0xCE
+3: 0xA9
+4: 0xE2
+5: 0x88
+6: 0x80
+7: 0xF0
+8: 0x9D
+9: 0x84
+10: 0x9E
+11: 0xF0
+12: 0x9F
+13: 0x98
+14: 0x80
diff --git a/tests/hard_coded/write_binary_utf8.m b/tests/hard_coded/write_binary_utf8.m
index e69de29..0ca6e27 100644
--- a/tests/hard_coded/write_binary_utf8.m
+++ b/tests/hard_coded/write_binary_utf8.m
@@ -0,0 +1,118 @@
+%---------------------------------------------------------------------------%
+% vim: ft=mercury ts=4 sw=4 et
+%---------------------------------------------------------------------------%
+% A test of io.write_binary_utf8_string.
+%---------------------------------------------------------------------------%
+
+:- module write_binary_utf8.
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is det.
+
+%---------------------------------------------------------------------------%
+%---------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module int.
+:- import_module io.file.
+:- import_module list.
+:- import_module stream.
+:- import_module string.
+
+%---------------------------------------------------------------------------%
+
+main(!IO) :-
+ io.open_binary_output(test_file, OpenOutResult, !IO),
+ (
+ OpenOutResult = ok(Out),
+ output_test_strings(Out, !IO),
+ io.close_binary_output(Out, !IO),
+ read_and_print_bytes(!IO),
+ io.file.remove_file(test_file, _, !IO)
+ ;
+ OpenOutResult = error(Error),
+ handle_io_error(Error, !IO)
+ ).
+
+%---------------------------------------------------------------------------%
+
+:- pred output_test_strings(io.binary_output_stream::in, io::di, io::uo)
+ is det.
+
+output_test_strings(Out, !IO) :-
+
+ % Codepoint : Name : UTF-8 encoding
+
+ % U+0061: 'LATIN SMALL LETTER A': 0x61
+ % Output bytes: 1
+ io.write_binary_utf8_string(Out, "a", !IO),
+
+ % U+03A9: 'GREEK CAPITAL LETTER OMEGA': 0xCE 0xA9
+ % Output bytes: 2 - 3
+ io.write_binary_utf8_string(Out, "\u03A9", !IO),
+
+ % U+2200: 'FOR ALL': 0xE2 0x88 0x80
+ % Output bytes: 4 - 6
+ io.write_binary_utf8_string(Out, "\u2200", !IO),
+
+ % U+1D11E: 'MUSICAL SYMBOL G CLEF': 0xF0 0x9D 0x84 0x9E
+ % Output bytes: 7 - 10
+ io.write_binary_utf8_string(Out, "\U0001D11E", !IO),
+
+ % U+1F600: 'GRINNING FACE': 0xF0 0x9F 0x98 0x80
+ % Output bytes: 11 - 14
+ io.write_binary_utf8_string(Out, "\U0001F600", !IO).
+
+%---------------------------------------------------------------------------%
+
+% Read write_binary_utf8.bin and print out all the bytes it contains, one
+% per line. The point of this is to check that the encoding is correct and
+% also to ensure that we are not inadvertently writing out a BOM.
+
+:- pred read_and_print_bytes(io::di, io::uo) is det.
+
+read_and_print_bytes(!IO) :-
+ io.open_binary_input(test_file, OpenResult, !IO),
+ (
+ OpenResult = ok(InFile),
+ stream.input_stream_fold2_state(InFile, output_code_unit, 1,
+ FoldResult, !IO),
+ (
+ FoldResult = ok(_)
+ ;
+ FoldResult = error(_, Error),
+ handle_io_error(Error, !IO)
+ )
+ ;
+ OpenResult = error(Error),
+ handle_io_error(Error, !IO)
+ ).
+
+:- pred output_code_unit(uint8::in, int::in, int::out, io::di, io::uo) is det.
+
+output_code_unit(CodeUnit, !N, !IO) :-
+ io.format("%d: 0x%X\n", [i(!.N), u8(CodeUnit)], !IO),
+ !:N = !.N + 1.
+
+%---------------------------------------------------------------------------%
+
+:- pred handle_io_error(io.error::in, io::di, io::uo) is det.
+
+handle_io_error(Error, !IO) :-
+ io.error_message(Error, ErrorMsg),
+ io.stderr_stream(Stderr, !IO),
+ io.format(Stderr, "error: %s\n", [s(ErrorMsg)], !IO),
+ io.set_exit_status(1, !IO).
+
+%---------------------------------------------------------------------------%
+
+:- func test_file = string.
+
+test_file = "write_binary_utf8.bin".
+
+%---------------------------------------------------------------------------%
+:- end_module write_binary_utf8.
+%---------------------------------------------------------------------------%
More information about the reviews
mailing list