[m-rev.] for review: add write_binary_utf8_string

Julien Fischer jfischer at opturion.com
Thu Apr 7 10:38:02 AEST 2022


For review by Peter.

--------------------------

Add write_binary_utf8_string.

Add predicates for writing the UTF-8 encoding of strings to binary output
streams.

library/io.m:
library/io.primitives_write.m:
     Add the new predicates.

NEWS:
     Announce the additions.

tests/hard_coded/Mmakefile:
tests/hard_coded/write_binary_utf8.{m,exp}:
     Add a test of the new predicates.

Julien.

diff --git a/NEWS b/NEWS
index 3e03928..507eebd 100644
--- a/NEWS
+++ b/NEWS
@@ -68,6 +68,11 @@ The following obsolete predicates have been removed:

  ### Changes to the `io` module

+* The following predicates have been added:
+
+    - pred `write_binary_utf8_string/3`
+    - pred `write_binary_utf8_string/4`
+
  * The following obsolete predicates have been removed:

      - pred `see/3`                  (replacement: `prolog.see/3`)
diff --git a/library/io.m b/library/io.m
index 29dff2b..54667b5 100644
--- a/library/io.m
+++ b/library/io.m
@@ -837,6 +837,16 @@
  :- pred write_binary_uint64_be(io.binary_output_stream::in, uint64::in,
      io::di, io::uo) is det.

+%---------------------%
+
+    % Write the UTF-8 encoding of a string to the current binary output stream
+    % or the specified binary output stream. If the given string is not
+    % well-formed, then the behaviour is implementation dependent.
+    %
+:- pred write_binary_utf8_string(string::in, io::di, io::uo) is det.
+:- pred write_binary_utf8_string(io.binary_output_stream::in, string::in,
+    io::di, io::uo) is det.
+
  %---------------------------------------------------------------------------%
  %
  % Text input predicates.
@@ -3356,6 +3366,16 @@ write_binary_uint64_be(binary_output_stream(Stream), UInt64, !IO) :-
      do_write_binary_uint64_be(Stream, UInt64, Error, !IO),
      throw_on_output_error(Error, !IO).

+%---------------------%
+
+write_binary_utf8_string(String, !IO) :-
+    binary_output_stream(Stream, !IO),
+    write_binary_utf8_string(Stream, String, !IO).
+
+write_binary_utf8_string(binary_output_stream(Stream), String, !IO) :-
+    do_write_binary_utf8_string(Stream, String, Error, !IO),
+    throw_on_output_error(Error, !IO).
+
  %---------------------------------------------------------------------------%
  %
  % Text input predicates.
diff --git a/library/io.primitives_write.m b/library/io.primitives_write.m
index 962d301..389690a 100644
--- a/library/io.primitives_write.m
+++ b/library/io.primitives_write.m
@@ -92,6 +92,9 @@
  :- pred do_write_binary_uint64_be(stream::in, uint64::in, system_error::out,
      io::di, io::uo) is det.

+:- pred do_write_binary_utf8_string(stream::in, string::in, system_error::out,
+    io::di, io::uo) is det.
+
  %---------------------------------------------------------------------------%

  :- implementation.
@@ -1147,6 +1150,50 @@ do_write_float(Stream, Float, Error, !IO) :-
  ").

  %---------------------------------------------------------------------------%
+
+:- pragma foreign_proc("C",
+    do_write_binary_utf8_string(Stream::in, String::in, Error::out,
+        _IO0::di, _IO::uo),
+    [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+    size_t len = strlen(String);    // Length in bytes, without the NUL.
+    if (MR_WRITE(*Stream, (unsigned char *) String, len) != len) {
+        Error = errno;              // Short write: report errno.
+    } else {
+        Error = 0;                  // All len bytes were written.
+    }
+").
+
+:- pragma foreign_proc("C#",
+    do_write_binary_utf8_string(Stream::in, String::in, Error::out,
+        _IO0::di, _IO::uo),
+    [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+    byte[] bytes = mercury.io__stream_ops.text_encoding.GetBytes(String);
+    try {
+        Stream.stream.Write(bytes, 0, bytes.Length);
+        Error = null;
+    } catch (System.Exception e) {
+        Error = e;
+    }
+").
+
+:- pragma foreign_proc("Java",
+    do_write_binary_utf8_string(Stream::in, String::in, Error::out,
+        _IO0::di, _IO::uo),
+    [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+    byte[] bytes = String.getBytes(java.nio.charset.StandardCharsets.UTF_8);
+    try {
+        ((jmercury.io__stream_ops.MR_BinaryOutputFile) Stream).write(
+            bytes, 0, bytes.length);
+        Error = null;
+    } catch (java.io.IOException e) {
+        Error = e;
+    }
+").
+
+%---------------------------------------------------------------------------%
  %---------------------------------------------------------------------------%

  :- pragma foreign_decl("C", "
diff --git a/tests/hard_coded/Mmakefile b/tests/hard_coded/Mmakefile
index 77a30ac..a205a89 100644
--- a/tests/hard_coded/Mmakefile
+++ b/tests/hard_coded/Mmakefile
@@ -504,6 +504,7 @@ ORDINARY_PROGS = \
  	write_binary_int8 \
  	write_binary_multibyte_int \
  	write_binary_uint8 \
+	write_binary_utf8 \
  	write_float_special \
  	write_reg1 \
  	write_reg2 \
diff --git a/tests/hard_coded/write_binary_utf8.exp b/tests/hard_coded/write_binary_utf8.exp
index e69de29..69ce34f 100644
--- a/tests/hard_coded/write_binary_utf8.exp
+++ b/tests/hard_coded/write_binary_utf8.exp
@@ -0,0 +1,14 @@
+1: 0x61
+2: 0xCE
+3: 0xA9
+4: 0xE2
+5: 0x88
+6: 0x80
+7: 0xF0
+8: 0x9D
+9: 0x84
+10: 0x9E
+11: 0xF0
+12: 0x9F
+13: 0x98
+14: 0x80
diff --git a/tests/hard_coded/write_binary_utf8.m b/tests/hard_coded/write_binary_utf8.m
index e69de29..0ca6e27 100644
--- a/tests/hard_coded/write_binary_utf8.m
+++ b/tests/hard_coded/write_binary_utf8.m
@@ -0,0 +1,118 @@
+%---------------------------------------------------------------------------%
+% vim: ft=mercury ts=4 sw=4 et
+%---------------------------------------------------------------------------%
+% A test of io.write_binary_utf8_string.
+%---------------------------------------------------------------------------%
+
+:- module write_binary_utf8.
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is det.
+
+%---------------------------------------------------------------------------%
+%---------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module int.
+:- import_module io.file.
+:- import_module list.
+:- import_module stream.
+:- import_module string.
+
+%---------------------------------------------------------------------------%
+
+main(!IO) :-
+    io.open_binary_output(test_file, OpenOutResult, !IO),
+    (
+        OpenOutResult = ok(Out),
+        output_test_strings(Out, !IO),
+        io.close_binary_output(Out, !IO),
+        read_and_print_bytes(!IO),
+        io.file.remove_file(test_file, _, !IO)
+    ;
+        OpenOutResult = error(Error),
+        handle_io_error(Error, !IO)
+    ).
+
+%---------------------------------------------------------------------------%
+
+:- pred output_test_strings(io.binary_output_stream::in, io::di, io::uo)
+    is det.
+
+output_test_strings(Out, !IO) :-
+
+    % Codepoint : Name : UTF-8 encoding
+
+    % U+0061: 'LATIN SMALL LETTER A': 0x61
+    % Output bytes: 1
+    io.write_binary_utf8_string(Out, "a", !IO),
+
+    % U+03A9: 'GREEK CAPITAL LETTER OMEGA': 0xCE 0xA9
+    % Output bytes: 2 - 3
+    io.write_binary_utf8_string(Out, "\u03A9", !IO),
+
+    % U+2200: 'FOR ALL': 0xE2 0x88 0x80
+    % Output bytes: 4 - 6
+    io.write_binary_utf8_string(Out, "\u2200", !IO),
+
+    % U+1D11E: 'MUSICAL SYMBOL G CLEF': 0xF0 0x9D 0x84 0x9E
+    % Output bytes: 7 - 10
+    io.write_binary_utf8_string(Out, "\U0001D11E", !IO),
+
+    % U+1F600: 'GRINNING FACE': 0xF0 0x9F 0x98 0x80
+    % Output bytes: 11 - 14
+    io.write_binary_utf8_string(Out, "\U0001F600", !IO).
+
+%---------------------------------------------------------------------------%
+
+% Read write_binary_utf8.bin and print out all the bytes it contains, one
+% per line. The point of this is to check that the encoding is correct and
+% also to ensure that we are not inadvertently writing out a BOM.
+
+:- pred read_and_print_bytes(io::di, io::uo) is det.
+
+read_and_print_bytes(!IO) :-
+    io.open_binary_input(test_file, OpenResult, !IO),
+    (
+        OpenResult = ok(InFile),
+        stream.input_stream_fold2_state(InFile, output_code_unit, 1,
+             FoldResult, !IO),
+        (
+            FoldResult = ok(_)
+        ;
+            FoldResult = error(_, Error),
+            handle_io_error(Error, !IO)
+        )
+    ;
+        OpenResult = error(Error),
+        handle_io_error(Error, !IO)
+    ).
+
+:- pred output_code_unit(uint8::in, int::in, int::out, io::di, io::uo) is det.
+
+output_code_unit(CodeUnit, !N, !IO) :-
+    io.format("%d: 0x%X\n", [i(!.N), u8(CodeUnit)], !IO),
+    !:N = !.N + 1.
+
+%---------------------------------------------------------------------------%
+
+:- pred handle_io_error(io.error::in, io::di, io::uo) is det.
+
+handle_io_error(Error, !IO) :-
+    io.error_message(Error, ErrorMsg),
+    io.stderr_stream(Stderr, !IO),
+    io.format(Stderr, "error: %s\n", [s(ErrorMsg)], !IO),
+    io.set_exit_status(1, !IO).
+
+%---------------------------------------------------------------------------%
+
+:- func test_file = string.
+
+test_file = "write_binary_utf8.bin".
+
+%---------------------------------------------------------------------------%
+:- end_module write_binary_utf8.
+%---------------------------------------------------------------------------%


More information about the reviews mailing list