[m-rev.] for review: access bytes in bitmaps as uint8s

Julien Fischer jfischer at opturion.com
Sat Dec 30 16:56:42 AEDT 2017


For review by anyone.

The intention is that we eventually move to the situation in the standard
library where:

    :- type byte == uint8.

as opposed to the current situation where bytes are the 8 least 
significant bits of an int.

------------------------------

Access bytes in bitmaps as uint8s.

library/bitmap.m:
      Add functions for accessing bytes in bitmaps as uint8 values.

      Add predicates for setting bytes in bitmaps as uint8 values.

      Add a det version of from_string/1.

      Fix a typo in the description of is_empty/1.

tests/hard_coded/bitmap_bytes.{m,exp}:
      Extend this test to cover uint8 access to bitmaps.

NEWS:
      Announce the new predicates and functions.

Julien.

diff --git a/NEWS b/NEWS
index 74f1d9a..1d6c91c 100644
--- a/NEWS
+++ b/NEWS
@@ -499,9 +499,12 @@ Changes to the Mercury standard library:
    - det_insert_duplicates/4
    - det_insert_duplicates/3

-* We have added the following predicate to the bitmap module:
+* We have added the following predicates and functions to the bitmap module:

    - is_empty/1
+  - det_from_string/1
+  - get_uint8/2, unsafe_get_uint8/2
+  - set_uint8/4, unsafe_set_uint8/4

  Changes to the Mercury compiler:

diff --git a/library/bitmap.m b/library/bitmap.m
index 456105e..c2253c2 100644
--- a/library/bitmap.m
+++ b/library/bitmap.m
@@ -117,7 +117,7 @@
  :- func bits_per_byte = int.

      % is_empty(Bitmap):
-    % True iff Bitmap is a bitmap containint zero bits.
+    % True iff Bitmap is a bitmap containing zero bits.
      %
  :- pred is_empty(bitmap).
  %:- mode is_empty(bitmap_ui) is semidet.
@@ -172,9 +172,10 @@
  % Get or set the given numbered byte (multiply ByteNumber by
  % bits_per_byte to get the bit index of the start of the byte).
  %
-% The bits are stored in or taken from the least significant bits
-% of the integer.
-% The unsafe versions do not check whether the byte is in range.
+% The bits are stored in or taken from the least significant bits of an int.
+% The safe versions will throw an exception if the given ByteNumber is out of
+% bounds.  Final partial bytes are out of bounds.  The unsafe versions do not
+% check whether the byte is in range.
  %

  :- func bitmap      ^ byte(byte_index) = byte.
@@ -191,6 +192,25 @@
  :- func (bitmap     ^ unsafe_byte(byte_index)   := byte) = bitmap.
  :- mode (bitmap_di  ^ unsafe_byte(in)           := in)   = bitmap_uo is det.

+%
+% Versions of the above that set or take uint8 values instead of a byte stored
+% in the least significant bits of an int.
+%
+
+:- func get_uint8(bitmap, byte_index) = uint8.
+%:- mode get_uint8(bitmap_ui, in) = out is det.
+:- mode get_uint8(in, in) = out is det.
+
+:- func unsafe_get_uint8(bitmap, byte_index) = uint8.
+%:- mode unsafe_get_uint8(bitmap_ui, in) = out is det.
+:- mode unsafe_get_uint8(in, in) = out is det.
+
+:- pred set_uint8(byte_index::in, uint8::in,
+    bitmap::bitmap_di, bitmap::bitmap_uo) is det.
+
+:- pred unsafe_set_uint8(byte_index::in, uint8::in,
+    bitmap::bitmap_di, bitmap::bitmap_uo) is det.
+
  %---------------------------------------------------------------------------%

      % Flip the given bit.
@@ -393,10 +413,16 @@
  :- mode to_string(in) = out is det.

      % Convert a string created by to_string back into a bitmap.
+    % Fails if the string is not of the form created by to_string.
      %
  :- func from_string(string) = bitmap.
  :- mode from_string(in) = bitmap_uo is semidet.

+    % As above, but throws an exception instead of failing.
+    %
+:- func det_from_string(string) = bitmap.
+:- mode det_from_string(in) = bitmap_uo is det.
+
      % Convert a bitmap to a string of `1' and `0' characters, where
      % the bytes are separated by `.'.
      %
@@ -432,6 +458,7 @@
  :- import_module char.
  :- import_module exception.
  :- import_module int.
+:- import_module require.
  :- import_module string.

  %---------------------------------------------------------------------------%
@@ -749,6 +776,44 @@ _ ^ unsafe_byte(_) = _ :-

  %---------------------------------------------------------------------------%

+get_uint8(BM, N) = U8 :-
+    ( if N >= 0, in_range(BM, N * bits_per_byte + bits_per_byte - 1) then
+        U8 = unsafe_get_uint8(BM, N)
+    else
+        throw_byte_bounds_error(BM, "bitmap.get_uint8", N)
+    ).
+
+:- pragma foreign_proc("C",
+    unsafe_get_uint8(BM::in, N::in) = (U8::out),
+    [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail],
+"
+    U8 = (uint8_t) BM->elements[N];
+").
+
+:- pragma foreign_proc("Java",
+    unsafe_get_uint8(BM::in, N::in) = (U8::out),
+    [will_not_call_mercury, promise_pure, thread_safe],
+"
+    U8 = BM.elements[N];
+").
+
+:- pragma foreign_proc("C#",
+    unsafe_get_uint8(BM::in, N::in) = (U8::out),
+    [will_not_call_mercury, promise_pure, thread_safe],
+"
+    U8 = BM.elements[N];
+").
+
+:- pragma foreign_proc("Erlang",
+    unsafe_get_uint8(BM::in, N::in) = (U8::out),
+    [will_not_call_mercury, promise_pure, thread_safe],
+"
+    {Bin, _} = BM,
+    <<_:N/binary, U8/integer, _/binary>> = Bin
+").
+
+%---------------------------------------------------------------------------%
+
  (!.BM ^ byte(N) := Byte) = !:BM :-
      ( if N >= 0, in_range(!.BM, N * bits_per_byte + bits_per_byte - 1) then
          !BM ^ unsafe_byte(N) := Byte
@@ -795,6 +860,49 @@ _ ^ unsafe_byte(_) = _ :-

  %---------------------------------------------------------------------------%

+set_uint8(N, U8, !BM) :-
+    ( if N >= 0, in_range(!.BM, N * bits_per_byte + bits_per_byte - 1) then
+        unsafe_set_uint8(N, U8, !BM)
+    else
+        throw_byte_bounds_error(!.BM, "bitmap.set_uint8", N)
+    ).
+
+:- pragma foreign_proc("C",
+    unsafe_set_uint8(N::in, U8::in, BM0::bitmap_di, BM::bitmap_uo),
+    [will_not_call_mercury, promise_pure, thread_safe, will_not_modify_trail],
+"
+    BM = BM0;
+    BM->elements[N] = (MR_uint_least8_t) U8;
+").
+
+:- pragma foreign_proc("Java",
+    unsafe_set_uint8(N::in, U8::in, BM0::bitmap_di, BM::bitmap_uo),
+    [will_not_call_mercury, promise_pure, thread_safe],
+"
+    BM = BM0;
+    BM.elements[N] = (byte) U8;
+").
+
+:- pragma foreign_proc("C#",
+    unsafe_set_uint8(N::in, U8::in, BM0::bitmap_di, BM::bitmap_uo),
+    [will_not_call_mercury, promise_pure, thread_safe],
+"
+    BM = BM0;
+    BM.elements[N] = (byte) U8;
+").
+
+:- pragma foreign_proc("Erlang",
+    unsafe_set_uint8(N::in, U8::in, BM0::bitmap_di, BM::bitmap_uo),
+    [will_not_call_mercury, promise_pure, thread_safe],
+"
+    {Bin0, NumBits} = BM0,
+    <<Left:N/binary, _/integer, Right/binary>> = Bin0,
+    Bin = <<Left/binary, U8/integer, Right/binary>>,
+    BM = {Bin, NumBits}
+").
+
+%---------------------------------------------------------------------------%
+
  flip(!.BM, I) = !:BM :-
      flip(I, !BM).

@@ -1619,6 +1727,13 @@ from_string(Str) = BM :-
          fail
      ).

+det_from_string(Str) =
+    ( if BM = from_string(Str) then
+        BM
+    else
+        unexpected($pred, "bitmap.from_string failed")
+    ).
+
  :- pred hex_chars_to_bitmap(string::in, int::in, int::in, byte_index::in,
      bitmap::bitmap_di, bitmap::bitmap_uo) is semidet.

diff --git a/tests/hard_coded/bitmap_bytes.exp b/tests/hard_coded/bitmap_bytes.exp
index 93e55bd..c3e54a0 100644
--- a/tests/hard_coded/bitmap_bytes.exp
+++ b/tests/hard_coded/bitmap_bytes.exp
@@ -1,19 +1,55 @@
  Bitmap:
  ^ byte(-1): bitmap.byte: byte index -1 is out of bounds [0, 0).
+get_uint(-1): bitmap.get_uint8: byte index -1 is out of bounds [0, 0).
  ^ byte(0): bitmap.byte: byte index 0 is out of bounds [0, 0).
+get_uint(0): bitmap.get_uint8: byte index 0 is out of bounds [0, 0).
  ^ byte(1): bitmap.byte: byte index 1 is out of bounds [0, 0).
+get_uint(1): bitmap.get_uint8: byte index 1 is out of bounds [0, 0).

  Bitmap: 00000000.00000000.00000000
  ^ byte(-1): bitmap.byte: byte index -1 is out of bounds [0, 3).
+get_uint(-1): bitmap.get_uint8: byte index -1 is out of bounds [0, 3).
  ^ byte(0): 0
+get_uint(0): 0
  ^ byte(1): 0
+get_uint(1): 0
  ^ byte(2): 0
+get_uint(2): 0
  ^ byte(3): bitmap.byte: byte index 3 is out of bounds [0, 3).
+get_uint(3): bitmap.get_uint8: byte index 3 is out of bounds [0, 3).
  ^ byte(4): bitmap.byte: byte index 4 is out of bounds [0, 3).
+get_uint(4): bitmap.get_uint8: byte index 4 is out of bounds [0, 3).

  Bitmap: 00000000.00000000.0
  ^ byte(-1): bitmap.byte: byte index -1 is out of bounds [0, 2).
+get_uint(-1): bitmap.get_uint8: byte index -1 is out of bounds [0, 2).
  ^ byte(0): 0
+get_uint(0): 0
  ^ byte(1): 0
+get_uint(1): 0
  ^ byte(2): bitmap.byte: byte index 2 is out of bounds [0, 2).
+get_uint(2): bitmap.get_uint8: byte index 2 is out of bounds [0, 2).
  ^ byte(3): bitmap.byte: byte index 3 is out of bounds [0, 2).
+get_uint(3): bitmap.get_uint8: byte index 3 is out of bounds [0, 2).
+
+Bitmap: 11111111.00000000.11111111
+^ byte(-1): bitmap.byte: byte index -1 is out of bounds [0, 3).
+get_uint(-1): bitmap.get_uint8: byte index -1 is out of bounds [0, 3).
+^ byte(0): 255
+get_uint(0): 255
+^ byte(1): 0
+get_uint(1): 0
+^ byte(2): 255
+get_uint(2): 255
+^ byte(3): bitmap.byte: byte index 3 is out of bounds [0, 3).
+get_uint(3): bitmap.get_uint8: byte index 3 is out of bounds [0, 3).
+
+Bitmap: 11110000.00001111
+^ byte(-1): bitmap.byte: byte index -1 is out of bounds [0, 2).
+get_uint(-1): bitmap.get_uint8: byte index -1 is out of bounds [0, 2).
+^ byte(0): 240
+get_uint(0): 240
+^ byte(1): 15
+get_uint(1): 15
+^ byte(2): bitmap.byte: byte index 2 is out of bounds [0, 2).
+get_uint(2): bitmap.get_uint8: byte index 2 is out of bounds [0, 2).
diff --git a/tests/hard_coded/bitmap_bytes.m b/tests/hard_coded/bitmap_bytes.m
index 35851bb..713979e 100644
--- a/tests/hard_coded/bitmap_bytes.m
+++ b/tests/hard_coded/bitmap_bytes.m
@@ -34,7 +34,15 @@ main(!IO) :-

      % With partial final byte.
      BM2 = bitmap.init(17),
-    do_bitmap_test(BM2, [-1, 0, 1, 2, 3], !IO).
+    do_bitmap_test(BM2, [-1, 0, 1, 2, 3], !IO),
+    io.nl(!IO),
+
+    BM3 = det_from_string("<24:FF00FF>"),
+    do_bitmap_test(BM3, [-1, 0, 1, 2, 3], !IO),
+    io.nl(!IO),
+
+    BM4 = det_from_string("<16:F00F>"),
+    do_bitmap_test(BM4, [-1, 0, 1, 2], !IO).

  :- pred do_bitmap_test(bitmap::in, list(byte_index)::in,
      io::di, io::uo) is cc_multi.
@@ -51,7 +59,16 @@ test_byte_lookup(BM, Index, !IO) :-
          Byte = BM ^ byte(Index)
      then
          io.write_int(Byte, !IO)
-    catch bitmap_error(Error) ->
-        io.write_string(Error, !IO)
+    catch bitmap_error(ByteError) ->
+        io.write_string(ByteError, !IO)
+    ),
+    io.nl(!IO),
+    io.format("get_uint(%d): ", [i(Index)], !IO),
+    ( try []
+        U8 = get_uint8(BM, Index)
+    then
+        io.write_uint8(U8, !IO)
+    catch bitmap_error(U8Error) ->
+        io.write_string(U8Error, !IO)
      ),
      io.nl(!IO).


More information about the reviews mailing list