[m-rev.] for review: more specific errors when file sizes exceed maximum buffer sizes

Julien Fischer jfischer at opturion.com
Fri Oct 25 16:02:28 AEDT 2019


More specific errors when file sizes exceed maximum buffer sizes.

Predicates such as read_file_as_string and read_binary_input_as_bitmap, use
buffers that are backend by Mercury arrays.  Since Mercury arrays are indexed
using the int type, it is possible to cause the buffer index to overflow when
reading large files on 32-bit platforms.  Detect this situation and throw an
exception that is specific to it.  Currently, the behaviour we get depends on
how much the overflowing index wraps around.

As part of the above, fix the XXXs where we were casting 64-bit stream file
sizes to ints; file stream files sizes are now always returned as int64s.

Julien.

diff --git a/library/io.m b/library/io.m
index 91c1a72..7b63ed4 100644
--- a/library/io.m
+++ b/library/io.m
@@ -200,6 +200,9 @@
      % Returns an error if the file contains a null character, because
      % null characters are not allowed in Mercury strings.
      %
+    % Throws an exception if the number of characters in the stream exceeds the
+    % maximum index of an array on this platform.
+    %
      % WARNING: the returned string is NOT guaranteed to be valid UTF-8
      % or UTF-16.
      %
@@ -1053,8 +1056,10 @@
      bitmap::bitmap_di, bitmap::bitmap_uo, num_bytes::out, io.res::out,
      io::di, io::uo) is det.

-    % Reads all the bytes until eof or error from the current binary input
-    % stream or from the specified binary input stream into a bitmap.
+    % Reads all the bytes until eof on error from the current binary input
+    % stream or the specified binary input stream into a bitmap. Throws an
+    % exception if the number of bytes to be read exceeds the maximum number
+    % of bytes that can be stored in a bitmap on this platform.
      %
  :- pred read_binary_file_as_bitmap(
      io.res(bitmap)::out, io::di, io::uo) is det.
@@ -3626,9 +3631,18 @@ read_binary_file_as_bitmap(Stream, Result, !IO) :-
      % according to the size of the file. Otherwise, just use a default buffer
      % size of 4k minus a bit (to give malloc some room).
      binary_input_stream_file_size(Stream, FileSize, !IO),
-    ( if FileSize >= 0 then
-        binary_input_stream_offset(Stream, CurrentOffset, !IO),
-        RemainingSize = FileSize - CurrentOffset,
+    ( if FileSize >= 0i64 then
+        binary_input_stream_offset64(Stream, CurrentOffset, !IO),
+        RemainingSizeInt64 = FileSize - CurrentOffset,
+        ( if
+            int.bits_per_int = 32,
+            RemainingSizeInt64 > from_int(int.max_int)
+        then
+            error("io.read_binary_file_as_bitmap",
+                "file size exceeds maximum buffer size")
+        else
+            RemainingSize = cast_to_int(RemainingSizeInt64)
+        ),
          some [!BM] (
              !:BM = bitmap.init(RemainingSize * bits_per_byte),
              ( if RemainingSize = 0 then
@@ -4043,18 +4057,30 @@ read_file_as_string_2(Stream, String, NumCUs, Error, NullCharError, !IO) :-
      % according to the size of the file. Otherwise, just use a default buffer
      % size of 4k minus a bit (to give malloc some room).
      input_stream_file_size(input_stream(Stream), FileSize, !IO),
-    ( if FileSize >= 0 then
-        BufferSize0 = FileSize + 1
+    ( if FileSize >= 0i64 then
+        BufferSize0 = FileSize + 1i64
+    else
+        BufferSize0 = 4000i64
+    ),
+
+    % In grades with 32-bit ints, buffer indexes will be 32-bit, so abort if we
+    % need to read in more than int.int_max bytes.
+    ( if
+        int.bits_per_int = 32,
+        BufferSize0 > from_int(int.max_int)
+    then
+        error("io.read_file_as_string: file size exceeds maximum buffer size")
      else
-        BufferSize0 = 4000
+        BufferSize1 = cast_to_int(BufferSize0)
      ),
-    alloc_buffer(BufferSize0, Buffer0),
+
+    alloc_buffer(BufferSize1, Buffer0),

      % Read the file into the buffer (resizing it as we go if necessary),
      % convert the buffer into a string, and see if anything went wrong.
      NumCUs0 = 0,
      read_file_as_string_loop(input_stream(Stream),
-        Buffer0, Buffer, BufferSize0, BufferSize, NumCUs0, NumCUs, Error, !IO),
+        Buffer0, Buffer, BufferSize1, BufferSize, NumCUs0, NumCUs, Error, !IO),
      require(NumCUs < BufferSize, "io.read_file_as_string: overflow"),
      ( if buffer_to_string(Buffer, NumCUs, StringPrime) then
          String = StringPrime,
@@ -4168,21 +4194,17 @@ input_stream_foldl2_io_maybe_stop(Stream, Pred, T0, Res, !IO) :-

  %---------------------------------------------------------------------------%

-    % XXX FIXME this should return an int64.
-:- pred input_stream_file_size(io.input_stream::in, int::out,
+:- pred input_stream_file_size(io.input_stream::in, int64::out,
      io::di, io::uo) is det.

  input_stream_file_size(input_stream(Stream), Size, !IO) :-
-    stream_file_size(Stream, Size64, !IO),
-    Size = int64.cast_to_int(Size64).
+    stream_file_size(Stream, Size, !IO).

-    % XXX FIXME this should return an int64.
-:- pred binary_input_stream_file_size(io.binary_input_stream::in, int::out,
+:- pred binary_input_stream_file_size(io.binary_input_stream::in, int64::out,
      io::di, io::uo) is det.

  binary_input_stream_file_size(binary_input_stream(Stream), Size, !IO) :-
-    stream_file_size(Stream, Size64, !IO),
-    Size = int64.cast_to_int(Size64).
+    stream_file_size(Stream, Size, !IO).

      % stream_file_size(Stream, Size):
      % If Stream is a regular file, then Size is its size (in bytes),


More information about the reviews mailing list