[m-rev.] for review: Fix a bug with IO errors while reading files

Paul Bone paul at bone.id.au
Fri Aug 26 22:27:57 AEST 2016


For review by anyone

---

Fix a bug with IO errors while reading files

Mercury did not properly test for IO errors for file reading.  If an IO
error occurred it would be reported as EOF.  If this happened at just
the right spot it could be silently ignored.  I found the mistake when
trying to open a directory as a file.  Opening the file succeeded but
reading from it incorrectly gave an EOF.

library/io.m:
    read_char_code has always been documented as returning -1 for EOF
    and -2 for an error.  But the C code did not correctly implement
    this.  fgetc will return EOF in either case and the caller must use
    ferror or feof to determine what has happened.

    Make it clearer that valid characters and bytes from read_char_code
    must
    >= 0.

    Conform to changes in the runtime regarding supporting ferror

runtime/mercury_library_types.h:
    Add MR_FERROR marcro

    Add ferror field to the MercuryFile struct.

runtime/mercury_file.[ch]:
    Add ferror field to the MercuryFile struct.
---
 library/io.m                    | 83 ++++++++++++++++++++++++++++++++---------
 runtime/mercury_file.c          |  8 ++++
 runtime/mercury_file.h          |  1 +
 runtime/mercury_library_types.h |  6 +++
 4 files changed, 80 insertions(+), 18 deletions(-)

diff --git a/library/io.m b/library/io.m
index 369cd25..9e662c7 100644
--- a/library/io.m
+++ b/library/io.m
@@ -2122,10 +2122,13 @@ read_char(Result, !IO) :-
 
 read_char(Stream, Result, !IO) :-
     read_char_code(Stream, Code, !IO),
-    ( if Code = -1 then
-        Result = eof
-    else if char.to_int(Char, Code) then
+    ( if
+        Code >= 0,
+        char.to_int(Char, Code)
+    then
         Result = ok(Char)
+    else if Code = -1 then
+        Result = eof
     else
         io.make_err_msg("read failed: ", Msg, !IO),
         Result = error(io_error(Msg))
@@ -2135,12 +2138,15 @@ read_char(Stream, Result, !IO) :-
 
 read_char_unboxed(Stream, Result, Char, !IO) :-
     read_char_code(Stream, Code, !IO),
-    ( if Code = -1 then
-        Result = eof,
-        Char = char.det_from_int(0)
-    else if char.to_int(Char0, Code) then
+    ( if
+        Code >= 0,
+        char.to_int(Char0, Code)
+    then
         Result = ok,
         Char = Char0
+    else if Code = -1 then
+        Result = eof,
+        Char = char.det_from_int(0)
     else
         make_err_msg("read failed: ", Msg, !IO),
         Result = error(io_error(Msg)),
@@ -2366,15 +2372,18 @@ read_line(Result, !IO) :-
 
 read_line(Stream, Result, !IO) :-
     read_char_code(Stream, Code, !IO),
-    ( if Code = -1 then
-        Result = eof
-    else if char.to_int(Char, Code) then
+    ( if
+        Code >= 0,
+        char.to_int(Char, Code)
+    then
         ( if Char = '\n' then
             Result = ok([Char])
         else
             read_line_2(Stream, Result0, !IO),
             Result = ok([Char | Result0])
         )
+    else if Code = -1 then
+        Result = eof
     else
         make_err_msg("read failed: ", Msg, !IO),
         Result = error(io_error(Msg))
@@ -2385,15 +2394,18 @@ read_line(Stream, Result, !IO) :-
 
 read_line_2(Stream, Result, !IO) :-
     read_char_code(Stream, Code, !IO),
-    ( if Code = -1 then
-        Result = []
-    else if char.to_int(Char, Code) then
+    ( if
+        Code >= 0,
+        char.to_int(Char, Code)
+    then
         ( if Char = '\n' then
             Result = [Char]
         else
             read_line_2(Stream, Chars, !IO),
             Result = [Char | Chars]
         )
+    else if Code = -1 then
+        Result = []
     else
         Result = []
     ).
@@ -2441,7 +2453,11 @@ read_line_as_string(input_stream(Stream), Result, !IO) :-
         char_code = mercury_get_byte(Stream);
         if (char_code == EOF) {
             if (i == 0) {
-                Res = -1;
+                if (MR_FERROR(*Stream)) {
+                    Res = -2;
+                } else {
+                    Res = -1;
+                }
             }
             break;
         }
@@ -7523,6 +7539,12 @@ ME_closed_stream_putch(MR_StreamInfo *info, int ch)
     return EOF;
 }
 
+static int
+ME_closed_stream_ferror(MR_StreamInfo *info)
+{
+    return 0;
+}
+
 static const MercuryFile MR_closed_stream = {
     /* stream_type  = */    MR_USER_STREAM,
     /* stream_info  = */    { NULL },
@@ -7536,7 +7558,8 @@ static const MercuryFile MR_closed_stream = {
     /* ungetc   = */    ME_closed_stream_ungetch,
     /* getc     = */    ME_closed_stream_getch,
     /* vprintf  = */    ME_closed_stream_vfprintf,
-    /* putc     = */    ME_closed_stream_putch
+    /* putc     = */    ME_closed_stream_putch,
+    /* ferror   = */    ME_closed_stream_ferror
 };
 
 #endif /* MR_NEW_MERCURYFILE_STRUCT */
@@ -7742,7 +7765,11 @@ read_char_code(input_stream(Stream), CharCode, !IO) :-
     if (uc <= 0x7f) {
         CharCode = uc;
     } else if (c == EOF) {
-        CharCode = -1;
+        if (MR_FERROR(*Stream)) {
+            CharCode = -2;
+        } else {
+            CharCode = -1;
+        }
     } else {
         if ((uc & 0xE0) == 0xC0) {
             nbytes = 2;
@@ -7756,7 +7783,17 @@ read_char_code(input_stream(Stream), CharCode, !IO) :-
         if (nbytes > 0) {
             buf[0] = (char) uc;
             for (i = 1; i < nbytes; i++) {
-                uc = mercury_get_byte(Stream);
+                c = mercury_get_byte(Stream);
+                uc = c;
+                if (c == EOF) {
+                    /*
+                    ** If the byte sequence ends early then it is invalid.
+                    ** The next read attempt will determine if this is EOF or
+                    ** an IO error.
+                    */
+                    errno = EILSEQ;
+                    CharCode = -2;
+                }
                 buf[i] = uc;
             }
             buf[i] = '\\0';
@@ -7783,7 +7820,17 @@ read_byte_val(input_stream(Stream), ByteVal, !IO) :-
     [will_not_call_mercury, promise_pure, tabled_for_io,
         does_not_affect_liveness, no_sharing],
 "
-    ByteVal = mercury_get_byte(Stream);
+    int c = mercury_get_byte(Stream);
+
+    if (c == EOF) {
+        if (MR_FERROR(*Stream)) {
+            ByteVal = -2;
+        } else {
+            ByteVal = -1;
+        }
+    } else {
+        ByteVal = c;
+    }
 ").
 
 putback_char(input_stream(Stream), Character, !IO) :-
diff --git a/runtime/mercury_file.c b/runtime/mercury_file.c
index 6d705ad..e855914 100644
--- a/runtime/mercury_file.c
+++ b/runtime/mercury_file.c
@@ -43,6 +43,7 @@ MR_mercuryfile_init(FILE *file, int line_number, MercuryFile *mf)
     mf->getc        = MR_getch;
     mf->vprintf     = MR_vfprintf;
     mf->putc        = MR_putch;
+    mf->ferror      = MR_ferror;
   #ifdef MR_NATIVE_GC
     mf->id = ++next_id;
   #endif
@@ -70,6 +71,13 @@ MR_putch(MR_StreamInfo *info, int ch)
 }
 
 int
+MR_ferror(MR_StreamInfo *info)
+{
+    MR_assert(info != NULL);
+    return ferror(info->file);
+}
+
+int
 MR_close(MR_StreamInfo *info)
 {
     MR_assert(info != NULL);
diff --git a/runtime/mercury_file.h b/runtime/mercury_file.h
index df93b5c..7207808 100644
--- a/runtime/mercury_file.h
+++ b/runtime/mercury_file.h
@@ -24,6 +24,7 @@ void MR_mercuryfile_init(FILE *file, int line_number, MercuryFile *mf);
   int MR_vfprintf(MR_StreamInfo *info, const char *format, va_list ap);
   int MR_read(MR_StreamInfo *info, void *buffer, size_t size);
   int MR_write(MR_StreamInfo *info, const void *buffer, size_t size);
+  int MR_ferror(MR_StreamInfo *info);
 #endif
 
 #endif // MERCURY_FILE_H
diff --git a/runtime/mercury_library_types.h b/runtime/mercury_library_types.h
index 1245aa3..ea4a813 100644
--- a/runtime/mercury_library_types.h
+++ b/runtime/mercury_library_types.h
@@ -54,6 +54,8 @@
 
   #define MR_PUTCH(mf, ch)  putc((ch), MR_file(mf))
 
+  #define MR_FERROR(mf) ferror(MR_file(mf))
+
 #else // MR_NEW_MERCURYFILE_STRUCT
 
   // The possible types of a MercuryFile.
@@ -80,6 +82,7 @@
   typedef int (MR_Stream_getc)(MR_StreamInfo *);
   typedef int (MR_Stream_vprintf)(MR_StreamInfo *, const char *, va_list);
   typedef int (MR_Stream_putc)(MR_StreamInfo *, int);
+  typedef int (MR_Stream_ferror)(MR_StreamInfo *);
 
   // The MercuryFile structure records:
   //
@@ -116,6 +119,8 @@
     MR_Stream_getc      *getc;
     MR_Stream_vprintf   *vprintf;
     MR_Stream_putc      *putc;
+    MR_Stream_ferror    *ferror;
+
   } MercuryFile;
 
   // Access the file and line number fields.
@@ -139,6 +144,7 @@
         ((mf).vprintf)(&((mf).stream_info), (fmt), (args))
   #define MR_PUTCH(mf, ch)                                              \
         ((mf).putc)(&((mf).stream_info), (ch))
+  #define MR_FERROR(mf) ((mf).ferror)(&((mf).stream_info))
 
 #endif  // MR_NEW_MERCURYFILE_STRUCT
 
-- 
2.7.4



More information about the reviews mailing list