[m-dev.] Binary Input Stream Bug Under Windows?

Fergus Henderson fjh at cs.mu.OZ.AU
Thu Sep 23 11:53:38 AEST 1999


On 22-Sep-1999, Ralph Becket <rbeck at microsoft.com> wrote:
> It transpires that EOF under Windows is taken as the byte ^Z, a la the C
> NUL string terminator, except for files opened for binary input.  However,
> if I do
> 
> $ cat a_binary_file_containing_^Zs | mercury_prog
> 
> where mercury_prog is using io__read_byte/3, mercury_prog will incorrectly
> register EOF the first time it hits a ^Z in the input stream.  Or is this
> an oddity of piped input under NT?  If not, I suspect that the standard
> binary input stream is really just stdin with no changes.  A bug, n'est-ce pas?

Yes.

Under DOS/Windows, when writing C code, if you want to use stdin as a binary
stream, then you need to put it in binary mode first.  I forget off-hand how
to do this, and indeed the exact method may vary between different C
implementations, since AFAIK the C standard does not provide any way to do it.
But with Cygwin I think there is a function called fsetmode() or setmode()
or _setmode() or something like that which you can use to set stdin to
binary mode.  Another alternative is to use fdopen() to create a new FILE
that also refers to stdin but in binary mode.

When writing Mercury code, it should be enough to just use
io__stdin_binary_stream (which io__read_byte/3 does implicitly;
it calls io__stdin_binary_stream and then io__read_byte/4).
However, as you suspected, our current implementation of
io__stdin_binary_stream does just use stdin with no changes.

The following patch should hopefully fix that.
However, I haven't tested it on Windows.

----------

Fix a problem with io__stdin_binary_stream not being in binary mode
on Windows NT.

Also improve the portability of the implementation of io__stream_file_size.

configure.in:
runtime/mercury_conf.h.in:
	Check for fileno(), fdopen(), fstat(), and <sys/stat.h>.

library/io.m:
	Create a separate FILE for binary stdin, and initialize
	it to binary mode using `fdopen(fileno(stdin), "rb")'.
	Likewise for stdout (using `fdopen(fileno(stdout), "wb")').

	Add some #ifdefs around code using fstat() and fileno()
	in io__stream_file_size, to make things easier when porting
	to non-POSIX systems.

	Also add some comments.

Workspace: /home/mercury0/fjh/mercury
Index: configure.in
===================================================================
RCS file: /home/mercury1/repository/mercury/configure.in,v
retrieving revision 1.180
diff -u -r1.180 configure.in
--- configure.in	1999/09/14 18:07:06	1.180
+++ configure.in	1999/09/23 01:09:55
@@ -223,7 +223,8 @@
 	*-cygwin*)
 		ac_cv_func_mprotect=no ;;
 esac
-AC_HAVE_FUNCS(sysconf getpagesize memalign mprotect sigaction setitimer strerror memmove)
+AC_HAVE_FUNCS(sysconf getpagesize memalign mprotect sigaction setitimer)
+AC_HAVE_FUNCS(strerror memmove fileno fdopen fstat)
 #-----------------------------------------------------------------------------#
 AC_CHECK_HEADER(sys/wait.h, HAVE_SYS_WAIT_H=1)
 if test "$HAVE_SYS_WAIT_H" = 1; then
@@ -259,6 +260,11 @@
 AC_CHECK_HEADER(sys/time.h, HAVE_SYS_TIME_H=1)
 if test "$HAVE_SYS_TIME_H" = 1; then
 	AC_DEFINE(HAVE_SYS_TIME)
+fi
+#-----------------------------------------------------------------------------#
+AC_CHECK_HEADER(sys/stat.h, HAVE_SYS_STAT_H=1)
+if test "$HAVE_SYS_STAT_H" = 1; then
+	AC_DEFINE(HAVE_SYS_STAT_H)
 fi
 #-----------------------------------------------------------------------------#
 AC_CHECK_HEADER(dlfcn.h, HAVE_DLFCN_H=1)
Index: library/io.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/io.m,v
retrieving revision 1.180
diff -u -r1.180 io.m
--- io.m	1999/09/23 01:36:17	1.180
+++ io.m	1999/09/23 01:40:33
@@ -1502,20 +1502,14 @@
 :- pred io__stream_file_size(stream, int, io__state, io__state).
 :- mode io__stream_file_size(in, out, di, uo) is det.
 % io__stream_file_size(Stream, Size):
-%	if Stream is a regular file, then Size is its size,
+%	if Stream is a regular file, then Size is its size (in bytes),
 %	otherwise Size is -1.
 
 :- pragma c_header_code("
 	#include <unistd.h>
+#ifdef HAVE_SYS_STAT_H
 	#include <sys/stat.h>
-
-	/*
-	** in case some non-POSIX implementation doesn't have S_ISREG(),
-	** define it to always fail
-	*/
-	#ifndef S_ISREG
-	#define S_ISREG(x) FALSE
-	#endif
+#endif
 ").
 
 :- pragma c_code(io__stream_file_size(Stream::in, Size::out,
@@ -1523,12 +1517,18 @@
 		[will_not_call_mercury, thread_safe],
 "{
 	MercuryFile *f = (MercuryFile *) Stream;
+#if defined(HAVE_FSTAT) && \
+    (defined(HAVE_FILENO) || defined(fileno)) && \
+    defined(S_ISREG)
 	struct stat s;
 	if (fstat(fileno(f->file), &s) == 0 && S_ISREG(s.st_mode)) {
 		Size = s.st_size;
 	} else {
 		Size = -1;
 	}
+#else
+	Size = -1;
+#endif
 }").
 
 %-----------------------------------------------------------------------------%
@@ -2412,6 +2412,7 @@
 
 :- pragma c_code(io__set_globals(Globals::di, IOState0::di, IOState::uo), 
 		will_not_call_mercury, "
+	/* XXX need to globalize the memory */
 	ML_io_user_globals = Globals;
 	update_io(IOState0, IOState);
 ").
@@ -2600,12 +2601,14 @@
 #include <errno.h>
 
 #ifdef HAVE_SYS_WAIT
-#include <sys/wait.h>
+  #include <sys/wait.h>		/* for WIFEXITED, WEXITSTATUS, etc. */
 #endif
 
 extern MercuryFile mercury_stdin;
 extern MercuryFile mercury_stdout;
 extern MercuryFile mercury_stderr;
+extern MercuryFile mercury_stdin_binary;
+extern MercuryFile mercury_stdout_binary;
 extern MercuryFile *mercury_current_text_input;
 extern MercuryFile *mercury_current_text_output;
 extern MercuryFile *mercury_current_binary_input;
@@ -2630,10 +2633,12 @@
 MercuryFile mercury_stdin = { NULL, 1 };
 MercuryFile mercury_stdout = { NULL, 1 };
 MercuryFile mercury_stderr = { NULL, 1 };
+MercuryFile mercury_stdin_binary = { NULL, 1 };
+MercuryFile mercury_stdout_binary = { NULL, 1 };
 MercuryFile *mercury_current_text_input = &mercury_stdin;
 MercuryFile *mercury_current_text_output = &mercury_stdout;
-MercuryFile *mercury_current_binary_input = &mercury_stdin;
-MercuryFile *mercury_current_binary_output = &mercury_stdout;
+MercuryFile *mercury_current_binary_input = &mercury_stdin_binary;
+MercuryFile *mercury_current_binary_output = &mercury_stdout_binary;
 
 void
 mercury_init_io(void)
@@ -2641,6 +2646,27 @@
 	mercury_stdin.file = stdin;
 	mercury_stdout.file = stdout;
 	mercury_stderr.file = stderr;
+#if defined(HAVE_FDOPEN) && (defined(HAVE_FILENO) || defined(fileno))
+	mercury_stdin_binary.file = fdopen(fileno(stdin), ""rb"");
+	if (mercury_stdin_binary.file == NULL) {
+		fatal_error(""error opening standard input stream in ""
+			""binary mode:\n\tfdopen() failed: %s"",
+			strerror(errno));
+	}
+	mercury_stdout_binary.file = fdopen(fileno(stdout), ""wb"");
+	if (mercury_stdout_binary.file == NULL) {
+		fatal_error(""error opening standard output stream in ""
+			""binary mode:\n\tfdopen() failed: %s"",
+			strerror(errno));
+	}
+#else
+	/*
+	** XXX Standard ANSI/ISO C provides no way to set stdin/stdout
+	** to binary mode.  I guess we just have to punt...
+	*/
+	mercury_stdin_binary.file = stdin;
+	mercury_stdout_binary.file = stdout;
+#endif
 }
 
 ").
@@ -3002,13 +3028,13 @@
 
 :- pragma c_code(io__stdin_binary_stream(Stream::out, IO0::di, IO::uo),
 		[will_not_call_mercury, thread_safe], "
-	Stream = (Word) &mercury_stdin;
+	Stream = (Word) &mercury_stdin_binary;
 	update_io(IO0, IO);
 ").
 
 :- pragma c_code(io__stdout_binary_stream(Stream::out, IO0::di, IO::uo),
 		[will_not_call_mercury, thread_safe], "
-	Stream = (Word) &mercury_stdout;
+	Stream = (Word) &mercury_stdout_binary;
 	update_io(IO0, IO);
 ").
 
@@ -3282,6 +3308,14 @@
 	io__make_temp(Dir, "mtmp", Name).
 
 /*---------------------------------------------------------------------------*/
+
+/*
+** XXX	The code for io__make_temp assumes POSIX.
+**	It uses the functions open(), close(), and getpid()
+**	and the macros EEXIST, O_WRONLY, O_CREAT, and O_EXCL.
+**	We should be using conditional compilation here to
+**	avoid these POSIX dependencies.
+*/
 
 %#include <stdio.h>
 
Index: runtime/mercury_conf.h.in
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_conf.h.in,v
retrieving revision 1.22
diff -u -r1.22 mercury_conf.h.in
--- mercury_conf.h.in	1999/08/24 01:35:44	1.22
+++ mercury_conf.h.in	1999/09/23 00:44:09
@@ -100,6 +100,7 @@
 **	HAVE_SYS_TIME		we have <sys/time.h>
 **	HAVE_SYS_PARAM		we have <sys/param.h>
 **	HAVE_SYS_WAIT		we have <sys/wait.h>
+**	HAVE_SYS_STAT_H		we have <sys/stat.h>
 **	HAVE_DLFCN_H		we have <dlfcn.h>
 */
 #undef	HAVE_SYS_SIGINFO
@@ -109,6 +110,7 @@
 #undef	HAVE_SYS_TIME
 #undef	HAVE_SYS_PARAM
 #undef	HAVE_SYS_WAIT
+#undef	HAVE_SYS_STAT_H
 #undef	HAVE_DLFCN_H
 
 /*
@@ -137,6 +139,12 @@
 **	HAVE_DLCLOSE   		we have the dlclose() function.
 **	HAVE_DLSYM   		we have the dlsym() function.
 **	HAVE_DLERROR   		we have the dlerror() function.
+**	HAVE_FSTAT 		we have the fstat() function.
+**	HAVE_FDOPEN 		we have the fdopen() function.
+**	HAVE_FILENO 		we have the fileno() function.
+**				Note that fileno() may be a macro
+**				rather than a function, so you should use
+**				#if defined(fileno) || defined(HAVE_FILENO)
 */
 #undef	HAVE_SYSCONF
 #undef	HAVE_SIGACTION
@@ -150,6 +158,9 @@
 #undef	HAVE_DLCLOSE
 #undef	HAVE_DLSYM
 #undef	HAVE_DLERROR
+#undef	HAVE_FSTAT
+#undef	HAVE_FDOPEN
+#undef	HAVE_FILENO
 
 /*
 ** RETSIGTYPE: the return type of signal handlers.
-- 
Fergus Henderson <fjh at cs.mu.oz.au>  |  "I have always known that the pursuit
WWW: <http://www.cs.mu.oz.au/~fjh>  |  of excellence is a lethal habit"
PGP: finger fjh at 128.250.37.3        |     -- the last words of T. S. Garp.
--------------------------------------------------------------------------
mercury-developers mailing list
Post messages to:       mercury-developers at cs.mu.oz.au
Administrative Queries: owner-mercury-developers at cs.mu.oz.au
Subscriptions:          mercury-developers-request at cs.mu.oz.au
--------------------------------------------------------------------------



More information about the developers mailing list