[m-rev.] for review: Use mmap for Boehm GC in threaded grades on Linux.

Peter Wang novalazy at gmail.com
Fri Jul 25 12:09:21 AEST 2014


Branches: master

Maybe configure should set MR_HAVE_SBRK in mercury_conf.h just for
thread_sbrk?

---

The glibc implementation of malloc (commonly used on Linux) can use sbrk
to acquire more memory, and so does Boehm GC in its default configuration
on Linux.  When both allocators are invoked simultaneously from different
threads then memory corruption may result.

configure.ac:
	Add --enable-gc-mmap option.

	Make --enable-gc-munmap imply --enable-gc-mmap.

	Detect mmap and set MR_HAVE_MMAP.

	On Linux (and potentially other platforms), build Boehm GC for
	threaded grades with -DUSE_MMAP if mmap was found.

Mmake.common.in:
	Add ENABLE_BOEHM_USE_MMAP.

runtime/mercury_conf.h.in:
	Add MR_HAVE_MMAP, although it is not used anywhere yet.

tests/hard_coded/thread_sbrk.m:
	Add test program.  This is not wired into the build system
	because we do not otherwise check for the existence of sbrk.

NEWS:
	Announce changes.

diff --git a/Mmake.common.in b/Mmake.common.in
index 14dce70..343c8c8 100644
--- a/Mmake.common.in
+++ b/Mmake.common.in
@@ -164,8 +164,10 @@ GRADESTRING = $(shell $(SCRIPTS_DIR)/canonical_grade $(ALL_GRADEFLAGS))
 # MCFLAGS	+= --no-infer-all --halt-at-warn --no-warn-inferred-erroneous
 
 # Options to pass to the C compiler when building Boehm-GC.
-BOEHM_CFLAGS = @ENABLE_BOEHM_LARGE_CONFIG@ @ENABLE_BOEHM_USE_MUNMAP@ \
-    @ENABLE_BOEHM_XOPEN_SOURCE@
+BOEHM_CFLAGS = @ENABLE_BOEHM_LARGE_CONFIG@ \
+	@ENABLE_BOEHM_USE_MMAP@ \
+	@ENABLE_BOEHM_USE_MUNMAP@ \
+	@ENABLE_BOEHM_XOPEN_SOURCE@
 
 # Additional options to pass to the C compiler when building Boehm-GC for
 # threads.
diff --git a/NEWS b/NEWS
index ad80d04..4ff4b8a 100644
--- a/NEWS
+++ b/NEWS
@@ -42,6 +42,11 @@ Changes to the Mercury compiler:
     - IRIX
     - OSF/1
 
+* We have added the configure option --enable-gc-mmap.
+
+* We configure Boehm GC to use mmap in threaded grades on Linux to avoid
+  conflicts with glibc malloc leading to memory corruption.
+
 Changes to the extras distribution:
 
 * We have added support for Unicode and other enhancements to the lex and
diff --git a/configure.ac b/configure.ac
index 7138f2e..384f6b8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -652,9 +652,14 @@ AC_SUBST(ENABLE_BOEHM_LARGE_CONFIG)
 
 #-----------------------------------------------------------------------------#
 #
-# Determine whether to define USE_MUNMAP when compiling boehm_gc.
+# Determine whether to define USE_MMAP, USE_MUNMAP when compiling boehm_gc.
 #
 
+AC_ARG_ENABLE(gc-mmap,
+    AC_HELP_STRING([--enable-gc-mmap],
+                   [use mmap instead of sbrk when using Boehm GC]),
+    enable_gc_mmap="$enableval",enable_gc_mmap=no)
+
 AC_ARG_ENABLE(gc-munmap,
     AC_HELP_STRING([--enable-gc-munmap],
                    [enable unmapping of unused pages when using Boehm GC]),
@@ -663,20 +668,39 @@ AC_ARG_ENABLE(gc-munmap,
 AC_MSG_CHECKING(whether to enable unmapping of unused pages when using Boehm GC)
 case "$enable_gc_munmap" in
     yes)
+        # --enable-gc-munmap implies --enable-gc-mmap
+        enable_gc_mmap=yes
         AC_MSG_RESULT(yes)
-        # USE_MUNMAP requires USE_MMAP.
-        ENABLE_BOEHM_USE_MUNMAP="-DUSE_MMAP -DUSE_MUNMAP"
         ;;
     no)
         AC_MSG_RESULT(no)
-        ENABLE_BOEHM_USE_MUNMAP=
         ;;
     *)
-        AC_MSG_RESULT(no)
-        AC_MSG_WARN([unexpected argument to --enable-gc-munmap])
-        ENABLE_BOEHM_USE_MUNMAP=
+        AC_MSG_ERROR([unexpected argument to --enable-gc-munmap])
+        ;;
+esac
+
+AC_MSG_CHECKING(whether to use mmap for Boehm GC)
+case "$enable_gc_mmap" in
+    yes|no)
+        AC_MSG_RESULT($enable_gc_mmap)
+        ;;
+    *)
+        AC_MSG_ERROR([unexpected argument to --enable-gc-mmap])
         ;;
 esac
+
+if test "$enable_gc_mmap" = yes; then
+    ENABLE_BOEHM_USE_MMAP="-DUSE_MMAP"
+else
+    ENABLE_BOEHM_USE_MMAP=
+fi
+if test "$enable_gc_munmap" = yes; then
+    ENABLE_BOEHM_USE_MUNMAP="-DUSE_MUNMAP"
+else
+    ENABLE_BOEHM_USE_MUNMAP=
+fi
+AC_SUBST(ENABLE_BOEHM_USE_MMAP)
 AC_SUBST(ENABLE_BOEHM_USE_MUNMAP)
 
 #-----------------------------------------------------------------------------#
@@ -1278,7 +1302,7 @@ esac
 
 mercury_check_for_functions \
         sysconf getpagesize gethostname \
-        mprotect memalign posix_memalign memmove \
+        mmap mprotect memalign posix_memalign memmove \
         sigaction siginterrupt setitimer \
         snprintf _snprintf vsnprintf _vsnprintf \
         strerror strerror_r strerror_s \
@@ -3033,6 +3057,10 @@ ENABLE_BOEHM_PARALLEL_MARK=
 #
 BOEHM_MISC_CFLAGS_FOR_THREADS=
 
+# This should be set if both the system malloc and Boehm GC (in the default
+# configuration) may both call sbrk.
+avoid_sbrk=
+
 case "$host" in
     *solaris*)
         CFLAGS_FOR_THREADS="-DGC_SOLARIS_PTHREADS -D_REENTRANT"
@@ -3056,6 +3084,7 @@ case "$host" in
         # correctly on Linux.)
         # XXX disabled as it aborts on GC_free when --enable-gc-munmap is used
         # BOEHM_MISC_CFLAGS_FOR_THREADS="-DHBLKSIZE=32768 -DCPP_LOG_HBLKSIZE=15"
+        avoid_sbrk=yes
         ;;
 
     *cygwin*)
@@ -3069,6 +3098,7 @@ case "$host" in
             *)
                 CFLAGS_FOR_THREADS="$WIN32_GC_THREADLIB" ;;
         esac
+        # avoid_sbrk?
         ;;
 
     i*86-pc-mingw*)
@@ -3122,6 +3152,18 @@ case "$host" in
         CFLAGS_FOR_THREADS=""
         ;;
 esac
+
+if test "$avoid_sbrk" = yes; then
+    if test "$ac_cv_func_mmap" = yes; then
+        BOEHM_MISC_CFLAGS_FOR_THREADS="$BOEHM_MISC_CFLAGS_FOR_THREADS -DUSE_MMAP"
+        if test "$enable_gc_mmap" = no; then
+            AC_MSG_NOTICE([using mmap for Boehm GC in threaded grades])
+        fi
+    else
+        AC_MSG_WARN([malloc and Boehm GC both use sbrk; probably unsafe!])
+    fi
+fi
+
 AC_SUBST(CFLAGS_FOR_THREADS)
 AC_SUBST(THREAD_LIBS)
 AC_SUBST(LDFLAGS_FOR_THREADS)
diff --git a/runtime/mercury_conf.h.in b/runtime/mercury_conf.h.in
index 00019da..7ff52c5 100644
--- a/runtime/mercury_conf.h.in
+++ b/runtime/mercury_conf.h.in
@@ -237,6 +237,7 @@
 **	MR_HAVE_SYSCONF     	we have the sysconf() system call.
 **	MR_HAVE_SIGACTION	we have the sigaction() system call.
 **	MR_HAVE_GETPAGESIZE 	we have the getpagesize() system call.
+**	MR_HAVE_MMAP    	we have the mmap() system call.
 **	MR_HAVE_MPROTECT    	we have the mprotect() system call.
 **	MR_HAVE_MEMALIGN    	we have the memalign() function.
 **	MR_HAVE_POSIX_MEMALIGN  we have the posix_memalign() function.
@@ -313,9 +314,10 @@
 #undef	MR_HAVE_SYSCONF
 #undef	MR_HAVE_SIGACTION
 #undef	MR_HAVE_GETPAGESIZE
+#undef	MR_HAVE_MMAP
+#undef	MR_HAVE_MPROTECT
 #undef	MR_HAVE_MEMALIGN
 #undef	MR_HAVE_POSIX_MEMALIGN
-#undef	MR_HAVE_MPROTECT
 #undef	MR_HAVE_STRERROR
 #undef	MR_HAVE_STRERROR_R
 #undef	MR_HAVE_STRERROR_S
diff --git a/tests/hard_coded/thread_sbrk.m b/tests/hard_coded/thread_sbrk.m
new file mode 100644
index 0000000..bc03164
--- /dev/null
+++ b/tests/hard_coded/thread_sbrk.m
@@ -0,0 +1,104 @@
+%-----------------------------------------------------------------------------%
+
+% This program performs Mercury allocations in one thread (e.g. using Boehm GC)
+% and simulates malloc calls in another thread that indirectly call sbrk.
+% If both allocators use sbrk and are invoked simulataneously then
+% memory corruption can result.
+
+:- module thread_sbrk.
+:- interface.
+
+:- import_module io.
+
+:- pred main(io::di, io::uo) is cc_multi.
+
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module bool.
+:- import_module int.
+:- import_module list.
+:- import_module string.
+:- import_module thread.
+:- import_module thread.semaphore.
+
+:- pragma foreign_decl("C", "
+    #include <unistd.h>
+").
+
+:- type tree
+    --->    nil
+    ;       node(int, tree, tree).
+
+%-----------------------------------------------------------------------------%
+
+main(!IO) :-
+    semaphore.init(Sem, !IO),
+    thread.spawn(alloc_thread(Sem), !IO),
+    semaphore.wait(Sem, !IO),
+    sbrk_loop(Sem, !IO),
+    io.write_string("done.\n", !IO).
+
+:- pred sbrk_loop(semaphore::in, io::di, io::uo) is det.
+
+sbrk_loop(Sem, !IO) :-
+    thread.yield(!IO),
+    semaphore.try_wait(Sem, Success, !IO),
+    (
+        Success = yes
+    ;
+        Success = no,
+        % It is hard to trigger a crash by calling malloc because not every
+        % call ends up calling sbrk.  Therefore we call sbrk directly.
+        sbrk(0x100, !IO),
+        sbrk_loop(Sem, !IO)
+    ).
+
+:- pred sbrk(int::in, io::di, io::uo) is det.
+
+:- pragma foreign_proc("C",
+    sbrk(Increment::in, _IO0::di, _IO::uo),
+    [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+    sbrk(Increment);
+").
+
+:- pred alloc_thread(semaphore::in, io::di, io::uo) is cc_multi.
+
+alloc_thread(Sem, !IO) :-
+    semaphore.signal(Sem, !IO),
+    alloc_loop(Sem, 1, !IO).
+
+:- pred alloc_loop(semaphore::in, int::in, io::di, io::uo) is cc_multi.
+
+alloc_loop(Sem, Depth, !IO) :-
+    ( Depth > 20 ->
+        semaphore.signal(Sem, !IO)
+    ;
+        build(Depth, T, 0, _Id),
+        io.format("depth %d, size %d\n", [i(Depth), i(size(T))], !IO),
+        thread.yield(!IO),
+        alloc_loop(Sem, Depth + 1, !IO)
+    ).
+
+:- pred build(int::in, tree::out, int::in, int::out) is det.
+
+build(Depth, T, Id0, Id) :-
+    ( Depth = 1 ->
+        T = nil,
+        Id = Id0
+    ;
+        build(Depth - 1, L, Id0 + 1, Id2),
+        build(Depth - 1, R, Id2, Id),
+        T = node(Id0, L, R)
+    ).
+
+:- func size(tree) = int.
+
+size(nil) = 1.
+size(node(_, L, R)) = 1 + size(L) + size(R).
+
+%-----------------------------------------------------------------------------%
+% vim: ft=mercury ts=4 sts=4 sw=4 et
-- 
1.8.4



More information about the reviews mailing list