[m-rev.] For review: Fix a crash that can occur in parallel, stack-segment grades.

Paul Bone pbone at csse.unimelb.edu.au
Wed May 26 14:14:22 AEST 2010


For review by anyone.

I'll commit this once the bootchecks finish in a number of grades.


Fix a crash that can occur in parallel, stack-segment grades.

MR_destroy_context will cache contexts in case the runtime needs a context in
the near future.  Because the context no-longer represents an on-going
computation MR_destroy_context did not copy context-data out of the
MercuryEngine and real machine registers into the context before caching it.
However in a stack segments grade a one of these values is the current stack
segment, this means that a context may be cached and later re-used which refers
to a stack segment that another context is now using, the cached context will
then trash the other context's stack.

The solution is to save the context before caching it.

This change also contains code that was helpful in diagnosing this problem.

runtime/mercury_context.c:
    Fix the bug (as above).

    Initialise the MR_init_engine_array_lock in MR_setup_thread_stuff.

    Print out logging messages if MR_DEBUG_STACK_SEGMENTS is defined in various
    places.

runtime/mercury_debug.h:
runtime/mercury_debug.c:
    Write code for printing out debug log messages in a grade appropriate-way.

runtime/mercury_memory_handlers.c:
    When exiting a signal handler flush the threadscope buffers of all engines
    before re-raising the signal.

runtime/mercury_engine.h:
runtime/mercury_engine.c:
    In thread safe grades provide an array of pointers to Mercury engines in
    the runtime.  This is used to flush the threadscope buffers in the signal
    handlers.  It may be used to improve work stealing in the future.

runtime/mercury_thread.h:
runtime/mercury_thread.c:
    When creating threads add each one's engine to the array of engine pointers.

runtime/mercury_wrapper.c:
    Allocate the array of engine pointers when the runtime starts up.

Index: runtime/mercury_context.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_context.c,v
retrieving revision 1.79
diff -u -p -b -r1.79 mercury_context.c
--- runtime/mercury_context.c	20 Mar 2010 10:15:51 -0000	1.79
+++ runtime/mercury_context.c	26 May 2010 03:26:26 -0000
@@ -186,6 +186,7 @@ MR_init_thread_stuff(void)
     pthread_cond_init(&MR_runqueue_cond, MR_COND_ATTR);
     pthread_mutex_init(&free_context_list_lock, MR_MUTEX_ATTR);
     pthread_mutex_init(&MR_global_lock, MR_MUTEX_ATTR);
+    pthread_mutex_init(&MR_init_engine_array_lock, MR_MUTEX_ATTR);
     pthread_mutex_init(&MR_pending_contexts_lock, MR_MUTEX_ATTR);
   #ifdef MR_LL_PARALLEL_CONJ
     pthread_mutex_init(&spark_deques_lock, MR_MUTEX_ATTR);
@@ -686,8 +687,17 @@ MR_create_context(const char *id, MR_Con
     }
     MR_UNLOCK(&free_context_list_lock, "create_context i");
 
+#ifdef MR_DEBUG_STACK_SEGMENTS
+    MR_debug_log_message("Re-used an old context: %p", c);
+#endif
+
     if (c == NULL) {
         c = MR_GC_NEW(MR_Context);
+#ifdef MR_DEBUG_STACK_SEGMENTS
+        if (c) {
+            MR_debug_log_message("Creating new context: %p", c);
+        }
+#endif
         c->MR_ctxt_size = ctxt_size;
 #ifndef MR_HIGHLEVEL_CODE
         c->MR_ctxt_detstack_zone = NULL;
@@ -708,15 +718,28 @@ MR_create_context(const char *id, MR_Con
     return c;
 }
 
+/*
+** TODO: We should gc the cached contexts, or otherwise not cache to many.
+*/
 void 
 MR_destroy_context(MR_Context *c)
 {
     MR_assert(c);
 
+#ifdef MR_DEBUG_STACK_SEGMENTS
+    MR_debug_log_message("Caching old context: %p", c);
+#endif
+
 #ifdef MR_THREAD_SAFE
     MR_assert(c->MR_ctxt_saved_owners == NULL);
 #endif
 
+    /*
+    ** Save the context first, even though we're not saving a computation
+    ** that's in progress we are saving some bookkeeping information.
+    */
+    MR_save_context(c);
+
     /* XXX not sure if this is an overall win yet */
 #if 0 && defined(MR_CONSERVATIVE_GC) && !defined(MR_HIGHLEVEL_CODE)
     /* Clear stacks to prevent retention of data. */
@@ -1186,6 +1209,10 @@ MR_define_entry(MR_do_runnext);
 
     /* Discard whatever unused context we may have and switch to tmp. */
     if (MR_ENGINE(MR_eng_this_context) != NULL) {
+#ifdef MR_DEBUG_STACK_SEGMENTS
+        MR_debug_log_message("destroying old context %p",
+            MR_ENGINE(MR_eng_this_context));
+#endif
         MR_destroy_context(MR_ENGINE(MR_eng_this_context));
     }
 #ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
@@ -1196,6 +1223,9 @@ MR_define_entry(MR_do_runnext);
 #endif
     MR_ENGINE(MR_eng_this_context) = ready_context;
     MR_load_context(ready_context);
+#ifdef MR_DEBUG_STACK_SEGMENTS
+    MR_debug_log_message("resuming old context: %p", ready_context);
+#endif
 
     resume_point = (MR_Code*)(ready_context->MR_ctxt_resume);
     ready_context->MR_ctxt_resume = NULL;
@@ -1203,6 +1233,10 @@ MR_define_entry(MR_do_runnext);
 
   ReadySpark:
 
+#ifdef MR_DEBUG_STACK_SEGMENTS
+    MR_debug_log_message("stole spark: st: %p", spark.MR_spark_sync_term);
+#endif
+
 #if 0 /* This is a complicated optimisation that may not be worth-while */
     if (!spark.MR_spark_sync_term->MR_st_is_shared) {
         spark.MR_spark_sync_term_is_shared = MR_TRUE;
@@ -1243,6 +1277,11 @@ MR_define_entry(MR_do_runnext);
         }
 #endif
         MR_load_context(MR_ENGINE(MR_eng_this_context));
+#ifdef MR_DEBUG_STACK_SEGMENTS
+        MR_debug_log_message("created new context for spark: %p",
+            MR_ENGINE(MR_eng_this_context));
+#endif
+
     } else {
 #ifdef MR_THREADSCOPE
         /*
Index: runtime/mercury_debug.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_debug.c,v
retrieving revision 1.30
diff -u -p -b -r1.30 mercury_debug.c
--- runtime/mercury_debug.c	3 Jan 2007 05:17:16 -0000	1.30
+++ runtime/mercury_debug.c	26 May 2010 03:50:03 -0000
@@ -1268,3 +1268,47 @@ MR_find_zone_for_nondet_ptr(const MR_Wor
     return MR_FALSE;
 }
 
+void 
+MR_debug_log_message(const char *format, ...) {
+    char    *buffer;
+    int     len, result;
+    va_list args;
+
+    /*
+    ** This should be a reasonable estimate of the size of the buffer that we
+    ** need.  At least twice the size of the format string or 128 bytes.
+    */
+    len = strlen(format);
+    len = len*2;
+    len = len<128 ? 128 : len;
+
+    buffer = MR_GC_malloc(len);
+    while (1) {
+        va_start(args, format);
+#ifdef MR_HAVE_VSNPRINTF
+        result = vsnprintf(buffer, len, format, args);
+#elif MR_HAVE__VSNPRINTF
+        result = _vsnprintf(buffer, len, formst, args);
+#else
+        MR_fatal_error(
+            "MR_debug_log_message: Don't have vsnprintf or _vsnprintf\n");
+#endif
+        va_end(args);
+        if (result < len) {
+            break;
+        }
+
+        /* Make the buffer bigger. */
+        len = len*2;
+        buffer = MR_GC_realloc(buffer, len);
+    }
+
+#if defined(MR_THREADSCOPE) && defined(MR_THREAD_SAFE)
+    MR_threadscope_post_log_msg(buffer);
+#elif defined(MR_THREADSCOPE)
+    printf("Eng %p: %s\n", MR_thread_engine_base, buffer);
+#else
+    printf("%s\n", buffer);
+#endif
+}
+
Index: runtime/mercury_debug.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_debug.h,v
retrieving revision 1.19
diff -u -p -b -r1.19 mercury_debug.h
--- runtime/mercury_debug.h	1 Nov 2006 02:31:13 -0000	1.19
+++ runtime/mercury_debug.h	26 May 2010 03:18:32 -0000
@@ -257,6 +257,15 @@ extern	void	MR_printlabel(FILE *fp, cons
 extern	void	MR_print_deep_prof_var(FILE *fp, const char *name,
 			MR_CallSiteDynamic *csd);
 
+/*
+** Log a message for debugging purposes.  This will log the message with
+** threadscope if available.  In other parallel grades it will print the
+** address of the MercuryEngine structure with the message to stdout, In all
+** other grades it will print the message to standard out.  In all cases there
+** is no need to put a newline character at the end of the message.
+*/
+extern  void    MR_debug_log_message(const char *format, ...);
+
 /*---------------------------------------------------------------------------*/
 
 #endif /* not MERCURY_DEBUG_H */
Index: runtime/mercury_engine.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_engine.c,v
retrieving revision 1.60
diff -u -p -b -r1.60 mercury_engine.c
--- runtime/mercury_engine.c	10 Jan 2010 04:53:39 -0000	1.60
+++ runtime/mercury_engine.c	26 May 2010 03:18:32 -0000
@@ -79,8 +79,13 @@ MR_Debug_Flag_Info  MR_debug_flag_info[M
     { "detail",         MR_DETAILFLAG }
 };
 
-#ifndef MR_THREAD_SAFE
-  MercuryEngine MR_engine_base;
+#ifdef MR_THREAD_SAFE 
+/*
+** Writes to this array are protected by the init_engine_array_lock.
+*/
+MercuryEngine **MR_all_engine_bases = NULL;
+#else
+MercuryEngine MR_engine_base;
 #endif
 
 /*---------------------------------------------------------------------------*/
Index: runtime/mercury_engine.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_engine.h,v
retrieving revision 1.52
diff -u -p -b -r1.52 mercury_engine.h
--- runtime/mercury_engine.h	10 Jan 2010 04:53:39 -0000	1.52
+++ runtime/mercury_engine.h	26 May 2010 03:18:32 -0000
@@ -471,6 +471,14 @@ typedef struct MR_mercury_engine_struct 
   #define MR_cur_engine()   ((MercuryEngine *) MR_engine_base)
   #define MR_get_engine()   ((MercuryEngine *) MR_thread_engine_base)
 
+  /*
+  ** This points to an array containing MR_num_threads pointers to Mercury engines.
+  ** The first item in the array is the primordial thread.  During
+  ** initialisation the array may be a null pointer as may be any pointer
+  ** inside.
+  */
+  extern MercuryEngine      **MR_all_engine_bases;
+
 #else   /* !MR_THREAD_SAFE */
 
   extern MercuryEngine      MR_engine_base;
Index: runtime/mercury_memory_handlers.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_memory_handlers.c,v
retrieving revision 1.33
diff -u -p -b -r1.33 mercury_memory_handlers.c
--- runtime/mercury_memory_handlers.c	23 May 2010 04:54:22 -0000	1.33
+++ runtime/mercury_memory_handlers.c	26 May 2010 03:18:32 -0000
@@ -56,6 +56,7 @@
 #include "mercury_memory_zones.h"
 #include "mercury_memory_handlers.h"
 #include "mercury_faultaddr.h"
+#include "mercury_threadscope.h"
 
 /*---------------------------------------------------------------------------*/
 
@@ -108,6 +109,7 @@ static  char    *MR_explain_context(void
 static  MR_Code *get_pc_from_context(void *the_context);
 static  MR_Word *get_sp_from_context(void *the_context);
 static  MR_Word *get_curfr_from_context(void *the_context);
+static  void    leave_signal_handler(int sig);
 
 #define STDERR 2
 
@@ -400,9 +402,7 @@ MR_explain_context(void *the_context)
     MR_trace_report(stderr);
     MR_print_dump_stack();
     MR_dump_prev_locations();
-    fprintf(stderr, "exiting from signal handler\n");
-    MR_reset_signal(sig);
-    raise(sig);
+    leave_signal_handler(sig);
 } /* end complex_sighandler() */
 
 #elif defined(MR_HAVE_SIGINFO_T)
@@ -480,9 +480,7 @@ complex_bushandler(int sig, siginfo_t *i
     MR_trace_report(stderr);
     MR_print_dump_stack();
     MR_dump_prev_locations();
-    fprintf(stderr, "exiting from signal handler\n");
-    MR_reset_signal(sig);
-    raise(sig);
+    leave_signal_handler(sig);
 } /* end complex_bushandler() */
 
 static void
@@ -558,9 +556,7 @@ complex_segvhandler(int sig, siginfo_t *
     MR_trace_report(stderr);
     MR_print_dump_stack();
     MR_dump_prev_locations();
-    fprintf(stderr, "exiting from signal handler\n");
-    MR_reset_signal(sig);
-    raise(sig);
+    leave_signal_handler(sig);
 } /* end complex_segvhandler */
 
 #else /* not MR_HAVE_SIGINFO_T && not MR_HAVE_SIGCONTEXT_STRUCT */
@@ -590,9 +586,7 @@ simple_sighandler(int sig)
 
     MR_print_dump_stack();
     MR_dump_prev_locations();
-    fprintf(stderr, "exiting from signal handler\n");
-    MR_reset_signal(sig);
-    raise(sig);
+    leave_signal_handler(sig);
 }
 
 #endif /* not MR_HAVE_SIGINFO_T && not MR_HAVE_SIGCONTEXT_STRUCT */
@@ -1019,3 +1013,24 @@ MR_print_dump_stack(void)
         ret = write(STDERR, msg, strlen(msg));
     } while (ret == -1 && MR_is_eintr(errno));
 }
+
+static void
+leave_signal_handler(int sig)
+{
+    fprintf(stderr, "exiting from signal handler\n");
+#if defined(MR_THREAD_SAFE) && defined(MR_THREADSCOPE)
+    if (MR_all_engine_bases) {
+        int i;
+        for (i = 0; i < MR_num_threads; i++) {
+            if (MR_all_engine_bases[i] && 
+                MR_all_engine_bases[i]->MR_eng_ts_buffer) 
+            {
+                MR_threadscope_finalize_engine(MR_all_engine_bases[i]);
+            }
+        }
+    }
+    MR_finalize_threadscope();
+#endif
+    MR_reset_signal(sig);
+    raise(sig);
+}
Index: runtime/mercury_thread.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_thread.c,v
retrieving revision 1.40
diff -u -p -b -r1.40 mercury_thread.c
--- runtime/mercury_thread.c	10 Jan 2010 04:53:39 -0000	1.40
+++ runtime/mercury_thread.c	26 May 2010 03:18:32 -0000
@@ -28,6 +28,7 @@
     MercuryThreadKey MR_engine_base_key;
   #endif
   MercuryLock       MR_global_lock;
+  MercuryLock       MR_init_engine_array_lock;
 #endif
 
 volatile MR_bool    MR_exit_now;
@@ -123,6 +124,17 @@ MR_init_thread(MR_when_to_use when_to_us
   #ifdef MR_ENGINE_BASE_REGISTER
     MR_engine_base_word = (MR_Word) eng;
   #endif
+    MR_LOCK(&MR_init_engine_array_lock, "MR_init_thread");
+    {
+        int i;
+        for (i = 0; i < MR_num_threads; i++) {
+            if (!MR_all_engine_bases[i]) {
+                MR_all_engine_bases[i] = eng;
+                break;
+            }
+        }
+    }
+    MR_UNLOCK(&MR_init_engine_array_lock, "MR_init_thread");
 #else
     MR_memcpy(&MR_engine_base, eng, sizeof(MercuryEngine));
     MR_restore_registers();
Index: runtime/mercury_thread.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_thread.h,v
retrieving revision 1.29
diff -u -p -b -r1.29 mercury_thread.h
--- runtime/mercury_thread.h	15 Dec 2009 02:29:06 -0000	1.29
+++ runtime/mercury_thread.h	26 May 2010 03:18:32 -0000
@@ -156,14 +156,17 @@ MR_cond_timed_wait(MercuryCond *cond, Me
   ** (the defaults) are specified since if you obtain the lock then
   ** call back into Mercury deadlock could result.
   */
-
   extern MercuryLock        MR_global_lock;
 
   /*
+  ** This lock protects writes to the MR_all_engine_bases structure.
+  */
+  extern MercuryLock        MR_init_engine_array_lock;
+
+  /*
   ** MR_exception_handler_key stores a key which can be used to get
   ** the current exception handler for the current thread.
   */
-
   extern MercuryThreadKey   MR_exception_handler_key;
 
 #else /* not MR_THREAD_SAFE */
Index: runtime/mercury_wrapper.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_wrapper.c,v
retrieving revision 1.208
diff -u -p -b -r1.208 mercury_wrapper.c
--- runtime/mercury_wrapper.c	1 Mar 2010 02:27:30 -0000	1.208
+++ runtime/mercury_wrapper.c	26 May 2010 03:18:32 -0000
@@ -637,6 +637,14 @@ mercury_runtime_init(int argc, char **ar
     */
     MR_setup_threadscope();
   #endif
+
+    MR_all_engine_bases = MR_GC_malloc(sizeof(MercuryEngine*)*MR_num_threads);
+    {
+        int i;
+        for (i = 0; i < MR_num_threads; i++) {
+            MR_all_engine_bases[i] = NULL;
+        }
+    }
 #endif
 
     /*
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 489 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20100526/a59a34f3/attachment.sig>


More information about the reviews mailing list