[m-rev.] Parallel runtime thread pinning.

Paul Bone pbone at csse.unimelb.edu.au
Mon Aug 24 08:55:11 AEST 2009


For post-commit review.

A later changeset will address Julien's review comments on my other
recent changes to the parallel runtime.


Parallel runtime thread pinning.

This change introduces two new features in the Mercury runtime:
pinning of threads to CPU cores/threads, and runtime detection of the number
of CPU cores/threads available.

If MR_num_threads has not been specified in the runtime options with the -P
flag, we use the sysconf(_SC_NPROCESSORS_ONLN) call, if available, to detect
the number of CPUs online and set MR_num_threads to that value.  If the number
cannot be detected, MR_num_threads defaults to 1, as before.

Thread pinning is enabled if the runtime was able to detect the number of CPUs
on the machine or the user specifically requests thread pinning with the
--thread-pinning runtime option.  The sched_setaffinity() call is used to pin
each thread to a specific CPU.

I believe that in some cases thread pinning can achieve better performance;
this is yet to be determined, and it may depend on the machine's architecture.
It does make profiling of the runtime system more reliable where the RDTSCP
instruction is not available, by ensuring that a thread is not migrated to a
different CPU between samples of the CPU's TSC.

configure.in:
runtime/mercury_conf.h.in:
	Detect the presence of sched.h, sysconf(), sched_setaffinity() and
	_SC_NPROCESSORS_ONLN.

doc/user_guide.texi:
	Document the new --thread-pinning runtime option.
	Adjust the documentation of -P to reflect the new behaviour.

runtime/mercury_context.c:
	Add the MR_pin_thread() function.
	Create a new global, MR_bool MR_thread_pinning.
	Add the calculation of the number of threads to use to
	MR_init_thread_stuff().
	Correct a bug in a format string in my previous patch.

runtime/mercury_context.h:
	Export the new MR_pin_thread() function.
	Export the new MR_thread_pinning global.
	Correct a previous spelling mistake.
	Adjust the documentation of MR_init_thread_stuff() to reflect the new
	behaviour.

runtime/mercury_wrapper.c:
	Pin the primordial thread to a CPU after it spawns the other threads.
	Add the --thread-pinning runtime configuration option.
	Move the calculation of MR_max_outstanding_contexts to after
	MR_init_thread_stuff() so that it is calculated after the number of CPUs
	available has been determined.
	Add a pause instruction to a spinloop for better behaviour on later
	i386/x86_64 processors.  See the documentation for MR_ATOMIC_PAUSE.

runtime/mercury_thread.c:
	After a thread is spawned, call MR_pin_thread() to pin the thread to a CPU
	if the thread has been created to pick up work from the global work queue.

Index: configure.in
===================================================================
RCS file: /home/mercury1/repository/mercury/configure.in,v
retrieving revision 1.541
diff -u -p -b -r1.541 configure.in
--- configure.in	18 Aug 2009 05:10:39 -0000	1.541
+++ configure.in	19 Aug 2009 06:43:37 -0000
@@ -1107,7 +1107,7 @@ mercury_check_for_functions \
         getpid setpgid fork execlp wait kill \
         grantpt unlockpt ptsname tcgetattr tcsetattr ioctl \
         access sleep opendir readdir closedir mkdir symlink readlink \
-        gettimeofday setenv putenv _putenv posix_spawn
+        gettimeofday setenv putenv _putenv posix_spawn sched_setaffinity
 
 #-----------------------------------------------------------------------------#
 
@@ -1116,7 +1116,8 @@ MERCURY_CHECK_FOR_HEADERS( \
         asm/sigcontext.h sys/param.h sys/time.h sys/times.h \
         sys/types.h sys/stat.h fcntl.h termios.h sys/ioctl.h \
         sys/stropts.h windows.h dirent.h getopt.h malloc.h \
-        semaphore.h pthread.h time.h spawn.h fenv.h sys/mman.h sys/sem.h)
+        semaphore.h pthread.h time.h spawn.h fenv.h sys/mman.h sys/sem.h \
+        sched.h)
 
 if test "$MR_HAVE_GETOPT_H" = 1; then
     GETOPT_H_AVAILABLE=yes
@@ -1138,6 +1139,24 @@ MERCURY_CHECK_FOR_FENV_FUNC([fesetround]
 
 #-----------------------------------------------------------------------------#
 #
+# Check for declarations. 
+#
+
+mercury_check_for_declarations () {
+    for mercury_cv_decl in "$@"
+    do
+        mercury_cv_decl_define="MR_HAVE_`echo $mercury_cv_decl | \
+            tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ`"
+        AC_CHECK_DECL($mercury_cv_decl, [
+            AC_DEFINE_UNQUOTED($mercury_cv_decl_define)
+        ])
+    done
+}
+
+mercury_check_for_declarations "_SC_NPROCESSORS_ONLN"
+
+#-----------------------------------------------------------------------------#
+#
 # Check the basics of sigaction
 #
 
Index: doc/user_guide.texi
===================================================================
RCS file: /home/mercury1/repository/mercury/doc/user_guide.texi,v
retrieving revision 1.590
diff -u -p -b -r1.590 user_guide.texi
--- doc/user_guide.texi	19 Aug 2009 07:45:11 -0000	1.590
+++ doc/user_guide.texi	23 Aug 2009 22:27:28 -0000
@@ -9587,7 +9587,10 @@ This only has an effect if the executabl
 @item -P @var{num}
 @findex -P (runtime option)
 Tells the runtime system to use @var{num} threads
-if the program was built in a parallel grade.
+if the program was built in a parallel low-level C grade.
+The mercury runtime attempts to automatically determine this value if support
+is available from the operating system.
+If it cannot or support is unavailable it defaults to @samp{1}.
 
 @sp 1
 @item --max-contexts-per-thread @var{num}
@@ -9608,6 +9611,17 @@ grade.
 @c and the @samp{MR_PROFILE_PARALLEL_EXECUTION_SUPPORT} C macro was
 @c defined when the runtime system was compiled.
 
+ at sp 1
+ at item --thread-pinning
+ at findex --thread-pinning
+Request that the runtime system attempts to pin mercury engines (POSIX threads)
+to CPU cores/hardware threads.
+This only has an effect if the executable was built in a parallel low-level C
+grade.
+It is disabled by default unless @samp{-P @var{num}} is not specified and the
+runtime system is able to detect the number of processors enabled by the
+operating system.
+
 @c @item -r @var{num}
 @c @findex -r (runtime option)
 @c Repeats execution of the entry point procedure @var{num} times,
Index: runtime/mercury_atomic_ops.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_atomic_ops.c,v
retrieving revision 1.4
diff -u -p -b -r1.4 mercury_atomic_ops.c
--- runtime/mercury_atomic_ops.c	16 Aug 2009 10:18:36 -0000	1.4
+++ runtime/mercury_atomic_ops.c	21 Aug 2009 02:14:29 -0000
@@ -71,18 +71,18 @@ MR_OUTLINE_DEFN(
 
 #if defined(MR_THREAD_SAFE) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
 
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 static MR_bool  MR_rdtscp_is_available = MR_FALSE;
 static MR_bool  MR_rdtsc_is_available = MR_FALSE;
 #endif
 
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 
 /* Set this to 1 to enable some printfs below */
 #define MR_DEBUG_CPU_FEATURE_DETECTION 0 
 
 /*
-** cpuid, rdtscp and rdtsc are i386/amd64 instructions.
+** cpuid, rdtscp and rdtsc are i386/x86_64 instructions.
 */
 static __inline__ void
 MR_cpuid(MR_Unsigned code, MR_Unsigned sub_code,
@@ -98,14 +98,14 @@ MR_rdtsc(MR_uint_least64_t *tsc);
 
 extern void 
 MR_configure_profiling_timers(void) {
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
     MR_Unsigned     a, b, c, d;
     MR_Unsigned     eflags, old_eflags;
 
     /* 
     ** Check for the CPUID instruction.  CPUID is supported if we can flip bit
     ** 21 in the CPU's EFLAGS register.  The assembly below is written in a
-    ** subset of i386 and amd64 assembly.  To read and write EFLAGS we have
+    ** subset of i386 and x86_64 assembly.  To read and write EFLAGS we have
     ** to go via the C stack.
     */
     __asm__ ("pushf; pop %0"
@@ -256,7 +256,7 @@ MR_configure_profiling_timers(void) {
 
 extern void
 MR_profiling_start_timer(MR_Timer *timer) {
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
     /*
     ** If we don't have enough data to fill in all the fields of this structure
     ** we leave them alone, we won't check them later without checking
@@ -275,7 +275,7 @@ MR_profiling_start_timer(MR_Timer *timer
 
 extern void
 MR_profiling_stop_timer(MR_Timer *timer, MR_Stats *stats) {
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
     MR_Timer            now;
     MR_int_least64_t    duration;
     MR_uint_least64_t   duration_squared;
@@ -310,9 +310,9 @@ MR_profiling_stop_timer(MR_Timer *timer,
 }
 
 /*
-** It's convenient that this instruction is the same on both i386 and amd64
+** It's convenient that this instruction is the same on both i386 and x86_64
 */
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
 
 static __inline__ void 
 MR_cpuid(MR_Unsigned code, MR_Unsigned sub_code,
Index: runtime/mercury_atomic_ops.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_atomic_ops.h,v
retrieving revision 1.5
diff -u -p -b -r1.5 mercury_atomic_ops.h
--- runtime/mercury_atomic_ops.h	16 Aug 2009 10:47:55 -0000	1.5
+++ runtime/mercury_atomic_ops.h	21 Aug 2009 02:14:52 -0000
@@ -16,16 +16,6 @@
 
 #include "mercury_std.h"
 
-/*
-** AMD say that __amd64__ is defined by the compiler for 64bit platforms,
-** Intel say that __x86_64__ is the correct macro.  Really these refer to
-** the same thing that is simply branded differently, we use __amd64__ below
-** and define it if necessary ourselves.
-*/
-#if defined(__x86_64__) && !defined(__amd64__)
-#define __amd64__
-#endif
-
 /*---------------------------------------------------------------------------*/
 #if defined(MR_LL_PARALLEL_CONJ)
 
@@ -47,7 +37,7 @@ MR_compare_and_swap_word(volatile MR_Int
             return __sync_bool_compare_and_swap(addr, old, new_val);        \
         } while (0)
 
-#elif defined(__GNUC__) && defined(__amd64__)
+#elif defined(__GNUC__) && defined(__x86_64__)
 
     #define MR_COMPARE_AND_SWAP_WORD_BODY                                   \
         do {                                                                \
@@ -95,7 +85,7 @@ MR_compare_and_swap_word(volatile MR_Int
 MR_EXTERN_INLINE void
 MR_atomic_inc_int(volatile MR_Integer *addr);
 
-#if defined(__GNUC__) && defined(__amd64__)
+#if defined(__GNUC__) && defined(__x86_64__)
 
     #define MR_ATOMIC_INC_INT_BODY                                          \
         do {                                                                \
@@ -148,7 +138,7 @@ MR_atomic_inc_int(volatile MR_Integer *a
 MR_EXTERN_INLINE void
 MR_atomic_dec_int(volatile MR_Integer *addr);
 
-#if defined(__GNUC__) && defined(__amd64__)
+#if defined(__GNUC__) && defined(__x86_64__)
 
     #define MR_ATOMIC_DEC_INT_BODY                                          \
         do {                                                                \
@@ -191,7 +181,7 @@ MR_atomic_dec_int(volatile MR_Integer *a
 MR_EXTERN_INLINE void
 MR_atomic_add_int(volatile MR_Integer *addr, MR_Integer addend);
 
-#if defined(__GNUC__) && defined(__amd64__)
+#if defined(__GNUC__) && defined(__x86_64__)
 
     #define MR_ATOMIC_ADD_INT_BODY                                          \
         do {                                                                \
@@ -233,7 +223,7 @@ MR_atomic_add_int(volatile MR_Integer *a
 MR_EXTERN_INLINE void
 MR_atomic_sub_int(volatile MR_Integer *addr, MR_Integer x);
 
-#if defined(__GNUC__) && defined(__amd64__)
+#if defined(__GNUC__) && defined(__x86_64__)
 
     #define MR_ATOMIC_SUB_INT_BODY                                          \
         do {                                                                \
@@ -294,7 +284,7 @@ MR_atomic_sub_int(volatile MR_Integer *a
  * References: Intel and AMD documentation for PAUSE, Intel optimisation
  * guide.
  */
-#if defined(__GNUC__) && ( defined(__i386__) || defined(__amd64__) )
+#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
 
     #define MR_ATOMIC_PAUSE                                                 \
         do {                                                                \
@@ -344,7 +334,7 @@ typedef struct {
 } MR_Timer;
 
 /*
-** Configure the profiling stats code.  On i386 and amd64 machines this uses
+** Configure the profiling stats code.  On i386 and x86_64 machines this uses
 ** CPUID to determine if the RDTSCP instruction is available and not prohibited
 ** by the OS.
 */
Index: runtime/mercury_conf.h.in
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_conf.h.in,v
retrieving revision 1.64
diff -u -p -b -r1.64 mercury_conf.h.in
--- runtime/mercury_conf.h.in	30 Jul 2009 04:02:56 -0000	1.64
+++ runtime/mercury_conf.h.in	19 Aug 2009 07:13:41 -0000
@@ -136,6 +136,7 @@
 **	MR_HAVE_FENV_H		we have <fenv.h>
 **	MR_HAVE_SYS_MMAN_H	we have <sys/mman.h>
 **	MR_HAVE_SYS_SEM_H 	we have <sys/sem.h>
+**	MR_HAVE_SCHED_H     we have <sched.h>
 */
 #undef	MR_HAVE_SYS_SIGINFO_H
 #undef	MR_HAVE_SYS_SIGNAL_H
@@ -165,6 +166,7 @@
 #undef	MR_HAVE_FENV_H
 #undef	MR_HAVE_SYS_MMAN_H
 #undef	MR_HAVE_SYS_SEM_H
+#undef	MR_HAVE_SCHED_H
 
 /*
 ** MR_HAVE_POSIX_TIMES is defined if we have the POSIX
@@ -263,6 +265,7 @@
 **	MR_HAVE__PUTENV		we have the _putenv() function.
 **	MR_HAVE_POSIX_SPAWN	we have the posix_spawn() function.
 **	MR_HAVE_FESETROUND	we have the fesetround() function.
+**	MR_HAVE_SCHED_SETAFFINITY if we have the sched_setaffinity() function.
 */
 #undef	MR_HAVE_GETPID
 #undef	MR_HAVE_SETPGID
@@ -324,6 +327,7 @@
 #undef	MR_HAVE__PUTENV
 #undef	MR_HAVE_POSIX_SPAWN
 #undef	MR_HAVE_FESETROUND
+#undef	MR_HAVE_SCHED_SETAFFINITY
 
 /*
 ** We use mprotect() and signals to catch stack and heap overflows.
@@ -358,6 +362,14 @@
 #undef	MR_HAVE_SIGCONTEXT_STRUCT_2ARG
 
 /*
+** These specify whether the given C macros are defined.
+**
+** MR_HAVE__SC_NPROCESSORS_ONLN is defined if _SC_NPROCESSORS_ONLN is available
+** as a parameter for sysconf() to determine the number of processors online.
+*/
+#undef	MR_HAVE__SC_NPROCESSORS_ONLN
+
+/*
 ** For debugging purposes, if we get a fatal signal, we print out the
 ** program counter (PC) at which the signal occurred.
 **
Index: runtime/mercury_context.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_context.c,v
retrieving revision 1.67
diff -u -p -b -r1.67 mercury_context.c
--- runtime/mercury_context.c	17 Aug 2009 08:12:17 -0000	1.67
+++ runtime/mercury_context.c	20 Aug 2009 02:31:36 -0000
@@ -31,6 +31,10 @@ ENDINIT
   #include <math.h> /* for sqrt and pow */
 #endif
 
+#ifdef MR_HAVE_SCHED_H 
+#include <sched.h>
+#endif
+
 #include "mercury_memory_handlers.h"
 #include "mercury_context.h"
 #include "mercury_engine.h"             /* for `MR_memdebug' */
@@ -62,6 +66,9 @@ MR_Context              *MR_runqueue_tai
 #ifdef  MR_LL_PARALLEL_CONJ
   MR_SparkDeque         MR_spark_queue;
   MercuryLock           MR_sync_term_lock;
+  MR_bool               MR_thread_pinning = MR_FALSE;
+  static MercuryLock    MR_next_cpu_lock;
+  static MR_Unsigned    MR_next_cpu = 0;
 #endif
 
 MR_PendingContext       *MR_pending_contexts;
@@ -134,6 +141,7 @@ MR_init_thread_stuff(void)
   #ifdef MR_LL_PARALLEL_CONJ
     MR_init_wsdeque(&MR_spark_queue, MR_INITIAL_GLOBAL_SPARK_QUEUE_SIZE);
     pthread_mutex_init(&MR_sync_term_lock, MR_MUTEX_ATTR);
+    pthread_mutex_init(&MR_next_cpu_lock, MR_MUTEX_ATTR);
   #ifdef MR_DEBUG_RUNTIME_GRANULARITY_CONTROL
     pthread_mutex_init(&MR_par_cond_stats_lock, MR_MUTEX_ATTR);
   #endif
@@ -155,6 +163,66 @@ MR_init_thread_stuff(void)
     pthread_cond_init(&MR_thread_barrier_cond, MR_COND_ATTR);
   #endif
 
+    /*
+     * If MR_num_threads is unset, configure it to match the number of
+     * processors on the system.  If we do this then we also prepare to set
+     * processor affinities later on.
+     */
+    if (MR_num_threads == 0)
+    {
+#if defined(MR_HAVE_SYSCONF) && defined(MR_HAVE__SC_NPROCESSORS_ONLN) 
+        long result;
+
+        result = sysconf(_SC_NPROCESSORS_ONLN);
+        if (result < 1) {
+            /* We couldn't determine the number of processors. */
+            MR_num_threads = 1;
+        } else {
+            MR_num_threads = result;
+            /* On systems that don't support sched_setaffinity we don't try to
+            ** automatically enable thread pinning.  This prevents a runtime
+            ** warning that could unnecessarily confuse the user.
+            **/
+#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY) 
+            MR_thread_pinning = MR_TRUE;
+#endif
+        }
+#else
+        MR_num_threads = 1;
+#endif
+    }
+#endif
+}
+
+void
+MR_pin_thread(void)
+{
+#if defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ) && \
+        defined(MR_HAVE_SCHED_SETAFFINITY) 
+    MR_LOCK(&MR_next_cpu_lock, "MR_pin_thread");
+    if (MR_thread_pinning == MR_TRUE) {
+        cpu_set_t   cpus;
+
+        if (MR_next_cpu < CPU_SETSIZE) {
+            CPU_ZERO(&cpus);
+            CPU_SET(MR_next_cpu, &cpus);
+            if (sched_setaffinity(0, sizeof(cpu_set_t), &cpus) == 0)
+            {
+                MR_next_cpu++;
+            } else {
+                perror("Warning: Couldn't set CPU affinity");
+                /* if this failed once it will probably fail again so disable
+                ** it. 
+                */
+                MR_thread_pinning = MR_FALSE;
+            }
+        } else {
+            perror("Warning: Couldn't set CPU affinity due to a static " \
+                "system limit");
+            MR_thread_pinning = MR_FALSE;
+        }
+    }
+    MR_UNLOCK(&MR_next_cpu_lock, "MR_pin_thread");
 #endif
 }
 
@@ -260,7 +328,7 @@ MR_write_out_profiling_parallel_executio
     MR_INTEGER_LENGTH_MODIFIER "ur, %" MR_INTEGER_LENGTH_MODIFIER \
     "unr), sample %ul\n")
 #define MR_FPRINT_STATS_FORMAT_STRING_NONE \
-    ("s: count %" MR_INTEGER_LENGTH_MODIFIER "u (%" \
+    ("%s: count %" MR_INTEGER_LENGTH_MODIFIER "u (%" \
     MR_INTEGER_LENGTH_MODIFIER "ur, %" MR_INTEGER_LENGTH_MODIFIER "unr)\n")
 
 static int 
Index: runtime/mercury_context.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_context.h,v
retrieving revision 1.52
diff -u -p -b -r1.52 mercury_context.h
--- runtime/mercury_context.h	16 Aug 2009 10:47:55 -0000	1.52
+++ runtime/mercury_context.h	20 Aug 2009 02:31:43 -0000
@@ -344,6 +344,7 @@ extern      MR_Context  *MR_runqueue_tai
 #ifdef  MR_LL_PARALLEL_CONJ
   extern    MR_SparkDeque   MR_spark_queue;
   extern    MercuryLock     MR_sync_term_lock;
+  extern    MR_bool         MR_thread_pinning;
 #endif
 
 #if defined(MR_THREAD_SAFE) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT) 
@@ -458,13 +459,20 @@ extern  MR_Context  *MR_create_context(c
 extern  void        MR_destroy_context(MR_Context *context);
 
 /*
-** MR_init_thread_stuff() initializes the lock structures for the runqueue.
+** MR_init_thread_stuff() initializes the lock structures for the runqueue,
+** and detects the number of threads to use on the LLC backend.
 */
 extern  void        MR_init_thread_stuff(void);
 
 /*
+** MR_pin_thread() pins the current thread to the next available processor ID,
+** if thread pinning is enabled.
+*/
+extern  void        MR_pin_thread(void);
+
+/*
 ** MR_finalize_thread_stuff() finalizes the lock structures for the runqueue
-** amoung other things setup by MR_init_thread_stuff().
+** among other things setup by MR_init_thread_stuff().
 */
 extern  void        MR_finalize_thread_stuff(void);
 
Index: runtime/mercury_thread.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_thread.c,v
retrieving revision 1.34
diff -u -p -b -r1.34 mercury_thread.c
--- runtime/mercury_thread.c	4 May 2009 01:50:41 -0000	1.34
+++ runtime/mercury_thread.c	19 Aug 2009 03:05:31 -0000
@@ -90,6 +90,7 @@ MR_create_thread_2(void *goal0)
         MR_init_thread(MR_use_now);
         (goal->func)(goal->arg);
     } else {
+        MR_pin_thread();
         MR_init_thread(MR_use_later);
     }
 
Index: runtime/mercury_wrapper.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_wrapper.c,v
retrieving revision 1.198
diff -u -p -b -r1.198 mercury_wrapper.c
--- runtime/mercury_wrapper.c	16 Aug 2009 10:18:36 -0000	1.198
+++ runtime/mercury_wrapper.c	19 Aug 2009 07:11:18 -0000
@@ -289,7 +289,13 @@ static  char        *MR_mem_usage_report
 
 static  int         MR_num_output_args = 0;
 
-MR_Unsigned         MR_num_threads = 1;
+/*
+** This is initialized to zero.  If it is still zero after configuration of
+** the runtime but before threads are started, then the number of processors
+** on the system is detected and used, if support is available.  Otherwise we
+** fall back to 1.
+*/
+MR_Unsigned         MR_num_threads = 0;
 
 static  MR_bool     MR_print_table_statistics = MR_FALSE;
 
@@ -582,6 +588,7 @@ mercury_runtime_init(int argc, char **ar
 #ifdef MR_THREAD_SAFE
     /* MR_init_thread_stuff() must be called prior to MR_init_memory() */
     MR_init_thread_stuff();
+    MR_max_outstanding_contexts = MR_max_contexts_per_thread * MR_num_threads;
     MR_primordial_thread = pthread_self();
 #endif
 
@@ -623,9 +630,10 @@ mercury_runtime_init(int argc, char **ar
         for (i = 1 ; i < MR_num_threads ; i++) {
             MR_create_thread(NULL);
         }
-
+        MR_pin_thread();
         while (MR_num_idle_engines < MR_num_threads-1) {
             /* busy wait until the worker threads are ready */
+            MR_ATOMIC_PAUSE;
         }
     }
   #endif /* ! MR_LL_PARALLEL_CONJ */
@@ -1208,6 +1216,7 @@ enum MR_long_option {
     MR_GEN_NONDETSTACK_REDZONE_SIZE,
     MR_GEN_NONDETSTACK_REDZONE_SIZE_KWORDS,
     MR_MAX_CONTEXTS_PER_THREAD,
+    MR_THREAD_PINNING,
     MR_PROFILE_PARALLEL_EXECUTION,
     MR_MDB_TTY,
     MR_MDB_IN,
@@ -1305,6 +1314,7 @@ struct MR_option MR_long_opts[] = {
     { "gen-nondetstack-zone-size-kwords",
         1, 0, MR_GEN_NONDETSTACK_REDZONE_SIZE_KWORDS },
     { "max-contexts-per-thread",        1, 0, MR_MAX_CONTEXTS_PER_THREAD },
+    { "thread-pinning",                 0, 0, MR_THREAD_PINNING },
     { "profile-parallel-execution",     0, 0, MR_PROFILE_PARALLEL_EXECUTION },
     { "mdb-tty",                        1, 0, MR_MDB_TTY },
     { "mdb-in",                         1, 0, MR_MDB_IN },
@@ -1718,6 +1728,11 @@ MR_process_options(int argc, char **argv
                 MR_max_contexts_per_thread = size;
                 break;
 
+            case MR_THREAD_PINNING:
+#if defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ)
+                MR_thread_pinning = MR_TRUE;
+#endif
+
             case MR_PROFILE_PARALLEL_EXECUTION:
 #if defined(MR_THREAD_SAFE) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT) 
                 MR_profile_parallel_execution = MR_TRUE;
@@ -2201,8 +2216,6 @@ MR_process_options(int argc, char **argv
         }
     }
 
-    MR_max_outstanding_contexts = MR_max_contexts_per_thread * MR_num_threads;
-
     if (MR_lld_print_min > 0 || MR_lld_start_name != NULL) {
         MR_lld_print_enabled = 0;
     }
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 481 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20090824/6be3fd75/attachment.sig>


More information about the reviews mailing list