[m-rev.] for review: Defer CPU detection in high-level C grades, and related cleanups.

Peter Wang novalazy at gmail.com
Tue May 22 18:03:03 AEST 2018


library/thread.m:
    Make num_processors/3 (via its foreign_proc helper num_processors/4)
    call a function in the runtime instead of reading a global variable,
    so we can defer initialisation of the variable.

    Return `no' if unable to determine the number of CPUs instead of
    defaulting to 1.

runtime/mercury_context.c:
runtime/mercury_context.h:
    Add a function MR_get_num_processors().

    Hide MR_num_processors_detected variable.

    Rename MR_detect_num_processors() to
    MR_init_available_cpus_and_detect_num_processors()
    to reflect what it does.

    Add a function MR_free_available_cpus(), currently only implemented
    for the Linux CPU affinity API path.

    Call MR_init_available_cpus_and_detect_num_processors() at startup
    only in low-level C grades. The only reason to call it in high-level
    C grades was to set MR_num_processors_detected. It will now be set
    at the first call to MR_get_num_processors().

    Free the available CPU data structures as soon as we have
    MR_num_processors_detected, unless required for thread pinning.

    Rename MR_setup_num_threads to MR_setup_num_ws_engines.
    Pass the number of processors detected as an argument to make
    the dependency explicit.

    Make MR_pin_primordial_thread return a CPU number as its comment
    suggests.

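    Delete the extern declaration of the global variable
    MR_primordial_thread_cpu and the assignment to it, as the variable is
    currently unused.
    XXX The definition of MR_primordial_thread_cpu in mercury_context.c is
    not removed by this diff.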

    Add a function MR_done_thread_pinning().

runtime/mercury_wrapper.c:
    Call MR_done_thread_pinning() to free up available CPU data
    structures after thread pinning is done with them.

    Add an unrelated XXX comment noting that the GC_enable() call
    overrides MERCURY_OPTIONS -x.
---
 library/thread.m          |   8 +-
 runtime/mercury_context.c | 203 +++++++++++++++++++++++---------------
 runtime/mercury_context.h |  36 +++----
 runtime/mercury_wrapper.c |  25 ++---
 4 files changed, 154 insertions(+), 118 deletions(-)

diff --git a/library/thread.m b/library/thread.m
index 0e11177ed..06302c794 100644
--- a/library/thread.m
+++ b/library/thread.m
@@ -808,26 +808,26 @@ num_processors(MaybeProcs, !IO) :-
     ).
 
 :- pred num_processors(int::out, bool::out, io::di, io::uo) is det.
 
 :- pragma foreign_proc("C",
     num_processors(Procs::out, Success::out, _IO0::di, _IO::uo),
     [promise_pure, thread_safe, will_not_call_mercury,
      will_not_throw_exception, tabled_for_io],
 "
 #ifdef MR_THREAD_SAFE
-    Procs = MR_num_processors_detected;
-    Success = MR_YES;
+    // MR_get_num_processors() takes the global lock.
+    Procs = MR_get_num_processors();
 #else
-    Procs = 1;
-    Success = MR_YES;
+    Procs = 0;
 #endif
+    Success = (Procs > 0) ? MR_YES : MR_NO;
 ").
 
 :- pragma foreign_proc("Java",
     num_processors(Procs::out, Success::out, _IO0::di, _IO::uo),
     [promise_pure, thread_safe, will_not_call_mercury,
      will_not_throw_exception, tabled_for_io],
 "
     Procs = Runtime.getRuntime().availableProcessors();
     Success = bool.YES;
 ").
diff --git a/runtime/mercury_context.c b/runtime/mercury_context.c
index 6937f18bb..48b740083 100644
--- a/runtime/mercury_context.c
+++ b/runtime/mercury_context.c
@@ -8,21 +8,20 @@
 // mercury_context.c - handles multithreading stuff.
 
 /*
 INIT mercury_sys_init_scheduler_wrapper
 ENDINIT
 */
 
 #ifndef _GNU_SOURCE
   // This must be defined prior to including <sched.h> for sched_setaffinity,
   // etc.
-
   #define _GNU_SOURCE
 #endif
 
 #include "mercury_imp.h"
 
 #include <stdio.h>
 #ifdef MR_THREAD_SAFE
   #include "mercury_thread.h"
   #include "mercury_stm.h"
   #ifndef MR_HIGHLEVEL_CODE
@@ -204,50 +203,54 @@ static MR_Integer       MR_profile_parallel_contexts_created_for_sparks = 0;
 
 static MR_Integer       MR_profile_parallel_small_context_reused = 0;
 static MR_Integer       MR_profile_parallel_regular_context_reused = 0;
 static MR_Integer       MR_profile_parallel_small_context_kept = 0;
 static MR_Integer       MR_profile_parallel_regular_context_kept = 0;
   #endif // ! MR_HIGHLEVEL_CODE
 #endif // MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
 
 #ifdef MR_THREAD_SAFE
 
-// The detected number of processors available to this process.
-unsigned         MR_num_processors_detected;
+// The detected number of processors available to this process,
+// or zero if not (yet) determined.
+static unsigned         MR_num_processors_detected;
 
+// Structures representing the processors available to this process.
+// These are required for thread pinning but also used to count
+// MR_num_processors_detected.
 #if defined(MR_HAVE_HWLOC)
     static hwloc_topology_t MR_hw_topology;
     static hwloc_cpuset_t   MR_hw_available_pus = NULL;
 #elif defined(MR_HAVE_LINUX_CPU_AFFINITY_API)
     // The number of CPUs that can be represented by MR_cpuset_available.
     static int          MR_cpuset_num_cpus = 0;
 
     // The size of MR_cpuset_available in bytes, given by
     // CPU_ALLOC_SIZE(MR_cpuset_num_cpus).
     static size_t       MR_cpuset_size = 0;
 
     // A cpuset of MR_cpuset_size bytes, able to represent processors in the
     // range [0, MR_cpuset_num_cpus).
     // NOTE: the processors available to a process are NOT necessarily
     // numbered from 0 to MR_num_processors_detected-1.
     static cpu_set_t    *MR_cpuset_available;
 #endif
 
-// Local variables for thread pinning.
-
 #if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
+// Variables for thread pinning.
 MR_bool                 MR_thread_pinning = MR_FALSE;
 static MercuryLock      MR_thread_pinning_lock;
 static unsigned         MR_num_threads_left_to_pin;
 MR_Unsigned             MR_primordial_thread_cpu;
 #endif
-#endif
+
+#endif  // MR_THREAD_SAFE
 
 #if defined(MR_LL_PARALLEL_CONJ) && \
     defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
 // This is used to give each context its own unique ID. It is accessed with
 // atomic operations.
 
 static MR_ContextId     MR_next_context_id = 0;
 
 // Allocate a context ID.
 
@@ -278,29 +281,31 @@ static MercuryLock      MR_par_cond_stats_lock;
 // This array will contain MR_max_engines pointers to deques.
 // The slot i points to the spark deque of engine id i.
 // Slots are NULL for unallocated engines.
 
 MR_SparkDeque           **MR_spark_deques = NULL;
 #endif
 
 ////////////////////////////////////////////////////////////////////////////
 
 #ifdef MR_THREAD_SAFE
-// Reset or initialize the cpuset that tracks which CPUs are available for
+// Initialize or reset the cpuset that tracks which CPUs are available for
 // binding.
-
+static void     MR_init_available_cpus_and_detect_num_processors(void);
 static void     MR_reset_available_cpus(void);
 
-static void     MR_detect_num_processors(void);
+// Free the cpuset if allocated.
+static void     MR_free_available_cpus(void);
+#endif
 
-  #ifdef MR_LL_PARALLEL_CONJ
-static void     MR_setup_num_threads(void);
+#ifdef MR_LL_PARALLEL_CONJ
+static void     MR_setup_num_ws_engines(unsigned num_processors_detected);
 
 // Try to wake up a sleeping engine and tell it to do action. The engine is
 // only woken if it is in the sleeping state. If the engine is not sleeping
 // use try_notify_engine below. If the engine is woken without a race, this
 // function returns MR_TRUE, otherwise it returns MR_FALSE.
 
 static MR_bool  try_wake_engine(MR_EngineId engine_id, int action,
                     union MR_engine_wake_action_data *action_data);
 
 // Send a notification to the engine. This is applicable if the engine is
@@ -309,22 +314,21 @@ static MR_bool  try_wake_engine(MR_EngineId engine_id, int action,
 // dropable and non-dropable notifications with the notify/wake methods.
 // The only connection is that in general non-dropable notifications should
 // be used wit try_notify_engine.
 //
 // The engine's current state must be passed in engine_state as it is used
 // with the CAS operation.
 
 static MR_bool  try_notify_engine(MR_EngineId engine_id, int action,
                     union MR_engine_wake_action_data *action_data,
                     MR_Unsigned engine_state);
-  #endif    // MR_LL_PARALLEL_CONJ
-#endif // MR_THREAD_SAFE
+#endif    // MR_LL_PARALLEL_CONJ
 
 #ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
 // Write out the profiling data that we collect during execution.
 
 static void     MR_write_out_profiling_parallel_execution(void);
 #endif
 
 #if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
 static void     MR_setup_thread_pinning(void);
 
@@ -363,28 +367,29 @@ MR_init_context_stuff(void)
     #endif
     MR_sem_init(&shutdown_ws_semaphore, 0);
   #endif
     pthread_mutex_init(&MR_STM_lock, MR_MUTEX_ATTR);
 
   #ifdef MR_HIGHLEVEL_CODE
     MR_KEY_CREATE(&MR_backjump_handler_key, NULL);
     MR_KEY_CREATE(&MR_backjump_next_choice_id_key, (void *)0);
   #endif
 
-    MR_detect_num_processors();
-    assert(MR_num_processors_detected > 0);
-
   #ifdef MR_LL_PARALLEL_CONJ
-    MR_setup_num_threads();
-    assert(MR_num_ws_engines > 0);
+    MR_init_available_cpus_and_detect_num_processors();
+    #ifndef MR_HAVE_THREAD_PINNING
+    MR_free_available_cpus();
+    #endif
+
+    MR_setup_num_ws_engines(MR_num_processors_detected);
 
-    #if defined(MR_HAVE_THREAD_PINNING)
+    #ifdef MR_HAVE_THREAD_PINNING
     MR_setup_thread_pinning();
     #endif
 
     MR_granularity_wsdeque_length =
         MR_granularity_wsdeque_length_factor * MR_num_ws_engines;
 
     MR_spark_deques = MR_GC_NEW_ARRAY_ATTRIB(MR_SparkDeque*,
         MR_max_engines, MR_ALLOC_SITE_RUNTIME);
     for (i = 0; i < MR_max_engines; i++) {
         MR_spark_deques[i] = NULL;
@@ -400,21 +405,78 @@ MR_init_context_stuff(void)
         // before they are started would be useless).
 
         esync->d.es_state = ENGINE_STATE_WORKING;
         esync->d.es_action = MR_ENGINE_ACTION_NONE;
     }
   #endif
 #endif // MR_THREAD_SAFE
 }
 
 #ifdef MR_THREAD_SAFE
-// Detect number of processors.
+
+unsigned
+MR_get_num_processors(void)
+{
+    unsigned result;
+
+    MR_OBTAIN_GLOBAL_LOCK("MR_get_num_processors");
+
+    // In low-level threaded grades, MR_num_processors_detected is initialised
+    // at startup to count the number of work-stealing engines to run.
+    // In high-level grades, MR_num_processors_detected is initialised on
+    // demand.
+    if (MR_num_processors_detected == 0) {
+        MR_init_available_cpus_and_detect_num_processors();
+        MR_free_available_cpus();
+    }
+
+    result = MR_num_processors_detected;
+
+    MR_RELEASE_GLOBAL_LOCK("MR_get_num_processors");
+
+    return result;
+}
+
+static void
+MR_init_available_cpus_and_detect_num_processors(void)
+{
+  #ifdef MR_HAVE_HWLOC
+    if (-1 == hwloc_topology_init(&MR_hw_topology)) {
+        MR_fatal_error("Error allocating libhwloc topology object");
+    }
+    if (-1 == hwloc_topology_load(MR_hw_topology)) {
+        MR_fatal_error("Error detecting hardware topology (hwloc)");
+    }
+  #endif
+
+    MR_reset_available_cpus();
+
+  #if defined(MR_HAVE_HWLOC)
+    MR_num_processors_detected = hwloc_bitmap_weight(MR_hw_available_pus);
+  #elif defined(MR_HAVE_LINUX_CPU_AFFINITY_API)
+    MR_num_processors_detected =
+        CPU_COUNT_S(MR_cpuset_size, MR_cpuset_available);
+  #elif defined(MR_WIN32_GETSYSTEMINFO)
+    {
+        SYSTEM_INFO sysinfo;
+        GetSystemInfo(&sysinfo);
+        MR_num_processors_detected = sysinfo.dwNumberOfProcessors;
+    }
+  #elif defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
+    {
+        long n = sysconf(_SC_NPROCESSORS_ONLN);
+        if (n > 0) {
+            MR_num_processors_detected = n;
+        }
+    }
+  #endif
+}
 
 static void
 MR_reset_available_cpus(void)
 {
   #if defined(MR_HAVE_HWLOC)
     hwloc_cpuset_t  inherited_binding;
 
     // Gather the cpuset that our parent process bound this process to.
     //
     // (For information about how to deliberately restrict a process and it's
@@ -498,78 +560,57 @@ MR_reset_available_cpus(void)
     } else {
         MR_perror("Couldn't get CPU affinity");
       #if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
         MR_thread_pinning = MR_FALSE;
       #endif
     }
   #endif
 }
 
 static void
-MR_detect_num_processors(void)
+MR_free_available_cpus(void)
 {
-  #ifdef MR_HAVE_HWLOC
-    if (-1 == hwloc_topology_init(&MR_hw_topology)) {
-        MR_fatal_error("Error allocating libhwloc topology object");
-    }
-    if (-1 == hwloc_topology_load(MR_hw_topology)) {
-        MR_fatal_error("Error detecting hardware topology (hwloc)");
-    }
-  #endif
-
-    // Setup num processors.
-
-    MR_reset_available_cpus();
   #if defined(MR_HAVE_HWLOC)
-    MR_num_processors_detected = hwloc_bitmap_weight(MR_hw_available_pus);
+    // XXX Fill this in.
   #elif defined(MR_HAVE_LINUX_CPU_AFFINITY_API)
-    MR_num_processors_detected =
-        CPU_COUNT_S(MR_cpuset_size, MR_cpuset_available);
-    if (MR_num_processors_detected == 0) {
-        // Carry on even if the MR_cpuset_available is somehow empty.
-        MR_num_processors_detected = 1;
-    }
-  #elif defined(MR_WIN32_GETSYSTEMINFO)
-    {
-        SYSTEM_INFO sysinfo;
-        GetSystemInfo(&sysinfo);
-        MR_num_processors_detected = sysinfo.dwNumberOfProcessors;
+    MR_cpuset_size = 0;
+    if (MR_cpuset_available != NULL) {
+        CPU_FREE(MR_cpuset_available);
+        MR_cpuset_available = NULL;
     }
-  #elif defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
-    MR_num_processors_detected = sysconf(_SC_NPROCESSORS_ONLN);
-  #else
-    #warning "Cannot detect MR_num_processors_detected"
-    MR_num_processors_detected = 1;
   #endif
 }
 
+#endif // MR_THREAD_SAFE
+
 #ifdef MR_LL_PARALLEL_CONJ
 static void
-MR_setup_num_threads(void)
+MR_setup_num_ws_engines(unsigned num_processors_detected)
 {
-    // If MR_num_threads is unset, configure it to match number of processors
-    // on the system. If we do this, then we prepare to set processor
-    // affinities later on.
-
+    // If MR_num_ws_engines is unset, configure it to match the number of
+    // processors available to the process (if known). If we do this, then we
+    // prepare to set processor affinities later on.
     if (MR_num_ws_engines == 0) {
-        MR_num_ws_engines = MR_num_processors_detected;
+        MR_num_ws_engines = num_processors_detected;
+
+        // In case CPU detection failed for some reason.
+        if (MR_num_ws_engines == 0) {
+            MR_num_ws_engines = 1;
+        }
     }
 
-  #ifdef MR_DEBUG_THREADS
     if (MR_debug_threads) {
-        fprintf(stderr, "Detected %d processors, will use %d threads\n",
-            MR_num_processors_detected, MR_num_ws_engines);
+        fprintf(stderr, "Detected %d processors, will use %d ws engines\n",
+            num_processors_detected, MR_num_ws_engines);
     }
-  #endif
 }
 #endif // MR_LL_PARALLEL_CONJ
-#endif // MR_THREAD_SAFE
 
 // Thread pinning.
 
 #if defined(MR_HAVE_THREAD_PINNING) && defined(MR_LL_PARALLEL_CONJ)
 
 static int
 MR_pin_thread_no_locking(void)
 {
     int     initial_cpu;
     int     max;
@@ -620,38 +661,37 @@ MR_pin_thread(void)
 {
     int cpu;
 
     MR_LOCK(&MR_thread_pinning_lock, "MR_pin_thread");
     cpu = MR_pin_thread_no_locking();
     MR_UNLOCK(&MR_thread_pinning_lock, "MR_pin_thread");
 
     return cpu;
 }
 
-void
+int
 MR_pin_primordial_thread(void)
 {
     // We don't need locking to pin the primordial thread as it is called
     // before any other threads exist.
 
-    MR_primordial_thread_cpu = MR_pin_thread_no_locking();
+    return MR_pin_thread_no_locking();
 }
 
 static void MR_setup_thread_pinning(void)
 {
     MR_num_threads_left_to_pin = MR_num_ws_engines;
 
     pthread_mutex_init(&MR_thread_pinning_lock, MR_MUTEX_ATTR);
 
-  // Comment this back in to enable thread pinning by default
-  // if we autodetected the number of CPUs without error.
-
+    // Restore this to enable thread pinning by default
+    // if we autodetected the number of CPUs without error.
 #if 0
     if (MR_num_processors_detected > 1) {
         MR_thread_pinning = MR_TRUE;
     }
 #endif
 }
 
 // Determine which CPU this thread is currently running on.
 
 static int MR_current_cpu(void)
@@ -749,35 +789,20 @@ MR_do_pin_thread(int cpu)
         CPU_FREE(cpuset);
     } else {
         success = MR_FALSE;
     }
     return success;
 #endif
 
     return MR_TRUE;
 }
 
-#if defined(MR_HAVE_HWLOC)
-static MR_bool  MR_make_pu_unavailable(const struct hwloc_obj *pu);
-#endif
-
-static void MR_make_cpu_unavailable(int cpu)
-{
-#if defined(MR_HAVE_HWLOC)
-    hwloc_obj_t pu;
-    pu = hwloc_get_obj_by_type(MR_hw_topology, HWLOC_OBJ_PU, cpu);
-    MR_make_pu_unavailable(pu);
-#elif defined(MR_HAVE_LINUX_CPU_AFFINITY_API)
-    CPU_CLR_S(cpu, MR_cpuset_size, MR_cpuset_available);
-#endif
-}
-
 #if defined(MR_HAVE_HWLOC)
 static MR_bool MR_make_pu_unavailable(const struct hwloc_obj *pu)
 {
     hwloc_obj_t core;
     static int  siblings_to_make_unavailable;
     int         i;
 
 #ifdef MR_DEBUG_THREAD_PINNING
     char        *cpusetstr;
 
@@ -821,20 +846,36 @@ static MR_bool MR_make_pu_unavailable(const struct hwloc_obj *pu)
                     return MR_FALSE;
                 }
             }
         }
     }
 
     return MR_TRUE;
 }
 #endif
 
+static void MR_make_cpu_unavailable(int cpu)
+{
+#if defined(MR_HAVE_HWLOC)
+    hwloc_obj_t pu;
+    pu = hwloc_get_obj_by_type(MR_hw_topology, HWLOC_OBJ_PU, cpu);
+    MR_make_pu_unavailable(pu);
+#elif defined(MR_HAVE_LINUX_CPU_AFFINITY_API)
+    CPU_CLR_S(cpu, MR_cpuset_size, MR_cpuset_available);
+#endif
+}
+
+void MR_done_thread_pinning(void)
+{
+    MR_free_available_cpus();
+}
+
 #endif // MR_HAVE_THREAD_PINNING && MR_LL_PARALLEL_CONJ
 
 void
 MR_finalize_context_stuff(void)
 {
 #ifdef MR_THREAD_SAFE
     pthread_mutex_destroy(&MR_runqueue_lock);
     pthread_mutex_destroy(&free_context_list_lock);
   #ifdef MR_LL_PARALLEL_CONJ
     MR_sem_destroy(&shutdown_ws_semaphore);
diff --git a/runtime/mercury_context.h b/runtime/mercury_context.h
index 11d7d880c..3e0e8349f 100644
--- a/runtime/mercury_context.h
+++ b/runtime/mercury_context.h
@@ -1,14 +1,14 @@
 // vim: ts=4 sw=4 expandtab ft=c
 
 // Copyright (C) 1997-2007, 2009-2011 The University of Melbourne.
-// Copyright (C) 2014 The Mercury team.
+// Copyright (C) 2014-2016, 2018 The Mercury team.
 // This file may only be copied under the terms of the GNU Library General
 // Public License - see the file COPYING.LIB in the Mercury distribution.
 
 // mercury_context.h - defines Mercury multithreading stuff.
 //
 // A "context" is a Mercury thread. (We use a different term than "thread"
 // to avoid confusing Mercury threads and POSIX threads.)
 // Each context is represented by a value of type MR_Context,
 // which contains a detstack, a nondetstack, a trail (if needed), the various
 // pointers that refer to them, a succip, and a thread-resumption continuation.
@@ -358,35 +358,30 @@ struct MR_Context_Struct {
 
 // The runqueue is a linked list of contexts that are runnable.
 
 extern      MR_Context  *MR_runqueue_head;
 extern      MR_Context  *MR_runqueue_tail;
 #ifdef  MR_THREAD_SAFE
   extern    MercuryLock MR_runqueue_lock;
   extern    MercuryCond MR_runqueue_cond;
 #endif
 #ifdef  MR_LL_PARALLEL_CONJ
-  extern    MR_bool         MR_thread_pinning;
+  extern    MR_bool     MR_thread_pinning;
 #endif
 
 #ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
 extern MR_bool      MR_profile_parallel_execution;
 
 // XXX: This is currently unused, we plan to use it in the future. -pbone
 extern MR_Stats     MR_profile_parallel_executed_local_sparks;
 #endif
 
-#ifdef MR_THREAD_SAFE
-// The number of processors detected.
-extern unsigned     MR_num_processors_detected;
-#endif
-
 // As well as the runqueue, we maintain a linked list of contexts
 // and associated file descriptors that are suspended blocked for
 // reads/writes/exceptions. When the runqueue becomes empty, if
 // this list is not empty then we call select and block until one
 // or more of the file descriptors become ready for I/O, then
 // wake the appropriate context.
 // In addition, we should periodically check to see if the list of blocked
 // contexts is non-empty and if so, poll to wake any contexts that
 // can unblock. This, while not yielding true fairness (since this
 // requires the current context to perform some yield-like action),
@@ -423,25 +418,28 @@ extern  MR_PendingContext   *MR_pending_contexts;
   extern volatile MR_Integer    MR_num_idle_ws_engines;
 
   // Spark deques for work stealing,  These are made visible so that they can
   // be initialised by code in mercury_thread.c.
 
   extern MR_SparkDeque          **MR_spark_deques;
 #endif  // !MR_LL_PARALLEL_CONJ
 
 ////////////////////////////////////////////////////////////////////////////
 
-// Initializes a context structure, and gives it the given id. If gen is
-// non-NULL, the context is for the given generator.
+#ifdef MR_THREAD_SAFE
+// Return the number of processors available to this process or 0 if unknown.
+// This function is not directly related to contexts, but it shares code with
+// the code that counts the number of Mercury engines to start.
+extern unsigned     MR_get_num_processors(void);
+#endif
 
-extern  void        MR_init_context(MR_Context *context, const char *id,
-                        MR_Generator *gen);
+////////////////////////////////////////////////////////////////////////////
 
 // Allocates and initializes a new context structure, and gives it
 // the given id. If gen is non-NULL, the context is for the given generator.
 // The `MR_ctxt_thread_local_mutables' member must be initialised separately.
 
 extern  MR_Context  *MR_create_context(const char *id,
                         MR_ContextSize ctxt_size, MR_Generator *gen);
 
 // MR_release_context(context) returns the pointed-to context structure
 // to the free list, and releases resources as necessary.
@@ -453,45 +451,41 @@ extern  MR_Context  *MR_create_context(const char *id,
 // stack segment may still be referenced by the context. If that context
 // is reused later, then it will clobber another context's stack!
 
 extern  void        MR_release_context(MR_Context *context);
 
 // MR_init_context_stuff() initializes the lock structures for the runqueue,
 // and detects the number of threads to use on the LLC backend.
 
 extern  void        MR_init_context_stuff(void);
 
+#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
 // MR_pin_thread() pins the current thread to the next available processor ID,
 // if thread pinning is enabled.
 // MR_pin_primordial_thread() is a special case for the primordial thread.
 // It should only be executed once, and only by the primordial thread _before_
 // the other threads are started.
 //
 // Both functions return the CPU number that the thread is pinned to or would
 // be pinned to if pinning was both enabled and supported. That is a valid
 // value is always returned even if the thread is not actually pinned.
-
-#if defined(MR_LL_PARALLEL_CONJ)
-#if defined(MR_HAVE_THREAD_PINNING)
-extern void         MR_pin_primordial_thread(void);
+extern int          MR_pin_primordial_thread(void);
 extern int          MR_pin_thread(void);
 
-// The CPU that the primordial thread is running on.
-
-extern MR_Unsigned        MR_primordial_thread_cpu;
+// Free resources no longer required after thread pinning is done.
+extern void         MR_done_thread_pinning(void);
 #endif
 
+#ifdef MR_LL_PARALLEL_CONJ
 // Shutdown all the work-stealing engines.
 // (Exclusive engines shut down by themselves.)
-
-extern void
-MR_shutdown_ws_engines(void);
+extern void         MR_shutdown_ws_engines(void);
 #endif
 
 // MR_finalize_context_stuff() finalizes the lock structures for the runqueue
 // among other things setup by MR_init_context_stuff().
 
 extern  void        MR_finalize_context_stuff(void);
 
 // MR_flounder() aborts with a runtime error message. It is called if
 // the runqueue becomes empty and none of the running processes are
 // working, which means that the computation has floundered.
diff --git a/runtime/mercury_wrapper.c b/runtime/mercury_wrapper.c
index f9f0c2341..c4ba5ff45 100644
--- a/runtime/mercury_wrapper.c
+++ b/runtime/mercury_wrapper.c
@@ -632,72 +632,73 @@ mercury_runtime_init(int argc, char **argv)
     // initialize the trail
     MR_trail_zone = MR_create_or_reuse_zone("trail",
         MR_trail_size, MR_next_offset(),
         MR_trail_zone_size, MR_default_handler);
     MR_trail_ptr = (MR_TrailEntry *) MR_trail_zone->min;
     MR_ticket_counter = 1;
     MR_ticket_high_water = 1;
   #endif
 #else
 
-#if defined(MR_LL_PARALLEL_CONJ)
-#if defined(MR_HAVE_THREAD_PINNING)
+  #ifdef MR_LL_PARALLEL_CONJ
+    #ifdef MR_HAVE_THREAD_PINNING
     MR_pin_primordial_thread();
-#endif
-  #if defined(MR_THREADSCOPE)
+    #endif
+
+    #ifdef MR_THREADSCOPE
     // We must setup threadscope before we setup the first engine.
     // Pin the primordial thread, if thread pinning is configured.
-
     MR_setup_threadscope();
 
     // Setup the threadscope string tables before the standard library is
     // initialised or engines are created.
-
     (*MR_address_of_init_modules_threadscope_string_table)();
+    #endif
   #endif
 
-#endif
-
     // Start up the Mercury engine. We don't yet know how many slots will be
     // needed for thread-local mutable values so allocate the maximum number.
-
     MR_init_thread_inner(MR_use_now, MR_PRIMORIDAL_ENGINE_TYPE);
     MR_SET_THREAD_LOCAL_MUTABLES(
         MR_create_thread_local_mutables(MR_MAX_THREAD_LOCAL_MUTABLES));
 
     // Start up additional work-stealing Mercury engines.
-
   #ifdef MR_LL_PARALLEL_CONJ
     {
         int i;
 
         for (i = 1; i < MR_num_ws_engines; i++) {
             MR_create_worksteal_thread();
         }
+
     #ifdef MR_THREADSCOPE
         // TSC Synchronization is not used, support is commented out.
         // See runtime/mercury_threadscope.h for an explanation.
-        *
         for (i = 1; i < MR_num_threads; i++) {
             MR_threadscope_sync_tsc_master();
         }
-
     #endif
+
         while (MR_num_idle_ws_engines < MR_num_ws_engines-1) {
             // busy wait until the worker threads are ready
             MR_ATOMIC_PAUSE;
         }
     }
+
+    #ifdef MR_HAVE_THREAD_PINNING
+    MR_done_thread_pinning();
+    #endif
   #endif // ! MR_LL_PARALLEL_CONJ
 #endif // ! 0
 
 #ifdef MR_BOEHM_GC
+    // XXX overrides MERCURY_OPTIONS -x
     GC_enable();
 #endif
 
     if (MR_memdebug) {
         MR_debug_memory(stderr);
     }
 
     // Initialize profiling.
 
 #if defined(MR_MPROF_PROFILE_TIME) || defined(MR_MPROF_PROFILE_CALLS) \
-- 
2.17.0



More information about the reviews mailing list