[m-rev.] for review: Defer CPU detection in high-level C grades, and related cleanups.
Peter Wang
novalazy at gmail.com
Tue May 22 18:03:03 AEST 2018
library/thread.m:
Make num_processors/3 call a function in the runtime instead of
reading a global variable, so we can defer initialisation of the
variable.
Return `no' if unable to determine the number of CPUs instead of
defaulting to 1.
runtime/mercury_context.c:
runtime/mercury_context.h:
Add a function MR_get_num_processors().
Hide MR_num_processors_detected variable.
Rename MR_detect_num_processors() to
MR_init_available_cpus_and_detect_num_processors()
to reflect what it does.
Add a function MR_free_available_cpus(), currently only implemented
for the Linux CPU affinity API path.
Call MR_init_available_cpus_and_detect_num_processors() at startup
only in low-level C grades. The only reason to call it in high-level
C grades was to set MR_num_processors_detected. It will now be set
at the first call to MR_get_num_processors().
Free the available CPU data structures as soon as we have
MR_num_processors_detected, unless required for thread pinning.
Rename MR_setup_num_threads to MR_setup_num_ws_engines.
Pass the number of processors detected as an argument to make
the dependency explicit.
Make MR_pin_primordial_thread return a CPU number as its comment
suggests.
Delete the global variable MR_primordial_thread_cpu as it is
currently unused.
Add a function MR_done_thread_pinning().
runtime/mercury_wrapper.c:
Call MR_done_thread_pinning() to free up available CPU data
structures after thread pinning is done with them.
Add an unrelated XXX.
---
library/thread.m | 8 +-
runtime/mercury_context.c | 203 +++++++++++++++++++++++---------------
runtime/mercury_context.h | 36 +++----
runtime/mercury_wrapper.c | 25 ++---
4 files changed, 154 insertions(+), 118 deletions(-)
diff --git a/library/thread.m b/library/thread.m
index 0e11177ed..06302c794 100644
--- a/library/thread.m
+++ b/library/thread.m
@@ -808,26 +808,26 @@ num_processors(MaybeProcs, !IO) :-
).
:- pred num_processors(int::out, bool::out, io::di, io::uo) is det.
:- pragma foreign_proc("C",
num_processors(Procs::out, Success::out, _IO0::di, _IO::uo),
[promise_pure, thread_safe, will_not_call_mercury,
will_not_throw_exception, tabled_for_io],
"
#ifdef MR_THREAD_SAFE
- Procs = MR_num_processors_detected;
- Success = MR_YES;
+ // MR_get_num_processors() takes the global lock.
+ Procs = MR_get_num_processors();
#else
- Procs = 1;
- Success = MR_YES;
+ Procs = 0;
#endif
+ Success = (Procs > 0) ? MR_YES : MR_NO;
").
:- pragma foreign_proc("Java",
num_processors(Procs::out, Success::out, _IO0::di, _IO::uo),
[promise_pure, thread_safe, will_not_call_mercury,
will_not_throw_exception, tabled_for_io],
"
Procs = Runtime.getRuntime().availableProcessors();
Success = bool.YES;
").
diff --git a/runtime/mercury_context.c b/runtime/mercury_context.c
index 6937f18bb..48b740083 100644
--- a/runtime/mercury_context.c
+++ b/runtime/mercury_context.c
@@ -8,21 +8,20 @@
// mercury_context.c - handles multithreading stuff.
/*
INIT mercury_sys_init_scheduler_wrapper
ENDINIT
*/
#ifndef _GNU_SOURCE
// This must be defined prior to including <sched.h> for sched_setaffinity,
// etc.
-
#define _GNU_SOURCE
#endif
#include "mercury_imp.h"
#include <stdio.h>
#ifdef MR_THREAD_SAFE
#include "mercury_thread.h"
#include "mercury_stm.h"
#ifndef MR_HIGHLEVEL_CODE
@@ -204,50 +203,54 @@ static MR_Integer MR_profile_parallel_contexts_created_for_sparks = 0;
static MR_Integer MR_profile_parallel_small_context_reused = 0;
static MR_Integer MR_profile_parallel_regular_context_reused = 0;
static MR_Integer MR_profile_parallel_small_context_kept = 0;
static MR_Integer MR_profile_parallel_regular_context_kept = 0;
#endif // ! MR_HIGHLEVEL_CODE
#endif // MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
#ifdef MR_THREAD_SAFE
-// The detected number of processors available to this process.
-unsigned MR_num_processors_detected;
+// The detected number of processors available to this process,
+// or zero if not (yet) determined.
+static unsigned MR_num_processors_detected;
+// Structures representing the processors available to this process.
+// These are required for thread pinning but also used to count
+// MR_num_processors_detected.
#if defined(MR_HAVE_HWLOC)
static hwloc_topology_t MR_hw_topology;
static hwloc_cpuset_t MR_hw_available_pus = NULL;
#elif defined(MR_HAVE_LINUX_CPU_AFFINITY_API)
// The number of CPUs that can be represented by MR_cpuset_available.
static int MR_cpuset_num_cpus = 0;
// The size of MR_cpuset_available in bytes, given by
// CPU_ALLOC_SIZE(MR_cpuset_num_cpus).
static size_t MR_cpuset_size = 0;
// A cpuset of MR_cpuset_size bytes, able to represent processors in the
// range [0, MR_cpuset_num_cpus).
// NOTE: the processors available to a process are NOT necessarily
// numbered from 0 to MR_num_processors_detected-1.
static cpu_set_t *MR_cpuset_available;
#endif
-// Local variables for thread pinning.
-
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
+// Variables for thread pinning.
MR_bool MR_thread_pinning = MR_FALSE;
static MercuryLock MR_thread_pinning_lock;
static unsigned MR_num_threads_left_to_pin;
MR_Unsigned MR_primordial_thread_cpu;
#endif
-#endif
+
+#endif // MR_THREAD_SAFE
#if defined(MR_LL_PARALLEL_CONJ) && \
defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
// This is used to give each context its own unique ID. It is accessed with
// atomic operations.
static MR_ContextId MR_next_context_id = 0;
// Allocate a context ID.
@@ -278,29 +281,31 @@ static MercuryLock MR_par_cond_stats_lock;
// This array will contain MR_max_engines pointers to deques.
// The slot i points to the spark deque of engine id i.
// Slots are NULL for unallocated engines.
MR_SparkDeque **MR_spark_deques = NULL;
#endif
////////////////////////////////////////////////////////////////////////////
#ifdef MR_THREAD_SAFE
-// Reset or initialize the cpuset that tracks which CPUs are available for
+// Initialize or reset the cpuset that tracks which CPUs are available for
// binding.
-
+static void MR_init_available_cpus_and_detect_num_processors(void);
static void MR_reset_available_cpus(void);
-static void MR_detect_num_processors(void);
+// Free the cpuset if allocated.
+static void MR_free_available_cpus(void);
+#endif
- #ifdef MR_LL_PARALLEL_CONJ
-static void MR_setup_num_threads(void);
+#ifdef MR_LL_PARALLEL_CONJ
+static void MR_setup_num_ws_engines(unsigned num_processors_detected);
// Try to wake up a sleeping engine and tell it to do action. The engine is
// only woken if it is in the sleeping state. If the engine is not sleeping
// use try_notify_engine below. If the engine is woken without a race, this
// function returns MR_TRUE, otherwise it returns MR_FALSE.
static MR_bool try_wake_engine(MR_EngineId engine_id, int action,
union MR_engine_wake_action_data *action_data);
// Send a notification to the engine. This is applicable if the engine is
@@ -309,22 +314,21 @@ static MR_bool try_wake_engine(MR_EngineId engine_id, int action,
dropable and non-dropable notifications with the notify/wake methods.
The only connection is that in general non-dropable notifications should
be used with try_notify_engine.
//
// The engine's current state must be passed in engine_state as it is used
// with the CAS operation.
static MR_bool try_notify_engine(MR_EngineId engine_id, int action,
union MR_engine_wake_action_data *action_data,
MR_Unsigned engine_state);
- #endif // MR_LL_PARALLEL_CONJ
-#endif // MR_THREAD_SAFE
+#endif // MR_LL_PARALLEL_CONJ
#ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
// Write out the profiling data that we collect during execution.
static void MR_write_out_profiling_parallel_execution(void);
#endif
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
static void MR_setup_thread_pinning(void);
@@ -363,28 +367,29 @@ MR_init_context_stuff(void)
#endif
MR_sem_init(&shutdown_ws_semaphore, 0);
#endif
pthread_mutex_init(&MR_STM_lock, MR_MUTEX_ATTR);
#ifdef MR_HIGHLEVEL_CODE
MR_KEY_CREATE(&MR_backjump_handler_key, NULL);
MR_KEY_CREATE(&MR_backjump_next_choice_id_key, (void *)0);
#endif
- MR_detect_num_processors();
- assert(MR_num_processors_detected > 0);
-
#ifdef MR_LL_PARALLEL_CONJ
- MR_setup_num_threads();
- assert(MR_num_ws_engines > 0);
+ MR_init_available_cpus_and_detect_num_processors();
+ #ifndef MR_HAVE_THREAD_PINNING
+ MR_free_available_cpus();
+ #endif
+
+ MR_setup_num_ws_engines(MR_num_processors_detected);
- #if defined(MR_HAVE_THREAD_PINNING)
+ #ifdef MR_HAVE_THREAD_PINNING
MR_setup_thread_pinning();
#endif
MR_granularity_wsdeque_length =
MR_granularity_wsdeque_length_factor * MR_num_ws_engines;
MR_spark_deques = MR_GC_NEW_ARRAY_ATTRIB(MR_SparkDeque*,
MR_max_engines, MR_ALLOC_SITE_RUNTIME);
for (i = 0; i < MR_max_engines; i++) {
MR_spark_deques[i] = NULL;
@@ -400,21 +405,78 @@ MR_init_context_stuff(void)
// before they are started would be useless).
esync->d.es_state = ENGINE_STATE_WORKING;
esync->d.es_action = MR_ENGINE_ACTION_NONE;
}
#endif
#endif // MR_THREAD_SAFE
}
#ifdef MR_THREAD_SAFE
-// Detect number of processors.
+
+unsigned
+MR_get_num_processors(void)
+{
+ unsigned result;
+
+ MR_OBTAIN_GLOBAL_LOCK("MR_get_num_processors");
+
+ // In low-level threaded grades, MR_num_processors_detected is initialised
+ // at startup to count the number of work-stealing engines to run.
+ // In high-level grades, MR_num_processors_detected is initialised on
+ // demand.
+ if (MR_num_processors_detected == 0) {
+ MR_init_available_cpus_and_detect_num_processors();
+ MR_free_available_cpus();
+ }
+
+ result = MR_num_processors_detected;
+
+ MR_RELEASE_GLOBAL_LOCK("MR_get_num_processors");
+
+ return result;
+}
+
+static void
+MR_init_available_cpus_and_detect_num_processors(void)
+{
+ #ifdef MR_HAVE_HWLOC
+ if (-1 == hwloc_topology_init(&MR_hw_topology)) {
+ MR_fatal_error("Error allocating libhwloc topology object");
+ }
+ if (-1 == hwloc_topology_load(MR_hw_topology)) {
+ MR_fatal_error("Error detecting hardware topology (hwloc)");
+ }
+ #endif
+
+ MR_reset_available_cpus();
+
+ #if defined(MR_HAVE_HWLOC)
+ MR_num_processors_detected = hwloc_bitmap_weight(MR_hw_available_pus);
+ #elif defined(MR_HAVE_LINUX_CPU_AFFINITY_API)
+ MR_num_processors_detected =
+ CPU_COUNT_S(MR_cpuset_size, MR_cpuset_available);
+ #elif defined(MR_WIN32_GETSYSTEMINFO)
+ {
+ SYSTEM_INFO sysinfo;
+ GetSystemInfo(&sysinfo);
+ MR_num_processors_detected = sysinfo.dwNumberOfProcessors;
+ }
+ #elif defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
+ {
+ long n = sysconf(_SC_NPROCESSORS_ONLN);
+ if (n > 0) {
+ MR_num_processors_detected = n;
+ }
+ }
+ #endif
+}
static void
MR_reset_available_cpus(void)
{
#if defined(MR_HAVE_HWLOC)
hwloc_cpuset_t inherited_binding;
// Gather the cpuset that our parent process bound this process to.
//
// (For information about how to deliberately restrict a process and its
@@ -498,78 +560,57 @@ MR_reset_available_cpus(void)
} else {
MR_perror("Couldn't get CPU affinity");
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
MR_thread_pinning = MR_FALSE;
#endif
}
#endif
}
static void
-MR_detect_num_processors(void)
+MR_free_available_cpus(void)
{
- #ifdef MR_HAVE_HWLOC
- if (-1 == hwloc_topology_init(&MR_hw_topology)) {
- MR_fatal_error("Error allocating libhwloc topology object");
- }
- if (-1 == hwloc_topology_load(MR_hw_topology)) {
- MR_fatal_error("Error detecting hardware topology (hwloc)");
- }
- #endif
-
- // Setup num processors.
-
- MR_reset_available_cpus();
#if defined(MR_HAVE_HWLOC)
- MR_num_processors_detected = hwloc_bitmap_weight(MR_hw_available_pus);
+ // XXX Fill this in.
#elif defined(MR_HAVE_LINUX_CPU_AFFINITY_API)
- MR_num_processors_detected =
- CPU_COUNT_S(MR_cpuset_size, MR_cpuset_available);
- if (MR_num_processors_detected == 0) {
- // Carry on even if the MR_cpuset_available is somehow empty.
- MR_num_processors_detected = 1;
- }
- #elif defined(MR_WIN32_GETSYSTEMINFO)
- {
- SYSTEM_INFO sysinfo;
- GetSystemInfo(&sysinfo);
- MR_num_processors_detected = sysinfo.dwNumberOfProcessors;
+ MR_cpuset_size = 0;
+ if (MR_cpuset_available != NULL) {
+ CPU_FREE(MR_cpuset_available);
+ MR_cpuset_available = NULL;
}
- #elif defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
- MR_num_processors_detected = sysconf(_SC_NPROCESSORS_ONLN);
- #else
- #warning "Cannot detect MR_num_processors_detected"
- MR_num_processors_detected = 1;
#endif
}
+#endif // MR_THREAD_SAFE
+
#ifdef MR_LL_PARALLEL_CONJ
static void
-MR_setup_num_threads(void)
+MR_setup_num_ws_engines(unsigned num_processors_detected)
{
- // If MR_num_threads is unset, configure it to match number of processors
- // on the system. If we do this, then we prepare to set processor
- // affinities later on.
-
+ // If MR_num_ws_engines is unset, configure it to match the number of
+ // processors available to the process (if known). If we do this, then we
+ // prepare to set processor affinities later on.
if (MR_num_ws_engines == 0) {
- MR_num_ws_engines = MR_num_processors_detected;
+ MR_num_ws_engines = num_processors_detected;
+
+ // In case CPU detection failed for some reason.
+ if (MR_num_ws_engines == 0) {
+ MR_num_ws_engines = 1;
+ }
}
- #ifdef MR_DEBUG_THREADS
if (MR_debug_threads) {
- fprintf(stderr, "Detected %d processors, will use %d threads\n",
- MR_num_processors_detected, MR_num_ws_engines);
+ fprintf(stderr, "Detected %d processors, will use %d ws engines\n",
+ num_processors_detected, MR_num_ws_engines);
}
- #endif
}
#endif // MR_LL_PARALLEL_CONJ
-#endif // MR_THREAD_SAFE
// Thread pinning.
#if defined(MR_HAVE_THREAD_PINNING) && defined(MR_LL_PARALLEL_CONJ)
static int
MR_pin_thread_no_locking(void)
{
int initial_cpu;
int max;
@@ -620,38 +661,37 @@ MR_pin_thread(void)
{
int cpu;
MR_LOCK(&MR_thread_pinning_lock, "MR_pin_thread");
cpu = MR_pin_thread_no_locking();
MR_UNLOCK(&MR_thread_pinning_lock, "MR_pin_thread");
return cpu;
}
-void
+int
MR_pin_primordial_thread(void)
{
// We don't need locking to pin the primordial thread as it is called
// before any other threads exist.
- MR_primordial_thread_cpu = MR_pin_thread_no_locking();
+ return MR_pin_thread_no_locking();
}
static void MR_setup_thread_pinning(void)
{
MR_num_threads_left_to_pin = MR_num_ws_engines;
pthread_mutex_init(&MR_thread_pinning_lock, MR_MUTEX_ATTR);
- // Comment this back in to enable thread pinning by default
- // if we autodetected the number of CPUs without error.
-
+ // Restore this to enable thread pinning by default
+ // if we autodetected the number of CPUs without error.
#if 0
if (MR_num_processors_detected > 1) {
MR_thread_pinning = MR_TRUE;
}
#endif
}
// Determine which CPU this thread is currently running on.
static int MR_current_cpu(void)
@@ -749,35 +789,20 @@ MR_do_pin_thread(int cpu)
CPU_FREE(cpuset);
} else {
success = MR_FALSE;
}
return success;
#endif
return MR_TRUE;
}
-#if defined(MR_HAVE_HWLOC)
-static MR_bool MR_make_pu_unavailable(const struct hwloc_obj *pu);
-#endif
-
-static void MR_make_cpu_unavailable(int cpu)
-{
-#if defined(MR_HAVE_HWLOC)
- hwloc_obj_t pu;
- pu = hwloc_get_obj_by_type(MR_hw_topology, HWLOC_OBJ_PU, cpu);
- MR_make_pu_unavailable(pu);
-#elif defined(MR_HAVE_LINUX_CPU_AFFINITY_API)
- CPU_CLR_S(cpu, MR_cpuset_size, MR_cpuset_available);
-#endif
-}
-
#if defined(MR_HAVE_HWLOC)
static MR_bool MR_make_pu_unavailable(const struct hwloc_obj *pu)
{
hwloc_obj_t core;
static int siblings_to_make_unavailable;
int i;
#ifdef MR_DEBUG_THREAD_PINNING
char *cpusetstr;
@@ -821,20 +846,36 @@ static MR_bool MR_make_pu_unavailable(const struct hwloc_obj *pu)
return MR_FALSE;
}
}
}
}
return MR_TRUE;
}
#endif
+static void MR_make_cpu_unavailable(int cpu)
+{
+#if defined(MR_HAVE_HWLOC)
+ hwloc_obj_t pu;
+ pu = hwloc_get_obj_by_type(MR_hw_topology, HWLOC_OBJ_PU, cpu);
+ MR_make_pu_unavailable(pu);
+#elif defined(MR_HAVE_LINUX_CPU_AFFINITY_API)
+ CPU_CLR_S(cpu, MR_cpuset_size, MR_cpuset_available);
+#endif
+}
+
+void MR_done_thread_pinning(void)
+{
+ MR_free_available_cpus();
+}
+
#endif // MR_HAVE_THREAD_PINNING && MR_LL_PARALLEL_CONJ
void
MR_finalize_context_stuff(void)
{
#ifdef MR_THREAD_SAFE
pthread_mutex_destroy(&MR_runqueue_lock);
pthread_mutex_destroy(&free_context_list_lock);
#ifdef MR_LL_PARALLEL_CONJ
MR_sem_destroy(&shutdown_ws_semaphore);
diff --git a/runtime/mercury_context.h b/runtime/mercury_context.h
index 11d7d880c..3e0e8349f 100644
--- a/runtime/mercury_context.h
+++ b/runtime/mercury_context.h
@@ -1,14 +1,14 @@
// vim: ts=4 sw=4 expandtab ft=c
// Copyright (C) 1997-2007, 2009-2011 The University of Melbourne.
-// Copyright (C) 2014 The Mercury team.
+// Copyright (C) 2014-2016, 2018 The Mercury team.
// This file may only be copied under the terms of the GNU Library General
// Public License - see the file COPYING.LIB in the Mercury distribution.
// mercury_context.h - defines Mercury multithreading stuff.
//
// A "context" is a Mercury thread. (We use a different term than "thread"
// to avoid confusing Mercury threads and POSIX threads.)
// Each context is represented by a value of type MR_Context,
// which contains a detstack, a nondetstack, a trail (if needed), the various
// pointers that refer to them, a succip, and a thread-resumption continuation.
@@ -358,35 +358,30 @@ struct MR_Context_Struct {
// The runqueue is a linked list of contexts that are runnable.
extern MR_Context *MR_runqueue_head;
extern MR_Context *MR_runqueue_tail;
#ifdef MR_THREAD_SAFE
extern MercuryLock MR_runqueue_lock;
extern MercuryCond MR_runqueue_cond;
#endif
#ifdef MR_LL_PARALLEL_CONJ
- extern MR_bool MR_thread_pinning;
+ extern MR_bool MR_thread_pinning;
#endif
#ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
extern MR_bool MR_profile_parallel_execution;
// XXX: This is currently unused, we plan to use it in the future. -pbone
extern MR_Stats MR_profile_parallel_executed_local_sparks;
#endif
-#ifdef MR_THREAD_SAFE
-// The number of processors detected.
-extern unsigned MR_num_processors_detected;
-#endif
-
// As well as the runqueue, we maintain a linked list of contexts
// and associated file descriptors that are suspended blocked for
// reads/writes/exceptions. When the runqueue becomes empty, if
// this list is not empty then we call select and block until one
// or more of the file descriptors become ready for I/O, then
// wake the appropriate context.
// In addition, we should periodically check to see if the list of blocked
// contexts is non-empty and if so, poll to wake any contexts that
// can unblock. This, while not yielding true fairness (since this
// requires the current context to perform some yield-like action),
@@ -423,25 +418,28 @@ extern MR_PendingContext *MR_pending_contexts;
extern volatile MR_Integer MR_num_idle_ws_engines;
// Spark deques for work stealing. These are made visible so that they can
// be initialised by code in mercury_thread.c.
extern MR_SparkDeque **MR_spark_deques;
#endif // !MR_LL_PARALLEL_CONJ
////////////////////////////////////////////////////////////////////////////
-// Initializes a context structure, and gives it the given id. If gen is
-// non-NULL, the context is for the given generator.
+#ifdef MR_THREAD_SAFE
+// Return the number of processors available to this process or 0 if unknown.
+// This function is not directly related to contexts, but shares code with the
+// code to count the number of Mercury engines to start.
+extern unsigned MR_get_num_processors(void);
+#endif
-extern void MR_init_context(MR_Context *context, const char *id,
- MR_Generator *gen);
+////////////////////////////////////////////////////////////////////////////
// Allocates and initializes a new context structure, and gives it
// the given id. If gen is non-NULL, the context is for the given generator.
// The `MR_ctxt_thread_local_mutables' member must be initialised separately.
extern MR_Context *MR_create_context(const char *id,
MR_ContextSize ctxt_size, MR_Generator *gen);
// MR_release_context(context) returns the pointed-to context structure
// to the free list, and releases resources as necessary.
@@ -453,45 +451,41 @@ extern MR_Context *MR_create_context(const char *id,
// stack segment may still be referenced by the context. If that context
// is reused later, then it will clobber another context's stack!
extern void MR_release_context(MR_Context *context);
// MR_init_context_stuff() initializes the lock structures for the runqueue,
// and detects the number of threads to use on the LLC backend.
extern void MR_init_context_stuff(void);
+#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
// MR_pin_thread() pins the current thread to the next available processor ID,
// if thread pinning is enabled.
// MR_pin_primordial_thread() is a special case for the primordial thread.
// It should only be executed once, and only by the primordial thread _before_
// the other threads are started.
//
// Both functions return the CPU number that the thread is pinned to or would
// be pinned to if pinning was both enabled and supported. That is a valid
// value is always returned even if the thread is not actually pinned.
-
-#if defined(MR_LL_PARALLEL_CONJ)
-#if defined(MR_HAVE_THREAD_PINNING)
-extern void MR_pin_primordial_thread(void);
+extern int MR_pin_primordial_thread(void);
extern int MR_pin_thread(void);
-// The CPU that the primordial thread is running on.
-
-extern MR_Unsigned MR_primordial_thread_cpu;
+// Free resources no longer required after thread pinning is done.
+extern void MR_done_thread_pinning(void);
#endif
+#ifdef MR_LL_PARALLEL_CONJ
// Shutdown all the work-stealing engines.
// (Exclusive engines shut down by themselves.)
-
-extern void
-MR_shutdown_ws_engines(void);
+extern void MR_shutdown_ws_engines(void);
#endif
// MR_finalize_context_stuff() finalizes the lock structures for the runqueue
// among other things set up by MR_init_context_stuff().
extern void MR_finalize_context_stuff(void);
// MR_flounder() aborts with a runtime error message. It is called if
// the runqueue becomes empty and none of the running processes are
// working, which means that the computation has floundered.
diff --git a/runtime/mercury_wrapper.c b/runtime/mercury_wrapper.c
index f9f0c2341..c4ba5ff45 100644
--- a/runtime/mercury_wrapper.c
+++ b/runtime/mercury_wrapper.c
@@ -632,72 +632,73 @@ mercury_runtime_init(int argc, char **argv)
// initialize the trail
MR_trail_zone = MR_create_or_reuse_zone("trail",
MR_trail_size, MR_next_offset(),
MR_trail_zone_size, MR_default_handler);
MR_trail_ptr = (MR_TrailEntry *) MR_trail_zone->min;
MR_ticket_counter = 1;
MR_ticket_high_water = 1;
#endif
#else
-#if defined(MR_LL_PARALLEL_CONJ)
-#if defined(MR_HAVE_THREAD_PINNING)
+ #ifdef MR_LL_PARALLEL_CONJ
+ #ifdef MR_HAVE_THREAD_PINNING
MR_pin_primordial_thread();
-#endif
- #if defined(MR_THREADSCOPE)
+ #endif
+
+ #ifdef MR_THREADSCOPE
// We must setup threadscope before we setup the first engine.
// Pin the primordial thread, if thread pinning is configured.
-
MR_setup_threadscope();
// Setup the threadscope string tables before the standard library is
// initialised or engines are created.
-
(*MR_address_of_init_modules_threadscope_string_table)();
+ #endif
#endif
-#endif
-
// Start up the Mercury engine. We don't yet know how many slots will be
// needed for thread-local mutable values so allocate the maximum number.
-
MR_init_thread_inner(MR_use_now, MR_PRIMORIDAL_ENGINE_TYPE);
MR_SET_THREAD_LOCAL_MUTABLES(
MR_create_thread_local_mutables(MR_MAX_THREAD_LOCAL_MUTABLES));
// Start up additional work-stealing Mercury engines.
-
#ifdef MR_LL_PARALLEL_CONJ
{
int i;
for (i = 1; i < MR_num_ws_engines; i++) {
MR_create_worksteal_thread();
}
+
#ifdef MR_THREADSCOPE
// TSC Synchronization is not used, support is commented out.
// See runtime/mercury_threadscope.h for an explanation.
- *
for (i = 1; i < MR_num_threads; i++) {
MR_threadscope_sync_tsc_master();
}
-
#endif
+
while (MR_num_idle_ws_engines < MR_num_ws_engines-1) {
// busy wait until the worker threads are ready
MR_ATOMIC_PAUSE;
}
}
+
+ #ifdef MR_HAVE_THREAD_PINNING
+ MR_done_thread_pinning();
+ #endif
#endif // ! MR_LL_PARALLEL_CONJ
#endif // ! 0
#ifdef MR_BOEHM_GC
+ // XXX overrides MERCURY_OPTIONS -x
GC_enable();
#endif
if (MR_memdebug) {
MR_debug_memory(stderr);
}
// Initialize profiling.
#if defined(MR_MPROF_PROFILE_TIME) || defined(MR_MPROF_PROFILE_CALLS) \
--
2.17.0
More information about the reviews
mailing list