[m-rev.] for review: Separate processor counting from thread pinning.

Peter Wang novalazy at gmail.com
Mon Jun 23 17:36:38 AEST 2014


Branches: master, 14.01

In low-level C parallel grades we need MR_num_processors to prime
MR_num_threads when the latter is not explicitly set by the user, even
if thread pinning support is not available.

runtime/mercury_context.c:
	Refactor the code accordingly, separating processor detection
	from the thread pinning setup.

	Add assertions that MR_num_processors > 0 and MR_num_threads > 0.
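
For reviewers, the intended initialization order after this change is
roughly the following (a minimal standalone sketch, not part of the
patch; the names loosely mirror the runtime's without the MR_ prefix,
and the sysconf-only fallback is a simplification of the
hwloc/sched_getaffinity logic in the diff):

    #include <assert.h>
    #include <unistd.h>

    static unsigned num_processors;     /* detected, always > 0 */
    static unsigned num_threads;        /* 0 means: not set by the user */

    static void
    detect_num_processors(void)
    {
    #if defined(_SC_NPROCESSORS_ONLN)
        /* sysconf can fail and return -1; fall back to one processor. */
        long n = sysconf(_SC_NPROCESSORS_ONLN);
        num_processors = (n > 0) ? (unsigned) n : 1;
    #else
        num_processors = 1;
    #endif
    }

    static void
    setup_num_threads(void)
    {
        /*
        ** Prime the thread count from the processor count only when the
        ** user has not set it explicitly.
        */
        if (num_threads == 0) {
            num_threads = num_processors;
        }
    }

    int
    main(void)
    {
        /* Counting works even when thread pinning is unavailable. */
        detect_num_processors();
        assert(num_processors > 0);

        setup_num_threads();
        assert(num_threads > 0);

        /* Only now, and only if supported, would pinning be set up. */
        return 0;
    }

This mirrors the new hunk in MR_init_context_stuff below.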
---
 runtime/mercury_context.c | 280 ++++++++++++++++++++++++++--------------------
 1 file changed, 160 insertions(+), 120 deletions(-)

diff --git a/runtime/mercury_context.c b/runtime/mercury_context.c
index 85fc51c..43cb53d 100644
--- a/runtime/mercury_context.c
+++ b/runtime/mercury_context.c
@@ -193,25 +193,30 @@ static MR_Integer       MR_profile_parallel_regular_context_kept = 0;
 #endif /* MR_PROFILE_PARALLEL_EXECUTION_SUPPORT */
 
 /*
-** Local variables for thread pinning.
+** Variables for detecting the number of processors.
 */
-#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
-MR_bool                 MR_thread_pinning = MR_FALSE;
-
-static MercuryLock      MR_thread_pinning_lock;
-static unsigned         MR_num_threads_left_to_pin;
+#ifdef MR_LL_PARALLEL_CONJ
 static unsigned         MR_num_processors;
-MR_Unsigned             MR_primordial_thread_cpu;
-#ifdef MR_HAVE_HWLOC
+  #if defined(MR_HAVE_HWLOC)
     static hwloc_topology_t MR_hw_topology;
     static hwloc_cpuset_t   MR_hw_available_pus = NULL;
-#else /* MR_HAVE_SCHED_SETAFFINITY */
+  #elif defined(MR_HAVE_SCHED_SETAFFINITY)
     static cpu_set_t        *MR_available_cpus;
     /* The number of CPUs that MR_available_cpus can refer to */
     static unsigned         MR_cpuset_size = 0;
   #endif
 #endif
 
+/*
+** Local variables for thread pinning.
+*/
+#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
+MR_bool                 MR_thread_pinning = MR_FALSE;
+static MercuryLock      MR_thread_pinning_lock;
+static unsigned         MR_num_threads_left_to_pin;
+MR_Unsigned             MR_primordial_thread_cpu;
+#endif
+
 #if defined(MR_LL_PARALLEL_CONJ) && \
     defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
 /*
@@ -260,6 +265,19 @@ MR_SparkDeque           **MR_spark_deques = NULL;
 
 #ifdef MR_LL_PARALLEL_CONJ
 /*
+** Reset or initialize the cpuset that tracks which CPUs are available for
+** binding.
+*/
+static void
+MR_reset_available_cpus(void);
+
+static void
+MR_detect_num_processors(void);
+
+static void
+MR_setup_num_threads(void);
+
+/*
 ** Try to wake up a sleeping engine and tell it to do action. The engine is
 ** only woken if it is in the sleeping state.  If the engine is not sleeping
 ** use try_notify_engine below.  If the engine is woken without a race, this
@@ -293,7 +311,7 @@ static void
 MR_write_out_profiling_parallel_execution(void);
 #endif
 
-#if defined(MR_LL_PARALLEL_CONJ)
+#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_THREAD_PINNING)
 static void
 MR_setup_thread_pinning(void);
 
@@ -307,13 +325,6 @@ static int
 MR_current_cpu(void);
 
 /*
-** Reset or initialize the cpuset that tracks which CPUs are available for
-** binding.
-*/
-static void
-MR_reset_available_cpus(void);
-
-/*
 ** Mark the given CPU as unavailable for thread pinning.  This may mark other
 ** CPUs as unavailable, if, for instance they share resources with this
 ** processor and we can place other tasks elsewhere to avoid this sharing.
@@ -352,9 +363,16 @@ MR_init_context_stuff(void)
   #endif
 
   #ifdef MR_LL_PARALLEL_CONJ
+    MR_detect_num_processors();
+    assert(MR_num_processors > 0);
+
+    MR_setup_num_threads();
+    assert(MR_num_threads > 0);
+
     #if defined(MR_HAVE_THREAD_PINNING)
     MR_setup_thread_pinning();
     #endif
+
     MR_granularity_wsdeque_length =
         MR_granularity_wsdeque_length_factor * MR_num_threads;
 
@@ -380,10 +398,132 @@ MR_init_context_stuff(void)
 }
 
 /*
+** Detect number of processors
+*/
+
+#ifdef MR_LL_PARALLEL_CONJ
+static void
+MR_reset_available_cpus(void)
+{
+  #if defined(MR_HAVE_HWLOC)
+    hwloc_cpuset_t  inherited_binding;
+
+    /*
+    ** Gather the cpuset that our parent process bound this process to.
+    **
+    ** (For information about how to deliberately restrict a process and its
+    ** subprocesses to a set of CPUs on Linux, see cpuset(7).)
+    */
+    inherited_binding = hwloc_bitmap_alloc();
+    hwloc_get_cpubind(MR_hw_topology, inherited_binding, HWLOC_CPUBIND_PROCESS);
+
+    /*
+    ** Set the available processors to the intersection of inherited_binding
+    ** and the cpuset we may use as reported by libhwloc.  In my tests with
+    ** libhwloc_1.0-1 (Debian), hwloc reported all CPUs on the system as
+    ** available; it did not exclude CPUs outside the process's cpuset(7).
+    */
+    if (MR_hw_available_pus == NULL) {
+        MR_hw_available_pus = hwloc_bitmap_alloc();
+    }
+    hwloc_bitmap_and(MR_hw_available_pus, inherited_binding,
+        hwloc_topology_get_allowed_cpuset(MR_hw_topology));
+
+    hwloc_bitmap_free(inherited_binding);
+  #elif defined(MR_HAVE_SCHED_GETAFFINITY)
+    unsigned cpuset_size;
+    unsigned num_processors;
+
+    if (MR_cpuset_size) {
+        cpuset_size = MR_cpuset_size;
+        num_processors = MR_num_processors;
+    } else {
+      #if defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
+        num_processors = sysconf(_SC_NPROCESSORS_ONLN);
+      #else
+        /*
+        ** Make the CPU set at least 32 processors wide.
+        */
+        num_processors = 32;
+      #endif
+        cpuset_size = CPU_ALLOC_SIZE(num_processors);
+        MR_cpuset_size = cpuset_size;
+    }
+
+    if (MR_available_cpus == NULL) {
+        MR_available_cpus = CPU_ALLOC(num_processors);
+    }
+
+    if (-1 == sched_getaffinity(0, cpuset_size, MR_available_cpus)) {
+        perror("Couldn't get CPU affinity");
+        MR_thread_pinning = MR_FALSE;
+        CPU_FREE(MR_available_cpus);
+        MR_available_cpus = NULL;
+    }
+  #endif
+}
+
+static void
+MR_detect_num_processors(void)
+{
+  #ifdef MR_HAVE_HWLOC
+    if (-1 == hwloc_topology_init(&MR_hw_topology)) {
+        MR_fatal_error("Error allocating libhwloc topology object");
+    }
+    if (-1 == hwloc_topology_load(MR_hw_topology)) {
+        MR_fatal_error("Error detecting hardware topology (hwloc)");
+    }
+  #endif
+
+    /*
+    ** Set up MR_num_processors.
+    */
+    MR_reset_available_cpus();
+  #ifdef MR_HAVE_HWLOC
+    MR_num_processors = hwloc_bitmap_weight(MR_hw_available_pus);
+  #elif defined(MR_HAVE_SCHED_GETAFFINITY)
+    /*
+    ** This looks redundant but it's not.  The processor count obtained via
+    ** sysconf is only a guess; the number of CPUs in the cpuset is the
+    ** actual number of CPUs that this process is restricted to.
+    */
+    MR_num_processors = CPU_COUNT_S(MR_cpuset_size, MR_available_cpus);
+  #else
+    #warning "Cannot detect MR_num_processors"
+    MR_num_processors = 1;
+  #endif
+}
+
+static void
+MR_setup_num_threads(void)
+{
+    /*
+    ** If MR_num_threads is unset, configure it to match the number of
+    ** processors on the system.  If we do this, then we prepare to set
+    ** processor affinities later on.
+    */
+    if (MR_num_threads == 0) {
+        MR_num_threads = MR_num_processors;
+    }
+
+  #ifdef MR_DEBUG_THREADS
+    if (MR_debug_threads) {
+        fprintf(stderr, "Detected %u processors, will use %u threads\n",
+            MR_num_processors, (unsigned) MR_num_threads);
+    }
+  #endif
+}
+#endif /* MR_LL_PARALLEL_CONJ */
+
+/*
+** Thread pinning.
+*/
+
+#if defined(MR_HAVE_THREAD_PINNING) && defined(MR_LL_PARALLEL_CONJ)
+/*
 ** Pin the primordial thread first to the CPU it is currently using
 ** (if support is available for thread pinning).
 */
-#if defined(MR_HAVE_THREAD_PINNING) && defined(MR_LL_PARALLEL_CONJ)
 static unsigned
 MR_pin_thread_no_locking(void)
 {
@@ -444,48 +584,8 @@ MR_pin_primordial_thread(void)
 
 static void MR_setup_thread_pinning(void)
 {
-    unsigned num_processors;
-
-#ifdef MR_HAVE_HWLOC
-    if (-1 == hwloc_topology_init(&MR_hw_topology)) {
-        MR_fatal_error("Error allocating libhwloc topology object");
-    }
-    if (-1 == hwloc_topology_load(MR_hw_topology)) {
-        MR_fatal_error("Error detecting hardware topology (hwloc)");
-    }
-#endif
-
-    /*
-    ** Setup num processors
-    */
-    MR_reset_available_cpus();
-#ifdef MR_HAVE_HWLOC
-    num_processors = hwloc_bitmap_weight(MR_hw_available_pus);
-#elif defined(MR_HAVE_SCHED_GETAFFINITY)
-    /*
-    ** This looks redundant but its not.  MR_num_processors is a guess that was
-    ** gathered by using sysconf.  But the number of CPUs in the CPU_SET is the
-    ** actual number of CPUs that this process is restricted to.
-    */
-    num_processors = CPU_COUNT_S(MR_cpuset_size, MR_available_cpus);
-#endif
-    MR_num_processors = num_processors;
-
-    /*
-    ** If MR_num_threads is unset, configure it to match number of processors
-    ** on the system. If we do this, then we prepare to set processor
-    ** affinities later on.
-    */
-    if (MR_num_threads == 0) {
-        MR_num_threads = num_processors;
-    }
     MR_num_threads_left_to_pin = MR_num_threads;
 
-#ifdef MR_DEBUG_THREAD_PINNING
-    fprintf(stderr, "Detected %d available processors, will use %d threads\n",
-        MR_num_processors, MR_num_threads);
-#endif
-
     pthread_mutex_init(&MR_thread_pinning_lock, MR_MUTEX_ATTR);
 
   /*
@@ -596,67 +696,6 @@ MR_do_pin_thread(int cpu)
     return MR_TRUE;
 }
 
-static void MR_reset_available_cpus(void)
-{
-#if defined(MR_HAVE_HWLOC)
-    hwloc_cpuset_t  inherited_binding;
-
-    /*
-    ** Gather the cpuset that our parent process bound this process to.
-    **
-    ** (For information about how to deliberately restrict a process and it's
-    ** sub-processors to a set of CPUs on Linux see cpuset(7).
-    */
-    inherited_binding = hwloc_bitmap_alloc();
-    hwloc_get_cpubind(MR_hw_topology, inherited_binding, HWLOC_CPUBIND_PROCESS);
-
-    /*
-    ** Set the available processors to the union of inherited_binding and the
-    ** cpuset we're allowed to use as reported by libhwloc.  In my tests with
-    ** libhwloc_1.0-1 (Debian) hwloc reported that all cpus on the system are
-    ** avaliable, it didn't exclude cpus not in the processor's cpuset(7).
-    */
-    if (MR_hw_available_pus == NULL) {
-        MR_hw_available_pus = hwloc_bitmap_alloc();
-    }
-    hwloc_bitmap_and(MR_hw_available_pus, inherited_binding,
-        hwloc_topology_get_allowed_cpuset(MR_hw_topology));
-
-    hwloc_bitmap_free(inherited_binding);
-#elif defined(MR_HAVE_SCHED_GETAFFINITY)
-    unsigned cpuset_size;
-    unsigned num_processors;
-
-    if (MR_cpuset_size) {
-        cpuset_size = MR_cpuset_size;
-        num_processors = MR_num_processors;
-    } else {
-  #if defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
-        num_processors = sysconf(_SC_NPROCESSORS_ONLN);
-  #else
-        /*
-        ** Make the CPU set at least 32 processors wide.
-        */
-        num_processors = 32;
-  #endif
-        cpuset_size = CPU_ALLOC_SIZE(num_processors);
-        MR_cpuset_size = cpuset_size;
-    }
-
-    if (MR_available_cpus == NULL) {
-        MR_available_cpus = CPU_ALLOC(num_processors);
-    }
-
-    if (-1 == sched_getaffinity(0, cpuset_size, MR_available_cpus))
-    {
-        perror("Couldn't get CPU affinity");
-        MR_thread_pinning = MR_FALSE;
-        CPU_FREE(MR_available_cpus);
-        MR_available_cpus = NULL;
-    }
-#endif
-}
-
 #if defined(MR_HAVE_HWLOC)
 static MR_bool MR_make_pu_unavailable(const struct hwloc_obj *pu);
 #endif
@@ -673,7 +712,8 @@ static void MR_make_cpu_unavailable(int cpu)
 }
 
 #if defined(MR_HAVE_HWLOC)
-static MR_bool MR_make_pu_unavailable(const struct hwloc_obj *pu) {
+static MR_bool MR_make_pu_unavailable(const struct hwloc_obj *pu)
+{
     hwloc_obj_t core;
     static int  siblings_to_make_unavailable;
     int         i;
-- 
1.8.4


