[m-rev.] Parallel runtime thread pinning.
Paul Bone
pbone at csse.unimelb.edu.au
Thu Nov 5 16:44:57 AEDT 2009
On Tue, Aug 25, 2009 at 02:15:52AM +1000, Julien Fischer wrote:
>
> sysconf() is posix and the parameters for it that are a part of posix
> *must* be macros. It is extremely improbable that any non-standard
> parameters like this one would be defined using an enum.
>
> It will be sufficient to use the #ifdef here - it will also avoid
> yet more clutter in the configure script.
>
The following patch makes these corrections and makes some runtime modules
tidier and more conformant to our standards.
---
For post commit review.
Corrections in response to review comments on recent parallel runtime changes.
Thread pinning is now off by default; the operating system should handle
thread placement unless we have a good reason to override it.
configure.in:
Removed newly added declaration checking code.
doc/user_guide.texi:
Documentation corrections.
Adjusted the documented default of the --thread-pinning runtime option.
runtime/mercury_atomic_ops.c:
runtime/mercury_atomic_ops.h:
Use __x86_64__ instead of __amd64__.
Altered comments at the beginning of sections of the file to better
describe the contents of that section.
Placed comments at the end of long conditional compilation blocks that
match the condition at the beginning of the block.
runtime/mercury_conf.h.in:
Added editor hint for vim at the top of the file.
Remove newly added declarations section.
runtime/mercury_context.c:
Adjusted the default behaviour of --thread-pinning.
Fixed some style issues.
runtime/mercury_context.h:
Fixed grammatical error.
runtime/mercury_wrapper.c:
Fixed grammatical error.
Fixed a missing break statement in a switch statement.
Index: configure.in
===================================================================
RCS file: /home/mercury1/repository/mercury/configure.in,v
retrieving revision 1.551
diff -u -p -b -r1.551 configure.in
--- configure.in 26 Oct 2009 13:32:47 -0000 1.551
+++ configure.in 5 Nov 2009 04:09:46 -0000
@@ -1173,24 +1173,6 @@ MERCURY_CHECK_FOR_FENV_FUNC([fesetround]
#-----------------------------------------------------------------------------#
#
-# Check for declarations.
-#
-
-mercury_check_for_declarations () {
- for mercury_cv_decl in "$@"
- do
- mercury_cv_decl_define="MR_HAVE_`echo $mercury_cv_decl | \
- tr abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ`"
- AC_CHECK_DECL($mercury_cv_decl, [
- AC_DEFINE_UNQUOTED($mercury_cv_decl_define)
- ])
- done
-}
-
-mercury_check_for_declarations "_SC_NPROCESSORS_ONLN"
-
-#-----------------------------------------------------------------------------#
-#
# Check the basics of sigaction
#
Index: doc/user_guide.texi
===================================================================
RCS file: /home/mercury1/repository/mercury/doc/user_guide.texi,v
retrieving revision 1.597
diff -u -p -b -r1.597 user_guide.texi
--- doc/user_guide.texi 30 Oct 2009 03:33:26 -0000 1.597
+++ doc/user_guide.texi 5 Nov 2009 04:03:39 -0000
@@ -9711,7 +9711,7 @@ This only has an effect if the executabl
@findex -P (runtime option)
Tells the runtime system to use @var{num} threads
if the program was built in a parallel low-level C grade.
-The mercury runtime attempts to automatically determine this value if support
+The Mercury runtime attempts to automatically determine this value if support
is available from the operating system.
If it cannot or support is unavailable it defaults to @samp{1}.
@@ -9737,13 +9737,15 @@ grade.
@sp 1
@item --thread-pinning
@findex --thread-pinning
-Request that the runtime system attempts to pin mercury engines (POSIX threads)
+Request that the runtime system attempts to pin Mercury engines (POSIX threads)
to CPU cores/hardware threads.
-This only has an effect if the executable was build in a parallel low-level C
+This only has an effect if the executable was built in a parallel low-level C
grade.
-It is disabled by default unless @samp{-P @var{num}} is not specified and the
-runtime system is able to detect the number of processors enabled by the
-operating system.
+This is disabled by default but may be enabled by default in the future.
+@c In case this is enabled by default the following comment is relevant.
+@c This is disabled by default unless @samp{-P @var{num}} is not specified and the
+@c runtime system is able to detect the number of processors enabled by the
+@c operating system.
@c @item -r @var{num}
@c @findex -r (runtime option)
Index: runtime/mercury_atomic_ops.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_atomic_ops.c,v
retrieving revision 1.4
diff -u -p -b -r1.4 mercury_atomic_ops.c
--- runtime/mercury_atomic_ops.c 16 Aug 2009 10:18:36 -0000 1.4
+++ runtime/mercury_atomic_ops.c 24 Aug 2009 04:49:48 -0000
@@ -14,11 +14,12 @@
#include "mercury_imp.h"
#include "mercury_atomic_ops.h"
+/*---------------------------------------------------------------------------*/
+
#if defined(MR_LL_PARALLEL_CONJ)
-/*---------------------------------------------------------------------------*/
/*
-** Provide definitions for functions declared `extern inline'.
+** Definitions for the atomic functions declared `extern inline'.
*/
MR_OUTLINE_DEFN(
@@ -69,20 +70,26 @@ MR_OUTLINE_DEFN(
#endif /* MR_LL_PARALLEL_CONJ */
+/*---------------------------------------------------------------------------*/
+
#if defined(MR_THREAD_SAFE) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
+/*
+** Profiling of the parallel runtime.
+*/
+
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
static MR_bool MR_rdtscp_is_available = MR_FALSE;
static MR_bool MR_rdtsc_is_available = MR_FALSE;
#endif
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
/* Set this to 1 to enable some printfs below */
#define MR_DEBUG_CPU_FEATURE_DETECTION 0
/*
-** cpuid, rdtscp and rdtsc are i386/amd64 instructions.
+** cpuid, rdtscp and rdtsc are i386/x86_64 instructions.
*/
static __inline__ void
MR_cpuid(MR_Unsigned code, MR_Unsigned sub_code,
@@ -94,18 +101,18 @@ MR_rdtscp(MR_uint_least64_t *tsc, MR_Uns
static __inline__ void
MR_rdtsc(MR_uint_least64_t *tsc);
-#endif
+#endif /* __GNUC__ && (__i386__ || __x86_64__) */
extern void
MR_configure_profiling_timers(void) {
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
MR_Unsigned a, b, c, d;
MR_Unsigned eflags, old_eflags;
/*
** Check for the CPUID instruction. CPUID is supported if we can flip bit
** 21 in the CPU's EFLAGS register. The assembly below is written in a
- ** subset of i386 and amd64 assembly. To read and write EFLAGS we have
+ ** subset of i386 and x86_64 assembly. To read and write EFLAGS we have
** to go via the C stack.
*/
__asm__ ("pushf; pop %0"
@@ -251,12 +258,12 @@ MR_configure_profiling_timers(void) {
#endif
MR_rdtscp_is_available = MR_TRUE;
-#endif
+#endif /* __GNUC__ && (__i386__ || __x86_64__) */
}
extern void
MR_profiling_start_timer(MR_Timer *timer) {
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
/*
** If we don't have enough data to fill in all the fields of this structure
** we leave them alone, we won't check them later without checking
@@ -275,7 +282,7 @@ MR_profiling_start_timer(MR_Timer *timer
extern void
MR_profiling_stop_timer(MR_Timer *timer, MR_Stats *stats) {
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
MR_Timer now;
MR_int_least64_t duration;
MR_uint_least64_t duration_squared;
@@ -303,16 +310,16 @@ MR_profiling_stop_timer(MR_Timer *timer,
MR_atomic_add_int(&(stats->MR_stat_sum), duration);
MR_atomic_add_int(&(stats->MR_stat_sum_squares), duration_squared);
}
-#elif
+#elif /* not __GNUC__ && (__i386__ || __x86_64__) */
/* No TSC support on this architecture or with this C compiler */
MR_atomic_inc_int(&(stats->MR_stat_count_recorded));
-#endif
+#endif /* not __GNUC__ && (__i386__ || __x86_64__) */
}
/*
-** It's convenient that this instruction is the same on both i386 and amd64
+** It's convenient that this instruction is the same on both i386 and x86_64
*/
-#if defined(__GNUC__) && (defined(__i386__) || defined(__amd64__))
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
static __inline__ void
MR_cpuid(MR_Unsigned code, MR_Unsigned sub_code,
@@ -348,7 +355,7 @@ MR_rdtsc(MR_uint_least64_t *tsc) {
*tsc |= tsc_high;
}
-#endif
+#endif /* __GNUC__ && (__i386__ || __x86_64__) */
-#endif
+#endif /* MR_THREAD_SAFE && MR_PROFILE_PARALLEL_EXECUTION_SUPPORT */
Index: runtime/mercury_atomic_ops.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_atomic_ops.h,v
retrieving revision 1.5
diff -u -p -b -r1.5 mercury_atomic_ops.h
--- runtime/mercury_atomic_ops.h 16 Aug 2009 10:47:55 -0000 1.5
+++ runtime/mercury_atomic_ops.h 24 Aug 2009 05:01:53 -0000
@@ -8,7 +8,8 @@
*/
/*
-** mercury_atomic.h - defines atomic operations.
+** mercury_atomic.h - defines atomic operations and other primitives used by
+** the parallel runtime.
*/
#ifndef MERCURY_ATOMIC_OPS_H
@@ -16,20 +17,15 @@
#include "mercury_std.h"
-/*
-** AMD say that __amd64__ is defined by the compiler for 64bit platforms,
-** Intel say that __x86_64__ is the correct macro. Really these refer to
-** the same thing that is simply branded differently, we use __amd64__ below
-** and define it if necessary ourselves.
-*/
-#if defined(__x86_64__) && !defined(__amd64__)
-#define __amd64__
-#endif
-
/*---------------------------------------------------------------------------*/
+
#if defined(MR_LL_PARALLEL_CONJ)
/*
+** Declarations for inline atomic operations.
+*/
+
+/*
** If the value at addr is equal to old, assign new to addr and return true.
** Otherwise return false.
*/
@@ -47,7 +43,7 @@ MR_compare_and_swap_word(volatile MR_Int
return __sync_bool_compare_and_swap(addr, old, new_val); \
} while (0)
-#elif defined(__GNUC__) && defined(__amd64__)
+#elif defined(__GNUC__) && defined(__x86_64__)
#define MR_COMPARE_AND_SWAP_WORD_BODY \
do { \
@@ -95,7 +91,7 @@ MR_compare_and_swap_word(volatile MR_Int
MR_EXTERN_INLINE void
MR_atomic_inc_int(volatile MR_Integer *addr);
-#if defined(__GNUC__) && defined(__amd64__)
+#if defined(__GNUC__) && defined(__x86_64__)
#define MR_ATOMIC_INC_INT_BODY \
do { \
@@ -148,7 +144,7 @@ MR_atomic_inc_int(volatile MR_Integer *a
MR_EXTERN_INLINE void
MR_atomic_dec_int(volatile MR_Integer *addr);
-#if defined(__GNUC__) && defined(__amd64__)
+#if defined(__GNUC__) && defined(__x86_64__)
#define MR_ATOMIC_DEC_INT_BODY \
do { \
@@ -191,7 +187,7 @@ MR_atomic_dec_int(volatile MR_Integer *a
MR_EXTERN_INLINE void
MR_atomic_add_int(volatile MR_Integer *addr, MR_Integer addend);
-#if defined(__GNUC__) && defined(__amd64__)
+#if defined(__GNUC__) && defined(__x86_64__)
#define MR_ATOMIC_ADD_INT_BODY \
do { \
@@ -233,7 +229,7 @@ MR_atomic_add_int(volatile MR_Integer *a
MR_EXTERN_INLINE void
MR_atomic_sub_int(volatile MR_Integer *addr, MR_Integer x);
-#if defined(__GNUC__) && defined(__amd64__)
+#if defined(__GNUC__) && defined(__x86_64__)
#define MR_ATOMIC_SUB_INT_BODY \
do { \
@@ -294,7 +290,7 @@ MR_atomic_sub_int(volatile MR_Integer *a
* References: Intel and AMD documentation for PAUSE, Intel optimisation
* guide.
*/
-#if defined(__GNUC__) && ( defined(__i386__) || defined(__amd64__) )
+#if defined(__GNUC__) && ( defined(__i386__) || defined(__x86_64__) )
#define MR_ATOMIC_PAUSE \
do { \
@@ -323,6 +319,10 @@ MR_atomic_sub_int(volatile MR_Integer *a
#if defined(MR_THREAD_SAFE) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
+/*
+** Declarations for profiling the parallel runtime.
+*/
+
typedef struct {
MR_Unsigned MR_stat_count_recorded;
MR_Unsigned MR_stat_count_not_recorded;
@@ -344,7 +344,7 @@ typedef struct {
} MR_Timer;
/*
-** Configure the profiling stats code. On i386 and amd64 machines this uses
+** Configure the profiling stats code. On i386 and x86_64 machines this uses
** CPUID to determine if the RDTSCP instruction is available and not prohibited
** by the OS.
*/
@@ -363,7 +363,7 @@ MR_profiling_start_timer(MR_Timer *timer
extern void
MR_profiling_stop_timer(MR_Timer *timer, MR_Stats *stats);
-#endif /* MR_THREAD_SAFE && MR_PROFILE_PARALLEL_EXECUTION */
+#endif /* MR_THREAD_SAFE && MR_PROFILE_PARALLEL_EXECUTION_SUPPORT */
/*---------------------------------------------------------------------------*/
Index: runtime/mercury_conf.h.in
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_conf.h.in,v
retrieving revision 1.65
diff -u -p -b -r1.65 mercury_conf.h.in
--- runtime/mercury_conf.h.in 23 Aug 2009 22:52:35 -0000 1.65
+++ runtime/mercury_conf.h.in 5 Nov 2009 04:12:08 -0000
@@ -1,4 +1,7 @@
/*
+** vim:ts=8 sw=8 noexpandtab
+*/
+/*
** Copyright (C) 1995-2003, 2005-2009 The University of Melbourne.
** This file may only be copied under the terms of the GNU Library General
** Public License - see the file COPYING.LIB in the Mercury distribution.
@@ -362,14 +365,6 @@
#undef MR_HAVE_SIGCONTEXT_STRUCT_2ARG
/*
-** These specify weather the given C macros are defined.
-**
-** MR_HAVE__SC_NPROCESSORS_ONLN, This is defined as a parameter for sysconf to
-** determine the number of processors online.
-*/
-#undef MR_HAVE__SC_NPROCESSORS_ONLN
-
-/*
** For debugging purposes, if we get a fatal signal, we print out the
** program counter (PC) at which the signal occurred.
**
Index: runtime/mercury_context.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_context.c,v
retrieving revision 1.68
diff -u -p -b -r1.68 mercury_context.c
--- runtime/mercury_context.c 23 Aug 2009 22:52:35 -0000 1.68
+++ runtime/mercury_context.c 5 Nov 2009 04:16:24 -0000
@@ -170,7 +170,7 @@ MR_init_thread_stuff(void)
*/
if (MR_num_threads == 0)
{
-#if defined(MR_HAVE_SYSCONF) && defined(MR_HAVE__SC_NPROCESSORS_ONLN)
+#if defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
long result;
result = sysconf(_SC_NPROCESSORS_ONLN);
@@ -179,19 +179,24 @@ MR_init_thread_stuff(void)
MR_num_threads = 1;
} else {
MR_num_threads = result;
- /* On systems that don't support sched_setaffinity we don't try to
+ /*
+ ** On systems that don't support sched_setaffinity we don't try to
** automatically enable thread pinning. This prevents a runtime
** warning that could unnecessarily confuse the user.
**/
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
- MR_thread_pinning = MR_TRUE;
+ /*
+ ** Comment this back in to enable thread pinning by default if we
+ ** autodetected the correct number of CPUs.
+ */
+ /* MR_thread_pinning = MR_TRUE; */
#endif
}
-#else
+#else /* ! defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) */
MR_num_threads = 1;
-#endif
+#endif /* ! defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) */
}
-#endif
+#endif /* MR_THREAD_SAFE */
}
void
@@ -200,7 +205,7 @@ MR_pin_thread(void)
#if defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ) && \
defined(MR_HAVE_SCHED_SETAFFINITY)
MR_LOCK(&MR_next_cpu_lock, "MR_pin_thread");
- if (MR_thread_pinning == MR_TRUE) {
+ if (MR_thread_pinning) {
cpu_set_t cpus;
if (MR_next_cpu < CPU_SETSIZE) {
@@ -211,19 +216,21 @@ MR_pin_thread(void)
MR_next_cpu++;
} else {
perror("Warning: Couldn't set CPU affinity");
- /* if this failed once it will probably fail again so disable
- ** it.
+ /*
+ ** If this failed once it will probably fail again so we
+ ** disable it.
*/
MR_thread_pinning = MR_FALSE;
}
} else {
- perror("Warning: Couldn't set CPU affinity due to a static " \
+ perror("Warning: Couldn't set CPU affinity due to a static "
"system limit");
MR_thread_pinning = MR_FALSE;
}
}
MR_UNLOCK(&MR_next_cpu_lock, "MR_pin_thread");
-#endif
+#endif /* defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ) && \
+ defined(MR_HAVE_SCHED_SETAFFINITY) */
}
void
@@ -364,7 +371,7 @@ fprint_stats(FILE *stream, const char *m
}
};
-#endif
+#endif /* defined(MR_THREAD_SAFE) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT) */
static void
MR_init_context_maybe_generator(MR_Context *c, const char *id,
Index: runtime/mercury_context.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_context.h,v
retrieving revision 1.53
diff -u -p -b -r1.53 mercury_context.h
--- runtime/mercury_context.h 23 Aug 2009 22:52:35 -0000 1.53
+++ runtime/mercury_context.h 24 Aug 2009 00:37:46 -0000
@@ -460,7 +460,7 @@ extern void MR_destroy_context(M
/*
** MR_init_thread_stuff() initializes the lock structures for the runqueue,
-** and detect the number of threads to use on the LLC backend.
+** and detects the number of threads to use on the LLC backend.
*/
extern void MR_init_thread_stuff(void);
Index: runtime/mercury_wrapper.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_wrapper.c,v
retrieving revision 1.200
diff -u -p -b -r1.200 mercury_wrapper.c
--- runtime/mercury_wrapper.c 30 Oct 2009 03:33:30 -0000 1.200
+++ runtime/mercury_wrapper.c 5 Nov 2009 03:59:47 -0000
@@ -290,7 +290,7 @@ static char *MR_mem_usage_report
static int MR_num_output_args = 0;
/*
-** This is initialized to zero, if it is still zero after configuration of the
+** This is initialized to zero. If it is still zero after configuration of the
** runtime but before threads are started then the number of processors on the
** system is detected and used if support is available. Otherwise we fall back
** to 1
@@ -1736,6 +1736,7 @@ MR_process_options(int argc, char **argv
#if defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ)
MR_thread_pinning = MR_TRUE;
#endif
+ break;
case MR_PROFILE_PARALLEL_EXECUTION:
#if defined(MR_THREAD_SAFE) && defined(MR_PROFILE_PARALLEL_EXECUTION_SUPPORT)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 489 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20091105/7a853821/attachment.sig>
More information about the reviews
mailing list