[m-rev.] diff: Change some default parallelism-related settings.
Paul Bone
pbone at csse.unimelb.edu.au
Wed Apr 13 16:31:05 AEST 2011
Adjust some default settings for automatic parallelism, and parallel runtime
options.
deep_profiler/mdprof_feedback.m:
Enable parallelisation of dependant conjunctions by default.
Enable the --report option by default.
Increase the desired amount of parallelism from 4.0 to 8.0.
Reduce the cost of waits from 250csc to 200csc.
Reduce the clique cost threshold from 100,000csc to 2,000csc.
Reduce the call site cost threshold from 50,000csc to 2,000csc.
runtime/mercury_context.h:
runtime/mercury_context.c:
runtime/mercury_wrapper.c:
doc/user_guide.texi:
Enable thread pinning by default.
Modify how the thread pinning option is handled.
runtime/mercury_wrapper.c:
Change the default for --max-contexts-per-thread so that it is much more
generous in stack segment grades.
Index: deep_profiler/mdprof_feedback.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/mdprof_feedback.m,v
retrieving revision 1.35
diff -u -p -b -r1.35 mdprof_feedback.m
--- deep_profiler/mdprof_feedback.m 27 Jan 2011 08:03:53 -0000 1.35
+++ deep_profiler/mdprof_feedback.m 13 Apr 2011 06:27:47 -0000
@@ -257,8 +257,9 @@ help_message =
--debug-read-profile
Generate debugging messages when reading the deep profile
and creating the deep structure.
- -r --report Display a report about the feedback information in the file
- after any processing has been done.
+ --no-report Suppress the report about the feedback information in the
+ file that is usually displayed after any processing has
+ been done.
The following options select sets of feedback information useful
for particular compiler optimizations:
@@ -300,10 +301,8 @@ help_message =
--implicit-parallelism-call-site-cost-threshold <value>
The cost of a call site to be considered for parallelism
against another call site.
- --implicit-parallelism-dependant-conjunctions
- Advise the compiler to parallelise dependant conjunctions.
- This will become the default once the implementation is
- complete.
+ --no-implicit-parallelism-dependant-conjunctions
+ Disable parallelisation of dependant conjunctions.
--implicit-parallelism-dependant-conjunctions-algorithm <alg>
Choose the algorithm that is used to estimate the speedup for
dependant calculations. The algorithms are:
@@ -436,7 +435,6 @@ read_deep_file(Input, Debug, MaybeDeep,
short('h', help).
short('v', verbosity).
short('V', version).
-short('r', report).
:- pred long(string::in, option::out) is semidet.
@@ -491,7 +489,7 @@ defaults(help, bool(no)
defaults(verbosity, int(2)).
defaults(version, bool(no)).
defaults(debug_read_profile, bool(no)).
-defaults(report, bool(no)).
+defaults(report, bool(yes)).
defaults(calls_above_threshold_sorted, bool(no)).
defaults(calls_above_threshold_sorted_measure, string("mean")).
@@ -499,7 +497,7 @@ defaults(calls_above_threshold_sorted_me
defaults(candidate_parallel_conjunctions, bool(no)).
defaults(implicit_parallelism, bool(no)).
-defaults(desired_parallelism, string("4.0")).
+defaults(desired_parallelism, string("8.0")).
% XXX: These values have been chosen arbitrarily, appropriately values should
% be tested for.
defaults(implicit_parallelism_intermodule_var_use, bool(no)).
@@ -507,11 +505,11 @@ defaults(implicit_parallelism_sparking_c
defaults(implicit_parallelism_sparking_delay, int(1000)).
defaults(implicit_parallelism_barrier_cost, int(100)).
defaults(implicit_parallelism_future_signal_cost, int(100)).
-defaults(implicit_parallelism_future_wait_cost, int(250)).
+defaults(implicit_parallelism_future_wait_cost, int(200)).
defaults(implicit_parallelism_context_wakeup_delay, int(1000)).
-defaults(implicit_parallelism_clique_cost_threshold, int(100000)).
-defaults(implicit_parallelism_call_site_cost_threshold, int(50000)).
-defaults(implicit_parallelism_dependant_conjunctions, bool(no)).
+defaults(implicit_parallelism_clique_cost_threshold, int(2000)).
+defaults(implicit_parallelism_call_site_cost_threshold, int(2000)).
+defaults(implicit_parallelism_dependant_conjunctions, bool(yes)).
defaults(implicit_parallelism_dependant_conjunctions_algorithm,
string("overlap")).
defaults(implicit_parallelism_best_parallelisation_algorithm,
Index: doc/user_guide.texi
===================================================================
RCS file: /home/mercury1/repository/mercury/doc/user_guide.texi,v
retrieving revision 1.624
diff -u -p -b -r1.624 user_guide.texi
--- doc/user_guide.texi 5 Apr 2011 10:27:25 -0000 1.624
+++ doc/user_guide.texi 13 Apr 2011 06:27:53 -0000
@@ -9980,17 +9980,14 @@ grade.
@c parallel, threadscope grade.
@sp 1
-@item --thread-pinning
-@findex --thread-pinning
-Request that the runtime system attempts to pin Mercury engines (POSIX threads)
+@item --no-thread-pinning
+@findex --no-thread-pinning
+Prevent the runtime system from pinning Mercury engines (POSIX threads)
to CPU cores/hardware threads.
This only has an effect if the executable was built in a parallel low-level C
grade.
-This is disabled by default but may be enabled by default in the future.
-@c In case this is enabled by default the following comment is relevant.
-@c This is disabled by default unless @samp{-P @var{num}} is not specified and the
-@c runtime system is able to detect the number of processors enabled by the
-@c operating system.
+Manually specifying -P will also disable thread pinning if it is not explicitly
+enabled.
@c @item -r @var{num}
@c @findex -r (runtime option)
Index: runtime/mercury_context.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_context.c,v
retrieving revision 1.88
diff -u -p -b -r1.88 mercury_context.c
--- runtime/mercury_context.c 5 Apr 2011 10:27:26 -0000 1.88
+++ runtime/mercury_context.c 13 Apr 2011 06:27:52 -0000
@@ -115,7 +115,8 @@ static MR_Integer MR_profile_paral
*/
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
static MercuryLock MR_next_cpu_lock;
-MR_bool MR_thread_pinning = MR_FALSE;
+MR_bool MR_thread_pinning_configured = MR_TRUE;
+MR_bool MR_thread_pinning_in_use;
static MR_Unsigned MR_next_cpu = 0;
#ifdef MR_HAVE_SCHED_GETCPU
static MR_Integer MR_primordial_thread_cpu = -1;
@@ -230,23 +231,17 @@ MR_init_thread_stuff(void)
if (result < 1) {
/* We couldn't determine the number of processors. */
MR_num_threads = 1;
+ #ifdef MR_LL_PARALLEL_CONJ
+ MR_thread_pinning_in_use = MR_FALSE;
+ #endif
} else {
MR_num_threads = result;
- /*
- ** On systems that don't support sched_setaffinity we don't try to
- ** automatically enable thread pinning. This prevents a runtime
- ** warning that could unnecessarily confuse the user.
- **/
- #if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
- /*
- ** Comment this back in to enable thread pinning by default if we
- ** autodetected the correct number of CPUs.
- */
- /* MR_thread_pinning = MR_TRUE; */
- #endif
}
#else /* ! defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) */
MR_num_threads = 1;
+ #ifdef MR_LL_PARALLEL_CONJ
+ MR_thread_pinning_in_use = MR_FALSE;
+ #endif
#endif /* ! defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) */
}
#ifdef MR_LL_PARALLEL_CONJ
@@ -270,7 +265,7 @@ MR_pin_primordial_thread(void)
** We don't need locking to pin the primordial thread as it is called
** before any other threads exist.
*/
- if (MR_thread_pinning) {
+ if (MR_thread_pinning_configured && MR_thread_pinning_in_use) {
MR_primordial_thread_cpu = sched_getcpu();
if (MR_primordial_thread_cpu == -1) {
perror("Warning: unable to determine the current CPU for "
@@ -293,7 +288,7 @@ MR_pin_thread(void)
{
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
MR_LOCK(&MR_next_cpu_lock, "MR_pin_thread");
- if (MR_thread_pinning) {
+ if (MR_thread_pinning_configured && MR_thread_pinning_in_use) {
#if defined(MR_HAVE_SCHED_GETCPU)
if (MR_next_cpu == MR_primordial_thread_cpu) {
MR_next_cpu++;
@@ -321,12 +316,12 @@ MR_do_pin_thread(int cpu)
** If this failed once, it will probably fail again, so we
** disable it.
*/
- MR_thread_pinning = MR_FALSE;
+ MR_thread_pinning_in_use = MR_FALSE;
}
} else {
perror("Warning: Couldn't set CPU affinity due to a static "
"system limit: ");
- MR_thread_pinning = MR_FALSE;
+ MR_thread_pinning_in_use = MR_FALSE;
}
}
#endif
Index: runtime/mercury_context.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_context.h,v
retrieving revision 1.64
diff -u -p -b -r1.64 mercury_context.h
--- runtime/mercury_context.h 5 Apr 2011 10:27:26 -0000 1.64
+++ runtime/mercury_context.h 13 Apr 2011 06:27:52 -0000
@@ -354,7 +354,8 @@ extern MR_Context *MR_runqueue_tai
extern MercuryCond MR_runqueue_cond;
#endif
#ifdef MR_LL_PARALLEL_CONJ
- extern MR_bool MR_thread_pinning;
+ extern MR_bool MR_thread_pinning_configured;
+ extern MR_bool MR_thread_pinning_in_use;
#endif
#ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
Index: runtime/mercury_wrapper.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_wrapper.c,v
retrieving revision 1.218
diff -u -p -b -r1.218 mercury_wrapper.c
--- runtime/mercury_wrapper.c 5 Apr 2011 10:27:26 -0000 1.218
+++ runtime/mercury_wrapper.c 13 Apr 2011 06:28:06 -0000
@@ -203,8 +203,17 @@ size_t MR_stack_margin_size = 128;
/* primary cache size to optimize for, in bytes */
size_t MR_pcache_size = 8192;
-/* soft limits on the number of contexts we can create */
+/*
+** Limits on the number of contexts we can create.
+** These allow 64MB of det stacks regardless of which grade is being used.
+** This assumes sizeof(MR_Word) is 8 and that the detstack is 64 Kwords big in
+** stseg grades and 4096 Kwords big in non-stseg grades.
+*/
+#ifdef MR_STACK_SEGMENTS
+MR_Unsigned MR_max_contexts_per_thread = 128;
+#else
MR_Unsigned MR_max_contexts_per_thread = 2;
+#endif
MR_Unsigned MR_max_outstanding_contexts;
#ifdef MR_LL_PARALLEL_CONJ
@@ -1402,7 +1411,7 @@ struct MR_option MR_long_opts[] = {
MR_RUNTIME_GRANULAITY_WSDEQUE_LENGTH_FACTOR },
{ "worksteal-max-attempts", 1, 0, MR_WORKSTEAL_MAX_ATTEMPTS },
{ "worksteal-sleep-msecs", 1, 0, MR_WORKSTEAL_SLEEP_MSECS },
- { "thread-pinning", 0, 0, MR_THREAD_PINNING },
+ { "no-thread-pinning", 0, 0, MR_THREAD_PINNING },
{ "profile-parallel-execution", 0, 0, MR_PROFILE_PARALLEL_EXECUTION },
{ "mdb-tty", 1, 0, MR_MDB_TTY },
{ "mdb-in", 1, 0, MR_MDB_IN },
@@ -1859,7 +1868,7 @@ MR_process_options(int argc, char **argv
case MR_THREAD_PINNING:
#if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
- MR_thread_pinning = MR_TRUE;
+ MR_thread_pinning_configured = MR_FALSE;
#endif
break;
@@ -2283,6 +2292,7 @@ MR_process_options(int argc, char **argv
if (MR_num_threads < 1) {
MR_usage();
}
+ MR_thread_pinning_in_use = MR_FALSE;
#endif
break;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 489 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20110413/34aefa01/attachment.sig>
More information about the reviews
mailing list