[m-rev.] diff: Change some default parallelism-related settings.

Paul Bone pbone at csse.unimelb.edu.au
Wed Apr 13 16:31:05 AEST 2011


Adjust some default settings for automatic parallelism, and parallel runtime
options.

deep_profiler/mdprof_feedback.m:
    Enable parallelisation of dependant conjunctions by default.

    Enable the --report option by default.

    Increase the desired amount of parallelism from 4.0 to 8.0.

    Reduce the cost of waits from 250csc to 200csc.

    Reduce the clique cost threshold from 100,000csc to 2,000csc.

    Reduce the call site cost threshold from 50,000csc to 2,000csc.

runtime/mercury_context.h:
runtime/mercury_context.c:
runtime/mercury_wrapper.c:
doc/user_guide.texi:
    Enable thread pinning by default.

    Modify how the thread pinning option is handled.

runtime/mercury_wrapper.c:
    Change the default for --max-contexts-per-thread so that it is much more
    generous in stack segment grades.

Index: deep_profiler/mdprof_feedback.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/mdprof_feedback.m,v
retrieving revision 1.35
diff -u -p -b -r1.35 mdprof_feedback.m
--- deep_profiler/mdprof_feedback.m	27 Jan 2011 08:03:53 -0000	1.35
+++ deep_profiler/mdprof_feedback.m	13 Apr 2011 06:27:47 -0000
@@ -257,8 +257,9 @@ help_message =
     --debug-read-profile
                     Generate debugging messages when reading the deep profile
                     and creating the deep structure.
-    -r --report     Display a report about the feedback information in the file
-                    after any processing has been done.
+    --no-report     Suppress the report about the feedback information in the
+                    file that is usually displayed after any processing has
+                    been done.
 
     The following options select sets of feedback information useful
     for particular compiler optimizations:
@@ -300,10 +301,8 @@ help_message =
     --implicit-parallelism-call-site-cost-threshold <value>
                 The cost of a call site to be considered for parallelism
                 against another call site.
-    --implicit-parallelism-dependant-conjunctions
-                Advise the compiler to parallelise dependant conjunctions.
-                This will become the default once the implementation is
-                complete.
+    --no-implicit-parallelism-dependant-conjunctions
+                Disable parallelisation of dependant conjunctions.
     --implicit-parallelism-dependant-conjunctions-algorithm <alg>
                 Choose the algorithm that is used to estimate the speedup for
                 dependant calculations.  The algorithms are:
@@ -436,7 +435,6 @@ read_deep_file(Input, Debug, MaybeDeep, 
 short('h',  help).
 short('v',  verbosity).
 short('V',  version).
-short('r',  report).
 
 :- pred long(string::in, option::out) is semidet.
 
@@ -491,7 +489,7 @@ defaults(help,                  bool(no)
 defaults(verbosity,             int(2)).
 defaults(version,               bool(no)).
 defaults(debug_read_profile,    bool(no)).
-defaults(report,                bool(no)).
+defaults(report,                bool(yes)).
 
 defaults(calls_above_threshold_sorted,                      bool(no)).
 defaults(calls_above_threshold_sorted_measure,              string("mean")).
@@ -499,7 +497,7 @@ defaults(calls_above_threshold_sorted_me
 defaults(candidate_parallel_conjunctions,                   bool(no)).
 
 defaults(implicit_parallelism,                              bool(no)).
-defaults(desired_parallelism,                               string("4.0")).
+defaults(desired_parallelism,                               string("8.0")).
 % XXX: These values have been chosen arbitrarily, appropriately values should
 % be tested for.
 defaults(implicit_parallelism_intermodule_var_use,          bool(no)).
@@ -507,11 +505,11 @@ defaults(implicit_parallelism_sparking_c
 defaults(implicit_parallelism_sparking_delay,               int(1000)).
 defaults(implicit_parallelism_barrier_cost,                 int(100)).
 defaults(implicit_parallelism_future_signal_cost,           int(100)).
-defaults(implicit_parallelism_future_wait_cost,             int(250)).
+defaults(implicit_parallelism_future_wait_cost,             int(200)).
 defaults(implicit_parallelism_context_wakeup_delay,         int(1000)).
-defaults(implicit_parallelism_clique_cost_threshold,        int(100000)).
-defaults(implicit_parallelism_call_site_cost_threshold,     int(50000)).
-defaults(implicit_parallelism_dependant_conjunctions,       bool(no)).
+defaults(implicit_parallelism_clique_cost_threshold,        int(2000)).
+defaults(implicit_parallelism_call_site_cost_threshold,     int(2000)).
+defaults(implicit_parallelism_dependant_conjunctions,       bool(yes)).
 defaults(implicit_parallelism_dependant_conjunctions_algorithm,
     string("overlap")).
 defaults(implicit_parallelism_best_parallelisation_algorithm,
Index: doc/user_guide.texi
===================================================================
RCS file: /home/mercury1/repository/mercury/doc/user_guide.texi,v
retrieving revision 1.624
diff -u -p -b -r1.624 user_guide.texi
--- doc/user_guide.texi	5 Apr 2011 10:27:25 -0000	1.624
+++ doc/user_guide.texi	13 Apr 2011 06:27:53 -0000
@@ -9980,17 +9980,14 @@ grade.
 @c parallel, threadscope grade.
 
 @sp 1
-@item --thread-pinning
-@findex --thread-pinning
-Request that the runtime system attempts to pin Mercury engines (POSIX threads)
+@item --no-thread-pinning
+@findex --no-thread-pinning
+Prevent the runtime system from pinning Mercury engines (POSIX threads)
 to CPU cores/hardware threads.
 This only has an effect if the executable was built in a parallel low-level C
 grade.
-This is disabled by default but may be enabled by default in the future.
-@c In case this is enabled by default the following comment is relevant.
-@c This is disabled by default unless @samp{-P @var{num}} is not specified and the
-@c runtime system is able to detect the number of processors enabled by the
-@c operating system.
+Manually specifying -P will also disable thread pinning if it is not explicitly
+enabled.
 
 @c @item -r @var{num}
 @c @findex -r (runtime option)
Index: runtime/mercury_context.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_context.c,v
retrieving revision 1.88
diff -u -p -b -r1.88 mercury_context.c
--- runtime/mercury_context.c	5 Apr 2011 10:27:26 -0000	1.88
+++ runtime/mercury_context.c	13 Apr 2011 06:27:52 -0000
@@ -115,7 +115,8 @@ static MR_Integer       MR_profile_paral
 */
 #if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
 static MercuryLock      MR_next_cpu_lock;
-MR_bool                 MR_thread_pinning = MR_FALSE;
+MR_bool                 MR_thread_pinning_configured = MR_TRUE;
+MR_bool                 MR_thread_pinning_in_use;
 static MR_Unsigned      MR_next_cpu = 0;
   #ifdef  MR_HAVE_SCHED_GETCPU
 static MR_Integer       MR_primordial_thread_cpu = -1;
@@ -230,23 +231,17 @@ MR_init_thread_stuff(void)
         if (result < 1) {
             /* We couldn't determine the number of processors. */
             MR_num_threads = 1;
+  #ifdef MR_LL_PARALLEL_CONJ
+            MR_thread_pinning_in_use = MR_FALSE;
+  #endif
         } else {
             MR_num_threads = result;
-            /*
-            ** On systems that don't support sched_setaffinity we don't try to
-            ** automatically enable thread pinning. This prevents a runtime
-            ** warning that could unnecessarily confuse the user.
-            **/
-    #if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
-            /*
-            ** Comment this back in to enable thread pinning by default if we
-            ** autodetected the correct number of CPUs.
-            */
-            /* MR_thread_pinning = MR_TRUE; */
-    #endif
         }
     #else /* ! defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) */
         MR_num_threads = 1;
+  #ifdef MR_LL_PARALLEL_CONJ
+        MR_thread_pinning_in_use = MR_FALSE;
+  #endif
   #endif /* ! defined(MR_HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN) */
     }
   #ifdef MR_LL_PARALLEL_CONJ
@@ -270,7 +265,7 @@ MR_pin_primordial_thread(void)
     ** We don't need locking to pin the primordial thread as it is called
     ** before any other threads exist.
     */
-    if (MR_thread_pinning) {
+    if (MR_thread_pinning_configured && MR_thread_pinning_in_use) {
         MR_primordial_thread_cpu = sched_getcpu();
         if (MR_primordial_thread_cpu == -1) {
             perror("Warning: unable to determine the current CPU for "
@@ -293,7 +288,7 @@ MR_pin_thread(void)
 {
 #if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
     MR_LOCK(&MR_next_cpu_lock, "MR_pin_thread");
-    if (MR_thread_pinning) {
+    if (MR_thread_pinning_configured && MR_thread_pinning_in_use) {
 #if defined(MR_HAVE_SCHED_GETCPU)
         if (MR_next_cpu == MR_primordial_thread_cpu) {
             MR_next_cpu++;
@@ -321,12 +316,12 @@ MR_do_pin_thread(int cpu)
             ** If this failed once, it will probably fail again, so we
             ** disable it.
             */
-            MR_thread_pinning = MR_FALSE;
+            MR_thread_pinning_in_use = MR_FALSE;
         }
     } else {
         perror("Warning: Couldn't set CPU affinity due to a static "
             "system limit: ");
-        MR_thread_pinning = MR_FALSE;
+        MR_thread_pinning_in_use = MR_FALSE;
     }
 }
 #endif
Index: runtime/mercury_context.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_context.h,v
retrieving revision 1.64
diff -u -p -b -r1.64 mercury_context.h
--- runtime/mercury_context.h	5 Apr 2011 10:27:26 -0000	1.64
+++ runtime/mercury_context.h	13 Apr 2011 06:27:52 -0000
@@ -354,7 +354,8 @@ extern      MR_Context  *MR_runqueue_tai
   extern    MercuryCond MR_runqueue_cond;
 #endif
 #ifdef  MR_LL_PARALLEL_CONJ
-  extern    MR_bool         MR_thread_pinning;
+  extern    MR_bool         MR_thread_pinning_configured;
+  extern    MR_bool         MR_thread_pinning_in_use;
 #endif
 
 #ifdef MR_PROFILE_PARALLEL_EXECUTION_SUPPORT
Index: runtime/mercury_wrapper.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_wrapper.c,v
retrieving revision 1.218
diff -u -p -b -r1.218 mercury_wrapper.c
--- runtime/mercury_wrapper.c	5 Apr 2011 10:27:26 -0000	1.218
+++ runtime/mercury_wrapper.c	13 Apr 2011 06:28:06 -0000
@@ -203,8 +203,17 @@ size_t      MR_stack_margin_size = 128;
 /* primary cache size to optimize for, in bytes */
 size_t      MR_pcache_size = 8192;
 
-/* soft limits on the number of contexts we can create */
+/*
+** Limits on the number of contexts we can create.
+** These allow 64MB of det stacks regardless of which grade is being used,
+** assuming sizeof(MR_Word) is 8 and the det stack is 64 Kwords in stseg
+** grades and 4096 Kwords in non-stseg grades.
+*/
+#ifdef MR_STACK_SEGMENTS
+MR_Unsigned MR_max_contexts_per_thread = 128;
+#else
 MR_Unsigned MR_max_contexts_per_thread = 2;
+#endif
 MR_Unsigned MR_max_outstanding_contexts;
 
 #ifdef MR_LL_PARALLEL_CONJ
@@ -1402,7 +1411,7 @@ struct MR_option MR_long_opts[] = {
         MR_RUNTIME_GRANULAITY_WSDEQUE_LENGTH_FACTOR },
     { "worksteal-max-attempts",         1, 0, MR_WORKSTEAL_MAX_ATTEMPTS },
     { "worksteal-sleep-msecs",          1, 0, MR_WORKSTEAL_SLEEP_MSECS },
-    { "thread-pinning",                 0, 0, MR_THREAD_PINNING },
+    { "no-thread-pinning",              0, 0, MR_THREAD_PINNING },
     { "profile-parallel-execution",     0, 0, MR_PROFILE_PARALLEL_EXECUTION },
     { "mdb-tty",                        1, 0, MR_MDB_TTY },
     { "mdb-in",                         1, 0, MR_MDB_IN },
@@ -1859,7 +1868,7 @@ MR_process_options(int argc, char **argv
 
             case MR_THREAD_PINNING:
 #if defined(MR_LL_PARALLEL_CONJ) && defined(MR_HAVE_SCHED_SETAFFINITY)
-                MR_thread_pinning = MR_TRUE;
+                MR_thread_pinning_configured = MR_FALSE;
 #endif
                 break;
 
@@ -2283,6 +2292,7 @@ MR_process_options(int argc, char **argv
                 if (MR_num_threads < 1) {
                     MR_usage();
                 }
+                MR_thread_pinning_in_use = MR_FALSE;
 #endif
                 break;
 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 489 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20110413/34aefa01/attachment.sig>


More information about the reviews mailing list