[m-rev.] for review: reduce accesses to thread-specific variable

Peter Wang wangp at students.cs.mu.OZ.AU
Fri Jul 7 18:20:17 AEST 2006


On saturn this change makes asm_jump.par.gc 7% slower than asm_jump.gc,
rather than 21% slower as it was before (according to speedtest).


Estimated hours taken: 5
Branches: main

In low-level, parallel grades which don't use gcc global registers, each
reference to a Mercury register (etc.) references a thread-specific Mercury
engine base address `MR_thread_engine_base'.  This is either a thread-specific
variable or a call to pthread_getspecific().  This patch improves performance
in these grades by copying `MR_thread_engine_base' once at the start of each
basic block into a local variable, then using the local copy for the rest of
the basic block.

compiler/options.m:
	Add `--local-thread-engine-base' option, enabled by default.

compiler/llds_out.m:
	When `--local-thread-engine-base' is enabled, emit code to redefine
	`MR_maybe_local_thread_engine_base' to `MR_local_thread_engine_base'
	before the code of a procedure, and undo it afterwards.

	After each label instruction initialise a local variable
	`MR_local_thread_engine_base' with a copy of `MR_thread_engine_base'.

runtime/mercury_engine.h:
	If gcc global registers are not used then #define `MR_engine_base' in
	terms of `MR_maybe_local_thread_engine_base', and initially #define
	`MR_maybe_local_thread_engine_base' to `MR_thread_engine_base'.


Index: ./compiler/options.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/options.m,v
retrieving revision 1.517
diff -u -r1.517 options.m
--- ./compiler/options.m	8 Jun 2006 08:19:26 -0000	1.517
+++ ./compiler/options.m	7 Jul 2006 04:24:18 -0000
@@ -602,6 +602,7 @@
     ;       emit_c_loops
     ;       procs_per_c_function
     ;       everything_in_one_c_function
+    ;       local_thread_engine_base
 
     %   - IL
     %   (none yet)
@@ -1306,7 +1307,8 @@
     use_macro_for_redo_fail             -   bool(no),
     emit_c_loops                        -   bool(no),
     procs_per_c_function                -   int(1),
-    everything_in_one_c_function        -   special
+    everything_in_one_c_function        -   special,
+    local_thread_engine_base            -   bool(yes)
 ]).
 option_defaults_2(target_code_compilation_option, [
     % Target code compilation options
@@ -2069,6 +2071,7 @@
 long_option("everything-in-one-c-function", everything_in_one_c_function).
 long_option("everything-in-one-C-function", everything_in_one_c_function).
 long_option("inline-alloc",         inline_alloc).
+long_option("local-thread-engine-base", local_thread_engine_base).
 
 % Target code compilation options
 long_option("target-debug",         target_debug).
@@ -4235,7 +4238,11 @@
         "\tThis option has the effect of putting the code for all",
         "\tthe Mercury procedures in a single C function,",
         "\twhich produces the most efficient code but tends to",
-        "\tseverely stress the C compiler on large modules."
+        "\tseverely stress the C compiler on large modules.",
+        "--no-local-thread-engine-base",
+        "\tDon't copy the thread-local Mercury engine base address",
+        "\tinto local variables. This option only affects parallel",
+        "\tgrades not using the global register variables extension."
     ]).
 
 :- pred options_help_target_code_compilation(io::di, io::uo) is det.
Index: ./compiler/llds_out.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/llds_out.m,v
retrieving revision 1.280
diff -u -r1.280 llds_out.m
--- ./compiler/llds_out.m	8 Jun 2006 08:19:16 -0000	1.280
+++ ./compiler/llds_out.m	7 Jul 2006 04:12:00 -0000
@@ -850,7 +850,11 @@
     io.write_string("\n", !IO),
     globals.io_lookup_bool_option(auto_comments, PrintComments, !IO),
     globals.io_lookup_bool_option(emit_c_loops, EmitCLoops, !IO),
-    list.foldl(output_c_procedure(PrintComments, EmitCLoops), Procedures, !IO),
+    globals.io_lookup_bool_option(local_thread_engine_base,
+        LocalThreadEngineBase, !IO),
+    list.foldl(output_c_procedure(PrintComments, EmitCLoops,
+            LocalThreadEngineBase),
+        Procedures, !IO),
     io.write_string("MR_END_MODULE\n", !IO).
 
 %-----------------------------------------------------------------------------%
@@ -1645,10 +1649,11 @@
     list.foldl2(output_instruction_decls(StackLayoutLabels), Instrs,
         !DeclSet, !IO).
 
-:- pred output_c_procedure(bool::in, bool::in, c_procedure::in,
+:- pred output_c_procedure(bool::in, bool::in, bool::in, c_procedure::in,
     io::di, io::uo) is det.
 
-output_c_procedure(PrintComments, EmitCLoops, Proc, !IO) :-
+output_c_procedure(PrintComments, EmitCLoops, LocalThreadEngineBase, Proc,
+        !IO) :-
     Proc = c_procedure(Name, Arity, proc(_, ProcId), _, Instrs, _, _, _),
     proc_id_to_int(ProcId, ModeNum),
     (
@@ -1682,8 +1687,31 @@
         EmitCLoops = no,
         WhileSet = set_tree234.init
     ),
+
+    (
+        LocalThreadEngineBase = yes,
+        io.write_string("#ifdef MR_maybe_local_thread_engine_base\n", !IO),
+        io.write_string("\t#undef MR_maybe_local_thread_engine_base\n", !IO),
+        io.write_string("\t#define MR_maybe_local_thread_engine_base " ++
+            "MR_local_thread_engine_base\n", !IO),
+        io.write_string("#endif\n", !IO)
+    ;
+        LocalThreadEngineBase = no
+    ),
+
     output_instruction_list(Instrs, PrintComments,
-        CallerLabel - ContLabelSet, WhileSet, !IO).
+        CallerLabel - ContLabelSet, WhileSet, !IO),
+
+    (
+        LocalThreadEngineBase = yes,
+        io.write_string("#ifdef MR_maybe_local_thread_engine_base\n", !IO),
+        io.write_string("\t#undef MR_maybe_local_thread_engine_base\n", !IO),
+        io.write_string("\t#define MR_maybe_local_thread_engine_base " ++
+            "MR_thread_engine_base\n", !IO),
+        io.write_string("#endif\n", !IO)
+    ;
+        LocalThreadEngineBase = no
+    ).
 
     % Find the entry label for the procedure, for use as the profiling
     % "caller label" field in calls within this procedure.
@@ -2272,6 +2300,17 @@
 
 output_instruction(label(Label), ProfInfo, !IO) :-
     output_label_defn(Label, !IO),
+    globals.io_lookup_bool_option(local_thread_engine_base,
+        LocalThreadEngineBase, !IO),
+    (
+        LocalThreadEngineBase = yes,
+        io.write_string("#ifdef MR_maybe_local_thread_engine_base\n", !IO),
+        io.write_string("\tMercuryEngine *MR_local_thread_engine_base = " ++
+            "MR_thread_engine_base;\n", !IO),
+        io.write_string("#endif\n", !IO)
+    ;
+        LocalThreadEngineBase = no
+    ),
     maybe_output_update_prof_counter(Label, ProfInfo, !IO).
 
 output_instruction(goto(CodeAddr), ProfInfo, !IO) :-
Index: ./runtime/mercury_engine.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_engine.h,v
retrieving revision 1.39
diff -u -r1.39 mercury_engine.h
--- ./runtime/mercury_engine.h	4 Jul 2006 04:46:38 -0000	1.39
+++ ./runtime/mercury_engine.h	7 Jul 2006 03:19:55 -0000
@@ -392,9 +392,10 @@
 ** MR_engine_base refers to the engine in which execution is taking place.
 ** In the non-thread-safe situation, it is just a global variable.
 ** In the thread-safe situation, MR_engine_base is either a global
-** register (if one is available), or a macro that accesses thread-local
-** storage. We provide two macros, MR_ENGINE(x) and MR_CONTEXT(x),
-** that can be used in both kinds of situations to refer to fields
+** register (if one is available), a thread-local variable (if compiler
+** support is available), or a macro that accesses thread-local storage.
+** We provide two macros, MR_ENGINE(x) and MR_CONTEXT(x),
+** that can be used in all three kinds of situations to refer to fields
 ** of the engine structure, and to fields of the engine's current context.
 */
 
@@ -421,7 +422,13 @@
     ** MR_engine_base is defined in machdeps/{arch}.h
     */
   #else
-    #define MR_engine_base  MR_thread_engine_base
+    /*
+    ** MR_maybe_local_thread_engine_base can be redefined to refer to a
+    ** local copy of MR_thread_engine_base.
+    */
+    #define MR_maybe_local_thread_engine_base   MR_thread_engine_base
+
+    #define MR_engine_base  MR_maybe_local_thread_engine_base
   #endif
 
   #define MR_ENGINE(x)      (((MercuryEngine *) MR_engine_base)->x)
--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list