[m-rev.] For post-commit review: Loop control now works.
Paul Bone
pbone at csse.unimelb.edu.au
Sun Oct 9 21:03:33 AEDT 2011
For partial post-commit review by Zoltan.
Zoltan, you might like to check my changes to par_conj_gen.m, code_info.m and
proc_gen.m - these changes allow me to generate out-of-line LLDS code, and
handle the generation of the LLDS code for the loop_control scope.
I will continue to work on loop control, for example it doesn't yet use
inline foreign code. Therefore, reviewing the majority of this code is not
productive since I'll probably change it anyway.
---
The loop control transformation now works.
This patch commits the code-generator parts of the loop control transformation.
It also makes corrections and changes to the source-to-source, runtime and
library parts of the transformation.
Preliminary results look good, loop controlled right-recursive dependent code
performs as fast as independent right-recursive code, and it does so using the
minimum number of contexts (8 on apollo (an i7)). Previously, when
transforming code by hand, we needed 32 contexts on a 4 core system (taura).
The reason for this is that we changed our design so that the master context
would become blocked if there was no free slot. This ensures that once a
worker finishes it's current job new work is either already available or can be
made available promptly.
compiler/par_conj_gen.m:
compiler/code_gen.m:
Generate code for the new loop_control scope.
compiler/llds_out_instr.m:
Write out the lc_spawn_off instruction correctly.
compiler/code_info.m:
Add support for storing out-of-line code in the code_info structure.
compiler/proc_gen.m:
After generating a procedure's body add any out-of-line code stored in the
code_info structure onto the end of the procedure (after the exit code).
compiler/par_loop_control.m:
Add missing parts to the loop control transformation:
+ Add the barrier in the base case.
+ Transform non-parallel recursive calls.
+ Add a join_and_terminate call to the end of the forked-off code.
Make minor corrections to comments.
runtime/mercury_par_builtin.h:
runtime/mercury_par_builtin.c:
MR_lc_wait_free_slot and MR_lc_spawn_off no-longer mangle the labels they
are passed.
Fix a typeo that caused a bug.
Add debugging code.
library/par_builtin.m:
Store the value of LC in a stack slot during lc_wait_for_slot, This makes
sure it is available in the case that lc_wait_for_slot suspends the
context.
Remove the loop_control_slot type, we now use integers to represent the
position of a slot within a loop control structure.
Index: compiler/code_gen.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/code_gen.m,v
retrieving revision 1.186
diff -u -p -b -r1.186 code_gen.m
--- compiler/code_gen.m 21 Jul 2011 06:58:24 -0000 1.186
+++ compiler/code_gen.m 9 Oct 2011 09:54:26 -0000
@@ -266,6 +266,9 @@ generate_goal_expr(GoalExpr, GoalInfo, C
( Reason = from_ground_term(TermVar, from_ground_term_construct) ->
unify_gen.generate_ground_term(TermVar, SubGoal, !CI),
Code = empty
+ ; Reason = loop_control(LCVar, LCSVar) ->
+ par_conj_gen.generate_loop_control(SubGoal, LCVar, LCSVar, Code,
+ !CI)
;
commit_gen.generate_scope(Reason, CodeModel, GoalInfo,
ForwardLiveVarsBeforeGoal, SubGoal, Code, !CI)
Index: compiler/code_info.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/code_info.m,v
retrieving revision 1.394
diff -u -p -b -r1.394 code_info.m
--- compiler/code_info.m 16 Sep 2011 07:03:34 -0000 1.394
+++ compiler/code_info.m 9 Oct 2011 09:54:26 -0000
@@ -248,6 +248,11 @@
:- pred get_containing_goal_map_det(code_info::in, containing_goal_map::out)
is det.
+:- pred add_out_of_line_code(llds_code::in, code_info::in, code_info::out)
+ is det.
+
+:- pred get_out_of_line_code(code_info::in, llds_code::out) is det.
+
%---------------------------------------------------------------------------%
:- implementation.
@@ -483,7 +488,12 @@
% and their IDs are placed in an array slot which can be
% referred to statically.
cip_ts_string_table_size :: int,
- cip_ts_rev_string_table :: list(string)
+ cip_ts_rev_string_table :: list(string),
+
+ % Code that is part of this procedure, but that can be placed
+ % after the procedure without a cache penalty. For example,
+ % code that is spawned off by loop control is placed here.
+ cip_out_of_line_code :: llds_code
).
%---------------------------------------------------------------------------%
@@ -605,7 +615,8 @@ code_info_init(SaveSuccip, Globals, Pred
set_tree234.init,
set.init,
TSStringTableSize,
- TSRevStringTable
+ TSRevStringTable,
+ cord.empty
)
),
init_maybe_trace_info(TraceLevel, Globals, ModuleInfo,
@@ -783,6 +794,13 @@ get_containing_goal_map_det(CI, Containi
unexpected($module, $pred, "no map")
).
+add_out_of_line_code(NewCode, !CI) :-
+ Code0 = !.CI ^ code_info_persistent ^ cip_out_of_line_code,
+ Code = Code0 ++ NewCode,
+ !CI ^ code_info_persistent ^ cip_out_of_line_code := Code.
+
+get_out_of_line_code(CI, CI ^ code_info_persistent ^ cip_out_of_line_code).
+
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
Index: compiler/llds_out_instr.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/llds_out_instr.m,v
retrieving revision 1.12
diff -u -p -b -r1.12 llds_out_instr.m
--- compiler/llds_out_instr.m 30 Sep 2011 05:53:51 -0000 1.12
+++ compiler/llds_out_instr.m 9 Oct 2011 09:54:27 -0000
@@ -938,13 +938,12 @@ output_instruction(Info, Instr, ProfInfo
io.write_string(");\n", !IO)
;
Instr = lc_spawn_off(LCRval, LCSRval, ChildLabel),
- % XXX placeholder for pbone to fill in
- io.write_string("\tMR_lc_spawn_off(", !IO),
+ io.write_string("\tMR_lc_spawn_off((MR_LoopControl*)", !IO),
output_rval(Info, LCRval, !IO),
io.write_string(", ", !IO),
output_rval(Info, LCSRval, !IO),
io.write_string(", ", !IO),
- output_label(ChildLabel, !IO),
+ output_label_as_code_addr(ChildLabel, !IO),
io.write_string(");\n", !IO)
;
Instr = lc_join_and_terminate(LCRval, LCSRval),
Index: compiler/par_conj_gen.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/par_conj_gen.m,v
retrieving revision 1.46
diff -u -p -b -r1.46 par_conj_gen.m
--- compiler/par_conj_gen.m 16 Aug 2011 03:26:32 -0000 1.46
+++ compiler/par_conj_gen.m 9 Oct 2011 09:54:26 -0000
@@ -91,6 +91,7 @@
:- import_module hlds.hlds_goal.
:- import_module ll_backend.code_info.
:- import_module ll_backend.llds.
+:- import_module parse_tree.prog_data.
:- import_module list.
@@ -99,6 +100,9 @@
:- pred generate_par_conj(list(hlds_goal)::in, hlds_goal_info::in,
code_model::in, llds_code::out, code_info::in, code_info::out) is det.
+:- pred generate_loop_control(hlds_goal::in, prog_var::in, prog_var::in,
+ llds_code::out, code_info::in, code_info::out) is det.
+
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
@@ -113,18 +117,21 @@
:- import_module libs.globals.
:- import_module libs.options.
:- import_module ll_backend.code_gen.
+:- import_module ll_backend.code_info.
:- import_module ll_backend.continuation_info.
:- import_module ll_backend.exprn_aux.
+:- import_module ll_backend.var_locn.
:- import_module mdbcomp.goal_path.
-:- import_module parse_tree.prog_data.
:- import_module parse_tree.set_of_var.
+:- import_module assoc_list.
:- import_module bool.
:- import_module cord.
:- import_module int.
:- import_module list.
:- import_module map.
:- import_module maybe.
+:- import_module pair.
:- import_module require.
:- import_module set.
:- import_module string.
@@ -321,6 +328,56 @@ MR_threadscope_post_end_par_conj(&MR_sv(
%-----------------------------------------------------------------------------%
+generate_loop_control(Goal, LCVar, LCSVar, Code, !CI) :-
+ % We don't need to save the parent stack pointer, we do not use it in the
+ % main context and all the worker contexts will never have some data that
+ % we shouldn't clobber there.
+ % We also expect the runtime code to setup the parent stack pointer for us.
+
+ get_known_variables(!.CI, KnownVars),
+ NonLocals = goal_info_get_nonlocals(Goal ^ hlds_goal_info),
+ InputVars = set_of_var.intersect(NonLocals, list_to_set(KnownVars)),
+ save_variables_on_stack(to_sorted_list(InputVars), SaveCode, !CI),
+
+ % Create the call to spawn_off.
+ place_var(LCVar, reg(reg_r, 1), PlaceLCVar, !CI),
+ place_var(LCSVar, reg(reg_r, 2), PlaceLCSVar, !CI),
+ remember_position(!.CI, PositionBeforeSpawnOff),
+
+ get_next_label(SpawnOffLabel, !CI),
+ SpawnOffCallCode =
+ singleton(llds_instr(lc_spawn_off(lval(reg(reg_r, 1)),
+ lval(reg(reg_r, 2)), SpawnOffLabel),
+ "Spawn off job for worker using loop control")),
+ SpawnOffCode = PlaceLCVar ++ PlaceLCSVar ++ SpawnOffCallCode,
+ remember_position(!.CI, PositionAfterSpawnOff),
+
+ % Code to spawn off.
+ LabelCode = singleton(llds_instr(label(SpawnOffLabel),
+ "Label for spawned off code")),
+ reset_to_position(PositionBeforeSpawnOff, !CI),
+ clear_all_registers(no, !CI),
+ generate_goal(model_det, Goal, GoalCode, !CI),
+ % We expect that the join_and_terminate call is already in Goal.
+ SpawnedOffCode0 = LabelCode ++ GoalCode,
+ % Note: Zoltan, Peter and I (Paul) have discussed compressing the stack
+ % frame of the spawned off computation. This would be _instead of_ using
+ % the parent stack pointer. TODO: Before we can do this we need to
+ % determine in which loop controls we should use the parent's stack frame,
+ % and then perform this selectively. The primitives in the runtime system
+ % also need to support this.
+ replace_stack_vars_by_parent_sv(SpawnedOffCode0, SpawnedOffCode),
+
+ reset_to_position(PositionAfterSpawnOff, !CI),
+
+ % The spawned off code is written into the procedure seperatly.
+ add_out_of_line_code(SpawnedOffCode, !CI),
+
+ % Concatentate the inline code.
+ Code = SaveCode ++ SpawnOffCode.
+
+%-----------------------------------------------------------------------------%
+
% In the code of parallel conjuncts we have to refer to stack slots in
% the procedure's stack frame via the `parent_sp' register instead of the
% usual `sp' register, as the conjunct could be running in a different
Index: compiler/par_loop_control.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/par_loop_control.m,v
retrieving revision 1.2
diff -u -p -b -r1.2 par_loop_control.m
--- compiler/par_loop_control.m 27 Sep 2011 06:22:47 -0000 1.2
+++ compiler/par_loop_control.m 9 Oct 2011 09:54:27 -0000
@@ -600,7 +600,7 @@ create_inner_proc(RecParConjIds, OldPred
PredProcId = proc(PredId, ProcId),
% Now transform the predicate, this could not be done earlier because
- % we needed to know the knew PredProcId to re-write the recursive calls
+ % we needed to know the new PredProcId to re-write the recursive calls
% in the body.
proc_info_get_argmodes(OldProcInfo, ArgModes0),
proc_info_get_headvars(OldProcInfo, HeadVars0),
@@ -611,14 +611,22 @@ create_inner_proc(RecParConjIds, OldPred
varset.new_named_var("LC", LCVar, VarSet0, VarSet1),
map.det_insert(LCVar, loop_control_var_type, VarTypes0, VarTypes1),
should_preserve_tail_recursion(!.ModuleInfo, PreserveTailRecursion),
- get_wait_free_slot_proc(!.ModuleInfo, WaitFreeSlotProc),
+ get_lc_wait_free_slot_proc(!.ModuleInfo, WaitFreeSlotProc),
+ get_lc_join_and_terminate_proc(!.ModuleInfo, JoinAndTerminateProc),
- Info = loop_control_info(LCVar, OldPredProcId, PredProcId, PredSym,
- PreserveTailRecursion, WaitFreeSlotProc, lc_wait_free_slot_name),
+ Info = loop_control_info(!.ModuleInfo, LCVar, OldPredProcId,
+ PredProcId, PredSym, PreserveTailRecursion, WaitFreeSlotProc,
+ lc_wait_free_slot_name, JoinAndTerminateProc,
+ lc_join_and_terminate_name),
goal_loop_control_all_paths(Info, RecParConjIds,
- ContainingGoalMap, Body0, Body, VarSet1, VarSet,
+ ContainingGoalMap, Body0, Body1, VarSet1, VarSet,
VarTypes1, VarTypes),
+ % Fixup the remaining recursive calls, and add barriers in the base
+ % cases.
+ goal_loop_control_fixup(Info, RecParConjIds, _,
+ Body1, Body),
+
% Now create the new proc_info structure.
HeadVars = [LCVar | HeadVars0],
ArgTypes = [loop_control_var_type | ArgTypes0],
@@ -656,13 +664,16 @@ should_preserve_tail_recursion(ModuleInf
:- type loop_control_info
---> loop_control_info(
+ lci_module_info :: module_info,
lci_lc_var :: prog_var,
lci_rec_pred_proc_id :: pred_proc_id,
lci_inner_pred_proc_id :: pred_proc_id,
lci_inner_pred_name :: sym_name,
lci_preserve_tail_recursion :: preserve_tail_recursion,
lci_wait_free_slot_proc :: pred_proc_id,
- lci_wait_free_slot_proc_name :: sym_name
+ lci_wait_free_slot_proc_name :: sym_name,
+ lci_join_and_terminate_proc :: pred_proc_id,
+ lci_join_and_terminate_proc_name :: sym_name
).
:- type preserve_tail_recursion
@@ -779,8 +790,7 @@ par_conj_loop_control(Info, [LastConj0 |
goal_rewrite_recursive_call(Info, LastConj0, LastConj, _),
goal_to_conj_list(LastConj, LastConjGoals),
- % Process the remaining conjuncts, building up the nested set of ITEs from
- % inside to outside.
+ % Process the remaining conjuncts.
par_conj_loop_control2(Info, RevConjs, LastConjGoals, Goals, !VarSet,
!VarTypes),
create_conj_from_list(Goals, plain_conj, Goal0),
@@ -795,21 +805,28 @@ par_conj_loop_control(Info, [LastConj0 |
prog_varset::in, prog_varset::out, vartypes::in, vartypes::out) is det.
par_conj_loop_control2(_, [], LaterGoals, LaterGoals, !VarSet, !VarTypes).
-par_conj_loop_control2(Info, [Conj | RevConjs], LaterGoals, Goals, !VarSet,
+par_conj_loop_control2(Info, [Conj0 | RevConjs], LaterGoals, Goals, !VarSet,
!VarTypes) :-
% Create the "get free slot" call..
create_get_free_slot_goal(Info, LCSVar, GetFreeSlotGoal, !VarSet,
!VarTypes),
- % Wrap Conj in the loop control scope.
- LCVar = Info ^ lci_lc_var,
- ConjGoalInfo = Conj ^ hlds_goal_info,
+ % Add a join_and_terminate goal to the end of Conj0 forming Conj.
+ create_join_and_termiate_goal(Info, LCVar, LCSVar, JoinAndTermiateGoal),
+ Conj0GoalInfo = Conj0 ^ hlds_goal_info,
+ goal_to_conj_list(Conj0, Conj0Goals),
+ ConjGoals = Conj0Goals ++ [JoinAndTermiateGoal],
some [!NonLocals] (
- !:NonLocals = goal_info_get_nonlocals(ConjGoalInfo),
+ !:NonLocals = goal_info_get_nonlocals(Conj0GoalInfo),
insert(LCSVar, !NonLocals),
insert(LCVar, !NonLocals),
- goal_info_set_nonlocals(!.NonLocals, ConjGoalInfo, ScopeGoalInfo)
+ goal_info_set_nonlocals(!.NonLocals, Conj0GoalInfo, ConjGoalInfo)
),
+ conj_list_to_goal(ConjGoals, ConjGoalInfo, Conj),
+
+ % Wrap Conj in the loop control scope.
+ LCVar = Info ^ lci_lc_var,
+ ScopeGoalInfo = ConjGoalInfo,
ScopeGoal = hlds_goal(scope(loop_control(LCVar, LCSVar), Conj),
ScopeGoalInfo),
@@ -927,6 +944,155 @@ goals_fixup_goal_info(List, Fixup) :-
%----------------------------------------------------------------------------%
+ % This predicate does two things:
+ % + It inserts a loop control barrier into the base case(s) of the
+ % predicate.
+ % + It re-writes the recursive calls that aren't part of parallel
+ % conjunctions so that they call the inner procedure and pass the loop
+ % control variable.
+ %
+:- pred goal_loop_control_fixup(loop_control_info::in,
+ list(goal_id)::in, fixup_goal_info::out,
+ hlds_goal::in, hlds_goal::out) is det.
+
+goal_loop_control_fixup(Info, RecParConjIds, FixupGoalInfo, !Goal) :-
+ GoalInfo0 = !.Goal ^ hlds_goal_info,
+ GoalId = goal_info_get_goal_id(GoalInfo0),
+ (
+ % This goal is one of the trasnformed parallel conjunctions, nothing
+ % needs to be done.
+ % XXX: This may not work, I don't know if the goal ID is maintained.
+ member(GoalId, RecParConjIds)
+ ->
+ FixupGoalInfo = do_not_fixup_goal_info
+ ;
+ % This goal is a base case, insert the barrier.
+ not ( some [Callee] (
+ goal_calls(!.Goal, Callee),
+ (
+ Callee = Info ^ lci_rec_pred_proc_id
+ ;
+ Callee = Info ^ lci_inner_pred_proc_id
+ )
+ ) )
+ ->
+ goal_to_conj_list(!.Goal, Conjs0),
+ create_finish_loop_control_goal(Info, FinishLCGoal),
+ Conjs = Conjs0 ++ [FinishLCGoal],
+ conj_list_to_goal(Conjs, GoalInfo0, !:Goal),
+ fixup_goal_info(Info, !Goal),
+ FixupGoalInfo = fixup_goal_info
+ ;
+ !.Goal = hlds_goal(GoalExpr0, _),
+ (
+ ( GoalExpr0 = unify(_, _, _, _, _)
+ ; GoalExpr0 = generic_call(_, _, _, _)
+ ; GoalExpr0 = call_foreign_proc(_, _, _, _, _, _, _)
+ ),
+ % These cannot be a recursive call and they cannot be a base case
+ % since that is detected above.
+ unexpected($module, $pred, "Non-recursive atomic goal")
+ ;
+ GoalExpr0 = plain_call(PredId, ProcId, Args0, Builtin,
+ MaybeContext, _SymName0),
+ % This can only be a recursive call, it must be re-written
+ RecPredProcId = Info ^ lci_rec_pred_proc_id,
+ expect(unify(RecPredProcId, proc(PredId, ProcId)), $module, $pred,
+ "Expected recursive call"),
+ proc(InnerPredId, InnerProcId) = Info ^ lci_inner_pred_proc_id,
+ LCVar = Info ^ lci_lc_var,
+ Args = [LCVar | Args0],
+ SymName = Info ^ lci_inner_pred_name,
+ GoalExpr = plain_call(InnerPredId, InnerProcId, Args, Builtin,
+ MaybeContext, SymName),
+ FixupGoalInfo = fixup_goal_info
+ ;
+ GoalExpr0 = conj(ConjType, Conjs0),
+ conj_loop_control_fixup(Info, RecParConjIds, FixupGoalInfo,
+ Conjs0, Conjs),
+ GoalExpr = conj(ConjType, Conjs)
+ ;
+ GoalExpr0 = disj(_),
+ sorry($module, $pred, "disjunction")
+ ;
+ GoalExpr0 = switch(Var, CanFail, Cases0),
+ map2(case_loop_control_fixup(Info, RecParConjIds), Cases0, Cases,
+ FixupGoalInfos),
+ goals_fixup_goal_info(FixupGoalInfos, FixupGoalInfo),
+ GoalExpr = switch(Var, CanFail, Cases)
+ ;
+ GoalExpr0 = negation(_),
+ sorry($module, $pred, "negation")
+ ;
+ GoalExpr0 = scope(Reason, SubGoal0),
+ goal_loop_control_fixup(Info, RecParConjIds, FixupGoalInfo,
+ SubGoal0, SubGoal),
+ GoalExpr = scope(Reason, SubGoal)
+ ;
+ GoalExpr0 = if_then_else(ExistVars, Cond, Then0, Else0),
+ goal_loop_control_fixup(Info, RecParConjIds, FixupGoalInfoThen,
+ Then0, Then),
+ goal_loop_control_fixup(Info, RecParConjIds, FixupGoalInfoElse,
+ Else0, Else),
+ goals_fixup_goal_info([FixupGoalInfoThen, FixupGoalInfoElse],
+ FixupGoalInfo),
+ GoalExpr = if_then_else(ExistVars, Cond, Then, Else)
+ ;
+ GoalExpr0 = shorthand(_),
+ unexpected($module, $pred, "shorthand")
+ ),
+ !Goal ^ hlds_goal_expr := GoalExpr,
+ (
+ FixupGoalInfo = fixup_goal_info,
+ some [!NonLocals, !GoalInfo] (
+ !:GoalInfo = !.Goal ^ hlds_goal_info,
+ !:NonLocals = goal_info_get_nonlocals(!.GoalInfo),
+ insert(Info ^ lci_lc_var, !NonLocals),
+ goal_info_set_nonlocals(!.NonLocals, !GoalInfo),
+ goal_info_set_purity(purity_impure, !GoalInfo),
+ !Goal ^ hlds_goal_info := !.GoalInfo
+ )
+ ;
+ FixupGoalInfo = do_not_fixup_goal_info
+ )
+ ).
+
+:- pred conj_loop_control_fixup(loop_control_info::in, list(goal_id)::in,
+ fixup_goal_info::out, list(hlds_goal)::in, list(hlds_goal)::out) is det.
+
+conj_loop_control_fixup(_Info, _RecGoalIds, do_not_fixup_goal_info, [], []).
+conj_loop_control_fixup(Info, RecGoalIds, FixupGoalInfo,
+ [!.Conj | !.Conjs], [!:Conj | !:Conjs]) :-
+ (
+ not goal_calls(!.Conj, Callee),
+ (
+ Callee = Info ^ lci_rec_pred_proc_id
+ ;
+ Callee = Info ^ lci_inner_pred_proc_id
+ )
+ ->
+ % This Conj does not make a recursive call or contain a recursive
+ % parallel conjunction. We don't need to transform it.
+ conj_loop_control_fixup(Info, RecGoalIds, FixupGoalInfo, !Conjs)
+ ;
+ % This Conj has something that needs to be transformed.
+ goal_loop_control_fixup(Info, RecGoalIds, FixupGoalInfo, !Conj)
+ % There's not going to be anything else in this conjunct that needs to
+ % be transformed, we don't make a recursive call.
+ ).
+
+:- pred case_loop_control_fixup(loop_control_info::in, list(goal_id)::in,
+ case::in, case::out, fixup_goal_info::out) is det.
+
+case_loop_control_fixup(Info, RecParConjIds, !Case, FixupGoalInfo) :-
+ some [!Goal] (
+ !:Goal = !.Case ^ case_goal,
+ goal_loop_control_fixup(Info, RecParConjIds, FixupGoalInfo, !Goal),
+ !Case ^ case_goal := !.Goal
+ ).
+
+%----------------------------------------------------------------------------%
+
:- pred create_get_free_slot_goal(loop_control_info::in, prog_var::out,
hlds_goal::out, prog_varset::in, prog_varset::out,
vartypes::in, vartypes::out) is det.
@@ -948,6 +1114,57 @@ create_get_free_slot_goal(Info, LCSVar,
%----------------------------------------------------------------------------%
+:- pred create_create_loop_control_goal(module_info::in, prog_var::in,
+ prog_var::out, hlds_goal::out, prog_varset::in, prog_varset::out,
+ vartypes::in, vartypes::out) is det.
+
+create_create_loop_control_goal(ModuleInfo, NumContextsVar, LCVar, Goal,
+ !VarSet, !VarTypes) :-
+ varset.new_named_var("LC", LCVar, !VarSet),
+ map.det_insert(LCVar, loop_control_var_type, !VarTypes),
+ get_lc_create_proc(ModuleInfo, LCCreatePredId, LCCreateProcId),
+ goal_info_init(list_to_set([NumContextsVar, LCVar]),
+ instmap_delta_bind_var(LCVar), detism_det, purity_pure,
+ LCCreateGoalInfo),
+ Goal = hlds_goal(plain_call(LCCreatePredId,
+ LCCreateProcId, [NumContextsVar, LCVar], not_builtin, no,
+ lc_create_name),
+ LCCreateGoalInfo).
+
+%----------------------------------------------------------------------------%
+
+:- pred create_join_and_termiate_goal(loop_control_info::in, prog_var::in,
+ prog_var::in, hlds_goal::out) is det.
+
+create_join_and_termiate_goal(Info, LCVar, LCSVar, Goal) :-
+ proc(PredId, ProcId) = Info ^ lci_join_and_terminate_proc,
+ SymName = Info ^ lci_join_and_terminate_proc_name,
+
+ GoalExpr = plain_call(PredId, ProcId, [LCVar, LCSVar], not_builtin, no,
+ SymName),
+ NonLocals = list_to_set([LCVar, LCSVar]),
+ instmap_delta_init_reachable(InstmapDelta),
+ GoalInfo = impure_init_goal_info(NonLocals, InstmapDelta, detism_det),
+ Goal = hlds_goal(GoalExpr, GoalInfo).
+
+%----------------------------------------------------------------------------%
+
+:- pred create_finish_loop_control_goal(loop_control_info::in, hlds_goal::out)
+ is det.
+
+create_finish_loop_control_goal(Info, Goal) :-
+ get_lc_finish_loop_control_proc(Info ^ lci_module_info, PredId, ProcId),
+ LCVar = Info ^ lci_lc_var,
+
+ GoalExpr = plain_call(PredId, ProcId, [LCVar], not_builtin, no,
+ lc_finish_loop_control_name),
+ NonLocals = list_to_set([LCVar]),
+ instmap_delta_init_reachable(InstmapDelta),
+ GoalInfo = impure_init_goal_info(NonLocals, InstmapDelta, detism_det),
+ Goal = hlds_goal(GoalExpr, GoalInfo).
+
+%----------------------------------------------------------------------------%
+
:- type fixup_goal_info
---> fixup_goal_info
; do_not_fixup_goal_info.
@@ -1015,16 +1232,8 @@ update_outer_proc(PredProcId, InnerPredP
GetNumContextsGoalInfo),
% Create the call to lc_create
- varset.new_named_var("LC", LCVar, !VarSet),
- map.det_insert(LCVar, loop_control_var_type, !VarTypes),
- get_lc_create_proc(ModuleInfo, LCCreatePredId, LCCreateProcId),
- goal_info_init(list_to_set([NumContextsVar, LCVar]),
- instmap_delta_bind_var(LCVar), detism_det, purity_pure,
- LCCreateGoalInfo),
- LCCreateGoal = hlds_goal(plain_call(LCCreatePredId,
- LCCreateProcId, [NumContextsVar, LCVar], not_builtin, no,
- lc_create_name),
- LCCreateGoalInfo),
+ create_create_loop_control_goal(ModuleInfo, NumContextsVar, LCVar,
+ LCCreateGoal, !VarSet, !VarTypes),
% Create the inner call.
InnerCallArgs = [LCVar | HeadVars],
@@ -1101,8 +1310,9 @@ loop_control_var_type = defined_type(Sym
:- func loop_control_slot_var_type = mer_type.
-loop_control_slot_var_type = defined_type(Sym, [], kind_star) :-
- Sym = qualified(par_builtin_module_sym, "loop_control_slot").
+loop_control_slot_var_type = builtin_type(builtin_type_int).
+
+%----------------------------------------------------------------------------%
:- func lc_wait_free_slot_name = sym_name.
@@ -1113,11 +1323,11 @@ lc_wait_free_slot_name =
lc_wait_free_slot_name_unqualified = "lc_wait_free_slot".
-:- pred get_wait_free_slot_proc(module_info::in, pred_proc_id::out) is det.
+:- pred get_lc_wait_free_slot_proc(module_info::in, pred_proc_id::out) is det.
-get_wait_free_slot_proc(ModuleInfo, proc(PredId, ProcId)) :-
- lookup_lc_pred_proc(ModuleInfo, lc_wait_free_slot_name_unqualified, 2, PredId,
- ProcId).
+get_lc_wait_free_slot_proc(ModuleInfo, proc(PredId, ProcId)) :-
+ lookup_lc_pred_proc(ModuleInfo, lc_wait_free_slot_name_unqualified, 2,
+ PredId, ProcId).
:- func lc_default_num_contexts_name_unqualified = string.
@@ -1151,6 +1361,40 @@ get_lc_create_proc(ModuleInfo, PredId, P
lookup_lc_pred_proc(ModuleInfo, lc_create_name_unqualified, 2, PredId,
ProcId).
+:- func lc_join_and_terminate_name_unqualified = string.
+
+lc_join_and_terminate_name_unqualified = "lc_join_and_terminate".
+
+:- func lc_join_and_terminate_name = sym_name.
+
+lc_join_and_terminate_name =
+ qualified(par_builtin_module_sym, lc_join_and_terminate_name_unqualified).
+
+:- pred get_lc_join_and_terminate_proc(module_info::in, pred_proc_id::out)
+ is det.
+
+get_lc_join_and_terminate_proc(ModuleInfo, proc(PredId, ProcId)) :-
+ lookup_lc_pred_proc(ModuleInfo, lc_join_and_terminate_name_unqualified, 2,
+ PredId, ProcId).
+
+:- func lc_finish_loop_control_name_unqualified = string.
+
+lc_finish_loop_control_name_unqualified = "lc_finish".
+
+:- func lc_finish_loop_control_name = sym_name.
+
+lc_finish_loop_control_name =
+ qualified(par_builtin_module_sym, lc_finish_loop_control_name_unqualified).
+
+:- pred get_lc_finish_loop_control_proc(module_info::in,
+ pred_id::out, proc_id::out) is det.
+
+get_lc_finish_loop_control_proc(ModuleInfo, PredId, ProcId) :-
+ lookup_lc_pred_proc(ModuleInfo, lc_finish_loop_control_name_unqualified, 1,
+ PredId, ProcId).
+
+%----------------------------------------------------------------------------%
+
:- pred lookup_lc_pred_proc(module_info::in, string::in, arity::in,
pred_id::out, proc_id::out) is det.
@@ -1158,6 +1402,8 @@ lookup_lc_pred_proc(ModuleInfo, Sym, Ari
lookup_builtin_pred_proc_id(ModuleInfo, par_builtin_module_sym,
Sym, pf_predicate, Arity, only_mode, PredId, ProcId).
+%----------------------------------------------------------------------------%
+
:- func par_builtin_module_sym = sym_name.
par_builtin_module_sym = unqualified("par_builtin").
Index: compiler/proc_gen.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/proc_gen.m,v
retrieving revision 1.50
diff -u -p -b -r1.50 proc_gen.m
--- compiler/proc_gen.m 21 Jul 2011 06:58:26 -0000 1.50
+++ compiler/proc_gen.m 9 Oct 2011 07:18:21 -0000
@@ -373,8 +373,10 @@ generate_proc_code(PredInfo, ProcInfo0,
% Generate code for the procedure.
generate_category_code(CodeModel, ProcContext, Goal, OutsideResumePoint,
- TraceSlotInfo, CodeTree, MaybeTraceCallLabel, FrameInfo,
+ TraceSlotInfo, CodeTree0, MaybeTraceCallLabel, FrameInfo,
CodeInfo0, CodeInfo),
+ get_out_of_line_code(CodeInfo, OutOfLineCode),
+ CodeTree = CodeTree0 ++ OutOfLineCode,
get_max_reg_in_use_at_trace(CodeInfo, MaxTraceReg),
get_static_cell_info(CodeInfo, StaticCellInfo),
global_data_set_static_cell_info(StaticCellInfo, !GlobalData),
Index: library/par_builtin.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/par_builtin.m,v
retrieving revision 1.31
diff -u -p -b -r1.31 par_builtin.m
--- library/par_builtin.m 30 Sep 2011 04:35:33 -0000 1.31
+++ library/par_builtin.m 9 Oct 2011 09:54:27 -0000
@@ -75,8 +75,6 @@
:- type loop_control.
-:- type loop_control_slot.
-
% Create a loop control structure.
% For documentation, see MR_lc_create in mercury_par_builtin.h.
%
@@ -91,19 +89,19 @@
% For documentation, see MR_lc_try_get_free_slot in mercury_par_builtin.h
% This call fails if there is no free slot available.
%
-:- impure pred lc_free_slot(loop_control::in, loop_control_slot::out)
+:- impure pred lc_free_slot(loop_control::in, int::out)
is semidet.
% Allocate a free slot from the loop control structure and return it.
% This call blocks the context until a free slot is available.
%
-:- impure pred lc_wait_free_slot(loop_control::in, loop_control_slot::out)
+:- impure pred lc_wait_free_slot(loop_control::in, int::out)
is det.
% Finish one iteration of the loop. This call does not return.
% For documentation, see MR_lc_join_and_terminate in mercury_par_builtin.h.
%
-:- impure pred lc_join_and_terminate(loop_control::in, loop_control_slot::in)
+:- impure pred lc_join_and_terminate(loop_control::in, int::in)
is det.
% Get the default number of contexts to use for loop control.
@@ -288,15 +286,6 @@ INIT mercury_sys_init_par_builtin_module
:- pragma foreign_type("C#", loop_control, "object").
:- pragma foreign_type("Java", loop_control, "java.lang.Object").
-:- pragma foreign_type("C", loop_control_slot, "MR_LoopControlSlot *",
- [can_pass_as_mercury_type]).
-
- % Placeholders only.
-:- pragma foreign_type(il, loop_control_slot, "class [mscorlib]System.Object").
-:- pragma foreign_type("Erlang", loop_control_slot, "").
-:- pragma foreign_type("C#", loop_control_slot, "object").
-:- pragma foreign_type("Java", loop_control_slot, "java.lang.Object").
-
:- pragma foreign_proc("C",
lc_create(NumWorkers::in, LC::out),
[will_not_call_mercury, will_not_throw_exception, thread_safe,
@@ -399,9 +388,12 @@ MR_def_label(par_builtin__lc_finish_1_0,
#endif
MR_END_MODULE
+MR_decl_label1(par_builtin__lc_wait_free_slot_2_0, 1)
+
MR_BEGIN_MODULE(par_builtin_module_lc_wait_free_slot)
MR_init_entry1(par_builtin__lc_wait_free_slot_2_0);
MR_INIT_PROC_LAYOUT_ADDR(mercury__par_builtin__lc_wait_free_slot_2_0);
+ MR_init_label1(par_builtin__lc_wait_free_slot_2_0,1);
MR_BEGIN_CODE
#ifdef MR_maybe_local_thread_engine_base
@@ -412,19 +404,30 @@ MR_BEGIN_CODE
MR_define_entry(mercury__par_builtin__lc_wait_free_slot_2_0)
MR_MAYBE_INIT_LOCAL_THREAD_ENGINE_BASE
+ MR_incr_sp(1);
+ MR_sv(1) = MR_r1; /* LC */
+ /*
+ ** LC must be saved to the stack so that we can resume from the label below
+ ** and retrieve it.
+ */
+
+MR_def_label(par_builtin__lc_wait_free_slot_2_0,1)
+ MR_MAYBE_INIT_LOCAL_THREAD_ENGINE_BASE
+
#if defined(MR_LL_PARALLEL_CONJ)
{
MR_LoopControl *lc;
- MR_LoopControlSlot *lcs;
+ MR_Unsigned lcs_idx;
- lc = (MR_LoopControl *) MR_r1;
- MR_lc_wait_free_slot(lc, lcs, par_builtin__lc_wait_free_slot_2_0);
- MR_r1 = (MR_Word)lcs;
+ lc = (MR_LoopControl *) MR_sv(1);
+ MR_lc_wait_free_slot(lc, lcs_idx, MR_LABEL_AP(par_builtin__lc_wait_free_slot_2_0_i1));
+ MR_r1 = (MR_Word)lcs_idx;
}
#else
MR_fatal_error(""lc_wait_free_slot is unavailable in this grade"");
#endif
+ MR_decr_sp(1);
MR_proceed();
#ifdef MR_maybe_local_thread_engine_base
@@ -473,8 +476,11 @@ mercury_sys_init_lc_write_out_proc_stati
[will_not_call_mercury, will_not_throw_exception, thread_safe],
"
#if defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ)
- LCS = MR_lc_try_get_free_slot(LC);
- SUCCESS_INDICATOR = (LCS != NULL);
+ {
+ MR_Unsigned LCS_0;
+ SUCCESS_INDICATOR = MR_lc_try_get_free_slot(LC, &LCS_0);
+ LCS = (MR_Integer)LCS_0;
+ }
#else
MR_fatal_error(""lc_free_slot is unavailable in this grade"");
#endif
Index: runtime/mercury_par_builtin.c
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_par_builtin.c,v
retrieving revision 1.6
diff -u -p -b -r1.6 mercury_par_builtin.c
--- runtime/mercury_par_builtin.c 4 Oct 2011 03:20:07 -0000 1.6
+++ runtime/mercury_par_builtin.c 9 Oct 2011 09:54:27 -0000
@@ -66,17 +66,21 @@ MR_lc_create(unsigned num_workers)
lc->MR_lc_finished = MR_FALSE;
lc->MR_lc_free_slot_hint = 0;
+#ifdef MR_DEBUG_LOOP_CONTROL
+ fprintf(stderr, "lc_create(%d) -> %p)\n", num_workers, lc);
+#endif
+
return lc;
}
/*
** Deprecated, this was part of our old loop control design.
*/
-MR_LoopControlSlot *
-MR_lc_try_get_free_slot(MR_LoopControl *lc)
+MR_Bool
+MR_lc_try_get_free_slot(MR_LoopControl *lc, MR_Unsigned *lcs_idx)
{
if (lc->MR_lc_outstanding_workers == lc->MR_lc_num_slots) {
- return NULL;
+ return MR_FALSE;
} else {
unsigned hint, offset, i;
@@ -93,17 +97,24 @@ MR_lc_try_get_free_slot(MR_LoopControl *
lc->MR_lc_slots[i].MR_lcs_is_free = MR_FALSE;
lc->MR_lc_free_slot_hint = (i+1) % lc->MR_lc_num_slots;
MR_atomic_inc_int(&(lc->MR_lc_outstanding_workers));
- return &(lc->MR_lc_slots[i]);
+ *lcs_idx = i;
+ return MR_TRUE;
}
}
- return NULL;
+ return MR_FALSE;
}
}
void
-MR_lc_spawn_off_func(MR_LoopControlSlot *lcs, MR_Code *code_ptr)
+MR_lc_spawn_off_func(MR_LoopControl *lc, MR_Unsigned lcs_idx, MR_Code
+ *code_ptr)
{
+ MR_LoopControlSlot *lcs = &(lc->MR_lc_slots[lcs_idx]);
+
+#if MR_DEBUG_LOOP_CONTROL
+ fprintf(stderr, "lc_spawn_off(%p, %d, %p)\n", lc, lcs_idx, code_ptr);
+#endif
if (lcs->MR_lcs_context == NULL) {
/*
** Allocate a new context.
@@ -120,14 +131,20 @@ MR_lc_spawn_off_func(MR_LoopControlSlot
}
void
-MR_lc_join(MR_LoopControl *lc, MR_LoopControlSlot *lcs)
+MR_lc_join(MR_LoopControl *lc, MR_Unsigned lcs_idx)
{
+ MR_LoopControlSlot *lcs;
MR_bool last_worker;
MR_Context *wakeup_context;
+ lcs = &(lc->MR_lc_slots[lcs_idx]);
+
+#ifdef MR_DEBUG_LOOP_CONTROL
+ fprintf(stderr, "lc_join(%p, %d)\n", lc, lcs_idx);
+#endif
+
lcs->MR_lcs_is_free = MR_TRUE;
- lc->MR_lc_free_slot_hint = (((MR_Word)lcs - (MR_Word)lc - sizeof(MR_LoopControl)) /
- sizeof(MR_LoopControlSlot)) + 1;
+ lc->MR_lc_free_slot_hint = lcs_idx;
/* Ensure the slot is free before we perform the decrement. */
MR_CPU_SFENCE;
last_worker =
@@ -149,6 +166,9 @@ MR_lc_join(MR_LoopControl *lc, MR_LoopCo
lc->MR_lc_master_context = NULL;
MR_US_UNLOCK(&(lc->MR_lc_master_context_lock));
if (wakeup_context != NULL) {
+#ifdef MR_DEBUG_LOOP_CONTROL
+ fprintf(stderr, "Waking up master\n");
+#endif
/*
** XXX: it is faster to switch to this context ourselves
** since we are going to unload our own context.
Index: runtime/mercury_par_builtin.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_par_builtin.h,v
retrieving revision 1.11
diff -u -p -b -r1.11 mercury_par_builtin.h
--- runtime/mercury_par_builtin.h 4 Oct 2011 03:20:07 -0000 1.11
+++ runtime/mercury_par_builtin.h 9 Oct 2011 09:54:27 -0000
@@ -342,6 +342,18 @@ typedef MR_Word MR_LoopControlSlot;
#if defined(MR_THREAD_SAFE) && defined(MR_LL_PARALLEL_CONJ)
+#ifdef MR_DEBUG_LOOP_CONTROL
+ #define MR_IF_DEBUG_LOOP_CONTORL(stmt) \
+ do { \
+ stmt; \
+ } while (0);
+#else
+ #define MR_IF_DEBUG_LOOP_CONTORL(stmt) \
+ do { \
+ ; \
+ } while (0);
+#endif
+
/*
** XXX: Make these functions macros, they are now functions to make debugging
** and testing easier.
@@ -358,6 +370,9 @@ extern MR_LoopControl *MR_lc_create(un
** that will be used in the name for a C label.
*/
#define MR_lc_finish_part1(lc, part2_label) \
+ MR_IF_DEBUG_LOOP_CONTORL( \
+ fprintf(stderr, "lc_finish_part1(%p, %p)\n", (lc), (part2_label))); \
+ \
do { \
(lc)->MR_lc_finished = MR_TRUE; \
/* \
@@ -391,6 +406,9 @@ extern MR_LoopControl *MR_lc_create(un
} while (0);
#define MR_lc_finish_part2(lc) \
+ MR_IF_DEBUG_LOOP_CONTORL( \
+ fprintf(stderr, "lc_finish_part2(%p)\n", (lc))); \
+ \
do { \
unsigned i; \
\
@@ -409,12 +427,17 @@ extern MR_LoopControl *MR_lc_create(un
**
** Deprecated: this was part of our old loop control design.
*/
-extern MR_LoopControlSlot *MR_lc_try_get_free_slot(MR_LoopControl *lc);
+extern MR_Bool MR_lc_try_get_free_slot(MR_LoopControl *lc,
+ MR_Unsigned *lcs_idx);
/*
** Get a free slot in the loop control, or block until one is available.
*/
-#define MR_lc_wait_free_slot(lc, lcs, retry_label) \
+#define MR_lc_wait_free_slot(lc, lcs_idx, retry_label) \
+ MR_IF_DEBUG_LOOP_CONTORL( \
+ fprintf(stderr, "lc_wait_free_slot(%p, _, %p)\n", (lc), \
+ retry_label)); \
+ \
do { \
unsigned hint, offset, i; \
\
@@ -436,9 +459,9 @@ extern MR_LoopControlSlot *MR_lc_try_g
ctxt = MR_ENGINE(MR_eng_this_context); \
(lc)->MR_lc_master_context = ctxt; \
MR_save_context(ctxt); \
- ctxt->MR_ctxt_resume = MR_add_prefix(retry_label); \
+ ctxt->MR_ctxt_resume = retry_label; \
ctxt->MR_ctxt_resume_owner_engine = MR_ENGINE(MR_eng_id); \
- MR_US_UNLOCK(&(lc->MR_lc_master_context_lock)); \
+ MR_US_UNLOCK(&((lc)->MR_lc_master_context_lock)); \
MR_ENGINE(MR_eng_this_context) = NULL; \
MR_idle(); \
} \
@@ -452,29 +475,33 @@ extern MR_LoopControlSlot *MR_lc_try_g
if ((lc)->MR_lc_slots[i].MR_lcs_is_free) { \
(lc)->MR_lc_slots[i].MR_lcs_is_free = MR_FALSE; \
(lc)->MR_lc_free_slot_hint = \
- (i + 1) % (lc)->MR_lc_free_slot_hint; \
+ (i + 1) % (lc)->MR_lc_num_slots; \
MR_atomic_inc_int(&((lc)->MR_lc_outstanding_workers)); \
- (lcs) = &((lc)->MR_lc_slots[i]); \
+ (lcs_idx) = i; \
break; \
} \
} \
\
+ MR_IF_DEBUG_LOOP_CONTORL( \
+ fprintf(stderr, "lc_wait_free_slot returning %d\n", (lcs_idx)));\
+ \
} while (0);
/*
** Try to spawn off this code using the free slot.
*/
-#define MR_lc_spawn_off(lcs, label) \
- MR_lc_spawn_off_func((lcs), MR_LABEL(MR_add_prefix(label)))
+#define MR_lc_spawn_off(lc, lcs_idx, label) \
+ MR_lc_spawn_off_func((lc), (lcs_idx), label)
-extern void MR_lc_spawn_off_func(MR_LoopControlSlot *lcs, MR_Code *code_ptr);
+extern void MR_lc_spawn_off_func(MR_LoopControl *lc, MR_Unsigned lcs_idx,
+ MR_Code *code_ptr);
/*
** Join and terminate a worker.
*/
-#define MR_lc_join_and_terminate(lc, lcs) \
+#define MR_lc_join_and_terminate(lc, lcs_idx) \
do { \
- MR_lc_join((lc), (lcs)); \
+ MR_lc_join((lc), (lcs_idx)); \
\
/* \
** Termination of this context must be handled in a macro so that \
@@ -494,7 +521,7 @@ extern void MR_lc_spawn_off_func(MR_Loop
** Join a worker context with the main thread. Termination of the context
** is handled in the macro above.
*/
-extern void MR_lc_join(MR_LoopControl *lc, MR_LoopControlSlot *lcs);
+extern void MR_lc_join(MR_LoopControl *lc, MR_Unsigned lcs_idx);
#endif /* MR_THREAD_SAFE && MR_LL_PARALLEL_CONJ */
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 490 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20111009/30b8b101/attachment.sig>
More information about the reviews
mailing list