[m-rev.] diff: Handle stack slot allocation for loop_control scopes.

Paul Bone pbone at csse.unimelb.edu.au
Wed Oct 12 11:39:29 AEDT 2011


Handle stack slot allocation for loop_control scopes:

Variables communicated to and from the spawned off computation now have stack
slots allocated.

Variables that need stack slots in the spawned off computation must have
distinct stack slots to those that need stack slots between the spawned off
computation and the recursive call, including the recursive call itself.

compiler/live_vars.m:
    Refactored the parallel_stackvars structure.  It now makes a little more
    sense for loop control scopes.  It is also abstracted away by a number of
    new predicates, making it easier to work with.

    To support this a new field has been added to the alloc_data type, this is
    necessary for detecting recursive calls.

    A new method has been added to the stack_alloc_info(T) typeclass to
    communicate information about loop control scopes to the stack slot
    allocation algorithm.

compiler/hlds_llds.m:
    Create a new type, need_for_loop_control that contains the sets of
    stackvars that must have distinct stack slots for the purposes of loop
    control.

    Update a comment on the maybe_need field of the llds_code_gen_structure to
    refer to loop control scopes as well as parallel conjunctions.

compiler/par_conj_gen.m:
    Make corrections in how variable locations are managed during code
    generation for loop control scopes.

compiler/set_of_var.m:
    Provide new set operations cartesian_product and cartesian_product_list.

compiler/stack_alloc.m:
compiler/stack_opt.m:
compiler/tupling.m:
    Conform to changes in live_vars.m

Index: compiler/hlds_llds.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/hlds_llds.m,v
retrieving revision 1.28
diff -u -p -b -r1.28 hlds_llds.m
--- compiler/hlds_llds.m	16 Aug 2011 03:26:31 -0000	1.28
+++ compiler/hlds_llds.m	12 Oct 2011 00:37:00 -0000
@@ -23,6 +23,7 @@
 :- import_module parse_tree.set_of_var.
 
 :- import_module bool.
+:- import_module list.
 :- import_module map.
 :- import_module maybe.
 
@@ -123,6 +124,15 @@
                 par_conj_engine_vars    :: set_of_progvar
             ).
 
+    % loop_control_distinct_stackvars gives sets of variables that must not
+    % have overlapping stack slot allocations so that concurrent access to the
+    % same stack frame is safe.
+    %
+:- type need_for_loop_control
+    --->    need_for_loop_control(
+                loop_control_distinct_stackvars :: list(set_of_progvar)
+            ).
+
 :- type llds_code_gen_details.
 
 %-----------------------------------------------------------------------------%
@@ -382,10 +392,11 @@ explain_stack_slots_2([Var - Slot | Rest
                 % specifies what variables need to be stored on the stack
                 % at the resumption point established by the goal.
                 %
-                % For parallel conjunctions, the stackvars pass will set
-                % this argument to need_par_conj(NPC), where NPC specifies
-                % what variables are required to be stored on the stack
-                % by the parallel conjunction execution mechanism.
+                % For parallel conjunctions and loop control scopes, the
+                % stackvars pass will set this argument to need_par_conj(NPC),
+                % where NPC specifies what variables are required to be stored
+                % on the stack by the parallel conjunction and loop control
+                % execution mechanisms.
                 maybe_need          :: maybe_need
             ).
 
Index: compiler/live_vars.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/live_vars.m,v
retrieving revision 1.149
diff -u -p -b -r1.149 live_vars.m
--- compiler/live_vars.m	31 Aug 2011 07:59:32 -0000	1.149
+++ compiler/live_vars.m	12 Oct 2011 00:37:00 -0000
@@ -45,6 +45,7 @@
     --->    alloc_data(
                 ad_module_info          ::  module_info,
                 ad_proc_info            ::  proc_info,
+                ad_pred_proc_id         ::  pred_proc_id,
                 ad_typeinfo_liveness    ::  bool,
                 ad_opt_no_return_calls  ::  bool,
 
@@ -64,7 +65,9 @@
     pred at_resume_site(need_in_resume::in, alloc_data::in,
         T::in, T::out) is det,
     pred at_par_conj(need_in_par_conj::in, alloc_data::in,
-        T::in, T::out) is det
+        T::in, T::out) is det,
+    pred at_recursive_call_for_loop_control(need_for_loop_control::in,
+        alloc_data::in, T::in, T::out) is det
 ].
 
 :- pred build_live_sets_in_goal_no_par_stack(hlds_goal::in, hlds_goal::out,
@@ -89,6 +92,7 @@
 :- import_module enum.
 :- import_module int.
 :- import_module map.
+:- import_module maybe.
 :- import_module pair.
 :- import_module require.
 
@@ -132,33 +136,13 @@ set_dummy_array_elements(ModuleInfo, [Va
 
 %-----------------------------------------------------------------------------%
 
-    % Information about which variables in a parallel conjunction need stack
-    % slots.
-    %
-:- type parallel_stackvars
-    --->    parallel_stackvars(
-                % Variables nonlocal to the parallel conjunction which need
-                % their own stack slots.
-                set_of_progvar,
-
-                % Variables local to parallel conjuncts prior to the
-                % current conjunct which need stack slots.
-                list(set_of_progvar),
-
-                % Accumulating set of variables local to the current
-                % parallel conjunct which need stack slots.
-                set_of_progvar
-            ).
-
-%-----------------------------------------------------------------------------%
-
 % The stack_slots structure (map(prog_var, lval)) is threaded through the
 % traversal of the goal. The liveness information is computed from the liveness
 % delta annotations.
 
 build_live_sets_in_goal_no_par_stack(Goal0, Goal,
         ResumeVars0, AllocData, !StackAlloc, !Liveness, !NondetLiveness) :-
-    ParStackVars0 = parallel_stackvars(set_of_var.init, [], set_of_var.init),
+    empty_par_stackvars(ParStackVars0),
     build_live_sets_in_goal(Goal0, Goal, ResumeVars0,
         AllocData, !StackAlloc, !Liveness, !NondetLiveness,
         ParStackVars0, _ParStackVars).
@@ -264,8 +248,6 @@ build_live_sets_in_goal_2(GoalExpr0, Goa
                 !StackAlloc, !Liveness, !NondetLiveness, !ParStackVars)
         ;
             ConjType = parallel_conj,
-            !.ParStackVars = parallel_stackvars(OuterNonLocals,
-                OuterLocalStackVars, OuterAccStackVars0),
 
             % Since each parallel conjunct may be run in a different Mercury
             % context to the current context, we must save all the variables
@@ -279,35 +261,28 @@ build_live_sets_in_goal_2(GoalExpr0, Goa
             LiveSet =
                 set_of_var.union_list([NonLocals, !.Liveness, ResumeVars0]),
 
-            InnerNonLocals = LiveSet `set_of_var.union` OuterNonLocals,
-            InnerParStackVars0 =
-                parallel_stackvars(InnerNonLocals, [], set_of_var.init),
+            par_stack_vars_get_nonlocals(!.ParStackVars, OuterNonLocals),
+
+            OuterParStackVars = !.ParStackVars,
+            par_stack_vars_start_parallel_conjunction(LiveSet, !ParStackVars),
             build_live_sets_in_par_conj(Goals0, Goals, ResumeVars0, AllocData,
                 !StackAlloc, !Liveness, !NondetLiveness,
-                InnerParStackVars0, InnerParStackVars),
-            InnerParStackVars = parallel_stackvars(_, InnerStackVars, _),
+                !ParStackVars),
+            par_stack_vars_get_stackvars(!.ParStackVars, InnerStackVars),
 
             % This is safe but suboptimal. It causes all variables which need
             % stack slots in a parallel conjunction to have distinct stack
             % slots. Variables local to a single conjunct could share stack
             % slots, as long as the _sets_ of stack slots allocated to
             % different parallel conjuncts are distinct.
+            InnerNonLocals = LiveSet `set_of_var.union` OuterNonLocals,
             NeedInParConj = need_in_par_conj(InnerNonLocals `set_of_var.union`
-                set_of_var.union_list(InnerStackVars)),
+                InnerStackVars),
             record_par_conj(NeedInParConj, AllocData,
                 GoalInfo0, GoalInfo, !StackAlloc),
 
-            % All the local variables which needed stack slots in the parallel
-            % conjuncts (InnerStackVars) become part of the accumulating set of
-            % variables that have stack slots.  Variables which are not local
-            % to but are needed in the parallel conjunctions also become part
-            % of the accumulating set.
-            OuterAccStackVars = OuterAccStackVars0
-                `set_of_var.union` set_of_var.union_list(InnerStackVars)
-                `set_of_var.union`
-                    (LiveSet `set_of_var.difference` OuterNonLocals),
-            !:ParStackVars = parallel_stackvars(OuterNonLocals,
-                OuterLocalStackVars, OuterAccStackVars)
+            par_stack_vars_end_parallel_conjunction(LiveSet, OuterParStackVars,
+                !ParStackVars)
         ),
         GoalExpr = conj(ConjType, Goals)
     ;
@@ -404,9 +379,47 @@ build_live_sets_in_goal_2(GoalExpr0, Goa
             % conjunctions, so there are no updates to !StackAlloc,
             % !NondetLiveness, or !ParStackVars.
             set_of_var.insert(TermVar, !Liveness)
-        ;
+
             % XXX We could treat from_ground_term_deconstruct scopes specially
             % as well.
+        ; Reason = loop_control(LCVar, LCSVar) ->
+            % We must handle loop control scopes specially, see the comment for
+            % parallel conjunctions above.  Like parallel conjunctions, we need
+            % stack slots for the NonLocals, but we also need non-overlapping
+            % slots for LC and LCS.
+            %
+
+            NonLocals = goal_info_get_code_gen_nonlocals(GoalInfo0),
+            % Include NonLocals as these need to be on the stack to communicate
+            % with the spawned off context,
+            % Include ResumeVars0 because this goal may be backtracked over,
+            % (Except that loop control is only applied to deterministic
+            % predicates and it would mean entering a recursive call more than
+            % once.  So I think that ResumeVars0 will always be empty.
+            % The loop control variables must also be allocated stack slots.
+            % Inclusion of !.Liveness is a conservative approximation.  Values
+            % in !.Liveness don't need to have stack slots but if they already
+            % have stack slots then those slots most be distinct from others by
+            % the spawn off scope.  So what we want here is the intersection of
+            % !.Liveness and AlreadyHasStackSlot.
+            OuterParStackVars = !.ParStackVars,
+            LCStackVars =
+                set_of_var.union_list([NonLocals, !.Liveness, ResumeVars0])
+                `set_of_var.union` list_to_set([LCVar, LCSVar]),
+            par_stack_vars_start_loop_control(LCStackVars, !ParStackVars),
+            build_live_sets_in_goal(SubGoal0, SubGoal, ResumeVars0, AllocData,
+                !StackAlloc, !Liveness, !NondetLiveness, !ParStackVars),
+            par_stack_vars_get_stackvars(!.ParStackVars, InnerStackVars),
+
+            NeedInParConj =
+                need_in_par_conj(LCStackVars `set_of_var.union` InnerStackVars),
+            record_par_conj(NeedInParConj, AllocData,
+                GoalInfo0, GoalInfo, !StackAlloc),
+
+            par_stack_vars_end_loop_control(OuterParStackVars, !ParStackVars),
+
+            GoalExpr = scope(Reason, SubGoal)
+        ;
             NondetLiveness0 = !.NondetLiveness,
             build_live_sets_in_goal(SubGoal0, SubGoal, ResumeVars0, AllocData,
                 !StackAlloc, !Liveness, !NondetLiveness, !ParStackVars),
@@ -466,6 +479,23 @@ build_live_sets_in_goal_2(GoalExpr0, Goa
             build_live_sets_in_call(set_to_bitset(OutVars),
                 GoalInfo0, GoalInfo, ResumeVars0, AllocData,
                 !StackAlloc, !.Liveness, !NondetLiveness, !ParStackVars)
+        ),
+        CalleePredProcId = AllocData ^ ad_pred_proc_id,
+        ( CalleePredProcId = proc(PredId, ProcId) ->
+            % If a call is recursive and a loop control scope has been seen
+            % then the recursive call is a barrier for loop control, we have to
+            % ensure that spawned off computations use distinct stack slots
+            % from one another and the code up to and including this call.
+            par_stack_vars_recursive_call(MaybeNeedLC, !ParStackVars),
+            (
+                MaybeNeedLC = yes(NeedLC),
+                at_recursive_call_for_loop_control(NeedLC, AllocData,
+                    !StackAlloc)
+            ;
+                MaybeNeedLC = no
+            )
+        ;
+            true
         )
     ;
         GoalExpr0 = unify(_, _, _, Unification, _),
@@ -574,11 +604,7 @@ build_live_sets_in_call(OutVars, GoalInf
     % In a parallel conjunction all the stack slots we need must not be reused
     % in other parallel conjuncts.  We keep track of which variables have been
     % allocated stack slots in each conjunct.
-
-    !.ParStackVars = parallel_stackvars(Nonlocals, ParallelVars, AccVars0),
-    AccVars = AccVars0 `set_of_var.union`
-        (ForwardVars `set_of_var.difference` Nonlocals),
-    !:ParStackVars = parallel_stackvars(Nonlocals, ParallelVars, AccVars).
+    par_stack_vars_accumulate_stack_vars(ForwardVars, !ParStackVars).
 
 %-----------------------------------------------------------------------------%
 
@@ -618,20 +644,15 @@ build_live_sets_in_conj([Goal0 | Goals0]
     is det <= stack_alloc_info(T).
 
 build_live_sets_in_par_conj([], [], _, _,
-        !StackAlloc, Liveness, Liveness, !NondetLiveness,
-        ParStackVars, ParStackVars).
+        !StackAlloc, Liveness, Liveness, !NondetLiveness, !ParStackVars).
 build_live_sets_in_par_conj([Goal0 | Goals0], [Goal | Goals], ResumeVars0,
         AllocData, !StackAlloc, Liveness0, Liveness, !NondetLiveness,
-        ParStackVars0, ParStackVars) :-
+        !ParStackVars) :-
     build_live_sets_in_goal(Goal0, Goal, ResumeVars0, AllocData,
-        !StackAlloc, Liveness0, Liveness, !NondetLiveness,
-        ParStackVars0, ParStackVars1),
-    ParStackVars1 = parallel_stackvars(Nonlocals, PrevSets1, CurSet1),
-    ParStackVars2 = parallel_stackvars(Nonlocals, [CurSet1 | PrevSets1],
-        set_of_var.init),
+        !StackAlloc, Liveness0, Liveness, !NondetLiveness, !ParStackVars),
+    par_stack_vars_next_par_conjunct(!ParStackVars),
     build_live_sets_in_par_conj(Goals0, Goals, ResumeVars0, AllocData,
-        !StackAlloc, Liveness0, _Liveness1, !NondetLiveness,
-        ParStackVars2, ParStackVars).
+        !StackAlloc, Liveness0, _Liveness1, !NondetLiveness, !ParStackVars).
 
 %-----------------------------------------------------------------------------%
 
@@ -765,5 +786,212 @@ record_par_conj(NeedInParConj, AllocData
     at_par_conj(NeedInParConj, AllocData, !StackAlloc).
 
 %-----------------------------------------------------------------------------%
+
+    % Information about which variables in a parallel conjunction need stack
+    % slots.
+    %
+:- type parallel_stackvars
+    --->    not_in_parallel_context
+    ;       parallel_conjunction(
+                % Variables nonlocal to the parallel conjunction which need
+                % their own stack slots.
+                set_of_progvar,
+
+                % Variables local to parallel conjuncts prior to the
+                % current conjunct which need stack slots.
+                list(set_of_progvar),
+
+                % Accumulating set of variables local to the current
+                % parallel conjunct which need stack slots.
+                set_of_progvar
+            )
+    ;       loop_control_scope(
+                % Variables nonlocal to the scope, these all need stack slots.
+                % This field may be unnecessary since it's used to remove items
+                % from sets when adding them to the set below.  And then a
+                % union of it and the set below is calculated to set
+                % need_in_par_conj.
+                set_of_progvar,
+
+                % Accumulating set of variables local to the scope that need
+                % stack slots, these are allocated on the parent's stack frame
+                % (for now).
+                set_of_progvar
+            )
+    ;       after_loop_control_scope(
+                % List of variables local to each of the previous loop control
+                % scopes.
+                list(set_of_progvar),
+
+                % Accumulating set of variables that need stack slots between a
+                % loop control scope and either another loop control scope or a
+                % recursive call.
+                set_of_progvar
+            ).
+
+:- pred empty_par_stackvars(parallel_stackvars::out) is det.
+
+empty_par_stackvars(not_in_parallel_context).
+
+:- pred par_stack_vars_start_parallel_conjunction(set_of_progvar::in,
+    parallel_stackvars::in, parallel_stackvars::out) is det.
+
+par_stack_vars_start_parallel_conjunction(LiveSet, OuterParStackVars,
+        parallel_conjunction(InnerNonLocals, [], set_of_var.init)) :-
+    par_stack_vars_get_nonlocals(OuterParStackVars, OuterNonLocals),
+    InnerNonLocals = OuterNonLocals `set_of_var.union` LiveSet.
+
+:- pred par_stack_vars_end_parallel_conjunction(set_of_progvar::in,
+    parallel_stackvars::in, parallel_stackvars::in, parallel_stackvars::out)
+    is det.
+
+par_stack_vars_end_parallel_conjunction(LiveSet, OuterParStackVars,
+        ParStackVars0, ParStackVars) :-
+    par_stack_vars_get_stackvars(ParStackVars0, InnerStackVars),
+    (
+        OuterParStackVars = not_in_parallel_context,
+        ParStackVars = not_in_parallel_context
+    ;
+        OuterParStackVars = parallel_conjunction(OuterNonLocals,
+            OuterLocalStackVars, OuterAccStackVars0),
+        % All the local variables which needed stack slots in the parallel
+        % conjuncts (InnerStackVars) become part of the accumulating set of
+        % variables that have stack slots.  Variables which are not local to
+        % but are needed in the parallel conjunctions also become part of the
+        % accumulating set.
+        OuterAccStackVars = OuterAccStackVars0
+            `set_of_var.union` InnerStackVars
+            `set_of_var.union` (LiveSet `set_of_var.difference` OuterNonLocals),
+        ParStackVars = parallel_conjunction(OuterNonLocals,
+            OuterLocalStackVars, OuterAccStackVars)
+    ;
+        OuterParStackVars = loop_control_scope(OuterNonLocals, StackVars0),
+        % The loop control scope must ensure that any stackvars needed by a
+        % parallel conjunction are distinct from those already needed by loop
+        % control.  The same is true in the case for
+        % after_loop_control_scope/2 below.
+        StackVars = StackVars0 `set_of_var.union` InnerStackVars
+            `set_of_var.union` (LiveSet `set_of_var.difference` OuterNonLocals),
+        ParStackVars = loop_control_scope(OuterNonLocals, StackVars)
+    ;
+        OuterParStackVars = after_loop_control_scope(StackVarsList, StackVars0),
+        % In this case we don't have access to an OuterNonLocals set, so this
+        % is a conservative approximation.
+        StackVars = StackVars0 `set_of_var.union` InnerStackVars
+            `set_of_var.union` LiveSet,
+        ParStackVars =
+            after_loop_control_scope(StackVarsList, StackVars)
+    ).
+
+:- pred par_stack_vars_start_loop_control(set_of_progvar::in,
+    parallel_stackvars::in, parallel_stackvars::out) is det.
+
+par_stack_vars_start_loop_control(NonLocals, ParStackVars0,
+        loop_control_scope(NonLocals, set_of_var.init)) :-
+    (
+        ( ParStackVars0 = not_in_parallel_context
+        ; ParStackVars0 = after_loop_control_scope(_, _)
+        )
+    ;
+        ( ParStackVars0 = parallel_conjunction(_, _, _)
+        ; ParStackVars0 = loop_control_scope(_, _)
+        ),
+        unexpected($module, $pred,
+            "Loop control scope found in other parallel context")
+    ).
+
+:- pred par_stack_vars_end_loop_control(parallel_stackvars::in,
+    parallel_stackvars::in, parallel_stackvars::out) is det.
+
+par_stack_vars_end_loop_control(OldParStackVars, ParStackVars0, ParStackVars) :-
+    par_stack_vars_get_stackvars(ParStackVars0, NewStackVars),
+    (
+        OldParStackVars = not_in_parallel_context,
+        ParStackVars = after_loop_control_scope([NewStackVars], set_of_var.init)
+    ;
+        OldParStackVars = after_loop_control_scope(StackVarsList, StackVarsAcc),
+        ParStackVars =
+            after_loop_control_scope([NewStackVars | StackVarsList],
+                StackVarsAcc)
+    ;
+        ( OldParStackVars = parallel_conjunction(_, _, _)
+        ; OldParStackVars = loop_control_scope(_, _)
+        ),
+        unexpected($module, $pred,
+            "Loop control scope found in other parallel context")
+    ).
+
+
+:- pred par_stack_vars_get_stackvars(parallel_stackvars::in,
+    set_of_progvar::out) is det.
+
+par_stack_vars_get_stackvars(not_in_parallel_context, set_of_var.init).
+par_stack_vars_get_stackvars(parallel_conjunction(_, StackVarss, _),
+        StackVars) :-
+    StackVars = set_of_var.union_list(StackVarss).
+par_stack_vars_get_stackvars(loop_control_scope(_, StackVars), StackVars).
+par_stack_vars_get_stackvars(after_loop_control_scope(_, StackVars),
+    StackVars).
+
+:- pred par_stack_vars_accumulate_stack_vars(set_of_progvar::in,
+    parallel_stackvars::in, parallel_stackvars::out) is det.
+
+par_stack_vars_accumulate_stack_vars(_,
+        not_in_parallel_context, not_in_parallel_context).
+par_stack_vars_accumulate_stack_vars(ForwardVars,
+        parallel_conjunction(Nonlocals, ParallelVars, AccStackVars0),
+        parallel_conjunction(Nonlocals, ParallelVars, AccStackVars)) :-
+    AccStackVars = AccStackVars0 `set_of_var.union`
+        (ForwardVars `set_of_var.difference` Nonlocals).
+par_stack_vars_accumulate_stack_vars(NewStackVars,
+        loop_control_scope(NonLocals, AccStackVars0),
+        loop_control_scope(NonLocals, AccStackVars)) :-
+    AccStackVars = AccStackVars0 `set_of_var.union` NewStackVars.
+par_stack_vars_accumulate_stack_vars(NewStackVars,
+        after_loop_control_scope(LocalStackVars, AccStackVars0),
+        after_loop_control_scope(LocalStackVars, AccStackVars)) :-
+    AccStackVars = AccStackVars0 `set_of_var.union` NewStackVars.
+
+:- pred par_stack_vars_get_nonlocals(parallel_stackvars::in,
+    set_of_progvar::out) is det.
+
+par_stack_vars_get_nonlocals(not_in_parallel_context, set_of_var.init).
+par_stack_vars_get_nonlocals(parallel_conjunction(NonLocals, _, _), NonLocals).
+par_stack_vars_get_nonlocals(loop_control_scope(NonLocals, _), NonLocals).
+par_stack_vars_get_nonlocals(after_loop_control_scope(_, _), set_of_var.init).
+
+:- pred par_stack_vars_next_par_conjunct(
+    parallel_stackvars::in, parallel_stackvars::out) is det.
+
+par_stack_vars_next_par_conjunct(!ParStackVars) :-
+    ( !.ParStackVars = parallel_conjunction(Nonlocals, PrevSets, CurSet) ->
+        !:ParStackVars =
+            parallel_conjunction(Nonlocals, [CurSet | PrevSets],
+                set_of_var.init)
+    ;
+        unexpected($module, $pred, "expected parallel_conjunction/3")
+    ).
+
+:- pred par_stack_vars_recursive_call(maybe(need_for_loop_control)::out,
+    parallel_stackvars::in, parallel_stackvars::out) is det.
+
+par_stack_vars_recursive_call(MaybeNeedLC, !ParStackVars) :-
+    (
+        ( !.ParStackVars = not_in_parallel_context
+        ; !.ParStackVars = parallel_conjunction(_, _, _)
+        ),
+        MaybeNeedLC = no
+    ;
+        !.ParStackVars = loop_control_scope(_, _),
+        unexpected($module, $pred, "recursive call in loop control scope")
+    ;
+        !.ParStackVars = after_loop_control_scope(StackVarsList0, StackVars),
+        StackVarsList = [StackVars | StackVarsList0],
+        cartesian_product_list(StackVarsList, NonoverlapSets),
+        MaybeNeedLC = yes(need_for_loop_control(NonoverlapSets)),
+        !:ParStackVars = not_in_parallel_context
+    ).
+
+%-----------------------------------------------------------------------------%
 :- end_module ll_backend.live_vars.
 %-----------------------------------------------------------------------------%
Index: compiler/par_conj_gen.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/par_conj_gen.m,v
retrieving revision 1.47
diff -u -p -b -r1.47 par_conj_gen.m
--- compiler/par_conj_gen.m	9 Oct 2011 09:56:20 -0000	1.47
+++ compiler/par_conj_gen.m	12 Oct 2011 00:37:00 -0000
@@ -340,23 +340,28 @@ generate_loop_control(Goal, LCVar, LCSVa
     save_variables_on_stack(to_sorted_list(InputVars), SaveCode, !CI),
 
     % Create the call to spawn_off.
-    place_var(LCVar, reg(reg_r, 1), PlaceLCVar, !CI),
-    place_var(LCSVar, reg(reg_r, 2), PlaceLCSVar, !CI),
     remember_position(!.CI, PositionBeforeSpawnOff),
 
     get_next_label(SpawnOffLabel, !CI),
+    best_variable_location_det(!.CI, LCVar, LCVarLocn),
+    best_variable_location_det(!.CI, LCSVar, LCSVarLocn),
     SpawnOffCallCode =
-        singleton(llds_instr(lc_spawn_off(lval(reg(reg_r, 1)),
-                lval(reg(reg_r, 2)), SpawnOffLabel),
+        singleton(llds_instr(lc_spawn_off(lval(LCVarLocn), lval(LCSVarLocn),
+                SpawnOffLabel),
             "Spawn off job for worker using loop control")),
-    SpawnOffCode = PlaceLCVar ++ PlaceLCSVar ++ SpawnOffCallCode,
+    SpawnOffCode = SpawnOffCallCode,
     remember_position(!.CI, PositionAfterSpawnOff),
 
     % Code to spawn off.
     LabelCode = singleton(llds_instr(label(SpawnOffLabel),
         "Label for spawned off code")),
     reset_to_position(PositionBeforeSpawnOff, !CI),
-    clear_all_registers(no, !CI),
+
+    % We don't need to clear all the registers, all the variables except for
+    % LC and LCS are considered to be on the stack.
+    % mark only the registers used by LC and LCS as clobbered.
+    clobber_regs([LCVarLocn, LCSVarLocn], !CI),
+
     generate_goal(model_det, Goal, GoalCode, !CI),
     % We expect that the join_and_terminate call is already in Goal.
     SpawnedOffCode0 = LabelCode ++ GoalCode,
@@ -376,6 +381,38 @@ generate_loop_control(Goal, LCVar, LCSVa
     % Concatentate the inline code.
     Code = SaveCode ++ SpawnOffCode.
 
+%----------------------------------------------------------------------------%
+
+:- pred best_variable_location_det(code_info::in, prog_var::in, lval::out)
+    is det.
+
+best_variable_location_det(CI, Var, Locn) :-
+    promise_equivalent_solutions [Locn] (
+        ( best_variable_location(CI, Var, LocnPrime) ->
+            Locn = LocnPrime
+        ;
+            unexpected($module, $pred, "Could not find location for variable")
+        )
+    ).
+
+:- pred best_variable_location(code_info::in, prog_var::in, lval::out)
+    is nondet.
+
+best_variable_location(CI, Var, Locn) :-
+    variable_locations(CI, Map),
+    map.search(Map, Var, AllLocns),
+    filter(lval_is_reg, AllLocns, RegLocns),
+    ( member(LocnPrime, RegLocns) ->
+        % Commit to register locations before trying any location.
+        LocnPrime = Locn
+    ;
+        member(Locn, AllLocns)
+    ).
+
+:- pred lval_is_reg(lval::in) is semidet.
+
+lval_is_reg(reg(_, _)).
+
 %-----------------------------------------------------------------------------%
 
     % In the code of parallel conjuncts we have to refer to stack slots in
Index: compiler/set_of_var.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/set_of_var.m,v
retrieving revision 1.5
diff -u -p -b -r1.5 set_of_var.m
--- compiler/set_of_var.m	23 Aug 2011 03:50:53 -0000	1.5
+++ compiler/set_of_var.m	12 Oct 2011 00:20:33 -0000
@@ -111,6 +111,12 @@
 :- pred divide_by_set(set_of_var(T)::in, set_of_var(T)::in,
     set_of_var(T)::out, set_of_var(T)::out) is det.
 
+:- pred cartesian_product(set_of_var(T)::in, set_of_var(T)::in,
+    list(set_of_var(T))::out) is det.
+
+:- pred cartesian_product_list(list(set_of_var(T))::in,
+    list(set_of_var(T))::out) is det.
+
 %---------------
 % Traversals.
 
@@ -296,6 +302,28 @@ divide(Pred, Set, InPart, OutPart) :-
 divide_by_set(DivideBySet, Set, InPart, OutPart) :-
     tree_bitset.divide_by_set(DivideBySet, Set, InPart, OutPart).   % MODULE
 
+cartesian_product(A, B, Product) :-
+    fold(cartesian_product2(A), B, [], Product).
+
+:- pred cartesian_product2(set_of_var(T)::in, var(T)::in,
+    list(set_of_var(T))::in, list(set_of_var(T))::out) is det.
+
+cartesian_product2(SetA, VarB, !Sets) :-
+    set_of_var.fold((pred(VarA::in, SetsI0::in, SetsI::out) is det :-
+            Set = set_of_var.list_to_set([VarA, VarB]),
+            SetsI = [Set | SetsI0]
+        ), SetA, !Sets).
+
+cartesian_product_list([], []).
+cartesian_product_list([FirstSet | OtherSets], Product) :-
+    list.foldl(cartesian_product_list2(FirstSet), OtherSets, [], Product).
+
+:- pred cartesian_product_list2(set_of_var(T)::in, set_of_var(T)::in,
+    list(set_of_var(T))::in, list(set_of_var(T))::out) is det.
+
+cartesian_product_list2(A, B, SetsAcc, Product ++ SetsAcc) :-
+    cartesian_product(A, B, Product).
+
 %---------------
 % Traversals.
 
Index: compiler/stack_alloc.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/stack_alloc.m,v
retrieving revision 1.32
diff -u -p -b -r1.32 stack_alloc.m
--- compiler/stack_alloc.m	16 Aug 2011 03:26:33 -0000	1.32
+++ compiler/stack_alloc.m	12 Oct 2011 00:26:55 -0000
@@ -63,7 +63,7 @@
 
 %-----------------------------------------------------------------------------%
 
-allocate_stack_slots_in_proc(ModuleInfo, proc(PredId, _ProcId), !ProcInfo) :-
+allocate_stack_slots_in_proc(ModuleInfo, proc(PredId, ProcId), !ProcInfo) :-
     initial_liveness(!.ProcInfo, PredId, ModuleInfo, Liveness0),
     module_info_pred_info(ModuleInfo, PredId, PredInfo),
     module_info_get_globals(ModuleInfo, Globals),
@@ -81,8 +81,8 @@ allocate_stack_slots_in_proc(ModuleInfo,
         OptNoReturnCalls),
     proc_info_get_vartypes(!.ProcInfo, VarTypes),
     build_dummy_type_array(ModuleInfo, VarTypes, DummyTypeArray, DummyVars),
-    AllocData = alloc_data(ModuleInfo, !.ProcInfo, TypeInfoLiveness,
-        OptNoReturnCalls, DummyTypeArray),
+    AllocData = alloc_data(ModuleInfo, !.ProcInfo, proc(PredId, ProcId),
+        TypeInfoLiveness, OptNoReturnCalls, DummyTypeArray),
     NondetLiveness0 = set_of_var.init,
     SimpleStackAlloc0 = stack_alloc(set.make_singleton_set(FailVars)),
     proc_info_get_goal(!.ProcInfo, Goal0),
@@ -126,7 +126,9 @@ allocate_stack_slots_in_proc(ModuleInfo,
 :- instance stack_alloc_info(stack_alloc) where [
     pred(at_call_site/4) is alloc_at_call_site,
     pred(at_resume_site/4) is alloc_at_resume_site,
-    pred(at_par_conj/4) is alloc_at_par_conj
+    pred(at_par_conj/4) is alloc_at_par_conj,
+    pred(at_recursive_call_for_loop_control/4) is
+        alloc_at_recursive_call_for_loop_control
 ].
 
 :- pred alloc_at_call_site(need_across_call::in, alloc_data::in,
@@ -169,6 +171,22 @@ alloc_at_par_conj(NeedParConj, AllocData
     LiveSets = set.insert(LiveSets0, StackVars),
     !:StackAlloc = stack_alloc(LiveSets).
 
+:- pred alloc_at_recursive_call_for_loop_control(need_for_loop_control::in,
+    alloc_data::in, stack_alloc::in, stack_alloc::out) is det.
+
+alloc_at_recursive_call_for_loop_control(NeedLC, AllocData, !StackAlloc) :-
+    NeedLC = need_for_loop_control(StackVarsSets),
+    list.foldl(set_for_loop_control(AllocData), StackVarsSets, !StackAlloc).
+
+:- pred set_for_loop_control(alloc_data::in, set_of_progvar::in,
+    stack_alloc::in, stack_alloc::out) is det.
+
+set_for_loop_control(AllocData, Set0, !StackAlloc) :-
+    !.StackAlloc = stack_alloc(LiveSets0),
+    filter_out_dummy_vars(AllocData, Set0, Set),
+    LiveSets = set.insert(LiveSets0, Set),
+    !:StackAlloc = stack_alloc(LiveSets).
+
 :- pred filter_out_dummy_vars(alloc_data::in,
     set_of_progvar::in, set_of_progvar::out) is det.
 
Index: compiler/stack_opt.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/stack_opt.m,v
retrieving revision 1.55
diff -u -p -b -r1.55 stack_opt.m
--- compiler/stack_opt.m	22 Jul 2011 03:31:41 -0000	1.55
+++ compiler/stack_opt.m	12 Oct 2011 00:26:55 -0000
@@ -127,10 +127,15 @@
 % the set of vars that definitely need their own stack slots, and which this
 % optimization should not try to make reachable from a heap cell. At the
 % moment, the only variables we treat this way are those that are required to
-% be on the stack by a parallel conjunction.
+% be on the stack by a parallel conjunction or loop control scope.
 
 :- type opt_stack_alloc
     --->    opt_stack_alloc(
+                % XXX: this is an over-simplification, it gives stack slots to
+                % variables that may not need them.  For example, vars local to
+                % loop control scopes and parallel conjunctions, And vars used
+                % after a loop control scope but before a recursive call don't
+                % need to be placed here.
                 par_conj_own_slots      :: set_of_progvar
             ).
 
@@ -179,8 +184,8 @@ stack_opt_cell(PredProcId, !ProcInfo, !M
     globals.lookup_bool_option(Globals, opt_no_return_calls,
         OptNoReturnCalls),
     array.init(1, is_not_dummy_type, DummyDummyTypeArray),
-    AllocData = alloc_data(!.ModuleInfo, !.ProcInfo, TypeInfoLiveness,
-        OptNoReturnCalls, DummyDummyTypeArray),
+    AllocData = alloc_data(!.ModuleInfo, !.ProcInfo, PredProcId,
+        TypeInfoLiveness, OptNoReturnCalls, DummyDummyTypeArray),
     fill_goal_id_slots_in_proc(!.ModuleInfo, _, !ProcInfo),
     proc_info_get_goal(!.ProcInfo, Goal2),
     OptStackAlloc0 = init_opt_stack_alloc,
@@ -323,7 +328,9 @@ optimize_live_sets(ModuleInfo, OptAlloc,
 :- instance stack_alloc_info(opt_stack_alloc) where [
     pred(at_call_site/4) is opt_at_call_site,
     pred(at_resume_site/4) is opt_at_resume_site,
-    pred(at_par_conj/4) is opt_at_par_conj
+    pred(at_par_conj/4) is opt_at_par_conj,
+    pred(at_recursive_call_for_loop_control/4) is
+        opt_at_recursive_call_for_loop_control
 ].
 
 :- pred opt_at_call_site(need_across_call::in, alloc_data::in,
@@ -345,6 +352,16 @@ opt_at_par_conj(NeedParConj, _AllocData,
     ParConjOwnSlots = set_of_var.union(StackVars, ParConjOwnSlots0),
     !StackAlloc ^ par_conj_own_slots := ParConjOwnSlots.
 
+:- pred opt_at_recursive_call_for_loop_control(need_for_loop_control::in,
+    alloc_data::in, opt_stack_alloc::in, opt_stack_alloc::out) is det.
+
+opt_at_recursive_call_for_loop_control(NeedLC, _AllocData, !StackAlloc) :-
+    NeedLC = need_for_loop_control(StackVarsSets),
+    StackVars = set_of_var.union_list(StackVarsSets),
+    ParConjOwnSlots0 = !.StackAlloc ^ par_conj_own_slots,
+    ParConjOwnSlots = set_of_var.union(StackVars, ParConjOwnSlots0),
+    !StackAlloc ^ par_conj_own_slots := ParConjOwnSlots.
+
 %-----------------------------------------------------------------------------%
 %-----------------------------------------------------------------------------%
 
Index: compiler/tupling.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/tupling.m,v
retrieving revision 1.66
diff -u -p -b -r1.66 tupling.m
--- compiler/tupling.m	26 Sep 2011 07:08:56 -0000	1.66
+++ compiler/tupling.m	12 Oct 2011 00:26:55 -0000
@@ -886,7 +886,7 @@ prepare_proc_for_counting(PredProcId, !R
         globals.lookup_bool_option(Globals,
             opt_no_return_calls, OptNoReturnCalls),
         array.init(1, is_not_dummy_type, DummyDummyTypeArray),
-        AllocData = alloc_data(!.ModuleInfo, !.ProcInfo,
+        AllocData = alloc_data(!.ModuleInfo, !.ProcInfo, PredProcId,
             TypeInfoLiveness, OptNoReturnCalls, DummyDummyTypeArray),
         fill_goal_id_slots_in_proc(!.ModuleInfo, ContainingGoalMap, !ProcInfo),
         ReverseGoalPathMap = create_reverse_goal_path_map(ContainingGoalMap),
@@ -916,7 +916,9 @@ prepare_proc_for_counting(PredProcId, !R
 :- instance stack_alloc_info(opt_tuple_alloc) where [
     pred(at_call_site/4) is opt_at_call_site,
     pred(at_resume_site/4) is opt_at_resume_site,
-    pred(at_par_conj/4) is opt_at_par_conj
+    pred(at_par_conj/4) is opt_at_par_conj,
+    pred(at_recursive_call_for_loop_control/4) is
+        opt_at_recursive_call_for_loop_control
 ].
 
 :- pred opt_at_call_site(need_across_call::in, alloc_data::in,
@@ -934,6 +936,11 @@ opt_at_resume_site(_NeedAtResume, _Alloc
 
 opt_at_par_conj(_NeedParConj, _AllocData, !StackAlloc).
 
+:- pred opt_at_recursive_call_for_loop_control(need_for_loop_control::in,
+    alloc_data::in, opt_tuple_alloc::in, opt_tuple_alloc::out) is det.
+
+opt_at_recursive_call_for_loop_control(_NeedLC, _AllocData, !StackAlloc).
+
 %-----------------------------------------------------------------------------%
 
 :- pred count_load_stores_for_scc(trace_counts::in, tuning_params::in,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 490 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20111012/61cd3007/attachment.sig>


More information about the reviews mailing list