[m-rev.] diff: Zoltan's refactoring of the auto-parallelism code.

Paul Bone pbone at csse.unimelb.edu.au
Fri Mar 30 15:06:20 AEDT 2012



Zoltan had been working on some changes to the automatic parallelisation code.
he's passed me his diffs and I intend to debug and commit them.  This is the
first such commit.

These changes are purely refactoring/tidy-up changes.  Since Zoltan and I have
both seen them I don't think a review is necessary. 

deep_profiler/autopar_calc_overlap.m:
    Refactor the calculate_parallel_cost predicate's signature.

deep_profiler/autopar_find_best_par.m:
    Rename some symbols to give them more accurate names.

deep_profiler/autopar_search_callgraph.m:
    Refactor some code, making it more succinct.

    Move pard_goal_detail_to_pard_goal into autpar_types.m

deep_profiler/autopar_search_goals.m:
    Refactoring.

    Conform to changes in deep_profiler/autopar_find_best_par.m

deep_profiler/autopar_types.m:
    Move pard_goal_detail_to_pard_goal here from autopar_search_callgraph.m

    Refactoring.

Index: deep_profiler/autopar_calc_overlap.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/autopar_calc_overlap.m,v
retrieving revision 1.3
diff -u -p -b -r1.3 autopar_calc_overlap.m
--- deep_profiler/autopar_calc_overlap.m	3 May 2011 04:35:00 -0000	1.3
+++ deep_profiler/autopar_calc_overlap.m	30 Mar 2012 03:46:56 -0000
@@ -19,15 +19,16 @@
 
 :- import_module mdprof_fb.automatic_parallelism.autopar_types.
 
-    % calculate_parallel_cost(Info, !Parallelisation).
+    % calculate_parallel_cost(Info, !Parallelisation, CostData).
     %
     % Analyse the parallel conjuncts and determine their likely performance.
     %
     % This is the new parallel execution overlap algorithm, it is general and
     % therefore we also use it for independent conjunctions.
     %
-:- pred calculate_parallel_cost(parallelisation_cost_data::out,
-    incomplete_parallelisation::in, incomplete_parallelisation::out) is det.
+:- pred calculate_parallel_cost(implicit_parallelism_info::in,
+    incomplete_parallelisation::in, incomplete_parallelisation::out,
+    parallelisation_cost_data::out) is det.
 
 %-----------------------------------------------------------------------------%
 %-----------------------------------------------------------------------------%
@@ -57,7 +58,7 @@
 
 %----------------------------------------------------------------------------%
 
-calculate_parallel_cost(CostData, !Parallelisation) :-
+calculate_parallel_cost(Info, !Parallelisation, CostData) :-
     ParConj = ip_get_par_conjs(!.Parallelisation),
     NumCalls = !.Parallelisation ^ ip_num_calls,
 
@@ -70,7 +71,6 @@ calculate_parallel_cost(CostData, !Paral
         (func(P0, MaybeCost) = P0 ^ ip_maybe_goals_after_cost := MaybeCost),
         ip_get_goals_after, CostAfterPercall, NumCalls, !Parallelisation),
 
-    Info = !.Parallelisation ^ ip_info,
     Opts = Info ^ ipi_opts,
     SparkCost = Opts ^ cpcp_sparking_cost,
     SparkDelay = Opts ^ cpcp_sparking_delay,
@@ -610,8 +610,8 @@ var_first_use_time(FindProdOrCons, TimeB
         UseTime = Use ^ vui_cost_until_use
     ;
         UseType = var_use_other,
-        % The analysis didn't recognise the instantiation here, so use a
-        % conservative default for the production time.
+        % The analysis didn't recognise the instantiation here,
+        % so use a conservative default for the production time.
         % XXX: How often does this occur?
         (
             FindProdOrCons = find_production,
Index: deep_profiler/autopar_find_best_par.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/autopar_find_best_par.m,v
retrieving revision 1.5
diff -u -p -b -r1.5 autopar_find_best_par.m
--- deep_profiler/autopar_find_best_par.m	6 May 2011 05:03:27 -0000	1.5
+++ deep_profiler/autopar_find_best_par.m	30 Mar 2012 03:46:56 -0000
@@ -27,18 +27,18 @@
 :- import_module list.
 :- import_module maybe.
 
-:- type best_parallelisation
-    --->    bp_parallel_execution(
-                bp_goals_before         :: list(pard_goal_detail),
-                bp_par_conjs            :: list(seq_conj(pard_goal_detail)),
-                bp_goals_after          :: list(pard_goal_detail),
-                bp_is_dependent         :: conjuncts_are_dependent,
-                bp_par_exec_metrics     :: parallel_exec_metrics
+:- type full_parallelisation
+    --->    fp_parallel_execution(
+                fp_goals_before         :: list(pard_goal_detail),
+                fp_par_conjs            :: list(seq_conj(pard_goal_detail)),
+                fp_goals_after          :: list(pard_goal_detail),
+                fp_is_dependent         :: conjuncts_are_dependent,
+                fp_par_exec_metrics     :: parallel_exec_metrics
             ).
 
 :- pred find_best_parallelisation(implicit_parallelism_info::in,
     program_location::in, list(pard_goal_detail)::in,
-    maybe(best_parallelisation)::out,
+    maybe(full_parallelisation)::out,
     cord(message)::in, cord(message)::out) is det.
 
 %-----------------------------------------------------------------------------%
@@ -208,7 +208,7 @@ preprocess_conjunction(Goals, MaybeGoals
     identify_costly_goals(Goals, 0, CostlyGoalsIndexes),
     (
         CostlyGoalsIndexes = [FirstCostlyGoalIndexPrime | OtherIndexes],
-        last(OtherIndexes, LastCostlyGoalIndexPrime)
+        list.last(OtherIndexes, LastCostlyGoalIndexPrime)
     ->
         FirstCostlyGoalIndex = FirstCostlyGoalIndexPrime,
         LastCostlyGoalIndex = LastCostlyGoalIndexPrime
@@ -224,7 +224,7 @@ preprocess_conjunction(Goals, MaybeGoals
         ; Detism = cc_multidet_rep
         ),
 
-        % Phase 3: Process the middle section into groups.
+        % Phase 4: Process the middle section into groups.
         foldl_sub_array(preprocess_conjunction_into_groups, GoalsArray,
             FirstCostlyGoalIndex, LastCostlyGoalIndex, [], RevGoalGroups),
         list.reverse(RevGoalGroups, GoalGroups),
@@ -269,10 +269,10 @@ build_dependency_graph([PG | PGs], ConjN
     InstMapInfo = PG ^ goal_annotation ^ pgd_inst_map_info,
 
     % For each variable consumed by a goal we find out which goals instantiate
-    % that variable and add them as it's dependencies along with their
+    % that variable and add them as its dependencies along with their
     % dependencies.  NOTE: We only consider variables that are read
-    % and not those that are set.  This is safe because we only bother
-    % analysing single assignment code.
+    % and not those that are set. This is safe because we only analyse
+    % single assignment code.
     RefedVars = InstMapInfo ^ im_consumed_vars,
     digraph.add_vertex(ConjNum, ThisConjKey, !Graph),
     MaybeAddEdge = ( pred(RefedVar::in, GraphI0::in, GraphI::out) is det :-
@@ -286,9 +286,9 @@ build_dependency_graph([PG | PGs], ConjN
     ),
     list.foldl(MaybeAddEdge, set.to_sorted_list(RefedVars), !Graph),
 
-    % For each variable instantiated by this goal add it to the VarDepMap with
-    % this goal as it's instantiator.  That is a maping from the variable to
-    % the conj num.
+    % For each variable instantiated by this goal, add it to the VarDepMap
+    % with this goal as its instantiator. That is a mapping from the variable
+    % to the conj num.
     InstVars = InstMapInfo ^ im_bound_vars,
     InsertForConjNum = ( pred(InstVar::in, MapI0::in, MapI::out) is det :-
         map.det_insert(InstVar, ConjNum, MapI0, MapI)
@@ -381,7 +381,7 @@ preprocess_conjunction_into_groups(Index
     %
 :- pred find_best_parallelisation_complete_bnb(implicit_parallelism_info::in,
     program_location::in, best_par_algorithm_simple::in,
-    goals_for_parallelisation::in, maybe(best_parallelisation)::out) is det.
+    goals_for_parallelisation::in, maybe(full_parallelisation)::out) is det.
 
 find_best_parallelisation_complete_bnb(Info, Location, Algorithm,
         PreprocessedGoals, MaybeBestParallelisation) :-
@@ -389,10 +389,11 @@ find_best_parallelisation_complete_bnb(I
         io.format("D: Find best parallelisation for:\n%s\n",
             [s(LocationString)], !IO),
         location_to_string(1, Location, LocationCord),
-        string.append_list(list(LocationCord), LocationString),
+        string.append_list(cord.list(LocationCord), LocationString),
         NumGoals = size(PreprocessedGoals ^ gfp_goals),
         NumGoalsBefore = PreprocessedGoals ^ gfp_first_costly_goal,
-        NumGoalsAfter = NumGoals - PreprocessedGoals ^ gfp_last_costly_goal - 1,
+        NumGoalsAfter = NumGoals -
+            PreprocessedGoals ^ gfp_last_costly_goal - 1,
         NumGoalsMiddle = NumGoals - NumGoalsBefore - NumGoalsAfter,
         io.format("D: Problem size (before, middle, after): %d,%d,%d\n",
             [i(NumGoalsBefore), i(NumGoalsMiddle), i(NumGoalsAfter)], !IO),
@@ -424,28 +425,25 @@ find_best_parallelisation_complete_bnb(I
             ParalleliseDepConjs = do_not_parallelise_dep_conjs,
             % Try again to get the best dependent parallelisation.
             % This is used for guided parallelisation.
-            TempInfo = Info ^ ipi_opts ^ cpcp_parallelise_dep_conjs :=
+            UpdatedInfo = Info ^ ipi_opts ^ cpcp_parallelise_dep_conjs :=
                 parallelise_dep_conjs(estimate_speedup_by_overlap),
-            find_best_parallelisation_complete_bnb(TempInfo, Location,
+            find_best_parallelisation_complete_bnb(UpdatedInfo, Location,
                 Algorithm, PreprocessedGoals, MaybeBestParallelisation)
         )
     ).
 
-    % The objective function for the branch and bound search.
-    % This is ParTime + ParOverheads * 2.  That is we are willing to pay
-    % 1 unit of parallel overheads to get a 2 unit improvement
-    % of parallel execution time.
+    % Profiling information for an execution of the solver.
     %
-:- func parallelisation_get_objective_value(best_parallelisation) = float.
+:- func parallelisation_get_objective_value(full_parallelisation) = float.
 
 parallelisation_get_objective_value(Parallelisation) = Value :-
-    Metrics = Parallelisation ^ bp_par_exec_metrics,
+    Metrics = Parallelisation ^ fp_par_exec_metrics,
     Value = Metrics ^ pem_par_time +
         parallel_exec_metrics_get_overheads(Metrics) * 2.0.
 
 :- impure pred generate_parallelisations(implicit_parallelism_info::in,
     best_par_algorithm_simple::in, goals_for_parallelisation::in,
-    bnb_state(best_parallelisation)::in, best_parallelisation::out) is nondet.
+    bnb_state(full_parallelisation)::in, full_parallelisation::out) is nondet.
 
 generate_parallelisations(Info, Algorithm, GoalsForParallelisation,
         BNBState, BestParallelisation) :-
@@ -488,7 +486,7 @@ start_building_parallelisation(Info, Pre
     % Finalise the parallelisation.
     %
 :- pred finalise_parallelisation(incomplete_parallelisation::in,
-    best_parallelisation::out) is det.
+    full_parallelisation::out) is det.
 
 finalise_parallelisation(Incomplete, Best) :-
     GoalsBefore = ip_get_goals_before(Incomplete),
@@ -506,13 +504,13 @@ finalise_parallelisation(Incomplete, Bes
     Metrics = finalise_parallel_exec_metrics(Metrics0),
     par_conj_overlap_is_dependent(Overlap, IsDependent),
     ParConjs = ip_get_par_conjs(Incomplete),
-    Best = bp_parallel_execution(GoalsBefore, ParConjs,
+    Best = fp_parallel_execution(GoalsBefore, ParConjs,
         GoalsAfter, IsDependent, Metrics).
 
 %----------------------------------------------------------------------------%
 
 :- semipure pred add_goals_into_first_par_conj(
-    bnb_state(best_parallelisation)::in,
+    bnb_state(full_parallelisation)::in,
     incomplete_parallelisation::in, incomplete_parallelisation::out) is multi.
 
 add_goals_into_first_par_conj(BNBState, !Parallelisation) :-
@@ -533,7 +531,7 @@ add_goals_into_first_par_conj(BNBState, 
     ).
 
 :- semipure pred add_goals_into_last_par_conj(
-    bnb_state(best_parallelisation)::in,
+    bnb_state(full_parallelisation)::in,
     incomplete_parallelisation::in, incomplete_parallelisation::out) is multi.
 
 add_goals_into_last_par_conj(BNBState, !Parallelisation) :-
@@ -578,7 +576,7 @@ start_first_par_conjunct(!GoalGroups, !P
     ).
 
 :- impure pred generate_parallelisations_body(implicit_parallelism_info::in,
-    bnb_state(best_parallelisation)::in, best_par_algorithm_simple::in,
+    bnb_state(full_parallelisation)::in, best_par_algorithm_simple::in,
     list(goal_group(goal_classification))::in,
     incomplete_parallelisation::in, incomplete_parallelisation::out) is nondet.
 
@@ -688,29 +686,30 @@ should_expand_search(BNBState, Algorithm
     % Test the parallelisation against the best one known to the branch and
     % bound solver.
     %
-:- semipure pred test_parallelisation(bnb_state(best_parallelisation)::in,
+:- semipure pred test_parallelisation(bnb_state(full_parallelisation)::in,
     incomplete_parallelisation::in, incomplete_parallelisation::out)
     is semidet.
 
 test_parallelisation(BNBState, !Parallelisation) :-
-    calculate_parallel_cost(CostData, !Parallelisation),
     Info = !.Parallelisation ^ ip_info,
-    test_dependance(Info, CostData),
+    calculate_parallel_cost(Info, !Parallelisation, CostData),
+    test_dependence(Info, CostData),
     % XXX: We shouldn't need to finalize the parallelisation before testing it.
     % This is a limitation of the branch and bound module.
     finalise_parallelisation(!.Parallelisation, TestParallelisation),
     semipure test_incomplete_solution(BNBState, TestParallelisation).
 
-    % Score the parallelisation.
+    % Test the parallelisation against the best one known to the branch and
+    % bound solver.
     %
-:- pred score_parallelisation(bnb_state(best_parallelisation)::in,
+:- pred score_parallelisation(bnb_state(full_parallelisation)::in,
     maybe(float)::out,
     incomplete_parallelisation::in, incomplete_parallelisation::out) is det.
 
 score_parallelisation(BNBState, MaybeScore, !Parallelisation) :-
-    calculate_parallel_cost(CostData, !Parallelisation),
     Info = !.Parallelisation ^ ip_info,
-    ( test_dependance(Info, CostData) ->
+    calculate_parallel_cost(Info, !Parallelisation, CostData),
+    ( test_dependence(Info, CostData) ->
         finalise_parallelisation(!.Parallelisation, TestParallelisation),
         score_solution(BNBState, TestParallelisation, Score),
         MaybeScore = yes(Score)
@@ -718,13 +717,13 @@ score_parallelisation(BNBState, MaybeSco
         MaybeScore = no
     ).
 
-    % Test that the parallelisation includes dependant parallelism
+    % Test that the parallelisation includes dependent parallelism
     % only if permitted by the user.
     %
-:- pred test_dependance(implicit_parallelism_info::in,
+:- pred test_dependence(implicit_parallelism_info::in,
     parallelisation_cost_data::in) is semidet.
 
-test_dependance(Info, CostData) :-
+test_dependence(Info, CostData) :-
     Overlap = CostData ^ pcd_par_exec_overlap,
     ParalleliseDepConjs = Info ^ ipi_opts ^ cpcp_parallelise_dep_conjs,
     par_conj_overlap_is_dependent(Overlap, IsDependent),
@@ -742,7 +741,7 @@ par_conj_overlap_is_dependent(peo_conjun
     par_conj_overlap_is_dependent(Left, IsDependent0),
     (
         IsDependent0 = conjuncts_are_dependent(VarSetLeft),
-        VarSet = union(VarSet0, VarSetLeft),
+        VarSet = set.union(VarSet0, VarSetLeft),
         IsDependent = conjuncts_are_dependent(VarSet)
     ;
         IsDependent0 = conjuncts_are_independent,
@@ -758,6 +757,8 @@ par_conj_overlap_is_dependent(peo_conjun
 :- pred add_one_goal_into_first_par_conj(incomplete_parallelisation::in,
     incomplete_parallelisation::out) is det.
 
+%----------------------------------------------------------------------------%
+
 :- pred add_one_goal_into_last_par_conj(incomplete_parallelisation::in,
     incomplete_parallelisation::out) is det.
 
@@ -781,11 +782,11 @@ add_one_goal_into_last_par_conj(!Paralle
 :- mode foldl_sub_array(pred(in, in, in, out) is det,
     in, in, in, in, out) is det.
 
-foldl_sub_array(Pred, Array, Index, Last, !A) :-
+foldl_sub_array(Pred, Array, Index, Last, !Acc) :-
     ( Index =< Last ->
-        X = lookup(Array, Index),
-        Pred(Index, X, !A),
-        foldl_sub_array(Pred, Array, Index + 1, Last, !A)
+        array.lookup(Array, Index, X),
+        Pred(Index, X, !Acc),
+        foldl_sub_array(Pred, Array, Index + 1, Last, !Acc)
     ;
         true
     ).
Index: deep_profiler/autopar_search_callgraph.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/autopar_search_callgraph.m,v
retrieving revision 1.8
diff -u -p -b -r1.8 autopar_search_callgraph.m
--- deep_profiler/autopar_search_callgraph.m	27 Sep 2011 04:41:25 -0000	1.8
+++ deep_profiler/autopar_search_callgraph.m	30 Mar 2012 03:46:56 -0000
@@ -9,9 +9,9 @@
 % File: autopar_search_callgraph.m
 % Author: pbone.
 %
-% This module contains the code for analysing deep profiles of programs in
-% order to determine how best to automatically parallelise the program.  This
-% code is used by the mdprof_create_feedback tool.
+% This module contains the code for analysing deep profiles of programs
+% in order to determine how best to automatically parallelise the program.
+% This code is used by the mdprof_create_feedback tool.
 %
 %-----------------------------------------------------------------------------%
 
@@ -21,7 +21,6 @@
 :- import_module mdbcomp.
 :- import_module mdbcomp.feedback.
 :- import_module mdbcomp.feedback.automatic_parallelism.
-:- import_module mdprof_fb.automatic_parallelism.autopar_types.
 :- import_module message.
 :- import_module profile.
 
@@ -32,17 +31,9 @@
     % Build the candidate parallel conjunctions feedback information used for
     % implicit parallelism.
     %
-:- pred candidate_parallel_conjunctions(
-    candidate_par_conjunctions_params::in, deep::in, cord(message)::out,
-    feedback_info::in, feedback_info::out) is det.
-
-%-----------------------------------------------------------------------------%
-
-% XXX temporary exports
-
-:- pred pard_goal_detail_to_pard_goal(
-    candidate_par_conjunction(pard_goal_detail)::in,
-    pard_goal_detail::in, pard_goal::out) is det.
+:- pred candidate_parallel_conjunctions(candidate_par_conjunctions_params::in,
+    deep::in, cord(message)::out, feedback_info::in, feedback_info::out)
+    is det.
 
 %-----------------------------------------------------------------------------%
 %-----------------------------------------------------------------------------%
@@ -56,6 +47,7 @@
 :- import_module mdprof_fb.automatic_parallelism.autopar_annotate.
 :- import_module mdprof_fb.automatic_parallelism.autopar_costs.
 :- import_module mdprof_fb.automatic_parallelism.autopar_search_goals.
+:- import_module mdprof_fb.automatic_parallelism.autopar_types.
 :- import_module measurement_units.
 :- import_module measurements.
 :- import_module program_representation_utils.
@@ -97,7 +89,7 @@ candidate_parallel_conjunctions(Params, 
     % Do not descend into cliques cheaper than the threshold.
     deep_lookup_clique_index(Deep, Deep ^ root, RootCliquePtr),
     % The +1 here accounts for the cost of the pseudo call into the mercury
-    % runtime since it is modeled here as a call site that in reality
+    % runtime, since it is modeled here as a call site that in reality
     % does not exist.
     RootParallelism = no_parallelism,
     candidate_parallel_conjunctions_clique(Params, Deep,
@@ -112,54 +104,6 @@ candidate_parallel_conjunctions(Params, 
             ConjunctionsAssocList),
     put_feedback_data(CandidateParallelConjunctions, !Feedback).
 
-pard_goal_detail_to_pard_goal(CPC, !Goal) :-
-    IsDependent = CPC ^ cpc_is_dependent,
-    (
-        IsDependent = conjuncts_are_dependent(SharedVars)
-    ;
-        IsDependent = conjuncts_are_independent,
-        SharedVars = set.init
-    ),
-    transform_goal_rep(pard_goal_detail_annon_to_pard_goal_annon(SharedVars),
-        !Goal).
-
-:- pred pard_goal_detail_annon_to_pard_goal_annon(set(var_rep)::in,
-    pard_goal_detail_annotation::in, pard_goal_annotation::out) is det.
-
-pard_goal_detail_annon_to_pard_goal_annon(SharedVarsSet, PGD, PG) :-
-    CostPercall = goal_cost_get_percall(PGD ^ pgd_cost),
-    CostAboveThreshold = PGD ^ pgd_cost_above_threshold,
-    SharedVars = to_sorted_list(SharedVarsSet),
-
-    Coverage = PGD ^ pgd_coverage,
-    get_coverage_before_det(Coverage, Calls),
-    ( Calls > 0 ->
-        list.foldl(build_var_use_list(PGD ^ pgd_var_production_map),
-            SharedVars, [], Productions),
-        list.foldl(build_var_use_list(PGD ^ pgd_var_consumption_map),
-            SharedVars, [], Consumptions)
-    ;
-        Productions = [],
-        Consumptions = []
-    ),
-
-    PG = pard_goal_annotation(CostPercall, CostAboveThreshold, Productions,
-        Consumptions).
-
-:- pred build_var_use_list(map(var_rep, lazy(var_use_info))::in, var_rep::in,
-    assoc_list(var_rep, float)::in, assoc_list(var_rep, float)::out) is det.
-
-build_var_use_list(Map, Var, !List) :-
-    (
-        map.search(Map, Var, LazyUse),
-        read_if_val(LazyUse, Use)
-    ->
-        UseTime = Use ^ vui_cost_until_use,
-        !:List = [Var - UseTime | !.List]
-    ;
-        true
-    ).
-
 %----------------------------------------------------------------------------%
 %
 % Recurse the call graph searching for parallelisation opportunities.
@@ -191,7 +135,7 @@ candidate_parallel_conjunctions_clique(O
         MaybeFirstPDPtr = no,
         deep_lookup_clique_index(Deep, Deep ^ root, RootCliquePtr),
         ( CliquePtr = RootCliquePtr ->
-            % It's okay, this clique never has an entry procedure.
+            % It is okay, this clique never has an entry procedure.
             PDPtrs = OtherPDPtrs
         ;
             CliquePtr = clique_ptr(CliqueNum),
@@ -407,10 +351,10 @@ update_parallelism_available_conj(Conj, 
         ConjNum =< FirstConjunct + Length
     ->
         % The call into this clique gets parallelised by Conj.
-        % XXX: If we knew the parallelisation type used for Conj we can do this
-        % calculation more accurately.  For instance, if this is a loop, then
-        % we use as many cores as the loop has iterations.  (Except for dead
-        % time).
+        % XXX: If we knew the parallelisation type used for Conj, we could
+        % do this calculation more accurately. For instance, if this is a loop,
+        % then we use as many cores as the loop has iterations. (Except for
+        % dead time).
         Metrics = Conj ^ cpc_par_exec_metrics,
         CPUTime = parallel_exec_metrics_get_cpu_time(Metrics),
         DeadTime = Metrics ^ pem_first_conj_dead_time +
@@ -659,17 +603,13 @@ build_candidate_par_conjunction_maps(Pro
         CandidateProc0 = candidate_par_conjunctions_proc(VarTablePrime,
             PushGoals, CPCs0),
         CPCs = [Candidate | CPCs0],
-        ( VarTable = VarTablePrime ->
-            true
-        ;
-            unexpected($module, $pred, "var tables do not match")
-        )
+        expect(unify(VarTable, VarTablePrime), $module, $pred,
+            "var tables do not match")
     ;
         CPCs = [Candidate],
         PushGoals = []
     ),
-    CandidateProc = candidate_par_conjunctions_proc(VarTable, PushGoals,
-        CPCs),
+    CandidateProc = candidate_par_conjunctions_proc(VarTable, PushGoals, CPCs),
     map.set(ProcLabel, CandidateProc, !Map).
 
 %----------------------------------------------------------------------------%
Index: deep_profiler/autopar_search_goals.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/autopar_search_goals.m,v
retrieving revision 1.6
diff -u -p -b -r1.6 autopar_search_goals.m
--- deep_profiler/autopar_search_goals.m	5 Dec 2011 05:34:34 -0000	1.6
+++ deep_profiler/autopar_search_goals.m	30 Mar 2012 03:46:56 -0000
@@ -9,7 +9,7 @@
 % File: autopar_search_goals.m
 % Authors: pbone, zs.
 %
-% This module contains the code for searching goals for conjunctions worth
+% This module contains the code for searching a goal for conjunctions worth
 % parallelising.
 %
 %-----------------------------------------------------------------------------%
@@ -42,8 +42,8 @@
     reverse_goal_path::in, goal_rep(goal_id)::in,
     pard_goal_detail::out, cord(message)::in, cord(message)::out) is det.
 
-    % Check if it is appropriate to parallelise this call. That is it must be
-    % model_det and have a cost above the call site cost threshold.
+    % Check if it is appropriate to parallelise this call. The call must be
+    % model_det, and must have a cost above the call site cost threshold.
     % XXX probable bug: the cost criterion is implemented elsewhere.
     %
 :- pred can_parallelise_goal(goal_rep(T)::in) is semidet.
@@ -58,7 +58,6 @@
 :- import_module mdprof_fb.automatic_parallelism.autopar_costs.
 :- import_module mdprof_fb.automatic_parallelism.autopar_find_best_par.
 :- import_module mdprof_fb.automatic_parallelism.autopar_reports.
-:- import_module mdprof_fb.automatic_parallelism.autopar_search_callgraph.
 :- import_module measurements.
 :- import_module program_representation_utils.
 :- import_module report.
@@ -332,8 +331,8 @@ conj_get_conjunctions_worth_parallelisin
         Costly = is_not_costly_goal,
         % This goal might be costly if it is pushed into the cotexted
         % of one of SinglesSoFar. This is common for recursive goals.
-        filter(single_context_makes_goal_costly(Info, Conj), SinglesSoFar0,
-            SinglesSoFarMakeConjCostly),
+        list.filter(single_context_makes_goal_costly(Info, Conj),
+            SinglesSoFar0, SinglesSoFarMakeConjCostly),
         (
             SinglesSoFarMakeConjCostly = []
         ;
@@ -533,18 +532,20 @@ merge_same_level_pushes(MainCandidate, [
     pard_goal_detail::in, list(pard_goal_detail)::in,
     reverse_goal_path::out, list(pard_goal_detail)::out) is det.
 
-push_goals_create_candidate(Info, RevCurPath, fgp_nil,
+push_goals_create_candidate(Info, RevCurPath, ForwardGoalPath,
         GoalToPushInto, GoalsToPush0, RevCandidateGoalPath, CandidateConjs) :-
+    (
+        ForwardGoalPath = fgp_nil,
     RevCandidateGoalPath = RevCurPath,
-    % The pushed goals will have different costs in this context, in particular
-    % the number of times they're called varies, This affects the per-call
-    % costs of recursive calls.
-    Calls = goal_cost_get_calls(GoalToPushInto ^ goal_annotation ^ pgd_cost),
+        % The pushed goals will have different costs in this context,
+        % in particular the number of times they're called varies. This affects
+        % the per-call costs of recursive calls.
+        Calls = goal_cost_get_calls(GoalToPushInto ^ goal_annotation
+            ^ pgd_cost),
     map(fix_goal_counts(Info, Calls), GoalsToPush0, GoalsToPush),
-    CandidateConjs = [GoalToPushInto | GoalsToPush].
-push_goals_create_candidate(Info, RevCurPath,
-        fgp_cons(FirstRelStep, TailRelPath),
-        GoalToPushInto, GoalsToPush0, RevCandidateGoalPath, CandidateConjs) :-
+            CandidateConjs = [GoalToPushInto | GoalsToPush]
+    ;
+        ForwardGoalPath = fgp_cons(FirstRelStep, TailRelPath),
     GoalToPushInto = goal_rep(GoalExpr, _, _),
     (
         FirstRelStep = step_conj(N),
@@ -554,12 +555,14 @@ push_goals_create_candidate(Info, RevCur
                 % Conjoin GoalsToPush not with just the expensive goal,
                 % but with the whole conjunction containing it.
                 RevCandidateGoalPath = RevCurPath,
-                % The pushed goals will have different costs in this context,
-                % in particular the number of times they're called varies, This
-                % affects the per-call costs of recursive calls.
+                    % The pushed goals will have different costs in this
+                    % context, in particular the number of times they're called
+                    % varies. This affects the per-call costs of recursive
+                    % calls.
                 Cost = GoalToPushInto ^ goal_annotation ^ pgd_cost,
                 Calls = goal_cost_get_calls(Cost),
-                map(fix_goal_counts(Info, Calls), GoalsToPush0, GoalsToPush),
+                    list.map(fix_goal_counts(Info, Calls),
+                        GoalsToPush0, GoalsToPush),
                 CandidateConjs = Goals ++ GoalsToPush
             ;
                 TailRelPath = fgp_cons(_, _),
@@ -570,19 +573,20 @@ push_goals_create_candidate(Info, RevCur
                         TailRelPath, SubGoal, GoalsToPush0,
                         RevCandidateGoalPath, CandidateConjs)
                 ;
-                    % We can't push goals into the non-last conjunct without
-                    % re-ordering, which is currently not supported.  By
-                    % building a conjunction here we may still be able to
-                    % create a worthwhile parallelisation.  However, there is a
-                    % trade-off to explore between this and not generating the
-                    % single expensive goal from within the conjunction and
-                    % therefore possibly finding other single expensive goals
-                    % later in this conjunction.
+                        % We can't push goals into a non-last conjunct without
+                        % reordering, which is currently not supported.
+                        % By building a conjunction here, we may still be able
+                        % to create a worthwhile parallelisation. However,
+                        % there is a trade-off to explore between this
+                        % and not generating the single expensive goal
+                        % from within the conjunction, and therefore possibly
+                        % finding other single expensive goals later in this
+                        % conjunction.
                     RevCandidateGoalPath = RevCurPath,
                     Cost = GoalToPushInto ^ goal_annotation ^ pgd_cost,
                     Calls = goal_cost_get_calls(Cost),
-                    map(fix_goal_counts(Info, Calls), GoalsToPush0,
-                        GoalsToPush),
+                            list.map(fix_goal_counts(Info, Calls),
+                                GoalsToPush0, GoalsToPush),
                     CandidateConjs = Goals ++ GoalsToPush
                 )
             )
@@ -666,6 +670,7 @@ push_goals_create_candidate(Info, RevCur
         FirstRelStep = step_atomic_orelse(_),
         % These should not exist in a profiled program.
         unexpected($module, $pred, "atomic_orelse")
+        )
     ).
 
 :- pred fix_goal_counts(implicit_parallelism_info::in, int::in,
@@ -705,7 +710,7 @@ pardgoals_build_candidate_conjunction(In
         FirstConjNum = 1,
         ParalleliseDepConjs = Info ^ ipi_opts ^ cpcp_parallelise_dep_conjs,
         SpeedupThreshold = Info ^ ipi_opts ^ cpcp_speedup_threshold,
-        BestParallelisation = bp_parallel_execution(GoalsBefore, ParConjs,
+        BestParallelisation = fp_parallel_execution(GoalsBefore, ParConjs,
             GoalsAfter, IsDependent, Metrics),
         Speedup = parallel_exec_metrics_get_speedup(Metrics),
         Calls = Metrics ^ pem_num_calls,
@@ -765,73 +770,76 @@ pardgoals_build_candidate_conjunction(In
 
 %-----------------------------------------------------------------------------%
 
-goal_to_pard_goal(Info, RevGoalPath, !Goal, !Messages) :-
-    !.Goal = goal_rep(GoalExpr0, Detism, GoalId),
+goal_to_pard_goal(Info, RevGoalPath, Goal, DetailGoal, !Messages) :-
+    Goal = goal_rep(GoalExpr, Detism, GoalId),
     InstMapInfo = get_goal_attribute_det(Info ^ ipi_inst_map_array, GoalId),
     Coverage = get_goal_attribute_det(Info ^ ipi_coverage_array, GoalId),
     get_coverage_before_det(Coverage, Before),
     (
         (
-            GoalExpr0 = conj_rep(Conjs0),
+            GoalExpr = conj_rep(Conjs),
             list.map_foldl2(conj_to_pard_goals(Info, RevGoalPath),
-                Conjs0, Conjs, 1, _, !Messages),
-            conj_calc_cost(Conjs, Before, Cost),
-            GoalExpr = conj_rep(Conjs)
+                Conjs, DetailConjs, 1, _, !Messages),
+            conj_calc_cost(DetailConjs, Before, Cost),
+            DetailGoalExpr = conj_rep(DetailConjs)
         ;
-            GoalExpr0 = disj_rep(Disjs0),
+            GoalExpr = disj_rep(Disjs),
             list.map_foldl2(disj_to_pard_goals(Info, RevGoalPath),
-                Disjs0, Disjs, 1, _, !Messages),
-            disj_calc_cost(Disjs, Before, Cost),
-            GoalExpr = disj_rep(Disjs)
+                Disjs, DetailDisjs, 1, _, !Messages),
+            disj_calc_cost(DetailDisjs, Before, Cost),
+            DetailGoalExpr = disj_rep(DetailDisjs)
         ;
-            GoalExpr0 = switch_rep(Var, CanFail, Cases0),
+            GoalExpr = switch_rep(Var, CanFail, Cases),
             list.map_foldl2(case_to_pard_goal(Info, RevGoalPath),
-                Cases0, Cases, 1, _, !Messages),
-            switch_calc_cost(Cases, Before, Cost),
-            GoalExpr = switch_rep(Var, CanFail, Cases)
-        ;
-            GoalExpr0 = ite_rep(Cond0, Then0, Else0),
-            goal_to_pard_goal(Info, rgp_cons(RevGoalPath, step_ite_cond),
-                Cond0, Cond, !Messages),
-            goal_to_pard_goal(Info, rgp_cons(RevGoalPath, step_ite_then),
-                Then0, Then, !Messages),
-            goal_to_pard_goal(Info, rgp_cons(RevGoalPath, step_ite_else),
-                Else0, Else, !Messages),
-            ite_calc_cost(Cond, Then, Else, Cost),
-            GoalExpr = ite_rep(Cond, Then, Else)
+                Cases, DetailCases, 1, _, !Messages),
+            switch_calc_cost(DetailCases, Before, Cost),
+            DetailGoalExpr = switch_rep(Var, CanFail, DetailCases)
+        ;
+            GoalExpr = ite_rep(Cond, Then, Else),
+            CondRevGoalPath = rgp_cons(RevGoalPath, step_ite_cond),
+            ThenRevGoalPath = rgp_cons(RevGoalPath, step_ite_then),
+            ElseRevGoalPath = rgp_cons(RevGoalPath, step_ite_else),
+            goal_to_pard_goal(Info, CondRevGoalPath, Cond, DetailCond,
+                !Messages),
+            goal_to_pard_goal(Info, ThenRevGoalPath, Then, DetailThen,
+                !Messages),
+            goal_to_pard_goal(Info, ElseRevGoalPath, Else, DetailElse,
+                !Messages),
+            ite_calc_cost(DetailCond, DetailThen, DetailElse, Cost),
+            DetailGoalExpr = ite_rep(DetailCond, DetailThen, DetailElse)
         ;
-            GoalExpr0 = negation_rep(SubGoal0),
-            goal_to_pard_goal(Info, rgp_cons(RevGoalPath, step_neg),
-                SubGoal0, SubGoal, !Messages),
-            Cost = SubGoal ^ goal_annotation ^ pgd_cost,
-            GoalExpr = negation_rep(SubGoal)
+            GoalExpr = negation_rep(SubGoal),
+            SubRevGoalPath = rgp_cons(RevGoalPath, step_neg),
+            goal_to_pard_goal(Info, SubRevGoalPath, SubGoal, DetailSubGoal,
+                !Messages),
+            Cost = DetailSubGoal ^ goal_annotation ^ pgd_cost,
+            DetailGoalExpr = negation_rep(DetailSubGoal)
         ;
-            GoalExpr0 = scope_rep(SubGoal0, MaybeCut),
-            goal_to_pard_goal(Info,
-                rgp_cons(RevGoalPath, step_scope(MaybeCut)),
-                SubGoal0, SubGoal, !Messages),
-            Cost = SubGoal ^ goal_annotation ^ pgd_cost,
-            GoalExpr = scope_rep(SubGoal, MaybeCut)
+            GoalExpr = scope_rep(SubGoal, MaybeCut),
+            SubRevGoalPath = rgp_cons(RevGoalPath, step_scope(MaybeCut)),
+            goal_to_pard_goal(Info, SubRevGoalPath, SubGoal, DetailSubGoal,
+                !Messages),
+            Cost = DetailSubGoal ^ goal_annotation ^ pgd_cost,
+            DetailGoalExpr = scope_rep(DetailSubGoal, MaybeCut)
         ),
         PardGoalType = pgt_non_atomic_goal,
 
         BoundVars = to_sorted_list(InstMapInfo ^ im_bound_vars),
         list.foldl(
-            goal_build_use_map(!.Goal, RevGoalPath, Cost, Info,
+            goal_build_use_map(Goal, RevGoalPath, Cost, Info,
                 var_use_production),
             BoundVars, map.init, ProductionUseMap),
         ConsumedVars = to_sorted_list(InstMapInfo ^ im_consumed_vars),
         list.foldl(
-            goal_build_use_map(!.Goal, RevGoalPath, Cost, Info,
+            goal_build_use_map(Goal, RevGoalPath, Cost, Info,
                 var_use_consumption),
             ConsumedVars, map.init, ConsumptionUseMap)
     ;
-        GoalExpr0 = atomic_goal_rep(Context, Line, BoundVars, AtomicGoal),
         GoalExpr = atomic_goal_rep(Context, Line, BoundVars, AtomicGoal),
+        DetailGoalExpr = atomic_goal_rep(Context, Line, BoundVars, AtomicGoal),
         atomic_pard_goal_type(Info, RevGoalPath, AtomicGoal, InstMapInfo,
             PardGoalType, Messages),
-        atomic_pard_goal_cost(Info, RevGoalPath, Coverage, AtomicGoal,
-            Cost),
+        atomic_pard_goal_cost(Info, RevGoalPath, Coverage, AtomicGoal, Cost),
 
         list.foldl(
             atomic_goal_build_use_map(AtomicGoal, RevGoalPath, Info,
@@ -845,11 +853,10 @@ goal_to_pard_goal(Info, RevGoalPath, !Go
 
         !:Messages = !.Messages ++ Messages
     ),
-    % XXX: The goal annotations cannot represent reasons why a goal
-    % can't be parallelised, for example it could be nondet, semidet or
-    % impure.
+    % XXX: The goal annotations cannot represent reasons why a goal cannot be
+    % parallelised, for example it could be nondet, semidet or impure.
     (
-        can_parallelise_goal(!.Goal),
+        can_parallelise_goal(Goal),
         goal_cost_above_par_threshold(Info, Cost)
     ->
         CostAboveThreshold = cost_above_par_threshold
@@ -859,7 +866,7 @@ goal_to_pard_goal(Info, RevGoalPath, !Go
     PardGoalAnnotation = pard_goal_detail(PardGoalType, InstMapInfo,
         RevGoalPath, Coverage, Cost, CostAboveThreshold,
         ProductionUseMap, ConsumptionUseMap),
-    !:Goal = goal_rep(GoalExpr, Detism, PardGoalAnnotation).
+    DetailGoal = goal_rep(DetailGoalExpr, Detism, PardGoalAnnotation).
 
 :- pred goal_build_use_map(goal_rep(goal_id)::in,
     reverse_goal_path::in, goal_cost_csq::in, implicit_parallelism_info::in,
@@ -913,29 +920,27 @@ compute_goal_var_use_lazy(Goal, RevGoalP
     ).
 
 :- pred conj_to_pard_goals(implicit_parallelism_info::in,
-    reverse_goal_path::in, goal_rep(goal_id)::in,
-    pard_goal_detail::out, int::in, int::out,
-    cord(message)::in, cord(message)::out) is det.
+    reverse_goal_path::in, goal_rep(goal_id)::in, pard_goal_detail::out,
+    int::in, int::out, cord(message)::in, cord(message)::out) is det.
 
 conj_to_pard_goals(Info, RevGoalPath, !Goal, !ConjNum, !Messages) :-
-    goal_to_pard_goal(Info, rgp_cons(RevGoalPath, step_conj(!.ConjNum)),
-        !Goal, !Messages),
+    ConjRevGoalPath = rgp_cons(RevGoalPath, step_conj(!.ConjNum)),
+    goal_to_pard_goal(Info, ConjRevGoalPath, !Goal, !Messages),
     !:ConjNum = !.ConjNum + 1.
 
 :- pred disj_to_pard_goals(implicit_parallelism_info::in,
-    reverse_goal_path::in, goal_rep(goal_id)::in,
-    pard_goal_detail::out, int::in, int::out,
-    cord(message)::in, cord(message)::out) is det.
+    reverse_goal_path::in, goal_rep(goal_id)::in, pard_goal_detail::out,
+    int::in, int::out, cord(message)::in, cord(message)::out) is det.
 
 disj_to_pard_goals(Info, RevGoalPath, !Goal, !DisjNum, !Messages) :-
-    goal_to_pard_goal(Info, rgp_cons(RevGoalPath, step_disj(!.DisjNum)),
-        !Goal, !Messages),
+    DisjRevGoalPath = rgp_cons(RevGoalPath, step_disj(!.DisjNum)),
+    goal_to_pard_goal(Info, DisjRevGoalPath, !Goal, !Messages),
     !:DisjNum = !.DisjNum + 1.
 
 :- pred case_to_pard_goal(implicit_parallelism_info::in,
-    reverse_goal_path::in, case_rep(goal_id)::in,
-    case_rep(pard_goal_detail_annotation)::out, int::in, int::out,
-    cord(message)::in, cord(message)::out) is det.
+    reverse_goal_path::in,
+    case_rep(goal_id)::in, case_rep(pard_goal_detail_annotation)::out,
+    int::in, int::out, cord(message)::in, cord(message)::out) is det.
 
 case_to_pard_goal(Info, RevGoalPath, !Case, !CaseNum, !Messages) :-
     !.Case = case_rep(ConsId, OtherConsId, Goal0),
Index: deep_profiler/autopar_types.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/autopar_types.m,v
retrieving revision 1.2
diff -u -p -b -r1.2 autopar_types.m
--- deep_profiler/autopar_types.m	27 Jan 2011 08:03:53 -0000	1.2
+++ deep_profiler/autopar_types.m	30 Mar 2012 03:46:56 -0000
@@ -57,11 +57,11 @@
                 ipi_proc_label      :: string_proc_label
             ).
 
-    % A representation of a goal within a parallel conjunction.  We don't have
+    % A representation of a goal within a parallel conjunction. We do not have
     % to represent many types of goals or details about them, at least for now.
-    % This type provides more detail than feedback.pard_goal, this detail isn't
-    % required by the compiler and therefore not part of the feedback file
-    % format.
+    % This type provides more detail than feedback.pard_goal. This extra detail
+    % is not required by the compiler, and therefore it is not part of the
+    % feedback file format.
     %
 :- type pard_goal_detail == goal_rep(pard_goal_detail_annotation).
 
@@ -84,11 +84,17 @@
 
                 pgd_cost_above_threshold    :: cost_above_par_threshold,
 
-                % Variable production and consumption information.
-                pgd_var_production_map      :: map(var_rep, lazy(var_use_info)),
-                pgd_var_consumption_map     :: map(var_rep, lazy(var_use_info))
+                % Information about when the inputs of this goal are consumed,
+                % and when its outputs are produced, relative to the start of
+                % this goal. Since we expect that many inputs and outputs
+                % will not be shared variables, we do not compute this
+                % information unless and until we need it.
+                pgd_var_production_map      :: lazy_var_use_map,
+                pgd_var_consumption_map     :: lazy_var_use_map
             ).
 
+:- type lazy_var_use_map == map(var_rep, lazy(var_use_info)).
+
 :- type pard_goal_type
     --->    pgt_call(
                 % The argument modes and use information.
@@ -100,22 +106,9 @@
     ;       pgt_other_atomic_goal
     ;       pgt_non_atomic_goal.
 
-:- func ip_get_goals_before(incomplete_parallelisation) =
-    list(pard_goal_detail).
-
-:- func ip_get_goals_after(incomplete_parallelisation) =
-    list(pard_goal_detail).
-
-:- func ip_get_par_conjs(incomplete_parallelisation) =
-    list(seq_conj(pard_goal_detail)).
-
-:- func ip_get_num_goals(incomplete_parallelisation) = int.
-
-:- func ip_get_num_parallel_conjuncts(incomplete_parallelisation) = int.
-
-:- func ip_get_num_goals_middle(incomplete_parallelisation) = int.
-
-:- func ip_calc_sharedvars_set(incomplete_parallelisation) = set(var_rep).
+:- pred pard_goal_detail_to_pard_goal(
+    candidate_par_conjunction(pard_goal_detail)::in,
+    pard_goal_detail::in, pard_goal::out) is det.
 
     % Build sets of produced and consumed vars for a conjunct in a conjunction.
     % Use with foldl to build these sets up for the whole conjunction.  At the
@@ -199,11 +192,11 @@
                 ip_last_scheduled_goal      :: int,
 
                 % The index of the last goal in each of the parallel conjuncts.
-                % the very last parallel conjunct is donated by
+                % The very last parallel conjunct is denoted by
                 % ip_last_par_goal.
                 ip_par_conjs_rev_last_goal  :: list(int),
 
-                % The number of calls into this conjunction.
+                % The number of calls in this conjunction.
                 ip_num_calls                :: int,
 
                 % Dependency relationships between goals.
@@ -225,7 +218,7 @@
                 pcd_productions_map     :: map(var_rep, float)
             ).
 
-    % This datastructure represents the execution of dependent conjuncts,
+    % This data structure represents the execution of dependent conjuncts,
     % it tracks which variables are produced and consumed.
     %
     % TODO: Implement a pretty printer for this data.
@@ -256,22 +249,129 @@
                 dce_consumptions        :: map(var_rep, float)
             ).
 
+:- func ip_get_goals_before(incomplete_parallelisation) =
+    list(pard_goal_detail).
+
+:- func ip_get_goals_after(incomplete_parallelisation) =
+    list(pard_goal_detail).
+
+:- func ip_get_par_conjs(incomplete_parallelisation) =
+    list(seq_conj(pard_goal_detail)).
+
+:- func ip_get_num_goals(incomplete_parallelisation) = int.
+
+:- func ip_get_num_parallel_conjuncts(incomplete_parallelisation) = int.
+
+:- func ip_get_num_goals_middle(incomplete_parallelisation) = int.
+
+:- func ip_calc_sharedvars_set(incomplete_parallelisation) = set(var_rep).
+
 %-----------------------------------------------------------------------------%
 
 :- implementation.
 
 :- import_module int.
+:- import_module pair.
+
+%-----------------------------------------------------------------------------%
+
+pard_goal_detail_to_pard_goal(CPC, !Goal) :-
+    IsDependent = CPC ^ cpc_is_dependent,
+    (
+        IsDependent = conjuncts_are_dependent(SharedVars)
+    ;
+        IsDependent = conjuncts_are_independent,
+        SharedVars = set.init
+    ),
+    transform_goal_rep(pard_goal_detail_annon_to_pard_goal_annon(SharedVars),
+        !Goal).
+
+:- pred pard_goal_detail_annon_to_pard_goal_annon(set(var_rep)::in,
+    pard_goal_detail_annotation::in, pard_goal_annotation::out) is det.
+
+pard_goal_detail_annon_to_pard_goal_annon(SharedVarsSet, PGD, PG) :-
+    CostPercall = goal_cost_get_percall(PGD ^ pgd_cost),
+    CostAboveThreshold = PGD ^ pgd_cost_above_threshold,
+    SharedVars = set.to_sorted_list(SharedVarsSet),
+
+    Coverage = PGD ^ pgd_coverage,
+    get_coverage_before_det(Coverage, Calls),
+    ( Calls > 0 ->
+        list.foldl(build_var_use_list(PGD ^ pgd_var_production_map),
+            SharedVars, [], Productions),
+        list.foldl(build_var_use_list(PGD ^ pgd_var_consumption_map),
+            SharedVars, [], Consumptions)
+    ;
+        Productions = [],
+        Consumptions = []
+    ),
+    PG = pard_goal_annotation(CostPercall, CostAboveThreshold,
+        Productions, Consumptions).
+
+:- pred build_var_use_list(map(var_rep, lazy(var_use_info))::in, var_rep::in,
+    assoc_list(var_rep, float)::in, assoc_list(var_rep, float)::out) is det.
+
+build_var_use_list(Map, Var, !List) :-
+    (
+        map.search(Map, Var, LazyUse),
+        read_if_val(LazyUse, Use)
+    ->
+        UseTime = Use ^ vui_cost_until_use,
+        !:List = [Var - UseTime | !.List]
+    ;
+        true
+    ).
+
+%-----------------------------------------------------------------------------%
+
+%-----------------------------------------------------------------------------%
+
+conj_produced_and_consumed_vars(Conj, !Produced, !Consumed) :-
+    InstMapInfo = Conj ^ goal_annotation ^ pgd_inst_map_info,
+    !:Produced = set.union(!.Produced, InstMapInfo ^ im_bound_vars),
+    !:Consumed = set.union(!.Consumed, InstMapInfo ^ im_consumed_vars).
+
+%-----------------------------------------------------------------------------%
+
+identify_costly_goal(Annotation, Costly) :-
+    CostAboveThreshold = Annotation ^ pgd_cost_above_threshold,
+    (
+        CostAboveThreshold = cost_above_par_threshold,
+        % TODO: distinguish between compound goals with one costly branch,
+        % and compound goals where all branches are costly.
+        % TODO: Provide information about how many costly goals are within
+        % the goal so that we can try to parallelise each of those against
+        % an outer costly goal.
+        Costly = is_costly_goal
+    ;
+        CostAboveThreshold = cost_not_above_par_threshold,
+        Costly = is_not_costly_goal
+    ).
+
+identify_costly_goals([], _, []).
+identify_costly_goals([Goal | Goals], Index, Indexes) :-
+    identify_costly_goals(Goals, Index + 1, Indexes0),
+    identify_costly_goal(Goal ^ goal_annotation, Costly),
+    (
+        Costly = is_costly_goal,
+        Indexes = [Index | Indexes0]
+    ;
+        Costly = is_not_costly_goal,
+        Indexes = Indexes0
+    ).
+
+%-----------------------------------------------------------------------------%
 
 ip_get_goals_before(Parallelisation) = GoalsBefore :-
     Goals = Parallelisation ^ ip_goals,
     FirstParGoalIndex = Parallelisation ^ ip_first_par_goal,
-    fetch_items(Goals, 0, FirstParGoalIndex - 1, GoalsBefore).
+    array.fetch_items(Goals, 0, FirstParGoalIndex - 1, GoalsBefore).
 
 ip_get_goals_after(Parallelisation) = GoalsAfter :-
     Goals = Parallelisation ^ ip_goals,
     LastParGoalIndex = Parallelisation ^ ip_last_par_goal,
-    NumGoals = size(Goals),
-    fetch_items(Goals, LastParGoalIndex + 1, NumGoals - 1, GoalsAfter).
+    NumGoals = array.size(Goals),
+    array.fetch_items(Goals, LastParGoalIndex + 1, NumGoals - 1, GoalsAfter).
 
 ip_get_par_conjs(Incomplete) = ParConjs :-
     Goals = Incomplete ^ ip_goals,
@@ -291,10 +391,10 @@ ip_get_par_conjs_2(Array, First, [Last |
     fetch_items(Array, First, Last, Goals),
     Conj = seq_conj(Goals).
 
-ip_get_num_goals(Incomplete) = size(Incomplete ^ ip_goals).
+ip_get_num_goals(Incomplete) = array.size(Incomplete ^ ip_goals).
 
 ip_get_num_parallel_conjuncts(Incomplete) =
-    length(Incomplete ^ ip_par_conjs_rev_last_goal) + 1.
+    list.length(Incomplete ^ ip_par_conjs_rev_last_goal) + 1.
 
 ip_get_num_goals_middle(Incomplete) = LastParGoal - FirstParGoal + 1 :-
     FirstParGoal = Incomplete ^ ip_first_par_goal,
@@ -312,44 +412,10 @@ ip_calc_sharedvars_set(Incomplete) = Sha
 build_sharedvars_set(seq_conj(Conjs), !BoundVars, !SharedVars) :-
     list.foldl2(conj_produced_and_consumed_vars, Conjs,
         set.init, ProducedVars, set.init, ConsumedVars),
-    % The new shared vars are previously bound variables that are cosumed in
+    % The new shared vars are previously bound variables that are consumed in
     % this conjunct.  This must be calculated before !BoundVars is updated.
     SharedVars = set.intersect(!.BoundVars, ConsumedVars),
     !:SharedVars = set.union(!.SharedVars, SharedVars),
     !:BoundVars = set.union(!.BoundVars, ProducedVars).
 
-conj_produced_and_consumed_vars(Conj, !Produced, !Consumed) :-
-    InstMapInfo = Conj ^ goal_annotation ^ pgd_inst_map_info,
-    !:Produced = set.union(!.Produced, InstMapInfo ^ im_bound_vars),
-    !:Consumed = set.union(!.Consumed, InstMapInfo ^ im_consumed_vars).
-
-%-----------------------------------------------------------------------------%
-
-identify_costly_goal(Annotation, Costly) :-
-    CostAboveThreshold = Annotation ^ pgd_cost_above_threshold,
-    (
-        CostAboveThreshold = cost_above_par_threshold,
-        % TODO: distinguish between compound goals with one costly branch,
-        % and compound goals where all branches are costly.
-        % TODO: Provide information about how many costly goals are within
-        % the goal so that we can try to parallelise each of those against
-        % an outer costly goal.
-        Costly = is_costly_goal
-    ;
-        CostAboveThreshold = cost_not_above_par_threshold,
-        Costly = is_not_costly_goal
-    ).
-
-identify_costly_goals([], _, []).
-identify_costly_goals([Goal | Goals], Index, Indexes) :-
-    identify_costly_goals(Goals, Index + 1, Indexes0),
-    identify_costly_goal(Goal ^ goal_annotation, Costly),
-    (
-        Costly = is_costly_goal,
-        Indexes = [Index | Indexes0]
-    ;
-        Costly = is_not_costly_goal,
-        Indexes = Indexes0
-    ).
-
 %-----------------------------------------------------------------------------%
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 489 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20120330/d2ecd436/attachment.sig>


More information about the reviews mailing list