[m-rev.] for post-commit review: Implicit Parallelism Analysis

Paul Bone pbone at csse.unimelb.edu.au
Mon Oct 20 17:23:05 AEDT 2008
Previous message: [m-rev.] For post-commit review: Variable use analysis for Deep Profiler.
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
For post-commit review by Zoltan.

Estimated hours taken: 27 
Branches: main

Perform implicit parallelism analysis in mdprof_feedback.  This calculates the
amount of parallelism available in dependant conjunctions and advises the
compiler how to parallelise code via the feedback system.

deep_profiler/mdprof_feedback.m:
	Implement implicit parallelisation analysis.

deep_profiler/program_representation_utils.m:
	Add a simple implementation of inst maps which are used by the implicit
	parallelisation analysis.  This implementation also tracks that variables
	that are required in order to instantiate a variable.
	Export some procedures used by the variable use analysis for use in the
	parallelisation analysis in mdprof_feedback.m
	Create an extra predicate to retrieve all the variables used by an atomic
	goal.
	Move utility code in this module to the end.

deep_profiler/report.m:
	Add utility function to convert cost_until_var_use values to raw values
	either since the beginning of the procedure or before the end.

mdbcomp/feedback.m:
	Modified the format of implicit parallelism feedback information.
	Incremented the feedback file format version.

mdbcomp/program_representation.m:
	Added a procedure to search for a variable name in a variable table and
	fail if it cannot find it.

Index: deep_profiler/mdprof_feedback.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/mdprof_feedback.m,v
retrieving revision 1.12
diff -u -p -b -r1.12 mdprof_feedback.m
--- deep_profiler/mdprof_feedback.m	3 Oct 2008 07:39:04 -0000	1.12
+++ deep_profiler/mdprof_feedback.m	20 Oct 2008 06:28:30 -0000
@@ -31,17 +31,22 @@
 :- implementation.
 
 :- import_module conf.
+:- import_module create_report.
 :- import_module mdbcomp.
 :- import_module mdbcomp.feedback.
 :- import_module mdbcomp.program_representation.
 :- import_module measurements.
 :- import_module profile.
+:- import_module program_representation_utils.
+:- import_module query. % For the cmd structure
+:- import_module report.
 :- import_module startup.
 
 :- import_module array.
+:- import_module assoc_list.
 :- import_module bool.
-:- import_module cord.
 :- import_module char.
+:- import_module cord.
 :- import_module float.
 :- import_module getopt.
 :- import_module int.
@@ -49,9 +54,13 @@
 :- import_module list.
 :- import_module map.
 :- import_module maybe.
+:- import_module pair.
+:- import_module pqueue.
 :- import_module require.
+:- import_module set.
 :- import_module string.
 :- import_module svmap.
+:- import_module svset.
 
 %-----------------------------------------------------------------------------%
 %
@@ -159,6 +168,7 @@ help_message =
     --desired-parallelism <value>
                 The amount of desired parallelism for implicit parallelism,
                 value must be a floating point number above 1.0.
+                Note: This option is currently ignored.
     --implicit-parallelism-sparking-cost <value>
                 The cost of creating a spark, measured in the deep profiler's
                 call sequence counts.
@@ -169,6 +179,9 @@ help_message =
     --implicit-parallelism-proc-cost-threshold <value>
                 The cost threshold for procedures to be considered for implicit
                 parallelism, measured on the profiler's call sequence counts.
+    --implicit-parallelism-call-site-cost-threshold <value>
+                The cost of a call site to be considered for parallelism
+                against another call site.
 
     The following options select specific types of feedback information
     and parameterise them:
@@ -255,7 +268,12 @@ read_deep_file(Input, Verbose, MaybeDeep
     ;       desired_parallelism
     ;       implicit_parallelism_sparking_cost
     ;       implicit_parallelism_locking_cost
-    ;       implicit_parallelism_proc_cost_threshold.
+    ;       implicit_parallelism_proc_cost_threshold
+    ;       implicit_parallelism_call_site_cost_threshold.
+
+% TODO: Introduce an option to disable parallelisation of dependant
+% conjunctions, or switch to the simple calculations for independant
+% conjunctions.
 
 :- pred short(char::in, option::out) is semidet.
 
@@ -284,6 +302,8 @@ long("implicit-parallelism-sparking-cost
 long("implicit-parallelism-locking-cost",   implicit_parallelism_locking_cost).
 long("implicit-parallelism-proc-cost-threshold", 
     implicit_parallelism_proc_cost_threshold).
+long("implicit-parallelism-call-site-cost-threshold",
+    implicit_parallelism_call_site_cost_threshold).
 
 :- pred defaults(option::out, option_data::out) is multi.
 
@@ -304,6 +324,7 @@ defaults(desired_parallelism,           
 defaults(implicit_parallelism_sparking_cost,        int(100)).
 defaults(implicit_parallelism_locking_cost,         int(100)).
 defaults(implicit_parallelism_proc_cost_threshold,  int(100000)).
+defaults(implicit_parallelism_call_site_cost_threshold,     int(50000)).
 
 :- pred construct_measure(string::in, stat_measure::out) is semidet.
 
@@ -333,7 +354,8 @@ construct_measure("median",     stat_med
                 cpc_desired_parallelism     :: float,
                 cpc_sparking_cost           :: int,
                 cpc_locking_cost            :: int,
-                cpc_threshold               :: int
+                cpc_proc_threshold          :: int,
+                cpc_call_site_threshold     :: int
             ).
 
     % Check all the command line options and return a well-typed representation
@@ -390,24 +412,29 @@ check_options(Options0, RequestedFeedbac
         lookup_string_option(Options, desired_parallelism,
             DesiredParallelismStr),
         (
-            string.to_float(DesiredParallelismStr, DesiredParallelism),
-            DesiredParallelism > 1.0
+            string.to_float(DesiredParallelismStr, DesiredParallelismPrime),
+            DesiredParallelismPrime > 1.0
         ->
-            CandidateParallelConjunctionsOpts ^ cpc_desired_parallelism =
-                DesiredParallelism
+            DesiredParallelism = DesiredParallelismPrime
         ;
             error("Invalid value for desired_parallelism: " ++ 
                 DesiredParallelismStr)
         ),
         lookup_int_option(Options, implicit_parallelism_sparking_cost,
             SparkingCost),
-        CandidateParallelConjunctionsOpts ^ cpc_sparking_cost = SparkingCost,
         lookup_int_option(Options, implicit_parallelism_locking_cost,
             LockingCost),
-        CandidateParallelConjunctionsOpts ^ cpc_locking_cost = LockingCost,
         lookup_int_option(Options, implicit_parallelism_proc_cost_threshold,
-            CPCThreshold),
-        CandidateParallelConjunctionsOpts ^ cpc_threshold = CPCThreshold,
+            CPCProcThreshold),
+        lookup_int_option(Options, 
+            implicit_parallelism_call_site_cost_threshold,
+            CPCCallSiteThreshold),
+        CandidateParallelConjunctionsOpts =
+            candidate_parallel_conjunctions_opts(DesiredParallelism, 
+                SparkingCost,
+                LockingCost,
+                CPCProcThreshold,
+                CPCCallSiteThreshold),
         MaybeCandidateParallelConjunctionsOpts =
             yes(CandidateParallelConjunctionsOpts)
     ;
@@ -449,13 +476,811 @@ set_option(Option, Value, !Options) :-
     feedback_info::in, feedback_info::out) is det.
 
 process_deep_to_feedback(RequestedFeedbackInfo, Deep, !Feedback) :-
-    MaybeCallsAboveThresholdSorted =
+    MaybeCallsAboveThresholdSortedOpts =
         RequestedFeedbackInfo ^ maybe_calls_above_threshold_sorted,
     (
-        MaybeCallsAboveThresholdSorted = yes(Opts),
-        css_list_above_threshold(Opts, Deep, !Feedback)
+        MaybeCallsAboveThresholdSortedOpts = 
+            yes(CallsAboveThresholdSortedOpts),
+        css_list_above_threshold(CallsAboveThresholdSortedOpts, Deep,
+            !Feedback)
+    ;
+        MaybeCallsAboveThresholdSortedOpts = no
+    ),
+    
+    MaybeCandidateParallelConjunctionsOpts =
+        RequestedFeedbackInfo ^ maybe_candidate_parallel_conjunctions,
+    (
+        MaybeCandidateParallelConjunctionsOpts = 
+            yes(CandidateParallelConjunctionsOpts),
+        candidate_parallel_conjunctions(CandidateParallelConjunctionsOpts,
+            Deep, !Feedback)
     ;
-        MaybeCallsAboveThresholdSorted = no
+        MaybeCandidateParallelConjunctionsOpts = no
+    ).
+
+%----------------------------------------------------------------------------%
+%
+% Build the candidate parallel conjunctions feedback information used for
+% implicit parallelism.
+%
+
+%
+% This doesn't follow higher order or method calls yet it may be possible to
+% follow all the higher order and method calls seen during profiling and
+% average their statistics based on their execution probability (weight).
+%
+
+:- pred candidate_parallel_conjunctions(
+    candidate_parallel_conjunctions_opts::in, deep::in,
+    feedback_info::in, feedback_info::out) is det.
+
+candidate_parallel_conjunctions(Opts, Deep, !Feedback) :-
+    Opts = candidate_parallel_conjunctions_opts(DesiredParallelism,
+        SparkingCost, LockingCost, ProcThreshold, _CallSiteThreshold), 
+    % First retrieve the top procedures above the configured threshold.
+    % This is done 'overall' not 'per_call' because parallelising a procedure
+    % that is used a lot is more beneficial than parallelising a procedure that
+    % is used once when the more-frequently used function accounts for a higher
+    % amount of the programs runtime.  It may be disqualified later if per call
+    % it has very little benefit.
+    % TODO: don't bother using the 'create_report' interface to get this
+    % report, we should call it directly instead.
+    TopProcsCmd = deep_cmd_top_procs(threshold_value(float(ProcThreshold)),
+        cost_callseqs, self_and_desc, overall),
+    create_report(TopProcsCmd, Deep, TopProcsReport),
+    ( TopProcsReport = report_top_procs(MaybeTopProcsReport) ->
+        (
+            MaybeTopProcsReport = ok(TopProcs),
+            TopProcsList = TopProcs ^ tp_top_procs
+        ;
+            MaybeTopProcsReport = error(TopProcsReportError),
+            error(TopProcsReportError)
+        )
+    ;
+        error("create_report gave incorrect report, expected top_procs_report")
+    ),
+    
+    % Take the top procs list and look for conjunctions that can be
+    % parallelised and give an estimated speedup when parallelised.  There may
+    % be more than one opportunity for parallelism in any given procedure.
+    list.map(
+        candidate_parallel_conjunctions_proc(Opts, Deep), 
+        TopProcsList, Conjunctions0),
+    list.condense(Conjunctions0, Conjunctions),
+
+    % XXX: Analysing the clique tree to reduce the amount of nested parallel
+    % execution should be done here.
+    
+    CandidateParallelConjunctions =
+        feedback_data_candidate_parallel_conjunctions(DesiredParallelism,
+        SparkingCost, LockingCost, Conjunctions),
+    put_feedback_data(CandidateParallelConjunctions, !Feedback).
+
+:- type implicit_parallelism_info
+    --->    implicit_parallelism_info(
+                ipi_deep        :: deep,
+                ipi_progrep     :: prog_rep,
+                ipi_opts        :: candidate_parallel_conjunctions_opts,
+                ipi_call_sites  :: map(goal_path, call_site_perf),
+                ipi_var_table   :: var_table
+            ).
+
+    % Find candidate parallel conjunctions within the given procedure.
+    %
+:- pred candidate_parallel_conjunctions_proc(
+    candidate_parallel_conjunctions_opts::in, deep::in,
+    perf_row_data(proc_desc)::in,
+    assoc_list(string_proc_label, candidate_par_conjunction)::out) is det.
+
+candidate_parallel_conjunctions_proc(Opts, Deep, PrefRowData, 
+        Candidates) :-
+    % Lookup the proc static to find the ProcLabel.
+    PSPtr = PrefRowData ^ perf_row_subject ^ pdesc_ps_ptr,  
+    deep_lookup_proc_statics(Deep, PSPtr, PS),
+    ProcLabel = PS ^ ps_id,
+    
+    % Make a proc query to retrieve cost information for the call sites within
+    % the procedure.
+    % TODO: As above: this can probably be called directly rather than going via
+    % create_report.
+    create_report(deep_cmd_proc(PSPtr), Deep, Report),
+    ( Report = report_proc(MaybeProcReport) ->
+        det_open_maybe_error(MaybeProcReport, ProcReport)
+    ;
+        error(this_file ++ "Recieved incorrect report for proc report query")
+    ),
+    list.foldl(add_call_site_perf_to_map,
+        ProcReport ^ proc_call_site_summaries, map.init, CallSitesMap),
+    MaybeProgRep = Deep ^ procrep_file,
+    (
+        MaybeProgRep = yes(MaybeProgRep1),
+        (
+            MaybeProgRep1 = ok(ProgRep)
+        ;
+            MaybeProgRep1 = error(Error),
+            error(this_file ++ Error)
+        )
+    ;
+        MaybeProgRep = no,
+        error(this_file ++ "Could not open Deep.procrep")
+    ),
+    ( progrep_search_proc(ProgRep, ProcLabel, ProcRep) ->
+        ProcRep ^ pr_defn = ProcDefnRep,
+        ProcDefnRep ^ pdr_goal = Goal,
+        ProcDefnRep ^ pdr_var_table = VarTable,
+        Info = implicit_parallelism_info(Deep, ProgRep, Opts, CallSitesMap,
+            VarTable),
+        goal_get_conjunctions_worth_parallelising(Goal, empty_goal_path, Info,
+            initial_inst_map(ProcDefnRep), _, Candidates0,
+            SeenDuplicateInstantiation, _),
+        (
+            SeenDuplicateInstantiation = seen_duplicate_instantiation,
+            Candidates = []
+        ;
+            SeenDuplicateInstantiation = have_not_seen_duplicate_instantiation,
+            list.map((pred(Candidate0::in, Candidate::out) is det :-
+                    Candidate = (ProcLabel - Candidate0)
+                ), Candidates0, Candidates)
+        )
+    ;
+        % Builtin procedures cannot be found in the program representation, and
+        % cannot be parallelised either.
+        Candidates = []
+    ).
+    
+:- pred goal_get_conjunctions_worth_parallelising(goal_rep::in, goal_path::in,
+    implicit_parallelism_info::in, inst_map::in, inst_map::out,
+    list(candidate_par_conjunction)::out, seen_duplicate_instantiation::out,
+    maybe_call_conjunct::out) is det.
+
+goal_get_conjunctions_worth_parallelising(Goal, GoalPath, Info, !InstMap,
+        Candidates, SeenDuplicateInstantiation, MaybeCallConjunct) :-
+    Goal = goal_rep(GoalExpr, Detism, _),
+    (
+        (
+            GoalExpr = conj_rep(Conjuncts),
+            conj_get_conjunctions_worth_parallelising(Conjuncts, GoalPath,
+                Info, !InstMap, Candidates, SeenDuplicateInstantiation)
+        ;
+            GoalExpr = disj_rep(Disjuncts),
+            disj_get_conjunctions_worth_parallelising(Disjuncts, GoalPath, 1,
+                Info, !InstMap, Candidates, SeenDuplicateInstantiation)
+        ;
+            GoalExpr = switch_rep(_, _, Cases),
+            switch_case_get_conjunctions_worth_parallelising(Cases, GoalPath, 1,
+                Info, !InstMap, Candidates, SeenDuplicateInstantiation)
+        ;
+            GoalExpr = ite_rep(Cond, Then, Else),
+            ite_get_conjunctions_worth_parallelising(Cond, Then, Else,
+                GoalPath, Info, !InstMap, Candidates,
+                SeenDuplicateInstantiation)
+        ;
+            GoalExpr = scope_rep(SubGoal, MaybeCut),
+            ScopeGoalPath = 
+                goal_path_add_at_end(GoalPath, step_scope(MaybeCut)),
+            goal_get_conjunctions_worth_parallelising(SubGoal, ScopeGoalPath,
+                Info, !InstMap, Candidates, SeenDuplicateInstantiation, _) 
+        ;
+            GoalExpr = negation_rep(SubGoal),
+            NegGoalPath = goal_path_add_at_end(GoalPath, step_neg),
+            goal_get_conjunctions_worth_parallelising(SubGoal, NegGoalPath,
+                Info, !InstMap, Candidates, SeenDuplicateInstantiation, _) 
+        ),
+        % TODO: Parallelising conjunctions like 
+        %   ( call1(A, B) , not call2(C, D) )
+        % may be easy to do when writing the compiler's part of the code, if so
+        % then MaybeCallAboveThreshold should probably be set from some of
+        % these non-atomic goals based on goals within them.
+        MaybeCallConjunct = non_atomic_goal 
+    ;
+        GoalExpr = atomic_goal_rep(_, _, BoundVars, AtomicGoal),
+        InstMapBeforeCall = !.InstMap,
+        % The binding of a variable may depend on any number of other
+        % variables, and of course the variables they depend upon.  Except that
+        % variables involved in control flow (switched on vars, vars in ITE
+        % conds) are not recorded here, but should be for completeness.
+        atomic_goal_get_vars(AtomicGoal, AtomicGoalVars0),
+        list.foldl((pred(Var::in, Set0::in, Set::out) is det :-
+                ( set.remove(Set0, Var, SetPrime) ->
+                    Set = SetPrime
+                ;
+                    Set = Set0
+                )
+            ), BoundVars, AtomicGoalVars0, AtomicGoalVars),
+        inst_map_ground_vars(BoundVars, AtomicGoalVars, !InstMap,
+            SeenDuplicateInstantiation),
+        maybe_call_site_conjunct(Info, GoalPath, AtomicGoal, Detism,
+            InstMapBeforeCall, !.InstMap, BoundVars, MaybeCallConjunct),
+        Candidates = []
+    ).
+
+:- pred conj_get_conjunctions_worth_parallelising(list(goal_rep)::in,
+    goal_path::in, implicit_parallelism_info::in, inst_map::in,
+    inst_map::out, list(candidate_par_conjunction)::out,
+    seen_duplicate_instantiation::out) is det.
+
+conj_get_conjunctions_worth_parallelising(Conjs, GoalPath, Info,
+        !InstMap, Candidates, SeenDuplicateInstantiation) :-
+    % Note: it will be better to look at each pair of conjuncts, determine if
+    % they are parallelisable (perhaps by placing middle goals in either of the
+    % the parallel conjuncts to create the optimum amount of parallelism.  This
+    % will need to have an in-order representation of goals, and for each
+    % variable seen have a tree of variables it depends upon.
+    %
+    % For now consider parallelising conjuncts that seperated only by other
+    % atomic goals.
+    conj_get_conjunctions_worth_parallelising_2(Conjs, GoalPath, 1, Info,
+        !InstMap, Candidates0, CallSiteConjuncts, 
+        SeenDuplicateInstantiation),
+    (
+        % Only perform analysis at this point if it's not going to be
+        % thrown away later due to unhandled use of partial instantiation.
+        SeenDuplicateInstantiation = have_not_seen_duplicate_instantiation,
+        build_candidate_conjunctions(Info, !.InstMap, GoalPath,
+            list(CallSiteConjuncts), pqueue.init, CandidatesQueue),
+        % Pick best candidate from queue.
+        ( pqueue.remove(CandidatesQueue, _, Candidate, _) ->
+            Candidates = [Candidate | Candidates0]
+        ;
+            Candidates = Candidates0
+        )
+    ;
+        SeenDuplicateInstantiation = seen_duplicate_instantiation,
+        Candidates = Candidates0
+    ). 
+
+:- pred conj_get_conjunctions_worth_parallelising_2(list(goal_rep)::in,
+    goal_path::in, int::in, implicit_parallelism_info::in, 
+    inst_map::in, inst_map::out, list(candidate_par_conjunction)::out,
+    cord(maybe_call_conjunct)::out,
+    seen_duplicate_instantiation::out) is det.
+
+conj_get_conjunctions_worth_parallelising_2([], _, _, _, !InstMap, [], cord.empty, 
+        have_not_seen_duplicate_instantiation).
+conj_get_conjunctions_worth_parallelising_2([Conj | Conjs], GoalPath,
+        ConjunctNum, Info, !InstMap, Candidates, CallSiteConjuncts,
+        SeenDuplicateInstantiation) :-
+    ConjGoalPath = goal_path_add_at_end(GoalPath, step_conj(ConjunctNum)),
+    goal_get_conjunctions_worth_parallelising(Conj, ConjGoalPath, Info,
+        !InstMap, CandidatesHead, SeenDuplicateInstantiationHead,
+        MaybeCallSiteConjunct), 
+    
+    conj_get_conjunctions_worth_parallelising_2(Conjs, GoalPath, ConjunctNum+1,
+        Info, !InstMap, CandidatesTail, CallSiteConjuncts0,
+        SeenDuplicateInstantiationTail),
+
+    Candidates = CandidatesHead ++ CandidatesTail,
+    CallSiteConjuncts = cord.cons(MaybeCallSiteConjunct, CallSiteConjuncts0),
+    SeenDuplicateInstantiation = merge_seen_duplicate_instantiation(
+        SeenDuplicateInstantiationHead,
+        SeenDuplicateInstantiationTail).
+
+:- pred disj_get_conjunctions_worth_parallelising(list(goal_rep)::in,
+    goal_path::in, int::in, implicit_parallelism_info::in, inst_map::in,
+    inst_map::out, list(candidate_par_conjunction)::out,
+    seen_duplicate_instantiation::out) is det.
+
+disj_get_conjunctions_worth_parallelising([], _, _, _, !InstMap, [],
+    have_not_seen_duplicate_instantiation).
+disj_get_conjunctions_worth_parallelising([Disj | Disjs], GoalPath, DisjNum,
+        Info, InstMap0, InstMap, Candidates, SeenDuplicateInstantiation) :-
+    DisjGoalPath = goal_path_add_at_end(GoalPath, step_disj(DisjNum)),
+    HeadDetism = Disj ^ goal_detism_rep,
+    goal_get_conjunctions_worth_parallelising(Disj, DisjGoalPath, Info,
+        InstMap0, HeadInstMap, HeadCandidates, HeadSeenDuplicateInstantiation, 
+        _MaybeCallConjunct),
+    disj_get_conjunctions_worth_parallelising(Disjs, GoalPath, DisjNum + 1,
+        Info, InstMap0, TailInstMap, TailCandidates,
+        TailSeenDuplicateInstantiation),
+    Candidates = HeadCandidates ++ TailCandidates,
+    % merge_inst_map requires the detism of goals that produce both inst maps,
+    % we can create fake values that satisfy merge_inst_map easily.
+    (
+        Disjs = [],
+        TailDetism = failure_rep
+    ;
+        Disjs = [_ | _],
+        TailDetism = det_rep
+    ),
+    InstMap = merge_inst_map(HeadInstMap, HeadDetism, TailInstMap, TailDetism),
+    SeenDuplicateInstantiation = merge_seen_duplicate_instantiation(
+        HeadSeenDuplicateInstantiation,
+        TailSeenDuplicateInstantiation).
+
+:- pred switch_case_get_conjunctions_worth_parallelising(list(case_rep)::in,
+    goal_path::in, int::in, implicit_parallelism_info::in, inst_map::in,
+    inst_map::out, list(candidate_par_conjunction)::out,
+    seen_duplicate_instantiation::out) is det.
+
+switch_case_get_conjunctions_worth_parallelising([], _, _, _, !InstMap, [],
+    have_not_seen_duplicate_instantiation).
+switch_case_get_conjunctions_worth_parallelising([Case | Cases], GoalPath,
+        CaseNum, Info, InstMap0, InstMap, Candidates,
+        SeenDuplicateInstantiation) :-
+    Case = case_rep(_, _, Goal),
+    HeadDetism = Goal ^ goal_detism_rep,
+    CaseGoalPath = goal_path_add_at_end(GoalPath, step_switch(CaseNum, no)),
+    goal_get_conjunctions_worth_parallelising(Goal, CaseGoalPath, Info,
+        InstMap0, HeadInstMap, HeadCandidates, HeadSeenDuplicateInstantiation, 
+        _MaybeCallConjs),
+    switch_case_get_conjunctions_worth_parallelising(Cases, GoalPath, 
+        CaseNum + 1, Info, InstMap0, TailInstMap, TailCandidates,
+        TailSeenDuplicateInstantiation),
+    Candidates = HeadCandidates ++ TailCandidates,
+    % merge_inst_map requires the detism of goals that produce both inst maps,
+    % we can create fake values that satisfy merge_inst_map easily.
+    (
+        Cases = [],
+        TailDetism = failure_rep
+    ;
+        Cases = [_ | _],
+        TailDetism = det_rep
+    ),
+    InstMap = merge_inst_map(HeadInstMap, HeadDetism, TailInstMap, TailDetism),
+    SeenDuplicateInstantiation = merge_seen_duplicate_instantiation(
+        HeadSeenDuplicateInstantiation,
+        TailSeenDuplicateInstantiation).
+
+:- pred ite_get_conjunctions_worth_parallelising(goal_rep::in, goal_rep::in,
+    goal_rep::in, goal_path::in, implicit_parallelism_info::in, inst_map::in,
+    inst_map::out, list(candidate_par_conjunction)::out,
+    seen_duplicate_instantiation::out) is det.
+
+ite_get_conjunctions_worth_parallelising(Cond, Then, Else, GoalPath, Info,
+        !InstMap, Candidates, SeenDuplicateInstantiation) :-
+    CondGoalPath = goal_path_add_at_end(GoalPath, step_ite_cond),
+    ThenGoalPath = goal_path_add_at_end(GoalPath, step_ite_then),
+    ElseGoalPath = goal_path_add_at_end(GoalPath, step_ite_else),
+    goal_get_conjunctions_worth_parallelising(Cond, CondGoalPath, Info,
+        !.InstMap, PostCondInstMap, CondCandidates,
+        CondSeenDuplicateInstantiation, _),
+    goal_get_conjunctions_worth_parallelising(Then, ThenGoalPath, Info, 
+        PostCondInstMap, PostThenInstMap, ThenCandidates,
+        ThenSeenDuplicateInstantiation, _),
+    goal_get_conjunctions_worth_parallelising(Else, ElseGoalPath, Info, 
+        PostCondInstMap, PostElseInstMap, ElseCandidates,
+        ElseSeenDuplicateInstantiation, _),
+    Candidates = CondCandidates ++ ThenCandidates ++ ElseCandidates,
+    (
+        CondSeenDuplicateInstantiation = have_not_seen_duplicate_instantiation,
+        ThenSeenDuplicateInstantiation = have_not_seen_duplicate_instantiation,
+        ElseSeenDuplicateInstantiation = have_not_seen_duplicate_instantiation
+    ->
+        SeenDuplicateInstantiation = have_not_seen_duplicate_instantiation
+    ;
+        SeenDuplicateInstantiation = seen_duplicate_instantiation
+    ),
+    ThenDetism = Then ^ goal_detism_rep,
+    ElseDetism = Else ^ goal_detism_rep,
+    !:InstMap = merge_inst_map(PostThenInstMap, ThenDetism, 
+        PostElseInstMap, ElseDetism).
+
+    % This type represents a goal, if the goal is a call extra information used
+    % for parallelising it with another call is provided.
+    %
+    % This is similar to candidate_parallel_conjunct, except that it stores
+    % information that's used for the rest of the implicit parallelism
+    % analysis.  It must contain information for the following.
+    %
+    %   - Average cost information for this call site,
+    %
+    %   - Enough information to resolve the procedure call, (detism and
+    %     argument modes).
+    %
+    %   - Information that can be used to determine if this is part of a
+    %     dependant conjunction, and its role in the dependant conjunction.
+    %
+    %   - Enough information so that a candidate_par_conjuct structure can be
+    %     constructed from it and the deep profiling info.
+    %
+:- type maybe_call_conjunct 
+    --->    call(
+                mccc_callee             :: maybe(pair(string, string)),
+                mccc_detism             :: detism_rep,
+                mccc_args               :: list(var_mode_and_use),
+                mccc_perf               :: call_site_perf
+            )
+    ;       trivial_atomic_goal(
+                mccag_detism            :: detism_rep,
+                mccag_bound_vars        :: list(var_rep)
+            )
+    ;       non_atomic_goal.
+
+:- inst call
+    --->    call(ground, ground, ground, ground).
+
+    % A variable, it's mode and it's usage in the callee.  The mode information
+    % is also summarised within the variable use information.
+    %
+:- type var_mode_and_use
+    --->    var_mode_and_use(
+                vmu_var                 :: var_rep,
+                vmu_mode                :: var_mode_rep,
+                vmu_use                 :: var_use_info
+            ).
+
+:- pred maybe_call_site_conjunct(implicit_parallelism_info::in, goal_path::in,
+    atomic_goal_rep::in, detism_rep::in, inst_map::in, inst_map::in,
+    list(var_rep)::in, maybe_call_conjunct::out) is det.
+
+maybe_call_site_conjunct(Info, GoalPath, AtomicGoal, Detism,
+        InstMapBefore, InstMapAfter, BoundVars, MaybeCallConjunct) :-
+    (
+        ( AtomicGoal = unify_construct_rep(_, _, _)
+        ; AtomicGoal = unify_deconstruct_rep(_, _, _)
+        ; AtomicGoal = partial_construct_rep(_, _, _)
+        ; AtomicGoal = partial_deconstruct_rep(_, _, _)
+        ; AtomicGoal = unify_assign_rep(_, _)
+        ; AtomicGoal = cast_rep(_, _)
+        ; AtomicGoal = unify_simple_test_rep(_, _)
+        % Don't bother parallelising foreign code, builtins or events.
+        ; AtomicGoal = pragma_foreign_code_rep(_)
+        ; AtomicGoal = builtin_call_rep(_, _, _)
+        ; AtomicGoal = event_call_rep(_, _)
+        ),
+        MaybeCallConjunct = trivial_atomic_goal(Detism, BoundVars) 
+    ;
+        ( AtomicGoal = higher_order_call_rep(_, Args)
+        ; AtomicGoal = method_call_rep(_, _, Args)
+        ; AtomicGoal = plain_call_rep(_, _, Args)
+        ),
+        (
+            ( AtomicGoal = higher_order_call_rep(_, _)
+            ; AtomicGoal = method_call_rep(_, _, _)
+            ),
+            MaybeCallee = no
+        ; 
+            AtomicGoal = plain_call_rep(ModuleName, CalleeName, _),
+            MaybeCallee = yes(ModuleName - CalleeName)
+        ),
+        map.lookup(Info ^ ipi_call_sites, GoalPath, CallSitePerf),
+        % Lookup var use information.
+        CallSiteKind = CallSitePerf ^ csf_kind,
+        (
+            CallSiteKind = normal_call_and_info(NormalCalleeId),
+            PSPtr = NormalCalleeId ^ nci_callee_desc ^ pdesc_ps_ptr,
+            Deep = Info ^ ipi_deep,
+            create_proc_var_use_dump_report(Deep, PSPtr,
+                MaybeVarUseDumpInfo),
+            MaybeVarUseDumpInfo = ok(VarUseDumpInfo)
+        ->
+            VarUseInfos = VarUseDumpInfo ^ pvui_var_uses, 
+            list.map_corresponding((pred(Arg::in, VarUseInfo::in, 
+                        VarModeAndUse::out) is det :-
+                    var_get_mode(InstMapBefore, InstMapAfter, Arg, ArgMode),
+                    VarModeAndUse = var_mode_and_use(Arg, ArgMode,
+                        VarUseInfo)
+                ), Args, VarUseInfos, VarModeAndUses)
+        ;
+            list.map((pred(Arg::in, VarModeAndUse::out) is det :-
+                    var_get_mode(InstMapBefore, InstMapAfter, Arg, ArgMode),
+                    var_mode_to_var_use(ArgMode, VarUseType),
+                    pessimistic_var_use_info(VarUseType, VarUseInfo),
+                    VarModeAndUse = var_mode_and_use(Arg, ArgMode,
+                        VarUseInfo)
+                ), Args, VarModeAndUses)
+        ),
+        MaybeCallConjunct = 
+            call(MaybeCallee, Detism, VarModeAndUses, CallSitePerf)
+    ).
+
+:- pred var_get_mode(inst_map::in, inst_map::in, var_rep::in, var_mode_rep::out)
+    is det.
+
+var_get_mode(InstMapBefore, InstMapAfter, VarRep, VarModeRep) :-
+    inst_map_get(InstMapBefore, VarRep, InstBefore, _),
+    inst_map_get(InstMapAfter, VarRep, InstAfter, _),
+    VarModeRep = var_mode_rep(InstBefore, InstAfter).
+
+    % Note: this runs in quadratic time.
+    %
+:- pred build_candidate_conjunctions(implicit_parallelism_info::in,
+    inst_map::in, goal_path::in, list(maybe_call_conjunct)::in,
+    pqueue(float, candidate_par_conjunction)::in, 
+    pqueue(float, candidate_par_conjunction)::out) is det.
+
+build_candidate_conjunctions(_, _, _, [], !Candidates).
+build_candidate_conjunctions(Info, InstMap, GoalPath, [MaybeCall | MaybeCalls],
+        !Candidates) :-
+    (
+        MaybeCall = call(_, _, _, CallSitePerf),
+        Cost = CallSitePerf ^ csf_summary_perf ^ perf_row_self 
+            ^ perf_row_callseqs_percall,
+        ( Cost > float(Info ^ ipi_opts ^ cpc_call_site_threshold) ->
+            % This conjunction is a call and is expensive enough to
+            % parallelise, find some later conjunct to parallelise against it.
+            build_candidate_conjunctions_2(Info, InstMap, GoalPath, MaybeCall,
+                cord.empty, MaybeCalls, !Candidates)
+            % XXX: pick the most expensive non-overlapping candidates from the
+            % result.
+        ;
+            true
+        )
+    ;
+        MaybeCall = non_atomic_goal
+    ;
+        MaybeCall = trivial_atomic_goal(_, _)
+    ),
+    build_candidate_conjunctions(Info, InstMap, GoalPath, MaybeCalls,
+        !Candidates).
+
+:- pred build_candidate_conjunctions_2(implicit_parallelism_info::in,
+    inst_map::in, goal_path::in, maybe_call_conjunct::in(call),
+    cord(maybe_call_conjunct)::in, list(maybe_call_conjunct)::in,
+    pqueue(float, candidate_par_conjunction)::in, pqueue(float,
+    candidate_par_conjunction)::out) is det.
+
+build_candidate_conjunctions_2(_, _, _, _, _, [], !Candidates).
+build_candidate_conjunctions_2(Info, InstMap, GoalPath, CallA,
+        IntermediateGoals0, [MaybeCall | MaybeCalls], !Candidates) :-
+    (
+        MaybeCall = call(_, _, _, CallSitePerf),
+        CallB = MaybeCall,
+        Cost = call_site_perf_self_callseqs_percall(CallSitePerf),
+        ( Cost > float(Info ^ ipi_opts ^ cpc_call_site_threshold) ->
+            % This conjunction is a call and is expensive enough to
+            % parallelise.
+            are_conjuncts_dependant(CallA, CallB, InstMap, Dependance),
+            (
+                Dependance = conjuncts_are_dependant(DepVars),
+                compute_optimal_dependant_parallelisation(Info, CallA, CallB,
+                    DepVars, IntermediateGoals0, InstMap, CPCA, CPCB, Speedup)
+            ;
+                Dependance = conjuncts_are_independent,
+                compute_independant_parallelisation_speedup(Info, CallA, CallB, 
+                    length(IntermediateGoals0), CPCA, CPCB, Speedup)
+            ),
+            % XXX: This threshold should be configurable.
+            ( Speedup > 0.0 ->
+                % XXX: I think this should be a priority queue or somesuch.
+                GoalPathString = goal_path_to_string(GoalPath),
+                Candidate = candidate_par_conjunction(GoalPathString, 
+                    CPCA, CPCB, Dependance, Speedup),
+                % So that the candidates with the greatest speedup are removed
+                % first multiply speedup by -1.0.
+                pqueue.insert(!.Candidates, Speedup * -1.0, Candidate,
+                    !:Candidates)
+            ;
+                true
+            )
+        ;
+            % Don't recurse here, we don't parallelise over call goals.
+            true
+        )
+    ;
+        MaybeCall = trivial_atomic_goal(_, _),
+        build_candidate_conjunctions_2(Info, InstMap, GoalPath, CallA,
+            cord.snoc(IntermediateGoals0, MaybeCall), MaybeCalls, !Candidates)
+    ;
+        MaybeCall = non_atomic_goal
+        % Don't recurse in this case, we don't parallelise over non-atomic goals
+        % yet.
+    ).
+
+:- pred are_conjuncts_dependant(maybe_call_conjunct::in(call),
+    maybe_call_conjunct::in(call), inst_map::in, conjuncts_are_dependant::out)
+    is det.
+
+are_conjuncts_dependant(CallA, CallB, InstMap, Dependance) :-
+    % Conjuncts are dependant if there exists an input variable in CallB that
+    % is made ground by CallA or depends upon a variable made ground by CallA.
+    list.foldl(add_output_var_to_set, CallA ^ mccc_args, 
+        set.init, CallAOutputs),
+    list.foldl(are_conjuncts_dependant_var(CallAOutputs, InstMap), 
+        CallB ^ mccc_args, set.init, DepVars),
+    ( set.empty(DepVars) ->
+        Dependance = conjuncts_are_independent
+    ;
+        Dependance = conjuncts_are_dependant(DepVars)
+    ).
+
+:- pred are_conjuncts_dependant_var(set(var_rep)::in, inst_map::in,
+    var_mode_and_use::in, set(var_rep)::in, set(var_rep)::out) is det.
+
+are_conjuncts_dependant_var(CallAOutputs, InstMap, VarModeAndUse, !DepVars) :-
+    VarModeAndUse = var_mode_and_use(VarRep, VarModeRep, _),
+    ( VarModeRep = var_mode_rep(ir_ground_rep, ir_ground_rep) ->
+        inst_map_get_var_deps(InstMap, VarRep, VarDeps),
+        (
+            (
+                contains(CallAOutputs, VarRep)
+            ;
+                member(VarDep, VarDeps),
+                contains(CallAOutputs, VarDep)
+            )
+        ->
+            svset.insert(VarRep, !DepVars)
+        ;
+            true
+        )
+    ;
+        true
+    ).
+
+:- pred add_output_var_to_set(var_mode_and_use::in, 
+    set(var_rep)::in, set(var_rep)::out) is det.
+
+add_output_var_to_set(var_mode_and_use(VarRep, VarModeRep, _), !Set) :-
+    ( VarModeRep = var_mode_rep(ir_free_rep, ir_ground_rep) ->
+        svset.insert(VarRep, !Set)
+    ;
+        true
+    ).
+
+:- pred compute_independant_parallelisation_speedup(
+    implicit_parallelism_info::in, 
+    maybe_call_conjunct::in(call), maybe_call_conjunct::in(call),
+    int::in, candidate_par_conjunct::out, candidate_par_conjunct::out,
+    float::out) is det.
+
+compute_independant_parallelisation_speedup(Info, CallA, CallB, NumUnifications,
+        CPCA, CPCB, Speedup) :-
+    CostA = call_site_perf_self_callseqs_percall(CallA ^ mccc_perf),
+    CostB = call_site_perf_self_callseqs_percall(CallB ^ mccc_perf),
+    SequentialCost = CostA + CostB,
+    ParallelCost = max(CostA, CostB) + 
+        float(Info ^ ipi_opts ^ cpc_sparking_cost),
+    Speedup = SequentialCost - ParallelCost,
+    ( CostA < CostB ->
+        NumUnificationsA = NumUnifications,
+        NumUnificationsB = 0
+    ;
+        NumUnificationsA = 0,
+        NumUnificationsB = NumUnifications
+    ),
+    call_site_conj_to_candidate_par_conjunct(Info, CallA, NumUnificationsA,
+        CPCA),
+    call_site_conj_to_candidate_par_conjunct(Info, CallB, NumUnificationsB,
+        CPCB).
+
+:- pred compute_optimal_dependant_parallelisation(
+    implicit_parallelism_info::in,
+    maybe_call_conjunct::in(call), maybe_call_conjunct::in(call),
+    set(var_rep)::in, cord(maybe_call_conjunct)::in, inst_map::in,
+    candidate_par_conjunct::out, candidate_par_conjunct::out, float::out) 
+    is det.
+
+compute_optimal_dependant_parallelisation(Info, CallA, CallB,
+        DepVars, IntermediateGoals, InstMap, CPCA, CPCB, Speedup) :-
+    CostA = call_site_perf_self_callseqs_percall(CallA ^ mccc_perf),
+    CostB = call_site_perf_self_callseqs_percall(CallB ^ mccc_perf),
+    SequentialCost = CostA + CostB,
+    NumUnifications = length(IntermediateGoals),
+    ( singleton_set(DepVars, DepVar) ->
+        % Only parallelise conjunctions with a single dependant variable for
+        % now.
+        ( get_var_use_from_args(CallB ^ mccc_args, DepVar, DepVarConsume) ->
+            DepVarConsume = var_use_info(CostUntilConsume, _),
+            (
+                get_var_use_from_args(CallA ^ mccc_args, DepVar,
+                    DepVarProduce)
+            ->
+                DepVarProduce = var_use_info(CostUntilProduction, _),
+                CostBeforeProduction = 
+                    cost_until_to_cost_since_start(CostUntilProduction, CostA),
+                CostBeforeConsume = 
+                    cost_until_to_cost_since_start(CostUntilConsume, CostB),
+                CostAfterConsume = 
+                    cost_until_to_cost_before_end(CostUntilConsume, CostB),
+                % Unfications between the calls don't bind any variables useful
+                % for the calls, so serialise them with the cheaper call.
+                ( CostA < CostB ->
+                    NumUnificationsA = NumUnifications,
+                    NumUnificationsB = 0
+                ;
+                    NumUnificationsA = 0,
+                    NumUnificationsB = NumUnifications
+                )
+            ;
+                inst_map_get_var_deps(InstMap, DepVar, DepVarDeps),
+                set.fold(get_var_use_add_to_queue(CallA ^ mccc_args), 
+                    DepVarDeps, pqueue.init, ProduceDepVarQueue),
+                ( 
+                    pqueue.remove(ProduceDepVarQueue, _,
+                        CostUntilProductionPrime, _)
+                ->
+                    CostUntilProduction = CostUntilProductionPrime
+                ;
+                    error(this_file ++ 
+                        "Expected to find at least one dependant variable")
+                ),
+                CostBeforeProduction0 = 
+                    cost_until_to_cost_since_start(CostUntilProduction, CostA),
+                CostAfterProduction0 = 
+                    cost_until_to_cost_before_end(CostUntilProduction, CostA),
+                CostBeforeConsume0 = 
+                    cost_until_to_cost_since_start(CostUntilConsume, CostB),
+                CostAfterConsume0 = 
+                    cost_until_to_cost_before_end(CostUntilConsume, CostB),
+                % Compare time before consume vs time after production, the
+                % lesser one should have the unifications added to it.  This
+                % maximises the amount of parallelism.
+                ( CostBeforeConsume0 > CostAfterProduction0 ->
+                    NumUnificationsA = 0,
+                    NumUnificationsB = NumUnifications,
+                    CostBeforeProduction = CostBeforeProduction0,
+                    CostBeforeConsume = CostA,
+                    CostAfterConsume = 0.0
+                ;
+                    NumUnificationsA = NumUnifications,
+                    NumUnificationsB = 0,
+                    CostBeforeProduction = 0.0,
+                    CostBeforeConsume = CostA - CostAfterConsume0,
+                    CostAfterConsume = CostAfterConsume0 
+                )
+            ),
+            SparkingCost = float(Info ^ ipi_opts ^ cpc_sparking_cost),
+            LockingCost = float(Info ^ ipi_opts ^ cpc_locking_cost),
+            ParallelCost = max(CostA, 
+                    max(CostBeforeProduction, CostBeforeConsume) 
+                        + CostAfterConsume) 
+                + SparkingCost + LockingCost,
+            Speedup = SequentialCost - ParallelCost
+        ;
+            error("Dependant var not in consumer's arguments")
+        )
+    ;
+        Speedup = -1.0,
+        NumUnificationsA = NumUnifications,
+        NumUnificationsB = 0
+    ),
+    call_site_conj_to_candidate_par_conjunct(Info, CallA, NumUnificationsA,
+        CPCA),
+    call_site_conj_to_candidate_par_conjunct(Info, CallB, NumUnificationsB,
+        CPCB).
+
+:- pred get_var_use_from_args(list(var_mode_and_use)::in, var_rep::in, 
+    var_use_info::out) is semidet.
+
+get_var_use_from_args([], _, _) :- false.
+get_var_use_from_args([Arg | Args], Var, VarUse) :-
+    ( Arg = var_mode_and_use(Var, _, VarUsePrime) ->
+        VarUse = VarUsePrime
+    ;
+        get_var_use_from_args(Args, Var, VarUse)
+    ).
+
+:- pred get_var_use_add_to_queue(list(var_mode_and_use)::in, var_rep::in,
+    pqueue(float, cost_until_var_use)::in,
+    pqueue(float, cost_until_var_use)::out) is det.
+
+get_var_use_add_to_queue(VarsModeAndUse, VarRep, !Queue) :-
+    ( get_var_use_from_args(VarsModeAndUse, VarRep, VarUse) ->
+        VarUse = var_use_info(CostUntilVarUse, _),
+        % Priority queues return the smallest items first,  And we want to find
+        % the most pessimistic variable production so use the cost before the
+        % procedure's end.
+        Key = cost_until_to_cost_before_end(CostUntilVarUse, 0.0),
+        pqueue.insert(!.Queue, Key, CostUntilVarUse, !:Queue)
+    ;
+        true
+    ).
+
+:- func call_site_perf_self_callseqs_percall(call_site_perf) = float.
+
+call_site_perf_self_callseqs_percall(CSP) = 
+    CSP ^ csf_summary_perf ^ perf_row_self ^ perf_row_callseqs_percall.
+
+:- pred call_site_conj_to_candidate_par_conjunct(
+    implicit_parallelism_info::in, maybe_call_conjunct::in(call),
+    int::in, candidate_par_conjunct::out) is det.
+
+call_site_conj_to_candidate_par_conjunct(Info, Call, NumUnifications, CPC) :-
+    Call = call(MaybeCallee, _Detism, Args, Perf),
+    VarTable = Info ^ ipi_var_table,
+    list.map(var_mode_use_to_var_in_par_conj(VarTable), Args, Vars),
+    Cost = call_site_perf_self_callseqs_percall(Perf),
+    CPC = candidate_par_conjunct(MaybeCallee, Vars, Cost, NumUnifications).
+
+:- pred var_mode_use_to_var_in_par_conj(var_table::in, var_mode_and_use::in,
+    maybe(string)::out) is det.
+
+var_mode_use_to_var_in_par_conj(VarTable, var_mode_and_use(Var, _, _),
+        MaybeName) :-
+    ( search_var_name(VarTable, Var, Name) ->
+        MaybeName = yes(Name)
+    ;
+        MaybeName = no
     ).
 
 %----------------------------------------------------------------------------%
@@ -578,6 +1403,33 @@ css_to_call(Deep, CSS, Call) :-
     % Build the call datastructure.
     Call = call_site(Caller, Slot, CallTypeAndCallee).
 
+%----------------------------------------------------------------------------%
+%
+% Useful utility predicates.
+%
+
+    % Remove something from inside a maybe_error type and return it.  Throw an
+    % exception if the MaybeError variable has value error(_).
+    %
+:- pred det_open_maybe_error(maybe_error(T)::in, T::out) is det.
+
+det_open_maybe_error(ok(X), X).
+det_open_maybe_error(error(Error), _) :-
+    error(Error).
+
+:- pred add_call_site_perf_to_map(call_site_perf::in, 
+    map(goal_path, call_site_perf)::in, map(goal_path, call_site_perf)::out) 
+    is det.
+
+add_call_site_perf_to_map(CallSitePerf, !Map) :-
+    GoalPath = CallSitePerf ^ csf_summary_perf ^ perf_row_subject 
+        ^ csdesc_goal_path,
+    svmap.det_insert(GoalPath, CallSitePerf, !Map).
+
+:- func this_file = string.
+
+this_file = "mdprof_feedback: ".
+
 %-----------------------------------------------------------------------------%
 :- end_module mdprof_feedback.
 %-----------------------------------------------------------------------------%
Index: deep_profiler/program_representation_utils.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/program_representation_utils.m,v
retrieving revision 1.17
diff -u -p -b -r1.17 program_representation_utils.m
--- deep_profiler/program_representation_utils.m	16 Oct 2008 01:16:36 -0000	1.17
+++ deep_profiler/program_representation_utils.m	20 Oct 2008 05:33:37 -0000
@@ -80,6 +80,58 @@
 
 %----------------------------------------------------------------------------%
 
+    % A map of variables to instantiation states,  Like inst_map within the
+    % compiler.
+    %
+:- type inst_map.
+
+    % Build the initial inst for a procedure.
+    %
+:- func initial_inst_map(proc_defn_rep) = inst_map.
+
+    % inst_map_ground_vars(Vars, !InstMap, SeenDuplicateInstantiaton).
+    %
+    % Make the variables in the given list ground in the new copy of the inst
+    % map.
+    %
+    % SeenDuplicateInstantiation will be true iff at least one of these
+    % variables is already ground.
+    %
+:- pred inst_map_ground_vars(list(var_rep)::in, set(var_rep)::in, inst_map::in,
+    inst_map::out, seen_duplicate_instantiation::out) is det.
+    
+    % This type represents whether a traversal has seen more than one
+    % instantiation of a variable within a single branch.  If at the end of a
+    % traversal a duplicate instantiation has been seen we can either accept a
+    % pessimistic default or abort parallelisation of this particular
+    % conjunction.
+    %
+:- type seen_duplicate_instantiation
+    --->    seen_duplicate_instantiation
+    ;       have_not_seen_duplicate_instantiation.
+
+:- func merge_seen_duplicate_instantiation(seen_duplicate_instantiation,
+    seen_duplicate_instantiation) = seen_duplicate_instantiation.
+
+    % Retrieve the instantiatedness of a variable, and variables that it's
+    % binding depends upon from the instmap, if the variable is new ir_free is
+    % returned and the variables it depends upon is the empty set.
+    %
+:- pred inst_map_get(inst_map::in, var_rep::in, inst_rep::out,
+    set(var_rep)::out) is det.
+
+    % Merge two inst maps from different branches of execution.
+    %
+:- func merge_inst_map(inst_map, detism_rep, inst_map, detism_rep) = inst_map.
+
+    % Retrieve all the variables this variable depends upon,  indirect
+    % dependencies are also returned.
+    %
+:- pred inst_map_get_var_deps(inst_map::in, var_rep::in, set(var_rep)::out) 
+    is det.
+
+%----------------------------------------------------------------------------%
+
     % geneirc_vars_first_use(HeadVarsToVars, Deep, PSPtr, CallStack,
     %   VarUseInfos, ProcAverageCost).
     %
@@ -98,6 +150,16 @@
 :- pred head_vars_all(list(head_var_rep)::in, list(var_rep)::out,
     list(var_use_type)::out) is det.
 
+:- pred var_mode_to_var_use(var_mode_rep::in, var_use_type::out) is det.
+                    
+:- pred pessimistic_var_use_info(var_use_type::in, var_use_info::out) is det.
+
+%----------------------------------------------------------------------------%
+
+    % Retrieve a set of all the vars involved with this atomic goal.
+    %
+:- pred atomic_goal_get_vars(atomic_goal_rep::in, set(var_rep)::out) is det.
+
 %----------------------------------------------------------------------------%
 
 :- implementation.
@@ -112,6 +174,7 @@
 :- import_module io.
 :- import_module map.
 :- import_module require.
+:- import_module svmap.
 
 %----------------------------------------------------------------------------%
 
@@ -511,10 +574,10 @@ print_unit_to_strings(_, cord.empty).
 %----------------------------------------------------------------------------%
 
 progrep_search_proc(ProgRep, ProcLabel, ProcRep) :-
-    ( ProcLabel = str_ordinary_proc_label(_, Module, _DefModule, _, _, _)
-    ; ProcLabel = str_special_proc_label(_, Module, _DefModule, _, _, _)
+    ( ProcLabel = str_ordinary_proc_label(_, _DeclModule, DefModule, _, _, _)
+    ; ProcLabel = str_special_proc_label(_, _DeclModule, DefModule, _, _, _)
     ),
-    progrep_search_module(ProgRep, Module, ModuleRep),
+    progrep_search_module(ProgRep, DefModule, ModuleRep),
     modulerep_search_proc(ModuleRep, ProcLabel, ProcRep).
 
     % Search for a module within a program representation.
@@ -1487,39 +1550,6 @@ sum_after_coverage(After, !SumAfter) :-
     ).
 
 %----------------------------------------------------------------------------%
-
-:- type solution_count_rep
-    --->    at_most_zero_rep
-    ;       at_most_one_rep   % Including committed choice.
-    ;       at_most_many_rep.
-
-:- func detism_get_solutions(detism_rep) = solution_count_rep.
-
-detism_get_solutions(det_rep) =         at_most_one_rep.
-detism_get_solutions(semidet_rep) =     at_most_one_rep.
-detism_get_solutions(multidet_rep) =    at_most_many_rep.
-detism_get_solutions(nondet_rep) =      at_most_many_rep.
-detism_get_solutions(cc_multidet_rep) = at_most_one_rep.
-detism_get_solutions(cc_nondet_rep) =   at_most_one_rep.
-detism_get_solutions(erroneous_rep) =   at_most_zero_rep.
-detism_get_solutions(failure_rep) =     at_most_zero_rep.
-
-:- type can_fail_rep
-    --->    can_fail_rep
-    ;       cannot_fail_rep.
-
-:- func detism_get_can_fail(detism_rep) = can_fail_rep.
-
-detism_get_can_fail(det_rep) =          cannot_fail_rep.
-detism_get_can_fail(semidet_rep) =      can_fail_rep.
-detism_get_can_fail(multidet_rep) =     cannot_fail_rep.
-detism_get_can_fail(nondet_rep) =       can_fail_rep.
-detism_get_can_fail(cc_multidet_rep) =  cannot_fail_rep.
-detism_get_can_fail(cc_nondet_rep) =    can_fail_rep.
-detism_get_can_fail(erroneous_rep) =    cannot_fail_rep.
-detism_get_can_fail(failure_rep) =      can_fail_rep.
-
-%----------------------------------------------------------------------------%
 %
 % Coverage information helper predicates.
 %
@@ -1567,10 +1597,9 @@ get_coverage_before_and_after(coverage_k
     % lookup anyway to find cost information, This will callback to the deep
     % profiler as it crosses procedure boundaries.
     %
-    % Another value should be used to describe if we're looking for a producer
-    % or consumer, this affects how pessimistic defaults are calculated.  XXX:
-    % This is not yet implemented for now I implement the first time a variable
-    % is consumed and then hopefully extend that code.
+    % This does not follow higher order or method calls.  It may be possible to
+    % follow call the calls seen during profiling and aggregate their variable
+    % use information based on how often they are called from that call site.
     %
 :- pred goal_var_first_use(goal_path::in, goal_rep(coverage_info)::in,
     var_first_use_static_info::in, float::in, float::out, found_first_use::out)
@@ -1663,7 +1692,7 @@ call_var_first_use(AtomicGoal, BoundVars
     ProcCost = RowData ^ perf_row_callseqs_percall,
     % XXX: this doesn't work for (mutually-)recursive calls, the deep profiler
     % sets their cost to 1.0.  For now we just have to hope that the variables
-    % we're searching for are used in the recursive call so the trick above
+    % we're searching for are used in the recursive call so the trick below 
     % works.
     NextCostSoFar = CostSoFar + ProcCost,
     
@@ -1782,31 +1811,7 @@ atomic_trivial_var_first_use(AtomicGoal,
         FoundFirstUse) :-
     Var = StaticInfo ^ fui_var,
     VarUseType = StaticInfo ^ fui_var_use,
-    ( 
-        ( AtomicGoal = unify_construct_rep(UnifyVar, _, Vars0)
-        ; AtomicGoal = unify_deconstruct_rep(UnifyVar, _, Vars0)
-        ),
-        Vars = [UnifyVar | Vars0]
-    ;
-        ( AtomicGoal = 
-            partial_construct_rep(UnifyVar, _, MaybeVars0)
-        ; AtomicGoal = 
-            partial_deconstruct_rep(UnifyVar, _, MaybeVars0)
-        ),
-        list.filter_map(maybe_x_to_x, MaybeVars0, Vars0),
-        Vars = [UnifyVar | Vars0]
-    ;
-        ( AtomicGoal = unify_assign_rep(VarA, VarB)
-        ; AtomicGoal = cast_rep(VarA, VarB)
-        ; AtomicGoal = unify_simple_test_rep(VarA, VarB)
-        ),
-        Vars = [ VarA, VarB ]
-    ;
-        ( AtomicGoal = pragma_foreign_code_rep(Vars)
-        ; AtomicGoal = event_call_rep(_, Vars)
-        ; AtomicGoal = builtin_call_rep(_, _, Vars)
-        )
-    ),
+    atomic_goal_get_vars(AtomicGoal, Vars),
     (
         member(Var, Vars),
         (
@@ -1937,8 +1942,6 @@ disj_var_first_use_2(GoalPath, DisjNum, 
     ;
         HeadFoundFirstUse = found_first_use(HeadCost),
         TailFoundFirstUse = found_first_use(TailCost),
-        % A simple average, this gets overridden later anyway.
-        % XXX: no it doesn't.
         (
             VarUseType = var_use_consumption,
             % The variable is probably consumed in the first disjunct even if
@@ -2159,16 +2162,7 @@ proc_var_first_use(Deep, PSPtr, ArgNum, 
         % pointer points to the foreign code which can't be looked up even
         % though it uses a 'plain_call' call site.
         % Return a pessimistic default here.
-        (
-            VarUseType = var_use_consumption,
-            CostUntilUse = cost_since_proc_start(0.0)
-        ;
-            ( VarUseType = var_use_production
-            ; VarUseType = var_use_other
-            ),
-            CostUntilUse = cost_before_proc_end(0.0)
-        ),
-        VarUseInfo = var_use_info(CostUntilUse, VarUseType)
+        pessimistic_var_use_info(VarUseType, VarUseInfo)
     ),
     trace [compile_time(flag("debug_first_var_use")), io(!IO)]
         io.format("Trace: prog_var_first_use: %s\n",
@@ -2188,8 +2182,6 @@ head_vars_all(HeadVars, Vars, VarUseType
             var_mode_to_var_use(VarMode, VarUseType)
         ), HeadVars, Vars, VarUseTypes).
 
-:- pred var_mode_to_var_use(var_mode_rep::in, var_use_type::out) is det.
-
 var_mode_to_var_use(var_mode_rep(InitialInst, FinalInst), VarUseType) :-
     (
         InitialInst = ir_ground_rep,
@@ -2205,6 +2197,18 @@ var_mode_to_var_use(var_mode_rep(Initial
         VarUseType = var_use_other
     ).
 
+pessimistic_var_use_info(VarUseType, VarUseInfo) :-
+    (
+        VarUseType = var_use_consumption,
+        CostUntilUse = cost_since_proc_start(0.0)
+    ;
+        ( VarUseType = var_use_production
+        ; VarUseType = var_use_other
+        ),
+        CostUntilUse = cost_before_proc_end(0.0)
+    ),
+    VarUseInfo = var_use_info(CostUntilUse, VarUseType).
+
     % Perform the var_first_use for the vars returned by the closure. 
     %
 generic_vars_first_use(VarsPred, Deep, PSPtr, CallStack, MaybeResult) :- 
@@ -2266,27 +2270,240 @@ goal_var_first_use_wrapper(Deep, CallSta
             ; VarUseType = var_use_other
             ),
             CostUntilUse = cost_before_proc_end(CostUntilUseRaw)
-        )
+        ),
+        VarUseInfo = var_use_info(CostUntilUse, VarUseType)
     ;
         % If the first use has not been found, then use the average cost of the
         % procedure as the cost before the first use, since the variable is
         % never used.
         FoundFirstUse = have_not_found_first_use,
+        pessimistic_var_use_info(VarUseType, VarUseInfo)
+    ).
+
+%----------------------------------------------------------------------------%
+
+:- type inst_map
+    --->    inst_map(
+                map(var_rep, inst_rep),
+                    % The actual inst map.
+                
+                map(var_rep, set(var_rep))
+                    % A tree describing dependencies between bound variables.
+            ).
+
+initial_inst_map(ProcDefn) = InstMap :-
+    HeadVars = ProcDefn ^ pdr_head_vars, 
+    list.foldl(add_head_var_inst_to_map, HeadVars, 
+        inst_map(map.init, map.init), InstMap). 
+
+:- pred add_head_var_inst_to_map(head_var_rep::in, 
+    inst_map::in, inst_map::out) is det.
+
+add_head_var_inst_to_map(head_var_rep(VarRep, ModeRep), !InstMap) :-
+    ModeRep = var_mode_rep(InitialInstRep, _),
+    add_inst_mapping(VarRep, InitialInstRep, set.init, !InstMap).
+
+    % Add an inst mapping.
+    %
+:- pred add_inst_mapping(var_rep::in, inst_rep::in, set(var_rep)::in,
+    inst_map::in, inst_map::out) is det.
+
+add_inst_mapping(VarRep, InstRep, DepVars, InstMap0, InstMap) :-
+    InstMap0 = inst_map(VarToInst0, VarToDepVars0),
+    map.det_insert(VarToInst0, VarRep, InstRep, VarToInst),
+    map.det_insert(VarToDepVars0, VarRep, DepVars, VarToDepVars),
+    InstMap = inst_map(VarToInst, VarToDepVars).
+
+merge_inst_map(InstMapA, DetismA, InstMapB, DetismB) = InstMap :-
         (
-            VarUseType = var_use_consumption,
-            CostUntilUse = cost_since_proc_start(0.0)
+        ( DetismA = erroneous_rep
+        ; DetismA = failure_rep
+        ),
+        InstMap = InstMapB
         ;
-            ( VarUseType = var_use_production
-            ; VarUseType = var_use_other
+        ( DetismA = det_rep
+        ; DetismA = semidet_rep
+        ; DetismA = nondet_rep
+        ; DetismA = multidet_rep
+        ; DetismA = cc_nondet_rep
+        ; DetismA = cc_multidet_rep
             ),
-            CostUntilUse = cost_before_proc_end(0.0)
+        (
+            ( DetismB = erroneous_rep
+            ; DetismB = failure_rep
+            ),
+            InstMap = InstMapA
+        ;
+            ( DetismB = det_rep
+            ; DetismB = semidet_rep
+            ; DetismB = nondet_rep
+            ; DetismB = multidet_rep
+            ; DetismB = cc_nondet_rep
+            ; DetismB = cc_multidet_rep
+            ),
+            InstMapA = inst_map(VarToInstA, VarToDepVarsA),
+            InstMapB = inst_map(VarToInstB, VarToDepVarsB),
+            map.union(inst_intersect, VarToInstA, VarToInstB, VarToInst),
+            map.union(set.union, VarToDepVarsA, VarToDepVarsB, VarToDepVars),
+            InstMap = inst_map(VarToInst, VarToDepVars)
         )
+    ).
+
+:- pred inst_intersect(inst_rep::in, inst_rep::in, inst_rep::out) is det.
+
+inst_intersect(ir_free_rep,     ir_free_rep,    ir_free_rep).
+inst_intersect(ir_free_rep,     ir_ground_rep,  ir_other_rep).
+inst_intersect(ir_free_rep,     ir_other_rep,   ir_other_rep).
+inst_intersect(ir_ground_rep,   ir_free_rep,    ir_other_rep).
+inst_intersect(ir_ground_rep,   ir_ground_rep,  ir_ground_rep).
+inst_intersect(ir_ground_rep,   ir_other_rep,   ir_other_rep).
+inst_intersect(ir_other_rep,    ir_free_rep,    ir_other_rep).
+inst_intersect(ir_other_rep,    ir_ground_rep,  ir_other_rep).
+inst_intersect(ir_other_rep,    ir_other_rep,   ir_other_rep).
+
+inst_map_ground_vars(Vars, DepVars, !InstMap, SeenDuplicateInstantiation) :-
+    list.foldl2(inst_map_ground_var(DepVars), Vars, !InstMap,
+        have_not_seen_duplicate_instantiation, SeenDuplicateInstantiation).
+
+:- pred inst_map_ground_var(set(var_rep)::in, var_rep::in, 
+    inst_map::in, inst_map::out, seen_duplicate_instantiation::in,
+    seen_duplicate_instantiation::out) is det.
+
+inst_map_ground_var(DepVars0, Var, InstMap0, InstMap, !SeenDuplicateInstantiation) :-
+    InstMap0 = inst_map(VarToInst0, VarToDepVars0),
+    ( map.search(VarToInst0, Var, InstPrime) ->
+        Inst = InstPrime
+    ;
+        Inst = ir_free_rep
     ),
-    VarUseInfo = var_use_info(CostUntilUse, VarUseType).
+    (
+        Inst = ir_free_rep,
+        NewInst = ir_ground_rep,
+        DepVars = DepVars0
+    ;
+        ( Inst = ir_ground_rep
+        ; Inst = ir_other_rep
+        ),
+        NewInst = ir_other_rep,
+        map.lookup(VarToDepVars0, Var, DepVarsFromIM),
+        DepVars = set.union(DepVars0,  DepVarsFromIM),
+        !:SeenDuplicateInstantiation = seen_duplicate_instantiation
+    ),
+    map.set(VarToInst0, Var, NewInst, VarToInst),
+    map.set(VarToDepVars0, Var, DepVars, VarToDepVars),
+    InstMap = inst_map(VarToInst, VarToDepVars).
+
+inst_map_get(inst_map(VarToInst, VarToDepVars), Var, Inst, DepVars) :-
+    ( map.search(VarToInst, Var, InstPrime) ->
+        Inst = InstPrime,
+        map.lookup(VarToDepVars, Var, DepVars)
+    ;
+        Inst = ir_free_rep,
+        DepVars = set.init
+    ).
+
+inst_map_get_var_deps(inst_map(_, VarToDepVars), VarRep, DepVars) :-
+    inst_map_get_var_deps_2(VarToDepVars, VarRep, set.init, DepVars).
+
+:- pred inst_map_get_var_deps_2(map(var_rep, set(var_rep))::in, var_rep::in, 
+    set(var_rep)::in, set(var_rep)::out) is det.
+
+inst_map_get_var_deps_2(VarToDepVars, VarRep, !Set) :-
+    ( set.contains(!.Set, VarRep) ->
+        true
+        % This variable has already been visited, this prevents following any
+        % (impossible) cycles in the graph, or following the same path twice
+        % when there are diamonds in the graph.
+    ;
+        ( map.search(VarToDepVars, VarRep, DepVars) ->
+            !:Set = set.union(!.Set, DepVars),
+            set.fold(inst_map_get_var_deps_2(VarToDepVars), DepVars, !Set)
+        ;
+            true
+        )
+    ).
+
+%----------------------------------------------------------------------------%
+
+atomic_goal_get_vars(AtomicGoal, Vars) :-
+    ( 
+        ( AtomicGoal = unify_construct_rep(Var, _, VarsL)
+        ; AtomicGoal = unify_deconstruct_rep(Var, _, VarsL)
+        ; AtomicGoal = higher_order_call_rep(Var, VarsL)
+        ; AtomicGoal = method_call_rep(Var, _, VarsL)
+        ),
+        Vars0 = list_to_set(VarsL),
+        set.insert(Vars0, Var, Vars)
+    ;
+        ( AtomicGoal = partial_construct_rep(Var, _, MaybeVars)
+        ; AtomicGoal = partial_deconstruct_rep(Var, _, MaybeVars)
+        ),
+        list.foldl((pred(MaybeVar::in, Set0::in, Set::out) is det :-
+                (
+                    MaybeVar = yes(VarI),
+                    set.insert(Set0, VarI, Set)
+                ;
+                    MaybeVar = no,
+                    Set = Set0
+                )), MaybeVars, set.init, Vars0),
+        set.insert(Vars0, Var, Vars)
+    ;
+        ( AtomicGoal = unify_assign_rep(VarA, VarB)
+        ; AtomicGoal = cast_rep(VarA, VarB)
+        ; AtomicGoal = unify_simple_test_rep(VarA, VarB)
+        ),
+        Vars = list_to_set([ VarA, VarB ])
+    ;
+        ( AtomicGoal = pragma_foreign_code_rep(VarsL)
+        ; AtomicGoal = event_call_rep(_, VarsL)
+        ; AtomicGoal = builtin_call_rep(_, _, VarsL)
+        ; AtomicGoal = plain_call_rep(_, _, VarsL)
+        ),
+        Vars = list_to_set(VarsL)
+    ).
 
-:- pred maybe_x_to_x(maybe(T)::in, T::out) is semidet.
+merge_seen_duplicate_instantiation(A, B) = R :-
+    (
+        A = have_not_seen_duplicate_instantiation,
+        B = have_not_seen_duplicate_instantiation
+    ->
+        R = have_not_seen_duplicate_instantiation
+    ;
+        R = seen_duplicate_instantiation
+    ).
+
+%----------------------------------------------------------------------------%
 
-maybe_x_to_x(yes(X), X).
+:- type solution_count_rep
+    --->    at_most_zero_rep
+    ;       at_most_one_rep   % Including committed choice.
+    ;       at_most_many_rep.
+
+:- func detism_get_solutions(detism_rep) = solution_count_rep.
+
+detism_get_solutions(det_rep) =         at_most_one_rep.
+detism_get_solutions(semidet_rep) =     at_most_one_rep.
+detism_get_solutions(multidet_rep) =    at_most_many_rep.
+detism_get_solutions(nondet_rep) =      at_most_many_rep.
+detism_get_solutions(cc_multidet_rep) = at_most_one_rep.
+detism_get_solutions(cc_nondet_rep) =   at_most_one_rep.
+detism_get_solutions(erroneous_rep) =   at_most_zero_rep.
+detism_get_solutions(failure_rep) =     at_most_zero_rep.
+
+:- type can_fail_rep
+    --->    can_fail_rep
+    ;       cannot_fail_rep.
+
+:- func detism_get_can_fail(detism_rep) = can_fail_rep.
+
+detism_get_can_fail(det_rep) =          cannot_fail_rep.
+detism_get_can_fail(semidet_rep) =      can_fail_rep.
+detism_get_can_fail(multidet_rep) =     cannot_fail_rep.
+detism_get_can_fail(nondet_rep) =       can_fail_rep.
+detism_get_can_fail(cc_multidet_rep) =  cannot_fail_rep.
+detism_get_can_fail(cc_nondet_rep) =    can_fail_rep.
+detism_get_can_fail(erroneous_rep) =    cannot_fail_rep.
+detism_get_can_fail(failure_rep) =      can_fail_rep.
 
 %----------------------------------------------------------------------------%
 
Index: deep_profiler/report.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/report.m,v
retrieving revision 1.14
diff -u -p -b -r1.14 report.m
--- deep_profiler/report.m	16 Oct 2008 01:16:36 -0000	1.14
+++ deep_profiler/report.m	19 Oct 2008 09:59:55 -0000
@@ -332,6 +332,9 @@
     --->    cost_since_proc_start(float)
     ;       cost_before_proc_end(float).
 
+:- func cost_until_to_cost_since_start(cost_until_var_use, float) = float.
+:- func cost_until_to_cost_before_end(cost_until_var_use, float) = float.
+
     % This type represents information about the performance of the subject.
     % It is intended to be displayed on a browser page or used by a tool as is.
     % It is NOT intended to be subject to further processing, such as adding
@@ -472,6 +475,25 @@
                 cdesc_other_members         :: list(proc_desc)
             ).
 
+%----------------------------------------------------------------------------%
+%----------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module float.
+
+%----------------------------------------------------------------------------%
+
+cost_until_to_cost_since_start(cost_since_proc_start(Cost), _WholeCost) = 
+    Cost.
+cost_until_to_cost_since_start(cost_before_proc_end(Cost), WholeCost) = 
+    WholeCost - Cost.
+
+cost_until_to_cost_before_end(cost_since_proc_start(Cost), WholeCost) = 
+    WholeCost - Cost.
+cost_until_to_cost_before_end(cost_before_proc_end(Cost), _WholeCost) = 
+    Cost.
+
 %-----------------------------------------------------------------------------%
 :- end_module report.
 %-----------------------------------------------------------------------------%
Index: mdbcomp/feedback.m
===================================================================
RCS file: /home/mercury1/repository/mercury/mdbcomp/feedback.m,v
retrieving revision 1.3
diff -u -p -b -r1.3 feedback.m
--- mdbcomp/feedback.m	30 Sep 2008 02:30:51 -0000	1.3
+++ mdbcomp/feedback.m	20 Oct 2008 06:29:21 -0000
@@ -27,6 +27,8 @@
 :- import_module io.
 :- import_module list.
 :- import_module maybe.
+:- import_module pair.
+:- import_module set.
 :- import_module string.
 
 %-----------------------------------------------------------------------------%
@@ -77,9 +79,8 @@
                     % The cost of maintaining a lock on a single dependant
                     % variable in call sequence counts.
 
-                conjunctions        :: assoc_list(string, assoc_list(
-                                         string_proc_label, 
-                                         candidate_par_conjunction))
+                conjunctions        :: assoc_list(string_proc_label,
+                                            candidate_par_conjunction)
                     % Assoclist of module name and an assoclist of procedure
                     % labels and candidate parallel conjunctions.
             ).
@@ -97,50 +98,61 @@
     % by a procedure label, goal path to the conjunction and the call sites
     % within the conjunction that are to be parallelised.
     %
+    % TODO: In the future support more expressive candidate parallel
+    % conjunctions, so that more opportunities for parallelism can be found.
+    % Although it's probably not a good idea to parallelise three conjuncts or
+    % more against one another without first having a good system for reaching
+    % and maintaining the target amount of parallelism, this may involve
+    % distance granularity.
+    %
 :- type candidate_par_conjunction
     --->    candidate_par_conjunction(
                 goal_path       :: goal_path_string,
-                conjuncts       :: list(candidate_par_conjunct)
+                    % The path within the procedure to this conjunuction.
+                
+                par_conjunct_a  :: candidate_par_conjunct,
+                par_conjunct_b  :: candidate_par_conjunct,
+                    % The conjuncts worth parallelising
+
+                dependence      :: conjuncts_are_dependant,
+
+                speedup         :: float
             ).
 
 :- type candidate_par_conjunct
     --->    candidate_par_conjunct(
-                callee          :: call_type_and_callee,
-                vars            :: list(variable_in_par_conjunct),
-                cost            :: int
+                callee                  :: maybe(pair(string, string)),
+                vars                    :: list(maybe(string)),
+                cost                    :: float,
+                include_unifications    :: int
+                    % The number of unifications between conjuncts that should
+                    % be executed in sequence with this call.
             ).
 
-:- type variable_in_par_conjunct
-    --->    variable_in_par_conjunct(
-                maybe_name              :: maybe(string),
-                cost_before_first_use   :: int
-            ).
+:- type conjuncts_are_dependant
+    --->    conjuncts_are_dependant(
+                dependant_vars          :: set(var_rep)
+            )
+    ;       conjuncts_are_independent.
 
 %-----------------------------------------------------------------------------%
 
-    % put_feedback_data(Type, Data, !Info)
+    % put_feedback_data(Data, !Info)
     %
-    % 'Put' feedback data into the feedback files.  Data is stored based on
-    % the type of information being stored.
+    % Put feedback data into the feedback files.
     %
     % Data loaded from file (not added with put) will be removed from the
-    % internal state when data for the same info type is added.
-    %
-    % This will throw an exception if the feedback_type and feedback_data don't
-    % match. 
+    % internal state when data for the same type is added.
     %
 :- pred put_feedback_data(feedback_data::in,
     feedback_info::in, feedback_info::out) is det.
 
 %-----------------------------------------------------------------------------%
 
-    % get_feedback_data(Info, Type, MaybeData).
-    %
-    % To query the feedback files 'get' will give a value for a given info type
-    % if it exists.
+    % get_feedback_data(Info, Data).
     %
-    % This will throw an exception if the feedback_type and feedback_data
-    % within the Info structure do not match.
+    % When given a partially instantiated Data term representing the query this
+    % will either fully instantiate the term or fail.
     %
 :- pred get_feedback_data(feedback_info::in, 
     feedback_data::feedback_data_query) is semidet.
@@ -528,7 +540,7 @@ feedback_first_line = "Mercury Compiler 
 
 :- func feedback_version = string.
 
-feedback_version = "2".
+feedback_version = "3".
 
 %-----------------------------------------------------------------------------%
 :- end_module mdbcomp.feedback.
Index: mdbcomp/program_representation.m
===================================================================
RCS file: /home/mercury1/repository/mercury/mdbcomp/program_representation.m,v
retrieving revision 1.43
diff -u -p -b -r1.43 program_representation.m
--- mdbcomp/program_representation.m	3 Oct 2008 06:57:21 -0000	1.43
+++ mdbcomp/program_representation.m	20 Oct 2008 05:36:40 -0000
@@ -93,6 +93,12 @@
     % that can be used outside the compiler, e.g. in RTTI data structures
     % and in data filed generated by deep profiling.
     %
+    % When procedures are imported from one module to another, for example for
+    % inter-module optimisations the def_module field may be different to the
+    % decl_module feild.  In this case a procedure has been imported into the
+    % def_module from the decl_module.  This is also true for the type_module
+    % and def_module fields in the str_special_proc_label constructor.
+    %
 :- type string_proc_label
     --->    str_ordinary_proc_label(
                 s_ord_pred_or_func      :: pred_or_func,
@@ -354,6 +360,10 @@
     %
 :- pred lookup_var_name(var_table::in, var_rep::in, string::out) is det.
 
+    % Retrieve the name for this variable if it is known, otherwise fail.
+    %
+:- pred search_var_name(var_table::in, var_rep::in, string::out) is semidet.
+
     % If the given atomic goal behaves like a call in the sense that it
     % generates events as ordinary calls do, then return the list of variables
     % that are passed as arguments.
@@ -960,13 +970,16 @@ var_num_rep_byte(short, 1).
 :- type var_table == map(var_rep, string).
 
 lookup_var_name(VarTable, VarRep, String) :-
-    ( map.search(VarTable, VarRep, StringPrime) ->
+    ( search_var_name(VarTable, VarRep, StringPrime) ->
         String = StringPrime
     ;
         % Generate an automatic name for the variable.
         String = string.format("V_%d", [i(VarRep)])
     ).
 
+search_var_name(VarTable, VarRep, String) :-
+    map.search(VarTable, VarRep, String).
+
 %-----------------------------------------------------------------------------%
 
 :- pred read_file_as_bytecode(string::in, io.res(bytecode)::out,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20081020/344ac3d8/attachment.sig>
Previous message: [m-rev.] For post-commit review: Variable use analysis for Deep Profiler.
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the reviews mailing list