[m-rev.] for post-commit review: Implicit Parallelism Analysis
Paul Bone
pbone at csse.unimelb.edu.au
Mon Oct 20 17:23:05 AEDT 2008
For post-commit review by Zoltan.
Estimated hours taken: 27
Branches: main
Perform implicit parallelism analysis in mdprof_feedback. This calculates the
amount of parallelism available in dependant conjunctions and advises the
compiler how to parallelise code via the feedback system.
deep_profiler/mdprof_feedback.m:
Implement implicit parallelisation analysis.
deep_profiler/program_representation_utils.m:
Add a simple implementation of inst maps which are used by the implicit
parallelisation analysis. This implementation also tracks that variables
that are required in order to instantiate a variable.
Export some procedures used by the variable use analysis for use in the
parallelisation analysis in mdprof_feedback.m
Create an extra predicate to retrieve all the variables used by an atomic
goal.
Move utility code in this module to the end.
deep_profiler/report.m:
Add utility function to convert cost_until_var_use values to raw values
either since the beginning of the procedure or before the end.
mdbcomp/feedback.m:
Modified the format of implicit parallelism feedback information.
Incremented the feedback file format version.
mdbcomp/program_representation.m:
Added a procedure to search for a variable name in a variable table and
fail if it cannot find it.
Index: deep_profiler/mdprof_feedback.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/mdprof_feedback.m,v
retrieving revision 1.12
diff -u -p -b -r1.12 mdprof_feedback.m
--- deep_profiler/mdprof_feedback.m 3 Oct 2008 07:39:04 -0000 1.12
+++ deep_profiler/mdprof_feedback.m 20 Oct 2008 06:28:30 -0000
@@ -31,17 +31,22 @@
:- implementation.
:- import_module conf.
+:- import_module create_report.
:- import_module mdbcomp.
:- import_module mdbcomp.feedback.
:- import_module mdbcomp.program_representation.
:- import_module measurements.
:- import_module profile.
+:- import_module program_representation_utils.
+:- import_module query. % For the cmd structure
+:- import_module report.
:- import_module startup.
:- import_module array.
+:- import_module assoc_list.
:- import_module bool.
-:- import_module cord.
:- import_module char.
+:- import_module cord.
:- import_module float.
:- import_module getopt.
:- import_module int.
@@ -49,9 +54,13 @@
:- import_module list.
:- import_module map.
:- import_module maybe.
+:- import_module pair.
+:- import_module pqueue.
:- import_module require.
+:- import_module set.
:- import_module string.
:- import_module svmap.
+:- import_module svset.
%-----------------------------------------------------------------------------%
%
@@ -159,6 +168,7 @@ help_message =
--desired-parallelism <value>
The amount of desired parallelism for implicit parallelism,
value must be a floating point number above 1.0.
+ Note: This option is currently ignored.
--implicit-parallelism-sparking-cost <value>
The cost of creating a spark, measured in the deep profiler's
call sequence counts.
@@ -169,6 +179,9 @@ help_message =
--implicit-parallelism-proc-cost-threshold <value>
The cost threshold for procedures to be considered for implicit
parallelism, measured on the profiler's call sequence counts.
+ --implicit-parallelism-call-site-cost-threshold <value>
+ The cost of a call site to be considered for parallelism
+ against another call site.
The following options select specific types of feedback information
and parameterise them:
@@ -255,7 +268,12 @@ read_deep_file(Input, Verbose, MaybeDeep
; desired_parallelism
; implicit_parallelism_sparking_cost
; implicit_parallelism_locking_cost
- ; implicit_parallelism_proc_cost_threshold.
+ ; implicit_parallelism_proc_cost_threshold
+ ; implicit_parallelism_call_site_cost_threshold.
+
+% TODO: Introduce an option to disable parallelisation of dependant
+% conjunctions, or switch to the simple calculations for independant
+% conjunctions.
:- pred short(char::in, option::out) is semidet.
@@ -284,6 +302,8 @@ long("implicit-parallelism-sparking-cost
long("implicit-parallelism-locking-cost", implicit_parallelism_locking_cost).
long("implicit-parallelism-proc-cost-threshold",
implicit_parallelism_proc_cost_threshold).
+long("implicit-parallelism-call-site-cost-threshold",
+ implicit_parallelism_call_site_cost_threshold).
:- pred defaults(option::out, option_data::out) is multi.
@@ -304,6 +324,7 @@ defaults(desired_parallelism,
defaults(implicit_parallelism_sparking_cost, int(100)).
defaults(implicit_parallelism_locking_cost, int(100)).
defaults(implicit_parallelism_proc_cost_threshold, int(100000)).
+defaults(implicit_parallelism_call_site_cost_threshold, int(50000)).
:- pred construct_measure(string::in, stat_measure::out) is semidet.
@@ -333,7 +354,8 @@ construct_measure("median", stat_med
cpc_desired_parallelism :: float,
cpc_sparking_cost :: int,
cpc_locking_cost :: int,
- cpc_threshold :: int
+ cpc_proc_threshold :: int,
+ cpc_call_site_threshold :: int
).
% Check all the command line options and return a well-typed representation
@@ -390,24 +412,29 @@ check_options(Options0, RequestedFeedbac
lookup_string_option(Options, desired_parallelism,
DesiredParallelismStr),
(
- string.to_float(DesiredParallelismStr, DesiredParallelism),
- DesiredParallelism > 1.0
+ string.to_float(DesiredParallelismStr, DesiredParallelismPrime),
+ DesiredParallelismPrime > 1.0
->
- CandidateParallelConjunctionsOpts ^ cpc_desired_parallelism =
- DesiredParallelism
+ DesiredParallelism = DesiredParallelismPrime
;
error("Invalid value for desired_parallelism: " ++
DesiredParallelismStr)
),
lookup_int_option(Options, implicit_parallelism_sparking_cost,
SparkingCost),
- CandidateParallelConjunctionsOpts ^ cpc_sparking_cost = SparkingCost,
lookup_int_option(Options, implicit_parallelism_locking_cost,
LockingCost),
- CandidateParallelConjunctionsOpts ^ cpc_locking_cost = LockingCost,
lookup_int_option(Options, implicit_parallelism_proc_cost_threshold,
- CPCThreshold),
- CandidateParallelConjunctionsOpts ^ cpc_threshold = CPCThreshold,
+ CPCProcThreshold),
+ lookup_int_option(Options,
+ implicit_parallelism_call_site_cost_threshold,
+ CPCCallSiteThreshold),
+ CandidateParallelConjunctionsOpts =
+ candidate_parallel_conjunctions_opts(DesiredParallelism,
+ SparkingCost,
+ LockingCost,
+ CPCProcThreshold,
+ CPCCallSiteThreshold),
MaybeCandidateParallelConjunctionsOpts =
yes(CandidateParallelConjunctionsOpts)
;
@@ -449,13 +476,811 @@ set_option(Option, Value, !Options) :-
feedback_info::in, feedback_info::out) is det.
process_deep_to_feedback(RequestedFeedbackInfo, Deep, !Feedback) :-
- MaybeCallsAboveThresholdSorted =
+ MaybeCallsAboveThresholdSortedOpts =
RequestedFeedbackInfo ^ maybe_calls_above_threshold_sorted,
(
- MaybeCallsAboveThresholdSorted = yes(Opts),
- css_list_above_threshold(Opts, Deep, !Feedback)
+ MaybeCallsAboveThresholdSortedOpts =
+ yes(CallsAboveThresholdSortedOpts),
+ css_list_above_threshold(CallsAboveThresholdSortedOpts, Deep,
+ !Feedback)
+ ;
+ MaybeCallsAboveThresholdSortedOpts = no
+ ),
+
+ MaybeCandidateParallelConjunctionsOpts =
+ RequestedFeedbackInfo ^ maybe_candidate_parallel_conjunctions,
+ (
+ MaybeCandidateParallelConjunctionsOpts =
+ yes(CandidateParallelConjunctionsOpts),
+ candidate_parallel_conjunctions(CandidateParallelConjunctionsOpts,
+ Deep, !Feedback)
;
- MaybeCallsAboveThresholdSorted = no
+ MaybeCandidateParallelConjunctionsOpts = no
+ ).
+
+%----------------------------------------------------------------------------%
+%
+% Build the candidate parallel conjunctions feedback information used for
+% implicit parallelism.
+%
+
+%
+% This doesn't follow higher order or method calls yet it may be possible to
+% follow all the higher order and method calls seen during profiling and
+% average their statistics based on their execution probability (weight).
+%
+
+:- pred candidate_parallel_conjunctions(
+ candidate_parallel_conjunctions_opts::in, deep::in,
+ feedback_info::in, feedback_info::out) is det.
+
+candidate_parallel_conjunctions(Opts, Deep, !Feedback) :-
+ Opts = candidate_parallel_conjunctions_opts(DesiredParallelism,
+ SparkingCost, LockingCost, ProcThreshold, _CallSiteThreshold),
+ % First retrieve the top procedures above the configured threshold.
+ % This is done 'overall' not 'per_call' because parallelising a procedure
+ % that is used a lot is more beneficial than parallelising a procedure that
+ % is used once when the more-frequently used function accounts for a higher
+ % amount of the programs runtime. It may be disqualified later if per call
+ % it has very little benefit.
+ % TODO: don't bother using the 'create_report' interface to get this
+ % report, we should call it directly instead.
+ TopProcsCmd = deep_cmd_top_procs(threshold_value(float(ProcThreshold)),
+ cost_callseqs, self_and_desc, overall),
+ create_report(TopProcsCmd, Deep, TopProcsReport),
+ ( TopProcsReport = report_top_procs(MaybeTopProcsReport) ->
+ (
+ MaybeTopProcsReport = ok(TopProcs),
+ TopProcsList = TopProcs ^ tp_top_procs
+ ;
+ MaybeTopProcsReport = error(TopProcsReportError),
+ error(TopProcsReportError)
+ )
+ ;
+ error("create_report gave incorrect report, expected top_procs_report")
+ ),
+
+ % Take the top procs list and look for conjunctions that can be
+ % parallelised and give an estimated speedup when parallelised. There may
+ % be more than one opportunity for parallelism in any given procedure.
+ list.map(
+ candidate_parallel_conjunctions_proc(Opts, Deep),
+ TopProcsList, Conjunctions0),
+ list.condense(Conjunctions0, Conjunctions),
+
+ % XXX: Analysing the clique tree to reduce the amount of nested parallel
+ % execution should be done here.
+
+ CandidateParallelConjunctions =
+ feedback_data_candidate_parallel_conjunctions(DesiredParallelism,
+ SparkingCost, LockingCost, Conjunctions),
+ put_feedback_data(CandidateParallelConjunctions, !Feedback).
+
+:- type implicit_parallelism_info
+ ---> implicit_parallelism_info(
+ ipi_deep :: deep,
+ ipi_progrep :: prog_rep,
+ ipi_opts :: candidate_parallel_conjunctions_opts,
+ ipi_call_sites :: map(goal_path, call_site_perf),
+ ipi_var_table :: var_table
+ ).
+
+ % Find candidate parallel conjunctions within the given procedure.
+ %
+:- pred candidate_parallel_conjunctions_proc(
+ candidate_parallel_conjunctions_opts::in, deep::in,
+ perf_row_data(proc_desc)::in,
+ assoc_list(string_proc_label, candidate_par_conjunction)::out) is det.
+
+candidate_parallel_conjunctions_proc(Opts, Deep, PrefRowData,
+ Candidates) :-
+ % Lookup the proc static to find the ProcLabel.
+ PSPtr = PrefRowData ^ perf_row_subject ^ pdesc_ps_ptr,
+ deep_lookup_proc_statics(Deep, PSPtr, PS),
+ ProcLabel = PS ^ ps_id,
+
+ % Make a proc query to retrieve cost information for the call sites within
+ % the procedure.
+ % TODO: As above: this can probably be called directly rather than going via
+ % create_report.
+ create_report(deep_cmd_proc(PSPtr), Deep, Report),
+ ( Report = report_proc(MaybeProcReport) ->
+ det_open_maybe_error(MaybeProcReport, ProcReport)
+ ;
+ error(this_file ++ "Recieved incorrect report for proc report query")
+ ),
+ list.foldl(add_call_site_perf_to_map,
+ ProcReport ^ proc_call_site_summaries, map.init, CallSitesMap),
+ MaybeProgRep = Deep ^ procrep_file,
+ (
+ MaybeProgRep = yes(MaybeProgRep1),
+ (
+ MaybeProgRep1 = ok(ProgRep)
+ ;
+ MaybeProgRep1 = error(Error),
+ error(this_file ++ Error)
+ )
+ ;
+ MaybeProgRep = no,
+ error(this_file ++ "Could not open Deep.procrep")
+ ),
+ ( progrep_search_proc(ProgRep, ProcLabel, ProcRep) ->
+ ProcRep ^ pr_defn = ProcDefnRep,
+ ProcDefnRep ^ pdr_goal = Goal,
+ ProcDefnRep ^ pdr_var_table = VarTable,
+ Info = implicit_parallelism_info(Deep, ProgRep, Opts, CallSitesMap,
+ VarTable),
+ goal_get_conjunctions_worth_parallelising(Goal, empty_goal_path, Info,
+ initial_inst_map(ProcDefnRep), _, Candidates0,
+ SeenDuplicateInstantiation, _),
+ (
+ SeenDuplicateInstantiation = seen_duplicate_instantiation,
+ Candidates = []
+ ;
+ SeenDuplicateInstantiation = have_not_seen_duplicate_instantiation,
+ list.map((pred(Candidate0::in, Candidate::out) is det :-
+ Candidate = (ProcLabel - Candidate0)
+ ), Candidates0, Candidates)
+ )
+ ;
+ % Builtin procedures cannot be found in the program representation, and
+ % cannot be parallelised either.
+ Candidates = []
+ ).
+
+:- pred goal_get_conjunctions_worth_parallelising(goal_rep::in, goal_path::in,
+ implicit_parallelism_info::in, inst_map::in, inst_map::out,
+ list(candidate_par_conjunction)::out, seen_duplicate_instantiation::out,
+ maybe_call_conjunct::out) is det.
+
+goal_get_conjunctions_worth_parallelising(Goal, GoalPath, Info, !InstMap,
+ Candidates, SeenDuplicateInstantiation, MaybeCallConjunct) :-
+ Goal = goal_rep(GoalExpr, Detism, _),
+ (
+ (
+ GoalExpr = conj_rep(Conjuncts),
+ conj_get_conjunctions_worth_parallelising(Conjuncts, GoalPath,
+ Info, !InstMap, Candidates, SeenDuplicateInstantiation)
+ ;
+ GoalExpr = disj_rep(Disjuncts),
+ disj_get_conjunctions_worth_parallelising(Disjuncts, GoalPath, 1,
+ Info, !InstMap, Candidates, SeenDuplicateInstantiation)
+ ;
+ GoalExpr = switch_rep(_, _, Cases),
+ switch_case_get_conjunctions_worth_parallelising(Cases, GoalPath, 1,
+ Info, !InstMap, Candidates, SeenDuplicateInstantiation)
+ ;
+ GoalExpr = ite_rep(Cond, Then, Else),
+ ite_get_conjunctions_worth_parallelising(Cond, Then, Else,
+ GoalPath, Info, !InstMap, Candidates,
+ SeenDuplicateInstantiation)
+ ;
+ GoalExpr = scope_rep(SubGoal, MaybeCut),
+ ScopeGoalPath =
+ goal_path_add_at_end(GoalPath, step_scope(MaybeCut)),
+ goal_get_conjunctions_worth_parallelising(SubGoal, ScopeGoalPath,
+ Info, !InstMap, Candidates, SeenDuplicateInstantiation, _)
+ ;
+ GoalExpr = negation_rep(SubGoal),
+ NegGoalPath = goal_path_add_at_end(GoalPath, step_neg),
+ goal_get_conjunctions_worth_parallelising(SubGoal, NegGoalPath,
+ Info, !InstMap, Candidates, SeenDuplicateInstantiation, _)
+ ),
+ % TODO: Parallelising conjunctions like
+ % ( call1(A, B) , not call2(C, D) )
+ % may be easy to do when writing the compiler's part of the code, if so
+ % then MaybeCallAboveThreshold should probably be set from some of
+ % these non-atomic goals based on goals within them.
+ MaybeCallConjunct = non_atomic_goal
+ ;
+ GoalExpr = atomic_goal_rep(_, _, BoundVars, AtomicGoal),
+ InstMapBeforeCall = !.InstMap,
+ % The binding of a variable may depend on any number of other
+ % variables, and of course the variables they depend upon. Except that
+ % variables involved in control flow (switched on vars, vars in ITE
+ % conds) are not recorded here, but should be for completeness.
+ atomic_goal_get_vars(AtomicGoal, AtomicGoalVars0),
+ list.foldl((pred(Var::in, Set0::in, Set::out) is det :-
+ ( set.remove(Set0, Var, SetPrime) ->
+ Set = SetPrime
+ ;
+ Set = Set0
+ )
+ ), BoundVars, AtomicGoalVars0, AtomicGoalVars),
+ inst_map_ground_vars(BoundVars, AtomicGoalVars, !InstMap,
+ SeenDuplicateInstantiation),
+ maybe_call_site_conjunct(Info, GoalPath, AtomicGoal, Detism,
+ InstMapBeforeCall, !.InstMap, BoundVars, MaybeCallConjunct),
+ Candidates = []
+ ).
+
+:- pred conj_get_conjunctions_worth_parallelising(list(goal_rep)::in,
+ goal_path::in, implicit_parallelism_info::in, inst_map::in,
+ inst_map::out, list(candidate_par_conjunction)::out,
+ seen_duplicate_instantiation::out) is det.
+
+conj_get_conjunctions_worth_parallelising(Conjs, GoalPath, Info,
+ !InstMap, Candidates, SeenDuplicateInstantiation) :-
+ % Note: it will be better to look at each pair of conjuncts, determine if
+ % they are parallelisable (perhaps by placing middle goals in either of the
+ % the parallel conjuncts to create the optimum amount of parallelism. This
+ % will need to have an in-order representation of goals, and for each
+ % variable seen have a tree of variables it depends upon.
+ %
+ % For now consider parallelising conjuncts that seperated only by other
+ % atomic goals.
+ conj_get_conjunctions_worth_parallelising_2(Conjs, GoalPath, 1, Info,
+ !InstMap, Candidates0, CallSiteConjuncts,
+ SeenDuplicateInstantiation),
+ (
+ % Only perform analysis at this point if it's not going to be
+ % thrown away later due to unhandled use of partial instantiation.
+ SeenDuplicateInstantiation = have_not_seen_duplicate_instantiation,
+ build_candidate_conjunctions(Info, !.InstMap, GoalPath,
+ list(CallSiteConjuncts), pqueue.init, CandidatesQueue),
+ % Pick best candidate from queue.
+ ( pqueue.remove(CandidatesQueue, _, Candidate, _) ->
+ Candidates = [Candidate | Candidates0]
+ ;
+ Candidates = Candidates0
+ )
+ ;
+ SeenDuplicateInstantiation = seen_duplicate_instantiation,
+ Candidates = Candidates0
+ ).
+
+:- pred conj_get_conjunctions_worth_parallelising_2(list(goal_rep)::in,
+ goal_path::in, int::in, implicit_parallelism_info::in,
+ inst_map::in, inst_map::out, list(candidate_par_conjunction)::out,
+ cord(maybe_call_conjunct)::out,
+ seen_duplicate_instantiation::out) is det.
+
+conj_get_conjunctions_worth_parallelising_2([], _, _, _, !InstMap, [], cord.empty,
+ have_not_seen_duplicate_instantiation).
+conj_get_conjunctions_worth_parallelising_2([Conj | Conjs], GoalPath,
+ ConjunctNum, Info, !InstMap, Candidates, CallSiteConjuncts,
+ SeenDuplicateInstantiation) :-
+ ConjGoalPath = goal_path_add_at_end(GoalPath, step_conj(ConjunctNum)),
+ goal_get_conjunctions_worth_parallelising(Conj, ConjGoalPath, Info,
+ !InstMap, CandidatesHead, SeenDuplicateInstantiationHead,
+ MaybeCallSiteConjunct),
+
+ conj_get_conjunctions_worth_parallelising_2(Conjs, GoalPath, ConjunctNum+1,
+ Info, !InstMap, CandidatesTail, CallSiteConjuncts0,
+ SeenDuplicateInstantiationTail),
+
+ Candidates = CandidatesHead ++ CandidatesTail,
+ CallSiteConjuncts = cord.cons(MaybeCallSiteConjunct, CallSiteConjuncts0),
+ SeenDuplicateInstantiation = merge_seen_duplicate_instantiation(
+ SeenDuplicateInstantiationHead,
+ SeenDuplicateInstantiationTail).
+
+:- pred disj_get_conjunctions_worth_parallelising(list(goal_rep)::in,
+ goal_path::in, int::in, implicit_parallelism_info::in, inst_map::in,
+ inst_map::out, list(candidate_par_conjunction)::out,
+ seen_duplicate_instantiation::out) is det.
+
+disj_get_conjunctions_worth_parallelising([], _, _, _, !InstMap, [],
+ have_not_seen_duplicate_instantiation).
+disj_get_conjunctions_worth_parallelising([Disj | Disjs], GoalPath, DisjNum,
+ Info, InstMap0, InstMap, Candidates, SeenDuplicateInstantiation) :-
+ DisjGoalPath = goal_path_add_at_end(GoalPath, step_disj(DisjNum)),
+ HeadDetism = Disj ^ goal_detism_rep,
+ goal_get_conjunctions_worth_parallelising(Disj, DisjGoalPath, Info,
+ InstMap0, HeadInstMap, HeadCandidates, HeadSeenDuplicateInstantiation,
+ _MaybeCallConjunct),
+ disj_get_conjunctions_worth_parallelising(Disjs, GoalPath, DisjNum + 1,
+ Info, InstMap0, TailInstMap, TailCandidates,
+ TailSeenDuplicateInstantiation),
+ Candidates = HeadCandidates ++ TailCandidates,
+ % merge_inst_map requires the detism of goals that produce both inst maps,
+ % we can create fake values that satisfy merge_inst_map easily.
+ (
+ Disjs = [],
+ TailDetism = failure_rep
+ ;
+ Disjs = [_ | _],
+ TailDetism = det_rep
+ ),
+ InstMap = merge_inst_map(HeadInstMap, HeadDetism, TailInstMap, TailDetism),
+ SeenDuplicateInstantiation = merge_seen_duplicate_instantiation(
+ HeadSeenDuplicateInstantiation,
+ TailSeenDuplicateInstantiation).
+
+:- pred switch_case_get_conjunctions_worth_parallelising(list(case_rep)::in,
+ goal_path::in, int::in, implicit_parallelism_info::in, inst_map::in,
+ inst_map::out, list(candidate_par_conjunction)::out,
+ seen_duplicate_instantiation::out) is det.
+
+switch_case_get_conjunctions_worth_parallelising([], _, _, _, !InstMap, [],
+ have_not_seen_duplicate_instantiation).
+switch_case_get_conjunctions_worth_parallelising([Case | Cases], GoalPath,
+ CaseNum, Info, InstMap0, InstMap, Candidates,
+ SeenDuplicateInstantiation) :-
+ Case = case_rep(_, _, Goal),
+ HeadDetism = Goal ^ goal_detism_rep,
+ CaseGoalPath = goal_path_add_at_end(GoalPath, step_switch(CaseNum, no)),
+ goal_get_conjunctions_worth_parallelising(Goal, CaseGoalPath, Info,
+ InstMap0, HeadInstMap, HeadCandidates, HeadSeenDuplicateInstantiation,
+ _MaybeCallConjs),
+ switch_case_get_conjunctions_worth_parallelising(Cases, GoalPath,
+ CaseNum + 1, Info, InstMap0, TailInstMap, TailCandidates,
+ TailSeenDuplicateInstantiation),
+ Candidates = HeadCandidates ++ TailCandidates,
+ % merge_inst_map requires the detism of goals that produce both inst maps,
+ % we can create fake values that satisfy merge_inst_map easily.
+ (
+ Cases = [],
+ TailDetism = failure_rep
+ ;
+ Cases = [_ | _],
+ TailDetism = det_rep
+ ),
+ InstMap = merge_inst_map(HeadInstMap, HeadDetism, TailInstMap, TailDetism),
+ SeenDuplicateInstantiation = merge_seen_duplicate_instantiation(
+ HeadSeenDuplicateInstantiation,
+ TailSeenDuplicateInstantiation).
+
+:- pred ite_get_conjunctions_worth_parallelising(goal_rep::in, goal_rep::in,
+ goal_rep::in, goal_path::in, implicit_parallelism_info::in, inst_map::in,
+ inst_map::out, list(candidate_par_conjunction)::out,
+ seen_duplicate_instantiation::out) is det.
+
+ite_get_conjunctions_worth_parallelising(Cond, Then, Else, GoalPath, Info,
+ !InstMap, Candidates, SeenDuplicateInstantiation) :-
+ CondGoalPath = goal_path_add_at_end(GoalPath, step_ite_cond),
+ ThenGoalPath = goal_path_add_at_end(GoalPath, step_ite_then),
+ ElseGoalPath = goal_path_add_at_end(GoalPath, step_ite_else),
+ goal_get_conjunctions_worth_parallelising(Cond, CondGoalPath, Info,
+ !.InstMap, PostCondInstMap, CondCandidates,
+ CondSeenDuplicateInstantiation, _),
+ goal_get_conjunctions_worth_parallelising(Then, ThenGoalPath, Info,
+ PostCondInstMap, PostThenInstMap, ThenCandidates,
+ ThenSeenDuplicateInstantiation, _),
+ goal_get_conjunctions_worth_parallelising(Else, ElseGoalPath, Info,
+ PostCondInstMap, PostElseInstMap, ElseCandidates,
+ ElseSeenDuplicateInstantiation, _),
+ Candidates = CondCandidates ++ ThenCandidates ++ ElseCandidates,
+ (
+ CondSeenDuplicateInstantiation = have_not_seen_duplicate_instantiation,
+ ThenSeenDuplicateInstantiation = have_not_seen_duplicate_instantiation,
+ ElseSeenDuplicateInstantiation = have_not_seen_duplicate_instantiation
+ ->
+ SeenDuplicateInstantiation = have_not_seen_duplicate_instantiation
+ ;
+ SeenDuplicateInstantiation = seen_duplicate_instantiation
+ ),
+ ThenDetism = Then ^ goal_detism_rep,
+ ElseDetism = Else ^ goal_detism_rep,
+ !:InstMap = merge_inst_map(PostThenInstMap, ThenDetism,
+ PostElseInstMap, ElseDetism).
+
+ % This type represents a goal, if the goal is a call extra information used
+ % for parallelising it with another call is provided.
+ %
+ % This is similar to candidate_parallel_conjunct, except that it stores
+ % information that's used for the rest of the implicit parallelism
+ % analysis. It must contain information for the following.
+ %
+ % - Average cost information for this call site,
+ %
+ % - Enough information to resolve the procedure call, (detism and
+ % argument modes).
+ %
+ % - Information that can be used to determine if this is part of a
+ % dependant conjunction, and its role in the dependant conjunction.
+ %
+ % - Enough information so that a candidate_par_conjuct structure can be
+ % constructed from it and the deep profiling info.
+ %
+:- type maybe_call_conjunct
+ ---> call(
+ mccc_callee :: maybe(pair(string, string)),
+ mccc_detism :: detism_rep,
+ mccc_args :: list(var_mode_and_use),
+ mccc_perf :: call_site_perf
+ )
+ ; trivial_atomic_goal(
+ mccag_detism :: detism_rep,
+ mccag_bound_vars :: list(var_rep)
+ )
+ ; non_atomic_goal.
+
+:- inst call
+ ---> call(ground, ground, ground, ground).
+
+ % A variable, it's mode and it's usage in the callee. The mode information
+ % is also summarised within the variable use information.
+ %
+:- type var_mode_and_use
+ ---> var_mode_and_use(
+ vmu_var :: var_rep,
+ vmu_mode :: var_mode_rep,
+ vmu_use :: var_use_info
+ ).
+
+:- pred maybe_call_site_conjunct(implicit_parallelism_info::in, goal_path::in,
+ atomic_goal_rep::in, detism_rep::in, inst_map::in, inst_map::in,
+ list(var_rep)::in, maybe_call_conjunct::out) is det.
+
+maybe_call_site_conjunct(Info, GoalPath, AtomicGoal, Detism,
+ InstMapBefore, InstMapAfter, BoundVars, MaybeCallConjunct) :-
+ (
+ ( AtomicGoal = unify_construct_rep(_, _, _)
+ ; AtomicGoal = unify_deconstruct_rep(_, _, _)
+ ; AtomicGoal = partial_construct_rep(_, _, _)
+ ; AtomicGoal = partial_deconstruct_rep(_, _, _)
+ ; AtomicGoal = unify_assign_rep(_, _)
+ ; AtomicGoal = cast_rep(_, _)
+ ; AtomicGoal = unify_simple_test_rep(_, _)
+ % Don't bother parallelising foreign code, builtins or events.
+ ; AtomicGoal = pragma_foreign_code_rep(_)
+ ; AtomicGoal = builtin_call_rep(_, _, _)
+ ; AtomicGoal = event_call_rep(_, _)
+ ),
+ MaybeCallConjunct = trivial_atomic_goal(Detism, BoundVars)
+ ;
+ ( AtomicGoal = higher_order_call_rep(_, Args)
+ ; AtomicGoal = method_call_rep(_, _, Args)
+ ; AtomicGoal = plain_call_rep(_, _, Args)
+ ),
+ (
+ ( AtomicGoal = higher_order_call_rep(_, _)
+ ; AtomicGoal = method_call_rep(_, _, _)
+ ),
+ MaybeCallee = no
+ ;
+ AtomicGoal = plain_call_rep(ModuleName, CalleeName, _),
+ MaybeCallee = yes(ModuleName - CalleeName)
+ ),
+ map.lookup(Info ^ ipi_call_sites, GoalPath, CallSitePerf),
+ % Lookup var use information.
+ CallSiteKind = CallSitePerf ^ csf_kind,
+ (
+ CallSiteKind = normal_call_and_info(NormalCalleeId),
+ PSPtr = NormalCalleeId ^ nci_callee_desc ^ pdesc_ps_ptr,
+ Deep = Info ^ ipi_deep,
+ create_proc_var_use_dump_report(Deep, PSPtr,
+ MaybeVarUseDumpInfo),
+ MaybeVarUseDumpInfo = ok(VarUseDumpInfo)
+ ->
+ VarUseInfos = VarUseDumpInfo ^ pvui_var_uses,
+ list.map_corresponding((pred(Arg::in, VarUseInfo::in,
+ VarModeAndUse::out) is det :-
+ var_get_mode(InstMapBefore, InstMapAfter, Arg, ArgMode),
+ VarModeAndUse = var_mode_and_use(Arg, ArgMode,
+ VarUseInfo)
+ ), Args, VarUseInfos, VarModeAndUses)
+ ;
+ list.map((pred(Arg::in, VarModeAndUse::out) is det :-
+ var_get_mode(InstMapBefore, InstMapAfter, Arg, ArgMode),
+ var_mode_to_var_use(ArgMode, VarUseType),
+ pessimistic_var_use_info(VarUseType, VarUseInfo),
+ VarModeAndUse = var_mode_and_use(Arg, ArgMode,
+ VarUseInfo)
+ ), Args, VarModeAndUses)
+ ),
+ MaybeCallConjunct =
+ call(MaybeCallee, Detism, VarModeAndUses, CallSitePerf)
+ ).
+
+:- pred var_get_mode(inst_map::in, inst_map::in, var_rep::in, var_mode_rep::out)
+ is det.
+
+var_get_mode(InstMapBefore, InstMapAfter, VarRep, VarModeRep) :-
+ inst_map_get(InstMapBefore, VarRep, InstBefore, _),
+ inst_map_get(InstMapAfter, VarRep, InstAfter, _),
+ VarModeRep = var_mode_rep(InstBefore, InstAfter).
+
+ % Note: this runs in quadratic time.
+ %
+:- pred build_candidate_conjunctions(implicit_parallelism_info::in,
+ inst_map::in, goal_path::in, list(maybe_call_conjunct)::in,
+ pqueue(float, candidate_par_conjunction)::in,
+ pqueue(float, candidate_par_conjunction)::out) is det.
+
+build_candidate_conjunctions(_, _, _, [], !Candidates).
+build_candidate_conjunctions(Info, InstMap, GoalPath, [MaybeCall | MaybeCalls],
+ !Candidates) :-
+ (
+ MaybeCall = call(_, _, _, CallSitePerf),
+ Cost = CallSitePerf ^ csf_summary_perf ^ perf_row_self
+ ^ perf_row_callseqs_percall,
+ ( Cost > float(Info ^ ipi_opts ^ cpc_call_site_threshold) ->
+ % This conjunction is a call and is expensive enough to
+ % parallelise, find some later conjunct to parallelise against it.
+ build_candidate_conjunctions_2(Info, InstMap, GoalPath, MaybeCall,
+ cord.empty, MaybeCalls, !Candidates)
+ % XXX: pick the most expensive non-overlapping candidates from the
+ % result.
+ ;
+ true
+ )
+ ;
+ MaybeCall = non_atomic_goal
+ ;
+ MaybeCall = trivial_atomic_goal(_, _)
+ ),
+ build_candidate_conjunctions(Info, InstMap, GoalPath, MaybeCalls,
+ !Candidates).
+
+:- pred build_candidate_conjunctions_2(implicit_parallelism_info::in,
+ inst_map::in, goal_path::in, maybe_call_conjunct::in(call),
+ cord(maybe_call_conjunct)::in, list(maybe_call_conjunct)::in,
+ pqueue(float, candidate_par_conjunction)::in, pqueue(float,
+ candidate_par_conjunction)::out) is det.
+
+build_candidate_conjunctions_2(_, _, _, _, _, [], !Candidates).
+build_candidate_conjunctions_2(Info, InstMap, GoalPath, CallA,
+ IntermediateGoals0, [MaybeCall | MaybeCalls], !Candidates) :-
+ (
+ MaybeCall = call(_, _, _, CallSitePerf),
+ CallB = MaybeCall,
+ Cost = call_site_perf_self_callseqs_percall(CallSitePerf),
+ ( Cost > float(Info ^ ipi_opts ^ cpc_call_site_threshold) ->
+ % This conjunction is a call and is expensive enough to
+ % parallelise.
+ are_conjuncts_dependant(CallA, CallB, InstMap, Dependance),
+ (
+ Dependance = conjuncts_are_dependant(DepVars),
+ compute_optimal_dependant_parallelisation(Info, CallA, CallB,
+ DepVars, IntermediateGoals0, InstMap, CPCA, CPCB, Speedup)
+ ;
+ Dependance = conjuncts_are_independent,
+ compute_independant_parallelisation_speedup(Info, CallA, CallB,
+ length(IntermediateGoals0), CPCA, CPCB, Speedup)
+ ),
+ % XXX: This threshold should be configurable.
+ ( Speedup > 0.0 ->
+ % XXX: I think this should be a priority queue or somesuch.
+ GoalPathString = goal_path_to_string(GoalPath),
+ Candidate = candidate_par_conjunction(GoalPathString,
+ CPCA, CPCB, Dependance, Speedup),
+ % So that the candidates with the greatest speedup are removed
+ % first multiply speedup by -1.0.
+ pqueue.insert(!.Candidates, Speedup * -1.0, Candidate,
+ !:Candidates)
+ ;
+ true
+ )
+ ;
+ % Don't recurse here, we don't parallelise over call goals.
+ true
+ )
+ ;
+ MaybeCall = trivial_atomic_goal(_, _),
+ build_candidate_conjunctions_2(Info, InstMap, GoalPath, CallA,
+ cord.snoc(IntermediateGoals0, MaybeCall), MaybeCalls, !Candidates)
+ ;
+ MaybeCall = non_atomic_goal
+ % Don't recurse in this case, we don't parallelise over non-atomic goals
+ % yet.
+ ).
+
+:- pred are_conjuncts_dependant(maybe_call_conjunct::in(call),
+ maybe_call_conjunct::in(call), inst_map::in, conjuncts_are_dependant::out)
+ is det.
+
+are_conjuncts_dependant(CallA, CallB, InstMap, Dependance) :-
+ % Conjuncts are dependant if there exists an input variable in CallB that
+ % is made ground by CallA or depends upon a variable made ground by CallA.
+ list.foldl(add_output_var_to_set, CallA ^ mccc_args,
+ set.init, CallAOutputs),
+ list.foldl(are_conjuncts_dependant_var(CallAOutputs, InstMap),
+ CallB ^ mccc_args, set.init, DepVars),
+ ( set.empty(DepVars) ->
+ Dependance = conjuncts_are_independent
+ ;
+ Dependance = conjuncts_are_dependant(DepVars)
+ ).
+
+:- pred are_conjuncts_dependant_var(set(var_rep)::in, inst_map::in,
+ var_mode_and_use::in, set(var_rep)::in, set(var_rep)::out) is det.
+
+are_conjuncts_dependant_var(CallAOutputs, InstMap, VarModeAndUse, !DepVars) :-
+ VarModeAndUse = var_mode_and_use(VarRep, VarModeRep, _),
+ ( VarModeRep = var_mode_rep(ir_ground_rep, ir_ground_rep) ->
+ inst_map_get_var_deps(InstMap, VarRep, VarDeps),
+ (
+ (
+ contains(CallAOutputs, VarRep)
+ ;
+ member(VarDep, VarDeps),
+ contains(CallAOutputs, VarDep)
+ )
+ ->
+ svset.insert(VarRep, !DepVars)
+ ;
+ true
+ )
+ ;
+ true
+ ).
+
+:- pred add_output_var_to_set(var_mode_and_use::in,
+ set(var_rep)::in, set(var_rep)::out) is det.
+
+add_output_var_to_set(var_mode_and_use(VarRep, VarModeRep, _), !Set) :-
+ ( VarModeRep = var_mode_rep(ir_free_rep, ir_ground_rep) ->
+ svset.insert(VarRep, !Set)
+ ;
+ true
+ ).
+
+:- pred compute_independant_parallelisation_speedup(
+ implicit_parallelism_info::in,
+ maybe_call_conjunct::in(call), maybe_call_conjunct::in(call),
+ int::in, candidate_par_conjunct::out, candidate_par_conjunct::out,
+ float::out) is det.
+
+compute_independant_parallelisation_speedup(Info, CallA, CallB, NumUnifications,
+ CPCA, CPCB, Speedup) :-
+ CostA = call_site_perf_self_callseqs_percall(CallA ^ mccc_perf),
+ CostB = call_site_perf_self_callseqs_percall(CallB ^ mccc_perf),
+ SequentialCost = CostA + CostB,
+ ParallelCost = max(CostA, CostB) +
+ float(Info ^ ipi_opts ^ cpc_sparking_cost),
+ Speedup = SequentialCost - ParallelCost,
+ ( CostA < CostB ->
+ NumUnificationsA = NumUnifications,
+ NumUnificationsB = 0
+ ;
+ NumUnificationsA = 0,
+ NumUnificationsB = NumUnifications
+ ),
+ call_site_conj_to_candidate_par_conjunct(Info, CallA, NumUnificationsA,
+ CPCA),
+ call_site_conj_to_candidate_par_conjunct(Info, CallB, NumUnificationsB,
+ CPCB).
+
+:- pred compute_optimal_dependant_parallelisation(
+ implicit_parallelism_info::in,
+ maybe_call_conjunct::in(call), maybe_call_conjunct::in(call),
+ set(var_rep)::in, cord(maybe_call_conjunct)::in, inst_map::in,
+ candidate_par_conjunct::out, candidate_par_conjunct::out, float::out)
+ is det.
+
+compute_optimal_dependant_parallelisation(Info, CallA, CallB,
+ DepVars, IntermediateGoals, InstMap, CPCA, CPCB, Speedup) :-
+ CostA = call_site_perf_self_callseqs_percall(CallA ^ mccc_perf),
+ CostB = call_site_perf_self_callseqs_percall(CallB ^ mccc_perf),
+ SequentialCost = CostA + CostB,
+ NumUnifications = length(IntermediateGoals),
+ ( singleton_set(DepVars, DepVar) ->
+ % Only parallelise conjunctions with a single dependant variable for
+ % now.
+ ( get_var_use_from_args(CallB ^ mccc_args, DepVar, DepVarConsume) ->
+ DepVarConsume = var_use_info(CostUntilConsume, _),
+ (
+ get_var_use_from_args(CallA ^ mccc_args, DepVar,
+ DepVarProduce)
+ ->
+ DepVarProduce = var_use_info(CostUntilProduction, _),
+ CostBeforeProduction =
+ cost_until_to_cost_since_start(CostUntilProduction, CostA),
+ CostBeforeConsume =
+ cost_until_to_cost_since_start(CostUntilConsume, CostB),
+ CostAfterConsume =
+ cost_until_to_cost_before_end(CostUntilConsume, CostB),
+ % Unfications between the calls don't bind any variables useful
+ % for the calls, so serialise them with the cheaper call.
+ ( CostA < CostB ->
+ NumUnificationsA = NumUnifications,
+ NumUnificationsB = 0
+ ;
+ NumUnificationsA = 0,
+ NumUnificationsB = NumUnifications
+ )
+ ;
+ inst_map_get_var_deps(InstMap, DepVar, DepVarDeps),
+ set.fold(get_var_use_add_to_queue(CallA ^ mccc_args),
+ DepVarDeps, pqueue.init, ProduceDepVarQueue),
+ (
+ pqueue.remove(ProduceDepVarQueue, _,
+ CostUntilProductionPrime, _)
+ ->
+ CostUntilProduction = CostUntilProductionPrime
+ ;
+ error(this_file ++
+ "Expected to find at least one dependant variable")
+ ),
+ CostBeforeProduction0 =
+ cost_until_to_cost_since_start(CostUntilProduction, CostA),
+ CostAfterProduction0 =
+ cost_until_to_cost_before_end(CostUntilProduction, CostA),
+ CostBeforeConsume0 =
+ cost_until_to_cost_since_start(CostUntilConsume, CostB),
+ CostAfterConsume0 =
+ cost_until_to_cost_before_end(CostUntilConsume, CostB),
+ % Compare time before consume vs time after production, the
+ % lesser one should have the unifications added to it. This
+ % maximises the amount of parallelism.
+ ( CostBeforeConsume0 > CostAfterProduction0 ->
+ NumUnificationsA = 0,
+ NumUnificationsB = NumUnifications,
+ CostBeforeProduction = CostBeforeProduction0,
+ CostBeforeConsume = CostA,
+ CostAfterConsume = 0.0
+ ;
+ NumUnificationsA = NumUnifications,
+ NumUnificationsB = 0,
+ CostBeforeProduction = 0.0,
+ CostBeforeConsume = CostA - CostAfterConsume0,
+ CostAfterConsume = CostAfterConsume0
+ )
+ ),
+ SparkingCost = float(Info ^ ipi_opts ^ cpc_sparking_cost),
+ LockingCost = float(Info ^ ipi_opts ^ cpc_locking_cost),
+ ParallelCost = max(CostA,
+ max(CostBeforeProduction, CostBeforeConsume)
+ + CostAfterConsume)
+ + SparkingCost + LockingCost,
+ Speedup = SequentialCost - ParallelCost
+ ;
+ error("Dependant var not in consumer's arguments")
+ )
+ ;
+ Speedup = -1.0,
+ NumUnificationsA = NumUnifications,
+ NumUnificationsB = 0
+ ),
+ call_site_conj_to_candidate_par_conjunct(Info, CallA, NumUnificationsA,
+ CPCA),
+ call_site_conj_to_candidate_par_conjunct(Info, CallB, NumUnificationsB,
+ CPCB).
+
+:- pred get_var_use_from_args(list(var_mode_and_use)::in, var_rep::in,
+ var_use_info::out) is semidet.
+
+get_var_use_from_args([], _, _) :- false.
+get_var_use_from_args([Arg | Args], Var, VarUse) :-
+ ( Arg = var_mode_and_use(Var, _, VarUsePrime) ->
+ VarUse = VarUsePrime
+ ;
+ get_var_use_from_args(Args, Var, VarUse)
+ ).
+
+:- pred get_var_use_add_to_queue(list(var_mode_and_use)::in, var_rep::in,
+ pqueue(float, cost_until_var_use)::in,
+ pqueue(float, cost_until_var_use)::out) is det.
+
+get_var_use_add_to_queue(VarsModeAndUse, VarRep, !Queue) :-
+ ( get_var_use_from_args(VarsModeAndUse, VarRep, VarUse) ->
+ VarUse = var_use_info(CostUntilVarUse, _),
+ % Priority queues return the smallest items first, And we want to find
+ % the most pessimistic variable production so use the cost before the
+ % procedure's end.
+ Key = cost_until_to_cost_before_end(CostUntilVarUse, 0.0),
+ pqueue.insert(!.Queue, Key, CostUntilVarUse, !:Queue)
+ ;
+ true
+ ).
+
+:- func call_site_perf_self_callseqs_percall(call_site_perf) = float.
+
+call_site_perf_self_callseqs_percall(CSP) =
+ CSP ^ csf_summary_perf ^ perf_row_self ^ perf_row_callseqs_percall.
+
+:- pred call_site_conj_to_candidate_par_conjunct(
+ implicit_parallelism_info::in, maybe_call_conjunct::in(call),
+ int::in, candidate_par_conjunct::out) is det.
+
+call_site_conj_to_candidate_par_conjunct(Info, Call, NumUnifications, CPC) :-
+ Call = call(MaybeCallee, _Detism, Args, Perf),
+ VarTable = Info ^ ipi_var_table,
+ list.map(var_mode_use_to_var_in_par_conj(VarTable), Args, Vars),
+ Cost = call_site_perf_self_callseqs_percall(Perf),
+ CPC = candidate_par_conjunct(MaybeCallee, Vars, Cost, NumUnifications).
+
+:- pred var_mode_use_to_var_in_par_conj(var_table::in, var_mode_and_use::in,
+ maybe(string)::out) is det.
+
+var_mode_use_to_var_in_par_conj(VarTable, var_mode_and_use(Var, _, _),
+ MaybeName) :-
+ ( search_var_name(VarTable, Var, Name) ->
+ MaybeName = yes(Name)
+ ;
+ MaybeName = no
).
%----------------------------------------------------------------------------%
@@ -578,6 +1403,33 @@ css_to_call(Deep, CSS, Call) :-
% Build the call datastructure.
Call = call_site(Caller, Slot, CallTypeAndCallee).
+%----------------------------------------------------------------------------%
+%
+% Useful utility predicates.
+%
+
+ % Remove something from inside a maybe_error type and return it. Throw an
+ % exception if the MaybeError variable has value error(_).
+ %
+:- pred det_open_maybe_error(maybe_error(T)::in, T::out) is det.
+
+det_open_maybe_error(ok(X), X).
+det_open_maybe_error(error(Error), _) :-
+ error(Error).
+
+:- pred add_call_site_perf_to_map(call_site_perf::in,
+ map(goal_path, call_site_perf)::in, map(goal_path, call_site_perf)::out)
+ is det.
+
+add_call_site_perf_to_map(CallSitePerf, !Map) :-
+ GoalPath = CallSitePerf ^ csf_summary_perf ^ perf_row_subject
+ ^ csdesc_goal_path,
+ svmap.det_insert(GoalPath, CallSitePerf, !Map).
+
+:- func this_file = string.
+
+this_file = "mdprof_feedback: ".
+
%-----------------------------------------------------------------------------%
:- end_module mdprof_feedback.
%-----------------------------------------------------------------------------%
Index: deep_profiler/program_representation_utils.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/program_representation_utils.m,v
retrieving revision 1.17
diff -u -p -b -r1.17 program_representation_utils.m
--- deep_profiler/program_representation_utils.m 16 Oct 2008 01:16:36 -0000 1.17
+++ deep_profiler/program_representation_utils.m 20 Oct 2008 05:33:37 -0000
@@ -80,6 +80,58 @@
%----------------------------------------------------------------------------%
+ % A map of variables to instantiation states, Like inst_map within the
+ % compiler.
+ %
+:- type inst_map.
+
+ % Build the initial inst for a procedure.
+ %
+:- func initial_inst_map(proc_defn_rep) = inst_map.
+
+ % inst_map_ground_vars(Vars, !InstMap, SeenDuplicateInstantiaton).
+ %
+ % Make the variables in the given list ground in the new copy of the inst
+ % map.
+ %
+ % SeenDuplicateInstantiation will be true iff at least one of these
+ % variables is already ground.
+ %
+:- pred inst_map_ground_vars(list(var_rep)::in, set(var_rep)::in, inst_map::in,
+ inst_map::out, seen_duplicate_instantiation::out) is det.
+
+ % This type represents whether a traversal has seen more than one
+ % instantiation of a variable within a single branch. If at the end of a
+ % traversal a duplicate instantiation has been seen we can either accept a
+ % pessimistic default or abort parallelisation of this particular
+ % conjunction.
+ %
+:- type seen_duplicate_instantiation
+ ---> seen_duplicate_instantiation
+ ; have_not_seen_duplicate_instantiation.
+
+:- func merge_seen_duplicate_instantiation(seen_duplicate_instantiation,
+ seen_duplicate_instantiation) = seen_duplicate_instantiation.
+
+ % Retrieve the instantiatedness of a variable, and variables that it's
+ % binding depends upon from the instmap, if the variable is new ir_free is
+ % returned and the variables it depends upon is the empty set.
+ %
+:- pred inst_map_get(inst_map::in, var_rep::in, inst_rep::out,
+ set(var_rep)::out) is det.
+
+ % Merge two inst maps from different branches of execution.
+ %
+:- func merge_inst_map(inst_map, detism_rep, inst_map, detism_rep) = inst_map.
+
+ % Retrieve all the variables this variable depends upon, indirect
+ % dependencies are also returned.
+ %
+:- pred inst_map_get_var_deps(inst_map::in, var_rep::in, set(var_rep)::out)
+ is det.
+
+%----------------------------------------------------------------------------%
+
% geneirc_vars_first_use(HeadVarsToVars, Deep, PSPtr, CallStack,
% VarUseInfos, ProcAverageCost).
%
@@ -98,6 +150,16 @@
:- pred head_vars_all(list(head_var_rep)::in, list(var_rep)::out,
list(var_use_type)::out) is det.
+:- pred var_mode_to_var_use(var_mode_rep::in, var_use_type::out) is det.
+
+:- pred pessimistic_var_use_info(var_use_type::in, var_use_info::out) is det.
+
+%----------------------------------------------------------------------------%
+
+ % Retrieve a set of all the vars involved with this atomic goal.
+ %
+:- pred atomic_goal_get_vars(atomic_goal_rep::in, set(var_rep)::out) is det.
+
%----------------------------------------------------------------------------%
:- implementation.
@@ -112,6 +174,7 @@
:- import_module io.
:- import_module map.
:- import_module require.
+:- import_module svmap.
%----------------------------------------------------------------------------%
@@ -511,10 +574,10 @@ print_unit_to_strings(_, cord.empty).
%----------------------------------------------------------------------------%
progrep_search_proc(ProgRep, ProcLabel, ProcRep) :-
- ( ProcLabel = str_ordinary_proc_label(_, Module, _DefModule, _, _, _)
- ; ProcLabel = str_special_proc_label(_, Module, _DefModule, _, _, _)
+ ( ProcLabel = str_ordinary_proc_label(_, _DeclModule, DefModule, _, _, _)
+ ; ProcLabel = str_special_proc_label(_, _DeclModule, DefModule, _, _, _)
),
- progrep_search_module(ProgRep, Module, ModuleRep),
+ progrep_search_module(ProgRep, DefModule, ModuleRep),
modulerep_search_proc(ModuleRep, ProcLabel, ProcRep).
% Search for a module within a program representation.
@@ -1487,39 +1550,6 @@ sum_after_coverage(After, !SumAfter) :-
).
%----------------------------------------------------------------------------%
-
-:- type solution_count_rep
- ---> at_most_zero_rep
- ; at_most_one_rep % Including committed choice.
- ; at_most_many_rep.
-
-:- func detism_get_solutions(detism_rep) = solution_count_rep.
-
-detism_get_solutions(det_rep) = at_most_one_rep.
-detism_get_solutions(semidet_rep) = at_most_one_rep.
-detism_get_solutions(multidet_rep) = at_most_many_rep.
-detism_get_solutions(nondet_rep) = at_most_many_rep.
-detism_get_solutions(cc_multidet_rep) = at_most_one_rep.
-detism_get_solutions(cc_nondet_rep) = at_most_one_rep.
-detism_get_solutions(erroneous_rep) = at_most_zero_rep.
-detism_get_solutions(failure_rep) = at_most_zero_rep.
-
-:- type can_fail_rep
- ---> can_fail_rep
- ; cannot_fail_rep.
-
-:- func detism_get_can_fail(detism_rep) = can_fail_rep.
-
-detism_get_can_fail(det_rep) = cannot_fail_rep.
-detism_get_can_fail(semidet_rep) = can_fail_rep.
-detism_get_can_fail(multidet_rep) = cannot_fail_rep.
-detism_get_can_fail(nondet_rep) = can_fail_rep.
-detism_get_can_fail(cc_multidet_rep) = cannot_fail_rep.
-detism_get_can_fail(cc_nondet_rep) = can_fail_rep.
-detism_get_can_fail(erroneous_rep) = cannot_fail_rep.
-detism_get_can_fail(failure_rep) = can_fail_rep.
-
-%----------------------------------------------------------------------------%
%
% Coverage information helper predicates.
%
@@ -1567,10 +1597,9 @@ get_coverage_before_and_after(coverage_k
% lookup anyway to find cost information, This will callback to the deep
% profiler as it crosses procedure boundaries.
%
- % Another value should be used to describe if we're looking for a producer
- % or consumer, this affects how pessimistic defaults are calculated. XXX:
- % This is not yet implemented for now I implement the first time a variable
- % is consumed and then hopefully extend that code.
+ % This does not follow higher order or method calls. It may be possible to
+ % follow call the calls seen during profiling and aggregate their variable
+ % use information based on how often they are called from that call site.
%
:- pred goal_var_first_use(goal_path::in, goal_rep(coverage_info)::in,
var_first_use_static_info::in, float::in, float::out, found_first_use::out)
@@ -1663,7 +1692,7 @@ call_var_first_use(AtomicGoal, BoundVars
ProcCost = RowData ^ perf_row_callseqs_percall,
% XXX: this doesn't work for (mutually-)recursive calls, the deep profiler
% sets their cost to 1.0. For now we just have to hope that the variables
- % we're searching for are used in the recursive call so the trick above
+ % we're searching for are used in the recursive call so the trick below
% works.
NextCostSoFar = CostSoFar + ProcCost,
@@ -1782,31 +1811,7 @@ atomic_trivial_var_first_use(AtomicGoal,
FoundFirstUse) :-
Var = StaticInfo ^ fui_var,
VarUseType = StaticInfo ^ fui_var_use,
- (
- ( AtomicGoal = unify_construct_rep(UnifyVar, _, Vars0)
- ; AtomicGoal = unify_deconstruct_rep(UnifyVar, _, Vars0)
- ),
- Vars = [UnifyVar | Vars0]
- ;
- ( AtomicGoal =
- partial_construct_rep(UnifyVar, _, MaybeVars0)
- ; AtomicGoal =
- partial_deconstruct_rep(UnifyVar, _, MaybeVars0)
- ),
- list.filter_map(maybe_x_to_x, MaybeVars0, Vars0),
- Vars = [UnifyVar | Vars0]
- ;
- ( AtomicGoal = unify_assign_rep(VarA, VarB)
- ; AtomicGoal = cast_rep(VarA, VarB)
- ; AtomicGoal = unify_simple_test_rep(VarA, VarB)
- ),
- Vars = [ VarA, VarB ]
- ;
- ( AtomicGoal = pragma_foreign_code_rep(Vars)
- ; AtomicGoal = event_call_rep(_, Vars)
- ; AtomicGoal = builtin_call_rep(_, _, Vars)
- )
- ),
+ atomic_goal_get_vars(AtomicGoal, Vars),
(
member(Var, Vars),
(
@@ -1937,8 +1942,6 @@ disj_var_first_use_2(GoalPath, DisjNum,
;
HeadFoundFirstUse = found_first_use(HeadCost),
TailFoundFirstUse = found_first_use(TailCost),
- % A simple average, this gets overridden later anyway.
- % XXX: no it doesn't.
(
VarUseType = var_use_consumption,
% The variable is probably consumed in the first disjunct even if
@@ -2159,16 +2162,7 @@ proc_var_first_use(Deep, PSPtr, ArgNum,
% pointer points to the foreign code which can't be looked up even
% though it uses a 'plain_call' call site.
% Return a pessimistic default here.
- (
- VarUseType = var_use_consumption,
- CostUntilUse = cost_since_proc_start(0.0)
- ;
- ( VarUseType = var_use_production
- ; VarUseType = var_use_other
- ),
- CostUntilUse = cost_before_proc_end(0.0)
- ),
- VarUseInfo = var_use_info(CostUntilUse, VarUseType)
+ pessimistic_var_use_info(VarUseType, VarUseInfo)
),
trace [compile_time(flag("debug_first_var_use")), io(!IO)]
io.format("Trace: prog_var_first_use: %s\n",
@@ -2188,8 +2182,6 @@ head_vars_all(HeadVars, Vars, VarUseType
var_mode_to_var_use(VarMode, VarUseType)
), HeadVars, Vars, VarUseTypes).
-:- pred var_mode_to_var_use(var_mode_rep::in, var_use_type::out) is det.
-
var_mode_to_var_use(var_mode_rep(InitialInst, FinalInst), VarUseType) :-
(
InitialInst = ir_ground_rep,
@@ -2205,6 +2197,18 @@ var_mode_to_var_use(var_mode_rep(Initial
VarUseType = var_use_other
).
+pessimistic_var_use_info(VarUseType, VarUseInfo) :-
+ (
+ VarUseType = var_use_consumption,
+ CostUntilUse = cost_since_proc_start(0.0)
+ ;
+ ( VarUseType = var_use_production
+ ; VarUseType = var_use_other
+ ),
+ CostUntilUse = cost_before_proc_end(0.0)
+ ),
+ VarUseInfo = var_use_info(CostUntilUse, VarUseType).
+
% Perform the var_first_use for the vars returned by the closure.
%
generic_vars_first_use(VarsPred, Deep, PSPtr, CallStack, MaybeResult) :-
@@ -2266,27 +2270,240 @@ goal_var_first_use_wrapper(Deep, CallSta
; VarUseType = var_use_other
),
CostUntilUse = cost_before_proc_end(CostUntilUseRaw)
- )
+ ),
+ VarUseInfo = var_use_info(CostUntilUse, VarUseType)
;
% If the first use has not been found, then use the average cost of the
% procedure as the cost before the first use, since the variable is
% never used.
FoundFirstUse = have_not_found_first_use,
+ pessimistic_var_use_info(VarUseType, VarUseInfo)
+ ).
+
+%----------------------------------------------------------------------------%
+
+:- type inst_map
+ ---> inst_map(
+ map(var_rep, inst_rep),
+ % The actual inst map.
+
+ map(var_rep, set(var_rep))
+ % A tree describing dependencies between bound variables.
+ ).
+
+initial_inst_map(ProcDefn) = InstMap :-
+ HeadVars = ProcDefn ^ pdr_head_vars,
+ list.foldl(add_head_var_inst_to_map, HeadVars,
+ inst_map(map.init, map.init), InstMap).
+
+:- pred add_head_var_inst_to_map(head_var_rep::in,
+ inst_map::in, inst_map::out) is det.
+
+add_head_var_inst_to_map(head_var_rep(VarRep, ModeRep), !InstMap) :-
+ ModeRep = var_mode_rep(InitialInstRep, _),
+ add_inst_mapping(VarRep, InitialInstRep, set.init, !InstMap).
+
+ % Add an inst mapping.
+ %
+:- pred add_inst_mapping(var_rep::in, inst_rep::in, set(var_rep)::in,
+ inst_map::in, inst_map::out) is det.
+
+add_inst_mapping(VarRep, InstRep, DepVars, InstMap0, InstMap) :-
+ InstMap0 = inst_map(VarToInst0, VarToDepVars0),
+ map.det_insert(VarToInst0, VarRep, InstRep, VarToInst),
+ map.det_insert(VarToDepVars0, VarRep, DepVars, VarToDepVars),
+ InstMap = inst_map(VarToInst, VarToDepVars).
+
+merge_inst_map(InstMapA, DetismA, InstMapB, DetismB) = InstMap :-
(
- VarUseType = var_use_consumption,
- CostUntilUse = cost_since_proc_start(0.0)
+ ( DetismA = erroneous_rep
+ ; DetismA = failure_rep
+ ),
+ InstMap = InstMapB
;
- ( VarUseType = var_use_production
- ; VarUseType = var_use_other
+ ( DetismA = det_rep
+ ; DetismA = semidet_rep
+ ; DetismA = nondet_rep
+ ; DetismA = multidet_rep
+ ; DetismA = cc_nondet_rep
+ ; DetismA = cc_multidet_rep
),
- CostUntilUse = cost_before_proc_end(0.0)
+ (
+ ( DetismB = erroneous_rep
+ ; DetismB = failure_rep
+ ),
+ InstMap = InstMapA
+ ;
+ ( DetismB = det_rep
+ ; DetismB = semidet_rep
+ ; DetismB = nondet_rep
+ ; DetismB = multidet_rep
+ ; DetismB = cc_nondet_rep
+ ; DetismB = cc_multidet_rep
+ ),
+ InstMapA = inst_map(VarToInstA, VarToDepVarsA),
+ InstMapB = inst_map(VarToInstB, VarToDepVarsB),
+ map.union(inst_intersect, VarToInstA, VarToInstB, VarToInst),
+ map.union(set.union, VarToDepVarsA, VarToDepVarsB, VarToDepVars),
+ InstMap = inst_map(VarToInst, VarToDepVars)
)
+ ).
+
+:- pred inst_intersect(inst_rep::in, inst_rep::in, inst_rep::out) is det.
+
+inst_intersect(ir_free_rep, ir_free_rep, ir_free_rep).
+inst_intersect(ir_free_rep, ir_ground_rep, ir_other_rep).
+inst_intersect(ir_free_rep, ir_other_rep, ir_other_rep).
+inst_intersect(ir_ground_rep, ir_free_rep, ir_other_rep).
+inst_intersect(ir_ground_rep, ir_ground_rep, ir_ground_rep).
+inst_intersect(ir_ground_rep, ir_other_rep, ir_other_rep).
+inst_intersect(ir_other_rep, ir_free_rep, ir_other_rep).
+inst_intersect(ir_other_rep, ir_ground_rep, ir_other_rep).
+inst_intersect(ir_other_rep, ir_other_rep, ir_other_rep).
+
+inst_map_ground_vars(Vars, DepVars, !InstMap, SeenDuplicateInstantiation) :-
+ list.foldl2(inst_map_ground_var(DepVars), Vars, !InstMap,
+ have_not_seen_duplicate_instantiation, SeenDuplicateInstantiation).
+
+:- pred inst_map_ground_var(set(var_rep)::in, var_rep::in,
+ inst_map::in, inst_map::out, seen_duplicate_instantiation::in,
+ seen_duplicate_instantiation::out) is det.
+
+inst_map_ground_var(DepVars0, Var, InstMap0, InstMap, !SeenDuplicateInstantiation) :-
+ InstMap0 = inst_map(VarToInst0, VarToDepVars0),
+ ( map.search(VarToInst0, Var, InstPrime) ->
+ Inst = InstPrime
+ ;
+ Inst = ir_free_rep
),
- VarUseInfo = var_use_info(CostUntilUse, VarUseType).
+ (
+ Inst = ir_free_rep,
+ NewInst = ir_ground_rep,
+ DepVars = DepVars0
+ ;
+ ( Inst = ir_ground_rep
+ ; Inst = ir_other_rep
+ ),
+ NewInst = ir_other_rep,
+ map.lookup(VarToDepVars0, Var, DepVarsFromIM),
+ DepVars = set.union(DepVars0, DepVarsFromIM),
+ !:SeenDuplicateInstantiation = seen_duplicate_instantiation
+ ),
+ map.set(VarToInst0, Var, NewInst, VarToInst),
+ map.set(VarToDepVars0, Var, DepVars, VarToDepVars),
+ InstMap = inst_map(VarToInst, VarToDepVars).
+
+inst_map_get(inst_map(VarToInst, VarToDepVars), Var, Inst, DepVars) :-
+ ( map.search(VarToInst, Var, InstPrime) ->
+ Inst = InstPrime,
+ map.lookup(VarToDepVars, Var, DepVars)
+ ;
+ Inst = ir_free_rep,
+ DepVars = set.init
+ ).
+
+inst_map_get_var_deps(inst_map(_, VarToDepVars), VarRep, DepVars) :-
+ inst_map_get_var_deps_2(VarToDepVars, VarRep, set.init, DepVars).
+
+:- pred inst_map_get_var_deps_2(map(var_rep, set(var_rep))::in, var_rep::in,
+ set(var_rep)::in, set(var_rep)::out) is det.
+
+inst_map_get_var_deps_2(VarToDepVars, VarRep, !Set) :-
+ ( set.contains(!.Set, VarRep) ->
+ true
+ % This variable has already been visited, this prevents following any
+ % (impossible) cycles in the graph, or following the same path twice
+ % when there are diamonds in the graph.
+ ;
+ ( map.search(VarToDepVars, VarRep, DepVars) ->
+ !:Set = set.union(!.Set, DepVars),
+ set.fold(inst_map_get_var_deps_2(VarToDepVars), DepVars, !Set)
+ ;
+ true
+ )
+ ).
+
+%----------------------------------------------------------------------------%
+
+atomic_goal_get_vars(AtomicGoal, Vars) :-
+ (
+ ( AtomicGoal = unify_construct_rep(Var, _, VarsL)
+ ; AtomicGoal = unify_deconstruct_rep(Var, _, VarsL)
+ ; AtomicGoal = higher_order_call_rep(Var, VarsL)
+ ; AtomicGoal = method_call_rep(Var, _, VarsL)
+ ),
+ Vars0 = list_to_set(VarsL),
+ set.insert(Vars0, Var, Vars)
+ ;
+ ( AtomicGoal = partial_construct_rep(Var, _, MaybeVars)
+ ; AtomicGoal = partial_deconstruct_rep(Var, _, MaybeVars)
+ ),
+ list.foldl((pred(MaybeVar::in, Set0::in, Set::out) is det :-
+ (
+ MaybeVar = yes(VarI),
+ set.insert(Set0, VarI, Set)
+ ;
+ MaybeVar = no,
+ Set = Set0
+ )), MaybeVars, set.init, Vars0),
+ set.insert(Vars0, Var, Vars)
+ ;
+ ( AtomicGoal = unify_assign_rep(VarA, VarB)
+ ; AtomicGoal = cast_rep(VarA, VarB)
+ ; AtomicGoal = unify_simple_test_rep(VarA, VarB)
+ ),
+ Vars = list_to_set([ VarA, VarB ])
+ ;
+ ( AtomicGoal = pragma_foreign_code_rep(VarsL)
+ ; AtomicGoal = event_call_rep(_, VarsL)
+ ; AtomicGoal = builtin_call_rep(_, _, VarsL)
+ ; AtomicGoal = plain_call_rep(_, _, VarsL)
+ ),
+ Vars = list_to_set(VarsL)
+ ).
-:- pred maybe_x_to_x(maybe(T)::in, T::out) is semidet.
+merge_seen_duplicate_instantiation(A, B) = R :-
+ (
+ A = have_not_seen_duplicate_instantiation,
+ B = have_not_seen_duplicate_instantiation
+ ->
+ R = have_not_seen_duplicate_instantiation
+ ;
+ R = seen_duplicate_instantiation
+ ).
+
+%----------------------------------------------------------------------------%
-maybe_x_to_x(yes(X), X).
+:- type solution_count_rep
+ ---> at_most_zero_rep
+ ; at_most_one_rep % Including committed choice.
+ ; at_most_many_rep.
+
+:- func detism_get_solutions(detism_rep) = solution_count_rep.
+
+detism_get_solutions(det_rep) = at_most_one_rep.
+detism_get_solutions(semidet_rep) = at_most_one_rep.
+detism_get_solutions(multidet_rep) = at_most_many_rep.
+detism_get_solutions(nondet_rep) = at_most_many_rep.
+detism_get_solutions(cc_multidet_rep) = at_most_one_rep.
+detism_get_solutions(cc_nondet_rep) = at_most_one_rep.
+detism_get_solutions(erroneous_rep) = at_most_zero_rep.
+detism_get_solutions(failure_rep) = at_most_zero_rep.
+
+:- type can_fail_rep
+ ---> can_fail_rep
+ ; cannot_fail_rep.
+
+:- func detism_get_can_fail(detism_rep) = can_fail_rep.
+
+detism_get_can_fail(det_rep) = cannot_fail_rep.
+detism_get_can_fail(semidet_rep) = can_fail_rep.
+detism_get_can_fail(multidet_rep) = cannot_fail_rep.
+detism_get_can_fail(nondet_rep) = can_fail_rep.
+detism_get_can_fail(cc_multidet_rep) = cannot_fail_rep.
+detism_get_can_fail(cc_nondet_rep) = can_fail_rep.
+detism_get_can_fail(erroneous_rep) = cannot_fail_rep.
+detism_get_can_fail(failure_rep) = can_fail_rep.
%----------------------------------------------------------------------------%
Index: deep_profiler/report.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/report.m,v
retrieving revision 1.14
diff -u -p -b -r1.14 report.m
--- deep_profiler/report.m 16 Oct 2008 01:16:36 -0000 1.14
+++ deep_profiler/report.m 19 Oct 2008 09:59:55 -0000
@@ -332,6 +332,9 @@
---> cost_since_proc_start(float)
; cost_before_proc_end(float).
+:- func cost_until_to_cost_since_start(cost_until_var_use, float) = float.
+:- func cost_until_to_cost_before_end(cost_until_var_use, float) = float.
+
% This type represents information about the performance of the subject.
% It is intended to be displayed on a browser page or used by a tool as is.
% It is NOT intended to be subject to further processing, such as adding
@@ -472,6 +475,25 @@
cdesc_other_members :: list(proc_desc)
).
+%----------------------------------------------------------------------------%
+%----------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module float.
+
+%----------------------------------------------------------------------------%
+
+cost_until_to_cost_since_start(cost_since_proc_start(Cost), _WholeCost) =
+ Cost.
+cost_until_to_cost_since_start(cost_before_proc_end(Cost), WholeCost) =
+ WholeCost - Cost.
+
+cost_until_to_cost_before_end(cost_since_proc_start(Cost), WholeCost) =
+ WholeCost - Cost.
+cost_until_to_cost_before_end(cost_before_proc_end(Cost), _WholeCost) =
+ Cost.
+
%-----------------------------------------------------------------------------%
:- end_module report.
%-----------------------------------------------------------------------------%
Index: mdbcomp/feedback.m
===================================================================
RCS file: /home/mercury1/repository/mercury/mdbcomp/feedback.m,v
retrieving revision 1.3
diff -u -p -b -r1.3 feedback.m
--- mdbcomp/feedback.m 30 Sep 2008 02:30:51 -0000 1.3
+++ mdbcomp/feedback.m 20 Oct 2008 06:29:21 -0000
@@ -27,6 +27,8 @@
:- import_module io.
:- import_module list.
:- import_module maybe.
+:- import_module pair.
+:- import_module set.
:- import_module string.
%-----------------------------------------------------------------------------%
@@ -77,9 +79,8 @@
% The cost of maintaining a lock on a single dependant
% variable in call sequence counts.
- conjunctions :: assoc_list(string, assoc_list(
- string_proc_label,
- candidate_par_conjunction))
+ conjunctions :: assoc_list(string_proc_label,
+ candidate_par_conjunction)
% Assoclist of module name and an assoclist of procedure
% labels and candidate parallel conjunctions.
).
@@ -97,50 +98,61 @@
% by a procedure label, goal path to the conjunction and the call sites
% within the conjunction that are to be parallelised.
%
+ % TODO: In the future support more expressive candidate parallel
+ % conjunctions, so that more opportunities for parallelism can be found.
+ % Although it's probably not a good idea to parallelise three conjuncts or
+ % more against one another without first having a good system for reaching
+ % and maintaining the target amount of parallelism, this may involve
+ % distance granularity.
+ %
:- type candidate_par_conjunction
---> candidate_par_conjunction(
goal_path :: goal_path_string,
- conjuncts :: list(candidate_par_conjunct)
+ % The path within the procedure to this conjunuction.
+
+ par_conjunct_a :: candidate_par_conjunct,
+ par_conjunct_b :: candidate_par_conjunct,
+ % The conjuncts worth parallelising
+
+ dependence :: conjuncts_are_dependant,
+
+ speedup :: float
).
:- type candidate_par_conjunct
---> candidate_par_conjunct(
- callee :: call_type_and_callee,
- vars :: list(variable_in_par_conjunct),
- cost :: int
+ callee :: maybe(pair(string, string)),
+ vars :: list(maybe(string)),
+ cost :: float,
+ include_unifications :: int
+ % The number of unifications between conjuncts that should
+ % be executed in sequence with this call.
).
-:- type variable_in_par_conjunct
- ---> variable_in_par_conjunct(
- maybe_name :: maybe(string),
- cost_before_first_use :: int
- ).
+:- type conjuncts_are_dependant
+ ---> conjuncts_are_dependant(
+ dependant_vars :: set(var_rep)
+ )
+ ; conjuncts_are_independent.
%-----------------------------------------------------------------------------%
- % put_feedback_data(Type, Data, !Info)
+ % put_feedback_data(Data, !Info)
%
- % 'Put' feedback data into the feedback files. Data is stored based on
- % the type of information being stored.
+ % Put feedback data into the feedback files.
%
% Data loaded from file (not added with put) will be removed from the
- % internal state when data for the same info type is added.
- %
- % This will throw an exception if the feedback_type and feedback_data don't
- % match.
+ % internal state when data for the same type is added.
%
:- pred put_feedback_data(feedback_data::in,
feedback_info::in, feedback_info::out) is det.
%-----------------------------------------------------------------------------%
- % get_feedback_data(Info, Type, MaybeData).
- %
- % To query the feedback files 'get' will give a value for a given info type
- % if it exists.
+ % get_feedback_data(Info, Data).
%
- % This will throw an exception if the feedback_type and feedback_data
- % within the Info structure do not match.
+ % When given a partially instantiated Data term representing the query this
+ % will either fully instantiate the term or fail.
%
:- pred get_feedback_data(feedback_info::in,
feedback_data::feedback_data_query) is semidet.
@@ -528,7 +540,7 @@ feedback_first_line = "Mercury Compiler
:- func feedback_version = string.
-feedback_version = "2".
+feedback_version = "3".
%-----------------------------------------------------------------------------%
:- end_module mdbcomp.feedback.
Index: mdbcomp/program_representation.m
===================================================================
RCS file: /home/mercury1/repository/mercury/mdbcomp/program_representation.m,v
retrieving revision 1.43
diff -u -p -b -r1.43 program_representation.m
--- mdbcomp/program_representation.m 3 Oct 2008 06:57:21 -0000 1.43
+++ mdbcomp/program_representation.m 20 Oct 2008 05:36:40 -0000
@@ -93,6 +93,12 @@
% that can be used outside the compiler, e.g. in RTTI data structures
% and in data filed generated by deep profiling.
%
+ % When procedures are imported from one module to another, for example for
+ % inter-module optimisations the def_module field may be different to the
+ % decl_module feild. In this case a procedure has been imported into the
+ % def_module from the decl_module. This is also true for the type_module
+ % and def_module fields in the str_special_proc_label constructor.
+ %
:- type string_proc_label
---> str_ordinary_proc_label(
s_ord_pred_or_func :: pred_or_func,
@@ -354,6 +360,10 @@
%
:- pred lookup_var_name(var_table::in, var_rep::in, string::out) is det.
+ % Retrieve the name for this variable if it is known, otherwise fail.
+ %
+:- pred search_var_name(var_table::in, var_rep::in, string::out) is semidet.
+
% If the given atomic goal behaves like a call in the sense that it
% generates events as ordinary calls do, then return the list of variables
% that are passed as arguments.
@@ -960,13 +970,16 @@ var_num_rep_byte(short, 1).
:- type var_table == map(var_rep, string).
lookup_var_name(VarTable, VarRep, String) :-
- ( map.search(VarTable, VarRep, StringPrime) ->
+ ( search_var_name(VarTable, VarRep, StringPrime) ->
String = StringPrime
;
% Generate an automatic name for the variable.
String = string.format("V_%d", [i(VarRep)])
).
+search_var_name(VarTable, VarRep, String) :-
+ map.search(VarTable, VarRep, String).
+
%-----------------------------------------------------------------------------%
:- pred read_file_as_bytecode(string::in, io.res(bytecode)::out,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20081020/344ac3d8/attachment.sig>
More information about the reviews
mailing list