[m-rev.] For post-commit review: Automatic parallelisation improvements.
Paul Bone
pbone at csse.unimelb.edu.au
Tue Jun 8 22:57:53 AEST 2010
For post-commit review. Probably by Zoltan.
Automatic parallelisation improvements.
The automatic parallelisation analysis now searches for the best way to
parallelise a conjunction when it's deciding if parallelisation is worth-while.
This mostly affects unifications and other cheap goals between two calls being
parallelised; it decides which parallel conjunct each of these goals should be
placed in. The search used is a branch and bound search.
deep_profiler/mdprof_fb.automatic_parallelism.m:
As above,
find_costly_call now returns find_costly_call_result rather than using the
maybe type. Instantiation sub-typing is used to guarantee that the goal
returned is indeed a costly call.
Fix a bug in calculate_dependant_parallel_cost.
When printing the candidate parallel conjunction report include the goals
before and after a parallel conjunction.
Document the debug_parallel_conjunction_speedup trace flag.
Create a new type parallelise_dep_conjs rather than using a boolean to
represent this option.
Add a new trace flag, debug_branch_and_bound enables trace goals that can
be used to debug the branch and bound search for the best parallelisation.
deep_profiler/mdprof_feedback.m:
When printing the candidate parallel conjunction report remove some
vertical whitespace after the header of the report.
deep_profiler/Mercury.options:
Add a new trace flag, debug_branch_and_bound enables trace goals that can
be used to debug the branch and bound search for the best parallelisation.
This is commented out.
mdbcomp/feedback.m:
Include the goals before and after a parallel conjunction in the
candidate_par_conjunction type.
Modify the parallel_exec_metrics code so that it can handle the cost of the
goals before and after a parallel conjunction.
Increment the feedback file format version.
When reading a feedback file strip the program name of leading and trailing
whitespace.
When printing a message for the incorrect_program_name error enclose the
program names in quotes.
Conform to changes in mdprof_fb.automatic_parallelism.m
Index: deep_profiler/Mercury.options
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/Mercury.options,v
retrieving revision 1.13
diff -u -p -b -r1.13 Mercury.options
--- deep_profiler/Mercury.options 30 Apr 2010 13:09:54 -0000 1.13
+++ deep_profiler/Mercury.options 8 Jun 2010 12:42:27 -0000
@@ -40,4 +40,5 @@ MCFLAGS-top_procs = --no-optimize-dupli
#MCFLAGS-mdprof_fb.automatic_parallelism = \
# --trace-flag=debug_cpc_search \
# --trace-flag=debug_recursive_costs \
-# --trace-flag=debug_parallel_conjunction_speedup
+# --trace-flag=debug_parallel_conjunction_speedup \
+# --trace-flag=debug_branch_and_bound
Index: deep_profiler/mdprof_fb.automatic_parallelism.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/mdprof_fb.automatic_parallelism.m,v
retrieving revision 1.7
diff -u -p -b -r1.7 mdprof_fb.automatic_parallelism.m
--- deep_profiler/mdprof_fb.automatic_parallelism.m 30 Apr 2010 13:09:54 -0000 1.7
+++ deep_profiler/mdprof_fb.automatic_parallelism.m 8 Jun 2010 12:50:11 -0000
@@ -24,7 +24,6 @@
:- import_module mdbcomp.feedback.
:- import_module mdbcomp.program_representation.
-:- import_module bool.
:- import_module cord.
:- import_module int.
:- import_module float.
@@ -53,9 +52,13 @@
cpc_locking_cost :: int,
cpc_clique_threshold :: int,
cpc_call_site_threshold :: int,
- cpc_parallelise_dep_conjs :: bool
+ cpc_parallelise_dep_conjs :: parallelise_dep_conjs
).
+:- type parallelise_dep_conjs
+ ---> parallelise_dep_conjs
+ ; do_not_parallelise_dep_conjs.
+
%-----------------------------------------------------------------------------%
% Perform Jerome's analysis and update the feedback info structure.
@@ -96,6 +99,7 @@
:- import_module map.
:- import_module maybe.
:- import_module multi_map.
+:- import_module mutvar.
:- import_module pqueue.
:- import_module require.
:- import_module set.
@@ -112,6 +116,12 @@
% --trace-flag=debug_recursive_costs
% Debug the calculation of the costs of recursive call sites.
%
+% --trace-flag=debug_parallel_conjunction_speedup
+% Print candidate parallel conjunctions that are pessimisations.
+%
+% --trace-flag=debug_branch_and_bound
+% Debug the branch and bound search for the best parallelisation.
+%
candidate_parallel_conjunctions(Opts, Deep, Messages, !Feedback) :-
Opts = candidate_parallel_conjunctions_opts(DesiredParallelism,
@@ -193,6 +203,9 @@ pard_goal_detail_to_pard_goal(pard_goal_
% The inst map info attached to the original goal.
).
+:- inst pard_goal_detail(T)
+ ---> pard_goal_detail(T, ground, ground, ground).
+
:- type pard_goal_type
---> pgt_call(
pgtc_callee :: callee_rep,
@@ -1384,8 +1397,6 @@ partition_pard_goals(Location, [ PG | PG
pardgoals_build_candidate_conjunction(Info, DependencyMaps, Location, GoalPath,
GoalsPartition, MaybeCandidate) :-
- ParalleliseDependantConjs = Info ^ ipi_opts ^ cpc_parallelise_dep_conjs,
-
% Setting up the first parallel conjunct is a different algorithm to the
% latter ones, at this point we have the option of moving goals from before
% the first costly call to either before or during the parallel
@@ -1395,64 +1406,21 @@ pardgoals_build_candidate_conjunction(In
% conjunction dependant when it could be independent.
pard_goals_partition(GoalsList, PartNum) = GoalsPartition,
Goals = cord.from_list(GoalsList),
- find_costly_call(Goals, cord.empty, GoalsBefore0, MaybeFirstCall,
- GoalsDuringAfter),
- (
- MaybeFirstCall = yes(FirstCall),
- FirstCallType = FirstCall ^ pgd_pg_type,
+ find_best_parallelisation(Info, Location, PartNum, DependencyMaps,
+ Goals, Parallelisation),
(
- FirstCallType = pgt_call(_, _, _, _, _, FirstCallCallSite)
- ;
- ( FirstCallType = pgt_cheap_call(_, _, _, _, _)
- ; FirstCallType = pgt_other_atomic_goal
- ; FirstCallType = pgt_non_atomic_goal
- ),
- location_to_string(1, Location, LocationString),
- Error = singleton(this_file) ++ singleton("\n") ++
- LocationString ++
- nl_indent(1) ++ singleton(format("partition %d", [i(PartNum)]))
- ++ nl_indent(1) ++
- singleton("First call in conjunction is not a call\n"),
- error(append_list(cord.list(Error)))
- )
- ;
- MaybeFirstCall = no,
- location_to_string(1, Location, LocationString),
- Error = singleton(this_file) ++ singleton("\n") ++
- LocationString ++
- nl_indent(1) ++ singleton(format("partition %d", [i(PartNum)])) ++
- nl_indent(1) ++ singleton("Couldn't find first call\n"),
- error(append_list(cord.list(Error)))
- ),
- build_first_seq_conjunction(DependencyMaps,
- FirstCall ^ pgd_conj_num, length(GoalsBefore0),
- length(GoalsDuringAfter), GoalsBefore0,
- cord.singleton(FirstCall), FirstSeqConjunction0, _GoalsBefore),
- ( get_first(FirstSeqConjunction0, FirstConj) ->
- FirstConjNumOfFirstParConjunct = FirstConj ^ pgd_conj_num
+ Parallelisation = bp_no_best_parallelisation,
+ MaybeCandidate = no
;
- error(this_file ++ "Found empty first parallel conjunct")
- ),
- pardgoals_build_par_conjs(GoalsDuringAfter, DependencyMaps,
- FirstConjNumOfFirstParConjunct, FirstSeqConjunction0, _GoalsAfter,
- cord.empty, ParConjsCord, conjuncts_are_independent, IsDependant),
-
- (
- not (
- IsDependant = conjuncts_are_dependant,
- ParalleliseDependantConjs = no
- )
- ->
- calculate_parallel_cost(Info, ParConjsCord, ParMetricsIncomp),
- NumCalls = FirstCallCallSite ^ ccsr_call_site_summary ^
- perf_row_calls,
- ParExecMetrics =
- finalise_parallel_exec_metrics(ParMetricsIncomp, NumCalls),
- Speedup = parallel_exec_metrics_get_speedup(ParExecMetrics),
- ParConjs = list(ParConjsCord),
- Candidate = candidate_par_conjunction(
- goal_path_to_string(GoalPath), PartNum, IsDependant,
- ParConjs, ParExecMetrics),
+ Parallelisation = bp_parallel_execution(GoalsBeforeCord, ParConjsCord,
+ GoalsAfterCord, IsDependent, Metrics),
+ Speedup = parallel_exec_metrics_get_speedup(Metrics),
+ GoalsBefore = list(GoalsBeforeCord),
+ GoalsAfter = list(GoalsAfterCord),
+ ParConjs = map((func(CordI) = seq_conj(list(CordI))),
+ list(ParConjsCord)),
+ Candidate = candidate_par_conjunction(goal_path_to_string(GoalPath),
+ PartNum, IsDependent, GoalsBefore, ParConjs, GoalsAfter, Metrics),
( Speedup > 1.0 ->
MaybeCandidate = yes(Candidate)
;
@@ -1475,199 +1443,291 @@ pardgoals_build_candidate_conjunction(In
create_candidate_parallel_conj_report(ProcLabel - FBCandidate,
Report),
io.write_string("Not parallelising conjunction, " ++
- "insufficient speedup:", !IO),
- io.write_string(append_list(cord.list(Report)), !IO),
- io.nl(!IO)
+ "insufficient speedup:\n", !IO),
+ io.write_string(append_list(cord.list(Report)), !IO)
)
)
- ;
- MaybeCandidate = no
).
-:- pred build_first_seq_conjunction(dependency_maps::in,
- int::in, int::in, int::in, cord(pard_goal_detail)::in,
- cord(pard_goal_detail)::in, cord(pard_goal_detail)::out,
- cord(pard_goal_detail)::out) is det.
-
-build_first_seq_conjunction(DependencyMaps, CallConjNum, NumGoalsBefore,
- NumGoalsAfter, Goals0, !FirstSeqConjunction, GoalsBefore) :-
- % Move over goals in reverse order.
- ( split_last(Goals0, Goals, Goal) ->
- ConjNum = Goal ^ pgd_conj_num,
- depends_lookup_rev(DependencyMaps, ConjNum, GoalRevDeps),
- depends_lookup_rev(DependencyMaps, CallConjNum, CallRevDeps),
- (
- % If later goals depend on this goal but not on the first call.
- member(LaterGoalNum, (CallConjNum+1)..(CallConjNum+NumGoalsAfter)),
- (
- member(LaterGoalNum, GoalRevDeps)
- =>
- not member(LaterGoalNum, CallRevDeps)
- )
- ->
- % Then this goal and all those before it must be in GoalsBefore.
- % This can be pessimistic but we're not allowed to re-order goals.
- GoalsBefore = Goals0
- ;
- % This goal may be parallelised as part of the first conjunction.
- !:FirstSeqConjunction = cons(Goal, !.FirstSeqConjunction),
- build_first_seq_conjunction(DependencyMaps, CallConjNum,
- NumGoalsBefore, NumGoalsAfter, Goals, !FirstSeqConjunction,
- GoalsBefore)
+:- type find_costly_call_result
+ ---> costly_call_found(
+ fccrfc_goals_before :: cord(pard_goal_detail),
+ fccrfc_call :: pard_goal_detail,
+ fccrfc_gaols_after :: cord(pard_goal_detail)
)
- ;
- GoalsBefore = cord.empty
- ).
+ ; costly_call_not_found.
-:- pred pardgoals_build_par_conjs(cord(pard_goal_detail)::in,
- dependency_maps::in, int::in,
- cord(pard_goal_detail)::in, cord(pard_goal_detail)::out,
- cord(seq_conj(pard_goal_detail))::in,
- cord(seq_conj(pard_goal_detail))::out,
- conjuncts_are_dependant::in, conjuncts_are_dependant::out) is det.
-
-pardgoals_build_par_conjs(Goals0, DependencyMaps,
- FirstConjNumOfFirstParConjunct, CurSeqConjunction0, GoalsAfter,
- !ParConjs, !ConjsAreDependant) :-
- % Find the next costly call.
- find_costly_call(Goals0, cord.empty, GoalsBefore, MaybeNextCall,
- Goals),
- (
- MaybeNextCall = yes(NextCall),
-
- % XXX: We put all the goals between two paralleliseable goals in the
- % parallel conjunct with the second goal. This seems to work as often
- % small unifications occurring before the next goal are used to prepare
- % some arguments for it.
- CurSeqConjunction = CurSeqConjunction0,
- NewSeqConjunction = snoc(GoalsBefore, NextCall),
-
- % Has the conjunction become dependant.
- map_pred(pg_get_conj_num, CurSeqConjunction, CurSeqConjNumsCord),
- CurSeqConjNumsSet = set(list(CurSeqConjNumsCord)),
- map_pred(pg_get_conj_num, NewSeqConjunction, NewSeqConjNumsCord),
- (
- !.ConjsAreDependant = conjuncts_are_independent,
- member(NewSeqConjNum, NewSeqConjNumsCord),
- depends_lookup(DependencyMaps, NewSeqConjNum, Dependencies),
- intersect(Dependencies, CurSeqConjNumsSet, Intersection),
- not set.empty(Intersection)
- ->
- !:ConjsAreDependant = conjuncts_are_dependant
- ;
- true
- ),
+:- inst find_costly_call_result
+ ---> costly_call_found(ground, pard_goal_detail(pgt_call), ground)
+ ; costly_call_not_found.
- SeqConjunction = seq_conj(list(CurSeqConjunction)),
- !:ParConjs = snoc(!.ParConjs, SeqConjunction),
- pardgoals_build_par_conjs(Goals, DependencyMaps,
- FirstConjNumOfFirstParConjunct, NewSeqConjunction, GoalsAfter,
- !ParConjs, !ConjsAreDependant)
- ;
- MaybeNextCall = no,
- ( cord.get_first(CurSeqConjunction0, FirstGoalInLastConjunct) ->
- FirstConjNumOfLastParConjunct =
- FirstGoalInLastConjunct ^ pgd_conj_num
- ;
- error(this_file ++ " empty parallel conjunct")
- ),
- % Because this is the last parallel conjunction we have the
- % option of putting some remaining goals into the parallel conjunction
- % as part of the last parallel conjunct.
- sorted_list_to_set(
- FirstConjNumOfFirstParConjunct..(FirstConjNumOfLastParConjunct-1),
- GoalsInOtherParConjuncts),
- build_last_seq_conjunction(Goals0, DependencyMaps,
- GoalsInOtherParConjuncts, GoalsAfter,
- cord.empty, GoalsInLastConjunct),
- SeqConjunction = seq_conj(list(
- CurSeqConjunction0 ++ GoalsInLastConjunct)),
- !:ParConjs = snoc(!.ParConjs, SeqConjunction)
- ).
-
-:- pred find_costly_call(cord(pard_goal_detail)::in,
- cord(pard_goal_detail)::in, cord(pard_goal_detail)::out,
- maybe(pard_goal_detail)::out,
- cord(pard_goal_detail)::out) is det.
+:- pred find_costly_call(cord(pard_goal_detail)::in, cord(pard_goal_detail)::in,
+ find_costly_call_result::out(find_costly_call_result)) is det.
-find_costly_call(Goals, !GoalsBefore, MaybeCall, GoalsAfter) :-
+find_costly_call(Goals, GoalsBefore0, Result) :-
( head_tail(Goals, Goal, GoalsTail) ->
GoalType = Goal ^ pgd_pg_type,
(
GoalType = pgt_call(_, _, _, _, _, _),
- MaybeCall = yes(Goal),
- GoalsAfter = GoalsTail
+ Result = costly_call_found(GoalsBefore0, Goal, GoalsTail)
;
( GoalType = pgt_cheap_call(_, _, _, _, _)
; GoalType = pgt_other_atomic_goal
),
- !:GoalsBefore = snoc(!.GoalsBefore, Goal),
- find_costly_call(GoalsTail, !GoalsBefore, MaybeCall, GoalsAfter)
+ GoalsBefore = snoc(GoalsBefore0, Goal),
+ find_costly_call(GoalsTail, GoalsBefore, Result)
;
GoalType = pgt_non_atomic_goal,
error(this_file ++ "Found non-atomic goal")
)
;
- MaybeCall = no,
- GoalsAfter = cord.empty
+ Result = costly_call_not_found
).
-:- pred build_last_seq_conjunction(cord(pard_goal_detail)::in,
- dependency_maps::in, set(int)::in, cord(pard_goal_detail)::out,
- cord(pard_goal_detail)::in, cord(pard_goal_detail)::out) is det.
-
-build_last_seq_conjunction(Goals0, DependencyMaps, GoalsInOtherParConjuncts,
- GoalsAfter, !GoalsInLastConjunct) :-
- ( head_tail(Goals0, Goal, Goals) ->
- ConjNum = Goal ^ pgd_conj_num,
- depends_lookup(DependencyMaps, ConjNum, Dependencies),
- intersect(Dependencies, GoalsInOtherParConjuncts, Intersection),
- (
- % The goal is dependant apon a goal in a previous parallel conjunct.
- not set.empty(Intersection) =>
- % But not dependant upon a goal in the current parallel conjunct.
- (
- map_pred(pg_get_conj_num, !.GoalsInLastConjunct,
- ConjNumsInLastConjunct),
- intersect(Dependencies, set(list(ConjNumsInLastConjunct)),
- Intersection2),
- set.empty(Intersection2)
+:- type best_parallelisation
+ ---> bp_parallel_execution(
+ bp_goals_before :: cord(pard_goal_detail),
+ bp_par_conjs :: cord(cord(pard_goal_detail)),
+ bp_goals_after :: cord(pard_goal_detail),
+ bp_is_dependent :: conjuncts_are_dependant,
+ bp_par_exec_metrics :: parallel_exec_metrics
+ )
+ % Rather than using the sequential execution as an initial
+ % value for the branch and bound search we use this value.
+ % This allows us to report the best parallelisation even when
+ % sequential execution is optimal.
+ ; bp_no_best_parallelisation.
+
+:- type incomplete_parallelisation
+ ---> incomplete_parallelisation(
+ ip_goals_before :: cord(pard_goal_detail),
+ ip_par_conjs :: cord(cord(pard_goal_detail)),
+ ip_par_exec_overlap :: parallel_execution_overlap,
+ ip_par_exec_metrics :: parallel_exec_metrics_incomplete,
+ ip_is_outermost_conj :: is_outermost_conjunct,
+ ip_can_make_cheap_conj :: can_make_cheap_conj
+ ).
+
+:- type can_make_cheap_conj
+ ---> can_make_cheap_conj
+ ; cannot_make_cheap_conj.
+
+:- pred find_best_parallelisation(implicit_parallelism_info::in,
+ program_location::in, int::in, dependency_maps::in,
+ cord(pard_goal_detail)::in, best_parallelisation::out) is det.
+
+find_best_parallelisation(Info, Location, PartNum, DependencyMaps, Goals,
+ FinalBestParallelisation) :-
+ % Use a failure driven loop to implement a branch and bound solver.
+ promise_pure (
+ trace [compile_time(flag("debug_branch_and_bound")), io(!IO)] (
+ io.format("D: Branch and bound loop starting:\n%s\n",
+ [s(LocationString)], !IO),
+ location_to_string(1, Location, LocationCord),
+ string.append_list(list(LocationCord), LocationString)
+ ),
+ impure new_mutvar(bp_no_best_parallelisation,
+ BestParallelisationMutvar),
+ (
+ impure find_best_parallelisation_nondet(Info, Location, PartNum,
+ DependencyMaps, Goals, BestParallelisationMutvar,
+ BestParallelisation),
+ % this is the best parallelisation so far.
+ impure set_mutvar(BestParallelisationMutvar, BestParallelisation),
+ trace [compile_time(flag("debug_branch_and_bound")), io(!IO)] (
+ (
+ BestParallelisation = bp_no_best_parallelisation
+ ;
+ BestParallelisation = bp_parallel_execution(_, _, _, _,
+ Metrics),
+ io.format("D: Branch and bound: Solution par time: %f\n",
+ [f(ParTime)], !IO),
+ ParTime = parallel_exec_metrics_get_par_time(Metrics)
)
+ ),
+ semidet_fail
->
- % This goal and all those after it must be placed after the
- % conjunction.
- GoalsAfter = Goals0
+ error("Failure driven loop must fail")
;
- % This goal does not depend on goals in other parallel conjuncts,
- % we can parallelise it here without introducing an extra future.
- !:GoalsInLastConjunct = snoc(!.GoalsInLastConjunct, Goal),
- build_last_seq_conjunction(Goals, DependencyMaps,
- GoalsInOtherParConjuncts, GoalsAfter, !GoalsInLastConjunct)
+ impure get_mutvar(BestParallelisationMutvar,
+ FinalBestParallelisation),
+ trace [compile_time(flag("debug_branch_and_bound")), io(!IO)] (
+ io.write_string("D: Branch and bound loop finished\n", !IO)
+ )
)
- ;
- GoalsAfter = cord.empty
).
- % calculate_dependant_parallel_cost(Conjs, ParExecMetrics).
- %
- % Analyse the parallel conjuncts and determine their likely performance.
- %
- % This is the new parallel execution overlap algorithm, it is general and
- % therefore we also use is for independent conjunctions.
- %
-:- pred calculate_parallel_cost(implicit_parallelism_info::in,
- cord(seq_conj(pard_goal_detail))::in,
- parallel_exec_metrics_incomplete::out) is det.
-
-calculate_parallel_cost(Info, Conjs, ParallelExecMetrics) :-
- % We parepare the conjunctions before calculating the parallelisation cost.
- % In doing this we create a left-associative data structure that makes the
- % left-associative walk in the next step easier. The data structure also
- % contains extra information preventing quadratic code in the actual
- % calculation.
- calculate_parallel_cost_2(Info, Conjs, is_outermost_conjunct,
- ParallelExecMetrics, _ParallelExecOverlap, _ParallelExec,
- _NumConjuncts).
+:- impure pred find_best_parallelisation_nondet(implicit_parallelism_info::in,
+ program_location::in, int::in, dependency_maps::in,
+ cord(pard_goal_detail)::in,
+ mutvar(best_parallelisation)::in, best_parallelisation::out) is nondet.
+
+find_best_parallelisation_nondet(Info, Location, PartNum, DependencyMaps, Goals0,
+ BestParallelisationMutvar, BestParallelisation) :-
+ find_costly_call(Goals0, cord.empty, FindCostlyCallResult),
+ (
+ FindCostlyCallResult = costly_call_found(GoalsBeforeFirstCall,
+ FirstCall, GoalsAfterFirstCall)
+ ;
+ FindCostlyCallResult = costly_call_not_found,
+ location_to_string(1, Location, LocationString),
+ Error = singleton(this_file) ++ singleton("\n") ++
+ LocationString ++
+ nl_indent(1) ++ singleton(format("partition %d", [i(PartNum)])) ++
+ nl_indent(1) ++ singleton("Couldn't find first call\n"),
+ error(append_list(cord.list(Error)))
+ ),
+ FirstCallCallSite = FirstCall ^ pgd_pg_type ^ pgtc_call_site,
+ NumCalls = FirstCallCallSite ^ ccsr_call_site_summary ^
+ perf_row_calls,
+
+ % Generate goals before conjunction.
+ cord_split(GoalsBeforeFirstCall, GoalsBeforeConj,
+ GoalsBeforeCallInFirstConj),
+ Goals1 = GoalsBeforeCallInFirstConj ++ singleton(FirstCall) ++
+ GoalsAfterFirstCall,
+
+ foldl_pred(pardgoal_calc_cost, GoalsBeforeConj, 0.0, CostBeforeConj),
+ Metrics0 = init_empty_parallel_exec_metrics(CostBeforeConj),
+ Parallelisation0 = incomplete_parallelisation(GoalsBeforeConj,
+ empty, peo_empty_conjunct, Metrics0, is_outermost_conjunct,
+ cannot_make_cheap_conj),
+ impure find_best_parallelisation_2(Info, DependencyMaps,
+ BestParallelisationMutvar, 0, map.init, Goals1,
+ GoalsAfterConj, Parallelisation0, Parallelisation1),
+
+ % Finalise the metrics and determine if this is the best solution so far.
+ foldl_pred(pardgoal_calc_cost, GoalsAfterConj, 0.0, CostAfterConj),
+ Metrics1 = Parallelisation1 ^ ip_par_exec_metrics,
+ SparkDelay = Info ^ ipi_opts ^ cpc_sparking_delay,
+ SparkCost = Info ^ ipi_opts ^ cpc_sparking_cost,
+ Metrics = finalise_parallel_exec_metrics(Metrics1, NumCalls, CostAfterConj,
+ float(SparkDelay + SparkCost)),
+
+ impure get_mutvar(BestParallelisationMutvar, CurrentBestParallelisation),
+ cbmr_metrics_is_better = compare_best_parallelisation_and_metrics(
+ CurrentBestParallelisation, Metrics),
+
+ Conjuncts = Parallelisation1 ^ ip_par_conjs,
+ Overlap = Parallelisation1 ^ ip_par_exec_overlap,
+ par_conj_overlap_is_dependant(Overlap, IsDependent),
+ BestParallelisation = bp_parallel_execution(GoalsBeforeConj, Conjuncts,
+ GoalsAfterConj, IsDependent, Metrics).
+
+:- impure pred find_best_parallelisation_2(implicit_parallelism_info::in,
+ dependency_maps::in, mutvar(best_parallelisation)::in,
+ int::in, map(var_rep, float)::in,
+ cord(pard_goal_detail)::in, cord(pard_goal_detail)::out,
+ incomplete_parallelisation::in, incomplete_parallelisation::out) is nondet.
+
+find_best_parallelisation_2(Info, DependencyMaps, BestParallelisationMutvar,
+ NumConjuncts0, ProductionsMap0, !Goals, !Parallelisation) :-
+ IsOutermostConjunct = !.Parallelisation ^ ip_is_outermost_conj,
+ find_costly_call(!.Goals, cord.empty, FindCostlyCallResult),
+ (
+ FindCostlyCallResult =
+ costly_call_found(GoalsBefore0, Call, GoalsAfter0),
+
+ (
+ can_make_cheap_conj = !.Parallelisation ^ ip_can_make_cheap_conj,
+ find_costly_call(GoalsAfter0, cord.empty,
+ costly_call_found(_, _, _)),
+ cord_split(GoalsBefore0, Conj, GoalsBefore),
+ !:Goals = snoc(GoalsBefore, Call) ++ GoalsAfter0,
+ !Parallelisation ^ ip_can_make_cheap_conj := cannot_make_cheap_conj
+ ;
+ % Determine how many goals to include after the call in this
+ % parallel conjunct.
+ cord_split(GoalsAfter0, GoalsAfter, !:Goals),
+ % Don't include two costly calls in the same parallel conjunct.
+ find_costly_call(GoalsAfter, cord.empty, costly_call_not_found),
+
+ % Build the new conjunct and test to see if this is no worse than
+ % our best parallelisation.
+ Conj = snoc(GoalsBefore0, Call) ++ GoalsAfter,
+
+ % If we've found the last costly call then there are no more
+ % conjuncts to make and therefore we cannot make a cheap conjunct,
+ % otherwise we can make a cheap conjunct.
+ find_costly_call(!.Goals, cord.empty, Result2),
+ (
+ Result2 = costly_call_not_found,
+ !Parallelisation ^ ip_can_make_cheap_conj :=
+ cannot_make_cheap_conj
+ ;
+ Result2 = costly_call_found(_, _, _),
+ !Parallelisation ^ ip_can_make_cheap_conj := can_make_cheap_conj
+ )
+ ),
+ not is_empty(Conj),
+ Conjs0 = !.Parallelisation ^ ip_par_conjs,
+ Conjs = snoc(Conjs0, Conj),
+
+ Metrics0 = !.Parallelisation ^ ip_par_exec_metrics,
+ Overlap0 = !.Parallelisation ^ ip_par_exec_overlap,
+ calculate_parallel_cost_step(Info, IsOutermostConjunct, NumConjuncts0,
+ Conj, ProductionsMap0, ProductionsMap, Metrics0, Metrics,
+ Overlap0, Overlap),
+ (
+ Overlap = peo_empty_conjunct,
+ DepVars = set.init
+ ;
+ Overlap = peo_conjunction(_, _, DepVars)
+ ),
+ ParalleliseDepConjs = Info ^ ipi_opts ^ cpc_parallelise_dep_conjs,
+ (
+ ParalleliseDepConjs = do_not_parallelise_dep_conjs
+ =>
+ set.empty(DepVars)
+ ),
+ SparkCost = Info ^ ipi_opts ^ cpc_sparking_cost,
+ SparkDelay = Info ^ ipi_opts ^ cpc_sparking_delay,
+ MetricsComplete = finalise_parallel_exec_metrics(Metrics, 1, 0.0,
+ float(SparkDelay + SparkCost)),
+ impure get_mutvar(BestParallelisationMutvar,
+ CurrentBestParallelisation),
+ cbmr_metrics_is_better = compare_best_parallelisation_and_metrics(
+ CurrentBestParallelisation, MetricsComplete),
+
+ NumConjuncts = NumConjuncts0 + 1,
+ !Parallelisation ^ ip_par_exec_overlap := Overlap,
+ !Parallelisation ^ ip_par_exec_metrics := Metrics,
+ !Parallelisation ^ ip_par_conjs := Conjs,
+ !Parallelisation ^ ip_is_outermost_conj := is_not_outermost_conjunct,
+
+ impure find_best_parallelisation_2(Info, DependencyMaps,
+ BestParallelisationMutvar,
+ NumConjuncts, ProductionsMap, !Goals, !Parallelisation)
+ ;
+ FindCostlyCallResult = costly_call_not_found
+ ).
+
+:- type compare_best_and_metrics_result
+ ---> cbmr_metrics_is_better
+ ; cbmr_metrics_is_not_better.
+
+ % Compare the best parallelisation with the current parallelisation.
+ %
+:- func compare_best_parallelisation_and_metrics(best_parallelisation,
+ parallel_exec_metrics) = compare_best_and_metrics_result.
+
+compare_best_parallelisation_and_metrics(BestParallelisation, Metrics) =
+ Result :-
+ (
+ BestParallelisation = bp_no_best_parallelisation,
+ Result = cbmr_metrics_is_better
+ ;
+ BestParallelisation = bp_parallel_execution(_, _, _, _, BestMetrics),
+ BestParTime = parallel_exec_metrics_get_par_time(BestMetrics),
+ ParTime = parallel_exec_metrics_get_par_time(Metrics),
+ % TODO: Add other comparisons for tie breaking or a better optimisation
+ % formula.
+ ( BestParTime > ParTime ->
+ Result = cbmr_metrics_is_better
+ ;
+ Result = cbmr_metrics_is_not_better
+ )
+ ).
% This datastructure represents the execution of dependant conjuncts, it
% tracks which variabes are produced and consumed.
@@ -1706,6 +1766,13 @@ calculate_parallel_cost(Info, Conjs, Par
% calculate_parallel_cost(Info, Conjunctions, IsOutermostConjunct,
% ParallelExecMetrics, ParallelExecOverlap, ProductionsMap, NumConjuncts).
%
+ % Analyse the parallel conjuncts and determine their likely performance.
+ % This code performs one step of the computation the steps are linked
+ % together by find_best_parallelisation.
+ %
+ % This is the new parallel execution overlap algorithm, it is general and
+ % therefore we also use is for independent conjunctions.
+ %
% + CallerVars is the set of vars for which our caller wants us to build a
% productions map for.
%
@@ -1715,21 +1782,15 @@ calculate_parallel_cost(Info, Conjs, Par
% + ProductionsMap: A productions map that covers the production of
% CallerVars.
%
-:- pred calculate_parallel_cost_2(implicit_parallelism_info::in,
- cord(seq_conj(pard_goal_detail))::in, is_outermost_conjunct::in,
- parallel_exec_metrics_incomplete::out, parallel_execution_overlap::out,
- map(var_rep, float)::out, int::out) is det.
-
-calculate_parallel_cost_2(Info, Conjs0, IsOutermostConjunct,
- ParallelExecMetrics, ParallelExecOverlap, ProductionsMap,
- NumConjuncts) :-
- ( cord.split_last(Conjs0, Conjs, Conj) ->
- some [!ProductionsMap] (
- calculate_parallel_cost_2(Info, Conjs, is_not_outermost_conjunct,
- ParallelExecMetrics0, ParallelExecOverlap0, !:ProductionsMap,
- NumConjuncts0),
-
- seq_conj(Goals) = Conj,
+:- pred calculate_parallel_cost_step(implicit_parallelism_info::in,
+ is_outermost_conjunct::in, int::in, cord(pard_goal_detail)::in,
+ map(var_rep, float)::in, map(var_rep, float)::out,
+ parallel_exec_metrics_incomplete::in, parallel_exec_metrics_incomplete::out,
+ parallel_execution_overlap::in, parallel_execution_overlap::out) is det.
+
+calculate_parallel_cost_step(Info, IsOutermostConjunct, NumConjuncts, Conj,
+ !ProductionsMap, !Metrics, !Overlap) :-
+ Goals = list(Conj),
foldl(pardgoal_calc_cost, Goals, 0.0, CostB),
foldl(pardgoal_consumed_vars_accum, Goals, set.init,
RightConsumedVars),
@@ -1741,7 +1802,7 @@ calculate_parallel_cost_2(Info, Conjs0,
% the prevous conjunct. Which in turn may have been sparked by an
% earlier conjunct.
SparkDelay = Info ^ ipi_opts ^ cpc_sparking_delay,
- StartTime0 = (NumConjuncts0 * SparkDelay),
+ StartTime0 = (NumConjuncts * SparkDelay),
% If there are conjuncts after this conjunct we will have the
% additional cost of sparking them.
@@ -1761,16 +1822,16 @@ calculate_parallel_cost_2(Info, Conjs0,
reverse(ConsumptionsList0, ConsumptionsList),
% Determine how the parallel conjuncts overlap.
- foldl4(calculate_dependant_parallel_cost_2(Info, !.ProductionsMap),
+ foldl5(calculate_dependant_parallel_cost_2(Info, !.ProductionsMap),
ConsumptionsList, 0.0, LastSeqConsumeTime,
- StartTime, LastParConsumeTime, [], RevExecution0,
- map.init, ConsumptionsMap),
- RevExecution = [ (LastParConsumeTime - CostBPar) | RevExecution0 ],
+ StartTime, LastParConsumeTime, StartTime, LastResumeTime,
+ [], RevExecution0, map.init, ConsumptionsMap),
+ RevExecution = [ (LastResumeTime - CostBPar) | RevExecution0 ],
reverse(RevExecution, Execution),
CostBPar = LastParConsumeTime + (CostB - LastSeqConsumeTime),
- ParallelExecMetrics = init_parallel_exec_metrics_incomplete(
- ParallelExecMetrics0, CostB, CostBPar),
+ !:Metrics =
+ init_parallel_exec_metrics_incomplete(!.Metrics, CostB, CostBPar),
% Build the productions map for our parent. This map contains all
% the variables produced by this code, not just that are used for
@@ -1780,31 +1841,19 @@ calculate_parallel_cost_2(Info, Conjs0,
DepConjExec = dependant_conjunct_execution(Execution,
!.ProductionsMap, ConsumptionsMap),
- ParallelExecOverlap = peo_conjunction(ParallelExecOverlap0,
- DepConjExec, Vars),
-
- ProductionsMap = !.ProductionsMap,
- NumConjuncts = NumConjuncts0 + 1
- )
- ;
- ParallelExecMetrics = init_empty_parallel_exec_metrics,
- ParallelExecOverlap = peo_empty_conjunct,
- ProductionsMap = map.init,
- NumConjuncts = 0
- ).
-
+ !:Overlap = peo_conjunction(!.Overlap, DepConjExec, Vars).
% The main loop of the parallel overlap analysis.
%
:- pred calculate_dependant_parallel_cost_2(implicit_parallelism_info::in,
map(var_rep, float)::in, pair(var_rep, float)::in, float::in, float::out,
- float::in, float::out,
+ float::in, float::out, float::in, float::out,
assoc_list(float, float)::in, assoc_list(float, float)::out,
map(var_rep, float)::in, map(var_rep, float)::out) is det.
calculate_dependant_parallel_cost_2(Info, ProductionsMap, Var - SeqConsTime,
- !PrevSeqConsumeTime, !PrevParConsumeTime, !RevExecution,
- !ConsumptionsMap) :-
+ !PrevSeqConsumeTime, !PrevParConsumeTime, !ResumeTime,
+ !RevExecution, !ConsumptionsMap) :-
FutureSyncTime = float(Info ^ ipi_opts ^ cpc_locking_cost),
map.lookup(ProductionsMap, Var, ProdTime),
@@ -1831,7 +1880,8 @@ calculate_dependant_parallel_cost_2(Info
ProdTime > ParConsTimeNotBlocked
->
!:RevExecution =
- [ (!.PrevParConsumeTime - ParConsTimeNotBlocked) | !.RevExecution ]
+ [ (!.ResumeTime - ParConsTimeNotBlocked) | !.RevExecution ],
+ !:ResumeTime = ParConsTime
;
true
),
@@ -1841,6 +1891,17 @@ calculate_dependant_parallel_cost_2(Info
svmap.det_insert(Var, ParConsTime, !ConsumptionsMap).
+:- pred par_conj_overlap_is_dependant(parallel_execution_overlap::in,
+ conjuncts_are_dependant::out) is det.
+
+par_conj_overlap_is_dependant(peo_empty_conjunct, conjuncts_are_independent).
+par_conj_overlap_is_dependant(peo_conjunction(Left, _, VarSet), IsDependent) :-
+ ( set.empty(VarSet) ->
+ par_conj_overlap_is_dependant(Left, IsDependent)
+ ;
+ IsDependent = conjuncts_are_dependant
+ ).
+
:- pred pg_get_conj_num(pard_goal_detail::in, int::out) is det.
pg_get_conj_num(PG, PG ^ pgd_conj_num).
@@ -2778,7 +2839,7 @@ transitive_map_insert(K, V, !Map) :-
create_candidate_parallel_conj_report(Proc - CandidateParConjunction, Report) :-
print_proc_label_to_string(Proc, ProcString),
CandidateParConjunction = candidate_par_conjunction(GoalPath,
- PartNum, IsDependant, Conjs, ParExecMetrics),
+ PartNum, IsDependant, GoalsBefore, Conjs, GoalsAfter, ParExecMetrics),
NumCalls = parallel_exec_metrics_get_num_calls(ParExecMetrics),
(
IsDependant = conjuncts_are_independent,
@@ -2796,7 +2857,7 @@ create_candidate_parallel_conj_report(Pr
FutureDeadTime =
parallel_exec_metrics_get_future_dead_time(ParExecMetrics),
TotalDeadTime = parallel_exec_metrics_get_total_dead_time(ParExecMetrics),
- format("\n %s\n" ++
+ format(" %s\n" ++
" Path and Partition Num: %s, %d\n" ++
" Dependant: %s\n" ++
" NumCalls: %d\n" ++
@@ -2806,22 +2867,25 @@ create_candidate_parallel_conj_report(Pr
" Time saving: %f\n" ++
" First conj dead time: %f\n" ++
" Future dead time: %f\n" ++
- " Total dead time: %f",
+ " Total dead time: %f\n\n",
[s(ProcString), s(GoalPath), i(PartNum), s(DependanceString),
i(NumCalls), f(SeqTime), f(ParTime), f(Speedup), f(TimeSaving),
f(FirstConjDeadTime), f(FutureDeadTime), f(TotalDeadTime)],
ReportHeaderStr),
ReportHeader = singleton(ReportHeaderStr),
+ format_sequential_conjuncts(3, GoalsBefore, empty, ReportGoalsBefore),
format_parallel_conjunction(3, Conjs, ReportParConj),
+ format_sequential_conjuncts(3, GoalsAfter, empty, ReportGoalsAfter),
- Report = snoc(ReportHeader ++ ReportParConj, "\n").
+ Report = snoc(ReportHeader ++ ReportGoalsBefore ++ ReportParConj ++
+ ReportGoalsAfter, "\n").
:- pred format_parallel_conjunction(int::in,
list(seq_conj(pard_goal))::in, cord(string)::out) is det.
format_parallel_conjunction(Indent, Conjs, Report) :-
- IndentStr = nl_indent(Indent),
+ IndentStr = indent(Indent),
Report0 = IndentStr ++ singleton("(\n"),
format_parallel_conjuncts(Indent, Conjs, Report0, Report).
@@ -2830,7 +2894,7 @@ format_parallel_conjunction(Indent, Conj
format_parallel_conjuncts(Indent, [], !Report) :-
IndentStr = indent(Indent),
- !:Report = snoc(!.Report ++ IndentStr, ")").
+ !:Report = snoc(!.Report ++ IndentStr, ")\n").
format_parallel_conjuncts(Indent, [ Conj | Conjs ], !Report) :-
format_sequential_conjunction(Indent + 1, Conj, ConjReport),
!:Report = !.Report ++ ConjReport,
@@ -2922,5 +2986,27 @@ format_callee(named_callee(Module, Proc)
format("%s.%s", [s(Module), s(Proc)])).
%-----------------------------------------------------------------------------%
+
+:- pred cord_split(cord(T)::in, cord(T)::out, cord(T)::out) is multi.
+
+cord_split(Cord, A, B) :-
+ ( cord.head_tail(Cord, Head, Tail) ->
+ (
+ % Put the split before Cord,
+ A = cord.empty,
+ B = Cord
+ ;
+ % Put the split within or after Cord.
+ cord_split(Tail, TailA, B),
+ A = cons(Head, TailA)
+ )
+ ;
+ % An empty cord can only be split one way (since we say that the split
+ % may happen before the cord).
+ A = cord.empty,
+ B = cord.empty
+ ).
+
+%-----------------------------------------------------------------------------%
:- end_module mdprof_fb.automatic_parallelism.
%-----------------------------------------------------------------------------%
Index: deep_profiler/mdprof_feedback.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/mdprof_feedback.m,v
retrieving revision 1.22
diff -u -p -b -r1.22 mdprof_feedback.m
--- deep_profiler/mdprof_feedback.m 13 May 2010 02:27:38 -0000 1.22
+++ deep_profiler/mdprof_feedback.m 8 Jun 2010 07:33:25 -0000
@@ -191,7 +191,7 @@ create_feedback_report(feedback_data_can
" Sparking delay: %d\n" ++
" Locking cost: %d\n" ++
" Number of Parallel Conjunctions: %d\n" ++
- " Parallel Conjunctions:\n",
+ " Parallel Conjunctions:\n\n",
[f(DesiredParallelism), i(SparkingCost), i(SparkingDelay),
i(LockingCost), i(NumConjs)])),
map(create_candidate_parallel_conj_report, Conjs, ReportConjs),
@@ -513,7 +513,14 @@ check_options(Options0, RequestedFeedbac
CPCCallSiteThreshold),
lookup_bool_option(Options,
implicit_parallelism_dependant_conjunctions,
- ParalleliseDepConjs),
+ ParalleliseDepConjsBool),
+ (
+ ParalleliseDepConjsBool = yes,
+ ParalleliseDepConjs = parallelise_dep_conjs
+ ;
+ ParalleliseDepConjsBool = no,
+ ParalleliseDepConjs = do_not_parallelise_dep_conjs
+ ),
CandidateParallelConjunctionsOpts =
candidate_parallel_conjunctions_opts(DesiredParallelism,
SparkingCost,
Index: mdbcomp/feedback.m
===================================================================
RCS file: /home/mercury1/repository/mercury/mdbcomp/feedback.m,v
retrieving revision 1.10
diff -u -p -b -r1.10 feedback.m
--- mdbcomp/feedback.m 13 May 2010 02:27:38 -0000 1.10
+++ mdbcomp/feedback.m 8 Jun 2010 12:53:16 -0000
@@ -119,6 +119,8 @@
cpc_is_dependant :: conjuncts_are_dependant,
+ cpc_goals_before :: list(GoalType),
+
cpc_conjs :: list(seq_conj(GoalType)),
% A list of parallel conjuncts, each is a sequential
% conjunction of inner goals. All inner goals that are
@@ -131,6 +133,8 @@
% algorithms to construct the same parallel conjunction
% from the same program representation/HLDS structure.
+ cpc_goals_after :: list(GoalType),
+
cpc_par_exec_metrics :: parallel_exec_metrics
).
@@ -202,20 +206,6 @@
%
:- type parallel_exec_metrics_incomplete.
- % ParExecMetrics = init_parallel_execution_metrics(NumCalls,
- % CostA, CostBSeq, CostBPar).
- %
- % CostA is the cost of the first conjunct in a conjunction. CostBSeq is
- % the cost of the second conjunct if this is a normal conjunction.
- % CostBPar is the cost of the second conjunct if this is a parallel
- % conjunction.
- %
- % This function should be used when building metrics for parallel
- % conjunctions of exactly two conjuncts.
- %
-:- func init_parallel_exec_metrics(int, float, float, float) =
- parallel_exec_metrics.
-
% ParMetrics = init_parallel_exec_metrics_incomplete(PartMetricsA,
% CostBSeq, CostBPar)
%
@@ -233,20 +223,26 @@
:- func init_parallel_exec_metrics_incomplete(parallel_exec_metrics_incomplete,
float, float) = parallel_exec_metrics_incomplete.
- % StartMetrics = init_empty_parallel_exec_metrics.
+ % StartMetrics = init_empty_parallel_exec_metrics(CostBefore).
%
% Use this function to start with an empty set of metrics for an empty
% conjunction. Then use init_parallel_exec_metrics_incomplete to continue
% adding conjuncts on the right.
%
-:- func init_empty_parallel_exec_metrics = parallel_exec_metrics_incomplete.
+:- func init_empty_parallel_exec_metrics(float) =
+ parallel_exec_metrics_incomplete.
- % Metrics = finalise_parallel_exec_metrics(IncompleteMetrics, NumCalls).
+ % Metrics = finalise_parallel_exec_metrics(IncompleteMetrics, NumCalls,
+ % CostAfterConj, RightConjDelay).:w
%
% Make the metrics structure complete.
%
-:- func finalise_parallel_exec_metrics(parallel_exec_metrics_incomplete, int) =
- parallel_exec_metrics.
+ % RightConjDelay is the delay before the conjunct to the right of & will
+ % begin executing. & is considered to be right-associative since that's
+ % how sparks are sparked.
+ %
+:- func finalise_parallel_exec_metrics(parallel_exec_metrics_incomplete,
+ int, float, float) = parallel_exec_metrics.
:- func parallel_exec_metrics_get_num_calls(parallel_exec_metrics) = int.
@@ -573,7 +569,7 @@ read_program_name(Stream, _, Result, !IO
io.read_line_as_string(Stream, IOResultLine, !IO),
(
IOResultLine = ok(String),
- Result = ok(String)
+ Result = ok(strip(String))
;
IOResultLine = eof,
Result = error(unexpected_eof)
@@ -665,7 +661,7 @@ read_error_message_string(File, Error, M
Error = incorrect_program_name(Expected, Got),
MessagePart =
"Program name didn't match, is this the right feedback file?\n"
- ++ format("Expected: %s Got: %s", [s(Expected), s(Got)])
+ ++ format("Expected: '%s' Got: '%s'", [s(Expected), s(Got)])
),
string.format("%s: %s\n", [s(File), s(MessagePart)], Message).
@@ -738,7 +734,7 @@ feedback_first_line = "Mercury Compiler
:- func feedback_version = string.
-feedback_version = "7".
+feedback_version = "8".
%-----------------------------------------------------------------------------%
%
@@ -748,9 +744,13 @@ feedback_version = "7".
%
convert_candidate_par_conjunction(Conv, CPC0, CPC) :-
- Conjs0 = CPC0 ^ cpc_conjs,
+ CPC0 = candidate_par_conjunction(GoalPath, PartNum, IsDependent,
+ GoalsBefore0, Conjs0, GoalsAfter0, Metrics),
map(convert_seq_conj(Conv), Conjs0, Conjs),
- CPC = CPC0 ^ cpc_conjs := Conjs.
+ map(Conv, GoalsBefore0, GoalsBefore),
+ map(Conv, GoalsAfter0, GoalsAfter),
+ CPC = candidate_par_conjunction(GoalPath, PartNum, IsDependent,
+ GoalsBefore, Conjs, GoalsAfter, Metrics).
convert_seq_conj(Conv, seq_conj(Conjs0), seq_conj(Conjs)) :-
map(Conv, Conjs0, Conjs).
@@ -759,51 +759,70 @@ convert_seq_conj(Conv, seq_conj(Conjs0),
:- type parallel_exec_metrics
---> parallel_exec_metrics(
- inner_metrics :: parallel_exec_metrics_incomplete,
- num_calls :: int
+ pem_inner_metrics :: parallel_exec_metrics_incomplete,
+ pem_num_calls :: int,
+ pem_time_before_conj :: float,
+ pem_time_after_conj :: float,
+ pem_right_conj_delay :: float
+ % The delay before a conjunct to the right of & begins
+ % executing.
).
:- type parallel_exec_metrics_incomplete
- ---> pem_initial
+ ---> pem_initial(
+ pemi_time_before_conj :: float
+ )
; pem_additional(
- cost_left :: parallel_exec_metrics_incomplete,
- % The cost of the left conjunct (that may be a conjunction),
+ pemi_time_left :: parallel_exec_metrics_incomplete,
+ % The time of the left conjunct (that may be a conjunction),
- cost_right_seq :: float,
- % The cost of the right conjunct if it is running after
+ pemi_time_right_seq :: float,
+ % The time of the right conjunct if it is running after
% the left in normal sequential execution.
- cost_right_par :: float
- % THe cost of the right conjunct if it is running in
+ pemi_time_right_par :: float
+ % The time of the right conjunct if it is running in
% parallel with the left conjunct. It may have to stop and
- % wait for variables to be produced; therefore this cost is
- % different to cost_right_seq. This cost also includes
+ % wait for variables to be produced; therefore this time is
+ % different to time_right_seq. This time also includes
% parallel execution overheads and delays.
).
-init_parallel_exec_metrics(NumCalls, TimeA, TimeBSeq, TimeBPar) = Metrics :-
- Metrics0 = init_empty_parallel_exec_metrics,
- Metrics1 = init_parallel_exec_metrics_incomplete(Metrics0, TimeA, TimeA),
- Metrics2 =
- init_parallel_exec_metrics_incomplete(Metrics1, TimeBSeq, TimeBPar),
- Metrics = finalise_parallel_exec_metrics(Metrics2, NumCalls).
-
init_parallel_exec_metrics_incomplete(MetricsA, TimeBSeq, TimeBPar) =
pem_additional(MetricsA, TimeBSeq, TimeBPar).
-init_empty_parallel_exec_metrics = pem_initial.
-
-finalise_parallel_exec_metrics(IncompleteMetrics, NumCalls) =
- parallel_exec_metrics(IncompleteMetrics, NumCalls).
+init_empty_parallel_exec_metrics(TimeBefore) = pem_initial(TimeBefore).
-parallel_exec_metrics_get_num_calls(parallel_exec_metrics(_, NumCalls)) =
- NumCalls.
+finalise_parallel_exec_metrics(IncompleteMetrics, NumCalls, TimeAfter,
+ RightConjDelay)
+ =
+ parallel_exec_metrics(IncompleteMetrics, NumCalls, TimeBefore,
+ TimeAfter, RightConjDelay) :-
+ TimeBefore =
+ parallel_exec_metrics_incomp_get_time_before(IncompleteMetrics).
-parallel_exec_metrics_get_par_time(parallel_exec_metrics(Inner, _)) =
- parallel_exec_metrics_incomp_get_par_time(Inner).
+:- func parallel_exec_metrics_incomp_get_time_before(
+ parallel_exec_metrics_incomplete) = float.
-parallel_exec_metrics_get_seq_time(parallel_exec_metrics(Inner, _)) =
- parallel_exec_metrics_incomp_get_seq_time(Inner).
+parallel_exec_metrics_incomp_get_time_before(pem_initial(Time)) = Time.
+parallel_exec_metrics_incomp_get_time_before(
+ pem_additional(Left, _, _)) = Time :-
+ Time = parallel_exec_metrics_incomp_get_time_before(Left).
+
+parallel_exec_metrics_get_num_calls(PEM) = NumCalls :-
+ NumCalls = PEM ^ pem_num_calls.
+
+parallel_exec_metrics_get_par_time(PEM) = Time :-
+ Inner = PEM ^ pem_inner_metrics,
+ InnerTime = parallel_exec_metrics_incomp_get_par_time(Inner),
+ BeforeAndAfterTime = PEM ^ pem_time_before_conj + PEM ^ pem_time_after_conj,
+ Time = InnerTime + BeforeAndAfterTime.
+
+parallel_exec_metrics_get_seq_time(PEM) = Time :-
+ Inner = PEM ^ pem_inner_metrics,
+ InnerTime = parallel_exec_metrics_incomp_get_seq_time(Inner),
+ BeforeAndAfterTime = PEM ^ pem_time_before_conj + PEM ^ pem_time_after_conj,
+ Time = InnerTime + BeforeAndAfterTime.
parallel_exec_metrics_get_speedup(Metrics) = SeqTime / ParTime :-
SeqTime = parallel_exec_metrics_get_seq_time(Metrics),
@@ -814,7 +833,7 @@ parallel_exec_metrics_get_speedup(Metric
:- func parallel_exec_metrics_incomp_get_par_time(
parallel_exec_metrics_incomplete) = float.
-parallel_exec_metrics_incomp_get_par_time(pem_initial) = 0.0.
+parallel_exec_metrics_incomp_get_par_time(pem_initial(_)) = 0.0.
parallel_exec_metrics_incomp_get_par_time(pem_additional(MetricsLeft, _, TimeRight))
= Time :-
TimeLeft = parallel_exec_metrics_incomp_get_par_time(MetricsLeft),
@@ -825,7 +844,7 @@ parallel_exec_metrics_incomp_get_par_tim
:- func parallel_exec_metrics_incomp_get_seq_time(
parallel_exec_metrics_incomplete) = float.
-parallel_exec_metrics_incomp_get_seq_time(pem_initial) = 0.0.
+parallel_exec_metrics_incomp_get_seq_time(pem_initial(_)) = 0.0.
parallel_exec_metrics_incomp_get_seq_time(pem_additional(MetricsLeft, TimeRight, _))
= Time :-
TimeLeft = parallel_exec_metrics_incomp_get_seq_time(MetricsLeft),
@@ -836,18 +855,18 @@ parallel_exec_metrics_get_time_saving(Me
ParTime = parallel_exec_metrics_get_par_time(Metrics).
parallel_exec_metrics_get_first_conj_dead_time(Metrics) = DeadTime :-
- parallel_exec_metrics(Inner, _) = Metrics,
+ Inner = Metrics ^ pem_inner_metrics,
FirstConjTime = pem_get_first_conj_time(Inner),
MaxConjTime = pem_get_max_conj_time(Inner, 0.0),
DeadTime = MaxConjTime - FirstConjTime.
:- func pem_get_first_conj_time(parallel_exec_metrics_incomplete) = float.
-pem_get_first_conj_time(pem_initial) = _ :-
+pem_get_first_conj_time(pem_initial(_)) = _ :-
error("pem_get_first_conj_time: Empty conjunction").
pem_get_first_conj_time(pem_additional(Left, _RightSeq, RightPar)) = Time :-
(
- Left = pem_initial,
+ Left = pem_initial(_),
Time = RightPar
;
Left = pem_additional(_, _, _),
@@ -856,22 +875,33 @@ pem_get_first_conj_time(pem_additional(L
:- func pem_get_max_conj_time(parallel_exec_metrics_incomplete, float) = float.
-pem_get_max_conj_time(pem_initial, Max) = Max.
+pem_get_max_conj_time(pem_initial(_), Max) = Max.
pem_get_max_conj_time(pem_additional(Left, _, Par), Max0) = Max :-
Max1 = max(Par, Max0),
Max = pem_get_max_conj_time(Left, Max1).
parallel_exec_metrics_get_future_dead_time(Metrics) = DeadTime :-
- parallel_exec_metrics(Inner, _) = Metrics,
- DeadTime = pem_get_future_dead_time(Inner).
+ Inner = Metrics ^ pem_inner_metrics,
+ RightConjDelay = Metrics ^ pem_right_conj_delay,
+ DeadTime = pem_get_future_dead_time(Inner, RightConjDelay).
-:- func pem_get_future_dead_time(parallel_exec_metrics_incomplete) = float.
+:- func pem_get_future_dead_time(parallel_exec_metrics_incomplete, float)
+ = float.
-pem_get_future_dead_time(pem_initial) = 0.0.
-pem_get_future_dead_time(pem_additional(Left, Seq, Par)) = DeadTime :-
+pem_get_future_dead_time(pem_initial(_), _) = 0.0.
+pem_get_future_dead_time(pem_additional(Left, Seq, Par), Delay) = DeadTime :-
DeadTime = ThisDeadTime + LeftDeadTime,
- ThisDeadTime = Par - Seq,
- LeftDeadTime = pem_get_future_dead_time(Left).
+ % Only use the delay if this conjunction contains some code in it's left
+ % conjunct.
+ (
+ Left = pem_initial(_),
+ ThisDelay = 0.0
+ ;
+ Left = pem_additional(_, _, _),
+ ThisDelay = Delay
+ ),
+ ThisDeadTime = Par - Seq - ThisDelay,
+ LeftDeadTime = pem_get_future_dead_time(Left, Delay).
parallel_exec_metrics_get_total_dead_time(Metrics) = DeadTime :-
DeadTime = FirstConjDeadTime + FutureDeadTime,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 489 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20100608/6417fb6f/attachment.sig>
More information about the reviews
mailing list