[m-rev.] diff: Zoltan's refactoring of the auto-parallelism code.
Paul Bone
pbone at csse.unimelb.edu.au
Fri Mar 30 15:06:20 AEDT 2012
Zoltan had been working on some changes to the automatic parallelisation code.
he's passed me his diffs and I intend to debug and commit them. This is the
first such commit.
These changes are purely refactoring/tidy-up changes. Since Zoltan and I have
both seen them I don't think a review is necessary.
deep_profiler/autopar_calc_overlap.m:
Refactor the calculate_parallel_cost predicate's signature.
deep_profiler/autopar_find_best_par.m:
Rename some symbols to give them more accurate names.
deep_profiler/autopar_search_callgraph.m:
Refactor some code, making it more succinct.
Move pard_goal_detail_to_pard_goal into autpar_types.m
deep_profiler/autopar_search_goals.m:
Refactoring.
Conform to changes in deep_profiler/autopar_find_best_par.m
deep_profiler/autopar_types.m:
Move pard_goal_detail_to_pard_goal here from autopar_search_callgraph.m
Refactoring.
Index: deep_profiler/autopar_calc_overlap.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/autopar_calc_overlap.m,v
retrieving revision 1.3
diff -u -p -b -r1.3 autopar_calc_overlap.m
--- deep_profiler/autopar_calc_overlap.m 3 May 2011 04:35:00 -0000 1.3
+++ deep_profiler/autopar_calc_overlap.m 30 Mar 2012 03:46:56 -0000
@@ -19,15 +19,16 @@
:- import_module mdprof_fb.automatic_parallelism.autopar_types.
- % calculate_parallel_cost(Info, !Parallelisation).
+ % calculate_parallel_cost(Info, !Parallelisation, CostData).
%
% Analyse the parallel conjuncts and determine their likely performance.
%
% This is the new parallel execution overlap algorithm, it is general and
% therefore we also use it for independent conjunctions.
%
-:- pred calculate_parallel_cost(parallelisation_cost_data::out,
- incomplete_parallelisation::in, incomplete_parallelisation::out) is det.
+:- pred calculate_parallel_cost(implicit_parallelism_info::in,
+ incomplete_parallelisation::in, incomplete_parallelisation::out,
+ parallelisation_cost_data::out) is det.
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
@@ -57,7 +58,7 @@
%----------------------------------------------------------------------------%
-calculate_parallel_cost(CostData, !Parallelisation) :-
+calculate_parallel_cost(Info, !Parallelisation, CostData) :-
ParConj = ip_get_par_conjs(!.Parallelisation),
NumCalls = !.Parallelisation ^ ip_num_calls,
@@ -70,7 +71,6 @@ calculate_parallel_cost(CostData, !Paral
(func(P0, MaybeCost) = P0 ^ ip_maybe_goals_after_cost := MaybeCost),
ip_get_goals_after, CostAfterPercall, NumCalls, !Parallelisation),
- Info = !.Parallelisation ^ ip_info,
Opts = Info ^ ipi_opts,
SparkCost = Opts ^ cpcp_sparking_cost,
SparkDelay = Opts ^ cpcp_sparking_delay,
@@ -610,8 +610,8 @@ var_first_use_time(FindProdOrCons, TimeB
UseTime = Use ^ vui_cost_until_use
;
UseType = var_use_other,
- % The analysis didn't recognise the instantiation here, so use a
- % conservative default for the production time.
+ % The analysis didn't recognise the instantiation here,
+ % so use a conservative default for the production time.
% XXX: How often does this occur?
(
FindProdOrCons = find_production,
Index: deep_profiler/autopar_find_best_par.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/autopar_find_best_par.m,v
retrieving revision 1.5
diff -u -p -b -r1.5 autopar_find_best_par.m
--- deep_profiler/autopar_find_best_par.m 6 May 2011 05:03:27 -0000 1.5
+++ deep_profiler/autopar_find_best_par.m 30 Mar 2012 03:46:56 -0000
@@ -27,18 +27,18 @@
:- import_module list.
:- import_module maybe.
-:- type best_parallelisation
- ---> bp_parallel_execution(
- bp_goals_before :: list(pard_goal_detail),
- bp_par_conjs :: list(seq_conj(pard_goal_detail)),
- bp_goals_after :: list(pard_goal_detail),
- bp_is_dependent :: conjuncts_are_dependent,
- bp_par_exec_metrics :: parallel_exec_metrics
+:- type full_parallelisation
+ ---> fp_parallel_execution(
+ fp_goals_before :: list(pard_goal_detail),
+ fp_par_conjs :: list(seq_conj(pard_goal_detail)),
+ fp_goals_after :: list(pard_goal_detail),
+ fp_is_dependent :: conjuncts_are_dependent,
+ fp_par_exec_metrics :: parallel_exec_metrics
).
:- pred find_best_parallelisation(implicit_parallelism_info::in,
program_location::in, list(pard_goal_detail)::in,
- maybe(best_parallelisation)::out,
+ maybe(full_parallelisation)::out,
cord(message)::in, cord(message)::out) is det.
%-----------------------------------------------------------------------------%
@@ -208,7 +208,7 @@ preprocess_conjunction(Goals, MaybeGoals
identify_costly_goals(Goals, 0, CostlyGoalsIndexes),
(
CostlyGoalsIndexes = [FirstCostlyGoalIndexPrime | OtherIndexes],
- last(OtherIndexes, LastCostlyGoalIndexPrime)
+ list.last(OtherIndexes, LastCostlyGoalIndexPrime)
->
FirstCostlyGoalIndex = FirstCostlyGoalIndexPrime,
LastCostlyGoalIndex = LastCostlyGoalIndexPrime
@@ -224,7 +224,7 @@ preprocess_conjunction(Goals, MaybeGoals
; Detism = cc_multidet_rep
),
- % Phase 3: Process the middle section into groups.
+ % Phase 4: Process the middle section into groups.
foldl_sub_array(preprocess_conjunction_into_groups, GoalsArray,
FirstCostlyGoalIndex, LastCostlyGoalIndex, [], RevGoalGroups),
list.reverse(RevGoalGroups, GoalGroups),
@@ -269,10 +269,10 @@ build_dependency_graph([PG | PGs], ConjN
InstMapInfo = PG ^ goal_annotation ^ pgd_inst_map_info,
% For each variable consumed by a goal we find out which goals instantiate
- % that variable and add them as it's dependencies along with their
+ % that variable and add them as its dependencies along with their
% dependencies. NOTE: We only consider variables that are read
- % and not those that are set. This is safe because we only bother
- % analysing single assignment code.
+ % and not those that are set. This is safe because we only analyse
+ % single assignment code.
RefedVars = InstMapInfo ^ im_consumed_vars,
digraph.add_vertex(ConjNum, ThisConjKey, !Graph),
MaybeAddEdge = ( pred(RefedVar::in, GraphI0::in, GraphI::out) is det :-
@@ -286,9 +286,9 @@ build_dependency_graph([PG | PGs], ConjN
),
list.foldl(MaybeAddEdge, set.to_sorted_list(RefedVars), !Graph),
- % For each variable instantiated by this goal add it to the VarDepMap with
- % this goal as it's instantiator. That is a maping from the variable to
- % the conj num.
+ % For each variable instantiated by this goal, add it to the VarDepMap
+ % with this goal as its instantiator. That is a mapping from the variable
+ % to the conj num.
InstVars = InstMapInfo ^ im_bound_vars,
InsertForConjNum = ( pred(InstVar::in, MapI0::in, MapI::out) is det :-
map.det_insert(InstVar, ConjNum, MapI0, MapI)
@@ -381,7 +381,7 @@ preprocess_conjunction_into_groups(Index
%
:- pred find_best_parallelisation_complete_bnb(implicit_parallelism_info::in,
program_location::in, best_par_algorithm_simple::in,
- goals_for_parallelisation::in, maybe(best_parallelisation)::out) is det.
+ goals_for_parallelisation::in, maybe(full_parallelisation)::out) is det.
find_best_parallelisation_complete_bnb(Info, Location, Algorithm,
PreprocessedGoals, MaybeBestParallelisation) :-
@@ -389,10 +389,11 @@ find_best_parallelisation_complete_bnb(I
io.format("D: Find best parallelisation for:\n%s\n",
[s(LocationString)], !IO),
location_to_string(1, Location, LocationCord),
- string.append_list(list(LocationCord), LocationString),
+ string.append_list(cord.list(LocationCord), LocationString),
NumGoals = size(PreprocessedGoals ^ gfp_goals),
NumGoalsBefore = PreprocessedGoals ^ gfp_first_costly_goal,
- NumGoalsAfter = NumGoals - PreprocessedGoals ^ gfp_last_costly_goal - 1,
+ NumGoalsAfter = NumGoals -
+ PreprocessedGoals ^ gfp_last_costly_goal - 1,
NumGoalsMiddle = NumGoals - NumGoalsBefore - NumGoalsAfter,
io.format("D: Problem size (before, middle, after): %d,%d,%d\n",
[i(NumGoalsBefore), i(NumGoalsMiddle), i(NumGoalsAfter)], !IO),
@@ -424,28 +425,25 @@ find_best_parallelisation_complete_bnb(I
ParalleliseDepConjs = do_not_parallelise_dep_conjs,
% Try again to get the best dependent parallelisation.
% This is used for guided parallelisation.
- TempInfo = Info ^ ipi_opts ^ cpcp_parallelise_dep_conjs :=
+ UpdatedInfo = Info ^ ipi_opts ^ cpcp_parallelise_dep_conjs :=
parallelise_dep_conjs(estimate_speedup_by_overlap),
- find_best_parallelisation_complete_bnb(TempInfo, Location,
+ find_best_parallelisation_complete_bnb(UpdatedInfo, Location,
Algorithm, PreprocessedGoals, MaybeBestParallelisation)
)
).
- % The objective function for the branch and bound search.
- % This is ParTime + ParOverheads * 2. That is we are willing to pay
- % 1 unit of parallel overheads to get a 2 unit improvement
- % of parallel execution time.
+ % Profiling information for an execution of the solver.
%
-:- func parallelisation_get_objective_value(best_parallelisation) = float.
+:- func parallelisation_get_objective_value(full_parallelisation) = float.
parallelisation_get_objective_value(Parallelisation) = Value :-
- Metrics = Parallelisation ^ bp_par_exec_metrics,
+ Metrics = Parallelisation ^ fp_par_exec_metrics,
Value = Metrics ^ pem_par_time +
parallel_exec_metrics_get_overheads(Metrics) * 2.0.
:- impure pred generate_parallelisations(implicit_parallelism_info::in,
best_par_algorithm_simple::in, goals_for_parallelisation::in,
- bnb_state(best_parallelisation)::in, best_parallelisation::out) is nondet.
+ bnb_state(full_parallelisation)::in, full_parallelisation::out) is nondet.
generate_parallelisations(Info, Algorithm, GoalsForParallelisation,
BNBState, BestParallelisation) :-
@@ -488,7 +486,7 @@ start_building_parallelisation(Info, Pre
% Finalise the parallelisation.
%
:- pred finalise_parallelisation(incomplete_parallelisation::in,
- best_parallelisation::out) is det.
+ full_parallelisation::out) is det.
finalise_parallelisation(Incomplete, Best) :-
GoalsBefore = ip_get_goals_before(Incomplete),
@@ -506,13 +504,13 @@ finalise_parallelisation(Incomplete, Bes
Metrics = finalise_parallel_exec_metrics(Metrics0),
par_conj_overlap_is_dependent(Overlap, IsDependent),
ParConjs = ip_get_par_conjs(Incomplete),
- Best = bp_parallel_execution(GoalsBefore, ParConjs,
+ Best = fp_parallel_execution(GoalsBefore, ParConjs,
GoalsAfter, IsDependent, Metrics).
%----------------------------------------------------------------------------%
:- semipure pred add_goals_into_first_par_conj(
- bnb_state(best_parallelisation)::in,
+ bnb_state(full_parallelisation)::in,
incomplete_parallelisation::in, incomplete_parallelisation::out) is multi.
add_goals_into_first_par_conj(BNBState, !Parallelisation) :-
@@ -533,7 +531,7 @@ add_goals_into_first_par_conj(BNBState,
).
:- semipure pred add_goals_into_last_par_conj(
- bnb_state(best_parallelisation)::in,
+ bnb_state(full_parallelisation)::in,
incomplete_parallelisation::in, incomplete_parallelisation::out) is multi.
add_goals_into_last_par_conj(BNBState, !Parallelisation) :-
@@ -578,7 +576,7 @@ start_first_par_conjunct(!GoalGroups, !P
).
:- impure pred generate_parallelisations_body(implicit_parallelism_info::in,
- bnb_state(best_parallelisation)::in, best_par_algorithm_simple::in,
+ bnb_state(full_parallelisation)::in, best_par_algorithm_simple::in,
list(goal_group(goal_classification))::in,
incomplete_parallelisation::in, incomplete_parallelisation::out) is nondet.
@@ -688,29 +686,30 @@ should_expand_search(BNBState, Algorithm
% Test the parallelisation against the best one known to the branch and
% bound solver.
%
-:- semipure pred test_parallelisation(bnb_state(best_parallelisation)::in,
+:- semipure pred test_parallelisation(bnb_state(full_parallelisation)::in,
incomplete_parallelisation::in, incomplete_parallelisation::out)
is semidet.
test_parallelisation(BNBState, !Parallelisation) :-
- calculate_parallel_cost(CostData, !Parallelisation),
Info = !.Parallelisation ^ ip_info,
- test_dependance(Info, CostData),
+ calculate_parallel_cost(Info, !Parallelisation, CostData),
+ test_dependence(Info, CostData),
% XXX: We shouldn't need to finalize the parallelisation before testing it.
% This is a limitation of the branch and bound module.
finalise_parallelisation(!.Parallelisation, TestParallelisation),
semipure test_incomplete_solution(BNBState, TestParallelisation).
- % Score the parallelisation.
+ % Test the parallelisation against the best one known to the branch and
+ % bound solver.
%
-:- pred score_parallelisation(bnb_state(best_parallelisation)::in,
+:- pred score_parallelisation(bnb_state(full_parallelisation)::in,
maybe(float)::out,
incomplete_parallelisation::in, incomplete_parallelisation::out) is det.
score_parallelisation(BNBState, MaybeScore, !Parallelisation) :-
- calculate_parallel_cost(CostData, !Parallelisation),
Info = !.Parallelisation ^ ip_info,
- ( test_dependance(Info, CostData) ->
+ calculate_parallel_cost(Info, !Parallelisation, CostData),
+ ( test_dependence(Info, CostData) ->
finalise_parallelisation(!.Parallelisation, TestParallelisation),
score_solution(BNBState, TestParallelisation, Score),
MaybeScore = yes(Score)
@@ -718,13 +717,13 @@ score_parallelisation(BNBState, MaybeSco
MaybeScore = no
).
- % Test that the parallelisation includes dependant parallelism
+ % Test that the parallelisation includes dependent parallelism
% only if permitted by the user.
%
-:- pred test_dependance(implicit_parallelism_info::in,
+:- pred test_dependence(implicit_parallelism_info::in,
parallelisation_cost_data::in) is semidet.
-test_dependance(Info, CostData) :-
+test_dependence(Info, CostData) :-
Overlap = CostData ^ pcd_par_exec_overlap,
ParalleliseDepConjs = Info ^ ipi_opts ^ cpcp_parallelise_dep_conjs,
par_conj_overlap_is_dependent(Overlap, IsDependent),
@@ -742,7 +741,7 @@ par_conj_overlap_is_dependent(peo_conjun
par_conj_overlap_is_dependent(Left, IsDependent0),
(
IsDependent0 = conjuncts_are_dependent(VarSetLeft),
- VarSet = union(VarSet0, VarSetLeft),
+ VarSet = set.union(VarSet0, VarSetLeft),
IsDependent = conjuncts_are_dependent(VarSet)
;
IsDependent0 = conjuncts_are_independent,
@@ -758,6 +757,8 @@ par_conj_overlap_is_dependent(peo_conjun
:- pred add_one_goal_into_first_par_conj(incomplete_parallelisation::in,
incomplete_parallelisation::out) is det.
+%----------------------------------------------------------------------------%
+
:- pred add_one_goal_into_last_par_conj(incomplete_parallelisation::in,
incomplete_parallelisation::out) is det.
@@ -781,11 +782,11 @@ add_one_goal_into_last_par_conj(!Paralle
:- mode foldl_sub_array(pred(in, in, in, out) is det,
in, in, in, in, out) is det.
-foldl_sub_array(Pred, Array, Index, Last, !A) :-
+foldl_sub_array(Pred, Array, Index, Last, !Acc) :-
( Index =< Last ->
- X = lookup(Array, Index),
- Pred(Index, X, !A),
- foldl_sub_array(Pred, Array, Index + 1, Last, !A)
+ array.lookup(Array, Index, X),
+ Pred(Index, X, !Acc),
+ foldl_sub_array(Pred, Array, Index + 1, Last, !Acc)
;
true
).
Index: deep_profiler/autopar_search_callgraph.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/autopar_search_callgraph.m,v
retrieving revision 1.8
diff -u -p -b -r1.8 autopar_search_callgraph.m
--- deep_profiler/autopar_search_callgraph.m 27 Sep 2011 04:41:25 -0000 1.8
+++ deep_profiler/autopar_search_callgraph.m 30 Mar 2012 03:46:56 -0000
@@ -9,9 +9,9 @@
% File: autopar_search_callgraph.m
% Author: pbone.
%
-% This module contains the code for analysing deep profiles of programs in
-% order to determine how best to automatically parallelise the program. This
-% code is used by the mdprof_create_feedback tool.
+% This module contains the code for analysing deep profiles of programs
+% in order to determine how best to automatically parallelise the program.
+% This code is used by the mdprof_create_feedback tool.
%
%-----------------------------------------------------------------------------%
@@ -21,7 +21,6 @@
:- import_module mdbcomp.
:- import_module mdbcomp.feedback.
:- import_module mdbcomp.feedback.automatic_parallelism.
-:- import_module mdprof_fb.automatic_parallelism.autopar_types.
:- import_module message.
:- import_module profile.
@@ -32,17 +31,9 @@
% Build the candidate parallel conjunctions feedback information used for
% implicit parallelism.
%
-:- pred candidate_parallel_conjunctions(
- candidate_par_conjunctions_params::in, deep::in, cord(message)::out,
- feedback_info::in, feedback_info::out) is det.
-
-%-----------------------------------------------------------------------------%
-
-% XXX temporary exports
-
-:- pred pard_goal_detail_to_pard_goal(
- candidate_par_conjunction(pard_goal_detail)::in,
- pard_goal_detail::in, pard_goal::out) is det.
+:- pred candidate_parallel_conjunctions(candidate_par_conjunctions_params::in,
+ deep::in, cord(message)::out, feedback_info::in, feedback_info::out)
+ is det.
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
@@ -56,6 +47,7 @@
:- import_module mdprof_fb.automatic_parallelism.autopar_annotate.
:- import_module mdprof_fb.automatic_parallelism.autopar_costs.
:- import_module mdprof_fb.automatic_parallelism.autopar_search_goals.
+:- import_module mdprof_fb.automatic_parallelism.autopar_types.
:- import_module measurement_units.
:- import_module measurements.
:- import_module program_representation_utils.
@@ -97,7 +89,7 @@ candidate_parallel_conjunctions(Params,
% Do not descend into cliques cheaper than the threshold.
deep_lookup_clique_index(Deep, Deep ^ root, RootCliquePtr),
% The +1 here accounts for the cost of the pseudo call into the mercury
- % runtime since it is modeled here as a call site that in reality
+ % runtime, since it is modeled here as a call site that in reality
% does not exist.
RootParallelism = no_parallelism,
candidate_parallel_conjunctions_clique(Params, Deep,
@@ -112,54 +104,6 @@ candidate_parallel_conjunctions(Params,
ConjunctionsAssocList),
put_feedback_data(CandidateParallelConjunctions, !Feedback).
-pard_goal_detail_to_pard_goal(CPC, !Goal) :-
- IsDependent = CPC ^ cpc_is_dependent,
- (
- IsDependent = conjuncts_are_dependent(SharedVars)
- ;
- IsDependent = conjuncts_are_independent,
- SharedVars = set.init
- ),
- transform_goal_rep(pard_goal_detail_annon_to_pard_goal_annon(SharedVars),
- !Goal).
-
-:- pred pard_goal_detail_annon_to_pard_goal_annon(set(var_rep)::in,
- pard_goal_detail_annotation::in, pard_goal_annotation::out) is det.
-
-pard_goal_detail_annon_to_pard_goal_annon(SharedVarsSet, PGD, PG) :-
- CostPercall = goal_cost_get_percall(PGD ^ pgd_cost),
- CostAboveThreshold = PGD ^ pgd_cost_above_threshold,
- SharedVars = to_sorted_list(SharedVarsSet),
-
- Coverage = PGD ^ pgd_coverage,
- get_coverage_before_det(Coverage, Calls),
- ( Calls > 0 ->
- list.foldl(build_var_use_list(PGD ^ pgd_var_production_map),
- SharedVars, [], Productions),
- list.foldl(build_var_use_list(PGD ^ pgd_var_consumption_map),
- SharedVars, [], Consumptions)
- ;
- Productions = [],
- Consumptions = []
- ),
-
- PG = pard_goal_annotation(CostPercall, CostAboveThreshold, Productions,
- Consumptions).
-
-:- pred build_var_use_list(map(var_rep, lazy(var_use_info))::in, var_rep::in,
- assoc_list(var_rep, float)::in, assoc_list(var_rep, float)::out) is det.
-
-build_var_use_list(Map, Var, !List) :-
- (
- map.search(Map, Var, LazyUse),
- read_if_val(LazyUse, Use)
- ->
- UseTime = Use ^ vui_cost_until_use,
- !:List = [Var - UseTime | !.List]
- ;
- true
- ).
-
%----------------------------------------------------------------------------%
%
% Recurse the call graph searching for parallelisation opportunities.
@@ -191,7 +135,7 @@ candidate_parallel_conjunctions_clique(O
MaybeFirstPDPtr = no,
deep_lookup_clique_index(Deep, Deep ^ root, RootCliquePtr),
( CliquePtr = RootCliquePtr ->
- % It's okay, this clique never has an entry procedure.
+ % It is okay, this clique never has an entry procedure.
PDPtrs = OtherPDPtrs
;
CliquePtr = clique_ptr(CliqueNum),
@@ -407,10 +351,10 @@ update_parallelism_available_conj(Conj,
ConjNum =< FirstConjunct + Length
->
% The call into this clique gets parallelised by Conj.
- % XXX: If we knew the parallelisation type used for Conj we can do this
- % calculation more accurately. For instance, if this is a loop, then
- % we use as many cores as the loop has iterations. (Except for dead
- % time).
+ % XXX: If we knew the parallelisation type used for Conj, we could
+ % do this calculation more accurately. For instance, if this is a loop,
+ % then we use as many cores as the loop has iterations. (Except for
+ % dead time).
Metrics = Conj ^ cpc_par_exec_metrics,
CPUTime = parallel_exec_metrics_get_cpu_time(Metrics),
DeadTime = Metrics ^ pem_first_conj_dead_time +
@@ -659,17 +603,13 @@ build_candidate_par_conjunction_maps(Pro
CandidateProc0 = candidate_par_conjunctions_proc(VarTablePrime,
PushGoals, CPCs0),
CPCs = [Candidate | CPCs0],
- ( VarTable = VarTablePrime ->
- true
- ;
- unexpected($module, $pred, "var tables do not match")
- )
+ expect(unify(VarTable, VarTablePrime), $module, $pred,
+ "var tables do not match")
;
CPCs = [Candidate],
PushGoals = []
),
- CandidateProc = candidate_par_conjunctions_proc(VarTable, PushGoals,
- CPCs),
+ CandidateProc = candidate_par_conjunctions_proc(VarTable, PushGoals, CPCs),
map.set(ProcLabel, CandidateProc, !Map).
%----------------------------------------------------------------------------%
Index: deep_profiler/autopar_search_goals.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/autopar_search_goals.m,v
retrieving revision 1.6
diff -u -p -b -r1.6 autopar_search_goals.m
--- deep_profiler/autopar_search_goals.m 5 Dec 2011 05:34:34 -0000 1.6
+++ deep_profiler/autopar_search_goals.m 30 Mar 2012 03:46:56 -0000
@@ -9,7 +9,7 @@
% File: autopar_search_goals.m
% Authors: pbone, zs.
%
-% This module contains the code for searching goals for conjunctions worth
+% This module contains the code for searching a goal for conjunctions worth
% parallelising.
%
%-----------------------------------------------------------------------------%
@@ -42,8 +42,8 @@
reverse_goal_path::in, goal_rep(goal_id)::in,
pard_goal_detail::out, cord(message)::in, cord(message)::out) is det.
- % Check if it is appropriate to parallelise this call. That is it must be
- % model_det and have a cost above the call site cost threshold.
+ % Check if it is appropriate to parallelise this call. The call must be
+ % model_det, and must have a cost above the call site cost threshold.
% XXX probable bug: the cost criterion is implemented elsewhere.
%
:- pred can_parallelise_goal(goal_rep(T)::in) is semidet.
@@ -58,7 +58,6 @@
:- import_module mdprof_fb.automatic_parallelism.autopar_costs.
:- import_module mdprof_fb.automatic_parallelism.autopar_find_best_par.
:- import_module mdprof_fb.automatic_parallelism.autopar_reports.
-:- import_module mdprof_fb.automatic_parallelism.autopar_search_callgraph.
:- import_module measurements.
:- import_module program_representation_utils.
:- import_module report.
@@ -332,8 +331,8 @@ conj_get_conjunctions_worth_parallelisin
Costly = is_not_costly_goal,
% This goal might be costly if it is pushed into the cotexted
% of one of SinglesSoFar. This is common for recursive goals.
- filter(single_context_makes_goal_costly(Info, Conj), SinglesSoFar0,
- SinglesSoFarMakeConjCostly),
+ list.filter(single_context_makes_goal_costly(Info, Conj),
+ SinglesSoFar0, SinglesSoFarMakeConjCostly),
(
SinglesSoFarMakeConjCostly = []
;
@@ -533,18 +532,20 @@ merge_same_level_pushes(MainCandidate, [
pard_goal_detail::in, list(pard_goal_detail)::in,
reverse_goal_path::out, list(pard_goal_detail)::out) is det.
-push_goals_create_candidate(Info, RevCurPath, fgp_nil,
+push_goals_create_candidate(Info, RevCurPath, ForwardGoalPath,
GoalToPushInto, GoalsToPush0, RevCandidateGoalPath, CandidateConjs) :-
+ (
+ ForwardGoalPath = fgp_nil,
RevCandidateGoalPath = RevCurPath,
- % The pushed goals will have different costs in this context, in particular
- % the number of times they're called varies, This affects the per-call
- % costs of recursive calls.
- Calls = goal_cost_get_calls(GoalToPushInto ^ goal_annotation ^ pgd_cost),
+ % The pushed goals will have different costs in this context,
+ % in particular the number of times they're called varies. This affects
+ % the per-call costs of recursive calls.
+ Calls = goal_cost_get_calls(GoalToPushInto ^ goal_annotation
+ ^ pgd_cost),
map(fix_goal_counts(Info, Calls), GoalsToPush0, GoalsToPush),
- CandidateConjs = [GoalToPushInto | GoalsToPush].
-push_goals_create_candidate(Info, RevCurPath,
- fgp_cons(FirstRelStep, TailRelPath),
- GoalToPushInto, GoalsToPush0, RevCandidateGoalPath, CandidateConjs) :-
+ CandidateConjs = [GoalToPushInto | GoalsToPush]
+ ;
+ ForwardGoalPath = fgp_cons(FirstRelStep, TailRelPath),
GoalToPushInto = goal_rep(GoalExpr, _, _),
(
FirstRelStep = step_conj(N),
@@ -554,12 +555,14 @@ push_goals_create_candidate(Info, RevCur
% Conjoin GoalsToPush not with just the expensive goal,
% but with the whole conjunction containing it.
RevCandidateGoalPath = RevCurPath,
- % The pushed goals will have different costs in this context,
- % in particular the number of times they're called varies, This
- % affects the per-call costs of recursive calls.
+ % The pushed goals will have different costs in this
+ % context, in particular the number of times they're called
+ % varies. This affects the per-call costs of recursive
+ % calls.
Cost = GoalToPushInto ^ goal_annotation ^ pgd_cost,
Calls = goal_cost_get_calls(Cost),
- map(fix_goal_counts(Info, Calls), GoalsToPush0, GoalsToPush),
+ list.map(fix_goal_counts(Info, Calls),
+ GoalsToPush0, GoalsToPush),
CandidateConjs = Goals ++ GoalsToPush
;
TailRelPath = fgp_cons(_, _),
@@ -570,19 +573,20 @@ push_goals_create_candidate(Info, RevCur
TailRelPath, SubGoal, GoalsToPush0,
RevCandidateGoalPath, CandidateConjs)
;
- % We can't push goals into the non-last conjunct without
- % re-ordering, which is currently not supported. By
- % building a conjunction here we may still be able to
- % create a worthwhile parallelisation. However, there is a
- % trade-off to explore between this and not generating the
- % single expensive goal from within the conjunction and
- % therefore possibly finding other single expensive goals
- % later in this conjunction.
+ % We can't push goals into a non-last conjunct without
+ % reordering, which is currently not supported.
+ % By building a conjunction here, we may still be able
+ % to create a worthwhile parallelisation. However,
+ % there is a trade-off to explore between this
+ % and not generating the single expensive goal
+ % from within the conjunction, and therefore possibly
+ % finding other single expensive goals later in this
+ % conjunction.
RevCandidateGoalPath = RevCurPath,
Cost = GoalToPushInto ^ goal_annotation ^ pgd_cost,
Calls = goal_cost_get_calls(Cost),
- map(fix_goal_counts(Info, Calls), GoalsToPush0,
- GoalsToPush),
+ list.map(fix_goal_counts(Info, Calls),
+ GoalsToPush0, GoalsToPush),
CandidateConjs = Goals ++ GoalsToPush
)
)
@@ -666,6 +670,7 @@ push_goals_create_candidate(Info, RevCur
FirstRelStep = step_atomic_orelse(_),
% These should not exist in a profiled program.
unexpected($module, $pred, "atomic_orelse")
+ )
).
:- pred fix_goal_counts(implicit_parallelism_info::in, int::in,
@@ -705,7 +710,7 @@ pardgoals_build_candidate_conjunction(In
FirstConjNum = 1,
ParalleliseDepConjs = Info ^ ipi_opts ^ cpcp_parallelise_dep_conjs,
SpeedupThreshold = Info ^ ipi_opts ^ cpcp_speedup_threshold,
- BestParallelisation = bp_parallel_execution(GoalsBefore, ParConjs,
+ BestParallelisation = fp_parallel_execution(GoalsBefore, ParConjs,
GoalsAfter, IsDependent, Metrics),
Speedup = parallel_exec_metrics_get_speedup(Metrics),
Calls = Metrics ^ pem_num_calls,
@@ -765,73 +770,76 @@ pardgoals_build_candidate_conjunction(In
%-----------------------------------------------------------------------------%
-goal_to_pard_goal(Info, RevGoalPath, !Goal, !Messages) :-
- !.Goal = goal_rep(GoalExpr0, Detism, GoalId),
+goal_to_pard_goal(Info, RevGoalPath, Goal, DetailGoal, !Messages) :-
+ Goal = goal_rep(GoalExpr, Detism, GoalId),
InstMapInfo = get_goal_attribute_det(Info ^ ipi_inst_map_array, GoalId),
Coverage = get_goal_attribute_det(Info ^ ipi_coverage_array, GoalId),
get_coverage_before_det(Coverage, Before),
(
(
- GoalExpr0 = conj_rep(Conjs0),
+ GoalExpr = conj_rep(Conjs),
list.map_foldl2(conj_to_pard_goals(Info, RevGoalPath),
- Conjs0, Conjs, 1, _, !Messages),
- conj_calc_cost(Conjs, Before, Cost),
- GoalExpr = conj_rep(Conjs)
+ Conjs, DetailConjs, 1, _, !Messages),
+ conj_calc_cost(DetailConjs, Before, Cost),
+ DetailGoalExpr = conj_rep(DetailConjs)
;
- GoalExpr0 = disj_rep(Disjs0),
+ GoalExpr = disj_rep(Disjs),
list.map_foldl2(disj_to_pard_goals(Info, RevGoalPath),
- Disjs0, Disjs, 1, _, !Messages),
- disj_calc_cost(Disjs, Before, Cost),
- GoalExpr = disj_rep(Disjs)
+ Disjs, DetailDisjs, 1, _, !Messages),
+ disj_calc_cost(DetailDisjs, Before, Cost),
+ DetailGoalExpr = disj_rep(DetailDisjs)
;
- GoalExpr0 = switch_rep(Var, CanFail, Cases0),
+ GoalExpr = switch_rep(Var, CanFail, Cases),
list.map_foldl2(case_to_pard_goal(Info, RevGoalPath),
- Cases0, Cases, 1, _, !Messages),
- switch_calc_cost(Cases, Before, Cost),
- GoalExpr = switch_rep(Var, CanFail, Cases)
- ;
- GoalExpr0 = ite_rep(Cond0, Then0, Else0),
- goal_to_pard_goal(Info, rgp_cons(RevGoalPath, step_ite_cond),
- Cond0, Cond, !Messages),
- goal_to_pard_goal(Info, rgp_cons(RevGoalPath, step_ite_then),
- Then0, Then, !Messages),
- goal_to_pard_goal(Info, rgp_cons(RevGoalPath, step_ite_else),
- Else0, Else, !Messages),
- ite_calc_cost(Cond, Then, Else, Cost),
- GoalExpr = ite_rep(Cond, Then, Else)
+ Cases, DetailCases, 1, _, !Messages),
+ switch_calc_cost(DetailCases, Before, Cost),
+ DetailGoalExpr = switch_rep(Var, CanFail, DetailCases)
+ ;
+ GoalExpr = ite_rep(Cond, Then, Else),
+ CondRevGoalPath = rgp_cons(RevGoalPath, step_ite_cond),
+ ThenRevGoalPath = rgp_cons(RevGoalPath, step_ite_then),
+ ElseRevGoalPath = rgp_cons(RevGoalPath, step_ite_else),
+ goal_to_pard_goal(Info, CondRevGoalPath, Cond, DetailCond,
+ !Messages),
+ goal_to_pard_goal(Info, ThenRevGoalPath, Then, DetailThen,
+ !Messages),
+ goal_to_pard_goal(Info, ElseRevGoalPath, Else, DetailElse,
+ !Messages),
+ ite_calc_cost(DetailCond, DetailThen, DetailElse, Cost),
+ DetailGoalExpr = ite_rep(DetailCond, DetailThen, DetailElse)
;
- GoalExpr0 = negation_rep(SubGoal0),
- goal_to_pard_goal(Info, rgp_cons(RevGoalPath, step_neg),
- SubGoal0, SubGoal, !Messages),
- Cost = SubGoal ^ goal_annotation ^ pgd_cost,
- GoalExpr = negation_rep(SubGoal)
+ GoalExpr = negation_rep(SubGoal),
+ SubRevGoalPath = rgp_cons(RevGoalPath, step_neg),
+ goal_to_pard_goal(Info, SubRevGoalPath, SubGoal, DetailSubGoal,
+ !Messages),
+ Cost = DetailSubGoal ^ goal_annotation ^ pgd_cost,
+ DetailGoalExpr = negation_rep(DetailSubGoal)
;
- GoalExpr0 = scope_rep(SubGoal0, MaybeCut),
- goal_to_pard_goal(Info,
- rgp_cons(RevGoalPath, step_scope(MaybeCut)),
- SubGoal0, SubGoal, !Messages),
- Cost = SubGoal ^ goal_annotation ^ pgd_cost,
- GoalExpr = scope_rep(SubGoal, MaybeCut)
+ GoalExpr = scope_rep(SubGoal, MaybeCut),
+ SubRevGoalPath = rgp_cons(RevGoalPath, step_scope(MaybeCut)),
+ goal_to_pard_goal(Info, SubRevGoalPath, SubGoal, DetailSubGoal,
+ !Messages),
+ Cost = DetailSubGoal ^ goal_annotation ^ pgd_cost,
+ DetailGoalExpr = scope_rep(DetailSubGoal, MaybeCut)
),
PardGoalType = pgt_non_atomic_goal,
BoundVars = to_sorted_list(InstMapInfo ^ im_bound_vars),
list.foldl(
- goal_build_use_map(!.Goal, RevGoalPath, Cost, Info,
+ goal_build_use_map(Goal, RevGoalPath, Cost, Info,
var_use_production),
BoundVars, map.init, ProductionUseMap),
ConsumedVars = to_sorted_list(InstMapInfo ^ im_consumed_vars),
list.foldl(
- goal_build_use_map(!.Goal, RevGoalPath, Cost, Info,
+ goal_build_use_map(Goal, RevGoalPath, Cost, Info,
var_use_consumption),
ConsumedVars, map.init, ConsumptionUseMap)
;
- GoalExpr0 = atomic_goal_rep(Context, Line, BoundVars, AtomicGoal),
GoalExpr = atomic_goal_rep(Context, Line, BoundVars, AtomicGoal),
+ DetailGoalExpr = atomic_goal_rep(Context, Line, BoundVars, AtomicGoal),
atomic_pard_goal_type(Info, RevGoalPath, AtomicGoal, InstMapInfo,
PardGoalType, Messages),
- atomic_pard_goal_cost(Info, RevGoalPath, Coverage, AtomicGoal,
- Cost),
+ atomic_pard_goal_cost(Info, RevGoalPath, Coverage, AtomicGoal, Cost),
list.foldl(
atomic_goal_build_use_map(AtomicGoal, RevGoalPath, Info,
@@ -845,11 +853,10 @@ goal_to_pard_goal(Info, RevGoalPath, !Go
!:Messages = !.Messages ++ Messages
),
- % XXX: The goal annotations cannot represent reasons why a goal
- % can't be parallelised, for example it could be nondet, semidet or
- % impure.
+ % XXX: The goal annotations cannot represent reasons why a goal cannot be
+ % parallelised, for example it could be nondet, semidet or impure.
(
- can_parallelise_goal(!.Goal),
+ can_parallelise_goal(Goal),
goal_cost_above_par_threshold(Info, Cost)
->
CostAboveThreshold = cost_above_par_threshold
@@ -859,7 +866,7 @@ goal_to_pard_goal(Info, RevGoalPath, !Go
PardGoalAnnotation = pard_goal_detail(PardGoalType, InstMapInfo,
RevGoalPath, Coverage, Cost, CostAboveThreshold,
ProductionUseMap, ConsumptionUseMap),
- !:Goal = goal_rep(GoalExpr, Detism, PardGoalAnnotation).
+ DetailGoal = goal_rep(DetailGoalExpr, Detism, PardGoalAnnotation).
:- pred goal_build_use_map(goal_rep(goal_id)::in,
reverse_goal_path::in, goal_cost_csq::in, implicit_parallelism_info::in,
@@ -913,29 +920,27 @@ compute_goal_var_use_lazy(Goal, RevGoalP
).
:- pred conj_to_pard_goals(implicit_parallelism_info::in,
- reverse_goal_path::in, goal_rep(goal_id)::in,
- pard_goal_detail::out, int::in, int::out,
- cord(message)::in, cord(message)::out) is det.
+ reverse_goal_path::in, goal_rep(goal_id)::in, pard_goal_detail::out,
+ int::in, int::out, cord(message)::in, cord(message)::out) is det.
conj_to_pard_goals(Info, RevGoalPath, !Goal, !ConjNum, !Messages) :-
- goal_to_pard_goal(Info, rgp_cons(RevGoalPath, step_conj(!.ConjNum)),
- !Goal, !Messages),
+ ConjRevGoalPath = rgp_cons(RevGoalPath, step_conj(!.ConjNum)),
+ goal_to_pard_goal(Info, ConjRevGoalPath, !Goal, !Messages),
!:ConjNum = !.ConjNum + 1.
:- pred disj_to_pard_goals(implicit_parallelism_info::in,
- reverse_goal_path::in, goal_rep(goal_id)::in,
- pard_goal_detail::out, int::in, int::out,
- cord(message)::in, cord(message)::out) is det.
+ reverse_goal_path::in, goal_rep(goal_id)::in, pard_goal_detail::out,
+ int::in, int::out, cord(message)::in, cord(message)::out) is det.
disj_to_pard_goals(Info, RevGoalPath, !Goal, !DisjNum, !Messages) :-
- goal_to_pard_goal(Info, rgp_cons(RevGoalPath, step_disj(!.DisjNum)),
- !Goal, !Messages),
+ DisjRevGoalPath = rgp_cons(RevGoalPath, step_disj(!.DisjNum)),
+ goal_to_pard_goal(Info, DisjRevGoalPath, !Goal, !Messages),
!:DisjNum = !.DisjNum + 1.
:- pred case_to_pard_goal(implicit_parallelism_info::in,
- reverse_goal_path::in, case_rep(goal_id)::in,
- case_rep(pard_goal_detail_annotation)::out, int::in, int::out,
- cord(message)::in, cord(message)::out) is det.
+ reverse_goal_path::in,
+ case_rep(goal_id)::in, case_rep(pard_goal_detail_annotation)::out,
+ int::in, int::out, cord(message)::in, cord(message)::out) is det.
case_to_pard_goal(Info, RevGoalPath, !Case, !CaseNum, !Messages) :-
!.Case = case_rep(ConsId, OtherConsId, Goal0),
Index: deep_profiler/autopar_types.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/autopar_types.m,v
retrieving revision 1.2
diff -u -p -b -r1.2 autopar_types.m
--- deep_profiler/autopar_types.m 27 Jan 2011 08:03:53 -0000 1.2
+++ deep_profiler/autopar_types.m 30 Mar 2012 03:46:56 -0000
@@ -57,11 +57,11 @@
ipi_proc_label :: string_proc_label
).
- % A representation of a goal within a parallel conjunction. We don't have
+ % A representation of a goal within a parallel conjunction. We do not have
% to represent many types of goals or details about them, at least for now.
- % This type provides more detail than feedback.pard_goal, this detail isn't
- % required by the compiler and therefore not part of the feedback file
- % format.
+ % This type provides more detail than feedback.pard_goal. This extra detail
+ % is not required by the compiler, and therefore it is not part of the
+ % feedback file format.
%
:- type pard_goal_detail == goal_rep(pard_goal_detail_annotation).
@@ -84,11 +84,17 @@
pgd_cost_above_threshold :: cost_above_par_threshold,
- % Variable production and consumption information.
- pgd_var_production_map :: map(var_rep, lazy(var_use_info)),
- pgd_var_consumption_map :: map(var_rep, lazy(var_use_info))
+ % Information about when the inputs of this goal are consumed,
+ % and when its outputs are produced, relative to the start of
+ % this goal. Since we expect that many inputs and outputs
+ % will not be shared variables, we do not compute this
+ % information unless and until we need it.
+ pgd_var_production_map :: lazy_var_use_map,
+ pgd_var_consumption_map :: lazy_var_use_map
).
+:- type lazy_var_use_map == map(var_rep, lazy(var_use_info)).
+
:- type pard_goal_type
---> pgt_call(
% The argument modes and use information.
@@ -100,22 +106,9 @@
; pgt_other_atomic_goal
; pgt_non_atomic_goal.
-:- func ip_get_goals_before(incomplete_parallelisation) =
- list(pard_goal_detail).
-
-:- func ip_get_goals_after(incomplete_parallelisation) =
- list(pard_goal_detail).
-
-:- func ip_get_par_conjs(incomplete_parallelisation) =
- list(seq_conj(pard_goal_detail)).
-
-:- func ip_get_num_goals(incomplete_parallelisation) = int.
-
-:- func ip_get_num_parallel_conjuncts(incomplete_parallelisation) = int.
-
-:- func ip_get_num_goals_middle(incomplete_parallelisation) = int.
-
-:- func ip_calc_sharedvars_set(incomplete_parallelisation) = set(var_rep).
+:- pred pard_goal_detail_to_pard_goal(
+ candidate_par_conjunction(pard_goal_detail)::in,
+ pard_goal_detail::in, pard_goal::out) is det.
% Build sets of produced and consumed vars for a conjunct in a conjunction.
% Use with foldl to build these sets up for the whole conjunction. At the
@@ -199,11 +192,11 @@
ip_last_scheduled_goal :: int,
% The index of the last goal in each of the parallel conjuncts.
- % the very last parallel conjunct is donated by
+ % The very last parallel conjunct is denoted by
% ip_last_par_goal.
ip_par_conjs_rev_last_goal :: list(int),
- % The number of calls into this conjunction.
+ % The number of calls in this conjunction.
ip_num_calls :: int,
% Dependency relationships between goals.
@@ -225,7 +218,7 @@
pcd_productions_map :: map(var_rep, float)
).
- % This datastructure represents the execution of dependent conjuncts,
+ % This data structure represents the execution of dependent conjuncts,
% it tracks which variables are produced and consumed.
%
% TODO: Implement a pretty printer for this data.
@@ -256,22 +249,129 @@
dce_consumptions :: map(var_rep, float)
).
+:- func ip_get_goals_before(incomplete_parallelisation) =
+ list(pard_goal_detail).
+
+:- func ip_get_goals_after(incomplete_parallelisation) =
+ list(pard_goal_detail).
+
+:- func ip_get_par_conjs(incomplete_parallelisation) =
+ list(seq_conj(pard_goal_detail)).
+
+:- func ip_get_num_goals(incomplete_parallelisation) = int.
+
+:- func ip_get_num_parallel_conjuncts(incomplete_parallelisation) = int.
+
+:- func ip_get_num_goals_middle(incomplete_parallelisation) = int.
+
+:- func ip_calc_sharedvars_set(incomplete_parallelisation) = set(var_rep).
+
%-----------------------------------------------------------------------------%
:- implementation.
:- import_module int.
+:- import_module pair.
+
+%-----------------------------------------------------------------------------%
+
+pard_goal_detail_to_pard_goal(CPC, !Goal) :-
+ IsDependent = CPC ^ cpc_is_dependent,
+ (
+ IsDependent = conjuncts_are_dependent(SharedVars)
+ ;
+ IsDependent = conjuncts_are_independent,
+ SharedVars = set.init
+ ),
+ transform_goal_rep(pard_goal_detail_annon_to_pard_goal_annon(SharedVars),
+ !Goal).
+
+:- pred pard_goal_detail_annon_to_pard_goal_annon(set(var_rep)::in,
+ pard_goal_detail_annotation::in, pard_goal_annotation::out) is det.
+
+pard_goal_detail_annon_to_pard_goal_annon(SharedVarsSet, PGD, PG) :-
+ CostPercall = goal_cost_get_percall(PGD ^ pgd_cost),
+ CostAboveThreshold = PGD ^ pgd_cost_above_threshold,
+ SharedVars = set.to_sorted_list(SharedVarsSet),
+
+ Coverage = PGD ^ pgd_coverage,
+ get_coverage_before_det(Coverage, Calls),
+ ( Calls > 0 ->
+ list.foldl(build_var_use_list(PGD ^ pgd_var_production_map),
+ SharedVars, [], Productions),
+ list.foldl(build_var_use_list(PGD ^ pgd_var_consumption_map),
+ SharedVars, [], Consumptions)
+ ;
+ Productions = [],
+ Consumptions = []
+ ),
+ PG = pard_goal_annotation(CostPercall, CostAboveThreshold,
+ Productions, Consumptions).
+
+:- pred build_var_use_list(map(var_rep, lazy(var_use_info))::in, var_rep::in,
+ assoc_list(var_rep, float)::in, assoc_list(var_rep, float)::out) is det.
+
+build_var_use_list(Map, Var, !List) :-
+ (
+ map.search(Map, Var, LazyUse),
+ read_if_val(LazyUse, Use)
+ ->
+ UseTime = Use ^ vui_cost_until_use,
+ !:List = [Var - UseTime | !.List]
+ ;
+ true
+ ).
+
+%-----------------------------------------------------------------------------%
+
+%-----------------------------------------------------------------------------%
+
+conj_produced_and_consumed_vars(Conj, !Produced, !Consumed) :-
+ InstMapInfo = Conj ^ goal_annotation ^ pgd_inst_map_info,
+ !:Produced = set.union(!.Produced, InstMapInfo ^ im_bound_vars),
+ !:Consumed = set.union(!.Consumed, InstMapInfo ^ im_consumed_vars).
+
+%-----------------------------------------------------------------------------%
+
+identify_costly_goal(Annotation, Costly) :-
+ CostAboveThreshold = Annotation ^ pgd_cost_above_threshold,
+ (
+ CostAboveThreshold = cost_above_par_threshold,
+ % TODO: distinguish between compound goals with one costly branch,
+ % and compound goals where all branches are costly.
+ % TODO: Provide information about how many costly goals are within
+ % the goal so that we can try to parallelise each of those against
+ % an outer costly goal.
+ Costly = is_costly_goal
+ ;
+ CostAboveThreshold = cost_not_above_par_threshold,
+ Costly = is_not_costly_goal
+ ).
+
+identify_costly_goals([], _, []).
+identify_costly_goals([Goal | Goals], Index, Indexes) :-
+ identify_costly_goals(Goals, Index + 1, Indexes0),
+ identify_costly_goal(Goal ^ goal_annotation, Costly),
+ (
+ Costly = is_costly_goal,
+ Indexes = [Index | Indexes0]
+ ;
+ Costly = is_not_costly_goal,
+ Indexes = Indexes0
+ ).
+
+%-----------------------------------------------------------------------------%
ip_get_goals_before(Parallelisation) = GoalsBefore :-
Goals = Parallelisation ^ ip_goals,
FirstParGoalIndex = Parallelisation ^ ip_first_par_goal,
- fetch_items(Goals, 0, FirstParGoalIndex - 1, GoalsBefore).
+ array.fetch_items(Goals, 0, FirstParGoalIndex - 1, GoalsBefore).
ip_get_goals_after(Parallelisation) = GoalsAfter :-
Goals = Parallelisation ^ ip_goals,
LastParGoalIndex = Parallelisation ^ ip_last_par_goal,
- NumGoals = size(Goals),
- fetch_items(Goals, LastParGoalIndex + 1, NumGoals - 1, GoalsAfter).
+ NumGoals = array.size(Goals),
+ array.fetch_items(Goals, LastParGoalIndex + 1, NumGoals - 1, GoalsAfter).
ip_get_par_conjs(Incomplete) = ParConjs :-
Goals = Incomplete ^ ip_goals,
@@ -291,10 +391,10 @@ ip_get_par_conjs_2(Array, First, [Last |
fetch_items(Array, First, Last, Goals),
Conj = seq_conj(Goals).
-ip_get_num_goals(Incomplete) = size(Incomplete ^ ip_goals).
+ip_get_num_goals(Incomplete) = array.size(Incomplete ^ ip_goals).
ip_get_num_parallel_conjuncts(Incomplete) =
- length(Incomplete ^ ip_par_conjs_rev_last_goal) + 1.
+ list.length(Incomplete ^ ip_par_conjs_rev_last_goal) + 1.
ip_get_num_goals_middle(Incomplete) = LastParGoal - FirstParGoal + 1 :-
FirstParGoal = Incomplete ^ ip_first_par_goal,
@@ -312,44 +412,10 @@ ip_calc_sharedvars_set(Incomplete) = Sha
build_sharedvars_set(seq_conj(Conjs), !BoundVars, !SharedVars) :-
list.foldl2(conj_produced_and_consumed_vars, Conjs,
set.init, ProducedVars, set.init, ConsumedVars),
- % The new shared vars are previously bound variables that are cosumed in
+ % The new shared vars are previously bound variables that are consumed in
% this conjunct. This must be calculated before !BoundVars is updated.
SharedVars = set.intersect(!.BoundVars, ConsumedVars),
!:SharedVars = set.union(!.SharedVars, SharedVars),
!:BoundVars = set.union(!.BoundVars, ProducedVars).
-conj_produced_and_consumed_vars(Conj, !Produced, !Consumed) :-
- InstMapInfo = Conj ^ goal_annotation ^ pgd_inst_map_info,
- !:Produced = set.union(!.Produced, InstMapInfo ^ im_bound_vars),
- !:Consumed = set.union(!.Consumed, InstMapInfo ^ im_consumed_vars).
-
-%-----------------------------------------------------------------------------%
-
-identify_costly_goal(Annotation, Costly) :-
- CostAboveThreshold = Annotation ^ pgd_cost_above_threshold,
- (
- CostAboveThreshold = cost_above_par_threshold,
- % TODO: distinguish between compound goals with one costly branch,
- % and compound goals where all branches are costly.
- % TODO: Provide information about how many costly goals are within
- % the goal so that we can try to parallelise each of those against
- % an outer costly goal.
- Costly = is_costly_goal
- ;
- CostAboveThreshold = cost_not_above_par_threshold,
- Costly = is_not_costly_goal
- ).
-
-identify_costly_goals([], _, []).
-identify_costly_goals([Goal | Goals], Index, Indexes) :-
- identify_costly_goals(Goals, Index + 1, Indexes0),
- identify_costly_goal(Goal ^ goal_annotation, Costly),
- (
- Costly = is_costly_goal,
- Indexes = [Index | Indexes0]
- ;
- Costly = is_not_costly_goal,
- Indexes = Indexes0
- ).
-
%-----------------------------------------------------------------------------%
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 489 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20120330/d2ecd436/attachment.sig>
More information about the reviews
mailing list