[m-rev.] for review: implicit_parallelism (new module)
Jerome Tannier
jerome.tannier at student.fundp.ac.be
Tue Nov 14 13:38:08 AEDT 2006
Estimated hours taken: 40
Branches: main
compiler/implicit_parallelism.m:
New module which reads the profiling feedback file and decides where
parallelism should be introduced.
compiler/dep_par_conj.m:
Moved find_shared_variables in the interface (Needed for implicit_parallelism.m).
compiler/mercury_compile.m:
Add the call to apply implicit parallelism.
compiler/options:
Add implicit-parallelism and feedback-file options.
compiler/transform_hlds.m:
Add transform_paralellism in the imported modules.
deep_profiler/dump.m:
Add "all" option to dump everything out of the Deep.data file.
deep_profiler/mdprof_feedback.m:
Rename distribution to measure.
Add handling of dump_stages and dump_options.
Correct the way the list of CSS is built (elems were put in the wrong order).
? DIFF
#-----------------------------------------------------------------------------#
Index: compiler/dep_par_conj.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/dep_par_conj.m,v
retrieving revision 1.13
diff -u -r1.13 dep_par_conj.m
--- compiler/dep_par_conj.m 20 Oct 2006 02:06:29 -0000 1.13
+++ compiler/dep_par_conj.m 14 Nov 2006 02:29:03 -0000
@@ -64,15 +64,23 @@
:- module transform_hlds.dep_par_conj.
:- interface.
+:- import_module hlds.hlds_goal.
:- import_module hlds.hlds_module.
+:- import_module hlds.instmap.
+:- import_module parse_tree.prog_data.
:- import_module io.
+:- import_module set.
%-----------------------------------------------------------------------------%
:- pred dependent_par_conj(module_info::in, module_info::out, io::di, io::uo)
is det.
+ %Used by transform_hlds.implicit_parallelism.
+:- func find_shared_variables(module_info, instmap, hlds_goals)
+ = set(prog_var).
+
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
@@ -82,16 +90,13 @@
:- import_module check_hlds.mode_util.
:- import_module check_hlds.purity.
:- import_module hlds.goal_util.
-:- import_module hlds.hlds_goal.
:- import_module hlds.hlds_pred.
-:- import_module hlds.instmap.
:- import_module hlds.pred_table.
:- import_module hlds.quantification.
:- import_module libs.compiler_util.
:- import_module libs.globals.
:- import_module libs.options.
:- import_module mdbcomp.prim_data.
-:- import_module parse_tree.prog_data.
:- import_module parse_tree.prog_type.
:- import_module parse_tree.prog_util.
:- import_module transform_hlds.dependency_graph.
@@ -103,7 +108,6 @@
:- import_module map.
:- import_module maybe.
:- import_module pair.
-:- import_module set.
:- import_module std_util.
:- import_module string.
:- import_module svmap.
@@ -678,9 +682,6 @@
% XXX this code is probably too complicated. I think Thomas already had a
% more elegant way to find the shared variables somewhere, using multisets.
%
-:- func find_shared_variables(module_info, instmap, hlds_goals)
- = set(prog_var).
-
find_shared_variables(ModuleInfo, InstMap, Goals) = SharedVars :-
list.map2(get_nonlocals_and_instmaps, Goals, Nonlocals, InstMapDeltas),
find_shared_variables_2(ModuleInfo, 0, Nonlocals, InstMap, InstMapDeltas,
Index: compiler/implicit_parallelism.m
===================================================================
RCS file: compiler/implicit_parallelism.m
diff -N compiler/implicit_parallelism.m
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ compiler/implicit_parallelism.m 14 Nov 2006 02:29:03 -0000
@@ -0,0 +1,935 @@
+%-----------------------------------------------------------------------------%
+% vim: ft=mercury ts=4 sw=4 et
+%-----------------------------------------------------------------------------%
+% Copyright (C) 2006 The University of Melbourne.
+% This file may only be copied under the terms of the GNU General
+% Public License - see the file COPYING in the Mercury distribution.
+%-----------------------------------------------------------------------------%
+%
+% Author: tannier.
+%
+% This module reads the profiling feedback file generated by the mdprof_feedback
+% module and decides where parallelism should be used (implicit parallelism).
+%
+%TODO - Once a call which has to be parallelized is found, search forward AND
+% backward for the closet goal to be parallelized/parallel conjunction and
+% determine which side is the best (on the basis of the number of shared
+% variables).
+%
+%-----------------------------------------------------------------------------%
+
+:- module transform_hlds.implicit_parallelism.
+:- interface.
+
+:- import_module hlds.hlds_module.
+
+:- import_module io.
+
+%-----------------------------------------------------------------------------%
+
+:- pred apply_implicit_parallelism_transformation(module_info::in,
+ module_info::out, string::in, io::di, io::uo) is det.
+
+%-----------------------------------------------------------------------------%
+%-----------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module check_hlds.inst_match.
+:- import_module check_hlds.mode_util.
+:- import_module hlds.goal_util.
+:- import_module hlds.hlds_goal.
+:- import_module hlds.hlds_pred.
+:- import_module hlds.instmap.
+:- import_module hlds.quantification.
+:- import_module mdbcomp.prim_data.
+:- import_module parse_tree.error_util.
+:- import_module parse_tree.prog_data.
+:- import_module transform_hlds.dep_par_conj.
+
+:- import_module bool.
+:- import_module char.
+:- import_module counter.
+:- import_module int.
+:- import_module list.
+:- import_module maybe.
+:- import_module pair.
+:- import_module require.
+:- import_module set.
+:- import_module string.
+
+%-----------------------------------------------------------------------------%
+
+ % Represent a call site static which has to be parallelized.
+ %
+:- type css_to_be_parallelized
+ ---> css_to_be_parallelized(
+ caller :: string,
+ slot_number :: int,
+ kind :: string,
+ callee :: string
+ ).
+
+%-----------------------------------------------------------------------------%
+
+apply_implicit_parallelism_transformation(!ModuleInfo, FeedbackFile, !IO) :-
+ parse_feedback_file(FeedbackFile, MaybeCSSListToBeParallelized, !IO),
+ (
+ MaybeCSSListToBeParallelized = error(Err),
+ io.stderr_stream(Stderr, !IO),
+ io.write_string(Stderr, Err ++ "\n", !IO)
+ ;
+ MaybeCSSListToBeParallelized = ok(CSSListToBeParallelized),
+ module_info_predids(!.ModuleInfo, PredIds),
+ process_preds_for_implicit_parallelism(PredIds,
+ CSSListToBeParallelized, !ModuleInfo)
+ ).
+
+ % Process predicates for implicit parallelism.
+ %
+:- pred process_preds_for_implicit_parallelism(list(pred_id)::in,
+ list(css_to_be_parallelized)::in, module_info::in, module_info::out)
+ is det.
+
+process_preds_for_implicit_parallelism([], _CSSListToBeParallelized,
+ !ModuleInfo).
+process_preds_for_implicit_parallelism([PredId | PredIdList],
+ CSSListToBeParallelized, !ModuleInfo) :-
+ module_info_pred_info(!.ModuleInfo, PredId, PredInfo),
+ ProcIds = pred_info_non_imported_procids(PredInfo),
+ process_procs_for_implicit_parallelism(PredId, ProcIds,
+ CSSListToBeParallelized, !ModuleInfo),
+ process_preds_for_implicit_parallelism(PredIdList,
+ CSSListToBeParallelized, !ModuleInfo).
+
+ % Process procedures for implicit parallelism.
+ %
+:- pred process_procs_for_implicit_parallelism(pred_id::in,
+ list(proc_id)::in, list(css_to_be_parallelized)::in,
+ module_info::in, module_info::out) is det.
+
+process_procs_for_implicit_parallelism(_PredId, [],
+ _CSSListToBeParallelized, !ModuleInfo).
+process_procs_for_implicit_parallelism(PredId, [ProcId | ProcIds],
+ CSSListToBeParallelized, !ModuleInfo) :-
+ module_info_pred_proc_info(!.ModuleInfo, PredId, ProcId,
+ PredInfo0, ProcInfo0),
+ % Initialize the counter for the slot number.
+ Counter = counter.init(0),
+ pred_proc_id_to_raw_id(PredInfo0, ProcId, CallerRawId),
+ get_callees_feedback(CallerRawId, CSSListToBeParallelized, [],
+ CalleesList),
+ list.length(CalleesList, CalleesListLength),
+ ( CalleesListLength = 0 ->
+ % No calls to be parallelized in this procedure.
+ process_procs_for_implicit_parallelism(PredId, ProcIds,
+ CSSListToBeParallelized, !ModuleInfo)
+ ;
+ proc_info_get_goal(ProcInfo0, Body0),
+ process_goal_for_implicit_parallelism(Body0, Body, ProcInfo0,
+ !ModuleInfo, no, _, 0, _, CalleesList, _, Counter, _),
+ proc_info_set_goal(Body, ProcInfo0, ProcInfo1),
+ pred_info_get_markers(PredInfo0, Markers0),
+ add_marker(marker_may_have_parallel_conj, Markers0, Markers),
+ pred_info_set_markers(Markers, PredInfo0, PredInfo1),
+ requantify_proc(ProcInfo1, ProcInfo2),
+ RecomputeAtomic = no,
+ recompute_instmap_delta_proc(RecomputeAtomic, ProcInfo2, ProcInfo,
+ !ModuleInfo),
+ pred_info_set_proc_info(ProcId, ProcInfo, PredInfo1, PredInfo),
+ module_info_set_pred_info(PredId, PredInfo, !ModuleInfo),
+ process_procs_for_implicit_parallelism(PredId, ProcIds,
+ CSSListToBeParallelized, !ModuleInfo)
+ ).
+
+ % By using the feedback file, build a list of css_to_be_parallelized whose
+ % caller is equal to the first parameter.
+ %
+:- pred get_callees_feedback(string::in, list(css_to_be_parallelized)::in,
+ list(css_to_be_parallelized)::in, list(css_to_be_parallelized)::out) is det.
+
+get_callees_feedback(_Caller, [], !ResultAcc).
+get_callees_feedback(Caller, [CSSToBeParallelized | CSSListToBeParallelized],
+ !ResultAcc) :-
+ CSSToBeParallelized = css_to_be_parallelized(CSSCaller, _, _, _),
+ ( Caller = CSSCaller ->
+ !:ResultAcc = [ CSSToBeParallelized | !.ResultAcc ],
+ get_callees_feedback(Caller, CSSListToBeParallelized, !ResultAcc)
+ ;
+ get_callees_feedback(Caller, CSSListToBeParallelized, !ResultAcc)
+ ).
+
+ % Process a goal for implicit parallelism.
+ % MaybeConj is the conjunction which contains HLDSGoal.
+ %
+:- pred process_goal_for_implicit_parallelism(hlds_goal::in, hlds_goal::out,
+ proc_info::in, module_info::in, module_info::out,
+ maybe(hlds_goal_expr)::in, maybe(hlds_goal_expr)::out, int ::in, int::out,
+ list(css_to_be_parallelized)::in, list(css_to_be_parallelized)::out,
+ counter::in, counter::out) is det.
+
+process_goal_for_implicit_parallelism(!HLDSGoal, ProcInfo, !ModuleInfo,
+ !MaybeConj, !IndexInConj, !CalleeListToBeParallelized, !Counter) :-
+ !.HLDSGoal = HLDSGoalExpr0 - HLDSGoalInfo,
+ (
+ HLDSGoalExpr0 = unify(_, _, _, _, _),
+ increment_index_if_in_conj(!.MaybeConj, !IndexInConj)
+ ;
+ HLDSGoalExpr0 = plain_call(_, _, _, _, _, _),
+ process_call_for_implicit_parallelism(!.HLDSGoal, ProcInfo, !ModuleInfo,
+ !IndexInConj, !MaybeConj, !CalleeListToBeParallelized, !Counter)
+ % We deal with the index in the conjunction in
+ % process_call_for_implicit_parallelism.
+ ;
+ HLDSGoalExpr0 = call_foreign_proc(_, _, _, _, _, _, _),
+ process_call_for_implicit_parallelism(!.HLDSGoal, ProcInfo, !ModuleInfo,
+ !IndexInConj, !MaybeConj, !CalleeListToBeParallelized, !Counter)
+ ;
+ HLDSGoalExpr0 = generic_call(Details, _, _, _),
+ (
+ Details = higher_order(_, _, _, _),
+ process_call_for_implicit_parallelism(!.HLDSGoal, ProcInfo,
+ !ModuleInfo, !IndexInConj, !MaybeConj,
+ !CalleeListToBeParallelized, !Counter)
+ ;
+ Details = class_method(_, _, _, _),
+ process_call_for_implicit_parallelism(!.HLDSGoal, ProcInfo,
+ !ModuleInfo, !IndexInConj, !MaybeConj,
+ !CalleeListToBeParallelized, !Counter)
+ ;
+ Details = event_call(_),
+ increment_index_if_in_conj(!.MaybeConj, !IndexInConj)
+ ;
+ Details = cast(_),
+ increment_index_if_in_conj(!.MaybeConj, !IndexInConj)
+ )
+ ;
+ % No distinction is made between plain conjunctions and parallel
+ % conjunctions. We have to process the parallel conjunction for the
+ % slot number.
+ HLDSGoalExpr0 = conj(_, _),
+ process_conj_for_implicit_parallelism(HLDSGoalExpr0, HLDSGoalExpr, 1,
+ ProcInfo, !ModuleInfo, !CalleeListToBeParallelized, !Counter),
+ % A plain conjunction will never be contained in an other plain
+ % conjunction. As for parallel conjunctions, they wont
+ % be modified. Therefore, incrementing the index suffices (no need to
+ % call update_conj_and_index).
+ !:HLDSGoal = HLDSGoalExpr - HLDSGoalInfo,
+ increment_index_if_in_conj(!.MaybeConj, !IndexInConj)
+ ;
+ HLDSGoalExpr0 = disj(HLDSGoals0),
+ process_disj_for_implicit_parallelism(HLDSGoals0, [], HLDSGoals,
+ ProcInfo, !ModuleInfo, !CalleeListToBeParallelized, !Counter),
+ GoalProcessed = disj(HLDSGoals) - HLDSGoalInfo,
+ update_conj_and_index(!MaybeConj, GoalProcessed, !IndexInConj),
+ % If we are not in a conjunction, then we need to return the modified
+ % value of HLDSGoal. In we are in a conjunction, that information is not
+ % read (see process_conj_for_implicit_parallelism).
+ !:HLDSGoal = GoalProcessed
+ ;
+ HLDSGoalExpr0 = switch(Var, CanFail, Cases0),
+ process_switch_cases_for_implicit_parallelism(Cases0, [], Cases,
+ ProcInfo, !ModuleInfo, !CalleeListToBeParallelized, !Counter),
+ GoalProcessed = switch(Var, CanFail, Cases) - HLDSGoalInfo,
+ update_conj_and_index(!MaybeConj, GoalProcessed, !IndexInConj),
+ !:HLDSGoal = GoalProcessed
+ ;
+ HLDSGoalExpr0 = negation(HLDSGoal0),
+ process_goal_for_implicit_parallelism(HLDSGoal0, HLDSGoal, ProcInfo,
+ !ModuleInfo, !MaybeConj, !IndexInConj, !CalleeListToBeParallelized,
+ !Counter),
+ GoalProcessed = negation(HLDSGoal) - HLDSGoalInfo,
+ update_conj_and_index(!MaybeConj, GoalProcessed, !IndexInConj),
+ !:HLDSGoal = GoalProcessed
+ ;
+ HLDSGoalExpr0 = scope(_, _),
+ increment_index_if_in_conj(!.MaybeConj, !IndexInConj)
+ ;
+ HLDSGoalExpr0 = if_then_else(Vars, If0, Then0, Else0),
+ % 0 is the default value when we are not in a conjunction (in this case
+ % an if then else).
+ process_goal_for_implicit_parallelism(If0, If, ProcInfo, !ModuleInfo,
+ no, _, 0, _, !CalleeListToBeParallelized, !Counter),
+ process_goal_for_implicit_parallelism(Then0, Then, ProcInfo, !ModuleInfo
+ , no, _, 0, _, !CalleeListToBeParallelized, !Counter),
+ process_goal_for_implicit_parallelism(Else0, Else, ProcInfo, !ModuleInfo
+ , no, _, 0, _, !CalleeListToBeParallelized, !Counter),
+ GoalProcessed = if_then_else(Vars, If, Then, Else) - HLDSGoalInfo,
+ update_conj_and_index(!MaybeConj, GoalProcessed, !IndexInConj),
+ !:HLDSGoal = GoalProcessed
+ ;
+ HLDSGoalExpr0 = shorthand(_),
+ increment_index_if_in_conj(!.MaybeConj, !IndexInConj)
+ ).
+
+ % Increment the index if we are in a conjunction.
+ %
+:- pred increment_index_if_in_conj(maybe(hlds_goal_expr)::in, int::in, int::out)
+ is det.
+
+increment_index_if_in_conj(MaybeConj, !IndexInConj) :-
+ (
+ MaybeConj = yes(_),
+ !:IndexInConj = !.IndexInConj + 1
+ ;
+ MaybeConj = no
+ ).
+
+ % Process a call for implicit parallelism.
+ %
+:- pred process_call_for_implicit_parallelism(hlds_goal::in, proc_info::in,
+ module_info::in, module_info::out, int::in, int::out,
+ maybe(hlds_goal_expr)::in, maybe(hlds_goal_expr)::out,
+ list(css_to_be_parallelized)::in, list(css_to_be_parallelized)::out,
+ counter::in, counter::out) is det.
+
+process_call_for_implicit_parallelism(Call, ProcInfo, !ModuleInfo, !IndexInConj
+ , !MaybeConj, !CalleeListToBeParallelized, !Counter) :-
+ counter.allocate(SlotNumber, !Counter),
+ get_call_kind_and_callee(!.ModuleInfo, Call, Kind, CalleeRawId),
+ ( !.MaybeConj = yes(Conj0), Conj0 = conj(plain_conj, ConjGoals0)
+ ->
+ (is_in_css_list_to_be_parallelized(Kind, SlotNumber, CalleeRawId,
+ !.CalleeListToBeParallelized, [], !:CalleeListToBeParallelized)
+ ->
+ ( build_goals_surrounded_by_calls_to_be_parallelized(ConjGoals0,
+ !.ModuleInfo, [Call], Goals, !.IndexInConj + 1, End, !Counter,
+ !CalleeListToBeParallelized)
+ ->
+ parallelize_calls(Goals, !.IndexInConj, End, Conj0, Conj,
+ ProcInfo, !ModuleInfo),
+ !:IndexInConj = End,
+ !:MaybeConj = yes(Conj)
+ ;
+ % The next call is not in the feedback file or we've hit a
+ % plain conjunction/disjunction/switch/if then else.
+ !:IndexInConj = !.IndexInConj + 1
+ )
+ ;
+ % Not to be parallelized.
+ !:IndexInConj = !.IndexInConj + 1
+ )
+ ;
+ % Call is not in a conjunction or the call is already in a parallel
+ % conjunction.
+ true
+ ).
+
+ % Give the raw id (the same as in the deep profiler) of a callee contained
+ % in a call.
+ %
+:- pred get_call_kind_and_callee(module_info::in, hlds_goal::in, string::out,
+ string::out) is det.
+
+get_call_kind_and_callee(ModuleInfo, Call, Kind, CalleeRawId) :-
+ GoalExpr = fst(Call),
+ ( GoalExpr = plain_call(PredId, ProcId, _, _, _, _)
+ ->
+ module_info_pred_proc_info(ModuleInfo, PredId, ProcId,
+ PredInfo, _),
+ pred_proc_id_to_raw_id(PredInfo, ProcId, CalleeRawId),
+ Kind = "normal_call"
+ ;
+ ( GoalExpr = call_foreign_proc(_, PredId, ProcId, _, _, _, _)
+ ->
+ module_info_pred_proc_info(ModuleInfo, PredId, ProcId,
+ PredInfo, _),
+ pred_proc_id_to_raw_id(PredInfo, ProcId, CalleeRawId),
+ Kind = "special_call"
+ ;
+ ( GoalExpr = generic_call(Details, _, _, _)
+ ->
+ (
+ Details = higher_order(_, _, _, _),
+ CalleeRawId = "",
+ Kind = "higher_order_call"
+ ;
+ Details = class_method(_, _, _, _),
+ CalleeRawId = "",
+ Kind = "method_call"
+ ;
+ Details = event_call(_),
+ error("get_call_kind_and_callee:: the call is an event" ++
+ " call")
+ ;
+ Details = cast(_),
+ error("get_call_kind_and_callee: the call is a cast")
+ )
+ ;
+ error("get_call_kind_and_callee: not a call")
+ )
+ )
+ ).
+
+ % Convert a pred_info and a proc_id to the raw procedure id (the same used
+ % in the deep profiler).
+ %
+:- pred pred_proc_id_to_raw_id(pred_info::in, proc_id::in, string::out) is det.
+
+pred_proc_id_to_raw_id(PredInfo, ProcId, RawId) :-
+ Module = pred_info_module(PredInfo),
+ Name = pred_info_name(PredInfo),
+ OrigArity = pred_info_orig_arity(PredInfo),
+ IsPredOrFunc = pred_info_is_pred_or_func(PredInfo),
+ ModuleString = sym_name_to_string(Module),
+ ProcIdInt = proc_id_to_int(ProcId),
+ RawId = string.append_list([ModuleString, ".", Name, "/",
+ string.int_to_string(OrigArity),
+ ( IsPredOrFunc = function -> "+1" ; ""), "-",
+ string.from_int(ProcIdInt)]).
+
+ % Succeed if the caller, slot number and callee correspond to a
+ % css_to_be_parallelized in the list given as a parameter.
+ % Fail otherwise.
+ %
+:- pred is_in_css_list_to_be_parallelized(string::in, int::in, string::in,
+ list(css_to_be_parallelized)::in,
+ list(css_to_be_parallelized)::in, list(css_to_be_parallelized)::out)
+ is semidet.
+
+is_in_css_list_to_be_parallelized(Kind, SlotNumber, CalleeRawId,
+ CSSListToBeParallelized, !ResultAcc) :-
+ (
+ CSSListToBeParallelized = [],
+ fail
+ ;
+ CSSListToBeParallelized = [ CSSToBeParallelized |
+ CSSListToBeParallelized0],
+ CSSToBeParallelized = css_to_be_parallelized(_, CSSSlotNumber, CSSKind,
+ CSSCallee),
+ % =< because there is not a one to one correspondance with the source
+ % code. New calls might have been added by the previous stages of the
+ % compiler.
+ ( CSSSlotNumber =< SlotNumber, CSSKind = Kind, CSSCallee = CalleeRawId
+ ->
+ list.append(!.ResultAcc, CSSListToBeParallelized0, !:ResultAcc)
+ ;
+ list.append(!.ResultAcc, [CSSToBeParallelized], !:ResultAcc),
+ is_in_css_list_to_be_parallelized(Kind, SlotNumber, CalleeRawId,
+ CSSListToBeParallelized0, !ResultAcc)
+ )
+ ).
+
+ % Build a list of goals surrounded by two calls which are in the feedback
+ % file or by a call which is in the feedback file and a parallel
+ % conjunction.
+ %
+ % Succeed if we can build that list of goals.
+ % Fail otherwise.
+ %
+:- pred build_goals_surrounded_by_calls_to_be_parallelized(list(hlds_goal)::in,
+ module_info::in, list(hlds_goal)::in, list(hlds_goal)::out,
+ int::in, int::out, counter::in, counter::out,
+ list(css_to_be_parallelized)::in, list(css_to_be_parallelized)::out)
+ is semidet.
+
+build_goals_surrounded_by_calls_to_be_parallelized(ConjGoals, ModuleInfo,
+ !ResultAcc, !Index, !Counter, !CalleeListToBeParallelized) :-
+ list.length(ConjGoals, Length),
+ ( !.Index > Length
+ ->
+ fail
+ ;
+ list.index1_det(ConjGoals, !.Index, Goal),
+ GoalExpr = fst(Goal),
+ ( ( GoalExpr = disj(_)
+ ; GoalExpr = switch(_, _, _)
+ ; GoalExpr = if_then_else(_, _, _, _)
+ ; GoalExpr = conj(plain_conj, _)
+ ) ->
+ fail
+ ;
+ ( is_a_conjunct(Goal, parallel_conj) ->
+ list.append(!.ResultAcc, [Goal], !:ResultAcc)
+ ;
+ ( is_a_call(Goal)
+ ->
+ counter.allocate(SlotNumber, !Counter),
+ get_call_kind_and_callee(ModuleInfo, Goal, Kind,
+ CalleeRawId),
+ ( is_in_css_list_to_be_parallelized(Kind, SlotNumber,
+ CalleeRawId, !.CalleeListToBeParallelized, [],
+ !:CalleeListToBeParallelized)
+ ->
+ list.append(!.ResultAcc, [Goal], !:ResultAcc)
+ ;
+ list.append(!.ResultAcc, [Goal], !:ResultAcc),
+ !:Index = !.Index + 1,
+ build_goals_surrounded_by_calls_to_be_parallelized(
+ ConjGoals, ModuleInfo, !ResultAcc, !Index, !Counter,
+ !CalleeListToBeParallelized)
+ )
+ ;
+ list.append(!.ResultAcc, [Goal], !:ResultAcc),
+ !:Index = !.Index + 1,
+ build_goals_surrounded_by_calls_to_be_parallelized(
+ ConjGoals, ModuleInfo, !ResultAcc, !Index, !Counter,
+ !CalleeListToBeParallelized)
+ )
+ )
+ )
+ ).
+
+ % Succeed if the Goal is a conjunction.
+ % Fail otherwise.
+ %
+:- pred is_a_conjunct(hlds_goal::in, conj_type::out) is semidet.
+
+is_a_conjunct(Goal, Type) :-
+ GoalExpr = fst(Goal),
+ GoalExpr = conj(Type, _).
+
+ % Succeed if Goal is a call or a call inside a negation.
+ % Fail otherwise.
+ %
+:- pred is_a_call(hlds_goal::in) is semidet.
+
+is_a_call(Goal) :-
+ GoalExpr = fst(Goal),
+ (
+ GoalExpr = plain_call(_, _, _, _, _, _)
+ ;
+ GoalExpr = call_foreign_proc(_, _, _, _, _, _, _)
+ ;
+ GoalExpr = generic_call(Details, _, _, _),
+ (
+ Details = class_method(_, _, _, _)
+ ;
+ Details = higher_order(_, _, _, _)
+ )
+ ;
+ GoalExpr = negation(GoalNeg),
+ GoalNegExpr = fst(GoalNeg),
+ (
+ GoalNegExpr = plain_call(_, _, _, _, _, _)
+ ;
+ GoalNegExpr = call_foreign_proc(_, _, _, _, _, _, _)
+ ;
+ GoalNegExpr = generic_call(Details, _, _, _),
+ (
+ Details = class_method(_, _, _, _)
+ ;
+ Details = higher_order(_, _, _, _)
+ )
+ )
+ ).
+
+ % Parallelize two calls/a call and a parallel conjunction which might have
+ % goals between them. If these have no dependencies with the first call then
+ % we move them before the first call and parallelize the two calls/call and
+ % parallel conjunction.
+ %
+ % Goals is contained in Conj.
+ %
+:- pred parallelize_calls(list(hlds_goal)::in, int::in, int::in,
+ hlds_goal_expr::in, hlds_goal_expr::out, proc_info::in,
+ module_info::in, module_info::out) is det.
+
+parallelize_calls(Goals, Start, End, !Conj, ProcInfo, !ModuleInfo) :-
+ ( !.Conj = conj(plain_conj, ConjGoals0) ->
+ ( ConjGoals0 = [FirstGoal, LastGoal] ->
+ ( is_worth_parallelizing(FirstGoal, LastGoal, ProcInfo,
+ !.ModuleInfo)
+ ->
+ ( is_a_conjunct(LastGoal, parallel_conj) ->
+ % The parallel conjunction has to remain flatened.
+ add_call_to_parallel_conjunction(FirstGoal, LastGoal,
+ ParallelGoal),
+ !:Conj = fst(ParallelGoal)
+ ;
+ !:Conj = conj(parallel_conj, ConjGoals0)
+ )
+ ;
+ % Not worth parallelizing.
+ true
+ )
+ ;
+ % There are more than two goals in the conjunction.
+ list.length(Goals, Length),
+ list.index1_det(Goals, 1, FirstGoal),
+ list.index1_det(Goals, Length, LastGoal),
+ ( is_worth_parallelizing(FirstGoal, LastGoal, ProcInfo,
+ !.ModuleInfo)
+ ->
+ GoalsInBetweenAndLast = list.det_tail(Goals),
+ list.delete_all(GoalsInBetweenAndLast, LastGoal,
+ GoalsInBetween),
+ % Check the dependencies of GoalsInBetween with FirstGoal.
+ list.filter(goal_depends_on_goal(FirstGoal),
+ GoalsInBetween, GoalsFiltered),
+ ( list.is_empty(GoalsFiltered) ->
+ ( is_a_conjunct(LastGoal, parallel_conj) ->
+ add_call_to_parallel_conjunction(FirstGoal, LastGoal,
+ ParallelGoal)
+ ;
+ create_parallel_conj(FirstGoal, LastGoal, ParallelGoal)
+ ),
+ ( Start = 1 ->
+ GoalsFront = []
+ ;
+ list.det_split_list(Start - 1, ConjGoals0,
+ GoalsFront, _)
+ ),
+ list.length(ConjGoals0, ConjLength),
+ ( End = ConjLength ->
+ GoalsBack = []
+ ;
+ list.det_split_list(End, ConjGoals0, _,
+ GoalsBack)
+ ),
+ list.append(GoalsFront, GoalsInBetween,
+ GoalsFrontWithBetween),
+ list.append(GoalsFrontWithBetween, [ParallelGoal],
+ GoalsWithoutBack),
+ list.append(GoalsWithoutBack, GoalsBack, ConjGoals),
+ !:Conj = conj(plain_conj, ConjGoals)
+ ;
+ % The goals between the two calls/call and parallel
+ % conjunction can't be moved before the first call.
+ true
+ )
+ ;
+ % Not worth parallelizing.
+ true
+ )
+ )
+ ;
+ error("parallelize_calls: not in a plain conjunct ")
+ ).
+
+ % Two calls are worth parallelizing if the number of shared variables is
+ % smaller than the number of argument variables of at least one of the two
+ % calls.
+ %
+ % A call and a parallel conjunction are worth parallelizing if the number of
+ % shared variables is smaller than the number of argument variables of the
+ % call.
+ %
+ % Succeed if it is worth parallelizing the two goals.
+ % Fail otherwise.
+ %
+ %
+:- pred is_worth_parallelizing(hlds_goal::in, hlds_goal::in, proc_info::in,
+ module_info::in) is semidet.
+
+is_worth_parallelizing(GoalA, GoalB, ProcInfo, ModuleInfo) :-
+ proc_info_get_initial_instmap(ProcInfo, ModuleInfo, InstMap),
+ SharedVars = find_shared_variables(ModuleInfo, InstMap, [GoalA, GoalB]),
+ set.to_sorted_list(SharedVars, SharedVarsList),
+ list.length(SharedVarsList, NbSharedVars),
+ ( NbSharedVars = 0 ->
+ % No shared vars between the goals.
+ true
+ ;
+ ( is_a_conjunct(GoalB, parallel_conj) ->
+ get_number_args(GoalA, NbArgsA),
+ NbSharedVars < NbArgsA
+ ;
+ ( get_number_args(GoalA, NbArgsA), get_number_args(GoalB, NbArgsB)
+ ->
+ ( ( NbSharedVars < NbArgsA, NbSharedVars < NbArgsB
+ ; NbSharedVars = NbArgsA, NbSharedVars < NbArgsB
+ ; NbSharedVars < NbArgsA, NbSharedVars = NbArgsB
+ )
+ ->
+ true
+ ;
+ fail
+ )
+ ;
+ error("is_worth_parallelizing: GoalA and/or GoalB are/is" ++
+ " not a call")
+ )
+ )
+ ).
+
+ % Give the number of argument variables of a call.
+ %
+:- pred get_number_args(hlds_goal::in, int::out) is semidet.
+
+get_number_args(Call, NbArgs) :-
+ CallExpr = fst(Call),
+ (
+ CallExpr = plain_call(_, _, Args, _, _, _),
+ list.length(Args, NbArgs)
+ ;
+ CallExpr = generic_call(Details, Args, _, _),
+ (
+ Details = higher_order(_, _, _, _),
+ list.length(Args, NbArgs)
+ ;
+ Details = class_method(_, _, _, _),
+ list.length(Args, NbArgs)
+ )
+ ;
+ CallExpr = call_foreign_proc(_, _, _, Args, _, _, _),
+ list.length(Args, NbArgs)
+ ).
+
+ % Add a call to an existing parallel conjunction.
+ %
+:- pred add_call_to_parallel_conjunction(hlds_goal::in, hlds_goal::in,
+ hlds_goal::out) is det.
+
+add_call_to_parallel_conjunction(Call, ParallelGoal0, ParallelGoal) :-
+ ParallelGoalExpr0 = fst(ParallelGoal0),
+ ParallelGoalInfo0 = snd(ParallelGoal0),
+ ( ParallelGoalExpr0 = conj(parallel_conj, GoalList0) ->
+ GoalList = [Call | GoalList0],
+ goal_list_nonlocals(GoalList, NonLocals),
+ goal_list_instmap_delta(GoalList, InstMapDelta),
+ goal_list_determinism(GoalList, Detism),
+ goal_list_purity(GoalList, Purity),
+ goal_info_set_nonlocals(NonLocals, ParallelGoalInfo0,
+ ParallelGoalInfo1),
+ goal_info_set_instmap_delta(InstMapDelta, ParallelGoalInfo1,
+ ParallelGoalInfo2),
+ goal_info_set_determinism(Detism, ParallelGoalInfo2, ParallelGoalInfo3),
+ goal_info_set_purity(Purity, ParallelGoalInfo3, ParallelGoalInfo),
+ ParallelGoalExpr = conj(parallel_conj, GoalList),
+ ParallelGoal = ParallelGoalExpr - ParallelGoalInfo
+ ;
+ error("add_call_to_parallel_conjunction: ParallelGoal0 is not a " ++
+ "parallel conjunction")
+ ).
+
+ % Create a parallel conjunction.
+ %
+:- pred create_parallel_conj(hlds_goal::in, hlds_goal::in, hlds_goal::out)
+ is det.
+
+create_parallel_conj(GoalA, GoalB, ParallelGoal) :-
+ GoalsInParallelConj = [GoalA, GoalB],
+ ParallelGoalExpr = conj(parallel_conj, GoalsInParallelConj),
+ goal_list_nonlocals(GoalsInParallelConj, NonLocals),
+ goal_list_instmap_delta(GoalsInParallelConj, InstMapDelta),
+ goal_list_determinism(GoalsInParallelConj, Detism),
+ goal_list_purity(GoalsInParallelConj, Purity),
+ GoalAInfo = snd(GoalA),
+ goal_info_get_context(GoalAInfo, Context),
+ goal_info_init(NonLocals, InstMapDelta, Detism, Purity, Context,
+ ParallelGoalInfo),
+ ParallelGoal = ParallelGoalExpr - ParallelGoalInfo.
+
+ % Succeed if the first goal depends on the second one.
+ % Fail otherwise.
+ %
+:- pred goal_depends_on_goal(hlds_goal::in, hlds_goal::in) is semidet.
+
+goal_depends_on_goal(_ - GoalInfo1, _ - GoalInfo2) :-
+ goal_info_get_instmap_delta(GoalInfo1, InstmapDelta1),
+ instmap_delta_changed_vars(InstmapDelta1, ChangedVars1),
+ goal_info_get_nonlocals(GoalInfo2, NonLocals2),
+ set.intersect(ChangedVars1, NonLocals2, Intersection),
+ \+ set.empty(Intersection).
+
+ % Process a conjunction for implicit parallelism.
+ %
+:- pred process_conj_for_implicit_parallelism(
+ hlds_goal_expr::in, hlds_goal_expr::out, int::in,
+ proc_info::in, module_info::in, module_info::out,
+ list(css_to_be_parallelized)::in, list(css_to_be_parallelized)::out,
+ counter::in, counter::out) is det.
+
+process_conj_for_implicit_parallelism(!HLDSGoalExpr, IndexInConj, ProcInfo,
+ !ModuleInfo, !CalleeListToBeParallelized, !Counter) :-
+ ( !.HLDSGoalExpr = conj(_, HLDSGoalsConj) ->
+ list.length(HLDSGoalsConj, Length),
+ ( IndexInConj > Length ->
+ true
+ ;
+ MaybeConj0 = yes(!.HLDSGoalExpr),
+ list.index1_det(HLDSGoalsConj, IndexInConj, HLDSGoalInConj),
+ % We are not interested in the return value of HLDSGoalInConj, only
+ % MaybeConj matters.
+ process_goal_for_implicit_parallelism(HLDSGoalInConj, _, ProcInfo,
+ !ModuleInfo, MaybeConj0, MaybeConj, IndexInConj, IndexInConj0,
+ !CalleeListToBeParallelized, !Counter),
+ ( MaybeConj = yes(HLDSGoalExprProcessed) ->
+ !:HLDSGoalExpr = HLDSGoalExprProcessed
+ ;
+ error("process_conj_for_implicit_parallelism: wrong maybe" ++
+ " value")
+ ),
+ process_conj_for_implicit_parallelism(!HLDSGoalExpr, IndexInConj0,
+ ProcInfo, !ModuleInfo, !CalleeListToBeParallelized, !Counter)
+ )
+ ;
+ error("process_conj_for_implicit_parallelism: not a conjunct")
+ ).
+
+ % Process a disjunction for implicit parallelism.
+ %
+:- pred process_disj_for_implicit_parallelism(
+ list(hlds_goal)::in, list(hlds_goal)::in, list(hlds_goal)::out,
+ proc_info::in, module_info::in, module_info::out,
+ list(css_to_be_parallelized)::in, list(css_to_be_parallelized)::out,
+ counter::in, counter::out) is det.
+
+process_disj_for_implicit_parallelism([], !HLDSGoalsAcc, _ProcInfo,
+ !ModuleInfo, !CalleeListToBeParallelized, !Counter).
+process_disj_for_implicit_parallelism([HLDSGoal | HLDSGoals0], !HLDSGoalsAcc,
+ ProcInfo, !ModuleInfo, !CalleeListToBeParallelized, !Counter) :-
+ process_goal_for_implicit_parallelism(HLDSGoal, HLDSGoalProcessed,
+ ProcInfo, !ModuleInfo, no, _, 0, _, !CalleeListToBeParallelized,
+ !Counter),
+ !:HLDSGoalsAcc = [ HLDSGoalProcessed | !.HLDSGoalsAcc ],
+ process_disj_for_implicit_parallelism(HLDSGoals0, !HLDSGoalsAcc, ProcInfo,
+ !ModuleInfo, !CalleeListToBeParallelized, !Counter).
+
+ % If we are in a conjunction, update it by replacing the goal at index by
+ % Goal and increment the index.
+ %
+:- pred update_conj_and_index(
+ maybe(hlds_goal_expr)::in, maybe(hlds_goal_expr)::out,
+ hlds_goal::in, int::in, int::out) is det.
+
+update_conj_and_index(!MaybeConj, Goal, !IndexInConj) :-
+ ( !.MaybeConj = yes(conj(Type, Goals0)) ->
+ list.replace_nth_det(Goals0, !.IndexInConj, Goal, Goals),
+ !:IndexInConj = !.IndexInConj + 1,
+ !:MaybeConj = yes(conj(Type, Goals))
+ ;
+ true
+ ).
+
+ % Process a switch for implicit parallelism.
+ %
+:- pred process_switch_cases_for_implicit_parallelism(
+ list(case)::in, list(case)::in, list(case)::out, proc_info::in,
+ module_info::in, module_info::out,
+ list(css_to_be_parallelized)::in, list(css_to_be_parallelized)::out,
+ counter::in, counter::out) is det.
+
+process_switch_cases_for_implicit_parallelism([], !CasesAcc, _ProcInfo,
+ !ModuleInfo, !CalleeListToBeParallelized, !Counter).
+process_switch_cases_for_implicit_parallelism([Case | Cases0], !CasesAcc,
+ ProcInfo, !ModuleInfo, !CalleeListToBeParallelized, !Counter) :-
+ Case = case(Functor, Goal0),
+ process_goal_for_implicit_parallelism(Goal0, Goal, ProcInfo,
+ !ModuleInfo, no, _, 0, _, !CalleeListToBeParallelized, !Counter),
+ !:CasesAcc = [ case(Functor, Goal) | !.CasesAcc ],
+ process_switch_cases_for_implicit_parallelism(Cases0, !CasesAcc,
+ ProcInfo, !ModuleInfo, !CalleeListToBeParallelized, !Counter).
+
+%-----------------------------------------------------------------------------%
+
+ % Parse the feedback file (header and body).
+ %
+:- pred parse_feedback_file(string::in,
+ maybe_error(list(css_to_be_parallelized))::out, io::di, io::uo) is det.
+
+parse_feedback_file(InputFile, MaybeCSSListToBeParallelized, !IO) :-
+ io.open_input(InputFile, Result, !IO),
+ (
+ Result = io.error(ErrInput),
+ MaybeCSSListToBeParallelized = error(io.error_message(ErrInput))
+ ;
+ Result = ok(InStrm),
+ io.read_file_as_string(InStrm, MaybeFileAsString, !IO),
+ (
+ MaybeFileAsString = ok(FileAsString),
+ LineList = string.words_separator(is_carriage_return,
+ FileAsString),
+ process_header(LineList, MaybeBodyFileAsListString, !IO),
+ (
+ MaybeBodyFileAsListString = error(ErrProcessHeader),
+ MaybeCSSListToBeParallelized = error(ErrProcessHeader)
+ ;
+ MaybeBodyFileAsListString = ok(BodyFileAsListString),
+ process_body(BodyFileAsListString,
+ MaybeCSSListToBeParallelized0),
+ (
+ MaybeCSSListToBeParallelized0 =
+ ok(CSSListToBeParallelized),
+ MaybeCSSListToBeParallelized = ok(CSSListToBeParallelized)
+ ;
+ MaybeCSSListToBeParallelized0 = error(Err),
+ MaybeCSSListToBeParallelized = error(Err)
+ )
+ )
+ ;
+ MaybeFileAsString = error(_, ErrReadFileAsString),
+ MaybeCSSListToBeParallelized =
+ error(io.error_message(ErrReadFileAsString))
+ ),
+ io.close_input(InStrm, !IO)
+ ).
+
+:- pred is_carriage_return(char::in) is semidet.
+
+is_carriage_return(Char) :- Char = '\n'.
+
+ % Process the header of the feedback file.
+ %
+:- pred process_header(list(string)::in, maybe_error(list(string))::out,
+ io::di, io::uo) is det.
+
+process_header(FileAsListString, MaybeFileAsListStringWithoutHeader, !IO) :-
+ ( list.index0(FileAsListString, 0, Type) ->
+ ( Type = "Profiling feedback file" ->
+ (list.index0(FileAsListString, 1, Version) ->
+ ( Version = "Version = 1.0" ->
+ list.det_split_list(4, FileAsListString, _,
+ FileAsListStringWithoutHeader),
+ MaybeFileAsListStringWithoutHeader =
+ ok(FileAsListStringWithoutHeader)
+ ;
+ MaybeFileAsListStringWithoutHeader = error("Profiling" ++
+ " feedback file version incorrect")
+ )
+ ;
+ MaybeFileAsListStringWithoutHeader = error("Not a profiling"
+ ++ " feedback file")
+ )
+ ;
+ MaybeFileAsListStringWithoutHeader = error("Not a profiling" ++
+ " feedback file")
+ )
+ ;
+ MaybeFileAsListStringWithoutHeader = error("Not a profiling feedback"
+ ++ " file")
+ ).
+
+ % Process the body of the feedback file.
+ %
+:- pred process_body(list(string)::in,
+ maybe_error(list(css_to_be_parallelized))::out) is det.
+
+process_body(CoreFileAsListString, MaybeCSSListToBeParallelized) :-
+ ( process_body2(CoreFileAsListString, [], CSSListToBeParallelized) ->
+ MaybeCSSListToBeParallelized = ok(CSSListToBeParallelized)
+ ;
+ MaybeCSSListToBeParallelized = error("Profiling feedback file has been"
+ ++ " tampered with")
+ ).
+
+:- pred process_body2(list(string)::in, list(css_to_be_parallelized)::in,
+ list(css_to_be_parallelized)::out) is semidet.
+
+process_body2([], !CSSListToBeParallelizedAcc).
+process_body2([Line | Lines], !CSSListToBeParallelizedAcc) :-
+ Words = string.words_separator(is_whitespace, Line),
+ list.index0_det(Words, 0, Caller),
+ ( Caller = "Mercury" ->
+ process_body2(Lines, !CSSListToBeParallelizedAcc)
+ ;
+ list.index0_det(Words, 1, SlotNumber),
+ string.to_int(SlotNumber, IntSlotNumber),
+ list.index0_det(Words, 2, Kind),
+ ( Kind = "normal_call" ->
+ list.index0_det(Words, 3, Callee),
+ CSStoBeParallelized = css_to_be_parallelized(Caller, IntSlotNumber,
+ Kind, Callee)
+ ;
+ CSStoBeParallelized = css_to_be_parallelized(Caller, IntSlotNumber,
+ Kind, "")
+ ),
+ !:CSSListToBeParallelizedAcc = [ CSStoBeParallelized |
+ !.CSSListToBeParallelizedAcc ],
+ process_body2(Lines, !CSSListToBeParallelizedAcc)
+ ).
+
+%-----------------------------------------------------------------------------%
+:- end_module transform_hlds.implicit_parallelism.
+%-----------------------------------------------------------------------------%
Index: compiler/mercury_compile.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/mercury_compile.m,v
retrieving revision 1.414
diff -u -r1.414 mercury_compile.m
--- compiler/mercury_compile.m 3 Nov 2006 08:31:09 -0000 1.414
+++ compiler/mercury_compile.m 14 Nov 2006 02:29:03 -0000
@@ -78,6 +78,7 @@
:- import_module transform_hlds.trailing_analysis.
:- import_module transform_hlds.tabling_analysis.
:- import_module transform_hlds.higher_order.
+:- import_module transform_hlds.implicit_parallelism.
:- import_module transform_hlds.accumulator.
:- import_module transform_hlds.tupling.
:- import_module transform_hlds.untupling.
@@ -2414,7 +2415,7 @@
module_info_get_globals(!.HLDS, Globals),
globals.lookup_bool_option(Globals, verbose, Verbose),
globals.lookup_bool_option(Globals, statistics, Stats),
-
+
maybe_read_experimental_complexity_file(!HLDS, !IO),
tabling(Verbose, Stats, !HLDS, !IO),
@@ -2523,6 +2524,9 @@
maybe_structure_reuse_analysis(Verbose, Stats, !HLDS, !IO),
maybe_dump_hlds(!.HLDS, 195, "structure_reuse", !DumpInfo, !IO),
+ maybe_implicit_parallelism(Verbose, Stats, !HLDS, !IO),
+ maybe_dump_hlds(!.HLDS, 199, "implicit_parallelism", !DumpInfo, !IO),
+
maybe_control_granularity(Verbose, Stats, !HLDS, !IO),
maybe_dump_hlds(!.HLDS, 200, "granularity", !DumpInfo, !IO),
@@ -3828,6 +3832,30 @@
maybe_report_stats(Stats, !IO)
;
Sharing = no
+ ).
+
+:- pred maybe_implicit_parallelism(bool::in, bool::in,
+ module_info::in, module_info::out, io::di, io::uo) is det.
+
+maybe_implicit_parallelism(Verbose, Stats, !HLDS, !IO) :-
+ module_info_get_globals(!.HLDS, Globals),
+ globals.lookup_bool_option(Globals, implicit_parallelism,
+ ImplicitParallelism),
+ globals.lookup_string_option(Globals, feedback_file,
+ FeedbackFile),
+ ( FeedbackFile = "" ->
+ true
+ ;
+ ( ImplicitParallelism = yes ->
+ maybe_write_string(Verbose, "% Applying implicit parallelism...\n",
+ !IO),
+ maybe_flush_output(Verbose, !IO),
+ apply_implicit_parallelism_transformation(!HLDS, FeedbackFile, !IO),
+ maybe_write_string(Verbose, "% done.\n", !IO),
+ maybe_report_stats(Stats, !IO)
+ ;
+ true
+ )
).
:- pred maybe_control_granularity(bool::in, bool::in,
Index: compiler/options.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/options.m,v
retrieving revision 1.536
diff -u -r1.536 options.m
--- compiler/options.m 3 Nov 2006 08:31:10 -0000 1.536
+++ compiler/options.m 14 Nov 2006 02:29:03 -0000
@@ -548,6 +548,7 @@
; tuple_min_args
; control_granularity
; parallelism_target
+ ; implicit_parallelism
% Stuff for the CTGC system (structure sharing / structure reuse).
; structure_sharing_analysis
@@ -789,7 +790,7 @@
% recent when no other test can easily be constructed in
% configure.in.
- ; experiment.
+ ; experiment
% This option is provided for use by implementors who want to
% compare a new way of doing something with the old way. The idea
% is that the code that switches between the two ways should
@@ -802,6 +803,7 @@
% option to control their code, but adding an option requires
% recompiling most of the modules in the compiler. Having this
% option permanently here should reduce the need for that.
+ ; feedback_file.
%----------------------------------------------------------------------------%
%----------------------------------------------------------------------------%
@@ -1296,6 +1298,7 @@
tuple_min_args - int(4),
control_granularity - bool(no),
parallelism_target - int(4),
+ implicit_parallelism - bool(no),
% HLDS -> LLDS
smart_indexing - bool(no),
@@ -1533,7 +1536,8 @@
fullarch - string(""),
local_module_id - accumulating([]),
compiler_sufficiently_recent - bool(no),
- experiment - string("")
+ experiment - string(""),
+ feedback_file - string("")
]).
% please keep this in alphabetic order
@@ -2047,6 +2051,7 @@
long_option("tuple-min-args", tuple_min_args).
long_option("control-granularity", control_granularity).
long_option("parallelism-target", parallelism_target).
+long_option("implicit-parallelism", implicit_parallelism).
% CTGC related options.
long_option("structure-sharing", structure_sharing_analysis).
@@ -2307,6 +2312,7 @@
long_option("no-noncompact-ho-call-2004-01-15", compiler_sufficiently_recent).
long_option("trace-io-builtins-2006-08-14", compiler_sufficiently_recent).
long_option("experiment", experiment).
+long_option("feedback-file", feedback_file).
%-----------------------------------------------------------------------------%
@@ -4215,7 +4221,10 @@
"\thandle, which is specified using --parallelism-target.",
"--parallelism-target N",
"\tSpecified the number of CPUs of the target machine, for use by",
- "\tthe --control-granularity option."
+ "\tthe --control-granularity option.",
+ "--implicit-parallelism",
+ "\tApply implicit parallelism if a profiling feedback file is",
+ "\tspecified using the feedback-file option."
]).
:- pred options_help_hlds_llds_optimization(io::di, io::uo) is det.
@@ -4711,12 +4720,15 @@
"\tcompile several modules without the overhead of process",
"\tcreation for each one.)",
"--version",
- "\tDisplay the compiler version."
+ "\tDisplay the compiler version.",
% The `--fullarch' option is reserved for
% use by the `Mercury.config' file.
% The `--local-module-id' option is used by `mmc --make'.
+ "--feedback-file",
+ "\tUse the specified feedback file which may currently only",
+ "\tprocessed for implicit parallelism."
]).
:- pred write_tabbed_lines(list(string)::in, io::di, io::uo) is det.
Index: compiler/transform_hlds.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/transform_hlds.m,v
retrieving revision 1.25
diff -u -r1.25 transform_hlds.m
--- compiler/transform_hlds.m 3 Nov 2006 08:31:12 -0000 1.25
+++ compiler/transform_hlds.m 14 Nov 2006 02:29:03 -0000
@@ -85,6 +85,7 @@
:- include_module untupling.
:- include_module granularity.
:- include_module dep_par_conj.
+:- include_module implicit_parallelism.
:- include_module mmc_analysis.
Index: deep_profiler/dump.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/dump.m,v
retrieving revision 1.8
diff -u -r1.8 dump.m
--- deep_profiler/dump.m 12 Oct 2006 06:30:21 -0000 1.8
+++ deep_profiler/dump.m 14 Nov 2006 02:29:05 -0000
@@ -742,6 +742,7 @@
should_dump(DumpOptions, What) :-
( list.member(What, DumpOptions)
+ ; list.member("all", DumpOptions)
; DumpOptions = []
).
Index: deep_profiler/mdprof_feedback.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/mdprof_feedback.m,v
retrieving revision 1.1
diff -u -r1.1 mdprof_feedback.m
--- deep_profiler/mdprof_feedback.m 19 Oct 2006 06:20:20 -0000 1.1
+++ deep_profiler/mdprof_feedback.m 14 Nov 2006 02:29:05 -0000
@@ -64,17 +64,22 @@
write_help_message(ProgName, !IO)
;
( Args = [Input, Output] ->
- lookup_string_option(Options, distribution, Distribution),
- ( construct_distribution(Distribution, DistributionType) ->
+ lookup_string_option(Options, measure, Measure),
+ ( construct_measure(Measure, MeasureType) ->
lookup_int_option(Options, threshold, Threshold),
lookup_bool_option(Options, verbose, Verbose),
- read_deep_file(Input, Verbose, MaybeProfile, !IO),
+ lookup_accumulating_option(Options, dump_stages,
+ DumpStages),
+ lookup_accumulating_option(Options, dump_options,
+ DumpOptions),
+ read_deep_file(Input, Verbose, DumpStages, DumpOptions,
+ MaybeProfile, !IO),
(
MaybeProfile = ok(Deep),
compute_css_list_above_threshold(0, Deep, Threshold,
- DistributionType, [], CSSListAboveThreshold),
+ MeasureType, [], CSSListAboveThreshold),
generate_feedback_file(CSSListAboveThreshold, Deep,
- DistributionType, Threshold, Output, !IO)
+ MeasureType, Threshold, Output, !IO)
;
MaybeProfile = error(Error),
io.stderr_stream(Stderr, !IO),
@@ -111,7 +116,7 @@
io.format("--verbose Generate progress messages.\n", []),
io.format("--threshold <value>\n", []),
io.format(" Set the threshold to <value>.\n",[]),
- io.format("--distrib average|median\n",[]),
+ io.format("--measure average|median\n",[]),
io.format(" average : Write to <output> the call sites\n",[]),
io.format(" static whose call sites dynamic's average\n",[]),
io.format(" call sequence counts exceed the given\n",[]),
@@ -135,10 +140,11 @@
% Read a deep profiling data file.
%
-:- pred read_deep_file(string::in, bool::in, maybe_error(deep)::out,
- io::di, io::uo) is det.
+:- pred read_deep_file(string::in, bool::in,
+ list(string)::in, list(string)::in,
+ maybe_error(deep)::out, io::di, io::uo) is det.
-read_deep_file(Input, Verbose, MaybeProfile, !IO) :-
+read_deep_file(Input, Verbose, DumpStages, DumpOptions, MaybeProfile, !IO) :-
server_name(Machine, !IO),
(
Verbose = yes,
@@ -148,17 +154,17 @@
Verbose = no,
MaybeOutput = no
),
- read_and_startup(Machine, [Input], no, MaybeOutput, [], [], MaybeProfile,
- !IO).
+ read_and_startup(Machine, [Input], no, MaybeOutput,
+ DumpStages, DumpOptions, MaybeProfile, !IO).
% Determine those CSSs whose CSDs' average/median call sequence counts
% exceed the given threshold.
%
:- pred compute_css_list_above_threshold(int::in, deep::in, int::in,
- distribution_type::in, list(call_site_static)::in,
+ measure_type::in, list(call_site_static)::in,
list(call_site_static)::out) is det.
-compute_css_list_above_threshold(Index, Deep, Threshold, Distribution,
+compute_css_list_above_threshold(Index, Deep, Threshold, Measure,
!CSSAcc) :-
array.size(Deep ^ call_site_statics, Size),
( Index = Size ->
@@ -173,13 +179,13 @@
Callseqs = 0
;
(
- Distribution = average,
+ Measure = average,
list.foldr(sum_callseqs_csd_ptr(Deep), CSDList,
0, SumCallseqs),
% NOTE: we have checked that NumCSD is not zero above.
Callseqs = SumCallseqs // NumCSD
;
- Distribution = median,
+ Measure = median,
list.sort(compare_csd_ptr(Deep), CSDList, CSDListSorted),
IndexMedian = NumCSD // 2,
list.index0_det(CSDListSorted, IndexMedian, MedianPtr),
@@ -188,12 +194,12 @@
),
( Callseqs >= Threshold ->
CSS = array.lookup(Deep ^ call_site_statics, Index),
- !:CSSAcc = [ CSS | !.CSSAcc ],
+ list.append(!.CSSAcc, [CSS], !:CSSAcc),
compute_css_list_above_threshold(Index + 1, Deep, Threshold,
- Distribution, !CSSAcc)
+ Measure, !CSSAcc)
;
compute_css_list_above_threshold(Index + 1, Deep, Threshold,
- Distribution, !CSSAcc)
+ Measure, !CSSAcc)
)
).
@@ -223,9 +229,9 @@
% threshold.
%
:- pred generate_feedback_file(list(call_site_static)::in, deep::in,
- distribution_type::in, int::in, string::in, io::di, io::uo) is det.
+ measure_type::in, int::in, string::in, io::di, io::uo) is det.
-generate_feedback_file(CSSList, Deep, Distribution, Threshold, Output, !IO) :-
+generate_feedback_file(CSSList, Deep, Measure, Threshold, Output, !IO) :-
io.open_output(Output, Result, !IO),
(
Result = io.error(Err),
@@ -236,11 +242,11 @@
io.write_string(Stream, "Profiling feedback file\n", !IO),
io.write_string(Stream, "Version = 1.0\n", !IO),
(
- Distribution = average,
- io.write_string(Stream, "Distribution = average\n", !IO)
+ Measure = average,
+ io.write_string(Stream, "Measure = average\n", !IO)
;
- Distribution = median,
- io.write_string(Stream, "Distribution = median\n", !IO)
+ Measure = median,
+ io.write_string(Stream, "Measure = median\n", !IO)
),
io.format(Stream, "Threshold = %i\n", [i(Threshold)], !IO),
write_css_list(CSSList, Deep, Stream, !IO),
@@ -290,9 +296,11 @@
; help
; verbose
; version
- ; distribution.
+ ; measure
+ ; dump_stages
+ ; dump_options.
-:- type distribution_type
+:- type measure_type
---> average
; median.
@@ -304,7 +312,9 @@
short('t', threshold).
short('h', help).
short('v', version).
-short('d', distribution).
+short('m', measure).
+short('d', dump_stages).
+short('D', dump_options).
:- pred long(string::in, option::out) is semidet.
@@ -313,8 +323,9 @@
long("help", help).
long("verbose", verbose).
long("version", version).
-long("distrib", distribution).
-long("distribution", distribution).
+long("measure", measure).
+long("dump-stages", dump_stages).
+long("dump-options", dump_options).
:- pred defaults(option::out, option_data::out) is multi.
@@ -322,12 +333,14 @@
defaults(help, bool(no)).
defaults(verbose, bool(no)).
defaults(version, bool(no)).
-defaults(distribution, string("average")).
+defaults(measure, string("average")).
+defaults(dump_stages, accumulating([])).
+defaults(dump_options, accumulating([])).
-:- pred construct_distribution(string::in, distribution_type::out) is semidet.
+:- pred construct_measure(string::in, measure_type::out) is semidet.
-construct_distribution("average", average).
-construct_distribution("median", median).
+construct_measure("average", average).
+construct_measure("median", median).
%-----------------------------------------------------------------------------%
:- end_module mdprof_feedback.
--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to: mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions: mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------
More information about the reviews
mailing list