[m-rev.] diff: Use determinstic code for the best parallelisation search.

Paul Bone pbone at csse.unimelb.edu.au
Wed Apr 4 11:49:39 AEST 2012
Previous message: [m-rev.] for review: try_and_insert
Next message: [m-rev.] diff: fix nondet stack exhaustion problem
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
This is some more of Zoltan's work, I've simply fixed a bug and refactored some
code so it's harder to write that particular bug.

Use determinstic code for the search for the best parallelisation.  The
impurity in this module has been removed, the deterministic code is simplier
and the branch_and_bound.m code is now unused.

deep_profiler/autopar_find_best_par.m:
    Use deterministic code for the branch and bound search.

    Conform to changes in autopar_types.m

deep_profiler/autopar_types.m:
    Remove the info field in the incomplete_parallelization structure.

Index: deep_profiler/autopar_find_best_par.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/autopar_find_best_par.m,v
retrieving revision 1.6
diff -u -p -b -r1.6 autopar_find_best_par.m
--- deep_profiler/autopar_find_best_par.m	30 Mar 2012 04:05:11 -0000	1.6
+++ deep_profiler/autopar_find_best_par.m	4 Apr 2012 00:37:42 -0000
@@ -12,6 +12,9 @@
 % This module contains the code for finding the best way to parallelize
 % a given conjunction.
 %
+% The following compile-time flags may introduce trace goals:
+%   debug_branch_and_bound
+%
 %-----------------------------------------------------------------------------%
 
 :- module mdprof_fb.automatic_parallelism.autopar_find_best_par.
@@ -46,18 +49,18 @@
 
 :- implementation.
 
-:- import_module branch_and_bound.
 :- import_module mdbcomp.program_representation.
 :- import_module mdprof_fb.automatic_parallelism.autopar_calc_overlap.
-:- import_module mdprof_fb.automatic_parallelism.autopar_search_goals. % XXX
 :- import_module measurements.
 
 :- import_module array.
+:- import_module benchmarking.
 :- import_module digraph.
 :- import_module float.
 :- import_module io.
 :- import_module int.
 :- import_module map.
+:- import_module pair.
 :- import_module require.
 :- import_module set.
 :- import_module string.
@@ -400,23 +403,29 @@ find_best_parallelisation_complete_bnb(I
         io.flush_output(!IO)
     ),
 
-    branch_and_bound(
-        generate_parallelisations(Info, Algorithm, PreprocessedGoals),
-        parallelisation_get_objective_value,
-        Solutions, Profile),
+    promise_equivalent_solutions [GenParTime, EqualBestSolns, Profile] (
+        benchmark_det(
+            generate_parallelisations(Info, Algorithm),
+            PreprocessedGoals, EqualBestSolns - Profile, 1, GenParTime)
+    ),
 
     trace [compile_time(flag("debug_branch_and_bound")), io(!IO)] (
         io.format("D: Solutions: %d\n",
-            [i(set.count(Solutions))], !IO),
-        io.format("D: Branch and bound profile: %s\n\n",
+            [i(list.length(EqualBestSolns))], !IO),
+        io.format("D: Branch and bound profile: %s\n",
             [s(string(Profile))], !IO),
+        io.format("D: Time: %d ms\n\n",
+            [i(GenParTime)], !IO),
         io.flush_output(!IO)
     ),
 
-    ( set.remove_least(BestParallelisation, Solutions, _) ->
+    (
+        EqualBestSolns = [BestIncompleteParallelisation | _],
+        finalise_parallelisation(BestIncompleteParallelisation,
+            BestParallelisation),
         MaybeBestParallelisation = yes(BestParallelisation)
     ;
-        % Solutions is empty.
+        EqualBestSolns = [],
         ParalleliseDepConjs = Info ^ ipi_opts ^ cpcp_parallelise_dep_conjs,
         (
             ParalleliseDepConjs = parallelise_dep_conjs(_),
@@ -434,287 +443,331 @@ find_best_parallelisation_complete_bnb(I
 
     % Profiling information for an execution of the solver.
     %
-:- func parallelisation_get_objective_value(full_parallelisation) = float.
-
-parallelisation_get_objective_value(Parallelisation) = Value :-
-    Metrics = Parallelisation ^ fp_par_exec_metrics,
-    Value = Metrics ^ pem_par_time +
-        parallel_exec_metrics_get_overheads(Metrics) * 2.0.
+:- type bnb_profile
+    --->    bnb_profile(
+                bnbp_incomplete_good_enough         :: int,
+                bnbp_incomplete_not_good_enough     :: int,
+                bnbp_complete_best_solution         :: int,
+                bnbp_complete_equal_solution        :: int,
+                bnbp_complete_worse_solution        :: int,
+                bnbp_complete_non_solution          :: int
+            ).
+
+    % The equal best solutions found so far (if we have found some solutions),
+    % and the value of the objective function for these solutions.
+    % The objective function represents a cost, so we look for solutions
+    % with the smallest possible value of the objective function.
+    %
+:- type best_solutions(T)
+    --->    no_best_solutions
+    ;       best_solutions(
+                bs_solutions            :: list(T),
+                bs_objective_value      :: float
+            ).
 
-:- impure pred generate_parallelisations(implicit_parallelism_info::in,
+:- pred generate_parallelisations(implicit_parallelism_info::in,
     best_par_algorithm_simple::in, goals_for_parallelisation::in,
-    bnb_state(full_parallelisation)::in, full_parallelisation::out) is nondet.
+    pair(list(incomplete_parallelisation), bnb_profile)::out) is det.
 
 generate_parallelisations(Info, Algorithm, GoalsForParallelisation,
-        BNBState, BestParallelisation) :-
-    some [!Parallelisation, !GoalGroups] (
-        start_building_parallelisation(Info, GoalsForParallelisation,
-            !:Parallelisation),
+        EqualBestSolns - FinalProfile) :-
+    some [!GoalGroups, !MaybeBestSolns, !Profile] (
+        start_building_parallelisation(GoalsForParallelisation,
+            IncompleteParallelisation0),
+
+        % Set the last scheduled goal to the goal at the end of the first
+        % group, popping the first group off the list. This initialises the
+        % parallelisation with the first goal group occurring first in the
+        % first parallel conjunction.
+        %
+        % We do this outside of the loop below because the first goal group
+        % will always be added to the first (initially empty) parallel
+        % conjunct; it does not make sense to have it start a new parallel
+        % conjunct.
 
         !:GoalGroups = GoalsForParallelisation ^ gfp_groups,
-        start_first_par_conjunct(!GoalGroups, !Parallelisation),
-        impure generate_parallelisations_body(Info, BNBState, Algorithm,
-            !.GoalGroups, !Parallelisation),
-
-        ( semipure should_expand_search(BNBState, Algorithm) ->
-            % Try to push goals into the first and last parallel conjuncts
-            % from outside the parallel conjunction.
-            semipure add_goals_into_first_par_conj(BNBState, !Parallelisation),
-            semipure add_goals_into_last_par_conj(BNBState, !Parallelisation)
-        ;
-            true
-        ),
-
-        finalise_parallelisation(!.Parallelisation, BestParallelisation)
-    ),
-    semipure test_incomplete_solution(BNBState, BestParallelisation).
-
-:- pred start_building_parallelisation(implicit_parallelism_info::in,
-    goals_for_parallelisation::in,
-    incomplete_parallelisation::out) is det.
-
-start_building_parallelisation(Info, PreprocessedGoals, Parallelisation) :-
-    GoalsArray = PreprocessedGoals ^ gfp_goals,
-    FirstParGoal = PreprocessedGoals ^ gfp_first_costly_goal,
-    LastParGoal = PreprocessedGoals ^ gfp_last_costly_goal,
-    NumCalls = PreprocessedGoals ^ gfp_num_calls,
-    DependencyGraphs = PreprocessedGoals ^ gfp_dependency_graphs,
-    Parallelisation = incomplete_parallelisation(Info, GoalsArray,
-        FirstParGoal, LastParGoal, FirstParGoal, [], NumCalls,
-        DependencyGraphs, no, no, no).
-
-    % Finalise the parallelisation.
-    %
-:- pred finalise_parallelisation(incomplete_parallelisation::in,
-    full_parallelisation::out) is det.
-
-finalise_parallelisation(Incomplete, Best) :-
-    GoalsBefore = ip_get_goals_before(Incomplete),
-    GoalsAfter = ip_get_goals_after(Incomplete),
-
-    MaybeCostData = Incomplete ^ ip_maybe_par_cost_data,
     (
-        MaybeCostData = yes(CostData)
+            !.GoalGroups = [],
+            unexpected($module, $pred, "no goal groups")
     ;
-        MaybeCostData = no,
-        unexpected($module, $pred, "parallelisation has no cost data")
+            !.GoalGroups = [_],
+            unexpected($module, $pred, "only one goal group")
+        ;
+            !.GoalGroups = [Group, _ | _],
+            !.GoalGroups = [_ | !:GoalGroups],
+            gg_get_details(Group, Index, Num, _),
+            LastScheduledGoal = Index + Num - 1,
+            IncompleteParallelisation1 =
+                IncompleteParallelisation0 ^ ip_last_scheduled_goal
+                    := LastScheduledGoal
     ),
-    CostData = parallelisation_cost_data(_, Overlap, Metrics0, _),
 
-    Metrics = finalise_parallel_exec_metrics(Metrics0),
-    par_conj_overlap_is_dependent(Overlap, IsDependent),
-    ParConjs = ip_get_par_conjs(Incomplete),
-    Best = fp_parallel_execution(GoalsBefore, ParConjs,
-        GoalsAfter, IsDependent, Metrics).
+        !:MaybeBestSolns = no_best_solutions,
+        !:Profile = bnb_profile(0, 0, 0, 0, 0, 0),
 
-%----------------------------------------------------------------------------%
+        generate_parallelisations_loop(Info, Algorithm, !.GoalGroups,
+            IncompleteParallelisation1, !MaybeBestSolns, !Profile),
 
-:- semipure pred add_goals_into_first_par_conj(
-    bnb_state(full_parallelisation)::in,
-    incomplete_parallelisation::in, incomplete_parallelisation::out) is multi.
+% XXX
+%       ( semipure should_expand_search(BNBState, Algorithm) ->
+%           % Try to push goals into the first and last parallel conjuncts
+%           % from outside the parallel conjunction.
+%           semipure add_goals_into_first_par_conj(BNBState, !Parallelisation),
+%           semipure add_goals_into_last_par_conj(BNBState, !Parallelisation)
+%       ;
+%           true
+%       ),
 
-add_goals_into_first_par_conj(BNBState, !Parallelisation) :-
-    FirstGoal0 = !.Parallelisation ^ ip_first_par_goal,
     (
-        FirstGoal0 > 0,
-        Goals = !.Parallelisation ^ ip_goals,
-        Goal = lookup(Goals, FirstGoal0 - 1),
-        can_parallelise_goal(Goal),
-
-        % There are goals before the parallel conjunction that can be included
-        % in the parallel conjunction.
-        add_one_goal_into_first_par_conj(!Parallelisation),
-        semipure test_parallelisation(BNBState, !Parallelisation),
-        semipure add_goals_into_first_par_conj(BNBState, !Parallelisation)
+            !.MaybeBestSolns = no_best_solutions,
+            EqualBestSolns = []
     ;
-        true
-    ).
-
-:- semipure pred add_goals_into_last_par_conj(
-    bnb_state(full_parallelisation)::in,
-    incomplete_parallelisation::in, incomplete_parallelisation::out) is multi.
-
-add_goals_into_last_par_conj(BNBState, !Parallelisation) :-
-    NumGoals = ip_get_num_goals(!.Parallelisation),
-    LastParGoal = !.Parallelisation ^ ip_last_par_goal,
-    (
-        LastParGoal < NumGoals - 1,
-        Goals = !.Parallelisation ^ ip_goals,
-        Goal = lookup(Goals, LastParGoal + 1),
-        can_parallelise_goal(Goal),
-
-        % Try to move a goal from after the parallelisation into the
-        % parallelisation.
-        add_one_goal_into_last_par_conj(!Parallelisation),
-        semipure test_parallelisation(BNBState, !Parallelisation),
-        semipure add_goals_into_last_par_conj(BNBState, !Parallelisation)
-    ;
-        true
+            !.MaybeBestSolns = best_solutions(EqualBestSolns, _)
+        ),
+        FinalProfile = !.Profile
     ).
 
-    % Set the last scheduled goal to the goal at the end of the first group,
-    % popping the first group off the list. This initialises the
-    % parallelisation with the first goal group occurring first in the first
-    % parallel conjunction.
-    %
-    % This is done outside of the loop below since the first goal group will
-    % always be added to the first (initially empty) parallel conjunction.
-    %
-:- pred start_first_par_conjunct(
-    list(goal_group(T))::in, list(goal_group(T))::out,
-    incomplete_parallelisation::in, incomplete_parallelisation::out) is det.
+:- pred generate_parallelisations_loop(implicit_parallelism_info::in,
+    best_par_algorithm_simple::in, list(goal_group(goal_classification))::in,
+    incomplete_parallelisation::in,
+    best_solutions(incomplete_parallelisation)::in,
+    best_solutions(incomplete_parallelisation)::out,
+    bnb_profile::in, bnb_profile::out) is det.
 
-start_first_par_conjunct(!GoalGroups, !Parallelisation) :-
-    (
-        !.GoalGroups = [],
-        unexpected($module, $pred, "no goal groups")
+generate_parallelisations_loop(_, _, [],
+        !.IncompleteParallelisation, !MaybeBestSolns, !Profile) :-
+    % Verify that we have generated at least two parallel conjuncts.
+    ( ip_get_num_parallel_conjuncts(!.IncompleteParallelisation) >= 2 ->
+        maybe_update_best_complete_parallelisation(!.IncompleteParallelisation,
+            !MaybeBestSolns, !Profile)
     ;
-        !.GoalGroups = [Group | !:GoalGroups],
-        gg_get_details(Group, Index, Num, _),
-        LastScheduledGoal = Index + Num - 1,
-        !Parallelisation ^ ip_last_scheduled_goal := LastScheduledGoal
+        % This is not a solution, so do not try to update !MaybeBestSolns.
+        !Profile ^ bnbp_complete_non_solution :=
+            !.Profile ^ bnbp_complete_non_solution + 1
     ).
-
-:- impure pred generate_parallelisations_body(implicit_parallelism_info::in,
-    bnb_state(full_parallelisation)::in, best_par_algorithm_simple::in,
-    list(goal_group(goal_classification))::in,
-    incomplete_parallelisation::in, incomplete_parallelisation::out) is nondet.
-
-generate_parallelisations_body(_, _, _, [], !Parallelisation) :-
-    % Verify that we've generated at least two parallel conjuncts.
-    ip_get_num_parallel_conjuncts(!.Parallelisation) >= 2.
-generate_parallelisations_body(Info, BNBState, Algorithm,
-        [GoalGroup | GoalGroups], !Parallelisation) :-
-    LastScheduledGoal0 = !.Parallelisation ^ ip_last_scheduled_goal,
+generate_parallelisations_loop(Info, Algorithm, [GoalGroup | GoalGroups],
+        !.IncompleteParallelisation, !MaybeBestSolns, !Profile) :-
+    LastScheduledGoal0 = !.IncompleteParallelisation ^ ip_last_scheduled_goal,
     gg_get_details(GoalGroup, _Index, Num, _Classification),
 
     LastScheduledGoal = LastScheduledGoal0 + Num,
     some [!AddToLastParallelisation, !AddToNewParallelisation] (
-        !:AddToLastParallelisation = !.Parallelisation,
-        !:AddToNewParallelisation = !.Parallelisation,
+        !:AddToLastParallelisation = !.IncompleteParallelisation,
+        !:AddToNewParallelisation = !.IncompleteParallelisation,
 
         % Consider adding this goal to the last parallel conjunct.
         !AddToLastParallelisation ^ ip_last_scheduled_goal
             := LastScheduledGoal,
-        score_parallelisation(BNBState, MaybeAddToLastScore,
-            !AddToLastParallelisation),
+        update_incomplete_parallelisation_cost(Info, !AddToLastParallelisation,
+            MaybeAddToLastCost),
 
         % Consider putting this goal into a new parallel conjunct.
-        ParConjsRevLastGoal0 = !.Parallelisation ^ ip_par_conjs_rev_last_goal,
+        ParConjsRevLastGoal0 =
+            !.IncompleteParallelisation ^ ip_par_conjs_rev_last_goal,
         ParConjsRevLastGoal = [LastScheduledGoal0 | ParConjsRevLastGoal0],
         !AddToNewParallelisation ^ ip_par_conjs_rev_last_goal :=
             ParConjsRevLastGoal,
         !AddToNewParallelisation ^ ip_last_scheduled_goal := LastScheduledGoal,
-        score_parallelisation(BNBState, MaybeAddToNewScore,
-            !AddToNewParallelisation),
+        update_incomplete_parallelisation_cost(Info, !AddToNewParallelisation,
+            MaybeAddToNewCost),
 
         (
-            MaybeAddToLastScore = yes(AddToLastScore),
+            MaybeAddToLastCost = yes(AddToLastCost),
             (
-                MaybeAddToNewScore = yes(AddToNewScore),
-                (
-                    % Smaller scores are better.
-                    AddToNewScore > AddToLastScore
-                ->
+                MaybeAddToNewCost = yes(AddToNewCost),
+                ( AddToNewCost > AddToLastCost ->
                     % Adding to the last parallel conjunct is better.
-                    BestOption = !.AddToLastParallelisation,
-                    MaybeSndBestOption = yes(!.AddToNewParallelisation)
+                    Best0 = !.AddToLastParallelisation,
+                    MaybeNextBest0 = yes(!.AddToNewParallelisation)
                 ;
                     % Adding to a new parallel conjunct is better.
-                    BestOption = !.AddToNewParallelisation,
-                    MaybeSndBestOption = yes(!.AddToLastParallelisation)
+                    Best0 = !.AddToNewParallelisation,
+                    MaybeNextBest0 = yes(!.AddToLastParallelisation)
                 )
             ;
-                MaybeAddToNewScore = no,
+                MaybeAddToNewCost = no,
                 % Adding to the last parallel conjunct is the only option.
-                BestOption = !.AddToLastParallelisation,
-                MaybeSndBestOption = no
+                Best0 = !.AddToLastParallelisation,
+                MaybeNextBest0 = no
             )
         ;
-            MaybeAddToLastScore = no,
+            MaybeAddToLastCost = no,
             % Adding to a new parallel conjunct is the only option.
-            BestOption = !.AddToNewParallelisation,
-            MaybeSndBestOption = no
+            Best0 = !.AddToNewParallelisation,
+            MaybeNextBest0 = no
         )
     ),
 
+    % XXX: This ite could be simpler, and the algorithm would be closer to the
+    % one in the paper.
     (
-        MaybeSndBestOption = no,
-        !:Parallelisation = BestOption
-    ;
-        MaybeSndBestOption = yes(SndBestOption),
-        (
-            % Should an alternative branch be created here?
-            semipure should_expand_search(BNBState, Algorithm)
+        % Can we create an alternative branch here?
+        MaybeNextBest0 = yes(NextBest0),
+        % Should we create an alternative branch here?
+        should_expand_search(Algorithm, !.Profile)
         ->
             % Create a branch.
-            impure add_alternative(BNBState),
-            % This tries the leftmost disjunct first, so try the best option
-            % there.
+        incomplete_parallelisation_is_good_enough(Info, !.MaybeBestSolns,
+            Best0, Best, !Profile, BestGoodEnough),
             (
-                !:Parallelisation = BestOption
+            BestGoodEnough = is_good_enough,
+            generate_parallelisations_loop(Info, Algorithm,
+                GoalGroups, Best, !MaybeBestSolns, !Profile)
             ;
-                impure close_alternative(BNBState),
-                !:Parallelisation = SndBestOption
+            BestGoodEnough = is_not_good_enough
+        ),
+
+        incomplete_parallelisation_is_good_enough(Info, !.MaybeBestSolns,
+            NextBest0, NextBest, !Profile, NextBestGoodEnough),
+        (
+            NextBestGoodEnough = is_good_enough,
+            generate_parallelisations_loop(Info, Algorithm,
+                GoalGroups, NextBest, !MaybeBestSolns, !Profile)
+        ;
+            NextBestGoodEnough = is_not_good_enough
             )
         ;
-            !:Parallelisation = BestOption
+        incomplete_parallelisation_is_good_enough(Info, !.MaybeBestSolns,
+            Best0, Best, !Profile, BestGoodEnough),
+        (
+            BestGoodEnough = is_good_enough,
+            generate_parallelisations_loop(Info, Algorithm,
+                GoalGroups, Best, !MaybeBestSolns, !Profile)
+        ;
+            BestGoodEnough = is_not_good_enough
         )
-    ),
+    ).
 
-    semipure test_parallelisation(BNBState, !Parallelisation),
+:- pred start_building_parallelisation(goals_for_parallelisation::in,
+    incomplete_parallelisation::out) is det.
 
-    impure generate_parallelisations_body(Info, BNBState, Algorithm,
-        GoalGroups, !Parallelisation).
+start_building_parallelisation(PreprocessedGoals, Parallelisation) :-
+    GoalsArray = PreprocessedGoals ^ gfp_goals,
+    FirstParGoal = PreprocessedGoals ^ gfp_first_costly_goal,
+    LastParGoal = PreprocessedGoals ^ gfp_last_costly_goal,
+    NumCalls = PreprocessedGoals ^ gfp_num_calls,
+    DependencyGraphs = PreprocessedGoals ^ gfp_dependency_graphs,
+    Parallelisation = incomplete_parallelisation(GoalsArray,
+        FirstParGoal, LastParGoal, FirstParGoal, [], NumCalls,
+        DependencyGraphs, no, no, no).
+
+    % Finalise the parallelisation.
+    %
+:- pred finalise_parallelisation(incomplete_parallelisation::in,
+    full_parallelisation::out) is det.
+
+finalise_parallelisation(Incomplete, Best) :-
+    GoalsBefore = ip_get_goals_before(Incomplete),
+    GoalsAfter = ip_get_goals_after(Incomplete),
+
+    MaybeCostData = Incomplete ^ ip_maybe_par_cost_data,
+    (
+        MaybeCostData = yes(CostData)
+    ;
+        MaybeCostData = no,
+        unexpected($module, $pred, "parallelisation has no cost data")
+    ),
+    CostData = parallelisation_cost_data(_, Overlap, Metrics0, _),
+
+    Metrics = finalise_parallel_exec_metrics(Metrics0),
+    par_conj_overlap_is_dependent(Overlap, IsDependent),
+    ParConjs = ip_get_par_conjs(Incomplete),
+    Best = fp_parallel_execution(GoalsBefore, ParConjs, GoalsAfter,
+        IsDependent, Metrics).
 
     % True if we should expand the search for parallelisation alternatives by
     % creating a choice point.
     %
-:- semipure pred should_expand_search(bnb_state(T)::in,
-    best_par_algorithm_simple::in) is semidet.
+:- pred should_expand_search(best_par_algorithm_simple::in, bnb_profile::in)
+    is semidet.
 
-should_expand_search(BNBState, Algorithm) :-
+should_expand_search(Algorithm, Profile) :-
     Algorithm = bpas_complete(MaybeLimit),
     (
         MaybeLimit = yes(Limit),
-        semipure num_alternatives(BNBState, Open, Closed),
-        Open + Closed < Limit
+        NumIncompleteTests =
+            Profile ^ bnbp_incomplete_not_good_enough +
+            Profile ^ bnbp_incomplete_good_enough,
+        NumIncompleteTests < Limit
     ;
         MaybeLimit = no
     ).
 
-    % Test the parallelisation against the best one known to the branch and
-    % bound solver.
-    %
-:- semipure pred test_parallelisation(bnb_state(full_parallelisation)::in,
-    incomplete_parallelisation::in, incomplete_parallelisation::out)
-    is semidet.
+:- pred maybe_update_best_complete_parallelisation(
+    incomplete_parallelisation::in,
+    best_solutions(incomplete_parallelisation)::in,
+    best_solutions(incomplete_parallelisation)::out,
+    bnb_profile::in, bnb_profile::out) is det.
+
+maybe_update_best_complete_parallelisation(CurSoln,
+        MaybeBestSolns0, MaybeBestSolns, !Profile) :-
+    % We don't use state variable syntax for MaybeBestSolns so that mmc can
+    % check that we've explicitly provided a value for MaybeBestSolns.
+    CurSolnCost = incomplete_parallelisation_cost(CurSoln),
+    (
+        MaybeBestSolns0 = no_best_solutions,
+        MaybeBestSolns = best_solutions([CurSoln], CurSolnCost),
+        !Profile ^ bnbp_complete_best_solution :=
+            !.Profile ^ bnbp_complete_best_solution + 1
+    ;
+        MaybeBestSolns0 = best_solutions(BestSolns0, BestCost0),
+        ( CurSolnCost < BestCost0 ->
+            MaybeBestSolns = best_solutions([CurSoln], CurSolnCost),
+            !Profile ^ bnbp_complete_best_solution :=
+                !.Profile ^ bnbp_complete_best_solution + 1
+        ; CurSolnCost = BestCost0 ->
+            BestSolns = [CurSoln | BestSolns0],
+            MaybeBestSolns = best_solutions(BestSolns, BestCost0),
+            !Profile ^ bnbp_complete_equal_solution :=
+                !.Profile ^ bnbp_complete_equal_solution + 1
+        ;
+            % Do not update !MaybeBestSolns.
+            MaybeBestSolns = MaybeBestSolns0,
+            !Profile ^ bnbp_complete_worse_solution :=
+                !.Profile ^ bnbp_complete_worse_solution + 1
+        )
+    ).
 
-test_parallelisation(BNBState, !Parallelisation) :-
-    Info = !.Parallelisation ^ ip_info,
-    calculate_parallel_cost(Info, !Parallelisation, CostData),
-    test_dependence(Info, CostData),
-    % XXX: We shouldn't need to finalize the parallelisation before testing it.
-    % This is a limitation of the branch and bound module.
-    finalise_parallelisation(!.Parallelisation, TestParallelisation),
-    semipure test_incomplete_solution(BNBState, TestParallelisation).
+:- type is_good_enough
+    --->    is_not_good_enough
+    ;       is_good_enough.
 
     % Test the parallelisation against the best one known to the branch and
     % bound solver.
     %
-:- pred score_parallelisation(bnb_state(full_parallelisation)::in,
-    maybe(float)::out,
-    incomplete_parallelisation::in, incomplete_parallelisation::out) is det.
-
-score_parallelisation(BNBState, MaybeScore, !Parallelisation) :-
-    Info = !.Parallelisation ^ ip_info,
-    calculate_parallel_cost(Info, !Parallelisation, CostData),
+:- pred incomplete_parallelisation_is_good_enough(
+    implicit_parallelism_info::in,
+    best_solutions(incomplete_parallelisation)::in,
+    incomplete_parallelisation::in, incomplete_parallelisation::out,
+    bnb_profile::in, bnb_profile::out, is_good_enough::out) is det.
+
+incomplete_parallelisation_is_good_enough(Info, MaybeBestSolns,
+        !IncompleteParallelisation, !Profile, GoodEnough) :-
+    calculate_parallel_cost(Info, !IncompleteParallelisation, CostData),
     ( test_dependence(Info, CostData) ->
-        finalise_parallelisation(!.Parallelisation, TestParallelisation),
-        score_solution(BNBState, TestParallelisation, Score),
-        MaybeScore = yes(Score)
+        (
+            MaybeBestSolns = no_best_solutions,
+            !Profile ^ bnbp_incomplete_good_enough :=
+                !.Profile ^ bnbp_incomplete_good_enough + 1,
+            GoodEnough = is_good_enough
+        ;
+            MaybeBestSolns = best_solutions(_, BestSolnCost),
+            CurIncompleteCost =
+                incomplete_parallelisation_cost(!.IncompleteParallelisation),
+            ( CurIncompleteCost > BestSolnCost ->
+                !Profile ^ bnbp_incomplete_not_good_enough :=
+                    !.Profile ^ bnbp_incomplete_not_good_enough + 1,
+                GoodEnough = is_not_good_enough
+            ;
+                !Profile ^ bnbp_incomplete_good_enough :=
+                    !.Profile ^ bnbp_incomplete_good_enough + 1,
+                GoodEnough = is_good_enough
+            )
+        )
     ;
-        MaybeScore = no
+        !Profile ^ bnbp_incomplete_not_good_enough :=
+            !.Profile ^ bnbp_incomplete_not_good_enough + 1,
+        GoodEnough = is_not_good_enough
     ).
 
     % Test that the parallelisation includes dependent parallelism
@@ -752,14 +805,107 @@ par_conj_overlap_is_dependent(peo_conjun
         )
     ).
 
+    % Compute the cost of the parallelisation.
+    %
+:- pred update_incomplete_parallelisation_cost(implicit_parallelism_info::in,
+    incomplete_parallelisation::in, incomplete_parallelisation::out,
+    maybe(float)::out) is det.
+
+update_incomplete_parallelisation_cost(Info, !IncompleteParallelisation,
+        MaybeCost) :-
+    calculate_parallel_cost(Info, !IncompleteParallelisation, CostData),
+    ( test_dependence(Info, CostData) ->
+        Cost = incomplete_parallelisation_cost(!.IncompleteParallelisation),
+        MaybeCost = yes(Cost)
+    ;
+        MaybeCost = no
+    ).
+
+:- func incomplete_parallelisation_cost(incomplete_parallelisation) = float.
+
+incomplete_parallelisation_cost(IncompleteParallelisation) = Cost :-
+    MaybeCostData = IncompleteParallelisation ^ ip_maybe_par_cost_data,
+    (
+        MaybeCostData = yes(CostData)
+    ;
+        MaybeCostData = no,
+        unexpected($module, $pred,
+            "incomplete parallelisation has no cost data")
+    ),
+    IncompleteMetrics = CostData ^ pcd_par_exec_metrics,
+    FullMetrics = finalise_parallel_exec_metrics(IncompleteMetrics),
+    Cost = full_parallelisation_metrics_cost(FullMetrics).
+
+    % The objective function for the branch and bound search.
+    % This is ParTime + ParOverheads * 2. That is we are willing to pay
+    % 1 unit of parallel overheads to get a 2 unit improvement
+    % of parallel execution time.
+    %
+    % XXX This looks wrong, for two reasons. First, it would be simpler
+    % and faster to just multiply the costs of all the overheads by 2.
+    % Second, the fudge factor should be configurable.
+    %
+:- func full_parallelisation_metrics_cost(parallel_exec_metrics) = float.
+
+full_parallelisation_metrics_cost(FullMetrics) = Cost :-
+    Cost = FullMetrics ^ pem_par_time +
+        parallel_exec_metrics_get_overheads(FullMetrics) * 2.0.
+
+:- func full_parallelisation_cost(full_parallelisation) = float.
+
+full_parallelisation_cost(FullParallelisation) = Cost :-
+    FullMetrics = FullParallelisation ^ fp_par_exec_metrics,
+    Cost = full_parallelisation_metrics_cost(FullMetrics).
+
 %----------------------------------------------------------------------------%
 
-:- pred add_one_goal_into_first_par_conj(incomplete_parallelisation::in,
-    incomplete_parallelisation::out) is det.
+% XXX
+% :- semipure pred add_goals_into_first_par_conj(
+%     bnb_state(full_parallelisation)::in,
+%     incomplete_parallelisation::in, incomplete_parallelisation::out) is multi.
+%
+% add_goals_into_first_par_conj(BNBState, !Parallelisation) :-
+%     FirstGoal0 = !.Parallelisation ^ ip_first_par_goal,
+%     (
+%         FirstGoal0 > 0,
+%         Goals = !.Parallelisation ^ ip_goals,
+%         Goal = lookup(Goals, FirstGoal0 - 1),
+%         can_parallelise_goal(Goal),
+%
+%         % There are goals before the parallel conjunction that can be included
+%         % in the parallel conjunction.
+%         add_one_goal_into_first_par_conj(!Parallelisation),
+%         semipure test_parallelisation(BNBState, !Parallelisation),
+%         semipure add_goals_into_first_par_conj(BNBState, !Parallelisation)
+%     ;
+%         true
+%     ).
+%
+% :- semipure pred add_goals_into_last_par_conj(
+%     bnb_state(full_parallelisation)::in,
+%     incomplete_parallelisation::in, incomplete_parallelisation::out) is multi.
+%
+% add_goals_into_last_par_conj(BNBState, !Parallelisation) :-
+%     NumGoals = ip_get_num_goals(!.Parallelisation),
+%     LastParGoal = !.Parallelisation ^ ip_last_par_goal,
+%     (
+%         LastParGoal < NumGoals - 1,
+%         Goals = !.Parallelisation ^ ip_goals,
+%         Goal = lookup(Goals, LastParGoal + 1),
+%         can_parallelise_goal(Goal),
+%
+%         % Try to move a goal from after the parallelisation into the
+%         % parallelisation.
+%         add_one_goal_into_last_par_conj(!Parallelisation),
+%         semipure test_parallelisation(BNBState, !Parallelisation),
+%         semipure add_goals_into_last_par_conj(BNBState, !Parallelisation)
+%     ;
+%         true
+%     ).
 
 %----------------------------------------------------------------------------%
 
-:- pred add_one_goal_into_last_par_conj(incomplete_parallelisation::in,
+:- pred add_one_goal_into_first_par_conj(incomplete_parallelisation::in,
     incomplete_parallelisation::out) is det.
 
 add_one_goal_into_first_par_conj(!Parallelisation) :-
@@ -769,6 +915,9 @@ add_one_goal_into_first_par_conj(!Parall
     !Parallelisation ^ ip_maybe_goals_before_cost := no,
     !Parallelisation ^ ip_maybe_par_cost_data := no.
 
+:- pred add_one_goal_into_last_par_conj(incomplete_parallelisation::in,
+    incomplete_parallelisation::out) is det.
+
 add_one_goal_into_last_par_conj(!Parallelisation) :-
     LastGoal0 = !.Parallelisation ^ ip_last_par_goal,
     LastGoal = LastGoal0 + 1,
Index: deep_profiler/autopar_types.m
===================================================================
RCS file: /home/mercury1/repository/mercury/deep_profiler/autopar_types.m,v
retrieving revision 1.3
diff -u -p -b -r1.3 autopar_types.m
--- deep_profiler/autopar_types.m	30 Mar 2012 04:05:11 -0000	1.3
+++ deep_profiler/autopar_types.m	4 Apr 2012 00:37:42 -0000
@@ -174,8 +174,6 @@
 
 :- type incomplete_parallelisation
     --->    incomplete_parallelisation(
-                ip_info                     :: implicit_parallelism_info,
-
                 ip_goals                    :: array(pard_goal_detail),
 
                 % The index of the first goal in the parallelised goals,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 489 bytes
Desc: Digital signature
URL: <http://lists.mercurylang.org/archives/reviews/attachments/20120404/75cb3f85/attachment.sig>
Previous message: [m-rev.] for review: try_and_insert
Next message: [m-rev.] diff: fix nondet stack exhaustion problem
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]
More information about the reviews mailing list