Use deterministic code for the branch and bound search.

deep_profiler/autopar_find_best_par.m:
    Use deterministic code for the branch and bound search.

    Conform to changes in autopar_types.m

deep_profiler/autopar_types.m:
    Remove the info field in the incomplete_parallelization structure.
This commit is contained in:
Paul Bone
2012-04-04 01:46:00 +00:00
parent 1725f301c9
commit 31f66b2a68
2 changed files with 389 additions and 242 deletions

View File

@@ -12,6 +12,9 @@
% This module contains the code for finding the best way to parallelize
% a given conjunction.
%
% The following compile-time flags may introduce trace goals:
% debug_branch_and_bound
%
%-----------------------------------------------------------------------------%
:- module mdprof_fb.automatic_parallelism.autopar_find_best_par.
@@ -46,18 +49,18 @@
:- implementation.
:- import_module branch_and_bound.
:- import_module mdbcomp.program_representation.
:- import_module mdprof_fb.automatic_parallelism.autopar_calc_overlap.
:- import_module mdprof_fb.automatic_parallelism.autopar_search_goals. % XXX
:- import_module measurements.
:- import_module array.
:- import_module benchmarking.
:- import_module digraph.
:- import_module float.
:- import_module io.
:- import_module int.
:- import_module map.
:- import_module pair.
:- import_module require.
:- import_module set.
:- import_module string.
@@ -400,23 +403,29 @@ find_best_parallelisation_complete_bnb(Info, Location, Algorithm,
io.flush_output(!IO)
),
branch_and_bound(
generate_parallelisations(Info, Algorithm, PreprocessedGoals),
parallelisation_get_objective_value,
Solutions, Profile),
promise_equivalent_solutions [GenParTime, EqualBestSolns, Profile] (
benchmark_det(
generate_parallelisations(Info, Algorithm),
PreprocessedGoals, EqualBestSolns - Profile, 1, GenParTime)
),
trace [compile_time(flag("debug_branch_and_bound")), io(!IO)] (
io.format("D: Solutions: %d\n",
[i(set.count(Solutions))], !IO),
io.format("D: Branch and bound profile: %s\n\n",
[i(list.length(EqualBestSolns))], !IO),
io.format("D: Branch and bound profile: %s\n",
[s(string(Profile))], !IO),
io.format("D: Time: %d ms\n\n",
[i(GenParTime)], !IO),
io.flush_output(!IO)
),
( set.remove_least(BestParallelisation, Solutions, _) ->
(
EqualBestSolns = [BestIncompleteParallelisation | _],
finalise_parallelisation(BestIncompleteParallelisation,
BestParallelisation),
MaybeBestParallelisation = yes(BestParallelisation)
;
% Solutions is empty.
EqualBestSolns = [],
ParalleliseDepConjs = Info ^ ipi_opts ^ cpcp_parallelise_dep_conjs,
(
ParalleliseDepConjs = parallelise_dep_conjs(_),
@@ -434,52 +443,211 @@ find_best_parallelisation_complete_bnb(Info, Location, Algorithm,
% Profiling information for an execution of the solver.
%
:- func parallelisation_get_objective_value(full_parallelisation) = float.
:- type bnb_profile
---> bnb_profile(
bnbp_incomplete_good_enough :: int,
bnbp_incomplete_not_good_enough :: int,
bnbp_complete_best_solution :: int,
bnbp_complete_equal_solution :: int,
bnbp_complete_worse_solution :: int,
bnbp_complete_non_solution :: int
).
parallelisation_get_objective_value(Parallelisation) = Value :-
Metrics = Parallelisation ^ fp_par_exec_metrics,
Value = Metrics ^ pem_par_time +
parallel_exec_metrics_get_overheads(Metrics) * 2.0.
% The equal best solutions found so far (if we have found some solutions),
% and the value of the objective function for these solutions.
% The objective function represents a cost, so we look for solutions
% with the smallest possible value of the objective function.
%
:- type best_solutions(T)
---> no_best_solutions
; best_solutions(
bs_solutions :: list(T),
bs_objective_value :: float
).
:- impure pred generate_parallelisations(implicit_parallelism_info::in,
:- pred generate_parallelisations(implicit_parallelism_info::in,
best_par_algorithm_simple::in, goals_for_parallelisation::in,
bnb_state(full_parallelisation)::in, full_parallelisation::out) is nondet.
pair(list(incomplete_parallelisation), bnb_profile)::out) is det.
generate_parallelisations(Info, Algorithm, GoalsForParallelisation,
BNBState, BestParallelisation) :-
some [!Parallelisation, !GoalGroups] (
start_building_parallelisation(Info, GoalsForParallelisation,
!:Parallelisation),
EqualBestSolns - FinalProfile) :-
some [!GoalGroups, !MaybeBestSolns, !Profile] (
start_building_parallelisation(GoalsForParallelisation,
IncompleteParallelisation0),
% Set the last scheduled goal to the goal at the end of the first
% group, popping the first group off the list. This initialises the
% parallelisation with the first goal group occurring first in the
% first parallel conjunction.
%
% We do this outside of the loop below because the first goal group
% will always be added to the first (initially empty) parallel
% conjunct; it does not make sense to have it start a new parallel
% conjunct.
!:GoalGroups = GoalsForParallelisation ^ gfp_groups,
start_first_par_conjunct(!GoalGroups, !Parallelisation),
impure generate_parallelisations_body(Info, BNBState, Algorithm,
!.GoalGroups, !Parallelisation),
( semipure should_expand_search(BNBState, Algorithm) ->
% Try to push goals into the first and last parallel conjuncts
% from outside the parallel conjunction.
semipure add_goals_into_first_par_conj(BNBState, !Parallelisation),
semipure add_goals_into_last_par_conj(BNBState, !Parallelisation)
(
!.GoalGroups = [],
unexpected($module, $pred, "no goal groups")
;
true
!.GoalGroups = [_],
unexpected($module, $pred, "only one goal group")
;
!.GoalGroups = [Group, _ | _],
!.GoalGroups = [_ | !:GoalGroups],
gg_get_details(Group, Index, Num, _),
LastScheduledGoal = Index + Num - 1,
IncompleteParallelisation1 =
IncompleteParallelisation0 ^ ip_last_scheduled_goal
:= LastScheduledGoal
),
finalise_parallelisation(!.Parallelisation, BestParallelisation)
),
semipure test_incomplete_solution(BNBState, BestParallelisation).
!:MaybeBestSolns = no_best_solutions,
!:Profile = bnb_profile(0, 0, 0, 0, 0, 0),
:- pred start_building_parallelisation(implicit_parallelism_info::in,
goals_for_parallelisation::in,
generate_parallelisations_loop(Info, Algorithm, !.GoalGroups,
IncompleteParallelisation1, !MaybeBestSolns, !Profile),
% XXX
% ( semipure should_expand_search(BNBState, Algorithm) ->
% % Try to push goals into the first and last parallel conjuncts
% % from outside the parallel conjunction.
% semipure add_goals_into_first_par_conj(BNBState, !Parallelisation),
% semipure add_goals_into_last_par_conj(BNBState, !Parallelisation)
% ;
% true
% ),
(
!.MaybeBestSolns = no_best_solutions,
EqualBestSolns = []
;
!.MaybeBestSolns = best_solutions(EqualBestSolns, _)
),
FinalProfile = !.Profile
).
:- pred generate_parallelisations_loop(implicit_parallelism_info::in,
best_par_algorithm_simple::in, list(goal_group(goal_classification))::in,
incomplete_parallelisation::in,
best_solutions(incomplete_parallelisation)::in,
best_solutions(incomplete_parallelisation)::out,
bnb_profile::in, bnb_profile::out) is det.
generate_parallelisations_loop(_, _, [],
!.IncompleteParallelisation, !MaybeBestSolns, !Profile) :-
% Verify that we have generated at least two parallel conjuncts.
( ip_get_num_parallel_conjuncts(!.IncompleteParallelisation) >= 2 ->
maybe_update_best_complete_parallelisation(!.IncompleteParallelisation,
!MaybeBestSolns, !Profile)
;
% This is not a solution, so do not try to update !MaybeBestSolns.
!Profile ^ bnbp_complete_non_solution :=
!.Profile ^ bnbp_complete_non_solution + 1
).
generate_parallelisations_loop(Info, Algorithm, [GoalGroup | GoalGroups],
!.IncompleteParallelisation, !MaybeBestSolns, !Profile) :-
LastScheduledGoal0 = !.IncompleteParallelisation ^ ip_last_scheduled_goal,
gg_get_details(GoalGroup, _Index, Num, _Classification),
LastScheduledGoal = LastScheduledGoal0 + Num,
some [!AddToLastParallelisation, !AddToNewParallelisation] (
!:AddToLastParallelisation = !.IncompleteParallelisation,
!:AddToNewParallelisation = !.IncompleteParallelisation,
% Consider adding this goal to the last parallel conjunct.
!AddToLastParallelisation ^ ip_last_scheduled_goal
:= LastScheduledGoal,
update_incomplete_parallelisation_cost(Info, !AddToLastParallelisation,
MaybeAddToLastCost),
% Consider putting this goal into a new parallel conjunct.
ParConjsRevLastGoal0 =
!.IncompleteParallelisation ^ ip_par_conjs_rev_last_goal,
ParConjsRevLastGoal = [LastScheduledGoal0 | ParConjsRevLastGoal0],
!AddToNewParallelisation ^ ip_par_conjs_rev_last_goal :=
ParConjsRevLastGoal,
!AddToNewParallelisation ^ ip_last_scheduled_goal := LastScheduledGoal,
update_incomplete_parallelisation_cost(Info, !AddToNewParallelisation,
MaybeAddToNewCost),
(
MaybeAddToLastCost = yes(AddToLastCost),
(
MaybeAddToNewCost = yes(AddToNewCost),
( AddToNewCost > AddToLastCost ->
% Adding to the last parallel conjunct is better.
Best0 = !.AddToLastParallelisation,
MaybeNextBest0 = yes(!.AddToNewParallelisation)
;
% Adding to a new parallel conjunct is better.
Best0 = !.AddToNewParallelisation,
MaybeNextBest0 = yes(!.AddToLastParallelisation)
)
;
MaybeAddToNewCost = no,
% Adding to the last parallel conjunct is the only option.
Best0 = !.AddToLastParallelisation,
MaybeNextBest0 = no
)
;
MaybeAddToLastCost = no,
% Adding to a new parallel conjunct is the only option.
Best0 = !.AddToNewParallelisation,
MaybeNextBest0 = no
)
),
% XXX: This ite could be simpler, and the algorithm would be closer to the
% one in the paper.
(
% Can we create an alternative branch here?
MaybeNextBest0 = yes(NextBest0),
% Should we create an alternative branch here?
should_expand_search(Algorithm, !.Profile)
->
% Create a branch.
incomplete_parallelisation_is_good_enough(Info, !.MaybeBestSolns,
Best0, Best, !Profile, BestGoodEnough),
(
BestGoodEnough = is_good_enough,
generate_parallelisations_loop(Info, Algorithm,
GoalGroups, Best, !MaybeBestSolns, !Profile)
;
BestGoodEnough = is_not_good_enough
),
incomplete_parallelisation_is_good_enough(Info, !.MaybeBestSolns,
NextBest0, NextBest, !Profile, NextBestGoodEnough),
(
NextBestGoodEnough = is_good_enough,
generate_parallelisations_loop(Info, Algorithm,
GoalGroups, NextBest, !MaybeBestSolns, !Profile)
;
NextBestGoodEnough = is_not_good_enough
)
;
incomplete_parallelisation_is_good_enough(Info, !.MaybeBestSolns,
Best0, Best, !Profile, BestGoodEnough),
(
BestGoodEnough = is_good_enough,
generate_parallelisations_loop(Info, Algorithm,
GoalGroups, Best, !MaybeBestSolns, !Profile)
;
BestGoodEnough = is_not_good_enough
)
).
:- pred start_building_parallelisation(goals_for_parallelisation::in,
incomplete_parallelisation::out) is det.
start_building_parallelisation(Info, PreprocessedGoals, Parallelisation) :-
start_building_parallelisation(PreprocessedGoals, Parallelisation) :-
GoalsArray = PreprocessedGoals ^ gfp_goals,
FirstParGoal = PreprocessedGoals ^ gfp_first_costly_goal,
LastParGoal = PreprocessedGoals ^ gfp_last_costly_goal,
NumCalls = PreprocessedGoals ^ gfp_num_calls,
DependencyGraphs = PreprocessedGoals ^ gfp_dependency_graphs,
Parallelisation = incomplete_parallelisation(Info, GoalsArray,
Parallelisation = incomplete_parallelisation(GoalsArray,
FirstParGoal, LastParGoal, FirstParGoal, [], NumCalls,
DependencyGraphs, no, no, no).
@@ -504,217 +672,102 @@ finalise_parallelisation(Incomplete, Best) :-
Metrics = finalise_parallel_exec_metrics(Metrics0),
par_conj_overlap_is_dependent(Overlap, IsDependent),
ParConjs = ip_get_par_conjs(Incomplete),
Best = fp_parallel_execution(GoalsBefore, ParConjs,
GoalsAfter, IsDependent, Metrics).
%----------------------------------------------------------------------------%
:- semipure pred add_goals_into_first_par_conj(
bnb_state(full_parallelisation)::in,
incomplete_parallelisation::in, incomplete_parallelisation::out) is multi.
add_goals_into_first_par_conj(BNBState, !Parallelisation) :-
FirstGoal0 = !.Parallelisation ^ ip_first_par_goal,
(
FirstGoal0 > 0,
Goals = !.Parallelisation ^ ip_goals,
Goal = lookup(Goals, FirstGoal0 - 1),
can_parallelise_goal(Goal),
% There are goals before the parallel conjunction that can be included
% in the parallel conjunction.
add_one_goal_into_first_par_conj(!Parallelisation),
semipure test_parallelisation(BNBState, !Parallelisation),
semipure add_goals_into_first_par_conj(BNBState, !Parallelisation)
;
true
).
:- semipure pred add_goals_into_last_par_conj(
bnb_state(full_parallelisation)::in,
incomplete_parallelisation::in, incomplete_parallelisation::out) is multi.
add_goals_into_last_par_conj(BNBState, !Parallelisation) :-
NumGoals = ip_get_num_goals(!.Parallelisation),
LastParGoal = !.Parallelisation ^ ip_last_par_goal,
(
LastParGoal < NumGoals - 1,
Goals = !.Parallelisation ^ ip_goals,
Goal = lookup(Goals, LastParGoal + 1),
can_parallelise_goal(Goal),
% Try to move a goal from after the parallelisation into the
% parallelisation.
add_one_goal_into_last_par_conj(!Parallelisation),
semipure test_parallelisation(BNBState, !Parallelisation),
semipure add_goals_into_last_par_conj(BNBState, !Parallelisation)
;
true
).
% Set the last scheduled goal to the goal at the end of the first group,
% popping the first group off the list. This initialises the
% parallelisation with the first goal group occurring first in the first
% parallel conjunction.
%
% This is done outside of the loop below since the first goal group will
% always be added to the first (initially empty) parallel conjunction.
%
:- pred start_first_par_conjunct(
list(goal_group(T))::in, list(goal_group(T))::out,
incomplete_parallelisation::in, incomplete_parallelisation::out) is det.
start_first_par_conjunct(!GoalGroups, !Parallelisation) :-
(
!.GoalGroups = [],
unexpected($module, $pred, "no goal groups")
;
!.GoalGroups = [Group | !:GoalGroups],
gg_get_details(Group, Index, Num, _),
LastScheduledGoal = Index + Num - 1,
!Parallelisation ^ ip_last_scheduled_goal := LastScheduledGoal
).
:- impure pred generate_parallelisations_body(implicit_parallelism_info::in,
bnb_state(full_parallelisation)::in, best_par_algorithm_simple::in,
list(goal_group(goal_classification))::in,
incomplete_parallelisation::in, incomplete_parallelisation::out) is nondet.
generate_parallelisations_body(_, _, _, [], !Parallelisation) :-
% Verify that we've generated at least two parallel conjuncts.
ip_get_num_parallel_conjuncts(!.Parallelisation) >= 2.
generate_parallelisations_body(Info, BNBState, Algorithm,
[GoalGroup | GoalGroups], !Parallelisation) :-
LastScheduledGoal0 = !.Parallelisation ^ ip_last_scheduled_goal,
gg_get_details(GoalGroup, _Index, Num, _Classification),
LastScheduledGoal = LastScheduledGoal0 + Num,
some [!AddToLastParallelisation, !AddToNewParallelisation] (
!:AddToLastParallelisation = !.Parallelisation,
!:AddToNewParallelisation = !.Parallelisation,
% Consider adding this goal to the last parallel conjunct.
!AddToLastParallelisation ^ ip_last_scheduled_goal
:= LastScheduledGoal,
score_parallelisation(BNBState, MaybeAddToLastScore,
!AddToLastParallelisation),
% Consider putting this goal into a new parallel conjunct.
ParConjsRevLastGoal0 = !.Parallelisation ^ ip_par_conjs_rev_last_goal,
ParConjsRevLastGoal = [LastScheduledGoal0 | ParConjsRevLastGoal0],
!AddToNewParallelisation ^ ip_par_conjs_rev_last_goal :=
ParConjsRevLastGoal,
!AddToNewParallelisation ^ ip_last_scheduled_goal := LastScheduledGoal,
score_parallelisation(BNBState, MaybeAddToNewScore,
!AddToNewParallelisation),
(
MaybeAddToLastScore = yes(AddToLastScore),
(
MaybeAddToNewScore = yes(AddToNewScore),
(
% Smaller scores are better.
AddToNewScore > AddToLastScore
->
% Adding to the last parallel conjunct is better.
BestOption = !.AddToLastParallelisation,
MaybeSndBestOption = yes(!.AddToNewParallelisation)
;
% Adding to a new parallel conjunct is better.
BestOption = !.AddToNewParallelisation,
MaybeSndBestOption = yes(!.AddToLastParallelisation)
)
;
MaybeAddToNewScore = no,
% Adding to the last parallel conjunct is the only option.
BestOption = !.AddToLastParallelisation,
MaybeSndBestOption = no
)
;
MaybeAddToLastScore = no,
% Adding to a new parallel conjunct is the only option.
BestOption = !.AddToNewParallelisation,
MaybeSndBestOption = no
)
),
(
MaybeSndBestOption = no,
!:Parallelisation = BestOption
;
MaybeSndBestOption = yes(SndBestOption),
(
% Should an alternative branch be created here?
semipure should_expand_search(BNBState, Algorithm)
->
% Create a branch.
impure add_alternative(BNBState),
% This tries the leftmost disjunct first, so try the best option
% there.
(
!:Parallelisation = BestOption
;
impure close_alternative(BNBState),
!:Parallelisation = SndBestOption
)
;
!:Parallelisation = BestOption
)
),
semipure test_parallelisation(BNBState, !Parallelisation),
impure generate_parallelisations_body(Info, BNBState, Algorithm,
GoalGroups, !Parallelisation).
Best = fp_parallel_execution(GoalsBefore, ParConjs, GoalsAfter,
IsDependent, Metrics).
% True if we should expand the search for parallelisation alternatives by
% creating a choice point.
%
:- semipure pred should_expand_search(bnb_state(T)::in,
best_par_algorithm_simple::in) is semidet.
:- pred should_expand_search(best_par_algorithm_simple::in, bnb_profile::in)
is semidet.
should_expand_search(BNBState, Algorithm) :-
should_expand_search(Algorithm, Profile) :-
Algorithm = bpas_complete(MaybeLimit),
(
MaybeLimit = yes(Limit),
semipure num_alternatives(BNBState, Open, Closed),
Open + Closed < Limit
NumIncompleteTests =
Profile ^ bnbp_incomplete_not_good_enough +
Profile ^ bnbp_incomplete_good_enough,
NumIncompleteTests < Limit
;
MaybeLimit = no
).
% Test the parallelisation against the best one known to the branch and
% bound solver.
%
:- semipure pred test_parallelisation(bnb_state(full_parallelisation)::in,
incomplete_parallelisation::in, incomplete_parallelisation::out)
is semidet.
:- pred maybe_update_best_complete_parallelisation(
incomplete_parallelisation::in,
best_solutions(incomplete_parallelisation)::in,
best_solutions(incomplete_parallelisation)::out,
bnb_profile::in, bnb_profile::out) is det.
test_parallelisation(BNBState, !Parallelisation) :-
Info = !.Parallelisation ^ ip_info,
calculate_parallel_cost(Info, !Parallelisation, CostData),
test_dependence(Info, CostData),
% XXX: We shouldn't need to finalize the parallelisation before testing it.
% This is a limitation of the branch and bound module.
finalise_parallelisation(!.Parallelisation, TestParallelisation),
semipure test_incomplete_solution(BNBState, TestParallelisation).
% Test the parallelisation against the best one known to the branch and
% bound solver.
%
:- pred score_parallelisation(bnb_state(full_parallelisation)::in,
maybe(float)::out,
incomplete_parallelisation::in, incomplete_parallelisation::out) is det.
score_parallelisation(BNBState, MaybeScore, !Parallelisation) :-
Info = !.Parallelisation ^ ip_info,
calculate_parallel_cost(Info, !Parallelisation, CostData),
( test_dependence(Info, CostData) ->
finalise_parallelisation(!.Parallelisation, TestParallelisation),
score_solution(BNBState, TestParallelisation, Score),
MaybeScore = yes(Score)
maybe_update_best_complete_parallelisation(CurSoln,
MaybeBestSolns0, MaybeBestSolns, !Profile) :-
% We don't use state variable syntax for MaybeBestSolns so that mmc can
% check that we've explicitly provided a value for MaybeBestSolns.
CurSolnCost = incomplete_parallelisation_cost(CurSoln),
(
MaybeBestSolns0 = no_best_solutions,
MaybeBestSolns = best_solutions([CurSoln], CurSolnCost),
!Profile ^ bnbp_complete_best_solution :=
!.Profile ^ bnbp_complete_best_solution + 1
;
MaybeScore = no
MaybeBestSolns0 = best_solutions(BestSolns0, BestCost0),
( CurSolnCost < BestCost0 ->
MaybeBestSolns = best_solutions([CurSoln], CurSolnCost),
!Profile ^ bnbp_complete_best_solution :=
!.Profile ^ bnbp_complete_best_solution + 1
; CurSolnCost = BestCost0 ->
BestSolns = [CurSoln | BestSolns0],
MaybeBestSolns = best_solutions(BestSolns, BestCost0),
!Profile ^ bnbp_complete_equal_solution :=
!.Profile ^ bnbp_complete_equal_solution + 1
;
% Do not update !MaybeBestSolns.
MaybeBestSolns = MaybeBestSolns0,
!Profile ^ bnbp_complete_worse_solution :=
!.Profile ^ bnbp_complete_worse_solution + 1
)
).
:- type is_good_enough
---> is_not_good_enough
; is_good_enough.
% Test the parallelisation against the best one known to the branch and
% bound solver.
%
:- pred incomplete_parallelisation_is_good_enough(
implicit_parallelism_info::in,
best_solutions(incomplete_parallelisation)::in,
incomplete_parallelisation::in, incomplete_parallelisation::out,
bnb_profile::in, bnb_profile::out, is_good_enough::out) is det.
incomplete_parallelisation_is_good_enough(Info, MaybeBestSolns,
!IncompleteParallelisation, !Profile, GoodEnough) :-
calculate_parallel_cost(Info, !IncompleteParallelisation, CostData),
( test_dependence(Info, CostData) ->
(
MaybeBestSolns = no_best_solutions,
!Profile ^ bnbp_incomplete_good_enough :=
!.Profile ^ bnbp_incomplete_good_enough + 1,
GoodEnough = is_good_enough
;
MaybeBestSolns = best_solutions(_, BestSolnCost),
CurIncompleteCost =
incomplete_parallelisation_cost(!.IncompleteParallelisation),
( CurIncompleteCost > BestSolnCost ->
!Profile ^ bnbp_incomplete_not_good_enough :=
!.Profile ^ bnbp_incomplete_not_good_enough + 1,
GoodEnough = is_not_good_enough
;
!Profile ^ bnbp_incomplete_good_enough :=
!.Profile ^ bnbp_incomplete_good_enough + 1,
GoodEnough = is_good_enough
)
)
;
!Profile ^ bnbp_incomplete_not_good_enough :=
!.Profile ^ bnbp_incomplete_not_good_enough + 1,
GoodEnough = is_not_good_enough
).
% Test that the parallelisation includes dependent parallelism
@@ -752,16 +805,109 @@ par_conj_overlap_is_dependent(peo_conjunction(Left, _, VarSet0), IsDependent) :-
)
).
% Compute the cost of the parallelisation.
%
:- pred update_incomplete_parallelisation_cost(implicit_parallelism_info::in,
incomplete_parallelisation::in, incomplete_parallelisation::out,
maybe(float)::out) is det.
update_incomplete_parallelisation_cost(Info, !IncompleteParallelisation,
MaybeCost) :-
calculate_parallel_cost(Info, !IncompleteParallelisation, CostData),
( test_dependence(Info, CostData) ->
Cost = incomplete_parallelisation_cost(!.IncompleteParallelisation),
MaybeCost = yes(Cost)
;
MaybeCost = no
).
:- func incomplete_parallelisation_cost(incomplete_parallelisation) = float.
incomplete_parallelisation_cost(IncompleteParallelisation) = Cost :-
MaybeCostData = IncompleteParallelisation ^ ip_maybe_par_cost_data,
(
MaybeCostData = yes(CostData)
;
MaybeCostData = no,
unexpected($module, $pred,
"incomplete parallelisation has no cost data")
),
IncompleteMetrics = CostData ^ pcd_par_exec_metrics,
FullMetrics = finalise_parallel_exec_metrics(IncompleteMetrics),
Cost = full_parallelisation_metrics_cost(FullMetrics).
% The objective function for the branch and bound search.
% This is ParTime + ParOverheads * 2. That is we are willing to pay
% 1 unit of parallel overheads to get a 2 unit improvement
% of parallel execution time.
%
% XXX This looks wrong, for two reasons. First, it would be simpler
% and faster to just multiply the costs of all the overheads by 2.
% Second, the fudge factor should be configurable.
%
:- func full_parallelisation_metrics_cost(parallel_exec_metrics) = float.
full_parallelisation_metrics_cost(FullMetrics) = Cost :-
Cost = FullMetrics ^ pem_par_time +
parallel_exec_metrics_get_overheads(FullMetrics) * 2.0.
:- func full_parallelisation_cost(full_parallelisation) = float.
full_parallelisation_cost(FullParallelisation) = Cost :-
FullMetrics = FullParallelisation ^ fp_par_exec_metrics,
Cost = full_parallelisation_metrics_cost(FullMetrics).
%----------------------------------------------------------------------------%
% XXX
% :- semipure pred add_goals_into_first_par_conj(
% bnb_state(full_parallelisation)::in,
% incomplete_parallelisation::in, incomplete_parallelisation::out) is multi.
%
% add_goals_into_first_par_conj(BNBState, !Parallelisation) :-
% FirstGoal0 = !.Parallelisation ^ ip_first_par_goal,
% (
% FirstGoal0 > 0,
% Goals = !.Parallelisation ^ ip_goals,
% Goal = lookup(Goals, FirstGoal0 - 1),
% can_parallelise_goal(Goal),
%
% % There are goals before the parallel conjunction that can be included
% % in the parallel conjunction.
% add_one_goal_into_first_par_conj(!Parallelisation),
% semipure test_parallelisation(BNBState, !Parallelisation),
% semipure add_goals_into_first_par_conj(BNBState, !Parallelisation)
% ;
% true
% ).
%
% :- semipure pred add_goals_into_last_par_conj(
% bnb_state(full_parallelisation)::in,
% incomplete_parallelisation::in, incomplete_parallelisation::out) is multi.
%
% add_goals_into_last_par_conj(BNBState, !Parallelisation) :-
% NumGoals = ip_get_num_goals(!.Parallelisation),
% LastParGoal = !.Parallelisation ^ ip_last_par_goal,
% (
% LastParGoal < NumGoals - 1,
% Goals = !.Parallelisation ^ ip_goals,
% Goal = lookup(Goals, LastParGoal + 1),
% can_parallelise_goal(Goal),
%
% % Try to move a goal from after the parallelisation into the
% % parallelisation.
% add_one_goal_into_last_par_conj(!Parallelisation),
% semipure test_parallelisation(BNBState, !Parallelisation),
% semipure add_goals_into_last_par_conj(BNBState, !Parallelisation)
% ;
% true
% ).
%----------------------------------------------------------------------------%
:- pred add_one_goal_into_first_par_conj(incomplete_parallelisation::in,
incomplete_parallelisation::out) is det.
%----------------------------------------------------------------------------%
:- pred add_one_goal_into_last_par_conj(incomplete_parallelisation::in,
incomplete_parallelisation::out) is det.
add_one_goal_into_first_par_conj(!Parallelisation) :-
FirstGoal0 = !.Parallelisation ^ ip_first_par_goal,
FirstGoal = FirstGoal0 - 1,
@@ -769,6 +915,9 @@ add_one_goal_into_first_par_conj(!Parallelisation) :-
!Parallelisation ^ ip_maybe_goals_before_cost := no,
!Parallelisation ^ ip_maybe_par_cost_data := no.
:- pred add_one_goal_into_last_par_conj(incomplete_parallelisation::in,
incomplete_parallelisation::out) is det.
add_one_goal_into_last_par_conj(!Parallelisation) :-
LastGoal0 = !.Parallelisation ^ ip_last_par_goal,
LastGoal = LastGoal0 + 1,

View File

@@ -174,8 +174,6 @@
:- type incomplete_parallelisation
---> incomplete_parallelisation(
ip_info :: implicit_parallelism_info,
ip_goals :: array(pard_goal_detail),
% The index of the first goal in the parallelised goals,