%---------------------------------------------------------------------------% % vim: ft=mercury ts=4 sw=4 et %---------------------------------------------------------------------------% % Copyright (C) 2011-2012 The University of Melbourne. % Copyright (C) 2014-2015, 2017, 2019, 2022 The Mercury team. % This file may only be copied under the terms of the GNU General % Public License - see the file COPYING in the Mercury distribution. %---------------------------------------------------------------------------% % % File: autopar_costs.m % Authors: pbone, zs. % % This module contains the code for computing costs of goals, as well as % costs up to the time of the production or first consumption of a variable. % %---------------------------------------------------------------------------% :- module mdprof_fb.automatic_parallelism.autopar_costs. :- interface. :- import_module mdbcomp. :- import_module mdbcomp.goal_path. :- import_module mdbcomp.program_representation. :- import_module mdprof_fb.automatic_parallelism.autopar_types. :- import_module measurements. :- import_module report. :- import_module var_use_analysis. :- import_module lazy. :- import_module list. :- import_module map. :- import_module maybe. %---------------------------------------------------------------------------% :- pred conj_calc_cost(list(pard_goal_detail)::in, int::in, goal_cost_csq::out) is det. :- pred disj_calc_cost(detism_rep::in, list(pard_goal_detail)::in, int::in, goal_cost_csq::out) is det. :- pred switch_calc_cost(list(case_rep(pard_goal_detail_annotation))::in, int::in, goal_cost_csq::out) is det. :- pred ite_calc_cost(pard_goal_detail::in, pard_goal_detail::in, pard_goal_detail::in, goal_cost_csq::out) is det. %---------------------------------------------------------------------------% :- pred atomic_goal_build_use_map(atomic_goal_rep::in, reverse_goal_path::in, implicit_parallelism_info::in, var_use_type::in, var_rep::in, map(var_rep, lazy(var_use_info))::in, map(var_rep, lazy(var_use_info))::out) is det. %---------------------------------------------------------------------------% :- pred implicit_par_info_intermodule_var_use(implicit_parallelism_info::in, intermodule_var_use::out) is det. %---------------------------------------------------------------------------% :- pred recursion_type_get_interesting_parallelisation_depth( recursion_type, maybe(recursion_depth)). :- mode recursion_type_get_interesting_parallelisation_depth( in(recursion_type_known_costs), out(maybe_yes(ground))) is det. :- mode recursion_type_get_interesting_parallelisation_depth( in, out) is det. %---------------------------------------------------------------------------% %---------------------------------------------------------------------------% :- implementation. :- import_module analysis_utils. :- import_module coverage. :- import_module mdbcomp.feedback. :- import_module mdbcomp.feedback.automatic_parallelism. :- import_module message. :- import_module profile. :- import_module program_representation_utils. :- import_module bool. :- import_module cord. :- import_module float. :- import_module int. :- import_module io. :- import_module require. :- import_module set. :- import_module string. %---------------------------------------------------------------------------% conj_calc_cost([], Calls, simple_goal_cost(Calls)). conj_calc_cost([Conjunct | Conjuncts], _, Cost) :- Coverage = Conjunct ^ goal_annotation ^ pgd_coverage, get_coverage_after_det(Coverage, After), conj_calc_cost(Conjuncts, After, ConjunctsCost), ConjunctCost = Conjunct ^ goal_annotation ^ pgd_cost, Cost = add_goal_costs_seq(ConjunctCost, ConjunctsCost). disj_calc_cost(Detism, Disjs, Calls, Cost) :- Solutions = detism_get_solutions(Detism), ( ( Solutions = at_most_zero_rep ; Solutions = at_most_one_rep ), % This is a semidet or committed choice disjunction; % it has no backtracking. disj_calc_cost_semidet(Disjs, Calls, Cost) ; Solutions = at_most_many_rep, disj_calc_cost_nondet(Disjs, Calls, Cost) ). :- pred disj_calc_cost_semidet(list(pard_goal_detail)::in, int::in, goal_cost_csq::out) is det. disj_calc_cost_semidet([], Calls, simple_goal_cost(Calls)). disj_calc_cost_semidet([Disjunct | Disjuncts], _, Cost) :- Coverage = Disjunct ^ goal_annotation ^ pgd_coverage, get_coverage_before_and_after_det(Coverage, Before, After), ( if Before = 0 then % Avoid a divide by zero. Cost = dead_goal_cost else _Successes = After, Failures = Before - After, disj_calc_cost_semidet(Disjuncts, Failures, FailureCost), DisjunctCost = Disjunct ^ goal_annotation ^ pgd_cost, SuccessCost = atomic_goal_cost(After), BranchCost = add_goal_costs_branch(Before, FailureCost, SuccessCost), Cost = add_goal_costs_seq(DisjunctCost, BranchCost) ). :- pred disj_calc_cost_nondet(list(pard_goal_detail)::in, int::in, goal_cost_csq::out) is det. disj_calc_cost_nondet([], Calls, simple_goal_cost(Calls)). disj_calc_cost_nondet([Disjunct | Disjuncts], Calls, Cost) :- Coverage = Disjunct ^ goal_annotation ^ pgd_coverage, get_coverage_before_det(Coverage, Before), ( if Before = 0 then % Avoid a divide by zero. Cost = dead_goal_cost else % TODO: This is very approximate, it calculates the percall cost. % For nondet code we probably want the per-call and per-redo cost. disj_calc_cost_nondet(Disjuncts, Calls, DisjunctsCost), DisjunctCost = Disjunct ^ goal_annotation ^ pgd_cost, Cost = add_goal_costs_seq(DisjunctCost, DisjunctsCost) ). switch_calc_cost([], Calls, simple_goal_cost(Calls)). switch_calc_cost([Case | Cases], TotalCalls, Cost) :- ( if TotalCalls = 0 then % Avoid a divide by zero. Cost = dead_goal_cost else Coverage = Case ^ cr_case_goal ^ goal_annotation ^ pgd_coverage, get_coverage_before_det(Coverage, CaseCalls), switch_calc_cost(Cases, TotalCalls - CaseCalls, CasesCost), CaseCost = Case ^ cr_case_goal ^ goal_annotation ^ pgd_cost, Cost = add_goal_costs_branch(TotalCalls, CaseCost, CasesCost) ). ite_calc_cost(Cond, Then, Else, Cost) :- CondCost = Cond ^ goal_annotation ^ pgd_cost, ThenCost = Then ^ goal_annotation ^ pgd_cost, ElseCost = Else ^ goal_annotation ^ pgd_cost, Coverage = Cond ^ goal_annotation ^ pgd_coverage, get_coverage_before_det(Coverage, Before), ThenElseCost = add_goal_costs_branch(Before, ThenCost, ElseCost), Cost = add_goal_costs_seq(CondCost, ThenElseCost). :- func simple_goal_cost(int) = goal_cost_csq. simple_goal_cost(Calls) = Cost :- ( if Calls = 0 then Cost = dead_goal_cost else Cost = atomic_goal_cost(Calls) ). %---------------------------------------------------------------------------% atomic_goal_build_use_map(AtomicGoal, RevGoalPath, Info, VarUseType, Var, !Map) :- atomic_goal_is_call(AtomicGoal, IsCall), ( IsCall = atomic_goal_is_trivial, ( VarUseType = var_use_consumption, CostUntilUse = 0.0 ; ( VarUseType = var_use_production ; VarUseType = var_use_other ), CostUntilUse = 1.0 ), LazyUse = val(var_use_info(CostUntilUse, 1.0, VarUseType)) ; IsCall = atomic_goal_is_call(Args), LazyUse = delay( (func) = compute_var_use_lazy(Info, RevGoalPath, Var, Args, VarUseType)) ), map.det_insert(Var, LazyUse, !Map). :- func compute_var_use_lazy(implicit_parallelism_info, reverse_goal_path, var_rep, list(var_rep), var_use_type) = var_use_info. compute_var_use_lazy(Info, RevGoalPath, Var, Args, VarUseType) = EarliestUse :- CliquePtr = Info ^ ipi_clique, map.lookup(Info ^ ipi_call_sites, RevGoalPath, CostAndCallee), ( if cost_and_callees_is_recursive(CliquePtr, CostAndCallee), map.search(Info ^ ipi_rec_call_sites, RevGoalPath, RecCost) then Cost = RecCost else Cost = CostAndCallee ^ cac_cost ), compute_var_use_lazy_arg(Info, Var, Args, CostAndCallee, Cost, VarUseType, Uses), ( VarUseType = var_use_consumption, ( Uses = [], unexpected($pred, "No uses") ; Uses = [FirstUse | OtherUses], find_earliest_use(FirstUse, OtherUses, EarliestUse) ) ; ( VarUseType = var_use_production ; VarUseType = var_use_other ), ( Uses = [], unexpected($pred, "No uses") ; Uses = [EarliestUse] ; Uses = [_, _ | _], unexpected($pred, "Too many uses") ) ). :- pred find_earliest_use(var_use_info::in, list(var_use_info)::in, var_use_info::out) is det. find_earliest_use(CurEarliest, [], CurEarliest). find_earliest_use(CurEarliest, [HeadVarUse | TailVarUses], Earliest) :- TimeCur = CurEarliest ^ vui_cost_until_use, TimeHead = HeadVarUse ^ vui_cost_until_use, ( if TimeCur < TimeHead then NextEarliest = CurEarliest else NextEarliest = HeadVarUse ), find_earliest_use(NextEarliest, TailVarUses, Earliest). :- pred compute_var_use_lazy_arg(implicit_parallelism_info::in, var_rep::in, list(var_rep)::in, cost_and_callees::in, cs_cost_csq::in, var_use_type::in, list(var_use_info)::out) is det. compute_var_use_lazy_arg(Info, Var, Args, CostAndCallee, Cost, VarUseType, Uses) :- ( if 0.0 < cs_cost_get_calls(Cost) then CostPercall = cs_cost_get_percall(Cost), list.member_indexes0(Var, Args, ArgNums), ( ArgNums = [_ | _], HigherOrder = CostAndCallee ^ cac_call_site_is_ho, ( HigherOrder = higher_order_call, % We cannot push signals or waits into higher order calls. pessimistic_var_use_info(VarUseType, CostPercall, Use), Uses = [Use] ; HigherOrder = first_order_call, ( if is_singleton(CostAndCallee ^ cac_callees, CalleePrime) then Callee = CalleePrime else unexpected($pred, "first-order call site has wrong number of CSDs") ), CSDPtr = Callee ^ c_csd, RecursionType = Info ^ ipi_recursion_type, recursion_type_get_interesting_parallelisation_depth( RecursionType, MaybeCurDepth), list.map( compute_var_use_2(Info, RecursionType, MaybeCurDepth, VarUseType, CostPercall, CSDPtr), ArgNums, Uses0), list.sort_and_remove_dups(Uses0, Uses) ) ; ArgNums = [], ( if VarUseType = var_use_consumption then Uses = [var_use_info(0.0, CostPercall, VarUseType)] else unexpected($pred, "Var use type most be consumption if " ++ "\\+ member(Var, Args)") ) ) else % This call site is never called. pessimistic_var_use_info(VarUseType, 0.0, Use), Uses = [Use] ). :- pred compute_var_use_2(implicit_parallelism_info::in, recursion_type::in, maybe(recursion_depth)::in, var_use_type::in, float::in, call_site_dynamic_ptr::in, int::in, var_use_info::out) is det. compute_var_use_2(Info, RecursionType, MaybeCurDepth, VarUseType, Cost, CSDPtr, ArgNum, Use) :- Deep = Info ^ ipi_deep, CliquePtr = Info ^ ipi_clique, implicit_par_info_intermodule_var_use(Info, FollowCallsAcrossModules), VarUseOptions = var_use_options(Deep, FollowCallsAcrossModules, VarUseType), get_call_site_dynamic_var_use_info_rec_level(CliquePtr, CSDPtr, ArgNum, RecursionType, MaybeCurDepth, Cost, set.init, VarUseOptions, MaybeUse), ( MaybeUse = ok(Use) ; MaybeUse = error(Error), pessimistic_var_use_info(VarUseType, Cost, Use), append_message(pl_csd(CSDPtr), warning_cannot_compute_first_use_time(Error), cord.empty, Messages), trace [io(!IO)] ( stderr_stream(Stderr, !IO), write_out_messages(Stderr, Messages, !IO) ) ). %---------------------------------------------------------------------------% implicit_par_info_intermodule_var_use(Info, FollowCallsAcrossModules) :- IntermoduleVarUse = Info ^ ipi_opts ^ cpcp_intermodule_var_use, ( IntermoduleVarUse = yes, FollowCallsAcrossModules = follow_any_call ; IntermoduleVarUse = no, ProcLabel = Info ^ ipi_proc_label, ( ProcLabel = str_ordinary_proc_label(_, _, Module, _, _, _) ; ProcLabel = str_special_proc_label(_, _, Module, _, _, _) ), FollowCallsAcrossModules = follow_calls_into_module(Module) ). %---------------------------------------------------------------------------% recursion_type_get_interesting_parallelisation_depth(RecursionType, MaybeDepth) :- ( RecursionType = rt_not_recursive, MaybeDepth = yes(recursion_depth_from_float(0.0)) ; RecursionType = rt_single(_, _, _DepthF, _, _), % The interesting recursion depth is at the bottom of the recursion, if % we can't parallelise here then there's no point parallelising the % loop in general. % XXX: Update metrics to understand that this is a loop. MaybeDepth = yes(recursion_depth_from_float(2.0)) ; ( RecursionType = rt_divide_and_conquer(_, _) ; RecursionType = rt_mutual_recursion(_) ; RecursionType = rt_other(_) ; RecursionType = rt_errors(_) ), MaybeDepth = no ). %---------------------------------------------------------------------------% :- end_module autopar_costs. %---------------------------------------------------------------------------%