Files
mercury/deep_profiler/measurements.m
Zoltan Somogyi 429652d68a Clarify the code of add_goal_costs_seq.
And add documentation and an XXX.
2020-10-16 07:26:38 +11:00

1259 lines
43 KiB
Mathematica

%---------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%---------------------------------------------------------------------------%
% Copyright (C) 2001, 2004-2006, 2008-2012 The University of Melbourne.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%---------------------------------------------------------------------------%
%
% Authors: conway, zs.
%
% This module defines the data structures that store deep profiling
% measurements and the operations on them.
%
%---------------------------------------------------------------------------%
:- module measurements.
:- interface.
:- import_module array.
:- import_module list.
:- import_module maybe.
:- import_module mdbcomp.
:- import_module mdbcomp.feedback.
:- import_module mdbcomp.feedback.automatic_parallelism.
:- import_module measurement_units.
%---------------------------------------------------------------------------%
:- type own_prof_info.
:- type inherit_prof_info.
:- func calls(own_prof_info) = int.
:- func exits(own_prof_info) = int.
:- func fails(own_prof_info) = int.
:- func redos(own_prof_info) = int.
:- func excps(own_prof_info) = int.
:- func quanta(own_prof_info) = int.
:- func callseqs(own_prof_info) = int.
:- func allocs(own_prof_info) = int.
:- func words(own_prof_info) = int.
:- func zero_own_prof_info = own_prof_info.
:- pred is_zero_own_prof_info(own_prof_info::in) is semidet.
:- func inherit_quanta(inherit_prof_info) = int.
:- func inherit_callseqs(inherit_prof_info) = int.
:- func inherit_allocs(inherit_prof_info) = int.
:- func inherit_words(inherit_prof_info) = int.
:- func zero_inherit_prof_info = inherit_prof_info.
:- pred is_zero_inherit_prof_info(inherit_prof_info::in) is semidet.
:- func add_inherit_to_inherit(inherit_prof_info, inherit_prof_info)
= inherit_prof_info.
:- func add_own_to_inherit(own_prof_info, inherit_prof_info)
= inherit_prof_info.
:- func subtract_own_from_inherit(own_prof_info, inherit_prof_info)
= inherit_prof_info.
:- func subtract_inherit_from_inherit(inherit_prof_info, inherit_prof_info)
= inherit_prof_info.
:- func add_inherit_to_own(inherit_prof_info, own_prof_info) = own_prof_info.
:- func add_own_to_own(own_prof_info, own_prof_info) = own_prof_info.
:- func sum_own_infos(list(own_prof_info)) = own_prof_info.
:- func sum_inherit_infos(list(inherit_prof_info)) = inherit_prof_info.
:- func compress_profile(int, int, int, int, int, int, int, int)
= own_prof_info.
:- func compress_profile(own_prof_info) = own_prof_info.
:- pred decompress_profile(own_prof_info::in, int::out, int::out, int::out,
int::out, int::out, int::out, int::out, int::out, int::out) is det.
:- func own_to_string(own_prof_info) = string.
:- type is_active
---> is_active
; is_not_active.
% Tests if this profiling information represents an entity in the program
% that was inactive during the profiling run, e.g. a module or procedure
% that has had no calls made to it.
%
:- func compute_is_active(own_prof_info) = is_active.
%---------------------------------------------------------------------------%
:- type proc_cost_csq.
:- type cs_cost_csq.
% build_proc_cost_csq(NRCalls, RCalls, TotalCost) = ParentCost.
%
% NRCalls: the number of non-recursive calls into this context.
% RCalls: the number of recursive (inc mutually-recursive) calls into this
% context.
%
:- func build_proc_cost_csq(int, int, int) = proc_cost_csq.
% build_cs_cost_csq(Calls, TotalCost) = CallSiteCost.
%
:- func build_cs_cost_csq(int, float) = cs_cost_csq.
% build_cs_cost_csq_percall(Calls, PercallCost) = CallSiteCost.
%
:- func build_cs_cost_csq_percall(float, float) = cs_cost_csq.
% Call site cost structure that has a zero cost and zero calls.
%
:- func zero_cs_cost = cs_cost_csq.
% Retrieve the total cost of this context.
%
:- func proc_cost_get_total(proc_cost_csq) = float.
% Retrieve the number of calls made to this procedure.
%
:- func proc_cost_get_calls_total(proc_cost_csq) = int.
:- func proc_cost_get_calls_nonrec(proc_cost_csq) = int.
:- func proc_cost_get_calls_rec(proc_cost_csq) = int.
% Retrieve the total cost of a call site.
%
:- func cs_cost_get_total(cs_cost_csq) = float.
% Retrieve the per-call cost of a call site.
% Note that this may throw an exception if the number of calls is zero.
%
:- func cs_cost_get_percall(cs_cost_csq) = float.
% Retrieve the number of calls made from this call site.
%
:- func cs_cost_get_calls(cs_cost_csq) = float.
% Convert a call site cost to a proc cost.
%
:- pred cs_cost_to_proc_cost(cs_cost_csq::in, int::in,
proc_cost_csq::out) is det.
:- func cs_cost_per_proc_call(cs_cost_csq, proc_cost_csq) = cs_cost_csq.
%---------------------------------------------------------------------------%
% The cost of a goal.
%
:- type goal_cost_csq.
% atomic_goal_cost(Calls) = Cost.
%
% Cost is the cost of an atomic goal called Calls times.
%
:- func atomic_goal_cost(int) = goal_cost_csq.
% dead_goal_cost = Cost.
%
% Cost is the cost of a goal that is never called.
%
:- func dead_goal_cost = goal_cost_csq.
% call_goal_cost(NumCalls, PerCallCost) = Cost
%
:- func call_goal_cost(int, float) = goal_cost_csq.
:- func call_goal_cost(cs_cost_csq) = goal_cost_csq.
% add_goal_costs_seq(EarlierGoalCost, LaterGoalCost) = Cost.
%
% Compute the total cost of two goals executed one after the other,
% either because EarlierGoal and LaterGoal are in a conjunction,
% or because execution can backtrack from EarlierGoal to LaterGoal.
%
:- func add_goal_costs_seq(goal_cost_csq, goal_cost_csq) = goal_cost_csq.
% add_goal_costs_branch(TotalCalls, EarlierBranchCost, LaterBranchCost)
% = Cost.
%
% Compute the total cost of two alternative branches of execution.
%
:- func add_goal_costs_branch(int, goal_cost_csq, goal_cost_csq) =
goal_cost_csq.
:- func goal_cost_get_percall(goal_cost_csq) = float.
:- func goal_cost_get_total(goal_cost_csq) = float.
:- func goal_cost_get_calls(goal_cost_csq) = int.
:- func goal_cost_change_calls(goal_cost_csq, int) = goal_cost_csq.
%---------------------------------------------------------------------------%
:- type recursion_depth.
:- func recursion_depth_from_float(float) = recursion_depth.
:- func recursion_depth_to_float(recursion_depth) = float.
:- func recursion_depth_to_int(recursion_depth) = int.
:- pred recursion_depth_descend(recursion_depth, recursion_depth).
:- mode recursion_depth_descend(in, out) is det.
:- pred recursion_depth_is_base_case(recursion_depth::in) is semidet.
%---------------------------------------------------------------------------%
:- type static_coverage_info.
:- func zero_static_coverage = static_coverage_info.
:- pred add_coverage_arrays(array(int)::in,
static_coverage_info::in, static_coverage_info::out) is det.
:- pred array_to_static_coverage(array(int)::in, static_coverage_info::out)
is det.
:- func static_coverage_maybe_get_coverage_points(static_coverage_info) =
maybe(array(int)).
%---------------------------------------------------------------------------%
% The amount of parallelism either available or exploited.
%
:- type parallelism_amount.
:- func no_parallelism = parallelism_amount.
:- func some_parallelism(float) = parallelism_amount.
% sub_computation_parallelism(ParentParallelism, ChildRunnableProb,
% ChildParallelism, Parallelism).
% sub_computation_parallelism(ParentParallelism, ChildRunnableProb,
% Parallelism).
%
% Compute the total parallelism seen during the child's execution if the
% parent is executed in a context with parallelism and the child it's self
% has some amount of parallelism and there is a probability of
% ChildRunnableProb that the child will be executed.
%
% In the three argument version we assume that the child has no
% parallelism. In this version it is useful to think of ChildRunnableProb
% as the chance a forked off child (of a pair of two) will be 'runnable'
% during the execution of it's sibling.
%
:- pred sub_computation_parallelism(parallelism_amount::in, probability::in,
parallelism_amount::in, parallelism_amount::out) is det.
:- pred sub_computation_parallelism(parallelism_amount::in, probability::in,
parallelism_amount::out) is det.
% exceeded_desired_parallelism(DesiredParallelism, Parallelism)
%
% True iff Parallelism > DesiredParallelism
%
:- pred exceeded_desired_parallelism(float::in, parallelism_amount::in)
is semidet.
%---------------------------------------------------------------------------%
% Represent the metrics of part of a parallel execution.
%
:- type parallel_exec_metrics_incomplete.
% ParMetrics = init_parallel_exec_metrics_incomplete(PartMetricsA,
% TimeSignal, TimeBSeq, TimeBPar)
%
% Use this function to build parallel execution metrics for a parallel
% conjunction of any size greater than one.
%
% Although the parallel conjunction operator is operationally
% right-associative, parallel overlap due in dependant parallel
% conjunctions is easier to model if we consider it to be left associative.
% That is the conjunction ( A & B & C ) should be modeled as two conjuncts
% (A & B) (which is a conjunction itself and C. This is because how C
% waits for variables in either A or B may depend on How B waits for
% variables in A.
%
:- func init_parallel_exec_metrics_incomplete(parallel_exec_metrics_incomplete,
float, float, float, float, float) = parallel_exec_metrics_incomplete.
% StartMetrics = init_empty_parallel_exec_metrics(CostBefore, CostAfter,
% NumCalls, SparkCost, SparkDelay, BarrierCost, ContextWakeupDelay).
%
% Use this function to start with an empty set of metrics for an empty
% conjunction. Then use init_parallel_exec_metrics_incomplete to continue
% adding conjuncts on the right.
%
:- func init_empty_parallel_exec_metrics(float, float, int, float, float,
float, float) = parallel_exec_metrics_incomplete.
% Metrics = finalise_parallel_exec_metrics(IncompleteMetrics).
%
% Make the metrics structure complete.
%
:- func finalise_parallel_exec_metrics(parallel_exec_metrics_incomplete)
= parallel_exec_metrics.
% Calls = parallel_exec_metrics_get_num_calls(IncompleteMetrics).
%
% Get the number of calls.
%
:- func parallel_exec_metrics_get_num_calls(parallel_exec_metrics_incomplete)
= int.
%---------------------------------------------------------------------------%
:- pred weighted_average(list(float)::in, list(float)::in, float::out) is det.
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
:- implementation.
:- import_module float.
:- import_module int.
:- import_module require.
:- import_module string.
:- import_module array_util.
%---------------------------------------------------------------------------%
:- type own_prof_info
---> own_prof_all(
opa_exits :: int,
opa_fails :: int,
opa_redos :: int,
opa_excps :: int,
opa_quanta :: int,
opa_callseqs :: int,
opa_allocs :: int,
opa_words :: int
)
% implicit calls = exits + fails + excps - redos
; own_prof_det(
opd_exits :: int,
opd_quanta :: int,
opd_callseqs :: int,
opd_allocs :: int,
opd_words :: int
)
% implicit fails == redos == excps == 0
% implicit calls == exits
; own_prof_fast_det(
opfd_exits :: int,
opfd_callseqs :: int,
opfd_allocs :: int,
opfd_words :: int
)
% implicit fails == redos == excps == 0
% implicit calls == exits
% implicit quanta == 0
; own_prof_fast_nomem_semi(
opfns_exits :: int,
opfns_fails :: int,
opfns_callseqs :: int
).
% implicit redos == excps == 0
% implicit calls == exits + fails
% implicit quanta == 0
% implicit allocs == words == 0
:- type inherit_prof_info
---> inherit_prof_info(
ipo_quanta :: int,
ipo_callseqs :: int,
ipo_allocs :: int,
ipo_words :: int
).
calls(own_prof_fast_nomem_semi(Exits, Fails, _)) = Exits + Fails.
calls(own_prof_fast_det(Exits, _, _, _)) = Exits.
calls(own_prof_det(Exits, _, _, _, _)) = Exits.
calls(own_prof_all(Exits, Fails, Redos, Excps, _, _, _, _)) =
Exits + Fails + Excps - Redos.
exits(own_prof_fast_nomem_semi(Exits, _, _)) = Exits.
exits(own_prof_fast_det(Exits, _, _, _)) = Exits.
exits(own_prof_det(Exits, _, _, _, _)) = Exits.
exits(own_prof_all(Exits, _, _, _, _, _, _, _)) = Exits.
fails(own_prof_fast_nomem_semi(_, Fails, _)) = Fails.
fails(own_prof_fast_det(_, _, _, _)) = 0.
fails(own_prof_det(_, _, _, _, _)) = 0.
fails(own_prof_all(_, Fails, _, _, _, _, _, _)) = Fails.
redos(own_prof_fast_nomem_semi(_, _, _)) = 0.
redos(own_prof_fast_det(_, _, _, _)) = 0.
redos(own_prof_det(_, _, _, _, _)) = 0.
redos(own_prof_all(_, _, Redos, _, _, _, _, _)) = Redos.
excps(own_prof_fast_nomem_semi(_, _, _)) = 0.
excps(own_prof_fast_det(_, _, _, _)) = 0.
excps(own_prof_det(_, _, _, _, _)) = 0.
excps(own_prof_all(_, _, _, Excps, _, _, _, _)) = Excps.
quanta(own_prof_fast_nomem_semi(_, _, _)) = 0.
quanta(own_prof_fast_det(_, _, _, _)) = 0.
quanta(own_prof_det(_, Quanta, _, _, _)) = Quanta.
quanta(own_prof_all(_, _, _, _, Quanta, _, _, _)) = Quanta.
callseqs(own_prof_fast_nomem_semi(_, _, CallSeqs)) = CallSeqs.
callseqs(own_prof_fast_det(_, CallSeqs, _, _)) = CallSeqs.
callseqs(own_prof_det(_, _, CallSeqs, _, _)) = CallSeqs.
callseqs(own_prof_all(_, _, _, _, _, CallSeqs, _, _)) = CallSeqs.
allocs(own_prof_fast_nomem_semi(_, _, _)) = 0.
allocs(own_prof_fast_det(_, _, Allocs, _)) = Allocs.
allocs(own_prof_det(_, _, _, Allocs, _)) = Allocs.
allocs(own_prof_all(_, _, _, _, _, _, Allocs, _)) = Allocs.
words(own_prof_fast_nomem_semi(_, _, _)) = 0.
words(own_prof_fast_det(_, _, _, Words)) = Words.
words(own_prof_det(_, _, _, _, Words)) = Words.
words(own_prof_all(_, _, _, _, _, _, _, Words)) = Words.
zero_own_prof_info = own_prof_fast_nomem_semi(0, 0, 0).
is_zero_own_prof_info(own_prof_all(0, 0, 0, 0, 0, 0, 0, 0)).
is_zero_own_prof_info(own_prof_det(0, 0, 0, 0, 0)).
is_zero_own_prof_info(own_prof_fast_det(0, 0, 0, 0)).
is_zero_own_prof_info(own_prof_fast_nomem_semi(0, 0, 0)).
inherit_quanta(inherit_prof_info(Quanta, _, _, _)) = Quanta.
inherit_callseqs(inherit_prof_info(_, CallSeqs, _, _)) = CallSeqs.
inherit_allocs(inherit_prof_info(_, _, Allocs, _)) = Allocs.
inherit_words(inherit_prof_info(_, _, _, Words)) = Words.
zero_inherit_prof_info = inherit_prof_info(0, 0, 0, 0).
is_zero_inherit_prof_info(inherit_prof_info(0, 0, 0, 0)).
add_inherit_to_inherit(PI1, PI2) = SumPI :-
Quanta = inherit_quanta(PI1) + inherit_quanta(PI2),
CallSeqs = inherit_callseqs(PI1) + inherit_callseqs(PI2),
Allocs = inherit_allocs(PI1) + inherit_allocs(PI2),
Words = inherit_words(PI1) + inherit_words(PI2),
SumPI = inherit_prof_info(Quanta, CallSeqs, Allocs, Words).
add_own_to_inherit(PI1, PI2) = SumPI :-
Quanta = quanta(PI1) + inherit_quanta(PI2),
CallSeqs = callseqs(PI1) + inherit_callseqs(PI2),
Allocs = allocs(PI1) + inherit_allocs(PI2),
Words = words(PI1) + inherit_words(PI2),
SumPI = inherit_prof_info(Quanta, CallSeqs, Allocs, Words).
subtract_own_from_inherit(PI1, PI2) = SumPI :-
Quanta = inherit_quanta(PI2) - quanta(PI1),
CallSeqs = inherit_callseqs(PI2) - callseqs(PI1),
Allocs = inherit_allocs(PI2) - allocs(PI1),
Words = inherit_words(PI2) - words(PI1),
SumPI = inherit_prof_info(Quanta, CallSeqs, Allocs, Words).
subtract_inherit_from_inherit(PI1, PI2) = SumPI :-
Quanta = inherit_quanta(PI2) - inherit_quanta(PI1),
CallSeqs = inherit_callseqs(PI2) - inherit_callseqs(PI1),
Allocs = inherit_allocs(PI2) - inherit_allocs(PI1),
Words = inherit_words(PI2) - inherit_words(PI1),
SumPI = inherit_prof_info(Quanta, CallSeqs, Allocs, Words).
add_inherit_to_own(PI1, PI2) = SumPI :-
Exits = exits(PI2),
Fails = fails(PI2),
Redos = redos(PI2),
Excps = excps(PI2),
Quanta = inherit_quanta(PI1) + quanta(PI2),
CallSeqs = inherit_callseqs(PI1) + callseqs(PI2),
Allocs = inherit_allocs(PI1) + allocs(PI2),
Words = inherit_words(PI1) + words(PI2),
SumPI = compress_profile(Exits, Fails, Redos, Excps,
Quanta, CallSeqs, Allocs, Words).
add_own_to_own(PI1, PI2) = SumPI :-
Exits = exits(PI1) + exits(PI2),
Fails = fails(PI1) + fails(PI2),
Redos = redos(PI1) + redos(PI2),
Excps = excps(PI1) + excps(PI2),
Quanta = quanta(PI1) + quanta(PI2),
CallSeqs = callseqs(PI1) + callseqs(PI2),
Allocs = allocs(PI1) + allocs(PI2),
Words = words(PI1) + words(PI2),
SumPI = compress_profile(Exits, Fails, Redos, Excps,
Quanta, CallSeqs, Allocs, Words).
sum_own_infos(Owns) =
list.foldl(add_own_to_own, Owns, zero_own_prof_info).
sum_inherit_infos(Inherits) =
list.foldl(add_inherit_to_inherit, Inherits, zero_inherit_prof_info).
compress_profile(Exits, Fails, Redos, Excps, Quanta, CallSeqs, Allocs, Words)
= PI :-
( if
Redos = 0,
Excps = 0,
Quanta = 0,
Allocs = 0,
Words = 0
then
PI = own_prof_fast_nomem_semi(Exits, Fails, CallSeqs)
else if
Fails = 0,
Redos = 0,
Excps = 0
then
( if Quanta = 0 then
PI = own_prof_fast_det(Exits, CallSeqs, Allocs, Words)
else
PI = own_prof_det(Exits, Quanta, CallSeqs, Allocs, Words)
)
else
PI = own_prof_all(Exits, Fails, Redos, Excps, Quanta, CallSeqs,
Allocs, Words)
).
compress_profile(PI0) = PI :-
(
PI0 = own_prof_all(Exits, Fails, Redos, Excps, Quanta, CallSeqs,
Allocs, Words),
( if
Redos = 0,
Excps = 0,
Quanta = 0,
Allocs = 0,
Words = 0
then
PI = own_prof_fast_nomem_semi(Exits, Fails, CallSeqs)
else if
Fails = 0,
Redos = 0,
Excps = 0
then
( if Quanta = 0 then
PI = own_prof_fast_det(Exits, CallSeqs, Allocs, Words)
else
PI = own_prof_det(Exits, Quanta, CallSeqs, Allocs, Words)
)
else
PI = PI0
)
;
PI0 = own_prof_det(Exits, Quanta, CallSeqs, Allocs, Words),
( if Allocs = 0, Words = 0 then
PI = own_prof_fast_nomem_semi(Exits, 0, CallSeqs)
else if Quanta = 0 then
PI = own_prof_fast_det(Exits, CallSeqs, Allocs, Words)
else
PI = PI0
)
;
PI0 = own_prof_fast_det(Exits, CallSeqs, Allocs, Words),
( if Allocs = 0, Words = 0 then
PI = own_prof_fast_nomem_semi(Exits, 0, CallSeqs)
else
PI = PI0
)
;
PI0 = own_prof_fast_nomem_semi(_, _, _),
PI = PI0
).
%---------------------------------------------------------------------------%
decompress_profile(Own, Calls, Exits, Fails, Redos, Excps, Quanta, CallSeqs,
Allocs, Words) :-
(
Own = own_prof_all(Exits, Fails, Redos, Excps, Quanta, CallSeqs,
Allocs, Words),
Calls = Exits + Fails - Redos
;
Own = own_prof_det(Exits, Quanta, CallSeqs, Allocs, Words),
Calls = Exits,
Fails = 0,
Redos = 0,
Excps = 0
;
Own = own_prof_fast_det(Exits, CallSeqs, Allocs, Words),
Calls = Exits,
Fails = 0,
Redos = 0,
Excps = 0,
Quanta = 0
;
Own = own_prof_fast_nomem_semi(Exits, Fails, CallSeqs),
Calls = Exits + Fails,
Redos = 0,
Excps = 0,
Quanta = 0,
Allocs = 0,
Words = 0
).
own_to_string(own_prof_all(Exits, Fails, Redos, Excps, Quanta, CallSeqs,
Allocs, Words)) =
"all(" ++
string.int_to_string(Exits) ++ ", " ++
string.int_to_string(Fails) ++ ", " ++
string.int_to_string(Redos) ++ ", " ++
string.int_to_string(Excps) ++ ", " ++
string.int_to_string(Quanta) ++ ", " ++
string.int_to_string(CallSeqs) ++ ", " ++
string.int_to_string(Allocs) ++ ", " ++
string.int_to_string(Words) ++
")".
own_to_string(own_prof_det(Exits, Quanta, CallSeqs, Allocs, Words)) =
"det(" ++
string.int_to_string(Exits) ++ ", " ++
string.int_to_string(Quanta) ++ ", " ++
string.int_to_string(CallSeqs) ++ ", " ++
string.int_to_string(Allocs) ++ ", " ++
string.int_to_string(Words) ++
")".
own_to_string(own_prof_fast_det(Exits, CallSeqs, Allocs, Words)) =
"fast_det(" ++
string.int_to_string(Exits) ++ ", " ++
string.int_to_string(CallSeqs) ++ ", " ++
string.int_to_string(Allocs) ++ ", " ++
string.int_to_string(Words) ++
")".
own_to_string(own_prof_fast_nomem_semi(Exits, Fails, CallSeqs)) =
"fast_det(" ++
string.int_to_string(Exits) ++ ", " ++
string.int_to_string(Fails) ++ ", " ++
string.int_to_string(CallSeqs) ++
")".
compute_is_active(Own) = IsActive :-
( if
( Own = own_prof_all(0, 0, 0, 0, _, _, _, _)
; Own = own_prof_det(0, _, _, _, _)
; Own = own_prof_fast_det(0, _, _, _)
; Own = own_prof_fast_nomem_semi(0, 0, _)
)
then
IsActive = is_not_active
else
IsActive = is_active
).
%---------------------------------------------------------------------------%
:- type proc_cost_csq
---> proc_cost_csq(
% The number of non-recursive calls into this context.
% For example if this is a clique this is the number of
% calls made from the parent' clique to this one.
pcc_nr_calls :: int,
% The number of recursive calls into this context.
% This includes mutual recursion.
pcc_r_calls :: int,
% The number of callseq counts per call,
pcc_csq :: cost
).
:- type cs_cost_csq
---> cs_cost_csq(
% The number of calls (per parent invocation) through this
% call site.
cscc_calls :: float,
% The cost of the call site per call.
cscc_csq_cost :: cost
).
%---------------------------------------------------------------------------%
build_proc_cost_csq(NonRecursiveCalls, RecursiveCalls, TotalCost) =
proc_cost_csq(NonRecursiveCalls, RecursiveCalls,
cost_total(float(TotalCost))).
build_cs_cost_csq(Calls, TotalCost) =
cs_cost_csq(float(Calls), cost_total(TotalCost)).
build_cs_cost_csq_percall(Calls, PercallCost) =
cs_cost_csq(Calls, cost_per_call(PercallCost)).
zero_cs_cost =
% Build this using the percall structure so that if a percall cost is ever
% retrieved, we don't have to divide by zero. This is only a partial
% solution.
build_cs_cost_csq_percall(0.0, 0.0).
%---------------------------------------------------------------------------%
proc_cost_get_total(proc_cost_csq(NRCalls, RCalls, Cost)) =
cost_get_total(float(NRCalls + RCalls), Cost).
proc_cost_get_calls_total(proc_cost_csq(NRCalls, RCalls, _)) =
NRCalls + RCalls.
proc_cost_get_calls_nonrec(proc_cost_csq(NRCalls, _, _)) = NRCalls.
proc_cost_get_calls_rec(proc_cost_csq(_, RCalls, _)) = RCalls.
%---------------------------------------------------------------------------%
cs_cost_get_total(cs_cost_csq(Calls, Cost)) =
cost_get_total(Calls, Cost).
cs_cost_get_percall(cs_cost_csq(Calls, Cost)) =
cost_get_percall(Calls, Cost).
cs_cost_get_calls(cs_cost_csq(Calls, _)) = Calls.
%---------------------------------------------------------------------------%
cs_cost_to_proc_cost(cs_cost_csq(CSCalls, CSCost), TotalCalls,
proc_cost_csq(NRCalls, RCalls, PCost)) :-
NRCalls = round_to_int(CSCalls),
RCalls = TotalCalls - round_to_int(CSCalls),
% The negative one represents the cost of the callsite itself.
PCost = cost_total(cost_get_total(CSCalls, CSCost) - 1.0 * CSCalls).
cs_cost_per_proc_call(cs_cost_csq(CSCalls0, CSCost0), ParentCost) =
cs_cost_csq(CSCalls, CSCost) :-
TotalParentCalls = proc_cost_get_calls_nonrec(ParentCost),
CSCalls = CSCalls0 / float(TotalParentCalls),
CSCost = CSCost0 / TotalParentCalls.
%---------------------------------------------------------------------------%
:- type goal_cost_csq
---> dead_goal
; trivial_goal(
tg_calls :: int
)
; non_trivial_goal(
ntg_avg_cost :: cost,
ntg_calls :: int
).
atomic_goal_cost(Calls) = trivial_goal(Calls).
dead_goal_cost = dead_goal.
call_goal_cost(Calls, PercallCost) = non_trivial_goal(Cost, Calls) :-
Cost = cost_per_call(PercallCost).
call_goal_cost(CSCost) = non_trivial_goal(Cost, Calls) :-
Calls = round_to_int(cs_cost_get_calls(CSCost)),
Cost = CSCost ^ cscc_csq_cost.
add_goal_costs_seq(GoalCostsA, GoalCostsB) = GoalCosts :-
% GoalA is earlier and GoalB is later, during either forward
% or backward execution.
%
% XXX The reasons for the decisions taken by this predicate
% are not obvious, and may be invalid. Take with a grain of salt.
(
GoalCostsA = dead_goal,
GoalCosts = GoalCostsB
;
GoalCostsA = trivial_goal(CallsA),
(
( GoalCostsB = dead_goal
; GoalCostsB = trivial_goal(_CallsB)
),
GoalCosts = GoalCostsA
;
GoalCostsB = non_trivial_goal(CostB, CallsB),
CostTotal = cost_get_total(float(CallsB), CostB),
Cost = cost_total(CostTotal),
GoalCosts = non_trivial_goal(Cost, CallsA)
)
;
GoalCostsA = non_trivial_goal(CostA, CallsA),
(
( GoalCostsB = dead_goal
; GoalCostsB = trivial_goal(_CallsB)
),
GoalCosts = GoalCostsA
;
GoalCostsB = non_trivial_goal(CostB, CallsB),
Calls = CallsA,
CostTotal =
cost_get_total(float(CallsA), CostA) +
cost_get_total(float(CallsB), CostB),
Cost = cost_total(CostTotal),
( if
Calls = 0,
CostTotal \= 0.0
then
unexpected($pred, "Calls = 0, Cost \\= 0")
else
true
),
GoalCosts = non_trivial_goal(Cost, Calls)
)
).
add_goal_costs_branch(TotalCalls, A, B) = R :-
( if TotalCalls = 0 then
R = dead_goal
else
(
A = dead_goal,
CallsA = 0,
(
B = dead_goal,
unexpected($pred, "TotalCalls \\= 0 for a dead goal")
;
B = trivial_goal(CallsB),
R = trivial_goal(TotalCalls)
;
B = non_trivial_goal(Cost, CallsB),
R = non_trivial_goal(Cost, TotalCalls)
)
;
A = trivial_goal(CallsA),
(
B = dead_goal,
CallsB = 0,
R = trivial_goal(TotalCalls)
;
B = trivial_goal(CallsB),
R = trivial_goal(TotalCalls)
;
B = non_trivial_goal(Cost, CallsB),
R = non_trivial_goal(Cost, TotalCalls)
)
;
A = non_trivial_goal(CostA, CallsA),
(
B = dead_goal,
CallsB = 0,
R = non_trivial_goal(CostA, TotalCalls)
;
B = trivial_goal(CallsB),
R = non_trivial_goal(CostA, TotalCalls)
;
B = non_trivial_goal(CostB, CallsB),
Cost = sum_costs(float(CallsA), CostA, float(CallsB), CostB),
R = non_trivial_goal(Cost, CallsA + CallsB)
)
),
check_total_calls(CallsA, CallsB, TotalCalls)
).
:- pred check_total_calls(int::in, int::in, int::in) is det.
check_total_calls(CallsA, CallsB, TotalCalls) :-
Calls = CallsA + CallsB,
( if unify(Calls, TotalCalls) then
true
else
unexpected($pred, "TotalCalls \\= CallsA + CallsB")
).
goal_cost_get_percall(dead_goal) = 0.0.
goal_cost_get_percall(trivial_goal(_)) = 0.0.
goal_cost_get_percall(non_trivial_goal(Cost, Calls)) =
( if Calls = 0 then
0.0
else
cost_get_percall(float(Calls), Cost)
).
goal_cost_get_total(dead_goal) = 0.0.
goal_cost_get_total(trivial_goal(_)) = 0.0.
goal_cost_get_total(non_trivial_goal(Cost, Calls)) =
cost_get_total(float(Calls), Cost).
goal_cost_get_calls(dead_goal) = 0.
goal_cost_get_calls(trivial_goal(Calls)) = Calls.
goal_cost_get_calls(non_trivial_goal(_, Calls)) = Calls.
goal_cost_change_calls(dead_goal, _) =
unexpected($pred, "Cannot compute new cost").
goal_cost_change_calls(trivial_goal(_), Calls) = trivial_goal(Calls).
goal_cost_change_calls(non_trivial_goal(Cost0, Calls0), Calls) =
non_trivial_goal(Cost, Calls) :-
Cost = cost_per_call(cost_get_percall(float(Calls0), Cost0)).
%---------------------------------------------------------------------------%
:- type cost
---> cost_per_call(float)
; cost_total(float).
:- func cost_get_total(float, cost) = float.
cost_get_total(_, cost_total(Total)) = Total.
cost_get_total(Calls, cost_per_call(Percall)) = Calls * Percall.
:- func cost_get_percall(float, cost) = float.
cost_get_percall(Calls, cost_total(Total)) = Total / Calls.
cost_get_percall(_, cost_per_call(Percall)) = Percall.
:- func (cost) / (int) = cost.
Cost0 / Denom = Cost :-
(
Cost0 = cost_total(Total),
Cost = cost_total(Total / float(Denom))
;
Cost0 = cost_per_call(Percall),
Cost = cost_per_call(Percall / float(Denom))
).
:- func cost_by_weight(float, cost) = cost.
cost_by_weight(Weight, cost_total(Total)) =
cost_total(Total * Weight).
cost_by_weight(Weight, cost_per_call(Percall)) =
cost_per_call(Percall * Weight).
:- func sum_costs(float, cost, float, cost) = cost.
sum_costs(CallsA, CostA, CallsB, CostB) = cost_total(Sum) :-
Sum = CostTotalA + CostTotalB,
CostTotalA = cost_get_total(CallsA, CostA),
CostTotalB = cost_get_total(CallsB, CostB).
%---------------------------------------------------------------------------%
:- type recursion_depth
---> recursion_depth(float).
recursion_depth_from_float(F) = recursion_depth(F).
recursion_depth_to_float(recursion_depth(F)) = F.
recursion_depth_to_int(D) =
round_to_int(recursion_depth_to_float(D)).
recursion_depth_descend(recursion_depth(D), recursion_depth(D - 1.0)) :-
( if D >= 0.5 then
true
else
unexpected($pred,
format("Recursion depth will be less than zero: %f", [f(D - 1.0)]))
).
recursion_depth_is_base_case(recursion_depth(D)) :-
D < 0.5,
D >= -0.5.
%---------------------------------------------------------------------------%
:- type static_coverage_info == maybe(array(int)).
zero_static_coverage = no.
add_coverage_arrays(Array, no, yes(Array)).
add_coverage_arrays(NewArray, yes(!.Array), yes(!:Array)) :-
( if
array.bounds(NewArray, Min, Max),
array.bounds(!.Array, Min, Max)
then
!:Array = copy(!.Array),
array_foldl_from_0(
(pred(Index::in, E::in, A0::array_di, A::array_uo) is det :-
array.lookup(A0, Index, Value),
array.set(Index, Value + E, A0, A)
), NewArray, !Array)
else
unexpected($pred, "arrays' bounds do not match")
).
array_to_static_coverage(Array, yes(Array)).
static_coverage_maybe_get_coverage_points(MaybeCoverage) = MaybeCoverage.
%---------------------------------------------------------------------------%
% This type can be represented in multiple ways. A single value expressing
% the probable value, or a probable value and a measure of
% deviation/variance. Or as below three values that can express the
% skewedness of a distribution of values.
%
% For now represent it as a single scalar. Consider uncommenting the other
% two fields in the future.
%
:- type parallelism_amount
---> parallelism_amount(
% pa_best :: float,
% % An upper bound on the probable amount of parallelism.
% % IE: the most optimistic value.
%
% pa_worst :: float,
% % A lower bound on the probable amount of parallelism.
%
pa_likely :: float
% The likely amount of parallelism here.
).
no_parallelism = parallelism_amount(1.0).
some_parallelism(Num) = parallelism_amount(Num) :-
( if Num < 1.0 then
unexpected($pred, "Parallelism amount cannot ever be less than 1.0")
else
true
).
sub_computation_parallelism(ParentParallelism, Prob, ChildParallelism,
Parallelism) :-
probability_to_float(Prob) = ProbFloat,
ParentParallelism = parallelism_amount(ParLikely),
ChildParallelism = parallelism_amount(ChildLikely),
Likely = ParLikely + (ProbFloat * ChildLikely),
Parallelism = parallelism_amount(Likely).
sub_computation_parallelism(ParentParallelism, Prob, Parallelism) :-
sub_computation_parallelism(ParentParallelism, Prob, no_parallelism,
Parallelism).
exceeded_desired_parallelism(DesiredParallelism, Parallelism) :-
Parallelism = parallelism_amount(LikelyParallelism),
DesiredParallelism < LikelyParallelism.
%---------------------------------------------------------------------------%
:- type parallel_exec_metrics_incomplete
---> pem_incomplete(
pemi_time_before_conj :: float,
pemi_time_after_conj :: float,
pemi_num_calls :: int,
pemi_spark_cost :: float,
pemi_spark_delay :: float,
pemi_barrier_cost :: float,
pemi_context_wakeup_delay :: float,
% If there are no internal conjuncts then the parallel
% conjunction is empty.
pemi_internal ::
maybe(parallel_exec_metrics_internal)
).
:- type parallel_exec_metrics_internal
---> pem_left_most(
pemi_time_left_seq :: float,
pemi_time_left_par :: float,
pemi_time_left_signals :: float
% While the leftmost conjunct does have dead time it's not
% possible to calculate this until we know the parallel
% execution time of the whole conjunction, therefore it is not
% included here and is in parallel_exec_metrics_incomplete.
)
; pem_additional(
% The time of the left conjunct (that may be a conjunction).
pemi_time_left :: parallel_exec_metrics_internal,
% The additional cost of calling signal during this conjunct.
pemi_time_signals :: float,
% The additional cost of calling wait during this conjunct.
pemi_time_waits :: float,
% The time of the right conjunct if it is running after
% the left in normal sequential execution.
pemi_time_right_seq :: float,
% The time of the right conjunct if it is running in parallel
% with the left conjunct. Overheads are included in this value
% so it will usually be larger than time_right_seq.
pemi_time_right_par :: float,
% The dead time of this conjunct, This is the time that the
% context will be blocked on futures. It does not include the
% spark delay because the contact may not exist for most of
% that time.
pemi_time_right_dead :: float
).
init_parallel_exec_metrics_incomplete(Metrics0, TimeSignals, TimeWaits,
TimeBSeq, TimeBPar, TimeBDead) = Metrics :-
MaybeInternal0 = Metrics0 ^ pemi_internal,
(
MaybeInternal0 = yes(Internal0),
Internal = pem_additional(Internal0, TimeSignals, TimeWaits, TimeBSeq,
TimeBPar, TimeBDead)
;
MaybeInternal0 = no,
Internal = pem_left_most(TimeBSeq, TimeBPar, TimeSignals),
( if
TimeBDead = 0.0,
TimeWaits = 0.0
then
true
else
unexpected($pred, "TimeWaits != 0 or TimeBDead != 0")
)
),
Metrics = Metrics0 ^ pemi_internal := yes(Internal).
init_empty_parallel_exec_metrics(TimeBefore, TimeAfter, NumCalls, SparkCost,
SparkDelay, BarrierCost, ContextWakeupDelay) =
pem_incomplete(TimeBefore, TimeAfter, NumCalls, SparkCost, SparkDelay,
BarrierCost, ContextWakeupDelay, no).
finalise_parallel_exec_metrics(IncompleteMetrics) = Metrics :-
IncompleteMetrics = pem_incomplete(TimeBefore, TimeAfter, NumCalls,
SparkCost, SparkDelay, BarrierCost, ContextWakeupDelay, MaybeInternal),
(
MaybeInternal = yes(Internal)
;
MaybeInternal = no,
unexpected($pred, "cannot finalise empty parallel metrics")
),
BeforeAndAfterTime = TimeBefore + TimeAfter,
% Calculate par time.
NumConjuncts = parallel_exec_metrics_internal_get_num_conjs(Internal),
InnerParTime = parallel_exec_metrics_internal_get_par_time(Internal,
SparkDelay, NumConjuncts),
( if FirstConjDeadTime > 0.0 then
FirstConjWakeupPenalty = ContextWakeupDelay
else
FirstConjWakeupPenalty = 0.0
),
ParTime = InnerParTime + BeforeAndAfterTime + FirstConjWakeupPenalty,
% Calculate the sequential execution time.
InnerSeqTime = parallel_exec_metrics_internal_get_seq_time(Internal),
SeqTime = InnerSeqTime + BeforeAndAfterTime,
% Calculate the amount of time that the first conjunct is blocked for.
FirstConjParTime = pem_get_first_conj_par_time(Internal),
FirstConjDeadTime = InnerParTime - FirstConjParTime,
% Calculate the amount of time that the conjunction spends blocking on
% futures.
FutureDeadTime = pem_get_future_dead_time(Internal),
% Calculate the overheads of parallelisation.
%
% These are already included in ParTime, we don't need to add them, just
% calculate what they would be for reporting.
SparkCosts = float(NumConjuncts - 1) * SparkCost,
BarrierCosts = float(NumConjuncts) * BarrierCost,
SignalCosts = pem_get_signal_costs(Internal),
WaitCosts = pem_get_wait_costs(Internal),
Metrics = parallel_exec_metrics(NumCalls, SeqTime, ParTime, SparkCosts,
BarrierCosts, SignalCosts, WaitCosts, FirstConjDeadTime,
FutureDeadTime).
parallel_exec_metrics_get_num_calls(Pem) =
Pem ^ pemi_num_calls.
:- func parallel_exec_metrics_internal_get_num_conjs(
parallel_exec_metrics_internal) = int.
parallel_exec_metrics_internal_get_num_conjs(pem_left_most(_, _, _)) = 1.
parallel_exec_metrics_internal_get_num_conjs(
pem_additional(Left, _, _, _, _, _)) =
1 + parallel_exec_metrics_internal_get_num_conjs(Left).
% The expected parallel execution time, because this is the elapsed
% execution time of the whole parallel conjunct it must include dead time.
%
:- func parallel_exec_metrics_internal_get_par_time(
parallel_exec_metrics_internal, float, int) = float.
parallel_exec_metrics_internal_get_par_time(PEM, SparkDelay, Depth) = Time :-
(
PEM = pem_left_most(_, ParTime, _),
Time = ParTime
;
PEM = pem_additional(MetricsLeft, _, _, _,
TimeRightPar, TimeRightDead),
TimeRight = TimeRightPar + TimeRightDead +
SparkDelay * float(Depth - 1),
TimeLeft = parallel_exec_metrics_internal_get_par_time(MetricsLeft,
SparkDelay, Depth - 1),
Time = max(TimeLeft, TimeRight)
).
% The expected sequential execution time.
%
:- func parallel_exec_metrics_internal_get_seq_time(
parallel_exec_metrics_internal) = float.
parallel_exec_metrics_internal_get_seq_time(pem_left_most(Time, _, _)) = Time.
parallel_exec_metrics_internal_get_seq_time(pem_additional(MetricsLeft, _,
_, TimeRight, _, _)) = Time :-
TimeLeft = parallel_exec_metrics_internal_get_seq_time(MetricsLeft),
Time = TimeLeft + TimeRight.
% Get the parallel execution time of the first conjunct. This is used for
% calculating the first conjunct's dead time (above).
%
:- func pem_get_first_conj_par_time(parallel_exec_metrics_internal) = float.
pem_get_first_conj_par_time(pem_left_most(_, Time, _)) = Time.
pem_get_first_conj_par_time(pem_additional(Left, _, _, _, _, _)) =
Time :-
Time = pem_get_first_conj_par_time(Left).
:- func pem_get_future_dead_time(parallel_exec_metrics_internal) = float.
pem_get_future_dead_time(pem_left_most(_, _, _)) = 0.0.
pem_get_future_dead_time(pem_additional(Left, _, _, _, _, RightDeadTime)) =
RightDeadTime + LeftDeadTime :-
LeftDeadTime = pem_get_future_dead_time(Left).
% Get the overheads of parallelisation.
%
% Remember that these are already represented within the parallel execution
% time.
%
:- func pem_get_par_overheads(parallel_exec_metrics_internal) = float.
pem_get_par_overheads(pem_left_most(Seq, Par, _)) = Par - Seq.
pem_get_par_overheads(pem_additional(Left, _, _, Seq, Par, _)) =
Overheads :-
Overheads = LeftOverheads + Par - Seq,
pem_get_par_overheads(Left) = LeftOverheads.
:- func pem_get_signal_costs(parallel_exec_metrics_internal) = float.
pem_get_signal_costs(pem_left_most(_, _, SignalCosts)) = SignalCosts.
pem_get_signal_costs(pem_additional(Left, SignalsR, _, _, _, _)) = Signals :-
Signals = SignalsR + SignalsL,
SignalsL = pem_get_signal_costs(Left).
:- func pem_get_wait_costs(parallel_exec_metrics_internal) = float.
pem_get_wait_costs(pem_left_most(_, _, _)) = 0.0.
pem_get_wait_costs(pem_additional(Left, _, WaitsR, _, _, _)) = Waits :-
Waits = WaitsR + WaitsL,
WaitsL = pem_get_wait_costs(Left).
%---------------------------------------------------------------------------%
weighted_average(Weights, Values, Average) :-
list.foldl2_corresponding(update_weighted_sum,
Weights, Values, 0.0, WeightedSum, 0.0, TotalWeight),
( if abs(TotalWeight) < epsilon then
Average = 0.0
else
Average = WeightedSum / TotalWeight
).
:- pred update_weighted_sum(float::in, float::in,
float::in, float::out, float::in, float::out) is det.
update_weighted_sum(Weight, Value, !WeightedSum, !TotalWeight) :-
!:WeightedSum = !.WeightedSum + (Weight * Value),
!:TotalWeight = !.TotalWeight + Weight.
%---------------------------------------------------------------------------%
:- end_module measurements.
%---------------------------------------------------------------------------%