mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-15 01:13:30 +00:00
compiler/switch_detection.m:
As above.
compiler/switch_candidates.m:
Adapt the underlying infrastructure.
444 lines
20 KiB
Mathematica
444 lines
20 KiB
Mathematica
%---------------------------------------------------------------------------%
|
|
% vim: ft=mercury ts=4 sw=4 et
|
|
%---------------------------------------------------------------------------%
|
|
% Copyright (C) 2016-2026 The Mercury team.
|
|
% This file may only be copied under the terms of the GNU General
|
|
% Public License - see the file COPYING in the Mercury distribution.
|
|
%---------------------------------------------------------------------------%
|
|
%
|
|
% File: switch_candidates.m.
|
|
% Author: zs.
|
|
%
|
|
% Sometimes, a disjunction can be converted into a switch
|
|
% on more than one variable. This module contains the logic we use
|
|
% to decide which variable we should choose.
|
|
%
|
|
%---------------------------------------------------------------------------%
|
|
|
|
:- module check_hlds.switch_candidates.
|
|
:- interface.
|
|
|
|
:- import_module hlds.
|
|
:- import_module hlds.hlds_goal.
|
|
:- import_module hlds.hlds_module.
|
|
:- import_module libs.
|
|
:- import_module libs.maybe_util.
|
|
:- import_module parse_tree.
|
|
:- import_module parse_tree.prog_data.
|
|
|
|
:- import_module list.
|
|
|
|
% A disjunction is a candidate for conversion to a switch on cs_var
|
|
% if the conditions that is_candidate_switch tests for are satisfied.
|
|
%
|
|
% The disjuncts of the original disjunction will each end up in one of
|
|
% the next three fields: cs_cases, cs_unreachable_case_goals, and
|
|
% cs_left_over_disjuncts.
|
|
%
|
|
% The left over disjuncts are the disjuncts that do not unify cs_var
|
|
% with any function symbol, at least in a way that is visible to
|
|
% switch detection. The disjuncts that *do* unify cs_var with a function
|
|
% symbol will be converted into cases. If two (or more) disjuncts
|
|
% unify cs_var with the same function symbol, those disjuncts will be put
|
|
% into the same case (with the case's goal being a disjunction containing
|
|
% just these disjuncts). Some cases may turn out to be unselectable,
|
|
% because cs_var's initial inst guarantees that it won't be unifiable
|
|
% with the case's function symbol; cs_unreachable_case_goals contains
|
|
% the bodies of such cases.
|
|
%
|
|
% Since we try to convert only non-empty disjunctions to switches,
|
|
% the above guarantees that at least one of cs_cases,
|
|
% cs_unreachable_case_goals and cs_left_over_disjuncts will be non-empty.
|
|
% If both cs_cases and cs_unreachable_case_goals would be empty for a given
|
|
% variable, then we won't consider converting the disjunction into
|
|
% a switch on that variable, so for any candidate_switch we *do* construct,
|
|
% at least one of cs_cases and cs_unreachable_case_goals will be nonempty.
|
|
% However, it is possible for either one of those fields to be empty
|
|
% if the other contains at least one entry.
|
|
%
|
|
% Some disjunctions can be converted into switches on more than one
|
|
% variable. We prefer to pick the variable that will allow determinism
|
|
% analysis to find the tightest possible bounds on the number of solutions.
|
|
% As a heuristic to help us choose well, we associate a rank with each
|
|
% candidate conversion scheme. If there is more than one candidate switch
|
|
% we can turn the disjunction into, we choose the candidate with
|
|
% the highest rank; we break any ties by picking the candidate
|
|
% with the smallest variable number.
|
|
%
|
|
% When we convert the disjunction into a switch based on the chosen
|
|
% candidate, we need to fill in the can_fail field of the switch
|
|
% we create. We get the value we need from the cs_can_fail field.
|
|
:- type candidate_switch
    --->    candidate_switch(
                % The variable that the disjunction would be turned into
                % a switch on.
                cs_var                      :: prog_var,

                % The switch arms built from the disjuncts that unify
                % cs_var with a function symbol.
                cs_cases                    :: list(case),

                % The bodies of the arms that cannot be selected, because
                % cs_var's initial inst rules out their function symbols.
                cs_unreachable_case_goals   :: list(hlds_goal),

                % The disjuncts that do not visibly unify cs_var with
                % any function symbol.
                cs_left_over_disjuncts      :: list(hlds_goal),

                % The heuristic rank we use to choose between candidates.
                cs_rank                     :: candidate_switch_rank,

                % The value to put into the can_fail field of the switch
                % if this candidate is the one chosen.
                cs_can_fail                 :: can_fail,

                % Copied unchanged from the argument given to
                % categorize_candidate_switch.
                cs_requant                  :: need_to_requantify
            ).
|
|
|
|
% The initial version of switch detection, which we used for a *long* time,
|
|
% looked at nonlocal variables in variable number order and stopped looking
|
|
% when it found a viable candidate switch.
|
|
%
|
|
% This could lead to a suboptimal outcome for two separate reasons.
|
|
%
|
|
% - First, when the user specifies which variable the switch should be on
|
|
% (via a require_switch_* scope), the committed-to variable is
|
|
% not necessarily the specified variable.
|
|
%
|
|
% - Second, switching on a variable later in the order can lead to a
|
|
% tighter determinism (because it leads to a cannot_fail switch, when
|
|
% the switch on the earlier, committed-to variable is can_fail).
|
|
%
|
|
% We fixed the first problem by putting RequiredVar at the start of
|
|
% VarsToTry in cases where MaybeRequiredVar is yes(RequiredVar), and
|
|
% we fixed the second by evaluating all candidate switches, without
|
|
% stopping when we found a viable one. The second fix obsoletes the first;
|
|
% if we look at all candidates and select the one with the best rank,
|
|
% then for correctness, it *doesn't matter* in what order we look at
|
|
% the nonlocals.
|
|
%
|
|
% It might matter for performance. When MaybeRequiredVar is
|
|
% yes(RequiredVar), we *could* arrange to look at RequiredVar first,
|
|
% and if it does yield a candidate switch with the best possible rank,
|
|
% stop looking at the other variables. However, this would require
|
|
% detect_switch_candidates_in_disj testing that condition after finding
|
|
% each candidate switch. Since require_switch_* scopes are relatively rare,
|
|
% the cost of the test in the common case where MaybeRequiredVar is "no"
|
|
% would probably cost us more overall than we could save in cases where
|
|
% MaybeRequiredVar is "yes".
|
|
|
|
% The order of preference that we use to decide which candidate switch
|
|
% to turn a disjunction into, for disjunctions in which we actually
|
|
% have a choice. The ranks are in order from the least attractive
|
|
% to the most attractive choice.
|
|
%
|
|
% In general, we prefer to have no disjuncts "left over" after we convert
|
|
% disjuncts to case arms. All else being equal, we also prefer switches
|
|
% in which the resulting switch arms cover all the function symbols
|
|
% in the type of the switched-on variable that are allowed by the instmap
|
|
% at entry to the switch.
|
|
%
|
|
% NOTE: The order of the function symbols below is significant.
% record_candidate_switch compares ranks using builtin compare, so a function
% symbol that appears later here beats every function symbol that appears
% earlier.
:- type candidate_switch_rank
    --->    some_leftover_can_fail(
                % At least one disjunct has to stay outside the switch,
                % and the switch itself will be can_fail. This is the
                % least useful kind of switch that switch detection
                % can create.
                int     % number of case arms
            )

    ;       some_leftover_cannot_fail(
                % At least one disjunct has to stay outside the switch,
                % but the switch itself will at least be cannot_fail
                % (though the code inside its arms may still fail).
                int     % number of case arms
            )

    ;       no_leftover_twoplus_cases_finite_can_fail
            % Every disjunct unifies the switch variable with a function
            % symbol, but at least one function symbol that the switch
            % variable can be bound to at the start of the disjunction
            % is not covered by any of the original disjuncts.
            % There are at least two cases, and at least one of them
            % is reachable (the rest may be unreachable).

    ;       no_leftover_one_case
            % With no_leftover_twoplus_cases_finite_can_fail, we *know*
            % that the resulting switch will be can_fail, and therefore
            % that it cannot be det. However, if all the disjuncts unify
            % this candidate var with the *same* function symbol, which is
            % the situation that no_leftover_one_case describes, then
            % we know that cse_detection.m will pull this deconstruction
            % unification out of all the disjuncts. Then, when
            % cse_detection.m repeats switch detection, there is at least
            % a chance that we will be able to transform this disjunction
            % into a det switch.

    ;       no_leftover_twoplus_cases_infinite_can_fail
            % Every disjunct unifies the switch variable with a function
            % symbol, but the domain of the switch variable is infinite,
            % so the cases cannot possibly cover all the function symbols
            % in the domain.
            %
            % I (zs) don't know of any strong argument for deciding
            % the relative order of
            % no_leftover_twoplus_cases_infinite_can_fail and
            % no_leftover_one_case either way. The current order replicates
            % the relative order between these two cases that was
            % effectively imposed by old code.

    ;       no_leftover_twoplus_cases_finite_cannot_fail
            % The best switch we can hope for in normal circumstances:
            % every disjunct unifies the switch variable with a function
            % symbol, and every function symbol that the switch variable
            % can be bound to at the start of the disjunction is covered
            % by at least one disjunct. There are at least two cases,
            % and at least one of them is reachable (the rest may be
            % unreachable).

    ;       all_disjuncts_are_unreachable
            % We can convert the entire disjunction to just `fail'.
            % This is possible only in the very rare case when all
            % disjuncts unify the switch variable with a function symbol
            % that the switch variable's initial inst rules out, but when
            % it *is* possible, the resulting goal will have the tightest
            % possible determinism we can hope for, namely failure.

    ;       no_leftover_twoplus_cases_explicitly_selected.
            % If the disjunction is what the programmer would consider
            % to be a switch on the variable that they explicitly said
            % that they expect the disjunction to switch on, i.e. if
            % all disjuncts unify the specified variable with a function
            % symbol and there are at least two cases, then follow the
            % programmer's lead. The programmer may prefer an incomplete
            % switch on the specified variable to a complete switch on
            % another variable. An example from the compiler: when
            % handling special options in options.m, we would prefer
            % the option handler to switch on the option, not on the kind
            % of data (none, bool, int, string, maybe_string) given to it.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
% is_candidate_switch(Cases, LeftOver):
%
% Succeed if either LeftOver (the disjuncts that were not turned into
% cases) is empty, or Cases contains at least two cases.
%
:- pred is_candidate_switch(list(case)::in, list(hlds_goal)::in) is semidet.
|
|
|
|
:- type maybe_required_switch_var
    --->    nrsv
            % "No required switch var": the goal is not inside a scope
            % that requires its top level goal to be a switch
            % on a specific variable.

    ;       rsv(prog_var).
            % "Required switch var": the goal *is* inside a scope
            % that requires its top level goal to be a switch
            % on *this* variable.
|
|
|
|
% categorize_candidate_switch(ModuleInfo, MaybeRequiredVar, Var, VarType,
%   VarInst0, Cases0, LeftOver, Requant, Candidate):
%
% Build the candidate_switch describing a switch on Var whose arms come
% from Cases0 and whose left over disjuncts are LeftOver. This works out
% which cases are reachable, whether the switch can fail, and the rank
% of the candidate. Requant is stored in the candidate as is.
%
:- pred categorize_candidate_switch(module_info::in,
    maybe_required_switch_var::in,
    prog_var::in, mer_type::in, mer_inst::in,
    list(case)::in, list(hlds_goal)::in,
    need_to_requantify::in, candidate_switch::out) is det.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
:- type maybe_candidate_switch
    --->    no_candidate_switch
            % No candidate switch has been recorded yet.

    ;       best_candidate_switch_so_far(candidate_switch).
            % The highest ranked candidate switch recorded so far.
|
|
|
|
% record_candidate_switch(ThisCandidate, !MaybeBestSoFar):
%
% Make ThisCandidate the best candidate so far if either there is no
% best candidate yet, or the rank of ThisCandidate is strictly greater
% than the rank of the current best candidate. If the two ranks are equal,
% keep the current best candidate.
%
:- pred record_candidate_switch(candidate_switch::in,
    maybe_candidate_switch::in,
    maybe_candidate_switch::out) is det.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
%---------------------------------------------------------------------------%
|
|
|
|
:- implementation.
|
|
|
|
:- import_module check_hlds.det_util.
|
|
:- import_module hlds.inst_test.
|
|
:- import_module hlds.type_util.
|
|
:- import_module parse_tree.prog_mode.
|
|
:- import_module parse_tree.prog_type.
|
|
|
|
:- import_module int.
|
|
:- import_module set_tree234.
|
|
:- import_module term.
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
is_candidate_switch(Cases0, LeftOver) :-
    (
        LeftOver = []
        % Every disjunct unifies Var with a function symbol. We treat this
        % as a candidate switch on Var *even if* all the disjuncts unify
        % Var with the *same* function symbol, because the resulting
        % single-arm switch may turn out to contain sub-switches
        % on the *arguments* of that function symbol.
    ;
        LeftOver = [_ | _],
        % Some disjunct does not unify Var with any function symbol.
        % In this situation, we insist on at least two cases (though
        % one of them may be unreachable). The presence of the LeftOver
        % disjunct(s) means that we need an outer disjunction anyway,
        % and having one of its arms be a single-arm switch would be
        % indistinguishable from the original disjunction in almost all
        % cases.
        %
        % The only exception I (zs) can think of would happen if the same
        % X = f(...) goal occurred inside all the disjuncts that would
        % end up in an inner disjunction inside the single-arm switch's
        % single arm, but not in the other disjuncts. In that case,
        % acting on the candidate we would create here may allow
        % cse_detection.m to make a change that could enable later
        % follow-on changes by switch detection itself. However, I have
        % never seen any real-life code that could benefit from this
        % theoretical possibility. Until we do see such code, the gain
        % from deleting this test would be minimal at best, while the cost
        % of deleting it would be to greatly increase the number of
        % candidates, and thus the time taken by switch detection.
        Cases0 = [_, _ | _]
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
categorize_candidate_switch(ModuleInfo, MaybeRequiredVar, Var, VarType,
        VarInst0, Cases0, LeftOver, Requant, Candidate) :-
    % Separate the reachable cases from the unreachable ones, and find out
    % whether a switch consisting of the reachable cases can fail.
    can_candidate_switch_fail(ModuleInfo, VarType, VarInst0, Cases0,
        CanFail, CasesMissing, ReachableCases, UnreachableGoals),
    (
        LeftOver = [_ | _],
        % Some disjuncts stay outside the switch. The rank records
        % the number of reachable arms and whether the switch can fail.
        list.length(ReachableCases, NumArms),
        (
            CanFail = can_fail,
            Rank = some_leftover_can_fail(NumArms)
        ;
            CanFail = cannot_fail,
            Rank = some_leftover_cannot_fail(NumArms)
        )
    ;
        LeftOver = [],
        (
            ReachableCases = [],
            Rank = all_disjuncts_are_unreachable
        ;
            ReachableCases = [_FirstReachable | OtherReachable],
            ( if
                OtherReachable = [],
                UnreachableGoals = []
            then
                Rank = no_leftover_one_case
            else if
                % From here on, there are at least two cases:
                % _FirstReachable is one, and whichever of OtherReachable
                % and UnreachableGoals is nonempty supplies the second.
                %
                % Var is already bound, so this tests whether the required
                % switch variable, if there is one, is Var itself.
                MaybeRequiredVar = rsv(Var)
            then
                Rank = no_leftover_twoplus_cases_explicitly_selected
            else
                (
                    CasesMissing = no_cases_missing,
                    Rank = no_leftover_twoplus_cases_finite_cannot_fail
                ;
                    CasesMissing = some_cases_missing,
                    Rank = no_leftover_twoplus_cases_finite_can_fail
                ;
                    CasesMissing = unbounded_cases,
                    Rank = no_leftover_twoplus_cases_infinite_can_fail
                )
            )
        )
    ),
    Candidate = candidate_switch(Var, ReachableCases, UnreachableGoals,
        LeftOver, Rank, CanFail, Requant).
|
|
|
|
:- type cases_missing
    --->    no_cases_missing
            % The cases cover every cons_id they need to cover;
            % the switch is cannot_fail.

    ;       some_cases_missing
            % The set of cons_ids to cover is finite, but the cases
            % do not cover all of them; the switch is can_fail.

    ;       unbounded_cases.
            % The type of the switched-on variable does not have
            % a known number of functors, so the switch is can_fail.
|
|
|
|
% can_candidate_switch_fail(ModuleInfo, VarType, VarInst0, Cases0,
%   SwitchCanFail, CasesMissing, Cases, UnreachableCaseGoals):
%
% Decide whether a switch on a variable of type VarType, whose initial
% inst is VarInst0, can fail if its arms are taken from Cases0.
% Return in Cases the cases we keep as arms, and return in
% UnreachableCaseGoals the goals that delete_unreachable_cases
% separates out of Cases0.
%
:- pred can_candidate_switch_fail(module_info::in, mer_type::in,
    mer_inst::in, list(case)::in, can_fail::out, cases_missing::out,
    list(case)::out, list(hlds_goal)::out) is det.

can_candidate_switch_fail(ModuleInfo, VarType, VarInst0, Cases0,
        SwitchCanFail, CasesMissing, Cases, UnreachableCaseGoals) :-
    ( if inst_is_bound_to_functors(ModuleInfo, VarInst0, Functors) then
        % The inst tells us which cons_ids the variable may be bound to.
        % Only the cases for those cons_ids are reachable, and only
        % those cons_ids need to be covered.
        type_to_ctor_det(VarType, VarTypeCtor),
        bound_functors_to_cons_ids(VarTypeCtor, Functors, AllowedConsIds),
        set_tree234.list_to_set(AllowedConsIds, AllowedConsIdSet),
        delete_unreachable_cases(Cases0, AllowedConsIdSet,
            Cases, UnreachableCaseGoals),
        switch_can_fail_with_bound_functors(AllowedConsIdSet, Cases,
            SwitchCanFail, CasesMissing)
    else
        % We have no inst information that would let us decide
        % that any case is unreachable.
        Cases = Cases0,
        UnreachableCaseGoals = [],
        ( if switch_type_num_functors(ModuleInfo, VarType, NumFunctors) then
            % We could test, for each cons_id of the type, whether
            % some case covers it. However, type checking ensures that
            % the set of covered cons_ids is a subset of the set of
            % cons_ids of the type, so checking whether the two sets
            % have the same cardinality is *equivalent* to checking
            % whether they are the same set.
            does_switch_cover_n_cases(NumFunctors, Cases,
                SwitchCanFail, CasesMissing)
        else
            % switch_type_num_functors fails only for types on which
            % you cannot have a complete switch, e.g. integers and strings.
            SwitchCanFail = can_fail,
            CasesMissing = unbounded_cases
        )
    ).
|
|
|
|
% switch_can_fail_with_bound_functors(InstConsIds, Cases,
%   SwitchCanFail, CasesMissing):
%
% The switch is cannot_fail if and only if every cons_id in InstConsIds
% is mentioned by at least one of the cases in Cases.
%
:- pred switch_can_fail_with_bound_functors(set_tree234(cons_id)::in,
    list(case)::in, can_fail::out, cases_missing::out) is det.

switch_can_fail_with_bound_functors(InstConsIds, Cases,
        SwitchCanFail, CasesMissing) :-
    acc_covered_functors(Cases, set_tree234.init, CoveredSet),
    % What remains are the cons_ids that the inst allows,
    % but that no case handles.
    set_tree234.difference(InstConsIds, CoveredSet, NotCoveredSet),
    ( if set_tree234.is_empty(NotCoveredSet) then
        CasesMissing = no_cases_missing,
        SwitchCanFail = cannot_fail
    else
        CasesMissing = some_cases_missing,
        SwitchCanFail = can_fail
    ).
|
|
|
|
% Add to !CoveredConsIds all the cons_ids mentioned in any of the cases,
% i.e. the main cons_id and the other cons_ids of each case.
%
:- pred acc_covered_functors(list(case)::in,
    set_tree234(cons_id)::in, set_tree234(cons_id)::out) is det.

acc_covered_functors([], !CoveredConsIds).
acc_covered_functors([HeadCase | TailCases], !CoveredConsIds) :-
    HeadCase = case(MainConsId, OtherConsIds, _),
    set_tree234.insert_list([MainConsId | OtherConsIds], !CoveredConsIds),
    acc_covered_functors(TailCases, !CoveredConsIds).
|
|
|
|
% does_switch_cover_n_cases(NumFunctors, Cases, SwitchCanFail, CasesMissing):
%
% The switch is cannot_fail if and only if the total number of cons_ids
% listed by the cases in Cases is equal to NumFunctors.
%
:- pred does_switch_cover_n_cases(int::in, list(case)::in,
    can_fail::out, cases_missing::out) is det.

does_switch_cover_n_cases(NumFunctors, Cases, SwitchCanFail, CasesMissing) :-
    count_covered_cons_ids(Cases, 0, NumCovered),
    ( if NumCovered = NumFunctors then
        CasesMissing = no_cases_missing,
        SwitchCanFail = cannot_fail
    else
        CasesMissing = some_cases_missing,
        SwitchCanFail = can_fail
    ).
|
|
|
|
% Add to the accumulator the number of cons_ids listed by the cases:
% for each case, one for its main cons_id, plus the number of
% its other cons_ids.
%
:- pred count_covered_cons_ids(list(case)::in, int::in, int::out) is det.

count_covered_cons_ids([], !NumCoveredConsIds).
count_covered_cons_ids([HeadCase | TailCases], !NumCoveredConsIds) :-
    HeadCase = case(_, OtherConsIds, _),
    list.length(OtherConsIds, NumOtherConsIds),
    !:NumCoveredConsIds = !.NumCoveredConsIds + NumOtherConsIds + 1,
    count_covered_cons_ids(TailCases, !NumCoveredConsIds).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
|
|
record_candidate_switch(ThisCandidate, MaybeBestSoFar0, MaybeBestSoFar) :-
    (
        MaybeBestSoFar0 = no_candidate_switch,
        % The first candidate we see is trivially the best so far.
        MaybeBestSoFar = best_candidate_switch_so_far(ThisCandidate)
    ;
        MaybeBestSoFar0 = best_candidate_switch_so_far(OldBest),
        ThisRank = ThisCandidate ^ cs_rank,
        OldBestRank = OldBest ^ cs_rank,
        % Replace the old best candidate only if the new candidate has
        % a strictly higher rank. If the ranks are equal, the candidate
        % that was recorded earlier wins.
        ( if compare((>), ThisRank, OldBestRank) then
            MaybeBestSoFar = best_candidate_switch_so_far(ThisCandidate)
        else
            MaybeBestSoFar = MaybeBestSoFar0
        )
    ).
|
|
|
|
%---------------------------------------------------------------------------%
|
|
:- end_module check_hlds.switch_candidates.
|
|
%---------------------------------------------------------------------------%
|