Files
mercury/compiler/module_baggage.m
2025-08-02 06:19:59 +02:00

431 lines
18 KiB
Mathematica

%---------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%---------------------------------------------------------------------------%
% Copyright (C) 2022-2025 The Mercury team.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%---------------------------------------------------------------------------%
%
% File: module_baggage.m.
%
% This module contains the information we gather while reading in a module
% other than the module's parse tree.
%
%---------------------------------------------------------------------------%
:- module parse_tree.module_baggage.
:- interface.
:- import_module libs.
:- import_module libs.file_util.
:- import_module libs.globals.
:- import_module libs.timestamp.
:- import_module mdbcomp.
:- import_module mdbcomp.sym_name.
:- import_module parse_tree.error_spec.
:- import_module parse_tree.file_kind.
:- import_module parse_tree.parse_error.
:- import_module parse_tree.prog_parse_tree.
:- import_module list.
:- import_module map.
:- import_module maybe.
:- import_module set.
%---------------------------------------------------------------------------%
% The "baggage" of a module is the information the compiler may need
% to know about the process of reading in both that module, and the other
% modules needed to augment it.
%
:- type module_baggage
---> module_baggage(
% The name of the source file and directory
% containing the module source.
%
% Currently, the source_file dir field is *always* set
% to dir.this_directory, so strictly speaking, this field
% is redundant. However, there are arguments for keeping it.
%
% 1. In the future, we may want to support reading in
% source files from places other than the current dir.
%
% 2. The source_file_dir field in the module_dep_summary
% structure is set by searching for a .module_dep summary
% file in a search path, so its contents need not be
% the current directory. Keeping it here as well
% is consistent with that.
%
% 3. A typical compiler execution creates few values
% of this type, so the cost of keeping this field
% is negligible.
mb_source_file_name :: file_name,
mb_source_file_dir :: dir_name,
% The name of the top-level module in the above source file.
mb_source_file_top_module_name :: module_name,
% The other modules included in the same source file,
% if this module is the top-level module in its file.
%
% Invariant: this is top_module(...) if and only if
% source_file_name = source_file_module_name.
mb_maybe_top_module :: maybe_top_module,
% The timestamp of the source file, if requested.
mb_maybe_timestamp :: maybe(timestamp),
% If we are doing smart recompilation, we need to keep
% the timestamps of the modules read in.
mb_maybe_timestamp_map :: maybe(module_timestamp_map),
mb_grabbed_file_map :: grabbed_file_map,
% Any errors we have encountered when reading in this module,
% and their diagnostics.
mb_errors :: read_module_errors
).
% A "burdened" augmented compilation unit contains, besides the augmented
% compilation unit itself, the "baggage" of the module parse trees
% (source, interface file, and optimization file) that the augmented
% compilation unit consists of.
%
:- type burdened_aug_comp_unit
---> burdened_aug_comp_unit(
bacu_baggage :: module_baggage,
bacu_acu :: aug_compilation_unit
).
:- type burdened_module
---> burdened_module(
bm_baggage :: module_baggage,
bm_module :: parse_tree_module_src
).
%---------------------------------------------------------------------------%
% The following sections define the types holding baggage components,
% and operations on those types.
%---------------------------------------------------------------------------%
:- type maybe_top_module
---> top_module(set(module_name))
% This module is the top module in its source file,
% and the argument gives the names of all its descendants
% (i.e. its children, its children's children, and so on).
; not_top_module.
% This module is NOT the top module in its source file.
% Return the module's nested childred IF it is a top module.
% Otherwise, return the empty set or list.
%
:- func get_nested_children_of_top_module(maybe_top_module) =
set(module_name).
:- func get_nested_children_list_of_top_module(maybe_top_module) =
list(module_name).
%---------------------------------------------------------------------------%
% When doing smart recompilation, we record, for each module,
% which of its versions (source .m file, generated .int0/.int3/.int2/.int
% file, or generated .opt/.transopt file) we read, and the modification
% time of the file.
%
% We also record whether code in the source module whose compilation
% we are concerned with may refer to items in the read-in module
% using names that are only partially qualified, or not.
%
% What follows in the next two paragraphs is conjecture by wangp,
% but I (zs) agree that it is likely a correct description of
% what we use this information for.
%
% Suppose a module msrc gets added to it a :- use_module declaration
% for a module m1, when previously it did not import module m1 in any
% shape or form. If msrc compiled cleanly until now, then a non-fully-
% qualified reference (e.g. f) to a item (type, data constructor,
% predicate etc) could not have been a reference to an item in m1,
% even if m1 defines an item named f of the relevant kind.
%
% On the other hand, consider a version of the situation above
% in which msrc gains access to m1 via an :- import_module declaration.
% In that case, if m1 defines an item named f, then a non-fully-qualified
% reference to that name *could* refer to that item in m1. If msrc
% compiled cleanly before, this f must have been resolved to refer
% to an item defined in some other module, but now that msrc imports m1,
% the non-fully-qualified reference to f has become ambiguous.
% This means that msrc must be recompiled, so that the recompilation
% may detect and report any such ambiguities.
%
% XXX The handling of the map looks wrong to me (zs), because
% when we read in e.g. mod1.int, we simply overwrite any existing entry
% in the module_timestamp_map for mod1. I see no documented argument
% anywhere for any of the following propositions, which could each make
% the above the right thing to do.
%
% - Proposition 1: when we add an entry for a module, the map
% cannot contain any previous entry for that module.
% I (zs) think this is the correct answer, but have no proof.
%
% - Proposition 2a: when we add an entry for a module, the map
% *can* contain a previous entry for the module, but for
% a file of that module than contains at most as much information
% as the previously-read-in file (as e.g. a .int2 file cannot
% contain more information than a .int file for the same
% module).
%
% - Proposition 2b: when we add an entry for a module, the map
% *can* contain a previous entry for the module, the avail_kind
% field in the new entry is at least as restrictive as in
% the old entry.
%
:- type module_timestamp_map == map(module_name, module_timestamp).
:- type module_timestamp
---> module_timestamp(
mts_file_kind :: file_kind,
mts_timestamp :: timestamp,
mts_avail_kind :: recomp_avail
).
:- type recomp_avail
---> recomp_avail_src
% The module is the souurce module.
; recomp_avail_int_import
% There was an ":- import_module" in the interface section,
% or in an ancestor module. In either case, references to
% items defined by the read-in module may be made anywhere
% in the source module.
; recomp_avail_imp_import
% There was an ":- import_module" in the implementation section,
% or in an optimization file. In either case, references to
% items defined by the read-in module may be made only in
% in the implementation section of the source module.
; recomp_avail_int_use
% There was an ":- use_module" in the interface section,
% or in an ancestor module. In either case, references to
% items defined by the read-in module may be made anywhere
% in the source module.
; recomp_avail_imp_use
% There was an ":- use_module" in the implementation section,
% or in an optimization file. In either case, references to
% items defined by the read-in module may be made only in
% in the implementation section of the source module.
; recomp_avail_int_use_imp_import.
% There was an ":- use_module" in the interface section
% and an ":- import_module" in the implementation section.
%---------------------------------------------------------------------------%
:- type grabbed_file
---> gf_src(parse_tree_module_src)
; gf_int0(parse_tree_int0, read_why_int0)
; gf_int1(parse_tree_int1, read_why_int1)
; gf_int2(parse_tree_int2, read_why_int2)
; gf_int3(parse_tree_int3, read_why_int3).
% This maps each module to the interface file (or in one case,
% the source file) that contains the most information about the module.
% So for example, when compiling module A which imports module B,
% and module B also imports module A (a circular dependency), then
% the presence of a gf_src entry for module A will tell us that there is
% no point in reading in A.int2.
%
:- type grabbed_file_map == map(module_name, grabbed_file).
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
% This predicate is used by
%
% deps_map.m
% d_file_deps.m
% make.get_module_dep_info.m
%
% for building dependency maps between modules.
%
:- pred parse_tree_src_to_burdened_module_list(globals::in, file_name::in,
read_module_errors::in, maybe(timestamp)::in, parse_tree_src::in,
list(error_spec)::out, list(burdened_module)::out) is det.
%---------------------------------------------------------------------------%
%
% Predicates for getting information from parse_tree_module_srcs.
%
:- pred parse_tree_module_src_get_int_imp_deps(parse_tree_module_src::in,
set(module_name)::out, set(module_name)::out) is det.
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
:- implementation.
:- import_module parse_tree.item_util.
:- import_module parse_tree.prog_data.
:- import_module parse_tree.prog_data_foreign.
:- import_module parse_tree.prog_item.
:- import_module parse_tree.split_parse_tree_src.
:- import_module cord.
:- import_module dir.
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
get_nested_children_of_top_module(MaybeTopModule) = Modules :-
(
MaybeTopModule = top_module(Modules)
;
MaybeTopModule = not_top_module,
set.init(Modules)
).
get_nested_children_list_of_top_module(MaybeTopModule) = Modules :-
(
MaybeTopModule = top_module(ModulesSet),
Modules = set.to_sorted_list(ModulesSet)
;
MaybeTopModule = not_top_module,
Modules = []
).
%---------------------------------------------------------------------------%
parse_tree_src_to_burdened_module_list(Globals, SourceFileName,
ReadModuleErrors, MaybeTimestamp, ParseTreeSrc,
!:Specs, BurdenedModules) :-
!:Specs = get_read_module_specs(ReadModuleErrors),
split_into_component_modules_perform_checks(Globals, ParseTreeSrc,
ParseTreeModuleSrcs, !Specs),
ParseTreeSrc = parse_tree_src(TopModuleName, _, _),
AllModuleNames = set.list_to_set(
list.map(parse_tree_module_src_project_name, ParseTreeModuleSrcs)),
% Including all the error_specs in ReadModuleErrors in *all* of the
% modules in ParseTreeSrc would needlessly duplicate them in situations
% in which ParseTreeSrc contains more than one module. We could avoid
% this duplication by adding the error_specs only to the baggage
% of the top-level module, or by adding the error_specs to the baggage
% of none of the modules, returning them as a separate output argument.
% We do the latter, both because it is simpler, and because (with our
% current design) there is no way to tell which submodule any error_spec
% actually complains about.
%
% We do duplicate the sets of error kinds, both fatal and nonfatal,
% in ReadModuleErrors, once for each module in ParseTreeSrc.
% This is an overestimation, since it associates each error kind
% not just with the (sub)module in which it occurred, but with
% all (sub)modules. However, (a) this is the best we can do without
% recording error kinds per submodule, not per file, and (b) this
% overestimation does not prevent us from doing anything we want to do,
% or are even are likely to want to do in the foreseeable future.
%
% XXX Unfortunately, nuking the error_specs while keeping the
% fatal error indications violates an invariant that the
% do_we_have_a_valid_module_dep predicate in write_deps_file.m
% depends on, so this code is commented out. This is the behavior
% preferred by all callers of this predicate.
% ReadModuleErrors =
% read_module_errors(FatalErrors, _, NonFatalErrors, _, _),
% ReadModuleErrorsToUse =
% read_module_errors(FatalErrors, [], NonFatalErrors, [], [])
list.map(
maybe_nested_init_burdened_module(SourceFileName,
TopModuleName, AllModuleNames,
ReadModuleErrors, MaybeTimestamp),
ParseTreeModuleSrcs, BurdenedModules).
:- pred maybe_nested_init_burdened_module(file_name::in,
module_name::in, set(module_name)::in, read_module_errors::in,
maybe(timestamp)::in, parse_tree_module_src::in, burdened_module::out)
is det.
maybe_nested_init_burdened_module(SourceFileName,
SourceFileModuleName, AllModuleNames, ReadModuleErrors, MaybeTimestamp,
ParseTreeModuleSrc, BurdenedModule) :-
ModuleName = ParseTreeModuleSrc ^ ptms_module_name,
( if ModuleName = SourceFileModuleName then
set.delete(ModuleName, AllModuleNames, NestedModuleNames),
MaybeTopModule = top_module(NestedModuleNames)
else
MaybeTopModule = not_top_module
),
MaybeTimestampMap = maybe.no,
GrabbedFileMap = map.singleton(ModuleName, gf_src(ParseTreeModuleSrc)),
Baggage = module_baggage(SourceFileName, dir.this_directory,
SourceFileModuleName, MaybeTopModule,
MaybeTimestamp, MaybeTimestampMap, GrabbedFileMap, ReadModuleErrors),
BurdenedModule = burdened_module(Baggage, ParseTreeModuleSrc).
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
parse_tree_module_src_get_int_imp_deps(ParseTreeModuleSrc, IntDeps, ImpDeps) :-
ImportUseMap = ParseTreeModuleSrc ^ ptms_import_use_map,
map.foldl2(add_module_dep, ImportUseMap,
set.init, IntDeps, set.init, ImpDeps).
:- pred add_module_dep(module_name::in, maybe_implicit_import_and_or_use::in,
set(module_name)::in, set(module_name)::out,
set(module_name)::in, set(module_name)::out) is det.
add_module_dep(ModuleName, MaybeImplicit, !IntDeps, !ImpDeps) :-
(
MaybeImplicit = explicit_avail(SectionImportUse),
Section = section_import_and_or_use_int_imp(SectionImportUse)
;
MaybeImplicit = implicit_avail(Implicit, MaybeExplicit),
(
( Implicit = implicit_int_import
; Implicit = implicit_int_use
),
ImplicitSection = ms_interface
;
Implicit = implicit_imp_use,
ImplicitSection = ms_implementation
),
(
MaybeExplicit = no,
Section = ImplicitSection
;
MaybeExplicit = yes(SectionImportUse),
ExplicitSection =
section_import_and_or_use_int_imp(SectionImportUse),
( if
( ImplicitSection = ms_interface
; ExplicitSection = ms_interface
)
then
Section = ms_interface
else
Section = ms_implementation
)
)
),
(
Section = ms_interface,
set.insert(ModuleName, !IntDeps)
;
Section = ms_implementation,
set.insert(ModuleName, !ImpDeps)
).
:- func section_import_and_or_use_int_imp(section_import_and_or_use)
= module_section.
section_import_and_or_use_int_imp(SectionImportUse) = Section :-
(
( SectionImportUse = int_import(_)
; SectionImportUse = int_use(_)
; SectionImportUse = int_use_imp_import(_, _)
),
Section = ms_interface
;
( SectionImportUse = imp_import(_)
; SectionImportUse = imp_use(_)
),
Section = ms_implementation
).
%---------------------------------------------------------------------------%
:- end_module parse_tree.module_baggage.
%---------------------------------------------------------------------------%