mercury/compiler/frameopt.m

%-----------------------------------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%-----------------------------------------------------------------------------%
% Copyright (C) 1994-2001,2003-2012 The University of Melbourne.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%-----------------------------------------------------------------------------%
%
% File: frameopt.m.
% Main author: zs.
%
% This module performs two kinds of transformation to optimize code that
% manipulates detstack frames.
%
% The first and more common transformation transforms code such as
%
%   proc_entry:
%       incr_sp(N)
%       stackvar(N) = succip
%       if (cond) goto L1
%       ...
%   L1: finalization
%       succip = stackvar(N)
%       decr_sp(N)
%       proceed
%
% into
%
%   proc_entry:
%       if (cond) goto L1
%       incr_sp(N)
%       stackvar(N) = succip
%       ...
%       finalization
%       succip = stackvar(N)
%       decr_sp(N)
%       proceed
%   L1: finalization
%       proceed
%
% The advantage is that we don't set up the stack frame unless we need it.
%
% The actual optimization is more complex than this, because we want to delay
% the construction of the stack frame across more than one jump, if this is
% possible.
%
% The second transformation transforms code such as
%
%   proc_entry:
%       incr_sp(N)
%       stackvar(N) = succip
%       ...
%       finalization
%       succip = stackvar(N)
%       decr_sp(N)
%       goto proc_entry
%
% into
%
%   proc_entry:
%       incr_sp(N)
%       stackvar(N) = succip
%   L1:
%       ...
%       succip = stackvar(N)
%       finalization
%       goto L1
%
% The advantage is that we don't destroy the stack frame just to set it up
% again. The restore of succip can be omitted if the procedure makes no calls,
% since in that case succip must still contain the value it had when this
% procedure was called.
%
% Since the second transformation is a bigger win, we prefer to use it
% whenever both transformations are possible.
%
% For predicates that live on the nondet stack, we attempt to perform
% only the second transformation.
%
% NOTE: This module cannot handle code sequences of the form
%
%   label1:
%   label2:
%
% Jump optimization converts any gotos to label1 to gotos to label2, making
% label1 unused, and label optimization removes unused labels. (This works
% for any number of labels in a sequence, not just two.) Therefore the
% handle_options module turns on the jump and label optimizations whenever
% frame optimization is turned on.
%
%-----------------------------------------------------------------------------%

:- module ll_backend.frameopt.
:- interface.

:- import_module ll_backend.llds.
:- import_module mdbcomp.prim_data.

:- import_module bool.
:- import_module counter.
:- import_module list.
:- import_module set_tree234.

%-----------------------------------------------------------------------------%

    % frameopt_main_det_stack(ProcLabel, !LabelCounter, !Instrs, AddComments,
    %   AnyChange):
    %
    % Attempt to update !Instrs using the one of the transformations
    % described above for procedures that live on the det stack.
    %
    % ProcLabel should be the ProcLabel of the procedure whose body !.Instrs
    % implements, and !.LabelCounter that procedure's label counter.
    % If frameopt_main allocates any labels, !:LabelCounter will reflect this.
    %
    % AnyChange says whether we performed any modifications.
    % If yes, then we also introduced some extra labels that should be
    % deleted and probably some jumps that could be profitably be
    % short-circuited.
    %
:- pred frameopt_main_det_stack(proc_label::in, counter::in, counter::out,
    list(instruction)::in, list(instruction)::out, bool::in, bool::out)
    is det.

    % frameopt_main_nondet_stack(ProcLabel, !LabelCounter, !Instrs,
    %   AddComments, AnyChange):
    %
    % The equivalent of frameopt_main_det_stack for procedures that live on
    % the nondet stack, but attempting only the transformation that delays
    % the creation of stack frames.
    %
:- pred frameopt_main_nondet_stack(proc_label::in, counter::in, counter::out,
    list(instruction)::in, list(instruction)::out, bool::in, bool::out)
    is det.

    % frameopt_keep_nondet_frame(ProcLabel, LayoutLabels,
    %   !LabelCounter, !Instrs, AnyChange):
    %
    % The equivalent of frameopt_main_det_stack for procedures that live on
    % the nondet stack, but attempting only the transformation that keeps
    % existing stack frames at tail calls. This should be called before
    % other LLDS optimizations on the procedure body.
    %
    % LayoutLabels should be the set of labels in the procedure with layout
    % structures, while MayAlterRtti should say whether we are allowed to
    % perform optimizations that may interfere with RTTI.
    %
:- pred frameopt_keep_nondet_frame(proc_label::in, set_tree234(label)::in,
    counter::in, counter::out, list(instruction)::in, list(instruction)::out,
    bool::out) is det.

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

:- implementation.

:- import_module ll_backend.opt_debug.
:- import_module ll_backend.opt_util.
:- import_module parse_tree.prog_data.

:- import_module assoc_list.
:- import_module int.
:- import_module map.
:- import_module maybe.
:- import_module pair.
:- import_module queue.
:- import_module require.
:- import_module set.
:- import_module string.

%-----------------------------------------------------------------------------%

frameopt_main_det_stack(ProcLabel, !C, Instrs0, Instrs, AddComments, Mod) :-
    opt_util.get_prologue(Instrs0, LabelInstr, Comments0, Instrs1),
    ( detect_det_entry(Instrs1, _, _, EntryInfo) ->
        some [!BlockMap] (
            map.init(!:BlockMap),
            divide_into_basic_blocks([LabelInstr | Instrs1], ProcLabel,
                BasicInstrs, !C),
            build_frame_block_map(BasicInstrs, EntryInfo, LabelSeq0, no, no,
                ProcLabel, !BlockMap, map.init, PredMap, !C,
                map.init, PreExitDummyLabelMap),
            analyze_block_map(LabelSeq0, PreExitDummyLabelMap, !BlockMap,
                KeepFrame),
            (
                KeepFrame = yes(FirstLabel - SecondLabel),
                CanClobberSuccip = can_clobber_succip(LabelSeq0, !.BlockMap),
                keep_frame_transform(LabelSeq0, FirstLabel, SecondLabel,
                    CanClobberSuccip, !BlockMap),
                LabelSeq = LabelSeq0,
                NewComment = llds_instr(comment("keeping stack frame"), ""),
                Comments = Comments0 ++ [NewComment],
                flatten_block_seq(LabelSeq, !.BlockMap, BodyInstrs),
                Instrs = Comments ++ BodyInstrs,
                Mod = yes
            ;
                KeepFrame = no,
                ( can_delay_frame(LabelSeq0, !.BlockMap) ->
                    delay_frame_transform(LabelSeq0, LabelSeq, EntryInfo,
                        ProcLabel, PredMap, !C, !BlockMap, AddComments,
                        TransformComments, DescComments, CanTransform),
                    (
                        CanTransform = can_transform,
                        Comments = Comments0 ++ TransformComments
                            ++ DescComments,
                        flatten_block_seq(LabelSeq, !.BlockMap, BodyInstrs),
                        Instrs = Comments ++ BodyInstrs,
                        Mod = yes
                    ;
                        CanTransform = cannot_transform,
                        maybe_add_comments(AddComments, DescComments,
                            Instrs0, Instrs, Mod)
                    )
                ;
                    Instrs = Instrs0,
                    Mod = no
                )
            )
        )
    ;
        Instrs = Instrs0,
        Mod = no
    ).

frameopt_main_nondet_stack(ProcLabel, !C, Instrs0, Instrs, AddComments, Mod) :-
    opt_util.get_prologue(Instrs0, LabelInstr, Comments0, Instrs1),
    ( detect_nondet_entry(Instrs1, _, _, EntryInfo) ->
        some [!BlockMap] (
            map.init(!:BlockMap),
            divide_into_basic_blocks([LabelInstr | Instrs1], ProcLabel,
                BasicInstrs, !C),
            build_frame_block_map(BasicInstrs, EntryInfo, LabelSeq0, no, no,
                ProcLabel, !BlockMap, map.init, PredMap, !C,
                map.init, PreExitDummyLabelMap),
            analyze_block_map(LabelSeq0, PreExitDummyLabelMap, !BlockMap,
                _KeepFrame),
            ( can_delay_frame(LabelSeq0, !.BlockMap) ->
                delay_frame_transform(LabelSeq0, LabelSeq, EntryInfo,
                    ProcLabel, PredMap, !C, !BlockMap, AddComments,
                    TransformComments, DescComments, CanTransform),
                (
                    CanTransform = can_transform,
                    Comments = Comments0 ++ TransformComments ++ DescComments,
                    flatten_block_seq(LabelSeq, !.BlockMap, BodyInstrs),
                    Instrs = Comments ++ BodyInstrs,
                    Mod = yes
                ;
                    CanTransform = cannot_transform,
                    maybe_add_comments(AddComments, DescComments,
                        Instrs0, Instrs, Mod)
                )
            ;
                Instrs = Instrs0,
                Mod = no
            )
        )
    ;
        Instrs = Instrs0,
        Mod = no
    ).

:- pred maybe_add_comments(bool::in, list(instruction)::in,
    list(instruction)::in, list(instruction)::out, bool::out) is det.

maybe_add_comments(Comments, DescComments, Instrs0, Instrs, Mod) :-
    (
        Comments = no,
        Instrs = Instrs0,
        Mod = no
    ;
        Comments = yes,
        Instrs =
            [llds_instr(comment("could not delay frame creation"), "")]
            ++ DescComments ++ Instrs0,
        Mod = yes
    ).

%-----------------------------------------------------------------------------%

frameopt_keep_nondet_frame(ProcLabel, LayoutLabels, !C, Instrs0, Instrs,
        Mod) :-
    opt_util.get_prologue(Instrs0, LabelInstr, Comments0, Instrs1),
    (
        nondetstack_setup(Instrs1, FrameInfo, Redoip, MkframeInstr, Remain),
        MkframeInstr = llds_instr(MkframeUinstr, MkframeComment),
        find_succeed_labels(Instrs1, map.init, SuccMap),
        counter.allocate(KeepFrameLabelNum, !C),
        KeepFrameLabel = internal_label(KeepFrameLabelNum, ProcLabel),
        keep_nondet_frame(Remain, Instrs2, ProcLabel, KeepFrameLabel,
            MkframeUinstr, SuccMap, LayoutLabels, no, yes)
    ->
        list.condense([[LabelInstr], Comments0,
            [llds_instr(mkframe(FrameInfo, no), MkframeComment),
            llds_instr(label(KeepFrameLabel),
                "tail recursion target, nofulljump"),
            llds_instr(assign(redoip_slot(lval(curfr)),
                const(llconst_code_addr(Redoip))), "")],
            Instrs2], Instrs),
        Mod = yes
    ;
        Instrs = Instrs0,
        Mod = no
    ).

:- pred nondetstack_setup(list(instruction)::in, nondet_frame_info::out,
    code_addr::out, instruction::out, list(instruction)::out) is semidet.

nondetstack_setup(Instrs0, FrameInfo, Redoip, MkframeInstr, Remain) :-
    Instrs0 = [MkframeInstr | Remain],
    MkframeInstr = llds_instr(mkframe(FrameInfo, yes(Redoip)), _),
    FrameInfo = ordinary_frame(_, _).

:- pred find_succeed_labels(list(instruction)::in, tailmap::in, tailmap::out)
    is det.

find_succeed_labels([], !SuccMap).
find_succeed_labels([Instr | Instrs], !SuccMap) :-
    Instr = llds_instr(Uinstr, _),
    (
        Uinstr = label(Label),
        opt_util.skip_comments(Instrs, TailInstrs),
        opt_util.is_succeed_next(TailInstrs, Between)
    ->
        map.det_insert(Label, Between, !SuccMap)
    ;
        true
    ),
    find_succeed_labels(Instrs, !SuccMap).

:- pred keep_nondet_frame(list(instruction)::in, list(instruction)::out,
    proc_label::in, label::in, instr::in, tailmap::in, set_tree234(label)::in,
    bool::in, bool::out) is det.

keep_nondet_frame([], [], _, _, _, _, _, !Changed).
keep_nondet_frame([Instr0 | Instrs0], Instrs, ProcLabel, KeepFrameLabel,
        PrevInstr, SuccMap, LayoutLabels, !Changed) :-
    Instr0 = llds_instr(Uinstr0, Comment),
    (
        % Look for nondet style tailcalls which do not need
        % a runtime check.
        Uinstr0 = llcall(code_label(entry_label(_, ProcLabel)),
            code_label(RetLabel), _, _, _, CallModel),
        CallModel = call_model_nondet(unchecked_tail_call),
        map.search(SuccMap, RetLabel, BetweenIncl),
        BetweenIncl = [llds_instr(livevals(_), _), llds_instr(goto(_), _)],
        PrevInstr = livevals(Livevals),
        not set_tree234.member(RetLabel, LayoutLabels)
    ->
        keep_nondet_frame(Instrs0, Instrs1, ProcLabel, KeepFrameLabel,
            Uinstr0, SuccMap, LayoutLabels, !.Changed, _),
        !:Changed = yes,
        NewComment = Comment ++ " (nondet tailcall)",
        Instrs = [
            llds_instr(livevals(Livevals), ""),
            llds_instr(goto(code_label(KeepFrameLabel)), NewComment)
            | Instrs1
        ]
    ;
        keep_nondet_frame(Instrs0, Instrs1, ProcLabel, KeepFrameLabel,
            Uinstr0, SuccMap, LayoutLabels, !Changed),
        Instrs = [Instr0 | Instrs1]
    ).

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

:- type frame_block_map(En, Ex)   ==  map(label, frame_block_info(En, Ex)).

:- type det_frame_block_map == frame_block_map(det_entry_info, det_exit_info).

:- type frame_block_info(En, Ex)
    --->    frame_block_info(
                fb_label        :: label,
                                % The label of the first instr.

                fb_instrs       :: list(instruction),
                                % The code of the block.

                fb_fallen_into  :: maybe(label),
                                % Does the previous block (if any)
                                % fall through to this block, and if yes,
                                % what is its label?

                fb_jump_dests   :: list(label),
                                % The labels we can jump to
                                % (not falling through).

                fb_fall_dest    :: maybe(label),
                                % The label we fall through to
                                % (if there is one).

                fb_type         :: block_type(En, Ex)
            ).

:- type block_type(EntryInfo, ExitInfo)
    --->    entry_block(EntryInfo)
    ;       ordinary_block(block_needs_frame, maybe_dummy)
    ;       exit_block(ExitInfo).

:- type maybe_dummy
    --->    is_not_dummy
    ;       is_post_entry_dummy
    ;       is_pre_exit_dummy.

    % Until 30 July 2010, we used to keep a set of reasons *why* a block
    % needs a frame. If that info can be useful to you, revert that diff
    % in your workspace.
:- type block_needs_frame
    --->    block_needs_frame
    ;       block_doesnt_need_frame.

:- type det_entry_info
    --->    det_entry(
                int,            % The frame size.
                string,         % The msg of the incr_sp instruction.
                stack_incr_kind
            ).

:- type det_exit_info
    --->    det_exit(           % This block contains det stack teardown
                                % and goto code.
                list(instruction),
                                % The instr that restores succip (if any).
                list(instruction),
                                % The livevals instr before the goto (if any).
                instruction
                                % The goto instr.
            ).

:- type nondet_entry_info
    --->    nondet_entry(
                string,         % The msg of the mkframe instruction.
                int,            % The frame size.
                code_addr       % The initial redoip.
            ).

:- type nondet_exit_info
    --->    nondet_plain_exit(  % This block contains nondet stack exit code
                                % that doesn't throw away the stack frame.
                list(instruction),
                                % The livevals instr before the goto (if any).
                instruction
                                % The goto(do_succeed) instr.
            )
    ;       nondet_teardown_exit(
                                % This block contains nondet stack exit code
                                % that *does* throw away the stack frame.
                instruction,    % The restore of the succip.
                instruction,    % The restore of the maxfr.
                instruction,    % The restore of the curfr.
                list(instruction),
                                % The livevals instr before the goto (if any).
                instruction
                                % The goto(entry(_, _)) instr.
            ).

:- typeclass block_entry_exit(En, Ex) <= ((En -> Ex), (Ex -> En)) where [
    pred detect_entry(list(instruction)::in, list(instruction)::out,
        list(instruction)::out, En::out) is semidet,
    pred detect_exit(list(instruction)::in, En::in, list(instruction)::out,
        list(instruction)::out, list(instruction)::out, Ex::out) is semidet,
    func late_setup_code(En) = list(instruction),
    func non_teardown_exit_code(Ex) = list(instruction),
    func describe_entry(En) = string,
    func describe_exit(maybe(proc_label), Ex) = string
].

:- instance block_entry_exit(det_entry_info, det_exit_info) where [
    pred(detect_entry/4) is detect_det_entry,
    pred(detect_exit/6)  is detect_det_exit,
    func(late_setup_code/1) is det_late_setup,
    func(non_teardown_exit_code/1) is det_non_teardown_exit_code,
    func(describe_entry/1) is describe_det_entry,
    func(describe_exit/2)  is describe_det_exit
].

:- instance block_entry_exit(nondet_entry_info, nondet_exit_info) where [
    pred(detect_entry/4) is detect_nondet_entry,
    pred(detect_exit/6)  is detect_nondet_exit,
    func(late_setup_code/1) is nondet_late_setup,
    func(non_teardown_exit_code/1) is nondet_non_teardown_exit_code,
    func(describe_entry/1) is describe_nondet_entry,
    func(describe_exit/2)  is describe_nondet_exit
].

%-----------------------------------------------------------------------------%

    % Add labels to the given instruction sequence so that
    % every basic block has labels around it.
    %
:- pred divide_into_basic_blocks(list(instruction)::in, proc_label::in,
    list(instruction)::out, counter::in, counter::out) is det.

divide_into_basic_blocks([], _, [], !C).
    % Control can fall off the end of a procedure if that procedure
    % ends with a call to another procedure that cannot succeed.
    % This is the only situation in which the base case can be reached.
divide_into_basic_blocks([Instr0 | Instrs0], ProcLabel, Instrs, !C) :-
    Instr0 = llds_instr(Uinstr0, _Comment),
    ( opt_util.can_instr_branch_away(Uinstr0) = yes ->
        (
            Instrs0 = [Instr1 | _],
            ( Instr1 = llds_instr(label(_), _) ->
                divide_into_basic_blocks(Instrs0, ProcLabel, Instrs1, !C),
                Instrs = [Instr0 | Instrs1]
            ;
                counter.allocate(N, !C),
                NewLabel = internal_label(N, ProcLabel),
                NewInstr = llds_instr(label(NewLabel), ""),
                divide_into_basic_blocks(Instrs0, ProcLabel, Instrs1, !C),
                Instrs = [Instr0, NewInstr | Instrs1]
            )
        ;
            Instrs0 = [],
            Instrs = [Instr0]
        )
    ;
        divide_into_basic_blocks(Instrs0, ProcLabel, Instrs1, !C),
        Instrs = [Instr0 | Instrs1]
    ).

:- pred flatten_block_seq(list(label)::in, frame_block_map(En, Ex)::in,
    list(instruction)::out) is det.

flatten_block_seq([], _, []).
flatten_block_seq([Label | Labels], BlockMap, Instrs) :-
    flatten_block_seq(Labels, BlockMap, RestInstrs),
    map.lookup(BlockMap, Label, BlockInfo),
    BlockInstrs = BlockInfo ^ fb_instrs,
    (
        list.split_last(BlockInstrs, MostInstrs, LastInstr),
        Labels = [NextLabel | _],
        LastInstr = llds_instr(goto(code_label(NextLabel)), _)
    ->
        % Optimize away the redundant goto, which we probably introduced.
        % The next invocation of jumpopt would also do this, but doing it here
        % is cheaper and may let us reach a fixpoint in the optimization
        % sequence earlier.
        Instrs = MostInstrs ++ RestInstrs
    ;
        Instrs = BlockInstrs ++ RestInstrs
    ).

%-----------------------------------------------------------------------------%

    % This type is explained in the big comment in the body of
    % build_frame_block_map below.
:- type pre_exit_dummy_label_map == map(label, label).

    % Given an instruction list in which labels mark the start of every
    % basic block, divide it up into basic blocks of one of three types:
    %
    % - entry blocks, blocks that contain only stack setup instructions
    %   (incr_sp and assignment of succip to the bottom stack slot);
    %
    % - ordinary blocks that contain neither a stack setup nor a
    %   stack teardown;
    %
    % - exit blocks that remove an existing stack frame.
    %
    % For such each block, create a frame_block_info structure that gives the
    % label starting the block, the instructions in the block, and its
    % type. Two of the fields of the frame_block_info structure are filled in
    % with dummy values; they will be filled in for real later.
    %
    % Put these frame_block_info structures into a table indexed by the label,
    % and return the sequence of labels of the blocks in their original
    % order.
    %
:- pred build_frame_block_map(list(instruction)::in, En::in,
    list(label)::out, maybe(label)::in, maybe(label)::in, proc_label::in,
    frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out,
    pred_map::in, pred_map::out, counter::in, counter::out,
    pre_exit_dummy_label_map::in, pre_exit_dummy_label_map::out) is det
    <= block_entry_exit(En, Ex).

build_frame_block_map([], _, [], _, _, _, !BlockMap, !PredMap, !C,
        !PreExitDummyLabelMap).
build_frame_block_map([Instr0 | Instrs0], EntryInfo, LabelSeq,
        MaybePrevLabel, FallInto, ProcLabel, !BlockMap, !PredMap, !C,
        !PreExitDummyLabelMap) :-
    ( Instr0 = llds_instr(label(Label), _) ->
        (
            MaybePrevLabel = yes(PrevLabel),
            map.det_insert(Label, PrevLabel, !PredMap)
        ;
            MaybePrevLabel = no
        ),
        (
            detect_entry(Instrs0, EntryInstrs, Instrs1, EntryInfo)
        ->
            % Create a block with just the entry instructions in it,
            % followed by an empty block. The reason why we need the empty
            % block is that process_frame_delay below doesn't handle any
            % transition from entry blocks directly to exit blocks.
            % Fixing that would be complicated; this fix is simpler.
            %
            % We would like to put EmptyLabel *after* FallThroughLabel
            % to make the next invocation of labelopt eliminate EmptyLabel
            % rather than FallThroughLabel, but doing that would require
            % updating LabelSeq and BlockMap.

            counter.allocate(EmptyN, !C),
            EmptyLabel = internal_label(EmptyN, ProcLabel),

            % The fb_jump_dests and fb_fall_dest fields are only dummies.
            FallThroughToEmptyInstr =
                llds_instr(goto(code_label(EmptyLabel)), "fall through"),
            BlockInfo = frame_block_info(Label,
                [Instr0 | EntryInstrs] ++ [FallThroughToEmptyInstr],
                FallInto, [], no, entry_block(EntryInfo)),

            % Ensure that the left over, non-entry part of the original block
            % starts with a label, and that the empty block ends with a goto
            % that falls through to this label.
            (
                Instrs1 = [Instr1 | _],
                Instr1 = llds_instr(label(NextLabelPrime), _)
            ->
                NextLabel = NextLabelPrime,
                Instrs2 = Instrs1
            ;
                counter.allocate(N, !C),
                NextLabel = internal_label(N, ProcLabel),
                NextLabelInstr = llds_instr(label(NextLabel), ""),
                Instrs2 = [NextLabelInstr | Instrs1]
            ),

            EmptyLabelInstr = llds_instr(label(EmptyLabel), ""),
            FallThroughFromEmptyInstr =
                llds_instr(goto(code_label(NextLabel)), "fall through"),
            % The fb_jump_dests and fb_fall_dest fields are only dummies.
            EmptyBlockType = ordinary_block(block_doesnt_need_frame,
                is_post_entry_dummy),
            EmptyBlockInfo = frame_block_info(EmptyLabel,
                [EmptyLabelInstr, FallThroughFromEmptyInstr],
                yes(Label), [], no, EmptyBlockType),

            build_frame_block_map(Instrs2, EntryInfo, LabelSeq0,
                yes(EmptyLabel), yes(EmptyLabel), ProcLabel, !BlockMap,
                !PredMap, !C, !PreExitDummyLabelMap),
            map.det_insert(Label, BlockInfo, !BlockMap),
            map.det_insert(EmptyLabel, EmptyBlockInfo, !BlockMap),
            LabelSeq = [Label, EmptyLabel | LabelSeq0]
        ;
            detect_exit(Instrs0, EntryInfo, Extra, ExitInstrs,
                Remain, ExitInfo)
        ->
            % We always insert an ordinary block before the exit block,
            % because doing otherwise could lead to a violation of our
            % invariant that exit blocks never *need* a stack frame
            % in cases where the redoip of a nondet frame is assigned
            % the label of the exit block. By inserting a dummy block
            % before the exit block if necessary, we ensure that the redoip
            % points not to the exit block but to the ordinary block
            % preceding it.
            %
            % However, having other blocks jump to a pre_exit dummy block
            % instead of the exit block loses opportunities for optimization.
            % This is because we may (and typically will) fall into the
            % pre_exit dummy block from an ordinary block that needs a stack
            % frame, and this requirement is propagated to the pre_exit dummy
            % block by propagate_frame_requirement_to_successors, which will
            % cause propagate_frame_requirement_to_predecessors to propagate
            % that requirement to blocks that don't actually need a stack
            % frame. This is why in the is_pre_exit_dummy case below we record
            % the mapping from the label of the pre_exit dummy block to the
            % label of the exit block, so that we can alter instructions
            % that jump to the first label to proceed to the second. Since
            % there is no code between those two labels (except a dummy goto
            % instruction that implements the fallthrough), this is safe.
            % To avoid violating the invariant mentioned at the top of this
            % comment, we don't substitute labels used as code addresses
            % in assignments to redoip slots.
            counter.allocate(N, !C),
            ExitLabel = internal_label(N, ProcLabel),

            compute_block_needs_frame(Label, Extra, NeedsFrame),
            FallThroughInstr =
                llds_instr(goto(code_label(ExitLabel)), "fall through"),
            % The fb_jump_dests and fb_fall_dest fields are only dummies.
            (
                Extra = [],
                expect(unify(NeedsFrame, block_doesnt_need_frame),
                    $module, $pred, "[] needs frame"),
                map.det_insert(Label, ExitLabel, !PreExitDummyLabelMap),
                ExtraBlockType = ordinary_block(NeedsFrame, is_pre_exit_dummy)
            ;
                Extra = [_ | _],
                ExtraBlockType = ordinary_block(NeedsFrame, is_not_dummy)
            ),
            ExtraInstrs = [Instr0 | Extra] ++ [FallThroughInstr],
            ExtraInfo = frame_block_info(Label, ExtraInstrs, FallInto,
                [], no, ExtraBlockType),

            ExitLabelInstr = llds_instr(label(ExitLabel), ""),
            LabelledBlock = [ExitLabelInstr | ExitInstrs],
            % The fb_jump_dests and fb_fall_dest fields are only dummies.
            ExitBlockInfo = frame_block_info(ExitLabel, LabelledBlock,
                yes(Label), [], no, exit_block(ExitInfo)),
            map.det_insert(ExitLabel, Label, !PredMap),

            build_frame_block_map(Remain, EntryInfo, LabelSeq0, yes(ExitLabel),
                no, ProcLabel, !BlockMap, !PredMap, !C, !PreExitDummyLabelMap),

            map.det_insert(ExitLabel, ExitBlockInfo, !BlockMap),
            map.det_insert(Label, ExtraInfo, !BlockMap),
            LabelSeq = [Label, ExitLabel | LabelSeq0]
        ;
            opt_util.skip_to_next_label(Instrs0, Block, Instrs1),
            compute_block_needs_frame(Label, Block, NeedsFrame),
            BlockInstrs = [Instr0 | Block],
            % The fb_jump_dests and fb_fall_dest fields are only dummies.
            BlockInfo = frame_block_info(Label, BlockInstrs, FallInto,
                [], no, ordinary_block(NeedsFrame, is_not_dummy)),
            ( list.last(BlockInstrs, LastBlockInstr) ->
                LastBlockInstr = llds_instr(LastBlockUinstr, _),
                NextFallIntoBool =
                    opt_util.can_instr_fall_through(LastBlockUinstr),
                (
                    NextFallIntoBool = yes,
                    NextFallInto = yes(Label)
                ;
                    NextFallIntoBool = no,
                    NextFallInto = no
                )
            ;
                NextFallInto = yes(Label)
            ),
            build_frame_block_map(Instrs1, EntryInfo, LabelSeq0, yes(Label),
                NextFallInto, ProcLabel, !BlockMap, !PredMap, !C,
                !PreExitDummyLabelMap),
            map.det_insert(Label, BlockInfo, !BlockMap),
            LabelSeq = [Label | LabelSeq0]
        )
    ;
        unexpected($module, $pred, "block does not start with label")
    ).

%-----------------------------------------------------------------------------%

    % Does the given code start with a setup of a det stack frame? If yes,
    % return the size of the frame and two instruction sequences,
    % Setup and Others ++ Remain. Setup is the instruction sequence
    % that sets up the det stack frame, Others is a sequence of
    % non-interfering instructions that were interspersed with Setup
    % but can be moved after Setup, and Remain is all remaining instructions.
    %
:- pred detect_det_entry(list(instruction)::in, list(instruction)::out,
    list(instruction)::out, det_entry_info::out) is semidet.

detect_det_entry(Instrs0, Setup, Others ++ Remain, EntryInfo) :-
    opt_util.gather_comments(Instrs0, Others0, Instrs1),
    Instrs1 = [SetupInstr1 | Instrs2],
    SetupInstr1 = llds_instr(incr_sp(FrameSize, Msg, Kind), _),
    detstack_setup(Instrs2, FrameSize, SetupInstr2, Others0, Others, Remain),
    Setup = [SetupInstr1, SetupInstr2],
    EntryInfo = det_entry(FrameSize, Msg, Kind).

:- pred detstack_setup(list(instruction)::in, int::in, instruction::out,
    list(instruction)::in, list(instruction)::out, list(instruction)::out)
    is semidet.

detstack_setup([Instr0 | Instrs0], FrameSize, Setup, !Others, Remain) :-
    Instr0 = llds_instr(Uinstr0, _),
    ( Uinstr0 = assign(Lval, Rval) ->
        (
            Lval = stackvar(FrameSize),
            Rval = lval(succip)
        ->
            Setup = Instr0,
            Remain = Instrs0
        ;
            Lval \= succip,
            Lval \= stackvar(FrameSize)
        ->
            !:Others = !.Others ++ [Instr0],
            detstack_setup(Instrs0, FrameSize, Setup, !Others, Remain)
        ;
            fail
        )
    ; Uinstr0 = comment(_) ->
        !:Others = !.Others ++ [Instr0],
        detstack_setup(Instrs0, FrameSize, Setup, !Others, Remain)
    ;
        fail
    ).

:- pred detect_nondet_entry(list(instruction)::in, list(instruction)::out,
    list(instruction)::out, nondet_entry_info::out) is semidet.

detect_nondet_entry(Instrs0, [MkframeInstr], Remain, EntryInfo) :-
    Instrs0 = [MkframeInstr | Remain],
    MkframeInstr = llds_instr(mkframe(FrameInfo, MaybeRedoip), _),
    % We could allow MaybeRedoip to be `no', and search for the instruction
    % that sets the redoip of the new frame. That is left for future work.
    MaybeRedoip = yes(Redoip),
    % If the initial mkframe sets the redoip to something other than do_fail,
    % then even this entry block needs a stack frame, so frameopt cannot do
    % anything.
    Redoip = do_fail,
    FrameInfo = ordinary_frame(Msg, Size),
    EntryInfo = nondet_entry(Msg, Size, Redoip).

%-----------------------------------------------------------------------------%

:- pred detect_det_exit(list(instruction)::in, det_entry_info::in,
    list(instruction)::out, list(instruction)::out, list(instruction)::out,
    det_exit_info::out) is semidet.

detect_det_exit(Instrs0, EntryInfo, Extra, ExitInstrs, Remain, ExitInfo) :-
    EntryInfo = det_entry(FrameSize, _Msg, _),
    detstack_teardown(Instrs0, FrameSize, Extra, SuccipRestore, Decrsp,
        Livevals, Goto, Remain),
    ExitInstrs = SuccipRestore ++ Decrsp ++ Livevals ++ [Goto],
    ExitInfo = det_exit(SuccipRestore, Livevals, Goto).

    % Does the following block contain a teardown of a det stack frame,
    % and a proceed or tailcall? If yes, we return
    %
    % - the instruction that restores succip as Succip
    % - the decr_sp instruction as Decrsp
    % - the livevals instruction as Livevals
    % - the goto instruction as Goto
    %
    % The first three can appear in any order or may be missing, due to
    % value numbering. This is also why we allow the teardown instructions
    % to be interleaved with instructions that do not access the stack;
    % any such instructions are returned as Extra. Remain is all the
    % instructions after the teardown.
    %
:- pred detstack_teardown(list(instruction)::in, int::in,
    list(instruction)::out, list(instruction)::out,
    list(instruction)::out, list(instruction)::out,
    instruction::out, list(instruction)::out) is semidet.

detstack_teardown([Instr0 | Instrs0], FrameSize, Extra, SuccipRestore, Decrsp,
        Livevals, Goto, Remain) :-
    (
        Instr0 = llds_instr(label(_), _)
    ->
        fail
    ;
        detstack_teardown_2([Instr0 | Instrs0], FrameSize,
            [], ExtraPrime, [], SuccipRestorePrime, [], DecrspPrime,
            [], LivevalsPrime, GotoPrime, RemainPrime)
    ->
        Extra = ExtraPrime,
        SuccipRestore = SuccipRestorePrime,
        Decrsp = DecrspPrime,
        Livevals = LivevalsPrime,
        Goto = GotoPrime,
        Remain = RemainPrime
    ;
        detstack_teardown(Instrs0, FrameSize, Extra1, SuccipRestore, Decrsp,
            Livevals, Goto, Remain),
        Extra = [Instr0 | Extra1]
    ).

:- pred detstack_teardown_2(list(instruction)::in, int::in,
    list(instruction)::in, list(instruction)::out,
    list(instruction)::in, list(instruction)::out,
    list(instruction)::in, list(instruction)::out,
    list(instruction)::in, list(instruction)::out,
    instruction::out, list(instruction)::out) is semidet.

detstack_teardown_2(Instrs0, FrameSize, !Extra, !SuccipRestore, !Decrsp,
        !Livevals, Goto, Remain) :-
    opt_util.skip_comments(Instrs0, Instrs1),
    Instrs1 = [Instr1 | Instrs2],
    Instr1 = llds_instr(Uinstr1, _),
    % XXX allow other instruction types in Extras, e.g. incr_hp
    (
        Uinstr1 = assign(Lval, Rval),
        (
            Lval = succip,
            Rval = lval(stackvar(FrameSize))
        ->
            !.SuccipRestore = [],
            !.Decrsp = [],
            !:SuccipRestore = [Instr1],
            detstack_teardown_2(Instrs2, FrameSize, !Extra, !SuccipRestore,
                !Decrsp, !Livevals, Goto, Remain)
        ;
            opt_util.lval_refers_stackvars(Lval) = no,
            opt_util.rval_refers_stackvars(Rval) = no,
            !:Extra = !.Extra ++ [Instr1],
            detstack_teardown_2(Instrs2, FrameSize, !Extra, !SuccipRestore,
                !Decrsp, !Livevals, Goto, Remain)
        )
    ;
        Uinstr1 = decr_sp(FrameSize),
        !.Decrsp = [],
        !:Decrsp = [Instr1],
        detstack_teardown_2(Instrs2, FrameSize, !Extra, !SuccipRestore,
            !Decrsp, !Livevals, Goto, Remain)
    ;
        Uinstr1 = livevals(_),
        !.Livevals = [],
        !:Livevals = [Instr1],
        detstack_teardown_2(Instrs2, FrameSize, !Extra, !SuccipRestore,
            !Decrsp, !Livevals, Goto, Remain)
    ;
        Uinstr1 = goto(_),
        !.Decrsp = [_],
        Goto = Instr1,
        Remain = Instrs2
    ).

:- pred detect_nondet_exit(list(instruction)::in, nondet_entry_info::in,
    list(instruction)::out, list(instruction)::out, list(instruction)::out,
    nondet_exit_info::out) is semidet.

detect_nondet_exit(Instrs0, _EntryInfo, Extra, ExitInstrs, Remain, ExitInfo) :-
    nondetstack_teardown(Instrs0, Extra, SuccipRestore, Maxfr, Curfr,
        Livevals, Goto, GotoTarget, Remain),
    ExitInstrs = SuccipRestore ++ Maxfr ++ Curfr ++ Livevals ++ [Goto],
    (
        Curfr = [],
        % MR_succeed refers to the current stack frame, so it is valid
        % only if we haven't thrown away the stack frame yet by resetting
        % curfr.
        Maxfr = [],
        SuccipRestore = [],
        GotoTarget = do_succeed(_),
        ExitInfo = nondet_plain_exit(Livevals, Goto)
    ;
        Curfr = [CurfrInstr],
        % If we *have* thrown away the current stack frame, we can exit
        % only via tailcall, and in that case we ought to have also reset
        % maxfr and succip.
        Maxfr = [MaxfrInstr],
        SuccipRestore = [SuccipRestoreInstr],
        ( GotoTarget = code_label(entry_label(_, _))
        ; GotoTarget = code_imported_proc(_)
        ),
        ExitInfo = nondet_teardown_exit(SuccipRestoreInstr,
            MaxfrInstr, CurfrInstr, Livevals, Goto)
    ).

    % Does the following block contain a succeed from a nondet stack frame?
    % If yes, we return
    %
    % - the livevals instruction (if any) as Livevals
    % - the goto instruction as Goto
    %
    % These may be preceded by instructions that do not access the stack;
    % any such instructions are returned as Extra. Remain is all the
    % instructions after the succeed.
    %
:- pred nondetstack_teardown(list(instruction)::in, list(instruction)::out,
    list(instruction)::out, list(instruction)::out,
    list(instruction)::out, list(instruction)::out,
    instruction::out, code_addr::out, list(instruction)::out) is semidet.

nondetstack_teardown([Instr0 | Instrs0], Extra, SuccipRestore, Maxfr, Curfr,
        Livevals, Goto, GotoTarget, Remain) :-
    (
        Instr0 = llds_instr(label(_), _)
    ->
        fail
    ;
        nondetstack_teardown_2([Instr0 | Instrs0], [], ExtraPrime,
            [], SuccipRestorePrime, [], MaxfrPrime, [], CurfrPrime,
            [], LivevalsPrime, GotoPrime, GotoTargetPrime, RemainPrime)
    ->
        Extra = ExtraPrime,
        SuccipRestore = SuccipRestorePrime,
        Maxfr = MaxfrPrime,
        Curfr = CurfrPrime,
        Livevals = LivevalsPrime,
        Goto = GotoPrime,
        GotoTarget = GotoTargetPrime,
        Remain = RemainPrime
    ;
        nondetstack_teardown(Instrs0, Extra1, SuccipRestore, Maxfr, Curfr,
            Livevals, Goto, GotoTarget, Remain),
        Extra = [Instr0 | Extra1]
    ).

:- pred nondetstack_teardown_2(list(instruction)::in,
    list(instruction)::in, list(instruction)::out,
    list(instruction)::in, list(instruction)::out,
    list(instruction)::in, list(instruction)::out,
    list(instruction)::in, list(instruction)::out,
    list(instruction)::in, list(instruction)::out,
    instruction::out, code_addr::out, list(instruction)::out) is semidet.

nondetstack_teardown_2(Instrs0, !Extra, !SuccipRestore, !Maxfr, !Curfr,
        !Livevals, Goto, GotoTarget, Remain) :-
    opt_util.skip_comments(Instrs0, Instrs1),
    Instrs1 = [Instr1 | Instrs2],
    Instr1 = llds_instr(Uinstr1, _),
    % XXX allow other instruction types in Extras, e.g. incr_hp
    (
        Uinstr1 = assign(Lval, Rval),
        (
            Lval = succip,
            Rval = lval(succip_slot(lval(curfr))),
            !.SuccipRestore = [],
            % The restore instruction is valid only if curfr hasn't been
            % modified yet.
            !.Curfr = []
        ->
            !:SuccipRestore = [Instr1]
        ;
            Lval = maxfr,
            Rval = lval(prevfr_slot(lval(curfr))),
            !.Maxfr = [],
            % The restore instruction is valid only if curfr hasn't been
            % modified yet.
            !.Curfr = []
        ->
            !:Maxfr = [Instr1]
        ;
            Lval = curfr,
            Rval = lval(succfr_slot(lval(curfr))),
            !.Curfr = []
        ->
            !:Curfr = [Instr1]
        ;
            opt_util.lval_refers_stackvars(Lval) = no,
            opt_util.rval_refers_stackvars(Rval) = no
        ->
            !:Extra = !.Extra ++ [Instr1]
        ;
            fail
        ),
        nondetstack_teardown_2(Instrs2, !Extra, !SuccipRestore, !Maxfr, !Curfr,
            !Livevals, Goto, GotoTarget, Remain)
    ;
        Uinstr1 = livevals(_),
        !.Livevals = [],
        !:Livevals = [Instr1],
        nondetstack_teardown_2(Instrs2, !Extra, !SuccipRestore, !Maxfr, !Curfr,
            !Livevals, Goto, GotoTarget, Remain)
    ;
        Uinstr1 = goto(GotoTarget),
        Goto = Instr1,
        Remain = Instrs2
    ).

%-----------------------------------------------------------------------------%

    % Does an ordinary block with the given content need a stack frame?
    %
:- pred compute_block_needs_frame(label::in, list(instruction)::in,
    block_needs_frame::out) is det.

compute_block_needs_frame(_Label, Instrs, NeedsFrame) :-
    opt_util.block_refers_to_stack(Instrs) = ReferStackVars,
    (
        ReferStackVars = yes,
        NeedsFrame = block_needs_frame
    ;
        ReferStackVars = no,
        (
            list.member(Instr, Instrs),
            Instr = llds_instr(Uinstr, _),
            (
                Uinstr = llcall(_, _, _, _, _, _)
            ;
                Uinstr = mkframe(_, _)
            ;
                Uinstr = arbitrary_c_code(_, _, _)
            ;
                Uinstr = foreign_proc_code(_, _, MayCallMercury, _,
                    MaybeLayout, MaybeOnlyLayout, _, MaybeDefLabel,
                    NeedStack, _),
                (
                    MayCallMercury = proc_may_call_mercury
                ;
                    MaybeLayout = yes(_)
                ;
                    MaybeOnlyLayout = yes(_)
                ;
                    MaybeDefLabel = yes(_)
                ;
                    NeedStack = yes
                )
            ;
                % If we assign to the succip register, then we need the saved
                % copy of the original return address in the stack frame.
                Uinstr = assign(succip, _)
            )
        ->
            NeedsFrame = block_needs_frame
        ;
            NeedsFrame = block_doesnt_need_frame
        )
    ).

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

    % For each block in the given sequence, whose frame_block_info structures
    % in the given block map have been partially filled in, fill in the
    % remaining two fields. These two fields give the labels the block
    % can branch to on the side (this includes return addresses for calls),
    % and the label if any to which it falls through.
    %
    % Also decide whether the optimization we want to apply is keeping
    % the stack frame for recursive tail calls once it has been set up
    % by the initial entry to the procedure, or delaying the creation of
    % the stack frame as long as possible. We want to do the former
    % whenever we find at least one exit block that branches back
    % to the beginning of the procedure; in such cases we return the
    % the label starting the procedure and the label that should replace
    % it in tailcalls that avoid the stack teardown, which is the label
    % immediately after the initial stack setup block.
    %
:- pred analyze_block_map(list(label)::in, pre_exit_dummy_label_map::in,
    frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out,
    maybe(pair(label))::out) is det <= block_entry_exit(En, Ex).

analyze_block_map(LabelSeq, PreExitDummyLabelMap, !BlockMap, KeepFrameData) :-
    (
        LabelSeq = [FirstLabel, SecondLabel | _],
        map.search(!.BlockMap, FirstLabel, FirstBlockInfo),
        FirstBlockInfo = frame_block_info(FirstLabel, _, _, _, _, BlockType),
        BlockType = entry_block(_)
    ->
        ProcLabel = get_proc_label(FirstLabel),
        analyze_block_map_2(LabelSeq, FirstLabel, ProcLabel,
            PreExitDummyLabelMap, !BlockMap, no, AnyBlockNeedsFrame,
            no, JumpToStart),
        % We want to apply the transformation to keep the stack frame only if
        % (a) some block actually needs the stack frame, and (b) there is at
        % least one block that jumps back to the start of the procedure.
        (
            AnyBlockNeedsFrame = yes,
            JumpToStart = yes
        ->
            KeepFrameData = yes(FirstLabel - SecondLabel)
        ;
            KeepFrameData = no
        )
    ;
        unexpected($module, $pred, "bad data")
    ).

:- pred analyze_block_map_2(list(label)::in, label::in, proc_label::in,
    pre_exit_dummy_label_map::in,
    frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out,
    bool::in, bool::out, bool::in, bool::out) is det
    <= block_entry_exit(En, Ex).

analyze_block_map_2([], _, _, _, !BlockMap, !AnyBlockNeedsFrame, !KeepFrame).
analyze_block_map_2([Label | Labels], FirstLabel, ProcLabel,
        PreExitDummyLabelMap, !BlockMap, !AnyBlockNeedsFrame, !JumpToStart) :-
    analyze_block(Label, Labels, FirstLabel, ProcLabel, PreExitDummyLabelMap,
        !BlockMap, !AnyBlockNeedsFrame, !JumpToStart),
    analyze_block_map_2(Labels, FirstLabel, ProcLabel, PreExitDummyLabelMap,
        !BlockMap, !AnyBlockNeedsFrame, !JumpToStart).

:- pred analyze_block(label::in, list(label)::in, label::in, proc_label::in,
    pre_exit_dummy_label_map::in,
    frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out,
    bool::in, bool::out, bool::in, bool::out) is det
    <= block_entry_exit(En, Ex).

analyze_block(Label, FollowingLabels, FirstLabel, ProcLabel,
        PreExitDummyLabelMap, !BlockMap, !AnyBlockNeedsFrame, !JumpToStart) :-
    map.lookup(!.BlockMap, Label, BlockInfo0),
    BlockInfo0 = frame_block_info(BlockLabel, BlockInstrs0, FallInto,
        _, _, Type),
    (
        Type = ordinary_block(block_needs_frame, _),
        !:AnyBlockNeedsFrame = yes
    ;
        ( Type = ordinary_block(block_doesnt_need_frame, _)
        ; Type = entry_block(_)
        ; Type = exit_block(_)
        )
    ),
    (
        Label = BlockLabel, % sanity check
        list.det_split_last(BlockInstrs0, AllButLastInstrs, LastInstr0)
    ->
        LastInstr0 = llds_instr(LastUinstr0, Comment),
        (
            LastUinstr0 = goto(GotoTarget0)
        ->
            replace_labels_code_addr(GotoTarget0, GotoTarget,
                PreExitDummyLabelMap),
            LastUinstr = goto(GotoTarget),
            LastInstr = llds_instr(LastUinstr, Comment),
            BlockInstrs = AllButLastInstrs ++ [LastInstr]
        ;
            LastUinstr0 = if_val(Rval, GotoTarget0)
        ->
            replace_labels_code_addr(GotoTarget0, GotoTarget,
                PreExitDummyLabelMap),
            LastUinstr = if_val(Rval, GotoTarget),
            LastInstr = llds_instr(LastUinstr, Comment),
            BlockInstrs = AllButLastInstrs ++ [LastInstr]
        ;
            LastUinstr0 = computed_goto(Rval, GotoTargets0)
        ->
            replace_labels_maybe_label_list(GotoTargets0, GotoTargets,
                PreExitDummyLabelMap),
            LastUinstr = computed_goto(Rval, GotoTargets),
            LastInstr = llds_instr(LastUinstr, Comment),
            BlockInstrs = AllButLastInstrs ++ [LastInstr]
        ;
            LastUinstr0 = foreign_proc_code(D, Comps0, MC, FNL, FL, FOL, NF0,
                MDL, S, MD)
        ->
            (
                NF0 = no,
                NF = no,
                Comps0 = Comps
            ;
                NF0 = yes(NFLabel0),
                replace_labels_label(NFLabel0, NFLabel, PreExitDummyLabelMap),
                NF = yes(NFLabel),
                replace_labels_comps(Comps0, Comps, PreExitDummyLabelMap)
            ),
            LastUinstr = foreign_proc_code(D, Comps, MC, FNL, FL, FOL, NF,
                MDL, S, MD),
            LastInstr = llds_instr(LastUinstr, Comment),
            BlockInstrs = AllButLastInstrs ++ [LastInstr]
        ;
            LastUinstr = LastUinstr0,
            BlockInstrs = BlockInstrs0
        ),
        possible_targets(LastUinstr, SideLabels0, _SideCodeAddrs),
        list.filter(local_label(ProcLabel), SideLabels0, SideLabels),
        (
            opt_util.can_instr_fall_through(LastUinstr) = yes,
            FollowingLabels = [NextLabel | _]
        ->
            MaybeFallThrough = yes(NextLabel)
        ;
            MaybeFallThrough = no
        ),
        (
            LastUinstr = goto(code_label(GotoLabel)),
            matching_label_ref(FirstLabel, GotoLabel)
        ->
            !:JumpToStart = yes
        ;
            true
        )
    ;
        unexpected($module, $pred, "mismatch or no last instr")
    ),
    BlockInfo = frame_block_info(BlockLabel, BlockInstrs, FallInto,
        SideLabels, MaybeFallThrough, Type),
    map.det_update(Label, BlockInfo, !BlockMap),
    find_redoip_labels(BlockInstrs, ProcLabel, [], RedoipLabels),
    list.foldl(mark_redoip_label, RedoipLabels, !BlockMap).

:- pred local_label(proc_label::in, label::in) is semidet.

local_label(ProcLabel, entry_label(_, ProcLabel)).
local_label(ProcLabel, internal_label(_, ProcLabel)).

    % The form of a label used in a tailcall may be different from
    % the form used in the initial label. The initial label may be
    % exported from the Mercury module or possibly from the C module,
    % while the label used in the tailcall may use a more local form
    % for better performance.
    %
    % This predicate tests whether the second label (from the tailcall)
    % is a proper reference to the first label (from the initial label
    % instruction).
    %
:- pred matching_label_ref(label::in, label::in) is semidet.

matching_label_ref(FirstLabel, GotoLabel) :-
    FirstLabel = entry_label(FirstLabelType, ProcLabel),
    GotoLabel = entry_label(GotoLabelType, ProcLabel),
    matching_entry_type(FirstLabelType, GotoLabelType).

:- pred matching_entry_type(entry_label_type::in, entry_label_type::in)
    is semidet.

matching_entry_type(entry_label_exported, entry_label_exported).
matching_entry_type(entry_label_exported, entry_label_c_local).
matching_entry_type(entry_label_exported, entry_label_local).
matching_entry_type(entry_label_local, entry_label_c_local).
matching_entry_type(entry_label_local, entry_label_local).
matching_entry_type(entry_label_c_local, entry_label_c_local).

:- pred find_redoip_labels(list(instruction)::in, proc_label::in,
    list(label)::in, list(label)::out) is det.

find_redoip_labels([], _, !RedoipLabels).
find_redoip_labels([Instr | Instrs], ProcLabel, !RedoipLabels) :-
    Instr = llds_instr(Uinstr, _),
    (
        Uinstr = assign(redoip_slot(_),
            const(llconst_code_addr(code_label(Label)))),
        get_proc_label(Label) = ProcLabel
    ->
        !:RedoipLabels = [Label | !.RedoipLabels]
    ;
        true
    ),
    find_redoip_labels(Instrs, ProcLabel, !RedoipLabels).

:- pred mark_redoip_label(label::in,
    frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out) is det.

mark_redoip_label(Label, !BlockMap) :-
    map.lookup(!.BlockMap, Label, BlockInfo0),
    BlockType0 = BlockInfo0 ^ fb_type,
    (
        BlockType0 = entry_block(_),
        unexpected($module, $pred, "entry_block")
    ;
        BlockType0 = ordinary_block(_, MaybeDummy),
        BlockType = ordinary_block(block_needs_frame, MaybeDummy),
        BlockInfo = BlockInfo0 ^ fb_type := BlockType,
        map.det_update(Label, BlockInfo, !BlockMap)
    ;
        BlockType0 = exit_block(_),
        unexpected($module, $pred, "exit_block")
    ).

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

:- func can_clobber_succip(list(label), frame_block_map(_, _)) = bool.

can_clobber_succip([], _BlockMap) = no.
can_clobber_succip([Label | Labels], BlockMap) = CanClobberSuccip :-
    map.lookup(BlockMap, Label, BlockInfo),
    Instrs = BlockInfo ^ fb_instrs,
    (
        list.member(Instr, Instrs),
        Instr = llds_instr(Uinstr, _),
        (
            Uinstr = llcall(_, _, _, _, _, _)
        ;
            % Only may_call_mercury foreign_proc_codes can clobber succip.
            Uinstr = foreign_proc_code(_, _, proc_may_call_mercury,
                _, _, _, _, _, _, _)
        )
    ->
        CanClobberSuccip = yes
    ;
        CanClobberSuccip = can_clobber_succip(Labels, BlockMap)
    ).

%-----------------------------------------------------------------------------%

    % Transform the given block sequence to effect the optimization
    % of keeping the stack frame for recursive tail calls once it has
    % been set up by the initial entry to the procedure.
    % The two label arguments are the label starting the procedure
    % (a form of which appears in existing tailcalls) and the label that
    % should replace it in tailcalls that avoid the stack teardown.
    %
:- pred keep_frame_transform(list(label)::in, label::in, label::in, bool::in,
    det_frame_block_map::in, det_frame_block_map::out) is det.

keep_frame_transform([], _, _, _, !BlockMap).
keep_frame_transform([Label | Labels], FirstLabel, SecondLabel,
        CanClobberSuccip, !BlockMap) :-
    map.lookup(!.BlockMap, Label, BlockInfo0),
    (
        BlockInfo0 = frame_block_info(Label, OrigInstrs, FallInto, [_], no,
            exit_block(det_exit(Succip, Livevals, Goto))),
        Goto = llds_instr(goto(code_label(GotoLabel)), Comment),
        matching_label_ref(FirstLabel, GotoLabel)
    ->
        (
            OrigInstrs = [OrigInstr0 | _],
            OrigInstr0 = llds_instr(label(_), _)
        ->
            OrigLabelInstr = OrigInstr0
        ;
            unexpected($module, $pred, "block does not begin with label")
        ),
        string.append(Comment, " (keeping frame)", NewComment),
        NewGoto = llds_instr(goto(code_label(SecondLabel)), NewComment),
        LivevalsGoto = Livevals ++ [NewGoto],
        (
            CanClobberSuccip = yes,
            BackInstrs = Succip ++ LivevalsGoto
        ;
            CanClobberSuccip = no,
            BackInstrs = LivevalsGoto
        ),
        Instrs = [OrigLabelInstr | BackInstrs],
        BlockType = ordinary_block(block_needs_frame, is_not_dummy),
        BlockInfo = frame_block_info(Label, Instrs, FallInto, [SecondLabel],
            no, BlockType),
        map.det_update(Label, BlockInfo, !BlockMap)
    ;
        true
    ),
    keep_frame_transform(Labels, FirstLabel, SecondLabel, CanClobberSuccip,
        !BlockMap).

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

    % Check that we can use the delay_frame transformation. This requires
    % that only the first block is of the setup type.
    %
:- pred can_delay_frame(list(label)::in, frame_block_map(En, Ex)::in)
    is semidet.

can_delay_frame([], _).
can_delay_frame([Label | _Labels], BlockMap) :-
    map.lookup(BlockMap, Label, BlockInfo),
    BlockInfo ^ fb_type = entry_block(_).

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

    % The data structures used in the delaying optimizations.

    % map.search(RevMap, Label, SideLabels) should be true if the block
    % started by Label can be reached via jump or fallthrough from the labels
    % in SideLabels.
    %
:- type rev_map ==  map(label, list(label)).

    % Given the label L starting a block, map.search(PredMap, L, PrevL)
    % is true if PrevL starts the block immediately before L.
    % If L is the first block in the sequence (which should be a setup block),
    % map.search(PredMap, L, _) fails.
:- type pred_map == map(label, label).

    % map.search(SetupParMap, L, SetupL) should be true if L starts
    % an ordinary block that needs a stack frame that can be jumped to by
    % blocks that do not have a stack frame. In this case, SetupL will be the
    % label starting a new block that sets up the stack frame before handing
    % control to L. We put SetupL's block immediately before L's block.
    % If L cannot be fallen into, this is fine. If it can, we put a goto
    % around SetupL's block after the previous block. In most cases,
    % full jump optimization to duplicate code as necessary to optimize away
    % the goto.
    %
:- type setup_par_map
    --->    setup_par_map(map(label, label)).

    % map.search(ExitParMap, L, ParallelL should be true if L starts
    % an exit block and ParallelL starts a copy of L's block from which
    % the instructions to tear down the stack frame have been deleted.
    % If the block immediately before L does not have a stack frame,
    % we put ParallelL before L. If it does, we put ParallelL after L.
    % since neither L nor ParallelL can fall through, we don't need any gotos
    % to jump around blocks.
    %
:- type exit_par_map
    --->    exit_par_map(map(label, label)).

%-----------------------------------------------------------------------------%

:- type can_transform
    --->    can_transform
    ;       cannot_transform.

    % XXX This documentation needs updating.

    % The optimization of delaying the creation of stack frames as long
    % as possible is in three main phases:
    %
    % - The first phase finds out which ordinary blocks need a stack frame.
    %   This naturally includes blocks that access stackvars, and blocks
    %   that perform calls. It also includes blocks which are jumped to
    %   or fallen through to from ordinary blocks that need a stack frame;
    %   this is done by propagate_frame_requirement_to_successors.
    %   A predecessor of a block that needs a frame need not have a frame
    %   itself, since we can interpose the frame setup code on the jump or
    %   fallthrough between them. However, if all of a block's successors
    %   need a frame, then interposing the setup code on *all* those jumps
    %   and/or fallthroughs is a space cost, and delaying the setup doesn't
    %   gain any time, so we may as well say that the predecessor itself needs
    %   a frame. This is done by propagate_frame_requirement_to_predecessors.
    %   If a propagation step says that a setup block needs a stack frame,
    %   that implies that all blocks need a frame, and thus we cannot avoid
    %   setting up a stack frame on any path. In such cases, this predicate
    %   wil return CanTransform = cannot_transform, which tells our caller
    %   to leave the procedure's code unmodified.
    %
    % - The second phase gets rid of the frame setup code in the initial
    %   setup block, but its main task is to transform ordinary blocks that
    %   do not need a frame. Every block that is a successor of such a block,
    %   whether via jump or fallthrough, will be an ordinary block or an
    %   exit block.
    %
    %   - If the successor is an ordinary block that doesn't need a frame,
    %     the transfer of control remains as before.
    %
    %   - If the successor is an ordinary block B that does need a stack frame,
    %     we need to insert the frame setup code at the transfer between the
    %     two blocks. In phase 2, we note the need for this insertion by
    %     allocating a new label SetupB, which will start a new block that will
    %     create the stack frame and then fall through to B. In the second
    %     phase, we alter the transfer of control to goto to SetupB instead
    %     of B; the creation of SetupB's block is left to the third phase.
    %     The correspondence between B and SetupB is recorded in SetupParMap.
    %
    %   - If the successor is an exit block B, then we modify the transfer
    %     of control to jump a new label ParallelB, whose block has the same
    %     code as B's block, except for the deletion of the instructions that
    %     tear down the (nonexistent along this path) stack frame. The
    %     correspondence between B and ParallelB is recorded in ExitParMap.
    %
    % - The third phase has the job of adding the pieces of code whose
    %   existence is assumed by the modified code output by the second stage.
    %
    %   For every frame-needing ordinary block that can be jumped to from
    %   someplace that does not have a stack frame (i.e. for every B in
    %   SetupParMap), we insert before it the SetupB label followed by
    %   the code to set up the stack frame SetupB's predecessor may need to
    %   jump around SetupB's block if previously it fell through to B.)
    %
    %   For every teardown block that can jumped to from someplace that does
    %   not have a stack frame (i.e. for every B in ExitParMap), we create
    %   a new block that is a clone of B with the stack teardown deleted.
    %   Whether we put B or ParallelB first depends on whether the immediately
    %   previous block has a stack frame or not.
    %
:- pred delay_frame_transform(list(label)::in, list(label)::out,
    En::in, proc_label::in, pred_map::in, counter::in, counter::out,
    frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out, bool::in,
    list(instruction)::out, list(instruction)::out, can_transform::out) is det
    <= block_entry_exit(En, Ex).

delay_frame_transform(!LabelSeq, EntryInfo, ProcLabel, PredMap, !C, !BlockMap,
        AddComments, TransformComments, DescComments, CanTransform) :-
    some [!OrdNeedsFrame, !CanTransform, !PropagationStepsLeft] (
        !:OrdNeedsFrame = map.init,
        !:CanTransform = can_transform,
        !:PropagationStepsLeft = max_propagation_steps,
        delay_frame_init(!.LabelSeq, !.BlockMap, map.init, RevMap,
            queue.init, SuccQueue, !OrdNeedsFrame),
        propagate_frame_requirement_to_successors(SuccQueue, !.BlockMap,
            !OrdNeedsFrame, set.init, !PropagationStepsLeft, !CanTransform),
        map.to_assoc_list(!.OrdNeedsFrame, OrdNeedsFrameList),
        list.filter_map(key_block_needs_frame, OrdNeedsFrameList, Frontier),
        queue.list_to_queue(Frontier, PredQueue),
        propagate_frame_requirement_to_predecessors(PredQueue, !.BlockMap,
            RevMap, !OrdNeedsFrame, !.PropagationStepsLeft, _, !CanTransform),
        (
            !.CanTransform = cannot_transform
            % The delay frame optimization is not applicable; our caller will
            % ignore all the other output arguments.
        ;
            !.CanTransform = can_transform,
            process_frame_delay(!.LabelSeq, !.OrdNeedsFrame, ProcLabel, !C,
                !BlockMap, setup_par_map(map.init), SetupParMap,
                exit_par_map(map.init), ExitParMap),
            create_parallels(!LabelSeq, EntryInfo, ProcLabel, !C,
                !.OrdNeedsFrame, SetupParMap, ExitParMap, PredMap, !BlockMap)
        ),
        (
            AddComments = no,
            TransformComments = [],
            DescComments = []
        ;
            AddComments = yes,
            TransformComments =
                [llds_instr(comment("delaying stack frame"), "")],
            list.map(describe_block(!.BlockMap, !.OrdNeedsFrame,
                PredMap, ProcLabel), !.LabelSeq, DescComments)
        ),
        CanTransform = !.CanTransform
    ).

    % We want to stop the transformation if we need more than this many
    % propagation steps. For such large predicates (e.g. write_ordinary_term)
    % any performance benefit from frameopt is unlikely to be noticeable.
    %
:- func max_propagation_steps = int.

max_propagation_steps = 10000.

:- pred key_block_needs_frame(pair(label, block_needs_frame)::in, label::out)
    is semidet.

key_block_needs_frame(Label - block_needs_frame, Label).

%-----------------------------------------------------------------------------%

    % Maps the label of each ordinary block to a bool that says whether
    % the block needs a stack frame or not.
    %
:- type ord_needs_frame == map(label, block_needs_frame).

:- type prop_queue == queue(label).

    % Initialize the data structures for the delaying operation.
    % The first is a map showing the predecessors of each block,
    % i.e. the set of blocks that can jump to or fall through each other block.
    % The second is a queue of ordinary blocks that need a stack frame.
    % The third says, for each ordinary block, whether it needs a stack frame.
    %
    % This predicate implements the first part of the first phase of
    % delay_frame_transform.
    %
:- pred delay_frame_init(list(label)::in, frame_block_map(En, Ex)::in,
    rev_map::in, rev_map::out, prop_queue::in, prop_queue::out,
    ord_needs_frame::in, ord_needs_frame::out) is det.

delay_frame_init([], _, !RevMap, !Queue, !OrdNeedsFrame).
delay_frame_init([Label | Labels], BlockMap, !RevMap, !Queue,
        !OrdNeedsFrame) :-
    map.lookup(BlockMap, Label, BlockInfo),
    BlockType = BlockInfo ^ fb_type,
    (
        BlockType = entry_block(_)
    ;
        BlockType = ordinary_block(NeedsFrame, _),
        (
            NeedsFrame = block_doesnt_need_frame,
            map.det_insert(Label, block_doesnt_need_frame, !OrdNeedsFrame)
        ;
            NeedsFrame = block_needs_frame,
            map.det_insert(Label, block_needs_frame, !OrdNeedsFrame),
            queue.put(Label, !Queue)
        )
    ;
        BlockType = exit_block(_)
    ),
    rev_map_side_labels(successors(BlockInfo), Label, !RevMap),
    delay_frame_init(Labels, BlockMap, !RevMap, !Queue, !OrdNeedsFrame).

:- pred rev_map_side_labels(list(label)::in, label::in,
    rev_map::in, rev_map::out) is det.

rev_map_side_labels([], _Label, !RevMap).
rev_map_side_labels([Label | Labels], SourceLabel, !RevMap) :-
    ( map.search(!.RevMap, Label, OtherSources0) ->
        OtherSources = [SourceLabel | OtherSources0],
        map.det_update(Label, OtherSources, !RevMap)
    ;
        OtherSources = [SourceLabel],
        map.det_insert(Label, OtherSources, !RevMap)
    ),
    rev_map_side_labels(Labels, SourceLabel, !RevMap).

%-----------------------------------------------------------------------------%

:- pred ord_needs_frame(label::in,
    ord_needs_frame::in, ord_needs_frame::out) is det.

ord_needs_frame(Label, !OrdNeedsFrame) :-
    map.lookup(!.OrdNeedsFrame, Label, NeedsFrame0),
    (
        NeedsFrame0 = block_doesnt_need_frame,
        map.det_update(Label, block_needs_frame, !OrdNeedsFrame)
    ;
        NeedsFrame0 = block_needs_frame
    ).

    % Given a queue of labels representing ordinary blocks that must have
    % a stack frame, propagate the requirement for a stack frame to all
    % other ordinary blocks that are their successors.
    %
    % This predicate implements the second part of the first phase of
    % delay_frame_transform.
    %
:- pred propagate_frame_requirement_to_successors(prop_queue::in,
    frame_block_map(En, Ex)::in, ord_needs_frame::in, ord_needs_frame::out,
    set(label)::in, int::in, int::out, can_transform::in, can_transform::out)
    is det.

propagate_frame_requirement_to_successors(!.Queue, BlockMap, !OrdNeedsFrame,
        !.AlreadyProcessed, !PropagationStepsLeft, !CanTransform) :-
    (
        !.CanTransform = cannot_transform
    ;
        !.CanTransform = can_transform,
        ( !.PropagationStepsLeft < 0 ->
            !:CanTransform = cannot_transform
        ; queue.get(Label, !Queue) ->
            !:PropagationStepsLeft = !.PropagationStepsLeft - 1,
            set.insert(Label, !AlreadyProcessed),
            map.lookup(BlockMap, Label, BlockInfo),
            BlockType = BlockInfo ^ fb_type,
            (
                BlockType = ordinary_block(_, _MaybeDummy),
                ord_needs_frame(Label, !OrdNeedsFrame),
                % Putting an already processed label into the queue could
                % lead to an infinite loop. However, we cannot decide whether
                % a label has been processed by checking whether
                % !.OrdNeedsFrame maps Label to yes, since !.OrdNeedsFrame
                % doesn't mention setup frames, and we want to set
                % !:CanTransform to no if any successor is a setup frame.
                % We cannot assume that successors not in !.OrdNeedsFrame
                % should set !:CanTransform to no either, since we don't want
                % to do that for exit frames.
                list.filter(set.contains(!.AlreadyProcessed),
                    successors(BlockInfo), _, UnprocessedSuccessors),
                queue.put_list(UnprocessedSuccessors, !Queue)
            ;
                BlockType = entry_block(_),
                !:CanTransform = cannot_transform
            ;
                BlockType = exit_block(_)
                % Exit blocks never *need* stack frames.
            ),
            propagate_frame_requirement_to_successors(!.Queue, BlockMap,
                !OrdNeedsFrame, !.AlreadyProcessed, !PropagationStepsLeft,
                !CanTransform)
        ;
            true
        )
    ).

    % This predicate implements the third part of the first phase of
    % delay_frame_transform; see the documentation there.
    %
:- pred propagate_frame_requirement_to_predecessors(prop_queue::in,
    frame_block_map(En, Ex)::in, rev_map::in,
    ord_needs_frame::in, ord_needs_frame::out, int::in, int::out,
    can_transform::in, can_transform::out) is det.

propagate_frame_requirement_to_predecessors(!.Queue, BlockMap, RevMap,
        !OrdNeedsFrame, !PropagationStepsLeft, !CanTransform) :-
    (
        !.CanTransform = cannot_transform
    ;
        !.CanTransform = can_transform,
        ( !.PropagationStepsLeft < 0 ->
            !:CanTransform = cannot_transform
        ; queue.get(Label, !Queue) ->
            !:PropagationStepsLeft = !.PropagationStepsLeft - 1,
            ( map.search(RevMap, Label, PredecessorsPrime) ->
                Predecessors = PredecessorsPrime
            ;
                % We get here if Label cannot be reached by a fallthrough or an
                % explicit jump, but only by backtracking. In that case, the
                % code that sets up the resumption point saves the address of
                % Label on the stack, and thus is already known to need
                % a stack frame.
                Predecessors = [],
                ord_needs_frame(Label, !OrdNeedsFrame)
            ),
            list.filter(all_successors_need_frame(BlockMap, !.OrdNeedsFrame),
                Predecessors, NowNeedFrameLabels),
            list.foldl2(record_frame_need(BlockMap), NowNeedFrameLabels,
                !OrdNeedsFrame, !CanTransform),
            % XXX map.lookup(BlockMap, Label, BlockInfo),
            % XXX Successors = successors(BlockInfo),
            queue.put_list(NowNeedFrameLabels, !Queue),
            propagate_frame_requirement_to_predecessors(!.Queue, BlockMap,
                RevMap, !OrdNeedsFrame, !PropagationStepsLeft, !CanTransform)
        ;
            true
        )
    ).

:- pred record_frame_need(frame_block_map(En, Ex)::in,
    label::in, ord_needs_frame::in, ord_needs_frame::out,
    can_transform::in, can_transform::out) is det.

record_frame_need(BlockMap, Label, !OrdNeedsFrame, !CanTransform) :-
    map.lookup(BlockMap, Label, BlockInfo),
    BlockType = BlockInfo ^ fb_type,
    (
        BlockType = entry_block(_),
        !:CanTransform = cannot_transform
    ;
        BlockType = ordinary_block(_, _),
        ord_needs_frame(Label, !OrdNeedsFrame)
    ;
        BlockType = exit_block(_),
        unexpected($module, $pred, "exit_block")
    ).

:- pred all_successors_need_frame(frame_block_map(En, Ex)::in,
    ord_needs_frame::in, label::in) is semidet.

all_successors_need_frame(BlockMap, OrdNeedsFrame, Label) :-
    map.lookup(BlockMap, Label, BlockInfo),
    Successors = successors(BlockInfo),
    list.filter(label_needs_frame(OrdNeedsFrame), Successors,
        _NeedFrameSuccessors, NoNeedFrameSuccessors),
    NoNeedFrameSuccessors = [].

:- pred label_needs_frame(ord_needs_frame::in, label::in) is semidet.

label_needs_frame(OrdNeedsFrame, Label) :-
    ( map.search(OrdNeedsFrame, Label, NeedsFrame) ->
        NeedsFrame = block_needs_frame
    ;
        % If the map.search fails, Label is not an ordinary frame.
        % Entry blocks and exit blocks don't need frames.
        fail
    ).

    % Returns the set of successors of the given block as a list
    % (which may contain duplicates).
    %
:- func successors(frame_block_info(En, Ex)) = list(label).

successors(BlockInfo) = Successors :-
    SideLabels = BlockInfo ^ fb_jump_dests,
    MaybeFallThrough = BlockInfo ^ fb_fall_dest,
    (
        MaybeFallThrough = no,
        Successors = SideLabels
    ;
        MaybeFallThrough = yes(FallThrough),
        Successors = [FallThrough | SideLabels]
    ).

%-----------------------------------------------------------------------------%

    % The predicates process_frame_delay and transform_ordinary_block
    % implement the second phase of delay_frame_transform. For documentation,
    % see the comment at the top of delay_frame_transform.
    %
:- pred process_frame_delay(list(label)::in, ord_needs_frame::in,
    proc_label::in, counter::in, counter::out,
    frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out,
    setup_par_map::in, setup_par_map::out, exit_par_map::in, exit_par_map::out)
    is det <= block_entry_exit(En, Ex).

process_frame_delay([], _, _, !C, !BlockMap, !SetupParMap, !ExitParMap).
process_frame_delay([Label0 | Labels0], OrdNeedsFrame, ProcLabel, !C,
        !BlockMap, !SetupParMap, !ExitParMap) :-
    map.lookup(!.BlockMap, Label0, BlockInfo0),
    BlockInfo0 = frame_block_info(Label0Copy, Instrs0, FallInto, SideLabels0,
        MaybeFallThrough0, Type),
    expect(unify(Label0, Label0Copy), $module, $pred,
        "label in frame_block_info is not copy"),
    (
        Type = entry_block(_),
        (
            Instrs0 = [LabelInstrPrime | _],
            LabelInstrPrime = llds_instr(label(_), _)
        ->
            LabelInstr = LabelInstrPrime
        ;
            unexpected($module, $pred, "setup block does not begin with label")
        ),
        BlockInfo = frame_block_info(Label0, [LabelInstr], FallInto,
            SideLabels0, MaybeFallThrough0,
            ordinary_block(block_doesnt_need_frame, is_not_dummy)),
        map.det_update(Label0, BlockInfo, !BlockMap),
        process_frame_delay(Labels0, OrdNeedsFrame,
            ProcLabel, !C, !BlockMap, !SetupParMap, !ExitParMap)
    ;
        Type = ordinary_block(_, _),
        map.lookup(OrdNeedsFrame, Label0, NeedsFrame),
        (
            NeedsFrame = block_needs_frame,
            % Every block reachable from this block, whether via jump or
            % fallthrough, will be an ordinary block also mapped to `yes'
            % by OrdNeedsFrame, or will be an exit block, or will be a pre-exit
            % dummy block. We already have a stack frame, and all our
            % successors expect one, so we need not do anything.
            process_frame_delay(Labels0, OrdNeedsFrame, ProcLabel, !C,
                !BlockMap, !SetupParMap, !ExitParMap)
        ;
            NeedsFrame = block_doesnt_need_frame,
            transform_nostack_ordinary_block(Label0, Labels0, BlockInfo0,
                OrdNeedsFrame, ProcLabel, !C, !BlockMap,
                !SetupParMap, !ExitParMap)
        )
    ;
        Type = exit_block(_),
        process_frame_delay(Labels0, OrdNeedsFrame, ProcLabel, !C,
            !BlockMap, !SetupParMap, !ExitParMap)
    ).

    % Transform an ordinary block that doesn't have a stack frame.
    % Every block that is a successor of this block, whether via jump or
    % fallthrough, will be an ordinary block or an exit block.
    %
    % - If it is an ordinary block that doesn't need a frame, we need not
    %   do anything.
    %
    % - If it is an ordinary block B that does need a stack frame, we need to
    %   insert the frame setup code at the transfer between the two blocks.
    %   The label S of the block that contains the setup code and then goes
    %   to block B will be given by map.lookup(!.SetupParMap, B, S).
    %   Here, we just allocate the label S; the block will be created later.
    %
    % - If it is exit block B, then we need to jump to a variant of B
    %   that does no teardown, since there is no stack frame to tear down.
    %   The label S of the variant block will be given by
    %   map.lookup(!.ExitParMap, B, S). Here, we just allocate
    %   the label S; the block will be created later.
    %
:- pred transform_nostack_ordinary_block(label::in, list(label)::in,
    frame_block_info(En, Ex)::in, ord_needs_frame::in,
    proc_label::in, counter::in, counter::out,
    frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out,
    setup_par_map::in, setup_par_map::out, exit_par_map::in, exit_par_map::out)
    is det <= block_entry_exit(En, Ex).

transform_nostack_ordinary_block(Label0, Labels0, BlockInfo0, OrdNeedsFrame,
        ProcLabel, !C, !BlockMap, !SetupParMap, !ExitParMap) :-
    BlockInfo0 = frame_block_info(_, Instrs0, FallInto,
        SideLabels0, MaybeFallThrough0, Type),
    mark_parallels_for_nostack_successors(SideLabels0, SideLabels,
        SideAssocLabelMap, OrdNeedsFrame, !.BlockMap, ProcLabel, !C,
        !SetupParMap, !ExitParMap),
    (
        MaybeFallThrough0 = yes(FallThroughLabel0),
        mark_parallel_for_nostack_successor(FallThroughLabel0,
            FallThroughLabel, OrdNeedsFrame, !.BlockMap, ProcLabel, !C,
            !SetupParMap, !ExitParMap),
        MaybeFallThrough = yes(FallThroughLabel),
        expect(no_disagreement(SideAssocLabelMap,
            FallThroughLabel0, FallThroughLabel), $module, $pred,
            "disagreement"),
        AssocLabelMap = [FallThroughLabel0 - FallThroughLabel
            | SideAssocLabelMap],
        ( FallThroughLabel = FallThroughLabel0 ->
            RedirectFallThrough = []
        ;
            RedirectFallThrough =
                [llds_instr(goto(code_label(FallThroughLabel)),
                    "redirect fallthrough")]
            % We can expect this jump to be optimized away in most cases.
        )
    ;
        MaybeFallThrough0 = no,
        MaybeFallThrough = no,
        AssocLabelMap = SideAssocLabelMap,
        RedirectFallThrough = []
    ),
    list.det_split_last(Instrs0, PrevInstrs, LastInstr0),
    map.from_assoc_list(AssocLabelMap, LabelMap),
    opt_util.replace_labels_instruction(LastInstr0, LastInstr, LabelMap, no),
    Instrs = PrevInstrs ++ [LastInstr | RedirectFallThrough],
    BlockInfo = frame_block_info(Label0, Instrs, FallInto,
        SideLabels, MaybeFallThrough, Type),
    map.set(Label0, BlockInfo, !BlockMap),
    process_frame_delay(Labels0, OrdNeedsFrame, ProcLabel, !C, !BlockMap,
        !SetupParMap, !ExitParMap).

:- pred no_disagreement(assoc_list(label, label)::in, label::in, label::in)
    is semidet.

no_disagreement([], _, _).
no_disagreement([K - V | KVs], Key, Value) :-
    ( K = Key => V = Value ),
    no_disagreement(KVs, Key, Value).

%-----------------------------------------------------------------------------%

    % Invokes mark_parallel_for_nostack_successor on each input label,
    % and returns both the updated list of labels and the substitution
    % (represented as an association list) that will have to applied
    % to the jumping instruction.
    %
:- pred mark_parallels_for_nostack_successors(list(label)::in,
    list(label)::out, assoc_list(label)::out, ord_needs_frame::in,
    frame_block_map(En, Ex)::in, proc_label::in, counter::in, counter::out,
    setup_par_map::in, setup_par_map::out,
    exit_par_map::in, exit_par_map::out) is det.

mark_parallels_for_nostack_successors([], [], [], _, _, _, !C,
        !SetupParMap, !ExitParMap).
mark_parallels_for_nostack_successors([Label0 | Labels0], [Label | Labels],
        [Label0 - Label | LabelMap], OrdNeedsFrame, BlockMap, ProcLabel, !C,
        !SetupParMap, !ExitParMap) :-
    mark_parallel_for_nostack_successor(Label0, Label,
        OrdNeedsFrame, BlockMap, ProcLabel, !C, !SetupParMap, !ExitParMap),
    mark_parallels_for_nostack_successors(Labels0, Labels, LabelMap,
        OrdNeedsFrame, BlockMap, ProcLabel, !C, !SetupParMap, !ExitParMap).

    % Label0 is a label that is a successor of a block which has no stack
    % frame.
    %
    % If Label0 starts an exit block, we ensure that it has a non-teardown
    % parallel Label.
    %
    % If Label0 starts an ordinary block that needs a stack frame, we ensure
    % that it has a parallel Label that allocates a stack frame before handing
    % control to Label0.
    %
:- pred mark_parallel_for_nostack_successor(label::in, label::out,
    ord_needs_frame::in, frame_block_map(En, Ex)::in, proc_label::in,
    counter::in, counter::out, setup_par_map::in, setup_par_map::out,
    exit_par_map::in, exit_par_map::out) is det.

mark_parallel_for_nostack_successor(Label0, Label, OrdNeedsFrame, BlockMap,
        ProcLabel, !C, !SetupParMap, !ExitParMap) :-
    map.lookup(BlockMap, Label0, BlockInfo),
    Type = BlockInfo ^ fb_type,
    (
        Type = entry_block(_),
        unexpected($module, $pred,
            "reached setup via jump from ordinary block")
    ;
        Type = ordinary_block(_, _),
        map.lookup(OrdNeedsFrame, Label0, NeedsFrame),
        (
            NeedsFrame = block_needs_frame,
            ensure_setup_parallel(Label0, Label, ProcLabel, !C, !SetupParMap)
        ;
            NeedsFrame = block_doesnt_need_frame,
            Label = Label0
        )
    ;
        Type = exit_block(_),
        ensure_exit_parallel(Label0, Label, ProcLabel, !C, !ExitParMap)
    ).

%-----------------------------------------------------------------------------%

    % The third phase of the delay_frame_transform optimization, creating
    %
    % - the setup code of ordinary blocks that need frames but (some of)
    %   whose predecessors don't have one, and
    %
    % - the parallels of exit blocks that can assume there is no frame to
    %   tear down.
    %
:- pred create_parallels(list(label)::in, list(label)::out, En::in,
    proc_label::in, counter::in, counter::out, ord_needs_frame::in,
    setup_par_map::in, exit_par_map::in, pred_map::in,
    frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out) is det
    <= block_entry_exit(En, Ex).

create_parallels([], [], _, _, !C, _, _, _, _, !BlockMap).
create_parallels([Label0 | Labels0], Labels, EntryInfo, ProcLabel, !C,
        OrdNeedsFrame, SetupParMap, ExitParMap, PredMap, !BlockMap) :-
    create_parallels(Labels0, Labels1, EntryInfo, ProcLabel, !C,
        OrdNeedsFrame, SetupParMap, ExitParMap, PredMap, !BlockMap),
    map.lookup(!.BlockMap, Label0, BlockInfo0),
    BlockInfo0 = frame_block_info(Label0Copy, _, FallInto,
        SideLabels, MaybeFallThrough, Type),
    expect(unify(Label0, Label0Copy), $module, $pred,
        "label in frame_block_info is not copy"),
    ( search_exit_par_map(ExitParMap, Label0, ParallelLabel) ->
        expect(unify(MaybeFallThrough, no), $module, $pred,
            "exit block with parallel has fall through"),
        (
            SideLabels = [],
            Comments = []
        ;
            SideLabels = [_ | _],
            % This can happen if fulljump optimization has redirected the
            % return.
            Comments = [llds_instr(comment("exit side labels "
                ++ dump_labels(yes(ProcLabel), SideLabels)), "")]
        ),
        PrevNeedsFrame = prev_block_needs_frame(OrdNeedsFrame, BlockInfo0),
        (
            Type = exit_block(ExitInfo),
            LabelInstr = llds_instr(label(ParallelLabel),
                "non-teardown parallel"),
            ReplacementCode = [LabelInstr] ++ Comments ++
                non_teardown_exit_code(ExitInfo),
            (
                PrevNeedsFrame = block_doesnt_need_frame,
                Labels = [ParallelLabel, Label0 | Labels1],
                BlockInfo = BlockInfo0 ^ fb_fallen_into := no,
                map.det_update(Label0, BlockInfo, !BlockMap),
                ParallelBlockFallInto = FallInto
            ;
                PrevNeedsFrame = block_needs_frame,
                Labels = [Label0, ParallelLabel | Labels1],
                ParallelBlockFallInto = no
            ),
            ParallelBlockInfo = frame_block_info(ParallelLabel,
                ReplacementCode, ParallelBlockFallInto, SideLabels,
                no, ordinary_block(block_doesnt_need_frame, is_not_dummy)),
            map.det_insert(ParallelLabel, ParallelBlockInfo, !BlockMap)
        ;
            ( Type = entry_block(_)
            ; Type = ordinary_block(_, _)
            ),
            unexpected($module, $pred, "block in exit_par_map is not exit")
        )
    ; search_setup_par_map(SetupParMap, Label0, SetupLabel) ->
        expect(is_ordinary_block(Type), $module, $pred,
            "block in setup map is not ordinary"),
        PrevNeedsFrame = prev_block_needs_frame(OrdNeedsFrame, BlockInfo0),
        (
            PrevNeedsFrame = block_needs_frame,
            counter.allocate(N, !C),
            JumpAroundLabel = internal_label(N, ProcLabel),
            % By not including a label instruction at the start of
            % JumpAroundCode, we are breaking an invariant of frame_block_maps.
            % However, we don't execute any code during or after
            % create_parallels that depends on that invariant, and not
            % including the label saves memory and reduces the amount of work
            % labelopt has to do. (The label *would* be optimized away, since
            % it can't be referred to from anywhere.)
            JumpAroundCode =
                [llds_instr(goto(code_label(Label0)), "jump around setup")],
            Labels = [JumpAroundLabel, SetupLabel, Label0 | Labels1],
            JumpAroundBlockInfo = frame_block_info(JumpAroundLabel,
                JumpAroundCode, no, [Label0], FallInto,
                ordinary_block(block_needs_frame, is_not_dummy)),
            map.det_insert(JumpAroundLabel, JumpAroundBlockInfo, !BlockMap),
            SetupFallInto = yes(JumpAroundLabel),
            BlockInfo = BlockInfo0 ^ fb_fallen_into := yes(SetupLabel),
            map.det_update(Label0, BlockInfo, !BlockMap)
        ;
            PrevNeedsFrame = block_doesnt_need_frame,
            Labels = [SetupLabel, Label0 | Labels1],
            SetupFallInto = no
        ),
        SetupCode = [llds_instr(label(SetupLabel), "late setup label")]
            ++ late_setup_code(EntryInfo),
        SetupBlockInfo = frame_block_info(SetupLabel, SetupCode,
            SetupFallInto, [], yes(Label0), entry_block(EntryInfo)),
        map.det_insert(SetupLabel, SetupBlockInfo, !BlockMap)
    ;
        Labels = [Label0 | Labels1]
    ).

:- func prev_block_needs_frame(ord_needs_frame, frame_block_info(En, Ex)) =
    block_needs_frame.

prev_block_needs_frame(OrdNeedsFrame, BlockInfo) = PrevNeedsFrame :-
    MaybeFallIntoFrom = BlockInfo ^ fb_fallen_into,
    (
        MaybeFallIntoFrom = yes(FallIntoFrom),
        ( map.search(OrdNeedsFrame, FallIntoFrom, NeedsFrame) ->
            % FallIntoFrom is an ordinary block that can fall through
            % to this block.
            PrevNeedsFrame = NeedsFrame
        ;
            % FallIntoFrom is a setup block; exit blocks cannot fall
            % through. Entry blocks don't need frames.
            PrevNeedsFrame = block_doesnt_need_frame
        )
    ;
        MaybeFallIntoFrom = no,
        % The previous block doesn't care whether the following block
        % has a frame or not.
        PrevNeedsFrame = block_doesnt_need_frame
    ).

:- pred is_ordinary_block(block_type(En, Ex)::in) is semidet.

is_ordinary_block(ordinary_block(_, _)).

%-----------------------------------------------------------------------------%

:- func det_late_setup(det_entry_info) = list(instruction).

det_late_setup(det_entry(FrameSize, Msg, Kind)) =
    [llds_instr(incr_sp(FrameSize, Msg, Kind), "late setup"),
    llds_instr(assign(stackvar(FrameSize), lval(succip)), "late save")].

:- func det_non_teardown_exit_code(det_exit_info) = list(instruction).

det_non_teardown_exit_code(det_exit(_, Livevals, Goto)) = Livevals ++ [Goto].

:- func nondet_late_setup(nondet_entry_info) = list(instruction).

nondet_late_setup(nondet_entry(Msg, FrameSize, Redoip)) =
    [llds_instr(mkframe(ordinary_frame(Msg, FrameSize), yes(Redoip)),
        "late setup")].

:- func nondet_non_teardown_exit_code(nondet_exit_info) = list(instruction).

nondet_non_teardown_exit_code(nondet_plain_exit(Livevals, _Goto)) =
    Livevals ++ [llds_instr(goto(code_succip), "")].
nondet_non_teardown_exit_code(nondet_teardown_exit(_, _, _, Livevals, Goto)) =
    Livevals ++ [Goto].

%-----------------------------------------------------------------------------%

    % Given the label of a block, allocate a label for its parallel
    % in the given setup map if it doesn't already have one.
    %
:- pred ensure_setup_parallel(label::in, label::out, proc_label::in,
    counter::in, counter::out, setup_par_map::in, setup_par_map::out) is det.

ensure_setup_parallel(Label, ParallelLabel, ProcLabel, !C, !SetupParMap) :-
    !.SetupParMap = setup_par_map(ParMap0),
    ( map.search(ParMap0, Label, OldParallel) ->
        ParallelLabel = OldParallel
    ;
        counter.allocate(N, !C),
        NewParallel = internal_label(N, ProcLabel),
        ParallelLabel = NewParallel,
        map.det_insert(Label, NewParallel, ParMap0, ParMap),
        !:SetupParMap = setup_par_map(ParMap)
    ).

    % Given the label of a block, allocate a label for its parallel
    % in the given exit map if it doesn't already have one.
    %
:- pred ensure_exit_parallel(label::in, label::out, proc_label::in,
    counter::in, counter::out, exit_par_map::in, exit_par_map::out) is det.

ensure_exit_parallel(Label, ParallelLabel, ProcLabel, !C, !ExitParMap) :-
    !.ExitParMap = exit_par_map(ParMap0),
    ( map.search(ParMap0, Label, OldParallel) ->
        ParallelLabel = OldParallel
    ;
        counter.allocate(N, !C),
        NewParallel = internal_label(N, ProcLabel),
        ParallelLabel = NewParallel,
        map.det_insert(Label, NewParallel, ParMap0, ParMap),
        !:ExitParMap = exit_par_map(ParMap)
    ).

%-----------------------------------------------------------------------------%

    % This predicate generates a human-readable description of a block
    % as a comment instruction. This can make it much easier to debug
    % frameopt.
    %
:- pred describe_block(frame_block_map(En, Ex)::in, ord_needs_frame::in,
    pred_map::in, proc_label::in, label::in, instruction::out) is det
    <= block_entry_exit(En, Ex).

describe_block(BlockMap, OrdNeedsFrame, PredMap, ProcLabel, Label, Instr) :-
    map.lookup(BlockMap, Label, BlockInfo),
    BlockInfo = frame_block_info(BlockLabel, BlockInstrs, FallInto,
        SideLabels, MaybeFallThrough, Type),
    expect(unify(Label, BlockLabel), $module, $pred, "label mismatch"),
    YesProcLabel = yes(ProcLabel),
    LabelStr = dump_label(YesProcLabel, Label),
    BlockInstrsStr = dump_fullinstrs(YesProcLabel, yes, BlockInstrs),
    Heading = "\nBLOCK " ++ LabelStr ++ "\n\n",
    ( map.search(PredMap, Label, PredLabel) ->
        PredStr = "previous label " ++
            dump_label(YesProcLabel, PredLabel) ++ "\n"
    ;
        PredStr = "no previous label\n"
    ),
    (
        FallInto = yes(FallIntoFromLabel),
        FallIntoStr = "fallen into from " ++
            dump_label(YesProcLabel, FallIntoFromLabel) ++ "\n"
    ;
        FallInto = no,
        FallIntoStr = "not fallen into\n"
    ),
    (
        SideLabels = [],
        SideStr = "no side labels\n"
    ;
        SideLabels = [_ | _],
        SideStr = "side labels " ++
            dump_labels(YesProcLabel, SideLabels) ++ "\n"
    ),
    (
        MaybeFallThrough = yes(FallThroughLabel),
        FallThroughStr = "falls through to " ++
            dump_label(YesProcLabel, FallThroughLabel) ++ "\n"
    ;
        MaybeFallThrough = no,
        FallThroughStr = "does not fall through\n"
    ),
    (
        Type = entry_block(Entry),
        TypeStr = "entry_block\n" ++ describe_entry(Entry)
    ;
        Type = ordinary_block(UsesFrame, MaybeDummy),
        (
            MaybeDummy = is_not_dummy,
            TypeStr0 = "ordinary_block; "
        ;
            MaybeDummy = is_post_entry_dummy,
            TypeStr0 = "ordinary_block (post_entry_dummy); "
        ;
            MaybeDummy = is_pre_exit_dummy,
            TypeStr0 = "ordinary_block (pre_exit_dummy); "
        ),
        (
            UsesFrame = block_needs_frame,
            TypeStr1 = TypeStr0 ++ "uses frame\n"
        ;
            UsesFrame = block_doesnt_need_frame,
            TypeStr1 = TypeStr0 ++ "does not use frame\n"
        ),
        ( map.search(OrdNeedsFrame, Label, NeedsFrame) ->
            (
                NeedsFrame = block_doesnt_need_frame,
                expect(unify(UsesFrame, block_doesnt_need_frame),
                    $module, $pred,
                    "NeedsFrame=block_doesnt_need_frame, " ++
                    "UsesFrame=block_needs_frame"),
                TypeStr = TypeStr1 ++ "does not need frame\n"
            ;
                NeedsFrame = block_needs_frame,
                TypeStr = TypeStr1 ++ "does need frame\n"
            )
        ;
            % We can get here if delay_frame_transform fails.
            TypeStr = TypeStr1 ++ "(unknown whether it does need frame)\n"
        )
    ;
        Type = exit_block(Exit),
        expect(unify(MaybeFallThrough, no), $module, $pred,
            "exit_block, MaybeFallThrough=yes(_)"),
        TypeStr = "exit_block\n" ++ describe_exit(YesProcLabel, Exit)
    ),
    Comment = Heading ++ PredStr ++ FallIntoStr ++ SideStr ++ FallThroughStr
        ++ TypeStr ++ "CODE:\n" ++ BlockInstrsStr,
    Instr = llds_instr(comment(Comment), "").

:- func describe_det_entry(det_entry_info) = string.

describe_det_entry(det_entry(Size, Msg, Kind)) =
    "size: " ++ int_to_string(Size) ++
    ", msg: " ++ Msg ++
    ", kind: " ++ dump_stack_incr_kind(Kind) ++
    "\n".

:- func describe_det_exit(maybe(proc_label), det_exit_info) = string.

describe_det_exit(MaybeProcLabel, det_exit(RestoreSuccip, Livevals, Goto)) =
    "restore:  "
    ++ dump_fullinstrs(MaybeProcLabel, yes, RestoreSuccip)
    ++ "livevals: "
    ++ dump_fullinstrs(MaybeProcLabel, yes, Livevals)
    ++ "goto:     "
    ++ dump_fullinstr(MaybeProcLabel, yes, Goto).

:- func describe_nondet_entry(nondet_entry_info) = string.

describe_nondet_entry(nondet_entry(Msg, Size, Redoip)) =
    "msg: "
    ++ Msg
    ++ ", size: "
    ++ int_to_string(Size)
    ++ ", redoip: "
    ++ dump_code_addr(no, Redoip)
    ++ "\n".

:- func describe_nondet_exit(maybe(proc_label), nondet_exit_info) = string.

describe_nondet_exit(MaybeProcLabel, nondet_plain_exit(Livevals, Goto)) =
    "livevals: "
    ++ dump_fullinstrs(MaybeProcLabel, yes, Livevals)
    ++ "goto:     "
    ++ dump_fullinstr(MaybeProcLabel, yes, Goto).
describe_nondet_exit(MaybeProcLabel, nondet_teardown_exit(Succip, Maxfr, Curfr,
        Livevals, Goto)) =
    "succip: "
    ++ dump_fullinstr(MaybeProcLabel, yes, Succip)
    ++ "maxfr: "
    ++ dump_fullinstr(MaybeProcLabel, yes, Maxfr)
    ++ "curfr: "
    ++ dump_fullinstr(MaybeProcLabel, yes, Curfr)
    ++ "livevals: "
    ++ dump_fullinstrs(MaybeProcLabel, yes, Livevals)
    ++ "goto:     "
    ++ dump_fullinstr(MaybeProcLabel, yes, Goto).

%-----------------------------------------------------------------------------%

:- pred search_setup_par_map(setup_par_map::in, label::in, label::out)
    is semidet.

search_setup_par_map(setup_par_map(ParMap), Label, ParallelLabel) :-
    map.search(ParMap, Label, ParallelLabel).

:- pred search_exit_par_map(exit_par_map::in, label::in, label::out)
    is semidet.

search_exit_par_map(exit_par_map(ParMap), Label, ParallelLabel) :-
    map.search(ParMap, Label, ParallelLabel).

%-----------------------------------------------------------------------------%

:- pred is_yes(maybe(T)::in) is semidet.

is_yes(yes(_)).

:- pred pair_with(T::in, U::in, pair(T, U)::out) is det.

pair_with(T, U, T - U).

%-----------------------------------------------------------------------------%
:- end_module ll_backend.frameopt.
%-----------------------------------------------------------------------------%