%-----------------------------------------------------------------------------% % vim: ft=mercury ts=4 sw=4 et %-----------------------------------------------------------------------------% % Copyright (C) 1994-2001, 2003-2012 The University of Melbourne. % Copyright (C) 2015-2020, 2023-2025 The Mercury team. % This file may only be copied under the terms of the GNU General % Public License - see the file COPYING in the Mercury distribution. %-----------------------------------------------------------------------------% % % File: frameopt.m. % Main author: zs. % % This module performs two kinds of transformation to optimize code that % manipulates detstack frames. % % The first and more common transformation transforms code such as % % proc_entry: % incr_sp(N) % stackvar(N) = succip % if (cond) goto L1 % ... % L1: % finalization % succip = stackvar(N) % decr_sp(N) % proceed % % into % % proc_entry: % if (cond) goto L1 % incr_sp(N) % stackvar(N) = succip % ... % finalization % succip = stackvar(N) % decr_sp(N) % proceed % L1: % finalization % proceed % % The advantage is that we don't set up the stack frame unless we need it. % % The actual optimization is more complex than this, because we want to delay % the construction of the stack frame across more than one jump, if this is % possible. % % The second transformation transforms code such as % % proc_entry: % incr_sp(N) % stackvar(N) = succip % ... % finalization % succip = stackvar(N) % decr_sp(N) % goto proc_entry % % into % % proc_entry: % incr_sp(N) % stackvar(N) = succip % L1: % ... % succip = stackvar(N) % finalization % goto L1 % % The advantage is that we don't destroy the stack frame just to set it up % again. The restore of succip can be omitted if the procedure makes no calls, % since in that case succip must still contain the value it had when this % procedure was called. % % Since the second transformation is a bigger win, we prefer to use it % whenever both transformations are possible. % % For predicates that live on the nondet stack, we attempt to perform % only the second transformation. % % NOTE: This module cannot handle code sequences of the form % % label1: % label2: % % Jump optimization converts any gotos to label1 to gotos to label2, making % label1 unused, and label optimization removes unused labels. (This works % for any number of labels in a sequence, not just two.) Therefore the % handle_options module turns on the jump and label optimizations whenever % frame optimization is turned on. % %-----------------------------------------------------------------------------% :- module ll_backend.frameopt. :- interface. :- import_module ll_backend.llds. :- import_module mdbcomp. :- import_module mdbcomp.prim_data. :- import_module bool. :- import_module counter. :- import_module list. :- import_module set_tree234. %-----------------------------------------------------------------------------% % frameopt_main_det_stack(ProcLabel, !LabelCounter, !Instrs, AddComments, % AnyChange): % % Attempt to update !Instrs using the one of the transformations % described above for procedures that live on the det stack. % % ProcLabel should be the ProcLabel of the procedure whose body !.Instrs % implements, and !.LabelCounter that procedure's label counter. % If frameopt_main allocates any labels, !:LabelCounter will reflect this. % % AnyChange says whether we performed any modifications. % If yes, then we also introduced some extra labels that should be % deleted and probably some jumps that could be profitably be % short-circuited. % :- pred frameopt_main_det_stack(proc_label::in, counter::in, counter::out, list(instruction)::in, list(instruction)::out, bool::in, bool::out) is det. % frameopt_main_nondet_stack(ProcLabel, !LabelCounter, !Instrs, % AddComments, AnyChange): % % The equivalent of frameopt_main_det_stack for procedures that live on % the nondet stack, but attempting only the transformation that delays % the creation of stack frames. % :- pred frameopt_main_nondet_stack(proc_label::in, counter::in, counter::out, list(instruction)::in, list(instruction)::out, bool::in, bool::out) is det. % frameopt_keep_nondet_frame(ProcLabel, LayoutLabels, % !LabelCounter, !Instrs, AnyChange): % % The equivalent of frameopt_main_det_stack for procedures that live on % the nondet stack, but attempting only the transformation that keeps % existing stack frames at tail calls. This should be called before % other LLDS optimizations on the procedure body. % % LayoutLabels should be the set of labels in the procedure with layout % structures, while MayAlterRtti should say whether we are allowed to % perform optimizations that may interfere with RTTI. % :- pred frameopt_keep_nondet_frame(proc_label::in, set_tree234(label)::in, counter::in, counter::out, list(instruction)::in, list(instruction)::out, bool::out) is det. %-----------------------------------------------------------------------------% %-----------------------------------------------------------------------------% :- implementation. :- import_module ll_backend.opt_debug. :- import_module ll_backend.opt_util. :- import_module parse_tree. :- import_module parse_tree.prog_data_foreign. :- import_module assoc_list. :- import_module int. :- import_module map. :- import_module maybe. :- import_module pair. :- import_module queue. :- import_module require. :- import_module string. %-----------------------------------------------------------------------------% frameopt_main_det_stack(ProcLabel, !C, Instrs0, Instrs, AddComments, Mod) :- opt_util.get_prologue(Instrs0, LabelInstr, Comments0, Instrs1), ( if detect_det_entry(Instrs1, EntryInfo, _, _) then some [!BlockMap] ( map.init(!:BlockMap), divide_into_basic_blocks(ProcLabel, [LabelInstr | Instrs1], BasicInstrs, !C), build_frame_block_map(ProcLabel, EntryInfo, no, no, BasicInstrs, LabelSeq0, !BlockMap, map.init, PredMap, !C, map.init, PreExitDummyLabelMap), analyze_block_map(PreExitDummyLabelMap, LabelSeq0, KeepFrame, !BlockMap), ( KeepFrame = yes(FirstLabel - SecondLabel), CanClobberSuccip = can_clobber_succip(LabelSeq0, !.BlockMap), keep_frame_transform(FirstLabel, SecondLabel, CanClobberSuccip, LabelSeq0, !BlockMap), LabelSeq = LabelSeq0, NewComment = llds_instr(comment("keeping stack frame"), ""), Comments = Comments0 ++ [NewComment], flatten_block_seq(!.BlockMap, LabelSeq, BodyInstrs), Instrs = Comments ++ BodyInstrs, Mod = yes ; KeepFrame = no, ( if can_delay_frame(!.BlockMap, LabelSeq0) then delay_frame_transform(EntryInfo, ProcLabel, PredMap, AddComments, TransformComments, DescComments, CanTransform, LabelSeq0, LabelSeq, !C, !BlockMap), ( CanTransform = can_transform, Comments = Comments0 ++ TransformComments ++ DescComments, flatten_block_seq(!.BlockMap, LabelSeq, BodyInstrs), Instrs = Comments ++ BodyInstrs, Mod = yes ; CanTransform = cannot_transform, maybe_add_comments(AddComments, DescComments, Instrs0, Instrs, Mod) ) else Instrs = Instrs0, Mod = no ) ) ) else Instrs = Instrs0, Mod = no ). frameopt_main_nondet_stack(ProcLabel, !C, Instrs0, Instrs, AddComments, Mod) :- opt_util.get_prologue(Instrs0, LabelInstr, Comments0, Instrs1), ( if detect_nondet_entry(Instrs1, EntryInfo, _, _) then some [!BlockMap] ( map.init(!:BlockMap), divide_into_basic_blocks(ProcLabel, [LabelInstr | Instrs1], BasicInstrs, !C), build_frame_block_map(ProcLabel, EntryInfo, no, no, BasicInstrs, LabelSeq0, !BlockMap, map.init, PredMap, !C, map.init, PreExitDummyLabelMap), analyze_block_map(PreExitDummyLabelMap, LabelSeq0, _KeepFrame, !BlockMap), ( if can_delay_frame(!.BlockMap, LabelSeq0) then delay_frame_transform(EntryInfo, ProcLabel, PredMap, AddComments, TransformComments, DescComments, CanTransform, LabelSeq0, LabelSeq, !C, !BlockMap), ( CanTransform = can_transform, Comments = Comments0 ++ TransformComments ++ DescComments, flatten_block_seq(!.BlockMap, LabelSeq, BodyInstrs), Instrs = Comments ++ BodyInstrs, Mod = yes ; CanTransform = cannot_transform, maybe_add_comments(AddComments, DescComments, Instrs0, Instrs, Mod) ) else Instrs = Instrs0, Mod = no ) ) else Instrs = Instrs0, Mod = no ). :- pred maybe_add_comments(bool::in, list(instruction)::in, list(instruction)::in, list(instruction)::out, bool::out) is det. maybe_add_comments(Comments, DescComments, Instrs0, Instrs, Mod) :- ( Comments = no, Instrs = Instrs0, Mod = no ; Comments = yes, Instrs = [llds_instr(comment("could not delay frame creation"), "")] ++ DescComments ++ Instrs0, Mod = yes ). %-----------------------------------------------------------------------------% frameopt_keep_nondet_frame(ProcLabel, LayoutLabels, !C, Instrs0, Instrs, Mod) :- opt_util.get_prologue(Instrs0, LabelInstr, Comments0, Instrs1), ( if nondetstack_setup(Instrs1, FrameInfo, Redoip, MkframeInstr, Remain), MkframeInstr = llds_instr(MkframeUinstr, MkframeComment), find_succeed_labels(Instrs1, map.init, SuccMap), counter.allocate(KeepFrameLabelNum, !C), KeepFrameLabel = internal_label(KeepFrameLabelNum, ProcLabel), keep_nondet_frame(Remain, Instrs2, ProcLabel, KeepFrameLabel, MkframeUinstr, SuccMap, LayoutLabels, no, yes) then list.condense([[LabelInstr], Comments0, [llds_instr(mkframe(FrameInfo, no), MkframeComment), llds_instr(label(KeepFrameLabel), "tail recursion target, nofulljump"), llds_instr(assign(redoip_slot(lval(curfr)), const(llconst_code_addr(Redoip))), "")], Instrs2], Instrs), Mod = yes else Instrs = Instrs0, Mod = no ). :- pred nondetstack_setup(list(instruction)::in, nondet_frame_info::out, code_addr::out, instruction::out, list(instruction)::out) is semidet. nondetstack_setup(Instrs0, FrameInfo, Redoip, MkframeInstr, Remain) :- Instrs0 = [MkframeInstr | Remain], MkframeInstr = llds_instr(mkframe(FrameInfo, yes(Redoip)), _), FrameInfo = ordinary_frame(_, _). :- pred find_succeed_labels(list(instruction)::in, tailmap::in, tailmap::out) is det. find_succeed_labels([], !SuccMap). find_succeed_labels([Instr | Instrs], !SuccMap) :- Instr = llds_instr(Uinstr, _), ( if Uinstr = label(Label), opt_util.skip_comments(Instrs, TailInstrs), opt_util.is_succeed_next(TailInstrs, Between) then map.det_insert(Label, Between, !SuccMap) else true ), find_succeed_labels(Instrs, !SuccMap). :- pred keep_nondet_frame(list(instruction)::in, list(instruction)::out, proc_label::in, label::in, instr::in, tailmap::in, set_tree234(label)::in, bool::in, bool::out) is det. keep_nondet_frame([], [], _, _, _, _, _, !Changed). keep_nondet_frame([Instr0 | Instrs0], Instrs, ProcLabel, KeepFrameLabel, PrevInstr, SuccMap, LayoutLabels, !Changed) :- Instr0 = llds_instr(Uinstr0, Comment), ( if % Look for nondet style tailcalls which do not need a runtime check. Uinstr0 = llcall(code_label(entry_label(_, ProcLabel)), code_label(RetLabel), _, _, _, CallModel), CallModel = call_model_nondet(unchecked_tail_call), map.search(SuccMap, RetLabel, BetweenIncl), BetweenIncl = [llds_instr(livevals(_), _), llds_instr(goto(_), _)], PrevInstr = livevals(Livevals), not set_tree234.member(RetLabel, LayoutLabels) then keep_nondet_frame(Instrs0, Instrs1, ProcLabel, KeepFrameLabel, Uinstr0, SuccMap, LayoutLabels, !.Changed, _), !:Changed = yes, NewComment = Comment ++ " (nondet tailcall)", Instrs = [ llds_instr(livevals(Livevals), ""), llds_instr(goto(code_label(KeepFrameLabel)), NewComment) | Instrs1 ] else keep_nondet_frame(Instrs0, Instrs1, ProcLabel, KeepFrameLabel, Uinstr0, SuccMap, LayoutLabels, !Changed), Instrs = [Instr0 | Instrs1] ). %-----------------------------------------------------------------------------% %-----------------------------------------------------------------------------% % We use this type for optimizing procedures of all determinisms, % but the information we need depends on which stack the procedure uses: % the det stack or the nondet stack. This is why this type is parametric % in the types we use to record information about block entry (En) and % block exist (Ex). En will always be either det_entry_info or % nondet_entry_info. while Ex will always be det_exit_info or % nondet_exit_info. det_entry_info will be paired with nondet_exit_info, % or vice versa. This is guaranteed by the functional dependencies % on the definition of the block_entry_exit typeclass. :- type frame_block_map(En, Ex) == map(label, frame_block_info(En, Ex)). :- type det_frame_block_map == frame_block_map(det_entry_info, det_exit_info). :- type frame_block_info(En, Ex) ---> frame_block_info( % The label of the first instr. fb_label :: label, % The code of the block. fb_instrs :: list(instruction), % Does the previous block (if any) fall through to this block, % and if yes, what is its label? fb_fallen_into :: maybe(label), % The labels we can jump to (not falling through). fb_jump_dests :: list(label), % The label we fall through to (if there is one). fb_fall_dest :: maybe(label), fb_type :: block_type(En, Ex) ). :- type block_type(EntryInfo, ExitInfo) ---> entry_block(EntryInfo) ; ordinary_block(block_needs_frame, maybe_dummy) ; exit_block(ExitInfo). :- type maybe_dummy ---> is_not_dummy ; is_post_entry_dummy ; is_pre_exit_dummy. % Until 30 July 2010, we used to keep a set of reasons *why* a block % needs a frame. If that info can be useful to you, revert that diff % in your workspace. :- type block_needs_frame ---> block_needs_frame ; block_does_not_need_frame. :- type det_entry_info ---> det_entry( % The frame size. int, % The msg of the incr_sp instruction. string, stack_incr_kind ). :- type det_exit_info ---> det_exit( % This block contains det stack teardown and goto code. % The instr that restores succip (if any). list(instruction), % The livevals instr before the goto (if any). list(instruction), % The goto instr. instruction ). :- type nondet_entry_info ---> nondet_entry( % The msg of the mkframe instruction. string, % The frame size. int, % The initial redoip. code_addr ). :- type nondet_exit_info ---> nondet_plain_exit( % This block contains nondet stack exit code % that doesn't throw away the stack frame. % The livevals instr before the goto (if any). list(instruction), % The goto(do_succeed) instr. instruction ) ; nondet_teardown_exit( % This block contains nondet stack exit code % that *does* throw away the stack frame. % The restore of the succip, maxfr and curfr respectively. instruction, instruction, instruction, % The livevals instr before the goto (if any). list(instruction), % The goto(entry(_, _)) instr. instruction ). :- typeclass block_entry_exit(En, Ex) <= ((En -> Ex), (Ex -> En)) where [ pred detect_entry(list(instruction)::in, En::out, list(instruction)::out, list(instruction)::out) is semidet, pred detect_exit(En::in, list(instruction)::in, Ex::out, list(instruction)::out, list(instruction)::out, list(instruction)::out) is semidet, func late_setup_code(En) = list(instruction), func non_teardown_exit_code(Ex) = list(instruction), func describe_entry(En) = string, func describe_exit(maybe(proc_label), Ex) = string ]. :- instance block_entry_exit(det_entry_info, det_exit_info) where [ pred(detect_entry/4) is detect_det_entry, pred(detect_exit/6) is detect_det_exit, func(late_setup_code/1) is det_late_setup, func(non_teardown_exit_code/1) is det_non_teardown_exit_code, func(describe_entry/1) is describe_det_entry, func(describe_exit/2) is describe_det_exit ]. :- instance block_entry_exit(nondet_entry_info, nondet_exit_info) where [ pred(detect_entry/4) is detect_nondet_entry, pred(detect_exit/6) is detect_nondet_exit, func(late_setup_code/1) is nondet_late_setup, func(non_teardown_exit_code/1) is nondet_non_teardown_exit_code, func(describe_entry/1) is describe_nondet_entry, func(describe_exit/2) is describe_nondet_exit ]. %-----------------------------------------------------------------------------% % Add labels to the given instruction sequence so that % every basic block has labels around it. % :- pred divide_into_basic_blocks(proc_label::in, list(instruction)::in, list(instruction)::out, counter::in, counter::out) is det. divide_into_basic_blocks(_, [], [], !C). % Control can fall off the end of a procedure if that procedure % ends with a call to another procedure that cannot succeed. % This is the only situation in which the base case can be reached. divide_into_basic_blocks(ProcLabel, [Instr0 | Instrs0], Instrs, !C) :- Instr0 = llds_instr(Uinstr0, _Comment), CanBranchAway = opt_util.can_instr_branch_away(Uinstr0), ( CanBranchAway = yes, ( Instrs0 = [Instr1 | _], ( if Instr1 = llds_instr(label(_), _) then divide_into_basic_blocks(ProcLabel, Instrs0, Instrs1, !C), Instrs = [Instr0 | Instrs1] else counter.allocate(N, !C), NewLabel = internal_label(N, ProcLabel), NewInstr = llds_instr(label(NewLabel), ""), divide_into_basic_blocks(ProcLabel, Instrs0, Instrs1, !C), Instrs = [Instr0, NewInstr | Instrs1] ) ; Instrs0 = [], Instrs = [Instr0] ) ; CanBranchAway = no, divide_into_basic_blocks(ProcLabel, Instrs0, Instrs1, !C), Instrs = [Instr0 | Instrs1] ). :- pred flatten_block_seq(frame_block_map(En, Ex)::in, list(label)::in, list(instruction)::out) is det. flatten_block_seq(_, [], []). flatten_block_seq(BlockMap, [Label | Labels], Instrs) :- flatten_block_seq(BlockMap, Labels, RestInstrs), map.lookup(BlockMap, Label, BlockInfo), BlockInstrs = BlockInfo ^ fb_instrs, ( if list.split_last(BlockInstrs, MostInstrs, LastInstr), Labels = [NextLabel | _], LastInstr = llds_instr(goto(code_label(NextLabel)), _) then % Optimize away the redundant goto, which we probably introduced. % The next invocation of jumpopt would also do this, but doing it here % is cheaper and may let us reach a fixpoint in the optimization % sequence earlier. Instrs = MostInstrs ++ RestInstrs else Instrs = BlockInstrs ++ RestInstrs ). %-----------------------------------------------------------------------------% % This type is explained in the big comment in the body of % build_frame_block_map below. :- type pre_exit_dummy_label_map == map(label, label). % Given an instruction list in which labels mark the start of every % basic block, divide it up into basic blocks of one of three types: % % - entry blocks, blocks that contain only stack setup instructions % (incr_sp and assignment of succip to the bottom stack slot); % % - ordinary blocks that contain neither a stack setup nor a % stack teardown; % % - exit blocks that remove an existing stack frame. % % For such each block, create a frame_block_info structure that gives the % label starting the block, the instructions in the block, and its % type. Two of the fields of the frame_block_info structure are filled in % with dummy values; they will be filled in for real later. % % Put these frame_block_info structures into a table indexed by the label, % and return the sequence of labels of the blocks in their original % order. % :- pred build_frame_block_map(proc_label::in, En::in, maybe(label)::in, maybe(label)::in, list(instruction)::in, list(label)::out, frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out, pred_map::in, pred_map::out, counter::in, counter::out, pre_exit_dummy_label_map::in, pre_exit_dummy_label_map::out) is det <= block_entry_exit(En, Ex). build_frame_block_map(_, _, _, _, [], [], !BlockMap, !PredMap, !C, !PreExitDummyLabelMap). build_frame_block_map(ProcLabel, EntryInfo, MaybePrevLabel, FallInto, [Instr0 | Instrs0], LabelSeq, !BlockMap, !PredMap, !C, !PreExitDummyLabelMap) :- ( if Instr0 = llds_instr(label(LabelPrime), _) then Label = LabelPrime else unexpected($pred, "block does not start with label") ), ( MaybePrevLabel = yes(PrevLabel), map.det_insert(Label, PrevLabel, !PredMap) ; MaybePrevLabel = no ), ( if detect_entry(Instrs0, EntryInfo, EntryInstrs, Instrs1) then % Create a block with just the entry instructions in it, % followed by an empty block. The reason why we need the empty % block is that process_frame_delay below doesn't handle any % transition from entry blocks directly to exit blocks. % Fixing that would be complicated; this fix is simpler. % % We would like to put EmptyLabel *after* FallThroughLabel % to make the next invocation of labelopt eliminate EmptyLabel % rather than FallThroughLabel, but doing that would require % updating LabelSeq and BlockMap. counter.allocate(EmptyN, !C), EmptyLabel = internal_label(EmptyN, ProcLabel), % The fb_jump_dests and fb_fall_dest fields are only dummies. FallThroughToEmptyInstr = llds_instr(goto(code_label(EmptyLabel)), "fall through"), BlockInfo = frame_block_info(Label, [Instr0 | EntryInstrs] ++ [FallThroughToEmptyInstr], FallInto, [], no, entry_block(EntryInfo)), % Ensure that the left over, non-entry part of the original block % starts with a label, and that the empty block ends with a goto % that falls through to this label. ( if Instrs1 = [Instr1 | _], Instr1 = llds_instr(label(NextLabelPrime), _) then NextLabel = NextLabelPrime, Instrs2 = Instrs1 else counter.allocate(N, !C), NextLabel = internal_label(N, ProcLabel), NextLabelInstr = llds_instr(label(NextLabel), ""), Instrs2 = [NextLabelInstr | Instrs1] ), EmptyLabelInstr = llds_instr(label(EmptyLabel), ""), FallThroughFromEmptyInstr = llds_instr(goto(code_label(NextLabel)), "fall through"), % The fb_jump_dests and fb_fall_dest fields are only dummies. EmptyBlockType = ordinary_block(block_does_not_need_frame, is_post_entry_dummy), EmptyBlockInfo = frame_block_info(EmptyLabel, [EmptyLabelInstr, FallThroughFromEmptyInstr], yes(Label), [], no, EmptyBlockType), build_frame_block_map(ProcLabel, EntryInfo, yes(EmptyLabel), yes(EmptyLabel), Instrs2, TailLabelSeq, !BlockMap, !PredMap, !C, !PreExitDummyLabelMap), map.det_insert(Label, BlockInfo, !BlockMap), map.det_insert(EmptyLabel, EmptyBlockInfo, !BlockMap), LabelSeq = [Label, EmptyLabel | TailLabelSeq] else if detect_exit(EntryInfo, Instrs0, ExitInfo, Extra, ExitInstrs, Remain) then % We always insert an ordinary block before the exit block, % because doing otherwise could lead to a violation of our % invariant that exit blocks never *need* a stack frame % in cases where the redoip of a nondet frame is assigned % the label of the exit block. By inserting a dummy block % before the exit block if necessary, we ensure that the redoip % points not to the exit block but to the ordinary block % preceding it. % % However, having other blocks jump to a pre_exit dummy block % instead of the exit block loses opportunities for optimization. % This is because we may (and typically will) fall into the % pre_exit dummy block from an ordinary block that needs a stack % frame, and this requirement is propagated to the pre_exit dummy % block by propagate_frame_requirement_to_successors, which will % cause propagate_frame_requirement_to_predecessors to propagate % that requirement to blocks that don't actually need a stack % frame. This is why in the is_pre_exit_dummy case below we record % the mapping from the label of the pre_exit dummy block to the % label of the exit block, so that we can alter instructions % that jump to the first label to proceed to the second. Since % there is no code between those two labels (except a dummy goto % instruction that implements the fallthrough), this is safe. % To avoid violating the invariant mentioned at the top of this % comment, we don't substitute labels used as code addresses % in assignments to redoip slots. counter.allocate(N, !C), ExitLabel = internal_label(N, ProcLabel), does_block_need_frame(Label, Extra, NeedsFrame), FallThroughInstr = llds_instr(goto(code_label(ExitLabel)), "fall through"), % The fb_jump_dests and fb_fall_dest fields are only dummies. ( Extra = [], expect(unify(NeedsFrame, block_does_not_need_frame), $pred, "[] needs frame"), map.det_insert(Label, ExitLabel, !PreExitDummyLabelMap), ExtraBlockType = ordinary_block(NeedsFrame, is_pre_exit_dummy) ; Extra = [_ | _], ExtraBlockType = ordinary_block(NeedsFrame, is_not_dummy) ), ExtraInstrs = [Instr0 | Extra] ++ [FallThroughInstr], ExtraInfo = frame_block_info(Label, ExtraInstrs, FallInto, [], no, ExtraBlockType), ExitLabelInstr = llds_instr(label(ExitLabel), ""), LabelledBlock = [ExitLabelInstr | ExitInstrs], % The fb_jump_dests and fb_fall_dest fields are only dummies. ExitBlockInfo = frame_block_info(ExitLabel, LabelledBlock, yes(Label), [], no, exit_block(ExitInfo)), map.det_insert(ExitLabel, Label, !PredMap), build_frame_block_map(ProcLabel, EntryInfo, yes(ExitLabel), no, Remain, TailLabelSeq, !BlockMap, !PredMap, !C, !PreExitDummyLabelMap), map.det_insert(ExitLabel, ExitBlockInfo, !BlockMap), map.det_insert(Label, ExtraInfo, !BlockMap), LabelSeq = [Label, ExitLabel | TailLabelSeq] else opt_util.skip_to_next_label(Instrs0, Block, Instrs1), does_block_need_frame(Label, Block, NeedsFrame), BlockInstrs = [Instr0 | Block], % The fb_jump_dests and fb_fall_dest fields are only dummies. BlockInfo = frame_block_info(Label, BlockInstrs, FallInto, [], no, ordinary_block(NeedsFrame, is_not_dummy)), ( if list.last(BlockInstrs, LastBlockInstr) then LastBlockInstr = llds_instr(LastBlockUinstr, _), NextFallIntoBool = opt_util.can_instr_fall_through(LastBlockUinstr), ( NextFallIntoBool = yes, NextFallInto = yes(Label) ; NextFallIntoBool = no, NextFallInto = no ) else NextFallInto = yes(Label) ), build_frame_block_map(ProcLabel, EntryInfo, yes(Label), NextFallInto, Instrs1, LabelSeq0, !BlockMap, !PredMap, !C, !PreExitDummyLabelMap), map.det_insert(Label, BlockInfo, !BlockMap), LabelSeq = [Label | LabelSeq0] ). %-----------------------------------------------------------------------------% % Does the given code start with a setup of a det stack frame? If yes, % return the size of the frame and two instruction sequences, % Setup and Others ++ Remain. Setup is the instruction sequence % that sets up the det stack frame, Others is a sequence of % non-interfering instructions that were interspersed with Setup % but can be moved after Setup, and Remain is all remaining instructions. % :- pred detect_det_entry(list(instruction)::in, det_entry_info::out, list(instruction)::out, list(instruction)::out) is semidet. detect_det_entry(Instrs0, EntryInfo, Setup, Others ++ Remain) :- opt_util.gather_comments(Instrs0, Others0, Instrs1), Instrs1 = [SetupInstr1 | Instrs2], SetupInstr1 = llds_instr(incr_sp(FrameSize, Msg, Kind), _), detstack_setup(Instrs2, FrameSize, SetupInstr2, Others0, Others, Remain), Setup = [SetupInstr1, SetupInstr2], EntryInfo = det_entry(FrameSize, Msg, Kind). :- pred detstack_setup(list(instruction)::in, int::in, instruction::out, list(instruction)::in, list(instruction)::out, list(instruction)::out) is semidet. detstack_setup([Instr0 | Instrs0], FrameSize, Setup, !Others, Remain) :- Instr0 = llds_instr(Uinstr0, _), ( if Uinstr0 = assign(Lval, Rval) then ( if Lval = stackvar(FrameSize), Rval = lval(succip) then Setup = Instr0, Remain = Instrs0 else if Lval \= succip, Lval \= stackvar(FrameSize) then !:Others = !.Others ++ [Instr0], detstack_setup(Instrs0, FrameSize, Setup, !Others, Remain) else fail ) else if Uinstr0 = comment(_) then !:Others = !.Others ++ [Instr0], detstack_setup(Instrs0, FrameSize, Setup, !Others, Remain) else fail ). :- pred detect_nondet_entry(list(instruction)::in, nondet_entry_info::out, list(instruction)::out, list(instruction)::out) is semidet. detect_nondet_entry(Instrs0, EntryInfo, [MkframeInstr], Remain) :- Instrs0 = [MkframeInstr | Remain], MkframeInstr = llds_instr(mkframe(FrameInfo, MaybeRedoip), _), % We could allow MaybeRedoip to be `no', and search for the instruction % that sets the redoip of the new frame. That is left for future work. MaybeRedoip = yes(Redoip), % If the initial mkframe sets the redoip to something other than do_fail, % then even this entry block needs a stack frame, so frameopt cannot do % anything. Redoip = do_fail, FrameInfo = ordinary_frame(Msg, Size), EntryInfo = nondet_entry(Msg, Size, Redoip). %-----------------------------------------------------------------------------% :- pred detect_det_exit(det_entry_info::in, list(instruction)::in, det_exit_info::out, list(instruction)::out, list(instruction)::out, list(instruction)::out) is semidet. detect_det_exit(EntryInfo, Instrs0, ExitInfo, Extra, ExitInstrs, Remain) :- EntryInfo = det_entry(FrameSize, _Msg, _), detstack_teardown(FrameSize, Instrs0, Extra, SuccipRestore, Decrsp, Livevals, Goto, Remain), ExitInstrs = SuccipRestore ++ Decrsp ++ Livevals ++ [Goto], ExitInfo = det_exit(SuccipRestore, Livevals, Goto). % Does the following block contain a teardown of a det stack frame, % and a proceed or tailcall? If yes, we return % % - the instruction that restores succip as Succip % - the decr_sp instruction as Decrsp % - the livevals instruction as Livevals % - the goto instruction as Goto % % The first three can appear in any order or may be missing, due to % value numbering. This is also why we allow the teardown instructions % to be interleaved with instructions that do not access the stack; % any such instructions are returned as Extra. Remain is all the % instructions after the teardown. % :- pred detstack_teardown(int::in, list(instruction)::in, list(instruction)::out, list(instruction)::out, list(instruction)::out, list(instruction)::out, instruction::out, list(instruction)::out) is semidet. detstack_teardown(FrameSize, [Instr0 | Instrs0], Extra, SuccipRestore, Decrsp, Livevals, Goto, Remain) :- ( if Instr0 = llds_instr(label(_), _) then fail else if detstack_teardown_2(FrameSize, [Instr0 | Instrs0], GotoPrime, RemainPrime, [], ExtraPrime, [], SuccipRestorePrime, [], DecrspPrime, [], LivevalsPrime) then Extra = ExtraPrime, SuccipRestore = SuccipRestorePrime, Decrsp = DecrspPrime, Livevals = LivevalsPrime, Goto = GotoPrime, Remain = RemainPrime else detstack_teardown(FrameSize, Instrs0, Extra1, SuccipRestore, Decrsp, Livevals, Goto, Remain), Extra = [Instr0 | Extra1] ). :- pred detstack_teardown_2(int::in, list(instruction)::in, instruction::out, list(instruction)::out, list(instruction)::in, list(instruction)::out, list(instruction)::in, list(instruction)::out, list(instruction)::in, list(instruction)::out, list(instruction)::in, list(instruction)::out) is semidet. detstack_teardown_2(FrameSize, Instrs0, Goto, Remain, !Extra, !SuccipRestore, !Decrsp, !Livevals) :- opt_util.skip_comments(Instrs0, Instrs1), Instrs1 = [Instr1 | Instrs2], Instr1 = llds_instr(Uinstr1, _), % XXX allow other instruction types in Extras, e.g. incr_hp ( Uinstr1 = assign(Lval, Rval), ( if Lval = succip, Rval = lval(stackvar(FrameSize)) then !.SuccipRestore = [], !.Decrsp = [], !:SuccipRestore = [Instr1], detstack_teardown_2(FrameSize, Instrs2, Goto, Remain, !Extra, !SuccipRestore, !Decrsp, !Livevals) else opt_util.lval_refers_stackvars(Lval) = no, opt_util.rval_refers_stackvars(Rval) = no, !:Extra = !.Extra ++ [Instr1], detstack_teardown_2(FrameSize, Instrs2, Goto, Remain, !Extra, !SuccipRestore, !Decrsp, !Livevals) ) ; Uinstr1 = decr_sp(FrameSize), !.Decrsp = [], !:Decrsp = [Instr1], detstack_teardown_2(FrameSize, Instrs2, Goto, Remain, !Extra, !SuccipRestore, !Decrsp, !Livevals) ; Uinstr1 = livevals(_), !.Livevals = [], !:Livevals = [Instr1], detstack_teardown_2(FrameSize, Instrs2, Goto, Remain, !Extra, !SuccipRestore, !Decrsp, !Livevals) ; Uinstr1 = goto(_), !.Decrsp = [_], Goto = Instr1, Remain = Instrs2 ). :- pred detect_nondet_exit(nondet_entry_info::in, list(instruction)::in, nondet_exit_info::out, list(instruction)::out, list(instruction)::out, list(instruction)::out) is semidet. detect_nondet_exit(_EntryInfo, Instrs0, ExitInfo, Extra, ExitInstrs, Remain) :- nondetstack_teardown(Instrs0, Extra, SuccipRestore, Maxfr, Curfr, Livevals, Goto, GotoTarget, Remain), ExitInstrs = SuccipRestore ++ Maxfr ++ Curfr ++ Livevals ++ [Goto], ( Curfr = [], % MR_succeed refers to the current stack frame, so it is valid % only if we haven't thrown away the stack frame yet by resetting % curfr. Maxfr = [], SuccipRestore = [], GotoTarget = do_succeed(_), ExitInfo = nondet_plain_exit(Livevals, Goto) ; Curfr = [CurfrInstr], % If we *have* thrown away the current stack frame, we can exit % only via tailcall, and in that case we ought to have also reset % maxfr and succip. Maxfr = [MaxfrInstr], SuccipRestore = [SuccipRestoreInstr], ( GotoTarget = code_label(entry_label(_, _)) ; GotoTarget = code_imported_proc(_) ), ExitInfo = nondet_teardown_exit(SuccipRestoreInstr, MaxfrInstr, CurfrInstr, Livevals, Goto) ). % Does the following block contain a succeed from a nondet stack frame? % If yes, we return % % - the livevals instruction (if any) as Livevals % - the goto instruction as Goto % % These may be preceded by instructions that do not access the stack; % any such instructions are returned as Extra. Remain is all the % instructions after the succeed. % :- pred nondetstack_teardown(list(instruction)::in, list(instruction)::out, list(instruction)::out, list(instruction)::out, list(instruction)::out, list(instruction)::out, instruction::out, code_addr::out, list(instruction)::out) is semidet. nondetstack_teardown([Instr0 | Instrs0], Extra, SuccipRestore, Maxfr, Curfr, Livevals, Goto, GotoTarget, Remain) :- ( if Instr0 = llds_instr(label(_), _) then fail else if nondetstack_teardown_2([Instr0 | Instrs0], GotoPrime, GotoTargetPrime, RemainPrime, [], ExtraPrime, [], SuccipRestorePrime, [], MaxfrPrime, [], CurfrPrime, [], LivevalsPrime) then Extra = ExtraPrime, SuccipRestore = SuccipRestorePrime, Maxfr = MaxfrPrime, Curfr = CurfrPrime, Livevals = LivevalsPrime, Goto = GotoPrime, GotoTarget = GotoTargetPrime, Remain = RemainPrime else nondetstack_teardown(Instrs0, Extra1, SuccipRestore, Maxfr, Curfr, Livevals, Goto, GotoTarget, Remain), Extra = [Instr0 | Extra1] ). :- pred nondetstack_teardown_2(list(instruction)::in, instruction::out, code_addr::out, list(instruction)::out, list(instruction)::in, list(instruction)::out, list(instruction)::in, list(instruction)::out, list(instruction)::in, list(instruction)::out, list(instruction)::in, list(instruction)::out, list(instruction)::in, list(instruction)::out) is semidet. nondetstack_teardown_2(Instrs0, Goto, GotoTarget, Remain, !Extra, !SuccipRestore, !Maxfr, !Curfr, !Livevals) :- opt_util.skip_comments(Instrs0, Instrs1), Instrs1 = [Instr1 | Instrs2], Instr1 = llds_instr(Uinstr1, _), % XXX allow other instruction types in Extras, e.g. incr_hp ( Uinstr1 = assign(Lval, Rval), ( if Lval = succip, Rval = lval(succip_slot(lval(curfr))), !.SuccipRestore = [], % The restore instruction is valid only if curfr hasn't been % modified yet. !.Curfr = [] then !:SuccipRestore = [Instr1] else if Lval = maxfr, Rval = lval(prevfr_slot(lval(curfr))), !.Maxfr = [], % The restore instruction is valid only if curfr hasn't been % modified yet. !.Curfr = [] then !:Maxfr = [Instr1] else if Lval = curfr, Rval = lval(succfr_slot(lval(curfr))), !.Curfr = [] then !:Curfr = [Instr1] else if opt_util.lval_refers_stackvars(Lval) = no, opt_util.rval_refers_stackvars(Rval) = no then !:Extra = !.Extra ++ [Instr1] else fail ), nondetstack_teardown_2(Instrs2, Goto, GotoTarget, Remain, !Extra, !SuccipRestore, !Maxfr, !Curfr, !Livevals) ; Uinstr1 = livevals(_), !.Livevals = [], !:Livevals = [Instr1], nondetstack_teardown_2(Instrs2, Goto, GotoTarget, Remain, !Extra, !SuccipRestore, !Maxfr, !Curfr, !Livevals) ; Uinstr1 = goto(GotoTarget), Goto = Instr1, Remain = Instrs2 ). %-----------------------------------------------------------------------------% % Does an ordinary block with the given content need a stack frame? % :- pred does_block_need_frame(label::in, list(instruction)::in, block_needs_frame::out) is det. does_block_need_frame(_Label, Instrs, NeedsFrame) :- opt_util.block_refers_to_stack(Instrs) = ReferStackVars, ( ReferStackVars = yes, NeedsFrame = block_needs_frame ; ReferStackVars = no, ( if list.member(Instr, Instrs), Instr = llds_instr(Uinstr, _), ( Uinstr = llcall(_, _, _, _, _, _) ; Uinstr = mkframe(_, _) ; Uinstr = arbitrary_c_code(_, _, _) ; Uinstr = foreign_proc_code(_, _, MayCallMercury, _, MaybeLayout, MaybeOnlyLayout, _, MaybeDefLabel, NeedStack, _), ( MayCallMercury = proc_may_call_mercury ; MaybeLayout = yes(_) ; MaybeOnlyLayout = yes(_) ; MaybeDefLabel = yes(_) ; NeedStack = refers_to_llds_stack ) ; % If we assign to the succip register, then we need the saved % copy of the original return address in the stack frame. Uinstr = assign(succip, _) ) then NeedsFrame = block_needs_frame else NeedsFrame = block_does_not_need_frame ) ). %-----------------------------------------------------------------------------% %-----------------------------------------------------------------------------% % For each block in the given sequence, whose frame_block_info structures % in the given block map have been partially filled in, fill in the % remaining two fields. These two fields give the labels the block % can branch to on the side (this includes return addresses for calls), % and the label if any to which it falls through. % % Also decide whether the optimization we want to apply is keeping % the stack frame for recursive tail calls once it has been set up % by the initial entry to the procedure, or delaying the creation of % the stack frame as long as possible. We want to do the former % whenever we find at least one exit block that branches back % to the beginning of the procedure; in such cases we return the % the label starting the procedure and the label that should replace % it in tailcalls that avoid the stack teardown, which is the label % immediately after the initial stack setup block. % :- pred analyze_block_map(pre_exit_dummy_label_map::in, list(label)::in, maybe(pair(label))::out, frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out) is det <= block_entry_exit(En, Ex). analyze_block_map(PreExitDummyLabelMap, LabelSeq, KeepFrameData, !BlockMap) :- ( if LabelSeq = [FirstLabel, SecondLabel | _], map.search(!.BlockMap, FirstLabel, FirstBlockInfo), FirstBlockInfo = frame_block_info(FirstLabel, _, _, _, _, BlockType), BlockType = entry_block(_) then ProcLabel = get_proc_label(FirstLabel), analyze_blocks(ProcLabel, FirstLabel, PreExitDummyLabelMap, LabelSeq, !BlockMap, no, AnyBlockNeedsFrame, no, JumpToStart), % We want to apply the transformation to keep the stack frame only if % (a) some block actually needs the stack frame, and (b) there is at % least one block that jumps back to the start of the procedure. ( if AnyBlockNeedsFrame = yes, JumpToStart = yes then KeepFrameData = yes(FirstLabel - SecondLabel) else KeepFrameData = no ) else unexpected($pred, "bad data") ). :- pred analyze_blocks(proc_label::in, label::in, pre_exit_dummy_label_map::in, list(label)::in, frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out, bool::in, bool::out, bool::in, bool::out) is det <= block_entry_exit(En, Ex). analyze_blocks(_, _, _, [], !BlockMap, !AnyBlockNeedsFrame, !KeepFrame). analyze_blocks(ProcLabel, FirstLabel, PreExitDummyLabelMap, [Label | Labels], !BlockMap, !AnyBlockNeedsFrame, !JumpToStart) :- analyze_block(ProcLabel, FirstLabel, PreExitDummyLabelMap, Label, Labels, !BlockMap, !AnyBlockNeedsFrame, !JumpToStart), analyze_blocks(ProcLabel, FirstLabel, PreExitDummyLabelMap, Labels, !BlockMap, !AnyBlockNeedsFrame, !JumpToStart). :- pred analyze_block(proc_label::in, label::in, pre_exit_dummy_label_map::in, label::in, list(label)::in, frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out, bool::in, bool::out, bool::in, bool::out) is det <= block_entry_exit(En, Ex). analyze_block(ProcLabel, FirstLabel, PreExitDummyLabelMap, Label, FollowingLabels, !BlockMap, !AnyBlockNeedsFrame, !JumpToStart) :- map.lookup(!.BlockMap, Label, BlockInfo0), BlockInfo0 = frame_block_info(BlockLabel, BlockInstrs0, FallInto, _, _, Type), ( Type = ordinary_block(block_needs_frame, _), !:AnyBlockNeedsFrame = yes ; ( Type = ordinary_block(block_does_not_need_frame, _) ; Type = entry_block(_) ; Type = exit_block(_) ) ), ( if Label = BlockLabel, % sanity check list.det_split_last(BlockInstrs0, AllButLastInstrs, LastInstr0) then LastInstr0 = llds_instr(LastUinstr0, Comment), ( if LastUinstr0 = goto(GotoTarget0) then replace_labels_code_addr(GotoTarget0, GotoTarget, PreExitDummyLabelMap), LastUinstr = goto(GotoTarget), LastInstr = llds_instr(LastUinstr, Comment), BlockInstrs = AllButLastInstrs ++ [LastInstr] else if LastUinstr0 = if_val(Rval, GotoTarget0) then replace_labels_code_addr(GotoTarget0, GotoTarget, PreExitDummyLabelMap), LastUinstr = if_val(Rval, GotoTarget), LastInstr = llds_instr(LastUinstr, Comment), BlockInstrs = AllButLastInstrs ++ [LastInstr] else if LastUinstr0 = computed_goto(Rval, MaybeMaxIndex, GotoTargets0) then replace_labels_maybe_label_list(GotoTargets0, GotoTargets, PreExitDummyLabelMap), LastUinstr = computed_goto(Rval, MaybeMaxIndex, GotoTargets), LastInstr = llds_instr(LastUinstr, Comment), BlockInstrs = AllButLastInstrs ++ [LastInstr] else if LastUinstr0 = foreign_proc_code(D, Comps0, MC, FNL, FL, FOL, NF0, MDL, S, MD) then ( NF0 = no, NF = no, Comps0 = Comps ; NF0 = yes(NFLabel0), replace_labels_label(NFLabel0, NFLabel, PreExitDummyLabelMap), NF = yes(NFLabel), replace_labels_comps(Comps0, Comps, PreExitDummyLabelMap) ), LastUinstr = foreign_proc_code(D, Comps, MC, FNL, FL, FOL, NF, MDL, S, MD), LastInstr = llds_instr(LastUinstr, Comment), BlockInstrs = AllButLastInstrs ++ [LastInstr] else LastUinstr = LastUinstr0, BlockInstrs = BlockInstrs0 ), possible_targets(LastUinstr, SideLabels0, _SideCodeAddrs), list.filter(local_label(ProcLabel), SideLabels0, SideLabels), ( if opt_util.can_instr_fall_through(LastUinstr) = yes, FollowingLabels = [NextLabel | _] then MaybeFallThrough = yes(NextLabel) else MaybeFallThrough = no ), ( if LastUinstr = goto(code_label(GotoLabel)), matching_label_ref(FirstLabel, GotoLabel) then !:JumpToStart = yes else true ) else unexpected($pred, "mismatch or no last instr") ), BlockInfo = frame_block_info(BlockLabel, BlockInstrs, FallInto, SideLabels, MaybeFallThrough, Type), map.det_update(Label, BlockInfo, !BlockMap), find_redoip_labels(ProcLabel, BlockInstrs, [], RedoipLabels), list.foldl(mark_redoip_label, RedoipLabels, !BlockMap). :- pred local_label(proc_label::in, label::in) is semidet. local_label(ProcLabel, entry_label(_, ProcLabel)). local_label(ProcLabel, internal_label(_, ProcLabel)). % The form of a label used in a tailcall may be different from % the form used in the initial label. The initial label may be % exported from the Mercury module or possibly from the C module, % while the label used in the tailcall may use a more local form % for better performance. % % This predicate tests whether the second label (from the tailcall) % is a proper reference to the first label (from the initial label % instruction). % :- pred matching_label_ref(label::in, label::in) is semidet. matching_label_ref(FirstLabel, GotoLabel) :- FirstLabel = entry_label(FirstLabelType, ProcLabel), GotoLabel = entry_label(GotoLabelType, ProcLabel), matching_entry_type(FirstLabelType, GotoLabelType). :- pred matching_entry_type(entry_label_type::in, entry_label_type::in) is semidet. matching_entry_type(FirstLabel, GotoLabel) :- require_complete_switch [FirstLabel] ( FirstLabel = entry_label_exported, require_complete_switch [GotoLabel] ( GotoLabel = entry_label_exported ; GotoLabel = entry_label_c_local ; GotoLabel = entry_label_local ) ; FirstLabel = entry_label_local, require_complete_switch [GotoLabel] ( ( GotoLabel = entry_label_c_local ; GotoLabel = entry_label_local ), Match = yes ; GotoLabel = entry_label_exported, Match = no ), Match = yes ; FirstLabel = entry_label_c_local, require_complete_switch [GotoLabel] ( GotoLabel = entry_label_c_local, Match = yes ; ( GotoLabel = entry_label_exported ; GotoLabel = entry_label_local ), Match = no ), Match = yes ). :- pred find_redoip_labels(proc_label::in, list(instruction)::in, list(label)::in, list(label)::out) is det. find_redoip_labels(_, [], !RedoipLabels). find_redoip_labels(ProcLabel, [Instr | Instrs], !RedoipLabels) :- Instr = llds_instr(Uinstr, _), ( if Uinstr = assign(redoip_slot(_), const(llconst_code_addr(code_label(Label)))), get_proc_label(Label) = ProcLabel then !:RedoipLabels = [Label | !.RedoipLabels] else true ), find_redoip_labels(ProcLabel, Instrs, !RedoipLabels). :- pred mark_redoip_label(label::in, frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out) is det. mark_redoip_label(Label, !BlockMap) :- map.lookup(!.BlockMap, Label, BlockInfo0), BlockType0 = BlockInfo0 ^ fb_type, ( BlockType0 = entry_block(_), unexpected($pred, "entry_block") ; BlockType0 = ordinary_block(_, MaybeDummy), BlockType = ordinary_block(block_needs_frame, MaybeDummy), BlockInfo = BlockInfo0 ^ fb_type := BlockType, map.det_update(Label, BlockInfo, !BlockMap) ; BlockType0 = exit_block(_), unexpected($pred, "exit_block") ). %-----------------------------------------------------------------------------% %-----------------------------------------------------------------------------% :- func can_clobber_succip(list(label), frame_block_map(_, _)) = bool. can_clobber_succip([], _BlockMap) = no. can_clobber_succip([Label | Labels], BlockMap) = CanClobberSuccip :- map.lookup(BlockMap, Label, BlockInfo), Instrs = BlockInfo ^ fb_instrs, ( if list.member(Instr, Instrs), Instr = llds_instr(Uinstr, _), ( Uinstr = llcall(_, _, _, _, _, _) ; % Only may_call_mercury foreign_proc_codes can clobber succip. Uinstr = foreign_proc_code(_, _, proc_may_call_mercury, _, _, _, _, _, _, _) ) then CanClobberSuccip = yes else CanClobberSuccip = can_clobber_succip(Labels, BlockMap) ). %-----------------------------------------------------------------------------% % Transform the given block sequence to effect the optimization % of keeping the stack frame for recursive tail calls once it has % been set up by the initial entry to the procedure. % The two label arguments are the label starting the procedure % (a form of which appears in existing tailcalls) and the label that % should replace it in tailcalls that avoid the stack teardown. % :- pred keep_frame_transform(label::in, label::in, bool::in, list(label)::in, det_frame_block_map::in, det_frame_block_map::out) is det. keep_frame_transform(_, _, _, [], !BlockMap). keep_frame_transform(FirstLabel, SecondLabel, CanClobberSuccip, [Label | Labels], !BlockMap) :- map.lookup(!.BlockMap, Label, BlockInfo0), ( if BlockInfo0 = frame_block_info(Label, OrigInstrs, FallInto, [_], no, exit_block(det_exit(Succip, Livevals, Goto))), Goto = llds_instr(goto(code_label(GotoLabel)), Comment), matching_label_ref(FirstLabel, GotoLabel) then ( if OrigInstrs = [OrigInstr0 | _], OrigInstr0 = llds_instr(label(_), _) then OrigLabelInstr = OrigInstr0 else unexpected($pred, "block does not begin with label") ), string.append(Comment, " (keeping frame)", NewComment), NewGoto = llds_instr(goto(code_label(SecondLabel)), NewComment), LivevalsGoto = Livevals ++ [NewGoto], ( CanClobberSuccip = yes, BackInstrs = Succip ++ LivevalsGoto ; CanClobberSuccip = no, BackInstrs = LivevalsGoto ), Instrs = [OrigLabelInstr | BackInstrs], BlockType = ordinary_block(block_needs_frame, is_not_dummy), BlockInfo = frame_block_info(Label, Instrs, FallInto, [SecondLabel], no, BlockType), map.det_update(Label, BlockInfo, !BlockMap) else true ), keep_frame_transform(FirstLabel, SecondLabel, CanClobberSuccip, Labels, !BlockMap). %-----------------------------------------------------------------------------% %-----------------------------------------------------------------------------% % Check that we can use the delay_frame transformation. This requires % that only the first block is of the setup type. % :- pred can_delay_frame(frame_block_map(En, Ex)::in, list(label)::in) is semidet. can_delay_frame(_, []). can_delay_frame(BlockMap, [Label | _Labels]) :- map.lookup(BlockMap, Label, BlockInfo), BlockInfo ^ fb_type = entry_block(_). %-----------------------------------------------------------------------------% %-----------------------------------------------------------------------------% % The data structures used in the delaying optimizations. % map.search(RevMap, Label, SideLabels) should be true if the block % started by Label can be reached via jump or fallthrough from the labels % in SideLabels. % :- type rev_map == map(label, list(label)). % Given the label L starting a block, map.search(PredMap, L, PrevL) % is true if PrevL starts the block immediately before L. % If L is the first block in the sequence (which should be a setup block), % map.search(PredMap, L, _) fails. :- type pred_map == map(label, label). % map.search(SetupParMap, L, SetupL) should be true if L starts % an ordinary block that needs a stack frame that can be jumped to by % blocks that do not have a stack frame. In this case, SetupL will be the % label starting a new block that sets up the stack frame before handing % control to L. We put SetupL's block immediately before L's block. % If L cannot be fallen into, this is fine. If it can, we put a goto % around SetupL's block after the previous block. In most cases, % full jump optimization to duplicate code as necessary to optimize away % the goto. % :- type setup_par_map ---> setup_par_map(map(label, label)). % map.search(ExitParMap, L, ParallelL should be true if L starts % an exit block and ParallelL starts a copy of L's block from which % the instructions to tear down the stack frame have been deleted. % If the block immediately before L does not have a stack frame, % we put ParallelL before L. If it does, we put ParallelL after L. % since neither L nor ParallelL can fall through, we don't need any gotos % to jump around blocks. % :- type exit_par_map ---> exit_par_map(map(label, label)). %-----------------------------------------------------------------------------% :- type can_transform ---> can_transform ; cannot_transform. % XXX This documentation needs updating. % The optimization of delaying the creation of stack frames as long % as possible is in three main phases: % % - The first phase finds out which ordinary blocks need a stack frame. % This naturally includes blocks that access stackvars, and blocks % that perform calls. It also includes blocks which are jumped to % or fallen through to from ordinary blocks that need a stack frame; % this is done by propagate_frame_requirement_to_successors. % A predecessor of a block that needs a frame need not have a frame % itself, since we can interpose the frame setup code on the jump or % fallthrough between them. However, if all of a block's successors % need a frame, then interposing the setup code on *all* those jumps % and/or fallthroughs is a space cost, and delaying the setup doesn't % gain any time, so we may as well say that the predecessor itself needs % a frame. This is done by propagate_frame_requirement_to_predecessors. % If a propagation step says that a setup block needs a stack frame, % that implies that all blocks need a frame, and thus we cannot avoid % setting up a stack frame on any path. In such cases, this predicate % wil return CanTransform = cannot_transform, which tells our caller % to leave the procedure's code unmodified. % % - The second phase gets rid of the frame setup code in the initial % setup block, but its main task is to transform ordinary blocks that % do not need a frame. Every block that is a successor of such a block, % whether via jump or fallthrough, will be an ordinary block or an % exit block. % % - If the successor is an ordinary block that doesn't need a frame, % the transfer of control remains as before. % % - If the successor is an ordinary block B that does need a stack frame, % we need to insert the frame setup code at the transfer between the % two blocks. In phase 2, we note the need for this insertion by % allocating a new label SetupB, which will start a new block that will % create the stack frame and then fall through to B. In the second % phase, we alter the transfer of control to goto to SetupB instead % of B; the creation of SetupB's block is left to the third phase. % The correspondence between B and SetupB is recorded in SetupParMap. % % - If the successor is an exit block B, then we modify the transfer % of control to jump a new label ParallelB, whose block has the same % code as B's block, except for the deletion of the instructions that % tear down the (nonexistent along this path) stack frame. The % correspondence between B and ParallelB is recorded in ExitParMap. % % - The third phase has the job of adding the pieces of code whose % existence is assumed by the modified code output by the second stage. % % For every frame-needing ordinary block that can be jumped to from % someplace that does not have a stack frame (i.e. for every B in % SetupParMap), we insert before it the SetupB label followed by % the code to set up the stack frame SetupB's predecessor may need to % jump around SetupB's block if previously it fell through to B.) % % For every teardown block that can jumped to from someplace that does % not have a stack frame (i.e. for every B in ExitParMap), we create % a new block that is a clone of B with the stack teardown deleted. % Whether we put B or ParallelB first depends on whether the immediately % previous block has a stack frame or not. % :- pred delay_frame_transform(En::in, proc_label::in, pred_map::in, bool::in, list(instruction)::out, list(instruction)::out, can_transform::out, list(label)::in, list(label)::out, counter::in, counter::out, frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out) is det <= block_entry_exit(En, Ex). delay_frame_transform(EntryInfo, ProcLabel, PredMap, AddComments, TransformComments, DescComments, CanTransform, !LabelSeq, !C, !BlockMap) :- some [!OrdNeedsFrameMap, !PropagationStepsLeft] ( !:OrdNeedsFrameMap = map.init, !:PropagationStepsLeft = max_propagation_steps, delay_frame_init(!.BlockMap, !.LabelSeq, map.init, RevMap, queue.init, SuccQueue, !OrdNeedsFrameMap), propagate_frame_requirement_to_successors(!.BlockMap, SuccQueue, set_tree234.init, SuccCanTransform, !OrdNeedsFrameMap, !PropagationStepsLeft), ( SuccCanTransform = cannot_transform, CanTransform = cannot_transform ; SuccCanTransform = can_transform, map.to_assoc_list(!.OrdNeedsFrameMap, OrdNeedsFrameAL), list.filter_map(key_block_needs_frame, OrdNeedsFrameAL, Frontier), queue.list_to_queue(Frontier, PredQueue), propagate_frame_requirement_to_predecessors(!.BlockMap, RevMap, PredQueue, set_tree234.init, !.PropagationStepsLeft, CanTransform, !OrdNeedsFrameMap), ( CanTransform = cannot_transform % The delay frame optimization is not applicable; % our caller will ignore all the other output arguments. ; CanTransform = can_transform, process_frame_delay(!.OrdNeedsFrameMap, ProcLabel, !.LabelSeq, !C, !BlockMap, setup_par_map(map.init), SetupParMap, exit_par_map(map.init), ExitParMap), create_parallels(EntryInfo, ProcLabel, !.OrdNeedsFrameMap, SetupParMap, ExitParMap, PredMap, !LabelSeq, !C, !BlockMap) ) ), ( AddComments = no, TransformComments = [], DescComments = [] ; AddComments = yes, TransformComments = [llds_instr(comment("delaying stack frame"), "")], list.map(describe_block(!.BlockMap, !.OrdNeedsFrameMap, PredMap, ProcLabel), !.LabelSeq, DescComments) ) ). % We want to stop the transformation if we need more than this many % propagation steps. For such large predicates (e.g. write_ordinary_term) % any performance benefit from frameopt is unlikely to be noticeable. % :- func max_propagation_steps = int. max_propagation_steps = 10000. :- pred key_block_needs_frame(pair(label, block_needs_frame)::in, label::out) is semidet. key_block_needs_frame(Label - block_needs_frame, Label). %-----------------------------------------------------------------------------% % Maps the label of each ordinary block to a bool that says whether % the block needs a stack frame or not. % :- type ord_needs_frame_map == map(label, block_needs_frame). :- type prop_queue == queue(label). % Initialize the data structures for the delaying operation. % The first is a map showing the predecessors of each block, % i.e. the set of blocks that can jump to or fall through each other block. % The second is a queue of ordinary blocks that need a stack frame. % The third says, for each ordinary block, whether it needs a stack frame. % % This predicate implements the first part of the first phase of % delay_frame_transform. % :- pred delay_frame_init(frame_block_map(En, Ex)::in, list(label)::in, rev_map::in, rev_map::out, prop_queue::in, prop_queue::out, ord_needs_frame_map::in, ord_needs_frame_map::out) is det. delay_frame_init(_, [], !RevMap, !Queue, !OrdNeedsFrameMap). delay_frame_init(BlockMap, [Label | Labels], !RevMap, !Queue, !OrdNeedsFrameMap) :- map.lookup(BlockMap, Label, BlockInfo), BlockType = BlockInfo ^ fb_type, ( BlockType = entry_block(_) ; BlockType = ordinary_block(NeedsFrame, _), ( NeedsFrame = block_does_not_need_frame, map.det_insert(Label, block_does_not_need_frame, !OrdNeedsFrameMap) ; NeedsFrame = block_needs_frame, map.det_insert(Label, block_needs_frame, !OrdNeedsFrameMap), queue.put(Label, !Queue) ) ; BlockType = exit_block(_) ), rev_map_side_labels(successors(BlockInfo), Label, !RevMap), delay_frame_init(BlockMap, Labels, !RevMap, !Queue, !OrdNeedsFrameMap). :- pred rev_map_side_labels(list(label)::in, label::in, rev_map::in, rev_map::out) is det. rev_map_side_labels([], _Label, !RevMap). rev_map_side_labels([Label | Labels], SourceLabel, !RevMap) :- ( if map.search(!.RevMap, Label, OtherSources0) then OtherSources = [SourceLabel | OtherSources0], map.det_update(Label, OtherSources, !RevMap) else OtherSources = [SourceLabel], map.det_insert(Label, OtherSources, !RevMap) ), rev_map_side_labels(Labels, SourceLabel, !RevMap). %-----------------------------------------------------------------------------% :- pred ord_needs_frame(label::in, ord_needs_frame_map::in, ord_needs_frame_map::out) is det. ord_needs_frame(Label, !OrdNeedsFrameMap) :- map.lookup(!.OrdNeedsFrameMap, Label, NeedsFrame0), ( NeedsFrame0 = block_does_not_need_frame, map.det_update(Label, block_needs_frame, !OrdNeedsFrameMap) ; NeedsFrame0 = block_needs_frame ). % Given a queue of labels representing ordinary blocks that must have % a stack frame, propagate the requirement for a stack frame to all % other ordinary blocks that are their successors. % % This predicate implements the second part of the first phase of % delay_frame_transform. % :- pred propagate_frame_requirement_to_successors(frame_block_map(En, Ex)::in, prop_queue::in, set_tree234(label)::in, can_transform::out, ord_needs_frame_map::in, ord_needs_frame_map::out, int::in, int::out) is det. propagate_frame_requirement_to_successors(BlockMap, !.Queue, !.DoneLabels, CanTransform, !OrdNeedsFrameMap, !PropagationStepsLeft) :- ( if !.PropagationStepsLeft < 0 then CanTransform = cannot_transform else if queue.get(Label, !Queue) then !:PropagationStepsLeft = !.PropagationStepsLeft - 1, set_tree234.insert(Label, !DoneLabels), map.lookup(BlockMap, Label, BlockInfo), BlockType = BlockInfo ^ fb_type, ( BlockType = entry_block(_), CanTransform = cannot_transform ; BlockType = ordinary_block(_, _MaybeDummy), ord_needs_frame(Label, !OrdNeedsFrameMap), % Putting an already processed label into the queue could % lead to an infinite loop. However, we cannot decide whether % a label has been processed by checking whether % !.OrdNeedsFrameMap maps Label to yes, since !.OrdNeedsFrameMap % doesn't mention setup frames, and we want to set % !:CanTransform to no if any successor is a setup frame. % We cannot assume that successors not in !.OrdNeedsFrameMap % should set !:CanTransform to no either, since we don't want % to do that for exit frames. list.filter(set_tree234.contains(!.DoneLabels), successors(BlockInfo), _, UnprocessedSuccessors), queue.put_list(UnprocessedSuccessors, !Queue), disable_warning [suspicious_recursion] ( propagate_frame_requirement_to_successors(BlockMap, !.Queue, !.DoneLabels, CanTransform, !OrdNeedsFrameMap, !PropagationStepsLeft) ) ; BlockType = exit_block(_), % Exit blocks never *need* stack frames. disable_warning [suspicious_recursion] ( propagate_frame_requirement_to_successors(BlockMap, !.Queue, !.DoneLabels, CanTransform, !OrdNeedsFrameMap, !PropagationStepsLeft) ) ) else CanTransform = can_transform ). % This predicate implements the third part of the first phase of % delay_frame_transform; see the documentation there. % :- pred propagate_frame_requirement_to_predecessors( frame_block_map(En, Ex)::in, rev_map::in, prop_queue::in, set_tree234(label)::in, int::in, can_transform::out, ord_needs_frame_map::in, ord_needs_frame_map::out) is det. propagate_frame_requirement_to_predecessors(BlockMap, RevMap, !.Queue, !.DoneLabels, !.PropagationStepsLeft, CanTransform, !OrdNeedsFrameMap) :- ( if !.PropagationStepsLeft < 0 then CanTransform = cannot_transform else if queue.get(Label, !Queue) then !:PropagationStepsLeft = !.PropagationStepsLeft - 1, set_tree234.insert(Label, !DoneLabels), ( if map.search(RevMap, Label, PredecessorsPrime) then Predecessors = PredecessorsPrime else % We get here if Label cannot be reached by a fallthrough or an % explicit jump, but only by backtracking. In that case, the % code that sets up the resumption point saves the address of % Label on the stack, and thus is already known to need % a stack frame. Predecessors = [], ord_needs_frame(Label, !OrdNeedsFrameMap) ), % A block (say block A) that has N successors (e.g. because it does % a switch) is by definition the predecessor of those N blocks. % When we process one of those N successors, say block B_i % for i in {1,N}, we will test whether all of those B_i need a frame. % so this algorithm is quadratic. This is a problem, because % in some cases, such as for the optdb predicate in options.m, % N can be very big. % % The obvious way to avoid this performance problem is to process % block A just once. This can change the output of the algorithm % in some cases, because the outcome of the "do all the B_i need a % frame?" test can change as we modify !OrdNeedsFrameMap. % However, a comparison of the .c files generated at the default % optimization for the modules in the compiler directory by compilers % with and without this change showed very few differences, % and none of those looked like they would lead to slowdowns. % (In fact, most look to be speed*ups*, but they won't speed up % the compiler in any detectable way, since they occur in predicates % that already contribute almost nothing to the compiler's runtime.) % % On the other hand, the reduction in algorithmic complexity here % from quadratic to linear speeds up the compilation of options.m % by about 13% (from 2.63s to 2.28s on average). % % About the implementation of that complexity reduction: % there are two ways we can ensure that we do not process % a predecessor of Label in !.DoneLabels more than once. % % (1) We can put that predecessor into !Queue, but test for % each label we get out of !.Queue whether is it in !.DoneLabels, % and ignore the label if that test succeeds. % % (2) We can avoid putting that predecessor into !Queue in the first % place. % % We do the latter, because it leads to less memory allocation. list.filter( all_nyd_successors_need_frame(BlockMap, !.OrdNeedsFrameMap, !.DoneLabels), Predecessors, NowNeedFrameLabels), list.foldl2(record_frame_need(BlockMap), NowNeedFrameLabels, !OrdNeedsFrameMap, can_transform, NowNeedCanTransform), ( NowNeedCanTransform = cannot_transform, CanTransform = cannot_transform ; NowNeedCanTransform = can_transform, % XXX map.lookup(BlockMap, Label, BlockInfo), % XXX Successors = successors(BlockInfo), queue.put_list(NowNeedFrameLabels, !Queue), disable_warning [suspicious_recursion] ( propagate_frame_requirement_to_predecessors(BlockMap, RevMap, !.Queue, !.DoneLabels, !.PropagationStepsLeft, CanTransform, !OrdNeedsFrameMap) ) ) else CanTransform = can_transform ). :- pred record_frame_need(frame_block_map(En, Ex)::in, label::in, ord_needs_frame_map::in, ord_needs_frame_map::out, can_transform::in, can_transform::out) is det. record_frame_need(BlockMap, Label, !OrdNeedsFrameMap, !CanTransform) :- map.lookup(BlockMap, Label, BlockInfo), BlockType = BlockInfo ^ fb_type, ( BlockType = entry_block(_), !:CanTransform = cannot_transform ; BlockType = ordinary_block(_, _), ord_needs_frame(Label, !OrdNeedsFrameMap) ; BlockType = exit_block(_), unexpected($pred, "exit_block") ). % Succeed if Label is "not yet done" ("nyd" for short) and % all its successors need a frame. % :- pred all_nyd_successors_need_frame(frame_block_map(En, Ex)::in, ord_needs_frame_map::in, set_tree234(label)::in, label::in) is semidet. all_nyd_successors_need_frame(BlockMap, OrdNeedsFrameMap, DoneLabels, Label) :- not set_tree234.contains(DoneLabels, Label), map.lookup(BlockMap, Label, BlockInfo), Successors = successors(BlockInfo), list.filter(label_does_not_need_frame(OrdNeedsFrameMap), Successors, NoNeedFrameSuccessors), NoNeedFrameSuccessors = []. :- pred label_does_not_need_frame(ord_needs_frame_map::in, label::in) is semidet. label_does_not_need_frame(OrdNeedsFrameMap, Label) :- ( if map.search(OrdNeedsFrameMap, Label, NeedsFrame) then NeedsFrame = block_does_not_need_frame else % If the map.search fails, Label is not an ordinary frame. % Entry blocks and exit blocks don't need frames. true ). % Returns the set of successors of the given block as a list % (which may contain duplicates). % :- func successors(frame_block_info(En, Ex)) = list(label). successors(BlockInfo) = Successors :- SideLabels = BlockInfo ^ fb_jump_dests, MaybeFallThrough = BlockInfo ^ fb_fall_dest, ( MaybeFallThrough = no, Successors = SideLabels ; MaybeFallThrough = yes(FallThrough), Successors = [FallThrough | SideLabels] ). %-----------------------------------------------------------------------------% % The predicates process_frame_delay and transform_ordinary_block % implement the second phase of delay_frame_transform. For documentation, % see the comment at the top of delay_frame_transform. % :- pred process_frame_delay(ord_needs_frame_map::in, proc_label::in, list(label)::in, counter::in, counter::out, frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out, setup_par_map::in, setup_par_map::out, exit_par_map::in, exit_par_map::out) is det <= block_entry_exit(En, Ex). process_frame_delay(_, _, [], !C, !BlockMap, !SetupParMap, !ExitParMap). process_frame_delay(OrdNeedsFrameMap, ProcLabel, [Label0 | Labels0], !C, !BlockMap, !SetupParMap, !ExitParMap) :- map.lookup(!.BlockMap, Label0, BlockInfo0), BlockInfo0 = frame_block_info(Label0Copy, Instrs0, FallInto, SideLabels0, MaybeFallThrough0, Type), expect(unify(Label0, Label0Copy), $pred, "label in frame_block_info is not copy"), ( Type = entry_block(_), ( if Instrs0 = [LabelInstrPrime | _], LabelInstrPrime = llds_instr(label(_), _) then LabelInstr = LabelInstrPrime else unexpected($pred, "setup block does not begin with label") ), BlockInfo = frame_block_info(Label0, [LabelInstr], FallInto, SideLabels0, MaybeFallThrough0, ordinary_block(block_does_not_need_frame, is_not_dummy)), map.det_update(Label0, BlockInfo, !BlockMap), process_frame_delay(OrdNeedsFrameMap, ProcLabel, Labels0, !C, !BlockMap, !SetupParMap, !ExitParMap) ; Type = ordinary_block(_, _), map.lookup(OrdNeedsFrameMap, Label0, NeedsFrame), ( NeedsFrame = block_needs_frame, % Every block reachable from this block, whether via jump or % fallthrough, will be an ordinary block also mapped to % block_needs_frame by OrdNeedsFrameMap, or will be an exit block, % or will be a pre-exit dummy block. We already have % a stack frame, and all our successors expect one, so % we need not do anything. process_frame_delay(OrdNeedsFrameMap, ProcLabel, Labels0, !C, !BlockMap, !SetupParMap, !ExitParMap) ; NeedsFrame = block_does_not_need_frame, transform_nostack_ordinary_block(OrdNeedsFrameMap, ProcLabel, Label0, Labels0, BlockInfo0, !C, !BlockMap, !SetupParMap, !ExitParMap) ) ; Type = exit_block(_), process_frame_delay(OrdNeedsFrameMap, ProcLabel, Labels0, !C, !BlockMap, !SetupParMap, !ExitParMap) ). % Transform an ordinary block that doesn't have a stack frame. % Every block that is a successor of this block, whether via jump or % fallthrough, will be an ordinary block or an exit block. % % - If it is an ordinary block that doesn't need a frame, we need not % do anything. % % - If it is an ordinary block B that does need a stack frame, we need to % insert the frame setup code at the transfer between the two blocks. % The label S of the block that contains the setup code and then goes % to block B will be given by map.lookup(!.SetupParMap, B, S). % Here, we just allocate the label S; the block will be created later. % % - If it is exit block B, then we need to jump to a variant of B % that does no teardown, since there is no stack frame to tear down. % The label S of the variant block will be given by % map.lookup(!.ExitParMap, B, S). Here, we just allocate % the label S; the block will be created later. % :- pred transform_nostack_ordinary_block(ord_needs_frame_map::in, proc_label::in, label::in, list(label)::in, frame_block_info(En, Ex)::in, counter::in, counter::out, frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out, setup_par_map::in, setup_par_map::out, exit_par_map::in, exit_par_map::out) is det <= block_entry_exit(En, Ex). transform_nostack_ordinary_block(OrdNeedsFrameMap, ProcLabel, Label0, Labels0, BlockInfo0, !C, !BlockMap, !SetupParMap, !ExitParMap) :- BlockInfo0 = frame_block_info(_, Instrs0, FallInto, SideLabels0, MaybeFallThrough0, Type), mark_parallels_for_nostack_successors(ProcLabel, OrdNeedsFrameMap, !.BlockMap, SideLabels0, SideLabels, SideAssocLabelMap, !C, !SetupParMap, !ExitParMap), ( MaybeFallThrough0 = yes(FallThroughLabel0), mark_parallel_for_nostack_successor(ProcLabel, OrdNeedsFrameMap, !.BlockMap, FallThroughLabel0, FallThroughLabel, !C, !SetupParMap, !ExitParMap), MaybeFallThrough = yes(FallThroughLabel), expect(no_disagreement(SideAssocLabelMap, FallThroughLabel0, FallThroughLabel), $pred, "disagreement"), AssocLabelMap = [FallThroughLabel0 - FallThroughLabel | SideAssocLabelMap], ( if FallThroughLabel = FallThroughLabel0 then RedirectFallThrough = [] else RedirectFallThrough = [llds_instr(goto(code_label(FallThroughLabel)), "redirect fallthrough")] % We can expect this jump to be optimized away in most cases. ) ; MaybeFallThrough0 = no, MaybeFallThrough = no, AssocLabelMap = SideAssocLabelMap, RedirectFallThrough = [] ), list.det_split_last(Instrs0, PrevInstrs, LastInstr0), map.from_assoc_list(AssocLabelMap, LabelMap), opt_util.replace_labels_instruction(LastInstr0, LastInstr, LabelMap, no), Instrs = PrevInstrs ++ [LastInstr | RedirectFallThrough], BlockInfo = frame_block_info(Label0, Instrs, FallInto, SideLabels, MaybeFallThrough, Type), map.set(Label0, BlockInfo, !BlockMap), process_frame_delay(OrdNeedsFrameMap, ProcLabel, Labels0, !C, !BlockMap, !SetupParMap, !ExitParMap). :- pred no_disagreement(assoc_list(label, label)::in, label::in, label::in) is semidet. no_disagreement([], _, _). no_disagreement([K - V | KVs], Key, Value) :- ( K = Key => V = Value ), no_disagreement(KVs, Key, Value). %-----------------------------------------------------------------------------% % Invokes mark_parallel_for_nostack_successor on each input label, % and returns both the updated list of labels and the substitution % (represented as an association list) that will have to applied % to the jumping instruction. % :- pred mark_parallels_for_nostack_successors(proc_label::in, ord_needs_frame_map::in, frame_block_map(En, Ex)::in, list(label)::in, list(label)::out, assoc_list(label)::out, counter::in, counter::out, setup_par_map::in, setup_par_map::out, exit_par_map::in, exit_par_map::out) is det. mark_parallels_for_nostack_successors(_, _, _, [], [], [], !C, !SetupParMap, !ExitParMap). mark_parallels_for_nostack_successors(ProcLabel, OrdNeedsFrameMap, BlockMap, [Label0 | Labels0], [Label | Labels], [Label0 - Label | LabelMap], !C, !SetupParMap, !ExitParMap) :- mark_parallel_for_nostack_successor(ProcLabel, OrdNeedsFrameMap, BlockMap, Label0, Label, !C, !SetupParMap, !ExitParMap), mark_parallels_for_nostack_successors(ProcLabel, OrdNeedsFrameMap, BlockMap, Labels0, Labels, LabelMap, !C, !SetupParMap, !ExitParMap). % Label0 is a label that is a successor of a block which has no stack % frame. % % If Label0 starts an exit block, we ensure that it has a non-teardown % parallel Label. % % If Label0 starts an ordinary block that needs a stack frame, we ensure % that it has a parallel Label that allocates a stack frame before handing % control to Label0. % :- pred mark_parallel_for_nostack_successor(proc_label::in, ord_needs_frame_map::in, frame_block_map(En, Ex)::in, label::in, label::out, counter::in, counter::out, setup_par_map::in, setup_par_map::out, exit_par_map::in, exit_par_map::out) is det. mark_parallel_for_nostack_successor(ProcLabel, OrdNeedsFrameMap, BlockMap, Label0, Label, !C, !SetupParMap, !ExitParMap) :- map.lookup(BlockMap, Label0, BlockInfo), Type = BlockInfo ^ fb_type, ( Type = entry_block(_), unexpected($pred, "reached setup via jump from ordinary block") ; Type = ordinary_block(_, _), map.lookup(OrdNeedsFrameMap, Label0, NeedsFrame), ( NeedsFrame = block_needs_frame, ensure_setup_parallel(Label0, Label, ProcLabel, !C, !SetupParMap) ; NeedsFrame = block_does_not_need_frame, Label = Label0 ) ; Type = exit_block(_), ensure_exit_parallel(Label0, Label, ProcLabel, !C, !ExitParMap) ). %-----------------------------------------------------------------------------% % The third phase of the delay_frame_transform optimization, creating % % - the setup code of ordinary blocks that need frames but (some of) % whose predecessors don't have one, and % % - the parallels of exit blocks that can assume there is no frame to % tear down. % :- pred create_parallels(En::in, proc_label::in, ord_needs_frame_map::in, setup_par_map::in, exit_par_map::in, pred_map::in, list(label)::in, list(label)::out, counter::in, counter::out, frame_block_map(En, Ex)::in, frame_block_map(En, Ex)::out) is det <= block_entry_exit(En, Ex). create_parallels(_, _, _, _, _, _, [], [], !C, !BlockMap). create_parallels(EntryInfo, ProcLabel, OrdNeedsFrameMap, SetupParMap, ExitParMap, PredMap, [Label0 | Labels0], Labels, !C, !BlockMap) :- create_parallels(EntryInfo, ProcLabel, OrdNeedsFrameMap, SetupParMap, ExitParMap, PredMap, Labels0, Labels1, !C, !BlockMap), map.lookup(!.BlockMap, Label0, BlockInfo0), BlockInfo0 = frame_block_info(Label0Copy, _, FallInto, SideLabels, MaybeFallThrough, Type), expect(unify(Label0, Label0Copy), $pred, "label in frame_block_info is not copy"), ( if search_exit_par_map(ExitParMap, Label0, ParallelLabel) then expect(unify(MaybeFallThrough, no), $pred, "exit block with parallel has fall through"), ( SideLabels = [], Comments = [] ; SideLabels = [_ | _], % This can happen if fulljump optimization has redirected the % return. Comments = [llds_instr(comment("exit side labels " ++ dump_labels(yes(ProcLabel), SideLabels)), "")] ), PrevNeedsFrame = prev_block_needs_frame(OrdNeedsFrameMap, BlockInfo0), ( Type = exit_block(ExitInfo), LabelInstr = llds_instr(label(ParallelLabel), "non-teardown parallel"), ReplacementCode = [LabelInstr] ++ Comments ++ non_teardown_exit_code(ExitInfo), ( PrevNeedsFrame = block_does_not_need_frame, Labels = [ParallelLabel, Label0 | Labels1], BlockInfo = BlockInfo0 ^ fb_fallen_into := no, map.det_update(Label0, BlockInfo, !BlockMap), ParallelBlockFallInto = FallInto ; PrevNeedsFrame = block_needs_frame, Labels = [Label0, ParallelLabel | Labels1], ParallelBlockFallInto = no ), ParallelBlockInfo = frame_block_info(ParallelLabel, ReplacementCode, ParallelBlockFallInto, SideLabels, no, ordinary_block(block_does_not_need_frame, is_not_dummy)), map.det_insert(ParallelLabel, ParallelBlockInfo, !BlockMap) ; ( Type = entry_block(_) ; Type = ordinary_block(_, _) ), unexpected($pred, "block in exit_par_map is not exit") ) else if search_setup_par_map(SetupParMap, Label0, SetupLabel) then expect(is_ordinary_block(Type), $pred, "block in setup map is not ordinary"), PrevNeedsFrame = prev_block_needs_frame(OrdNeedsFrameMap, BlockInfo0), ( PrevNeedsFrame = block_needs_frame, counter.allocate(N, !C), JumpAroundLabel = internal_label(N, ProcLabel), % By not including a label instruction at the start of % JumpAroundCode, we are breaking an invariant of frame_block_maps. % However, we don't execute any code during or after % create_parallels that depends on that invariant, and not % including the label saves memory and reduces the amount of work % labelopt has to do. (The label *would* be optimized away, since % it can't be referred to from anywhere.) JumpAroundCode = [llds_instr(goto(code_label(Label0)), "jump around setup")], Labels = [JumpAroundLabel, SetupLabel, Label0 | Labels1], JumpAroundBlockInfo = frame_block_info(JumpAroundLabel, JumpAroundCode, no, [Label0], FallInto, ordinary_block(block_needs_frame, is_not_dummy)), map.det_insert(JumpAroundLabel, JumpAroundBlockInfo, !BlockMap), SetupFallInto = yes(JumpAroundLabel), BlockInfo = BlockInfo0 ^ fb_fallen_into := yes(SetupLabel), map.det_update(Label0, BlockInfo, !BlockMap) ; PrevNeedsFrame = block_does_not_need_frame, Labels = [SetupLabel, Label0 | Labels1], SetupFallInto = no ), SetupCode = [llds_instr(label(SetupLabel), "late setup label")] ++ late_setup_code(EntryInfo), SetupBlockInfo = frame_block_info(SetupLabel, SetupCode, SetupFallInto, [], yes(Label0), entry_block(EntryInfo)), map.det_insert(SetupLabel, SetupBlockInfo, !BlockMap) else Labels = [Label0 | Labels1] ). :- func prev_block_needs_frame(ord_needs_frame_map, frame_block_info(En, Ex)) = block_needs_frame. prev_block_needs_frame(OrdNeedsFrameMap, BlockInfo) = PrevNeedsFrame :- MaybeFallIntoFrom = BlockInfo ^ fb_fallen_into, ( MaybeFallIntoFrom = yes(FallIntoFrom), ( if map.search(OrdNeedsFrameMap, FallIntoFrom, NeedsFrame) then % FallIntoFrom is an ordinary block that can fall through % to this block. PrevNeedsFrame = NeedsFrame else % FallIntoFrom is a setup block; exit blocks cannot fall % through. Entry blocks don't need frames. PrevNeedsFrame = block_does_not_need_frame ) ; MaybeFallIntoFrom = no, % The previous block doesn't care whether the following block % has a frame or not. PrevNeedsFrame = block_does_not_need_frame ). :- pred is_ordinary_block(block_type(En, Ex)::in) is semidet. is_ordinary_block(ordinary_block(_, _)). %-----------------------------------------------------------------------------% :- func det_late_setup(det_entry_info) = list(instruction). det_late_setup(det_entry(FrameSize, Msg, Kind)) = [llds_instr(incr_sp(FrameSize, Msg, Kind), "late setup"), llds_instr(assign(stackvar(FrameSize), lval(succip)), "late save")]. :- func det_non_teardown_exit_code(det_exit_info) = list(instruction). det_non_teardown_exit_code(det_exit(_, Livevals, Goto)) = Livevals ++ [Goto]. :- func nondet_late_setup(nondet_entry_info) = list(instruction). nondet_late_setup(nondet_entry(Msg, FrameSize, Redoip)) = [llds_instr(mkframe(ordinary_frame(Msg, FrameSize), yes(Redoip)), "late setup")]. :- func nondet_non_teardown_exit_code(nondet_exit_info) = list(instruction). nondet_non_teardown_exit_code(nondet_plain_exit(Livevals, _Goto)) = Livevals ++ [llds_instr(goto(code_succip), "")]. nondet_non_teardown_exit_code(nondet_teardown_exit(_, _, _, Livevals, Goto)) = Livevals ++ [Goto]. %-----------------------------------------------------------------------------% % Given the label of a block, allocate a label for its parallel % in the given setup map if it doesn't already have one. % :- pred ensure_setup_parallel(label::in, label::out, proc_label::in, counter::in, counter::out, setup_par_map::in, setup_par_map::out) is det. ensure_setup_parallel(Label, ParallelLabel, ProcLabel, !C, !SetupParMap) :- !.SetupParMap = setup_par_map(ParMap0), ( if map.search(ParMap0, Label, OldParallel) then ParallelLabel = OldParallel else counter.allocate(N, !C), NewParallel = internal_label(N, ProcLabel), ParallelLabel = NewParallel, map.det_insert(Label, NewParallel, ParMap0, ParMap), !:SetupParMap = setup_par_map(ParMap) ). % Given the label of a block, allocate a label for its parallel % in the given exit map if it doesn't already have one. % :- pred ensure_exit_parallel(label::in, label::out, proc_label::in, counter::in, counter::out, exit_par_map::in, exit_par_map::out) is det. ensure_exit_parallel(Label, ParallelLabel, ProcLabel, !C, !ExitParMap) :- !.ExitParMap = exit_par_map(ParMap0), ( if map.search(ParMap0, Label, OldParallel) then ParallelLabel = OldParallel else counter.allocate(N, !C), NewParallel = internal_label(N, ProcLabel), ParallelLabel = NewParallel, map.det_insert(Label, NewParallel, ParMap0, ParMap), !:ExitParMap = exit_par_map(ParMap) ). %-----------------------------------------------------------------------------% % This predicate generates a human-readable description of a block % as a comment instruction. This can make it much easier to debug % frameopt. % :- pred describe_block(frame_block_map(En, Ex)::in, ord_needs_frame_map::in, pred_map::in, proc_label::in, label::in, instruction::out) is det <= block_entry_exit(En, Ex). describe_block(BlockMap, OrdNeedsFrameMap, PredMap, ProcLabel, Label, Instr) :- map.lookup(BlockMap, Label, BlockInfo), BlockInfo = frame_block_info(BlockLabel, BlockInstrs, FallInto, SideLabels, MaybeFallThrough, Type), expect(unify(Label, BlockLabel), $pred, "label mismatch"), YesProcLabel = yes(ProcLabel), LabelStr = dump_label(YesProcLabel, Label), BlockInstrsStr = dump_fullinstrs(YesProcLabel, auto_comments, BlockInstrs), Heading = "\nBLOCK " ++ LabelStr ++ "\n\n", ( if map.search(PredMap, Label, PredLabel) then PredStr = "previous label " ++ dump_label(YesProcLabel, PredLabel) ++ "\n" else PredStr = "no previous label\n" ), ( FallInto = yes(FallIntoFromLabel), FallIntoStr = "fallen into from " ++ dump_label(YesProcLabel, FallIntoFromLabel) ++ "\n" ; FallInto = no, FallIntoStr = "not fallen into\n" ), ( SideLabels = [], SideStr = "no side labels\n" ; SideLabels = [_ | _], SideStr = "side labels " ++ dump_labels(YesProcLabel, SideLabels) ++ "\n" ), ( MaybeFallThrough = yes(FallThroughLabel), FallThroughStr = "falls through to " ++ dump_label(YesProcLabel, FallThroughLabel) ++ "\n" ; MaybeFallThrough = no, FallThroughStr = "does not fall through\n" ), ( Type = entry_block(Entry), TypeStr = "entry_block\n" ++ describe_entry(Entry) ; Type = ordinary_block(UsesFrame, MaybeDummy), ( MaybeDummy = is_not_dummy, TypeStr0 = "ordinary_block; " ; MaybeDummy = is_post_entry_dummy, TypeStr0 = "ordinary_block (post_entry_dummy); " ; MaybeDummy = is_pre_exit_dummy, TypeStr0 = "ordinary_block (pre_exit_dummy); " ), ( UsesFrame = block_needs_frame, TypeStr1 = TypeStr0 ++ "uses frame\n" ; UsesFrame = block_does_not_need_frame, TypeStr1 = TypeStr0 ++ "does not use frame\n" ), ( if map.search(OrdNeedsFrameMap, Label, NeedsFrame) then ( NeedsFrame = block_does_not_need_frame, expect(unify(UsesFrame, block_does_not_need_frame), $pred, "NeedsFrame=block_does_not_need_frame, " ++ "UsesFrame=block_needs_frame"), TypeStr = TypeStr1 ++ "does not need frame\n" ; NeedsFrame = block_needs_frame, TypeStr = TypeStr1 ++ "does need frame\n" ) else % We can get here if delay_frame_transform fails. TypeStr = TypeStr1 ++ "(unknown whether it does need frame)\n" ) ; Type = exit_block(Exit), expect(unify(MaybeFallThrough, no), $pred, "exit_block, MaybeFallThrough=yes(_)"), TypeStr = "exit_block\n" ++ describe_exit(YesProcLabel, Exit) ), Comment = Heading ++ PredStr ++ FallIntoStr ++ SideStr ++ FallThroughStr ++ TypeStr ++ "CODE:\n" ++ BlockInstrsStr, Instr = llds_instr(comment(Comment), ""). :- func describe_det_entry(det_entry_info) = string. describe_det_entry(det_entry(Size, Msg, Kind)) = Desc :- string.format("size: %d, msg: %s, kind: %s\n", [i(Size), s(Msg), s(dump_stack_incr_kind(Kind))], Desc). :- func describe_det_exit(maybe(proc_label), det_exit_info) = string. describe_det_exit(MaybeProcLabel, det_exit(RestoreSuccip, Livevals, Goto)) = "restore: " ++ dump_fullinstrs(MaybeProcLabel, auto_comments, RestoreSuccip) ++ "livevals: " ++ dump_fullinstrs(MaybeProcLabel, auto_comments, Livevals) ++ "goto: " ++ dump_fullinstr(MaybeProcLabel, auto_comments, Goto). :- func describe_nondet_entry(nondet_entry_info) = string. describe_nondet_entry(nondet_entry(Msg, Size, Redoip)) = Desc :- string.format("msg: %s, size: %d, redoip: %s", [s(Msg), i(Size), s(dump_code_addr(no, Redoip))], Desc). :- func describe_nondet_exit(maybe(proc_label), nondet_exit_info) = string. describe_nondet_exit(MaybeProcLabel, nondet_plain_exit(Livevals, Goto)) = "livevals: " ++ dump_fullinstrs(MaybeProcLabel, auto_comments, Livevals) ++ "goto: " ++ dump_fullinstr(MaybeProcLabel, auto_comments, Goto). describe_nondet_exit(MaybeProcLabel, nondet_teardown_exit(Succip, Maxfr, Curfr, Livevals, Goto)) = "succip: " ++ dump_fullinstr(MaybeProcLabel, auto_comments, Succip) ++ "maxfr: " ++ dump_fullinstr(MaybeProcLabel, auto_comments, Maxfr) ++ "curfr: " ++ dump_fullinstr(MaybeProcLabel, auto_comments, Curfr) ++ "livevals: " ++ dump_fullinstrs(MaybeProcLabel, auto_comments, Livevals) ++ "goto: " ++ dump_fullinstr(MaybeProcLabel, auto_comments, Goto). %-----------------------------------------------------------------------------% :- pred search_setup_par_map(setup_par_map::in, label::in, label::out) is semidet. search_setup_par_map(setup_par_map(ParMap), Label, ParallelLabel) :- map.search(ParMap, Label, ParallelLabel). :- pred search_exit_par_map(exit_par_map::in, label::in, label::out) is semidet. search_exit_par_map(exit_par_map(ParMap), Label, ParallelLabel) :- map.search(ParMap, Label, ParallelLabel). %-----------------------------------------------------------------------------% :- end_module ll_backend.frameopt. %-----------------------------------------------------------------------------%