Files
mercury/compiler/optimize.m
Zoltan Somogyi b913c795ca A rewrite of frameopt, with supporting changes in other modules.
Estimated hours taken: 25

A rewrite of frameopt, with supporting changes in other modules.

frameopt:
	A complete rewrite, with three objectives.

	The first is to fix a basic design flaw that was in the module from
	the beginning, which is that it looked at whether a block would have
	a stack frame if the frame setup wasd delayed as long as possible,
	and took this as gospel. This sometimes led to code that throws away
	the frame to enter a block that does not need a frame and then
	constructing it again to enter another block which does need a frame.
	It also lead to some twisted code when we jumped from a block without
	a frame to a block with one, since we'd have to set up a stack frame
	on arrival at the target block; this sometimes required branches
	around this setup code at the start of the target block to properly
	support fallthroughs.

	We now work out in advance which blocks must have a frame, and
	propagate the requirement for a frame both forwards and backwards
	until a fixpoint is reached, and only then transform the code.
	The propagation phase ensures that we never suffer from either
	of the problems described above.

	The second objective is to integrate another optimization concerned
	with stack frames: not delaying the creation, but reusing a frame
	set up for one call to also act as the frame of a tail recursive call.
	We used to this (badly) in peephole; we now do it (well) here.

	The third objective is to separate out the filling of delay slots,
	so frameopt can be invoked before value numbering. (Filling delay
	slots creates code that refers to the same location by two distinct
	names, detstackvar(0) and detstackvar(N) where N>0, which breaks the
	assumption behind value numbering.) Invoking frameopt before value
	numbering should make value numbering more effective whenever frameopt
	decides to keep the stack frame.

delay_slot:
	A new module to perform the optimization of filling branch delay slots.

opt_util:
	Return the initial label instruction from opt_util__get_prologue,
	and delete some predicates that aren't and won't be needed.

peephole:
	Don't pass around the Teardown and Setup maps, since the optimization
	they were needed for (keeping stack frames) is now done by frameopt.

optimize:
	Use the new interface of frameopt and peephole.

	Invoke frameopt before the value numbering passes.

	We don't need a dedicated peephole pass after frameopt anymore,
	What we need is a labelopt pass to get rid of the extra labels frameopt
	introduces, and possibly a jumpopt pass to short-circuit any jumps
	that replace tailcalls.

	Invoke delay_slot optimization and post_value_number at the very end.
	We don't need to invoke any frameopt post-pass anymore.

	Fix a couple of places where we were not dumping the instruction
	properly when --debug-opt was given.

value_number:
	Use the new interface of peephole and opt_util__get_prologue.

jumpopt:
	Under some circumstances we were generating the instruction "r1 = r1";
	we don't do this anymore.

llds_out:
	Add a missing newline at the end of garbage collection annotations.
1997-01-11 08:51:20 +00:00

341 lines
9.3 KiB
Mathematica

%-----------------------------------------------------------------------------%
% Copyright (C) 1995 University of Melbourne.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%-----------------------------------------------------------------------------%
% optimize.m - LLDS to LLDS optimizations.
% Main author: zs.
%-----------------------------------------------------------------------------%
:- module optimize.
:- interface.
:- import_module llds, io, options.
:- pred optimize__main(list(c_procedure), list(c_procedure),
io__state, io__state).
:- mode optimize__main(in, out, di, uo) is det.
:- pred optimize__proc(c_procedure, c_procedure, io__state, io__state).
:- mode optimize__proc(in, out, di, uo) is det.
%-----------------------------------------------------------------------------%
:- implementation.
:- import_module bool, list, map, bimap, int, std_util.
:- import_module jumpopt, labelopt, dupelim, peephole.
:- import_module frameopt, delay_slot, value_number.
:- import_module globals, passes_aux, opt_util, opt_debug, vn_debug.
optimize__main([], []) --> [].
optimize__main([Proc0|Procs0], [Proc|Procs]) -->
optimize__proc(Proc0, Proc),
optimize__main(Procs0, Procs).
optimize__proc(c_procedure(Name, Arity, Mode, Instrs0),
c_procedure(Name, Arity, Mode, Instrs)) -->
globals__io_lookup_bool_option(debug_opt, DebugOpt),
opt_debug__msg(DebugOpt, "before optimization"),
opt_debug__dump_instrs(DebugOpt, Instrs0),
globals__io_lookup_int_option(optimize_repeat, AllRepeat),
globals__io_lookup_int_option(optimize_vnrepeat, VnRepeat),
globals__io_lookup_bool_option(optimize_value_number, ValueNumber),
( { ValueNumber = yes } ->
{ NovnRepeat is AllRepeat - VnRepeat },
optimize__repeat(NovnRepeat, no, Instrs0, Instrs1),
optimize__middle(Instrs1, no, Instrs2),
optimize__repeat(VnRepeat, yes, Instrs2, Instrs3)
;
optimize__repeat(AllRepeat, no, Instrs0, Instrs1),
optimize__middle(Instrs1, yes, Instrs3)
),
optimize__last(Instrs3, Instrs).
%-----------------------------------------------------------------------------%
:- pred optimize__repeat(int, bool, list(instruction), list(instruction),
io__state, io__state).
:- mode optimize__repeat(in, in, in, out, di, uo) is det.
optimize__repeat(Iter0, DoVn, Instrs0, Instrs) -->
(
{ Iter0 > 0 }
->
{ Iter1 is Iter0 - 1 },
( { Iter1 = 0 } ->
{ Final = yes }
;
{ Final = no }
),
optimize__repeated(Instrs0, DoVn, Final, Instrs1, Mod),
( { Mod = yes } ->
optimize__repeat(Iter1, DoVn, Instrs1, Instrs)
;
{ Instrs = Instrs1 }
)
;
{ Instrs = Instrs0 }
).
% We short-circuit jump sequences before normal peepholing
% to create more opportunities for use of the tailcall macro.
:- pred optimize__repeated(list(instruction), bool, bool,
list(instruction), bool, io__state, io__state).
:- mode optimize__repeated(in, in, in, out, out, di, uo) is det.
optimize__repeated(Instrs0, DoVn, Final, Instrs, Mod) -->
globals__io_lookup_bool_option(very_verbose, VeryVerbose),
globals__io_lookup_bool_option(debug_opt, DebugOpt),
{ opt_util__find_first_label(Instrs0, Label) },
{ opt_util__format_label(Label, LabelStr) },
globals__io_lookup_bool_option(optimize_value_number, ValueNumber),
( { ValueNumber = yes, DoVn = yes } ->
( { VeryVerbose = yes } ->
io__write_string("% Optimizing value number for "),
io__write_string(LabelStr),
io__write_string("\n")
;
[]
),
value_number__main(Instrs0, Instrs1),
( { Instrs1 = Instrs0 } ->
[]
;
opt_debug__msg(DebugOpt, "after value numbering"),
opt_debug__dump_instrs(DebugOpt, Instrs1)
)
;
{ Instrs1 = Instrs0 }
),
globals__io_lookup_bool_option(optimize_jumps, Jumpopt),
globals__io_lookup_bool_option(optimize_fulljumps, FullJumpopt),
( { Jumpopt = yes } ->
( { VeryVerbose = yes } ->
io__write_string("% Optimizing jumps for "),
io__write_string(LabelStr),
io__write_string("\n")
;
[]
),
{ jumpopt__main(Instrs1, FullJumpopt, Final, Instrs2, Mod1) },
( { Mod1 = yes } ->
opt_debug__msg(DebugOpt, "after jump optimization"),
opt_debug__dump_instrs(DebugOpt, Instrs2)
;
[]
)
;
{ Instrs2 = Instrs1 },
{ Mod1 = no }
),
globals__io_lookup_bool_option(optimize_peep, Peephole),
( { Peephole = yes } ->
( { VeryVerbose = yes } ->
io__write_string("% Optimizing locally for "),
io__write_string(LabelStr),
io__write_string("\n")
;
[]
),
{ peephole__optimize(Instrs2, Instrs3, Mod2) },
( { Mod2 = yes } ->
opt_debug__msg(DebugOpt, "after peepholing"),
opt_debug__dump_instrs(DebugOpt, Instrs3)
;
[]
)
;
{ Instrs3 = Instrs2 },
{ Mod2 = no }
),
globals__io_lookup_bool_option(optimize_labels, LabelElim),
( { LabelElim = yes } ->
( { VeryVerbose = yes } ->
io__write_string("% Optimizing labels for "),
io__write_string(LabelStr),
io__write_string("\n")
;
[]
),
{ labelopt__main(Instrs3, Final, Instrs4, Mod3) },
( { Mod3 = yes } ->
opt_debug__msg(DebugOpt, "after label optimization"),
opt_debug__dump_instrs(DebugOpt, Instrs4)
;
[]
)
;
{ Instrs4 = Instrs3 },
{ Mod3 = no }
),
globals__io_lookup_bool_option(optimize_dups, DupElim),
( { DupElim = yes } ->
( { VeryVerbose = yes } ->
io__write_string("% Optimizing duplicates for "),
io__write_string(LabelStr),
io__write_string("\n")
;
[]
),
{ dupelim__main(Instrs4, Instrs) },
( { Instrs = Instrs4 } ->
[]
;
opt_debug__msg(DebugOpt, "after duplicate elimination"),
opt_debug__dump_instrs(DebugOpt, Instrs)
)
;
{ Instrs = Instrs4 }
),
{ Mod1 = no, Mod2 = no, Mod3 = no, Instrs = Instrs0 ->
Mod = no
;
Mod = yes
},
globals__io_lookup_bool_option(statistics, Statistics),
maybe_report_stats(Statistics).
:- pred optimize__middle(list(instruction), bool, list(instruction),
io__state, io__state).
:- mode optimize__middle(in, in, out, di, uo) is det.
optimize__middle(Instrs0, Final, Instrs) -->
globals__io_lookup_bool_option(very_verbose, VeryVerbose),
globals__io_lookup_bool_option(debug_opt, DebugOpt),
{ opt_util__find_first_label(Instrs0, Label) },
{ opt_util__format_label(Label, LabelStr) },
globals__io_lookup_bool_option(optimize_frames, FrameOpt),
( { FrameOpt = yes } ->
( { VeryVerbose = yes } ->
io__write_string("% Optimizing frames for "),
io__write_string(LabelStr),
io__write_string("\n")
;
[]
),
{ frameopt__main(Instrs0, Instrs1, Mod1, Jumps) },
( { Mod1 = yes } ->
opt_debug__msg(DebugOpt, "after frame optimization"),
opt_debug__dump_instrs(DebugOpt, Instrs1)
;
[]
),
globals__io_lookup_bool_option(optimize_fulljumps, FullJumpopt),
( { Jumps = yes, FullJumpopt = yes } ->
( { VeryVerbose = yes } ->
io__write_string("% Optimizing jumps for "),
io__write_string(LabelStr),
io__write_string("\n")
;
[]
),
{ jumpopt__main(Instrs1, FullJumpopt, Final, Instrs2, Mod2) },
( { Mod2 = yes } ->
opt_debug__msg(DebugOpt, "after jump optimization"),
opt_debug__dump_instrs(DebugOpt, Instrs2)
;
[]
)
;
{ Instrs2 = Instrs1 }
),
( { Mod1 = yes } ->
( { VeryVerbose = yes } ->
io__write_string("% Optimizing labels for "),
io__write_string(LabelStr),
io__write_string("\n")
;
[]
),
{ labelopt__main(Instrs2, Final, Instrs, Mod3) },
( { Mod3 = yes } ->
opt_debug__msg(DebugOpt, "after label optimization"),
opt_debug__dump_instrs(DebugOpt, Instrs)
;
[]
)
;
{ Instrs = Instrs2 }
)
;
{ Instrs = Instrs0 }
).
:- pred optimize__last(list(instruction), list(instruction),
io__state, io__state).
:- mode optimize__last(in, out, di, uo) is det.
optimize__last(Instrs0, Instrs) -->
globals__io_lookup_bool_option(very_verbose, VeryVerbose),
globals__io_lookup_bool_option(debug_opt, DebugOpt),
{ opt_util__find_first_label(Instrs0, Label) },
{ opt_util__format_label(Label, LabelStr) },
globals__io_lookup_bool_option(optimize_delay_slot, DelaySlot),
globals__io_lookup_bool_option(optimize_value_number, ValueNumber),
( { DelaySlot = yes ; ValueNumber = yes } ->
% We must get rid of any extra labels added by other passes,
% since they can confuse both post_value_number and delay_slot.
( { VeryVerbose = yes } ->
io__write_string("% Optimizing labels for "),
io__write_string(LabelStr),
io__write_string("\n")
;
[]
),
{ labelopt__main(Instrs0, no, Instrs1, Mod1) },
( { Mod1 = yes } ->
opt_debug__msg(DebugOpt, "after label optimization"),
opt_debug__dump_instrs(DebugOpt, Instrs1)
;
[]
)
;
{ Instrs1 = Instrs0 }
),
( { DelaySlot = yes } ->
( { VeryVerbose = yes } ->
io__write_string("% Optimizing delay slot for "),
io__write_string(LabelStr),
io__write_string("\n")
;
[]
),
{ fill_branch_delay_slot(Instrs1, Instrs2) },
( { Instrs1 = Instrs0 } ->
opt_debug__msg(DebugOpt, "after delay slot filling"),
opt_debug__dump_instrs(DebugOpt, Instrs2)
;
[]
)
;
{ Instrs2 = Instrs1 }
),
( { ValueNumber = yes } ->
( { VeryVerbose = yes } ->
io__write_string("% Optimizing post value number for "),
io__write_string(LabelStr),
io__write_string("\n")
;
[]
),
{ value_number__post_main(Instrs2, Instrs) },
( { Instrs = Instrs2 } ->
[]
;
opt_debug__msg(DebugOpt, "after post value number"),
opt_debug__dump_instrs(DebugOpt, Instrs)
)
;
{ Instrs = Instrs1 }
).