mirror of
https://github.com/Mercury-Language/mercury.git
synced 2026-04-16 09:53:36 +00:00
samples/diff/globals.m: Replace the use of the deprecated I/O globals with a mutable. Delete unnecessary module qualification. samples/diff/*.m: Update copyright notices.
469 lines
16 KiB
Mathematica
469 lines
16 KiB
Mathematica
%-----------------------------------------------------------------------------%
|
|
% vim: ft=mercury ts=4 sw=4 et wm=0 tw=0
|
|
%-----------------------------------------------------------------------------%
|
|
% Copyright (C) 1998, 2006, 2011 The University of Melbourne.
|
|
% Copyright (C) 2015, 2019 The Mercury team.
|
|
% This file may only be copied under the terms of the GNU General
|
|
% Public License - see the file COPYING in the Mercury distribution.
|
|
%-----------------------------------------------------------------------------%
|
|
%
|
|
% File: myers.m.
|
|
% Main author: bromage.
|
|
%
|
|
% TO DO: We should implement the big-snake heuristic (a.k.a.
|
|
% --speed-large-files).
|
|
%
|
|
% ALSO TO DO: Gene Myers et al have since produced another algorithm
|
|
% which takes O(NP) time where P is the number of deletions in the edit script.
|
|
% If the `too expensive' heuristic can be retro-fitted onto that algorithm
|
|
% easily enough, we should try out this algorithm and see how fast it runs.
|
|
% In theory, we should be looking at about a 2x speedup.
|
|
%
|
|
%-----------------------------------------------------------------------------%
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
:- module myers.
|
|
:- interface.
|
|
|
|
:- import_module difftype.
|
|
|
|
:- import_module array.
|
|
:- import_module io.
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% diff_by_myers(FileX, FileY, Diff, !IO):
%
% Diff is the difference between the two sequences FileX and FileY,
% computed with Myers' O(ND) algorithm. The elements are only ever
% compared for equality (presumably each int stands for one input line;
% confirm at the caller). The I/O state is threaded through because the
% implementation looks up the `minimal' option.
%
:- pred diff_by_myers(array(int)::in, array(int)::in, diff::out,
    io::di, io::uo) is det.
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
:- implementation.
|
|
|
|
:- import_module globals.
|
|
:- import_module options.
|
|
|
|
:- import_module bool.
|
|
:- import_module int.
|
|
:- import_module list.
|
|
:- import_module pair.
|
|
:- import_module require.
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% The basic algorithm is described in:
%   "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
%   Algorithmica Vol. 1 No. 2, 1986, pp. 251-266.
% This implementation uses the variation in section 4b of that paper.
%
% Two work arrays (one for the forward search, one for the backward search)
% are allocated here and threaded through the whole recursion. They are
% indexed by "diagonal number + DiagOffset"; with DiagOffset = SizeY + 1
% the diagonal -(SizeY + 1) maps to index 0, and the arrays have
% SizeX + SizeY + 3 slots.
%
diff_by_myers(FileX, FileY, Diff, !IO) :-
    LenX = array.size(FileX),
    LenY = array.size(FileY),
    NumSlots = LenX + LenY + 3,
    DiagOffset = LenY + 1,

    % Unless --minimal was requested, enable the "too expensive" heuristic.
    % Its cutoff is roughly the square root of the input size (2 to the
    % power of half the base-2 logarithm of NumSlots), but never less than
    % minimum_too_expensive. The price is a possibly non-minimal diff.
    % NOTE(review): the intended bound on the work is presumably about
    % O(N ** 1.5 log N), as for the same heuristic in GNU diff -- confirm.
    globals.io_lookup_bool_option(minimal, WantMinimal, !IO),
    (
        WantMinimal = yes,
        Heuristic = none
    ;
        WantMinimal = no,
        Log2 = int.log2(NumSlots),
        Cutoff = int.max(minimum_too_expensive, 1 << (Log2 // 2)),
        Heuristic = too_expensive(Cutoff)
    ),

    % Fill the work arrays with a nondescript number which the algorithm
    % should never produce itself. (This is purely a debugging aid.)
    Fwd0 = array.init(NumSlots, -65537),
    Bwd0 = array.init(NumSlots, -65537),
    myers.bsearch(DiagOffset, FileX, FileY, 0, LenX, 0, LenY, Heuristic,
        Fwd0, _Fwd, Bwd0, _Bwd, [], Diff).
|
|
|
|
% The smallest cutoff that the "too expensive" heuristic is ever given;
% diff_by_myers takes the maximum of this and its size-based estimate.
%
% XXX The value is a guess. Some measurements are needed to find out
% whether it is a good one.
%
:- func minimum_too_expensive = int.

minimum_too_expensive = 256.
|
|
|
|
% myers.bsearch(DOffset, FileX, FileY, Xlow, Xhigh, Ylow, Yhigh, Heur,
%   !Fwd, !Bwd, !Diff):
%
% Diff the sub-problem FileX[Xlow .. Xhigh) against FileY[Ylow .. Yhigh)
% and add the resulting edits to !Diff. !Fwd and !Bwd are the work arrays
% of the forward and backward searches; DOffset is the offset that maps
% a diagonal number to an index into them.
%
% The common prefix and suffix are stripped first. If that leaves one of
% the two ranges empty, a single edit is recorded through add_edit (which
% is not defined in this module). Otherwise find_middle picks a split
% point and the two halves are handled recursively.
%
:- pred myers.bsearch(int::in, array(int)::in, array(int)::in, int::in,
    int::in, int::in, int::in, heur::in,
    array(int)::array_di, array(int)::array_uo,
    array(int)::array_di, array(int)::array_uo,
    diff::in, diff::out) is det.

myers.bsearch(DOffset, FileX, FileY, Xlow0, Xhigh0, Ylow0, Yhigh0, Heur,
        !Fwd, !Bwd, !Diff) :-
    % Skip the matching prefix, then the matching suffix of what is left.
    scan_forward(FileX, FileY, Xhigh0, Yhigh0, Xlow0, XStart, Ylow0, YStart),
    scan_backward(FileX, FileY, XStart, YStart, Xhigh0, XEnd, Yhigh0, YEnd),
    ( if
        XStart < XEnd,
        YStart < YEnd
    then
        find_middle(DOffset, FileX, FileY, XStart, XEnd, YStart, YEnd, Heur,
            !Fwd, !Bwd, XSplit, YSplit, Cost, HeurPair),
        ( if Cost =< 0 then
            % Both ranges are non-empty and start and end with a mismatch,
            % so a zero-cost split should be impossible.
            error("myers.bsearch")
        else
            % The first component of the pair is the heuristic used above
            % the split point, the second is the one used below it.
            HeurPair = UpperHeur - LowerHeur,

            % The upper half is processed before the lower half.
            % NOTE(review): presumably because add_edit prepends to the
            % diff, so that edits end up in ascending order -- confirm
            % against difftype.
            myers.bsearch(DOffset, FileX, FileY, XSplit, XEnd, YSplit, YEnd,
                UpperHeur, !Fwd, !Bwd, !Diff),
            myers.bsearch(DOffset, FileX, FileY, XStart, XSplit,
                YStart, YSplit, LowerHeur, !Fwd, !Bwd, !Diff)
        )
    else
        add_edit(XStart - XEnd, YStart - YEnd, !Diff)
    ).
|
|
|
|
% The values that stay fixed while the middle of one sub-problem is being
% searched for. find_middle builds one of these terms, and the search
% predicates pick the fields they need out of it.
%
:- type myers_constants
    --->    constants(
                int,        % DOffset: added to a diagonal number to get
                            % an index into the Fwd and Bwd arrays.
                array(int), % X: the first sequence being compared.
                array(int), % Y: the second sequence being compared.
                int,        % Xlow: lower bound of the sub-problem in X.
                int,        % Xhigh: upper bound of the sub-problem in X.
                int,        % Ylow: lower bound of the sub-problem in Y.
                int,        % Yhigh: upper bound of the sub-problem in Y.
                int,        % Dmin: Xlow - Yhigh, the lowest diagonal.
                int,        % Dmax: Xhigh - Ylow, the highest diagonal.
                bool,       % DeltaOdd: yes iff the diagonals of the two
                            % corners, Xlow - Ylow and Xhigh - Yhigh,
                            % differ by an odd number.
                heur        % The "too expensive" heuristic in force.
            ).
|
|
|
|
% Whether the search for a middle point may be cut short.
%
% too_expensive(Cutoff): once the cost counter of the search has reached
%   Cutoff, try_heuristics stops searching and settles for a best guess.
% none: the search always runs until the two searches meet.
%
:- type heur
    --->    too_expensive(int)
    ;       none.
|
|
|
|
% find_middle(DOffset, FileX, FileY, Xlow, Xhigh, Ylow, Yhigh, Heur,
%   !Fwd, !Bwd, Xmid, Ymid, Cost, HeurReq):
%
% Find a point (Xmid, Ymid) at which to split the given sub-problem.
% Cost is the cost reported by the search, and HeurReq is the pair of
% heuristics to use for the two halves.
%
% The nice thing about this algorithm is that the point does not have to
% be the true middle of the diff; an estimate will do. With an inexact
% middle the resulting diff is still correct, it just is not necessarily
% minimal.
%
:- pred find_middle(int::in, array(int)::in, array(int)::in, pos::in, pos::in,
    pos::in, pos::in, heur::in, array(int)::array_di, array(int)::array_uo,
    array(int)::array_di, array(int)::array_uo, pos::out, pos::out, int::out,
    pair(heur)::out) is det.

find_middle(DOffset, FileX, FileY, Xlow, Xhigh, Ylow, Yhigh, Heur, !Fwd,
        !Bwd, Xmid, Ymid, Cost, HeurReq) :-
    % The forward search starts on the diagonal through (Xlow, Ylow),
    % the backward search on the one through (Xhigh, Yhigh).
    FwdDiag = Xlow - Ylow,
    BwdDiag = Xhigh - Yhigh,
    array.set(FwdDiag + DOffset, Xlow, !Fwd),
    array.set(BwdDiag + DOffset, Xhigh, !Bwd),

    % The parity of the distance between the two starting diagonals
    % decides which of the two searches checks for a meeting point.
    DeltaOdd = ( if (FwdDiag - BwdDiag) /\ 1 = 1 then yes else no ),

    LowestDiag = Xlow - Yhigh,
    HighestDiag = Xhigh - Ylow,
    Constants = constants(DOffset, FileX, FileY, Xlow, Xhigh, Ylow, Yhigh,
        LowestDiag, HighestDiag, DeltaOdd, Heur),

    % Both searches initially cover just their starting diagonal, and
    % the cost counter starts at 1.
    find_middle_2(Constants, !Fwd, !Bwd, FwdDiag, FwdDiag, BwdDiag, BwdDiag,
        1, Cost, Mid, HeurReq),
    Mid = Xmid - Ymid.
|
|
|
|
% find_middle_2(Constants, !Fwd, !Bwd, Fmin, Fmax, Bmin, Bmax, !Cost,
%   Mid, HeurReq):
%
% Perform one round of the search. Fmin .. Fmax and Bmin .. Bmax are the
% diagonal ranges covered so far by the forward and backward searches.
% This predicate adjusts the forward range by one diagonal at each end
% and then lets find_forward_reaching_path walk the new range from its
% top diagonal downwards.
%
:- pred find_middle_2(myers_constants::in,
    array(int)::array_di, array(int)::array_uo,
    array(int)::array_di, array(int)::array_uo,
    int::in, int::in, int::in, int::in, int::in,
    int::out, pair(pos)::out, pair(heur)::out) is det.

find_middle_2(Constants, !Fwd, !Bwd, Fmin, Fmax, Bmin, Bmax, !Cost,
        Mid, HeurReq) :-
    Constants = constants(DOffset, _, _, _, _, _, _, Dmin, Dmax, _, _),

    % Lower end: while Dmin has not been reached, widen the range and plant
    % the sentinel -1 in the slot just below the new bottom diagonal.
    % Once Dmin has been reached, narrow the range instead.
    ( if Fmin =< Dmin then
        NewFmin = Fmin + 1
    else
        NewFmin = Fmin - 1,
        array.set(NewFmin + DOffset - 1, -1, !Fwd)
    ),

    % Upper end: the mirror image of the above, bounded by Dmax.
    ( if Fmax >= Dmax then
        NewFmax = Fmax - 1
    else
        NewFmax = Fmax + 1,
        array.set(NewFmax + DOffset + 1, -1, !Fwd)
    ),

    find_forward_reaching_path(Constants, !Fwd, !Bwd, NewFmin, NewFmax,
        Bmin, Bmax, NewFmax, !Cost, Mid, HeurReq).
|
|
|
|
% find_forward_reaching_path(Constants, !Fwd, !Bwd, Fmin, Fmax, Bmin, Bmax,
%   SearchCost, !Cost, Mid, HeurReq):
%
% Extend the forward search along one diagonal. Despite its name,
% SearchCost is used as a diagonal number: the first call passes Fmax,
% each recursive call passes two less, and the walk ends when the value
% drops below Fmin.
%
% If the forward search meets the backward search on this diagonal
% (only checked when DeltaOdd = yes), the meeting point is returned as Mid
% with cost 2 * Cost + 1 and no heuristic for either half. When all the
% diagonals have been done without a meeting, the backward range is
% adjusted and the backward search takes its turn.
%
:- pred find_forward_reaching_path(myers_constants::in,
    array(int)::array_di, array(int)::array_uo,
    array(int)::array_di, array(int)::array_uo,
    int::in, int::in, int::in, int::in, int::in, int::in, int::out,
    pair(pos)::out, pair(heur)::out) is det.

find_forward_reaching_path(Constants, !Fwd, !Bwd, Fmin, Fmax, Bmin, Bmax,
        SearchCost, !Cost, Mid, HeurReq) :-
    Constants = constants(DOffset, FileX, FileY, _, Xhigh, _, Yhigh,
        Dmin, Dmax, DeltaOdd, _),
    ( if SearchCost >= Fmin then
        Slot = SearchCost + DOffset,

        % Start from whichever neighbouring diagonal reaches further:
        % the one above as it is, or the one below advanced by one in X.
        BelowX = array.lookup(!.Fwd, Slot - 1),
        AboveX = array.lookup(!.Fwd, Slot + 1),
        ( if BelowX < AboveX then
            StartX = AboveX
        else
            StartX = BelowX + 1
        ),
        StartY = StartX - SearchCost,

        % Follow the run of matching elements and record how far it got.
        scan_forward(FileX, FileY, Xhigh, Yhigh, StartX, EndX, StartY, EndY),
        array.set(Slot, EndX, !Fwd),

        ( if
            DeltaOdd = yes,
            Bmin =< SearchCost,
            SearchCost =< Bmax,
            array.lookup(!.Bwd, Slot) =< EndX
        then
            % The backward search has already come at least this far
            % down this diagonal, so the two searches have met.
            Mid = EndX - EndY,
            !:Cost = 2 * !.Cost + 1,
            HeurReq = none - none
        else
            find_forward_reaching_path(Constants, !Fwd, !Bwd, Fmin, Fmax,
                Bmin, Bmax, SearchCost - 2, !Cost, Mid, HeurReq)
        )
    else
        % The forward range is exhausted. Adjust the backward range the
        % same way find_middle_2 adjusts the forward one, except that the
        % sentinel is the largest int, and hand over to the backward search.
        MaxInt = int.max_int,
        ( if Bmin =< Dmin then
            NewBmin = Bmin + 1
        else
            NewBmin = Bmin - 1,
            array.set(NewBmin + DOffset - 1, MaxInt, !Bwd)
        ),
        ( if Bmax >= Dmax then
            NewBmax = Bmax - 1
        else
            NewBmax = Bmax + 1,
            array.set(NewBmax + DOffset + 1, MaxInt, !Bwd)
        ),
        find_backward_reaching_path(Constants, !Fwd, !Bwd, Fmin, Fmax,
            NewBmin, NewBmax, NewBmax, !Cost, Mid, HeurReq)
    ).
|
|
|
|
% find_backward_reaching_path(Constants, !Fwd, !Bwd, Fmin, Fmax, Bmin, Bmax,
%   SearchCost, !Cost, Mid, HeurReq):
%
% The backward counterpart of find_forward_reaching_path. SearchCost is
% again used as a diagonal number: it starts at Bmax and goes down in
% steps of two until it drops below Bmin.
%
% If the backward search meets the forward search on this diagonal
% (only checked when DeltaOdd = no), the meeting point is returned as Mid
% with cost 2 * Cost and no heuristic for either half. When all the
% diagonals have been done without a meeting, try_heuristics decides
% whether to give up or to go round again.
%
:- pred find_backward_reaching_path(myers_constants::in,
    array(int)::array_di, array(int)::array_uo,
    array(int)::array_di, array(int)::array_uo, int::in, int::in, int::in,
    int::in, int::in, int::in, int::out, pair(pos)::out, pair(heur)::out)
    is det.

find_backward_reaching_path(Constants, !Fwd, !Bwd, Fmin, Fmax, Bmin, Bmax,
        SearchCost, !Cost, Mid, HeurReq) :-
    ( if SearchCost >= Bmin then
        Constants = constants(DOffset, FileX, FileY, Xlow, _, Ylow, _,
            _, _, DeltaOdd, _),
        Slot = SearchCost + DOffset,

        % Start from whichever neighbouring diagonal reaches further back:
        % the one below as it is, or the one above moved back by one in X.
        BelowX = array.lookup(!.Bwd, Slot - 1),
        AboveX = array.lookup(!.Bwd, Slot + 1),
        ( if BelowX >= AboveX then
            StartX = AboveX - 1
        else
            StartX = BelowX
        ),
        StartY = StartX - SearchCost,

        % Follow the run of matching elements backwards and record
        % how far it got.
        scan_backward(FileX, FileY, Xlow, Ylow, StartX, EndX, StartY, EndY),
        array.set(Slot, EndX, !Bwd),

        ( if
            DeltaOdd = no,
            Fmin =< SearchCost,
            SearchCost =< Fmax,
            EndX =< array.lookup(!.Fwd, Slot)
        then
            % The forward search has already come at least this far
            % up this diagonal, so the two searches have met.
            Mid = EndX - EndY,
            !:Cost = 2 * !.Cost,
            HeurReq = none - none
        else
            find_backward_reaching_path(Constants, !Fwd, !Bwd, Fmin, Fmax,
                Bmin, Bmax, SearchCost - 2, !Cost, Mid, HeurReq)
        )
    else
        try_heuristics(Constants, !Fwd, !Bwd, Fmin, Fmax, Bmin, Bmax, !Cost,
            Mid, HeurReq)
    ).
|
|
|
|
% try_heuristics(Constants, !Fwd, !Bwd, Fmin, Fmax, Bmin, Bmax, !Cost,
%   Mid, HeurReq):
%
% Called when a complete round of forward and backward searching has not
% found a meeting point. See whether a heuristic lets us avoid doing
% more work; if none applies, start the next round.
%
:- pred try_heuristics(myers_constants::in,
    array(int)::array_di, array(int)::array_uo,
    array(int)::array_di, array(int)::array_uo,
    int::in, int::in, int::in, int::in, int::in, int::out,
    pair(pos)::out, pair(heur)::out) is det.

try_heuristics(Constants, !Fwd, !Bwd, Fmin, Fmax, Bmin, Bmax, !Cost,
        Mid, HeurReq) :-
    Constants = constants(_, _, _, _, _, _, _, _, _, _, Heur),
    (
        Heur = too_expensive(Cutoff),
        GiveUp = ( if !.Cost < Cutoff then no else yes )
    ;
        Heur = none,
        GiveUp = no
    ),
    (
        GiveUp = yes,
        % We have done too much work already, so stop here and settle
        % for a best guess.
        too_expensive_heuristic(Constants, !.Fwd, !.Bwd, Fmin, Fmax,
            Bmin, Bmax, !Cost, Mid, HeurReq)
    ;
        GiveUp = no,
        % No heuristic applies, so look for a diff that costs one more.
        !:Cost = !.Cost + 1,
        find_middle_2(Constants, !Fwd, !Bwd, Fmin, Fmax, Bmin, Bmax, !Cost,
            Mid, HeurReq)
    ).
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% too_expensive_heuristic(Constants, Fwd, Bwd, Fmin, Fmax, Bmin, Bmax,
%   !Cost, Mid, HeurReq):
%
% We have done too much work, so make our best guess: find the diagonal
% on which either search has got furthest, and return the end of that
% diagonal as the "middle" Mid. The cost returned is 2 * Cost - 1.
%
% Progress is measured by X + Y. The forward search started at
% (Xlow, Ylow), so its progress is FXYBest - (Xlow + Ylow); the backward
% search started at (Xhigh, Yhigh), so its progress is
% (Xhigh + Yhigh) - BXYBest. The search that has made more progress
% supplies the middle; on a tie the backward search wins.
%
% HeurReq is consumed by myers.bsearch, which applies the first component
% to the half above Mid (up to (Xhigh, Yhigh)) and the second component
% to the half below Mid (down to (Xlow, Ylow)). The half that the chosen
% search has already crossed is the one known to be cheap, so that half
% is diffed exactly (none); the other half, whose cost is unknown, keeps
% the heuristic so that the bound on the total work still applies to it.
% This matches the lo_minimal/hi_minimal assignment in GNU diff.
%
:- pred too_expensive_heuristic(myers_constants::in, array(int)::array_ui,
    array(int)::array_ui, int::in, int::in, int::in, int::in,
    int::in, int::out, pair(pos)::out, pair(heur)::out) is det.

too_expensive_heuristic(Constants, Fwd, Bwd, Fmin, Fmax, Bmin, Bmax, !Cost,
        Mid, HeurReq) :-
    Constants = constants(DOffset, _, _, Xlow, Xhigh, Ylow, Yhigh, _, _, _,
        Heur),

    % Find the best forward diagonal: the one with the largest X + Y.
    find_best_forward_diagonal(Fmax, Fmin, Fwd, Xhigh, Yhigh, DOffset, -1,
        FXYBest, 0, FXBest),

    % Find the best backward diagonal: the one with the smallest X + Y.
    int.max_int(MaxInt),
    find_best_backward_diagonal(Bmax, Bmin, Bwd, Xlow, Ylow, DOffset, MaxInt,
        BXYBest, 0, BXBest),

    % Choose whichever of the two has made more progress from its own
    % starting corner, and return its end point as the "middle".
    ( if
        (Xhigh + Yhigh) - BXYBest < FXYBest - (Xlow + Ylow)
    then
        % Forward diagonal: the lower half has been crossed already.
        % Diff it exactly, and keep the heuristic for the upper half.
        Xmid = FXBest,
        Ymid = FXYBest - FXBest,
        HeurReq = Heur - none
    else
        % Backward diagonal: the upper half has been crossed already.
        % Diff it exactly, and keep the heuristic for the lower half.
        Xmid = BXBest,
        Ymid = BXYBest - BXBest,
        HeurReq = none - Heur
    ),
    Mid = Xmid - Ymid,
    !:Cost = 2 * !.Cost - 1.
|
|
|
|
% find_best_forward_diagonal(D, Fmin, Fwd, Xhigh, Yhigh, DOffset,
%   !FXYBest, !FXBest):
%
% Walk the diagonals D, D - 2, D - 4, ... for as long as they are not
% below Fmin. For each one, take the X that the forward search recorded
% in Fwd, clamp the point so that X =< Xhigh and Y =< Yhigh, and compute
% X + Y. !FXYBest ends up as the largest such sum that exceeds the
% initial value, and !FXBest as the X of the point that produced it
% (the first one found wins ties).
%
:- pred find_best_forward_diagonal(int::in, int::in, array(int)::array_ui,
    int::in, int::in, int::in, int::in, int::out, int::in, int::out) is det.

find_best_forward_diagonal(D, Fmin, Fwd, Xhigh, Yhigh, DOffset,
        !FXYBest, !FXBest) :-
    ( if D >= Fmin then
        ReachedX = array.lookup(Fwd, D + DOffset),
        ClampedX = int.min(Xhigh, ReachedX),
        ( if ClampedX - D > Yhigh then
            % Clamping X was not enough: pull the point back along
            % the diagonal until Y fits as well.
            X = Yhigh + D,
            Y = Yhigh
        else
            X = ClampedX,
            Y = ClampedX - D
        ),
        Sum = X + Y,
        ( if Sum > !.FXYBest then
            !:FXYBest = Sum,
            !:FXBest = X
        else
            true
        ),
        find_best_forward_diagonal(D - 2, Fmin, Fwd, Xhigh, Yhigh, DOffset,
            !FXYBest, !FXBest)
    else
        true
    ).
|
|
|
|
% find_best_backward_diagonal(D, Bmin, Bwd, Xlow, Ylow, DOffset,
%   !BXYBest, !BXBest):
%
% Walk the diagonals D, D - 2, D - 4, ... for as long as they are not
% below Bmin. For each one, take the X that the backward search recorded
% in Bwd, clamp the point so that X >= Xlow and Y >= Ylow, and compute
% X + Y. !BXYBest ends up as the smallest such sum that is below the
% initial value, and !BXBest as the X of the point that produced it
% (the first one found wins ties).
%
:- pred find_best_backward_diagonal(int::in, int::in, array(int)::array_ui,
    int::in, int::in, int::in, int::in, int::out, int::in, int::out) is det.

find_best_backward_diagonal(D, Bmin, Bwd, Xlow, Ylow, DOffset, !BXYBest,
        !BXBest) :-
    ( if D >= Bmin then
        ReachedX = array.lookup(Bwd, D + DOffset),
        ClampedX = int.max(Xlow, ReachedX),
        ( if ClampedX - D < Ylow then
            % Clamping X was not enough: push the point forward along
            % the diagonal until Y fits as well.
            X = Ylow + D,
            Y = Ylow
        else
            X = ClampedX,
            Y = ClampedX - D
        ),
        Sum = X + Y,
        ( if Sum < !.BXYBest then
            !:BXYBest = Sum,
            !:BXBest = X
        else
            true
        ),
        find_best_backward_diagonal(D - 2, Bmin, Bwd, Xlow, Ylow, DOffset,
            !BXYBest, !BXBest)
    else
        true
    ).
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
|
|
% scan_forward(FileX, FileY, Xhigh, Yhigh, !Xlow, !Ylow):
%
% Travel forwards along a snake: starting at (!.Xlow, !.Ylow), advance
% both positions together while the elements of FileX and FileY at those
% positions are equal, stopping as soon as they differ or either position
% reaches its limit (Xhigh or Yhigh respectively).
%
:- pred scan_forward(array(int)::in, array(int)::in, int::in, int::in,
    int::in, int::out, int::in, int::out) is det.

scan_forward(FileX, FileY, Xhigh, Yhigh, !Xlow, !Ylow) :-
    ( if
        !.Xlow < Xhigh,
        !.Ylow < Yhigh,
        array.lookup(FileX, !.Xlow) = array.lookup(FileY, !.Ylow)
    then
        scan_forward(FileX, FileY, Xhigh, Yhigh,
            !.Xlow + 1, !:Xlow, !.Ylow + 1, !:Ylow)
    else
        true
    ).
|
|
|
|
% scan_backward(FileX, FileY, Xlow, Ylow, !Xhigh, !Yhigh):
%
% Travel backwards along a snake: starting at (!.Xhigh, !.Yhigh), move
% both positions back together while the elements of FileX and FileY just
% before those positions are equal, stopping as soon as they differ or
% either position reaches its limit (Xlow or Ylow respectively).
%
:- pred scan_backward(array(int)::in, array(int)::in, int::in, int::in,
    int::in, int::out, int::in, int::out) is det.

scan_backward(FileX, FileY, Xlow, Ylow, !Xhigh, !Yhigh) :-
    ( if
        Xlow < !.Xhigh,
        Ylow < !.Yhigh,
        PrevX = !.Xhigh - 1,
        PrevY = !.Yhigh - 1,
        array.lookup(FileX, PrevX) = array.lookup(FileY, PrevY)
    then
        scan_backward(FileX, FileY, Xlow, Ylow,
            PrevX, !:Xhigh, PrevY, !:Yhigh)
    else
        true
    ).
|
|
|
|
%-----------------------------------------------------------------------------%
|
|
:- end_module myers.
|
|
%-----------------------------------------------------------------------------%
|