Files
mercury/samples/diff/match.m
Andrew Bromage b3af56e8fa Modified files
Estimated hours taken: 30

Modified files
--------------

samples/diff/Mmakefile:
	Minor documentation update.

samples/diff/README:
samples/diff/TODO:
	Update stuff that's now done, do a couple of minor wording
	changes.

samples/diff/diff.m:
	Fix case of identical filenames (which implicitly assumed
	no_diff_implies_no_output).  Add new match pass, call the
	new diff algorithm.

samples/diff/diff_out.m:
	Add --cvs-merge-conflict output style.  Slight reorganisation
	of top-level predicate.  Lots of small fixes to use better
	syntax (e.g. functional style for integer maths operations).

samples/diff/difftype.m:
	Added first_mentioned_positions/3, last_mentioned_positions/3,
	add_edit/4.

samples/diff/file.m:
	Use io__read_line_as_string.

samples/diff/filter.m:
	Minor syntax/wording changes.

samples/diff/options.m:
	Update all the newly handled options.

New files
---------

samples/diff/myers.m:
	New diff algorithm.

samples/diff/match.m:
	New pass to match common lines in the files to be diffed.

Removed file
------------

samples/diff/lcss.m:
	Functionality replaced by myers.m.
1998-09-15 04:54:41 +00:00

153 lines
4.8 KiB
Mathematica

%-----------------------------------------------------------------------------%
% Copyright (C) 1998 The University of Melbourne.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%-----------------------------------------------------------------------------%
% Main author: bromage
% This module contains code to match common lines before diffing, based on
% the command-line options presented. The important command-line options
% are --ignore-case, --ignore-all-space and --ignore-space-change.
% The output of build_matches is two arrays of integers, where any two
% lines are assigned the same integer iff they are identical (modulo case,
% space and/or space change depending on the command line options). An
% added benefit of doing this here is that the diff algorithm (myers.m)
% only has to compare integers instead of strings.
% TO DO: We should collapse sequences of lines which only appear in one
% file and pretend the whole sequence is just one line. (GNU
% diff does the same thing a slightly different way, but this
% approach seems a bit more Mercury-esque.) Since Myers'
% algorithm runs in O(ND) time, and performing this pre-filtering
% here would reduce the value of D (by quite a lot in real-world
% cases), things should speed up.
%-----------------------------------------------------------------------------%
:- module match.
:- interface.
:- import_module file, io, array.
% build_matches(File1, File2, FileX, FileY, IO0, IO):
%
% Looks up the --ignore-case, --ignore-all-space and
% --ignore-space-change options in the globals held in the I/O state,
% then produces one array of integer line identifiers per input file.
% FileX describes File1 and FileY describes File2; element I of each
% array is the identifier of line I of the corresponding file.
% Two lines (in the same file or in different files) receive the same
% identifier iff they are equal once the selected options have been
% applied to them.
:- pred build_matches(file :: in, file :: in,
array(int) :: out, array(int) :: out,
io__state :: di, io__state :: uo) is det.
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
:- implementation.
:- import_module globals, options.
:- import_module bool, list, int, std_util, string, char, map, require.
% The command-line options which affect line matching, bundled up so
% that they can be passed around as a single argument.  The first
% field is derived from the other three: build_matches sets it to
% `yes' iff none of the three options is set, which lets the matching
% pass skip line normalisation entirely in the common case.
:- type match_options
---> match_options(
bool, % `yes' iff none of the options below is set
bool, % --ignore-case
bool, % --ignore-all-space
bool % --ignore-space-change
).
% build_matches(File1, File2, FileX, FileY, IO0, IO):
%
% Read the three matching options from the globals, then number the
% lines of File1 and File2.  The map from (normalised) line text to
% identifier and the next unused identifier are both carried over
% from the first file to the second, so a line appearing in both
% files is given the same integer in FileX and in FileY.
build_matches(File1, File2, FileX, FileY, IO0, IO) :-
    globals__io_lookup_bool_option(ignore_case, IgnoreCase, IO0, IO1),
    globals__io_lookup_bool_option(ignore_all_space, IgnoreAllSpace,
        IO1, IO2),
    globals__io_lookup_bool_option(ignore_space_change, IgnoreSpaceChange,
        IO2, IO),

    % Record whether any normalisation is needed at all, so that the
    % per-line code can take a fast path when it is not.
    bool__or_list([IgnoreCase, IgnoreAllSpace, IgnoreSpaceChange],
        SomeOptSet),
    bool__not(SomeOptSet, NoOptSet),
    Opts = match_options(NoOptSet, IgnoreCase, IgnoreAllSpace,
        IgnoreSpaceChange),

    % First file: start from an empty map and identifier 0.
    file__get_numlines(File1, NumLinesX),
    array__init(NumLinesX, -1, FileX0),
    map__init(Seen0),
    build_matches_for_file(Opts, File1, NumLinesX - 1, Seen0, Seen1,
        0, NextID, FileX0, FileX),

    % Second file: continue with the map and counter left by the first.
    file__get_numlines(File2, NumLinesY),
    array__init(NumLinesY, -1, FileY0),
    build_matches_for_file(Opts, File2, NumLinesY - 1, Seen1, _,
        NextID, _, FileY0, FileY).
% build_matches_for_file(Opts, OrigFile, LineNo, Seen0, Seen,
%       NextID0, NextID, Classes0, Classes):
%
% Assign an identifier to each of lines LineNo, LineNo - 1, ..., 0 of
% OrigFile, storing the identifier of line N in slot N of the array.
% Seen maps each distinct (normalised) line text met so far to its
% identifier; NextID is the next identifier not yet handed out.
% Calls error/1 if OrigFile has no line at a requested index.
:- pred build_matches_for_file(match_options, file, int,
    map(string, int), map(string, int), int, int, array(int), array(int)).
:- mode build_matches_for_file(in, in, in, in, out, in, out,
    array_di, array_uo) is det.

build_matches_for_file(Opts, OrigFile, LineNo, Seen0, Seen, NextID0, NextID,
        Classes0, Classes) :-
    ( LineNo >= 0 ->
        ( file__get_line(OrigFile, LineNo, FoundLine) ->
            RawLine = FoundLine
        ;
            error("build_matches_for_file")
        ),
        match_key(Opts, RawLine, Key),

        % Reuse the identifier of an already-seen line, otherwise
        % allocate a fresh one and remember it.
        ( map__search(Seen0, Key, KnownID) ->
            LineID = KnownID,
            Seen1 = Seen0,
            NextID1 = NextID0
        ;
            LineID = NextID0,
            map__det_insert(Seen0, Key, NextID0, Seen1),
            NextID1 is NextID0 + 1
        ),
        array__set(Classes0, LineNo, LineID, Classes1),
        build_matches_for_file(Opts, OrigFile, LineNo - 1, Seen1, Seen,
            NextID1, NextID, Classes1, Classes)
    ;
        % Walked past the first line: nothing left to do.
        Seen = Seen0,
        NextID = NextID0,
        Classes = Classes0
    ).

% match_key(Opts, Line, Key):
%
% Key is the text under which Line is looked up when matching lines.
% If no matching option is set it is Line itself; otherwise it is Line
% after normalise_line has applied the selected options.
:- pred match_key(match_options, string, string).
:- mode match_key(in, in, out) is det.

match_key(match_options(NoOpts, IgnCase, IgnAllSpc, IgnSpcChg), Line, Key) :-
    (
        NoOpts = yes,
        Key = Line
    ;
        NoOpts = no,
        string__to_char_list(Line, Chars0),
        normalise_line(no, IgnCase, IgnAllSpc, IgnSpcChg, Chars0, Chars),
        string__from_char_list(Chars, Key)
    ).
% normalise_line(PrevWasSpace, IgnCase, IgnAllSpc, IgnSpcChg, Chars0, Chars):
%
% Chars is Chars0 rewritten according to the three option flags:
%   IgnCase   = yes  converts every character with char__to_lower;
%   IgnAllSpc = yes  drops every whitespace character;
%   IgnSpcChg = yes  (when IgnAllSpc = no) replaces each run of
%                    whitespace characters by a single ' '.
% PrevWasSpace says whether the character just before Chars0 was
% whitespace that has already been accounted for; when it is `yes',
% a leading run of whitespace in Chars0 produces no ' ' under
% IgnSpcChg.
:- pred normalise_line(bool, bool, bool, bool, list(char), list(char)).
:- mode normalise_line(in, in, in, in, in, out) is det.

normalise_line(_, _, _, _, [], []).
normalise_line(PrevWasSpace, IgnCase, IgnAllSpc, IgnSpcChg,
        [Char0 | Rest0], Out) :-
    (
        IgnCase = yes,
        char__to_lower(Char0, Char)
    ;
        IgnCase = no,
        Char = Char0
    ),
    ( char__is_whitespace(Char) ->
        IsSpace = yes
    ;
        IsSpace = no
    ),
    (
        IsSpace = yes,
        IgnAllSpc = yes
    ->
        % Whitespace vanishes; the "previous was space" state is
        % passed through untouched.
        normalise_line(PrevWasSpace, IgnCase, IgnAllSpc, IgnSpcChg,
            Rest0, Out)
    ;
        IsSpace = yes,
        IgnSpcChg = yes
    ->
        % Only the first whitespace character of a run emits a ' '.
        normalise_line(yes, IgnCase, IgnAllSpc, IgnSpcChg, Rest0, Rest),
        (
            PrevWasSpace = yes,
            Out = Rest
        ;
            PrevWasSpace = no,
            Out = [' ' | Rest]
        )
    ;
        % Either not whitespace, or whitespace is significant.
        normalise_line(no, IgnCase, IgnAllSpc, IgnSpcChg, Rest0, Rest),
        Out = [Char | Rest]
    ).
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%