diff: update of diff/samples/*
Andrew Bromage
bromage at cs.mu.oz.au
Mon Jul 28 16:03:03 AEST 1997
G'day all.
Here's the log and diff for those who care. It's all committed.
Cheers,
Andrew Bromage
Estimated hours taken: 4
General cleanup and bug fix for diff. Features of this diff:
- Changed indenting so it more closely matches the
coding standard.
- Bug fix which was causing it to bomb out if the two
files were identical.
- Update to use unique arrays (array.m).
samples/diff/README:
Added something which resembles this log message.
samples/diff/Mmakefile:
Turned C optimisation off to get around a gcc 2.7.2 bug.
samples/diff/diff.m:
samples/diff/diffs.m:
samples/diff/file.m:
samples/diff/lcss.m:
samples/diff/lcsstype.m:
Changes detailed above.
Index: Mmakefile
===================================================================
RCS file: /home/staff/zs/imp/mercury/samples/diff/Mmakefile,v
retrieving revision 1.1
diff -u -r1.1 Mmakefile
--- Mmakefile 1997/02/13 21:37:32 1.1
+++ Mmakefile 1997/07/28 04:28:21
@@ -8,3 +8,10 @@
MAIN_TARGET=all
depend: diff.depend
all: diff
+
+# Unfortunately, diff does not work quite properly when compiled under gcc
+# 2.7.2 under Digital Unix 3.2 due to a bug in the compiler. The bug was
+# fixed in 2.7.2.1, so feel free to comment out this line if you're using
+# an unbuggy compiler.
+MGNUCFLAGS=-O0
+
Index: README
===================================================================
RCS file: /home/staff/zs/imp/mercury/samples/diff/README,v
retrieving revision 1.2
diff -u -r1.2 README
--- README 1997/07/27 15:09:18 1.2
+++ README 1997/07/28 05:52:10
@@ -60,3 +60,26 @@
--
Marnix Klooster
marnix at worldonline.nl
+
+---------------------------------
+
+The version which appears here is a re-hacked version of Marnix Klooster's
+hacked version of my original. Special thanks to him for making my code
+a lot more maintainable than it originally was. :-)
+
+The changes from the previous version:
+
+ - Bug fix which was causing it to bomb out if the two
+ files were identical.
+
+ - Changed indenting so it more closely matches the Mercury
+ compiler coding standard.
+
+ - Update to use unique arrays (now called array.m).
+
+ - Various minor documentation tweaks.
+
+Oh, and it still runs in nowhere near the speed of GNU diff.
+
+Andrew Bromage 28 Jul 1997
+
Index: diff.m
===================================================================
RCS file: /home/staff/zs/imp/mercury/samples/diff/diff.m,v
retrieving revision 1.7
diff -u -r1.7 diff.m
--- diff.m 1997/07/27 15:09:20 1.7
+++ diff.m 1997/07/28 05:44:15
@@ -1,12 +1,11 @@
%-----------------------------------------------------------------------------%
-% Copyright (C) 1995 The University of Melbourne.
+% Copyright (C) 1995-1997 The University of Melbourne.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%-----------------------------------------------------------------------------%
% Main author: bromage
% Simplified by Marnix Klooster <marnix at worldonline.nl>
-% Last changed 22 October 1996
% Something very similar to the standard diff utility. Sort of. :-)
@@ -71,37 +70,37 @@
usage_error("missing operand").
main_2(no, [Fname1 | Rest]) -->
( { Rest = [Fname2 | _] },
- ( { Fname1 = Fname2 } ->
+ ( { Fname1 = Fname2 } ->
% There are no differences between identical files.
- []
- ;
- % If either file is "-", simply use standard input.
- % (Note: Both can't be "-" since that was dealt with
- % in the previous case.)
- ( { Fname1 = "-" } ->
- file__read_input(Contents1),
- file__read_file(Fname2, Contents2)
- ; { Fname2 = "-" } ->
- file__read_file(Fname1, Contents1),
- file__read_input(Contents2)
- ;
- % Otherwise read the files normally.
- file__read_file(Fname1, Contents1),
- file__read_file(Fname2, Contents2)
- ),
- % Now do the diff.
- ( { Contents1 = ok(File1), Contents2 = ok(File2) } ->
- diff__do_diff(File1, File2)
- ; { Contents1 = error(Msg) } ->
- usage_io_error(Msg)
- ; { Contents2 = error(Msg) } ->
- usage_io_error(Msg)
+ []
;
- { error("main2") }
+ % If either file is "-", simply use standard input.
+ % (Note: Both can't be "-" since that was dealt with
+ % in the previous case.)
+ ( { Fname1 = "-" } ->
+ file__read_input(Contents1),
+ file__read_file(Fname2, Contents2)
+ ; { Fname2 = "-" } ->
+ file__read_file(Fname1, Contents1),
+ file__read_input(Contents2)
+ ;
+ % Otherwise read the files normally.
+ file__read_file(Fname1, Contents1),
+ file__read_file(Fname2, Contents2)
+ ),
+ % Now do the diff.
+ ( { Contents1 = ok(File1), Contents2 = ok(File2) } ->
+ diff__do_diff(File1, File2)
+ ; { Contents1 = error(Msg) } ->
+ usage_io_error(Msg)
+ ; { Contents2 = error(Msg) } ->
+ usage_io_error(Msg)
+ ;
+ { error("main2") }
+ )
)
- )
; { Rest = [] },
- usage_error("missing operand")
+ usage_error("missing operand")
).
%-----------------------------------------------------------------------------%
Index: diffs.m
===================================================================
RCS file: /home/staff/zs/imp/mercury/samples/diff/diffs.m,v
retrieving revision 1.2
diff -u -r1.2 diffs.m
--- diffs.m 1997/07/27 15:09:21 1.2
+++ diffs.m 1997/07/28 05:44:23
@@ -1,12 +1,11 @@
%-----------------------------------------------------------------------------%
-% Copyright (C) 1995 The University of Melbourne.
+% Copyright (C) 1995-1997 The University of Melbourne.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%-----------------------------------------------------------------------------%
% Main author: bromage
% Simplified by Marnix Klooster <marnix at worldonline.nl>
-% Last changed 22 October 1996
% This module contains the predicates to convert an lcss to a diff,
% and to display diffs.
@@ -41,7 +40,7 @@
%-----------------------------------------------------------------------------%
:- implementation.
-:- import_module require, std_util, int, list.
+:- import_module require, std_util, int, list, char.
% A segment is a pair of positions. Numbering items from 0,
% segment P-Q stands for items P up to, but not including, Q.
@@ -79,17 +78,17 @@
XNext is X2 + 1, YNext is Y2 + 1,
diffs__to_diff2(XNext, YNext, Lcss, Diff1),
( X = X2 ->
- ( Y = Y2 ->
- Diff = Diff1
- ;
- Diff = [add(X,Y - Y2) | Diff1]
- )
+ ( Y = Y2 ->
+ Diff = Diff1
+ ;
+ Diff = [add(X,Y - Y2) | Diff1]
+ )
;
- ( Y = Y2 ->
- Diff = [delete(X - X2,Y) | Diff1]
- ;
- Diff = [change(X - X2,Y - Y2) | Diff1]
- )
+ ( Y = Y2 ->
+ Diff = [delete(X - X2,Y) | Diff1]
+ ;
+ Diff = [change(X - X2,Y - Y2) | Diff1]
+ )
).
%-----------------------------------------------------------------------------%
@@ -100,16 +99,16 @@
diffs__display_diff(_, _, []) --> { true }.
diffs__display_diff(File1, File2, [SingDiff | Diff]) -->
( { SingDiff = add(X, Y1 - Y2) },
- diffs__write_command(X - X, 'a', Y1 - Y2),
- diffs__show_file(File2, "> ", Y1 - Y2)
+ diffs__write_command(X - X, 'a', Y1 - Y2),
+ diffs__show_file(File2, "> ", Y1 - Y2)
; { SingDiff = delete(X1 - X2, Y) },
- diffs__write_command(X1 - X2, 'd', Y - Y),
- diffs__show_file(File1, "< ", X1 - X2)
+ diffs__write_command(X1 - X2, 'd', Y - Y),
+ diffs__show_file(File1, "< ", X1 - X2)
; { SingDiff = change(X1 - X2, Y1 - Y2) },
- diffs__write_command(X1 - X2, 'c', Y1 - Y2),
- diffs__show_file(File1, "< ", X1 - X2),
- io__write_string("---\n"),
- diffs__show_file(File2, "> ", Y1 - Y2)
+ diffs__write_command(X1 - X2, 'c', Y1 - Y2),
+ diffs__show_file(File1, "< ", X1 - X2),
+ io__write_string("---\n"),
+ diffs__show_file(File2, "> ", Y1 - Y2)
),
diffs__display_diff(File1, File2, Diff).
@@ -121,21 +120,21 @@
:- mode diffs__write_command(in, in, in, di, uo) is det.
diffs__write_command(X - X2, C, Y - Y2) -->
{ X1 is X + 1 },
- ( { X1 >= X2 } -> % either empty or singleton segment
- io__write_int(X2)
+ ( { X1 >= X2 } -> % either empty or singleton segment
+ io__write_int(X2)
;
- io__write_int(X1),
- io__write_char(','),
- io__write_int(X2)
+ io__write_int(X1),
+ io__write_char(','),
+ io__write_int(X2)
),
io__write_char(C),
{ Y1 is Y + 1 },
- ( { Y1 >= Y2 } -> % either empty or singleton segment
- io__write_int(Y2)
+ ( { Y1 >= Y2 } -> % either empty or singleton segment
+ io__write_int(Y2)
;
- io__write_int(Y1),
- io__write_char(','),
- io__write_int(Y2)
+ io__write_int(Y1),
+ io__write_char(','),
+ io__write_int(Y2)
),
io__write_char('\n').
@@ -143,18 +142,18 @@
diffs__display_diff_rcs(_File1, _File2, []) --> { true }.
diffs__display_diff_rcs(File1, File2, [Cmd | Diff]) -->
( { Cmd = add(X, Y1 - Y2) },
- { Y is Y2 - Y1 },
- diffs__write_command_rcs('a', X, Y),
- diffs__show_file(File2, "", Y1 - Y2)
+ { Y is Y2 - Y1 },
+ diffs__write_command_rcs('a', X, Y),
+ diffs__show_file(File2, "", Y1 - Y2)
; { Cmd = delete(X1 - X2, _Y) },
- { X is X2 - X1 },
- diffs__write_command_rcs('d', X1, X)
+ { X is X2 - X1 },
+ diffs__write_command_rcs('d', X1, X)
; { Cmd = change(X1 - X2, Y1 - Y2) },
- { X is X2 - X1 },
- { Y is Y2 - Y1 },
- diffs__write_command_rcs('d', X1, X),
- diffs__write_command_rcs('a', X1, Y),
- diffs__show_file(File2, "", Y1 - Y2)
+ { X is X2 - X1 },
+ { Y is Y2 - Y1 },
+ diffs__write_command_rcs('d', X1, X),
+ diffs__write_command_rcs('a', X1, Y),
+ diffs__show_file(File2, "", Y1 - Y2)
),
diffs__display_diff_rcs(File1, File2, Diff).
@@ -181,15 +180,15 @@
:- mode diffs__show_file(in, in, in, di, uo) is det.
diffs__show_file(File, Prefix, Low - High) -->
( { Low < High } ->
- ( { file__get_line(File, Low, Line) } ->
- { Low1 is Low + 1 },
- io__write_strings([Prefix, Line]),
- diffs__show_file(File, Prefix, Low1 - High)
- ;
- { error("diffs_show_file: file ended prematurely") }
- )
+ ( { file__get_line(File, Low, Line) } ->
+ { Low1 is Low + 1 },
+ io__write_strings([Prefix, Line]),
+ diffs__show_file(File, Prefix, Low1 - High)
+ ;
+ { error("diffs_show_file: file ended prematurely") }
+ )
;
- { true }
+ { true }
).
%-----------------------------------------------------------------------------%
Index: file.m
===================================================================
RCS file: /home/staff/zs/imp/mercury/samples/diff/file.m,v
retrieving revision 1.8
diff -u -r1.8 file.m
--- file.m 1997/07/27 15:09:22 1.8
+++ file.m 1997/07/28 04:54:21
@@ -1,12 +1,11 @@
%-----------------------------------------------------------------------------%
-% Copyright (C) 1995 The University of Melbourne.
+% Copyright (C) 1995-1997 The University of Melbourne.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%-----------------------------------------------------------------------------%
% Main author: bromage
% Simplified by Marnix Klooster <marnix at worldonline.nl>
-% Last changed 22 October 1996
% This module provides file input. One can read a file entirely,
% select a single line from a read file, get the number of lines
@@ -60,11 +59,11 @@
file__read_file(FileName, Contents) -->
io__open_input(FileName, Res),
( { Res = ok(InputStream) },
- file__read_stream(InputStream, Contents0),
- io__close_input(InputStream),
- { Contents = ok(Contents0) }
+ file__read_stream(InputStream, Contents0),
+ io__close_input(InputStream),
+ { Contents = ok(Contents0) }
; { Res = error(Error) },
- { Contents = error(Error) }
+ { Contents = error(Error) }
).
% Get the input stream, then read from it.
@@ -74,31 +73,28 @@
% file__read_stream is the "real" file reader.
:- pred file__read_stream(io__input_stream, file, io__state, io__state).
-:- mode file__read_stream(in, out, di, uo) is det.
+:- mode file__read_stream(in, array_uo, di, uo) is det.
file__read_stream(Stream, File) -->
- file__read_stream2(Stream, 0, _, File).
+ file__read_stream2(Stream, 0, File).
% Given a Stream from which LinesIn lines have already been
% read, fill File[LinesIn] to File[LinesOut-1] with the rest
% of the lines. LinesOut is the number of lines in the file.
% (Note that line numbering starts at zero.)
-:- pred file__read_stream2(io__input_stream, int, int, file,
- io__state, io__state).
-:- mode file__read_stream2(in, in, out, out, di, uo) is det.
-file__read_stream2(Stream, LinesIn, LinesOut, File) -->
+:- pred file__read_stream2(io__input_stream, int, file, io__state, io__state).
+:- mode file__read_stream2(in, in, array_uo, di, uo) is det.
+file__read_stream2(Stream, LineNo, File) -->
io__read_line(Stream, Res),
( { Res = eof },
- { LinesOut = LinesIn },
- { LinesOut1 is LinesOut - 1 },
- { array__init(0, LinesOut1, "", File) }
+ { array__init(LineNo, "", File) }
; { Res = ok(Line) },
- { string__from_char_list(Line, Line1) },
- { LinesIn1 is LinesIn + 1 },
- file__read_stream2(Stream, LinesIn1, LinesOut, File1),
- { array__set(File1, LinesIn, Line1, File) }
+ { string__from_char_list(Line, Line1) },
+ { LineNo1 is LineNo + 1 },
+ file__read_stream2(Stream, LineNo1, File1),
+ { array__set(File1, LineNo, Line1, File) }
; { Res = error(Error) },
- { io__error_message(Error, Msg) },
- { error(Msg) }
+ { io__error_message(Error, Msg) },
+ { error(Msg) }
).
%-----------------------------------------------------------------------------%
Index: lcss.m
===================================================================
RCS file: /home/staff/zs/imp/mercury/samples/diff/lcss.m,v
retrieving revision 1.11
diff -u -r1.11 lcss.m
--- lcss.m 1997/07/27 15:09:23 1.11
+++ lcss.m 1997/07/28 05:44:43
@@ -1,12 +1,11 @@
%-----------------------------------------------------------------------------%
-% Copyright (C) 1995 The University of Melbourne.
+% Copyright (C) 1995-1997 The University of Melbourne.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%-----------------------------------------------------------------------------%
% Main author: bromage
% Simplified by Marnix Klooster <marnix at worldonline.nl>
-% Last changed 22 October 1996
% The only predicate exported from this module is given two lists, and
% it generates a 'longest common subsequence.' A 'common subsequence'
@@ -40,7 +39,7 @@
:- import_module lcsstype.
:- pred lcss__find_lcss(list(A) :: in, list(A) :: in, int :: in, int :: in,
- lcss :: out) is det.
+ lcss :: out) is det.
%-----------------------------------------------------------------------------%
@@ -68,9 +67,9 @@
% Find a longest common subsequence. The algorithm
% used is very similar to that in:
%
- % Hunt & Szymanski, "A fast algorithm for computing
- % longest common subsequences", CACM 20:5, pp 350--353,
- % 1977.
+ % Hunt & Szymanski, "A fast algorithm for computing
+ % longest common subsequences", CACM 20:5, pp 350--353,
+ % 1977.
% The essence of the algorithm is simple. A 'match' is pair
% I-J so that List1[I]=List2[J]. For every length (>=0) it
@@ -102,9 +101,12 @@
% The consequence is that build_thresh and build_lcss need a
% value representing 'infinity'; previously we could use the
% length of the first list for this. Now we use length of
- % the longest list.
+ % the longest list plus one. The reason for the plus one is
+ % that it should be greater than any other threshold, and
+ % on identical files, thresholds may get as large as L1=L2.
- int__max(L1, L2, Inf),
+ int__max(L1, L2, Inf0),
+ Inf is Inf0 + 1,
% The original version uses arrays of the same length as the
% longest list. But it is sufficient to use the length of the
@@ -138,11 +140,13 @@
% First, invert List2. The inverted list is a
% mapping from strings to lists of integers where
% a given string maps to the list of strings in List2
- % which match that string.
+ % which match that string, in reverse order. (The
+ % reversal is for efficiency reasons.)
lcss__build_match_map(0, List2, Map),
- % Now match each line in List1 with those in List2.
+ % Now match each line in List1 with those in List2,
+ % reversing the matches as we go.
lcss__match_map_to_matchlist(List1, Map, MatchList).
@@ -154,9 +158,9 @@
N1 is N + 1,
lcss__build_match_map(N1, Ss, MapIn),
( map__search(MapIn, S, Ns0) ->
- list__append(Ns0, [N], Ns1)
+ Ns1 = [N | Ns0]
;
- Ns1 = [ N ]
+ Ns1 = [ N ]
),
map__set(MapIn, S, Ns1, MapOut).
@@ -167,9 +171,9 @@
lcss__match_map_to_matchlist([S | Ss], Map, [M | Ms]) :-
lcss__match_map_to_matchlist(Ss, Map, Ms),
( map__search(Map, S, Ns0) ->
- M = Ns0
+ list__reverse(Ns0, M)
;
- M = []
+ M = []
).
%-----------------------------------------------------------------------------%
@@ -184,13 +188,19 @@
:- pred lcss__build_thresh(int, list(list(int)), int,
array(int), array(lcss)).
-:- mode lcss__build_thresh(in, in, in, out, out) is det.
+:- mode lcss__build_thresh(in, in, in, array_uo, array_uo) is det.
lcss__build_thresh(N, MatchList, Inf, Thresh, Link) :-
% Initialize Thresh and Link.
- array__init(0, N, Inf, Thresh0), % Thresh[0..N] := Inf
- array__set(Thresh0, 0, -1, Thresh1), % Thresh[0] := -1
- array__init(0, N, [], Link1), % Link[0..N] := []
+
+ N1 is N + 1, % Why this size? Suppose we have two identical
+ % files of length N. Then the links will be
+ % [], [0-0], [0-0,1-1], ... [0-0..N-N], which
+ % makes N+1 links in total.
+
+ array__init(N1, Inf, Thresh0), % Thresh[0..N] := Inf
+ array__set(Thresh0, 0, -1, Thresh1), % Thresh[0] := -1
+ array__init(N1, [], Link1), % Link[0..N] := []
% Process all matches in Matchlist in lexicographical order.
lcss__build_thresh2(N, 0, MatchList, Thresh1, Link1, Thresh, Link).
@@ -199,7 +209,8 @@
:- pred lcss__build_thresh2(int, int, list(list(int)),
array(int), array(lcss),
array(int), array(lcss)).
-:- mode lcss__build_thresh2(in, in, in, in, in, out, out) is det.
+:- mode lcss__build_thresh2(in, in, in,
+ array_di, array_di, array_uo, array_uo) is det.
lcss__build_thresh2(_N, _I, [], Thresh0, Link0, Thresh0, Link0).
lcss__build_thresh2(N, I, [Matches | MatchRest], Thresh0, Link0,
Thresh1, Link1) :-
@@ -212,7 +223,8 @@
:- pred lcss__build_thresh3(int, int, list(int),
array(int), array(lcss),
array(int), array(lcss)).
-:- mode lcss__build_thresh3(in, in, in, in, in, out, out) is det.
+:- mode lcss__build_thresh3(in, in, in,
+ array_di, array_di, array_uo, array_uo) is det.
lcss__build_thresh3(_, _, [], Thresh, Link, Thresh, Link).
lcss__build_thresh3(N, I, [ J | Js ], Thresh0, Link0, Thresh1, Link1) :-
@@ -228,16 +240,16 @@
array__lookup(Thresh0, K, ThreshK),
( J < ThreshK ->
+ % Yes, so make this match part of a new entry, by
+ % doing Link[K] := [I-J | Link[K-1]]
- % Yes, so make this match part of a new entry, by
- % doing Link[K] := [I-J | Link[K-1]]
- K1 is K - 1,
- array__set(Thresh0, K, J, Thresh2),
- array__lookup(Link0, K1, LinkK1),
- array__set(Link0, K, [I - J | LinkK1], Link2)
+ K1 is K - 1,
+ array__set(Thresh0, K, J, Thresh2),
+ array__lookup(Link0, K1, LinkK1),
+ array__set(Link0, K, [I - J | LinkK1], Link2)
;
- % Otherwise forget it.
- Link0 = Link2, Thresh0 = Thresh2
+ % Otherwise forget it.
+ Link0 = Link2, Thresh0 = Thresh2
),
% Process the remaining matches that have I as their first
@@ -253,18 +265,18 @@
lcss__build_thresh4(Lo, Hi, J, K, Thresh) :-
Width is Hi - Lo,
( Width < 1 ->
- error("lcss__build_thresh4")
+ error("lcss__build_thresh4")
; Width = 1 ->
- K = Hi
+ K = Hi
;
- % Use the middle element of the range.
- Mid is (Lo + Hi) // 2,
- array__lookup(Thresh, Mid, ThreshMid),
- ( ThreshMid < J ->
- lcss__build_thresh4(Mid, Hi, J, K, Thresh)
- ;
- lcss__build_thresh4(Lo, Mid, J, K, Thresh)
- )
+ % Use the middle element of the range.
+ Mid is (Lo + Hi) // 2,
+ array__lookup(Thresh, Mid, ThreshMid),
+ ( ThreshMid < J ->
+ lcss__build_thresh4(Mid, Hi, J, K, Thresh)
+ ;
+ lcss__build_thresh4(Lo, Mid, J, K, Thresh)
+ )
).
%-----------------------------------------------------------------------------%
@@ -298,10 +310,10 @@
:- mode lcss__build_lcss2(in, in, in, out) is det.
lcss__build_lcss2(N, Inf, Thresh, K) :-
( array__lookup(Thresh, N, Inf) ->
- N1 is N - 1,
- lcss__build_lcss2(N1, Inf, Thresh, K)
+ N1 is N - 1,
+ lcss__build_lcss2(N1, Inf, Thresh, K)
;
- K = N
+ K = N
).
%-----------------------------------------------------------------------------%
Index: lcsstype.m
===================================================================
RCS file: /home/staff/zs/imp/mercury/samples/diff/lcsstype.m,v
retrieving revision 1.2
diff -u -r1.2 lcsstype.m
--- lcsstype.m 1997/07/27 15:09:24 1.2
+++ lcsstype.m 1997/07/28 05:44:35
@@ -1,12 +1,11 @@
%-----------------------------------------------------------------------------%
-% Copyright (C) 1995 The University of Melbourne.
+% Copyright (C) 1995-1997 The University of Melbourne.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%-----------------------------------------------------------------------------%
% Main author: bromage
% Simplified by Marnix Klooster <marnix at worldonline.nl>
-% Last changed 22 October 1996
% This module contains types common to the modules lcss and diffs.
@@ -15,7 +14,7 @@
:- module lcsstype.
:- interface.
-:- import_module std_util.
+:- import_module std_util, list.
% A pos is a non-negative number representing a position in a
More information about the developers
mailing list