|
1 |
| -#!/usr/local/bin/perl |
| 1 | +#!/usr/bin/perl |
2 | 2 |
|
| 3 | +# diff_hypermail_archives |
3 | 4 | #
|
4 |
| -# Compare 2 directories with archives produced by Hypermail. |
| 5 | +# Compare two directories with archives produced by Hypermail. |
5 | 6 | # Intended primarily to test changes to Hypermail.
|
6 |
| -# Written by Peter McCluskey (pcm@rahul.net). |
7 | 7 | #
|
| 8 | +# Originally written by Peter McCluskey (pcm@rahul.net). |
| 9 | +# Rewritten and extended by Jose Kahan (jose.kahan@w3.org). |
| 10 | +# |
| 11 | +# TODO: If more portability is needed, try switch the sytem |
| 12 | +# diff and grep to File::Compare |
| 13 | +# https://perldoc.perl.org/File/Compare.html |
8 | 14 |
|
9 |
| -$dir1 = $ARGV[0]; |
10 |
| -$dir2 = $ARGV[1]; |
| 15 | +use strict; |
| 16 | +use warnings; |
11 | 17 |
|
12 |
| -opendir(FDDIR,$dir1) || die("cant open $dir1"); |
13 |
| -@files1 = sort readdir(FDDIR); |
| 18 | +use File::Find; |
| 19 | +use FindBin '$Script'; |
| 20 | +use Cwd 'abs_path'; |
14 | 21 |
|
15 |
| -foreach $file (@files1) |
16 |
| -{ |
17 |
| - my $file1name = "$dir1/$file"; |
18 |
| - my $file2name = "$dir2/$file"; |
19 |
| - if(!(-e $file2name)) |
20 |
| - { |
21 |
| - print "$file2name is missing\n"; |
22 |
| - next; |
23 |
| - } |
24 |
| - if(-d $file1name) |
25 |
| - { |
26 |
| - print "$file2name is not a directory\n" if(!(-d $file2name)); |
27 |
| - next; |
28 |
| - } |
29 |
| - open(FD, "diff $file1name $file2name |") || die("cannot diff $file1name $file2name"); |
30 |
| - while($line = <FD>) |
31 |
| - { |
32 |
| - if($line =~ /\d+c\d+/) |
33 |
| - { |
34 |
| - if(&non_date_diffs($diff1, $diff2)) |
35 |
| - { |
36 |
| - print "$file: $diffs"; |
| 22 | +use Getopt::Std; |
| 23 | + |
| 24 | +## |
| 25 | +## configurable options |
| 26 | +## |
| 27 | + |
| 28 | +# path to diff command |
| 29 | +our $diff_cmd = "/usr/bin/diff"; |
| 30 | +# path to grep command |
| 31 | +our $grep_cmd= "/bin/grep"; |
| 32 | + |
| 33 | +## |
| 34 | +## End of configurable options |
| 35 | +## |
| 36 | + |
| 37 | +## |
| 38 | +## Global variables |
| 39 | +## |
| 40 | + |
| 41 | +# don't print progress messages |
| 42 | +our $quiet; |
| 43 | +# print a "." for each processed file |
| 44 | +our $show_progress; |
| 45 | +# enable to print extra messages |
| 46 | +our $debug; |
| 47 | +# ignore all content below the footer trailer |
| 48 | +our $ignore_footer; |
| 49 | +# hash with filenames, dirnames that must be ignored |
| 50 | +our @ignore_regex; |
| 51 | +# thw two dirs that need to be compared |
| 52 | +our $dir1; |
| 53 | +our $dir2; |
| 54 | +# global difference counter (we consider them errors) |
| 55 | +our $errors = 0; |
| 56 | +# attachmed dir prefix, hard-coded into hypermail |
| 57 | +our $attachment_dir_prefix = "att-"; |
| 58 | +# archive generated by hypermail generated text blurb |
| 59 | +our $hmail_generated_by_text = q(This archive was generated by <a.*hypermail-project.org/"); |
| 60 | +# footer trailer generated by hypermail |
| 61 | +our $footer_trailer = q(<!-- trailer="footer" -->); |
| 62 | + |
| 63 | +# returns the line number of the footer or of the generated_by hypermail |
| 64 | +# blurb if found |
| 65 | +sub get_hypermail_generated_by_lines { |
| 66 | + my $filename = shift; |
| 67 | + my $counter = 0; |
| 68 | + my $expected_counter = ($ignore_footer) ? 1 : 2; |
| 69 | + my %generated_by_lines; |
| 70 | + |
| 71 | + my $needle = $ignore_footer ? $footer_trailer : $hmail_generated_by_text; |
| 72 | + my @grep_args = ($grep_cmd, $ignore_footer ? "-A0" : "-A1", "-n", $needle, $filename); |
| 73 | + |
| 74 | + open (my $fh, "-|", @grep_args) || die("cannot @grep_args\n"); |
| 75 | + |
| 76 | + while (my $line = <$fh>) { |
| 77 | + if ($line =~ m/^\d+-?:\ ?/) { |
| 78 | + my $line_nb = (split /:/, $line)[0]; |
| 79 | + $line_nb =~ s/-$//; |
| 80 | + $generated_by_lines{$line_nb} = 1; |
| 81 | + $counter++; |
| 82 | + } |
| 83 | + } |
| 84 | + close ($fh); |
| 85 | + |
| 86 | + return ($counter == $expected_counter) ? \%generated_by_lines : {}; |
| 87 | + |
| 88 | +} # get_hypermail_generated_by_lines |
| 89 | + |
| 90 | +# checks if the filenames exist in both directories and the type |
| 91 | +sub compare_filenames { |
| 92 | + my ($filename1, $filename2) = @_; |
| 93 | + my $res = 0; |
| 94 | + |
| 95 | + if (!-e $filename2) { |
| 96 | + $errors++; |
| 97 | + $res = -1; |
| 98 | + print "\n" if $show_progress; |
| 99 | + print "[$errors] $filename2 does not exist\n" unless $quiet; |
| 100 | + |
| 101 | + } elsif (-d $filename1) { |
| 102 | + if (!-d $filename2) { |
| 103 | + print "\n" if $show_progress; |
| 104 | + print "[$errors] $filename2 is not a directory\n" unless $quiet; |
| 105 | + $errors++; |
| 106 | + } |
| 107 | + $res = -1; |
| 108 | + } |
| 109 | + |
| 110 | + return $res; |
| 111 | + |
| 112 | +} # compare_filenames |
| 113 | + |
| 114 | +# filter out files we're not interested in |
| 115 | +sub filter_filenames { |
| 116 | + my $filename = shift; |
| 117 | + my $res = 0; |
| 118 | + |
| 119 | + foreach my $regex (@ignore_regex) { |
| 120 | + if ($filename =~ m/$regex/) { |
| 121 | + $res = -1; |
| 122 | + print "$filename is ignored per regex: " . $regex . "\n" unless $quiet; |
| 123 | + } |
| 124 | + } |
| 125 | + |
| 126 | + return $res; |
| 127 | + |
| 128 | +} # filter_filenames |
| 129 | + |
| 130 | +# does a diff on existing directories, files, and file content |
| 131 | +sub diff_files_complete { |
| 132 | + my $file = $_; |
| 133 | + my $filename1 = $File::Find::name; |
| 134 | + my $filename2 = $filename1; |
| 135 | + $filename2 =~ s/$dir1/$dir2/; |
| 136 | + my $generated_by_lines; |
| 137 | + my $footer_line; |
| 138 | + my $diffs = ""; |
| 139 | + my $local_errors = 0; |
| 140 | + |
| 141 | + print "." if $show_progress; |
| 142 | + |
| 143 | + if (compare_filenames ($filename1, $filename2) |
| 144 | + || filter_filenames ($filename1)) { |
| 145 | + return; |
| 146 | + } |
| 147 | + |
| 148 | + my $is_attachment_dir = $filename1 =~ m/\/$attachment_dir_prefix/; |
| 149 | + |
| 150 | + if (!$is_attachment_dir) { |
| 151 | + if ($filename1 =~ m/\.html$/) { |
| 152 | + $generated_by_lines = get_hypermail_generated_by_lines ($filename1); |
| 153 | + if ($ignore_footer) { |
| 154 | + $footer_line = (keys %{ $generated_by_lines } )[0]; |
37 | 155 | }
|
38 |
| - $diffs = ''; |
39 |
| - $diff1 = ''; |
40 |
| - $diff2 = ''; |
| 156 | + } |
| 157 | + } |
| 158 | + |
| 159 | + print "comparing $filename1\n" if $debug; |
| 160 | + |
| 161 | + my @diff_args = ($diff_cmd, $filename1, $filename2); |
| 162 | + open (my $fh, "-|", @diff_args) || die("cannot diff $filename1 $filename2\n"); |
| 163 | + |
| 164 | + while (my $line = <$fh>) { |
| 165 | + |
| 166 | + if ($line eq "") { |
| 167 | + next; |
| 168 | + } |
| 169 | + |
| 170 | + # for hypermail generated messages and indexes, if the diff |
| 171 | + # finds the the generated_by blurb, we assume that the only |
| 172 | + # things that changed are the version number and/or the |
| 173 | + # generation date. We ignore the rest of the diff output at |
| 174 | + # this point. |
| 175 | + if ($line =~ /\d+c\d+/) { |
| 176 | + if (!$is_attachment_dir) { |
| 177 | + my ($ln_1, $ln_2) = split /c/, $line, 2; |
| 178 | + |
| 179 | + if ($ln_1 eq $ln_2 && %{ $generated_by_lines }) { |
| 180 | + if ($ignore_footer) { |
| 181 | + if ($ln_1 >= $footer_line) { |
| 182 | + last; |
| 183 | + } |
| 184 | + } elsif ($$generated_by_lines{$ln_1}) { |
| 185 | + last; |
| 186 | + } |
| 187 | + } |
| 188 | + } |
| 189 | + $local_errors++; |
41 | 190 | }
|
42 | 191 | $diffs .= $line;
|
43 |
| - $diff1 .= $line if(substr($line,0,1) eq '<'); |
44 |
| - $diff2 .= $line if(substr($line,0,1) eq '>'); |
45 | 192 | }
|
46 |
| - if(&non_date_diffs($diff1, $diff2)) |
47 |
| - { |
48 |
| - print "$file: $diffs"; |
| 193 | + close ($fh); |
| 194 | + |
| 195 | + if ($diffs ne "" && !$quiet) { |
| 196 | + $errors++; |
| 197 | + print "\n" if $show_progress; |
| 198 | + print "[$errors] $filename1\n[$errors] $filename2: found $local_errors difference" . ($errors == 1 ? "" : "s") . "\n"; |
| 199 | + print "$diffs\n"; |
49 | 200 | }
|
50 |
| - $diffs = ''; |
51 |
| - $diff1 = ''; |
52 |
| - $diff2 = ''; |
53 |
| -} |
| 201 | + |
| 202 | +} # diff_files_complete |
54 | 203 |
|
55 |
| -sub non_date_diffs |
| 204 | +# only does a diff to see if the same directories and files exist. |
| 205 | +# Ignores content differences. |
| 206 | +sub diff_files_dir { |
| 207 | + my $filename1 = $File::Find::name; |
| 208 | + my $filename2 = $filename1; |
| 209 | + $filename2 =~ s/$dir1/$dir2/; |
| 210 | + |
| 211 | + print "." if $show_progress; |
| 212 | + |
| 213 | + compare_filenames ($filename1, $filename2); |
| 214 | + |
| 215 | + return; |
| 216 | + |
| 217 | +} # diff_files |
| 218 | + |
| 219 | +sub process_options { |
| 220 | + my %options=(); |
| 221 | + |
| 222 | + getopts("qhfpdi:", \%options); |
| 223 | + |
| 224 | + $dir1 = $ARGV[0]; |
| 225 | + $dir2 = $ARGV[1]; |
| 226 | + |
| 227 | + if (defined $options{d}) { |
| 228 | + $debug = 1; |
| 229 | + } |
| 230 | + if (defined $options{q}) { |
| 231 | + $quiet = 1; |
| 232 | + } |
| 233 | + |
| 234 | + if (defined $options{p} && !$quiet && !$debug) { |
| 235 | + $show_progress = 1; |
| 236 | + } |
| 237 | + |
| 238 | + if (defined $options{f}) { |
| 239 | + $ignore_footer = 1; |
| 240 | + } |
| 241 | + |
| 242 | + if (defined $options{h} || !defined $dir1 || !defined $dir2) { |
| 243 | + die ("\nUsage: $Script [-q -h -i foo:bar] dir1 dir2\n" |
| 244 | + . "\t-q quiet mode\n" |
| 245 | + . "\t-p show processing progress\n" |
| 246 | + . "\t-h help prints this message\n" |
| 247 | + . "\t-f ignore all content below the footer trailer comment\n" |
| 248 | + . "\t-i list of colon separated regex corresponding to directories/filenames to ignore\n" |
| 249 | + . "\tdir1, dir2 paths to the two directories to compare\n\n"); |
| 250 | + } |
| 251 | + |
| 252 | + # remove trailing / if given |
| 253 | + $dir1 =~ s/\/+$//; |
| 254 | + $dir2 =~ s/\/+$//; |
| 255 | + |
| 256 | + if (-l $dir1) { |
| 257 | + $dir1 = abs_path ($dir1); |
| 258 | + } |
| 259 | + |
| 260 | + if (-l $dir2) { |
| 261 | + $dir2 = abs_path ($dir2); |
| 262 | + } |
| 263 | + |
| 264 | + if (!-d $dir1) { |
| 265 | + die ("directory $dir1 doesn't exist\n"); |
| 266 | + } |
| 267 | + |
| 268 | + if (!-d $dir2) { |
| 269 | + die ("directory $dir2 doesn't exist\n"); |
| 270 | + } |
| 271 | + |
| 272 | + if (defined $options{i}) { |
| 273 | + @ignore_regex = split (/:/, $options{i}); |
| 274 | + } |
| 275 | + |
| 276 | +} # process_options |
| 277 | + |
| 278 | +# main |
56 | 279 | {
|
57 |
| - my ($diff1, $diff2) = @_; |
58 |
| - my $i; |
59 |
| - return 1 if(length($diff1) != length($diff2)); |
60 |
| - for($i = 0; $i < length($diff2); ++$i) |
61 |
| - { |
62 |
| - my $c1 = substr($diff1, $i, 1); |
63 |
| - my $c2 = substr($diff2, $i, 1); |
64 |
| - next if($c1 eq $c2); |
65 |
| - next if(($c1 eq "<") && ($c2 eq ">")); |
66 |
| - next if(($c1 =~ /\d/) && ($c2 =~ /\d/)); |
67 |
| - return 1; |
68 |
| - } |
69 |
| - return 0; |
70 |
| -} |
| 280 | + # read command-line options |
| 281 | + process_options(); |
| 282 | + |
| 283 | + my %find_options = ('follow' => 1, |
| 284 | + 'wanted' => \&diff_files_complete, |
| 285 | + ); |
| 286 | + |
| 287 | + print "\n" unless $quiet; |
| 288 | + print "comparing $dir1 against $dir2\n" unless $quiet; |
| 289 | + print "\n" if $debug && !$quiet; |
| 290 | + |
| 291 | + find(\%find_options, $dir1); |
| 292 | + |
| 293 | + # do the opposite diff too, to make sure we are not generating new files |
| 294 | + print "\n\n" if $show_progress || $debug; |
| 295 | + |
| 296 | + print "comparing $dir2 filenames against $dir1\n" unless $quiet; |
| 297 | + |
| 298 | + ($dir1, $dir2) = ($dir2, $dir1); |
| 299 | + |
| 300 | + $find_options{wanted} = \&diff_files_dir; |
| 301 | + find(\%find_options, $dir1); |
| 302 | + |
| 303 | + print "\n" if $show_progress; |
| 304 | + print "\n" unless $quiet; |
| 305 | + |
| 306 | + if ($errors) { |
| 307 | + print "=> $dir1 and $dir2 differ ($errors difference", $errors > 1 ? "s" : "", ")\n\n" unless $quiet; |
| 308 | + } else { |
| 309 | + print "=> Archives are identical\n\n" unless $quiet; |
| 310 | + } |
| 311 | + |
| 312 | + exit (($errors == 0) ? 0 : -1); |
| 313 | + |
| 314 | +} # main |
0 commit comments