remove_stale.pl 9.39 KB
Newer Older
1
#!/usr/bin/perl
2 3 4 5

# This script searches through all the translation directories for HTML
# files not having a matching WML file, and removes those HTML files from
# both the local directory and the install directory. This is needed so that
6 7
# a removing a WML file from the repository causes the corresponding HTML file
# to go away.
8

Peter Karlsson's avatar
Peter Karlsson committed
9
# Originally written 2001-03-22 by Peter Krefting <peterk@debian.org>
10
# Revised in 2010 by Bas Zoetekouw <bas@debian.org>
11 12 13
# Updated in 2018 by Steve McIntyre <93sam@debian.org> to use the
# generic VCS infrastructure as part of the git migration.
# © Copyright 2001-2018 Software in the public interest, Inc.
14 15
# This program is released under the GNU General Public License, v2.

16
## $Id$
17 18

use strict;
19 20
use warnings;

21
use Getopt::Std;
22 23 24
use Data::Dumper;
use File::Spec::Functions;
use File::Find;
25

26 27 28
use FindBin;
FindBin::again();
use lib "$FindBin::Bin/Perl";
29

30
use Webwml::Langs;
31
use Local::VCS;
32

33

34 35
# directory where "make install" installs the website
use constant INSTALLDIR => '../www';
36

37 38
my $VCS = Local::VCS->new();

39 40
my $verbose = 0;
our $opts_v;
41

42 43
###############################################################
# "main"
44 45
{

46
	my %opts;
47
	show_help("Unknown option\n")     if not getopts('dhv:',\%opts);
48 49 50 51
	show_help()                       if exists $opts{'h'};
	show_help("Not in webwml root\n") if not -d 'english';

	my $reallyremove =  exists( $opts{'d'} );
52 53 54
	if (exists ($opts{'v'})) {
		$verbose = $opts{'v'};
	}
55

56 57
	# Read the list of languages
	my @languages = sort Webwml::Langs->new()->names();
58

59 60 61 62
	# Cache the repo history for performance
	print "Initialising VCS cache\n";
	$VCS->cache_repo();

63 64 65
	# check all subdirs to find stale html files
	my @files;
	foreach my $language (@languages)
66
	{
67
		push @files, find_stale_files($language);
68
	}
69

70 71 72

	# remove or report the files
	foreach my $file (@files)
73
	{
74
		if ( $reallyremove )
75
		{
76
			remove_file( $file, \@languages );
77
		}
78
		else
79
		{
80
			report_file( $file, \@languages );
81 82 83
		}
	}

84 85 86 87 88 89 90 91 92 93
	my $numfiles = @files;
	print "\n$numfiles stale translations found.\n";
	print "Use -d option to remove files.\n"
		if @files and not $reallyremove;

	# Done.
	exit;
}


94 95 96 97 98 99 100 101 102 103 104 105 106 107
# log very verbose messages
sub vvlog {
    if ($verbose >= 2) {
	print STDOUT $_[0] . "\n";
    }
}

# log verbose messages
sub vlog {
    if ($verbose >= 1) {
	print STDOUT $_[0] . "\n";
    }
}

108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142
#############################################################
# show help text
sub show_help
{
	my $help = shift;

	print $help if $help;

	open(my $fd, '<:utf8', $0) or return;
	while (my $line = <$fd>)
	{
		next if $line =~ m/^#!/;
		next if $line =~ m/^##/;
		last if $line =~ /^[^#\s]/;

		chomp $line;
		$line =~ s/^#\s+//;

		print $line, "\n";
	}
	close($fd);

	print "Run this script from the webwml directory to remove stale HTML files.\n\n";
	print "Usage: $0 [-d]\n\n";
	print "  -d  Remove files, just not report.\n";

	exit defined($help) ? 1 : 0;
}

#############################################################
# Find the stale html files in the specified directory
sub find_stale_files
{
	# Get parameter.
	my $dir       = shift or die('No directory specified');
143 144
	my $scanned_count = 0;
	my $remove_count = 0;
145 146 147

	print "Recursing into `$dir'\n";

148
	# the language subdir possibly doesn't exist yet for newly 
149 150 151 152 153 154 155 156 157 158 159
	# started translations
	return 0  unless  -d $dir;

	# create a list of *.html files and a hash of *.wml files in this translation
	#my (%wmlfiles,@htmlfiles);
	#find( sub { $wmlfiles{$File::Find::name}++     if -f and /\.wml$/  }, $dir );
	#find( sub { push @htmlfiles, $File::Find::name if -f and /\.html$/ }, $dir );

	my @htmlfiles =                 find_files_ext( $dir, 'html' );
	my %wmlfiles  = map { $_ => 1 } find_files_ext( $dir, 'wml' );

160
	# Locate all HTML files, and find out which ones do not correspond
161
	# to a WML file, and does not live in the VCS by itself.
162 163
	my @toremove;
	foreach my $htmlfile (sort @htmlfiles)
164
	{
165 166 167 168 169
		$scanned_count++;
		if (0 == ($scanned_count % 500)) {
			vlog("  scanned $scanned_count files, found $remove_count to remove");
		}
		vvlog("  Looking at $htmlfile");
170 171 172 173 174 175
		# the name of the wml file that this html file is potentially
		# generated from
		my $source = $htmlfile;
		$source =~ s/(?:\.[-\w]+)?\.html$/.wml/  
			or die("Can't determine WML source file for `$htmlfile'");

176 177 178 179 180 181 182 183 184
		# Don't try to do anything in subdirectories of l10n.
		next  if  $htmlfile =~ m{/international/l10n/po[-\w]*/[\w_\@]+\.[-\w]+\.html$};
		
		# Don't try to do anything in stats either.
		next  if  $htmlfile =~ m{/devel/website/stats/[-\w]+\.[-\w]+\.html$};

		# Don't try to remove yaboot-howto.
		next  if  $htmlfile =~ m{/ports/powerpc/inst/yaboot-howto.html};

185 186 187 188
		# as a special exception, sitemaps don't have a wml source in the
		# translation tree (they are generated from english/)
		next  if  $htmlfile =~ m{/sitemap\.[-\w]+\.html$};

189 190
		# does the wml source file exist?
		my $haswml = exists( $wmlfiles{$source} ) || -f $source || 0;
191
		next  if  $haswml;
192 193

		# is the html file checked in the VCS?
194
		my $checkedin = $VCS->file_info($htmlfile , quiet => 1 ) ? 1 : 0;
195
		next  if  $checkedin;
196 197 198 199 200 201

		#if ($checkedin) 
		#{ print "==> `$htmlfile' : `$source' : $haswml : $checkedin\n"; }

		# File has no reason for being here.
		push @toremove, $htmlfile;
202 203
		$remove_count++;
		vlog("$htmlfile needs to be removed");
204 205
	}

206
	vlog("  scanned $scanned_count files, found $remove_count to remove");
207 208 209 210 211 212 213 214 215 216 217
	return @toremove;
}

#############################################################
# returns a list of filenames with the given extension
sub find_files_ext
{
	my $dir = shift or die('Internal error: No dir specified');
	my $ext = shift or die('Internal error: No ext specified');

	my @files;
218
	find( sub { push @files, $File::Find::name if -f and m/\.$ext$/ }, $dir );
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
	return @files;
}

#############################################################
# get the filenames of files related to the given htmlfile
sub gather_file_info 
{
	my $htmlfile  = shift or die('Internal error: No htmlfile specified');
	my $languages = shift or die('Internal error: No languages specified');

	die("Not an html file: `$htmlfile'\n") unless $htmlfile =~ m/\.html$/;

	my $wmlsrc = $htmlfile;
	$wmlsrc =~ s{\.[-\w]+\.html$}{.wml};
	die("No valid wml source for `$htmlfile' could be constructed\n")
		if $wmlsrc eq $htmlfile;

	# Name of file installed by make install.
	my $installed = $htmlfile;
	$installed =~ s{^[^/]+/}{}; # remove "dutch/" at the beginning
	$installed = catfile(INSTALLDIR,$installed);

	# Name of corresponding ICS file for events.
	my $icslocal = $htmlfile;
	$icslocal =~ s/html$/ics/;
	my $icsinstalled = $installed;
	$icsinstalled =~ s/html$/ics/;

	# Extra symlinks for languages
	my $extra = $installed;
	$extra =~ s/\.no\.html$/.nb.html/  or  $extra = '';
	#what about en-us and en-gb ?

	# Check for translations to other languages, they
	# need to have their .wml file touched
	my @translations = findtranslations($wmlsrc,$languages);

	return ($wmlsrc,$installed,$icslocal,$icsinstalled,$extra,@translations);
}

#############################################################
# remove the given html file and all files related to it
sub remove_file
{
	my $htmlfile  = shift or die('Internal error: No htmlfile specified');
	my $languages = shift or die('Internal error: No languages specified');

	my ($wmlsrc,$installed,$icslocal,$icsinstalled,$extra,@translations) 
		= gather_file_info( $htmlfile, $languages );

	if (-f $extra or -l $extra)
	{
		print "Removing $extra\n";
		unlink $extra
			or die "Unable to remove $extra: $!\n";
	}
	if (-f $installed)
	{
		print "Removing $installed\n";
		unlink $installed
			or die "Unable to remove $installed: $!\n";
	}
	if (-f $icsinstalled)
	{
		print "Removing $icsinstalled\n";
		unlink $icsinstalled
			or die "Unable to remove $icsinstalled: $!\n";
	}
	if (-f $icslocal)
	{
		print "Removing $icslocal\n";
		unlink $icslocal
			or die "Unable to remove $icslocal: $!\n";
292 293
	}

294 295 296
	# Touch translation sources to update the list of
	# translations on them
	if (@translations)
297
	{
298 299
		utime(undef,undef,@translations)
			or warn "touch: error code $?";
300
	}
301

302 303 304 305
	print "Removing $htmlfile\n";
	unlink $htmlfile
		or die "Unable to remove $htmlfile: $!\n";

306
}
307

308 309 310
#############################################################
# report files that would be removed
sub report_file 
311
{
312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
	my $htmlfile  = shift or die('Internal error: No htmlfile specified');
	my $languages = shift or die('Internal error: No languages specified');

	my ($wmlsrc,$installed,$icslocal,$icsinstalled,$extra,@translations) 
		= gather_file_info( $htmlfile, $languages );

	print "$htmlfile is stale\n";
	print "  installed file is $installed";
	print "  (does not exist)"  unless  -f $installed;
	print "\n";
	print "  and $extra\n"
		if (-f $extra || -l $extra) and $extra ne $installed;
	print "  installed ICS file: $icsinstalled\n"
		if -f $icsinstalled;
	print "  local ICS file: $icslocal\n"
		if -f $icslocal;

	foreach my $translation (@translations)
330
	{
331
		print "  translation in $translation\n";
332 333 334
	}
}

335
#############################################################
336 337 338
# Locate all translated copies of this wml file
sub findtranslations
{
339 340 341
	my $wml       = shift or die('Internal error: no wml file specified');
	my $languages = shift or die('Internal error: no languages specified');

342 343 344 345 346
	my @files;

	# Remove the first component of the path (which contains the
	# current language)
	my $tail = $wml;
347
	$tail =~ s{^[^/]+/}{};
348

349 350
	# Locate all translated copies
	foreach my $language (@$languages)
351
	{
352 353
		my $translated = "$language/$tail";
		push @files, $translated   if  -f $translated;
354 355 356 357
	}

	return @files;
}
358 359

__END__