copypage.pl 11 KB
Newer Older
1 2 3
#!/usr/bin/perl -w

# This script copies the file named on the command line to the translation
4
# named in language.conf, and adds the translation-check header to it.
5 6 7
# It also will create the destination directory if necessary, and create the
# Makefile.  It will do this by simply including the English version -- copied
# Makefiles are not supported anymore for they bear too much space for errors.
8

Peter Karlsson's avatar
Peter Karlsson committed
9
# Originally written 2000-02-26 by Peter Krefting <peterk@debian.org>
10
# © Copyright 2000-2008 Software in the public interest, Inc.
11 12 13 14
# This program is released under the GNU General Public License, v2.

# $Id$

15 16 17 18
use FindBin;
FindBin::again();
use lib "$FindBin::Bin/Perl";

19
use File::Path;
20
use Local::VCS qw(vcs_file_info);
21

22 23 24 25 26 27 28
# Declare variables only used in references to avoid warnings
use vars qw(@iso_8859_2_compat  @iso_8859_3_compat  @iso_8859_4_compat
            @iso_8859_5_compat  @iso_8859_6_compat  @iso_8859_7_compat
            @iso_8859_8_compat  @iso_8859_9_compat  @iso_8859_10_compat
            @iso_8859_13_compat @iso_8859_14_compat @iso_8859_15_compat
            @iso_8859_16_compat);

29
# Get configuration
30 31
# Read first two valid lines from language.conf
if (open CONF, "<language.conf")
32
{
33 34 35 36 37
	while (<CONF>)
	{
		next if /^#/;
		chomp;
		$language = $_, next unless defined $language;
38
		$maintainer = $_, next unless defined $maintainer;
39
	}
40
	close CONF;
41 42 43
}
else
{
44
	warn "Unable to open language.conf. Using environment variables...\n";
45 46
}

47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
# Values are overwritten by environment variables
if (exists $ENV{DWWW_LANG})
{
	$language = $ENV{DWWW_LANG};
}
if (exists $ENV{DWWW_MAINT})
{
	$maintainer = $ENV{DWWW_MAINT};
}

die "Language not defined in DWWW_LANG or language.conf\n"
	if not defined $language;

#warn "Maintainer name not defined in DWWW_MAINT or language.conf\n"
#	if not defined $maintainer;


64 65 66 67 68
# Check usage.
if ($#ARGV == -1)
{
	print "Usage: $0 page ...\n\n";
	print "Copies the page from the english/ directory to the $language/ directory\n";
69 70
	print "and adds the translation-check header with the current revision,\n";
	print "optionally adds also the maintainer name.\n";
71
	print "If the directory does not exist, it will be created, and the Makefile\n";
72
	print "copied or created, depending on your language.conf setting.\n\n";
73
	print "The 'english/' part of the input path is optional.\n";
74 75 76
	exit;
}

77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
# Table of entities used when copying to non-latin1 encodings
@entities = (
	'&nbsp;', '&iexcl;', '&cent;', '&pound;', '&curren;', '&yen;',
	'&brvbar;', '&sect;', '&uml;', '&copy;', '&ordf;', '&laquo;', '&not;',
	'&shy;', '&reg;', '&macr;', '&deg;', '&plusmn;', '&sup2;', '&sup3;',
	'&acute;', '&micro;', '&para;', '&middot;', '&cedil;', '&sup1;',
	'&ordm;', '&raquo;', '&frac14;', '&frac12;', '&frac34;', '&iquest;',
	'&Agrave;', '&Aacute;', '&Acirc;', '&Atilde;', '&Auml;', '&Aring;',
	'&AElig;', '&Ccedil;', '&Egrave;', '&Eacute;', '&Ecirc;', '&Euml;',
	'&Igrave;', '&Iacute;', '&Icirc;', '&Iuml;', '&ETH;', '&Ntilde;',
	'&Ograve;', '&Oacute;', '&Ocirc;', '&Otilde;', '&Ouml;', '&times;',
	'&Oslash;', '&Ugrave;', '&Uacute;', '&Ucirc;', '&Uuml;', '&Yacute;',
	'&THORN;', '&szlig;', '&agrave;', '&aacute;', '&acirc;', '&atilde;',
	'&auml;', '&aring;', '&aelig;', '&ccedil;', '&egrave;', '&eacute;',
	'&ecirc;', '&euml;', '&igrave;', '&iacute;', '&icirc;', '&iuml;',
	'&eth;', '&ntilde;', '&ograve;', '&oacute;', '&ocirc;', '&otilde;',
	'&ouml;', '&divide;', '&oslash;', '&ugrave;', '&uacute;', '&ucirc;',
	'&uuml;', '&yacute;', '&thorn;', '&yuml;'
);

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
# Compatibility tables for the iso-8859 series; 1 indicates that the
# codepoint is the same as in iso-8859-1. Used to perform partial remaps
# for these.
@iso_8859_2_compat = (1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0);
@iso_8859_3_compat = (1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0);
@iso_8859_4_compat = (1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0);
@iso_8859_5_compat = (1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
@iso_8859_6_compat = (1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
@iso_8859_7_compat = (1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
@iso_8859_8_compat = (1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
@iso_8859_9_compat = (1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1);
@iso_8859_10_compat =(1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0);
@iso_8859_13_compat =(1,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,0,0);
@iso_8859_14_compat =(1,0,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1);
@iso_8859_15_compat =(1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1);
@iso_8859_16_compat =(1,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1);

# Check destination character encoding
my $recode = 0;
if (open WMLRC, "$language/.wmlrc")
{
	while (<WMLRC>)
	{
		if (s/^-D CHARSET=//)
		{
			$recode = 1 unless /^iso-8859-1$/i;
			if ($recode && /^iso-8859-([0-9]+)$/)
			{
				my $compattablename = 'iso_8859_' . $1 . '_compat';
				$compat = \@{$compattablename} if defined @{$compattablename};
			}
			last;
		}
	}
}

133
# Loop over command line
134
foreach $page (@ARGV)
135
{
136
	# Check if valid source
137
	if ($page =~ /wml$/ || $page =~ /src$/)
138
	{
139
		&copy($page, $recode, $compat);
140 141 142 143 144
	}
	else
	{
		print "$page does not seem to be a valid page.\n";
	}
145 146
}

147 148
# Subroutine to copy a page
sub copy
149
{
150
	my $page = shift;
151
	my $recodelatin1 = shift;
152
	my $compattable = shift;
153
	print "Processing $page...\n";
154

155 156 157 158 159
	# Remove english/ from path
	if ($page =~ m[^english/])
	{
		$page =~ s[^english/][];
	}
160

161 162 163
	# Create needed file and directory names
	my $srcfile = "english/$page";		# Source file path
	$dstfile = "$language/$page";		# Destination file path
164

165 166 167 168
	my $srcdir =  $srcfile;
	$srcdir =~ s[(.*/).*][$1];			# Source directory (trailing /)
	my $dstdir =  $dstfile;
	$dstdir =~ s[(.*/).*][$1];			# Desination directory (trailing /)
169

170 171
	my $filename = $srcfile;
	$filename =~ s[$srcdir][];			# Pathless filename
172

173 174
	my $srcmake = $srcdir . "Makefile";	# Name of source Makefile
	my $dstmake = $dstdir . "Makefile";	# Name of destination Makefile
175

176 177 178
	my $dsttitle = $dstfile;
	$dsttitle =~ s/\.wml$/.title/;		# Name of possible title translation

179 180 181
	# Sanity checks
	die "Directory $srcdir does not exist\n" unless -d $srcdir;
	die "File $srcfile does not exist\n"     unless -e $srcfile;
182 183 184 185 186
	if (-e $dstfile)
	{
		warn "File $dstfile already exists\n";
		return;
	}
187

188 189 190 191
	# Check if destination exists, if not - create it
	unless (-d $dstdir)
	{
		print "Destination directory $dstdir does not exist,\n";
192

193
		mkpath([$dstdir],0,0755)
194
			or die "Could not create $dstdir: $!\n";
195 196
		if ( -e $srcmake )
		{
197 198 199 200 201
			print "creating it and making a $dstmake\n";
			open MK, "> $dstmake"
				or die "Could not create $dstmake: $!\n";
			print MK "include \$(subst webwml/$language,webwml/english,\$(CURDIR))/Makefile\n";
			close MK;
202
		}
203
	}
204

205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
	# Check if title translation exists, if so - load it
	my $pagetitle;
	if (-e $dsttitle)
	{
		open TTL, $dsttitle
			or die "Could not read $dsttitle ($!)\n";

		# Scan for title;
		while (<TTL>)
		{
			$pagetitle = $_, last
				if /^<define-tag pagetitle>/;
		}

		close TTL;
	}
	else
	{
		undef $dsttitle;
	}

226
	# Retrieve VCS revision number
227
	my %vcsinfo = vcs_file_info( $srcfile );
228

229
	if ( not %vcsinfo  or  not exists $vcsinfo{'cmt_rev'}  )
230
	{
231
		die "Could not get revision number for `$srcfile' - bug in script?\n";
232
	}
233

234 235 236 237 238 239 240
	# Open the files
	open SRC, $srcfile
		or die "Could not read $srcfile ($!)\n";

	open DST, ">$dstfile"
		or die "Could not create $dstfile ($!)\n";

241 242
	# Copy the file and insert the revision number
	my $insertedrevision = 0;
243

244
	while (<SRC>)
245
	{
Peter Karlsson's avatar
Peter Karlsson committed
246 247
		next if /\$Id/;

248
		unless ($insertedrevision || /^#/)
249
		{
250
			printf DST qq'#use wml::debian::translation-check translation="%s"', $vcsinfo{'cmt_rev'};
251 252 253
			print DST qq' maintainer="$maintainer"'
				if defined $maintainer;
			print DST qq'\n';
254 255
			$insertedrevision = 1;
		}
256 257 258 259 260 261
		if (defined $pagetitle && /^<define-tag pagetitle>/)
		{
			print DST $pagetitle;
		}
		else
		{
262 263 264 265 266 267 268 269 270 271 272
			# Transform the string into a string that is fit for the encoding
			# of the output language. We do that by first converting any
			# SGML entities in the input stream into 8-bit ISO 8859-1
			# encoding, and then convert extended characters (back) into
			# entities if necessary for the target encoding.

			# Decode
			s/(&[^#;]+;)/&decodeentity($1)/ge;
			s/&#(1[6-9][0-9]|2[0-4][0-9]|25[0-5]);/chr($1)/ge;

			# Encode
273 274
			if (defined $compattable)
			{
275 276
				# Output encoding is in part compatible with ISO 8859-1, only
				# convert incompatible characters into entities.
277 278 279
				s/([\xA0-\xFF])/$$compattable[ord($1)-160]?$1:$entities[ord($1)-160]/ge;
			}
			elsif ($recodelatin1)
280
			{
281 282
				# Output encoding is incompatible with ISO 8859-1, convert all
				# 8-bit characters into entities.
283
				s/([\xA0-\xFF])/$entities[ord($1)-160]/ge;
284 285
			}

286 287
			print DST $_;
		}
288 289
	}

290 291
	unless ($insertedrevision)
	{
292
		printf DST qq'#use wml::debian::translation-check translation="%s"', $vcsinfo{'cmt_rev'};
293 294 295
		print DST qq' maintainer="$maintainer"'
			if defined $maintainer;
		print DST qq'\n';
296 297
	}

298 299 300 301 302
	close SRC;
	close DST;

	# We're done
	print "Copied $page, remember to edit $dstfile\n";
303 304
	print "and to remove $dsttitle when finished\n"
		if defined $dsttitle;
305
}
306 307 308 309 310 311 312 313 314 315 316 317

# Return the ISO-8859-1 character that corresponds to the given entity
sub decodeentity
{
	my $ent = shift;
	# Start at one to avoid decoding &nbsp;
	for (my $i = 1; $i < $#entities; ++ $i)
	{
		return chr($i + 160) if $entities[$i] eq $ent;
	}
	return $ent;
}