Commit ab90d999 authored by Paul Wise

Fix website hit stats and translation prioritization (Closes: #791678)

CVS version numbers

get-www-stats: 1.5 -> 1.6 
stattrans.pl: 1.115 -> 1.116 
arabic/po/stats.ar.po: 1.2 -> 1.3 
bulgarian/po/stats.bg.po: 1.15 -> 1.16 
chinese/po/stats.zh.po: 1.3 -> 1.4 
danish/po/stats.da.po: 1.7 -> 1.8 
dutch/po/stats.nl.po: 1.7 -> 1.8 
finnish/po/stats.fi.po: 1.4 -> 1.5 
french/po/stats.fr.po: 1.8 -> 1.9 
galician/po/stats.gl.po: 1.2 -> 1.3 
german/po/stats.de.po: 1.11 -> 1.12 
italian/po/stats.it.po: 1.8 -> 1.9 
japanese/po/stats.ja.po: 1.8 -> 1.9 
norwegian/po/stats.nb.po: 1.4 -> 1.5 
polish/po/stats.pl.po: 1.6 -> 1.7 
portuguese/po/stats.pt.po: 1.11 -> 1.12 
russian/po/stats.ru.po: 1.6 -> 1.7 
slovak/po/stats.sk.po: 1.3 -> 1.4 
spanish/po/stats.es.po: 1.6 -> 1.7 
swedish/po/stats.sv.po: 1.4 -> 1.5
parent f258a82d
......@@ -28,11 +28,6 @@ msgstr ""
msgid "There are %d strings to translate."
msgstr ""
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr ""
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr ""
......
......@@ -30,11 +30,6 @@ msgstr "Има %d байта за превод."
msgid "There are %d strings to translate."
msgstr "Има %d низа за превод."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "Данни за попаденията от %s, събрани на %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Преводът е твърде стар"
......
......@@ -19,11 +19,6 @@ msgstr "一共有 %d [CN:字節:][HKTW:位元组:]需要翻譯。"
msgid "There are %d strings to translate."
msgstr "一共有 %d 個[CN:字符串:][HKTW:字串:]需要翻譯。"
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr ""
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "此譯文太過時了"
......
......@@ -19,11 +19,6 @@ msgstr "Der er %d bytes at oversætte."
msgid "There are %d strings to translate."
msgstr "Der er %d strenge at oversætte."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "Besøgsdata fra %s, indsamlet %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Denne oversættelse er forældet"
......
......@@ -33,11 +33,6 @@ msgstr "Er zijn %d bytes te vertalen."
msgid "There are %d strings to translate."
msgstr "Er zijn %d strings te vertalen."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "Gegevens over bezoekers van %s, verzameld %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Deze vertaling is te verouderd"
......
......@@ -25,11 +25,6 @@ msgstr "Yhteensä %d tavua käännettävänä."
msgid "There are %d strings to translate."
msgstr "Yhteensä %d merkkijonoa käännettävänä."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr ""
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Tämä käännös on vanhentunut"
......
......@@ -31,11 +31,6 @@ msgstr "%d octets sont à traduire."
msgid "There are %d strings to translate."
msgstr "%d chaînes sont à traduire."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "Nombre de visites sur %s, rassemblées le %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Cette traduction est trop incomplète"
......
......@@ -28,11 +28,6 @@ msgstr "Hai %d bytes por traducir."
msgid "There are %d strings to translate."
msgstr "Hai %d cadeas por traducir."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "Datos de visitas de %s, obtidos o %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Esta tradución está demasiado desfasada"
......
......@@ -29,11 +29,6 @@ msgstr "Es sind %d Byte zu übersetzen."
msgid "There are %d strings to translate."
msgstr "Es sind %d Zeichenketten zu übersetzen."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "Daten von %s erhalten, erfasst am %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Diese Übersetzung ist zu alt"
......
......@@ -16,22 +16,7 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# This program is run from a crontab on a Debian website mirror like this:
#
# MAILTO="porridge@debian.org"
# # Atomically and concurrent-safely create a stats.tgz
# 18 3 * * * cd "$HOME" && d=$(mktemp -d stats-wip-XXXXXXXXXX) && printf '{"hostname":"\%s"}' $(hostname -f) > "$d/stats.meta.json" && ./get-www-stats > "$d/stats.json" && tar zcf stats-wip.tgz "$d" && rm -rf "$d" && mv stats-wip.tgz stats.tgz
#
# And the output is transferred to dde.debian.net like this:
#
# MAILTO="porridge@debian.org"
# # Atomically transfer stats and replace them.
# 18 4 * * * cd $HOME && { [ ! -e stats-old ] || please_cleanup_failed_run ; } && cp -al stats-new stats-old && ln -s stats-old stats-old.s && mv -T stats-old.s stats && { scp -q -i .ssh/stats-transfer-nopass senfl.debian.org:stats.tgz stats.tgz || scp_failed ; } && rm -rf stats-new && mkdir stats-new && tar zxf stats.tgz -C stats-new --strip-components=1 && rm stats.tgz && ln -s stats-new stats-new.s && mv -T stats-new.s stats && rm -rf stats-old
#
# The output is then exported via DDE (see https://wiki.debian.org/DDE) and used
# by the stattrans.pl script to sort the page lists in the Debian web site
# translation statistics pages.
# This program is run from debwww crontab on Debian website master server.
try:
import json
......@@ -39,6 +24,7 @@ except ImportError:
import simplejson as json
from gzip import open as gzopen
from glob import glob
import logging
import os
import re
......@@ -46,15 +32,11 @@ import sys
#logging.basicConfig(level=logging.INFO)
logs_dir = '/var/log/apache2'
logs_prefix = 'www.debian.org-access.log'
logs_count = 10
log_files = glob('/srv/weblogs.debian.org/incoming/*.debian.org/www.debian.org-access.log*')
logs = []
for f in os.listdir(logs_dir):
if not f.startswith(logs_prefix):
continue
parts = f.split('-')
for f in log_files:
parts = os.path.split(f)[-1].split('-')
if len(parts) == 2:
logs.append((99999999, f, False))
elif len(parts) == 3:
......@@ -70,15 +52,23 @@ for f in os.listdir(logs_dir):
counts = {}
for n, f, gzipped in sorted(logs)[-logs_count:]:
logfile = os.path.join(logs_dir, f)
for n, logfile, gzipped in sorted(logs):
logging.info('Reading %s.' % logfile)
opener = gzipped and gzopen or open
for line in opener(logfile):
line = line.rstrip()
tokens = line.split()
if tokens[5] != '"GET':
continue
url = tokens[6]
url = re.sub(r'\...\.html$', '', url)
url = re.sub(r'#.*$', '', url)
url = re.sub(r'\?.*$', '', url)
url = re.sub(r'//+', '/', url)
url = re.sub(r'/(\./)+', '/', url)
url = re.sub(r'^/\.\./', '/', url)
url = re.sub(r'/[^./]*/\.\./', '/', url)
url = re.sub(r'\.([a-z]{2}|[a-z]{2}-[a-z]{2})\.(html|xml|rdf|pdf)$', '', url)
url = re.sub(r'\.(html|xml|rdf|pdf)(\.([a-z]{2}|[a-z]{2}-[a-z]{2}))?$', '', url)
url = re.sub(r'/$', '/index', url)
if url in counts:
counts[url] += 1
......
......@@ -32,11 +32,6 @@ msgstr "Ci sono %d byte da tradurre."
msgid "There are %d strings to translate."
msgstr "Ci sono %d stringhe da tradurre."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "I dati sul numero di visite sono stati recuperati da %s il %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Questa traduzione è troppo datata"
......
......@@ -25,11 +25,6 @@ msgstr "翻訳するバイト数 %d"
msgid "There are %d strings to translate."
msgstr "翻訳する文の数 %d"
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "ヒット数は %s の %s 時点の情報です。"
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "翻訳が古すぎです"
......
......@@ -25,11 +25,6 @@ msgstr "Det er %d byte å oversette."
msgid "There are %d strings to translate."
msgstr "Det er %d strenger å oversette."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr ""
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Denne oversettelsen for utdatert"
......
......@@ -20,11 +20,6 @@ msgstr "Jest %d bajtów do przetłumaczenia."
msgid "There are %d strings to translate."
msgstr "Jest %d napisów do przetłumaczenia."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "Dane dotyczące odwiedzin z %s, zebrane %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "To tłumaczenie jest przestarzałe."
......
......@@ -31,11 +31,6 @@ msgstr "Existem %d bytes para traduzir."
msgid "There are %d strings to translate."
msgstr "Existem %d strings para traduzir."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "Dados de acesso obtidos de %s, coletados em %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Esta tradução está muito desatualizada"
......
......@@ -30,11 +30,6 @@ msgstr "Байт для перевода: %d."
msgid "There are %d strings to translate."
msgstr "Строк для перевода: %d."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "Данные об обращениях с %s собраны %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Этот перевод слишком устарел"
......
......@@ -31,11 +31,6 @@ msgstr "%d bajtov na preloženie."
msgid "There are %d strings to translate."
msgstr "%d reťazcov na preloženie."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "Dáta o návštevách %s, zhromaždené %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Tento preklad je príliš zastaraný"
......
......@@ -56,11 +56,6 @@ msgstr "Hay %d bytes por traducir."
msgid "There are %d strings to translate."
msgstr "Hay %d cadenas de caracteres por traducir."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "Datos de acceso de %s, obtenidos en %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Esta traducción está muy desactualizada"
......
......@@ -28,18 +28,7 @@ use Webwml::TransCheck;
use Webwml::TransIgnore;
use Debian::L10n::Db ('%LanguageList');
use Net::Domain qw(hostfqdn);
BEGIN {
$dde_available = 0;
eval {
require JSON;
require LWP::Simple;
LWP::Simple->import;
$dde_available = 1;
}; if ($@) {
warn "One or more modules required for DDE support failed to load: $@\n";
}
}
use JSON;
$| = 1;
......@@ -52,8 +41,8 @@ $opt_v = 0;
$opt_d = "u";
$opt_l = undef;
$opt_b = ""; # Base URL, if not debian.org
# URL of JSON data or path to plaintext file with lines: "1299999 /doc/index\n"
$opt_f = "http://dde.debian.net/dde/q/static/porridge/stats?t=json";
# path of web stats JSON data
$opt_f = "/srv/www.debian.org/webwml/stats.json";
getopts('h:w:b:p:t:vd:l:f:') || die;
# Replace filename globbing by Perl regexps
$opt_p =~ s/\./\\./g;
......@@ -323,43 +312,20 @@ mkdir ($config{'htmldir'}, 02775) if (! -d $config{'htmldir'});
# Read website hit statistics, if available
my %hits;
my $hits_hostname;
my $hits_datetime;
my $file_sorter = sub($$) { $_[0] cmp $_[1] };
if ($config{'hit_file'} and $config{'hit_file'} =~ m{^(f|ht)tps?://} and ! $dde_available) {
warn "Disabling fetching of hit data.\n";
$config{'hit_file'} = undef;
}
if ($config{'hit_file'}) {
if ($config{'hit_file'} =~ m{^(f|ht)tps?://}) {
printf("Retrieving hit data from [%s].\n", $config{'hit_file'}) if ($config{'verbose'});
my $json = LWP::Simple::get($config{'hit_file'});
if ($json) {
my $perl = JSON::from_json($json, {utf8 => 1});
my %metadata = %{$perl->{'m'}};
$hits_hostname = $metadata{'hostname'} || undef;
$hits_datetime = defined $metadata{'Last-Modified'} ? strftime "%Y-%m-%d %T GMT", gmtime $metadata{'Last-Modified'} : undef;
foreach my $e (@{$perl->{'r'}}) {
my ($count, $url) = @$e;
last if $count < 3; # URLS with 2 or 1 hits are most likely mistakes; let's not waste RAM on them
$hits{substr($url, 1)} = $count;
}
} else {
warn "Retrieving hit data failed.\n";
}
} else {
$hits_hostname = hostfqdn;
$hits_datetime = strftime "%Y-%m-%d %T %Z", localtime;
open(HITS, $config{'hit_file'}) or die sprintf("Opening hit file [%s] failed: $!", $config{'hit_file'});
printf "Reading hit file [%s]\n", $config{'hit_file'} if ($config{'verbose'});
foreach my $hit_line (<HITS>) {
chomp $hit_line;
$hit_line =~ /^\s*(\d+)\s+(.*)/ or warn sprintf("unrecognized hit file [%s] line [%s]", $config{'hit_file'}, $hit_line);
my ($count, $url) = ($1, $2);
printf("Retrieving hit data from [%s].\n", $config{'hit_file'}) if ($config{'verbose'});
open( my $fh, '<', $config{'hit_file'} );
local $/;
my $json = <$fh>;
close $fh;
if ($json) {
my $perl = JSON::from_json($json, {utf8 => 1});
foreach my $e (@{$perl}) {
my ($count, $url) = @$e;
last if $count < 3; # URLS with 2 or 1 hits are most likely mistakes; let's not waste RAM on them
$hits{substr($url, 1)} = $count;
}
close(HITS) or die sprintf("Closing hit file [%s] failed: $!", $config{'hit_file'});
}
if (%hits) {
$file_sorter = sub($$) {
......@@ -593,9 +559,6 @@ foreach $lang (@search_in) {
print HTML "<toc-display/>\n";
if (%hits) {
print HTML '<p><gettext domain="stats">Note: the lists of pages are sorted by popularity. Hover over the page name to see the number of hits.</gettext>';
if (defined $hits_hostname and defined $hits_datetime) {
printf HTML ' <stats_hit_source "%s" "%s">', $hits_hostname, $hits_datetime;
}
print HTML "</p>\n";
}
......
......@@ -28,11 +28,6 @@ msgstr "Det finns %d bytes att översätta."
msgid "There are %d strings to translate."
msgstr "Det finns %d strängar att översätta."
#. TRANSLATORS: The first string is hostname of the debian website mirror, second string is date and time.
#: ../../english/template/debian/stats_tags.wml:23
msgid "Hit data from %s, gathered %s."
msgstr "Träffdata från %s, insamlat %s."
#: ../../stattrans.pl:240 ../../stattrans.pl:244
msgid "This translation is too out of date"
msgstr "Den här översättningen är föråldrad"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment