Commit 91a8b326 authored by Steve McIntyre
Browse files

Improve the parse_hosts_deny script

Rewrite in perl

Only count blocks from the last N days (90 by default).
parent 2d4c7b09
#!/bin/sh
#!/usr/bin/perl
#
# Trivial script to work out where most of the spam is coming from; use to help when maintaining hosts_deny and networks_deny
# Simple script: parse the blocked hosts in hosts_deny* to work out
# where most of the spam is coming from; use to help when maintaining
# hosts_deny and networks_deny
base="/srv/wiki.debian.org"
data="${base}/etc/moin/hosts_deny ${base}/etc/moin/hosts_deny_auto"
use strict;
use warnings;
use Time::Local;
use Getopt::Long;
awk '{
# Count IPv6 as one lump (for now!)
if (0 != match($1, ":")) {
spammers_v6++
next
}
c = $1;
gsub(".[[:digit:]]+$","",c)
spammers_c[c]++
b = c;
gsub(".[[:digit:]]+$","",b)
spammers_b[b]++
a = b;
gsub(".[[:digit:]]+$","",a)
spammers_a[a]++
# Directory holding the deny lists that are scanned.
my $basedir = "/srv/wiki.debian.org/etc/moin";
# my $basedir = ".";
my @datafiles = ("$basedir/hosts_deny", "$basedir/hosts_deny_auto");

# Running totals. IPv6 addresses are counted as a single lump; IPv4
# addresses are counted three times, keyed by the address with one, two
# and three trailing octets removed (%spam_c, %spam_b, %spam_a).
my $spam_v6 = 0;
my %spam_a;
my %spam_b;
my %spam_c;

my $num_days = 90; # 3 months-ish back by default

# GetOptions returns false when the command line could not be parsed (for
# example a non-integer --days value); stop rather than silently reporting
# with the default window.
GetOptions("days=i" => \$num_days)
    or die "Usage: $0 [--days N]\n";

# Cutoff: entries dated before $oldest_time (epoch seconds) are ignored.
my $max_age = ($num_days * 86400);
my $oldest_time = time() - $max_age;

# gmtime in scalar context yields a printable UTC timestamp; it is used
# for the "Since ..." header of the report.
my $string_start = gmtime($oldest_time);
# within_date($text_date)
#
# Decide whether a date string is inside the reporting window.
#   $text_date - a date that is split on "-" into year, month and day
#                (the caller passes the YYYY-MM-DD text it matched).
# Returns 1 when midnight UTC of that date is at or after $oldest_time,
# and 0 otherwise.
#
# NOTE(review): in this view the Perl body is interrupted, between the
# closing brace of the if and the final return, by a run of lines that are
# not Perl. They look like the tail of the removed awk implementation shown
# by the diff rendering - confirm against the real file before relying on
# this span.
sub within_date($) {
my $text_date = shift;
# The middle field is the month; the variable name is misspelled but is
# used consistently on the next line.
my ($year, $momth, $day) = split(/-/, $text_date);
# timegm takes a zero-based month, hence the -1; the time of day is 00:00:00.
my $compare = timegm(0, 0, 0, $day, $momth-1, $year);
if ($compare >= $oldest_time) {
return 1;
}
END {
printf("%d spammers in IPv6\n", spammers_v6);
for (a in spammers_a) {
if (spammers_a[a] > 100) {
printf("%d spammers in class A network %s\n", spammers_a[a], a)
}
}
for (b in spammers_b) {
if (spammers_b[b] > 50) {
printf("%d spammers in class B network %s\n", spammers_b[b], b)
}
}
for (c in spammers_c) {
if (spammers_c[c] > 10) {
printf("%d spammers in class C network %s\n", spammers_c[c], c)
}
}
}' $data | sort -nrk1
# else
return 0;
}
# Scan every deny file and tally the entries whose date falls inside the
# reporting window. IPv6 addresses go into one counter; each IPv4 address
# is counted under its first three, first two and first one octets.
foreach my $filename (@datafiles) {
    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode, and the handle cannot clash with a global IN.
    open(my $in, '<', $filename)
        or die "Unable to open data file $filename: $!\n";

    while (defined(my $line = <$in>)) {
        chomp $line;

        # Only lines shaped like "<address> # YYYY-MM-DD" are counted.
        # The address must be non-empty, so a line that starts with
        # " # <date>" is not tallied as a network with an empty name.
        next unless $line =~ /^([0-9a-fA-F:.]+) # (\d\d\d\d-\d\d-\d\d)/;
        my ($addr, $date) = ($1, $2);

        next unless within_date($date);
        # print "found address $addr, date $date is good\n";

        if ($addr =~ /:/) {
            # Count IPv6 as one lump (for now!)
            $spam_v6++;
            next;
        }

        # Peel one trailing ".N" off per step; ++ on a missing key
        # starts the count at 1, so no explicit initialisation is needed.
        my $short = $addr;
        $short =~ s/\.\d+$//;
        $spam_c{$short}++;
        $short =~ s/\.\d+$//;
        $spam_b{$short}++;
        $short =~ s/\.\d+$//;
        $spam_a{$short}++;
    }

    # Release the handle before moving on to the next file; a failure
    # here is reported but does not discard the counts already gathered.
    close($in)
        or warn "Unable to close data file $filename: $!\n";
}
# Report: a header naming the start of the window, the IPv6 total, then
# one section per network size, each listed from the highest count down.
print "Since $string_start:\n";
printf("%d spammers in IPv6\n", $spam_v6);

# Each entry pairs the output format for a section with its counters.
my @sections = (
    ["%d spammers in class A network %s\n", \%spam_a],
    ["%d spammers in class B network %s\n", \%spam_b],
    ["%d spammers in class C network %s\n", \%spam_c],
);

for my $section (@sections) {
    my ($format, $count_of) = @{$section};
    my @networks = sort { $count_of->{$b} <=> $count_of->{$a} } keys %{$count_of};
    for my $network (@networks) {
        printf($format, $count_of->{$network}, $network);
    }
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment