#! /usr/bin/perl

# spam-stats.pl
#
# $Id: spam-stats.pl,v 1.3 2004/11/21 05:41:41 root Exp $
#
# count and classify discard: and reject: lines in postfix mail logs,
# and tally the spam/clean verdicts logged by spamd and amavis
#
# Copyright Craig Sanders <cas@taz.net.au> 2001
#
# This script is licensed under the terms of the GNU General Public 
# License (GPL).
#
# the latest version can always be found at http://taz.net.au/postfix/scripts

use File::MMagic ;
use strict ;

# add bin directory to @INC for openlogfile.pl
use FindBin;
use lib "$FindBin::Bin";

require "openlogfile.pl" ;

# Script-wide state shared by the scanning loop and the report section.
my %count ;     # rejection category label => number of matching log lines
my %spamd ;     # "spam" / "clean" => verdicts seen on spamd and amavis lines
my $total ;     # sum of all %count values, accumulated while reporting
my $stotal ;    # sum of all %spamd values, accumulated while reporting
my $FH ;        # handle for the log file currently being read

# With no file names on the command line, fall back to the default mail log.
@ARGV = ("/var/log/mail.log") unless @ARGV ;

# Ordered classification table for rejection lines that need nothing more
# than a counter bump.  Each entry is [ pattern, label, adjust_spamd ]:
#   pattern      - tested against the normalised "reject: ..." line
#   label        - key incremented in %count; undef means "ignore the line"
#   adjust_spamd - when true, $spamd{"spam"} is decremented as well
# ORDER MATTERS: the first matching entry wins, so specific patterns must
# stay ahead of the generic ones that would also match (e.g. the VIRUS
# header/body checks before the plain header/body checks).
my @reject_rules = (
    [ qr/reject: .*Recipient address rejected: Greylisted/i,      "Greylisted delivery attempt" ],
    [ qr/reject: .*Use a host name, not your IP address/i,        "IP Address in HELO" ],
    [ qr/reject: .*Sobig.F/i,                                     "Sobig.F Virus" ],
    [ qr/reject: .*dynamic IP trespass spam rejected/i,           "Dynamic IP Trespass" ],
    [ qr/reject: mime-error improper use of 8-bit data in message header:/i, "strict 7-bit headers" ],
    # NOTE(review): the decrement presumably compensates for the same
    # message already having been tallied as tagged spam -- confirm.
    [ qr/reject: .*SpamAssassin Score far too high/i,             "SpamAssassin score far too high", 1 ],
    [ qr/reject: header .*: VIRUS/i,                              "header checks (VIRUS)" ],
    [ qr/reject: header /i,                                       "header checks (Spam)" ],
    [ qr/reject: .*: unwanted virus notification/i,               "Unwanted Virus Notification" ],
    [ qr/reject: body .*: VIRUS/i,                                "body checks (VIRUS)" ],
    [ qr/reject: body /i,                                         "body checks (Spam)" ],
    [ qr/reject: .*forgery of local address/i,                    "Local address forgery" ],
    [ qr/reject: .*user unknown/i,                                "User unknown" ],
    [ qr/reject: .*Recipient address rejected: Domain not found/i, "Recipient Domain Not Found" ],
    [ qr/reject: .*Sender address rejected: Domain not found/i,   "Sender Domain Not Found" ],
    [ qr/reject: .*Sender address rejected: /i,                   "Local access rule: Sender address rejected" ],
    [ qr/reject: ETRN /i,                                         "ETRN" ],
    [ qr/reject: .*domain not found/i,                            "Domain Not Found" ],
    [ qr/reject: .*need fully-qualified/i,                        "Need FQDN address" ],
    [ qr/reject: .*Please use user\@domain address forms only/i,  "Bad recipient form" ],
    [ qr/reject: .*Recipient address rejected/i,                  "Recipient address rejected" ],
    [ qr/reject: .*Helo command rejected/i,                       "Bad HELO" ],
    [ qr/reject: .*SMTP command pipelining/i,                     "Bad pipelining" ],
    [ qr/reject: .*cannot find your hostname/i,                   "unknown client" ],
    # server-side configuration problems are not counted at all
    [ qr/reject: .*Server configuration problem/i,                undef ],
) ;

# Scan every log file named on the command line, filling %count (rejection
# categories) and %spamd (spam/clean verdicts from spamd and amavis).
FILE: foreach my $file (@ARGV) {
    $FH = open_log_file($file) ;

    # open_log_file() comes from openlogfile.pl and is not visible here; in
    # case it reports failure by returning a false value rather than dying,
    # skip the file instead of reading from / closing a non-handle.
    unless ($FH) {
        warn "spam-stats: unable to open log file '$file', skipping\n" ;
        next FILE ;
    }

    LINE: while (my $line = <$FH>) {
        # (disabled) count messages tagged by an external spam scan:
        #if ($line =~ /spam_scan: yes/i) {
        #    $count{"SpamAssassin"}++ ;
        #};

        # only postfix discard/reject lines and spamd/amavis lines matter
        next LINE unless ($line =~ /postfix\/.* (?:discard|reject): |spamd\[\d+\]:|amavis\[\d+\]:/) ;
        # we don't care about local problems like quota
        next LINE if ($line =~ /Insufficient system storage;/i) ;
        next LINE if ($line =~ /Message size exceeds fixed limit/i) ;
        chomp $line ;

        # spamd verdict lines: tally and move on
        if ($line =~ / spamd\[\d+\]: /) {
            if    ($line =~ / clean message/)   { $spamd{"clean"}++ }
            elsif ($line =~ / identified spam/) { $spamd{"spam"}++ }
            next LINE ;
        }

        # amavis verdict lines share the same two counters
        if ($line =~ / amavis\[\d+\]: /) {
            if    ($line =~ /, quarantine spam-/) { $spamd{"spam"}++ }
            elsif ($line =~ / Passed, /)          { $spamd{"clean"}++ }
            next LINE ;
        }

        # treat discard as if it is reject, and drop everything before it
        $line =~ s/.*(?:discard|reject): /reject: /i ;

        # --- categories that need more than a table lookup ---------------
        if ($line =~ /reject:.*Relay access denied/) {
            $count{"Relay access denied"}++ ;
            next LINE ;
        }

        if ($line =~ /reject:.*blocked using/i) {
            # keep only the blocklist name that follows "blocked using "
            $line =~ s/.*blocked using //i ;
            $line =~ s/[, \t;].*// ;
            $count{"RBL $line"}++ ;
            next LINE ;
        }

        if ($line =~ /reject: .*access denied/i) {
            # strip the first two colon-terminated fields after "reject: ",
            # then keep the text up to the next colon as the rule name
            $line =~ s/reject: [^:]+: [^:]+: // ;
            $line =~ s/:.*// ;
            $count{"Local access rule: $line"}++ ;
            next LINE ;
        }

        # --- table-driven categories: first matching rule wins -----------
        my $hit ;
        foreach my $rule (@reject_rules) {
            if ($line =~ $rule->[0]) {
                $hit = $rule ;
                last ;
            }
        }

        unless ($hit) {
            # unclassified: count it and echo the line so it can be inspected
            $count{"Other"}++ ;
            print $line,"\n" ;
            next LINE ;
        }

        my (undef, $label, $adjust_spamd) = @$hit ;
        next LINE unless defined $label ;       # deliberately ignored line

        $count{$label}++ ;
        $spamd{"spam"}-- if $adjust_spamd ;
    }
    close($FH) ;
}

# Print one line per rejection category, least frequent first, while
# accumulating the grand total of rejected/discarded messages in $total.
my @labels_by_count = sort { $count{$a} <=> $count{$b} } keys %count ;
for my $label (@labels_by_count) {
    my $hits = $count{$label} ;
    printf "%7i\t%s\n", $hits, $label ;
    $total += $hits ;
}

printf "\n%7i\tTOTAL\n\n\n", $total ;

# Print the spam/clean verdict tallies gathered from spamd and amavis
# lines, smallest first, and accumulate their sum in $stotal.
print "Spamassassin stats:\n";
my @verdicts_by_count = sort { $spamd{$a} <=> $spamd{$b} } keys %spamd ;
for my $verdict (@verdicts_by_count) {
    my $seen = $spamd{$verdict} ;
    printf "%7i\t%s\n", $seen, $verdict ;
    $stotal += $seen ;
}

printf "%7i\tTOTAL\n", $stotal ;

# Return $part as a percentage of $whole.
# A zero (or undefined) $whole yields 0 instead of dying with "Illegal
# division by zero", which happens whenever the logs contain no matching
# lines (e.g. no spamd/amavis entries at all).
sub _percentage {
    my ($part, $whole) = @_ ;
    return 0 unless $whole ;
    return (($part || 0) / $whole) * 100 ;
}

# Summary ratios derived from the two tallies:
#   spam:non-spam   - rejected + tagged spam, out of everything counted
#   tagged messages - tagged spam, out of all spamd/amavis verdicts
#   rejected spam   - rejections, out of rejected + tagged spam
print "\nPercentages:\n";
{
    # any of these is still undefined when nothing of that kind was logged
    my $rejected = $total         || 0 ;
    my $scanned  = $stotal        || 0 ;
    my $tagged   = $spamd{"spam"} || 0 ;

    my $all_spam = $rejected + $tagged ;
    my $all_mail = $rejected + $scanned ;

    printf "spam:non-spam   (%i/%i) %3.2f%%\n", $all_spam, $all_mail, _percentage($all_spam, $all_mail) ;
    printf "tagged messages (%i/%i) %3.2f%%\n", $tagged,   $scanned,  _percentage($tagged, $scanned) ;
    printf "rejected spam   (%i/%i) %3.2f%%\n", $rejected, $all_spam, _percentage($rejected, $all_spam) ;
}

