#!/usr/bin/perl 
# mailstats.pl; <glisha@gmail.com>; 31.10.2004
# http://glisha.softver.org.mk/ueb/
#
# utf-8
#
# Broi kolku posebni verzii na klienti za posta se koristele vo daden 
# mbox(lista, lichni stari poraki i sl.
#
# ako gi nema perl modulite sto mu trebaat
# perl -MCPAN -e 'install Encode';
# perl -MCPAN -e 'install MIME:Words';
# perl -MCPAN -e 'install MboxParser';
#

if ($ARGV[0] eq "") {
    print "Povikuvanje: $0 /pateka/do/mbox\n";
    exit;
} 

if (! -r $ARGV[0]) {
    print "Datotekata $ARGV[0] ne postoi ili nemozhe da ja chitash.\n";
    exit;
}

use Mail::MboxParser;
use Encode;
use MIME::Words qw(:all);

my $parseropts = {
        enable_cache    => 1,
        enable_grep     => 1,
        cache_file_name => '/tmp/cache-mailparse',
        decode          => 'HEADER',
    };

my $mb = Mail::MboxParser->new($ARGV[0], decode => 'ALL', parseropts => $parseropts);

while (my $msg = $mb->next_message) {
    $agent = "Непознато";
    $from = "Непознато";

    #ova go ostava From vo original, bez da dekodira
    # from: e posto sekoj mejl ima i samo From heder, da ne go
    # vadi i nego
    # $orig_from = $msg->get_field('from:'); 
    $orig_from = $msg->from->{email};

    # go otkrivam enkdingod na QP stringot
    #($encoding) = ($orig_from =~ /=\?([a-zA-Z0-9\-]+)\?/);
    # go dekodiram stringot
    #$fromraw = decode_mimewords($orig_from);
    #go prefralm of $encoding vo UTF-8, latinichnite gi ostavam
    #    if ($encoding ne '') {
    #    $from = decode($encoding, $fromraw);
    #    }
    
    # gi vadi User-Agent ili X-Mailer hederite
    # go postavuva $agent na toa sto e polno
    $useragent = $msg->get_field('user-agent');
    if (! $msg->error) { $agent = $useragent; }

    $xmailer = $msg->get_field('x-mailer');
    if (! $msg->error) { $agent = $xmailer; }

# trgni gi hederite posto get_field() gi vrakja i niv
$agent =~ s/User-Agent: |X-Mailer: //;

# hash: glisha@mail.net.mk -> Mutt1.3.1
# samo posledniot mejl i User-Agent go pamti
$nizata{$orig_from} = $agent;

#vaka go dobivam za stats.php
#print "$orig_from | $agent | ";
} 

# sekoj od elementite se sporeduva so sekoj drug i broi
foreach $key (keys (%nizata)){ 
    $y = 0;
    $value = $nizata{$key};

    foreach $key1 (keys (%nizata)) {
        if ( $value eq $nizata{$key1} ) { $y++; }
    }

# Mutt/1.4.2.1i => 1
$agenti{$value} = $y;
}

#broi kolku se windows kolku linux
@linux = ("X11","Mutt","KMail","Evolution","Sylpheed",
          "KNode","slrn","tin");

@windows = ("Microsoft","Windows","Forte Agent","CommuniGate",
            "IMP","40tude","Hamster", "Win32");

foreach $key (keys (%agenti)) {

    #windows 
    foreach (@windows) {
        if ( $key =~ /$_/ ) { $kolkuWin++; }
    }
    
    #linux
    foreach (@linux) {
        if ( $key =~ /$_/ ) { $kolkuLin++; }
    }
}

# printaj rezultatot
$kolkuMejla = keys(%nizata);
$kolkuAgenti = values(%agenti); #odam so values posto tie se pomali od keys vo ovoj sluchaj

print "На листава има вкупно $kolkuMejla корисници со различни адреси\n";
printf "Од нив %d се со непознати клиенти.\n",$kolkuMejla-$kolkuAgenti;

print "\nA се користат $kolkuAgenti различни верзии на клиенти за пошта\n";
print "Од нив:\n";
print "\t$kolkuWin се на Windows\n";
print "\t$kolkuLin се на Linux\n";
printf "\tОстанатите %d се webmail или слично.\n",$kolkuAgenti-($kolkuWin+$kolkuLin);

# kolku [tab] Agentot
print "\n#####################################################\n";
#sortiraj
foreach $key (sort { $agenti{$b} <=> $agenti{$a} } keys %agenti) {
  printf "%d\t %s\n", $agenti{$key}, $key;
}

