#!/usr/bin/perl
# IMAP based Spamassassin bayes training
# (C) Richard Harman 2005, perl ((__NOSPAM__)) richardharman.com
#
# Distribution permitted under the GNU GPL.
#
# This script is (basically) meant to be run every couple minutes in a user's crontab.
# This gives the user the option of having their mail trained automagically or not.
#
# How it works: every regular file found in the "cur" and "new" subdirectories
# of the two Training folders is piped into sa-learn (as spam or ham,
# respectively) and then moved into the "new" subdirectory of the matching
# Trained folder.  A message is only moved once sa-learn has exited
# successfully, so a failed run leaves it in place to be retried next time.

use strict;
use warnings;

# find the user's homedir
my $homedir = ( getpwuid($<) )[7];
die "Couldn't determine the home directory for uid $<\n"
    if !defined $homedir || $homedir eq '';

my $userdir = "$homedir/Maildir";
ensure_dir($userdir);

# Here's the folders where we expect to find, and move spam from/to.
my $TRAINING_SPAM = "$userdir/.SPAM.Training.SPAM";
my $TRAINING_HAM  = "$userdir/.SPAM.Training.HAM";
my $TRAINED_SPAM  = "$userdir/.SPAM.Trained.SPAM";
my $TRAINED_HAM   = "$userdir/.SPAM.Trained.HAM";

# make the directories (and their cur/new/tmp subdirectories) if they don't
# already exist
foreach my $folder ( $TRAINING_SPAM, $TRAINING_HAM, $TRAINED_SPAM, $TRAINED_HAM ) {
    ensure_dir($folder);
    ensure_dir("$folder/$_") for ( "cur", "new", "tmp" );
}

# pipe to sa-learn
train_folder( "spam", $TRAINING_SPAM, $TRAINED_SPAM );
train_folder( "ham",  $TRAINING_HAM,  $TRAINED_HAM );

# ensure_dir($path)
#
# Create $path unless it is already a directory.  Dies if the directory
# cannot be created.  The second -d test tolerates another process having
# created the directory between our check and our mkdir.
sub ensure_dir {
    my ($path) = @_;

    return if -d $path;
    mkdir($path)
        or -d $path
        or die "Couldn't create directory $path ($!)";
    return;
}

# train_folder($kind, $training_dir, $trained_dir)
#
# $kind is "spam" or "ham" and selects the sa-learn mode.  Every regular file
# in $training_dir/cur and $training_dir/new is fed to sa-learn; files that
# were learned successfully are moved to $trained_dir/new.
#
# Dies if a training subdirectory cannot be read; per-message failures are
# reported with warn and the message is left where it was.
sub train_folder {
    my ( $kind, $training_dir, $trained_dir ) = @_;

    # maildir has "cur" and "new" directories for read and unread mail
    foreach my $maildir ( "cur", "new" ) {
        my $dir = "$training_dir/$maildir";

        opendir( my $dh, $dir )
            or die "Couldn't open $dir for reading ($!)";

        # find NORMAL files; readdir returns bare names, so test the full
        # path instead of relying on the current working directory
        my @files = grep { -f "$dir/$_" } readdir($dh);
        closedir $dh;

        foreach my $file (@files) {
            my $source = "$dir/$file";
            my $target = "$trained_dir/new/$file";

            # leave the message in the training folder if learning failed
            next if !learn_message( $kind, $source );

            # move the file to the trained folder
            rename( $source, $target )
                or warn "Couldn't move $source to $target ($!)\n";
        }
    }
    return;
}

# learn_message($kind, $path)
#
# Pipe the message stored at $path into "sa-learn -L --$kind".
# Returns 1 if sa-learn exited successfully, 0 (after a warning) otherwise.
# Dies if the message cannot be opened or the pipe cannot be started.
sub learn_message {
    my ( $kind, $path ) = @_;

    # open the message first so that sa-learn is never started with nothing
    # to read
    open( my $message, "<", $path )
        or die "Couldn't open $path for reading! ($!)";

    # Shell redirection order matters here: "2>&1" first points sa-learn's
    # stderr at this script's stdout, then "> /dev/null" discards sa-learn's
    # own stdout.  Error output therefore stays visible (e.g. in cron mail)
    # while normal output is dropped.  NOTE(review): kept exactly as the
    # original wrote it; confirm this ordering is the intended one.
    open( my $learner, "|-", "/usr/bin/sa-learn -L --$kind 2>&1 > /dev/null" )
        or die "Couldn't open pipe to sa-learn for $kind learning ($!)";

    {
        # slurp mode, limited to this block so $/ is restored afterwards
        local $/ = undef;

        # pipe it into sa-learn
        print {$learner} <$message>;
    }
    close $message;

    # closing a pipe waits for the child; a false return means either an I/O
    # error ($! set) or a non-zero exit status (reported in $?)
    if ( !close $learner ) {
        warn $!
            ? "Error closing pipe to sa-learn for $path ($!)\n"
            : "sa-learn --$kind failed for $path (wait status $?)\n";
        return 0;
    }
    return 1;
}
xabean dot com : my code : train_spam
Here's the syntax-highlighted version of train_spam. Click here to download the source code.