xabean dot com : my code : train

xabean dot com : my code : train_spam

Navigation
Daily
Comix
Here's the syntax-highlighted version of train_spam. Click here to download the source code.
#!/usr/bin/perl

# IMAP based Spamassassin bayes training
# (C) Richard Harman 2005, perl ((__NOSPAM__)) richardharman.com
#
# Distribution permitted under the GNU GPL.
#
# This script is (basically) meant to be run every couple minutes in a user's crontab.
# This gives the user the option of having their mail trained automagically or not.

use strict;
use warnings;

# Find the invoking user's home directory: element 7 of the passwd entry for
# the real UID ($<).  Bail out early if there is no such entry, rather than
# silently working under "/Maildir".
my $homedir = ( getpwuid($<) )[7];
die "Couldn't determine home directory for uid $<\n"
  unless defined $homedir && length $homedir;

my $userdir = "$homedir/Maildir";

# Create the Maildir if it is missing.  This is a plain mkdir call on purpose:
# writing "do mkdir(...)" makes perl take mkdir's return value as the name of
# a file to load and execute.
if ( !-d $userdir ) {
  mkdir($userdir) or die "Couldn't create $userdir ($!)";
}

# The folders we expect to find mail waiting to be learned in ("Training"),
# and the folders that mail is moved to afterwards ("Trained").
my $TRAINING_SPAM = "$userdir/.SPAM.Training.SPAM";
my $TRAINING_HAM  = "$userdir/.SPAM.Training.HAM";
my $TRAINED_SPAM  = "$userdir/.SPAM.Trained.SPAM";
my $TRAINED_HAM   = "$userdir/.SPAM.Trained.HAM";

# Make each folder, and the cur/new/tmp subdirectories inside it, if they
# don't already exist.  The parent is listed first so it exists before its
# children are created.  mkdir is called directly (not via "do", which would
# treat mkdir's return value as a file to execute) and a failure is fatal,
# since nothing later in the script can work without these directories.
foreach my $folder ( $TRAINING_SPAM, $TRAINING_HAM, $TRAINED_SPAM, $TRAINED_HAM ) {
  foreach my $path ( $folder, map { "$folder/$_" } qw(cur new tmp) ) {
    next if -d $path;
    mkdir($path) or die "Couldn't create $path ($!)";
  }
}

# pipe to sa-learn

# Feed every message in one training folder to sa-learn, then move it into the
# matching trained folder.
#
#   $training_dir - folder holding mail waiting to be learned
#   $trained_dir  - folder whose "new" subdirectory receives learned mail
#   $learn_flag   - sa-learn mode switch ("--spam" or "--ham")
#   $label        - word used in error messages ("spam" or "ham")
#
# Dies if a directory, a message, or the pipe to sa-learn cannot be opened.
# If sa-learn exits with a failure status, or the message cannot be moved, a
# warning is printed and the message stays where it is so a later run can
# retry it.
sub train_folder {
  my ( $training_dir, $trained_dir, $learn_flag, $label ) = @_;

  # maildir has "cur" and "new" directories for read and unread mail
  foreach my $maildir ( "cur", "new" ) {
    my $dir = "$training_dir/$maildir";

    opendir( my $dir_handle, $dir ) or die "Couldn't open $dir for reading ($!)";

    # find NORMAL files; full paths are used throughout so nothing depends on
    # the current working directory
    my @files = grep { -f "$dir/$_" } readdir($dir_handle);
    closedir $dir_handle;

    foreach my $file (@files) {
      my $source = "$dir/$file";

      # Slurp the whole message.  $/ is localized to this one read so the
      # rest of the program keeps normal line-at-a-time input.
      open( my $message_handle, "<", $source )
        or die "Couldn't open $source for reading! ($!)";
      my $message = do { local $/; <$message_handle> };
      close $message_handle;

      # pipe it into sa-learn.  The shell applies "2>&1" before
      # "> /dev/null", so sa-learn's stderr ends up on our stdout and only
      # its normal output is discarded.
      open( my $learn_handle, "|-", "/usr/bin/sa-learn -L $learn_flag 2>&1 > /dev/null" )
        or die "Couldn't open pipe to sa-learn for $label learning ($!)";
      print {$learn_handle} $message;

      # Closing a pipe waits for the child and returns false if it exited
      # non-zero ($! is 0 in that case and $? holds the status).  Only file
      # the message as trained once sa-learn has finished successfully.
      if ( !close($learn_handle) ) {
        my $reason = $! ? "$!" : "exit status " . ( $? >> 8 );
        warn "sa-learn $learn_flag failed for $source ($reason)\n";
        next;
      }

      # move the file to the trained folder
      rename( $source, "$trained_dir/new/$file" )
        or warn "Couldn't move $source to $trained_dir/new/$file ($!)\n";
    }
  }

  return;
}

train_folder( $TRAINING_SPAM, $TRAINED_SPAM, "--spam", "spam" );
train_folder( $TRAINING_HAM,  $TRAINED_HAM,  "--ham",  "ham" );