=head1 NAME
MimeMagic - provides eval test checking MIME types from header and file name/content.
=head1 SYNOPSIS
loadplugin Mail::SpamAssassin::Plugin::MimeMagic /usr/local/etc/mail/spamassassin/plugins/MimeMagic.pm
mimemagic_map application/ole-document application/ole-storage
describe TYPE_MISMATCH Content type mismatch
header TYPE_MISMATCH eval:mimemagic_mismatch()
score TYPE_MISMATCH 0.5
describe EXE_MISMATCH Content type mismatch for executable file
header EXE_MISMATCH eval:mimemagic_mismatch_datatype('executable')
score EXE_MISMATCH 2.0
describe ZIPRAR_MISMATCH ZIP file name for RAR file
header ZIPRAR_MISMATCH eval:mimemagic_mismatch('-','/zip','/rar')
score ZIPRAR_MISMATCH 1.0
describe OFFICE_MISMATCH Office document type mismatch
header OFFICE_MISMATCH eval:mimemagic_mismatch_extension('doc','rtf','xls')
score OFFICE_MISMATCH 0.5
describe OLE_ATTACH OLE part attached
header OLE_ATTACH eval:mimemagic_find_datatype('application/ole-storage')
score OLE_ATTACH 0.01
=head1 DESCRIPTION
This module provides eval tests for MIME types and mismatches between part
MIME content type, file name, and file content.
=head1 REQUIREMENT
=over
=item *
SpamAssassin
=item *
IO:Scalar
=item *
File::MimeInfo::Magic
=item *
freedesktop mime database
=back
=head1 CONFIGURATION
=head2 Eval tests
The find tests simply checks if there is a part matching the specified
criteria.
The mismatch tests all checks for differences between the MIME type
specified for a part, the parts file extension (if any), and what the
parts contents looks like.
=over
=item mimemagic_find
Optional parameters (in the order as listed):
=over
=item *
extension as in mimemagic_find_extension
=item *
mime type as in mimemagic_find_extensiontype
=item *
mime type as in mimemagic_find_datatype
=item *
mime type as in mimemagic_find_contenttype
=back
Check for parts matching the provided parameters or
any part at all if no parameters are specified.
=item mimemagic_find_extension
Required parameter: extension
Check for parts with a file name matching the specified extension.
Specifying the extension '' will match empty file names and file
names without extension but not parts without a file name.
=item mimemagic_find_extensiontype
Required parameter: MIME type
Check for parts with a file name with a extension mapped to the
specified MIME type.
=item mimemagic_find_datatype
Required parameter: MIME type
Check for parts with content that seems to be of the specified
MIME type.
=item mimemagic_find_contenttype
Required parameter: MIME type
Check for parts with a a content type matching the specified
MIME type.
=item mimemagic_mismatch
Optional parameters (in the order as listed):
=over
=item *
extension as in mimemagic_mismatch_extension
=item *
mime type as in mimemagic_mismatch_extensiontype
=item *
mime type as in mimemagic_mismatch_datatype
=item *
mime type as in mimemagic_mismatch_contenttype
=back
Checks all parts matching the provided parameters or
all parts if no parameters are specified.
=item mimemagic_mismatch_extension
Required parameter: extension
Only check parts with a file name matching the specified extension.
Specifying the extension '' will match empty file names and file
names without extension but not parts without a file name.
=item mimemagic_mismatch_extensiontype
Required parameter: MIME type
Only check parts with a file name with a extension mapped to the
specified MIME type.
=item mimemagic_mismatch_datatype
Required parameter: MIME type
Only check parts with content that seems to be of the specified
MIME type.
=item mimemagic_mismatch_contenttype
Required parameter: MIME type
Only check parts with a a content type matching the specified
MIME type.
=back
=head2 Options
=over
=item mimemagic_map
Map one MIME type to another for the mismatch checks.
=item mimemagic_datasize
Maximum size of data to check (default 16KB).
=item mimemagic_stderr
Print debug output to STDERR.
=back
=cut
package Mail::SpamAssassin::Plugin::MimeMagic;
# $Id: MimeMagic.pm,v 1.7 2009/06/30 14:01:22 jonas Exp $
use strict;
use base 'Mail::SpamAssassin::Plugin';
use IO::Scalar;
use File::MimeInfo::Magic ();
sub new {
my ($class,$mailsa) = @_;
$class = ref($class) || $class;
my $self = $class->SUPER::new($mailsa);
bless($self,$class);
$self->register_eval_rule('mimemagic_mismatch');
$self->register_eval_rule('mimemagic_mismatch_extension');
$self->register_eval_rule('mimemagic_mismatch_contenttype');
$self->register_eval_rule('mimemagic_mismatch_extensiontype');
$self->register_eval_rule('mimemagic_mismatch_datatype');
$self->register_eval_rule('mimemagic_find');
$self->register_eval_rule('mimemagic_find_extension');
$self->register_eval_rule('mimemagic_find_contenttype');
$self->register_eval_rule('mimemagic_find_extensiontype');
$self->register_eval_rule('mimemagic_find_datatype');
$self->{typemaps} = {
'application/pgpsignature' => 'text/plain',
'application/mbox' => 'text/plain',
'message/rfc822' => 'text/plain',
'application/msword' => 'application/olestorage',
'application/vndmsexcel' => 'application/olestorage',
};
$self->{cfg_datasize} = 16*1024;
#$self->dbg('registered');
return $self;
}
sub dbg {
my $self = shift;
my $msg = shift;
Mail::SpamAssassin::Plugin::dbg(sprintf("mimemagic: $msg",@_));
print STDERR sprintf("mimemagic: $msg\n",@_) if (defined($self) && $self->{cfg_stderr});
}
sub parse_config {
my ($self,$pars) = @_;
return 0 unless ($pars->{key} =~ /^mimemagic_(.+)$/i);
my $key = lc($1);
my @val = split(/\s+/,$pars->{value});
if ($key eq 'map') {
return 0 unless ($#val == 1);
$self->{typemaps}->{$self->_simplify($val[0])} = $self->_simplify($val[1]);
#$self->dbg('cfg map: "%s" -> "%s"',$val[0],$val[1]);
} elsif ($pars->{user_config}) {
return 0;
} elsif ($key =~ /^(?:stderr|datasize)$/) {
$self->{"cfg_$key"} = ($key eq 'datasize') ? eval($pars->{value}) : $pars->{value};
#$self->dbg('cfg %s: "%s"',$key,$self->{"cfg_$key"});
} else {
return 0;
}
$self->inhibit_further_callbacks();
return 1;
}
sub _simplify {
my ($self,$mt,$def) = @_;
$mt = lc($mt ? $mt : $def ? $def : '');
$mt =~ s/(^|\/)[Xx]-/$1/g;
$mt =~ s/[-_.]//g;
return $mt;
}
sub _massage_item {
my ($self,$mt,$ext) = @_;
return $mt unless (defined($mt));
if ($ext) {
$mt =~ s/^\.//;
} else {
$mt = $self->_simplify($mt);
}
$mt =~ s/([^-_.*?A-Za-z0-9])/\\$1/g;
return $mt;
}
sub _massage_list {
my ($self,$mtl,$ext) = @_;
return undef unless (defined($mtl));
my @ml = ();
foreach my $mt (@{$mtl}) {
push @ml, $self->_massage_item($mt,$ext) if (defined($mt));
}
return undef unless (@ml);
return $ml[0] unless ($#ml);
return '(?:'.join('|',@ml).')';
}
sub _massage_string {
my ($self,$mtl,$ext) = @_;
return undef unless (defined($mtl));
return $self->_massage_list([split(/\s*;\s*/,$mtl)],$ext);
}
sub _massage {
my ($self,$mtl,$ext) = @_;
return $self->_massage_string($mtl,$ext) unless (ref($mtl));
return $self->_massage_list($mtl,$ext) if (ref($mtl) eq 'ARRAY');
return $self->_massage_string($$mtl,$ext) if (ref($mtl) eq 'SCALAR');
return undef;
}
sub _compare_fuzz {
my ($self,$mt1,$mt2) = @_;
return 1 if ($mt1 eq '*BIN*' && $mt2 !~ /^(text|message)\//);
return 1 if ($mt1 eq '*TXT*' && $mt2 =~ /^(text|message)\//);
return 1 if ($mt1 eq 'application/octetstream' && $mt2 !~ /^(text|message)\//);
return 1 if (defined($self->{typemaps}->{$mt1}) && $self->{typemaps}->{$mt1} eq $mt2);
return 0;
}
sub _compare_types {
my ($self,$mt1,$mt2) = @_;
#$self->dbg('"%s" ?= "%s"',$mt1,$mt2);
return 1 if ($mt1 eq $mt2);
#$self->dbg('"%s" != "%s"',$mt1,$mt2);
return -1 if ($mt1 eq '' || $mt2 eq '');
return -1 if ($self->_compare_fuzz($mt1,$mt2));
return -1 if ($self->_compare_fuzz($mt2,$mt1));
#$self->dbg('"%s" !~ "%s"',$mt1,$mt2);
return 0;
}
sub _compare {
my ($self,$mt1,$mt2) = @_;
my $res = $self->_compare_types($mt1,$mt2);
$self->dbg('"%s" %s "%s"',$mt1,$res>0?'=':$res<0?'~':'!',$mt2);
return $res;
}
sub _load {
my ($self) = @_;
$self->{mmagic} = File::MimeInfo::Magic->new() unless ($self->{mmagic});
return $self->{mmagic} ? 1 : 0;
}
sub _type_data {
my ($self,$part) = @_;
return '' unless ($part->is_leaf);
if ($part->{magic_mime_type}) {
#$self->dbg('td stored magic type: "%s"',$part->{magic_mime_type});
return $self->_simplify($part->{magic_mime_type});
}
return '' unless ($self->_load);
my $data = ($self->{cfg_datasize} > 0) ? $part->decode($self->{cfg_datasize}) : $part->decode();
return '' unless (defined($data));
my $dh = new IO::Scalar(\$data);
my $mt = $self->{mmagic}->magic($dh);
$dh->close();
unless (defined($mt)) {
no warnings;
if (utf8::valid($data)) {
use bytes;
return '*TXT*' unless ($data =~ /[^\m\t\r\n\s\x20-\xF6\xF9-\xFF]/s);
} else {
return '*TXT*' unless ($data =~ /[^\m\t\r\n\s\x20-\x7E\x80-\xFE]/s);
}
return '*BIN*'
}
#$self->dbg('td magic type: "%s"',$mt);
$part->{magic_mime_type} = $mt;
return $self->_simplify($mt);
}
sub _type_name {
my ($self,$part) = @_;
return '' unless ($part->is_leaf);
my $fn = $part->{name};
return '' unless (defined($fn) && $fn ne '');
return '' unless ($self->_load);
my $mt = $self->{mmagic}->globs($part->{name});
return $self->_simplify($mt);
}
sub _check_parts {
my ($self,$pms,$mis,$ext,$extmt,$datmt,$cntmt) = @_;
$ext = $self->_massage($ext,1);
$extmt = $self->_massage($extmt);
$datmt = $self->_massage($datmt);
$cntmt = $self->_massage($cntmt);
#$self->dbg('ext @ "%s"',$ext) if (defined($ext));
#$self->dbg('extmt @ "%s"',$extmt) if (defined($extmt));
#$self->dbg('datmt @ "%s"',$datmt) if (defined($datmt));
#$self->dbg('cntmt @ "%s"',$cntmt) if (defined($cntmt));
my $pc = 0;
$pms->{mimemagic_parts} = {} unless (defined($pms->{mimemagic_parts}));
foreach my $part ($pms->{msg}->find_parts('.*',1,1)) {
$pc ++;
#$self->dbg('name : "%s"',$part->{name}) if (defined($part->{name}));
if (defined($ext)) {
next unless (defined($part->{name}));
next unless ($part->{name} =~ /\.$ext$/i || ($ext eq '' && $part->{name} !~ /\./));
}
$pms->{mimemagic_parts}->{$pc} = {} unless (defined($pms->{mimemagic_parts}->{$pc}));
$pms->{mimemagic_parts}->{$pc}->{mtc} = $self->_simplify($part->{type}) unless (defined($pms->{mimemagic_parts}->{$pc}->{mtc}));
#$self->dbg('cntmt : "%s"',$pms->{mimemagic_parts}->{$pc}->{mtc});
next if (defined($cntmt) && $pms->{mimemagic_parts}->{$pc}->{mtc} !~ /$cntmt/i);
$pms->{mimemagic_parts}->{$pc}->{mtn} = $self->_type_name($part) unless (defined($pms->{mimemagic_parts}->{$pc}->{mtn}));
#$self->dbg('extmt : "%s"',$pms->{mimemagic_parts}->{$pc}->{mtn});
next if (defined($extmt) && $pms->{mimemagic_parts}->{$pc}->{mtn} !~ /$extmt/i);
$pms->{mimemagic_parts}->{$pc}->{mtd} = $self->_type_data($part) unless (defined($pms->{mimemagic_parts}->{$pc}->{mtd}));
#$self->dbg('datmt : "%s"',$pms->{mimemagic_parts}->{$pc}->{mtd});
next if (defined($datmt) && $pms->{mimemagic_parts}->{$pc}->{mtd} !~ /$datmt/i);
return 1 unless ($mis);
$pms->{mimemagic_parts}->{$pc}->{res} = (
$self->_compare($pms->{mimemagic_parts}->{$pc}->{mtc},$pms->{mimemagic_parts}->{$pc}->{mtd}) &&
$self->_compare($pms->{mimemagic_parts}->{$pc}->{mtd},$pms->{mimemagic_parts}->{$pc}->{mtn}) &&
$self->_compare($pms->{mimemagic_parts}->{$pc}->{mtn},$pms->{mimemagic_parts}->{$pc}->{mtc})
) unless (defined($pms->{mimemagic_parts}->{$pc}->{res}));
next if ($pms->{mimemagic_parts}->{$pc}->{res});
return 1;
}
return 0;
}
sub mimemagic_mismatch {
my ($self,$pms,$ext,$extmt,$datmt,$cntmt) = @_;
$ext = undef if (defined($ext) && $ext =~ /^[-*?]$/);
$extmt = undef if (defined($extmt) && $extmt =~ /^[-*?]$/);
$datmt = undef if (defined($datmt) && $datmt =~ /^[-*?]$/);
$cntmt = undef if (defined($cntmt) && $cntmt =~ /^[-*?]$/);
return _check_parts($self,$pms,1,$ext,$extmt,$datmt,$cntmt);
}
sub mimemagic_mismatch_extension {
my ($self,$pms,@ext) = @_;
return 0 unless (@ext);
return _check_parts($self,$pms,1,\@ext);
}
sub mimemagic_mismatch_extensiontype {
my ($self,$pms,@extmt) = @_;
return 0 unless (@extmt);
return _check_parts($self,$pms,1,undef,\@extmt);
}
sub mimemagic_mismatch_datatype {
my ($self,$pms,@datmt) = @_;
return 0 unless (@datmt);
return _check_parts($self,$pms,1,undef,undef,\@datmt);
}
sub mimemagic_mismatch_contenttype {
my ($self,$pms,@cntmt) = @_;
return 0 unless (@cntmt);
return _check_parts($self,$pms,1,undef,undef,undef,\@cntmt);
}
sub mimemagic_find {
my ($self,$pms,$ext,$extmt,$datmt,$cntmt) = @_;
$ext = undef if (defined($ext) && $ext =~ /^[-*?]$/);
$extmt = undef if (defined($extmt) && $extmt =~ /^[-*?]$/);
$datmt = undef if (defined($datmt) && $datmt =~ /^[-*?]$/);
$cntmt = undef if (defined($cntmt) && $cntmt =~ /^[-*?]$/);
return _check_parts($self,$pms,0,$ext,$extmt,$datmt,$cntmt);
}
sub mimemagic_find_extension {
my ($self,$pms,@ext) = @_;
return 0 unless (@ext);
return _check_parts($self,$pms,0,\@ext);
}
sub mimemagic_find_extensiontype {
my ($self,$pms,@extmt) = @_;
return 0 unless (@extmt);
return _check_parts($self,$pms,0,undef,\@extmt);
}
sub mimemagic_find_datatype {
my ($self,$pms,@datmt) = @_;
return 0 unless (@datmt);
return _check_parts($self,$pms,0,undef,undef,\@datmt);
}
sub mimemagic_find_contenttype {
my ($self,$pms,@cntmt) = @_;
return 0 unless (@cntmt);
return _check_parts($self,$pms,0,undef,undef,undef,\@cntmt);
}
1;
(2008-02-01)