Commit e0473960 authored by unknown

Made new version of mail_to_db.pl


tests/mail_to_db.pl:
  Fixed a bug in mail_to_db.pl where it could not handle
  Mac-type inboxes. Also made the output clearer.
parent c865e963
...@@ -17,7 +17,7 @@ use DBI; ...@@ -17,7 +17,7 @@ use DBI;
use Getopt::Long; use Getopt::Long;
$| = 1; $| = 1;
$VER = "2.4"; $VER = "2.5";
$opt_help = 0; $opt_help = 0;
$opt_version = 0; $opt_version = 0;
...@@ -152,19 +152,41 @@ sub main ...@@ -152,19 +152,41 @@ sub main
$dbh->disconnect if (!$opt_test); $dbh->disconnect if (!$opt_test);
$ignored = ($mail_no_from_f + $mail_no_subject_f + $mail_no_txt_f + $ignored = ($mail_no_from_f + $mail_no_subject_f + $mail_no_txt_f +
$mail_too_big + $mail_duplicates); $mail_too_big + $mail_duplicates + $mail_fixed);
print "Mails inserted:\t\t\t$mail_inserted\n"; print "Mails inserted:\t\t\t\t\t$mail_inserted\n\n";
print "Mails ignored:\t\t\t$ignored\n"; if ($ignored)
print "Mails without \"From:\" -field:\t$mail_no_from_f\n"; {
print "Mails without message:\t\t$mail_no_txt_f\n"; print "Ignored mails\n";
print "Mails without subject:\t\t$mail_no_subject_f\n"; print "-------------\n";
print "Too big mails (> $opt_max_mail_size):\t$mail_too_big\n"; $mail_no_from_f ?
print "Duplicate mails:\t\t$mail_duplicates\n"; print "Reason: mail without \"From:\" -field:\t\t$mail_no_from_f\n" :
print "Forwarded mails:\t\t$mail_forwarded\n"; print "";
print "Total number of mails:\t\t"; $mail_no_txt_f ?
print "Reason: mail without message:\t\t\t$mail_no_txt_f\n" :
print "";
$mail_no_subject_f ?
print "Reason: mail without subject:\t\t\t$mail_no_subject_f\n" :
print "";
$mail_too_big ?
print "Reason: mail too big (over $opt_max_mail_size bytes):\t$mail_too_big\n" :
print "";
$mail_duplicates ?
print "Reason: duplicate mail, or in db already:\t$mail_duplicates\n" :
print "";
$mail_fixed ?
print "Reason: mail was an unsubscribe - mail:\t\t$mail_fixed\n" :
print "";
print " ";
print "=" . "=" x length("$ignored") . "=\n";
print "Total number of ignored mails:\t\t\t$ignored\n\n";
}
print "Total number of mails:\t\t\t\t";
print $mail_inserted + $ignored; print $mail_inserted + $ignored;
print "\n"; print " (OK: ";
print "Mails with unsubscribe removed:\t$mail_fixed\n"; print sprintf("%.1f", (($mail_inserted / ($mail_inserted+$ignored)) * 100));
print "% Ignored: ";
print sprintf("%.1f", (($ignored / ($mail_inserted + $ignored)) * 100));
print "%)\n";
exit(0); exit(0);
} }
...@@ -269,7 +291,8 @@ sub process_mail_file ...@@ -269,7 +291,8 @@ sub process_mail_file
$values{$type} .= "\n" . $_; $values{$type} .= "\n" . $_;
$check--; $check--;
} }
elsif (/^From .* \d\d:\d\d:\d\d\s\d\d\d\d$/) elsif (/^From .* \d\d:\d\d:\d\d\s\d\d\d\d$/ ||
/^From .* \d\d\d\d\s\d\d:\d\d:\d\d/)
{ {
$values{'hash'} = checksum("$values{'message'}"); $values{'hash'} = checksum("$values{'message'}");
update_table($dbh, $file_name, \%values); update_table($dbh, $file_name, \%values);
...@@ -338,26 +361,26 @@ sub date_parser ...@@ -338,26 +361,26 @@ sub date_parser
sub update_table sub update_table
{ {
my($dbh, $file_name, $values) = @_; my($dbh, $file_name, $values) = @_;
my($q,$tail,$message); my($q, $tail, $message);
if (!defined($values->{'subject'}) || !defined($values->{'to'})) if (!defined($values->{'subject'}) || !defined($values->{'to'}))
{ {
$mail_no_subject_f++; $mail_no_subject_f++;
return; # Ignore these return; # Ignore these
} }
$message=$values->{'message'}; $message = $values->{'message'};
$message =~ s/^\s*//; #removes whitespaces from the beginning $message =~ s/^\s*//; # removes whitespaces from the beginning
restart: restart:
$message =~ s/[\s\n>]*$//; #removes whitespaces and '>' from the end $message =~ s/[\s\n>]*$//; # removes whitespaces and '>' from the end
$values->{'message'}=$message; $values->{'message'} = $message;
foreach $tail (@remove_tail) foreach $tail (@remove_tail)
{ {
$message =~ s/$tail//; $message =~ s/$tail//;
} }
if ($message ne $values->{'message'}) if ($message ne $values->{'message'})
{ {
$message =~ s/\s*$//; #removes whitespaces from the end $message =~ s/\s*$//; # removes whitespaces from the end
$mail_fixed++; $mail_fixed++;
goto restart; # Some mails may have duplicated messages goto restart; # Some mails may have duplicated messages
} }
...@@ -445,7 +468,7 @@ sub update_table ...@@ -445,7 +468,7 @@ sub update_table
sub checksum sub checksum
{ {
my ($txt)= @_; my ($txt)= @_;
my ($crc,$i,$count); my ($crc, $i, $count);
$count = length($txt); $count = length($txt);
for ($crc = $i = 0; $i < $count ; $i++) for ($crc = $i = 0; $i < $count ; $i++)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment