Commit 120352af authored by unknown

New versions of mail_to_db.pl and pmail.pl.

Added email threading capabilities to both
programs.


tests/mail_to_db.pl:
  Changes to mail_to_db.pl
  - Removed the optional table name (the --table option). Future releases
    may require more than one table, in which case it is better to have
    fixed table names.
  - Fixed a division-by-zero error in the report, which occurred if the
    table was created but no mails were inserted.
  - Added fields message_id and in_reply_to.
tests/pmail.pl:
  Changed pmail:
  
  New option: --thread. Prints all subsequent replies in the thread.
  New option: --message_id. Prints message_id and number of replies found.
  Both options work recursively. That is, not only direct replies to the
  mail found are searched, but also replies to those replies, and so on,
  until the whole thread has been found.
  
  Clean up: Localized variables and moved code into functions.
parent 20542236
......@@ -17,7 +17,7 @@ use DBI;
use Getopt::Long;
$| = 1;
$VER = "2.6";
$VER = "3.0";
$opt_help = 0;
$opt_version = 0;
......@@ -26,7 +26,6 @@ $opt_host = undef();
$opt_port = undef();
$opt_socket = undef();
$opt_db = "mail";
$opt_table = "mails";
$opt_user = undef();
$opt_password = undef();
$opt_max_mail_size = 65536;
......@@ -97,7 +96,7 @@ sub main
print "the my.cnf file. This command is available from the latest MySQL\n";
print "distribution.\n";
}
GetOptions("help","version","host=s","port=i","socket=s","db=s","table=s",
GetOptions("help","version","host=s","port=i","socket=s","db=s",
"user=s","password=s","max_mail_size=i","create","test",
"no_path","debug","stop_on_error","stdin")
|| die "Wrong option! See $progname --help\n";
......@@ -123,7 +122,6 @@ sub main
|| die "Couldn't connect: $DBI::errstr\n";
die "You must specify the database; use --db=" if (!defined($opt_db));
die "You must specify the table; use --table=" if (!defined($opt_table));
create_table($dbh) if ($opt_create);
......@@ -218,9 +216,9 @@ sub main
print "Total number of mails:\t\t\t\t";
print $mail_inserted + $ignored;
print " (OK: ";
print sprintf("%.1f", (($mail_inserted / ($mail_inserted+$ignored)) * 100));
print sprintf("%.1f", ($mail_inserted + $ignored) ? (($mail_inserted / ($mail_inserted+$ignored)) * 100) : 0.0);
print "% Ignored: ";
print sprintf("%.1f", (($ignored / ($mail_inserted + $ignored)) * 100));
print sprintf("%.1f", ($mail_inserted + $ignored) ? (($ignored / ($mail_inserted + $ignored)) * 100) : 0);
print "%)\n";
print "################################ End Report ##################################\n";
exit(0);
......@@ -232,13 +230,15 @@ sub main
sub create_table
{
my ($dbh) = @_;
my ($dbh)= @_;
my ($sth, $query);
$query = <<EOF;
CREATE TABLE $opt_table
$query= <<EOF;
CREATE TABLE my_mail
(
mail_id MEDIUMINT UNSIGNED NOT NULL auto_increment,
message_id VARCHAR(255),
in_reply_to VARCHAR(255),
date DATETIME NOT NULL,
time_zone VARCHAR(20),
mail_from VARCHAR(120) NOT NULL,
......@@ -250,6 +250,8 @@ CREATE TABLE $opt_table
file VARCHAR(64) NOT NULL,
hash INTEGER NOT NULL,
KEY (mail_id),
KEY (message_id),
KEY (in_reply_to),
PRIMARY KEY (mail_from, date, hash))
TYPE=MyISAM COMMENT=''
EOF
......@@ -277,7 +279,7 @@ sub process_mail_file
chop if (substr($_, -1, 1) eq "\r");
if ($type ne "message")
{
if (/^Reply-To: (.*)/i)
if (/^Reply-To:\s*(.*)/i)
{
$type = "reply";
$values{$type} = $1;
......@@ -302,14 +304,27 @@ sub process_mail_file
$type = "subject";
$values{$type} = $1;
}
elsif (/^Message-Id:\s*(.*)/i)
{
$type = "message_id";
s/^\s*(<.*>)\s*/$1/;
$values{$type} = $1;
}
elsif (/^In-Reply-To:\s*(.*)/i)
{
$type = "in_reply_to";
s/^\s*(<.*>)\s*/$1/;
$values{$type} = $1;
}
elsif (/^Date: (.*)/i)
{
date_parser($1, \%values, $file_name);
$type = "rubbish";
}
elsif (/^[\w\W-]+:\s/)
# Catch those fields that we don't or can't handle (yet)
elsif (/^[\w\W-]+:/)
{
$type = "rubbish";
$type = "rubbish";
}
elsif ($_ eq "")
{
......@@ -319,6 +334,10 @@ sub process_mail_file
else
{
s/^\s*/ /;
if ($type eq 'message_id' || $type eq 'in_reply_to')
{
s/^\s*(<.*>)\s*/$1/;
}
$values{$type} .= $_;
}
}
......@@ -421,8 +440,10 @@ sub update_table
goto restart; # Some mails may have duplicated messages
}
$q = "INSERT INTO $opt_table (";
$q = "INSERT INTO my_mail (";
$q.= "mail_id,";
$q.= "message_id,";
$q.= "in_reply_to,";
$q.= "date,";
$q.= "time_zone,";
$q.= "mail_from,";
......@@ -435,6 +456,12 @@ sub update_table
$q.= "hash";
$q.= ") VALUES (";
$q.= "NULL,";
$q.= (defined($values->{'message_id'}) ?
$dbh->quote($values->{'message_id'}) : "NULL");
$q.= ",";
$q.= (defined($values->{'in_reply_to'}) ?
$dbh->quote($values->{'in_reply_to'}) : "NULL");
$q.= ",";
$q.= "'" . $values->{'date'} . "',";
$q.= (defined($values->{'time_zone'}) ?
$dbh->quote($values->{'time_zone'}) : "NULL");
......@@ -575,7 +602,6 @@ Options:
--port=# TCP/IP port to be used with connection.
--socket=... MySQL UNIX socket to be used with connection.
--db=... Database to be used.
--table=... Table name for mails.
--user=... Username for connecting.
--password=... Password for the user.
--stdin Read mails from stdin.
......
#!/usr/bin/perl
#!/usr/bin/perl -w
#
# Prints mails to standard output
#
......@@ -9,21 +9,25 @@
use DBI;
use Getopt::Long;
$VER="1.5";
$VER="2.0";
@fldnms= ("mail_from","mail_to","cc","date","time_zone","file","sbj","txt");
$fields=8;
@mail= (@from,@to,@cc,@date,@time_zone,@file,@sbj,@txt);
my $fields= 0;
my $base_q= "";
my $mail_count= 0;
$opt_user= $opt_password= "";
$opt_socket= "/tmp/mysql.sock";
$opt_port= 3306;
$opt_db="mail";
$opt_table="mails";
$opt_table="my_mail";
$opt_help=$opt_count=0;
$opt_thread= 0;
$opt_host= "";
$opt_message_id= 0;
GetOptions("help","count","port=i","db=s","table=s","host=s","password=s",
"user=s","socket=s") || usage();
"user=s","socket=s", "thread","message_id") || usage();
if ($opt_host eq '')
{
......@@ -39,81 +43,194 @@ if ($opt_help || !$ARGV[0])
#### Connect and parsing the query to MySQL
####
$dbh= DBI->connect("DBI:mysql:$opt_db:$opt_host:port=$opt_port:mysql_socket=$opt_mysql_socket", $opt_user,$opt_password, { PrintError => 0})
$dbh= DBI->connect("DBI:mysql:$opt_db:$opt_host:port=$opt_port:mysql_socket=$opt_socket", $opt_user,$opt_password, { PrintError => 0})
|| die $DBI::errstr;
if ($opt_count)
{
count_mails();
}
main();
$fields=0;
$query = "select ";
foreach $val (@fldnms)
####
#### main
####
sub main
{
if (!$fields)
my ($row, $val, $q, $mail, $sth);
if ($opt_count)
{
$query.= "$val";
count_mails();
}
else
$base_q= "SELECT ";
foreach $val (@fldnms)
{
if (!$fields)
{
$base_q.= "$val";
}
else
{
$base_q.= ",$val";
}
$fields++;
}
$base_q.= ",message_id" if ($opt_thread || $opt_message_id);
$base_q.= " FROM $opt_table";
$q= " WHERE $ARGV[0]";
$sth= $dbh->prepare($base_q . $q);
if (!$sth->execute)
{
print "$DBI::errstr\n";
$sth->finish;
die;
}
for (; ($row= $sth->fetchrow_arrayref); $mail_count++)
{
$query.= ",$val";
for ($i= 0; $i < $fields; $i++)
{
if ($opt_message_id)
{
$mail[$fields][$mail_count]= $row->[$fields];
$mail[$fields][$mail_count].= "\nNumber of Replies: " . get_nr_replies($row->[$fields]);
}
$mail[$i][$mail_count]= $row->[$i];
}
if ($opt_thread)
{
get_mail_by_message_id($row->[$fields], $mail);
}
}
$fields++;
print_mails($mail);
}
$query.= " from $opt_table where $ARGV[0] order by date desc";
####
#### Send query and save result
#### Fetches the mails whose in_reply_to matches the given message_id
#### and saves them in the mail variable. For every mail found, the
#### search is repeated recursively with that mail's own message_id,
#### until no more replies are found.
####
$sth= $dbh->prepare($query);
if (!$sth->execute)
sub get_mail_by_message_id
{
print "$DBI::errstr\n";
$sth->finish;
die;
}
for ($i=0; ($row= $sth->fetchrow_arrayref); $i++)
{
for ($j=0; $j < $fields; $j++)
my ($message_id, $mail)= @_;
my ($q, $query, $i, $row, $sth);
$q= " WHERE in_reply_to = \"$message_id\"";
$query= $base_q . $q;
$sth= $dbh->prepare($query);
if (!$sth->execute)
{
print "QUERY: $query\n$DBI::errstr\n";
$sth->finish;
die;
}
while (($row= $sth->fetchrow_arrayref))
{
$mail[$j][$i]= $row->[$j];
$mail_count++;
for ($i= 0; $i < $fields; $i++)
{
if ($opt_message_id)
{
$mail[$fields][$mail_count]= $row->[$fields];
$mail[$fields][$mail_count].= "\nNumber of Replies: " . get_nr_replies($row->[$fields]);
}
$mail[$i][$mail_count]= $row->[$i];
}
$new_message_id= $row->[$fields];
if (defined($new_message_id) && length($new_message_id))
{
get_mail_by_message_id($new_message_id, $mail);
}
}
return;
}
####
#### Print to stderr
#### Get number of replies for a given message_id
####
for ($i=0; $mail[0][$i]; $i++)
sub get_nr_replies
{
print "#" x 33;
print " " . ($i+1) . ". Mail ";
print "#" x 33;
print "\nFrom: $mail[0][$i]\n";
print "To: $mail[1][$i]\n";
print "Cc: $mail[2][$i]\n";
print "Date: $mail[3][$i]\n";
print "Timezone: $mail[4][$i]\n";
print "File: $mail[5][$i]\n";
print "Subject: $mail[6][$i]\n";
print "Message:\n$mail[7][$i]\n";
}
print "#" x 20;
print " Summary: ";
if ($i == 1)
{
print "$i Mail ";
print "matches the query ";
my ($message_id)= @_;
my ($sth, $sth2, $q, $row, $row2, $nr_replies);
$nr_replies= 0;
$q= "SELECT COUNT(*) FROM my_mail WHERE in_reply_to=\"$message_id\"";
$sth= $dbh->prepare($q);
if (!$sth->execute)
{
print "QUERY: $q\n$DBI::errstr\n";
$sth->finish;
die;
}
while (($row= $sth->fetchrow_arrayref))
{
if (($nr_replies= $row->[0]))
{
$q= "SELECT message_id FROM my_mail WHERE in_reply_to=\"$message_id\"";
$sth2= $dbh->prepare($q);
if (!$sth2->execute)
{
print "QUERY: $q\n$DBI::errstr\n";
$sth->finish;
die;
}
while (($row2= $sth2->fetchrow_arrayref))
{
# There may be several replies to the same mail. Also the
# replies to the 'parent' mail may contain several replies
# and so on. Thus we need to calculate it recursively.
$nr_replies+= get_nr_replies($row2->[0]);
}
}
return $nr_replies;
}
}
else
####
#### Print mails
####
sub print_mails
{
print "$i Mails ";
print "match the query ";
}
print "#" x 20;
print "\n";
my ($mail)= @_;
my ($i);
for ($i=0; $mail[0][$i]; $i++)
{
print "#" x 33;
print " " . ($i+1) . ". Mail ";
print "#" x 33;
print "\n";
if ($opt_message_id)
{
print "Msg ID: $mail[$fields][$i]\n";
}
print "From: $mail[0][$i]\n";
print "To: $mail[1][$i]\n";
print "Cc:" . (defined($mail[2][$i]) ? $mail[2][$i] : "") . "\n";
print "Date: $mail[3][$i]\n";
print "Timezone: $mail[4][$i]\n";
print "File: $mail[5][$i]\n";
print "Subject: $mail[6][$i]\n";
print "Message:\n$mail[7][$i]\n";
}
print "#" x 20;
print " Summary: ";
if ($i == 1)
{
print "$i Mail ";
print "matches the query ";
}
else
{
print "$i Mails ";
print "match the query ";
}
print "#" x 20;
print "\n";
}
####
#### Count the mails that match the query, but don't show them
......@@ -121,6 +238,8 @@ print "\n";
sub count_mails
{
my ($sth);
$sth= $dbh->prepare("select count(*) from $opt_table where $ARGV[0]");
if (!$sth->execute)
{
......@@ -154,15 +273,21 @@ sub usage
Usage: pmail [options] "SQL where clause"
Options:
--help show this help
--count Shows how many mails matches the query, but not the mails.
--db= database to use (Default: $opt_db)
--table= table to use (Default: $opt_table)
--host= Hostname which to connect (Default: $opt_host)
--socket= Unix socket to be used for connection (Default: $opt_socket)
--password= Password to use for mysql
--user= User to be used for mysql connection, if not current user
--port= mysql port to be used (Default: $opt_port)
--help show this help
--count Shows how many mails matches the query, but not the mails.
--db= database to use (Default: $opt_db)
--host= Hostname which to connect (Default: $opt_host)
--socket= Unix socket to be used for connection (Default: $opt_socket)
--password= Password to use for mysql
--user= User to be used for mysql connection, if not current user
--port= mysql port to be used (Default: $opt_port)
--thread Will search for possible replies to emails found by the search
criteria. Replies, if found, will be displayed right after the
original mail.
--message_id Display message_id on top of each mail. Useful when searching
email threads with --thread. On the second line is the number
of replies to the same thread, starting counting from that
mail (excluding possible parent mails).
"SQL where clause" is the end of the select clause,
where the condition is expressed. The result will
be the mail(s) that matches the condition and
......@@ -176,18 +301,20 @@ sub usage
- Subject
- Message text
The field names that can be used in the where clause are:
Field Type
- mail_from varchar(120)
- date datetime
- sbj varchar(200)
- txt mediumtext
- cc text
- mail_to text
- time_zone varchar(6)
- reply varchar(120)
- file varchar(32)
- hash int(11)
An example of the pmail:
Field Type
- message_id varchar(255) # Use with --thread and --message_id
- in_reply_to varchar(255) # Internally used by --thread
- mail_from varchar(120)
- date datetime
- sbj varchar(200)
- txt mediumtext
- cc text
- mail_to text
- time_zone varchar(6)
- reply varchar(120)
- file varchar(32)
- hash int(11)
An example of pmail:
pmail "txt like '%libmysql.dll%' and sbj like '%delphi%'"
NOTE: the txt field is NOT case sensitive!
EOF
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment