ft benchmark suite: initial checkin

parent 03fbbb7a
......@@ -602,3 +602,6 @@ vio/test-ssl
vio/test-sslclient
vio/test-sslserver
vio/viotest-ssl
myisam/ftbench/var/*
myisam/ftbench/data
myisam/ftbench/t
#!/usr/bin/perl
# compares out-files (as created by Ereport.pl) from dir1/*.out and dir2/*.out
# for each effectiveness column computes the probability of the hypothesis
# "Both files have the same effectiveness"
# sign test is used to verify that test results are statistically
# significant to support the hypothesis. Function is computed on the fly.
# basic formula is \sum_{r=0}^R C_N^r 2^{-N}
# As N can be big, we'll work with logarithms
# log(2), needed for the 2^{-N} factor of every term
$log2=log(2);

# probab(N, R)
#   Cumulative sign-test probability  \sum_{r=0}^{R} C_N^r 2^{-N},
#   i.e. the chance of seeing at most R "minority" outcomes among N
#   non-tied comparisons when both sides are equally good.
#   N - total number of non-tied comparisons
#   R - upper summation bound
#   Returns the sum as a plain floating point number.
#   Every term is evaluated as exp() of a sum of logarithms, so large N
#   does not overflow the factorials.
sub probab {
  # "my $N=shift, $R=shift" would make only $N lexical and leave $R
  # (and, likewise, $sum) as package globals; declare them as a list.
  my ($N, $R)=@_;
  my $sum=0;
  # loop invariants; the order of the subtractions below is kept as in the
  # formula, so the floating point result is unchanged
  my $logfac_N=logfac($N);
  my $N_log2=$N*$log2;
  for my $r (0..$R) {
    $sum+=exp($logfac_N-logfac($r)-logfac($N-$r)-$N_log2);
  }
  return $sum;
}
# logfac(n) - natural logarithm of n!
#   n - non-negative number; dies on negative input
#   While n does not exceed the last index of @logfactab (below), the
#   tabulated value is returned; for anything larger the Stirling
#   approximation  n*ln(n) - n + ln(2*pi*n)/2  is used.
sub logfac {
  my ($n)=@_;
  if ($n<0) { die "n=$n<0"; }
  if ($n>$#logfactab) {
    return $n*log($n)-$n+log(2*3.14159265358*$n)/2;
  }
  return $logfactab[$n];
}

# ln(n!) indexed by n; filled in at run time, before main() calls logfac()
@logfactab=(
  0,                                     # n = 0
  0,                                     # n = 1
  0.693147180559945, 1.79175946922805,   # n = 2, 3
  3.17805383034795,  4.78749174278205,   # n = 4, 5
  6.57925121201010,  8.52516136106541,   # n = 6, 7
  10.6046029027453,  12.8018274800815,   # n = 8, 9
  15.1044125730755,  17.5023078458739,   # n = 10, 11
  19.9872144956619,  22.5521638531234,   # n = 12, 13
  25.1912211827387,  27.8992713838409,   # n = 14, 15
  30.6718601060807,  33.5050734501369,   # n = 16, 17
  36.3954452080331,  39.3398841871995,   # n = 18, 19
  42.3356164607535,                      # n = 20
);
############################# main () ###############################
#$p=shift; $m=shift; $p-=$m;
#if($p>$m) {
# print "1 > 2 [+$p-$m]: ", probab($p+$m, $m), "\n";
#} elsif($p<$m) {
# print "1 < 2 [+$p-$m]: ", probab($p+$m, $p), "\n";
#} else {
# print "1 = 2 [+$p-$m]: ", probab($p+$m, $m), "\n";
#}
#exit;
# Usage check: exactly two arguments, both existing directories.
die "Use: $0 dir1 dir2\n" unless @ARGV==2 &&
  -d ($dir1=shift) && -d ($dir2=shift);

# Collect the base names of dir1/*.out.
# The directory is read directly instead of running `cd $dir1; echo *.out`:
# that passed $dir1 through the shell unquoted, split file names on
# whitespace and stripped ".out" from the middle of names as well.
# As with the shell glob, names starting with a dot are ignored.
opendir(DIR1,$dir1) || die "Cannot read $dir1: $!";
@files=sort map { /^([^.].*)\.out\z/ ? $1 : () } readdir(DIR1);
closedir DIR1;
die "No *.out files in $dir1\n" unless @files;

# $total collects the sign ("+", "-" or "=") returned by every per-file
# rep() call; it is used as the label of the summary lines at the end.
$total="";
for $file (@files) {
  open(OUT1,"<",$out1="$dir1/$file.out") || die "Cannot open $out1: $!";
  open(OUT2,"<",$out2="$dir2/$file.out") || die "Cannot open $out2: $!";
  # per-column counters for this file:
  #   $p[i] - lines where dir1's value is greater
  #   $m[i] - lines where dir2's value is greater
  @p=@m=();
  while(!eof(OUT1) || !eof(OUT2)) {
    # the first field of every line is dropped, the rest are compared
    $_=<OUT1>; @l1=split; shift @l1;
    $_=<OUT2>; @l2=split; shift @l2;
    # a file that ends early yields an empty line here and is caught too
    die "Number of columns differ in line $.\n" unless $#l1 == $#l2;
    for (0..$#l1) {
      $p[$_]+= $l1[$_] > $l2[$_];
      $m[$_]+= $l1[$_] < $l2[$_];
    }
  }
  # report this file and add its counters to the grand totals;
  # the column index is shown only when there is more than one column
  for (0..$#l1) {
    $pp[$_]+=$p[$_]; $mm[$_]+=$m[$_];
    $total.=rep($file, ($#l1 ? $_ : undef), $p[$_], $m[$_]);
  }
  close OUT1;
  close OUT2;
}

# summary over all files, one line per column
for (0..$#l1) {
  rep($total, ($#l1 ? $_ : undef), $pp[$_], $mm[$_]);
}
# rep(test, n, p, m)
#   Prints one report line and returns the sign of the comparison.
#   test - label printed in the first column
#   n    - column index, or undef to leave the column field empty
#   p    - number of comparisons won by $dir1
#   m    - number of comparisons won by $dir2
#   The probability printed last is probab(p+m, min(p,m)).
#   Returns "+" if p>m, "-" if p<m and "=" otherwise.
sub rep {
  my ($test, $n, $p, $m)=@_;
  my ($c, $r);
  if ($p>$m)    { $c=">"; $r="+"; }
  elsif($p<$m)  { $c="<"; $r="-"; }
  else          { $c="="; $r="="; }
  my $col=defined $n ? " $n: " : "";
  # $col, $dir1 and $dir2 are passed as arguments, not interpolated into
  # the format: a "%" in a directory name would otherwise be taken as a
  # conversion and garble the line. The output is the same as before.
  printf "%-8s %s %s %s %s [+%03d-%03d]: %16.15f\n",
    $test, $col, $dir1, $c, $dir2, $p, $m,
    probab($p+$m, ($p>=$m ? $m : $p));
  return $r;
}
#!/usr/bin/perl
# Reads ranked query results (eval_output) and relevance judgements
# (qrels_file) and prints, for every query that has at least one judgement,
# the query id and its Favg(0.5) value.
#   eval_output: first line is a positive integer, then lines of
#                "qid docid score" (score must contain a decimal point)
#   qrels_file:  lines of "qid docid", qid may have leading zeros
# Query ids are processed in increasing order starting from 1; both files
# are expected to be grouped by query id in that order.
die "Use: $0 eval_output qrels_file\n" unless @ARGV==2;

open(EOUT,$eout=shift) || die "Cannot open $eout: $!";
open(RELJ,$relj=shift) || die "Cannot open $relj: $!";

$_=<EOUT>;
# (the message used to end in "\n " - the stray space is removed)
die "$eout must start with a number!\n" unless /^[1-9][0-9]*\n/;
$ndocs=$_+0;    # presumably the number of documents; not used below

# Favg measure = 1/(a/Pavg+(1-a)/Ravg)
# F0 : a=0   -- ignore precision
# F5 : a=0.5
# F1 : a=1   -- ignore recall
# Works on the globals $Pavg and $Ravg; yields 0 if either of them is 0.
sub Favg { my $a=shift; $Pavg*$Ravg ? 1/($a/$Pavg+(1-$a)/$Ravg) : 0; }

# pending_qid(line, is_eval)
#   Query id of a look-ahead line if the line is well-formed, 0 otherwise
#   (also for undef, i.e. after end of file). The patterns mirror the ones
#   used in the main loop, so a line accepted here is guaranteed to be
#   consumed once $qid reaches the returned value.
sub pending_qid {
  my ($line, $is_eval)=@_;
  return 0 unless defined $line;
  if ($is_eval) {
    return $1 if $line =~ /^([1-9]\d*)\s+\d+\s+\d+\.\d+/;
  } else {
    return $1 if $line =~ /^0*(\d+)\s+\d+/;
  }
  return 0;
}

# one output line per reported query
format STDOUT =
@##### @#.#######
$qid, Favg(0.5)
.

$qid=0;
# one line of look-ahead is kept for each file
$relj_str=<RELJ>;
$eout_str=<EOUT>;

# Testing eof() alone is not enough: the look-ahead lines may already hold
# the first (and only) line of the last query while both files are at EOF,
# and that query would then never be reported. So the loop also continues
# while a look-ahead line belongs to a query that is still to come.
while(!eof(RELJ) || !eof(EOUT) ||
      pending_qid($relj_str,0) > $qid || pending_qid($eout_str,1) > $qid) {
  ++$qid;
  %dq=();             # documents judged relevant for this query
  $A=$B=$AB=0;        # A: relevant docs, B: returned docs, AB: relevant returned
  $Ravg=$Pavg=0;

  # collect the relevance judgements of this query
  while($relj_str =~ /^0*$qid\s+(\d+)/) {
    ++$A;
    $dq{$1+0}=1;
    last unless $relj_str=<RELJ>;
  }

  # walk through the results of this query in file order, summing the
  # number of relevant hits and the precision after every returned row
  while($eout_str =~ /^$qid\s+(\d+)\s+(\d+\.\d+)/) {
    $B++;
    $AB++ if $dq{$1+0};
    $Ravg+=$AB;
    $Pavg+=$AB/$B;
    last unless $eout_str=<EOUT>;
  }

  # queries without relevance judgements are not reported
  next unless $A;

  $Ravg/=$B*$A if $B;
  $Pavg/=$B if $B;

  write;
}

exit 0;
#!/bin/sh -x
# The script locates everything relative to the current directory,
# so it has to be started as ./ft-test-run.sh
[ -x ./ft-test-run.sh ] || {
  echo "Usage: ./ft-test-run.sh"
  exit 1
}

# directories: this one, the top of the source tree, the scratch data dir
BASE=`pwd`
ROOT=`cd ../..; pwd`
DATA=$BASE/var

# header that is swapped for each batch's own ftdefs.h
H=../ftdefs.h

# binaries taken from the source tree
MYSQLD=$ROOT/sql/mysqld
MYSQL=$ROOT/client/mysql
MYSQLADMIN=$ROOT/client/mysqladmin

# socket and pid file of the test server
SOCK=$DATA/mysql.sock
PID=$DATA/mysql.pid

# default options; replaced per batch inside the main loop
OPTS="--no-defaults --socket=$SOCK --character-sets-dir=$ROOT/sql/share/charsets"

# --ft_min_word_len=#
# --ft_max_word_len=#
# --ft_max_word_len_for_sort=#
# --ft_stopword_file=name
# --key_buffer_size=#
# Stops the test server: asks for a shutdown through the socket if it
# exists; then, if the pid file is still there, sends a plain kill, waits
# 15 seconds and sends kill -9 if the pid file has not gone away.
stop_myslqd()
{
  if [ -S $SOCK ] ; then
    $MYSQLADMIN $OPTS shutdown
  fi
  if [ -f $PID ] ; then
    kill `cat $PID` && sleep 15 && [ -f $PID ] && kill -9 `cat $PID`
  fi
}
# t/BEST is the reference batch every other batch is compared against
if [ ! -d t/BEST ] ; then
  echo "No ./t/BEST directory! Aborting..."
  exit 1
fi
rm -f t/BEST/report.txt

# $H is removed and replaced by a symlink below; refuse to do that
# to a writeable copy
if [ -w $H ] ; then
  echo "$H is writeable! Aborting..."
  exit 1
fi

# t/BEST goes first. It matches t/* again later, but by then it has
# *.out files and is skipped like every other finished batch.
for batch in t/BEST t/* ; do
  # skip non-directories and batches that already have results
  # (ls complains on stderr when nothing matches - keep that quiet)
  A=`ls $batch/*.out 2>/dev/null`
  [ ! -d $batch -o -n "$A" ] && continue

  # build the server with this batch's ftdefs.h
  rm -f $H
  ln -s $BASE/$batch/ftdefs.h $H
  touch $H
  OPTS="--defaults-file=$BASE/$batch/my.cnf --socket=$SOCK --character-sets-dir=$ROOT/sql/share/charsets"
  stop_myslqd
  rm -f $MYSQLD
  (cd $ROOT; gmake)
  for prog in $MYSQLD $MYSQL $MYSQLADMIN ; do
    if [ ! -x $prog ] ; then
      echo "No $prog"
      exit 1
    fi
  done

  # fresh data directory
  # (was "2>&1 >/dev/null", which silences stdout only and leaves stderr)
  rm -rf var >/dev/null 2>&1
  mkdir var
  mkdir var/test

  $MYSQLD $OPTS --basedir=$BASE --skip-bdb --pid-file=$PID \
    --language=$ROOT/sql/share/english \
    --skip-grant-tables --skip-innodb \
    --skip-networking --tmpdir=$DATA &

  $MYSQLADMIN $OPTS --connect_timeout=60 ping
  if [ $? != 0 ] ; then
    echo "$MYSQLD refused to start"
    # do not leave a half-started server or the symlink behind
    stop_myslqd
    rm -f $H
    exit 1
  fi

  # run every data/*.test through the server and evaluate the output
  for test in `cd data; echo *.test|sed "s/\.test\>//g"` ; do
    $MYSQL $OPTS --skip-column-names test <data/$test.test >var/$test.eval
    if ./Ereport.pl var/$test.eval data/$test.relj > $batch/$test.out ; then
      :
    else
      # stop the server and remove the symlink before giving up
      stop_myslqd
      rm -f $H
      exit 1
    fi
  done

  stop_myslqd
  rm -f $H

  # compare against the reference batch (nothing to compare for t/BEST itself)
  [ $batch -ef t/BEST ] || ./Ecompare.pl t/BEST $batch >> t/BEST/report.txt
done
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment