Commit 9e636731 authored by rich prohaska's avatar rich prohaska

#45 add the tpch database loader tests

parent c88937f1
script to run the queries is called run.atc.ontime.bash.
The script to load the air traffic ontime database and run queries against it
is called run.atc.ontime.bash.
queries are in the ft-engine/scripts/atc.ontime directory.
The queries are in the ft-engine/scripts/atc.ontime directory.
data for the ontime database is in the tokutek-mysql-data amazon s3 bucket.
The data for the ontime database is in the amazon s3 bucket called tokutek-mysql-data.
$ s3ls -l tokutek-mysql-data --prefix=atc_On_Time_Performance
2010-06-15T13:07:09.000Z 1073741824 atc_On_Time_Performance.mysql.csv.gz.aa
......@@ -10,3 +11,9 @@ $ s3ls -l tokutek-mysql-data --prefix=atc_On_Time_Performance
2010-06-15T13:09:38.000Z 1073741824 atc_On_Time_Performance.mysql.csv.gz.ac
2010-06-15T13:10:54.000Z 446709742 atc_On_Time_Performance.mysql.csv.gz.ad
2010-06-15T13:11:26.000Z 503 atc_On_Time_Performance.mysql.csv.gz.xml
The raw data is also stored in the amazon s3 bucket called tokutek-mysql-data.
$ s3ls -l tokutek-mysql-data --prefix=atc
#!/usr/bin/env bash
# Print the command line synopsis to stdout.
# Every option is shown as --NAME=<value>, where <value> is whatever the
# global variable of the same name holds when the function is called.
usage() {
    printf '%s\n' \
        "run the TPCH load and compare test" \
        "[--SCALE=$SCALE] [--ENGINE=$ENGINE]" \
        "[--dbgen=$dbgen] [--load=$load] [--check=$check] [--compare=$compare] [--query=$query]" \
        "[--mysqlbuild=$mysqlbuild] [--commit=$commit]" \
        "[--testinstance=$testinstance] [--tokudb_load_save_space=$tokudb_load_save_space]"
}
# Run a command line until it succeeds, at most 11 times.
# Arguments: the command and its arguments; they are joined with spaces into
#            one string and executed with "bash -c", so quoting that callers
#            embed in the arguments (e.g. \"...\") is re-parsed by that shell.
# Outputs:   one timestamped line before each attempt and one after it
#            (with the exit code and the zero-based attempt number) on stdout.
# Returns:   0 if some attempt exited 0, non-zero otherwise.
retry() {
    local cmd="$*"
    local exitcode=1
    local -i attempt=0
    local -i max_attempts=11
    while (( attempt < max_attempts )); do
        # quote the log text so the command is recorded verbatim instead of
        # being glob-expanded and re-split by the shell
        echo "$(date) $cmd"
        bash -c "$cmd"
        exitcode=$?
        echo "$(date) $cmd $exitcode $attempt"
        attempt=$(( attempt + 1 ))
        if [ "$exitcode" -eq 0 ]; then break; fi
        # pause between attempts only; nothing follows the final failure
        if (( attempt < max_attempts )); then sleep 1; fi
    done
    [ "$exitcode" -eq 0 ]
}
# Default settings. The command line parser below assigns any --NAME=VALUE
# argument to the variable NAME, so each of these can be overridden.

# TPCH scale factor; used in the database name, the data directory name and
# passed to dbgen -s
SCALE=1
# storage engine; used in the database name and to pick the create table script
ENGINE=tokudb
# tables that are loaded and checked
TABLES="part partsupp customer lineitem nation orders region supplier"
# phase switches: a phase runs when its value is not 0
dbgen=1
load=1
compare=1
# shown by usage; not referenced anywhere else in this script
query=0
check=1
# the dump files to compare are read from $datadir/$dbname
datadir=/usr/local/mysql/data
# name of the mysql build under test; has no default and must be supplied
mysqlbuild=
# when not 0, the run file is committed to subversion
commit=0
mysqlserver=$(hostname)
mysqluser=$(whoami)
mysqlsocket=/tmp/mysql.sock
basedir=$HOME/svn.build
builddir=$basedir/mysql.build
# the character classes are quoted so the shell cannot glob-expand them
# against file names in the current directory
system=$(uname -s | tr '[:upper:]' '[:lower:]')
arch=$(uname -m | tr '[:upper:]' '[:lower:]')
# optional suffix appended to the database name and the script checkout directory
testinstance=
# value for the tokudb_load_save_space session variable during the load
tokudb_load_save_space=0
svn_server=https://svn.tokutek.com/tokudb
svn_branch=.
svn_revision=HEAD
# parse the command line
# Every argument must have the form --NAME=VALUE, where NAME is a valid shell
# variable name. VALUE is assigned verbatim to the global variable NAME: it is
# never evaluated, so it may contain spaces or shell metacharacters. Any other
# argument prints the usage text and exits with status 1.
parse_args() {
    local arg
    while [ $# -gt 0 ]; do
        arg=$1
        shift
        if [[ $arg =~ ^--([A-Za-z_][A-Za-z0-9_]*)=(.*)$ ]]; then
            # printf -v stores the value without running it through eval
            printf -v "${BASH_REMATCH[1]}" '%s' "${BASH_REMATCH[2]}"
        else
            usage
            exit 1
        fi
    done
}
parse_args "$@"
# Split the build name into its components. It must contain
# <mysql>-tokudb-<something>-linux-x86_64; system and arch are overwritten
# with the values taken from the name. Anything else, including the empty
# default, is rejected with a message instead of a silent exit.
if [[ $mysqlbuild =~ (.*)-(tokudb\-.*)-(linux)-(x86_64) ]] ; then
    mysql=${BASH_REMATCH[1]}
    tokudb=${BASH_REMATCH[2]}
    system=${BASH_REMATCH[3]}
    arch=${BASH_REMATCH[4]}
else
    echo "invalid or missing --mysqlbuild='$mysqlbuild': expected <mysql>-tokudb-<version>-linux-x86_64" >&2
    usage >&2
    exit 1
fi
# Derive the database name and the TPCH data directory from the settings.
# The database name gets a _<testinstance> suffix when testinstance is set.
dbname=tpch${SCALE}G_${ENGINE}
if [ -n "$testinstance" ] ; then dbname=${dbname}_${testinstance}; fi
tpchdir=$basedir/tpch${SCALE}G

# Put an installation under /usr/local/mysql, if present, first on the
# command and shared library search paths.
if [ -d /usr/local/mysql ] ; then
    export PATH=/usr/local/mysql/bin:$PATH
fi
if [ -d /usr/local/mysql/lib/mysql ] ; then
    # extend the existing library path (not PATH); the separator is added only
    # when there is something to append, so no empty entry is created
    export LD_LIBRARY_PATH=/usr/local/mysql/lib/mysql${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
fi
# goto the base directory
# Later steps use paths relative to this directory (the data tarball, the
# script checkout) and remove the old run file, so stop here if the directory
# cannot be created or entered rather than carrying on somewhere else.
if ! mkdir -p "$basedir" ; then
    echo "cannot create base directory '$basedir'" >&2
    exit 1
fi
if ! pushd "$basedir" ; then
    echo "cannot enter base directory '$basedir'" >&2
    exit 1
fi
# update the build directory
# When results are committed, they go into a directory named after today's
# date below $builddir, which is created in subversion and checked out.
# Otherwise they go into the current directory.
if [ $commit != 0 ] ; then
    if [ ! -d $builddir ] ; then
        mkdir $builddir
    fi
    date=$(date +%Y%m%d)
    testresultsdir=$builddir/$date
    pushd $builddir
    # repeats until the checkout directory exists; the number of attempts is
    # not bounded
    until [ -d $date ] ; do
        svn mkdir $svn_server/mysql.build/$date -m ""
        svn checkout -q $svn_server/mysql.build/$date || rm -rf $date
    done
    popd
else
    testresultsdir=$PWD
fi

# The run file collects the log of this run. Its name encodes the database,
# an optional -compress marker, the build and the server. A run file left by
# an earlier run is removed and the overall result starts out as PASS.
compress_suffix=
if [ $tokudb_load_save_space != 0 ] ; then compress_suffix=-compress; fi
runfile=${testresultsdir}/${dbname}${compress_suffix}-${mysqlbuild}-${mysqlserver}
rm -rf $runfile
testresult="PASS"
# maybe get the tpch data from AWS S3
# Only needed when the comparison is enabled and the data directory does not
# exist yet. The tarball is downloaded unless it is already in the current
# directory, then unpacked, and the compressed data files are expanded.
if [ $compare != 0 ] && [ ! -d $tpchdir ] ; then
    tpchtarball=tpch${SCALE}G_data_dump.tar
    if [ ! -f $tpchtarball ] ; then
        echo $(date) s3get --bundle tokutek-mysql-data $tpchtarball >>$runfile 2>&1
        s3get --verbose --bundle tokutek-mysql-data $tpchtarball >>$runfile 2>&1
        exitcode=$?
        echo $(date) s3get --bundle tokutek-mysql-data $tpchtarball $exitcode >>$runfile 2>&1
        if [ $exitcode -ne 0 ] ; then
            testresult="FAIL"
        fi
    fi
    if [ $testresult = "PASS" ] ; then
        tar xf $tpchtarball
        exitcode=$?
        echo $(date) tar xf $tpchtarball $exitcode >>$runfile 2>&1
        if [ $exitcode -eq 0 ] ; then
            # gunzip the data files
            pushd tpch${SCALE}G/data/tpch${SCALE}G
            for f in *.gz ; do
                echo $(date) gunzip $f >>$runfile 2>&1
                gunzip $f
            done
            # record the resulting directory listing in the run file
            ls -l >>$runfile 2>&1
            popd
        else
            testresult="FAIL"
        fi
    fi
fi
# checkout the tpch scripts
# The scripts are exported into tpch-<mysqlbuild>, with a _<testinstance>
# suffix when testinstance is set. The export is tried from the configured
# branch first and from the top of the repository if that fails.
tpchtestdir=tpch-$mysqlbuild
if [ -n "$testinstance" ] ; then
    tpchtestdir=${tpchtestdir}_${testinstance}
fi
if [ $testresult = "PASS" ] ; then
    # start from a clean directory
    rm -rf $tpchtestdir
    retry svn export -q -r $svn_revision $svn_server/$svn_branch/tpch $tpchtestdir
    exitcode=$?
    echo $(date) export $svn_server/$svn_branch/tpch $exitcode >>$runfile 2>&1
    if [ $exitcode -ne 0 ] ; then
        retry svn export -q -r $svn_revision $svn_server/tpch $tpchtestdir
        exitcode=$?
        echo $(date) export $svn_server/tpch $exitcode >>$runfile 2>&1
    fi
    if [ $exitcode -ne 0 ] ; then
        testresult="FAIL"
    fi
fi
# generate the tpch data
# Builds dbgen from the exported scripts. The generator itself only runs when
# lineitem.tbl is not already present in the data directory; the generated
# .tbl files are then moved there.
if [ $dbgen != 0 ] && [ $testresult = "PASS" ] ; then
    pushd $tpchtestdir/dbgen
    make
    exitcode=$?
    echo $(date) make dbgen $exitcode >>$runfile 2>&1
    if [ $exitcode -ne 0 ] ; then
        testresult="FAIL"
    fi
    popd
    if [ $testresult = "PASS" ] ; then
        # from here on dbgen says whether the data still has to be generated
        dbgen=0
        mkdir -p tpch${SCALE}G/data/tpch${SCALE}G
        pushd tpch${SCALE}G/data/tpch${SCALE}G
        if [ ! -f lineitem.tbl ] ; then
            dbgen=1
        fi
        popd
        if [ $dbgen != 0 ] ; then
            pushd $tpchtestdir/dbgen
            ./dbgen -fF -s $SCALE
            exitcode=$?
            echo $(date) dbgen -fF -s $SCALE $exitcode >>$runfile 2>&1
            if [ $exitcode -eq 0 ] ; then
                # log the files before and after the permission change
                ls -l *.tbl >>$runfile
                chmod 0644 *.tbl
                ls -l *.tbl >>$runfile
                mv *.tbl $basedir/tpch${SCALE}G/data/tpch${SCALE}G
            else
                testresult="FAIL"
            fi
            popd
        fi
    fi
fi
# create the tpch database
# Drops any existing database of that name and creates it again. Both
# statements are always issued; a failure of either one marks the run FAIL.
if [ $load != 0 ] && [ $testresult = "PASS" ] ; then
    for stmt in "drop database if exists $dbname" "create database $dbname" ; do
        echo $(date) $stmt >>$runfile
        mysql -S $mysqlsocket -u $mysqluser -e "$stmt" >>$runfile 2>&1
        exitcode=$?
        echo $(date) $stmt $exitcode >>$runfile
        if [ $exitcode -ne 0 ] ; then
            testresult="FAIL"
        fi
    done
fi
# create the tpch tables
# Runs the engine specific create table script from the script checkout.
# The checkout lives in $basedir/$tpchtestdir, which carries the
# _<testinstance> suffix when testinstance is set, so the script is taken
# from there rather than from the unsuffixed tpch-<mysqlbuild> directory.
if [ $load != 0 -a $testresult = "PASS" ] ; then
    echo $(date) create table >>$runfile
    mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "source $basedir/$tpchtestdir/scripts/${ENGINE}_tpch_create_table.sql" >>$runfile 2>&1
    exitcode=$?
    echo $(date) create table $exitcode >>$runfile
    if [ $exitcode -ne 0 ] ; then testresult="FAIL"; fi
fi
# load the data
# Loads each table from its .tbl file with LOAD DATA INFILE, using '|' as the
# field terminator, and logs the elapsed seconds per table. A failed load
# marks the run FAIL but the remaining tables are still loaded.
if [ $load != 0 ] && [ $testresult = "PASS" ] ; then
    for tblname in $TABLES ; do
        echo $(date) load table $tblname >>$runfile
        ls -l $tpchdir/data/tpch${SCALE}G/$tblname.tbl >>$runfile
        start=$(date +%s)
        mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "set session tokudb_load_save_space=$tokudb_load_save_space; load data infile '$tpchdir/data/tpch${SCALE}G/$tblname.tbl' into table $tblname fields terminated by '|'" >>$runfile 2>&1
        exitcode=$?
        loadtime=$(( $(date +%s) - start ))
        echo $(date) load table $tblname $exitcode loadtime=$loadtime>>$runfile
        if [ $exitcode -ne 0 ] ; then
            testresult="FAIL"
        fi
    done
fi
# add a clustering index on lineitem(l_shipdate) with online index creation
# turned off, and log how long it took
if [ $check != 0 ] && [ $testresult = "PASS" ] ; then
    for tblname in lineitem ; do
        echo $(date) add clustering index $tblname >>$runfile
        start=$(date +%s)
        mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "set session tokudb_create_index_online=0;create clustering index i_shipdate on lineitem (l_shipdate)" >>$runfile 2>&1
        exitcode=$?
        loadtime=$(( $(date +%s) - start ))
        echo $(date) add clustering index $tblname $exitcode loadtime=$loadtime >>$runfile
        if [ $exitcode -ne 0 ] ; then
            testresult="FAIL"
        fi
    done
fi

# check the tables
# Runs CHECK TABLE on every table and logs the elapsed seconds.
if [ $check != 0 ] && [ $testresult = "PASS" ] ; then
    for tblname in $TABLES ; do
        echo $(date) check table $tblname >>$runfile
        start=$(date +%s)
        mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "check table $tblname" >>$runfile 2>&1
        exitcode=$?
        checktime=$(( $(date +%s) - start ))
        echo $(date) check table $tblname $exitcode checktime=$checktime >>$runfile
        if [ $exitcode -ne 0 ] ; then
            testresult="FAIL"
        fi
    done
fi

# drop the clustering index again once the tables have been checked
if [ $check != 0 ] && [ $testresult = "PASS" ] ; then
    for tblname in lineitem ; do
        echo $(date) drop index $tblname >>$runfile
        mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "drop index i_shipdate on lineitem" >>$runfile 2>&1
        exitcode=$?
        echo $(date) drop index $tblname $exitcode >>$runfile
        if [ $exitcode -ne 0 ] ; then
            testresult="FAIL"
        fi
    done
fi
# compare the data
# Runs dumptpch.sql against the database and compares every dump* file found
# in $datadir/$dbname with the file of the same name in the reference dump
# directory. The run is marked FAIL when a file differs or has no reference.
# NOTE(review): dumptpch.sql is not visible here; presumably it writes the
# dump* files into $datadir/$dbname - confirm.
if [ $compare != 0 -a $testresult = "PASS" ] ; then
    if [ -d $tpchdir/dump/tpch${SCALE}G ] ; then
        # the scripts were exported to $basedir/$tpchtestdir, which includes
        # the _<testinstance> suffix when testinstance is set
        mysql -S $mysqlsocket -u $mysqluser -D $dbname -e "source $basedir/$tpchtestdir/scripts/dumptpch.sql" >>$runfile 2>&1
        exitcode=$?
        echo $(date) dump data $exitcode >>$runfile
        if [ $exitcode -ne 0 ] ; then
            testresult="FAIL"
        else
            # force the permissions on the dumpdir open
            # pushd is used as an access probe: if the directory cannot be
            # entered, group permissions are opened with sudo and the probe
            # is repeated; a successful pushd is undone right away
            pushd $datadir/$dbname
            exitcode=$?
            if [ $exitcode != 0 ] ; then
                sudo chmod g+rwx $datadir
                sudo chmod g+rwx $datadir/$dbname
                pushd $datadir/$dbname
                exitcode=$?
            fi
            if [ $exitcode = 0 ] ; then
                popd
            fi
            # compare the dump files
            dumpdir=$datadir/$dbname
            comparedir=$tpchdir/dump/tpch${SCALE}G
            for f in $dumpdir/dump* ; do
                d=$(basename $f)
                # expand a reference file that is only present compressed
                if [ ! -f $comparedir/$d ] && [ -f $comparedir/$d.gz ] ; then
                    pushd $comparedir; gunzip $d.gz; popd
                fi
                if [ -f $comparedir/$d ] ; then
                    diff -q $dumpdir/$d $comparedir/$d
                    if [ $? = 0 ] ; then
                        result="PASS"
                    else
                        result="FAIL"
                        testresult="FAIL"
                    fi
                else
                    result="MISSING"
                    testresult="FAIL"
                fi
                echo $(date) $d $result >>$runfile
            done
            if [ $testresult = "PASS" ] ; then
                # remove the dump files
                rm -f $datadir/$dbname/dump*
            fi
        fi
    fi
fi
# commit results
# The run file is added and committed to subversion with a message that
# starts with the overall result. The escaped quotes are part of the argument
# on purpose: retry hands the joined command line to "bash -c", which needs
# them to keep the message together as one word.
if [ $commit != 0 ] ; then
    svn add $runfile
    retry svn commit -m \"$testresult $dbname $mysqlbuild $mysqlserver compress=$tokudb_load_save_space\" $runfile
fi

# leave the base directory entered at the start of the script
popd

# exit 0 only when every step passed
exitcode=1
if [ $testresult = "PASS" ] ; then
    exitcode=0
fi
exit $exitcode
TPCH is an industry standard data warehouse benchmark. We use TPCH databases to test the TokuDB loader.
The run.tpch.bash script loads a TPCH database at a given scale factor into TokuDB. It then uses the SQL
CHECK TABLE statement to verify the fractal tree invariants. Finally, it dumps the database and compares
with a precomputed dump of the database from InnoDB.
Here are some TPCH databases dumped from InnoDB. These dumps are used to compare with TPCH data loaded
into TokuDB.
$ s3ls tokutek-mysql-data -l --prefix=tpch
2010-08-16T21:21:10.000Z 1073741824 tpch10G_data_dump.tar.0
2010-08-16T21:38:45.000Z 1073741824 tpch10G_data_dump.tar.1
2010-08-16T21:56:43.000Z 1073741824 tpch10G_data_dump.tar.2
2010-08-16T22:14:49.000Z 1073741824 tpch10G_data_dump.tar.3
2010-08-16T22:32:38.000Z 1073741824 tpch10G_data_dump.tar.4
2010-08-16T22:51:04.000Z 1073741824 tpch10G_data_dump.tar.5
2010-08-16T23:08:51.000Z 91262976 tpch10G_data_dump.tar.6
2010-08-16T23:10:21.000Z 654 tpch10G_data_dump.tar.xml
2010-08-12T17:45:09.000Z 633579520 tpch1G_data_dump.tar
2010-08-12T17:56:30.000Z 160 tpch1G_data_dump.tar.xml
2010-08-06T13:57:51.000Z 633610240 tpch1G_data_dump_innodb.tar
2010-08-06T14:07:09.000Z 174 tpch1G_data_dump_innodb.tar.xml
2010-11-28T12:20:58.000Z 886 tpch30G_data_dump.tar.xml
2010-09-14T19:16:30.000Z 1073741824 tpch30G_dump_data.tar.0
2010-09-14T19:40:02.000Z 1073741824 tpch30G_dump_data.tar.1
2010-09-14T20:12:22.000Z 1073741824 tpch30G_dump_data.tar.2
2010-09-14T20:45:23.000Z 1073741824 tpch30G_dump_data.tar.3
2010-09-14T21:14:07.000Z 1073741824 tpch30G_dump_data.tar.4
2010-09-14T21:37:54.000Z 1073741824 tpch30G_dump_data.tar.5
2010-09-14T21:57:02.000Z 1073741824 tpch30G_dump_data.tar.6
2010-09-14T22:16:59.000Z 1073741824 tpch30G_dump_data.tar.7
2010-09-14T22:36:22.000Z 1073741824 tpch30G_dump_data.tar.8
2010-09-14T22:55:25.000Z 382511104 tpch30G_dump_data.tar.9
2010-09-14T23:02:04.000Z 886 tpch30G_dump_data.tar.xml
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment