#!/usr/bin/env python
"""
A script for running our stress tests repeatedly to see if any fail.

Runs a list of stress tests in parallel, reporting passes and collecting
failure scenarios until killed.  Runs with different table sizes,
cachetable sizes, and numbers of threads.

Suitable for running on a dev branch, or a release branch, or main.

Just run the script from within a branch you want to test.

By default, we stop everything, update from svn, rebuild, and restart the
tests once a day.
"""

import logging
import os
import re
import sys
import time

from glob import glob
from logging import debug, info, warning, error, exception
from optparse import OptionGroup, OptionParser
from Queue import Queue
from random import randrange, shuffle
from resource import setrlimit, RLIMIT_CORE
from shutil import copy, copytree, move, rmtree
from signal import signal, SIGHUP, SIGINT, SIGPIPE, SIGALRM, SIGTERM
from subprocess import call, Popen, PIPE, STDOUT
from tempfile import mkdtemp, mkstemp
from threading import Event, Thread, Timer

__version__   = '$Id$'
__copyright__ = """Copyright (c) 2007-2012 Tokutek Inc.  All rights reserved.

                The technology is licensed by the Massachusetts Institute
                of Technology, Rutgers State University of New Jersey, and
                the Research Foundation of State University of New York at
                Stony Brook under United States of America Serial
                No. 11/760379 and to the patents and/or patent
                applications resulting from it."""

def setlimits():
    # Allow unlimited core dumps and lower the priority of test child
    # processes; installed as the Popen preexec_fn in spawn_child.
    setrlimit(RLIMIT_CORE, (-1, -1))
    os.nice(7)

class TestFailure(Exception):
    """Raised when a stress test run fails (or fails to crash when it should)."""
    pass

class Killed(Exception):
    """Raised when a child process is terminated because the scheduler is stopping."""
    pass

class TestRunnerBase(object):
    """Runs one stress test executable over and over with fixed parameters.

    An instance binds a test binary (execf) to a table size (tsize),
    cachetable size (csize), and run duration (test_time).  run() builds a
    scratch environment, invokes the test, and reports the outcome to the
    scheduler; on failure the whole run directory is saved for inspection.

    Subclasses implement run_prepare() and run_test(), and must set
    self.phase as they go so failure save directories are labeled.
    """

    def __init__(self, scheduler, builddir, installdir, rev, jemalloc, execf, tsize, csize, test_time, savedir):
        self.scheduler = scheduler
        self.builddir = builddir
        self.installdir = installdir
        self.rev = rev
        self.execf = execf
        self.tsize = tsize
        self.csize = csize
        self.test_time = test_time
        self.savedir = savedir

        # Make sure child processes load the freshly built libraries first.
        self.env = os.environ
        libpath = os.path.join(self.installdir, 'lib')
        if 'LD_LIBRARY_PATH' in self.env:
            self.env['LD_LIBRARY_PATH'] = '%s:%s' % (libpath, self.env['LD_LIBRARY_PATH'])
        else:
            self.env['LD_LIBRARY_PATH'] = libpath

        # Optionally run the children under jemalloc via LD_PRELOAD.
        if jemalloc is not None and len(jemalloc) > 0:
            preload = os.path.normpath(jemalloc)
            if 'LD_PRELOAD' in self.env:
                self.env['LD_PRELOAD'] = '%s:%s' % (preload, self.env['LD_PRELOAD'])
            else:
                self.env['LD_PRELOAD'] = preload

        self.nruns = 0
        self.rundir = None    # scratch dir for the current run (None between runs)
        self.outf = None      # file object capturing the current run's output
        self.times = [0, 0]   # [start, end] wall-clock times of the test phase
        # Large tables can cause swapping; the scheduler caps how many run at once.
        self.is_large = (tsize >= 10000000)
        self.oldversionstr = 'noupgrade'

    def __str__(self):
        return (self.__class__.__name__ +
                '<%(execf)s, %(tsize)d, %(csize)d, %(oldversionstr)s>') % self

    def __getitem__(self, k):
        # Lets '%(attr)s' % self work in the format strings used throughout.
        return self.__getattribute__(k)

    def infostr(self):
        """One tab-separated line describing this run, for the pass/fail log."""
        return '\t'.join(['%(execf)s',
                          '%(rev)s',
                          '%(tsize)d',
                          '%(csize)d',
                          '%(oldversionstr)s',
                          '%(num_ptquery)d',
                          '%(num_update)d',
                          '%(time)d']) % self

    @property
    def time(self):
        """Duration of the last completed test phase, or 0 if none completed."""
        if self.times[0] != 0 and self.times[1] != 0:
            return self.times[1] - self.times[0]
        else:
            return 0

    @property
    def num_ptquery(self):
        """Point-query thread count: 1 on even runs, random 0-15 on odd runs."""
        if self.nruns % 2 < 1:
            return 1
        else:
            return randrange(16)

    @property
    def num_update(self):
        """Update thread count: 1 for two runs out of every four, else random 0-15."""
        if self.nruns % 4 < 2:
            return 1
        else:
            return randrange(16)

    @property
    def envdir(self):
        return os.path.join(self.rundir, 'envdir')

    @property
    def prepareloc(self):
        """Where a prepared environment for these parameters is cached."""
        preparename = 'dir.%(execf)s-%(tsize)d-%(csize)d' % self
        return os.path.join(self.builddir, 'src', 'tests', preparename)

    def prepare(self):
        """Reuse a cached prepared environment, or create one and cache it."""
        if os.path.isdir(self.prepareloc):
            debug('%s found existing environment.', self)
            copytree(self.prepareloc, self.envdir)
        else:
            debug('%s preparing an environment.', self)
            self.run_prepare()
            self.save_prepared_envdir()

    def save_prepared_envdir(self):
        debug('%s copying environment to %s.', self, self.prepareloc)
        copytree(self.envdir, self.prepareloc)

    def run(self):
        """Execute one full prepare/test cycle and report the result."""
        srctests = os.path.join(self.builddir, 'src', 'tests')
        self.rundir = mkdtemp(dir=srctests)

        try:
            outname = os.path.join(self.rundir, 'output.txt')
            self.outf = open(outname, 'w')

            try:
                self.prepare()
                debug('%s testing.', self)
                self.times[0] = time.time()
                self.run_test()
                self.times[1] = time.time()
                debug('%s done.', self)
            except Killed:
                pass    # we were asked to stop; not a failure
            except TestFailure:
                savedir = self.save()
                self.scheduler.report_failure(self)
                warning('Saved environment to %s', savedir)
            else:
                self.scheduler.report_success(self)
        finally:
            self.outf.close()
            rmtree(self.rundir)
            self.rundir = None
            self.times = [0, 0]
            self.nruns += 1

    def save(self):
        """Copy the run directory, test binary, and libraries to savedir.

        Returns the directory everything was saved into.  Uses self.phase
        (set by the run_prepare/run_test of the subclass) in the dir name.
        """
        savepfx = '%(execf)s-%(rev)s-%(tsize)d-%(csize)d-%(num_ptquery)d-%(num_update)d-%(phase)s-' % self
        savedir = mkdtemp(dir=self.savedir, prefix=savepfx)
        def targetfor(path):
            return os.path.join(savedir, os.path.basename(path))

        for f in glob(os.path.join(self.rundir, '*')):
            if os.path.isdir(f):
                copytree(f, targetfor(f))
            else:
                copy(f, targetfor(f))
        fullexecf = os.path.join(self.builddir, 'src', 'tests', self.execf)
        copy(fullexecf, targetfor(fullexecf))
        for lib in glob(os.path.join(self.installdir, 'lib', '*.so')):
            copy(lib, targetfor(lib))

        return savedir

    def waitfor(self, proc):
        """Poll proc until it exits; kill it if the scheduler is stopping."""
        while proc.poll() is None:
            self.scheduler.stopping.wait(1)
            if self.scheduler.stopping.is_set():
                os.kill(proc.pid, SIGTERM)
                raise Killed()

    def spawn_child(self, args):
        """Run the test binary with args in the run dir; return its exit code."""
        debug('%s spawning %s', self, ' '.join([self.execf] + args))
        # Keep a transcript of every command run, for reproducing failures.
        commandsf = open(os.path.join(self.rundir, 'commands.txt'), 'a')
        commandsf.write(' '.join([self.execf] + args) + '\n')
        commandsf.close()
        proc = Popen([self.execf] + args,
                     executable=os.path.join('..', self.execf),
                     env=self.env,
                     cwd=self.rundir,
                     preexec_fn=setlimits,
                     stdout=self.outf,
                     stderr=STDOUT)
        self.waitfor(proc)
        return proc.returncode

    @property
    def extraargs(self):
        # for overriding
        return []

    @property
    def prepareargs(self):
        return ['-v',
                '--envdir', 'envdir',
                '--num_elements', str(self.tsize),
                '--cachetable_size', str(self.csize)] + self.extraargs

    @property
    def testargs(self):
        return ['--num_seconds', str(self.test_time),
                '--no-crash_on_operation_failure',
                '--num_ptquery_threads', str(self.num_ptquery),
                '--num_update_threads', str(self.num_update)] + self.prepareargs

class TestRunner(TestRunnerBase):
    """Plain stress test: create a fresh environment, then stress it."""

    def run_prepare(self):
        self.phase = "create"
        if self.spawn_child(['--only_create'] + self.prepareargs) != 0:
            raise TestFailure('%s crashed during --only_create.' % self.execf)

    def run_test(self):
        self.phase = "stress"
        if self.spawn_child(['--only_stress'] + self.testargs) != 0:
            raise TestFailure('%s crashed during --only_stress.' % self.execf)

class RecoverTestRunner(TestRunnerBase):
    """Crash-recovery stress test.

    The stress phase is run with --test and is expected to crash; if it
    exits cleanly that is the failure.  Afterwards, --recover must succeed.
    """

    def run_prepare(self):
        self.phase = "create"
        if self.spawn_child(['--only_create', '--test'] + self.prepareargs) != 0:
            raise TestFailure('%s crashed during --only_create --test.' % self.execf)

    def run_test(self):
        self.phase = "test"
        # Note the inverted check: a zero exit here means it failed to crash.
        if self.spawn_child(['--only_stress', '--test'] + self.testargs) == 0:
            raise TestFailure('%s did not crash during --only_stress --test' % self.execf)
        self.phase = "recover"
        if self.spawn_child(['--recover'] + self.prepareargs) != 0:
            raise TestFailure('%s crashed during --recover' % self.execf)

class UpgradeTestRunnerMixin(TestRunnerBase):
    """Mixin that prepares by copying a saved old-version environment
    instead of creating a fresh one, to exercise the upgrade path."""

    def __init__(self, old_environments_dir, version, pristine_or_stressed, **kwargs):
        super(UpgradeTestRunnerMixin, self).__init__(**kwargs)
        self.version = version
        self.pristine_or_stressed = pristine_or_stressed
        self.old_env_dirs = os.path.join(old_environments_dir, version)
        self.oldversionstr = '%(version)s-%(pristine_or_stressed)s' % self

    @property
    def extraargs(self):
        # Pass --num_DBs 1 to the child for upgrade runs.
        return ['--num_DBs', '1']

    @property
    def old_envdir(self):
        """Path of the saved old-version environment matching our table size."""
        oldname = 'saved%(pristine_or_stressed)s-%(tsize)d-dir' % self
        debug('%s using old version environment %s from %s.', self, oldname, self.old_env_dirs)
        return os.path.join(self.old_env_dirs, oldname)

    def save_prepared_envdir(self):
        # The old environment already lives on disk; nothing to cache.
        pass

    def run_prepare(self):
        self.phase = "create"
        copytree(self.old_envdir, self.envdir)

class DoubleTestRunnerMixin(TestRunnerBase):
    """Runs the test phase twice in a row.

    Good for upgrade tests, to run the test once to upgrade it and then
    again to make sure the upgrade left it in a good state.
    """

    def run_test(self):
        # First pass performs the upgrade; second pass checks the result.
        super(DoubleTestRunnerMixin, self).run_test()
        super(DoubleTestRunnerMixin, self).run_test()

class UpgradeTestRunner(UpgradeTestRunnerMixin, TestRunner):
    """Stress test that starts from a saved old-version environment."""
    pass

class UpgradeRecoverTestRunner(UpgradeTestRunnerMixin, RecoverTestRunner):
    """Recovery stress test that starts from a saved old-version environment."""
    pass

class DoubleUpgradeTestRunner(DoubleTestRunnerMixin, UpgradeTestRunner):
    """Upgrade stress test whose test phase runs twice in a row."""
    pass

class DoubleUpgradeRecoverTestRunner(DoubleTestRunnerMixin, UpgradeRecoverTestRunner):
    """Upgrade recovery stress test whose test phase runs twice in a row."""
    pass

class Worker(Thread):
    """Pulls test runners off the scheduler's queue and runs them in a loop.

    Exits when the scheduler's stopping event is set.  Any unexpected
    exception from a runner is recorded on the scheduler and stops the
    whole run.
    """

    def __init__(self, scheduler):
        super(Worker, self).__init__()
        self.scheduler = scheduler

    def run(self):
        debug('%s starting.' % self)
        while not self.scheduler.stopping.is_set():
            test_runner = self.scheduler.get()
            if test_runner.is_large:
                # Cap concurrent large tests to avoid swapping the machine.
                if self.scheduler.nlarge + 1 > self.scheduler.maxlarge:
                    debug('%s pulled a large test, but there are already %d running.  Putting it back.',
                          self, self.scheduler.nlarge)
                    self.scheduler.put(test_runner)
                    continue
                self.scheduler.nlarge += 1
            try:
                test_runner.run()
            except Exception as e:
                exception('Fatal error in worker thread.')
                info('Killing all workers.')
                self.scheduler.error = e
                self.scheduler.stop()
            if test_runner.is_large:
                self.scheduler.nlarge -= 1
            # Re-queue the runner so it cycles forever, unless we're stopping.
            if not self.scheduler.stopping.is_set():
                self.scheduler.put(test_runner)
        debug('%s exiting.' % self)
class Scheduler(Queue):
    """Work queue plus the worker threads that drain it.

    Test runners cycle through the queue until the stopping event is set,
    either by stop(), the periodic rebuild timer, or a worker hitting a
    fatal error.
    """

    def __init__(self, nworkers, maxlarge, logger):
        Queue.__init__(self)
        info('Initializing scheduler with %d jobs.', nworkers)
        self.nworkers = nworkers
        self.logger = logger       # dedicated pass/fail logger
        self.maxlarge = maxlarge
        self.nlarge = 0  # not thread safe, don't really care right now
        self.passed = 0
        self.failed = 0
        self.workers = []
        self.stopping = Event()
        self.timer = None
        self.error = None          # set by a worker on fatal error

    def run(self, timeout):
        """Start the workers and block until stopped.

        If timeout is nonzero, a timer stops everything after that many
        seconds (used to trigger the periodic svn up and rebuild).
        """
        info('Starting workers.')
        self.stopping.clear()
        for i in range(self.nworkers):
            w = Worker(self)
            self.workers.append(w)
            w.start()
        if timeout != 0:
            self.timer = Timer(timeout, self.stop)
            self.timer.start()
        while not self.stopping.is_set():
            try:
                # Join with a short timeout so KeyboardInterrupt is delivered.
                for w in self.workers:
                    if self.stopping.is_set():
                        break
                    w.join(timeout=1.0)
            except (KeyboardInterrupt, SystemExit):
                debug('Scheduler interrupted.  Stopping and joining threads.')
                self.stop()
                self.join()
                sys.exit(0)
        else:
            debug('Scheduler stopped by someone else.  Joining threads.')
            self.join()

    def join(self):
        """Cancel the rebuild timer and wait for every worker to exit."""
        if self.timer is not None:
            self.timer.cancel()
        while len(self.workers) > 0:
            self.workers.pop().join()

    def stop(self):
        info('Stopping workers.')
        self.stopping.set()

    def __getitem__(self, k):
        # Lets '%(attr)s' % self work in reportstr().
        return self.__dict__[k]

    def reportstr(self):
        return '[PASS=%(passed)d FAIL=%(failed)d]' % self

    def report_success(self, runner):
        self.passed += 1
        self.logger.info('PASSED %s', runner.infostr())
        info('%s PASSED %s', self.reportstr(), runner.infostr())

    def report_failure(self, runner):
        self.failed += 1
        self.logger.warning('FAILED %s', runner.infostr())
        warning('%s FAILED %s', self.reportstr(), runner.infostr())

def compiler_works(cc):
    """Return True if `cc -v` can be run and exits 0, False otherwise."""
    try:
        # `with` closes devnull even if call() raises (the original leaked it).
        with open(os.devnull, 'w') as devnull:
            r = call([cc, '-v'], stdout=devnull, stderr=STDOUT)
        return r == 0
    except OSError:
        # Most likely the compiler binary isn't on PATH at all.
        exception('Error running %s.', cc)
        return False

def rebuild(tokudb, builddir, installdir, cc, tests):
    """Update the tree from svn, configure with cmake, and build the tests.

    Exits the process if the compiler is unusable, configuration fails,
    or the build fails.
    """
    info('Updating from svn.')
    with open(os.devnull, 'w') as devnull:
        call(['svn', 'up'], stdout=devnull, stderr=STDOUT, cwd=tokudb)
    if not compiler_works(cc):
        error('Cannot find working compiler named "%s".  Try sourcing the icc env script or providing another compiler with --cc.', cc)
        sys.exit(2)
    if cc == 'icc':
        iccstr = 'ON'
    else:
        iccstr = 'OFF'
    info('Building tokudb.')
    if not os.path.exists(builddir):
        os.mkdir(builddir)
    r = call(['cmake',
              '-DCMAKE_BUILD_TYPE=Debug',
              '-DINTEL_CC=%s' % iccstr,
              '-DCMAKE_INSTALL_DIR=%s' % installdir,
              tokudb],
             cwd=builddir)
    if r != 0:
        # The original discarded cmake's exit status; a failed configure
        # would only surface later as a confusing make error.
        error('Configuring with cmake failed.')
        sys.exit(r)
    r = call(['make', '-s'] + tests, cwd=builddir)
    if r != 0:
        error('Building the tests failed.')
        sys.exit(r)

def revfor(tokudb):
    """Return the svn revision string of the tokudb working copy."""
    proc = Popen("svn info | awk '/Revision/ {print $2}'",
                 shell=True, cwd=tokudb, stdout=PIPE)
    (out, err) = proc.communicate()
    rev = out.strip()
    info('Using tokudb at r%s.', rev)
    return rev

def main(opts):
    """Build (unless --skip_build), construct every permutation of test
    runner, and run them under a Scheduler until interrupted, stopping
    periodically to svn up and rebuild."""
    builddir = os.path.join(opts.tokudb, 'build')
    installdir = os.path.join(opts.tokudb, 'install')
    if opts.build:
        rebuild(opts.tokudb, builddir, installdir, opts.cc, opts.testnames + opts.recover_testnames)
    rev = revfor(opts.tokudb)

    if not os.path.exists(opts.savedir):
        os.mkdir(opts.savedir)

    # Dedicated pass/fail logger; don't let it propagate to the root
    # logger used for diagnostics.
    logger = logging.getLogger('stress')
    logger.propagate = False
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.FileHandler(opts.log))

    info('Saving pass/fail logs to %s.', opts.log)
    info('Saving failure environments to %s.', opts.savedir)

    scheduler = Scheduler(opts.jobs, opts.maxlarge, logger)

    runners = []

    def add_upgrade_runners(test, kwargs, double_cls, single_cls):
        # One upgrade runner per (old version, pristine-or-stressed) pair.
        for version in opts.old_versions:
            for pristine_or_stressed in ['pristine', 'stressed']:
                upgrade_kwargs = {
                    'old_environments_dir': opts.old_environments_dir,
                    'version': version,
                    'pristine_or_stressed': pristine_or_stressed
                    }
                upgrade_kwargs.update(kwargs)
                cls = double_cls if opts.double_upgrade else single_cls
                runners.append(cls(execf=test, **upgrade_kwargs))

    for tsize in [2000, 200000, 50000000]:
        for csize in [50 * tsize, 1000 ** 3]:
            kwargs = {
                'scheduler': scheduler,
                'builddir': builddir,
                'installdir': installdir,
                'rev': rev,
                'jemalloc': opts.jemalloc,
                'tsize': tsize,
                'csize': csize,
                'test_time': opts.test_time,
                'savedir': opts.savedir
                }
            for test in opts.testnames:
                if opts.run_non_upgrade:
                    runners.append(TestRunner(execf=test, **kwargs))
                if opts.run_upgrade:
                    add_upgrade_runners(test, kwargs,
                                        DoubleUpgradeTestRunner,
                                        UpgradeTestRunner)
            for test in opts.recover_testnames:
                if opts.run_non_upgrade:
                    runners.append(RecoverTestRunner(execf=test, **kwargs))
                if opts.run_upgrade:
                    add_upgrade_runners(test, kwargs,
                                        DoubleUpgradeRecoverTestRunner,
                                        UpgradeRecoverTestRunner)

    # Randomize so different parameter combinations mix on the machine.
    shuffle(runners)
    for runner in runners:
        scheduler.put(runner)

    try:
        while scheduler.error is None:
            # scheduler.run returns when the rebuild timer fires (or runs
            # forever if rebuild_period is 0 and nothing stops it).
            scheduler.run(opts.rebuild_period)
            if scheduler.error is not None:
                error('Scheduler reported an error.')
                raise scheduler.error
            rebuild(opts.tokudb, builddir, installdir, opts.cc, opts.testnames + opts.recover_testnames)
            rev = revfor(opts.tokudb)
            for runner in runners:
                runner.rev = rev
    except (KeyboardInterrupt, SystemExit):
        sys.exit(0)
    except Exception as e:
        exception('Unhandled exception caught in main.')
        raise e

# relpath implementation for python <2.6
# from http://unittest-ext.googlecode.com/hg-history/1df911640f7be239e58fb185b06ac2a8489dcdc4/unittest2/unittest2/compatibility.py
if not hasattr(os.path, 'relpath'):
    if os.path is sys.modules.get('ntpath'):
        # Windows flavor: must refuse to relativize across drives or
        # between UNC and non-UNC paths.
        def relpath(path, start=os.path.curdir):
            """Return a relative version of a path"""

            if not path:
                raise ValueError("no path specified")
            start_list = os.path.abspath(start).split(os.path.sep)
            path_list = os.path.abspath(path).split(os.path.sep)
            if start_list[0].lower() != path_list[0].lower():
                unc_path, rest = os.path.splitunc(path)
                unc_start, rest = os.path.splitunc(start)
                if bool(unc_path) ^ bool(unc_start):
                    raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)"
                                                                        % (path, start))
                else:
                    raise ValueError("path is on drive %s, start on drive %s"
                                                        % (path_list[0], start_list[0]))
            # Work out how much of the filepath is shared by start and path.
            for i in range(min(len(start_list), len(path_list))):
                if start_list[i].lower() != path_list[i].lower():
                    break
            else:
                # Loop ran to completion: one path is a prefix of the other.
                i += 1

            rel_list = [os.path.pardir] * (len(start_list)-i) + path_list[i:]
            if not rel_list:
                return os.path.curdir
            return os.path.join(*rel_list)

    else:
        # default to posixpath definition
        def relpath(path, start=os.path.curdir):
            """Return a relative version of a path"""

            if not path:
                raise ValueError("no path specified")

            start_list = os.path.abspath(start).split(os.path.sep)
            path_list = os.path.abspath(path).split(os.path.sep)

            # Work out how much of the filepath is shared by start and path.
            i = len(os.path.commonprefix([start_list, path_list]))

            rel_list = [os.path.pardir] * (len(start_list)-i) + path_list[i:]
            if not rel_list:
                return os.path.curdir
            return os.path.join(*rel_list)

    os.path.relpath = relpath

if __name__ == '__main__':
    # Parse options, validate the upgrade configuration, set up logging,
    # and hand off to main().
    a0 = os.path.abspath(sys.argv[0])
    usage = '%prog [options]\n' + __doc__
    parser = OptionParser(usage=usage)
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose', default=False, help='show build status, passing tests, and other info')
    parser.add_option('-d', '--debug', action='store_true', dest='debug', default=False, help='show debugging info')
    parser.add_option('-l', '--log', type='string', dest='log',
                      default='/tmp/run.stress-tests.log',
                      help='where to save logfiles')
    parser.add_option('-s', '--savedir', type='string', dest='savedir',
                      default='/tmp/run.stress-tests.failures',
                      help='where to save environments and extra data for failed tests')
    default_toplevel = os.path.dirname(os.path.dirname(a0))
    parser.add_option('--tokudb', type='string', dest='tokudb',
                      default=default_toplevel,
                      help=('top of the tokudb tree (contains ft/ and src/) [default=%s]' % os.path.relpath(default_toplevel)))

    test_group = OptionGroup(parser, 'Scheduler Options', 'Control how the scheduler runs jobs.')
    test_group.add_option('-t', '--test_time', type='int', dest='test_time',
                          default=600,
                          help='time to run each test, in seconds [default=600]')
    test_group.add_option('-j', '--jobs', type='int', dest='jobs', default=8,
                          help='how many concurrent tests to run [default=8]')
    test_group.add_option('--maxlarge', type='int', dest='maxlarge', default=2,
                          help='maximum number of large tests to run concurrently (helps prevent swapping) [default=2]')
    parser.add_option_group(test_group)


    default_testnames = ['test_stress1.tdb',
                         'test_stress5.tdb',
                         'test_stress6.tdb']
    default_recover_testnames = ['recover-test_stress1.tdb',
                                 'recover-test_stress2.tdb',
                                 'recover-test_stress3.tdb']
    build_group = OptionGroup(parser, 'Build Options', 'Control how the fractal tree and tests get built.')
    build_group.add_option('--skip_build', action='store_false', dest='build', default=True,
                           help='skip the svn up and build phase before testing [default=False]')
    build_group.add_option('--rebuild_period', type='int', dest='rebuild_period', default=60 * 60 * 24,
                           help='how many seconds between doing an svn up and rebuild, 0 means never rebuild [default=24 hours]')
    build_group.add_option('--cc', type='string', dest='cc', default='icc',
                           help='which compiler to use [default=icc]')
    build_group.add_option('--jemalloc', type='string', dest='jemalloc',
                           help='a libjemalloc.so to put in LD_PRELOAD when running tests')
    build_group.add_option('--add_test', action='append', type='string', dest='testnames', default=default_testnames,
                           help=('add a stress test to run [default=%r]' % default_testnames))
    build_group.add_option('--add_recover_test', action='append', type='string', dest='recover_testnames', default=default_recover_testnames,
                           help=('add a recover stress test to run [default=%r]' % default_recover_testnames))
    parser.add_option_group(build_group)

    upgrade_group = OptionGroup(parser, 'Upgrade Options', 'Also run on environments from old versions of tokudb.')
    upgrade_group.add_option('--run_upgrade', action='store_true', dest='run_upgrade', default=False,
                             help='run the tests on old dictionaries as well, to test upgrade [default=False]')
    upgrade_group.add_option('--skip_non_upgrade', action='store_false', dest='run_non_upgrade', default=True,
                             help="skip the tests that don't involve upgrade [default=False]")
    upgrade_group.add_option('--double_upgrade', action='store_true', dest='double_upgrade', default=False,
                             help='run the upgrade tests twice in a row [default=False]')
    upgrade_group.add_option('--add_old_version', action='append', type='choice', dest='old_versions', choices=['4.2.0', '5.0.8', '5.2.7'],
                             help='which old versions to use for running the stress tests in upgrade mode. can be specified multiple times [options=4.2.0, 5.0.8, 5.2.7]')
    upgrade_group.add_option('--old_environments_dir', type='string', dest='old_environments_dir',
                             default='../../tokudb.data/old-stress-test-envs',
                             help='directory containing old version environments (should contain 5.0.8/, 5.2.7/, etc, and the environments should be in those) [default=../../tokudb.data/stress_environments]')
    parser.add_option_group(upgrade_group)

    (opts, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Invalid arguments: %r' % args)

    if opts.run_upgrade:
        if not os.path.isdir(opts.old_environments_dir):
            parser.error('You specified --run_upgrade but did not specify an --old_environments_dir that exists.')
        # old_versions is None (not []) when --add_old_version was never
        # given; the original len() check raised TypeError in that case.
        if not opts.old_versions:
            parser.error('You specified --run_upgrade but gave no --old_versions to run against.')
        for version in opts.old_versions:
            version_dir = os.path.join(opts.old_environments_dir, version)
            if not os.path.isdir(version_dir):
                parser.error('You specified --run_upgrade but %s is not a directory.' % version_dir)

    if opts.debug:
        logging.basicConfig(level=logging.DEBUG)
    elif opts.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARNING)

    main(opts)