scanscan.c 16.2 KB
Newer Older
1
/* Scan the bench.tokudb/bench.db over and over. */
2
#define DONT_DEPRECATE_MALLOC
3

4
#include <toku_portability.h>
5
#include "tokudb_common_funcs.h"
6
#include <toku_assert.h>
7 8 9
#include <db.h>
#include <errno.h>
#include <stdlib.h>
10
#include <string.h>
11
#include <fcntl.h>
12
#include <unistd.h>
13 14
#ifdef TOKUDB
#include "key.h"
15 16
#include "cachetable.h"
#include "trace_mem.h"
17
#endif
18

19
/* Program name (argv[0]); set by parse_args and used in usage messages. */
const char *pname;
/* Which experiment test_main runs; selected by the command-line mode options. */
enum run_mode { RUN_HWC, RUN_LWC, RUN_VERIFY, RUN_RANGE, RUN_FLATTEN} run_mode = RUN_HWC;
/* do_txns: run inside a transaction (cleared by --nox).
 * prelock: pre-acquire a table lock (--prelock).
 * prelockflag: pass lock_flag to the cursor operations (--prelockflag / --prelockwriteflag). */
int do_txns=1, prelock=0, prelockflag=0;
/* Flag OR-ed into cursor get flags when prelockflag is set. */
u_int32_t lock_flag = 0;
/* Stop a scan after this many rows (--count); values <= 0 mean no limit for the full scans. */
long limitcount=-1;
/* Value handed to env->set_cachesize (--cachesize). */
u_int32_t cachesize = 127*1024*1024;
/* Nonzero: install mysql_key_compare as the key comparison function (--mysql). */
static int do_mysql = 0;
/* Key boundaries used by the random range queries (--range LOW HIGH). */
static u_int64_t start_range = 0, end_range = 0;
/* Number of times each experiment is repeated (--experiments). */
static int n_experiments = 2;

/* Verbosity level (--verbose). */
static int verbose = 0;
/* Log directory passed to env->set_lg_dir when non-NULL (--log_dir). */
static const char *log_dir = NULL;
31

32

33
/* Print the command-line synopsis and the description of every option to stderr.
 * argv0 is the program name shown in the synopsis.
 * Always returns 1 so that callers can write exit(print_usage(pname)). */
static int print_usage (const char *argv0) {
    /* The synopsis lists --hwc (the spelling parse_args accepts), not --nohwc. */
    fprintf(stderr, "Usage:\n%s [--verify-lwc | --lwc | --hwc | --flatten | --range LOW HIGH] [--prelock] [--prelockflag] [--prelockwriteflag] [--env DIR] [--verbose]\n", argv0);
    fprintf(stderr, "  --verify-lwc        means to run the light weight cursor and the heavyweight cursor to verify that they get the same answer.\n");
    fprintf(stderr, "  --flatten           Flatten only using special flatten function\n");
    fprintf(stderr, "  --lwc               run light weight cursors instead of heavy weight cursors\n");
    fprintf(stderr, "  --hwc               run heavy weight cursors (the default)\n");
    fprintf(stderr, "  --prelock           acquire a read lock on the entire table before running\n");
    fprintf(stderr, "  --prelockflag       pass DB_PRELOCKED to the cursor get operation whenever the locks have been acquired\n");
    fprintf(stderr, "  --prelockwriteflag  pass DB_PRELOCKED_WRITE to the cursor get operation\n");
    fprintf(stderr, "  --nox               no transactions (no locking)\n");
    fprintf(stderr, "  --count <count>     read the first COUNT rows and then stop.\n");
    fprintf(stderr, "  --cachesize <n>     set the env cachesize to <n>\n");
    fprintf(stderr, "  --mysql             compare keys that are mysql big int not null types\n");
    fprintf(stderr, "  --env DIR           put db files in DIR instead of default\n");
    fprintf(stderr, "  --log_dir LOGDIR    put the logs in LOGDIR\n");
    fprintf(stderr, "  --range LOW HIGH    set the LOW and HIGH key boundaries in which random range queries are made\n");
    fprintf(stderr, "  --experiments N     run N experiments (default:%d)\n", n_experiments);
    fprintf(stderr, "  --srandom N         srandom(N)\n");
    fprintf(stderr, "  --recover           run recovery\n");
    fprintf(stderr, "  --verbose           print verbose information\n");
    return 1;
}

/* Environment, database and transaction handles shared by all experiments.
 * They are created in scanscan_setup and released in scanscan_shutdown;
 * tid stays 0 when transactions are disabled. */
DB_ENV *env;
DB *db;
DB_TXN *tid=0;

/* Two-level stringification so that the macro argument is expanded before being quoted. */
#define STRINGIFY2(s) #s
#define STRINGIFY(s) STRINGIFY2(s)
/* Environment directory; may be replaced by the --env option. */
const char *dbdir = "./bench."  STRINGIFY(DIRSUF); /* DIRSUF is passed in as a -D argument to the compiler. */
/* Flags for env->open with transactions (yesx) and without (nox); --recover ORs DB_RECOVER into both. */
int env_open_flags_yesx = DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL|DB_INIT_TXN|DB_INIT_LOG|DB_INIT_LOCK;
int env_open_flags_nox = DB_CREATE|DB_PRIVATE|DB_INIT_MPOOL;
/* Name of the database file opened inside the environment. */
char *dbfilename = "bench.db";

66 67 68 69 70 71
/* Return the current time of day, as reported by gettimeofday, in seconds
 * (the microsecond part becomes the fraction). */
static double gettime (void) {
    struct timeval now;
    int rc = gettimeofday(&now, 0);
    assert(rc==0);
    double whole_seconds = now.tv_sec;
    double micros = now.tv_usec;
    return whole_seconds + 1e-6*micros;
}
72

73
static void parse_args (int argc, char *const argv[]) {
74
    pname=argv[0];
75
    argc--; argv++;
76
    int specified_run_mode=0;
77
    while (argc>0) {
78 79 80
        if (strcmp(*argv,"--verbose")==0) {
            verbose++;
        } else if (strcmp(*argv,"--verify-lwc")==0) {
81 82
	    if (specified_run_mode && run_mode!=RUN_VERIFY) { two_modes: fprintf(stderr, "You specified two run modes\n"); exit(1); }
	    run_mode = RUN_VERIFY;
83 84 85
	} else if (strcmp(*argv, "--flatten")==0)  {
	    if (specified_run_mode && run_mode!=RUN_FLATTEN) goto two_modes;
	    run_mode = RUN_FLATTEN;
86 87 88 89 90 91 92
	} else if (strcmp(*argv, "--lwc")==0)  {
	    if (specified_run_mode && run_mode!=RUN_LWC) goto two_modes;
	    run_mode = RUN_LWC;
	} else if (strcmp(*argv, "--hwc")==0)  {
	    if (specified_run_mode && run_mode!=RUN_VERIFY) goto two_modes;
	    run_mode = RUN_HWC;
	} else if (strcmp(*argv, "--prelock")==0) prelock=1;
93
#ifdef TOKUDB
94
        else if (strcmp(*argv, "--prelockflag")==0)      { prelockflag=1; lock_flag = DB_PRELOCKED; }
Yoni Fogel's avatar
Yoni Fogel committed
95
        else if (strcmp(*argv, "--prelockwriteflag")==0) { prelockflag=1; lock_flag = DB_PRELOCKED_WRITE; }
96
#endif
97
	else if (strcmp(*argv, "--nox")==0)              { do_txns=0; }
98 99
	else if (strcmp(*argv, "--count")==0)            {
	    char *end;
100
            argc--; argv++; 
101 102
	    errno=0; limitcount=strtol(*argv, &end, 10); assert(errno==0);
	    printf("Limiting count to %ld\n", limitcount);
103 104
        } else if (strcmp(*argv, "--cachesize")==0 && argc>0) {
            char *end;
105
            argc--; argv++; 
106
            cachesize=(u_int32_t)strtol(*argv, &end, 10);
107
	} else if (strcmp(*argv, "--env") == 0) {
108
            argc--; argv++;
109 110
	    if (argc==0) exit(print_usage(pname));
	    dbdir = *argv;
111 112 113 114
	} else if (strcmp(*argv, "--log_dir") == 0) {
            argc--; argv++;
	    if (argc==0) exit(print_usage(pname));
	    log_dir = *argv;
115 116
        } else if (strcmp(*argv, "--mysql") == 0) {
            do_mysql = 1;
117 118
        } else if (strcmp(*argv, "--verbose") == 0) {
            verbose = 1;
119 120 121 122 123 124 125 126 127
        } else if (strcmp(*argv, "--range") == 0 && argc > 2) {
            run_mode = RUN_RANGE;
            argc--; argv++;
            start_range = strtoll(*argv, NULL, 10);
            argc--; argv++;
            end_range = strtoll(*argv, NULL, 10);
        } else if (strcmp(*argv, "--experiments") == 0 && argc > 1) {
            argc--; argv++;
            n_experiments = strtol(*argv, NULL, 10);
128 129 130
        } else if (strcmp(*argv, "--srandom") == 0 && argc > 1) {
	    argc--; argv++;
            srandom(atoi(*argv));
131 132 133
        } else if (strcmp(*argv, "--recover") == 0) {
            env_open_flags_yesx |= DB_RECOVER;
            env_open_flags_nox |= DB_RECOVER;
134
	} else {
135
            exit(print_usage(pname));
136
	}
137
	argc--; argv++;
138
    }
139 140 141 142 143 144
    //Prelocking is meaningless without transactions
    if (do_txns==0) {
        prelockflag=0;
        lock_flag=0;
        prelock=0;
    }
145 146 147
}


148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
/* Load the 8 bytes at d into a uint64_t in host byte order.
 * memcpy is used so that d does not have to be aligned. */
static inline uint64_t mysql_get_bigint(unsigned char *d) {
    uint64_t value;
    memcpy(&value, d, sizeof(value));
    return value;
}

/* Key comparison callback used when --mysql is given.
 * Both keys are asserted to be 9 bytes long with a zero first byte; the
 * remaining 8 bytes are compared as unsigned 64-bit integers in host byte order.
 * Returns -1, 0 or +1 when a is less than, equal to or greater than b. */
static int mysql_key_compare(DB *mydb __attribute__((unused)),
                               const DBT *adbt, const DBT *bdbt) {
    unsigned char *lhs_bytes = adbt->data;
    unsigned char *rhs_bytes = bdbt->data;
    assert(adbt->size == 9 && bdbt->size == 9);
    assert(lhs_bytes[0] == 0 && rhs_bytes[0] == 0);
    uint64_t lhs = mysql_get_bigint(lhs_bytes+1);
    uint64_t rhs = mysql_get_bigint(rhs_bytes+1);
    return (lhs > rhs) - (lhs < rhs);
}

168
static void scanscan_setup (void) {
169
    int r;
170
    r = db_env_create(&env, 0);                                                           assert(r==0);
171 172 173 174 175
#ifdef TOKUDB
    if (do_mysql) {
        r = env->set_default_bt_compare(env, mysql_key_compare); assert(r == 0);
    }
#endif
176
    r = env->set_cachesize(env, 0, cachesize, 1);                                         assert(r==0);
177 178 179
    if (log_dir) {
        r = env->set_lg_dir(env, log_dir);                                                assert(r==0);
    }
180
    double tstart = gettime();
181
    r = env->open(env, dbdir, do_txns? env_open_flags_yesx : env_open_flags_nox, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);   assert(r==0);
182 183 184
    double tend = gettime();
    if (verbose)
        printf("env open %f seconds\n", tend-tstart);
185
    r = db_create(&db, env, 0);                                                           assert(r==0);
186
#ifndef TOKUDB
187 188 189
    if (do_mysql) {
        r = db->set_bt_compare(db, mysql_key_compare); assert(r == 0);
    }
190
#endif
191 192 193
    if (do_txns) {
	r = env->txn_begin(env, 0, &tid, 0);                                              assert(r==0);
    }
194
    r = db->open(db, tid, dbfilename, NULL, DB_BTREE, 0, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);                           assert(r==0);
195
#ifdef TOKUDB
196
    if (prelock) {
197 198
	r = db->pre_acquire_table_lock(db,
				      tid);
199 200
	assert(r==0);
    }
201
#endif
202 203
}

204
static void scanscan_shutdown (void) {
205
    int r;
206
    r = db->close(db, 0);                                       assert(r==0);
207 208 209
    if (do_txns) {
	r = tid->commit(tid, 0);                                    assert(r==0);
    }
210
    r = env->close(env, 0);                                     assert(r==0);
211
    env = NULL;
212 213

#if 0 && defined TOKUDB
214
    {
Yoni Fogel's avatar
Yoni Fogel committed
215 216 217 218 219
	extern int toku_os_get_max_rss(int64_t*);
        int64_t mrss;
        int r = toku_os_get_max_rss(&mrss);
        assert(r==0);
	printf("maxrss=%.2fMB\n", mrss/256.0);
220
    }
221
#endif
222 223
}

224 225 226 227 228 229 230 231

/* When --verbose was given (TOKUDB builds only), fetch the engine status text
 * from the environment and print it to stdout.  Does nothing otherwise. */
static void print_engine_status(void) {
#if defined TOKUDB
    /* A compile-time constant lets buff be an ordinary array instead of a VLA. */
    enum { STATUS_BUFFER_SIZE = 1024 * 32 };
    if (verbose) {
      char buff[STATUS_BUFFER_SIZE];
      /* Start with an empty string so the printf below is well defined even
       * if the status call leaves the buffer untouched. */
      buff[0] = '\0';
      env->get_engine_status_text(env, buff, STATUS_BUFFER_SIZE);
      printf("Engine status:\n");
      printf("%s", buff);
    }
#endif
}


238
static void scanscan_hwc (void) {
239 240
    int r;
    int counter=0;
241
    for (counter=0; counter<n_experiments; counter++) {
242 243 244
	long long totalbytes=0;
	int rowcounter=0;
	double prevtime = gettime();
245
	DBT k,v;
246 247
	DBC *dbc;
	r = db->cursor(db, tid, &dbc, 0);                           assert(r==0);
248 249
	memset(&k, 0, sizeof(k));
	memset(&v, 0, sizeof(v));
Yoni Fogel's avatar
Yoni Fogel committed
250 251 252 253 254
        u_int32_t c_get_flags = DB_NEXT;
        if (prelockflag && (counter || prelock)) {
            c_get_flags |= lock_flag;
        }
	while (0 == (r = dbc->c_get(dbc, &k, &v, c_get_flags))) {
255 256
	    totalbytes += k.size + v.size;
	    rowcounter++;
257
	    if (limitcount>0 && rowcounter>=limitcount) break;
258
	}
Bradley C. Kuszmaul's avatar
Bradley C. Kuszmaul committed
259
	assert(r==DB_NOTFOUND); // complain about things like lock-not-found
260
	r = dbc->c_close(dbc);                                      assert(r==0);
261 262
	double thistime = gettime();
	double tdiff = thistime-prevtime;
263
	printf("Scan    %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", totalbytes, rowcounter, tdiff, 1e-6*totalbytes/tdiff);
264
	print_engine_status();
265 266 267
    }
}

268 269
#ifdef TOKUDB

270 271 272 273
/* Running totals filled in by the counttotalbytes cursor callback. */
struct extra_count {
    long long totalbytes; /* sum of key and value sizes over all rows seen */
    int rowcounter;       /* number of rows seen */
};
274

275
/* Cursor callback: add one row to the running totals.
 * key, data: the row handed over by the cursor.
 * extrav:    points to the struct extra_count to update.
 * Always returns 0. */
static int counttotalbytes (DBT const *key, DBT const *data, void *extrav) {
    struct extra_count *totals = extrav;
    totals->rowcounter++;
    totals->totalbytes += key->size + data->size;
    /* Disabled (note the "&& 0"): check that mysql keys arrive as consecutive integers. */
    if (do_mysql && 0) {
        static uint64_t expect_key = 0;
        uint64_t seen_key = mysql_get_bigint((unsigned char*)key->data+1);
        if (seen_key != expect_key) {
            printf("%s:%d %"PRIu64" %"PRIu64"\n", __FUNCTION__, __LINE__, seen_key, expect_key);
        }
        expect_key = seen_key + 1;
    }
    return 0;
}

289
static void scanscan_lwc (void) {
290 291
    int r;
    int counter=0;
292
    for (counter=0; counter<n_experiments; counter++) {
293 294 295 296
	struct extra_count e = {0,0};
	double prevtime = gettime();
	DBC *dbc;
	r = db->cursor(db, tid, &dbc, 0);                           assert(r==0);
Yoni Fogel's avatar
Yoni Fogel committed
297 298 299 300
        u_int32_t f_flags = 0;
        if (prelockflag && (counter || prelock)) {
            f_flags |= lock_flag;
        }
301 302 303 304 305
	long rowcounter=0;
	while (0 == (r = dbc->c_getf_next(dbc, f_flags, counttotalbytes, &e))) {
	    rowcounter++;
	    if (limitcount>0 && rowcounter>=limitcount) break;
	}
Bradley C. Kuszmaul's avatar
Bradley C. Kuszmaul committed
306
	assert(r==DB_NOTFOUND);
307 308 309 310
	r = dbc->c_close(dbc);                                      assert(r==0);
	double thistime = gettime();
	double tdiff = thistime-prevtime;
	printf("LWC Scan %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", e.totalbytes, e.rowcounter, tdiff, 1e-6*e.totalbytes/tdiff);
311
	print_engine_status();
312 313
    }
}
314 315 316

static void scanscan_flatten (void) {
    int r;
317
    for (int counter=0; counter<n_experiments; counter++) {
318 319 320 321 322 323 324 325
	double prevtime = gettime();
        r = db->flatten(db, tid);
	assert(r==0);
	double thistime = gettime();
	double tdiff = thistime-prevtime;
	printf("Flatten Scan in %9.6fs\n", tdiff);
    }
}
326
#endif
327

328 329
static void scanscan_range (void) {
    int r;
330 331

    double texperiments[n_experiments];
332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
    u_int64_t k = 0;
    char kv[8];
    DBT key, val;
  
    for (int counter = 0; counter < n_experiments; counter++) {

        if (1) { //if ((counter&1) == 0) {   
   	makekey:
	    // generate a random key in the key range
	    k = (start_range + (random() % (end_range - start_range))) * (1<<6);
	    for (int i = 0; i < 8; i++)
                kv[i] = k >> (56-8*i);
	}
	memset(&key, 0, sizeof key); key.data = &kv, key.size = sizeof kv;
	memset(&val, 0, sizeof val);
347

348 349 350 351 352
        double tstart = gettime();

        DBC *dbc;
        r = db->cursor(db, tid, &dbc, 0); assert(r==0);

353
        // set the cursor to the random key
354 355 356
        r = dbc->c_get(dbc, &key, &val, DB_SET_RANGE+lock_flag);
        if (r != 0) {
            assert(r == DB_NOTFOUND);
357
            printf("%s:%d %"PRIu64"\n", __FUNCTION__, __LINE__, k);
358 359
            goto makekey;
        }
360

361
#ifdef TOKUDB
362
        // do the range scan
363
	long rowcounter = 0;
364
	struct extra_count e = {0,0};
365 366 367
        while (limitcount > 0 && rowcounter < limitcount) {
            r = dbc->c_getf_next(dbc, prelockflag ? lock_flag : 0, counttotalbytes, &e);
            if (r != 0)
368
                break;
369
	    rowcounter++;
370
	}
371
#endif
372

373 374
        r = dbc->c_close(dbc);                                      
        assert(r==0);
375

376
        texperiments[counter] = gettime() - tstart;
377
        printf("%"PRIu64" %f\n", k, texperiments[counter]); fflush(stdout);
378
    }
379

380 381
    // print the times
    double tsum = 0.0, tmin = 0.0, tmax = 0.0;
382
    for (int counter = 0; counter < n_experiments; counter++) {
383
        if (counter==0 || texperiments[counter] < tmin)
384
            tmin = texperiments[counter];
385
        if (counter==0 || texperiments[counter] > tmax)
386 387 388 389
            tmax = texperiments[counter];
        tsum += texperiments[counter];
    }
    printf("%f %f %f/%d = %f\n", tmin, tmax, tsum, n_experiments, tsum / n_experiments);
390 391
}

392
#ifdef TOKUDB
393

394 395 396 397 398
/* State shared between scanscan_verify and its checkbytes callback. */
struct extra_verify {
    long long totalbytes; /* sum of key and value sizes over all rows checked */
    int rowcounter;       /* number of rows checked */
    DBT k,v; // the k and v are gotten using the old cursor
};
399

400 401
/* Cursor callback for scanscan_verify: add the row to the totals and assert
 * that it equals the row stored in the struct extra_verify that extrav points
 * to -- same sizes, same bytes, but held in different memory.
 * Always returns 0. */
static int
checkbytes (DBT const *key, DBT const *data, void *extrav) {
    struct extra_verify *expected = extrav;
    const DBT *want_key = &expected->k;
    const DBT *want_val = &expected->v;

    expected->totalbytes += key->size + data->size;
    expected->rowcounter++;

    assert(want_key->size == key->size);
    assert(want_val->size == data->size);
    assert(memcmp(want_key->data, key->data,  key->size)==0);
    assert(memcmp(want_val->data, data->data, data->size)==0);
    /* The two cursors must not hand out the very same buffers. */
    assert(want_key->data != key->data);
    assert(want_val->data != data->data);
    return 0;
}
    

415
static void scanscan_verify (void) {
416 417
    int r;
    int counter=0;
418
    for (counter=0; counter<n_experiments; counter++) {
419 420 421 422 423 424 425 426 427
	struct extra_verify v;
	v.totalbytes=0;
	v.rowcounter=0;
	double prevtime = gettime();
	DBC *dbc1, *dbc2;
	r = db->cursor(db, tid, &dbc1, 0);                           assert(r==0);
	r = db->cursor(db, tid, &dbc2, 0);                           assert(r==0);
	memset(&v.k, 0, sizeof(v.k));
	memset(&v.v, 0, sizeof(v.v));
Yoni Fogel's avatar
Yoni Fogel committed
428 429 430 431 432 433
        u_int32_t f_flags = 0;
        u_int32_t c_get_flags = DB_NEXT;
        if (prelockflag && (counter || prelock)) {
            f_flags     |= lock_flag;
            c_get_flags |= lock_flag;
        }
434
	while (1) {
435 436 437
	    int r1,r2;
	    r2 = dbc1->c_get(dbc1, &v.k, &v.v, c_get_flags);
	    r1 = dbc2->c_getf_next(dbc2, f_flags, checkbytes, &v);
438 439 440 441 442 443 444 445
	    assert(r1==r2);
	    if (r1) break;
	}
	r = dbc1->c_close(dbc1);                                      assert(r==0);
	r = dbc2->c_close(dbc2);                                      assert(r==0);
	double thistime = gettime();
	double tdiff = thistime-prevtime;
	printf("verify   %lld bytes (%d rows) in %9.6fs at %9fMB/s\n", v.totalbytes, v.rowcounter, tdiff, 1e-6*v.totalbytes/tdiff);
446
	print_engine_status();
447 448 449
    }
}

450
#endif
451

452
static int test_main (int argc, char *const argv[]) {
453 454 455

    parse_args(argc,argv);

456
    scanscan_setup();
457
    switch (run_mode) {
458 459 460
    case RUN_HWC:    scanscan_hwc();    break;
#ifdef TOKUDB
    case RUN_LWC:    scanscan_lwc();    break;
461
    case RUN_FLATTEN:    scanscan_flatten();    break;
462
    case RUN_VERIFY: scanscan_verify(); break;
Yoni Fogel's avatar
Yoni Fogel committed
463
#endif
464
    case RUN_RANGE:  scanscan_range();  break;
465
    default:         assert(0);         break;
466
    }
467
    scanscan_shutdown();
468

469
#if defined(TOKUDB)
470 471
    // if tokudb has tracing enabled (see trace_mem.h) then this will dump the trace data
    if (1) toku_print_trace_mem(stderr);
472
#endif
473
#if defined(__linux__) && __linux__
474 475 476 477 478 479 480 481 482 483 484 485
    if (verbose) {
        char fname[256];
        sprintf(fname, "/proc/%d/status", toku_os_getpid());
        FILE *f = fopen(fname, "r");
        if (f) {
            char line[256];
            while (fgets(line, sizeof line, f)) {
                int n;
                if (sscanf(line, "VmPeak: %d", &n) || sscanf(line, "VmHWM: %d", &n) || sscanf(line, "VmRSS: %d", &n))
                    fputs(line, stdout);
            }
            fclose(f);
486
        }
487
    }
488
#endif
489 490
    return 0;
}