From e5497364c19a5fc4621ec0b6e7b6b395f4ca8a2d Mon Sep 17 00:00:00 2001 From: Yoni Fogel <yoni@tokutek.com> Date: Sat, 15 Sep 2007 00:24:48 +0000 Subject: [PATCH] Random data generator. make test will run a test on it. The test works currently but is very ugly. git-svn-id: file:///svn/tokudb@313 c7de825b-a66e-492c-adef-691d508d4ae1 --- utils/Makefile | 45 ++++ utils/ydb_gen.c | 608 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 653 insertions(+) create mode 100755 utils/Makefile create mode 100755 utils/ydb_gen.c diff --git a/utils/Makefile b/utils/Makefile new file mode 100755 index 00000000000..ca8ffe2f756 --- /dev/null +++ b/utils/Makefile @@ -0,0 +1,45 @@ +CFLAGS = --pedantic -std=c99 -W -Wall -Werror -Wno-unused -g -fPIC -O +LFLAGS = -l CPPFLAGS = -I../include -I../newbrt + +#cc $(CPPFLAGS) $(DBBINS) -shared -o libdb.so $(CFLAGS) + +BDB_DUMP=/usr/local/BerkeleyDB.4.1/bin/db_dump +BDB_LOAD=/usr/local/BerkeleyDB.4.1/bin/db_load + +UTILS= \ + ydb_gen \ +# ydb_dump \ +# ydb_load \ +#End + +.PHONY: all clean test test_gen test_gen_hex + +all: $(UTILS) + +test: test_gen + +test_gen: test_gen_hex + +SHELL=/bin/bash + +BDB_LOAD=/usr/local/BerkeleyDB.4.1/bin/db_load +BDB_DUMP=/usr/local/BerkeleyDB.4.1/bin/db_dump + +TEST_GEN_HEX_FLAGS=-n 1000 -m 0 -M 1024 -r 5 + +test_gen_hex: + #Generating 10,000 keys. 0 to 1024 bytes (not including identifier overhead) + echo "Generating text input > db > text" + rm -f test_gen_1 + ./ydb_gen $(TEST_GEN_HEX_FLAGS) -o >($(BDB_LOAD) test_gen_1) + $(BDB_DUMP) test_gen_1 > 1 + ./ydb_gen -Hf > 2 + ./ydb_gen $(TEST_GEN_HEX_FLAGS) -d g -s h | tr "h" "\n" | sort -t g -k 1,1 | tr -d "\n" | tr "g" "\n" >> 2 + ./ydb_gen -Fh >> 2 + diff -q 1 2 + if ! diff -q 1 2; then echo Files different!; exit 1; fi + +#if diff -q <(echo "foo") <(echo "foo") > /dev/null; then echo yes; else echo no; fi +clean: + rm -rf *.so *.o $(UTILS) + diff --git a/utils/ydb_gen.c b/utils/ydb_gen.c new file mode 100755 index 00000000000..1c783b1bdfc --- /dev/null +++ b/utils/ydb_gen.c @@ -0,0 +1,608 @@ +#include <assert.h> +#include <stdio.h> +#include <sys/types.h> +#include <stdlib.h> +#include <limits.h> +#include <unistd.h> +#include <string.h> +#include <ctype.h> + +extern char* optarg; +extern int optind; +extern int optopt; +extern int opterr; +extern int optreset; + +#if !defined(bool) +typedef unsigned char bool; +#endif + +#if !defined(true) +#define true ((bool)1) +#endif + +#if !defined(false) +#define false ((bool)0) +#endif +#define args(arguments) arguments + +int usage args( (const char* progname) ); +void generate_keys args( ( + char dbt_delimiter, + char* sort_delimiter, + const char* progname, + bool plaintext, + long minsize, + long maxsize, + long long maxnumkeys, + long maxkibibytes, + unsigned long seed, + bool printableonly +) ); + +int main (int argc, char *argv[]) { + char ch; + char dbt_delimiter = '\n'; + char* sort_delimiter = ""; + const char* progname = argv[0]; + bool plaintext = false; + long minsize = -1; + long maxsize = -1; + long long maxnumkeys = -1; + long maxkibibytes = -1; + bool header = true; + bool footer = true; + bool justheader = false; + bool justfooter = false; + bool outputkeys = true; + unsigned long seed = 1; + bool printableonly = false; + + while ((ch = getopt(argc, argv, "PfFhHTr:s:d:p:m:M:n:N:?o:")) != EOF) { + switch (ch) { + case ('P'): { + printableonly = true; + break; + } + case ('h'): { + header = false; + break; + } + case ('H'): { + justheader = true; + break; + } + case ('f'): { + footer = false; + break; + } + case ('F'): { + justfooter = true; + break; + } + case ('T'): { + plaintext = true; + break; + } + case ('o'): { + extern int errno; + if (freopen(optarg, "w", stdout) == NULL) + { + fprintf( + stderr, + "%s: %s: reopen: %s\n", + progname, + optarg, + strerror(errno) + ); + return (EXIT_FAILURE); + } + break; + } + case ('d'): { + if (strlen(optarg) != 1) { + fprintf( + stderr, + "%s: %s: (-n) Key (or value) delimiter must be one character.", + progname, + optarg + ); + return (EXIT_FAILURE); + } + dbt_delimiter = optarg[0]; + if (isxdigit(dbt_delimiter)) { + fprintf( + stderr, + "%s: %c: (-n) Key (or value) delimiter cannot be a hex digit.", + progname, + dbt_delimiter + ); + return (EXIT_FAILURE); + } + break; + } + case ('s'): { + sort_delimiter = optarg; + if (strlen(sort_delimiter) != 1) { + fprintf( + stderr, + "%s: %s: (-s) Sorting (Between key/value pairs) delimiter must be one character.", + progname, + optarg + ); + return (EXIT_FAILURE); + } + if (isxdigit(sort_delimiter[0])) { + fprintf( + stderr, + "%s: %s: (-s) Sorting (Between key/value pairs) delimiter cannot be a hex digit.", + progname, + sort_delimiter + ); + return (EXIT_FAILURE); + } + break; + } + case ('r'): + { + char* test; + + seed = strtol(optarg, &test, 10); + if ( + optarg[0] == '\0' || + *test != '\0' + ) + { + fprintf( + stderr, + "%s: %s: (-r) Random seed invalid.", + progname, + optarg + ); + } + break; + } + case ('m'): + { + char* test; + + if ( + optarg[0] == '\0' || + (minsize = strtol(optarg, &test, 10)) < 0 || + *test != '\0' + ) + { + fprintf( + stderr, + "%s: %s: (-m) Min size of keys/values invalid.", + progname, + optarg + ); + } + break; + } + case ('M'): + { + char* test; + + if ( + optarg[0] == '\0' || + (maxsize = strtol(optarg, &test, 10)) < 0 || + *test != '\0' + ) + { + fprintf( + stderr, + "%s: %s: (-M) Max size of keys/values invalid.", + progname, + optarg + ); + } + break; + } + case ('n'): + { + char* test; + + if ( + optarg[0] == '\0' || + (maxnumkeys = strtoll(optarg, &test, 10)) <= 0 || + *test != '\0' + ) + { + fprintf( + stderr, + "%s: %s: (-n) Max number of keys to generate invalid.", + progname, + optarg + ); + } + break; + } + case ('N'): + { + char* test; + + if ( + optarg[0] == '\0' || + (maxkibibytes = strtol(optarg, &test, 10)) <= 0 || + *test != '\0' + ) + { + fprintf( + stderr, + "%s: %s: (-N) Max kibibytes to generate invalid.", + progname, + optarg + ); + } + break; + } + case ('?'): + default: { + return (usage(progname)); + } + } + } + argc -= optind; + argv += optind; + + if (justheader && !header) { + fprintf( + stderr, + "%s: The -h and -H options may not both be specified.\n", + progname + ); + usage(progname); + return (EXIT_FAILURE); + } + if (justfooter && !footer) { + fprintf( + stderr, + "%s: The -f and -F options may not both be specified.\n", + progname + ); + usage(progname); + return (EXIT_FAILURE); + } + if (justfooter && justheader) { + fprintf( + stderr, + "%s: The -H and -F options may not both be specified.\n", + progname + ); + usage(progname); + return (EXIT_FAILURE); + } + if (justfooter && header) { + fprintf( + stderr, + "%s: -F implies -h\n", + progname + ); + header = false; + } + if (justheader && footer) { + fprintf( + stderr, + "%s: -H implies -f\n", + progname + ); + footer = false; + } + if (plaintext) + { + if (footer) + { + fprintf( + stderr, + "%s: -T implies -f\n", + progname + ); + footer = false; + } + if (header) + { + fprintf( + stderr, + "%s: -T implies -h\n", + progname + ); + header = false; + } + } + if (justfooter || justheader) + { + outputkeys = false; + } + else if ( + (maxnumkeys > 0 && maxkibibytes > 0) || + (maxnumkeys <= 0 && maxkibibytes <= 0) + ) + { + fprintf( + stderr, + "%s: exactly one of the -n and -N options must be specified.\n", + progname + ); + usage(progname); + return (EXIT_FAILURE); + } + if (outputkeys && seed == 1) + { + fprintf( + stderr, + "%s: Using default seed. (-r 1).\n", + progname + ); + seed = 1; + } + if (outputkeys && minsize == -1) { + fprintf( + stderr, + "%s: Using default minsize. (-m 0).\n", + progname + ); + minsize = 0; + } + if (outputkeys && maxsize == -1) { + fprintf( + stderr, + "%s: Using default maxsize. (-M 1024).\n", + progname + ); + maxsize = 1024; + } + if (outputkeys && minsize > maxsize) { + fprintf( + stderr, + "%s: Max key size must be greater than min key size.\n", + progname + ); + usage(progname); + return (EXIT_FAILURE); + } + + if (argc != 0) { + return (usage(progname)); + } + if (header) + { + printf( + "VERSION=3\n" + "format=%s\n" + "type=btree\n" + "db_pagesize=4096\n" + "HEADER=END\n", + ( + plaintext ? + "print" : + "bytevalue" + ) + ); + } + if (justheader) + { + return 0; + } + if (outputkeys) + { + /* Generate Keys! */ + generate_keys( + dbt_delimiter, + sort_delimiter, + progname, + plaintext, + minsize, + maxsize, + maxnumkeys, + maxkibibytes, + seed, + printableonly + ); + } + if (footer) + { + printf("DATA=END\n"); + } + return 0; +} + +int usage(const char* progname) +{ + printf + ( + "usage: %s [-ThHfF] [-d delimiter] [-s delimiter]\n" + " -m minsize -M maxsize [-r random seed]\n" + " (-n maxnumkeys | -N maxkibibytes) [-o filename]\n", + progname + ); + return 1; +} + +unsigned char randbyte() +{ + static int numsavedbits = 0; + static unsigned long long savedbits = 0; + unsigned char retval; + + if (numsavedbits < 8) + { + savedbits |= ((unsigned long long)random()) << numsavedbits; + numsavedbits += 31; /* Random generates 31 random bits. */ + } + retval = savedbits & 0xff; + numsavedbits -= 8; + savedbits >>= 8; + return retval; +} + +/* Uniformly random int from [min,max] */ +int random_range(int min, int max) +{ + int power; + int number; + int choices; + + if (min == 0 && max == 0) { + return 0; + } + + choices = max - min + 1; + if (choices < 2) + { + return min; + } + + for (power = 2; power < choices; power <<= 1) + { + } + + do + { + number = random() & (power - 1); + } + while (number >= choices); + + return min + number; +} + +void outputbyte(unsigned char ch, bool plaintext) +{ + if (plaintext) { + if (ch != '\n' && isprint(ch)) { + switch (ch) { + case ('\\'): { + printf("\\\\"); + break; + } + default: + { + printf("%c", ch); + break; + } + } + } + else { + printf( + "\\%c%c", + "0123456789abcdef"[(ch & 0xf0) >> 4], + "0123456789abcdef"[ch & 0x0f] + ); + } + } + else { + printf( + "%c%c", + "0123456789abcdef"[(ch & 0xf0) >> 4], + "0123456789abcdef"[ch & 0x0f] + ); + } +} + +void outputstring(char* str, bool plaintext) +{ + char* p; + + for (p = str; *p != '\0'; p++) + { + outputbyte((unsigned char)*p, plaintext); + } +} + +void generate_keys( + char dbt_delimiter, + char* sort_delimiter, + const char* progname, + bool plaintext, + long minsize, + long maxsize, + long long maxnumkeys, + long maxkibibytes, + unsigned long seed, + bool printableonly +) +{ + bool usedemptykey = false; + long long numgenerated = 0; + long long totalsize = 0; + char identifier[24]; /* 8 bytes * 2 = 16; 16+1=17; 17+null terminator = 18. Extra padding. */ + int length; + int i; + + srandom(seed); + while ( + ( + maxnumkeys == -1 || + numgenerated < maxnumkeys + ) && + ( + maxkibibytes == -1 || + totalsize >> 10 < maxkibibytes + ) + ) + { + numgenerated++; + + /* Generate a key. */ + if (!plaintext) { + printf(" "); /* Each key is preceded by a space. */ + } + { + /* Pick a key length. */ + length = random_range(minsize, maxsize); + + /* Output 'length' random bytes. */ + for (i = 0; i < length; i++) + { + unsigned char ch; + + do { + ch = randbyte(); + } + while (printableonly && !isprint(ch)); + + outputbyte(ch, plaintext); + } + totalsize += length; + if (length == 0 && !usedemptykey) + { + usedemptykey = true; + } + else + { + /* Append identifier to ensure uniqueness. */ + sprintf(identifier, "x%llx", numgenerated); + outputstring(identifier, plaintext); + totalsize += strlen(identifier); + } + } + printf("%c", dbt_delimiter); + + /* Generate a value. */ + if (!plaintext) { + printf(" "); /* Each value is preceded by a space. */ + } + { + /* Pick a key length. */ + length = random_range(minsize, maxsize); + + /* Output 'length' random bytes. */ + for (i = 0; i < length; i++) + { + unsigned char ch; + + do { + ch = randbyte(); + } + while (printableonly && !isprint(ch)); + + outputbyte(ch, plaintext); + } + totalsize += length; + } + printf("%c", dbt_delimiter); + + printf("%s", sort_delimiter); + } +} -- 2.30.9