Commit 701339ea authored by Bradley C. Kuszmaul

Use prime numbers for the hash table sizes

git-svn-id: file:///svn/tokudb@126 c7de825b-a66e-492c-adef-691d508d4ae1
parent e9f6f948
...@@ -39,26 +39,27 @@ pma.o: pma.h yerror.h pma-internal.h memory.h key.h ybt.h brttypes.h ../include ...@@ -39,26 +39,27 @@ pma.o: pma.h yerror.h pma-internal.h memory.h key.h ybt.h brttypes.h ../include
ybt.o: ybt.h brttypes.h ybt.o: ybt.h brttypes.h
ybt-test: ybt-test.o ybt.o memory.o ybt-test: ybt-test.o ybt.o memory.o
cachetable.o: cachetable.h hashfun.h cachetable.o: cachetable.h hashfun.h
brt-test: ybt.o brt.o hashtable.o pma.o memory.o brt-serialize.o cachetable.o header-io.o ybt.o key.o brt-test: ybt.o brt.o hashtable.o pma.o memory.o brt-serialize.o cachetable.o header-io.o ybt.o key.o primes.o
brt-test.o brt.o: brt.h hashtable.h pma.h brttypes.h cachetable.h brt-test.o brt.o: brt.h hashtable.h pma.h brttypes.h cachetable.h
brt-serialize-test.o: pma.h yerror.h brt.h memory.h hashtable.h brttypes.h brt-internal.h brt-serialize-test.o: pma.h yerror.h brt.h memory.h hashtable.h brttypes.h brt-internal.h
brt.o: brt.h mdict.h pma.h brttypes.h memory.h brt-internal.h cachetable.h hashtable.h brt.o: brt.h mdict.h pma.h brttypes.h memory.h brt-internal.h cachetable.h hashtable.h
mdict.o: pma.h mdict.o: pma.h
hashtable.o: hashtable.h brttypes.h memory.h key.h yerror.h ../include/ydb-constants.h hashfun.h hashtable.o: hashtable.h brttypes.h memory.h key.h yerror.h ../include/ydb-constants.h hashfun.h
memory.o: memory.h memory.o: memory.h
hashtest: hashtable.o memory.o primes.o: primes.h
hashtest: hashtable.o memory.o primes.o
brt-serialize.o: brt.h cachetable.h memory.h mdict.h pma.h brttypes.h brt-internal.h hashtable.h brt-serialize.o: brt.h cachetable.h memory.h mdict.h pma.h brttypes.h brt-internal.h hashtable.h
header-io.o: brttypes.h brt-internal.h memory.h header-io.o: brttypes.h brt-internal.h memory.h
mdict-test: hashtable.o pma.o memory.o mdict-test: hashtable.o pma.o memory.o
brt-bigtest: memory.o ybt.o brt.o pma.o cachetable.o key.o hashtable.o brt-serialize.o brt-bigtest: memory.o ybt.o brt.o pma.o cachetable.o key.o hashtable.o brt-serialize.o
log-test: log.o memory.o log-test: log.o memory.o
brt-serialize-test: brt-serialize-test.o brt-serialize.o memory.o hashtable.o pma.o key.o ybt.o brt.o cachetable.o brt-serialize-test: brt-serialize-test.o brt-serialize.o memory.o hashtable.o pma.o key.o ybt.o brt.o cachetable.o primes.o
cachetable-test.o: cachetable.h memory.h cachetable-test.o: cachetable.h memory.h
cachetable-test: cachetable.o memory.o cachetable-test.o cachetable-test: cachetable.o memory.o cachetable-test.o
benchmark-test: benchmark-test.o ybt.o memory.o brt.o pma.o cachetable.o key.o hashtable.o brt-serialize.o benchmark-test: benchmark-test.o ybt.o memory.o brt.o pma.o cachetable.o key.o hashtable.o brt-serialize.o primes.o
clean: clean:
rm -rf *.o hashtest brt-test cachetable-test randbrt randdb4 benchmark-test *.bb *.bbg *.da rm -rf *.o hashtest brt-test cachetable-test randbrt randdb4 benchmark-test *.bb *.bbg *.da
......
/* Hash table with chaining. */ /* Hash table with chaining. */
#include "hashtable.h" #include "hashtable.h"
#include "memory.h" #include "memory.h"
#include "primes.h"
#include "../include/ydb-constants.h" #include "../include/ydb-constants.h"
#include <assert.h> #include <assert.h>
#include <string.h> #include <string.h>
...@@ -13,10 +14,11 @@ ...@@ -13,10 +14,11 @@
int toku_hashtable_create (HASHTABLE *h) { int toku_hashtable_create (HASHTABLE *h) {
HASHTABLE MALLOC(tab); HASHTABLE MALLOC(tab);
int i; unsigned int i;
if (tab==0) return -1; if (tab==0) return -1;
tab->n_keys=0; tab->n_keys=0;
tab->arraysize=8; tab->primeidx=0;
tab->arraysize=get_prime(tab->primeidx);
assert(sizeof(*tab->array)==sizeof(void*)); assert(sizeof(*tab->array)==sizeof(void*));
tab->array = toku_calloc(tab->arraysize, sizeof(*tab->array)); tab->array = toku_calloc(tab->arraysize, sizeof(*tab->array));
for (i=0; i<tab->arraysize; i++) tab->array[i]=0; for (i=0; i<tab->arraysize; i++) tab->array[i]=0;
...@@ -51,9 +53,9 @@ int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, I ...@@ -51,9 +53,9 @@ int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, I
} }
} }
int toku_hash_rehash_everything (HASHTABLE tab, int newarraysize) { int toku_hash_rehash_everything (HASHTABLE tab, unsigned int newarraysize) {
HASHELT *newarray = toku_calloc(newarraysize, sizeof(*tab->array)); HASHELT *newarray = toku_calloc(newarraysize, sizeof(*tab->array));
int i; unsigned int i;
assert(newarray!=0); assert(newarray!=0);
for (i=0; i<newarraysize; i++) newarray[i]=0; for (i=0; i<newarraysize; i++) newarray[i]=0;
for (i=0; i<tab->arraysize; i++) { for (i=0; i<tab->arraysize; i++) {
...@@ -128,8 +130,8 @@ int toku_hash_delete (HASHTABLE tab, const void *key, ITEMLEN keylen) { ...@@ -128,8 +130,8 @@ int toku_hash_delete (HASHTABLE tab, const void *key, ITEMLEN keylen) {
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen) { int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen) {
int i; unsigned int i;
int usei = random()%h->arraysize; unsigned int usei = random()%h->arraysize;
for (i=0; i<h->arraysize; i++, usei++) { for (i=0; i<h->arraysize; i++, usei++) {
if (usei>=h->arraysize) usei=0; if (usei>=h->arraysize) usei=0;
HASHELT he=h->array[usei]; HASHELT he=h->array[usei];
...@@ -209,7 +211,7 @@ void toku_hashtable_free(HASHTABLE *tab) { ...@@ -209,7 +211,7 @@ void toku_hashtable_free(HASHTABLE *tab) {
void toku_hashtable_clear(HASHTABLE tab) { void toku_hashtable_clear(HASHTABLE tab) {
int i; unsigned int i;
for (i=0; i<tab->arraysize; i++) { for (i=0; i<tab->arraysize; i++) {
hasheltlist_free(tab->array[i]); hasheltlist_free(tab->array[i]);
tab->array[i]=0; tab->array[i]=0;
......
...@@ -36,8 +36,9 @@ struct hashelt { ...@@ -36,8 +36,9 @@ struct hashelt {
}; };
struct hashtable { struct hashtable {
int n_keys; unsigned int n_keys;
int arraysize; unsigned int arraysize;
unsigned int primeidx;
HASHELT *array; HASHELT *array;
}; };
...@@ -45,7 +46,7 @@ struct hashtable { ...@@ -45,7 +46,7 @@ struct hashtable {
void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,void*), void*); void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,void*), void*);
// If you don't want to use something, do something like use "key __attribute__((__unused__))" for keyvar. // If you don't want to use something, do something like use "key __attribute__((__unused__))" for keyvar.
#define HASHTABLE_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) ({ \ #define HASHTABLE_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) ({ \
int hi_counter; \ unsigned int hi_counter; \
for (hi_counter=0; hi_counter<table->arraysize; hi_counter++) { \ for (hi_counter=0; hi_counter<table->arraysize; hi_counter++) { \
HASHELT hi_he; \ HASHELT hi_he; \
for (hi_he=table->array[hi_counter]; hi_he; hi_he=hi_he->next) { \ for (hi_he=table->array[hi_counter]; hi_he; hi_he=hi_he->next) { \
......
#include "key.h" #include "key.h"
#include "hashtable.h" #include "hashtable.h"
#include "memory.h" #include "memory.h"
#include "primes.h"
#include <stdlib.h> #include <stdlib.h>
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>
...@@ -136,6 +137,7 @@ void test1(void) { ...@@ -136,6 +137,7 @@ void test1(void) {
} }
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) { int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
test_primes();
test0(); test0();
test1(); test1();
malloc_cleanup(); malloc_cleanup();
......
#include <assert.h>
/* Return 1 if n is prime and 0 otherwise.
 * Anything below 2 (0, 1 and every negative number) is reported as not prime.
 * Uses trial division by 2 and then by the odd numbers up to sqrt(n). */
int is_prime (int n) {
    int i;
    if (n<2) return 0;       /* 0, 1 and negatives are not prime. */
    if (n==2) return 1;      /* The only even prime. */
    if (n%2==0) return 0;
    /* Test i<=n/i instead of i*i<=n: the two are equivalent for positive
     * ints, but i*i overflows a signed int (undefined behavior) once i
     * exceeds 46340, which happens for primes close to INT_MAX. */
    for (i=3; i<=n/i; i+=2) {
        if (n%i==0) return 0;
    }
    return 1;
}
#define N_PRIMES 30
/* Cache of primes, filled on the first call to get_prime().
 * Slot k holds the first prime found scanning upward from 2<<k.
 * primes[0]==0 doubles as the "not yet filled" marker. */
static unsigned int primes[N_PRIMES]={0};

/* Return the cached prime for idx, building the whole table on first use.
 * idx must be less than N_PRIMES (checked with assert). */
int get_prime (unsigned int idx) {
    if (primes[0]==0) {
        int slot = 0;
        while (slot<N_PRIMES) {
            /* Start at 2^(slot+1) and walk upward to the next prime. */
            int candidate = 2<<slot;
            while (!is_prime(candidate)) {
                candidate++;
            }
            primes[slot] = candidate;
            slot++;
        }
    }
    assert(idx<N_PRIMES);
    return primes[idx];
}
/* Self-check for get_prime(): asserts the values returned for idx 0 through 6.
 * Each expected value is the smallest prime >= 2^(idx+1). */
void test_primes (void) {
assert(get_prime(0)==2);   /* 2 is itself prime */
assert(get_prime(1)==5);   /* first prime at or above 4 */
assert(get_prime(2)==11);  /* first prime at or above 8 */
assert(get_prime(3)==17);  /* first prime at or above 16 */
assert(get_prime(4)==37);  /* first prime at or above 32 */
assert(get_prime(5)==67);  /* first prime at or above 64 */
assert(get_prime(6)==131); /* first prime at or above 128 */
}
/* Return the smallest prime >= 2^(idx+1)
 * (for example idx 0 -> 2, idx 1 -> 5, idx 2 -> 11).
 * Only works for idx<30; the implementation asserts this bound. */
int get_prime (unsigned int idx);
/* Self-test: asserts the expected get_prime() results for idx 0 through 6. */
void test_primes(void);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment