Commit eddee5ba authored by Herbert Xu, committed by David S. Miller

rhashtable: Fix walker behaviour during rehash

Previously, whenever the walker encountered a resize it simply
snapped back to the beginning and started again.  However, this only
works if the rehash started and completed while the walker was
idle.

If the walker attempts to restart while the rehash is still ongoing,
we may miss objects that we shouldn't have.

This patch fixes this by making the walker walk the old table
followed by the new table, just like all other readers.  If a
rehash is detected we will still signal this to our caller
so they can prepare for duplicates, but we will simply continue
the walk onto the new table after the old one is finished, either
by us or by the rehasher.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 96026d05
...@@ -53,6 +53,7 @@ struct rhash_head { ...@@ -53,6 +53,7 @@ struct rhash_head {
* @shift: Current size (1 << shift) * @shift: Current size (1 << shift)
* @locks_mask: Mask to apply before accessing locks[] * @locks_mask: Mask to apply before accessing locks[]
* @locks: Array of spinlocks protecting individual buckets * @locks: Array of spinlocks protecting individual buckets
* @walkers: List of active walkers
* @buckets: size * hash buckets * @buckets: size * hash buckets
*/ */
struct bucket_table { struct bucket_table {
...@@ -61,6 +62,7 @@ struct bucket_table { ...@@ -61,6 +62,7 @@ struct bucket_table {
u32 shift; u32 shift;
unsigned int locks_mask; unsigned int locks_mask;
spinlock_t *locks; spinlock_t *locks;
struct list_head walkers;
struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
}; };
...@@ -104,7 +106,6 @@ struct rhashtable_params { ...@@ -104,7 +106,6 @@ struct rhashtable_params {
* @p: Configuration parameters * @p: Configuration parameters
* @run_work: Deferred worker to expand/shrink asynchronously * @run_work: Deferred worker to expand/shrink asynchronously
* @mutex: Mutex to protect current/future table swapping * @mutex: Mutex to protect current/future table swapping
* @walkers: List of active walkers
* @being_destroyed: True if table is set up for destruction * @being_destroyed: True if table is set up for destruction
*/ */
struct rhashtable { struct rhashtable {
...@@ -115,17 +116,16 @@ struct rhashtable { ...@@ -115,17 +116,16 @@ struct rhashtable {
struct rhashtable_params p; struct rhashtable_params p;
struct work_struct run_work; struct work_struct run_work;
struct mutex mutex; struct mutex mutex;
struct list_head walkers;
}; };
/** /**
* struct rhashtable_walker - Hash table walker * struct rhashtable_walker - Hash table walker
* @list: List entry on list of walkers * @list: List entry on list of walkers
* @resize: Resize event occured * @tbl: The table that we were walking over
*/ */
struct rhashtable_walker { struct rhashtable_walker {
struct list_head list; struct list_head list;
bool resize; struct bucket_table *tbl;
}; };
/** /**
......
...@@ -170,6 +170,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, ...@@ -170,6 +170,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht,
return NULL; return NULL;
} }
INIT_LIST_HEAD(&tbl->walkers);
for (i = 0; i < nbuckets; i++) for (i = 0; i < nbuckets; i++)
INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i);
...@@ -264,6 +266,7 @@ static void rhashtable_rehash(struct rhashtable *ht, ...@@ -264,6 +266,7 @@ static void rhashtable_rehash(struct rhashtable *ht,
struct bucket_table *new_tbl) struct bucket_table *new_tbl)
{ {
struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht);
struct rhashtable_walker *walker;
unsigned old_hash; unsigned old_hash;
get_random_bytes(&new_tbl->hash_rnd, sizeof(new_tbl->hash_rnd)); get_random_bytes(&new_tbl->hash_rnd, sizeof(new_tbl->hash_rnd));
...@@ -284,6 +287,9 @@ static void rhashtable_rehash(struct rhashtable *ht, ...@@ -284,6 +287,9 @@ static void rhashtable_rehash(struct rhashtable *ht,
/* Publish the new table pointer. */ /* Publish the new table pointer. */
rcu_assign_pointer(ht->tbl, new_tbl); rcu_assign_pointer(ht->tbl, new_tbl);
list_for_each_entry(walker, &old_tbl->walkers, list)
walker->tbl = NULL;
/* Wait for readers. All new readers will see the new /* Wait for readers. All new readers will see the new
* table, and thus no references to the old table will * table, and thus no references to the old table will
* remain. * remain.
...@@ -358,7 +364,6 @@ static void rht_deferred_worker(struct work_struct *work) ...@@ -358,7 +364,6 @@ static void rht_deferred_worker(struct work_struct *work)
{ {
struct rhashtable *ht; struct rhashtable *ht;
struct bucket_table *tbl; struct bucket_table *tbl;
struct rhashtable_walker *walker;
ht = container_of(work, struct rhashtable, run_work); ht = container_of(work, struct rhashtable, run_work);
mutex_lock(&ht->mutex); mutex_lock(&ht->mutex);
...@@ -367,9 +372,6 @@ static void rht_deferred_worker(struct work_struct *work) ...@@ -367,9 +372,6 @@ static void rht_deferred_worker(struct work_struct *work)
tbl = rht_dereference(ht->tbl, ht); tbl = rht_dereference(ht->tbl, ht);
list_for_each_entry(walker, &ht->walkers, list)
walker->resize = true;
if (rht_grow_above_75(ht, tbl)) if (rht_grow_above_75(ht, tbl))
rhashtable_expand(ht); rhashtable_expand(ht);
else if (rht_shrink_below_30(ht, tbl)) else if (rht_shrink_below_30(ht, tbl))
...@@ -725,11 +727,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) ...@@ -725,11 +727,9 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter)
if (!iter->walker) if (!iter->walker)
return -ENOMEM; return -ENOMEM;
INIT_LIST_HEAD(&iter->walker->list);
iter->walker->resize = false;
mutex_lock(&ht->mutex); mutex_lock(&ht->mutex);
list_add(&iter->walker->list, &ht->walkers); iter->walker->tbl = rht_dereference(ht->tbl, ht);
list_add(&iter->walker->list, &iter->walker->tbl->walkers);
mutex_unlock(&ht->mutex); mutex_unlock(&ht->mutex);
return 0; return 0;
...@@ -745,7 +745,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init); ...@@ -745,7 +745,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
void rhashtable_walk_exit(struct rhashtable_iter *iter) void rhashtable_walk_exit(struct rhashtable_iter *iter)
{ {
mutex_lock(&iter->ht->mutex); mutex_lock(&iter->ht->mutex);
list_del(&iter->walker->list); if (iter->walker->tbl)
list_del(&iter->walker->list);
mutex_unlock(&iter->ht->mutex); mutex_unlock(&iter->ht->mutex);
kfree(iter->walker); kfree(iter->walker);
} }
...@@ -767,12 +768,19 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit); ...@@ -767,12 +768,19 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit);
*/ */
int rhashtable_walk_start(struct rhashtable_iter *iter) int rhashtable_walk_start(struct rhashtable_iter *iter)
{ {
struct rhashtable *ht = iter->ht;
mutex_lock(&ht->mutex);
if (iter->walker->tbl)
list_del(&iter->walker->list);
rcu_read_lock(); rcu_read_lock();
if (iter->walker->resize) { mutex_unlock(&ht->mutex);
iter->slot = 0;
iter->skip = 0; if (!iter->walker->tbl) {
iter->walker->resize = false; iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
return -EAGAIN; return -EAGAIN;
} }
...@@ -794,13 +802,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start); ...@@ -794,13 +802,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
*/ */
void *rhashtable_walk_next(struct rhashtable_iter *iter) void *rhashtable_walk_next(struct rhashtable_iter *iter)
{ {
const struct bucket_table *tbl; struct bucket_table *tbl = iter->walker->tbl;
struct rhashtable *ht = iter->ht; struct rhashtable *ht = iter->ht;
struct rhash_head *p = iter->p; struct rhash_head *p = iter->p;
void *obj = NULL; void *obj = NULL;
tbl = rht_dereference_rcu(ht->tbl, ht);
if (p) { if (p) {
p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
goto next; goto next;
...@@ -826,17 +832,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) ...@@ -826,17 +832,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
iter->skip = 0; iter->skip = 0;
} }
iter->p = NULL; iter->walker->tbl = rht_dereference_rcu(ht->future_tbl, ht);
if (iter->walker->tbl != tbl) {
out:
if (iter->walker->resize) {
iter->p = NULL;
iter->slot = 0; iter->slot = 0;
iter->skip = 0; iter->skip = 0;
iter->walker->resize = false;
return ERR_PTR(-EAGAIN); return ERR_PTR(-EAGAIN);
} }
iter->walker->tbl = NULL;
iter->p = NULL;
out:
return obj; return obj;
} }
EXPORT_SYMBOL_GPL(rhashtable_walk_next); EXPORT_SYMBOL_GPL(rhashtable_walk_next);
...@@ -849,7 +856,24 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next); ...@@ -849,7 +856,24 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next);
*/ */
void rhashtable_walk_stop(struct rhashtable_iter *iter) void rhashtable_walk_stop(struct rhashtable_iter *iter)
{ {
struct rhashtable *ht;
struct bucket_table *tbl = iter->walker->tbl;
rcu_read_unlock(); rcu_read_unlock();
if (!tbl)
return;
ht = iter->ht;
mutex_lock(&ht->mutex);
if (rht_dereference(ht->tbl, ht) == tbl ||
rht_dereference(ht->future_tbl, ht) == tbl)
list_add(&iter->walker->list, &tbl->walkers);
else
iter->walker->tbl = NULL;
mutex_unlock(&ht->mutex);
iter->p = NULL; iter->p = NULL;
} }
EXPORT_SYMBOL_GPL(rhashtable_walk_stop); EXPORT_SYMBOL_GPL(rhashtable_walk_stop);
...@@ -927,7 +951,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) ...@@ -927,7 +951,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params)
memset(ht, 0, sizeof(*ht)); memset(ht, 0, sizeof(*ht));
mutex_init(&ht->mutex); mutex_init(&ht->mutex);
memcpy(&ht->p, params, sizeof(*params)); memcpy(&ht->p, params, sizeof(*params));
INIT_LIST_HEAD(&ht->walkers);
if (params->locks_mul) if (params->locks_mul)
ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); ht->p.locks_mul = roundup_pow_of_two(params->locks_mul);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment