Commit 2f971a8f authored by Pablo Neira Ayuso's avatar Pablo Neira Ayuso

netfilter: nf_conncount: move all list iterations under spinlock

Two CPUs may race to remove a connection from the list, the existing
conn->dead will result in a use-after-free. Use the per-list spinlock to
protect list iterations.

As all accesses to the list now happen while holding the per-list lock,
we no longer need to delay free operations with rcu.

Joint work with Florian.

Fixes: 5c789e13 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
Reviewed-by: default avatarShawn Bohrer <sbohrer@cloudflare.com>
Signed-off-by: default avatarFlorian Westphal <fw@strlen.de>
Signed-off-by: default avatarPablo Neira Ayuso <pablo@netfilter.org>
parent df4a9025
...@@ -43,8 +43,6 @@ struct nf_conncount_tuple { ...@@ -43,8 +43,6 @@ struct nf_conncount_tuple {
struct nf_conntrack_zone zone; struct nf_conntrack_zone zone;
int cpu; int cpu;
u32 jiffies32; u32 jiffies32;
bool dead;
struct rcu_head rcu_head;
}; };
struct nf_conncount_rb { struct nf_conncount_rb {
...@@ -83,36 +81,21 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen) ...@@ -83,36 +81,21 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
return memcmp(a, b, klen * sizeof(u32)); return memcmp(a, b, klen * sizeof(u32));
} }
static void __conn_free(struct rcu_head *h)
{
struct nf_conncount_tuple *conn;
conn = container_of(h, struct nf_conncount_tuple, rcu_head);
kmem_cache_free(conncount_conn_cachep, conn);
}
static bool conn_free(struct nf_conncount_list *list, static bool conn_free(struct nf_conncount_list *list,
struct nf_conncount_tuple *conn) struct nf_conncount_tuple *conn)
{ {
bool free_entry = false; bool free_entry = false;
spin_lock_bh(&list->list_lock); lockdep_assert_held(&list->list_lock);
if (conn->dead) {
spin_unlock_bh(&list->list_lock);
return free_entry;
}
list->count--; list->count--;
conn->dead = true; list_del(&conn->node);
list_del_rcu(&conn->node);
if (list->count == 0) { if (list->count == 0) {
list->dead = true; list->dead = true;
free_entry = true; free_entry = true;
} }
spin_unlock_bh(&list->list_lock); kmem_cache_free(conncount_conn_cachep, conn);
call_rcu(&conn->rcu_head, __conn_free);
return free_entry; return free_entry;
} }
...@@ -242,7 +225,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list) ...@@ -242,7 +225,7 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
} }
EXPORT_SYMBOL_GPL(nf_conncount_list_init); EXPORT_SYMBOL_GPL(nf_conncount_list_init);
/* Return true if the list is empty */ /* Return true if the list is empty. Must be called with BH disabled. */
bool nf_conncount_gc_list(struct net *net, bool nf_conncount_gc_list(struct net *net,
struct nf_conncount_list *list) struct nf_conncount_list *list)
{ {
...@@ -253,12 +236,18 @@ bool nf_conncount_gc_list(struct net *net, ...@@ -253,12 +236,18 @@ bool nf_conncount_gc_list(struct net *net,
bool free_entry = false; bool free_entry = false;
bool ret = false; bool ret = false;
/* don't bother if other cpu is already doing GC */
if (!spin_trylock(&list->list_lock))
return false;
list_for_each_entry_safe(conn, conn_n, &list->head, node) { list_for_each_entry_safe(conn, conn_n, &list->head, node) {
found = find_or_evict(net, list, conn, &free_entry); found = find_or_evict(net, list, conn, &free_entry);
if (IS_ERR(found)) { if (IS_ERR(found)) {
if (PTR_ERR(found) == -ENOENT) { if (PTR_ERR(found) == -ENOENT) {
if (free_entry) if (free_entry) {
spin_unlock(&list->list_lock);
return true; return true;
}
collected++; collected++;
} }
continue; continue;
...@@ -271,23 +260,24 @@ bool nf_conncount_gc_list(struct net *net, ...@@ -271,23 +260,24 @@ bool nf_conncount_gc_list(struct net *net,
* closed already -> ditch it * closed already -> ditch it
*/ */
nf_ct_put(found_ct); nf_ct_put(found_ct);
if (conn_free(list, conn)) if (conn_free(list, conn)) {
spin_unlock(&list->list_lock);
return true; return true;
}
collected++; collected++;
continue; continue;
} }
nf_ct_put(found_ct); nf_ct_put(found_ct);
if (collected > CONNCOUNT_GC_MAX_NODES) if (collected > CONNCOUNT_GC_MAX_NODES)
return false; break;
} }
spin_lock_bh(&list->list_lock);
if (!list->count) { if (!list->count) {
list->dead = true; list->dead = true;
ret = true; ret = true;
} }
spin_unlock_bh(&list->list_lock); spin_unlock(&list->list_lock);
return ret; return ret;
} }
...@@ -478,6 +468,7 @@ static void tree_gc_worker(struct work_struct *work) ...@@ -478,6 +468,7 @@ static void tree_gc_worker(struct work_struct *work)
tree = data->gc_tree % CONNCOUNT_SLOTS; tree = data->gc_tree % CONNCOUNT_SLOTS;
root = &data->root[tree]; root = &data->root[tree];
local_bh_disable();
rcu_read_lock(); rcu_read_lock();
for (node = rb_first(root); node != NULL; node = rb_next(node)) { for (node = rb_first(root); node != NULL; node = rb_next(node)) {
rbconn = rb_entry(node, struct nf_conncount_rb, node); rbconn = rb_entry(node, struct nf_conncount_rb, node);
...@@ -485,6 +476,9 @@ static void tree_gc_worker(struct work_struct *work) ...@@ -485,6 +476,9 @@ static void tree_gc_worker(struct work_struct *work)
gc_count++; gc_count++;
} }
rcu_read_unlock(); rcu_read_unlock();
local_bh_enable();
cond_resched();
spin_lock_bh(&nf_conncount_locks[tree]); spin_lock_bh(&nf_conncount_locks[tree]);
if (gc_count < ARRAY_SIZE(gc_nodes)) if (gc_count < ARRAY_SIZE(gc_nodes))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment