Commit c80f10bc authored by Pablo Neira Ayuso

netfilter: nf_conncount: speculative garbage collection on empty lists

Instead of removing an empty list node that might be reintroduced soon
thereafter, tentatively place the empty list node on the list passed to
tree_nodes_free(), then re-check if the list is empty again before erasing
it from the tree.

[ Florian: rebase on top of pending nf_conncount fixes ]

Fixes: 5c789e13 ("netfilter: nf_conncount: Add list lock and gc worker, and RCU for init tree search")
Reviewed-by: Shawn Bohrer <sbohrer@cloudflare.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
parent 2f971a8f
...@@ -9,7 +9,6 @@ struct nf_conncount_list { ...@@ -9,7 +9,6 @@ struct nf_conncount_list {
spinlock_t list_lock; spinlock_t list_lock;
struct list_head head; /* connections with the same filtering key */ struct list_head head; /* connections with the same filtering key */
unsigned int count; /* length of list */ unsigned int count; /* length of list */
bool dead;
}; };
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family, struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family,
......
...@@ -81,27 +81,20 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen) ...@@ -81,27 +81,20 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
return memcmp(a, b, klen * sizeof(u32)); return memcmp(a, b, klen * sizeof(u32));
} }
static bool conn_free(struct nf_conncount_list *list, static void conn_free(struct nf_conncount_list *list,
struct nf_conncount_tuple *conn) struct nf_conncount_tuple *conn)
{ {
bool free_entry = false;
lockdep_assert_held(&list->list_lock); lockdep_assert_held(&list->list_lock);
list->count--; list->count--;
list_del(&conn->node); list_del(&conn->node);
if (list->count == 0) {
list->dead = true;
free_entry = true;
}
kmem_cache_free(conncount_conn_cachep, conn); kmem_cache_free(conncount_conn_cachep, conn);
return free_entry;
} }
static const struct nf_conntrack_tuple_hash * static const struct nf_conntrack_tuple_hash *
find_or_evict(struct net *net, struct nf_conncount_list *list, find_or_evict(struct net *net, struct nf_conncount_list *list,
struct nf_conncount_tuple *conn, bool *free_entry) struct nf_conncount_tuple *conn)
{ {
const struct nf_conntrack_tuple_hash *found; const struct nf_conntrack_tuple_hash *found;
unsigned long a, b; unsigned long a, b;
...@@ -121,7 +114,7 @@ find_or_evict(struct net *net, struct nf_conncount_list *list, ...@@ -121,7 +114,7 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
*/ */
age = a - b; age = a - b;
if (conn->cpu == cpu || age >= 2) { if (conn->cpu == cpu || age >= 2) {
*free_entry = conn_free(list, conn); conn_free(list, conn);
return ERR_PTR(-ENOENT); return ERR_PTR(-ENOENT);
} }
...@@ -137,14 +130,13 @@ static int __nf_conncount_add(struct net *net, ...@@ -137,14 +130,13 @@ static int __nf_conncount_add(struct net *net,
struct nf_conncount_tuple *conn, *conn_n; struct nf_conncount_tuple *conn, *conn_n;
struct nf_conn *found_ct; struct nf_conn *found_ct;
unsigned int collect = 0; unsigned int collect = 0;
bool free_entry = false;
/* check the saved connections */ /* check the saved connections */
list_for_each_entry_safe(conn, conn_n, &list->head, node) { list_for_each_entry_safe(conn, conn_n, &list->head, node) {
if (collect > CONNCOUNT_GC_MAX_NODES) if (collect > CONNCOUNT_GC_MAX_NODES)
break; break;
found = find_or_evict(net, list, conn, &free_entry); found = find_or_evict(net, list, conn);
if (IS_ERR(found)) { if (IS_ERR(found)) {
/* Not found, but might be about to be confirmed */ /* Not found, but might be about to be confirmed */
if (PTR_ERR(found) == -EAGAIN) { if (PTR_ERR(found) == -EAGAIN) {
...@@ -221,7 +213,6 @@ void nf_conncount_list_init(struct nf_conncount_list *list) ...@@ -221,7 +213,6 @@ void nf_conncount_list_init(struct nf_conncount_list *list)
spin_lock_init(&list->list_lock); spin_lock_init(&list->list_lock);
INIT_LIST_HEAD(&list->head); INIT_LIST_HEAD(&list->head);
list->count = 0; list->count = 0;
list->dead = false;
} }
EXPORT_SYMBOL_GPL(nf_conncount_list_init); EXPORT_SYMBOL_GPL(nf_conncount_list_init);
...@@ -233,7 +224,6 @@ bool nf_conncount_gc_list(struct net *net, ...@@ -233,7 +224,6 @@ bool nf_conncount_gc_list(struct net *net,
struct nf_conncount_tuple *conn, *conn_n; struct nf_conncount_tuple *conn, *conn_n;
struct nf_conn *found_ct; struct nf_conn *found_ct;
unsigned int collected = 0; unsigned int collected = 0;
bool free_entry = false;
bool ret = false; bool ret = false;
/* don't bother if other cpu is already doing GC */ /* don't bother if other cpu is already doing GC */
...@@ -241,15 +231,10 @@ bool nf_conncount_gc_list(struct net *net, ...@@ -241,15 +231,10 @@ bool nf_conncount_gc_list(struct net *net,
return false; return false;
list_for_each_entry_safe(conn, conn_n, &list->head, node) { list_for_each_entry_safe(conn, conn_n, &list->head, node) {
found = find_or_evict(net, list, conn, &free_entry); found = find_or_evict(net, list, conn);
if (IS_ERR(found)) { if (IS_ERR(found)) {
if (PTR_ERR(found) == -ENOENT) { if (PTR_ERR(found) == -ENOENT)
if (free_entry) {
spin_unlock(&list->list_lock);
return true;
}
collected++; collected++;
}
continue; continue;
} }
...@@ -260,10 +245,7 @@ bool nf_conncount_gc_list(struct net *net, ...@@ -260,10 +245,7 @@ bool nf_conncount_gc_list(struct net *net,
* closed already -> ditch it * closed already -> ditch it
*/ */
nf_ct_put(found_ct); nf_ct_put(found_ct);
if (conn_free(list, conn)) { conn_free(list, conn);
spin_unlock(&list->list_lock);
return true;
}
collected++; collected++;
continue; continue;
} }
...@@ -273,10 +255,8 @@ bool nf_conncount_gc_list(struct net *net, ...@@ -273,10 +255,8 @@ bool nf_conncount_gc_list(struct net *net,
break; break;
} }
if (!list->count) { if (!list->count)
list->dead = true;
ret = true; ret = true;
}
spin_unlock(&list->list_lock); spin_unlock(&list->list_lock);
return ret; return ret;
...@@ -291,6 +271,7 @@ static void __tree_nodes_free(struct rcu_head *h) ...@@ -291,6 +271,7 @@ static void __tree_nodes_free(struct rcu_head *h)
kmem_cache_free(conncount_rb_cachep, rbconn); kmem_cache_free(conncount_rb_cachep, rbconn);
} }
/* caller must hold tree nf_conncount_locks[] lock */
static void tree_nodes_free(struct rb_root *root, static void tree_nodes_free(struct rb_root *root,
struct nf_conncount_rb *gc_nodes[], struct nf_conncount_rb *gc_nodes[],
unsigned int gc_count) unsigned int gc_count)
...@@ -300,8 +281,10 @@ static void tree_nodes_free(struct rb_root *root, ...@@ -300,8 +281,10 @@ static void tree_nodes_free(struct rb_root *root,
while (gc_count) { while (gc_count) {
rbconn = gc_nodes[--gc_count]; rbconn = gc_nodes[--gc_count];
spin_lock(&rbconn->list.list_lock); spin_lock(&rbconn->list.list_lock);
rb_erase(&rbconn->node, root); if (!rbconn->list.count) {
call_rcu(&rbconn->rcu_head, __tree_nodes_free); rb_erase(&rbconn->node, root);
call_rcu(&rbconn->rcu_head, __tree_nodes_free);
}
spin_unlock(&rbconn->list.list_lock); spin_unlock(&rbconn->list.list_lock);
} }
} }
...@@ -318,7 +301,6 @@ insert_tree(struct net *net, ...@@ -318,7 +301,6 @@ insert_tree(struct net *net,
struct rb_root *root, struct rb_root *root,
unsigned int hash, unsigned int hash,
const u32 *key, const u32 *key,
u8 keylen,
const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone) const struct nf_conntrack_zone *zone)
{ {
...@@ -327,6 +309,7 @@ insert_tree(struct net *net, ...@@ -327,6 +309,7 @@ insert_tree(struct net *net,
struct nf_conncount_rb *rbconn; struct nf_conncount_rb *rbconn;
struct nf_conncount_tuple *conn; struct nf_conncount_tuple *conn;
unsigned int count = 0, gc_count = 0; unsigned int count = 0, gc_count = 0;
u8 keylen = data->keylen;
bool do_gc = true; bool do_gc = true;
spin_lock_bh(&nf_conncount_locks[hash]); spin_lock_bh(&nf_conncount_locks[hash]);
...@@ -454,7 +437,7 @@ count_tree(struct net *net, ...@@ -454,7 +437,7 @@ count_tree(struct net *net,
if (!tuple) if (!tuple)
return 0; return 0;
return insert_tree(net, data, root, hash, key, keylen, tuple, zone); return insert_tree(net, data, root, hash, key, tuple, zone);
} }
static void tree_gc_worker(struct work_struct *work) static void tree_gc_worker(struct work_struct *work)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment