Commit 7c966435, authored by Florian Westphal, committed by Pablo Neira Ayuso

netfilter: move nat hlist_head to nf_conn

The nat extension structure is 32 bytes in size on x86_64:

struct nf_conn_nat {
        struct hlist_node          bysource;             /*     0    16 */
        struct nf_conn *           ct;                   /*    16     8 */
        union nf_conntrack_nat_help help;                /*    24     4 */
        int                        masq_index;           /*    28     4 */
        /* size: 32, cachelines: 1, members: 4 */
        /* last cacheline: 32 bytes */
};

The hlist is needed to quickly check for possible tuple collisions
when installing a new nat binding. Storing this in the extension
area has two drawbacks:

1. We need a ct backpointer to get the conntrack struct from the extension.
2. When reallocation of the extension area occurs we need to fix up the bysource
   hash head via hlist_replace_rcu.

We can avoid both by placing the hlist_node in nf_conn and putting nf_conn in
the bysource hash rather than the extension.

We can also remove the ->move support; no other extension needs it.

Moving the entire nat extension into nf_conn would be possible as well but
then we have to add yet another callback for deletion from the bysource
hash table rather than just using nat extension ->destroy hook for this.

nf_conn size doesn't increase due to alignment; a follow-up patch replaces
the hlist_node with a single pointer.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
parent 242922a0
...@@ -117,6 +117,9 @@ struct nf_conn { ...@@ -117,6 +117,9 @@ struct nf_conn {
/* Extensions */ /* Extensions */
struct nf_ct_ext *ext; struct nf_ct_ext *ext;
#if IS_ENABLED(CONFIG_NF_NAT)
struct hlist_node nat_bysource;
#endif
/* Storage reserved for other modules, must be the last member */ /* Storage reserved for other modules, must be the last member */
union nf_conntrack_proto proto; union nf_conntrack_proto proto;
}; };
......
...@@ -99,9 +99,6 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, ...@@ -99,9 +99,6 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
struct nf_ct_ext_type { struct nf_ct_ext_type {
/* Destroys relationships (can be NULL). */ /* Destroys relationships (can be NULL). */
void (*destroy)(struct nf_conn *ct); void (*destroy)(struct nf_conn *ct);
/* Called when realloacted (can be NULL).
Contents has already been moved. */
void (*move)(void *new, void *old);
enum nf_ct_ext_id id; enum nf_ct_ext_id id;
......
...@@ -29,8 +29,6 @@ struct nf_conn; ...@@ -29,8 +29,6 @@ struct nf_conn;
/* The structure embedded in the conntrack structure. */ /* The structure embedded in the conntrack structure. */
struct nf_conn_nat { struct nf_conn_nat {
struct hlist_node bysource;
struct nf_conn *ct;
union nf_conntrack_nat_help help; union nf_conntrack_nat_help help;
#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
......
...@@ -73,7 +73,7 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, ...@@ -73,7 +73,7 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
size_t var_alloc_len, gfp_t gfp) size_t var_alloc_len, gfp_t gfp)
{ {
struct nf_ct_ext *old, *new; struct nf_ct_ext *old, *new;
int i, newlen, newoff; int newlen, newoff;
struct nf_ct_ext_type *t; struct nf_ct_ext_type *t;
/* Conntrack must not be confirmed to avoid races on reallocation. */ /* Conntrack must not be confirmed to avoid races on reallocation. */
...@@ -99,19 +99,8 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, ...@@ -99,19 +99,8 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
return NULL; return NULL;
if (new != old) { if (new != old) {
for (i = 0; i < NF_CT_EXT_NUM; i++) {
if (!__nf_ct_ext_exist(old, i))
continue;
rcu_read_lock();
t = rcu_dereference(nf_ct_ext_types[i]);
if (t && t->move)
t->move((void *)new + new->offset[i],
(void *)old + old->offset[i]);
rcu_read_unlock();
}
kfree_rcu(old, rcu); kfree_rcu(old, rcu);
ct->ext = new; rcu_assign_pointer(ct->ext, new);
} }
new->offset[id] = newoff; new->offset[id] = newoff;
......
...@@ -198,11 +198,9 @@ find_appropriate_src(struct net *net, ...@@ -198,11 +198,9 @@ find_appropriate_src(struct net *net,
const struct nf_nat_range *range) const struct nf_nat_range *range)
{ {
unsigned int h = hash_by_src(net, tuple); unsigned int h = hash_by_src(net, tuple);
const struct nf_conn_nat *nat;
const struct nf_conn *ct; const struct nf_conn *ct;
hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) { hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
ct = nat->ct;
if (same_src(ct, tuple) && if (same_src(ct, tuple) &&
net_eq(net, nf_ct_net(ct)) && net_eq(net, nf_ct_net(ct)) &&
nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
...@@ -435,8 +433,7 @@ nf_nat_setup_info(struct nf_conn *ct, ...@@ -435,8 +433,7 @@ nf_nat_setup_info(struct nf_conn *ct,
spin_lock_bh(&nf_nat_lock); spin_lock_bh(&nf_nat_lock);
/* nf_conntrack_alter_reply might re-allocate extension aera */ /* nf_conntrack_alter_reply might re-allocate extension aera */
nat = nfct_nat(ct); nat = nfct_nat(ct);
nat->ct = ct; hlist_add_head_rcu(&ct->nat_bysource,
hlist_add_head_rcu(&nat->bysource,
&nf_nat_bysource[srchash]); &nf_nat_bysource[srchash]);
spin_unlock_bh(&nf_nat_lock); spin_unlock_bh(&nf_nat_lock);
} }
...@@ -543,7 +540,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) ...@@ -543,7 +540,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
if (nf_nat_proto_remove(ct, data)) if (nf_nat_proto_remove(ct, data))
return 1; return 1;
if (!nat || !nat->ct) if (!nat)
return 0; return 0;
/* This netns is being destroyed, and conntrack has nat null binding. /* This netns is being destroyed, and conntrack has nat null binding.
...@@ -556,9 +553,8 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) ...@@ -556,9 +553,8 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
return 1; return 1;
spin_lock_bh(&nf_nat_lock); spin_lock_bh(&nf_nat_lock);
hlist_del_rcu(&nat->bysource); hlist_del_rcu(&ct->nat_bysource);
ct->status &= ~IPS_NAT_DONE_MASK; ct->status &= ~IPS_NAT_DONE_MASK;
nat->ct = NULL;
spin_unlock_bh(&nf_nat_lock); spin_unlock_bh(&nf_nat_lock);
add_timer(&ct->timeout); add_timer(&ct->timeout);
...@@ -688,27 +684,13 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) ...@@ -688,27 +684,13 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{ {
struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
if (nat == NULL || nat->ct == NULL) if (!nat)
return; return;
NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); NF_CT_ASSERT(ct->status & IPS_SRC_NAT_DONE);
spin_lock_bh(&nf_nat_lock);
hlist_del_rcu(&nat->bysource);
spin_unlock_bh(&nf_nat_lock);
}
static void nf_nat_move_storage(void *new, void *old)
{
struct nf_conn_nat *new_nat = new;
struct nf_conn_nat *old_nat = old;
struct nf_conn *ct = old_nat->ct;
if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
return;
spin_lock_bh(&nf_nat_lock); spin_lock_bh(&nf_nat_lock);
hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); hlist_del_rcu(&ct->nat_bysource);
spin_unlock_bh(&nf_nat_lock); spin_unlock_bh(&nf_nat_lock);
} }
...@@ -716,7 +698,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { ...@@ -716,7 +698,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = {
.len = sizeof(struct nf_conn_nat), .len = sizeof(struct nf_conn_nat),
.align = __alignof__(struct nf_conn_nat), .align = __alignof__(struct nf_conn_nat),
.destroy = nf_nat_cleanup_conntrack, .destroy = nf_nat_cleanup_conntrack,
.move = nf_nat_move_storage,
.id = NF_CT_EXT_NAT, .id = NF_CT_EXT_NAT,
.flags = NF_CT_EXT_F_PREALLOC, .flags = NF_CT_EXT_F_PREALLOC,
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment