Commit 67cc570e authored by Pablo Neira Ayuso

netfilter: nf_tables: coalesce multiple notifications into one skbuff

On x86_64, each notification results in one skbuff allocation which
consumes at least 768 bytes due to the skbuff overhead.

This patch coalesces several notifications into one single skbuff, so
each notification consumes at least ~211 bytes, that is ~3.5 times less
memory consumption. As a result, this reduces the chances of exhausting
the netlink socket receive buffer.

Rule of thumb is that each notification batch only contains netlink
messages whose report flag is the same; nfnetlink_send() requires this
to do appropriate delivery to userspace, either via unicast (echo
mode) or multicast (monitor mode).

The skbuff control buffer is used to annotate the report flag for later
handling at the new coalescing routine.

The batch skbuff notification size is NLMSG_GOODSIZE. Using a larger
skbuff would allow for more socket receive buffer savings (to amortize
the cost of the skbuff even more); however, going over that size might
break userspace applications, so let's be conservative and stick to
NLMSG_GOODSIZE.
Reported-by: Phil Sutter <phil@nwl.cc>
Acked-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
parent 1cc5ef91
...@@ -8,6 +8,7 @@ struct netns_nftables { ...@@ -8,6 +8,7 @@ struct netns_nftables {
struct list_head tables; struct list_head tables;
struct list_head commit_list; struct list_head commit_list;
struct list_head module_list; struct list_head module_list;
struct list_head notify_list;
struct mutex commit_mutex; struct mutex commit_mutex;
unsigned int base_seq; unsigned int base_seq;
u8 gencursor; u8 gencursor;
......
...@@ -684,6 +684,18 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, ...@@ -684,6 +684,18 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
return -1; return -1;
} }
/*
 * Per-notification metadata kept in the skbuff control buffer (skb->cb).
 * It annotates a queued notification with its report flag so the
 * coalescing routine can later batch only messages whose flag matches.
 */
struct nftnl_skb_parms {
/* report flag recorded when the notification skb is queued */
bool report;
};
/* Access skb->cb as a struct nftnl_skb_parms lvalue, e.g. NFT_CB(skb).report. */
#define NFT_CB(skb) (*(struct nftnl_skb_parms*)&((skb)->cb))
/*
 * Queue a notification skb for deferred delivery.
 *
 * Stores @report in the skb control buffer (through NFT_CB) and appends
 * @skb to the tail of @notify_list.
 */
static void nft_notify_enqueue(struct sk_buff *skb, bool report,
			       struct list_head *notify_list)
{
	struct nftnl_skb_parms *parms = &NFT_CB(skb);

	parms->report = report;
	list_add_tail(&skb->list, notify_list);
}
static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{ {
struct sk_buff *skb; struct sk_buff *skb;
...@@ -715,8 +727,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event) ...@@ -715,8 +727,7 @@ static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
goto err; goto err;
} }
nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
ctx->report, GFP_KERNEL);
return; return;
err: err:
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
...@@ -1468,8 +1479,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event) ...@@ -1468,8 +1479,7 @@ static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
goto err; goto err;
} }
nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
ctx->report, GFP_KERNEL);
return; return;
err: err:
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
...@@ -2807,8 +2817,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx, ...@@ -2807,8 +2817,7 @@ static void nf_tables_rule_notify(const struct nft_ctx *ctx,
goto err; goto err;
} }
nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
ctx->report, GFP_KERNEL);
return; return;
err: err:
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
...@@ -3837,8 +3846,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx, ...@@ -3837,8 +3846,7 @@ static void nf_tables_set_notify(const struct nft_ctx *ctx,
goto err; goto err;
} }
nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report, nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
gfp_flags);
return; return;
err: err:
nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS); nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
...@@ -4959,8 +4967,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx, ...@@ -4959,8 +4967,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
goto err; goto err;
} }
nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report, nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
GFP_KERNEL);
return; return;
err: err:
nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
...@@ -6275,7 +6282,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table, ...@@ -6275,7 +6282,7 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
goto err; goto err;
} }
nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp); nft_notify_enqueue(skb, report, &net->nft.notify_list);
return; return;
err: err:
nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS); nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
...@@ -7085,8 +7092,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx, ...@@ -7085,8 +7092,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
goto err; goto err;
} }
nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES, nft_notify_enqueue(skb, ctx->report, &ctx->net->nft.notify_list);
ctx->report, GFP_KERNEL);
return; return;
err: err:
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS); nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
...@@ -7695,6 +7701,41 @@ static void nf_tables_commit_release(struct net *net) ...@@ -7695,6 +7701,41 @@ static void nf_tables_commit_release(struct net *net)
mutex_unlock(&net->nft.commit_mutex); mutex_unlock(&net->nft.commit_mutex);
} }
/*
 * Drain net->nft.notify_list, coalescing the queued notification skbs
 * into batches that are handed to nfnetlink_send().
 *
 * The first skb of a batch doubles as the batch buffer. Following skbs
 * are appended to it as long as the accumulated length stays below
 * NLMSG_GOODSIZE and their report flag (kept in the skb control buffer)
 * equals the one of the batch. Otherwise the current batch is sent and
 * the skb that did not fit starts the next batch.
 *
 * @net:    network namespace whose notification queue is flushed
 * @portid: netlink port id forwarded to nfnetlink_send()
 */
static void nft_commit_notify(struct net *net, u32 portid)
{
	struct sk_buff *batch = NULL, *cur, *tmp;
	int room = 0;

	list_for_each_entry_safe(cur, tmp, &net->nft.notify_list, list) {
		/* Every entry leaves the queue, whichever path it takes below. */
		list_del(&cur->list);

		if (batch) {
			room -= cur->len;
			if (room > 0 &&
			    NFT_CB(cur).report == NFT_CB(batch).report) {
				unsigned char *dst = skb_put(batch, cur->len);

				/* Compatible and fits: copy payload, release source. */
				memcpy(dst, cur->data, cur->len);
				kfree_skb(cur);
				continue;
			}

			/* Batch is full or the report flag changed: send it. */
			nfnetlink_send(batch, net, portid, NFNLGRP_NFTABLES,
				       NFT_CB(batch).report, GFP_KERNEL);
		}

		/* The current skb becomes the buffer of a new batch. */
		batch = cur;
		room = NLMSG_GOODSIZE - cur->len;
	}

	/* Deliver whatever is left in the last, partially filled batch. */
	if (batch)
		nfnetlink_send(batch, net, portid, NFNLGRP_NFTABLES,
			       NFT_CB(batch).report, GFP_KERNEL);

	WARN_ON_ONCE(!list_empty(&net->nft.notify_list));
}
static int nf_tables_commit(struct net *net, struct sk_buff *skb) static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{ {
struct nft_trans *trans, *next; struct nft_trans *trans, *next;
...@@ -7897,6 +7938,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) ...@@ -7897,6 +7938,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
} }
} }
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_release(net); nf_tables_commit_release(net);
...@@ -8721,6 +8763,7 @@ static int __net_init nf_tables_init_net(struct net *net) ...@@ -8721,6 +8763,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&net->nft.tables); INIT_LIST_HEAD(&net->nft.tables);
INIT_LIST_HEAD(&net->nft.commit_list); INIT_LIST_HEAD(&net->nft.commit_list);
INIT_LIST_HEAD(&net->nft.module_list); INIT_LIST_HEAD(&net->nft.module_list);
INIT_LIST_HEAD(&net->nft.notify_list);
mutex_init(&net->nft.commit_mutex); mutex_init(&net->nft.commit_mutex);
net->nft.base_seq = 1; net->nft.base_seq = 1;
net->nft.validate_state = NFT_VALIDATE_SKIP; net->nft.validate_state = NFT_VALIDATE_SKIP;
...@@ -8737,6 +8780,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) ...@@ -8737,6 +8780,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
mutex_unlock(&net->nft.commit_mutex); mutex_unlock(&net->nft.commit_mutex);
WARN_ON_ONCE(!list_empty(&net->nft.tables)); WARN_ON_ONCE(!list_empty(&net->nft.tables));
WARN_ON_ONCE(!list_empty(&net->nft.module_list)); WARN_ON_ONCE(!list_empty(&net->nft.module_list));
WARN_ON_ONCE(!list_empty(&net->nft.notify_list));
} }
static struct pernet_operations nf_tables_net_ops = { static struct pernet_operations nf_tables_net_ops = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment