Commit 5bfddbd4 authored by Eric Dumazet, committed by Patrick McHardy

netfilter: nf_conntrack: IPS_UNTRACKED bit

NOTRACK makes all cpus share a cache line on nf_conntrack_untracked
twice per packet. This is bad for performance.
The __read_mostly annotation is also a bad choice, because the entry's
reference count is written for every untracked packet.

This patch introduces the IPS_UNTRACKED bit so that we can later switch
to a per_cpu untracked structure more easily.

A new helper, nf_ct_untracked_get(), returns a pointer to
nf_conntrack_untracked.

Another one, nf_ct_untracked_status_or(), is used by nf_nat_init() to add
IPS_NAT_DONE_MASK bits to untracked status.

The nf_ct_is_untracked() prototype is changed to take a struct nf_conn
pointer instead of a struct sk_buff pointer.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
parent 339bb99e
...@@ -76,6 +76,10 @@ enum ip_conntrack_status { ...@@ -76,6 +76,10 @@ enum ip_conntrack_status {
/* Conntrack is a template */ /* Conntrack is a template */
IPS_TEMPLATE_BIT = 11, IPS_TEMPLATE_BIT = 11,
IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT), IPS_TEMPLATE = (1 << IPS_TEMPLATE_BIT),
/* Conntrack is a fake untracked entry */
IPS_UNTRACKED_BIT = 12,
IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
}; };
/* Connection tracking event types */ /* Connection tracking event types */
......
...@@ -261,7 +261,13 @@ extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct, ...@@ -261,7 +261,13 @@ extern s16 (*nf_ct_nat_offset)(const struct nf_conn *ct,
u32 seq); u32 seq);
/* Fake conntrack entry for untracked connections */ /* Fake conntrack entry for untracked connections */
extern struct nf_conn nf_conntrack_untracked; static inline struct nf_conn *nf_ct_untracked_get(void)
{
extern struct nf_conn nf_conntrack_untracked;
return &nf_conntrack_untracked;
}
extern void nf_ct_untracked_status_or(unsigned long bits);
/* Iterate over all conntracks: if iter returns true, it's deleted. */ /* Iterate over all conntracks: if iter returns true, it's deleted. */
extern void extern void
...@@ -289,9 +295,9 @@ static inline int nf_ct_is_dying(struct nf_conn *ct) ...@@ -289,9 +295,9 @@ static inline int nf_ct_is_dying(struct nf_conn *ct)
return test_bit(IPS_DYING_BIT, &ct->status); return test_bit(IPS_DYING_BIT, &ct->status);
} }
static inline int nf_ct_is_untracked(const struct sk_buff *skb) static inline int nf_ct_is_untracked(const struct nf_conn *ct)
{ {
return (skb->nfct == &nf_conntrack_untracked.ct_general); return test_bit(IPS_UNTRACKED_BIT, &ct->status);
} }
extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); extern int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp);
......
...@@ -60,7 +60,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) ...@@ -60,7 +60,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
struct nf_conn *ct = (struct nf_conn *)skb->nfct; struct nf_conn *ct = (struct nf_conn *)skb->nfct;
int ret = NF_ACCEPT; int ret = NF_ACCEPT;
if (ct && ct != &nf_conntrack_untracked) { if (ct && !nf_ct_is_untracked(ct)) {
if (!nf_ct_is_confirmed(ct)) if (!nf_ct_is_confirmed(ct))
ret = __nf_conntrack_confirm(skb); ret = __nf_conntrack_confirm(skb);
if (likely(ret == NF_ACCEPT)) if (likely(ret == NF_ACCEPT))
......
...@@ -742,7 +742,7 @@ static int __init nf_nat_init(void) ...@@ -742,7 +742,7 @@ static int __init nf_nat_init(void)
spin_unlock_bh(&nf_nat_lock); spin_unlock_bh(&nf_nat_lock);
/* Initialize fake conntrack so that NAT will skip it */ /* Initialize fake conntrack so that NAT will skip it */
nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK; nf_ct_untracked_status_or(IPS_NAT_DONE_MASK);
l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET); l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
......
...@@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum, ...@@ -98,7 +98,7 @@ nf_nat_fn(unsigned int hooknum,
return NF_ACCEPT; return NF_ACCEPT;
/* Don't try to NAT if this packet is not conntracked */ /* Don't try to NAT if this packet is not conntracked */
if (ct == &nf_conntrack_untracked) if (nf_ct_is_untracked(ct))
return NF_ACCEPT; return NF_ACCEPT;
nat = nfct_nat(ct); nat = nfct_nat(ct);
......
...@@ -208,7 +208,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, ...@@ -208,7 +208,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl,
type = icmp6h->icmp6_type - 130; type = icmp6h->icmp6_type - 130;
if (type >= 0 && type < sizeof(noct_valid_new) && if (type >= 0 && type < sizeof(noct_valid_new) &&
noct_valid_new[type]) { noct_valid_new[type]) {
skb->nfct = &nf_conntrack_untracked.ct_general; skb->nfct = &nf_ct_untracked_get()->ct_general;
skb->nfctinfo = IP_CT_NEW; skb->nfctinfo = IP_CT_NEW;
nf_conntrack_get(skb->nfct); nf_conntrack_get(skb->nfct);
return NF_ACCEPT; return NF_ACCEPT;
......
...@@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); ...@@ -62,7 +62,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly; unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max); EXPORT_SYMBOL_GPL(nf_conntrack_max);
struct nf_conn nf_conntrack_untracked __read_mostly; struct nf_conn nf_conntrack_untracked;
EXPORT_SYMBOL_GPL(nf_conntrack_untracked); EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
static int nf_conntrack_hash_rnd_initted; static int nf_conntrack_hash_rnd_initted;
...@@ -1321,6 +1321,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); ...@@ -1321,6 +1321,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600); &nf_conntrack_htable_size, 0600);
void nf_ct_untracked_status_or(unsigned long bits)
{
nf_conntrack_untracked.status |= bits;
}
EXPORT_SYMBOL_GPL(nf_ct_untracked_status_or);
static int nf_conntrack_init_init_net(void) static int nf_conntrack_init_init_net(void)
{ {
int max_factor = 8; int max_factor = 8;
...@@ -1368,8 +1374,7 @@ static int nf_conntrack_init_init_net(void) ...@@ -1368,8 +1374,7 @@ static int nf_conntrack_init_init_net(void)
#endif #endif
atomic_set(&nf_conntrack_untracked.ct_general.use, 1); atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
/* - and look it like as a confirmed connection */ /* - and look it like as a confirmed connection */
set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
return 0; return 0;
#ifdef CONFIG_NF_CONNTRACK_ZONES #ifdef CONFIG_NF_CONNTRACK_ZONES
......
...@@ -480,7 +480,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) ...@@ -480,7 +480,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
int err; int err;
/* ignore our fake conntrack entry */ /* ignore our fake conntrack entry */
if (ct == &nf_conntrack_untracked) if (nf_ct_is_untracked(ct))
return 0; return 0;
if (events & (1 << IPCT_DESTROY)) { if (events & (1 << IPCT_DESTROY)) {
......
...@@ -67,7 +67,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) ...@@ -67,7 +67,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
return -EINVAL; return -EINVAL;
if (info->flags & XT_CT_NOTRACK) { if (info->flags & XT_CT_NOTRACK) {
ct = &nf_conntrack_untracked; ct = nf_ct_untracked_get();
atomic_inc(&ct->ct_general.use); atomic_inc(&ct->ct_general.use);
goto out; goto out;
} }
...@@ -132,7 +132,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par) ...@@ -132,7 +132,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
struct nf_conn *ct = info->ct; struct nf_conn *ct = info->ct;
struct nf_conn_help *help; struct nf_conn_help *help;
if (ct != &nf_conntrack_untracked) { if (!nf_ct_is_untracked(ct)) {
help = nfct_help(ct); help = nfct_help(ct);
if (help) if (help)
module_put(help->helper->me); module_put(help->helper->me);
......
...@@ -23,7 +23,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -23,7 +23,7 @@ notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
If there is a real ct entry correspondig to this packet, If there is a real ct entry correspondig to this packet,
it'll hang aroun till timing out. We don't deal with it it'll hang aroun till timing out. We don't deal with it
for performance reasons. JK */ for performance reasons. JK */
skb->nfct = &nf_conntrack_untracked.ct_general; skb->nfct = &nf_ct_untracked_get()->ct_general;
skb->nfctinfo = IP_CT_NEW; skb->nfctinfo = IP_CT_NEW;
nf_conntrack_get(skb->nfct); nf_conntrack_get(skb->nfct);
......
...@@ -104,7 +104,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -104,7 +104,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
#ifdef WITH_CONNTRACK #ifdef WITH_CONNTRACK
/* Avoid counting cloned packets towards the original connection. */ /* Avoid counting cloned packets towards the original connection. */
nf_conntrack_put(skb->nfct); nf_conntrack_put(skb->nfct);
skb->nfct = &nf_conntrack_untracked.ct_general; skb->nfct = &nf_ct_untracked_get()->ct_general;
skb->nfctinfo = IP_CT_NEW; skb->nfctinfo = IP_CT_NEW;
nf_conntrack_get(skb->nfct); nf_conntrack_get(skb->nfct);
#endif #endif
...@@ -177,7 +177,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -177,7 +177,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
#ifdef WITH_CONNTRACK #ifdef WITH_CONNTRACK
nf_conntrack_put(skb->nfct); nf_conntrack_put(skb->nfct);
skb->nfct = &nf_conntrack_untracked.ct_general; skb->nfct = &nf_ct_untracked_get()->ct_general;
skb->nfctinfo = IP_CT_NEW; skb->nfctinfo = IP_CT_NEW;
nf_conntrack_get(skb->nfct); nf_conntrack_get(skb->nfct);
#endif #endif
......
...@@ -120,7 +120,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par) ...@@ -120,7 +120,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (ct == NULL) if (ct == NULL)
return false; return false;
if (ct == &nf_conntrack_untracked) if (nf_ct_is_untracked(ct))
return false; return false;
if (ct->master) if (ct->master)
......
...@@ -123,11 +123,12 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, ...@@ -123,11 +123,12 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
ct = nf_ct_get(skb, &ctinfo); ct = nf_ct_get(skb, &ctinfo);
if (ct == &nf_conntrack_untracked) if (ct) {
if (nf_ct_is_untracked(ct))
statebit = XT_CONNTRACK_STATE_UNTRACKED; statebit = XT_CONNTRACK_STATE_UNTRACKED;
else if (ct != NULL)
statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
else else
statebit = XT_CONNTRACK_STATE_BIT(ctinfo);
} else
statebit = XT_CONNTRACK_STATE_INVALID; statebit = XT_CONNTRACK_STATE_INVALID;
if (info->match_flags & XT_CONNTRACK_STATE) { if (info->match_flags & XT_CONNTRACK_STATE) {
......
...@@ -127,7 +127,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, ...@@ -127,7 +127,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
* reply packet of an established SNAT-ted connection. */ * reply packet of an established SNAT-ted connection. */
ct = nf_ct_get(skb, &ctinfo); ct = nf_ct_get(skb, &ctinfo);
if (ct && (ct != &nf_conntrack_untracked) && if (ct && !nf_ct_is_untracked(ct) &&
((iph->protocol != IPPROTO_ICMP && ((iph->protocol != IPPROTO_ICMP &&
ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) ||
(iph->protocol == IPPROTO_ICMP && (iph->protocol == IPPROTO_ICMP &&
......
...@@ -26,14 +26,16 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par) ...@@ -26,14 +26,16 @@ state_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_state_info *sinfo = par->matchinfo; const struct xt_state_info *sinfo = par->matchinfo;
enum ip_conntrack_info ctinfo; enum ip_conntrack_info ctinfo;
unsigned int statebit; unsigned int statebit;
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (nf_ct_is_untracked(skb)) if (!ct)
statebit = XT_STATE_UNTRACKED;
else if (!nf_ct_get(skb, &ctinfo))
statebit = XT_STATE_INVALID; statebit = XT_STATE_INVALID;
else {
if (nf_ct_is_untracked(ct))
statebit = XT_STATE_UNTRACKED;
else else
statebit = XT_STATE_BIT(ctinfo); statebit = XT_STATE_BIT(ctinfo);
}
return (sinfo->statemask & statebit); return (sinfo->statemask & statebit);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment