Commit 9dc20a64 authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter updates for net-next

The following patchset contains Netfilter updates for net-next, they are:

1) A couple of cleanups for the netfilter core hook from Eric Biederman.

2) Net namespace hook registration, also from Eric. This adds a dependency on
   the rtnl_lock. This should be fine for now but we have to keep an eye on this
   because if we ever get the per-subsys nfnl_lock before rtnl we may have
   problems in the future. But we have room to remove this in the future by
   propagating the complexity to the clients, by registering hooks for the init
   netns functions.

3) Update nf_tables to use the new net namespace hook infrastructure, also from
   Eric.

4) Three patches to refine and to address problems from the new net namespace
   hook infrastructure.

5) Switch to alternate jumpstack in xtables iff the packet is reentering. This
   only applies to a very special case, the TEE target, but Eric Dumazet
   reports that this is slowing down things for everyone else. So let's only
   switch to the alternate jumpstack if the TEE target is in use, via a
   static key. This batch also comes with offline precalculation of the
   jumpstack based on the callchain depth. From Florian Westphal.

6) Minimal SCTP multihoming support for our conntrack helper, from Michal
   Kubecek.

7) Reduce nf_bridge_info per skbuff scratchpad area to 32 bytes, from Florian
   Westphal.

8) Fix several checkpatch errors in bridge netfilter, from Bernhard Thaler.

9) Get rid of useless debug message in ip6t_REJECT, from Subash Abhinov.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents d1b22e4d a6cd379b
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/static_key.h> #include <linux/static_key.h>
#include <linux/netfilter_defs.h> #include <linux/netfilter_defs.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#ifdef CONFIG_NETFILTER #ifdef CONFIG_NETFILTER
static inline int NF_DROP_GETERR(int verdict) static inline int NF_DROP_GETERR(int verdict)
...@@ -118,6 +120,13 @@ struct nf_sockopt_ops { ...@@ -118,6 +120,13 @@ struct nf_sockopt_ops {
}; };
/* Function to register/unregister hook points. */ /* Function to register/unregister hook points. */
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *ops);
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *ops);
int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int n);
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
unsigned int n);
int nf_register_hook(struct nf_hook_ops *reg); int nf_register_hook(struct nf_hook_ops *reg);
void nf_unregister_hook(struct nf_hook_ops *reg); void nf_unregister_hook(struct nf_hook_ops *reg);
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n);
...@@ -128,33 +137,26 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); ...@@ -128,33 +137,26 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n);
int nf_register_sockopt(struct nf_sockopt_ops *reg); int nf_register_sockopt(struct nf_sockopt_ops *reg);
void nf_unregister_sockopt(struct nf_sockopt_ops *reg); void nf_unregister_sockopt(struct nf_sockopt_ops *reg);
extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
#ifdef HAVE_JUMP_LABEL #ifdef HAVE_JUMP_LABEL
extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
static inline bool nf_hook_list_active(struct list_head *nf_hook_list, static inline bool nf_hook_list_active(struct list_head *hook_list,
u_int8_t pf, unsigned int hook) u_int8_t pf, unsigned int hook)
{ {
if (__builtin_constant_p(pf) && if (__builtin_constant_p(pf) &&
__builtin_constant_p(hook)) __builtin_constant_p(hook))
return static_key_false(&nf_hooks_needed[pf][hook]); return static_key_false(&nf_hooks_needed[pf][hook]);
return !list_empty(nf_hook_list); return !list_empty(hook_list);
} }
#else #else
static inline bool nf_hook_list_active(struct list_head *nf_hook_list, static inline bool nf_hook_list_active(struct list_head *hook_list,
u_int8_t pf, unsigned int hook) u_int8_t pf, unsigned int hook)
{ {
return !list_empty(nf_hook_list); return !list_empty(hook_list);
} }
#endif #endif
static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
{
return nf_hook_list_active(&nf_hooks[pf][hook], pf, hook);
}
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state);
/** /**
...@@ -172,10 +174,13 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, ...@@ -172,10 +174,13 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
int (*okfn)(struct sock *, struct sk_buff *), int (*okfn)(struct sock *, struct sk_buff *),
int thresh) int thresh)
{ {
if (nf_hooks_active(pf, hook)) { struct net *net = dev_net(indev ? indev : outdev);
struct list_head *hook_list = &net->nf.hooks[pf][hook];
if (nf_hook_list_active(hook_list, pf, hook)) {
struct nf_hook_state state; struct nf_hook_state state;
nf_hook_state_init(&state, &nf_hooks[pf][hook], hook, thresh, nf_hook_state_init(&state, hook_list, hook, thresh,
pf, indev, outdev, sk, okfn); pf, indev, outdev, sk, okfn);
return nf_hook_slow(skb, &state); return nf_hook_slow(skb, &state);
} }
...@@ -385,4 +390,15 @@ extern struct nfq_ct_hook __rcu *nfq_ct_hook; ...@@ -385,4 +390,15 @@ extern struct nfq_ct_hook __rcu *nfq_ct_hook;
static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
#endif #endif
/**
* nf_skb_duplicated - TEE target has sent a packet
*
* When a xtables target sends a packet, the OUTPUT and POSTROUTING
* hooks are traversed again, i.e. nft and xtables are invoked recursively.
*
* This is used by xtables TEE target to prevent the duplicated skb from
* being duplicated again.
*/
DECLARE_PER_CPU(bool, nf_skb_duplicated);
#endif /*__LINUX_NETFILTER_H*/ #endif /*__LINUX_NETFILTER_H*/
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/static_key.h>
#include <uapi/linux/netfilter/x_tables.h> #include <uapi/linux/netfilter/x_tables.h>
/** /**
...@@ -222,7 +223,6 @@ struct xt_table_info { ...@@ -222,7 +223,6 @@ struct xt_table_info {
* @stacksize jumps (number of user chains) can possibly be made. * @stacksize jumps (number of user chains) can possibly be made.
*/ */
unsigned int stacksize; unsigned int stacksize;
unsigned int __percpu *stackptr;
void ***jumpstack; void ***jumpstack;
unsigned char entries[0] __aligned(8); unsigned char entries[0] __aligned(8);
...@@ -281,6 +281,12 @@ void xt_free_table_info(struct xt_table_info *info); ...@@ -281,6 +281,12 @@ void xt_free_table_info(struct xt_table_info *info);
*/ */
DECLARE_PER_CPU(seqcount_t, xt_recseq); DECLARE_PER_CPU(seqcount_t, xt_recseq);
/* xt_tee_enabled - true if x_tables needs to handle reentrancy
*
* Enabled if current ip(6)tables ruleset has at least one -j TEE rule.
*/
extern struct static_key xt_tee_enabled;
/** /**
* xt_write_recseq_begin - start of a write section * xt_write_recseq_begin - start of a write section
* *
......
...@@ -17,9 +17,6 @@ enum nf_br_hook_priorities { ...@@ -17,9 +17,6 @@ enum nf_br_hook_priorities {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
#define BRNF_BRIDGED_DNAT 0x02
#define BRNF_NF_BRIDGE_PREROUTING 0x08
int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
static inline void br_drop_fake_rtable(struct sk_buff *skb) static inline void br_drop_fake_rtable(struct sk_buff *skb)
...@@ -63,8 +60,17 @@ nf_bridge_get_physoutdev(const struct sk_buff *skb) ...@@ -63,8 +60,17 @@ nf_bridge_get_physoutdev(const struct sk_buff *skb)
{ {
return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL; return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL;
} }
static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb)
{
return skb->nf_bridge && skb->nf_bridge->in_prerouting;
}
#else #else
#define br_drop_fake_rtable(skb) do { } while (0) #define br_drop_fake_rtable(skb) do { } while (0)
static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb)
{
return false;
}
#endif /* CONFIG_BRIDGE_NETFILTER */ #endif /* CONFIG_BRIDGE_NETFILTER */
#endif #endif
...@@ -174,17 +174,24 @@ struct nf_bridge_info { ...@@ -174,17 +174,24 @@ struct nf_bridge_info {
BRNF_PROTO_8021Q, BRNF_PROTO_8021Q,
BRNF_PROTO_PPPOE BRNF_PROTO_PPPOE
} orig_proto:8; } orig_proto:8;
bool pkt_otherhost; u8 pkt_otherhost:1;
u8 in_prerouting:1;
u8 bridged_dnat:1;
__u16 frag_max_size; __u16 frag_max_size;
unsigned int mask;
struct net_device *physindev; struct net_device *physindev;
union { union {
struct net_device *physoutdev; /* prerouting: detect dnat in orig/reply direction */
char neigh_header[8];
};
union {
__be32 ipv4_daddr; __be32 ipv4_daddr;
struct in6_addr ipv6_daddr; struct in6_addr ipv6_daddr;
/* after prerouting + nat detected: store original source
* mac since neigh resolution overwrites it, only used while
* skb is out in neigh layer.
*/
char neigh_header[8];
/* always valid & non-NULL from FORWARD on, for physdev match */
struct net_device *physoutdev;
}; };
}; };
#endif #endif
......
...@@ -14,5 +14,6 @@ struct netns_nf { ...@@ -14,5 +14,6 @@ struct netns_nf {
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
struct ctl_table_header *nf_log_dir_header; struct ctl_table_header *nf_log_dir_header;
#endif #endif
struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
}; };
#endif #endif
...@@ -13,6 +13,8 @@ enum sctp_conntrack { ...@@ -13,6 +13,8 @@ enum sctp_conntrack {
SCTP_CONNTRACK_SHUTDOWN_SENT, SCTP_CONNTRACK_SHUTDOWN_SENT,
SCTP_CONNTRACK_SHUTDOWN_RECD, SCTP_CONNTRACK_SHUTDOWN_RECD,
SCTP_CONNTRACK_SHUTDOWN_ACK_SENT, SCTP_CONNTRACK_SHUTDOWN_ACK_SENT,
SCTP_CONNTRACK_HEARTBEAT_SENT,
SCTP_CONNTRACK_HEARTBEAT_ACKED,
SCTP_CONNTRACK_MAX SCTP_CONNTRACK_MAX
}; };
......
...@@ -92,6 +92,8 @@ enum ctattr_timeout_sctp { ...@@ -92,6 +92,8 @@ enum ctattr_timeout_sctp {
CTA_TIMEOUT_SCTP_SHUTDOWN_SENT, CTA_TIMEOUT_SCTP_SHUTDOWN_SENT,
CTA_TIMEOUT_SCTP_SHUTDOWN_RECD, CTA_TIMEOUT_SCTP_SHUTDOWN_RECD,
CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
CTA_TIMEOUT_SCTP_HEARTBEAT_SENT,
CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED,
__CTA_TIMEOUT_SCTP_MAX __CTA_TIMEOUT_SCTP_MAX
}; };
#define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1) #define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1)
......
...@@ -49,9 +49,9 @@ static struct ctl_table_header *brnf_sysctl_header; ...@@ -49,9 +49,9 @@ static struct ctl_table_header *brnf_sysctl_header;
static int brnf_call_iptables __read_mostly = 1; static int brnf_call_iptables __read_mostly = 1;
static int brnf_call_ip6tables __read_mostly = 1; static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_arptables __read_mostly = 1; static int brnf_call_arptables __read_mostly = 1;
static int brnf_filter_vlan_tagged __read_mostly = 0; static int brnf_filter_vlan_tagged __read_mostly;
static int brnf_filter_pppoe_tagged __read_mostly = 0; static int brnf_filter_pppoe_tagged __read_mostly;
static int brnf_pass_vlan_indev __read_mostly = 0; static int brnf_pass_vlan_indev __read_mostly;
#else #else
#define brnf_call_iptables 1 #define brnf_call_iptables 1
#define brnf_call_ip6tables 1 #define brnf_call_ip6tables 1
...@@ -284,7 +284,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) ...@@ -284,7 +284,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
nf_bridge->neigh_header, nf_bridge->neigh_header,
ETH_HLEN-ETH_ALEN); ETH_HLEN-ETH_ALEN);
/* tell br_dev_xmit to continue with forwarding */ /* tell br_dev_xmit to continue with forwarding */
nf_bridge->mask |= BRNF_BRIDGED_DNAT; nf_bridge->bridged_dnat = 1;
/* FIXME Need to refragment */ /* FIXME Need to refragment */
ret = neigh->output(neigh, skb); ret = neigh->output(neigh, skb);
} }
...@@ -356,7 +356,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) ...@@ -356,7 +356,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
skb->pkt_type = PACKET_OTHERHOST; skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false; nf_bridge->pkt_otherhost = false;
} }
nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; nf_bridge->in_prerouting = 0;
if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) { if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) { if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
struct in_device *in_dev = __in_dev_get_rcu(dev); struct in_device *in_dev = __in_dev_get_rcu(dev);
...@@ -444,7 +444,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb) ...@@ -444,7 +444,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb)
nf_bridge->pkt_otherhost = true; nf_bridge->pkt_otherhost = true;
} }
nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; nf_bridge->in_prerouting = 1;
nf_bridge->physindev = skb->dev; nf_bridge->physindev = skb->dev;
skb->dev = brnf_get_logical_dev(skb, skb->dev); skb->dev = brnf_get_logical_dev(skb, skb->dev);
...@@ -850,10 +850,8 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, ...@@ -850,10 +850,8 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
struct sk_buff *skb, struct sk_buff *skb,
const struct nf_hook_state *state) const struct nf_hook_state *state)
{ {
if (skb->nf_bridge && if (skb->nf_bridge && !skb->nf_bridge->in_prerouting)
!(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
return NF_STOP; return NF_STOP;
}
return NF_ACCEPT; return NF_ACCEPT;
} }
...@@ -872,7 +870,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) ...@@ -872,7 +870,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
skb_pull(skb, ETH_HLEN); skb_pull(skb, ETH_HLEN);
nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; nf_bridge->bridged_dnat = 0;
BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
...@@ -887,7 +885,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) ...@@ -887,7 +885,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
static int br_nf_dev_xmit(struct sk_buff *skb) static int br_nf_dev_xmit(struct sk_buff *skb)
{ {
if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) { if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) {
br_nf_pre_routing_finish_bridge_slow(skb); br_nf_pre_routing_finish_bridge_slow(skb);
return 1; return 1;
} }
......
...@@ -174,7 +174,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) ...@@ -174,7 +174,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
skb->pkt_type = PACKET_OTHERHOST; skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false; nf_bridge->pkt_otherhost = false;
} }
nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING; nf_bridge->in_prerouting = 0;
if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) { if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) {
skb_dst_drop(skb); skb_dst_drop(skb);
v6ops->route_input(skb); v6ops->route_input(skb);
......
...@@ -240,7 +240,7 @@ get_entry(const void *base, unsigned int offset) ...@@ -240,7 +240,7 @@ get_entry(const void *base, unsigned int offset)
return (struct arpt_entry *)(base + offset); return (struct arpt_entry *)(base + offset);
} }
static inline __pure static inline
struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
{ {
return (void *)entry + entry->next_offset; return (void *)entry + entry->next_offset;
...@@ -280,6 +280,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, ...@@ -280,6 +280,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
table_base = private->entries; table_base = private->entries;
jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
/* No TEE support for arptables, so no need to switch to alternate
* stack. All targets that reenter must return absolute verdicts.
*/
e = get_entry(table_base, private->hook_entry[hook]); e = get_entry(table_base, private->hook_entry[hook]);
acpar.in = state->in; acpar.in = state->in;
...@@ -325,11 +328,6 @@ unsigned int arpt_do_table(struct sk_buff *skb, ...@@ -325,11 +328,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
} }
if (table_base + v if (table_base + v
!= arpt_next_entry(e)) { != arpt_next_entry(e)) {
if (stackidx >= private->stacksize) {
verdict = NF_DROP;
break;
}
jumpstack[stackidx++] = e; jumpstack[stackidx++] = e;
} }
...@@ -337,9 +335,6 @@ unsigned int arpt_do_table(struct sk_buff *skb, ...@@ -337,9 +335,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
continue; continue;
} }
/* Targets which reenter must return
* abs. verdicts
*/
acpar.target = t->u.kernel.target; acpar.target = t->u.kernel.target;
acpar.targinfo = t->data; acpar.targinfo = t->data;
verdict = t->u.kernel.target->target(skb, &acpar); verdict = t->u.kernel.target->target(skb, &acpar);
...@@ -372,10 +367,13 @@ static inline bool unconditional(const struct arpt_arp *arp) ...@@ -372,10 +367,13 @@ static inline bool unconditional(const struct arpt_arp *arp)
/* Figures out from what hook each rule can be called: returns 0 if /* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom. * there are loops. Puts hook bitmask in comefrom.
*
* Keeps track of largest call depth seen and stores it in newinfo->stacksize.
*/ */
static int mark_source_chains(const struct xt_table_info *newinfo, static int mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0) unsigned int valid_hooks, void *entry0)
{ {
unsigned int calldepth, max_calldepth = 0;
unsigned int hook; unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset /* No recursion; use packet counter to save back ptrs (reset
...@@ -391,6 +389,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo, ...@@ -391,6 +389,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
/* Set initial back pointer. */ /* Set initial back pointer. */
e->counters.pcnt = pos; e->counters.pcnt = pos;
calldepth = 0;
for (;;) { for (;;) {
const struct xt_standard_target *t const struct xt_standard_target *t
...@@ -445,6 +444,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo, ...@@ -445,6 +444,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
(entry0 + pos + size); (entry0 + pos + size);
e->counters.pcnt = pos; e->counters.pcnt = pos;
pos += size; pos += size;
if (calldepth > 0)
--calldepth;
} else { } else {
int newpos = t->verdict; int newpos = t->verdict;
...@@ -459,6 +460,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo, ...@@ -459,6 +460,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
return 0; return 0;
} }
if (entry0 + newpos != arpt_next_entry(e) &&
++calldepth > max_calldepth)
max_calldepth = calldepth;
/* This a jump; chase it. */ /* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n", duprintf("Jump rule %u -> %u\n",
pos, newpos); pos, newpos);
...@@ -475,6 +480,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo, ...@@ -475,6 +480,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
next: next:
duprintf("Finished chain %u\n", hook); duprintf("Finished chain %u\n", hook);
} }
newinfo->stacksize = max_calldepth;
return 1; return 1;
} }
...@@ -664,9 +670,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, ...@@ -664,9 +670,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (ret != 0) if (ret != 0)
break; break;
++i; ++i;
if (strcmp(arpt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0) if (ret != 0)
...@@ -1439,9 +1442,6 @@ static int translate_compat_table(const char *name, ...@@ -1439,9 +1442,6 @@ static int translate_compat_table(const char *name,
break; break;
} }
++i; ++i;
if (strcmp(arpt_get_target(iter1)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
if (ret) { if (ret) {
/* /*
......
...@@ -276,7 +276,7 @@ static void trace_packet(const struct sk_buff *skb, ...@@ -276,7 +276,7 @@ static void trace_packet(const struct sk_buff *skb,
} }
#endif #endif
static inline __pure static inline
struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
{ {
return (void *)entry + entry->next_offset; return (void *)entry + entry->next_offset;
...@@ -296,12 +296,13 @@ ipt_do_table(struct sk_buff *skb, ...@@ -296,12 +296,13 @@ ipt_do_table(struct sk_buff *skb,
const char *indev, *outdev; const char *indev, *outdev;
const void *table_base; const void *table_base;
struct ipt_entry *e, **jumpstack; struct ipt_entry *e, **jumpstack;
unsigned int *stackptr, origptr, cpu; unsigned int stackidx, cpu;
const struct xt_table_info *private; const struct xt_table_info *private;
struct xt_action_param acpar; struct xt_action_param acpar;
unsigned int addend; unsigned int addend;
/* Initialization */ /* Initialization */
stackidx = 0;
ip = ip_hdr(skb); ip = ip_hdr(skb);
indev = state->in ? state->in->name : nulldevname; indev = state->in ? state->in->name : nulldevname;
outdev = state->out ? state->out->name : nulldevname; outdev = state->out ? state->out->name : nulldevname;
...@@ -331,13 +332,21 @@ ipt_do_table(struct sk_buff *skb, ...@@ -331,13 +332,21 @@ ipt_do_table(struct sk_buff *skb,
smp_read_barrier_depends(); smp_read_barrier_depends();
table_base = private->entries; table_base = private->entries;
jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
stackptr = per_cpu_ptr(private->stackptr, cpu);
origptr = *stackptr; /* Switch to alternate jumpstack if we're being invoked via TEE.
* TEE issues XT_CONTINUE verdict on original skb so we must not
* clobber the jumpstack.
*
* For recursion via REJECT or SYNPROXY the stack will be clobbered
* but it is no problem since absolute verdict is issued by these.
*/
if (static_key_false(&xt_tee_enabled))
jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
e = get_entry(table_base, private->hook_entry[hook]); e = get_entry(table_base, private->hook_entry[hook]);
pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n", pr_debug("Entering %s(hook %u), UF %p\n",
table->name, hook, origptr, table->name, hook,
get_entry(table_base, private->underflow[hook])); get_entry(table_base, private->underflow[hook]));
do { do {
...@@ -383,28 +392,24 @@ ipt_do_table(struct sk_buff *skb, ...@@ -383,28 +392,24 @@ ipt_do_table(struct sk_buff *skb,
verdict = (unsigned int)(-v) - 1; verdict = (unsigned int)(-v) - 1;
break; break;
} }
if (*stackptr <= origptr) { if (stackidx == 0) {
e = get_entry(table_base, e = get_entry(table_base,
private->underflow[hook]); private->underflow[hook]);
pr_debug("Underflow (this is normal) " pr_debug("Underflow (this is normal) "
"to %p\n", e); "to %p\n", e);
} else { } else {
e = jumpstack[--*stackptr]; e = jumpstack[--stackidx];
pr_debug("Pulled %p out from pos %u\n", pr_debug("Pulled %p out from pos %u\n",
e, *stackptr); e, stackidx);
e = ipt_next_entry(e); e = ipt_next_entry(e);
} }
continue; continue;
} }
if (table_base + v != ipt_next_entry(e) && if (table_base + v != ipt_next_entry(e) &&
!(e->ip.flags & IPT_F_GOTO)) { !(e->ip.flags & IPT_F_GOTO)) {
if (*stackptr >= private->stacksize) { jumpstack[stackidx++] = e;
verdict = NF_DROP;
break;
}
jumpstack[(*stackptr)++] = e;
pr_debug("Pushed %p into pos %u\n", pr_debug("Pushed %p into pos %u\n",
e, *stackptr - 1); e, stackidx - 1);
} }
e = get_entry(table_base, v); e = get_entry(table_base, v);
...@@ -423,9 +428,8 @@ ipt_do_table(struct sk_buff *skb, ...@@ -423,9 +428,8 @@ ipt_do_table(struct sk_buff *skb,
/* Verdict */ /* Verdict */
break; break;
} while (!acpar.hotdrop); } while (!acpar.hotdrop);
pr_debug("Exiting %s; resetting sp from %u to %u\n", pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
__func__, *stackptr, origptr);
*stackptr = origptr;
xt_write_recseq_end(addend); xt_write_recseq_end(addend);
local_bh_enable(); local_bh_enable();
...@@ -439,11 +443,15 @@ ipt_do_table(struct sk_buff *skb, ...@@ -439,11 +443,15 @@ ipt_do_table(struct sk_buff *skb,
} }
/* Figures out from what hook each rule can be called: returns 0 if /* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */ * there are loops. Puts hook bitmask in comefrom.
*
* Keeps track of largest call depth seen and stores it in newinfo->stacksize.
*/
static int static int
mark_source_chains(const struct xt_table_info *newinfo, mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0) unsigned int valid_hooks, void *entry0)
{ {
unsigned int calldepth, max_calldepth = 0;
unsigned int hook; unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset /* No recursion; use packet counter to save back ptrs (reset
...@@ -457,6 +465,7 @@ mark_source_chains(const struct xt_table_info *newinfo, ...@@ -457,6 +465,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* Set initial back pointer. */ /* Set initial back pointer. */
e->counters.pcnt = pos; e->counters.pcnt = pos;
calldepth = 0;
for (;;) { for (;;) {
const struct xt_standard_target *t const struct xt_standard_target *t
...@@ -518,6 +527,9 @@ mark_source_chains(const struct xt_table_info *newinfo, ...@@ -518,6 +527,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
(entry0 + pos + size); (entry0 + pos + size);
e->counters.pcnt = pos; e->counters.pcnt = pos;
pos += size; pos += size;
WARN_ON_ONCE(calldepth == 0);
if (calldepth > 0)
--calldepth;
} else { } else {
int newpos = t->verdict; int newpos = t->verdict;
...@@ -531,9 +543,14 @@ mark_source_chains(const struct xt_table_info *newinfo, ...@@ -531,9 +543,14 @@ mark_source_chains(const struct xt_table_info *newinfo,
newpos); newpos);
return 0; return 0;
} }
if (entry0 + newpos != ipt_next_entry(e) &&
!(e->ip.flags & IPT_F_GOTO) &&
++calldepth > max_calldepth)
max_calldepth = calldepth;
/* This a jump; chase it. */ /* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n", duprintf("Jump rule %u -> %u, calldepth %d\n",
pos, newpos); pos, newpos, calldepth);
} else { } else {
/* ... this is a fallthru */ /* ... this is a fallthru */
newpos = pos + e->next_offset; newpos = pos + e->next_offset;
...@@ -547,6 +564,7 @@ mark_source_chains(const struct xt_table_info *newinfo, ...@@ -547,6 +564,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
next: next:
duprintf("Finished chain %u\n", hook); duprintf("Finished chain %u\n", hook);
} }
newinfo->stacksize = max_calldepth;
return 1; return 1;
} }
...@@ -826,9 +844,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, ...@@ -826,9 +844,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0) if (ret != 0)
return ret; return ret;
++i; ++i;
if (strcmp(ipt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
if (i != repl->num_entries) { if (i != repl->num_entries) {
...@@ -1744,9 +1759,6 @@ translate_compat_table(struct net *net, ...@@ -1744,9 +1759,6 @@ translate_compat_table(struct net *net,
if (ret != 0) if (ret != 0)
break; break;
++i; ++i;
if (strcmp(ipt_get_target(iter1)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
if (ret) { if (ret) {
/* /*
......
...@@ -49,12 +49,9 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, ...@@ -49,12 +49,9 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
if (skb->nfct) if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct); zone = nf_ct_zone((struct nf_conn *)skb->nfct);
#endif #endif
if (nf_bridge_in_prerouting(skb))
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (skb->nf_bridge &&
skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
#endif
if (hooknum == NF_INET_PRE_ROUTING) if (hooknum == NF_INET_PRE_ROUTING)
return IP_DEFRAG_CONNTRACK_IN + zone; return IP_DEFRAG_CONNTRACK_IN + zone;
else else
......
...@@ -305,7 +305,7 @@ static void trace_packet(const struct sk_buff *skb, ...@@ -305,7 +305,7 @@ static void trace_packet(const struct sk_buff *skb,
} }
#endif #endif
static inline __pure struct ip6t_entry * static inline struct ip6t_entry *
ip6t_next_entry(const struct ip6t_entry *entry) ip6t_next_entry(const struct ip6t_entry *entry)
{ {
return (void *)entry + entry->next_offset; return (void *)entry + entry->next_offset;
...@@ -324,12 +324,13 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -324,12 +324,13 @@ ip6t_do_table(struct sk_buff *skb,
const char *indev, *outdev; const char *indev, *outdev;
const void *table_base; const void *table_base;
struct ip6t_entry *e, **jumpstack; struct ip6t_entry *e, **jumpstack;
unsigned int *stackptr, origptr, cpu; unsigned int stackidx, cpu;
const struct xt_table_info *private; const struct xt_table_info *private;
struct xt_action_param acpar; struct xt_action_param acpar;
unsigned int addend; unsigned int addend;
/* Initialization */ /* Initialization */
stackidx = 0;
indev = state->in ? state->in->name : nulldevname; indev = state->in ? state->in->name : nulldevname;
outdev = state->out ? state->out->name : nulldevname; outdev = state->out ? state->out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as /* We handle fragments by dealing with the first fragment as
...@@ -357,8 +358,16 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -357,8 +358,16 @@ ip6t_do_table(struct sk_buff *skb,
cpu = smp_processor_id(); cpu = smp_processor_id();
table_base = private->entries; table_base = private->entries;
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
stackptr = per_cpu_ptr(private->stackptr, cpu);
origptr = *stackptr; /* Switch to alternate jumpstack if we're being invoked via TEE.
* TEE issues XT_CONTINUE verdict on original skb so we must not
* clobber the jumpstack.
*
* For recursion via REJECT or SYNPROXY the stack will be clobbered
* but it is no problem since absolute verdict is issued by these.
*/
if (static_key_false(&xt_tee_enabled))
jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
e = get_entry(table_base, private->hook_entry[hook]); e = get_entry(table_base, private->hook_entry[hook]);
...@@ -406,20 +415,16 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -406,20 +415,16 @@ ip6t_do_table(struct sk_buff *skb,
verdict = (unsigned int)(-v) - 1; verdict = (unsigned int)(-v) - 1;
break; break;
} }
if (*stackptr <= origptr) if (stackidx == 0)
e = get_entry(table_base, e = get_entry(table_base,
private->underflow[hook]); private->underflow[hook]);
else else
e = ip6t_next_entry(jumpstack[--*stackptr]); e = ip6t_next_entry(jumpstack[--stackidx]);
continue; continue;
} }
if (table_base + v != ip6t_next_entry(e) && if (table_base + v != ip6t_next_entry(e) &&
!(e->ipv6.flags & IP6T_F_GOTO)) { !(e->ipv6.flags & IP6T_F_GOTO)) {
if (*stackptr >= private->stacksize) { jumpstack[stackidx++] = e;
verdict = NF_DROP;
break;
}
jumpstack[(*stackptr)++] = e;
} }
e = get_entry(table_base, v); e = get_entry(table_base, v);
...@@ -437,8 +442,6 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -437,8 +442,6 @@ ip6t_do_table(struct sk_buff *skb,
break; break;
} while (!acpar.hotdrop); } while (!acpar.hotdrop);
*stackptr = origptr;
xt_write_recseq_end(addend); xt_write_recseq_end(addend);
local_bh_enable(); local_bh_enable();
...@@ -452,11 +455,15 @@ ip6t_do_table(struct sk_buff *skb, ...@@ -452,11 +455,15 @@ ip6t_do_table(struct sk_buff *skb,
} }
/* Figures out from what hook each rule can be called: returns 0 if /* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */ * there are loops. Puts hook bitmask in comefrom.
*
* Keeps track of largest call depth seen and stores it in newinfo->stacksize.
*/
static int static int
mark_source_chains(const struct xt_table_info *newinfo, mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0) unsigned int valid_hooks, void *entry0)
{ {
unsigned int calldepth, max_calldepth = 0;
unsigned int hook; unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset /* No recursion; use packet counter to save back ptrs (reset
...@@ -470,6 +477,7 @@ mark_source_chains(const struct xt_table_info *newinfo, ...@@ -470,6 +477,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* Set initial back pointer. */ /* Set initial back pointer. */
e->counters.pcnt = pos; e->counters.pcnt = pos;
calldepth = 0;
for (;;) { for (;;) {
const struct xt_standard_target *t const struct xt_standard_target *t
...@@ -531,6 +539,8 @@ mark_source_chains(const struct xt_table_info *newinfo, ...@@ -531,6 +539,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
(entry0 + pos + size); (entry0 + pos + size);
e->counters.pcnt = pos; e->counters.pcnt = pos;
pos += size; pos += size;
if (calldepth > 0)
--calldepth;
} else { } else {
int newpos = t->verdict; int newpos = t->verdict;
...@@ -544,6 +554,11 @@ mark_source_chains(const struct xt_table_info *newinfo, ...@@ -544,6 +554,11 @@ mark_source_chains(const struct xt_table_info *newinfo,
newpos); newpos);
return 0; return 0;
} }
if (entry0 + newpos != ip6t_next_entry(e) &&
!(e->ipv6.flags & IP6T_F_GOTO) &&
++calldepth > max_calldepth)
max_calldepth = calldepth;
/* This a jump; chase it. */ /* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n", duprintf("Jump rule %u -> %u\n",
pos, newpos); pos, newpos);
...@@ -560,6 +575,7 @@ mark_source_chains(const struct xt_table_info *newinfo, ...@@ -560,6 +575,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
next: next:
duprintf("Finished chain %u\n", hook); duprintf("Finished chain %u\n", hook);
} }
newinfo->stacksize = max_calldepth;
return 1; return 1;
} }
...@@ -839,9 +855,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, ...@@ -839,9 +855,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0) if (ret != 0)
return ret; return ret;
++i; ++i;
if (strcmp(ip6t_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
if (i != repl->num_entries) { if (i != repl->num_entries) {
...@@ -1754,9 +1767,6 @@ translate_compat_table(struct net *net, ...@@ -1754,9 +1767,6 @@ translate_compat_table(struct net *net,
if (ret != 0) if (ret != 0)
break; break;
++i; ++i;
if (strcmp(ip6t_get_target(iter1)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
} }
if (ret) { if (ret) {
/* /*
......
...@@ -35,14 +35,12 @@ MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>"); ...@@ -35,14 +35,12 @@ MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
static unsigned int static unsigned int
reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{ {
const struct ip6t_reject_info *reject = par->targinfo; const struct ip6t_reject_info *reject = par->targinfo;
struct net *net = dev_net((par->in != NULL) ? par->in : par->out); struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
pr_debug("%s: medium point\n", __func__);
switch (reject->with) { switch (reject->with) {
case IP6T_ICMP6_NO_ROUTE: case IP6T_ICMP6_NO_ROUTE:
nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum); nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
...@@ -65,9 +63,6 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -65,9 +63,6 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
case IP6T_TCP_RESET: case IP6T_TCP_RESET:
nf_send_reset6(net, skb, par->hooknum); nf_send_reset6(net, skb, par->hooknum);
break; break;
default:
net_info_ratelimited("case %u not handled yet\n", reject->with);
break;
} }
return NF_DROP; return NF_DROP;
......
...@@ -39,12 +39,9 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, ...@@ -39,12 +39,9 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
if (skb->nfct) if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct); zone = nf_ct_zone((struct nf_conn *)skb->nfct);
#endif #endif
if (nf_bridge_in_prerouting(skb))
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (skb->nf_bridge &&
skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone; return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
#endif
if (hooknum == NF_INET_PRE_ROUTING) if (hooknum == NF_INET_PRE_ROUTING)
return IP6_DEFRAG_CONNTRACK_IN + zone; return IP6_DEFRAG_CONNTRACK_IN + zone;
else else
......
...@@ -34,6 +34,9 @@ EXPORT_SYMBOL(nf_afinfo); ...@@ -34,6 +34,9 @@ EXPORT_SYMBOL(nf_afinfo);
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops); EXPORT_SYMBOL_GPL(nf_ipv6_ops);
DEFINE_PER_CPU(bool, nf_skb_duplicated);
EXPORT_SYMBOL_GPL(nf_skb_duplicated);
int nf_register_afinfo(const struct nf_afinfo *afinfo) int nf_register_afinfo(const struct nf_afinfo *afinfo)
{ {
mutex_lock(&afinfo_mutex); mutex_lock(&afinfo_mutex);
...@@ -52,9 +55,6 @@ void nf_unregister_afinfo(const struct nf_afinfo *afinfo) ...@@ -52,9 +55,6 @@ void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
} }
EXPORT_SYMBOL_GPL(nf_unregister_afinfo); EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
EXPORT_SYMBOL(nf_hooks);
#ifdef HAVE_JUMP_LABEL #ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed); EXPORT_SYMBOL(nf_hooks_needed);
...@@ -62,63 +62,166 @@ EXPORT_SYMBOL(nf_hooks_needed); ...@@ -62,63 +62,166 @@ EXPORT_SYMBOL(nf_hooks_needed);
static DEFINE_MUTEX(nf_hook_mutex); static DEFINE_MUTEX(nf_hook_mutex);
int nf_register_hook(struct nf_hook_ops *reg) static struct list_head *nf_find_hook_list(struct net *net,
const struct nf_hook_ops *reg)
{ {
struct list_head *nf_hook_list; struct list_head *hook_list = NULL;
struct nf_hook_ops *elem;
mutex_lock(&nf_hook_mutex); if (reg->pf != NFPROTO_NETDEV)
switch (reg->pf) { hook_list = &net->nf.hooks[reg->pf][reg->hooknum];
case NFPROTO_NETDEV: else if (reg->hooknum == NF_NETDEV_INGRESS) {
#ifdef CONFIG_NETFILTER_INGRESS #ifdef CONFIG_NETFILTER_INGRESS
if (reg->hooknum == NF_NETDEV_INGRESS) { if (reg->dev && dev_net(reg->dev) == net)
BUG_ON(reg->dev == NULL); hook_list = &reg->dev->nf_hooks_ingress;
nf_hook_list = &reg->dev->nf_hooks_ingress;
net_inc_ingress_queue();
break;
}
#endif #endif
/* Fall through. */ }
default: return hook_list;
nf_hook_list = &nf_hooks[reg->pf][reg->hooknum]; }
break;
/*
 * Per-registration bookkeeping for a hook installed by
 * nf_register_net_hook().
 */
struct nf_hook_entry {
/* The caller's nf_hook_ops; nf_unregister_net_hook() matches on this pointer. */
const struct nf_hook_ops *orig_ops;
/* Private copy of *orig_ops; its list member is what gets linked into the hook list. */
struct nf_hook_ops ops;
};
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
struct list_head *hook_list;
struct nf_hook_entry *entry;
struct nf_hook_ops *elem;
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
return -ENOMEM;
entry->orig_ops = reg;
entry->ops = *reg;
hook_list = nf_find_hook_list(net, reg);
if (!hook_list) {
kfree(entry);
return -ENOENT;
} }
list_for_each_entry(elem, nf_hook_list, list) { mutex_lock(&nf_hook_mutex);
list_for_each_entry(elem, hook_list, list) {
if (reg->priority < elem->priority) if (reg->priority < elem->priority)
break; break;
} }
list_add_rcu(&reg->list, elem->list.prev); list_add_rcu(&entry->ops.list, elem->list.prev);
mutex_unlock(&nf_hook_mutex); mutex_unlock(&nf_hook_mutex);
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
#endif
#ifdef HAVE_JUMP_LABEL #ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif #endif
return 0; return 0;
} }
EXPORT_SYMBOL(nf_register_hook); EXPORT_SYMBOL(nf_register_net_hook);
void nf_unregister_hook(struct nf_hook_ops *reg) void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{ {
struct list_head *hook_list;
struct nf_hook_entry *entry;
struct nf_hook_ops *elem;
hook_list = nf_find_hook_list(net, reg);
if (!hook_list)
return;
mutex_lock(&nf_hook_mutex); mutex_lock(&nf_hook_mutex);
list_del_rcu(&reg->list); list_for_each_entry(elem, hook_list, list) {
entry = container_of(elem, struct nf_hook_entry, ops);
if (entry->orig_ops == reg) {
list_del_rcu(&entry->ops.list);
break;
}
}
mutex_unlock(&nf_hook_mutex); mutex_unlock(&nf_hook_mutex);
switch (reg->pf) { if (&elem->list == hook_list) {
case NFPROTO_NETDEV: WARN(1, "nf_unregister_net_hook: hook not found!\n");
return;
}
#ifdef CONFIG_NETFILTER_INGRESS #ifdef CONFIG_NETFILTER_INGRESS
if (reg->hooknum == NF_NETDEV_INGRESS) { if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_dec_ingress_queue(); net_dec_ingress_queue();
break;
}
break;
#endif #endif
default:
break;
}
#ifdef HAVE_JUMP_LABEL #ifdef HAVE_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif #endif
synchronize_net(); synchronize_net();
nf_queue_nf_hook_drop(reg); nf_queue_nf_hook_drop(net, &entry->ops);
kfree(entry);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
/*
 * Register the first @n hooks of the array @reg in @net, in array order.
 *
 * Stops at the first nf_register_net_hook() failure, unregisters the
 * hooks that were already installed by this call and returns that error.
 * Returns 0 when every hook was registered (or when @n is 0).
 */
int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
			  unsigned int n)
{
	unsigned int done;
	int ret;

	for (done = 0; done < n; done++) {
		ret = nf_register_net_hook(net, &reg[done]);
		if (ret == 0)
			continue;

		/* reg[0..done-1] are installed; take them back out. */
		if (done > 0)
			nf_unregister_net_hooks(net, reg, done);
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL(nf_register_net_hooks);
/*
 * Unregister the first @n hooks of the array @reg from @net, walking the
 * array from the last element down to the first.
 */
void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
			     unsigned int n)
{
	unsigned int remaining;

	for (remaining = n; remaining > 0; remaining--)
		nf_unregister_net_hook(net, &reg[remaining - 1]);
}
EXPORT_SYMBOL(nf_unregister_net_hooks);
static LIST_HEAD(nf_hook_list);
/*
 * Register @reg in every namespace visited by for_each_net() and remember
 * it on nf_hook_list.
 *
 * -ENOENT from nf_register_net_hook() is tolerated and the namespace is
 * skipped.  Any other error undoes the registrations made in the
 * namespaces visited before the failing one and is returned.  The whole
 * operation runs under the rtnl lock.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
	struct net *net, *undo;
	int ret;

	rtnl_lock();
	for_each_net(net) {
		ret = nf_register_net_hook(net, reg);
		if (ret == 0 || ret == -ENOENT)
			continue;

		/* Hard failure: roll back every namespace walked before this one. */
		for_each_net(undo) {
			if (undo == net)
				break;
			nf_unregister_net_hook(undo, reg);
		}
		goto unlock;
	}

	list_add_tail(&reg->list, &nf_hook_list);
	ret = 0;
unlock:
	rtnl_unlock();
	return ret;
}
EXPORT_SYMBOL(nf_register_hook);
void nf_unregister_hook(struct nf_hook_ops *reg)
{
struct net *net;
rtnl_lock();
list_del(&reg->list);
for_each_net(net)
nf_unregister_net_hook(net, reg);
rtnl_unlock();
} }
EXPORT_SYMBOL(nf_unregister_hook); EXPORT_SYMBOL(nf_unregister_hook);
...@@ -295,8 +398,46 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); ...@@ -295,8 +398,46 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook); EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif #endif
/*
 * Install every hook currently on nf_hook_list into @net.
 *
 * -ENOENT from nf_register_net_hook() is tolerated and that hook is
 * skipped.  On any other error the hooks that precede the failing one on
 * the list are unregistered again (walking backwards) and the error is
 * returned.  Runs under the rtnl lock.
 */
static int nf_register_hook_list(struct net *net)
{
	struct nf_hook_ops *ops;
	int err;

	rtnl_lock();
	list_for_each_entry(ops, &nf_hook_list, list) {
		err = nf_register_net_hook(net, ops);
		if (err == 0 || err == -ENOENT)
			continue;

		/* Walk back from the failing entry, undoing earlier ones. */
		list_for_each_entry_continue_reverse(ops, &nf_hook_list, list)
			nf_unregister_net_hook(net, ops);
		rtnl_unlock();
		return err;
	}
	rtnl_unlock();

	return 0;
}
static void nf_unregister_hook_list(struct net *net)
{
struct nf_hook_ops *elem;
rtnl_lock();
list_for_each_entry(elem, &nf_hook_list, list)
nf_unregister_net_hook(net, elem);
rtnl_unlock();
}
static int __net_init netfilter_net_init(struct net *net) static int __net_init netfilter_net_init(struct net *net)
{ {
int i, h, ret;
for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
for (h = 0; h < NF_MAX_HOOKS; h++)
INIT_LIST_HEAD(&net->nf.hooks[i][h]);
}
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter", net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
net->proc_net); net->proc_net);
...@@ -307,11 +448,16 @@ static int __net_init netfilter_net_init(struct net *net) ...@@ -307,11 +448,16 @@ static int __net_init netfilter_net_init(struct net *net)
return -ENOMEM; return -ENOMEM;
} }
#endif #endif
return 0; ret = nf_register_hook_list(net);
if (ret)
remove_proc_entry("netfilter", net->proc_net);
return ret;
} }
static void __net_exit netfilter_net_exit(struct net *net) static void __net_exit netfilter_net_exit(struct net *net)
{ {
nf_unregister_hook_list(net);
remove_proc_entry("netfilter", net->proc_net); remove_proc_entry("netfilter", net->proc_net);
} }
...@@ -322,12 +468,7 @@ static struct pernet_operations netfilter_net_ops = { ...@@ -322,12 +468,7 @@ static struct pernet_operations netfilter_net_ops = {
int __init netfilter_init(void) int __init netfilter_init(void)
{ {
int i, h, ret; int ret;
for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) {
for (h = 0; h < NF_MAX_HOOKS; h++)
INIT_LIST_HEAD(&nf_hooks[i][h]);
}
ret = register_pernet_subsys(&netfilter_net_ops); ret = register_pernet_subsys(&netfilter_net_ops);
if (ret < 0) if (ret < 0)
......
...@@ -137,7 +137,7 @@ struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name) ...@@ -137,7 +137,7 @@ struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler) void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
{ {
if (scheduler && scheduler->module) if (scheduler)
module_put(scheduler->module); module_put(scheduler->module);
} }
......
...@@ -42,6 +42,8 @@ static const char *const sctp_conntrack_names[] = { ...@@ -42,6 +42,8 @@ static const char *const sctp_conntrack_names[] = {
"SHUTDOWN_SENT", "SHUTDOWN_SENT",
"SHUTDOWN_RECD", "SHUTDOWN_RECD",
"SHUTDOWN_ACK_SENT", "SHUTDOWN_ACK_SENT",
"HEARTBEAT_SENT",
"HEARTBEAT_ACKED",
}; };
#define SECS * HZ #define SECS * HZ
...@@ -57,6 +59,8 @@ static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = { ...@@ -57,6 +59,8 @@ static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
[SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS,
}; };
#define sNO SCTP_CONNTRACK_NONE #define sNO SCTP_CONNTRACK_NONE
...@@ -67,6 +71,8 @@ static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = { ...@@ -67,6 +71,8 @@ static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT #define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
#define sHS SCTP_CONNTRACK_HEARTBEAT_SENT
#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED
#define sIV SCTP_CONNTRACK_MAX #define sIV SCTP_CONNTRACK_MAX
/* /*
...@@ -88,6 +94,10 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite ...@@ -88,6 +94,10 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
to that of the SHUTDOWN chunk. to that of the SHUTDOWN chunk.
CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
the SHUTDOWN chunk. Connection is closed. the SHUTDOWN chunk. Connection is closed.
HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow.
HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to
that of the HEARTBEAT chunk. Secondary connection is
established.
*/ */
/* TODO /* TODO
...@@ -97,36 +107,40 @@ CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of ...@@ -97,36 +107,40 @@ CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
- Check the error type in the reply dir before transitioning from - Check the error type in the reply dir before transitioning from
cookie echoed to closed. cookie echoed to closed.
- Sec 5.2.4 of RFC 2960 - Sec 5.2.4 of RFC 2960
- Multi Homing support. - Full Multi Homing support.
*/ */
/* SCTP conntrack state transitions */ /* SCTP conntrack state transitions */
static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{ {
/* ORIGINAL */ /* ORIGINAL */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA}, /* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA},
/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA}, /* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},
/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, /* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA}, /* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS},
/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA}, /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA},
/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie*/ /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/
/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */ /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */
/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */ /* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */
/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL} /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA},
/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA},
/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}
}, },
{ {
/* REPLY */ /* REPLY */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ /* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */ /* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */
/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA}, /* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},
/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, /* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL},
/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA}, /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR},
/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA}, /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA},
/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA}, /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA},
/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */ /* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */
/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA}, /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA},
/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL} /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA},
/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA},
/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA}
} }
}; };
...@@ -278,9 +292,16 @@ static int sctp_new_state(enum ip_conntrack_dir dir, ...@@ -278,9 +292,16 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n"); pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
i = 8; i = 8;
break; break;
case SCTP_CID_HEARTBEAT:
pr_debug("SCTP_CID_HEARTBEAT");
i = 9;
break;
case SCTP_CID_HEARTBEAT_ACK:
pr_debug("SCTP_CID_HEARTBEAT_ACK");
i = 10;
break;
default: default:
/* Other chunks like DATA, SACK, HEARTBEAT and /* Other chunks like DATA or SACK do not change the state */
its ACK do not cause a change in state */
pr_debug("Unknown chunk type, Will stay in %s\n", pr_debug("Unknown chunk type, Will stay in %s\n",
sctp_conntrack_names[cur_state]); sctp_conntrack_names[cur_state]);
return cur_state; return cur_state;
...@@ -329,6 +350,8 @@ static int sctp_packet(struct nf_conn *ct, ...@@ -329,6 +350,8 @@ static int sctp_packet(struct nf_conn *ct,
!test_bit(SCTP_CID_COOKIE_ECHO, map) && !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
!test_bit(SCTP_CID_ABORT, map) && !test_bit(SCTP_CID_ABORT, map) &&
!test_bit(SCTP_CID_SHUTDOWN_ACK, map) && !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
!test_bit(SCTP_CID_HEARTBEAT, map) &&
!test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
sh->vtag != ct->proto.sctp.vtag[dir]) { sh->vtag != ct->proto.sctp.vtag[dir]) {
pr_debug("Verification tag check failed\n"); pr_debug("Verification tag check failed\n");
goto out; goto out;
...@@ -357,6 +380,16 @@ static int sctp_packet(struct nf_conn *ct, ...@@ -357,6 +380,16 @@ static int sctp_packet(struct nf_conn *ct,
/* Sec 8.5.1 (D) */ /* Sec 8.5.1 (D) */
if (sh->vtag != ct->proto.sctp.vtag[dir]) if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock; goto out_unlock;
} else if (sch->type == SCTP_CID_HEARTBEAT ||
sch->type == SCTP_CID_HEARTBEAT_ACK) {
if (ct->proto.sctp.vtag[dir] == 0) {
pr_debug("Setting vtag %x for dir %d\n",
sh->vtag, dir);
ct->proto.sctp.vtag[dir] = sh->vtag;
} else if (sh->vtag != ct->proto.sctp.vtag[dir]) {
pr_debug("Verification tag check failed\n");
goto out_unlock;
}
} }
old_state = ct->proto.sctp.state; old_state = ct->proto.sctp.state;
...@@ -466,6 +499,10 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, ...@@ -466,6 +499,10 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
/* Sec 8.5.1 (A) */ /* Sec 8.5.1 (A) */
return false; return false;
} }
} else if (sch->type == SCTP_CID_HEARTBEAT) {
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
} }
/* If it is a shutdown ack OOTB packet, we expect a return /* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
...@@ -610,6 +647,8 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = { ...@@ -610,6 +647,8 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
[CTA_TIMEOUT_SCTP_SHUTDOWN_SENT] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_SHUTDOWN_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_SHUTDOWN_RECD] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_SHUTDOWN_RECD] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 },
}; };
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ #endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
...@@ -658,6 +697,18 @@ static struct ctl_table sctp_sysctl_table[] = { ...@@ -658,6 +697,18 @@ static struct ctl_table sctp_sysctl_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec_jiffies, .proc_handler = proc_dointvec_jiffies,
}, },
{
.procname = "nf_conntrack_sctp_timeout_heartbeat_sent",
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "nf_conntrack_sctp_timeout_heartbeat_acked",
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{ } { }
}; };
...@@ -730,6 +781,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, ...@@ -730,6 +781,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT]; pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT];
pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD]; pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD];
pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]; pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT];
pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT];
pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED];
#endif #endif
return 0; return 0;
} }
......
...@@ -19,7 +19,7 @@ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, ...@@ -19,7 +19,7 @@ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
/* nf_queue.c */ /* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
struct nf_hook_state *state, unsigned int queuenum); struct nf_hook_state *state, unsigned int queuenum);
void nf_queue_nf_hook_drop(struct nf_hook_ops *ops); void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops);
int __init netfilter_queue_init(void); int __init netfilter_queue_init(void);
/* nf_log.c */ /* nf_log.c */
......
...@@ -105,21 +105,15 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) ...@@ -105,21 +105,15 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
} }
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
void nf_queue_nf_hook_drop(struct nf_hook_ops *ops) void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops)
{ {
const struct nf_queue_handler *qh; const struct nf_queue_handler *qh;
struct net *net;
rtnl_lock();
rcu_read_lock(); rcu_read_lock();
qh = rcu_dereference(queue_handler); qh = rcu_dereference(queue_handler);
if (qh) { if (qh)
for_each_net(net) {
qh->nf_hook_drop(net, ops); qh->nf_hook_drop(net, ops);
}
}
rcu_read_unlock(); rcu_read_unlock();
rtnl_unlock();
} }
/* /*
......
...@@ -130,20 +130,24 @@ static void nft_trans_destroy(struct nft_trans *trans) ...@@ -130,20 +130,24 @@ static void nft_trans_destroy(struct nft_trans *trans)
int nft_register_basechain(struct nft_base_chain *basechain, int nft_register_basechain(struct nft_base_chain *basechain,
unsigned int hook_nops) unsigned int hook_nops)
{ {
struct net *net = read_pnet(&basechain->pnet);
if (basechain->flags & NFT_BASECHAIN_DISABLED) if (basechain->flags & NFT_BASECHAIN_DISABLED)
return 0; return 0;
return nf_register_hooks(basechain->ops, hook_nops); return nf_register_net_hooks(net, basechain->ops, hook_nops);
} }
EXPORT_SYMBOL_GPL(nft_register_basechain); EXPORT_SYMBOL_GPL(nft_register_basechain);
void nft_unregister_basechain(struct nft_base_chain *basechain, void nft_unregister_basechain(struct nft_base_chain *basechain,
unsigned int hook_nops) unsigned int hook_nops)
{ {
struct net *net = read_pnet(&basechain->pnet);
if (basechain->flags & NFT_BASECHAIN_DISABLED) if (basechain->flags & NFT_BASECHAIN_DISABLED)
return; return;
nf_unregister_hooks(basechain->ops, hook_nops); nf_unregister_net_hooks(net, basechain->ops, hook_nops);
} }
EXPORT_SYMBOL_GPL(nft_unregister_basechain); EXPORT_SYMBOL_GPL(nft_unregister_basechain);
......
...@@ -114,7 +114,6 @@ unsigned int ...@@ -114,7 +114,6 @@ unsigned int
nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
{ {
const struct nft_chain *chain = ops->priv, *basechain = chain; const struct nft_chain *chain = ops->priv, *basechain = chain;
const struct net *chain_net = read_pnet(&nft_base_chain(basechain)->pnet);
const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out); const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
const struct nft_rule *rule; const struct nft_rule *rule;
const struct nft_expr *expr, *last; const struct nft_expr *expr, *last;
...@@ -125,10 +124,6 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) ...@@ -125,10 +124,6 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
int rulenum; int rulenum;
unsigned int gencursor = nft_genmask_cur(net); unsigned int gencursor = nft_genmask_cur(net);
/* Ignore chains that are not for the current network namespace */
if (!net_eq(net, chain_net))
return NF_ACCEPT;
do_chain: do_chain:
rulenum = 0; rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list); rule = list_entry(&chain->rules, struct nft_rule, list);
......
...@@ -67,9 +67,6 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = { ...@@ -67,9 +67,6 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
[NFPROTO_IPV6] = "ip6", [NFPROTO_IPV6] = "ip6",
}; };
/* Allow this many total (re)entries. */
static const unsigned int xt_jumpstack_multiplier = 2;
/* Registration hooks for targets. */ /* Registration hooks for targets. */
int xt_register_target(struct xt_target *target) int xt_register_target(struct xt_target *target)
{ {
...@@ -688,8 +685,6 @@ void xt_free_table_info(struct xt_table_info *info) ...@@ -688,8 +685,6 @@ void xt_free_table_info(struct xt_table_info *info)
kvfree(info->jumpstack); kvfree(info->jumpstack);
} }
free_percpu(info->stackptr);
kvfree(info); kvfree(info);
} }
EXPORT_SYMBOL(xt_free_table_info); EXPORT_SYMBOL(xt_free_table_info);
...@@ -732,15 +727,14 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock); ...@@ -732,15 +727,14 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock);
DEFINE_PER_CPU(seqcount_t, xt_recseq); DEFINE_PER_CPU(seqcount_t, xt_recseq);
EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq); EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
struct static_key xt_tee_enabled __read_mostly;
EXPORT_SYMBOL_GPL(xt_tee_enabled);
static int xt_jumpstack_alloc(struct xt_table_info *i) static int xt_jumpstack_alloc(struct xt_table_info *i)
{ {
unsigned int size; unsigned int size;
int cpu; int cpu;
i->stackptr = alloc_percpu(unsigned int);
if (i->stackptr == NULL)
return -ENOMEM;
size = sizeof(void **) * nr_cpu_ids; size = sizeof(void **) * nr_cpu_ids;
if (size > PAGE_SIZE) if (size > PAGE_SIZE)
i->jumpstack = vzalloc(size); i->jumpstack = vzalloc(size);
...@@ -749,8 +743,21 @@ static int xt_jumpstack_alloc(struct xt_table_info *i) ...@@ -749,8 +743,21 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
if (i->jumpstack == NULL) if (i->jumpstack == NULL)
return -ENOMEM; return -ENOMEM;
i->stacksize *= xt_jumpstack_multiplier; /* ruleset without jumps -- no stack needed */
size = sizeof(void *) * i->stacksize; if (i->stacksize == 0)
return 0;
/* Jumpstack needs to be able to record two full callchains, one
* from the first rule set traversal, plus one table reentrancy
* via -j TEE without clobbering the callchain that brought us to
* TEE target.
*
* This is done by allocating two jumpstacks per cpu, on reentry
* the upper half of the stack is used.
*
* see the jumpstack setup in ipt_do_table() for more details.
*/
size = sizeof(void *) * i->stacksize * 2u;
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
if (size > PAGE_SIZE) if (size > PAGE_SIZE)
i->jumpstack[cpu] = vmalloc_node(size, i->jumpstack[cpu] = vmalloc_node(size,
......
...@@ -37,7 +37,6 @@ struct xt_tee_priv { ...@@ -37,7 +37,6 @@ struct xt_tee_priv {
}; };
static const union nf_inet_addr tee_zero_address; static const union nf_inet_addr tee_zero_address;
static DEFINE_PER_CPU(bool, tee_active);
static struct net *pick_net(struct sk_buff *skb) static struct net *pick_net(struct sk_buff *skb)
{ {
...@@ -88,7 +87,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -88,7 +87,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
const struct xt_tee_tginfo *info = par->targinfo; const struct xt_tee_tginfo *info = par->targinfo;
struct iphdr *iph; struct iphdr *iph;
if (__this_cpu_read(tee_active)) if (__this_cpu_read(nf_skb_duplicated))
return XT_CONTINUE; return XT_CONTINUE;
/* /*
* Copy the skb, and route the copy. Will later return %XT_CONTINUE for * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
...@@ -125,9 +124,9 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -125,9 +124,9 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
ip_send_check(iph); ip_send_check(iph);
if (tee_tg_route4(skb, info)) { if (tee_tg_route4(skb, info)) {
__this_cpu_write(tee_active, true); __this_cpu_write(nf_skb_duplicated, true);
ip_local_out(skb); ip_local_out(skb);
__this_cpu_write(tee_active, false); __this_cpu_write(nf_skb_duplicated, false);
} else { } else {
kfree_skb(skb); kfree_skb(skb);
} }
...@@ -170,7 +169,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -170,7 +169,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{ {
const struct xt_tee_tginfo *info = par->targinfo; const struct xt_tee_tginfo *info = par->targinfo;
if (__this_cpu_read(tee_active)) if (__this_cpu_read(nf_skb_duplicated))
return XT_CONTINUE; return XT_CONTINUE;
skb = pskb_copy(skb, GFP_ATOMIC); skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL) if (skb == NULL)
...@@ -188,9 +187,9 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -188,9 +187,9 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
--iph->hop_limit; --iph->hop_limit;
} }
if (tee_tg_route6(skb, info)) { if (tee_tg_route6(skb, info)) {
__this_cpu_write(tee_active, true); __this_cpu_write(nf_skb_duplicated, true);
ip6_local_out(skb); ip6_local_out(skb);
__this_cpu_write(tee_active, false); __this_cpu_write(nf_skb_duplicated, false);
} else { } else {
kfree_skb(skb); kfree_skb(skb);
} }
...@@ -252,6 +251,7 @@ static int tee_tg_check(const struct xt_tgchk_param *par) ...@@ -252,6 +251,7 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
} else } else
info->priv = NULL; info->priv = NULL;
static_key_slow_inc(&xt_tee_enabled);
return 0; return 0;
} }
...@@ -263,6 +263,7 @@ static void tee_tg_destroy(const struct xt_tgdtor_param *par) ...@@ -263,6 +263,7 @@ static void tee_tg_destroy(const struct xt_tgdtor_param *par)
unregister_netdevice_notifier(&info->priv->notifier); unregister_netdevice_notifier(&info->priv->notifier);
kfree(info->priv); kfree(info->priv);
} }
static_key_slow_dec(&xt_tee_enabled);
} }
static struct xt_target tee_tg_reg[] __read_mostly = { static struct xt_target tee_tg_reg[] __read_mostly = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment