Commit 4b1f5dda authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next

Pablo Neira Ayuso says:

====================
Netfilter/IPVS updates for net-next

The following patchset contains Netfilter/IPVS updates for net-next,
more specifically:

* Updates for ipset:

1) Coding style fix for ipset comment extension, from Jeremy Sowden.

2) De-inline many functions in ipset, from Jeremy Sowden.

3) Move ipset function definition from header to source file.

4) Move ip_set_put_flags() to the source file, export it as a symbol and
   remove the inline keyword.

5) Move range_to_mask() to the source file where this is used.
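
For illustration, this is what range_to_mask() computes for an address
range (a minimal userspace sketch, hypothetical harness, not part of
the patchset):

    #include <stdio.h>
    #include <stdint.h>

    /* Same logic as the kernel helper: find the widest prefix mask
     * that maps the range [from, to] onto a single CIDR block. */
    static uint32_t range_to_mask(uint32_t from, uint32_t to, uint8_t *bits)
    {
            uint32_t mask = 0xFFFFFFFE;

            *bits = 32;
            while (--(*bits) > 0 && mask && (to & mask) != from)
                    mask <<= 1;
            return mask;
    }

    int main(void)
    {
            uint8_t bits;
            /* 192.168.0.0 - 192.168.0.255 collapses to a /24 */
            uint32_t mask = range_to_mask(0xC0A80000, 0xC0A800FF, &bits);

            printf("mask=0x%08X bits=%u\n", mask, bits); /* 0xFFFFFF00, 24 */
            return 0;
    }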

6) Move ip_set_get_ip_port() to the source file where this is used.

* IPVS selftests and netns improvements:

7) Two patches to speed up IPVS netns dismantle, from Haishuang Yan.

8) Three patches to add a selftest script for IPVS, also from
   Haishuang Yan.
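
The dismantle speedup comes from converting the IPVS pernet_operations
from a per-namespace .exit callback to .exit_batch, so one pass handles
all namespaces that die together. A sketch of the pattern with
hypothetical foo_* names (the patches apply it to ipvs_core_ops and
ipvs_core_dev_ops):

    /* Sketch: batch cleanup visits every dying netns in one call, so
     * costly synchronization can be paid once per batch instead of
     * once per namespace. */
    static void __net_exit foo_exit_batch(struct list_head *net_list)
    {
            struct net *net;

            list_for_each_entry(net, net_list, exit_list)
                    foo_net_cleanup(net_generic(net, foo_net_id));
    }

    static struct pernet_operations foo_net_ops = {
            .init       = foo_net_init,
            .exit_batch = foo_exit_batch,
            .id         = &foo_net_id,
            .size       = sizeof(struct foo_net),
    };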

* Conntrack updates and new nf_hook_slow_list() function:

9) Document ct ecache extension, from Florian Westphal.

10) Skip ct extensions from ctnetlink dump, from Florian.

11) Free ct extension immediately, from Florian.

12) Skip access to the ecache extension from nf_ct_deliver_cached_events(),
    which is not correct there, as reported by syzbot.

13) Add and use nf_hook_slow_list(), from Florian.
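
nf_hook_slow_list() lets list-based receive paths resolve the hook
entries once and then filter a whole sk_buff list, instead of taking
the per-packet nf_hook() path. A simplified sketch of the caller side,
condensed from the NF_HOOK_LIST() rework in this series (IPv4
pre-routing chosen arbitrarily):

    /* Packets the hooks did not accept are removed from the list by
     * nf_hook_slow_list(); accepted ones are spliced back onto head. */
    static void hook_skb_list(struct net *net, struct sock *sk,
                              struct net_device *in, struct list_head *head,
                              int (*okfn)(struct net *, struct sock *,
                                          struct sk_buff *))
    {
            const unsigned int hook = NF_INET_PRE_ROUTING;
            struct nf_hook_entries *e;
            struct nf_hook_state state;

            rcu_read_lock();
            e = rcu_dereference(net->nf.hooks_ipv4[hook]);
            if (e) {
                    nf_hook_state_init(&state, hook, NFPROTO_IPV4,
                                       in, NULL, sk, net, okfn);
                    nf_hook_slow_list(head, &state, e);
            }
            rcu_read_unlock();
    }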

* Flowtable infrastructure updates:

14) Move priority to nf_flowtable definition.

15) Dynamic allocation of per-device hooks in flowtables.

16) Allow a netdevice to be included only once in flowtable definitions.

17) Raise the maximum number of devices per flowtable.

* Netfilter hardware offload infrastructure updates:

18) Add nft_flow_block_chain() helper function.

19) Pass callback list to nft_setup_cb_call().

20) Add nft_flow_cls_offload_setup() helper function.

21) Remove rules for an unregistered device via the netdevice event.

22) Support for multiple devices in a basechain definition at the
    ingress hook.
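
On the netlink ABI side, this is carried by the new NFTA_HOOK_DEVS
nested attribute (see the uapi hunk below). A hedged userspace sketch
of filling the hook attributes with libmnl; the per-device
NFTA_DEVICE_NAME attribute and the helper name are assumptions made
for illustration:

    #include <arpa/inet.h>
    #include <libmnl/libmnl.h>
    #include <linux/netfilter.h>
    #include <linux/netfilter/nf_tables.h>

    /* Nest several device names under NFTA_HOOK_DEVS inside the
     * NFTA_CHAIN_HOOK attribute of a netdev basechain request. */
    static void put_hook_devs(struct nlmsghdr *nlh, const char **devs, int n)
    {
            struct nlattr *hook, *list;
            int i;

            hook = mnl_attr_nest_start(nlh, NFTA_CHAIN_HOOK);
            mnl_attr_put_u32(nlh, NFTA_HOOK_HOOKNUM, htonl(NF_NETDEV_INGRESS));
            mnl_attr_put_u32(nlh, NFTA_HOOK_PRIORITY, htonl(0));

            list = mnl_attr_nest_start(nlh, NFTA_HOOK_DEVS);
            for (i = 0; i < n; i++)
                    mnl_attr_put_strz(nlh, NFTA_DEVICE_NAME, devs[i]);
            mnl_attr_nest_end(nlh, list);

            mnl_attr_nest_end(nlh, hook);
    }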

23) Add nft_chain_offload_cmd() helper function.

24) Add nft_flow_block_offload_init() helper function.

25) Rewind in case of failure to bind multiple devices to a hook.
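
The usual unwind idiom applies: register the per-device hooks one by
one and, on failure, unregister those already bound. An illustrative
sketch (function name hypothetical, not the patch's code):

    /* Bind every nft_hook on the list; on error, rewind only the
     * hooks that were successfully registered so far. */
    static int register_hook_list(struct net *net, struct list_head *hook_list)
    {
            struct nft_hook *hook;
            int err;

            list_for_each_entry(hook, hook_list, list) {
                    err = nf_register_net_hook(net, &hook->ops);
                    if (err)
                            goto err_rewind;
            }
            return 0;

    err_rewind:
            list_for_each_entry_continue_reverse(hook, hook_list, list)
                    nf_unregister_net_hook(net, &hook->ops);
            return err;
    }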

26) Fix typo in the IPv6 tproxy module description, from Norman Rasmussen.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 64fe8e97 671312e1
......@@ -199,6 +199,8 @@ extern struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
const struct nf_hook_entries *e, unsigned int i);
void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
const struct nf_hook_entries *e);
/**
* nf_hook - call a netfilter hook
*
......@@ -311,17 +313,36 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
struct list_head *head, struct net_device *in, struct net_device *out,
int (*okfn)(struct net *, struct sock *, struct sk_buff *))
{
struct sk_buff *skb, *next;
struct list_head sublist;
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
list_del(&skb->list);
if (nf_hook(pf, hook, net, sk, skb, in, out, okfn) == 1)
list_add_tail(&skb->list, &sublist);
struct nf_hook_entries *hook_head = NULL;
#ifdef CONFIG_JUMP_LABEL
if (__builtin_constant_p(pf) &&
__builtin_constant_p(hook) &&
!static_key_false(&nf_hooks_needed[pf][hook]))
return;
#endif
rcu_read_lock();
switch (pf) {
case NFPROTO_IPV4:
hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]);
break;
case NFPROTO_IPV6:
hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
break;
default:
WARN_ON_ONCE(1);
break;
}
/* Put passed packets back on main list */
list_splice(&sublist, head);
if (hook_head) {
struct nf_hook_state state;
nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn);
nf_hook_slow_list(head, &state, hook_head);
}
rcu_read_unlock();
}
/* Call setsockopt() */
......
......@@ -269,34 +269,15 @@ ip_set_ext_destroy(struct ip_set *set, void *data)
/* Check that the extension is enabled for the set and
* call it's destroy function for its extension part in data.
*/
if (SET_WITH_COMMENT(set))
ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy(
set, ext_comment(data, set));
}
if (SET_WITH_COMMENT(set)) {
struct ip_set_comment *c = ext_comment(data, set);
static inline int
ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
{
u32 cadt_flags = 0;
if (SET_WITH_TIMEOUT(set))
if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
htonl(set->timeout))))
return -EMSGSIZE;
if (SET_WITH_COUNTER(set))
cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
if (SET_WITH_COMMENT(set))
cadt_flags |= IPSET_FLAG_WITH_COMMENT;
if (SET_WITH_SKBINFO(set))
cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
if (SET_WITH_FORCEADD(set))
cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
if (!cadt_flags)
return 0;
return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
ip_set_extensions[IPSET_EXT_ID_COMMENT].destroy(set, c);
}
}
int ip_set_put_flags(struct sk_buff *skb, struct ip_set *set);
/* Netlink CB args */
enum {
IPSET_CB_NET = 0, /* net namespace */
......@@ -506,144 +487,8 @@ ip_set_timeout_set(unsigned long *timeout, u32 value)
*timeout = t;
}
static inline u32
ip_set_timeout_get(const unsigned long *timeout)
{
u32 t;
if (*timeout == IPSET_ELEM_PERMANENT)
return 0;
t = jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC;
/* Zero value in userspace means no timeout */
return t == 0 ? 1 : t;
}
static inline char*
ip_set_comment_uget(struct nlattr *tb)
{
return nla_data(tb);
}
/* Called from uadd only, protected by the set spinlock.
* The kadt functions don't use the comment extensions in any way.
*/
static inline void
ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
const struct ip_set_ext *ext)
{
struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
size_t len = ext->comment ? strlen(ext->comment) : 0;
if (unlikely(c)) {
set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
}
if (!len)
return;
if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
len = IPSET_MAX_COMMENT_SIZE;
c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
if (unlikely(!c))
return;
strlcpy(c->str, ext->comment, len + 1);
set->ext_size += sizeof(*c) + strlen(c->str) + 1;
rcu_assign_pointer(comment->c, c);
}
/* Used only when dumping a set, protected by rcu_read_lock() */
static inline int
ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
{
struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
if (!c)
return 0;
return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
}
/* Called from uadd/udel, flush or the garbage collectors protected
* by the set spinlock.
* Called when the set is destroyed and when there can't be any user
* of the set data anymore.
*/
static inline void
ip_set_comment_free(struct ip_set *set, struct ip_set_comment *comment)
{
struct ip_set_comment_rcu *c;
c = rcu_dereference_protected(comment->c, 1);
if (unlikely(!c))
return;
set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
}
static inline void
ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
{
atomic64_add((long long)bytes, &(counter)->bytes);
}
static inline void
ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
{
atomic64_add((long long)packets, &(counter)->packets);
}
static inline u64
ip_set_get_bytes(const struct ip_set_counter *counter)
{
return (u64)atomic64_read(&(counter)->bytes);
}
static inline u64
ip_set_get_packets(const struct ip_set_counter *counter)
{
return (u64)atomic64_read(&(counter)->packets);
}
static inline bool
ip_set_match_counter(u64 counter, u64 match, u8 op)
{
switch (op) {
case IPSET_COUNTER_NONE:
return true;
case IPSET_COUNTER_EQ:
return counter == match;
case IPSET_COUNTER_NE:
return counter != match;
case IPSET_COUNTER_LT:
return counter < match;
case IPSET_COUNTER_GT:
return counter > match;
}
return false;
}
static inline void
ip_set_update_counter(struct ip_set_counter *counter,
const struct ip_set_ext *ext, u32 flags)
{
if (ext->packets != ULLONG_MAX &&
!(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
ip_set_add_bytes(ext->bytes, counter);
ip_set_add_packets(ext->packets, counter);
}
}
static inline bool
ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
{
return nla_put_net64(skb, IPSET_ATTR_BYTES,
cpu_to_be64(ip_set_get_bytes(counter)),
IPSET_ATTR_PAD) ||
nla_put_net64(skb, IPSET_ATTR_PACKETS,
cpu_to_be64(ip_set_get_packets(counter)),
IPSET_ATTR_PAD);
}
void ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
const struct ip_set_ext *ext);
static inline void
ip_set_init_counter(struct ip_set_counter *counter,
......@@ -655,31 +500,6 @@ ip_set_init_counter(struct ip_set_counter *counter,
atomic64_set(&(counter)->packets, (long long)(ext->packets));
}
static inline void
ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags)
{
mext->skbinfo = *skbinfo;
}
static inline bool
ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
{
/* Send nonzero parameters only */
return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
nla_put_net64(skb, IPSET_ATTR_SKBMARK,
cpu_to_be64((u64)skbinfo->skbmark << 32 |
skbinfo->skbmarkmask),
IPSET_ATTR_PAD)) ||
(skbinfo->skbprio &&
nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
cpu_to_be32(skbinfo->skbprio))) ||
(skbinfo->skbqueue &&
nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
cpu_to_be16(skbinfo->skbqueue)));
}
static inline void
ip_set_init_skbinfo(struct ip_set_skbinfo *skbinfo,
const struct ip_set_ext *ext)
......
......@@ -12,18 +12,4 @@ enum {
IPSET_ADD_START_STORED_TIMEOUT,
};
/* Common functions */
static inline u32
range_to_mask(u32 from, u32 to, u8 *bits)
{
u32 mask = 0xFFFFFFFE;
*bits = 32;
while (--(*bits) > 0 && mask && (to & mask) != from)
mask <<= 1;
return mask;
}
#endif /* __IP_SET_BITMAP_H */
......@@ -20,9 +20,6 @@ static inline bool ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
}
#endif
extern bool ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src,
__be16 *port);
static inline bool ip_set_proto_with_ports(u8 proto)
{
switch (proto) {
......
......@@ -1324,7 +1324,7 @@ void ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_control_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs);
void ip_vs_service_nets_cleanup(struct list_head *net_list);
/* IPVS application functions
* (from ip_vs_app.c)
......
......@@ -43,7 +43,6 @@ enum nf_ct_ext_id {
/* Extensions: optional stuff which isn't permanently in struct. */
struct nf_ct_ext {
struct rcu_head rcu;
u8 offset[NF_CT_EXT_NUM];
u8 len;
char data[0];
......@@ -72,15 +71,6 @@ static inline void *__nf_ct_ext_find(const struct nf_conn *ct, u8 id)
/* Destroy all relationships */
void nf_ct_ext_destroy(struct nf_conn *ct);
/* Free operation. If you want to free a object referred from private area,
* please implement __nf_ct_ext_free() and call it.
*/
static inline void nf_ct_ext_free(struct nf_conn *ct)
{
if (ct->ext)
kfree_rcu(ct->ext, rcu);
}
/* Add this type, returns pointer to data or NULL. */
void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp);
......
......@@ -24,6 +24,7 @@ struct nf_flowtable_type {
struct nf_flowtable {
struct list_head list;
struct rhashtable rhashtable;
int priority;
const struct nf_flowtable_type *type;
struct delayed_work gc_work;
};
......
......@@ -963,25 +963,31 @@ struct nft_stats {
struct u64_stats_sync syncp;
};
struct nft_hook {
struct list_head list;
struct nf_hook_ops ops;
struct rcu_head rcu;
};
/**
* struct nft_base_chain - nf_tables base chain
*
* @ops: netfilter hook ops
* @hook_list: list of netfilter hooks (for NFPROTO_NETDEV family)
* @type: chain type
* @policy: default policy
* @stats: per-cpu chain stats
* @chain: the chain
* @dev_name: device name that this base chain is attached to (if any)
* @flow_block: flow block (for hardware offload)
*/
struct nft_base_chain {
struct nf_hook_ops ops;
struct list_head hook_list;
const struct nft_chain_type *type;
u8 policy;
u8 flags;
struct nft_stats __percpu *stats;
struct nft_chain chain;
char dev_name[IFNAMSIZ];
struct flow_block flow_block;
};
......@@ -1146,7 +1152,7 @@ struct nft_object_ops {
int nft_register_obj(struct nft_object_type *obj_type);
void nft_unregister_obj(struct nft_object_type *obj_type);
#define NFT_FLOWTABLE_DEVICE_MAX 8
#define NFT_NETDEVICE_MAX 256
/**
* struct nft_flowtable - nf_tables flow table
......@@ -1155,7 +1161,6 @@ void nft_unregister_obj(struct nft_object_type *obj_type);
* @table: the table the flow table is contained in
* @name: name of this flow table
* @hooknum: hook number
* @priority: hook priority
* @ops_len: number of hooks in array
* @genmask: generation mask
* @use: number of references to this flow table
......@@ -1169,13 +1174,12 @@ struct nft_flowtable {
struct nft_table *table;
char *name;
int hooknum;
int priority;
int ops_len;
u32 genmask:2,
use:30;
u64 handle;
/* runtime data below here */
struct nf_hook_ops *ops ____cacheline_aligned;
struct list_head hook_list ____cacheline_aligned;
struct nf_flowtable data;
};
......
......@@ -144,12 +144,14 @@ enum nft_list_attributes {
* @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32)
* @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
* @NFTA_HOOK_DEV: netdevice name (NLA_STRING)
* @NFTA_HOOK_DEVS: list of netdevices (NLA_NESTED)
*/
enum nft_hook_attributes {
NFTA_HOOK_UNSPEC,
NFTA_HOOK_HOOKNUM,
NFTA_HOOK_PRIORITY,
NFTA_HOOK_DEV,
NFTA_HOOK_DEVS,
__NFTA_HOOK_MAX
};
#define NFTA_HOOK_MAX (__NFTA_HOOK_MAX - 1)
......
......@@ -150,4 +150,4 @@ EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v6);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support");
MODULE_DESCRIPTION("Netfilter IPv6 transparent proxy support");
......@@ -536,6 +536,26 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
}
EXPORT_SYMBOL(nf_hook_slow);
void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
const struct nf_hook_entries *e)
{
struct sk_buff *skb, *next;
struct list_head sublist;
int ret;
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
skb_list_del_init(skb);
ret = nf_hook_slow(skb, state, e, 0);
if (ret == 1)
list_add_tail(&skb->list, &sublist);
}
/* Put passed packets back on main list */
list_splice(&sublist, head);
}
EXPORT_SYMBOL(nf_hook_slow_list);
/* This needs to be compiled in any case to avoid dependencies between the
* nfnetlink_queue code and nf_conntrack.
*/
......
......@@ -192,7 +192,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
#ifndef IP_SET_BITMAP_STORED_TIMEOUT
static inline bool
static bool
mtype_is_filled(const struct mtype_elem *x)
{
return true;
......
......@@ -55,7 +55,7 @@ struct bitmap_ip_adt_elem {
u16 id;
};
static inline u32
static u32
ip_to_id(const struct bitmap_ip *m, u32 ip)
{
return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts;
......@@ -63,33 +63,33 @@ ip_to_id(const struct bitmap_ip *m, u32 ip)
/* Common functions */
static inline int
static int
bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e,
struct bitmap_ip *map, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
static inline int
static int
bitmap_ip_gc_test(u16 id, const struct bitmap_ip *map, size_t dsize)
{
return !!test_bit(id, map->members);
}
static inline int
static int
bitmap_ip_do_add(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map,
u32 flags, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
static inline int
static int
bitmap_ip_do_del(const struct bitmap_ip_adt_elem *e, struct bitmap_ip *map)
{
return !test_and_clear_bit(e->id, map->members);
}
static inline int
static int
bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
size_t dsize)
{
......@@ -97,7 +97,7 @@ bitmap_ip_do_list(struct sk_buff *skb, const struct bitmap_ip *map, u32 id,
htonl(map->first_ip + id * map->hosts));
}
static inline int
static int
bitmap_ip_do_head(struct sk_buff *skb, const struct bitmap_ip *map)
{
return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
......@@ -237,6 +237,18 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map,
return true;
}
static u32
range_to_mask(u32 from, u32 to, u8 *bits)
{
u32 mask = 0xFFFFFFFE;
*bits = 32;
while (--(*bits) > 0 && mask && (to & mask) != from)
mask <<= 1;
return mask;
}
static int
bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
u32 flags)
......
......@@ -65,7 +65,7 @@ struct bitmap_ipmac_elem {
unsigned char filled;
} __aligned(__alignof__(u64));
static inline u32
static u32
ip_to_id(const struct bitmap_ipmac *m, u32 ip)
{
return ip - m->first_ip;
......@@ -79,7 +79,7 @@ ip_to_id(const struct bitmap_ipmac *m, u32 ip)
/* Common functions */
static inline int
static int
bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
const struct bitmap_ipmac *map, size_t dsize)
{
......@@ -94,7 +94,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
return -EAGAIN;
}
static inline int
static int
bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
{
const struct bitmap_ipmac_elem *elem;
......@@ -106,13 +106,13 @@ bitmap_ipmac_gc_test(u16 id, const struct bitmap_ipmac *map, size_t dsize)
return elem->filled == MAC_FILLED;
}
static inline int
static int
bitmap_ipmac_is_filled(const struct bitmap_ipmac_elem *elem)
{
return elem->filled == MAC_FILLED;
}
static inline int
static int
bitmap_ipmac_add_timeout(unsigned long *timeout,
const struct bitmap_ipmac_adt_elem *e,
const struct ip_set_ext *ext, struct ip_set *set,
......@@ -139,7 +139,7 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
return 0;
}
static inline int
static int
bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
struct bitmap_ipmac *map, u32 flags, size_t dsize)
{
......@@ -177,14 +177,14 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
return IPSET_ADD_STORE_PLAIN_TIMEOUT;
}
static inline int
static int
bitmap_ipmac_do_del(const struct bitmap_ipmac_adt_elem *e,
struct bitmap_ipmac *map)
{
return !test_and_clear_bit(e->id, map->members);
}
static inline int
static int
bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
u32 id, size_t dsize)
{
......@@ -197,7 +197,7 @@ bitmap_ipmac_do_list(struct sk_buff *skb, const struct bitmap_ipmac *map,
nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, elem->ether));
}
static inline int
static int
bitmap_ipmac_do_head(struct sk_buff *skb, const struct bitmap_ipmac *map)
{
return nla_put_ipaddr4(skb, IPSET_ATTR_IP, htonl(map->first_ip)) ||
......
......@@ -46,7 +46,7 @@ struct bitmap_port_adt_elem {
u16 id;
};
static inline u16
static u16
port_to_id(const struct bitmap_port *m, u16 port)
{
return port - m->first_port;
......@@ -54,34 +54,34 @@ port_to_id(const struct bitmap_port *m, u16 port)
/* Common functions */
static inline int
static int
bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
const struct bitmap_port *map, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
static inline int
static int
bitmap_port_gc_test(u16 id, const struct bitmap_port *map, size_t dsize)
{
return !!test_bit(id, map->members);
}
static inline int
static int
bitmap_port_do_add(const struct bitmap_port_adt_elem *e,
struct bitmap_port *map, u32 flags, size_t dsize)
{
return !!test_bit(e->id, map->members);
}
static inline int
static int
bitmap_port_do_del(const struct bitmap_port_adt_elem *e,
struct bitmap_port *map)
{
return !test_and_clear_bit(e->id, map->members);
}
static inline int
static int
bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
size_t dsize)
{
......@@ -89,13 +89,40 @@ bitmap_port_do_list(struct sk_buff *skb, const struct bitmap_port *map, u32 id,
htons(map->first_port + id));
}
static inline int
static int
bitmap_port_do_head(struct sk_buff *skb, const struct bitmap_port *map)
{
return nla_put_net16(skb, IPSET_ATTR_PORT, htons(map->first_port)) ||
nla_put_net16(skb, IPSET_ATTR_PORT_TO, htons(map->last_port));
}
static bool
ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
{
bool ret;
u8 proto;
switch (pf) {
case NFPROTO_IPV4:
ret = ip_set_get_ip4_port(skb, src, port, &proto);
break;
case NFPROTO_IPV6:
ret = ip_set_get_ip6_port(skb, src, port, &proto);
break;
default:
return false;
}
if (!ret)
return ret;
switch (proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
return true;
default:
return false;
}
}
static int
bitmap_port_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
......
......@@ -35,7 +35,7 @@ struct ip_set_net {
static unsigned int ip_set_net_id __read_mostly;
static inline struct ip_set_net *ip_set_pernet(struct net *net)
static struct ip_set_net *ip_set_pernet(struct net *net)
{
return net_generic(net, ip_set_net_id);
}
......@@ -67,13 +67,13 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
* serialized by ip_set_type_mutex.
*/
static inline void
static void
ip_set_type_lock(void)
{
mutex_lock(&ip_set_type_mutex);
}
static inline void
static void
ip_set_type_unlock(void)
{
mutex_unlock(&ip_set_type_mutex);
......@@ -277,7 +277,7 @@ ip_set_free(void *members)
}
EXPORT_SYMBOL_GPL(ip_set_free);
static inline bool
static bool
flag_nested(const struct nlattr *nla)
{
return nla->nla_type & NLA_F_NESTED;
......@@ -325,6 +325,83 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
}
EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
static u32
ip_set_timeout_get(const unsigned long *timeout)
{
u32 t;
if (*timeout == IPSET_ELEM_PERMANENT)
return 0;
t = jiffies_to_msecs(*timeout - jiffies) / MSEC_PER_SEC;
/* Zero value in userspace means no timeout */
return t == 0 ? 1 : t;
}
static char *
ip_set_comment_uget(struct nlattr *tb)
{
return nla_data(tb);
}
/* Called from uadd only, protected by the set spinlock.
* The kadt functions don't use the comment extensions in any way.
*/
void
ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
const struct ip_set_ext *ext)
{
struct ip_set_comment_rcu *c = rcu_dereference_protected(comment->c, 1);
size_t len = ext->comment ? strlen(ext->comment) : 0;
if (unlikely(c)) {
set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
}
if (!len)
return;
if (unlikely(len > IPSET_MAX_COMMENT_SIZE))
len = IPSET_MAX_COMMENT_SIZE;
c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC);
if (unlikely(!c))
return;
strlcpy(c->str, ext->comment, len + 1);
set->ext_size += sizeof(*c) + strlen(c->str) + 1;
rcu_assign_pointer(comment->c, c);
}
EXPORT_SYMBOL_GPL(ip_set_init_comment);
/* Used only when dumping a set, protected by rcu_read_lock() */
static int
ip_set_put_comment(struct sk_buff *skb, const struct ip_set_comment *comment)
{
struct ip_set_comment_rcu *c = rcu_dereference(comment->c);
if (!c)
return 0;
return nla_put_string(skb, IPSET_ATTR_COMMENT, c->str);
}
/* Called from uadd/udel, flush or the garbage collectors protected
* by the set spinlock.
* Called when the set is destroyed and when there can't be any user
* of the set data anymore.
*/
static void
ip_set_comment_free(struct ip_set *set, void *ptr)
{
struct ip_set_comment *comment = ptr;
struct ip_set_comment_rcu *c;
c = rcu_dereference_protected(comment->c, 1);
if (unlikely(!c))
return;
set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
}
typedef void (*destroyer)(struct ip_set *, void *);
/* ipset data extension types, in size order */
......@@ -351,12 +428,12 @@ const struct ip_set_ext_type ip_set_extensions[] = {
.flag = IPSET_FLAG_WITH_COMMENT,
.len = sizeof(struct ip_set_comment),
.align = __alignof__(struct ip_set_comment),
.destroy = (destroyer) ip_set_comment_free,
.destroy = ip_set_comment_free,
},
};
EXPORT_SYMBOL_GPL(ip_set_extensions);
static inline bool
static bool
add_extension(enum ip_set_ext_id id, u32 flags, struct nlattr *tb[])
{
return ip_set_extensions[id].flag ?
......@@ -446,6 +523,46 @@ ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[],
}
EXPORT_SYMBOL_GPL(ip_set_get_extensions);
static u64
ip_set_get_bytes(const struct ip_set_counter *counter)
{
return (u64)atomic64_read(&(counter)->bytes);
}
static u64
ip_set_get_packets(const struct ip_set_counter *counter)
{
return (u64)atomic64_read(&(counter)->packets);
}
static bool
ip_set_put_counter(struct sk_buff *skb, const struct ip_set_counter *counter)
{
return nla_put_net64(skb, IPSET_ATTR_BYTES,
cpu_to_be64(ip_set_get_bytes(counter)),
IPSET_ATTR_PAD) ||
nla_put_net64(skb, IPSET_ATTR_PACKETS,
cpu_to_be64(ip_set_get_packets(counter)),
IPSET_ATTR_PAD);
}
static bool
ip_set_put_skbinfo(struct sk_buff *skb, const struct ip_set_skbinfo *skbinfo)
{
/* Send nonzero parameters only */
return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
nla_put_net64(skb, IPSET_ATTR_SKBMARK,
cpu_to_be64((u64)skbinfo->skbmark << 32 |
skbinfo->skbmarkmask),
IPSET_ATTR_PAD)) ||
(skbinfo->skbprio &&
nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
cpu_to_be32(skbinfo->skbprio))) ||
(skbinfo->skbqueue &&
nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
cpu_to_be16(skbinfo->skbqueue)));
}
int
ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
const void *e, bool active)
......@@ -471,6 +588,55 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
}
EXPORT_SYMBOL_GPL(ip_set_put_extensions);
static bool
ip_set_match_counter(u64 counter, u64 match, u8 op)
{
switch (op) {
case IPSET_COUNTER_NONE:
return true;
case IPSET_COUNTER_EQ:
return counter == match;
case IPSET_COUNTER_NE:
return counter != match;
case IPSET_COUNTER_LT:
return counter < match;
case IPSET_COUNTER_GT:
return counter > match;
}
return false;
}
static void
ip_set_add_bytes(u64 bytes, struct ip_set_counter *counter)
{
atomic64_add((long long)bytes, &(counter)->bytes);
}
static void
ip_set_add_packets(u64 packets, struct ip_set_counter *counter)
{
atomic64_add((long long)packets, &(counter)->packets);
}
static void
ip_set_update_counter(struct ip_set_counter *counter,
const struct ip_set_ext *ext, u32 flags)
{
if (ext->packets != ULLONG_MAX &&
!(flags & IPSET_FLAG_SKIP_COUNTER_UPDATE)) {
ip_set_add_bytes(ext->bytes, counter);
ip_set_add_packets(ext->packets, counter);
}
}
static void
ip_set_get_skbinfo(struct ip_set_skbinfo *skbinfo,
const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags)
{
mext->skbinfo = *skbinfo;
}
bool
ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags, void *data)
......@@ -506,7 +672,7 @@ EXPORT_SYMBOL_GPL(ip_set_match_extensions);
* The set behind an index may change by swapping only, from userspace.
*/
static inline void
static void
__ip_set_get(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
......@@ -514,7 +680,7 @@ __ip_set_get(struct ip_set *set)
write_unlock_bh(&ip_set_ref_lock);
}
static inline void
static void
__ip_set_put(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
......@@ -526,7 +692,7 @@ __ip_set_put(struct ip_set *set)
/* set->ref can be swapped out by ip_set_swap, netlink events (like dump) need
* a separate reference counter
*/
static inline void
static void
__ip_set_put_netlink(struct ip_set *set)
{
write_lock_bh(&ip_set_ref_lock);
......@@ -541,7 +707,7 @@ __ip_set_put_netlink(struct ip_set *set)
* so it can't be destroyed (or changed) under our foot.
*/
static inline struct ip_set *
static struct ip_set *
ip_set_rcu_get(struct net *net, ip_set_id_t index)
{
struct ip_set *set;
......@@ -670,7 +836,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_byname);
*
*/
static inline void
static void
__ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
{
struct ip_set *set;
......@@ -1252,6 +1418,30 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
#define DUMP_TYPE(arg) (((u32)(arg)) & 0x0000FFFF)
#define DUMP_FLAGS(arg) (((u32)(arg)) >> 16)
int
ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
{
u32 cadt_flags = 0;
if (SET_WITH_TIMEOUT(set))
if (unlikely(nla_put_net32(skb, IPSET_ATTR_TIMEOUT,
htonl(set->timeout))))
return -EMSGSIZE;
if (SET_WITH_COUNTER(set))
cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
if (SET_WITH_COMMENT(set))
cadt_flags |= IPSET_FLAG_WITH_COMMENT;
if (SET_WITH_SKBINFO(set))
cadt_flags |= IPSET_FLAG_WITH_SKBINFO;
if (SET_WITH_FORCEADD(set))
cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
if (!cadt_flags)
return 0;
return nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(cadt_flags));
}
EXPORT_SYMBOL_GPL(ip_set_put_flags);
static int
ip_set_dump_done(struct netlink_callback *cb)
{
......
......@@ -148,31 +148,3 @@ ip_set_get_ip6_port(const struct sk_buff *skb, bool src,
}
EXPORT_SYMBOL_GPL(ip_set_get_ip6_port);
#endif
bool
ip_set_get_ip_port(const struct sk_buff *skb, u8 pf, bool src, __be16 *port)
{
bool ret;
u8 proto;
switch (pf) {
case NFPROTO_IPV4:
ret = ip_set_get_ip4_port(skb, src, port, &proto);
break;
case NFPROTO_IPV6:
ret = ip_set_get_ip6_port(skb, src, port, &proto);
break;
default:
return false;
}
if (!ret)
return ret;
switch (proto) {
case IPPROTO_TCP:
case IPPROTO_UDP:
return true;
default:
return false;
}
}
EXPORT_SYMBOL_GPL(ip_set_get_ip_port);
......@@ -39,7 +39,7 @@
#ifdef IP_SET_HASH_WITH_MULTI
#define AHASH_MAX(h) ((h)->ahash_max)
static inline u8
static u8
tune_ahash_max(u8 curr, u32 multi)
{
u32 n;
......@@ -909,7 +909,7 @@ mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext,
return ret;
}
static inline int
static int
mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
struct ip_set_ext *mext, struct ip_set *set, u32 flags)
{
......
......@@ -44,7 +44,7 @@ struct hash_ip4_elem {
/* Common functions */
static inline bool
static bool
hash_ip4_data_equal(const struct hash_ip4_elem *e1,
const struct hash_ip4_elem *e2,
u32 *multi)
......@@ -63,7 +63,7 @@ hash_ip4_data_list(struct sk_buff *skb, const struct hash_ip4_elem *e)
return true;
}
static inline void
static void
hash_ip4_data_next(struct hash_ip4_elem *next, const struct hash_ip4_elem *e)
{
next->ip = e->ip;
......@@ -171,7 +171,7 @@ struct hash_ip6_elem {
/* Common functions */
static inline bool
static bool
hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
const struct hash_ip6_elem *ip2,
u32 *multi)
......@@ -179,7 +179,7 @@ hash_ip6_data_equal(const struct hash_ip6_elem *ip1,
return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6);
}
static inline void
static void
hash_ip6_netmask(union nf_inet_addr *ip, u8 prefix)
{
ip6_netmask(ip, prefix);
......@@ -196,7 +196,7 @@ hash_ip6_data_list(struct sk_buff *skb, const struct hash_ip6_elem *e)
return true;
}
static inline void
static void
hash_ip6_data_next(struct hash_ip6_elem *next, const struct hash_ip6_elem *e)
{
}
......
......@@ -47,7 +47,7 @@ struct hash_ipmac4_elem {
/* Common functions */
static inline bool
static bool
hash_ipmac4_data_equal(const struct hash_ipmac4_elem *e1,
const struct hash_ipmac4_elem *e2,
u32 *multi)
......@@ -67,7 +67,7 @@ hash_ipmac4_data_list(struct sk_buff *skb, const struct hash_ipmac4_elem *e)
return true;
}
static inline void
static void
hash_ipmac4_data_next(struct hash_ipmac4_elem *next,
const struct hash_ipmac4_elem *e)
{
......@@ -154,7 +154,7 @@ struct hash_ipmac6_elem {
/* Common functions */
static inline bool
static bool
hash_ipmac6_data_equal(const struct hash_ipmac6_elem *e1,
const struct hash_ipmac6_elem *e2,
u32 *multi)
......@@ -175,7 +175,7 @@ hash_ipmac6_data_list(struct sk_buff *skb, const struct hash_ipmac6_elem *e)
return true;
}
static inline void
static void
hash_ipmac6_data_next(struct hash_ipmac6_elem *next,
const struct hash_ipmac6_elem *e)
{
......
......@@ -42,7 +42,7 @@ struct hash_ipmark4_elem {
/* Common functions */
static inline bool
static bool
hash_ipmark4_data_equal(const struct hash_ipmark4_elem *ip1,
const struct hash_ipmark4_elem *ip2,
u32 *multi)
......@@ -64,7 +64,7 @@ hash_ipmark4_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_ipmark4_data_next(struct hash_ipmark4_elem *next,
const struct hash_ipmark4_elem *d)
{
......@@ -165,7 +165,7 @@ struct hash_ipmark6_elem {
/* Common functions */
static inline bool
static bool
hash_ipmark6_data_equal(const struct hash_ipmark6_elem *ip1,
const struct hash_ipmark6_elem *ip2,
u32 *multi)
......@@ -187,7 +187,7 @@ hash_ipmark6_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_ipmark6_data_next(struct hash_ipmark6_elem *next,
const struct hash_ipmark6_elem *d)
{
......
......@@ -47,7 +47,7 @@ struct hash_ipport4_elem {
/* Common functions */
static inline bool
static bool
hash_ipport4_data_equal(const struct hash_ipport4_elem *ip1,
const struct hash_ipport4_elem *ip2,
u32 *multi)
......@@ -71,7 +71,7 @@ hash_ipport4_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_ipport4_data_next(struct hash_ipport4_elem *next,
const struct hash_ipport4_elem *d)
{
......@@ -202,7 +202,7 @@ struct hash_ipport6_elem {
/* Common functions */
static inline bool
static bool
hash_ipport6_data_equal(const struct hash_ipport6_elem *ip1,
const struct hash_ipport6_elem *ip2,
u32 *multi)
......@@ -226,7 +226,7 @@ hash_ipport6_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_ipport6_data_next(struct hash_ipport6_elem *next,
const struct hash_ipport6_elem *d)
{
......
......@@ -46,7 +46,7 @@ struct hash_ipportip4_elem {
u8 padding;
};
static inline bool
static bool
hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
const struct hash_ipportip4_elem *ip2,
u32 *multi)
......@@ -72,7 +72,7 @@ hash_ipportip4_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_ipportip4_data_next(struct hash_ipportip4_elem *next,
const struct hash_ipportip4_elem *d)
{
......@@ -210,7 +210,7 @@ struct hash_ipportip6_elem {
/* Common functions */
static inline bool
static bool
hash_ipportip6_data_equal(const struct hash_ipportip6_elem *ip1,
const struct hash_ipportip6_elem *ip2,
u32 *multi)
......@@ -236,7 +236,7 @@ hash_ipportip6_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_ipportip6_data_next(struct hash_ipportip6_elem *next,
const struct hash_ipportip6_elem *d)
{
......
......@@ -59,7 +59,7 @@ struct hash_ipportnet4_elem {
/* Common functions */
static inline bool
static bool
hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
const struct hash_ipportnet4_elem *ip2,
u32 *multi)
......@@ -71,25 +71,25 @@ hash_ipportnet4_data_equal(const struct hash_ipportnet4_elem *ip1,
ip1->proto == ip2->proto;
}
static inline int
static int
hash_ipportnet4_do_data_match(const struct hash_ipportnet4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
static void
hash_ipportnet4_data_set_flags(struct hash_ipportnet4_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
static inline void
static void
hash_ipportnet4_data_reset_flags(struct hash_ipportnet4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
static inline void
static void
hash_ipportnet4_data_netmask(struct hash_ipportnet4_elem *elem, u8 cidr)
{
elem->ip2 &= ip_set_netmask(cidr);
......@@ -116,7 +116,7 @@ hash_ipportnet4_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_ipportnet4_data_next(struct hash_ipportnet4_elem *next,
const struct hash_ipportnet4_elem *d)
{
......@@ -308,7 +308,7 @@ struct hash_ipportnet6_elem {
/* Common functions */
static inline bool
static bool
hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
const struct hash_ipportnet6_elem *ip2,
u32 *multi)
......@@ -320,25 +320,25 @@ hash_ipportnet6_data_equal(const struct hash_ipportnet6_elem *ip1,
ip1->proto == ip2->proto;
}
static inline int
static int
hash_ipportnet6_do_data_match(const struct hash_ipportnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
static void
hash_ipportnet6_data_set_flags(struct hash_ipportnet6_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
static inline void
static void
hash_ipportnet6_data_reset_flags(struct hash_ipportnet6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
static inline void
static void
hash_ipportnet6_data_netmask(struct hash_ipportnet6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip2, cidr);
......@@ -365,7 +365,7 @@ hash_ipportnet6_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_ipportnet6_data_next(struct hash_ipportnet6_elem *next,
const struct hash_ipportnet6_elem *d)
{
......
......@@ -37,7 +37,7 @@ struct hash_mac4_elem {
/* Common functions */
static inline bool
static bool
hash_mac4_data_equal(const struct hash_mac4_elem *e1,
const struct hash_mac4_elem *e2,
u32 *multi)
......@@ -45,7 +45,7 @@ hash_mac4_data_equal(const struct hash_mac4_elem *e1,
return ether_addr_equal(e1->ether, e2->ether);
}
static inline bool
static bool
hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
{
if (nla_put(skb, IPSET_ATTR_ETHER, ETH_ALEN, e->ether))
......@@ -56,7 +56,7 @@ hash_mac4_data_list(struct sk_buff *skb, const struct hash_mac4_elem *e)
return true;
}
static inline void
static void
hash_mac4_data_next(struct hash_mac4_elem *next,
const struct hash_mac4_elem *e)
{
......
......@@ -47,7 +47,7 @@ struct hash_net4_elem {
/* Common functions */
static inline bool
static bool
hash_net4_data_equal(const struct hash_net4_elem *ip1,
const struct hash_net4_elem *ip2,
u32 *multi)
......@@ -56,25 +56,25 @@ hash_net4_data_equal(const struct hash_net4_elem *ip1,
ip1->cidr == ip2->cidr;
}
static inline int
static int
hash_net4_do_data_match(const struct hash_net4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
static void
hash_net4_data_set_flags(struct hash_net4_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
static inline void
static void
hash_net4_data_reset_flags(struct hash_net4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
static inline void
static void
hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr)
{
elem->ip &= ip_set_netmask(cidr);
......@@ -97,7 +97,7 @@ hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data)
return true;
}
static inline void
static void
hash_net4_data_next(struct hash_net4_elem *next,
const struct hash_net4_elem *d)
{
......@@ -212,7 +212,7 @@ struct hash_net6_elem {
/* Common functions */
static inline bool
static bool
hash_net6_data_equal(const struct hash_net6_elem *ip1,
const struct hash_net6_elem *ip2,
u32 *multi)
......@@ -221,25 +221,25 @@ hash_net6_data_equal(const struct hash_net6_elem *ip1,
ip1->cidr == ip2->cidr;
}
static inline int
static int
hash_net6_do_data_match(const struct hash_net6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
static void
hash_net6_data_set_flags(struct hash_net6_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
static inline void
static void
hash_net6_data_reset_flags(struct hash_net6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
static inline void
static void
hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip, cidr);
......@@ -262,7 +262,7 @@ hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data)
return true;
}
static inline void
static void
hash_net6_data_next(struct hash_net6_elem *next,
const struct hash_net6_elem *d)
{
......
......@@ -62,7 +62,7 @@ struct hash_netiface4_elem {
/* Common functions */
static inline bool
static bool
hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
const struct hash_netiface4_elem *ip2,
u32 *multi)
......@@ -74,25 +74,25 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
strcmp(ip1->iface, ip2->iface) == 0;
}
static inline int
static int
hash_netiface4_do_data_match(const struct hash_netiface4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
static void
hash_netiface4_data_set_flags(struct hash_netiface4_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
static inline void
static void
hash_netiface4_data_reset_flags(struct hash_netiface4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
static inline void
static void
hash_netiface4_data_netmask(struct hash_netiface4_elem *elem, u8 cidr)
{
elem->ip &= ip_set_netmask(cidr);
......@@ -119,7 +119,7 @@ hash_netiface4_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_netiface4_data_next(struct hash_netiface4_elem *next,
const struct hash_netiface4_elem *d)
{
......@@ -285,7 +285,7 @@ struct hash_netiface6_elem {
/* Common functions */
static inline bool
static bool
hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
const struct hash_netiface6_elem *ip2,
u32 *multi)
......@@ -297,25 +297,25 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
strcmp(ip1->iface, ip2->iface) == 0;
}
static inline int
static int
hash_netiface6_do_data_match(const struct hash_netiface6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
static void
hash_netiface6_data_set_flags(struct hash_netiface6_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
static inline void
static void
hash_netiface6_data_reset_flags(struct hash_netiface6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
static inline void
static void
hash_netiface6_data_netmask(struct hash_netiface6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip, cidr);
......@@ -342,7 +342,7 @@ hash_netiface6_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_netiface6_data_next(struct hash_netiface6_elem *next,
const struct hash_netiface6_elem *d)
{
......
......@@ -52,7 +52,7 @@ struct hash_netnet4_elem {
/* Common functions */
static inline bool
static bool
hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
const struct hash_netnet4_elem *ip2,
u32 *multi)
......@@ -61,32 +61,32 @@ hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
ip1->ccmp == ip2->ccmp;
}
static inline int
static int
hash_netnet4_do_data_match(const struct hash_netnet4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
static void
hash_netnet4_data_set_flags(struct hash_netnet4_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
static inline void
static void
hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
static inline void
static void
hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem,
struct hash_netnet4_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
static inline void
static void
hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner)
{
if (inner) {
......@@ -117,7 +117,7 @@ hash_netnet4_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_netnet4_data_next(struct hash_netnet4_elem *next,
const struct hash_netnet4_elem *d)
{
......@@ -282,7 +282,7 @@ struct hash_netnet6_elem {
/* Common functions */
static inline bool
static bool
hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
const struct hash_netnet6_elem *ip2,
u32 *multi)
......@@ -292,32 +292,32 @@ hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
ip1->ccmp == ip2->ccmp;
}
static inline int
static int
hash_netnet6_do_data_match(const struct hash_netnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
static void
hash_netnet6_data_set_flags(struct hash_netnet6_elem *elem, u32 flags)
{
elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH;
}
static inline void
static void
hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
static inline void
static void
hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem,
struct hash_netnet6_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
static inline void
static void
hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner)
{
if (inner) {
......@@ -348,7 +348,7 @@ hash_netnet6_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_netnet6_data_next(struct hash_netnet6_elem *next,
const struct hash_netnet6_elem *d)
{
......
......@@ -57,7 +57,7 @@ struct hash_netport4_elem {
/* Common functions */
static inline bool
static bool
hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
const struct hash_netport4_elem *ip2,
u32 *multi)
......@@ -68,25 +68,25 @@ hash_netport4_data_equal(const struct hash_netport4_elem *ip1,
ip1->cidr == ip2->cidr;
}
static inline int
static int
hash_netport4_do_data_match(const struct hash_netport4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
static void
hash_netport4_data_set_flags(struct hash_netport4_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
static inline void
static void
hash_netport4_data_reset_flags(struct hash_netport4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
static inline void
static void
hash_netport4_data_netmask(struct hash_netport4_elem *elem, u8 cidr)
{
elem->ip &= ip_set_netmask(cidr);
......@@ -112,7 +112,7 @@ hash_netport4_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_netport4_data_next(struct hash_netport4_elem *next,
const struct hash_netport4_elem *d)
{
......@@ -270,7 +270,7 @@ struct hash_netport6_elem {
/* Common functions */
static inline bool
static bool
hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
const struct hash_netport6_elem *ip2,
u32 *multi)
......@@ -281,25 +281,25 @@ hash_netport6_data_equal(const struct hash_netport6_elem *ip1,
ip1->cidr == ip2->cidr;
}
static inline int
static int
hash_netport6_do_data_match(const struct hash_netport6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
static void
hash_netport6_data_set_flags(struct hash_netport6_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
static inline void
static void
hash_netport6_data_reset_flags(struct hash_netport6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
static inline void
static void
hash_netport6_data_netmask(struct hash_netport6_elem *elem, u8 cidr)
{
ip6_netmask(&elem->ip, cidr);
......@@ -325,7 +325,7 @@ hash_netport6_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_netport6_data_next(struct hash_netport6_elem *next,
const struct hash_netport6_elem *d)
{
......
......@@ -56,7 +56,7 @@ struct hash_netportnet4_elem {
/* Common functions */
static inline bool
static bool
hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
const struct hash_netportnet4_elem *ip2,
u32 *multi)
......@@ -67,32 +67,32 @@ hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
ip1->proto == ip2->proto;
}
static inline int
static int
hash_netportnet4_do_data_match(const struct hash_netportnet4_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
static void
hash_netportnet4_data_set_flags(struct hash_netportnet4_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
static inline void
static void
hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
static inline void
static void
hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem,
struct hash_netportnet4_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
static inline void
static void
hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem,
u8 cidr, bool inner)
{
......@@ -126,7 +126,7 @@ hash_netportnet4_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
const struct hash_netportnet4_elem *d)
{
......@@ -331,7 +331,7 @@ struct hash_netportnet6_elem {
/* Common functions */
static inline bool
static bool
hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
const struct hash_netportnet6_elem *ip2,
u32 *multi)
......@@ -343,32 +343,32 @@ hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
ip1->proto == ip2->proto;
}
static inline int
static int
hash_netportnet6_do_data_match(const struct hash_netportnet6_elem *elem)
{
return elem->nomatch ? -ENOTEMPTY : 1;
}
static inline void
static void
hash_netportnet6_data_set_flags(struct hash_netportnet6_elem *elem, u32 flags)
{
elem->nomatch = !!((flags >> 16) & IPSET_FLAG_NOMATCH);
}
static inline void
static void
hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags)
{
swap(*flags, elem->nomatch);
}
static inline void
static void
hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem,
struct hash_netportnet6_elem *orig)
{
elem->ip[1] = orig->ip[1];
}
static inline void
static void
hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem,
u8 cidr, bool inner)
{
......@@ -402,7 +402,7 @@ hash_netportnet6_data_list(struct sk_buff *skb,
return true;
}
static inline void
static void
hash_netportnet6_data_next(struct hash_netportnet6_elem *next,
const struct hash_netportnet6_elem *d)
{
......
......@@ -149,7 +149,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
kfree(e);
}
static inline void
static void
list_set_del(struct ip_set *set, struct set_elem *e)
{
struct list_set *map = set->data;
......@@ -160,7 +160,7 @@ list_set_del(struct ip_set *set, struct set_elem *e)
call_rcu(&e->rcu, __list_set_del_rcu);
}
static inline void
static void
list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem *old)
{
struct list_set *map = set->data;
......
......@@ -2402,18 +2402,22 @@ static int __net_init __ip_vs_init(struct net *net)
return -ENOMEM;
}
static void __net_exit __ip_vs_cleanup(struct net *net)
static void __net_exit __ip_vs_cleanup_batch(struct list_head *net_list)
{
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */
ip_vs_conn_net_cleanup(ipvs);
ip_vs_app_net_cleanup(ipvs);
ip_vs_protocol_net_cleanup(ipvs);
ip_vs_control_net_cleanup(ipvs);
ip_vs_estimator_net_cleanup(ipvs);
IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
net->ipvs = NULL;
struct netns_ipvs *ipvs;
struct net *net;
ip_vs_service_nets_cleanup(net_list); /* ip_vs_flush() with locks */
list_for_each_entry(net, net_list, exit_list) {
ipvs = net_ipvs(net);
ip_vs_conn_net_cleanup(ipvs);
ip_vs_app_net_cleanup(ipvs);
ip_vs_protocol_net_cleanup(ipvs);
ip_vs_control_net_cleanup(ipvs);
ip_vs_estimator_net_cleanup(ipvs);
IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
net->ipvs = NULL;
}
}
static int __net_init __ip_vs_dev_init(struct net *net)
......@@ -2429,27 +2433,32 @@ static int __net_init __ip_vs_dev_init(struct net *net)
return ret;
}
static void __net_exit __ip_vs_dev_cleanup(struct net *net)
static void __net_exit __ip_vs_dev_cleanup_batch(struct list_head *net_list)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct netns_ipvs *ipvs;
struct net *net;
EnterFunction(2);
nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
ipvs->enable = 0; /* Disable packet reception */
smp_wmb();
ip_vs_sync_net_cleanup(ipvs);
list_for_each_entry(net, net_list, exit_list) {
ipvs = net_ipvs(net);
nf_unregister_net_hooks(net, ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
ipvs->enable = 0; /* Disable packet reception */
smp_wmb();
ip_vs_sync_net_cleanup(ipvs);
}
LeaveFunction(2);
}
static struct pernet_operations ipvs_core_ops = {
.init = __ip_vs_init,
.exit = __ip_vs_cleanup,
.exit_batch = __ip_vs_cleanup_batch,
.id = &ip_vs_net_id,
.size = sizeof(struct netns_ipvs),
};
static struct pernet_operations ipvs_core_dev_ops = {
.init = __ip_vs_dev_init,
.exit = __ip_vs_dev_cleanup,
.exit_batch = __ip_vs_dev_cleanup_batch,
};
/*
......
......@@ -1607,14 +1607,20 @@ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
/*
* Delete service by {netns} in the service table.
* Called by __ip_vs_cleanup()
* Called by __ip_vs_batch_cleanup()
*/
void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
void ip_vs_service_nets_cleanup(struct list_head *net_list)
{
struct netns_ipvs *ipvs;
struct net *net;
EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
ip_vs_flush(ipvs, true);
list_for_each_entry(net, net_list, exit_list) {
ipvs = net_ipvs(net);
ip_vs_flush(ipvs, true);
}
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
}
......
......@@ -407,12 +407,9 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
goto err_put;
skb_dst_drop(skb);
if (noref) {
if (!local)
skb_dst_set_noref(skb, &rt->dst);
else
skb_dst_set(skb, dst_clone(&rt->dst));
} else
if (noref)
skb_dst_set_noref(skb, &rt->dst);
else
skb_dst_set(skb, &rt->dst);
return local;
......@@ -574,12 +571,9 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
goto err_put;
skb_dst_drop(skb);
if (noref) {
if (!local)
skb_dst_set_noref(skb, &rt->dst);
else
skb_dst_set(skb, dst_clone(&rt->dst));
} else
if (noref)
skb_dst_set_noref(skb, &rt->dst);
else
skb_dst_set(skb, &rt->dst);
return local;
......
......@@ -573,7 +573,6 @@ EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
nf_ct_ext_destroy(tmpl);
nf_ct_ext_free(tmpl);
if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK)
kfree((char *)tmpl - tmpl->proto.tmpl_padto);
......@@ -1417,7 +1416,6 @@ void nf_conntrack_free(struct nf_conn *ct)
WARN_ON(atomic_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
kmem_cache_free(nf_conntrack_cachep, ct);
smp_mb__before_atomic();
atomic_dec(&net->ct.count);
......
......@@ -30,6 +30,7 @@
static DEFINE_MUTEX(nf_ct_ecache_mutex);
#define ECACHE_RETRY_WAIT (HZ/10)
#define ECACHE_STACK_ALLOC (256 / sizeof(void *))
enum retry_state {
STATE_CONGESTED,
......@@ -39,11 +40,11 @@ enum retry_state {
static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
{
struct nf_conn *refs[16];
struct nf_conn *refs[ECACHE_STACK_ALLOC];
enum retry_state ret = STATE_DONE;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
unsigned int evicted = 0;
enum retry_state ret = STATE_DONE;
spin_lock(&pcpu->lock);
......@@ -54,10 +55,22 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu)
if (!nf_ct_is_confirmed(ct))
continue;
/* This ecache access is safe because the ct is on the
* pcpu dying list and we hold the spinlock -- the entry
* cannot be free'd until after the lock is released.
*
* This is true even if ct has a refcount of 0: the
* cpu that is about to free the entry must remove it
* from the dying list and needs the lock to do so.
*/
e = nf_ct_ecache_find(ct);
if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL)
continue;
/* ct is in NFCT_ECACHE_DESTROY_FAIL state, this means
* the worker owns this entry: the ct will remain valid
* until the worker puts its ct reference.
*/
if (nf_conntrack_event(IPCT_DESTROY, ct)) {
ret = STATE_CONGESTED;
break;
......@@ -189,15 +202,15 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct)
if (notify == NULL)
goto out_unlock;
if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
goto out_unlock;
e = nf_ct_ecache_find(ct);
if (e == NULL)
goto out_unlock;
events = xchg(&e->cache, 0);
if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct))
goto out_unlock;
/* We make a copy of the missed event cache without taking
* the lock, thus we may send missed events twice. However,
* this does not harm and it happens very rarely. */
......
......@@ -34,21 +34,24 @@ void nf_ct_ext_destroy(struct nf_conn *ct)
t->destroy(ct);
rcu_read_unlock();
}
kfree(ct->ext);
}
EXPORT_SYMBOL(nf_ct_ext_destroy);
void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
{
unsigned int newlen, newoff, oldlen, alloc;
struct nf_ct_ext *old, *new;
struct nf_ct_ext_type *t;
struct nf_ct_ext *new;
/* Conntrack must not be confirmed to avoid races on reallocation. */
WARN_ON(nf_ct_is_confirmed(ct));
old = ct->ext;
if (old) {
if (ct->ext) {
const struct nf_ct_ext *old = ct->ext;
if (__nf_ct_ext_exist(old, id))
return NULL;
oldlen = old->len;
......@@ -68,22 +71,18 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
rcu_read_unlock();
alloc = max(newlen, NF_CT_EXT_PREALLOC);
kmemleak_not_leak(old);
new = __krealloc(old, alloc, gfp);
new = krealloc(ct->ext, alloc, gfp);
if (!new)
return NULL;
if (!old) {
if (!ct->ext)
memset(new->offset, 0, sizeof(new->offset));
ct->ext = new;
} else if (new != old) {
kfree_rcu(old, rcu);
rcu_assign_pointer(ct->ext, new);
}
new->offset[id] = newoff;
new->len = newlen;
memset((void *)new + newoff, 0, newlen - newoff);
ct->ext = new;
return (void *)new + newoff;
}
EXPORT_SYMBOL(nf_ct_ext_add);
......
......@@ -506,9 +506,45 @@ static int ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
return -1;
}
/* all these functions access ct->ext. Caller must either hold a reference
* on ct or prevent its deletion by holding either the bucket spinlock or
* pcpu dying list lock.
*/
static int ctnetlink_dump_extinfo(struct sk_buff *skb,
struct nf_conn *ct, u32 type)
{
if (ctnetlink_dump_acct(skb, ct, type) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0 ||
ctnetlink_dump_helpinfo(skb, ct) < 0 ||
ctnetlink_dump_labels(skb, ct) < 0 ||
ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
ctnetlink_dump_ct_synproxy(skb, ct) < 0)
return -1;
return 0;
}
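As a hedged illustration of the first option in the comment above, a caller outside the locked paths would pin the entry before dumping. The fragment below reuses helpers visible elsewhere in this patch and is not itself part of it:

int err;

/* Illustrative fragment: take a reference so ct->ext cannot be freed
 * or reallocated under us, dump, then drop the reference. */
if (!atomic_inc_not_zero(&ct->ct_general.use))
        return 0;       /* entry already being destroyed; skip it */
err = ctnetlink_dump_extinfo(skb, ct, type);
nf_ct_put(ct);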
static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
{
if (ctnetlink_dump_status(skb, ct) < 0 ||
ctnetlink_dump_mark(skb, ct) < 0 ||
ctnetlink_dump_secctx(skb, ct) < 0 ||
ctnetlink_dump_id(skb, ct) < 0 ||
ctnetlink_dump_use(skb, ct) < 0 ||
ctnetlink_dump_master(skb, ct) < 0)
return -1;
if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
(ctnetlink_dump_timeout(skb, ct) < 0 ||
ctnetlink_dump_protoinfo(skb, ct) < 0))
return -1;
return 0;
}
static int
ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nf_conn *ct)
struct nf_conn *ct, bool extinfo)
{
const struct nf_conntrack_zone *zone;
struct nlmsghdr *nlh;
......@@ -552,23 +588,9 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
NF_CT_DEFAULT_ZONE_DIR) < 0)
goto nla_put_failure;
if (ctnetlink_dump_status(skb, ct) < 0 ||
ctnetlink_dump_acct(skb, ct, type) < 0 ||
ctnetlink_dump_timestamp(skb, ct) < 0 ||
ctnetlink_dump_helpinfo(skb, ct) < 0 ||
ctnetlink_dump_mark(skb, ct) < 0 ||
ctnetlink_dump_secctx(skb, ct) < 0 ||
ctnetlink_dump_labels(skb, ct) < 0 ||
ctnetlink_dump_id(skb, ct) < 0 ||
ctnetlink_dump_use(skb, ct) < 0 ||
ctnetlink_dump_master(skb, ct) < 0 ||
ctnetlink_dump_ct_seq_adj(skb, ct) < 0 ||
ctnetlink_dump_ct_synproxy(skb, ct) < 0)
if (ctnetlink_dump_info(skb, ct) < 0)
goto nla_put_failure;
if (!test_bit(IPS_OFFLOAD_BIT, &ct->status) &&
(ctnetlink_dump_timeout(skb, ct) < 0 ||
ctnetlink_dump_protoinfo(skb, ct) < 0))
if (extinfo && ctnetlink_dump_extinfo(skb, ct, type) < 0)
goto nla_put_failure;
nlmsg_end(skb, nlh);
......@@ -953,13 +975,11 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
if (!ctnetlink_filter_match(ct, cb->data))
continue;
rcu_read_lock();
res =
ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
ct);
rcu_read_unlock();
ct, true);
if (res < 0) {
nf_conntrack_get(&ct->ct_general);
cb->args[1] = (unsigned long)ct;
......@@ -1364,10 +1384,8 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
return -ENOMEM;
}
rcu_read_lock();
err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
NFNL_MSG_TYPE(nlh->nlmsg_type), ct);
rcu_read_unlock();
NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true);
nf_ct_put(ct);
if (err <= 0)
goto free;
......@@ -1429,12 +1447,18 @@ ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, bool dying
continue;
cb->args[1] = 0;
}
rcu_read_lock();
/* We can't dump extension info for the unconfirmed
* list because unconfirmed conntracks can have
* ct->ext reallocated (and thus freed).
*
* In the dying list case ct->ext can't be free'd
* until after we drop pcpu->lock.
*/
res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
ct);
rcu_read_unlock();
ct, dying ? true : false);
if (res < 0) {
if (!atomic_inc_not_zero(&ct->ct_general.use))
continue;
......
......@@ -132,13 +132,13 @@ static void nft_flow_offload_common_init(struct flow_cls_common_offload *common,
common->extack = extack;
}
static int nft_setup_cb_call(struct nft_base_chain *basechain,
enum tc_setup_type type, void *type_data)
static int nft_setup_cb_call(enum tc_setup_type type, void *type_data,
struct list_head *cb_list)
{
struct flow_block_cb *block_cb;
int err;
list_for_each_entry(block_cb, &basechain->flow_block.cb_list, list) {
list_for_each_entry(block_cb, cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err < 0)
return err;
......@@ -155,32 +155,44 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain)
return 0;
}
static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
const struct nft_base_chain *basechain,
const struct nft_rule *rule,
const struct nft_flow_rule *flow,
enum flow_cls_command command)
{
struct netlink_ext_ack extack;
__be16 proto = ETH_P_ALL;
memset(cls_flow, 0, sizeof(*cls_flow));
if (flow)
proto = flow->proto;
nft_flow_offload_common_init(&cls_flow->common, proto,
basechain->ops.priority, &extack);
cls_flow->command = command;
cls_flow->cookie = (unsigned long) rule;
if (flow)
cls_flow->rule = flow->rule;
}
static int nft_flow_offload_rule(struct nft_chain *chain,
struct nft_rule *rule,
struct nft_flow_rule *flow,
enum flow_cls_command command)
{
struct flow_cls_offload cls_flow = {};
struct flow_cls_offload cls_flow;
struct nft_base_chain *basechain;
struct netlink_ext_ack extack;
__be16 proto = ETH_P_ALL;
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, command);
if (flow)
proto = flow->proto;
nft_flow_offload_common_init(&cls_flow.common, proto,
basechain->ops.priority, &extack);
cls_flow.command = command;
cls_flow.cookie = (unsigned long) rule;
if (flow)
cls_flow.rule = flow->rule;
return nft_setup_cb_call(basechain, TC_SETUP_CLSFLOWER, &cls_flow);
return nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow,
&basechain->flow_block.cb_list);
}
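With the descriptor setup factored out, nft_flow_offload_rule() above and the unbind path below can build the same flow_cls_offload from one helper instead of duplicating the initialization at each call site.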
static int nft_flow_offload_bind(struct flow_block_offload *bo,
......@@ -194,6 +206,16 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
struct nft_base_chain *basechain)
{
struct flow_block_cb *block_cb, *next;
struct flow_cls_offload cls_flow;
struct nft_chain *chain;
struct nft_rule *rule;
chain = &basechain->chain;
list_for_each_entry(rule, &chain->rules, list) {
nft_flow_cls_offload_setup(&cls_flow, basechain, rule, NULL,
FLOW_CLS_DESTROY);
nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, &bo->cb_list);
}
list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
list_del(&block_cb->list);
......@@ -224,20 +246,30 @@ static int nft_block_setup(struct nft_base_chain *basechain,
return err;
}
static void nft_flow_block_offload_init(struct flow_block_offload *bo,
struct net *net,
enum flow_block_command cmd,
struct nft_base_chain *basechain,
struct netlink_ext_ack *extack)
{
memset(bo, 0, sizeof(*bo));
bo->net = net;
bo->block = &basechain->flow_block;
bo->command = cmd;
bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo->extack = extack;
INIT_LIST_HEAD(&bo->cb_list);
}
static int nft_block_offload_cmd(struct nft_base_chain *chain,
struct net_device *dev,
enum flow_block_command cmd)
{
struct netlink_ext_ack extack = {};
struct flow_block_offload bo = {};
struct flow_block_offload bo;
int err;
bo.net = dev_net(dev);
bo.block = &chain->flow_block;
bo.command = cmd;
bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo.extack = &extack;
INIT_LIST_HEAD(&bo.cb_list);
nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
if (err < 0)
......@@ -253,17 +285,12 @@ static void nft_indr_block_ing_cmd(struct net_device *dev,
enum flow_block_command cmd)
{
struct netlink_ext_ack extack = {};
struct flow_block_offload bo = {};
struct flow_block_offload bo;
if (!chain)
return;
bo.net = dev_net(dev);
bo.block = &chain->flow_block;
bo.command = cmd;
bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo.extack = &extack;
INIT_LIST_HEAD(&bo.cb_list);
nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
......@@ -274,15 +301,10 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
struct net_device *dev,
enum flow_block_command cmd)
{
struct flow_block_offload bo = {};
struct netlink_ext_ack extack = {};
struct flow_block_offload bo;
bo.net = dev_net(dev);
bo.block = &chain->flow_block;
bo.command = cmd;
bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo.extack = &extack;
INIT_LIST_HEAD(&bo.cb_list);
nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
flow_indr_block_call(dev, &bo, cmd);
......@@ -294,32 +316,73 @@ static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
static int nft_flow_offload_chain(struct nft_chain *chain,
u8 *ppolicy,
static int nft_chain_offload_cmd(struct nft_base_chain *basechain,
struct net_device *dev,
enum flow_block_command cmd)
{
int err;
if (dev->netdev_ops->ndo_setup_tc)
err = nft_block_offload_cmd(basechain, dev, cmd);
else
err = nft_indr_block_offload_cmd(basechain, dev, cmd);
return err;
}
static int nft_flow_block_chain(struct nft_base_chain *basechain,
const struct net_device *this_dev,
enum flow_block_command cmd)
{
struct net_device *dev;
struct nft_hook *hook;
int err, i = 0;
list_for_each_entry(hook, &basechain->hook_list, list) {
dev = hook->ops.dev;
if (this_dev && this_dev != dev)
continue;
err = nft_chain_offload_cmd(basechain, dev, cmd);
if (err < 0 && cmd == FLOW_BLOCK_BIND) {
if (!this_dev)
goto err_flow_block;
return err;
}
i++;
}
return 0;
err_flow_block:
list_for_each_entry(hook, &basechain->hook_list, list) {
if (i-- <= 0)
break;
dev = hook->ops.dev;
nft_chain_offload_cmd(basechain, dev, FLOW_BLOCK_UNBIND);
}
return err;
}
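The i counter records how many device hooks were bound successfully, so a mid-list failure unwinds exactly those and no more. The same idiom in isolation, with hypothetical bind_one()/unbind_one() standing in for the per-device offload command:

extern int bind_one(int idx);       /* hypothetical */
extern void unbind_one(int idx);    /* hypothetical */

int bind_all(int n)
{
        int i, err;

        for (i = 0; i < n; i++) {
                err = bind_one(i);
                if (err < 0)
                        goto unwind;
        }
        return 0;

unwind:
        while (--i >= 0)            /* undo only the binds that succeeded */
                unbind_one(i);
        return err;
}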
static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy,
enum flow_block_command cmd)
{
struct nft_base_chain *basechain;
struct net_device *dev;
u8 policy;
if (!nft_is_base_chain(chain))
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
dev = basechain->ops.dev;
if (!dev)
return -EOPNOTSUPP;
policy = ppolicy ? *ppolicy : basechain->policy;
/* Only default policy to accept is supported for now. */
if (cmd == FLOW_BLOCK_BIND && policy == NF_DROP)
return -EOPNOTSUPP;
if (dev->netdev_ops->ndo_setup_tc)
return nft_block_offload_cmd(basechain, dev, cmd);
else
return nft_indr_block_offload_cmd(basechain, dev, cmd);
return nft_flow_block_chain(basechain, NULL, cmd);
}
int nft_flow_rule_offload_commit(struct net *net)
......@@ -385,6 +448,7 @@ static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
{
struct nft_base_chain *basechain;
struct net *net = dev_net(dev);
struct nft_hook *hook, *found;
const struct nft_table *table;
struct nft_chain *chain;
......@@ -397,8 +461,16 @@ static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
!(chain->flags & NFT_CHAIN_HW_OFFLOAD))
continue;
found = NULL;
basechain = nft_base_chain(chain);
if (strncmp(basechain->dev_name, dev->name, IFNAMSIZ))
list_for_each_entry(hook, &basechain->hook_list, list) {
if (hook->ops.dev != dev)
continue;
found = hook;
break;
}
if (!found)
continue;
return chain;
......@@ -426,18 +498,6 @@ static void nft_indr_block_cb(struct net_device *dev,
mutex_unlock(&net->nft.commit_mutex);
}
static void nft_offload_chain_clean(struct nft_chain *chain)
{
struct nft_rule *rule;
list_for_each_entry(rule, &chain->rules, list) {
nft_flow_offload_rule(chain, rule,
NULL, FLOW_CLS_DESTROY);
}
nft_flow_offload_chain(chain, NULL, FLOW_BLOCK_UNBIND);
}
static int nft_offload_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
......@@ -448,7 +508,9 @@ static int nft_offload_netdev_event(struct notifier_block *this,
mutex_lock(&net->nft.commit_mutex);
chain = __nft_offload_get_chain(dev);
if (chain)
nft_offload_chain_clean(chain);
nft_flow_block_chain(nft_base_chain(chain), dev,
FLOW_BLOCK_UNBIND);
mutex_unlock(&net->nft.commit_mutex);
return NOTIFY_DONE;
......
......@@ -287,28 +287,35 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
struct nft_ctx *ctx)
{
struct nft_base_chain *basechain = nft_base_chain(ctx->chain);
struct nft_hook *hook, *found = NULL;
int n = 0;
switch (event) {
case NETDEV_UNREGISTER:
if (strcmp(basechain->dev_name, dev->name) != 0)
return;
/* UNREGISTER events are also happpening on netns exit.
*
* Altough nf_tables core releases all tables/chains, only
* this event handler provides guarantee that
* basechain.ops->dev is still accessible, so we cannot
* skip exiting net namespaces.
*/
__nft_release_basechain(ctx);
break;
case NETDEV_CHANGENAME:
if (dev->ifindex != basechain->ops.dev->ifindex)
return;
if (event != NETDEV_UNREGISTER)
return;
strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
break;
list_for_each_entry(hook, &basechain->hook_list, list) {
if (hook->ops.dev == dev)
found = hook;
n++;
}
if (!found)
return;
if (n > 1) {
nf_unregister_net_hook(ctx->net, &found->ops);
list_del_rcu(&found->list);
kfree_rcu(found, rcu);
return;
}
/* UNREGISTER events are also happening on netns exit.
*
* Although nf_tables core releases all tables/chains, only this event
* handler provides guarantee that hook->ops.dev is still accessible,
* so we cannot skip exiting net namespaces.
*/
__nft_release_basechain(ctx);
}
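Two outcomes are possible here: if the basechain still spans other devices (n > 1), only the hook of the unregistered device is removed and freed via RCU, while the basechain itself is released only once its last device goes away.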
static int nf_tables_netdev_event(struct notifier_block *this,
......
......@@ -2,6 +2,6 @@
# Makefile for netfilter selftests
TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh
include ../lib.mk
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
# End-to-end ipvs test suite
# Topology:
#--------------------------------------------------------------+
# | |
# ns0 | ns1 |
# ----------- | ----------- ----------- |
# | veth01 | --------- | veth10 | | veth12 | |
# ----------- peer ----------- ----------- |
# | | | |
# ----------- | | |
# | br0 | |----------------- peer |--------------|
# ----------- | | |
# | | | |
# ---------- peer ---------- ----------- |
# | veth02 | --------- | veth20 | | veth21 | |
# ---------- | ---------- ----------- |
# | ns2 |
# | |
#--------------------------------------------------------------+
#
# We assume that all network drivers are loaded
#
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0
GREEN='\033[0;92m'
RED='\033[0;31m'
NC='\033[0m' # No Color
readonly port=8080
readonly vip_v4=207.175.44.110
readonly cip_v4=10.0.0.2
readonly gip_v4=10.0.0.1
readonly dip_v4=172.16.0.1
readonly rip_v4=172.16.0.2
readonly sip_v4=10.0.0.3
readonly infile="$(mktemp)"
readonly outfile="$(mktemp)"
readonly datalen=32
sysipvsnet="/proc/sys/net/ipv4/vs/"
if [ ! -d $sysipvsnet ]; then
modprobe -q ip_vs
if [ $? -ne 0 ]; then
echo "skip: could not run test without ipvs module"
exit $ksft_skip
fi
fi
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ]; then
echo "SKIP: Could not run test without ip tool"
exit $ksft_skip
fi
ipvsadm -v > /dev/null 2>&1
if [ $? -ne 0 ]; then
echo "SKIP: Could not run test without ipvsadm"
exit $ksft_skip
fi
setup() {
ip netns add ns0
ip netns add ns1
ip netns add ns2
ip link add veth01 netns ns0 type veth peer name veth10 netns ns1
ip link add veth02 netns ns0 type veth peer name veth20 netns ns2
ip link add veth12 netns ns1 type veth peer name veth21 netns ns2
ip netns exec ns0 ip link set veth01 up
ip netns exec ns0 ip link set veth02 up
ip netns exec ns0 ip link add br0 type bridge
ip netns exec ns0 ip link set veth01 master br0
ip netns exec ns0 ip link set veth02 master br0
ip netns exec ns0 ip link set br0 up
ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0
ip netns exec ns1 ip link set lo up
ip netns exec ns1 ip link set veth10 up
ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10
ip netns exec ns1 ip link set veth12 up
ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12
ip netns exec ns2 ip link set lo up
ip netns exec ns2 ip link set veth21 up
ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21
ip netns exec ns2 ip link set veth20 up
ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20
sleep 1
dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
}
cleanup() {
for i in 0 1 2
do
ip netns del ns$i > /dev/null 2>&1
done
if [ -f "${outfile}" ]; then
rm "${outfile}"
fi
if [ -f "${infile}" ]; then
rm "${infile}"
fi
}
server_listen() {
ip netns exec ns2 nc -l -p ${port} > "${outfile}" &
server_pid=$!
sleep 0.2
}
client_connect() {
ip netns exec ns0 timeout 2 nc -w 1 ${vip_v4} ${port} < "${infile}"
}
verify_data() {
wait "${server_pid}"
cmp "$infile" "$outfile" 2>/dev/null
}
test_service() {
server_listen
client_connect
verify_data
}
test_dr() {
ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
ip netns exec ns1 ipvsadm -a -t ${vip_v4}:${port} -r ${rip_v4}:${port}
ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
# avoid incorrect arp response
ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
# avoid reverse route lookup
ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
test_service
}
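# While debugging a failure here, the virtual service configured above
# can be inspected from ns1 with: ip netns exec ns1 ipvsadm -L -n
# (ipvsadm availability is already checked in the preamble).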
test_nat() {
ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port}
ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
ip netns exec ns2 ip link del veth20
ip netns exec ns2 ip route add default via ${dip_v4} dev veth21
test_service
}
test_tun() {
ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
ip netns exec ns1 modprobe ipip
ip netns exec ns1 ip link set tunl0 up
ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0
ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0
ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0
ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port}
ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
ip netns exec ns2 modprobe ipip
ip netns exec ns2 ip link set tunl0 up
ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
test_service
}
run_tests() {
local errors=0
echo "Testing DR mode..."
cleanup
setup
test_dr
errors=$(( $errors + $? ))
echo "Testing NAT mode..."
cleanup
setup
test_nat
errors=$(( $errors + $? ))
echo "Testing Tunnel mode..."
cleanup
setup
test_tun
errors=$(( $errors + $? ))
return $errors
}
trap cleanup EXIT
run_tests
if [ $? -ne 0 ]; then
echo -e "$(basename $0): ${RED}FAIL${NC}"
exit 1
fi
echo -e "$(basename $0): ${GREEN}PASS${NC}"
exit 0
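To run this selftest standalone, execute ipvs.sh as root from tools/testing/selftests/netfilter/; the kselftest harness also picks it up through the Makefile change above, e.g. make -C tools/testing/selftests TARGETS=netfilter run_tests (standard kselftest layout assumed).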