Commit 77c7a7b3 authored by David S. Miller's avatar David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next

Steffen Klassert says:

====================
pull request (net-next): ipsec-next 2018-12-18

1) Add xfrm policy selftest scripts.
   From Florian Westphal.

2) Split inexact policies into four different search list
   classes and use the rbtree infrastructure to store/lookup
   the policies. This is to improve the policy lookup
   performance after the flowcache removal.
   Patches from Florian Westphal.

3) Various coding style fixes, from Colin Ian King.

4) Fix policy lookup logic after adding the inexact policy
   search tree infrastructure. From Florian Westphal.

5) Remove a useless remove BUG_ON from xfrm6_dst_ifdown.
   From Li RongQing.

6) Use the correct policy direction for lookups on hash
   rebuilding. From Florian Westphal.

Please pull or let me know if there are problems.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents b12c97d4 77990464
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/rhashtable-types.h>
#include <linux/xfrm.h> #include <linux/xfrm.h>
#include <net/dst_ops.h> #include <net/dst_ops.h>
...@@ -53,6 +54,7 @@ struct netns_xfrm { ...@@ -53,6 +54,7 @@ struct netns_xfrm {
unsigned int policy_count[XFRM_POLICY_MAX * 2]; unsigned int policy_count[XFRM_POLICY_MAX * 2];
struct work_struct policy_hash_work; struct work_struct policy_hash_work;
struct xfrm_policy_hthresh policy_hthresh; struct xfrm_policy_hthresh policy_hthresh;
struct list_head inexact_bins;
struct sock *nlsk; struct sock *nlsk;
......
...@@ -577,6 +577,7 @@ struct xfrm_policy { ...@@ -577,6 +577,7 @@ struct xfrm_policy {
/* This lock only affects elements except for entry. */ /* This lock only affects elements except for entry. */
rwlock_t lock; rwlock_t lock;
refcount_t refcnt; refcount_t refcnt;
u32 pos;
struct timer_list timer; struct timer_list timer;
atomic_t genid; atomic_t genid;
...@@ -589,6 +590,7 @@ struct xfrm_policy { ...@@ -589,6 +590,7 @@ struct xfrm_policy {
struct xfrm_lifetime_cur curlft; struct xfrm_lifetime_cur curlft;
struct xfrm_policy_walk_entry walk; struct xfrm_policy_walk_entry walk;
struct xfrm_policy_queue polq; struct xfrm_policy_queue polq;
bool bydst_reinsert;
u8 type; u8 type;
u8 action; u8 action;
u8 flags; u8 flags;
...@@ -596,6 +598,7 @@ struct xfrm_policy { ...@@ -596,6 +598,7 @@ struct xfrm_policy {
u16 family; u16 family;
struct xfrm_sec_ctx *security; struct xfrm_sec_ctx *security;
struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH];
struct hlist_node bydst_inexact_list;
struct rcu_head rcu; struct rcu_head rcu;
}; };
......
...@@ -262,7 +262,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, ...@@ -262,7 +262,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (xdst->u.rt6.rt6i_idev->dev == dev) { if (xdst->u.rt6.rt6i_idev->dev == dev) {
struct inet6_dev *loopback_idev = struct inet6_dev *loopback_idev =
in6_dev_get(dev_net(dev)->loopback_dev); in6_dev_get(dev_net(dev)->loopback_dev);
BUG_ON(!loopback_idev);
do { do {
in6_dev_put(xdst->u.rt6.rt6i_idev); in6_dev_put(xdst->u.rt6.rt6i_idev);
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include <linux/cache.h> #include <linux/cache.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/audit.h> #include <linux/audit.h>
#include <linux/rhashtable.h>
#include <net/dst.h> #include <net/dst.h>
#include <net/flow.h> #include <net/flow.h>
#include <net/xfrm.h> #include <net/xfrm.h>
...@@ -45,6 +46,99 @@ struct xfrm_flo { ...@@ -45,6 +46,99 @@ struct xfrm_flo {
u8 flags; u8 flags;
}; };
/* prefixes smaller than this are stored in lists, not trees. */
#define INEXACT_PREFIXLEN_IPV4 16
#define INEXACT_PREFIXLEN_IPV6 48
struct xfrm_pol_inexact_node {
struct rb_node node;
union {
xfrm_address_t addr;
struct rcu_head rcu;
};
u8 prefixlen;
struct rb_root root;
/* the policies matching this node, can be empty list */
struct hlist_head hhead;
};
/* xfrm inexact policy search tree:
* xfrm_pol_inexact_bin = hash(dir,type,family,if_id);
* |
* +---- root_d: sorted by daddr:prefix
* | |
* | xfrm_pol_inexact_node
* | |
* | +- root: sorted by saddr/prefix
* | | |
* | | xfrm_pol_inexact_node
* | | |
* | | + root: unused
* | | |
* | | + hhead: saddr:daddr policies
* | |
* | +- coarse policies and all any:daddr policies
* |
* +---- root_s: sorted by saddr:prefix
* | |
* | xfrm_pol_inexact_node
* | |
* | + root: unused
* | |
* | + hhead: saddr:any policies
* |
* +---- coarse policies and all any:any policies
*
* Lookups return four candidate lists:
* 1. any:any list from top-level xfrm_pol_inexact_bin
* 2. any:daddr list from daddr tree
* 3. saddr:daddr list from 2nd level daddr tree
* 4. saddr:any list from saddr tree
*
* This result set then needs to be searched for the policy with
* the lowest priority. If two results have same prio, youngest one wins.
*/
struct xfrm_pol_inexact_key {
possible_net_t net;
u32 if_id;
u16 family;
u8 dir, type;
};
struct xfrm_pol_inexact_bin {
struct xfrm_pol_inexact_key k;
struct rhash_head head;
/* list containing '*:*' policies */
struct hlist_head hhead;
seqcount_t count;
/* tree sorted by daddr/prefix */
struct rb_root root_d;
/* tree sorted by saddr/prefix */
struct rb_root root_s;
/* slow path below */
struct list_head inexact_bins;
struct rcu_head rcu;
};
enum xfrm_pol_inexact_candidate_type {
XFRM_POL_CAND_BOTH,
XFRM_POL_CAND_SADDR,
XFRM_POL_CAND_DADDR,
XFRM_POL_CAND_ANY,
XFRM_POL_CAND_MAX,
};
struct xfrm_pol_inexact_candidates {
struct hlist_head *res[XFRM_POL_CAND_MAX];
};
static DEFINE_SPINLOCK(xfrm_if_cb_lock); static DEFINE_SPINLOCK(xfrm_if_cb_lock);
static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly; static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
...@@ -55,6 +149,9 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] ...@@ -55,6 +149,9 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1]
static struct kmem_cache *xfrm_dst_cache __ro_after_init; static struct kmem_cache *xfrm_dst_cache __ro_after_init;
static __read_mostly seqcount_t xfrm_policy_hash_generation; static __read_mostly seqcount_t xfrm_policy_hash_generation;
static struct rhashtable xfrm_policy_inexact_table;
static const struct rhashtable_params xfrm_pol_inexact_params;
static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr); static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);
static int stale_bundle(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst); static int xfrm_bundle_ok(struct xfrm_dst *xdst);
...@@ -64,6 +161,25 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir); ...@@ -64,6 +161,25 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
int dir); int dir);
static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family, u8 dir,
u32 if_id);
static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_lookup_rcu(struct net *net,
u8 type, u16 family, u8 dir, u32 if_id);
static struct xfrm_policy *
xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy,
bool excl);
static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
struct xfrm_policy *policy);
static bool
xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
struct xfrm_pol_inexact_bin *b,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr);
static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy) static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy)
{ {
return refcount_inc_not_zero(&policy->refcnt); return refcount_inc_not_zero(&policy->refcnt);
...@@ -269,6 +385,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) ...@@ -269,6 +385,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) { if (policy) {
write_pnet(&policy->xp_net, net); write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all); INIT_LIST_HEAD(&policy->walk.all);
INIT_HLIST_NODE(&policy->bydst_inexact_list);
INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx); INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock); rwlock_init(&policy->lock);
...@@ -365,7 +482,7 @@ static struct hlist_head *policy_hash_bysel(struct net *net, ...@@ -365,7 +482,7 @@ static struct hlist_head *policy_hash_bysel(struct net *net,
hash = __sel_hash(sel, family, hmask, dbits, sbits); hash = __sel_hash(sel, family, hmask, dbits, sbits);
if (hash == hmask + 1) if (hash == hmask + 1)
return &net->xfrm.policy_inexact[dir]; return NULL;
return rcu_dereference_check(net->xfrm.policy_bydst[dir].table, return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash; lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
...@@ -563,6 +680,533 @@ static void xfrm_hash_resize(struct work_struct *work) ...@@ -563,6 +680,533 @@ static void xfrm_hash_resize(struct work_struct *work)
mutex_unlock(&hash_resize_mutex); mutex_unlock(&hash_resize_mutex);
} }
static void xfrm_hash_reset_inexact_table(struct net *net)
{
struct xfrm_pol_inexact_bin *b;
lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
list_for_each_entry(b, &net->xfrm.inexact_bins, inexact_bins)
INIT_HLIST_HEAD(&b->hhead);
}
/* Make sure *pol can be inserted into fastbin.
* Useful to check that later insert requests will be sucessful
* (provided xfrm_policy_lock is held throughout).
*/
static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_alloc_bin(const struct xfrm_policy *pol, u8 dir)
{
struct xfrm_pol_inexact_bin *bin, *prev;
struct xfrm_pol_inexact_key k = {
.family = pol->family,
.type = pol->type,
.dir = dir,
.if_id = pol->if_id,
};
struct net *net = xp_net(pol);
lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
write_pnet(&k.net, net);
bin = rhashtable_lookup_fast(&xfrm_policy_inexact_table, &k,
xfrm_pol_inexact_params);
if (bin)
return bin;
bin = kzalloc(sizeof(*bin), GFP_ATOMIC);
if (!bin)
return NULL;
bin->k = k;
INIT_HLIST_HEAD(&bin->hhead);
bin->root_d = RB_ROOT;
bin->root_s = RB_ROOT;
seqcount_init(&bin->count);
prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table,
&bin->k, &bin->head,
xfrm_pol_inexact_params);
if (!prev) {
list_add(&bin->inexact_bins, &net->xfrm.inexact_bins);
return bin;
}
kfree(bin);
return IS_ERR(prev) ? NULL : prev;
}
static bool xfrm_pol_inexact_addr_use_any_list(const xfrm_address_t *addr,
int family, u8 prefixlen)
{
if (xfrm_addr_any(addr, family))
return true;
if (family == AF_INET6 && prefixlen < INEXACT_PREFIXLEN_IPV6)
return true;
if (family == AF_INET && prefixlen < INEXACT_PREFIXLEN_IPV4)
return true;
return false;
}
static bool
xfrm_policy_inexact_insert_use_any_list(const struct xfrm_policy *policy)
{
const xfrm_address_t *addr;
bool saddr_any, daddr_any;
u8 prefixlen;
addr = &policy->selector.saddr;
prefixlen = policy->selector.prefixlen_s;
saddr_any = xfrm_pol_inexact_addr_use_any_list(addr,
policy->family,
prefixlen);
addr = &policy->selector.daddr;
prefixlen = policy->selector.prefixlen_d;
daddr_any = xfrm_pol_inexact_addr_use_any_list(addr,
policy->family,
prefixlen);
return saddr_any && daddr_any;
}
static void xfrm_pol_inexact_node_init(struct xfrm_pol_inexact_node *node,
const xfrm_address_t *addr, u8 prefixlen)
{
node->addr = *addr;
node->prefixlen = prefixlen;
}
static struct xfrm_pol_inexact_node *
xfrm_pol_inexact_node_alloc(const xfrm_address_t *addr, u8 prefixlen)
{
struct xfrm_pol_inexact_node *node;
node = kzalloc(sizeof(*node), GFP_ATOMIC);
if (node)
xfrm_pol_inexact_node_init(node, addr, prefixlen);
return node;
}
static int xfrm_policy_addr_delta(const xfrm_address_t *a,
const xfrm_address_t *b,
u8 prefixlen, u16 family)
{
unsigned int pdw, pbi;
int delta = 0;
switch (family) {
case AF_INET:
if (sizeof(long) == 4 && prefixlen == 0)
return ntohl(a->a4) - ntohl(b->a4);
return (ntohl(a->a4) & ((~0UL << (32 - prefixlen)))) -
(ntohl(b->a4) & ((~0UL << (32 - prefixlen))));
case AF_INET6:
pdw = prefixlen >> 5;
pbi = prefixlen & 0x1f;
if (pdw) {
delta = memcmp(a->a6, b->a6, pdw << 2);
if (delta)
return delta;
}
if (pbi) {
u32 mask = ~0u << (32 - pbi);
delta = (ntohl(a->a6[pdw]) & mask) -
(ntohl(b->a6[pdw]) & mask);
}
break;
default:
break;
}
return delta;
}
static void xfrm_policy_inexact_list_reinsert(struct net *net,
struct xfrm_pol_inexact_node *n,
u16 family)
{
unsigned int matched_s, matched_d;
struct hlist_node *newpos = NULL;
struct xfrm_policy *policy, *p;
matched_s = 0;
matched_d = 0;
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
bool matches_s, matches_d;
if (!policy->bydst_reinsert)
continue;
WARN_ON_ONCE(policy->family != family);
policy->bydst_reinsert = false;
hlist_for_each_entry(p, &n->hhead, bydst) {
if (policy->priority >= p->priority)
newpos = &p->bydst;
else
break;
}
if (newpos)
hlist_add_behind(&policy->bydst, newpos);
else
hlist_add_head(&policy->bydst, &n->hhead);
/* paranoia checks follow.
* Check that the reinserted policy matches at least
* saddr or daddr for current node prefix.
*
* Matching both is fine, matching saddr in one policy
* (but not daddr) and then matching only daddr in another
* is a bug.
*/
matches_s = xfrm_policy_addr_delta(&policy->selector.saddr,
&n->addr,
n->prefixlen,
family) == 0;
matches_d = xfrm_policy_addr_delta(&policy->selector.daddr,
&n->addr,
n->prefixlen,
family) == 0;
if (matches_s && matches_d)
continue;
WARN_ON_ONCE(!matches_s && !matches_d);
if (matches_s)
matched_s++;
if (matches_d)
matched_d++;
WARN_ON_ONCE(matched_s && matched_d);
}
}
static void xfrm_policy_inexact_node_reinsert(struct net *net,
struct xfrm_pol_inexact_node *n,
struct rb_root *new,
u16 family)
{
struct rb_node **p, *parent = NULL;
struct xfrm_pol_inexact_node *node;
/* we should not have another subtree here */
WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root));
p = &new->rb_node;
while (*p) {
u8 prefixlen;
int delta;
parent = *p;
node = rb_entry(*p, struct xfrm_pol_inexact_node, node);
prefixlen = min(node->prefixlen, n->prefixlen);
delta = xfrm_policy_addr_delta(&n->addr, &node->addr,
prefixlen, family);
if (delta < 0) {
p = &parent->rb_left;
} else if (delta > 0) {
p = &parent->rb_right;
} else {
struct xfrm_policy *tmp;
hlist_for_each_entry(tmp, &node->hhead, bydst)
tmp->bydst_reinsert = true;
hlist_for_each_entry(tmp, &n->hhead, bydst)
tmp->bydst_reinsert = true;
INIT_HLIST_HEAD(&node->hhead);
xfrm_policy_inexact_list_reinsert(net, node, family);
if (node->prefixlen == n->prefixlen) {
kfree_rcu(n, rcu);
return;
}
rb_erase(*p, new);
kfree_rcu(n, rcu);
n = node;
n->prefixlen = prefixlen;
*p = new->rb_node;
parent = NULL;
}
}
rb_link_node_rcu(&n->node, parent, p);
rb_insert_color(&n->node, new);
}
/* merge nodes v and n */
static void xfrm_policy_inexact_node_merge(struct net *net,
struct xfrm_pol_inexact_node *v,
struct xfrm_pol_inexact_node *n,
u16 family)
{
struct xfrm_pol_inexact_node *node;
struct xfrm_policy *tmp;
struct rb_node *rnode;
/* To-be-merged node v has a subtree.
*
* Dismantle it and insert its nodes to n->root.
*/
while ((rnode = rb_first(&v->root)) != NULL) {
node = rb_entry(rnode, struct xfrm_pol_inexact_node, node);
rb_erase(&node->node, &v->root);
xfrm_policy_inexact_node_reinsert(net, node, &n->root,
family);
}
hlist_for_each_entry(tmp, &v->hhead, bydst)
tmp->bydst_reinsert = true;
hlist_for_each_entry(tmp, &n->hhead, bydst)
tmp->bydst_reinsert = true;
INIT_HLIST_HEAD(&n->hhead);
xfrm_policy_inexact_list_reinsert(net, n, family);
}
static struct xfrm_pol_inexact_node *
xfrm_policy_inexact_insert_node(struct net *net,
struct rb_root *root,
xfrm_address_t *addr,
u16 family, u8 prefixlen, u8 dir)
{
struct xfrm_pol_inexact_node *cached = NULL;
struct rb_node **p, *parent = NULL;
struct xfrm_pol_inexact_node *node;
p = &root->rb_node;
while (*p) {
int delta;
parent = *p;
node = rb_entry(*p, struct xfrm_pol_inexact_node, node);
delta = xfrm_policy_addr_delta(addr, &node->addr,
node->prefixlen,
family);
if (delta == 0 && prefixlen >= node->prefixlen) {
WARN_ON_ONCE(cached); /* ipsec policies got lost */
return node;
}
if (delta < 0)
p = &parent->rb_left;
else
p = &parent->rb_right;
if (prefixlen < node->prefixlen) {
delta = xfrm_policy_addr_delta(addr, &node->addr,
prefixlen,
family);
if (delta)
continue;
/* This node is a subnet of the new prefix. It needs
* to be removed and re-inserted with the smaller
* prefix and all nodes that are now also covered
* by the reduced prefixlen.
*/
rb_erase(&node->node, root);
if (!cached) {
xfrm_pol_inexact_node_init(node, addr,
prefixlen);
cached = node;
} else {
/* This node also falls within the new
* prefixlen. Merge the to-be-reinserted
* node and this one.
*/
xfrm_policy_inexact_node_merge(net, node,
cached, family);
kfree_rcu(node, rcu);
}
/* restart */
p = &root->rb_node;
parent = NULL;
}
}
node = cached;
if (!node) {
node = xfrm_pol_inexact_node_alloc(addr, prefixlen);
if (!node)
return NULL;
}
rb_link_node_rcu(&node->node, parent, p);
rb_insert_color(&node->node, root);
return node;
}
static void xfrm_policy_inexact_gc_tree(struct rb_root *r, bool rm)
{
struct xfrm_pol_inexact_node *node;
struct rb_node *rn = rb_first(r);
while (rn) {
node = rb_entry(rn, struct xfrm_pol_inexact_node, node);
xfrm_policy_inexact_gc_tree(&node->root, rm);
rn = rb_next(rn);
if (!hlist_empty(&node->hhead) || !RB_EMPTY_ROOT(&node->root)) {
WARN_ON_ONCE(rm);
continue;
}
rb_erase(&node->node, r);
kfree_rcu(node, rcu);
}
}
static void __xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b, bool net_exit)
{
write_seqcount_begin(&b->count);
xfrm_policy_inexact_gc_tree(&b->root_d, net_exit);
xfrm_policy_inexact_gc_tree(&b->root_s, net_exit);
write_seqcount_end(&b->count);
if (!RB_EMPTY_ROOT(&b->root_d) || !RB_EMPTY_ROOT(&b->root_s) ||
!hlist_empty(&b->hhead)) {
WARN_ON_ONCE(net_exit);
return;
}
if (rhashtable_remove_fast(&xfrm_policy_inexact_table, &b->head,
xfrm_pol_inexact_params) == 0) {
list_del(&b->inexact_bins);
kfree_rcu(b, rcu);
}
}
static void xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b)
{
struct net *net = read_pnet(&b->k.net);
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
__xfrm_policy_inexact_prune_bin(b, false);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
static void __xfrm_policy_inexact_flush(struct net *net)
{
struct xfrm_pol_inexact_bin *bin, *t;
lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
list_for_each_entry_safe(bin, t, &net->xfrm.inexact_bins, inexact_bins)
__xfrm_policy_inexact_prune_bin(bin, false);
}
static struct hlist_head *
xfrm_policy_inexact_alloc_chain(struct xfrm_pol_inexact_bin *bin,
struct xfrm_policy *policy, u8 dir)
{
struct xfrm_pol_inexact_node *n;
struct net *net;
net = xp_net(policy);
lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
if (xfrm_policy_inexact_insert_use_any_list(policy))
return &bin->hhead;
if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.daddr,
policy->family,
policy->selector.prefixlen_d)) {
write_seqcount_begin(&bin->count);
n = xfrm_policy_inexact_insert_node(net,
&bin->root_s,
&policy->selector.saddr,
policy->family,
policy->selector.prefixlen_s,
dir);
write_seqcount_end(&bin->count);
if (!n)
return NULL;
return &n->hhead;
}
/* daddr is fixed */
write_seqcount_begin(&bin->count);
n = xfrm_policy_inexact_insert_node(net,
&bin->root_d,
&policy->selector.daddr,
policy->family,
policy->selector.prefixlen_d, dir);
write_seqcount_end(&bin->count);
if (!n)
return NULL;
/* saddr is wildcard */
if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.saddr,
policy->family,
policy->selector.prefixlen_s))
return &n->hhead;
write_seqcount_begin(&bin->count);
n = xfrm_policy_inexact_insert_node(net,
&n->root,
&policy->selector.saddr,
policy->family,
policy->selector.prefixlen_s, dir);
write_seqcount_end(&bin->count);
if (!n)
return NULL;
return &n->hhead;
}
static struct xfrm_policy *
xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl)
{
struct xfrm_pol_inexact_bin *bin;
struct xfrm_policy *delpol;
struct hlist_head *chain;
struct net *net;
bin = xfrm_policy_inexact_alloc_bin(policy, dir);
if (!bin)
return ERR_PTR(-ENOMEM);
net = xp_net(policy);
lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
chain = xfrm_policy_inexact_alloc_chain(bin, policy, dir);
if (!chain) {
__xfrm_policy_inexact_prune_bin(bin, false);
return ERR_PTR(-ENOMEM);
}
delpol = xfrm_policy_insert_list(chain, policy, excl);
if (delpol && excl) {
__xfrm_policy_inexact_prune_bin(bin, false);
return ERR_PTR(-EEXIST);
}
chain = &net->xfrm.policy_inexact[dir];
xfrm_policy_insert_inexact_list(chain, policy);
if (delpol)
__xfrm_policy_inexact_prune_bin(bin, false);
return delpol;
}
static void xfrm_hash_rebuild(struct work_struct *work) static void xfrm_hash_rebuild(struct work_struct *work)
{ {
struct net *net = container_of(work, struct net, struct net *net = container_of(work, struct net,
...@@ -592,7 +1236,50 @@ static void xfrm_hash_rebuild(struct work_struct *work) ...@@ -592,7 +1236,50 @@ static void xfrm_hash_rebuild(struct work_struct *work)
spin_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
/* make sure that we can insert the indirect policies again before
* we start with destructive action.
*/
list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) {
struct xfrm_pol_inexact_bin *bin;
u8 dbits, sbits;
dir = xfrm_policy_id2dir(policy->index);
if (policy->walk.dead || dir >= XFRM_POLICY_MAX)
continue;
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
if (policy->family == AF_INET) {
dbits = rbits4;
sbits = lbits4;
} else {
dbits = rbits6;
sbits = lbits6;
}
} else {
if (policy->family == AF_INET) {
dbits = lbits4;
sbits = rbits4;
} else {
dbits = lbits6;
sbits = rbits6;
}
}
if (policy->selector.prefixlen_d < dbits ||
policy->selector.prefixlen_s < sbits)
continue;
bin = xfrm_policy_inexact_alloc_bin(policy, dir);
if (!bin)
goto out_unlock;
if (!xfrm_policy_inexact_alloc_chain(bin, policy, dir))
goto out_unlock;
}
/* reset the bydst and inexact table in all directions */ /* reset the bydst and inexact table in all directions */
xfrm_hash_reset_inexact_table(net);
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
hmask = net->xfrm.policy_bydst[dir].hmask; hmask = net->xfrm.policy_bydst[dir].hmask;
...@@ -616,15 +1303,23 @@ static void xfrm_hash_rebuild(struct work_struct *work) ...@@ -616,15 +1303,23 @@ static void xfrm_hash_rebuild(struct work_struct *work)
/* re-insert all policies by order of creation */ /* re-insert all policies by order of creation */
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
if (policy->walk.dead || if (policy->walk.dead)
xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { continue;
dir = xfrm_policy_id2dir(policy->index);
if (dir >= XFRM_POLICY_MAX) {
/* skip socket policies */ /* skip socket policies */
continue; continue;
} }
newpos = NULL; newpos = NULL;
chain = policy_hash_bysel(net, &policy->selector, chain = policy_hash_bysel(net, &policy->selector,
policy->family, policy->family, dir);
xfrm_policy_id2dir(policy->index)); if (!chain) {
void *p = xfrm_policy_inexact_insert(policy, dir, 0);
WARN_ONCE(IS_ERR(p), "reinsert: %ld\n", PTR_ERR(p));
continue;
}
hlist_for_each_entry(pol, chain, bydst) { hlist_for_each_entry(pol, chain, bydst) {
if (policy->priority >= pol->priority) if (policy->priority >= pol->priority)
newpos = &pol->bydst; newpos = &pol->bydst;
...@@ -637,6 +1332,8 @@ static void xfrm_hash_rebuild(struct work_struct *work) ...@@ -637,6 +1332,8 @@ static void xfrm_hash_rebuild(struct work_struct *work)
hlist_add_head_rcu(&policy->bydst, chain); hlist_add_head_rcu(&policy->bydst, chain);
} }
out_unlock:
__xfrm_policy_inexact_flush(net);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
mutex_unlock(&hash_resize_mutex); mutex_unlock(&hash_resize_mutex);
...@@ -725,33 +1422,112 @@ static void xfrm_policy_requeue(struct xfrm_policy *old, ...@@ -725,33 +1422,112 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
spin_unlock_bh(&pq->hold_queue.lock); spin_unlock_bh(&pq->hold_queue.lock);
} }
static bool xfrm_policy_mark_match(struct xfrm_policy *policy, static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
struct xfrm_policy *pol) struct xfrm_policy *pol)
{
u32 mark = policy->mark.v & policy->mark.m;
if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
return true;
if ((mark & pol->mark.m) == pol->mark.v &&
policy->priority == pol->priority)
return true;
return false;
}
static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed)
{
const struct xfrm_pol_inexact_key *k = data;
u32 a = k->type << 24 | k->dir << 16 | k->family;
return jhash_3words(a, k->if_id, net_hash_mix(read_pnet(&k->net)),
seed);
}
static u32 xfrm_pol_bin_obj(const void *data, u32 len, u32 seed)
{
const struct xfrm_pol_inexact_bin *b = data;
return xfrm_pol_bin_key(&b->k, 0, seed);
}
static int xfrm_pol_bin_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
{
const struct xfrm_pol_inexact_key *key = arg->key;
const struct xfrm_pol_inexact_bin *b = ptr;
int ret;
if (!net_eq(read_pnet(&b->k.net), read_pnet(&key->net)))
return -1;
ret = b->k.dir ^ key->dir;
if (ret)
return ret;
ret = b->k.type ^ key->type;
if (ret)
return ret;
ret = b->k.family ^ key->family;
if (ret)
return ret;
return b->k.if_id ^ key->if_id;
}
static const struct rhashtable_params xfrm_pol_inexact_params = {
.head_offset = offsetof(struct xfrm_pol_inexact_bin, head),
.hashfn = xfrm_pol_bin_key,
.obj_hashfn = xfrm_pol_bin_obj,
.obj_cmpfn = xfrm_pol_bin_cmp,
.automatic_shrinking = true,
};
static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
struct xfrm_policy *policy)
{ {
u32 mark = policy->mark.v & policy->mark.m; struct xfrm_policy *pol, *delpol = NULL;
struct hlist_node *newpos = NULL;
int i = 0;
if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m) hlist_for_each_entry(pol, chain, bydst_inexact_list) {
return true; if (pol->type == policy->type &&
pol->if_id == policy->if_id &&
!selector_cmp(&pol->selector, &policy->selector) &&
xfrm_policy_mark_match(policy, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
delpol = pol;
if (policy->priority > pol->priority)
continue;
} else if (policy->priority >= pol->priority) {
newpos = &pol->bydst_inexact_list;
continue;
}
if (delpol)
break;
}
if ((mark & pol->mark.m) == pol->mark.v && if (newpos)
policy->priority == pol->priority) hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
return true; else
hlist_add_head_rcu(&policy->bydst_inexact_list, chain);
return false; hlist_for_each_entry(pol, chain, bydst_inexact_list) {
pol->pos = i;
i++;
}
} }
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
struct xfrm_policy *policy,
bool excl)
{ {
struct net *net = xp_net(policy); struct xfrm_policy *pol, *newpos = NULL, *delpol = NULL;
struct xfrm_policy *pol;
struct xfrm_policy *delpol;
struct hlist_head *chain;
struct hlist_node *newpos;
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
delpol = NULL;
newpos = NULL;
hlist_for_each_entry(pol, chain, bydst) { hlist_for_each_entry(pol, chain, bydst) {
if (pol->type == policy->type && if (pol->type == policy->type &&
pol->if_id == policy->if_id && pol->if_id == policy->if_id &&
...@@ -759,24 +1535,45 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) ...@@ -759,24 +1535,45 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
xfrm_policy_mark_match(policy, pol) && xfrm_policy_mark_match(policy, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) && xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) { !WARN_ON(delpol)) {
if (excl) { if (excl)
spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return ERR_PTR(-EEXIST);
return -EEXIST;
}
delpol = pol; delpol = pol;
if (policy->priority > pol->priority) if (policy->priority > pol->priority)
continue; continue;
} else if (policy->priority >= pol->priority) { } else if (policy->priority >= pol->priority) {
newpos = &pol->bydst; newpos = pol;
continue; continue;
} }
if (delpol) if (delpol)
break; break;
} }
if (newpos) if (newpos)
hlist_add_behind_rcu(&policy->bydst, newpos); hlist_add_behind_rcu(&policy->bydst, &newpos->bydst);
else else
hlist_add_head_rcu(&policy->bydst, chain); hlist_add_head_rcu(&policy->bydst, chain);
return delpol;
}
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
struct net *net = xp_net(policy);
struct xfrm_policy *delpol;
struct hlist_head *chain;
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
if (chain)
delpol = xfrm_policy_insert_list(chain, policy, excl);
else
delpol = xfrm_policy_inexact_insert(policy, dir, excl);
if (IS_ERR(delpol)) {
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return PTR_ERR(delpol);
}
__xfrm_policy_link(policy, dir); __xfrm_policy_link(policy, dir);
/* After previous checking, family can either be AF_INET or AF_INET6 */ /* After previous checking, family can either be AF_INET or AF_INET6 */
...@@ -806,29 +1603,82 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) ...@@ -806,29 +1603,82 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
} }
EXPORT_SYMBOL(xfrm_policy_insert); EXPORT_SYMBOL(xfrm_policy_insert);
static struct xfrm_policy *
__xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id,
u8 type, int dir,
struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx)
{
struct xfrm_policy *pol;
if (!chain)
return NULL;
hlist_for_each_entry(pol, chain, bydst) {
if (pol->type == type &&
pol->if_id == if_id &&
(mark & pol->mark.m) == pol->mark.v &&
!selector_cmp(sel, &pol->selector) &&
xfrm_sec_ctx_match(ctx, pol->security))
return pol;
}
return NULL;
}
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
u8 type, int dir, u8 type, int dir,
struct xfrm_selector *sel, struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx, int delete, struct xfrm_sec_ctx *ctx, int delete,
int *err) int *err)
{ {
struct xfrm_policy *pol, *ret; struct xfrm_pol_inexact_bin *bin = NULL;
struct xfrm_policy *pol, *ret = NULL;
struct hlist_head *chain; struct hlist_head *chain;
*err = 0; *err = 0;
spin_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, sel, sel->family, dir); chain = policy_hash_bysel(net, sel, sel->family, dir);
ret = NULL; if (!chain) {
hlist_for_each_entry(pol, chain, bydst) { struct xfrm_pol_inexact_candidates cand;
if (pol->type == type && int i;
pol->if_id == if_id &&
(mark & pol->mark.m) == pol->mark.v && bin = xfrm_policy_inexact_lookup(net, type,
!selector_cmp(sel, &pol->selector) && sel->family, dir, if_id);
xfrm_sec_ctx_match(ctx, pol->security)) { if (!bin) {
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return NULL;
}
if (!xfrm_policy_find_inexact_candidates(&cand, bin,
&sel->saddr,
&sel->daddr)) {
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return NULL;
}
pol = NULL;
for (i = 0; i < ARRAY_SIZE(cand.res); i++) {
struct xfrm_policy *tmp;
tmp = __xfrm_policy_bysel_ctx(cand.res[i], mark,
if_id, type, dir,
sel, ctx);
if (!tmp)
continue;
if (!pol || tmp->pos < pol->pos)
pol = tmp;
}
} else {
pol = __xfrm_policy_bysel_ctx(chain, mark, if_id, type, dir,
sel, ctx);
}
if (pol) {
xfrm_pol_hold(pol); xfrm_pol_hold(pol);
if (delete) { if (delete) {
*err = security_xfrm_policy_delete( *err = security_xfrm_policy_delete(pol->security);
pol->security);
if (*err) { if (*err) {
spin_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return pol; return pol;
...@@ -836,13 +1686,13 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, ...@@ -836,13 +1686,13 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
__xfrm_policy_unlink(pol, dir); __xfrm_policy_unlink(pol, dir);
} }
ret = pol; ret = pol;
break;
}
} }
spin_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (ret && delete) if (ret && delete)
xfrm_policy_kill(ret); xfrm_policy_kill(ret);
if (bin && delete)
xfrm_policy_inexact_prune_bin(bin);
return ret; return ret;
} }
EXPORT_SYMBOL(xfrm_policy_bysel_ctx); EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
...@@ -892,38 +1742,21 @@ EXPORT_SYMBOL(xfrm_policy_byid); ...@@ -892,38 +1742,21 @@ EXPORT_SYMBOL(xfrm_policy_byid);
static inline int static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
{ {
int dir, err = 0;
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
struct xfrm_policy *pol; struct xfrm_policy *pol;
int i; int err = 0;
hlist_for_each_entry(pol, list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
&net->xfrm.policy_inexact[dir], bydst) { if (pol->walk.dead ||
if (pol->type != type) xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX ||
pol->type != type)
continue; continue;
err = security_xfrm_policy_delete(pol->security); err = security_xfrm_policy_delete(pol->security);
if (err) { if (err) {
xfrm_audit_policy_delete(pol, 0, task_valid); xfrm_audit_policy_delete(pol, 0, task_valid);
return err; return err;
} }
} }
for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
hlist_for_each_entry(pol,
net->xfrm.policy_bydst[dir].table + i,
bydst) {
if (pol->type != type)
continue;
err = security_xfrm_policy_delete(
pol->security);
if (err) {
xfrm_audit_policy_delete(pol, 0,
task_valid);
return err;
}
}
}
}
return err; return err;
} }
#else #else
...@@ -937,6 +1770,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) ...@@ -937,6 +1770,7 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{ {
int dir, err = 0, cnt = 0; int dir, err = 0, cnt = 0;
struct xfrm_policy *pol;
spin_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
...@@ -944,48 +1778,25 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) ...@@ -944,48 +1778,25 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
if (err) if (err)
goto out; goto out;
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { again:
struct xfrm_policy *pol; list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) {
int i; dir = xfrm_policy_id2dir(pol->index);
if (pol->walk.dead ||
again1: dir >= XFRM_POLICY_MAX ||
hlist_for_each_entry(pol, pol->type != type)
&net->xfrm.policy_inexact[dir], bydst) {
if (pol->type != type)
continue; continue;
__xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol);
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
goto again1;
}
for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
again2:
hlist_for_each_entry(pol,
net->xfrm.policy_bydst[dir].table + i,
bydst) {
if (pol->type != type)
continue;
__xfrm_policy_unlink(pol, dir); __xfrm_policy_unlink(pol, dir);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
cnt++; cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol); xfrm_policy_kill(pol);
spin_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
goto again2; goto again;
}
}
} }
if (!cnt) if (cnt)
__xfrm_policy_inexact_flush(net);
else
err = -ESRCH; err = -ESRCH;
out: out:
spin_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
...@@ -1084,21 +1895,189 @@ static int xfrm_policy_match(const struct xfrm_policy *pol, ...@@ -1084,21 +1895,189 @@ static int xfrm_policy_match(const struct xfrm_policy *pol,
if (match) if (match)
ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
dir); dir);
return ret; return ret;
} }
static struct xfrm_pol_inexact_node *
xfrm_policy_lookup_inexact_addr(const struct rb_root *r,
seqcount_t *count,
const xfrm_address_t *addr, u16 family)
{
const struct rb_node *parent;
int seq;
again:
seq = read_seqcount_begin(count);
parent = rcu_dereference_raw(r->rb_node);
while (parent) {
struct xfrm_pol_inexact_node *node;
int delta;
node = rb_entry(parent, struct xfrm_pol_inexact_node, node);
delta = xfrm_policy_addr_delta(addr, &node->addr,
node->prefixlen, family);
if (delta < 0) {
parent = rcu_dereference_raw(parent->rb_left);
continue;
} else if (delta > 0) {
parent = rcu_dereference_raw(parent->rb_right);
continue;
}
return node;
}
if (read_seqcount_retry(count, seq))
goto again;
return NULL;
}
static bool
xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
struct xfrm_pol_inexact_bin *b,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr)
{
struct xfrm_pol_inexact_node *n;
u16 family;
if (!b)
return false;
family = b->k.family;
memset(cand, 0, sizeof(*cand));
cand->res[XFRM_POL_CAND_ANY] = &b->hhead;
n = xfrm_policy_lookup_inexact_addr(&b->root_d, &b->count, daddr,
family);
if (n) {
cand->res[XFRM_POL_CAND_DADDR] = &n->hhead;
n = xfrm_policy_lookup_inexact_addr(&n->root, &b->count, saddr,
family);
if (n)
cand->res[XFRM_POL_CAND_BOTH] = &n->hhead;
}
n = xfrm_policy_lookup_inexact_addr(&b->root_s, &b->count, saddr,
family);
if (n)
cand->res[XFRM_POL_CAND_SADDR] = &n->hhead;
return true;
}
static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_lookup_rcu(struct net *net, u8 type, u16 family,
u8 dir, u32 if_id)
{
struct xfrm_pol_inexact_key k = {
.family = family,
.type = type,
.dir = dir,
.if_id = if_id,
};
write_pnet(&k.net, net);
return rhashtable_lookup(&xfrm_policy_inexact_table, &k,
xfrm_pol_inexact_params);
}
static struct xfrm_pol_inexact_bin *
xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family,
u8 dir, u32 if_id)
{
struct xfrm_pol_inexact_bin *bin;
lockdep_assert_held(&net->xfrm.xfrm_policy_lock);
rcu_read_lock();
bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
rcu_read_unlock();
return bin;
}
static struct xfrm_policy *
__xfrm_policy_eval_candidates(struct hlist_head *chain,
struct xfrm_policy *prefer,
const struct flowi *fl,
u8 type, u16 family, int dir, u32 if_id)
{
u32 priority = prefer ? prefer->priority : ~0u;
struct xfrm_policy *pol;
if (!chain)
return NULL;
hlist_for_each_entry_rcu(pol, chain, bydst) {
int err;
if (pol->priority > priority)
break;
err = xfrm_policy_match(pol, fl, type, family, dir, if_id);
if (err) {
if (err != -ESRCH)
return ERR_PTR(err);
continue;
}
if (prefer) {
/* matches. Is it older than *prefer? */
if (pol->priority == priority &&
prefer->pos < pol->pos)
return prefer;
}
return pol;
}
return NULL;
}
static struct xfrm_policy *
xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand,
struct xfrm_policy *prefer,
const struct flowi *fl,
u8 type, u16 family, int dir, u32 if_id)
{
struct xfrm_policy *tmp;
int i;
for (i = 0; i < ARRAY_SIZE(cand->res); i++) {
tmp = __xfrm_policy_eval_candidates(cand->res[i],
prefer,
fl, type, family, dir,
if_id);
if (!tmp)
continue;
if (IS_ERR(tmp))
return tmp;
prefer = tmp;
}
return prefer;
}
static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
const struct flowi *fl, const struct flowi *fl,
u16 family, u8 dir, u16 family, u8 dir,
u32 if_id) u32 if_id)
{ {
int err; struct xfrm_pol_inexact_candidates cand;
struct xfrm_policy *pol, *ret;
const xfrm_address_t *daddr, *saddr; const xfrm_address_t *daddr, *saddr;
struct xfrm_pol_inexact_bin *bin;
struct xfrm_policy *pol, *ret;
struct hlist_head *chain; struct hlist_head *chain;
unsigned int sequence; unsigned int sequence;
u32 priority; u32 priority;
int err;
daddr = xfrm_flowi_daddr(fl, family); daddr = xfrm_flowi_daddr(fl, family);
saddr = xfrm_flowi_saddr(fl, family); saddr = xfrm_flowi_saddr(fl, family);
...@@ -1129,25 +2108,20 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, ...@@ -1129,25 +2108,20 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
break; break;
} }
} }
chain = &net->xfrm.policy_inexact[dir]; bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id);
hlist_for_each_entry_rcu(pol, chain, bydst) { if (!bin || !xfrm_policy_find_inexact_candidates(&cand, bin, saddr,
if ((pol->priority >= priority) && ret) daddr))
break; goto skip_inexact;
err = xfrm_policy_match(pol, fl, type, family, dir, if_id); pol = xfrm_policy_eval_candidates(&cand, ret, fl, type,
if (err) { family, dir, if_id);
if (err == -ESRCH) if (pol) {
continue;
else {
ret = ERR_PTR(err);
goto fail;
}
} else {
ret = pol; ret = pol;
break; if (IS_ERR(pol))
} goto fail;
} }
skip_inexact:
if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
goto retry; goto retry;
...@@ -1239,6 +2213,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, ...@@ -1239,6 +2213,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
/* Socket policies are not hashed. */ /* Socket policies are not hashed. */
if (!hlist_unhashed(&pol->bydst)) { if (!hlist_unhashed(&pol->bydst)) {
hlist_del_rcu(&pol->bydst); hlist_del_rcu(&pol->bydst);
hlist_del_init(&pol->bydst_inexact_list);
hlist_del(&pol->byidx); hlist_del(&pol->byidx);
} }
...@@ -2816,13 +3791,17 @@ static void xfrm_statistics_fini(struct net *net) ...@@ -2816,13 +3791,17 @@ static void xfrm_statistics_fini(struct net *net)
static int __net_init xfrm_policy_init(struct net *net) static int __net_init xfrm_policy_init(struct net *net)
{ {
unsigned int hmask, sz; unsigned int hmask, sz;
int dir; int dir, err;
if (net_eq(net, &init_net)) if (net_eq(net, &init_net)) {
xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
sizeof(struct xfrm_dst), sizeof(struct xfrm_dst),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL); NULL);
err = rhashtable_init(&xfrm_policy_inexact_table,
&xfrm_pol_inexact_params);
BUG_ON(err);
}
hmask = 8 - 1; hmask = 8 - 1;
sz = (hmask+1) * sizeof(struct hlist_head); sz = (hmask+1) * sizeof(struct hlist_head);
...@@ -2857,6 +3836,7 @@ static int __net_init xfrm_policy_init(struct net *net) ...@@ -2857,6 +3836,7 @@ static int __net_init xfrm_policy_init(struct net *net)
seqlock_init(&net->xfrm.policy_hthresh.lock); seqlock_init(&net->xfrm.policy_hthresh.lock);
INIT_LIST_HEAD(&net->xfrm.policy_all); INIT_LIST_HEAD(&net->xfrm.policy_all);
INIT_LIST_HEAD(&net->xfrm.inexact_bins);
INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild); INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
return 0; return 0;
...@@ -2875,6 +3855,7 @@ static int __net_init xfrm_policy_init(struct net *net) ...@@ -2875,6 +3855,7 @@ static int __net_init xfrm_policy_init(struct net *net)
static void xfrm_policy_fini(struct net *net) static void xfrm_policy_fini(struct net *net)
{ {
struct xfrm_pol_inexact_bin *b, *t;
unsigned int sz; unsigned int sz;
int dir; int dir;
...@@ -2900,6 +3881,11 @@ static void xfrm_policy_fini(struct net *net) ...@@ -2900,6 +3881,11 @@ static void xfrm_policy_fini(struct net *net)
sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
xfrm_hash_free(net->xfrm.policy_byidx, sz); xfrm_hash_free(net->xfrm.policy_byidx, sz);
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
list_for_each_entry_safe(b, t, &net->xfrm.inexact_bins, inexact_bins)
__xfrm_policy_inexact_prune_bin(b, true);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
} }
static int __net_init xfrm_net_init(struct net *net) static int __net_init xfrm_net_init(struct net *net)
...@@ -3065,7 +4051,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * ...@@ -3065,7 +4051,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
} }
} }
chain = &net->xfrm.policy_inexact[dir]; chain = &net->xfrm.policy_inexact[dir];
hlist_for_each_entry(pol, chain, bydst) { hlist_for_each_entry(pol, chain, bydst_inexact_list) {
if ((pol->priority >= priority) && ret) if ((pol->priority >= priority) && ret)
break; break;
......
...@@ -4,7 +4,8 @@ ...@@ -4,7 +4,8 @@
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/ CFLAGS += -I../../../../usr/include/
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
rtnetlink.sh xfrm_policy.sh
TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Check xfrm policy resolution. Topology:
#
# 1.2 1.1 3.1 3.10 2.1 2.2
# eth1 eth1 veth0 veth0 eth1 eth1
# ns1 ---- ns3 ----- ns4 ---- ns2
#
# ns3 and ns4 are connected via ipsec tunnel.
# pings from ns1 to ns2 (and vice versa) are supposed to work like this:
# ns1: ping 10.0.2.2: passes via ipsec tunnel.
# ns2: ping 10.0.1.2: passes via ipsec tunnel.
# ns1: ping 10.0.1.253: passes via ipsec tunnel (direct policy)
# ns2: ping 10.0.2.253: passes via ipsec tunnel (direct policy)
#
# ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception)
# ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception)
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0
policy_checks_ok=1
KEY_SHA=0xdeadbeef1234567890abcdefabcdefabcdefabcd
KEY_AES=0x0123456789abcdef0123456789012345
SPI1=0x1
SPI2=0x2
do_esp() {
local ns=$1
local me=$2
local remote=$3
local lnet=$4
local rnet=$5
local spi_out=$6
local spi_in=$7
ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow
# to fwd decrypted packets after esp processing:
ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow
}
do_esp_policy_get_check() {
local ns=$1
local lnet=$2
local rnet=$3
ip -net $ns xfrm policy get src $lnet dst $rnet dir out > /dev/null
if [ $? -ne 0 ] && [ $policy_checks_ok -eq 1 ] ;then
policy_checks_ok=0
echo "FAIL: ip -net $ns xfrm policy get src $lnet dst $rnet dir out"
ret=1
fi
ip -net $ns xfrm policy get src $rnet dst $lnet dir fwd > /dev/null
if [ $? -ne 0 ] && [ $policy_checks_ok -eq 1 ] ;then
policy_checks_ok=0
echo "FAIL: ip -net $ns xfrm policy get src $rnet dst $lnet dir fwd"
ret=1
fi
}
do_exception() {
local ns=$1
local me=$2
local remote=$3
local encryptip=$4
local plain=$5
# network $plain passes without tunnel
ip -net $ns xfrm policy add dst $plain dir out priority 10 action allow
# direct policy for $encryptip, use tunnel, higher prio takes precedence
ip -net $ns xfrm policy add dst $encryptip dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
}
# policies that are not supposed to match any packets generated in this test.
do_dummies4() {
local ns=$1
for i in $(seq 10 16);do
# dummy policy with wildcard src/dst.
echo netns exec $ns ip xfrm policy add src 0.0.0.0/0 dst 10.$i.99.0/30 dir out action block
echo netns exec $ns ip xfrm policy add src 10.$i.99.0/30 dst 0.0.0.0/0 dir out action block
for j in $(seq 32 64);do
echo netns exec $ns ip xfrm policy add src 10.$i.1.0/30 dst 10.$i.$j.0/30 dir out action block
# silly, as it encompasses the one above too, but its allowed:
echo netns exec $ns ip xfrm policy add src 10.$i.1.0/29 dst 10.$i.$j.0/29 dir out action block
# and yet again, even more broad one.
echo netns exec $ns ip xfrm policy add src 10.$i.1.0/24 dst 10.$i.$j.0/24 dir out action block
echo netns exec $ns ip xfrm policy add src 10.$i.$j.0/24 dst 10.$i.1.0/24 dir fwd action block
done
done | ip -batch /dev/stdin
}
do_dummies6() {
local ns=$1
for i in $(seq 10 16);do
for j in $(seq 32 64);do
echo netns exec $ns ip xfrm policy add src dead:$i::/64 dst dead:$i:$j::/64 dir out action block
echo netns exec $ns ip xfrm policy add src dead:$i:$j::/64 dst dead:$i::/24 dir fwd action block
done
done | ip -batch /dev/stdin
}
check_ipt_policy_count()
{
ns=$1
ip netns exec $ns iptables-save -c |grep policy | ( read c rest
ip netns exec $ns iptables -Z
if [ x"$c" = x'[0:0]' ]; then
exit 0
elif [ x"$c" = x ]; then
echo "ERROR: No counters"
ret=1
exit 111
else
exit 1
fi
)
}
check_xfrm() {
# 0: iptables -m policy rule count == 0
# 1: iptables -m policy rule count != 0
rval=$1
ip=$2
lret=0
ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null
check_ipt_policy_count ns3
if [ $? -ne $rval ] ; then
lret=1
fi
check_ipt_policy_count ns4
if [ $? -ne $rval ] ; then
lret=1
fi
ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null
check_ipt_policy_count ns3
if [ $? -ne $rval ] ; then
lret=1
fi
check_ipt_policy_count ns4
if [ $? -ne $rval ] ; then
lret=1
fi
return $lret
}
#check for needed privileges
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
exit $ksft_skip
fi
ip -Version 2>/dev/null >/dev/null
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without the ip tool"
exit $ksft_skip
fi
# needed to check if policy lookup got valid ipsec result
iptables --version 2>/dev/null >/dev/null
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without iptables tool"
exit $ksft_skip
fi
for i in 1 2 3 4; do
ip netns add ns$i
ip -net ns$i link set lo up
done
DEV=veth0
ip link add $DEV netns ns1 type veth peer name eth1 netns ns3
ip link add $DEV netns ns2 type veth peer name eth1 netns ns4
ip link add $DEV netns ns3 type veth peer name veth0 netns ns4
DEV=veth0
for i in 1 2; do
ip -net ns$i link set $DEV up
ip -net ns$i addr add 10.0.$i.2/24 dev $DEV
ip -net ns$i addr add dead:$i::2/64 dev $DEV
ip -net ns$i addr add 10.0.$i.253 dev $DEV
ip -net ns$i addr add 10.0.$i.254 dev $DEV
ip -net ns$i addr add dead:$i::fd dev $DEV
ip -net ns$i addr add dead:$i::fe dev $DEV
done
for i in 3 4; do
ip -net ns$i link set eth1 up
ip -net ns$i link set veth0 up
done
ip -net ns1 route add default via 10.0.1.1
ip -net ns2 route add default via 10.0.2.1
ip -net ns3 addr add 10.0.1.1/24 dev eth1
ip -net ns3 addr add 10.0.3.1/24 dev veth0
ip -net ns3 addr add 2001:1::1/64 dev eth1
ip -net ns3 addr add 2001:3::1/64 dev veth0
ip -net ns3 route add default via 10.0.3.10
ip -net ns4 addr add 10.0.2.1/24 dev eth1
ip -net ns4 addr add 10.0.3.10/24 dev veth0
ip -net ns4 addr add 2001:2::1/64 dev eth1
ip -net ns4 addr add 2001:3::10/64 dev veth0
ip -net ns4 route add default via 10.0.3.1
for j in 4 6; do
for i in 3 4;do
ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null
ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null
done
done
# abuse iptables rule counter to check if ping matches a policy
ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
if [ $? -ne 0 ];then
echo "SKIP: Could not insert iptables rule"
for i in 1 2 3 4;do ip netns del ns$i;done
exit $ksft_skip
fi
# localip remoteip localnet remotenet
do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2
do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1
do_dummies4 ns3
do_dummies6 ns4
do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24
do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24
do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64
do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64
# ping to .254 should use ipsec, exception is not installed.
check_xfrm 1 254
if [ $? -ne 0 ]; then
echo "FAIL: expected ping to .254 to use ipsec tunnel"
ret=1
else
echo "PASS: policy before exception matches"
fi
# installs exceptions
# localip remoteip encryptdst plaindst
do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28
do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96
do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96
# ping to .254 should now be excluded from the tunnel
check_xfrm 0 254
if [ $? -ne 0 ]; then
echo "FAIL: expected ping to .254 to fail"
ret=1
else
echo "PASS: ping to .254 bypassed ipsec tunnel"
fi
# ping to .253 should use use ipsec due to direct policy exception.
check_xfrm 1 253
if [ $? -ne 0 ]; then
echo "FAIL: expected ping to .253 to use ipsec tunnel"
ret=1
else
echo "PASS: direct policy matches"
fi
# ping to .2 should use ipsec.
check_xfrm 1 2
if [ $? -ne 0 ]; then
echo "FAIL: expected ping to .2 to use ipsec tunnel"
ret=1
else
echo "PASS: policy matches"
fi
for i in 1 2 3 4;do ip netns del ns$i;done
exit $ret
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment