Commit 5ce90c84 authored by Steffen Klassert's avatar Steffen Klassert

Merge branch 'xfrm: speed up policy insertions'

Florian Westphal says:

====================
Policy insertions do not scale well, due to both a lienar list walk
to find the insertion spot and another list walk to set the 'pos' value
(a tie-breaker to detect which policy is older when there is ambiguity
as to which one should be matched).

First patch gets rid of the second list walk on insert.
Rest of the patches get rid of the insertion walk.

This list walk was only needed because when I moved the policy db
implementation to rbtree I retained the old insertion method for the
sake of XFRM_MIGRATE.

Switching that to tree-based lookup avoids the need for the full
list search.

After this, insertion of a policy is largely independent of the number
of pre-existing policies as long as they do not share the same source/
destination networks.

Note that this is compile tested only as I did not find any
tests for XFRM_MIGRATE.
====================
Signed-off-by: default avatarSteffen Klassert <steffen.klassert@secunet.com>
parents 54f2f78d a54ad727
......@@ -555,7 +555,6 @@ struct xfrm_policy {
u16 family;
struct xfrm_sec_ctx *security;
struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH];
struct hlist_node bydst_inexact_list;
struct rcu_head rcu;
struct xfrm_dev_offload xdo;
......
......@@ -196,8 +196,6 @@ xfrm_policy_inexact_lookup_rcu(struct net *net,
static struct xfrm_policy *
xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy,
bool excl);
static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
struct xfrm_policy *policy);
static bool
xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand,
......@@ -410,7 +408,6 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
INIT_HLIST_NODE(&policy->bydst_inexact_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
rwlock_init(&policy->lock);
......@@ -1228,26 +1225,31 @@ xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl)
return ERR_PTR(-EEXIST);
}
chain = &net->xfrm.policy_inexact[dir];
xfrm_policy_insert_inexact_list(chain, policy);
if (delpol)
__xfrm_policy_inexact_prune_bin(bin, false);
return delpol;
}
static bool xfrm_policy_is_dead_or_sk(const struct xfrm_policy *policy)
{
int dir;
if (policy->walk.dead)
return true;
dir = xfrm_policy_id2dir(policy->index);
return dir >= XFRM_POLICY_MAX;
}
static void xfrm_hash_rebuild(struct work_struct *work)
{
struct net *net = container_of(work, struct net,
xfrm.policy_hthresh.work);
unsigned int hmask;
struct xfrm_policy *pol;
struct xfrm_policy *policy;
struct hlist_head *chain;
struct hlist_head *odst;
struct hlist_node *newpos;
int i;
int dir;
unsigned seq;
u8 lbits4, rbits4, lbits6, rbits6;
......@@ -1311,23 +1313,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
goto out_unlock;
}
/* reset the bydst and inexact table in all directions */
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
struct hlist_node *n;
hlist_for_each_entry_safe(policy, n,
&net->xfrm.policy_inexact[dir],
bydst_inexact_list) {
hlist_del_rcu(&policy->bydst);
hlist_del_init(&policy->bydst_inexact_list);
}
hmask = net->xfrm.policy_bydst[dir].hmask;
odst = net->xfrm.policy_bydst[dir].table;
for (i = hmask; i >= 0; i--) {
hlist_for_each_entry_safe(policy, n, odst + i, bydst)
hlist_del_rcu(&policy->bydst);
}
if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
/* dir out => dst = remote, src = local */
net->xfrm.policy_bydst[dir].dbits4 = rbits4;
......@@ -1352,6 +1338,9 @@ static void xfrm_hash_rebuild(struct work_struct *work)
/* skip socket policies */
continue;
}
hlist_del_rcu(&policy->bydst);
newpos = NULL;
chain = policy_hash_bysel(net, &policy->selector,
policy->family, dir);
......@@ -1519,42 +1508,6 @@ static const struct rhashtable_params xfrm_pol_inexact_params = {
.automatic_shrinking = true,
};
static void xfrm_policy_insert_inexact_list(struct hlist_head *chain,
struct xfrm_policy *policy)
{
struct xfrm_policy *pol, *delpol = NULL;
struct hlist_node *newpos = NULL;
int i = 0;
hlist_for_each_entry(pol, chain, bydst_inexact_list) {
if (pol->type == policy->type &&
pol->if_id == policy->if_id &&
!selector_cmp(&pol->selector, &policy->selector) &&
xfrm_policy_mark_match(&policy->mark, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
delpol = pol;
if (policy->priority > pol->priority)
continue;
} else if (policy->priority >= pol->priority) {
newpos = &pol->bydst_inexact_list;
continue;
}
if (delpol)
break;
}
if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET)
hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos);
else
hlist_add_head_rcu(&policy->bydst_inexact_list, chain);
hlist_for_each_entry(pol, chain, bydst_inexact_list) {
pol->pos = i;
i++;
}
}
static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain,
struct xfrm_policy *policy,
bool excl)
......@@ -2294,10 +2247,52 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
return pol;
}
static u32 xfrm_gen_pos_slow(struct net *net)
{
struct xfrm_policy *policy;
u32 i = 0;
/* oldest entry is last in list */
list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
if (!xfrm_policy_is_dead_or_sk(policy))
policy->pos = ++i;
}
return i;
}
static u32 xfrm_gen_pos(struct net *net)
{
const struct xfrm_policy *policy;
u32 i = 0;
/* most recently added policy is at the head of the list */
list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) {
if (xfrm_policy_is_dead_or_sk(policy))
continue;
if (policy->pos == UINT_MAX)
return xfrm_gen_pos_slow(net);
i = policy->pos + 1;
break;
}
return i;
}
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
struct net *net = xp_net(pol);
switch (dir) {
case XFRM_POLICY_IN:
case XFRM_POLICY_FWD:
case XFRM_POLICY_OUT:
pol->pos = xfrm_gen_pos(net);
break;
}
list_add(&pol->walk.all, &net->xfrm.policy_all);
net->xfrm.policy_count[dir]++;
xfrm_pol_hold(pol);
......@@ -2314,7 +2309,6 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
/* Socket policies are not hashed. */
if (!hlist_unhashed(&pol->bydst)) {
hlist_del_rcu(&pol->bydst);
hlist_del_init(&pol->bydst_inexact_list);
hlist_del(&pol->byidx);
}
......@@ -4437,63 +4431,50 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
#endif
#ifdef CONFIG_XFRM_MIGRATE
static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
const struct xfrm_selector *sel_tgt)
{
if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
if (sel_tgt->family == sel_cmp->family &&
xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
sel_cmp->family) &&
xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
sel_cmp->family) &&
sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
return true;
}
} else {
if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
return true;
}
}
return false;
}
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
u8 dir, u8 type, struct net *net, u32 if_id)
{
struct xfrm_policy *pol, *ret = NULL;
struct hlist_head *chain;
u32 priority = ~0U;
struct flowi fl;
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
hlist_for_each_entry(pol, chain, bydst) {
if ((if_id == 0 || pol->if_id == if_id) &&
xfrm_migrate_selector_match(sel, &pol->selector) &&
pol->type == type) {
ret = pol;
priority = ret->priority;
break;
}
}
chain = &net->xfrm.policy_inexact[dir];
hlist_for_each_entry(pol, chain, bydst_inexact_list) {
if ((pol->priority >= priority) && ret)
break;
memset(&fl, 0, sizeof(fl));
if ((if_id == 0 || pol->if_id == if_id) &&
xfrm_migrate_selector_match(sel, &pol->selector) &&
pol->type == type) {
ret = pol;
fl.flowi_proto = sel->proto;
switch (sel->family) {
case AF_INET:
fl.u.ip4.saddr = sel->saddr.a4;
fl.u.ip4.daddr = sel->daddr.a4;
if (sel->proto == IPSEC_ULPROTO_ANY)
break;
}
fl.u.flowi4_oif = sel->ifindex;
fl.u.ip4.fl4_sport = sel->sport;
fl.u.ip4.fl4_dport = sel->dport;
break;
case AF_INET6:
fl.u.ip6.saddr = sel->saddr.in6;
fl.u.ip6.daddr = sel->daddr.in6;
if (sel->proto == IPSEC_ULPROTO_ANY)
break;
fl.u.flowi6_oif = sel->ifindex;
fl.u.ip6.fl4_sport = sel->sport;
fl.u.ip6.fl4_dport = sel->dport;
break;
default:
return ERR_PTR(-EAFNOSUPPORT);
}
xfrm_pol_hold(ret);
rcu_read_lock();
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
pol = xfrm_policy_lookup_bytype(net, type, &fl, sel->family, dir, if_id);
if (IS_ERR_OR_NULL(pol))
goto out_unlock;
return ret;
if (!xfrm_pol_hold_rcu(ret))
pol = NULL;
out_unlock:
rcu_read_unlock();
return pol;
}
static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
......@@ -4630,9 +4611,9 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
/* Stage 1 - find policy */
pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id);
if (!pol) {
if (IS_ERR_OR_NULL(pol)) {
NL_SET_ERR_MSG(extack, "Target policy not found");
err = -ENOENT;
err = IS_ERR(pol) ? PTR_ERR(pol) : -ENOENT;
goto out;
}
......
......@@ -56,7 +56,7 @@ TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
TEST_PROGS += big_tcp.sh
TEST_PROGS += netns-sysctl.sh
TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
source lib.sh
timeout=4m
ret=0
tmp=$(mktemp)
cleanup() {
cleanup_all_ns
rm -f "$tmp"
}
trap cleanup EXIT
maxpolicies=100000
[ "$KSFT_MACHINE_SLOW" = "yes" ] && maxpolicies=10000
do_dummies4() {
local dir="$1"
local max="$2"
local policies
local pfx
pfx=30
policies=0
ip netns exec "$ns" ip xfrm policy flush
for i in $(seq 1 100);do
local s
local d
for j in $(seq 1 255);do
s=$((i+0))
d=$((i+100))
for a in $(seq 1 8 255); do
policies=$((policies+1))
[ "$policies" -gt "$max" ] && return
echo xfrm policy add src 10.$s.$j.0/30 dst 10.$d.$j.$a/$pfx dir $dir action block
done
for a in $(seq 1 8 255); do
policies=$((policies+1))
[ "$policies" -gt "$max" ] && return
echo xfrm policy add src 10.$s.$j.$a/30 dst 10.$d.$j.0/$pfx dir $dir action block
done
done
done
}
setup_ns ns
do_bench()
{
local max="$1"
start=$(date +%s%3N)
do_dummies4 "out" "$max" > "$tmp"
if ! timeout "$timeout" ip netns exec "$ns" ip -batch "$tmp";then
echo "WARNING: policy insertion cancelled after $timeout"
ret=1
fi
stop=$(date +%s%3N)
result=$((stop-start))
policies=$(wc -l < "$tmp")
printf "Inserted %-06s policies in $result ms\n" $policies
have=$(ip netns exec "$ns" ip xfrm policy show | grep "action block" | wc -l)
if [ "$have" -ne "$policies" ]; then
echo "WARNING: mismatch, have $have policies, expected $policies"
ret=1
fi
}
p=100
while [ $p -le "$maxpolicies" ]; do
do_bench "$p"
p="${p}0"
done
exit $ret
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment