Commit 575f9c43 authored by David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next

Steffen Klassert says:

====================
ipsec-next 2016-09-08

1) Constify the xfrm_replay structures. From Julia Lawall

2) Protect xfrm state hash tables with rcu, lookups
   can be done now without acquiring xfrm_state_lock.
   From Florian Westphal.

3) Protect xfrm policy hash tables with rcu, lookups
   can be done now without acquiring xfrm_policy_lock.
   From Florian Westphal.

4) We don't need to have a garbage collector list per
   namespace anymore, so use a global one instead.
   From Florian Westphal.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 0f76d256 35db57bb
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
struct ctl_table_header; struct ctl_table_header;
struct xfrm_policy_hash { struct xfrm_policy_hash {
struct hlist_head *table; struct hlist_head __rcu *table;
unsigned int hmask; unsigned int hmask;
u8 dbits4; u8 dbits4;
u8 sbits4; u8 sbits4;
...@@ -38,14 +38,12 @@ struct netns_xfrm { ...@@ -38,14 +38,12 @@ struct netns_xfrm {
* mode. Also, it can be used by ah/esp icmp error handler to find * mode. Also, it can be used by ah/esp icmp error handler to find
* offending SA. * offending SA.
*/ */
struct hlist_head *state_bydst; struct hlist_head __rcu *state_bydst;
struct hlist_head *state_bysrc; struct hlist_head __rcu *state_bysrc;
struct hlist_head *state_byspi; struct hlist_head __rcu *state_byspi;
unsigned int state_hmask; unsigned int state_hmask;
unsigned int state_num; unsigned int state_num;
struct work_struct state_hash_work; struct work_struct state_hash_work;
struct hlist_head state_gc_list;
struct work_struct state_gc_work;
struct list_head policy_all; struct list_head policy_all;
struct hlist_head *policy_byidx; struct hlist_head *policy_byidx;
...@@ -73,7 +71,7 @@ struct netns_xfrm { ...@@ -73,7 +71,7 @@ struct netns_xfrm {
struct dst_ops xfrm6_dst_ops; struct dst_ops xfrm6_dst_ops;
#endif #endif
spinlock_t xfrm_state_lock; spinlock_t xfrm_state_lock;
rwlock_t xfrm_policy_lock; spinlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex; struct mutex xfrm_cfg_mutex;
/* flow cache part */ /* flow cache part */
......
...@@ -187,7 +187,7 @@ struct xfrm_state { ...@@ -187,7 +187,7 @@ struct xfrm_state {
struct xfrm_replay_state_esn *preplay_esn; struct xfrm_replay_state_esn *preplay_esn;
/* The functions for replay detection. */ /* The functions for replay detection. */
struct xfrm_replay *repl; const struct xfrm_replay *repl;
/* internal flag that only holds state for delayed aevent at the /* internal flag that only holds state for delayed aevent at the
* moment * moment
......
...@@ -49,6 +49,7 @@ static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] ...@@ -49,6 +49,7 @@ static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
__read_mostly; __read_mostly;
static struct kmem_cache *xfrm_dst_cache __read_mostly; static struct kmem_cache *xfrm_dst_cache __read_mostly;
static __read_mostly seqcount_t xfrm_policy_hash_generation;
static void xfrm_init_pmtu(struct dst_entry *dst); static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst);
...@@ -59,6 +60,11 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir); ...@@ -59,6 +60,11 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
int dir); int dir);
static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy)
{
return atomic_inc_not_zero(&policy->refcnt);
}
static inline bool static inline bool
__xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
{ {
...@@ -385,9 +391,11 @@ static struct hlist_head *policy_hash_bysel(struct net *net, ...@@ -385,9 +391,11 @@ static struct hlist_head *policy_hash_bysel(struct net *net,
__get_hash_thresh(net, family, dir, &dbits, &sbits); __get_hash_thresh(net, family, dir, &dbits, &sbits);
hash = __sel_hash(sel, family, hmask, dbits, sbits); hash = __sel_hash(sel, family, hmask, dbits, sbits);
return (hash == hmask + 1 ? if (hash == hmask + 1)
&net->xfrm.policy_inexact[dir] : return &net->xfrm.policy_inexact[dir];
net->xfrm.policy_bydst[dir].table + hash);
return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
} }
static struct hlist_head *policy_hash_direct(struct net *net, static struct hlist_head *policy_hash_direct(struct net *net,
...@@ -403,7 +411,8 @@ static struct hlist_head *policy_hash_direct(struct net *net, ...@@ -403,7 +411,8 @@ static struct hlist_head *policy_hash_direct(struct net *net,
__get_hash_thresh(net, family, dir, &dbits, &sbits); __get_hash_thresh(net, family, dir, &dbits, &sbits);
hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits); hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
return net->xfrm.policy_bydst[dir].table + hash; return rcu_dereference_check(net->xfrm.policy_bydst[dir].table,
lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash;
} }
static void xfrm_dst_hash_transfer(struct net *net, static void xfrm_dst_hash_transfer(struct net *net,
...@@ -426,14 +435,14 @@ static void xfrm_dst_hash_transfer(struct net *net, ...@@ -426,14 +435,14 @@ static void xfrm_dst_hash_transfer(struct net *net,
h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
pol->family, nhashmask, dbits, sbits); pol->family, nhashmask, dbits, sbits);
if (!entry0) { if (!entry0) {
hlist_del(&pol->bydst); hlist_del_rcu(&pol->bydst);
hlist_add_head(&pol->bydst, ndsttable+h); hlist_add_head_rcu(&pol->bydst, ndsttable + h);
h0 = h; h0 = h;
} else { } else {
if (h != h0) if (h != h0)
continue; continue;
hlist_del(&pol->bydst); hlist_del_rcu(&pol->bydst);
hlist_add_behind(&pol->bydst, entry0); hlist_add_behind_rcu(&pol->bydst, entry0);
} }
entry0 = &pol->bydst; entry0 = &pol->bydst;
} }
...@@ -468,22 +477,32 @@ static void xfrm_bydst_resize(struct net *net, int dir) ...@@ -468,22 +477,32 @@ static void xfrm_bydst_resize(struct net *net, int dir)
unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nhashmask = xfrm_new_hash_mask(hmask);
unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
struct hlist_head *ndst = xfrm_hash_alloc(nsize); struct hlist_head *ndst = xfrm_hash_alloc(nsize);
struct hlist_head *odst;
int i; int i;
if (!ndst) if (!ndst)
return; return;
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
write_seqcount_begin(&xfrm_policy_hash_generation);
odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
lockdep_is_held(&net->xfrm.xfrm_policy_lock));
odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table,
lockdep_is_held(&net->xfrm.xfrm_policy_lock));
for (i = hmask; i >= 0; i--) for (i = hmask; i >= 0; i--)
xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir); xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
net->xfrm.policy_bydst[dir].table = ndst; rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst);
net->xfrm.policy_bydst[dir].hmask = nhashmask; net->xfrm.policy_bydst[dir].hmask = nhashmask;
write_unlock_bh(&net->xfrm.xfrm_policy_lock); write_seqcount_end(&xfrm_policy_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
synchronize_rcu();
xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
} }
...@@ -500,7 +519,7 @@ static void xfrm_byidx_resize(struct net *net, int total) ...@@ -500,7 +519,7 @@ static void xfrm_byidx_resize(struct net *net, int total)
if (!nidx) if (!nidx)
return; return;
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
for (i = hmask; i >= 0; i--) for (i = hmask; i >= 0; i--)
xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
...@@ -508,7 +527,7 @@ static void xfrm_byidx_resize(struct net *net, int total) ...@@ -508,7 +527,7 @@ static void xfrm_byidx_resize(struct net *net, int total)
net->xfrm.policy_byidx = nidx; net->xfrm.policy_byidx = nidx;
net->xfrm.policy_idx_hmask = nhashmask; net->xfrm.policy_idx_hmask = nhashmask;
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
} }
...@@ -541,7 +560,6 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total) ...@@ -541,7 +560,6 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total)
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
{ {
read_lock_bh(&net->xfrm.xfrm_policy_lock);
si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
...@@ -550,7 +568,6 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) ...@@ -550,7 +568,6 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhcnt = net->xfrm.policy_idx_hmask;
si->spdhmcnt = xfrm_policy_hashmax; si->spdhmcnt = xfrm_policy_hashmax;
read_unlock_bh(&net->xfrm.xfrm_policy_lock);
} }
EXPORT_SYMBOL(xfrm_spd_getinfo); EXPORT_SYMBOL(xfrm_spd_getinfo);
...@@ -600,7 +617,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) ...@@ -600,7 +617,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
rbits6 = net->xfrm.policy_hthresh.rbits6; rbits6 = net->xfrm.policy_hthresh.rbits6;
} while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
/* reset the bydst and inexact table in all directions */ /* reset the bydst and inexact table in all directions */
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
...@@ -642,7 +659,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) ...@@ -642,7 +659,7 @@ static void xfrm_hash_rebuild(struct work_struct *work)
hlist_add_head(&policy->bydst, chain); hlist_add_head(&policy->bydst, chain);
} }
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
mutex_unlock(&hash_resize_mutex); mutex_unlock(&hash_resize_mutex);
} }
...@@ -753,7 +770,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) ...@@ -753,7 +770,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
struct hlist_head *chain; struct hlist_head *chain;
struct hlist_node *newpos; struct hlist_node *newpos;
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
delpol = NULL; delpol = NULL;
newpos = NULL; newpos = NULL;
...@@ -764,7 +781,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) ...@@ -764,7 +781,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
xfrm_sec_ctx_match(pol->security, policy->security) && xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) { !WARN_ON(delpol)) {
if (excl) { if (excl) {
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return -EEXIST; return -EEXIST;
} }
delpol = pol; delpol = pol;
...@@ -800,7 +817,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) ...@@ -800,7 +817,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
policy->curlft.use_time = 0; policy->curlft.use_time = 0;
if (!mod_timer(&policy->timer, jiffies + HZ)) if (!mod_timer(&policy->timer, jiffies + HZ))
xfrm_pol_hold(policy); xfrm_pol_hold(policy);
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (delpol) if (delpol)
xfrm_policy_kill(delpol); xfrm_policy_kill(delpol);
...@@ -820,7 +837,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, ...@@ -820,7 +837,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
struct hlist_head *chain; struct hlist_head *chain;
*err = 0; *err = 0;
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_bysel(net, sel, sel->family, dir); chain = policy_hash_bysel(net, sel, sel->family, dir);
ret = NULL; ret = NULL;
hlist_for_each_entry(pol, chain, bydst) { hlist_for_each_entry(pol, chain, bydst) {
...@@ -833,7 +850,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, ...@@ -833,7 +850,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
*err = security_xfrm_policy_delete( *err = security_xfrm_policy_delete(
pol->security); pol->security);
if (*err) { if (*err) {
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return pol; return pol;
} }
__xfrm_policy_unlink(pol, dir); __xfrm_policy_unlink(pol, dir);
...@@ -842,7 +859,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, ...@@ -842,7 +859,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
break; break;
} }
} }
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (ret && delete) if (ret && delete)
xfrm_policy_kill(ret); xfrm_policy_kill(ret);
...@@ -861,7 +878,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, ...@@ -861,7 +878,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
return NULL; return NULL;
*err = 0; *err = 0;
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = net->xfrm.policy_byidx + idx_hash(net, id); chain = net->xfrm.policy_byidx + idx_hash(net, id);
ret = NULL; ret = NULL;
hlist_for_each_entry(pol, chain, byidx) { hlist_for_each_entry(pol, chain, byidx) {
...@@ -872,7 +889,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, ...@@ -872,7 +889,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
*err = security_xfrm_policy_delete( *err = security_xfrm_policy_delete(
pol->security); pol->security);
if (*err) { if (*err) {
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return pol; return pol;
} }
__xfrm_policy_unlink(pol, dir); __xfrm_policy_unlink(pol, dir);
...@@ -881,7 +898,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, ...@@ -881,7 +898,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
break; break;
} }
} }
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (ret && delete) if (ret && delete)
xfrm_policy_kill(ret); xfrm_policy_kill(ret);
...@@ -939,7 +956,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) ...@@ -939,7 +956,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
{ {
int dir, err = 0, cnt = 0; int dir, err = 0, cnt = 0;
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
err = xfrm_policy_flush_secctx_check(net, type, task_valid); err = xfrm_policy_flush_secctx_check(net, type, task_valid);
if (err) if (err)
...@@ -955,14 +972,14 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) ...@@ -955,14 +972,14 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
if (pol->type != type) if (pol->type != type)
continue; continue;
__xfrm_policy_unlink(pol, dir); __xfrm_policy_unlink(pol, dir);
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
cnt++; cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol); xfrm_policy_kill(pol);
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
goto again1; goto again1;
} }
...@@ -974,13 +991,13 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) ...@@ -974,13 +991,13 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
if (pol->type != type) if (pol->type != type)
continue; continue;
__xfrm_policy_unlink(pol, dir); __xfrm_policy_unlink(pol, dir);
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
cnt++; cnt++;
xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_audit_policy_delete(pol, 1, task_valid);
xfrm_policy_kill(pol); xfrm_policy_kill(pol);
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
goto again2; goto again2;
} }
} }
...@@ -989,7 +1006,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) ...@@ -989,7 +1006,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
if (!cnt) if (!cnt)
err = -ESRCH; err = -ESRCH;
out: out:
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return err; return err;
} }
EXPORT_SYMBOL(xfrm_policy_flush); EXPORT_SYMBOL(xfrm_policy_flush);
...@@ -1009,7 +1026,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, ...@@ -1009,7 +1026,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
if (list_empty(&walk->walk.all) && walk->seq != 0) if (list_empty(&walk->walk.all) && walk->seq != 0)
return 0; return 0;
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
if (list_empty(&walk->walk.all)) if (list_empty(&walk->walk.all))
x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
else else
...@@ -1037,7 +1054,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, ...@@ -1037,7 +1054,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
} }
list_del_init(&walk->walk.all); list_del_init(&walk->walk.all);
out: out:
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return error; return error;
} }
EXPORT_SYMBOL(xfrm_policy_walk); EXPORT_SYMBOL(xfrm_policy_walk);
...@@ -1056,9 +1073,9 @@ void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) ...@@ -1056,9 +1073,9 @@ void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
if (list_empty(&walk->walk.all)) if (list_empty(&walk->walk.all))
return; return;
write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
list_del(&walk->walk.all); list_del(&walk->walk.all);
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
} }
EXPORT_SYMBOL(xfrm_policy_walk_done); EXPORT_SYMBOL(xfrm_policy_walk_done);
...@@ -1096,17 +1113,24 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, ...@@ -1096,17 +1113,24 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
struct xfrm_policy *pol, *ret; struct xfrm_policy *pol, *ret;
const xfrm_address_t *daddr, *saddr; const xfrm_address_t *daddr, *saddr;
struct hlist_head *chain; struct hlist_head *chain;
u32 priority = ~0U; unsigned int sequence;
u32 priority;
daddr = xfrm_flowi_daddr(fl, family); daddr = xfrm_flowi_daddr(fl, family);
saddr = xfrm_flowi_saddr(fl, family); saddr = xfrm_flowi_saddr(fl, family);
if (unlikely(!daddr || !saddr)) if (unlikely(!daddr || !saddr))
return NULL; return NULL;
read_lock_bh(&net->xfrm.xfrm_policy_lock); rcu_read_lock();
chain = policy_hash_direct(net, daddr, saddr, family, dir); retry:
do {
sequence = read_seqcount_begin(&xfrm_policy_hash_generation);
chain = policy_hash_direct(net, daddr, saddr, family, dir);
} while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence));
priority = ~0U;
ret = NULL; ret = NULL;
hlist_for_each_entry(pol, chain, bydst) { hlist_for_each_entry_rcu(pol, chain, bydst) {
err = xfrm_policy_match(pol, fl, type, family, dir); err = xfrm_policy_match(pol, fl, type, family, dir);
if (err) { if (err) {
if (err == -ESRCH) if (err == -ESRCH)
...@@ -1122,7 +1146,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, ...@@ -1122,7 +1146,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
} }
} }
chain = &net->xfrm.policy_inexact[dir]; chain = &net->xfrm.policy_inexact[dir];
hlist_for_each_entry(pol, chain, bydst) { hlist_for_each_entry_rcu(pol, chain, bydst) {
if ((pol->priority >= priority) && ret) if ((pol->priority >= priority) && ret)
break; break;
...@@ -1140,9 +1164,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, ...@@ -1140,9 +1164,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
} }
} }
xfrm_pol_hold(ret); if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence))
goto retry;
if (ret && !xfrm_pol_hold_rcu(ret))
goto retry;
fail: fail:
read_unlock_bh(&net->xfrm.xfrm_policy_lock); rcu_read_unlock();
return ret; return ret;
} }
...@@ -1219,10 +1247,9 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, ...@@ -1219,10 +1247,9 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
const struct flowi *fl) const struct flowi *fl)
{ {
struct xfrm_policy *pol; struct xfrm_policy *pol;
struct net *net = sock_net(sk);
rcu_read_lock(); rcu_read_lock();
read_lock_bh(&net->xfrm.xfrm_policy_lock); again:
pol = rcu_dereference(sk->sk_policy[dir]); pol = rcu_dereference(sk->sk_policy[dir]);
if (pol != NULL) { if (pol != NULL) {
bool match = xfrm_selector_match(&pol->selector, fl, bool match = xfrm_selector_match(&pol->selector, fl,
...@@ -1237,8 +1264,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, ...@@ -1237,8 +1264,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
err = security_xfrm_policy_lookup(pol->security, err = security_xfrm_policy_lookup(pol->security,
fl->flowi_secid, fl->flowi_secid,
policy_to_flow_dir(dir)); policy_to_flow_dir(dir));
if (!err) if (!err && !xfrm_pol_hold_rcu(pol))
xfrm_pol_hold(pol); goto again;
else if (err == -ESRCH) else if (err == -ESRCH)
pol = NULL; pol = NULL;
else else
...@@ -1247,7 +1274,6 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, ...@@ -1247,7 +1274,6 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
pol = NULL; pol = NULL;
} }
out: out:
read_unlock_bh(&net->xfrm.xfrm_policy_lock);
rcu_read_unlock(); rcu_read_unlock();
return pol; return pol;
} }
...@@ -1271,7 +1297,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, ...@@ -1271,7 +1297,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
/* Socket policies are not hashed. */ /* Socket policies are not hashed. */
if (!hlist_unhashed(&pol->bydst)) { if (!hlist_unhashed(&pol->bydst)) {
hlist_del(&pol->bydst); hlist_del_rcu(&pol->bydst);
hlist_del(&pol->byidx); hlist_del(&pol->byidx);
} }
...@@ -1295,9 +1321,9 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir) ...@@ -1295,9 +1321,9 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
{ {
struct net *net = xp_net(pol); struct net *net = xp_net(pol);
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
pol = __xfrm_policy_unlink(pol, dir); pol = __xfrm_policy_unlink(pol, dir);
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (pol) { if (pol) {
xfrm_policy_kill(pol); xfrm_policy_kill(pol);
return 0; return 0;
...@@ -1316,7 +1342,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) ...@@ -1316,7 +1342,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
return -EINVAL; return -EINVAL;
#endif #endif
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
old_pol = rcu_dereference_protected(sk->sk_policy[dir], old_pol = rcu_dereference_protected(sk->sk_policy[dir],
lockdep_is_held(&net->xfrm.xfrm_policy_lock)); lockdep_is_held(&net->xfrm.xfrm_policy_lock));
if (pol) { if (pol) {
...@@ -1334,7 +1360,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) ...@@ -1334,7 +1360,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
*/ */
xfrm_sk_policy_unlink(old_pol, dir); xfrm_sk_policy_unlink(old_pol, dir);
} }
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
if (old_pol) { if (old_pol) {
xfrm_policy_kill(old_pol); xfrm_policy_kill(old_pol);
...@@ -1364,9 +1390,9 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) ...@@ -1364,9 +1390,9 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
newp->type = old->type; newp->type = old->type;
memcpy(newp->xfrm_vec, old->xfrm_vec, memcpy(newp->xfrm_vec, old->xfrm_vec,
newp->xfrm_nr*sizeof(struct xfrm_tmpl)); newp->xfrm_nr*sizeof(struct xfrm_tmpl));
write_lock_bh(&net->xfrm.xfrm_policy_lock); spin_lock_bh(&net->xfrm.xfrm_policy_lock);
xfrm_sk_policy_link(newp, dir); xfrm_sk_policy_link(newp, dir);
write_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
xfrm_pol_put(newp); xfrm_pol_put(newp);
} }
return newp; return newp;
...@@ -3048,7 +3074,7 @@ static int __net_init xfrm_net_init(struct net *net) ...@@ -3048,7 +3074,7 @@ static int __net_init xfrm_net_init(struct net *net)
/* Initialize the per-net locks here */ /* Initialize the per-net locks here */
spin_lock_init(&net->xfrm.xfrm_state_lock); spin_lock_init(&net->xfrm.xfrm_state_lock);
rwlock_init(&net->xfrm.xfrm_policy_lock); spin_lock_init(&net->xfrm.xfrm_policy_lock);
mutex_init(&net->xfrm.xfrm_cfg_mutex); mutex_init(&net->xfrm.xfrm_cfg_mutex);
return 0; return 0;
...@@ -3082,6 +3108,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { ...@@ -3082,6 +3108,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
void __init xfrm_init(void) void __init xfrm_init(void)
{ {
register_pernet_subsys(&xfrm_net_ops); register_pernet_subsys(&xfrm_net_ops);
seqcount_init(&xfrm_policy_hash_generation);
xfrm_input_init(); xfrm_input_init();
} }
...@@ -3179,7 +3206,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * ...@@ -3179,7 +3206,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
struct hlist_head *chain; struct hlist_head *chain;
u32 priority = ~0U; u32 priority = ~0U;
read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
hlist_for_each_entry(pol, chain, bydst) { hlist_for_each_entry(pol, chain, bydst) {
if (xfrm_migrate_selector_match(sel, &pol->selector) && if (xfrm_migrate_selector_match(sel, &pol->selector) &&
...@@ -3203,7 +3230,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * ...@@ -3203,7 +3230,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
xfrm_pol_hold(ret); xfrm_pol_hold(ret);
read_unlock_bh(&net->xfrm.xfrm_policy_lock); spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
return ret; return ret;
} }
......
...@@ -558,7 +558,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) ...@@ -558,7 +558,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq)
x->repl->notify(x, XFRM_REPLAY_UPDATE); x->repl->notify(x, XFRM_REPLAY_UPDATE);
} }
static struct xfrm_replay xfrm_replay_legacy = { static const struct xfrm_replay xfrm_replay_legacy = {
.advance = xfrm_replay_advance, .advance = xfrm_replay_advance,
.check = xfrm_replay_check, .check = xfrm_replay_check,
.recheck = xfrm_replay_check, .recheck = xfrm_replay_check,
...@@ -566,7 +566,7 @@ static struct xfrm_replay xfrm_replay_legacy = { ...@@ -566,7 +566,7 @@ static struct xfrm_replay xfrm_replay_legacy = {
.overflow = xfrm_replay_overflow, .overflow = xfrm_replay_overflow,
}; };
static struct xfrm_replay xfrm_replay_bmp = { static const struct xfrm_replay xfrm_replay_bmp = {
.advance = xfrm_replay_advance_bmp, .advance = xfrm_replay_advance_bmp,
.check = xfrm_replay_check_bmp, .check = xfrm_replay_check_bmp,
.recheck = xfrm_replay_check_bmp, .recheck = xfrm_replay_check_bmp,
...@@ -574,7 +574,7 @@ static struct xfrm_replay xfrm_replay_bmp = { ...@@ -574,7 +574,7 @@ static struct xfrm_replay xfrm_replay_bmp = {
.overflow = xfrm_replay_overflow_bmp, .overflow = xfrm_replay_overflow_bmp,
}; };
static struct xfrm_replay xfrm_replay_esn = { static const struct xfrm_replay xfrm_replay_esn = {
.advance = xfrm_replay_advance_esn, .advance = xfrm_replay_advance_esn,
.check = xfrm_replay_check_esn, .check = xfrm_replay_check_esn,
.recheck = xfrm_replay_recheck_esn, .recheck = xfrm_replay_recheck_esn,
......
...@@ -28,6 +28,11 @@ ...@@ -28,6 +28,11 @@
#include "xfrm_hash.h" #include "xfrm_hash.h"
#define xfrm_state_deref_prot(table, net) \
rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
static void xfrm_state_gc_task(struct work_struct *work);
/* Each xfrm_state may be linked to two tables: /* Each xfrm_state may be linked to two tables:
1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
...@@ -36,6 +41,15 @@ ...@@ -36,6 +41,15 @@
*/ */
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
static HLIST_HEAD(xfrm_state_gc_list);
static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
{
return atomic_inc_not_zero(&x->refcnt);
}
static inline unsigned int xfrm_dst_hash(struct net *net, static inline unsigned int xfrm_dst_hash(struct net *net,
const xfrm_address_t *daddr, const xfrm_address_t *daddr,
...@@ -76,18 +90,18 @@ static void xfrm_hash_transfer(struct hlist_head *list, ...@@ -76,18 +90,18 @@ static void xfrm_hash_transfer(struct hlist_head *list,
h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family, x->props.reqid, x->props.family,
nhashmask); nhashmask);
hlist_add_head(&x->bydst, ndsttable+h); hlist_add_head_rcu(&x->bydst, ndsttable + h);
h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr, h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
x->props.family, x->props.family,
nhashmask); nhashmask);
hlist_add_head(&x->bysrc, nsrctable+h); hlist_add_head_rcu(&x->bysrc, nsrctable + h);
if (x->id.spi) { if (x->id.spi) {
h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
x->id.proto, x->props.family, x->id.proto, x->props.family,
nhashmask); nhashmask);
hlist_add_head(&x->byspi, nspitable+h); hlist_add_head_rcu(&x->byspi, nspitable + h);
} }
} }
} }
...@@ -122,25 +136,29 @@ static void xfrm_hash_resize(struct work_struct *work) ...@@ -122,25 +136,29 @@ static void xfrm_hash_resize(struct work_struct *work)
} }
spin_lock_bh(&net->xfrm.xfrm_state_lock); spin_lock_bh(&net->xfrm.xfrm_state_lock);
write_seqcount_begin(&xfrm_state_hash_generation);
nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
for (i = net->xfrm.state_hmask; i >= 0; i--) for (i = net->xfrm.state_hmask; i >= 0; i--)
xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi, xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
nhashmask);
odst = net->xfrm.state_bydst; osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
osrc = net->xfrm.state_bysrc; ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
ospi = net->xfrm.state_byspi;
ohashmask = net->xfrm.state_hmask; ohashmask = net->xfrm.state_hmask;
net->xfrm.state_bydst = ndst; rcu_assign_pointer(net->xfrm.state_bydst, ndst);
net->xfrm.state_bysrc = nsrc; rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
net->xfrm.state_byspi = nspi; rcu_assign_pointer(net->xfrm.state_byspi, nspi);
net->xfrm.state_hmask = nhashmask; net->xfrm.state_hmask = nhashmask;
write_seqcount_end(&xfrm_state_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_state_lock); spin_unlock_bh(&net->xfrm.xfrm_state_lock);
osize = (ohashmask + 1) * sizeof(struct hlist_head); osize = (ohashmask + 1) * sizeof(struct hlist_head);
synchronize_rcu();
xfrm_hash_free(odst, osize); xfrm_hash_free(odst, osize);
xfrm_hash_free(osrc, osize); xfrm_hash_free(osrc, osize);
xfrm_hash_free(ospi, osize); xfrm_hash_free(ospi, osize);
...@@ -355,15 +373,16 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) ...@@ -355,15 +373,16 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
/*
 * Work handler that destroys xfrm states queued for garbage collection.
 *
 * Side-by-side diff rendering: on each line the left half is the old code
 * (per-netns state_gc_list) and the right half is the new code (global
 * xfrm_state_gc_list). Lines present only once were added or removed.
 *
 * The pending list is detached into a local head while holding
 * xfrm_state_gc_lock, then each entry is passed to xfrm_state_gc_destroy()
 * with the lock released.
 */
static void xfrm_state_gc_task(struct work_struct *work) static void xfrm_state_gc_task(struct work_struct *work)
{ {
/* Old side only: the work item used to be embedded in struct net. */
struct net *net = container_of(work, struct net, xfrm.state_gc_work);
struct xfrm_state *x; struct xfrm_state *x;
struct hlist_node *tmp; struct hlist_node *tmp;
struct hlist_head gc_list; struct hlist_head gc_list;
spin_lock_bh(&xfrm_state_gc_lock); spin_lock_bh(&xfrm_state_gc_lock);
hlist_move_list(&net->xfrm.state_gc_list, &gc_list); hlist_move_list(&xfrm_state_gc_list, &gc_list);
spin_unlock_bh(&xfrm_state_gc_lock); spin_unlock_bh(&xfrm_state_gc_lock);
/* New side only: wait for an RCU grace period before destroying the detached states. */
synchronize_rcu();
hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
xfrm_state_gc_destroy(x); xfrm_state_gc_destroy(x);
} }
...@@ -500,14 +519,12 @@ EXPORT_SYMBOL(xfrm_state_alloc); ...@@ -500,14 +519,12 @@ EXPORT_SYMBOL(xfrm_state_alloc);
/*
 * Queue a dead xfrm state for deferred destruction.
 *
 * Side-by-side diff rendering: left half of each line is the old code
 * (per-netns GC list and work item), right half is the new code (global
 * xfrm_state_gc_list / xfrm_state_gc_work).
 *
 * Warns if the state is not XFRM_STATE_DEAD, links it onto the GC list
 * under xfrm_state_gc_lock, and schedules the GC work.
 */
void __xfrm_state_destroy(struct xfrm_state *x) void __xfrm_state_destroy(struct xfrm_state *x)
{ {
/* Old side only: the netns was needed to locate the per-netns GC list. */
struct net *net = xs_net(x);
WARN_ON(x->km.state != XFRM_STATE_DEAD); WARN_ON(x->km.state != XFRM_STATE_DEAD);
spin_lock_bh(&xfrm_state_gc_lock); spin_lock_bh(&xfrm_state_gc_lock);
hlist_add_head(&x->gclist, &net->xfrm.state_gc_list); hlist_add_head(&x->gclist, &xfrm_state_gc_list);
spin_unlock_bh(&xfrm_state_gc_lock); spin_unlock_bh(&xfrm_state_gc_lock);
schedule_work(&net->xfrm.state_gc_work); schedule_work(&xfrm_state_gc_work);
} }
EXPORT_SYMBOL(__xfrm_state_destroy); EXPORT_SYMBOL(__xfrm_state_destroy);
...@@ -520,10 +537,10 @@ int __xfrm_state_delete(struct xfrm_state *x) ...@@ -520,10 +537,10 @@ int __xfrm_state_delete(struct xfrm_state *x)
x->km.state = XFRM_STATE_DEAD; x->km.state = XFRM_STATE_DEAD;
spin_lock(&net->xfrm.xfrm_state_lock); spin_lock(&net->xfrm.xfrm_state_lock);
list_del(&x->km.all); list_del(&x->km.all);
hlist_del(&x->bydst); hlist_del_rcu(&x->bydst);
hlist_del(&x->bysrc); hlist_del_rcu(&x->bysrc);
if (x->id.spi) if (x->id.spi)
hlist_del(&x->byspi); hlist_del_rcu(&x->byspi);
net->xfrm.state_num--; net->xfrm.state_num--;
spin_unlock(&net->xfrm.xfrm_state_lock); spin_unlock(&net->xfrm.xfrm_state_lock);
...@@ -659,7 +676,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, ...@@ -659,7 +676,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
struct xfrm_state *x; struct xfrm_state *x;
hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) { hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
if (x->props.family != family || if (x->props.family != family ||
x->id.spi != spi || x->id.spi != spi ||
x->id.proto != proto || x->id.proto != proto ||
...@@ -668,7 +685,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, ...@@ -668,7 +685,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
if ((mark & x->mark.m) != x->mark.v) if ((mark & x->mark.m) != x->mark.v)
continue; continue;
xfrm_state_hold(x); if (!xfrm_state_hold_rcu(x))
continue;
return x; return x;
} }
...@@ -683,7 +701,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, ...@@ -683,7 +701,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
unsigned int h = xfrm_src_hash(net, daddr, saddr, family); unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
struct xfrm_state *x; struct xfrm_state *x;
hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
if (x->props.family != family || if (x->props.family != family ||
x->id.proto != proto || x->id.proto != proto ||
!xfrm_addr_equal(&x->id.daddr, daddr, family) || !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
...@@ -692,7 +710,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, ...@@ -692,7 +710,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
if ((mark & x->mark.m) != x->mark.v) if ((mark & x->mark.m) != x->mark.v)
continue; continue;
xfrm_state_hold(x); if (!xfrm_state_hold_rcu(x))
continue;
return x; return x;
} }
...@@ -775,13 +794,16 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, ...@@ -775,13 +794,16 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
struct xfrm_state *best = NULL; struct xfrm_state *best = NULL;
u32 mark = pol->mark.v & pol->mark.m; u32 mark = pol->mark.v & pol->mark.m;
unsigned short encap_family = tmpl->encap_family; unsigned short encap_family = tmpl->encap_family;
unsigned int sequence;
struct km_event c; struct km_event c;
to_put = NULL; to_put = NULL;
spin_lock_bh(&net->xfrm.xfrm_state_lock); sequence = read_seqcount_begin(&xfrm_state_hash_generation);
rcu_read_lock();
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
if (x->props.family == encap_family && if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid && x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v && (mark & x->mark.m) == x->mark.v &&
...@@ -797,7 +819,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, ...@@ -797,7 +819,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
goto found; goto found;
h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) { hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
if (x->props.family == encap_family && if (x->props.family == encap_family &&
x->props.reqid == tmpl->reqid && x->props.reqid == tmpl->reqid &&
(mark & x->mark.m) == x->mark.v && (mark & x->mark.m) == x->mark.v &&
...@@ -850,19 +872,21 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, ...@@ -850,19 +872,21 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
} }
if (km_query(x, tmpl, pol) == 0) { if (km_query(x, tmpl, pol) == 0) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
x->km.state = XFRM_STATE_ACQ; x->km.state = XFRM_STATE_ACQ;
list_add(&x->km.all, &net->xfrm.state_all); list_add(&x->km.all, &net->xfrm.state_all);
hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
h = xfrm_src_hash(net, daddr, saddr, encap_family); h = xfrm_src_hash(net, daddr, saddr, encap_family);
hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
if (x->id.spi) { if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
} }
x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
net->xfrm.state_num++; net->xfrm.state_num++;
xfrm_hash_grow_check(net, x->bydst.next != NULL); xfrm_hash_grow_check(net, x->bydst.next != NULL);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
} else { } else {
x->km.state = XFRM_STATE_DEAD; x->km.state = XFRM_STATE_DEAD;
to_put = x; to_put = x;
...@@ -871,13 +895,26 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, ...@@ -871,13 +895,26 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
} }
} }
out: out:
if (x) if (x) {
xfrm_state_hold(x); if (!xfrm_state_hold_rcu(x)) {
else *err = -EAGAIN;
x = NULL;
}
} else {
*err = acquire_in_progress ? -EAGAIN : error; *err = acquire_in_progress ? -EAGAIN : error;
spin_unlock_bh(&net->xfrm.xfrm_state_lock); }
rcu_read_unlock();
if (to_put) if (to_put)
xfrm_state_put(to_put); xfrm_state_put(to_put);
if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
*err = -EAGAIN;
if (x) {
xfrm_state_put(x);
x = NULL;
}
}
return x; return x;
} }
...@@ -945,16 +982,16 @@ static void __xfrm_state_insert(struct xfrm_state *x) ...@@ -945,16 +982,16 @@ static void __xfrm_state_insert(struct xfrm_state *x)
h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
x->props.reqid, x->props.family); x->props.reqid, x->props.family);
hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
if (x->id.spi) { if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
x->props.family); x->props.family);
hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
} }
tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
...@@ -1063,9 +1100,9 @@ static struct xfrm_state *__find_acq_core(struct net *net, ...@@ -1063,9 +1100,9 @@ static struct xfrm_state *__find_acq_core(struct net *net,
xfrm_state_hold(x); xfrm_state_hold(x);
tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
list_add(&x->km.all, &net->xfrm.state_all); list_add(&x->km.all, &net->xfrm.state_all);
hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
h = xfrm_src_hash(net, daddr, saddr, family); h = xfrm_src_hash(net, daddr, saddr, family);
hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
net->xfrm.state_num++; net->xfrm.state_num++;
...@@ -1581,7 +1618,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) ...@@ -1581,7 +1618,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
if (x->id.spi) { if (x->id.spi) {
spin_lock_bh(&net->xfrm.xfrm_state_lock); spin_lock_bh(&net->xfrm.xfrm_state_lock);
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
spin_unlock_bh(&net->xfrm.xfrm_state_lock); spin_unlock_bh(&net->xfrm.xfrm_state_lock);
err = 0; err = 0;
...@@ -2099,8 +2136,6 @@ int __net_init xfrm_state_init(struct net *net) ...@@ -2099,8 +2136,6 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_num = 0; net->xfrm.state_num = 0;
INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task);
spin_lock_init(&net->xfrm.xfrm_state_lock); spin_lock_init(&net->xfrm.xfrm_state_lock);
return 0; return 0;
...@@ -2118,7 +2153,7 @@ void xfrm_state_fini(struct net *net) ...@@ -2118,7 +2153,7 @@ void xfrm_state_fini(struct net *net)
flush_work(&net->xfrm.state_hash_work); flush_work(&net->xfrm.state_hash_work);
xfrm_state_flush(net, IPSEC_PROTO_ANY, false); xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
flush_work(&net->xfrm.state_gc_work); flush_work(&xfrm_state_gc_work);
WARN_ON(!list_empty(&net->xfrm.state_all)); WARN_ON(!list_empty(&net->xfrm.state_all));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment