Commit 1f5a7407 authored by David S. Miller's avatar David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next

Steffen Klassert says:

====================
1) Introduce skb_to_sgvec_nomark function to add further data to the sg list
   without calling sg_unmark_end first. Needed to add extended sequence
   number informations. From Fan Du.

2) Add IPsec extended sequence numbers support to the Authentication Header
   protocol for ipv4 and ipv6. From Fan Du.

3) Make the IPsec flowcache namespace aware, from Fan Du.

4) Avoid creating temporary SA for every packet when no key manager is
   registered. From Horia Geanta.

5) Support filtering of SA dumps to show only the SAs that match a
   given filter. From Nicolas Dichtel.

6) Remove caching of xfrm_policy_sk_bundles. The cached socket policy bundles
   are never used, instead we create a new cache entry whenever xfrm_lookup()
   is called on a socket policy. Most protocols cache the used routes to the
   socket, so this caching is not needed.

7)  Fix a forgotten SADB_X_EXT_FILTER length check in pfkey, from Nicolas
    Dichtel.

8) Cleanup error handling of xfrm_state_clone.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 3b5c8ab1 cc9ab60e
......@@ -6009,6 +6009,7 @@ L: netdev@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec.git
T: git git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next.git
S: Maintained
F: net/core/flow.c
F: net/xfrm/
F: net/key/
F: net/ipv4/xfrm*
......
......@@ -691,6 +691,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
unsigned int headroom);
struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
int newtailroom, gfp_t priority);
int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
int offset, int len);
int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset,
int len);
int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
......
......@@ -218,9 +218,10 @@ struct flow_cache_object *flow_cache_lookup(struct net *net,
const struct flowi *key, u16 family,
u8 dir, flow_resolve_t resolver,
void *ctx);
int flow_cache_init(struct net *net);
void flow_cache_flush(void);
void flow_cache_flush_deferred(void);
void flow_cache_flush(struct net *net);
void flow_cache_flush_deferred(struct net *net);
extern atomic_t flow_cache_genid;
#endif
#ifndef _NET_FLOWCACHE_H
#define _NET_FLOWCACHE_H
#include <linux/interrupt.h>
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/notifier.h>
struct flow_cache_percpu {
struct hlist_head *hash_table;
int hash_count;
u32 hash_rnd;
int hash_rnd_recalc;
struct tasklet_struct flush_tasklet;
};
struct flow_cache {
u32 hash_shift;
struct flow_cache_percpu __percpu *percpu;
struct notifier_block hotcpu_notifier;
int low_watermark;
int high_watermark;
struct timer_list rnd_timer;
};
#endif /* _NET_FLOWCACHE_H */
......@@ -6,6 +6,7 @@
#include <linux/workqueue.h>
#include <linux/xfrm.h>
#include <net/dst_ops.h>
#include <net/flowcache.h>
struct ctl_table_header;
......@@ -58,9 +59,18 @@ struct netns_xfrm {
struct dst_ops xfrm6_dst_ops;
#endif
spinlock_t xfrm_state_lock;
spinlock_t xfrm_policy_sk_bundle_lock;
rwlock_t xfrm_policy_lock;
struct mutex xfrm_cfg_mutex;
/* flow cache part */
struct flow_cache flow_cache_global;
struct kmem_cache *flow_cachep;
atomic_t flow_cache_genid;
struct list_head flow_cache_gc_list;
spinlock_t flow_cache_gc_lock;
struct work_struct flow_cache_gc_work;
struct work_struct flow_cache_flush_work;
struct mutex flow_flush_sem;
};
#endif
......@@ -118,11 +118,10 @@
struct xfrm_state_walk {
struct list_head all;
u8 state;
union {
u8 dying;
u8 proto;
};
u8 dying;
u8 proto;
u32 seq;
struct xfrm_filter *filter;
};
/* Full description of state of transformer. */
......@@ -594,6 +593,7 @@ struct xfrm_mgr {
const struct xfrm_migrate *m,
int num_bundles,
const struct xfrm_kmaddress *k);
bool (*is_alive)(const struct km_event *c);
};
int xfrm_register_km(struct xfrm_mgr *km);
......@@ -1405,7 +1405,8 @@ static inline void xfrm_sysctl_fini(struct net *net)
}
#endif
void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto);
void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
struct xfrm_filter *filter);
int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
int (*func)(struct xfrm_state *, int, void*), void *);
void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net);
......@@ -1646,6 +1647,20 @@ static inline int xfrm_aevent_is_on(struct net *net)
rcu_read_unlock();
return ret;
}
static inline int xfrm_acquire_is_on(struct net *net)
{
struct sock *nlsk;
int ret = 0;
rcu_read_lock();
nlsk = rcu_dereference(net->xfrm.nlsk);
if (nlsk)
ret = netlink_has_listeners(nlsk, XFRMNLGRP_ACQUIRE);
rcu_read_unlock();
return ret;
}
#endif
static inline int xfrm_alg_len(const struct xfrm_algo *alg)
......
......@@ -235,6 +235,18 @@ struct sadb_x_kmaddress {
} __attribute__((packed));
/* sizeof(struct sadb_x_kmaddress) == 8 */
/* To specify the SA dump filter */
struct sadb_x_filter {
__u16 sadb_x_filter_len;
__u16 sadb_x_filter_exttype;
__u32 sadb_x_filter_saddr[4];
__u32 sadb_x_filter_daddr[4];
__u16 sadb_x_filter_family;
__u8 sadb_x_filter_splen;
__u8 sadb_x_filter_dplen;
} __attribute__((packed));
/* sizeof(struct sadb_x_filter) == 40 */
/* Message types */
#define SADB_RESERVED 0
#define SADB_GETSPI 1
......@@ -358,7 +370,8 @@ struct sadb_x_kmaddress {
#define SADB_X_EXT_SEC_CTX 24
/* Used with MIGRATE to pass @ to IKE for negotiation */
#define SADB_X_EXT_KMADDRESS 25
#define SADB_EXT_MAX 25
#define SADB_X_EXT_FILTER 26
#define SADB_EXT_MAX 26
/* Identity Extension values */
#define SADB_IDENTTYPE_RESERVED 0
......
......@@ -298,6 +298,8 @@ enum xfrm_attr_type_t {
XFRMA_TFCPAD, /* __u32 */
XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_esn */
XFRMA_SA_EXTRA_FLAGS, /* __u32 */
XFRMA_PROTO, /* __u8 */
XFRMA_FILTER, /* struct xfrm_filter */
__XFRMA_MAX
#define XFRMA_MAX (__XFRMA_MAX - 1)
......@@ -474,6 +476,14 @@ struct xfrm_user_mapping {
__be16 new_sport;
};
struct xfrm_filter {
xfrm_address_t saddr;
xfrm_address_t daddr;
__u16 family;
__u8 splen;
__u8 dplen;
};
#ifndef __KERNEL__
/* backwards compatibility for userspace */
#define XFRMGRP_ACQUIRE 1
......
......@@ -24,6 +24,7 @@
#include <net/flow.h>
#include <linux/atomic.h>
#include <linux/security.h>
#include <net/net_namespace.h>
struct flow_cache_entry {
union {
......@@ -38,37 +39,12 @@ struct flow_cache_entry {
struct flow_cache_object *object;
};
struct flow_cache_percpu {
struct hlist_head *hash_table;
int hash_count;
u32 hash_rnd;
int hash_rnd_recalc;
struct tasklet_struct flush_tasklet;
};
struct flow_flush_info {
struct flow_cache *cache;
atomic_t cpuleft;
struct completion completion;
};
struct flow_cache {
u32 hash_shift;
struct flow_cache_percpu __percpu *percpu;
struct notifier_block hotcpu_notifier;
int low_watermark;
int high_watermark;
struct timer_list rnd_timer;
};
atomic_t flow_cache_genid = ATOMIC_INIT(0);
EXPORT_SYMBOL(flow_cache_genid);
static struct flow_cache flow_cache_global;
static struct kmem_cache *flow_cachep __read_mostly;
static DEFINE_SPINLOCK(flow_cache_gc_lock);
static LIST_HEAD(flow_cache_gc_list);
#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
......@@ -84,46 +60,50 @@ static void flow_cache_new_hashrnd(unsigned long arg)
add_timer(&fc->rnd_timer);
}
static int flow_entry_valid(struct flow_cache_entry *fle)
static int flow_entry_valid(struct flow_cache_entry *fle,
struct netns_xfrm *xfrm)
{
if (atomic_read(&flow_cache_genid) != fle->genid)
if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
return 0;
if (fle->object && !fle->object->ops->check(fle->object))
return 0;
return 1;
}
static void flow_entry_kill(struct flow_cache_entry *fle)
static void flow_entry_kill(struct flow_cache_entry *fle,
struct netns_xfrm *xfrm)
{
if (fle->object)
fle->object->ops->delete(fle->object);
kmem_cache_free(flow_cachep, fle);
kmem_cache_free(xfrm->flow_cachep, fle);
}
static void flow_cache_gc_task(struct work_struct *work)
{
struct list_head gc_list;
struct flow_cache_entry *fce, *n;
struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
flow_cache_gc_work);
INIT_LIST_HEAD(&gc_list);
spin_lock_bh(&flow_cache_gc_lock);
list_splice_tail_init(&flow_cache_gc_list, &gc_list);
spin_unlock_bh(&flow_cache_gc_lock);
spin_lock_bh(&xfrm->flow_cache_gc_lock);
list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
spin_unlock_bh(&xfrm->flow_cache_gc_lock);
list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
flow_entry_kill(fce);
flow_entry_kill(fce, xfrm);
}
static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
int deleted, struct list_head *gc_list)
int deleted, struct list_head *gc_list,
struct netns_xfrm *xfrm)
{
if (deleted) {
fcp->hash_count -= deleted;
spin_lock_bh(&flow_cache_gc_lock);
list_splice_tail(gc_list, &flow_cache_gc_list);
spin_unlock_bh(&flow_cache_gc_lock);
schedule_work(&flow_cache_gc_work);
spin_lock_bh(&xfrm->flow_cache_gc_lock);
list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
spin_unlock_bh(&xfrm->flow_cache_gc_lock);
schedule_work(&xfrm->flow_cache_gc_work);
}
}
......@@ -135,6 +115,8 @@ static void __flow_cache_shrink(struct flow_cache *fc,
struct hlist_node *tmp;
LIST_HEAD(gc_list);
int i, deleted = 0;
struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
flow_cache_global);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
int saved = 0;
......@@ -142,7 +124,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
if (saved < shrink_to &&
flow_entry_valid(fle)) {
flow_entry_valid(fle, xfrm)) {
saved++;
} else {
deleted++;
......@@ -152,7 +134,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
}
}
flow_cache_queue_garbage(fcp, deleted, &gc_list);
flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
}
static void flow_cache_shrink(struct flow_cache *fc,
......@@ -208,7 +190,7 @@ struct flow_cache_object *
flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
flow_resolve_t resolver, void *ctx)
{
struct flow_cache *fc = &flow_cache_global;
struct flow_cache *fc = &net->xfrm.flow_cache_global;
struct flow_cache_percpu *fcp;
struct flow_cache_entry *fle, *tfle;
struct flow_cache_object *flo;
......@@ -248,7 +230,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
if (fcp->hash_count > fc->high_watermark)
flow_cache_shrink(fc, fcp);
fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
fle = kmem_cache_alloc(net->xfrm.flow_cachep, GFP_ATOMIC);
if (fle) {
fle->net = net;
fle->family = family;
......@@ -258,7 +240,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
fcp->hash_count++;
}
} else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
} else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
flo = fle->object;
if (!flo)
goto ret_object;
......@@ -279,7 +261,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
}
flo = resolver(net, key, family, dir, flo, ctx);
if (fle) {
fle->genid = atomic_read(&flow_cache_genid);
fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
if (!IS_ERR(flo))
fle->object = flo;
else
......@@ -303,12 +285,14 @@ static void flow_cache_flush_tasklet(unsigned long data)
struct hlist_node *tmp;
LIST_HEAD(gc_list);
int i, deleted = 0;
struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
flow_cache_global);
fcp = this_cpu_ptr(fc->percpu);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
if (flow_entry_valid(fle))
if (flow_entry_valid(fle, xfrm))
continue;
deleted++;
......@@ -317,7 +301,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
}
}
flow_cache_queue_garbage(fcp, deleted, &gc_list);
flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
if (atomic_dec_and_test(&info->cpuleft))
complete(&info->completion);
......@@ -351,10 +335,9 @@ static void flow_cache_flush_per_cpu(void *data)
tasklet_schedule(tasklet);
}
void flow_cache_flush(void)
void flow_cache_flush(struct net *net)
{
struct flow_flush_info info;
static DEFINE_MUTEX(flow_flush_sem);
cpumask_var_t mask;
int i, self;
......@@ -365,8 +348,8 @@ void flow_cache_flush(void)
/* Don't want cpus going down or up during this. */
get_online_cpus();
mutex_lock(&flow_flush_sem);
info.cache = &flow_cache_global;
mutex_lock(&net->xfrm.flow_flush_sem);
info.cache = &net->xfrm.flow_cache_global;
for_each_online_cpu(i)
if (!flow_cache_percpu_empty(info.cache, i))
cpumask_set_cpu(i, mask);
......@@ -386,21 +369,23 @@ void flow_cache_flush(void)
wait_for_completion(&info.completion);
done:
mutex_unlock(&flow_flush_sem);
mutex_unlock(&net->xfrm.flow_flush_sem);
put_online_cpus();
free_cpumask_var(mask);
}
static void flow_cache_flush_task(struct work_struct *work)
{
flow_cache_flush();
}
struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
flow_cache_gc_work);
struct net *net = container_of(xfrm, struct net, xfrm);
static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task);
flow_cache_flush(net);
}
void flow_cache_flush_deferred(void)
void flow_cache_flush_deferred(struct net *net)
{
schedule_work(&flow_cache_flush_work);
schedule_work(&net->xfrm.flow_cache_flush_work);
}
static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
......@@ -425,7 +410,8 @@ static int flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
struct flow_cache *fc = container_of(nfb, struct flow_cache,
hotcpu_notifier);
int res, cpu = (unsigned long) hcpu;
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
......@@ -444,9 +430,20 @@ static int flow_cache_cpu(struct notifier_block *nfb,
return NOTIFY_OK;
}
static int __init flow_cache_init(struct flow_cache *fc)
int flow_cache_init(struct net *net)
{
int i;
struct flow_cache *fc = &net->xfrm.flow_cache_global;
/* Initialize per-net flow cache global variables here */
net->xfrm.flow_cachep = kmem_cache_create("flow_cache",
sizeof(struct flow_cache_entry),
0, SLAB_PANIC, NULL);
spin_lock_init(&net->xfrm.flow_cache_gc_lock);
INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
mutex_init(&net->xfrm.flow_flush_sem);
fc->hash_shift = 10;
fc->low_watermark = 2 * flow_cache_hash_size(fc);
......@@ -484,14 +481,4 @@ static int __init flow_cache_init(struct flow_cache *fc)
return -ENOMEM;
}
static int __init flow_cache_init_global(void)
{
flow_cachep = kmem_cache_create("flow_cache",
sizeof(struct flow_cache_entry),
0, SLAB_PANIC, NULL);
return flow_cache_init(&flow_cache_global);
}
module_init(flow_cache_init_global);
EXPORT_SYMBOL(flow_cache_init);
......@@ -3281,6 +3281,32 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
return elt;
}
/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given
* sglist without mark the sg which contain last skb data as the end.
* So the caller can mannipulate sg list as will when padding new data after
* the first call without calling sg_unmark_end to expend sg list.
*
* Scenario to use skb_to_sgvec_nomark:
* 1. sg_init_table
* 2. skb_to_sgvec_nomark(payload1)
* 3. skb_to_sgvec_nomark(payload2)
*
* This is equivalent to:
* 1. sg_init_table
* 2. skb_to_sgvec(payload1)
* 3. sg_unmark_end
* 4. skb_to_sgvec(payload2)
*
* When mapping mutilple payload conditionally, skb_to_sgvec_nomark
* is more preferable.
*/
int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
int offset, int len)
{
return __skb_to_sgvec(skb, sg, offset, len);
}
EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
{
int nsg = __skb_to_sgvec(skb, sg, offset, len);
......
......@@ -155,6 +155,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
struct iphdr *iph, *top_iph;
struct ip_auth_hdr *ah;
struct ah_data *ahp;
int seqhi_len = 0;
__be32 *seqhi;
int sglists = 0;
struct scatterlist *seqhisg;
ahp = x->data;
ahash = ahp->ahash;
......@@ -167,14 +171,19 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
ah = ip_auth_hdr(skb);
ihl = ip_hdrlen(skb);
if (x->props.flags & XFRM_STATE_ESN) {
sglists = 1;
seqhi_len = sizeof(*seqhi);
}
err = -ENOMEM;
iph = ah_alloc_tmp(ahash, nfrags, ihl);
iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl + seqhi_len);
if (!iph)
goto out;
icv = ah_tmp_icv(ahash, iph, ihl);
seqhi = (__be32 *)((char *)iph + ihl);
icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
seqhisg = sg + nfrags;
memset(ah->auth_data, 0, ahp->icv_trunc_len);
......@@ -210,10 +219,15 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg, 0, skb->len);
sg_init_table(sg, nfrags + sglists);
skb_to_sgvec_nomark(skb, sg, 0, skb->len);
ahash_request_set_crypt(req, sg, icv, skb->len);
if (x->props.flags & XFRM_STATE_ESN) {
/* Attach seqhi sg right after packet payload */
*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
sg_set_buf(seqhisg, seqhi, seqhi_len);
}
ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah_output_done, skb);
AH_SKB_CB(skb)->tmp = iph;
......@@ -295,6 +309,10 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
struct ip_auth_hdr *ah;
struct ah_data *ahp;
int err = -ENOMEM;
int seqhi_len = 0;
__be32 *seqhi;
int sglists = 0;
struct scatterlist *seqhisg;
if (!pskb_may_pull(skb, sizeof(*ah)))
goto out;
......@@ -335,14 +353,22 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
iph = ip_hdr(skb);
ihl = ip_hdrlen(skb);
work_iph = ah_alloc_tmp(ahash, nfrags, ihl + ahp->icv_trunc_len);
if (x->props.flags & XFRM_STATE_ESN) {
sglists = 1;
seqhi_len = sizeof(*seqhi);
}
work_iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl +
ahp->icv_trunc_len + seqhi_len);
if (!work_iph)
goto out;
auth_data = ah_tmp_auth(work_iph, ihl);
seqhi = (__be32 *)((char *)work_iph + ihl);
auth_data = ah_tmp_auth(seqhi, seqhi_len);
icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
seqhisg = sg + nfrags;
memcpy(work_iph, iph, ihl);
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
......@@ -361,10 +387,15 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, ihl);
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg, 0, skb->len);
sg_init_table(sg, nfrags + sglists);
skb_to_sgvec_nomark(skb, sg, 0, skb->len);
ahash_request_set_crypt(req, sg, icv, skb->len);
if (x->props.flags & XFRM_STATE_ESN) {
/* Attach seqhi sg right after packet payload */
*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
sg_set_buf(seqhisg, seqhi, seqhi_len);
}
ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah_input_done, skb);
AH_SKB_CB(skb)->tmp = work_iph;
......
......@@ -346,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
struct ip_auth_hdr *ah;
struct ah_data *ahp;
struct tmp_ext *iph_ext;
int seqhi_len = 0;
__be32 *seqhi;
int sglists = 0;
struct scatterlist *seqhisg;
ahp = x->data;
ahash = ahp->ahash;
......@@ -359,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
if (extlen)
extlen += sizeof(*iph_ext);
if (x->props.flags & XFRM_STATE_ESN) {
sglists = 1;
seqhi_len = sizeof(*seqhi);
}
err = -ENOMEM;
iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen);
iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN +
extlen + seqhi_len);
if (!iph_base)
goto out;
iph_ext = ah_tmp_ext(iph_base);
icv = ah_tmp_icv(ahash, iph_ext, extlen);
seqhi = (__be32 *)((char *)iph_ext + extlen);
icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
seqhisg = sg + nfrags;
ah = ip_auth_hdr(skb);
memset(ah->auth_data, 0, ahp->icv_trunc_len);
......@@ -411,10 +422,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
ah->spi = x->id.spi;
ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg, 0, skb->len);
sg_init_table(sg, nfrags + sglists);
skb_to_sgvec_nomark(skb, sg, 0, skb->len);
ahash_request_set_crypt(req, sg, icv, skb->len);
if (x->props.flags & XFRM_STATE_ESN) {
/* Attach seqhi sg right after packet payload */
*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
sg_set_buf(seqhisg, seqhi, seqhi_len);
}
ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_output_done, skb);
AH_SKB_CB(skb)->tmp = iph_base;
......@@ -514,6 +530,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
int nexthdr;
int nfrags;
int err = -ENOMEM;
int seqhi_len = 0;
__be32 *seqhi;
int sglists = 0;
struct scatterlist *seqhisg;
if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
goto out;
......@@ -550,14 +570,22 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
skb_push(skb, hdr_len);
work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len);
if (x->props.flags & XFRM_STATE_ESN) {
sglists = 1;
seqhi_len = sizeof(*seqhi);
}
work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len +
ahp->icv_trunc_len + seqhi_len);
if (!work_iph)
goto out;
auth_data = ah_tmp_auth(work_iph, hdr_len);
icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len);
auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len);
seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len);
icv = ah_tmp_icv(ahash, seqhi, seqhi_len);
req = ah_tmp_req(ahash, icv);
sg = ah_req_sg(ahash, req);
seqhisg = sg + nfrags;
memcpy(work_iph, ip6h, hdr_len);
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
......@@ -572,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
ip6h->flow_lbl[2] = 0;
ip6h->hop_limit = 0;
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg, 0, skb->len);
sg_init_table(sg, nfrags + sglists);
skb_to_sgvec_nomark(skb, sg, 0, skb->len);
if (x->props.flags & XFRM_STATE_ESN) {
/* Attach seqhi sg right after packet payload */
*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
sg_set_buf(seqhisg, seqhi, seqhi_len);
}
ahash_request_set_crypt(req, sg, icv, skb->len);
ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
ahash_request_set_callback(req, 0, ah6_input_done, skb);
AH_SKB_CB(skb)->tmp = work_iph;
......
......@@ -365,6 +365,7 @@ static const u8 sadb_ext_min_len[] = {
[SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address),
[SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx),
[SADB_X_EXT_KMADDRESS] = (u8) sizeof(struct sadb_x_kmaddress),
[SADB_X_EXT_FILTER] = (u8) sizeof(struct sadb_x_filter),
};
/* Verify sadb_address_{len,prefixlen} against sa_family. */
......@@ -1798,6 +1799,7 @@ static void pfkey_dump_sa_done(struct pfkey_sock *pfk)
static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
{
u8 proto;
struct xfrm_filter *filter = NULL;
struct pfkey_sock *pfk = pfkey_sk(sk);
if (pfk->dump.dump != NULL)
......@@ -1807,11 +1809,27 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
if (proto == 0)
return -EINVAL;
if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
filter = kmalloc(sizeof(*filter), GFP_KERNEL);
if (filter == NULL)
return -ENOMEM;
memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr,
sizeof(xfrm_address_t));
memcpy(&filter->daddr, &xfilter->sadb_x_filter_daddr,
sizeof(xfrm_address_t));
filter->family = xfilter->sadb_x_filter_family;
filter->splen = xfilter->sadb_x_filter_splen;
filter->dplen = xfilter->sadb_x_filter_dplen;
}
pfk->dump.msg_version = hdr->sadb_msg_version;
pfk->dump.msg_portid = hdr->sadb_msg_pid;
pfk->dump.dump = pfkey_dump_sa;
pfk->dump.done = pfkey_dump_sa_done;
xfrm_state_walk_init(&pfk->dump.u.state, proto);
xfrm_state_walk_init(&pfk->dump.u.state, proto, filter);
return pfkey_do_dump(pfk);
}
......@@ -3059,6 +3077,24 @@ static u32 get_acqseq(void)
return res;
}
static bool pfkey_is_alive(const struct km_event *c)
{
struct netns_pfkey *net_pfkey = net_generic(c->net, pfkey_net_id);
struct sock *sk;
bool is_alive = false;
rcu_read_lock();
sk_for_each_rcu(sk, &net_pfkey->table) {
if (pfkey_sk(sk)->registered) {
is_alive = true;
break;
}
}
rcu_read_unlock();
return is_alive;
}
static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp)
{
struct sk_buff *skb;
......@@ -3784,6 +3820,7 @@ static struct xfrm_mgr pfkeyv2_mgr =
.new_mapping = pfkey_send_new_mapping,
.notify_policy = pfkey_send_policy_notify,
.migrate = pfkey_send_migrate,
.is_alive = pfkey_is_alive,
};
static int __net_init pfkey_net_init(struct net *net)
......
......@@ -39,8 +39,6 @@
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
#define XFRM_MAX_QUEUE_LEN 100
static struct dst_entry *xfrm_policy_sk_bundles;
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
__read_mostly;
......@@ -661,7 +659,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
hlist_add_head(&policy->bydst, chain);
xfrm_pol_hold(policy);
net->xfrm.policy_count[dir]++;
atomic_inc(&flow_cache_genid);
atomic_inc(&net->xfrm.flow_cache_genid);
/* After previous checking, family can either be AF_INET or AF_INET6 */
if (policy->family == AF_INET)
......@@ -2109,13 +2107,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
goto no_transform;
}
dst_hold(&xdst->u.dst);
spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
xdst->u.dst.next = xfrm_policy_sk_bundles;
xfrm_policy_sk_bundles = &xdst->u.dst;
spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
route = xdst->route;
}
}
......@@ -2549,33 +2540,15 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
return dst;
}
static void __xfrm_garbage_collect(struct net *net)
{
struct dst_entry *head, *next;
spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
head = xfrm_policy_sk_bundles;
xfrm_policy_sk_bundles = NULL;
spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
while (head) {
next = head->next;
dst_free(head);
head = next;
}
}
void xfrm_garbage_collect(struct net *net)
{
flow_cache_flush();
__xfrm_garbage_collect(net);
flow_cache_flush(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
static void xfrm_garbage_collect_deferred(struct net *net)
{
flow_cache_flush_deferred();
__xfrm_garbage_collect(net);
flow_cache_flush_deferred(net);
}
static void xfrm_init_pmtu(struct dst_entry *dst)
......@@ -2944,9 +2917,9 @@ static int __net_init xfrm_net_init(struct net *net)
/* Initialize the per-net locks here */
spin_lock_init(&net->xfrm.xfrm_state_lock);
rwlock_init(&net->xfrm.xfrm_policy_lock);
spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock);
mutex_init(&net->xfrm.xfrm_cfg_mutex);
flow_cache_init(net);
return 0;
out_sysctl:
......
......@@ -161,6 +161,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock);
int __xfrm_state_delete(struct xfrm_state *x);
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
bool km_is_alive(const struct km_event *c);
void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
static DEFINE_SPINLOCK(xfrm_type_lock);
......@@ -788,6 +789,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
struct xfrm_state *best = NULL;
u32 mark = pol->mark.v & pol->mark.m;
unsigned short encap_family = tmpl->encap_family;
struct km_event c;
to_put = NULL;
......@@ -832,6 +834,17 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
error = -EEXIST;
goto out;
}
c.net = net;
/* If the KMs have no listeners (yet...), avoid allocating an SA
* for each and every packet - garbage collection might not
* handle the flood.
*/
if (!km_is_alive(&c)) {
error = -ESRCH;
goto out;
}
x = xfrm_state_alloc(net);
if (x == NULL) {
error = -ENOMEM;
......@@ -1135,10 +1148,9 @@ int xfrm_state_add(struct xfrm_state *x)
EXPORT_SYMBOL(xfrm_state_add);
#ifdef CONFIG_XFRM_MIGRATE
static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig)
{
struct net *net = xs_net(orig);
int err = -ENOMEM;
struct xfrm_state *x = xfrm_state_alloc(net);
if (!x)
goto out;
......@@ -1187,15 +1199,13 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
}
if (orig->replay_esn) {
err = xfrm_replay_clone(x, orig);
if (err)
if (xfrm_replay_clone(x, orig))
goto error;
}
memcpy(&x->mark, &orig->mark, sizeof(x->mark));
err = xfrm_init_state(x);
if (err)
if (xfrm_init_state(x) < 0)
goto error;
x->props.flags = orig->props.flags;
......@@ -1210,8 +1220,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
error:
xfrm_state_put(x);
out:
if (errp)
*errp = err;
return NULL;
}
......@@ -1263,9 +1271,8 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
struct xfrm_migrate *m)
{
struct xfrm_state *xc;
int err;
xc = xfrm_state_clone(x, &err);
xc = xfrm_state_clone(x);
if (!xc)
return NULL;
......@@ -1278,7 +1285,7 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
state is to be updated as it is a part of triplet */
xfrm_state_insert(xc);
} else {
if ((err = xfrm_state_add(xc)) < 0)
if (xfrm_state_add(xc) < 0)
goto error;
}
......@@ -1590,6 +1597,23 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
}
EXPORT_SYMBOL(xfrm_alloc_spi);
static bool __xfrm_state_filter_match(struct xfrm_state *x,
struct xfrm_filter *filter)
{
if (filter) {
if ((filter->family == AF_INET ||
filter->family == AF_INET6) &&
x->props.family != filter->family)
return false;
return addr_match(&x->props.saddr, &filter->saddr,
filter->splen) &&
addr_match(&x->id.daddr, &filter->daddr,
filter->dplen);
}
return true;
}
int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
int (*func)(struct xfrm_state *, int, void*),
void *data)
......@@ -1612,6 +1636,8 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
state = container_of(x, struct xfrm_state, km);
if (!xfrm_id_proto_match(state->id.proto, walk->proto))
continue;
if (!__xfrm_state_filter_match(state, walk->filter))
continue;
err = func(state, walk->seq, data);
if (err) {
list_move_tail(&walk->all, &x->all);
......@@ -1630,17 +1656,21 @@ int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
}
EXPORT_SYMBOL(xfrm_state_walk);
void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
struct xfrm_filter *filter)
{
INIT_LIST_HEAD(&walk->all);
walk->proto = proto;
walk->state = XFRM_STATE_DEAD;
walk->seq = 0;
walk->filter = filter;
}
EXPORT_SYMBOL(xfrm_state_walk_init);
void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
{
kfree(walk->filter);
if (list_empty(&walk->all))
return;
......@@ -1793,6 +1823,24 @@ int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address
}
EXPORT_SYMBOL(km_report);
bool km_is_alive(const struct km_event *c)
{
struct xfrm_mgr *km;
bool is_alive = false;
rcu_read_lock();
list_for_each_entry_rcu(km, &xfrm_km_list, list) {
if (km->is_alive && km->is_alive(c)) {
is_alive = true;
break;
}
}
rcu_read_unlock();
return is_alive;
}
EXPORT_SYMBOL(km_is_alive);
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
int err;
......
......@@ -142,7 +142,8 @@ static inline int verify_replay(struct xfrm_usersa_info *p,
if (!rt)
return 0;
if (p->id.proto != IPPROTO_ESP)
/* As only ESP and AH support ESN feature. */
if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH))
return -EINVAL;
if (p->replay_window != 0)
......@@ -886,6 +887,7 @@ static int xfrm_dump_sa_done(struct netlink_callback *cb)
return 0;
}
static const struct nla_policy xfrma_policy[XFRMA_MAX+1];
static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
......@@ -901,8 +903,31 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
info.nlmsg_flags = NLM_F_MULTI;
if (!cb->args[0]) {
struct nlattr *attrs[XFRMA_MAX+1];
struct xfrm_filter *filter = NULL;
u8 proto = 0;
int err;
cb->args[0] = 1;
xfrm_state_walk_init(walk, 0);
err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX,
xfrma_policy);
if (err < 0)
return err;
if (attrs[XFRMA_FILTER]) {
filter = kmalloc(sizeof(*filter), GFP_KERNEL);
if (filter == NULL)
return -ENOMEM;
memcpy(filter, nla_data(attrs[XFRMA_FILTER]),
sizeof(*filter));
}
if (attrs[XFRMA_PROTO])
proto = nla_get_u8(attrs[XFRMA_PROTO]);
xfrm_state_walk_init(walk, proto, filter);
}
(void) xfrm_state_walk(net, walk, dump_one_state, &info);
......@@ -2308,6 +2333,8 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_TFCPAD] = { .type = NLA_U32 },
[XFRMA_REPLAY_ESN_VAL] = { .len = sizeof(struct xfrm_replay_state_esn) },
[XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 },
[XFRMA_PROTO] = { .type = NLA_U8 },
[XFRMA_FILTER] = { .len = sizeof(struct xfrm_filter) },
};
static const struct xfrm_link {
......@@ -2981,6 +3008,11 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_MAPPING, GFP_ATOMIC);
}
static bool xfrm_is_alive(const struct km_event *c)
{
return (bool)xfrm_acquire_is_on(c->net);
}
static struct xfrm_mgr netlink_mgr = {
.id = "netlink",
.notify = xfrm_send_state_notify,
......@@ -2990,6 +3022,7 @@ static struct xfrm_mgr netlink_mgr = {
.report = xfrm_send_report,
.migrate = xfrm_send_migrate,
.new_mapping = xfrm_send_mapping,
.is_alive = xfrm_is_alive,
};
static int __net_init xfrm_user_net_init(struct net *net)
......
......@@ -45,10 +45,11 @@ static inline void selinux_xfrm_notify_policyload(void)
{
struct net *net;
atomic_inc(&flow_cache_genid);
rtnl_lock();
for_each_net(net)
for_each_net(net) {
atomic_inc(&net->xfrm.flow_cache_genid);
rt_genid_bump_all(net);
}
rtnl_unlock();
}
#else
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment