Commit f958137c authored by David S. Miller

[NET]: Split out policy flow cache to be a generic facility.

parent a7b43a56
......@@ -75,4 +75,16 @@ struct flowi {
#define fl_icmp_code	uli_u.icmpt.code
#define fl_ipsec_spi	uli_u.spi
};

#define FLOW_DIR_IN	0
#define FLOW_DIR_OUT	1
#define FLOW_DIR_FWD	2

typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir,
			       void **objp, atomic_t **obj_refp);

extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
			       flow_resolve_t resolver);
extern atomic_t flow_cache_genid;

#endif
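
[Editorial sketch, not part of the commit: how a subsystem other than xfrm might plug into the generic facility declared above. Only flow_cache_lookup(), flow_resolve_t and FLOW_DIR_OUT come from this patch; struct my_object, my_resolver() and my_lookup() are invented for illustration.]

#include <linux/types.h>
#include <net/flow.h>
#include <asm/atomic.h>

struct my_object {
	atomic_t refcnt;	/* the cache holds one reference through this */
	/* ... subsystem-specific state ... */
};

static struct my_object my_default_obj = {
	.refcnt = ATOMIC_INIT(1),
};

/* Called by flow_cache_lookup() on a cache miss, or when flow_cache_genid
 * has changed since the entry was cached. */
static void my_resolver(struct flowi *key, u16 family, u8 dir,
			void **objp, atomic_t **obj_refp)
{
	/* A real resolver would do the expensive rule search here; this
	 * sketch just hands back one shared object. */
	*objp = &my_default_obj;
	*obj_refp = &my_default_obj.refcnt;
}

static struct my_object *my_lookup(struct flowi *key, u16 family)
{
	/* Consults the per-cpu cache first; the returned object carries a
	 * reference that the caller must eventually drop. */
	return flow_cache_lookup(key, family, FLOW_DIR_OUT, my_resolver);
}
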
......@@ -788,7 +788,6 @@ void xfrm4_policy_init(void);
void xfrm6_policy_init(void);
struct xfrm_policy *xfrm_policy_alloc(int gfp);
extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *);
struct xfrm_policy *xfrm_policy_lookup(int dir, struct flowi *fl, unsigned short family);
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
struct xfrm_policy *xfrm_policy_delete(int dir, struct xfrm_selector *sel);
struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete);
......
......@@ -10,7 +10,7 @@ obj-y += sysctl_net_core.o
endif
endif
obj-$(CONFIG_NET) += dev.o dev_mcast.o dst.o neighbour.o rtnetlink.o utils.o link_watch.o filter.o
obj-$(CONFIG_NET) += flow.o dev.o dev_mcast.o dst.o neighbour.o rtnetlink.o utils.o link_watch.o filter.o
obj-$(CONFIG_NETFILTER) += netfilter.o
obj-$(CONFIG_NET_DIVERT) += dv.o
......
/* flow.c: Generic flow cache.
 *
 * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru)
 * Copyright (C) 2003 David S. Miller (davem@redhat.com)
 */
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/jhash.h>
#include <linux/interrupt.h>
#include <linux/mm.h>
#include <linux/random.h>
#include <linux/init.h>
#include <net/flow.h>
#include <asm/atomic.h>
struct flow_cache_entry {
	struct flow_cache_entry	*next;
	u16			family;
	u8			dir;
	struct flowi		key;
	u32			genid;
	void			*object;
	atomic_t		*object_ref;
};

atomic_t flow_cache_genid = ATOMIC_INIT(0);

static u32 flow_hash_shift;
#define flow_hash_size	(1 << flow_hash_shift)
static struct flow_cache_entry **flow_table;
static kmem_cache_t *flow_cachep;

static int flow_lwm, flow_hwm;

struct flow_percpu_info {
	u32 hash_rnd;
	int number;
} ____cacheline_aligned;
static struct flow_percpu_info flow_hash_info[NR_CPUS];

#define flow_count(cpu)		(flow_hash_info[cpu].number)
#define flow_hash_rnd(cpu)	(flow_hash_info[cpu].hash_rnd)

static struct timer_list flow_hash_rnd_timer;

#define FLOW_HASH_RND_PERIOD	(10 * 60 * HZ)

static void flow_cache_new_hashrnd(unsigned long arg)
{
	int i;

	for (i = 0; i < NR_CPUS; i++)
		get_random_bytes(&flow_hash_rnd(i), sizeof(u32));

	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
	add_timer(&flow_hash_rnd_timer);
}

static void flow_cache_shrink(int cpu)
{
	struct flow_cache_entry *fle, **flp;
	int shrink_to = flow_lwm / flow_hash_size;
	int i;

	for (i = 0; i < flow_hash_size; i++) {
		int k = 0;

		flp = &flow_table[cpu*flow_hash_size+i];
		while ((fle = *flp) != NULL && k < shrink_to) {
			k++;
			flp = &fle->next;
		}
		while ((fle = *flp) != NULL) {
			*flp = fle->next;
			if (fle->object)
				atomic_dec(fle->object_ref);
			kmem_cache_free(flow_cachep, fle);
			flow_count(cpu)--;
		}
	}
}

static u32 flow_hash_code(struct flowi *key, int cpu)
{
	u32 *k = (u32 *) key;

	return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
		(flow_hash_size - 1));
}

#if (BITS_PER_LONG == 64)
typedef u64 flow_compare_t;
#else
typedef u32 flow_compare_t;
#endif
/* Deliberately never defined: if sizeof(struct flowi) is not a multiple
 * of sizeof(flow_compare_t), the call below survives constant folding
 * and the build fails at link time. */
extern void flowi_is_missized(void);
/* I hear what you're saying, use memcmp. But memcmp cannot make
 * important assumptions that we can here, such as alignment and
 * constant size.
 */
static int flow_key_compare(struct flowi *key1, struct flowi *key2)
{
	flow_compare_t *k1, *k1_lim, *k2;
	const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);

	if (sizeof(struct flowi) % sizeof(flow_compare_t))
		flowi_is_missized();

	k1 = (flow_compare_t *) key1;
	k1_lim = k1 + n_elem;

	k2 = (flow_compare_t *) key2;

	do {
		if (*k1++ != *k2++)
			return 1;
	} while (k1 < k1_lim);

	return 0;
}
void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
			flow_resolve_t resolver)
{
	struct flow_cache_entry *fle, **head;
	unsigned int hash;
	int cpu;

	local_bh_disable();
	cpu = smp_processor_id();

	hash = flow_hash_code(key, cpu);
	head = &flow_table[(cpu << flow_hash_shift) + hash];
	for (fle = *head; fle; fle = fle->next) {
		if (fle->family == family &&
		    fle->dir == dir &&
		    flow_key_compare(key, &fle->key) == 0) {
			if (fle->genid == atomic_read(&flow_cache_genid)) {
				void *ret = fle->object;

				if (ret)
					atomic_inc(fle->object_ref);
				local_bh_enable();

				return ret;
			}
			break;
		}
	}

	{
		void *obj;
		atomic_t *obj_ref;

		resolver(key, family, dir, &obj, &obj_ref);

		if (fle) {
			fle->genid = atomic_read(&flow_cache_genid);

			if (fle->object)
				atomic_dec(fle->object_ref);

			fle->object = obj;
			fle->object_ref = obj_ref;
			if (obj)
				atomic_inc(fle->object_ref);
		} else {
			if (flow_count(cpu) > flow_hwm)
				flow_cache_shrink(cpu);

			fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
			if (fle) {
				fle->next = *head;
				*head = fle;
				fle->family = family;
				fle->dir = dir;
				memcpy(&fle->key, key, sizeof(*key));
				fle->genid = atomic_read(&flow_cache_genid);
				fle->object = obj;
				fle->object_ref = obj_ref;
				flow_count(cpu)++;
			}
		}

		local_bh_enable();

		return obj;
	}
}
static int __init flow_cache_init(void)
{
	unsigned long order;
	int i;

	flow_cachep = kmem_cache_create("flow_cache",
					sizeof(struct flow_cache_entry),
					0, SLAB_HWCACHE_ALIGN,
					NULL, NULL);

	if (!flow_cachep)
		panic("NET: failed to allocate flow cache slab\n");

	flow_hash_shift = 10;
	flow_lwm = 2 * flow_hash_size;
	flow_hwm = 4 * flow_hash_size;

	for (i = 0; i < NR_CPUS; i++) {
		flow_hash_rnd(i) =
			(u32) ((num_physpages ^ (num_physpages>>8)) ^
			       (jiffies ^ (jiffies >> 7)));
		flow_hash_rnd(i) ^= i;
	}

	init_timer(&flow_hash_rnd_timer);
	flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
	add_timer(&flow_hash_rnd_timer);

	for (order = 0;
	     (PAGE_SIZE << order) <
		     (NR_CPUS*sizeof(struct flow_cache_entry *)*flow_hash_size);
	     order++)
		/* NOTHING */;

	flow_table = (struct flow_cache_entry **)
		__get_free_pages(GFP_ATOMIC, order);

	if (!flow_table)
		panic("Failed to allocate flow cache hash table\n");

	memset(flow_table, 0, PAGE_SIZE << order);

	return 0;
}

module_init(flow_cache_init);
......@@ -292,7 +292,6 @@ EXPORT_SYMBOL(km_new_mapping);
EXPORT_SYMBOL(xfrm_cfg_sem);
EXPORT_SYMBOL(xfrm_policy_alloc);
EXPORT_SYMBOL(__xfrm_policy_destroy);
EXPORT_SYMBOL(xfrm_policy_lookup);
EXPORT_SYMBOL(xfrm_lookup);
EXPORT_SYMBOL(__xfrm_policy_check);
EXPORT_SYMBOL(__xfrm_route_forward);
......@@ -363,6 +362,9 @@ EXPORT_SYMBOL_GPL(pskb_put);
EXPORT_SYMBOL_GPL(skb_to_sgvec);
#endif
EXPORT_SYMBOL(flow_cache_lookup);
EXPORT_SYMBOL(flow_cache_genid);
#if defined (CONFIG_IPV6_MODULE) || defined (CONFIG_IP_SCTP_MODULE)
/* inet functions common to v4 and v6 */
EXPORT_SYMBOL(inet_release);
......
......@@ -19,7 +19,6 @@
DECLARE_MUTEX(xfrm_cfg_sem);
static u32 xfrm_policy_genid;
static rwlock_t xfrm_policy_lock = RW_LOCK_UNLOCKED;
struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
......@@ -29,142 +28,6 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
kmem_cache_t *xfrm_dst_cache;
/* Limited flow cache. Its function now is to accelerate search for
 * policy rules.
 *
 * Flow cache is private to cpus, at the moment this is important
 * mostly for flows which do not match any rule, so that flow lookups
 * are absolutely cpu-local. When a rule exists we do some updates
 * to rule (refcnt, stats), so that locality is broken. Later this
 * can be repaired.
 */
struct flow_entry
{
	struct flow_entry	*next;
	struct flowi		fl;
	u8			dir;
	u32			genid;
	struct xfrm_policy	*pol;
};

static kmem_cache_t *flow_cachep;

struct flow_entry **flow_table;

static int flow_lwm = 2*XFRM_FLOWCACHE_HASH_SIZE;
static int flow_hwm = 4*XFRM_FLOWCACHE_HASH_SIZE;

static int flow_number[NR_CPUS] __cacheline_aligned;

#define flow_count(cpu)		(flow_number[cpu])

static void flow_cache_shrink(int cpu)
{
	int i;
	struct flow_entry *fle, **flp;
	int shrink_to = flow_lwm/XFRM_FLOWCACHE_HASH_SIZE;

	for (i=0; i<XFRM_FLOWCACHE_HASH_SIZE; i++) {
		int k = 0;

		flp = &flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+i];
		while ((fle=*flp) != NULL && k<shrink_to) {
			k++;
			flp = &fle->next;
		}
		while ((fle=*flp) != NULL) {
			*flp = fle->next;
			if (fle->pol)
				xfrm_pol_put(fle->pol);
			kmem_cache_free(flow_cachep, fle);
		}
	}
}
struct xfrm_policy *flow_lookup(int dir, struct flowi *fl,
				unsigned short family)
{
	struct xfrm_policy *pol = NULL;
	struct flow_entry *fle;
	u32 hash;
	int cpu;

	hash = flow_hash(fl, family);

	local_bh_disable();
	cpu = smp_processor_id();

	for (fle = flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash];
	     fle; fle = fle->next) {
		if (memcmp(fl, &fle->fl, sizeof(fle->fl)) == 0 &&
		    fle->dir == dir) {
			if (fle->genid == xfrm_policy_genid) {
				if ((pol = fle->pol) != NULL)
					xfrm_pol_hold(pol);
				local_bh_enable();
				return pol;
			}
			break;
		}
	}

	pol = xfrm_policy_lookup(dir, fl, family);

	if (fle) {
		/* Stale flow entry found. Update it. */
		fle->genid = xfrm_policy_genid;

		if (fle->pol)
			xfrm_pol_put(fle->pol);
		fle->pol = pol;
		if (pol)
			xfrm_pol_hold(pol);
	} else {
		if (flow_count(cpu) > flow_hwm)
			flow_cache_shrink(cpu);

		fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
		if (fle) {
			flow_count(cpu)++;
			fle->fl = *fl;
			fle->genid = xfrm_policy_genid;
			fle->dir = dir;
			fle->pol = pol;
			if (pol)
				xfrm_pol_hold(pol);
			fle->next = flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash];
			flow_table[cpu*XFRM_FLOWCACHE_HASH_SIZE+hash] = fle;
		}
	}

	local_bh_enable();
	return pol;
}
void __init flow_cache_init(void)
{
	int order;

	flow_cachep = kmem_cache_create("flow_cache",
					sizeof(struct flow_entry),
					0, SLAB_HWCACHE_ALIGN,
					NULL, NULL);

	if (!flow_cachep)
		panic("NET: failed to allocate flow cache slab\n");

	for (order = 0;
	     (PAGE_SIZE<<order) < (NR_CPUS*sizeof(struct flow_entry *)*XFRM_FLOWCACHE_HASH_SIZE);
	     order++)
		/* NOTHING */;

	flow_table = (struct flow_entry **)__get_free_pages(GFP_ATOMIC, order);

	if (!flow_table)
		panic("Failed to allocate flow cache hash table\n");

	memset(flow_table, 0, PAGE_SIZE<<order);
}

int xfrm_register_type(struct xfrm_type *type, unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
......@@ -395,7 +258,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
	xfrm_pol_hold(policy);
	policy->next = pol ? pol->next : NULL;
	*p = policy;
	xfrm_policy_genid++;
	atomic_inc(&flow_cache_genid);
	policy->index = pol ? pol->index : xfrm_gen_index(dir);
	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
	policy->curlft.use_time = 0;
......@@ -424,7 +287,7 @@ struct xfrm_policy *xfrm_policy_delete(int dir, struct xfrm_selector *sel)
		}
	}
	if (pol)
		xfrm_policy_genid++;
		atomic_inc(&flow_cache_genid);
	write_unlock_bh(&xfrm_policy_lock);
	return pol;
}
......@@ -443,7 +306,7 @@ struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
	}
	if (pol) {
		if (delete)
			xfrm_policy_genid++;
			atomic_inc(&flow_cache_genid);
		else
			xfrm_pol_hold(pol);
	}
......@@ -468,7 +331,7 @@ void xfrm_policy_flush()
			write_lock_bh(&xfrm_policy_lock);
		}
	}
	xfrm_policy_genid++;
	atomic_inc(&flow_cache_genid);
	write_unlock_bh(&xfrm_policy_lock);
}
......@@ -507,8 +370,8 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
/* Find policy to apply to this flow. */
struct xfrm_policy *xfrm_policy_lookup(int dir, struct flowi *fl,
				       unsigned short family)
void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
			void **objp, atomic_t **obj_refp)
{
	struct xfrm_policy *pol;
......@@ -527,7 +390,8 @@ struct xfrm_policy *xfrm_policy_lookup(int dir, struct flowi *fl,
		}
	}
	read_unlock_bh(&xfrm_policy_lock);
	return pol;
	if ((*objp = (void *) pol) != NULL)
		*obj_refp = &pol->refcnt;
}

struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
......@@ -719,6 +583,23 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
	return err;
}

static inline int policy_to_flow_dir(int dir)
{
	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
		return dir;
	switch (dir) {
	default:
	case XFRM_POLICY_IN:
		return FLOW_DIR_IN;
	case XFRM_POLICY_OUT:
		return FLOW_DIR_OUT;
	case XFRM_POLICY_FWD:
		return FLOW_DIR_FWD;
	};
}
/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
......@@ -749,7 +630,7 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
	}

restart:
	genid = xfrm_policy_genid;
	genid = atomic_read(&flow_cache_genid);
	policy = NULL;
	if (sk && sk->policy[1])
		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
......@@ -759,7 +640,9 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
		if ((rt->u.dst.flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
			return 0;

		policy = flow_lookup(XFRM_POLICY_OUT, fl, family);
		policy = flow_cache_lookup(fl, family,
					   policy_to_flow_dir(XFRM_POLICY_OUT),
					   xfrm_policy_lookup);
	}

	if (!policy)
......@@ -817,7 +700,7 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
			goto error;
		}

		if (err == -EAGAIN ||
		    genid != xfrm_policy_genid)
		    genid != atomic_read(&flow_cache_genid))
			goto restart;
	}

	if (err)
......@@ -941,7 +824,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);

	if (!pol)
		pol = flow_lookup(dir, &fl, family);
		pol = flow_cache_lookup(&fl, family,
					policy_to_flow_dir(dir),
					xfrm_policy_lookup);

	if (!pol)
		return 1;
......@@ -1237,7 +1122,6 @@ void __init xfrm_policy_init(void)
void __init xfrm_init(void)
{
	xfrm_state_init();
	flow_cache_init();
	xfrm_policy_init();
}
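
[Editorial sketch, not part of the commit: the invalidation side of the new facility, mirroring what the xfrm hunks above do. A writer that changes the rules a resolver consults bumps the shared generation counter, and cached entries are then re-resolved lazily on their next lookup. my_policy_change() is a hypothetical caller; flow_cache_genid is the symbol exported by net/core/flow.c.]

#include <net/flow.h>
#include <asm/atomic.h>

static void my_policy_change(void)
{
	/* ... modify the private rule set that the resolver consults ... */

	/* Bumping the generation counter lazily invalidates every per-cpu
	 * flow cache entry: flow_cache_lookup() sees a stale genid on the
	 * next hit and calls the resolver again. */
	atomic_inc(&flow_cache_genid);
}
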