Commit c5038a83 authored by David S. Miller

ipv4: Cache routes in nexthop exception entries.

Signed-off-by: David S. Miller <davem@davemloft.net>
parent d26b3a7c
...@@ -55,6 +55,7 @@ struct fib_nh_exception { ...@@ -55,6 +55,7 @@ struct fib_nh_exception {
u32 fnhe_pmtu; u32 fnhe_pmtu;
__be32 fnhe_gw; __be32 fnhe_gw;
unsigned long fnhe_expires; unsigned long fnhe_expires;
struct rtable __rcu *fnhe_rth;
unsigned long fnhe_stamp; unsigned long fnhe_stamp;
}; };
......
...@@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = { ...@@ -140,6 +140,21 @@ const struct fib_prop fib_props[RTN_MAX + 1] = {
}, },
}; };
/* Release the cached route held in the slot *rtp, if any.
 *
 * The slot is read with rcu_dereference_protected(*rtp, 1), i.e. with the
 * protection condition hard-wired to true. An empty slot is a no-op;
 * otherwise the route's embedded dst entry is handed to dst_free().
 * The slot itself is left untouched (see the comment in the body).
 */
static void rt_fibinfo_free(struct rtable __rcu **rtp)
{
struct rtable *rt = rcu_dereference_protected(*rtp, 1);
if (!rt)
return;
/* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
* because we waited an RCU grace period before calling
* free_fib_info_rcu()
*/
dst_free(&rt->dst);
}
static void free_nh_exceptions(struct fib_nh *nh) static void free_nh_exceptions(struct fib_nh *nh)
{ {
struct fnhe_hash_bucket *hash = nh->nh_exceptions; struct fnhe_hash_bucket *hash = nh->nh_exceptions;
...@@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh) ...@@ -153,6 +168,9 @@ static void free_nh_exceptions(struct fib_nh *nh)
struct fib_nh_exception *next; struct fib_nh_exception *next;
next = rcu_dereference_protected(fnhe->fnhe_next, 1); next = rcu_dereference_protected(fnhe->fnhe_next, 1);
rt_fibinfo_free(&fnhe->fnhe_rth);
kfree(fnhe); kfree(fnhe);
fnhe = next; fnhe = next;
...@@ -161,22 +179,7 @@ static void free_nh_exceptions(struct fib_nh *nh) ...@@ -161,22 +179,7 @@ static void free_nh_exceptions(struct fib_nh *nh)
kfree(hash); kfree(hash);
} }
static void rt_nexthop_free(struct rtable __rcu **rtp) static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
{
struct rtable *rt = rcu_dereference_protected(*rtp, 1);
if (!rt)
return;
/* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
* because we waited an RCU grace period before calling
* free_fib_info_rcu()
*/
dst_free(&rt->dst);
}
static void rt_nexthop_free_cpus(struct rtable __rcu * __percpu *rtp)
{ {
int cpu; int cpu;
...@@ -203,8 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head) ...@@ -203,8 +206,8 @@ static void free_fib_info_rcu(struct rcu_head *head)
dev_put(nexthop_nh->nh_dev); dev_put(nexthop_nh->nh_dev);
if (nexthop_nh->nh_exceptions) if (nexthop_nh->nh_exceptions)
free_nh_exceptions(nexthop_nh); free_nh_exceptions(nexthop_nh);
rt_nexthop_free_cpus(nexthop_nh->nh_pcpu_rth_output); rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
rt_nexthop_free(&nexthop_nh->nh_rth_input); rt_fibinfo_free(&nexthop_nh->nh_rth_input);
} endfor_nexthops(fi); } endfor_nexthops(fi);
release_net(fi->fib_net); release_net(fi->fib_net);
......
...@@ -587,11 +587,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, ...@@ -587,11 +587,17 @@ static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
build_sk_flow_key(fl4, sk); build_sk_flow_key(fl4, sk);
} }
static DEFINE_SEQLOCK(fnhe_seqlock); static inline void rt_free(struct rtable *rt)
{
call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}
static DEFINE_SPINLOCK(fnhe_lock);
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{ {
struct fib_nh_exception *fnhe, *oldest; struct fib_nh_exception *fnhe, *oldest;
struct rtable *orig;
oldest = rcu_dereference(hash->chain); oldest = rcu_dereference(hash->chain);
for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe; for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
...@@ -599,6 +605,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash) ...@@ -599,6 +605,11 @@ static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
oldest = fnhe; oldest = fnhe;
} }
orig = rcu_dereference(oldest->fnhe_rth);
if (orig) {
RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
rt_free(orig);
}
return oldest; return oldest;
} }
...@@ -620,7 +631,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, ...@@ -620,7 +631,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
int depth; int depth;
u32 hval = fnhe_hashfun(daddr); u32 hval = fnhe_hashfun(daddr);
write_seqlock_bh(&fnhe_seqlock); spin_lock_bh(&fnhe_lock);
hash = nh->nh_exceptions; hash = nh->nh_exceptions;
if (!hash) { if (!hash) {
...@@ -667,7 +678,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, ...@@ -667,7 +678,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_stamp = jiffies; fnhe->fnhe_stamp = jiffies;
out_unlock: out_unlock:
write_sequnlock_bh(&fnhe_seqlock); spin_unlock_bh(&fnhe_lock);
return; return;
} }
...@@ -1167,41 +1178,40 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) ...@@ -1167,41 +1178,40 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
__be32 daddr) __be32 daddr)
{ {
__be32 fnhe_daddr, gw; spin_lock_bh(&fnhe_lock);
unsigned long expires;
unsigned int seq;
u32 pmtu;
restart:
seq = read_seqbegin(&fnhe_seqlock);
fnhe_daddr = fnhe->fnhe_daddr;
gw = fnhe->fnhe_gw;
pmtu = fnhe->fnhe_pmtu;
expires = fnhe->fnhe_expires;
if (read_seqretry(&fnhe_seqlock, seq))
goto restart;
if (daddr != fnhe_daddr)
return;
if (pmtu) { if (daddr == fnhe->fnhe_daddr) {
unsigned long diff = expires - jiffies; struct rtable *orig;
if (time_before(jiffies, expires)) { if (fnhe->fnhe_pmtu) {
rt->rt_pmtu = pmtu; unsigned long expires = fnhe->fnhe_expires;
dst_set_expires(&rt->dst, diff); unsigned long diff = expires - jiffies;
if (time_before(jiffies, expires)) {
rt->rt_pmtu = fnhe->fnhe_pmtu;
dst_set_expires(&rt->dst, diff);
}
}
if (fnhe->fnhe_gw) {
rt->rt_flags |= RTCF_REDIRECTED;
rt->rt_gateway = fnhe->fnhe_gw;
} }
}
if (gw) {
rt->rt_flags |= RTCF_REDIRECTED;
rt->rt_gateway = gw;
}
fnhe->fnhe_stamp = jiffies;
}
static inline void rt_free(struct rtable *rt) orig = rcu_dereference(fnhe->fnhe_rth);
{ rcu_assign_pointer(fnhe->fnhe_rth, rt);
call_rcu(&rt->dst.rcu_head, dst_rcu_free); if (orig)
rt_free(orig);
fnhe->fnhe_stamp = jiffies;
} else {
/* Routes we intend to cache in nexthop exception have
* the DST_NOCACHE bit clear. However, if we are
* unsuccessful at storing this route into the cache
* we really need to set it.
*/
rt->dst.flags |= DST_NOCACHE;
}
spin_unlock_bh(&fnhe_lock);
} }
static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
...@@ -1249,13 +1259,13 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, ...@@ -1249,13 +1259,13 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
rt->rt_gateway = nh->nh_gw; rt->rt_gateway = nh->nh_gw;
if (unlikely(fnhe))
rt_bind_exception(rt, fnhe, daddr);
dst_init_metrics(&rt->dst, fi->fib_metrics, true); dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid; rt->dst.tclassid = nh->nh_tclassid;
#endif #endif
if (!(rt->dst.flags & DST_NOCACHE)) if (unlikely(fnhe))
rt_bind_exception(rt, fnhe, daddr);
else if (!(rt->dst.flags & DST_NOCACHE))
rt_cache_route(nh, rt); rt_cache_route(nh, rt);
} }
...@@ -1753,22 +1763,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res, ...@@ -1753,22 +1763,23 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
fnhe = NULL; fnhe = NULL;
if (fi) { if (fi) {
fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); struct rtable __rcu **prth;
if (!fnhe && FIB_RES_NH(*res).nh_pcpu_rth_output) {
struct rtable __rcu **prth;
fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr);
if (fnhe)
prth = &fnhe->fnhe_rth;
else
prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output);
rth = rcu_dereference(*prth); rth = rcu_dereference(*prth);
if (rt_cache_valid(rth)) { if (rt_cache_valid(rth)) {
dst_hold(&rth->dst); dst_hold(&rth->dst);
return rth; return rth;
}
} }
} }
rth = rt_dst_alloc(dev_out, rth = rt_dst_alloc(dev_out,
IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(in_dev, NOXFRM), IN_DEV_CONF_GET(in_dev, NOXFRM),
fi && !fnhe); fi);
if (!rth) if (!rth)
return ERR_PTR(-ENOBUFS); return ERR_PTR(-ENOBUFS);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment