Commit caacf05e authored by David S. Miller

ipv4: Properly purge netdev references on uncached routes.

When a device is unregistered, we have to purge all of the
references to it that may exist in the entire system.

If a route is uncached, we currently have no way of accomplishing
this.

So create a global list that is scanned when a network device goes
down.  This mirrors the logic in net/core/dst.c's dst_ifdown().
Signed-off-by: David S. Miller <davem@davemloft.net>
parent c5038a83
...@@ -57,6 +57,8 @@ struct rtable { ...@@ -57,6 +57,8 @@ struct rtable {
/* Miscellaneous cached information */ /* Miscellaneous cached information */
u32 rt_pmtu; u32 rt_pmtu;
struct list_head rt_uncached;
}; };
static inline bool rt_is_input_route(const struct rtable *rt) static inline bool rt_is_input_route(const struct rtable *rt)
...@@ -107,6 +109,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct; ...@@ -107,6 +109,7 @@ extern struct ip_rt_acct __percpu *ip_rt_acct;
struct in_device; struct in_device;
extern int ip_rt_init(void); extern int ip_rt_init(void);
extern void rt_cache_flush(struct net *net, int how); extern void rt_cache_flush(struct net *net, int how);
extern void rt_flush_dev(struct net_device *dev);
extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp); extern struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, extern struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
struct sock *sk); struct sock *sk);
......
...@@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo ...@@ -1046,6 +1046,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
if (event == NETDEV_UNREGISTER) { if (event == NETDEV_UNREGISTER) {
fib_disable_ip(dev, 2, -1); fib_disable_ip(dev, 2, -1);
rt_flush_dev(dev);
return NOTIFY_DONE; return NOTIFY_DONE;
} }
......
...@@ -147,6 +147,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, ...@@ -147,6 +147,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu); struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb); struct sk_buff *skb);
static void ipv4_dst_destroy(struct dst_entry *dst);
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
int how) int how)
...@@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = { ...@@ -170,6 +171,7 @@ static struct dst_ops ipv4_dst_ops = {
.default_advmss = ipv4_default_advmss, .default_advmss = ipv4_default_advmss,
.mtu = ipv4_mtu, .mtu = ipv4_mtu,
.cow_metrics = ipv4_cow_metrics, .cow_metrics = ipv4_cow_metrics,
.destroy = ipv4_dst_destroy,
.ifdown = ipv4_dst_ifdown, .ifdown = ipv4_dst_ifdown,
.negative_advice = ipv4_negative_advice, .negative_advice = ipv4_negative_advice,
.link_failure = ipv4_link_failure, .link_failure = ipv4_link_failure,
...@@ -1175,9 +1177,11 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) ...@@ -1175,9 +1177,11 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
return NULL; return NULL;
} }
static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
__be32 daddr) __be32 daddr)
{ {
bool ret = false;
spin_lock_bh(&fnhe_lock); spin_lock_bh(&fnhe_lock);
if (daddr == fnhe->fnhe_daddr) { if (daddr == fnhe->fnhe_daddr) {
...@@ -1203,6 +1207,7 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, ...@@ -1203,6 +1207,7 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
rt_free(orig); rt_free(orig);
fnhe->fnhe_stamp = jiffies; fnhe->fnhe_stamp = jiffies;
ret = true;
} else { } else {
/* Routes we intend to cache in nexthop exception have /* Routes we intend to cache in nexthop exception have
* the DST_NOCACHE bit clear. However, if we are * the DST_NOCACHE bit clear. However, if we are
...@@ -1212,11 +1217,14 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, ...@@ -1212,11 +1217,14 @@ static void rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
rt->dst.flags |= DST_NOCACHE; rt->dst.flags |= DST_NOCACHE;
} }
spin_unlock_bh(&fnhe_lock); spin_unlock_bh(&fnhe_lock);
return ret;
} }
static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{ {
struct rtable *orig, *prev, **p; struct rtable *orig, *prev, **p;
bool ret = true;
if (rt_is_input_route(rt)) { if (rt_is_input_route(rt)) {
p = (struct rtable **)&nh->nh_rth_input; p = (struct rtable **)&nh->nh_rth_input;
...@@ -1239,6 +1247,48 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt) ...@@ -1239,6 +1247,48 @@ static void rt_cache_route(struct fib_nh *nh, struct rtable *rt)
*/ */
nocache: nocache:
rt->dst.flags |= DST_NOCACHE; rt->dst.flags |= DST_NOCACHE;
ret = false;
}
return ret;
}
/* Spinlock taken (via spin_lock_bh) around every insertion into, removal
 * from, and walk of rt_uncached_list below.
 */
static DEFINE_SPINLOCK(rt_uncached_lock);
/* Global list of routes, linked through struct rtable's rt_uncached member. */
static LIST_HEAD(rt_uncached_list);
/* Append @rt to the tail of the global rt_uncached_list.
 *
 * The insertion is done under rt_uncached_lock, using the _bh variant of
 * the spinlock primitives.
 */
static void rt_add_uncached_list(struct rtable *rt)
{
spin_lock_bh(&rt_uncached_lock);
list_add_tail(&rt->rt_uncached, &rt_uncached_list);
spin_unlock_bh(&rt_uncached_lock);
}
/* dst_ops destroy hook for IPv4 routes: unlink @dst from the global
 * uncached-route list if it is currently on it.
 *
 * List membership is tested directly with list_empty() on the route's own
 * rt_uncached node rather than inferred from DST_NOCACHE.  The flag is only
 * a proxy: a route is put on the list whenever it ends up not cached,
 * whatever its flags say, and some routes merely initialise the node
 * without ever being listed.  Checking the node itself removes exactly the
 * routes that were added and avoids taking the global lock for the others.
 *
 * NOTE(review): relies on every rtable allocation path running
 * INIT_LIST_HEAD(&rt->rt_uncached) before the route can be destroyed.
 */
static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;

	/* A node that still points at itself was never linked in. */
	if (!list_empty(&rt->rt_uncached)) {
		spin_lock_bh(&rt_uncached_lock);
		list_del(&rt->rt_uncached);
		spin_unlock_bh(&rt_uncached_lock);
	}
}
/* Re-point every route on rt_uncached_list that still references @dev.
 *
 * Each matching route has its dst.dev switched to the loopback device of
 * @dev's network namespace; a reference is taken on the new device and one
 * reference on @dev is dropped.  Routes using other devices are left alone.
 */
void rt_flush_dev(struct net_device *dev)
{
/* Checked without the lock: skip the locked walk when the list is empty. */
if (!list_empty(&rt_uncached_list)) {
struct net *net = dev_net(dev);
struct rtable *rt;
spin_lock_bh(&rt_uncached_lock);
list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
if (rt->dst.dev != dev)
continue;
/* Swap the device reference: hold the replacement, then release @dev. */
rt->dst.dev = net->loopback_dev;
dev_hold(rt->dst.dev);
dev_put(dev);
}
spin_unlock_bh(&rt_uncached_lock);
} }
} }
...@@ -1254,6 +1304,8 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, ...@@ -1254,6 +1304,8 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
struct fib_nh_exception *fnhe, struct fib_nh_exception *fnhe,
struct fib_info *fi, u16 type, u32 itag) struct fib_info *fi, u16 type, u32 itag)
{ {
bool cached = false;
if (fi) { if (fi) {
struct fib_nh *nh = &FIB_RES_NH(*res); struct fib_nh *nh = &FIB_RES_NH(*res);
...@@ -1264,10 +1316,12 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, ...@@ -1264,10 +1316,12 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
rt->dst.tclassid = nh->nh_tclassid; rt->dst.tclassid = nh->nh_tclassid;
#endif #endif
if (unlikely(fnhe)) if (unlikely(fnhe))
rt_bind_exception(rt, fnhe, daddr); cached = rt_bind_exception(rt, fnhe, daddr);
else if (!(rt->dst.flags & DST_NOCACHE)) else if (!(rt->dst.flags & DST_NOCACHE))
rt_cache_route(nh, rt); cached = rt_cache_route(nh, rt);
} }
if (unlikely(!cached))
rt_add_uncached_list(rt);
#ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES #ifdef CONFIG_IP_MULTIPLE_TABLES
...@@ -1334,6 +1388,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, ...@@ -1334,6 +1388,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->rt_iif = 0; rth->rt_iif = 0;
rth->rt_pmtu = 0; rth->rt_pmtu = 0;
rth->rt_gateway = 0; rth->rt_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
if (our) { if (our) {
rth->dst.input= ip_local_deliver; rth->dst.input= ip_local_deliver;
rth->rt_flags |= RTCF_LOCAL; rth->rt_flags |= RTCF_LOCAL;
...@@ -1459,6 +1514,7 @@ static int __mkroute_input(struct sk_buff *skb, ...@@ -1459,6 +1514,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_iif = 0; rth->rt_iif = 0;
rth->rt_pmtu = 0; rth->rt_pmtu = 0;
rth->rt_gateway = 0; rth->rt_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
rth->dst.input = ip_forward; rth->dst.input = ip_forward;
rth->dst.output = ip_output; rth->dst.output = ip_output;
...@@ -1625,6 +1681,7 @@ out: return err; ...@@ -1625,6 +1681,7 @@ out: return err;
rth->rt_iif = 0; rth->rt_iif = 0;
rth->rt_pmtu = 0; rth->rt_pmtu = 0;
rth->rt_gateway = 0; rth->rt_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
if (res.type == RTN_UNREACHABLE) { if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error; rth->dst.input= ip_error;
rth->dst.error= -err; rth->dst.error= -err;
...@@ -1792,6 +1849,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, ...@@ -1792,6 +1849,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
rth->rt_iif = orig_oif ? : 0; rth->rt_iif = orig_oif ? : 0;
rth->rt_pmtu = 0; rth->rt_pmtu = 0;
rth->rt_gateway = 0; rth->rt_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
RT_CACHE_STAT_INC(out_slow_tot); RT_CACHE_STAT_INC(out_slow_tot);
...@@ -2071,6 +2129,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or ...@@ -2071,6 +2129,8 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_type = ort->rt_type; rt->rt_type = ort->rt_type;
rt->rt_gateway = ort->rt_gateway; rt->rt_gateway = ort->rt_gateway;
INIT_LIST_HEAD(&rt->rt_uncached);
dst_free(new); dst_free(new);
} }
......
...@@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, ...@@ -92,6 +92,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_type = rt->rt_type; xdst->u.rt.rt_type = rt->rt_type;
xdst->u.rt.rt_gateway = rt->rt_gateway; xdst->u.rt.rt_gateway = rt->rt_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu; xdst->u.rt.rt_pmtu = rt->rt_pmtu;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment