Commit c002496b authored by David S. Miller

Merge branch 'ipv6-loopback'

Eric Dumazet says:

====================
ipv6: remove addrconf reliance on loopback

Second patch in this series removes the IPv6 requirement that the netns
loopback device be the last device to be dismantled.

This was needed because rt6_uncached_list_flush_dev()
and ip6_dst_ifdown() had to switch dst dev to a known
device (loopback).

Instead of loopback, we can use the (hidden) blackhole_netdev,
which is also always there.

This will allow future simplifications of netdev_run_todo()
and other parts of the stack like default_device_exit_batch().

The last two patches are optimizations for both IP families.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 926eae60 29e5375d
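
The core mechanism of the series, condensed here as a hedged sketch (all identifiers appear in the hunks below, but this is a readable restatement, not the verbatim patch): when a device is dismantled, every uncached dst still pointing at it is retargeted to the always-registered blackhole_netdev and parked on a per-cpu quarantine list, so no later flush has to revisit it.

	/* Sketch of the IPv4 flush pattern after this series; the IPv6
	 * variant is analogous. A quarantined entry can never match a
	 * dying device again, because blackhole_netdev is never
	 * unregistered.
	 */
	void rt_flush_dev(struct net_device *dev)
	{
		struct rtable *rt, *safe;
		int cpu;

		for_each_possible_cpu(cpu) {
			struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

			if (list_empty(&ul->head))	/* lockless fast path */
				continue;

			spin_lock_bh(&ul->lock);
			list_for_each_entry_safe(rt, safe, &ul->head, rt_uncached) {
				if (rt->dst.dev != dev)
					continue;
				rt->dst.dev = blackhole_netdev;
				dev_replace_track(dev, blackhole_netdev,
						  &rt->dst.dev_tracker, GFP_ATOMIC);
				list_move(&rt->rt_uncached, &ul->quarantine);
			}
			spin_unlock_bh(&ul->lock);
		}
	}
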
@@ -367,9 +367,8 @@ struct rt6_statistics {
 	__u32		fib_rt_cache;		/* cached rt entries in exception table */
 	__u32		fib_discarded_routes;	/* total number of routes delete */
 
-	/* The following stats are not protected by any lock */
+	/* The following stat is not protected by any lock */
 	atomic_t	fib_rt_alloc;		/* total number of routes alloced */
-	atomic_t	fib_rt_uncache;		/* rt entries in uncached list */
 };
 
 #define RTN_TL_ROOT	0x0001
...
@@ -1485,6 +1485,7 @@ static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt)
 struct uncached_list {
 	spinlock_t		lock;
 	struct list_head	head;
+	struct list_head	quarantine;
 };
 
 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
@@ -1506,7 +1507,7 @@ void rt_del_uncached_list(struct rtable *rt)
 		struct uncached_list *ul = rt->rt_uncached_list;
 
 		spin_lock_bh(&ul->lock);
-		list_del(&rt->rt_uncached);
+		list_del_init(&rt->rt_uncached);
 		spin_unlock_bh(&ul->lock);
 	}
 }
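
Why list_del() becomes list_del_init() in the hunk above: with two possible lists (head and quarantine), membership is tested with list_empty() on the entry's own node, and only list_del_init() leaves the node self-linked so that test stays valid after removal. A minimal sketch of the invariant; rt_is_uncached() is a hypothetical helper for illustration, not part of the patch:

	/* An entry counts as "on some uncached list" iff its node is not
	 * self-linked. Plain list_del() leaves poisoned pointers behind,
	 * which would break this test; list_del_init() re-initializes
	 * the node instead.
	 */
	static bool rt_is_uncached(const struct rtable *rt)	/* hypothetical helper */
	{
		return !list_empty(&rt->rt_uncached);
	}
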
@@ -1521,20 +1522,24 @@ static void ipv4_dst_destroy(struct dst_entry *dst)
 void rt_flush_dev(struct net_device *dev)
 {
-	struct rtable *rt;
+	struct rtable *rt, *safe;
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
 		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
 
+		if (list_empty(&ul->head))
+			continue;
+
 		spin_lock_bh(&ul->lock);
-		list_for_each_entry(rt, &ul->head, rt_uncached) {
+		list_for_each_entry_safe(rt, safe, &ul->head, rt_uncached) {
 			if (rt->dst.dev != dev)
 				continue;
 			rt->dst.dev = blackhole_netdev;
 			dev_replace_track(dev, blackhole_netdev,
 					  &rt->dst.dev_tracker,
 					  GFP_ATOMIC);
+			list_move(&rt->rt_uncached, &ul->quarantine);
 		}
 		spin_unlock_bh(&ul->lock);
 	}
 }
@@ -3706,6 +3711,7 @@ int __init ip_rt_init(void)
 		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
 
 		INIT_LIST_HEAD(&ul->head);
+		INIT_LIST_HEAD(&ul->quarantine);
 		spin_lock_init(&ul->lock);
 	}
 #ifdef CONFIG_IP_ROUTE_CLASSID
...
@@ -372,7 +372,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 
 	ASSERT_RTNL();
 
-	if (dev->mtu < IPV6_MIN_MTU)
+	if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev)
 		return ERR_PTR(-EINVAL);
 
 	ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL);
@@ -400,21 +400,22 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	/* We refer to the device */
 	dev_hold_track(dev, &ndev->dev_tracker, GFP_KERNEL);
 
-	if (snmp6_alloc_dev(ndev) < 0) {
-		netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
-			   __func__);
-		neigh_parms_release(&nd_tbl, ndev->nd_parms);
-		dev_put_track(dev, &ndev->dev_tracker);
-		kfree(ndev);
-		return ERR_PTR(err);
-	}
+	if (dev != blackhole_netdev) {
+		if (snmp6_alloc_dev(ndev) < 0) {
+			netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
+				   __func__);
+			neigh_parms_release(&nd_tbl, ndev->nd_parms);
+			dev_put_track(dev, &ndev->dev_tracker);
+			kfree(ndev);
+			return ERR_PTR(err);
+		}
 
-	if (snmp6_register_dev(ndev) < 0) {
-		netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
-			   __func__, dev->name);
-		goto err_release;
+		if (snmp6_register_dev(ndev) < 0) {
+			netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
+				   __func__, dev->name);
+			goto err_release;
+		}
 	}
 
 	/* One reference from device. */
 	refcount_set(&ndev->refcnt, 1);
@@ -445,25 +446,28 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	ipv6_mc_init_dev(ndev);
 	ndev->tstamp = jiffies;
-	err = addrconf_sysctl_register(ndev);
-	if (err) {
-		ipv6_mc_destroy_dev(ndev);
-		snmp6_unregister_dev(ndev);
-		goto err_release;
+	if (dev != blackhole_netdev) {
+		err = addrconf_sysctl_register(ndev);
+		if (err) {
+			ipv6_mc_destroy_dev(ndev);
+			snmp6_unregister_dev(ndev);
+			goto err_release;
+		}
 	}
 
 	/* protected by rtnl_lock */
 	rcu_assign_pointer(dev->ip6_ptr, ndev);
 
-	/* Join interface-local all-node multicast group */
-	ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
+	if (dev != blackhole_netdev) {
+		/* Join interface-local all-node multicast group */
+		ipv6_dev_mc_inc(dev, &in6addr_interfacelocal_allnodes);
 
-	/* Join all-node multicast group */
-	ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
+		/* Join all-node multicast group */
+		ipv6_dev_mc_inc(dev, &in6addr_linklocal_allnodes);
 
-	/* Join all-router multicast group if forwarding is set */
-	if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
-		ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
-
+		/* Join all-router multicast group if forwarding is set */
+		if (ndev->cnf.forwarding && (dev->flags & IFF_MULTICAST))
+			ipv6_dev_mc_inc(dev, &in6addr_linklocal_allrouters);
+	}
 	return ndev;
 
 err_release:
@@ -7233,26 +7237,8 @@ int __init addrconf_init(void)
 		goto out_nowq;
 	}
 
-	/* The addrconf netdev notifier requires that loopback_dev
-	 * has it's ipv6 private information allocated and setup
-	 * before it can bring up and give link-local addresses
-	 * to other devices which are up.
-	 *
-	 * Unfortunately, loopback_dev is not necessarily the first
-	 * entry in the global dev_base list of net devices. In fact,
-	 * it is likely to be the very last entry on that list.
-	 * So this causes the notifier registry below to try and
-	 * give link-local addresses to all devices besides loopback_dev
-	 * first, then loopback_dev, which cases all the non-loopback_dev
-	 * devices to fail to get a link-local address.
-	 *
-	 * So, as a temporary fix, allocate the ipv6 structure for
-	 * loopback_dev first by hand.
-	 * Longer term, all of the dependencies ipv6 has upon the loopback
-	 * device and it being up should be removed.
-	 */
 	rtnl_lock();
-	idev = ipv6_add_dev(init_net.loopback_dev);
+	idev = ipv6_add_dev(blackhole_netdev);
 	rtnl_unlock();
 	if (IS_ERR(idev)) {
 		err = PTR_ERR(idev);
...
@@ -130,6 +130,7 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
 struct uncached_list {
 	spinlock_t		lock;
 	struct list_head	head;
+	struct list_head	quarantine;
 };
 
 static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list);
@@ -149,35 +150,34 @@ void rt6_uncached_list_del(struct rt6_info *rt)
 {
 	if (!list_empty(&rt->rt6i_uncached)) {
 		struct uncached_list *ul = rt->rt6i_uncached_list;
-		struct net *net = dev_net(rt->dst.dev);
 
 		spin_lock_bh(&ul->lock);
-		list_del(&rt->rt6i_uncached);
-		atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache);
+		list_del_init(&rt->rt6i_uncached);
 		spin_unlock_bh(&ul->lock);
 	}
 }
 
-static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
+static void rt6_uncached_list_flush_dev(struct net_device *dev)
 {
-	struct net_device *loopback_dev = net->loopback_dev;
 	int cpu;
 
-	if (dev == loopback_dev)
-		return;
-
 	for_each_possible_cpu(cpu) {
 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
-		struct rt6_info *rt;
+		struct rt6_info *rt, *safe;
+
+		if (list_empty(&ul->head))
+			continue;
 
 		spin_lock_bh(&ul->lock);
-		list_for_each_entry(rt, &ul->head, rt6i_uncached) {
+		list_for_each_entry_safe(rt, safe, &ul->head, rt6i_uncached) {
 			struct inet6_dev *rt_idev = rt->rt6i_idev;
 			struct net_device *rt_dev = rt->dst.dev;
+			bool handled = false;
 
 			if (rt_idev->dev == dev) {
-				rt->rt6i_idev = in6_dev_get(loopback_dev);
+				rt->rt6i_idev = in6_dev_get(blackhole_netdev);
 				in6_dev_put(rt_idev);
+				handled = true;
 			}
 
 			if (rt_dev == dev) {
@@ -185,7 +185,11 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 				dev_replace_track(rt_dev, blackhole_netdev,
 						  &rt->dst.dev_tracker,
 						  GFP_ATOMIC);
+				handled = true;
 			}
+			if (handled)
+				list_move(&rt->rt6i_uncached,
+					  &ul->quarantine);
 		}
 		spin_unlock_bh(&ul->lock);
 	}
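
A condensed view of the per-entry work in rt6_uncached_list_flush_dev() after the two hunks above (a sketch; the rt->dst.dev assignment sits in context lines collapsed out of the hunk and is assumed from the surrounding function): a rt6_info references a device twice, through rt6i_idev and dst.dev, each reference is retargeted independently, and the entry is quarantined once either one matched the dying device.

	bool handled = false;

	if (rt_idev->dev == dev) {		/* inet6_dev reference */
		rt->rt6i_idev = in6_dev_get(blackhole_netdev);
		in6_dev_put(rt_idev);
		handled = true;
	}
	if (rt_dev == dev) {			/* dst device reference */
		rt->dst.dev = blackhole_netdev;	/* assumed collapsed context line */
		dev_replace_track(rt_dev, blackhole_netdev,
				  &rt->dst.dev_tracker, GFP_ATOMIC);
		handled = true;
	}
	if (handled)				/* never rescanned afterwards */
		list_move(&rt->rt6i_uncached, &ul->quarantine);
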
@@ -373,13 +377,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
 	struct inet6_dev *idev = rt->rt6i_idev;
-	struct net_device *loopback_dev =
-		dev_net(dev)->loopback_dev;
 
-	if (idev && idev->dev != loopback_dev) {
-		struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
-		if (loopback_idev) {
-			rt->rt6i_idev = loopback_idev;
+	if (idev && idev->dev != blackhole_netdev) {
+		struct inet6_dev *blackhole_idev = in6_dev_get(blackhole_netdev);
+
+		if (blackhole_idev) {
+			rt->rt6i_idev = blackhole_idev;
 			in6_dev_put(idev);
 		}
 	}
@@ -2244,7 +2247,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
 		 * if caller sets RT6_LOOKUP_F_DST_NOREF flag.
 		 */
 		rt6_uncached_list_add(rt);
-		atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
 		rcu_read_unlock();
 
 		return rt;
@@ -3287,7 +3289,6 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	 * do proper release of the net_device
 	 */
 	rt6_uncached_list_add(rt);
-	atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache);
 
 	dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0);
@@ -4896,7 +4897,7 @@ void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
 
 void rt6_disable_ip(struct net_device *dev, unsigned long event)
 {
 	rt6_sync_down_dev(dev, event);
-	rt6_uncached_list_flush_dev(dev_net(dev), dev);
+	rt6_uncached_list_flush_dev(dev);
 	neigh_ifdown(&nd_tbl, dev);
 }
@@ -6736,6 +6737,7 @@ int __init ip6_route_init(void)
 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 
 		INIT_LIST_HEAD(&ul->head);
+		INIT_LIST_HEAD(&ul->quarantine);
 		spin_lock_init(&ul->lock);
 	}
...
@@ -92,7 +92,6 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	xdst->u.rt6.rt6i_src = rt->rt6i_src;
 	INIT_LIST_HEAD(&xdst->u.rt6.rt6i_uncached);
 	rt6_uncached_list_add(&xdst->u.rt6);
-	atomic_inc(&dev_net(dev)->ipv6.rt6_stats->fib_rt_uncache);
 
 	return 0;
 }