Commit 0115e8e3, authored by Eric Dumazet, committed by David S. Miller

net: remove delay at device dismantle

I noticed an extra one-second delay in device dismantle, tracked down to
a call to dst_dev_event() while some call_rcu() callbacks are still in RCU queues.

These call_rcu() were posted by rt_free(struct rtable *rt) calls.

We then wait a little (but a full second) in netdev_wait_allrefs() before
sending NETDEV_UNREGISTER again.

As the call_rcu() callbacks have completed by then, dst_dev_event() can do the
needed device swap on busy dst entries.

To solve this problem, add a new NETDEV_UNREGISTER_FINAL event, sent
after an rcu_barrier(), but outside of the RTNL lock.

Use NETDEV_UNREGISTER_FINAL with care !

Change the dst_dev_event() handler to react to NETDEV_UNREGISTER_FINAL
instead of NETDEV_UNREGISTER.

Also remove NETDEV_UNREGISTER_BATCH, as it's not used anymore after
IP cache removal.

With help from Gao feng
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <therbert@google.com>
Cc: Mahesh Bandewar <maheshb@google.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent bf277b0c
...@@ -1553,7 +1553,7 @@ struct packet_type { ...@@ -1553,7 +1553,7 @@ struct packet_type {
#define NETDEV_PRE_TYPE_CHANGE 0x000E #define NETDEV_PRE_TYPE_CHANGE 0x000E
#define NETDEV_POST_TYPE_CHANGE 0x000F #define NETDEV_POST_TYPE_CHANGE 0x000F
#define NETDEV_POST_INIT 0x0010 #define NETDEV_POST_INIT 0x0010
#define NETDEV_UNREGISTER_BATCH 0x0011 #define NETDEV_UNREGISTER_FINAL 0x0011
#define NETDEV_RELEASE 0x0012 #define NETDEV_RELEASE 0x0012
#define NETDEV_NOTIFY_PEERS 0x0013 #define NETDEV_NOTIFY_PEERS 0x0013
#define NETDEV_JOIN 0x0014 #define NETDEV_JOIN 0x0014
......
...@@ -1406,7 +1406,6 @@ int register_netdevice_notifier(struct notifier_block *nb) ...@@ -1406,7 +1406,6 @@ int register_netdevice_notifier(struct notifier_block *nb)
nb->notifier_call(nb, NETDEV_DOWN, dev); nb->notifier_call(nb, NETDEV_DOWN, dev);
} }
nb->notifier_call(nb, NETDEV_UNREGISTER, dev); nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
} }
} }
...@@ -1448,7 +1447,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb) ...@@ -1448,7 +1447,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb)
nb->notifier_call(nb, NETDEV_DOWN, dev); nb->notifier_call(nb, NETDEV_DOWN, dev);
} }
nb->notifier_call(nb, NETDEV_UNREGISTER, dev); nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
} }
} }
unlock: unlock:
...@@ -1468,7 +1466,8 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); ...@@ -1468,7 +1466,8 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
int call_netdevice_notifiers(unsigned long val, struct net_device *dev) int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{ {
ASSERT_RTNL(); if (val != NETDEV_UNREGISTER_FINAL)
ASSERT_RTNL();
return raw_notifier_call_chain(&netdev_chain, val, dev); return raw_notifier_call_chain(&netdev_chain, val, dev);
} }
EXPORT_SYMBOL(call_netdevice_notifiers); EXPORT_SYMBOL(call_netdevice_notifiers);
...@@ -5331,10 +5330,6 @@ static void rollback_registered_many(struct list_head *head) ...@@ -5331,10 +5330,6 @@ static void rollback_registered_many(struct list_head *head)
netdev_unregister_kobject(dev); netdev_unregister_kobject(dev);
} }
/* Process any work delayed until the end of the batch */
dev = list_first_entry(head, struct net_device, unreg_list);
call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
synchronize_net(); synchronize_net();
list_for_each_entry(dev, head, unreg_list) list_for_each_entry(dev, head, unreg_list)
...@@ -5787,9 +5782,8 @@ static void netdev_wait_allrefs(struct net_device *dev) ...@@ -5787,9 +5782,8 @@ static void netdev_wait_allrefs(struct net_device *dev)
/* Rebroadcast unregister notification */ /* Rebroadcast unregister notification */
call_netdevice_notifiers(NETDEV_UNREGISTER, dev); call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
/* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users rcu_barrier();
* should have already handle it the first time */ call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
if (test_bit(__LINK_STATE_LINKWATCH_PENDING, if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
&dev->state)) { &dev->state)) {
/* We must not have linkwatch events /* We must not have linkwatch events
...@@ -5851,9 +5845,8 @@ void netdev_run_todo(void) ...@@ -5851,9 +5845,8 @@ void netdev_run_todo(void)
__rtnl_unlock(); __rtnl_unlock();
/* Wait for rcu callbacks to finish before attempting to drain
* the device list. This usually avoids a 250ms wait. /* Wait for rcu callbacks to finish before next phase */
*/
if (!list_empty(&list)) if (!list_empty(&list))
rcu_barrier(); rcu_barrier();
...@@ -5862,6 +5855,8 @@ void netdev_run_todo(void) ...@@ -5862,6 +5855,8 @@ void netdev_run_todo(void)
= list_first_entry(&list, struct net_device, todo_list); = list_first_entry(&list, struct net_device, todo_list);
list_del(&dev->todo_list); list_del(&dev->todo_list);
call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev);
if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
pr_err("network todo '%s' but state %d\n", pr_err("network todo '%s' but state %d\n",
dev->name, dev->reg_state); dev->name, dev->reg_state);
...@@ -6256,7 +6251,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char ...@@ -6256,7 +6251,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
the device is just moving and can keep their slaves up. the device is just moving and can keep their slaves up.
*/ */
call_netdevice_notifiers(NETDEV_UNREGISTER, dev); call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); rtmsg_ifinfo(RTM_DELLINK, dev, ~0U);
/* /*
......
...@@ -374,7 +374,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, ...@@ -374,7 +374,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
struct dst_entry *dst, *last = NULL; struct dst_entry *dst, *last = NULL;
switch (event) { switch (event) {
case NETDEV_UNREGISTER: case NETDEV_UNREGISTER_FINAL:
case NETDEV_DOWN: case NETDEV_DOWN:
mutex_lock(&dst_gc_mutex); mutex_lock(&dst_gc_mutex);
for (dst = dst_busy_list; dst; dst = dst->next) { for (dst = dst_busy_list; dst; dst = dst->next) {
......
...@@ -711,15 +711,16 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, ...@@ -711,15 +711,16 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
struct net *net = dev_net(dev); struct net *net = dev_net(dev);
struct fib_rules_ops *ops; struct fib_rules_ops *ops;
ASSERT_RTNL();
switch (event) { switch (event) {
case NETDEV_REGISTER: case NETDEV_REGISTER:
ASSERT_RTNL();
list_for_each_entry(ops, &net->rules_ops, list) list_for_each_entry(ops, &net->rules_ops, list)
attach_rules(&ops->rules_list, dev); attach_rules(&ops->rules_list, dev);
break; break;
case NETDEV_UNREGISTER: case NETDEV_UNREGISTER:
ASSERT_RTNL();
list_for_each_entry(ops, &net->rules_ops, list) list_for_each_entry(ops, &net->rules_ops, list)
detach_rules(&ops->rules_list, dev); detach_rules(&ops->rules_list, dev);
break; break;
......
...@@ -2358,7 +2358,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi ...@@ -2358,7 +2358,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
case NETDEV_PRE_TYPE_CHANGE: case NETDEV_PRE_TYPE_CHANGE:
case NETDEV_GOING_DOWN: case NETDEV_GOING_DOWN:
case NETDEV_UNREGISTER: case NETDEV_UNREGISTER:
case NETDEV_UNREGISTER_BATCH: case NETDEV_UNREGISTER_FINAL:
case NETDEV_RELEASE: case NETDEV_RELEASE:
case NETDEV_JOIN: case NETDEV_JOIN:
break; break;
......
...@@ -1147,8 +1147,12 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ...@@ -1147,8 +1147,12 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
void *ptr) void *ptr)
{ {
struct net_device *dev = ptr; struct net_device *dev = ptr;
struct in_device *in_dev = __in_dev_get_rtnl(dev); struct in_device *in_dev;
if (event == NETDEV_UNREGISTER_FINAL)
goto out;
in_dev = __in_dev_get_rtnl(dev);
ASSERT_RTNL(); ASSERT_RTNL();
if (!in_dev) { if (!in_dev) {
......
...@@ -1041,7 +1041,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, ...@@ -1041,7 +1041,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
{ {
struct net_device *dev = ptr; struct net_device *dev = ptr;
struct in_device *in_dev = __in_dev_get_rtnl(dev); struct in_device *in_dev;
struct net *net = dev_net(dev); struct net *net = dev_net(dev);
if (event == NETDEV_UNREGISTER) { if (event == NETDEV_UNREGISTER) {
...@@ -1050,9 +1050,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo ...@@ -1050,9 +1050,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
return NOTIFY_DONE; return NOTIFY_DONE;
} }
if (!in_dev) if (event == NETDEV_UNREGISTER_FINAL)
return NOTIFY_DONE; return NOTIFY_DONE;
in_dev = __in_dev_get_rtnl(dev);
switch (event) { switch (event) {
case NETDEV_UP: case NETDEV_UP:
for_ifa(in_dev) { for_ifa(in_dev) {
...@@ -1071,8 +1073,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo ...@@ -1071,8 +1073,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
case NETDEV_CHANGE: case NETDEV_CHANGE:
rt_cache_flush(dev_net(dev), 0); rt_cache_flush(dev_net(dev), 0);
break; break;
case NETDEV_UNREGISTER_BATCH:
break;
} }
return NOTIFY_DONE; return NOTIFY_DONE;
} }
......
...@@ -2566,10 +2566,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, ...@@ -2566,10 +2566,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
void *data) void *data)
{ {
struct net_device *dev = (struct net_device *) data; struct net_device *dev = (struct net_device *) data;
struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_dev *idev;
int run_pending = 0; int run_pending = 0;
int err; int err;
if (event == NETDEV_UNREGISTER_FINAL)
return NOTIFY_DONE;
idev = __in6_dev_get(dev);
switch (event) { switch (event) {
case NETDEV_REGISTER: case NETDEV_REGISTER:
if (!idev && dev->mtu >= IPV6_MIN_MTU) { if (!idev && dev->mtu >= IPV6_MIN_MTU) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment