Commit 4161529e authored by Feras Daoud's avatar Feras Daoud Committed by Greg Kroah-Hartman

IB/ipoib: Fix deadlock between rmmod and set_mode

commit 0a0007f2 upstream.

When calling set_mode from sys/fs, the call flow locks the sys/fs lock
first and then tries to lock rtnl_lock (when calling ipoib_set_mod).
On the other hand, the rmmod call flow takes the rtnl_lock first
(when calling unregister_netdev) and then tries to take the sys/fs
lock. Deadlock a->b, b->a.

The problem starts when ipoib_set_mod frees it's rtnl_lck and tries
to get it after that.

    set_mod:
    [<ffffffff8104f2bd>] ? check_preempt_curr+0x6d/0x90
    [<ffffffff814fee8e>] __mutex_lock_slowpath+0x13e/0x180
    [<ffffffff81448655>] ? __rtnl_unlock+0x15/0x20
    [<ffffffff814fed2b>] mutex_lock+0x2b/0x50
    [<ffffffff81448675>] rtnl_lock+0x15/0x20
    [<ffffffffa02ad807>] ipoib_set_mode+0x97/0x160 [ib_ipoib]
    [<ffffffffa02b5f5b>] set_mode+0x3b/0x80 [ib_ipoib]
    [<ffffffff8134b840>] dev_attr_store+0x20/0x30
    [<ffffffff811f0fe5>] sysfs_write_file+0xe5/0x170
    [<ffffffff8117b068>] vfs_write+0xb8/0x1a0
    [<ffffffff8117ba81>] sys_write+0x51/0x90
    [<ffffffff8100b0f2>] system_call_fastpath+0x16/0x1b

    rmmod:
    [<ffffffff81279ffc>] ? put_dec+0x10c/0x110
    [<ffffffff8127a2ee>] ? number+0x2ee/0x320
    [<ffffffff814fe6a5>] schedule_timeout+0x215/0x2e0
    [<ffffffff8127cc04>] ? vsnprintf+0x484/0x5f0
    [<ffffffff8127b550>] ? string+0x40/0x100
    [<ffffffff814fe323>] wait_for_common+0x123/0x180
    [<ffffffff81060250>] ? default_wake_function+0x0/0x20
    [<ffffffff8119661e>] ? ifind_fast+0x5e/0xb0
    [<ffffffff814fe43d>] wait_for_completion+0x1d/0x20
    [<ffffffff811f2e68>] sysfs_addrm_finish+0x228/0x270
    [<ffffffff811f2fb3>] sysfs_remove_dir+0xa3/0xf0
    [<ffffffff81273f66>] kobject_del+0x16/0x40
    [<ffffffff8134cd14>] device_del+0x184/0x1e0
    [<ffffffff8144e59b>] netdev_unregister_kobject+0xab/0xc0
    [<ffffffff8143c05e>] rollback_registered+0xae/0x130
    [<ffffffff8143c102>] unregister_netdevice+0x22/0x70
    [<ffffffff8143c16e>] unregister_netdev+0x1e/0x30
    [<ffffffffa02a91b0>] ipoib_remove_one+0xe0/0x120 [ib_ipoib]
    [<ffffffffa01ed95f>] ib_unregister_device+0x4f/0x100 [ib_core]
    [<ffffffffa021f5e1>] mlx4_ib_remove+0x41/0x180 [mlx4_ib]
    [<ffffffffa01ab771>] mlx4_remove_device+0x71/0x90 [mlx4_core]

Fixes: 862096a8 ("IB/ipoib: Add more rtnl_link_ops callbacks")
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: default avatarFeras Daoud <ferasda@mellanox.com>
Signed-off-by: default avatarErez Shitrit <erezsh@mellanox.com>
Signed-off-by: default avatarLeon Romanovsky <leon@kernel.org>
Signed-off-by: default avatarDoug Ledford <dledford@redhat.com>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent b68b7e6b
...@@ -1478,12 +1478,14 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, ...@@ -1478,12 +1478,14 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
ret = ipoib_set_mode(dev, buf); ret = ipoib_set_mode(dev, buf);
rtnl_unlock(); /* The assumption is that the function ipoib_set_mode returned
* with the rtnl held by it, if not the value -EBUSY returned,
if (!ret) * then no need to rtnl_unlock
return count; */
if (ret != -EBUSY)
rtnl_unlock();
return ret; return (!ret || ret == -EBUSY) ? count : ret;
} }
static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode); static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
......
...@@ -236,8 +236,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) ...@@ -236,8 +236,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM; priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
ipoib_flush_paths(dev); ipoib_flush_paths(dev);
rtnl_lock(); return (!rtnl_trylock()) ? -EBUSY : 0;
return 0;
} }
if (!strcmp(buf, "datagram\n")) { if (!strcmp(buf, "datagram\n")) {
...@@ -246,8 +245,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) ...@@ -246,8 +245,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf)
dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu)); dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
rtnl_unlock(); rtnl_unlock();
ipoib_flush_paths(dev); ipoib_flush_paths(dev);
rtnl_lock(); return (!rtnl_trylock()) ? -EBUSY : 0;
return 0;
} }
return -EINVAL; return -EINVAL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment