Commit acd7c8fe authored by Tadeusz Struk, committed by Doug Ledford

IB/hfi1: Fix an Oops on pci device force remove

This patch fixes an Oops on device unbind, when the device is used
by a PSM user process. PSM processes access device resources which
are freed on device removal. Similar protection exists in uverbs
in ib_core for Verbs clients, but PSM doesn't use ib_uverbs hence
a separate protection is required for PSM clients.

Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dean Luick <dean.luick@intel.com>
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com>
Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
parent d9ac4555
...@@ -14691,6 +14691,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, ...@@ -14691,6 +14691,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
if (ret) if (ret)
goto bail_free_cntrs; goto bail_free_cntrs;
init_completion(&dd->user_comp);
/* The user refcount starts with one to indicate an active device */
atomic_set(&dd->user_refcount, 1);
goto bail; goto bail;
bail_free_rcverr: bail_free_rcverr:
......
...@@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) ...@@ -172,6 +172,9 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
struct hfi1_devdata, struct hfi1_devdata,
user_cdev); user_cdev);
if (!atomic_inc_not_zero(&dd->user_refcount))
return -ENXIO;
/* Just take a ref now. Not all opens result in a context assign */ /* Just take a ref now. Not all opens result in a context assign */
kobject_get(&dd->kobj); kobject_get(&dd->kobj);
...@@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) ...@@ -183,11 +186,17 @@ static int hfi1_file_open(struct inode *inode, struct file *fp)
fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->rec_cpu_num = -1; /* no cpu affinity by default */
fd->mm = current->mm; fd->mm = current->mm;
atomic_inc(&fd->mm->mm_count); atomic_inc(&fd->mm->mm_count);
}
fp->private_data = fd; fp->private_data = fd;
} else {
fp->private_data = NULL;
if (atomic_dec_and_test(&dd->user_refcount))
complete(&dd->user_comp);
return fd ? 0 : -ENOMEM; return -ENOMEM;
}
return 0;
} }
static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
...@@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) ...@@ -798,6 +807,10 @@ static int hfi1_file_close(struct inode *inode, struct file *fp)
done: done:
mmdrop(fdata->mm); mmdrop(fdata->mm);
kobject_put(&dd->kobj); kobject_put(&dd->kobj);
if (atomic_dec_and_test(&dd->user_refcount))
complete(&dd->user_comp);
kfree(fdata); kfree(fdata);
return 0; return 0;
} }
......
...@@ -1174,6 +1174,10 @@ struct hfi1_devdata { ...@@ -1174,6 +1174,10 @@ struct hfi1_devdata {
spinlock_t aspm_lock; spinlock_t aspm_lock;
/* Number of verbs contexts which have disabled ASPM */ /* Number of verbs contexts which have disabled ASPM */
atomic_t aspm_disabled_cnt; atomic_t aspm_disabled_cnt;
/* Keeps track of user space clients */
atomic_t user_refcount;
/* Used to wait for outstanding user space clients before dev removal */
struct completion user_comp;
struct hfi1_affinity *affinity; struct hfi1_affinity *affinity;
struct rhashtable sdma_rht; struct rhashtable sdma_rht;
......
...@@ -1538,12 +1538,31 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) ...@@ -1538,12 +1538,31 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
return ret; return ret;
} }
/*
 * wait_for_clients - block until dd->user_comp has been completed
 * @dd: device data holding the user_refcount counter and the user_comp
 *      completion
 *
 * Drops the device's own initial count from dd->user_refcount. When that
 * decrement takes the counter to zero there are no clients left, so
 * dd->user_comp is completed right here. Either way the function then
 * waits on dd->user_comp before returning.
 */
static void wait_for_clients(struct hfi1_devdata *dd)
{
	int no_clients_left = atomic_dec_and_test(&dd->user_refcount);

	if (no_clients_left)
		complete(&dd->user_comp);

	wait_for_completion(&dd->user_comp);
}
static void remove_one(struct pci_dev *pdev) static void remove_one(struct pci_dev *pdev)
{ {
struct hfi1_devdata *dd = pci_get_drvdata(pdev); struct hfi1_devdata *dd = pci_get_drvdata(pdev);
/* close debugfs files before ib unregister */ /* close debugfs files before ib unregister */
hfi1_dbg_ibdev_exit(&dd->verbs_dev); hfi1_dbg_ibdev_exit(&dd->verbs_dev);
/* remove the /dev hfi1 interface */
hfi1_device_remove(dd);
/* wait for existing user space clients to finish */
wait_for_clients(dd);
/* unregister from IB core */ /* unregister from IB core */
hfi1_unregister_ib_device(dd); hfi1_unregister_ib_device(dd);
...@@ -1558,8 +1577,6 @@ static void remove_one(struct pci_dev *pdev) ...@@ -1558,8 +1577,6 @@ static void remove_one(struct pci_dev *pdev)
/* wait until all of our (qsfp) queue_work() calls complete */ /* wait until all of our (qsfp) queue_work() calls complete */
flush_workqueue(ib_wq); flush_workqueue(ib_wq);
hfi1_device_remove(dd);
postinit_cleanup(dd); postinit_cleanup(dd);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment