drm/i915/gvt: Fix possible recursive locking issue

vfio_unpin_pages will hold a read semaphore however it is already hold in the same thread by vfio ioctl. It will cause below warning: [ 5102.127454] ============================================ [ 5102.133379] WARNING: possible recursive locking detected [ 5102.139304] 4.12.0-rc4+ #3 Not tainted [ 5102.143483] -------------------------------------------- [ 5102.149407] qemu-system-x86/1620 is trying to acquire lock: [ 5102.155624] (&container->group_lock){++++++}, at: [<ffffffff817768c6>] vfio_unpin_pages+0x96/0xf0 [ 5102.165626] but task is already holding lock: [ 5102.172134] (&container->group_lock){++++++}, at: [<ffffffff8177728f>] vfio_fops_unl_ioctl+0x5f/0x280 [ 5102.182522] other info that might help us debug this: [ 5102.189806] Possible unsafe locking scenario: [ 5102.196411] CPU0 [ 5102.199136] ---- [ 5102.201861] lock(&container->group_lock); [ 5102.206527] lock(&container->group_lock); [ 5102.211191] *** DEADLOCK *** [ 5102.217796] May be due to missing lock nesting notation [ 5102.225370] 3 locks held by qemu-system-x86/1620: [ 5102.230618] #0: (&container->group_lock){++++++}, at: [<ffffffff8177728f>] vfio_fops_unl_ioctl+0x5f/0x280 [ 5102.241482] #1: (&(&iommu->notifier)->rwsem){++++..}, at: [<ffffffff810de775>] __blocking_notifier_call_chain+0x35/0x70 [ 5102.253713] #2: (&vgpu->vdev.cache_lock){+.+...}, at: [<ffffffff8157b007>] intel_vgpu_iommu_notifier+0x77/0x120 [ 5102.265163] stack backtrace: [ 5102.270022] CPU: 5 PID: 1620 Comm: qemu-system-x86 Not tainted 4.12.0-rc4+ #3 [ 5102.277991] Hardware name: Intel Corporation S1200RP/S1200RP, BIOS S1200RP.86B.03.01.APER.061220151418 06/12/2015 [ 5102.289445] Call Trace: [ 5102.292175] dump_stack+0x85/0xc7 [ 5102.295871] validate_chain.isra.21+0x9da/0xaf0 [ 5102.300925] __lock_acquire+0x405/0x820 [ 5102.305202] lock_acquire+0xc7/0x220 [ 5102.309191] ? vfio_unpin_pages+0x96/0xf0 [ 5102.313666] down_read+0x2b/0x50 [ 5102.317259] ? vfio_unpin_pages+0x96/0xf0 [ 5102.321732] vfio_unpin_pages+0x96/0xf0 [ 5102.326024] intel_vgpu_iommu_notifier+0xe5/0x120 [ 5102.331283] notifier_call_chain+0x4a/0x70 [ 5102.335851] __blocking_notifier_call_chain+0x4d/0x70 [ 5102.341490] blocking_notifier_call_chain+0x16/0x20 [ 5102.346935] vfio_iommu_type1_ioctl+0x87b/0x920 [ 5102.351994] vfio_fops_unl_ioctl+0x81/0x280 [ 5102.356660] ? __fget+0xf0/0x210 [ 5102.360261] do_vfs_ioctl+0x93/0x6a0 [ 5102.364247] ? __fget+0x111/0x210 [ 5102.367942] SyS_ioctl+0x41/0x70 [ 5102.371542] entry_SYSCALL_64_fastpath+0x1f/0xbe put the vfio_unpin_pages in a workqueue can fix this. v2: - use for style instead of do{}while(1). (Zhenyu) v3: - rename gvt_cache_mark to gvt_cache_mark_remove. (Zhenyu) Fixes: 659643f7 ("drm/i915/gvt/kvmgt: add vfio/mdev support to KVMGT") Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com> Cc: Zhenyu Wang <zhenyuw@linux.intel.com> Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>

drm/i915/gvt: Fix possible recursive locking issue
vfio_unpin_pages will hold a read semaphore however it is already hold in the same thread by vfio ioctl. It will cause below warning: [ 5102.127454] ============================================ [ 5102.133379] WARNING: possible recursive locking detected [ 5102.139304] 4.12.0-rc4+ #3 Not tainted [ 5102.143483] -------------------------------------------- [ 5102.149407] qemu-system-x86/1620 is trying to acquire lock: [ 5102.155624] (&container->group_lock){++++++}, at: [<ffffffff817768c6>] vfio_unpin_pages+0x96/0xf0 [ 5102.165626] but task is already holding lock: [ 5102.172134] (&container->group_lock){++++++}, at: [<ffffffff8177728f>] vfio_fops_unl_ioctl+0x5f/0x280 [ 5102.182522] other info that might help us debug this: [ 5102.189806] Possible unsafe locking scenario: [ 5102.196411] CPU0 [ 5102.199136] ---- [ 5102.201861] lock(&container->group_lock); [ 5102.206527] lock(&container->group_lock); [ 5102.211191] *** DEADLOCK *** [ 5102.217796] May be due to missing lock nesting notation [ 5102.225370] 3 locks held by qemu-system-x86/1620: [ 5102.230618] #0: (&container->group_lock){++++++}, at: [<ffffffff8177728f>] vfio_fops_unl_ioctl+0x5f/0x280 [ 5102.241482] #1: (&(&iommu->notifier)->rwsem){++++..}, at: [<ffffffff810de775>] __blocking_notifier_call_chain+0x35/0x70 [ 5102.253713] #2: (&vgpu->vdev.cache_lock){+.+...}, at: [<ffffffff8157b007>] intel_vgpu_iommu_notifier+0x77/0x120 [ 5102.265163] stack backtrace: [ 5102.270022] CPU: 5 PID: 1620 Comm: qemu-system-x86 Not tainted 4.12.0-rc4+ #3 [ 5102.277991] Hardware name: Intel Corporation S1200RP/S1200RP, BIOS S1200RP.86B.03.01.APER.061220151418 06/12/2015 [ 5102.289445] Call Trace: [ 5102.292175] dump_stack+0x85/0xc7 [ 5102.295871] validate_chain.isra.21+0x9da/0xaf0 [ 5102.300925] __lock_acquire+0x405/0x820 [ 5102.305202] lock_acquire+0xc7/0x220 [ 5102.309191] ? vfio_unpin_pages+0x96/0xf0 [ 5102.313666] down_read+0x2b/0x50 [ 5102.317259] ? vfio_unpin_pages+0x96/0xf0 [ 5102.321732] vfio_unpin_pages+0x96/0xf0 [ 5102.326024] intel_vgpu_iommu_notifier+0xe5/0x120 [ 5102.331283] notifier_call_chain+0x4a/0x70 [ 5102.335851] __blocking_notifier_call_chain+0x4d/0x70 [ 5102.341490] blocking_notifier_call_chain+0x16/0x20 [ 5102.346935] vfio_iommu_type1_ioctl+0x87b/0x920 [ 5102.351994] vfio_fops_unl_ioctl+0x81/0x280 [ 5102.356660] ? __fget+0xf0/0x210 [ 5102.360261] do_vfs_ioctl+0x93/0x6a0 [ 5102.364247] ? __fget+0x111/0x210 [ 5102.367942] SyS_ioctl+0x41/0x70 [ 5102.371542] entry_SYSCALL_64_fastpath+0x1f/0xbe put the vfio_unpin_pages in a workqueue can fix this. v2: - use for style instead of do{}while(1). (Zhenyu) v3: - rename gvt_cache_mark to gvt_cache_mark_remove. (Zhenyu) Fixes: 659643f7 ("drm/i915/gvt/kvmgt: add vfio/mdev support to KVMGT") Signed-off-by: Chuanxiao Dong <chuanxiao.dong@intel.com> Cc: Zhenyu Wang <zhenyuw@linux.intel.com> Cc: stable@vger.kernel.org # v4.10+ Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
62d02fd1 · Chuanxiao Dong · Zhenyu Wang · e274086e · 62d02fd1 · 62d02fd1
Commit 62d02fd1 authored Jun 26, 2017 by Chuanxiao Dong Committed by Zhenyu Wang Jun 26, 2017
Hide whitespace changes
Inline Side-by-side

Showing with 48 additions and 10 deletions

drivers/gpu/drm/i915/gvt/gvt.h drivers/gpu/drm/i915/gvt/gvt.h +3 -0

drivers/gpu/drm/i915/gvt/kvmgt.c drivers/gpu/drm/i915/gvt/kvmgt.c +45 -10

No files found.
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -183,6 +183,9 @@ struct intel_vgpu {
 		struct kvm *kvm;
 		struct work_struct release_work;
 		atomic_t released;
+		struct work_struct unpin_work;
+		spinlock_t unpin_lock; /* To protect unpin_list */
+		struct list_head unpin_list;
 	} vdev;
 #endif
 };

--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -78,6 +78,7 @@ struct gvt_dma {
 	struct rb_node node;
 	gfn_t gfn;
 	unsigned long iova;
+	struct list_head list;
 };
 static inline bool handle_valid(unsigned long handle)
@@ -166,6 +167,7 @@ static void gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
 	new->gfn = gfn;
 	new->iova = iova;
+	INIT_LIST_HEAD(&new->list);
 	mutex_lock(&vgpu->vdev.cache_lock);
 	while (*link) {
@@ -197,26 +199,52 @@ static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
 	kfree(entry);
 }
-static void gvt_cache_remove(struct intel_vgpu *vgpu, gfn_t gfn)
+static void intel_vgpu_unpin_work(struct work_struct *work)
 {
+	struct intel_vgpu *vgpu = container_of(work, struct intel_vgpu,
+					       vdev.unpin_work);
 	struct device *dev = mdev_dev(vgpu->vdev.mdev);
 	struct gvt_dma *this;
-	unsigned long g1;
+	unsigned long gfn;
-	int rc;
+	for (;;) {
+		spin_lock(&vgpu->vdev.unpin_lock);
+		if (list_empty(&vgpu->vdev.unpin_list)) {
+			spin_unlock(&vgpu->vdev.unpin_lock);
+			break;
+		}
+		this = list_first_entry(&vgpu->vdev.unpin_list,
+					struct gvt_dma, list);
+		list_del(&this->list);
+		spin_unlock(&vgpu->vdev.unpin_lock);
+		gfn = this->gfn;
+		vfio_unpin_pages(dev, &gfn, 1);
+		kfree(this);
+	}
+}
+static bool gvt_cache_mark_remove(struct intel_vgpu *vgpu, gfn_t gfn)
+{
+	struct gvt_dma *this;
 	mutex_lock(&vgpu->vdev.cache_lock);
 	this  = __gvt_cache_find(vgpu, gfn);
 	if (!this) {
 		mutex_unlock(&vgpu->vdev.cache_lock);
-		return;
+		return false;
 	}
-	g1 = gfn;
 	gvt_dma_unmap_iova(vgpu, this->iova);
-	rc = vfio_unpin_pages(dev, &g1, 1);
+	/* remove this from rb tree */
-	WARN_ON(rc != 1);
+	rb_erase(&this->node, &vgpu->vdev.cache);
-	__gvt_cache_remove_entry(vgpu, this);
 	mutex_unlock(&vgpu->vdev.cache_lock);
+	/* put this to the unpin_list */
+	spin_lock(&vgpu->vdev.unpin_lock);
+	list_move_tail(&this->list, &vgpu->vdev.unpin_list);
+	spin_unlock(&vgpu->vdev.unpin_lock);
+	return true;
 }
 static void gvt_cache_init(struct intel_vgpu *vgpu)
@@ -453,6 +481,9 @@ static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
 	}
 	INIT_WORK(&vgpu->vdev.release_work, intel_vgpu_release_work);
+	INIT_WORK(&vgpu->vdev.unpin_work, intel_vgpu_unpin_work);
+	spin_lock_init(&vgpu->vdev.unpin_lock);
+	INIT_LIST_HEAD(&vgpu->vdev.unpin_list);
 	vgpu->vdev.mdev = mdev;
 	mdev_set_drvdata(mdev, vgpu);
@@ -482,6 +513,7 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
 	struct intel_vgpu *vgpu = container_of(nb,
 					struct intel_vgpu,
 					vdev.iommu_notifier);
+	bool sched_unmap = false;
 	if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
 		struct vfio_iommu_type1_dma_unmap *unmap = data;
@@ -491,7 +523,10 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
 		end_gfn = gfn + unmap->size / PAGE_SIZE;
 		while (gfn < end_gfn)
-			gvt_cache_remove(vgpu, gfn++);
+			sched_unmap |= gvt_cache_mark_remove(vgpu, gfn++);
+		if (sched_unmap)
+			schedule_work(&vgpu->vdev.unpin_work);
 	}
 	return NOTIFY_OK;