drm/i915: Fix locking around GuC firmware load

The GuC firmware load requires struct_mutex to create a GEM object, but this collides badly with request_firmware. Move struct_mutex locking down into the loader itself, so we don't hold it across the entire load process, including request_firmware. [ 20.451400] ====================================================== [ 20.451420] [ INFO: possible circular locking dependency detected ] [ 20.451441] 4.3.0-rc5+ #1 Tainted: G W [ 20.451457] ------------------------------------------------------- [ 20.451477] plymouthd/371 is trying to acquire lock: [ 20.451494] (&dev->struct_mutex){+.+.+.}, at: [<ffffffffa0093c62>] drm_gem_mmap+0x112/0x290 [drm] [ 20.451538] but task is already holding lock: [ 20.451557] (&mm->mmap_sem){++++++}, at: [<ffffffff811fd9ac>] vm_mmap_pgoff+0x8c/0xf0 [ 20.451591] which lock already depends on the new lock. [ 20.451617] the existing dependency chain (in reverse order) is: [ 20.451640] -> #3 (&mm->mmap_sem){++++++}: [ 20.451661] [<ffffffff8110644e>] lock_acquire+0xce/0x1c0 [ 20.451683] [<ffffffff8120ec9a>] __might_fault+0x7a/0xa0 [ 20.451705] [<ffffffff8127e34e>] filldir+0x9e/0x130 [ 20.451726] [<ffffffff81295b86>] dcache_readdir+0x186/0x230 [ 20.451748] [<ffffffff8127e117>] iterate_dir+0x97/0x130 [ 20.451769] [<ffffffff8127e66a>] SyS_getdents+0x9a/0x130 [ 20.451790] [<ffffffff8184f2f2>] entry_SYSCALL_64_fastpath+0x12/0x76 [ 20.451829] -> #2 (&sb->s_type->i_mutex_key#2){+.+.+.}: [ 20.451852] [<ffffffff8110644e>] lock_acquire+0xce/0x1c0 [ 20.451872] [<ffffffff8184b516>] mutex_lock_nested+0x86/0x400 [ 20.451893] [<ffffffff81277790>] walk_component+0x1d0/0x2a0 [ 20.451914] [<ffffffff812779f0>] link_path_walk+0x190/0x5a0 [ 20.451935] [<ffffffff8127803b>] path_openat+0xab/0x1260 [ 20.451955] [<ffffffff8127a651>] do_filp_open+0x91/0x100 [ 20.451975] [<ffffffff81267e67>] file_open_name+0xf7/0x150 [ 20.451995] [<ffffffff81267ef3>] filp_open+0x33/0x60 [ 20.452014] [<ffffffff8157e1e7>] _request_firmware+0x277/0x880 [ 20.452038] [<ffffffff8157e9e4>] request_firmware_work_func+0x34/0x80 [ 20.452060] [<ffffffff810c7020>] process_one_work+0x230/0x680 [ 20.452082] [<ffffffff810c74be>] worker_thread+0x4e/0x450 [ 20.452102] [<ffffffff810ce511>] kthread+0x101/0x120 [ 20.452121] [<ffffffff8184f66f>] ret_from_fork+0x3f/0x70 [ 20.452140] -> #1 (umhelper_sem){++++.+}: [ 20.452159] [<ffffffff8110644e>] lock_acquire+0xce/0x1c0 [ 20.452178] [<ffffffff8184c5c1>] down_read+0x51/0xa0 [ 20.452197] [<ffffffff810c203b>] usermodehelper_read_trylock+0x5b/0x130 [ 20.452221] [<ffffffff8157e147>] _request_firmware+0x1d7/0x880 [ 20.452242] [<ffffffff8157e821>] request_firmware+0x31/0x50 [ 20.452262] [<ffffffffa01b54a4>] intel_guc_ucode_init+0xf4/0x400 [i915] [ 20.452305] [<ffffffffa0213913>] i915_driver_load+0xd63/0x16e0 [i915] [ 20.452343] [<ffffffffa00987d9>] drm_dev_register+0xa9/0xc0 [drm] [ 20.452369] [<ffffffffa009ae3d>] drm_get_pci_dev+0x8d/0x1e0 [drm] [ 20.452396] [<ffffffffa01521e4>] i915_pci_probe+0x34/0x50 [i915] [ 20.452421] [<ffffffff81464675>] local_pci_probe+0x45/0xa0 [ 20.452443] [<ffffffff81465a6d>] pci_device_probe+0xfd/0x140 [ 20.452464] [<ffffffff8156a2e4>] driver_probe_device+0x224/0x480 [ 20.452486] [<ffffffff8156a5c8>] __driver_attach+0x88/0x90 [ 20.452505] [<ffffffff81567cf3>] bus_for_each_dev+0x73/0xc0 [ 20.452526] [<ffffffff81569a7e>] driver_attach+0x1e/0x20 [ 20.452546] [<ffffffff815695ae>] bus_add_driver+0x1ee/0x280 [ 20.452566] [<ffffffff8156b100>] driver_register+0x60/0xe0 [ 20.453197] [<ffffffff81464050>] __pci_register_driver+0x60/0x70 [ 20.453845] [<ffffffffa009b070>] drm_pci_init+0xe0/0x110 [drm] [ 20.454497] [<ffffffffa027f092>] 0xffffffffa027f092 [ 20.455156] [<ffffffff81002123>] do_one_initcall+0xb3/0x200 [ 20.455796] [<ffffffff811d8c01>] do_init_module+0x5f/0x1e7 [ 20.456434] [<ffffffff8114c4e6>] load_module+0x2126/0x27d0 [ 20.457071] [<ffffffff8114cdf9>] SyS_finit_module+0xb9/0xf0 [ 20.457738] [<ffffffff8184f2f2>] entry_SYSCALL_64_fastpath+0x12/0x76 [ 20.458370] -> #0 (&dev->struct_mutex){+.+.+.}: [ 20.459773] [<ffffffff8110584f>] __lock_acquire+0x191f/0x1ba0 [ 20.460451] [<ffffffff8110644e>] lock_acquire+0xce/0x1c0 [ 20.461074] [<ffffffffa0093c88>] drm_gem_mmap+0x138/0x290 [drm] [ 20.461693] [<ffffffff8121a5ec>] mmap_region+0x3ec/0x670 [ 20.462298] [<ffffffff8121abb2>] do_mmap+0x342/0x420 [ 20.462901] [<ffffffff811fd9d2>] vm_mmap_pgoff+0xb2/0xf0 [ 20.463532] [<ffffffff81218f62>] SyS_mmap_pgoff+0x1f2/0x290 [ 20.464118] [<ffffffff8102187b>] SyS_mmap+0x1b/0x30 [ 20.464702] [<ffffffff8184f2f2>] entry_SYSCALL_64_fastpath+0x12/0x76 [ 20.465289] other info that might help us debug this: [ 20.467179] Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#2 --> &mm->mmap_sem [ 20.468928] Possible unsafe locking scenario: [ 20.470161] CPU0 CPU1 [ 20.470745] ---- ---- [ 20.471325] lock(&mm->mmap_sem); [ 20.471902] lock(&sb->s_type->i_mutex_key#2); [ 20.472538] lock(&mm->mmap_sem); [ 20.473118] lock(&dev->struct_mutex); [ 20.473704] *** DEADLOCK *** Signed-off-by: Daniel Stone <daniels@collabora.com> Signed-off-by: Dave Airlie <airlied@redhat.com>

drm/i915: Fix locking around GuC firmware load
The GuC firmware load requires struct_mutex to create a GEM object, but this collides badly with request_firmware. Move struct_mutex locking down into the loader itself, so we don't hold it across the entire load process, including request_firmware. [ 20.451400] ====================================================== [ 20.451420] [ INFO: possible circular locking dependency detected ] [ 20.451441] 4.3.0-rc5+ #1 Tainted: G W [ 20.451457] ------------------------------------------------------- [ 20.451477] plymouthd/371 is trying to acquire lock: [ 20.451494] (&dev->struct_mutex){+.+.+.}, at: [<ffffffffa0093c62>] drm_gem_mmap+0x112/0x290 [drm] [ 20.451538] but task is already holding lock: [ 20.451557] (&mm->mmap_sem){++++++}, at: [<ffffffff811fd9ac>] vm_mmap_pgoff+0x8c/0xf0 [ 20.451591] which lock already depends on the new lock. [ 20.451617] the existing dependency chain (in reverse order) is: [ 20.451640] -> #3 (&mm->mmap_sem){++++++}: [ 20.451661] [<ffffffff8110644e>] lock_acquire+0xce/0x1c0 [ 20.451683] [<ffffffff8120ec9a>] __might_fault+0x7a/0xa0 [ 20.451705] [<ffffffff8127e34e>] filldir+0x9e/0x130 [ 20.451726] [<ffffffff81295b86>] dcache_readdir+0x186/0x230 [ 20.451748] [<ffffffff8127e117>] iterate_dir+0x97/0x130 [ 20.451769] [<ffffffff8127e66a>] SyS_getdents+0x9a/0x130 [ 20.451790] [<ffffffff8184f2f2>] entry_SYSCALL_64_fastpath+0x12/0x76 [ 20.451829] -> #2 (&sb->s_type->i_mutex_key#2){+.+.+.}: [ 20.451852] [<ffffffff8110644e>] lock_acquire+0xce/0x1c0 [ 20.451872] [<ffffffff8184b516>] mutex_lock_nested+0x86/0x400 [ 20.451893] [<ffffffff81277790>] walk_component+0x1d0/0x2a0 [ 20.451914] [<ffffffff812779f0>] link_path_walk+0x190/0x5a0 [ 20.451935] [<ffffffff8127803b>] path_openat+0xab/0x1260 [ 20.451955] [<ffffffff8127a651>] do_filp_open+0x91/0x100 [ 20.451975] [<ffffffff81267e67>] file_open_name+0xf7/0x150 [ 20.451995] [<ffffffff81267ef3>] filp_open+0x33/0x60 [ 20.452014] [<ffffffff8157e1e7>] _request_firmware+0x277/0x880 [ 20.452038] [<ffffffff8157e9e4>] request_firmware_work_func+0x34/0x80 [ 20.452060] [<ffffffff810c7020>] process_one_work+0x230/0x680 [ 20.452082] [<ffffffff810c74be>] worker_thread+0x4e/0x450 [ 20.452102] [<ffffffff810ce511>] kthread+0x101/0x120 [ 20.452121] [<ffffffff8184f66f>] ret_from_fork+0x3f/0x70 [ 20.452140] -> #1 (umhelper_sem){++++.+}: [ 20.452159] [<ffffffff8110644e>] lock_acquire+0xce/0x1c0 [ 20.452178] [<ffffffff8184c5c1>] down_read+0x51/0xa0 [ 20.452197] [<ffffffff810c203b>] usermodehelper_read_trylock+0x5b/0x130 [ 20.452221] [<ffffffff8157e147>] _request_firmware+0x1d7/0x880 [ 20.452242] [<ffffffff8157e821>] request_firmware+0x31/0x50 [ 20.452262] [<ffffffffa01b54a4>] intel_guc_ucode_init+0xf4/0x400 [i915] [ 20.452305] [<ffffffffa0213913>] i915_driver_load+0xd63/0x16e0 [i915] [ 20.452343] [<ffffffffa00987d9>] drm_dev_register+0xa9/0xc0 [drm] [ 20.452369] [<ffffffffa009ae3d>] drm_get_pci_dev+0x8d/0x1e0 [drm] [ 20.452396] [<ffffffffa01521e4>] i915_pci_probe+0x34/0x50 [i915] [ 20.452421] [<ffffffff81464675>] local_pci_probe+0x45/0xa0 [ 20.452443] [<ffffffff81465a6d>] pci_device_probe+0xfd/0x140 [ 20.452464] [<ffffffff8156a2e4>] driver_probe_device+0x224/0x480 [ 20.452486] [<ffffffff8156a5c8>] __driver_attach+0x88/0x90 [ 20.452505] [<ffffffff81567cf3>] bus_for_each_dev+0x73/0xc0 [ 20.452526] [<ffffffff81569a7e>] driver_attach+0x1e/0x20 [ 20.452546] [<ffffffff815695ae>] bus_add_driver+0x1ee/0x280 [ 20.452566] [<ffffffff8156b100>] driver_register+0x60/0xe0 [ 20.453197] [<ffffffff81464050>] __pci_register_driver+0x60/0x70 [ 20.453845] [<ffffffffa009b070>] drm_pci_init+0xe0/0x110 [drm] [ 20.454497] [<ffffffffa027f092>] 0xffffffffa027f092 [ 20.455156] [<ffffffff81002123>] do_one_initcall+0xb3/0x200 [ 20.455796] [<ffffffff811d8c01>] do_init_module+0x5f/0x1e7 [ 20.456434] [<ffffffff8114c4e6>] load_module+0x2126/0x27d0 [ 20.457071] [<ffffffff8114cdf9>] SyS_finit_module+0xb9/0xf0 [ 20.457738] [<ffffffff8184f2f2>] entry_SYSCALL_64_fastpath+0x12/0x76 [ 20.458370] -> #0 (&dev->struct_mutex){+.+.+.}: [ 20.459773] [<ffffffff8110584f>] __lock_acquire+0x191f/0x1ba0 [ 20.460451] [<ffffffff8110644e>] lock_acquire+0xce/0x1c0 [ 20.461074] [<ffffffffa0093c88>] drm_gem_mmap+0x138/0x290 [drm] [ 20.461693] [<ffffffff8121a5ec>] mmap_region+0x3ec/0x670 [ 20.462298] [<ffffffff8121abb2>] do_mmap+0x342/0x420 [ 20.462901] [<ffffffff811fd9d2>] vm_mmap_pgoff+0xb2/0xf0 [ 20.463532] [<ffffffff81218f62>] SyS_mmap_pgoff+0x1f2/0x290 [ 20.464118] [<ffffffff8102187b>] SyS_mmap+0x1b/0x30 [ 20.464702] [<ffffffff8184f2f2>] entry_SYSCALL_64_fastpath+0x12/0x76 [ 20.465289] other info that might help us debug this: [ 20.467179] Chain exists of: &dev->struct_mutex --> &sb->s_type->i_mutex_key#2 --> &mm->mmap_sem [ 20.468928] Possible unsafe locking scenario: [ 20.470161] CPU0 CPU1 [ 20.470745] ---- ---- [ 20.471325] lock(&mm->mmap_sem); [ 20.471902] lock(&sb->s_type->i_mutex_key#2); [ 20.472538] lock(&mm->mmap_sem); [ 20.473118] lock(&dev->struct_mutex); [ 20.473704] *** DEADLOCK *** Signed-off-by: Daniel Stone <daniels@collabora.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
bf248ca1 · Daniel Stone · Dave Airlie · 1c431cb4 · bf248ca1 · bf248ca1
Commit bf248ca1 authored Nov 03, 2015 by Daniel Stone Committed by Dave Airlie Nov 04, 2015
Show whitespace changes
Inline Side-by-side

Showing with 5 additions and 8 deletions

drivers/gpu/drm/i915/i915_dma.c drivers/gpu/drm/i915/i915_dma.c +1 -6

drivers/gpu/drm/i915/intel_guc_loader.c drivers/gpu/drm/i915/intel_guc_loader.c +4 -2

No files found.
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -406,10 +406,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	 * working irqs for e.g. gmbus and dp aux transfers. */
 	intel_modeset_init(dev);
-	/* intel_guc_ucode_init() needs the mutex to allocate GEM objects */
-	mutex_lock(&dev->struct_mutex);
 	intel_guc_ucode_init(dev);
-	mutex_unlock(&dev->struct_mutex);
 	ret = i915_gem_init(dev);
 	if (ret)
@@ -452,9 +449,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
 cleanup_irq:
-	mutex_lock(&dev->struct_mutex);
 	intel_guc_ucode_fini(dev);
-	mutex_unlock(&dev->struct_mutex);
 	drm_irq_uninstall(dev);
 cleanup_gem_stolen:
 	i915_gem_cleanup_stolen(dev);
@@ -1193,8 +1188,8 @@ int i915_driver_unload(struct drm_device *dev)
 	/* Flush any outstanding unpin_work. */
 	flush_workqueue(dev_priv->wq);
-	mutex_lock(&dev->struct_mutex);
 	intel_guc_ucode_fini(dev);
+	mutex_lock(&dev->struct_mutex);
 	i915_gem_cleanup_ringbuffer(dev);
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);

--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -504,7 +504,9 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw)
 			guc_fw->guc_fw_major_found, guc_fw->guc_fw_minor_found,
 			guc_fw->guc_fw_major_wanted, guc_fw->guc_fw_minor_wanted);
+	mutex_lock(&dev->struct_mutex);
 	obj = i915_gem_object_create_from_data(dev, fw->data, fw->size);
+	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR_OR_NULL(obj)) {
 		err = obj ? PTR_ERR(obj) : -ENOMEM;
 		goto fail;
@@ -540,8 +542,6 @@ static void guc_fw_fetch(struct drm_device *dev, struct intel_guc_fw *guc_fw)
 * @dev:	drm device
 *
 * Called early during driver load, but after GEM is initialised.
- * The device struct_mutex must be held by the caller, as we're
- * going to allocate a GEM object to hold the firmware image.
 *
 * The firmware will be transferred to the GuC's memory later,
 * when intel_guc_ucode_load() is called.
@@ -598,9 +598,11 @@ void intel_guc_ucode_fini(struct drm_device *dev)
 	direct_interrupts_to_host(dev_priv);
 	i915_guc_submission_fini(dev);
+	mutex_lock(&dev->struct_mutex);
 	if (guc_fw->guc_fw_obj)
 		drm_gem_object_unreference(&guc_fw->guc_fw_obj->base);
 	guc_fw->guc_fw_obj = NULL;
+	mutex_unlock(&dev->struct_mutex);
 	guc_fw->guc_fw_fetch_status = GUC_FIRMWARE_NONE;
 }