Commit 70deeadd authored by Sagar Arun Kamble, committed by Chris Wilson

drm/i915/guc: Fix lockdep due to log relay channel handling under struct_mutex

This patch fixes a lockdep issue caused by a circular locking dependency
between struct_mutex, i_mutex_key, mmap_sem and relay_channels_mutex.
For the GuC log relay channel we create a debugfs file, which requires the
i_mutex_key lock, and we were doing that under struct_mutex. This introduced
a new dependency:
    &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem
However, there is already a dependency from mmap_sem to struct_mutex. Hence
we move the relay create/destroy operations out from under struct_mutex.
Also add a runtime check of the relay buffer status.
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

======================================================
WARNING: possible circular locking dependency detected
4.15.0-rc6-CI-Patchwork_7614+ #1 Not tainted
------------------------------------------------------
debugfs_test/1388 is trying to acquire lock:
 (&dev->struct_mutex){+.+.}, at: [<00000000d5e1d915>] i915_mutex_lock_interruptible+0x47/0x130 [i915]

but task is already holding lock:
 (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #3 (&mm->mmap_sem){++++}:
       _copy_to_user+0x1e/0x70
       filldir+0x8c/0xf0
       dcache_readdir+0xeb/0x160
       iterate_dir+0xdc/0x140
       SyS_getdents+0xa0/0x130
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #2 (&sb->s_type->i_mutex_key#3){++++}:
       start_creating+0x59/0x110
       __debugfs_create_file+0x2e/0xe0
       relay_create_buf_file+0x62/0x80
       relay_late_setup_files+0x84/0x250
       guc_log_late_setup+0x4f/0x110 [i915]
       i915_guc_log_register+0x32/0x40 [i915]
       i915_driver_load+0x7b6/0x1720 [i915]
       i915_pci_probe+0x2e/0x90 [i915]
       pci_device_probe+0x9c/0x120
       driver_probe_device+0x2a3/0x480
       __driver_attach+0xd9/0xe0
       bus_for_each_dev+0x57/0x90
       bus_add_driver+0x168/0x260
       driver_register+0x52/0xc0
       do_one_initcall+0x39/0x150
       do_init_module+0x56/0x1ef
       load_module+0x231c/0x2d70
       SyS_finit_module+0xa5/0xe0
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #1 (relay_channels_mutex){+.+.}:
       relay_open+0x12c/0x2b0
       intel_guc_log_runtime_create+0xab/0x230 [i915]
       intel_guc_init+0x81/0x120 [i915]
       intel_uc_init+0x29/0xa0 [i915]
       i915_gem_init+0x182/0x530 [i915]
       i915_driver_load+0xaa9/0x1720 [i915]
       i915_pci_probe+0x2e/0x90 [i915]
       pci_device_probe+0x9c/0x120
       driver_probe_device+0x2a3/0x480
       __driver_attach+0xd9/0xe0
       bus_for_each_dev+0x57/0x90
       bus_add_driver+0x168/0x260
       driver_register+0x52/0xc0
       do_one_initcall+0x39/0x150
       do_init_module+0x56/0x1ef
       load_module+0x231c/0x2d70
       SyS_finit_module+0xa5/0xe0
       entry_SYSCALL_64_fastpath+0x1c/0x89

-> #0 (&dev->struct_mutex){+.+.}:
       __mutex_lock+0x81/0x9b0
       i915_mutex_lock_interruptible+0x47/0x130 [i915]
       i915_gem_fault+0x201/0x790 [i915]
       __do_fault+0x15/0x70
       __handle_mm_fault+0x677/0xdc0
       handle_mm_fault+0x14f/0x2f0
       __do_page_fault+0x2d1/0x560
       page_fault+0x4c/0x60

other info that might help us debug this:

Chain exists of:
  &dev->struct_mutex --> &sb->s_type->i_mutex_key#3 --> &mm->mmap_sem

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&mm->mmap_sem);
                               lock(&sb->s_type->i_mutex_key#3);
                               lock(&mm->mmap_sem);
  lock(&dev->struct_mutex);

 *** DEADLOCK ***

1 lock held by debugfs_test/1388:
 #0:  (&mm->mmap_sem){++++}, at: [<0000000029a9c131>] __do_page_fault+0x106/0x560

stack backtrace:
CPU: 2 PID: 1388 Comm: debugfs_test Not tainted 4.15.0-rc6-CI-Patchwork_7614+ #1
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./J4205-ITX, BIOS P1.10 09/29/2016
Call Trace:
 dump_stack+0x5f/0x86
 print_circular_bug.isra.18+0x1d0/0x2c0
 __lock_acquire+0x14ae/0x1b60
 ? lock_acquire+0xaf/0x200
 lock_acquire+0xaf/0x200
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 __mutex_lock+0x81/0x9b0
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? i915_mutex_lock_interruptible+0x47/0x130 [i915]
 i915_mutex_lock_interruptible+0x47/0x130 [i915]
 ? __pm_runtime_resume+0x4f/0x80
 i915_gem_fault+0x201/0x790 [i915]
 __do_fault+0x15/0x70
 ? _raw_spin_unlock+0x29/0x40
 __handle_mm_fault+0x677/0xdc0
 handle_mm_fault+0x14f/0x2f0
 __do_page_fault+0x2d1/0x560
 ? page_fault+0x36/0x60
 page_fault+0x4c/0x60

v2: Added lock protection to guc->log.runtime.relay_chan (Chris)
    Fixed locking inside guc_flush_logs uncovered by new lockdep.

v3: Lock guc_read_update_log_buffer entirely with relay_lock. (Chris)
    Prepared intel_guc_init_early. Moved relay_lock inside relay_create,
    relay_destroy, relay_file_create and guc_read_update_log_buffer. (Michal)
    Removed the struct_mutex lock around guc_log_flush and removed the usage
    of guc_log_has_relay() from the runtime_create path, as it needs the
    struct_mutex lock.

v4: Handle NULL relay sub buffer pointer earlier in read_update_log_buffer
    (Chris). Fixed comment suffix **/. (Michal)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104693
Testcase: igt/debugfs_test/read_all_entries # with enable_guc=1 and guc_log_level=1
Signed-off-by: Sagar Arun Kamble <sagar.a.kamble@intel.com>
Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Marta Lofstedt <marta.lofstedt@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/1516808821-3638-3-git-send-email-sagar.a.kamble@intel.com
parent 1ed21cb4
...@@ -2467,7 +2467,6 @@ static int i915_guc_log_control_get(void *data, u64 *val) ...@@ -2467,7 +2467,6 @@ static int i915_guc_log_control_get(void *data, u64 *val)
static int i915_guc_log_control_set(void *data, u64 val) static int i915_guc_log_control_set(void *data, u64 val)
{ {
struct drm_i915_private *dev_priv = data; struct drm_i915_private *dev_priv = data;
int ret;
if (!HAS_GUC(dev_priv)) if (!HAS_GUC(dev_priv))
return -ENODEV; return -ENODEV;
...@@ -2475,16 +2474,7 @@ static int i915_guc_log_control_set(void *data, u64 val) ...@@ -2475,16 +2474,7 @@ static int i915_guc_log_control_set(void *data, u64 val)
if (!dev_priv->guc.log.vma) if (!dev_priv->guc.log.vma)
return -EINVAL; return -EINVAL;
ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); return i915_guc_log_control(dev_priv, val);
if (ret)
return ret;
intel_runtime_pm_get(dev_priv);
ret = i915_guc_log_control(dev_priv, val);
intel_runtime_pm_put(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
return ret;
} }
DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_control_fops, DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_control_fops,
......
...@@ -626,7 +626,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) ...@@ -626,7 +626,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv)
i915_gem_contexts_fini(dev_priv); i915_gem_contexts_fini(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex); mutex_unlock(&dev_priv->drm.struct_mutex);
intel_uc_fini_wq(dev_priv); intel_uc_fini_misc(dev_priv);
i915_gem_cleanup_userptr(dev_priv); i915_gem_cleanup_userptr(dev_priv);
i915_gem_drain_freed_objects(dev_priv); i915_gem_drain_freed_objects(dev_priv);
......
...@@ -5272,7 +5272,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) ...@@ -5272,7 +5272,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
if (ret) if (ret)
return ret; return ret;
ret = intel_uc_init_wq(dev_priv); ret = intel_uc_init_misc(dev_priv);
if (ret) if (ret)
return ret; return ret;
...@@ -5368,7 +5368,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv) ...@@ -5368,7 +5368,7 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
mutex_unlock(&dev_priv->drm.struct_mutex); mutex_unlock(&dev_priv->drm.struct_mutex);
intel_uc_fini_wq(dev_priv); intel_uc_fini_misc(dev_priv);
if (ret != -EIO) if (ret != -EIO)
i915_gem_cleanup_userptr(dev_priv); i915_gem_cleanup_userptr(dev_priv);
......
...@@ -64,6 +64,7 @@ void intel_guc_init_early(struct intel_guc *guc) ...@@ -64,6 +64,7 @@ void intel_guc_init_early(struct intel_guc *guc)
{ {
intel_guc_fw_init_early(guc); intel_guc_fw_init_early(guc);
intel_guc_ct_init_early(&guc->ct); intel_guc_ct_init_early(&guc->ct);
intel_guc_log_init_early(guc);
mutex_init(&guc->send_mutex); mutex_init(&guc->send_mutex);
guc->send = intel_guc_send_nop; guc->send = intel_guc_send_nop;
...@@ -87,8 +88,10 @@ int intel_guc_init_wq(struct intel_guc *guc) ...@@ -87,8 +88,10 @@ int intel_guc_init_wq(struct intel_guc *guc)
*/ */
guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log", guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
WQ_HIGHPRI | WQ_FREEZABLE); WQ_HIGHPRI | WQ_FREEZABLE);
if (!guc->log.runtime.flush_wq) if (!guc->log.runtime.flush_wq) {
DRM_ERROR("Couldn't allocate workqueue for GuC log\n");
return -ENOMEM; return -ENOMEM;
}
/* /*
* Even though both sending GuC action, and adding a new workitem to * Even though both sending GuC action, and adding a new workitem to
...@@ -109,6 +112,8 @@ int intel_guc_init_wq(struct intel_guc *guc) ...@@ -109,6 +112,8 @@ int intel_guc_init_wq(struct intel_guc *guc)
WQ_HIGHPRI); WQ_HIGHPRI);
if (!guc->preempt_wq) { if (!guc->preempt_wq) {
destroy_workqueue(guc->log.runtime.flush_wq); destroy_workqueue(guc->log.runtime.flush_wq);
DRM_ERROR("Couldn't allocate workqueue for GuC "
"preemption\n");
return -ENOMEM; return -ENOMEM;
} }
} }
......
This diff is collapsed.
...@@ -32,6 +32,13 @@ ...@@ -32,6 +32,13 @@
struct drm_i915_private; struct drm_i915_private;
struct intel_guc; struct intel_guc;
/*
* The first page saves the log buffer state. Allocate one
* extra page for each of the other buffers in case of overlap.
*/
#define GUC_LOG_SIZE ((1 + GUC_LOG_DPC_PAGES + 1 + GUC_LOG_ISR_PAGES + \
1 + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT)
struct intel_guc_log { struct intel_guc_log {
u32 flags; u32 flags;
struct i915_vma *vma; struct i915_vma *vma;
...@@ -41,6 +48,8 @@ struct intel_guc_log { ...@@ -41,6 +48,8 @@ struct intel_guc_log {
struct workqueue_struct *flush_wq; struct workqueue_struct *flush_wq;
struct work_struct flush_work; struct work_struct flush_work;
struct rchan *relay_chan; struct rchan *relay_chan;
/* To serialize the access to relay_chan */
struct mutex relay_lock;
} runtime; } runtime;
/* logging related stats */ /* logging related stats */
u32 capture_miss_count; u32 capture_miss_count;
...@@ -52,6 +61,9 @@ struct intel_guc_log { ...@@ -52,6 +61,9 @@ struct intel_guc_log {
int intel_guc_log_create(struct intel_guc *guc); int intel_guc_log_create(struct intel_guc *guc);
void intel_guc_log_destroy(struct intel_guc *guc); void intel_guc_log_destroy(struct intel_guc *guc);
void intel_guc_log_init_early(struct intel_guc *guc);
int intel_guc_log_relay_create(struct intel_guc *guc);
void intel_guc_log_relay_destroy(struct intel_guc *guc);
int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
void i915_guc_log_register(struct drm_i915_private *dev_priv); void i915_guc_log_register(struct drm_i915_private *dev_priv);
void i915_guc_log_unregister(struct drm_i915_private *dev_priv); void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
......
...@@ -236,28 +236,44 @@ static void guc_disable_communication(struct intel_guc *guc) ...@@ -236,28 +236,44 @@ static void guc_disable_communication(struct intel_guc *guc)
guc->send = intel_guc_send_nop; guc->send = intel_guc_send_nop;
} }
int intel_uc_init_wq(struct drm_i915_private *dev_priv) int intel_uc_init_misc(struct drm_i915_private *dev_priv)
{ {
struct intel_guc *guc = &dev_priv->guc;
int ret; int ret;
if (!USES_GUC(dev_priv)) if (!USES_GUC(dev_priv))
return 0; return 0;
ret = intel_guc_init_wq(&dev_priv->guc); ret = intel_guc_init_wq(guc);
if (ret) { if (ret) {
DRM_ERROR("Couldn't allocate workqueues for GuC\n"); DRM_ERROR("Couldn't allocate workqueues for GuC\n");
return ret; goto err;
}
ret = intel_guc_log_relay_create(guc);
if (ret) {
DRM_ERROR("Couldn't allocate relay for GuC log\n");
goto err_relay;
} }
return 0; return 0;
err_relay:
intel_guc_fini_wq(guc);
err:
return ret;
} }
void intel_uc_fini_wq(struct drm_i915_private *dev_priv) void intel_uc_fini_misc(struct drm_i915_private *dev_priv)
{ {
struct intel_guc *guc = &dev_priv->guc;
if (!USES_GUC(dev_priv)) if (!USES_GUC(dev_priv))
return; return;
intel_guc_fini_wq(&dev_priv->guc); intel_guc_fini_wq(guc);
intel_guc_log_relay_destroy(guc);
} }
int intel_uc_init(struct drm_i915_private *dev_priv) int intel_uc_init(struct drm_i915_private *dev_priv)
......
...@@ -33,8 +33,8 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv); ...@@ -33,8 +33,8 @@ void intel_uc_init_early(struct drm_i915_private *dev_priv);
void intel_uc_init_mmio(struct drm_i915_private *dev_priv); void intel_uc_init_mmio(struct drm_i915_private *dev_priv);
void intel_uc_init_fw(struct drm_i915_private *dev_priv); void intel_uc_init_fw(struct drm_i915_private *dev_priv);
void intel_uc_fini_fw(struct drm_i915_private *dev_priv); void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
int intel_uc_init_wq(struct drm_i915_private *dev_priv); int intel_uc_init_misc(struct drm_i915_private *dev_priv);
void intel_uc_fini_wq(struct drm_i915_private *dev_priv); void intel_uc_fini_misc(struct drm_i915_private *dev_priv);
int intel_uc_init_hw(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv);
void intel_uc_fini_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
int intel_uc_init(struct drm_i915_private *dev_priv); int intel_uc_init(struct drm_i915_private *dev_priv);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment