Commit 0417a5f8 authored by Matthew Brost

drm/xe: Always capture exec queues on snapshot

Always capture exec queues on snapshot regardless of whether the exec
queue has pending jobs or not. Having jobs or not does not indicate
whether the exec queue capture is useful.

Example bugs that would not be easily detected by skipping capture when
pending job list is empty:
- Jobs pending on exec queue have dependencies
- Leaking exec queue refs
- GuC protocol issues (i.e. losing G2H)

In addition to the above bugs, in general it is just useful to see every
exec queue registered with the GuC and its state.

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240405211632.223568-2-matthew.brost@intel.com
parent 31ced035
...@@ -188,7 +188,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, ...@@ -188,7 +188,7 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n"); xe_gt_info(ss->gt, "failed to get forcewake for coredump capture\n");
coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true); coredump->snapshot.ct = xe_guc_ct_snapshot_capture(&guc->ct, true);
coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(job); coredump->snapshot.ge = xe_guc_exec_queue_snapshot_capture(q);
coredump->snapshot.job = xe_sched_job_snapshot_capture(job); coredump->snapshot.job = xe_sched_job_snapshot_capture(job);
coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm); coredump->snapshot.vm = xe_vm_snapshot_capture(q->vm);
......
...@@ -1775,7 +1775,7 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps ...@@ -1775,7 +1775,7 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
/** /**
* xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine. * xe_guc_exec_queue_snapshot_capture - Take a quick snapshot of the GuC Engine.
* @job: faulty Xe scheduled job. * @q: faulty exec queue
* *
* This can be printed out in a later stage like during dev_coredump * This can be printed out in a later stage like during dev_coredump
* analysis. * analysis.
...@@ -1784,9 +1784,8 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps ...@@ -1784,9 +1784,8 @@ guc_exec_queue_wq_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
* caller, using `xe_guc_exec_queue_snapshot_free`. * caller, using `xe_guc_exec_queue_snapshot_free`.
*/ */
struct xe_guc_submit_exec_queue_snapshot * struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job) xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q)
{ {
struct xe_exec_queue *q = job->q;
struct xe_gpu_scheduler *sched = &q->guc->sched; struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_guc_submit_exec_queue_snapshot *snapshot; struct xe_guc_submit_exec_queue_snapshot *snapshot;
int i; int i;
...@@ -1942,28 +1941,10 @@ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *s ...@@ -1942,28 +1941,10 @@ void xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *s
static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p) static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
{ {
struct xe_guc_submit_exec_queue_snapshot *snapshot; struct xe_guc_submit_exec_queue_snapshot *snapshot;
struct xe_gpu_scheduler *sched = &q->guc->sched;
struct xe_sched_job *job;
bool found = false;
spin_lock(&sched->base.job_list_lock); snapshot = xe_guc_exec_queue_snapshot_capture(q);
list_for_each_entry(job, &sched->base.pending_list, drm.list) {
if (job->q == q) {
xe_sched_job_get(job);
found = true;
break;
}
}
spin_unlock(&sched->base.job_list_lock);
if (!found)
return;
snapshot = xe_guc_exec_queue_snapshot_capture(job);
xe_guc_exec_queue_snapshot_print(snapshot, p); xe_guc_exec_queue_snapshot_print(snapshot, p);
xe_guc_exec_queue_snapshot_free(snapshot); xe_guc_exec_queue_snapshot_free(snapshot);
xe_sched_job_put(job);
} }
/** /**
......
...@@ -9,8 +9,8 @@ ...@@ -9,8 +9,8 @@
#include <linux/types.h> #include <linux/types.h>
struct drm_printer; struct drm_printer;
struct xe_exec_queue;
struct xe_guc; struct xe_guc;
struct xe_sched_job;
int xe_guc_submit_init(struct xe_guc *guc); int xe_guc_submit_init(struct xe_guc *guc);
...@@ -27,7 +27,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg, ...@@ -27,7 +27,7 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len); int xe_guc_exec_queue_reset_failure_handler(struct xe_guc *guc, u32 *msg, u32 len);
struct xe_guc_submit_exec_queue_snapshot * struct xe_guc_submit_exec_queue_snapshot *
xe_guc_exec_queue_snapshot_capture(struct xe_sched_job *job); xe_guc_exec_queue_snapshot_capture(struct xe_exec_queue *q);
void void
xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot); xe_guc_exec_queue_snapshot_capture_delayed(struct xe_guc_submit_exec_queue_snapshot *snapshot);
void void
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment