Commit 303760aa authored by Umesh Nerlige Ramappa, committed by John Harrison

i915/guc/reset: Make __guc_reset_context aware of guilty engines

There are two ways an engine can get reset in i915, and the method of reset
affects how KMD labels a context as guilty or innocent.

(1) GuC-initiated engine reset: GuC resets a hung engine and notifies
KMD. The context that hung on the engine is marked guilty and all other
contexts are innocent. The innocent contexts are resubmitted.

(2) GT-based reset: When an engine heartbeat fails to tick, KMD
initiates a gt/chip reset. All active contexts are marked as guilty and
discarded.

In order to correctly mark the contexts as guilty/innocent, pass a mask
of engines that were reset to __guc_reset_context.
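
For illustration only (not part of the patch): a minimal userspace sketch of
the guilty/innocent decision this change introduces. The typedef and the
all-ones ALL_ENGINES value mirror the i915 definitions; context_is_guilty()
and the engine bits are hypothetical helpers invented for this example.

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  typedef uint32_t intel_engine_mask_t;            /* one bit per engine */
  #define ALL_ENGINES ((intel_engine_mask_t)~0ul)  /* all-ones mask, as in i915 */

  /* Hypothetical helper: a context is guilty only if its request had started
   * executing and its engine is in the mask of engines that were reset. */
  static bool context_is_guilty(intel_engine_mask_t stalled,
                                intel_engine_mask_t engine_mask,
                                bool request_started)
  {
          return request_started && (stalled & engine_mask);
  }

  int main(void)
  {
          intel_engine_mask_t rcs0 = 1u << 0;      /* illustrative engine bits */
          intel_engine_mask_t vcs0 = 1u << 1;

          /* (1) GuC engine reset of rcs0: only the context on rcs0 is guilty. */
          printf("%d %d\n", context_is_guilty(rcs0, rcs0, true),   /* 1 */
                 context_is_guilty(rcs0, vcs0, true));             /* 0 */

          /* (2) GT/chip reset: every started context is guilty. */
          printf("%d\n", context_is_guilty(ALL_ENGINES, vcs0, true)); /* 1 */
          return 0;
  }

With a plain bool there is no way to express "reset, but only on these
engines", which is why the parameter type changes throughout the call chain
below.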

Fixes: eb5e7da7 ("drm/i915/guc: Reset implementation for new GuC interface")
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Reviewed-by: Alan Previn <alan.previn.teres.alexis@intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20220426003045.3929439-1-umesh.nerlige.ramappa@intel.com
parent ad5f74f3
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -808,7 +808,7 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
 		__intel_engine_reset(engine, stalled_mask & engine->mask);
 	local_bh_enable();
-	intel_uc_reset(&gt->uc, true);
+	intel_uc_reset(&gt->uc, ALL_ENGINES);
 	intel_ggtt_restore_fences(gt->ggtt);
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h
@@ -443,7 +443,7 @@ int intel_guc_global_policies_update(struct intel_guc *guc);
 void intel_guc_context_ban(struct intel_context *ce, struct i915_request *rq);
 void intel_guc_submission_reset_prepare(struct intel_guc *guc);
-void intel_guc_submission_reset(struct intel_guc *guc, bool stalled);
+void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled);
 void intel_guc_submission_reset_finish(struct intel_guc *guc);
 void intel_guc_submission_cancel_requests(struct intel_guc *guc);
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -1654,9 +1654,9 @@ __unwind_incomplete_requests(struct intel_context *ce)
 	spin_unlock_irqrestore(&sched_engine->lock, flags);
 }
-static void __guc_reset_context(struct intel_context *ce, bool stalled)
+static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t stalled)
 {
-	bool local_stalled;
+	bool guilty;
 	struct i915_request *rq;
 	unsigned long flags;
 	u32 head;
@@ -1684,7 +1684,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
 		if (!intel_context_is_pinned(ce))
 			goto next_context;
-		local_stalled = false;
+		guilty = false;
 		rq = intel_context_find_active_request(ce);
 		if (!rq) {
 			head = ce->ring->tail;
@@ -1692,14 +1692,14 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
 		}
 		if (i915_request_started(rq))
-			local_stalled = true;
+			guilty = stalled & ce->engine->mask;
 		GEM_BUG_ON(i915_active_is_idle(&ce->active));
 		head = intel_ring_wrap(ce->ring, rq->head);
-		__i915_request_reset(rq, local_stalled && stalled);
+		__i915_request_reset(rq, guilty);
 out_replay:
-		guc_reset_state(ce, head, local_stalled && stalled);
+		guc_reset_state(ce, head, guilty);
 next_context:
 		if (i != number_children)
 			ce = list_next_entry(ce, parallel.child_link);
@@ -1709,7 +1709,7 @@ static void __guc_reset_context(struct intel_context *ce, bool stalled)
 	intel_context_put(parent);
 }
-void intel_guc_submission_reset(struct intel_guc *guc, bool stalled)
+void intel_guc_submission_reset(struct intel_guc *guc, intel_engine_mask_t stalled)
 {
 	struct intel_context *ce;
 	unsigned long index;
@@ -4228,7 +4228,7 @@ static void guc_context_replay(struct intel_context *ce)
 {
 	struct i915_sched_engine *sched_engine = ce->engine->sched_engine;
-	__guc_reset_context(ce, true);
+	__guc_reset_context(ce, ce->engine->mask);
 	tasklet_hi_schedule(&sched_engine->tasklet);
 }
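
Again purely illustrative (a userspace stub, not driver code): which mask each
reset flavour now passes down, matching the call sites in the hunks above;
guc_reset_contexts_stub() merely stands in for __guc_reset_context().

  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>

  typedef uint32_t intel_engine_mask_t;
  #define ALL_ENGINES ((intel_engine_mask_t)~0ul)

  /* Stand-in for __guc_reset_context(ce, stalled): just reports the mask. */
  static void guc_reset_contexts_stub(intel_engine_mask_t stalled)
  {
          printf("reset contexts, stalled mask 0x%08" PRIx32 "\n", stalled);
  }

  int main(void)
  {
          intel_engine_mask_t hung_engine = 1u << 2;  /* illustrative bit */

          /* GuC-initiated engine reset: guc_context_replay() passes only
           * ce->engine->mask, so only contexts on that engine can be guilty. */
          guc_reset_contexts_stub(hung_engine);

          /* GT/chip reset: gt_reset() passes ALL_ENGINES via intel_uc_reset(),
           * so every started context is judged guilty. */
          guc_reset_contexts_stub(ALL_ENGINES);
          return 0;
  }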
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -597,7 +597,7 @@ void intel_uc_reset_prepare(struct intel_uc *uc)
 	__uc_sanitize(uc);
 }
-void intel_uc_reset(struct intel_uc *uc, bool stalled)
+void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled)
 {
 	struct intel_guc *guc = &uc->guc;
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -42,7 +42,7 @@ void intel_uc_driver_late_release(struct intel_uc *uc);
 void intel_uc_driver_remove(struct intel_uc *uc);
 void intel_uc_init_mmio(struct intel_uc *uc);
 void intel_uc_reset_prepare(struct intel_uc *uc);
-void intel_uc_reset(struct intel_uc *uc, bool stalled);
+void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled);
 void intel_uc_reset_finish(struct intel_uc *uc);
 void intel_uc_cancel_requests(struct intel_uc *uc);
 void intel_uc_suspend(struct intel_uc *uc);