Commit ae8ac10d authored by Matthew Brost, committed by John Harrison

drm/i915/guc: Implement banned contexts for GuC submission

When using GuC submission, if a context gets banned, disable scheduling
and mark all in-flight requests as complete.

Cc: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-25-matthew.brost@intel.com
parent 481d458c
...@@ -1084,7 +1084,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban) ...@@ -1084,7 +1084,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
for_each_gem_engine(ce, engines, it) { for_each_gem_engine(ce, engines, it) {
struct intel_engine_cs *engine; struct intel_engine_cs *engine;
if (ban && intel_context_set_banned(ce)) if (ban && intel_context_ban(ce, NULL))
continue; continue;
/* /*
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "intel_engine_types.h" #include "intel_engine_types.h"
#include "intel_ring_types.h" #include "intel_ring_types.h"
#include "intel_timeline_types.h" #include "intel_timeline_types.h"
#include "i915_trace.h"
#define CE_TRACE(ce, fmt, ...) do { \ #define CE_TRACE(ce, fmt, ...) do { \
const struct intel_context *ce__ = (ce); \ const struct intel_context *ce__ = (ce); \
...@@ -243,6 +244,18 @@ static inline bool intel_context_set_banned(struct intel_context *ce) ...@@ -243,6 +244,18 @@ static inline bool intel_context_set_banned(struct intel_context *ce)
return test_and_set_bit(CONTEXT_BANNED, &ce->flags); return test_and_set_bit(CONTEXT_BANNED, &ce->flags);
} }
/**
 * intel_context_ban - mark a context as banned and notify its backend
 * @ce: context to ban
 * @rq: request passed through unchanged to the backend's ban hook; may be NULL
 *
 * Sets the banned flag on @ce, emits the intel_context_ban tracepoint and,
 * when the context's ops provide a ->ban() hook, invokes it with @ce and @rq.
 * The tracepoint and the hook run on every call, whether or not the context
 * had already been banned.
 *
 * Return: the result of intel_context_set_banned(), i.e. whether the banned
 * flag was already set before this call.
 */
static inline bool intel_context_ban(struct intel_context *ce,
				     struct i915_request *rq)
{
	const bool already_banned = intel_context_set_banned(ce);

	trace_intel_context_ban(ce);

	/* Backends without a ban hook have nothing further to do. */
	if (!ce->ops->ban)
		return already_banned;

	ce->ops->ban(ce, rq);

	return already_banned;
}
static inline bool static inline bool
intel_context_force_single_submission(const struct intel_context *ce) intel_context_force_single_submission(const struct intel_context *ce)
{ {
......
...@@ -35,6 +35,8 @@ struct intel_context_ops { ...@@ -35,6 +35,8 @@ struct intel_context_ops {
int (*alloc)(struct intel_context *ce); int (*alloc)(struct intel_context *ce);
void (*ban)(struct intel_context *ce, struct i915_request *rq);
int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr);
int (*pin)(struct intel_context *ce, void *vaddr); int (*pin)(struct intel_context *ce, void *vaddr);
void (*unpin)(struct intel_context *ce); void (*unpin)(struct intel_context *ce);
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
#include "intel_reset.h" #include "intel_reset.h"
#include "uc/intel_guc.h" #include "uc/intel_guc.h"
#include "uc/intel_guc_submission.h"
#define RESET_MAX_RETRIES 3 #define RESET_MAX_RETRIES 3
...@@ -39,21 +38,6 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) ...@@ -39,21 +38,6 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
intel_uncore_rmw_fw(uncore, reg, clr, 0); intel_uncore_rmw_fw(uncore, reg, clr, 0);
} }
/*
 * Starting at @rq, walk the requests on its context's timeline and, for each
 * one that belongs to the same context, record -EIO and skip it. The walk
 * ends at the first request that is not active.
 */
static void skip_context(struct i915_request *rq)
{
	struct intel_context *guilty = rq->context;

	list_for_each_entry_from_rcu(rq, &guilty->timeline->requests, link) {
		/* Stop at the first request that is not active. */
		if (!i915_request_is_active(rq))
			break;

		/* Requests from other contexts are left untouched. */
		if (rq->context != guilty)
			continue;

		i915_request_set_error_once(rq, -EIO);
		__i915_request_skip(rq);
	}
}
static void client_mark_guilty(struct i915_gem_context *ctx, bool banned) static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
{ {
struct drm_i915_file_private *file_priv = ctx->file_priv; struct drm_i915_file_private *file_priv = ctx->file_priv;
...@@ -88,10 +72,8 @@ static bool mark_guilty(struct i915_request *rq) ...@@ -88,10 +72,8 @@ static bool mark_guilty(struct i915_request *rq)
bool banned; bool banned;
int i; int i;
if (intel_context_is_closed(rq->context)) { if (intel_context_is_closed(rq->context))
intel_context_set_banned(rq->context);
return true; return true;
}
rcu_read_lock(); rcu_read_lock();
ctx = rcu_dereference(rq->context->gem_context); ctx = rcu_dereference(rq->context->gem_context);
...@@ -123,11 +105,9 @@ static bool mark_guilty(struct i915_request *rq) ...@@ -123,11 +105,9 @@ static bool mark_guilty(struct i915_request *rq)
banned = !i915_gem_context_is_recoverable(ctx); banned = !i915_gem_context_is_recoverable(ctx);
if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES)) if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
banned = true; banned = true;
if (banned) { if (banned)
drm_dbg(&ctx->i915->drm, "context %s: guilty %d, banned\n", drm_dbg(&ctx->i915->drm, "context %s: guilty %d, banned\n",
ctx->name, atomic_read(&ctx->guilty_count)); ctx->name, atomic_read(&ctx->guilty_count));
intel_context_set_banned(rq->context);
}
client_mark_guilty(ctx, banned); client_mark_guilty(ctx, banned);
...@@ -149,6 +129,8 @@ static void mark_innocent(struct i915_request *rq) ...@@ -149,6 +129,8 @@ static void mark_innocent(struct i915_request *rq)
void __i915_request_reset(struct i915_request *rq, bool guilty) void __i915_request_reset(struct i915_request *rq, bool guilty)
{ {
bool banned = false;
RQ_TRACE(rq, "guilty? %s\n", yesno(guilty)); RQ_TRACE(rq, "guilty? %s\n", yesno(guilty));
GEM_BUG_ON(__i915_request_is_complete(rq)); GEM_BUG_ON(__i915_request_is_complete(rq));
...@@ -156,13 +138,15 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) ...@@ -156,13 +138,15 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
if (guilty) { if (guilty) {
i915_request_set_error_once(rq, -EIO); i915_request_set_error_once(rq, -EIO);
__i915_request_skip(rq); __i915_request_skip(rq);
if (mark_guilty(rq) && !intel_engine_uses_guc(rq->engine)) banned = mark_guilty(rq);
skip_context(rq);
} else { } else {
i915_request_set_error_once(rq, -EAGAIN); i915_request_set_error_once(rq, -EAGAIN);
mark_innocent(rq); mark_innocent(rq);
} }
rcu_read_unlock(); rcu_read_unlock();
if (banned)
intel_context_ban(rq->context, rq);
} }
static bool i915_in_reset(struct pci_dev *pdev) static bool i915_in_reset(struct pci_dev *pdev)
......
...@@ -586,9 +586,29 @@ static void ring_context_reset(struct intel_context *ce) ...@@ -586,9 +586,29 @@ static void ring_context_reset(struct intel_context *ce)
clear_bit(CONTEXT_VALID_BIT, &ce->flags); clear_bit(CONTEXT_VALID_BIT, &ce->flags);
} }
static void ring_context_ban(struct intel_context *ce,
struct i915_request *rq)
{
struct intel_engine_cs *engine;
if (!rq || !i915_request_is_active(rq))
return;
engine = rq->engine;
lockdep_assert_held(&engine->sched_engine->lock);
list_for_each_entry_continue(rq, &engine->sched_engine->requests,
sched.link)
if (rq->context == ce) {
i915_request_set_error_once(rq, -EIO);
__i915_request_skip(rq);
}
}
static const struct intel_context_ops ring_context_ops = { static const struct intel_context_ops ring_context_ops = {
.alloc = ring_context_alloc, .alloc = ring_context_alloc,
.ban = ring_context_ban,
.pre_pin = ring_context_pre_pin, .pre_pin = ring_context_pre_pin,
.pin = ring_context_pin, .pin = ring_context_pin,
.unpin = ring_context_unpin, .unpin = ring_context_unpin,
......
...@@ -281,6 +281,8 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine); ...@@ -281,6 +281,8 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine);
int intel_guc_global_policies_update(struct intel_guc *guc); int intel_guc_global_policies_update(struct intel_guc *guc);
void intel_guc_context_ban(struct intel_context *ce, struct i915_request *rq);
void intel_guc_submission_reset_prepare(struct intel_guc *guc); void intel_guc_submission_reset_prepare(struct intel_guc *guc);
void intel_guc_submission_reset(struct intel_guc *guc, bool stalled); void intel_guc_submission_reset(struct intel_guc *guc, bool stalled);
void intel_guc_submission_reset_finish(struct intel_guc *guc); void intel_guc_submission_reset_finish(struct intel_guc *guc);
......
...@@ -925,6 +925,11 @@ DEFINE_EVENT(intel_context, intel_context_reset, ...@@ -925,6 +925,11 @@ DEFINE_EVENT(intel_context, intel_context_reset,
TP_ARGS(ce) TP_ARGS(ce)
); );
/*
 * Define the intel_context_ban event on the intel_context event class;
 * its only argument is the context being banned.
 */
DEFINE_EVENT(intel_context, intel_context_ban,
TP_PROTO(struct intel_context *ce),
TP_ARGS(ce)
);
DEFINE_EVENT(intel_context, intel_context_register, DEFINE_EVENT(intel_context, intel_context_register,
TP_PROTO(struct intel_context *ce), TP_PROTO(struct intel_context *ce),
TP_ARGS(ce) TP_ARGS(ce)
...@@ -1017,6 +1022,11 @@ trace_intel_context_reset(struct intel_context *ce) ...@@ -1017,6 +1022,11 @@ trace_intel_context_reset(struct intel_context *ce)
{ {
} }
/*
 * Empty variant of trace_intel_context_ban(): accepts @ce and does nothing.
 * NOTE(review): presumably the fallback for builds where the tracepoint is
 * not compiled in; confirm against the enclosing preprocessor conditional.
 */
static inline void
trace_intel_context_ban(struct intel_context *ce)
{
}
static inline void static inline void
trace_intel_context_register(struct intel_context *ce) trace_intel_context_register(struct intel_context *ce)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment