Commit 15a57448 authored by Dave Airlie's avatar Dave Airlie

Merge tag 'drm-intel-gt-next-2023-02-01' of...

Merge tag 'drm-intel-gt-next-2023-02-01' of git://anongit.freedesktop.org/drm/drm-intel into drm-next

Driver Changes:

Fixes/improvements/new stuff:

- Fix bcs default context on Meteorlake (Lucas De Marchi)
- GAM registers don't need to be re-applied on engine resets (Matt Roper)
- Correct implementation of Wa_18018781329 (Matt Roper)
- Avoid potential vm use-after-free (Rob Clark)
- GuC error capture fixes (John Harrison)
- Fix potential bit_17 double-free (Rob Clark)
- Don't complain about missing regs on MTL (John Harrison)

Future platform enablement:

- Convert PSS_MODE2 to multicast register (Gustavo Sousa)
- Move/adjust register definitions related to Wa_22011450934 (Matt Roper)
- Move LSC_CHICKEN_BIT* workarounds to correct function (Gustavo Sousa)
- Document where to implement register workarounds (Gustavo Sousa)
- Use uabi engines for the default engine map (Tvrtko Ursulin)
- Flush all tiles on test exit (Tvrtko Ursulin)
- Annotate a couple more workaround registers as MCR (Matt Roper)

Driver refactors:

- Add and use GuC oriented print macros (Michal Wajdeczko)

Miscellaneous:

- Fix intel_selftest_modify_policy argument types (Arnd Bergmann)

Backmerges:

Merge drm/drm-next into drm-intel-gt-next (for conflict resolution) (Tvrtko Ursulin)
Signed-off-by: default avatarDave Airlie <airlied@redhat.com>
From: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/Y9pOsq7VKnq7rgnW@tursulin-desk
parents 535cd710 003e11ed
......@@ -1096,16 +1096,15 @@ static struct i915_gem_engines *alloc_engines(unsigned int count)
static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx,
struct intel_sseu rcs_sseu)
{
const struct intel_gt *gt = to_gt(ctx->i915);
const unsigned int max = I915_NUM_ENGINES;
struct intel_engine_cs *engine;
struct i915_gem_engines *e, *err;
enum intel_engine_id id;
e = alloc_engines(I915_NUM_ENGINES);
e = alloc_engines(max);
if (!e)
return ERR_PTR(-ENOMEM);
for_each_engine(engine, gt, id) {
for_each_uabi_engine(engine, ctx->i915) {
struct intel_context *ce;
struct intel_sseu sseu = {};
int ret;
......@@ -1113,7 +1112,7 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx,
if (engine->legacy_idx == INVALID_ENGINE)
continue;
GEM_BUG_ON(engine->legacy_idx >= I915_NUM_ENGINES);
GEM_BUG_ON(engine->legacy_idx >= max);
GEM_BUG_ON(e->engines[engine->legacy_idx]);
ce = intel_context_create(engine);
......@@ -1861,11 +1860,19 @@ static int get_ppgtt(struct drm_i915_file_private *file_priv,
vm = ctx->vm;
GEM_BUG_ON(!vm);
/*
* Get a reference for the allocated handle. Once the handle is
* visible in the vm_xa table, userspace could try to close it
* from under our feet, so we need to hold the extra reference
* first.
*/
i915_vm_get(vm);
err = xa_alloc(&file_priv->vm_xa, &id, vm, xa_limit_32b, GFP_KERNEL);
if (err)
if (err) {
i915_vm_put(vm);
return err;
i915_vm_get(vm);
}
GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */
args->value = id;
......
......@@ -305,10 +305,6 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
spin_unlock(&obj->vma.lock);
obj->tiling_and_stride = tiling | stride;
i915_gem_object_unlock(obj);
/* Force the fence to be reacquired for GTT access */
i915_gem_object_release_mmap_gtt(obj);
/* Try to preallocate memory required to save swizzling on put-pages */
if (i915_gem_object_needs_bit17_swizzle(obj)) {
......@@ -321,6 +317,11 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
obj->bit_17 = NULL;
}
i915_gem_object_unlock(obj);
/* Force the fence to be reacquired for GTT access */
i915_gem_object_release_mmap_gtt(obj);
return 0;
}
......
......@@ -528,7 +528,7 @@ struct i915_request *intel_context_create_request(struct intel_context *ce)
return rq;
}
struct i915_request *intel_context_find_active_request(struct intel_context *ce)
struct i915_request *intel_context_get_active_request(struct intel_context *ce)
{
struct intel_context *parent = intel_context_to_parent(ce);
struct i915_request *rq, *active = NULL;
......@@ -552,6 +552,8 @@ struct i915_request *intel_context_find_active_request(struct intel_context *ce)
active = rq;
}
if (active)
active = i915_request_get_rcu(active);
spin_unlock_irqrestore(&parent->guc_state.lock, flags);
return active;
......
......@@ -268,8 +268,7 @@ int intel_context_prepare_remote_request(struct intel_context *ce,
struct i915_request *intel_context_create_request(struct intel_context *ce);
struct i915_request *
intel_context_find_active_request(struct intel_context *ce);
struct i915_request *intel_context_get_active_request(struct intel_context *ce);
static inline bool intel_context_is_barrier(const struct intel_context *ce)
{
......
......@@ -250,8 +250,8 @@ void intel_engine_dump_active_requests(struct list_head *requests,
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine,
ktime_t *now);
struct i915_request *
intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine);
void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
struct intel_context **ce, struct i915_request **rq);
u32 intel_engine_context_size(struct intel_gt *gt, u8 class);
struct intel_context *
......
......@@ -1584,11 +1584,8 @@ static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
};
u32 val;
if (!_reg[engine->id].reg) {
drm_err(&engine->i915->drm,
"MSG IDLE undefined for engine id %u\n", engine->id);
if (!_reg[engine->id].reg)
return 0;
}
val = intel_uncore_read(engine->uncore, _reg[engine->id]);
......@@ -2114,17 +2111,6 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
}
}
static unsigned long list_count(struct list_head *list)
{
struct list_head *pos;
unsigned long count = 0;
list_for_each(pos, list)
count++;
return count;
}
static unsigned long read_ul(void *p, size_t x)
{
return *(unsigned long *)(p + x);
......@@ -2216,11 +2202,11 @@ void intel_engine_dump_active_requests(struct list_head *requests,
}
}
static void engine_dump_active_requests(struct intel_engine_cs *engine, struct drm_printer *m)
static void engine_dump_active_requests(struct intel_engine_cs *engine,
struct drm_printer *m)
{
struct intel_context *hung_ce = NULL;
struct i915_request *hung_rq = NULL;
struct intel_context *ce;
bool guc;
/*
* No need for an engine->irq_seqno_barrier() before the seqno reads.
......@@ -2229,27 +2215,22 @@ static void engine_dump_active_requests(struct intel_engine_cs *engine, struct d
* But the intention here is just to report an instantaneous snapshot
* so that's fine.
*/
lockdep_assert_held(&engine->sched_engine->lock);
intel_engine_get_hung_entity(engine, &hung_ce, &hung_rq);
drm_printf(m, "\tRequests:\n");
guc = intel_uc_uses_guc_submission(&engine->gt->uc);
if (guc) {
ce = intel_engine_get_hung_context(engine);
if (ce)
hung_rq = intel_context_find_active_request(ce);
} else {
hung_rq = intel_engine_execlist_find_hung_request(engine);
}
if (hung_rq)
engine_dump_request(hung_rq, m, "\t\thung");
else if (hung_ce)
drm_printf(m, "\t\tGot hung ce but no hung rq!\n");
if (guc)
if (intel_uc_uses_guc_submission(&engine->gt->uc))
intel_guc_dump_active_requests(engine, hung_rq, m);
else
intel_engine_dump_active_requests(&engine->sched_engine->requests,
hung_rq, m);
intel_execlists_dump_active_requests(engine, hung_rq, m);
if (hung_rq)
i915_request_put(hung_rq);
}
void intel_engine_dump(struct intel_engine_cs *engine,
......@@ -2259,7 +2240,6 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct i915_gpu_error * const error = &engine->i915->gpu_error;
struct i915_request *rq;
intel_wakeref_t wakeref;
unsigned long flags;
ktime_t dummy;
if (header) {
......@@ -2296,13 +2276,8 @@ void intel_engine_dump(struct intel_engine_cs *engine,
i915_reset_count(error));
print_properties(engine, m);
spin_lock_irqsave(&engine->sched_engine->lock, flags);
engine_dump_active_requests(engine, m);
drm_printf(m, "\tOn hold?: %lu\n",
list_count(&engine->sched_engine->hold));
spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base);
wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm);
if (wakeref) {
......@@ -2348,8 +2323,7 @@ intel_engine_create_virtual(struct intel_engine_cs **siblings,
return siblings[0]->cops->create_virtual(siblings, count, flags);
}
struct i915_request *
intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
static struct i915_request *engine_execlist_find_hung_request(struct intel_engine_cs *engine)
{
struct i915_request *request, *active = NULL;
......@@ -2401,6 +2375,33 @@ intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine)
return active;
}
void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
struct intel_context **ce, struct i915_request **rq)
{
unsigned long flags;
*ce = intel_engine_get_hung_context(engine);
if (*ce) {
intel_engine_clear_hung_context(engine);
*rq = intel_context_get_active_request(*ce);
return;
}
/*
* Getting here with GuC enabled means it is a forced error capture
* with no actual hang. So, no need to attempt the execlist search.
*/
if (intel_uc_uses_guc_submission(&engine->gt->uc))
return;
spin_lock_irqsave(&engine->sched_engine->lock, flags);
*rq = engine_execlist_find_hung_request(engine);
if (*rq)
*rq = i915_request_get_rcu(*rq);
spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}
void xehp_enable_ccs_engines(struct intel_engine_cs *engine)
{
/*
......
......@@ -81,6 +81,7 @@
#define RING_EIR(base) _MMIO((base) + 0xb0)
#define RING_EMR(base) _MMIO((base) + 0xb4)
#define RING_ESR(base) _MMIO((base) + 0xb8)
#define GEN12_STATE_ACK_DEBUG(base) _MMIO((base) + 0xbc)
#define RING_INSTPM(base) _MMIO((base) + 0xc0)
#define RING_CMD_CCTL(base) _MMIO((base) + 0xc4)
#define ACTHD(base) _MMIO((base) + 0xc8)
......
......@@ -4150,6 +4150,33 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
spin_unlock_irqrestore(&sched_engine->lock, flags);
}
static unsigned long list_count(struct list_head *list)
{
struct list_head *pos;
unsigned long count = 0;
list_for_each(pos, list)
count++;
return count;
}
void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
struct i915_request *hung_rq,
struct drm_printer *m)
{
unsigned long flags;
spin_lock_irqsave(&engine->sched_engine->lock, flags);
intel_engine_dump_active_requests(&engine->sched_engine->requests, hung_rq, m);
drm_printf(m, "\tOn hold?: %lu\n",
list_count(&engine->sched_engine->hold));
spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
}
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_execlists.c"
#endif
......@@ -32,6 +32,10 @@ void intel_execlists_show_requests(struct intel_engine_cs *engine,
int indent),
unsigned int max);
void intel_execlists_dump_active_requests(struct intel_engine_cs *engine,
struct i915_request *hung_rq,
struct drm_printer *m);
bool
intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine);
......
......@@ -407,6 +407,8 @@
#define GEN9_WM_CHICKEN3 _MMIO(0x5588)
#define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9)
#define XEHP_CULLBIT1 MCR_REG(0x6100)
#define CHICKEN_RASTER_1 MCR_REG(0x6204)
#define DIS_SF_ROUND_NEAREST_EVEN REG_BIT(8)
......@@ -457,10 +459,12 @@
#define HZ_DEPTH_TEST_LE_GE_OPT_DISABLE REG_BIT(13)
#define BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE REG_BIT(3)
#define XEHP_CULLBIT2 MCR_REG(0x7030)
#define GEN8_L3CNTLREG _MMIO(0x7034)
#define GEN8_ERRDETBCTRL (1 << 9)
#define PSS_MODE2 _MMIO(0x703c)
#define XEHP_PSS_MODE2 MCR_REG(0x703c)
#define SCOREBOARD_STALL_FLUSH_CONTROL REG_BIT(5)
#define GEN7_SC_INSTDONE _MMIO(0x7100)
......@@ -1096,16 +1100,19 @@
#define XEHP_MERT_MOD_CTRL MCR_REG(0xcf28)
#define RENDER_MOD_CTRL MCR_REG(0xcf2c)
#define COMP_MOD_CTRL MCR_REG(0xcf30)
#define VDBX_MOD_CTRL MCR_REG(0xcf34)
#define VEBX_MOD_CTRL MCR_REG(0xcf38)
#define XELPMP_GSC_MOD_CTRL _MMIO(0xcf30) /* media GT only */
#define XEHP_VDBX_MOD_CTRL MCR_REG(0xcf34)
#define XELPMP_VDBX_MOD_CTRL _MMIO(0xcf34)
#define XEHP_VEBX_MOD_CTRL MCR_REG(0xcf38)
#define XELPMP_VEBX_MOD_CTRL _MMIO(0xcf38)
#define FORCE_MISS_FTLB REG_BIT(3)
#define GEN12_GAMSTLB_CTRL _MMIO(0xcf4c)
#define XEHP_GAMSTLB_CTRL MCR_REG(0xcf4c)
#define CONTROL_BLOCK_CLKGATE_DIS REG_BIT(12)
#define EGRESS_BLOCK_CLKGATE_DIS REG_BIT(11)
#define TAG_BLOCK_CLKGATE_DIS REG_BIT(7)
#define GEN12_GAMCNTRL_CTRL _MMIO(0xcf54)
#define XEHP_GAMCNTRL_CTRL MCR_REG(0xcf54)
#define INVALIDATION_BROADCAST_MODE_DIS REG_BIT(12)
#define GLOBAL_INVALIDATION_MODE REG_BIT(2)
......
......@@ -1316,16 +1316,16 @@ static u32 *
dg2_emit_rcs_hang_wabb(const struct intel_context *ce, u32 *cs)
{
*cs++ = MI_LOAD_REGISTER_IMM(1);
*cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG);
*cs++ = i915_mmio_reg_offset(GEN12_STATE_ACK_DEBUG(ce->engine->mmio_base));
*cs++ = 0x21;
*cs++ = MI_LOAD_REGISTER_REG;
*cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
*cs++ = i915_mmio_reg_offset(GEN12_CULLBIT1);
*cs++ = i915_mmio_reg_offset(XEHP_CULLBIT1);
*cs++ = MI_LOAD_REGISTER_REG;
*cs++ = i915_mmio_reg_offset(RING_NOPID(ce->engine->mmio_base));
*cs++ = i915_mmio_reg_offset(GEN12_CULLBIT2);
*cs++ = i915_mmio_reg_offset(XEHP_CULLBIT2);
return cs;
}
......
This diff is collapsed.
......@@ -11,6 +11,7 @@
#include "intel_guc.h"
#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_print.h"
#include "intel_guc_slpc.h"
#include "intel_guc_submission.h"
#include "i915_drv.h"
......@@ -94,7 +95,7 @@ static void gen9_enable_guc_interrupts(struct intel_guc *guc)
assert_rpm_wakelock_held(&gt->i915->runtime_pm);
spin_lock_irq(gt->irq_lock);
WARN_ON_ONCE(intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) &
guc_WARN_ON_ONCE(guc, intel_uncore_read(gt->uncore, GEN8_GT_IIR(2)) &
gt->pm_guc_events);
gen6_gt_pm_enable_irq(gt, gt->pm_guc_events);
spin_unlock_irq(gt->irq_lock);
......@@ -342,7 +343,7 @@ static void guc_init_params(struct intel_guc *guc)
params[GUC_CTL_DEVID] = guc_ctl_devid(guc);
for (i = 0; i < GUC_CTL_MAX_DWORDS; i++)
DRM_DEBUG_DRIVER("param[%2d] = %#x\n", i, params[i]);
guc_dbg(guc, "param[%2d] = %#x\n", i, params[i]);
}
/*
......@@ -389,7 +390,6 @@ void intel_guc_dump_time_info(struct intel_guc *guc, struct drm_printer *p)
int intel_guc_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
int ret;
ret = intel_uc_fw_init(&guc->fw);
......@@ -451,7 +451,7 @@ int intel_guc_init(struct intel_guc *guc)
intel_uc_fw_fini(&guc->fw);
out:
intel_uc_fw_change_status(&guc->fw, INTEL_UC_FIRMWARE_INIT_FAIL);
i915_probe_error(gt->i915, "failed with %d\n", ret);
guc_probe_error(guc, "failed with %pe\n", ERR_PTR(ret));
return ret;
}
......@@ -480,7 +480,6 @@ void intel_guc_fini(struct intel_guc *guc)
int intel_guc_send_mmio(struct intel_guc *guc, const u32 *request, u32 len,
u32 *response_buf, u32 response_buf_size)
{
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
struct intel_uncore *uncore = guc_to_gt(guc)->uncore;
u32 header;
int i;
......@@ -515,7 +514,7 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *request, u32 len,
10, 10, &header);
if (unlikely(ret)) {
timeout:
drm_err(&i915->drm, "mmio request %#x: no reply %x\n",
guc_err(guc, "mmio request %#x: no reply %x\n",
request[0], header);
goto out;
}
......@@ -537,7 +536,7 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *request, u32 len,
if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) == GUC_HXG_TYPE_NO_RESPONSE_RETRY) {
u32 reason = FIELD_GET(GUC_HXG_RETRY_MSG_0_REASON, header);
drm_dbg(&i915->drm, "mmio request %#x: retrying, reason %u\n",
guc_dbg(guc, "mmio request %#x: retrying, reason %u\n",
request[0], reason);
goto retry;
}
......@@ -546,7 +545,7 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *request, u32 len,
u32 hint = FIELD_GET(GUC_HXG_FAILURE_MSG_0_HINT, header);
u32 error = FIELD_GET(GUC_HXG_FAILURE_MSG_0_ERROR, header);
drm_err(&i915->drm, "mmio request %#x: failure %x/%u\n",
guc_err(guc, "mmio request %#x: failure %x/%u\n",
request[0], error, hint);
ret = -ENXIO;
goto out;
......@@ -554,7 +553,7 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *request, u32 len,
if (FIELD_GET(GUC_HXG_MSG_0_TYPE, header) != GUC_HXG_TYPE_RESPONSE_SUCCESS) {
proto:
drm_err(&i915->drm, "mmio request %#x: unexpected reply %#x\n",
guc_err(guc, "mmio request %#x: unexpected reply %#x\n",
request[0], header);
ret = -EPROTO;
goto out;
......@@ -597,9 +596,9 @@ int intel_guc_to_host_process_recv_msg(struct intel_guc *guc,
msg = payload[0] & guc->msg_enabled_mask;
if (msg & INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED)
drm_err(&guc_to_gt(guc)->i915->drm, "Received early GuC crash dump notification!\n");
guc_err(guc, "Received early crash dump notification!\n");
if (msg & INTEL_GUC_RECV_MSG_EXCEPTION)
drm_err(&guc_to_gt(guc)->i915->drm, "Received early GuC exception notification!\n");
guc_err(guc, "Received early exception notification!\n");
return 0;
}
......@@ -653,7 +652,8 @@ int intel_guc_suspend(struct intel_guc *guc)
*/
ret = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0);
if (ret)
DRM_ERROR("GuC suspend: RESET_CLIENT action failed with error %d!\n", ret);
guc_err(guc, "suspend: RESET_CLIENT action failed with %pe\n",
ERR_PTR(ret));
}
/* Signal that the GuC isn't running. */
......@@ -828,11 +828,10 @@ static int __guc_action_self_cfg(struct intel_guc *guc, u16 key, u16 len, u64 va
static int __guc_self_cfg(struct intel_guc *guc, u16 key, u16 len, u64 value)
{
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
int err = __guc_action_self_cfg(guc, key, len, value);
if (unlikely(err))
i915_probe_error(i915, "Unsuccessful self-config (%pe) key %#hx value %#llx\n",
guc_probe_error(guc, "Unsuccessful self-config (%pe) key %#hx value %#llx\n",
ERR_PTR(err), key, value);
return err;
}
......
......@@ -15,6 +15,7 @@
#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_fwif.h"
#include "intel_guc_print.h"
#include "intel_uc.h"
#include "i915_drv.h"
......@@ -427,7 +428,7 @@ static long guc_mmio_reg_state_create(struct intel_guc *guc)
guc->ads_regset = temp_set.storage;
drm_dbg(&guc_to_gt(guc)->i915->drm, "Used %zu KB for temporary ADS regset\n",
guc_dbg(guc, "Used %zu KB for temporary ADS regset\n",
(temp_set.storage_max * sizeof(struct guc_mmio_reg)) >> 10);
return total * sizeof(struct guc_mmio_reg);
......@@ -621,7 +622,7 @@ static void guc_init_golden_context(struct intel_guc *guc)
engine = find_engine_state(gt, engine_class);
if (!engine) {
drm_err(&gt->i915->drm, "No engine state recorded for class %d!\n",
guc_err(guc, "No engine state recorded for class %d!\n",
engine_class);
ads_blob_write(guc, ads.eng_state_size[guc_class], 0);
ads_blob_write(guc, ads.golden_context_lrca[guc_class], 0);
......@@ -646,7 +647,6 @@ static int
guc_capture_prep_lists(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
u32 ads_ggtt, capture_offset, null_ggtt, total_size = 0;
struct guc_gt_system_info local_info;
struct iosys_map info_map;
......@@ -751,7 +751,7 @@ guc_capture_prep_lists(struct intel_guc *guc)
}
if (guc->ads_capture_size && guc->ads_capture_size != PAGE_ALIGN(total_size))
drm_warn(&i915->drm, "GuC->ADS->Capture alloc size changed from %d to %d\n",
guc_warn(guc, "ADS capture alloc size changed from %d to %d\n",
guc->ads_capture_size, PAGE_ALIGN(total_size));
return PAGE_ALIGN(total_size);
......
......@@ -1506,7 +1506,7 @@ int intel_guc_capture_print_engine_node(struct drm_i915_error_state_buf *ebuf,
if (!ebuf || !ee)
return -EINVAL;
cap = ee->capture;
cap = ee->guc_capture;
if (!cap || !ee->engine)
return -ENODEV;
......@@ -1576,8 +1576,8 @@ void intel_guc_capture_free_node(struct intel_engine_coredump *ee)
if (!ee || !ee->guc_capture_node)
return;
guc_capture_add_node_to_cachelist(ee->capture, ee->guc_capture_node);
ee->capture = NULL;
guc_capture_add_node_to_cachelist(ee->guc_capture, ee->guc_capture_node);
ee->guc_capture = NULL;
ee->guc_capture_node = NULL;
}
......@@ -1611,7 +1611,7 @@ void intel_guc_capture_get_matching_node(struct intel_gt *gt,
(ce->lrc.lrca & CTX_GTT_ADDRESS_MASK)) {
list_del(&n->link);
ee->guc_capture_node = n;
ee->capture = guc->capture;
ee->guc_capture = guc->capture;
return;
}
}
......
......@@ -11,38 +11,23 @@
#include "i915_drv.h"
#include "intel_guc_ct.h"
#include "gt/intel_gt.h"
#include "intel_guc_print.h"
static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct)
{
return container_of(ct, struct intel_guc, ct);
}
static inline struct intel_gt *ct_to_gt(struct intel_guc_ct *ct)
{
return guc_to_gt(ct_to_guc(ct));
}
static inline struct drm_i915_private *ct_to_i915(struct intel_guc_ct *ct)
{
return ct_to_gt(ct)->i915;
}
static inline struct drm_device *ct_to_drm(struct intel_guc_ct *ct)
{
return &ct_to_i915(ct)->drm;
}
#define CT_ERROR(_ct, _fmt, ...) \
drm_err(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
guc_err(ct_to_guc(_ct), "CT: " _fmt, ##__VA_ARGS__)
#ifdef CONFIG_DRM_I915_DEBUG_GUC
#define CT_DEBUG(_ct, _fmt, ...) \
drm_dbg(ct_to_drm(_ct), "CT: " _fmt, ##__VA_ARGS__)
guc_dbg(ct_to_guc(_ct), "CT: " _fmt, ##__VA_ARGS__)
#else
#define CT_DEBUG(...) do { } while (0)
#endif
#define CT_PROBE_ERROR(_ct, _fmt, ...) \
i915_probe_error(ct_to_i915(ct), "CT: " _fmt, ##__VA_ARGS__)
guc_probe_error(ct_to_guc(ct), "CT: " _fmt, ##__VA_ARGS__)
/**
* DOC: CTB Blob
......
......@@ -13,6 +13,7 @@
#include "gt/intel_gt_mcr.h"
#include "gt/intel_gt_regs.h"
#include "intel_guc_fw.h"
#include "intel_guc_print.h"
#include "i915_drv.h"
static void guc_prepare_xfer(struct intel_gt *gt)
......@@ -103,8 +104,10 @@ static inline bool guc_ready(struct intel_uncore *uncore, u32 *status)
return uk_val == INTEL_GUC_LOAD_STATUS_READY;
}
static int guc_wait_ucode(struct intel_uncore *uncore)
static int guc_wait_ucode(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
struct intel_uncore *uncore = gt->uncore;
u32 status;
int ret;
......@@ -127,10 +130,8 @@ static int guc_wait_ucode(struct intel_uncore *uncore)
*/
ret = wait_for(guc_ready(uncore, &status), 200);
if (ret) {
struct drm_device *drm = &uncore->i915->drm;
drm_info(drm, "GuC load failed: status = 0x%08X\n", status);
drm_info(drm, "GuC load failed: status: Reset = %d, "
guc_info(guc, "load failed: status = 0x%08X\n", status);
guc_info(guc, "load failed: status: Reset = %d, "
"BootROM = 0x%02X, UKernel = 0x%02X, "
"MIA = 0x%02X, Auth = 0x%02X\n",
REG_FIELD_GET(GS_MIA_IN_RESET, status),
......@@ -140,12 +141,12 @@ static int guc_wait_ucode(struct intel_uncore *uncore)
REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) {
drm_info(drm, "GuC firmware signature verification failed\n");
guc_info(guc, "firmware signature verification failed\n");
ret = -ENOEXEC;
}
if (REG_FIELD_GET(GS_UKERNEL_MASK, status) == INTEL_GUC_LOAD_STATUS_EXCEPTION) {
drm_info(drm, "GuC firmware exception. EIP: %#x\n",
guc_info(guc, "firmware exception. EIP: %#x\n",
intel_uncore_read(uncore, SOFT_SCRATCH(13)));
ret = -ENXIO;
}
......@@ -194,7 +195,7 @@ int intel_guc_fw_upload(struct intel_guc *guc)
if (ret)
goto out;
ret = guc_wait_ucode(uncore);
ret = guc_wait_ucode(guc);
if (ret)
goto out;
......
......@@ -12,6 +12,7 @@
#include "i915_memcpy.h"
#include "intel_guc_capture.h"
#include "intel_guc_log.h"
#include "intel_guc_print.h"
#if defined(CONFIG_DRM_I915_DEBUG_GUC)
#define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZE SZ_2M
......@@ -39,7 +40,6 @@ struct guc_log_section {
static void _guc_log_init_sizes(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
static const struct guc_log_section sections[GUC_LOG_SECTIONS_LIMIT] = {
{
GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT,
......@@ -82,12 +82,12 @@ static void _guc_log_init_sizes(struct intel_guc_log *log)
}
if (!IS_ALIGNED(log->sizes[i].bytes, log->sizes[i].units))
drm_err(&i915->drm, "Mis-aligned GuC log %s size: 0x%X vs 0x%X!",
guc_err(guc, "Mis-aligned log %s size: 0x%X vs 0x%X!\n",
sections[i].name, log->sizes[i].bytes, log->sizes[i].units);
log->sizes[i].count = log->sizes[i].bytes / log->sizes[i].units;
if (!log->sizes[i].count) {
drm_err(&i915->drm, "Zero GuC log %s size!", sections[i].name);
guc_err(guc, "Zero log %s size!\n", sections[i].name);
} else {
/* Size is +1 unit */
log->sizes[i].count--;
......@@ -95,14 +95,14 @@ static void _guc_log_init_sizes(struct intel_guc_log *log)
/* Clip to field size */
if (log->sizes[i].count > sections[i].max) {
drm_err(&i915->drm, "GuC log %s size too large: %d vs %d!",
guc_err(guc, "log %s size too large: %d vs %d!\n",
sections[i].name, log->sizes[i].count + 1, sections[i].max + 1);
log->sizes[i].count = sections[i].max;
}
}
if (log->sizes[GUC_LOG_SECTIONS_CRASH].units != log->sizes[GUC_LOG_SECTIONS_DEBUG].units) {
drm_err(&i915->drm, "Unit mis-match for GuC log crash and debug sections: %d vs %d!",
guc_err(guc, "Unit mismatch for crash and debug sections: %d vs %d!\n",
log->sizes[GUC_LOG_SECTIONS_CRASH].units,
log->sizes[GUC_LOG_SECTIONS_DEBUG].units);
log->sizes[GUC_LOG_SECTIONS_CRASH].units = log->sizes[GUC_LOG_SECTIONS_DEBUG].units;
......@@ -374,6 +374,7 @@ size_t intel_guc_get_log_buffer_offset(struct intel_guc_log *log,
static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
{
struct intel_guc *guc = log_to_guc(log);
unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt;
struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state;
struct guc_log_buffer_state log_buf_state_local;
......@@ -383,7 +384,7 @@ static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
mutex_lock(&log->relay.lock);
if (WARN_ON(!intel_guc_log_relay_created(log)))
if (guc_WARN_ON(guc, !intel_guc_log_relay_created(log)))
goto out_unlock;
/* Get the pointer to shared GuC log buffer */
......@@ -398,7 +399,7 @@ static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
* Used rate limited to avoid deluge of messages, logs might be
* getting consumed by User at a slow rate.
*/
DRM_ERROR_RATELIMITED("no sub-buffer to copy general logs\n");
guc_err_ratelimited(guc, "no sub-buffer to copy general logs\n");
log->relay.full_count++;
goto out_unlock;
......@@ -451,7 +452,7 @@ static void _guc_log_copy_debuglogs_for_relay(struct intel_guc_log *log)
write_offset = buffer_size;
} else if (unlikely((read_offset > buffer_size) ||
(write_offset > buffer_size))) {
DRM_ERROR("invalid log buffer state\n");
guc_err(guc, "invalid log buffer state\n");
/* copy whole buffer as offsets are unreliable */
read_offset = 0;
write_offset = buffer_size;
......@@ -547,7 +548,7 @@ static int guc_log_relay_create(struct intel_guc_log *log)
subbuf_size, n_subbufs,
&relay_callbacks, dev_priv);
if (!guc_log_relay_chan) {
DRM_ERROR("Couldn't create relay chan for GuC logging\n");
guc_err(guc, "Couldn't create relay channel for logging\n");
ret = -ENOMEM;
return ret;
......@@ -596,9 +597,8 @@ static u32 __get_default_log_level(struct intel_guc_log *log)
}
if (i915->params.guc_log_level > GUC_LOG_LEVEL_MAX) {
DRM_WARN("Incompatible option detected: %s=%d, %s!\n",
"guc_log_level", i915->params.guc_log_level,
"verbosity too high");
guc_warn(guc, "Log verbosity param out of range: %d > %d!\n",
i915->params.guc_log_level, GUC_LOG_LEVEL_MAX);
return (IS_ENABLED(CONFIG_DRM_I915_DEBUG) ||
IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) ?
GUC_LOG_LEVEL_MAX : GUC_LOG_LEVEL_DISABLED;
......@@ -641,7 +641,7 @@ int intel_guc_log_create(struct intel_guc_log *log)
log->buf_addr = vaddr;
log->level = __get_default_log_level(log);
DRM_DEBUG_DRIVER("guc_log_level=%d (%s, verbose:%s, verbosity:%d)\n",
guc_dbg(guc, "guc_log_level=%d (%s, verbose:%s, verbosity:%d)\n",
log->level, str_enabled_disabled(log->level),
str_yes_no(GUC_LOG_LEVEL_IS_VERBOSE(log->level)),
GUC_LOG_LEVEL_TO_VERBOSITY(log->level));
......@@ -649,7 +649,7 @@ int intel_guc_log_create(struct intel_guc_log *log)
return 0;
err:
DRM_ERROR("Failed to allocate or map GuC log buffer. %d\n", ret);
guc_err(guc, "Failed to allocate or map log buffer %pe\n", ERR_PTR(ret));
return ret;
}
......@@ -687,7 +687,7 @@ int intel_guc_log_set_level(struct intel_guc_log *log, u32 level)
GUC_LOG_LEVEL_IS_ENABLED(level),
GUC_LOG_LEVEL_TO_VERBOSITY(level));
if (ret) {
DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret);
guc_dbg(guc, "guc_log_control action failed %pe\n", ERR_PTR(ret));
goto out_unlock;
}
......@@ -905,7 +905,7 @@ int intel_guc_log_dump(struct intel_guc_log *log, struct drm_printer *p,
map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
if (IS_ERR(map)) {
DRM_DEBUG("Failed to pin object\n");
guc_dbg(guc, "Failed to pin log object: %pe\n", map);
drm_puts(p, "(log data unaccessible)\n");
free_page((unsigned long)page);
return PTR_ERR(map);
......
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2023 Intel Corporation
*/
#ifndef __INTEL_GUC_PRINT__
#define __INTEL_GUC_PRINT__
#include "gt/intel_gt.h"
#include "gt/intel_gt_print.h"
#define guc_printk(_guc, _level, _fmt, ...) \
gt_##_level(guc_to_gt(_guc), "GUC: " _fmt, ##__VA_ARGS__)
#define guc_err(_guc, _fmt, ...) \
guc_printk((_guc), err, _fmt, ##__VA_ARGS__)
#define guc_warn(_guc, _fmt, ...) \
guc_printk((_guc), warn, _fmt, ##__VA_ARGS__)
#define guc_notice(_guc, _fmt, ...) \
guc_printk((_guc), notice, _fmt, ##__VA_ARGS__)
#define guc_info(_guc, _fmt, ...) \
guc_printk((_guc), info, _fmt, ##__VA_ARGS__)
#define guc_dbg(_guc, _fmt, ...) \
guc_printk((_guc), dbg, _fmt, ##__VA_ARGS__)
#define guc_err_ratelimited(_guc, _fmt, ...) \
guc_printk((_guc), err_ratelimited, _fmt, ##__VA_ARGS__)
#define guc_probe_error(_guc, _fmt, ...) \
guc_printk((_guc), probe_error, _fmt, ##__VA_ARGS__)
#define guc_WARN(_guc, _cond, _fmt, ...) \
gt_WARN(guc_to_gt(_guc), _cond, "GUC: " _fmt, ##__VA_ARGS__)
#define guc_WARN_ONCE(_guc, _cond, _fmt, ...) \
gt_WARN_ONCE(guc_to_gt(_guc), _cond, "GUC: " _fmt, ##__VA_ARGS__)
#define guc_WARN_ON(_guc, _cond) \
gt_WARN(guc_to_gt(_guc), _cond, "%s(%s)", "guc_WARN_ON", __stringify(_cond))
#define guc_WARN_ON_ONCE(_guc, _cond) \
gt_WARN_ONCE(guc_to_gt(_guc), _cond, "%s(%s)", "guc_WARN_ON_ONCE", __stringify(_cond))
#endif /* __INTEL_GUC_PRINT__ */
......@@ -27,6 +27,7 @@
#include "intel_guc_ads.h"
#include "intel_guc_capture.h"
#include "intel_guc_print.h"
#include "intel_guc_submission.h"
#include "i915_drv.h"
......@@ -1443,8 +1444,7 @@ static void guc_init_engine_stats(struct intel_guc *guc)
int ret = guc_action_enable_usage_stats(guc);
if (ret)
drm_err(&gt->i915->drm,
"Failed to enable usage stats: %d!\n", ret);
guc_err(guc, "Failed to enable usage stats: %pe\n", ERR_PTR(ret));
}
}
......@@ -1702,7 +1702,7 @@ static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t st
goto next_context;
guilty = false;
rq = intel_context_find_active_request(ce);
rq = intel_context_get_active_request(ce);
if (!rq) {
head = ce->ring->tail;
goto out_replay;
......@@ -1715,6 +1715,7 @@ static void __guc_reset_context(struct intel_context *ce, intel_engine_mask_t st
head = intel_ring_wrap(ce->ring, rq->head);
__i915_request_reset(rq, guilty);
i915_request_put(rq);
out_replay:
guc_reset_state(ce, head, guilty);
next_context:
......@@ -3585,8 +3586,7 @@ static int guc_request_alloc(struct i915_request *rq)
intel_context_sched_disable_unpin(ce);
else if (intel_context_is_closed(ce))
if (wait_for(context_close_done(ce), 1500))
drm_warn(&guc_to_gt(guc)->i915->drm,
"timed out waiting on context sched close before realloc\n");
guc_warn(guc, "timed out waiting on context sched close before realloc\n");
/*
* Call pin_guc_id here rather than in the pinning step as with
* dma_resv, contexts can be repeatedly pinned / unpinned trashing the
......@@ -4349,11 +4349,14 @@ static int __guc_action_set_scheduling_policies(struct intel_guc *guc,
ret = intel_guc_send(guc, (u32 *)&policy->h2g,
__guc_scheduling_policy_action_size(policy));
if (ret < 0)
if (ret < 0) {
guc_probe_error(guc, "Failed to configure global scheduling policies: %pe!\n",
ERR_PTR(ret));
return ret;
}
if (ret != policy->count) {
drm_warn(&guc_to_gt(guc)->i915->drm, "GuC global scheduler policy processed %d of %d KLVs!",
guc_warn(guc, "global scheduler policy processed %d of %d KLVs!",
ret, policy->count);
if (ret > policy->count)
return -EPROTO;
......@@ -4367,7 +4370,7 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc)
struct scheduling_policy policy;
struct intel_gt *gt = guc_to_gt(guc);
intel_wakeref_t wakeref;
int ret = 0;
int ret;
if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
return 0;
......@@ -4385,10 +4388,6 @@ static int guc_init_global_schedule_policy(struct intel_guc *guc)
yield, ARRAY_SIZE(yield));
ret = __guc_action_set_scheduling_policies(guc, &policy);
if (ret)
i915_probe_error(gt->i915,
"Failed to configure global scheduling policies: %pe!\n",
ERR_PTR(ret));
}
return ret;
......@@ -4487,21 +4486,18 @@ g2h_context_lookup(struct intel_guc *guc, u32 ctx_id)
struct intel_context *ce;
if (unlikely(ctx_id >= GUC_MAX_CONTEXT_ID)) {
drm_err(&guc_to_gt(guc)->i915->drm,
"Invalid ctx_id %u\n", ctx_id);
guc_err(guc, "Invalid ctx_id %u\n", ctx_id);
return NULL;
}
ce = __get_context(guc, ctx_id);
if (unlikely(!ce)) {
drm_err(&guc_to_gt(guc)->i915->drm,
"Context is NULL, ctx_id %u\n", ctx_id);
guc_err(guc, "Context is NULL, ctx_id %u\n", ctx_id);
return NULL;
}
if (unlikely(intel_context_is_child(ce))) {
drm_err(&guc_to_gt(guc)->i915->drm,
"Context is child, ctx_id %u\n", ctx_id);
guc_err(guc, "Context is child, ctx_id %u\n", ctx_id);
return NULL;
}
......@@ -4516,7 +4512,7 @@ int intel_guc_deregister_done_process_msg(struct intel_guc *guc,
u32 ctx_id;
if (unlikely(len < 1)) {
drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
guc_err(guc, "Invalid length %u\n", len);
return -EPROTO;
}
ctx_id = msg[0];
......@@ -4568,7 +4564,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
u32 ctx_id;
if (unlikely(len < 2)) {
drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u\n", len);
guc_err(guc, "Invalid length %u\n", len);
return -EPROTO;
}
ctx_id = msg[0];
......@@ -4580,8 +4576,7 @@ int intel_guc_sched_done_process_msg(struct intel_guc *guc,
if (unlikely(context_destroyed(ce) ||
(!context_pending_enable(ce) &&
!context_pending_disable(ce)))) {
drm_err(&guc_to_gt(guc)->i915->drm,
"Bad context sched_state 0x%x, ctx_id %u\n",
guc_err(guc, "Bad context sched_state 0x%x, ctx_id %u\n",
ce->guc_state.sched_state, ctx_id);
return -EPROTO;
}
......@@ -4665,12 +4660,15 @@ static void guc_handle_context_reset(struct intel_guc *guc,
{
trace_intel_context_reset(ce);
drm_dbg(&guc_to_gt(guc)->i915->drm, "Got GuC reset of 0x%04X, exiting = %d, banned = %d\n",
ce->guc_id.id, test_bit(CONTEXT_EXITING, &ce->flags),
test_bit(CONTEXT_BANNED, &ce->flags));
if (likely(intel_context_is_schedulable(ce))) {
capture_error_state(guc, ce);
guc_context_replay(ce);
} else {
drm_info(&guc_to_gt(guc)->i915->drm,
"Ignoring context reset notification of exiting context 0x%04X on %s",
guc_info(guc, "Ignoring context reset notification of exiting context 0x%04X on %s",
ce->guc_id.id, ce->engine->name);
}
}
......@@ -4683,7 +4681,7 @@ int intel_guc_context_reset_process_msg(struct intel_guc *guc,
int ctx_id;
if (unlikely(len != 1)) {
drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
guc_err(guc, "Invalid length %u", len);
return -EPROTO;
}
......@@ -4716,13 +4714,13 @@ int intel_guc_error_capture_process_msg(struct intel_guc *guc,
u32 status;
if (unlikely(len != 1)) {
drm_dbg(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
guc_dbg(guc, "Invalid length %u", len);
return -EPROTO;
}
status = msg[0] & INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK;
if (status == INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE)
drm_warn(&guc_to_gt(guc)->i915->drm, "G2H-Error capture no space");
guc_warn(guc, "No space for error capture");
intel_guc_capture_process(guc);
......@@ -4754,24 +4752,36 @@ static void reset_fail_worker_func(struct work_struct *w)
guc->submission_state.reset_fail_mask = 0;
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
if (likely(reset_fail_mask))
if (likely(reset_fail_mask)) {
struct intel_engine_cs *engine;
enum intel_engine_id id;
/*
* GuC is toast at this point - it dead loops after sending the failed
* reset notification. So need to manually determine the guilty context.
* Note that it should be reliable to do this here because the GuC is
* toast and will not be scheduling behind the KMD's back.
*/
for_each_engine_masked(engine, gt, reset_fail_mask, id)
intel_guc_find_hung_context(engine);
intel_gt_handle_error(gt, reset_fail_mask,
I915_ERROR_CAPTURE,
"GuC failed to reset engine mask=0x%x\n",
"GuC failed to reset engine mask=0x%x",
reset_fail_mask);
}
}
int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
const u32 *msg, u32 len)
{
struct intel_engine_cs *engine;
struct intel_gt *gt = guc_to_gt(guc);
u8 guc_class, instance;
u32 reason;
unsigned long flags;
if (unlikely(len != 3)) {
drm_err(&gt->i915->drm, "Invalid length %u", len);
guc_err(guc, "Invalid length %u", len);
return -EPROTO;
}
......@@ -4781,8 +4791,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
engine = intel_guc_lookup_engine(guc, guc_class, instance);
if (unlikely(!engine)) {
drm_err(&gt->i915->drm,
"Invalid engine %d:%d", guc_class, instance);
guc_err(guc, "Invalid engine %d:%d", guc_class, instance);
return -EPROTO;
}
......@@ -4790,7 +4799,7 @@ int intel_guc_engine_failure_process_msg(struct intel_guc *guc,
* This is an unexpected failure of a hardware feature. So, log a real
* error message not just the informational that comes with the reset.
*/
drm_err(&gt->i915->drm, "GuC engine reset request failed on %d:%d (%s) because 0x%08X",
guc_err(guc, "Engine reset failed on %d:%d (%s) because 0x%08X",
guc_class, instance, engine->name, reason);
spin_lock_irqsave(&guc->submission_state.lock, flags);
......@@ -4820,6 +4829,8 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine)
xa_lock_irqsave(&guc->context_lookup, flags);
xa_for_each(&guc->context_lookup, index, ce) {
bool found;
if (!kref_get_unless_zero(&ce->ref))
continue;
......@@ -4836,10 +4847,18 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine)
goto next;
}
found = false;
spin_lock(&ce->guc_state.lock);
list_for_each_entry(rq, &ce->guc_state.requests, sched.link) {
if (i915_test_request_state(rq) != I915_REQUEST_ACTIVE)
continue;
found = true;
break;
}
spin_unlock(&ce->guc_state.lock);
if (found) {
intel_engine_set_hung_context(engine, ce);
/* Can only cope with one hang at a time... */
......@@ -4847,6 +4866,7 @@ void intel_guc_find_hung_context(struct intel_engine_cs *engine)
xa_lock(&guc->context_lookup);
goto done;
}
next:
intel_context_put(ce);
xa_lock(&guc->context_lookup);
......@@ -5342,7 +5362,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
GEM_BUG_ON(!is_power_of_2(sibling->mask));
if (sibling->mask & ve->base.mask) {
DRM_DEBUG("duplicate %s entry in load balancer\n",
guc_dbg(guc, "duplicate %s entry in load balancer\n",
sibling->name);
err = -EINVAL;
goto err_put;
......@@ -5352,7 +5372,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count,
ve->base.logical_mask |= sibling->logical_mask;
if (n != 0 && ve->base.class != sibling->class) {
DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n",
guc_dbg(guc, "invalid mixing of engine class, sibling %d, already %d\n",
sibling->class, ve->base.class);
err = -EINVAL;
goto err_put;
......
......@@ -6,11 +6,13 @@
#include <linux/string_helpers.h>
#include "gt/intel_gt.h"
#include "gt/intel_gt_print.h"
#include "gt/intel_reset.h"
#include "intel_gsc_fw.h"
#include "intel_gsc_uc.h"
#include "intel_guc.h"
#include "intel_guc_ads.h"
#include "intel_guc_print.h"
#include "intel_guc_submission.h"
#include "gt/intel_rps.h"
#include "intel_uc.h"
......@@ -67,12 +69,12 @@ static int __intel_uc_reset_hw(struct intel_uc *uc)
ret = intel_reset_guc(gt);
if (ret) {
DRM_ERROR("Failed to reset GuC, ret = %d\n", ret);
gt_err(gt, "Failed to reset GuC, ret = %d\n", ret);
return ret;
}
guc_status = intel_uncore_read(gt->uncore, GUC_STATUS);
WARN(!(guc_status & GS_MIA_IN_RESET),
gt_WARN(gt, !(guc_status & GS_MIA_IN_RESET),
"GuC status: 0x%x, MIA core expected to be in reset\n",
guc_status);
......@@ -252,15 +254,13 @@ static int guc_enable_communication(struct intel_guc *guc)
intel_guc_ct_event_handler(&guc->ct);
spin_unlock_irq(gt->irq_lock);
drm_dbg(&i915->drm, "GuC communication enabled\n");
guc_dbg(guc, "communication enabled\n");
return 0;
}
static void guc_disable_communication(struct intel_guc *guc)
{
struct drm_i915_private *i915 = guc_to_gt(guc)->i915;
/*
* Events generated during or after CT disable are logged by guc in
* via mmio. Make sure the register is clear before disabling CT since
......@@ -280,11 +280,12 @@ static void guc_disable_communication(struct intel_guc *guc)
*/
guc_get_mmio_msg(guc);
drm_dbg(&i915->drm, "GuC communication disabled\n");
guc_dbg(guc, "communication disabled\n");
}
static void __uc_fetch_firmwares(struct intel_uc *uc)
{
struct intel_gt *gt = uc_to_gt(uc);
int err;
GEM_BUG_ON(!intel_uc_wants_guc(uc));
......@@ -293,15 +294,13 @@ static void __uc_fetch_firmwares(struct intel_uc *uc)
if (err) {
/* Make sure we transition out of transient "SELECTED" state */
if (intel_uc_wants_huc(uc)) {
drm_dbg(&uc_to_gt(uc)->i915->drm,
"Failed to fetch GuC: %d disabling HuC\n", err);
gt_dbg(gt, "Failed to fetch GuC fw (%pe) disabling HuC\n", ERR_PTR(err));
intel_uc_fw_change_status(&uc->huc.fw,
INTEL_UC_FIRMWARE_ERROR);
}
if (intel_uc_wants_gsc_uc(uc)) {
drm_dbg(&uc_to_gt(uc)->i915->drm,
"Failed to fetch GuC: %d disabling GSC\n", err);
gt_dbg(gt, "Failed to fetch GuC fw (%pe) disabling GSC\n", ERR_PTR(err));
intel_uc_fw_change_status(&uc->gsc.fw,
INTEL_UC_FIRMWARE_ERROR);
}
......@@ -382,7 +381,7 @@ static int uc_init_wopcm(struct intel_uc *uc)
int err;
if (unlikely(!base || !size)) {
i915_probe_error(gt->i915, "Unsuccessful WOPCM partitioning\n");
gt_probe_error(gt, "Unsuccessful WOPCM partitioning\n");
return -E2BIG;
}
......@@ -413,11 +412,11 @@ static int uc_init_wopcm(struct intel_uc *uc)
return 0;
err_out:
i915_probe_error(gt->i915, "Failed to init uC WOPCM registers!\n");
i915_probe_error(gt->i915, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
gt_probe_error(gt, "Failed to init uC WOPCM registers!\n");
gt_probe_error(gt, "%s(%#x)=%#x\n", "DMA_GUC_WOPCM_OFFSET",
i915_mmio_reg_offset(DMA_GUC_WOPCM_OFFSET),
intel_uncore_read(uncore, DMA_GUC_WOPCM_OFFSET));
i915_probe_error(gt->i915, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
gt_probe_error(gt, "%s(%#x)=%#x\n", "GUC_WOPCM_SIZE",
i915_mmio_reg_offset(GUC_WOPCM_SIZE),
intel_uncore_read(uncore, GUC_WOPCM_SIZE));
......@@ -449,11 +448,9 @@ static int __uc_check_hw(struct intel_uc *uc)
return 0;
}
static void print_fw_ver(struct intel_uc *uc, struct intel_uc_fw *fw)
static void print_fw_ver(struct intel_gt *gt, struct intel_uc_fw *fw)
{
struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
drm_info(&i915->drm, "%s firmware %s version %u.%u.%u\n",
gt_info(gt, "%s firmware %s version %u.%u.%u\n",
intel_uc_fw_type_repr(fw->type), fw->file_selected.path,
fw->file_selected.ver.major,
fw->file_selected.ver.minor,
......@@ -462,7 +459,8 @@ static void print_fw_ver(struct intel_uc *uc, struct intel_uc_fw *fw)
static int __uc_init_hw(struct intel_uc *uc)
{
struct drm_i915_private *i915 = uc_to_gt(uc)->i915;
struct intel_gt *gt = uc_to_gt(uc);
struct drm_i915_private *i915 = gt->i915;
struct intel_guc *guc = &uc->guc;
struct intel_huc *huc = &uc->huc;
int ret, attempts;
......@@ -470,10 +468,10 @@ static int __uc_init_hw(struct intel_uc *uc)
GEM_BUG_ON(!intel_uc_supports_guc(uc));
GEM_BUG_ON(!intel_uc_wants_guc(uc));
print_fw_ver(uc, &guc->fw);
print_fw_ver(gt, &guc->fw);
if (intel_uc_uses_huc(uc))
print_fw_ver(uc, &huc->fw);
print_fw_ver(gt, &huc->fw);
if (!intel_uc_fw_is_loadable(&guc->fw)) {
ret = __uc_check_hw(uc) ||
......@@ -514,8 +512,8 @@ static int __uc_init_hw(struct intel_uc *uc)
if (ret == 0)
break;
DRM_DEBUG_DRIVER("GuC fw load failed: %d; will reset and "
"retry %d more time(s)\n", ret, attempts);
gt_dbg(gt, "GuC fw load failed (%pe) will reset and retry %d more time(s)\n",
ERR_PTR(ret), attempts);
}
/* Did we succeded or run out of retries? */
......@@ -551,9 +549,9 @@ static int __uc_init_hw(struct intel_uc *uc)
intel_gsc_uc_load_start(&uc->gsc);
drm_info(&i915->drm, "GuC submission %s\n",
gt_info(gt, "GuC submission %s\n",
str_enabled_disabled(intel_uc_uses_guc_submission(uc)));
drm_info(&i915->drm, "GuC SLPC %s\n",
gt_info(gt, "GuC SLPC %s\n",
str_enabled_disabled(intel_uc_uses_guc_slpc(uc)));
return 0;
......@@ -572,12 +570,12 @@ static int __uc_init_hw(struct intel_uc *uc)
__uc_sanitize(uc);
if (!ret) {
drm_notice(&i915->drm, "GuC is uninitialized\n");
gt_notice(gt, "GuC is uninitialized\n");
/* We want to run without GuC submission */
return 0;
}
i915_probe_error(i915, "GuC initialization failed %d\n", ret);
gt_probe_error(gt, "GuC initialization failed %pe\n", ERR_PTR(ret));
/* We want to keep KMS alive */
return -EIO;
......@@ -690,7 +688,7 @@ void intel_uc_suspend(struct intel_uc *uc)
with_intel_runtime_pm(&uc_to_gt(uc)->i915->runtime_pm, wakeref) {
err = intel_guc_suspend(guc);
if (err)
DRM_DEBUG_DRIVER("Failed to suspend GuC, err=%d", err);
guc_dbg(guc, "Failed to suspend, %pe", ERR_PTR(err));
}
}
......@@ -718,7 +716,7 @@ static int __uc_resume(struct intel_uc *uc, bool enable_communication)
err = intel_guc_resume(guc);
if (err) {
DRM_DEBUG_DRIVER("Failed to resume GuC, err=%d", err);
guc_dbg(guc, "Failed to resume, %pe", ERR_PTR(err));
return err;
}
......
......@@ -696,6 +696,10 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
(IS_METEORLAKE(__i915) && \
IS_DISPLAY_STEP(__i915, since, until))
#define IS_MTL_MEDIA_STEP(__i915, since, until) \
(IS_METEORLAKE(__i915) && \
IS_MEDIA_STEP(__i915, since, until))
/*
* DG2 hardware steppings are a bit unusual. The hardware design was forked to
* create three variants (G10, G11, and G12) which each have distinct
......
......@@ -1370,14 +1370,14 @@ static void engine_record_execlists(struct intel_engine_coredump *ee)
}
static bool record_context(struct i915_gem_context_coredump *e,
const struct i915_request *rq)
struct intel_context *ce)
{
struct i915_gem_context *ctx;
struct task_struct *task;
bool simulated;
rcu_read_lock();
ctx = rcu_dereference(rq->context->gem_context);
ctx = rcu_dereference(ce->gem_context);
if (ctx && !kref_get_unless_zero(&ctx->ref))
ctx = NULL;
rcu_read_unlock();
......@@ -1396,8 +1396,8 @@ static bool record_context(struct i915_gem_context_coredump *e,
e->guilty = atomic_read(&ctx->guilty_count);
e->active = atomic_read(&ctx->active_count);
e->total_runtime = intel_context_get_total_runtime_ns(rq->context);
e->avg_runtime = intel_context_get_avg_runtime_ns(rq->context);
e->total_runtime = intel_context_get_total_runtime_ns(ce);
e->avg_runtime = intel_context_get_avg_runtime_ns(ce);
simulated = i915_gem_context_no_error_capture(ctx);
......@@ -1532,15 +1532,37 @@ intel_engine_coredump_alloc(struct intel_engine_cs *engine, gfp_t gfp, u32 dump_
return ee;
}
static struct intel_engine_capture_vma *
engine_coredump_add_context(struct intel_engine_coredump *ee,
struct intel_context *ce,
gfp_t gfp)
{
struct intel_engine_capture_vma *vma = NULL;
ee->simulated |= record_context(&ee->context, ce);
if (ee->simulated)
return NULL;
/*
* We need to copy these to an anonymous buffer
* as the simplest method to avoid being overwritten
* by userspace.
*/
vma = capture_vma(vma, ce->ring->vma, "ring", gfp);
vma = capture_vma(vma, ce->state, "HW context", gfp);
return vma;
}
struct intel_engine_capture_vma *
intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
struct i915_request *rq,
gfp_t gfp)
{
struct intel_engine_capture_vma *vma = NULL;
struct intel_engine_capture_vma *vma;
ee->simulated |= record_context(&ee->context, rq);
if (ee->simulated)
vma = engine_coredump_add_context(ee, rq->context, gfp);
if (!vma)
return NULL;
/*
......@@ -1550,8 +1572,6 @@ intel_engine_coredump_add_request(struct intel_engine_coredump *ee,
*/
vma = capture_vma_snapshot(vma, rq->batch_res, gfp, "batch");
vma = capture_user(vma, rq, gfp);
vma = capture_vma(vma, rq->ring->vma, "ring", gfp);
vma = capture_vma(vma, rq->context->state, "HW context", gfp);
ee->rq_head = rq->head;
ee->rq_post = rq->postfix;
......@@ -1596,54 +1616,36 @@ capture_engine(struct intel_engine_cs *engine,
{
struct intel_engine_capture_vma *capture = NULL;
struct intel_engine_coredump *ee;
struct intel_context *ce;
struct intel_context *ce = NULL;
struct i915_request *rq = NULL;
unsigned long flags;
ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL, dump_flags);
if (!ee)
return NULL;
ce = intel_engine_get_hung_context(engine);
if (ce) {
intel_engine_clear_hung_context(engine);
rq = intel_context_find_active_request(ce);
if (!rq || !i915_request_started(rq))
goto no_request_capture;
} else {
/*
* Getting here with GuC enabled means it is a forced error capture
* with no actual hang. So, no need to attempt the execlist search.
*/
if (!intel_uc_uses_guc_submission(&engine->gt->uc)) {
spin_lock_irqsave(&engine->sched_engine->lock, flags);
rq = intel_engine_execlist_find_hung_request(engine);
spin_unlock_irqrestore(&engine->sched_engine->lock,
flags);
}
}
if (rq)
rq = i915_request_get_rcu(rq);
if (!rq)
goto no_request_capture;
intel_engine_get_hung_entity(engine, &ce, &rq);
if (rq && !i915_request_started(rq))
drm_info(&engine->gt->i915->drm, "Got hung context on %s with active request %lld:%lld [0x%04X] not yet started\n",
engine->name, rq->fence.context, rq->fence.seqno, ce->guc_id.id);
if (rq) {
capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL);
if (!capture) {
i915_request_put(rq);
goto no_request_capture;
} else if (ce) {
capture = engine_coredump_add_context(ee, ce, ATOMIC_MAYFAIL);
}
if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
intel_guc_capture_get_matching_node(engine->gt, ee, ce);
if (capture) {
intel_engine_coredump_add_vma(ee, capture, compress);
i915_request_put(rq);
return ee;
no_request_capture:
if (dump_flags & CORE_DUMP_FLAG_IS_GUC_CAPTURE)
intel_guc_capture_get_matching_node(engine->gt, ee, ce);
} else {
kfree(ee);
return NULL;
ee = NULL;
}
return ee;
}
static void
......
......@@ -94,7 +94,7 @@ struct intel_engine_coredump {
struct intel_instdone instdone;
/* GuC matched capture-lists info */
struct intel_guc_state_capture *capture;
struct intel_guc_state_capture *guc_capture;
struct __guc_capture_parsed_output *guc_capture_node;
struct i915_gem_context_coredump {
......
......@@ -8114,10 +8114,6 @@ enum skl_power_gate {
#define CLKGATE_DIS_MISC _MMIO(0x46534)
#define CLKGATE_DIS_MISC_DMASC_GATING_DIS REG_BIT(21)
#define GEN12_CULLBIT1 _MMIO(0x6100)
#define GEN12_CULLBIT2 _MMIO(0x7030)
#define GEN12_STATE_ACK_DEBUG _MMIO(0x20BC)
#define _MTL_CLKGATE_DIS_TRANS_A 0x604E8
#define _MTL_CLKGATE_DIS_TRANS_B 0x614E8
#define MTL_CLKGATE_DIS_TRANS(trans) _MMIO_TRANS2(trans, _MTL_CLKGATE_DIS_TRANS_A)
......
......@@ -14,8 +14,13 @@
int igt_flush_test(struct drm_i915_private *i915)
{
struct intel_gt *gt = to_gt(i915);
int ret = intel_gt_is_wedged(gt) ? -EIO : 0;
struct intel_gt *gt;
unsigned int i;
int ret = 0;
for_each_gt(gt, i915, i) {
if (intel_gt_is_wedged(gt))
ret = -EIO;
cond_resched();
......@@ -30,6 +35,7 @@ int igt_flush_test(struct drm_i915_private *i915)
intel_gt_set_wedged(gt);
ret = -EIO;
}
}
return ret;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment