Commit 79a899e3 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'drm-fixes-2024-08-24' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
 "Weekly fixes. xe and msm are the major groups, with
  amdgpu/i915/nouveau having smaller bits. xe has a bunch of hw
  workaround fixes that were found to be missing, so that is why there
  are a bunch of scattered fixes, and one larger one. But overall size
  doesn't look too out of the ordinary.

  msm:
   - virtual plane fixes:
      - drop yuv on hw where not supported
      - csc vs yuv format fix
      - rotation fix
   - fix fb cleanup on close
   - reset phy before link training
   - fix visual corruption at 4K
   - fix NULL ptr crash on hotplug
   - simplify debug macros
   - sc7180 fix
   - adreno firmware name error path fix

  amdgpu:
   - GFX10 firmware loading fix
   - SDMA 5.2 fix
   - Debugfs parameter validation fix
   - eGPU hotplug fix

  i915:
   - fix HDCP timeouts

  nouveau:
   - fix SG_DEBUG crash

  xe:
   - Fix OA format masks which were breaking build with gcc-5
   - Fix opregion leak (Lucas)
   - Fix OA sysfs entry (Ashutosh)
   - Fix VM dma-resv lock (Brost)
   - Fix tile fini sequence (Brost)
   - Prevent UAF around preempt fence (Auld)
   - Fix DGFX display suspend/resume (Maarten)
   - Many Xe/Xe2 critical workarounds (Auld, Ngai-Mint, Bommu, Tejas, Daniele)
   - Fix devm/drmm issues (Daniele)
   - Fix missing workqueue destroy in xe_gt_pagefault (Stuart)
   - Drop HW fence pointer to HW fence ctx (Brost)
   - Free job before xe_exec_queue_put (Brost)"

* tag 'drm-fixes-2024-08-24' of https://gitlab.freedesktop.org/drm/kernel: (35 commits)
  drm/xe: Free job before xe_exec_queue_put
  drm/xe: Drop HW fence pointer to HW fence ctx
  drm/xe: Fix missing workqueue destroy in xe_gt_pagefault
  drm/amdgpu: fix eGPU hotplug regression
  drm/amdgpu: Validate TA binary size
  drm/amdgpu/sdma5.2: limit wptr workaround to sdma 5.2.1
  drm/amdgpu: fixing rlc firmware loading failure issue
  drm/xe/uc: Use devm to register cleanup that includes exec_queues
  drm/xe: use devm instead of drmm for managed bo
  drm/xe/xe2hpg: Add Wa_14021821874
  drm/xe: fix WA 14018094691
  drm/xe/xe2: Add Wa_15015404425
  drm/xe/xe2: Make subsequent L2 flush sequential
  drm/xe/xe2lpg: Extend workaround 14021402888
  drm/xe/xe2lpm: Extend Wa_16021639441
  drm/xe/bmg: implement Wa_16023588340
  drm/xe/oa/uapi: Make bit masks unsigned
  drm/xe/display: Make display suspend/resume work on discrete
  drm/xe: prevent UAF around preempt fence
  drm/xe: Fix tile fini sequence
  ...
parents d5afaf91 76f46186
......@@ -278,7 +278,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev,
msg = RREG32(mmMP0_SMN_C2PMSG_33);
if (msg & 0x80000000)
break;
usleep_range(1000, 1100);
msleep(1);
}
}
......
......@@ -166,6 +166,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t
if (ret)
return -EFAULT;
if (ta_bin_len > PSP_1_MEG)
return -EINVAL;
copy_pos += sizeof(uint32_t);
ta_bin = kzalloc(ta_bin_len, GFP_KERNEL);
......
......@@ -4116,6 +4116,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev)
static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
{
char fw_name[53];
char ucode_prefix[30];
const char *wks = "";
int err;
......@@ -4149,8 +4150,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE);
if (!amdgpu_sriov_vf(adev)) {
err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw,
"amdgpu/%s_rlc.bin", ucode_prefix);
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix);
err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
if (err)
goto out;
......
......@@ -176,6 +176,7 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) {
/* SDMA seems to miss doorbells sometimes when powergating kicks in.
* Updating the wptr directly will wake it. This is only safe because
* we disallow gfxoff in begin_use() and then allow it again in end_use().
......@@ -184,6 +185,7 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
lower_32_bits(ring->wptr << 2));
WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
upper_32_bits(ring->wptr << 2));
}
} else {
DRM_DEBUG("Not using doorbell -- "
"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
......
......@@ -39,7 +39,9 @@ static u32 transcoder_to_stream_enc_status(enum transcoder cpu_transcoder)
static void intel_dp_hdcp_wait_for_cp_irq(struct intel_connector *connector,
int timeout)
{
struct intel_hdcp *hdcp = &connector->hdcp;
struct intel_digital_port *dig_port = intel_attached_dig_port(connector);
struct intel_dp *dp = &dig_port->dp;
struct intel_hdcp *hdcp = &dp->attached_connector->hdcp;
long ret;
#define C (hdcp->cp_irq_count_cached != atomic_read(&hdcp->cp_irq_count))
......
......@@ -99,7 +99,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname,
* was a bad idea, and is only provided for backwards
* compatibility for older targets.
*/
return -ENODEV;
return -ENOENT;
}
if (IS_ERR(fw)) {
......
......@@ -1171,8 +1171,6 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc,
cstate->num_mixers = num_lm;
dpu_enc->connector = conn_state->connector;
for (i = 0; i < dpu_enc->num_phys_encs; i++) {
struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i];
......@@ -1270,6 +1268,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc,
dpu_enc->commit_done_timedout = false;
dpu_enc->connector = drm_atomic_get_new_connector_for_encoder(state, drm_enc);
cur_mode = &dpu_enc->base.crtc->state->adjusted_mode;
dpu_enc->wide_bus_en = dpu_encoder_is_widebus_enabled(drm_enc);
......
......@@ -308,8 +308,8 @@ static const u32 wb2_formats_rgb_yuv[] = {
{ \
.maxdwnscale = SSPP_UNITY_SCALE, \
.maxupscale = SSPP_UNITY_SCALE, \
.format_list = plane_formats_yuv, \
.num_formats = ARRAY_SIZE(plane_formats_yuv), \
.format_list = plane_formats, \
.num_formats = ARRAY_SIZE(plane_formats), \
.virt_format_list = plane_formats, \
.virt_num_formats = ARRAY_SIZE(plane_formats), \
}
......
......@@ -31,24 +31,14 @@
* @fmt: Pointer to format string
*/
#define DPU_DEBUG(fmt, ...) \
do { \
if (drm_debug_enabled(DRM_UT_KMS)) \
DRM_DEBUG(fmt, ##__VA_ARGS__); \
else \
pr_debug(fmt, ##__VA_ARGS__); \
} while (0)
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
/**
* DPU_DEBUG_DRIVER - macro for hardware driver logging
* @fmt: Pointer to format string
*/
#define DPU_DEBUG_DRIVER(fmt, ...) \
do { \
if (drm_debug_enabled(DRM_UT_DRIVER)) \
DRM_ERROR(fmt, ##__VA_ARGS__); \
else \
pr_debug(fmt, ##__VA_ARGS__); \
} while (0)
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
#define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__)
#define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__)
......
......@@ -681,6 +681,9 @@ static int dpu_plane_prepare_fb(struct drm_plane *plane,
new_state->fb, &layout);
if (ret) {
DPU_ERROR_PLANE(pdpu, "failed to get format layout, %d\n", ret);
if (pstate->aspace)
msm_framebuffer_cleanup(new_state->fb, pstate->aspace,
pstate->needs_dirtyfb);
return ret;
}
......@@ -744,10 +747,9 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu,
min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1;
if (MSM_FORMAT_IS_YUV(fmt) &&
(!pipe->sspp->cap->sblk->scaler_blk.len ||
!pipe->sspp->cap->sblk->csc_blk.len)) {
!pipe->sspp->cap->sblk->csc_blk.len) {
DPU_DEBUG_PLANE(pdpu,
"plane doesn't have scaler/csc for yuv\n");
"plane doesn't have csc for yuv\n");
return -EINVAL;
}
......@@ -864,6 +866,10 @@ static int dpu_plane_atomic_check(struct drm_plane *plane,
max_linewidth = pdpu->catalog->caps->max_linewidth;
drm_rect_rotate(&pipe_cfg->src_rect,
new_plane_state->fb->width, new_plane_state->fb->height,
new_plane_state->rotation);
if ((drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) ||
_dpu_plane_calc_clk(&crtc_state->adjusted_mode, pipe_cfg) > max_mdp_clk_rate) {
/*
......@@ -913,6 +919,14 @@ static int dpu_plane_atomic_check(struct drm_plane *plane,
r_pipe_cfg->dst_rect.x1 = pipe_cfg->dst_rect.x2;
}
drm_rect_rotate_inv(&pipe_cfg->src_rect,
new_plane_state->fb->width, new_plane_state->fb->height,
new_plane_state->rotation);
if (r_pipe->sspp)
drm_rect_rotate_inv(&r_pipe_cfg->src_rect,
new_plane_state->fb->width, new_plane_state->fb->height,
new_plane_state->rotation);
ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, &crtc_state->adjusted_mode);
if (ret)
return ret;
......
......@@ -1286,6 +1286,8 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl,
link_info.rate = ctrl->link->link_params.rate;
link_info.capabilities = DP_LINK_CAP_ENHANCED_FRAMING;
dp_link_reset_phy_params_vx_px(ctrl->link);
dp_aux_link_configure(ctrl->aux, &link_info);
if (drm_dp_max_downspread(dpcd))
......
......@@ -90,22 +90,22 @@ static int dp_panel_read_dpcd(struct dp_panel *dp_panel)
static u32 dp_panel_get_supported_bpp(struct dp_panel *dp_panel,
u32 mode_edid_bpp, u32 mode_pclk_khz)
{
struct dp_link_info *link_info;
const struct dp_link_info *link_info;
const u32 max_supported_bpp = 30, min_supported_bpp = 18;
u32 bpp = 0, data_rate_khz = 0;
u32 bpp, data_rate_khz;
bpp = min_t(u32, mode_edid_bpp, max_supported_bpp);
bpp = min(mode_edid_bpp, max_supported_bpp);
link_info = &dp_panel->link_info;
data_rate_khz = link_info->num_lanes * link_info->rate * 8;
while (bpp > min_supported_bpp) {
do {
if (mode_pclk_khz * bpp <= data_rate_khz)
break;
return bpp;
bpp -= 6;
}
} while (bpp > min_supported_bpp);
return bpp;
return min_supported_bpp;
}
int dp_panel_read_sink_caps(struct dp_panel *dp_panel,
......@@ -423,8 +423,9 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel)
drm_mode->clock);
drm_dbg_dp(panel->drm_dev, "bpp = %d\n", dp_panel->dp_mode.bpp);
dp_panel->dp_mode.bpp = max_t(u32, 18,
min_t(u32, dp_panel->dp_mode.bpp, 30));
dp_panel->dp_mode.bpp = dp_panel_get_mode_bpp(dp_panel, dp_panel->dp_mode.bpp,
dp_panel->dp_mode.drm_mode.clock);
drm_dbg_dp(panel->drm_dev, "updated bpp = %d\n",
dp_panel->dp_mode.bpp);
......
......@@ -577,7 +577,7 @@ static const struct msm_mdss_data sc7180_data = {
.ubwc_enc_version = UBWC_2_0,
.ubwc_dec_version = UBWC_2_0,
.ubwc_static = 0x1e,
.highest_bank_bit = 0x3,
.highest_bank_bit = 0x1,
.reg_bus_bw = 76800,
};
......
......@@ -205,7 +205,8 @@ nvkm_firmware_dtor(struct nvkm_firmware *fw)
break;
case NVKM_FIRMWARE_IMG_DMA:
nvkm_memory_unref(&memory);
dma_free_coherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), fw->img, fw->phys);
dma_free_noncoherent(fw->device->dev, sg_dma_len(&fw->mem.sgl),
fw->img, fw->phys, DMA_TO_DEVICE);
break;
case NVKM_FIRMWARE_IMG_SGT:
nvkm_memory_unref(&memory);
......@@ -236,10 +237,12 @@ nvkm_firmware_ctor(const struct nvkm_firmware_func *func, const char *name,
break;
case NVKM_FIRMWARE_IMG_DMA: {
dma_addr_t addr;
len = ALIGN(fw->len, PAGE_SIZE);
fw->img = dma_alloc_coherent(fw->device->dev, len, &addr, GFP_KERNEL);
fw->img = dma_alloc_noncoherent(fw->device->dev,
len, &addr,
DMA_TO_DEVICE,
GFP_KERNEL);
if (fw->img) {
memcpy(fw->img, src, fw->len);
fw->phys = addr;
......
......@@ -89,6 +89,12 @@ nvkm_falcon_fw_boot(struct nvkm_falcon_fw *fw, struct nvkm_subdev *user,
nvkm_falcon_fw_dtor_sigs(fw);
}
/* after last write to the img, sync dma mappings */
dma_sync_single_for_device(fw->fw.device->dev,
fw->fw.phys,
sg_dma_len(&fw->fw.mem.sgl),
DMA_TO_DEVICE);
FLCNFW_DBG(fw, "resetting");
fw->func->reset(fw);
......
......@@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
uses_generated_oob := \
$(obj)/xe_ggtt.o \
$(obj)/xe_device.o \
$(obj)/xe_gsc.o \
$(obj)/xe_gt.o \
$(obj)/xe_guc.o \
$(obj)/xe_guc_ads.o \
$(obj)/xe_guc_pc.o \
$(obj)/xe_migrate.o \
$(obj)/xe_pat.o \
$(obj)/xe_ring_ops.o \
$(obj)/xe_vm.o \
$(obj)/xe_wa.o \
......
......@@ -132,6 +132,7 @@ static void xe_display_fini_noirq(void *arg)
return;
intel_display_driver_remove_noirq(xe);
intel_opregion_cleanup(xe);
}
int xe_display_init_noirq(struct xe_device *xe)
......@@ -157,8 +158,10 @@ int xe_display_init_noirq(struct xe_device *xe)
intel_display_device_info_runtime_init(xe);
err = intel_display_driver_probe_noirq(xe);
if (err)
if (err) {
intel_opregion_cleanup(xe);
return err;
}
return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noirq, xe);
}
......@@ -280,6 +283,27 @@ static bool suspend_to_idle(void)
return false;
}
static void xe_display_flush_cleanup_work(struct xe_device *xe)
{
struct intel_crtc *crtc;
for_each_intel_crtc(&xe->drm, crtc) {
struct drm_crtc_commit *commit;
spin_lock(&crtc->base.commit_lock);
commit = list_first_entry_or_null(&crtc->base.commit_list,
struct drm_crtc_commit, commit_entry);
if (commit)
drm_crtc_commit_get(commit);
spin_unlock(&crtc->base.commit_lock);
if (commit) {
wait_for_completion(&commit->cleanup_done);
drm_crtc_commit_put(commit);
}
}
}
void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
{
bool s2idle = suspend_to_idle();
......@@ -297,6 +321,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
if (!runtime)
intel_display_driver_suspend(xe);
xe_display_flush_cleanup_work(xe);
intel_dp_mst_suspend(xe);
intel_hpd_cancel_work(xe);
......
......@@ -7,6 +7,8 @@
#include "intel_display_types.h"
#include "intel_dsb_buffer.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_device_types.h"
#include "xe_gt.h"
u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
......@@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
{
struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
xe_device_l2_flush(xe);
}
u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
......@@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
{
struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
xe_device_l2_flush(xe);
}
bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
......
......@@ -10,6 +10,7 @@
#include "intel_fb.h"
#include "intel_fb_pin.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_pm.h"
......@@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
if (ret)
goto err_unpin;
/* Ensure DPT writes are flushed */
xe_device_l2_flush(xe);
return vma;
err_unpin:
......
......@@ -80,6 +80,9 @@
#define LE_CACHEABILITY_MASK REG_GENMASK(1, 0)
#define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value)
#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194)
#define CG_DIS_CNTLBUS REG_BIT(6)
#define CCS_AUX_INV XE_REG(0x4208)
#define VD0_AUX_INV XE_REG(0x4218)
......@@ -372,6 +375,11 @@
#define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8)
#define XE2_GLOBAL_INVAL XE_REG(0xb404)
#define SCRATCH1LPFC XE_REG(0xb474)
#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0)
#define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658)
#define XE2_TDF_CTRL XE_REG(0xb418)
......@@ -429,6 +437,7 @@
#define DIS_FIX_EOT1_FLUSH REG_BIT(9)
#define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED)
#define STK_ID_RESTRICT REG_BIT(12)
#define SLM_WMTP_RESTORE REG_BIT(11)
#define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED)
......
......@@ -1575,7 +1575,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
return bo;
}
static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg)
static void __xe_bo_unpin_map_no_vm(void *arg)
{
xe_bo_unpin_map_no_vm(arg);
}
......@@ -1590,7 +1590,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
if (IS_ERR(bo))
return bo;
ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo);
ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo);
if (ret)
return ERR_PTR(ret);
......@@ -1638,7 +1638,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str
if (IS_ERR(bo))
return PTR_ERR(bo);
drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src);
devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src);
*src = bo;
return 0;
......
......@@ -54,6 +54,9 @@
#include "xe_vm.h"
#include "xe_vram.h"
#include "xe_wait_user_fence.h"
#include "xe_wa.h"
#include <generated/xe_wa_oob.h>
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
{
......@@ -820,6 +823,11 @@ void xe_device_td_flush(struct xe_device *xe)
if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
return;
if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
xe_device_l2_flush(xe);
return;
}
for_each_gt(gt, xe, id) {
if (xe_gt_is_media_type(gt))
continue;
......@@ -843,6 +851,30 @@ void xe_device_td_flush(struct xe_device *xe)
}
}
void xe_device_l2_flush(struct xe_device *xe)
{
struct xe_gt *gt;
int err;
gt = xe_root_mmio_gt(xe);
if (!XE_WA(gt, 16023588340))
return;
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (err)
return;
spin_lock(&gt->global_invl_lock);
xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1);
if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true))
xe_gt_err_once(gt, "Global invalidation timeout\n");
spin_unlock(&gt->global_invl_lock);
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
return xe_device_has_flat_ccs(xe) ?
......
......@@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address);
u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address);
void xe_device_td_flush(struct xe_device *xe);
void xe_device_l2_flush(struct xe_device *xe);
static inline bool xe_device_wedged(struct xe_device *xe)
{
......
......@@ -105,22 +105,35 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
static int __xe_exec_queue_init(struct xe_exec_queue *q)
{
struct xe_vm *vm = q->vm;
int i, err;
if (vm) {
err = xe_vm_lock(vm, true);
if (err)
return err;
}
for (i = 0; i < q->width; ++i) {
q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K);
if (IS_ERR(q->lrc[i])) {
err = PTR_ERR(q->lrc[i]);
goto err_lrc;
goto err_unlock;
}
}
if (vm)
xe_vm_unlock(vm);
err = q->ops->init(q);
if (err)
goto err_lrc;
return 0;
err_unlock:
if (vm)
xe_vm_unlock(vm);
err_lrc:
for (i = i - 1; i >= 0; --i)
xe_lrc_put(q->lrc[i]);
......@@ -140,15 +153,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
if (IS_ERR(q))
return q;
if (vm) {
err = xe_vm_lock(vm, true);
if (err)
goto err_post_alloc;
}
err = __xe_exec_queue_init(q);
if (vm)
xe_vm_unlock(vm);
if (err)
goto err_post_alloc;
......@@ -638,7 +643,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
if (xe_vm_in_preempt_fence_mode(vm)) {
q->lr.context = dma_fence_context_alloc(1);
spin_lock_init(&q->lr.lock);
err = xe_vm_add_compute_exec_queue(vm, q);
if (XE_IOCTL_DBG(xe, err))
......
......@@ -126,8 +126,6 @@ struct xe_exec_queue {
u32 seqno;
/** @lr.link: link into VM's list of exec queues */
struct list_head link;
/** @lr.lock: preemption fences lock */
spinlock_t lock;
} lr;
/** @ops: submission backend exec queue operations */
......
......@@ -260,7 +260,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
struct xe_tile *tile = gt_to_tile(gt);
int ret;
if (XE_WA(gt, 14018094691)) {
if (XE_WA(tile->primary_gt, 14018094691)) {
ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
/*
......@@ -278,7 +278,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
ret = gsc_upload(gsc);
if (XE_WA(gt, 14018094691))
if (XE_WA(tile->primary_gt, 14018094691))
xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
if (ret)
......@@ -437,7 +437,7 @@ int xe_gsc_init(struct xe_gsc *gsc)
return ret;
}
static void free_resources(struct drm_device *drm, void *arg)
static void free_resources(void *arg)
{
struct xe_gsc *gsc = arg;
......@@ -501,7 +501,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
gsc->q = q;
gsc->wq = wq;
err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc);
err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc);
if (err)
return err;
......
......@@ -11,6 +11,8 @@
#include <drm/xe_drm.h>
#include <generated/xe_wa_oob.h>
#include <generated/xe_wa_oob.h>
#include "instructions/xe_gfxpipe_commands.h"
#include "instructions/xe_mi_commands.h"
#include "regs/xe_gt_regs.h"
......@@ -95,6 +97,51 @@ void xe_gt_sanitize(struct xe_gt *gt)
gt->uc.guc.submission_state.enabled = false;
}
static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
{
u32 reg;
int err;
if (!XE_WA(gt, 16023588340))
return;
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (WARN_ON(err))
return;
if (!xe_gt_is_media_type(gt)) {
xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH);
reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
reg |= CG_DIS_CNTLBUS;
xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
}
xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
{
u32 reg;
int err;
if (!XE_WA(gt, 16023588340))
return;
if (xe_gt_is_media_type(gt))
return;
err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (WARN_ON(err))
return;
reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL);
reg &= ~CG_DIS_CNTLBUS;
xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg);
xe_force_wake_put(gt_to_fw(gt), XE_FW_GT);
}
/**
* xe_gt_remove() - Clean up the GT structures before driver removal
* @gt: the GT object
......@@ -111,6 +158,8 @@ void xe_gt_remove(struct xe_gt *gt)
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
xe_gt_disable_host_l2_vram(gt);
}
static void gt_reset_worker(struct work_struct *w);
......@@ -339,6 +388,7 @@ int xe_gt_init_early(struct xe_gt *gt)
xe_force_wake_init_gt(gt, gt_to_fw(gt));
xe_pcode_init(gt);
spin_lock_init(&gt->global_invl_lock);
return 0;
}
......@@ -508,6 +558,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt)
xe_gt_mcr_init_early(gt);
xe_pat_init(gt);
xe_gt_enable_host_l2_vram(gt);
err = xe_uc_init(&gt->uc);
if (err)
......@@ -643,6 +694,8 @@ static int do_gt_restart(struct xe_gt *gt)
xe_pat_init(gt);
xe_gt_enable_host_l2_vram(gt);
xe_gt_mcr_set_implicit_defaults(gt);
xe_reg_sr_apply_mmio(&gt->reg_sr, gt);
......@@ -796,6 +849,8 @@ int xe_gt_suspend(struct xe_gt *gt)
xe_gt_idle_disable_pg(gt);
xe_gt_disable_host_l2_vram(gt);
XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
xe_gt_dbg(gt, "suspended\n");
......
......@@ -382,6 +382,18 @@ static void pf_queue_work_func(struct work_struct *w)
static void acc_queue_work_func(struct work_struct *w);
static void pagefault_fini(void *arg)
{
struct xe_gt *gt = arg;
struct xe_device *xe = gt_to_xe(gt);
if (!xe->info.has_usm)
return;
destroy_workqueue(gt->usm.acc_wq);
destroy_workqueue(gt->usm.pf_wq);
}
int xe_gt_pagefault_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
......@@ -409,10 +421,12 @@ int xe_gt_pagefault_init(struct xe_gt *gt)
gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
WQ_UNBOUND | WQ_HIGHPRI,
NUM_ACC_QUEUE);
if (!gt->usm.acc_wq)
if (!gt->usm.acc_wq) {
destroy_workqueue(gt->usm.pf_wq);
return -ENOMEM;
}
return 0;
return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt);
}
void xe_gt_pagefault_reset(struct xe_gt *gt)
......
......@@ -362,6 +362,12 @@ struct xe_gt {
*/
spinlock_t mcr_lock;
/**
* @global_invl_lock: protects the register for the duration
* of a global invalidation of l2 cache
*/
spinlock_t global_invl_lock;
/** @wa_active: keep track of active workarounds */
struct {
/** @wa_active.gt: bitmap with active GT workarounds */
......
......@@ -284,7 +284,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg)
free_submit_wq(guc);
}
static void guc_submit_wedged_fini(struct drm_device *drm, void *arg)
static void guc_submit_wedged_fini(void *arg)
{
struct xe_guc *guc = arg;
struct xe_exec_queue *q;
......@@ -877,7 +877,7 @@ void xe_guc_submit_wedge(struct xe_guc *guc)
xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode);
err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm,
err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev,
guc_submit_wedged_fini, guc);
if (err) {
drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n");
......
......@@ -148,20 +148,20 @@ static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence)
{
struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev);
return dev_name(fence->xe->drm.dev);
}
static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence)
{
struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
return fence->ctx->name;
return fence->name;
}
static bool xe_hw_fence_signaled(struct dma_fence *dma_fence)
{
struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence);
struct xe_device *xe = gt_to_xe(fence->ctx->gt);
struct xe_device *xe = fence->xe;
u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32);
return dma_fence->error ||
......@@ -253,7 +253,8 @@ void xe_hw_fence_init(struct dma_fence *fence, struct xe_hw_fence_ctx *ctx,
struct xe_hw_fence *hw_fence =
container_of(fence, typeof(*hw_fence), dma);
hw_fence->ctx = ctx;
hw_fence->xe = gt_to_xe(ctx->gt);
snprintf(hw_fence->name, sizeof(hw_fence->name), "%s", ctx->name);
hw_fence->seqno_map = seqno_map;
INIT_LIST_HEAD(&hw_fence->irq_link);
......
......@@ -12,6 +12,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
struct xe_device;
struct xe_gt;
/**
......@@ -61,8 +62,10 @@ struct xe_hw_fence_ctx {
struct xe_hw_fence {
/** @dma: base dma fence for hardware fence context */
struct dma_fence dma;
/** @ctx: hardware fence context */
struct xe_hw_fence_ctx *ctx;
/** @xe: Xe device for hw fence driver name */
struct xe_device *xe;
/** @name: name of hardware fence context */
char name[MAX_FENCE_NAME_LEN];
/** @seqno_map: I/O map for seqno */
struct iosys_map seqno_map;
/** @irq_link: Link in struct xe_hw_fence_irq.pending */
......
......@@ -30,6 +30,7 @@ static void tiles_fini(void *arg)
int id;
for_each_tile(tile, xe, id)
if (tile != xe_device_get_root_tile(xe))
tile->mmio.regs = NULL;
}
......@@ -91,9 +92,11 @@ int xe_mmio_probe_tiles(struct xe_device *xe)
static void mmio_fini(void *arg)
{
struct xe_device *xe = arg;
struct xe_tile *root_tile = xe_device_get_root_tile(xe);
pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs);
xe->mmio.regs = NULL;
root_tile->mmio.regs = NULL;
}
int xe_mmio_init(struct xe_device *xe)
......@@ -121,12 +124,29 @@ int xe_mmio_init(struct xe_device *xe)
return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe);
}
static void mmio_flush_pending_writes(struct xe_gt *gt)
{
#define DUMMY_REG_OFFSET 0x130030
struct xe_tile *tile = gt_to_tile(gt);
int i;
if (tile->xe->info.platform != XE_LUNARLAKE)
return;
/* 4 dummy writes */
for (i = 0; i < 4; i++)
writel(0, tile->mmio.regs + DUMMY_REG_OFFSET);
}
u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
{
struct xe_tile *tile = gt_to_tile(gt);
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
u8 val;
/* Wa_15015404425 */
mmio_flush_pending_writes(gt);
val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
......@@ -139,6 +159,9 @@ u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg)
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
u16 val;
/* Wa_15015404425 */
mmio_flush_pending_writes(gt);
val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr);
trace_xe_reg_rw(gt, false, addr, val, sizeof(val));
......@@ -160,6 +183,9 @@ u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
u32 addr = xe_mmio_adjusted_addr(gt, reg.addr);
u32 val;
/* Wa_15015404425 */
mmio_flush_pending_writes(gt);
if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt)))
val = xe_gt_sriov_vf_read32(gt, reg);
else
......
......@@ -66,7 +66,6 @@ static struct ctl_table observation_ctl_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{}
};
/**
......
......@@ -7,6 +7,8 @@
#include <drm/xe_drm.h>
#include <generated/xe_wa_oob.h>
#include "regs/xe_reg_defs.h"
#include "xe_assert.h"
#include "xe_device.h"
......@@ -15,6 +17,7 @@
#include "xe_gt_mcr.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_wa.h"
#define _PAT_ATS 0x47fc
#define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \
......@@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe)
if (GRAPHICS_VER(xe) == 20) {
xe->pat.ops = &xe2_pat_ops;
xe->pat.table = xe2_pat_table;
/* Wa_16023588340. XXX: Should use XE_WA */
if (GRAPHICS_VERx100(xe) == 2001)
xe->pat.n_entries = 28; /* Disable CLOS3 */
else
xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table);
xe->pat.idx[XE_CACHE_NONE] = 3;
xe->pat.idx[XE_CACHE_WT] = 15;
xe->pat.idx[XE_CACHE_WB] = 2;
......
......@@ -91,13 +91,13 @@ int xe_pm_suspend(struct xe_device *xe)
for_each_gt(gt, xe, id)
xe_gt_suspend_prepare(gt);
xe_display_pm_suspend(xe, false);
/* FIXME: Super racey... */
err = xe_bo_evict_all(xe);
if (err)
goto err;
xe_display_pm_suspend(xe, false);
for_each_gt(gt, xe, id) {
err = xe_gt_suspend(gt);
if (err) {
......@@ -151,11 +151,11 @@ int xe_pm_resume(struct xe_device *xe)
xe_irq_resume(xe);
xe_display_pm_resume(xe, false);
for_each_gt(gt, xe, id)
xe_gt_resume(gt);
xe_display_pm_resume(xe, false);
err = xe_bo_restore_user(xe);
if (err)
goto err;
......@@ -363,10 +363,11 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
mutex_unlock(&xe->mem_access.vram_userfault.lock);
if (xe->d3cold.allowed) {
xe_display_pm_suspend(xe, true);
err = xe_bo_evict_all(xe);
if (err)
goto out;
xe_display_pm_suspend(xe, true);
}
for_each_gt(gt, xe, id) {
......
......@@ -128,8 +128,9 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q,
{
list_del_init(&pfence->link);
pfence->q = xe_exec_queue_get(q);
spin_lock_init(&pfence->lock);
dma_fence_init(&pfence->base, &preempt_fence_ops,
&q->lr.lock, context, seqno);
&pfence->lock, context, seqno);
return &pfence->base;
}
......
......@@ -25,6 +25,8 @@ struct xe_preempt_fence {
struct xe_exec_queue *q;
/** @preempt_work: work struct which issues preemption */
struct work_struct preempt_work;
/** @lock: dma-fence fence lock */
spinlock_t lock;
/** @error: preempt fence is in error state */
int error;
};
......
......@@ -171,12 +171,13 @@ void xe_sched_job_destroy(struct kref *ref)
struct xe_sched_job *job =
container_of(ref, struct xe_sched_job, refcount);
struct xe_device *xe = job_to_xe(job);
struct xe_exec_queue *q = job->q;
xe_sched_job_free_fences(job);
xe_exec_queue_put(job->q);
dma_fence_put(job->fence);
drm_sched_job_cleanup(&job->drm);
job_free(job);
xe_exec_queue_put(q);
xe_pm_runtime_put(xe);
}
......
......@@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(xe_hw_fence,
TP_ARGS(fence),
TP_STRUCT__entry(
__string(dev, __dev_name_gt(fence->ctx->gt))
__string(dev, __dev_name_xe(fence->xe))
__field(u64, ctx)
__field(u32, seqno)
__field(struct xe_hw_fence *, fence)
......
......@@ -486,6 +486,10 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE))
},
{ XE_RTP_NAME("14021402888"),
XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
},
/* Xe2_HPG */
......@@ -538,6 +542,20 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
},
{ XE_RTP_NAME("14021821874"),
XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT))
},
/* Xe2_LPM */
{ XE_RTP_NAME("16021639441"),
XE_RTP_RULES(MEDIA_VERSION(2000)),
XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
GHWSP_CSB_REPORT_DIS |
PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
/* Xe2_HPM */
......
......@@ -29,3 +29,4 @@
13011645652 GRAPHICS_VERSION(2004)
22019338487 MEDIA_VERSION(2000)
GRAPHICS_VERSION(2001)
16023588340 GRAPHICS_VERSION(2001)
......@@ -1590,10 +1590,10 @@ enum drm_xe_oa_property_id {
* b. Counter select c. Counter size and d. BC report. Also refer to the
* oa_formats array in drivers/gpu/drm/xe/xe_oa.c.
*/
#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0)
#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8)
#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16)
#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24)
#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0)
#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8)
#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16)
#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24)
/**
* @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment