Commit befcc893 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'drm-fixes-2024-10-11' of https://gitlab.freedesktop.org/drm/kernel

Pull drm fixes from Dave Airlie:
 "Weekly fixes haul for drm, lots of small fixes all over, amdgpu, xe
  lead the way, some minor nouveau and radeon fixes, and then a bunch of
  misc all over.

  Nothing too scary or out of the ordinary.

  sched:
   - Avoid leaking lockdep map

  fbdev-dma:
   - Only clean up deferred I/O if instantiated

  amdgpu:
   - Fix invalid UBSAN warnings
   - Fix artifacts in MPO transitions
   - Hibernation fix

  amdkfd:
   - Fix an eviction fence leak

  radeon:
   - Add late register for connectors
   - Always set GEM function pointers

  i915:
   - HDCP refcount fix

  nouveau:
   - dmem: Fix privileged error in copy engine channel; Fix possible
     data leak in migrate_to_ram()
   - gsp: Fix coding style

  v3d:
   - Stop active perfmon before destroying it

  vc4:
   - Stop active perfmon before destroying it

  xe:
   - Drop GuC submit_wq pool
   - Fix error checking with xa_store()
   - Fix missing freq restore on GSC load error
   - Fix wedged_mode file permission
   - Fix use-after-free in ct communication"

* tag 'drm-fixes-2024-10-11' of https://gitlab.freedesktop.org/drm/kernel:
  drm/fbdev-dma: Only cleanup deferred I/O if necessary
  drm/xe: Make wedged_mode debugfs writable
  drm/xe: Restore GT freq on GSC load error
  drm/xe/guc_submit: fix xa_store() error checking
  drm/xe/ct: fix xa_store() error checking
  drm/xe/ct: prevent UAF in send_recv()
  drm/radeon: always set GEM function pointer
  nouveau/dmem: Fix vulnerability in migrate_to_ram upon copy error
  nouveau/dmem: Fix privileged error in copy engine channel
  drm/amd/display: fix hibernate entry for DCN35+
  drm/amd/display: Clear update flags after update has been applied
  drm/amdgpu: partially revert powerplay `__counted_by` changes
  drm/radeon: add late_register for connector
  drm/amdkfd: Fix an eviction fence leak
  drm/vc4: Stop the active perfmon before being destroyed
  drm/v3d: Stop the active perfmon before being destroyed
  drm/i915/hdcp: fix connector refcounting
  drm/nouveau/gsp: remove extraneous ; after mutex
  drm/xe: Drop GuC submit_wq pool
  drm/sched: Use drm sched lockdep map for submit_wq
parents 1d227fcc ac44ff7c
...@@ -1439,7 +1439,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, ...@@ -1439,7 +1439,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
list_add_tail(&vm->vm_list_node, list_add_tail(&vm->vm_list_node,
&(vm->process_info->vm_list_head)); &(vm->process_info->vm_list_head));
vm->process_info->n_vms++; vm->process_info->n_vms++;
if (ef)
*ef = dma_fence_get(&vm->process_info->eviction_fence->base); *ef = dma_fence_get(&vm->process_info->eviction_fence->base);
mutex_unlock(&vm->process_info->lock); mutex_unlock(&vm->process_info->lock);
......
...@@ -1702,12 +1702,15 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, ...@@ -1702,12 +1702,15 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm, ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, avm,
&p->kgd_process_info, &p->kgd_process_info,
&ef); p->ef ? NULL : &ef);
if (ret) { if (ret) {
dev_err(dev->adev->dev, "Failed to create process VM object\n"); dev_err(dev->adev->dev, "Failed to create process VM object\n");
return ret; return ret;
} }
if (!p->ef)
RCU_INIT_POINTER(p->ef, ef); RCU_INIT_POINTER(p->ef, ef);
pdd->drm_priv = drm_file->private_data; pdd->drm_priv = drm_file->private_data;
ret = kfd_process_device_reserve_ib_mem(pdd); ret = kfd_process_device_reserve_ib_mem(pdd);
......
...@@ -2972,10 +2972,11 @@ static int dm_suspend(void *handle) ...@@ -2972,10 +2972,11 @@ static int dm_suspend(void *handle)
hpd_rx_irq_work_suspend(dm); hpd_rx_irq_work_suspend(dm);
if (adev->dm.dc->caps.ips_support)
dc_allow_idle_optimizations(adev->dm.dc, true);
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3);
if (dm->dc->caps.ips_support && adev->in_s0ix)
dc_allow_idle_optimizations(dm->dc, true);
dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3);
return 0; return 0;
......
...@@ -5065,11 +5065,26 @@ static bool update_planes_and_stream_v3(struct dc *dc, ...@@ -5065,11 +5065,26 @@ static bool update_planes_and_stream_v3(struct dc *dc,
return true; return true;
} }
static void clear_update_flags(struct dc_surface_update *srf_updates,
int surface_count, struct dc_stream_state *stream)
{
int i;
if (stream)
stream->update_flags.raw = 0;
for (i = 0; i < surface_count; i++)
if (srf_updates[i].surface)
srf_updates[i].surface->update_flags.raw = 0;
}
bool dc_update_planes_and_stream(struct dc *dc, bool dc_update_planes_and_stream(struct dc *dc,
struct dc_surface_update *srf_updates, int surface_count, struct dc_surface_update *srf_updates, int surface_count,
struct dc_stream_state *stream, struct dc_stream_state *stream,
struct dc_stream_update *stream_update) struct dc_stream_update *stream_update)
{ {
bool ret = false;
dc_exit_ips_for_hw_access(dc); dc_exit_ips_for_hw_access(dc);
/* /*
* update planes and stream version 3 separates FULL and FAST updates * update planes and stream version 3 separates FULL and FAST updates
...@@ -5086,10 +5101,16 @@ bool dc_update_planes_and_stream(struct dc *dc, ...@@ -5086,10 +5101,16 @@ bool dc_update_planes_and_stream(struct dc *dc,
* features as they are now transparent to the new sequence. * features as they are now transparent to the new sequence.
*/ */
if (dc->ctx->dce_version >= DCN_VERSION_4_01) if (dc->ctx->dce_version >= DCN_VERSION_4_01)
return update_planes_and_stream_v3(dc, srf_updates, ret = update_planes_and_stream_v3(dc, srf_updates,
surface_count, stream, stream_update); surface_count, stream, stream_update);
return update_planes_and_stream_v2(dc, srf_updates, else
ret = update_planes_and_stream_v2(dc, srf_updates,
surface_count, stream, stream_update); surface_count, stream, stream_update);
if (ret)
clear_update_flags(srf_updates, surface_count, stream);
return ret;
} }
void dc_commit_updates_for_stream(struct dc *dc, void dc_commit_updates_for_stream(struct dc *dc,
...@@ -5099,6 +5120,8 @@ void dc_commit_updates_for_stream(struct dc *dc, ...@@ -5099,6 +5120,8 @@ void dc_commit_updates_for_stream(struct dc *dc,
struct dc_stream_update *stream_update, struct dc_stream_update *stream_update,
struct dc_state *state) struct dc_state *state)
{ {
bool ret = false;
dc_exit_ips_for_hw_access(dc); dc_exit_ips_for_hw_access(dc);
/* TODO: Since change commit sequence can have a huge impact, /* TODO: Since change commit sequence can have a huge impact,
* we decided to only enable it for DCN3x. However, as soon as * we decided to only enable it for DCN3x. However, as soon as
...@@ -5106,17 +5129,17 @@ void dc_commit_updates_for_stream(struct dc *dc, ...@@ -5106,17 +5129,17 @@ void dc_commit_updates_for_stream(struct dc *dc,
* the new sequence for all ASICs. * the new sequence for all ASICs.
*/ */
if (dc->ctx->dce_version >= DCN_VERSION_4_01) { if (dc->ctx->dce_version >= DCN_VERSION_4_01) {
update_planes_and_stream_v3(dc, srf_updates, surface_count, ret = update_planes_and_stream_v3(dc, srf_updates, surface_count,
stream, stream_update); stream, stream_update);
return; } else if (dc->ctx->dce_version >= DCN_VERSION_3_2) {
} ret = update_planes_and_stream_v2(dc, srf_updates, surface_count,
if (dc->ctx->dce_version >= DCN_VERSION_3_2) {
update_planes_and_stream_v2(dc, srf_updates, surface_count,
stream, stream_update); stream, stream_update);
return; } else
} ret = update_planes_and_stream_v1(dc, srf_updates, surface_count, stream,
update_planes_and_stream_v1(dc, srf_updates, surface_count, stream,
stream_update, state); stream_update, state);
if (ret)
clear_update_flags(srf_updates, surface_count, stream);
} }
uint8_t dc_get_current_stream_count(struct dc *dc) uint8_t dc_get_current_stream_count(struct dc *dc)
......
...@@ -60,7 +60,7 @@ struct vi_dpm_level { ...@@ -60,7 +60,7 @@ struct vi_dpm_level {
struct vi_dpm_table { struct vi_dpm_table {
uint32_t count; uint32_t count;
struct vi_dpm_level dpm_level[] __counted_by(count); struct vi_dpm_level dpm_level[];
}; };
#define PCIE_PERF_REQ_REMOVE_REGISTRY 0 #define PCIE_PERF_REQ_REMOVE_REGISTRY 0
...@@ -91,7 +91,7 @@ struct phm_set_power_state_input { ...@@ -91,7 +91,7 @@ struct phm_set_power_state_input {
struct phm_clock_array { struct phm_clock_array {
uint32_t count; uint32_t count;
uint32_t values[] __counted_by(count); uint32_t values[];
}; };
struct phm_clock_voltage_dependency_record { struct phm_clock_voltage_dependency_record {
...@@ -123,7 +123,7 @@ struct phm_acpclock_voltage_dependency_record { ...@@ -123,7 +123,7 @@ struct phm_acpclock_voltage_dependency_record {
struct phm_clock_voltage_dependency_table { struct phm_clock_voltage_dependency_table {
uint32_t count; uint32_t count;
struct phm_clock_voltage_dependency_record entries[] __counted_by(count); struct phm_clock_voltage_dependency_record entries[];
}; };
struct phm_phase_shedding_limits_record { struct phm_phase_shedding_limits_record {
...@@ -140,7 +140,7 @@ struct phm_uvd_clock_voltage_dependency_record { ...@@ -140,7 +140,7 @@ struct phm_uvd_clock_voltage_dependency_record {
struct phm_uvd_clock_voltage_dependency_table { struct phm_uvd_clock_voltage_dependency_table {
uint8_t count; uint8_t count;
struct phm_uvd_clock_voltage_dependency_record entries[] __counted_by(count); struct phm_uvd_clock_voltage_dependency_record entries[];
}; };
struct phm_acp_clock_voltage_dependency_record { struct phm_acp_clock_voltage_dependency_record {
...@@ -150,7 +150,7 @@ struct phm_acp_clock_voltage_dependency_record { ...@@ -150,7 +150,7 @@ struct phm_acp_clock_voltage_dependency_record {
struct phm_acp_clock_voltage_dependency_table { struct phm_acp_clock_voltage_dependency_table {
uint32_t count; uint32_t count;
struct phm_acp_clock_voltage_dependency_record entries[] __counted_by(count); struct phm_acp_clock_voltage_dependency_record entries[];
}; };
struct phm_vce_clock_voltage_dependency_record { struct phm_vce_clock_voltage_dependency_record {
...@@ -161,32 +161,32 @@ struct phm_vce_clock_voltage_dependency_record { ...@@ -161,32 +161,32 @@ struct phm_vce_clock_voltage_dependency_record {
struct phm_phase_shedding_limits_table { struct phm_phase_shedding_limits_table {
uint32_t count; uint32_t count;
struct phm_phase_shedding_limits_record entries[] __counted_by(count); struct phm_phase_shedding_limits_record entries[];
}; };
struct phm_vceclock_voltage_dependency_table { struct phm_vceclock_voltage_dependency_table {
uint8_t count; uint8_t count;
struct phm_vceclock_voltage_dependency_record entries[] __counted_by(count); struct phm_vceclock_voltage_dependency_record entries[];
}; };
struct phm_uvdclock_voltage_dependency_table { struct phm_uvdclock_voltage_dependency_table {
uint8_t count; uint8_t count;
struct phm_uvdclock_voltage_dependency_record entries[] __counted_by(count); struct phm_uvdclock_voltage_dependency_record entries[];
}; };
struct phm_samuclock_voltage_dependency_table { struct phm_samuclock_voltage_dependency_table {
uint8_t count; uint8_t count;
struct phm_samuclock_voltage_dependency_record entries[] __counted_by(count); struct phm_samuclock_voltage_dependency_record entries[];
}; };
struct phm_acpclock_voltage_dependency_table { struct phm_acpclock_voltage_dependency_table {
uint32_t count; uint32_t count;
struct phm_acpclock_voltage_dependency_record entries[] __counted_by(count); struct phm_acpclock_voltage_dependency_record entries[];
}; };
struct phm_vce_clock_voltage_dependency_table { struct phm_vce_clock_voltage_dependency_table {
uint8_t count; uint8_t count;
struct phm_vce_clock_voltage_dependency_record entries[] __counted_by(count); struct phm_vce_clock_voltage_dependency_record entries[];
}; };
...@@ -393,7 +393,7 @@ union phm_cac_leakage_record { ...@@ -393,7 +393,7 @@ union phm_cac_leakage_record {
struct phm_cac_leakage_table { struct phm_cac_leakage_table {
uint32_t count; uint32_t count;
union phm_cac_leakage_record entries[] __counted_by(count); union phm_cac_leakage_record entries[];
}; };
struct phm_samu_clock_voltage_dependency_record { struct phm_samu_clock_voltage_dependency_record {
...@@ -404,7 +404,7 @@ struct phm_samu_clock_voltage_dependency_record { ...@@ -404,7 +404,7 @@ struct phm_samu_clock_voltage_dependency_record {
struct phm_samu_clock_voltage_dependency_table { struct phm_samu_clock_voltage_dependency_table {
uint8_t count; uint8_t count;
struct phm_samu_clock_voltage_dependency_record entries[] __counted_by(count); struct phm_samu_clock_voltage_dependency_record entries[];
}; };
struct phm_cac_tdp_table { struct phm_cac_tdp_table {
......
...@@ -50,6 +50,7 @@ static void drm_fbdev_dma_fb_destroy(struct fb_info *info) ...@@ -50,6 +50,7 @@ static void drm_fbdev_dma_fb_destroy(struct fb_info *info)
if (!fb_helper->dev) if (!fb_helper->dev)
return; return;
if (info->fbdefio)
fb_deferred_io_cleanup(info); fb_deferred_io_cleanup(info);
drm_fb_helper_fini(fb_helper); drm_fb_helper_fini(fb_helper);
......
...@@ -1094,7 +1094,8 @@ static void intel_hdcp_update_value(struct intel_connector *connector, ...@@ -1094,7 +1094,8 @@ static void intel_hdcp_update_value(struct intel_connector *connector,
hdcp->value = value; hdcp->value = value;
if (update_property) { if (update_property) {
drm_connector_get(&connector->base); drm_connector_get(&connector->base);
queue_work(i915->unordered_wq, &hdcp->prop_work); if (!queue_work(i915->unordered_wq, &hdcp->prop_work))
drm_connector_put(&connector->base);
} }
} }
...@@ -2524,7 +2525,8 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, ...@@ -2524,7 +2525,8 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state,
mutex_lock(&hdcp->mutex); mutex_lock(&hdcp->mutex);
hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED; hdcp->value = DRM_MODE_CONTENT_PROTECTION_DESIRED;
drm_connector_get(&connector->base); drm_connector_get(&connector->base);
queue_work(i915->unordered_wq, &hdcp->prop_work); if (!queue_work(i915->unordered_wq, &hdcp->prop_work))
drm_connector_put(&connector->base);
mutex_unlock(&hdcp->mutex); mutex_unlock(&hdcp->mutex);
} }
...@@ -2541,7 +2543,9 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state, ...@@ -2541,7 +2543,9 @@ void intel_hdcp_update_pipe(struct intel_atomic_state *state,
*/ */
if (!desired_and_not_enabled && !content_protection_type_changed) { if (!desired_and_not_enabled && !content_protection_type_changed) {
drm_connector_get(&connector->base); drm_connector_get(&connector->base);
queue_work(i915->unordered_wq, &hdcp->prop_work); if (!queue_work(i915->unordered_wq, &hdcp->prop_work))
drm_connector_put(&connector->base);
} }
} }
......
...@@ -210,7 +210,7 @@ struct nvkm_gsp { ...@@ -210,7 +210,7 @@ struct nvkm_gsp {
} *rm; } *rm;
struct { struct {
struct mutex mutex;; struct mutex mutex;
struct idr idr; struct idr idr;
} client_id; } client_id;
......
...@@ -193,7 +193,7 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf) ...@@ -193,7 +193,7 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
if (!spage || !(src & MIGRATE_PFN_MIGRATE)) if (!spage || !(src & MIGRATE_PFN_MIGRATE))
goto done; goto done;
dpage = alloc_page_vma(GFP_HIGHUSER, vmf->vma, vmf->address); dpage = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vmf->vma, vmf->address);
if (!dpage) if (!dpage)
goto done; goto done;
......
...@@ -331,7 +331,7 @@ nouveau_accel_ce_init(struct nouveau_drm *drm) ...@@ -331,7 +331,7 @@ nouveau_accel_ce_init(struct nouveau_drm *drm)
return; return;
} }
ret = nouveau_channel_new(&drm->client, false, runm, NvDmaFB, NvDmaTT, &drm->cechan); ret = nouveau_channel_new(&drm->client, true, runm, NvDmaFB, NvDmaTT, &drm->cechan);
if (ret) if (ret)
NV_ERROR(drm, "failed to create ce channel, %d\n", ret); NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
} }
......
...@@ -228,10 +228,8 @@ void radeon_dp_aux_init(struct radeon_connector *radeon_connector) ...@@ -228,10 +228,8 @@ void radeon_dp_aux_init(struct radeon_connector *radeon_connector)
{ {
struct drm_device *dev = radeon_connector->base.dev; struct drm_device *dev = radeon_connector->base.dev;
struct radeon_device *rdev = dev->dev_private; struct radeon_device *rdev = dev->dev_private;
int ret;
radeon_connector->ddc_bus->rec.hpd = radeon_connector->hpd.hpd; radeon_connector->ddc_bus->rec.hpd = radeon_connector->hpd.hpd;
radeon_connector->ddc_bus->aux.dev = radeon_connector->base.kdev;
radeon_connector->ddc_bus->aux.drm_dev = radeon_connector->base.dev; radeon_connector->ddc_bus->aux.drm_dev = radeon_connector->base.dev;
if (ASIC_IS_DCE5(rdev)) { if (ASIC_IS_DCE5(rdev)) {
if (radeon_auxch) if (radeon_auxch)
...@@ -242,11 +240,8 @@ void radeon_dp_aux_init(struct radeon_connector *radeon_connector) ...@@ -242,11 +240,8 @@ void radeon_dp_aux_init(struct radeon_connector *radeon_connector)
radeon_connector->ddc_bus->aux.transfer = radeon_dp_aux_transfer_atom; radeon_connector->ddc_bus->aux.transfer = radeon_dp_aux_transfer_atom;
} }
ret = drm_dp_aux_register(&radeon_connector->ddc_bus->aux); drm_dp_aux_init(&radeon_connector->ddc_bus->aux);
if (!ret)
radeon_connector->ddc_bus->has_aux = true; radeon_connector->ddc_bus->has_aux = true;
WARN(ret, "drm_dp_aux_register() failed with error %d\n", ret);
} }
/***** general DP utility functions *****/ /***** general DP utility functions *****/
......
...@@ -1786,6 +1786,20 @@ static enum drm_mode_status radeon_dp_mode_valid(struct drm_connector *connector ...@@ -1786,6 +1786,20 @@ static enum drm_mode_status radeon_dp_mode_valid(struct drm_connector *connector
return MODE_OK; return MODE_OK;
} }
static int
radeon_connector_late_register(struct drm_connector *connector)
{
struct radeon_connector *radeon_connector = to_radeon_connector(connector);
int r = 0;
if (radeon_connector->ddc_bus->has_aux) {
radeon_connector->ddc_bus->aux.dev = radeon_connector->base.kdev;
r = drm_dp_aux_register(&radeon_connector->ddc_bus->aux);
}
return r;
}
static const struct drm_connector_helper_funcs radeon_dp_connector_helper_funcs = { static const struct drm_connector_helper_funcs radeon_dp_connector_helper_funcs = {
.get_modes = radeon_dp_get_modes, .get_modes = radeon_dp_get_modes,
.mode_valid = radeon_dp_mode_valid, .mode_valid = radeon_dp_mode_valid,
...@@ -1800,6 +1814,7 @@ static const struct drm_connector_funcs radeon_dp_connector_funcs = { ...@@ -1800,6 +1814,7 @@ static const struct drm_connector_funcs radeon_dp_connector_funcs = {
.early_unregister = radeon_connector_unregister, .early_unregister = radeon_connector_unregister,
.destroy = radeon_connector_destroy, .destroy = radeon_connector_destroy,
.force = radeon_dvi_force, .force = radeon_dvi_force,
.late_register = radeon_connector_late_register,
}; };
static const struct drm_connector_funcs radeon_edp_connector_funcs = { static const struct drm_connector_funcs radeon_edp_connector_funcs = {
...@@ -1810,6 +1825,7 @@ static const struct drm_connector_funcs radeon_edp_connector_funcs = { ...@@ -1810,6 +1825,7 @@ static const struct drm_connector_funcs radeon_edp_connector_funcs = {
.early_unregister = radeon_connector_unregister, .early_unregister = radeon_connector_unregister,
.destroy = radeon_connector_destroy, .destroy = radeon_connector_destroy,
.force = radeon_dvi_force, .force = radeon_dvi_force,
.late_register = radeon_connector_late_register,
}; };
static const struct drm_connector_funcs radeon_lvds_bridge_connector_funcs = { static const struct drm_connector_funcs radeon_lvds_bridge_connector_funcs = {
...@@ -1820,6 +1836,7 @@ static const struct drm_connector_funcs radeon_lvds_bridge_connector_funcs = { ...@@ -1820,6 +1836,7 @@ static const struct drm_connector_funcs radeon_lvds_bridge_connector_funcs = {
.early_unregister = radeon_connector_unregister, .early_unregister = radeon_connector_unregister,
.destroy = radeon_connector_destroy, .destroy = radeon_connector_destroy,
.force = radeon_dvi_force, .force = radeon_dvi_force,
.late_register = radeon_connector_late_register,
}; };
void void
......
...@@ -44,8 +44,6 @@ struct sg_table *radeon_gem_prime_get_sg_table(struct drm_gem_object *obj); ...@@ -44,8 +44,6 @@ struct sg_table *radeon_gem_prime_get_sg_table(struct drm_gem_object *obj);
int radeon_gem_prime_pin(struct drm_gem_object *obj); int radeon_gem_prime_pin(struct drm_gem_object *obj);
void radeon_gem_prime_unpin(struct drm_gem_object *obj); void radeon_gem_prime_unpin(struct drm_gem_object *obj);
const struct drm_gem_object_funcs radeon_gem_object_funcs;
static vm_fault_t radeon_gem_fault(struct vm_fault *vmf) static vm_fault_t radeon_gem_fault(struct vm_fault *vmf)
{ {
struct ttm_buffer_object *bo = vmf->vma->vm_private_data; struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
...@@ -132,7 +130,6 @@ int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size, ...@@ -132,7 +130,6 @@ int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size,
return r; return r;
} }
*obj = &robj->tbo.base; *obj = &robj->tbo.base;
(*obj)->funcs = &radeon_gem_object_funcs;
robj->pid = task_pid_nr(current); robj->pid = task_pid_nr(current);
mutex_lock(&rdev->gem.mutex); mutex_lock(&rdev->gem.mutex);
......
...@@ -151,6 +151,7 @@ int radeon_bo_create(struct radeon_device *rdev, ...@@ -151,6 +151,7 @@ int radeon_bo_create(struct radeon_device *rdev,
if (bo == NULL) if (bo == NULL)
return -ENOMEM; return -ENOMEM;
drm_gem_private_object_init(rdev_to_drm(rdev), &bo->tbo.base, size); drm_gem_private_object_init(rdev_to_drm(rdev), &bo->tbo.base, size);
bo->tbo.base.funcs = &radeon_gem_object_funcs;
bo->rdev = rdev; bo->rdev = rdev;
bo->surface_reg = -1; bo->surface_reg = -1;
INIT_LIST_HEAD(&bo->list); INIT_LIST_HEAD(&bo->list);
......
...@@ -87,6 +87,12 @@ ...@@ -87,6 +87,12 @@
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h" #include "gpu_scheduler_trace.h"
#ifdef CONFIG_LOCKDEP
static struct lockdep_map drm_sched_lockdep_map = {
.name = "drm_sched_lockdep_map"
};
#endif
#define to_drm_sched_job(sched_job) \ #define to_drm_sched_job(sched_job) \
container_of((sched_job), struct drm_sched_job, queue_node) container_of((sched_job), struct drm_sched_job, queue_node)
...@@ -1269,7 +1275,12 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, ...@@ -1269,7 +1275,12 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
sched->submit_wq = submit_wq; sched->submit_wq = submit_wq;
sched->own_submit_wq = false; sched->own_submit_wq = false;
} else { } else {
#ifdef CONFIG_LOCKDEP
sched->submit_wq = alloc_ordered_workqueue_lockdep_map(name, 0,
&drm_sched_lockdep_map);
#else
sched->submit_wq = alloc_ordered_workqueue(name, 0); sched->submit_wq = alloc_ordered_workqueue(name, 0);
#endif
if (!sched->submit_wq) if (!sched->submit_wq)
return -ENOMEM; return -ENOMEM;
......
...@@ -306,6 +306,11 @@ void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv) ...@@ -306,6 +306,11 @@ void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv)
static int v3d_perfmon_idr_del(int id, void *elem, void *data) static int v3d_perfmon_idr_del(int id, void *elem, void *data)
{ {
struct v3d_perfmon *perfmon = elem; struct v3d_perfmon *perfmon = elem;
struct v3d_dev *v3d = (struct v3d_dev *)data;
/* If the active perfmon is being destroyed, stop it first */
if (perfmon == v3d->active_perfmon)
v3d_perfmon_stop(v3d, perfmon, false);
v3d_perfmon_put(perfmon); v3d_perfmon_put(perfmon);
...@@ -314,8 +319,10 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data) ...@@ -314,8 +319,10 @@ static int v3d_perfmon_idr_del(int id, void *elem, void *data)
void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv) void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv)
{ {
struct v3d_dev *v3d = v3d_priv->v3d;
mutex_lock(&v3d_priv->perfmon.lock); mutex_lock(&v3d_priv->perfmon.lock);
idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, NULL); idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, v3d);
idr_destroy(&v3d_priv->perfmon.idr); idr_destroy(&v3d_priv->perfmon.idr);
mutex_unlock(&v3d_priv->perfmon.lock); mutex_unlock(&v3d_priv->perfmon.lock);
mutex_destroy(&v3d_priv->perfmon.lock); mutex_destroy(&v3d_priv->perfmon.lock);
......
...@@ -116,6 +116,11 @@ void vc4_perfmon_open_file(struct vc4_file *vc4file) ...@@ -116,6 +116,11 @@ void vc4_perfmon_open_file(struct vc4_file *vc4file)
static int vc4_perfmon_idr_del(int id, void *elem, void *data) static int vc4_perfmon_idr_del(int id, void *elem, void *data)
{ {
struct vc4_perfmon *perfmon = elem; struct vc4_perfmon *perfmon = elem;
struct vc4_dev *vc4 = (struct vc4_dev *)data;
/* If the active perfmon is being destroyed, stop it first */
if (perfmon == vc4->active_perfmon)
vc4_perfmon_stop(vc4, perfmon, false);
vc4_perfmon_put(perfmon); vc4_perfmon_put(perfmon);
...@@ -130,7 +135,7 @@ void vc4_perfmon_close_file(struct vc4_file *vc4file) ...@@ -130,7 +135,7 @@ void vc4_perfmon_close_file(struct vc4_file *vc4file)
return; return;
mutex_lock(&vc4file->perfmon.lock); mutex_lock(&vc4file->perfmon.lock);
idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL); idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, vc4);
idr_destroy(&vc4file->perfmon.idr); idr_destroy(&vc4file->perfmon.idr);
mutex_unlock(&vc4file->perfmon.lock); mutex_unlock(&vc4file->perfmon.lock);
mutex_destroy(&vc4file->perfmon.lock); mutex_destroy(&vc4file->perfmon.lock);
......
...@@ -187,7 +187,7 @@ void xe_debugfs_register(struct xe_device *xe) ...@@ -187,7 +187,7 @@ void xe_debugfs_register(struct xe_device *xe)
debugfs_create_file("forcewake_all", 0400, root, xe, debugfs_create_file("forcewake_all", 0400, root, xe,
&forcewake_all_fops); &forcewake_all_fops);
debugfs_create_file("wedged_mode", 0400, root, xe, debugfs_create_file("wedged_mode", 0600, root, xe,
&wedged_mode_fops); &wedged_mode_fops);
for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) { for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
......
...@@ -874,7 +874,9 @@ int xe_gt_sanitize_freq(struct xe_gt *gt) ...@@ -874,7 +874,9 @@ int xe_gt_sanitize_freq(struct xe_gt *gt)
int ret = 0; int ret = 0;
if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) || if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
xe_uc_fw_is_loaded(&gt->uc.gsc.fw)) && XE_WA(gt, 22019338487)) xe_uc_fw_is_loaded(&gt->uc.gsc.fw) ||
xe_uc_fw_is_in_error_state(&gt->uc.gsc.fw)) &&
XE_WA(gt, 22019338487))
ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc); ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);
return ret; return ret;
......
...@@ -667,17 +667,13 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, ...@@ -667,17 +667,13 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
num_g2h = 1; num_g2h = 1;
if (g2h_fence_needs_alloc(g2h_fence)) { if (g2h_fence_needs_alloc(g2h_fence)) {
void *ptr;
g2h_fence->seqno = next_ct_seqno(ct, true); g2h_fence->seqno = next_ct_seqno(ct, true);
ptr = xa_store(&ct->fence_lookup, ret = xa_err(xa_store(&ct->fence_lookup,
g2h_fence->seqno, g2h_fence->seqno, g2h_fence,
g2h_fence, GFP_ATOMIC); GFP_ATOMIC));
if (IS_ERR(ptr)) { if (ret)
ret = PTR_ERR(ptr);
goto out; goto out;
} }
}
seqno = g2h_fence->seqno; seqno = g2h_fence->seqno;
} else { } else {
...@@ -879,14 +875,11 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, ...@@ -879,14 +875,11 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
retry_same_fence: retry_same_fence:
ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence); ret = guc_ct_send(ct, action, len, 0, 0, &g2h_fence);
if (unlikely(ret == -ENOMEM)) { if (unlikely(ret == -ENOMEM)) {
void *ptr;
/* Retry allocation /w GFP_KERNEL */ /* Retry allocation /w GFP_KERNEL */
ptr = xa_store(&ct->fence_lookup, ret = xa_err(xa_store(&ct->fence_lookup, g2h_fence.seqno,
g2h_fence.seqno, &g2h_fence, GFP_KERNEL));
&g2h_fence, GFP_KERNEL); if (ret)
if (IS_ERR(ptr)) return ret;
return PTR_ERR(ptr);
goto retry_same_fence; goto retry_same_fence;
} else if (unlikely(ret)) { } else if (unlikely(ret)) {
...@@ -903,16 +896,26 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, ...@@ -903,16 +896,26 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
} }
ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ); ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
/*
* Ensure we serialize with completion side to prevent UAF with fence going out of scope on
* the stack, since we have no clue if it will fire after the timeout before we can erase
* from the xa. Also we have some dependent loads and stores below for which we need the
* correct ordering, and we lack the needed barriers.
*/
mutex_lock(&ct->lock);
if (!ret) { if (!ret) {
xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x", xe_gt_err(gt, "Timed out wait for G2H, fence %u, action %04x, done %s",
g2h_fence.seqno, action[0]); g2h_fence.seqno, action[0], str_yes_no(g2h_fence.done));
xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno); xa_erase_irq(&ct->fence_lookup, g2h_fence.seqno);
mutex_unlock(&ct->lock);
return -ETIME; return -ETIME;
} }
if (g2h_fence.retry) { if (g2h_fence.retry) {
xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n", xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
action[0], g2h_fence.reason); action[0], g2h_fence.reason);
mutex_unlock(&ct->lock);
goto retry; goto retry;
} }
if (g2h_fence.fail) { if (g2h_fence.fail) {
...@@ -921,7 +924,12 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len, ...@@ -921,7 +924,12 @@ static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
ret = -EIO; ret = -EIO;
} }
return ret > 0 ? response_buffer ? g2h_fence.response_len : g2h_fence.response_data : ret; if (ret > 0)
ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
mutex_unlock(&ct->lock);
return ret;
} }
/** /**
......
...@@ -224,80 +224,11 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q) ...@@ -224,80 +224,11 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
EXEC_QUEUE_STATE_BANNED)); EXEC_QUEUE_STATE_BANNED));
} }
#ifdef CONFIG_PROVE_LOCKING
static int alloc_submit_wq(struct xe_guc *guc)
{
int i;
for (i = 0; i < NUM_SUBMIT_WQ; ++i) {
guc->submission_state.submit_wq_pool[i] =
alloc_ordered_workqueue("submit_wq", 0);
if (!guc->submission_state.submit_wq_pool[i])
goto err_free;
}
return 0;
err_free:
while (i)
destroy_workqueue(guc->submission_state.submit_wq_pool[--i]);
return -ENOMEM;
}
static void free_submit_wq(struct xe_guc *guc)
{
int i;
for (i = 0; i < NUM_SUBMIT_WQ; ++i)
destroy_workqueue(guc->submission_state.submit_wq_pool[i]);
}
static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
{
int idx = guc->submission_state.submit_wq_idx++ % NUM_SUBMIT_WQ;
return guc->submission_state.submit_wq_pool[idx];
}
#else
static int alloc_submit_wq(struct xe_guc *guc)
{
return 0;
}
static void free_submit_wq(struct xe_guc *guc)
{
}
static struct workqueue_struct *get_submit_wq(struct xe_guc *guc)
{
return NULL;
}
#endif
static void xe_guc_submit_fini(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
struct xe_gt *gt = guc_to_gt(guc);
int ret;
ret = wait_event_timeout(guc->submission_state.fini_wq,
xa_empty(&guc->submission_state.exec_queue_lookup),
HZ * 5);
drain_workqueue(xe->destroy_wq);
xe_gt_assert(gt, ret);
}
static void guc_submit_fini(struct drm_device *drm, void *arg) static void guc_submit_fini(struct drm_device *drm, void *arg)
{ {
struct xe_guc *guc = arg; struct xe_guc *guc = arg;
xe_guc_submit_fini(guc);
xa_destroy(&guc->submission_state.exec_queue_lookup); xa_destroy(&guc->submission_state.exec_queue_lookup);
free_submit_wq(guc);
} }
static void guc_submit_wedged_fini(void *arg) static void guc_submit_wedged_fini(void *arg)
...@@ -359,10 +290,6 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids) ...@@ -359,10 +290,6 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
if (err) if (err)
return err; return err;
err = alloc_submit_wq(guc);
if (err)
return err;
gt->exec_queue_ops = &guc_exec_queue_ops; gt->exec_queue_ops = &guc_exec_queue_ops;
xa_init(&guc->submission_state.exec_queue_lookup); xa_init(&guc->submission_state.exec_queue_lookup);
...@@ -393,7 +320,6 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa ...@@ -393,7 +320,6 @@ static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa
static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
{ {
int ret; int ret;
void *ptr;
int i; int i;
/* /*
...@@ -413,13 +339,11 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q) ...@@ -413,13 +339,11 @@ static int alloc_guc_id(struct xe_guc *guc, struct xe_exec_queue *q)
q->guc->id = ret; q->guc->id = ret;
for (i = 0; i < q->width; ++i) { for (i = 0; i < q->width; ++i) {
ptr = xa_store(&guc->submission_state.exec_queue_lookup, ret = xa_err(xa_store(&guc->submission_state.exec_queue_lookup,
q->guc->id + i, q, GFP_NOWAIT); q->guc->id + i, q, GFP_NOWAIT));
if (IS_ERR(ptr)) { if (ret)
ret = PTR_ERR(ptr);
goto err_release; goto err_release;
} }
}
return 0; return 0;
...@@ -1482,8 +1406,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q) ...@@ -1482,8 +1406,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT : timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
msecs_to_jiffies(q->sched_props.job_timeout_ms); msecs_to_jiffies(q->sched_props.job_timeout_ms);
err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops, err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
get_submit_wq(guc), NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
timeout, guc_to_gt(guc)->ordered_wq, NULL, timeout, guc_to_gt(guc)->ordered_wq, NULL,
q->name, gt_to_xe(q->gt)->drm.dev); q->name, gt_to_xe(q->gt)->drm.dev);
if (err) if (err)
......
...@@ -72,13 +72,6 @@ struct xe_guc { ...@@ -72,13 +72,6 @@ struct xe_guc {
atomic_t stopped; atomic_t stopped;
/** @submission_state.lock: protects submission state */ /** @submission_state.lock: protects submission state */
struct mutex lock; struct mutex lock;
#ifdef CONFIG_PROVE_LOCKING
#define NUM_SUBMIT_WQ 256
/** @submission_state.submit_wq_pool: submission ordered workqueues pool */
struct workqueue_struct *submit_wq_pool[NUM_SUBMIT_WQ];
/** @submission_state.submit_wq_idx: submission ordered workqueue index */
int submit_wq_idx;
#endif
/** @submission_state.enabled: submission is enabled */ /** @submission_state.enabled: submission is enabled */
bool enabled; bool enabled;
/** @submission_state.fini_wq: submit fini wait queue */ /** @submission_state.fini_wq: submit fini wait queue */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment