Merge tag 'drm-amdkfd-next-2017-08-18' of git://people.freedesktop.org/~gabbayo/linux into drm-next

This is the amdkfd pull request for 4.14 merge window. AMD has started cleaning the pipe and sending patches from their internal development to the upstream community. The plan as I understand it is to first get all the non-dGPU patches to upstream and then move to upstream dGPU support. The patches here are relevant only for Kaveri and Carrizo. The following is a summary of the changes: - Add new IOCTL to set a Scratch memory VA - Update PM4 headers for new firmware that support scratch memory - Support image tiling mode - Remove all uses of BUG_ON - Various Bug fixes and coding style fixes * tag 'drm-amdkfd-next-2017-08-18' of git://people.freedesktop.org/~gabbayo/linux: (24 commits) drm/amdkfd: Implement image tiling mode support v2 drm/amdgpu: Add kgd kfd interface get_tile_config() v2 drm/amdkfd: Adding new IOCTL for scratch memory v2 drm/amdgpu: Add kgd/kfd interface to support scratch memory v2 drm/amdgpu: Program SH_STATIC_MEM_CONFIG globally, not per-VMID drm/amd: Update MEC HQD loading code for KFD drm/amdgpu: Disable GFX PG on CZ drm/amdkfd: Update PM4 packet headers drm/amdkfd: Clamp EOP queue size correctly on Gfx8 drm/amdkfd: Add more error printing to help bringup v2 drm/amdkfd: Handle remaining BUG_ONs more gracefully v2 drm/amdkfd: Allocate gtt_sa_bitmap in long units drm/amdkfd: Fix doorbell initialization and finalization drm/amdkfd: Remove BUG_ONs for NULL pointer arguments drm/amdkfd: Remove usage of alloc(sizeof(struct... drm/amdkfd: Fix goto usage v2 drm/amdkfd: Change x==NULL/false references to !x drm/amdkfd: Consolidate and clean up log commands drm/amdkfd: Clean up KFD style errors and warnings v2 drm/amdgpu: Remove hard-coded assumptions about compute pipes ...

Merge tag 'drm-amdkfd-next-2017-08-18' of git://people.freedesktop.org/~gabbayo/linux into drm-next
This is the amdkfd pull request for 4.14 merge window. AMD has started cleaning the pipe and sending patches from their internal development to the upstream community. The plan as I understand it is to first get all the non-dGPU patches to upstream and then move to upstream dGPU support. The patches here are relevant only for Kaveri and Carrizo. The following is a summary of the changes: - Add new IOCTL to set a Scratch memory VA - Update PM4 headers for new firmware that support scratch memory - Support image tiling mode - Remove all uses of BUG_ON - Various Bug fixes and coding style fixes * tag 'drm-amdkfd-next-2017-08-18' of git://people.freedesktop.org/~gabbayo/linux: (24 commits) drm/amdkfd: Implement image tiling mode support v2 drm/amdgpu: Add kgd kfd interface get_tile_config() v2 drm/amdkfd: Adding new IOCTL for scratch memory v2 drm/amdgpu: Add kgd/kfd interface to support scratch memory v2 drm/amdgpu: Program SH_STATIC_MEM_CONFIG globally, not per-VMID drm/amd: Update MEC HQD loading code for KFD drm/amdgpu: Disable GFX PG on CZ drm/amdkfd: Update PM4 packet headers drm/amdkfd: Clamp EOP queue size correctly on Gfx8 drm/amdkfd: Add more error printing to help bringup v2 drm/amdkfd: Handle remaining BUG_ONs more gracefully v2 drm/amdkfd: Allocate gtt_sa_bitmap in long units drm/amdkfd: Fix doorbell initialization and finalization drm/amdkfd: Remove BUG_ONs for NULL pointer arguments drm/amdkfd: Remove usage of alloc(sizeof(struct... drm/amdkfd: Fix goto usage v2 drm/amdkfd: Change x==NULL/false references to !x drm/amdkfd: Consolidate and clean up log commands drm/amdkfd: Clean up KFD style errors and warnings v2 drm/amdgpu: Remove hard-coded assumptions about compute pipes ...
a0aeb3b2 · Dave Airlie · 5fd27c2a · 5d71dbc3 · a0aeb3b2 · a0aeb3b2
Commit a0aeb3b2 authored Aug 21, 2017 by Dave Airlie
36 changed files
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -28,14 +28,14 @@
 #include <linux/module.h>

 const struct kgd2kfd_calls *kgd2kfd;
-bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);

 int amdgpu_amdkfd_init(void)
 {
 	int ret;

 #if defined(CONFIG_HSA_AMD_MODULE)
-	int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+	int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);

 	kgd2kfd_init_p = symbol_request(kgd2kfd_init);


--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -26,6 +26,7 @@
 #define AMDGPU_AMDKFD_H_INCLUDED

 #include <linux/types.h>
+#include <linux/mmu_context.h>
 #include <kgd_kfd_interface.h>

 struct amdgpu_device;
@@ -60,4 +61,19 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);

 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);

+#define read_user_wptr(mmptr, wptr, dst)				\
+	({								\
+		bool valid = false;					\
+		if ((mmptr) && (wptr)) {				\
+			if ((mmptr) == current->mm) {			\
+				valid = !get_user((dst), (wptr));	\
+			} else if (current->mm == NULL) {		\
+				use_mm(mmptr);				\
+				valid = !get_user((dst), (wptr));	\
+				unuse_mm(mmptr);			\
+			}						\
+		}							\
+		valid;							\
+	})
+
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -39,6 +39,12 @@
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "cik_structs.h"

+enum hqd_dequeue_request_type {
+	NO_ACTION = 0,
+	DRAIN_PIPE,
+	RESET_WAVES
+};
+
 enum {
 	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
 	MAX_WATCH_ADDRESSES = 4
@@ -96,12 +102,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr);
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm);
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id);

-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+				enum kfd_preempt_type reset_type,
 				unsigned int utimeout, uint32_t pipe_id,
 				uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -126,6 +135,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);

 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int get_tile_config(struct kgd_dev *kgd,
+		struct tile_config *config)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	config->gb_addr_config = adev->gfx.config.gb_addr_config;
+	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFBANK);
+	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFRANKS);
+
+	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+	config->num_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	config->macro_tile_config_ptr =
+			adev->gfx.config.macrotile_mode_array;
+	config->num_macro_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+	return 0;
+}

 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
@@ -150,7 +186,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
 	.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
 	.write_vmid_invalidate_request = write_vmid_invalidate_request,
-	.get_fw_version = get_fw_version
+	.get_fw_version = get_fw_version,
+	.set_scratch_backing_va = set_scratch_backing_va,
+	.get_tile_config = get_tile_config,
 };

 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -186,7 +224,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);

-	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

 	lock_srbm(kgd, mec, pipe, queue_id, 0);
@@ -290,20 +328,38 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 }

 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr)
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-	uint32_t wptr_shadow, is_wptr_shadow_valid;
 	struct cik_mqd *m;
+	uint32_t *mqd_hqd;
+	uint32_t reg, wptr_val, data;

 	m = get_mqd(mqd);

-	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-	if (is_wptr_shadow_valid)
-		m->cp_hqd_pq_wptr = wptr_shadow;
-
 	acquire_queue(kgd, pipe_id, queue_id);
-	gfx_v7_0_mqd_commit(adev, m);
+
+	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_MQD_CONTROL. */
+	mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_MQD_CONTROL; reg++)
+		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+	/* Copy userspace write pointer value to register.
+	 * Activate doorbell logic to monitor subsequent changes.
+	 */
+	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+	if (read_user_wptr(mm, wptr, wptr_val))
+		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+	WREG32(mmCP_HQD_ACTIVE, data);
+
 	release_queue(kgd);

 	return 0;
@@ -382,30 +438,99 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 	return false;
 }

-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+				enum kfd_preempt_type reset_type,
 				unsigned int utimeout, uint32_t pipe_id,
 				uint32_t queue_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t temp;
-	int timeout = utimeout;
+	enum hqd_dequeue_request_type type;
+	unsigned long flags, end_jiffies;
+	int retry;

 	acquire_queue(kgd, pipe_id, queue_id);
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);

-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+	switch (reset_type) {
+	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+		type = DRAIN_PIPE;
+		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+		type = RESET_WAVES;
+		break;
+	default:
+		type = DRAIN_PIPE;
+		break;
+	}
+
+	/* Workaround: If IQ timer is active and the wait time is close to or
+	 * equal to 0, dequeueing is not safe. Wait until either the wait time
+	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+	 * cleared before continuing. Also, ensure wait times are set to at
+	 * least 0x3.
+	 */
+	local_irq_save(flags);
+	preempt_disable();
+	retry = 5000; /* wait for 500 usecs at maximum */
+	while (true) {
+		temp = RREG32(mmCP_HQD_IQ_TIMER);
+		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+			pr_debug("HW is processing IQ\n");
+			goto loop;
+		}
+		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+					== 3) /* SEM-rearm is safe */
+				break;
+			/* Wait time 3 is safe for CP, but our MMIO read/write
+			 * time is close to 1 microsecond, so check for 10 to
+			 * leave more buffer room
+			 */
+			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+					>= 10)
+				break;
+			pr_debug("IQ timer is active\n");
+		} else
+			break;
+loop:
+		if (!retry) {
+			pr_err("CP HQD IQ timer status time out\n");
+			break;
+		}
+		ndelay(100);
+		--retry;
+	}
+	retry = 1000;
+	while (true) {
+		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+			break;
+		pr_debug("Dequeue request is pending\n");

+		if (!retry) {
+			pr_err("CP HQD dequeue request time out\n");
+			break;
+		}
+		ndelay(100);
+		--retry;
+	}
+	local_irq_restore(flags);
+	preempt_enable();
+
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+	end_jiffies = (utimeout * HZ / 1000) + jiffies;
 	while (true) {
 		temp = RREG32(mmCP_HQD_ACTIVE);
-		if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
 			break;
-		if (timeout <= 0) {
-			pr_err("kfd: cp queue preemption time out.\n");
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("cp queue preemption time out\n");
 			release_queue(kgd);
 			return -ETIME;
 		}
-		msleep(20);
-		timeout -= 20;
+		usleep_range(500, 1000);
 	}

 	release_queue(kgd);
@@ -556,6 +681,16 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
 	WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
 }

+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+	unlock_srbm(kgd);
+}
+
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
@@ -566,42 +701,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	switch (type) {
 	case KGD_ENGINE_PFP:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->gfx.pfp_fw->data;
+						adev->gfx.pfp_fw->data;
 		break;

 	case KGD_ENGINE_ME:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->gfx.me_fw->data;
+						adev->gfx.me_fw->data;
 		break;

 	case KGD_ENGINE_CE:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->gfx.ce_fw->data;
+						adev->gfx.ce_fw->data;
 		break;

 	case KGD_ENGINE_MEC1:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->gfx.mec_fw->data;
+						adev->gfx.mec_fw->data;
 		break;

 	case KGD_ENGINE_MEC2:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->gfx.mec2_fw->data;
+						adev->gfx.mec2_fw->data;
 		break;

 	case KGD_ENGINE_RLC:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->gfx.rlc_fw->data;
+						adev->gfx.rlc_fw->data;
 		break;

 	case KGD_ENGINE_SDMA1:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->sdma.instance[0].fw->data;
+						adev->sdma.instance[0].fw->data;
 		break;

 	case KGD_ENGINE_SDMA2:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->sdma.instance[1].fw->data;
+						adev->sdma.instance[1].fw->data;
 		break;

 	default:

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -39,6 +39,12 @@
 #include "vi_structs.h"
 #include "vid.h"

+enum hqd_dequeue_request_type {
+	NO_ACTION = 0,
+	DRAIN_PIPE,
+	RESET_WAVES
+};
+
 struct cik_sdma_rlc_registers;

 /*
@@ -55,12 +61,15 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 		uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-		uint32_t queue_id, uint32_t __user *wptr);
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm);
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 		uint32_t pipe_id, uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+				enum kfd_preempt_type reset_type,
 				unsigned int utimeout, uint32_t pipe_id,
 				uint32_t queue_id);
 static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
@@ -85,6 +94,33 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
 		uint8_t vmid);
 static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int get_tile_config(struct kgd_dev *kgd,
+		struct tile_config *config)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+	config->gb_addr_config = adev->gfx.config.gb_addr_config;
+	config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFBANK);
+	config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
+				MC_ARB_RAMCFG, NOOFRANKS);
+
+	config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+	config->num_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+	config->macro_tile_config_ptr =
+			adev->gfx.config.macrotile_mode_array;
+	config->num_macro_tile_configs =
+			ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+	return 0;
+}

 static const struct kfd2kgd_calls kfd2kgd = {
 	.init_gtt_mem_allocation = alloc_gtt_mem,
@@ -111,12 +147,15 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_valid =
 			get_atc_vmid_pasid_mapping_valid,
 	.write_vmid_invalidate_request = write_vmid_invalidate_request,
-	.get_fw_version = get_fw_version
+	.get_fw_version = get_fw_version,
+	.set_scratch_backing_va = set_scratch_backing_va,
+	.get_tile_config = get_tile_config,
 };

 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
 {
 	return (struct kfd2kgd_calls *)&kfd2kgd;
+	return (struct kfd2kgd_calls *)&kfd2kgd;
 }

 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -147,7 +186,7 @@ static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);

-	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

 	lock_srbm(kgd, mec, pipe, queue_id, 0);
@@ -216,7 +255,7 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 	uint32_t mec;
 	uint32_t pipe;

-	mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
 	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);

 	lock_srbm(kgd, mec, pipe, 0, 0);
@@ -244,20 +283,67 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 }

 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr)
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm)
 {
-	struct vi_mqd *m;
-	uint32_t shadow_wptr, valid_wptr;
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+	struct vi_mqd *m;
+	uint32_t *mqd_hqd;
+	uint32_t reg, wptr_val, data;

 	m = get_mqd(mqd);

-	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
-	if (valid_wptr == 0)
-		m->cp_hqd_pq_wptr = shadow_wptr;
-
 	acquire_queue(kgd, pipe_id, queue_id);
-	gfx_v8_0_mqd_commit(adev, mqd);
+
+	/* HIQ is set during driver init period with vmid set to 0*/
+	if (m->cp_hqd_vmid == 0) {
+		uint32_t value, mec, pipe;
+
+		mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+		pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+		pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+			mec, pipe, queue_id);
+		value = RREG32(mmRLC_CP_SCHEDULERS);
+		value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+			((mec << 5) | (pipe << 3) | queue_id | 0x80));
+		WREG32(mmRLC_CP_SCHEDULERS, value);
+	}
+
+	/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+	mqd_hqd = &m->cp_mqd_base_addr_lo;
+
+	for (reg = mmCP_MQD_BASE_ADDR; reg <= mmCP_HQD_EOP_CONTROL; reg++)
+		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+	/* Tonga errata: EOP RPTR/WPTR should be left unmodified.
+	 * This is safe since EOP RPTR==WPTR for any inactive HQD
+	 * on ASICs that do not support context-save.
+	 * EOP writes/reads can start anywhere in the ring.
+	 */
+	if (get_amdgpu_device(kgd)->asic_type != CHIP_TONGA) {
+		WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
+		WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
+		WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
+	}
+
+	for (reg = mmCP_HQD_EOP_EVENTS; reg <= mmCP_HQD_ERROR; reg++)
+		WREG32(reg, mqd_hqd[reg - mmCP_MQD_BASE_ADDR]);
+
+	/* Copy userspace write pointer value to register.
+	 * Activate doorbell logic to monitor subsequent changes.
+	 */
+	data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+			     CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, data);
+
+	if (read_user_wptr(mm, wptr, wptr_val))
+		WREG32(mmCP_HQD_PQ_WPTR, (wptr_val << wptr_shift) & wptr_mask);
+
+	data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+	WREG32(mmCP_HQD_ACTIVE, data);
+
 	release_queue(kgd);

 	return 0;
@@ -308,29 +394,102 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 	return false;
 }

-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+				enum kfd_preempt_type reset_type,
 				unsigned int utimeout, uint32_t pipe_id,
 				uint32_t queue_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t temp;
-	int timeout = utimeout;
+	enum hqd_dequeue_request_type type;
+	unsigned long flags, end_jiffies;
+	int retry;
+	struct vi_mqd *m = get_mqd(mqd);

 	acquire_queue(kgd, pipe_id, queue_id);

-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, reset_type);
+	if (m->cp_hqd_vmid == 0)
+		WREG32_FIELD(RLC_CP_SCHEDULERS, scheduler1, 0);
+
+	switch (reset_type) {
+	case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+		type = DRAIN_PIPE;
+		break;
+	case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+		type = RESET_WAVES;
+		break;
+	default:
+		type = DRAIN_PIPE;
+		break;
+	}

+	/* Workaround: If IQ timer is active and the wait time is close to or
+	 * equal to 0, dequeueing is not safe. Wait until either the wait time
+	 * is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
+	 * cleared before continuing. Also, ensure wait times are set to at
+	 * least 0x3.
+	 */
+	local_irq_save(flags);
+	preempt_disable();
+	retry = 5000; /* wait for 500 usecs at maximum */
+	while (true) {
+		temp = RREG32(mmCP_HQD_IQ_TIMER);
+		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
+			pr_debug("HW is processing IQ\n");
+			goto loop;
+		}
+		if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
+			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
+					== 3) /* SEM-rearm is safe */
+				break;
+			/* Wait time 3 is safe for CP, but our MMIO read/write
+			 * time is close to 1 microsecond, so check for 10 to
+			 * leave more buffer room
+			 */
+			if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
+					>= 10)
+				break;
+			pr_debug("IQ timer is active\n");
+		} else
+			break;
+loop:
+		if (!retry) {
+			pr_err("CP HQD IQ timer status time out\n");
+			break;
+		}
+		ndelay(100);
+		--retry;
+	}
+	retry = 1000;
+	while (true) {
+		temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
+		if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
+			break;
+		pr_debug("Dequeue request is pending\n");
+
+		if (!retry) {
+			pr_err("CP HQD dequeue request time out\n");
+			break;
+		}
+		ndelay(100);
+		--retry;
+	}
+	local_irq_restore(flags);
+	preempt_enable();
+
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, type);
+
+	end_jiffies = (utimeout * HZ / 1000) + jiffies;
 	while (true) {
 		temp = RREG32(mmCP_HQD_ACTIVE);
-		if (temp & CP_HQD_ACTIVE__ACTIVE_MASK)
+		if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
 			break;
-		if (timeout <= 0) {
-			pr_err("kfd: cp queue preemption time out.\n");
+		if (time_after(jiffies, end_jiffies)) {
+			pr_err("cp queue preemption time out.\n");
 			release_queue(kgd);
 			return -ETIME;
 		}
-		msleep(20);
-		timeout -= 20;
+		usleep_range(500, 1000);
 	}

 	release_queue(kgd);
@@ -444,6 +603,16 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 	return 0;
 }

+static void set_scratch_backing_va(struct kgd_dev *kgd,
+					uint64_t va, uint32_t vmid)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+	lock_srbm(kgd, 0, 0, 0, vmid);
+	WREG32(mmSH_HIDDEN_PRIVATE_BASE_VMID, va);
+	unlock_srbm(kgd);
+}
+
 static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
@@ -454,42 +623,42 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
 	switch (type) {
 	case KGD_ENGINE_PFP:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->gfx.pfp_fw->data;
+						adev->gfx.pfp_fw->data;
 		break;

 	case KGD_ENGINE_ME:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->gfx.me_fw->data;
+						adev->gfx.me_fw->data;
 		break;

 	case KGD_ENGINE_CE:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->gfx.ce_fw->data;
+						adev->gfx.ce_fw->data;
 		break;

 	case KGD_ENGINE_MEC1:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->gfx.mec_fw->data;
+						adev->gfx.mec_fw->data;
 		break;

 	case KGD_ENGINE_MEC2:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->gfx.mec2_fw->data;
+						adev->gfx.mec2_fw->data;
 		break;

 	case KGD_ENGINE_RLC:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->gfx.rlc_fw->data;
+						adev->gfx.rlc_fw->data;
 		break;

 	case KGD_ENGINE_SDMA1:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->sdma.instance[0].fw->data;
+						adev->sdma.instance[0].fw->data;
 		break;

 	case KGD_ENGINE_SDMA2:
 		hdr = (const union amdgpu_firmware_header *)
-							adev->sdma.instance[1].fw->data;
+						adev->sdma.instance[1].fw->data;
 		break;

 	default:

--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1921,6 +1921,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 				   ELEMENT_SIZE, 1);
 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
 				   INDEX_STRIDE, 3);
+	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);

 	mutex_lock(&adev->srbm_mutex);
 	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
@@ -1934,7 +1935,6 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 		WREG32(mmSH_MEM_APE1_BASE, 1);
 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
 		WREG32(mmSH_MEM_BASES, sh_mem_base);
-		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
 	}
 	cik_srbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);

--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3707,6 +3707,8 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
 				   ELEMENT_SIZE, 1);
 	sh_static_mem_cfg = REG_SET_FIELD(sh_static_mem_cfg, SH_STATIC_MEM_CONFIG,
 				   INDEX_STRIDE, 3);
+	WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
+
 	mutex_lock(&adev->srbm_mutex);
 	for (i = 0; i < adev->vm_manager.id_mgr[0].num_ids; i++) {
 		vi_srbm_select(adev, 0, 0, 0, i);
@@ -3730,7 +3732,6 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)

 		WREG32(mmSH_MEM_APE1_BASE, 1);
 		WREG32(mmSH_MEM_APE1_LIMIT, 0);
-		WREG32(mmSH_STATIC_MEM_CONFIG, sh_static_mem_cfg);
 	}
 	vi_srbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);

--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1028,8 +1028,7 @@ static int vi_common_early_init(void *handle)
 		/* rev0 hardware requires workarounds to support PG */
 		adev->pg_flags = 0;
 		if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) {
-			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
-				AMD_PG_SUPPORT_GFX_SMG |
+			adev->pg_flags |= AMD_PG_SUPPORT_GFX_SMG |
 				AMD_PG_SUPPORT_GFX_PIPELINE |
 				AMD_PG_SUPPORT_CP |
 				AMD_PG_SUPPORT_UVD |

--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -142,12 +142,12 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 				struct kfd_ioctl_create_queue_args *args)
 {
 	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
-		pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
+		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
 		return -EINVAL;
 	}

 	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
-		pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
+		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
 		return -EINVAL;
 	}

@@ -155,26 +155,26 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 		(!access_ok(VERIFY_WRITE,
 			(const void __user *) args->ring_base_address,
 			sizeof(uint64_t)))) {
-		pr_err("kfd: can't access ring base address\n");
+		pr_err("Can't access ring base address\n");
 		return -EFAULT;
 	}

 	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
-		pr_err("kfd: ring size must be a power of 2 or 0\n");
+		pr_err("Ring size must be a power of 2 or 0\n");
 		return -EINVAL;
 	}

 	if (!access_ok(VERIFY_WRITE,
 			(const void __user *) args->read_pointer_address,
 			sizeof(uint32_t))) {
-		pr_err("kfd: can't access read pointer\n");
+		pr_err("Can't access read pointer\n");
 		return -EFAULT;
 	}

 	if (!access_ok(VERIFY_WRITE,
 			(const void __user *) args->write_pointer_address,
 			sizeof(uint32_t))) {
-		pr_err("kfd: can't access write pointer\n");
+		pr_err("Can't access write pointer\n");
 		return -EFAULT;
 	}

@@ -182,7 +182,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 		!access_ok(VERIFY_WRITE,
 			(const void __user *) args->eop_buffer_address,
 			sizeof(uint32_t))) {
-		pr_debug("kfd: can't access eop buffer");
+		pr_debug("Can't access eop buffer");
 		return -EFAULT;
 	}

@@ -190,7 +190,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 		!access_ok(VERIFY_WRITE,
 			(const void __user *) args->ctx_save_restore_address,
 			sizeof(uint32_t))) {
-		pr_debug("kfd: can't access ctx save restore buffer");
+		pr_debug("Can't access ctx save restore buffer");
 		return -EFAULT;
 	}

@@ -219,27 +219,27 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 	else
 		q_properties->format = KFD_QUEUE_FORMAT_PM4;

-	pr_debug("Queue Percentage (%d, %d)\n",
+	pr_debug("Queue Percentage: %d, %d\n",
 			q_properties->queue_percent, args->queue_percentage);

-	pr_debug("Queue Priority (%d, %d)\n",
+	pr_debug("Queue Priority: %d, %d\n",
 			q_properties->priority, args->queue_priority);

-	pr_debug("Queue Address (0x%llX, 0x%llX)\n",
+	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
 			q_properties->queue_address, args->ring_base_address);

-	pr_debug("Queue Size (0x%llX, %u)\n",
+	pr_debug("Queue Size: 0x%llX, %u\n",
 			q_properties->queue_size, args->ring_size);

-	pr_debug("Queue r/w Pointers (0x%llX, 0x%llX)\n",
-			(uint64_t) q_properties->read_ptr,
-			(uint64_t) q_properties->write_ptr);
+	pr_debug("Queue r/w Pointers: %p, %p\n",
+			q_properties->read_ptr,
+			q_properties->write_ptr);

-	pr_debug("Queue Format (%d)\n", q_properties->format);
+	pr_debug("Queue Format: %d\n", q_properties->format);

-	pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address);
+	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

-	pr_debug("Queue CTX save arex (0x%llX)\n",
+	pr_debug("Queue CTX save area: 0x%llX\n",
 			q_properties->ctx_save_restore_area_address);

 	return 0;
@@ -257,16 +257,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,

 	memset(&q_properties, 0, sizeof(struct queue_properties));

-	pr_debug("kfd: creating queue ioctl\n");
+	pr_debug("Creating queue ioctl\n");

 	err = set_queue_properties_from_user(&q_properties, args);
 	if (err)
 		return err;

-	pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id);
+	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
 	dev = kfd_device_by_id(args->gpu_id);
-	if (dev == NULL) {
-		pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id);
+	if (!dev) {
+		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
 		return -EINVAL;
 	}

@@ -278,7 +278,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 		goto err_bind_process;
 	}

-	pr_debug("kfd: creating queue for PASID %d on GPU 0x%x\n",
+	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
 			p->pasid,
 			dev->id);

@@ -296,15 +296,15 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,

 	mutex_unlock(&p->mutex);

-	pr_debug("kfd: queue id %d was created successfully\n", args->queue_id);
+	pr_debug("Queue id %d was created successfully\n", args->queue_id);

-	pr_debug("ring buffer address == 0x%016llX\n",
+	pr_debug("Ring buffer address == 0x%016llX\n",
 			args->ring_base_address);

-	pr_debug("read ptr address    == 0x%016llX\n",
+	pr_debug("Read ptr address    == 0x%016llX\n",
 			args->read_pointer_address);

-	pr_debug("write ptr address   == 0x%016llX\n",
+	pr_debug("Write ptr address   == 0x%016llX\n",
 			args->write_pointer_address);

 	return 0;
@@ -321,7 +321,7 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
 	int retval;
 	struct kfd_ioctl_destroy_queue_args *args = data;

-	pr_debug("kfd: destroying queue id %d for PASID %d\n",
+	pr_debug("Destroying queue id %d for pasid %d\n",
 				args->queue_id,
 				p->pasid);

@@ -341,12 +341,12 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 	struct queue_properties properties;

 	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
-		pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
+		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
 		return -EINVAL;
 	}

 	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
-		pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
+		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
 		return -EINVAL;
 	}

@@ -354,12 +354,12 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 		(!access_ok(VERIFY_WRITE,
 			(const void __user *) args->ring_base_address,
 			sizeof(uint64_t)))) {
-		pr_err("kfd: can't access ring base address\n");
+		pr_err("Can't access ring base address\n");
 		return -EFAULT;
 	}

 	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
-		pr_err("kfd: ring size must be a power of 2 or 0\n");
+		pr_err("Ring size must be a power of 2 or 0\n");
 		return -EINVAL;
 	}

@@ -368,7 +368,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 	properties.queue_percent = args->queue_percentage;
 	properties.priority = args->queue_priority;

-	pr_debug("kfd: updating queue id %d for PASID %d\n",
+	pr_debug("Updating queue id %d for pasid %d\n",
 			args->queue_id, p->pasid);

 	mutex_lock(&p->mutex);
@@ -400,7 +400,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
 	}

 	dev = kfd_device_by_id(args->gpu_id);
-	if (dev == NULL)
+	if (!dev)
 		return -EINVAL;

 	mutex_lock(&p->mutex);
@@ -443,7 +443,7 @@ static int kfd_ioctl_dbg_register(struct file *filep,
 	long status = 0;

 	dev = kfd_device_by_id(args->gpu_id);
-	if (dev == NULL)
+	if (!dev)
 		return -EINVAL;

 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
@@ -460,12 +460,11 @@ static int kfd_ioctl_dbg_register(struct file *filep,
 	 */
 	pdd = kfd_bind_process_to_device(dev, p);
 	if (IS_ERR(pdd)) {
-		mutex_unlock(&p->mutex);
-		mutex_unlock(kfd_get_dbgmgr_mutex());
-		return PTR_ERR(pdd);
+		status = PTR_ERR(pdd);
+		goto out;
 	}

-	if (dev->dbgmgr == NULL) {
+	if (!dev->dbgmgr) {
 		/* In case of a legal call, we have no dbgmgr yet */
 		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
 		if (create_ok) {
@@ -480,6 +479,7 @@ static int kfd_ioctl_dbg_register(struct file *filep,
 		status = -EINVAL;
 	}

+out:
 	mutex_unlock(&p->mutex);
 	mutex_unlock(kfd_get_dbgmgr_mutex());

@@ -494,7 +494,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep,
 	long status;

 	dev = kfd_device_by_id(args->gpu_id);
-	if (dev == NULL)
+	if (!dev)
 		return -EINVAL;

 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
@@ -505,7 +505,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep,
 	mutex_lock(kfd_get_dbgmgr_mutex());

 	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
-	if (status == 0) {
+	if (!status) {
 		kfd_dbgmgr_destroy(dev->dbgmgr);
 		dev->dbgmgr = NULL;
 	}
@@ -539,7 +539,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep,
 	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

 	dev = kfd_device_by_id(args->gpu_id);
-	if (dev == NULL)
+	if (!dev)
 		return -EINVAL;

 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
@@ -580,8 +580,8 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep,
 	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

 	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
-		kfree(args_buff);
-		return -EINVAL;
+		status = -EINVAL;
+		goto out;
 	}

 	watch_mask_value = (uint64_t) args_buff[args_idx];
@@ -604,8 +604,8 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep,
 	}

 	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
-		kfree(args_buff);
-		return -EINVAL;
+		status = -EINVAL;
+		goto out;
 	}

 	/* Currently HSA Event is not supported for DBG */
@@ -617,6 +617,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep,

 	mutex_unlock(kfd_get_dbgmgr_mutex());

+out:
 	kfree(args_buff);

 	return status;
@@ -646,7 +647,7 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep,
 				sizeof(wac_info.trapId);

 	dev = kfd_device_by_id(args->gpu_id);
-	if (dev == NULL)
+	if (!dev)
 		return -EINVAL;

 	if (dev->device_info->asic_family == CHIP_CARRIZO) {
@@ -782,8 +783,9 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 				"scratch_limit %llX\n", pdd->scratch_limit);

 			args->num_of_nodes++;
-		} while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL &&
-				(args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
+
+			pdd = kfd_get_next_process_device_data(p, pdd);
+		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
 	}

 	mutex_unlock(&p->mutex);
@@ -846,9 +848,84 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,

 	return err;
 }
+static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
+	struct kfd_process_device *pdd;
+	struct kfd_dev *dev;
+	long err;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		err = PTR_ERR(pdd);
+		goto bind_process_to_device_fail;
+	}
+
+	pdd->qpd.sh_hidden_private_base = args->va_addr;
+
+	mutex_unlock(&p->mutex);
+
+	if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0)
+		dev->kfd2kgd->set_scratch_backing_va(
+			dev->kgd, args->va_addr, pdd->qpd.vmid);
+
+	return 0;
+
+bind_process_to_device_fail:
+	mutex_unlock(&p->mutex);
+	return err;
+}
+
+static int kfd_ioctl_get_tile_config(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_tile_config_args *args = data;
+	struct kfd_dev *dev;
+	struct tile_config config;
+	int err = 0;
+
+	dev = kfd_device_by_id(args->gpu_id);
+
+	dev->kfd2kgd->get_tile_config(dev->kgd, &config);
+
+	args->gb_addr_config = config.gb_addr_config;
+	args->num_banks = config.num_banks;
+	args->num_ranks = config.num_ranks;
+
+	if (args->num_tile_configs > config.num_tile_configs)
+		args->num_tile_configs = config.num_tile_configs;
+	err = copy_to_user((void __user *)args->tile_config_ptr,
+			config.tile_config_ptr,
+			args->num_tile_configs * sizeof(uint32_t));
+	if (err) {
+		args->num_tile_configs = 0;
+		return -EFAULT;
+	}
+
+	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
+		args->num_macro_tile_configs =
+				config.num_macro_tile_configs;
+	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
+			config.macro_tile_config_ptr,
+			args->num_macro_tile_configs * sizeof(uint32_t));
+	if (err) {
+		args->num_macro_tile_configs = 0;
+		return -EFAULT;
+	}
+
+	return 0;
+}

 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
-	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl}
+	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
+			    .cmd_drv = 0, .name = #ioctl}

 /** Ioctl table */
 static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
@@ -899,6 +976,12 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {

 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
 			kfd_ioctl_dbg_wave_control, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
+			kfd_ioctl_set_scratch_backing_va, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
+			kfd_ioctl_get_tile_config, 0)
 };

 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -42,8 +42,6 @@

 static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
 {
-	BUG_ON(!dev || !dev->kfd2kgd);
-
 	dev->kfd2kgd->address_watch_disable(dev->kgd);
 }

@@ -62,7 +60,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 	unsigned int *ib_packet_buff;
 	int status;

-	BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);
+	if (WARN_ON(!size_in_bytes))
+		return -EINVAL;

 	kq = dbgdev->kq;

@@ -77,8 +76,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 	status = kq->ops.acquire_packet_buffer(kq,
 				pq_packets_size_in_bytes / sizeof(uint32_t),
 				&ib_packet_buff);
-	if (status != 0) {
-		pr_err("amdkfd: acquire_packet_buffer failed\n");
+	if (status) {
+		pr_err("acquire_packet_buffer failed\n");
 		return status;
 	}

@@ -115,8 +114,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 	status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
 					&mem_obj);

-	if (status != 0) {
-		pr_err("amdkfd: Failed to allocate GART memory\n");
+	if (status) {
+		pr_err("Failed to allocate GART memory\n");
 		kq->ops.rollback_packet(kq);
 		return status;
 	}
@@ -168,8 +167,6 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,

 static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
 {
-	BUG_ON(!dbgdev);
-
 	/*
 	 * no action is needed in this case,
 	 * just make sure diq will not be used
@@ -187,14 +184,12 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
 	struct kernel_queue *kq = NULL;
 	int status;

-	BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
-
 	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
 				&properties, 0, KFD_QUEUE_TYPE_DIQ,
 				&qid);

 	if (status) {
-		pr_err("amdkfd: Failed to create DIQ\n");
+		pr_err("Failed to create DIQ\n");
 		return status;
 	}

@@ -202,8 +197,8 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)

 	kq = pqm_get_kernel_queue(dbgdev->pqm, qid);

-	if (kq == NULL) {
-		pr_err("amdkfd: Error getting DIQ\n");
+	if (!kq) {
+		pr_err("Error getting DIQ\n");
 		pqm_destroy_queue(dbgdev->pqm, qid);
 		return -EFAULT;
 	}
@@ -215,8 +210,6 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)

 static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
 {
-	BUG_ON(!dbgdev || !dbgdev->dev);
-
 	/* disable watch address */
 	dbgdev_address_watch_disable_nodiq(dbgdev->dev);
 	return 0;
@@ -227,8 +220,6 @@ static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
 	/* todo - disable address watch */
 	int status;

-	BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
-
 	status = pqm_destroy_queue(dbgdev->pqm,
 			dbgdev->kq->queue->properties.queue_id);
 	dbgdev->kq = NULL;
@@ -245,14 +236,12 @@ static void dbgdev_address_watch_set_registers(
 {
 	union ULARGE_INTEGER addr;

-	BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);
-
 	addr.quad_part = 0;
 	addrHi->u32All = 0;
 	addrLo->u32All = 0;
 	cntl->u32All = 0;

-	if (adw_info->watch_mask != NULL)
+	if (adw_info->watch_mask)
 		cntl->bitfields.mask =
 			(uint32_t) (adw_info->watch_mask[index] &
 					ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
@@ -279,7 +268,7 @@ static void dbgdev_address_watch_set_registers(
 }

 static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
-					struct dbg_address_watch_info *adw_info)
+				      struct dbg_address_watch_info *adw_info)
 {
 	union TCP_WATCH_ADDR_H_BITS addrHi;
 	union TCP_WATCH_ADDR_L_BITS addrLo;
@@ -287,13 +276,11 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
 	struct kfd_process_device *pdd;
 	unsigned int i;

-	BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
-
 	/* taking the vmid for that process on the safe way using pdd */
 	pdd = kfd_get_process_device_data(dbgdev->dev,
 					adw_info->process);
 	if (!pdd) {
-		pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
+		pr_err("Failed to get pdd for wave control no DIQ\n");
 		return -EFAULT;
 	}

@@ -303,17 +290,16 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,

 	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
 			(adw_info->num_watch_points == 0)) {
-		pr_err("amdkfd: num_watch_points is invalid\n");
+		pr_err("num_watch_points is invalid\n");
 		return -EINVAL;
 	}

-	if ((adw_info->watch_mode == NULL) ||
-		(adw_info->watch_address == NULL)) {
-		pr_err("amdkfd: adw_info fields are not valid\n");
+	if (!adw_info->watch_mode || !adw_info->watch_address) {
+		pr_err("adw_info fields are not valid\n");
 		return -EINVAL;
 	}

-	for (i = 0 ; i < adw_info->num_watch_points ; i++) {
+	for (i = 0; i < adw_info->num_watch_points; i++) {
 		dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
 						&cntl, i, pdd->qpd.vmid);

@@ -348,7 +334,7 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
 }

 static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
-					struct dbg_address_watch_info *adw_info)
+				    struct dbg_address_watch_info *adw_info)
 {
 	struct pm4__set_config_reg *packets_vec;
 	union TCP_WATCH_ADDR_H_BITS addrHi;
@@ -363,28 +349,25 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 	/* we do not control the vmid in DIQ mode, just a place holder */
 	unsigned int vmid = 0;

-	BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
-
 	addrHi.u32All = 0;
 	addrLo.u32All = 0;
 	cntl.u32All = 0;

 	if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
 			(adw_info->num_watch_points == 0)) {
-		pr_err("amdkfd: num_watch_points is invalid\n");
+		pr_err("num_watch_points is invalid\n");
 		return -EINVAL;
 	}

-	if ((NULL == adw_info->watch_mode) ||
-			(NULL == adw_info->watch_address)) {
-		pr_err("amdkfd: adw_info fields are not valid\n");
+	if (!adw_info->watch_mode || !adw_info->watch_address) {
+		pr_err("adw_info fields are not valid\n");
 		return -EINVAL;
 	}

 	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

-	if (status != 0) {
-		pr_err("amdkfd: Failed to allocate GART memory\n");
+	if (status) {
+		pr_err("Failed to allocate GART memory\n");
 		return status;
 	}

@@ -442,8 +425,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 					i,
 					ADDRESS_WATCH_REG_CNTL);

-		aw_reg_add_dword /= sizeof(uint32_t);
-
 		packets_vec[0].bitfields2.reg_offset =
 					aw_reg_add_dword - AMD_CONFIG_REG_BASE;

@@ -455,8 +436,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 					i,
 					ADDRESS_WATCH_REG_ADDR_HI);

-		aw_reg_add_dword /= sizeof(uint32_t);
-
 		packets_vec[1].bitfields2.reg_offset =
 					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 		packets_vec[1].reg_data[0] = addrHi.u32All;
@@ -467,8 +446,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 					i,
 					ADDRESS_WATCH_REG_ADDR_LO);

-		aw_reg_add_dword /= sizeof(uint32_t);
-
 		packets_vec[2].bitfields2.reg_offset =
 				aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 		packets_vec[2].reg_data[0] = addrLo.u32All;
@@ -485,8 +462,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 					i,
 					ADDRESS_WATCH_REG_CNTL);

-		aw_reg_add_dword /= sizeof(uint32_t);
-
 		packets_vec[3].bitfields2.reg_offset =
 					aw_reg_add_dword - AMD_CONFIG_REG_BASE;
 		packets_vec[3].reg_data[0] = cntl.u32All;
@@ -498,8 +473,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
 					packet_buff_uint,
 					ib_size);

-		if (status != 0) {
-			pr_err("amdkfd: Failed to submit IB to DIQ\n");
+		if (status) {
+			pr_err("Failed to submit IB to DIQ\n");
 			break;
 		}
 	}
@@ -518,8 +493,6 @@ static int dbgdev_wave_control_set_registers(
 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
 	struct HsaDbgWaveMsgAMDGen2 *pMsg;

-	BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
-
 	reg_sq_cmd.u32All = 0;
 	reg_gfx_index.u32All = 0;
 	pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
@@ -620,18 +593,16 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 	struct pm4__set_config_reg *packets_vec;
 	size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;

-	BUG_ON(!dbgdev || !wac_info);
-
 	reg_sq_cmd.u32All = 0;

 	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
 							&reg_gfx_index);
 	if (status) {
-		pr_err("amdkfd: Failed to set wave control registers\n");
+		pr_err("Failed to set wave control registers\n");
 		return status;
 	}

-	/* we do not control the VMID in DIQ,so reset it to a known value */
+	/* we do not control the VMID in DIQ, so reset it to a known value */
 	reg_sq_cmd.bits.vm_id = 0;

 	pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
@@ -667,7 +638,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 	status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);

 	if (status != 0) {
-		pr_err("amdkfd: Failed to allocate GART memory\n");
+		pr_err("Failed to allocate GART memory\n");
 		return status;
 	}

@@ -719,8 +690,8 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
 			packet_buff_uint,
 			ib_size);

-	if (status != 0)
-		pr_err("amdkfd: Failed to submit IB to DIQ\n");
+	if (status)
+		pr_err("Failed to submit IB to DIQ\n");

 	kfd_gtt_sa_free(dbgdev->dev, mem_obj);

@@ -735,21 +706,19 @@ static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
 	struct kfd_process_device *pdd;

-	BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);
-
 	reg_sq_cmd.u32All = 0;

 	/* taking the VMID for that process on the safe way using PDD */
 	pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);

 	if (!pdd) {
-		pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
+		pr_err("Failed to get pdd for wave control no DIQ\n");
 		return -EFAULT;
 	}
 	status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
 							&reg_gfx_index);
 	if (status) {
-		pr_err("amdkfd: Failed to set wave control registers\n");
+		pr_err("Failed to set wave control registers\n");
 		return status;
 	}

@@ -818,12 +787,13 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)

 	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
 	 * ATC_VMID15_PASID_MAPPING
-	 * to check which VMID the current process is mapped to. */
+	 * to check which VMID the current process is mapped to.
+	 */

 	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
 		if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
 				(dev->kgd, vmid)) {
-			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
+			if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
 					(dev->kgd, vmid) == p->pasid) {
 				pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
 						vmid, p->pasid);
@@ -833,7 +803,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
 	}

 	if (vmid > last_vmid_to_scan) {
-		pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid);
+		pr_err("Didn't find vmid for pasid %d\n", p->pasid);
 		return -EFAULT;
 	}

@@ -860,8 +830,6 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
 void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
 			enum DBGDEV_TYPE type)
 {
-	BUG_ON(!pdbgdev || !pdev);
-
 	pdbgdev->dev = pdev;
 	pdbgdev->kq = NULL;
 	pdbgdev->type = type;

--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -44,8 +44,6 @@ struct mutex *kfd_get_dbgmgr_mutex(void)

 static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
 {
-	BUG_ON(!pmgr);
-
 	kfree(pmgr->dbgdev);

 	pmgr->dbgdev = NULL;
@@ -55,7 +53,7 @@ static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)

 void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
 {
-	if (pmgr != NULL) {
+	if (pmgr) {
 		kfd_dbgmgr_uninitialize(pmgr);
 		kfree(pmgr);
 	}
@@ -66,12 +64,12 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
 	enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
 	struct kfd_dbgmgr *new_buff;

-	BUG_ON(pdev == NULL);
-	BUG_ON(!pdev->init_complete);
+	if (WARN_ON(!pdev->init_complete))
+		return false;

 	new_buff = kfd_alloc_struct(new_buff);
 	if (!new_buff) {
-		pr_err("amdkfd: Failed to allocate dbgmgr instance\n");
+		pr_err("Failed to allocate dbgmgr instance\n");
 		return false;
 	}

@@ -79,7 +77,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
 	new_buff->dev = pdev;
 	new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev);
 	if (!new_buff->dbgdev) {
-		pr_err("amdkfd: Failed to allocate dbgdev instance\n");
+		pr_err("Failed to allocate dbgdev instance\n");
 		kfree(new_buff);
 		return false;
 	}
@@ -96,8 +94,6 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)

 long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
 {
-	BUG_ON(!p || !pmgr || !pmgr->dbgdev);
-
 	if (pmgr->pasid != 0) {
 		pr_debug("H/W debugger is already active using pasid %d\n",
 				pmgr->pasid);
@@ -118,8 +114,6 @@ long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)

 long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
 {
-	BUG_ON(!p || !pmgr || !pmgr->dbgdev);
-
 	/* Is the requests coming from the already registered process? */
 	if (pmgr->pasid != p->pasid) {
 		pr_debug("H/W debugger is not registered by calling pasid %d\n",
@@ -137,8 +131,6 @@ long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
 long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
 				struct dbg_wave_control_info *wac_info)
 {
-	BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info);
-
 	/* Is the requests coming from the already registered process? */
 	if (pmgr->pasid != wac_info->process->pasid) {
 		pr_debug("H/W debugger support was not registered for requester pasid %d\n",
@@ -152,9 +144,6 @@ long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
 long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
 				struct dbg_address_watch_info *adw_info)
 {
-	BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info);
-
-
 	/* Is the requests coming from the already registered process? */
 	if (pmgr->pasid != adw_info->process->pasid) {
 		pr_debug("H/W debugger support was not registered for requester pasid %d\n",

--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
@@ -30,13 +30,11 @@
 #pragma pack(push, 4)

 enum HSA_DBG_WAVEOP {
-	HSA_DBG_WAVEOP_HALT = 1,	/* Halts a wavefront		*/
-	HSA_DBG_WAVEOP_RESUME = 2,	/* Resumes a wavefront		*/
-	HSA_DBG_WAVEOP_KILL = 3,	/* Kills a wavefront		*/
-	HSA_DBG_WAVEOP_DEBUG = 4,	/* Causes wavefront to enter
-						debug mode		*/
-	HSA_DBG_WAVEOP_TRAP = 5,	/* Causes wavefront to take
-						a trap			*/
+	HSA_DBG_WAVEOP_HALT = 1,   /* Halts a wavefront */
+	HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */
+	HSA_DBG_WAVEOP_KILL = 3,   /* Kills a wavefront */
+	HSA_DBG_WAVEOP_DEBUG = 4,  /* Causes wavefront to enter dbg mode */
+	HSA_DBG_WAVEOP_TRAP = 5,   /* Causes wavefront to take a trap */
 	HSA_DBG_NUM_WAVEOP = 5,
 	HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
 };
@@ -81,15 +79,13 @@ struct HsaDbgWaveMsgAMDGen2 {
 			uint32_t UserData:8;	/* user data */
 			uint32_t ShaderArray:1;	/* Shader array */
 			uint32_t Priv:1;	/* Privileged */
-			uint32_t Reserved0:4;	/* This field is reserved,
-						   should be 0 */
+			uint32_t Reserved0:4;	/* Reserved, should be 0 */
 			uint32_t WaveId:4;	/* wave id */
 			uint32_t SIMD:2;	/* SIMD id */
 			uint32_t HSACU:4;	/* Compute unit */
 			uint32_t ShaderEngine:2;/* Shader engine */
 			uint32_t MessageType:2;	/* see HSA_DBG_WAVEMSG_TYPE */
-			uint32_t Reserved1:4;	/* This field is reserved,
-						   should be 0 */
+			uint32_t Reserved1:4;	/* Reserved, should be 0 */
 		} ui32;
 		uint32_t Value;
 	};
@@ -121,20 +117,23 @@ struct HsaDbgWaveMessage {
 * in the user mode instruction stream. The OS scheduler event is typically
 * associated and signaled by an interrupt issued by the GPU, but other HSA
 * system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced
- * by the KFD by this mechanism, too. */
+ * by the KFD by this mechanism, too.
+ */

 /* these are the new definitions for events */
 enum HSA_EVENTTYPE {
 	HSA_EVENTTYPE_SIGNAL = 0,	/* user-mode generated GPU signal */
 	HSA_EVENTTYPE_NODECHANGE = 1,	/* HSA node change (attach/detach) */
 	HSA_EVENTTYPE_DEVICESTATECHANGE = 2,	/* HSA device state change
-						   (start/stop) */
+						 * (start/stop)
+						 */
 	HSA_EVENTTYPE_HW_EXCEPTION = 3,	/* GPU shader exception event */
 	HSA_EVENTTYPE_SYSTEM_EVENT = 4,	/* GPU SYSCALL with parameter info */
 	HSA_EVENTTYPE_DEBUG_EVENT = 5,	/* GPU signal for debugging */
 	HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */
 	HSA_EVENTTYPE_QUEUE_EVENT = 7,	/* GPU signal queue idle state
-					   (EOP pm4) */
+					 * (EOP pm4)
+					 */
 	/* ...  */
 	HSA_EVENTTYPE_MAXID,
 	HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF

--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -26,7 +26,7 @@
 #include <linux/slab.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
-#include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_vi.h"

 #define MQD_SIZE_ALIGNED 768

@@ -98,11 +98,14 @@ static const struct kfd_device_info *lookup_device_info(unsigned short did)

 	for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
 		if (supported_devices[i].did == did) {
-			BUG_ON(supported_devices[i].device_info == NULL);
+			WARN_ON(!supported_devices[i].device_info);
 			return supported_devices[i].device_info;
 		}
 	}

+	dev_warn(kfd_device, "DID %04x is missing in supported_devices\n",
+		 did);
+
 	return NULL;
 }

@@ -114,8 +117,10 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
 	const struct kfd_device_info *device_info =
 					lookup_device_info(pdev->device);

-	if (!device_info)
+	if (!device_info) {
+		dev_err(kfd_device, "kgd2kfd_probe failed\n");
 		return NULL;
+	}

 	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
 	if (!kfd)
@@ -152,15 +157,16 @@ static bool device_iommu_pasid_init(struct kfd_dev *kfd)
 	}

 	if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
-		dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n",
+		dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n",
 		       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
 		       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
-		       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0);
+		       (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
+									!= 0);
 		return false;
 	}

 	pasid_limit = min_t(unsigned int,
-			(unsigned int)1 << kfd->device_info->max_pasid_bits,
+			(unsigned int)(1 << kfd->device_info->max_pasid_bits),
 			iommu_info.max_pasids);
 	/*
 	 * last pasid is used for kernel queues doorbells
@@ -211,9 +217,8 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
 			flags);

 	dev = kfd_device_by_pci_dev(pdev);
-	BUG_ON(dev == NULL);
-
-	kfd_signal_iommu_event(dev, pasid, address,
+	if (!WARN_ON(!dev))
+		kfd_signal_iommu_event(dev, pasid, address,
 			flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);

 	return AMD_IOMMU_INV_PRI_RSP_INVALID;
@@ -234,9 +239,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	 * calculate max size of runlist packet.
 	 * There can be only 2 packets at once
 	 */
-	size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
-		max_num_of_queues_per_device *
-		sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;
+	size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) +
+		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
+		+ sizeof(struct pm4_mes_runlist)) * 2;

 	/* Add size of HIQ & DIQ */
 	size += KFD_KERNEL_QUEUE_SIZE * 2;
@@ -247,42 +252,37 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	if (kfd->kfd2kgd->init_gtt_mem_allocation(
 			kfd->kgd, size, &kfd->gtt_mem,
 			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){
-		dev_err(kfd_device,
-			"Could not allocate %d bytes for device (%x:%x)\n",
-			size, kfd->pdev->vendor, kfd->pdev->device);
+		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
 		goto out;
 	}

-	dev_info(kfd_device,
-		"Allocated %d bytes on gart for device(%x:%x)\n",
-		size, kfd->pdev->vendor, kfd->pdev->device);
+	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);

 	/* Initialize GTT sa with 512 byte chunk size */
 	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
-		dev_err(kfd_device,
-			"Error initializing gtt sub-allocator\n");
+		dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
 		goto kfd_gtt_sa_init_error;
 	}

-	kfd_doorbell_init(kfd);
-
-	if (kfd_topology_add_device(kfd) != 0) {
+	if (kfd_doorbell_init(kfd)) {
 		dev_err(kfd_device,
-			"Error adding device (%x:%x) to topology\n",
-			kfd->pdev->vendor, kfd->pdev->device);
+			"Error initializing doorbell aperture\n");
+		goto kfd_doorbell_error;
+	}
+
+	if (kfd_topology_add_device(kfd)) {
+		dev_err(kfd_device, "Error adding device to topology\n");
 		goto kfd_topology_add_device_error;
 	}

 	if (kfd_interrupt_init(kfd)) {
-		dev_err(kfd_device,
-			"Error initializing interrupts for device (%x:%x)\n",
-			kfd->pdev->vendor, kfd->pdev->device);
+		dev_err(kfd_device, "Error initializing interrupts\n");
 		goto kfd_interrupt_error;
 	}

 	if (!device_iommu_pasid_init(kfd)) {
 		dev_err(kfd_device,
-			"Error initializing iommuv2 for device (%x:%x)\n",
+			"Error initializing iommuv2 for device %x:%x\n",
 			kfd->pdev->vendor, kfd->pdev->device);
 		goto device_iommu_pasid_error;
 	}
@@ -292,15 +292,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,

 	kfd->dqm = device_queue_manager_init(kfd);
 	if (!kfd->dqm) {
-		dev_err(kfd_device,
-			"Error initializing queue manager for device (%x:%x)\n",
-			kfd->pdev->vendor, kfd->pdev->device);
+		dev_err(kfd_device, "Error initializing queue manager\n");
 		goto device_queue_manager_error;
 	}

-	if (kfd->dqm->ops.start(kfd->dqm) != 0) {
+	if (kfd->dqm->ops.start(kfd->dqm)) {
 		dev_err(kfd_device,
-			"Error starting queuen manager for device (%x:%x)\n",
+			"Error starting queue manager for device %x:%x\n",
 			kfd->pdev->vendor, kfd->pdev->device);
 		goto dqm_start_error;
 	}
@@ -308,10 +306,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd->dbgmgr = NULL;

 	kfd->init_complete = true;
-	dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
+	dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
 		 kfd->pdev->device);

-	pr_debug("kfd: Starting kfd with the following scheduling policy %d\n",
+	pr_debug("Starting kfd with the following scheduling policy %d\n",
 		sched_policy);

 	goto out;
@@ -325,11 +323,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 kfd_interrupt_error:
 	kfd_topology_remove_device(kfd);
 kfd_topology_add_device_error:
+	kfd_doorbell_fini(kfd);
+kfd_doorbell_error:
 	kfd_gtt_sa_fini(kfd);
 kfd_gtt_sa_init_error:
 	kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
 	dev_err(kfd_device,
-		"device (%x:%x) NOT added due to errors\n",
+		"device %x:%x NOT added due to errors\n",
 		kfd->pdev->vendor, kfd->pdev->device);
 out:
 	return kfd->init_complete;
@@ -342,6 +342,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
 		amd_iommu_free_device(kfd->pdev);
 		kfd_interrupt_exit(kfd);
 		kfd_topology_remove_device(kfd);
+		kfd_doorbell_fini(kfd);
 		kfd_gtt_sa_fini(kfd);
 		kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
 	}
@@ -351,8 +352,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)

 void kgd2kfd_suspend(struct kfd_dev *kfd)
 {
-	BUG_ON(kfd == NULL);
-
 	if (kfd->init_complete) {
 		kfd->dqm->ops.stop(kfd->dqm);
 		amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
@@ -366,14 +365,15 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
 	unsigned int pasid_limit;
 	int err;

-	BUG_ON(kfd == NULL);
-
 	pasid_limit = kfd_get_pasid_limit();

 	if (kfd->init_complete) {
 		err = amd_iommu_init_device(kfd->pdev, pasid_limit);
-		if (err < 0)
+		if (err < 0) {
+			dev_err(kfd_device, "failed to initialize iommu\n");
 			return -ENXIO;
+		}
+
 		amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
 						iommu_pasid_shutdown_callback);
 		amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb);
@@ -402,26 +402,27 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
 				unsigned int chunk_size)
 {
-	unsigned int num_of_bits;
+	unsigned int num_of_longs;

-	BUG_ON(!kfd);
-	BUG_ON(!kfd->gtt_mem);
-	BUG_ON(buf_size < chunk_size);
-	BUG_ON(buf_size == 0);
-	BUG_ON(chunk_size == 0);
+	if (WARN_ON(buf_size < chunk_size))
+		return -EINVAL;
+	if (WARN_ON(buf_size == 0))
+		return -EINVAL;
+	if (WARN_ON(chunk_size == 0))
+		return -EINVAL;

 	kfd->gtt_sa_chunk_size = chunk_size;
 	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;

-	num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE;
-	BUG_ON(num_of_bits == 0);
+	num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
+		BITS_PER_LONG;

-	kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL);
+	kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);

 	if (!kfd->gtt_sa_bitmap)
 		return -ENOMEM;

-	pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
+	pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
 			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);

 	mutex_init(&kfd->gtt_sa_lock);
@@ -455,8 +456,6 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
 {
 	unsigned int found, start_search, cur_size;

-	BUG_ON(!kfd);
-
 	if (size == 0)
 		return -EINVAL;

@@ -467,7 +466,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
 	if ((*mem_obj) == NULL)
 		return -ENOMEM;

-	pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size);
+	pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);

 	start_search = 0;

@@ -479,7 +478,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
 					kfd->gtt_sa_num_of_chunks,
 					start_search);

-	pr_debug("kfd: found = %d\n", found);
+	pr_debug("Found = %d\n", found);

 	/* If there wasn't any free chunk, bail out */
 	if (found == kfd->gtt_sa_num_of_chunks)
@@ -497,12 +496,12 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
 					found,
 					kfd->gtt_sa_chunk_size);

-	pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n",
+	pr_debug("gpu_addr = %p, cpu_addr = %p\n",
 			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);

 	/* If we need only one chunk, mark it as allocated and get out */
 	if (size <= kfd->gtt_sa_chunk_size) {
-		pr_debug("kfd: single bit\n");
+		pr_debug("Single bit\n");
 		set_bit(found, kfd->gtt_sa_bitmap);
 		goto kfd_gtt_out;
 	}
@@ -537,7 +536,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,

 	} while (cur_size > 0);

-	pr_debug("kfd: range_start = %d, range_end = %d\n",
+	pr_debug("range_start = %d, range_end = %d\n",
 		(*mem_obj)->range_start, (*mem_obj)->range_end);

 	/* Mark the chunks as allocated */
@@ -551,7 +550,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
 	return 0;

 kfd_gtt_no_free_chunk:
-	pr_debug("kfd: allocation failed with mem_obj = %p\n", mem_obj);
+	pr_debug("Allocation failed with mem_obj = %p\n", mem_obj);
 	mutex_unlock(&kfd->gtt_sa_lock);
 	kfree(mem_obj);
 	return -ENOMEM;
@@ -561,13 +560,11 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
 {
 	unsigned int bit;

-	BUG_ON(!kfd);
-
 	/* Act like kfree when trying to free a NULL object */
 	if (!mem_obj)
 		return 0;

-	pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n",
+	pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
 			mem_obj, mem_obj->range_start, mem_obj->range_end);

 	mutex_lock(&kfd->gtt_sa_lock);

--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -79,20 +79,17 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)

 unsigned int get_queues_num(struct device_queue_manager *dqm)
 {
-	BUG_ON(!dqm || !dqm->dev);
 	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
 				KGD_MAX_QUEUES);
 }

 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
 {
-	BUG_ON(!dqm || !dqm->dev);
 	return dqm->dev->shared_resources.num_queue_per_pipe;
 }

 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
 {
-	BUG_ON(!dqm || !dqm->dev);
 	return dqm->dev->shared_resources.num_pipe_per_mec;
 }

@@ -121,7 +118,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,

 	/* Kaveri kfd vmid's starts from vmid 8 */
 	allocated_vmid = bit + KFD_VMID_START_OFFSET;
-	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
+	pr_debug("vmid allocation %d\n", allocated_vmid);
 	qpd->vmid = allocated_vmid;
 	q->properties.vmid = allocated_vmid;

@@ -152,42 +149,38 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 {
 	int retval;

-	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);
-
-	pr_debug("kfd: In func %s\n", __func__);
 	print_queue(q);

 	mutex_lock(&dqm->lock);

 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
-		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+		pr_warn("Can't create new usermode queue because %d queues were already created\n",
 				dqm->total_queue_count);
-		mutex_unlock(&dqm->lock);
-		return -EPERM;
+		retval = -EPERM;
+		goto out_unlock;
 	}

 	if (list_empty(&qpd->queues_list)) {
 		retval = allocate_vmid(dqm, qpd, q);
-		if (retval != 0) {
-			mutex_unlock(&dqm->lock);
-			return retval;
-		}
+		if (retval)
+			goto out_unlock;
 	}
 	*allocated_vmid = qpd->vmid;
 	q->properties.vmid = qpd->vmid;

 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
 		retval = create_compute_queue_nocpsch(dqm, q, qpd);
-	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
+	else
+		retval = -EINVAL;

-	if (retval != 0) {
+	if (retval) {
 		if (list_empty(&qpd->queues_list)) {
 			deallocate_vmid(dqm, qpd, q);
 			*allocated_vmid = 0;
 		}
-		mutex_unlock(&dqm->lock);
-		return retval;
+		goto out_unlock;
 	}

 	list_add(&q->list, &qpd->queues_list);
@@ -205,8 +198,9 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 	pr_debug("Total of %d queues are accountable so far\n",
 			dqm->total_queue_count);

+out_unlock:
 	mutex_unlock(&dqm->lock);
-	return 0;
+	return retval;
 }

 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
@@ -216,7 +210,8 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)

 	set = false;

-	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_per_mec(dqm);
+	for (pipe = dqm->next_pipe_to_allocate, i = 0;
+			i < get_pipes_per_mec(dqm);
 			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

 		if (!is_pipe_enabled(dqm, 0, pipe))
@@ -239,8 +234,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
 	if (!set)
 		return -EBUSY;

-	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
-				__func__, q->pipe, q->queue);
+	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
 	/* horizontal hqd allocation */
 	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

@@ -260,36 +254,38 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 	int retval;
 	struct mqd_manager *mqd;

-	BUG_ON(!dqm || !q || !qpd);
-
 	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
-	if (mqd == NULL)
+	if (!mqd)
 		return -ENOMEM;

 	retval = allocate_hqd(dqm, q);
-	if (retval != 0)
+	if (retval)
 		return retval;

 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
-	if (retval != 0) {
-		deallocate_hqd(dqm, q);
-		return retval;
-	}
+	if (retval)
+		goto out_deallocate_hqd;

-	pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n",
-			q->pipe,
-			q->queue);
+	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
+			q->pipe, q->queue);

-	retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
-			q->queue, (uint32_t __user *) q->properties.write_ptr);
-	if (retval != 0) {
-		deallocate_hqd(dqm, q);
-		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
-		return retval;
-	}
+	dqm->dev->kfd2kgd->set_scratch_backing_va(
+			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);
+
+	retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
+			       q->process->mm);
+	if (retval)
+		goto out_uninit_mqd;

 	return 0;
+
+out_uninit_mqd:
+	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+out_deallocate_hqd:
+	deallocate_hqd(dqm, q);
+
+	return retval;
 }

 static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
@@ -299,12 +295,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
 	int retval;
 	struct mqd_manager *mqd;

-	BUG_ON(!dqm || !q || !q->mqd || !qpd);
-
 	retval = 0;

-	pr_debug("kfd: In Func %s\n", __func__);
-
 	mutex_lock(&dqm->lock);

 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
@@ -323,7 +315,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
 		dqm->sdma_queue_count--;
 		deallocate_sdma_queue(dqm, q->sdma_id);
 	} else {
-		pr_debug("q->properties.type is invalid (%d)\n",
+		pr_debug("q->properties.type %d is invalid\n",
 				q->properties.type);
 		retval = -EINVAL;
 		goto out;
@@ -334,7 +326,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
 				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
 				q->pipe, q->queue);

-	if (retval != 0)
+	if (retval)
 		goto out;

 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
@@ -364,14 +356,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 	struct mqd_manager *mqd;
 	bool prev_active = false;

-	BUG_ON(!dqm || !q || !q->mqd);
-
 	mutex_lock(&dqm->lock);
 	mqd = dqm->ops.get_mqd_manager(dqm,
 			get_mqd_type_from_queue_type(q->properties.type));
-	if (mqd == NULL) {
-		mutex_unlock(&dqm->lock);
-		return -ENOMEM;
+	if (!mqd) {
+		retval = -ENOMEM;
+		goto out_unlock;
 	}

 	if (q->properties.is_active)
@@ -385,12 +375,13 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
 	if ((q->properties.is_active) && (!prev_active))
 		dqm->queue_count++;
-	else if ((!q->properties.is_active) && (prev_active))
+	else if (!q->properties.is_active && prev_active)
 		dqm->queue_count--;

 	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
 		retval = execute_queues_cpsch(dqm, false);

+out_unlock:
 	mutex_unlock(&dqm->lock);
 	return retval;
 }
@@ -400,15 +391,16 @@ static struct mqd_manager *get_mqd_manager_nocpsch(
 {
 	struct mqd_manager *mqd;

-	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);
+	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+		return NULL;

-	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);
+	pr_debug("mqd type %d\n", type);

 	mqd = dqm->mqds[type];
 	if (!mqd) {
 		mqd = mqd_manager_init(type, dqm->dev);
-		if (mqd == NULL)
-			pr_err("kfd: mqd manager is NULL");
+		if (!mqd)
+			pr_err("mqd manager is NULL");
 		dqm->mqds[type] = mqd;
 	}

@@ -421,11 +413,7 @@ static int register_process_nocpsch(struct device_queue_manager *dqm,
 	struct device_process_node *n;
 	int retval;

-	BUG_ON(!dqm || !qpd);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
-	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
+	n = kzalloc(sizeof(*n), GFP_KERNEL);
 	if (!n)
 		return -ENOMEM;

@@ -449,10 +437,6 @@ static int unregister_process_nocpsch(struct device_queue_manager *dqm,
 	int retval;
 	struct device_process_node *cur, *next;

-	BUG_ON(!dqm || !qpd);
-
-	pr_debug("In func %s\n", __func__);
-
 	pr_debug("qpd->queues_list is %s\n",
 			list_empty(&qpd->queues_list) ? "empty" : "not empty");

@@ -493,51 +477,39 @@ static void init_interrupts(struct device_queue_manager *dqm)
 {
 	unsigned int i;

-	BUG_ON(dqm == NULL);
-
 	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
 		if (is_pipe_enabled(dqm, 0, i))
 			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
 }

-static int init_scheduler(struct device_queue_manager *dqm)
-{
-	int retval = 0;
-
-	BUG_ON(!dqm);
-
-	pr_debug("kfd: In %s\n", __func__);
-
-	return retval;
-}
-
 static int initialize_nocpsch(struct device_queue_manager *dqm)
 {
-	int i;
+	int pipe, queue;

-	BUG_ON(!dqm);
+	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

-	pr_debug("kfd: In func %s num of pipes: %d\n",
-			__func__, get_pipes_per_mec(dqm));
+	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
+					sizeof(unsigned int), GFP_KERNEL);
+	if (!dqm->allocated_queues)
+		return -ENOMEM;

 	mutex_init(&dqm->lock);
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
 	dqm->sdma_queue_count = 0;
-	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
-					sizeof(unsigned int), GFP_KERNEL);
-	if (!dqm->allocated_queues) {
-		mutex_destroy(&dqm->lock);
-		return -ENOMEM;
-	}

-	for (i = 0; i < get_pipes_per_mec(dqm); i++)
-		dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1;
+	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
+		int pipe_offset = pipe * get_queues_per_pipe(dqm);
+
+		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
+			if (test_bit(pipe_offset + queue,
+				     dqm->dev->shared_resources.queue_bitmap))
+				dqm->allocated_queues[pipe] |= 1 << queue;
+	}

 	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
 	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

-	init_scheduler(dqm);
 	return 0;
 }

@@ -545,9 +517,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm)
 {
 	int i;

-	BUG_ON(!dqm);
-
-	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
+	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

 	kfree(dqm->allocated_queues);
 	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
@@ -604,33 +574,34 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 		return -ENOMEM;

 	retval = allocate_sdma_queue(dqm, &q->sdma_id);
-	if (retval != 0)
+	if (retval)
 		return retval;

 	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
 	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;

-	pr_debug("kfd: sdma id is:    %d\n", q->sdma_id);
-	pr_debug("     sdma queue id: %d\n", q->properties.sdma_queue_id);
-	pr_debug("     sdma engine id: %d\n", q->properties.sdma_engine_id);
+	pr_debug("SDMA id is:    %d\n", q->sdma_id);
+	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
+	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

 	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
-	if (retval != 0) {
-		deallocate_sdma_queue(dqm, q->sdma_id);
-		return retval;
-	}
+	if (retval)
+		goto out_deallocate_sdma_queue;

-	retval = mqd->load_mqd(mqd, q->mqd, 0,
-				0, NULL);
-	if (retval != 0) {
-		deallocate_sdma_queue(dqm, q->sdma_id);
-		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
-		return retval;
-	}
+	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
+	if (retval)
+		goto out_uninit_mqd;

 	return 0;
+
+out_uninit_mqd:
+	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+out_deallocate_sdma_queue:
+	deallocate_sdma_queue(dqm, q->sdma_id);
+
+	return retval;
 }

 /*
@@ -642,10 +613,6 @@ static int set_sched_resources(struct device_queue_manager *dqm)
 	int i, mec;
 	struct scheduling_resources res;

-	BUG_ON(!dqm);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
 	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
 	res.vmid_mask <<= KFD_VMID_START_OFFSET;

@@ -663,7 +630,8 @@ static int set_sched_resources(struct device_queue_manager *dqm)

 		/* This situation may be hit in the future if a new HW
 		 * generation exposes more than 64 queues. If so, the
-		 * definition of res.queue_mask needs updating */
+		 * definition of res.queue_mask needs updating
+		 */
 		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
 			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
 			break;
@@ -674,9 +642,9 @@ static int set_sched_resources(struct device_queue_manager *dqm)
 	res.gws_mask = res.oac_mask = res.gds_heap_base =
 						res.gds_heap_size = 0;

-	pr_debug("kfd: scheduling resources:\n"
-			"      vmid mask: 0x%8X\n"
-			"      queue mask: 0x%8llX\n",
+	pr_debug("Scheduling resources:\n"
+			"vmid mask: 0x%8X\n"
+			"queue mask: 0x%8llX\n",
 			res.vmid_mask, res.queue_mask);

 	return pm_send_set_resources(&dqm->packets, &res);
@@ -686,10 +654,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
 {
 	int retval;

-	BUG_ON(!dqm);
-
-	pr_debug("kfd: In func %s num of pipes: %d\n",
-			__func__, get_pipes_per_mec(dqm));
+	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

 	mutex_init(&dqm->lock);
 	INIT_LIST_HEAD(&dqm->queues);
@@ -697,13 +662,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
 	dqm->sdma_queue_count = 0;
 	dqm->active_runlist = false;
 	retval = dqm->ops_asic_specific.initialize(dqm);
-	if (retval != 0)
-		goto fail_init_pipelines;
-
-	return 0;
+	if (retval)
+		mutex_destroy(&dqm->lock);

-fail_init_pipelines:
-	mutex_destroy(&dqm->lock);
 	return retval;
 }

@@ -712,25 +673,23 @@ static int start_cpsch(struct device_queue_manager *dqm)
 	struct device_process_node *node;
 	int retval;

-	BUG_ON(!dqm);
-
 	retval = 0;

 	retval = pm_init(&dqm->packets, dqm);
-	if (retval != 0)
+	if (retval)
 		goto fail_packet_manager_init;

 	retval = set_sched_resources(dqm);
-	if (retval != 0)
+	if (retval)
 		goto fail_set_sched_resources;

-	pr_debug("kfd: allocating fence memory\n");
+	pr_debug("Allocating fence memory\n");

 	/* allocate fence memory on the gart */
 	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
 					&dqm->fence_mem);

-	if (retval != 0)
+	if (retval)
 		goto fail_allocate_vidmem;

 	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
@@ -758,8 +717,6 @@ static int stop_cpsch(struct device_queue_manager *dqm)
 	struct device_process_node *node;
 	struct kfd_process_device *pdd;

-	BUG_ON(!dqm);
-
 	destroy_queues_cpsch(dqm, true, true);

 	list_for_each_entry(node, &dqm->queues, list) {
@@ -776,13 +733,9 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
 					struct kernel_queue *kq,
 					struct qcm_process_device *qpd)
 {
-	BUG_ON(!dqm || !kq || !qpd);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
 	mutex_lock(&dqm->lock);
 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
-		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
+		pr_warn("Can't create new kernel queue because %d queues were already created\n",
 				dqm->total_queue_count);
 		mutex_unlock(&dqm->lock);
 		return -EPERM;
@@ -809,10 +762,6 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
 					struct kernel_queue *kq,
 					struct qcm_process_device *qpd)
 {
-	BUG_ON(!dqm || !kq);
-
-	pr_debug("kfd: In %s\n", __func__);
-
 	mutex_lock(&dqm->lock);
 	/* here we actually preempt the DIQ */
 	destroy_queues_cpsch(dqm, true, false);
@@ -844,8 +793,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	int retval;
 	struct mqd_manager *mqd;

-	BUG_ON(!dqm || !q || !qpd);
-
 	retval = 0;

 	if (allocate_vmid)
@@ -854,7 +801,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	mutex_lock(&dqm->lock);

 	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
-		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+		pr_warn("Can't create new usermode queue because %d queues were already created\n",
 				dqm->total_queue_count);
 		retval = -EPERM;
 		goto out;
@@ -866,15 +813,15 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	mqd = dqm->ops.get_mqd_manager(dqm,
 			get_mqd_type_from_queue_type(q->properties.type));

-	if (mqd == NULL) {
-		mutex_unlock(&dqm->lock);
-		return -ENOMEM;
+	if (!mqd) {
+		retval = -ENOMEM;
+		goto out;
 	}

 	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
 	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
 				&q->gart_mqd_addr, &q->properties);
-	if (retval != 0)
+	if (retval)
 		goto out;

 	list_add(&q->list, &qpd->queues_list);
@@ -884,7 +831,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 	}

 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-			dqm->sdma_queue_count++;
+		dqm->sdma_queue_count++;
 	/*
 	 * Unconditionally increment this counter, regardless of the queue's
 	 * type or whether the queue is active.
@@ -903,12 +850,11 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
 				unsigned int fence_value,
 				unsigned long timeout)
 {
-	BUG_ON(!fence_addr);
 	timeout += jiffies;

 	while (*fence_addr != fence_value) {
 		if (time_after(jiffies, timeout)) {
-			pr_err("kfd: qcm fence wait loop timeout expired\n");
+			pr_err("qcm fence wait loop timeout expired\n");
 			return -ETIME;
 		}
 		schedule();
@@ -932,8 +878,6 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,
 	enum kfd_preempt_type_filter preempt_type;
 	struct kfd_process_device *pdd;

-	BUG_ON(!dqm);
-
 	retval = 0;

 	if (lock)
@@ -941,7 +885,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,
 	if (!dqm->active_runlist)
 		goto out;

-	pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
+	pr_debug("Before destroying queues, sdma queue count is : %u\n",
 		dqm->sdma_queue_count);

 	if (dqm->sdma_queue_count > 0) {
@@ -955,7 +899,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,

 	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
 			preempt_type, 0, false, 0);
-	if (retval != 0)
+	if (retval)
 		goto out;

 	*dqm->fence_addr = KFD_FENCE_INIT;
@@ -964,7 +908,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,
 	/* should be timed out */
 	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
 				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
-	if (retval != 0) {
+	if (retval) {
 		pdd = kfd_get_process_device_data(dqm->dev,
 				kfd_get_process(current));
 		pdd->reset_wavefronts = true;
@@ -983,14 +927,12 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
 {
 	int retval;

-	BUG_ON(!dqm);
-
 	if (lock)
 		mutex_lock(&dqm->lock);

 	retval = destroy_queues_cpsch(dqm, false, false);
-	if (retval != 0) {
-		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
+	if (retval) {
+		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption");
 		goto out;
 	}

@@ -1005,8 +947,8 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
 	}

 	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
-	if (retval != 0) {
-		pr_err("kfd: failed to execute runlist");
+	if (retval) {
+		pr_err("failed to execute runlist");
 		goto out;
 	}
 	dqm->active_runlist = true;
@@ -1025,8 +967,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 	struct mqd_manager *mqd;
 	bool preempt_all_queues;

-	BUG_ON(!dqm || !qpd || !q);
-
 	preempt_all_queues = false;

 	retval = 0;
@@ -1098,8 +1038,6 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
 {
 	bool retval;

-	pr_debug("kfd: In func %s\n", __func__);
-
 	mutex_lock(&dqm->lock);

 	if (alternate_aperture_size == 0) {
@@ -1120,14 +1058,11 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
 		uint64_t base = (uintptr_t)alternate_aperture_base;
 		uint64_t limit = base + alternate_aperture_size - 1;

-		if (limit <= base)
-			goto out;
-
-		if ((base & APE1_FIXED_BITS_MASK) != 0)
-			goto out;
-
-		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
+		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+			retval = false;
 			goto out;
+		}

 		qpd->sh_mem_ape1_base = base >> 16;
 		qpd->sh_mem_ape1_limit = limit >> 16;
@@ -1144,27 +1079,22 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
 	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
 		program_sh_mem_settings(dqm, qpd);

-	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
+	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
 		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
 		qpd->sh_mem_ape1_limit);

-	mutex_unlock(&dqm->lock);
-	return retval;
-
 out:
 	mutex_unlock(&dqm->lock);
-	return false;
+	return retval;
 }

 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 {
 	struct device_queue_manager *dqm;

-	BUG_ON(!dev);
+	pr_debug("Loading device queue manager\n");

-	pr_debug("kfd: loading device queue manager\n");
-
-	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
+	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
 	if (!dqm)
 		return NULL;

@@ -1202,8 +1132,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
 		break;
 	default:
-		BUG();
-		break;
+		pr_err("Invalid scheduling policy %d\n", sched_policy);
+		goto out_free;
 	}

 	switch (dev->device_info->asic_family) {
@@ -1216,18 +1146,16 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		break;
 	}

-	if (dqm->ops.initialize(dqm) != 0) {
-		kfree(dqm);
-		return NULL;
-	}
+	if (!dqm->ops.initialize(dqm))
+		return dqm;

-	return dqm;
+out_free:
+	kfree(dqm);
+	return NULL;
 }

 void device_queue_manager_uninit(struct device_queue_manager *dqm)
 {
-	BUG_ON(!dqm);
-
 	dqm->ops.uninitialize(dqm);
 	kfree(dqm);
 }
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -24,6 +24,7 @@
 #include "kfd_device_queue_manager.h"
 #include "cik_regs.h"
 #include "oss/oss_2_4_sh_mask.h"
+#include "gca/gfx_7_2_sh_mask.h"

 static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
 				   struct qcm_process_device *qpd,
@@ -65,7 +66,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
 	 * for LDS/Scratch and GPUVM.
 	 */

-	BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
+	WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
 		top_address_nybble == 0);

 	return PRIVATE_BASE(top_address_nybble << 12) |
@@ -104,8 +105,6 @@ static int register_process_cik(struct device_queue_manager *dqm,
 	struct kfd_process_device *pdd;
 	unsigned int temp;

-	BUG_ON(!dqm || !qpd);
-
 	pdd = qpd_to_pdd(qpd);

 	/* check if sh_mem_config register already configured */
@@ -125,9 +124,10 @@ static int register_process_cik(struct device_queue_manager *dqm,
 	} else {
 		temp = get_sh_mem_bases_nybble_64(pdd);
 		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+		qpd->sh_mem_config |= 1  << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
 	}

-	pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
+	pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
 		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);

 	return 0;

--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -67,7 +67,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
 	 * for LDS/Scratch and GPUVM.
 	 */

-	BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
+	WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
 		top_address_nybble == 0);

 	return top_address_nybble << 12 |
@@ -110,8 +110,6 @@ static int register_process_vi(struct device_queue_manager *dqm,
 	struct kfd_process_device *pdd;
 	unsigned int temp;

-	BUG_ON(!dqm || !qpd);
-
 	pdd = qpd_to_pdd(qpd);

 	/* check if sh_mem_config register already configured */
@@ -137,9 +135,11 @@ static int register_process_vi(struct device_queue_manager *dqm,
 		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
 		qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
 			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
+		qpd->sh_mem_config |= 1  <<
+			SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
 	}

-	pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
+	pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
 		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);

 	return 0;

--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -59,7 +59,7 @@ static inline size_t doorbell_process_allocation(void)
 }

 /* Doorbell calculations for device init. */
-void kfd_doorbell_init(struct kfd_dev *kfd)
+int kfd_doorbell_init(struct kfd_dev *kfd)
 {
 	size_t doorbell_start_offset;
 	size_t doorbell_aperture_size;
@@ -95,26 +95,35 @@ void kfd_doorbell_init(struct kfd_dev *kfd)
 	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
 						doorbell_process_allocation());

-	BUG_ON(!kfd->doorbell_kernel_ptr);
+	if (!kfd->doorbell_kernel_ptr)
+		return -ENOMEM;

-	pr_debug("kfd: doorbell initialization:\n");
-	pr_debug("kfd: doorbell base           == 0x%08lX\n",
+	pr_debug("Doorbell initialization:\n");
+	pr_debug("doorbell base           == 0x%08lX\n",
 			(uintptr_t)kfd->doorbell_base);

-	pr_debug("kfd: doorbell_id_offset      == 0x%08lX\n",
+	pr_debug("doorbell_id_offset      == 0x%08lX\n",
 			kfd->doorbell_id_offset);

-	pr_debug("kfd: doorbell_process_limit  == 0x%08lX\n",
+	pr_debug("doorbell_process_limit  == 0x%08lX\n",
 			doorbell_process_limit);

-	pr_debug("kfd: doorbell_kernel_offset  == 0x%08lX\n",
+	pr_debug("doorbell_kernel_offset  == 0x%08lX\n",
 			(uintptr_t)kfd->doorbell_base);

-	pr_debug("kfd: doorbell aperture size  == 0x%08lX\n",
+	pr_debug("doorbell aperture size  == 0x%08lX\n",
 			kfd->shared_resources.doorbell_aperture_size);

-	pr_debug("kfd: doorbell kernel address == 0x%08lX\n",
+	pr_debug("doorbell kernel address == 0x%08lX\n",
 			(uintptr_t)kfd->doorbell_kernel_ptr);
+
+	return 0;
+}
+
+void kfd_doorbell_fini(struct kfd_dev *kfd)
+{
+	if (kfd->doorbell_kernel_ptr)
+		iounmap(kfd->doorbell_kernel_ptr);
 }

 int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
@@ -131,7 +140,7 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)

 	/* Find kfd device according to gpu id */
 	dev = kfd_device_by_id(vma->vm_pgoff);
-	if (dev == NULL)
+	if (!dev)
 		return -EINVAL;

 	/* Calculate physical address of doorbell */
@@ -142,12 +151,11 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)

 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

-	pr_debug("kfd: mapping doorbell page in %s\n"
+	pr_debug("Mapping doorbell page\n"
 		 "     target user address == 0x%08llX\n"
 		 "     physical address    == 0x%08llX\n"
 		 "     vm_flags            == 0x%04lX\n"
 		 "     size                == 0x%04lX\n",
-		 __func__,
 		 (unsigned long long) vma->vm_start, address, vma->vm_flags,
 		 doorbell_process_allocation());

@@ -166,8 +174,6 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 {
 	u32 inx;

-	BUG_ON(!kfd || !doorbell_off);
-
 	mutex_lock(&kfd->doorbell_mutex);
 	inx = find_first_zero_bit(kfd->doorbell_available_index,
 					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
@@ -185,7 +191,7 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 	*doorbell_off = KERNEL_DOORBELL_PASID * (doorbell_process_allocation() /
 							sizeof(u32)) + inx;

-	pr_debug("kfd: get kernel queue doorbell\n"
+	pr_debug("Get kernel queue doorbell\n"
 			 "     doorbell offset   == 0x%08X\n"
 			 "     kernel address    == 0x%08lX\n",
 		*doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
@@ -197,8 +203,6 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
 {
 	unsigned int inx;

-	BUG_ON(!kfd || !db_addr);
-
 	inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);

 	mutex_lock(&kfd->doorbell_mutex);
@@ -210,7 +214,7 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
 {
 	if (db) {
 		writel(value, db);
-		pr_debug("writing %d to doorbell address 0x%p\n", value, db);
+		pr_debug("Writing %d to doorbell address 0x%p\n", value, db);
 	}
 }


--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -110,7 +110,7 @@ static bool allocate_free_slot(struct kfd_process *process,
 			*out_page = page;
 			*out_slot_index = slot;

-			pr_debug("allocated event signal slot in page %p, slot %d\n",
+			pr_debug("Allocated event signal slot in page %p, slot %d\n",
 					page, slot);

 			return true;
@@ -155,9 +155,9 @@ static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p)
 						   struct signal_page,
 						   event_pages)->page_index + 1;

-	pr_debug("allocated new event signal page at %p, for process %p\n",
+	pr_debug("Allocated new event signal page at %p, for process %p\n",
 			page, p);
-	pr_debug("page index is %d\n", page->page_index);
+	pr_debug("Page index is %d\n", page->page_index);

 	list_add(&page->event_pages, &p->signal_event_pages);

@@ -194,7 +194,8 @@ static void release_event_notification_slot(struct signal_page *page,
 	page->free_slots++;

 	/* We don't free signal pages, they are retained by the process
-	 * and reused until it exits. */
+	 * and reused until it exits.
+	 */
 }

 static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
@@ -246,7 +247,7 @@ static u32 make_nonsignal_event_id(struct kfd_process *p)

 	for (id = p->next_nonsignal_event_id;
 		id < KFD_LAST_NONSIGNAL_EVENT_ID &&
-		lookup_event_by_id(p, id) != NULL;
+		lookup_event_by_id(p, id);
 		id++)
 		;

@@ -265,7 +266,7 @@ static u32 make_nonsignal_event_id(struct kfd_process *p)

 	for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
 		id < KFD_LAST_NONSIGNAL_EVENT_ID &&
-		lookup_event_by_id(p, id) != NULL;
+		lookup_event_by_id(p, id);
 		id++)
 		;

@@ -291,13 +292,13 @@ static int create_signal_event(struct file *devkfd,
 				struct kfd_event *ev)
 {
 	if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
-		pr_warn("amdkfd: Signal event wasn't created because limit was reached\n");
+		pr_warn("Signal event wasn't created because limit was reached\n");
 		return -ENOMEM;
 	}

 	if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page,
 						&ev->signal_slot_index)) {
-		pr_warn("amdkfd: Signal event wasn't created because out of kernel memory\n");
+		pr_warn("Signal event wasn't created because out of kernel memory\n");
 		return -ENOMEM;
 	}

@@ -309,11 +310,7 @@ static int create_signal_event(struct file *devkfd,
 	ev->event_id = make_signal_event_id(ev->signal_page,
 						ev->signal_slot_index);

-	pr_debug("signal event number %zu created with id %d, address %p\n",
-			p->signal_event_count, ev->event_id,
-			ev->user_signal_address);
-
-	pr_debug("signal event number %zu created with id %d, address %p\n",
+	pr_debug("Signal event number %zu created with id %d, address %p\n",
 			p->signal_event_count, ev->event_id,
 			ev->user_signal_address);

@@ -345,7 +342,7 @@ void kfd_event_init_process(struct kfd_process *p)

 static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
 {
-	if (ev->signal_page != NULL) {
+	if (ev->signal_page) {
 		release_event_notification_slot(ev->signal_page,
 						ev->signal_slot_index);
 		p->signal_event_count--;
@@ -584,7 +581,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 		 * search faster.
 		 */
 		struct signal_page *page;
-		unsigned i;
+		unsigned int i;

 		list_for_each_entry(page, &p->signal_event_pages, event_pages)
 			for (i = 0; i < SLOTS_PER_PAGE; i++)
@@ -816,7 +813,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
 	/* check required size is logical */
 	if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) !=
 			get_order(vma->vm_end - vma->vm_start)) {
-		pr_err("amdkfd: event page mmap requested illegal size\n");
+		pr_err("Event page mmap requested illegal size\n");
 		return -EINVAL;
 	}

@@ -825,7 +822,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
 	page = lookup_signal_page_by_index(p, page_index);
 	if (!page) {
 		/* Probably KFD bug, but mmap is user-accessible. */
-		pr_debug("signal page could not be found for page_index %u\n",
+		pr_debug("Signal page could not be found for page_index %u\n",
 				page_index);
 		return -EINVAL;
 	}
@@ -836,7 +833,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
 		       | VM_DONTDUMP | VM_PFNMAP;

-	pr_debug("mapping signal page\n");
+	pr_debug("Mapping signal page\n");
 	pr_debug("     start user address  == 0x%08lx\n", vma->vm_start);
 	pr_debug("     end user address    == 0x%08lx\n", vma->vm_end);
 	pr_debug("     pfn                 == 0x%016lX\n", pfn);

--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -304,7 +304,7 @@ int kfd_init_apertures(struct kfd_process *process)
 		id < NUM_OF_SUPPORTED_GPUS) {

 		pdd = kfd_create_process_device_data(dev, process);
-		if (pdd == NULL) {
+		if (!pdd) {
 			pr_err("Failed to create process device data\n");
 			return -1;
 		}

--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -179,7 +179,7 @@ static void interrupt_wq(struct work_struct *work)
 bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry)
 {
 	/* integer and bitwise OR so there is no boolean short-circuiting */
-	unsigned wanted = 0;
+	unsigned int wanted = 0;

 	wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
 								ih_ring_entry);

--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -41,11 +41,11 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 	int retval;
 	union PM4_MES_TYPE_3_HEADER nop;

-	BUG_ON(!kq || !dev);
-	BUG_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ);
+	if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ))
+		return false;

-	pr_debug("amdkfd: In func %s initializing queue type %d size %d\n",
-			__func__, KFD_QUEUE_TYPE_HIQ, queue_size);
+	pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ,
+			queue_size);

 	memset(&prop, 0, sizeof(prop));
 	memset(&nop, 0, sizeof(nop));
@@ -63,23 +63,23 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 						KFD_MQD_TYPE_HIQ);
 		break;
 	default:
-		BUG();
-		break;
+		pr_err("Invalid queue type %d\n", type);
+		return false;
 	}

-	if (kq->mqd == NULL)
+	if (!kq->mqd)
 		return false;

 	prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off);

-	if (prop.doorbell_ptr == NULL) {
-		pr_err("amdkfd: error init doorbell");
+	if (!prop.doorbell_ptr) {
+		pr_err("Failed to initialize doorbell");
 		goto err_get_kernel_doorbell;
 	}

 	retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
 	if (retval != 0) {
-		pr_err("amdkfd: error init pq queues size (%d)\n", queue_size);
+		pr_err("Failed to init pq queues size %d\n", queue_size);
 		goto err_pq_allocate_vidmem;
 	}

@@ -87,7 +87,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 	kq->pq_gpu_addr = kq->pq->gpu_addr;

 	retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size);
-	if (retval == false)
+	if (!retval)
 		goto err_eop_allocate_vidmem;

 	retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
@@ -139,11 +139,12 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,

 	/* assign HIQ to HQD */
 	if (type == KFD_QUEUE_TYPE_HIQ) {
-		pr_debug("assigning hiq to hqd\n");
+		pr_debug("Assigning hiq to hqd\n");
 		kq->queue->pipe = KFD_CIK_HIQ_PIPE;
 		kq->queue->queue = KFD_CIK_HIQ_QUEUE;
 		kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe,
-					kq->queue->queue, NULL);
+				  kq->queue->queue, &kq->queue->properties,
+				  NULL);
 	} else {
 		/* allocate fence for DIQ */

@@ -180,8 +181,6 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,

 static void uninitialize(struct kernel_queue *kq)
 {
-	BUG_ON(!kq);
-
 	if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
 		kq->mqd->destroy_mqd(kq->mqd,
 					NULL,
@@ -211,8 +210,6 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
 	uint32_t wptr, rptr;
 	unsigned int *queue_address;

-	BUG_ON(!kq || !buffer_ptr);
-
 	rptr = *kq->rptr_kernel;
 	wptr = *kq->wptr_kernel;
 	queue_address = (unsigned int *)kq->pq_kernel_addr;
@@ -252,11 +249,7 @@ static void submit_packet(struct kernel_queue *kq)
 {
 #ifdef DEBUG
 	int i;
-#endif
-
-	BUG_ON(!kq);

-#ifdef DEBUG
 	for (i = *kq->wptr_kernel; i < kq->pending_wptr; i++) {
 		pr_debug("0x%2X ", kq->pq_kernel_addr[i]);
 		if (i % 15 == 0)
@@ -272,7 +265,6 @@ static void submit_packet(struct kernel_queue *kq)

 static void rollback_packet(struct kernel_queue *kq)
 {
-	BUG_ON(!kq);
 	kq->pending_wptr = *kq->queue->properties.write_ptr;
 }

@@ -281,9 +273,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 {
 	struct kernel_queue *kq;

-	BUG_ON(!dev);
-
-	kq = kzalloc(sizeof(struct kernel_queue), GFP_KERNEL);
+	kq = kzalloc(sizeof(*kq), GFP_KERNEL);
 	if (!kq)
 		return NULL;

@@ -304,7 +294,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 	}

 	if (!kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) {
-		pr_err("amdkfd: failed to init kernel queue\n");
+		pr_err("Failed to init kernel queue\n");
 		kfree(kq);
 		return NULL;
 	}
@@ -313,32 +303,37 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,

 void kernel_queue_uninit(struct kernel_queue *kq)
 {
-	BUG_ON(!kq);
-
 	kq->ops.uninitialize(kq);
 	kfree(kq);
 }

+/* FIXME: Can this test be removed? */
 static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
 {
 	struct kernel_queue *kq;
 	uint32_t *buffer, i;
 	int retval;

-	BUG_ON(!dev);
-
-	pr_err("amdkfd: starting kernel queue test\n");
+	pr_err("Starting kernel queue test\n");

 	kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
-	BUG_ON(!kq);
+	if (unlikely(!kq)) {
+		pr_err("  Failed to initialize HIQ\n");
+		pr_err("Kernel queue test failed\n");
+		return;
+	}

 	retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
-	BUG_ON(retval != 0);
+	if (unlikely(retval != 0)) {
+		pr_err("  Failed to acquire packet buffer\n");
+		pr_err("Kernel queue test failed\n");
+		return;
+	}
 	for (i = 0; i < 5; i++)
 		buffer[i] = kq->nop_packet;
 	kq->ops.submit_packet(kq);

-	pr_err("amdkfd: ending kernel queue test\n");
+	pr_err("Ending kernel queue test\n");
 }


--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -61,7 +61,8 @@ MODULE_PARM_DESC(send_sigterm,

 static int amdkfd_init_completed;

-int kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
+int kgd2kfd_init(unsigned int interface_version,
+		const struct kgd2kfd_calls **g2f)
 {
 	if (!amdkfd_init_completed)
 		return -EPROBE_DEFER;
@@ -90,7 +91,7 @@ static int __init kfd_module_init(void)
 	/* Verify module parameters */
 	if ((sched_policy < KFD_SCHED_POLICY_HWS) ||
 		(sched_policy > KFD_SCHED_POLICY_NO_HWS)) {
-		pr_err("kfd: sched_policy has invalid value\n");
+		pr_err("sched_policy has invalid value\n");
 		return -1;
 	}

@@ -98,13 +99,13 @@ static int __init kfd_module_init(void)
 	if ((max_num_of_queues_per_device < 1) ||
 		(max_num_of_queues_per_device >
 			KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
-		pr_err("kfd: max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
+		pr_err("max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
 		return -1;
 	}

 	err = kfd_pasid_init();
 	if (err < 0)
-		goto err_pasid;
+		return err;

 	err = kfd_chardev_init();
 	if (err < 0)
@@ -126,7 +127,6 @@ static int __init kfd_module_init(void)
 	kfd_chardev_exit();
 err_ioctl:
 	kfd_pasid_exit();
-err_pasid:
 	return err;
 }


--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -67,7 +67,8 @@ struct mqd_manager {

 	int	(*load_mqd)(struct mqd_manager *mm, void *mqd,
 				uint32_t pipe_id, uint32_t queue_id,
-				uint32_t __user *wptr);
+				struct queue_properties *p,
+				struct mm_struct *mms);

 	int	(*update_mqd)(struct mqd_manager *mm, void *mqd,
 				struct queue_properties *q);

--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -44,10 +44,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
 	struct cik_mqd *m;
 	int retval;

-	BUG_ON(!mm || !q || !mqd);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
 	retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
 					mqd_mem_obj);

@@ -101,7 +97,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
 		m->cp_hqd_iq_rptr = AQL_ENABLE;

 	*mqd = m;
-	if (gart_addr != NULL)
+	if (gart_addr)
 		*gart_addr = addr;
 	retval = mm->update_mqd(mm, m, q);

@@ -115,8 +111,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
 	int retval;
 	struct cik_sdma_rlc_registers *m;

-	BUG_ON(!mm || !mqd || !mqd_mem_obj);
-
 	retval = kfd_gtt_sa_allocate(mm->dev,
 					sizeof(struct cik_sdma_rlc_registers),
 					mqd_mem_obj);
@@ -129,7 +123,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
 	memset(m, 0, sizeof(struct cik_sdma_rlc_registers));

 	*mqd = m;
-	if (gart_addr != NULL)
+	if (gart_addr)
 		*gart_addr = (*mqd_mem_obj)->gpu_addr;

 	retval = mm->update_mqd(mm, m, q);
@@ -140,27 +134,31 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
 static void uninit_mqd(struct mqd_manager *mm, void *mqd,
 			struct kfd_mem_obj *mqd_mem_obj)
 {
-	BUG_ON(!mm || !mqd);
 	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
 }

 static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
 				struct kfd_mem_obj *mqd_mem_obj)
 {
-	BUG_ON(!mm || !mqd);
 	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
 }

 static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr)
+		    uint32_t queue_id, struct queue_properties *p,
+		    struct mm_struct *mms)
 {
-	return mm->dev->kfd2kgd->hqd_load
-		(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+	uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+
+	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+					  (uint32_t __user *)p->write_ptr,
+					  wptr_shift, wptr_mask, mms);
 }

 static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
-			uint32_t pipe_id, uint32_t queue_id,
-			uint32_t __user *wptr)
+			 uint32_t pipe_id, uint32_t queue_id,
+			 struct queue_properties *p, struct mm_struct *mms)
 {
 	return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
 }
@@ -170,10 +168,6 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
 {
 	struct cik_mqd *m;

-	BUG_ON(!mm || !q || !mqd);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
 	m = get_mqd(mqd);
 	m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
 				DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
@@ -188,21 +182,17 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
 	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
 	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
-	m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
-					DOORBELL_OFFSET(q->doorbell_off);
+	m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off);

 	m->cp_hqd_vmid = q->vmid;

-	if (q->format == KFD_QUEUE_FORMAT_AQL) {
+	if (q->format == KFD_QUEUE_FORMAT_AQL)
 		m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
-	}

-	m->cp_hqd_active = 0;
 	q->is_active = false;
 	if (q->queue_size > 0 &&
 			q->queue_address != 0 &&
 			q->queue_percent > 0) {
-		m->cp_hqd_active = 1;
 		q->is_active = true;
 	}

@@ -214,8 +204,6 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
 {
 	struct cik_sdma_rlc_registers *m;

-	BUG_ON(!mm || !mqd || !q);
-
 	m = get_sdma_mqd(mqd);
 	m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) <<
 			SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
@@ -254,7 +242,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
 			unsigned int timeout, uint32_t pipe_id,
 			uint32_t queue_id)
 {
-	return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
+	return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout,
 					pipe_id, queue_id);
 }

@@ -301,10 +289,6 @@ static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
 	struct cik_mqd *m;
 	int retval;

-	BUG_ON(!mm || !q || !mqd || !mqd_mem_obj);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
 	retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
 					mqd_mem_obj);

@@ -359,10 +343,6 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
 {
 	struct cik_mqd *m;

-	BUG_ON(!mm || !q || !mqd);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
 	m = get_mqd(mqd);
 	m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
 				DEFAULT_MIN_AVAIL_SIZE |
@@ -400,8 +380,6 @@ struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 {
 	struct cik_sdma_rlc_registers *m;

-	BUG_ON(!mqd);
-
 	m = (struct cik_sdma_rlc_registers *)mqd;

 	return m;
@@ -412,12 +390,10 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
 {
 	struct mqd_manager *mqd;

-	BUG_ON(!dev);
-	BUG_ON(type >= KFD_MQD_TYPE_MAX);
-
-	pr_debug("kfd: In func %s\n", __func__);
+	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+		return NULL;

-	mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
+	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
 	if (!mqd)
 		return NULL;


--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -85,7 +85,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
 		m->cp_hqd_iq_rptr = 1;

 	*mqd = m;
-	if (gart_addr != NULL)
+	if (gart_addr)
 		*gart_addr = addr;
 	retval = mm->update_mqd(mm, m, q);

@@ -94,10 +94,15 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,

 static int load_mqd(struct mqd_manager *mm, void *mqd,
 			uint32_t pipe_id, uint32_t queue_id,
-			uint32_t __user *wptr)
+			struct queue_properties *p, struct mm_struct *mms)
 {
-	return mm->dev->kfd2kgd->hqd_load
-		(mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
+	/* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+	uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+	uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+
+	return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+					  (uint32_t __user *)p->write_ptr,
+					  wptr_shift, wptr_mask, mms);
 }

 static int __update_mqd(struct mqd_manager *mm, void *mqd,
@@ -106,10 +111,6 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 {
 	struct vi_mqd *m;

-	BUG_ON(!mm || !q || !mqd);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
 	m = get_mqd(mqd);

 	m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
@@ -117,7 +118,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 			mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
 	m->cp_hqd_pq_control |=
 			ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
-	pr_debug("kfd: cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+	pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);

 	m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
 	m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
@@ -126,10 +127,9 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 	m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);

 	m->cp_hqd_pq_doorbell_control =
-		1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT |
 		q->doorbell_off <<
 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
-	pr_debug("kfd: cp_hqd_pq_doorbell_control 0x%x\n",
+	pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
 			m->cp_hqd_pq_doorbell_control);

 	m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT |
@@ -139,8 +139,15 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 			3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
 			mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT;

-	m->cp_hqd_eop_control |=
-		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1;
+	/*
+	 * HW does not clamp this field correctly. Maximum EOP queue size
+	 * is constrained by per-SE EOP done signal count, which is 8-bit.
+	 * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+	 * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+	 * is safe, giving a maximum field value of 0xA.
+	 */
+	m->cp_hqd_eop_control |= min(0xA,
+		ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
 	m->cp_hqd_eop_base_addr_lo =
 			lower_32_bits(q->eop_ring_buffer_address >> 8);
 	m->cp_hqd_eop_base_addr_hi =
@@ -156,12 +163,10 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
 				2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
 	}

-	m->cp_hqd_active = 0;
 	q->is_active = false;
 	if (q->queue_size > 0 &&
 			q->queue_address != 0 &&
 			q->queue_percent > 0) {
-		m->cp_hqd_active = 1;
 		q->is_active = true;
 	}

@@ -181,14 +186,13 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
 			uint32_t queue_id)
 {
 	return mm->dev->kfd2kgd->hqd_destroy
-		(mm->dev->kgd, type, timeout,
+		(mm->dev->kgd, mqd, type, timeout,
 		pipe_id, queue_id);
 }

 static void uninit_mqd(struct mqd_manager *mm, void *mqd,
 			struct kfd_mem_obj *mqd_mem_obj)
 {
-	BUG_ON(!mm || !mqd);
 	kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
 }

@@ -238,12 +242,10 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
 {
 	struct mqd_manager *mqd;

-	BUG_ON(!dev);
-	BUG_ON(type >= KFD_MQD_TYPE_MAX);
-
-	pr_debug("kfd: In func %s\n", __func__);
+	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+		return NULL;

-	mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
+	mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
 	if (!mqd)
 		return NULL;


--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -26,7 +26,6 @@
 #include "kfd_device_queue_manager.h"
 #include "kfd_kernel_queue.h"
 #include "kfd_priv.h"
-#include "kfd_pm4_headers.h"
 #include "kfd_pm4_headers_vi.h"
 #include "kfd_pm4_opcodes.h"

@@ -35,7 +34,8 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
 {
 	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

-	BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes);
+	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
+	     "Runlist IB overflow");
 	*wptr = temp;
 }

@@ -43,12 +43,12 @@ static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
 {
 	union PM4_MES_TYPE_3_HEADER header;

-	header.u32all = 0;
+	header.u32All = 0;
 	header.opcode = opcode;
 	header.count = packet_size/sizeof(uint32_t) - 2;
 	header.type = PM4_TYPE_3;

-	return header.u32all;
+	return header.u32All;
 }

 static void pm_calc_rlib_size(struct packet_manager *pm,
@@ -58,8 +58,6 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 	unsigned int process_count, queue_count;
 	unsigned int map_queue_size;

-	BUG_ON(!pm || !rlib_size || !over_subscription);
-
 	process_count = pm->dqm->processes_count;
 	queue_count = pm->dqm->queue_count;

@@ -67,15 +65,12 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 	*over_subscription = false;
 	if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) {
 		*over_subscription = true;
-		pr_debug("kfd: over subscribed runlist\n");
+		pr_debug("Over subscribed runlist\n");
 	}

-	map_queue_size =
-		(pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ?
-		sizeof(struct pm4_mes_map_queues) :
-		sizeof(struct pm4_map_queues);
+	map_queue_size = sizeof(struct pm4_mes_map_queues);
 	/* calculate run list ib allocation size */
-	*rlib_size = process_count * sizeof(struct pm4_map_process) +
+	*rlib_size = process_count * sizeof(struct pm4_mes_map_process) +
 		     queue_count * map_queue_size;

 	/*
@@ -83,9 +78,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 	 * when over subscription
 	 */
 	if (*over_subscription)
-		*rlib_size += sizeof(struct pm4_runlist);
+		*rlib_size += sizeof(struct pm4_mes_runlist);

-	pr_debug("kfd: runlist ib size %d\n", *rlib_size);
+	pr_debug("runlist ib size %d\n", *rlib_size);
 }

 static int pm_allocate_runlist_ib(struct packet_manager *pm,
@@ -96,17 +91,16 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
 {
 	int retval;

-	BUG_ON(!pm);
-	BUG_ON(pm->allocated);
-	BUG_ON(is_over_subscription == NULL);
+	if (WARN_ON(pm->allocated))
+		return -EINVAL;

 	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);

 	retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
 					&pm->ib_buffer_obj);

-	if (retval != 0) {
-		pr_err("kfd: failed to allocate runlist IB\n");
+	if (retval) {
+		pr_err("Failed to allocate runlist IB\n");
 		return retval;
 	}

@@ -121,15 +115,16 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
 static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
 			uint64_t ib, size_t ib_size_in_dwords, bool chain)
 {
-	struct pm4_runlist *packet;
+	struct pm4_mes_runlist *packet;

-	BUG_ON(!pm || !buffer || !ib);
+	if (WARN_ON(!ib))
+		return -EFAULT;

-	packet = (struct pm4_runlist *)buffer;
+	packet = (struct pm4_mes_runlist *)buffer;

-	memset(buffer, 0, sizeof(struct pm4_runlist));
-	packet->header.u32all = build_pm4_header(IT_RUN_LIST,
-						sizeof(struct pm4_runlist));
+	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+	packet->header.u32All = build_pm4_header(IT_RUN_LIST,
+						sizeof(struct pm4_mes_runlist));

 	packet->bitfields4.ib_size = ib_size_in_dwords;
 	packet->bitfields4.chain = chain ? 1 : 0;
@@ -144,20 +139,16 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
 static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
 				struct qcm_process_device *qpd)
 {
-	struct pm4_map_process *packet;
+	struct pm4_mes_map_process *packet;
 	struct queue *cur;
 	uint32_t num_queues;

-	BUG_ON(!pm || !buffer || !qpd);
-
-	packet = (struct pm4_map_process *)buffer;
-
-	pr_debug("kfd: In func %s\n", __func__);
+	packet = (struct pm4_mes_map_process *)buffer;

-	memset(buffer, 0, sizeof(struct pm4_map_process));
+	memset(buffer, 0, sizeof(struct pm4_mes_map_process));

-	packet->header.u32all = build_pm4_header(IT_MAP_PROCESS,
-					sizeof(struct pm4_map_process));
+	packet->header.u32All = build_pm4_header(IT_MAP_PROCESS,
+					sizeof(struct pm4_mes_map_process));
 	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
 	packet->bitfields2.process_quantum = 1;
 	packet->bitfields2.pasid = qpd->pqm->process->pasid;
@@ -175,27 +166,26 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
 	packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
 	packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;

+	/* TODO: scratch support */
+	packet->sh_hidden_private_base_vmid = 0;
+
 	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
 	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);

 	return 0;
 }

-static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer,
+static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
 		struct queue *q, bool is_static)
 {
 	struct pm4_mes_map_queues *packet;
 	bool use_static = is_static;

-	BUG_ON(!pm || !buffer || !q);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
 	packet = (struct pm4_mes_map_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_map_queues));
+	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));

-	packet->header.u32all = build_pm4_header(IT_MAP_QUEUES,
-						sizeof(struct pm4_map_queues));
+	packet->header.u32All = build_pm4_header(IT_MAP_QUEUES,
+						sizeof(struct pm4_mes_map_queues));
 	packet->bitfields2.alloc_format =
 		alloc_format__mes_map_queues__one_per_pipe_vi;
 	packet->bitfields2.num_queues = 1;
@@ -223,10 +213,8 @@ static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer,
 		use_static = false; /* no static queues under SDMA */
 		break;
 	default:
-		pr_err("kfd: in %s queue type %d\n", __func__,
-				q->properties.type);
-		BUG();
-		break;
+		WARN(1, "queue type %d", q->properties.type);
+		return -EINVAL;
 	}
 	packet->bitfields3.doorbell_offset =
 			q->properties.doorbell_off;
@@ -246,68 +234,6 @@ static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer,
 	return 0;
 }

-static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
-				struct queue *q, bool is_static)
-{
-	struct pm4_map_queues *packet;
-	bool use_static = is_static;
-
-	BUG_ON(!pm || !buffer || !q);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
-	packet = (struct pm4_map_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_map_queues));
-
-	packet->header.u32all = build_pm4_header(IT_MAP_QUEUES,
-						sizeof(struct pm4_map_queues));
-	packet->bitfields2.alloc_format =
-				alloc_format__mes_map_queues__one_per_pipe;
-	packet->bitfields2.num_queues = 1;
-	packet->bitfields2.queue_sel =
-		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots;
-
-	packet->bitfields2.vidmem = (q->properties.is_interop) ?
-			vidmem__mes_map_queues__uses_video_memory :
-			vidmem__mes_map_queues__uses_no_video_memory;
-
-	switch (q->properties.type) {
-	case KFD_QUEUE_TYPE_COMPUTE:
-	case KFD_QUEUE_TYPE_DIQ:
-		packet->bitfields2.engine_sel =
-				engine_sel__mes_map_queues__compute;
-		break;
-	case KFD_QUEUE_TYPE_SDMA:
-		packet->bitfields2.engine_sel =
-				engine_sel__mes_map_queues__sdma0;
-		use_static = false; /* no static queues under SDMA */
-		break;
-	default:
-		BUG();
-		break;
-	}
-
-	packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset =
-			q->properties.doorbell_off;
-
-	packet->mes_map_queues_ordinals[0].bitfields3.is_static =
-			(use_static) ? 1 : 0;
-
-	packet->mes_map_queues_ordinals[0].mqd_addr_lo =
-			lower_32_bits(q->gart_mqd_addr);
-
-	packet->mes_map_queues_ordinals[0].mqd_addr_hi =
-			upper_32_bits(q->gart_mqd_addr);
-
-	packet->mes_map_queues_ordinals[0].wptr_addr_lo =
-			lower_32_bits((uint64_t)q->properties.write_ptr);
-
-	packet->mes_map_queues_ordinals[0].wptr_addr_hi =
-			upper_32_bits((uint64_t)q->properties.write_ptr);
-
-	return 0;
-}
-
 static int pm_create_runlist_ib(struct packet_manager *pm,
 				struct list_head *queues,
 				uint64_t *rl_gpu_addr,
@@ -322,19 +248,16 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 	struct kernel_queue *kq;
 	bool is_over_subscription;

-	BUG_ON(!pm || !queues || !rl_size_bytes || !rl_gpu_addr);
-
 	rl_wptr = retval = proccesses_mapped = 0;

 	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
 				&alloc_size_bytes, &is_over_subscription);
-	if (retval != 0)
+	if (retval)
 		return retval;

 	*rl_size_bytes = alloc_size_bytes;

-	pr_debug("kfd: In func %s\n", __func__);
-	pr_debug("kfd: building runlist ib process count: %d queues count %d\n",
+	pr_debug("Building runlist ib process count: %d queues count %d\n",
 		pm->dqm->processes_count, pm->dqm->queue_count);

 	/* build the run list ib packet */
@@ -342,42 +265,35 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 		qpd = cur->qpd;
 		/* build map process packet */
 		if (proccesses_mapped >= pm->dqm->processes_count) {
-			pr_debug("kfd: not enough space left in runlist IB\n");
+			pr_debug("Not enough space left in runlist IB\n");
 			pm_release_ib(pm);
 			return -ENOMEM;
 		}

 		retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
-		if (retval != 0)
+		if (retval)
 			return retval;

 		proccesses_mapped++;
-		inc_wptr(&rl_wptr, sizeof(struct pm4_map_process),
+		inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process),
 				alloc_size_bytes);

 		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
 			if (!kq->queue->properties.is_active)
 				continue;

-			pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
+			pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
 				kq->queue->queue, qpd->is_debug);

-			if (pm->dqm->dev->device_info->asic_family ==
-					CHIP_CARRIZO)
-				retval = pm_create_map_queue_vi(pm,
-						&rl_buffer[rl_wptr],
-						kq->queue,
-						qpd->is_debug);
-			else
-				retval = pm_create_map_queue(pm,
+			retval = pm_create_map_queue(pm,
 						&rl_buffer[rl_wptr],
 						kq->queue,
 						qpd->is_debug);
-			if (retval != 0)
+			if (retval)
 				return retval;

 			inc_wptr(&rl_wptr,
-				sizeof(struct pm4_map_queues),
+				sizeof(struct pm4_mes_map_queues),
 				alloc_size_bytes);
 		}

@@ -385,51 +301,44 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
 			if (!q->properties.is_active)
 				continue;

-			pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
+			pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
 				q->queue, qpd->is_debug);

-			if (pm->dqm->dev->device_info->asic_family ==
-					CHIP_CARRIZO)
-				retval = pm_create_map_queue_vi(pm,
-						&rl_buffer[rl_wptr],
-						q,
-						qpd->is_debug);
-			else
-				retval = pm_create_map_queue(pm,
+			retval = pm_create_map_queue(pm,
 						&rl_buffer[rl_wptr],
 						q,
 						qpd->is_debug);

-			if (retval != 0)
+			if (retval)
 				return retval;

 			inc_wptr(&rl_wptr,
-				sizeof(struct pm4_map_queues),
+				sizeof(struct pm4_mes_map_queues),
 				alloc_size_bytes);
 		}
 	}

-	pr_debug("kfd: finished map process and queues to runlist\n");
+	pr_debug("Finished map process and queues to runlist\n");

 	if (is_over_subscription)
-		pm_create_runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr,
-				alloc_size_bytes / sizeof(uint32_t), true);
+		retval = pm_create_runlist(pm, &rl_buffer[rl_wptr],
+					*rl_gpu_addr,
+					alloc_size_bytes / sizeof(uint32_t),
+					true);

 	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
 		pr_debug("0x%2X ", rl_buffer[i]);
 	pr_debug("\n");

-	return 0;
+	return retval;
 }

 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
 {
-	BUG_ON(!dqm);
-
 	pm->dqm = dqm;
 	mutex_init(&pm->lock);
 	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
-	if (pm->priv_queue == NULL) {
+	if (!pm->priv_queue) {
 		mutex_destroy(&pm->lock);
 		return -ENOMEM;
 	}
@@ -440,8 +349,6 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)

 void pm_uninit(struct packet_manager *pm)
 {
-	BUG_ON(!pm);
-
 	mutex_destroy(&pm->lock);
 	kernel_queue_uninit(pm->priv_queue);
 }
@@ -449,25 +356,22 @@ void pm_uninit(struct packet_manager *pm)
 int pm_send_set_resources(struct packet_manager *pm,
 				struct scheduling_resources *res)
 {
-	struct pm4_set_resources *packet;
-
-	BUG_ON(!pm || !res);
-
-	pr_debug("kfd: In func %s\n", __func__);
+	struct pm4_mes_set_resources *packet;
+	int retval = 0;

 	mutex_lock(&pm->lock);
 	pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
 					sizeof(*packet) / sizeof(uint32_t),
-			(unsigned int **)&packet);
-	if (packet == NULL) {
-		mutex_unlock(&pm->lock);
-		pr_err("kfd: failed to allocate buffer on kernel queue\n");
-		return -ENOMEM;
+					(unsigned int **)&packet);
+	if (!packet) {
+		pr_err("Failed to allocate buffer on kernel queue\n");
+		retval = -ENOMEM;
+		goto out;
 	}

-	memset(packet, 0, sizeof(struct pm4_set_resources));
-	packet->header.u32all = build_pm4_header(IT_SET_RESOURCES,
-					sizeof(struct pm4_set_resources));
+	memset(packet, 0, sizeof(struct pm4_mes_set_resources));
+	packet->header.u32All = build_pm4_header(IT_SET_RESOURCES,
+					sizeof(struct pm4_mes_set_resources));

 	packet->bitfields2.queue_type =
 			queue_type__mes_set_resources__hsa_interface_queue_hiq;
@@ -485,9 +389,10 @@ int pm_send_set_resources(struct packet_manager *pm,

 	pm->priv_queue->ops.submit_packet(pm->priv_queue);

+out:
 	mutex_unlock(&pm->lock);

-	return 0;
+	return retval;
 }

 int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
@@ -497,26 +402,24 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 	size_t rl_ib_size, packet_size_dwords;
 	int retval;

-	BUG_ON(!pm || !dqm_queues);
-
 	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
 					&rl_ib_size);
-	if (retval != 0)
+	if (retval)
 		goto fail_create_runlist_ib;

-	pr_debug("kfd: runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
+	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);

-	packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t);
+	packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t);
 	mutex_lock(&pm->lock);

 	retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
 					packet_size_dwords, &rl_buffer);
-	if (retval != 0)
+	if (retval)
 		goto fail_acquire_packet_buffer;

 	retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr,
 					rl_ib_size / sizeof(uint32_t), false);
-	if (retval != 0)
+	if (retval)
 		goto fail_create_runlist;

 	pm->priv_queue->ops.submit_packet(pm->priv_queue);
@@ -530,8 +433,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
 fail_acquire_packet_buffer:
 	mutex_unlock(&pm->lock);
 fail_create_runlist_ib:
-	if (pm->allocated)
-		pm_release_ib(pm);
+	pm_release_ib(pm);
 	return retval;
 }

@@ -539,20 +441,21 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
 			uint32_t fence_value)
 {
 	int retval;
-	struct pm4_query_status *packet;
+	struct pm4_mes_query_status *packet;

-	BUG_ON(!pm || !fence_address);
+	if (WARN_ON(!fence_address))
+		return -EFAULT;

 	mutex_lock(&pm->lock);
 	retval = pm->priv_queue->ops.acquire_packet_buffer(
 			pm->priv_queue,
-			sizeof(struct pm4_query_status) / sizeof(uint32_t),
+			sizeof(struct pm4_mes_query_status) / sizeof(uint32_t),
 			(unsigned int **)&packet);
-	if (retval != 0)
+	if (retval)
 		goto fail_acquire_packet_buffer;

-	packet->header.u32all = build_pm4_header(IT_QUERY_STATUS,
-					sizeof(struct pm4_query_status));
+	packet->header.u32All = build_pm4_header(IT_QUERY_STATUS,
+					sizeof(struct pm4_mes_query_status));

 	packet->bitfields2.context_id = 0;
 	packet->bitfields2.interrupt_sel =
@@ -566,9 +469,6 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
 	packet->data_lo = lower_32_bits((uint64_t)fence_value);

 	pm->priv_queue->ops.submit_packet(pm->priv_queue);
-	mutex_unlock(&pm->lock);
-
-	return 0;

 fail_acquire_packet_buffer:
 	mutex_unlock(&pm->lock);
@@ -582,24 +482,22 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 {
 	int retval;
 	uint32_t *buffer;
-	struct pm4_unmap_queues *packet;
-
-	BUG_ON(!pm);
+	struct pm4_mes_unmap_queues *packet;

 	mutex_lock(&pm->lock);
 	retval = pm->priv_queue->ops.acquire_packet_buffer(
 			pm->priv_queue,
-			sizeof(struct pm4_unmap_queues) / sizeof(uint32_t),
+			sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t),
 			&buffer);
-	if (retval != 0)
+	if (retval)
 		goto err_acquire_packet_buffer;

-	packet = (struct pm4_unmap_queues *)buffer;
-	memset(buffer, 0, sizeof(struct pm4_unmap_queues));
-	pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n",
+	packet = (struct pm4_mes_unmap_queues *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+	pr_debug("static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n",
 		mode, reset, type);
-	packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES,
-					sizeof(struct pm4_unmap_queues));
+	packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES,
+					sizeof(struct pm4_mes_unmap_queues));
 	switch (type) {
 	case KFD_QUEUE_TYPE_COMPUTE:
 	case KFD_QUEUE_TYPE_DIQ:
@@ -611,8 +509,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 			engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
 		break;
 	default:
-		BUG();
-		break;
+		WARN(1, "queue type %d", type);
+		retval = -EINVAL;
+		goto err_invalid;
 	}

 	if (reset)
@@ -636,16 +535,17 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 		break;
 	case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES:
 		packet->bitfields2.queue_sel =
-				queue_sel__mes_unmap_queues__perform_request_on_all_active_queues;
+				queue_sel__mes_unmap_queues__unmap_all_queues;
 		break;
 	case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES:
 		/* in this case, we do not preempt static queues */
 		packet->bitfields2.queue_sel =
-				queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only;
+				queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
 		break;
 	default:
-		BUG();
-		break;
+		WARN(1, "filter %d", mode);
+		retval = -EINVAL;
+		goto err_invalid;
 	}

 	pm->priv_queue->ops.submit_packet(pm->priv_queue);
@@ -653,6 +553,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 	mutex_unlock(&pm->lock);
 	return 0;

+err_invalid:
+	pm->priv_queue->ops.rollback_packet(pm->priv_queue);
 err_acquire_packet_buffer:
 	mutex_unlock(&pm->lock);
 	return retval;
@@ -660,8 +562,6 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,

 void pm_release_ib(struct packet_manager *pm)
 {
-	BUG_ON(!pm);
-
 	mutex_lock(&pm->lock);
 	if (pm->allocated) {
 		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);

--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -32,7 +32,8 @@ int kfd_pasid_init(void)
 {
 	pasid_limit = KFD_MAX_NUM_OF_PROCESSES;

-	pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL);
+	pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long),
+				GFP_KERNEL);
 	if (!pasid_bitmap)
 		return -ENOMEM;

@@ -91,6 +92,6 @@ unsigned int kfd_pasid_alloc(void)

 void kfd_pasid_free(unsigned int pasid)
 {
-	BUG_ON(pasid == 0 || pasid >= pasid_limit);
-	clear_bit(pasid, pasid_bitmap);
+	if (!WARN_ON(pasid == 0 || pasid >= pasid_limit))
+		clear_bit(pasid, pasid_bitmap);
 }
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
@@ -28,112 +28,19 @@
 #define PM4_MES_HEADER_DEFINED
 union PM4_MES_TYPE_3_HEADER {
 	struct {
-		uint32_t reserved1:8;	/* < reserved */
-		uint32_t opcode:8;	/* < IT opcode */
-		uint32_t count:14;	/* < number of DWORDs - 1
-					 * in the information body.
-					 */
-		uint32_t type:2;	/* < packet identifier.
-					 * It should be 3 for type 3 packets
-					 */
+		/* reserved */
+		uint32_t reserved1:8;
+		/* IT opcode */
+		uint32_t opcode:8;
+		/* number of DWORDs - 1 in the information body */
+		uint32_t count:14;
+		/* packet identifier. It should be 3 for type 3 packets */
+		uint32_t type:2;
 	};
 	uint32_t u32all;
 };
 #endif /* PM4_MES_HEADER_DEFINED */

-/* --------------------MES_SET_RESOURCES-------------------- */
-
-#ifndef PM4_MES_SET_RESOURCES_DEFINED
-#define PM4_MES_SET_RESOURCES_DEFINED
-enum set_resources_queue_type_enum {
-	queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
-	queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
-	queue_type__mes_set_resources__hsa_debug_interface_queue = 4
-};
-
-struct pm4_set_resources {
-	union {
-		union PM4_MES_TYPE_3_HEADER header;	/* header */
-		uint32_t ordinal1;
-	};
-
-	union {
-		struct {
-			uint32_t vmid_mask:16;
-			uint32_t unmap_latency:8;
-			uint32_t reserved1:5;
-			enum set_resources_queue_type_enum queue_type:3;
-		} bitfields2;
-		uint32_t ordinal2;
-	};
-
-	uint32_t queue_mask_lo;
-	uint32_t queue_mask_hi;
-	uint32_t gws_mask_lo;
-	uint32_t gws_mask_hi;
-
-	union {
-		struct {
-			uint32_t oac_mask:16;
-			uint32_t reserved2:16;
-		} bitfields7;
-		uint32_t ordinal7;
-	};
-
-	union {
-		struct {
-			uint32_t gds_heap_base:6;
-			uint32_t reserved3:5;
-			uint32_t gds_heap_size:6;
-			uint32_t reserved4:15;
-		} bitfields8;
-		uint32_t ordinal8;
-	};
-
-};
-#endif
-
-/*--------------------MES_RUN_LIST-------------------- */
-
-#ifndef PM4_MES_RUN_LIST_DEFINED
-#define PM4_MES_RUN_LIST_DEFINED
-
-struct pm4_runlist {
-	union {
-		union PM4_MES_TYPE_3_HEADER header;	/* header */
-		uint32_t ordinal1;
-	};
-
-	union {
-		struct {
-			uint32_t reserved1:2;
-			uint32_t ib_base_lo:30;
-		} bitfields2;
-		uint32_t ordinal2;
-	};
-
-	union {
-		struct {
-			uint32_t ib_base_hi:16;
-			uint32_t reserved2:16;
-		} bitfields3;
-		uint32_t ordinal3;
-	};
-
-	union {
-		struct {
-			uint32_t ib_size:20;
-			uint32_t chain:1;
-			uint32_t offload_polling:1;
-			uint32_t reserved3:1;
-			uint32_t valid:1;
-			uint32_t reserved4:8;
-		} bitfields4;
-		uint32_t ordinal4;
-	};
-
-};
-#endif

 /*--------------------MES_MAP_PROCESS-------------------- */

@@ -186,217 +93,58 @@ struct pm4_map_process {
 };
 #endif

-/*--------------------MES_MAP_QUEUES--------------------*/
-
-#ifndef PM4_MES_MAP_QUEUES_DEFINED
-#define PM4_MES_MAP_QUEUES_DEFINED
-enum map_queues_queue_sel_enum {
-	queue_sel__mes_map_queues__map_to_specified_queue_slots = 0,
-	queue_sel__mes_map_queues__map_to_hws_determined_queue_slots = 1,
-	queue_sel__mes_map_queues__enable_process_queues = 2
-};
+#ifndef PM4_MES_MAP_PROCESS_DEFINED_KV_SCRATCH
+#define PM4_MES_MAP_PROCESS_DEFINED_KV_SCRATCH

-enum map_queues_vidmem_enum {
-	vidmem__mes_map_queues__uses_no_video_memory = 0,
-	vidmem__mes_map_queues__uses_video_memory = 1
-};
-
-enum map_queues_alloc_format_enum {
-	alloc_format__mes_map_queues__one_per_pipe = 0,
-	alloc_format__mes_map_queues__all_on_one_pipe = 1
-};
-
-enum map_queues_engine_sel_enum {
-	engine_sel__mes_map_queues__compute = 0,
-	engine_sel__mes_map_queues__sdma0 = 2,
-	engine_sel__mes_map_queues__sdma1 = 3
-};
-
-struct pm4_map_queues {
+struct pm4_map_process_scratch_kv {
 	union {
-		union PM4_MES_TYPE_3_HEADER header;	/* header */
-		uint32_t ordinal1;
-	};
-
-	union {
-		struct {
-			uint32_t reserved1:4;
-			enum map_queues_queue_sel_enum queue_sel:2;
-			uint32_t reserved2:2;
-			uint32_t vmid:4;
-			uint32_t reserved3:4;
-			enum map_queues_vidmem_enum vidmem:2;
-			uint32_t reserved4:6;
-			enum map_queues_alloc_format_enum alloc_format:2;
-			enum map_queues_engine_sel_enum engine_sel:3;
-			uint32_t num_queues:3;
-		} bitfields2;
-		uint32_t ordinal2;
-	};
-
-	struct {
-		union {
-			struct {
-				uint32_t is_static:1;
-				uint32_t reserved5:1;
-				uint32_t doorbell_offset:21;
-				uint32_t reserved6:3;
-				uint32_t queue:6;
-			} bitfields3;
-			uint32_t ordinal3;
-		};
-
-		uint32_t mqd_addr_lo;
-		uint32_t mqd_addr_hi;
-		uint32_t wptr_addr_lo;
-		uint32_t wptr_addr_hi;
-
-	} mes_map_queues_ordinals[1];	/* 1..N of these ordinal groups */
-
-};
-#endif
-
-/*--------------------MES_QUERY_STATUS--------------------*/
-
-#ifndef PM4_MES_QUERY_STATUS_DEFINED
-#define PM4_MES_QUERY_STATUS_DEFINED
-enum query_status_interrupt_sel_enum {
-	interrupt_sel__mes_query_status__completion_status = 0,
-	interrupt_sel__mes_query_status__process_status = 1,
-	interrupt_sel__mes_query_status__queue_status = 2
-};
-
-enum query_status_command_enum {
-	command__mes_query_status__interrupt_only = 0,
-	command__mes_query_status__fence_only_immediate = 1,
-	command__mes_query_status__fence_only_after_write_ack = 2,
-	command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
-};
-
-enum query_status_engine_sel_enum {
-	engine_sel__mes_query_status__compute = 0,
-	engine_sel__mes_query_status__sdma0_queue = 2,
-	engine_sel__mes_query_status__sdma1_queue = 3
-};
-
-struct pm4_query_status {
-	union {
-		union PM4_MES_TYPE_3_HEADER header;	/* header */
-		uint32_t ordinal1;
-	};
-
-	union {
-		struct {
-			uint32_t context_id:28;
-			enum query_status_interrupt_sel_enum interrupt_sel:2;
-			enum query_status_command_enum command:2;
-		} bitfields2;
-		uint32_t ordinal2;
+		union PM4_MES_TYPE_3_HEADER   header; /* header */
+		uint32_t            ordinal1;
 	};

 	union {
 		struct {
 			uint32_t pasid:16;
-			uint32_t reserved1:16;
-		} bitfields3a;
-		struct {
-			uint32_t reserved2:2;
-			uint32_t doorbell_offset:21;
-			uint32_t reserved3:3;
-			enum query_status_engine_sel_enum engine_sel:3;
-			uint32_t reserved4:3;
-		} bitfields3b;
-		uint32_t ordinal3;
-	};
-
-	uint32_t addr_lo;
-	uint32_t addr_hi;
-	uint32_t data_lo;
-	uint32_t data_hi;
-};
-#endif
-
-/*--------------------MES_UNMAP_QUEUES--------------------*/
-
-#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
-#define PM4_MES_UNMAP_QUEUES_DEFINED
-enum unmap_queues_action_enum {
-	action__mes_unmap_queues__preempt_queues = 0,
-	action__mes_unmap_queues__reset_queues = 1,
-	action__mes_unmap_queues__disable_process_queues = 2
-};
-
-enum unmap_queues_queue_sel_enum {
-	queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
-	queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
-	queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2,
-	queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3
-};
-
-enum unmap_queues_engine_sel_enum {
-	engine_sel__mes_unmap_queues__compute = 0,
-	engine_sel__mes_unmap_queues__sdma0 = 2,
-	engine_sel__mes_unmap_queues__sdma1 = 3
-};
-
-struct pm4_unmap_queues {
-	union {
-		union PM4_MES_TYPE_3_HEADER header;	/* header */
-		uint32_t ordinal1;
-	};
-
-	union {
-		struct {
-			enum unmap_queues_action_enum action:2;
-			uint32_t reserved1:2;
-			enum unmap_queues_queue_sel_enum queue_sel:2;
-			uint32_t reserved2:20;
-			enum unmap_queues_engine_sel_enum engine_sel:3;
-			uint32_t num_queues:3;
+			uint32_t reserved1:8;
+			uint32_t diq_enable:1;
+			uint32_t process_quantum:7;
 		} bitfields2;
 		uint32_t ordinal2;
 	};

 	union {
 		struct {
-			uint32_t pasid:16;
-			uint32_t reserved3:16;
-		} bitfields3a;
-		struct {
-			uint32_t reserved4:2;
-			uint32_t doorbell_offset0:21;
-			uint32_t reserved5:9;
-		} bitfields3b;
+			uint32_t page_table_base:28;
+			uint32_t reserved2:4;
+		} bitfields3;
 		uint32_t ordinal3;
 	};

-	union {
-		struct {
-			uint32_t reserved6:2;
-			uint32_t doorbell_offset1:21;
-			uint32_t reserved7:9;
-		} bitfields4;
-		uint32_t ordinal4;
-	};
-
-	union {
-		struct {
-			uint32_t reserved8:2;
-			uint32_t doorbell_offset2:21;
-			uint32_t reserved9:9;
-		} bitfields5;
-		uint32_t ordinal5;
-	};
+	uint32_t reserved3;
+	uint32_t sh_mem_bases;
+	uint32_t sh_mem_config;
+	uint32_t sh_mem_ape1_base;
+	uint32_t sh_mem_ape1_limit;
+	uint32_t sh_hidden_private_base_vmid;
+	uint32_t reserved4;
+	uint32_t reserved5;
+	uint32_t gds_addr_lo;
+	uint32_t gds_addr_hi;

 	union {
 		struct {
-			uint32_t reserved10:2;
-			uint32_t doorbell_offset3:21;
-			uint32_t reserved11:9;
-		} bitfields6;
-		uint32_t ordinal6;
+			uint32_t num_gws:6;
+			uint32_t reserved6:2;
+			uint32_t num_oac:4;
+			uint32_t reserved7:4;
+			uint32_t gds_size:6;
+			uint32_t num_queues:10;
+		} bitfields14;
+		uint32_t ordinal14;
 	};

+	uint32_t completion_signal_lo32;
+uint32_t completion_signal_hi32;
 };
 #endif


--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -30,10 +30,12 @@ union PM4_MES_TYPE_3_HEADER {
 	struct {
 		uint32_t reserved1 : 8; /* < reserved */
 		uint32_t opcode    : 8; /* < IT opcode */
-		uint32_t count     : 14;/* < number of DWORDs - 1 in the
-		information body. */
-		uint32_t type      : 2; /* < packet identifier.
-					It should be 3 for type 3 packets */
+		uint32_t count     : 14;/* < Number of DWORDS - 1 in the
+					 *   information body
+					 */
+		uint32_t type      : 2; /* < packet identifier
+					 *   It should be 3 for type 3 packets
+					 */
 	};
 	uint32_t u32All;
 };
@@ -124,9 +126,10 @@ struct pm4_mes_runlist {
 			uint32_t ib_size:20;
 			uint32_t chain:1;
 			uint32_t offload_polling:1;
-			uint32_t reserved3:1;
+			uint32_t reserved2:1;
 			uint32_t valid:1;
-			uint32_t reserved4:8;
+			uint32_t process_cnt:4;
+			uint32_t reserved3:4;
 		} bitfields4;
 		uint32_t ordinal4;
 	};
@@ -141,8 +144,8 @@ struct pm4_mes_runlist {

 struct pm4_mes_map_process {
 	union {
-		union PM4_MES_TYPE_3_HEADER   header;            /* header */
-		uint32_t            ordinal1;
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		uint32_t ordinal1;
 	};

 	union {
@@ -153,36 +156,48 @@ struct pm4_mes_map_process {
 			uint32_t process_quantum:7;
 		} bitfields2;
 		uint32_t ordinal2;
-};
+	};

 	union {
 		struct {
 			uint32_t page_table_base:28;
-			uint32_t reserved2:4;
+			uint32_t reserved3:4;
 		} bitfields3;
 		uint32_t ordinal3;
 	};

+	uint32_t reserved;
+
 	uint32_t sh_mem_bases;
+	uint32_t sh_mem_config;
 	uint32_t sh_mem_ape1_base;
 	uint32_t sh_mem_ape1_limit;
-	uint32_t sh_mem_config;
+
+	uint32_t sh_hidden_private_base_vmid;
+
+	uint32_t reserved2;
+	uint32_t reserved3;
+
 	uint32_t gds_addr_lo;
 	uint32_t gds_addr_hi;

 	union {
 		struct {
 			uint32_t num_gws:6;
-			uint32_t reserved3:2;
+			uint32_t reserved4:2;
 			uint32_t num_oac:4;
-			uint32_t reserved4:4;
+			uint32_t reserved5:4;
 			uint32_t gds_size:6;
 			uint32_t num_queues:10;
 		} bitfields10;
 		uint32_t ordinal10;
 	};

+	uint32_t completion_signal_lo;
+	uint32_t completion_signal_hi;
+
 };
+
 #endif

 /*--------------------MES_MAP_QUEUES--------------------*/
@@ -335,7 +350,7 @@ enum mes_unmap_queues_engine_sel_enum {
 	engine_sel__mes_unmap_queues__sdmal = 3
 };

-struct PM4_MES_UNMAP_QUEUES {
+struct pm4_mes_unmap_queues {
 	union {
 		union PM4_MES_TYPE_3_HEADER   header;            /* header */
 		uint32_t            ordinal1;
@@ -395,4 +410,101 @@ struct PM4_MES_UNMAP_QUEUES {
 };
 #endif

+#ifndef PM4_MEC_RELEASE_MEM_DEFINED
+#define PM4_MEC_RELEASE_MEM_DEFINED
+enum RELEASE_MEM_event_index_enum {
+	event_index___release_mem__end_of_pipe = 5,
+	event_index___release_mem__shader_done = 6
+};
+
+enum RELEASE_MEM_cache_policy_enum {
+	cache_policy___release_mem__lru = 0,
+	cache_policy___release_mem__stream = 1,
+	cache_policy___release_mem__bypass = 2
+};
+
+enum RELEASE_MEM_dst_sel_enum {
+	dst_sel___release_mem__memory_controller = 0,
+	dst_sel___release_mem__tc_l2 = 1,
+	dst_sel___release_mem__queue_write_pointer_register = 2,
+	dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+
+enum RELEASE_MEM_int_sel_enum {
+	int_sel___release_mem__none = 0,
+	int_sel___release_mem__send_interrupt_only = 1,
+	int_sel___release_mem__send_interrupt_after_write_confirm = 2,
+	int_sel___release_mem__send_data_after_write_confirm = 3
+};
+
+enum RELEASE_MEM_data_sel_enum {
+	data_sel___release_mem__none = 0,
+	data_sel___release_mem__send_32_bit_low = 1,
+	data_sel___release_mem__send_64_bit_data = 2,
+	data_sel___release_mem__send_gpu_clock_counter = 3,
+	data_sel___release_mem__send_cp_perfcounter_hi_lo = 4,
+	data_sel___release_mem__store_gds_data_to_memory = 5
+};
+
+struct pm4_mec_release_mem {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;     /*header */
+		unsigned int ordinal1;
+	};
+
+	union {
+		struct {
+			unsigned int event_type:6;
+			unsigned int reserved1:2;
+			enum RELEASE_MEM_event_index_enum event_index:4;
+			unsigned int tcl1_vol_action_ena:1;
+			unsigned int tc_vol_action_ena:1;
+			unsigned int reserved2:1;
+			unsigned int tc_wb_action_ena:1;
+			unsigned int tcl1_action_ena:1;
+			unsigned int tc_action_ena:1;
+			unsigned int reserved3:6;
+			unsigned int atc:1;
+			enum RELEASE_MEM_cache_policy_enum cache_policy:2;
+			unsigned int reserved4:5;
+		} bitfields2;
+		unsigned int ordinal2;
+	};
+
+	union {
+		struct {
+			unsigned int reserved5:16;
+			enum RELEASE_MEM_dst_sel_enum dst_sel:2;
+			unsigned int reserved6:6;
+			enum RELEASE_MEM_int_sel_enum int_sel:3;
+			unsigned int reserved7:2;
+			enum RELEASE_MEM_data_sel_enum data_sel:3;
+		} bitfields3;
+		unsigned int ordinal3;
+	};
+
+	union {
+		struct {
+			unsigned int reserved8:2;
+			unsigned int address_lo_32b:30;
+		} bitfields4;
+		struct {
+			unsigned int reserved9:3;
+			unsigned int address_lo_64b:29;
+		} bitfields5;
+		unsigned int ordinal4;
+	};
+
+	unsigned int address_hi;
+
+	unsigned int data_lo;
+
+	unsigned int data_hi;
+};
+#endif
+
+enum {
+	CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
+};
+
 #endif
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -239,11 +239,6 @@ enum kfd_preempt_type_filter {
 	KFD_PREEMPT_TYPE_FILTER_BY_PASID
 };

-enum kfd_preempt_type {
-	KFD_PREEMPT_TYPE_WAVEFRONT,
-	KFD_PREEMPT_TYPE_WAVEFRONT_RESET
-};
-
 /**
 * enum kfd_queue_type
 *
@@ -294,13 +289,13 @@ enum kfd_queue_format {
 * @write_ptr: Defines the number of dwords written to the ring buffer.
 *
 * @doorbell_ptr: This field aim is to notify the H/W of new packet written to
- * the queue ring buffer. This field should be similar to write_ptr and the user
- * should update this field after he updated the write_ptr.
+ * the queue ring buffer. This field should be similar to write_ptr and the
+ * user should update this field after he updated the write_ptr.
 *
 * @doorbell_off: The doorbell offset in the doorbell pci-bar.
 *
- * @is_interop: Defines if this is a interop queue. Interop queue means that the
- * queue can access both graphics and compute resources.
+ * @is_interop: Defines if this is a interop queue. Interop queue means that
+ * the queue can access both graphics and compute resources.
 *
 * @is_active: Defines if the queue is active or not.
 *
@@ -352,9 +347,10 @@ struct queue_properties {
 * @properties: The queue properties.
 *
 * @mec: Used only in no cp scheduling mode and identifies to micro engine id
- * that the queue should be execute on.
+ *	 that the queue should be execute on.
 *
- * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id.
+ * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
+ *	  id.
 *
 * @queue: Used only in no cp scheduliong mode and identifies the queue's slot.
 *
@@ -436,6 +432,7 @@ struct qcm_process_device {
 	uint32_t gds_size;
 	uint32_t num_gws;
 	uint32_t num_oac;
+	uint32_t sh_hidden_private_base;
 };

 /* Data that is per-process-per device. */
@@ -520,8 +517,8 @@ struct kfd_process {
 	struct mutex event_mutex;
 	/* All events in process hashed by ID, linked on kfd_event.events. */
 	DECLARE_HASHTABLE(events, 4);
-	struct list_head signal_event_pages;	/* struct slot_page_header.
-								event_pages */
+	/* struct slot_page_header.event_pages */
+	struct list_head signal_event_pages;
 	u32 next_nonsignal_event_id;
 	size_t signal_event_count;
 };
@@ -559,8 +556,10 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 							struct kfd_process *p);

 /* Process device data iterator */
-struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p);
-struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
+struct kfd_process_device *kfd_get_first_process_device_data(
+							struct kfd_process *p);
+struct kfd_process_device *kfd_get_next_process_device_data(
+						struct kfd_process *p,
 						struct kfd_process_device *pdd);
 bool kfd_has_process_device_data(struct kfd_process *p);

@@ -573,7 +572,8 @@ unsigned int kfd_pasid_alloc(void);
 void kfd_pasid_free(unsigned int pasid);

 /* Doorbells */
-void kfd_doorbell_init(struct kfd_dev *kfd);
+int kfd_doorbell_init(struct kfd_dev *kfd);
+void kfd_doorbell_fini(struct kfd_dev *kfd);
 int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
 u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 					unsigned int *doorbell_off);

--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -79,9 +79,7 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread)
 {
 	struct kfd_process *process;

-	BUG_ON(!kfd_process_wq);
-
-	if (thread->mm == NULL)
+	if (!thread->mm)
 		return ERR_PTR(-EINVAL);

 	/* Only the pthreads threading model is supported. */
@@ -101,7 +99,7 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread)
 	/* A prior open of /dev/kfd could have already created the process. */
 	process = find_process(thread);
 	if (process)
-		pr_debug("kfd: process already found\n");
+		pr_debug("Process already found\n");

 	if (!process)
 		process = create_process(thread);
@@ -117,7 +115,7 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
 {
 	struct kfd_process *process;

-	if (thread->mm == NULL)
+	if (!thread->mm)
 		return ERR_PTR(-EINVAL);

 	/* Only the pthreads threading model is supported. */
@@ -202,10 +200,8 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu)
 	struct kfd_process_release_work *work;
 	struct kfd_process *p;

-	BUG_ON(!kfd_process_wq);
-
 	p = container_of(rcu, struct kfd_process, rcu);
-	BUG_ON(atomic_read(&p->mm->mm_count) <= 0);
+	WARN_ON(atomic_read(&p->mm->mm_count) <= 0);

 	mmdrop(p->mm);

@@ -229,7 +225,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 	 * mmu_notifier srcu is read locked
 	 */
 	p = container_of(mn, struct kfd_process, mmu_notifier);
-	BUG_ON(p->mm != mm);
+	if (WARN_ON(p->mm != mm))
+		return;

 	mutex_lock(&kfd_processes_mutex);
 	hash_del_rcu(&p->kfd_processes);
@@ -250,7 +247,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);

 		if (pdd->reset_wavefronts) {
-			pr_warn("amdkfd: Resetting all wave fronts\n");
+			pr_warn("Resetting all wave fronts\n");
 			dbgdev_wave_reset_wavefronts(pdd->dev, p);
 			pdd->reset_wavefronts = false;
 		}
@@ -407,8 +404,6 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
 	struct kfd_process *p;
 	struct kfd_process_device *pdd;

-	BUG_ON(dev == NULL);
-
 	/*
 	 * Look for the process that matches the pasid. If there is no such
 	 * process, we either released it in amdkfd's own notifier, or there
@@ -449,14 +444,16 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
 	mutex_unlock(&p->mutex);
 }

-struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
+struct kfd_process_device *kfd_get_first_process_device_data(
+						struct kfd_process *p)
 {
 	return list_first_entry(&p->per_device_data,
 				struct kfd_process_device,
 				per_device_list);
 }

-struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
+struct kfd_process_device *kfd_get_next_process_device_data(
+						struct kfd_process *p,
 						struct kfd_process_device *pdd)
 {
 	if (list_is_last(&pdd->per_device_list, &p->per_device_data))

--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -32,12 +32,9 @@ static inline struct process_queue_node *get_queue_by_qid(
 {
 	struct process_queue_node *pqn;

-	BUG_ON(!pqm);
-
 	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
-		if (pqn->q && pqn->q->properties.queue_id == qid)
-			return pqn;
-		if (pqn->kq && pqn->kq->queue->properties.queue_id == qid)
+		if ((pqn->q && pqn->q->properties.queue_id == qid) ||
+		    (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
 			return pqn;
 	}

@@ -49,17 +46,13 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
 {
 	unsigned long found;

-	BUG_ON(!pqm || !qid);
-
-	pr_debug("kfd: in %s\n", __func__);
-
 	found = find_first_zero_bit(pqm->queue_slot_bitmap,
 			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

-	pr_debug("kfd: the new slot id %lu\n", found);
+	pr_debug("The new slot id %lu\n", found);

 	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
-		pr_info("amdkfd: Can not open more queues for process with pasid %d\n",
+		pr_info("Cannot open more queues for process with pasid %d\n",
 				pqm->process->pasid);
 		return -ENOMEM;
 	}
@@ -72,13 +65,11 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,

 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
 {
-	BUG_ON(!pqm);
-
 	INIT_LIST_HEAD(&pqm->queues);
 	pqm->queue_slot_bitmap =
 			kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
 					BITS_PER_BYTE), GFP_KERNEL);
-	if (pqm->queue_slot_bitmap == NULL)
+	if (!pqm->queue_slot_bitmap)
 		return -ENOMEM;
 	pqm->process = p;

@@ -90,10 +81,6 @@ void pqm_uninit(struct process_queue_manager *pqm)
 	int retval;
 	struct process_queue_node *pqn, *next;

-	BUG_ON(!pqm);
-
-	pr_debug("In func %s\n", __func__);
-
 	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
 		retval = pqm_destroy_queue(
 				pqm,
@@ -102,7 +89,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
 					pqn->kq->queue->properties.queue_id);

 		if (retval != 0) {
-			pr_err("kfd: failed to destroy queue\n");
+			pr_err("failed to destroy queue\n");
 			return;
 		}
 	}
@@ -117,8 +104,6 @@ static int create_cp_queue(struct process_queue_manager *pqm,
 {
 	int retval;

-	retval = 0;
-
 	/* Doorbell initialized in user space*/
 	q_properties->doorbell_ptr = NULL;

@@ -131,16 +116,13 @@ static int create_cp_queue(struct process_queue_manager *pqm,

 	retval = init_queue(q, q_properties);
 	if (retval != 0)
-		goto err_init_queue;
+		return retval;

 	(*q)->device = dev;
 	(*q)->process = pqm->process;

-	pr_debug("kfd: PQM After init queue");
-
-	return retval;
+	pr_debug("PQM After init queue");

-err_init_queue:
 	return retval;
 }

@@ -161,8 +143,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	int num_queues = 0;
 	struct queue *cur;

-	BUG_ON(!pqm || !dev || !properties || !qid);
-
 	memset(&q_properties, 0, sizeof(struct queue_properties));
 	memcpy(&q_properties, properties, sizeof(struct queue_properties));
 	q = NULL;
@@ -185,7 +165,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		list_for_each_entry(cur, &pdd->qpd.queues_list, list)
 			num_queues++;
 		if (num_queues >= dev->device_info->max_no_of_hqd/2)
-			return (-ENOSPC);
+			return -ENOSPC;
 	}

 	retval = find_available_queue_slot(pqm, qid);
@@ -197,7 +177,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
 	}

-	pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL);
+	pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
 	if (!pqn) {
 		retval = -ENOMEM;
 		goto err_allocate_pqn;
@@ -210,7 +190,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
 		((dev->dqm->processes_count >= VMID_PER_DEVICE) ||
 		(dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
-			pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
+			pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
 			retval = -EPERM;
 			goto err_create_queue;
 		}
@@ -227,7 +207,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 		break;
 	case KFD_QUEUE_TYPE_DIQ:
 		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
-		if (kq == NULL) {
+		if (!kq) {
 			retval = -ENOMEM;
 			goto err_create_queue;
 		}
@@ -238,22 +218,22 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 							kq, &pdd->qpd);
 		break;
 	default:
-		BUG();
-		break;
+		WARN(1, "Invalid queue type %d", type);
+		retval = -EINVAL;
 	}

 	if (retval != 0) {
-		pr_debug("Error dqm create queue\n");
+		pr_err("DQM create queue failed\n");
 		goto err_create_queue;
 	}

-	pr_debug("kfd: PQM After DQM create queue\n");
+	pr_debug("PQM After DQM create queue\n");

 	list_add(&pqn->process_queue_list, &pqm->queues);

 	if (q) {
 		*properties = q->properties;
-		pr_debug("kfd: PQM done creating queue\n");
+		pr_debug("PQM done creating queue\n");
 		print_queue_properties(properties);
 	}

@@ -279,14 +259,11 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)

 	dqm = NULL;

-	BUG_ON(!pqm);
 	retval = 0;

-	pr_debug("kfd: In Func %s\n", __func__);
-
 	pqn = get_queue_by_qid(pqm, qid);
-	if (pqn == NULL) {
-		pr_err("kfd: queue id does not match any known queue\n");
+	if (!pqn) {
+		pr_err("Queue id does not match any known queue\n");
 		return -EINVAL;
 	}

@@ -295,7 +272,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 		dev = pqn->kq->dev;
 	if (pqn->q)
 		dev = pqn->q->device;
-	BUG_ON(!dev);
+	if (WARN_ON(!dev))
+		return -ENODEV;

 	pdd = kfd_get_process_device_data(dev, pqm->process);
 	if (!pdd) {
@@ -335,12 +313,9 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
 	int retval;
 	struct process_queue_node *pqn;

-	BUG_ON(!pqm);
-
 	pqn = get_queue_by_qid(pqm, qid);
 	if (!pqn) {
-		pr_debug("amdkfd: No queue %d exists for update operation\n",
-				qid);
+		pr_debug("No queue %d exists for update operation\n", qid);
 		return -EFAULT;
 	}

@@ -363,8 +338,6 @@ struct kernel_queue *pqm_get_kernel_queue(
 {
 	struct process_queue_node *pqn;

-	BUG_ON(!pqm);
-
 	pqn = get_queue_by_qid(pqm, qid);
 	if (pqn && pqn->kq)
 		return pqn->kq;

--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -65,17 +65,15 @@ void print_queue(struct queue *q)

 int init_queue(struct queue **q, const struct queue_properties *properties)
 {
-	struct queue *tmp;
+	struct queue *tmp_q;

-	BUG_ON(!q);
-
-	tmp = kzalloc(sizeof(struct queue), GFP_KERNEL);
-	if (!tmp)
+	tmp_q = kzalloc(sizeof(*tmp_q), GFP_KERNEL);
+	if (!tmp_q)
 		return -ENOMEM;

-	memcpy(&tmp->properties, properties, sizeof(struct queue_properties));
+	memcpy(&tmp_q->properties, properties, sizeof(*properties));

-	*q = tmp;
+	*q = tmp_q;
 	return 0;
 }


--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -108,9 +108,6 @@ static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size)
 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
 		struct crat_subtype_computeunit *cu)
 {
-	BUG_ON(!dev);
-	BUG_ON(!cu);
-
 	dev->node_props.cpu_cores_count = cu->num_cpu_cores;
 	dev->node_props.cpu_core_id_base = cu->processor_id_low;
 	if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
@@ -123,9 +120,6 @@ static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
 static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
 		struct crat_subtype_computeunit *cu)
 {
-	BUG_ON(!dev);
-	BUG_ON(!cu);
-
 	dev->node_props.simd_id_base = cu->processor_id_low;
 	dev->node_props.simd_count = cu->num_simd_cores;
 	dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
@@ -148,8 +142,6 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu)
 	struct kfd_topology_device *dev;
 	int i = 0;

-	BUG_ON(!cu);
-
 	pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
 			cu->proximity_domain, cu->hsa_capability);
 	list_for_each_entry(dev, &topology_device_list, list) {
@@ -177,8 +169,6 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem)
 	struct kfd_topology_device *dev;
 	int i = 0;

-	BUG_ON(!mem);
-
 	pr_info("Found memory entry in CRAT table with proximity_domain=%d\n",
 			mem->promixity_domain);
 	list_for_each_entry(dev, &topology_device_list, list) {
@@ -223,8 +213,6 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache)
 	struct kfd_topology_device *dev;
 	uint32_t id;

-	BUG_ON(!cache);
-
 	id = cache->processor_id_low;

 	pr_info("Found cache entry in CRAT table with processor_id=%d\n", id);
@@ -274,8 +262,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink)
 	uint32_t id_from;
 	uint32_t id_to;

-	BUG_ON(!iolink);
-
 	id_from = iolink->proximity_domain_from;
 	id_to = iolink->proximity_domain_to;

@@ -323,8 +309,6 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr)
 	struct crat_subtype_iolink *iolink;
 	int ret = 0;

-	BUG_ON(!sub_type_hdr);
-
 	switch (sub_type_hdr->type) {
 	case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
 		cu = (struct crat_subtype_computeunit *)sub_type_hdr;
@@ -368,8 +352,6 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
 	struct kfd_cache_properties *cache;
 	struct kfd_iolink_properties *iolink;

-	BUG_ON(!dev);
-
 	list_del(&dev->list);

 	while (dev->mem_props.next != &dev->mem_props) {
@@ -416,7 +398,7 @@ static struct kfd_topology_device *kfd_create_topology_device(void)
 	struct kfd_topology_device *dev;

 	dev = kfd_alloc_struct(dev);
-	if (dev == NULL) {
+	if (!dev) {
 		pr_err("No memory to allocate a topology device");
 		return NULL;
 	}
@@ -666,7 +648,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.simd_count);

 	if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
-		pr_info_once("kfd: mem_banks_count truncated from %d to %d\n",
+		pr_info_once("mem_banks_count truncated from %d to %d\n",
 				dev->node_props.mem_banks_count,
 				dev->mem_bank_count);
 		sysfs_show_32bit_prop(buffer, "mem_banks_count",
@@ -763,8 +745,6 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
 	struct kfd_cache_properties *cache;
 	struct kfd_mem_properties *mem;

-	BUG_ON(!dev);
-
 	if (dev->kobj_iolink) {
 		list_for_each_entry(iolink, &dev->io_link_props, list)
 			if (iolink->kobj) {
@@ -819,12 +799,12 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
 	int ret;
 	uint32_t i;

-	BUG_ON(!dev);
+	if (WARN_ON(dev->kobj_node))
+		return -EEXIST;

 	/*
 	 * Creating the sysfs folders
 	 */
-	BUG_ON(dev->kobj_node);
 	dev->kobj_node = kfd_alloc_struct(dev->kobj_node);
 	if (!dev->kobj_node)
 		return -ENOMEM;
@@ -957,7 +937,7 @@ static int kfd_topology_update_sysfs(void)
 	int ret;

 	pr_info("Creating topology SYSFS entries\n");
-	if (sys_props.kobj_topology == NULL) {
+	if (!sys_props.kobj_topology) {
 		sys_props.kobj_topology =
 				kfd_alloc_struct(sys_props.kobj_topology);
 		if (!sys_props.kobj_topology)
@@ -1117,10 +1097,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
 	struct kfd_topology_device *dev;
 	struct kfd_topology_device *out_dev = NULL;

-	BUG_ON(!gpu);
-
 	list_for_each_entry(dev, &topology_device_list, list)
-		if (dev->gpu == NULL && dev->node_props.simd_count > 0) {
+		if (!dev->gpu && (dev->node_props.simd_count > 0)) {
 			dev->gpu = gpu;
 			out_dev = dev;
 			break;
@@ -1143,11 +1121,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	struct kfd_topology_device *dev;
 	int res;

-	BUG_ON(!gpu);
-
 	gpu_id = kfd_generate_gpu_id(gpu);

-	pr_debug("kfd: Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);

 	down_write(&topology_lock);
 	/*
@@ -1170,8 +1146,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 		 * GPU vBIOS
 		 */

-		/*
-		 * Update the SYSFS tree, since we added another topology device
+		/* Update the SYSFS tree, since we added another topology
+		 * device
 		 */
 		if (kfd_topology_update_sysfs() < 0)
 			kfd_topology_release_sysfs();
@@ -1190,7 +1166,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)

 	if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
 		dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE;
-		pr_info("amdkfd: adding doorbell packet type capability\n");
+		pr_info("Adding doorbell packet type capability\n");
 	}

 	res = 0;
@@ -1210,8 +1186,6 @@ int kfd_topology_remove_device(struct kfd_dev *gpu)
 	uint32_t gpu_id;
 	int res = -ENODEV;

-	BUG_ON(!gpu);
-
 	down_write(&topology_lock);

 	list_for_each_entry(dev, &topology_device_list, list)

--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -41,6 +41,11 @@ struct kgd_dev;

 struct kgd_mem;

+enum kfd_preempt_type {
+	KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN = 0,
+	KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
+};
+
 enum kgd_memory_pool {
 	KGD_POOL_SYSTEM_CACHEABLE = 1,
 	KGD_POOL_SYSTEM_WRITECOMBINE = 2,
@@ -82,6 +87,17 @@ struct kgd2kfd_shared_resources {
 	size_t doorbell_start_offset;
 };

+struct tile_config {
+	uint32_t *tile_config_ptr;
+	uint32_t *macro_tile_config_ptr;
+	uint32_t num_tile_configs;
+	uint32_t num_macro_tile_configs;
+
+	uint32_t gb_addr_config;
+	uint32_t num_banks;
+	uint32_t num_ranks;
+};
+
 /**
 * struct kfd2kgd_calls
 *
@@ -123,6 +139,11 @@ struct kgd2kfd_shared_resources {
 *
 * @get_fw_version: Returns FW versions from the header
 *
+ * @set_scratch_backing_va: Sets VA for scratch backing memory of a VMID.
+ * Only used for no cp scheduling mode
+ *
+ * @get_tile_config: Returns GPU-specific tiling mode information
+ *
 * This structure contains function pointers to services that the kgd driver
 * provides to amdkfd driver.
 *
@@ -153,14 +174,16 @@ struct kfd2kgd_calls {
 	int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);

 	int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr);
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm);

 	int (*hqd_sdma_load)(struct kgd_dev *kgd, void *mqd);

 	bool (*hqd_is_occupied)(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id);

-	int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type,
+	int (*hqd_destroy)(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
 				unsigned int timeout, uint32_t pipe_id,
 				uint32_t queue_id);

@@ -192,6 +215,9 @@ struct kfd2kgd_calls {

 	uint16_t (*get_fw_version)(struct kgd_dev *kgd,
 				enum kgd_engine_type type);
+	void (*set_scratch_backing_va)(struct kgd_dev *kgd,
+				uint64_t va, uint32_t vmid);
+	int (*get_tile_config)(struct kgd_dev *kgd, struct tile_config *config);
 };

 /**

--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -75,12 +75,14 @@ static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr);
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm);
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id);

-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
 				unsigned int timeout, uint32_t pipe_id,
 				uint32_t queue_id);
 static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
@@ -482,7 +484,9 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 }

 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
-			uint32_t queue_id, uint32_t __user *wptr)
+			uint32_t queue_id, uint32_t __user *wptr,
+			uint32_t wptr_shift, uint32_t wptr_mask,
+			struct mm_struct *mm)
 {
 	uint32_t wptr_shadow, is_wptr_shadow_valid;
 	struct cik_mqd *m;
@@ -636,7 +640,7 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
 	return false;
 }

-static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type,
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t reset_type,
 				unsigned int timeout, uint32_t pipe_id,
 				uint32_t queue_id)
 {
@@ -785,7 +789,8 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
 					unsigned int watch_point_id,
 					unsigned int reg_offset)
 {
-	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset];
+	return watchRegs[watch_point_id * ADDRESS_WATCH_REG_MAX + reg_offset]
+		/ 4;
 }

 static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid)

--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -232,6 +232,35 @@ struct kfd_ioctl_wait_events_args {
 	uint32_t wait_result;		/* from KFD */
 };

+struct kfd_ioctl_set_scratch_backing_va_args {
+	uint64_t va_addr;	/* to KFD */
+	uint32_t gpu_id;	/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_get_tile_config_args {
+	/* to KFD: pointer to tile array */
+	uint64_t tile_config_ptr;
+	/* to KFD: pointer to macro tile array */
+	uint64_t macro_tile_config_ptr;
+	/* to KFD: array size allocated by user mode
+	 * from KFD: array size filled by kernel
+	 */
+	uint32_t num_tile_configs;
+	/* to KFD: array size allocated by user mode
+	 * from KFD: array size filled by kernel
+	 */
+	uint32_t num_macro_tile_configs;
+
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t gb_addr_config;	/* from KFD */
+	uint32_t num_banks;		/* from KFD */
+	uint32_t num_ranks;		/* from KFD */
+	/* struct size can be extended later if needed
+	 * without breaking ABI compatibility
+	 */
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -286,7 +315,13 @@ struct kfd_ioctl_wait_events_args {
 #define AMDKFD_IOC_DBG_WAVE_CONTROL		\
 		AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)

+#define AMDKFD_IOC_SET_SCRATCH_BACKING_VA	\
+		AMDKFD_IOWR(0x11, struct kfd_ioctl_set_scratch_backing_va_args)
+
+#define AMDKFD_IOC_GET_TILE_CONFIG                                      \
+		AMDKFD_IOWR(0x12, struct kfd_ioctl_get_tile_config_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x11
+#define AMDKFD_COMMAND_END		0x13

 #endif