Commit e2069a7b authored by Mukul Joshi's avatar Mukul Joshi Committed by Alex Deucher

drm/amdkfd: Add XCC instance to kgd2kfd interface (v3)

Gfx 9 starts to have multiple XCC instances in one device. Add instance
parameter to kgd2kfd functions where XCC instance was hard coded as 0.
Also, update code to pass the correct instance number when running
on a multi-XCC setup.

v2: introduce the XCC instance to gfx v11 (Morris)
v3: rebase (Alex)
Signed-off-by: default avatarAmber Lin <Amber.Lin@amd.com>
Signed-off-by: default avatarMukul Joshi <mukul.joshi@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Tested-by: default avatarAmber Lin <Amber.Lin@amd.com>
Signed-off-by: default avatarMorris Zhang <Shiwu.Zhang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 3c8bdb51
......@@ -33,7 +33,7 @@
#include "soc15.h"
static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
u32 pasid, unsigned int vmid)
u32 pasid, unsigned int vmid, uint32_t inst)
{
unsigned long timeout;
......@@ -47,11 +47,11 @@ static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
ATC_VMID0_PASID_MAPPING__VALID_MASK;
WREG32(SOC15_REG_OFFSET(ATHUB, 0,
WREG32(SOC15_REG_OFFSET(ATHUB, inst,
regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping);
timeout = jiffies + msecs_to_jiffies(10);
while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0,
while (!(RREG32(SOC15_REG_OFFSET(ATHUB, inst,
regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
(1U << vmid))) {
if (time_after(jiffies, timeout)) {
......@@ -61,13 +61,13 @@ static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev,
cpu_relax();
}
WREG32(SOC15_REG_OFFSET(ATHUB, 0,
WREG32(SOC15_REG_OFFSET(ATHUB, inst,
regATC_VMID_PASID_MAPPING_UPDATE_STATUS),
1U << vmid);
WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
WREG32(SOC15_REG_OFFSET(OSSSYS, inst, mmIH_VMID_0_LUT) + vmid,
pasid_mapping);
WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
WREG32(SOC15_REG_OFFSET(OSSSYS, inst, mmIH_VMID_0_LUT_MM) + vmid,
pasid_mapping);
return 0;
......@@ -81,7 +81,7 @@ static inline struct v9_mqd *get_mqd(void *mqd)
static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
uint32_t wptr_mask, struct mm_struct *mm)
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct v9_mqd *m;
uint32_t *mqd_hqd;
......@@ -89,12 +89,12 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
m = get_mqd(mqd);
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id);
kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id, inst);
/* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */
mqd_hqd = &m->cp_mqd_base_addr_lo;
hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR);
hqd_end = SOC15_REG_OFFSET(GC, 0, regCP_HQD_AQL_DISPATCH_ID_HI);
hqd_base = SOC15_REG_OFFSET(GC, inst, regCP_MQD_BASE_ADDR);
hqd_end = SOC15_REG_OFFSET(GC, inst, regCP_HQD_AQL_DISPATCH_ID_HI);
for (reg = hqd_base; reg <= hqd_end; reg++)
WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
......@@ -103,7 +103,7 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL),
WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_PQ_DOORBELL_CONTROL),
data);
if (wptr) {
......@@ -133,29 +133,29 @@ static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO),
WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_PQ_WPTR_LO),
lower_32_bits(guessed_wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI),
WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_PQ_WPTR_HI),
upper_32_bits(guessed_wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR),
WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_PQ_WPTR_POLL_ADDR),
lower_32_bits((uintptr_t)wptr));
WREG32_RLC(SOC15_REG_OFFSET(GC, 0,
WREG32_RLC(SOC15_REG_OFFSET(GC, inst,
regCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1),
WREG32(SOC15_REG_OFFSET(GC, inst, regCP_PQ_WPTR_POLL_CNTL1),
(uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id,
queue_id));
}
/* Start the EOP fetcher */
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR),
WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_EOP_RPTR),
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data);
WREG32_RLC(SOC15_REG_OFFSET(GC, inst, regCP_HQD_ACTIVE), data);
kgd_gfx_v9_release_queue(adev);
kgd_gfx_v9_release_queue(adev, inst);
return 0;
}
......
......@@ -79,7 +79,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases)
uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
......@@ -91,7 +91,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
}
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
unsigned int vmid)
unsigned int vmid, uint32_t inst)
{
/*
* We have to assume that there is no outstanding mapping.
......@@ -135,7 +135,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
* but still works
*/
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
......@@ -205,7 +206,7 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
uint32_t wptr_mask, struct mm_struct *mm)
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
......@@ -286,7 +287,7 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off)
uint32_t doorbell_off, uint32_t inst)
{
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
......@@ -338,7 +339,7 @@ static int kgd_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
......@@ -469,7 +470,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
......@@ -510,7 +511,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
......@@ -673,7 +674,7 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd)
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
......@@ -709,7 +710,8 @@ static void set_vm_context_page_table_base(struct amdgpu_device *adev,
}
static void program_trap_handler_settings(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
......
......@@ -80,7 +80,7 @@ static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t v
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases)
uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
......@@ -93,7 +93,7 @@ static void program_sh_mem_settings_v10_3(struct amdgpu_device *adev, uint32_t v
/* ATC is defeatured on Sienna_Cichlid */
static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int pasid,
unsigned int vmid)
unsigned int vmid, uint32_t inst)
{
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
......@@ -105,7 +105,8 @@ static int set_pasid_vmid_mapping_v10_3(struct amdgpu_device *adev, unsigned int
return 0;
}
static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id)
static int init_interrupts_v10_3(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
......@@ -177,7 +178,7 @@ static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
uint32_t wptr_mask, struct mm_struct *mm)
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
......@@ -273,7 +274,7 @@ static int hqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off)
uint32_t doorbell_off, uint32_t inst)
{
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v10_compute_mqd *m;
......@@ -325,7 +326,7 @@ static int hiq_mqd_load_v10_3(struct amdgpu_device *adev, void *mqd,
static int hqd_dump_v10_3(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
......@@ -456,7 +457,7 @@ static int hqd_sdma_dump_v10_3(struct amdgpu_device *adev,
static bool hqd_is_occupied_v10_3(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
......@@ -498,7 +499,7 @@ static bool hqd_sdma_is_occupied_v10_3(struct amdgpu_device *adev,
static int hqd_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
......@@ -586,7 +587,7 @@ static int hqd_sdma_destroy_v10_3(struct amdgpu_device *adev, void *mqd,
static int wave_control_execute_v10_3(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd)
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
......@@ -628,7 +629,8 @@ static void set_vm_context_page_table_base_v10_3(struct amdgpu_device *adev,
}
static void program_trap_handler_settings_v10_3(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr)
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
......@@ -765,7 +767,7 @@ uint32_t set_wave_launch_mode_v10_3(struct amdgpu_device *adev,
* deq_retry_wait_time -- Wait Count for Global Wave Syncs.
*/
void get_iq_wait_times_v10_3(struct amdgpu_device *adev,
uint32_t *wait_times)
uint32_t *wait_times, uint32_t inst)
{
*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
......@@ -775,7 +777,8 @@ void build_grace_period_packet_info_v10_3(struct amdgpu_device *adev,
uint32_t wait_times,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data)
uint32_t *reg_data,
uint32_t inst)
{
*reg_data = wait_times;
......
......@@ -78,7 +78,7 @@ static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases)
uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
......@@ -89,7 +89,7 @@ static void program_sh_mem_settings_v11(struct amdgpu_device *adev, uint32_t vmi
}
static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int pasid,
unsigned int vmid)
unsigned int vmid, uint32_t inst)
{
uint32_t value = pasid << IH_VMID_0_LUT__PASID__SHIFT;
......@@ -101,7 +101,8 @@ static int set_pasid_vmid_mapping_v11(struct amdgpu_device *adev, unsigned int p
return 0;
}
static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id)
static int init_interrupts_v11(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
......@@ -162,7 +163,7 @@ static inline struct v11_sdma_mqd *get_sdma_mqd(void *mqd)
static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm)
struct mm_struct *mm, uint32_t inst)
{
struct v11_compute_mqd *m;
uint32_t *mqd_hqd;
......@@ -258,7 +259,7 @@ static int hqd_load_v11(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off)
uint32_t doorbell_off, uint32_t inst)
{
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq[0].ring;
struct v11_compute_mqd *m;
......@@ -310,7 +311,7 @@ static int hiq_mqd_load_v11(struct amdgpu_device *adev, void *mqd,
static int hqd_dump_v11(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS 56
......@@ -445,7 +446,7 @@ static int hqd_sdma_dump_v11(struct amdgpu_device *adev,
}
static bool hqd_is_occupied_v11(struct amdgpu_device *adev, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id)
uint32_t pipe_id, uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
......@@ -486,7 +487,7 @@ static bool hqd_sdma_is_occupied_v11(struct amdgpu_device *adev, void *mqd)
static int hqd_destroy_v11(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
......@@ -571,7 +572,7 @@ static int hqd_sdma_destroy_v11(struct amdgpu_device *adev, void *mqd,
static int wave_control_execute_v11(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd)
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
......
......@@ -78,7 +78,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases)
uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
......@@ -91,7 +91,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
}
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
unsigned int vmid)
unsigned int vmid, uint32_t inst)
{
/*
* We have to assume that there is no outstanding mapping.
......@@ -114,7 +114,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
return 0;
}
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
......@@ -158,7 +159,7 @@ static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
uint32_t wptr_mask, struct mm_struct *mm)
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct cik_mqd *m;
uint32_t *mqd_hqd;
......@@ -202,7 +203,7 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS (35+4)
......@@ -318,7 +319,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
......@@ -358,7 +359,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
uint32_t temp;
enum hqd_dequeue_request_type type;
......@@ -494,7 +495,7 @@ static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd,
static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd)
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data;
......
......@@ -72,7 +72,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases)
uint32_t sh_mem_bases, uint32_t inst)
{
lock_srbm(adev, 0, 0, 0, vmid);
......@@ -85,7 +85,7 @@ static void kgd_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmi
}
static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
unsigned int vmid)
unsigned int vmid, uint32_t inst)
{
/*
* We have to assume that there is no outstanding mapping.
......@@ -109,7 +109,8 @@ static int kgd_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
return 0;
}
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id)
static int kgd_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst)
{
uint32_t mec;
uint32_t pipe;
......@@ -153,7 +154,7 @@ static inline struct vi_sdma_mqd *get_sdma_mqd(void *mqd)
static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t __user *wptr, uint32_t wptr_shift,
uint32_t wptr_mask, struct mm_struct *mm)
uint32_t wptr_mask, struct mm_struct *mm, uint32_t inst)
{
struct vi_mqd *m;
uint32_t *mqd_hqd;
......@@ -226,7 +227,7 @@ static int kgd_hqd_load(struct amdgpu_device *adev, void *mqd,
static int kgd_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst)
{
uint32_t i = 0, reg;
#define HQD_N_REGS (54+4)
......@@ -350,7 +351,7 @@ static int kgd_hqd_sdma_dump(struct amdgpu_device *adev,
static bool kgd_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
uint32_t act;
bool retval = false;
......@@ -390,7 +391,7 @@ static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd)
static int kgd_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
uint32_t queue_id, uint32_t inst)
{
uint32_t temp;
enum hqd_dequeue_request_type type;
......@@ -540,7 +541,7 @@ static bool get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
static int kgd_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd)
uint32_t sq_cmd, uint32_t inst)
{
uint32_t data = 0;
......
......@@ -25,41 +25,42 @@
void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases);
uint32_t sh_mem_bases, uint32_t inst);
int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid,
unsigned int vmid);
int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id);
unsigned int vmid, uint32_t inst);
int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst);
int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm);
struct mm_struct *mm, uint32_t inst);
int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off);
uint32_t doorbell_off, uint32_t inst);
int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs);
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst);
bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id);
uint32_t queue_id, uint32_t inst);
int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id);
uint32_t queue_id, uint32_t inst);
int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd);
uint32_t sq_cmd, uint32_t inst);
bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev,
uint8_t vmid, uint16_t *p_pasid);
void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
uint32_t vmid, uint64_t page_table_base);
void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
int *pasid_wave_cnt, int *max_waves_per_cu);
int *pasid_wave_cnt, int *max_waves_per_cu, uint32_t inst);
void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst);
void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t queue_id);
uint32_t queue_id, uint32_t inst);
uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id);
void kgd_gfx_v9_release_queue(struct amdgpu_device *adev);
void kgd_gfx_v9_release_queue(struct amdgpu_device *adev, uint32_t inst);
......@@ -127,12 +127,16 @@ static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manag
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
return dqm->dev->kfd2kgd->program_sh_mem_settings(
int xcc = 0;
for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
dqm->dev->kfd2kgd->program_sh_mem_settings(
dqm->dev->adev, qpd->vmid,
qpd->sh_mem_config,
qpd->sh_mem_ape1_base,
qpd->sh_mem_ape1_limit,
qpd->sh_mem_bases);
qpd->sh_mem_bases,
dqm->dev->start_xcc_id + xcc);
}
static void kfd_hws_hang(struct device_queue_manager *dqm)
......@@ -405,10 +409,14 @@ static void deallocate_doorbell(struct qcm_process_device *qpd,
static void program_trap_handler_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
int xcc = 0;
if (dqm->dev->kfd2kgd->program_trap_handler_settings)
for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
dqm->dev->kfd2kgd->program_trap_handler_settings(
dqm->dev->adev, qpd->vmid,
qpd->tba_addr, qpd->tma_addr);
qpd->tba_addr, qpd->tma_addr,
dqm->dev->start_xcc_id + xcc);
}
static int allocate_vmid(struct device_queue_manager *dqm,
......@@ -671,6 +679,7 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
struct kfd_process_device *pdd;
int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
int xcc = 0;
reg_sq_cmd.u32All = 0;
reg_gfx_index.u32All = 0;
......@@ -715,9 +724,11 @@ static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process
reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
reg_sq_cmd.bits.vm_id = vmid;
for (xcc = 0; xcc < dev->num_xcc_per_node; xcc++)
dev->kfd2kgd->wave_control_execute(dev->adev,
reg_gfx_index.u32All,
reg_sq_cmd.u32All);
reg_sq_cmd.u32All,
dev->start_xcc_id + xcc);
return 0;
}
......@@ -1229,17 +1240,32 @@ static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
unsigned int vmid)
{
return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
dqm->dev->adev, pasid, vmid);
int xcc = 0, ret;
for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++) {
ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
dqm->dev->adev, pasid, vmid,
dqm->dev->start_xcc_id + xcc);
if (ret)
break;
}
return ret;
}
static void init_interrupts(struct device_queue_manager *dqm)
{
unsigned int i;
unsigned int i, xcc;
for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
if (is_pipe_enabled(dqm, 0, i))
dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i);
for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) {
if (is_pipe_enabled(dqm, 0, i)) {
for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++)
dqm->dev->kfd2kgd->init_interrupts(
dqm->dev->adev, i,
dqm->dev->start_xcc_id +
xcc);
}
}
}
static void init_sdma_bitmaps(struct device_queue_manager *dqm)
......@@ -2455,19 +2481,23 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
struct device_queue_manager *dqm = data;
uint32_t (*dump)[2], n_regs;
int pipe, queue;
int r = 0;
int r = 0, xcc;
uint32_t inst;
if (!dqm->sched_running) {
seq_puts(m, " Device is stopped\n");
return 0;
}
for (xcc = 0; xcc < dqm->dev->num_xcc_per_node; xcc++) {
inst = dqm->dev->start_xcc_id + xcc;
r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
&dump, &n_regs);
&dump, &n_regs, inst);
if (!r) {
seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
seq_printf(m,
" Inst %d, HIQ on MEC %d Pipe %d Queue %d\n",
inst, KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
KFD_CIK_HIQ_QUEUE);
seq_reg_dump(m, dump, n_regs);
......@@ -2484,17 +2514,18 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
continue;
r = dqm->dev->kfd2kgd->hqd_dump(
dqm->dev->adev, pipe, queue, &dump, &n_regs);
dqm->dev->adev, pipe, queue, &dump, &n_regs, inst);
if (r)
break;
seq_printf(m, " CP Pipe %d, Queue %d\n",
pipe, queue);
seq_printf(m, " Inst %d, CP Pipe %d, Queue %d\n",
inst, pipe, queue);
seq_reg_dump(m, dump, n_regs);
kfree(dump);
}
}
}
for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
for (queue = 0;
......
......@@ -190,7 +190,7 @@ int kfd_hiq_load_mqd_kiq(struct mqd_manager *mm, void *mqd,
struct queue_properties *p, struct mm_struct *mms)
{
return mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, mqd, pipe_id,
queue_id, p->doorbell_off);
queue_id, p->doorbell_off, 0);
}
int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
......@@ -198,7 +198,7 @@ int kfd_destroy_mqd_cp(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id)
{
return mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, mqd, type, timeout,
pipe_id, queue_id);
pipe_id, queue_id, 0);
}
void kfd_free_mqd_cp(struct mqd_manager *mm, void *mqd,
......@@ -217,7 +217,7 @@ bool kfd_is_occupied_cp(struct mqd_manager *mm, void *mqd,
uint32_t queue_id)
{
return mm->dev->kfd2kgd->hqd_is_occupied(mm->dev->adev, queue_address,
pipe_id, queue_id);
pipe_id, queue_id, 0);
}
int kfd_load_mqd_sdma(struct mqd_manager *mm, void *mqd,
......
......@@ -167,7 +167,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, wptr_mask, mms);
wptr_shift, wptr_mask, mms, 0);
}
static void __update_mqd(struct mqd_manager *mm, void *mqd,
......
......@@ -151,7 +151,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, 0, mms);
wptr_shift, 0, mms, 0);
return r;
}
......
......@@ -184,7 +184,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
r = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, 0, mms);
wptr_shift, 0, mms, 0);
return r;
}
......
......@@ -218,7 +218,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, 0, mms);
wptr_shift, 0, mms, 0);
}
static void update_mqd(struct mqd_manager *mm, void *mqd,
......@@ -501,13 +501,15 @@ static int hiq_load_mqd_kiq_v9_4_3(struct mqd_manager *mm, void *mqd,
{
int xcc, err;
void *xcc_mqd;
uint32_t start_inst = mm->dev->start_xcc_id;
uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
xcc_mqd = mqd + hiq_mqd_size * xcc;
err = mm->dev->kfd2kgd->hiq_mqd_load(mm->dev->adev, xcc_mqd,
pipe_id, queue_id,
p->doorbell_off);
p->doorbell_off,
start_inst+xcc);
if (err) {
pr_debug("Failed to load HIQ MQD for XCC: %d\n", xcc);
break;
......@@ -523,13 +525,14 @@ static int destroy_hiq_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
{
int xcc = 0, err;
void *xcc_mqd;
uint32_t start_inst = mm->dev->start_xcc_id;
uint64_t hiq_mqd_size = kfd_hiq_mqd_stride(mm->dev);
for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
xcc_mqd = mqd + hiq_mqd_size * xcc;
err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
type, timeout, pipe_id,
queue_id);
queue_id, start_inst+xcc);
if (err) {
pr_debug("Destroy MQD failed for xcc: %d\n", xcc);
break;
......@@ -641,6 +644,7 @@ static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
void *xcc_mqd;
struct v9_mqd *m;
uint64_t mqd_offset;
uint32_t start_inst = mm->dev->start_xcc_id;
m = get_mqd(mqd);
mqd_offset = m->cp_mqd_stride_size;
......@@ -649,7 +653,7 @@ static int destroy_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
xcc_mqd = mqd + mqd_offset * xcc;
err = mm->dev->kfd2kgd->hqd_destroy(mm->dev->adev, xcc_mqd,
type, timeout, pipe_id,
queue_id);
queue_id, start_inst+xcc);
if (err) {
pr_debug("Destroy MQD failed for xcc: %d\n", xcc);
break;
......@@ -667,6 +671,7 @@ static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
int xcc = 0, err;
void *xcc_mqd;
uint32_t start_inst = mm->dev->start_xcc_id;
uint64_t mqd_stride_size = mm->mqd_stride(mm, p);
for (xcc = 0; xcc < mm->dev->num_xcc_per_node; xcc++) {
......@@ -674,7 +679,7 @@ static int load_mqd_v9_4_3(struct mqd_manager *mm, void *mqd,
err = mm->dev->kfd2kgd->hqd_load(mm->dev->adev, xcc_mqd,
pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, 0, mms);
wptr_shift, 0, mms, start_inst+xcc);
if (err) {
pr_debug("Load MQD failed for xcc: %d\n", xcc);
break;
......
......@@ -165,7 +165,7 @@ static int load_mqd(struct mqd_manager *mm, void *mqd,
return mm->dev->kfd2kgd->hqd_load(mm->dev->adev, mqd, pipe_id, queue_id,
(uint32_t __user *)p->write_ptr,
wptr_shift, wptr_mask, mms);
wptr_shift, wptr_mask, mms, 0);
}
static void __update_mqd(struct mqd_manager *mm, void *mqd,
......
......@@ -290,7 +290,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer)
wave_cnt = 0;
max_waves_per_cu = 0;
dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt,
&max_waves_per_cu);
&max_waves_per_cu, 0);
/* Translate wave count to number of compute units */
cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu;
......
......@@ -230,28 +230,30 @@ struct kfd2kgd_calls {
/* Register access functions */
void (*program_sh_mem_settings)(struct amdgpu_device *adev, uint32_t vmid,
uint32_t sh_mem_config, uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases);
uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases,
uint32_t inst);
int (*set_pasid_vmid_mapping)(struct amdgpu_device *adev, u32 pasid,
unsigned int vmid);
unsigned int vmid, uint32_t inst);
int (*init_interrupts)(struct amdgpu_device *adev, uint32_t pipe_id);
int (*init_interrupts)(struct amdgpu_device *adev, uint32_t pipe_id,
uint32_t inst);
int (*hqd_load)(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm);
struct mm_struct *mm, uint32_t inst);
int (*hiq_mqd_load)(struct amdgpu_device *adev, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t doorbell_off);
uint32_t doorbell_off, uint32_t inst);
int (*hqd_sdma_load)(struct amdgpu_device *adev, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm);
int (*hqd_dump)(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs);
uint32_t (**dump)[2], uint32_t *n_regs, uint32_t inst);
int (*hqd_sdma_dump)(struct amdgpu_device *adev,
uint32_t engine_id, uint32_t queue_id,
......@@ -259,12 +261,12 @@ struct kfd2kgd_calls {
bool (*hqd_is_occupied)(struct amdgpu_device *adev,
uint64_t queue_address, uint32_t pipe_id,
uint32_t queue_id);
uint32_t queue_id, uint32_t inst);
int (*hqd_destroy)(struct amdgpu_device *adev, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int timeout, uint32_t pipe_id,
uint32_t queue_id);
uint32_t queue_id, uint32_t inst);
bool (*hqd_sdma_is_occupied)(struct amdgpu_device *adev, void *mqd);
......@@ -273,7 +275,7 @@ struct kfd2kgd_calls {
int (*wave_control_execute)(struct amdgpu_device *adev,
uint32_t gfx_index_val,
uint32_t sq_cmd);
uint32_t sq_cmd, uint32_t inst);
bool (*get_atc_vmid_pasid_mapping_info)(struct amdgpu_device *adev,
uint8_t vmid,
uint16_t *p_pasid);
......@@ -290,9 +292,10 @@ struct kfd2kgd_calls {
uint32_t (*read_vmid_from_vmfault_reg)(struct amdgpu_device *adev);
void (*get_cu_occupancy)(struct amdgpu_device *adev, int pasid,
int *wave_cnt, int *max_waves_per_cu);
int *wave_cnt, int *max_waves_per_cu, uint32_t inst);
void (*program_trap_handler_settings)(struct amdgpu_device *adev,
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr);
uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
uint32_t inst);
};
#endif /* KGD_KFD_INTERFACE_H_INCLUDED */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment