Commit 1c0a4625 authored by Oded Gabbay

drm/radeon: adding synchronization for GRBM GFX

Implementing a lock for selecting and accessing shader engines and arrays.
This lock will make sure that radeon and amdkfd are not colliding when
accessing shader engines and arrays with GRBM_GFX_INDEX register.
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@amd.com>
parent ebff8453
...@@ -1563,6 +1563,8 @@ static const u32 godavari_golden_registers[] = ...@@ -1563,6 +1563,8 @@ static const u32 godavari_golden_registers[] =
static void cik_init_golden_registers(struct radeon_device *rdev) static void cik_init_golden_registers(struct radeon_device *rdev)
{ {
/* Some of the registers might be dependent on GRBM_GFX_INDEX */
mutex_lock(&rdev->grbm_idx_mutex);
switch (rdev->family) { switch (rdev->family) {
case CHIP_BONAIRE: case CHIP_BONAIRE:
radeon_program_register_sequence(rdev, radeon_program_register_sequence(rdev,
...@@ -1637,6 +1639,7 @@ static void cik_init_golden_registers(struct radeon_device *rdev) ...@@ -1637,6 +1639,7 @@ static void cik_init_golden_registers(struct radeon_device *rdev)
default: default:
break; break;
} }
mutex_unlock(&rdev->grbm_idx_mutex);
} }
/** /**
...@@ -3428,6 +3431,7 @@ static void cik_setup_rb(struct radeon_device *rdev, ...@@ -3428,6 +3431,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
u32 disabled_rbs = 0; u32 disabled_rbs = 0;
u32 enabled_rbs = 0; u32 enabled_rbs = 0;
mutex_lock(&rdev->grbm_idx_mutex);
for (i = 0; i < se_num; i++) { for (i = 0; i < se_num; i++) {
for (j = 0; j < sh_per_se; j++) { for (j = 0; j < sh_per_se; j++) {
cik_select_se_sh(rdev, i, j); cik_select_se_sh(rdev, i, j);
...@@ -3439,6 +3443,7 @@ static void cik_setup_rb(struct radeon_device *rdev, ...@@ -3439,6 +3443,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
} }
} }
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
mutex_unlock(&rdev->grbm_idx_mutex);
mask = 1; mask = 1;
for (i = 0; i < max_rb_num_per_se * se_num; i++) { for (i = 0; i < max_rb_num_per_se * se_num; i++) {
...@@ -3449,6 +3454,7 @@ static void cik_setup_rb(struct radeon_device *rdev, ...@@ -3449,6 +3454,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
rdev->config.cik.backend_enable_mask = enabled_rbs; rdev->config.cik.backend_enable_mask = enabled_rbs;
mutex_lock(&rdev->grbm_idx_mutex);
for (i = 0; i < se_num; i++) { for (i = 0; i < se_num; i++) {
cik_select_se_sh(rdev, i, 0xffffffff); cik_select_se_sh(rdev, i, 0xffffffff);
data = 0; data = 0;
...@@ -3476,6 +3482,7 @@ static void cik_setup_rb(struct radeon_device *rdev, ...@@ -3476,6 +3482,7 @@ static void cik_setup_rb(struct radeon_device *rdev,
WREG32(PA_SC_RASTER_CONFIG, data); WREG32(PA_SC_RASTER_CONFIG, data);
} }
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
mutex_unlock(&rdev->grbm_idx_mutex);
} }
/** /**
...@@ -3693,6 +3700,12 @@ static void cik_gpu_init(struct radeon_device *rdev) ...@@ -3693,6 +3700,12 @@ static void cik_gpu_init(struct radeon_device *rdev)
/* set HW defaults for 3D engine */ /* set HW defaults for 3D engine */
WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60)); WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
mutex_lock(&rdev->grbm_idx_mutex);
/*
* make sure that the following register writes are broadcast
* to all the shaders
*/
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(SX_DEBUG_1, 0x20); WREG32(SX_DEBUG_1, 0x20);
WREG32(TA_CNTL_AUX, 0x00010000); WREG32(TA_CNTL_AUX, 0x00010000);
...@@ -3748,6 +3761,7 @@ static void cik_gpu_init(struct radeon_device *rdev) ...@@ -3748,6 +3761,7 @@ static void cik_gpu_init(struct radeon_device *rdev)
WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3)); WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER); WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
mutex_unlock(&rdev->grbm_idx_mutex);
udelay(50); udelay(50);
} }
...@@ -6068,6 +6082,7 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) ...@@ -6068,6 +6082,7 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
u32 i, j, k; u32 i, j, k;
u32 mask; u32 mask;
mutex_lock(&rdev->grbm_idx_mutex);
for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
cik_select_se_sh(rdev, i, j); cik_select_se_sh(rdev, i, j);
...@@ -6079,6 +6094,7 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) ...@@ -6079,6 +6094,7 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
} }
} }
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
mutex_unlock(&rdev->grbm_idx_mutex);
mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY; mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
for (k = 0; k < rdev->usec_timeout; k++) { for (k = 0; k < rdev->usec_timeout; k++) {
...@@ -6213,10 +6229,12 @@ static int cik_rlc_resume(struct radeon_device *rdev) ...@@ -6213,10 +6229,12 @@ static int cik_rlc_resume(struct radeon_device *rdev)
WREG32(RLC_LB_CNTR_INIT, 0); WREG32(RLC_LB_CNTR_INIT, 0);
WREG32(RLC_LB_CNTR_MAX, 0x00008000); WREG32(RLC_LB_CNTR_MAX, 0x00008000);
mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff); WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
WREG32(RLC_LB_PARAMS, 0x00600408); WREG32(RLC_LB_PARAMS, 0x00600408);
WREG32(RLC_LB_CNTL, 0x80000004); WREG32(RLC_LB_CNTL, 0x80000004);
mutex_unlock(&rdev->grbm_idx_mutex);
WREG32(RLC_MC_CNTL, 0); WREG32(RLC_MC_CNTL, 0);
WREG32(RLC_UCODE_CNTL, 0); WREG32(RLC_UCODE_CNTL, 0);
...@@ -6283,11 +6301,13 @@ static void cik_enable_cgcg(struct radeon_device *rdev, bool enable) ...@@ -6283,11 +6301,13 @@ static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
tmp = cik_halt_rlc(rdev); tmp = cik_halt_rlc(rdev);
mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE; tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
WREG32(RLC_SERDES_WR_CTRL, tmp2); WREG32(RLC_SERDES_WR_CTRL, tmp2);
mutex_unlock(&rdev->grbm_idx_mutex);
cik_update_rlc(rdev, tmp); cik_update_rlc(rdev, tmp);
...@@ -6329,11 +6349,13 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) ...@@ -6329,11 +6349,13 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
tmp = cik_halt_rlc(rdev); tmp = cik_halt_rlc(rdev);
mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = BPM_ADDR_MASK | MGCG_OVERRIDE_0; data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
WREG32(RLC_SERDES_WR_CTRL, data); WREG32(RLC_SERDES_WR_CTRL, data);
mutex_unlock(&rdev->grbm_idx_mutex);
cik_update_rlc(rdev, tmp); cik_update_rlc(rdev, tmp);
...@@ -6377,11 +6399,13 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) ...@@ -6377,11 +6399,13 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
tmp = cik_halt_rlc(rdev); tmp = cik_halt_rlc(rdev);
mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
data = BPM_ADDR_MASK | MGCG_OVERRIDE_1; data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
WREG32(RLC_SERDES_WR_CTRL, data); WREG32(RLC_SERDES_WR_CTRL, data);
mutex_unlock(&rdev->grbm_idx_mutex);
cik_update_rlc(rdev, tmp); cik_update_rlc(rdev, tmp);
} }
...@@ -6810,10 +6834,12 @@ static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh) ...@@ -6810,10 +6834,12 @@ static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
u32 mask = 0, tmp, tmp1; u32 mask = 0, tmp, tmp1;
int i; int i;
mutex_lock(&rdev->grbm_idx_mutex);
cik_select_se_sh(rdev, se, sh); cik_select_se_sh(rdev, se, sh);
tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG); tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG); tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
mutex_unlock(&rdev->grbm_idx_mutex);
tmp &= 0xffff0000; tmp &= 0xffff0000;
......
...@@ -2397,6 +2397,8 @@ struct radeon_device { ...@@ -2397,6 +2397,8 @@ struct radeon_device {
struct radeon_atcs atcs; struct radeon_atcs atcs;
/* srbm instance registers */ /* srbm instance registers */
struct mutex srbm_mutex; struct mutex srbm_mutex;
/* GRBM index mutex. Protects concurrent access to GRBM index */
struct mutex grbm_idx_mutex;
/* clock, powergating flags */ /* clock, powergating flags */
u32 cg_flags; u32 cg_flags;
u32 pg_flags; u32 pg_flags;
......
...@@ -1303,6 +1303,7 @@ int radeon_device_init(struct radeon_device *rdev, ...@@ -1303,6 +1303,7 @@ int radeon_device_init(struct radeon_device *rdev,
mutex_init(&rdev->pm.mutex); mutex_init(&rdev->pm.mutex);
mutex_init(&rdev->gpu_clock_mutex); mutex_init(&rdev->gpu_clock_mutex);
mutex_init(&rdev->srbm_mutex); mutex_init(&rdev->srbm_mutex);
mutex_init(&rdev->grbm_idx_mutex);
init_rwsem(&rdev->pm.mclk_lock); init_rwsem(&rdev->pm.mclk_lock);
init_rwsem(&rdev->exclusive_lock); init_rwsem(&rdev->exclusive_lock);
init_waitqueue_head(&rdev->irq.vblank_queue); init_waitqueue_head(&rdev->irq.vblank_queue);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment