Commit e42dd87e authored by Sonny Jiang's avatar Sonny Jiang Committed by Alex Deucher

drm/amdgpu: VCN 3.0 multiple queue ring reset

Add firmware write/read point reset sync through shared memory, port from vcn2.5.
Signed-off-by: default avatarSonny Jiang <sonny.jiang@amd.com>
Reviewed-by: default avatarJames Zhu <James.Zhu@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 0339258b
...@@ -237,7 +237,8 @@ static int vcn_v3_0_sw_init(void *handle) ...@@ -237,7 +237,8 @@ static int vcn_v3_0_sw_init(void *handle)
} }
fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG); fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) |
cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED); fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);
} }
...@@ -935,6 +936,7 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst) ...@@ -935,6 +936,7 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{ {
volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
struct amdgpu_ring *ring; struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp; uint32_t rb_bufsz, tmp;
...@@ -1048,6 +1050,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo ...@@ -1048,6 +1050,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
/* set the write pointer delay */ /* set the write pointer delay */
WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
...@@ -1071,6 +1074,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo ...@@ -1071,6 +1074,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
lower_32_bits(ring->wptr)); lower_32_bits(ring->wptr));
fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
/* Unstall DPG */ /* Unstall DPG */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
...@@ -1080,6 +1084,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo ...@@ -1080,6 +1084,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
static int vcn_v3_0_start(struct amdgpu_device *adev) static int vcn_v3_0_start(struct amdgpu_device *adev)
{ {
volatile struct amdgpu_fw_shared *fw_shared;
struct amdgpu_ring *ring; struct amdgpu_ring *ring;
uint32_t rb_bufsz, tmp; uint32_t rb_bufsz, tmp;
int i, j, k, r; int i, j, k, r;
...@@ -1222,6 +1227,9 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) ...@@ -1222,6 +1227,9 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
/* programm the RB_BASE for ring buffer */ /* programm the RB_BASE for ring buffer */
WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
lower_32_bits(ring->gpu_addr)); lower_32_bits(ring->gpu_addr));
...@@ -1234,19 +1242,25 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) ...@@ -1234,19 +1242,25 @@ static int vcn_v3_0_start(struct amdgpu_device *adev)
ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR); ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR, WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
lower_32_bits(ring->wptr)); lower_32_bits(ring->wptr));
fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
ring = &adev->vcn.inst[i].ring_enc[0]; ring = &adev->vcn.inst[i].ring_enc[0];
WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
ring = &adev->vcn.inst[i].ring_enc[1]; ring = &adev->vcn.inst[i].ring_enc[1];
WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
} }
return 0; return 0;
...@@ -1595,6 +1609,7 @@ static int vcn_v3_0_stop(struct amdgpu_device *adev) ...@@ -1595,6 +1609,7 @@ static int vcn_v3_0_stop(struct amdgpu_device *adev)
static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
int inst_idx, struct dpg_pause_state *new_state) int inst_idx, struct dpg_pause_state *new_state)
{ {
volatile struct amdgpu_fw_shared *fw_shared;
struct amdgpu_ring *ring; struct amdgpu_ring *ring;
uint32_t reg_data = 0; uint32_t reg_data = 0;
int ret_code; int ret_code;
...@@ -1626,6 +1641,8 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, ...@@ -1626,6 +1641,8 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
/* Restore */ /* Restore */
fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
ring = &adev->vcn.inst[inst_idx].ring_enc[0]; ring = &adev->vcn.inst[inst_idx].ring_enc[0];
ring->wptr = 0; ring->wptr = 0;
WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
...@@ -1633,7 +1650,9 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, ...@@ -1633,7 +1650,9 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
ring = &adev->vcn.inst[inst_idx].ring_enc[1]; ring = &adev->vcn.inst[inst_idx].ring_enc[1];
ring->wptr = 0; ring->wptr = 0;
WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
...@@ -1641,6 +1660,7 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, ...@@ -1641,6 +1660,7 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev,
WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
/* Unstall DPG */ /* Unstall DPG */
WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment