Commit 51668b0b authored by Frank Min, committed by Alex Deucher

drm/amdgpu/sdma4: Enable sdma poll mem addr on vega10 for SRIOV

While doing FLR on VFs, there is a possibility of losing the doorbell
write for SDMA, so enable poll memory for SDMA; the SDMA firmware will
then check the poll memory holding the wptr.
Signed-off-by: Frank Min <Frank.Min@amd.com>
Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 81fe3f35
...@@ -1209,6 +1209,7 @@ struct amdgpu_sdma_instance { ...@@ -1209,6 +1209,7 @@ struct amdgpu_sdma_instance {
struct amdgpu_ring ring; struct amdgpu_ring ring;
bool burst_nop; bool burst_nop;
uint32_t poll_mem_offs;
}; };
struct amdgpu_sdma { struct amdgpu_sdma {
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
#include "vega10/MMHUB/mmhub_1_0_offset.h" #include "vega10/MMHUB/mmhub_1_0_offset.h"
#include "vega10/MMHUB/mmhub_1_0_sh_mask.h" #include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
#include "vega10/HDP/hdp_4_0_offset.h" #include "vega10/HDP/hdp_4_0_offset.h"
#include "vega10/NBIO/nbio_6_1_offset.h"
#include "raven1/SDMA0/sdma0_4_1_default.h" #include "raven1/SDMA0/sdma0_4_1_default.h"
#include "soc15_common.h" #include "soc15_common.h"
...@@ -287,6 +288,8 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) ...@@ -287,6 +288,8 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
*/ */
static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
{ {
int i;
u32 offset;
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
DRM_DEBUG("Setting write pointer\n"); DRM_DEBUG("Setting write pointer\n");
...@@ -303,6 +306,17 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) ...@@ -303,6 +306,17 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring)
adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2); adev->wb.wb[ring->wptr_offs + 1] = upper_32_bits(ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2); ring->doorbell_index, ring->wptr << 2);
if (amdgpu_sriov_vf(adev)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
if (&adev->sdma.instance[i].ring == ring) {
offset = adev->sdma.instance[i].poll_mem_offs;
atomic64_set((atomic64_t *)&adev->wb.wb[offset],
(ring->wptr << 2));
nbio_v6_1_hdp_flush(adev);
}
}
}
WDOORBELL64(ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else { } else {
int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1; int me = (ring == &ring->adev->sdma.instance[0].ring) ? 0 : 1;
...@@ -573,9 +587,9 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) ...@@ -573,9 +587,9 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
{ {
struct amdgpu_ring *ring; struct amdgpu_ring *ring;
u32 rb_cntl, ib_cntl; u32 rb_cntl, ib_cntl, wptr_poll_addr_lo, wptr_poll_addr_hi, wptr_poll_cntl;
u32 rb_bufsz; u32 rb_bufsz;
u32 wb_offset; u32 wb_offset, poll_offset;
u32 doorbell; u32 doorbell;
u32 doorbell_offset; u32 doorbell_offset;
u32 temp; u32 temp;
...@@ -687,6 +701,21 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) ...@@ -687,6 +701,21 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
if (adev->mman.buffer_funcs_ring == ring) if (adev->mman.buffer_funcs_ring == ring)
amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size); amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
if (amdgpu_sriov_vf(adev)) {
poll_offset = adev->sdma.instance[i].poll_mem_offs * 4;
wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
wptr_poll_addr_lo = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO));
wptr_poll_addr_lo = REG_SET_FIELD(wptr_poll_addr_lo, SDMA0_GFX_RB_WPTR_POLL_ADDR_LO, ADDR,
lower_32_bits(adev->wb.gpu_addr + poll_offset) >> 2);
wptr_poll_addr_hi = upper_32_bits(adev->wb.gpu_addr + poll_offset);
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), wptr_poll_addr_lo);
WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), wptr_poll_addr_hi);
WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
}
} }
return 0; return 0;
...@@ -1247,6 +1276,15 @@ static int sdma_v4_0_sw_init(void *handle) ...@@ -1247,6 +1276,15 @@ static int sdma_v4_0_sw_init(void *handle)
(i == 0) ? (i == 0) ?
AMDGPU_SDMA_IRQ_TRAP0 : AMDGPU_SDMA_IRQ_TRAP0 :
AMDGPU_SDMA_IRQ_TRAP1); AMDGPU_SDMA_IRQ_TRAP1);
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_wb_get_64bit(adev,
&adev->sdma.instance[i].poll_mem_offs);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate SDMA poll mem wb.\n", r);
return r;
}
}
if (r) if (r)
return r; return r;
} }
...@@ -1259,9 +1297,13 @@ static int sdma_v4_0_sw_fini(void *handle) ...@@ -1259,9 +1297,13 @@ static int sdma_v4_0_sw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int i; int i;
for (i = 0; i < adev->sdma.num_instances; i++) for (i = 0; i < adev->sdma.num_instances; i++) {
amdgpu_ring_fini(&adev->sdma.instance[i].ring); amdgpu_ring_fini(&adev->sdma.instance[i].ring);
if (amdgpu_sriov_vf(adev))
amdgpu_wb_free_64bit(adev,
adev->sdma.instance[i].poll_mem_offs);
}
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment