Commit 72a9987e authored by Michel Dänzer, committed by Alex Deucher

drm/radeon: Always flush the HDP cache before submitting a CS to the GPU

This ensures the GPU sees all previous CPU writes to VRAM, which makes it
safe:

* For userspace to stream data from CPU to GPU via VRAM instead of GTT
* For IBs to be stored in VRAM instead of GTT
* For ring buffers to be stored in VRAM instead of GTT, if the HDP flush
  is performed via MMIO
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 124764f1
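
The ordering this change establishes at submission time is visible in the radeon_ring_commit() hunk at the end of the diff: a ring-based HDP flush has to be emitted before the ring is padded to fetch size, while an MMIO-based flush has to happen after the CPU memory barrier, just before the write pointer is updated. The stand-alone C sketch below mocks that optional-callback pattern purely for illustration; the mock_ring types and demo functions are invented here and are not the radeon driver's structures.

/* Illustrative stand-alone mock (not kernel code): a per-ring hdp_flush
 * hook runs before ring padding, an asic-wide MMIO flush hook runs after
 * the CPU memory barrier, just before the write pointer update. */
#include <stdio.h>

struct mock_ring;

struct mock_ring_ops {
	void (*hdp_flush)(struct mock_ring *ring);	/* optional, may be NULL */
};

struct mock_ring {
	const struct mock_ring_ops *ops;
};

static void demo_ring_hdp_flush(struct mock_ring *ring)
{
	printf("HDP flush emitted into the ring (before padding)\n");
}

static void demo_mmio_hdp_flush(void)
{
	printf("HDP flush via MMIO (after all CPU writes to VRAM)\n");
}

/* Mirrors the shape of radeon_ring_commit() after this commit. */
static void mock_ring_commit(struct mock_ring *ring, void (*mmio_flush)(void))
{
	if (ring->ops->hdp_flush)
		ring->ops->hdp_flush(ring);	/* ring-based flush first */
	printf("pad ring to fetch size, memory barrier\n");
	if (mmio_flush)
		mmio_flush();			/* MMIO-based flush last */
	printf("update wptr: GPU may start fetching\n");
}

int main(void)
{
	const struct mock_ring_ops ops = { .hdp_flush = demo_ring_hdp_flush };
	struct mock_ring ring = { .ops = &ops };

	mock_ring_commit(&ring, demo_mmio_hdp_flush);
	return 0;
}
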
@@ -3890,8 +3890,6 @@ void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
 	radeon_ring_write(ring, fence->seq);
 	radeon_ring_write(ring, 0);
-	/* HDP flush */
-	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
 }
 
 /**
@@ -3920,8 +3918,6 @@ void cik_fence_compute_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, upper_32_bits(addr));
 	radeon_ring_write(ring, fence->seq);
 	radeon_ring_write(ring, 0);
-	/* HDP flush */
-	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
 }
 
 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
......
@@ -837,11 +837,7 @@ void r100_fence_ring_emit(struct radeon_device *rdev,
 	/* Wait until IDLE & CLEAN */
 	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
 	radeon_ring_write(ring, RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
-	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
-				RADEON_HDP_READ_BUFFER_INVALIDATE);
-	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
-	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
+	r100_ring_hdp_flush(rdev, ring);
 	/* Emit fence sequence & fire IRQ */
 	radeon_ring_write(ring, PACKET0(rdev->fence_drv[fence->ring].scratch_reg, 0));
 	radeon_ring_write(ring, fence->seq);
@@ -1060,6 +1056,20 @@ void r100_gfx_set_wptr(struct radeon_device *rdev,
 	(void)RREG32(RADEON_CP_RB_WPTR);
 }
 
+/**
+ * r100_ring_hdp_flush - flush Host Data Path via the ring buffer
+ * rdev: radeon device structure
+ * ring: ring buffer struct for emitting packets
+ */
+void r100_ring_hdp_flush(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r100.hdp_cntl |
+				RADEON_HDP_READ_BUFFER_INVALIDATE);
+	radeon_ring_write(ring, PACKET0(RADEON_HOST_PATH_CNTL, 0));
+	radeon_ring_write(ring, rdev->config.r100.hdp_cntl);
+}
+
 static void r100_cp_load_microcode(struct radeon_device *rdev)
 {
 	const __be32 *fw_data;
......
@@ -1749,6 +1749,7 @@ struct radeon_asic_ring {
 	/* command emmit functions */
 	void (*ib_execute)(struct radeon_device *rdev, struct radeon_ib *ib);
 	void (*emit_fence)(struct radeon_device *rdev, struct radeon_fence *fence);
+	void (*hdp_flush)(struct radeon_device *rdev, struct radeon_ring *ring);
 	bool (*emit_semaphore)(struct radeon_device *rdev, struct radeon_ring *cp,
 			       struct radeon_semaphore *semaphore, bool emit_wait);
 	void (*vm_flush)(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
......
@@ -185,6 +185,7 @@ static struct radeon_asic_ring r100_gfx_ring = {
 	.get_rptr = &r100_gfx_get_rptr,
 	.get_wptr = &r100_gfx_get_wptr,
 	.set_wptr = &r100_gfx_set_wptr,
+	.hdp_flush = &r100_ring_hdp_flush,
 };
 
 static struct radeon_asic r100_asic = {
@@ -331,6 +332,7 @@ static struct radeon_asic_ring r300_gfx_ring = {
 	.get_rptr = &r100_gfx_get_rptr,
 	.get_wptr = &r100_gfx_get_wptr,
 	.set_wptr = &r100_gfx_set_wptr,
+	.hdp_flush = &r100_ring_hdp_flush,
 };
 
 static struct radeon_asic r300_asic = {
@@ -1987,7 +1989,7 @@ static struct radeon_asic ci_asic = {
 	.resume = &cik_resume,
 	.asic_reset = &cik_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
-	.mmio_hdp_flush = NULL,
+	.mmio_hdp_flush = &r600_mmio_hdp_flush,
 	.gui_idle = &r600_gui_idle,
 	.mc_wait_for_idle = &evergreen_mc_wait_for_idle,
 	.get_xclk = &cik_get_xclk,
@@ -2091,7 +2093,7 @@ static struct radeon_asic kv_asic = {
 	.resume = &cik_resume,
 	.asic_reset = &cik_asic_reset,
 	.vga_set_state = &r600_vga_set_state,
-	.mmio_hdp_flush = NULL,
+	.mmio_hdp_flush = &r600_mmio_hdp_flush,
 	.gui_idle = &r600_gui_idle,
 	.mc_wait_for_idle = &evergreen_mc_wait_for_idle,
 	.get_xclk = &cik_get_xclk,
......
@@ -148,7 +148,8 @@ u32 r100_gfx_get_wptr(struct radeon_device *rdev,
 		      struct radeon_ring *ring);
 void r100_gfx_set_wptr(struct radeon_device *rdev,
 		       struct radeon_ring *ring);
+void r100_ring_hdp_flush(struct radeon_device *rdev,
+			 struct radeon_ring *ring);
 
 /*
  * r200,rv250,rs300,rv280
  */
......
@@ -82,9 +82,11 @@
  * 2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN),
  *          CIK: 1D and linear tiling modes contain valid PIPE_CONFIG
  * 2.39.0 - Add INFO query for number of active CUs
+ * 2.40.0 - Add RADEON_GEM_GTT_WC/UC, flush HDP cache before submitting
+ *          CS to GPU
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	39
+#define KMS_DRIVER_MINOR	40
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
......
@@ -183,11 +183,21 @@ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig
  */
 void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring)
 {
+	/* If we are emitting the HDP flush via the ring buffer, we need to
+	 * do it before padding.
+	 */
+	if (rdev->asic->ring[ring->idx]->hdp_flush)
+		rdev->asic->ring[ring->idx]->hdp_flush(rdev, ring);
 	/* We pad to match fetch size */
 	while (ring->wptr & ring->align_mask) {
 		radeon_ring_write(ring, ring->nop);
 	}
 	mb();
+	/* If we are emitting the HDP flush via MMIO, we need to do it after
+	 * all CPU writes to VRAM finished.
+	 */
+	if (rdev->asic->mmio_hdp_flush)
+		rdev->asic->mmio_hdp_flush(rdev);
 	radeon_ring_set_wptr(rdev, ring);
 }
 
......
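
Userspace note (illustrative addition, not part of the commit): the radeon_drv.c hunk above bumps KMS_DRIVER_MINOR to 40 so that userspace can detect when the kernel flushes the HDP cache at CS submission. A minimal sketch of such a check with libdrm's standard version query follows; the helper name radeon_kms_flushes_hdp is made up for this example.

/* Sketch only: check whether the radeon KMS driver is new enough (>= 2.40)
 * to flush the HDP cache before CS submission.  Error handling is minimal
 * and the helper name is purely illustrative. */
#include <stdbool.h>
#include <xf86drm.h>

static bool radeon_kms_flushes_hdp(int drm_fd)
{
	drmVersionPtr ver = drmGetVersion(drm_fd);
	bool ok;

	if (!ver)
		return false;
	ok = ver->version_major > 2 ||
	     (ver->version_major == 2 && ver->version_minor >= 40);
	drmFreeVersion(ver);
	return ok;
}
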