Commit 0a52a6ca authored by Nirmoy Das's avatar Nirmoy Das Committed by Alex Deucher

drm/amdgpu: add wave limit functionality for gfx8,9

Wave limiting can be use to load balance high priority
compute jobs along with gfx jobs. When enabled, this will reserve
~75% of waves for compute jobs.

We do not need this from gfx10 onwards because >=gfx10 has
asynchronous compute tunneling to replace wave limit requirement.
Signed-off-by: default avatarNirmoy Das <nirmoy.das@amd.com>
Reviewed-by: default avatarChristian König <christian.koenig@amd.com>
Reviewed-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 8c0225d7
...@@ -197,6 +197,7 @@ struct amdgpu_ring_funcs { ...@@ -197,6 +197,7 @@ struct amdgpu_ring_funcs {
void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
int (*preempt_ib)(struct amdgpu_ring *ring); int (*preempt_ib)(struct amdgpu_ring *ring);
void (*emit_mem_sync)(struct amdgpu_ring *ring); void (*emit_mem_sync)(struct amdgpu_ring *ring);
void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
}; };
struct amdgpu_ring { struct amdgpu_ring {
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_gfx.h" #include "amdgpu_gfx.h"
#include "amdgpu_ring.h"
#include "vi.h" #include "vi.h"
#include "vi_structs.h" #include "vi_structs.h"
#include "vid.h" #include "vid.h"
...@@ -6845,6 +6846,19 @@ static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring) ...@@ -6845,6 +6846,19 @@ static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
} }
#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT 0x07ffffff
static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
{
uint32_t val;
/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
* number of gfx waves. Setting 5 bit will make sure gfx only gets
* around 25% of gpu resources.
*/
val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
}
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
.name = "gfx_v8_0", .name = "gfx_v8_0",
.early_init = gfx_v8_0_early_init, .early_init = gfx_v8_0_early_init,
...@@ -6928,7 +6942,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { ...@@ -6928,7 +6942,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7 + /* gfx_v8_0_ring_emit_pipeline_sync */ 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
7, /* gfx_v8_0_emit_mem_sync_compute */ 7 + /* gfx_v8_0_emit_mem_sync_compute */
5, /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
.emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_ib = gfx_v8_0_ring_emit_ib_compute,
.emit_fence = gfx_v8_0_ring_emit_fence_compute, .emit_fence = gfx_v8_0_ring_emit_fence_compute,
...@@ -6942,6 +6957,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { ...@@ -6942,6 +6957,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.pad_ib = amdgpu_ring_generic_pad_ib, .pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v8_0_ring_emit_wreg, .emit_wreg = gfx_v8_0_ring_emit_wreg,
.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute, .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
.emit_wave_limit = gfx_v8_0_emit_wave_limit,
}; };
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
......
...@@ -52,6 +52,7 @@ ...@@ -52,6 +52,7 @@
#include "asic_reg/pwr/pwr_10_0_offset.h" #include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h" #include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"
#define GFX9_NUM_GFX_RINGS 1 #define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 4096 #define GFX9_MEC_HPD_SIZE 4096
...@@ -6667,6 +6668,22 @@ static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring) ...@@ -6667,6 +6668,22 @@ static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */ amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
} }
static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
{
struct amdgpu_device *adev = ring->adev;
uint32_t val;
/* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
* number of gfx waves. Setting 5 bit will make sure gfx only gets
* around 25% of gpu resources.
*/
val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
amdgpu_ring_emit_wreg(ring,
SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
val);
}
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.name = "gfx_v9_0", .name = "gfx_v9_0",
.early_init = gfx_v9_0_early_init, .early_init = gfx_v9_0_early_init,
...@@ -6756,7 +6773,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { ...@@ -6756,7 +6773,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v9_0_ring_emit_vm_flush */ 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
7, /* gfx_v9_0_emit_mem_sync */ 7 + /* gfx_v9_0_emit_mem_sync */
5, /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */ .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_ib = gfx_v9_0_ring_emit_ib_compute,
.emit_fence = gfx_v9_0_ring_emit_fence, .emit_fence = gfx_v9_0_ring_emit_fence,
...@@ -6772,6 +6790,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { ...@@ -6772,6 +6790,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
.emit_mem_sync = gfx_v9_0_emit_mem_sync, .emit_mem_sync = gfx_v9_0_emit_mem_sync,
.emit_wave_limit = gfx_v9_0_emit_wave_limit,
}; };
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment