Commit 87cc7f9e authored by Christian König's avatar Christian König Committed by Alex Deucher

drm/amdgpu: load balance VCN3 decode as well v8

Add VCN3 IB parsing to figure out to which instance we can send the
stream for decode.

v2: remove VCN instance limit as well, fix amdgpu_cs_find_mapping,
    check supported formats instead of unsupported.
v3: fix typo and error handling
v4: make sure the message BO is CPU accessible
v5: fix addr calculation once more
v6: only check message buffers
v7: fix constant and use defines
v8: fix create msg calculation
Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Reviewed-by: default avatarSonny Jiang <sonny.jiang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent c62dfdbb
...@@ -50,6 +50,9 @@ ...@@ -50,6 +50,9 @@
#define VCN_INSTANCES_SIENNA_CICHLID 2 #define VCN_INSTANCES_SIENNA_CICHLID 2
#define DEC_SW_RING_ENABLED FALSE #define DEC_SW_RING_ENABLED FALSE
#define RDECODE_MSG_CREATE 0x00000000
#define RDECODE_MESSAGE_CREATE 0x00000001
static int amdgpu_ih_clientid_vcns[] = { static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN, SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1 SOC15_IH_CLIENTID_VCN1
...@@ -208,8 +211,6 @@ static int vcn_v3_0_sw_init(void *handle) ...@@ -208,8 +211,6 @@ static int vcn_v3_0_sw_init(void *handle)
} else { } else {
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
} }
if (adev->asic_type == CHIP_SIENNA_CICHLID && i != 0)
ring->no_scheduler = true;
sprintf(ring->name, "vcn_dec_%d", i); sprintf(ring->name, "vcn_dec_%d", i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
AMDGPU_RING_PRIO_DEFAULT, AMDGPU_RING_PRIO_DEFAULT,
...@@ -1847,6 +1848,132 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { ...@@ -1847,6 +1848,132 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
}; };
static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)
{
struct drm_gpu_scheduler **scheds;
/* The create msg must be in the first IB submitted */
if (atomic_read(&p->entity->fence_seq))
return -EINVAL;
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
[AMDGPU_RING_PRIO_DEFAULT].sched;
drm_sched_entity_modify_sched(p->entity, scheds, 1);
return 0;
}
static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
{
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *map;
uint32_t *msg, num_buffers;
struct amdgpu_bo *bo;
uint64_t start, end;
unsigned int i;
void * ptr;
int r;
addr &= AMDGPU_GMC_HOLE_MASK;
r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
if (r) {
DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
return r;
}
start = map->start * AMDGPU_GPU_PAGE_SIZE;
end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
if (addr & 0x7) {
DRM_ERROR("VCN messages must be 8 byte aligned!\n");
return -EINVAL;
}
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r) {
DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
return r;
}
r = amdgpu_bo_kmap(bo, &ptr);
if (r) {
DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
return r;
}
msg = ptr + addr - start;
/* Check length */
if (msg[1] > end - addr) {
r = -EINVAL;
goto out;
}
if (msg[3] != RDECODE_MSG_CREATE)
goto out;
num_buffers = msg[2];
for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
uint32_t offset, size, *create;
if (msg[0] != RDECODE_MESSAGE_CREATE)
continue;
offset = msg[1];
size = msg[2];
if (offset + size > end) {
r = -EINVAL;
goto out;
}
create = ptr + addr + offset - start;
/* H246, HEVC and VP9 can run on any instance */
if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
continue;
r = vcn_v3_0_limit_sched(p);
if (r)
goto out;
}
out:
amdgpu_bo_kunmap(bo);
return r;
}
static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
uint32_t ib_idx)
{
struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
uint32_t msg_lo = 0, msg_hi = 0;
unsigned i;
int r;
/* The first instance can decode anything */
if (!ring->me)
return 0;
for (i = 0; i < ib->length_dw; i += 2) {
uint32_t reg = amdgpu_get_ib_value(p, ib_idx, i);
uint32_t val = amdgpu_get_ib_value(p, ib_idx, i + 1);
if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) {
msg_lo = val;
} else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) {
msg_hi = val;
} else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
val == 0) {
r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo);
if (r)
return r;
}
}
return 0;
}
static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_DEC, .type = AMDGPU_RING_TYPE_VCN_DEC,
.align_mask = 0xf, .align_mask = 0xf,
...@@ -1854,6 +1981,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { ...@@ -1854,6 +1981,7 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
.get_rptr = vcn_v3_0_dec_ring_get_rptr, .get_rptr = vcn_v3_0_dec_ring_get_rptr,
.get_wptr = vcn_v3_0_dec_ring_get_wptr, .get_wptr = vcn_v3_0_dec_ring_get_wptr,
.set_wptr = vcn_v3_0_dec_ring_set_wptr, .set_wptr = vcn_v3_0_dec_ring_set_wptr,
.patch_cs_in_place = vcn_v3_0_ring_patch_cs_in_place,
.emit_frame_size = .emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment