Commit fffe347e authored by Alex Deucher

drm/amdgpu: cleanup MES12 command submission

The approach of having a separate WB slot for each submission doesn't
really work well and for example breaks GPU reset.

Use a status query packet for the fence update instead. Since those
should always succeed, we can use the fence of the original packet to
signal the state of the operation.

While at it cleanup the coding style.

Fixes: ade887c6 ("drm/amdgpu/mes12: Use a separate fence per transaction")
Reviewed-by: Mukul Joshi <mukul.joshi@amd.com>
Suggested-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 3af2c80a
...@@ -144,18 +144,18 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, ...@@ -144,18 +144,18 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
void *pkt, int size, void *pkt, int size,
int api_status_off) int api_status_off)
{ {
int ndw = size / 4; union MESAPI__QUERY_MES_STATUS mes_status_pkt;
signed long r; signed long timeout = 3000000; /* 3000 ms */
union MESAPI__MISC *x_pkt = pkt;
struct MES_API_STATUS *api_status;
struct amdgpu_device *adev = mes->adev; struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring; struct amdgpu_ring *ring = &mes->ring;
unsigned long flags; struct MES_API_STATUS *api_status;
union MESAPI__MISC *x_pkt = pkt;
const char *op_str, *misc_op_str; const char *op_str, *misc_op_str;
signed long timeout = 3000000; /* 3000 ms */ unsigned long flags;
u32 fence_offset; u64 status_gpu_addr;
u64 fence_gpu_addr; u32 status_offset;
u64 *fence_ptr; u64 *status_ptr;
signed long r;
int ret; int ret;
if (x_pkt->header.opcode >= MES_SCH_API_MAX) if (x_pkt->header.opcode >= MES_SCH_API_MAX)
...@@ -167,28 +167,38 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, ...@@ -167,28 +167,38 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */ /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
timeout = 15 * 600 * 1000; timeout = 15 * 600 * 1000;
} }
BUG_ON(size % 4 != 0);
ret = amdgpu_device_wb_get(adev, &fence_offset); ret = amdgpu_device_wb_get(adev, &status_offset);
if (ret) if (ret)
return ret; return ret;
fence_gpu_addr =
adev->wb.gpu_addr + (fence_offset * 4); status_gpu_addr = adev->wb.gpu_addr + (status_offset * 4);
fence_ptr = (u64 *)&adev->wb.wb[fence_offset]; status_ptr = (u64 *)&adev->wb.wb[status_offset];
*fence_ptr = 0; *status_ptr = 0;
spin_lock_irqsave(&mes->ring_lock, flags); spin_lock_irqsave(&mes->ring_lock, flags);
if (amdgpu_ring_alloc(ring, ndw)) { r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4);
spin_unlock_irqrestore(&mes->ring_lock, flags); if (r)
amdgpu_device_wb_free(adev, fence_offset); goto error_unlock_free;
return -ENOMEM;
}
api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
api_status->api_completion_fence_addr = fence_gpu_addr; api_status->api_completion_fence_addr = status_gpu_addr;
api_status->api_completion_fence_value = 1; api_status->api_completion_fence_value = 1;
amdgpu_ring_write_multiple(ring, pkt, ndw); amdgpu_ring_write_multiple(ring, pkt, size / 4);
memset(&mes_status_pkt, 0, sizeof(mes_status_pkt));
mes_status_pkt.header.type = MES_API_TYPE_SCHEDULER;
mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_status_pkt.api_status.api_completion_fence_addr =
ring->fence_drv.gpu_addr;
mes_status_pkt.api_status.api_completion_fence_value =
++ring->fence_drv.sync_seq;
amdgpu_ring_write_multiple(ring, &mes_status_pkt,
sizeof(mes_status_pkt) / 4);
amdgpu_ring_commit(ring); amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&mes->ring_lock, flags); spin_unlock_irqrestore(&mes->ring_lock, flags);
...@@ -196,16 +206,17 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, ...@@ -196,16 +206,17 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt);
if (misc_op_str) if (misc_op_str)
dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, misc_op_str); dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str,
misc_op_str);
else if (op_str) else if (op_str)
dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str);
else else
dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); dev_dbg(adev->dev, "MES msg=%d was emitted\n",
x_pkt->header.opcode);
r = amdgpu_mes_fence_wait_polling(fence_ptr, (u64)1, timeout); r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout);
amdgpu_device_wb_free(adev, fence_offset); if (r < 1 || !*status_ptr) {
if (r < 1) {
if (misc_op_str) if (misc_op_str)
dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n",
op_str, misc_op_str); op_str, misc_op_str);
...@@ -219,10 +230,19 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, ...@@ -219,10 +230,19 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
while (halt_if_hws_hang) while (halt_if_hws_hang)
schedule(); schedule();
return -ETIMEDOUT; r = -ETIMEDOUT;
goto error_wb_free;
} }
amdgpu_device_wb_free(adev, status_offset);
return 0; return 0;
error_unlock_free:
spin_unlock_irqrestore(&mes->ring_lock, flags);
error_wb_free:
amdgpu_device_wb_free(adev, status_offset);
return r;
} }
static int convert_to_mes_queue_type(int queue_type) static int convert_to_mes_queue_type(int queue_type)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment