Commit df83d1eb authored by Chunming Zhou, committed by Alex Deucher

drm/amdgpu: add sched sync for amdgpu job v2

This improves on the previous patch: sched_sync stores the fences whose
dependency could be skipped because they were already scheduled. When the
job is executed, no pipeline_sync is needed if all fences in sched_sync
have signalled; otherwise a pipeline_sync is still inserted.

v2: handle the error when adding a fence to the sync object fails.
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com> (v1)
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent a022c54e
...@@ -1124,6 +1124,7 @@ struct amdgpu_job { ...@@ -1124,6 +1124,7 @@ struct amdgpu_job {
struct amdgpu_vm *vm; struct amdgpu_vm *vm;
struct amdgpu_ring *ring; struct amdgpu_ring *ring;
struct amdgpu_sync sync; struct amdgpu_sync sync;
struct amdgpu_sync sched_sync;
struct amdgpu_ib *ibs; struct amdgpu_ib *ibs;
struct dma_fence *fence; /* the hw fence */ struct dma_fence *fence; /* the hw fence */
uint32_t preamble_status; uint32_t preamble_status;
......
...@@ -121,6 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -121,6 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
{ {
struct amdgpu_device *adev = ring->adev; struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib *ib = &ibs[0]; struct amdgpu_ib *ib = &ibs[0];
struct dma_fence *tmp;
bool skip_preamble, need_ctx_switch; bool skip_preamble, need_ctx_switch;
unsigned patch_offset = ~0; unsigned patch_offset = ~0;
struct amdgpu_vm *vm; struct amdgpu_vm *vm;
...@@ -160,8 +161,13 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -160,8 +161,13 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
dev_err(adev->dev, "scheduling IB failed (%d).\n", r); dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r; return r;
} }
if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync)
if (ring->funcs->emit_pipeline_sync && job &&
(tmp = amdgpu_sync_get_fence(&job->sched_sync))) {
job->need_pipeline_sync = true;
amdgpu_ring_emit_pipeline_sync(ring); amdgpu_ring_emit_pipeline_sync(ring);
dma_fence_put(tmp);
}
if (vm) { if (vm) {
r = amdgpu_vm_flush(ring, job); r = amdgpu_vm_flush(ring, job);
......
...@@ -60,6 +60,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, ...@@ -60,6 +60,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
(*job)->need_pipeline_sync = false; (*job)->need_pipeline_sync = false;
amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->sync);
amdgpu_sync_create(&(*job)->sched_sync);
return 0; return 0;
} }
...@@ -98,6 +99,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) ...@@ -98,6 +99,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
dma_fence_put(job->fence); dma_fence_put(job->fence);
amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->sched_sync);
kfree(job); kfree(job);
} }
...@@ -107,6 +109,7 @@ void amdgpu_job_free(struct amdgpu_job *job) ...@@ -107,6 +109,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
dma_fence_put(job->fence); dma_fence_put(job->fence);
amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->sched_sync);
kfree(job); kfree(job);
} }
...@@ -139,10 +142,10 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) ...@@ -139,10 +142,10 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
struct amdgpu_vm *vm = job->vm; struct amdgpu_vm *vm = job->vm;
struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync); struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync);
int r;
while (fence == NULL && vm && !job->vm_id) { while (fence == NULL && vm && !job->vm_id) {
struct amdgpu_ring *ring = job->ring; struct amdgpu_ring *ring = job->ring;
int r;
r = amdgpu_vm_grab_id(vm, ring, &job->sync, r = amdgpu_vm_grab_id(vm, ring, &job->sync,
&job->base.s_fence->finished, &job->base.s_fence->finished,
...@@ -153,9 +156,11 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) ...@@ -153,9 +156,11 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
fence = amdgpu_sync_get_fence(&job->sync); fence = amdgpu_sync_get_fence(&job->sync);
} }
if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) {
job->need_pipeline_sync = true; r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence);
if (r)
DRM_ERROR("Error adding fence to sync (%d)\n", r);
}
return fence; return fence;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment