Commit 2ba272d7 authored by Chunming Zhou, committed by Alex Deucher

drm/amdgpu: add pipeline sync while vmid switch in same ctx

Since the vmid manager supports vmid sharing within one vm, the same
ctx can get different vmids for two emits without a vm flush in
between, as the vm_flush may have been done on another ring. Emit a
pipeline sync in that case so the later emit cannot start before the
earlier one has completed.
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent fdff8cfa
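
The crux of the change is the new condition passed to amdgpu_vm_flush()
in the diff below. As a minimal stand-alone sketch of that decision,
assuming simplified stand-in types (ring_state and
need_pipeline_sync_for_vmid_switch are illustrative only, not kernel
code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the two pieces of ring state the patch
 * consults: the ctx of the last emit and the vmid it used (the
 * latter is the new field added to struct amdgpu_ring). */
struct ring_state {
        uint64_t current_ctx;
        int vmid;
};

/* Mirrors the expression the patch passes to amdgpu_vm_flush():
 * (ring->current_ctx == ctx) && (old_vmid != vmid). A sync is only
 * forced when the *same* ctx comes back with a *different* vmid. */
static bool need_pipeline_sync_for_vmid_switch(const struct ring_state *ring,
                                               uint64_t ctx, int vmid)
{
        return ring->current_ctx == ctx && ring->vmid != vmid;
}

int main(void)
{
        struct ring_state ring = { .current_ctx = 1, .vmid = 4 };

        /* Same ctx re-emits with a different vmid: sync required. */
        printf("same ctx, vmid 4 -> 5: %d\n",
               need_pipeline_sync_for_vmid_switch(&ring, 1, 5));
        /* Different ctx: handled by the normal ctx-switch path. */
        printf("new ctx,  vmid 4 -> 5: %d\n",
               need_pipeline_sync_for_vmid_switch(&ring, 2, 5));
        /* Same ctx, same vmid: no extra sync. */
        printf("same ctx, vmid 4 -> 4: %d\n",
               need_pipeline_sync_for_vmid_switch(&ring, 1, 4));
        return 0;
}
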
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -799,6 +799,7 @@ struct amdgpu_ring {
         unsigned                cond_exe_offs;
         u64                     cond_exe_gpu_addr;
         volatile u32            *cond_exe_cpu_addr;
+        int                     vmid;
 };
 
 /*
@@ -936,7 +937,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
                     unsigned vm_id, uint64_t pd_addr,
                     uint32_t gds_base, uint32_t gds_size,
                     uint32_t gws_base, uint32_t gws_size,
-                    uint32_t oa_base, uint32_t oa_size);
+                    uint32_t oa_base, uint32_t oa_size,
+                    bool vmid_switch);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -122,6 +122,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
         bool skip_preamble, need_ctx_switch;
         unsigned patch_offset = ~0;
         struct amdgpu_vm *vm;
+        int vmid = 0, old_vmid = ring->vmid;
         struct fence *hwf;
         uint64_t ctx;
@@ -135,9 +136,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
         if (job) {
                 vm = job->vm;
                 ctx = job->ctx;
+                vmid = job->vm_id;
         } else {
                 vm = NULL;
                 ctx = 0;
+                vmid = 0;
         }
 
         if (!ring->ready) {
@@ -163,7 +166,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                 r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
                                     job->gds_base, job->gds_size,
                                     job->gws_base, job->gws_size,
-                                    job->oa_base, job->oa_size);
+                                    job->oa_base, job->oa_size,
+                                    (ring->current_ctx == ctx) && (old_vmid != vmid));
                 if (r) {
                         amdgpu_ring_undo(ring);
                         return r;
@@ -180,7 +184,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
         need_ctx_switch = ring->current_ctx != ctx;
         for (i = 0; i < num_ibs; ++i) {
                 ib = &ibs[i];
-
                 /* drop preamble IBs if we don't have a context switch */
                 if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
                         continue;
@@ -188,6 +191,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                 amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
                                     need_ctx_switch);
                 need_ctx_switch = false;
+                ring->vmid = vmid;
         }
 
         if (ring->funcs->emit_hdp_invalidate)
@@ -198,6 +202,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                 dev_err(adev->dev, "failed to emit fence (%d)\n", r);
                 if (job && job->vm_id)
                         amdgpu_vm_reset_id(adev, job->vm_id);
+                ring->vmid = old_vmid;
                 amdgpu_ring_undo(ring);
                 return r;
         }
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -298,7 +298,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
                     unsigned vm_id, uint64_t pd_addr,
                     uint32_t gds_base, uint32_t gds_size,
                     uint32_t gws_base, uint32_t gws_size,
-                    uint32_t oa_base, uint32_t oa_size)
+                    uint32_t oa_base, uint32_t oa_size,
+                    bool vmid_switch)
 {
         struct amdgpu_device *adev = ring->adev;
         struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
@@ -312,8 +313,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
         int r;
 
         if (ring->funcs->emit_pipeline_sync && (
-            pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed ||
-            ring->type == AMDGPU_RING_TYPE_COMPUTE))
+            pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || vmid_switch))
                 amdgpu_ring_emit_pipeline_sync(ring);
 
         if (ring->funcs->emit_vm_flush &&
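
The net effect in amdgpu_vm_flush() is that the unconditional pipeline
sync on compute rings is dropped in favour of syncing only when a page
directory flush, a GDS switch, or a same-ctx vmid switch requires it. A
condensed before/after of that condition, with the kernel's ring and VM
state reduced to plain bools purely for illustration:

#include <stdbool.h>
#include <stdio.h>

/* Old condition: pd flush pending, GDS switch needed, or simply
 * "this is a compute ring" -- compute always paid for a sync. */
static bool sync_needed_before(bool needs_flush, bool gds_switch_needed,
                               bool is_compute_ring)
{
        return needs_flush || gds_switch_needed || is_compute_ring;
}

/* New condition: the blanket compute-ring term is replaced by the
 * vmid_switch flag computed at the call site in amdgpu_ib_schedule(). */
static bool sync_needed_after(bool needs_flush, bool gds_switch_needed,
                              bool vmid_switch)
{
        return needs_flush || gds_switch_needed || vmid_switch;
}

int main(void)
{
        /* Compute-ring emit with no flush, no GDS switch and no vmid
         * switch: the old code synced anyway, the new code does not. */
        printf("before: %d, after: %d\n",
               sync_needed_before(false, false, true),
               sync_needed_after(false, false, false));
        return 0;
}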