Commit a70cb217 authored by Christian König's avatar Christian König Committed by Alex Deucher

drm/amdgpu: rework gmc_v10_0_flush_gpu_tlb v2

Move the SDMA workaround necessary for Navi 1x into a higher layer.

v2: use dev_err
Signed-off-by: default avatarChristian König <christian.koenig@amd.com>
Reviewed-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 8c14a67b
......@@ -582,6 +582,54 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
return 0;
}
void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
{
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
struct dma_fence *fence;
struct amdgpu_job *job;
int r;
if (!hub->sdma_invalidation_workaround || vmid ||
!adev->mman.buffer_funcs_enabled ||
!adev->ib_pool_ready || amdgpu_in_reset(adev) ||
!ring->sched.ready) {
adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub,
flush_type);
return;
}
/* The SDMA on Navi 1x has a bug which can theoretically result in memory
* corruption if an invalidation happens at the same time as an VA
* translation. Avoid this by doing the invalidation from the SDMA
* itself at least for GART.
*/
mutex_lock(&adev->mman.gtt_window_lock);
r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
&job);
if (r)
goto error_alloc;
job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
job->vm_needs_flush = true;
job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
fence = amdgpu_job_submit(job);
mutex_unlock(&adev->mman.gtt_window_lock);
dma_fence_wait(fence, false);
dma_fence_put(fence);
return;
error_alloc:
mutex_unlock(&adev->mman.gtt_window_lock);
dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r);
}
/**
* amdgpu_gmc_tmz_set -- check and set if a device supports TMZ
* @adev: amdgpu_device pointer
......
......@@ -117,6 +117,8 @@ struct amdgpu_vmhub {
uint32_t vm_contexts_disable;
bool sdma_invalidation_workaround;
const struct amdgpu_vmhub_funcs *vmhub_funcs;
};
......@@ -335,7 +337,6 @@ struct amdgpu_gmc {
u64 noretry_flags;
};
#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub, inst) \
((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
((adev), (pasid), (type), (allhub), (inst)))
......@@ -401,6 +402,8 @@ int amdgpu_gmc_ras_sw_init(struct amdgpu_device *adev);
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type);
extern void amdgpu_gmc_tmz_set(struct amdgpu_device *adev);
extern void amdgpu_gmc_noretry_set(struct amdgpu_device *adev);
......
......@@ -471,6 +471,9 @@ static void gfxhub_v2_0_init(struct amdgpu_device *adev)
GCVM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
GCVM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
/* TODO: This is only needed on some Navi 1x revisions */
hub->sdma_invalidation_workaround = true;
hub->vmhub_funcs = &gfxhub_v2_0_vmhub_funcs;
}
......
......@@ -231,20 +231,49 @@ static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(
* by the amdgpu vm/hsa code.
*/
static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
unsigned int vmhub, uint32_t flush_type)
/**
* gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
*
* @adev: amdgpu_device pointer
* @vmid: vm instance to flush
* @vmhub: vmhub type
* @flush_type: the flush type
*
* Flush the TLB for the requested page table.
*/
static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
{
bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
u32 tmp;
/* Use register 17 for GART */
const unsigned int eng = 17;
unsigned int i;
unsigned char hub_ip = 0;
u32 sem, req, ack;
unsigned int i;
u32 tmp;
hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ?
GC_HWIP : MMHUB_HWIP;
sem = hub->vm_inv_eng0_sem + hub->eng_distance * eng;
req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);
/* For SRIOV run time, driver shouldn't access the register through MMIO
* Directly use kiq to do the vm invalidation instead
*/
if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
down_read_trylock(&adev->reset_domain->sem)) {
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid);
up_read(&adev->reset_domain->sem);
return;
}
hub_ip = (vmhub == AMDGPU_GFXHUB(0)) ? GC_HWIP : MMHUB_HWIP;
spin_lock(&adev->gmc.invalidate_lock);
/*
......@@ -258,9 +287,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acuqire */
tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng, hub_ip);
tmp = RREG32_RLC_NO_KIQ(sem, hub_ip);
if (tmp & 0x1)
break;
udelay(1);
......@@ -270,9 +297,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n");
}
WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
hub->eng_distance * eng,
inv_req, hub_ip);
WREG32_RLC_NO_KIQ(req, inv_req, hub_ip);
/*
* Issue a dummy read to wait for the ACK register to be cleared
......@@ -280,14 +305,11 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
*/
if ((vmhub == AMDGPU_GFXHUB(0)) &&
(amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 3, 0)))
RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_req +
hub->eng_distance * eng, hub_ip);
RREG32_RLC_NO_KIQ(req, hub_ip);
/* Wait for ACK with a delay.*/
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32_RLC_NO_KIQ(hub->vm_inv_eng0_ack +
hub->eng_distance * eng, hub_ip);
tmp = RREG32_RLC_NO_KIQ(ack, hub_ip);
tmp &= 1 << vmid;
if (tmp)
break;
......@@ -297,109 +319,13 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
if (use_semaphore)
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
*/
WREG32_RLC_NO_KIQ(hub->vm_inv_eng0_sem +
hub->eng_distance * eng, 0, hub_ip);
WREG32_RLC_NO_KIQ(sem, 0, hub_ip);
spin_unlock(&adev->gmc.invalidate_lock);
if (i < adev->usec_timeout)
return;
DRM_ERROR("Timeout waiting for VM flush hub: %d!\n", vmhub);
}
/**
* gmc_v10_0_flush_gpu_tlb - gart tlb flush callback
*
* @adev: amdgpu_device pointer
* @vmid: vm instance to flush
* @vmhub: vmhub type
* @flush_type: the flush type
*
* Flush the TLB for the requested page table.
*/
static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
{
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct dma_fence *fence;
struct amdgpu_job *job;
int r;
/* flush hdp cache */
adev->hdp.funcs->flush_hdp(adev, NULL);
/* For SRIOV run time, driver shouldn't access the register through MMIO
* Directly use kiq to do the vm invalidation instead
*/
if (adev->gfx.kiq[0].ring.sched.ready && !adev->enable_mes &&
(amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
down_read_trylock(&adev->reset_domain->sem)) {
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
const unsigned int eng = 17;
u32 inv_req = hub->vmhub_funcs->get_invalidate_req(vmid, flush_type);
u32 req = hub->vm_inv_eng0_req + hub->eng_distance * eng;
u32 ack = hub->vm_inv_eng0_ack + hub->eng_distance * eng;
amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid);
up_read(&adev->reset_domain->sem);
return;
}
mutex_lock(&adev->mman.gtt_window_lock);
if (vmhub == AMDGPU_MMHUB0(0)) {
gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB0(0), 0);
mutex_unlock(&adev->mman.gtt_window_lock);
return;
}
BUG_ON(vmhub != AMDGPU_GFXHUB(0));
if (!adev->mman.buffer_funcs_enabled ||
!adev->ib_pool_ready ||
amdgpu_in_reset(adev) ||
ring->sched.ready == false) {
gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_GFXHUB(0), 0);
mutex_unlock(&adev->mman.gtt_window_lock);
return;
}
/* The SDMA on Navi has a bug which can theoretically result in memory
* corruption if an invalidation happens at the same time as an VA
* translation. Avoid this by doing the invalidation from the SDMA
* itself.
*/
r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr,
AMDGPU_FENCE_OWNER_UNDEFINED,
16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
&job);
if (r)
goto error_alloc;
job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
job->vm_needs_flush = true;
job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
fence = amdgpu_job_submit(job);
mutex_unlock(&adev->mman.gtt_window_lock);
dma_fence_wait(fence, false);
dma_fence_put(fence);
return;
error_alloc:
mutex_unlock(&adev->mman.gtt_window_lock);
DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
if (i >= adev->usec_timeout)
dev_err(adev->dev, "Timeout waiting for VM flush hub: %d!\n",
vmhub);
}
/**
......
......@@ -271,7 +271,7 @@ static void gmc_v11_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
if (i < adev->usec_timeout)
return;
DRM_ERROR("Timeout waiting for VM flush ACK!\n");
dev_err(adev->dev, "Timeout waiting for VM flush ACK!\n");
}
/**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment