Commit c0289557 authored by Felix Kuehling, committed by Alex Deucher

drm/amdkfd: Fix mm reference in SVM eviction worker

Use the mm reference from the fence. This allows removing the
svm_bo->svms pointer, which was problematic because we cannot assume
that the struct kfd_process containing the svms is still allocated
without holding a refcount on the process.

Use mmget_not_zero to ensure the mm is still valid, and drop the svm_bo
reference if it isn't.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Philip Yang <Philip.Yang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent ea8793f2
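
For context, the lifetime pattern the patch switches to in svm_range_evict_svm_bo_worker looks roughly like the sketch below: a deferred worker that was handed an mm_struct pointer pins the address space with mmget_not_zero() before taking the mmap lock, and drops that pin with mmput() when it is done. This is a minimal illustration under stated assumptions, not code from the patch; struct example_evict_work and example_evict_worker are hypothetical names.

/*
 * Minimal sketch (not code from the patch): a worker that only holds an
 * mm_struct pointer must pin the address space with mmget_not_zero()
 * before using it, and release that pin with mmput() when done.  It is
 * assumed that whoever queued the work keeps the mm_struct itself
 * allocated (e.g. via mmgrab()); mmget_not_zero() only tells us whether
 * the address space is still alive.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/workqueue.h>

struct example_evict_work {
	struct work_struct work;
	struct mm_struct *mm;	/* saved when the work was queued */
};

static void example_evict_worker(struct work_struct *work)
{
	struct example_evict_work *ew =
		container_of(work, struct example_evict_work, work);
	struct mm_struct *mm = ew->mm;

	/* The process may already be exiting; bail out if the mm is gone. */
	if (!mmget_not_zero(mm))
		return;

	mmap_read_lock(mm);
	/* ... walk VMAs / migrate ranges under the mmap read lock ... */
	mmap_read_unlock(mm);

	/* Drop the reference taken by mmget_not_zero(). */
	mmput(mm);
}
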
@@ -541,7 +541,6 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
 		kfree(svm_bo);
 		return -ESRCH;
 	}
-	svm_bo->svms = prange->svms;
 	svm_bo->eviction_fence =
 		amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
 					   mm,
@@ -3273,7 +3272,6 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
 static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 {
 	struct svm_range_bo *svm_bo;
-	struct kfd_process *p;
 	struct mm_struct *mm;
 	int r = 0;
@@ -3281,13 +3279,12 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 	if (!svm_bo_ref_unless_zero(svm_bo))
 		return; /* svm_bo was freed while eviction was pending */

-	/* svm_range_bo_release destroys this worker thread. So during
-	 * the lifetime of this thread, kfd_process and mm will be valid.
-	 */
-	p = container_of(svm_bo->svms, struct kfd_process, svms);
-	mm = p->mm;
-	if (!mm)
+	if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
+		mm = svm_bo->eviction_fence->mm;
+	} else {
+		svm_range_bo_unref(svm_bo);
 		return;
+	}

 	mmap_read_lock(mm);
 	spin_lock(&svm_bo->list_lock);
@@ -3305,8 +3302,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 		mutex_lock(&prange->migrate_mutex);
 		do {
-			r = svm_migrate_vram_to_ram(prange,
-						svm_bo->eviction_fence->mm,
+			r = svm_migrate_vram_to_ram(prange, mm,
 						KFD_MIGRATE_TRIGGER_TTM_EVICTION);
 		} while (!r && prange->actual_loc && --retries);
@@ -3324,6 +3320,7 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 	}
 	spin_unlock(&svm_bo->list_lock);
 	mmap_read_unlock(mm);
+	mmput(mm);

 	dma_fence_signal(&svm_bo->eviction_fence->base);
...
@@ -46,7 +46,6 @@ struct svm_range_bo {
 	spinlock_t list_lock;
 	struct amdgpu_amdkfd_fence *eviction_fence;
 	struct work_struct eviction_work;
-	struct svm_range_list *svms;
 	uint32_t evicting;
 	struct work_struct release_work;
 };
...
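
The early-exit path also leans on the existing svm_bo_ref_unless_zero()/svm_range_bo_unref() helpers. Below is a generic sketch of that "take a reference unless the count already hit zero" idiom using the stock kref API; struct example_bo and its helpers are illustrative stand-ins, not KFD code.

/*
 * Generic sketch of the ref-unless-zero idiom.  The KFD helpers used in the
 * worker follow the same shape; example_bo here is illustrative only.
 */
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/slab.h>

struct example_bo {
	struct kref kref;
	/* ... resources owned by this object ... */
};

static void example_bo_release(struct kref *kref)
{
	struct example_bo *bo = container_of(kref, struct example_bo, kref);

	kfree(bo);
}

static struct example_bo *example_bo_alloc(void)
{
	struct example_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);

	if (bo)
		kref_init(&bo->kref);	/* refcount starts at 1 */
	return bo;
}

/* Returns true only if the object was still alive when we looked. */
static bool example_bo_ref_unless_zero(struct example_bo *bo)
{
	return bo && kref_get_unless_zero(&bo->kref);
}

static void example_bo_unref(struct example_bo *bo)
{
	if (bo)
		kref_put(&bo->kref, example_bo_release);
}
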