Commit fe1f05df authored by Mukul Joshi's avatar Mukul Joshi Committed by Alex Deucher

drm/amdkfd: Rework kfd_locked handling

Currently, even if kfd_locked is set, a process is first
created and then removed to work around a race condition
in updating kfd_locked flag. Rework kfd_locked handling to
ensure no process is created if kfd_locked is set. This
is achieved by updating kfd_locked under kfd_processes_mutex.
With this there is no need for kfd_locked to be an atomic
counter. Instead, it can be a regular integer.
Signed-off-by: Mukul Joshi <mukul.joshi@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 6b22ef25
...@@ -146,13 +146,6 @@ static int kfd_open(struct inode *inode, struct file *filep) ...@@ -146,13 +146,6 @@ static int kfd_open(struct inode *inode, struct file *filep)
if (IS_ERR(process)) if (IS_ERR(process))
return PTR_ERR(process); return PTR_ERR(process);
if (kfd_is_locked()) {
dev_dbg(kfd_device, "kfd is locked!\n"
"process %d unreferenced", process->pasid);
kfd_unref_process(process);
return -EAGAIN;
}
/* filep now owns the reference returned by kfd_create_process */ /* filep now owns the reference returned by kfd_create_process */
filep->private_data = process; filep->private_data = process;
......
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
* once locked, kfd driver will stop any further GPU execution. * once locked, kfd driver will stop any further GPU execution.
* create process (open) will return -EAGAIN. * create process (open) will return -EAGAIN.
*/ */
static atomic_t kfd_locked = ATOMIC_INIT(0); static int kfd_locked;
#ifdef CONFIG_DRM_AMDGPU_CIK #ifdef CONFIG_DRM_AMDGPU_CIK
extern const struct kfd2kgd_calls gfx_v7_kfd2kgd; extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
...@@ -880,7 +880,9 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) ...@@ -880,7 +880,9 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
return ret; return ret;
} }
atomic_dec(&kfd_locked); mutex_lock(&kfd_processes_mutex);
--kfd_locked;
mutex_unlock(&kfd_processes_mutex);
for (i = 0; i < kfd->num_nodes; i++) { for (i = 0; i < kfd->num_nodes; i++) {
node = kfd->nodes[i]; node = kfd->nodes[i];
...@@ -893,21 +895,27 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd) ...@@ -893,21 +895,27 @@ int kgd2kfd_post_reset(struct kfd_dev *kfd)
bool kfd_is_locked(void) bool kfd_is_locked(void)
{ {
return (atomic_read(&kfd_locked) > 0); lockdep_assert_held(&kfd_processes_mutex);
return (kfd_locked > 0);
} }
void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{ {
struct kfd_node *node; struct kfd_node *node;
int i; int i;
int count;
if (!kfd->init_complete) if (!kfd->init_complete)
return; return;
/* for runtime suspend, skip locking kfd */ /* for runtime suspend, skip locking kfd */
if (!run_pm) { if (!run_pm) {
mutex_lock(&kfd_processes_mutex);
count = ++kfd_locked;
mutex_unlock(&kfd_processes_mutex);
/* For first KFD device suspend all the KFD processes */ /* For first KFD device suspend all the KFD processes */
if (atomic_inc_return(&kfd_locked) == 1) if (count == 1)
kfd_suspend_all_processes(); kfd_suspend_all_processes();
} }
...@@ -933,7 +941,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) ...@@ -933,7 +941,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
/* for runtime resume, skip unlocking kfd */ /* for runtime resume, skip unlocking kfd */
if (!run_pm) { if (!run_pm) {
count = atomic_dec_return(&kfd_locked); mutex_lock(&kfd_processes_mutex);
count = --kfd_locked;
mutex_unlock(&kfd_processes_mutex);
WARN_ONCE(count < 0, "KFD suspend / resume ref. error"); WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
if (count == 0) if (count == 0)
ret = kfd_resume_all_processes(); ret = kfd_resume_all_processes();
......
...@@ -201,6 +201,8 @@ extern int amdgpu_no_queue_eviction_on_vm_fault; ...@@ -201,6 +201,8 @@ extern int amdgpu_no_queue_eviction_on_vm_fault;
/* Enable eviction debug messages */ /* Enable eviction debug messages */
extern bool debug_evictions; extern bool debug_evictions;
extern struct mutex kfd_processes_mutex;
enum cache_policy { enum cache_policy {
cache_policy_coherent, cache_policy_coherent,
cache_policy_noncoherent cache_policy_noncoherent
......
...@@ -50,7 +50,7 @@ struct mm_struct; ...@@ -50,7 +50,7 @@ struct mm_struct;
* Unique/indexed by mm_struct* * Unique/indexed by mm_struct*
*/ */
DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
static DEFINE_MUTEX(kfd_processes_mutex); DEFINE_MUTEX(kfd_processes_mutex);
DEFINE_SRCU(kfd_processes_srcu); DEFINE_SRCU(kfd_processes_srcu);
...@@ -818,6 +818,12 @@ struct kfd_process *kfd_create_process(struct file *filep) ...@@ -818,6 +818,12 @@ struct kfd_process *kfd_create_process(struct file *filep)
*/ */
mutex_lock(&kfd_processes_mutex); mutex_lock(&kfd_processes_mutex);
if (kfd_is_locked()) {
mutex_unlock(&kfd_processes_mutex);
pr_debug("KFD is locked! Cannot create process");
return ERR_PTR(-EINVAL);
}
/* A prior open of /dev/kfd could have already created the process. */ /* A prior open of /dev/kfd could have already created the process. */
process = find_process(thread, false); process = find_process(thread, false);
if (process) { if (process) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment