Commit 3fdcd0a3, authored by YiPeng Chai, committed by Alex Deucher

drm/amdgpu: Prepare for asynchronous processing of umc page retirement

Preparing for asynchronous processing of umc page retirement.
Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 22f6e3e1
...@@ -2660,6 +2660,25 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, ...@@ -2660,6 +2660,25 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev,
} }
} }
/*
 * Kernel thread body that services UMC page retirement requests.
 *
 * @param: the struct amdgpu_device pointer handed to kthread_run().
 *
 * The thread sleeps on con->page_retirement_wq until either
 * con->page_retirement_req_cnt becomes non-zero or the thread is asked to
 * stop. Each wake-up with a pending request logs the current request count
 * and consumes one request by decrementing the counter.
 *
 * Returns 0 once kthread_should_stop() reports that the thread must exit.
 */
static int amdgpu_ras_page_retirement_thread(void *param)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)param;
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

	while (!kthread_should_stop()) {
		/*
		 * The stop request must be part of the wait condition:
		 * kthread_stop() only sets the stop flag and wakes the thread,
		 * and wait_event_interruptible() goes back to sleep if its
		 * condition is still false. Testing the request counter alone
		 * would leave kthread_stop() blocked while no request is
		 * pending.
		 */
		wait_event_interruptible(con->page_retirement_wq,
				kthread_should_stop() ||
				atomic_read(&con->page_retirement_req_cnt));

		/* Woken for shutdown, not for work: do not consume a request. */
		if (kthread_should_stop())
			break;

		dev_info(adev->dev, "Start processing page retirement. request:%d\n",
			atomic_read(&con->page_retirement_req_cnt));

		atomic_dec(&con->page_retirement_req_cnt);
	}

	return 0;
}
int amdgpu_ras_recovery_init(struct amdgpu_device *adev) int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
{ {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
...@@ -2723,6 +2742,16 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) ...@@ -2723,6 +2742,16 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
} }
} }
mutex_init(&con->page_retirement_lock);
init_waitqueue_head(&con->page_retirement_wq);
atomic_set(&con->page_retirement_req_cnt, 0);
con->page_retirement_thread =
kthread_run(amdgpu_ras_page_retirement_thread, adev, "umc_page_retirement");
if (IS_ERR(con->page_retirement_thread)) {
con->page_retirement_thread = NULL;
dev_warn(adev->dev, "Failed to create umc_page_retirement thread!!!\n");
}
#ifdef CONFIG_X86_MCE_AMD #ifdef CONFIG_X86_MCE_AMD
if ((adev->asic_type == CHIP_ALDEBARAN) && if ((adev->asic_type == CHIP_ALDEBARAN) &&
(adev->gmc.xgmi.connected_to_cpu)) (adev->gmc.xgmi.connected_to_cpu))
...@@ -2758,6 +2787,11 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) ...@@ -2758,6 +2787,11 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev)
if (!data) if (!data)
return 0; return 0;
if (con->page_retirement_thread)
kthread_stop(con->page_retirement_thread);
atomic_set(&con->page_retirement_req_cnt, 0);
cancel_work_sync(&con->recovery_work); cancel_work_sync(&con->recovery_work);
mutex_lock(&con->recovery_lock); mutex_lock(&con->recovery_lock);
......
...@@ -461,6 +461,11 @@ struct amdgpu_ras { ...@@ -461,6 +461,11 @@ struct amdgpu_ras {
/* Record special requirements of gpu reset caller */ /* Record special requirements of gpu reset caller */
uint32_t gpu_reset_flags; uint32_t gpu_reset_flags;
struct task_struct *page_retirement_thread;
wait_queue_head_t page_retirement_wq;
struct mutex page_retirement_lock;
atomic_t page_retirement_req_cnt;
}; };
struct ras_fs_data { struct ras_fs_data {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment