Commit b5fd0cf3 authored by Andrey Grodzovsky, committed by Alex Deucher

drm/amdgpu: Add work_struct for GPU reset from kfd.

We need a work_struct so that this reset can be canceled if another
reset is already in progress.
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 2f83658f
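
The mechanism this patch leans on is the stock kernel workqueue contract: a work_struct embedded in a long-lived structure can sit on a queue at most once, so concurrent reset requests collapse into a single run, and a request that has not started yet can be withdrawn with cancel_work_sync(). Below is a minimal self-contained sketch of that pattern, assuming nothing about amdgpu internals; the my_dev names are invented for illustration, and amdgpu_reset_domain_schedule() is taken to be a thin wrapper that queues onto the reset domain's ordered workqueue (its implementation is not part of this diff).

#include <linux/kernel.h>
#include <linux/workqueue.h>

struct my_dev {
	struct workqueue_struct *reset_wq;	/* ordered: one reset runs at a time */
	struct work_struct reset_work;		/* persistent, embedded work item */
};

static void my_dev_reset_work_fn(struct work_struct *work)
{
	/* Recover the owning device from the embedded work item. */
	struct my_dev *dev = container_of(work, struct my_dev, reset_work);

	pr_info("resetting device %p\n", dev);
	/* ... perform the actual recovery against dev here ... */
}

static int my_dev_init(struct my_dev *dev)
{
	/* An ordered workqueue serializes all resets scheduled on it. */
	dev->reset_wq = alloc_ordered_workqueue("my-reset-domain", 0);
	if (!dev->reset_wq)
		return -ENOMEM;

	INIT_WORK(&dev->reset_work, my_dev_reset_work_fn);
	return 0;
}

static bool my_dev_schedule_reset(struct my_dev *dev)
{
	/*
	 * queue_work() returns false when reset_work is already pending,
	 * so concurrent requesters collapse into a single reset; a
	 * not-yet-running request can be withdrawn with cancel_work_sync().
	 */
	return queue_work(dev->reset_wq, &dev->reset_work);
}

Read through that sketch, the diff below replaces the old on-stack amdgpu_recover_work_struct, which its caller had to flush synchronously, with a work_struct embedded in struct amdgpu_kfd_dev that outlives any single caller, making fire-and-forget scheduling and later cancellation possible.
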
@@ -33,6 +33,7 @@
 #include <uapi/linux/kfd_ioctl.h>
 #include "amdgpu_ras.h"
 #include "amdgpu_umc.h"
+#include "amdgpu_reset.h"
 
 /* Total memory size in system memory and all GPU VRAM. Used to
  * estimate worst case amount of memory to reserve for page tables
@@ -122,6 +123,15 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
 	}
 }
 
+static void amdgpu_amdkfd_reset_work(struct work_struct *work)
+{
+	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+						  kfd.reset_work);
+
+	amdgpu_device_gpu_recover_imp(adev, NULL);
+}
+
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 	int i;
@@ -180,6 +190,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
 						adev_to_drm(adev), &gpu_resources);
+
+		INIT_WORK(&adev->kfd.reset_work, amdgpu_amdkfd_reset_work);
 	}
 }
@@ -247,7 +259,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
 void amdgpu_amdkfd_gpu_reset(struct amdgpu_device *adev)
 {
 	if (amdgpu_device_should_recover_gpu(adev))
-		amdgpu_device_gpu_recover(adev, NULL);
+		amdgpu_reset_domain_schedule(adev->reset_domain,
+					     &adev->kfd.reset_work);
 }
 
 int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
......
@@ -97,6 +97,7 @@ struct amdgpu_kfd_dev {
 	struct kfd_dev *dev;
 	uint64_t vram_used;
 	bool init_complete;
+	struct work_struct reset_work;
 };
 
 enum kgd_engine_type {
......
@@ -5323,37 +5323,6 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
 	return r;
 }
 
-struct amdgpu_recover_work_struct {
-	struct work_struct base;
-	struct amdgpu_device *adev;
-	struct amdgpu_job *job;
-	int ret;
-};
-
-static void amdgpu_device_queue_gpu_recover_work(struct work_struct *work)
-{
-	struct amdgpu_recover_work_struct *recover_work = container_of(work, struct amdgpu_recover_work_struct, base);
-
-	amdgpu_device_gpu_recover_imp(recover_work->adev, recover_work->job);
-}
-
-/*
- * Serialize gpu recover into reset domain single threaded wq
- */
-int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
-				struct amdgpu_job *job)
-{
-	struct amdgpu_recover_work_struct work = {.adev = adev, .job = job};
-
-	INIT_WORK(&work.base, amdgpu_device_queue_gpu_recover_work);
-
-	if (!amdgpu_reset_domain_schedule(adev->reset_domain, &work.base))
-		return -EAGAIN;
-
-	flush_work(&work.base);
-
-	return atomic_read(&adev->reset_domain->reset_res);
-}
-
 /**
  * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
  *
......