Commit 26bc5340 authored by Andrey Grodzovsky's avatar Andrey Grodzovsky Committed by Alex Deucher

drm/amdgpu: Refactor GPU reset for XGMI hive case

For XGMI hive case do reset in steps where each step iterates over
all devs in hive. This especially important for asic reset
since all PSP FW in hive must come up within a limited time
(around 1 sec) to properply negotiate the link.
Do this by  refactoring  amdgpu_device_gpu_recover and amdgpu_device_reset
into pre_asic_reset, asic_reset and post_asic_reset functions where is part
is exectued for all the GPUs in the hive before going to the next step.

v2: Update names for amdgpu_device_lock/unlock functions.

v3: Introduce per hive locking to avoid multiple resets for GPUs
    in same hive.
v4:
Remove delayed_workqueue()/ttm_bo_unlock_delayed_workqueue() - they
are copy & pasted over from radeon and on amdgpu there isn't
any reason for that any more.
Signed-off-by: default avatarAndrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent ed2bf522
...@@ -910,6 +910,7 @@ struct amdgpu_device { ...@@ -910,6 +910,7 @@ struct amdgpu_device {
bool in_gpu_reset; bool in_gpu_reset;
struct mutex lock_reset; struct mutex lock_reset;
struct amdgpu_doorbell_index doorbell_index; struct amdgpu_doorbell_index doorbell_index;
int asic_reset_res;
}; };
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
......
...@@ -3161,86 +3161,6 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev) ...@@ -3161,86 +3161,6 @@ static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
return 0; return 0;
} }
/**
* amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
*
* @adev: amdgpu device pointer
*
* attempt to do soft-reset or full-reset and reinitialize Asic
* return 0 means succeeded otherwise failed
*/
static int amdgpu_device_reset(struct amdgpu_device *adev)
{
bool need_full_reset, vram_lost = 0;
int r;
need_full_reset = amdgpu_device_ip_need_full_reset(adev);
if (!need_full_reset) {
amdgpu_device_ip_pre_soft_reset(adev);
r = amdgpu_device_ip_soft_reset(adev);
amdgpu_device_ip_post_soft_reset(adev);
if (r || amdgpu_device_ip_check_soft_reset(adev)) {
DRM_INFO("soft reset failed, will fallback to full reset!\n");
need_full_reset = true;
}
}
if (need_full_reset) {
r = amdgpu_device_ip_suspend(adev);
retry:
r = amdgpu_asic_reset(adev);
/* post card */
amdgpu_atom_asic_init(adev->mode_info.atom_context);
if (!r) {
dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
r = amdgpu_device_ip_resume_phase1(adev);
if (r)
goto out;
vram_lost = amdgpu_device_check_vram_lost(adev);
if (vram_lost) {
DRM_ERROR("VRAM is lost!\n");
atomic_inc(&adev->vram_lost_counter);
}
r = amdgpu_gtt_mgr_recover(
&adev->mman.bdev.man[TTM_PL_TT]);
if (r)
goto out;
r = amdgpu_device_fw_loading(adev);
if (r)
return r;
r = amdgpu_device_ip_resume_phase2(adev);
if (r)
goto out;
if (vram_lost)
amdgpu_device_fill_reset_magic(adev);
}
}
out:
if (!r) {
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
if (r) {
dev_err(adev->dev, "ib ring test failed (%d).\n", r);
r = amdgpu_device_ip_suspend(adev);
need_full_reset = true;
goto retry;
}
}
if (!r)
r = amdgpu_device_recover_vram(adev);
return r;
}
/** /**
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
...@@ -3339,31 +3259,13 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) ...@@ -3339,31 +3259,13 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
return false; return false;
} }
/**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
* @adev: amdgpu device pointer
* @job: which job trigger hang
*
* Attempt to reset the GPU if it has hung (all asics).
* Returns 0 for success or an error on failure.
*/
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
int i, r, resched;
dev_info(adev->dev, "GPU reset begin!\n"); static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
struct amdgpu_job *job,
mutex_lock(&adev->lock_reset); bool *need_full_reset_arg)
atomic_inc(&adev->gpu_reset_counter); {
adev->in_gpu_reset = 1; int i, r = 0;
bool need_full_reset = *need_full_reset_arg;
/* Block kfd */
amdgpu_amdkfd_pre_reset(adev);
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
/* block all schedulers and reset given job's ring */ /* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
...@@ -3383,10 +3285,123 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, ...@@ -3383,10 +3285,123 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
amdgpu_fence_driver_force_completion(ring); amdgpu_fence_driver_force_completion(ring);
} }
if (amdgpu_sriov_vf(adev))
r = amdgpu_device_reset_sriov(adev, job ? false : true);
if (!amdgpu_sriov_vf(adev)) {
if (!need_full_reset)
need_full_reset = amdgpu_device_ip_need_full_reset(adev);
if (!need_full_reset) {
amdgpu_device_ip_pre_soft_reset(adev);
r = amdgpu_device_ip_soft_reset(adev);
amdgpu_device_ip_post_soft_reset(adev);
if (r || amdgpu_device_ip_check_soft_reset(adev)) {
DRM_INFO("soft reset failed, will fallback to full reset!\n");
need_full_reset = true;
}
}
if (need_full_reset)
r = amdgpu_device_ip_suspend(adev);
*need_full_reset_arg = need_full_reset;
}
return r;
}
static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
struct list_head *device_list_handle,
bool *need_full_reset_arg)
{
struct amdgpu_device *tmp_adev = NULL;
bool need_full_reset = *need_full_reset_arg, vram_lost = false;
int r = 0;
/*
* ASIC reset has to be done on all HGMI hive nodes ASAP
* to allow proper links negotiation in FW (within 1 sec)
*/
if (need_full_reset) {
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
r = amdgpu_asic_reset(tmp_adev);
if (r)
DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s",
r, tmp_adev->ddev->unique);
}
}
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
if (need_full_reset) {
/* post card */
if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context))
DRM_WARN("asic atom init failed!");
if (!r) {
dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
r = amdgpu_device_ip_resume_phase1(tmp_adev);
if (r)
goto out;
vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
if (vram_lost) {
DRM_ERROR("VRAM is lost!\n");
atomic_inc(&tmp_adev->vram_lost_counter);
}
r = amdgpu_gtt_mgr_recover(
&tmp_adev->mman.bdev.man[TTM_PL_TT]);
if (r)
goto out;
r = amdgpu_device_fw_loading(tmp_adev);
if (r)
return r;
r = amdgpu_device_ip_resume_phase2(tmp_adev);
if (r)
goto out;
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
/* Update PSP FW topology after reset */
if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
r = amdgpu_xgmi_update_topology(hive, tmp_adev);
}
}
out:
if (!r) {
amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
r = amdgpu_ib_ring_tests(tmp_adev);
if (r) {
dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
r = amdgpu_device_ip_suspend(tmp_adev);
need_full_reset = true;
r = -EAGAIN;
goto end;
}
}
if (!r)
r = amdgpu_device_recover_vram(tmp_adev);
else else
r = amdgpu_device_reset(adev); tmp_adev->asic_reset_res = r;
}
end:
*need_full_reset_arg = need_full_reset;
return r;
}
static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
int i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i]; struct amdgpu_ring *ring = adev->rings[i];
...@@ -3398,7 +3413,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, ...@@ -3398,7 +3413,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* or all rings (in the case @job is NULL) * or all rings (in the case @job is NULL)
* after above amdgpu_reset accomplished * after above amdgpu_reset accomplished
*/ */
if ((!job || job->base.sched == &ring->sched) && !r) if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
drm_sched_job_recovery(&ring->sched); drm_sched_job_recovery(&ring->sched);
kthread_unpark(ring->sched.thread); kthread_unpark(ring->sched.thread);
...@@ -3408,21 +3423,144 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, ...@@ -3408,21 +3423,144 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
drm_helper_resume_force_mode(adev->ddev); drm_helper_resume_force_mode(adev->ddev);
} }
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); adev->asic_reset_res = 0;
}
if (r) { static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
/* bad news, how to tell it to userspace ? */ {
dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); mutex_lock(&adev->lock_reset);
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); atomic_inc(&adev->gpu_reset_counter);
} else { adev->in_gpu_reset = 1;
dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter)); /* Block kfd */
} amdgpu_amdkfd_pre_reset(adev);
}
static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
{
/*unlock kfd */ /*unlock kfd */
amdgpu_amdkfd_post_reset(adev); amdgpu_amdkfd_post_reset(adev);
amdgpu_vf_error_trans_all(adev); amdgpu_vf_error_trans_all(adev);
adev->in_gpu_reset = 0; adev->in_gpu_reset = 0;
mutex_unlock(&adev->lock_reset); mutex_unlock(&adev->lock_reset);
}
/**
* amdgpu_device_gpu_recover - reset the asic and recover scheduler
*
* @adev: amdgpu device pointer
* @job: which job trigger hang
*
* Attempt to reset the GPU if it has hung (all asics).
* Attempt to do soft-reset or full-reset and reinitialize Asic
* Returns 0 for success or an error on failure.
*/
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
int r;
struct amdgpu_hive_info *hive = NULL;
bool need_full_reset = false;
struct amdgpu_device *tmp_adev = NULL;
struct list_head device_list, *device_list_handle = NULL;
INIT_LIST_HEAD(&device_list);
dev_info(adev->dev, "GPU reset begin!\n");
/*
* In case of XGMI hive disallow concurrent resets to be triggered
* by different nodes. No point also since the one node already executing
* reset will also reset all the other nodes in the hive.
*/
hive = amdgpu_get_xgmi_hive(adev);
if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
!mutex_trylock(&hive->hive_lock))
return 0;
/* Start with adev pre asic reset first for soft reset check.*/
amdgpu_device_lock_adev(adev);
r = amdgpu_device_pre_asic_reset(adev,
job,
&need_full_reset);
if (r) {
/*TODO Should we stop ?*/
DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
r, adev->ddev->unique);
adev->asic_reset_res = r;
}
/* Build list of devices to reset */
if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
if (!hive) {
amdgpu_device_unlock_adev(adev);
return -ENODEV;
}
/*
* In case we are in XGMI hive mode device reset is done for all the
* nodes in the hive to retrain all XGMI links and hence the reset
* sequence is executed in loop on all nodes.
*/
device_list_handle = &hive->device_list;
} else {
list_add_tail(&adev->gmc.xgmi.head, &device_list);
device_list_handle = &device_list;
}
retry: /* Rest of adevs pre asic reset from XGMI hive. */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
if (tmp_adev == adev)
continue;
dev_info(tmp_adev->dev, "GPU reset begin for drm dev %s!\n", adev->ddev->unique);
amdgpu_device_lock_adev(tmp_adev);
r = amdgpu_device_pre_asic_reset(tmp_adev,
NULL,
&need_full_reset);
/*TODO Should we stop ?*/
if (r) {
DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
r, tmp_adev->ddev->unique);
tmp_adev->asic_reset_res = r;
}
}
/* Actual ASIC resets if needed.*/
/* TODO Implement XGMI hive reset logic for SRIOV */
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_device_reset_sriov(adev, job ? false : true);
if (r)
adev->asic_reset_res = r;
} else {
r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset);
if (r && r == -EAGAIN)
goto retry;
}
/* Post ASIC reset for all devs .*/
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
if (r) {
/* bad news, how to tell it to userspace ? */
dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
} else {
dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter));
}
amdgpu_device_unlock_adev(tmp_adev);
}
if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
mutex_unlock(&hive->hive_lock);
if (r)
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
return r; return r;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment