Commit fdafb359 authored by Evan Quan's avatar Evan Quan Committed by Alex Deucher

drm/amdgpu: fix MGPU fan boost enablement for XGMI reset

MGPU fan boost feature should not be enabled until all the
devices from the same hive are all back from reset.
Signed-off-by: default avatarEvan Quan <evan.quan@amd.com>
Reviewed-by: default avatarAlex Deucher <alexander.deucher@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 4b22e7e3
......@@ -1216,6 +1216,10 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev );
static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; }
#endif
void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);
#include "amdgpu_object.h"
/* used by df_v3_6.c and amdgpu_pmu.c */
......
......@@ -3559,6 +3559,12 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
/*
* Add this ASIC as tracked as reset was already
* complete successfully.
*/
amdgpu_register_gpu_instance(tmp_adev);
r = amdgpu_device_ip_late_init(tmp_adev);
if (r)
goto out;
......@@ -3693,6 +3699,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
device_list_handle = &device_list;
}
/*
* Mark these ASICs to be reseted as untracked first
* And add them back after reset completed
*/
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
amdgpu_unregister_gpu_instance(tmp_adev);
/* block all schedulers and reset given job's ring */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
......
......@@ -44,7 +44,7 @@
#include "amdgpu_display.h"
#include "amdgpu_ras.h"
static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
struct amdgpu_gpu_instance *gpu_instance;
int i;
......@@ -105,7 +105,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
dev->dev_private = NULL;
}
static void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
{
struct amdgpu_gpu_instance *gpu_instance;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment