Commit fdafb359 authored by Evan Quan, committed by Alex Deucher

drm/amdgpu: fix MGPU fan boost enablement for XGMI reset

The MGPU fan boost feature should not be enabled until all the
devices from the same hive are back from reset.
Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 4b22e7e3
...@@ -1216,6 +1216,10 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev ); ...@@ -1216,6 +1216,10 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev );
static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; } static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; }
#endif #endif
void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);
#include "amdgpu_object.h" #include "amdgpu_object.h"
/* used by df_v3_6.c and amdgpu_pmu.c */ /* used by df_v3_6.c and amdgpu_pmu.c */
......
...@@ -3559,6 +3559,12 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, ...@@ -3559,6 +3559,12 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
if (vram_lost) if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev); amdgpu_device_fill_reset_magic(tmp_adev);
/*
* Add this ASIC as tracked as reset was already
* complete successfully.
*/
amdgpu_register_gpu_instance(tmp_adev);
r = amdgpu_device_ip_late_init(tmp_adev); r = amdgpu_device_ip_late_init(tmp_adev);
if (r) if (r)
goto out; goto out;
...@@ -3693,6 +3699,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, ...@@ -3693,6 +3699,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
device_list_handle = &device_list; device_list_handle = &device_list;
} }
/*
* Mark these ASICs to be reseted as untracked first
* And add them back after reset completed
*/
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
amdgpu_unregister_gpu_instance(tmp_adev);
/* block all schedulers and reset given job's ring */ /* block all schedulers and reset given job's ring */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
......
...@@ -44,7 +44,7 @@ ...@@ -44,7 +44,7 @@
#include "amdgpu_display.h" #include "amdgpu_display.h"
#include "amdgpu_ras.h" #include "amdgpu_ras.h"
static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{ {
struct amdgpu_gpu_instance *gpu_instance; struct amdgpu_gpu_instance *gpu_instance;
int i; int i;
...@@ -105,7 +105,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) ...@@ -105,7 +105,7 @@ void amdgpu_driver_unload_kms(struct drm_device *dev)
dev->dev_private = NULL; dev->dev_private = NULL;
} }
static void amdgpu_register_gpu_instance(struct amdgpu_device *adev) void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
{ {
struct amdgpu_gpu_instance *gpu_instance; struct amdgpu_gpu_instance *gpu_instance;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment