Commit e923be99 authored by Andrey Grodzovsky

drm/amdgpu: Rework amdgpu_device_lock_adev

This function needs to be split into two parts: one that is
called only once to lock the single instance of the
reset_domain's sem and set the reset flag, and another
that handles MP1 state and must still be called for
each device in an XGMI hive.
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://www.spinics.net/lists/amd-gfx/msg74118.html
parent 89a7a870
...@@ -4825,16 +4825,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, ...@@ -4825,16 +4825,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
return r; return r;
} }
static void amdgpu_device_lock_adev(struct amdgpu_device *adev, static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
struct amdgpu_hive_info *hive)
{ {
atomic_set(&adev->reset_domain->in_gpu_reset, 1);
if (hive) {
down_write_nest_lock(&adev->reset_domain->sem, &hive->hive_lock);
} else {
down_write(&adev->reset_domain->sem);
}
switch (amdgpu_asic_reset_method(adev)) { switch (amdgpu_asic_reset_method(adev)) {
case AMD_RESET_METHOD_MODE1: case AMD_RESET_METHOD_MODE1:
...@@ -4849,12 +4841,10 @@ static void amdgpu_device_lock_adev(struct amdgpu_device *adev, ...@@ -4849,12 +4841,10 @@ static void amdgpu_device_lock_adev(struct amdgpu_device *adev,
} }
} }
static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
{ {
amdgpu_vf_error_trans_all(adev); amdgpu_vf_error_trans_all(adev);
adev->mp1_state = PP_MP1_STATE_NONE; adev->mp1_state = PP_MP1_STATE_NONE;
atomic_set(&adev->reset_domain->in_gpu_reset, 0);
up_write(&adev->reset_domain->sem);
} }
static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
...@@ -5060,10 +5050,15 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, ...@@ -5060,10 +5050,15 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
device_list_handle = &device_list; device_list_handle = &device_list;
} }
/* We need to lock reset domain only once both for XGMI and single device */
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list);
amdgpu_device_lock_reset_domain(tmp_adev->reset_domain, hive);
/* block all schedulers and reset given job's ring */ /* block all schedulers and reset given job's ring */
list_for_each_entry(tmp_adev, device_list_handle, reset_list) { list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
amdgpu_device_lock_adev(tmp_adev, hive); amdgpu_device_set_mp1_state(tmp_adev);
/* /*
* Try to put the audio codec into suspend state * Try to put the audio codec into suspend state
...@@ -5213,9 +5208,14 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, ...@@ -5213,9 +5208,14 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev,
if (audio_suspended) if (audio_suspended)
amdgpu_device_resume_display_audio(tmp_adev); amdgpu_device_resume_display_audio(tmp_adev);
amdgpu_device_unlock_adev(tmp_adev);
amdgpu_device_unset_mp1_state(tmp_adev);
} }
tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
reset_list);
amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
if (hive) { if (hive) {
mutex_unlock(&hive->hive_lock); mutex_unlock(&hive->hive_lock);
amdgpu_put_xgmi_hive(hive); amdgpu_put_xgmi_hive(hive);
...@@ -5477,7 +5477,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta ...@@ -5477,7 +5477,8 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
* Locking adev->reset_domain->sem will prevent any external access * Locking adev->reset_domain->sem will prevent any external access
* to GPU during PCI error recovery * to GPU during PCI error recovery
*/ */
amdgpu_device_lock_adev(adev, NULL); amdgpu_device_lock_reset_domain(adev->reset_domain, NULL);
amdgpu_device_set_mp1_state(adev);
/* /*
* Block any work scheduling as we do for regular GPU reset * Block any work scheduling as we do for regular GPU reset
...@@ -5584,7 +5585,8 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) ...@@ -5584,7 +5585,8 @@ pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
DRM_INFO("PCIe error recovery succeeded\n"); DRM_INFO("PCIe error recovery succeeded\n");
} else { } else {
DRM_ERROR("PCIe error recovery failed, err:%d", r); DRM_ERROR("PCIe error recovery failed, err:%d", r);
amdgpu_device_unlock_adev(adev); amdgpu_device_unset_mp1_state(adev);
amdgpu_device_unlock_reset_domain(adev->reset_domain);
} }
return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
...@@ -5621,7 +5623,8 @@ void amdgpu_pci_resume(struct pci_dev *pdev) ...@@ -5621,7 +5623,8 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
drm_sched_start(&ring->sched, true); drm_sched_start(&ring->sched, true);
} }
amdgpu_device_unlock_adev(adev); amdgpu_device_unset_mp1_state(adev);
amdgpu_device_unlock_reset_domain(adev->reset_domain);
} }
bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
......
...@@ -137,5 +137,24 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d ...@@ -137,5 +137,24 @@ struct amdgpu_reset_domain *amdgpu_reset_create_reset_domain(enum amdgpu_reset_d
return reset_domain; return reset_domain;
} }
/**
 * amdgpu_device_lock_reset_domain - mark a reset domain as resetting and
 *                                   take its semaphore for writing
 * @reset_domain: reset domain whose in_gpu_reset flag and sem are used
 * @hive: XGMI hive the reset belongs to, or NULL when there is none
 *
 * The in_gpu_reset flag is set to 1 first, and only then is the write side
 * of @reset_domain->sem acquired. When @hive is supplied the acquisition is
 * annotated as nested under @hive->hive_lock; otherwise a plain write lock
 * is taken.
 */
void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain,
				     struct amdgpu_hive_info *hive)
{
	/* Raise the flag before blocking on the semaphore. */
	atomic_set(&reset_domain->in_gpu_reset, 1);

	if (!hive) {
		/* No hive: nothing to nest under. */
		down_write(&reset_domain->sem);
		return;
	}

	down_write_nest_lock(&reset_domain->sem, &hive->hive_lock);
}
/*
 * Undo amdgpu_device_lock_reset_domain(): clear the reset domain's
 * in_gpu_reset flag and then release the write side of its semaphore.
 *
 * reset_domain: domain whose in_gpu_reset flag and sem are released.
 */
void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
{
/* Flag is dropped to 0 while the write lock is still held. */
atomic_set(&reset_domain->in_gpu_reset, 0);
up_write(&reset_domain->sem);
}
...@@ -118,5 +118,9 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma ...@@ -118,5 +118,9 @@ static inline bool amdgpu_reset_domain_schedule(struct amdgpu_reset_domain *doma
return queue_work(domain->wq, work); return queue_work(domain->wq, work);
} }
/*
 * Reset-domain lock/unlock helpers. The lock variant also takes the XGMI
 * hive; the definition treats a NULL hive as "no hive".
 */
void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain,
struct amdgpu_hive_info *hive);
void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment