Commit 97c002be authored by Alex Deucher

drm/amdgpu: enable BACO reset for SMU7 based dGPUs (v2)

Use BACO to reset the GPU if supported on SMU7 based
dGPUs.

v2: don't use baco on CI parts
Reviewed-by: Evan Quan <evan.quan@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 2a113c74
...@@ -1270,15 +1270,15 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev) ...@@ -1270,15 +1270,15 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
} }
/** /**
* cik_asic_reset - soft reset GPU * cik_asic_pci_config_reset - soft reset GPU
* *
* @adev: amdgpu_device pointer * @adev: amdgpu_device pointer
* *
* Look up which blocks are hung and attempt * Use PCI Config method to reset the GPU.
* to reset them. *
* Returns 0 for success. * Returns 0 for success.
*/ */
static int cik_asic_reset(struct amdgpu_device *adev) static int cik_asic_pci_config_reset(struct amdgpu_device *adev)
{ {
int r; int r;
...@@ -1294,7 +1294,45 @@ static int cik_asic_reset(struct amdgpu_device *adev) ...@@ -1294,7 +1294,45 @@ static int cik_asic_reset(struct amdgpu_device *adev)
/*
 * cik_asic_reset_method - choose the reset method for a CIK ASIC
 *
 * Returns AMD_RESET_METHOD_BACO when baco_reset ends up true and
 * AMD_RESET_METHOD_LEGACY otherwise.
 */
static enum amd_reset_method static enum amd_reset_method
cik_asic_reset_method(struct amdgpu_device *adev) cik_asic_reset_method(struct amdgpu_device *adev)
{ {
return AMD_RESET_METHOD_LEGACY; bool baco_reset;
/*
 * Every case below currently forces baco_reset to false, so the
 * selection at the end always yields AMD_RESET_METHOD_LEGACY.
 */
switch (adev->asic_type) {
case CHIP_BONAIRE:
case CHIP_HAWAII:
/* disable baco reset until it works */
/* smu7_asic_get_baco_capability(adev, &baco_reset); */
baco_reset = false;
break;
default:
baco_reset = false;
break;
}
if (baco_reset)
return AMD_RESET_METHOD_BACO;
else
return AMD_RESET_METHOD_LEGACY;
}
/**
 * cik_asic_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Performs a BACO reset when cik_asic_reset_method() selects
 * AMD_RESET_METHOD_BACO; otherwise falls back to the PCI config
 * reset path.
 * Returns the value of the reset routine that ran.
 */
static int cik_asic_reset(struct amdgpu_device *adev)
{
int r;
/* dispatch on the method chosen for this ASIC */
if (cik_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
r = smu7_asic_baco_reset(adev);
else
r = cik_asic_pci_config_reset(adev);
return r;
} }
static u32 cik_get_config_memsize(struct amdgpu_device *adev) static u32 cik_get_config_memsize(struct amdgpu_device *adev)
......
...@@ -31,4 +31,7 @@ void cik_srbm_select(struct amdgpu_device *adev, ...@@ -31,4 +31,7 @@ void cik_srbm_select(struct amdgpu_device *adev,
int cik_set_ip_blocks(struct amdgpu_device *adev); int cik_set_ip_blocks(struct amdgpu_device *adev);
void legacy_doorbell_index_init(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev);
int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap);
int smu7_asic_baco_reset(struct amdgpu_device *adev);
#endif #endif
...@@ -689,16 +689,50 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev) ...@@ -689,16 +689,50 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev)
return -EINVAL; return -EINVAL;
} }
/**
 * smu7_asic_get_baco_capability - query the powerplay BACO capability
 *
 * @adev: amdgpu_device pointer
 * @cap: receives the capability reported by the powerplay callback;
 *       left false whenever the callback does not report one
 *
 * *cap is cleared before the callback is consulted so that callers which
 * ignore the return value never read an indeterminate value.
 *
 * Returns -ENOENT if no get_asic_baco_capability callback is available,
 * otherwise the callback's return value.
 */
int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap)
{
	void *pp_handle = adev->powerplay.pp_handle;
	const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;

	/*
	 * Default to "not capable": the callback may return (e.g. on an
	 * error path) without writing to *cap.
	 */
	*cap = false;

	if (!pp_funcs || !pp_funcs->get_asic_baco_capability)
		return -ENOENT;

	return pp_funcs->get_asic_baco_capability(pp_handle, cap);
}
/**
 * smu7_asic_baco_reset - reset the GPU by cycling through BACO
 *
 * @adev: amdgpu_device pointer
 *
 * Requests BACO state 1 (enter) and then BACO state 0 (exit) through the
 * powerplay set_asic_baco_state callback.
 *
 * Returns -ENOENT if the powerplay function table or either of the
 * get/set BACO state callbacks is missing, -EIO if a state change
 * fails, 0 on success.
 */
int smu7_asic_baco_reset(struct amdgpu_device *adev)
{
	static const int baco_sequence[] = { 1, 0 }; /* enter, then exit */
	const struct amd_pm_funcs *funcs = adev->powerplay.pp_funcs;
	void *handle = adev->powerplay.pp_handle;
	unsigned int step;

	if (!(funcs && funcs->get_asic_baco_state && funcs->set_asic_baco_state))
		return -ENOENT;

	/* stop at the first state change that fails */
	for (step = 0; step < sizeof(baco_sequence) / sizeof(baco_sequence[0]); step++) {
		if (funcs->set_asic_baco_state(handle, baco_sequence[step]))
			return -EIO;
	}

	dev_info(adev->dev, "GPU BACO reset\n");

	return 0;
}
/** /**
* vi_asic_reset - soft reset GPU * vi_asic_pci_config_reset - soft reset GPU
* *
* @adev: amdgpu_device pointer * @adev: amdgpu_device pointer
* *
* Look up which blocks are hung and attempt * Use PCI Config method to reset the GPU.
* to reset them. *
* Returns 0 for success. * Returns 0 for success.
*/ */
static int vi_asic_reset(struct amdgpu_device *adev) static int vi_asic_pci_config_reset(struct amdgpu_device *adev)
{ {
int r; int r;
...@@ -714,7 +748,47 @@ static int vi_asic_reset(struct amdgpu_device *adev) ...@@ -714,7 +748,47 @@ static int vi_asic_reset(struct amdgpu_device *adev)
/*
 * vi_asic_reset_method - choose the reset method for a VI ASIC
 *
 * Returns AMD_RESET_METHOD_BACO when baco_reset ends up true and
 * AMD_RESET_METHOD_LEGACY otherwise.
 */
static enum amd_reset_method static enum amd_reset_method
vi_asic_reset_method(struct amdgpu_device *adev) vi_asic_reset_method(struct amdgpu_device *adev)
{ {
return AMD_RESET_METHOD_LEGACY; bool baco_reset;
switch (adev->asic_type) {
case CHIP_FIJI:
case CHIP_TONGA:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_TOPAZ:
/*
 * The return value is ignored here, so this relies on
 * smu7_asic_get_baco_capability() leaving a defined value in
 * baco_reset on every path.
 */
smu7_asic_get_baco_capability(adev, &baco_reset);
break;
default:
/* all other ASIC types use the legacy method */
baco_reset = false;
break;
}
if (baco_reset)
return AMD_RESET_METHOD_BACO;
else
return AMD_RESET_METHOD_LEGACY;
}
/**
 * vi_asic_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Performs a BACO reset when vi_asic_reset_method() selects
 * AMD_RESET_METHOD_BACO; otherwise falls back to the PCI config
 * reset path.
 * Returns the value of the reset routine that ran.
 */
static int vi_asic_reset(struct amdgpu_device *adev)
{
int r;
/* dispatch on the method chosen for this ASIC */
if (vi_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)
r = smu7_asic_baco_reset(adev);
else
r = vi_asic_pci_config_reset(adev);
return r;
} }
static u32 vi_get_config_memsize(struct amdgpu_device *adev) static u32 vi_get_config_memsize(struct amdgpu_device *adev)
......
...@@ -31,4 +31,7 @@ void vi_srbm_select(struct amdgpu_device *adev, ...@@ -31,4 +31,7 @@ void vi_srbm_select(struct amdgpu_device *adev,
int vi_set_ip_blocks(struct amdgpu_device *adev); int vi_set_ip_blocks(struct amdgpu_device *adev);
void legacy_doorbell_index_init(struct amdgpu_device *adev); void legacy_doorbell_index_init(struct amdgpu_device *adev);
int smu7_asic_get_baco_capability(struct amdgpu_device *adev, bool *cap);
int smu7_asic_baco_reset(struct amdgpu_device *adev);
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment