Commit ffde7210 authored by André Almeida, committed by Alex Deucher

drm/amdgpu: Create an option to disable soft recovery

Create a module option to disable soft recoveries on amdgpu, making
every recovery go through the device reset path. This option makes
it easier to force device resets for testing and debugging purposes.
Signed-off-by: André Almeida <andrealmeid@igalia.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 887db1e4
...@@ -1102,6 +1102,7 @@ struct amdgpu_device { ...@@ -1102,6 +1102,7 @@ struct amdgpu_device {
/* Debug */ /* Debug */
bool debug_vm; bool debug_vm;
bool debug_largebar; bool debug_largebar;
bool debug_disable_soft_recovery;
}; };
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
......
...@@ -124,6 +124,7 @@ ...@@ -124,6 +124,7 @@
enum AMDGPU_DEBUG_MASK { enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_VM = BIT(0), AMDGPU_DEBUG_VM = BIT(0),
AMDGPU_DEBUG_LARGEBAR = BIT(1), AMDGPU_DEBUG_LARGEBAR = BIT(1),
AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY = BIT(2),
}; };
unsigned int amdgpu_vram_limit = UINT_MAX; unsigned int amdgpu_vram_limit = UINT_MAX;
...@@ -945,6 +946,7 @@ MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics ...@@ -945,6 +946,7 @@ MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics
* - 0x2: Enable simulating large-bar capability on non-large bar system. This * - 0x2: Enable simulating large-bar capability on non-large bar system. This
* limits the VRAM size reported to ROCm applications to the visible * limits the VRAM size reported to ROCm applications to the visible
* size, usually 256MB. * size, usually 256MB.
* - 0x4: Disable GPU soft recovery, always do a full reset
*/ */
MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default"); MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444); module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
...@@ -2064,6 +2066,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) ...@@ -2064,6 +2066,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: enabled simulating large-bar capability on non-large bar system\n"); pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
adev->debug_largebar = true; adev->debug_largebar = true;
} }
if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_SOFT_RECOVERY) {
pr_info("debug: soft reset for GPU recovery disabled\n");
adev->debug_disable_soft_recovery = true;
}
} }
static int amdgpu_pci_probe(struct pci_dev *pdev, static int amdgpu_pci_probe(struct pci_dev *pdev,
......
...@@ -434,8 +434,12 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, ...@@ -434,8 +434,12 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
struct dma_fence *fence) struct dma_fence *fence)
{ {
unsigned long flags; unsigned long flags;
ktime_t deadline;
ktime_t deadline = ktime_add_us(ktime_get(), 10000); if (unlikely(ring->adev->debug_disable_soft_recovery))
return false;
deadline = ktime_add_us(ktime_get(), 10000);
if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence) if (amdgpu_sriov_vf(ring->adev) || !ring->funcs->soft_recovery || !fence)
return false; return false;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment