drm/amdgpu: remove redundant GPU reset

Because bad pages saving has been moved to UMC error interrupt callback, which will trigger a new GPU reset after saving. Signed-off-by: Dennis Li <Dennis.Li@amd.com> Reviewed-by: Hawking Zhang <hawking.zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

drm/amdgpu: remove redundant GPU reset
Because bad pages saving has been moved to UMC error interrupt callback, which will trigger a new GPU reset after saving. Signed-off-by: Dennis Li <Dennis.Li@amd.com> Reviewed-by: Hawking Zhang <hawking.zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
5eeb4593 · Dennis Li · Alex Deucher · 22503d80 · 5eeb4593 · 5eeb4593
Commit 5eeb4593 authored Oct 19, 2020 by Dennis Li Committed by Alex Deucher Oct 30, 2020
Showing with 1 addition and 25 deletions

drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +1 -9

drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +0 -16

No files found.
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -33,7 +33,6 @@

 #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS		(0x1 << 0)
 #define AMDGPU_RAS_FLAG_INIT_NEED_RESET		(0x1 << 1)
-#define AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV	(0x1 << 2)

 enum amdgpu_ras_block {
 	AMDGPU_RAS_BLOCK__UMC = 0,
@@ -513,14 +512,7 @@ static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

-	/*
-	 * Save bad page to eeprom before gpu reset, i2c may be unstable
-	 * in gpu reset.
-	 *
-	 * Also, exclude the case when ras recovery issuer is
-	 * eeprom page write itself.
-	 */
-	if (!(ras->flags & AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV) && in_task())
+	if (in_task())
 		amdgpu_ras_reserve_bad_pages(adev);

 	if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0)

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -479,7 +479,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
 	int i, ret = 0;
 	struct i2c_msg *msgs, *msg;
 	unsigned char *buffs, *buff;
-	bool sched_ras_recovery = false;
 	struct eeprom_table_record *record;
 	struct amdgpu_device *adev = to_amdgpu_device(control);
 	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
@@ -517,7 +516,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
 			"Saved bad pages(%d) reaches threshold value(%d).\n",
 			control->num_recs + num, ras->bad_page_cnt_threshold);
 		control->tbl_hdr.header = EEPROM_TABLE_HDR_BAD;
-		sched_ras_recovery = true;
 	}

 	/* In case of overflow just start from beginning to not lose newest records */
@@ -603,20 +601,6 @@ int amdgpu_ras_eeprom_process_recods(struct amdgpu_ras_eeprom_control *control,
 		__update_tbl_checksum(control, records, num, old_hdr_byte_sum);

 		__update_table_header(control, buffs);
-
-		if (sched_ras_recovery) {
-			/*
-			 * Before scheduling ras recovery, assert the related
-			 * flag first, which shall bypass common bad page
-			 * reservation execution in amdgpu_ras_reset_gpu.
-			 */
-			amdgpu_ras_get_context(adev)->flags |=
-				AMDGPU_RAS_FLAG_SKIP_BAD_PAGE_RESV;
-
-			dev_warn(adev->dev, "Conduct ras recovery due to bad "
-				"page threshold reached.\n");
-			amdgpu_ras_reset_gpu(adev);
-		}
 	} else if (!__validate_tbl_checksum(control, records, num)) {
 		DRM_WARN("EEPROM Table checksum mismatch!");
 		/* TODO Uncomment when EEPROM read/write is relliable */