Commit e74313be, authored by YiPeng Chai and committed by Alex Deucher

drm/amdgpu: add condition check for amdgpu_umc_fill_error_record

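Add a condition check to amdgpu_umc_fill_error_record: the function now returns -EINVAL when err_data or err_data->err_addr is NULL, or when err_addr_cnt has reached err_addr_len, instead of indexing err_addr unconditionally. It returns 0 on success.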
Signed-off-by: YiPeng Chai <YiPeng.Chai@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 2cf8e50e
...@@ -579,6 +579,7 @@ struct ras_err_data { ...@@ -579,6 +579,7 @@ struct ras_err_data {
unsigned long de_count; unsigned long de_count;
unsigned long err_addr_cnt; unsigned long err_addr_cnt;
struct eeprom_table_record *err_addr; struct eeprom_table_record *err_addr;
unsigned long err_addr_len;
u32 err_list_count; u32 err_list_count;
struct list_head err_node_list; struct list_head err_node_list;
}; };
......
...@@ -66,6 +66,8 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, ...@@ -66,6 +66,8 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
goto out_fini_err_data; goto out_fini_err_data;
} }
err_data.err_addr_len = adev->umc.max_ras_err_cnt_per_query;
/* /*
* Translate UMC channel address to Physical address * Translate UMC channel address to Physical address
*/ */
...@@ -121,6 +123,8 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, ...@@ -121,6 +123,8 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
if(!err_data->err_addr) if(!err_data->err_addr)
dev_warn(adev->dev, "Failed to alloc memory for " dev_warn(adev->dev, "Failed to alloc memory for "
"umc error address record!\n"); "umc error address record!\n");
else
err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
/* umc query_ras_error_address is also responsible for clearing /* umc query_ras_error_address is also responsible for clearing
* error status * error status
...@@ -146,6 +150,8 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, ...@@ -146,6 +150,8 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
if(!err_data->err_addr) if(!err_data->err_addr)
dev_warn(adev->dev, "Failed to alloc memory for " dev_warn(adev->dev, "Failed to alloc memory for "
"umc error address record!\n"); "umc error address record!\n");
else
err_data->err_addr_len = adev->umc.max_ras_err_cnt_per_query;
/* umc query_ras_error_address is also responsible for clearing /* umc query_ras_error_address is also responsible for clearing
* error status * error status
...@@ -389,14 +395,20 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, ...@@ -389,14 +395,20 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
return 0; return 0;
} }
void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
uint64_t err_addr, uint64_t err_addr,
uint64_t retired_page, uint64_t retired_page,
uint32_t channel_index, uint32_t channel_index,
uint32_t umc_inst) uint32_t umc_inst)
{ {
struct eeprom_table_record *err_rec = struct eeprom_table_record *err_rec;
&err_data->err_addr[err_data->err_addr_cnt];
if (!err_data ||
!err_data->err_addr ||
(err_data->err_addr_cnt >= err_data->err_addr_len))
return -EINVAL;
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
err_rec->address = err_addr; err_rec->address = err_addr;
/* page frame address is saved */ /* page frame address is saved */
...@@ -408,6 +420,8 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, ...@@ -408,6 +420,8 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
err_rec->mcumc_id = umc_inst; err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++; err_data->err_addr_cnt++;
return 0;
} }
int amdgpu_umc_loop_channels(struct amdgpu_device *adev, int amdgpu_umc_loop_channels(struct amdgpu_device *adev,
......
...@@ -109,7 +109,7 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, ...@@ -109,7 +109,7 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry); struct amdgpu_iv_entry *entry);
void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, int amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
uint64_t err_addr, uint64_t err_addr,
uint64_t retired_page, uint64_t retired_page,
uint32_t channel_index, uint32_t channel_index,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment