Commit 029fbd43 authored by Hawking Zhang's avatar Hawking Zhang Committed by Alex Deucher

drm/amdgpu: initialize ras structures for xgmi block (v2)

init ras common interface and fs node for xgmi block

v2: remove unnecessary physical node number check before
invoking amdgpu_xgmi_ras_late_init
Signed-off-by: default avatarHawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: default avatarGuchun Chen <guchun.chen@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent acb9acbe
...@@ -123,6 +123,7 @@ struct amdgpu_xgmi { ...@@ -123,6 +123,7 @@ struct amdgpu_xgmi {
/* gpu list in the same hive */ /* gpu list in the same hive */
struct list_head head; struct list_head head;
bool supported; bool supported;
struct ras_common_if *ras_if;
}; };
struct amdgpu_gmc { struct amdgpu_gmc {
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "amdgpu.h" #include "amdgpu.h"
#include "amdgpu_xgmi.h" #include "amdgpu_xgmi.h"
#include "amdgpu_smu.h" #include "amdgpu_smu.h"
#include "amdgpu_ras.h"
#include "df/df_3_6_offset.h" #include "df/df_3_6_offset.h"
static DEFINE_MUTEX(xgmi_mutex); static DEFINE_MUTEX(xgmi_mutex);
...@@ -437,3 +438,38 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) ...@@ -437,3 +438,38 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
mutex_unlock(&hive->hive_lock); mutex_unlock(&hive->hive_lock);
} }
} }
int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
{
int r;
struct ras_ih_if ih_info = {
.cb = NULL,
};
struct ras_fs_if fs_info = {
.sysfs_name = "xgmi_wafl_err_count",
.debugfs_name = "xgmi_wafl_err_inject",
};
if (!adev->gmc.xgmi.supported ||
adev->gmc.xgmi.num_physical_nodes == 0)
return 0;
if (!adev->gmc.xgmi.ras_if) {
adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
if (!adev->gmc.xgmi.ras_if)
return -ENOMEM;
adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL;
adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
adev->gmc.xgmi.ras_if->sub_block_index = 0;
strcpy(adev->gmc.xgmi.ras_if->name, "xgmi_wafl");
}
ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if;
r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if,
&fs_info, &ih_info);
if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) {
kfree(adev->gmc.xgmi.ras_if);
adev->gmc.xgmi.ras_if = NULL;
}
return r;
}
...@@ -42,6 +42,7 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); ...@@ -42,6 +42,7 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev); struct amdgpu_device *peer_adev);
int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev);
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev) struct amdgpu_device *bo_adev)
......
...@@ -55,6 +55,7 @@ ...@@ -55,6 +55,7 @@
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
#include "amdgpu_ras.h" #include "amdgpu_ras.h"
#include "amdgpu_xgmi.h"
/* add these here since we already include dce12 headers and these are for DCN */ /* add these here since we already include dce12 headers and these are for DCN */
#define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d #define mmHUBP0_DCSURF_PRI_VIEWPORT_DIMENSION 0x055d
...@@ -808,7 +809,8 @@ static int gmc_v9_0_ecc_late_init(void *handle) ...@@ -808,7 +809,8 @@ static int gmc_v9_0_ecc_late_init(void *handle)
if (r) if (r)
return r; return r;
} }
return 0;
return amdgpu_xgmi_ras_late_init(adev);
} }
static int gmc_v9_0_late_init(void *handle) static int gmc_v9_0_late_init(void *handle)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment