Commit 59f488be, authored by Yang Wang, committed by Alex Deucher

drm/amdgpu: add ras event state device attribute support

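Add amdgpu RAS 'event_state' sysfs device attribute support. The attribute reports the current event sequence number and, for each dumped RAS event type, its occurrence count and last sequence number.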
Signed-off-by: Yang Wang <kevinyang.wang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 1dd34092
...@@ -1731,6 +1731,39 @@ static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev, ...@@ -1731,6 +1731,39 @@ static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev,
return sysfs_emit(buf, "schema: 0x%x\n", con->schema); return sysfs_emit(buf, "schema: 0x%x\n", con->schema);
} }
static struct {
enum ras_event_type type;
const char *name;
} dump_event[] = {
{RAS_EVENT_TYPE_FATAL, "Fatal Error"},
{RAS_EVENT_TYPE_POISON_CREATION, "Poison Creation"},
{RAS_EVENT_TYPE_POISON_CONSUMPTION, "Poison Consumption"},
};
/*
 * sysfs show callback for the 'event_state' device attribute.
 *
 * Writes the current global RAS event sequence number on the first line,
 * followed by one line per dump_event[] entry with that event type's
 * occurrence count and last recorded sequence number.
 *
 * Returns the number of bytes written to @buf, or -EINVAL when the RAS
 * context has no event manager attached.
 */
static ssize_t amdgpu_ras_sysfs_event_state_show(struct device *dev,
						 struct device_attribute *attr, char *buf)
{
	struct amdgpu_ras *con =
		container_of(attr, struct amdgpu_ras, event_state_attr);
	struct ras_event_manager *mgr = con->event_mgr;
	int len;
	int idx;

	if (!mgr)
		return -EINVAL;

	/* Header line: the global sequence counter. */
	len = sysfs_emit_at(buf, 0, "current seqno: %llu\n",
			    atomic64_read(&mgr->seqno));

	/* One line per event type listed in dump_event[]. */
	for (idx = 0; idx < ARRAY_SIZE(dump_event); idx++) {
		struct ras_event_state *state =
			&mgr->event_state[dump_event[idx].type];

		len += sysfs_emit_at(buf, len, "%s: count:%llu, last_seqno:%llu\n",
				     dump_event[idx].name,
				     atomic64_read(&state->count),
				     state->last_seqno);
	}

	return len;
}
static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
{ {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
...@@ -1748,6 +1781,7 @@ static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev) ...@@ -1748,6 +1781,7 @@ static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev)
&con->features_attr.attr, &con->features_attr.attr,
&con->version_attr.attr, &con->version_attr.attr,
&con->schema_attr.attr, &con->schema_attr.attr,
&con->event_state_attr.attr,
NULL NULL
}; };
struct attribute_group group = { struct attribute_group group = {
...@@ -1980,6 +2014,8 @@ static DEVICE_ATTR(version, 0444, ...@@ -1980,6 +2014,8 @@ static DEVICE_ATTR(version, 0444,
amdgpu_ras_sysfs_version_show, NULL); amdgpu_ras_sysfs_version_show, NULL);
static DEVICE_ATTR(schema, 0444, static DEVICE_ATTR(schema, 0444,
amdgpu_ras_sysfs_schema_show, NULL); amdgpu_ras_sysfs_schema_show, NULL);
/*
 * Template for the read-only (0444) 'event_state' attribute: it has a show
 * handler and no store handler. amdgpu_ras_fs_init() copies it into
 * con->event_state_attr.
 */
static DEVICE_ATTR(event_state, 0444,
amdgpu_ras_sysfs_event_state_show, NULL);
static int amdgpu_ras_fs_init(struct amdgpu_device *adev) static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
{ {
struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
...@@ -1990,6 +2026,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) ...@@ -1990,6 +2026,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
&con->features_attr.attr, &con->features_attr.attr,
&con->version_attr.attr, &con->version_attr.attr,
&con->schema_attr.attr, &con->schema_attr.attr,
&con->event_state_attr.attr,
NULL NULL
}; };
struct bin_attribute *bin_attrs[] = { struct bin_attribute *bin_attrs[] = {
...@@ -2012,6 +2049,10 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) ...@@ -2012,6 +2049,10 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev)
con->schema_attr = dev_attr_schema; con->schema_attr = dev_attr_schema;
sysfs_attr_init(attrs[2]); sysfs_attr_init(attrs[2]);
/* add event_state entry */
con->event_state_attr = dev_attr_event_state;
sysfs_attr_init(attrs[3]);
if (amdgpu_bad_page_threshold != 0) { if (amdgpu_bad_page_threshold != 0) {
/* add bad_page_features entry */ /* add bad_page_features entry */
bin_attr_gpu_vram_bad_pages.private = NULL; bin_attr_gpu_vram_bad_pages.private = NULL;
...@@ -3440,13 +3481,17 @@ static int amdgpu_get_ras_schema(struct amdgpu_device *adev) ...@@ -3440,13 +3481,17 @@ static int amdgpu_get_ras_schema(struct amdgpu_device *adev)
static void ras_event_mgr_init(struct ras_event_manager *mgr) static void ras_event_mgr_init(struct ras_event_manager *mgr)
{ {
struct ras_event_state *event_state;
int i; int i;
memset(mgr, 0, sizeof(*mgr)); memset(mgr, 0, sizeof(*mgr));
atomic64_set(&mgr->seqno, 0); atomic64_set(&mgr->seqno, 0);
for (i = 0; i < ARRAY_SIZE(mgr->last_seqno); i++) for (i = 0; i < ARRAY_SIZE(mgr->event_state); i++) {
mgr->last_seqno[i] = RAS_EVENT_INVALID_ID; event_state = &mgr->event_state[i];
event_state->last_seqno = RAS_EVENT_INVALID_ID;
atomic64_set(&event_state->count, 0);
}
} }
static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev) static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev)
...@@ -3961,6 +4006,7 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_ ...@@ -3961,6 +4006,7 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_
const void *caller) const void *caller)
{ {
struct ras_event_manager *event_mgr; struct ras_event_manager *event_mgr;
struct ras_event_state *event_state;
int ret = 0; int ret = 0;
if (type >= RAS_EVENT_TYPE_COUNT) { if (type >= RAS_EVENT_TYPE_COUNT) {
...@@ -3974,7 +4020,9 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_ ...@@ -3974,7 +4020,9 @@ int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_
goto out; goto out;
} }
event_mgr->last_seqno[type] = atomic64_inc_return(&event_mgr->seqno); event_state = &event_mgr->event_state[type];
event_state->last_seqno = atomic64_inc_return(&event_mgr->seqno);
atomic64_inc(&event_state->count);
out: out:
if (ret && caller) if (ret && caller)
...@@ -4000,7 +4048,7 @@ u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type ...@@ -4000,7 +4048,7 @@ u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type
if (!event_mgr) if (!event_mgr)
return RAS_EVENT_INVALID_ID; return RAS_EVENT_INVALID_ID;
id = event_mgr->last_seqno[type]; id = event_mgr->event_state[type].last_seqno;
break; break;
case RAS_EVENT_TYPE_INVALID: case RAS_EVENT_TYPE_INVALID:
default: default:
......
...@@ -440,9 +440,14 @@ enum ras_event_type { ...@@ -440,9 +440,14 @@ enum ras_event_type {
RAS_EVENT_TYPE_COUNT, RAS_EVENT_TYPE_COUNT,
}; };
/*
 * Per-event-type RAS bookkeeping. struct ras_event_manager holds an array of
 * these, indexed by enum ras_event_type.
 */
struct ras_event_state {
/* Sequence number assigned the last time this event type was marked; RAS_EVENT_INVALID_ID after init. */
u64 last_seqno;
/* Number of times this event type has been marked since init. */
atomic64_t count;
};
struct ras_event_manager { struct ras_event_manager {
atomic64_t seqno; atomic64_t seqno;
u64 last_seqno[RAS_EVENT_TYPE_COUNT]; struct ras_event_state event_state[RAS_EVENT_TYPE_COUNT];
}; };
struct ras_event_id { struct ras_event_id {
...@@ -496,6 +501,7 @@ struct amdgpu_ras { ...@@ -496,6 +501,7 @@ struct amdgpu_ras {
struct device_attribute features_attr; struct device_attribute features_attr;
struct device_attribute version_attr; struct device_attribute version_attr;
struct device_attribute schema_attr; struct device_attribute schema_attr;
struct device_attribute event_state_attr;
struct bin_attribute badpages_attr; struct bin_attribute badpages_attr;
struct dentry *de_ras_eeprom_table; struct dentry *de_ras_eeprom_table;
/* block array */ /* block array */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment