Commit 8d8ffe37 authored by Shiwu Zhang's avatar Shiwu Zhang Committed by Alex Deucher

drm/amdgpu: expose num_hops and num_links xgmi info through dev attr

Add these two dev attrs for xgmi info details which is helpful for
developers checking the xgmi topology by catting the sys file directly.

Take 4 cards with xgmi connection as an example, get the num_hops for each
device or node through xmig_hive_info dir like,
cat /sys/bus/pci/devices/0000:41:00.0/xgmi_hive_info/node1/num_hops
will return "00 41 41 41" where "00" stands for the hops to node1 itself
and "41" is the hops in hex format to every other node in the same hive.
There are node1/node2/node3/node4 representing 4 cards in the hive.

The same for num_links dev attr.
Signed-off-by: default avatarShiwu Zhang <shiwu.zhang@amd.com>
Acked-by: default avatarLe Ma <le.ma@amd.com>
Reviewed-by: default avatarHawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent 188d3f80
...@@ -325,6 +325,36 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev, ...@@ -325,6 +325,36 @@ static ssize_t amdgpu_xgmi_show_device_id(struct device *dev,
} }
static ssize_t amdgpu_xgmi_show_num_hops(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
int i;
for (i = 0; i < top->num_nodes; i++)
sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_hops);
return sysfs_emit(buf, "%s\n", buf);
}
static ssize_t amdgpu_xgmi_show_num_links(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
int i;
for (i = 0; i < top->num_nodes; i++)
sprintf(buf + 3 * i, "%02x ", top->nodes[i].num_links);
return sysfs_emit(buf, "%s\n", buf);
}
#define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801) #define AMDGPU_XGMI_SET_FICAA(o) ((o) | 0x456801)
static ssize_t amdgpu_xgmi_show_error(struct device *dev, static ssize_t amdgpu_xgmi_show_error(struct device *dev,
struct device_attribute *attr, struct device_attribute *attr,
...@@ -361,6 +391,8 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev, ...@@ -361,6 +391,8 @@ static ssize_t amdgpu_xgmi_show_error(struct device *dev,
static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL); static DEVICE_ATTR(xgmi_device_id, S_IRUGO, amdgpu_xgmi_show_device_id, NULL);
static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL); static DEVICE_ATTR(xgmi_error, S_IRUGO, amdgpu_xgmi_show_error, NULL);
static DEVICE_ATTR(xgmi_num_hops, S_IRUGO, amdgpu_xgmi_show_num_hops, NULL);
static DEVICE_ATTR(xgmi_num_links, S_IRUGO, amdgpu_xgmi_show_num_links, NULL);
static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive) struct amdgpu_hive_info *hive)
...@@ -380,6 +412,15 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, ...@@ -380,6 +412,15 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
if (ret) if (ret)
pr_err("failed to create xgmi_error\n"); pr_err("failed to create xgmi_error\n");
/* Create xgmi num hops file */
ret = device_create_file(adev->dev, &dev_attr_xgmi_num_hops);
if (ret)
pr_err("failed to create xgmi_num_hops\n");
/* Create xgmi num links file */
ret = device_create_file(adev->dev, &dev_attr_xgmi_num_links);
if (ret)
pr_err("failed to create xgmi_num_links\n");
/* Create sysfs link to hive info folder on the first device */ /* Create sysfs link to hive info folder on the first device */
if (hive->kobj.parent != (&adev->dev->kobj)) { if (hive->kobj.parent != (&adev->dev->kobj)) {
...@@ -407,6 +448,9 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev, ...@@ -407,6 +448,9 @@ static int amdgpu_xgmi_sysfs_add_dev_info(struct amdgpu_device *adev,
remove_file: remove_file:
device_remove_file(adev->dev, &dev_attr_xgmi_device_id); device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
device_remove_file(adev->dev, &dev_attr_xgmi_error);
device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
success: success:
return ret; return ret;
...@@ -420,6 +464,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev, ...@@ -420,6 +464,8 @@ static void amdgpu_xgmi_sysfs_rem_dev_info(struct amdgpu_device *adev,
device_remove_file(adev->dev, &dev_attr_xgmi_device_id); device_remove_file(adev->dev, &dev_attr_xgmi_device_id);
device_remove_file(adev->dev, &dev_attr_xgmi_error); device_remove_file(adev->dev, &dev_attr_xgmi_error);
device_remove_file(adev->dev, &dev_attr_xgmi_num_hops);
device_remove_file(adev->dev, &dev_attr_xgmi_num_links);
if (hive->kobj.parent != (&adev->dev->kobj)) if (hive->kobj.parent != (&adev->dev->kobj))
sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info"); sysfs_remove_link(&adev->dev->kobj,"xgmi_hive_info");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment