Commit 5e2ddd1e authored by Aharon Landau, committed by Jason Gunthorpe

RDMA/counter: Add optional counter support

An optional counter is a driver-specific counter that may be dynamically
enabled/disabled.  This enhancement allows drivers to expose counters
which are, for example, mutually exclusive and cannot be enabled at the
same time, counters that might degrade performance, optional debug
counters, etc.

Optional counters are marked with the IB_STAT_FLAG_OPTIONAL flag. They are
not exported in sysfs and must be placed at the end of all stats; otherwise
the attr->show() in sysfs would get wrong indexes for hwcounters that come
after optional counters.

Link: https://lore.kernel.org/r/20211008122439.166063-7-markzhang@nvidia.com
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Neta Ostrovsky <netao@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
parent 0dc89684
...@@ -106,6 +106,38 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter, ...@@ -106,6 +106,38 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter,
return ret; return ret;
} }
/**
 * rdma_counter_modify - Enable or disable an optional hw counter on a port
 * @dev: device whose driver implements the counter
 * @port: port number, handed to ib_get_hw_stats_port() and to the driver
 * @index: position of the counter in the port's stats->descs[] array
 * @enable: true to enable the counter, false to disable it
 *
 * Only counters flagged IB_STAT_FLAG_OPTIONAL can be modified. The driver's
 * modify_hw_stat() op is called only when the requested state differs from
 * the one recorded in stats->is_disabled, and the bitmap is updated only
 * after the driver reports success. Both steps run under stats->lock.
 *
 * Return: 0 on success or when the counter is already in the requested
 * state; -EOPNOTSUPP if the driver has no modify_hw_stat() op; -EINVAL if
 * ib_get_hw_stats_port() returns NULL, @index is out of range or the counter
 * is not optional; otherwise the error returned by the driver.
 */
int rdma_counter_modify(struct ib_device *dev, u32 port,
			unsigned int index, bool enable)
{
	struct rdma_hw_stats *hw_stats;
	int err = 0;

	if (!dev->ops.modify_hw_stat)
		return -EOPNOTSUPP;

	hw_stats = ib_get_hw_stats_port(dev, port);
	if (!hw_stats)
		return -EINVAL;
	if (index >= hw_stats->num_counters)
		return -EINVAL;
	if (!(hw_stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL))
		return -EINVAL;

	mutex_lock(&hw_stats->lock);

	/*
	 * A state change is needed only when "enable" matches the disabled
	 * bit, i.e. enabling a disabled counter or disabling an enabled one.
	 */
	if (enable == test_bit(index, hw_stats->is_disabled)) {
		err = dev->ops.modify_hw_stat(dev, port, index, enable);
		if (!err) {
			/* Record the new state only once the driver accepted it. */
			if (enable)
				clear_bit(index, hw_stats->is_disabled);
			else
				set_bit(index, hw_stats->is_disabled);
		}
	}

	mutex_unlock(&hw_stats->lock);
	return err;
}
static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port, static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
struct ib_qp *qp, struct ib_qp *qp,
enum rdma_nl_counter_mode mode) enum rdma_nl_counter_mode mode)
......
...@@ -2676,6 +2676,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops) ...@@ -2676,6 +2676,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
SET_DEVICE_OP(dev_ops, modify_cq); SET_DEVICE_OP(dev_ops, modify_cq);
SET_DEVICE_OP(dev_ops, modify_device); SET_DEVICE_OP(dev_ops, modify_device);
SET_DEVICE_OP(dev_ops, modify_flow_action_esp); SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
SET_DEVICE_OP(dev_ops, modify_hw_stat);
SET_DEVICE_OP(dev_ops, modify_port); SET_DEVICE_OP(dev_ops, modify_port);
SET_DEVICE_OP(dev_ops, modify_qp); SET_DEVICE_OP(dev_ops, modify_qp);
SET_DEVICE_OP(dev_ops, modify_srq); SET_DEVICE_OP(dev_ops, modify_srq);
......
...@@ -934,7 +934,8 @@ int ib_setup_device_attrs(struct ib_device *ibdev) ...@@ -934,7 +934,8 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
{ {
struct hw_stats_device_attribute *attr; struct hw_stats_device_attribute *attr;
struct hw_stats_device_data *data; struct hw_stats_device_data *data;
int i, ret; bool opstat_skipped = false;
int i, ret, pos = 0;
data = alloc_hw_stats_device(ibdev); data = alloc_hw_stats_device(ibdev);
if (IS_ERR(data)) { if (IS_ERR(data)) {
...@@ -955,16 +956,23 @@ int ib_setup_device_attrs(struct ib_device *ibdev) ...@@ -955,16 +956,23 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
data->stats->timestamp = jiffies; data->stats->timestamp = jiffies;
for (i = 0; i < data->stats->num_counters; i++) { for (i = 0; i < data->stats->num_counters; i++) {
attr = &data->attrs[i]; if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
opstat_skipped = true;
continue;
}
WARN_ON(opstat_skipped);
attr = &data->attrs[pos];
sysfs_attr_init(&attr->attr.attr); sysfs_attr_init(&attr->attr.attr);
attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.name = data->stats->descs[i].name;
attr->attr.attr.mode = 0444; attr->attr.attr.mode = 0444;
attr->attr.show = hw_stat_device_show; attr->attr.show = hw_stat_device_show;
attr->show = show_hw_stats; attr->show = show_hw_stats;
data->group.attrs[i] = &attr->attr.attr; data->group.attrs[pos] = &attr->attr.attr;
pos++;
} }
attr = &data->attrs[i]; attr = &data->attrs[pos];
sysfs_attr_init(&attr->attr.attr); sysfs_attr_init(&attr->attr.attr);
attr->attr.attr.name = "lifespan"; attr->attr.attr.name = "lifespan";
attr->attr.attr.mode = 0644; attr->attr.attr.mode = 0644;
...@@ -972,7 +980,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev) ...@@ -972,7 +980,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
attr->show = show_stats_lifespan; attr->show = show_stats_lifespan;
attr->attr.store = hw_stat_device_store; attr->attr.store = hw_stat_device_store;
attr->store = set_stats_lifespan; attr->store = set_stats_lifespan;
data->group.attrs[i] = &attr->attr.attr; data->group.attrs[pos] = &attr->attr.attr;
for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++) for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++)
if (!ibdev->groups[i]) { if (!ibdev->groups[i]) {
ibdev->groups[i] = &data->group; ibdev->groups[i] = &data->group;
...@@ -1027,7 +1035,8 @@ static int setup_hw_port_stats(struct ib_port *port, ...@@ -1027,7 +1035,8 @@ static int setup_hw_port_stats(struct ib_port *port,
{ {
struct hw_stats_port_attribute *attr; struct hw_stats_port_attribute *attr;
struct hw_stats_port_data *data; struct hw_stats_port_data *data;
int i, ret; bool opstat_skipped = false;
int i, ret, pos = 0;
data = alloc_hw_stats_port(port, group); data = alloc_hw_stats_port(port, group);
if (IS_ERR(data)) if (IS_ERR(data))
...@@ -1045,16 +1054,23 @@ static int setup_hw_port_stats(struct ib_port *port, ...@@ -1045,16 +1054,23 @@ static int setup_hw_port_stats(struct ib_port *port,
data->stats->timestamp = jiffies; data->stats->timestamp = jiffies;
for (i = 0; i < data->stats->num_counters; i++) { for (i = 0; i < data->stats->num_counters; i++) {
attr = &data->attrs[i]; if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
opstat_skipped = true;
continue;
}
WARN_ON(opstat_skipped);
attr = &data->attrs[pos];
sysfs_attr_init(&attr->attr.attr); sysfs_attr_init(&attr->attr.attr);
attr->attr.attr.name = data->stats->descs[i].name; attr->attr.attr.name = data->stats->descs[i].name;
attr->attr.attr.mode = 0444; attr->attr.attr.mode = 0444;
attr->attr.show = hw_stat_port_show; attr->attr.show = hw_stat_port_show;
attr->show = show_hw_stats; attr->show = show_hw_stats;
group->attrs[i] = &attr->attr.attr; group->attrs[pos] = &attr->attr.attr;
pos++;
} }
attr = &data->attrs[i]; attr = &data->attrs[pos];
sysfs_attr_init(&attr->attr.attr); sysfs_attr_init(&attr->attr.attr);
attr->attr.attr.name = "lifespan"; attr->attr.attr.name = "lifespan";
attr->attr.attr.mode = 0644; attr->attr.attr.mode = 0644;
...@@ -1062,7 +1078,7 @@ static int setup_hw_port_stats(struct ib_port *port, ...@@ -1062,7 +1078,7 @@ static int setup_hw_port_stats(struct ib_port *port,
attr->show = show_stats_lifespan; attr->show = show_stats_lifespan;
attr->attr.store = hw_stat_port_store; attr->attr.store = hw_stat_port_store;
attr->store = set_stats_lifespan; attr->store = set_stats_lifespan;
group->attrs[i] = &attr->attr.attr; group->attrs[pos] = &attr->attr.attr;
port->hw_stats_data = data; port->hw_stats_data = data;
return 0; return 0;
......
...@@ -545,12 +545,18 @@ enum ib_port_speed { ...@@ -545,12 +545,18 @@ enum ib_port_speed {
IB_SPEED_NDR = 128, IB_SPEED_NDR = 128,
}; };
/*
 * Per-counter flags stored in rdma_stat_desc.flags.
 */
enum ib_stat_flag {
/*
 * The counter is optional: it is skipped when the sysfs hw-stats attributes
 * are built, and it is the only kind of counter rdma_counter_modify() will
 * enable or disable. Optional counters must be placed after all
 * non-optional ones in the descs[] array.
 */
IB_STAT_FLAG_OPTIONAL = 1 << 0,
};
/** /**
* struct rdma_stat_desc * struct rdma_stat_desc
* @name - The name of the counter * @name - The name of the counter
* @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
*/ */
struct rdma_stat_desc { struct rdma_stat_desc {
const char *name; const char *name;
unsigned int flags;
}; };
/** /**
...@@ -2562,6 +2568,13 @@ struct ib_device_ops { ...@@ -2562,6 +2568,13 @@ struct ib_device_ops {
int (*get_hw_stats)(struct ib_device *device, int (*get_hw_stats)(struct ib_device *device,
struct rdma_hw_stats *stats, u32 port, int index); struct rdma_hw_stats *stats, u32 port, int index);
/**
 * modify_hw_stat - Enable or disable a single hw counter
 * @device: device the counter belongs to
 * @port: port the counter belongs to
 * @counter_index: index of the counter to modify
 * @enable: true to enable the counter, false to disable it
 *
 * rdma_counter_modify() invokes this op only for counters flagged
 * IB_STAT_FLAG_OPTIONAL, and only while holding the port's stats->lock.
 *
 * Return: 0 on success, an error code otherwise.
 */
int (*modify_hw_stat)(struct ib_device *device, u32 port,
unsigned int counter_index, bool enable);
/** /**
* Allows rdma drivers to add their own restrack attributes. * Allows rdma drivers to add their own restrack attributes.
*/ */
......
...@@ -63,4 +63,6 @@ int rdma_counter_get_mode(struct ib_device *dev, u32 port, ...@@ -63,4 +63,6 @@ int rdma_counter_get_mode(struct ib_device *dev, u32 port,
enum rdma_nl_counter_mode *mode, enum rdma_nl_counter_mode *mode,
enum rdma_nl_counter_mask *mask); enum rdma_nl_counter_mask *mask);
/*
 * Enable (@enable == true) or disable the optional hw counter at @index on
 * @port of @dev. Returns 0 on success or a negative errno.
 */
int rdma_counter_modify(struct ib_device *dev, u32 port,
unsigned int index, bool enable);
#endif /* _RDMA_COUNTER_H_ */ #endif /* _RDMA_COUNTER_H_ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment