Commit 0fe132ea, authored by Dmytro Linkin, committed by Saeed Mahameed

net/mlx5: E-switch, Allow to add vports to rate groups

Implement eswitch API that allows updating rate groups. If group
pointer is NULL, then move the vport to internal unlimited group zero.

Implement the devlink_ops->rate_leaf_parent_set() callback in terms of
the new eswitch group update API.

Enable QoS for all group's elements if a group has allocated BW share.
Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Huy Nguyen <huyn@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
parent f47e04eb
...@@ -299,6 +299,7 @@ static const struct devlink_ops mlx5_devlink_ops = { ...@@ -299,6 +299,7 @@ static const struct devlink_ops mlx5_devlink_ops = {
.rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set, .rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set,
.rate_node_new = mlx5_esw_devlink_rate_node_new, .rate_node_new = mlx5_esw_devlink_rate_node_new,
.rate_node_del = mlx5_esw_devlink_rate_node_del, .rate_node_del = mlx5_esw_devlink_rate_node_del,
.rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set,
#endif #endif
#ifdef CONFIG_MLX5_SF_MANAGER #ifdef CONFIG_MLX5_SF_MANAGER
.port_new = mlx5_devlink_sf_port_new, .port_new = mlx5_devlink_sf_port_new,
......
...@@ -116,8 +116,10 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo ...@@ -116,8 +116,10 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo
if (IS_ERR(vport)) if (IS_ERR(vport))
return; return;
if (vport->dl_port->devlink_rate) if (vport->dl_port->devlink_rate) {
mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
devlink_rate_leaf_destroy(vport->dl_port); devlink_rate_leaf_destroy(vport->dl_port);
}
devlink_port_unregister(vport->dl_port); devlink_port_unregister(vport->dl_port);
mlx5_esw_dl_port_free(vport->dl_port); mlx5_esw_dl_port_free(vport->dl_port);
...@@ -178,8 +180,10 @@ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num ...@@ -178,8 +180,10 @@ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num
if (IS_ERR(vport)) if (IS_ERR(vport))
return; return;
if (vport->dl_port->devlink_rate) if (vport->dl_port->devlink_rate) {
mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
devlink_rate_leaf_destroy(vport->dl_port); devlink_rate_leaf_destroy(vport->dl_port);
}
devlink_port_unregister(vport->dl_port); devlink_port_unregister(vport->dl_port);
vport->dl_port = NULL; vport->dl_port = NULL;
......
...@@ -63,20 +63,23 @@ static int esw_qos_vport_config(struct mlx5_eswitch *esw, ...@@ -63,20 +63,23 @@ static int esw_qos_vport_config(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
struct mlx5_esw_rate_group *group = vport->qos.group;
struct mlx5_core_dev *dev = esw->dev; struct mlx5_core_dev *dev = esw->dev;
u32 parent_tsar_ix;
void *vport_elem; void *vport_elem;
int err; int err;
if (!vport->qos.enabled) if (!vport->qos.enabled)
return -EIO; return -EIO;
parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
MLX5_SET(scheduling_context, sched_ctx, element_type, MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
element_attributes); element_attributes);
MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
err = esw_qos_tsar_config(dev, sched_ctx, esw->qos.root_tsar_ix, vport->qos.esw_tsar_ix, err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
max_rate, bw_share); max_rate, bw_share);
if (err) { if (err) {
esw_warn(esw->dev, esw_warn(esw->dev,
...@@ -109,7 +112,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, ...@@ -109,7 +112,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
} else { } else {
mlx5_esw_for_each_vport(esw, i, evport) { mlx5_esw_for_each_vport(esw, i, evport) {
if (!evport->enabled || !evport->qos.enabled || if (!evport->enabled || !evport->qos.enabled ||
evport->qos.min_rate < max_guarantee) evport->qos.group != group || evport->qos.min_rate < max_guarantee)
continue; continue;
max_guarantee = evport->qos.min_rate; max_guarantee = evport->qos.min_rate;
} }
...@@ -117,6 +120,12 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, ...@@ -117,6 +120,12 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
if (max_guarantee) if (max_guarantee)
return max_t(u32, max_guarantee / fw_max_bw_share, 1); return max_t(u32, max_guarantee / fw_max_bw_share, 1);
/* If vports min rate divider is 0 but their group has bw_share configured, then
* need to set bw_share for vports to minimal value.
*/
if (!group_level && !max_guarantee && group->bw_share)
return 1;
return 0; return 0;
} }
...@@ -140,7 +149,7 @@ static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, ...@@ -140,7 +149,7 @@ static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
int err; int err;
mlx5_esw_for_each_vport(esw, i, evport) { mlx5_esw_for_each_vport(esw, i, evport) {
if (!evport->enabled || !evport->qos.enabled) if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
continue; continue;
bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share); bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
...@@ -176,6 +185,14 @@ static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divid ...@@ -176,6 +185,14 @@ static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divid
return err; return err;
group->bw_share = bw_share; group->bw_share = bw_share;
/* All the group's vports need to be set with default bw_share
* to enable them with QOS
*/
err = esw_qos_normalize_vports_min_rate(esw, group, extack);
if (err)
return err;
} }
return 0; return 0;
...@@ -201,7 +218,7 @@ int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, ...@@ -201,7 +218,7 @@ int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
previous_min_rate = evport->qos.min_rate; previous_min_rate = evport->qos.min_rate;
evport->qos.min_rate = min_rate; evport->qos.min_rate = min_rate;
err = esw_qos_normalize_vports_min_rate(esw, NULL, extack); err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
if (err) if (err)
evport->qos.min_rate = previous_min_rate; evport->qos.min_rate = previous_min_rate;
...@@ -213,6 +230,7 @@ int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, ...@@ -213,6 +230,7 @@ int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
u32 max_rate, u32 max_rate,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
u32 act_max_rate = max_rate;
bool max_rate_supported; bool max_rate_supported;
int err; int err;
...@@ -224,7 +242,13 @@ int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, ...@@ -224,7 +242,13 @@ int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
if (max_rate == evport->qos.max_rate) if (max_rate == evport->qos.max_rate)
return 0; return 0;
err = esw_qos_vport_config(esw, evport, max_rate, evport->qos.bw_share, extack); /* If parent group has rate limit need to set to group
* value when new max rate is 0.
*/
if (evport->qos.group && !max_rate)
act_max_rate = evport->qos.group->max_rate;
err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
if (!err) if (!err)
evport->qos.max_rate = max_rate; evport->qos.max_rate = max_rate;
...@@ -267,6 +291,8 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, ...@@ -267,6 +291,8 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
struct mlx5_esw_rate_group *group, struct mlx5_esw_rate_group *group,
u32 max_rate, struct netlink_ext_ack *extack) u32 max_rate, struct netlink_ext_ack *extack)
{ {
struct mlx5_vport *vport;
unsigned long i;
int err; int err;
if (group->max_rate == max_rate) if (group->max_rate == max_rate)
...@@ -278,9 +304,127 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, ...@@ -278,9 +304,127 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
group->max_rate = max_rate; group->max_rate = max_rate;
/* Any unlimited vports in the group should be set
* with the value of the group.
*/
mlx5_esw_for_each_vport(esw, i, vport) {
if (!vport->enabled || !vport->qos.enabled ||
vport->qos.group != group || vport->qos.max_rate)
continue;
err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
if (err)
NL_SET_ERR_MSG_MOD(extack,
"E-Switch vport implicit rate limit setting failed");
}
return err;
}
/*
 * Create the E-Switch scheduling element of @vport.
 *
 * The element is parented to the TSAR of the rate group referenced by
 * vport->qos.group; when no group is set the E-Switch root TSAR is used.
 * @max_rate and @bw_share are written into the scheduling context as
 * max_average_bw and bw_share respectively. The create command is handed
 * &vport->qos.esw_tsar_ix to receive the index of the new element.
 *
 * Returns 0 on success, otherwise the error reported by the create command
 * (a warning is logged in that case).
 */
static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
					      struct mlx5_vport *vport,
					      u32 max_rate, u32 bw_share)
{
	u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
	u32 parent_ix = esw->qos.root_tsar_ix;
	void *attrs;
	int ret;

	/* A vport attached to a rate group hangs off that group's TSAR. */
	if (vport->qos.group)
		parent_ix = vport->qos.group->tsar_ix;

	/* Generic scheduling context fields. */
	MLX5_SET(scheduling_context, sched_ctx, element_type,
		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
	MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix);
	MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
	MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);

	/* Vport specific element attributes. */
	attrs = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
	MLX5_SET(vport_element, attrs, vport_number, vport->vport);

	ret = mlx5_create_scheduling_element_cmd(esw->dev,
						 SCHEDULING_HIERARCHY_E_SWITCH,
						 sched_ctx,
						 &vport->qos.esw_tsar_ix);
	if (ret)
		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
			 vport->vport, ret);

	return ret;
}
/*
 * Move the scheduling element of @vport from @curr_group to @new_group.
 *
 * The existing element is destroyed and a new one is created under
 * @new_group. If creating the new element fails, the function tries to
 * re-create the element under @curr_group and logs a warning when that
 * restore attempt fails as well.
 *
 * vport->qos.group is updated to @new_group on success and set back to
 * @curr_group on failure of the create step.
 *
 * Returns 0 on success, otherwise the error of the failing destroy/create
 * command; @extack carries a matching message.
 */
static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
						   struct mlx5_vport *vport,
						   struct mlx5_esw_rate_group *curr_group,
						   struct mlx5_esw_rate_group *new_group,
						   struct netlink_ext_ack *extack)
{
	u32 rate;
	int err;

	err = mlx5_destroy_scheduling_element_cmd(esw->dev,
						  SCHEDULING_HIERARCHY_E_SWITCH,
						  vport->qos.esw_tsar_ix);
	if (err) {
		NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
		return err;
	}

	/* If vport is unlimited, we set the group's value.
	 * Therefore, if the group is limited it will apply to
	 * the vport as well and if not, vport will remain unlimited.
	 */
	vport->qos.group = new_group;
	rate = vport->qos.max_rate ?: new_group->max_rate;
	err = esw_qos_vport_create_sched_element(esw, vport, rate, vport->qos.bw_share);
	if (!err)
		return 0;

	NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");

	/* Best effort: put the vport back under its previous group. */
	vport->qos.group = curr_group;
	rate = vport->qos.max_rate ?: curr_group->max_rate;
	if (esw_qos_vport_create_sched_element(esw, vport, rate, vport->qos.bw_share))
		esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
			 vport->vport);

	return err;
}
static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
struct mlx5_vport *vport,
struct mlx5_esw_rate_group *group,
struct netlink_ext_ack *extack)
{
struct mlx5_esw_rate_group *new_group, *curr_group;
int err;
if (!vport->enabled)
return -EINVAL;
curr_group = vport->qos.group;
new_group = group ?: esw->qos.group0;
if (curr_group == new_group)
return 0;
err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
if (err)
return err;
/* Recalculate bw share weights of old and new groups */
if (vport->qos.bw_share) {
esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
esw_qos_normalize_vports_min_rate(esw, new_group, extack);
}
return 0;
}
static struct mlx5_esw_rate_group * static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{ {
...@@ -457,9 +601,6 @@ void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw) ...@@ -457,9 +601,6 @@ void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
u32 max_rate, u32 bw_share) u32 max_rate, u32 bw_share)
{ {
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
struct mlx5_core_dev *dev = esw->dev;
void *vport_elem;
int err; int err;
lockdep_assert_held(&esw->state_lock); lockdep_assert_held(&esw->state_lock);
...@@ -469,22 +610,10 @@ int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport ...@@ -469,22 +610,10 @@ int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport
if (vport->qos.enabled) if (vport->qos.enabled)
return -EEXIST; return -EEXIST;
MLX5_SET(scheduling_context, sched_ctx, element_type, vport->qos.group = esw->qos.group0;
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
MLX5_SET(scheduling_context, sched_ctx, parent_element_id, esw->qos.root_tsar_ix);
MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
err = mlx5_create_scheduling_element_cmd(dev, err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
SCHEDULING_HIERARCHY_E_SWITCH, if (!err)
sched_ctx,
&vport->qos.esw_tsar_ix);
if (err)
esw_warn(dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
vport->vport, err);
else
vport->qos.enabled = true; vport->qos.enabled = true;
return err; return err;
...@@ -497,6 +626,8 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo ...@@ -497,6 +626,8 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo
lockdep_assert_held(&esw->state_lock); lockdep_assert_held(&esw->state_lock);
if (!esw->qos.enabled || !vport->qos.enabled) if (!esw->qos.enabled || !vport->qos.enabled)
return; return;
WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
"Disabling QoS on port before detaching it from group");
err = mlx5_destroy_scheduling_element_cmd(esw->dev, err = mlx5_destroy_scheduling_element_cmd(esw->dev,
SCHEDULING_HIERARCHY_E_SWITCH, SCHEDULING_HIERARCHY_E_SWITCH,
...@@ -696,3 +827,32 @@ int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, ...@@ -696,3 +827,32 @@ int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
mutex_unlock(&esw->state_lock); mutex_unlock(&esw->state_lock);
return err; return err;
} }
/*
 * Locked entry point for moving @vport into rate group @group.
 *
 * Takes esw->state_lock around esw_qos_vport_update_group() and returns
 * that function's result unchanged. @group may be NULL; it is passed
 * through as-is.
 */
int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
				    struct mlx5_vport *vport,
				    struct mlx5_esw_rate_group *group,
				    struct netlink_ext_ack *extack)
{
	int ret;

	mutex_lock(&esw->state_lock);
	ret = esw_qos_vport_update_group(esw, vport, group, extack);
	mutex_unlock(&esw->state_lock);

	return ret;
}
/*
 * devlink callback: set the parent rate node of a leaf.
 *
 * @priv is used as the struct mlx5_vport of the leaf and @parent_priv as the
 * struct mlx5_esw_rate_group of the parent node. When @parent is NULL the
 * vport is updated with a NULL group and @parent_priv is ignored.
 *
 * Returns the result of mlx5_esw_qos_vport_update_group().
 */
int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
				     struct devlink_rate *parent,
				     void *priv, void *parent_priv,
				     struct netlink_ext_ack *extack)
{
	struct mlx5_vport *vport = priv;
	struct mlx5_esw_rate_group *group = NULL;

	/* Only trust parent_priv when a parent node was actually given. */
	if (parent)
		group = parent_priv;

	return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
}
...@@ -32,6 +32,10 @@ int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, ...@@ -32,6 +32,10 @@ int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
struct netlink_ext_ack *extack); struct netlink_ext_ack *extack);
int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
struct netlink_ext_ack *extack); struct netlink_ext_ack *extack);
int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
struct devlink_rate *parent,
void *priv, void *parent_priv,
struct netlink_ext_ack *extack);
#endif #endif
#endif #endif
...@@ -177,6 +177,7 @@ struct mlx5_vport { ...@@ -177,6 +177,7 @@ struct mlx5_vport {
u32 bw_share; u32 bw_share;
u32 min_rate; u32 min_rate;
u32 max_rate; u32 max_rate;
struct mlx5_esw_rate_group *group;
} qos; } qos;
u16 vport; u16 vport;
...@@ -356,6 +357,10 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, ...@@ -356,6 +357,10 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
u16 vport_num, bool setting); u16 vport_num, bool setting);
int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
u32 max_rate, u32 min_rate); u32 max_rate, u32 min_rate);
int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
struct mlx5_vport *vport,
struct mlx5_esw_rate_group *group,
struct netlink_ext_ack *extack);
int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting); int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting);
int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting); int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting);
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment