Commit d57beb0e authored by Jakub Kicinski

Merge branch 'mlxsw-offload-root-tbf-as-port-shaper'

Ido Schimmel says:

====================
mlxsw: Offload root TBF as port shaper

Petr says:

Egress configuration in an mlxsw deployment would generally have an ETS
qdisc at root, with a number of bands and a priority dispatch between them.
Some of those bands could then have RED and/or TBF qdiscs attached.

When TBF is used like this, mlxsw configures a shaper on a subgroup, which is
the pair of traffic classes (UC + BUM) corresponding to the band where TBF
is installed. This way it is possible to limit traffic on several bands
(subgroups) independently by configuring several TBF qdiscs, each on a
different band.

It is however not possible to limit traffic flowing through the port as
such. The ASIC supports this through port shapers (as opposed to the
abovementioned subgroup shapers). An obvious way to express this as a user
would be to configure a root TBF qdisc, and then add the whole ETS
hierarchy as its child.

TBF (and RED) can currently be used as a root qdisc. This usage has always
been accepted as a special case, when only one subgroup is configured, and
that is the subgroup that root TBF and RED configure. However it was never
possible to install ETS under that TBF.

In this patchset, this limitation is relaxed. TBF qdisc in root position is
now always offloaded as a port shaper. Such TBF qdisc does not limit
offload of further children. It is thus possible to configure the usual
priority classification through ETS, with RED and/or TBF on individual
bands, all that below a port-level TBF. For example:

    (1) # tc qdisc replace dev swp1 root handle 1: tbf rate 800mbit burst 16kb limit 1M
    (2) # tc qdisc replace dev swp1 parent 1:1 handle 11: ets strict 8 priomap 7 6 5 4 3 2 1 0
    (3) # tc qdisc replace dev swp1 parent 11:1 handle 111: tbf rate 600mbit burst 16kb limit 1M
    (4) # tc qdisc replace dev swp1 parent 11:2 handle 112: tbf rate 600mbit burst 16kb limit 1M

Here, (1) configures a 800-Mbps port shaper, (2) adds an ETS element with 8
strictly-prioritized bands, and (3) and (4) configure two more shapers,
each 600 Mbps, one under 11:1 (band 0, TCs 7 and 15), one under 11:2 (band
1, TCs 6 and 14). This way, traffic on each of bands 0 and 1 is independently
capped at 600 Mbps, and at the same time, traffic through the port as a
whole is capped at 800 Mbps.

In patch #1, TBF is permitted as root qdisc, under which the usual qdisc
tree can be installed.

In patch #2, the qdisc offloadability selftest is extended to cover the
root TBF as well.

Patch #3 then tests that the offloaded TBF shapes as expected.
====================

Link: https://lore.kernel.org/r/20211027152001.1320496-1-idosch@idosch.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 7df621a3 2b11e24e
...@@ -271,6 +271,7 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, ...@@ -271,6 +271,7 @@ mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc_tree_validate { struct mlxsw_sp_qdisc_tree_validate {
bool forbid_ets; bool forbid_ets;
bool forbid_root_tbf;
bool forbid_tbf; bool forbid_tbf;
bool forbid_red; bool forbid_red;
}; };
...@@ -310,18 +311,26 @@ __mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, ...@@ -310,18 +311,26 @@ __mlxsw_sp_qdisc_tree_validate(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
if (validate.forbid_red) if (validate.forbid_red)
return -EINVAL; return -EINVAL;
validate.forbid_red = true; validate.forbid_red = true;
validate.forbid_root_tbf = true;
validate.forbid_ets = true; validate.forbid_ets = true;
break; break;
case MLXSW_SP_QDISC_TBF: case MLXSW_SP_QDISC_TBF:
if (validate.forbid_tbf) if (validate.forbid_root_tbf) {
return -EINVAL; if (validate.forbid_tbf)
validate.forbid_tbf = true; return -EINVAL;
validate.forbid_ets = true; /* This is a TC TBF. */
validate.forbid_tbf = true;
validate.forbid_ets = true;
} else {
/* This is root TBF. */
validate.forbid_root_tbf = true;
}
break; break;
case MLXSW_SP_QDISC_PRIO: case MLXSW_SP_QDISC_PRIO:
case MLXSW_SP_QDISC_ETS: case MLXSW_SP_QDISC_ETS:
if (validate.forbid_ets) if (validate.forbid_ets)
return -EINVAL; return -EINVAL;
validate.forbid_root_tbf = true;
validate.forbid_ets = true; validate.forbid_ets = true;
break; break;
default: default:
...@@ -905,16 +914,34 @@ mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, ...@@ -905,16 +914,34 @@ mlxsw_sp_setup_tc_qdisc_leaf_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port,
mlxsw_sp_qdisc->stats_base.backlog = 0; mlxsw_sp_qdisc->stats_base.backlog = 0;
} }
/* Select the QEEC hierarchy level at which a TBF qdisc's shaper is
 * configured: the port level when the qdisc is the port's root qdisc, the
 * subgroup level for any other qdisc.
 */
static enum mlxsw_reg_qeec_hr
mlxsw_sp_qdisc_tbf_hr(struct mlxsw_sp_port *mlxsw_sp_port,
		      struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
	if (mlxsw_sp_qdisc != &mlxsw_sp_port->qdisc->root_qdisc) {
		/* A subgroup shaper is used so that UC and MC traffic are
		 * both subject to shaping. RED is handled differently: UC
		 * queue lengths differ from MC ones because of different
		 * pool and quota configurations, so a shared configuration
		 * is not applicable there. For a shaper, on the other hand,
		 * limiting the overall stream makes sense, and it matches
		 * what is done for ieee_setmaxrate().
		 */
		return MLXSW_REG_QEEC_HR_SUBGROUP;
	}

	/* The root qdisc is offloaded as a port shaper. */
	return MLXSW_REG_QEEC_HR_PORT;
}
static int static int
mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_qdisc_tbf_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{ {
enum mlxsw_reg_qeec_hr hr = mlxsw_sp_qdisc_tbf_hr(mlxsw_sp_port,
mlxsw_sp_qdisc);
int tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port, int tclass_num = mlxsw_sp_qdisc_get_tclass_num(mlxsw_sp_port,
mlxsw_sp_qdisc); mlxsw_sp_qdisc);
return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, hr, tclass_num, 0,
MLXSW_REG_QEEC_HR_SUBGROUP,
tclass_num, 0,
MLXSW_REG_QEEC_MAS_DIS, 0); MLXSW_REG_QEEC_MAS_DIS, 0);
} }
...@@ -996,6 +1023,8 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, ...@@ -996,6 +1023,8 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
void *params) void *params)
{ {
enum mlxsw_reg_qeec_hr hr = mlxsw_sp_qdisc_tbf_hr(mlxsw_sp_port,
mlxsw_sp_qdisc);
struct tc_tbf_qopt_offload_replace_params *p = params; struct tc_tbf_qopt_offload_replace_params *p = params;
u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p); u64 rate_kbps = mlxsw_sp_qdisc_tbf_rate_kbps(p);
int tclass_num; int tclass_num;
...@@ -1016,17 +1045,7 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, ...@@ -1016,17 +1045,7 @@ mlxsw_sp_qdisc_tbf_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle,
/* check_params above was supposed to reject this value. */ /* check_params above was supposed to reject this value. */
return -EINVAL; return -EINVAL;
/* Configure subgroup shaper, so that both UC and MC traffic is subject return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port, hr, tclass_num, 0,
* to shaping. That is unlike RED, however UC queue lengths are going to
* be different than MC ones due to different pool and quota
* configurations, so the configuration is not applicable. For shaper on
* the other hand, subjecting the overall stream to the configured
* shaper makes sense. Also note that that is what we do for
* ieee_setmaxrate().
*/
return mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
MLXSW_REG_QEEC_HR_SUBGROUP,
tclass_num, 0,
rate_kbps, burst_size); rate_kbps, burst_size);
} }
......
...@@ -6,7 +6,9 @@ ...@@ -6,7 +6,9 @@
ALL_TESTS=" ALL_TESTS="
test_root test_root
test_port_tbf
test_etsprio test_etsprio
test_etsprio_port_tbf
" "
NUM_NETIFS=1 NUM_NETIFS=1
lib_dir=$(dirname $0)/../../../net/forwarding lib_dir=$(dirname $0)/../../../net/forwarding
...@@ -221,6 +223,12 @@ test_root() ...@@ -221,6 +223,12 @@ test_root()
do_test_combinations 1 0 do_test_combinations 1 0
} }
# Run do_test_combinations (arguments 8 and 1) wrapped in "with_tbf 1: root",
# i.e. with the qdisc combinations exercised below a root TBF.
test_port_tbf()
{
	with_tbf 1: root do_test_combinations 8 1
}
do_test_etsprio() do_test_etsprio()
{ {
local parent=$1; shift local parent=$1; shift
...@@ -264,6 +272,12 @@ test_etsprio() ...@@ -264,6 +272,12 @@ test_etsprio()
do_test_etsprio root "" do_test_etsprio root ""
} }
# Run do_test_etsprio wrapped in "with_tbf 1: root", attaching the tested
# tree at "parent 1:1" and passing "-TBF" as the second argument.
test_etsprio_port_tbf()
{
	with_tbf 1: root do_test_etsprio "parent 1:1" "-TBF"
}
cleanup() cleanup()
{ {
tc qdisc del dev $h1 root &>/dev/null tc qdisc del dev $h1 root &>/dev/null
......
...@@ -4,9 +4,12 @@ ...@@ -4,9 +4,12 @@
ALL_TESTS=" ALL_TESTS="
ping_ipv4 ping_ipv4
tbf_test tbf_test
tbf_root_test
" "
source $lib_dir/sch_tbf_core.sh source $lib_dir/sch_tbf_core.sh
QDISC_TYPE=${QDISC% *}
tbf_test_one() tbf_test_one()
{ {
local bs=$1; shift local bs=$1; shift
...@@ -22,6 +25,8 @@ tbf_test_one() ...@@ -22,6 +25,8 @@ tbf_test_one()
tbf_test() tbf_test()
{ {
log_info "Testing root-$QDISC_TYPE-tbf"
# This test is used for both ETS and PRIO. Even though we only need two # This test is used for both ETS and PRIO. Even though we only need two
# bands, PRIO demands a minimum of three. # bands, PRIO demands a minimum of three.
tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0 tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0
...@@ -29,6 +34,29 @@ tbf_test() ...@@ -29,6 +34,29 @@ tbf_test()
tc qdisc del dev $swp2 root tc qdisc del dev $swp2 root
} }
# Install a 400Mbit TBF as the root qdisc of $swp2, hang a three-band $QDISC
# below it with a bfifo on every band, run do_tbf_test twice against that
# setup, and finally remove the whole hierarchy.
tbf_root_test()
{
	local bs=128K
	local band

	log_info "Testing root-tbf-$QDISC_TYPE"

	tc qdisc replace dev $swp2 root handle 1: \
		tbf rate 400Mbit burst $bs limit 1M
	tc qdisc replace dev $swp2 parent 1:1 handle 10: \
		$QDISC 3 priomap 2 1 0

	# Attach a bfifo to each band, highest band number first.
	for band in 3 2 1; do
		tc qdisc replace dev $swp2 parent 10:$band handle 10$band: \
			bfifo limit 1M
	done

	do_tbf_test 10 400 $bs
	do_tbf_test 11 400 $bs

	tc qdisc del dev $swp2 root
}
trap cleanup EXIT trap cleanup EXIT
setup_prepare setup_prepare
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment