Commit e13dbc4f authored by David S. Miller

Merge branch 'mlxsw-PFC-and-headroom-selftests'

Petr Machata says:

====================
mlxsw: PFC and headroom selftests

Recent changes in the headroom management code made it clear that an
automated way of testing this functionality is needed. This patchset brings
two tests: a synthetic headroom behavior test, which verifies the mechanics of
headroom management, and a PFC test, which verifies whether this behavior
actually translates into a working lossless configuration.

Both of these tests rely on mlnx_qos[1], a tool that interfaces with Linux
DCB API. The tool was originally written to work with Mellanox NICs, but
does not actually rely on anything Mellanox-specific, and can be used for
mlxsw as well as for any other NIC-like driver. Unlike Open LLDP, it does
support buffer commands and permits a fire-and-forget approach to
configuration, which makes it very handy for writing selftests.

Patches #1-#3 extend the selftest devlink_lib.sh in various ways. Patch #4
then adds a helper wrapper for mlnx_qos to mlxsw's qos_lib.sh.

Patch #5 adds a test for management of port headroom.

Patch #6 adds a PFC test.

[1] https://github.com/Mellanox/mlnx-tools/
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 611ba753 bfa80478
......@@ -147,17 +147,26 @@ switch_create()
# Make sure that ingress quotas are smaller than egress so that there is
# room for both streams of traffic to be admitted to shared buffer.
devlink_pool_size_thtype_save 0
devlink_pool_size_thtype_set 0 dynamic 10000000
devlink_pool_size_thtype_save 4
devlink_pool_size_thtype_set 4 dynamic 10000000
devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 6
devlink_tc_bind_pool_th_save $swp1 1 ingress
devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6
devlink_port_pool_th_save $swp2 0
devlink_port_pool_th_set $swp2 0 6
devlink_tc_bind_pool_th_save $swp2 2 ingress
devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6
devlink_tc_bind_pool_th_save $swp3 1 egress
devlink_tc_bind_pool_th_set $swp3 1 egress 4 7
devlink_tc_bind_pool_th_save $swp3 2 egress
devlink_tc_bind_pool_th_set $swp3 2 egress 4 7
devlink_port_pool_th_save $swp3 4
devlink_port_pool_th_set $swp3 4 7
}
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Names of the test functions defined in this script. The list is presumably
# consumed by tests_run from the sourced library -- confirm against lib.sh.
ALL_TESTS="
test_defaults
test_dcb_ets
test_mtu
test_pfc
test_int_buf
test_tc_priomap
test_tc_mtu
test_tc_sizes
test_tc_int_buf
"

# Shared forwarding-selftest libraries live relative to this script. The
# expansions are quoted so that a checkout path containing whitespace does not
# get word-split.
lib_dir=$(dirname "$0")/../../../net/forwarding

# No interface pairs are requested; the test operates on the single interface
# named by NETIF_NO_CABLE (see the assignment to swp below).
NUM_NETIFS=0
source "$lib_dir/lib.sh"
source "$lib_dir/devlink_lib.sh"
source qos_lib.sh

# Interface under test, used by every helper and test case in this script.
swp=$NETIF_NO_CABLE
# Exit handler (installed with "trap cleanup EXIT"). This test keeps no state
# of its own, so only the common pre_cleanup step is run.
cleanup() {
	pre_cleanup
}
# Print the "buffer" row of the PFC section reported by mlnx_qos for $swp.
# The row label is dropped and runs of spaces are squeezed, so the output is
# the per-priority values separated by single spaces.
get_prio_pg()
{
	# Restrict to the PFC section (up to the next non-indented line), keep
	# only lines mentioning "buffer", squeeze spaces, drop the label field.
	__mlnx_qos -i $swp |
		sed -n '/^PFC/,/^[^[:space:]]/p' |
		sed -e '/buffer/!d' -e 's/ \+/ /g' |
		cut -d' ' -f 2-
}
# Print the "enabled" row of the PFC section reported by mlnx_qos for $swp.
# The row label is dropped and runs of spaces are squeezed, so the output is
# the per-priority values separated by single spaces.
get_prio_pfc()
{
	# Restrict to the PFC section (up to the next non-indented line), keep
	# only lines mentioning "enabled", squeeze spaces, drop the label field.
	__mlnx_qos -i $swp |
		sed -n '/^PFC/,/^[^[:space:]]/p' |
		sed -e '/enabled/!d' -e 's/ \+/ /g' |
		cut -d' ' -f 2-
}
# Print the traffic class assigned to each priority on $swp, as reported by
# mlnx_qos: one number per priority, in ascending priority order, each followed
# by a single space.
#
# The values are emitted with an explicit numeric loop. awk leaves the order
# of "for (i in array)" unspecified, and callers compare the result against a
# string ordered by priority.
get_prio_tc()
{
	__mlnx_qos -i "$swp" | sed -n '/^tc/,$p' |
		awk '/^tc/ { TC = $2 }
		     /priority:/ {
			p = $2 + 0
			PRIO[p] = TC
			if (p > MAX)
				MAX = p
		     }
		     END {
			for (i = 0; i <= MAX; i++)
				if (i in PRIO)
					printf("%d ", PRIO[i])
		     }'
}
# Print the size of receive buffer number $1 (zero-based) on $swp, taken from
# the comma-separated list on the "Receive" line of the mlnx_qos output.
get_buf_size()
{
	local idx=$1
	# cut counts fields from one, buffers are numbered from zero.
	local field=$((idx + 1))

	__mlnx_qos -i $swp |
		grep Receive |
		sed 's/.*: //' |
		cut -d, -f $field
}
# Print the value following "total_size=" on the "Receive" line of the
# mlnx_qos output for $swp.
get_tot_size()
{
	__mlnx_qos -i $swp |
		sed -e '/Receive/!d' -e 's/.*total_size=//'
}
# Compare the current priority-to-buffer map (get_prio_pg) with the expected
# string $1 and report the outcome through check_err.
check_prio_pg()
{
	local want=$1
	local have

	have=$(get_prio_pg)
	[[ "$have" = "$want" ]]
	check_err $? "prio2buffer is '$have', expected '$want'"
}
# Compare the current per-priority PFC enablement (get_prio_pfc) with the
# expected string $1 and report the outcome through check_err.
check_prio_pfc()
{
	local want=$1
	local have

	have=$(get_prio_pfc)
	[[ "$have" = "$want" ]]
	check_err $? "prio PFC is '$have', expected '$want'"
}
# Compare the current priority-to-TC map (get_prio_tc) with the expected
# string $1 and report the outcome through check_err.
check_prio_tc()
{
	local want=$1
	local have

	have=$(get_prio_tc)
	[[ "$have" = "$want" ]]
	check_err $? "prio_tc is '$have', expected '$want'"
}
# Check the size of a receive buffer against an arithmetic condition.
#
#   $1 - buffer index, passed to get_buf_size
#   $2 - right-hand part of an arithmetic comparison, e.g. "> 0" or "== 1234"
#   $3 - optional prefix for the failure message
#
# The outcome is reported through check_err and the size that was read is
# printed on stdout, so callers may capture it.
__check_buf_size()
{
	local buf=$1
	local cond=$2
	local prefix=$3
	local size

	size=$(get_buf_size $buf)

	# Evaluates e.g. "size > 0"; the exit status feeds check_err.
	((size $cond))
	check_err $? "${prefix}buffer $buf size is '$size', expected '$cond'"

	echo $size
}
# Same check as __check_buf_size (same arguments), for callers that only want
# the verdict: the size printed by __check_buf_size is discarded.
check_buf_size()
{
	__check_buf_size "$@" >/dev/null
}
# Test case: before any configuration, the priority-to-buffer map, the
# priority-to-TC map and the per-priority PFC enablement are all expected to
# be zero for every priority.
test_defaults()
{
	local zeros="0 0 0 0 0 0 0 0 "

	RET=0

	check_prio_pg "$zeros"
	check_prio_tc "$zeros"
	check_prio_pfc "$zeros"

	log_test "Default headroom configuration"
}
# Test case: configuring the priority-to-TC map through mlnx_qos is expected
# to be mirrored in the priority-to-buffer map, while an explicit prio2buffer
# request is expected to be rejected in this mode.
test_dcb_ets()
{
	local spread="0 2 4 6 1 3 5 7 "
	local zeros="0 0 0 0 0 0 0 0 "

	RET=0

	# A non-trivial prio_tc map shows up in both maps; PFC stays off.
	__mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null
	check_prio_pg "$spread"
	check_prio_tc "$spread"
	check_prio_pfc "$zeros"

	# Back to the all-zero map.
	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
	check_prio_pg "$zeros"
	check_prio_tc "$zeros"

	# This request must fail; its diagnostics are expected noise.
	__mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null
	check_fail $? "prio2buffer accepted in DCB mode"

	log_test "Configuring headroom through ETS"
}
# Test case: the size of buffer 0 is expected to grow with the MTU and to
# return to its initial value once the MTU is restored.
#
#   $1 - optional prefix for the test name (used by test_tc_mtu)
#
# Every check is made with check_buf_size in the current shell and the size is
# read separately with get_buf_size. Running the check inside a command
# substitution would execute check_err in a subshell, where a failure it
# records (presumably in RET -- check_err is defined in lib.sh) would be lost
# and the test would pass regardless.
test_mtu()
{
	local what=$1; shift
	local buf0size_2
	local buf0size

	RET=0

	check_buf_size 0 "> 0"
	buf0size=$(get_buf_size 0)

	mtu_set "$swp" 3000
	check_buf_size 0 "> $buf0size" "MTU 3000: "
	buf0size_2=$(get_buf_size 0)
	mtu_restore "$swp"

	mtu_set "$swp" 6000
	check_buf_size 0 "> $buf0size_2" "MTU 6000: "
	mtu_restore "$swp"

	# With the MTU restored, the buffer must be back at its initial size.
	check_buf_size 0 "== $buf0size"

	log_test "${what}MTU impacts buffer size"
}
# Test case: run test_mtu again with a root qdisc installed on $swp.
test_tc_mtu()
{
	local -a root=(dev $swp root)

	# In TC mode, MTU still impacts the threshold below which a buffer is
	# not permitted to go.
	tc qdisc replace "${root[@]}" handle 1: bfifo limit 1.5M
	test_mtu "TC: "
	tc qdisc delete "${root[@]}"
}
# Test case in four logged steps:
#   1. With prio_tc=0,0,0,0,0,1,2,3 and no PFC, buffers 0-3 are expected to be
#      non-empty and buffers 4-7 empty.
#   2. Enabling PFC on the last three priorities with cable length 0 is
#      expected to leave buffer 0 alone, grow buffers 1-3 and make them equal.
#   3. Cable length 1000 is expected to grow buffers 1-3 further.
#   4. Disabling PFC and resetting prio_tc restores the default layout.
test_pfc()
{
	local -a base=()
	local pfc_size
	local i

	RET=0

	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null

	# Remember the PFC-less sizes of the buffers that are in use.
	for i in 0 1 2 3; do
		base[i]=$(get_buf_size $i)
	done

	for i in 0 1 2 3; do
		check_buf_size $i "> 0"
	done
	for i in 4 5 6 7; do
		check_buf_size $i "== 0"
	done

	log_test "Buffer size sans PFC"

	RET=0

	__mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null

	check_prio_pg "0 0 0 0 0 1 2 3 "
	check_prio_pfc "0 0 0 0 0 1 1 1 "

	check_buf_size 0 "== ${base[0]}"
	for i in 1 2 3; do
		check_buf_size $i "> ${base[i]}"
	done

	# All PFC-enabled buffers should have the same size; buffer 1 is the
	# reference for this and for the next step.
	pfc_size=$(get_buf_size 1)
	check_buf_size 2 "== $pfc_size"
	check_buf_size 3 "== $pfc_size"

	log_test "PFC: Cable length 0"

	RET=0

	__mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null

	check_buf_size 0 "== ${base[0]}"
	for i in 1 2 3; do
		check_buf_size $i "> $pfc_size"
	done

	log_test "PFC: Cable length 1000"

	RET=0

	__mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null
	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null

	check_prio_pg "0 0 0 0 0 0 0 0 "
	check_prio_tc "0 0 0 0 0 0 0 0 "

	check_buf_size 0 "> 0"
	for i in 1 2 3 4 5 6 7; do
		check_buf_size $i "== 0"
	done

	log_test "PFC: Restore defaults"
}
# Test case: the priority-to-buffer map follows prio_tc while no root qdisc is
# installed, is reset and becomes directly configurable (prio2buffer) once a
# qdisc is added, and follows prio_tc again after the qdisc is removed.
test_tc_priomap()
{
	local identity="0 1 2 3 4 5 6 7 "
	local zeros="0 0 0 0 0 0 0 0 "

	RET=0

	__mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null
	check_prio_pg "$identity"

	# Installing a qdisc is expected to reset the map ...
	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
	check_prio_pg "$zeros"

	# ... and to allow setting it explicitly.
	__mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null
	check_prio_pg "1 3 5 7 0 2 4 6 "

	# Without the qdisc the map is derived from prio_tc again.
	tc qdisc delete dev $swp root
	check_prio_pg "$identity"

	# Clean up.
	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
	__mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null
	tc qdisc delete dev $swp root
	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null

	log_test "TC: priomap"
}
# Test case: an explicit buffer size (1000 cells for buffer 0) is expected to
# be rejected while no root qdisc is installed, and to be accepted and kept
# -- across an MTU change and across qdisc replacement -- while one is.
test_tc_sizes()
{
	local cell_size=$(devlink_cell_size_get)
	local size=$((cell_size * 1000))
	local set_size="--buffer_size=$size,0,0,0,0,0,0,0"
	local clear_size="--buffer_size=0,0,0,0,0,0,0,0"

	RET=0

	__mlnx_qos -i $swp $set_size &> /dev/null
	check_fail $? "buffer_size should fail before qdisc is added"

	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M

	__mlnx_qos -i $swp $set_size > /dev/null
	check_err $? "buffer_size should pass after qdisc is added"
	check_buf_size 0 "== $size" "set size: "

	# A manually configured size must not follow the MTU.
	mtu_set $swp 6000
	check_buf_size 0 "== $size" "set MTU: "
	mtu_restore $swp

	__mlnx_qos -i $swp $clear_size > /dev/null

	# After replacing the qdisc with one of the same kind, buffer_size
	# still has to work.
	tc qdisc replace dev $swp root handle 1: bfifo limit 1M

	__mlnx_qos -i $swp $set_size > /dev/null
	check_buf_size 0 "== $size" "post replace, set size: "

	__mlnx_qos -i $swp $clear_size > /dev/null

	# Likewise after replacing it with a qdisc of a different kind.
	tc qdisc replace dev $swp root handle 2: prio bands 8

	__mlnx_qos -i $swp $set_size > /dev/null
	check_buf_size 0 "== $size" "post replace different kind, set size: "

	tc qdisc delete dev $swp root

	__mlnx_qos -i $swp $set_size &> /dev/null
	check_fail $? "buffer_size should fail after qdisc is deleted"

	log_test "TC: buffer size"
}
# Test case: track the part of the reported total size that is not buffer 0
# (total_size minus buffer 0 size). It is expected to grow when an egress
# mirror is installed on $swp, to be unaffected by an MTU change, and to
# return to its initial value once the mirror is removed.
#
#   $1 - optional prefix for the test name (used by test_tc_int_buf)
test_int_buf()
{
	local what=$1; shift
	local buf0 total
	local hidden_base hidden_span hidden_mtu hidden_end

	RET=0

	# Size of internal buffer and buffer 9.
	buf0=$(get_buf_size 0)
	total=$(get_tot_size)
	hidden_base=$((total - buf0))

	tc qdisc add dev $swp clsact
	tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp

	# Egress SPAN should have added to the "invisible" buffer configuration.
	buf0=$(get_buf_size 0)
	total=$(get_tot_size)
	hidden_span=$((total - buf0))
	((hidden_span > hidden_base))
	check_err $? "Invisible buffers account for '$hidden_span', expected '> $hidden_base'"

	mtu_set $swp 3000

	# MTU change might change buffer 0, which will show at total, but the
	# hidden buffers should stay the same size.
	buf0=$(get_buf_size 0)
	total=$(get_tot_size)
	hidden_mtu=$((total - buf0))
	((hidden_mtu == hidden_span))
	check_err $? "MTU change: Invisible buffers account for '$hidden_mtu', expected '== $hidden_span'"

	mtu_restore $swp
	tc qdisc del dev $swp clsact

	# After SPAN removal, hidden buffers should be back to the original sizes.
	buf0=$(get_buf_size 0)
	total=$(get_tot_size)
	hidden_end=$((total - buf0))
	((hidden_end == hidden_base))
	check_err $? "SPAN removed: Invisible buffers account for '$hidden_end', expected '== $hidden_base'"

	log_test "${what}internal buffer size"
}
# Test case: run test_int_buf with a root qdisc installed on $swp, first with
# automatically sized buffers and then with buffer 0 set to 1000 cells.
test_tc_int_buf()
{
	local cell_size=$(devlink_cell_size_get)
	local size=$((cell_size * 1000))
	local -a root=(dev $swp root)

	tc qdisc replace "${root[@]}" handle 1: bfifo limit 1.5M
	test_int_buf "TC: "

	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
	test_int_buf "TC+buffsize: "

	# Undo the explicit size before removing the qdisc.
	__mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
	tc qdisc delete "${root[@]}"
}
# Script entry point.
# Run cleanup() on every exit path of the script.
trap cleanup EXIT
# NOTE(review): bail_on_lldpad presumably aborts the run when lldpad is
# managing the interface -- its body is not fully visible here; confirm in
# qos_lib.sh.
bail_on_lldpad
# setup_wait and tests_run are not defined in this script; they presumably
# come from the sourced lib.sh and run the tests listed in ALL_TESTS -- confirm.
setup_wait
tests_run
# Report the status accumulated in EXIT_STATUS as the script's exit code.
exit $EXIT_STATUS
......@@ -82,3 +82,17 @@ bail_on_lldpad()
fi
fi
}
# Wrapper around the mlnx_qos tool.
#
# All arguments are passed to mlnx_qos unchanged. The tool's own stderr is
# discarded; if it fails, a single line naming the exit status and the
# arguments is written to stderr instead. Returns the exit status of mlnx_qos.
__mlnx_qos()
{
	local err

	mlnx_qos "$@" 2>/dev/null
	err=$?

	if ((err)); then
		# Write to the existing fd 2 rather than reopening /dev/stderr:
		# reopening truncates the target when stderr is redirected to a
		# regular file. "$*" joins the arguments into the message.
		echo "Error ($err) in mlnx_qos $*" >&2
	fi

	return $err
}
......@@ -145,12 +145,17 @@ switch_create()
# Make sure that ingress quotas are smaller than egress so that there is
# room for both streams of traffic to be admitted to shared buffer.
devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 5
devlink_tc_bind_pool_th_save $swp1 0 ingress
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5
devlink_port_pool_th_save $swp2 0
devlink_port_pool_th_set $swp2 0 5
devlink_tc_bind_pool_th_save $swp2 1 ingress
devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5
devlink_port_pool_th_save $swp3 4
devlink_port_pool_th_set $swp3 4 12
}
......
This diff is collapsed.
......@@ -27,11 +27,17 @@ switch_create()
# amount of traffic that is admitted to the shared buffers. This makes
# sure that there is always enough traffic of all types to select from
# for the DWRR process.
devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 12
devlink_tc_bind_pool_th_save $swp1 0 ingress
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12
devlink_port_pool_th_save $swp2 4
devlink_port_pool_th_set $swp2 4 12
devlink_tc_bind_pool_th_save $swp2 7 egress
devlink_tc_bind_pool_th_set $swp2 7 egress 4 5
devlink_tc_bind_pool_th_save $swp2 6 egress
devlink_tc_bind_pool_th_set $swp2 6 egress 4 5
devlink_tc_bind_pool_th_save $swp2 5 egress
devlink_tc_bind_pool_th_set $swp2 5 egress 4 5
# Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet
......
......@@ -208,6 +208,7 @@ switch_create()
ip link set dev br2_11 up
local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
devlink_port_pool_th_save $swp3 8
devlink_port_pool_th_set $swp3 8 $size
}
......
......@@ -5,7 +5,7 @@
# Defines
if [[ ! -v DEVLINK_DEV ]]; then
DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
DEVLINK_DEV=$(devlink port show "${NETIFS[p1]:-$NETIF_NO_CABLE}" -j \
| jq -r '.port | keys[]' | cut -d/ -f-2)
if [ -z "$DEVLINK_DEV" ]; then
echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
......@@ -117,6 +117,12 @@ devlink_reload()
declare -A DEVLINK_ORIG
# Changing pool type from static to dynamic causes reinterpretation of threshold
# values. They therefore need to be saved before pool type is changed, then the
# pool type can be changed, and then the new values need to be set up. Therefore
# instead of saving the current state implicitly in the _set call, provide
# functions for all three primitives: save, set, and restore.
devlink_port_pool_threshold()
{
local port=$1; shift
......@@ -126,14 +132,21 @@ devlink_port_pool_threshold()
| jq '.port_pool."'"$port"'"[].threshold'
}
devlink_port_pool_th_set()
devlink_port_pool_th_save()
{
local port=$1; shift
local pool=$1; shift
local th=$1; shift
local key="port_pool($port,$pool).threshold"
DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool)
}
# Set the threshold of a port pool.
#
#   $1 - port
#   $2 - pool index
#   $3 - threshold
#
# Only configures the device; nothing is recorded in DEVLINK_ORIG here.
devlink_port_pool_th_set()
{
	local port=$1
	local pool=$2
	local th=$3

	devlink sb port pool set $port pool $pool th $th
}
......@@ -142,8 +155,13 @@ devlink_port_pool_th_restore()
local port=$1; shift
local pool=$1; shift
local key="port_pool($port,$pool).threshold"
local -a orig=(${DEVLINK_ORIG[$key]})
devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]}
if [[ -z $orig ]]; then
echo "WARNING: Mismatched devlink_port_pool_th_restore"
else
devlink sb port pool set $port pool $pool th $orig
fi
}
devlink_pool_size_thtype()
......@@ -154,14 +172,20 @@ devlink_pool_size_thtype()
| jq -r '.pool[][] | (.size, .thtype)'
}
# Record the current size and threshold type of pool $1 in DEVLINK_ORIG, under
# the key "pool(<pool>).size_thtype".
devlink_pool_size_thtype_save()
{
	local pool=$1
	local slot="pool($pool).size_thtype"

	DEVLINK_ORIG[$slot]=$(devlink_pool_size_thtype $pool)
}
# Set the size and threshold type of a shared-buffer pool on $DEVLINK_DEV.
#
#   $1 - pool index
#   $2 - threshold type
#   $3 - pool size
#
# This function only configures the device. The previous state is NOT saved
# here: callers that want to restore it later must call
# devlink_pool_size_thtype_save first. Saving implicitly on every _set would
# overwrite the recorded original when the pool is set more than once.
devlink_pool_size_thtype_set()
{
	local pool=$1; shift
	local thtype=$1; shift
	local size=$1; shift

	devlink sb pool set "$DEVLINK_DEV" pool "$pool" size "$size" thtype "$thtype"
}
......@@ -171,8 +195,12 @@ devlink_pool_size_thtype_restore()
local key="pool($pool).size_thtype"
local -a orig=(${DEVLINK_ORIG[$key]})
devlink sb pool set "$DEVLINK_DEV" pool $pool \
size ${orig[0]} thtype ${orig[1]}
if [[ -z ${orig[0]} ]]; then
echo "WARNING: Mismatched devlink_pool_size_thtype_restore"
else
devlink sb pool set "$DEVLINK_DEV" pool $pool \
size ${orig[0]} thtype ${orig[1]}
fi
}
devlink_tc_bind_pool_th()
......@@ -185,6 +213,16 @@ devlink_tc_bind_pool_th()
| jq -r '.tc_bind[][] | (.pool, .threshold)'
}
# Record the current pool binding and threshold of a traffic class in
# DEVLINK_ORIG, under the key "tc_bind(<port>,<dir>,<tc>).pool_th".
#
#   $1 - port
#   $2 - traffic class
#   $3 - direction (the visible callers pass "ingress" or "egress")
devlink_tc_bind_pool_th_save()
{
	local port=$1
	local tc=$2
	local dir=$3
	local slot="tc_bind($port,$dir,$tc).pool_th"

	DEVLINK_ORIG[$slot]=$(devlink_tc_bind_pool_th $port $tc $dir)
}
devlink_tc_bind_pool_th_set()
{
local port=$1; shift
......@@ -192,9 +230,7 @@ devlink_tc_bind_pool_th_set()
local dir=$1; shift
local pool=$1; shift
local th=$1; shift
local key="tc_bind($port,$dir,$tc).pool_th"
DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
devlink sb tc bind set $port tc $tc type $dir pool $pool th $th
}
......@@ -206,8 +242,12 @@ devlink_tc_bind_pool_th_restore()
local key="tc_bind($port,$dir,$tc).pool_th"
local -a orig=(${DEVLINK_ORIG[$key]})
devlink sb tc bind set $port tc $tc type $dir \
pool ${orig[0]} th ${orig[1]}
if [[ -z ${orig[0]} ]]; then
echo "WARNING: Mismatched devlink_tc_bind_pool_th_restore"
else
devlink sb tc bind set $port tc $tc type $dir \
pool ${orig[0]} th ${orig[1]}
fi
}
devlink_traps_num_get()
......@@ -509,3 +549,9 @@ devlink_cpu_port_get()
echo "$DEVLINK_DEV/$cpu_dl_port_num"
}
# Print the cell_size attribute that devlink reports for shared-buffer pool 0
# of $DEVLINK_DEV.
devlink_cell_size_get()
{
	local filter='.pool[][].cell_size'

	devlink sb pool show "$DEVLINK_DEV" pool 0 -j | jq "$filter"
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment