Commit bf837e8f authored by Jakub Kicinski

Merge branch 'nexthop-refactor-and-fix-nexthop-selection-for-multipath-routes'

Benjamin Poirier says:

====================
nexthop: Refactor and fix nexthop selection for multipath routes

In order to select a nexthop for multipath routes, fib_select_multipath()
is used with legacy nexthops and nexthop_select_path_hthr() is used with
nexthop objects. Those two functions perform a validity test on the
neighbor related to each nexthop but their logic is structured differently.
This causes a divergence in behavior and nexthop_select_path_hthr() may
return a nexthop that failed the neighbor validity test even if there was
one that passed.

Refactor nexthop_select_path_hthr() to make it more similar to
fib_select_multipath() and fix the problem mentioned above.

v1: https://lore.kernel.org/netdev/20230529201914.69828-1-bpoirier@nvidia.com/
====================

Link: https://lore.kernel.org/r/20230719-nh_select-v2-0-04383e89f868@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 022add1d c7e95bbd
...@@ -1152,41 +1152,64 @@ static bool ipv4_good_nh(const struct fib_nh *nh) ...@@ -1152,41 +1152,64 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
return !!(state & NUD_VALID); return !!(state & NUD_VALID);
} }
static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash) static bool nexthop_is_good_nh(const struct nexthop *nh)
{
struct nh_info *nhi = rcu_dereference(nh->nh_info);
switch (nhi->family) {
case AF_INET:
return ipv4_good_nh(&nhi->fib_nh);
case AF_INET6:
return ipv6_good_nh(&nhi->fib6_nh);
}
return false;
}
static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
{ {
struct nexthop *rc = NULL;
int i; int i;
for (i = 0; i < nhg->num_nh; ++i) { for (i = 0; i < nhg->num_nh; i++) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i]; struct nh_grp_entry *nhge = &nhg->nh_entries[i];
struct nh_info *nhi;
if (hash > atomic_read(&nhge->hthr.upper_bound)) if (hash > atomic_read(&nhge->hthr.upper_bound))
continue; continue;
nhi = rcu_dereference(nhge->nh->nh_info);
if (nhi->fdb_nh)
return nhge->nh; return nhge->nh;
}
WARN_ON_ONCE(1);
return NULL;
}
static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
{
struct nexthop *rc = NULL;
int i;
if (nhg->fdb_nh)
return nexthop_select_path_fdb(nhg, hash);
for (i = 0; i < nhg->num_nh; ++i) {
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
/* nexthops always check if it is good and does /* nexthops always check if it is good and does
* not rely on a sysctl for this behavior * not rely on a sysctl for this behavior
*/ */
switch (nhi->family) { if (!nexthop_is_good_nh(nhge->nh))
case AF_INET: continue;
if (ipv4_good_nh(&nhi->fib_nh))
return nhge->nh;
break;
case AF_INET6:
if (ipv6_good_nh(&nhi->fib6_nh))
return nhge->nh;
break;
}
if (!rc) if (!rc)
rc = nhge->nh; rc = nhge->nh;
if (hash > atomic_read(&nhge->hthr.upper_bound))
continue;
return nhge->nh;
} }
return rc; return rc ? : nhg->nh_entries[0].nh;
} }
static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash) static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
......
...@@ -29,6 +29,7 @@ IPV4_TESTS=" ...@@ -29,6 +29,7 @@ IPV4_TESTS="
ipv4_large_res_grp ipv4_large_res_grp
ipv4_compat_mode ipv4_compat_mode
ipv4_fdb_grp_fcnal ipv4_fdb_grp_fcnal
ipv4_mpath_select
ipv4_torture ipv4_torture
ipv4_res_torture ipv4_res_torture
" "
...@@ -42,6 +43,7 @@ IPV6_TESTS=" ...@@ -42,6 +43,7 @@ IPV6_TESTS="
ipv6_large_res_grp ipv6_large_res_grp
ipv6_compat_mode ipv6_compat_mode
ipv6_fdb_grp_fcnal ipv6_fdb_grp_fcnal
ipv6_mpath_select
ipv6_torture ipv6_torture
ipv6_res_torture ipv6_res_torture
" "
...@@ -370,6 +372,27 @@ check_large_res_grp() ...@@ -370,6 +372,27 @@ check_large_res_grp()
log_test $? 0 "Dump large (x$buckets) nexthop buckets" log_test $? 0 "Dump large (x$buckets) nexthop buckets"
} }
# Print the output device reported by a route lookup.
#
# $1: destination address/prefix passed to "$IP -j route get".
#
# The device name is taken from the "dev" field of the first element of the
# JSON reply. Nothing is printed when the pipeline fails (jq -e exits
# non-zero when that field is null or false). The function itself always
# returns 0.
get_route_dev()
{
	local dst="$1"
	local dev

	# Guard clause: stay silent (and successful) when no device was found.
	dev=$($IP -j route get "$dst" | jq -re ".[0].dev") || return 0
	echo "$dev"
}
# Compare the device selected by a route lookup against an expected device.
#
# $1: destination address/prefix to look up.
# $2: expected output device name.
#
# The lookup result from get_route_dev is handed to check_output together
# with the expected name; the exit status is whatever check_output returns.
check_route_dev()
{
	local dst="$1"
	local want="$2"

	check_output "$(get_route_dev "$dst")" "$want"
}
start_ip_monitor() start_ip_monitor()
{ {
local mtype=$1 local mtype=$1
...@@ -575,6 +598,112 @@ ipv4_fdb_grp_fcnal() ...@@ -575,6 +598,112 @@ ipv4_fdb_grp_fcnal()
$IP link del dev vx10 $IP link del dev vx10
} }
# IPv4: check that multipath selection over a nexthop group takes the state
# of existing neighbor entries into account.
#
# Two results are logged:
#  1. "Use valid neighbor during multipath selection": for each of veth1 and
#     veth3, find a destination in 172.16.101.0/24 that is routed through that
#     device, mark the gateway's neighbor entry as failed and verify that the
#     lookup now selects the other device.
#  2. "Multipath selection with no valid neighbor": with both gateways'
#     neighbor entries set to incomplete, perform a route lookup. Reaching
#     the log call at all is the success criterion.
#
# Returns $ksft_skip when jq is not available or when no destination using a
# given device could be found.
ipv4_mpath_select()
{
	local rc dev match h addr

	echo
	echo "IPv4 multipath selection"
	echo "------------------------"
	if [ ! -x "$(command -v jq)" ]; then
		echo "SKIP: Could not run test; need jq tool"
		return $ksft_skip
	fi

	# Use status of existing neighbor entry when determining nexthop for
	# multipath routes.
	local -A gws
	gws=([veth1]=172.16.1.2 [veth3]=172.16.2.2)
	local -A other_dev
	other_dev=([veth1]=veth3 [veth3]=veth1)

	run_cmd "$IP nexthop add id 1 via ${gws[veth1]} dev veth1"
	run_cmd "$IP nexthop add id 2 via ${gws[veth3]} dev veth3"
	run_cmd "$IP nexthop add id 1001 group 1/2"
	run_cmd "$IP ro add 172.16.101.0/24 nhid 1001"

	rc=0
	for dev in veth1 veth3; do
		# Search for a destination that is currently routed via $dev.
		match=0
		for h in {1..254}; do
			addr="172.16.101.$h"
			if [ "$(get_route_dev "$addr")" = "$dev" ]; then
				match=1
				break
			fi
		done
		if (( match == 0 )); then
			echo "SKIP: Did not find a route using device $dev"
			return $ksft_skip
		fi

		run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
		if ! check_route_dev "$addr" "${other_dev[$dev]}"; then
			rc=1
		fi
		# Remove the failed entry on the mismatch path as well, so it
		# cannot collide with the "neigh add" commands further down.
		run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
		if (( rc != 0 )); then
			break
		fi
	done
	log_test $rc 0 "Use valid neighbor during multipath selection"

	run_cmd "$IP neigh add ${gws[veth1]} dev veth1 nud incomplete"
	run_cmd "$IP neigh add ${gws[veth3]} dev veth3 nud incomplete"
	run_cmd "$IP route get 172.16.101.1"
	# if we did not crash, success
	# Logged independently of $rc so that a failure of the previous
	# sub-test is not reported a second time here.
	log_test 0 0 "Multipath selection with no valid neighbor"
}
# IPv6: check that multipath selection over a nexthop group takes the state
# of existing neighbor entries into account.
#
# Two results are logged:
#  1. "Use valid neighbor during multipath selection": for each of veth1 and
#     veth3, find a destination in 2001:db8:101::/64 that is routed through
#     that device, mark the gateway's neighbor entry as failed and verify
#     that the lookup now selects the other device.
#  2. "Multipath selection with no valid neighbor": with both gateways'
#     neighbor entries set to incomplete, perform a route lookup. Reaching
#     the log call at all is the success criterion.
#
# Returns $ksft_skip when jq is not available or when no destination using a
# given device could be found.
ipv6_mpath_select()
{
	local rc dev match h addr

	echo
	echo "IPv6 multipath selection"
	echo "------------------------"
	if [ ! -x "$(command -v jq)" ]; then
		echo "SKIP: Could not run test; need jq tool"
		return $ksft_skip
	fi

	# Use status of existing neighbor entry when determining nexthop for
	# multipath routes.
	local -A gws
	gws=([veth1]=2001:db8:91::2 [veth3]=2001:db8:92::2)
	local -A other_dev
	other_dev=([veth1]=veth3 [veth3]=veth1)

	run_cmd "$IP nexthop add id 1 via ${gws[veth1]} dev veth1"
	run_cmd "$IP nexthop add id 2 via ${gws[veth3]} dev veth3"
	run_cmd "$IP nexthop add id 1001 group 1/2"
	run_cmd "$IP ro add 2001:db8:101::/64 nhid 1001"

	rc=0
	for dev in veth1 veth3; do
		# Search for a destination that is currently routed via $dev.
		match=0
		for h in {1..65535}; do
			addr=$(printf "2001:db8:101::%x" $h)
			if [ "$(get_route_dev "$addr")" = "$dev" ]; then
				match=1
				break
			fi
		done
		if (( match == 0 )); then
			echo "SKIP: Did not find a route using device $dev"
			return $ksft_skip
		fi

		run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
		if ! check_route_dev "$addr" "${other_dev[$dev]}"; then
			rc=1
		fi
		# Remove the failed entry on the mismatch path as well, so it
		# cannot collide with the "neigh add" commands further down.
		run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
		if (( rc != 0 )); then
			break
		fi
	done
	log_test $rc 0 "Use valid neighbor during multipath selection"

	run_cmd "$IP neigh add ${gws[veth1]} dev veth1 nud incomplete"
	run_cmd "$IP neigh add ${gws[veth3]} dev veth3 nud incomplete"
	run_cmd "$IP route get 2001:db8:101::1"
	# if we did not crash, success
	# Logged independently of $rc so that a failure of the previous
	# sub-test is not reported a second time here.
	log_test 0 0 "Multipath selection with no valid neighbor"
}
################################################################################ ################################################################################
# basic operations (add, delete, replace) on nexthops and nexthop groups # basic operations (add, delete, replace) on nexthops and nexthop groups
# #
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment