Commit 52fa3ee0 authored by Jakub Kicinski

Merge branch 'make-neighbor-eviction-controllable-by-userspace'

James Prestwood says:

====================
Make neighbor eviction controllable by userspace
====================

Link: https://lore.kernel.org/r/20211101173630.300969-1-prestwoj@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 1d6d336f f86ca07e
...@@ -1611,6 +1611,15 @@ arp_accept - BOOLEAN ...@@ -1611,6 +1611,15 @@ arp_accept - BOOLEAN
gratuitous arp frame, the arp table will be updated regardless gratuitous arp frame, the arp table will be updated regardless
if this setting is on or off. if this setting is on or off.
arp_evict_nocarrier - BOOLEAN
Clears the ARP cache on NOCARRIER events. This option is important for
wireless devices where the ARP cache should not be cleared when roaming
between access points on the same network. In most cases this should
remain as the default (1).
- 1 - (default): Clear the ARP cache on NOCARRIER events
- 0 - Do not clear ARP cache on NOCARRIER events
mcast_solicit - INTEGER mcast_solicit - INTEGER
The maximum number of multicast probes in INCOMPLETE state, The maximum number of multicast probes in INCOMPLETE state,
when the associated hardware address is unknown. Defaults when the associated hardware address is unknown. Defaults
...@@ -2341,6 +2350,15 @@ ndisc_tclass - INTEGER ...@@ -2341,6 +2350,15 @@ ndisc_tclass - INTEGER
* 0 - (default) * 0 - (default)
ndisc_evict_nocarrier - BOOLEAN
Clears the neighbor discovery table on NOCARRIER events. This option is
important for wireless devices where the neighbor discovery cache should
not be cleared when roaming between access points on the same network.
In most cases this should remain as the default (1).
- 1 - (default): Clear neighbor discovery cache on NOCARRIER events.
- 0 - Do not clear neighbor discovery cache on NOCARRIER events.
mldv1_unsolicited_report_interval - INTEGER mldv1_unsolicited_report_interval - INTEGER
The interval in milliseconds in which the next unsolicited The interval in milliseconds in which the next unsolicited
MLDv1 report retransmit will take place. MLDv1 report retransmit will take place.
......
...@@ -133,6 +133,8 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) ...@@ -133,6 +133,8 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
#define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE) #define IN_DEV_ARP_ANNOUNCE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE)
#define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE) #define IN_DEV_ARP_IGNORE(in_dev) IN_DEV_MAXCONF((in_dev), ARP_IGNORE)
#define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY) #define IN_DEV_ARP_NOTIFY(in_dev) IN_DEV_MAXCONF((in_dev), ARP_NOTIFY)
#define IN_DEV_ARP_EVICT_NOCARRIER(in_dev) IN_DEV_ANDCONF((in_dev), \
ARP_EVICT_NOCARRIER)
struct in_ifaddr { struct in_ifaddr {
struct hlist_node hash; struct hlist_node hash;
......
...@@ -79,6 +79,7 @@ struct ipv6_devconf { ...@@ -79,6 +79,7 @@ struct ipv6_devconf {
__u32 ioam6_id; __u32 ioam6_id;
__u32 ioam6_id_wide; __u32 ioam6_id_wide;
__u8 ioam6_enabled; __u8 ioam6_enabled;
__u8 ndisc_evict_nocarrier;
struct ctl_table_header *sysctl_header; struct ctl_table_header *sysctl_header;
}; };
......
...@@ -169,6 +169,7 @@ enum ...@@ -169,6 +169,7 @@ enum
IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST,
IPV4_DEVCONF_DROP_GRATUITOUS_ARP, IPV4_DEVCONF_DROP_GRATUITOUS_ARP,
IPV4_DEVCONF_BC_FORWARDING, IPV4_DEVCONF_BC_FORWARDING,
IPV4_DEVCONF_ARP_EVICT_NOCARRIER,
__IPV4_DEVCONF_MAX __IPV4_DEVCONF_MAX
}; };
......
...@@ -193,6 +193,7 @@ enum { ...@@ -193,6 +193,7 @@ enum {
DEVCONF_IOAM6_ENABLED, DEVCONF_IOAM6_ENABLED,
DEVCONF_IOAM6_ID, DEVCONF_IOAM6_ID,
DEVCONF_IOAM6_ID_WIDE, DEVCONF_IOAM6_ID_WIDE,
DEVCONF_NDISC_EVICT_NOCARRIER,
DEVCONF_MAX DEVCONF_MAX
}; };
......
...@@ -482,6 +482,7 @@ enum ...@@ -482,6 +482,7 @@ enum
NET_IPV4_CONF_PROMOTE_SECONDARIES=20, NET_IPV4_CONF_PROMOTE_SECONDARIES=20,
NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_ACCEPT=21,
NET_IPV4_CONF_ARP_NOTIFY=22, NET_IPV4_CONF_ARP_NOTIFY=22,
NET_IPV4_CONF_ARP_EVICT_NOCARRIER=23,
}; };
/* /proc/sys/net/ipv4/netfilter */ /* /proc/sys/net/ipv4/netfilter */
......
...@@ -1247,6 +1247,8 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, ...@@ -1247,6 +1247,8 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event,
{ {
struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct netdev_notifier_change_info *change_info; struct netdev_notifier_change_info *change_info;
struct in_device *in_dev;
bool evict_nocarrier;
switch (event) { switch (event) {
case NETDEV_CHANGEADDR: case NETDEV_CHANGEADDR:
...@@ -1257,7 +1259,14 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, ...@@ -1257,7 +1259,14 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event,
change_info = ptr; change_info = ptr;
if (change_info->flags_changed & IFF_NOARP) if (change_info->flags_changed & IFF_NOARP)
neigh_changeaddr(&arp_tbl, dev); neigh_changeaddr(&arp_tbl, dev);
if (!netif_carrier_ok(dev))
in_dev = __in_dev_get_rtnl(dev);
if (!in_dev)
evict_nocarrier = true;
else
evict_nocarrier = IN_DEV_ARP_EVICT_NOCARRIER(in_dev);
if (evict_nocarrier && !netif_carrier_ok(dev))
neigh_carrier_down(&arp_tbl, dev); neigh_carrier_down(&arp_tbl, dev);
break; break;
default: default:
......
...@@ -75,6 +75,7 @@ static struct ipv4_devconf ipv4_devconf = { ...@@ -75,6 +75,7 @@ static struct ipv4_devconf ipv4_devconf = {
[IPV4_DEVCONF_SHARED_MEDIA - 1] = 1, [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
}, },
}; };
...@@ -87,6 +88,7 @@ static struct ipv4_devconf ipv4_devconf_dflt = { ...@@ -87,6 +88,7 @@ static struct ipv4_devconf ipv4_devconf_dflt = {
[IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1, [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
[IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/, [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL - 1] = 10000 /*ms*/,
[IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/, [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL - 1] = 1000 /*ms*/,
[IPV4_DEVCONF_ARP_EVICT_NOCARRIER - 1] = 1,
}, },
}; };
...@@ -2532,6 +2534,8 @@ static struct devinet_sysctl_table { ...@@ -2532,6 +2534,8 @@ static struct devinet_sysctl_table {
DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
DEVINET_SYSCTL_RW_ENTRY(ARP_EVICT_NOCARRIER,
"arp_evict_nocarrier"),
DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION, DEVINET_SYSCTL_RW_ENTRY(FORCE_IGMP_VERSION,
"force_igmp_version"), "force_igmp_version"),
......
...@@ -241,6 +241,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { ...@@ -241,6 +241,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.ioam6_enabled = 0, .ioam6_enabled = 0,
.ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id = IOAM6_DEFAULT_IF_ID,
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
}; };
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
...@@ -300,6 +301,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { ...@@ -300,6 +301,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.ioam6_enabled = 0, .ioam6_enabled = 0,
.ioam6_id = IOAM6_DEFAULT_IF_ID, .ioam6_id = IOAM6_DEFAULT_IF_ID,
.ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE, .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
.ndisc_evict_nocarrier = 1,
}; };
/* Check if link is ready: is it up and is a valid qdisc available */ /* Check if link is ready: is it up and is a valid qdisc available */
...@@ -5545,6 +5547,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, ...@@ -5545,6 +5547,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled; array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
array[DEVCONF_IOAM6_ID] = cnf->ioam6_id; array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide; array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
array[DEVCONF_NDISC_EVICT_NOCARRIER] = cnf->ndisc_evict_nocarrier;
} }
static inline size_t inet6_ifla6_size(void) static inline size_t inet6_ifla6_size(void)
...@@ -6986,6 +6989,15 @@ static const struct ctl_table addrconf_sysctl[] = { ...@@ -6986,6 +6989,15 @@ static const struct ctl_table addrconf_sysctl[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_douintvec, .proc_handler = proc_douintvec,
}, },
{
.procname = "ndisc_evict_nocarrier",
.data = &ipv6_devconf.ndisc_evict_nocarrier,
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
.extra1 = (void *)SYSCTL_ZERO,
.extra2 = (void *)SYSCTL_ONE,
},
{ {
/* sentinel */ /* sentinel */
} }
......
...@@ -1794,6 +1794,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, ...@@ -1794,6 +1794,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
struct netdev_notifier_change_info *change_info; struct netdev_notifier_change_info *change_info;
struct net *net = dev_net(dev); struct net *net = dev_net(dev);
struct inet6_dev *idev; struct inet6_dev *idev;
bool evict_nocarrier;
switch (event) { switch (event) {
case NETDEV_CHANGEADDR: case NETDEV_CHANGEADDR:
...@@ -1810,10 +1811,19 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, ...@@ -1810,10 +1811,19 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
in6_dev_put(idev); in6_dev_put(idev);
break; break;
case NETDEV_CHANGE: case NETDEV_CHANGE:
idev = in6_dev_get(dev);
if (!idev)
evict_nocarrier = true;
else {
evict_nocarrier = idev->cnf.ndisc_evict_nocarrier &&
net->ipv6.devconf_all->ndisc_evict_nocarrier;
in6_dev_put(idev);
}
change_info = ptr; change_info = ptr;
if (change_info->flags_changed & IFF_NOARP) if (change_info->flags_changed & IFF_NOARP)
neigh_changeaddr(&nd_tbl, dev); neigh_changeaddr(&nd_tbl, dev);
if (!netif_carrier_ok(dev)) if (evict_nocarrier && !netif_carrier_ok(dev))
neigh_carrier_down(&nd_tbl, dev); neigh_carrier_down(&nd_tbl, dev);
break; break;
case NETDEV_DOWN: case NETDEV_DOWN:
......
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Tests sysctl options {arp,ndisc}_evict_nocarrier={0,1}
#
# Create a veth pair and set IPs/routes on both. Then ping to establish
# an entry in the ARP/ND table. Depending on the test set sysctl option to
# 1 or 0. Set remote veth down which will cause local veth to go into a no
# carrier state. Depending on the test check the ARP/ND table:
#
# {arp,ndisc}_evict_nocarrier=1 should contain no ARP/ND after no carrier
# {arp,ndisc}_evict_nocarrier=0 should still contain the single ARP/ND entry
#
# Peer namespace used by the IPv4 tests; the suffix comes from "mktemp -u".
PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
readonly PEER_NS

# Addresses assigned to the two ends of the veth pair (0 = local, 1 = peer).
readonly V4_ADDR0=10.0.10.1 V4_ADDR1=10.0.10.2
readonly V6_ADDR0=2001:db8:91::1 V6_ADDR1=2001:db8:91::2

# Next id passed to "ip netns set" by create_ns; incremented on every use.
nsid=100
# Tear down the IPv6 test topology and put ndisc_evict_nocarrier back to 1.
cleanup_v6()
{
	# The veth pair lives inside these namespaces and goes away with them.
	ip netns del me
	ip netns del peer

	# ndisc_evict_nocarrier is an IPv6 sysctl and the IPv6 tests operate on
	# veth1, so reset it under net.ipv6.conf.* (the net.ipv4/veth0 paths
	# used previously do not name this knob). Errors are ignored: the
	# per-device entry may no longer exist once the namespaces are deleted.
	sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1
	sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1
}
# (Re)create network namespace $1, give it the next free nsid and bring up
# its loopback device.
create_ns()
{
	local ns_name=${1}
	local ns_id=${nsid}

	# Consume the id we are about to hand out.
	nsid=$((nsid + 1))

	# Drop any stale namespace left over from a previous run.
	ip netns del ${ns_name} 2>/dev/null

	ip netns add ${ns_name}
	ip netns set ${ns_name} ${ns_id}
	ip -netns ${ns_name} link set lo up
}
# Build the IPv6 topology and drive veth1 into the NOCARRIER state.
#
# $1: sysctl assignment ("name=value") applied inside the "me" namespace
#     before the neighbour entry is created.
#
# Creates namespaces "me" and "peer", connects them with veth1/veth2, pings
# the peer to populate the ND table, then sets veth2 down.
# Exits the script with status 1 if no ND entry for $V6_ADDR1 appears.
setup_v6() {
	create_ns me
	create_ns peer

	IP="ip -netns me"

	$IP li add veth1 type veth peer name veth2
	$IP li set veth1 up
	$IP -6 addr add $V6_ADDR0/64 dev veth1 nodad
	$IP li set veth2 netns peer up
	ip -netns peer -6 addr add $V6_ADDR1/64 dev veth2 nodad

	ip netns exec me sysctl -w $1 >/dev/null 2>&1

	# Establish an ND cache entry
	ip netns exec me ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1
	# Should have the veth1 entry in ND table
	ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
	if [ $? -ne 0 ]; then
		cleanup_v6
		echo "failed"
		# A bare "exit" would reuse echo's status (0) and report the
		# failed setup as a success.
		exit 1
	fi

	# Set veth2 down, which will put veth1 in NOCARRIER state
	ip netns exec peer ip link set veth2 down
}
# Build the IPv4 topology and drive veth0 into the NOCARRIER state.
#
# $1: sysctl assignment ("name=value") applied in the current namespace
#     before the neighbour entry is created.
#
# Creates namespace $PEER_NS, connects it to the current namespace with
# veth0/veth1, pings the peer to populate the ARP table, then sets veth1 down.
# Exits the script with status 1 if no ARP entry for $V4_ADDR1 appears.
setup_v4() {
	ip netns add "${PEER_NS}"
	ip link add name veth0 type veth peer name veth1
	ip link set dev veth0 up
	ip link set dev veth1 netns "${PEER_NS}"
	ip netns exec "${PEER_NS}" ip link set dev veth1 up
	ip addr add $V4_ADDR0/24 dev veth0
	ip netns exec "${PEER_NS}" ip addr add $V4_ADDR1/24 dev veth1

	ip netns exec ${PEER_NS} ip route add default via $V4_ADDR1 dev veth1
	ip route add default via $V4_ADDR0 dev veth0

	sysctl -w "$1" >/dev/null 2>&1

	# Establish an ARP cache entry
	ping -c1 -I veth0 $V4_ADDR1 -q >/dev/null 2>&1
	# Should have the veth1 entry in ARP table
	ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
	if [ $? -ne 0 ]; then
		cleanup_v4
		echo "failed"
		# A bare "exit" would reuse echo's status (0) and report the
		# failed setup as a success.
		exit 1
	fi

	# Set veth1 down, which will put veth0 in NOCARRIER state
	ip netns exec "${PEER_NS}" ip link set veth1 down
}
# Tear down the IPv4 test topology and put arp_evict_nocarrier back to 1.
cleanup_v4() {
	ip neigh flush dev veth0
	ip link del veth0

	# Only delete the peer namespace if it is still listed. The expansion
	# is deliberately left unquoted: quoting it would pass the whole
	# matched line to "ip netns del" as a single argument.
	local -r listed="$(ip netns list|grep $PEER_NS)"
	if [ -n "$listed" ]; then
		ip netns del $listed 2>/dev/null
	fi

	sysctl -w net.ipv4.conf.veth0.arp_evict_nocarrier=1 >/dev/null 2>&1
	sysctl -w net.ipv4.conf.all.arp_evict_nocarrier=1 >/dev/null 2>&1
}
# arp_evict_nocarrier=1 (the default): after carrier loss the ARP entry for
# the peer must be gone.
run_arp_evict_nocarrier_enabled() {
	echo "run arp_evict_nocarrier=1 test"

	setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=1"

	# A successful lookup means the entry survived, which is a failure here.
	if ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1; then
		echo "failed"
	else
		echo "ok"
	fi

	cleanup_v4
}
# arp_evict_nocarrier=0 on veth0: the ARP entry for the peer must survive
# carrier loss.
run_arp_evict_nocarrier_disabled() {
	local verdict="failed"

	echo "run arp_evict_nocarrier=0 test"

	setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=0"

	# The lookup succeeds only while the entry is still in the ARP table.
	ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 && verdict="ok"
	echo "$verdict"

	cleanup_v4
}
# arp_evict_nocarrier=0 via the "all" entry: the ARP entry for the peer must
# survive carrier loss.
run_arp_evict_nocarrier_disabled_all() {
	echo "run all.arp_evict_nocarrier=0 test"

	setup_v4 "net.ipv4.conf.all.arp_evict_nocarrier=0"

	# A failed lookup means the entry was evicted, which is a failure here.
	if ! ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1; then
		echo "failed"
	else
		echo "ok"
	fi

	cleanup_v4
}
# ndisc_evict_nocarrier=1 on veth1: after carrier loss the ND entry for the
# peer must be gone.
run_ndisc_evict_nocarrier_enabled() {
	echo "run ndisc_evict_nocarrier=1 test"

	setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=1"

	# A successful lookup means the entry survived, which is a failure here.
	if ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1; then
		echo "failed"
	else
		echo "ok"
	fi

	cleanup_v6
}
# ndisc_evict_nocarrier=0 on veth1: the ND entry for the peer must survive
# carrier loss.
run_ndisc_evict_nocarrier_disabled() {
	local verdict="failed"

	echo "run ndisc_evict_nocarrier=0 test"

	setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=0"

	# The lookup succeeds only while the entry is still in the ND table.
	ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1 && verdict="ok"
	echo "$verdict"

	cleanup_v6
}
# ndisc_evict_nocarrier=0 via the "all" entry: the ND entry for the peer must
# survive carrier loss.
run_ndisc_evict_nocarrier_disabled_all() {
	echo "run all.ndisc_evict_nocarrier=0 test"

	setup_v6 "net.ipv6.conf.all.ndisc_evict_nocarrier=0"

	# A failed lookup means the entry was evicted, which is a failure here.
	if ! ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1; then
		echo "failed"
	else
		echo "ok"
	fi

	cleanup_v6
}
# Run every ARP test case, then every ND test case, in a fixed order.
run_all_tests() {
	local test_case

	for test_case in \
		run_arp_evict_nocarrier_enabled \
		run_arp_evict_nocarrier_disabled \
		run_arp_evict_nocarrier_disabled_all \
		run_ndisc_evict_nocarrier_enabled \
		run_ndisc_evict_nocarrier_disabled \
		run_ndisc_evict_nocarrier_disabled_all
	do
		$test_case
	done
}
# The tests create namespaces and links and write sysctls, so they need root.
if [ "$(id -u)" -ne 0 ];then
	echo "SKIP: Need root privileges"
	# ksft_skip is not defined anywhere in this script; without a fallback
	# the expansion is empty and the script exits 0, reporting a pass.
	# 4 is the kselftest "skip" exit code.
	exit "${ksft_skip:-4}"
fi

run_all_tests
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment