Commit 7de196a6 authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'make-dsa-work-with-bonding-s-arp-monitor'

Vladimir Oltean says:

====================
Make DSA work with bonding's ARP monitor

Since commit 2b86cb82 ("net: dsa: declare lockless TX feature for
slave ports") in v5.7, DSA breaks the ARP monitoring logic from the
bonding driver, fact which was pointed out by Brian Hutchinson who uses
a linux-5.10.y stable kernel.

Initially I got lured by other similar hacks introduced for other
NETIF_F_LLTX drivers, which, inspired by the bonding documentation,
update the trans_start of their TX queues by hand.

However Jakub pointed out that this simply isn't a proper solution, and
after coming to think more about it, I agree, and it doesn't work
properly with DSA nor is it maintainable for the future changes I plan
for it (multiple DSA masters in a LAG).

I've tested these changes using a DSA-based setup and a veth-based
setup, using the active-backup mode and ARP monitoring, with and without
arp_validate.

Link to v1:
https://patchwork.kernel.org/project/netdevbpf/patch/20220715232641.952532-1-vladimir.oltean@nxp.com/

Link to v2:
https://patchwork.kernel.org/project/netdevbpf/patch/20220727152000.3616086-1-vladimir.oltean@nxp.com/
====================

Link: https://lore.kernel.org/r/20220731124108.2810233-1-vladimir.oltean@nxp.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents f86d1fbb cba8d8f5
...@@ -1982,15 +1982,6 @@ uses the response as an indication that the link is operating. This ...@@ -1982,15 +1982,6 @@ uses the response as an indication that the link is operating. This
gives some assurance that traffic is actually flowing to and from one gives some assurance that traffic is actually flowing to and from one
or more peers on the local network. or more peers on the local network.
The ARP monitor relies on the device driver itself to verify
that traffic is flowing. In particular, the driver must keep up to
date the last receive time, dev->last_rx. Drivers that use NETIF_F_LLTX
flag must also update netdev_queue->trans_start. If they do not, then the
ARP monitor will immediately fail any slaves using that driver, and
those slaves will stay down. If networking monitoring (tcpdump, etc)
shows the ARP requests and replies on the network, then it may be that
your device driver is not updating last_rx and trans_start.
7.2 Configuring Multiple ARP Targets 7.2 Configuring Multiple ARP Targets
------------------------------------ ------------------------------------
......
...@@ -2001,6 +2001,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, ...@@ -2001,6 +2001,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
new_slave->target_last_arp_rx[i] = new_slave->last_rx; new_slave->target_last_arp_rx[i] = new_slave->last_rx;
new_slave->last_tx = new_slave->last_rx;
if (bond->params.miimon && !bond->params.use_carrier) { if (bond->params.miimon && !bond->params.use_carrier) {
link_reporting = bond_check_dev_link(bond, slave_dev, 1); link_reporting = bond_check_dev_link(bond, slave_dev, 1);
...@@ -2884,8 +2886,11 @@ static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip, ...@@ -2884,8 +2886,11 @@ static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip,
return; return;
} }
if (bond_handle_vlan(slave, tags, skb)) if (bond_handle_vlan(slave, tags, skb)) {
slave_update_last_tx(slave);
arp_xmit(skb); arp_xmit(skb);
}
return; return;
} }
...@@ -3074,8 +3079,7 @@ static int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, ...@@ -3074,8 +3079,7 @@ static int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
curr_active_slave->last_link_up)) curr_active_slave->last_link_up))
bond_validate_arp(bond, slave, tip, sip); bond_validate_arp(bond, slave, tip, sip);
else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) && else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
bond_time_in_interval(bond, bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1))
dev_trans_start(curr_arp_slave->dev), 1))
bond_validate_arp(bond, slave, sip, tip); bond_validate_arp(bond, slave, sip, tip);
out_unlock: out_unlock:
...@@ -3103,8 +3107,10 @@ static void bond_ns_send(struct slave *slave, const struct in6_addr *daddr, ...@@ -3103,8 +3107,10 @@ static void bond_ns_send(struct slave *slave, const struct in6_addr *daddr,
} }
addrconf_addr_solict_mult(daddr, &mcaddr); addrconf_addr_solict_mult(daddr, &mcaddr);
if (bond_handle_vlan(slave, tags, skb)) if (bond_handle_vlan(slave, tags, skb)) {
slave_update_last_tx(slave);
ndisc_send_skb(skb, &mcaddr, saddr); ndisc_send_skb(skb, &mcaddr, saddr);
}
} }
static void bond_ns_send_all(struct bonding *bond, struct slave *slave) static void bond_ns_send_all(struct bonding *bond, struct slave *slave)
...@@ -3246,8 +3252,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond, ...@@ -3246,8 +3252,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond,
curr_active_slave->last_link_up)) curr_active_slave->last_link_up))
bond_validate_ns(bond, slave, saddr, daddr); bond_validate_ns(bond, slave, saddr, daddr);
else if (curr_arp_slave && else if (curr_arp_slave &&
bond_time_in_interval(bond, bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1))
dev_trans_start(curr_arp_slave->dev), 1))
bond_validate_ns(bond, slave, saddr, daddr); bond_validate_ns(bond, slave, saddr, daddr);
out: out:
...@@ -3335,12 +3340,12 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) ...@@ -3335,12 +3340,12 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
* so it can wait * so it can wait
*/ */
bond_for_each_slave_rcu(bond, slave, iter) { bond_for_each_slave_rcu(bond, slave, iter) {
unsigned long trans_start = dev_trans_start(slave->dev); unsigned long last_tx = slave_last_tx(slave);
bond_propose_link_state(slave, BOND_LINK_NOCHANGE); bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
if (slave->link != BOND_LINK_UP) { if (slave->link != BOND_LINK_UP) {
if (bond_time_in_interval(bond, trans_start, 1) && if (bond_time_in_interval(bond, last_tx, 1) &&
bond_time_in_interval(bond, slave->last_rx, 1)) { bond_time_in_interval(bond, slave->last_rx, 1)) {
bond_propose_link_state(slave, BOND_LINK_UP); bond_propose_link_state(slave, BOND_LINK_UP);
...@@ -3365,7 +3370,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) ...@@ -3365,7 +3370,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
* when the source ip is 0, so don't take the link down * when the source ip is 0, so don't take the link down
* if we don't know our ip yet * if we don't know our ip yet
*/ */
if (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) || if (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
!bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) { !bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) {
bond_propose_link_state(slave, BOND_LINK_DOWN); bond_propose_link_state(slave, BOND_LINK_DOWN);
...@@ -3431,7 +3436,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond) ...@@ -3431,7 +3436,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
*/ */
static int bond_ab_arp_inspect(struct bonding *bond) static int bond_ab_arp_inspect(struct bonding *bond)
{ {
unsigned long trans_start, last_rx; unsigned long last_tx, last_rx;
struct list_head *iter; struct list_head *iter;
struct slave *slave; struct slave *slave;
int commit = 0; int commit = 0;
...@@ -3482,9 +3487,9 @@ static int bond_ab_arp_inspect(struct bonding *bond) ...@@ -3482,9 +3487,9 @@ static int bond_ab_arp_inspect(struct bonding *bond)
* - (more than missed_max*delta since receive AND * - (more than missed_max*delta since receive AND
* the bond has an IP address) * the bond has an IP address)
*/ */
trans_start = dev_trans_start(slave->dev); last_tx = slave_last_tx(slave);
if (bond_is_active_slave(slave) && if (bond_is_active_slave(slave) &&
(!bond_time_in_interval(bond, trans_start, bond->params.missed_max) || (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
!bond_time_in_interval(bond, last_rx, bond->params.missed_max))) { !bond_time_in_interval(bond, last_rx, bond->params.missed_max))) {
bond_propose_link_state(slave, BOND_LINK_DOWN); bond_propose_link_state(slave, BOND_LINK_DOWN);
commit++; commit++;
...@@ -3501,8 +3506,8 @@ static int bond_ab_arp_inspect(struct bonding *bond) ...@@ -3501,8 +3506,8 @@ static int bond_ab_arp_inspect(struct bonding *bond)
*/ */
static void bond_ab_arp_commit(struct bonding *bond) static void bond_ab_arp_commit(struct bonding *bond)
{ {
unsigned long trans_start;
struct list_head *iter; struct list_head *iter;
unsigned long last_tx;
struct slave *slave; struct slave *slave;
bond_for_each_slave(bond, slave, iter) { bond_for_each_slave(bond, slave, iter) {
...@@ -3511,10 +3516,10 @@ static void bond_ab_arp_commit(struct bonding *bond) ...@@ -3511,10 +3516,10 @@ static void bond_ab_arp_commit(struct bonding *bond)
continue; continue;
case BOND_LINK_UP: case BOND_LINK_UP:
trans_start = dev_trans_start(slave->dev); last_tx = slave_last_tx(slave);
if (rtnl_dereference(bond->curr_active_slave) != slave || if (rtnl_dereference(bond->curr_active_slave) != slave ||
(!rtnl_dereference(bond->curr_active_slave) && (!rtnl_dereference(bond->curr_active_slave) &&
bond_time_in_interval(bond, trans_start, 1))) { bond_time_in_interval(bond, last_tx, 1))) {
struct slave *current_arp_slave; struct slave *current_arp_slave;
current_arp_slave = rtnl_dereference(bond->current_arp_slave); current_arp_slave = rtnl_dereference(bond->current_arp_slave);
......
...@@ -312,7 +312,6 @@ static bool veth_skb_is_eligible_for_gro(const struct net_device *dev, ...@@ -312,7 +312,6 @@ static bool veth_skb_is_eligible_for_gro(const struct net_device *dev,
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{ {
struct veth_priv *rcv_priv, *priv = netdev_priv(dev); struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
struct netdev_queue *queue = NULL;
struct veth_rq *rq = NULL; struct veth_rq *rq = NULL;
struct net_device *rcv; struct net_device *rcv;
int length = skb->len; int length = skb->len;
...@@ -330,7 +329,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -330,7 +329,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
rxq = skb_get_queue_mapping(skb); rxq = skb_get_queue_mapping(skb);
if (rxq < rcv->real_num_rx_queues) { if (rxq < rcv->real_num_rx_queues) {
rq = &rcv_priv->rq[rxq]; rq = &rcv_priv->rq[rxq];
queue = netdev_get_tx_queue(dev, rxq);
/* The napi pointer is available when an XDP program is /* The napi pointer is available when an XDP program is
* attached or when GRO is enabled * attached or when GRO is enabled
...@@ -342,8 +340,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -342,8 +340,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
skb_tx_timestamp(skb); skb_tx_timestamp(skb);
if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) { if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
if (queue)
txq_trans_cond_update(queue);
if (!use_napi) if (!use_napi)
dev_lstats_add(dev, length); dev_lstats_add(dev, length);
} else { } else {
......
...@@ -161,8 +161,9 @@ struct slave { ...@@ -161,8 +161,9 @@ struct slave {
struct net_device *dev; /* first - useful for panic debug */ struct net_device *dev; /* first - useful for panic debug */
struct bonding *bond; /* our master */ struct bonding *bond; /* our master */
int delay; int delay;
/* all three in jiffies */ /* all 4 in jiffies */
unsigned long last_link_up; unsigned long last_link_up;
unsigned long last_tx;
unsigned long last_rx; unsigned long last_rx;
unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];
s8 link; /* one of BOND_LINK_XXXX */ s8 link; /* one of BOND_LINK_XXXX */
...@@ -540,6 +541,16 @@ static inline unsigned long slave_last_rx(struct bonding *bond, ...@@ -540,6 +541,16 @@ static inline unsigned long slave_last_rx(struct bonding *bond,
return slave->last_rx; return slave->last_rx;
} }
static inline void slave_update_last_tx(struct slave *slave)
{
WRITE_ONCE(slave->last_tx, jiffies);
}
static inline unsigned long slave_last_tx(struct slave *slave)
{
return READ_ONCE(slave->last_tx);
}
#ifdef CONFIG_NET_POLL_CONTROLLER #ifdef CONFIG_NET_POLL_CONTROLLER
static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave, static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave,
struct sk_buff *skb) struct sk_buff *skb)
......
...@@ -427,14 +427,10 @@ void __qdisc_run(struct Qdisc *q) ...@@ -427,14 +427,10 @@ void __qdisc_run(struct Qdisc *q)
unsigned long dev_trans_start(struct net_device *dev) unsigned long dev_trans_start(struct net_device *dev)
{ {
unsigned long val, res; unsigned long res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
unsigned long val;
unsigned int i; unsigned int i;
if (is_vlan_dev(dev))
dev = vlan_dev_real_dev(dev);
else if (netif_is_macvlan(dev))
dev = macvlan_dev_real_dev(dev);
res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
for (i = 1; i < dev->num_tx_queues; i++) { for (i = 1; i < dev->num_tx_queues; i++) {
val = READ_ONCE(netdev_get_tx_queue(dev, i)->trans_start); val = READ_ONCE(netdev_get_tx_queue(dev, i)->trans_start);
if (val && time_after(val, res)) if (val && time_after(val, res))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment