Commit 23f94f1b authored by David S. Miller's avatar David S. Miller

Merge branch 'ipmr-remove-rwlocks'

Eric Dumazet says:

====================
ipmr: get rid of rwlocks

We need to get rid of rwlocks in networking stacks,
if read_lock() is (ab)used from softirq context.

As discussed recently [1], rwlock are unfair by design in this case,
and writers can starve and trigger soft lockups.

This series convert ipmr code (both IPv4 and IPv6 families)
to RCU and spinlocks.

[1] https://lkml.org/lkml/2022/6/17/272

v2: fixed two typos, and resent because patch 19/19
    did not make it to patchwork.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 7fcb820c a96f7a6a
......@@ -26,7 +26,7 @@
* @remote: Remote address for tunnels
*/
struct vif_device {
struct net_device *dev;
struct net_device __rcu *dev;
netdevice_tracker dev_tracker;
unsigned long bytes_in, bytes_out;
unsigned long pkt_in, pkt_out;
......@@ -52,6 +52,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb,
unsigned short family,
enum fib_event_type event_type,
struct vif_device *vif,
struct net_device *vif_dev,
unsigned short vif_index, u32 tb_id,
struct netlink_ext_ack *extack)
{
......@@ -60,7 +61,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb,
.family = family,
.extack = extack,
},
.dev = vif->dev,
.dev = vif_dev,
.vif_index = vif_index,
.vif_flags = vif->flags,
.tb_id = tb_id,
......@@ -73,6 +74,7 @@ static inline int mr_call_vif_notifiers(struct net *net,
unsigned short family,
enum fib_event_type event_type,
struct vif_device *vif,
struct net_device *vif_dev,
unsigned short vif_index, u32 tb_id,
unsigned int *ipmr_seq)
{
......@@ -80,7 +82,7 @@ static inline int mr_call_vif_notifiers(struct net *net,
.info = {
.family = family,
},
.dev = vif->dev,
.dev = vif_dev,
.vif_index = vif_index,
.vif_flags = vif->flags,
.tb_id = tb_id,
......@@ -98,7 +100,8 @@ static inline int mr_call_vif_notifiers(struct net *net,
#define MAXVIFS 32
#endif
#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev))
/* Note: This helper is deprecated. */
#define VIF_EXISTS(_mrt, _idx) (!!rcu_access_pointer((_mrt)->vif_table[_idx].dev))
/* mfc_flags:
* MFC_STATIC - the entry was added statically (not by a routing daemon)
......@@ -305,7 +308,7 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
struct netlink_ext_ack *extack),
struct mr_table *(*mr_iter)(struct net *net,
struct mr_table *mrt),
rwlock_t *mrt_lock, struct netlink_ext_ack *extack);
struct netlink_ext_ack *extack);
#else
static inline void vif_device_init(struct vif_device *v,
struct net_device *dev,
......@@ -360,7 +363,7 @@ static inline int mr_dump(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack),
struct mr_table *(*mr_iter)(struct net *net,
struct mr_table *mrt),
rwlock_t *mrt_lock, struct netlink_ext_ack *extack)
struct netlink_ext_ack *extack)
{
return -EINVAL;
}
......
This diff is collapsed.
......@@ -13,7 +13,7 @@ void vif_device_init(struct vif_device *v,
unsigned short flags,
unsigned short get_iflink_mask)
{
v->dev = NULL;
RCU_INIT_POINTER(v->dev, NULL);
v->bytes_in = 0;
v->bytes_out = 0;
v->pkt_in = 0;
......@@ -208,6 +208,7 @@ EXPORT_SYMBOL(mr_mfc_seq_next);
int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mr_mfc *c, struct rtmsg *rtm)
{
struct net_device *vif_dev;
struct rta_mfc_stats mfcs;
struct nlattr *mp_attr;
struct rtnexthop *nhp;
......@@ -220,10 +221,13 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
return -ENOENT;
}
if (VIF_EXISTS(mrt, c->mfc_parent) &&
nla_put_u32(skb, RTA_IIF,
mrt->vif_table[c->mfc_parent].dev->ifindex) < 0)
rcu_read_lock();
vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev);
if (vif_dev && nla_put_u32(skb, RTA_IIF, vif_dev->ifindex) < 0) {
rcu_read_unlock();
return -EMSGSIZE;
}
rcu_read_unlock();
if (c->mfc_flags & MFC_OFFLOAD)
rtm->rtm_flags |= RTNH_F_OFFLOAD;
......@@ -232,23 +236,27 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
if (!mp_attr)
return -EMSGSIZE;
rcu_read_lock();
for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
struct vif_device *vif;
struct vif_device *vif = &mrt->vif_table[ct];
vif_dev = rcu_dereference(vif->dev);
if (vif_dev && c->mfc_un.res.ttls[ct] < 255) {
nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
if (!nhp) {
rcu_read_unlock();
nla_nest_cancel(skb, mp_attr);
return -EMSGSIZE;
}
nhp->rtnh_flags = 0;
nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
vif = &mrt->vif_table[ct];
nhp->rtnh_ifindex = vif->dev->ifindex;
nhp->rtnh_ifindex = vif_dev->ifindex;
nhp->rtnh_len = sizeof(*nhp);
}
}
rcu_read_unlock();
nla_nest_end(skb, mp_attr);
......@@ -275,13 +283,14 @@ static bool mr_mfc_uses_dev(const struct mr_table *mrt,
int ct;
for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
const struct vif_device *vif;
vif = &mrt->vif_table[ct];
if (vif->dev == dev)
return true;
}
const struct net_device *vif_dev;
const struct vif_device *vif;
vif = &mrt->vif_table[ct];
vif_dev = rcu_access_pointer(vif->dev);
if (vif_dev && c->mfc_un.res.ttls[ct] < 255 &&
vif_dev == dev)
return true;
}
return false;
}
......@@ -390,7 +399,6 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
struct netlink_ext_ack *extack),
struct mr_table *(*mr_iter)(struct net *net,
struct mr_table *mrt),
rwlock_t *mrt_lock,
struct netlink_ext_ack *extack)
{
struct mr_table *mrt;
......@@ -402,22 +410,25 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) {
struct vif_device *v = &mrt->vif_table[0];
struct net_device *vif_dev;
struct mr_mfc *mfc;
int vifi;
/* Notifiy on table VIF entries */
read_lock(mrt_lock);
rcu_read_lock();
for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
if (!v->dev)
vif_dev = rcu_dereference(v->dev);
if (!vif_dev)
continue;
err = mr_call_vif_notifier(nb, family,
FIB_EVENT_VIF_ADD,
v, vifi, mrt->id, extack);
FIB_EVENT_VIF_ADD, v,
vif_dev, vifi,
mrt->id, extack);
if (err)
break;
}
read_unlock(mrt_lock);
rcu_read_unlock();
if (err)
return err;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment