Commit 23f94f1b authored by David S. Miller's avatar David S. Miller

Merge branch 'ipmr-remove-rwlocks'

Eric Dumazet says:

====================
ipmr: get rid of rwlocks

We need to get rid of rwlocks in networking stacks,
if read_lock() is (ab)used from softirq context.

As discussed recently [1], rwlock are unfair by design in this case,
and writers can starve and trigger soft lockups.

This series convert ipmr code (both IPv4 and IPv6 families)
to RCU and spinlocks.

[1] https://lkml.org/lkml/2022/6/17/272

v2: fixed two typos, and resent because patch 19/19
    did not make it to patchwork.
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents 7fcb820c a96f7a6a
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
* @remote: Remote address for tunnels * @remote: Remote address for tunnels
*/ */
struct vif_device { struct vif_device {
struct net_device *dev; struct net_device __rcu *dev;
netdevice_tracker dev_tracker; netdevice_tracker dev_tracker;
unsigned long bytes_in, bytes_out; unsigned long bytes_in, bytes_out;
unsigned long pkt_in, pkt_out; unsigned long pkt_in, pkt_out;
...@@ -52,6 +52,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, ...@@ -52,6 +52,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb,
unsigned short family, unsigned short family,
enum fib_event_type event_type, enum fib_event_type event_type,
struct vif_device *vif, struct vif_device *vif,
struct net_device *vif_dev,
unsigned short vif_index, u32 tb_id, unsigned short vif_index, u32 tb_id,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
...@@ -60,7 +61,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, ...@@ -60,7 +61,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb,
.family = family, .family = family,
.extack = extack, .extack = extack,
}, },
.dev = vif->dev, .dev = vif_dev,
.vif_index = vif_index, .vif_index = vif_index,
.vif_flags = vif->flags, .vif_flags = vif->flags,
.tb_id = tb_id, .tb_id = tb_id,
...@@ -73,6 +74,7 @@ static inline int mr_call_vif_notifiers(struct net *net, ...@@ -73,6 +74,7 @@ static inline int mr_call_vif_notifiers(struct net *net,
unsigned short family, unsigned short family,
enum fib_event_type event_type, enum fib_event_type event_type,
struct vif_device *vif, struct vif_device *vif,
struct net_device *vif_dev,
unsigned short vif_index, u32 tb_id, unsigned short vif_index, u32 tb_id,
unsigned int *ipmr_seq) unsigned int *ipmr_seq)
{ {
...@@ -80,7 +82,7 @@ static inline int mr_call_vif_notifiers(struct net *net, ...@@ -80,7 +82,7 @@ static inline int mr_call_vif_notifiers(struct net *net,
.info = { .info = {
.family = family, .family = family,
}, },
.dev = vif->dev, .dev = vif_dev,
.vif_index = vif_index, .vif_index = vif_index,
.vif_flags = vif->flags, .vif_flags = vif->flags,
.tb_id = tb_id, .tb_id = tb_id,
...@@ -98,7 +100,8 @@ static inline int mr_call_vif_notifiers(struct net *net, ...@@ -98,7 +100,8 @@ static inline int mr_call_vif_notifiers(struct net *net,
#define MAXVIFS 32 #define MAXVIFS 32
#endif #endif
#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev)) /* Note: This helper is deprecated. */
#define VIF_EXISTS(_mrt, _idx) (!!rcu_access_pointer((_mrt)->vif_table[_idx].dev))
/* mfc_flags: /* mfc_flags:
* MFC_STATIC - the entry was added statically (not by a routing daemon) * MFC_STATIC - the entry was added statically (not by a routing daemon)
...@@ -305,7 +308,7 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, ...@@ -305,7 +308,7 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
struct netlink_ext_ack *extack), struct netlink_ext_ack *extack),
struct mr_table *(*mr_iter)(struct net *net, struct mr_table *(*mr_iter)(struct net *net,
struct mr_table *mrt), struct mr_table *mrt),
rwlock_t *mrt_lock, struct netlink_ext_ack *extack); struct netlink_ext_ack *extack);
#else #else
static inline void vif_device_init(struct vif_device *v, static inline void vif_device_init(struct vif_device *v,
struct net_device *dev, struct net_device *dev,
...@@ -360,7 +363,7 @@ static inline int mr_dump(struct net *net, struct notifier_block *nb, ...@@ -360,7 +363,7 @@ static inline int mr_dump(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack), struct netlink_ext_ack *extack),
struct mr_table *(*mr_iter)(struct net *net, struct mr_table *(*mr_iter)(struct net *net,
struct mr_table *mrt), struct mr_table *mrt),
rwlock_t *mrt_lock, struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
return -EINVAL; return -EINVAL;
} }
......
This diff is collapsed.
...@@ -13,7 +13,7 @@ void vif_device_init(struct vif_device *v, ...@@ -13,7 +13,7 @@ void vif_device_init(struct vif_device *v,
unsigned short flags, unsigned short flags,
unsigned short get_iflink_mask) unsigned short get_iflink_mask)
{ {
v->dev = NULL; RCU_INIT_POINTER(v->dev, NULL);
v->bytes_in = 0; v->bytes_in = 0;
v->bytes_out = 0; v->bytes_out = 0;
v->pkt_in = 0; v->pkt_in = 0;
...@@ -208,6 +208,7 @@ EXPORT_SYMBOL(mr_mfc_seq_next); ...@@ -208,6 +208,7 @@ EXPORT_SYMBOL(mr_mfc_seq_next);
int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mr_mfc *c, struct rtmsg *rtm) struct mr_mfc *c, struct rtmsg *rtm)
{ {
struct net_device *vif_dev;
struct rta_mfc_stats mfcs; struct rta_mfc_stats mfcs;
struct nlattr *mp_attr; struct nlattr *mp_attr;
struct rtnexthop *nhp; struct rtnexthop *nhp;
...@@ -220,10 +221,13 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, ...@@ -220,10 +221,13 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
return -ENOENT; return -ENOENT;
} }
if (VIF_EXISTS(mrt, c->mfc_parent) && rcu_read_lock();
nla_put_u32(skb, RTA_IIF, vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev);
mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) if (vif_dev && nla_put_u32(skb, RTA_IIF, vif_dev->ifindex) < 0) {
rcu_read_unlock();
return -EMSGSIZE; return -EMSGSIZE;
}
rcu_read_unlock();
if (c->mfc_flags & MFC_OFFLOAD) if (c->mfc_flags & MFC_OFFLOAD)
rtm->rtm_flags |= RTNH_F_OFFLOAD; rtm->rtm_flags |= RTNH_F_OFFLOAD;
...@@ -232,23 +236,27 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, ...@@ -232,23 +236,27 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
if (!mp_attr) if (!mp_attr)
return -EMSGSIZE; return -EMSGSIZE;
rcu_read_lock();
for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { struct vif_device *vif = &mrt->vif_table[ct];
struct vif_device *vif;
vif_dev = rcu_dereference(vif->dev);
if (vif_dev && c->mfc_un.res.ttls[ct] < 255) {
nhp = nla_reserve_nohdr(skb, sizeof(*nhp)); nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
if (!nhp) { if (!nhp) {
rcu_read_unlock();
nla_nest_cancel(skb, mp_attr); nla_nest_cancel(skb, mp_attr);
return -EMSGSIZE; return -EMSGSIZE;
} }
nhp->rtnh_flags = 0; nhp->rtnh_flags = 0;
nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
vif = &mrt->vif_table[ct]; nhp->rtnh_ifindex = vif_dev->ifindex;
nhp->rtnh_ifindex = vif->dev->ifindex;
nhp->rtnh_len = sizeof(*nhp); nhp->rtnh_len = sizeof(*nhp);
} }
} }
rcu_read_unlock();
nla_nest_end(skb, mp_attr); nla_nest_end(skb, mp_attr);
...@@ -275,13 +283,14 @@ static bool mr_mfc_uses_dev(const struct mr_table *mrt, ...@@ -275,13 +283,14 @@ static bool mr_mfc_uses_dev(const struct mr_table *mrt,
int ct; int ct;
for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { const struct net_device *vif_dev;
const struct vif_device *vif; const struct vif_device *vif;
vif = &mrt->vif_table[ct]; vif = &mrt->vif_table[ct];
if (vif->dev == dev) vif_dev = rcu_access_pointer(vif->dev);
return true; if (vif_dev && c->mfc_un.res.ttls[ct] < 255 &&
} vif_dev == dev)
return true;
} }
return false; return false;
} }
...@@ -390,7 +399,6 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, ...@@ -390,7 +399,6 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
struct netlink_ext_ack *extack), struct netlink_ext_ack *extack),
struct mr_table *(*mr_iter)(struct net *net, struct mr_table *(*mr_iter)(struct net *net,
struct mr_table *mrt), struct mr_table *mrt),
rwlock_t *mrt_lock,
struct netlink_ext_ack *extack) struct netlink_ext_ack *extack)
{ {
struct mr_table *mrt; struct mr_table *mrt;
...@@ -402,22 +410,25 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, ...@@ -402,22 +410,25 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family,
for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) { for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) {
struct vif_device *v = &mrt->vif_table[0]; struct vif_device *v = &mrt->vif_table[0];
struct net_device *vif_dev;
struct mr_mfc *mfc; struct mr_mfc *mfc;
int vifi; int vifi;
/* Notifiy on table VIF entries */ /* Notifiy on table VIF entries */
read_lock(mrt_lock); rcu_read_lock();
for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) {
if (!v->dev) vif_dev = rcu_dereference(v->dev);
if (!vif_dev)
continue; continue;
err = mr_call_vif_notifier(nb, family, err = mr_call_vif_notifier(nb, family,
FIB_EVENT_VIF_ADD, FIB_EVENT_VIF_ADD, v,
v, vifi, mrt->id, extack); vif_dev, vifi,
mrt->id, extack);
if (err) if (err)
break; break;
} }
read_unlock(mrt_lock); rcu_read_unlock();
if (err) if (err)
return err; return err;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment