Commit 7b67baf1 authored by Paolo Abeni's avatar Paolo Abeni

Merge branch 'rtnetlink-more-rcu-conversions-for-rtnl_fill_ifinfo'

Eric Dumazet says:

====================
rtnetlink: more rcu conversions for rtnl_fill_ifinfo()

We want to no longer rely on RTNL for "ip link show" command.

This is a long road, this series takes care of some parts.
====================

Link: https://lore.kernel.org/r/20240503192059.3884225-1-edumazet@google.comSigned-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents 25010156 9cf621bd
...@@ -1357,7 +1357,7 @@ static struct net *ppp_nl_get_link_net(const struct net_device *dev) ...@@ -1357,7 +1357,7 @@ static struct net *ppp_nl_get_link_net(const struct net_device *dev)
{ {
struct ppp *ppp = netdev_priv(dev); struct ppp *ppp = netdev_priv(dev);
return ppp->ppp_net; return READ_ONCE(ppp->ppp_net);
} }
static struct rtnl_link_ops ppp_link_ops __read_mostly = { static struct rtnl_link_ops ppp_link_ops __read_mostly = {
......
...@@ -4569,7 +4569,7 @@ static struct net *vxlan_get_link_net(const struct net_device *dev) ...@@ -4569,7 +4569,7 @@ static struct net *vxlan_get_link_net(const struct net_device *dev)
{ {
struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_dev *vxlan = netdev_priv(dev);
return vxlan->net; return READ_ONCE(vxlan->net);
} }
static struct rtnl_link_ops vxlan_link_ops __read_mostly = { static struct rtnl_link_ops vxlan_link_ops __read_mostly = {
......
...@@ -8544,27 +8544,29 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) ...@@ -8544,27 +8544,29 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify) static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
{ {
unsigned int old_flags = dev->flags; unsigned int old_flags = dev->flags;
unsigned int promiscuity, flags;
kuid_t uid; kuid_t uid;
kgid_t gid; kgid_t gid;
ASSERT_RTNL(); ASSERT_RTNL();
dev->flags |= IFF_PROMISC; promiscuity = dev->promiscuity + inc;
dev->promiscuity += inc; if (promiscuity == 0) {
if (dev->promiscuity == 0) {
/* /*
* Avoid overflow. * Avoid overflow.
* If inc causes overflow, untouch promisc and return error. * If inc causes overflow, untouch promisc and return error.
*/ */
if (inc < 0) if (unlikely(inc > 0)) {
dev->flags &= ~IFF_PROMISC;
else {
dev->promiscuity -= inc;
netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n"); netdev_warn(dev, "promiscuity touches roof, set promiscuity failed. promiscuity feature of device might be broken.\n");
return -EOVERFLOW; return -EOVERFLOW;
} }
flags = old_flags & ~IFF_PROMISC;
} else {
flags = old_flags | IFF_PROMISC;
} }
if (dev->flags != old_flags) { WRITE_ONCE(dev->promiscuity, promiscuity);
if (flags != old_flags) {
WRITE_ONCE(dev->flags, flags);
netdev_info(dev, "%s promiscuous mode\n", netdev_info(dev, "%s promiscuous mode\n",
dev->flags & IFF_PROMISC ? "entered" : "left"); dev->flags & IFF_PROMISC ? "entered" : "left");
if (audit_enabled) { if (audit_enabled) {
...@@ -8615,25 +8617,27 @@ EXPORT_SYMBOL(dev_set_promiscuity); ...@@ -8615,25 +8617,27 @@ EXPORT_SYMBOL(dev_set_promiscuity);
static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify) static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
{ {
unsigned int old_flags = dev->flags, old_gflags = dev->gflags; unsigned int old_flags = dev->flags, old_gflags = dev->gflags;
unsigned int allmulti, flags;
ASSERT_RTNL(); ASSERT_RTNL();
dev->flags |= IFF_ALLMULTI; allmulti = dev->allmulti + inc;
dev->allmulti += inc; if (allmulti == 0) {
if (dev->allmulti == 0) {
/* /*
* Avoid overflow. * Avoid overflow.
* If inc causes overflow, untouch allmulti and return error. * If inc causes overflow, untouch allmulti and return error.
*/ */
if (inc < 0) if (unlikely(inc > 0)) {
dev->flags &= ~IFF_ALLMULTI;
else {
dev->allmulti -= inc;
netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n"); netdev_warn(dev, "allmulti touches roof, set allmulti failed. allmulti feature of device might be broken.\n");
return -EOVERFLOW; return -EOVERFLOW;
} }
flags = old_flags & ~IFF_ALLMULTI;
} else {
flags = old_flags | IFF_ALLMULTI;
} }
if (dev->flags ^ old_flags) { WRITE_ONCE(dev->allmulti, allmulti);
if (flags != old_flags) {
WRITE_ONCE(dev->flags, flags);
netdev_info(dev, "%s allmulticast mode\n", netdev_info(dev, "%s allmulticast mode\n",
dev->flags & IFF_ALLMULTI ? "entered" : "left"); dev->flags & IFF_ALLMULTI ? "entered" : "left");
dev_change_rx_flags(dev, IFF_ALLMULTI); dev_change_rx_flags(dev, IFF_ALLMULTI);
...@@ -8959,7 +8963,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) ...@@ -8959,7 +8963,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
return -ERANGE; return -ERANGE;
if (new_len != orig_len) { if (new_len != orig_len) {
dev->tx_queue_len = new_len; WRITE_ONCE(dev->tx_queue_len, new_len);
res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev); res = call_netdevice_notifiers(NETDEV_CHANGE_TX_QUEUE_LEN, dev);
res = notifier_to_errno(res); res = notifier_to_errno(res);
if (res) if (res)
...@@ -8973,7 +8977,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len) ...@@ -8973,7 +8977,7 @@ int dev_change_tx_queue_len(struct net_device *dev, unsigned long new_len)
err_rollback: err_rollback:
netdev_err(dev, "refused to change device tx_queue_len\n"); netdev_err(dev, "refused to change device tx_queue_len\n");
dev->tx_queue_len = orig_len; WRITE_ONCE(dev->tx_queue_len, orig_len);
return res; return res;
} }
...@@ -9219,7 +9223,7 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) ...@@ -9219,7 +9223,7 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
netif_carrier_off(dev); netif_carrier_off(dev);
else else
netif_carrier_on(dev); netif_carrier_on(dev);
dev->proto_down = proto_down; WRITE_ONCE(dev->proto_down, proto_down);
return 0; return 0;
} }
...@@ -9233,18 +9237,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) ...@@ -9233,18 +9237,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask, void dev_change_proto_down_reason(struct net_device *dev, unsigned long mask,
u32 value) u32 value)
{ {
u32 proto_down_reason;
int b; int b;
if (!mask) { if (!mask) {
dev->proto_down_reason = value; proto_down_reason = value;
} else { } else {
proto_down_reason = dev->proto_down_reason;
for_each_set_bit(b, &mask, 32) { for_each_set_bit(b, &mask, 32) {
if (value & (1 << b)) if (value & (1 << b))
dev->proto_down_reason |= BIT(b); proto_down_reason |= BIT(b);
else else
dev->proto_down_reason &= ~BIT(b); proto_down_reason &= ~BIT(b);
} }
} }
WRITE_ONCE(dev->proto_down_reason, proto_down_reason);
} }
struct bpf_xdp_link { struct bpf_xdp_link {
......
...@@ -1036,7 +1036,7 @@ static size_t rtnl_proto_down_size(const struct net_device *dev) ...@@ -1036,7 +1036,7 @@ static size_t rtnl_proto_down_size(const struct net_device *dev)
{ {
size_t size = nla_total_size(1); size_t size = nla_total_size(1);
if (dev->proto_down_reason) /* Assume dev->proto_down_reason is not zero. */
size += nla_total_size(0) + nla_total_size(4); size += nla_total_size(0) + nla_total_size(4);
return size; return size;
...@@ -1477,13 +1477,15 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, ...@@ -1477,13 +1477,15 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb,
static u32 rtnl_xdp_prog_skb(struct net_device *dev) static u32 rtnl_xdp_prog_skb(struct net_device *dev)
{ {
const struct bpf_prog *generic_xdp_prog; const struct bpf_prog *generic_xdp_prog;
u32 res = 0;
ASSERT_RTNL(); rcu_read_lock();
generic_xdp_prog = rcu_dereference(dev->xdp_prog);
if (generic_xdp_prog)
res = generic_xdp_prog->aux->id;
rcu_read_unlock();
generic_xdp_prog = rtnl_dereference(dev->xdp_prog); return res;
if (!generic_xdp_prog)
return 0;
return generic_xdp_prog->aux->id;
} }
static u32 rtnl_xdp_prog_drv(struct net_device *dev) static u32 rtnl_xdp_prog_drv(struct net_device *dev)
...@@ -1603,7 +1605,8 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev) ...@@ -1603,7 +1605,8 @@ static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev)
upper_dev = netdev_master_upper_dev_get_rcu(dev); upper_dev = netdev_master_upper_dev_get_rcu(dev);
if (upper_dev) if (upper_dev)
ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex); ret = nla_put_u32(skb, IFLA_MASTER,
READ_ONCE(upper_dev->ifindex));
rcu_read_unlock(); rcu_read_unlock();
return ret; return ret;
...@@ -1736,10 +1739,10 @@ static int rtnl_fill_proto_down(struct sk_buff *skb, ...@@ -1736,10 +1739,10 @@ static int rtnl_fill_proto_down(struct sk_buff *skb,
struct nlattr *pr; struct nlattr *pr;
u32 preason; u32 preason;
if (nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) if (nla_put_u8(skb, IFLA_PROTO_DOWN, READ_ONCE(dev->proto_down)))
goto nla_put_failure; goto nla_put_failure;
preason = dev->proto_down_reason; preason = READ_ONCE(dev->proto_down_reason);
if (!preason) if (!preason)
return 0; return 0;
...@@ -1812,6 +1815,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, ...@@ -1812,6 +1815,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
u32 event, int *new_nsid, int new_ifindex, u32 event, int *new_nsid, int new_ifindex,
int tgt_netnsid, gfp_t gfp) int tgt_netnsid, gfp_t gfp)
{ {
char devname[IFNAMSIZ];
struct ifinfomsg *ifm; struct ifinfomsg *ifm;
struct nlmsghdr *nlh; struct nlmsghdr *nlh;
struct Qdisc *qdisc; struct Qdisc *qdisc;
...@@ -1824,41 +1828,51 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, ...@@ -1824,41 +1828,51 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
ifm = nlmsg_data(nlh); ifm = nlmsg_data(nlh);
ifm->ifi_family = AF_UNSPEC; ifm->ifi_family = AF_UNSPEC;
ifm->__ifi_pad = 0; ifm->__ifi_pad = 0;
ifm->ifi_type = dev->type; ifm->ifi_type = READ_ONCE(dev->type);
ifm->ifi_index = dev->ifindex; ifm->ifi_index = READ_ONCE(dev->ifindex);
ifm->ifi_flags = dev_get_flags(dev); ifm->ifi_flags = dev_get_flags(dev);
ifm->ifi_change = change; ifm->ifi_change = change;
if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid)) if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
goto nla_put_failure; goto nla_put_failure;
qdisc = rtnl_dereference(dev->qdisc); netdev_copy_name(dev, devname);
if (nla_put_string(skb, IFLA_IFNAME, dev->name) || if (nla_put_string(skb, IFLA_IFNAME, devname))
nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || goto nla_put_failure;
if (nla_put_u32(skb, IFLA_TXQLEN, READ_ONCE(dev->tx_queue_len)) ||
nla_put_u8(skb, IFLA_OPERSTATE, nla_put_u8(skb, IFLA_OPERSTATE,
netif_running(dev) ? dev->operstate : IF_OPER_DOWN) || netif_running(dev) ? READ_ONCE(dev->operstate) :
nla_put_u8(skb, IFLA_LINKMODE, dev->link_mode) || IF_OPER_DOWN) ||
nla_put_u32(skb, IFLA_MTU, dev->mtu) || nla_put_u8(skb, IFLA_LINKMODE, READ_ONCE(dev->link_mode)) ||
nla_put_u32(skb, IFLA_MIN_MTU, dev->min_mtu) || nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) ||
nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) || nla_put_u32(skb, IFLA_MIN_MTU, READ_ONCE(dev->min_mtu)) ||
nla_put_u32(skb, IFLA_GROUP, dev->group) || nla_put_u32(skb, IFLA_MAX_MTU, READ_ONCE(dev->max_mtu)) ||
nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || nla_put_u32(skb, IFLA_GROUP, READ_ONCE(dev->group)) ||
nla_put_u32(skb, IFLA_ALLMULTI, dev->allmulti) || nla_put_u32(skb, IFLA_PROMISCUITY, READ_ONCE(dev->promiscuity)) ||
nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || nla_put_u32(skb, IFLA_ALLMULTI, READ_ONCE(dev->allmulti)) ||
nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || nla_put_u32(skb, IFLA_NUM_TX_QUEUES,
nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || READ_ONCE(dev->num_tx_queues)) ||
nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) || nla_put_u32(skb, IFLA_GSO_MAX_SEGS,
nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) || READ_ONCE(dev->gso_max_segs)) ||
nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) || nla_put_u32(skb, IFLA_GSO_MAX_SIZE,
nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) || READ_ONCE(dev->gso_max_size)) ||
nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) || nla_put_u32(skb, IFLA_GRO_MAX_SIZE,
READ_ONCE(dev->gro_max_size)) ||
nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE,
READ_ONCE(dev->gso_ipv4_max_size)) ||
nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE,
READ_ONCE(dev->gro_ipv4_max_size)) ||
nla_put_u32(skb, IFLA_TSO_MAX_SIZE,
READ_ONCE(dev->tso_max_size)) ||
nla_put_u32(skb, IFLA_TSO_MAX_SEGS,
READ_ONCE(dev->tso_max_segs)) ||
#ifdef CONFIG_RPS #ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || nla_put_u32(skb, IFLA_NUM_RX_QUEUES,
READ_ONCE(dev->num_rx_queues)) ||
#endif #endif
put_master_ifindex(skb, dev) || put_master_ifindex(skb, dev) ||
nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
(qdisc &&
nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) ||
nla_put_ifalias(skb, dev) || nla_put_ifalias(skb, dev) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES, nla_put_u32(skb, IFLA_CARRIER_CHANGES,
atomic_read(&dev->carrier_up_count) + atomic_read(&dev->carrier_up_count) +
...@@ -1909,9 +1923,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, ...@@ -1909,9 +1923,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure; goto nla_put_failure;
} }
if (rtnl_fill_link_netnsid(skb, dev, src_net, gfp))
goto nla_put_failure;
if (new_nsid && if (new_nsid &&
nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
goto nla_put_failure; goto nla_put_failure;
...@@ -1924,6 +1935,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, ...@@ -1924,6 +1935,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure; goto nla_put_failure;
rcu_read_lock(); rcu_read_lock();
if (rtnl_fill_link_netnsid(skb, dev, src_net, GFP_ATOMIC))
goto nla_put_failure_rcu;
qdisc = rcu_dereference(dev->qdisc);
if (qdisc && nla_put_string(skb, IFLA_QDISC, qdisc->ops->id))
goto nla_put_failure_rcu;
if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
goto nla_put_failure_rcu; goto nla_put_failure_rcu;
if (rtnl_fill_link_ifmap(skb, dev)) if (rtnl_fill_link_ifmap(skb, dev))
......
...@@ -1120,7 +1120,7 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev) ...@@ -1120,7 +1120,7 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{ {
struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel *tunnel = netdev_priv(dev);
return tunnel->net; return READ_ONCE(tunnel->net);
} }
EXPORT_SYMBOL(ip_tunnel_get_link_net); EXPORT_SYMBOL(ip_tunnel_get_link_net);
......
...@@ -2146,7 +2146,7 @@ struct net *ip6_tnl_get_link_net(const struct net_device *dev) ...@@ -2146,7 +2146,7 @@ struct net *ip6_tnl_get_link_net(const struct net_device *dev)
{ {
struct ip6_tnl *tunnel = netdev_priv(dev); struct ip6_tnl *tunnel = netdev_priv(dev);
return tunnel->net; return READ_ONCE(tunnel->net);
} }
EXPORT_SYMBOL(ip6_tnl_get_link_net); EXPORT_SYMBOL(ip6_tnl_get_link_net);
......
...@@ -1334,7 +1334,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, ...@@ -1334,7 +1334,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
* before again attaching a qdisc. * before again attaching a qdisc.
*/ */
if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) { if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN; WRITE_ONCE(dev->tx_queue_len, DEFAULT_TX_QUEUE_LEN);
netdev_info(dev, "Caught tx_queue_len zero misconfig\n"); netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
} }
......
...@@ -78,7 +78,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) ...@@ -78,7 +78,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
struct net_device *dev = qdisc_dev(sch); struct net_device *dev = qdisc_dev(sch);
struct teql_sched_data *q = qdisc_priv(sch); struct teql_sched_data *q = qdisc_priv(sch);
if (q->q.qlen < dev->tx_queue_len) { if (q->q.qlen < READ_ONCE(dev->tx_queue_len)) {
__skb_queue_tail(&q->q, skb); __skb_queue_tail(&q->q, skb);
return NET_XMIT_SUCCESS; return NET_XMIT_SUCCESS;
} }
......
...@@ -926,7 +926,7 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev) ...@@ -926,7 +926,7 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev)
{ {
struct xfrm_if *xi = netdev_priv(dev); struct xfrm_if *xi = netdev_priv(dev);
return xi->net; return READ_ONCE(xi->net);
} }
static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = { static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment