Commit 5bdc312c authored by Jakub Kicinski

Merge branch 'net-store-netdevs-in-an-xarray'

Jakub Kicinski says:

====================
net: store netdevs in an xarray

One of the more annoying developer experience gaps we have in netlink
is iterating over netdevs. It's painful. Add an xarray to make
it trivial.

v1: https://lore.kernel.org/all/20230722014237.4078962-1-kuba@kernel.org/
====================

Link: https://lore.kernel.org/r/20230726185530.2247698-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents 083476a2 84e00d9b
...@@ -3016,6 +3016,9 @@ extern rwlock_t dev_base_lock; /* Device list lock */ ...@@ -3016,6 +3016,9 @@ extern rwlock_t dev_base_lock; /* Device list lock */
if (netdev_master_upper_dev_get_rcu(slave) == (bond)) if (netdev_master_upper_dev_get_rcu(slave) == (bond))
#define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list)
#define for_each_netdev_dump(net, d, ifindex) \
xa_for_each_start(&(net)->dev_by_index, (ifindex), (d), (ifindex))
static inline struct net_device *next_net_device(struct net_device *dev) static inline struct net_device *next_net_device(struct net_device *dev)
{ {
struct list_head *lh; struct list_head *lh;
......
...@@ -42,6 +42,7 @@ ...@@ -42,6 +42,7 @@
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/skbuff.h> #include <linux/skbuff.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/xarray.h>
struct user_namespace; struct user_namespace;
struct proc_dir_entry; struct proc_dir_entry;
...@@ -69,7 +70,7 @@ struct net { ...@@ -69,7 +70,7 @@ struct net {
atomic_t dev_unreg_count; atomic_t dev_unreg_count;
unsigned int dev_base_seq; /* protected by rtnl_mutex */ unsigned int dev_base_seq; /* protected by rtnl_mutex */
int ifindex; u32 ifindex;
spinlock_t nsid_lock; spinlock_t nsid_lock;
atomic_t fnhe_genid; atomic_t fnhe_genid;
...@@ -110,6 +111,7 @@ struct net { ...@@ -110,6 +111,7 @@ struct net {
struct hlist_head *dev_name_head; struct hlist_head *dev_name_head;
struct hlist_head *dev_index_head; struct hlist_head *dev_index_head;
struct xarray dev_by_index;
struct raw_notifier_head netdev_chain; struct raw_notifier_head netdev_chain;
/* Note that @hash_mix can be read millions times per second, /* Note that @hash_mix can be read millions times per second,
......
...@@ -388,6 +388,8 @@ static void list_netdevice(struct net_device *dev) ...@@ -388,6 +388,8 @@ static void list_netdevice(struct net_device *dev)
hlist_add_head_rcu(&dev->index_hlist, hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex)); dev_index_hash(net, dev->ifindex));
write_unlock(&dev_base_lock); write_unlock(&dev_base_lock);
/* We reserved the ifindex, this can't fail */
WARN_ON(xa_store(&net->dev_by_index, dev->ifindex, dev, GFP_KERNEL));
dev_base_seq_inc(net); dev_base_seq_inc(net);
} }
...@@ -397,8 +399,12 @@ static void list_netdevice(struct net_device *dev) ...@@ -397,8 +399,12 @@ static void list_netdevice(struct net_device *dev)
*/ */
static void unlist_netdevice(struct net_device *dev, bool lock) static void unlist_netdevice(struct net_device *dev, bool lock)
{ {
struct net *net = dev_net(dev);
ASSERT_RTNL(); ASSERT_RTNL();
xa_erase(&net->dev_by_index, dev->ifindex);
/* Unlink dev from the device chain */ /* Unlink dev from the device chain */
if (lock) if (lock)
write_lock(&dev_base_lock); write_lock(&dev_base_lock);
...@@ -9565,23 +9571,35 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, ...@@ -9565,23 +9571,35 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
} }
/** /**
* dev_new_index - allocate an ifindex * dev_index_reserve() - allocate an ifindex in a namespace
* @net: the applicable net namespace * @net: the applicable net namespace
* @ifindex: requested ifindex, pass %0 to get one allocated
*
* Allocate a ifindex for a new device. Caller must either use the ifindex
* to store the device (via list_netdevice()) or call dev_index_release()
* to give the index up.
* *
* Returns a suitable unique value for a new device interface * Return: a suitable unique value for a new device interface number or -errno.
* number. The caller must hold the rtnl semaphore or the
* dev_base_lock to be sure it remains unique.
*/ */
static int dev_new_index(struct net *net) static int dev_index_reserve(struct net *net, u32 ifindex)
{ {
int ifindex = net->ifindex; int err;
for (;;) { if (!ifindex)
if (++ifindex <= 0) err = xa_alloc_cyclic(&net->dev_by_index, &ifindex, NULL,
ifindex = 1; xa_limit_31b, &net->ifindex, GFP_KERNEL);
if (!__dev_get_by_index(net, ifindex)) else
return net->ifindex = ifindex; err = xa_insert(&net->dev_by_index, ifindex, NULL, GFP_KERNEL);
} if (err < 0)
return err;
return ifindex;
}
static void dev_index_release(struct net *net, int ifindex)
{
/* Expect only unused indexes, unlist_netdevice() removes the used */
WARN_ON(xa_erase(&net->dev_by_index, ifindex));
} }
/* Delayed registration/unregisteration */ /* Delayed registration/unregisteration */
...@@ -10051,11 +10069,10 @@ int register_netdevice(struct net_device *dev) ...@@ -10051,11 +10069,10 @@ int register_netdevice(struct net_device *dev)
goto err_uninit; goto err_uninit;
} }
ret = -EBUSY; ret = dev_index_reserve(net, dev->ifindex);
if (!dev->ifindex) if (ret < 0)
dev->ifindex = dev_new_index(net);
else if (__dev_get_by_index(net, dev->ifindex))
goto err_uninit; goto err_uninit;
dev->ifindex = ret;
/* Transfer changeable features to wanted_features and enable /* Transfer changeable features to wanted_features and enable
* software offloads (GSO and GRO). * software offloads (GSO and GRO).
...@@ -10102,7 +10119,7 @@ int register_netdevice(struct net_device *dev) ...@@ -10102,7 +10119,7 @@ int register_netdevice(struct net_device *dev)
ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
ret = notifier_to_errno(ret); ret = notifier_to_errno(ret);
if (ret) if (ret)
goto err_uninit; goto err_ifindex_release;
ret = netdev_register_kobject(dev); ret = netdev_register_kobject(dev);
write_lock(&dev_base_lock); write_lock(&dev_base_lock);
...@@ -10158,6 +10175,8 @@ int register_netdevice(struct net_device *dev) ...@@ -10158,6 +10175,8 @@ int register_netdevice(struct net_device *dev)
err_uninit_notify: err_uninit_notify:
call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev);
err_ifindex_release:
dev_index_release(net, dev->ifindex);
err_uninit: err_uninit:
if (dev->netdev_ops->ndo_uninit) if (dev->netdev_ops->ndo_uninit)
dev->netdev_ops->ndo_uninit(dev); dev->netdev_ops->ndo_uninit(dev);
...@@ -11035,9 +11054,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, ...@@ -11035,9 +11054,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
} }
/* Check that new_ifindex isn't used yet. */ /* Check that new_ifindex isn't used yet. */
err = -EBUSY; if (new_ifindex) {
if (new_ifindex && __dev_get_by_index(net, new_ifindex)) err = dev_index_reserve(net, new_ifindex);
if (err < 0)
goto out; goto out;
} else {
/* If there is an ifindex conflict assign a new one */
err = dev_index_reserve(net, dev->ifindex);
if (err == -EBUSY)
err = dev_index_reserve(net, 0);
if (err < 0)
goto out;
new_ifindex = err;
}
/* /*
* And now a mini version of register_netdevice unregister_netdevice. * And now a mini version of register_netdevice unregister_netdevice.
...@@ -11065,13 +11094,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net, ...@@ -11065,13 +11094,6 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
rcu_barrier(); rcu_barrier();
new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL); new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
/* If there is an ifindex conflict assign a new one */
if (!new_ifindex) {
if (__dev_get_by_index(net, dev->ifindex))
new_ifindex = dev_new_index(net);
else
new_ifindex = dev->ifindex;
}
rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid, rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
new_ifindex); new_ifindex);
...@@ -11249,6 +11271,9 @@ static int __net_init netdev_init(struct net *net) ...@@ -11249,6 +11271,9 @@ static int __net_init netdev_init(struct net *net)
if (net->dev_index_head == NULL) if (net->dev_index_head == NULL)
goto err_idx; goto err_idx;
net->ifindex = 1;
xa_init_flags(&net->dev_by_index, XA_FLAGS_ALLOC);
RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain); RAW_INIT_NOTIFIER_HEAD(&net->netdev_chain);
return 0; return 0;
...@@ -11346,6 +11371,7 @@ static void __net_exit netdev_exit(struct net *net) ...@@ -11346,6 +11371,7 @@ static void __net_exit netdev_exit(struct net *net)
{ {
kfree(net->dev_name_head); kfree(net->dev_name_head);
kfree(net->dev_index_head); kfree(net->dev_index_head);
xa_destroy(&net->dev_by_index);
if (net != &init_net) if (net != &init_net)
WARN_ON_ONCE(!list_empty(&net->dev_base_head)); WARN_ON_ONCE(!list_empty(&net->dev_base_head));
} }
......
...@@ -101,43 +101,22 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -101,43 +101,22 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{ {
struct net *net = sock_net(skb->sk); struct net *net = sock_net(skb->sk);
struct net_device *netdev; struct net_device *netdev;
int idx = 0, s_idx; int err = 0;
int h, s_h;
int err;
s_h = cb->args[0];
s_idx = cb->args[1];
rtnl_lock(); rtnl_lock();
for_each_netdev_dump(net, netdev, cb->args[0]) {
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
struct hlist_head *head;
idx = 0;
head = &net->dev_index_head[h];
hlist_for_each_entry(netdev, head, index_hlist) {
if (idx < s_idx)
goto cont;
err = netdev_nl_dev_fill(netdev, skb, err = netdev_nl_dev_fill(netdev, skb,
NETLINK_CB(cb->skb).portid, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, 0, cb->nlh->nlmsg_seq, 0,
NETDEV_CMD_DEV_GET); NETDEV_CMD_DEV_GET);
if (err < 0) if (err < 0)
break; break;
cont:
idx++;
} }
}
rtnl_unlock(); rtnl_unlock();
if (err != -EMSGSIZE) if (err != -EMSGSIZE)
return err; return err;
cb->args[1] = idx;
cb->args[0] = h;
cb->seq = net->dev_base_seq;
return skb->len; return skb->len;
} }
......
...@@ -252,8 +252,7 @@ int ethnl_multicast(struct sk_buff *skb, struct net_device *dev) ...@@ -252,8 +252,7 @@ int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
* @ops: request ops of currently processed message type * @ops: request ops of currently processed message type
* @req_info: parsed request header of processed request * @req_info: parsed request header of processed request
* @reply_data: data needed to compose the reply * @reply_data: data needed to compose the reply
* @pos_hash: saved iteration position - hashbucket * @pos_ifindex: saved iteration position - ifindex
* @pos_idx: saved iteration position - index
* *
* These parameters are kept in struct netlink_callback as context preserved * These parameters are kept in struct netlink_callback as context preserved
* between iterations. They are initialized by ethnl_default_start() and used * between iterations. They are initialized by ethnl_default_start() and used
...@@ -263,8 +262,7 @@ struct ethnl_dump_ctx { ...@@ -263,8 +262,7 @@ struct ethnl_dump_ctx {
const struct ethnl_request_ops *ops; const struct ethnl_request_ops *ops;
struct ethnl_req_info *req_info; struct ethnl_req_info *req_info;
struct ethnl_reply_data *reply_data; struct ethnl_reply_data *reply_data;
int pos_hash; unsigned long pos_ifindex;
int pos_idx;
}; };
static const struct ethnl_request_ops * static const struct ethnl_request_ops *
...@@ -490,55 +488,27 @@ static int ethnl_default_dumpit(struct sk_buff *skb, ...@@ -490,55 +488,27 @@ static int ethnl_default_dumpit(struct sk_buff *skb,
{ {
struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb); struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
struct net *net = sock_net(skb->sk); struct net *net = sock_net(skb->sk);
int s_idx = ctx->pos_idx; struct net_device *dev;
int h, idx = 0;
int ret = 0; int ret = 0;
rtnl_lock(); rtnl_lock();
for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { for_each_netdev_dump(net, dev, ctx->pos_ifindex) {
struct hlist_head *head;
struct net_device *dev;
unsigned int seq;
head = &net->dev_index_head[h];
restart_chain:
seq = net->dev_base_seq;
cb->seq = seq;
idx = 0;
hlist_for_each_entry(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
dev_hold(dev); dev_hold(dev);
rtnl_unlock(); rtnl_unlock();
ret = ethnl_default_dump_one(skb, dev, ctx, cb); ret = ethnl_default_dump_one(skb, dev, ctx, cb);
rtnl_lock();
dev_put(dev); dev_put(dev);
if (ret < 0) {
if (ret == -EOPNOTSUPP) if (ret < 0 && ret != -EOPNOTSUPP) {
goto lock_and_cont;
if (likely(skb->len)) if (likely(skb->len))
ret = skb->len; ret = skb->len;
goto out; break;
}
lock_and_cont:
rtnl_lock();
if (net->dev_base_seq != seq) {
s_idx = idx + 1;
goto restart_chain;
} }
cont:
idx++;
}
} }
rtnl_unlock(); rtnl_unlock();
out:
ctx->pos_hash = h;
ctx->pos_idx = idx;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return ret; return ret;
} }
...@@ -584,8 +554,7 @@ static int ethnl_default_start(struct netlink_callback *cb) ...@@ -584,8 +554,7 @@ static int ethnl_default_start(struct netlink_callback *cb)
ctx->ops = ops; ctx->ops = ops;
ctx->req_info = req_info; ctx->req_info = req_info;
ctx->reply_data = reply_data; ctx->reply_data = reply_data;
ctx->pos_hash = 0; ctx->pos_ifindex = 0;
ctx->pos_idx = 0;
return 0; return 0;
......
...@@ -212,8 +212,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info) ...@@ -212,8 +212,7 @@ int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info)
struct ethnl_tunnel_info_dump_ctx { struct ethnl_tunnel_info_dump_ctx {
struct ethnl_req_info req_info; struct ethnl_req_info req_info;
int pos_hash; unsigned long ifindex;
int pos_idx;
}; };
int ethnl_tunnel_info_start(struct netlink_callback *cb) int ethnl_tunnel_info_start(struct netlink_callback *cb)
...@@ -243,34 +242,24 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -243,34 +242,24 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{ {
struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx; struct ethnl_tunnel_info_dump_ctx *ctx = (void *)cb->ctx;
struct net *net = sock_net(skb->sk); struct net *net = sock_net(skb->sk);
int s_idx = ctx->pos_idx; struct net_device *dev;
int h, idx = 0;
int ret = 0; int ret = 0;
void *ehdr; void *ehdr;
rtnl_lock(); rtnl_lock();
cb->seq = net->dev_base_seq; for_each_netdev_dump(net, dev, ctx->ifindex) {
for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
struct hlist_head *head;
struct net_device *dev;
head = &net->dev_index_head[h];
idx = 0;
hlist_for_each_entry(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
ehdr = ethnl_dump_put(skb, cb, ehdr = ethnl_dump_put(skb, cb,
ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY); ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY);
if (!ehdr) { if (!ehdr) {
ret = -EMSGSIZE; ret = -EMSGSIZE;
goto out; break;
} }
ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TUNNEL_INFO_HEADER); ret = ethnl_fill_reply_header(skb, dev,
ETHTOOL_A_TUNNEL_INFO_HEADER);
if (ret < 0) { if (ret < 0) {
genlmsg_cancel(skb, ehdr); genlmsg_cancel(skb, ehdr);
goto out; break;
} }
ctx->req_info.dev = dev; ctx->req_info.dev = dev;
...@@ -279,21 +268,13 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb) ...@@ -279,21 +268,13 @@ int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
if (ret < 0) { if (ret < 0) {
genlmsg_cancel(skb, ehdr); genlmsg_cancel(skb, ehdr);
if (ret == -EOPNOTSUPP) if (ret == -EOPNOTSUPP)
goto cont; continue;
goto out; break;
} }
genlmsg_end(skb, ehdr); genlmsg_end(skb, ehdr);
cont:
idx++;
} }
}
out:
rtnl_unlock(); rtnl_unlock();
ctx->pos_hash = h;
ctx->pos_idx = idx;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
if (ret == -EMSGSIZE && skb->len) if (ret == -EMSGSIZE && skb->len)
return skb->len; return skb->len;
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment