Commit 75ccae62 authored by Toke Høiland-Jørgensen, committed by Alexei Starovoitov

xdp: Move devmap bulk queue into struct net_device

Commit 96360004 ("xdp: Make devmap flush_list common for all map
instances") changed devmap flushing to be a global operation instead of a
per-map operation. However, the queue structure used for bulking was still
allocated as part of the containing map.

This patch moves the devmap bulk queue into struct net_device. The
motivation for this is reusing it for the non-map variant of XDP_REDIRECT,
which will be changed in a subsequent commit.  To avoid other fields of
struct net_device moving to different cache lines, we also move a couple of
other members around.

We defer the actual allocation of the bulk queue structure until the
NETDEV_REGISTER notification in devmap.c. This makes it possible to check for
ndo_xdp_xmit support before allocating the structure, which is not possible
at the time struct net_device is allocated. However, we keep the freeing in
free_netdev() to avoid adding another RCU callback on NETDEV_UNREGISTER.

Because of this change, we lose the reference back to the map that
originated the redirect, so change the tracepoint to always return 0 as the
map ID and index. Otherwise no functional change is intended with this
patch.

After this patch, the relevant part of struct net_device looks like this,
according to pahole:

	/* --- cacheline 14 boundary (896 bytes) --- */
	struct netdev_queue *      _tx __attribute__((__aligned__(64))); /*   896     8 */
	unsigned int               num_tx_queues;        /*   904     4 */
	unsigned int               real_num_tx_queues;   /*   908     4 */
	struct Qdisc *             qdisc;                /*   912     8 */
	unsigned int               tx_queue_len;         /*   920     4 */
	spinlock_t                 tx_global_lock;       /*   924     4 */
	struct xdp_dev_bulk_queue * xdp_bulkq;           /*   928     8 */
	struct xps_dev_maps *      xps_cpus_map;         /*   936     8 */
	struct xps_dev_maps *      xps_rxqs_map;         /*   944     8 */
	struct mini_Qdisc *        miniq_egress;         /*   952     8 */
	/* --- cacheline 15 boundary (960 bytes) --- */
	struct hlist_head  qdisc_hash[16];               /*   960   128 */
	/* --- cacheline 17 boundary (1088 bytes) --- */
	struct timer_list  watchdog_timer;               /*  1088    40 */

	/* XXX last struct has 4 bytes of padding */

	int                        watchdog_timeo;       /*  1128     4 */

	/* XXX 4 bytes hole, try to pack */

	struct list_head   todo_list;                    /*  1136    16 */
	/* --- cacheline 18 boundary (1152 bytes) --- */
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/157918768397.1458396.12673224324627072349.stgit@toke.dk
parent 20f21d98
...@@ -876,6 +876,7 @@ enum bpf_netdev_command { ...@@ -876,6 +876,7 @@ enum bpf_netdev_command {
struct bpf_prog_offload_ops; struct bpf_prog_offload_ops;
struct netlink_ext_ack; struct netlink_ext_ack;
struct xdp_umem; struct xdp_umem;
struct xdp_dev_bulk_queue;
struct netdev_bpf { struct netdev_bpf {
enum bpf_netdev_command command; enum bpf_netdev_command command;
...@@ -1986,12 +1987,10 @@ struct net_device { ...@@ -1986,12 +1987,10 @@ struct net_device {
unsigned int num_tx_queues; unsigned int num_tx_queues;
unsigned int real_num_tx_queues; unsigned int real_num_tx_queues;
struct Qdisc *qdisc; struct Qdisc *qdisc;
#ifdef CONFIG_NET_SCHED
DECLARE_HASHTABLE (qdisc_hash, 4);
#endif
unsigned int tx_queue_len; unsigned int tx_queue_len;
spinlock_t tx_global_lock; spinlock_t tx_global_lock;
int watchdog_timeo;
struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
#ifdef CONFIG_XPS #ifdef CONFIG_XPS
struct xps_dev_maps __rcu *xps_cpus_map; struct xps_dev_maps __rcu *xps_cpus_map;
...@@ -2001,11 +2000,15 @@ struct net_device { ...@@ -2001,11 +2000,15 @@ struct net_device {
struct mini_Qdisc __rcu *miniq_egress; struct mini_Qdisc __rcu *miniq_egress;
#endif #endif
#ifdef CONFIG_NET_SCHED
DECLARE_HASHTABLE (qdisc_hash, 4);
#endif
/* These may be needed for future network-power-down code. */ /* These may be needed for future network-power-down code. */
struct timer_list watchdog_timer; struct timer_list watchdog_timer;
int watchdog_timeo;
int __percpu *pcpu_refcnt;
struct list_head todo_list; struct list_head todo_list;
int __percpu *pcpu_refcnt;
struct list_head link_watch_list; struct list_head link_watch_list;
......
...@@ -278,7 +278,7 @@ TRACE_EVENT(xdp_devmap_xmit, ...@@ -278,7 +278,7 @@ TRACE_EVENT(xdp_devmap_xmit,
), ),
TP_fast_assign( TP_fast_assign(
__entry->map_id = map->id; __entry->map_id = map ? map->id : 0;
__entry->act = XDP_REDIRECT; __entry->act = XDP_REDIRECT;
__entry->map_index = map_index; __entry->map_index = map_index;
__entry->drops = drops; __entry->drops = drops;
......
...@@ -53,13 +53,11 @@ ...@@ -53,13 +53,11 @@
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
#define DEV_MAP_BULK_SIZE 16 #define DEV_MAP_BULK_SIZE 16
struct bpf_dtab_netdev; struct xdp_dev_bulk_queue {
struct xdp_bulk_queue {
struct xdp_frame *q[DEV_MAP_BULK_SIZE]; struct xdp_frame *q[DEV_MAP_BULK_SIZE];
struct list_head flush_node; struct list_head flush_node;
struct net_device *dev;
struct net_device *dev_rx; struct net_device *dev_rx;
struct bpf_dtab_netdev *obj;
unsigned int count; unsigned int count;
}; };
...@@ -67,9 +65,8 @@ struct bpf_dtab_netdev { ...@@ -67,9 +65,8 @@ struct bpf_dtab_netdev {
struct net_device *dev; /* must be first member, due to tracepoint */ struct net_device *dev; /* must be first member, due to tracepoint */
struct hlist_node index_hlist; struct hlist_node index_hlist;
struct bpf_dtab *dtab; struct bpf_dtab *dtab;
struct xdp_bulk_queue __percpu *bulkq;
struct rcu_head rcu; struct rcu_head rcu;
unsigned int idx; /* keep track of map index for tracepoint */ unsigned int idx;
}; };
struct bpf_dtab { struct bpf_dtab {
...@@ -219,7 +216,6 @@ static void dev_map_free(struct bpf_map *map) ...@@ -219,7 +216,6 @@ static void dev_map_free(struct bpf_map *map)
hlist_for_each_entry_safe(dev, next, head, index_hlist) { hlist_for_each_entry_safe(dev, next, head, index_hlist) {
hlist_del_rcu(&dev->index_hlist); hlist_del_rcu(&dev->index_hlist);
free_percpu(dev->bulkq);
dev_put(dev->dev); dev_put(dev->dev);
kfree(dev); kfree(dev);
} }
...@@ -234,7 +230,6 @@ static void dev_map_free(struct bpf_map *map) ...@@ -234,7 +230,6 @@ static void dev_map_free(struct bpf_map *map)
if (!dev) if (!dev)
continue; continue;
free_percpu(dev->bulkq);
dev_put(dev->dev); dev_put(dev->dev);
kfree(dev); kfree(dev);
} }
...@@ -320,10 +315,9 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key, ...@@ -320,10 +315,9 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
return -ENOENT; return -ENOENT;
} }
static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags) static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
{ {
struct bpf_dtab_netdev *obj = bq->obj; struct net_device *dev = bq->dev;
struct net_device *dev = obj->dev;
int sent = 0, drops = 0, err = 0; int sent = 0, drops = 0, err = 0;
int i; int i;
...@@ -346,8 +340,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags) ...@@ -346,8 +340,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
out: out:
bq->count = 0; bq->count = 0;
trace_xdp_devmap_xmit(&obj->dtab->map, obj->idx, trace_xdp_devmap_xmit(NULL, 0, sent, drops, bq->dev_rx, dev, err);
sent, drops, bq->dev_rx, dev, err);
bq->dev_rx = NULL; bq->dev_rx = NULL;
__list_del_clearprev(&bq->flush_node); __list_del_clearprev(&bq->flush_node);
return 0; return 0;
...@@ -374,7 +367,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags) ...@@ -374,7 +367,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
void __dev_map_flush(void) void __dev_map_flush(void)
{ {
struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list); struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
struct xdp_bulk_queue *bq, *tmp; struct xdp_dev_bulk_queue *bq, *tmp;
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
...@@ -401,12 +394,12 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key) ...@@ -401,12 +394,12 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
/* Runs under RCU-read-side, plus in softirq under NAPI protection. /* Runs under RCU-read-side, plus in softirq under NAPI protection.
* Thus, safe percpu variable access. * Thus, safe percpu variable access.
*/ */
static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf, static int bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
struct net_device *dev_rx) struct net_device *dev_rx)
{ {
struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list); struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq); struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
bq_xmit_all(bq, 0); bq_xmit_all(bq, 0);
...@@ -444,7 +437,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, ...@@ -444,7 +437,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
if (unlikely(!xdpf)) if (unlikely(!xdpf))
return -EOVERFLOW; return -EOVERFLOW;
return bq_enqueue(dst, xdpf, dev_rx); return bq_enqueue(dev, xdpf, dev_rx);
} }
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
...@@ -483,7 +476,6 @@ static void __dev_map_entry_free(struct rcu_head *rcu) ...@@ -483,7 +476,6 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
struct bpf_dtab_netdev *dev; struct bpf_dtab_netdev *dev;
dev = container_of(rcu, struct bpf_dtab_netdev, rcu); dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
free_percpu(dev->bulkq);
dev_put(dev->dev); dev_put(dev->dev);
kfree(dev); kfree(dev);
} }
...@@ -538,30 +530,15 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, ...@@ -538,30 +530,15 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
u32 ifindex, u32 ifindex,
unsigned int idx) unsigned int idx)
{ {
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
struct bpf_dtab_netdev *dev; struct bpf_dtab_netdev *dev;
struct xdp_bulk_queue *bq;
int cpu;
dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node); dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
dtab->map.numa_node);
if (!dev) if (!dev)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
sizeof(void *), gfp);
if (!dev->bulkq) {
kfree(dev);
return ERR_PTR(-ENOMEM);
}
for_each_possible_cpu(cpu) {
bq = per_cpu_ptr(dev->bulkq, cpu);
bq->obj = dev;
}
dev->dev = dev_get_by_index(net, ifindex); dev->dev = dev_get_by_index(net, ifindex);
if (!dev->dev) { if (!dev->dev) {
free_percpu(dev->bulkq);
kfree(dev); kfree(dev);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
...@@ -721,9 +698,23 @@ static int dev_map_notification(struct notifier_block *notifier, ...@@ -721,9 +698,23 @@ static int dev_map_notification(struct notifier_block *notifier,
{ {
struct net_device *netdev = netdev_notifier_info_to_dev(ptr); struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
struct bpf_dtab *dtab; struct bpf_dtab *dtab;
int i; int i, cpu;
switch (event) { switch (event) {
case NETDEV_REGISTER:
if (!netdev->netdev_ops->ndo_xdp_xmit || netdev->xdp_bulkq)
break;
/* will be freed in free_netdev() */
netdev->xdp_bulkq =
__alloc_percpu_gfp(sizeof(struct xdp_dev_bulk_queue),
sizeof(void *), GFP_ATOMIC);
if (!netdev->xdp_bulkq)
return NOTIFY_BAD;
for_each_possible_cpu(cpu)
per_cpu_ptr(netdev->xdp_bulkq, cpu)->dev = netdev;
break;
case NETDEV_UNREGISTER: case NETDEV_UNREGISTER:
/* This rcu_read_lock/unlock pair is needed because /* This rcu_read_lock/unlock pair is needed because
* dev_map_list is an RCU list AND to ensure a delete * dev_map_list is an RCU list AND to ensure a delete
......
...@@ -9847,6 +9847,8 @@ void free_netdev(struct net_device *dev) ...@@ -9847,6 +9847,8 @@ void free_netdev(struct net_device *dev)
free_percpu(dev->pcpu_refcnt); free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL; dev->pcpu_refcnt = NULL;
free_percpu(dev->xdp_bulkq);
dev->xdp_bulkq = NULL;
netdev_unregister_lockdep_key(dev); netdev_unregister_lockdep_key(dev);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment