Commit e0bc8927 authored by Alexei Starovoitov

Merge branch 'Generic XDP improvements'

Kumar Kartikeya says:

====================

This small series makes some improvements to generic XDP mode and brings it
closer to native XDP. Patch 1 splits out generic XDP processing into reusable
parts, patch 2 adds pointer-friendly wrappers for bitops (so we don't have to
cast the address of a local pointer back and forth to unsigned long *), patch 3
implements generic cpumap support (details in the commit message), and patch 4
allows devmap bpf prog execution before generic_xdp_tx is called.
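
For illustration only (not part of the series), here is a minimal userspace-style
sketch of the bit-tagging idea behind the __ptr_{set,clear,test}_bit wrappers from
patch 2; the macros below are simplified stand-ins, without the kernel's
typecheck_pointer() check:

    #include <assert.h>
    #include <stdio.h>

    /* Simplified stand-ins for the kernel's __ptr_{set,clear,test}_bit:
     * treat the pointer variable's storage as an unsigned long and set,
     * clear or test a low bit, relying on pointer alignment to keep that
     * bit unused in real pointer values.
     */
    #define ptr_set_bit(nr, addr)   (*(unsigned long *)(addr) |=  (1UL << (nr)))
    #define ptr_clear_bit(nr, addr) (*(unsigned long *)(addr) &= ~(1UL << (nr)))
    #define ptr_test_bit(nr, addr)  (*(unsigned long *)(addr) &   (1UL << (nr)))

    int main(void)
    {
            static int object;      /* stands in for an skb */
            void *p = &object;

            ptr_set_bit(0, &p);     /* tag: "this ring entry is an skb" */
            assert(ptr_test_bit(0, &p));

            ptr_clear_bit(0, &p);   /* untag before dereferencing */
            assert(p == (void *)&object);

            printf("tagged and untagged %p\n", p);
            return 0;
    }

In the series itself, cpu_map_generic_redirect() sets bit 0 on the skb pointer
before queueing it on the cpumap ptr_ring, and cpu_map_kthread_run() tests and
clears that bit to tell skbs apart from xdp_frames pulled off the same ring.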

Patch 5 updates a couple of selftests to adapt to the change in behavior
(specifying a devmap/cpumap prog fd in generic mode is now allowed).
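
As a rough sketch of what the updated selftests now expect (using the same libbpf
calls as the tests; program loading, error handling and teardown are omitted, and
the helper name and fd parameters below are hypothetical), generic-mode attach of
an XDP program that redirects into a cpumap carrying per-entry programs is now
expected to succeed:

    #include <bpf/bpf.h>
    #include <bpf/libbpf.h>
    #include <linux/if_link.h>

    #define IFINDEX_LO 1

    /* prog_fd:    XDP program doing bpf_redirect_map() into the cpumap
     * cm_prog_fd: program meant to run per cpumap entry (BPF_XDP_CPUMAP)
     * map_fd:     the BPF_MAP_TYPE_CPUMAP map using struct bpf_cpumap_val
     */
    static int attach_generic_with_cpumap_prog(int prog_fd, int cm_prog_fd, int map_fd)
    {
            struct bpf_cpumap_val val = {
                    .qsize = 192,
                    .bpf_prog.fd = cm_prog_fd,
            };
            __u32 idx = 0;
            int err;

            /* Before this series, generic (skb) mode attach of a program using
             * such a cpumap was rejected with -EINVAL; it now succeeds.
             */
            err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
            if (err)
                    return err;

            return bpf_map_update_elem(map_fd, &idx, &val, 0);
    }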

Changelog:
----------
v5 -> v6
v5: https://lore.kernel.org/bpf/20210701002759.381983-1-memxor@gmail.com
 * Put rcpu->prog check before RCU-bh section to avoid do_softirq (Jesper)

v4 -> v5
v4: https://lore.kernel.org/bpf/20210628114746.129669-1-memxor@gmail.com
 * Add comments and examples for new bitops macros (Alexei)

v3 -> v4
v3: https://lore.kernel.org/bpf/20210622202835.1151230-1-memxor@gmail.com
 * Add detach now that attach of XDP program succeeds (Toke)
 * Clean up the test to use new ASSERT macros

v2 -> v3
v2: https://lore.kernel.org/bpf/20210622195527.1110497-1-memxor@gmail.com
 * list_for_each_entry -> list_for_each_entry_safe (due to deletion of skb)

v1 -> v2
v1: https://lore.kernel.org/bpf/20210620233200.855534-1-memxor@gmail.com
 * Move __ptr_{set,clear,test}_bit to bitops.h (Toke)
   Also changed argument order to match the bit op they wrap.
 * Remove map value size checking functions for cpumap/devmap (Toke)
 * Rework prog run for skb in cpu_map_kthread_run (Toke)
 * Set skb->dev to dst->dev after devmap prog has run
 * Don't set xdp rxq that will be overwritten in cpumap prog run
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
parents a080cdcc 36246d5a
@@ -4,6 +4,7 @@
 #include <asm/types.h>
 #include <linux/bits.h>
+#include <linux/typecheck.h>
 #include <uapi/linux/kernel.h>
@@ -253,6 +254,55 @@ static __always_inline void __assign_bit(long nr, volatile unsigned long *addr,
 		__clear_bit(nr, addr);
 }
 
+/**
+ * __ptr_set_bit - Set bit in a pointer's value
+ * @nr: the bit to set
+ * @addr: the address of the pointer variable
+ *
+ * Example:
+ *	void *p = foo();
+ *	__ptr_set_bit(bit, &p);
+ */
+#define __ptr_set_bit(nr, addr)                         \
+	({                                              \
+		typecheck_pointer(*(addr));             \
+		__set_bit(nr, (unsigned long *)(addr)); \
+	})
+
+/**
+ * __ptr_clear_bit - Clear bit in a pointer's value
+ * @nr: the bit to clear
+ * @addr: the address of the pointer variable
+ *
+ * Example:
+ *	void *p = foo();
+ *	__ptr_clear_bit(bit, &p);
+ */
+#define __ptr_clear_bit(nr, addr)                         \
+	({                                                \
+		typecheck_pointer(*(addr));               \
+		__clear_bit(nr, (unsigned long *)(addr)); \
+	})
+
+/**
+ * __ptr_test_bit - Test bit in a pointer's value
+ * @nr: the bit to test
+ * @addr: the address of the pointer variable
+ *
+ * Example:
+ *	void *p = foo();
+ *	if (__ptr_test_bit(bit, &p)) {
+ *		...
+ *	} else {
+ *		...
+ *	}
+ */
+#define __ptr_test_bit(nr, addr)                       \
+	({                                             \
+		typecheck_pointer(*(addr));            \
+		test_bit(nr, (unsigned long *)(addr)); \
+	})
+
 #ifdef __KERNEL__
 #ifndef set_mask_bits
...
@@ -1508,12 +1508,12 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
			   struct bpf_prog *xdp_prog, struct bpf_map *map,
			   bool exclude_ingress);
-bool dev_map_can_have_prog(struct bpf_map *map);
 
 void __cpu_map_flush(void);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
		    struct net_device *dev_rx);
-bool cpu_map_prog_allowed(struct bpf_map *map);
+int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
+			     struct sk_buff *skb);
 
 /* Return map's numa specified by userspace */
 static inline int bpf_map_attr_numa_node(const union bpf_attr *attr)
@@ -1710,6 +1710,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu,
 	return 0;
 }
 
+static inline int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
+					   struct sk_buff *skb)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline bool cpu_map_prog_allowed(struct bpf_map *map)
 {
 	return false;
...
@@ -3984,6 +3984,8 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
 	__dev_kfree_skb_any(skb, SKB_REASON_CONSUMED);
 }
 
+u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
+			     struct bpf_prog *xdp_prog);
 void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog);
 int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb);
 int netif_rx(struct sk_buff *skb);
...
@@ -863,8 +863,8 @@ struct sk_buff {
 	__u8			tc_skip_classify:1;
 	__u8			tc_at_ingress:1;
 #endif
-#ifdef CONFIG_NET_REDIRECT
 	__u8			redirected:1;
+#ifdef CONFIG_NET_REDIRECT
 	__u8			from_ingress:1;
 #endif
 #ifdef CONFIG_TLS_DEVICE
@@ -4664,17 +4664,13 @@ static inline __wsum lco_csum(struct sk_buff *skb)
 
 static inline bool skb_is_redirected(const struct sk_buff *skb)
 {
-#ifdef CONFIG_NET_REDIRECT
 	return skb->redirected;
-#else
-	return false;
-#endif
 }
 
 static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
 {
-#ifdef CONFIG_NET_REDIRECT
 	skb->redirected = 1;
+#ifdef CONFIG_NET_REDIRECT
 	skb->from_ingress = from_ingress;
 	if (skb->from_ingress)
 		skb->tstamp = 0;
@@ -4683,9 +4679,7 @@ static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress)
 
 static inline void skb_reset_redirect(struct sk_buff *skb)
 {
-#ifdef CONFIG_NET_REDIRECT
 	skb->redirected = 0;
-#endif
 }
 
 static inline bool skb_csum_is_sctp(struct sk_buff *skb)
...
@@ -22,4 +22,13 @@
 	(void)__tmp; \
 })
 
+/*
+ * Check at compile time that something is a pointer type.
+ */
+#define typecheck_pointer(x) \
+({	typeof(x) __dummy; \
+	(void)sizeof(*__dummy); \
+	1; \
+})
+
 #endif		/* TYPECHECK_H_INCLUDED */
@@ -16,6 +16,7 @@
  * netstack, and assigning dedicated CPUs for this stage. This
  * basically allows for 10G wirespeed pre-filtering via bpf.
  */
+#include <linux/bitops.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/ptr_ring.h>
@@ -168,6 +169,46 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
 	}
 }
 
+static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
+				     struct list_head *listp,
+				     struct xdp_cpumap_stats *stats)
+{
+	struct sk_buff *skb, *tmp;
+	struct xdp_buff xdp;
+	u32 act;
+	int err;
+
+	list_for_each_entry_safe(skb, tmp, listp, list) {
+		act = bpf_prog_run_generic_xdp(skb, &xdp, rcpu->prog);
+		switch (act) {
+		case XDP_PASS:
+			break;
+		case XDP_REDIRECT:
+			skb_list_del_init(skb);
+			err = xdp_do_generic_redirect(skb->dev, skb, &xdp,
+						      rcpu->prog);
+			if (unlikely(err)) {
+				kfree_skb(skb);
+				stats->drop++;
+			} else {
+				stats->redirect++;
+			}
+			return;
+		default:
+			bpf_warn_invalid_xdp_action(act);
+			fallthrough;
+		case XDP_ABORTED:
+			trace_xdp_exception(skb->dev, rcpu->prog, act);
+			fallthrough;
+		case XDP_DROP:
+			skb_list_del_init(skb);
+			kfree_skb(skb);
+			stats->drop++;
+			return;
+		}
+	}
+}
+
 static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
				    void **frames, int n,
				    struct xdp_cpumap_stats *stats)
@@ -176,11 +217,6 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
 	struct xdp_buff xdp;
 	int i, nframes = 0;
 
-	if (!rcpu->prog)
-		return n;
-
-	rcu_read_lock_bh();
-
 	xdp_set_return_frame_no_direct();
 	xdp.rxq = &rxq;
@@ -227,17 +263,37 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu,
 		}
 	}
 
+	xdp_clear_return_frame_no_direct();
+
+	return nframes;
+}
+
+#define CPUMAP_BATCH 8
+
+static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
+				int xdp_n, struct xdp_cpumap_stats *stats,
+				struct list_head *list)
+{
+	int nframes;
+
+	if (!rcpu->prog)
+		return xdp_n;
+
+	rcu_read_lock_bh();
+
+	nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, xdp_n, stats);
+
 	if (stats->redirect)
-		xdp_do_flush_map();
+		xdp_do_flush();
 
-	xdp_clear_return_frame_no_direct();
+	if (unlikely(!list_empty(list)))
+		cpu_map_bpf_prog_run_skb(rcpu, list, stats);
 
 	rcu_read_unlock_bh(); /* resched point, may call do_softirq() */
 
 	return nframes;
 }
 
-#define CPUMAP_BATCH 8
-
 static int cpu_map_kthread_run(void *data)
 {
@@ -254,9 +310,9 @@ static int cpu_map_kthread_run(void *data)
 		struct xdp_cpumap_stats stats = {}; /* zero stats */
 		unsigned int kmem_alloc_drops = 0, sched = 0;
 		gfp_t gfp = __GFP_ZERO | GFP_ATOMIC;
+		int i, n, m, nframes, xdp_n;
 		void *frames[CPUMAP_BATCH];
 		void *skbs[CPUMAP_BATCH];
-		int i, n, m, nframes;
 		LIST_HEAD(list);
 
 		/* Release CPU reschedule checks */
@@ -280,9 +336,20 @@ static int cpu_map_kthread_run(void *data)
 		 */
 		n = __ptr_ring_consume_batched(rcpu->queue, frames,
					       CPUMAP_BATCH);
-		for (i = 0; i < n; i++) {
+		for (i = 0, xdp_n = 0; i < n; i++) {
 			void *f = frames[i];
-			struct page *page = virt_to_page(f);
+			struct page *page;
+
+			if (unlikely(__ptr_test_bit(0, &f))) {
+				struct sk_buff *skb = f;
+
+				__ptr_clear_bit(0, &skb);
+				list_add_tail(&skb->list, &list);
+				continue;
+			}
+
+			frames[xdp_n++] = f;
+			page = virt_to_page(f);
 
 			/* Bring struct page memory area to curr CPU. Read by
 			 * build_skb_around via page_is_pfmemalloc(), and when
@@ -292,7 +359,7 @@ static int cpu_map_kthread_run(void *data)
 		}
 
 		/* Support running another XDP prog on this CPU */
-		nframes = cpu_map_bpf_prog_run_xdp(rcpu, frames, n, &stats);
+		nframes = cpu_map_bpf_prog_run(rcpu, frames, xdp_n, &stats, &list);
 		if (nframes) {
 			m = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, skbs);
 			if (unlikely(m == 0)) {
@@ -330,12 +397,6 @@ static int cpu_map_kthread_run(void *data)
 	return 0;
 }
 
-bool cpu_map_prog_allowed(struct bpf_map *map)
-{
-	return map->map_type == BPF_MAP_TYPE_CPUMAP &&
-	       map->value_size != offsetofend(struct bpf_cpumap_val, qsize);
-}
-
 static int __cpu_map_load_bpf_program(struct bpf_cpu_map_entry *rcpu, int fd)
 {
 	struct bpf_prog *prog;
@@ -701,6 +762,25 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 	return 0;
 }
 
+int cpu_map_generic_redirect(struct bpf_cpu_map_entry *rcpu,
+			     struct sk_buff *skb)
+{
+	int ret;
+
+	__skb_pull(skb, skb->mac_len);
+	skb_set_redirected(skb, false);
+	__ptr_set_bit(0, &skb);
+
+	ret = ptr_ring_produce(rcpu->queue, skb);
+	if (ret < 0)
+		goto trace;
+
+	wake_up_process(rcpu->kthread);
+trace:
+	trace_xdp_cpumap_enqueue(rcpu->map_id, !ret, !!ret, rcpu->cpu);
+	return ret;
+}
+
 void __cpu_map_flush(void)
 {
 	struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
...
@@ -322,16 +322,6 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
 	return -ENOENT;
 }
 
-bool dev_map_can_have_prog(struct bpf_map *map)
-{
-	if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
-	     map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
-	    map->value_size != offsetofend(struct bpf_devmap_val, ifindex))
-		return true;
-
-	return false;
-}
-
 static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
				struct xdp_frame **frames, int n,
				struct net_device *dev)
@@ -499,6 +489,37 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 	return 0;
 }
 
+static u32 dev_map_bpf_prog_run_skb(struct sk_buff *skb, struct bpf_dtab_netdev *dst)
+{
+	struct xdp_txq_info txq = { .dev = dst->dev };
+	struct xdp_buff xdp;
+	u32 act;
+
+	if (!dst->xdp_prog)
+		return XDP_PASS;
+
+	__skb_pull(skb, skb->mac_len);
+	xdp.txq = &txq;
+
+	act = bpf_prog_run_generic_xdp(skb, &xdp, dst->xdp_prog);
+	switch (act) {
+	case XDP_PASS:
+		__skb_push(skb, skb->mac_len);
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		fallthrough;
+	case XDP_ABORTED:
+		trace_xdp_exception(dst->dev, dst->xdp_prog, act);
+		fallthrough;
+	case XDP_DROP:
+		kfree_skb(skb);
+		break;
+	}
+
+	return act;
+}
+
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
		    struct net_device *dev_rx)
 {
@@ -614,6 +635,14 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 	err = xdp_ok_fwd_dev(dst->dev, skb->len);
 	if (unlikely(err))
 		return err;
+
+	/* Redirect has already succeeded semantically at this point, so we just
+	 * return 0 even if packet is dropped. Helper below takes care of
+	 * freeing skb.
+	 */
+	if (dev_map_bpf_prog_run_skb(skb, dst) != XDP_PASS)
+		return 0;
+
 	skb->dev = dst->dev;
 	generic_xdp_tx(skb, xdp_prog);
...
@@ -4744,45 +4744,18 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb)
 	return rxqueue;
 }
 
-static u32 netif_receive_generic_xdp(struct sk_buff *skb,
-				     struct xdp_buff *xdp,
-				     struct bpf_prog *xdp_prog)
+u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp,
+			     struct bpf_prog *xdp_prog)
 {
 	void *orig_data, *orig_data_end, *hard_start;
 	struct netdev_rx_queue *rxqueue;
-	u32 metalen, act = XDP_DROP;
 	bool orig_bcast, orig_host;
 	u32 mac_len, frame_sz;
 	__be16 orig_eth_type;
 	struct ethhdr *eth;
+	u32 metalen, act;
 	int off;
 
-	/* Reinjected packets coming from act_mirred or similar should
-	 * not get XDP generic processing.
-	 */
-	if (skb_is_redirected(skb))
-		return XDP_PASS;
-
-	/* XDP packets must be linear and must have sufficient headroom
-	 * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
-	 * native XDP provides, thus we need to do it here as well.
-	 */
-	if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
-	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
-		int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
-		int troom = skb->tail + skb->data_len - skb->end;
-
-		/* In case we have to go down the path and also linearize,
-		 * then lets do the pskb_expand_head() work just once here.
-		 */
-		if (pskb_expand_head(skb,
-				     hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
-				     troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
-			goto do_drop;
-		if (skb_linearize(skb))
-			goto do_drop;
-	}
-
 	/* The XDP program wants to see the packet starting at the MAC
 	 * header.
 	 */
@@ -4837,6 +4810,13 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 		skb->protocol = eth_type_trans(skb, skb->dev);
 	}
 
+	/* Redirect/Tx gives L2 packet, code that will reuse skb must __skb_pull
+	 * before calling us again on redirect path. We do not call do_redirect
+	 * as we leave that up to the caller.
+	 *
+	 * Caller is responsible for managing lifetime of skb (i.e. calling
+	 * kfree_skb in response to actions it cannot handle/XDP_DROP).
+	 */
 	switch (act) {
 	case XDP_REDIRECT:
 	case XDP_TX:
@@ -4847,6 +4827,49 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 		if (metalen)
 			skb_metadata_set(skb, metalen);
 		break;
+	}
+
+	return act;
+}
+
+static u32 netif_receive_generic_xdp(struct sk_buff *skb,
+				     struct xdp_buff *xdp,
+				     struct bpf_prog *xdp_prog)
+{
+	u32 act = XDP_DROP;
+
+	/* Reinjected packets coming from act_mirred or similar should
+	 * not get XDP generic processing.
+	 */
+	if (skb_is_redirected(skb))
+		return XDP_PASS;
+
+	/* XDP packets must be linear and must have sufficient headroom
+	 * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
+	 * native XDP provides, thus we need to do it here as well.
+	 */
+	if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
+	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+		int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
+		int troom = skb->tail + skb->data_len - skb->end;
+
+		/* In case we have to go down the path and also linearize,
+		 * then lets do the pskb_expand_head() work just once here.
+		 */
+		if (pskb_expand_head(skb,
+				     hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
+				     troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
+			goto do_drop;
+		if (skb_linearize(skb))
+			goto do_drop;
+	}
+
+	act = bpf_prog_run_generic_xdp(skb, xdp, xdp_prog);
+	switch (act) {
+	case XDP_REDIRECT:
+	case XDP_TX:
+	case XDP_PASS:
+		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
 		fallthrough;
@@ -5312,7 +5335,6 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
 			ret = NET_RX_DROP;
 			goto out;
 		}
-		skb_reset_mac_len(skb);
 	}
 
 	if (eth_type_vlan(skb->protocol)) {
@@ -5638,25 +5660,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 	struct bpf_prog *new = xdp->prog;
 	int ret = 0;
 
-	if (new) {
-		u32 i;
-
-		mutex_lock(&new->aux->used_maps_mutex);
-
-		/* generic XDP does not work with DEVMAPs that can
-		 * have a bpf_prog installed on an entry
-		 */
-		for (i = 0; i < new->aux->used_map_cnt; i++) {
-			if (dev_map_can_have_prog(new->aux->used_maps[i]) ||
-			    cpu_map_prog_allowed(new->aux->used_maps[i])) {
-				mutex_unlock(&new->aux->used_maps_mutex);
-				return -EINVAL;
-			}
-		}
-
-		mutex_unlock(&new->aux->used_maps_mutex);
-	}
-
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		rcu_assign_pointer(dev->xdp_prog, new);
...
@@ -4040,8 +4040,12 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
 			goto err;
 		consume_skb(skb);
 		break;
+	case BPF_MAP_TYPE_CPUMAP:
+		err = cpu_map_generic_redirect(fwd, skb);
+		if (unlikely(err))
+			goto err;
+		break;
 	default:
-		/* TODO: Handle BPF_MAP_TYPE_CPUMAP */
 		err = -EBADRQC;
 		goto err;
 	}
...
@@ -7,64 +7,53 @@
 #define IFINDEX_LO 1
 
-void test_xdp_with_cpumap_helpers(void)
+void test_xdp_cpumap_attach(void)
 {
 	struct test_xdp_with_cpumap_helpers *skel;
 	struct bpf_prog_info info = {};
+	__u32 len = sizeof(info);
 	struct bpf_cpumap_val val = {
 		.qsize = 192,
 	};
-	__u32 duration = 0, idx = 0;
-	__u32 len = sizeof(info);
 	int err, prog_fd, map_fd;
+	__u32 idx = 0;
 
 	skel = test_xdp_with_cpumap_helpers__open_and_load();
-	if (CHECK_FAIL(!skel)) {
-		perror("test_xdp_with_cpumap_helpers__open_and_load");
+	if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load"))
 		return;
-	}
 
-	/* can not attach program with cpumaps that allow programs
-	 * as xdp generic
-	 */
 	prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
 	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
-	CHECK(err == 0, "Generic attach of program with 8-byte CPUMAP",
-	      "should have failed\n");
+	if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP"))
+		goto out_close;
+
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+	ASSERT_OK(err, "XDP program detach");
 
 	prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
 	map_fd = bpf_map__fd(skel->maps.cpu_map);
 	err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
-	if (CHECK_FAIL(err))
+	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
 		goto out_close;
 
 	val.bpf_prog.fd = prog_fd;
 	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
-	CHECK(err, "Add program to cpumap entry", "err %d errno %d\n",
-	      err, errno);
+	ASSERT_OK(err, "Add program to cpumap entry");
 
 	err = bpf_map_lookup_elem(map_fd, &idx, &val);
-	CHECK(err, "Read cpumap entry", "err %d errno %d\n", err, errno);
-	CHECK(info.id != val.bpf_prog.id, "Expected program id in cpumap entry",
-	      "expected %u read %u\n", info.id, val.bpf_prog.id);
+	ASSERT_OK(err, "Read cpumap entry");
+	ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
 
 	/* can not attach BPF_XDP_CPUMAP program to a device */
 	err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
-	CHECK(err == 0, "Attach of BPF_XDP_CPUMAP program",
-	      "should have failed\n");
+	if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program"))
+		bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
 
 	val.qsize = 192;
 	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
 	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
-	CHECK(err == 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry",
-	      "should have failed\n");
+	ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
 
 out_close:
 	test_xdp_with_cpumap_helpers__destroy(skel);
 }
-
-void test_xdp_cpumap_attach(void)
-{
-	if (test__start_subtest("cpumap_with_progs"))
-		test_xdp_with_cpumap_helpers();
-}
@@ -16,50 +16,45 @@ void test_xdp_with_devmap_helpers(void)
 		.ifindex = IFINDEX_LO,
 	};
 	__u32 len = sizeof(info);
-	__u32 duration = 0, idx = 0;
 	int err, dm_fd, map_fd;
+	__u32 idx = 0;
 
 	skel = test_xdp_with_devmap_helpers__open_and_load();
-	if (CHECK_FAIL(!skel)) {
-		perror("test_xdp_with_devmap_helpers__open_and_load");
+	if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
 		return;
-	}
 
-	/* can not attach program with DEVMAPs that allow programs
-	 * as xdp generic
-	 */
 	dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
 	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
-	CHECK(err == 0, "Generic attach of program with 8-byte devmap",
-	      "should have failed\n");
+	if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap"))
+		goto out_close;
+
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
+	ASSERT_OK(err, "XDP program detach");
 
 	dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
 	map_fd = bpf_map__fd(skel->maps.dm_ports);
 	err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
-	if (CHECK_FAIL(err))
+	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
 		goto out_close;
 
 	val.bpf_prog.fd = dm_fd;
 	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
-	CHECK(err, "Add program to devmap entry",
-	      "err %d errno %d\n", err, errno);
+	ASSERT_OK(err, "Add program to devmap entry");
 
 	err = bpf_map_lookup_elem(map_fd, &idx, &val);
-	CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
-	CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
-	      "expected %u read %u\n", info.id, val.bpf_prog.id);
+	ASSERT_OK(err, "Read devmap entry");
+	ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
 
 	/* can not attach BPF_XDP_DEVMAP program to a device */
 	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
-	CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
-	      "should have failed\n");
+	if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program"))
+		bpf_set_link_xdp_fd(IFINDEX_LO, -1, XDP_FLAGS_SKB_MODE);
 
 	val.ifindex = 1;
 	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
 	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
-	CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
-	      "should have failed\n");
+	ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
 
 out_close:
 	test_xdp_with_devmap_helpers__destroy(skel);
@@ -68,12 +63,10 @@ void test_xdp_with_devmap_helpers(void)
 void test_neg_xdp_devmap_helpers(void)
 {
 	struct test_xdp_devmap_helpers *skel;
-	__u32 duration = 0;
 
 	skel = test_xdp_devmap_helpers__open_and_load();
-	if (CHECK(skel,
-		  "Load of XDP program accessing egress ifindex without attach type",
-		  "should have failed\n")) {
+	if (!ASSERT_EQ(skel, NULL,
+		       "Load of XDP program accessing egress ifindex without attach type")) {
 		test_xdp_devmap_helpers__destroy(skel);
 	}
 }
...