Commit d1396004, authored by Toshiaki Makita, committed by Daniel Borkmann

veth: Add XDP TX and REDIRECT

This allows further redirection of xdp_frames like

 NIC   -> veth--veth -> veth--veth
 (XDP)          (XDP)         (XDP)

The intermediate XDP, redirecting packets from NIC to the other veth,
reuses xdp_mem_info from NIC so that page recycling of the NIC works on
the destination veth's XDP.
With this scheme, returning a frame (xdp_return_frame()) is not fully
guarded by NAPI, since another NAPI handler on another CPU may use the
same xdp_mem_info concurrently. Thus, disable napi_direct with
xdp_set_return_frame_no_direct() while in NAPI context.

v8:
- Don't use xdp_frame pointer address for data_hard_start of xdp_buff.

v4:
- Use xdp_[set|clear]_return_frame_no_direct() instead of a flag in
  xdp_mem_info.

v3:
- Fix double free when veth_xdp_tx() returns a positive value.
- Convert xdp_xmit and xdp_redir variables into flags.
Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent 2539650f
...@@ -32,6 +32,10 @@ ...@@ -32,6 +32,10 @@
#define VETH_RING_SIZE 256 #define VETH_RING_SIZE 256
#define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN) #define VETH_XDP_HEADROOM (XDP_PACKET_HEADROOM + NET_IP_ALIGN)
/* Separating two types of XDP xmit.
 *
 * These are flag bits OR-ed into the xdp_xmit mask that veth_poll() passes
 * down to the receive helpers, so that each kind of transmit can be flushed
 * separately once the poll loop has finished.
 */
/* Set after a successful XDP_TX; veth_poll() then calls veth_xdp_flush(). */
#define VETH_XDP_TX BIT(0)
/* Set after a successful XDP_REDIRECT; veth_poll() then calls
 * xdp_do_flush_map().
 */
#define VETH_XDP_REDIR BIT(1)
struct pcpu_vstats { struct pcpu_vstats {
u64 packets; u64 packets;
u64 bytes; u64 bytes;
...@@ -45,6 +49,7 @@ struct veth_priv { ...@@ -45,6 +49,7 @@ struct veth_priv {
struct bpf_prog *_xdp_prog; struct bpf_prog *_xdp_prog;
struct net_device __rcu *peer; struct net_device __rcu *peer;
atomic64_t dropped; atomic64_t dropped;
struct xdp_mem_info xdp_mem;
unsigned requested_headroom; unsigned requested_headroom;
bool rx_notify_masked; bool rx_notify_masked;
struct ptr_ring xdp_ring; struct ptr_ring xdp_ring;
...@@ -317,12 +322,44 @@ static int veth_xdp_xmit(struct net_device *dev, int n, ...@@ -317,12 +322,44 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
return n - drops; return n - drops;
} }
/*
 * veth_xdp_flush - flush the XDP state of the peer of @dev.
 * @dev: the veth device whose peer should be flushed.
 *
 * Looks up the peer under rcu_read_lock() and calls __veth_xdp_flush() on
 * the peer's private data. Does nothing when there is no peer, or when the
 * peer has no XDP program set.
 */
static void veth_xdp_flush(struct net_device *dev)
{
	struct veth_priv *self = netdev_priv(dev);
	struct veth_priv *peer_priv;
	struct net_device *peer;

	rcu_read_lock();

	peer = rcu_dereference(self->peer);
	if (unlikely(!peer)) {
		/* No peer device: nothing to flush. */
		rcu_read_unlock();
		return;
	}

	peer_priv = netdev_priv(peer);
	if (unlikely(!rcu_access_pointer(peer_priv->xdp_prog))) {
		/*
		 * No XDP program on the receive side; presumably its
		 * xdp_ring is not initialized in that case, so skip.
		 */
		rcu_read_unlock();
		return;
	}

	__veth_xdp_flush(peer_priv);
	rcu_read_unlock();
}
/*
 * veth_xdp_tx - transmit a single XDP buffer through @dev.
 * @dev: device to transmit on.
 * @xdp: buffer to convert into an xdp_frame and send.
 *
 * Returns -EOVERFLOW when convert_to_xdp_frame() yields no frame;
 * otherwise returns whatever veth_xdp_xmit() returns for the one frame.
 */
static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
{
	struct xdp_frame *xdpf;
	int ret;

	xdpf = convert_to_xdp_frame(xdp);
	if (unlikely(!xdpf))
		ret = -EOVERFLOW;
	else
		ret = veth_xdp_xmit(dev, 1, &xdpf, 0);

	return ret;
}
static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv, static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
struct xdp_frame *frame) struct xdp_frame *frame,
unsigned int *xdp_xmit)
{ {
void *hard_start = frame->data - frame->headroom; void *hard_start = frame->data - frame->headroom;
void *head = hard_start - sizeof(struct xdp_frame); void *head = hard_start - sizeof(struct xdp_frame);
int len = frame->len, delta = 0; int len = frame->len, delta = 0;
struct xdp_frame orig_frame;
struct bpf_prog *xdp_prog; struct bpf_prog *xdp_prog;
unsigned int headroom; unsigned int headroom;
struct sk_buff *skb; struct sk_buff *skb;
...@@ -346,6 +383,29 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv, ...@@ -346,6 +383,29 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
delta = frame->data - xdp.data; delta = frame->data - xdp.data;
len = xdp.data_end - xdp.data; len = xdp.data_end - xdp.data;
break; break;
case XDP_TX:
orig_frame = *frame;
xdp.data_hard_start = head;
xdp.rxq->mem = frame->mem;
if (unlikely(veth_xdp_tx(priv->dev, &xdp) < 0)) {
trace_xdp_exception(priv->dev, xdp_prog, act);
frame = &orig_frame;
goto err_xdp;
}
*xdp_xmit |= VETH_XDP_TX;
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
orig_frame = *frame;
xdp.data_hard_start = head;
xdp.rxq->mem = frame->mem;
if (xdp_do_redirect(priv->dev, &xdp, xdp_prog)) {
frame = &orig_frame;
goto err_xdp;
}
*xdp_xmit |= VETH_XDP_REDIR;
rcu_read_unlock();
goto xdp_xmit;
default: default:
bpf_warn_invalid_xdp_action(act); bpf_warn_invalid_xdp_action(act);
case XDP_ABORTED: case XDP_ABORTED:
...@@ -370,12 +430,13 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv, ...@@ -370,12 +430,13 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_priv *priv,
err_xdp: err_xdp:
rcu_read_unlock(); rcu_read_unlock();
xdp_return_frame(frame); xdp_return_frame(frame);
xdp_xmit:
return NULL; return NULL;
} }
static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv, static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
struct sk_buff *skb) struct sk_buff *skb,
unsigned int *xdp_xmit)
{ {
u32 pktlen, headroom, act, metalen; u32 pktlen, headroom, act, metalen;
void *orig_data, *orig_data_end; void *orig_data, *orig_data_end;
...@@ -447,6 +508,26 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv, ...@@ -447,6 +508,26 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
switch (act) { switch (act) {
case XDP_PASS: case XDP_PASS:
break; break;
case XDP_TX:
get_page(virt_to_page(xdp.data));
consume_skb(skb);
xdp.rxq->mem = priv->xdp_mem;
if (unlikely(veth_xdp_tx(priv->dev, &xdp) < 0)) {
trace_xdp_exception(priv->dev, xdp_prog, act);
goto err_xdp;
}
*xdp_xmit |= VETH_XDP_TX;
rcu_read_unlock();
goto xdp_xmit;
case XDP_REDIRECT:
get_page(virt_to_page(xdp.data));
consume_skb(skb);
xdp.rxq->mem = priv->xdp_mem;
if (xdp_do_redirect(priv->dev, &xdp, xdp_prog))
goto err_xdp;
*xdp_xmit |= VETH_XDP_REDIR;
rcu_read_unlock();
goto xdp_xmit;
default: default:
bpf_warn_invalid_xdp_action(act); bpf_warn_invalid_xdp_action(act);
case XDP_ABORTED: case XDP_ABORTED:
...@@ -477,9 +558,15 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv, ...@@ -477,9 +558,15 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
rcu_read_unlock(); rcu_read_unlock();
kfree_skb(skb); kfree_skb(skb);
return NULL; return NULL;
err_xdp:
rcu_read_unlock();
page_frag_free(xdp.data);
xdp_xmit:
return NULL;
} }
static int veth_xdp_rcv(struct veth_priv *priv, int budget) static int veth_xdp_rcv(struct veth_priv *priv, int budget,
unsigned int *xdp_xmit)
{ {
int i, done = 0; int i, done = 0;
...@@ -490,10 +577,12 @@ static int veth_xdp_rcv(struct veth_priv *priv, int budget) ...@@ -490,10 +577,12 @@ static int veth_xdp_rcv(struct veth_priv *priv, int budget)
if (!ptr) if (!ptr)
break; break;
if (veth_is_xdp_frame(ptr)) if (veth_is_xdp_frame(ptr)) {
skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr)); skb = veth_xdp_rcv_one(priv, veth_ptr_to_xdp(ptr),
else xdp_xmit);
skb = veth_xdp_rcv_skb(priv, ptr); } else {
skb = veth_xdp_rcv_skb(priv, ptr, xdp_xmit);
}
if (skb) if (skb)
napi_gro_receive(&priv->xdp_napi, skb); napi_gro_receive(&priv->xdp_napi, skb);
...@@ -508,9 +597,11 @@ static int veth_poll(struct napi_struct *napi, int budget) ...@@ -508,9 +597,11 @@ static int veth_poll(struct napi_struct *napi, int budget)
{ {
struct veth_priv *priv = struct veth_priv *priv =
container_of(napi, struct veth_priv, xdp_napi); container_of(napi, struct veth_priv, xdp_napi);
unsigned int xdp_xmit = 0;
int done; int done;
done = veth_xdp_rcv(priv, budget); xdp_set_return_frame_no_direct();
done = veth_xdp_rcv(priv, budget, &xdp_xmit);
if (done < budget && napi_complete_done(napi, done)) { if (done < budget && napi_complete_done(napi, done)) {
/* Write rx_notify_masked before reading ptr_ring */ /* Write rx_notify_masked before reading ptr_ring */
...@@ -521,6 +612,12 @@ static int veth_poll(struct napi_struct *napi, int budget) ...@@ -521,6 +612,12 @@ static int veth_poll(struct napi_struct *napi, int budget)
} }
} }
if (xdp_xmit & VETH_XDP_TX)
veth_xdp_flush(priv->dev);
if (xdp_xmit & VETH_XDP_REDIR)
xdp_do_flush_map();
xdp_clear_return_frame_no_direct();
return done; return done;
} }
...@@ -567,6 +664,9 @@ static int veth_enable_xdp(struct net_device *dev) ...@@ -567,6 +664,9 @@ static int veth_enable_xdp(struct net_device *dev)
err = veth_napi_add(dev); err = veth_napi_add(dev);
if (err) if (err)
goto err; goto err;
/* Save original mem info as it can be overwritten */
priv->xdp_mem = priv->xdp_rxq.mem;
} }
rcu_assign_pointer(priv->xdp_prog, priv->_xdp_prog); rcu_assign_pointer(priv->xdp_prog, priv->_xdp_prog);
...@@ -584,6 +684,7 @@ static void veth_disable_xdp(struct net_device *dev) ...@@ -584,6 +684,7 @@ static void veth_disable_xdp(struct net_device *dev)
rcu_assign_pointer(priv->xdp_prog, NULL); rcu_assign_pointer(priv->xdp_prog, NULL);
veth_napi_del(dev); veth_napi_del(dev);
priv->xdp_rxq.mem = priv->xdp_mem;
xdp_rxq_info_unreg(&priv->xdp_rxq); xdp_rxq_info_unreg(&priv->xdp_rxq);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment