Commit 9cda7807 authored by Toshiaki Makita, committed by Daniel Borkmann

veth: Support bulk XDP_TX

XDP_TX is similar to XDP_REDIRECT as it essentially redirects packets to
the device itself. XDP_REDIRECT has a bulk transmit mechanism to avoid the
heavy cost of indirect calls, and it also reduces lock acquisition on
destination devices that need locks, such as veth and tun.

XDP_TX does not use indirect calls, but drivers that require locks can
benefit from bulk transmit for XDP_TX as well.

This patch introduces a bulk transmit mechanism in veth using a bulk queue
on the stack, improving XDP_TX performance by about 9%.
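
For illustration only, the pattern boils down to an on-stack array of frame
pointers that is flushed either when it fills up or at the end of the NAPI
poll. Below is a minimal, self-contained userspace C sketch of that pattern;
the names (frame, bulk_queue, tx_one, flush_bulk, xmit_bulk, TX_BULK_SIZE) are
hypothetical stand-ins for the driver's xdp_frame, veth_xdp_tx_bq,
veth_xdp_tx(), veth_xdp_flush_bq(), veth_xdp_xmit() and
VETH_XDP_TX_BULK_SIZE, not kernel APIs:

#include <stdio.h>

#define TX_BULK_SIZE 16			/* mirrors VETH_XDP_TX_BULK_SIZE */

struct frame { int id; };		/* stand-in for struct xdp_frame */

/* On-stack bulk queue, analogous to struct veth_xdp_tx_bq. */
struct bulk_queue {
	struct frame *q[TX_BULK_SIZE];
	unsigned int count;
};

/* Stand-in for veth_xdp_xmit(): send a whole batch under one lock acquisition. */
static int xmit_bulk(struct frame **frames, unsigned int n)
{
	/* lock(); enqueue all n frames; unlock(); */
	printf("xmit %u frames in one locked section\n", n);
	return (int)n;
}

/* Analogous to veth_xdp_flush_bq(): drain whatever has accumulated. */
static void flush_bulk(struct bulk_queue *bq)
{
	if (bq->count)
		xmit_bulk(bq->q, bq->count);
	bq->count = 0;
}

/* Analogous to veth_xdp_tx(): queue one frame, flushing first if full. */
static void tx_one(struct bulk_queue *bq, struct frame *f)
{
	if (bq->count == TX_BULK_SIZE)
		flush_bulk(bq);
	bq->q[bq->count++] = f;
}

int main(void)
{
	struct bulk_queue bq = { .count = 0 };	/* lives on the stack, as in veth_poll() */
	struct frame frames[40];
	int i;

	for (i = 0; i < 40; i++) {
		frames[i].id = i;
		tx_one(&bq, &frames[i]);
	}
	flush_bulk(&bq);		/* final flush at the end of the poll loop */
	return 0;
}

Locks are taken once per batch rather than once per frame, which is where
the _raw_spin_lock savings in the numbers below come from.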

Here are single-core/single-flow XDP_TX test results. CPU consumption
figures are taken from "perf report --no-child".

- Before:

  7.26 Mpps

  _raw_spin_lock  7.83%
  veth_xdp_xmit  12.23%

- After:

  7.94 Mpps

  _raw_spin_lock  1.08%
  veth_xdp_xmit   6.10%

v2:
- Use stack for bulk queue instead of a global variable.
Signed-off-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent e7d47989
@@ -38,6 +38,8 @@
 #define VETH_XDP_TX		BIT(0)
 #define VETH_XDP_REDIR		BIT(1)
 
+#define VETH_XDP_TX_BULK_SIZE	16
+
 struct veth_rq_stats {
 	u64	xdp_packets;
 	u64	xdp_bytes;
@@ -64,6 +66,11 @@ struct veth_priv {
 	unsigned int		requested_headroom;
 };
 
+struct veth_xdp_tx_bq {
+	struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE];
+	unsigned int count;
+};
+
 /*
  * ethtool interface
  */
@@ -442,13 +449,30 @@ static int veth_xdp_xmit(struct net_device *dev, int n,
 	return ret;
 }
 
-static void veth_xdp_flush(struct net_device *dev)
+static void veth_xdp_flush_bq(struct net_device *dev, struct veth_xdp_tx_bq *bq)
+{
+	int sent, i, err = 0;
+
+	sent = veth_xdp_xmit(dev, bq->count, bq->q, 0);
+	if (sent < 0) {
+		err = sent;
+		sent = 0;
+		for (i = 0; i < bq->count; i++)
+			xdp_return_frame(bq->q[i]);
+	}
+	trace_xdp_bulk_tx(dev, sent, bq->count - sent, err);
+
+	bq->count = 0;
+}
+
+static void veth_xdp_flush(struct net_device *dev, struct veth_xdp_tx_bq *bq)
 {
 	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
 	struct net_device *rcv;
 	struct veth_rq *rq;
 
 	rcu_read_lock();
+	veth_xdp_flush_bq(dev, bq);
 	rcv = rcu_dereference(priv->peer);
 	if (unlikely(!rcv))
 		goto out;
@@ -464,19 +488,26 @@ static void veth_xdp_flush(struct net_device *dev)
 	rcu_read_unlock();
 }
 
-static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
+static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp,
+		       struct veth_xdp_tx_bq *bq)
 {
 	struct xdp_frame *frame = convert_to_xdp_frame(xdp);
 
 	if (unlikely(!frame))
 		return -EOVERFLOW;
 
-	return veth_xdp_xmit(dev, 1, &frame, 0);
+	if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE))
+		veth_xdp_flush_bq(dev, bq);
+
+	bq->q[bq->count++] = frame;
+
+	return 0;
 }
 
 static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 					struct xdp_frame *frame,
-					unsigned int *xdp_xmit)
+					unsigned int *xdp_xmit,
+					struct veth_xdp_tx_bq *bq)
 {
 	void *hard_start = frame->data - frame->headroom;
 	void *head = hard_start - sizeof(struct xdp_frame);
@@ -509,7 +540,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 			orig_frame = *frame;
 			xdp.data_hard_start = head;
 			xdp.rxq->mem = frame->mem;
-			if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
+			if (unlikely(veth_xdp_tx(rq->dev, &xdp, bq) < 0)) {
 				trace_xdp_exception(rq->dev, xdp_prog, act);
 				frame = &orig_frame;
 				goto err_xdp;
@@ -560,7 +591,8 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 }
 
 static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
-					unsigned int *xdp_xmit)
+					unsigned int *xdp_xmit,
+					struct veth_xdp_tx_bq *bq)
 {
 	u32 pktlen, headroom, act, metalen;
 	void *orig_data, *orig_data_end;
@@ -636,7 +668,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
 		get_page(virt_to_page(xdp.data));
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
-		if (unlikely(veth_xdp_tx(rq->dev, &xdp) < 0)) {
+		if (unlikely(veth_xdp_tx(rq->dev, &xdp, bq) < 0)) {
 			trace_xdp_exception(rq->dev, xdp_prog, act);
 			goto err_xdp;
 		}
@@ -691,7 +723,8 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq, struct sk_buff *skb,
 	return NULL;
 }
 
-static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit)
+static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit,
+			struct veth_xdp_tx_bq *bq)
 {
 	int i, done = 0, drops = 0, bytes = 0;
 
@@ -707,11 +740,11 @@ static int veth_xdp_rcv(struct veth_rq *rq, int budget, unsigned int *xdp_xmit)
 			struct xdp_frame *frame = veth_ptr_to_xdp(ptr);
 
 			bytes += frame->len;
-			skb = veth_xdp_rcv_one(rq, frame, &xdp_xmit_one);
+			skb = veth_xdp_rcv_one(rq, frame, &xdp_xmit_one, bq);
 		} else {
 			skb = ptr;
 			bytes += skb->len;
-			skb = veth_xdp_rcv_skb(rq, skb, &xdp_xmit_one);
+			skb = veth_xdp_rcv_skb(rq, skb, &xdp_xmit_one, bq);
 		}
 		*xdp_xmit |= xdp_xmit_one;
 
@@ -737,10 +770,13 @@ static int veth_poll(struct napi_struct *napi, int budget)
 	struct veth_rq *rq =
 		container_of(napi, struct veth_rq, xdp_napi);
 	unsigned int xdp_xmit = 0;
+	struct veth_xdp_tx_bq bq;
 	int done;
 
+	bq.count = 0;
+
 	xdp_set_return_frame_no_direct();
-	done = veth_xdp_rcv(rq, budget, &xdp_xmit);
+	done = veth_xdp_rcv(rq, budget, &xdp_xmit, &bq);
 
 	if (done < budget && napi_complete_done(napi, done)) {
 		/* Write rx_notify_masked before reading ptr_ring */
@@ -752,7 +788,7 @@ static int veth_poll(struct napi_struct *napi, int budget)
 	}
 
 	if (xdp_xmit & VETH_XDP_TX)
-		veth_xdp_flush(rq->dev);
+		veth_xdp_flush(rq->dev, &bq);
 	if (xdp_xmit & VETH_XDP_REDIR)
 		xdp_do_flush_map();
 
 	xdp_clear_return_frame_no_direct();