Commit 40ea3ee2 authored by David S. Miller

Merge branch 'virtio-net-xdp-multi-buffer'

Heng Qi says:

====================
virtio-net: support multi buffer xdp

Changes since PATCH v4:
- Make netdev_warn() in [PATCH 2/10] independent from [PATCH 3/10].

Changes since PATCH v3:
- Separate fix patch [2/10] for MTU calculation of single buffer xdp.
  Note that this patch needs to be backported to the stable branch.

Changes since PATCH v2:
- Even with the hole mechanism, single buffer xdp is safe (the MTU is
  limited and GUEST GSO is turned off), so there is no need to
  backport "[PATCH 1/9]";
- Modify calculation of MTU for single buffer xdp in virtnet_xdp_set();
- Make truesize in mergeable mode return to its literal meaning;
- Add some comments for legibility;

Changes since RFC:
- Use headroom instead of vi->xdp_enabled to avoid re-reading it
  in add_recvbuf_mergeable();
- Disable GRO_HW and keep linearization for single buffer xdp;
- Renamed to virtnet_build_xdp_buff_mrg();
- pr_debug() to netdev_dbg();
- Adjusted the order of the patch series.

Currently, virtio-net only supports XDP for single-buffer packets or
linearized multi-buffer packets. This patchset adds XDP support for
multi-buffer packets, so a larger MTU can be used when the XDP program
declares multi-buffer (xdp.frags) support. Single-buffer handling is
not affected.
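
For reference, the opt-in happens on the BPF side. A hypothetical
minimal program (not part of this series, names illustrative) placed in
libbpf's "xdp.frags" section is loaded with BPF_F_XDP_HAS_FRAGS, which
is what the driver later checks through prog->aux->xdp_has_frags:

/* Hypothetical frags-aware XDP program, for illustration only. */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

SEC("xdp.frags")
int xdp_mb_pass(struct xdp_md *ctx)
{
	void *data = (void *)(long)ctx->data;
	void *data_end = (void *)(long)ctx->data_end;

	/* data..data_end covers only the first (linear) buffer; payload
	 * living in the frags has to be read with bpf_xdp_load_bytes().
	 */
	if (data + ETH_HLEN > data_end)
		return XDP_DROP;

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";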

To build multi-buffer xdp neatly, the code is integrated into
virtnet_build_xdp_buff_mrg(). The first buffer is used for the prepared
xdp_buff, and the remaining buffers are added as frags to its
skb_shared_info structure. On XDP_PASS this structure is then
conveniently converted to the corresponding skb.
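
To illustrate the resulting layout, this is roughly how the XDP core's
xdp_get_buff_len() derives the total packet length from it (paraphrased
sketch, assuming <net/xdp.h>; details may differ from the in-tree
helper): the linear part of the xdp_buff plus whatever sits in the
frags of its skb_shared_info.

/* Paraphrased sketch of xdp_get_buff_len(), illustration only. */
static unsigned int xdp_mb_len_sketch(struct xdp_buff *xdp)
{
	unsigned int len = xdp->data_end - xdp->data;
	struct skb_shared_info *sinfo;

	if (likely(!xdp_buff_has_frags(xdp)))
		return len;

	sinfo = xdp_get_shared_info_from_buff(xdp);
	return len + sinfo->xdp_frags_size;
}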

Since virtio-net allocates rx buffers from comp pages rather than a
page pool, while bpf_xdp_frags_increase_tail() is built on page pool
assumptions, (rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag))
is negative in most cases. So we do not set xdp_rxq->frag_size in
virtnet_open(), which disables the tail increase.
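
For context, a paraphrased sketch of the guard in the XDP core
(bpf_xdp_frags_increase_tail() in net/core/filter.c, reproduced from
memory and trimmed, so treat the details as approximate) shows why a
zero frag_size keeps bpf_xdp_adjust_tail() from growing the last frag:

/* Approximate sketch only; the in-tree function also clears the grown
 * area and handles shrinking separately.
 */
static int frags_increase_tail_sketch(struct xdp_buff *xdp, int offset)
{
	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
	struct xdp_rxq_info *rxq = xdp->rxq;
	skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
	int tailroom;

	/* virtnet_open() leaves rxq->frag_size at 0, so we stop here */
	if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
		return -EOPNOTSUPP;

	/* negative for virtio-net's tightly packed page frags */
	tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
	if (unlikely(offset > tailroom))
		return -EINVAL;

	skb_frag_size_add(frag, offset);
	sinfo->xdp_frags_size += offset;
	return 0;
}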

====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 5129bd8e fab89baf
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -446,9 +446,7 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
                                    struct receive_queue *rq,
                                    struct page *page, unsigned int offset,
-                                   unsigned int len, unsigned int truesize,
-                                   bool hdr_valid, unsigned int metasize,
-                                   unsigned int headroom)
+                                   unsigned int len, unsigned int truesize)
 {
         struct sk_buff *skb;
         struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -466,21 +464,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
         else
                 hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
-        /* If headroom is not 0, there is an offset between the beginning of the
-         * data and the allocated space, otherwise the data and the allocated
-         * space are aligned.
-         *
-         * Buffers with headroom use PAGE_SIZE as alloc size, see
-         * add_recvbuf_mergeable() + get_mergeable_buf_len()
-         */
-        truesize = headroom ? PAGE_SIZE : truesize;
-        tailroom = truesize - headroom;
-        buf = p - headroom;
-
+        buf = p;
         len -= hdr_len;
         offset += hdr_padded_len;
         p += hdr_padded_len;
-        tailroom -= hdr_padded_len + len;
+        tailroom = truesize - hdr_padded_len - len;
 
         shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -510,7 +498,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
         if (len <= skb_tailroom(skb))
                 copy = len;
         else
-                copy = ETH_HLEN + metasize;
+                copy = ETH_HLEN;
         skb_put_data(skb, p, copy);
 
         len -= copy;
@@ -549,19 +537,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
                 give_pages(rq, page);
 
 ok:
-        /* hdr_valid means no XDP, so we can copy the vnet header */
-        if (hdr_valid) {
-                hdr = skb_vnet_hdr(skb);
-                memcpy(hdr, hdr_p, hdr_len);
-        }
+        hdr = skb_vnet_hdr(skb);
+        memcpy(hdr, hdr_p, hdr_len);
         if (page_to_free)
                 put_page(page_to_free);
 
-        if (metasize) {
-                __skb_pull(skb, metasize);
-                skb_metadata_set(skb, metasize);
-        }
-
         return skb;
 }
@@ -570,22 +550,43 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
                                   struct xdp_frame *xdpf)
 {
         struct virtio_net_hdr_mrg_rxbuf *hdr;
-        int err;
+        struct skb_shared_info *shinfo;
+        u8 nr_frags = 0;
+        int err, i;
 
         if (unlikely(xdpf->headroom < vi->hdr_len))
                 return -EOVERFLOW;
 
-        /* Make room for virtqueue hdr (also change xdpf->headroom?) */
+        if (unlikely(xdp_frame_has_frags(xdpf))) {
+                shinfo = xdp_get_shared_info_from_frame(xdpf);
+                nr_frags = shinfo->nr_frags;
+        }
+
+        /* In wrapping function virtnet_xdp_xmit(), we need to free
+         * up the pending old buffers, where we need to calculate the
+         * position of skb_shared_info in xdp_get_frame_len() and
+         * xdp_return_frame(), which will involve to xdpf->data and
+         * xdpf->headroom. Therefore, we need to update the value of
+         * headroom synchronously here.
+         */
+        xdpf->headroom -= vi->hdr_len;
         xdpf->data -= vi->hdr_len;
         /* Zero header and leave csum up to XDP layers */
         hdr = xdpf->data;
         memset(hdr, 0, vi->hdr_len);
         xdpf->len += vi->hdr_len;
 
-        sg_init_one(sq->sg, xdpf->data, xdpf->len);
+        sg_init_table(sq->sg, nr_frags + 1);
+        sg_set_buf(sq->sg, xdpf->data, xdpf->len);
+        for (i = 0; i < nr_frags; i++) {
+                skb_frag_t *frag = &shinfo->frags[i];
+
+                sg_set_page(&sq->sg[i + 1], skb_frag_page(frag),
+                            skb_frag_size(frag), skb_frag_off(frag));
+        }
 
-        err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf),
-                                   GFP_ATOMIC);
+        err = virtqueue_add_outbuf(sq->vq, sq->sg, nr_frags + 1,
+                                   xdp_to_ptr(xdpf), GFP_ATOMIC);
         if (unlikely(err))
                 return -ENOSPC; /* Caller handle free/refcnt */
@@ -665,7 +666,7 @@ static int virtnet_xdp_xmit(struct net_device *dev,
                 if (likely(is_xdp_frame(ptr))) {
                         struct xdp_frame *frame = ptr_to_xdp(ptr);
 
-                        bytes += frame->len;
+                        bytes += xdp_get_frame_len(frame);
                         xdp_return_frame(frame);
                 } else {
                         struct sk_buff *skb = ptr;
@@ -924,7 +925,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
 {
         struct page *page = buf;
         struct sk_buff *skb =
-                page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0, 0);
+                page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
 
         stats->bytes += len - vi->hdr_len;
         if (unlikely(!skb))
@@ -938,6 +939,140 @@ static struct sk_buff *receive_big(struct net_device *dev,
         return NULL;
 }
 
+/* Why not use xdp_build_skb_from_frame() ?
+ * XDP core assumes that xdp frags are PAGE_SIZE in length, while in
+ * virtio-net there are 2 points that do not match its requirements:
+ *  1. The size of the prefilled buffer is not fixed before xdp is set.
+ *  2. xdp_build_skb_from_frame() does more checks that we don't need,
+ *     like eth_type_trans() (which virtio-net does in receive_buf()).
+ */
+static struct sk_buff *build_skb_from_xdp_buff(struct net_device *dev,
+                                               struct virtnet_info *vi,
+                                               struct xdp_buff *xdp,
+                                               unsigned int xdp_frags_truesz)
+{
+        struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+        unsigned int headroom, data_len;
+        struct sk_buff *skb;
+        int metasize;
+        u8 nr_frags;
+
+        if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
+                pr_debug("Error building skb as missing reserved tailroom for xdp");
+                return NULL;
+        }
+
+        if (unlikely(xdp_buff_has_frags(xdp)))
+                nr_frags = sinfo->nr_frags;
+
+        skb = build_skb(xdp->data_hard_start, xdp->frame_sz);
+        if (unlikely(!skb))
+                return NULL;
+
+        headroom = xdp->data - xdp->data_hard_start;
+        data_len = xdp->data_end - xdp->data;
+        skb_reserve(skb, headroom);
+        __skb_put(skb, data_len);
+
+        metasize = xdp->data - xdp->data_meta;
+        metasize = metasize > 0 ? metasize : 0;
+        if (metasize)
+                skb_metadata_set(skb, metasize);
+
+        if (unlikely(xdp_buff_has_frags(xdp)))
+                xdp_update_skb_shared_info(skb, nr_frags,
+                                           sinfo->xdp_frags_size,
+                                           xdp_frags_truesz,
+                                           xdp_buff_is_frag_pfmemalloc(xdp));
+
+        return skb;
+}
+
+/* TODO: build xdp in big mode */
+static int virtnet_build_xdp_buff_mrg(struct net_device *dev,
+                                      struct virtnet_info *vi,
+                                      struct receive_queue *rq,
+                                      struct xdp_buff *xdp,
+                                      void *buf,
+                                      unsigned int len,
+                                      unsigned int frame_sz,
+                                      u16 *num_buf,
+                                      unsigned int *xdp_frags_truesize,
+                                      struct virtnet_rq_stats *stats)
+{
+        struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
+        unsigned int headroom, tailroom, room;
+        unsigned int truesize, cur_frag_size;
+        struct skb_shared_info *shinfo;
+        unsigned int xdp_frags_truesz = 0;
+        struct page *page;
+        skb_frag_t *frag;
+        int offset;
+        void *ctx;
+
+        xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
+        xdp_prepare_buff(xdp, buf - VIRTIO_XDP_HEADROOM,
+                         VIRTIO_XDP_HEADROOM + vi->hdr_len, len - vi->hdr_len, true);
+
+        if (*num_buf > 1) {
+                /* If we want to build multi-buffer xdp, we need
+                 * to specify that the flags of xdp_buff have the
+                 * XDP_FLAGS_HAS_FRAG bit.
+                 */
+                if (!xdp_buff_has_frags(xdp))
+                        xdp_buff_set_frags_flag(xdp);
+
+                shinfo = xdp_get_shared_info_from_buff(xdp);
+                shinfo->nr_frags = 0;
+                shinfo->xdp_frags_size = 0;
+        }
+
+        if ((*num_buf - 1) > MAX_SKB_FRAGS)
+                return -EINVAL;
+
+        while ((--*num_buf) >= 1) {
+                buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
+                if (unlikely(!buf)) {
+                        pr_debug("%s: rx error: %d buffers out of %d missing\n",
+                                 dev->name, *num_buf,
+                                 virtio16_to_cpu(vi->vdev, hdr->num_buffers));
+                        dev->stats.rx_length_errors++;
+                        return -EINVAL;
+                }
+
+                stats->bytes += len;
+                page = virt_to_head_page(buf);
+                offset = buf - page_address(page);
+
+                truesize = mergeable_ctx_to_truesize(ctx);
+                headroom = mergeable_ctx_to_headroom(ctx);
+                tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+                room = SKB_DATA_ALIGN(headroom + tailroom);
+
+                cur_frag_size = truesize;
+                xdp_frags_truesz += cur_frag_size;
+                if (unlikely(len > truesize - room || cur_frag_size > PAGE_SIZE)) {
+                        put_page(page);
+                        pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
+                                 dev->name, len, (unsigned long)(truesize - room));
+                        dev->stats.rx_length_errors++;
+                        return -EINVAL;
+                }
+
+                frag = &shinfo->frags[shinfo->nr_frags++];
+                __skb_frag_set_page(frag, page);
+                skb_frag_off_set(frag, offset);
+                skb_frag_size_set(frag, len);
+                if (page_is_pfmemalloc(page))
+                        xdp_buff_set_frag_pfmemalloc(xdp);
+
+                shinfo->xdp_frags_size += len;
+        }
+
+        *xdp_frags_truesize = xdp_frags_truesz;
+        return 0;
+}
+
 static struct sk_buff *receive_mergeable(struct net_device *dev,
                                          struct virtnet_info *vi,
                                          struct receive_queue *rq,
@@ -955,16 +1090,17 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
         struct bpf_prog *xdp_prog;
         unsigned int truesize = mergeable_ctx_to_truesize(ctx);
         unsigned int headroom = mergeable_ctx_to_headroom(ctx);
-        unsigned int metasize = 0;
-        unsigned int frame_sz;
+        unsigned int tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+        unsigned int room = SKB_DATA_ALIGN(headroom + tailroom);
+        unsigned int frame_sz, xdp_room;
         int err;
 
         head_skb = NULL;
         stats->bytes += len - vi->hdr_len;
 
-        if (unlikely(len > truesize)) {
+        if (unlikely(len > truesize - room)) {
                 pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
-                         dev->name, len, (unsigned long)ctx);
+                         dev->name, len, (unsigned long)(truesize - room));
                 dev->stats.rx_length_errors++;
                 goto err_skb;
         }
@@ -977,11 +1113,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
         rcu_read_lock();
         xdp_prog = rcu_dereference(rq->xdp_prog);
         if (xdp_prog) {
+                unsigned int xdp_frags_truesz = 0;
+                struct skb_shared_info *shinfo;
                 struct xdp_frame *xdpf;
                 struct page *xdp_page;
                 struct xdp_buff xdp;
                 void *data;
                 u32 act;
+                int i;
 
                 /* Transient failure which in theory could occur if
                  * in-flight packets from before XDP was enabled reach
@@ -990,19 +1129,23 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                 if (unlikely(hdr->hdr.gso_type))
                         goto err_xdp;
 
-                /* Buffers with headroom use PAGE_SIZE as alloc size,
-                 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
+                /* Now XDP core assumes frag size is PAGE_SIZE, but buffers
+                 * with headroom may add hole in truesize, which
+                 * make their length exceed PAGE_SIZE. So we disabled the
+                 * hole mechanism for xdp. See add_recvbuf_mergeable().
                  */
-                frame_sz = headroom ? PAGE_SIZE : truesize;
+                frame_sz = truesize;
 
-                /* This happens when rx buffer size is underestimated
-                 * or headroom is not enough because of the buffer
-                 * was refilled before XDP is set. This should only
-                 * happen for the first several packets, so we don't
-                 * care much about its performance.
+                /* This happens when headroom is not enough because
+                 * of the buffer was prefilled before XDP is set.
+                 * This should only happen for the first several packets.
+                 * In fact, vq reset can be used here to help us clean up
+                 * the prefilled buffers, but many existing devices do not
+                 * support it, and we don't want to bother users who are
+                 * using xdp normally.
                  */
-                if (unlikely(num_buf > 1 ||
-                             headroom < virtnet_get_headroom(vi))) {
+                if (!xdp_prog->aux->xdp_has_frags &&
+                    (num_buf > 1 || headroom < virtnet_get_headroom(vi))) {
                         /* linearize data for XDP */
                         xdp_page = xdp_linearize_page(rq, &num_buf,
                                                       page, offset,
@@ -1013,82 +1156,53 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                         if (!xdp_page)
                                 goto err_xdp;
                         offset = VIRTIO_XDP_HEADROOM;
+                } else if (unlikely(headroom < virtnet_get_headroom(vi))) {
+                        xdp_room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
+                                                  sizeof(struct skb_shared_info));
+                        if (len + xdp_room > PAGE_SIZE)
+                                goto err_xdp;
+
+                        xdp_page = alloc_page(GFP_ATOMIC);
+                        if (!xdp_page)
+                                goto err_xdp;
+
+                        memcpy(page_address(xdp_page) + VIRTIO_XDP_HEADROOM,
+                               page_address(page) + offset, len);
+                        frame_sz = PAGE_SIZE;
+                        offset = VIRTIO_XDP_HEADROOM;
                 } else {
                         xdp_page = page;
                 }
 
-                /* Allow consuming headroom but reserve enough space to push
-                 * the descriptor on if we get an XDP_TX return code.
-                 */
                 data = page_address(xdp_page) + offset;
-                xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
-                xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len,
-                                 VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
+                err = virtnet_build_xdp_buff_mrg(dev, vi, rq, &xdp, data, len, frame_sz,
+                                                 &num_buf, &xdp_frags_truesz, stats);
+                if (unlikely(err))
+                        goto err_xdp_frags;
 
                 act = bpf_prog_run_xdp(xdp_prog, &xdp);
                 stats->xdp_packets++;
 
                 switch (act) {
                 case XDP_PASS:
-                        metasize = xdp.data - xdp.data_meta;
-
-                        /* recalculate offset to account for any header
-                         * adjustments and minus the metasize to copy the
-                         * metadata in page_to_skb(). Note other cases do not
-                         * build an skb and avoid using offset
-                         */
-                        offset = xdp.data - page_address(xdp_page) -
-                                 vi->hdr_len - metasize;
-
-                        /* recalculate len if xdp.data, xdp.data_end or
-                         * xdp.data_meta were adjusted
-                         */
-                        len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
-
-                        /* recalculate headroom if xdp.data or xdp_data_meta
-                         * were adjusted, note that offset should always point
-                         * to the start of the reserved bytes for virtio_net
-                         * header which are followed by xdp.data, that means
-                         * that offset is equal to the headroom (when buf is
-                         * starting at the beginning of the page, otherwise
-                         * there is a base offset inside the page) but it's used
-                         * with a different starting point (buf start) than
-                         * xdp.data (buf start + vnet hdr size). If xdp.data or
-                         * data_meta were adjusted by the xdp prog then the
-                         * headroom size has changed and so has the offset, we
-                         * can use data_hard_start, which points at buf start +
-                         * vnet hdr size, to calculate the new headroom and use
-                         * it later to compute buf start in page_to_skb()
-                         */
-                        headroom = xdp.data - xdp.data_hard_start - metasize;
-
-                        /* We can only create skb based on xdp_page. */
-                        if (unlikely(xdp_page != page)) {
-                                rcu_read_unlock();
+                        if (unlikely(xdp_page != page))
                                 put_page(page);
-                                head_skb = page_to_skb(vi, rq, xdp_page, offset,
-                                                       len, PAGE_SIZE, false,
-                                                       metasize,
-                                                       headroom);
-                                return head_skb;
-                        }
-                        break;
+                        head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
+                        rcu_read_unlock();
+                        return head_skb;
                 case XDP_TX:
                         stats->xdp_tx++;
                         xdpf = xdp_convert_buff_to_frame(&xdp);
                         if (unlikely(!xdpf)) {
-                                if (unlikely(xdp_page != page))
-                                        put_page(xdp_page);
-                                goto err_xdp;
+                                netdev_dbg(dev, "convert buff to frame failed for xdp\n");
+                                goto err_xdp_frags;
                         }
                         err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
                         if (unlikely(!err)) {
                                 xdp_return_frame_rx_napi(xdpf);
                         } else if (unlikely(err < 0)) {
                                 trace_xdp_exception(vi->dev, xdp_prog, act);
-                                if (unlikely(xdp_page != page))
-                                        put_page(xdp_page);
-                                goto err_xdp;
+                                goto err_xdp_frags;
                         }
                         *xdp_xmit |= VIRTIO_XDP_TX;
                         if (unlikely(xdp_page != page))
@@ -1098,11 +1212,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                 case XDP_REDIRECT:
                         stats->xdp_redirects++;
                         err = xdp_do_redirect(dev, &xdp, xdp_prog);
-                        if (err) {
-                                if (unlikely(xdp_page != page))
-                                        put_page(xdp_page);
-                                goto err_xdp;
-                        }
+                        if (err)
+                                goto err_xdp_frags;
                         *xdp_xmit |= VIRTIO_XDP_REDIR;
                         if (unlikely(xdp_page != page))
                                 put_page(page);
@@ -1115,16 +1226,26 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                         trace_xdp_exception(vi->dev, xdp_prog, act);
                         fallthrough;
                 case XDP_DROP:
-                        if (unlikely(xdp_page != page))
-                                __free_pages(xdp_page, 0);
-                        goto err_xdp;
+                        goto err_xdp_frags;
                 }
+err_xdp_frags:
+                if (unlikely(xdp_page != page))
+                        __free_pages(xdp_page, 0);
+
+                if (xdp_buff_has_frags(&xdp)) {
+                        shinfo = xdp_get_shared_info_from_buff(&xdp);
+                        for (i = 0; i < shinfo->nr_frags; i++) {
+                                xdp_page = skb_frag_page(&shinfo->frags[i]);
+                                put_page(xdp_page);
+                        }
+                }
+
+                goto err_xdp;
         }
         rcu_read_unlock();
 
 skip_xdp:
-        head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
-                               metasize, headroom);
+        head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
         curr_skb = head_skb;
 
         if (unlikely(!curr_skb))
@@ -1146,9 +1267,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
                 page = virt_to_head_page(buf);
 
                 truesize = mergeable_ctx_to_truesize(ctx);
-                if (unlikely(len > truesize)) {
+                headroom = mergeable_ctx_to_headroom(ctx);
+                tailroom = headroom ? sizeof(struct skb_shared_info) : 0;
+                room = SKB_DATA_ALIGN(headroom + tailroom);
+                if (unlikely(len > truesize - room)) {
                         pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
-                                 dev->name, len, (unsigned long)ctx);
+                                 dev->name, len, (unsigned long)(truesize - room));
                         dev->stats.rx_length_errors++;
                         goto err_skb;
                 }
@@ -1426,13 +1550,16 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
                 /* To avoid internal fragmentation, if there is very likely not
                  * enough space for another buffer, add the remaining space to
                  * the current buffer.
+                 * XDP core assumes that frame_size of xdp_buff and the length
+                 * of the frag are PAGE_SIZE, so we disable the hole mechanism.
                  */
-                len += hole;
+                if (!headroom)
+                        len += hole;
                 alloc_frag->offset += hole;
         }
 
         sg_init_one(rq->sg, buf, len);
-        ctx = mergeable_len_to_ctx(len, headroom);
+        ctx = mergeable_len_to_ctx(len + room, headroom);
         err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
         if (err < 0)
                 put_page(virt_to_head_page(buf));
@@ -1608,7 +1735,7 @@ static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
                 } else {
                         struct xdp_frame *frame = ptr_to_xdp(ptr);
 
-                        bytes += frame->len;
+                        bytes += xdp_get_frame_len(frame);
                         xdp_return_frame(frame);
                 }
                 packets++;
@@ -3078,7 +3205,9 @@ static int virtnet_restore_guest_offloads(struct virtnet_info *vi)
 static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                            struct netlink_ext_ack *extack)
 {
-        unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
+        unsigned int room = SKB_DATA_ALIGN(VIRTIO_XDP_HEADROOM +
+                                           sizeof(struct skb_shared_info));
+        unsigned int max_sz = PAGE_SIZE - room - ETH_HLEN;
         struct virtnet_info *vi = netdev_priv(dev);
         struct bpf_prog *old_prog;
         u16 xdp_qp = 0, curr_qp;
@@ -3101,9 +3230,9 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                 return -EINVAL;
         }
 
-        if (dev->mtu > max_sz) {
-                NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
-                netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
+        if (prog && !prog->aux->xdp_has_frags && dev->mtu > max_sz) {
+                NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP without frags");
+                netdev_warn(dev, "single-buffer XDP requires MTU less than %u\n", max_sz);
                 return -EINVAL;
         }