Commit 88529176, authored by Jason Wang, committed by David S. Miller

tuntap: do not zerocopy if iov needs more pages than MAX_SKB_FRAGS

We try to linearize part of the skb when the number of iovs is greater than
MAX_SKB_FRAGS. This is not enough, since a single vector may occupy more than
one page, so zerocopy_sg_from_iovec() may still fail and may break the guest
network.

Solve this problem by calculating the pages needed for the iov before trying to
do zerocopy, and switch to copy instead of zerocopy if it needs more than
MAX_SKB_FRAGS pages.

This is done through introducing a new helper to count the pages for iov, and
call uarg->callback() manually when switching from zerocopy to copy to notify
vhost.

We can do further optimization on top.

The bug was introduced by commit 0690899b
(tun: experimental zero copy tx support)

Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 87f40dd6
...@@ -1035,6 +1035,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, ...@@ -1035,6 +1035,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
return 0; return 0;
} }
/*
 * iov_pages - count the pages touched by an iovec array after skipping a prefix
 * @iv:      first element of the iovec array
 * @offset:  number of leading bytes (across segments) to skip
 * @nr_segs: number of elements in @iv
 *
 * Each remaining segment is counted separately: a segment contributes the
 * number of pages its [base, base + len) range overlaps, so a single segment
 * that is not page aligned or is longer than a page counts more than once.
 *
 * Returns the sum of the per-segment page counts, or 0 if @offset covers
 * every segment.
 */
static unsigned long iov_pages(const struct iovec *iv, int offset,
			       unsigned long nr_segs)
{
	unsigned long seg, base, pages = 0;
	/*
	 * Do the arithmetic in size_t, the type of iov_len, so that large
	 * segment lengths are not truncated to int and the int/size_t
	 * comparison below is explicit rather than an implicit conversion.
	 */
	size_t skip = offset;
	size_t len;

	/* Drop leading segments that lie entirely inside the skipped prefix. */
	while (nr_segs && skip >= iv->iov_len) {
		skip -= iv->iov_len;
		++iv;
		--nr_segs;
	}

	for (seg = 0; seg < nr_segs; seg++) {
		base = (unsigned long)iv[seg].iov_base + skip;
		len = iv[seg].iov_len - skip;
		/*
		 * Offset of base within its page, plus the length, rounded up
		 * to a whole number of pages.
		 */
		pages += ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
		/* Only the first remaining segment is partially skipped. */
		skip = 0;
	}

	return pages;
}
/* Get packet from user space buffer */ /* Get packet from user space buffer */
static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
void *msg_control, const struct iovec *iv, void *msg_control, const struct iovec *iv,
...@@ -1082,32 +1105,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, ...@@ -1082,32 +1105,18 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
return -EINVAL; return -EINVAL;
} }
if (msg_control) if (msg_control) {
zerocopy = true; /* There are 256 bytes to be copied in skb, so there is
* enough room for skb expand head in case it is used.
if (zerocopy) {
/* Userspace may produce vectors with count greater than
* MAX_SKB_FRAGS, so we need to linearize parts of the skb
* to let the rest of data to be fit in the frags.
*/
if (count > MAX_SKB_FRAGS) {
copylen = iov_length(iv, count - MAX_SKB_FRAGS);
if (copylen < offset)
copylen = 0;
else
copylen -= offset;
} else
copylen = 0;
/* There are 256 bytes to be copied in skb, so there is enough
* room for skb expand head in case it is used.
* The rest of the buffer is mapped from userspace. * The rest of the buffer is mapped from userspace.
*/ */
if (copylen < gso.hdr_len) copylen = gso.hdr_len ? gso.hdr_len : GOODCOPY_LEN;
copylen = gso.hdr_len;
if (!copylen)
copylen = GOODCOPY_LEN;
linear = copylen; linear = copylen;
} else { if (iov_pages(iv, offset + copylen, count) <= MAX_SKB_FRAGS)
zerocopy = true;
}
if (!zerocopy) {
copylen = len; copylen = len;
linear = gso.hdr_len; linear = gso.hdr_len;
} }
...@@ -1121,8 +1130,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, ...@@ -1121,8 +1130,13 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
if (zerocopy) if (zerocopy)
err = zerocopy_sg_from_iovec(skb, iv, offset, count); err = zerocopy_sg_from_iovec(skb, iv, offset, count);
else else {
err = skb_copy_datagram_from_iovec(skb, 0, iv, offset, len); err = skb_copy_datagram_from_iovec(skb, 0, iv, offset, len);
if (!err && msg_control) {
struct ubuf_info *uarg = msg_control;
uarg->callback(uarg, false);
}
}
if (err) { if (err) {
tun->dev->stats.rx_dropped++; tun->dev->stats.rx_dropped++;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment