Commit e8b18af8 authored by David S. Miller's avatar David S. Miller

Merge branch 'XDP-transmission-for-tuntap'

Jason Wang says:

====================
XDP transmission for tuntap

This series tries to implement XDP transmission (ndo_xdp_xmit) for
tuntap. Pointer ring was used for queuing both XDP buffers and
sk_buff, this is done by encoding the type into lowest bit of the
pointer and storing XDP metadata in the headroom of XDP buff.

Tests get 3.05 Mpps when doing xdp_redirect_map from ixgbe to VM
(testpmd + virtio-net in guest). This gives us ~20% improvements
compared to using skb during redirect.

Please review.

Changes from V1:

- silence warnings
- fix typos
- add skb mode number in the commit log
====================
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parents a0ce0931 fc72d1d5
...@@ -330,7 +330,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) ...@@ -330,7 +330,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
if (!q) if (!q)
return RX_HANDLER_PASS; return RX_HANDLER_PASS;
if (__skb_array_full(&q->skb_array)) if (__ptr_ring_full(&q->ring))
goto drop; goto drop;
skb_push(skb, ETH_HLEN); skb_push(skb, ETH_HLEN);
...@@ -348,7 +348,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) ...@@ -348,7 +348,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
goto drop; goto drop;
if (!segs) { if (!segs) {
if (skb_array_produce(&q->skb_array, skb)) if (ptr_ring_produce(&q->ring, skb))
goto drop; goto drop;
goto wake_up; goto wake_up;
} }
...@@ -358,7 +358,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) ...@@ -358,7 +358,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
struct sk_buff *nskb = segs->next; struct sk_buff *nskb = segs->next;
segs->next = NULL; segs->next = NULL;
if (skb_array_produce(&q->skb_array, segs)) { if (ptr_ring_produce(&q->ring, segs)) {
kfree_skb(segs); kfree_skb(segs);
kfree_skb_list(nskb); kfree_skb_list(nskb);
break; break;
...@@ -375,7 +375,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) ...@@ -375,7 +375,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
!(features & NETIF_F_CSUM_MASK) && !(features & NETIF_F_CSUM_MASK) &&
skb_checksum_help(skb)) skb_checksum_help(skb))
goto drop; goto drop;
if (skb_array_produce(&q->skb_array, skb)) if (ptr_ring_produce(&q->ring, skb))
goto drop; goto drop;
} }
...@@ -497,7 +497,7 @@ static void tap_sock_destruct(struct sock *sk) ...@@ -497,7 +497,7 @@ static void tap_sock_destruct(struct sock *sk)
{ {
struct tap_queue *q = container_of(sk, struct tap_queue, sk); struct tap_queue *q = container_of(sk, struct tap_queue, sk);
skb_array_cleanup(&q->skb_array); ptr_ring_cleanup(&q->ring, __skb_array_destroy_skb);
} }
static int tap_open(struct inode *inode, struct file *file) static int tap_open(struct inode *inode, struct file *file)
...@@ -517,7 +517,7 @@ static int tap_open(struct inode *inode, struct file *file) ...@@ -517,7 +517,7 @@ static int tap_open(struct inode *inode, struct file *file)
&tap_proto, 0); &tap_proto, 0);
if (!q) if (!q)
goto err; goto err;
if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) { if (ptr_ring_init(&q->ring, tap->dev->tx_queue_len, GFP_KERNEL)) {
sk_free(&q->sk); sk_free(&q->sk);
goto err; goto err;
} }
...@@ -546,7 +546,7 @@ static int tap_open(struct inode *inode, struct file *file) ...@@ -546,7 +546,7 @@ static int tap_open(struct inode *inode, struct file *file)
err = tap_set_queue(tap, file, q); err = tap_set_queue(tap, file, q);
if (err) { if (err) {
/* tap_sock_destruct() will take care of freeing skb_array */ /* tap_sock_destruct() will take care of freeing ptr_ring */
goto err_put; goto err_put;
} }
...@@ -583,7 +583,7 @@ static unsigned int tap_poll(struct file *file, poll_table *wait) ...@@ -583,7 +583,7 @@ static unsigned int tap_poll(struct file *file, poll_table *wait)
mask = 0; mask = 0;
poll_wait(file, &q->wq.wait, wait); poll_wait(file, &q->wq.wait, wait);
if (!skb_array_empty(&q->skb_array)) if (!ptr_ring_empty(&q->ring))
mask |= POLLIN | POLLRDNORM; mask |= POLLIN | POLLRDNORM;
if (sock_writeable(&q->sk) || if (sock_writeable(&q->sk) ||
...@@ -844,7 +844,7 @@ static ssize_t tap_do_read(struct tap_queue *q, ...@@ -844,7 +844,7 @@ static ssize_t tap_do_read(struct tap_queue *q,
TASK_INTERRUPTIBLE); TASK_INTERRUPTIBLE);
/* Read frames from the queue */ /* Read frames from the queue */
skb = skb_array_consume(&q->skb_array); skb = ptr_ring_consume(&q->ring);
if (skb) if (skb)
break; break;
if (noblock) { if (noblock) {
...@@ -1176,7 +1176,7 @@ static int tap_peek_len(struct socket *sock) ...@@ -1176,7 +1176,7 @@ static int tap_peek_len(struct socket *sock)
{ {
struct tap_queue *q = container_of(sock, struct tap_queue, struct tap_queue *q = container_of(sock, struct tap_queue,
sock); sock);
return skb_array_peek_len(&q->skb_array); return PTR_RING_PEEK_CALL(&q->ring, __skb_array_len_with_tag);
} }
/* Ops structure to mimic raw sockets with tun */ /* Ops structure to mimic raw sockets with tun */
...@@ -1202,7 +1202,7 @@ struct socket *tap_get_socket(struct file *file) ...@@ -1202,7 +1202,7 @@ struct socket *tap_get_socket(struct file *file)
} }
EXPORT_SYMBOL_GPL(tap_get_socket); EXPORT_SYMBOL_GPL(tap_get_socket);
struct skb_array *tap_get_skb_array(struct file *file) struct ptr_ring *tap_get_ptr_ring(struct file *file)
{ {
struct tap_queue *q; struct tap_queue *q;
...@@ -1211,29 +1211,30 @@ struct skb_array *tap_get_skb_array(struct file *file) ...@@ -1211,29 +1211,30 @@ struct skb_array *tap_get_skb_array(struct file *file)
q = file->private_data; q = file->private_data;
if (!q) if (!q)
return ERR_PTR(-EBADFD); return ERR_PTR(-EBADFD);
return &q->skb_array; return &q->ring;
} }
EXPORT_SYMBOL_GPL(tap_get_skb_array); EXPORT_SYMBOL_GPL(tap_get_ptr_ring);
int tap_queue_resize(struct tap_dev *tap) int tap_queue_resize(struct tap_dev *tap)
{ {
struct net_device *dev = tap->dev; struct net_device *dev = tap->dev;
struct tap_queue *q; struct tap_queue *q;
struct skb_array **arrays; struct ptr_ring **rings;
int n = tap->numqueues; int n = tap->numqueues;
int ret, i = 0; int ret, i = 0;
arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL); rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL);
if (!arrays) if (!rings)
return -ENOMEM; return -ENOMEM;
list_for_each_entry(q, &tap->queue_list, next) list_for_each_entry(q, &tap->queue_list, next)
arrays[i++] = &q->skb_array; rings[i++] = &q->ring;
ret = skb_array_resize_multiple(arrays, n, ret = ptr_ring_resize_multiple(rings, n,
dev->tx_queue_len, GFP_KERNEL); dev->tx_queue_len, GFP_KERNEL,
__skb_array_destroy_skb);
kfree(arrays); kfree(rings);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(tap_queue_resize); EXPORT_SYMBOL_GPL(tap_queue_resize);
......
...@@ -179,7 +179,7 @@ struct tun_file { ...@@ -179,7 +179,7 @@ struct tun_file {
struct mutex napi_mutex; /* Protects access to the above napi */ struct mutex napi_mutex; /* Protects access to the above napi */
struct list_head next; struct list_head next;
struct tun_struct *detached; struct tun_struct *detached;
struct skb_array tx_array; struct ptr_ring tx_ring;
struct xdp_rxq_info xdp_rxq; struct xdp_rxq_info xdp_rxq;
}; };
...@@ -241,6 +241,24 @@ struct tun_struct { ...@@ -241,6 +241,24 @@ struct tun_struct {
struct tun_steering_prog __rcu *steering_prog; struct tun_steering_prog __rcu *steering_prog;
}; };
bool tun_is_xdp_buff(void *ptr)
{
return (unsigned long)ptr & TUN_XDP_FLAG;
}
EXPORT_SYMBOL(tun_is_xdp_buff);
void *tun_xdp_to_ptr(void *ptr)
{
return (void *)((unsigned long)ptr | TUN_XDP_FLAG);
}
EXPORT_SYMBOL(tun_xdp_to_ptr);
void *tun_ptr_to_xdp(void *ptr)
{
return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
}
EXPORT_SYMBOL(tun_ptr_to_xdp);
static int tun_napi_receive(struct napi_struct *napi, int budget) static int tun_napi_receive(struct napi_struct *napi, int budget)
{ {
struct tun_file *tfile = container_of(napi, struct tun_file, napi); struct tun_file *tfile = container_of(napi, struct tun_file, napi);
...@@ -631,12 +649,25 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile) ...@@ -631,12 +649,25 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
return tun; return tun;
} }
static void tun_ptr_free(void *ptr)
{
if (!ptr)
return;
if (tun_is_xdp_buff(ptr)) {
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
put_page(virt_to_head_page(xdp->data));
} else {
__skb_array_destroy_skb(ptr);
}
}
static void tun_queue_purge(struct tun_file *tfile) static void tun_queue_purge(struct tun_file *tfile)
{ {
struct sk_buff *skb; void *ptr;
while ((skb = skb_array_consume(&tfile->tx_array)) != NULL) while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL)
kfree_skb(skb); tun_ptr_free(ptr);
skb_queue_purge(&tfile->sk.sk_write_queue); skb_queue_purge(&tfile->sk.sk_write_queue);
skb_queue_purge(&tfile->sk.sk_error_queue); skb_queue_purge(&tfile->sk.sk_error_queue);
...@@ -689,7 +720,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean) ...@@ -689,7 +720,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
unregister_netdevice(tun->dev); unregister_netdevice(tun->dev);
} }
if (tun) { if (tun) {
skb_array_cleanup(&tfile->tx_array); ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
xdp_rxq_info_unreg(&tfile->xdp_rxq); xdp_rxq_info_unreg(&tfile->xdp_rxq);
} }
sock_put(&tfile->sk); sock_put(&tfile->sk);
...@@ -782,7 +813,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, ...@@ -782,7 +813,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
} }
if (!tfile->detached && if (!tfile->detached &&
skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) { ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) {
err = -ENOMEM; err = -ENOMEM;
goto out; goto out;
} }
...@@ -1048,7 +1079,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -1048,7 +1079,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
nf_reset(skb); nf_reset(skb);
if (skb_array_produce(&tfile->tx_array, skb)) if (ptr_ring_produce(&tfile->tx_ring, skb))
goto drop; goto drop;
/* Notify and wake up reader process */ /* Notify and wake up reader process */
...@@ -1221,6 +1252,67 @@ static const struct net_device_ops tun_netdev_ops = { ...@@ -1221,6 +1252,67 @@ static const struct net_device_ops tun_netdev_ops = {
.ndo_get_stats64 = tun_net_get_stats64, .ndo_get_stats64 = tun_net_get_stats64,
}; };
static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
{
struct tun_struct *tun = netdev_priv(dev);
struct xdp_buff *buff = xdp->data_hard_start;
int headroom = xdp->data - xdp->data_hard_start;
struct tun_file *tfile;
u32 numqueues;
int ret = 0;
/* Assure headroom is available and buff is properly aligned */
if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp)))
return -ENOSPC;
*buff = *xdp;
rcu_read_lock();
numqueues = READ_ONCE(tun->numqueues);
if (!numqueues) {
ret = -ENOSPC;
goto out;
}
tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]);
/* Encode the XDP flag into lowest bit for consumer to differ
* XDP buffer from sk_buff.
*/
if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) {
this_cpu_inc(tun->pcpu_stats->tx_dropped);
ret = -ENOSPC;
}
out:
rcu_read_unlock();
return ret;
}
static void tun_xdp_flush(struct net_device *dev)
{
struct tun_struct *tun = netdev_priv(dev);
struct tun_file *tfile;
u32 numqueues;
rcu_read_lock();
numqueues = READ_ONCE(tun->numqueues);
if (!numqueues)
goto out;
tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
numqueues]);
/* Notify and wake up reader process */
if (tfile->flags & TUN_FASYNC)
kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
tfile->socket.sk->sk_data_ready(tfile->socket.sk);
out:
rcu_read_unlock();
}
static const struct net_device_ops tap_netdev_ops = { static const struct net_device_ops tap_netdev_ops = {
.ndo_uninit = tun_net_uninit, .ndo_uninit = tun_net_uninit,
.ndo_open = tun_net_open, .ndo_open = tun_net_open,
...@@ -1238,6 +1330,8 @@ static const struct net_device_ops tap_netdev_ops = { ...@@ -1238,6 +1330,8 @@ static const struct net_device_ops tap_netdev_ops = {
.ndo_set_rx_headroom = tun_set_headroom, .ndo_set_rx_headroom = tun_set_headroom,
.ndo_get_stats64 = tun_net_get_stats64, .ndo_get_stats64 = tun_net_get_stats64,
.ndo_bpf = tun_xdp, .ndo_bpf = tun_xdp,
.ndo_xdp_xmit = tun_xdp_xmit,
.ndo_xdp_flush = tun_xdp_flush,
}; };
static void tun_flow_init(struct tun_struct *tun) static void tun_flow_init(struct tun_struct *tun)
...@@ -1316,7 +1410,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait) ...@@ -1316,7 +1410,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
poll_wait(file, sk_sleep(sk), wait); poll_wait(file, sk_sleep(sk), wait);
if (!skb_array_empty(&tfile->tx_array)) if (!ptr_ring_empty(&tfile->tx_ring))
mask |= POLLIN | POLLRDNORM; mask |= POLLIN | POLLRDNORM;
if (tun->dev->flags & IFF_UP && if (tun->dev->flags & IFF_UP &&
...@@ -1862,6 +1956,40 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1862,6 +1956,40 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
return result; return result;
} }
static ssize_t tun_put_user_xdp(struct tun_struct *tun,
struct tun_file *tfile,
struct xdp_buff *xdp,
struct iov_iter *iter)
{
int vnet_hdr_sz = 0;
size_t size = xdp->data_end - xdp->data;
struct tun_pcpu_stats *stats;
size_t ret;
if (tun->flags & IFF_VNET_HDR) {
struct virtio_net_hdr gso = { 0 };
vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
if (unlikely(iov_iter_count(iter) < vnet_hdr_sz))
return -EINVAL;
if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) !=
sizeof(gso)))
return -EFAULT;
iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
}
ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz;
stats = get_cpu_ptr(tun->pcpu_stats);
u64_stats_update_begin(&stats->syncp);
stats->tx_packets++;
stats->tx_bytes += ret;
u64_stats_update_end(&stats->syncp);
put_cpu_ptr(tun->pcpu_stats);
return ret;
}
/* Put packet to the user space buffer */ /* Put packet to the user space buffer */
static ssize_t tun_put_user(struct tun_struct *tun, static ssize_t tun_put_user(struct tun_struct *tun,
struct tun_file *tfile, struct tun_file *tfile,
...@@ -1959,15 +2087,14 @@ static ssize_t tun_put_user(struct tun_struct *tun, ...@@ -1959,15 +2087,14 @@ static ssize_t tun_put_user(struct tun_struct *tun,
return total; return total;
} }
static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
int *err)
{ {
DECLARE_WAITQUEUE(wait, current); DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb = NULL; void *ptr = NULL;
int error = 0; int error = 0;
skb = skb_array_consume(&tfile->tx_array); ptr = ptr_ring_consume(&tfile->tx_ring);
if (skb) if (ptr)
goto out; goto out;
if (noblock) { if (noblock) {
error = -EAGAIN; error = -EAGAIN;
...@@ -1978,8 +2105,8 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, ...@@ -1978,8 +2105,8 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
current->state = TASK_INTERRUPTIBLE; current->state = TASK_INTERRUPTIBLE;
while (1) { while (1) {
skb = skb_array_consume(&tfile->tx_array); ptr = ptr_ring_consume(&tfile->tx_ring);
if (skb) if (ptr)
break; break;
if (signal_pending(current)) { if (signal_pending(current)) {
error = -ERESTARTSYS; error = -ERESTARTSYS;
...@@ -1998,12 +2125,12 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, ...@@ -1998,12 +2125,12 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
out: out:
*err = error; *err = error;
return skb; return ptr;
} }
static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
struct iov_iter *to, struct iov_iter *to,
int noblock, struct sk_buff *skb) int noblock, void *ptr)
{ {
ssize_t ret; ssize_t ret;
int err; int err;
...@@ -2011,23 +2138,31 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, ...@@ -2011,23 +2138,31 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
tun_debug(KERN_INFO, tun, "tun_do_read\n"); tun_debug(KERN_INFO, tun, "tun_do_read\n");
if (!iov_iter_count(to)) { if (!iov_iter_count(to)) {
if (skb) tun_ptr_free(ptr);
kfree_skb(skb);
return 0; return 0;
} }
if (!skb) { if (!ptr) {
/* Read frames from ring */ /* Read frames from ring */
skb = tun_ring_recv(tfile, noblock, &err); ptr = tun_ring_recv(tfile, noblock, &err);
if (!skb) if (!ptr)
return err; return err;
} }
if (tun_is_xdp_buff(ptr)) {
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
ret = tun_put_user_xdp(tun, tfile, xdp, to);
put_page(virt_to_head_page(xdp->data));
} else {
struct sk_buff *skb = ptr;
ret = tun_put_user(tun, tfile, skb, to); ret = tun_put_user(tun, tfile, skb, to);
if (unlikely(ret < 0)) if (unlikely(ret < 0))
kfree_skb(skb); kfree_skb(skb);
else else
consume_skb(skb); consume_skb(skb);
}
return ret; return ret;
} }
...@@ -2164,12 +2299,12 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, ...@@ -2164,12 +2299,12 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
{ {
struct tun_file *tfile = container_of(sock, struct tun_file, socket); struct tun_file *tfile = container_of(sock, struct tun_file, socket);
struct tun_struct *tun = tun_get(tfile); struct tun_struct *tun = tun_get(tfile);
struct sk_buff *skb = m->msg_control; void *ptr = m->msg_control;
int ret; int ret;
if (!tun) { if (!tun) {
ret = -EBADFD; ret = -EBADFD;
goto out_free_skb; goto out_free;
} }
if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) { if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
...@@ -2181,7 +2316,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, ...@@ -2181,7 +2316,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
SOL_PACKET, TUN_TX_TIMESTAMP); SOL_PACKET, TUN_TX_TIMESTAMP);
goto out; goto out;
} }
ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb); ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr);
if (ret > (ssize_t)total_len) { if (ret > (ssize_t)total_len) {
m->msg_flags |= MSG_TRUNC; m->msg_flags |= MSG_TRUNC;
ret = flags & MSG_TRUNC ? ret : total_len; ret = flags & MSG_TRUNC ? ret : total_len;
...@@ -2192,12 +2327,25 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, ...@@ -2192,12 +2327,25 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
out_put_tun: out_put_tun:
tun_put(tun); tun_put(tun);
out_free_skb: out_free:
if (skb) tun_ptr_free(ptr);
kfree_skb(skb);
return ret; return ret;
} }
static int tun_ptr_peek_len(void *ptr)
{
if (likely(ptr)) {
if (tun_is_xdp_buff(ptr)) {
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
return xdp->data_end - xdp->data;
}
return __skb_array_len_with_tag(ptr);
} else {
return 0;
}
}
static int tun_peek_len(struct socket *sock) static int tun_peek_len(struct socket *sock)
{ {
struct tun_file *tfile = container_of(sock, struct tun_file, socket); struct tun_file *tfile = container_of(sock, struct tun_file, socket);
...@@ -2208,7 +2356,7 @@ static int tun_peek_len(struct socket *sock) ...@@ -2208,7 +2356,7 @@ static int tun_peek_len(struct socket *sock)
if (!tun) if (!tun)
return 0; return 0;
ret = skb_array_peek_len(&tfile->tx_array); ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len);
tun_put(tun); tun_put(tun);
return ret; return ret;
...@@ -3114,25 +3262,26 @@ static int tun_queue_resize(struct tun_struct *tun) ...@@ -3114,25 +3262,26 @@ static int tun_queue_resize(struct tun_struct *tun)
{ {
struct net_device *dev = tun->dev; struct net_device *dev = tun->dev;
struct tun_file *tfile; struct tun_file *tfile;
struct skb_array **arrays; struct ptr_ring **rings;
int n = tun->numqueues + tun->numdisabled; int n = tun->numqueues + tun->numdisabled;
int ret, i; int ret, i;
arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL); rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL);
if (!arrays) if (!rings)
return -ENOMEM; return -ENOMEM;
for (i = 0; i < tun->numqueues; i++) { for (i = 0; i < tun->numqueues; i++) {
tfile = rtnl_dereference(tun->tfiles[i]); tfile = rtnl_dereference(tun->tfiles[i]);
arrays[i] = &tfile->tx_array; rings[i] = &tfile->tx_ring;
} }
list_for_each_entry(tfile, &tun->disabled, next) list_for_each_entry(tfile, &tun->disabled, next)
arrays[i++] = &tfile->tx_array; rings[i++] = &tfile->tx_ring;
ret = skb_array_resize_multiple(arrays, n, ret = ptr_ring_resize_multiple(rings, n,
dev->tx_queue_len, GFP_KERNEL); dev->tx_queue_len, GFP_KERNEL,
tun_ptr_free);
kfree(arrays); kfree(rings);
return ret; return ret;
} }
...@@ -3218,7 +3367,7 @@ struct socket *tun_get_socket(struct file *file) ...@@ -3218,7 +3367,7 @@ struct socket *tun_get_socket(struct file *file)
} }
EXPORT_SYMBOL_GPL(tun_get_socket); EXPORT_SYMBOL_GPL(tun_get_socket);
struct skb_array *tun_get_skb_array(struct file *file) struct ptr_ring *tun_get_tx_ring(struct file *file)
{ {
struct tun_file *tfile; struct tun_file *tfile;
...@@ -3227,9 +3376,9 @@ struct skb_array *tun_get_skb_array(struct file *file) ...@@ -3227,9 +3376,9 @@ struct skb_array *tun_get_skb_array(struct file *file)
tfile = file->private_data; tfile = file->private_data;
if (!tfile) if (!tfile)
return ERR_PTR(-EBADFD); return ERR_PTR(-EBADFD);
return &tfile->tx_array; return &tfile->tx_ring;
} }
EXPORT_SYMBOL_GPL(tun_get_skb_array); EXPORT_SYMBOL_GPL(tun_get_tx_ring);
module_init(tun_init); module_init(tun_init);
module_exit(tun_cleanup); module_exit(tun_cleanup);
......
...@@ -89,7 +89,7 @@ struct vhost_net_ubuf_ref { ...@@ -89,7 +89,7 @@ struct vhost_net_ubuf_ref {
#define VHOST_RX_BATCH 64 #define VHOST_RX_BATCH 64
struct vhost_net_buf { struct vhost_net_buf {
struct sk_buff **queue; void **queue;
int tail; int tail;
int head; int head;
}; };
...@@ -108,7 +108,7 @@ struct vhost_net_virtqueue { ...@@ -108,7 +108,7 @@ struct vhost_net_virtqueue {
/* Reference counting for outstanding ubufs. /* Reference counting for outstanding ubufs.
* Protected by vq mutex. Writers must also take device mutex. */ * Protected by vq mutex. Writers must also take device mutex. */
struct vhost_net_ubuf_ref *ubufs; struct vhost_net_ubuf_ref *ubufs;
struct skb_array *rx_array; struct ptr_ring *rx_ring;
struct vhost_net_buf rxq; struct vhost_net_buf rxq;
}; };
...@@ -158,7 +158,7 @@ static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq) ...@@ -158,7 +158,7 @@ static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
struct vhost_net_buf *rxq = &nvq->rxq; struct vhost_net_buf *rxq = &nvq->rxq;
rxq->head = 0; rxq->head = 0;
rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue, rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
VHOST_RX_BATCH); VHOST_RX_BATCH);
return rxq->tail; return rxq->tail;
} }
...@@ -167,13 +167,25 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq) ...@@ -167,13 +167,25 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
{ {
struct vhost_net_buf *rxq = &nvq->rxq; struct vhost_net_buf *rxq = &nvq->rxq;
if (nvq->rx_array && !vhost_net_buf_is_empty(rxq)) { if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) {
skb_array_unconsume(nvq->rx_array, rxq->queue + rxq->head, ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head,
vhost_net_buf_get_size(rxq)); vhost_net_buf_get_size(rxq),
__skb_array_destroy_skb);
rxq->head = rxq->tail = 0; rxq->head = rxq->tail = 0;
} }
} }
static int vhost_net_buf_peek_len(void *ptr)
{
if (tun_is_xdp_buff(ptr)) {
struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
return xdp->data_end - xdp->data;
}
return __skb_array_len_with_tag(ptr);
}
static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq) static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
{ {
struct vhost_net_buf *rxq = &nvq->rxq; struct vhost_net_buf *rxq = &nvq->rxq;
...@@ -185,7 +197,7 @@ static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq) ...@@ -185,7 +197,7 @@ static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
return 0; return 0;
out: out:
return __skb_array_len_with_tag(vhost_net_buf_get_ptr(rxq)); return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq));
} }
static void vhost_net_buf_init(struct vhost_net_buf *rxq) static void vhost_net_buf_init(struct vhost_net_buf *rxq)
...@@ -583,7 +595,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk) ...@@ -583,7 +595,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
int len = 0; int len = 0;
unsigned long flags; unsigned long flags;
if (rvq->rx_array) if (rvq->rx_ring)
return vhost_net_buf_peek(rvq); return vhost_net_buf_peek(rvq);
spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
...@@ -790,7 +802,7 @@ static void handle_rx(struct vhost_net *net) ...@@ -790,7 +802,7 @@ static void handle_rx(struct vhost_net *net)
* they refilled. */ * they refilled. */
goto out; goto out;
} }
if (nvq->rx_array) if (nvq->rx_ring)
msg.msg_control = vhost_net_buf_consume(&nvq->rxq); msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
/* On overrun, truncate and discard */ /* On overrun, truncate and discard */
if (unlikely(headcount > UIO_MAXIOV)) { if (unlikely(headcount > UIO_MAXIOV)) {
...@@ -896,7 +908,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) ...@@ -896,7 +908,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
struct vhost_net *n; struct vhost_net *n;
struct vhost_dev *dev; struct vhost_dev *dev;
struct vhost_virtqueue **vqs; struct vhost_virtqueue **vqs;
struct sk_buff **queue; void **queue;
int i; int i;
n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL); n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
...@@ -908,7 +920,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) ...@@ -908,7 +920,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
return -ENOMEM; return -ENOMEM;
} }
queue = kmalloc_array(VHOST_RX_BATCH, sizeof(struct sk_buff *), queue = kmalloc_array(VHOST_RX_BATCH, sizeof(void *),
GFP_KERNEL); GFP_KERNEL);
if (!queue) { if (!queue) {
kfree(vqs); kfree(vqs);
...@@ -1046,23 +1058,23 @@ static struct socket *get_raw_socket(int fd) ...@@ -1046,23 +1058,23 @@ static struct socket *get_raw_socket(int fd)
return ERR_PTR(r); return ERR_PTR(r);
} }
static struct skb_array *get_tap_skb_array(int fd) static struct ptr_ring *get_tap_ptr_ring(int fd)
{ {
struct skb_array *array; struct ptr_ring *ring;
struct file *file = fget(fd); struct file *file = fget(fd);
if (!file) if (!file)
return NULL; return NULL;
array = tun_get_skb_array(file); ring = tun_get_tx_ring(file);
if (!IS_ERR(array)) if (!IS_ERR(ring))
goto out; goto out;
array = tap_get_skb_array(file); ring = tap_get_ptr_ring(file);
if (!IS_ERR(array)) if (!IS_ERR(ring))
goto out; goto out;
array = NULL; ring = NULL;
out: out:
fput(file); fput(file);
return array; return ring;
} }
static struct socket *get_tap_socket(int fd) static struct socket *get_tap_socket(int fd)
...@@ -1143,7 +1155,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) ...@@ -1143,7 +1155,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
vq->private_data = sock; vq->private_data = sock;
vhost_net_buf_unproduce(nvq); vhost_net_buf_unproduce(nvq);
if (index == VHOST_NET_VQ_RX) if (index == VHOST_NET_VQ_RX)
nvq->rx_array = get_tap_skb_array(fd); nvq->rx_ring = get_tap_ptr_ring(fd);
r = vhost_vq_init_access(vq); r = vhost_vq_init_access(vq);
if (r) if (r)
goto err_used; goto err_used;
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#if IS_ENABLED(CONFIG_TAP) #if IS_ENABLED(CONFIG_TAP)
struct socket *tap_get_socket(struct file *); struct socket *tap_get_socket(struct file *);
struct skb_array *tap_get_skb_array(struct file *file); struct ptr_ring *tap_get_ptr_ring(struct file *file);
#else #else
#include <linux/err.h> #include <linux/err.h>
#include <linux/errno.h> #include <linux/errno.h>
...@@ -14,7 +14,7 @@ static inline struct socket *tap_get_socket(struct file *f) ...@@ -14,7 +14,7 @@ static inline struct socket *tap_get_socket(struct file *f)
{ {
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
static inline struct skb_array *tap_get_skb_array(struct file *f) static inline struct ptr_ring *tap_get_ptr_ring(struct file *f)
{ {
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
...@@ -70,7 +70,7 @@ struct tap_queue { ...@@ -70,7 +70,7 @@ struct tap_queue {
u16 queue_index; u16 queue_index;
bool enabled; bool enabled;
struct list_head next; struct list_head next;
struct skb_array skb_array; struct ptr_ring ring;
}; };
rx_handler_result_t tap_handle_frame(struct sk_buff **pskb); rx_handler_result_t tap_handle_frame(struct sk_buff **pskb);
......
...@@ -17,9 +17,14 @@ ...@@ -17,9 +17,14 @@
#include <uapi/linux/if_tun.h> #include <uapi/linux/if_tun.h>
#define TUN_XDP_FLAG 0x1UL
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
struct socket *tun_get_socket(struct file *); struct socket *tun_get_socket(struct file *);
struct skb_array *tun_get_skb_array(struct file *file); struct ptr_ring *tun_get_tx_ring(struct file *file);
bool tun_is_xdp_buff(void *ptr);
void *tun_xdp_to_ptr(void *ptr);
void *tun_ptr_to_xdp(void *ptr);
#else #else
#include <linux/err.h> #include <linux/err.h>
#include <linux/errno.h> #include <linux/errno.h>
...@@ -29,9 +34,21 @@ static inline struct socket *tun_get_socket(struct file *f) ...@@ -29,9 +34,21 @@ static inline struct socket *tun_get_socket(struct file *f)
{ {
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
static inline struct skb_array *tun_get_skb_array(struct file *f) static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
{ {
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
static inline bool tun_is_xdp_buff(void *ptr)
{
return false;
}
void *tun_xdp_to_ptr(void *ptr)
{
return NULL;
}
void *tun_ptr_to_xdp(void *ptr)
{
return NULL;
}
#endif /* CONFIG_TUN */ #endif /* CONFIG_TUN */
#endif /* __IF_TUN_H */ #endif /* __IF_TUN_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment