Commit 362899b8, authored by Jason Wang, committed by David S. Miller

macvtap: switch to use skb array

This patch switches to using an skb array instead of sk_receive_queue to
avoid spinlock contention. Tests show about a 21% improvement in
guest rx pps:

Before: 1472731 pkts/s
After:  1786289 pkts/s
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1b16bf42
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <net/rtnetlink.h> #include <net/rtnetlink.h>
#include <net/sock.h> #include <net/sock.h>
#include <linux/virtio_net.h> #include <linux/virtio_net.h>
#include <linux/skb_array.h>
/* /*
* A macvtap queue is the central object of this driver, it connects * A macvtap queue is the central object of this driver, it connects
...@@ -43,6 +44,7 @@ struct macvtap_queue { ...@@ -43,6 +44,7 @@ struct macvtap_queue {
u16 queue_index; u16 queue_index;
bool enabled; bool enabled;
struct list_head next; struct list_head next;
struct skb_array skb_array;
}; };
#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE) #define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
...@@ -273,6 +275,7 @@ static void macvtap_put_queue(struct macvtap_queue *q) ...@@ -273,6 +275,7 @@ static void macvtap_put_queue(struct macvtap_queue *q)
rtnl_unlock(); rtnl_unlock();
synchronize_rcu(); synchronize_rcu();
skb_array_cleanup(&q->skb_array);
sock_put(&q->sk); sock_put(&q->sk);
} }
...@@ -366,7 +369,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) ...@@ -366,7 +369,7 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
if (!q) if (!q)
return RX_HANDLER_PASS; return RX_HANDLER_PASS;
if (skb_queue_len(&q->sk.sk_receive_queue) >= dev->tx_queue_len) if (__skb_array_full(&q->skb_array))
goto drop; goto drop;
skb_push(skb, ETH_HLEN); skb_push(skb, ETH_HLEN);
...@@ -384,7 +387,8 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) ...@@ -384,7 +387,8 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
goto drop; goto drop;
if (!segs) { if (!segs) {
skb_queue_tail(&q->sk.sk_receive_queue, skb); if (skb_array_produce(&q->skb_array, skb))
goto drop;
goto wake_up; goto wake_up;
} }
...@@ -393,7 +397,11 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) ...@@ -393,7 +397,11 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
struct sk_buff *nskb = segs->next; struct sk_buff *nskb = segs->next;
segs->next = NULL; segs->next = NULL;
skb_queue_tail(&q->sk.sk_receive_queue, segs); if (skb_array_produce(&q->skb_array, segs)) {
kfree_skb(segs);
kfree_skb_list(nskb);
break;
}
segs = nskb; segs = nskb;
} }
} else { } else {
...@@ -406,7 +414,8 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb) ...@@ -406,7 +414,8 @@ static rx_handler_result_t macvtap_handle_frame(struct sk_buff **pskb)
!(features & NETIF_F_CSUM_MASK) && !(features & NETIF_F_CSUM_MASK) &&
skb_checksum_help(skb)) skb_checksum_help(skb))
goto drop; goto drop;
skb_queue_tail(&q->sk.sk_receive_queue, skb); if (skb_array_produce(&q->skb_array, skb))
goto drop;
} }
wake_up: wake_up:
...@@ -523,7 +532,11 @@ static void macvtap_sock_write_space(struct sock *sk) ...@@ -523,7 +532,11 @@ static void macvtap_sock_write_space(struct sock *sk)
static void macvtap_sock_destruct(struct sock *sk) static void macvtap_sock_destruct(struct sock *sk)
{ {
skb_queue_purge(&sk->sk_receive_queue); struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk);
struct sk_buff *skb;
while ((skb = skb_array_consume(&q->skb_array)) != NULL)
kfree(skb);
} }
static int macvtap_open(struct inode *inode, struct file *file) static int macvtap_open(struct inode *inode, struct file *file)
...@@ -536,13 +549,13 @@ static int macvtap_open(struct inode *inode, struct file *file) ...@@ -536,13 +549,13 @@ static int macvtap_open(struct inode *inode, struct file *file)
rtnl_lock(); rtnl_lock();
dev = dev_get_by_macvtap_minor(iminor(inode)); dev = dev_get_by_macvtap_minor(iminor(inode));
if (!dev) if (!dev)
goto out; goto err;
err = -ENOMEM; err = -ENOMEM;
q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL,
&macvtap_proto, 0); &macvtap_proto, 0);
if (!q) if (!q)
goto out; goto err;
RCU_INIT_POINTER(q->sock.wq, &q->wq); RCU_INIT_POINTER(q->sock.wq, &q->wq);
init_waitqueue_head(&q->wq.wait); init_waitqueue_head(&q->wq.wait);
...@@ -566,11 +579,24 @@ static int macvtap_open(struct inode *inode, struct file *file) ...@@ -566,11 +579,24 @@ static int macvtap_open(struct inode *inode, struct file *file)
if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG)) if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG))
sock_set_flag(&q->sk, SOCK_ZEROCOPY); sock_set_flag(&q->sk, SOCK_ZEROCOPY);
err = -ENOMEM;
if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL))
goto err_array;
err = macvtap_set_queue(dev, file, q); err = macvtap_set_queue(dev, file, q);
if (err) if (err)
sock_put(&q->sk); goto err_queue;
out: dev_put(dev);
rtnl_unlock();
return err;
err_queue:
skb_array_cleanup(&q->skb_array);
err_array:
sock_put(&q->sk);
err:
if (dev) if (dev)
dev_put(dev); dev_put(dev);
...@@ -596,7 +622,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait) ...@@ -596,7 +622,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait)
mask = 0; mask = 0;
poll_wait(file, &q->wq.wait, wait); poll_wait(file, &q->wq.wait, wait);
if (!skb_queue_empty(&q->sk.sk_receive_queue)) if (!skb_array_empty(&q->skb_array))
mask |= POLLIN | POLLRDNORM; mask |= POLLIN | POLLRDNORM;
if (sock_writeable(&q->sk) || if (sock_writeable(&q->sk) ||
...@@ -856,7 +882,7 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q, ...@@ -856,7 +882,7 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q,
TASK_INTERRUPTIBLE); TASK_INTERRUPTIBLE);
/* Read frames from the queue */ /* Read frames from the queue */
skb = skb_dequeue(&q->sk.sk_receive_queue); skb = skb_array_consume(&q->skb_array);
if (skb) if (skb)
break; break;
if (noblock) { if (noblock) {
...@@ -1180,10 +1206,18 @@ static int macvtap_recvmsg(struct socket *sock, struct msghdr *m, ...@@ -1180,10 +1206,18 @@ static int macvtap_recvmsg(struct socket *sock, struct msghdr *m,
return ret; return ret;
} }
/*
 * proto_ops .peek_len hook: map the socket back to its enclosing
 * macvtap_queue and return whatever skb_array_peek_len() reports for
 * that queue's skb_array.
 */
static int macvtap_peek_len(struct socket *sock)
{
	struct macvtap_queue *queue;

	queue = container_of(sock, struct macvtap_queue, sock);
	return skb_array_peek_len(&queue->skb_array);
}
/* Ops structure to mimic raw sockets with tun */ /* Ops structure to mimic raw sockets with tun */
static const struct proto_ops macvtap_socket_ops = { static const struct proto_ops macvtap_socket_ops = {
.sendmsg = macvtap_sendmsg, .sendmsg = macvtap_sendmsg,
.recvmsg = macvtap_recvmsg, .recvmsg = macvtap_recvmsg,
.peek_len = macvtap_peek_len,
}; };
/* Get an underlying socket object from tun file. Returns error unless file is /* Get an underlying socket object from tun file. Returns error unless file is
...@@ -1202,6 +1236,28 @@ struct socket *macvtap_get_socket(struct file *file) ...@@ -1202,6 +1236,28 @@ struct socket *macvtap_get_socket(struct file *file)
} }
EXPORT_SYMBOL_GPL(macvtap_get_socket); EXPORT_SYMBOL_GPL(macvtap_get_socket);
/*
 * Resize the skb_array of every queue attached to @vlan to the device's
 * current tx_queue_len.
 *
 * Collects a pointer to each queue's skb_array into a temporary table and
 * hands the whole table to skb_array_resize_multiple().
 *
 * Returns -ENOMEM if the temporary table cannot be allocated, otherwise
 * the result of skb_array_resize_multiple().
 */
static int macvtap_queue_resize(struct macvlan_dev *vlan)
{
	struct net_device *dev = vlan->dev;
	struct macvtap_queue *q;
	struct skb_array **arrays;
	int n = vlan->numqueues;
	int ret, i = 0;

	/*
	 * kmalloc_array() checks the n * size multiplication for overflow,
	 * unlike an open-coded "sizeof *arrays * n".
	 */
	arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL);
	if (!arrays)
		return -ENOMEM;

	/* assumes queue_list holds exactly numqueues entries — TODO confirm */
	list_for_each_entry(q, &vlan->queue_list, next)
		arrays[i++] = &q->skb_array;

	ret = skb_array_resize_multiple(arrays, n,
					dev->tx_queue_len, GFP_KERNEL);

	kfree(arrays);
	return ret;
}
static int macvtap_device_event(struct notifier_block *unused, static int macvtap_device_event(struct notifier_block *unused,
unsigned long event, void *ptr) unsigned long event, void *ptr)
{ {
...@@ -1249,6 +1305,10 @@ static int macvtap_device_event(struct notifier_block *unused, ...@@ -1249,6 +1305,10 @@ static int macvtap_device_event(struct notifier_block *unused,
device_destroy(&macvtap_class, devt); device_destroy(&macvtap_class, devt);
macvtap_free_minor(vlan); macvtap_free_minor(vlan);
break; break;
case NETDEV_CHANGE_TX_QUEUE_LEN:
if (macvtap_queue_resize(vlan))
return NOTIFY_BAD;
break;
} }
return NOTIFY_DONE; return NOTIFY_DONE;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment