Commit ac383f58 authored by Sunil Muthuswamy, committed by David S. Miller

hv_sock: perf: Allow the socket buffer size options to influence the actual socket buffers

Currently, the hv_sock buffer size is static and can't scale to the
bandwidth requirements of the application. This change allows the
applications to influence the socket buffer sizes using the SO_SNDBUF and
the SO_RCVBUF socket options.

A few interesting points to note:
1. Since the VMBUS does not allow a resize operation of the ring size, the
socket buffer size option should be set prior to establishing the
connection for it to take effect.
2. Setting the socket option comes with the cost of that much memory being
reserved/allocated by the kernel, for the lifetime of the connection.

Perf data:
Total Data Transfer: 1GB
Single threaded reader/writer
Results below are summarized over 10 iterations.

Linux hvsocket writer + Windows hvsocket reader:
|---------------------------------------------------------------------------------------------|
|Packet size ->   |      128B       |       1KB       |       4KB       |        64KB         |
|---------------------------------------------------------------------------------------------|
|SO_SNDBUF size | |                 Throughput in MB/s (min/max/avg/median):                  |
|               v |                                                                           |
|---------------------------------------------------------------------------------------------|
|      Default    | 109/118/114/116 | 636/774/701/700 | 435/507/480/476 |   410/491/462/470   |
|      16KB       | 110/116/112/111 | 575/705/662/671 | 749/900/854/869 |   592/824/692/676   |
|      32KB       | 108/120/115/115 | 703/823/767/772 | 718/878/850/866 | 1593/2124/2000/2085 |
|      64KB       | 108/119/114/114 | 592/732/683/688 | 805/934/903/911 | 1784/1943/1862/1843 |
|---------------------------------------------------------------------------------------------|

Windows hvsocket writer + Linux hvsocket reader:
|---------------------------------------------------------------------------------------------|
|Packet size ->   |     128B    |      1KB        |          4KB        |        64KB         |
|---------------------------------------------------------------------------------------------|
|SO_RCVBUF size | |               Throughput in MB/s (min/max/avg/median):                    |
|               v |                                                                           |
|---------------------------------------------------------------------------------------------|
|      Default    | 69/82/75/73 | 313/343/333/336 |   418/477/446/445   |   659/701/676/678   |
|      16KB       | 69/83/76/77 | 350/401/375/382 |   506/548/517/516   |   602/624/615/615   |
|      32KB       | 62/83/73/73 | 471/529/496/494 |   830/1046/935/939  | 944/1180/1070/1100  |
|      64KB       | 64/70/68/69 | 467/533/501/497 | 1260/1590/1430/1431 | 1605/1819/1670/1660 |
|---------------------------------------------------------------------------------------------|
Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Reviewed-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 0db355d4
...@@ -23,14 +23,14 @@ ...@@ -23,14 +23,14 @@
#include <net/sock.h> #include <net/sock.h>
#include <net/af_vsock.h> #include <net/af_vsock.h>
/* The host side's design of the feature requires 6 exact 4KB pages for /* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some
* recv/send rings respectively -- this is suboptimal considering memory * stricter requirements on the hv_sock ring buffer size of six 4K pages. Newer
* consumption, however unluckily we have to live with it, before the * hosts don't have this limitation; but, keep the defaults the same for compat.
* host comes up with a better design in the future.
*/ */
#define PAGE_SIZE_4K 4096 #define PAGE_SIZE_4K 4096
#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6) #define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6) #define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
#define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64)
/* The MTU is 16KB per the host side's design */ /* The MTU is 16KB per the host side's design */
#define HVS_MTU_SIZE (1024 * 16) #define HVS_MTU_SIZE (1024 * 16)
...@@ -344,9 +344,12 @@ static void hvs_open_connection(struct vmbus_channel *chan) ...@@ -344,9 +344,12 @@ static void hvs_open_connection(struct vmbus_channel *chan)
struct sockaddr_vm addr; struct sockaddr_vm addr;
struct sock *sk, *new = NULL; struct sock *sk, *new = NULL;
struct vsock_sock *vnew; struct vsock_sock *vnew = NULL;
struct hvsock *hvs, *hvs_new; struct hvsock *hvs = NULL;
struct hvsock *hvs_new = NULL;
int rcvbuf;
int ret; int ret;
int sndbuf;
if_type = &chan->offermsg.offer.if_type; if_type = &chan->offermsg.offer.if_type;
if_instance = &chan->offermsg.offer.if_instance; if_instance = &chan->offermsg.offer.if_instance;
...@@ -388,9 +391,34 @@ static void hvs_open_connection(struct vmbus_channel *chan) ...@@ -388,9 +391,34 @@ static void hvs_open_connection(struct vmbus_channel *chan)
} }
set_channel_read_mode(chan, HV_CALL_DIRECT); set_channel_read_mode(chan, HV_CALL_DIRECT);
ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE,
RINGBUFFER_HVS_RCV_SIZE, NULL, 0, /* Use the socket buffer sizes as hints for the VMBUS ring size. For
hvs_channel_cb, conn_from_host ? new : sk); * server side sockets, 'sk' is the parent socket and thus, this will
* allow the child sockets to inherit the size from the parent. Keep
* the mins to the default value and align to page size as per VMBUS
* requirements.
* For the max, the socket core library will limit the socket buffer
* size that can be set by the user, but, since currently, the hv_sock
* VMBUS ring buffer is physically contiguous allocation, restrict it
* further.
* Older versions of hv_sock host side code cannot handle bigger VMBUS
* ring buffer size. Use the version number to limit the change to newer
* versions.
*/
if (vmbus_proto_version < VERSION_WIN10_V5) {
sndbuf = RINGBUFFER_HVS_SND_SIZE;
rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
} else {
sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
sndbuf = ALIGN(sndbuf, PAGE_SIZE);
rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
rcvbuf = ALIGN(rcvbuf, PAGE_SIZE);
}
ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
conn_from_host ? new : sk);
if (ret != 0) { if (ret != 0) {
if (conn_from_host) { if (conn_from_host) {
hvs_new->chan = NULL; hvs_new->chan = NULL;
...@@ -441,6 +469,7 @@ static u32 hvs_get_local_cid(void) ...@@ -441,6 +469,7 @@ static u32 hvs_get_local_cid(void)
static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk) static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
{ {
struct hvsock *hvs; struct hvsock *hvs;
struct sock *sk = sk_vsock(vsk);
hvs = kzalloc(sizeof(*hvs), GFP_KERNEL); hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
if (!hvs) if (!hvs)
...@@ -448,7 +477,8 @@ static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk) ...@@ -448,7 +477,8 @@ static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
vsk->trans = hvs; vsk->trans = hvs;
hvs->vsk = vsk; hvs->vsk = vsk;
sk->sk_sndbuf = RINGBUFFER_HVS_SND_SIZE;
sk->sk_rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment