Commit aacb0c2e authored by Eric Dumazet, committed by David S. Miller

selftests: net: tcp_mmap must use TCP_ZEROCOPY_RECEIVE

After a prior kernel change, mmap() on a TCP socket only reserves a VMA.

We have to use getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...)
to perform the transfer of pages from skbs in the TCP receive queue into such a VMA.

/*
 * Argument block for getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...).
 * The caller fills in the "in" fields before the call; the kernel fills in
 * the "out" fields when the call succeeds.
 */
struct tcp_zerocopy_receive {
	__u64 address;		/* in: address of the mapping (VMA reserved by mmap() on the socket) */
	__u32 length;		/* in: number of bytes to map; out: number of bytes actually mapped */
	__u32 recv_skip_hint;	/* out: bytes that could not be mapped and should be consumed with read()/recv()/recvmsg() */
};

After a successful getsockopt(...TCP_ZEROCOPY_RECEIVE...), @length contains
the number of bytes that were mapped, and @recv_skip_hint contains the number
of bytes that should be read using conventional read()/recv()/recvmsg() system
calls, in order to skip a sequence of bytes that cannot be mapped because it is
not properly page aligned.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 05255b82
...@@ -76,9 +76,10 @@ ...@@ -76,9 +76,10 @@
#include <time.h> #include <time.h>
#include <sys/time.h> #include <sys/time.h>
#include <netinet/in.h> #include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h> #include <arpa/inet.h>
#include <poll.h> #include <poll.h>
#include <linux/tcp.h>
#include <assert.h>
#ifndef MSG_ZEROCOPY #ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000 #define MSG_ZEROCOPY 0x4000000
...@@ -134,11 +135,12 @@ void hash_zone(void *zone, unsigned int length) ...@@ -134,11 +135,12 @@ void hash_zone(void *zone, unsigned int length)
void *child_thread(void *arg) void *child_thread(void *arg)
{ {
unsigned long total_mmap = 0, total = 0; unsigned long total_mmap = 0, total = 0;
struct tcp_zerocopy_receive zc;
unsigned long delta_usec; unsigned long delta_usec;
int flags = MAP_SHARED; int flags = MAP_SHARED;
struct timeval t0, t1; struct timeval t0, t1;
char *buffer = NULL; char *buffer = NULL;
void *oaddr = NULL; void *addr = NULL;
double throughput; double throughput;
struct rusage ru; struct rusage ru;
int lu, fd; int lu, fd;
...@@ -153,41 +155,46 @@ void *child_thread(void *arg) ...@@ -153,41 +155,46 @@ void *child_thread(void *arg)
perror("malloc"); perror("malloc");
goto error; goto error;
} }
if (zflg) {
addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
if (addr == (void *)-1)
zflg = 0;
}
while (1) { while (1) {
struct pollfd pfd = { .fd = fd, .events = POLLIN, }; struct pollfd pfd = { .fd = fd, .events = POLLIN, };
int sub; int sub;
poll(&pfd, 1, 10000); poll(&pfd, 1, 10000);
if (zflg) { if (zflg) {
void *naddr; socklen_t zc_len = sizeof(zc);
int res;
naddr = mmap(oaddr, chunk_size, PROT_READ, flags, fd, 0);
if (naddr == (void *)-1) { zc.address = (__u64)addr;
if (errno == EAGAIN) { zc.length = chunk_size;
/* That is if SO_RCVLOWAT is buggy */ zc.recv_skip_hint = 0;
usleep(1000); res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
continue; &zc, &zc_len);
} if (res == -1)
if (errno == EINVAL) {
flags = MAP_SHARED;
oaddr = NULL;
goto fallback;
}
if (errno != EIO)
perror("mmap()");
break; break;
if (zc.length) {
assert(zc.length <= chunk_size);
total_mmap += zc.length;
if (xflg)
hash_zone(addr, zc.length);
total += zc.length;
} }
total_mmap += chunk_size; if (zc.recv_skip_hint) {
assert(zc.recv_skip_hint <= chunk_size);
lu = read(fd, buffer, zc.recv_skip_hint);
if (lu > 0) {
if (xflg) if (xflg)
hash_zone(naddr, chunk_size); hash_zone(buffer, lu);
total += chunk_size; total += lu;
if (!keepflag) { }
flags |= MAP_FIXED;
oaddr = naddr;
} }
continue; continue;
} }
fallback:
sub = 0; sub = 0;
while (sub < chunk_size) { while (sub < chunk_size) {
lu = read(fd, buffer + sub, chunk_size - sub); lu = read(fd, buffer + sub, chunk_size - sub);
...@@ -228,6 +235,8 @@ void *child_thread(void *arg) ...@@ -228,6 +235,8 @@ void *child_thread(void *arg)
error: error:
free(buffer); free(buffer);
close(fd); close(fd);
if (zflg)
munmap(addr, chunk_size);
pthread_exit(0); pthread_exit(0);
} }
...@@ -371,7 +380,8 @@ int main(int argc, char *argv[]) ...@@ -371,7 +380,8 @@ int main(int argc, char *argv[])
setup_sockaddr(cfg_family, host, &listenaddr); setup_sockaddr(cfg_family, host, &listenaddr);
if (mss && if (mss &&
setsockopt(fdlisten, SOL_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) { setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
&mss, sizeof(mss)) == -1) {
perror("setsockopt TCP_MAXSEG"); perror("setsockopt TCP_MAXSEG");
exit(1); exit(1);
} }
...@@ -402,7 +412,7 @@ int main(int argc, char *argv[]) ...@@ -402,7 +412,7 @@ int main(int argc, char *argv[])
setup_sockaddr(cfg_family, host, &addr); setup_sockaddr(cfg_family, host, &addr);
if (mss && if (mss &&
setsockopt(fd, SOL_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) { setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
perror("setsockopt TCP_MAXSEG"); perror("setsockopt TCP_MAXSEG");
exit(1); exit(1);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment