Commit 1fc0bf49 authored by Daniel Borkmann's avatar Daniel Borkmann

Merge branch 'bpf-samples-sockmap-improvements'

John Fastabend says:

====================
The sockmap sample is pretty simple at the moment. All it does is open
a few sockets attach BPF programs/sockmaps and sends a few packets.

However, for testing and debugging I wanted to have more control over
the sendmsg format and data than provided by tools like iperf3/netperf,
etc. The reason is for testing BPF programs and stream parser it is
helpful to be able submit multiple sendmsg calls with different msg
layouts. For example lots of 1B iovs or a single large MB of data, etc.

Additionally, my current test setup requires an entire orchestration
layer (cilium) to run. As well as lighttpd and http traffic generators
or for kafka testing brokers and clients. This makes it a bit more
difficult when doing performance optimizations to incrementally test
small changes and come up with performance delta's and perf numbers.

By adding a few more options and an additional few tests the sockmap
sample program can show a more complete example and do some of the
above. Because the sample program is self contained it doesn't require
additional infrastructure to run either.

This series, although still fairly crude, does provide some nice
additions. They are

  - a new sendmsg tests with a sender and recv threads
  - a new base tests so we can get metrics/data without BPF
  - multiple GBps of throughput on base and sendmsg tests
  - automatically set rlimit and common variables

That said the UI is still primitive, more features could be added,
more tests might be useful, the reporting is bare bones, etc. But,
IMO lets push this now rather than sit on it for weeks until I get
time to do the above improvements. Additional patches can address
the other limitations/issues. Another thing I am considering is
moving this into selftests, after a few more fixes so we avoid
false failures, so that we get more sockmap testing.

v2: removed bogus file added by patch 3/7
v3: 1/7 replace goto out with returns, remove sighandler update,
    2/7 free iov in error cases
    3/7 fix bogus makefile change, bail out early on errors
v4: add Martin's "nits" and ACKs along with fixes to 2/7 iov free
    also pointed out by Martin.

Thanks Daniel and Martin for the reviews!
====================
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents 31e95b61 8e0ef380
......@@ -23,8 +23,11 @@
#include <stdbool.h>
#include <signal.h>
#include <fcntl.h>
#include <sys/wait.h>
#include <time.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/types.h>
#include <linux/netlink.h>
......@@ -35,6 +38,8 @@
#include <assert.h>
#include <libgen.h>
#include <getopt.h>
#include "../bpf/bpf_load.h"
#include "../bpf/bpf_util.h"
#include "../bpf/libbpf.h"
......@@ -46,15 +51,42 @@ void running_handler(int a);
#define S1_PORT 10000
#define S2_PORT 10001
static int sockmap_test_sockets(int rate, int dot)
/* global sockets */
int s1, s2, c1, c2, p1, p2;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
{"cgroup", required_argument, NULL, 'c' },
{"rate", required_argument, NULL, 'r' },
{"verbose", no_argument, NULL, 'v' },
{"iov_count", required_argument, NULL, 'i' },
{"length", required_argument, NULL, 'l' },
{"test", required_argument, NULL, 't' },
{0, 0, NULL, 0 }
};
static void usage(char *argv[])
{
int i;
printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
printf(" options:\n");
for (i = 0; long_options[i].name != 0; i++) {
printf(" --%-12s", long_options[i].name);
if (long_options[i].flag != NULL)
printf(" flag (internal value:%d)\n",
*long_options[i].flag);
else
printf(" -%c\n", long_options[i].val);
}
printf("\n");
}
static int sockmap_init_sockets(void)
{
int i, sc, err, max_fd, one = 1;
int s1, s2, c1, c2, p1, p2;
int i, err, one = 1;
struct sockaddr_in addr;
struct timeval timeout;
char buf[1024] = {0};
int *fds[4] = {&s1, &s2, &c1, &c2};
fd_set w;
s1 = s2 = p1 = p2 = c1 = c2 = 0;
......@@ -63,8 +95,7 @@ static int sockmap_test_sockets(int rate, int dot)
*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
if (*fds[i] < 0) {
perror("socket s1 failed()");
err = *fds[i];
goto out;
return errno;
}
}
......@@ -74,16 +105,16 @@ static int sockmap_test_sockets(int rate, int dot)
(char *)&one, sizeof(one));
if (err) {
perror("setsockopt failed()");
goto out;
return errno;
}
}
/* Non-blocking sockets */
for (i = 0; i < 4; i++) {
for (i = 0; i < 2; i++) {
err = ioctl(*fds[i], FIONBIO, (char *)&one);
if (err < 0) {
perror("ioctl s1 failed()");
goto out;
return errno;
}
}
......@@ -96,14 +127,14 @@ static int sockmap_test_sockets(int rate, int dot)
err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
perror("bind s1 failed()\n");
goto out;
return errno;
}
addr.sin_port = htons(S2_PORT);
err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0) {
perror("bind s2 failed()\n");
goto out;
return errno;
}
/* Listen server sockets */
......@@ -111,14 +142,14 @@ static int sockmap_test_sockets(int rate, int dot)
err = listen(s1, 32);
if (err < 0) {
perror("listen s1 failed()\n");
goto out;
return errno;
}
addr.sin_port = htons(S2_PORT);
err = listen(s2, 32);
if (err < 0) {
perror("listen s1 failed()\n");
goto out;
return errno;
}
/* Initiate Connect */
......@@ -126,46 +157,232 @@ static int sockmap_test_sockets(int rate, int dot)
err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0 && errno != EINPROGRESS) {
perror("connect c1 failed()\n");
goto out;
return errno;
}
addr.sin_port = htons(S2_PORT);
err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
if (err < 0 && errno != EINPROGRESS) {
perror("connect c2 failed()\n");
goto out;
return errno;
} else if (err < 0) {
err = 0;
}
/* Accept Connecrtions */
p1 = accept(s1, NULL, NULL);
if (p1 < 0) {
perror("accept s1 failed()\n");
goto out;
return errno;
}
p2 = accept(s2, NULL, NULL);
if (p2 < 0) {
perror("accept s1 failed()\n");
goto out;
return errno;
}
max_fd = p2;
timeout.tv_sec = 10;
timeout.tv_usec = 0;
printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
c1, s1, c2, s2);
return 0;
}
struct msg_stats {
size_t bytes_sent;
size_t bytes_recvd;
struct timespec start;
struct timespec end;
};
static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
struct msg_stats *s, bool tx)
{
struct msghdr msg = {0};
int err, i, flags = MSG_NOSIGNAL;
struct iovec *iov;
iov = calloc(iov_count, sizeof(struct iovec));
if (!iov)
return errno;
for (i = 0; i < iov_count; i++) {
char *d = calloc(iov_length, sizeof(char));
if (!d) {
fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
goto out_errno;
}
iov[i].iov_base = d;
iov[i].iov_len = iov_length;
}
msg.msg_iov = iov;
msg.msg_iovlen = iov_count;
if (tx) {
clock_gettime(CLOCK_MONOTONIC, &s->start);
for (i = 0; i < cnt; i++) {
int sent = sendmsg(fd, &msg, flags);
if (sent < 0) {
perror("send loop error:");
goto out_errno;
}
s->bytes_sent += sent;
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
} else {
int slct, recv, max_fd = fd;
struct timeval timeout;
float total_bytes;
fd_set w;
total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
err = clock_gettime(CLOCK_MONOTONIC, &s->start);
if (err < 0)
perror("recv start time: ");
while (s->bytes_recvd < total_bytes) {
timeout.tv_sec = 1;
timeout.tv_usec = 0;
/* FD sets */
FD_ZERO(&w);
FD_SET(fd, &w);
slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
if (slct == -1) {
perror("select()");
clock_gettime(CLOCK_MONOTONIC, &s->end);
goto out_errno;
} else if (!slct) {
fprintf(stderr, "unexpected timeout\n");
errno = -EIO;
clock_gettime(CLOCK_MONOTONIC, &s->end);
goto out_errno;
}
recv = recvmsg(fd, &msg, flags);
if (recv < 0) {
if (errno != EWOULDBLOCK) {
clock_gettime(CLOCK_MONOTONIC, &s->end);
perror("recv failed()\n");
goto out_errno;
}
}
s->bytes_recvd += recv;
}
clock_gettime(CLOCK_MONOTONIC, &s->end);
}
for (i = 0; i < iov_count; i++)
free(iov[i].iov_base);
free(iov);
return 0;
out_errno:
for (i = 0; i < iov_count; i++)
free(iov[i].iov_base);
free(iov);
return errno;
}
static float giga = 1000000000;
static inline float sentBps(struct msg_stats s)
{
return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
}
static inline float recvdBps(struct msg_stats s)
{
return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
}
static int sendmsg_test(int iov_count, int iov_buf, int cnt,
int verbose, bool base)
{
float sent_Bps = 0, recvd_Bps = 0;
int rx_fd, txpid, rxpid, err = 0;
struct msg_stats s = {0};
int status;
errno = 0;
if (base)
rx_fd = p1;
else
rx_fd = p2;
rxpid = fork();
if (rxpid == 0) {
err = msg_loop(rx_fd, iov_count, iov_buf, cnt, &s, false);
if (err)
fprintf(stderr,
"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
iov_count, iov_buf, cnt, err);
shutdown(p2, SHUT_RDWR);
shutdown(p1, SHUT_RDWR);
if (s.end.tv_sec - s.start.tv_sec) {
sent_Bps = sentBps(s);
recvd_Bps = recvdBps(s);
}
fprintf(stdout,
"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
s.bytes_sent, sent_Bps, sent_Bps/giga,
s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
exit(1);
} else if (rxpid == -1) {
perror("msg_loop_rx: ");
return errno;
}
txpid = fork();
if (txpid == 0) {
err = msg_loop(c1, iov_count, iov_buf, cnt, &s, true);
if (err)
fprintf(stderr,
"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
iov_count, iov_buf, cnt, err);
shutdown(c1, SHUT_RDWR);
if (s.end.tv_sec - s.start.tv_sec) {
sent_Bps = sentBps(s);
recvd_Bps = recvdBps(s);
}
fprintf(stdout,
"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
s.bytes_sent, sent_Bps, sent_Bps/giga,
s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
exit(1);
} else if (txpid == -1) {
perror("msg_loop_tx: ");
return errno;
}
assert(waitpid(rxpid, &status, 0) == rxpid);
assert(waitpid(txpid, &status, 0) == txpid);
return err;
}
static int forever_ping_pong(int rate, int verbose)
{
struct timeval timeout;
char buf[1024] = {0};
int sc;
timeout.tv_sec = 10;
timeout.tv_usec = 0;
/* Ping/Pong data from client to server */
sc = send(c1, buf, sizeof(buf), 0);
if (sc < 0) {
perror("send failed()\n");
goto out;
return sc;
}
do {
int s, rc, i;
int s, rc, i, max_fd = p2;
fd_set w;
/* FD sets */
FD_ZERO(&w);
......@@ -193,7 +410,7 @@ static int sockmap_test_sockets(int rate, int dot)
if (rc < 0) {
if (errno != EWOULDBLOCK) {
perror("recv failed()\n");
break;
return rc;
}
}
......@@ -205,35 +422,92 @@ static int sockmap_test_sockets(int rate, int dot)
sc = send(i, buf, rc, 0);
if (sc < 0) {
perror("send failed()\n");
break;
return sc;
}
}
if (rate)
sleep(rate);
if (dot) {
if (verbose) {
printf(".");
fflush(stdout);
}
} while (running);
out:
close(s1);
close(s2);
close(p1);
close(p2);
close(c1);
close(c2);
return err;
return 0;
}
enum {
PING_PONG,
SENDMSG,
BASE,
};
int main(int argc, char **argv)
{
int rate = 1, dot = 1;
int iov_count = 1, length = 1024, rate = 1, verbose = 0;
struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
int opt, longindex, err, cg_fd = 0;
int test = PING_PONG;
char filename[256];
int err, cg_fd;
char *cg_path;
cg_path = argv[argc - 1];
while ((opt = getopt_long(argc, argv, "hvc:r:i:l:t:",
long_options, &longindex)) != -1) {
switch (opt) {
/* Cgroup configuration */
case 'c':
cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
if (cg_fd < 0) {
fprintf(stderr,
"ERROR: (%i) open cg path failed: %s\n",
cg_fd, optarg);
return cg_fd;
}
break;
case 'r':
rate = atoi(optarg);
break;
case 'v':
verbose = 1;
break;
case 'i':
iov_count = atoi(optarg);
break;
case 'l':
length = atoi(optarg);
break;
case 't':
if (strcmp(optarg, "ping") == 0) {
test = PING_PONG;
} else if (strcmp(optarg, "sendmsg") == 0) {
test = SENDMSG;
} else if (strcmp(optarg, "base") == 0) {
test = BASE;
} else {
usage(argv);
return -1;
}
break;
case 'h':
default:
usage(argv);
return -1;
}
}
if (!cg_fd) {
fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
argv[0]);
return -1;
}
if (setrlimit(RLIMIT_MEMLOCK, &r)) {
perror("setrlimit(RLIMIT_MEMLOCK)");
return 1;
}
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
running = 1;
......@@ -241,20 +515,16 @@ int main(int argc, char **argv)
/* catch SIGINT */
signal(SIGINT, running_handler);
/* If base test skip BPF setup */
if (test == BASE)
goto run;
if (load_bpf_file(filename)) {
fprintf(stderr, "load_bpf_file: (%s) %s\n",
filename, strerror(errno));
return 1;
}
/* Cgroup configuration */
cg_fd = open(cg_path, O_DIRECTORY, O_RDONLY);
if (cg_fd < 0) {
fprintf(stderr, "ERROR: (%i) open cg path failed: %s\n",
cg_fd, cg_path);
return cg_fd;
}
/* Attach programs to sockmap */
err = bpf_prog_attach(prog_fd[0], map_fd[0],
BPF_SK_SKB_STREAM_PARSER, 0);
......@@ -280,12 +550,30 @@ int main(int argc, char **argv)
return err;
}
err = sockmap_test_sockets(rate, dot);
run:
err = sockmap_init_sockets();
if (err) {
fprintf(stderr, "ERROR: test socket failed: %d\n", err);
return err;
goto out;
}
return 0;
if (test == PING_PONG)
err = forever_ping_pong(rate, verbose);
else if (test == SENDMSG)
err = sendmsg_test(iov_count, length, rate, verbose, false);
else if (test == BASE)
err = sendmsg_test(iov_count, length, rate, verbose, true);
else
fprintf(stderr, "unknown test\n");
out:
close(s1);
close(s2);
close(p1);
close(p2);
close(c1);
close(c2);
close(cg_fd);
return err;
}
void running_handler(int a)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment