Commit 1379ef82 authored by Daniel Borkmann's avatar Daniel Borkmann

Merge branch 'bpf-sockmap-ingress'

John Fastabend says:

====================
This series adds the BPF_F_INGRESS flag support to the redirect APIs.
Bringing the sockmap API in-line with the cls_bpf redirect APIs.

We add it to both variants of sockmap programs, the first patch adds
support for tx ulp hooks and the third patch adds support for the recv
skb hooks. Patches two and four add tests for the corresponding
ingress redirect hooks.

Follow on patches can address busy polling support, but next series
from me will move the sockmap sample program into selftests.

v2: added static to function definition caught by kbuild bot
v3: fixed an error branch with missing mem_uncharge
    in recvmsg op moved receive_queue check outside of RCU region
====================
Signed-off-by: default avatarDaniel Borkmann <daniel@iogearbox.net>
parents 22527437 2e3f6c5f
......@@ -521,6 +521,8 @@ struct sk_msg_buff {
__u32 key;
__u32 flags;
struct bpf_map *map;
struct sk_buff *skb;
struct list_head list;
};
/* Compute the linear packet data range [data, data_end) which
......
......@@ -1085,6 +1085,7 @@ struct proto {
#endif
bool (*stream_memory_free)(const struct sock *sk);
bool (*stream_memory_read)(const struct sock *sk);
/* Memory pressure */
void (*enter_memory_pressure)(struct sock *sk);
void (*leave_memory_pressure)(struct sock *sk);
......
This diff is collapsed.
......@@ -1855,7 +1855,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
/* If user passes invalid input drop the packet. */
if (unlikely(flags))
if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP;
tcb->bpf.key = key;
......@@ -1894,7 +1894,7 @@ BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
struct bpf_map *, map, u32, key, u64, flags)
{
/* If user passes invalid input drop the packet. */
if (unlikely(flags))
if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP;
msg->key = key;
......
......@@ -485,6 +485,14 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
}
}
static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
int target, struct sock *sk)
{
return (tp->rcv_nxt - tp->copied_seq >= target) ||
(sk->sk_prot->stream_memory_read ?
sk->sk_prot->stream_memory_read(sk) : false);
}
/*
* Wait for a TCP event.
*
......@@ -554,7 +562,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
tp->urg_data)
target++;
if (tp->rcv_nxt - tp->copied_seq >= target)
if (tcp_stream_is_readable(tp, target, sk))
mask |= EPOLLIN | EPOLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
......
......@@ -54,7 +54,7 @@ struct bpf_map_def SEC("maps") sock_map_redir = {
.type = BPF_MAP_TYPE_SOCKMAP,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 1,
.max_entries = 20,
};
struct bpf_map_def SEC("maps") sock_apply_bytes = {
......@@ -78,6 +78,19 @@ struct bpf_map_def SEC("maps") sock_pull_bytes = {
.max_entries = 2
};
struct bpf_map_def SEC("maps") sock_redir_flags = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 1
};
struct bpf_map_def SEC("maps") sock_skb_opts = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 1
};
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
......@@ -90,15 +103,24 @@ int bpf_prog2(struct __sk_buff *skb)
{
__u32 lport = skb->local_port;
__u32 rport = skb->remote_port;
int ret = 0;
int len, *f, ret, zero = 0;
__u64 flags = 0;
if (lport == 10000)
ret = 10;
else
ret = 1;
bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
len = (__u32)skb->data_end - (__u32)skb->data;
f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
if (f && *f) {
ret = 3;
flags = *f;
}
bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
len, flags);
return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
}
SEC("sockops")
......@@ -197,8 +219,9 @@ int bpf_prog5(struct sk_msg_md *msg)
SEC("sk_msg3")
int bpf_prog6(struct sk_msg_md *msg)
{
int *bytes, zero = 0, one = 1;
int *start, *end;
int *bytes, zero = 0, one = 1, key = 0;
int *start, *end, *f;
__u64 flags = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
......@@ -210,15 +233,22 @@ int bpf_prog6(struct sk_msg_md *msg)
end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
if (start && end)
bpf_msg_pull_data(msg, *start, *end, 0);
return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
if (f && *f) {
key = 2;
flags = *f;
}
return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
}
SEC("sk_msg4")
int bpf_prog7(struct sk_msg_md *msg)
{
int err1 = 0, err2 = 0, zero = 0, one = 1;
int *bytes, *start, *end, len1, len2;
int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
int *f, *bytes, *start, *end, len1, len2;
__u64 flags = 0;
int err;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
err1 = bpf_msg_apply_bytes(msg, *bytes);
......@@ -229,7 +259,6 @@ int bpf_prog7(struct sk_msg_md *msg)
start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
if (start && end) {
int err;
bpf_printk("sk_msg2: pull(%i:%i)\n",
start ? *start : 0, end ? *end : 0);
......@@ -241,9 +270,16 @@ int bpf_prog7(struct sk_msg_md *msg)
bpf_printk("sk_msg2: length update %i->%i\n",
len1, len2);
}
bpf_printk("sk_msg3: redirect(%iB) err1=%i err2=%i\n",
len1, err1, err2);
return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
if (f && *f) {
key = 2;
flags = *f;
}
bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
len1, flags, err1 ? err1 : err2);
err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
bpf_printk("sk_msg3: err %i\n", err);
return err;
}
SEC("sk_msg5")
......
#Test a bunch of positive cases to verify basic functionality
for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
for prog in "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
for t in "sendmsg" "sendpage"; do
for r in 1 10 100; do
for i in 1 10 100; do
......@@ -100,6 +100,25 @@ for t in "sendmsg" "sendpage"; do
sleep 2
done
prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress"
for t in "sendmsg" "sendpage"; do
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
echo $TEST
$TEST
sleep 2
done
prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb"
for t in "sendmsg" "sendpage"; do
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
echo $TEST
$TEST
sleep 2
done
# Test apply and redirect with larger value than send
r=1
i=8
......@@ -113,6 +132,25 @@ for t in "sendmsg" "sendpage"; do
sleep 2
done
prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress"
for t in "sendmsg" "sendpage"; do
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
echo $TEST
$TEST
sleep 2
done
prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb"
for t in "sendmsg" "sendpage"; do
TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
echo $TEST
$TEST
sleep 2
done
# Test apply and redirect with apply that never reaches limit
r=1024
i=1
......
......@@ -64,6 +64,8 @@ int txmsg_apply;
int txmsg_cork;
int txmsg_start;
int txmsg_end;
int txmsg_ingress;
int txmsg_skb;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
......@@ -83,6 +85,8 @@ static const struct option long_options[] = {
{"txmsg_cork", required_argument, NULL, 'k'},
{"txmsg_start", required_argument, NULL, 's'},
{"txmsg_end", required_argument, NULL, 'e'},
{"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
{"txmsg_skb", no_argument, &txmsg_skb, 1 },
{0, 0, NULL, 0 }
};
......@@ -793,6 +797,60 @@ int main(int argc, char **argv)
return err;
}
}
if (txmsg_ingress) {
int in = BPF_F_INGRESS;
i = 0;
err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
err, strerror(errno));
}
i = 1;
err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
err, strerror(errno));
}
err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
err, strerror(errno));
}
i = 2;
err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
err, strerror(errno));
}
}
if (txmsg_skb) {
int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
int ingress = BPF_F_INGRESS;
i = 0;
err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
err, strerror(errno));
}
i = 3;
err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
err, strerror(errno));
}
}
}
if (txmsg_drop)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment