Commit fa246693 authored by John Fastabend, committed by Daniel Borkmann

bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT:

Add support for the BPF_F_INGRESS flag in the skb redirect helper. To
do this, convert the skb into a scatterlist and push it onto the ingress
queue. This is the same logic that is used in the sk_msg redirect helper,
so it should feel familiar.
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
parent 2596f64c
...@@ -521,6 +521,7 @@ struct sk_msg_buff { ...@@ -521,6 +521,7 @@ struct sk_msg_buff {
__u32 key; __u32 key;
__u32 flags; __u32 flags;
struct bpf_map *map; struct bpf_map *map;
struct sk_buff *skb;
struct list_head list; struct list_head list;
}; };
......
...@@ -785,7 +785,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -785,7 +785,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
i++; i++;
if (i == MAX_SKB_FRAGS) if (i == MAX_SKB_FRAGS)
i = 0; i = 0;
put_page(page); if (!md->skb)
put_page(page);
} }
if (copied == len) if (copied == len)
break; break;
...@@ -794,6 +795,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, ...@@ -794,6 +795,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (!sg->length && md->sg_start == md->sg_end) { if (!sg->length && md->sg_start == md->sg_end) {
list_del(&md->list); list_del(&md->list);
if (md->skb)
consume_skb(md->skb);
kfree(md); kfree(md);
} }
} }
...@@ -1045,27 +1048,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) ...@@ -1045,27 +1048,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
__SK_DROP; __SK_DROP;
} }
/*
 * smap_do_ingress - queue an skb on a psock's ingress list as a scatterlist
 * @psock: psock whose ingress list receives the message
 * @skb:   buffer to map into the message's scatterlist
 *
 * Allocates a zeroed sk_msg_buff, maps the whole of @skb into its sg_data
 * table with skb_to_sgvec(), charges skb->len against the socket, links the
 * message onto psock->ingress and then invokes the socket's sk_data_ready
 * callback. The skb pointer is stored in the message (msg->skb).
 *
 * Return: skb->len on success; -EAGAIN when the allocation fails or
 * sk_rmem_schedule() refuses the charge; otherwise the negative value
 * returned by skb_to_sgvec(). The message is freed on every failure path.
 */
static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
{
	struct sock *sk = psock->sock;
	struct sk_msg_buff *msg;
	int nents, ret;

	msg = kzalloc(sizeof(*msg), GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!msg))
		return -EAGAIN;

	if (!sk_rmem_schedule(sk, skb, skb->len)) {
		ret = -EAGAIN;
		goto err_free;
	}

	sg_init_table(msg->sg_data, MAX_SKB_FRAGS);
	nents = skb_to_sgvec(skb, msg->sg_data, 0, skb->len);
	if (unlikely(nents < 0)) {
		ret = nents;
		goto err_free;
	}

	sk_mem_charge(sk, skb->len);
	ret = skb->len;

	msg->sg_start = 0;
	/* sg_end wraps back to 0 when all MAX_SKB_FRAGS entries are in use. */
	msg->sg_end = (nents == MAX_SKB_FRAGS) ? 0 : nents;
	msg->skb = skb;

	list_add_tail(&msg->list, &psock->ingress);
	sk->sk_data_ready(sk);
	return ret;

err_free:
	kfree(msg);
	return ret;
}
static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb) static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
{ {
struct smap_psock *peer;
struct sock *sk; struct sock *sk;
__u32 in;
int rc; int rc;
rc = smap_verdict_func(psock, skb); rc = smap_verdict_func(psock, skb);
switch (rc) { switch (rc) {
case __SK_REDIRECT: case __SK_REDIRECT:
sk = do_sk_redirect_map(skb); sk = do_sk_redirect_map(skb);
if (likely(sk)) { if (!sk) {
struct smap_psock *peer = smap_psock_sk(sk); kfree_skb(skb);
break;
if (likely(peer && }
test_bit(SMAP_TX_RUNNING, &peer->state) &&
!sock_flag(sk, SOCK_DEAD) && peer = smap_psock_sk(sk);
sock_writeable(sk))) { in = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
skb_set_owner_w(skb, sk);
skb_queue_tail(&peer->rxqueue, skb); if (unlikely(!peer || sock_flag(sk, SOCK_DEAD) ||
schedule_work(&peer->tx_work); !test_bit(SMAP_TX_RUNNING, &peer->state))) {
break; kfree_skb(skb);
} break;
}
if (!in && sock_writeable(sk)) {
skb_set_owner_w(skb, sk);
skb_queue_tail(&peer->rxqueue, skb);
schedule_work(&peer->tx_work);
break;
} else if (in &&
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
skb_queue_tail(&peer->rxqueue, skb);
schedule_work(&peer->tx_work);
break;
} }
/* Fall through and free skb otherwise */ /* Fall through and free skb otherwise */
case __SK_DROP: case __SK_DROP:
...@@ -1127,15 +1175,23 @@ static void smap_tx_work(struct work_struct *w) ...@@ -1127,15 +1175,23 @@ static void smap_tx_work(struct work_struct *w)
} }
while ((skb = skb_dequeue(&psock->rxqueue))) { while ((skb = skb_dequeue(&psock->rxqueue))) {
__u32 flags;
rem = skb->len; rem = skb->len;
off = 0; off = 0;
start: start:
flags = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
do { do {
if (likely(psock->sock->sk_socket)) if (likely(psock->sock->sk_socket)) {
n = skb_send_sock_locked(psock->sock, if (flags)
skb, off, rem); n = smap_do_ingress(psock, skb);
else else
n = skb_send_sock_locked(psock->sock,
skb, off, rem);
} else {
n = -EINVAL; n = -EINVAL;
}
if (n <= 0) { if (n <= 0) {
if (n == -EAGAIN) { if (n == -EAGAIN) {
/* Retry when space is available */ /* Retry when space is available */
...@@ -1153,7 +1209,9 @@ static void smap_tx_work(struct work_struct *w) ...@@ -1153,7 +1209,9 @@ static void smap_tx_work(struct work_struct *w)
rem -= n; rem -= n;
off += n; off += n;
} while (rem); } while (rem);
kfree_skb(skb);
if (!flags)
kfree_skb(skb);
} }
out: out:
release_sock(psock->sock); release_sock(psock->sock);
......
...@@ -1855,7 +1855,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, ...@@ -1855,7 +1855,7 @@ BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
/* If user passes invalid input drop the packet. */ /* If user passes invalid input drop the packet. */
if (unlikely(flags)) if (unlikely(flags & ~(BPF_F_INGRESS)))
return SK_DROP; return SK_DROP;
tcb->bpf.key = key; tcb->bpf.key = key;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment