Commit cd9733f5 authored by Liu Jian, committed by Alexei Starovoitov

tcp_bpf: Fix one concurrency problem in the tcp_bpf_send_verdict function

With two msgs, msgA and msgB, and a user doing nonblocking sendmsg calls (or
multiple cores) on a single socket 'sk', we could get the following flow.

 msgA, sk                               msgB, sk
 -----------                            ---------------
 tcp_bpf_sendmsg()
 lock(sk)
 psock = sk->psock
                                        tcp_bpf_sendmsg()
                                        lock(sk) ... blocking
tcp_bpf_send_verdict
if (psock->eval == NONE)
   psock->eval = sk_psock_msg_verdict
 ..
 < handle SK_REDIRECT case >
   release_sock(sk)                     < lock dropped so grab here >
   ret = tcp_bpf_sendmsg_redir
                                        psock = sk->psock
                                        tcp_bpf_send_verdict
 lock_sock(sk) ... blocking on B
                                        if (psock->eval == NONE) <- boom.
                                         psock->eval will have msgA state

The problem here is that we dropped the lock while handling msgA and msgB then
grabbed it. Now psock holds old state and, importantly, psock->eval has not
been cleared. So msgB will run whatever action was chosen for msgA, and the
verdict program may never see msgB.

Fixes: 604326b4 ("bpf, sockmap: convert to generic sk_msg interface")
Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20211012052019.184398-1-liujian56@huawei.com
parent 04f8ef56
...@@ -232,6 +232,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, ...@@ -232,6 +232,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
bool cork = false, enospc = sk_msg_full(msg); bool cork = false, enospc = sk_msg_full(msg);
struct sock *sk_redir; struct sock *sk_redir;
u32 tosend, delta = 0; u32 tosend, delta = 0;
u32 eval = __SK_NONE;
int ret; int ret;
more_data: more_data:
...@@ -275,13 +276,24 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, ...@@ -275,13 +276,24 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
case __SK_REDIRECT: case __SK_REDIRECT:
sk_redir = psock->sk_redir; sk_redir = psock->sk_redir;
sk_msg_apply_bytes(psock, tosend); sk_msg_apply_bytes(psock, tosend);
if (!psock->apply_bytes) {
/* Clean up before releasing the sock lock. */
eval = psock->eval;
psock->eval = __SK_NONE;
psock->sk_redir = NULL;
}
if (psock->cork) { if (psock->cork) {
cork = true; cork = true;
psock->cork = NULL; psock->cork = NULL;
} }
sk_msg_return(sk, msg, tosend); sk_msg_return(sk, msg, tosend);
release_sock(sk); release_sock(sk);
ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
if (eval == __SK_REDIRECT)
sock_put(sk_redir);
lock_sock(sk); lock_sock(sk);
if (unlikely(ret < 0)) { if (unlikely(ret < 0)) {
int free = sk_msg_free_nocharge(sk, msg); int free = sk_msg_free_nocharge(sk, msg);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment