Commit 0f4b437b authored by Jakub Kicinski's avatar Jakub Kicinski

Merge branch 'tcp-fix-tcp_poll-races'

Eric Dumazet says:

====================
tcp: fix tcp_poll() races

Flakes in packetdrill tests stressing epoll_wait()
were root caused to bad ordering in tcp_write_err()

Precisely, we have to call sk_error_report() after
tcp_done().

When fixing this issue, we discovered tcp_abort(),
tcp_v4_err() and tcp_v6_err() had similar issues.

Since tcp_reset() has the correct ordering,
first patch takes part of it and creates
tcp_done_with_error() helper.
====================

Link: https://lore.kernel.org/r/20240528125253.1966136-1-edumazet@google.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parents c3390677 fde6f897
......@@ -677,6 +677,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb,
/* tcp_input.c */
void tcp_rearm_rto(struct sock *sk);
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
void tcp_done_with_error(struct sock *sk, int err);
void tcp_reset(struct sock *sk, struct sk_buff *skb);
void tcp_fin(struct sock *sk);
void tcp_check_space(struct sock *sk);
......
......@@ -598,7 +598,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
*/
mask |= EPOLLOUT | EPOLLWRNORM;
}
/* This barrier is coupled with smp_wmb() in tcp_reset() */
/* This barrier is coupled with smp_wmb() in tcp_done_with_error() */
smp_rmb();
if (READ_ONCE(sk->sk_err) ||
!skb_queue_empty_lockless(&sk->sk_error_queue))
......@@ -4576,14 +4576,10 @@ int tcp_abort(struct sock *sk, int err)
bh_lock_sock(sk);
if (!sock_flag(sk, SOCK_DEAD)) {
WRITE_ONCE(sk->sk_err, err);
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
sk_error_report(sk);
if (tcp_need_reset(sk->sk_state))
tcp_send_active_reset(sk, GFP_ATOMIC,
SK_RST_REASON_NOT_SPECIFIED);
tcp_done(sk);
tcp_done_with_error(sk, err);
}
bh_unlock_sock(sk);
......
......@@ -4436,9 +4436,26 @@ static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
return SKB_NOT_DROPPED_YET;
}
void tcp_done_with_error(struct sock *sk, int err)
{
/* This barrier is coupled with smp_rmb() in tcp_poll() */
WRITE_ONCE(sk->sk_err, err);
smp_wmb();
tcp_write_queue_purge(sk);
tcp_done(sk);
if (!sock_flag(sk, SOCK_DEAD))
sk_error_report(sk);
}
EXPORT_SYMBOL(tcp_done_with_error);
/* When we get a reset we do this. */
void tcp_reset(struct sock *sk, struct sk_buff *skb)
{
int err;
trace_tcp_receive_reset(sk);
/* mptcp can't tell us to ignore reset pkts,
......@@ -4450,24 +4467,17 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
case TCP_SYN_SENT:
WRITE_ONCE(sk->sk_err, ECONNREFUSED);
err = ECONNREFUSED;
break;
case TCP_CLOSE_WAIT:
WRITE_ONCE(sk->sk_err, EPIPE);
err = EPIPE;
break;
case TCP_CLOSE:
return;
default:
WRITE_ONCE(sk->sk_err, ECONNRESET);
err = ECONNRESET;
}
/* This barrier is coupled with smp_rmb() in tcp_poll() */
smp_wmb();
tcp_write_queue_purge(sk);
tcp_done(sk);
if (!sock_flag(sk, SOCK_DEAD))
sk_error_report(sk);
tcp_done_with_error(sk, err);
}
/*
......
......@@ -611,15 +611,10 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);
if (!sock_owned_by_user(sk)) {
WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk);
tcp_done(sk);
} else {
if (!sock_owned_by_user(sk))
tcp_done_with_error(sk, err);
else
WRITE_ONCE(sk->sk_err_soft, err);
}
goto out;
}
......
......@@ -74,11 +74,7 @@ u32 tcp_clamp_probe0_to_user_timeout(const struct sock *sk, u32 when)
static void tcp_write_err(struct sock *sk)
{
WRITE_ONCE(sk->sk_err, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT);
sk_error_report(sk);
tcp_write_queue_purge(sk);
tcp_done(sk);
tcp_done_with_error(sk, READ_ONCE(sk->sk_err_soft) ? : ETIMEDOUT);
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONTIMEOUT);
}
......
......@@ -490,14 +490,10 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
if (!sock_owned_by_user(sk)) {
WRITE_ONCE(sk->sk_err, err);
sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
tcp_done(sk);
} else {
if (!sock_owned_by_user(sk))
tcp_done_with_error(sk, err);
else
WRITE_ONCE(sk->sk_err_soft, err);
}
goto out;
case TCP_LISTEN:
break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment