Commit d2fd6cf3 authored by Paolo Abeni's avatar Paolo Abeni

Merge branch 'tcp-fix-isn-selection-in-timewait-syn_recv'

Eric Dumazet says:

====================
tcp: fix ISN selection in TIMEWAIT -> SYN_RECV

TCP can transform a TIMEWAIT socket into a SYN_RECV one from
a SYN packet, and the ISN of the SYNACK packet is normally
generated using TIMEWAIT tw_snd_nxt.

This SYN packet also bypasses normal checks against listen queue
being full or not.

Unfortunately this has been broken almost one decade ago.

This series fixes the issue, in two patches.

First patch refactors code to add tcp_tw_isn as a parameter
to ->route_req(), to make the second patch smaller.

Second patch fixes the issue, by no longer using TCP_SKB_CB(skb)
to store the tcp_tw_isn.

Following packetdrill test passes after this series:

// Set up a server listening socket.
    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
   +0 bind(3, ..., ...) = 0
   +0 listen(3, 1) = 0

// Establish connection
   +0 < S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK>
   +0 > S. 0:0(0) ack 1    <mss 1460,nop,nop,sackOK>
 +.01 < . 1:1(0) ack 1 win 32792

   +0 accept(3, ..., ...) = 4

// We close(), send a FIN, and get an ACK and FIN, in order to get into TIME_WAIT.

 +.01 close(4) = 0
   +0 > F. 1:1(0) ack 1
 +.01 < F. 1:1(0) ack 2 win 32792
   +0 > . 2:2(0) ack 2

// SYN hitting a TIME_WAIT -> should use an ISN based on TIMEWAIT tw_snd_nxt

 +.01 < S 1000:1000(0) win 65535 <mss 1460,nop,nop,sackOK>
   +0 > S. 65539:65539(0) ack 1001 <mss 1460,nop,nop,sackOK>
====================

Link: https://lore.kernel.org/r/20240407093322.3172088-1-edumazet@google.comSigned-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents 1c25fe9a 41eecbd7
...@@ -52,6 +52,8 @@ extern struct inet_hashinfo tcp_hashinfo; ...@@ -52,6 +52,8 @@ extern struct inet_hashinfo tcp_hashinfo;
DECLARE_PER_CPU(unsigned int, tcp_orphan_count); DECLARE_PER_CPU(unsigned int, tcp_orphan_count);
int tcp_orphan_count_sum(void); int tcp_orphan_count_sum(void);
DECLARE_PER_CPU(u32, tcp_tw_isn);
void tcp_time_wait(struct sock *sk, int state, int timeo); void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
...@@ -392,7 +394,8 @@ enum tcp_tw_status { ...@@ -392,7 +394,8 @@ enum tcp_tw_status {
enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sk_buff *skb, struct sk_buff *skb,
const struct tcphdr *th); const struct tcphdr *th,
u32 *tw_isn);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, bool fastopen, struct request_sock *req, bool fastopen,
bool *lost_race); bool *lost_race);
...@@ -935,13 +938,10 @@ struct tcp_skb_cb { ...@@ -935,13 +938,10 @@ struct tcp_skb_cb {
__u32 seq; /* Starting sequence number */ __u32 seq; /* Starting sequence number */
__u32 end_seq; /* SEQ + FIN + SYN + datalen */ __u32 end_seq; /* SEQ + FIN + SYN + datalen */
union { union {
/* Note : tcp_tw_isn is used in input path only /* Note :
* (isn chosen by tcp_timewait_state_process())
*
* tcp_gso_segs/size are used in write queue only, * tcp_gso_segs/size are used in write queue only,
* cf tcp_skb_pcount()/tcp_skb_mss() * cf tcp_skb_pcount()/tcp_skb_mss()
*/ */
__u32 tcp_tw_isn;
struct { struct {
u16 tcp_gso_segs; u16 tcp_gso_segs;
u16 tcp_gso_size; u16 tcp_gso_size;
...@@ -2284,7 +2284,8 @@ struct tcp_request_sock_ops { ...@@ -2284,7 +2284,8 @@ struct tcp_request_sock_ops {
struct dst_entry *(*route_req)(const struct sock *sk, struct dst_entry *(*route_req)(const struct sock *sk,
struct sk_buff *skb, struct sk_buff *skb,
struct flowi *fl, struct flowi *fl,
struct request_sock *req); struct request_sock *req,
u32 tw_isn);
u32 (*init_seq)(const struct sk_buff *skb); u32 (*init_seq)(const struct sk_buff *skb);
u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
int (*send_synack)(const struct sock *sk, struct dst_entry *dst, int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
......
...@@ -290,6 +290,9 @@ enum { ...@@ -290,6 +290,9 @@ enum {
DEFINE_PER_CPU(unsigned int, tcp_orphan_count); DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
DEFINE_PER_CPU(u32, tcp_tw_isn);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
long sysctl_tcp_mem[3] __read_mostly; long sysctl_tcp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_mem); EXPORT_SYMBOL(sysctl_tcp_mem);
......
...@@ -7097,7 +7097,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, ...@@ -7097,7 +7097,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct sock *sk, struct sk_buff *skb) struct sock *sk, struct sk_buff *skb)
{ {
struct tcp_fastopen_cookie foc = { .len = -1 }; struct tcp_fastopen_cookie foc = { .len = -1 };
__u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt; struct tcp_options_received tmp_opt;
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk); struct net *net = sock_net(sk);
...@@ -7107,21 +7106,28 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, ...@@ -7107,21 +7106,28 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct dst_entry *dst; struct dst_entry *dst;
struct flowi fl; struct flowi fl;
u8 syncookies; u8 syncookies;
u32 isn;
#ifdef CONFIG_TCP_AO #ifdef CONFIG_TCP_AO
const struct tcp_ao_hdr *aoh; const struct tcp_ao_hdr *aoh;
#endif #endif
syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies); isn = __this_cpu_read(tcp_tw_isn);
if (isn) {
/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
* evidently real one.
*/
__this_cpu_write(tcp_tw_isn, 0);
} else {
syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
/* TW buckets are converted to open requests without if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {
* limitations, they conserve resources and peer is want_cookie = tcp_syn_flood_action(sk,
* evidently real one. rsk_ops->slab_name);
*/ if (!want_cookie)
if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { goto drop;
want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); }
if (!want_cookie)
goto drop;
} }
if (sk_acceptq_is_full(sk)) { if (sk_acceptq_is_full(sk)) {
...@@ -7160,7 +7166,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, ...@@ -7160,7 +7166,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
/* Note: tcp_v6_init_req() might override ir_iif for link locals */ /* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb); inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
dst = af_ops->route_req(sk, skb, &fl, req); dst = af_ops->route_req(sk, skb, &fl, req, isn);
if (!dst) if (!dst)
goto drop_and_free; goto drop_and_free;
......
...@@ -1666,7 +1666,8 @@ static void tcp_v4_init_req(struct request_sock *req, ...@@ -1666,7 +1666,8 @@ static void tcp_v4_init_req(struct request_sock *req,
static struct dst_entry *tcp_v4_route_req(const struct sock *sk, static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
struct sk_buff *skb, struct sk_buff *skb,
struct flowi *fl, struct flowi *fl,
struct request_sock *req) struct request_sock *req,
u32 tw_isn)
{ {
tcp_v4_init_req(req, sk, skb); tcp_v4_init_req(req, sk, skb);
...@@ -2145,7 +2146,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, ...@@ -2145,7 +2146,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
skb->len - th->doff * 4); skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp =
...@@ -2167,6 +2167,7 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -2167,6 +2167,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
bool refcounted; bool refcounted;
struct sock *sk; struct sock *sk;
int ret; int ret;
u32 isn;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST) if (skb->pkt_type != PACKET_HOST)
...@@ -2382,7 +2383,7 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -2382,7 +2383,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
inet_twsk_put(inet_twsk(sk)); inet_twsk_put(inet_twsk(sk));
goto csum_error; goto csum_error;
} }
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN: { case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(net, struct sock *sk2 = inet_lookup_listener(net,
net->ipv4.tcp_death_row.hashinfo, net->ipv4.tcp_death_row.hashinfo,
...@@ -2396,6 +2397,7 @@ int tcp_v4_rcv(struct sk_buff *skb) ...@@ -2396,6 +2397,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
sk = sk2; sk = sk2;
tcp_v4_restore_cb(skb); tcp_v4_restore_cb(skb);
refcounted = false; refcounted = false;
__this_cpu_write(tcp_tw_isn, isn);
goto process; goto process;
} }
} }
......
...@@ -95,7 +95,7 @@ static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq) ...@@ -95,7 +95,7 @@ static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq)
*/ */
enum tcp_tw_status enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
const struct tcphdr *th) const struct tcphdr *th, u32 *tw_isn)
{ {
struct tcp_options_received tmp_opt; struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
...@@ -228,7 +228,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, ...@@ -228,7 +228,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
u32 isn = tcptw->tw_snd_nxt + 65535 + 2; u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
if (isn == 0) if (isn == 0)
isn++; isn++;
TCP_SKB_CB(skb)->tcp_tw_isn = isn; *tw_isn = isn;
return TCP_TW_SYN; return TCP_TW_SYN;
} }
......
...@@ -793,7 +793,8 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, ...@@ -793,7 +793,8 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
static void tcp_v6_init_req(struct request_sock *req, static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener, const struct sock *sk_listener,
struct sk_buff *skb) struct sk_buff *skb,
u32 tw_isn)
{ {
bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
struct inet_request_sock *ireq = inet_rsk(req); struct inet_request_sock *ireq = inet_rsk(req);
...@@ -807,7 +808,7 @@ static void tcp_v6_init_req(struct request_sock *req, ...@@ -807,7 +808,7 @@ static void tcp_v6_init_req(struct request_sock *req,
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb); ireq->ir_iif = tcp_v6_iif(skb);
if (!TCP_SKB_CB(skb)->tcp_tw_isn && if (!tw_isn &&
(ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
...@@ -820,9 +821,10 @@ static void tcp_v6_init_req(struct request_sock *req, ...@@ -820,9 +821,10 @@ static void tcp_v6_init_req(struct request_sock *req,
static struct dst_entry *tcp_v6_route_req(const struct sock *sk, static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
struct sk_buff *skb, struct sk_buff *skb,
struct flowi *fl, struct flowi *fl,
struct request_sock *req) struct request_sock *req,
u32 tw_isn)
{ {
tcp_v6_init_req(req, sk, skb); tcp_v6_init_req(req, sk, skb, tw_isn);
if (security_inet_conn_request(sk, skb, req)) if (security_inet_conn_request(sk, skb, req))
return NULL; return NULL;
...@@ -1739,7 +1741,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, ...@@ -1739,7 +1741,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
skb->len - th->doff*4); skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp =
...@@ -1756,6 +1757,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) ...@@ -1756,6 +1757,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
bool refcounted; bool refcounted;
struct sock *sk; struct sock *sk;
int ret; int ret;
u32 isn;
struct net *net = dev_net(skb->dev); struct net *net = dev_net(skb->dev);
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
...@@ -1965,7 +1967,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) ...@@ -1965,7 +1967,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
goto csum_error; goto csum_error;
} }
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN: case TCP_TW_SYN:
{ {
struct sock *sk2; struct sock *sk2;
...@@ -1983,6 +1985,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) ...@@ -1983,6 +1985,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
sk = sk2; sk = sk2;
tcp_v6_restore_cb(skb); tcp_v6_restore_cb(skb);
refcounted = false; refcounted = false;
__this_cpu_write(tcp_tw_isn, isn);
goto process; goto process;
} }
} }
......
...@@ -289,7 +289,8 @@ EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req); ...@@ -289,7 +289,8 @@ EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);
static struct dst_entry *subflow_v4_route_req(const struct sock *sk, static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
struct sk_buff *skb, struct sk_buff *skb,
struct flowi *fl, struct flowi *fl,
struct request_sock *req) struct request_sock *req,
u32 tw_isn)
{ {
struct dst_entry *dst; struct dst_entry *dst;
int err; int err;
...@@ -297,7 +298,7 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk, ...@@ -297,7 +298,7 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
tcp_rsk(req)->is_mptcp = 1; tcp_rsk(req)->is_mptcp = 1;
subflow_init_req(req, sk); subflow_init_req(req, sk);
dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req); dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req, tw_isn);
if (!dst) if (!dst)
return NULL; return NULL;
...@@ -356,7 +357,8 @@ static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst, ...@@ -356,7 +357,8 @@ static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
static struct dst_entry *subflow_v6_route_req(const struct sock *sk, static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
struct sk_buff *skb, struct sk_buff *skb,
struct flowi *fl, struct flowi *fl,
struct request_sock *req) struct request_sock *req,
u32 tw_isn)
{ {
struct dst_entry *dst; struct dst_entry *dst;
int err; int err;
...@@ -364,7 +366,7 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk, ...@@ -364,7 +366,7 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
tcp_rsk(req)->is_mptcp = 1; tcp_rsk(req)->is_mptcp = 1;
subflow_init_req(req, sk); subflow_init_req(req, sk);
dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req); dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req, tw_isn);
if (!dst) if (!dst)
return NULL; return NULL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment