Commit d2fd6cf3 authored by Paolo Abeni's avatar Paolo Abeni

Merge branch 'tcp-fix-isn-selection-in-timewait-syn_recv'

Eric Dumazet says:

====================
tcp: fix ISN selection in TIMEWAIT -> SYN_RECV

TCP can transform a TIMEWAIT socket into a SYN_RECV one from
a SYN packet, and the ISN of the SYNACK packet is normally
generated using TIMEWAIT tw_snd_nxt.

This SYN packet also bypasses normal checks against listen queue
being full or not.

Unfortunately this has been broken almost one decade ago.

This series fixes the issue, in two patches.

First patch refactors code to add tcp_tw_isn as a parameter
to ->route_req(), to make the second patch smaller.

Second patch fixes the issue, by no longer using TCP_SKB_CB(skb)
to store the tcp_tw_isn.

Following packetdrill test passes after this series:

// Set up a server listening socket.
    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
   +0 bind(3, ..., ...) = 0
   +0 listen(3, 1) = 0

// Establish connection
   +0 < S 0:0(0) win 32792 <mss 1460,nop,nop,sackOK>
   +0 > S. 0:0(0) ack 1    <mss 1460,nop,nop,sackOK>
 +.01 < . 1:1(0) ack 1 win 32792

   +0 accept(3, ..., ...) = 4

// We close(), send a FIN, and get an ACK and FIN, in order to get into TIME_WAIT.

 +.01 close(4) = 0
   +0 > F. 1:1(0) ack 1
 +.01 < F. 1:1(0) ack 2 win 32792
   +0 > . 2:2(0) ack 2

// SYN hitting a TIME_WAIT -> should use an ISN based on TIMEWAIT tw_snd_nxt

 +.01 < S 1000:1000(0) win 65535 <mss 1460,nop,nop,sackOK>
   +0 > S. 65539:65539(0) ack 1001 <mss 1460,nop,nop,sackOK>
====================

Link: https://lore.kernel.org/r/20240407093322.3172088-1-edumazet@google.comSigned-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents 1c25fe9a 41eecbd7
......@@ -52,6 +52,8 @@ extern struct inet_hashinfo tcp_hashinfo;
DECLARE_PER_CPU(unsigned int, tcp_orphan_count);
int tcp_orphan_count_sum(void);
DECLARE_PER_CPU(u32, tcp_tw_isn);
void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
......@@ -392,7 +394,8 @@ enum tcp_tw_status {
enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sk_buff *skb,
const struct tcphdr *th);
const struct tcphdr *th,
u32 *tw_isn);
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
struct request_sock *req, bool fastopen,
bool *lost_race);
......@@ -935,13 +938,10 @@ struct tcp_skb_cb {
__u32 seq; /* Starting sequence number */
__u32 end_seq; /* SEQ + FIN + SYN + datalen */
union {
/* Note : tcp_tw_isn is used in input path only
* (isn chosen by tcp_timewait_state_process())
*
/* Note :
* tcp_gso_segs/size are used in write queue only,
* cf tcp_skb_pcount()/tcp_skb_mss()
*/
__u32 tcp_tw_isn;
struct {
u16 tcp_gso_segs;
u16 tcp_gso_size;
......@@ -2284,7 +2284,8 @@ struct tcp_request_sock_ops {
struct dst_entry *(*route_req)(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
struct request_sock *req);
struct request_sock *req,
u32 tw_isn);
u32 (*init_seq)(const struct sk_buff *skb);
u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb);
int (*send_synack)(const struct sock *sk, struct dst_entry *dst,
......
......@@ -290,6 +290,9 @@ enum {
DEFINE_PER_CPU(unsigned int, tcp_orphan_count);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count);
DEFINE_PER_CPU(u32, tcp_tw_isn);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
long sysctl_tcp_mem[3] __read_mostly;
EXPORT_SYMBOL(sysctl_tcp_mem);
......
......@@ -7097,7 +7097,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct sock *sk, struct sk_buff *skb)
{
struct tcp_fastopen_cookie foc = { .len = -1 };
__u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt;
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
......@@ -7107,22 +7106,29 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
struct dst_entry *dst;
struct flowi fl;
u8 syncookies;
u32 isn;
#ifdef CONFIG_TCP_AO
const struct tcp_ao_hdr *aoh;
#endif
syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
isn = __this_cpu_read(tcp_tw_isn);
if (isn) {
/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
* evidently real one.
*/
if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
__this_cpu_write(tcp_tw_isn, 0);
} else {
syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
if (syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) {
want_cookie = tcp_syn_flood_action(sk,
rsk_ops->slab_name);
if (!want_cookie)
goto drop;
}
}
if (sk_acceptq_is_full(sk)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
......@@ -7160,7 +7166,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
dst = af_ops->route_req(sk, skb, &fl, req);
dst = af_ops->route_req(sk, skb, &fl, req, isn);
if (!dst)
goto drop_and_free;
......
......@@ -1666,7 +1666,8 @@ static void tcp_v4_init_req(struct request_sock *req,
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
struct request_sock *req)
struct request_sock *req,
u32 tw_isn)
{
tcp_v4_init_req(req, sk, skb);
......@@ -2145,7 +2146,6 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph,
skb->len - th->doff * 4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
......@@ -2167,6 +2167,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
bool refcounted;
struct sock *sk;
int ret;
u32 isn;
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
......@@ -2382,7 +2383,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
inet_twsk_put(inet_twsk(sk));
goto csum_error;
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN: {
struct sock *sk2 = inet_lookup_listener(net,
net->ipv4.tcp_death_row.hashinfo,
......@@ -2396,6 +2397,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
sk = sk2;
tcp_v4_restore_cb(skb);
refcounted = false;
__this_cpu_write(tcp_tw_isn, isn);
goto process;
}
}
......
......@@ -95,7 +95,7 @@ static void twsk_rcv_nxt_update(struct tcp_timewait_sock *tcptw, u32 seq)
*/
enum tcp_tw_status
tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
const struct tcphdr *th)
const struct tcphdr *th, u32 *tw_isn)
{
struct tcp_options_received tmp_opt;
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
......@@ -228,7 +228,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
u32 isn = tcptw->tw_snd_nxt + 65535 + 2;
if (isn == 0)
isn++;
TCP_SKB_CB(skb)->tcp_tw_isn = isn;
*tw_isn = isn;
return TCP_TW_SYN;
}
......
......@@ -793,7 +793,8 @@ static int tcp_v6_md5_hash_skb(char *md5_hash,
static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
struct sk_buff *skb,
u32 tw_isn)
{
bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
struct inet_request_sock *ireq = inet_rsk(req);
......@@ -807,7 +808,7 @@ static void tcp_v6_init_req(struct request_sock *req,
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
if (!tw_isn &&
(ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
......@@ -820,9 +821,10 @@ static void tcp_v6_init_req(struct request_sock *req,
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
struct request_sock *req)
struct request_sock *req,
u32 tw_isn)
{
tcp_v6_init_req(req, sk, skb);
tcp_v6_init_req(req, sk, skb, tw_isn);
if (security_inet_conn_request(sk, skb, req))
return NULL;
......@@ -1739,7 +1741,6 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
skb->len - th->doff*4);
TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
TCP_SKB_CB(skb)->tcp_tw_isn = 0;
TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
TCP_SKB_CB(skb)->sacked = 0;
TCP_SKB_CB(skb)->has_rxtstamp =
......@@ -1756,6 +1757,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
bool refcounted;
struct sock *sk;
int ret;
u32 isn;
struct net *net = dev_net(skb->dev);
drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
......@@ -1965,7 +1967,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
goto csum_error;
}
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) {
case TCP_TW_SYN:
{
struct sock *sk2;
......@@ -1983,6 +1985,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
sk = sk2;
tcp_v6_restore_cb(skb);
refcounted = false;
__this_cpu_write(tcp_tw_isn, isn);
goto process;
}
}
......
......@@ -289,7 +289,8 @@ EXPORT_SYMBOL_GPL(mptcp_subflow_init_cookie_req);
static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
struct request_sock *req)
struct request_sock *req,
u32 tw_isn)
{
struct dst_entry *dst;
int err;
......@@ -297,7 +298,7 @@ static struct dst_entry *subflow_v4_route_req(const struct sock *sk,
tcp_rsk(req)->is_mptcp = 1;
subflow_init_req(req, sk);
dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req);
dst = tcp_request_sock_ipv4_ops.route_req(sk, skb, fl, req, tw_isn);
if (!dst)
return NULL;
......@@ -356,7 +357,8 @@ static int subflow_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
struct sk_buff *skb,
struct flowi *fl,
struct request_sock *req)
struct request_sock *req,
u32 tw_isn)
{
struct dst_entry *dst;
int err;
......@@ -364,7 +366,7 @@ static struct dst_entry *subflow_v6_route_req(const struct sock *sk,
tcp_rsk(req)->is_mptcp = 1;
subflow_init_req(req, sk);
dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req);
dst = tcp_request_sock_ipv6_ops.route_req(sk, skb, fl, req, tw_isn);
if (!dst)
return NULL;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment