Commit ec3c0982 authored by Patrick McManus, committed by David S. Miller

[TCP]: TCP_DEFER_ACCEPT updates - process as established

Change TCP_DEFER_ACCEPT implementation so that it transitions a
connection to ESTABLISHED after handshake is complete instead of
leaving it in SYN-RECV until some data arrives. Place connection in
accept queue when first data packet arrives from slow path.

Benefits:
  - established connection is now reset if it never makes it
   to the accept queue

 - diagnostic state of established matches with the packet traces
   showing completed handshake

 - TCP_DEFER_ACCEPT timeouts are expressed in seconds and can now be
   enforced with reasonable accuracy instead of rounding up to next
   exponential back-off of syn-ack retry.
Signed-off-by: Patrick McManus <mcmanus@ducksong.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e4c78840
...@@ -239,6 +239,11 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) ...@@ -239,6 +239,11 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
return (struct tcp_request_sock *)req; return (struct tcp_request_sock *)req;
} }
/*
 * Bookkeeping for a connection whose hand-off to a listener's accept
 * queue is being deferred: the listening socket it belongs to and the
 * request that still has to be queued on it.
 */
struct tcp_deferred_accept_info {
	struct sock *listen_sk;		/* listening socket the connection will be queued on */
	struct request_sock *request;	/* request still waiting to be queued; NULL when nothing is deferred */
};
struct tcp_sock { struct tcp_sock {
/* inet_connection_sock has to be the first member of tcp_sock */ /* inet_connection_sock has to be the first member of tcp_sock */
struct inet_connection_sock inet_conn; struct inet_connection_sock inet_conn;
...@@ -374,6 +379,8 @@ struct tcp_sock { ...@@ -374,6 +379,8 @@ struct tcp_sock {
unsigned int keepalive_intvl; /* time interval between keep alive probes */ unsigned int keepalive_intvl; /* time interval between keep alive probes */
int linger2; int linger2;
struct tcp_deferred_accept_info defer_tcp_accept;
unsigned long last_synq_overflow; unsigned long last_synq_overflow;
u32 tso_deferred; u32 tso_deferred;
......
...@@ -115,8 +115,8 @@ struct request_sock_queue { ...@@ -115,8 +115,8 @@ struct request_sock_queue {
struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_head;
struct request_sock *rskq_accept_tail; struct request_sock *rskq_accept_tail;
rwlock_t syn_wait_lock; rwlock_t syn_wait_lock;
u8 rskq_defer_accept; u16 rskq_defer_accept;
/* 3 bytes hole, try to pack */ /* 2 bytes hole, try to pack */
struct listen_sock *listen_opt; struct listen_sock *listen_opt;
}; };
......
...@@ -139,6 +139,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); ...@@ -139,6 +139,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
#define MAX_TCP_KEEPINTVL 32767 #define MAX_TCP_KEEPINTVL 32767
#define MAX_TCP_KEEPCNT 127 #define MAX_TCP_KEEPCNT 127
#define MAX_TCP_SYNCNT 127 #define MAX_TCP_SYNCNT 127
#define MAX_TCP_ACCEPT_DEFERRED 65535
#define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */ #define TCP_SYNQ_INTERVAL (HZ/5) /* Period of SYNACK timer */
......
...@@ -414,8 +414,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, ...@@ -414,8 +414,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
struct inet_connection_sock *icsk = inet_csk(parent); struct inet_connection_sock *icsk = inet_csk(parent);
struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct request_sock_queue *queue = &icsk->icsk_accept_queue;
struct listen_sock *lopt = queue->listen_opt; struct listen_sock *lopt = queue->listen_opt;
int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
int thresh = max_retries;
unsigned long now = jiffies; unsigned long now = jiffies;
struct request_sock **reqp, *req; struct request_sock **reqp, *req;
int i, budget; int i, budget;
...@@ -451,9 +450,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, ...@@ -451,9 +450,6 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
} }
} }
if (queue->rskq_defer_accept)
max_retries = queue->rskq_defer_accept;
budget = 2 * (lopt->nr_table_entries / (timeout / interval)); budget = 2 * (lopt->nr_table_entries / (timeout / interval));
i = lopt->clock_hand; i = lopt->clock_hand;
...@@ -461,9 +457,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, ...@@ -461,9 +457,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
reqp=&lopt->syn_table[i]; reqp=&lopt->syn_table[i];
while ((req = *reqp) != NULL) { while ((req = *reqp) != NULL) {
if (time_after_eq(now, req->expires)) { if (time_after_eq(now, req->expires)) {
if ((req->retrans < (inet_rsk(req)->acked ? max_retries : thresh)) && if (req->retrans < thresh &&
(inet_rsk(req)->acked || !req->rsk_ops->rtx_syn_ack(parent, req)) {
!req->rsk_ops->rtx_syn_ack(parent, req))) {
unsigned long timeo; unsigned long timeo;
if (req->retrans++ == 0) if (req->retrans++ == 0)
......
...@@ -2105,15 +2105,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, ...@@ -2105,15 +2105,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break; break;
case TCP_DEFER_ACCEPT: case TCP_DEFER_ACCEPT:
icsk->icsk_accept_queue.rskq_defer_accept = 0; if (val < 0) {
if (val > 0) { err = -EINVAL;
/* Translate value in seconds to number of } else {
* retransmits */ if (val > MAX_TCP_ACCEPT_DEFERRED)
while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && val = MAX_TCP_ACCEPT_DEFERRED;
val > ((TCP_TIMEOUT_INIT / HZ) << icsk->icsk_accept_queue.rskq_defer_accept = val;
icsk->icsk_accept_queue.rskq_defer_accept))
icsk->icsk_accept_queue.rskq_defer_accept++;
icsk->icsk_accept_queue.rskq_defer_accept++;
} }
break; break;
...@@ -2295,8 +2292,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, ...@@ -2295,8 +2292,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = (val ? : sysctl_tcp_fin_timeout) / HZ; val = (val ? : sysctl_tcp_fin_timeout) / HZ;
break; break;
case TCP_DEFER_ACCEPT: case TCP_DEFER_ACCEPT:
val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : val = icsk->icsk_accept_queue.rskq_defer_accept;
((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
break; break;
case TCP_WINDOW_CLAMP: case TCP_WINDOW_CLAMP:
val = tp->window_clamp; val = tp->window_clamp;
......
...@@ -4451,6 +4451,49 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) ...@@ -4451,6 +4451,49 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
} }
} }
/*
 * Decide what to do with a connection whose accept has been deferred.
 *
 * Does nothing when sk has no deferred request.  Otherwise:
 *  - if data is queued and the listener is still in TCP_LISTEN, the
 *    keepalive timer is re-armed or deleted depending on SOCK_KEEPOPEN,
 *    the request is added to the listener's accept queue, the listener
 *    is notified, and the references and deferred-accept state held by
 *    sk are released;
 *  - if no data qualifies but a FIN was seen, or the listener is no
 *    longer in TCP_LISTEN, the connection is reset.
 *
 * Returns -1 when the connection was reset, 0 in every other case.
 */
static int tcp_defer_accept_check(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sock *listener;
	int pending;
	int fin_seen = 0;

	/* Nothing deferred on this socket. */
	if (!tp->defer_tcp_accept.request)
		return 0;

	listener = tp->defer_tcp_accept.listen_sk;

	/* Sequence space received but not yet copied out. */
	pending = tp->rcv_nxt - tp->copied_seq;

	/* Look at the last skb on the receive queue for a FIN flag. */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		fin_seen = tcp_hdr((struct sk_buff *)
				   sk->sk_receive_queue.prev)->fin;

	/* Discount the FIN so that it is not counted as queued data. */
	if (pending && fin_seen)
		pending--;

	if (pending && listener->sk_state == TCP_LISTEN) {
		if (sock_flag(sk, SOCK_KEEPOPEN))
			inet_csk_reset_keepalive_timer(sk,
						       keepalive_time_when(tp));
		else
			inet_csk_delete_keepalive_timer(sk);

		/* Hand the connection over to the listener and wake it. */
		inet_csk_reqsk_queue_add(listener,
					 tp->defer_tcp_accept.request, sk);
		listener->sk_data_ready(listener, 0);

		/* Release the references and clear the deferred state. */
		sock_put(listener);
		sock_put(sk);
		tp->defer_tcp_accept.listen_sk = NULL;
		tp->defer_tcp_accept.request = NULL;
		return 0;
	}

	if (fin_seen || listener->sk_state != TCP_LISTEN) {
		tcp_reset(sk);
		return -1;
	}

	return 0;
}
static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
...@@ -4811,6 +4854,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, ...@@ -4811,6 +4854,9 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
tcp_data_snd_check(sk); tcp_data_snd_check(sk);
tcp_ack_snd_check(sk); tcp_ack_snd_check(sk);
if (tcp_defer_accept_check(sk))
return -1;
return 0; return 0;
csum_error: csum_error:
......
...@@ -1920,6 +1920,14 @@ int tcp_v4_destroy_sock(struct sock *sk) ...@@ -1920,6 +1920,14 @@ int tcp_v4_destroy_sock(struct sock *sk)
sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_page = NULL;
} }
if (tp->defer_tcp_accept.request) {
reqsk_free(tp->defer_tcp_accept.request);
sock_put(tp->defer_tcp_accept.listen_sk);
sock_put(sk);
tp->defer_tcp_accept.listen_sk = NULL;
tp->defer_tcp_accept.request = NULL;
}
atomic_dec(&tcp_sockets_allocated); atomic_dec(&tcp_sockets_allocated);
return 0; return 0;
......
...@@ -571,10 +571,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, ...@@ -571,10 +571,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
does sequence test, SYN is truncated, and thus we consider does sequence test, SYN is truncated, and thus we consider
it a bare ACK. it a bare ACK.
If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this Both ends (listening sockets) accept the new incoming
bare ACK. Otherwise, we create an established connection. Both connection and try to talk to each other. 8-)
ends (listening sockets) accept the new incoming connection and try
to talk to each other. 8-)
Note: This case is both harmless, and rare. Possibility is about the Note: This case is both harmless, and rare. Possibility is about the
same as us discovering intelligent life on another planet tomorrow. same as us discovering intelligent life on another planet tomorrow.
...@@ -642,13 +640,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, ...@@ -642,13 +640,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
if (!(flg & TCP_FLAG_ACK)) if (!(flg & TCP_FLAG_ACK))
return NULL; return NULL;
/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
inet_rsk(req)->acked = 1;
return NULL;
}
/* OK, ACK is valid, create big socket and /* OK, ACK is valid, create big socket and
* feed this segment to it. It will repeat all * feed this segment to it. It will repeat all
* the tests. THIS SEGMENT MUST MOVE SOCKET TO * the tests. THIS SEGMENT MUST MOVE SOCKET TO
...@@ -687,7 +678,24 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, ...@@ -687,7 +678,24 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
inet_csk_reqsk_queue_unlink(sk, req, prev); inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req); inet_csk_reqsk_queue_removed(sk, req);
inet_csk_reqsk_queue_add(sk, req, child); if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
/* the accept queue handling is done in the established
* receive slow path, so let's make sure to start there
*/
tcp_sk(child)->pred_flags = 0;
sock_hold(sk);
sock_hold(child);
tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
tcp_sk(child)->defer_tcp_accept.request = req;
inet_csk_reset_keepalive_timer(child,
inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
} else {
inet_csk_reqsk_queue_add(sk, req, child);
}
return child; return child;
listen_overflow: listen_overflow:
......
...@@ -481,6 +481,11 @@ static void tcp_keepalive_timer (unsigned long data) ...@@ -481,6 +481,11 @@ static void tcp_keepalive_timer (unsigned long data)
goto death; goto death;
} }
if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
tcp_send_active_reset(sk, GFP_ATOMIC);
goto death;
}
if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
goto out; goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment