Commit 0f1c28ae, authored by Yuchung Cheng and committed by David S. Miller

tcp: usec resolution SYN/ACK RTT

Currently SYN/ACK RTT is measured in jiffies. For LAN the SYN/ACK
RTT is often measured as 0ms or sometimes 1ms, which would affect
RTT estimation and min RTT sampling used by some congestion control.

This patch improves SYN/ACK RTT to be usec resolution if platform
supports it. While the timestamping of SYN/ACK is done in request
sock, the RTT measurement is carefully arranged to avoid storing
another u64 timestamp in tcp_sock.

For regular handshake w/o SYNACK retransmission, the RTT is sampled
right after the child socket is created and right before the request
sock is released (tcp_check_req() in tcp_minisocks.c)

For Fast Open the child socket is already created when the SYN/ACK is
sent, so the RTT is sampled in tcp_rcv_state_process() after processing
the final ACK and right before the request socket is released.

If the SYN/ACK was retransmitted or SYN-cookie was used, we rely
on TCP timestamps to measure the RTT. The sample is taken at the
same place in tcp_rcv_state_process() after the timestamp values
are validated in tcp_validate_incoming(). Note that we do not store
TS echo value in request_sock for SYN-cookies, because the value
is already stored in tp->rx_opt used by tcp_ack_update_rtt().

One side benefit is that the RTT measurement now happens before
initializing congestion control (of the passive side). Therefore
the congestion control can use the SYN/ACK RTT.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 21fe8af4
...@@ -112,11 +112,11 @@ struct tcp_request_sock_ops; ...@@ -112,11 +112,11 @@ struct tcp_request_sock_ops;
struct tcp_request_sock { struct tcp_request_sock {
struct inet_request_sock req; struct inet_request_sock req;
const struct tcp_request_sock_ops *af_specific; const struct tcp_request_sock_ops *af_specific;
struct skb_mstamp snt_synack; /* first SYNACK sent time */
bool tfo_listener; bool tfo_listener;
u32 txhash; u32 txhash;
u32 rcv_isn; u32 rcv_isn;
u32 snt_isn; u32 snt_isn;
u32 snt_synack; /* synack sent time */
u32 last_oow_ack_time; /* last SYNACK */ u32 last_oow_ack_time; /* last SYNACK */
u32 rcv_nxt; /* the ack # by SYNACK. For u32 rcv_nxt; /* the ack # by SYNACK. For
* FastOpen it's the seq# * FastOpen it's the seq#
......
...@@ -565,6 +565,7 @@ bool tcp_schedule_loss_probe(struct sock *sk); ...@@ -565,6 +565,7 @@ bool tcp_schedule_loss_probe(struct sock *sk);
/* tcp_input.c */ /* tcp_input.c */
void tcp_resume_early_retransmit(struct sock *sk); void tcp_resume_early_retransmit(struct sock *sk);
void tcp_rearm_rto(struct sock *sk); void tcp_rearm_rto(struct sock *sk);
void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req);
void tcp_reset(struct sock *sk); void tcp_reset(struct sock *sk);
/* tcp_timer.c */ /* tcp_timer.c */
......
...@@ -345,7 +345,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) ...@@ -345,7 +345,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok = tcp_opt.wscale_ok; ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp; ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; treq->snt_synack.v64 = 0;
treq->tfo_listener = false; treq->tfo_listener = false;
ireq->ir_iif = sk->sk_bound_dev_if; ireq->ir_iif = sk->sk_bound_dev_if;
......
...@@ -2953,21 +2953,21 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, ...@@ -2953,21 +2953,21 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
} }
/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */ /* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp) void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
{ {
struct tcp_sock *tp = tcp_sk(sk); long rtt_us = -1L;
long seq_rtt_us = -1L;
if (synack_stamp && !tp->total_retrans) if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) {
seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp); struct skb_mstamp now;
/* If the ACK acks both the SYNACK and the (Fast Open'd) data packets skb_mstamp_get(&now);
* sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack() rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack);
*/ }
if (!tp->srtt_us)
tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L); tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L);
} }
static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{ {
const struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_connection_sock *icsk = inet_csk(sk);
...@@ -5706,7 +5706,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -5706,7 +5706,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct request_sock *req; struct request_sock *req;
int queued = 0; int queued = 0;
bool acceptable; bool acceptable;
u32 synack_stamp;
tp->rx_opt.saw_tstamp = 0; tp->rx_opt.saw_tstamp = 0;
...@@ -5785,15 +5784,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -5785,15 +5784,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (!acceptable) if (!acceptable)
return 1; return 1;
if (!tp->srtt_us)
tcp_synack_rtt_meas(sk, req);
/* Once we leave TCP_SYN_RECV, we no longer need req /* Once we leave TCP_SYN_RECV, we no longer need req
* so release it. * so release it.
*/ */
if (req) { if (req) {
synack_stamp = tcp_rsk(req)->snt_synack;
tp->total_retrans = req->num_retrans; tp->total_retrans = req->num_retrans;
reqsk_fastopen_remove(sk, req, false); reqsk_fastopen_remove(sk, req, false);
} else { } else {
synack_stamp = tp->lsndtime;
/* Make sure socket is routed, for correct metrics. */ /* Make sure socket is routed, for correct metrics. */
icsk->icsk_af_ops->rebuild_header(sk); icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_congestion_control(sk); tcp_init_congestion_control(sk);
...@@ -5816,7 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -5816,7 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
tcp_synack_rtt_meas(sk, synack_stamp);
if (tp->rx_opt.tstamp_ok) if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
...@@ -6027,7 +6026,7 @@ static void tcp_openreq_init(struct request_sock *req, ...@@ -6027,7 +6026,7 @@ static void tcp_openreq_init(struct request_sock *req,
req->cookie_ts = 0; req->cookie_ts = 0;
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
tcp_rsk(req)->snt_synack = tcp_time_stamp; skb_mstamp_get(&tcp_rsk(req)->snt_synack);
tcp_rsk(req)->last_oow_ack_time = 0; tcp_rsk(req)->last_oow_ack_time = 0;
req->mss = rx_opt->mss_clamp; req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
......
...@@ -470,7 +470,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, ...@@ -470,7 +470,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tcp_enable_early_retrans(newtp); tcp_enable_early_retrans(newtp);
newtp->tlp_high_seq = 0; newtp->tlp_high_seq = 0;
newtp->lsndtime = treq->snt_synack; newtp->lsndtime = treq->snt_synack.stamp_jiffies;
newtp->last_oow_ack_time = 0; newtp->last_oow_ack_time = 0;
newtp->total_retrans = req->num_retrans; newtp->total_retrans = req->num_retrans;
...@@ -760,6 +760,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, ...@@ -760,6 +760,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (!child) if (!child)
goto listen_overflow; goto listen_overflow;
tcp_synack_rtt_meas(child, req);
inet_csk_reqsk_queue_drop(sk, req); inet_csk_reqsk_queue_drop(sk, req);
inet_csk_reqsk_queue_add(sk, req, child); inet_csk_reqsk_queue_add(sk, req, child);
/* Warning: caller must not call reqsk_put(req); /* Warning: caller must not call reqsk_put(req);
......
...@@ -210,7 +210,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ...@@ -210,7 +210,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok = tcp_opt.wscale_ok; ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp; ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; treq->snt_synack.v64 = 0;
treq->rcv_isn = ntohl(th->seq) - 1; treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie; treq->snt_isn = cookie;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment