Commit 5f6188a8, authored by Eric Dumazet, committed by David S. Miller

tcp: do not change tcp_wstamp_ns in tcp_mstamp_refresh

In the EDT design, I made the mistake of using tcp_wstamp_ns
both to store the last tcp_clock_ns() sample and to store the
pacing virtual timer.

This causes major regressions for high-speed flows.

Introduce tcp_clock_cache to store last tcp_clock_ns().
This is needed because some arches have a slow high-resolution
kernel time service.

tcp_wstamp_ns is only updated when a packet is sent.

Note that we can remove tcp_mstamp in the future since
tcp_mstamp is essentially tcp_clock_cache/1000, so the
apparent socket size increase is temporary.

Fixes: 9799ccb0 ("tcp: add tcp_wstamp_ns socket field")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 1a3aea25
...@@ -249,6 +249,7 @@ struct tcp_sock { ...@@ -249,6 +249,7 @@ struct tcp_sock {
u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */
u64 tcp_wstamp_ns; /* departure time for next sent data packet */ u64 tcp_wstamp_ns; /* departure time for next sent data packet */
u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */
/* RTT measurement */ /* RTT measurement */
u64 tcp_mstamp; /* most recent packet received/sent */ u64 tcp_mstamp; /* most recent packet received/sent */
......
...@@ -52,9 +52,8 @@ void tcp_mstamp_refresh(struct tcp_sock *tp) ...@@ -52,9 +52,8 @@ void tcp_mstamp_refresh(struct tcp_sock *tp)
{ {
u64 val = tcp_clock_ns(); u64 val = tcp_clock_ns();
/* departure time for next data packet */ if (val > tp->tcp_clock_cache)
if (val > tp->tcp_wstamp_ns) tp->tcp_clock_cache = val;
tp->tcp_wstamp_ns = val;
val = div_u64(val, NSEC_PER_USEC); val = div_u64(val, NSEC_PER_USEC);
if (val > tp->tcp_mstamp) if (val > tp->tcp_mstamp)
...@@ -1050,6 +1049,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, ...@@ -1050,6 +1049,10 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
if (unlikely(!skb)) if (unlikely(!skb))
return -ENOBUFS; return -ENOBUFS;
} }
/* TODO: might take care of jitter here */
tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache);
skb->skb_mstamp_ns = tp->tcp_wstamp_ns; skb->skb_mstamp_ns = tp->tcp_wstamp_ns;
inet = inet_sk(sk); inet = inet_sk(sk);
......
...@@ -360,7 +360,7 @@ static void tcp_probe_timer(struct sock *sk) ...@@ -360,7 +360,7 @@ static void tcp_probe_timer(struct sock *sk)
*/ */
start_ts = tcp_skb_timestamp(skb); start_ts = tcp_skb_timestamp(skb);
if (!start_ts) if (!start_ts)
skb->skb_mstamp_ns = tp->tcp_wstamp_ns; skb->skb_mstamp_ns = tp->tcp_clock_cache;
else if (icsk->icsk_user_timeout && else if (icsk->icsk_user_timeout &&
(s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout) (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
goto abort; goto abort;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment