Commit 6f021c62, authored by Eric Dumazet, committed by David S. Miller

tcp: fix slow start after idle vs TSO/GSO

slow start after idle might reduce cwnd, but we perform this
after first packet was cooked and sent.

With TSO/GSO, it means that we might send a full TSO packet
even if cwnd should have been reduced to IW10.

Moving the SSAI (slow start after idle) check into skb_entail() makes
sense, because we slightly reduce the number of times this check is done,
especially for large send() calls and TCP Small Queues callbacks from
softirq context.

As Neal pointed out, we also need to perform the check
if/when receive window opens.

Tested:

Following packetdrill test demonstrates the problem
// Test of slow start after idle

`sysctl -q net.ipv4.tcp_slow_start_after_idle=1`

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+0    setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+0    bind(3, ..., ...) = 0
+0    listen(3, 1) = 0

+0    < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+0    > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 6>
+.100 < . 1:1(0) ack 1 win 511
+0    accept(3, ..., ...) = 4
+0    setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0

+0    write(4, ..., 26000) = 26000
+0    > . 1:5001(5000) ack 1
+0    > . 5001:10001(5000) ack 1
+0    %{ assert tcpi_snd_cwnd == 10 }%

+.100 < . 1:1(0) ack 10001 win 511
+0    %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+0    > . 10001:20001(10000) ack 1
+0    > P. 20001:26001(6000) ack 1

+.100 < . 1:1(0) ack 26001 win 511
+0    %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%

+4 write(4, ..., 20000) = 20000
// If slow start after idle works properly, we should send 5 MSS here (cwnd/2)
+0    > . 26001:31001(5000) ack 1
+0    %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+0    > . 31001:36001(5000) ack 1
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 56fff0a0
...@@ -1165,6 +1165,19 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt) ...@@ -1165,6 +1165,19 @@ static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
} }
u32 tcp_default_init_rwnd(u32 mss); u32 tcp_default_init_rwnd(u32 mss);
void tcp_cwnd_restart(struct sock *sk, s32 delta);
/* Restart the congestion window after an idle period, when enabled.
 *
 * Does nothing unless sysctl_tcp_slow_start_after_idle is set and
 * tp->packets_out is zero.  Otherwise the time elapsed since
 * tp->lsndtime is measured and, if it exceeds the socket's current
 * icsk_rto, that elapsed time is handed to tcp_cwnd_restart().
 */
static inline void tcp_slow_start_after_idle_check(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	s32 idle;

	if (sysctl_tcp_slow_start_after_idle && !tp->packets_out) {
		/* Time since the last send, in tcp_time_stamp units. */
		idle = tcp_time_stamp - tp->lsndtime;
		if (idle > inet_csk(sk)->icsk_rto)
			tcp_cwnd_restart(sk, idle);
	}
}
/* Determine a window scaling and initial window to offer. */ /* Determine a window scaling and initial window to offer. */
void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd,
......
...@@ -627,6 +627,8 @@ static void skb_entail(struct sock *sk, struct sk_buff *skb) ...@@ -627,6 +627,8 @@ static void skb_entail(struct sock *sk, struct sk_buff *skb)
sk_mem_charge(sk, skb->truesize); sk_mem_charge(sk, skb->truesize);
if (tp->nonagle & TCP_NAGLE_PUSH) if (tp->nonagle & TCP_NAGLE_PUSH)
tp->nonagle &= ~TCP_NAGLE_PUSH; tp->nonagle &= ~TCP_NAGLE_PUSH;
tcp_slow_start_after_idle_check(sk);
} }
static inline void tcp_mark_urg(struct tcp_sock *tp, int flags) static inline void tcp_mark_urg(struct tcp_sock *tp, int flags)
......
...@@ -3332,6 +3332,9 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ...@@ -3332,6 +3332,9 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
tp->pred_flags = 0; tp->pred_flags = 0;
tcp_fast_path_check(sk); tcp_fast_path_check(sk);
if (tcp_send_head(sk))
tcp_slow_start_after_idle_check(sk);
if (nwin > tp->max_window) { if (nwin > tp->max_window) {
tp->max_window = nwin; tp->max_window = nwin;
tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie); tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
......
...@@ -137,12 +137,12 @@ static __u16 tcp_advertise_mss(struct sock *sk) ...@@ -137,12 +137,12 @@ static __u16 tcp_advertise_mss(struct sock *sk)
} }
/* RFC2861. Reset CWND after idle period longer RTO to "restart window". /* RFC2861. Reset CWND after idle period longer RTO to "restart window".
* This is the first part of cwnd validation mechanism. */ * This is the first part of cwnd validation mechanism.
static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst) */
void tcp_cwnd_restart(struct sock *sk, s32 delta)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
s32 delta = tcp_time_stamp - tp->lsndtime; u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
u32 restart_cwnd = tcp_init_cwnd(tp, dst);
u32 cwnd = tp->snd_cwnd; u32 cwnd = tp->snd_cwnd;
tcp_ca_event(sk, CA_EVENT_CWND_RESTART); tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
...@@ -164,10 +164,6 @@ static void tcp_event_data_sent(struct tcp_sock *tp, ...@@ -164,10 +164,6 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
const u32 now = tcp_time_stamp; const u32 now = tcp_time_stamp;
if (sysctl_tcp_slow_start_after_idle &&
(!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
tcp_cwnd_restart(sk, __sk_dst_get(sk));
tp->lsndtime = now; tp->lsndtime = now;
/* If it is a reply for ato after last received /* If it is a reply for ato after last received
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment