Commit 9b44190d authored by Yuchung Cheng, committed by David S. Miller

tcp: refactor F-RTO

This patch series refactors the F-RTO feature (RFC4138/5682).

This is to simplify the loss recovery processing. Existing F-RTO
was developed during the experimental stage (RFC4138) and has
many experimental features.  It takes a separate code path from
the traditional timeout processing by overloading CA_Disorder
instead of using CA_Loss state. This complicates CA_Disorder state
handling because it's also used for handling dubious ACKs and undos.
While the algorithm in the RFC does not change the congestion control,
the implementation intercepts congestion control in various places
(e.g., frto_cwnd in tcp_ack()).

The new code implements newer F-RTO RFC5682 using CA_Loss processing
path.  F-RTO becomes a small extension in the timeout processing
and interfaces with congestion control and Eifel undo modules.
It lets the congestion control (module) determine how many packets to send
independently.  F-RTO only chooses what to send in order to detect
spurious retransmission. If the timeout is found spurious it invokes
existing Eifel undo algorithms like DSACK or TCP timestamp based
detection.

The first patch removes all F-RTO code except sysctl_tcp_frto, which is
left for the new implementation.  Since CA_EVENT_FRTO is removed, TCP
westwood now computes ssthresh on the regular timeout CA_EVENT_LOSS event.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent e306e2c1
...@@ -239,23 +239,6 @@ tcp_frto - INTEGER ...@@ -239,23 +239,6 @@ tcp_frto - INTEGER
interacts badly with the packet counting of the SACK enabled TCP interacts badly with the packet counting of the SACK enabled TCP
flow. flow.
tcp_frto_response - INTEGER
When F-RTO has detected that a TCP retransmission timeout was
spurious (i.e, the timeout would have been avoided had TCP set a
longer retransmission timeout), TCP has several options what to do
next. Possible values are:
0 Rate halving based; a smooth and conservative response,
results in halved cwnd and ssthresh after one RTT
1 Very conservative response; not recommended because even
though being valid, it interacts poorly with the rest of
Linux TCP, halves cwnd and ssthresh immediately
2 Aggressive response; undoes congestion control measures
that are now known to be unnecessary (ignoring the
possibility of a lost retransmission that would require
TCP to be more cautious), cwnd and ssthresh are restored
to the values prior timeout
Default: 0 (rate halving based)
tcp_keepalive_time - INTEGER tcp_keepalive_time - INTEGER
How often TCP sends out keepalive messages when keepalive is enabled. How often TCP sends out keepalive messages when keepalive is enabled.
Default: 2hours. Default: 2hours.
......
...@@ -187,14 +187,12 @@ struct tcp_sock { ...@@ -187,14 +187,12 @@ struct tcp_sock {
u32 window_clamp; /* Maximal window to advertise */ u32 window_clamp; /* Maximal window to advertise */
u32 rcv_ssthresh; /* Current window clamp */ u32 rcv_ssthresh; /* Current window clamp */
u32 frto_highmark; /* snd_nxt when RTO occurred */
u16 advmss; /* Advertised MSS */ u16 advmss; /* Advertised MSS */
u8 frto_counter; /* Number of new acks after RTO */ u8 unused;
u8 nonagle : 4,/* Disable Nagle algorithm? */ u8 nonagle : 4,/* Disable Nagle algorithm? */
thin_lto : 1,/* Use linear timeouts for thin streams */ thin_lto : 1,/* Use linear timeouts for thin streams */
thin_dupack : 1,/* Fast retransmit on first dupack */ thin_dupack : 1,/* Fast retransmit on first dupack */
repair : 1, repair : 1;
unused : 1;
u8 repair_queue; u8 repair_queue;
u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */
syn_data:1, /* SYN includes data */ syn_data:1, /* SYN includes data */
......
...@@ -272,7 +272,6 @@ extern int sysctl_tcp_app_win; ...@@ -272,7 +272,6 @@ extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_tw_reuse; extern int sysctl_tcp_tw_reuse;
extern int sysctl_tcp_frto; extern int sysctl_tcp_frto;
extern int sysctl_tcp_frto_response;
extern int sysctl_tcp_low_latency; extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_dma_copybreak; extern int sysctl_tcp_dma_copybreak;
extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_nometrics_save;
...@@ -424,8 +423,6 @@ extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, ...@@ -424,8 +423,6 @@ extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
bool fastopen); bool fastopen);
extern int tcp_child_process(struct sock *parent, struct sock *child, extern int tcp_child_process(struct sock *parent, struct sock *child,
struct sk_buff *skb); struct sk_buff *skb);
extern bool tcp_use_frto(struct sock *sk);
extern void tcp_enter_frto(struct sock *sk);
extern void tcp_enter_loss(struct sock *sk, int how); extern void tcp_enter_loss(struct sock *sk, int how);
extern void tcp_clear_retrans(struct tcp_sock *tp); extern void tcp_clear_retrans(struct tcp_sock *tp);
extern void tcp_update_metrics(struct sock *sk); extern void tcp_update_metrics(struct sock *sk);
...@@ -756,7 +753,6 @@ enum tcp_ca_event { ...@@ -756,7 +753,6 @@ enum tcp_ca_event {
CA_EVENT_TX_START, /* first transmit when no packets in flight */ CA_EVENT_TX_START, /* first transmit when no packets in flight */
CA_EVENT_CWND_RESTART, /* congestion window restart */ CA_EVENT_CWND_RESTART, /* congestion window restart */
CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */ CA_EVENT_COMPLETE_CWR, /* end of congestion recovery */
CA_EVENT_FRTO, /* fast recovery timeout */
CA_EVENT_LOSS, /* loss timeout */ CA_EVENT_LOSS, /* loss timeout */
CA_EVENT_FAST_ACK, /* in sequence ack */ CA_EVENT_FAST_ACK, /* in sequence ack */
CA_EVENT_SLOW_ACK, /* other ack */ CA_EVENT_SLOW_ACK, /* other ack */
......
...@@ -591,13 +591,6 @@ static struct ctl_table ipv4_table[] = { ...@@ -591,13 +591,6 @@ static struct ctl_table ipv4_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec .proc_handler = proc_dointvec
}, },
{
.procname = "tcp_frto_response",
.data = &sysctl_tcp_frto_response,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec
},
{ {
.procname = "tcp_low_latency", .procname = "tcp_low_latency",
.data = &sysctl_tcp_low_latency, .data = &sysctl_tcp_low_latency,
......
This diff is collapsed.
...@@ -422,9 +422,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, ...@@ -422,9 +422,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd = TCP_INIT_CWND;
newtp->snd_cwnd_cnt = 0; newtp->snd_cwnd_cnt = 0;
newtp->frto_counter = 0;
newtp->frto_highmark = 0;
if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops && if (newicsk->icsk_ca_ops != &tcp_init_congestion_ops &&
!try_module_get(newicsk->icsk_ca_ops->owner)) !try_module_get(newicsk->icsk_ca_ops->owner))
newicsk->icsk_ca_ops = &tcp_init_congestion_ops; newicsk->icsk_ca_ops = &tcp_init_congestion_ops;
......
...@@ -78,10 +78,6 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) ...@@ -78,10 +78,6 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
tcp_advance_send_head(sk, skb); tcp_advance_send_head(sk, skb);
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
/* Don't override Nagle indefinitely with F-RTO */
if (tp->frto_counter == 2)
tp->frto_counter = 3;
tp->packets_out += tcp_skb_pcount(skb); tp->packets_out += tcp_skb_pcount(skb);
if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || if (!prior_packets || icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
...@@ -1470,11 +1466,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf ...@@ -1470,11 +1466,8 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
if (nonagle & TCP_NAGLE_PUSH) if (nonagle & TCP_NAGLE_PUSH)
return true; return true;
/* Don't use the nagle rule for urgent data (or for the final FIN). /* Don't use the nagle rule for urgent data (or for the final FIN). */
* Nagle can be ignored during F-RTO too (see RFC4138). if (tcp_urg_mode(tp) || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
*/
if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN))
return true; return true;
if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
......
...@@ -416,11 +416,7 @@ void tcp_retransmit_timer(struct sock *sk) ...@@ -416,11 +416,7 @@ void tcp_retransmit_timer(struct sock *sk)
NET_INC_STATS_BH(sock_net(sk), mib_idx); NET_INC_STATS_BH(sock_net(sk), mib_idx);
} }
if (tcp_use_frto(sk)) {
tcp_enter_frto(sk);
} else {
tcp_enter_loss(sk, 0); tcp_enter_loss(sk, 0);
}
if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) { if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk)) > 0) {
/* Retransmission failed because of local congestion, /* Retransmission failed because of local congestion,
......
...@@ -236,7 +236,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) ...@@ -236,7 +236,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event)
tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); tp->snd_cwnd = tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
break; break;
case CA_EVENT_FRTO: case CA_EVENT_LOSS:
tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk); tp->snd_ssthresh = tcp_westwood_bw_rttmin(sk);
/* Update RTT_min when next ack arrives */ /* Update RTT_min when next ack arrives */
w->reset_rtt_min = 1; w->reset_rtt_min = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment