Commit 9772efb9 authored by Stephen Hemminger's avatar Stephen Hemminger Committed by David S. Miller

[TCP]: Appropriate Byte Count support

This is an updated version of the RFC3465 ABC patch originally
for Linux 2.6.11-rc4 by Yee-Ting Li. ABC is a way of counting
bytes ack'd rather than packets when updating congestion control.

The orignal ABC described in the RFC applied to a Reno style
algorithm. For advanced congestion control there is little
change after leaving slow start.
Signed-off-by: default avatarStephen Hemminger <shemminger@osdl.org>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 7faffa1c
...@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER ...@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER
TCP variables: TCP variables:
tcp_abc - INTEGER
Controls Appropriate Byte Count defined in RFC3465. If set to
0 then does congestion avoid once per ack. 1 is conservative
value, and 2 is more agressive.
tcp_syn_retries - INTEGER tcp_syn_retries - INTEGER
Number of times initial SYNs for an active TCP connection attempt Number of times initial SYNs for an active TCP connection attempt
will be retransmitted. Should not be higher than 255. Default value will be retransmitted. Should not be higher than 255. Default value
......
...@@ -390,6 +390,7 @@ enum ...@@ -390,6 +390,7 @@ enum
NET_TCP_BIC_BETA=108, NET_TCP_BIC_BETA=108,
NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
NET_TCP_CONG_CONTROL=110, NET_TCP_CONG_CONTROL=110,
NET_TCP_ABC=111,
}; };
enum { enum {
......
...@@ -326,6 +326,7 @@ struct tcp_sock { ...@@ -326,6 +326,7 @@ struct tcp_sock {
__u32 snd_up; /* Urgent pointer */ __u32 snd_up; /* Urgent pointer */
__u32 total_retrans; /* Total retransmits for entire connection */ __u32 total_retrans; /* Total retransmits for entire connection */
__u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */
unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_time; /* time before keep alive takes place */
unsigned int keepalive_intvl; /* time interval between keep alive probes */ unsigned int keepalive_intvl; /* time interval between keep alive probes */
......
...@@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency; ...@@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor; extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_abc;
extern atomic_t tcp_memory_allocated; extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated; extern atomic_t tcp_sockets_allocated;
...@@ -770,6 +771,23 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) ...@@ -770,6 +771,23 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
*/ */
static inline void tcp_slow_start(struct tcp_sock *tp) static inline void tcp_slow_start(struct tcp_sock *tp)
{ {
if (sysctl_tcp_abc) {
/* RFC3465: Slow Start
* TCP sender SHOULD increase cwnd by the number of
* previously unacknowledged bytes ACKed by each incoming
* acknowledgment, provided the increase is not more than L
*/
if (tp->bytes_acked < tp->mss_cache)
return;
/* We MAY increase by 2 if discovered delayed ack */
if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
}
}
tp->bytes_acked = 0;
if (tp->snd_cwnd < tp->snd_cwnd_clamp) if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++; tp->snd_cwnd++;
} }
...@@ -804,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk) ...@@ -804,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
tp->prior_ssthresh = 0; tp->prior_ssthresh = 0;
tp->bytes_acked = 0;
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
__tcp_enter_cwr(sk); __tcp_enter_cwr(sk);
tcp_set_ca_state(sk, TCP_CA_CWR); tcp_set_ca_state(sk, TCP_CA_CWR);
......
...@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = { ...@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
.proc_handler = &proc_tcp_congestion_control, .proc_handler = &proc_tcp_congestion_control,
.strategy = &sysctl_tcp_congestion_control, .strategy = &sysctl_tcp_congestion_control,
}, },
{
.ctl_name = NET_TCP_ABC,
.procname = "tcp_abc",
.data = &sysctl_tcp_abc,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{ .ctl_name = 0 } { .ctl_name = 0 }
}; };
......
...@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags) ...@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->packets_out = 0; tp->packets_out = 0;
tp->snd_ssthresh = 0x7fffffff; tp->snd_ssthresh = 0x7fffffff;
tp->snd_cwnd_cnt = 0; tp->snd_cwnd_cnt = 0;
tp->bytes_acked = 0;
tcp_set_ca_state(sk, TCP_CA_Open); tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp); tcp_clear_retrans(tp);
inet_csk_delack_init(sk); inet_csk_delack_init(sk);
......
...@@ -192,10 +192,19 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, ...@@ -192,10 +192,19 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
/* In "safe" area, increase. */ /* In "safe" area, increase. */
if (tp->snd_cwnd <= tp->snd_ssthresh) if (tp->snd_cwnd <= tp->snd_ssthresh)
tcp_slow_start(tp); tcp_slow_start(tp);
else {
/* In dangerous area, increase slowly. /* In dangerous area, increase slowly. */
* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd else if (sysctl_tcp_abc) {
/* RFC3465: Apppriate Byte Count
* increase once for each full cwnd acked
*/ */
if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++;
}
} else {
/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
if (tp->snd_cwnd < tp->snd_cwnd_clamp) if (tp->snd_cwnd < tp->snd_cwnd_clamp)
tp->snd_cwnd++; tp->snd_cwnd++;
......
...@@ -89,6 +89,7 @@ int sysctl_tcp_frto; ...@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
int sysctl_tcp_nometrics_save; int sysctl_tcp_nometrics_save;
int sysctl_tcp_moderate_rcvbuf = 1; int sysctl_tcp_moderate_rcvbuf = 1;
int sysctl_tcp_abc = 1;
#define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_DATA 0x01 /* Incoming frame contained data. */
#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
...@@ -1247,6 +1248,7 @@ void tcp_enter_loss(struct sock *sk, int how) ...@@ -1247,6 +1248,7 @@ void tcp_enter_loss(struct sock *sk, int how)
tp->snd_cwnd_cnt = 0; tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp; tp->snd_cwnd_stamp = tcp_time_stamp;
tp->bytes_acked = 0;
tcp_clear_retrans(tp); tcp_clear_retrans(tp);
/* Push undo marker, if it was plain RTO and nothing /* Push undo marker, if it was plain RTO and nothing
...@@ -1904,6 +1906,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, ...@@ -1904,6 +1906,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
TCP_ECN_queue_cwr(tp); TCP_ECN_queue_cwr(tp);
} }
tp->bytes_acked = 0;
tp->snd_cwnd_cnt = 0; tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(sk, TCP_CA_Recovery); tcp_set_ca_state(sk, TCP_CA_Recovery);
} }
...@@ -2310,6 +2313,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) ...@@ -2310,6 +2313,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
if (before(ack, prior_snd_una)) if (before(ack, prior_snd_una))
goto old_ack; goto old_ack;
if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
tp->bytes_acked += ack - prior_snd_una;
if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
/* Window is constant, pure forward advance. /* Window is constant, pure forward advance.
* No more checks are required. * No more checks are required.
...@@ -4370,6 +4376,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, ...@@ -4370,6 +4376,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
EXPORT_SYMBOL(sysctl_tcp_ecn); EXPORT_SYMBOL(sysctl_tcp_ecn);
EXPORT_SYMBOL(sysctl_tcp_reordering); EXPORT_SYMBOL(sysctl_tcp_reordering);
EXPORT_SYMBOL(sysctl_tcp_abc);
EXPORT_SYMBOL(tcp_parse_options); EXPORT_SYMBOL(tcp_parse_options);
EXPORT_SYMBOL(tcp_rcv_established); EXPORT_SYMBOL(tcp_rcv_established);
EXPORT_SYMBOL(tcp_rcv_state_process); EXPORT_SYMBOL(tcp_rcv_state_process);
...@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, ...@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
*/ */
newtp->snd_cwnd = 2; newtp->snd_cwnd = 2;
newtp->snd_cwnd_cnt = 0; newtp->snd_cwnd_cnt = 0;
newtp->bytes_acked = 0;
newtp->frto_counter = 0; newtp->frto_counter = 0;
newtp->frto_highmark = 0; newtp->frto_highmark = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment