Commit ef897573 authored by David S. Miller

[TCP]: Smooth out TSO ack clocking.

- Export tcp_trim_head() and call it directly from
  tcp_tso_acked().  This also fixes URG handling.

- Make tcp_trim_head() adjust the skb->truesize of
  the packet and liberate that space from the socket
  send buffer.

- In tcp_current_mss(), limit the TSO factor to 1/4 of
  snd_cwnd so the ACK clock keeps ticking.  The idea is
  from John Heffner.  (A worked sketch of the clamp
  follows this list.)
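
(For illustration, a minimal standalone sketch of the new clamp with
made-up example numbers; the variable names mirror tcp_current_mss()'s
locals, but this is not kernel code.)

#include <stdio.h>

int main(void)
{
	unsigned int mss_now   = 1448;	/* per-segment MSS */
	unsigned int large_mss = 65535;	/* max TSO super-packet payload */
	unsigned int snd_cwnd  = 40;	/* congestion window, in packets */
	unsigned int factor    = large_mss / mss_now;	/* 45 segments */

	/* Old rule: clamp to the whole cwnd, so a single TSO frame could
	 * consume the entire window and its ACKs arrive in one clump.
	 * New rule: clamp to cwnd/4 (at least 1), so roughly four TSO
	 * frames stay in flight and returning ACKs are spread out.
	 */
	if (factor > (snd_cwnd >> 2))
		factor = (snd_cwnd >> 2) ? (snd_cwnd >> 2) : 1;

	printf("mss_cache = %u\n", mss_now * factor);	/* 1448 * 10 = 14480 */
	return 0;
}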
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 28135721
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -944,6 +944,7 @@ extern int tcp_write_xmit(struct sock *, int nonagle);
 extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
 extern void tcp_xmit_retransmit_queue(struct sock *);
 extern void tcp_simple_retransmit(struct sock *);
+extern int tcp_trim_head(struct sock *, struct sk_buff *, u32);
 
 extern void tcp_send_probe0(struct sock *);
 extern void tcp_send_partial(struct sock *);
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2364,13 +2364,14 @@ static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
  * then making a write space wakeup callback is a possible
  * future enhancement. WARNING: it is not trivial to make.
  */
-static int tcp_tso_acked(struct tcp_opt *tp, struct sk_buff *skb,
+static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
 			 __u32 now, __s32 *seq_rtt)
 {
+	struct tcp_opt *tp = tcp_sk(sk);
 	struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
 	__u32 mss = scb->tso_mss;
 	__u32 snd_una = tp->snd_una;
-	__u32 seq = scb->seq;
+	__u32 orig_seq, seq;
 	__u32 packets_acked = 0;
 	int acked = 0;
@@ -2379,22 +2380,18 @@ static int tcp_tso_acked(struct tcp_opt *tp, struct sk_buff *skb,
 	 */
 	BUG_ON(!after(scb->end_seq, snd_una));
 
+	seq = orig_seq = scb->seq;
 	while (!after(seq + mss, snd_una)) {
 		packets_acked++;
 		seq += mss;
 	}
 
+	if (tcp_trim_head(sk, skb, (seq - orig_seq)))
+		return 0;
+
 	if (packets_acked) {
 		__u8 sacked = scb->sacked;
 
-		/* We adjust scb->seq but we do not pskb_pull() the
-		 * SKB.  We let tcp_retransmit_skb() handle this case
-		 * by checking skb->len against the data sequence span.
-		 * This way, we avoid the pskb_pull() work unless we
-		 * actually need to retransmit the SKB.
-		 */
-		scb->seq = seq;
-
 		acked |= FLAG_DATA_ACKED;
 		if (sacked) {
 			if (sacked & TCPCB_RETRANS) {
@@ -2413,7 +2410,7 @@ static int tcp_tso_acked(struct tcp_opt *tp, struct sk_buff *skb,
 					      packets_acked);
 			if (sacked & TCPCB_URG) {
 				if (tp->urg_mode &&
-				    !before(scb->seq, tp->snd_up))
+				    !before(orig_seq, tp->snd_up))
 					tp->urg_mode = 0;
 			}
 		} else if (*seq_rtt < 0)
@@ -2425,7 +2422,6 @@ static int tcp_tso_acked(struct tcp_opt *tp, struct sk_buff *skb,
 			tcp_dec_pcount_explicit(&tp->fackets_out, dval);
 		}
 		tcp_dec_pcount_explicit(&tp->packets_out, packets_acked);
-		scb->tso_factor -= packets_acked;
 
 		BUG_ON(scb->tso_factor == 0);
 		BUG_ON(!before(scb->seq, scb->end_seq));
@@ -2455,7 +2451,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 		 */
 		if (after(scb->end_seq, tp->snd_una)) {
 			if (scb->tso_factor > 1)
-				acked |= tcp_tso_acked(tp, skb,
+				acked |= tcp_tso_acked(sk, skb,
 						       now, &seq_rtt);
 			break;
 		}
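
(A minimal sketch of the partial-ACK arithmetic above, with
hypothetical sequence numbers; plain unsigned arithmetic stands in for
the kernel's wraparound-safe after()/before() macros.)

#include <stdio.h>

int main(void)
{
	unsigned int mss      = 1448;
	unsigned int seq      = 1000;			/* scb->seq of a 10-segment TSO frame */
	unsigned int snd_una  = 1000 + 3 * 1448 + 100;	/* ACK covers 3 full segments + 100 bytes */
	unsigned int orig_seq = seq, packets_acked = 0;

	/* Count only whole MSS-sized segments covered by snd_una,
	 * exactly like the while loop in tcp_tso_acked().
	 */
	while (!(seq + mss > snd_una)) {
		packets_acked++;
		seq += mss;
	}

	/* tcp_tso_acked() now calls tcp_trim_head(sk, skb, seq - orig_seq)
	 * immediately, dropping the acked bytes from the head of the SKB
	 * instead of deferring that work to tcp_retransmit_skb().
	 */
	printf("packets_acked = %u, trim = %u bytes\n",
	       packets_acked, seq - orig_seq);		/* 3, 4344 */
	return 0;
}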
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -525,7 +525,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
  * eventually). The difference is that pulled data not copied, but
  * immediately discarded.
  */
-unsigned char * __pskb_trim_head(struct sk_buff *skb, int len)
+static unsigned char *__pskb_trim_head(struct sk_buff *skb, int len)
 {
 	int i, k, eat;
@@ -553,8 +553,10 @@ unsigned char * __pskb_trim_head(struct sk_buff *skb, int len)
 	return skb->tail;
 }
 
-static int __tcp_trim_head(struct tcp_opt *tp, struct sk_buff *skb, u32 len)
+int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 {
+	struct tcp_opt *tp = tcp_sk(sk);
+
 	if (skb_cloned(skb) &&
 	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 		return -ENOMEM;
@@ -566,8 +568,14 @@ static int __tcp_trim_head(struct tcp_opt *tp, struct sk_buff *skb, u32 len)
 		return -ENOMEM;
 	}
 
+	TCP_SKB_CB(skb)->seq += len;
 	skb->ip_summed = CHECKSUM_HW;
 
+	skb->truesize	     -= len;
+	sk->sk_queue_shrunk   = 1;
+	sk->sk_wmem_queued   -= len;
+	sk->sk_forward_alloc += len;
+
 	/* Any change of skb->len requires recalculation of tso
 	 * factor and mss.
 	 */
@@ -576,16 +584,6 @@ static int __tcp_trim_head(struct tcp_opt *tp, struct sk_buff *skb, u32 len)
 	return 0;
 }
 
-static inline int tcp_trim_head(struct tcp_opt *tp, struct sk_buff *skb, u32 len)
-{
-	int err = __tcp_trim_head(tp, skb, len);
-
-	if (!err)
-		TCP_SKB_CB(skb)->seq += len;
-	return err;
-}
-
 /* This function synchronize snd mss to current pmtu/exthdr set.
 
    tp->user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
@@ -686,11 +684,12 @@ unsigned int tcp_current_mss(struct sock *sk, int large)
 					   68U - tp->tcp_header_len);
 
 		/* Always keep large mss multiple of real mss, but
-		 * do not exceed congestion window.
+		 * do not exceed 1/4 of the congestion window so we
+		 * can keep the ACK clock ticking.
 		 */
 		factor = large_mss / mss_now;
-		if (factor > tp->snd_cwnd)
-			factor = tp->snd_cwnd;
+		if (factor > (tp->snd_cwnd >> 2))
+			factor = max(1, tp->snd_cwnd >> 2);
 
 		tp->mss_cache = mss_now * factor;
@@ -1003,7 +1002,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_opt *tp = tcp_sk(sk);
 	unsigned int cur_mss = tcp_current_mss(sk, 0);
-	__u32 data_seq, data_end_seq;
 	int err;
 
 	/* Do not sent more than we queued. 1/4 is reserved for possible
@@ -1013,24 +1011,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
 		return -EAGAIN;
 
-	/* What is going on here?  When TSO packets are partially ACK'd,
-	 * we adjust the TCP_SKB_CB(skb)->seq value forward but we do
-	 * not adjust the data area of the SKB.  We defer that to here
-	 * so that we can avoid the work unless we really retransmit
-	 * the packet.
-	 */
-	data_seq = TCP_SKB_CB(skb)->seq;
-	data_end_seq = TCP_SKB_CB(skb)->end_seq;
-	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
-		data_end_seq--;
-
-	if (skb->len > (data_end_seq - data_seq)) {
-		u32 to_trim = skb->len - (data_end_seq - data_seq);
-
-		if (__tcp_trim_head(tp, skb, to_trim))
-			return -ENOMEM;
-	}
-
 	if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
 		if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
 			BUG();
@@ -1041,7 +1021,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 			tp->mss_cache = tp->mss_cache_std;
 		}
 
-		if (tcp_trim_head(tp, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
+		if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
 			return -ENOMEM;
 	}
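
(A hypothetical mini-model of the send-buffer accounting the new
tcp_trim_head() performs; the struct and field names below are
illustrative stand-ins, not the kernel's struct sock/sk_buff.)

#include <stdio.h>

struct mini_sock {
	int wmem_queued;	/* bytes charged to the send queue */
	int forward_alloc;	/* reserved bytes available for reuse */
	int queue_shrunk;	/* flag: queue lost bytes, writers may wake */
};

static void trim_head_accounting(struct mini_sock *sk, int *truesize, int len)
{
	*truesize         -= len;	/* the skb now owns fewer bytes */
	sk->queue_shrunk   = 1;		/* note that write space may open up */
	sk->wmem_queued   -= len;	/* uncharge the socket send buffer */
	sk->forward_alloc += len;	/* liberated bytes become reusable quota */
}

int main(void)
{
	struct mini_sock sk = { .wmem_queued = 16384, .forward_alloc = 0 };
	int truesize = 12000;

	trim_head_accounting(&sk, &truesize, 4344);	/* trim 3 x 1448 acked bytes */

	printf("truesize=%d wmem_queued=%d forward_alloc=%d\n",
	       truesize, sk.wmem_queued, sk.forward_alloc);
	return 0;
}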