Commit 846998ae authored by David S. Miller, committed by Linus Torvalds

[PATCH] tcp: fix TSO sizing bugs

MSS changes can be lost since we preemptively initialize the tso_segs count
for an SKB before we 100% commit to sending it out.

So, by the time we send it out, the tso_size information can be stale due
to PMTU events.  This mucks up all of the logic in our send engine, and can
even result in the BUG() triggering in tcp_tso_should_defer().

Another problem we have is that we're storing the tp->mss_cache, not the
SACK block normalized MSS, as the tso_size.  That's wrong too.
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 0c3dba15
...@@ -403,11 +403,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) ...@@ -403,11 +403,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
sk->sk_send_head = skb; sk->sk_send_head = skb;
} }
static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
{ {
struct tcp_sock *tp = tcp_sk(sk); if (skb->len <= mss_now ||
if (skb->len <= tp->mss_cache ||
!(sk->sk_route_caps & NETIF_F_TSO)) { !(sk->sk_route_caps & NETIF_F_TSO)) {
/* Avoid the costly divide in the normal /* Avoid the costly divide in the normal
* non-TSO case. * non-TSO case.
...@@ -417,10 +415,10 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) ...@@ -417,10 +415,10 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
} else { } else {
unsigned int factor; unsigned int factor;
factor = skb->len + (tp->mss_cache - 1); factor = skb->len + (mss_now - 1);
factor /= tp->mss_cache; factor /= mss_now;
skb_shinfo(skb)->tso_segs = factor; skb_shinfo(skb)->tso_segs = factor;
skb_shinfo(skb)->tso_size = tp->mss_cache; skb_shinfo(skb)->tso_size = mss_now;
} }
} }
...@@ -429,7 +427,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) ...@@ -429,7 +427,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
* packet to the list. This won't be called frequently, I hope. * packet to the list. This won't be called frequently, I hope.
* Remember, these are still headerless SKBs at this point. * Remember, these are still headerless SKBs at this point.
*/ */
static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff; struct sk_buff *buff;
...@@ -492,8 +490,8 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) ...@@ -492,8 +490,8 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
} }
/* Fix up tso_factor for both original and new SKB. */ /* Fix up tso_factor for both original and new SKB. */
tcp_set_skb_tso_segs(sk, skb); tcp_set_skb_tso_segs(sk, skb, mss_now);
tcp_set_skb_tso_segs(sk, buff); tcp_set_skb_tso_segs(sk, buff, mss_now);
if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
tp->lost_out += tcp_skb_pcount(skb); tp->lost_out += tcp_skb_pcount(skb);
...@@ -569,7 +567,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) ...@@ -569,7 +567,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
* factor and mss. * factor and mss.
*/ */
if (tcp_skb_pcount(skb) > 1) if (tcp_skb_pcount(skb) > 1)
tcp_set_skb_tso_segs(sk, skb); tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));
return 0; return 0;
} }
...@@ -734,12 +732,14 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk ...@@ -734,12 +732,14 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
/* This must be invoked the first time we consider transmitting /* This must be invoked the first time we consider transmitting
* SKB onto the wire. * SKB onto the wire.
*/ */
static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb) static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
{ {
int tso_segs = tcp_skb_pcount(skb); int tso_segs = tcp_skb_pcount(skb);
if (!tso_segs) { if (!tso_segs ||
tcp_set_skb_tso_segs(sk, skb); (tso_segs > 1 &&
skb_shinfo(skb)->tso_size != mss_now)) {
tcp_set_skb_tso_segs(sk, skb, mss_now);
tso_segs = tcp_skb_pcount(skb); tso_segs = tcp_skb_pcount(skb);
} }
return tso_segs; return tso_segs;
...@@ -817,7 +817,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, ...@@ -817,7 +817,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
unsigned int cwnd_quota; unsigned int cwnd_quota;
tcp_init_tso_segs(sk, skb); tcp_init_tso_segs(sk, skb, cur_mss);
if (!tcp_nagle_test(tp, skb, cur_mss, nonagle)) if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
return 0; return 0;
...@@ -854,7 +854,7 @@ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp) ...@@ -854,7 +854,7 @@ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
* know that all the data is in scatter-gather pages, and that the * know that all the data is in scatter-gather pages, and that the
* packet has never been sent out before (and thus is not cloned). * packet has never been sent out before (and thus is not cloned).
*/ */
static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len) static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now)
{ {
struct sk_buff *buff; struct sk_buff *buff;
int nlen = skb->len - len; int nlen = skb->len - len;
...@@ -887,8 +887,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len) ...@@ -887,8 +887,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
skb_split(skb, buff, len); skb_split(skb, buff, len);
/* Fix up tso_factor for both original and new SKB. */ /* Fix up tso_factor for both original and new SKB. */
tcp_set_skb_tso_segs(sk, skb); tcp_set_skb_tso_segs(sk, skb, mss_now);
tcp_set_skb_tso_segs(sk, buff); tcp_set_skb_tso_segs(sk, buff, mss_now);
/* Link BUFF into the send queue. */ /* Link BUFF into the send queue. */
skb_header_release(buff); skb_header_release(buff);
...@@ -976,7 +976,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) ...@@ -976,7 +976,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
if (unlikely(!skb)) if (unlikely(!skb))
return 0; return 0;
tso_segs = tcp_init_tso_segs(sk, skb); tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
cwnd_quota = tcp_cwnd_test(tp, skb); cwnd_quota = tcp_cwnd_test(tp, skb);
if (unlikely(!cwnd_quota)) if (unlikely(!cwnd_quota))
goto out; goto out;
...@@ -1006,11 +1006,11 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) ...@@ -1006,11 +1006,11 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
limit = skb->len - trim; limit = skb->len - trim;
} }
if (skb->len > limit) { if (skb->len > limit) {
if (tso_fragment(sk, skb, limit)) if (tso_fragment(sk, skb, limit, mss_now))
break; break;
} }
} else if (unlikely(skb->len > mss_now)) { } else if (unlikely(skb->len > mss_now)) {
if (unlikely(tcp_fragment(sk, skb, mss_now))) if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now)))
break; break;
} }
...@@ -1039,7 +1039,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) ...@@ -1039,7 +1039,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
skb = sk->sk_send_head; skb = sk->sk_send_head;
if (!skb) if (!skb)
break; break;
tso_segs = tcp_init_tso_segs(sk, skb); tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
} }
if (likely(sent_pkts)) { if (likely(sent_pkts)) {
...@@ -1076,7 +1076,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) ...@@ -1076,7 +1076,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
BUG_ON(!skb || skb->len < mss_now); BUG_ON(!skb || skb->len < mss_now);
tso_segs = tcp_init_tso_segs(sk, skb); tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
if (likely(cwnd_quota)) { if (likely(cwnd_quota)) {
...@@ -1093,11 +1093,11 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) ...@@ -1093,11 +1093,11 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
limit = skb->len - trim; limit = skb->len - trim;
} }
if (skb->len > limit) { if (skb->len > limit) {
if (unlikely(tso_fragment(sk, skb, limit))) if (unlikely(tso_fragment(sk, skb, limit, mss_now)))
return; return;
} }
} else if (unlikely(skb->len > mss_now)) { } else if (unlikely(skb->len > mss_now)) {
if (unlikely(tcp_fragment(sk, skb, mss_now))) if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now)))
return; return;
} }
...@@ -1388,7 +1388,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) ...@@ -1388,7 +1388,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
int old_factor = tcp_skb_pcount(skb); int old_factor = tcp_skb_pcount(skb);
int new_factor; int new_factor;
if (tcp_fragment(sk, skb, cur_mss)) if (tcp_fragment(sk, skb, cur_mss, cur_mss))
return -ENOMEM; /* We'll try again later. */ return -ENOMEM; /* We'll try again later. */
/* New SKB created, account for it. */ /* New SKB created, account for it. */
...@@ -1991,7 +1991,7 @@ int tcp_write_wakeup(struct sock *sk) ...@@ -1991,7 +1991,7 @@ int tcp_write_wakeup(struct sock *sk)
skb->len > mss) { skb->len > mss) {
seg_size = min(seg_size, mss); seg_size = min(seg_size, mss);
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
if (tcp_fragment(sk, skb, seg_size)) if (tcp_fragment(sk, skb, seg_size, mss))
return -1; return -1;
/* SWS override triggered forced fragmentation. /* SWS override triggered forced fragmentation.
* Disable TSO, the connection is too sick. */ * Disable TSO, the connection is too sick. */
...@@ -2000,7 +2000,7 @@ int tcp_write_wakeup(struct sock *sk) ...@@ -2000,7 +2000,7 @@ int tcp_write_wakeup(struct sock *sk)
sk->sk_route_caps &= ~NETIF_F_TSO; sk->sk_route_caps &= ~NETIF_F_TSO;
} }
} else if (!tcp_skb_pcount(skb)) } else if (!tcp_skb_pcount(skb))
tcp_set_skb_tso_segs(sk, skb); tcp_set_skb_tso_segs(sk, skb, mss);
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
TCP_SKB_CB(skb)->when = tcp_time_stamp; TCP_SKB_CB(skb)->when = tcp_time_stamp;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment