Commit 2331ccc5 authored by Eric Dumazet's avatar Eric Dumazet Committed by David S. Miller

tcp: enhance tcp collapsing

As Ilya Lesokhin suggested, we can collapse two skbs at retransmit
time even if the skb at the right has fragments.

We simply have to use more generic skb_copy_bits() instead of
skb_copy_from_linear_data() in tcp_collapse_retrans()

Also need to guard this skb_copy_bits() in case there is nothing to
copy, otherwise skb_put() could panic if left skb has frags.

Tested:

Used following packetdrill test

// Establish a connection.
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
   +0 bind(3, ..., ...) = 0
   +0 listen(3, 1) = 0

   +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 8>
   +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+.100 < . 1:1(0) ack 1 win 257
   +0 accept(3, ..., ...) = 4

   +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
   +0 write(4, ..., 200) = 200
   +0 > P. 1:201(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 201:401(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 401:601(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 601:801(200) ack 1
+.001 write(4, ..., 200) = 200
   +0 > P. 801:1001(200) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1001:1101(100) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1101:1201(100) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1201:1301(100) ack 1
+.001 write(4, ..., 100) = 100
   +0 > P. 1301:1401(100) ack 1

+.100 < . 1:1(0) ack 1 win 257 <nop,nop,sack 1001:1401>
// Check that TCP collapse works :
   +0 > P. 1:1001(1000) ack 1
Reported-by: default avatarIlya Lesokhin <ilyal@mellanox.com>
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Acked-by: default avatarNeal Cardwell <ncardwell@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent b646cf29
...@@ -2529,7 +2529,8 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) ...@@ -2529,7 +2529,8 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
tcp_unlink_write_queue(next_skb, sk); tcp_unlink_write_queue(next_skb, sk);
skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), if (next_skb_size)
skb_copy_bits(next_skb, 0, skb_put(skb, next_skb_size),
next_skb_size); next_skb_size);
if (next_skb->ip_summed == CHECKSUM_PARTIAL) if (next_skb->ip_summed == CHECKSUM_PARTIAL)
...@@ -2567,14 +2568,11 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) ...@@ -2567,14 +2568,11 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb)
{ {
if (tcp_skb_pcount(skb) > 1) if (tcp_skb_pcount(skb) > 1)
return false; return false;
/* TODO: SACK collapsing could be used to remove this condition */
if (skb_shinfo(skb)->nr_frags != 0)
return false;
if (skb_cloned(skb)) if (skb_cloned(skb))
return false; return false;
if (skb == tcp_send_head(sk)) if (skb == tcp_send_head(sk))
return false; return false;
/* Some heurestics for collapsing over SACK'd could be invented */ /* Some heuristics for collapsing over SACK'd could be invented */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
return false; return false;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment