Commit bd1e75ab authored by Eric Dumazet, committed by David S. Miller

tcp: add coalescing attempt in tcp_ofo_queue()

In order to make TCP more resilient in presence of reorders, we need
to allow coalescing to happen when skbs from out of order queue are
transferred into receive queue. LRO/GRO can be completely canceled
in some pathological cases, like per packet load balancing on aggregated
links.

I had to move tcp_try_coalesce() up in the file, above tcp_ofo_queue().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 4cdf507d
...@@ -4061,6 +4061,44 @@ static void tcp_sack_remove(struct tcp_sock *tp) ...@@ -4061,6 +4061,44 @@ static void tcp_sack_remove(struct tcp_sock *tp)
tp->rx_opt.num_sacks = num_sacks; tp->rx_opt.num_sacks = num_sacks;
} }
/**
* tcp_try_coalesce - try to merge skb to prior one
* @sk: socket
* @to: prior buffer
* @from: buffer to add in queue
* @fragstolen: pointer to boolean
*
* Before queueing skb @from after @to, try to merge them
* to reduce overall memory use and queue lengths, if cost is small.
* Packets in ofo or receive queues can stay a long time.
* Better try to coalesce them right now to avoid future collapses.
* Returns true if caller should free @from instead of queueing it
*/
static bool tcp_try_coalesce(struct sock *sk,
struct sk_buff *to,
struct sk_buff *from,
bool *fragstolen)
{
int delta;
*fragstolen = false;
/* Its possible this segment overlaps with prior segment in queue */
if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
return false;
if (!skb_try_coalesce(to, from, fragstolen, &delta))
return false;
atomic_add(delta, &sk->sk_rmem_alloc);
sk_mem_charge(sk, delta);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
return true;
}
/* This one checks to see if we can put data from the /* This one checks to see if we can put data from the
* out_of_order queue into the receive_queue. * out_of_order queue into the receive_queue.
*/ */
...@@ -4068,7 +4106,8 @@ static void tcp_ofo_queue(struct sock *sk) ...@@ -4068,7 +4106,8 @@ static void tcp_ofo_queue(struct sock *sk)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
__u32 dsack_high = tp->rcv_nxt; __u32 dsack_high = tp->rcv_nxt;
struct sk_buff *skb; struct sk_buff *skb, *tail;
bool fragstolen, eaten;
while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) { while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
...@@ -4081,9 +4120,9 @@ static void tcp_ofo_queue(struct sock *sk) ...@@ -4081,9 +4120,9 @@ static void tcp_ofo_queue(struct sock *sk)
tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack); tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
} }
__skb_unlink(skb, &tp->out_of_order_queue);
if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
SOCK_DEBUG(sk, "ofo packet was already received\n"); SOCK_DEBUG(sk, "ofo packet was already received\n");
__skb_unlink(skb, &tp->out_of_order_queue);
__kfree_skb(skb); __kfree_skb(skb);
continue; continue;
} }
...@@ -4091,11 +4130,15 @@ static void tcp_ofo_queue(struct sock *sk) ...@@ -4091,11 +4130,15 @@ static void tcp_ofo_queue(struct sock *sk)
tp->rcv_nxt, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
TCP_SKB_CB(skb)->end_seq); TCP_SKB_CB(skb)->end_seq);
__skb_unlink(skb, &tp->out_of_order_queue); tail = skb_peek_tail(&sk->sk_receive_queue);
__skb_queue_tail(&sk->sk_receive_queue, skb); eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
if (!eaten)
__skb_queue_tail(&sk->sk_receive_queue, skb);
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
tcp_fin(sk); tcp_fin(sk);
if (eaten)
kfree_skb_partial(skb, fragstolen);
} }
} }
...@@ -4122,44 +4165,6 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, ...@@ -4122,44 +4165,6 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb,
return 0; return 0;
} }
/**
* tcp_try_coalesce - try to merge skb to prior one
* @sk: socket
* @to: prior buffer
* @from: buffer to add in queue
* @fragstolen: pointer to boolean
*
* Before queueing skb @from after @to, try to merge them
* to reduce overall memory use and queue lengths, if cost is small.
* Packets in ofo or receive queues can stay a long time.
* Better try to coalesce them right now to avoid future collapses.
* Returns true if caller should free @from instead of queueing it
*/
static bool tcp_try_coalesce(struct sock *sk,
struct sk_buff *to,
struct sk_buff *from,
bool *fragstolen)
{
int delta;
*fragstolen = false;
/* Its possible this segment overlaps with prior segment in queue */
if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
return false;
if (!skb_try_coalesce(to, from, fragstolen, &delta))
return false;
atomic_add(delta, &sk->sk_rmem_alloc);
sk_mem_charge(sk, delta);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOALESCE);
TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq;
TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq;
TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags;
return true;
}
static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment