Commit 1d0833df authored by Yuchung Cheng's avatar Yuchung Cheng Committed by David S. Miller

tcp: use sequence to break TS ties for RACK loss detection

The packets inside a jumbo skb (e.g., TSO) share the same skb
timestamp, even though they are sent sequentially on the wire. Since
RACK is based on time, it can not detect some packets inside the
same skb are lost.  However, we can leverage the packet sequence
numbers as extended timestamps to detect losses. Therefore, when
RACK timestamp is identical to skb's timestamp (i.e., one of the
packets of the skb is acked or sacked), we use the sequence numbers
of the acked and unacked packets to break ties.

We can use the same sequence logic to advance RACK xmit time as
well to detect more losses and avoid timeout.
Signed-off-by: default avatarYuchung Cheng <ycheng@google.com>
Signed-off-by: default avatarNeal Cardwell <ncardwell@google.com>
Acked-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 57dde7f7
...@@ -208,6 +208,7 @@ struct tcp_sock { ...@@ -208,6 +208,7 @@ struct tcp_sock {
struct tcp_rack { struct tcp_rack {
struct skb_mstamp mstamp; /* (Re)sent time of the skb */ struct skb_mstamp mstamp; /* (Re)sent time of the skb */
u32 rtt_us; /* Associated RTT */ u32 rtt_us; /* Associated RTT */
u32 end_seq; /* Ending TCP sequence of the skb */
u8 advanced; /* mstamp advanced since last lost marking */ u8 advanced; /* mstamp advanced since last lost marking */
u8 reord; /* reordering detected */ u8 reord; /* reordering detected */
} rack; } rack;
......
...@@ -1867,7 +1867,7 @@ extern int sysctl_tcp_recovery; ...@@ -1867,7 +1867,7 @@ extern int sysctl_tcp_recovery;
#define TCP_RACK_LOST_RETRANS 0x1 #define TCP_RACK_LOST_RETRANS 0x1
extern void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now); extern void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now);
extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
const struct skb_mstamp *xmit_time, const struct skb_mstamp *xmit_time,
const struct skb_mstamp *ack_time); const struct skb_mstamp *ack_time);
extern void tcp_rack_reo_timeout(struct sock *sk); extern void tcp_rack_reo_timeout(struct sock *sk);
......
...@@ -1218,7 +1218,8 @@ static u8 tcp_sacktag_one(struct sock *sk, ...@@ -1218,7 +1218,8 @@ static u8 tcp_sacktag_one(struct sock *sk,
return sacked; return sacked;
if (!(sacked & TCPCB_SACKED_ACKED)) { if (!(sacked & TCPCB_SACKED_ACKED)) {
tcp_rack_advance(tp, sacked, xmit_time, &state->ack_time); tcp_rack_advance(tp, sacked, end_seq,
xmit_time, &state->ack_time);
if (sacked & TCPCB_SACKED_RETRANS) { if (sacked & TCPCB_SACKED_RETRANS) {
/* If the segment is not tagged as lost, /* If the segment is not tagged as lost,
...@@ -3171,7 +3172,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, ...@@ -3171,7 +3172,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
} else if (tcp_is_sack(tp)) { } else if (tcp_is_sack(tp)) {
tp->delivered += acked_pcount; tp->delivered += acked_pcount;
if (!tcp_skb_spurious_retrans(tp, skb)) if (!tcp_skb_spurious_retrans(tp, skb))
tcp_rack_advance(tp, sacked, tcp_rack_advance(tp, sacked, scb->end_seq,
&skb->skb_mstamp, &skb->skb_mstamp,
&sack->ack_time); &sack->ack_time);
} }
......
...@@ -16,6 +16,14 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) ...@@ -16,6 +16,14 @@ static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb)
} }
} }
static bool tcp_rack_sent_after(const struct skb_mstamp *t1,
const struct skb_mstamp *t2,
u32 seq1, u32 seq2)
{
return skb_mstamp_after(t1, t2) ||
(t1->v64 == t2->v64 && after(seq1, seq2));
}
/* Marks a packet lost, if some packet sent later has been (s)acked. /* Marks a packet lost, if some packet sent later has been (s)acked.
* The underlying idea is similar to the traditional dupthresh and FACK * The underlying idea is similar to the traditional dupthresh and FACK
* but they look at different metrics: * but they look at different metrics:
...@@ -60,7 +68,8 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now, ...@@ -60,7 +68,8 @@ static void tcp_rack_detect_loss(struct sock *sk, const struct skb_mstamp *now,
scb->sacked & TCPCB_SACKED_ACKED) scb->sacked & TCPCB_SACKED_ACKED)
continue; continue;
if (skb_mstamp_after(&tp->rack.mstamp, &skb->skb_mstamp)) { if (tcp_rack_sent_after(&tp->rack.mstamp, &skb->skb_mstamp,
tp->rack.end_seq, scb->end_seq)) {
/* Step 3 in draft-cheng-tcpm-rack-00.txt: /* Step 3 in draft-cheng-tcpm-rack-00.txt:
* A packet is lost if its elapsed time is beyond * A packet is lost if its elapsed time is beyond
* the recent RTT plus the reordering window. * the recent RTT plus the reordering window.
...@@ -113,14 +122,15 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now) ...@@ -113,14 +122,15 @@ void tcp_rack_mark_lost(struct sock *sk, const struct skb_mstamp *now)
* This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from * This is "Step 3: Advance RACK.xmit_time and update RACK.RTT" from
* draft-cheng-tcpm-rack-00.txt * draft-cheng-tcpm-rack-00.txt
*/ */
void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
const struct skb_mstamp *xmit_time, const struct skb_mstamp *xmit_time,
const struct skb_mstamp *ack_time) const struct skb_mstamp *ack_time)
{ {
u32 rtt_us; u32 rtt_us;
if (tp->rack.mstamp.v64 && if (tp->rack.mstamp.v64 &&
!skb_mstamp_after(xmit_time, &tp->rack.mstamp)) !tcp_rack_sent_after(xmit_time, &tp->rack.mstamp,
end_seq, tp->rack.end_seq))
return; return;
rtt_us = skb_mstamp_us_delta(ack_time, xmit_time); rtt_us = skb_mstamp_us_delta(ack_time, xmit_time);
...@@ -140,6 +150,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, ...@@ -140,6 +150,7 @@ void tcp_rack_advance(struct tcp_sock *tp, u8 sacked,
} }
tp->rack.rtt_us = rtt_us; tp->rack.rtt_us = rtt_us;
tp->rack.mstamp = *xmit_time; tp->rack.mstamp = *xmit_time;
tp->rack.end_seq = end_seq;
tp->rack.advanced = 1; tp->rack.advanced = 1;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment