Commit 27cde44a authored by Yuchung Cheng's avatar Yuchung Cheng Committed by David S. Miller

tcp: do not cancel delay-AcK on DCTCP special ACK

Currently when a DCTCP receiver delays an ACK and receive a
data packet with a different CE mark from the previous one's, it
sends two immediate ACKs acking previous and latest sequences
respectly (for ECN accounting).

Previously sending the first ACK may mark off the delayed ACK timer
(tcp_event_ack_sent). This may subsequently prevent sending the
second ACK to acknowledge the latest sequence (tcp_ack_snd_check).
The culprit is that tcp_send_ack() assumes it always acknowleges
the latest sequence, which is not true for the first special ACK.

The fix is to not make the assumption in tcp_send_ack and check the
actual ack sequence before cancelling the delayed ACK. Further it's
safer to pass the ack sequence number as a local variable into
tcp_send_ack routine, instead of intercepting tp->rcv_nxt to avoid
future bugs like this.
Reported-by: default avatarNeal Cardwell <ncardwell@google.com>
Signed-off-by: default avatarYuchung Cheng <ycheng@google.com>
Acked-by: default avatarNeal Cardwell <ncardwell@google.com>
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
parent 2987babb
...@@ -539,6 +539,7 @@ void tcp_send_fin(struct sock *sk); ...@@ -539,6 +539,7 @@ void tcp_send_fin(struct sock *sk);
void tcp_send_active_reset(struct sock *sk, gfp_t priority); void tcp_send_active_reset(struct sock *sk, gfp_t priority);
int tcp_send_synack(struct sock *); int tcp_send_synack(struct sock *);
void tcp_push_one(struct sock *, unsigned int mss_now); void tcp_push_one(struct sock *, unsigned int mss_now);
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
void tcp_send_ack(struct sock *sk); void tcp_send_ack(struct sock *sk);
void tcp_send_delayed_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk);
void tcp_send_loss_probe(struct sock *sk); void tcp_send_loss_probe(struct sock *sk);
......
...@@ -133,21 +133,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) ...@@ -133,21 +133,8 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
* ACK has not sent yet. * ACK has not sent yet.
*/ */
if (!ca->ce_state && if (!ca->ce_state &&
inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
u32 tmp_rcv_nxt; __tcp_send_ack(sk, ca->prior_rcv_nxt);
/* Save current rcv_nxt. */
tmp_rcv_nxt = tp->rcv_nxt;
/* Generate previous ack with CE=0. */
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
tp->rcv_nxt = ca->prior_rcv_nxt;
tcp_send_ack(sk);
/* Recover current rcv_nxt. */
tp->rcv_nxt = tmp_rcv_nxt;
}
ca->prior_rcv_nxt = tp->rcv_nxt; ca->prior_rcv_nxt = tp->rcv_nxt;
ca->ce_state = 1; ca->ce_state = 1;
...@@ -164,21 +151,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) ...@@ -164,21 +151,8 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
* ACK has not sent yet. * ACK has not sent yet.
*/ */
if (ca->ce_state && if (ca->ce_state &&
inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
u32 tmp_rcv_nxt; __tcp_send_ack(sk, ca->prior_rcv_nxt);
/* Save current rcv_nxt. */
tmp_rcv_nxt = tp->rcv_nxt;
/* Generate previous ack with CE=1. */
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
tp->rcv_nxt = ca->prior_rcv_nxt;
tcp_send_ack(sk);
/* Recover current rcv_nxt. */
tp->rcv_nxt = tmp_rcv_nxt;
}
ca->prior_rcv_nxt = tp->rcv_nxt; ca->prior_rcv_nxt = tp->rcv_nxt;
ca->ce_state = 0; ca->ce_state = 0;
......
...@@ -160,7 +160,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp, ...@@ -160,7 +160,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
} }
/* Account for an ACK we sent. */ /* Account for an ACK we sent. */
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
u32 rcv_nxt)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
...@@ -171,6 +172,9 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) ...@@ -171,6 +172,9 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1) if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk); __sock_put(sk);
} }
if (unlikely(rcv_nxt != tp->rcv_nxt))
return; /* Special ACK sent by DCTCP to reflect ECN */
tcp_dec_quickack_mode(sk, pkts); tcp_dec_quickack_mode(sk, pkts);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
} }
...@@ -1141,7 +1145,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, ...@@ -1141,7 +1145,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
icsk->icsk_af_ops->send_check(sk, skb); icsk->icsk_af_ops->send_check(sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK)) if (likely(tcb->tcp_flags & TCPHDR_ACK))
tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
if (skb->len != tcp_header_size) { if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk); tcp_event_data_sent(tp, sk);
...@@ -3613,12 +3617,12 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt) ...@@ -3613,12 +3617,12 @@ void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
/* Send it off, this clears delayed acks for us. */ /* Send it off, this clears delayed acks for us. */
__tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt); __tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
} }
EXPORT_SYMBOL_GPL(__tcp_send_ack);
void tcp_send_ack(struct sock *sk) void tcp_send_ack(struct sock *sk)
{ {
__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt); __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
} }
EXPORT_SYMBOL_GPL(tcp_send_ack);
/* This routine sends a packet with an out of date sequence /* This routine sends a packet with an out of date sequence
* number. It assumes the other end will try to ack it. * number. It assumes the other end will try to ack it.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment