Commit f7a6eb1e authored by David S. Miller

Merge branch 'tcp-fix-DCTCP-ECE-Ack-series'

Yuchung Cheng says:

====================
fix DCTCP ECE Ack series

This patch set addresses the fact that the existing DCTCP implementation
does not fully implement the ACK policy specified in the RFC. It improves
the responsiveness to CE status changes, particularly on flows with
small inflight.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents f7482683 a0496ef2
...@@ -342,6 +342,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, ...@@ -342,6 +342,7 @@ ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len, struct pipe_inode_info *pipe, size_t len,
unsigned int flags); unsigned int flags);
void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks);
static inline void tcp_dec_quickack_mode(struct sock *sk, static inline void tcp_dec_quickack_mode(struct sock *sk,
const unsigned int pkts) const unsigned int pkts)
{ {
...@@ -539,6 +540,7 @@ void tcp_send_fin(struct sock *sk); ...@@ -539,6 +540,7 @@ void tcp_send_fin(struct sock *sk);
void tcp_send_active_reset(struct sock *sk, gfp_t priority); void tcp_send_active_reset(struct sock *sk, gfp_t priority);
int tcp_send_synack(struct sock *); int tcp_send_synack(struct sock *);
void tcp_push_one(struct sock *, unsigned int mss_now); void tcp_push_one(struct sock *, unsigned int mss_now);
void __tcp_send_ack(struct sock *sk, u32 rcv_nxt);
void tcp_send_ack(struct sock *sk); void tcp_send_ack(struct sock *sk);
void tcp_send_delayed_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk);
void tcp_send_loss_probe(struct sock *sk); void tcp_send_loss_probe(struct sock *sk);
......
...@@ -129,24 +129,14 @@ static void dctcp_ce_state_0_to_1(struct sock *sk) ...@@ -129,24 +129,14 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk); struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
/* State has changed from CE=0 to CE=1 and delayed if (!ca->ce_state) {
* ACK has not sent yet. /* State has changed from CE=0 to CE=1, force an immediate
* ACK to reflect the new CE state. If an ACK was delayed,
* send that first to reflect the prior CE state.
*/ */
if (!ca->ce_state && if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { __tcp_send_ack(sk, ca->prior_rcv_nxt);
u32 tmp_rcv_nxt; tcp_enter_quickack_mode(sk, 1);
/* Save current rcv_nxt. */
tmp_rcv_nxt = tp->rcv_nxt;
/* Generate previous ack with CE=0. */
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
tp->rcv_nxt = ca->prior_rcv_nxt;
tcp_send_ack(sk);
/* Recover current rcv_nxt. */
tp->rcv_nxt = tmp_rcv_nxt;
} }
ca->prior_rcv_nxt = tp->rcv_nxt; ca->prior_rcv_nxt = tp->rcv_nxt;
...@@ -160,24 +150,14 @@ static void dctcp_ce_state_1_to_0(struct sock *sk) ...@@ -160,24 +150,14 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk); struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
/* State has changed from CE=1 to CE=0 and delayed if (ca->ce_state) {
* ACK has not sent yet. /* State has changed from CE=1 to CE=0, force an immediate
* ACK to reflect the new CE state. If an ACK was delayed,
* send that first to reflect the prior CE state.
*/ */
if (ca->ce_state && if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { __tcp_send_ack(sk, ca->prior_rcv_nxt);
u32 tmp_rcv_nxt; tcp_enter_quickack_mode(sk, 1);
/* Save current rcv_nxt. */
tmp_rcv_nxt = tp->rcv_nxt;
/* Generate previous ack with CE=1. */
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
tp->rcv_nxt = ca->prior_rcv_nxt;
tcp_send_ack(sk);
/* Recover current rcv_nxt. */
tp->rcv_nxt = tmp_rcv_nxt;
} }
ca->prior_rcv_nxt = tp->rcv_nxt; ca->prior_rcv_nxt = tp->rcv_nxt;
......
...@@ -215,7 +215,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks) ...@@ -215,7 +215,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.quick = quickacks; icsk->icsk_ack.quick = quickacks;
} }
static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{ {
struct inet_connection_sock *icsk = inet_csk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
...@@ -223,6 +223,7 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks) ...@@ -223,6 +223,7 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.pingpong = 0; icsk->icsk_ack.pingpong = 0;
icsk->icsk_ack.ato = TCP_ATO_MIN; icsk->icsk_ack.ato = TCP_ATO_MIN;
} }
EXPORT_SYMBOL(tcp_enter_quickack_mode);
/* Send ACKs quickly, if "quick" count is not exhausted /* Send ACKs quickly, if "quick" count is not exhausted
* and the session is not interactive. * and the session is not interactive.
......
...@@ -160,7 +160,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp, ...@@ -160,7 +160,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
} }
/* Account for an ACK we sent. */ /* Account for an ACK we sent. */
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
u32 rcv_nxt)
{ {
struct tcp_sock *tp = tcp_sk(sk); struct tcp_sock *tp = tcp_sk(sk);
...@@ -171,6 +172,9 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) ...@@ -171,6 +172,9 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1) if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk); __sock_put(sk);
} }
if (unlikely(rcv_nxt != tp->rcv_nxt))
return; /* Special ACK sent by DCTCP to reflect ECN */
tcp_dec_quickack_mode(sk, pkts); tcp_dec_quickack_mode(sk, pkts);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
} }
...@@ -1023,8 +1027,8 @@ static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb) ...@@ -1023,8 +1027,8 @@ static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
* We are working here with either a clone of the original * We are working here with either a clone of the original
* SKB, or a fresh unique copy made by the retransmit engine. * SKB, or a fresh unique copy made by the retransmit engine.
*/ */
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
gfp_t gfp_mask) int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
{ {
const struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet; struct inet_sock *inet;
...@@ -1100,7 +1104,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, ...@@ -1100,7 +1104,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->source = inet->inet_sport; th->source = inet->inet_sport;
th->dest = inet->inet_dport; th->dest = inet->inet_dport;
th->seq = htonl(tcb->seq); th->seq = htonl(tcb->seq);
th->ack_seq = htonl(tp->rcv_nxt); th->ack_seq = htonl(rcv_nxt);
*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
tcb->tcp_flags); tcb->tcp_flags);
...@@ -1141,7 +1145,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, ...@@ -1141,7 +1145,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
icsk->icsk_af_ops->send_check(sk, skb); icsk->icsk_af_ops->send_check(sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK)) if (likely(tcb->tcp_flags & TCPHDR_ACK))
tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
if (skb->len != tcp_header_size) { if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk); tcp_event_data_sent(tp, sk);
...@@ -1178,6 +1182,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, ...@@ -1178,6 +1182,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
return err; return err;
} }
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
gfp_t gfp_mask)
{
return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
tcp_sk(sk)->rcv_nxt);
}
/* This routine just queues the buffer for sending. /* This routine just queues the buffer for sending.
* *
* NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames, * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
...@@ -3571,7 +3582,7 @@ void tcp_send_delayed_ack(struct sock *sk) ...@@ -3571,7 +3582,7 @@ void tcp_send_delayed_ack(struct sock *sk)
} }
/* This routine sends an ack and also updates the window. */ /* This routine sends an ack and also updates the window. */
void tcp_send_ack(struct sock *sk) void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
{ {
struct sk_buff *buff; struct sk_buff *buff;
...@@ -3604,9 +3615,14 @@ void tcp_send_ack(struct sock *sk) ...@@ -3604,9 +3615,14 @@ void tcp_send_ack(struct sock *sk)
skb_set_tcp_pure_ack(buff); skb_set_tcp_pure_ack(buff);
/* Send it off, this clears delayed acks for us. */ /* Send it off, this clears delayed acks for us. */
tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0); __tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
}
EXPORT_SYMBOL_GPL(__tcp_send_ack);
void tcp_send_ack(struct sock *sk)
{
__tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
} }
EXPORT_SYMBOL_GPL(tcp_send_ack);
/* This routine sends a packet with an out of date sequence /* This routine sends a packet with an out of date sequence
* number. It assumes the other end will try to ack it. * number. It assumes the other end will try to ack it.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment