Commit c41a38ef authored by Paolo Abeni's avatar Paolo Abeni

Merge branch 'tcp-save-flowlabel-and-use-for-receiver-repathing'

David Morley says:

====================
tcp: save flowlabel and use for receiver repathing

This patch series stores the last received ipv6 flowlabel. This last
received flowlabel is then used to help decide whether a packet is
likely an RTO retransmit or the result of a TLP. This new information
is used to better inform the flowlabel change decision for data
receivers.
====================

Link: https://lore.kernel.org/r/20231006011841.3558307-1-morleyd.kernel@gmail.comSigned-off-by: default avatarPaolo Abeni <pabeni@redhat.com>
parents 8cea95b0 93946301
......@@ -114,7 +114,10 @@ struct inet_connection_sock {
__u8 quick; /* Scheduled number of quick acks */
__u8 pingpong; /* The session is interactive */
__u8 retry; /* Number of attempts */
__u32 ato; /* Predicted tick of soft clock */
#define ATO_BITS 8
__u32 ato:ATO_BITS, /* Predicted tick of soft clock */
lrcv_flowlabel:20, /* last received ipv6 flowlabel */
unused:4;
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
......
......@@ -131,6 +131,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */
#define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */
static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
#if HZ >= 100
#define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */
#define TCP_ATO_MIN ((unsigned)(HZ/25))
......
......@@ -196,8 +196,8 @@ static void dccp_delack_timer(struct timer_list *t)
if (inet_csk_ack_scheduled(sk)) {
if (!inet_csk_in_pingpong_mode(sk)) {
/* Delayed ACK missed: inflate ATO. */
icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
icsk->icsk_rto);
icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1,
icsk->icsk_rto);
} else {
/* Delayed ACK missed: leave pingpong mode and
* deflate ATO.
......
......@@ -3756,8 +3756,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_options |= TCPI_OPT_SYN_DATA;
info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
info->tcpi_ato = jiffies_to_usecs(min(icsk->icsk_ack.ato,
tcp_delack_max(sk)));
info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato,
tcp_delack_max(sk)));
info->tcpi_snd_mss = tp->mss_cache;
info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
......
......@@ -778,6 +778,16 @@ void tcp_rcv_space_adjust(struct sock *sk)
tp->rcvq_space.time = tp->tcp_mstamp;
}
static void tcp_save_lrcv_flowlabel(struct sock *sk, const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
struct inet_connection_sock *icsk = inet_csk(sk);
if (skb->protocol == htons(ETH_P_IPV6))
icsk->icsk_ack.lrcv_flowlabel = ntohl(ip6_flowlabel(ipv6_hdr(skb)));
#endif
}
/* There is something which you must keep in mind when you analyze the
* behavior of the tp->ato delayed ack timeout interval. When a
* connection starts up, we want to ack as quickly as possible. The
......@@ -826,6 +836,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
}
}
icsk->icsk_ack.lrcvtime = now;
tcp_save_lrcv_flowlabel(sk, skb);
tcp_ecn_check_ce(sk, skb);
......@@ -4513,12 +4524,23 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
{
/* When the ACK path fails or drops most ACKs, the sender would
* timeout and spuriously retransmit the same segment repeatedly.
* The receiver remembers and reflects via DSACKs. Leverage the
* DSACK state and change the txhash to re-route speculatively.
* If it seems our ACKs are not reaching the other side,
* based on receiving a duplicate data segment with new flowlabel
* (suggesting the sender suffered an RTO), and we are not already
* repathing due to our own RTO, then rehash the socket to repath our
* packets.
*/
if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
#if IS_ENABLED(CONFIG_IPV6)
if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss &&
skb->protocol == htons(ETH_P_IPV6) &&
(tcp_sk(sk)->inet_conn.icsk_ack.lrcv_flowlabel !=
ntohl(ip6_flowlabel(ipv6_hdr(skb)))) &&
sk_rethink_txhash(sk))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
/* Save last flowlabel after a spurious retrans. */
tcp_save_lrcv_flowlabel(sk, skb);
#endif
}
static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
......@@ -4835,6 +4857,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
u32 seq, end_seq;
bool fragstolen;
tcp_save_lrcv_flowlabel(sk, skb);
tcp_ecn_check_ce(sk, skb);
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
......
......@@ -322,7 +322,7 @@ void tcp_delack_timer_handler(struct sock *sk)
if (inet_csk_ack_scheduled(sk)) {
if (!inet_csk_in_pingpong_mode(sk)) {
/* Delayed ACK missed: inflate ATO. */
icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, icsk->icsk_rto);
} else {
/* Delayed ACK missed: leave pingpong mode and
* deflate ATO.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment