Commit eb8329e0, authored by Yuchung Cheng, committed by David S. Miller

tcp: export data delivery rate

This commit exports two new fields in struct tcp_info:

  tcpi_delivery_rate: The most recent goodput, as measured by
    tcp_rate_gen(). If the socket is limited by the sending
    application (e.g., no data to send), it reports the highest
    measurement instead of the most recent. The unit is bytes per
    second (like other rate fields in tcp_info).

  tcpi_delivery_rate_app_limited: A boolean indicating if the goodput
    was measured when the socket's throughput was limited by the
    sending application.

This delivery rate information can be useful for applications that
want to know the current throughput the TCP connection is seeing,
e.g. adaptive bitrate video streaming. It can also be very useful for
debugging or troubleshooting.
Signed-off-by: Van Jacobson <vanj@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent d7722e85
...@@ -213,7 +213,8 @@ struct tcp_sock { ...@@ -213,7 +213,8 @@ struct tcp_sock {
u8 reord; /* reordering detected */ u8 reord; /* reordering detected */
} rack; } rack;
u16 advmss; /* Advertised MSS */ u16 advmss; /* Advertised MSS */
u8 unused; u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */
unused:7;
u8 nonagle : 4,/* Disable Nagle algorithm? */ u8 nonagle : 4,/* Disable Nagle algorithm? */
thin_lto : 1,/* Use linear timeouts for thin streams */ thin_lto : 1,/* Use linear timeouts for thin streams */
thin_dupack : 1,/* Fast retransmit on first dupack */ thin_dupack : 1,/* Fast retransmit on first dupack */
...@@ -271,6 +272,8 @@ struct tcp_sock { ...@@ -271,6 +272,8 @@ struct tcp_sock {
u32 app_limited; /* limited until "delivered" reaches this val */ u32 app_limited; /* limited until "delivered" reaches this val */
struct skb_mstamp first_tx_mstamp; /* start of window send phase */ struct skb_mstamp first_tx_mstamp; /* start of window send phase */
struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */
u32 rate_delivered; /* saved rate sample: packets delivered */
u32 rate_interval_us; /* saved rate sample: time elapsed */
u32 rcv_wnd; /* Current receiver window */ u32 rcv_wnd; /* Current receiver window */
u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */
......
...@@ -167,6 +167,7 @@ struct tcp_info { ...@@ -167,6 +167,7 @@ struct tcp_info {
__u8 tcpi_backoff; __u8 tcpi_backoff;
__u8 tcpi_options; __u8 tcpi_options;
__u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4; __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;
__u8 tcpi_delivery_rate_app_limited:1;
__u32 tcpi_rto; __u32 tcpi_rto;
__u32 tcpi_ato; __u32 tcpi_ato;
...@@ -211,6 +212,8 @@ struct tcp_info { ...@@ -211,6 +212,8 @@ struct tcp_info {
__u32 tcpi_min_rtt; __u32 tcpi_min_rtt;
__u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */
__u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */
__u64 tcpi_delivery_rate;
}; };
/* for TCP_MD5SIG socket option */ /* for TCP_MD5SIG socket option */
......
...@@ -2712,7 +2712,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) ...@@ -2712,7 +2712,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
{ {
const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
const struct inet_connection_sock *icsk = inet_csk(sk); const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp; u32 now = tcp_time_stamp, intv;
unsigned int start; unsigned int start;
int notsent_bytes; int notsent_bytes;
u64 rate64; u64 rate64;
...@@ -2802,6 +2802,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) ...@@ -2802,6 +2802,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_min_rtt = tcp_min_rtt(tp); info->tcpi_min_rtt = tcp_min_rtt(tp);
info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_in = tp->data_segs_in;
info->tcpi_data_segs_out = tp->data_segs_out; info->tcpi_data_segs_out = tp->data_segs_out;
info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0;
rate = READ_ONCE(tp->rate_delivered);
intv = READ_ONCE(tp->rate_interval_us);
if (rate && intv) {
rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC;
do_div(rate64, intv);
put_unaligned(rate64, &info->tcpi_delivery_rate);
}
} }
EXPORT_SYMBOL_GPL(tcp_get_info); EXPORT_SYMBOL_GPL(tcp_get_info);
......
...@@ -149,12 +149,22 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, ...@@ -149,12 +149,22 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost,
* for connections suffer heavy or prolonged losses. * for connections suffer heavy or prolonged losses.
*/ */
if (unlikely(rs->interval_us < tcp_min_rtt(tp))) { if (unlikely(rs->interval_us < tcp_min_rtt(tp))) {
rs->interval_us = -1;
if (!rs->is_retrans) if (!rs->is_retrans)
pr_debug("tcp rate: %ld %d %u %u %u\n", pr_debug("tcp rate: %ld %d %u %u %u\n",
rs->interval_us, rs->delivered, rs->interval_us, rs->delivered,
inet_csk(sk)->icsk_ca_state, inet_csk(sk)->icsk_ca_state,
tp->rx_opt.sack_ok, tcp_min_rtt(tp)); tp->rx_opt.sack_ok, tcp_min_rtt(tp));
rs->interval_us = -1;
return;
}
/* Record the last non-app-limited or the highest app-limited bw */
if (!rs->is_app_limited ||
((u64)rs->delivered * tp->rate_interval_us >=
(u64)tp->rate_delivered * rs->interval_us)) {
tp->rate_delivered = rs->delivered;
tp->rate_interval_us = rs->interval_us;
tp->rate_app_limited = rs->is_app_limited;
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment