Commit 374297e8 authored by Jakub Kicinski

Merge branch 'tcp_metrics-series-of-fixes'

Eric Dumazet says:

====================
tcp_metrics: series of fixes

This series contains a fix for addr_same() and various
data-race annotations.

We still have to address races over tm->tcpm_saddr and
tm->tcpm_daddr later.
====================

Link: https://lore.kernel.org/r/20230802131500.1478140-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
parents b755c25f ddf251fa
...@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics { ...@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {
struct tcp_metrics_block { struct tcp_metrics_block {
struct tcp_metrics_block __rcu *tcpm_next; struct tcp_metrics_block __rcu *tcpm_next;
possible_net_t tcpm_net; struct net *tcpm_net;
struct inetpeer_addr tcpm_saddr; struct inetpeer_addr tcpm_saddr;
struct inetpeer_addr tcpm_daddr; struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp; unsigned long tcpm_stamp;
...@@ -51,34 +51,38 @@ struct tcp_metrics_block { ...@@ -51,34 +51,38 @@ struct tcp_metrics_block {
struct rcu_head rcu_head; struct rcu_head rcu_head;
}; };
static inline struct net *tm_net(struct tcp_metrics_block *tm) static inline struct net *tm_net(const struct tcp_metrics_block *tm)
{ {
return read_pnet(&tm->tcpm_net); /* Paired with the WRITE_ONCE() in tcpm_new() */
return READ_ONCE(tm->tcpm_net);
} }
static bool tcp_metric_locked(struct tcp_metrics_block *tm, static bool tcp_metric_locked(struct tcp_metrics_block *tm,
enum tcp_metric_index idx) enum tcp_metric_index idx)
{ {
return tm->tcpm_lock & (1 << idx); /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
return READ_ONCE(tm->tcpm_lock) & (1 << idx);
} }
static u32 tcp_metric_get(struct tcp_metrics_block *tm, static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
enum tcp_metric_index idx) enum tcp_metric_index idx)
{ {
return tm->tcpm_vals[idx]; /* Paired with WRITE_ONCE() in tcp_metric_set() */
return READ_ONCE(tm->tcpm_vals[idx]);
} }
static void tcp_metric_set(struct tcp_metrics_block *tm, static void tcp_metric_set(struct tcp_metrics_block *tm,
enum tcp_metric_index idx, enum tcp_metric_index idx,
u32 val) u32 val)
{ {
tm->tcpm_vals[idx] = val; /* Paired with READ_ONCE() in tcp_metric_get() */
WRITE_ONCE(tm->tcpm_vals[idx], val);
} }
static bool addr_same(const struct inetpeer_addr *a, static bool addr_same(const struct inetpeer_addr *a,
const struct inetpeer_addr *b) const struct inetpeer_addr *b)
{ {
return inetpeer_addr_cmp(a, b) == 0; return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
} }
struct tcpm_hash_bucket { struct tcpm_hash_bucket {
...@@ -89,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly; ...@@ -89,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
static unsigned int tcp_metrics_hash_log __read_mostly; static unsigned int tcp_metrics_hash_log __read_mostly;
static DEFINE_SPINLOCK(tcp_metrics_lock); static DEFINE_SPINLOCK(tcp_metrics_lock);
static DEFINE_SEQLOCK(fastopen_seqlock);
static void tcpm_suck_dst(struct tcp_metrics_block *tm, static void tcpm_suck_dst(struct tcp_metrics_block *tm,
const struct dst_entry *dst, const struct dst_entry *dst,
...@@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, ...@@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
u32 msval; u32 msval;
u32 val; u32 val;
tm->tcpm_stamp = jiffies; WRITE_ONCE(tm->tcpm_stamp, jiffies);
val = 0; val = 0;
if (dst_metric_locked(dst, RTAX_RTT)) if (dst_metric_locked(dst, RTAX_RTT))
...@@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, ...@@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
val |= 1 << TCP_METRIC_CWND; val |= 1 << TCP_METRIC_CWND;
if (dst_metric_locked(dst, RTAX_REORDERING)) if (dst_metric_locked(dst, RTAX_REORDERING))
val |= 1 << TCP_METRIC_REORDERING; val |= 1 << TCP_METRIC_REORDERING;
tm->tcpm_lock = val; /* Paired with READ_ONCE() in tcp_metric_locked() */
WRITE_ONCE(tm->tcpm_lock, val);
msval = dst_metric_raw(dst, RTAX_RTT); msval = dst_metric_raw(dst, RTAX_RTT);
tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC; tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);
msval = dst_metric_raw(dst, RTAX_RTTVAR); msval = dst_metric_raw(dst, RTAX_RTTVAR);
tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC; tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH); tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND); dst_metric_raw(dst, RTAX_SSTHRESH));
tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING); tcp_metric_set(tm, TCP_METRIC_CWND,
dst_metric_raw(dst, RTAX_CWND));
tcp_metric_set(tm, TCP_METRIC_REORDERING,
dst_metric_raw(dst, RTAX_REORDERING));
if (fastopen_clear) { if (fastopen_clear) {
write_seqlock(&fastopen_seqlock);
tm->tcpm_fastopen.mss = 0; tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0; tm->tcpm_fastopen.syn_loss = 0;
tm->tcpm_fastopen.try_exp = 0; tm->tcpm_fastopen.try_exp = 0;
tm->tcpm_fastopen.cookie.exp = false; tm->tcpm_fastopen.cookie.exp = false;
tm->tcpm_fastopen.cookie.len = 0; tm->tcpm_fastopen.cookie.len = 0;
write_sequnlock(&fastopen_seqlock);
} }
} }
#define TCP_METRICS_TIMEOUT (60 * 60 * HZ) #define TCP_METRICS_TIMEOUT (60 * 60 * HZ)
static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) static void tcpm_check_stamp(struct tcp_metrics_block *tm,
const struct dst_entry *dst)
{ {
if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) unsigned long limit;
if (!tm)
return;
limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
if (unlikely(time_after(jiffies, limit)))
tcpm_suck_dst(tm, dst, false); tcpm_suck_dst(tm, dst, false);
} }
...@@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, ...@@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
oldest = deref_locked(tcp_metrics_hash[hash].chain); oldest = deref_locked(tcp_metrics_hash[hash].chain);
for (tm = deref_locked(oldest->tcpm_next); tm; for (tm = deref_locked(oldest->tcpm_next); tm;
tm = deref_locked(tm->tcpm_next)) { tm = deref_locked(tm->tcpm_next)) {
if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp)) if (time_before(READ_ONCE(tm->tcpm_stamp),
READ_ONCE(oldest->tcpm_stamp)))
oldest = tm; oldest = tm;
} }
tm = oldest; tm = oldest;
} else { } else {
tm = kmalloc(sizeof(*tm), GFP_ATOMIC); tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
if (!tm) if (!tm)
goto out_unlock; goto out_unlock;
} }
write_pnet(&tm->tcpm_net, net); /* Paired with the READ_ONCE() in tm_net() */
WRITE_ONCE(tm->tcpm_net, net);
tm->tcpm_saddr = *saddr; tm->tcpm_saddr = *saddr;
tm->tcpm_daddr = *daddr; tm->tcpm_daddr = *daddr;
tcpm_suck_dst(tm, dst, true); tcpm_suck_dst(tm, dst, reclaim);
if (likely(!reclaim)) { if (likely(!reclaim)) {
tm->tcpm_next = tcp_metrics_hash[hash].chain; tm->tcpm_next = tcp_metrics_hash[hash].chain;
...@@ -434,7 +454,7 @@ void tcp_update_metrics(struct sock *sk) ...@@ -434,7 +454,7 @@ void tcp_update_metrics(struct sock *sk)
tp->reordering); tp->reordering);
} }
} }
tm->tcpm_stamp = jiffies; WRITE_ONCE(tm->tcpm_stamp, jiffies);
out_unlock: out_unlock:
rcu_read_unlock(); rcu_read_unlock();
} }
...@@ -539,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst) ...@@ -539,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
return ret; return ret;
} }
static DEFINE_SEQLOCK(fastopen_seqlock);
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie) struct tcp_fastopen_cookie *cookie)
{ {
...@@ -647,7 +665,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, ...@@ -647,7 +665,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
} }
if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE, if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
jiffies - tm->tcpm_stamp, jiffies - READ_ONCE(tm->tcpm_stamp),
TCP_METRICS_ATTR_PAD) < 0) TCP_METRICS_ATTR_PAD) < 0)
goto nla_put_failure; goto nla_put_failure;
...@@ -658,7 +676,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg, ...@@ -658,7 +676,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
if (!nest) if (!nest)
goto nla_put_failure; goto nla_put_failure;
for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) { for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
u32 val = tm->tcpm_vals[i]; u32 val = tcp_metric_get(tm, i);
if (!val) if (!val)
continue; continue;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment