Commit b20a7ca8 authored by David S. Miller

Merge branch 'sysctl-races-part-5'

Kuniyuki Iwashima says:

====================
sysctl: Fix data-races around ipv4_net_table (Round 5).

This series fixes data-races around 15 knobs from tcp_dsack in
ipv4_net_table.

tcp_tso_win_divisor was skipped because it already uses READ_ONCE().

So, the final round for ipv4_net_table will start with tcp_pacing_ss_ratio.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents ebbbe23f 2afdbe7b
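
Every change below follows the same pattern: a lockless read of an ipv4 sysctl knob is annotated with READ_ONCE() so the compiler performs exactly one load and cannot tear, cache, or re-read the value while the knob is concurrently updated through /proc. The following is a rough, self-contained sketch of that reader/writer pairing; the struct, the simplified READ_ONCE()/WRITE_ONCE() macros, and the thread functions are illustrative stand-ins, not the kernel's implementation.

#include <pthread.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's READ_ONCE()/WRITE_ONCE() helpers. */
#define READ_ONCE(x)		(*(volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val)	(*(volatile typeof(x) *)&(x) = (val))

/* Illustrative stand-in for the per-netns sysctl block. */
struct fake_netns_ipv4 {
	int sysctl_tcp_dsack;
};

static struct fake_netns_ipv4 ns = { .sysctl_tcp_dsack = 1 };
static long dsack_seen;

/* Reader side (the fast path): load the knob exactly once per use. */
static void *fast_path(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000000; i++) {
		int dsack = READ_ONCE(ns.sysctl_tcp_dsack);

		if (dsack)
			dsack_seen++;	/* only this thread writes dsack_seen */
	}
	return NULL;
}

/* Writer side (the sysctl handler): store the new value with the
 * matching annotation while readers keep running. */
static void *sysctl_handler(void *arg)
{
	(void)arg;
	for (int i = 0; i < 100000; i++)
		WRITE_ONCE(ns.sysctl_tcp_dsack, i & 1);
	return NULL;
}

int main(void)
{
	pthread_t reader, writer;

	pthread_create(&reader, NULL, fast_path, NULL);
	pthread_create(&writer, NULL, sysctl_handler, NULL);
	pthread_join(reader, NULL);
	pthread_join(writer, NULL);
	printf("iterations that saw tcp_dsack enabled: %ld\n", dsack_seen);
	return 0;
}

Built with "gcc -pthread", the sketch shows the shape of the fix only; the kernel's real macros do more (size checks, KCSAN instrumentation), but the volatile access is the core idea: the fast path sees a single, untorn snapshot of the knob per use.
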
@@ -1419,7 +1419,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space,
 static inline int tcp_win_from_space(const struct sock *sk, int space)
 {
-	int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
+	int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
 	return tcp_adv_win_scale <= 0 ?
 		(space>>(-tcp_adv_win_scale)) :
......
@@ -686,7 +686,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
 				int size_goal)
 {
 	return skb->len < size_goal &&
-	       sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
+	       READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
 	       !tcp_rtx_queue_empty(sk) &&
 	       refcount_read(&sk->sk_wmem_alloc) > skb->truesize &&
 	       tcp_skb_can_collapse_to(skb);
......
@@ -534,7 +534,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb,
  */
 static void tcp_init_buffer_space(struct sock *sk)
 {
-	int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
+	int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
 	struct tcp_sock *tp = tcp_sk(sk);
 	int maxwin;
@@ -724,7 +724,7 @@ void tcp_rcv_space_adjust(struct sock *sk)
 	 * <prev RTT . ><current RTT .. ><next RTT .... >
 	 */
-	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
 		int rcvmem, rcvbuf;
 		u64 rcvwin, grow;
@@ -2175,7 +2175,7 @@ void tcp_enter_loss(struct sock *sk)
 	 * loss recovery is underway except recurring timeout(s) on
 	 * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
 	 */
-	tp->frto = net->ipv4.sysctl_tcp_frto &&
+	tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
 		   (new_recovery || icsk->icsk_retransmits) &&
 		   !inet_csk(sk)->icsk_mtup.probe_size;
 }
@@ -3058,7 +3058,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
 static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
 {
-	u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
+	u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
 	struct tcp_sock *tp = tcp_sk(sk);
 	if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
@@ -3581,7 +3581,8 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
 	if (*last_oow_ack_time) {
 		s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
-		if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
+		if (0 <= elapsed &&
+		    elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
 			NET_INC_STATS(net, mib_idx);
 			return true;	/* rate-limited: don't send yet! */
 		}
@@ -3629,7 +3630,7 @@ static void tcp_send_challenge_ack(struct sock *sk)
 	/* Then check host-wide RFC 5961 rate limit. */
 	now = jiffies / HZ;
 	if (now != challenge_timestamp) {
-		u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+		u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
 		u32 half = (ack_limit + 1) >> 1;
 		challenge_timestamp = now;
@@ -4426,7 +4427,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+	if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
 		int mib_idx;
 		if (before(seq, tp->rcv_nxt))
@@ -4473,7 +4474,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
 		NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
 		tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
-		if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+		if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 			tcp_rcv_spurious_retrans(sk, skb);
......
@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
 	int m;
 	sk_dst_confirm(sk);
-	if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
+	if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
 		return;
 	rcu_read_lock();
@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk)
 	if (tcp_in_initial_slowstart(tp)) {
 		/* Slow start still did not finish. */
-		if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+		if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
 		    !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
 			val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
 			if (val && (tcp_snd_cwnd(tp) >> 1) > val)
@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk)
 	} else if (!tcp_in_slow_start(tp) &&
 		   icsk->icsk_ca_state == TCP_CA_Open) {
 		/* Cong. avoidance phase, cwnd is reliable. */
-		if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+		if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
 		    !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
 			tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
 				       max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk)
 			tcp_metric_set(tm, TCP_METRIC_CWND,
 				       (val + tp->snd_ssthresh) >> 1);
 		}
-		if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+		if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
 		    !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
 			val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
 			if (val && tp->snd_ssthresh > val)
@@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk)
 	if (tcp_metric_locked(tm, TCP_METRIC_CWND))
 		tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
-	val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
+	val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
 	      0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
 	if (val) {
 		tp->snd_ssthresh = val;
......
@@ -230,7 +230,7 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
 	 * which we interpret as a sign the remote TCP is not
 	 * misinterpreting the window field as a signed quantity.
 	 */
-	if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
+	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
 		(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
 	else
 		(*rcv_wnd) = min_t(u32, space, U16_MAX);
@@ -285,7 +285,7 @@ static u16 tcp_select_window(struct sock *sk)
 	 * scaled window.
 	 */
 	if (!tp->rx_opt.rcv_wscale &&
-	    sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
+	    READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
 		new_win = min(new_win, MAX_TCP_WINDOW);
 	else
 		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
@@ -1976,7 +1976,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 	bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift);
-	r = tcp_min_rtt(tcp_sk(sk)) >> sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log;
+	r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log);
 	if (r < BITS_PER_TYPE(sk->sk_gso_max_size))
 		bytes += sk->sk_gso_max_size >> r;
@@ -1995,7 +1995,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
 	min_tso = ca_ops->min_tso_segs ?
 			ca_ops->min_tso_segs(sk) :
-			sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+			READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
 	tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
 	return min_t(u32, tso_segs, sk->sk_gso_max_segs);
@@ -2507,7 +2507,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
 		      sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
 	if (sk->sk_pacing_status == SK_PACING_NONE)
 		limit = min_t(unsigned long, limit,
-			      sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+			      READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
 	limit <<= factor;
 	if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
......
@@ -1271,7 +1271,7 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th)
 	if (unlikely(th->syn))
 		new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale;
 	if (!tp->rx_opt.rcv_wscale &&
-	    sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows)
+	    READ_ONCE(sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows))
 		new_win = min(new_win, MAX_TCP_WINDOW);
 	else
 		new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
......
@@ -1908,7 +1908,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
 	if (msk->rcvq_space.copied <= msk->rcvq_space.space)
 		goto new_measure;
-	if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+	if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
 	    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
 		int rcvmem, rcvbuf;
 		u64 rcvwin, grow;
......