Commit 7d5424b2 authored by David S. Miller

Merge branch 'net-sysctl-races'

Kuniyuki Iwashima says:

====================
sysctl: Fix data-races around ipv4_net_table (Round 1).

This series fixes data-races around the first 13 knobs and
nexthop_compat_mode in ipv4_net_table.

I will post another patch for three early_demux knobs later,
so the next round will start from ip_default_ttl.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 22b9c41a bdf00bf2
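
The pattern applied throughout the series is the same: a sysctl knob that the proc handler may rewrite at any time is read locklessly on the data path, so both sides get a single marked access (READ_ONCE() on the readers, WRITE_ONCE() in the handler) instead of a plain load or store that the compiler may tear, reload, or reorder. The userspace sketch below only illustrates that idiom; it is not kernel code. READ_ONCE()/WRITE_ONCE() are modelled with volatile casts (roughly what the kernel macros expand to), and sysctl_tcp_ecn is just a stand-in for net->ipv4.sysctl_tcp_ecn.

/*
 * Illustration only (not kernel code): one thread plays the data path and
 * snapshots the knob, another plays the proc handler and rewrites it.
 * Each access happens exactly once through a volatile cast.
 */
#include <pthread.h>
#include <stdio.h>

#define READ_ONCE(x)     (*(volatile __typeof__(x) *)&(x))
#define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

static int sysctl_tcp_ecn;      /* stand-in for net->ipv4.sysctl_tcp_ecn */

static void *reader(void *arg)  /* "data path": load the knob once per decision */
{
        long *ecn_decisions = arg;

        for (int i = 0; i < 1000000; i++) {
                int ecn = READ_ONCE(sysctl_tcp_ecn);

                if (ecn)        /* every test uses the same snapshot */
                        (*ecn_decisions)++;
        }
        return NULL;
}

static void *writer(void *arg)  /* "proc handler": publish with one store */
{
        (void)arg;
        for (int i = 0; i < 1000000; i++)
                WRITE_ONCE(sysctl_tcp_ecn, i & 1);
        return NULL;
}

int main(void)
{
        long ecn_decisions = 0;
        pthread_t r, w;

        pthread_create(&r, NULL, reader, &ecn_decisions);
        pthread_create(&w, NULL, writer, NULL);
        pthread_join(r, NULL);
        pthread_join(w, NULL);
        printf("reader saw ECN enabled %ld times\n", ecn_decisions);
        return 0;
}

Build with something like "cc -O2 -pthread demo.c" (file name is arbitrary). Strictly portable C11 would use relaxed atomics here; the volatile cast is used only to mirror the kernel macros that the hunks below introduce.
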
@@ -1179,7 +1179,7 @@ ip_autobind_reuse - BOOLEAN
         option should only be set by experts.
         Default: 0

-ip_dynaddr - BOOLEAN
+ip_dynaddr - INTEGER
         If set non-zero, enables support for dynamic addresses.
         If set to a non-zero value larger than 1, a kernel log
         message will be printed when dynamic address rewriting
......
@@ -1392,7 +1392,7 @@ static void chtls_pass_accept_request(struct sock *sk,
         th_ecn = tcph->ece && tcph->cwr;
         if (th_ecn) {
                 ect = !INET_ECN_is_not_ect(ip_dsfield);
-                ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn;
+                ecn_ok = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);
                 if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk))
                         inet_rsk(oreq)->ecn_ok = 1;
         }
......
@@ -75,7 +75,7 @@ static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if,
                                        int dif, int sdif)
 {
 #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
-        return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept,
+        return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept),
                                  bound_dev_if, dif, sdif);
 #else
         return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
......
@@ -1007,13 +1007,13 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write,

         tmp.maxlen = sizeof(val);
         tmp.data = &val;
-        val = *data;
+        val = READ_ONCE(*data);
         res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
                                 do_proc_douintvec_minmax_conv, &param);
         if (res)
                 return res;
         if (write)
-                *data = val;
+                WRITE_ONCE(*data, val);
         return 0;
 }
 EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
@@ -1224,9 +1224,9 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
                 if (jif > INT_MAX)
                         return 1;
-                *valp = (int)jif;
+                WRITE_ONCE(*valp, (int)jif);
         } else {
-                int val = *valp;
+                int val = READ_ONCE(*valp);
                 unsigned long lval;

                 if (val < 0) {
                         *negp = true;
@@ -1294,8 +1294,8 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
  * @ppos: the current position in the file
  *
  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
  * values from/to the user buffer, treated as an ASCII string.
  * The values read are assumed to be in 1/1000 seconds, and
  * are converted into jiffies.
  *
  * Returns 0 on success.
......
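
The do_proc_dointvec_ms_jiffies_conv() hunk above applies the same rule inside the generic proc handler: the stored jiffies value is loaded once and converted back to milliseconds for reads, and the converted value is published with a single store on writes. A rough, hypothetical model of that shape is sketched below; it uses a simplified signature, toy conversion helpers (the real msecs_to_jiffies()/jiffies_to_msecs() handle rounding and overflow), reuses the READ_ONCE()/WRITE_ONCE() definitions from the sketch above, and omits the negative-value handling of the real conv helper.

#include <limits.h>

#define HZ_DEMO 250     /* stand-in for the kernel's HZ */

/* toy conversions, for illustration only */
static unsigned long ms_to_jiffies_demo(unsigned long ms)
{
        return ms * HZ_DEMO / 1000;
}

static unsigned long jiffies_to_ms_demo(unsigned long jif)
{
        return jif * 1000 / HZ_DEMO;
}

/* write path: *valp holds jiffies, the user supplied milliseconds */
static int demo_store_ms(int *valp, unsigned long lval_ms)
{
        unsigned long jif = ms_to_jiffies_demo(lval_ms);

        if (jif > INT_MAX)
                return 1;                       /* reject, as the conv helper does */
        WRITE_ONCE(*valp, (int)jif);            /* readers see old or new, never a mix */
        return 0;
}

/* read path: snapshot the knob once, then convert the snapshot */
static unsigned long demo_load_ms(int *valp)
{
        int val = READ_ONCE(*valp);

        return jiffies_to_ms_demo(val);
}
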
@@ -1246,7 +1246,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
         if (new_saddr == old_saddr)
                 return 0;

-        if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) {
+        if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {
                 pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
                         __func__, &old_saddr, &new_saddr);
         }
@@ -1301,7 +1301,7 @@ int inet_sk_rebuild_header(struct sock *sk)
          * Other protocols have to map its equivalent state to TCP_SYN_SENT.
          * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
          */
-        if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr ||
+        if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||
             sk->sk_state != TCP_SYN_SENT ||
             (sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
             (err = inet_sk_reselect_saddr(sk)) != 0)
......
@@ -1811,7 +1811,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
                         goto nla_put_failure;
                 if (nexthop_is_blackhole(fi->nh))
                         rtm->rtm_type = RTN_BLACKHOLE;
-                if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode)
+                if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode))
                         goto offload;
         }

......
@@ -282,7 +282,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
                 return true;

         /* Limit if icmp type is enabled in ratemask. */
-        if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
+        if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask)))
                 return true;

         return false;
@@ -320,7 +320,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,

         vif = l3mdev_master_ifindex(dst->dev);
         peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
-        rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit);
+        rc = inet_peer_xrlim_allow(peer,
+                                   READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
         if (peer)
                 inet_putpeer(peer);
 out:
@@ -693,7 +694,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,

                 rcu_read_lock();
                 if (rt_is_input_route(rt) &&
-                    net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
+                    READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr))
                         dev = dev_get_by_index_rcu(net, inet_iif(skb_in));

                 if (dev)
@@ -933,7 +934,7 @@ static enum skb_drop_reason icmp_unreach(struct sk_buff *skb)
          *      get the other vendor to fix their kit.
          */
-        if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
+        if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) &&
             inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) {
                 net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
                                      &ip_hdr(skb)->saddr,
@@ -993,7 +994,7 @@ static enum skb_drop_reason icmp_echo(struct sk_buff *skb)

         net = dev_net(skb_dst(skb)->dev);
         /* should there be an ICMP stat for ignored echos? */
-        if (net->ipv4.sysctl_icmp_echo_ignore_all)
+        if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all))
                 return SKB_NOT_DROPPED_YET;

         icmp_param.data.icmph = *icmp_hdr(skb);
@@ -1028,7 +1029,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr)
         u16 ident_len;
         u8 status;

-        if (!net->ipv4.sysctl_icmp_echo_enable_probe)
+        if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
                 return false;

         /* We currently only support probing interfaces on the proxy node
@@ -1249,7 +1250,7 @@ int icmp_rcv(struct sk_buff *skb)
                  */
                 if ((icmph->type == ICMP_ECHO ||
                      icmph->type == ICMP_TIMESTAMP) &&
-                    net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
+                    READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) {
                         reason = SKB_DROP_REASON_INVALID_PROTO;
                         goto error;
                 }
......
@@ -156,7 +156,8 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 {
         struct inet_timewait_sock *tw;

-        if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets)
+        if (refcount_read(&dr->tw_refcount) - 1 >=
+            READ_ONCE(dr->sysctl_max_tw_buckets))
                 return NULL;

         tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
......
@@ -1858,7 +1858,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
                 /* __ip6_del_rt does a release, so do a hold here */
                 fib6_info_hold(f6i);
                 ipv6_stub->ip6_del_rt(net, f6i,
-                                      !net->ipv4.sysctl_nexthop_compat_mode);
+                                      !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode));
         }
 }

@@ -2361,7 +2361,8 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh,
         if (!rc) {
                 nh_base_seq_inc(net);
                 nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
-                if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
+                if (replace_notify &&
+                    READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode))
                         nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
         }

......
@@ -273,7 +273,7 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
         if (!ecn_ok)
                 return false;

-        if (net->ipv4.sysctl_tcp_ecn)
+        if (READ_ONCE(net->ipv4.sysctl_tcp_ecn))
                 return true;

         return dst_feature(dst, RTAX_FEATURE_ECN);
......
@@ -599,6 +599,8 @@ static struct ctl_table ipv4_net_table[] = {
                 .maxlen = sizeof(u8),
                 .mode = 0644,
                 .proc_handler = proc_dou8vec_minmax,
+                .extra1 = SYSCTL_ZERO,
+                .extra2 = SYSCTL_ONE
         },
         {
                 .procname = "icmp_echo_enable_probe",
@@ -615,6 +617,8 @@ static struct ctl_table ipv4_net_table[] = {
                 .maxlen = sizeof(u8),
                 .mode = 0644,
                 .proc_handler = proc_dou8vec_minmax,
+                .extra1 = SYSCTL_ZERO,
+                .extra2 = SYSCTL_ONE
         },
         {
                 .procname = "icmp_ignore_bogus_error_responses",
@@ -622,6 +626,8 @@ static struct ctl_table ipv4_net_table[] = {
                 .maxlen = sizeof(u8),
                 .mode = 0644,
                 .proc_handler = proc_dou8vec_minmax,
+                .extra1 = SYSCTL_ZERO,
+                .extra2 = SYSCTL_ONE
         },
         {
                 .procname = "icmp_errors_use_inbound_ifaddr",
@@ -629,6 +635,8 @@ static struct ctl_table ipv4_net_table[] = {
                 .maxlen = sizeof(u8),
                 .mode = 0644,
                 .proc_handler = proc_dou8vec_minmax,
+                .extra1 = SYSCTL_ZERO,
+                .extra2 = SYSCTL_ONE
         },
         {
                 .procname = "icmp_ratelimit",
@@ -668,6 +676,8 @@ static struct ctl_table ipv4_net_table[] = {
                 .maxlen = sizeof(u8),
                 .mode = 0644,
                 .proc_handler = proc_dou8vec_minmax,
+                .extra1 = SYSCTL_ZERO,
+                .extra2 = SYSCTL_TWO,
         },
         {
                 .procname = "tcp_ecn_fallback",
@@ -675,6 +685,8 @@ static struct ctl_table ipv4_net_table[] = {
                 .maxlen = sizeof(u8),
                 .mode = 0644,
                 .proc_handler = proc_dou8vec_minmax,
+                .extra1 = SYSCTL_ZERO,
+                .extra2 = SYSCTL_ONE,
         },
         {
                 .procname = "ip_dynaddr",
......
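
The table hunks above also add clamping for the boolean u8 knobs: extra1/extra2 restrict accepted writes to the range 0..1 (0..2 for the entry just before tcp_ecn_fallback, i.e. tcp_ecn), and proc_dou8vec_minmax() performs the marked accesses shown in the earlier hunk. As a hedged sketch only, a complete entry of this shape looks roughly like the following; the .data initializer and the array wrapper follow the usual ipv4_net_table pattern rather than being a verbatim copy of the file.

static struct ctl_table demo_ipv4_table[] = {
        {
                .procname     = "icmp_echo_ignore_all",
                .data         = &init_net.ipv4.sysctl_icmp_echo_ignore_all,
                .maxlen       = sizeof(u8),
                .mode         = 0644,
                .proc_handler = proc_dou8vec_minmax,
                .extra1       = SYSCTL_ZERO,  /* smallest accepted value */
                .extra2       = SYSCTL_ONE    /* largest accepted value  */
        },
        { }     /* sentinel */
};

Writes outside the extra1..extra2 range are rejected by the handler before anything is stored, so lockless readers only ever observe a value that was valid at some point.
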
@@ -6729,7 +6729,7 @@ static void tcp_ecn_create_request(struct request_sock *req,

         ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
         ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
-        ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
+        ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst;

         if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
             (ecn_ok_dst & DST_FEATURE_ECN_CA) ||
......
@@ -324,7 +324,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
 {
         struct tcp_sock *tp = tcp_sk(sk);
         bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
-        bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
+        bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
                        tcp_ca_needs_ecn(sk) || bpf_needs_ecn;

         if (!use_ecn) {
@@ -346,7 +346,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)

 static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
 {
-        if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
+        if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
                 /* tp->ecn_flags are cleared at a later point in time when
                  * SYN ACK is ultimatively being received.
                  */
......
@@ -925,7 +925,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
                 break;

         case ICMPV6_EXT_ECHO_REQUEST:
                 if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
-                    net->ipv4.sysctl_icmp_echo_enable_probe)
+                    READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
                         icmpv6_echo_reply(skb);
                 break;
......
@@ -5741,7 +5741,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
                 if (nexthop_is_blackhole(rt->nh))
                         rtm->rtm_type = RTN_BLACKHOLE;

-                if (net->ipv4.sysctl_nexthop_compat_mode &&
+                if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&
                     rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
                         goto nla_put_failure;
......