Commit 9cb7c013 authored by Eric Dumazet's avatar Eric Dumazet Committed by Jakub Kicinski

ipv6: make ip6_rt_gc_expire an atomic_t

Reads and Writes to ip6_rt_gc_expire always have been racy,
as syzbot reported lately [1]

There is a possible risk of under-flow, leading
to unexpected high value passed to fib6_run_gc(),
although I have not observed this in the field.

Hosts hitting ip6_dst_gc() very hard are under pretty bad
state anyway.

[1]
BUG: KCSAN: data-race in ip6_dst_gc / ip6_dst_gc

read-write to 0xffff888102110744 of 4 bytes by task 13165 on cpu 1:
 ip6_dst_gc+0x1f3/0x220 net/ipv6/route.c:3311
 dst_alloc+0x9b/0x160 net/core/dst.c:86
 ip6_dst_alloc net/ipv6/route.c:344 [inline]
 icmp6_dst_alloc+0xb2/0x360 net/ipv6/route.c:3261
 mld_sendpack+0x2b9/0x580 net/ipv6/mcast.c:1807
 mld_send_cr net/ipv6/mcast.c:2119 [inline]
 mld_ifc_work+0x576/0x800 net/ipv6/mcast.c:2651
 process_one_work+0x3d3/0x720 kernel/workqueue.c:2289
 worker_thread+0x618/0xa70 kernel/workqueue.c:2436
 kthread+0x1a9/0x1e0 kernel/kthread.c:376
 ret_from_fork+0x1f/0x30

read-write to 0xffff888102110744 of 4 bytes by task 11607 on cpu 0:
 ip6_dst_gc+0x1f3/0x220 net/ipv6/route.c:3311
 dst_alloc+0x9b/0x160 net/core/dst.c:86
 ip6_dst_alloc net/ipv6/route.c:344 [inline]
 icmp6_dst_alloc+0xb2/0x360 net/ipv6/route.c:3261
 mld_sendpack+0x2b9/0x580 net/ipv6/mcast.c:1807
 mld_send_cr net/ipv6/mcast.c:2119 [inline]
 mld_ifc_work+0x576/0x800 net/ipv6/mcast.c:2651
 process_one_work+0x3d3/0x720 kernel/workqueue.c:2289
 worker_thread+0x618/0xa70 kernel/workqueue.c:2436
 kthread+0x1a9/0x1e0 kernel/kthread.c:376
 ret_from_fork+0x1f/0x30

value changed: 0x00000bb3 -> 0x00000ba9

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 11607 Comm: kworker/0:21 Not tainted 5.18.0-rc1-syzkaller-00037-g42e7a03d-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Workqueue: mld mld_ifc_work

Fixes: 1da177e4 ("Linux-2.6.12-rc2")
Signed-off-by: default avatarEric Dumazet <edumazet@google.com>
Reported-by: default avatarsyzbot <syzkaller@googlegroups.com>
Reviewed-by: default avatarDavid Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20220413181333.649424-1-eric.dumazet@gmail.comSigned-off-by: default avatarJakub Kicinski <kuba@kernel.org>
parent 268b41b3
...@@ -75,8 +75,8 @@ struct netns_ipv6 { ...@@ -75,8 +75,8 @@ struct netns_ipv6 {
struct list_head fib6_walkers; struct list_head fib6_walkers;
rwlock_t fib6_walker_lock; rwlock_t fib6_walker_lock;
spinlock_t fib6_gc_lock; spinlock_t fib6_gc_lock;
unsigned int ip6_rt_gc_expire; atomic_t ip6_rt_gc_expire;
unsigned long ip6_rt_last_gc; unsigned long ip6_rt_last_gc;
unsigned char flowlabel_has_excl; unsigned char flowlabel_has_excl;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES #ifdef CONFIG_IPV6_MULTIPLE_TABLES
bool fib6_has_custom_rules; bool fib6_has_custom_rules;
......
...@@ -3292,6 +3292,7 @@ static int ip6_dst_gc(struct dst_ops *ops) ...@@ -3292,6 +3292,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
unsigned int val;
int entries; int entries;
entries = dst_entries_get_fast(ops); entries = dst_entries_get_fast(ops);
...@@ -3302,13 +3303,13 @@ static int ip6_dst_gc(struct dst_ops *ops) ...@@ -3302,13 +3303,13 @@ static int ip6_dst_gc(struct dst_ops *ops)
entries <= rt_max_size) entries <= rt_max_size)
goto out; goto out;
net->ipv6.ip6_rt_gc_expire++; fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
entries = dst_entries_get_slow(ops); entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh) if (entries < ops->gc_thresh)
net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
out: out:
net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
return entries > rt_max_size; return entries > rt_max_size;
} }
...@@ -6509,7 +6510,7 @@ static int __net_init ip6_route_net_init(struct net *net) ...@@ -6509,7 +6510,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
net->ipv6.sysctl.skip_notify_on_dev_down = 0; net->ipv6.sysctl.skip_notify_on_dev_down = 0;
net->ipv6.ip6_rt_gc_expire = 30*HZ; atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);
ret = 0; ret = 0;
out: out:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment