Commit 1f979b11 authored by David S. Miller

Merge branch 'ipv6_percpu_rt_deadlock'

Martin KaFai Lau says:

====================
ipv6: Fix a potential deadlock when creating pcpu rt

v1 -> v2:
A minor change in the commit message of patch 2.

This patch series fixes a potential deadlock when creating a pcpu rt.
It happens when dst_alloc() decides to run gc. Something like this:

read_lock(&table->tb6_lock);
ip6_rt_pcpu_alloc()
=> dst_alloc()
=> ip6_dst_gc()
=> write_lock(&table->tb6_lock); /* oops */

Patches 1 and 2 are preparatory work.
Patch 3 is the fix.

Original report: https://bugzilla.kernel.org/show_bug.cgi?id=102291
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
parents 776829de 9c7370a1
...@@ -172,6 +172,8 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) ...@@ -172,6 +172,8 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
*ppcpu_rt = NULL; *ppcpu_rt = NULL;
} }
} }
non_pcpu_rt->rt6i_pcpu = NULL;
} }
static void rt6_release(struct rt6_info *rt) static void rt6_release(struct rt6_info *rt)
......
...@@ -318,8 +318,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { ...@@ -318,8 +318,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
/* allocate dst with ip6_dst_ops */ /* allocate dst with ip6_dst_ops */
static struct rt6_info *__ip6_dst_alloc(struct net *net, static struct rt6_info *__ip6_dst_alloc(struct net *net,
struct net_device *dev, struct net_device *dev,
int flags, int flags)
struct fib6_table *table)
{ {
struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
0, DST_OBSOLETE_FORCE_CHK, flags); 0, DST_OBSOLETE_FORCE_CHK, flags);
...@@ -336,10 +335,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, ...@@ -336,10 +335,9 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net,
static struct rt6_info *ip6_dst_alloc(struct net *net, static struct rt6_info *ip6_dst_alloc(struct net *net,
struct net_device *dev, struct net_device *dev,
int flags, int flags)
struct fib6_table *table)
{ {
struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table); struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags);
if (rt) { if (rt) {
rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
...@@ -950,8 +948,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, ...@@ -950,8 +948,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
ort = (struct rt6_info *)ort->dst.from; ort = (struct rt6_info *)ort->dst.from;
rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0);
0, ort->rt6i_table);
if (!rt) if (!rt)
return NULL; return NULL;
...@@ -983,8 +980,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) ...@@ -983,8 +980,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
struct rt6_info *pcpu_rt; struct rt6_info *pcpu_rt;
pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev), pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
rt->dst.dev, rt->dst.flags, rt->dst.dev, rt->dst.flags);
rt->rt6i_table);
if (!pcpu_rt) if (!pcpu_rt)
return NULL; return NULL;
...@@ -997,32 +993,53 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) ...@@ -997,32 +993,53 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
/* It should be called with read_lock_bh(&tb6_lock) acquired */ /* It should be called with read_lock_bh(&tb6_lock) acquired */
static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
{ {
struct rt6_info *pcpu_rt, *prev, **p; struct rt6_info *pcpu_rt, **p;
p = this_cpu_ptr(rt->rt6i_pcpu); p = this_cpu_ptr(rt->rt6i_pcpu);
pcpu_rt = *p; pcpu_rt = *p;
if (pcpu_rt) if (pcpu_rt) {
goto done; dst_hold(&pcpu_rt->dst);
rt6_dst_from_metrics_check(pcpu_rt);
}
return pcpu_rt;
}
static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt)
{
struct fib6_table *table = rt->rt6i_table;
struct rt6_info *pcpu_rt, *prev, **p;
pcpu_rt = ip6_rt_pcpu_alloc(rt); pcpu_rt = ip6_rt_pcpu_alloc(rt);
if (!pcpu_rt) { if (!pcpu_rt) {
struct net *net = dev_net(rt->dst.dev); struct net *net = dev_net(rt->dst.dev);
pcpu_rt = net->ipv6.ip6_null_entry; dst_hold(&net->ipv6.ip6_null_entry->dst);
goto done; return net->ipv6.ip6_null_entry;
} }
read_lock_bh(&table->tb6_lock);
if (rt->rt6i_pcpu) {
p = this_cpu_ptr(rt->rt6i_pcpu);
prev = cmpxchg(p, NULL, pcpu_rt); prev = cmpxchg(p, NULL, pcpu_rt);
if (prev) { if (prev) {
/* If someone did it before us, return prev instead */ /* If someone did it before us, return prev instead */
dst_destroy(&pcpu_rt->dst); dst_destroy(&pcpu_rt->dst);
pcpu_rt = prev; pcpu_rt = prev;
} }
} else {
done: /* rt has been removed from the fib6 tree
* before we have a chance to acquire the read_lock.
* In this case, don't bother to create a pcpu rt
* since rt is going away anyway. The next
* dst_check() will trigger a re-lookup.
*/
dst_destroy(&pcpu_rt->dst);
pcpu_rt = rt;
}
dst_hold(&pcpu_rt->dst); dst_hold(&pcpu_rt->dst);
rt6_dst_from_metrics_check(pcpu_rt); rt6_dst_from_metrics_check(pcpu_rt);
read_unlock_bh(&table->tb6_lock);
return pcpu_rt; return pcpu_rt;
} }
...@@ -1097,9 +1114,22 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, ...@@ -1097,9 +1114,22 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
rt->dst.lastuse = jiffies; rt->dst.lastuse = jiffies;
rt->dst.__use++; rt->dst.__use++;
pcpu_rt = rt6_get_pcpu_route(rt); pcpu_rt = rt6_get_pcpu_route(rt);
if (pcpu_rt) {
read_unlock_bh(&table->tb6_lock); read_unlock_bh(&table->tb6_lock);
} else {
/* We have to do the read_unlock first
* because rt6_make_pcpu_route() may trigger
* ip6_dst_gc() which will take the write_lock.
*/
dst_hold(&rt->dst);
read_unlock_bh(&table->tb6_lock);
pcpu_rt = rt6_make_pcpu_route(rt);
dst_release(&rt->dst);
}
return pcpu_rt; return pcpu_rt;
} }
} }
...@@ -1555,7 +1585,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, ...@@ -1555,7 +1585,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
if (unlikely(!idev)) if (unlikely(!idev))
return ERR_PTR(-ENODEV); return ERR_PTR(-ENODEV);
rt = ip6_dst_alloc(net, dev, 0, NULL); rt = ip6_dst_alloc(net, dev, 0);
if (unlikely(!rt)) { if (unlikely(!rt)) {
in6_dev_put(idev); in6_dev_put(idev);
dst = ERR_PTR(-ENOMEM); dst = ERR_PTR(-ENOMEM);
...@@ -1742,7 +1772,8 @@ int ip6_route_add(struct fib6_config *cfg) ...@@ -1742,7 +1772,8 @@ int ip6_route_add(struct fib6_config *cfg)
if (!table) if (!table)
goto out; goto out;
rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); rt = ip6_dst_alloc(net, NULL,
(cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT);
if (!rt) { if (!rt) {
err = -ENOMEM; err = -ENOMEM;
...@@ -2399,7 +2430,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, ...@@ -2399,7 +2430,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
{ {
struct net *net = dev_net(idev->dev); struct net *net = dev_net(idev->dev);
struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
DST_NOCOUNT, NULL); DST_NOCOUNT);
if (!rt) if (!rt)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment