Commit 896f97ea authored by David Decotigny, committed by Linus Torvalds

lib: cpu_rmap: avoid flushing all workqueues

In some cases, free_irq_cpu_rmap() is called while holding a lock (eg
rtnl).  This can lead to deadlocks, because it invokes
flush_scheduled_work() which ends up waiting for whole system workqueue
to flush, but some pending works might try to acquire the lock we are
already holding.

This commit replaces that requirement with reference counting: each registered
affinity notifier takes a reference on the cpu_rmap, and the map is freed only
once the last reference is dropped, so free_irq_cpu_rmap() no longer has to
flush any workqueue.  The now-unused irq_run_affinity_notifiers() helper is
removed altogether.
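
For illustration only, a minimal sketch of how a driver might use the
reverse-map after this change; the driver-side names (my_rmap, my_driver_*)
are hypothetical and not part of this patch:

	#include <linux/cpu_rmap.h>
	#include <linux/interrupt.h>

	static struct cpu_rmap *my_rmap;	/* hypothetical driver state */

	static int my_driver_setup_rmap(const int *irqs, unsigned int nvec)
	{
		unsigned int i;
		int rc;

		my_rmap = alloc_irq_cpu_rmap(nvec);
		if (!my_rmap)
			return -ENOMEM;

		for (i = 0; i < nvec; i++) {
			/* each registered notifier takes its own reference */
			rc = irq_cpu_rmap_add(my_rmap, irqs[i]);
			if (rc) {
				free_irq_cpu_rmap(my_rmap);
				my_rmap = NULL;
				return rc;
			}
		}
		return 0;
	}

	static void my_driver_teardown_rmap(void)
	{
		/*
		 * Safe while holding rtnl: this only unregisters the
		 * notifiers and drops our reference; the map itself is
		 * freed when the last notifier reference goes away,
		 * possibly later from the affinity-notify work item.
		 */
		free_irq_cpu_rmap(my_rmap);
		my_rmap = NULL;
	}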

[akpm@linux-foundation.org: eliminate free_cpu_rmap, rename cpu_rmap_reclaim() to cpu_rmap_release(), propagate kref_put() retval from cpu_rmap_put()]
Signed-off-by: David Decotigny <decot@googlers.com>
Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Acked-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 254adaa4
@@ -13,9 +13,11 @@
 #include <linux/cpumask.h>
 #include <linux/gfp.h>
 #include <linux/slab.h>
+#include <linux/kref.h>
 
 /**
  * struct cpu_rmap - CPU affinity reverse-map
+ * @refcount: kref for object
  * @size: Number of objects to be reverse-mapped
  * @used: Number of objects added
  * @obj: Pointer to array of object pointers
@@ -23,6 +25,7 @@
  * based on affinity masks
  */
 struct cpu_rmap {
+	struct kref refcount;
 	u16 size, used;
 	void **obj;
 	struct {
@@ -33,15 +36,7 @@ struct cpu_rmap {
 #define CPU_RMAP_DIST_INF 0xffff
 
 extern struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags);
+extern int cpu_rmap_put(struct cpu_rmap *rmap);
 
-/**
- * free_cpu_rmap - free CPU affinity reverse-map
- * @rmap: Reverse-map allocated with alloc_cpu_rmap(), or %NULL
- */
-static inline void free_cpu_rmap(struct cpu_rmap *rmap)
-{
-	kfree(rmap);
-}
-
 extern int cpu_rmap_add(struct cpu_rmap *rmap, void *obj);
 extern int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
...
@@ -268,11 +268,6 @@ struct irq_affinity_notify {
 extern int
 irq_set_affinity_notifier(unsigned int irq, struct irq_affinity_notify *notify);
 
-static inline void irq_run_affinity_notifiers(void)
-{
-	flush_scheduled_work();
-}
-
 #else /* CONFIG_SMP */
 
 static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
...
@@ -45,6 +45,7 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
 	if (!rmap)
 		return NULL;
 
+	kref_init(&rmap->refcount);
 	rmap->obj = (void **)((char *)rmap + obj_offset);
 
 	/* Initially assign CPUs to objects on a rota, since we have
@@ -63,6 +64,35 @@ struct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
 }
 EXPORT_SYMBOL(alloc_cpu_rmap);
 
+/**
+ * cpu_rmap_release - internal reclaiming helper called from kref_put
+ * @ref: kref to struct cpu_rmap
+ */
+static void cpu_rmap_release(struct kref *ref)
+{
+	struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);
+	kfree(rmap);
+}
+
+/**
+ * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
+ * @rmap: reverse-map allocated with alloc_cpu_rmap()
+ */
+static inline void cpu_rmap_get(struct cpu_rmap *rmap)
+{
+	kref_get(&rmap->refcount);
+}
+
+/**
+ * cpu_rmap_put - release ref on a cpu_rmap
+ * @rmap: reverse-map allocated with alloc_cpu_rmap()
+ */
+int cpu_rmap_put(struct cpu_rmap *rmap)
+{
+	return kref_put(&rmap->refcount, cpu_rmap_release);
+}
+EXPORT_SYMBOL(cpu_rmap_put);
+
 /* Reevaluate nearest object for given CPU, comparing with the given
  * neighbours at the given distance.
  */
@@ -197,8 +227,7 @@ struct irq_glue {
  * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
  * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL
  *
- * Must be called in process context, before freeing the IRQs, and
- * without holding any locks required by global workqueue items.
+ * Must be called in process context, before freeing the IRQs.
  */
 void free_irq_cpu_rmap(struct cpu_rmap *rmap)
 {
@@ -212,12 +241,18 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap)
 		glue = rmap->obj[index];
 		irq_set_affinity_notifier(glue->notify.irq, NULL);
 	}
-	irq_run_affinity_notifiers();
 
-	kfree(rmap);
+	cpu_rmap_put(rmap);
 }
 EXPORT_SYMBOL(free_irq_cpu_rmap);
 
+/**
+ * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
+ * @notify: struct irq_affinity_notify passed by irq/manage.c
+ * @mask: cpu mask for new SMP affinity
+ *
+ * This is executed in workqueue context.
+ */
 static void
 irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
 {
@@ -230,10 +265,16 @@ irq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
 		pr_warning("irq_cpu_rmap_notify: update failed: %d\n", rc);
 }
 
+/**
+ * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
+ * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
+ */
 static void irq_cpu_rmap_release(struct kref *ref)
 {
 	struct irq_glue *glue =
 		container_of(ref, struct irq_glue, notify.kref);
+
+	cpu_rmap_put(glue->rmap);
 	kfree(glue);
 }
 
@@ -258,10 +299,13 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
 	glue->notify.notify = irq_cpu_rmap_notify;
 	glue->notify.release = irq_cpu_rmap_release;
 	glue->rmap = rmap;
+	cpu_rmap_get(rmap);
 	glue->index = cpu_rmap_add(rmap, glue);
 	rc = irq_set_affinity_notifier(irq, &glue->notify);
-	if (rc)
+	if (rc) {
+		cpu_rmap_put(glue->rmap);
 		kfree(glue);
+	}
 	return rc;
 }
 EXPORT_SYMBOL(irq_cpu_rmap_add);