Commit 4b44a21d authored by Peter Zijlstra, committed by Ingo Molnar

irq_work, smp: Allow irq_work on call_single_queue

Currently irq_work_queue_on() will issue an unconditional
arch_send_call_function_single_ipi() and has the handler do
irq_work_run().

This is unfortunate in that it makes the IPI handler look at a second
cacheline and it misses the opportunity to avoid the IPI. Instead note
that struct irq_work and struct __call_single_data are very similar in
layout, so use a few bits in the flags word to encode a type and stick
the irq_work on the call_single_queue list.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20200526161908.011635912@infradead.org
parent b2a02fc4
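
Editor's note: the layout trick the patch relies on is easier to see in isolation. The sketch below is a plain userspace C illustration, not the patched kernel code: two structures share a common layout prefix (list node, flags word, callback), the high nibble of the shared flags word carries a CSD_TYPE_* tag, and a single queue walker dispatches on that tag, much as flush_smp_call_function_queue() does after this change. Only the CSD_* constant values mirror the patch; struct node, struct call_single_data, run_queue(), say() and tick() are invented for the example.

/*
 * Userspace sketch of the shared-layout + type-tag trick (illustration only,
 * not kernel code). Compile with: cc -std=c11 -Wall demo.c
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define CSD_FLAG_LOCK		0x01
#define CSD_TYPE_ASYNC		0x00
#define CSD_TYPE_SYNC		0x10
#define CSD_TYPE_IRQ_WORK	0x20
#define CSD_FLAG_TYPE_MASK	0xF0

#define CSD_TYPE(csd)	((csd)->flags & CSD_FLAG_TYPE_MASK)

struct node { struct node *next; };	/* stand-in for struct llist_node */

struct call_single_data {		/* stand-in for struct __call_single_data */
	struct node	llist;
	unsigned int	flags;
	void		(*func)(void *info);
	void		*info;
};

struct irq_work {			/* stand-in for struct irq_work */
	struct node	llnode;
	unsigned int	flags;
	void		(*func)(struct irq_work *work);
};

/* One queue, two element types, distinguished by the type nibble in flags. */
static void run_queue(struct node *head)
{
	for (struct node *n = head; n; n = n->next) {
		/* Reading ->flags through either struct works because the layouts match. */
		struct call_single_data *csd = (struct call_single_data *)n;

		switch (CSD_TYPE(csd)) {
		case CSD_TYPE_ASYNC:
		case CSD_TYPE_SYNC:	/* the kernel handles SYNC and ASYNC differently; here both just run */
			csd->func(csd->info);
			break;
		case CSD_TYPE_IRQ_WORK: {
			struct irq_work *work = (struct irq_work *)n;
			work->func(work);
			break;
		}
		}
	}
}

static void say(void *info)			{ printf("csd callback: %s\n", (char *)info); }
static void tick(struct irq_work *work)		{ printf("irq_work callback (flags=%#x)\n", work->flags); }

int main(void)
{
	/* What the patch enforces with BUILD_BUG_ON() in smp_init(). */
	assert(offsetof(struct irq_work, llnode) == offsetof(struct call_single_data, llist));
	assert(offsetof(struct irq_work, flags)  == offsetof(struct call_single_data, flags));
	assert(offsetof(struct irq_work, func)   == offsetof(struct call_single_data, func));

	struct call_single_data csd = {
		.flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC,
		.func  = say,
		.info  = "hello",
	};
	struct irq_work work = {
		.flags = CSD_TYPE_IRQ_WORK,
		.func  = tick,
	};

	/* Both element kinds sit on the same list. */
	work.llnode.next = NULL;
	csd.llist.next = &work.llnode;
	run_queue(&csd.llist);
	return 0;
}

The assert()s in main() stand in for the BUILD_BUG_ON() layout checks the patch adds to smp_init().
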
include/linux/irq_work.h
@@ -13,6 +13,8 @@
  * busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
  */
 
+/* flags share CSD_FLAG_ space */
+
 #define IRQ_WORK_PENDING	BIT(0)
 #define IRQ_WORK_BUSY		BIT(1)
@@ -23,9 +25,12 @@
 #define IRQ_WORK_CLAIMED	(IRQ_WORK_PENDING | IRQ_WORK_BUSY)
 
+/*
+ * structure shares layout with single_call_data_t.
+ */
 struct irq_work {
-	atomic_t flags;
 	struct llist_node llnode;
+	atomic_t flags;
 	void (*func)(struct irq_work *);
 };
...
include/linux/smp.h
@@ -16,17 +16,38 @@
 typedef void (*smp_call_func_t)(void *info);
 typedef bool (*smp_cond_func_t)(int cpu, void *info);
 
+enum {
+	CSD_FLAG_LOCK		= 0x01,
+
+	/* IRQ_WORK_flags */
+
+	CSD_TYPE_ASYNC		= 0x00,
+	CSD_TYPE_SYNC		= 0x10,
+	CSD_TYPE_IRQ_WORK	= 0x20,
+	CSD_FLAG_TYPE_MASK	= 0xF0,
+};
+
+/*
+ * structure shares (partial) layout with struct irq_work
+ */
 struct __call_single_data {
 	struct llist_node llist;
+	unsigned int flags;
 	smp_call_func_t func;
 	void *info;
-	unsigned int flags;
 };
 
 /* Use __aligned() to avoid to use 2 cache lines for 1 csd */
 typedef struct __call_single_data call_single_data_t
 	__aligned(sizeof(struct __call_single_data));
 
+/*
+ * Enqueue a llist_node on the call_single_queue; be very careful, read
+ * flush_smp_call_function_queue() in detail.
+ */
+extern void __smp_call_single_queue(int cpu, struct llist_node *node);
+
 /* total number of cpus in this system (may exceed NR_CPUS) */
 extern unsigned int total_cpus;
...
kernel/irq_work.c
@@ -31,7 +31,7 @@ static bool irq_work_claim(struct irq_work *work)
 {
 	int oflags;
 
-	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags);
+	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->flags);
 	/*
 	 * If the work is already pending, no need to raise the IPI.
 	 * The pairing atomic_fetch_andnot() in irq_work_run() makes sure
@@ -102,8 +102,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
 	if (cpu != smp_processor_id()) {
 		/* Arch remote IPI send/receive backend aren't NMI safe */
 		WARN_ON_ONCE(in_nmi());
-		if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
-			arch_send_call_function_single_ipi(cpu);
+		__smp_call_single_queue(cpu, &work->llnode);
 	} else {
 		__irq_work_queue_local(work);
 	}
@@ -131,6 +130,31 @@ bool irq_work_needs_cpu(void)
 	return true;
 }
 
+void irq_work_single(void *arg)
+{
+	struct irq_work *work = arg;
+	int flags;
+
+	/*
+	 * Clear the PENDING bit, after this point the @work
+	 * can be re-used.
+	 * Make it immediately visible so that other CPUs trying
+	 * to claim that work don't rely on us to handle their data
+	 * while we are in the middle of the func.
+	 */
+	flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
+
+	lockdep_irq_work_enter(work);
+	work->func(work);
+	lockdep_irq_work_exit(work);
+
+	/*
+	 * Clear the BUSY bit and return to the free state if
+	 * no-one else claimed it meanwhile.
+	 */
+	flags &= ~IRQ_WORK_PENDING;
+	(void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
+}
+
 static void irq_work_run_list(struct llist_head *list)
 {
 	struct irq_work *work, *tmp;
@@ -142,27 +166,8 @@ static void irq_work_run_list(struct llist_head *list)
 		return;
 
 	llnode = llist_del_all(list);
-	llist_for_each_entry_safe(work, tmp, llnode, llnode) {
-		int flags;
-		/*
-		 * Clear the PENDING bit, after this point the @work
-		 * can be re-used.
-		 * Make it immediately visible so that other CPUs trying
-		 * to claim that work don't rely on us to handle their data
-		 * while we are in the middle of the func.
-		 */
-		flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
-
-		lockdep_irq_work_enter(work);
-		work->func(work);
-		lockdep_irq_work_exit(work);
-
-		/*
-		 * Clear the BUSY bit and return to the free state if
-		 * no-one else claimed it meanwhile.
-		 */
-		flags &= ~IRQ_WORK_PENDING;
-		(void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
-	}
+	llist_for_each_entry_safe(work, tmp, llnode, llnode)
+		irq_work_single(work);
 }
 
 /*
...
kernel/smp.c
@@ -23,10 +23,8 @@
 
 #include "smpboot.h"
 
-enum {
-	CSD_FLAG_LOCK		= 0x01,
-	CSD_FLAG_SYNCHRONOUS	= 0x02,
-};
+#define CSD_TYPE(_csd)	((_csd)->flags & CSD_FLAG_TYPE_MASK)
 
 struct call_function_data {
 	call_single_data_t __percpu *csd;
@@ -137,15 +135,33 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
 
 extern void send_call_function_single_ipi(int cpu);
 
+void __smp_call_single_queue(int cpu, struct llist_node *node)
+{
+	/*
+	 * The list addition should be visible before sending the IPI
+	 * handler locks the list to pull the entry off it because of
+	 * normal cache coherency rules implied by spinlocks.
+	 *
+	 * If IPIs can go out of order to the cache coherency protocol
+	 * in an architecture, sufficient synchronisation should be added
+	 * to arch code to make it appear to obey cache coherency WRT
+	 * locking and barrier primitives. Generic code isn't really
+	 * equipped to do the right thing...
+	 */
+	if (llist_add(node, &per_cpu(call_single_queue, cpu)))
+		send_call_function_single_ipi(cpu);
+}
+
 /*
  * Insert a previously allocated call_single_data_t element
  * for execution on the given CPU. data must already have
  * ->func, ->info, and ->flags set.
  */
-static int generic_exec_single(int cpu, call_single_data_t *csd,
-			       smp_call_func_t func, void *info)
+static int generic_exec_single(int cpu, call_single_data_t *csd)
 {
 	if (cpu == smp_processor_id()) {
+		smp_call_func_t func = csd->func;
+		void *info = csd->info;
 		unsigned long flags;
 
 		/*
@@ -159,28 +175,12 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
 		return 0;
 	}
 
 	if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 		csd_unlock(csd);
 		return -ENXIO;
 	}
 
-	csd->func = func;
-	csd->info = info;
-
-	/*
-	 * The list addition should be visible before sending the IPI
-	 * handler locks the list to pull the entry off it because of
-	 * normal cache coherency rules implied by spinlocks.
-	 *
-	 * If IPIs can go out of order to the cache coherency protocol
-	 * in an architecture, sufficient synchronisation should be added
-	 * to arch code to make it appear to obey cache coherency WRT
-	 * locking and barrier primitives. Generic code isn't really
-	 * equipped to do the right thing...
-	 */
-	if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
-		send_call_function_single_ipi(cpu);
+	__smp_call_single_queue(cpu, &csd->llist);
 
 	return 0;
 }
@@ -194,16 +194,10 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
 void generic_smp_call_function_single_interrupt(void)
 {
 	flush_smp_call_function_queue(true);
-
-	/*
-	 * Handle irq works queued remotely by irq_work_queue_on().
-	 * Smp functions above are typically synchronous so they
-	 * better run first since some other CPUs may be busy waiting
-	 * for them.
-	 */
-	irq_work_run();
 }
 
+extern void irq_work_single(void *);
+
 /**
  * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
  *
@@ -241,9 +235,21 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 		 * We don't have to use the _safe() variant here
 		 * because we are not invoking the IPI handlers yet.
 		 */
-		llist_for_each_entry(csd, entry, llist)
-			pr_warn("IPI callback %pS sent to offline CPU\n",
-				csd->func);
+		llist_for_each_entry(csd, entry, llist) {
+			switch (CSD_TYPE(csd)) {
+			case CSD_TYPE_ASYNC:
+			case CSD_TYPE_SYNC:
+			case CSD_TYPE_IRQ_WORK:
+				pr_warn("IPI callback %pS sent to offline CPU\n",
+					csd->func);
+				break;
+
+			default:
+				pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
+					CSD_TYPE(csd));
+				break;
+			}
+		}
 	}
 
 	/*
@@ -251,16 +257,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 	 */
 	prev = NULL;
 	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
-		smp_call_func_t func = csd->func;
-		void *info = csd->info;
-
 		/* Do we wait until *after* callback? */
-		if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
+		if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
+			smp_call_func_t func = csd->func;
+			void *info = csd->info;
+
 			if (prev) {
 				prev->next = &csd_next->llist;
 			} else {
 				entry = &csd_next->llist;
 			}
+
 			func(info);
 			csd_unlock(csd);
 		} else {
@@ -272,11 +279,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 	 * Second; run all !SYNC callbacks.
 	 */
 	llist_for_each_entry_safe(csd, csd_next, entry, llist) {
-		smp_call_func_t func = csd->func;
-		void *info = csd->info;
+		int type = CSD_TYPE(csd);
 
-		csd_unlock(csd);
-		func(info);
+		if (type == CSD_TYPE_ASYNC) {
+			smp_call_func_t func = csd->func;
+			void *info = csd->info;
+
+			csd_unlock(csd);
+			func(info);
+		} else if (type == CSD_TYPE_IRQ_WORK) {
+			irq_work_single(csd);
+		}
 	}
 }
 
@@ -305,7 +318,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 {
 	call_single_data_t *csd;
 	call_single_data_t csd_stack = {
-		.flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS,
+		.flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC,
 	};
 	int this_cpu;
 	int err;
@@ -339,7 +352,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 		csd_lock(csd);
 	}
 
-	err = generic_exec_single(cpu, csd, func, info);
+	csd->func = func;
+	csd->info = info;
+
+	err = generic_exec_single(cpu, csd);
 
 	if (wait)
 		csd_lock_wait(csd);
@@ -385,7 +401,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
 	csd->flags = CSD_FLAG_LOCK;
 	smp_wmb();
 
-	err = generic_exec_single(cpu, csd, csd->func, csd->info);
+	err = generic_exec_single(cpu, csd);
 out:
 	preempt_enable();
@@ -500,7 +516,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 		csd_lock(csd);
 		if (wait)
-			csd->flags |= CSD_FLAG_SYNCHRONOUS;
+			csd->flags |= CSD_TYPE_SYNC;
 		csd->func = func;
 		csd->info = info;
 		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
@@ -632,6 +648,17 @@ void __init smp_init(void)
 {
 	int num_nodes, num_cpus;
 
+	/*
+	 * Ensure struct irq_work layout matches so that
+	 * flush_smp_call_function_queue() can do horrible things.
+	 */
+	BUILD_BUG_ON(offsetof(struct irq_work, llnode) !=
+		     offsetof(struct __call_single_data, llist));
+	BUILD_BUG_ON(offsetof(struct irq_work, func) !=
+		     offsetof(struct __call_single_data, func));
+	BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
+		     offsetof(struct __call_single_data, flags));
+
 	idle_threads_init();
 	cpuhp_threads_init();
...
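
Editor's note: a second userspace sketch (again not kernel code) of why __smp_call_single_queue() only conditionally sends the IPI. llist_add() returns true only when the list was empty beforehand, so one interrupt covers a whole batch of queued entries and the flush drains them in a single pass. struct lnode, struct lhead and the function names below are invented for the example; the kernel's llist primitives behave analogously.

/*
 * Sketch of the llist_add()-returns-"was empty" pattern (illustration only).
 * Compile with: cc -std=c11 -Wall llist_demo.c
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct lnode {
	struct lnode *next;
};

struct lhead {
	_Atomic(struct lnode *) first;
};

/* Push one node; return true iff the list was empty before (cf. llist_add()). */
static bool list_add(struct lnode *node, struct lhead *head)
{
	struct lnode *first = atomic_load(&head->first);

	do {
		node->next = first;
	} while (!atomic_compare_exchange_weak(&head->first, &first, node));

	return first == NULL;
}

/* Detach the whole list at once (cf. llist_del_all()). */
static struct lnode *list_del_all(struct lhead *head)
{
	return atomic_exchange(&head->first, NULL);
}

int main(void)
{
	struct lhead queue = { .first = NULL };
	struct lnode a, b, c;

	/* Only the first enqueue of a batch would trigger the IPI. */
	printf("add a: send IPI? %d\n", list_add(&a, &queue));	/* 1 */
	printf("add b: send IPI? %d\n", list_add(&b, &queue));	/* 0 */
	printf("add c: send IPI? %d\n", list_add(&c, &queue));	/* 0 */

	/* The handler side drains everything queued so far in one pass. */
	int n = 0;
	for (struct lnode *p = list_del_all(&queue); p; p = p->next)
		n++;
	printf("flushed %d entries in one pass\n", n);		/* 3 */
	return 0;
}
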