Commit d490b3e2 authored by Ingo Molnar

Merge branch 'timers/nohz-irq-work-v7' of...

Merge branch 'timers/nohz-irq-work-v7' of git://git.kernel.org/pub/scm/linux/kernel/git/frederic/linux-dynticks into timers/nohz

Pull nohz updates from Frederic Weisbecker:

 " This set moves the nohz kick, used to notify a full dynticks CPU when
   events require tick rescheduling, out of the scheduler tick to a
   dedicated IPI.

   This debloats a bit the scheduler IPI from off-topic work that was
   abusing that scheduler fast path for its convenient asynchronous
   properties. Now the nohz kick uses irq-work for its own needs.

   Of course this implied quite some background infrastructure rework,
   including:

     * Clean up some irq-work internals
     * Implement remote irq-work
     * Implement nohz kick on top of remote irq-work
     * Move full dynticks timer enqueue notification to new kick
     * Move multi-task notification to new kick
     * Remove unnecessary barriers on multi-task notification
 "
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parents ebe06187 3882ec64
...@@ -33,6 +33,11 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) ...@@ -33,6 +33,11 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
#define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), } #define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), }
bool irq_work_queue(struct irq_work *work); bool irq_work_queue(struct irq_work *work);
#ifdef CONFIG_SMP
bool irq_work_queue_on(struct irq_work *work, int cpu);
#endif
void irq_work_run(void); void irq_work_run(void);
void irq_work_sync(struct irq_work *work); void irq_work_sync(struct irq_work *work);
......
...@@ -181,7 +181,13 @@ static inline bool tick_nohz_full_cpu(int cpu) ...@@ -181,7 +181,13 @@ static inline bool tick_nohz_full_cpu(int cpu)
extern void tick_nohz_init(void); extern void tick_nohz_init(void);
extern void __tick_nohz_full_check(void); extern void __tick_nohz_full_check(void);
extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu);
/*
 * Kick the CPU this code is currently running on: forwards to
 * tick_nohz_full_kick_cpu() with smp_processor_id() as the target.
 *
 * NOTE(review): the tick_nohz_full_kick_cpu() implementation shown in this
 * change queues through irq_work_queue_on(), which WARNs when in_nmi().
 * Confirm that no caller of this local helper runs in NMI context.
 */
static inline void tick_nohz_full_kick(void)
{
tick_nohz_full_kick_cpu(smp_processor_id());
}
extern void tick_nohz_full_kick_all(void); extern void tick_nohz_full_kick_all(void);
extern void __tick_nohz_task_switch(struct task_struct *tsk); extern void __tick_nohz_task_switch(struct task_struct *tsk);
#else #else
...@@ -189,6 +195,7 @@ static inline void tick_nohz_init(void) { } ...@@ -189,6 +195,7 @@ static inline void tick_nohz_init(void) { }
static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; }
static inline void __tick_nohz_full_check(void) { } static inline void __tick_nohz_full_check(void) { }
/* Empty stub: kicking @cpu does nothing in this configuration branch. */
static inline void tick_nohz_full_kick_cpu(int cpu) { }
static inline void tick_nohz_full_kick(void) { } static inline void tick_nohz_full_kick(void) { }
static inline void tick_nohz_full_kick_all(void) { } static inline void tick_nohz_full_kick_all(void) { }
static inline void __tick_nohz_task_switch(struct task_struct *tsk) { } static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
......
...@@ -16,11 +16,12 @@ ...@@ -16,11 +16,12 @@
#include <linux/tick.h> #include <linux/tick.h>
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/notifier.h> #include <linux/notifier.h>
#include <linux/smp.h>
#include <asm/processor.h> #include <asm/processor.h>
static DEFINE_PER_CPU(struct llist_head, irq_work_list); static DEFINE_PER_CPU(struct llist_head, raised_list);
static DEFINE_PER_CPU(int, irq_work_raised); static DEFINE_PER_CPU(struct llist_head, lazy_list);
/* /*
* Claim the entry so that no one else will poke at it. * Claim the entry so that no one else will poke at it.
...@@ -55,12 +56,34 @@ void __weak arch_irq_work_raise(void) ...@@ -55,12 +56,34 @@ void __weak arch_irq_work_raise(void)
*/ */
} }
#ifdef CONFIG_SMP
/* /*
* Enqueue the irq_work @entry unless it's already pending * Enqueue the irq_work @work on @cpu unless it's already pending
* somewhere. * somewhere.
* *
* Can be re-enqueued while the callback is still in progress. * Can be re-enqueued while the callback is still in progress.
*/ */
/*
 * Queue @work on the raised_list of @cpu and notify that CPU by IPI.
 *
 * Returns false when irq_work_claim() fails (the work is already pending),
 * true once the work has been added to the target CPU's list.
 */
bool irq_work_queue_on(struct irq_work *work, int cpu)
{
/* All work should have been flushed before going offline */
WARN_ON_ONCE(cpu_is_offline(cpu));
/* Arch remote IPI send/receive backend aren't NMI safe */
WARN_ON_ONCE(in_nmi());
/* Only queue if not already pending */
if (!irq_work_claim(work))
return false;
/*
 * The IPI is sent only when llist_add() returns true.
 * NOTE(review): presumably that means the list was empty before this add,
 * so a non-empty list already has a notification outstanding -- confirm
 * against the llist_add() documentation.
 */
if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
arch_send_call_function_single_ipi(cpu);
return true;
}
EXPORT_SYMBOL_GPL(irq_work_queue_on);
#endif
/* Enqueue the irq work @work on the current CPU */
bool irq_work_queue(struct irq_work *work) bool irq_work_queue(struct irq_work *work)
{ {
/* Only queue if not already pending */ /* Only queue if not already pending */
...@@ -70,15 +93,13 @@ bool irq_work_queue(struct irq_work *work) ...@@ -70,15 +93,13 @@ bool irq_work_queue(struct irq_work *work)
/* Queue the entry and raise the IPI if needed. */ /* Queue the entry and raise the IPI if needed. */
preempt_disable(); preempt_disable();
llist_add(&work->llnode, &__get_cpu_var(irq_work_list)); /* If the work is "lazy", handle it from next tick if any */
if (work->flags & IRQ_WORK_LAZY) {
/* if (llist_add(&work->llnode, &__get_cpu_var(lazy_list)) &&
* If the work is not "lazy" or the tick is stopped, raise the irq tick_nohz_tick_stopped())
* work interrupt (if supported by the arch), otherwise, just wait arch_irq_work_raise();
* for the next tick. } else {
*/ if (llist_add(&work->llnode, &__get_cpu_var(raised_list)))
if (!(work->flags & IRQ_WORK_LAZY) || tick_nohz_tick_stopped()) {
if (!this_cpu_cmpxchg(irq_work_raised, 0, 1))
arch_irq_work_raise(); arch_irq_work_raise();
} }
...@@ -90,10 +111,11 @@ EXPORT_SYMBOL_GPL(irq_work_queue); ...@@ -90,10 +111,11 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
bool irq_work_needs_cpu(void) bool irq_work_needs_cpu(void)
{ {
struct llist_head *this_list; struct llist_head *raised, *lazy;
this_list = &__get_cpu_var(irq_work_list); raised = &__get_cpu_var(raised_list);
if (llist_empty(this_list)) lazy = &__get_cpu_var(lazy_list);
if (llist_empty(raised) && llist_empty(lazy))
return false; return false;
/* All work should have been flushed before going offline */ /* All work should have been flushed before going offline */
...@@ -102,28 +124,18 @@ bool irq_work_needs_cpu(void) ...@@ -102,28 +124,18 @@ bool irq_work_needs_cpu(void)
return true; return true;
} }
static void __irq_work_run(void) static void irq_work_run_list(struct llist_head *list)
{ {
unsigned long flags; unsigned long flags;
struct irq_work *work; struct irq_work *work;
struct llist_head *this_list;
struct llist_node *llnode; struct llist_node *llnode;
BUG_ON(!irqs_disabled());
/* if (llist_empty(list))
* Reset the "raised" state right before we check the list because
* an NMI may enqueue after we find the list empty from the runner.
*/
__this_cpu_write(irq_work_raised, 0);
barrier();
this_list = &__get_cpu_var(irq_work_list);
if (llist_empty(this_list))
return; return;
BUG_ON(!irqs_disabled()); llnode = llist_del_all(list);
llnode = llist_del_all(this_list);
while (llnode != NULL) { while (llnode != NULL) {
work = llist_entry(llnode, struct irq_work, llnode); work = llist_entry(llnode, struct irq_work, llnode);
...@@ -148,6 +160,12 @@ static void __irq_work_run(void) ...@@ -148,6 +160,12 @@ static void __irq_work_run(void)
} }
} }
/*
 * Process both per-CPU irq_work lists of the current CPU:
 * raised_list first, then lazy_list.
 */
static void __irq_work_run(void)
{
irq_work_run_list(&__get_cpu_var(raised_list));
irq_work_run_list(&__get_cpu_var(lazy_list));
}
/* /*
* Run the irq_work entries on this cpu. Requires to be ran from hardirq * Run the irq_work entries on this cpu. Requires to be ran from hardirq
* context with local IRQs disabled. * context with local IRQs disabled.
......
...@@ -684,10 +684,16 @@ static void wake_up_idle_cpu(int cpu) ...@@ -684,10 +684,16 @@ static void wake_up_idle_cpu(int cpu)
static bool wake_up_full_nohz_cpu(int cpu) static bool wake_up_full_nohz_cpu(int cpu)
{ {
/*
* We just need the target to call irq_exit() and re-evaluate
* the next tick. The nohz full kick at least implies that.
* If needed we can still optimize that later with an
* empty IRQ.
*/
if (tick_nohz_full_cpu(cpu)) { if (tick_nohz_full_cpu(cpu)) {
if (cpu != smp_processor_id() || if (cpu != smp_processor_id() ||
tick_nohz_tick_stopped()) tick_nohz_tick_stopped())
smp_send_reschedule(cpu); tick_nohz_full_kick_cpu(cpu);
return true; return true;
} }
...@@ -734,10 +740,11 @@ bool sched_can_stop_tick(void) ...@@ -734,10 +740,11 @@ bool sched_can_stop_tick(void)
rq = this_rq(); rq = this_rq();
/* Make sure rq->nr_running update is visible after the IPI */ /*
smp_rmb(); * More than one running task need preemption.
* nr_running update is assumed to be visible
/* More than one running task need preemption */ * after IPI is sent from wakers.
*/
if (rq->nr_running > 1) if (rq->nr_running > 1)
return false; return false;
...@@ -1568,9 +1575,7 @@ void scheduler_ipi(void) ...@@ -1568,9 +1575,7 @@ void scheduler_ipi(void)
*/ */
preempt_fold_need_resched(); preempt_fold_need_resched();
if (llist_empty(&this_rq()->wake_list) if (llist_empty(&this_rq()->wake_list) && !got_nohz_idle_kick())
&& !tick_nohz_full_cpu(smp_processor_id())
&& !got_nohz_idle_kick())
return; return;
/* /*
...@@ -1587,7 +1592,6 @@ void scheduler_ipi(void) ...@@ -1587,7 +1592,6 @@ void scheduler_ipi(void)
* somewhat pessimize the simple resched case. * somewhat pessimize the simple resched case.
*/ */
irq_enter(); irq_enter();
tick_nohz_full_check();
sched_ttwu_pending(); sched_ttwu_pending();
/* /*
......
...@@ -1221,9 +1221,15 @@ static inline void add_nr_running(struct rq *rq, unsigned count) ...@@ -1221,9 +1221,15 @@ static inline void add_nr_running(struct rq *rq, unsigned count)
#ifdef CONFIG_NO_HZ_FULL #ifdef CONFIG_NO_HZ_FULL
if (prev_nr < 2 && rq->nr_running >= 2) { if (prev_nr < 2 && rq->nr_running >= 2) {
if (tick_nohz_full_cpu(rq->cpu)) { if (tick_nohz_full_cpu(rq->cpu)) {
/* Order rq->nr_running write against the IPI */ /*
smp_wmb(); * Tick is needed if more than one task runs on a CPU.
smp_send_reschedule(rq->cpu); * Send the target an IPI to kick it out of nohz mode.
*
* We assume that IPI implies full memory barrier and the
* new value of rq->nr_running is visible on reception
* from the target.
*/
tick_nohz_full_kick_cpu(rq->cpu);
} }
} }
#endif #endif
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
* *
* (C) Jens Axboe <jens.axboe@oracle.com> 2008 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
*/ */
#include <linux/irq_work.h>
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
#include <linux/rculist.h> #include <linux/rculist.h>
#include <linux/kernel.h> #include <linux/kernel.h>
...@@ -210,6 +211,14 @@ void generic_smp_call_function_single_interrupt(void) ...@@ -210,6 +211,14 @@ void generic_smp_call_function_single_interrupt(void)
csd->func(csd->info); csd->func(csd->info);
csd_unlock(csd); csd_unlock(csd);
} }
/*
* Handle irq works queued remotely by irq_work_queue_on().
* Smp functions above are typically synchronous so they
* better run first since some other CPUs may be busy waiting
* for them.
*/
irq_work_run();
} }
/* /*
......
...@@ -224,13 +224,15 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = { ...@@ -224,13 +224,15 @@ static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
}; };
/* /*
* Kick the current CPU if it's full dynticks in order to force it to * Kick the CPU if it's full dynticks in order to force it to
* re-evaluate its dependency on the tick and restart it if necessary. * re-evaluate its dependency on the tick and restart it if necessary.
*/ */
void tick_nohz_full_kick(void) void tick_nohz_full_kick_cpu(int cpu)
{ {
if (tick_nohz_full_cpu(smp_processor_id())) if (!tick_nohz_full_cpu(cpu))
irq_work_queue(&__get_cpu_var(nohz_full_kick_work)); return;
irq_work_queue_on(&per_cpu(nohz_full_kick_work, cpu), cpu);
} }
static void nohz_full_kick_ipi(void *info) static void nohz_full_kick_ipi(void *info)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment