Commit c5cb83bb authored by Thomas Gleixner's avatar Thomas Gleixner

genirq/cpuhotplug: Handle managed IRQs on CPU hotplug

If a CPU goes offline, interrupts affine to the CPU are moved away. If the
outgoing CPU is the last CPU in the affinity mask the migration code breaks
the affinity and sets it it all online cpus.

This is a problem for affinity managed interrupts as CPU hotplug is often
used for power management purposes. If the affinity is broken, the
interrupt is not longer affine to the CPUs to which it was allocated.

The affinity spreading allows to lay out multi queue devices in a way that
they are assigned to a single CPU or a group of CPUs. If the last CPU goes
offline, then the queue is not longer used, so the interrupt can be
shutdown gracefully and parked until one of the assigned CPUs comes online
again.

Add a graceful shutdown mechanism into the irq affinity breaking code path,
mark the irq as MANAGED_SHUTDOWN and leave the affinity mask unmodified.

In the online path, scan the active interrupts for managed interrupts and
if the interrupt is functional and the newly online CPU is part of the
affinity mask, restart the interrupt if it is marked MANAGED_SHUTDOWN or if
the interrupts is started up, try to add the CPU back to the effective
affinity mask.
Originally-by: default avatarChristoph Hellwig <hch@lst.de>
Signed-off-by: default avatarThomas Gleixner <tglx@linutronix.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Keith Busch <keith.busch@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20170619235447.273417334@linutronix.de
parent 761ea388
...@@ -124,6 +124,7 @@ enum cpuhp_state { ...@@ -124,6 +124,7 @@ enum cpuhp_state {
CPUHP_AP_ONLINE_IDLE, CPUHP_AP_ONLINE_IDLE,
CPUHP_AP_SMPBOOT_THREADS, CPUHP_AP_SMPBOOT_THREADS,
CPUHP_AP_X86_VDSO_VMA_ONLINE, CPUHP_AP_X86_VDSO_VMA_ONLINE,
CPUHP_AP_IRQ_AFFINITY_ONLINE,
CPUHP_AP_PERF_ONLINE, CPUHP_AP_PERF_ONLINE,
CPUHP_AP_PERF_X86_ONLINE, CPUHP_AP_PERF_X86_ONLINE,
CPUHP_AP_PERF_X86_UNCORE_ONLINE, CPUHP_AP_PERF_X86_UNCORE_ONLINE,
......
...@@ -500,7 +500,12 @@ extern int irq_set_affinity_locked(struct irq_data *data, ...@@ -500,7 +500,12 @@ extern int irq_set_affinity_locked(struct irq_data *data,
const struct cpumask *cpumask, bool force); const struct cpumask *cpumask, bool force);
extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info); extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info);
#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_IRQ_MIGRATION)
extern void irq_migrate_all_off_this_cpu(void); extern void irq_migrate_all_off_this_cpu(void);
extern int irq_affinity_online_cpu(unsigned int cpu);
#else
# define irq_affinity_online_cpu NULL
#endif
#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
void irq_move_irq(struct irq_data *data); void irq_move_irq(struct irq_data *data);
......
...@@ -1252,6 +1252,11 @@ static struct cpuhp_step cpuhp_ap_states[] = { ...@@ -1252,6 +1252,11 @@ static struct cpuhp_step cpuhp_ap_states[] = {
.startup.single = smpboot_unpark_threads, .startup.single = smpboot_unpark_threads,
.teardown.single = NULL, .teardown.single = NULL,
}, },
[CPUHP_AP_IRQ_AFFINITY_ONLINE] = {
.name = "irq/affinity:online",
.startup.single = irq_affinity_online_cpu,
.teardown.single = NULL,
},
[CPUHP_AP_PERF_ONLINE] = { [CPUHP_AP_PERF_ONLINE] = {
.name = "perf:online", .name = "perf:online",
.startup.single = perf_event_init_cpu, .startup.single = perf_event_init_cpu,
......
...@@ -83,6 +83,15 @@ static bool migrate_one_irq(struct irq_desc *desc) ...@@ -83,6 +83,15 @@ static bool migrate_one_irq(struct irq_desc *desc)
chip->irq_mask(d); chip->irq_mask(d);
if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
/*
* If the interrupt is managed, then shut it down and leave
* the affinity untouched.
*/
if (irqd_affinity_is_managed(d)) {
irqd_set_managed_shutdown(d);
irq_shutdown(desc);
return false;
}
affinity = cpu_online_mask; affinity = cpu_online_mask;
brokeaff = true; brokeaff = true;
} }
...@@ -129,3 +138,39 @@ void irq_migrate_all_off_this_cpu(void) ...@@ -129,3 +138,39 @@ void irq_migrate_all_off_this_cpu(void)
} }
} }
} }
static void irq_restore_affinity_of_irq(struct irq_desc *desc, unsigned int cpu)
{
struct irq_data *data = irq_desc_get_irq_data(desc);
const struct cpumask *affinity = irq_data_get_affinity_mask(data);
if (!irqd_affinity_is_managed(data) || !desc->action ||
!irq_data_get_irq_chip(data) || !cpumask_test_cpu(cpu, affinity))
return;
if (irqd_is_managed_and_shutdown(data))
irq_startup(desc, IRQ_RESEND, IRQ_START_COND);
else
irq_set_affinity_locked(data, affinity, false);
}
/**
* irq_affinity_online_cpu - Restore affinity for managed interrupts
* @cpu: Upcoming CPU for which interrupts should be restored
*/
int irq_affinity_online_cpu(unsigned int cpu)
{
struct irq_desc *desc;
unsigned int irq;
irq_lock_sparse();
for_each_active_irq(irq) {
desc = irq_to_desc(irq);
raw_spin_lock_irq(&desc->lock);
irq_restore_affinity_of_irq(desc, cpu);
raw_spin_unlock_irq(&desc->lock);
}
irq_unlock_sparse();
return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment