Commit 280ff974, authored by Hong H. Pham and committed by David S. Miller

sparc64: fix and optimize irq distribution

irq_choose_cpu() should compare the affinity mask against cpu_online_map
rather than CPU_MASK_ALL, since irq_select_affinity() sets the interrupt's
affinity mask to cpu_online_map "and" CPU_MASK_ALL (which ends up being
just cpu_online_map).  The mask comparison in irq_choose_cpu() will always
fail since the two masks are not the same.  So the CPU chosen is the first CPU
in the intersection of cpu_online_map and CPU_MASK_ALL, which is always CPU0.
That means all interrupts are reassigned to CPU0...

Distributing interrupts to CPUs in a linearly increasing round robin fashion
is not optimal for the UltraSPARC T1/T2.  Also, the irq_rover in
irq_choose_cpu() causes an interrupt to be assigned to a different
processor each time the interrupt is allocated and released.  This may lead
to an unbalanced distribution over time.

A static mapping of interrupts to processors is done to optimize and balance
interrupt distribution.  For the T1/T2, interrupts are spread to different
cores first, and then to strands within a core.

The following benchmarks show the effects of interrupt distribution on a
T2.  The test was done with iperf using a pair of T5220 boxes, each with a
10GbE NIU (XAUI), connected back to back.

  TCP     | Stock       Linear RR IRQ  Optimized IRQ
  Streams | 2.6.30-rc5  Distribution   Distribution
          | GBits/sec   GBits/sec      GBits/sec
  --------+-----------------------------------------
    1       0.839       0.862          0.868
    8       1.16        4.96           5.88
   16       1.15        6.40           8.04
  100       1.09        7.28           8.68
Signed-off-by: Hong H. Pham <hong.pham@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 4fd78a5f
......@@ -54,6 +54,7 @@ obj-$(CONFIG_SPARC64) += sstate.o
obj-$(CONFIG_SPARC64) += mdesc.o
obj-$(CONFIG_SPARC64) += pcr.o
obj-$(CONFIG_SPARC64) += nmi.o
obj-$(CONFIG_SPARC64_SMP) += cpumap.o
# sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation
obj-$(CONFIG_SPARC32) += devres.o
......
This diff is collapsed.
/*
 * cpumap.h: interface to the CPU map that irq_choose_cpu() consults to
 * pick a target CPU for an interrupt.
 */
#ifndef _CPUMAP_H
#define _CPUMAP_H
#ifdef CONFIG_SMP
/*
 * Rebuild the CPU map.  __cpu_disable() calls this after removing a CPU
 * from cpu_online_map.
 * NOTE(review): presumably implemented in cpumap.c -- confirm.
 */
extern void cpu_map_rebuild(void);
/*
 * Return the CPU id selected for @index.  irq_choose_cpu() passes the
 * virtual IRQ number as @index.
 */
extern int map_to_cpu(unsigned int index);
/* Initial construction of the map is simply a rebuild. */
#define cpu_map_init() cpu_map_rebuild()
#else
/* Without CONFIG_SMP, cpu_map_init() expands to an empty statement. */
#define cpu_map_init() do {} while (0)
/*
 * Non-SMP fallback: @index is ignored and the id reported by
 * raw_smp_processor_id() is returned.
 */
static inline int map_to_cpu(unsigned int index)
{
return raw_smp_processor_id();
}
#endif /* CONFIG_SMP */
#endif /* _CPUMAP_H */
......@@ -45,6 +45,7 @@
#include <asm/cacheflush.h>
#include "entry.h"
#include "cpumap.h"
#define NUM_IVECS (IMAP_INR + 1)
......@@ -256,35 +257,13 @@ static int irq_choose_cpu(unsigned int virt_irq)
int cpuid;
cpumask_copy(&mask, irq_desc[virt_irq].affinity);
if (cpus_equal(mask, CPU_MASK_ALL)) {
static int irq_rover;
static DEFINE_SPINLOCK(irq_rover_lock);
unsigned long flags;
/* Round-robin distribution... */
do_round_robin:
spin_lock_irqsave(&irq_rover_lock, flags);
while (!cpu_online(irq_rover)) {
if (++irq_rover >= nr_cpu_ids)
irq_rover = 0;
}
cpuid = irq_rover;
do {
if (++irq_rover >= nr_cpu_ids)
irq_rover = 0;
} while (!cpu_online(irq_rover));
spin_unlock_irqrestore(&irq_rover_lock, flags);
if (cpus_equal(mask, cpu_online_map)) {
cpuid = map_to_cpu(virt_irq);
} else {
cpumask_t tmp;
cpus_and(tmp, cpu_online_map, mask);
if (cpus_empty(tmp))
goto do_round_robin;
cpuid = first_cpu(tmp);
cpuid = cpus_empty(tmp) ? map_to_cpu(virt_irq) : first_cpu(tmp);
}
return cpuid;
......
......@@ -48,6 +48,8 @@
#include <asm/ldc.h>
#include <asm/hypervisor.h>
#include "cpumap.h"
int sparc64_multi_core __read_mostly;
DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
......@@ -1314,6 +1316,8 @@ int __cpu_disable(void)
cpu_clear(cpu, cpu_online_map);
ipi_call_unlock();
cpu_map_rebuild();
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment