sparc64: fix and optimize irq distribution

irq_choose_cpu() should compare the affinity mask against cpu_online_map rather than CPU_MASK_ALL, since irq_select_affinity() sets the interrupt's affinity mask to cpu_online_map "and" CPU_MASK_ALL (which ends up being just cpu_online_map). The mask comparison in irq_choose_cpu() will always fail since the two masks are not the same. So the CPU chosen is the first CPU in the intersection of cpu_online_map and CPU_MASK_ALL, which is always CPU0. That means all interrupts are reassigned to CPU0... Distributing interrupts to CPUs in a linearly increasing round robin fashion is not optimal for the UltraSPARC T1/T2. Also, the irq_rover in irq_choose_cpu() causes an interrupt to be assigned to a different processor each time the interrupt is allocated and released. This may lead to an unbalanced distribution over time. A static mapping of interrupts to processors is done to optimize and balance interrupt distribution. For the T1/T2, interrupts are spread to different cores first, and then to strands within a core. The following is some benchmarks showing the effects of interrupt distribution on a T2. The test was done with iperf using a pair of T5220 boxes, each with a 10GBe NIU (XAUI) connected back to back. TCP | Stock Linear RR IRQ Optimized IRQ Streams | 2.6.30-rc5 Distribution Distribution | GBits/sec GBits/sec GBits/sec --------+----------------------------------------- 1 0.839 0.862 0.868 8 1.16 4.96 5.88 16 1.15 6.40 8.04 100 1.09 7.28 8.68 Signed-off-by: Hong H. Pham <hong.pham@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>

sparc64: fix and optimize irq distribution
irq_choose_cpu() should compare the affinity mask against cpu_online_map rather than CPU_MASK_ALL, since irq_select_affinity() sets the interrupt's affinity mask to cpu_online_map "and" CPU_MASK_ALL (which ends up being just cpu_online_map). The mask comparison in irq_choose_cpu() will always fail since the two masks are not the same. So the CPU chosen is the first CPU in the intersection of cpu_online_map and CPU_MASK_ALL, which is always CPU0. That means all interrupts are reassigned to CPU0... Distributing interrupts to CPUs in a linearly increasing round robin fashion is not optimal for the UltraSPARC T1/T2. Also, the irq_rover in irq_choose_cpu() causes an interrupt to be assigned to a different processor each time the interrupt is allocated and released. This may lead to an unbalanced distribution over time. A static mapping of interrupts to processors is done to optimize and balance interrupt distribution. For the T1/T2, interrupts are spread to different cores first, and then to strands within a core. The following is some benchmarks showing the effects of interrupt distribution on a T2. The test was done with iperf using a pair of T5220 boxes, each with a 10GBe NIU (XAUI) connected back to back. TCP | Stock Linear RR IRQ Optimized IRQ Streams | 2.6.30-rc5 Distribution Distribution | GBits/sec GBits/sec GBits/sec --------+----------------------------------------- 1 0.839 0.862 0.868 8 1.16 4.96 5.88 16 1.15 6.40 8.04 100 1.09 7.28 8.68 Signed-off-by: Hong H. Pham <hong.pham@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net>
280ff974 · Hong H. Pham · David S. Miller · 4fd78a5f · 280ff974 · 280ff974
Commit 280ff974 authored Jun 04, 2009 by Hong H. Pham Committed by David S. Miller Jun 16, 2009
5 changed files
--- a/arch/sparc/kernel/Makefile
+++ b/arch/sparc/kernel/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_SPARC64)   += sstate.o
 obj-$(CONFIG_SPARC64)   += mdesc.o
 obj-$(CONFIG_SPARC64)	+= pcr.o
 obj-$(CONFIG_SPARC64)	+= nmi.o
+obj-$(CONFIG_SPARC64_SMP) += cpumap.o

 # sparc32 do not use GENERIC_HARDIRQS but uses the generic devres implementation
 obj-$(CONFIG_SPARC32)     += devres.o

--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
--- a/arch/sparc/kernel/cpumap.h
+++ b/arch/sparc/kernel/cpumap.h
+#ifndef _CPUMAP_H
+#define _CPUMAP_H
+
+#ifdef CONFIG_SMP
+extern void cpu_map_rebuild(void);
+extern int  map_to_cpu(unsigned int index);
+#define cpu_map_init() cpu_map_rebuild()
+#else
+#define cpu_map_init() do {} while (0)
+static inline int map_to_cpu(unsigned int index)
+{
+	return raw_smp_processor_id();
+}
+#endif
+
+#endif
--- a/arch/sparc/kernel/irq_64.c
+++ b/arch/sparc/kernel/irq_64.c
@@ -45,6 +45,7 @@
 #include <asm/cacheflush.h>

 #include "entry.h"
+#include "cpumap.h"

 #define NUM_IVECS	(IMAP_INR + 1)

@@ -256,35 +257,13 @@ static int irq_choose_cpu(unsigned int virt_irq)
 	int cpuid;

 	cpumask_copy(&mask, irq_desc[virt_irq].affinity);
-	if (cpus_equal(mask, CPU_MASK_ALL)) {
-		static int irq_rover;
-		static DEFINE_SPINLOCK(irq_rover_lock);
-		unsigned long flags;
-
-		/* Round-robin distribution... */
-	do_round_robin:
-		spin_lock_irqsave(&irq_rover_lock, flags);
-
-		while (!cpu_online(irq_rover)) {
-			if (++irq_rover >= nr_cpu_ids)
-				irq_rover = 0;
-		}
-		cpuid = irq_rover;
-		do {
-			if (++irq_rover >= nr_cpu_ids)
-				irq_rover = 0;
-		} while (!cpu_online(irq_rover));
-
-		spin_unlock_irqrestore(&irq_rover_lock, flags);
+	if (cpus_equal(mask, cpu_online_map)) {
+		cpuid = map_to_cpu(virt_irq);
 	} else {
 		cpumask_t tmp;

 		cpus_and(tmp, cpu_online_map, mask);
-
-		if (cpus_empty(tmp))
-			goto do_round_robin;
-
-		cpuid = first_cpu(tmp);
+		cpuid = cpus_empty(tmp) ? map_to_cpu(virt_irq) : first_cpu(tmp);
 	}

 	return cpuid;

--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -48,6 +48,8 @@
 #include <asm/ldc.h>
 #include <asm/hypervisor.h>

+#include "cpumap.h"
+
 int sparc64_multi_core __read_mostly;

 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
@@ -1314,6 +1316,8 @@ int __cpu_disable(void)
 	cpu_clear(cpu, cpu_online_map);
 	ipi_call_unlock();

+	cpu_map_rebuild();
+
 	return 0;
 }