Commit 22adb358 authored by David S. Miller

[SPARC64]: Eliminate NR_CPUS limitations.

Cheetah systems can have cpuids as large as 1023, although physical
systems don't have that many cpus.

Only three limitations existed in the kernel preventing arbitrary
NR_CPUS values:

1) dcache dirty cpu state stored in page->flags on
   D-cache aliasing platforms.  With some build time
   calculations and some build-time BUG checks on
   page->flags layout, this one was easily solved.

2) The cheetah XCALL delivery code could only handle
   a cpumask with up to 32 cpus set.  Some simple looping
   logic clears that up too.

3) thread_info->cpu was a u8, easily changed to a u16.

There are a few spots in the kernel that still put NR_CPUS
sized arrays on the kernel stack, but that's not a sparc64
specific problem.
Signed-off-by: David S. Miller <davem@davemloft.net>
parent 5cbc3073
...@@ -147,10 +147,10 @@ config SMP ...@@ -147,10 +147,10 @@ config SMP
If you don't know what to do here, say N. If you don't know what to do here, say N.
config NR_CPUS config NR_CPUS
int "Maximum number of CPUs (2-64)" int "Maximum number of CPUs (2-1024)"
range 2 64 range 2 1024
depends on SMP depends on SMP
default "32" default "64"
source "drivers/cpufreq/Kconfig" source "drivers/cpufreq/Kconfig"
......
...@@ -523,7 +523,7 @@ tlb_fixup_done: ...@@ -523,7 +523,7 @@ tlb_fixup_done:
#else #else
mov 0, %o0 mov 0, %o0
#endif #endif
stb %o0, [%g6 + TI_CPU] sth %o0, [%g6 + TI_CPU]
/* Off we go.... */ /* Off we go.... */
call start_kernel call start_kernel
......
...@@ -400,7 +400,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c ...@@ -400,7 +400,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask) static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
{ {
u64 pstate, ver; u64 pstate, ver;
int nack_busy_id, is_jbus; int nack_busy_id, is_jbus, need_more;
if (cpus_empty(mask)) if (cpus_empty(mask))
return; return;
...@@ -416,6 +416,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas ...@@ -416,6 +416,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate)); __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
retry: retry:
need_more = 0;
__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t" __asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
: : "r" (pstate), "i" (PSTATE_IE)); : : "r" (pstate), "i" (PSTATE_IE));
...@@ -444,6 +445,10 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas ...@@ -444,6 +445,10 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
: /* no outputs */ : /* no outputs */
: "r" (target), "i" (ASI_INTR_W)); : "r" (target), "i" (ASI_INTR_W));
nack_busy_id++; nack_busy_id++;
if (nack_busy_id == 32) {
need_more = 1;
break;
}
} }
} }
...@@ -460,6 +465,16 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas ...@@ -460,6 +465,16 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
if (dispatch_stat == 0UL) { if (dispatch_stat == 0UL) {
__asm__ __volatile__("wrpr %0, 0x0, %%pstate" __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
: : "r" (pstate)); : : "r" (pstate));
if (unlikely(need_more)) {
int i, cnt = 0;
for_each_cpu_mask(i, mask) {
cpu_clear(i, mask);
cnt++;
if (cnt == 32)
break;
}
goto retry;
}
return; return;
} }
if (!--stuck) if (!--stuck)
...@@ -497,6 +512,8 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas ...@@ -497,6 +512,8 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
if ((dispatch_stat & check_mask) == 0) if ((dispatch_stat & check_mask) == 0)
cpu_clear(i, mask); cpu_clear(i, mask);
this_busy_nack += 2; this_busy_nack += 2;
if (this_busy_nack == 64)
break;
} }
goto retry; goto retry;
......
...@@ -191,12 +191,9 @@ inline void flush_dcache_page_impl(struct page *page) ...@@ -191,12 +191,9 @@ inline void flush_dcache_page_impl(struct page *page)
} }
#define PG_dcache_dirty PG_arch_1 #define PG_dcache_dirty PG_arch_1
#define PG_dcache_cpu_shift 24UL #define PG_dcache_cpu_shift 32UL
#define PG_dcache_cpu_mask (256UL - 1UL) #define PG_dcache_cpu_mask \
((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL)
#if NR_CPUS > 256
#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus
#endif
#define dcache_dirty_cpu(page) \ #define dcache_dirty_cpu(page) \
(((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask) (((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask)
...@@ -1349,6 +1346,19 @@ void __init paging_init(void) ...@@ -1349,6 +1346,19 @@ void __init paging_init(void)
unsigned long end_pfn, pages_avail, shift, phys_base; unsigned long end_pfn, pages_avail, shift, phys_base;
unsigned long real_end, i; unsigned long real_end, i;
/* These build time checkes make sure that the dcache_dirty_cpu()
* page->flags usage will work.
*
* When a page gets marked as dcache-dirty, we store the
* cpu number starting at bit 32 in the page->flags. Also,
* functions like clear_dcache_dirty_cpu use the cpu mask
* in 13-bit signed-immediate instruction fields.
*/
BUILD_BUG_ON(FLAGS_RESERVED != 32);
BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH +
ilog2(roundup_pow_of_two(NR_CPUS)) > FLAGS_RESERVED);
BUILD_BUG_ON(NR_CPUS > 4096);
kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL; kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
kern_size = (unsigned long)&_end - (unsigned long)KERNBASE; kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
......
...@@ -202,7 +202,7 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch, ...@@ -202,7 +202,7 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
* the calculations done by the macro mid-stream. * the calculations done by the macro mid-stream.
*/ */
#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \ #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \
ldub [THR + TI_CPU], REG1; \ lduh [THR + TI_CPU], REG1; \
sethi %hi(__per_cpu_shift), REG3; \ sethi %hi(__per_cpu_shift), REG3; \
sethi %hi(__per_cpu_base), REG2; \ sethi %hi(__per_cpu_base), REG2; \
ldx [REG3 + %lo(__per_cpu_shift)], REG3; \ ldx [REG3 + %lo(__per_cpu_shift)], REG3; \
......
...@@ -38,8 +38,8 @@ struct thread_info { ...@@ -38,8 +38,8 @@ struct thread_info {
/* D$ line 1 */ /* D$ line 1 */
struct task_struct *task; struct task_struct *task;
unsigned long flags; unsigned long flags;
__u8 cpu;
__u8 fpsaved[7]; __u8 fpsaved[7];
__u8 pad;
unsigned long ksp; unsigned long ksp;
/* D$ line 2 */ /* D$ line 2 */
...@@ -49,7 +49,7 @@ struct thread_info { ...@@ -49,7 +49,7 @@ struct thread_info {
int preempt_count; /* 0 => preemptable, <0 => BUG */ int preempt_count; /* 0 => preemptable, <0 => BUG */
__u8 new_child; __u8 new_child;
__u8 syscall_noerror; __u8 syscall_noerror;
__u16 __pad; __u16 cpu;
unsigned long *utraps; unsigned long *utraps;
...@@ -83,8 +83,7 @@ struct thread_info { ...@@ -83,8 +83,7 @@ struct thread_info {
#define TI_CURRENT_DS (TI_FLAGS + TI_FLAG_BYTE_CURRENT_DS) #define TI_CURRENT_DS (TI_FLAGS + TI_FLAG_BYTE_CURRENT_DS)
#define TI_FPDEPTH (TI_FLAGS + TI_FLAG_BYTE_FPDEPTH) #define TI_FPDEPTH (TI_FLAGS + TI_FLAG_BYTE_FPDEPTH)
#define TI_WSAVED (TI_FLAGS + TI_FLAG_BYTE_WSAVED) #define TI_WSAVED (TI_FLAGS + TI_FLAG_BYTE_WSAVED)
#define TI_CPU 0x00000010 #define TI_FPSAVED 0x00000010
#define TI_FPSAVED 0x00000011
#define TI_KSP 0x00000018 #define TI_KSP 0x00000018
#define TI_FAULT_ADDR 0x00000020 #define TI_FAULT_ADDR 0x00000020
#define TI_KREGS 0x00000028 #define TI_KREGS 0x00000028
...@@ -92,6 +91,7 @@ struct thread_info { ...@@ -92,6 +91,7 @@ struct thread_info {
#define TI_PRE_COUNT 0x00000038 #define TI_PRE_COUNT 0x00000038
#define TI_NEW_CHILD 0x0000003c #define TI_NEW_CHILD 0x0000003c
#define TI_SYS_NOERROR 0x0000003d #define TI_SYS_NOERROR 0x0000003d
#define TI_CPU 0x0000003e
#define TI_UTRAPS 0x00000040 #define TI_UTRAPS 0x00000040
#define TI_REG_WINDOW 0x00000048 #define TI_REG_WINDOW 0x00000048
#define TI_RWIN_SPTRS 0x000003c8 #define TI_RWIN_SPTRS 0x000003c8
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment