Commit ce3dc447 authored by Martin Schwidefsky

s390: add support for virtually mapped kernel stacks

With virtually mapped kernel stacks the kernel stack overflow detection
is now fault based: every stack has a guard page in the vmalloc space.
The panic_stack is renamed to nodat_stack and is used for all functions
that need to run without DAT, e.g. memcpy_real or do_start_kdump.

The main effect is a reduction in the kernel image size, as with vmap
stacks the old-style overflow checking, which adds two instructions per
function, is not needed anymore. Result from bloat-o-meter:

add/remove: 20/1 grow/shrink: 13/26854 up/down: 2198/-216240 (-214042)

In regard to performance, the fork micro-benchmark takes a hit of a few
microseconds; allocating 4 pages in vmalloc space is more expensive
compared to an order-2 page allocation. But with real workloads I could
not find a noticeable difference.
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
parent ff340d24
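For readers new to vmap stacks: the fault-based detection described above works because every vmalloc area is padded with an unmapped guard page, so the page just below a STACK_SIZE-aligned stack is never mapped. A minimal sketch of the allocation side, mirroring the stack_alloc() this patch adds to arch/s390/kernel/setup.c (illustrative only; the helper name is mine):

```c
#include <linux/vmalloc.h>
#include <linux/thread_info.h>

/*
 * Sketch: with CONFIG_VMAP_STACK each kernel stack is a STACK_SIZE-sized,
 * STACK_SIZE-aligned vmalloc area.  vmalloc pads every area with an
 * unmapped guard page, so running off the bottom of the stack touches an
 * unmapped page and faults immediately, instead of silently corrupting
 * whatever sat below an order-2 page allocation in the linear mapping.
 */
static unsigned long vmap_stack_alloc_sketch(void)
{
	return (unsigned long) __vmalloc_node_range(STACK_SIZE, STACK_SIZE,
			VMALLOC_START, VMALLOC_END, THREADINFO_GFP,
			PAGE_KERNEL, 0, NUMA_NO_NODE,
			__builtin_return_address(0));
}
```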
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -125,6 +125,7 @@ config S390
 	select HAVE_ARCH_SOFT_DIRTY
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	select HAVE_ARCH_VMAP_STACK
 	select HAVE_EBPF_JIT if PACK_STACK && HAVE_MARCH_Z196_FEATURES
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
@@ -649,6 +650,7 @@ config PACK_STACK
 
 config CHECK_STACK
 	def_bool y
+	depends on !VMAP_STACK
 	prompt "Detect kernel stack overflow"
 	help
 	  This option enables the compiler option -mstack-guard and
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -102,9 +102,9 @@ struct lowcore {
 	__u64	current_task;			/* 0x0338 */
 	__u64	kernel_stack;			/* 0x0340 */
 
-	/* Interrupt, panic and restart stack. */
+	/* Interrupt, DAT-off and restart stack. */
 	__u64	async_stack;			/* 0x0348 */
-	__u64	panic_stack;			/* 0x0350 */
+	__u64	nodat_stack;			/* 0x0350 */
 	__u64	restart_stack;			/* 0x0358 */
 
 	/* Restart function and parameter. */
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -162,6 +162,14 @@ struct thread_struct {
 
 typedef struct thread_struct thread_struct;
 
+/*
+ * General size of a stack
+ */
+#define STACK_ORDER 2
+#define STACK_SIZE (PAGE_SIZE << STACK_ORDER)
+#define STACK_INIT_OFFSET \
+	(STACK_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs))
+
 /*
  * Stack layout of a C stack frame.
  */
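For concreteness, plugging in the usual page size (my arithmetic, not part of the patch):

```c
/*
 * Worked example, assuming 4 KiB pages:
 *
 *   STACK_SIZE        = 4096 << 2 = 16384 (16 KiB per stack)
 *   STACK_INIT_OFFSET = 16384 - 160 - sizeof(struct pt_regs)
 *
 * STACK_FRAME_OVERHEAD is 160 bytes on 64-bit s390, so the initial stack
 * pointer sits one empty call frame plus one register save area below
 * the top of the stack.
 */
```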
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -14,10 +14,7 @@
  * Size of kernel stack for each process
  */
 #define THREAD_SIZE_ORDER 2
-#define ASYNC_ORDER  2
-
 #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
-#define ASYNC_SIZE  (PAGE_SIZE << ASYNC_ORDER)
 
 #ifndef __ASSEMBLY__
 #include <asm/lowcore.h>
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -159,7 +159,7 @@ int main(void)
 	OFFSET(__LC_CURRENT, lowcore, current_task);
 	OFFSET(__LC_KERNEL_STACK, lowcore, kernel_stack);
 	OFFSET(__LC_ASYNC_STACK, lowcore, async_stack);
-	OFFSET(__LC_PANIC_STACK, lowcore, panic_stack);
+	OFFSET(__LC_NODAT_STACK, lowcore, nodat_stack);
 	OFFSET(__LC_RESTART_STACK, lowcore, restart_stack);
 	OFFSET(__LC_RESTART_FN, lowcore, restart_fn);
 	OFFSET(__LC_RESTART_DATA, lowcore, restart_data);
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -18,7 +18,7 @@
 ENTRY(s390_base_mcck_handler)
 	basr	%r13,0
-0:	lg	%r15,__LC_PANIC_STACK	# load panic stack
+0:	lg	%r15,__LC_NODAT_STACK	# load panic stack
 	aghi	%r15,-STACK_FRAME_OVERHEAD
 	larl	%r1,s390_base_mcck_handler_fn
 	lg	%r9,0(%r1)
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -77,11 +77,11 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
 	frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
 #ifdef CONFIG_CHECK_STACK
 	sp = __dump_trace(func, data, sp,
-			  S390_lowcore.panic_stack + frame_size - PAGE_SIZE,
-			  S390_lowcore.panic_stack + frame_size);
+			  S390_lowcore.nodat_stack + frame_size - STACK_SIZE,
+			  S390_lowcore.nodat_stack + frame_size);
 #endif
 	sp = __dump_trace(func, data, sp,
-			  S390_lowcore.async_stack + frame_size - ASYNC_SIZE,
+			  S390_lowcore.async_stack + frame_size - STACK_SIZE,
 			  S390_lowcore.async_stack + frame_size);
 	task = task ?: current;
 	__dump_trace(func, data, sp,
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -85,14 +85,34 @@ _LPP_OFFSET = __LC_LPP
 #endif
 	.endm
 
-	.macro	CHECK_STACK stacksize,savearea
+	.macro	CHECK_STACK savearea
 #ifdef CONFIG_CHECK_STACK
-	tml	%r15,\stacksize - CONFIG_STACK_GUARD
+	tml	%r15,STACK_SIZE - CONFIG_STACK_GUARD
 	lghi	%r14,\savearea
 	jz	stack_overflow
 #endif
 	.endm
 
+	.macro	CHECK_VMAP_STACK savearea,oklabel
+#ifdef CONFIG_VMAP_STACK
+	lgr	%r14,%r15
+	nill	%r14,0x10000 - STACK_SIZE
+	oill	%r14,STACK_INIT
+	clg	%r14,__LC_KERNEL_STACK
+	je	\oklabel
+	clg	%r14,__LC_ASYNC_STACK
+	je	\oklabel
+	clg	%r14,__LC_NODAT_STACK
+	je	\oklabel
+	clg	%r14,__LC_RESTART_STACK
+	je	\oklabel
+	lghi	%r14,\savearea
+	j	stack_overflow
+#else
+	j	\oklabel
+#endif
+	.endm
+
 	.macro	SWITCH_ASYNC savearea,timer
 	tmhh	%r8,0x0001		# interrupting from user ?
 	jnz	1f
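The mask juggling in CHECK_VMAP_STACK is dense, so here is a rough C equivalent (a sketch, not part of the patch; STACK_INIT is assumed to be the asm-offsets alias of STACK_INIT_OFFSET, and the helper name is mine):

```c
/*
 * All stacks are STACK_SIZE-aligned, so masking the low bits of the
 * stack pointer yields the stack base; adding the initial offset
 * rebuilds the initial stack pointer, which is exactly the value the
 * lowcore stack slots hold.  If none of the four known stacks match,
 * the stack pointer is corrupt and the handler panics.
 */
static inline bool sp_on_known_stack(unsigned long sp)
{
	unsigned long top = (sp & ~(STACK_SIZE - 1UL)) + STACK_INIT_OFFSET;

	return top == S390_lowcore.kernel_stack ||
	       top == S390_lowcore.async_stack ||
	       top == S390_lowcore.nodat_stack ||
	       top == S390_lowcore.restart_stack;
}
```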
@@ -104,11 +124,11 @@ _LPP_OFFSET = __LC_LPP
 	brasl	%r14,cleanup_critical
 	tmhh	%r8,0x0001		# retest problem state after cleanup
 	jnz	1f
-0:	lg	%r14,__LC_ASYNC_STACK	# are we already on the async stack?
+0:	lg	%r14,__LC_ASYNC_STACK	# are we already on the target stack?
 	slgr	%r14,%r15
 	srag	%r14,%r14,STACK_SHIFT
 	jnz	2f
-	CHECK_STACK 1<<STACK_SHIFT,\savearea
+	CHECK_STACK \savearea
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
 	j	3f
 1:	UPDATE_VTIME %r14,%r15,\timer
@@ -600,9 +620,10 @@ ENTRY(pgm_check_handler)
 	jnz	1f			# -> enabled, can't be a double fault
 	tm	__LC_PGM_ILC+3,0x80	# check for per exception
 	jnz	.Lpgm_svcper		# -> single stepped svc
-1:	CHECK_STACK STACK_SIZE,__LC_SAVE_AREA_SYNC
+1:	CHECK_STACK __LC_SAVE_AREA_SYNC
 	aghi	%r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
-	j	4f
+	# CHECK_VMAP_STACK branches to stack_overflow or 4f
+	CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
 2:	UPDATE_VTIME %r14,%r15,__LC_SYNC_ENTER_TIMER
 	BPENTER	__TI_flags(%r12),_TIF_ISOLATE_BP
 	lg	%r15,__LC_KERNEL_STACK
@@ -1136,7 +1157,8 @@ ENTRY(mcck_int_handler)
 	jnz	4f
 	TSTMSK	__LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
 	jno	.Lmcck_panic
-4:	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
+4:	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
+	SWITCH_ASYNC __LC_GPREGS_SAVE_AREA+64,__LC_MCCK_ENTER_TIMER
 .Lmcck_skip:
 	lghi	%r14,__LC_GPREGS_SAVE_AREA+64
 	stmg	%r0,%r7,__PT_R0(%r11)
@@ -1163,7 +1185,6 @@ ENTRY(mcck_int_handler)
 	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1)
 	la	%r11,STACK_FRAME_OVERHEAD(%r1)
 	lgr	%r15,%r1
-	ssm	__LC_PGM_NEW_PSW	# turn dat on, keep irqs off
 	TSTMSK	__LC_CPU_FLAGS,_CIF_MCCK_PENDING
 	jno	.Lmcck_return
 	TRACE_IRQS_OFF
@@ -1182,7 +1203,7 @@ ENTRY(mcck_int_handler)
 	lpswe	__LC_RETURN_MCCK_PSW
 
 .Lmcck_panic:
-	lg	%r15,__LC_PANIC_STACK
+	lg	%r15,__LC_NODAT_STACK
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	j	.Lmcck_skip
@@ -1193,12 +1214,10 @@ ENTRY(restart_int_handler)
 	ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
 	stg	%r15,__LC_SAVE_AREA_RESTART
 	lg	%r15,__LC_RESTART_STACK
-	aghi	%r15,-__PT_SIZE			# create pt_regs on stack
-	xc	0(__PT_SIZE,%r15),0(%r15)
-	stmg	%r0,%r14,__PT_R0(%r15)
-	mvc	__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
-	mvc	__PT_PSW(16,%r15),__LC_RST_OLD_PSW # store restart old psw
-	aghi	%r15,-STACK_FRAME_OVERHEAD	# create stack frame on stack
+	xc	STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
+	stmg	%r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
+	mvc	STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
+	mvc	STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
 	xc	0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
 	lg	%r1,__LC_RESTART_FN		# load fn, parm & source cpu
 	lg	%r2,__LC_RESTART_DATA
@@ -1216,14 +1235,14 @@ ENTRY(restart_int_handler)
 
 	.section .kprobes.text, "ax"
 
-#ifdef CONFIG_CHECK_STACK
+#if defined(CONFIG_CHECK_STACK) || defined(CONFIG_VMAP_STACK)
 /*
  * The synchronous or the asynchronous stack overflowed. We are dead.
  * No need to properly save the registers, we are going to panic anyway.
  * Setup a pt_regs so that show_trace can provide a good call trace.
  */
 stack_overflow:
-	lg	%r15,__LC_PANIC_STACK	# change to panic stack
+	lg	%r15,__LC_NODAT_STACK	# change to panic stack
 	la	%r11,STACK_FRAME_OVERHEAD(%r15)
 	stmg	%r0,%r7,__PT_R0(%r11)
 	stmg	%r8,%r9,__PT_PSW(%r11)
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -86,4 +86,7 @@ DECLARE_PER_CPU(u64, mt_cycles[8]);
 void gs_load_bc_cb(struct pt_regs *regs);
 void set_fs_fixup(void);
 
+unsigned long stack_alloc(void);
+void stack_free(unsigned long stack);
+
 #endif /* _ENTRY_H */
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -36,9 +36,7 @@ ENTRY(startup_continue)
 #
 	larl	%r14,init_task
 	stg	%r14,__LC_CURRENT
-	larl	%r15,init_thread_union+THREAD_SIZE
-	stg	%r15,__LC_KERNEL_STACK	# set end of kernel stack
-	aghi	%r15,-STACK_FRAME_OVERHEAD
+	larl	%r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD
 #
 # Early setup functions that may not rely on an initialized bss section,
 # like moving the initrd. Returns with an initialized bss section.
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -171,7 +171,7 @@ void do_softirq_own_stack(void)
 	old = current_stack_pointer();
 	/* Check against async. stack address range. */
 	new = S390_lowcore.async_stack;
-	if (((new - old) >> (PAGE_SHIFT + THREAD_SIZE_ORDER)) != 0) {
+	if (((new - old) >> (PAGE_SHIFT + STACK_ORDER)) != 0) {
 		CALL_ON_STACK(__do_softirq, new, 0);
 	} else {
 		/* We are already on the async stack. */
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -142,18 +142,27 @@ static noinline void __machine_kdump(void *image)
 }
 #endif
 
+static unsigned long do_start_kdump(unsigned long addr)
+{
+	struct kimage *image = (struct kimage *) addr;
+	int (*start_kdump)(int) = (void *)image->start;
+	int rc;
+
+	__arch_local_irq_stnsm(0xfb); /* disable DAT */
+	rc = start_kdump(0);
+	__arch_local_irq_stosm(0x04); /* enable DAT */
+	return rc;
+}
+
 /*
  * Check if kdump checksums are valid: We call purgatory with parameter "0"
  */
 static bool kdump_csum_valid(struct kimage *image)
 {
 #ifdef CONFIG_CRASH_DUMP
-	int (*start_kdump)(int) = (void *)image->start;
 	int rc;
 
-	__arch_local_irq_stnsm(0xfb); /* disable DAT */
-	rc = start_kdump(0);
-	__arch_local_irq_stosm(0x04); /* enable DAT */
+	rc = CALL_ON_STACK(do_start_kdump, S390_lowcore.nodat_stack, 1, image);
 	return rc == 0;
 #else
 	return false;
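Why the detour matters: start_kdump() runs with DAT disabled, and a virtually mapped stack simply does not exist without DAT. The nodat_stack comes from the linear (identity) mapping, so it stays addressable either way. CALL_ON_STACK() is the existing s390 helper from asm/processor.h (not shown in this diff) that switches %r15 to the given stack, calls the function with the given number of unsigned long arguments, and switches back:

```c
/* Shape of the call above (illustrative framing, not new code). */
rc = CALL_ON_STACK(do_start_kdump,		/* function to run	     */
		   S390_lowcore.nodat_stack,	/* stack usable with DAT off */
		   1, image);			/* one argument		     */
```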
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -49,6 +49,7 @@
 #include <linux/crash_dump.h>
 #include <linux/memory.h>
 #include <linux/compat.h>
+#include <linux/start_kernel.h>
 
 #include <asm/ipl.h>
 #include <asm/facility.h>
@@ -303,6 +304,78 @@ early_param("vmalloc", parse_vmalloc);
 
 void *restart_stack __section(.data);
 
+unsigned long stack_alloc(void)
+{
+#ifdef CONFIG_VMAP_STACK
+	return (unsigned long)
+		__vmalloc_node_range(STACK_SIZE, STACK_SIZE,
+				     VMALLOC_START, VMALLOC_END,
+				     THREADINFO_GFP,
+				     PAGE_KERNEL, 0, NUMA_NO_NODE,
+				     __builtin_return_address(0));
+#else
+	return __get_free_pages(GFP_KERNEL, STACK_ORDER);
+#endif
+}
+
+void stack_free(unsigned long stack)
+{
+#ifdef CONFIG_VMAP_STACK
+	vfree((void *) stack);
+#else
+	free_pages(stack, STACK_ORDER);
+#endif
+}
+
+int __init arch_early_irq_init(void)
+{
+	unsigned long stack;
+
+	stack = __get_free_pages(GFP_KERNEL, STACK_ORDER);
+	if (!stack)
+		panic("Couldn't allocate async stack");
+	S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
+	return 0;
+}
+
+static int __init async_stack_realloc(void)
+{
+	unsigned long old, new;
+
+	old = S390_lowcore.async_stack - STACK_INIT_OFFSET;
+	new = stack_alloc();
+	if (!new)
+		panic("Couldn't allocate async stack");
+	S390_lowcore.async_stack = new + STACK_INIT_OFFSET;
+	free_pages(old, STACK_ORDER);
+	return 0;
+}
+early_initcall(async_stack_realloc);
+
+void __init arch_call_rest_init(void)
+{
+	struct stack_frame *frame;
+	unsigned long stack;
+
+	stack = stack_alloc();
+	if (!stack)
+		panic("Couldn't allocate kernel stack");
+	current->stack = (void *) stack;
+#ifdef CONFIG_VMAP_STACK
+	current->stack_vm_area = (void *) stack;
+#endif
+	set_task_stack_end_magic(current);
+	stack += STACK_INIT_OFFSET;
+	S390_lowcore.kernel_stack = stack;
+	frame = (struct stack_frame *) stack;
+	memset(frame, 0, sizeof(*frame));
+	/* Branch to rest_init on the new stack, never returns */
+	asm volatile(
+		"	la	15,0(%[_frame])\n"
+		"	jg	rest_init\n"
+		: : [_frame] "a" (frame));
+}
+
 static void __init setup_lowcore(void)
 {
 	struct lowcore *lc;
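Two details in this hunk deserve a note. arch_early_irq_init() runs before the vmalloc allocator is usable, so the async stack is first taken from the buddy allocator; the early_initcall async_stack_realloc() later swaps it for a real vmap stack. And arch_call_rest_init() ends in inline assembly because rest_init() never returns and the old early stack is abandoned: the code loads the new stack pointer into %r15 and branches with jg rather than making a call. The resulting per-stack layout, as I read the definitions above (a sketch, not part of the patch):

```c
/*
 * base + STACK_SIZE -------------------------------+
 *   struct pt_regs (initial register save area)    |  initial %r15 =
 *   one empty frame (STACK_FRAME_OVERHEAD) --------+  base + STACK_INIT_OFFSET
 *   ... usable stack, grows downward ...
 * base --------------------- STACK_END_MAGIC written here by
 *                            set_task_stack_end_magic()
 * [unmapped vmalloc guard page below => overflow faults at once]
 */
```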
@@ -329,14 +402,8 @@ static void __init setup_lowcore(void)
 		PSW_MASK_DAT | PSW_MASK_MCHECK;
 	lc->io_new_psw.addr = (unsigned long) io_int_handler;
 	lc->clock_comparator = clock_comparator_max;
-	lc->kernel_stack = ((unsigned long) &init_thread_union)
+	lc->nodat_stack = ((unsigned long) &init_thread_union)
 		+ THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
-	lc->async_stack = (unsigned long)
-		memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE)
-		+ ASYNC_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
-	lc->panic_stack = (unsigned long)
-		memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE)
-		+ PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
 	lc->current_task = (unsigned long)&init_task;
 	lc->lpp = LPP_MAGIC;
 	lc->machine_flags = S390_lowcore.machine_flags;
@@ -357,8 +424,12 @@ static void __init setup_lowcore(void)
 	lc->last_update_timer = S390_lowcore.last_update_timer;
 	lc->last_update_clock = S390_lowcore.last_update_clock;
 
-	restart_stack = memblock_virt_alloc(ASYNC_SIZE, ASYNC_SIZE);
-	restart_stack += ASYNC_SIZE;
+	/*
+	 * Allocate the global restart stack which is the same for
+	 * all CPUs in case *one* of them does a PSW restart.
+	 */
+	restart_stack = memblock_virt_alloc(STACK_SIZE, STACK_SIZE);
+	restart_stack += STACK_INIT_OFFSET;
 
 	/*
 	 * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -186,36 +186,34 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
 	pcpu_sigp_retry(pcpu, order, 0);
 }
 
-#define ASYNC_FRAME_OFFSET (ASYNC_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
-#define PANIC_FRAME_OFFSET (PAGE_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE)
-
 static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 {
-	unsigned long async_stack, panic_stack;
+	unsigned long async_stack, nodat_stack;
 	struct lowcore *lc;
 
 	if (pcpu != &pcpu_devices[0]) {
 		pcpu->lowcore =	(struct lowcore *)
 			__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
-		async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
-		panic_stack = __get_free_page(GFP_KERNEL);
-		if (!pcpu->lowcore || !panic_stack || !async_stack)
+		nodat_stack = __get_free_pages(GFP_KERNEL, STACK_ORDER);
+		if (!pcpu->lowcore || !nodat_stack)
 			goto out;
 	} else {
-		async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
-		panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
+		nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET;
 	}
+	async_stack = stack_alloc();
+	if (!async_stack)
+		goto out;
 	lc = pcpu->lowcore;
 	memcpy(lc, &S390_lowcore, 512);
 	memset((char *) lc + 512, 0, sizeof(*lc) - 512);
-	lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
-	lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
+	lc->async_stack = async_stack + STACK_INIT_OFFSET;
+	lc->nodat_stack = nodat_stack + STACK_INIT_OFFSET;
 	lc->cpu_nr = cpu;
 	lc->spinlock_lockval = arch_spin_lockval(cpu);
 	lc->spinlock_index = 0;
 	lc->br_r1_trampoline = 0x07f1;	/* br %r1 */
 	if (nmi_alloc_per_cpu(lc))
-		goto out;
+		goto out_async;
 	if (vdso_alloc_per_cpu(lc))
 		goto out_mcesa;
 	lowcore_ptr[cpu] = lc;
@@ -224,10 +222,11 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 
 out_mcesa:
 	nmi_free_per_cpu(lc);
+out_async:
+	stack_free(async_stack);
 out:
 	if (pcpu != &pcpu_devices[0]) {
-		free_page(panic_stack);
-		free_pages(async_stack, ASYNC_ORDER);
+		free_pages(nodat_stack, STACK_ORDER);
 		free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
 	}
 	return -ENOMEM;
@@ -237,15 +236,21 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
 
 static void pcpu_free_lowcore(struct pcpu *pcpu)
 {
+	unsigned long async_stack, nodat_stack, lowcore;
+
+	nodat_stack = pcpu->lowcore->nodat_stack - STACK_INIT_OFFSET;
+	async_stack = pcpu->lowcore->async_stack - STACK_INIT_OFFSET;
+	lowcore = (unsigned long) pcpu->lowcore;
+
 	pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
 	lowcore_ptr[pcpu - pcpu_devices] = NULL;
 	vdso_free_per_cpu(pcpu->lowcore);
 	nmi_free_per_cpu(pcpu->lowcore);
+	stack_free(async_stack);
 	if (pcpu == &pcpu_devices[0])
 		return;
-	free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
-	free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
-	free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
+	free_pages(nodat_stack, STACK_ORDER);
+	free_pages(lowcore, LC_ORDER);
 }
 
 #endif /* CONFIG_HOTPLUG_CPU */
@@ -293,7 +298,7 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
 {
 	struct lowcore *lc = pcpu->lowcore;
 
-	lc->restart_stack = lc->kernel_stack;
+	lc->restart_stack = lc->nodat_stack;
 	lc->restart_fn = (unsigned long) func;
 	lc->restart_data = (unsigned long) data;
 	lc->restart_source = -1UL;
@@ -303,15 +308,20 @@ static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
 /*
  * Call function via PSW restart on pcpu and stop the current cpu.
  */
+static void __pcpu_delegate(void (*func)(void*), void *data)
+{
+	func(data);	/* should not return */
+}
+
 static void pcpu_delegate(struct pcpu *pcpu, void (*func)(void *),
 			  void *data, unsigned long stack)
 {
 	struct lowcore *lc = lowcore_ptr[pcpu - pcpu_devices];
 	unsigned long source_cpu = stap();
 
-	__load_psw_mask(PSW_KERNEL_BITS);
+	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
 	if (pcpu->address == source_cpu)
-		func(data);	/* should not return */
+		CALL_ON_STACK(__pcpu_delegate, stack, 2, func, data);
 	/* Stop target cpu (if func returns this stops the current cpu). */
 	pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
 	/* Restart func on the target cpu and stop the current cpu. */
@@ -372,8 +382,7 @@ void smp_call_online_cpu(void (*func)(void *), void *data)
 void smp_call_ipl_cpu(void (*func)(void *), void *data)
 {
 	pcpu_delegate(&pcpu_devices[0], func, data,
-		      pcpu_devices->lowcore->panic_stack -
-		      PANIC_FRAME_OFFSET + PAGE_SIZE);
+		      pcpu_devices->lowcore->nodat_stack);
 }
 
 int smp_find_processor_id(u16 address)
@@ -791,37 +800,42 @@ void __init smp_detect_cpus(void)
 	memblock_free_early((unsigned long)info, sizeof(*info));
 }
 
-/*
- * Activate a secondary processor.
- */
-static void smp_start_secondary(void *cpuvoid)
+static void smp_init_secondary(void)
 {
 	int cpu = smp_processor_id();
 
-	S390_lowcore.last_update_clock = get_tod_clock();
-	S390_lowcore.restart_stack = (unsigned long) restart_stack;
-	S390_lowcore.restart_fn = (unsigned long) do_restart;
-	S390_lowcore.restart_data = 0;
-	S390_lowcore.restart_source = -1UL;
-	restore_access_regs(S390_lowcore.access_regs_save_area);
-	__ctl_load(S390_lowcore.cregs_save_area, 0, 15);
-	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
 	cpu_init();
 	preempt_disable();
 	init_cpu_timer();
 	vtime_init();
 	pfault_init();
-	notify_cpu_starting(cpu);
+	notify_cpu_starting(smp_processor_id());
 	if (topology_cpu_dedicated(cpu))
 		set_cpu_flag(CIF_DEDICATED_CPU);
 	else
 		clear_cpu_flag(CIF_DEDICATED_CPU);
-	set_cpu_online(cpu, true);
+	set_cpu_online(smp_processor_id(), true);
 	inc_irq_stat(CPU_RST);
 	local_irq_enable();
 	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
 }
 
+/*
+ * Activate a secondary processor.
+ */
+static void smp_start_secondary(void *cpuvoid)
+{
+	S390_lowcore.last_update_clock = get_tod_clock();
+	S390_lowcore.restart_stack = (unsigned long) restart_stack;
+	S390_lowcore.restart_fn = (unsigned long) do_restart;
+	S390_lowcore.restart_data = 0;
+	S390_lowcore.restart_source = -1UL;
+	restore_access_regs(S390_lowcore.access_regs_save_area);
+	__ctl_load(S390_lowcore.cregs_save_area, 0, 15);
+	__load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+	CALL_ON_STACK(smp_init_secondary, S390_lowcore.kernel_stack, 0);
+}
+
 /* Upping and downing of CPUs */
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
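The smp_start_secondary()/smp_init_secondary() split exists because of the vmap stacks: a secondary CPU enters via restart PSW on a linearly mapped stack, possibly without DAT, and only after __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT) can it step onto the vmalloc'ed kernel stack. In outline (a summary of the hunk above, not new code):

```c
/*
 * restart PSW -> smp_start_secondary()      // linearly mapped stack
 *     set up lowcore restart fields, access regs, control regs
 *     __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);  // DAT on
 *     CALL_ON_STACK(smp_init_secondary,
 *                   S390_lowcore.kernel_stack, 0);      // hop stacks
 * smp_init_secondary()                      // on the vmap kernel stack
 *     cpu_init(), timers, pfault, set_cpu_online(), idle loop
 */
```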
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -29,10 +29,11 @@
 	.section .text
 ENTRY(swsusp_arch_suspend)
-	stmg	%r6,%r15,__SF_GPRS(%r15)
-	lgr	%r1,%r15
-	aghi	%r15,-STACK_FRAME_OVERHEAD
-	stg	%r1,__SF_BACKCHAIN(%r15)
+	lg	%r1,__LC_NODAT_STACK
+	aghi	%r1,-STACK_FRAME_OVERHEAD
+	stmg	%r6,%r15,__SF_GPRS(%r1)
+	stg	%r15,__SF_BACKCHAIN(%r1)
+	lgr	%r15,%r1
 
 	/* Store FPU registers */
 	brasl	%r14,save_fpu_regs
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -89,10 +89,8 @@ static int __memcpy_real(void *dest, void *src, size_t count)
 	return rc;
 }
 
-/*
- * Copy memory in real mode (kernel to kernel)
- */
-int memcpy_real(void *dest, void *src, size_t count)
+static unsigned long _memcpy_real(unsigned long dest, unsigned long src,
+				  unsigned long count)
 {
 	int irqs_disabled, rc;
 	unsigned long flags;
@@ -103,13 +101,30 @@ int memcpy_real(void *dest, void *src, size_t count)
 	irqs_disabled = arch_irqs_disabled_flags(flags);
 	if (!irqs_disabled)
 		trace_hardirqs_off();
-	rc = __memcpy_real(dest, src, count);
+	rc = __memcpy_real((void *) dest, (void *) src, (size_t) count);
 	if (!irqs_disabled)
 		trace_hardirqs_on();
 	__arch_local_irq_ssm(flags);
 	return rc;
 }
 
+/*
+ * Copy memory in real mode (kernel to kernel)
+ */
+int memcpy_real(void *dest, void *src, size_t count)
+{
+	if (S390_lowcore.nodat_stack != 0)
+		return CALL_ON_STACK(_memcpy_real, S390_lowcore.nodat_stack,
+				     3, dest, src, count);
+	/*
+	 * This is a really early memcpy_real call, the stacks are
+	 * not set up yet. Just call _memcpy_real on the early boot
+	 * stack.
+	 */
+	return _memcpy_real((unsigned long) dest,(unsigned long) src,
+			    (unsigned long) count);
+}
+
 /*
  * Copy memory in absolute mode (kernel to kernel)
  */
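The same DAT-off reasoning as in the kdump change applies here: __memcpy_real() flips DAT off around the copy, so with vmap stacks it must run on the nodat stack, and very early callers fall back to the identity-mapped boot stack because lowcore's nodat_stack is not set yet. A hypothetical caller (illustrative only; the helper name is mine):

```c
/*
 * Hypothetical usage sketch, not part of the patch: copy from a real
 * (absolute) address into a kernel buffer.  memcpy_real() transparently
 * hops onto the nodat stack before DAT is switched off.
 */
static int read_real_sketch(void *buf, unsigned long real_addr, size_t len)
{
	return memcpy_real(buf, (void *) real_addr, len);
}
```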