Commit da822b83 authored by Martin Schwidefsky, committed by Linus Torvalds

[PATCH] s390: micro optimizations.

 - Put the cpu number into the lowcore.
 - Put percpu_offset into the lowcore.
 - Put the current task pointer into the lowcore.
 - Replace barrier() with cpu_relax().
parent 5df799ac
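
The common thread: data that every cpu reads on hot paths (the current task pointer, the cpu number, the per-cpu offset, the softirq mask) moves into the prefixed lowcore page, where a single displacement-addressed load reaches it. A condensed before/after sketch of the `current` change, taken from the include/asm-s390/current.h hunk below:

    /* before: two dependent loads through thread_info */
    static inline struct task_struct *get_current(void)
    {
            return current_thread_info()->task;
    }

    /* after: __switch_to stores the next task at __LC_CURRENT, so
     * 'current' becomes a single load from the per-cpu lowcore */
    #define current ((struct task_struct *const) S390_lowcore.current_task)
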
@@ -141,7 +141,8 @@ __switch_to_noper:
 	stam	%a4,%a4,__THREAD_ar4(%r2)	# store kernel access reg. 4
 	lam	%a2,%a2,__THREAD_ar2(%r3)	# load kernel access reg. 2
 	lam	%a4,%a4,__THREAD_ar4(%r3)	# load kernel access reg. 4
-	lm	%r6,%r15,24(%r15)	# load resume registers of next task
+	lm	%r6,%r15,24(%r15)	# load __switch_to registers of next task
+	st	%r3,__LC_CURRENT	# __LC_CURRENT = current task struct
 	l	%r3,__THREAD_info(%r3)	# load thread_info from task struct
 	ahi	%r3,8192
 	st	%r3,__LC_KERNEL_STACK	# __LC_KERNEL_STACK = new kernel stack
...
@@ -127,7 +127,8 @@ __switch_to_noper:
 	stam	%a4,%a4,__THREAD_ar4(%r2)	# store kernel access reg. 4
 	lam	%a2,%a2,__THREAD_ar2(%r3)	# load kernel access reg. 2
 	lam	%a4,%a4,__THREAD_ar4(%r3)	# load kernel access reg. 4
-	lmg	%r6,%r15,48(%r15)	# load resume registers of next task
+	lmg	%r6,%r15,48(%r15)	# load __switch_to registers of next task
+	stg	%r3,__LC_CURRENT	# __LC_CURRENT = current task struct
 	lg	%r3,__THREAD_info(%r3)	# load thread_info from task struct
 	aghi	%r3,16384
 	stg	%r3,__LC_KERNEL_STACK	# __LC_KERNEL_STACK = new kernel stack
...
@@ -30,6 +30,7 @@
 #include <linux/config.h>
 #include <asm/setup.h>
 #include <asm/lowcore.h>
+#include <asm/offsets.h>

 #ifndef CONFIG_IPL
 .org 0
@@ -633,6 +634,7 @@ _stext: basr %r13,0 # get base
 # Setup stack
 #
 	l	%r15,.Linittu-.LPG2(%r13)
+	mvc	__LC_CURRENT(4),__TI_task(%r15)
 	ahi	%r15,8192		# init_task_union + 8192
 	st	%r15,__LC_KERNEL_STACK	# set end of kernel stack
 	ahi	%r15,-96
...
@@ -30,6 +30,7 @@
 #include <linux/config.h>
 #include <asm/setup.h>
 #include <asm/lowcore.h>
+#include <asm/offsets.h>

 #ifndef CONFIG_IPL
 .org 0
@@ -642,6 +643,8 @@ _stext: basr %r13,0 # get base
 # Setup stack
 #
 	larl	%r15,init_thread_union
+	lg	%r14,__TI_task(%r15)	# cache current in lowcore
+	stg	%r14,__LC_CURRENT
 	aghi	%r15,16384		# init_task_union + 16384
 	stg	%r15,__LC_KERNEL_STACK	# set end of kernel stack
 	aghi	%r15,-160
...
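
Both boot paths seed __LC_CURRENT with the init task before anything can use `current`, since the reworked macro reads the lowcore unconditionally; the __TI_task offset used here is presumably what the newly included <asm/offsets.h> provides.
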
@@ -97,7 +97,6 @@ void __devinit cpu_init (void)
 	 */
 	asm volatile ("stidp %0": "=m" (S390_lowcore.cpu_data.cpu_id));
 	S390_lowcore.cpu_data.cpu_addr = addr;
-	S390_lowcore.cpu_data.cpu_nr = nr;

 	/*
 	 * Force FPU initialization:
@@ -418,7 +417,7 @@ void __init setup_arch(char **cmdline_p)
 	 * we are rounding upwards:
 	 */
 	start_pfn = (__pa(&_end) + PAGE_SIZE - 1) >> PAGE_SHIFT;
-	end_pfn = memory_end >> PAGE_SHIFT;
+	end_pfn = max_pfn = memory_end >> PAGE_SHIFT;

 	/*
 	 * Initialize the boot-time allocator (with low memory only):
@@ -497,21 +496,17 @@ void __init setup_arch(char **cmdline_p)
 	lc->io_new_psw.addr = PSW_ADDR_AMODE + (unsigned long) io_int_handler;
 	lc->ipl_device = S390_lowcore.ipl_device;
 	lc->jiffy_timer = -1LL;
-#ifndef CONFIG_ARCH_S390X
-	lc->kernel_stack = ((__u32) &init_thread_union) + 8192;
-	lc->async_stack = (__u32)
-		__alloc_bootmem(2*PAGE_SIZE, 2*PAGE_SIZE, 0) + 8192;
-	set_prefix((__u32) lc);
-#else /* CONFIG_ARCH_S390X */
-	lc->kernel_stack = ((__u64) &init_thread_union) + 16384;
-	lc->async_stack = (__u64)
-		__alloc_bootmem(4*PAGE_SIZE, 4*PAGE_SIZE, 0) + 16384;
+	lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
+	lc->async_stack = (unsigned long)
+		__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
+	lc->current_task = (unsigned long) init_thread_union.thread_info.task;
+#ifdef CONFIG_ARCH_S390X
 	if (MACHINE_HAS_DIAG44)
 		lc->diag44_opcode = 0x83000044;
 	else
 		lc->diag44_opcode = 0x07000700;
-	set_prefix((__u32)(__u64) lc);
 #endif /* CONFIG_ARCH_S390X */
+	set_prefix((u32)(unsigned long) lc);
 	cpu_init();
 	__cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
...
@@ -138,11 +138,11 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 	/* Wait for response */
 	while (atomic_read(&data.started) != cpus)
-		barrier();
+		cpu_relax();
 	if (wait)
 		while (atomic_read(&data.finished) != cpus)
-			barrier();
+			cpu_relax();
 	spin_unlock(&call_lock);
 	return 0;
@@ -207,7 +207,8 @@ static void do_machine_restart(void * __unused)
 	cpu_clear(smp_processor_id(), cpu_restart_map);
 	if (smp_processor_id() == 0) {
 		/* Wait for all other cpus to enter do_machine_restart. */
-		while (!cpus_empty(cpu_restart_map));
+		while (!cpus_empty(cpu_restart_map))
+			cpu_relax();
 		/* Store status of other cpus. */
 		do_store_status();
 		/*
@@ -524,6 +525,9 @@ int __cpu_up(unsigned int cpu)
 	__asm__ __volatile__("stam 0,15,0(%0)"
 		: : "a" (&cpu_lowcore->access_regs_save_area)
 		: "memory");
+	cpu_lowcore->percpu_offset = __per_cpu_offset[cpu];
+	cpu_lowcore->current_task = (unsigned long) idle;
+	cpu_lowcore->cpu_data.cpu_nr = cpu;
 	eieio();
 	signal_processor(cpu,sigp_restart);
@@ -570,6 +574,7 @@ void __devinit smp_prepare_boot_cpu(void)
 {
 	cpu_set(smp_processor_id(), cpu_online_map);
 	cpu_set(smp_processor_id(), cpu_possible_map);
+	S390_lowcore.percpu_offset = __per_cpu_offset[smp_processor_id()];
 }

 void smp_cpus_done(unsigned int max_cpus)
...
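
A note on the barrier() to cpu_relax() swap in the wait loops above: barrier() only stops the compiler from caching the loop condition, while cpu_relax() can also signal the hypervisor that the virtual cpu is merely spinning; that is what the diag44_opcode values staged in setup.c are for (0x83000044 is diagnose 0x44, voluntary time-slice end, and 0x07000700 appears to be two "bcr 0,0" no-ops for machines without it). A minimal sketch of the resulting idiom, with a hypothetical helper name:

    /* Spin until an atomic counter reaches 'target', yielding the
     * virtual cpu between polls instead of burning the time slice. */
    static inline void wait_for_count(atomic_t *v, int target)
    {
            while (atomic_read(v) != target)
                    cpu_relax();
    }
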
@@ -34,7 +34,8 @@ void __delay(unsigned long loops)
 }

 /*
- * Waits for 'usecs' microseconds using the tod clock
+ * Waits for 'usecs' microseconds using the tod clock, giving up the time slice
+ * of the virtual PU inbetween to avoid congestion.
  */
 void __udelay(unsigned long usecs)
 {
@@ -44,7 +45,7 @@ void __udelay(unsigned long usecs)
 		return;
 	asm volatile ("STCK %0" : "=m" (start_cc));
 	do {
+		cpu_relax();
 		asm volatile ("STCK %0" : "=m" (end_cc));
 	} while (((end_cc - start_cc)/4096) < usecs);
 }
...
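
The division by 4096 converts TOD units to microseconds: bit 51 of the s390 TOD clock increments once per microsecond, so an STCK difference advances 4096 units per microsecond. The added cpu_relax() makes the delay loop give up the virtual cpu between clock reads, as the updated comment says.
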
@@ -488,7 +488,7 @@ typedef struct {
 int pfault_init(void)
 {
 	pfault_refbk_t refbk =
-		{ 0x258, 0, 5, 2, __LC_KERNEL_STACK, 1ULL << 48, 1ULL << 48,
+		{ 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48,
 		  __PF_RES_FIELD };
 	int rc;
@@ -555,8 +555,7 @@ pfault_interrupt(struct pt_regs *regs, __u16 error_code)
 	/*
 	 * Get the token (= address of kernel stack of affected task).
 	 */
-	tsk = (struct task_struct *)
-		(*((unsigned long *) __LC_PFAULT_INTPARM) - THREAD_SIZE);
+	tsk = *(struct task_struct **) __LC_PFAULT_INTPARM;

 	/*
 	 * We got all needed information from the lowcore and can
...
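
The pfault token changes representation here: the refbk now designates __LC_CURRENT rather than __LC_KERNEL_STACK, so the value delivered at __LC_PFAULT_INTPARM is the task_struct pointer itself instead of a kernel-stack address that had to be mapped back to the task by subtracting THREAD_SIZE. The handler shrinks to a single dereference.
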
@@ -12,17 +12,11 @@
 #define _S390_CURRENT_H

 #ifdef __KERNEL__
-#include <linux/thread_info.h>
+#include <asm/lowcore.h>

 struct task_struct;

-static inline struct task_struct * get_current(void)
-{
-	return current_thread_info()->task;
-}
-
-#define current get_current()
+#define current ((struct task_struct *const)S390_lowcore.current_task)

 #endif
...
@@ -18,14 +18,17 @@
 #include <linux/cache.h>
 #include <asm/lowcore.h>

-/* entry.S is sensitive to the offsets of these fields */
+/* irq_cpustat_t is unused currently, but could be converted
+ * into a percpu variable instead of storing softirq_pending
+ * on the lowcore */
 typedef struct {
 	unsigned int __softirq_pending;
-	unsigned int __syscall_count;
-	struct task_struct * __ksoftirqd_task; /* waitqueue is too large */
-} ____cacheline_aligned irq_cpustat_t;
+} irq_cpustat_t;

-#include <linux/irq_cpustat.h>	/* Standard mappings for irq_cpustat_t above */
+#define softirq_pending(cpu)	(lowcore_ptr[(cpu)]->softirq_pending)
+#define local_softirq_pending()	(S390_lowcore.softirq_pending)
+#define __ARCH_IRQ_STAT

 /*
  * We put the hardirq and softirq counter into the preemption
@@ -76,7 +79,12 @@ typedef struct {
 #define hardirq_trylock()	(!in_interrupt())
 #define hardirq_endlock()	do { } while (0)

-#define irq_enter()	(preempt_count() += HARDIRQ_OFFSET)
+#define irq_enter() \
+do { \
+	BUG_ON( hardirq_count() ); \
+	(preempt_count() += HARDIRQ_OFFSET); \
+} while(0)

 extern void do_call_softirq(void);
@@ -93,16 +101,10 @@ extern void do_call_softirq(void);
 #define irq_exit() \
 do { \
 	preempt_count() -= IRQ_EXIT_OFFSET; \
-	if (!in_interrupt() && softirq_pending(smp_processor_id())) \
+	if (!in_interrupt() && local_softirq_pending()) \
 		/* Use the async. stack for softirq */ \
 		do_call_softirq(); \
 	preempt_enable_no_resched(); \
 } while (0)

-#ifndef CONFIG_SMP
-# define synchronize_irq(irq)	barrier()
-#else
-extern void synchronize_irq(unsigned int irq);
-#endif /* CONFIG_SMP */
-
 #endif /* __ASM_HARDIRQ_H */
...
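
With softirq_pending held in the lowcore, local_softirq_pending() is one load from the running cpu's prefix page, and other cpus can still inspect a remote mask through lowcore_ptr[cpu]. irq_cpustat_t shrinks to an unused stub, __ARCH_IRQ_STAT opts out of the generic <linux/irq_cpustat.h> mappings, and the synchronize_irq() fallbacks drop out of this header.
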
@@ -65,6 +65,7 @@
 #define __LC_CPUADDR		0xC68
 #define __LC_IPLDEV		0xC7C
 #define __LC_JIFFY_TIMER	0xC80
+#define __LC_CURRENT		0xC90
 #else /* __s390x__ */
 #define __LC_KERNEL_STACK	0xD40
 #define __LC_ASYNC_STACK	0xD48
@@ -72,6 +73,7 @@
 #define __LC_CPUADDR		0xD98
 #define __LC_IPLDEV		0xDB8
 #define __LC_JIFFY_TIMER	0xDC0
+#define __LC_CURRENT		0xDD8
 #endif /* __s390x__ */

 #define __LC_PANIC_MAGIC	0xE00
@@ -169,7 +171,10 @@ struct _lowcore
 	/* SMP info area: defined by DJB */
 	__u64	jiffy_timer;		/* 0xc80 */
 	__u32	ext_call_fast;		/* 0xc88 */
-	__u8	pad11[0xe00-0xc8c];	/* 0xc8c */
+	__u32	percpu_offset;		/* 0xc8c */
+	__u32	current_task;		/* 0xc90 */
+	__u32	softirq_pending;	/* 0xc94 */
+	__u8	pad11[0xe00-0xc98];	/* 0xc98 */

 	/* 0xe00 is used as indicator for dump tools */
 	/* whether the kernel died with panic() or not */
@@ -244,7 +249,10 @@ struct _lowcore
 	/* SMP info area: defined by DJB */
 	__u64	jiffy_timer;		/* 0xdc0 */
 	__u64	ext_call_fast;		/* 0xdc8 */
-	__u8	pad12[0xe00-0xdd0];	/* 0xdd0 */
+	__u64	percpu_offset;		/* 0xdd0 */
+	__u64	current_task;		/* 0xdd8 */
+	__u64	softirq_pending;	/* 0xde0 */
+	__u8	pad12[0xe00-0xde8];	/* 0xde8 */

 	/* 0xe00 is used as indicator for dump tools */
 	/* whether the kernel died with panic() or not */
...
@@ -2,5 +2,13 @@
 #define __ARCH_S390_PERCPU__

 #include <asm-generic/percpu.h>
+#include <asm/lowcore.h>
+
+/*
+ * s390 uses the generic implementation for per cpu data, with the exception that
+ * the offset of the cpu local data area is cached in the cpu's lowcore memory
+ */
+#undef __get_cpu_var
+#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, S390_lowcore.percpu_offset))

 #endif /* __ARCH_S390_PERCPU__ */
...
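
A hedged usage sketch of the override (the per-cpu variable is hypothetical): every __get_cpu_var() access now takes its base from S390_lowcore.percpu_offset instead of indexing __per_cpu_offset[] with the cpu number.

    DEFINE_PER_CPU(unsigned long, ext_irq_count);	/* hypothetical */

    static inline void count_ext_irq(void)
    {
            /* expands to *RELOC_HIDE(&per_cpu__ext_irq_count,
             *                        S390_lowcore.percpu_offset) */
            __get_cpu_var(ext_irq_count)++;
    }
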
@@ -46,7 +46,7 @@ extern cpumask_t cpu_possible_map;
 #define PROC_CHANGE_PENALTY	20	/* Schedule penalty */

-#define smp_processor_id()	(current_thread_info()->cpu)
+#define smp_processor_id()	(S390_lowcore.cpu_data.cpu_nr)

 #define cpu_online(cpu)	cpu_isset(cpu, cpu_online_map)
 #define cpu_possible(cpu)	cpu_isset(cpu, cpu_possible_map)
...
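
smp_processor_id() now reads cpu_data.cpu_nr straight from the lowcore rather than thread_info->cpu, which is why cpu_init() stops setting cpu_nr and why __cpu_up() and smp_prepare_boot_cpu() seed cpu_nr and percpu_offset into a cpu's lowcore before that cpu starts running.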