Commit da822b83 authored by Martin Schwidefsky, committed by Linus Torvalds

[PATCH] s390: micro optimizations.

 - Put cpu number to lowcore.
 - Put percpu_offset to lowcore.
 - Put current pointer to lowcore.
 - Replace barrier() with cpu_relax().
parent 5df799ac
......@@ -141,7 +141,8 @@ __switch_to_noper:
stam %a4,%a4,__THREAD_ar4(%r2) # store kernel access reg. 4
lam %a2,%a2,__THREAD_ar2(%r3) # load kernel access reg. 2
lam %a4,%a4,__THREAD_ar4(%r3) # load kernel access reg. 4
lm %r6,%r15,24(%r15) # load resume registers of next task
lm %r6,%r15,24(%r15) # load __switch_to registers of next task
st %r3,__LC_CURRENT # __LC_CURRENT = current task struct
l %r3,__THREAD_info(%r3) # load thread_info from task struct
ahi %r3,8192
st %r3,__LC_KERNEL_STACK # __LC_KERNEL_STACK = new kernel stack
......
......@@ -127,7 +127,8 @@ __switch_to_noper:
stam %a4,%a4,__THREAD_ar4(%r2) # store kernel access reg. 4
lam %a2,%a2,__THREAD_ar2(%r3) # load kernel access reg. 2
lam %a4,%a4,__THREAD_ar4(%r3) # load kernel access reg. 4
lmg %r6,%r15,48(%r15) # load resume registers of next task
lmg %r6,%r15,48(%r15) # load __switch_to registers of next task
stg %r3,__LC_CURRENT # __LC_CURRENT = current task struct
lg %r3,__THREAD_info(%r3) # load thread_info from task struct
aghi %r3,16384
stg %r3,__LC_KERNEL_STACK # __LC_KERNEL_STACK = new kernel stack
......
......@@ -30,6 +30,7 @@
#include <linux/config.h>
#include <asm/setup.h>
#include <asm/lowcore.h>
#include <asm/offsets.h>
#ifndef CONFIG_IPL
.org 0
......@@ -633,6 +634,7 @@ _stext: basr %r13,0 # get base
# Setup stack
#
l %r15,.Linittu-.LPG2(%r13)
mvc __LC_CURRENT(4),__TI_task(%r15)
ahi %r15,8192 # init_task_union + 8192
st %r15,__LC_KERNEL_STACK # set end of kernel stack
ahi %r15,-96
......
......@@ -30,6 +30,7 @@
#include <linux/config.h>
#include <asm/setup.h>
#include <asm/lowcore.h>
#include <asm/offsets.h>
#ifndef CONFIG_IPL
.org 0
......@@ -642,6 +643,8 @@ _stext: basr %r13,0 # get base
# Setup stack
#
larl %r15,init_thread_union
lg %r14,__TI_task(%r15) # cache current in lowcore
stg %r14,__LC_CURRENT
aghi %r15,16384 # init_task_union + 16384
stg %r15,__LC_KERNEL_STACK # set end of kernel stack
aghi %r15,-160
......
......@@ -97,7 +97,6 @@ void __devinit cpu_init (void)
*/
asm volatile ("stidp %0": "=m" (S390_lowcore.cpu_data.cpu_id));
S390_lowcore.cpu_data.cpu_addr = addr;
S390_lowcore.cpu_data.cpu_nr = nr;
/*
* Force FPU initialization:
......@@ -418,7 +417,7 @@ void __init setup_arch(char **cmdline_p)
* we are rounding upwards:
*/
start_pfn = (__pa(&_end) + PAGE_SIZE - 1) >> PAGE_SHIFT;
end_pfn = memory_end >> PAGE_SHIFT;
end_pfn = max_pfn = memory_end >> PAGE_SHIFT;
/*
* Initialize the boot-time allocator (with low memory only):
......@@ -497,21 +496,17 @@ void __init setup_arch(char **cmdline_p)
lc->io_new_psw.addr = PSW_ADDR_AMODE + (unsigned long) io_int_handler;
lc->ipl_device = S390_lowcore.ipl_device;
lc->jiffy_timer = -1LL;
#ifndef CONFIG_ARCH_S390X
lc->kernel_stack = ((__u32) &init_thread_union) + 8192;
lc->async_stack = (__u32)
__alloc_bootmem(2*PAGE_SIZE, 2*PAGE_SIZE, 0) + 8192;
set_prefix((__u32) lc);
#else /* CONFIG_ARCH_S390X */
lc->kernel_stack = ((__u64) &init_thread_union) + 16384;
lc->async_stack = (__u64)
__alloc_bootmem(4*PAGE_SIZE, 4*PAGE_SIZE, 0) + 16384;
lc->kernel_stack = ((unsigned long) &init_thread_union) + THREAD_SIZE;
lc->async_stack = (unsigned long)
__alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0) + ASYNC_SIZE;
lc->current_task = (unsigned long) init_thread_union.thread_info.task;
#ifdef CONFIG_ARCH_S390X
if (MACHINE_HAS_DIAG44)
lc->diag44_opcode = 0x83000044;
else
lc->diag44_opcode = 0x07000700;
set_prefix((__u32)(__u64) lc);
#endif /* CONFIG_ARCH_S390X */
set_prefix((u32)(unsigned long) lc);
cpu_init();
__cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
......
......@@ -138,11 +138,11 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
/* Wait for response */
while (atomic_read(&data.started) != cpus)
barrier();
cpu_relax();
if (wait)
while (atomic_read(&data.finished) != cpus)
barrier();
cpu_relax();
spin_unlock(&call_lock);
return 0;
......@@ -207,7 +207,8 @@ static void do_machine_restart(void * __unused)
cpu_clear(smp_processor_id(), cpu_restart_map);
if (smp_processor_id() == 0) {
/* Wait for all other cpus to enter do_machine_restart. */
while (!cpus_empty(cpu_restart_map));
while (!cpus_empty(cpu_restart_map))
cpu_relax();
/* Store status of other cpus. */
do_store_status();
/*
......@@ -524,8 +525,11 @@ int __cpu_up(unsigned int cpu)
__asm__ __volatile__("stam 0,15,0(%0)"
: : "a" (&cpu_lowcore->access_regs_save_area)
: "memory");
eieio();
signal_processor(cpu,sigp_restart);
cpu_lowcore->percpu_offset = __per_cpu_offset[cpu];
cpu_lowcore->current_task = (unsigned long) idle;
cpu_lowcore->cpu_data.cpu_nr = cpu;
eieio();
signal_processor(cpu,sigp_restart);
while (!cpu_online(cpu));
return 0;
......@@ -570,6 +574,7 @@ void __devinit smp_prepare_boot_cpu(void)
{
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), cpu_possible_map);
S390_lowcore.percpu_offset = __per_cpu_offset[smp_processor_id()];
}
void smp_cpus_done(unsigned int max_cpus)
......
......@@ -34,7 +34,8 @@ void __delay(unsigned long loops)
}
/*
* Waits for 'usecs' microseconds using the tod clock
* Waits for 'usecs' microseconds using the tod clock, giving up the time slice
* of the virtual PU in between to avoid congestion.
*/
void __udelay(unsigned long usecs)
{
......@@ -44,7 +45,7 @@ void __udelay(unsigned long usecs)
return;
asm volatile ("STCK %0" : "=m" (start_cc));
do {
cpu_relax();
asm volatile ("STCK %0" : "=m" (end_cc));
} while (((end_cc - start_cc)/4096) < usecs);
}
......@@ -488,7 +488,7 @@ typedef struct {
int pfault_init(void)
{
pfault_refbk_t refbk =
{ 0x258, 0, 5, 2, __LC_KERNEL_STACK, 1ULL << 48, 1ULL << 48,
{ 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48,
__PF_RES_FIELD };
int rc;
......@@ -555,8 +555,7 @@ pfault_interrupt(struct pt_regs *regs, __u16 error_code)
/*
* Get the token (= address of the task structure of the affected task).
*/
tsk = (struct task_struct *)
(*((unsigned long *) __LC_PFAULT_INTPARM) - THREAD_SIZE);
tsk = *(struct task_struct **) __LC_PFAULT_INTPARM;
/*
* We got all needed information from the lowcore and can
......
......@@ -12,17 +12,11 @@
#define _S390_CURRENT_H
#ifdef __KERNEL__
#include <linux/thread_info.h>
#include <asm/lowcore.h>
struct task_struct;
static inline struct task_struct * get_current(void)
{
return current_thread_info()->task;
}
#define current get_current()
#define current ((struct task_struct *const)S390_lowcore.current_task)
#endif
......
......@@ -18,14 +18,17 @@
#include <linux/cache.h>
#include <asm/lowcore.h>
/* entry.S is sensitive to the offsets of these fields */
/* irq_cpustat_t is unused currently, but could be converted
* into a percpu variable instead of storing softirq_pending
* on the lowcore */
typedef struct {
unsigned int __softirq_pending;
unsigned int __syscall_count;
struct task_struct * __ksoftirqd_task; /* waitqueue is too large */
} ____cacheline_aligned irq_cpustat_t;
} irq_cpustat_t;
#include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */
#define softirq_pending(cpu) (lowcore_ptr[(cpu)]->softirq_pending)
#define local_softirq_pending() (S390_lowcore.softirq_pending)
#define __ARCH_IRQ_STAT
/*
* We put the hardirq and softirq counter into the preemption
......@@ -76,7 +79,12 @@ typedef struct {
#define hardirq_trylock() (!in_interrupt())
#define hardirq_endlock() do { } while (0)
#define irq_enter() (preempt_count() += HARDIRQ_OFFSET)
#define irq_enter() \
do { \
BUG_ON( hardirq_count() ); \
(preempt_count() += HARDIRQ_OFFSET); \
} while(0)
extern void do_call_softirq(void);
......@@ -93,16 +101,10 @@ extern void do_call_softirq(void);
#define irq_exit() \
do { \
preempt_count() -= IRQ_EXIT_OFFSET; \
if (!in_interrupt() && softirq_pending(smp_processor_id())) \
if (!in_interrupt() && local_softirq_pending()) \
/* Use the async. stack for softirq */ \
do_call_softirq(); \
preempt_enable_no_resched(); \
} while (0)
#ifndef CONFIG_SMP
# define synchronize_irq(irq) barrier()
#else
extern void synchronize_irq(unsigned int irq);
#endif /* CONFIG_SMP */
#endif /* __ASM_HARDIRQ_H */
......@@ -65,6 +65,7 @@
#define __LC_CPUADDR 0xC68
#define __LC_IPLDEV 0xC7C
#define __LC_JIFFY_TIMER 0xC80
#define __LC_CURRENT 0xC90
#else /* __s390x__ */
#define __LC_KERNEL_STACK 0xD40
#define __LC_ASYNC_STACK 0xD48
......@@ -72,6 +73,7 @@
#define __LC_CPUADDR 0xD98
#define __LC_IPLDEV 0xDB8
#define __LC_JIFFY_TIMER 0xDC0
#define __LC_CURRENT 0xDD8
#endif /* __s390x__ */
#define __LC_PANIC_MAGIC 0xE00
......@@ -169,7 +171,10 @@ struct _lowcore
/* SMP info area: defined by DJB */
__u64 jiffy_timer; /* 0xc80 */
__u32 ext_call_fast; /* 0xc88 */
__u8 pad11[0xe00-0xc8c]; /* 0xc8c */
__u32 percpu_offset; /* 0xc8c */
__u32 current_task; /* 0xc90 */
__u32 softirq_pending; /* 0xc94 */
__u8 pad11[0xe00-0xc98]; /* 0xc98 */
/* 0xe00 is used as indicator for dump tools */
/* whether the kernel died with panic() or not */
......@@ -244,7 +249,10 @@ struct _lowcore
/* SMP info area: defined by DJB */
__u64 jiffy_timer; /* 0xdc0 */
__u64 ext_call_fast; /* 0xdc8 */
__u8 pad12[0xe00-0xdd0]; /* 0xdd0 */
__u64 percpu_offset; /* 0xdd0 */
__u64 current_task; /* 0xdd8 */
__u64 softirq_pending; /* 0xde0 */
__u8 pad12[0xe00-0xde8]; /* 0xde8 */
/* 0xe00 is used as indicator for dump tools */
/* whether the kernel died with panic() or not */
......
......@@ -2,5 +2,13 @@
#define __ARCH_S390_PERCPU__
#include <asm-generic/percpu.h>
#include <asm/lowcore.h>
/*
* s390 uses the generic implementation for per cpu data, with the exception that
* the offset of the cpu local data area is cached in the cpu's lowcore memory
*/
#undef __get_cpu_var
#define __get_cpu_var(var) (*RELOC_HIDE(&per_cpu__##var, S390_lowcore.percpu_offset))
#endif /* __ARCH_S390_PERCPU__ */
......@@ -46,7 +46,7 @@ extern cpumask_t cpu_possible_map;
#define PROC_CHANGE_PENALTY 20 /* Schedule penalty */
#define smp_processor_id() (current_thread_info()->cpu)
#define smp_processor_id() (S390_lowcore.cpu_data.cpu_nr)
#define cpu_online(cpu) cpu_isset(cpu, cpu_online_map)
#define cpu_possible(cpu) cpu_isset(cpu, cpu_possible_map)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment