Commit ba9ccbcf authored by David Mosberger's avatar David Mosberger

ia64: Finish the fsyscall support (finally!). Now fsyscall stubs will

	run faster than break-based syscall stubs, even if there is
	no light-weight syscall handler.
	Adds a new boot command-line option "nolwsys" which can be used
	to turn off light-weight system call handlers.  Good for
	performance measurement and (potentially) for debugging.
parent ce2070ec
......@@ -1464,3 +1464,6 @@ sys_call_table:
data8 ia64_ni_syscall
data8 ia64_ni_syscall
data8 ia64_ni_syscall
data8 ia64_ni_syscall
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
This diff is collapsed.
......@@ -11,12 +11,10 @@
#include <asm/sigcontext.h>
#include <asm/system.h>
#include <asm/unistd.h>
#include <asm/page.h>
.section .text.gate, "ax"
.start_gate:
#if CONFIG_FSYS
#include <asm/errno.h>
......@@ -49,6 +47,7 @@ END(syscall_via_break)
* all other "scratch" registers: undefined
* all "preserved" registers: same as on entry
*/
GLOBAL_ENTRY(syscall_via_epc)
.prologue
.altrp b6
......@@ -65,19 +64,38 @@ GLOBAL_ENTRY(syscall_via_epc)
}
;;
rsm psr.be
movl r18=fsyscall_table
movl r14=fsyscall_table
mov r16=IA64_KR(CURRENT)
mov r19=255
;;
shladd r18=r17,3,r18
cmp.geu p6,p0=r19,r17 // (syscall > 0 && syscall <= 1024+255)?
mov r16=IA64_KR(CURRENT) // 12 cycle read latency
mov r19=NR_syscalls-1
;;
shladd r18=r17,3,r14
srlz.d // ensure little-endian byteorder is in effect
cmp.ne p8,p0=r0,r0 // p8 <- FALSE
/* Note: if r17 is a NaT, p6 will be set to zero. */
cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)?
;;
(p6) ld8 r18=[r18]
mov r29=psr // read psr (12 cyc load latency)
add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
;;
(p6) mov b7=r18
(p6) tbit.z p8,p0=r18,0
(p8) br.dptk.many b7
mov r27=ar.rsc
mov r21=ar.fpsr
mov r26=ar.pfs
#if 1/*def CONFIG_ITANIUM*/
(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
;;
(p6) mov b7=r14
(p6) br.sptk.many b7
#else
/* We can't do this until gate is a proper ELF DSO. */
(p6) brl.cond.sptk fsys_bubble_down
#endif
mov r10=-1
mov r8=ENOSYS
......@@ -85,24 +103,6 @@ GLOBAL_ENTRY(syscall_via_epc)
br.ret.sptk.many b6
END(syscall_via_epc)
#if 0
GLOBAL_ENTRY(fsys_fallback_syscall)
/*
* It would be better/fsyser to do the SAVE_MIN magic directly here, but for now
* we simply fall back on doing a system-call via break. Good enough
* to get started. (Note: we have to do this through the gate page again, since
* the br.ret will switch us back to user-level privilege.)
*
* XXX Move this back to fsys.S after changing it over to avoid break 0x100000.
*/
movl r2=(syscall_via_break - .start_gate) + GATE_ADDR
;;
MCKINLEY_E9_WORKAROUND
mov b7=r2
br.ret.sptk.many b7
END(fsys_fallback_syscall)
#endif
#endif /* CONFIG_FSYS */
# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
......
......@@ -637,7 +637,6 @@ END(daccess_bit)
/////////////////////////////////////////////////////////////////////////////////////////
// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
ENTRY(break_fault)
.global ia64_enter_syscall
/*
* The streamlined system call entry/exit paths only save/restore the initial part
* of pt_regs. This implies that the callers of system-calls must adhere to the
......@@ -654,7 +653,7 @@ ENTRY(break_fault)
* to prevent leaking bits from kernel to user level.
*/
DBG_FAULT(11)
mov r16=IA64_KR(CURRENT) // r16 = current (physical); 12 cycle read lat.
mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat.
mov r17=cr.iim
mov r18=__IA64_BREAK_SYSCALL
mov r21=ar.fpsr
......@@ -673,7 +672,7 @@ ENTRY(break_fault)
;;
ld1 r17=[r16] // load current->thread.on_ustack flag
st1 [r16]=r0 // clear current->thread.on_ustack flag
adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT
add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT
;;
invala
......@@ -682,6 +681,7 @@ ENTRY(break_fault)
extr.u r8=r29,41,2 // extract ei field from cr.ipsr
;;
cmp.eq p6,p7=2,r8 // isr.ei==2?
mov r2=r1 // setup r2 for ia64_syscall_setup
;;
(p6) mov r8=0 // clear ei to 0
(p6) adds r28=16,r28 // switch cr.iip to next bundle cr.ipsr.ei wrapped
......@@ -691,19 +691,25 @@ ENTRY(break_fault)
dep r29=r8,r29,41,2 // insert new ei into cr.ipsr
;;
ia64_enter_syscall:
// switch from user to kernel RBS:
MINSTATE_START_SAVE_MIN_VIRT
br.call.sptk.many b7=setup_syscall_via_break
br.call.sptk.many b7=ia64_syscall_setup
;;
mov r3=255
MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
ssm psr.ic | PSR_DEFAULT_BITS
;;
srlz.i // guarantee that interruption collection is on
;;
(p15) ssm psr.i // restore psr.i
;;
mov r3=NR_syscalls - 1
movl r16=sys_call_table
adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
movl r2=ia64_ret_from_syscall
;;
shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
cmp.geu p0,p7=r3,r15 // (syscall > 0 && syscall <= 1024+255) ?
cmp.geu p0,p7=r3,r15 // (syscall > 0 && syscall < 1024 + NR_syscalls) ?
mov rp=r2 // set the real return addr
;;
(p7) add r20=(__NR_ni_syscall-1024)*8,r16 // force __NR_ni_syscall
......@@ -764,11 +770,44 @@ END(interrupt)
* fault ever gets "unreserved", simply moved the following code to a more
* suitable spot...
*
* setup_syscall_via_break() is a separate subroutine so that it can
* ia64_syscall_setup() is a separate subroutine so that it can
* allocate stacked registers so it can safely demine any
* potential NaT values from the input registers.
*
* On entry:
* - executing on bank 0 or bank 1 register set (doesn't matter)
* - r1: stack pointer
* - r2: current task pointer
* - r3: preserved
* - r11: original contents (saved ar.pfs to be saved)
* - r12: original contents (sp to be saved)
* - r13: original contents (tp to be saved)
* - r15: original contents (syscall # to be saved)
* - r18: saved bsp (after switching to kernel stack)
* - r19: saved b6
* - r20: saved r1 (gp)
* - r21: saved ar.fpsr
* - r22: kernel's register backing store base (krbs_base)
* - r23: saved ar.bspstore
* - r24: saved ar.rnat
* - r25: saved ar.unat
* - r26: saved ar.pfs
* - r27: saved ar.rsc
* - r28: saved cr.iip
* - r29: saved cr.ipsr
* - r31: saved pr
* - b0: original contents (to be saved)
* On exit:
* - executing on bank 1 registers
* - psr.ic enabled, interrupts restored
* - r1: kernel's gp
* - r3: preserved (same as on entry)
* - r12: points to kernel stack
* - r13: points to current task
* - p15: TRUE if interrupts need to be re-enabled
* - ar.fpsr: set to kernel settings
*/
ENTRY(setup_syscall_via_break)
GLOBAL_ENTRY(ia64_syscall_setup)
#if PT(B6) != 0
# error This code assumes that b6 is the first field in pt_regs.
#endif
......@@ -786,7 +825,7 @@ ENTRY(setup_syscall_via_break)
;;
st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
mov r28=b0
mov r28=b0 // save b0 (2 cyc)
(p8) mov in0=-1
;;
......@@ -836,23 +875,19 @@ ENTRY(setup_syscall_via_break)
adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
(p14) mov in6=-1
mov r13=IA64_KR(CURRENT) // establish `current'
mov r13=r2 // establish `current'
movl r1=__gp // establish kernel global pointer
;;
(p8) mov in7=-1
tnat.nz p9,p0=r15
MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
ssm psr.ic | PSR_DEFAULT_BITS
cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
movl r17=FPSR_DEFAULT
;;
srlz.i // guarantee that interruption collection is on
cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
(p9) mov r15=-1
(p15) ssm psr.i // restore psr.i
mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value
(p9) mov r15=-1
br.ret.sptk.many b7
END(setup_syscall_via_break)
END(ia64_syscall_setup)
.org ia64_ivt+0x3c00
/////////////////////////////////////////////////////////////////////////////////////////
......
......@@ -27,6 +27,7 @@
#include <asm/sal.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/tlb.h>
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
......@@ -569,6 +570,29 @@ count_reserved_pages (u64 start, u64 end, void *arg)
return 0;
}
#ifdef CONFIG_FSYS
/*
* Boot command-line option "nolwsys" can be used to disable the use of any light-weight
* system call handler. When this option is in effect, all fsyscalls will end up bubbling
* down into the kernel and calling the normal (heavy-weight) syscall handler. This is
* useful for performance testing, but conceivably could also come in handy for debugging
* purposes.
*/
static int nolwsys;
static int __init
nolwsys_setup (char *s)
{
nolwsys = 1;
return 1;
}
__setup("nolwsys", nolwsys_setup);
#endif /* CONFIG_FSYS */
void
mem_init (void)
{
......@@ -622,6 +646,25 @@ mem_init (void)
if (num_pgt_pages > (u64) pgt_cache_water[1])
pgt_cache_water[1] = num_pgt_pages;
#ifdef CONFIG_FSYS
{
int i;
/*
* For fsyscall entrpoints with no light-weight handler, use the ordinary
* (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry
* code can tell them apart.
*/
for (i = 0; i < NR_syscalls; ++i) {
extern unsigned long fsyscall_table[NR_syscalls];
extern unsigned long sys_call_table[NR_syscalls];
if (!fsyscall_table[i] || nolwsys)
fsyscall_table[i] = sys_call_table[i] | 1;
}
}
#endif
/* install the gate page in the global page table: */
put_gate_page(virt_to_page(ia64_imva(__start_gate_section)), GATE_ADDR);
......
......@@ -247,6 +247,8 @@
#define __NR_sys_clock_getres 1255
#define __NR_sys_clock_nanosleep 1256
#define NR_syscalls 256 /* length of syscall table */
#if !defined(__ASSEMBLY__) && !defined(ASSEMBLER)
extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment