Commit 2f9ac29e authored by Chris Metcalf

tile: fast-path unaligned memory access for tilegx

This change enables unaligned userspace memory access via a kernel
fast path on tilegx.  The kernel tracks user PC/instruction pairs
per-thread using a direct-mapped cache in userspace.  The cache
maps those PC/instruction pairs to JIT'ed instruction sequences that
load or store using byte-wide load/store instructions and then
synthesize 2-, 4- or 8-byte load or store results.  After an
instruction has been seen to generate an unaligned access once,
subsequent hits on that instruction typically require overhead
of only around 50 cycles if the cache and TLB are hot.

We support the prctl() PR_GET_UNALIGN / PR_SET_UNALIGN sys call to
enable or disable unaligned fixups on a per-process basis.

To do this we pull some of the tilepro unaligned support out of the
single_step.c file; tilepro uses instruction disassembly for both
single-step and unaligned access support.  Since tilegx actually has
hardware singlestep support, though, it's cleaner to keep the tilegx
unaligned access code in a separate file.  While we're at it,
properly rename the tilepro-specific types, etc., to have tilepro
suffixes instead of generic tile suffixes.
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
parent f10da547
...@@ -247,6 +247,13 @@ unsigned long get_wchan(struct task_struct *p); ...@@ -247,6 +247,13 @@ unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(task) task_pc(task) #define KSTK_EIP(task) task_pc(task)
#define KSTK_ESP(task) task_sp(task) #define KSTK_ESP(task) task_sp(task)
/* Fine-grained unaligned JIT support */
#define GET_UNALIGN_CTL(tsk, adr) get_unalign_ctl((tsk), (adr))
#define SET_UNALIGN_CTL(tsk, val) set_unalign_ctl((tsk), (val))
extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
/* Standard format for printing registers and other word-size data. */ /* Standard format for printing registers and other word-size data. */
#ifdef __tilegx__ #ifdef __tilegx__
# define REGFMT "0x%016lx" # define REGFMT "0x%016lx"
......
...@@ -79,8 +79,7 @@ extern void single_step_execve(void); ...@@ -79,8 +79,7 @@ extern void single_step_execve(void);
struct task_struct; struct task_struct;
extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs);
int error_code);
#ifdef __tilegx__ #ifdef __tilegx__
/* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */ /* We need this since sigval_t has a user pointer in it, for GETSIGINFO etc. */
......
...@@ -28,7 +28,9 @@ extern char __w1data_begin[], __w1data_end[]; ...@@ -28,7 +28,9 @@ extern char __w1data_begin[], __w1data_end[];
/* Not exactly sections, but PC comparison points in the code. */ /* Not exactly sections, but PC comparison points in the code. */
extern char __rt_sigreturn[], __rt_sigreturn_end[]; extern char __rt_sigreturn[], __rt_sigreturn_end[];
#ifndef __tilegx__ #ifdef __tilegx__
extern char __start_unalign_asm_code[], __end_unalign_asm_code[];
#else
extern char sys_cmpxchg[], __sys_cmpxchg_end[]; extern char sys_cmpxchg[], __sys_cmpxchg_end[];
extern char __sys_cmpxchg_grab_lock[]; extern char __sys_cmpxchg_grab_lock[];
extern char __start_atomic_asm_code[], __end_atomic_asm_code[]; extern char __start_atomic_asm_code[], __end_atomic_asm_code[];
......
...@@ -39,6 +39,11 @@ struct thread_info { ...@@ -39,6 +39,11 @@ struct thread_info {
struct restart_block restart_block; struct restart_block restart_block;
struct single_step_state *step_state; /* single step state struct single_step_state *step_state; /* single step state
(if non-zero) */ (if non-zero) */
int align_ctl; /* controls unaligned access */
#ifdef __tilegx__
unsigned long unalign_jit_tmp[4]; /* temp r0..r3 storage */
void __user *unalign_jit_base; /* unalign fixup JIT base */
#endif
}; };
/* /*
...@@ -56,6 +61,7 @@ struct thread_info { ...@@ -56,6 +61,7 @@ struct thread_info {
.fn = do_no_restart_syscall, \ .fn = do_no_restart_syscall, \
}, \ }, \
.step_state = NULL, \ .step_state = NULL, \
.align_ctl = 0, \
} }
#define init_thread_info (init_thread_union.thread_info) #define init_thread_info (init_thread_union.thread_info)
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#ifndef _ASM_TILE_TRAPS_H #ifndef _ASM_TILE_TRAPS_H
#define _ASM_TILE_TRAPS_H #define _ASM_TILE_TRAPS_H
#ifndef __ASSEMBLY__
#include <arch/chip.h> #include <arch/chip.h>
/* mm/fault.c */ /* mm/fault.c */
...@@ -69,6 +70,16 @@ void gx_singlestep_handle(struct pt_regs *, int fault_num); ...@@ -69,6 +70,16 @@ void gx_singlestep_handle(struct pt_regs *, int fault_num);
/* kernel/intvec_64.S */ /* kernel/intvec_64.S */
void fill_ra_stack(void); void fill_ra_stack(void);
/* Handle unalign data fixup. */
extern void do_unaligned(struct pt_regs *regs, int vecnum);
#endif
#endif /* __ASSEMBLY__ */
#ifdef __tilegx__
/* 128 byte JIT per unalign fixup. */
#define UNALIGN_JIT_SHIFT 7
#endif #endif
#endif /* _ASM_TILE_TRAPS_H */ #endif /* _ASM_TILE_TRAPS_H */
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
extra-y := vmlinux.lds head_$(BITS).o extra-y := vmlinux.lds head_$(BITS).o
obj-y := backtrace.o entry.o irq.o messaging.o \ obj-y := backtrace.o entry.o irq.o messaging.o \
pci-dma.o proc.o process.o ptrace.o reboot.o \ pci-dma.o proc.o process.o ptrace.o reboot.o \
setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \ setup.o signal.o single_step.o stack.o sys.o \
sysfs.o time.o traps.o unaligned.o \
intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o
obj-$(CONFIG_HARDWALL) += hardwall.o obj-$(CONFIG_HARDWALL) += hardwall.o
......
...@@ -60,6 +60,12 @@ void foo(void) ...@@ -60,6 +60,12 @@ void foo(void)
offsetof(struct thread_info, homecache_cpu)); offsetof(struct thread_info, homecache_cpu));
DEFINE(THREAD_INFO_STEP_STATE_OFFSET, DEFINE(THREAD_INFO_STEP_STATE_OFFSET,
offsetof(struct thread_info, step_state)); offsetof(struct thread_info, step_state));
#ifdef __tilegx__
DEFINE(THREAD_INFO_UNALIGN_JIT_BASE_OFFSET,
offsetof(struct thread_info, unalign_jit_base));
DEFINE(THREAD_INFO_UNALIGN_JIT_TMP_OFFSET,
offsetof(struct thread_info, unalign_jit_tmp));
#endif
DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET, DEFINE(TASK_STRUCT_THREAD_KSP_OFFSET,
offsetof(struct task_struct, thread.ksp)); offsetof(struct task_struct, thread.ksp));
......
...@@ -1420,7 +1420,6 @@ handle_ill: ...@@ -1420,7 +1420,6 @@ handle_ill:
{ {
lw r0, r0 /* indirect thru thread_info to get task_info*/ lw r0, r0 /* indirect thru thread_info to get task_info*/
addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */ addi r1, sp, C_ABI_SAVE_AREA_SIZE /* put ptregs pointer into r1 */
move r2, zero /* load error code into r2 */
} }
jal send_sigtrap /* issue a SIGTRAP */ jal send_sigtrap /* issue a SIGTRAP */
......
...@@ -17,11 +17,13 @@ ...@@ -17,11 +17,13 @@
#include <linux/linkage.h> #include <linux/linkage.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/unistd.h> #include <linux/unistd.h>
#include <linux/init.h>
#include <asm/ptrace.h> #include <asm/ptrace.h>
#include <asm/thread_info.h> #include <asm/thread_info.h>
#include <asm/irqflags.h> #include <asm/irqflags.h>
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
#include <asm/types.h> #include <asm/types.h>
#include <asm/traps.h>
#include <asm/signal.h> #include <asm/signal.h>
#include <hv/hypervisor.h> #include <hv/hypervisor.h>
#include <arch/abi.h> #include <arch/abi.h>
...@@ -98,6 +100,189 @@ ...@@ -98,6 +100,189 @@
} }
.endm .endm
/*
* Unalign data exception fast handling: In order to handle
* unaligned data access, a fast JIT version is generated and stored
* in a specific area in user space. We first need to do a quick poke
* to see if the JIT is available. We use certain bits in the fault
* PC (3 to 9 is used for 16KB page size) as index to address the JIT
* code area. The first 64bit word is the fault PC, and the 2nd one is
* the fault bundle itself. If these 2 words both match, then we
* directly "iret" to JIT code. If not, a slow path is invoked to
* generate new JIT code. Note: the current JIT code WILL be
* overwritten if it existed. So, ideally we can handle 128 unalign
* fixups via JIT. For lookup efficiency and to effectively support
* tight loops with multiple unaligned reference, a simple
* direct-mapped cache is used.
*
* SPR_EX_CONTEXT_K_0 is modified to return to JIT code.
* SPR_EX_CONTEXT_K_1 has ICS set.
* SPR_EX_CONTEXT_0_0 is setup to user program's next PC.
* SPR_EX_CONTEXT_0_1 = 0.
*/
.macro int_hand_unalign_fast vecnum, vecname
.org (\vecnum << 8)
intvec_\vecname:
/* Put r3 in SPR_SYSTEM_SAVE_K_1. */
mtspr SPR_SYSTEM_SAVE_K_1, r3
mfspr r3, SPR_EX_CONTEXT_K_1
/*
* Examine if exception comes from user without ICS set.
* If not, just go directly to the slow path.
*/
bnez r3, hand_unalign_slow_nonuser
mfspr r3, SPR_SYSTEM_SAVE_K_0
/* Get &thread_info->unalign_jit_tmp[0] in r3. */
mm r3, zero, LOG2_THREAD_SIZE, 63
#if THREAD_SIZE < 65536
addli r3, r3, -(PAGE_SIZE - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET)
#else
addli r3, r3, -(PAGE_SIZE/2)
addli r3, r3, -(PAGE_SIZE/2 - THREAD_INFO_UNALIGN_JIT_TMP_OFFSET)
#endif
/*
* Save r0, r1, r2 into thread_info array r3 points to
* from low to high memory in order.
*/
st_add r3, r0, 8
st_add r3, r1, 8
{
st_add r3, r2, 8
andi r2, sp, 7
}
/* Save stored r3 value so we can revert it on a page fault. */
mfspr r1, SPR_SYSTEM_SAVE_K_1
st r3, r1
{
/* Generate a SIGBUS if sp is not 8-byte aligned. */
bnez r2, hand_unalign_slow_badsp
}
/*
* Get the thread_info in r0; load r1 with pc. Set the low bit of sp
* as an indicator to the page fault code in case we fault.
*/
{
ori sp, sp, 1
mfspr r1, SPR_EX_CONTEXT_K_0
}
/* Add the jit_info offset in thread_info; extract r1 [3:9] into r2. */
{
addli r0, r3, THREAD_INFO_UNALIGN_JIT_BASE_OFFSET - \
(THREAD_INFO_UNALIGN_JIT_TMP_OFFSET + (3 * 8))
bfextu r2, r1, 3, (2 + PAGE_SHIFT - UNALIGN_JIT_SHIFT)
}
/* Load the jit_info; multiply r2 by 128. */
{
ld r0, r0
shli r2, r2, UNALIGN_JIT_SHIFT
}
/*
* If r0 is NULL, the JIT page is not mapped, so go to slow path;
* add offset r2 to r0 at the same time.
*/
{
beqz r0, hand_unalign_slow
add r2, r0, r2
}
/*
* We are loading from userspace (both the JIT info PC and
* instruction word, and the instruction word we executed)
* and since either could fault while holding the interrupt
* critical section, we must tag this region and check it in
* do_page_fault() to handle it properly.
*/
ENTRY(__start_unalign_asm_code)
/* Load first word of JIT in r0 and increment r2 by 8. */
ld_add r0, r2, 8
/*
* Compare the PC with the 1st word in JIT; load the fault bundle
* into r1.
*/
{
cmpeq r0, r0, r1
ld r1, r1
}
/* Go to slow path if PC doesn't match. */
beqz r0, hand_unalign_slow
/*
* Load the 2nd word of JIT, which is supposed to be the fault
* bundle for a cache hit. Increment r2; after this bundle r2 will
* point to the potential start of the JIT code we want to run.
*/
ld_add r0, r2, 8
/* No further accesses to userspace are done after this point. */
ENTRY(__end_unalign_asm_code)
/* Compare the real bundle with what is saved in the JIT area. */
{
cmpeq r0, r1, r0
mtspr SPR_EX_CONTEXT_0_1, zero
}
/* Go to slow path if the fault bundle does not match. */
beqz r0, hand_unalign_slow
/*
* A cache hit is found.
* r2 points to start of JIT code (3rd word).
* r0 is the fault pc.
* r1 is the fault bundle.
* Reset the low bit of sp.
*/
{
mfspr r0, SPR_EX_CONTEXT_K_0
andi sp, sp, ~1
}
/* Write r2 into EX_CONTEXT_K_0 and increment PC. */
{
mtspr SPR_EX_CONTEXT_K_0, r2
addi r0, r0, 8
}
/*
* Set ICS on kernel EX_CONTEXT_K_1 in order to "iret" to
* user with ICS set. This way, if the JIT fixup causes another
* unalign exception (which shouldn't be possible) the user
* process will be terminated with SIGBUS. Also, our fixup will
* run without interleaving with external interrupts.
* Each fixup is at most 14 bundles, so it won't hold ICS for long.
*/
{
movei r1, PL_ICS_EX1(USER_PL, 1)
mtspr SPR_EX_CONTEXT_0_0, r0
}
{
mtspr SPR_EX_CONTEXT_K_1, r1
addi r3, r3, -(3 * 8)
}
/* Restore r0..r3. */
ld_add r0, r3, 8
ld_add r1, r3, 8
ld_add r2, r3, 8
ld r3, r3
iret
ENDPROC(intvec_\vecname)
.endm
#ifdef __COLLECT_LINKER_FEEDBACK__ #ifdef __COLLECT_LINKER_FEEDBACK__
.pushsection .text.intvec_feedback,"ax" .pushsection .text.intvec_feedback,"ax"
...@@ -118,15 +303,21 @@ intvec_feedback: ...@@ -118,15 +303,21 @@ intvec_feedback:
* The "processing" argument specifies the code for processing * The "processing" argument specifies the code for processing
* the interrupt. Defaults to "handle_interrupt". * the interrupt. Defaults to "handle_interrupt".
*/ */
.macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt .macro __int_hand vecnum, vecname, c_routine,processing=handle_interrupt
.org (\vecnum << 8)
intvec_\vecname: intvec_\vecname:
/* Temporarily save a register so we have somewhere to work. */ /* Temporarily save a register so we have somewhere to work. */
mtspr SPR_SYSTEM_SAVE_K_1, r0 mtspr SPR_SYSTEM_SAVE_K_1, r0
mfspr r0, SPR_EX_CONTEXT_K_1 mfspr r0, SPR_EX_CONTEXT_K_1
andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */ /*
* The unalign data fastpath code sets the low bit in sp to
* force us to reset it here on fault.
*/
{
blbs sp, 2f
andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
}
.ifc \vecnum, INT_DOUBLE_FAULT .ifc \vecnum, INT_DOUBLE_FAULT
/* /*
...@@ -176,7 +367,7 @@ intvec_\vecname: ...@@ -176,7 +367,7 @@ intvec_\vecname:
} }
.endif .endif
2:
/* /*
* SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and
* the current stack top in the higher bits. So we recover * the current stack top in the higher bits. So we recover
...@@ -1223,10 +1414,31 @@ STD_ENTRY(_sys_clone) ...@@ -1223,10 +1414,31 @@ STD_ENTRY(_sys_clone)
j sys_clone j sys_clone
STD_ENDPROC(_sys_clone) STD_ENDPROC(_sys_clone)
/* The single-step support may need to read all the registers. */ /*
* Recover r3, r2, r1 and r0 here saved by unalign fast vector.
* The vector area limit is 32 bundles, so we handle the reload here.
* r0, r1, r2 are in thread_info from low to high memory in order.
* r3 points to location the original r3 was saved.
* We put this code in the __HEAD section so it can be reached
* via a conditional branch from the fast path.
*/
__HEAD
hand_unalign_slow:
andi sp, sp, ~1
hand_unalign_slow_badsp:
addi r3, r3, -(3 * 8)
ld_add r0, r3, 8
ld_add r1, r3, 8
ld r2, r3
hand_unalign_slow_nonuser:
mfspr r3, SPR_SYSTEM_SAVE_K_1
__int_hand INT_UNALIGN_DATA, UNALIGN_DATA_SLOW, int_unalign
/* The unaligned data support needs to read all the registers. */
int_unalign: int_unalign:
push_extra_callee_saves r0 push_extra_callee_saves r0
j do_trap j do_unaligned
ENDPROC(hand_unalign_slow)
/* Fill the return address stack with nonzero entries. */ /* Fill the return address stack with nonzero entries. */
STD_ENTRY(fill_ra_stack) STD_ENTRY(fill_ra_stack)
...@@ -1240,6 +1452,11 @@ STD_ENTRY(fill_ra_stack) ...@@ -1240,6 +1452,11 @@ STD_ENTRY(fill_ra_stack)
4: jrp r0 4: jrp r0
STD_ENDPROC(fill_ra_stack) STD_ENDPROC(fill_ra_stack)
.macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt
.org (\vecnum << 8)
__int_hand \vecnum, \vecname, \c_routine, \processing
.endm
/* Include .intrpt1 array of interrupt vectors */ /* Include .intrpt1 array of interrupt vectors */
.section ".intrpt1", "ax" .section ".intrpt1", "ax"
...@@ -1272,7 +1489,7 @@ STD_ENTRY(fill_ra_stack) ...@@ -1272,7 +1489,7 @@ STD_ENTRY(fill_ra_stack)
int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall
int_hand INT_SWINT_0, SWINT_0, do_trap int_hand INT_SWINT_0, SWINT_0, do_trap
int_hand INT_ILL_TRANS, ILL_TRANS, do_trap int_hand INT_ILL_TRANS, ILL_TRANS, do_trap
int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign int_hand_unalign_fast INT_UNALIGN_DATA, UNALIGN_DATA
int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault
int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault
int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap
......
...@@ -113,7 +113,6 @@ arch_initcall(proc_tile_init); ...@@ -113,7 +113,6 @@ arch_initcall(proc_tile_init);
* Support /proc/sys/tile directory * Support /proc/sys/tile directory
*/ */
#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */
static ctl_table unaligned_subtable[] = { static ctl_table unaligned_subtable[] = {
{ {
.procname = "enabled", .procname = "enabled",
...@@ -160,4 +159,3 @@ static int __init proc_sys_tile_init(void) ...@@ -160,4 +159,3 @@ static int __init proc_sys_tile_init(void)
} }
arch_initcall(proc_sys_tile_init); arch_initcall(proc_sys_tile_init);
#endif
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <asm/syscalls.h> #include <asm/syscalls.h>
#include <asm/traps.h> #include <asm/traps.h>
#include <asm/setup.h> #include <asm/setup.h>
#include <asm/uaccess.h>
#ifdef CONFIG_HARDWALL #ifdef CONFIG_HARDWALL
#include <asm/hardwall.h> #include <asm/hardwall.h>
#endif #endif
...@@ -147,6 +148,14 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, ...@@ -147,6 +148,14 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
*/ */
task_thread_info(p)->step_state = NULL; task_thread_info(p)->step_state = NULL;
#ifdef __tilegx__
/*
* Do not clone unalign jit fixup from the parent; each thread
* must allocate its own on demand.
*/
task_thread_info(p)->unalign_jit_base = NULL;
#endif
/* /*
* Copy the registers onto the kernel stack so the * Copy the registers onto the kernel stack so the
* return-from-interrupt code will reload it into registers. * return-from-interrupt code will reload it into registers.
...@@ -205,6 +214,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, ...@@ -205,6 +214,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
return 0; return 0;
} }
/*
 * Set the unaligned-access control value for a task.
 *
 * This is the function the SET_UNALIGN_CTL() macro expands to.  The value
 * is stored as-is in the task's thread_info align_ctl field; no validation
 * of val is performed here.  Always returns 0.
 */
int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
{
task_thread_info(tsk)->align_ctl = val;
return 0;
}
/*
 * Report a task's unaligned-access control value to userspace.
 *
 * This is the function the GET_UNALIGN_CTL() macro expands to.  The task's
 * thread_info align_ctl field is written to the user address given in adr
 * (treated as an unsigned int __user pointer), and the put_user() result
 * is returned to the caller.
 */
int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
{
return put_user(task_thread_info(tsk)->align_ctl,
(unsigned int __user *)adr);
}
/* /*
* Return "current" if it looks plausible, or else a pointer to a dummy. * Return "current" if it looks plausible, or else a pointer to a dummy.
* This can be helpful if we are just trying to emit a clean panic. * This can be helpful if we are just trying to emit a clean panic.
......
...@@ -272,7 +272,7 @@ void do_syscall_trace_exit(struct pt_regs *regs) ...@@ -272,7 +272,7 @@ void do_syscall_trace_exit(struct pt_regs *regs)
trace_sys_exit(regs, regs->regs[0]); trace_sys_exit(regs, regs->regs[0]);
} }
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs)
{ {
struct siginfo info; struct siginfo info;
...@@ -288,5 +288,5 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) ...@@ -288,5 +288,5 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
/* Handle synthetic interrupt delivered only by the simulator. */ /* Handle synthetic interrupt delivered only by the simulator. */
void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num) void __kprobes do_breakpoint(struct pt_regs* regs, int fault_num)
{ {
send_sigtrap(current, regs, fault_num); send_sigtrap(current, regs);
} }
...@@ -12,41 +12,30 @@ ...@@ -12,41 +12,30 @@
* more details. * more details.
* *
* A code-rewriter that enables instruction single-stepping. * A code-rewriter that enables instruction single-stepping.
* Derived from iLib's single-stepping code.
*/ */
#ifndef __tilegx__ /* Hardware support for single step unavailable. */ #include <linux/smp.h>
#include <linux/ptrace.h>
/* These functions are only used on the TILE platform */
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/thread_info.h> #include <linux/thread_info.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <linux/mman.h> #include <linux/mman.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/err.h> #include <linux/err.h>
#include <linux/prctl.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/traps.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h> #include <asm/unaligned.h>
#include <arch/abi.h> #include <arch/abi.h>
#include <arch/spr_def.h>
#include <arch/opcode.h> #include <arch/opcode.h>
#define signExtend17(val) sign_extend((val), 17)
#define TILE_X1_MASK (0xffffffffULL << 31)
int unaligned_printk;
static int __init setup_unaligned_printk(char *str) #ifndef __tilegx__ /* Hardware support for single step unavailable. */
{
long val;
if (strict_strtol(str, 0, &val) != 0)
return 0;
unaligned_printk = val;
pr_info("Printk for each unaligned data accesses is %s\n",
unaligned_printk ? "enabled" : "disabled");
return 1;
}
__setup("unaligned_printk=", setup_unaligned_printk);
unsigned int unaligned_fixup_count; #define signExtend17(val) sign_extend((val), 17)
#define TILE_X1_MASK (0xffffffffULL << 31)
enum mem_op { enum mem_op {
MEMOP_NONE, MEMOP_NONE,
...@@ -56,12 +45,13 @@ enum mem_op { ...@@ -56,12 +45,13 @@ enum mem_op {
MEMOP_STORE_POSTINCR MEMOP_STORE_POSTINCR
}; };
static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset) static inline tilepro_bundle_bits set_BrOff_X1(tilepro_bundle_bits n,
s32 offset)
{ {
tile_bundle_bits result; tilepro_bundle_bits result;
/* mask out the old offset */ /* mask out the old offset */
tile_bundle_bits mask = create_BrOff_X1(-1); tilepro_bundle_bits mask = create_BrOff_X1(-1);
result = n & (~mask); result = n & (~mask);
/* or in the new offset */ /* or in the new offset */
...@@ -70,10 +60,11 @@ static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset) ...@@ -70,10 +60,11 @@ static inline tile_bundle_bits set_BrOff_X1(tile_bundle_bits n, s32 offset)
return result; return result;
} }
static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src) static inline tilepro_bundle_bits move_X1(tilepro_bundle_bits n, int dest,
int src)
{ {
tile_bundle_bits result; tilepro_bundle_bits result;
tile_bundle_bits op; tilepro_bundle_bits op;
result = n & (~TILE_X1_MASK); result = n & (~TILE_X1_MASK);
...@@ -87,13 +78,13 @@ static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src) ...@@ -87,13 +78,13 @@ static inline tile_bundle_bits move_X1(tile_bundle_bits n, int dest, int src)
return result; return result;
} }
static inline tile_bundle_bits nop_X1(tile_bundle_bits n) static inline tilepro_bundle_bits nop_X1(tilepro_bundle_bits n)
{ {
return move_X1(n, TREG_ZERO, TREG_ZERO); return move_X1(n, TREG_ZERO, TREG_ZERO);
} }
static inline tile_bundle_bits addi_X1( static inline tilepro_bundle_bits addi_X1(
tile_bundle_bits n, int dest, int src, int imm) tilepro_bundle_bits n, int dest, int src, int imm)
{ {
n &= ~TILE_X1_MASK; n &= ~TILE_X1_MASK;
...@@ -107,15 +98,26 @@ static inline tile_bundle_bits addi_X1( ...@@ -107,15 +98,26 @@ static inline tile_bundle_bits addi_X1(
return n; return n;
} }
static tile_bundle_bits rewrite_load_store_unaligned( static tilepro_bundle_bits rewrite_load_store_unaligned(
struct single_step_state *state, struct single_step_state *state,
tile_bundle_bits bundle, tilepro_bundle_bits bundle,
struct pt_regs *regs, struct pt_regs *regs,
enum mem_op mem_op, enum mem_op mem_op,
int size, int sign_ext) int size, int sign_ext)
{ {
unsigned char __user *addr; unsigned char __user *addr;
int val_reg, addr_reg, err, val; int val_reg, addr_reg, err, val;
int align_ctl;
align_ctl = unaligned_fixup;
switch (task_thread_info(current)->align_ctl) {
case PR_UNALIGN_NOPRINT:
align_ctl = 1;
break;
case PR_UNALIGN_SIGBUS:
align_ctl = 0;
break;
}
/* Get address and value registers */ /* Get address and value registers */
if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) { if (bundle & TILEPRO_BUNDLE_Y_ENCODING_MASK) {
...@@ -160,7 +162,7 @@ static tile_bundle_bits rewrite_load_store_unaligned( ...@@ -160,7 +162,7 @@ static tile_bundle_bits rewrite_load_store_unaligned(
* tilepro hardware would be doing, if it could provide us with the * tilepro hardware would be doing, if it could provide us with the
* actual bad address in an SPR, which it doesn't. * actual bad address in an SPR, which it doesn't.
*/ */
if (unaligned_fixup == 0) { if (align_ctl == 0) {
siginfo_t info = { siginfo_t info = {
.si_signo = SIGBUS, .si_signo = SIGBUS,
.si_code = BUS_ADRALN, .si_code = BUS_ADRALN,
...@@ -209,14 +211,14 @@ static tile_bundle_bits rewrite_load_store_unaligned( ...@@ -209,14 +211,14 @@ static tile_bundle_bits rewrite_load_store_unaligned(
if (err) { if (err) {
siginfo_t info = { siginfo_t info = {
.si_signo = SIGSEGV, .si_signo = SIGBUS,
.si_code = SEGV_MAPERR, .si_code = BUS_ADRALN,
.si_addr = addr .si_addr = addr
}; };
trace_unhandled_signal("segfault", regs, trace_unhandled_signal("bad address for unaligned fixup", regs,
(unsigned long)addr, SIGSEGV); (unsigned long)addr, SIGBUS);
force_sig_info(info.si_signo, &info, current); force_sig_info(info.si_signo, &info, current);
return (tile_bundle_bits) 0; return (tilepro_bundle_bits) 0;
} }
if (unaligned_printk || unaligned_fixup_count == 0) { if (unaligned_printk || unaligned_fixup_count == 0) {
...@@ -285,7 +287,7 @@ void single_step_execve(void) ...@@ -285,7 +287,7 @@ void single_step_execve(void)
ti->step_state = NULL; ti->step_state = NULL;
} }
/** /*
* single_step_once() - entry point when single stepping has been triggered. * single_step_once() - entry point when single stepping has been triggered.
* @regs: The machine register state * @regs: The machine register state
* *
...@@ -304,20 +306,31 @@ void single_step_execve(void) ...@@ -304,20 +306,31 @@ void single_step_execve(void)
*/ */
void single_step_once(struct pt_regs *regs) void single_step_once(struct pt_regs *regs)
{ {
extern tile_bundle_bits __single_step_ill_insn; extern tilepro_bundle_bits __single_step_ill_insn;
extern tile_bundle_bits __single_step_j_insn; extern tilepro_bundle_bits __single_step_j_insn;
extern tile_bundle_bits __single_step_addli_insn; extern tilepro_bundle_bits __single_step_addli_insn;
extern tile_bundle_bits __single_step_auli_insn; extern tilepro_bundle_bits __single_step_auli_insn;
struct thread_info *info = (void *)current_thread_info(); struct thread_info *info = (void *)current_thread_info();
struct single_step_state *state = info->step_state; struct single_step_state *state = info->step_state;
int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP); int is_single_step = test_ti_thread_flag(info, TIF_SINGLESTEP);
tile_bundle_bits __user *buffer, *pc; tilepro_bundle_bits __user *buffer, *pc;
tile_bundle_bits bundle; tilepro_bundle_bits bundle;
int temp_reg; int temp_reg;
int target_reg = TREG_LR; int target_reg = TREG_LR;
int err; int err;
enum mem_op mem_op = MEMOP_NONE; enum mem_op mem_op = MEMOP_NONE;
int size = 0, sign_ext = 0; /* happy compiler */ int size = 0, sign_ext = 0; /* happy compiler */
int align_ctl;
align_ctl = unaligned_fixup;
switch (task_thread_info(current)->align_ctl) {
case PR_UNALIGN_NOPRINT:
align_ctl = 1;
break;
case PR_UNALIGN_SIGBUS:
align_ctl = 0;
break;
}
asm( asm(
" .pushsection .rodata.single_step\n" " .pushsection .rodata.single_step\n"
...@@ -390,7 +403,7 @@ void single_step_once(struct pt_regs *regs) ...@@ -390,7 +403,7 @@ void single_step_once(struct pt_regs *regs)
if (regs->faultnum == INT_SWINT_1) if (regs->faultnum == INT_SWINT_1)
regs->pc -= 8; regs->pc -= 8;
pc = (tile_bundle_bits __user *)(regs->pc); pc = (tilepro_bundle_bits __user *)(regs->pc);
if (get_user(bundle, pc) != 0) { if (get_user(bundle, pc) != 0) {
pr_err("Couldn't read instruction at %p trying to step\n", pc); pr_err("Couldn't read instruction at %p trying to step\n", pc);
return; return;
...@@ -627,9 +640,9 @@ void single_step_once(struct pt_regs *regs) ...@@ -627,9 +640,9 @@ void single_step_once(struct pt_regs *regs)
/* /*
* Check if we need to rewrite an unaligned load/store. * Check if we need to rewrite an unaligned load/store.
* Returning zero is a special value meaning we need to SIGSEGV. * Returning zero is a special value meaning we generated a signal.
*/ */
if (mem_op != MEMOP_NONE && unaligned_fixup >= 0) { if (mem_op != MEMOP_NONE && align_ctl >= 0) {
bundle = rewrite_load_store_unaligned(state, bundle, regs, bundle = rewrite_load_store_unaligned(state, bundle, regs,
mem_op, size, sign_ext); mem_op, size, sign_ext);
if (bundle == 0) if (bundle == 0)
...@@ -668,9 +681,9 @@ void single_step_once(struct pt_regs *regs) ...@@ -668,9 +681,9 @@ void single_step_once(struct pt_regs *regs)
} }
/* End with a jump back to the next instruction */ /* End with a jump back to the next instruction */
delta = ((regs->pc + TILE_BUNDLE_SIZE_IN_BYTES) - delta = ((regs->pc + TILEPRO_BUNDLE_SIZE_IN_BYTES) -
(unsigned long)buffer) >> (unsigned long)buffer) >>
TILE_LOG2_BUNDLE_ALIGNMENT_IN_BYTES; TILEPRO_LOG2_BUNDLE_ALIGNMENT_IN_BYTES;
bundle = __single_step_j_insn; bundle = __single_step_j_insn;
bundle |= create_JOffLong_X1(delta); bundle |= create_JOffLong_X1(delta);
err |= __put_user(bundle, buffer++); err |= __put_user(bundle, buffer++);
...@@ -698,9 +711,6 @@ void single_step_once(struct pt_regs *regs) ...@@ -698,9 +711,6 @@ void single_step_once(struct pt_regs *regs)
} }
#else #else
#include <linux/smp.h>
#include <linux/ptrace.h>
#include <arch/spr_def.h>
static DEFINE_PER_CPU(unsigned long, ss_saved_pc); static DEFINE_PER_CPU(unsigned long, ss_saved_pc);
...@@ -743,10 +753,10 @@ void gx_singlestep_handle(struct pt_regs *regs, int fault_num) ...@@ -743,10 +753,10 @@ void gx_singlestep_handle(struct pt_regs *regs, int fault_num)
} else if ((*ss_pc != regs->pc) || } else if ((*ss_pc != regs->pc) ||
(!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) { (!(control & SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK))) {
ptrace_notify(SIGTRAP);
control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK; control |= SPR_SINGLE_STEP_CONTROL_1__CANCELED_MASK;
control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK; control |= SPR_SINGLE_STEP_CONTROL_1__INHIBIT_MASK;
__insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control); __insn_mtspr(SPR_SINGLE_STEP_CONTROL_K, control);
send_sigtrap(current, regs);
} }
} }
......
This diff is collapsed.
...@@ -722,8 +722,49 @@ void do_page_fault(struct pt_regs *regs, int fault_num, ...@@ -722,8 +722,49 @@ void do_page_fault(struct pt_regs *regs, int fault_num,
{ {
int is_page_fault; int is_page_fault;
#ifdef __tilegx__
/*
* We don't need early do_page_fault_ics() support, since unlike
* Pro we don't need to worry about unlocking the atomic locks.
* There is only one current case in GX where we touch any memory
* under ICS other than our own kernel stack, and we handle that
* here. (If we crash due to trying to touch our own stack,
* we're in too much trouble for C code to help out anyway.)
*/
if (write & ~1) {
unsigned long pc = write & ~1;
if (pc >= (unsigned long) __start_unalign_asm_code &&
pc < (unsigned long) __end_unalign_asm_code) {
struct thread_info *ti = current_thread_info();
/*
* Our EX_CONTEXT is still what it was from the
* initial unalign exception, but now we've faulted
* on the JIT page. We would like to complete the
* page fault however is appropriate, and then retry
* the instruction that caused the unalign exception.
* Our state has been "corrupted" by setting the low
* bit in "sp", and stashing r0..r3 in the
* thread_info area, so we revert all of that, then
* continue as if this were a normal page fault.
*/
regs->sp &= ~1UL;
regs->regs[0] = ti->unalign_jit_tmp[0];
regs->regs[1] = ti->unalign_jit_tmp[1];
regs->regs[2] = ti->unalign_jit_tmp[2];
regs->regs[3] = ti->unalign_jit_tmp[3];
write &= 1;
} else {
pr_alert("%s/%d: ICS set at page fault at %#lx: %#lx\n",
current->comm, current->pid, pc, address);
show_regs(regs);
do_group_exit(SIGKILL);
return;
}
}
#else
/* This case should have been handled by do_page_fault_ics(). */ /* This case should have been handled by do_page_fault_ics(). */
BUG_ON(write & ~1); BUG_ON(write & ~1);
#endif
#if CHIP_HAS_TILE_DMA() #if CHIP_HAS_TILE_DMA()
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment