powerpc: Emulate most Book I instructions in emulate_step()

This extends the emulate_step() function to handle a large proportion of the Book I instructions implemented on current 64-bit server processors. The aim is to handle all the load and store instructions used in the kernel, plus all of the instructions that appear between l[wd]arx and st[wd]cx., so this handles the Altivec/VMX lvx and stvx and the VSX lxv2dx and stxv2dx instructions (implemented in POWER7). The new code can emulate user mode instructions, and checks the effective address for a load or store if the saved state is for user mode. It doesn't handle little-endian mode at present. For floating-point, Altivec/VMX and VSX instructions, it checks that the saved MSR has the enable bit for the relevant facility set, and if so, assumes that the FP/VMX/VSX registers contain valid state, and does loads or stores directly to/from the FP/VMX/VSX registers, using assembly helpers in ldstfp.S. Instructions supported now include: * Loads and stores, including some but not all VMX and VSX instructions, and lmw/stmw * Atomic loads and stores (l[dw]arx, st[dw]cx.) * Arithmetic instructions (add, subtract, multiply, divide, etc.) * Compare instructions * Rotate and mask instructions * Shift instructions * Logical instructions (and, or, xor, etc.) * Condition register logical instructions * mtcrf, cntlz[wd], exts[bhw] * isync, sync, lwsync, ptesync, eieio * Cache operations (dcbf, dcbst, dcbt, dcbtst) The overflow-checking arithmetic instructions are not included, but they appear not to be ever used in C code. This uses decimal values for the minor opcodes in the switch statements because that is what appears in the Power ISA specification, thus it is easier to check that they are correct if they are in decimal. If this is used to single-step an instruction where a data breakpoint interrupt occurred, then there is the possibility that the instruction is a lwarx or ldarx. In that case we have to be careful not to lose the reservation until we get to the matching st[wd]cx., or we'll never make forward progress. One alternative is to try to arrange that we can return from interrupts and handle data breakpoint interrupts without losing the reservation, which means not using any spinlocks, mutexes, or atomic ops (including bitops). That seems rather fragile. The other alternative is to emulate the larx/stcx and all the instructions in between. This is why this commit adds support for a wide range of integer instructions. Signed-off-by: Paul Mackerras <paulus@samba.org>

powerpc: Emulate most Book I instructions in emulate_step()
This extends the emulate_step() function to handle a large proportion of the Book I instructions implemented on current 64-bit server processors. The aim is to handle all the load and store instructions used in the kernel, plus all of the instructions that appear between l[wd]arx and st[wd]cx., so this handles the Altivec/VMX lvx and stvx and the VSX lxv2dx and stxv2dx instructions (implemented in POWER7). The new code can emulate user mode instructions, and checks the effective address for a load or store if the saved state is for user mode. It doesn't handle little-endian mode at present. For floating-point, Altivec/VMX and VSX instructions, it checks that the saved MSR has the enable bit for the relevant facility set, and if so, assumes that the FP/VMX/VSX registers contain valid state, and does loads or stores directly to/from the FP/VMX/VSX registers, using assembly helpers in ldstfp.S. Instructions supported now include: * Loads and stores, including some but not all VMX and VSX instructions, and lmw/stmw * Atomic loads and stores (l[dw]arx, st[dw]cx.) * Arithmetic instructions (add, subtract, multiply, divide, etc.) * Compare instructions * Rotate and mask instructions * Shift instructions * Logical instructions (and, or, xor, etc.) * Condition register logical instructions * mtcrf, cntlz[wd], exts[bhw] * isync, sync, lwsync, ptesync, eieio * Cache operations (dcbf, dcbst, dcbt, dcbtst) The overflow-checking arithmetic instructions are not included, but they appear not to be ever used in C code. This uses decimal values for the minor opcodes in the switch statements because that is what appears in the Power ISA specification, thus it is easier to check that they are correct if they are in decimal. If this is used to single-step an instruction where a data breakpoint interrupt occurred, then there is the possibility that the instruction is a lwarx or ldarx. In that case we have to be careful not to lose the reservation until we get to the matching st[wd]cx., or we'll never make forward progress. One alternative is to try to arrange that we can return from interrupts and handle data breakpoint interrupts without losing the reservation, which means not using any spinlocks, mutexes, or atomic ops (including bitops). That seems rather fragile. The other alternative is to emulate the larx/stcx and all the instructions in between. This is why this commit adds support for a wide range of integer instructions. Signed-off-by: Paul Mackerras <paulus@samba.org>
0016a4cf · Paul Mackerras · 7e27d6e7 · 0016a4cf · 0016a4cf · 0016a4cf
Commit 0016a4cf authored Jun 15, 2010 by Paul Mackerras
5 changed files
--- a/arch/powerpc/include/asm/asm-compat.h
+++ b/arch/powerpc/include/asm/asm-compat.h
@@ -30,6 +30,7 @@
 #define PPC_STLCX	stringify_in_c(stdcx.)
 #define PPC_CNTLZL	stringify_in_c(cntlzd)
 #define PPC_LR_STKOFF	16
+#define PPC_MIN_STKFRM	112

 /* Move to CR, single-entry optimized version. Only available
 * on POWER4 and later.
@@ -55,6 +56,7 @@
 #define PPC_CNTLZL	stringify_in_c(cntlzw)
 #define PPC_MTOCRF	stringify_in_c(mtcrf)
 #define PPC_LR_STKOFF	4
+#define PPC_MIN_STKFRM	16

 #endif


--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -52,13 +52,17 @@
 #define PPC_INST_WAIT			0x7c00007c
 #define PPC_INST_TLBIVAX		0x7c000624
 #define PPC_INST_TLBSRX_DOT		0x7c0006a5
+#define PPC_INST_XXLOR			0xf0000510

 /* macros to insert fields into opcodes */
 #define __PPC_RA(a)	(((a) & 0x1f) << 16)
 #define __PPC_RB(b)	(((b) & 0x1f) << 11)
 #define __PPC_RS(s)	(((s) & 0x1f) << 21)
 #define __PPC_RT(s)	__PPC_RS(s)
+#define __PPC_XA(a)	((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3))
+#define __PPC_XB(b)	((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))
 #define __PPC_XS(s)	((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5))
+#define __PPC_XT(s)	__PPC_XS(s)
 #define __PPC_T_TLB(t)	(((t) & 0x3) << 21)
 #define __PPC_WC(w)	(((w) & 0x3) << 21)
 /*
@@ -106,9 +110,12 @@
 * the 128 bit load store instructions based on that.
 */
 #define VSX_XX1(s, a, b)	(__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define VSX_XX3(t, a, b)	(__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
 #define STXVD2X(s, a, b)	stringify_in_c(.long PPC_INST_STXVD2X | \
 					       VSX_XX1((s), (a), (b)))
 #define LXVD2X(s, a, b)		stringify_in_c(.long PPC_INST_LXVD2X | \
 					       VSX_XX1((s), (a), (b)))
+#define XXLOR(t, a, b)		stringify_in_c(.long PPC_INST_XXLOR | \
+					       VSX_XX3((t), (a), (b)))

 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -18,8 +18,8 @@ obj-$(CONFIG_HAS_IOMEM)	+= devres.o

 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
 			   memcpy_64.o usercopy_64.o mem_64.o string.o
-obj-$(CONFIG_XMON)	+= sstep.o
-obj-$(CONFIG_KPROBES)	+= sstep.o
+obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
+obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o

 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o

--- a/arch/powerpc/lib/ldstfp.S
+++ b/arch/powerpc/lib/ldstfp.S
+/*
+ * Floating-point, VMX/Altivec and VSX loads and stores
+ * for use in instruction emulation.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <linux/errno.h>
+
+#define STKFRM	(PPC_MIN_STKFRM + 16)
+
+	.macro	extab	instr,handler
+	.section __ex_table,"a"
+	PPC_LONG \instr,\handler
+	.previous
+	.endm
+
+	.macro	inst32	op
+reg = 0
+	.rept	32
+20:	\op	reg,0,r4
+	b	3f
+	extab	20b,99f
+reg = reg + 1
+	.endr
+	.endm
+
+/* Get the contents of frN into fr0; N is in r3. */
+_GLOBAL(get_fpr)
+	mflr	r0
+	rlwinm	r3,r3,3,0xf8
+	bcl	20,31,1f
+	blr			/* fr0 is already in fr0 */
+	nop
+reg = 1
+	.rept	31
+	fmr	fr0,reg
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5
+	add	r5,r3,r5
+	mtctr	r5
+	mtlr	r0
+	bctr
+
+/* Put the contents of fr0 into frN; N is in r3. */
+_GLOBAL(put_fpr)
+	mflr	r0
+	rlwinm	r3,r3,3,0xf8
+	bcl	20,31,1f
+	blr			/* fr0 is already in fr0 */
+	nop
+reg = 1
+	.rept	31
+	fmr	reg,fr0
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5
+	add	r5,r3,r5
+	mtctr	r5
+	mtlr	r0
+	bctr
+
+/* Load FP reg N from float at *p.  N is in r3, p in r4. */
+_GLOBAL(do_lfs)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	ori	r7,r6,MSR_FP
+	cmpwi	cr7,r3,0
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	stfd	fr0,STKFRM-16(r1)
+1:	li	r9,-EFAULT
+2:	lfs	fr0,0(r4)
+	li	r9,0
+3:	bl	put_fpr
+	beq	cr7,4f
+	lfd	fr0,STKFRM-16(r1)
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+/* Load FP reg N from double at *p.  N is in r3, p in r4. */
+_GLOBAL(do_lfd)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	ori	r7,r6,MSR_FP
+	cmpwi	cr7,r3,0
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	stfd	fr0,STKFRM-16(r1)
+1:	li	r9,-EFAULT
+2:	lfd	fr0,0(r4)
+	li	r9,0
+3:	beq	cr7,4f
+	bl	put_fpr
+	lfd	fr0,STKFRM-16(r1)
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+/* Store FP reg N to float at *p.  N is in r3, p in r4. */
+_GLOBAL(do_stfs)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	ori	r7,r6,MSR_FP
+	cmpwi	cr7,r3,0
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	stfd	fr0,STKFRM-16(r1)
+	bl	get_fpr
+1:	li	r9,-EFAULT
+2:	stfs	fr0,0(r4)
+	li	r9,0
+3:	beq	cr7,4f
+	lfd	fr0,STKFRM-16(r1)
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+/* Store FP reg N to double at *p.  N is in r3, p in r4. */
+_GLOBAL(do_stfd)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	ori	r7,r6,MSR_FP
+	cmpwi	cr7,r3,0
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	stfd	fr0,STKFRM-16(r1)
+	bl	get_fpr
+1:	li	r9,-EFAULT
+2:	stfd	fr0,0(r4)
+	li	r9,0
+3:	beq	cr7,4f
+	lfd	fr0,STKFRM-16(r1)
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+#ifdef CONFIG_ALTIVEC
+/* Get the contents of vrN into vr0; N is in r3. */
+_GLOBAL(get_vr)
+	mflr	r0
+	rlwinm	r3,r3,3,0xf8
+	bcl	20,31,1f
+	blr			/* vr0 is already in vr0 */
+	nop
+reg = 1
+	.rept	31
+	vor	vr0,reg,reg	/* assembler doesn't know vmr? */
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5
+	add	r5,r3,r5
+	mtctr	r5
+	mtlr	r0
+	bctr
+
+/* Put the contents of vr0 into vrN; N is in r3. */
+_GLOBAL(put_vr)
+	mflr	r0
+	rlwinm	r3,r3,3,0xf8
+	bcl	20,31,1f
+	blr			/* vr0 is already in vr0 */
+	nop
+reg = 1
+	.rept	31
+	vor	reg,vr0,vr0
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5
+	add	r5,r3,r5
+	mtctr	r5
+	mtlr	r0
+	bctr
+
+/* Load vector reg N from *p.  N is in r3, p in r4. */
+_GLOBAL(do_lvx)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	oris	r7,r6,MSR_VEC@h
+	cmpwi	cr7,r3,0
+	li	r8,STKFRM-16
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	stvx	vr0,r1,r8
+1:	li	r9,-EFAULT
+2:	lvx	vr0,0,r4
+	li	r9,0
+3:	beq	cr7,4f
+	bl	put_vr
+	lvx	vr0,r1,r8
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+/* Store vector reg N to *p.  N is in r3, p in r4. */
+_GLOBAL(do_stvx)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	oris	r7,r6,MSR_VEC@h
+	cmpwi	cr7,r3,0
+	li	r8,STKFRM-16
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	stvx	vr0,r1,r8
+	bl	get_vr
+1:	li	r9,-EFAULT
+2:	stvx	vr0,0,r4
+	li	r9,0
+3:	beq	cr7,4f
+	lvx	vr0,r1,r8
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+/* Get the contents of vsrN into vsr0; N is in r3. */
+_GLOBAL(get_vsr)
+	mflr	r0
+	rlwinm	r3,r3,3,0x1f8
+	bcl	20,31,1f
+	blr			/* vsr0 is already in vsr0 */
+	nop
+reg = 1
+	.rept	63
+	XXLOR(0,reg,reg)
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5
+	add	r5,r3,r5
+	mtctr	r5
+	mtlr	r0
+	bctr
+
+/* Put the contents of vsr0 into vsrN; N is in r3. */
+_GLOBAL(put_vsr)
+	mflr	r0
+	rlwinm	r3,r3,3,0x1f8
+	bcl	20,31,1f
+	blr			/* vr0 is already in vr0 */
+	nop
+reg = 1
+	.rept	63
+	XXLOR(reg,0,0)
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5
+	add	r5,r3,r5
+	mtctr	r5
+	mtlr	r0
+	bctr
+
+/* Load VSX reg N from vector doubleword *p.  N is in r3, p in r4. */
+_GLOBAL(do_lxvd2x)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	oris	r7,r6,MSR_VSX@h
+	cmpwi	cr7,r3,0
+	li	r8,STKFRM-16
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	STXVD2X(0,r1,r8)
+1:	li	r9,-EFAULT
+2:	LXVD2X(0,0,r4)
+	li	r9,0
+3:	beq	cr7,4f
+	bl	put_vsr
+	LXVD2X(0,r1,r8)
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+/* Store VSX reg N to vector doubleword *p.  N is in r3, p in r4. */
+_GLOBAL(do_stxvd2x)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	oris	r7,r6,MSR_VSX@h
+	cmpwi	cr7,r3,0
+	li	r8,STKFRM-16
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	STXVD2X(0,r1,r8)
+	bl	get_vsr
+1:	li	r9,-EFAULT
+2:	STXVD2X(0,0,r4)
+	li	r9,0
+3:	beq	cr7,4f
+	LXVD2X(0,r1,r8)
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+#endif /* CONFIG_VSX */
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c