Commit 3b6c1747 authored by Peter Zijlstra

x86/retpoline: Add SKL retthunk retpolines

Ensure that retpolines do the proper call accounting so that the return
accounting works correctly.

Specifically; retpolines are used to replace both 'jmp *%reg' and
'call *%reg', however these two cases do not have the same accounting
requirements. Therefore split things up and provide two different
retpoline arrays for SKL.

The 'jmp *%reg' case needs no accounting, the
__x86_indirect_jump_thunk_array[] covers this. The retpoline is
changed to not use the return thunk; it's a simple call;ret construct.

[ strictly speaking it should do:
	andq $(~0x1f), PER_CPU_VAR(__x86_call_depth)
  but we can argue this can be covered by the fuzz we already have
  in the accounting depth (12) vs the RSB depth (16) ]

The 'call *%reg' case does need accounting, the
__x86_indirect_call_thunk_array[] covers this. Again, this retpoline
avoids the use of the return-thunk, in this case to avoid double
accounting.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.996634749@infradead.org
parent 5d821386
...@@ -301,6 +301,8 @@ ...@@ -301,6 +301,8 @@
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern retpoline_thunk_t __x86_indirect_thunk_array[];
/*
 * Call-depth-tracking retpoline thunk tables, indexed by register number;
 * each entry is one retpoline_thunk_t (RETPOLINE_THUNK_SIZE bytes).
 *
 * The "call" table is the target for rewritten 'call *%reg' sites, which
 * need call accounting; the "jump" table is the target for rewritten
 * 'jmp *%reg' sites, which need none.
 */
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
extern void __x86_return_thunk(void); extern void __x86_return_thunk(void);
extern void zen_untrain_ret(void); extern void zen_untrain_ret(void);
...@@ -330,6 +332,16 @@ static inline void x86_set_skl_return_thunk(void) {} ...@@ -330,6 +332,16 @@ static inline void x86_set_skl_return_thunk(void) {}
#include <asm/GEN-for-each-reg.h> #include <asm/GEN-for-each-reg.h>
#undef GEN #undef GEN
/*
 * Declare the per-register accounting thunk symbols
 * __x86_indirect_call_thunk_<reg>. GEN() is redefined before each include
 * of <asm/GEN-for-each-reg.h>, which presumably expands GEN() once per
 * general purpose register, and is undefined again afterwards.
 */
#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN
/*
 * Same pattern for the non-accounting jump thunk symbols
 * __x86_indirect_jump_thunk_<reg>.
 */
#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* /*
......
...@@ -377,6 +377,56 @@ static int emit_indirect(int op, int reg, u8 *bytes) ...@@ -377,6 +377,56 @@ static int emit_indirect(int op, int reg, u8 *bytes)
return i; return i;
} }
/*
 * is_jcc32 - test whether a decoded instruction is a Jcc.d32
 * @insn: decoded instruction to inspect
 *
 * A Jcc with a 32-bit displacement uses the two-byte opcode 0x0f 0x8X,
 * i.e. the 0x0f escape byte followed by a byte in the range 0x80-0x8f.
 *
 * Return: true if @insn carries that opcode, false otherwise.
 */
static inline bool is_jcc32(struct insn *insn)
{
	/* Without the 0x0f escape byte this cannot be a Jcc.d32. */
	if (insn->opcode.bytes[0] != 0x0f)
		return false;

	/* The high nibble selects the 0x80-0x8f range, the low one the condition. */
	return (insn->opcode.bytes[1] & 0xf0) == 0x80;
}
/*
 * emit_call_track_retpoline - retarget a retpoline thunk call site at the
 *                             call depth tracking thunks
 * @addr:  address of the instruction being rewritten
 * @insn:  decoded original instruction at @addr
 * @reg:   register number of the indirect target; indexes the thunk arrays
 * @bytes: output buffer receiving the replacement instruction bytes
 *
 * A CALL is redirected to __x86_indirect_call_thunk_array[@reg] (the
 * accounting variant); a JMP, or a Jcc.d32 conditional tail-call, is
 * redirected to __x86_indirect_jump_thunk_array[@reg] (no accounting).
 *
 * Return: the number of bytes written to @bytes, or -1 if the opcode is
 * neither a CALL, a JMP32 nor a Jcc.d32.
 */
static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes)
{
	u8 op = insn->opcode.bytes[0];
	int i = 0;

	/*
	 * Clang does 'weird' Jcc __x86_indirect_thunk_r11 conditional
	 * tail-calls. Deal with them.
	 *
	 * Copy the first opcode byte through verbatim and generate the rest
	 * of the instruction from the second (condition) opcode byte, using
	 * the jump thunk as the destination. The CS-prefix handling below
	 * is skipped on purpose: Jcc.d32 is 6 bytes long without any prefix.
	 */
	if (is_jcc32(insn)) {
		bytes[i++] = op;
		op = insn->opcode.bytes[1];
		goto clang_jcc;
	}

	/*
	 * A 6 byte CALL/JMP has one byte more than the 5 byte rel32 form;
	 * emit a CS prefix in front so the replacement keeps that length.
	 */
	if (insn->length == 6)
		bytes[i++] = 0x2e; /* CS-prefix */

	switch (op) {
	case CALL_INSN_OPCODE:
		__text_gen_insn(bytes+i, op, addr+i,
				__x86_indirect_call_thunk_array[reg],
				CALL_INSN_SIZE);
		i += CALL_INSN_SIZE;
		break;

	case JMP32_INSN_OPCODE:
clang_jcc:
		__text_gen_insn(bytes+i, op, addr+i,
				__x86_indirect_jump_thunk_array[reg],
				JMP32_INSN_SIZE);
		i += JMP32_INSN_SIZE;
		break;

	default:
		/*
		 * WARN() takes the condition as its first argument; without
		 * it the format literal would act as the condition and @addr
		 * would be interpreted as the format string.
		 */
		WARN(1, "%pS %px %*ph\n", addr, addr, 6, addr);
		return -1;
	}

	/* The replacement must be exactly as long as the original. */
	WARN_ON_ONCE(i != insn->length);

	return i;
}
/* /*
* Rewrite the compiler generated retpoline thunk calls. * Rewrite the compiler generated retpoline thunk calls.
* *
...@@ -409,8 +459,12 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) ...@@ -409,8 +459,12 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
BUG_ON(reg == 4); BUG_ON(reg == 4);
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
!cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
return emit_call_track_retpoline(addr, insn, reg, bytes);
return -1; return -1;
}
op = insn->opcode.bytes[0]; op = insn->opcode.bytes[0];
...@@ -427,8 +481,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) ...@@ -427,8 +481,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
* [ NOP ] * [ NOP ]
* 1: * 1:
*/ */
/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ if (is_jcc32(insn)) {
if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
cc = insn->opcode.bytes[1] & 0xf; cc = insn->opcode.bytes[1] & 0xf;
cc ^= 1; /* invert condition */ cc ^= 1; /* invert condition */
......
...@@ -14,17 +14,18 @@ ...@@ -14,17 +14,18 @@
.section .text.__x86.indirect_thunk .section .text.__x86.indirect_thunk
.macro RETPOLINE reg
.macro POLINE reg
ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE_INTRA_FUNCTION_CALL
call .Ldo_rop_\@ call .Ldo_rop_\@
.Lspec_trap_\@: int3
UNWIND_HINT_EMPTY
pause
lfence
jmp .Lspec_trap_\@
.Ldo_rop_\@: .Ldo_rop_\@:
mov %\reg, (%_ASM_SP) mov %\reg, (%_ASM_SP)
UNWIND_HINT_FUNC UNWIND_HINT_FUNC
.endm
.macro RETPOLINE reg
POLINE \reg
RET RET
.endm .endm
...@@ -54,7 +55,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) ...@@ -54,7 +55,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
*/ */
#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
.align RETPOLINE_THUNK_SIZE .align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array) SYM_CODE_START(__x86_indirect_thunk_array)
...@@ -66,10 +66,65 @@ SYM_CODE_START(__x86_indirect_thunk_array) ...@@ -66,10 +66,65 @@ SYM_CODE_START(__x86_indirect_thunk_array)
.align RETPOLINE_THUNK_SIZE .align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array) SYM_CODE_END(__x86_indirect_thunk_array)
#define GEN(reg) EXPORT_THUNK(reg) #define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
#ifdef CONFIG_CALL_DEPTH_TRACKING
/*
 * CALL_THUNK reg - emit the accounting retpoline thunk for \reg.
 *
 * Defines the global label __x86_indirect_call_thunk_\reg, aligned to
 * RETPOLINE_THUNK_SIZE so the thunks can be addressed as array entries.
 * Used for rewritten 'call *%reg' sites, which need call accounting.
 */
.macro CALL_THUNK reg
.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR
/* Do the call accounting on behalf of the call site this thunk serves. */
CALL_DEPTH_ACCOUNT
/* POLINE leaves \reg in the return address slot on top of the stack. */
POLINE \reg
ANNOTATE_UNRET_SAFE
/*
 * Plain 'ret' rather than the return thunk, to avoid double accounting.
 * NOTE(review): the trailing int3 presumably stops straight-line
 * speculation past the ret - confirm.
 */
ret
int3
.endm
.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_call_thunk_array)
#define GEN(reg) CALL_THUNK reg
#include <asm/GEN-for-each-reg.h> #include <asm/GEN-for-each-reg.h>
#undef GEN #undef GEN
.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_call_thunk_array)
#define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
/*
 * JUMP_THUNK reg - emit the non-accounting retpoline thunk for \reg.
 *
 * Defines the global label __x86_indirect_jump_thunk_\reg, aligned to
 * RETPOLINE_THUNK_SIZE so the thunks can be addressed as array entries.
 * Used for rewritten 'jmp *%reg' sites, which need no call accounting,
 * hence there is no CALL_DEPTH_ACCOUNT here.
 */
.macro JUMP_THUNK reg
.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR
/* POLINE leaves \reg in the return address slot on top of the stack. */
POLINE \reg
ANNOTATE_UNRET_SAFE
/*
 * Simple call;ret construct: a plain 'ret' that does not go through the
 * return thunk.
 * NOTE(review): the trailing int3 presumably stops straight-line
 * speculation past the ret - confirm.
 */
ret
int3
.endm
/*
 * The jump thunk array: one JUMP_THUNK instance per GEN(reg) expansion
 * from <asm/GEN-for-each-reg.h>, laid out back to back between the
 * SYM_CODE_START/SYM_CODE_END markers. Both the start and the end of the
 * array are aligned to RETPOLINE_THUNK_SIZE.
 */
.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_jump_thunk_array)
#define GEN(reg) JUMP_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN
.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_jump_thunk_array)
/* Apply __EXPORT_THUNK() to every per-register jump thunk symbol. */
#define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
#endif
/* /*
* This function name is magical and is used by -mfunction-return=thunk-extern * This function name is magical and is used by -mfunction-return=thunk-extern
* for the compiler to generate JMPs to it. * for the compiler to generate JMPs to it.
......
...@@ -417,7 +417,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) ...@@ -417,7 +417,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
EMIT2(0xFF, 0xE0 + reg); EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
OPTIMIZER_HIDE_VAR(reg); OPTIMIZER_HIDE_VAR(reg);
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip);
else
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
} else { } else {
EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */ EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */
if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS)) if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment