Commit 75085009 authored by Peter Zijlstra

x86/alternative: Implement .retpoline_sites support

Rewrite retpoline thunk call sites to be indirect calls for
spectre_v2=off. This ensures spectre_v2=off is as near to a
RETPOLINE=n build as possible.

This is the replacement for objtool writing alternative entries to
ensure the same and achieves feature-parity with the previous
approach.

One noteworthy feature is that it relies on the thunks to be in
machine order to compute the register index.

Specifically, this does not yet address the Jcc __x86_indirect_thunk_*
calls generated by clang; a future patch will add this.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Acked-by: Josh Poimboeuf <jpoimboe@redhat.com>
Tested-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/r/20211026120310.232495794@infradead.org
parent 1a6f7442
...@@ -421,6 +421,10 @@ void __init check_bugs(void) ...@@ -421,6 +421,10 @@ void __init check_bugs(void)
os_check_bugs(); os_check_bugs();
} }
/*
 * Empty implementation of apply_retpolines(): the [start, end) range is
 * accepted but no call site is rewritten.
 * NOTE(review): presumably the stub for a build that has no retpoline
 * sites to patch -- confirm against the file this hunk belongs to.
 */
void apply_retpolines(s32 *start, s32 *end)
{
}
void apply_alternatives(struct alt_instr *start, struct alt_instr *end) void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{ {
} }
......
...@@ -75,6 +75,7 @@ extern int alternatives_patched; ...@@ -75,6 +75,7 @@ extern int alternatives_patched;
extern void alternative_instructions(void); extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void apply_retpolines(s32 *start, s32 *end);
struct module; struct module;
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <asm/io.h> #include <asm/io.h>
#include <asm/fixmap.h> #include <asm/fixmap.h>
#include <asm/paravirt.h> #include <asm/paravirt.h>
#include <asm/asm-prototypes.h>
int __read_mostly alternatives_patched; int __read_mostly alternatives_patched;
...@@ -113,6 +114,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) ...@@ -113,6 +114,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
} }
} }
extern s32 __retpoline_sites[], __retpoline_sites_end[];
extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[]; extern s32 __smp_locks[], __smp_locks_end[];
void text_poke_early(void *addr, const void *opcode, size_t len); void text_poke_early(void *addr, const void *opcode, size_t len);
...@@ -221,7 +223,7 @@ static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off) ...@@ -221,7 +223,7 @@ static __always_inline int optimize_nops_range(u8 *instr, u8 instrlen, int off)
* "noinline" to cause control flow change and thus invalidate I$ and * "noinline" to cause control flow change and thus invalidate I$ and
* cause refetch after modification. * cause refetch after modification.
*/ */
static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *instr) static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
{ {
struct insn insn; struct insn insn;
int i = 0; int i = 0;
...@@ -239,11 +241,11 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins ...@@ -239,11 +241,11 @@ static void __init_or_module noinline optimize_nops(struct alt_instr *a, u8 *ins
* optimized. * optimized.
*/ */
if (insn.length == 1 && insn.opcode.bytes[0] == 0x90) if (insn.length == 1 && insn.opcode.bytes[0] == 0x90)
i += optimize_nops_range(instr, a->instrlen, i); i += optimize_nops_range(instr, len, i);
else else
i += insn.length; i += insn.length;
if (i >= a->instrlen) if (i >= len)
return; return;
} }
} }
...@@ -331,10 +333,135 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, ...@@ -331,10 +333,135 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
text_poke_early(instr, insn_buff, insn_buff_sz); text_poke_early(instr, insn_buff, insn_buff_sz);
next: next:
optimize_nops(a, instr); optimize_nops(instr, a->instrlen);
} }
} }
#if defined(CONFIG_RETPOLINE) && defined(CONFIG_STACK_VALIDATION)
/*
 * Encode a register-indirect "CALL *%reg" or "JMP *%reg" into @bytes.
 *
 * @op:    CALL_INSN_OPCODE or JMP32_INSN_OPCODE, selecting which form to emit.
 * @reg:   register number; values of 8 and above get a REX.B prefix and are
 *         reduced by 8 before being placed in the ModRM byte.
 * @bytes: output buffer; receives two bytes, or three with the prefix.
 *
 * Returns the number of bytes written, or -1 (after a one-time warning)
 * when @op is neither of the two supported opcodes.
 */
static int emit_indirect(int op, int reg, u8 *bytes)
{
	u8 *out = bytes;
	u8 modrm = 0xc0;		/* Mod = 3 */

	if (op == CALL_INSN_OPCODE) {
		modrm |= 0x10;		/* Reg = 2; CALL r/m */
	} else if (op == JMP32_INSN_OPCODE) {
		modrm |= 0x20;		/* Reg = 4; JMP r/m */
	} else {
		WARN_ON_ONCE(1);
		return -1;
	}

	if (reg >= 8) {
		*out++ = 0x41;		/* REX.B prefix */
		reg -= 8;
	}

	modrm += reg;

	*out++ = 0xff;			/* opcode */
	*out++ = modrm;

	return out - bytes;
}
/*
 * Rewrite one compiler generated retpoline thunk call.
 *
 * When X86_FEATURE_RETPOLINE is not enabled (spectre_v2=off), the thunk
 * call is replaced by the equivalent immediate indirect instruction, which
 * avoids the extra indirection. For example:
 *
 *   CALL __x86_indirect_thunk_\reg
 *
 * becomes:
 *
 *   CALL *%\reg
 *
 * The register number is the index of the branch target within
 * __x86_indirect_thunk_array.
 *
 * @addr:  address of the instruction being rewritten.
 * @insn:  decoded form of that instruction.
 * @bytes: buffer that receives the replacement encoding.
 *
 * Returns the number of bytes placed in @bytes, padded with single-byte
 * NOPs up to the length of the original instruction, or a negative value
 * when the site must be left untouched.
 */
static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
{
	retpoline_thunk_t *thunk = addr + insn->length + insn->immediate.value;
	int reg = thunk - __x86_indirect_thunk_array;
	int len;

	/* Anything outside 0..15 means the target is not one of the thunks. */
	if (WARN_ON_ONCE(reg & ~0xf))
		return -1;

	/* If anyone ever does: CALL/JMP *%rsp, we're in deep trouble. */
	BUG_ON(reg == 4);

	/* Retpolines stay in use: keep the thunk call as it is. */
	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE))
		return -1;

	len = emit_indirect(insn->opcode.bytes[0], reg, bytes);
	if (len < 0)
		return len;

	/* Fill the remainder of the original instruction with NOPs. */
	while (len < insn->length)
		bytes[len++] = BYTES_NOP1;

	return len;
}
/*
 * Rewrite the retpoline call sites listed in [@start, @end).
 *
 * The table is generated by 'objtool --retpoline'. Each entry is an s32
 * offset that, added to the address of the entry itself, gives the address
 * of a CALL or JMP instruction to inspect.
 *
 * Entries that fail to decode, or that decode to anything other than a
 * CALL/JMP32, trigger a one-time warning and are skipped. A site is only
 * patched when patch_retpoline() produced a replacement of exactly the
 * original instruction length.
 */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
{
	s32 *s;

	for (s = start; s < end; s++) {
		void *addr = (void *)s + *s;
		struct insn insn;
		int len, ret;
		u8 bytes[16];

		ret = insn_decode_kernel(&insn, addr);
		if (WARN_ON_ONCE(ret < 0))
			continue;

		switch (insn.opcode.bytes[0]) {
		case CALL_INSN_OPCODE:
		case JMP32_INSN_OPCODE:
			break;

		default:
			WARN_ON_ONCE(1);
			continue;
		}

		len = patch_retpoline(addr, &insn, bytes);
		if (len == insn.length) {
			/* Merge the padding NOPs before writing the new text. */
			optimize_nops(bytes, len);
			text_poke_early(addr, bytes, len);
		}
	}
}
#else /* !CONFIG_RETPOLINE || !CONFIG_STACK_VALIDATION */
/* No-op variant: nothing is rewritten when either config option is unset. */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
#endif /* CONFIG_RETPOLINE && CONFIG_STACK_VALIDATION */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
static void alternatives_smp_lock(const s32 *start, const s32 *end, static void alternatives_smp_lock(const s32 *start, const s32 *end,
u8 *text, u8 *text_end) u8 *text, u8 *text_end)
...@@ -642,6 +769,12 @@ void __init alternative_instructions(void) ...@@ -642,6 +769,12 @@ void __init alternative_instructions(void)
*/ */
apply_paravirt(__parainstructions, __parainstructions_end); apply_paravirt(__parainstructions, __parainstructions_end);
/*
* Rewrite the retpolines, must be done before alternatives since
* those can rewrite the retpoline thunks.
*/
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
/* /*
* Then patch alternatives, such that those paravirt calls that are in * Then patch alternatives, such that those paravirt calls that are in
* alternatives can be overwritten by their immediate fragments. * alternatives can be overwritten by their immediate fragments.
......
...@@ -251,7 +251,8 @@ int module_finalize(const Elf_Ehdr *hdr, ...@@ -251,7 +251,8 @@ int module_finalize(const Elf_Ehdr *hdr,
struct module *me) struct module *me)
{ {
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
*para = NULL, *orc = NULL, *orc_ip = NULL; *para = NULL, *orc = NULL, *orc_ip = NULL,
*retpolines = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
...@@ -267,8 +268,14 @@ int module_finalize(const Elf_Ehdr *hdr, ...@@ -267,8 +268,14 @@ int module_finalize(const Elf_Ehdr *hdr,
orc = s; orc = s;
if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name)) if (!strcmp(".orc_unwind_ip", secstrings + s->sh_name))
orc_ip = s; orc_ip = s;
if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
retpolines = s;
} }
if (retpolines) {
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
}
if (alt) { if (alt) {
/* patch .altinstructions */ /* patch .altinstructions */
void *aseg = (void *)alt->sh_addr; void *aseg = (void *)alt->sh_addr;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment