Commit 20fa40b1 authored by Michael Ellerman's avatar Michael Ellerman

Merge branch 'fixes' into next

Merge our fixes branch, in particular to bring in the changes for the
entry/uaccess flush.
parents 0bd4b96d b6b79dd5
......@@ -2858,6 +2858,8 @@
mds=off [X86]
tsx_async_abort=off [X86]
kvm.nx_huge_pages=off [X86]
no_entry_flush [PPC]
no_uaccess_flush [PPC]
Exceptions:
This does not have any effect on
......@@ -3186,6 +3188,8 @@
noefi Disable EFI runtime services support.
no_entry_flush [PPC] Don't flush the L1-D cache when entering the kernel.
noexec [IA-64]
noexec [X86]
......@@ -3235,6 +3239,9 @@
nospec_store_bypass_disable
[HW] Disable all mitigations for the Speculative Store Bypass vulnerability
no_uaccess_flush
[PPC] Don't flush the L1-D cache after accessing user data.
noxsave [BUGS=X86] Disables x86 extended register state save
and restore using xsave. The kernel will fallback to
enabling legacy floating-point and sse state.
......
......@@ -248,7 +248,6 @@ KBUILD_CFLAGS += $(call cc-option,-mno-string)
cpu-as-$(CONFIG_40x) += -Wa,-m405
cpu-as-$(CONFIG_44x) += -Wa,-m440
cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec)
cpu-as-$(CONFIG_E200) += -Wa,-me200
cpu-as-$(CONFIG_E500) += -Wa,-me500
# When using '-many -mpower4' gas will first try and find a matching power4
......
......@@ -27,6 +27,7 @@
#endif
.endm
#ifdef CONFIG_PPC_KUAP
.macro kuap_check_amr gpr1, gpr2
#ifdef CONFIG_PPC_KUAP_DEBUG
BEGIN_MMU_FTR_SECTION_NESTED(67)
......@@ -38,6 +39,7 @@
END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
#endif
.endm
#endif
.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr
#ifdef CONFIG_PPC_KUAP
......@@ -61,6 +63,10 @@
#else /* !__ASSEMBLY__ */
#include <linux/jump_label.h>
DECLARE_STATIC_KEY_FALSE(uaccess_flush_key);
#ifdef CONFIG_PPC_KUAP
#include <asm/mmu.h>
......@@ -103,8 +109,16 @@ static inline void kuap_check_amr(void)
static inline unsigned long get_kuap(void)
{
/*
* We return AMR_KUAP_BLOCKED when we don't support KUAP because
* prevent_user_access_return needs to return AMR_KUAP_BLOCKED to
* cause restore_user_access to do a flush.
*
* This has no effect in terms of actually blocking things on hash,
* so it doesn't break anything.
*/
if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP))
return 0;
return AMR_KUAP_BLOCKED;
return mfspr(SPRN_AMR);
}
......@@ -123,6 +137,29 @@ static inline void set_kuap(unsigned long value)
isync();
}
static inline bool
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
(regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
"Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
}
#else /* CONFIG_PPC_KUAP */
static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr) { }
static inline unsigned long kuap_get_and_check_amr(void)
{
return 0UL;
}
static inline unsigned long get_kuap(void)
{
return AMR_KUAP_BLOCKED;
}
static inline void set_kuap(unsigned long value) { }
#endif /* !CONFIG_PPC_KUAP */
static __always_inline void allow_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir)
{
......@@ -142,6 +179,8 @@ static inline void prevent_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir)
{
set_kuap(AMR_KUAP_BLOCKED);
if (static_branch_unlikely(&uaccess_flush_key))
do_uaccess_flush();
}
static inline unsigned long prevent_user_access_return(void)
......@@ -149,6 +188,8 @@ static inline unsigned long prevent_user_access_return(void)
unsigned long flags = get_kuap();
set_kuap(AMR_KUAP_BLOCKED);
if (static_branch_unlikely(&uaccess_flush_key))
do_uaccess_flush();
return flags;
}
......@@ -156,30 +197,9 @@ static inline unsigned long prevent_user_access_return(void)
static inline void restore_user_access(unsigned long flags)
{
set_kuap(flags);
if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED)
do_uaccess_flush();
}
static inline bool
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
(regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
"Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
}
#else /* CONFIG_PPC_KUAP */
static inline void kuap_restore_amr(struct pt_regs *regs, unsigned long amr)
{
}
static inline void kuap_check_amr(void)
{
}
static inline unsigned long kuap_get_and_check_amr(void)
{
return 0;
}
#endif /* CONFIG_PPC_KUAP */
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */
......@@ -57,11 +57,18 @@
nop; \
nop
#define ENTRY_FLUSH_SLOT \
ENTRY_FLUSH_FIXUP_SECTION; \
nop; \
nop; \
nop;
/*
* r10 must be free to use, r13 must be paca
*/
#define INTERRUPT_TO_KERNEL \
STF_ENTRY_BARRIER_SLOT
STF_ENTRY_BARRIER_SLOT; \
ENTRY_FLUSH_SLOT
/*
* Macros for annotating the expected destination of (h)rfid
......@@ -137,6 +144,9 @@
RFSCV; \
b rfscv_flush_fallback
#else /* __ASSEMBLY__ */
/* Prototype for function defined in exceptions-64s.S */
void do_uaccess_flush(void);
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_EXCEPTION_H */
......@@ -205,6 +205,22 @@ label##3: \
FTR_ENTRY_OFFSET 955b-956b; \
.popsection;
#define UACCESS_FLUSH_FIXUP_SECTION \
959: \
.pushsection __uaccess_flush_fixup,"a"; \
.align 2; \
960: \
FTR_ENTRY_OFFSET 959b-960b; \
.popsection;
#define ENTRY_FLUSH_FIXUP_SECTION \
957: \
.pushsection __entry_flush_fixup,"a"; \
.align 2; \
958: \
FTR_ENTRY_OFFSET 957b-958b; \
.popsection;
#define RFI_FLUSH_FIXUP_SECTION \
951: \
.pushsection __rfi_flush_fixup,"a"; \
......@@ -237,8 +253,11 @@ label##3: \
#include <linux/types.h>
extern long stf_barrier_fallback;
extern long entry_flush_fallback;
extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup;
extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup;
extern long __start___uaccess_flush_fixup, __stop___uaccess_flush_fixup;
extern long __start___entry_flush_fixup, __stop___entry_flush_fixup;
extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup;
extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup;
extern long __start__btb_flush_fixup, __stop__btb_flush_fixup;
......
......@@ -14,7 +14,7 @@
#define KUAP_CURRENT_WRITE 8
#define KUAP_CURRENT (KUAP_CURRENT_READ | KUAP_CURRENT_WRITE)
#ifdef CONFIG_PPC64
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/kup-radix.h>
#endif
#ifdef CONFIG_PPC_8xx
......@@ -35,6 +35,9 @@
.macro kuap_check current, gpr
.endm
.macro kuap_check_amr gpr1, gpr2
.endm
#endif
#else /* !__ASSEMBLY__ */
......@@ -53,17 +56,28 @@ static inline void setup_kuep(bool disabled) { }
void setup_kuap(bool disabled);
#else
static inline void setup_kuap(bool disabled) { }
static inline bool
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return false;
}
static inline void kuap_check_amr(void) { }
/*
* book3s/64/kup-radix.h defines these functions for the !KUAP case to flush
* the L1D cache after user accesses. Only include the empty stubs for other
* platforms.
*/
#ifndef CONFIG_PPC_BOOK3S_64
static inline void allow_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir) { }
static inline void prevent_user_access(void __user *to, const void __user *from,
unsigned long size, unsigned long dir) { }
static inline unsigned long prevent_user_access_return(void) { return 0UL; }
static inline void restore_user_access(unsigned long flags) { }
static inline bool
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return false;
}
#endif /* CONFIG_PPC_BOOK3S_64 */
#endif /* CONFIG_PPC_KUAP */
static inline void allow_read_from_user(const void __user *from, unsigned long size)
......
......@@ -63,7 +63,7 @@ static inline void restore_user_access(unsigned long flags)
static inline bool
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000),
return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000),
"Bug: fault blocked by AP register !");
}
......
......@@ -33,19 +33,18 @@
* respectively NA for All or X for Supervisor and no access for User.
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
* Therefore, we define 2 APG groups. lsb is _PMD_USER
* 0 => Kernel => 01 (all accesses performed according to page definition)
* 1 => User => 00 (all accesses performed as supervisor iaw page definition)
* 2-15 => Not Used
*/
#define MI_APG_INIT 0x40000000
/*
* 0 => Kernel => 01 (all accesses performed according to page definition)
* 1 => User => 10 (all accesses performed according to swaped page definition)
* 2-15 => Not Used
*/
#define MI_APG_KUEP 0x60000000
* _PAGE_ACCESSED is also managed via APG. When _PAGE_ACCESSED is not set, say
* "all User" rules, that will lead to NA for all.
* Therefore, we define 4 APG groups. lsb is _PAGE_ACCESSED
* 0 => Kernel => 11 (all accesses performed according as user iaw page definition)
* 1 => Kernel+Accessed => 01 (all accesses performed according to page definition)
* 2 => User => 11 (all accesses performed according as user iaw page definition)
* 3 => User+Accessed => 00 (all accesses performed as supervisor iaw page definition) for INIT
* => 10 (all accesses performed according to swaped page definition) for KUEP
* 4-15 => Not Used
*/
#define MI_APG_INIT 0xdc000000
#define MI_APG_KUEP 0xde000000
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MI_RPN is written, bits in
......@@ -106,25 +105,9 @@
#define MD_Ks 0x80000000 /* Should not be set */
#define MD_Kp 0x40000000 /* Should always be set */
/*
* All pages' PP data bits are set to either 000 or 011 or 001, which means
* respectively RW for Supervisor and no access for User, or RO for
* Supervisor and no access for user and NA for ALL.
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
* Therefore, we define 2 APG groups. lsb is _PMD_USER
* 0 => Kernel => 01 (all accesses performed according to page definition)
* 1 => User => 00 (all accesses performed as supervisor iaw page definition)
* 2-15 => Not Used
*/
#define MD_APG_INIT 0x40000000
/*
* 0 => No user => 01 (all accesses performed according to page definition)
* 1 => User => 10 (all accesses performed according to swaped page definition)
* 2-15 => Not Used
*/
#define MD_APG_KUAP 0x60000000
/* See explanation above at the definition of MI_APG_INIT */
#define MD_APG_INIT 0xdc000000
#define MD_APG_KUAP 0xde000000
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MD_RPN is written, bits in
......
......@@ -39,9 +39,9 @@
* into the TLB.
*/
#define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */
#define _PAGE_SPECIAL 0x0020 /* SW entry */
#define _PAGE_ACCESSED 0x0020 /* Copied to L1 APG 1 entry in I/DTLB */
#define _PAGE_EXEC 0x0040 /* Copied to PP (bit 21) in ITLB */
#define _PAGE_ACCESSED 0x0080 /* software: page referenced */
#define _PAGE_SPECIAL 0x0080 /* SW entry */
#define _PAGE_NA 0x0200 /* Supervisor NA, User no access */
#define _PAGE_RO 0x0600 /* Supervisor RO, User no access */
......@@ -59,11 +59,12 @@
#define _PMD_PRESENT 0x0001
#define _PMD_PRESENT_MASK _PMD_PRESENT
#define _PMD_BAD 0x0fd0
#define _PMD_BAD 0x0f90
#define _PMD_PAGE_MASK 0x000c
#define _PMD_PAGE_8M 0x000c
#define _PMD_PAGE_512K 0x0004
#define _PMD_USER 0x0020 /* APG 1 */
#define _PMD_ACCESSED 0x0020 /* APG 1 */
#define _PMD_USER 0x0040 /* APG 2 */
#define _PTE_NONE_MASK 0
......
......@@ -86,12 +86,19 @@ static inline bool security_ftr_enabled(u64 feature)
// Software required to flush link stack on context switch
#define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull
// The L1-D cache should be flushed when entering the kernel
#define SEC_FTR_L1D_FLUSH_ENTRY 0x0000000000004000ull
// The L1-D cache should be flushed after user accesses from the kernel
#define SEC_FTR_L1D_FLUSH_UACCESS 0x0000000000008000ull
// Features enabled by default
#define SEC_FTR_DEFAULT \
(SEC_FTR_L1D_FLUSH_HV | \
SEC_FTR_L1D_FLUSH_PR | \
SEC_FTR_BNDS_CHK_SPEC_BAR | \
SEC_FTR_L1D_FLUSH_ENTRY | \
SEC_FTR_L1D_FLUSH_UACCESS | \
SEC_FTR_FAVOUR_SECURITY)
#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */
......@@ -52,12 +52,16 @@ enum l1d_flush_type {
};
void setup_rfi_flush(enum l1d_flush_type, bool enable);
void setup_entry_flush(bool enable);
void setup_uaccess_flush(bool enable);
void do_rfi_flush_fixups(enum l1d_flush_type types);
#ifdef CONFIG_PPC_BARRIER_NOSPEC
void setup_barrier_nospec(void);
#else
static inline void setup_barrier_nospec(void) { };
#endif
void do_uaccess_flush_fixups(enum l1d_flush_type types);
void do_entry_flush_fixups(enum l1d_flush_type types);
void do_barrier_nospec_fixups(bool enable);
extern bool barrier_nospec_enabled;
......
......@@ -6,6 +6,7 @@
struct device;
struct device_node;
struct drmem_lmb;
#ifdef CONFIG_NUMA
......@@ -61,6 +62,9 @@ static inline int early_cpu_to_node(int cpu)
*/
return (nid < 0) ? 0 : nid;
}
int of_drconf_to_nid_single(struct drmem_lmb *lmb);
#else
static inline int early_cpu_to_node(int cpu) { return 0; }
......@@ -84,10 +88,12 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
return 0;
}
#endif /* CONFIG_NUMA */
static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb)
{
return first_online_node;
}
struct drmem_lmb;
int of_drconf_to_nid_single(struct drmem_lmb *lmb);
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
extern int find_and_online_cpu_nid(int cpu);
......
......@@ -178,7 +178,7 @@ do { \
* are no aliasing issues.
*/
#define __put_user_asm_goto(x, addr, label, op) \
asm volatile goto( \
asm_volatile_goto( \
"1: " op "%U1%X1 %0,%1 # put_user\n" \
EX_TABLE(1b, %l2) \
: \
......@@ -191,7 +191,7 @@ do { \
__put_user_asm_goto(x, ptr, label, "std")
#else /* __powerpc64__ */
#define __put_user_asm2_goto(x, addr, label) \
asm volatile goto( \
asm_volatile_goto( \
"1: stw%X1 %0, %1\n" \
"2: stw%X1 %L0, %L1\n" \
EX_TABLE(1b, %l2) \
......
......@@ -264,8 +264,9 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
{
struct pci_io_addr_range *piar;
struct rb_node *n;
unsigned long flags;
spin_lock(&pci_io_addr_cache_root.piar_lock);
spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) {
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
......@@ -273,7 +274,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
(piar->flags & IORESOURCE_IO) ? "i/o" : "mem",
&piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
}
spin_unlock(&pci_io_addr_cache_root.piar_lock);
spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
return 0;
}
......
......@@ -1000,8 +1000,6 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
* Vectors for the FWNMI option. Share common code.
*/
TRAMP_REAL_BEGIN(system_reset_fwnmi)
/* XXX: fwnmi guest could run a nested/PR guest, so why no test? */
__IKVM_REAL(system_reset)=0
GEN_INT_ENTRY system_reset, virt=0
#endif /* CONFIG_PPC_PSERIES */
......@@ -1412,6 +1410,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
* If none is found, do a Linux page fault. Linux page faults can happen in
* kernel mode due to user copy operations of course.
*
* KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest
* MMU context, which may cause a DSI in the host, which must go to the
* KVM handler. MSR[IR] is not enabled, so the real-mode handler will
* always be used regardless of AIL setting.
*
* - Radix MMU
* The hardware loads from the Linux page table directly, so a fault goes
* immediately to Linux page fault.
......@@ -1422,10 +1425,8 @@ INT_DEFINE_BEGIN(data_access)
IVEC=0x300
IDAR=1
IDSISR=1
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_SKIP=1
IKVM_REAL=1
#endif
INT_DEFINE_END(data_access)
EXC_REAL_BEGIN(data_access, 0x300, 0x80)
......@@ -1464,6 +1465,8 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
* ppc64_bolted_size (first segment). The kernel handler must avoid stomping
* on user-handler data structures.
*
* KVM: Same as 0x300, DSLB must test for KVM guest.
*
* A dedicated save area EXSLB is used (XXX: but it actually need not be
* these days, we could use EXGEN).
*/
......@@ -1472,10 +1475,8 @@ INT_DEFINE_BEGIN(data_access_slb)
IAREA=PACA_EXSLB
IRECONCILE=0
IDAR=1
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_SKIP=1
IKVM_REAL=1
#endif
INT_DEFINE_END(data_access_slb)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
......@@ -2951,15 +2952,8 @@ TRAMP_REAL_BEGIN(stf_barrier_fallback)
.endr
blr
TRAMP_REAL_BEGIN(rfi_flush_fallback)
SET_SCRATCH0(r13);
GET_PACA(r13);
std r1,PACA_EXRFI+EX_R12(r13)
ld r1,PACAKSAVE(r13)
std r9,PACA_EXRFI+EX_R9(r13)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
mfctr r9
/* Clobbers r10, r11, ctr */
.macro L1D_DISPLACEMENT_FLUSH
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
......@@ -2970,7 +2964,7 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
sync
/*
* The load adresses are at staggered offsets within cachelines,
* The load addresses are at staggered offsets within cachelines,
* which suits some pipelines better (on others it should not
* hurt).
*/
......@@ -2985,7 +2979,30 @@ TRAMP_REAL_BEGIN(rfi_flush_fallback)
ld r11,(0x80 + 8)*7(r10)
addi r10,r10,0x80*8
bdnz 1b
.endm
TRAMP_REAL_BEGIN(entry_flush_fallback)
std r9,PACA_EXRFI+EX_R9(r13)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
mfctr r9
L1D_DISPLACEMENT_FLUSH
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
ld r11,PACA_EXRFI+EX_R11(r13)
blr
TRAMP_REAL_BEGIN(rfi_flush_fallback)
SET_SCRATCH0(r13);
GET_PACA(r13);
std r1,PACA_EXRFI+EX_R12(r13)
ld r1,PACAKSAVE(r13)
std r9,PACA_EXRFI+EX_R9(r13)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
mfctr r9
L1D_DISPLACEMENT_FLUSH
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
......@@ -3003,32 +3020,7 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback)
std r10,PACA_EXRFI+EX_R10(r13)
std r11,PACA_EXRFI+EX_R11(r13)
mfctr r9
ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
ld r11,PACA_L1D_FLUSH_SIZE(r13)
srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
mtctr r11
DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
/* order ld/st prior to dcbt stop all streams with flushing */
sync
/*
* The load adresses are at staggered offsets within cachelines,
* which suits some pipelines better (on others it should not
* hurt).
*/
1:
ld r11,(0x80 + 8)*0(r10)
ld r11,(0x80 + 8)*1(r10)
ld r11,(0x80 + 8)*2(r10)
ld r11,(0x80 + 8)*3(r10)
ld r11,(0x80 + 8)*4(r10)
ld r11,(0x80 + 8)*5(r10)
ld r11,(0x80 + 8)*6(r10)
ld r11,(0x80 + 8)*7(r10)
addi r10,r10,0x80*8
bdnz 1b
L1D_DISPLACEMENT_FLUSH
mtctr r9
ld r9,PACA_EXRFI+EX_R9(r13)
ld r10,PACA_EXRFI+EX_R10(r13)
......@@ -3079,8 +3071,21 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback)
RFSCV
USE_TEXT_SECTION()
MASKED_INTERRUPT
MASKED_INTERRUPT hsrr=1
_GLOBAL(do_uaccess_flush)
UACCESS_FLUSH_FIXUP_SECTION
nop
nop
nop
blr
L1D_DISPLACEMENT_FLUSH
blr
_ASM_NOKPROBE_SYMBOL(do_uaccess_flush)
EXPORT_SYMBOL(do_uaccess_flush)
MASKED_INTERRUPT
MASKED_INTERRUPT hsrr=1
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
kvmppc_skip_interrupt:
......
......@@ -284,11 +284,7 @@ _ENTRY(saved_ksp_limit)
rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */
lwz r11, 0(r11) /* Get Linux PTE */
#ifdef CONFIG_SWAP
li r9, _PAGE_PRESENT | _PAGE_ACCESSED
#else
li r9, _PAGE_PRESENT
#endif
andc. r9, r9, r11 /* Check permission */
bne 5f
......@@ -369,11 +365,7 @@ _ENTRY(saved_ksp_limit)
rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */
lwz r11, 0(r11) /* Get Linux PTE */
#ifdef CONFIG_SWAP
li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
#else
li r9, _PAGE_PRESENT | _PAGE_EXEC
#endif
andc. r9, r9, r11 /* Check permission */
bne 5f
......
......@@ -202,9 +202,7 @@ SystemCall:
InstructionTLBMiss:
mtspr SPRN_SPRG_SCRATCH0, r10
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS)
mtspr SPRN_SPRG_SCRATCH1, r11
#endif
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
......@@ -224,25 +222,13 @@ InstructionTLBMiss:
3:
mtcr r11
#endif
#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT)
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
mtspr SPRN_MD_TWC, r11
#else
lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
mtspr SPRN_MI_TWC, r10 /* Set segment attributes */
mtspr SPRN_MD_TWC, r10
#endif
mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT)
rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED
rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
mtspr SPRN_MI_TWC, r11
#endif
#ifdef CONFIG_SWAP
rlwinm r11, r10, 32-5, _PAGE_PRESENT
and r11, r11, r10
rlwimi r10, r11, 0, _PAGE_PRESENT
#endif
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 20 and 23 must be clear.
* Software indicator bits 22, 24, 25, 26, and 27 must be
......@@ -256,9 +242,7 @@ InstructionTLBMiss:
/* Restore registers */
0: mfspr r10, SPRN_SPRG_SCRATCH0
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS)
mfspr r11, SPRN_SPRG_SCRATCH1
#endif
rfi
patch_site 0b, patch__itlbmiss_exit_1
......@@ -268,9 +252,7 @@ InstructionTLBMiss:
addi r10, r10, 1
stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
mfspr r10, SPRN_SPRG_SCRATCH0
#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
mfspr r11, SPRN_SPRG_SCRATCH1
#endif
rfi
#endif
......@@ -297,30 +279,16 @@ DataStoreTLBMiss:
mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
/* Insert the Guarded flag into the TWC from the Linux PTE.
/* Insert Guarded and Accessed flags into the TWC from the Linux PTE.
* It is bit 27 of both the Linux PTE and the TWC (at least
* I got that right :-). It will be better when we can put
* this into the Linux pgd/pmd and load it in the operation
* above.
*/
rlwimi r11, r10, 0, _PAGE_GUARDED
rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED
rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
mtspr SPRN_MD_TWC, r11
/* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
* We also need to know if the insn is a load/store, so:
* Clear _PAGE_PRESENT and load that which will
* trap into DTLB Error with store bit set accordinly.
*/
/* PRESENT=0x1, ACCESSED=0x20
* r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
* r10 = (r10 & ~PRESENT) | r11;
*/
#ifdef CONFIG_SWAP
rlwinm r11, r10, 32-5, _PAGE_PRESENT
and r11, r11, r10
rlwimi r10, r11, 0, _PAGE_PRESENT
#endif
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
......@@ -711,7 +679,7 @@ initial_mmu:
li r9, 4 /* up to 4 pages of 8M */
mtctr r9
lis r9, KERNELBASE@h /* Create vaddr for TLB */
li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */
li r10, MI_PS8MEG | _PMD_ACCESSED | MI_SVALID
li r11, MI_BOOTINIT /* Create RPN for address 0 */
1:
mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
......@@ -775,7 +743,7 @@ _GLOBAL(mmu_pin_tlb)
#ifdef CONFIG_PIN_TLB_TEXT
LOAD_REG_IMMEDIATE(r5, 28 << 8)
LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG)
LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
LOAD_REG_ADDR(r9, _sinittext)
li r0, 4
......@@ -797,7 +765,7 @@ _GLOBAL(mmu_pin_tlb)
LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM)
#ifdef CONFIG_PIN_TLB_DATA
LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG)
LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
#ifdef CONFIG_PIN_TLB_IMMR
li r0, 3
#else
......@@ -834,7 +802,7 @@ _GLOBAL(mmu_pin_tlb)
#endif
#ifdef CONFIG_PIN_TLB_IMMR
LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID)
LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED)
LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED | _PMD_ACCESSED)
mfspr r8, SPRN_IMMR
rlwinm r8, r8, 0, 0xfff80000
ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \
......
......@@ -156,6 +156,7 @@ __after_mmu_off:
bl initial_bats
bl load_segment_registers
BEGIN_MMU_FTR_SECTION
bl reloc_offset
bl early_hash_table
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
#if defined(CONFIG_BOOTX_TEXT)
......@@ -457,11 +458,7 @@ InstructionTLBMiss:
cmplw 0,r1,r3
#endif
mfspr r2, SPRN_SPRG_PGDIR
#ifdef CONFIG_SWAP
li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
#else
li r1,_PAGE_PRESENT | _PAGE_EXEC
#endif
#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
......@@ -523,11 +520,7 @@ DataLoadTLBMiss:
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
mfspr r2, SPRN_SPRG_PGDIR
#ifdef CONFIG_SWAP
li r1, _PAGE_PRESENT | _PAGE_ACCESSED
#else
li r1, _PAGE_PRESENT
#endif
bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
......@@ -603,11 +596,7 @@ DataStoreTLBMiss:
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
mfspr r2, SPRN_SPRG_PGDIR
#ifdef CONFIG_SWAP
li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
#else
li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
#endif
bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
......@@ -932,7 +921,7 @@ early_hash_table:
ori r6, r6, 3 /* 256kB table */
mtspr SPRN_SDR1, r6
lis r6, early_hash@h
lis r3, Hash@ha
addis r3, r3, Hash@ha
stw r6, Hash@l(r3)
blr
......
......@@ -945,7 +945,13 @@ early_initcall(disable_hardlockup_detector);
static enum l1d_flush_type enabled_flush_types;
static void *l1d_flush_fallback_area;
static bool no_rfi_flush;
static bool no_entry_flush;
static bool no_uaccess_flush;
bool rfi_flush;
bool entry_flush;
bool uaccess_flush;
DEFINE_STATIC_KEY_FALSE(uaccess_flush_key);
EXPORT_SYMBOL(uaccess_flush_key);
static int __init handle_no_rfi_flush(char *p)
{
......@@ -955,6 +961,22 @@ static int __init handle_no_rfi_flush(char *p)
}
early_param("no_rfi_flush", handle_no_rfi_flush);
static int __init handle_no_entry_flush(char *p)
{
pr_info("entry-flush: disabled on command line.");
no_entry_flush = true;
return 0;
}
early_param("no_entry_flush", handle_no_entry_flush);
static int __init handle_no_uaccess_flush(char *p)
{
pr_info("uaccess-flush: disabled on command line.");
no_uaccess_flush = true;
return 0;
}
early_param("no_uaccess_flush", handle_no_uaccess_flush);
/*
* The RFI flush is not KPTI, but because users will see doco that says to use
* nopti we hijack that option here to also disable the RFI flush.
......@@ -986,6 +1008,32 @@ void rfi_flush_enable(bool enable)
rfi_flush = enable;
}
void entry_flush_enable(bool enable)
{
if (enable) {
do_entry_flush_fixups(enabled_flush_types);
on_each_cpu(do_nothing, NULL, 1);
} else {
do_entry_flush_fixups(L1D_FLUSH_NONE);
}
entry_flush = enable;
}
void uaccess_flush_enable(bool enable)
{
if (enable) {
do_uaccess_flush_fixups(enabled_flush_types);
static_branch_enable(&uaccess_flush_key);
on_each_cpu(do_nothing, NULL, 1);
} else {
static_branch_disable(&uaccess_flush_key);
do_uaccess_flush_fixups(L1D_FLUSH_NONE);
}
uaccess_flush = enable;
}
static void __ref init_fallback_flush(void)
{
u64 l1d_size, limit;
......@@ -1044,10 +1092,28 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)
enabled_flush_types = types;
if (!no_rfi_flush && !cpu_mitigations_off())
if (!cpu_mitigations_off() && !no_rfi_flush)
rfi_flush_enable(enable);
}
void setup_entry_flush(bool enable)
{
if (cpu_mitigations_off())
return;
if (!no_entry_flush)
entry_flush_enable(enable);
}
void setup_uaccess_flush(bool enable)
{
if (cpu_mitigations_off())
return;
if (!no_uaccess_flush)
uaccess_flush_enable(enable);
}
#ifdef CONFIG_DEBUG_FS
static int rfi_flush_set(void *data, u64 val)
{
......@@ -1075,9 +1141,63 @@ static int rfi_flush_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n");
static int entry_flush_set(void *data, u64 val)
{
bool enable;
if (val == 1)
enable = true;
else if (val == 0)
enable = false;
else
return -EINVAL;
/* Only do anything if we're changing state */
if (enable != entry_flush)
entry_flush_enable(enable);
return 0;
}
static int entry_flush_get(void *data, u64 *val)
{
*val = entry_flush ? 1 : 0;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_entry_flush, entry_flush_get, entry_flush_set, "%llu\n");
static int uaccess_flush_set(void *data, u64 val)
{
bool enable;
if (val == 1)
enable = true;
else if (val == 0)
enable = false;
else
return -EINVAL;
/* Only do anything if we're changing state */
if (enable != uaccess_flush)
uaccess_flush_enable(enable);
return 0;
}
static int uaccess_flush_get(void *data, u64 *val)
{
*val = uaccess_flush ? 1 : 0;
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_uaccess_flush, uaccess_flush_get, uaccess_flush_set, "%llu\n");
static __init int rfi_flush_debugfs_init(void)
{
debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush);
debugfs_create_file("entry_flush", 0600, powerpc_debugfs_root, NULL, &fops_entry_flush);
debugfs_create_file("uaccess_flush", 0600, powerpc_debugfs_root, NULL, &fops_uaccess_flush);
return 0;
}
device_initcall(rfi_flush_debugfs_init);
......
......@@ -1393,13 +1393,14 @@ static void add_cpu_to_masks(int cpu)
/* Activate a secondary processor. */
void start_secondary(void *unused)
{
unsigned int cpu = smp_processor_id();
unsigned int cpu = raw_smp_processor_id();
mmgrab(&init_mm);
current->active_mm = &init_mm;
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
rcu_cpu_starting(cpu);
preempt_disable();
cpu_callin_map[cpu] = 1;
......
......@@ -2,7 +2,7 @@
#include <linux/err.h>
#include <asm/asm-prototypes.h>
#include <asm/book3s/64/kup-radix.h>
#include <asm/kup.h>
#include <asm/cputime.h>
#include <asm/hw_irq.h>
#include <asm/kprobes.h>
......
......@@ -131,6 +131,20 @@ SECTIONS
__stop___stf_entry_barrier_fixup = .;
}
. = ALIGN(8);
__uaccess_flush_fixup : AT(ADDR(__uaccess_flush_fixup) - LOAD_OFFSET) {
__start___uaccess_flush_fixup = .;
*(__uaccess_flush_fixup)
__stop___uaccess_flush_fixup = .;
}
. = ALIGN(8);
__entry_flush_fixup : AT(ADDR(__entry_flush_fixup) - LOAD_OFFSET) {
__start___entry_flush_fixup = .;
*(__entry_flush_fixup)
__stop___entry_flush_fixup = .;
}
. = ALIGN(8);
__stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) {
__start___stf_exit_barrier_fixup = .;
......
......@@ -251,6 +251,13 @@ static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
}
state = &sb->irq_state[src];
/* Some sanity checking */
if (!state->valid) {
pr_devel("%s: source %lx invalid !\n", __func__, irq);
return VM_FAULT_SIGBUS;
}
kvmppc_xive_select_irq(state, &hw_num, &xd);
arch_spin_lock(&sb->lock);
......
......@@ -234,6 +234,110 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
do_stf_exit_barrier_fixups(types);
}
void do_uaccess_flush_fixups(enum l1d_flush_type types)
{
unsigned int instrs[4], *dest;
long *start, *end;
int i;
start = PTRRELOC(&__start___uaccess_flush_fixup);
end = PTRRELOC(&__stop___uaccess_flush_fixup);
instrs[0] = 0x60000000; /* nop */
instrs[1] = 0x60000000; /* nop */
instrs[2] = 0x60000000; /* nop */
instrs[3] = 0x4e800020; /* blr */
i = 0;
if (types == L1D_FLUSH_FALLBACK) {
instrs[3] = 0x60000000; /* nop */
/* fallthrough to fallback flush */
}
if (types & L1D_FLUSH_ORI) {
instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
}
if (types & L1D_FLUSH_MTTRIG)
instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
for (i = 0; start < end; start++, i++) {
dest = (void *)start + *start;
pr_devel("patching dest %lx\n", (unsigned long)dest);
patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
patch_instruction((struct ppc_inst *)(dest + 3), ppc_inst(instrs[3]));
}
printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
(types == L1D_FLUSH_NONE) ? "no" :
(types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
(types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
? "ori+mttrig type"
: "ori type" :
(types & L1D_FLUSH_MTTRIG) ? "mttrig type"
: "unknown");
}
void do_entry_flush_fixups(enum l1d_flush_type types)
{
unsigned int instrs[3], *dest;
long *start, *end;
int i;
start = PTRRELOC(&__start___entry_flush_fixup);
end = PTRRELOC(&__stop___entry_flush_fixup);
instrs[0] = 0x60000000; /* nop */
instrs[1] = 0x60000000; /* nop */
instrs[2] = 0x60000000; /* nop */
i = 0;
if (types == L1D_FLUSH_FALLBACK) {
instrs[i++] = 0x7d4802a6; /* mflr r10 */
instrs[i++] = 0x60000000; /* branch patched below */
instrs[i++] = 0x7d4803a6; /* mtlr r10 */
}
if (types & L1D_FLUSH_ORI) {
instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */
instrs[i++] = 0x63de0000; /* ori 30,30,0 L1d flush*/
}
if (types & L1D_FLUSH_MTTRIG)
instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
for (i = 0; start < end; start++, i++) {
dest = (void *)start + *start;
pr_devel("patching dest %lx\n", (unsigned long)dest);
patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
if (types == L1D_FLUSH_FALLBACK)
patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback,
BRANCH_SET_LINK);
else
patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
}
printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i,
(types == L1D_FLUSH_NONE) ? "no" :
(types == L1D_FLUSH_FALLBACK) ? "fallback displacement" :
(types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG)
? "ori+mttrig type"
: "ori type" :
(types & L1D_FLUSH_MTTRIG) ? "mttrig type"
: "unknown");
}
void do_rfi_flush_fixups(enum l1d_flush_type types)
{
unsigned int instrs[3], *dest;
......
......@@ -98,7 +98,7 @@ static void init_fw_feat_flags(struct device_node *np)
security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}
static void pnv_setup_rfi_flush(void)
static void pnv_setup_security_mitigations(void)
{
struct device_node *np, *fw_features;
enum l1d_flush_type type;
......@@ -122,12 +122,31 @@ static void pnv_setup_rfi_flush(void)
type = L1D_FLUSH_ORI;
}
/*
* If we are non-Power9 bare metal, we don't need to flush on kernel
* entry or after user access: they fix a P9 specific vulnerability.
*/
if (!pvr_version_is(PVR_POWER9)) {
security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY);
security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS);
}
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \
(security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \
security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV));
setup_rfi_flush(type, enable);
setup_count_cache_flush();
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
setup_entry_flush(enable);
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
setup_uaccess_flush(enable);
setup_stf_barrier();
}
static void __init pnv_check_guarded_cores(void)
......@@ -156,8 +175,7 @@ static void __init pnv_setup_arch(void)
{
set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT);
pnv_setup_rfi_flush();
setup_stf_barrier();
pnv_setup_security_mitigations();
/* Initialize SMP */
pnv_smp_init();
......
......@@ -349,8 +349,8 @@ void post_mobility_fixup(void)
cpus_read_unlock();
/* Possibly switch to a new RFI flush type */
pseries_setup_rfi_flush();
/* Possibly switch to a new L1 flush type */
pseries_setup_security_mitigations();
/* Reinitialise system information for hv-24x7 */
read_24x7_sys_info();
......
......@@ -111,7 +111,7 @@ static inline unsigned long cmo_get_page_size(void)
int dlpar_workqueue_init(void);
void pseries_setup_rfi_flush(void);
void pseries_setup_security_mitigations(void);
void pseries_lpar_read_hblkrm_characteristics(void);
#endif /* _PSERIES_PSERIES_H */
......@@ -542,7 +542,7 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result)
security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR);
}
void pseries_setup_rfi_flush(void)
void pseries_setup_security_mitigations(void)
{
struct h_cpu_char_result result;
enum l1d_flush_type types;
......@@ -579,6 +579,16 @@ void pseries_setup_rfi_flush(void)
setup_rfi_flush(types, enable);
setup_count_cache_flush();
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_L1D_FLUSH_ENTRY);
setup_entry_flush(enable);
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_L1D_FLUSH_UACCESS);
setup_uaccess_flush(enable);
setup_stf_barrier();
}
#ifdef CONFIG_PCI_IOV
......@@ -768,8 +778,7 @@ static void __init pSeries_setup_arch(void)
fwnmi_init();
pseries_setup_rfi_flush();
setup_stf_barrier();
pseries_setup_security_mitigations();
pseries_lpar_read_hblkrm_characteristics();
/* By default, only probe PCI (can be overridden by rtas_pci) */
......
......@@ -42,6 +42,11 @@ int perf_event_enable(int fd);
int perf_event_disable(int fd);
int perf_event_reset(int fd);
struct perf_event_read {
__u64 nr;
__u64 l1d_misses;
};
#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30)
#include <unistd.h>
#include <sys/syscall.h>
......
# SPDX-License-Identifier: GPL-2.0-only
rfi_flush
entry_flush
# SPDX-License-Identifier: GPL-2.0+
TEST_GEN_PROGS := rfi_flush spectre_v2
TEST_GEN_PROGS := rfi_flush entry_flush spectre_v2
top_srcdir = ../../../../..
CFLAGS += -I../../../../../usr/include
......@@ -11,3 +11,5 @@ $(TEST_GEN_PROGS): ../harness.c ../utils.c
$(OUTPUT)/spectre_v2: CFLAGS += -m64
$(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S
$(OUTPUT)/rfi_flush: flush_utils.c
$(OUTPUT)/entry_flush: flush_utils.c
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright 2018 IBM Corporation.
*/
#define __SANE_USERSPACE_TYPES__
#include <sys/types.h>
#include <stdint.h>
#include <malloc.h>
#include <unistd.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "utils.h"
#include "flush_utils.h"
int entry_flush_test(void)
{
char *p;
int repetitions = 10;
int fd, passes = 0, iter, rc = 0;
struct perf_event_read v;
__u64 l1d_misses_total = 0;
unsigned long iterations = 100000, zero_size = 24 * 1024;
unsigned long l1d_misses_expected;
int rfi_flush_orig;
int entry_flush, entry_flush_orig;
SKIP_IF(geteuid() != 0);
// The PMU event we use only works on Power7 or later
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
perror("Unable to read powerpc/rfi_flush debugfs file");
SKIP_IF(1);
}
if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) {
perror("Unable to read powerpc/entry_flush debugfs file");
SKIP_IF(1);
}
if (rfi_flush_orig != 0) {
if (write_debugfs_file("powerpc/rfi_flush", 0) < 0) {
perror("error writing to powerpc/rfi_flush debugfs file");
FAIL_IF(1);
}
}
entry_flush = entry_flush_orig;
fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
FAIL_IF(fd < 0);
p = (char *)memalign(zero_size, CACHELINE_SIZE);
FAIL_IF(perf_event_enable(fd));
// disable L1 prefetching
set_dscr(1);
iter = repetitions;
/*
* We expect to see l1d miss for each cacheline access when entry_flush
* is set. Allow a small variation on this.
*/
l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
again:
FAIL_IF(perf_event_reset(fd));
syscall_loop(p, iterations, zero_size);
FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
if (entry_flush && v.l1d_misses >= l1d_misses_expected)
passes++;
else if (!entry_flush && v.l1d_misses < (l1d_misses_expected / 2))
passes++;
l1d_misses_total += v.l1d_misses;
while (--iter)
goto again;
if (passes < repetitions) {
printf("FAIL (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d failures]\n",
entry_flush, l1d_misses_total, entry_flush ? '<' : '>',
entry_flush ? repetitions * l1d_misses_expected :
repetitions * l1d_misses_expected / 2,
repetitions - passes, repetitions);
rc = 1;
} else {
printf("PASS (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d pass]\n",
entry_flush, l1d_misses_total, entry_flush ? '>' : '<',
entry_flush ? repetitions * l1d_misses_expected :
repetitions * l1d_misses_expected / 2,
passes, repetitions);
}
if (entry_flush == entry_flush_orig) {
entry_flush = !entry_flush_orig;
if (write_debugfs_file("powerpc/entry_flush", entry_flush) < 0) {
perror("error writing to powerpc/entry_flush debugfs file");
return 1;
}
iter = repetitions;
l1d_misses_total = 0;
passes = 0;
goto again;
}
perf_event_disable(fd);
close(fd);
set_dscr(0);
if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) {
perror("unable to restore original value of powerpc/rfi_flush debugfs file");
return 1;
}
if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) {
perror("unable to restore original value of powerpc/entry_flush debugfs file");
return 1;
}
return rc;
}
int main(int argc, char *argv[])
{
return test_harness(entry_flush_test, "entry_flush_test");
}
// SPDX-License-Identifier: GPL-2.0+
/*
* Copyright 2018 IBM Corporation.
*/
#define __SANE_USERSPACE_TYPES__
#include <sys/types.h>
#include <stdint.h>
#include <unistd.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "utils.h"
#include "flush_utils.h"
static inline __u64 load(void *addr)
{
__u64 tmp;
asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
return tmp;
}
void syscall_loop(char *p, unsigned long iterations,
unsigned long zero_size)
{
for (unsigned long i = 0; i < iterations; i++) {
for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
load(p + j);
getppid();
}
}
static void sigill_handler(int signr, siginfo_t *info, void *unused)
{
static int warned;
ucontext_t *ctx = (ucontext_t *)unused;
unsigned long *pc = &UCONTEXT_NIA(ctx);
/* mtspr 3,RS to check for move to DSCR below */
if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
if (!warned++)
printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
*pc += 4;
} else {
printf("SIGILL at %p\n", pc);
abort();
}
}
void set_dscr(unsigned long val)
{
static int init;
struct sigaction sa;
if (!init) {
memset(&sa, 0, sizeof(sa));
sa.sa_sigaction = sigill_handler;
sa.sa_flags = SA_SIGINFO;
if (sigaction(SIGILL, &sa, NULL))
perror("sigill_handler");
init = 1;
}
asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
}
/* SPDX-License-Identifier: GPL-2.0+ */
/*
* Copyright 2018 IBM Corporation.
*/
#ifndef _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H
#define _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H
#define CACHELINE_SIZE 128
void syscall_loop(char *p, unsigned long iterations,
unsigned long zero_size);
void set_dscr(unsigned long val);
#endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */
......@@ -10,71 +10,12 @@
#include <stdint.h>
#include <malloc.h>
#include <unistd.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "utils.h"
#include "flush_utils.h"
#define CACHELINE_SIZE 128
struct perf_event_read {
__u64 nr;
__u64 l1d_misses;
};
static inline __u64 load(void *addr)
{
__u64 tmp;
asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
return tmp;
}
static void syscall_loop(char *p, unsigned long iterations,
unsigned long zero_size)
{
for (unsigned long i = 0; i < iterations; i++) {
for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
load(p + j);
getppid();
}
}
static void sigill_handler(int signr, siginfo_t *info, void *unused)
{
static int warned = 0;
ucontext_t *ctx = (ucontext_t *)unused;
unsigned long *pc = &UCONTEXT_NIA(ctx);
/* mtspr 3,RS to check for move to DSCR below */
if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
if (!warned++)
printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
*pc += 4;
} else {
printf("SIGILL at %p\n", pc);
abort();
}
}
static void set_dscr(unsigned long val)
{
static int init = 0;
struct sigaction sa;
if (!init) {
memset(&sa, 0, sizeof(sa));
sa.sa_sigaction = sigill_handler;
sa.sa_flags = SA_SIGINFO;
if (sigaction(SIGILL, &sa, NULL))
perror("sigill_handler");
init = 1;
}
asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
}
int rfi_flush_test(void)
{
......@@ -85,19 +26,33 @@ int rfi_flush_test(void)
__u64 l1d_misses_total = 0;
unsigned long iterations = 100000, zero_size = 24 * 1024;
unsigned long l1d_misses_expected;
int rfi_flush_org, rfi_flush;
int rfi_flush_orig, rfi_flush;
int have_entry_flush, entry_flush_orig;
SKIP_IF(geteuid() != 0);
// The PMU event we use only works on Power7 or later
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
perror("Unable to read powerpc/rfi_flush debugfs file");
SKIP_IF(1);
}
rfi_flush = rfi_flush_org;
if (read_debugfs_file("powerpc/entry_flush", &entry_flush_orig) < 0) {
have_entry_flush = 0;
} else {
have_entry_flush = 1;
if (entry_flush_orig != 0) {
if (write_debugfs_file("powerpc/entry_flush", 0) < 0) {
perror("error writing to powerpc/entry_flush debugfs file");
return 1;
}
}
}
rfi_flush = rfi_flush_orig;
fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
FAIL_IF(fd < 0);
......@@ -106,6 +61,7 @@ int rfi_flush_test(void)
FAIL_IF(perf_event_enable(fd));
// disable L1 prefetching
set_dscr(1);
iter = repetitions;
......@@ -147,8 +103,8 @@ int rfi_flush_test(void)
repetitions * l1d_misses_expected / 2,
passes, repetitions);
if (rfi_flush == rfi_flush_org) {
rfi_flush = !rfi_flush_org;
if (rfi_flush == rfi_flush_orig) {
rfi_flush = !rfi_flush_orig;
if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) {
perror("error writing to powerpc/rfi_flush debugfs file");
return 1;
......@@ -164,11 +120,19 @@ int rfi_flush_test(void)
set_dscr(0);
if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) {
if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_orig) < 0) {
perror("unable to restore original value of powerpc/rfi_flush debugfs file");
return 1;
}
if (have_entry_flush) {
if (write_debugfs_file("powerpc/entry_flush", entry_flush_orig) < 0) {
perror("unable to restore original value of powerpc/entry_flush "
"debugfs file");
return 1;
}
}
return rc;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment