Commit 1423e266 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'x86-fpu-2021-07-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fpu updates from Thomas Gleixner:
 "Fixes and improvements for FPU handling on x86:

   - Prevent sigaltstack out of bounds writes.

     The kernel unconditionally writes the FPU state to the alternate
     stack without checking whether the stack is large enough to
     accomodate it.

     Check the alternate stack size before doing so and in case it's too
     small force a SIGSEGV instead of silently corrupting user space
     data.

   - MINSIGSTKZ and SIGSTKSZ are constants in signal.h and have never
     been updated despite the fact that the FPU state which is stored on
     the signal stack has grown over time which causes trouble in the
     field when AVX512 is available on a CPU. The kernel does not expose
     the minimum requirements for the alternate stack size depending on
     the available and enabled CPU features.

     ARM already added an aux vector AT_MINSIGSTKSZ for the same reason.
     Add it to x86 as well.

   - A major cleanup of the x86 FPU code. The recent discoveries of
     XSTATE related issues unearthed quite some inconsistencies,
     duplicated code and other issues.

     The fine granular overhaul addresses this, makes the code more
     robust and maintainable, which allows to integrate upcoming XSTATE
     related features in sane ways"

* tag 'x86-fpu-2021-07-07' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (74 commits)
  x86/fpu/xstate: Clear xstate header in copy_xstate_to_uabi_buf() again
  x86/fpu/signal: Let xrstor handle the features to init
  x86/fpu/signal: Handle #PF in the direct restore path
  x86/fpu: Return proper error codes from user access functions
  x86/fpu/signal: Split out the direct restore code
  x86/fpu/signal: Sanitize copy_user_to_fpregs_zeroing()
  x86/fpu/signal: Sanitize the xstate check on sigframe
  x86/fpu/signal: Remove the legacy alignment check
  x86/fpu/signal: Move initial checks into fpu__restore_sig()
  x86/fpu: Mark init_fpstate __ro_after_init
  x86/pkru: Remove xstate fiddling from write_pkru()
  x86/fpu: Don't store PKRU in xstate in fpu_reset_fpstate()
  x86/fpu: Remove PKRU handling from switch_fpu_finish()
  x86/fpu: Mask PKRU from kernel XRSTOR[S] operations
  x86/fpu: Hook up PKRU into ptrace()
  x86/fpu: Add PKRU storage outside of task XSAVE buffer
  x86/fpu: Dont restore PKRU in fpregs_restore_userspace()
  x86/fpu: Rename xfeatures_mask_user() to xfeatures_mask_uabi()
  x86/fpu: Move FXSAVE_LEAK quirk info __copy_kernel_to_fpregs()
  x86/fpu: Rename __fpregs_load_activate() to fpregs_restore_userregs()
  ...
parents 4ea90317 93c2cdc9
.. SPDX-License-Identifier: GPL-2.0
==================================
x86-specific ELF Auxiliary Vectors
==================================
This document describes the semantics of the x86 auxiliary vectors.
Introduction
============
ELF Auxiliary vectors enable the kernel to efficiently provide
configuration-specific parameters to userspace. In this example, a program
allocates an alternate stack based on the kernel-provided size::
#include <sys/auxv.h>
#include <elf.h>
#include <signal.h>
#include <stdlib.h>
#include <assert.h>
#include <err.h>
#ifndef AT_MINSIGSTKSZ
#define AT_MINSIGSTKSZ 51
#endif
....
stack_t ss;
ss.ss_sp = malloc(ss.ss_size);
assert(ss.ss_sp);
ss.ss_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ;
ss.ss_flags = 0;
if (sigaltstack(&ss, NULL))
err(1, "sigaltstack");
The exposed auxiliary vectors
=============================
AT_SYSINFO is used for locating the vsyscall entry point. It is not
exported on 64-bit mode.
AT_SYSINFO_EHDR is the start address of the page containing the vDSO.
AT_MINSIGSTKSZ denotes the minimum stack size required by the kernel to
deliver a signal to user-space. AT_MINSIGSTKSZ comprehends the space
consumed by the kernel to accommodate the user context for the current
hardware configuration. It does not comprehend subsequent user-space stack
consumption, which must be added by the user. (e.g. Above, user-space adds
SIGSTKSZ to AT_MINSIGSTKSZ.)
......@@ -36,3 +36,4 @@ x86-specific Documentation
sva
sgx
features
elf_auxvec
......@@ -491,7 +491,7 @@ static void intel_pmu_arch_lbr_xrstors(void *ctx)
{
struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
copy_kernel_to_dynamic_supervisor(&task_ctx->xsave, XFEATURE_MASK_LBR);
xrstors(&task_ctx->xsave, XFEATURE_MASK_LBR);
}
static __always_inline bool lbr_is_reset_in_cstate(void *ctx)
......@@ -576,7 +576,7 @@ static void intel_pmu_arch_lbr_xsaves(void *ctx)
{
struct x86_perf_task_context_arch_lbr_xsave *task_ctx = ctx;
copy_dynamic_supervisor_to_kernel(&task_ctx->xsave, XFEATURE_MASK_LBR);
xsaves(&task_ctx->xsave, XFEATURE_MASK_LBR);
}
static void __intel_pmu_lbr_save(void *ctx)
......@@ -993,7 +993,7 @@ static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
intel_pmu_store_lbr(cpuc, NULL);
return;
}
copy_dynamic_supervisor_to_kernel(&xsave->xsave, XFEATURE_MASK_LBR);
xsaves(&xsave->xsave, XFEATURE_MASK_LBR);
intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
}
......
......@@ -312,6 +312,7 @@ do { \
NEW_AUX_ENT(AT_SYSINFO, VDSO_ENTRY); \
NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE); \
} \
NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \
} while (0)
/*
......@@ -328,6 +329,7 @@ extern unsigned long task_size_32bit(void);
extern unsigned long task_size_64bit(int full_addr_space);
extern unsigned long get_mmap_base(int is_legacy);
extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len);
extern unsigned long get_sigframe_size(void);
#ifdef CONFIG_X86_32
......@@ -349,6 +351,7 @@ do { \
if (vdso64_enabled) \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(unsigned long __force)current->mm->context.vdso); \
NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \
} while (0)
/* As a historical oddity, the x32 and x86_64 vDSOs are controlled together. */
......@@ -357,6 +360,7 @@ do { \
if (vdso64_enabled) \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(unsigned long __force)current->mm->context.vdso); \
NEW_AUX_ENT(AT_MINSIGSTKSZ, get_sigframe_size()); \
} while (0)
#define AT_SYSINFO 32
......
This diff is collapsed.
......@@ -29,6 +29,8 @@ unsigned long
fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long *buf_fx, unsigned long *size);
unsigned long fpu__get_fpstate_size(void);
extern void fpu__init_prepare_fx_sw_frame(void);
#endif /* _ASM_X86_FPU_SIGNAL_H */
......@@ -6,6 +6,7 @@
#include <linux/types.h>
#include <asm/processor.h>
#include <asm/fpu/api.h>
#include <asm/user.h>
/* Bit 63 of XCR0 is reserved for future expansion */
......@@ -34,6 +35,14 @@
XFEATURE_MASK_BNDREGS | \
XFEATURE_MASK_BNDCSR)
/*
* Features which are restored when returning to user space.
* PKRU is not restored on return to user space because PKRU
* is switched eagerly in switch_to() and flush_thread()
*/
#define XFEATURE_MASK_USER_RESTORE \
(XFEATURE_MASK_USER_SUPPORTED & ~XFEATURE_MASK_PKRU)
/* All currently supported supervisor features */
#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (XFEATURE_MASK_PASID)
......@@ -42,21 +51,21 @@
* and its size may be huge. Saving/restoring such supervisor state components
* at each context switch can cause high CPU and space overhead, which should
* be avoided. Such supervisor state components should only be saved/restored
* on demand. The on-demand dynamic supervisor features are set in this mask.
* on demand. The on-demand supervisor features are set in this mask.
*
* Unlike the existing supported supervisor features, a dynamic supervisor
* Unlike the existing supported supervisor features, an independent supervisor
* feature does not allocate a buffer in task->fpu, and the corresponding
* supervisor state component cannot be saved/restored at each context switch.
*
* To support a dynamic supervisor feature, a developer should follow the
* To support an independent supervisor feature, a developer should follow the
* dos and don'ts as below:
* - Do dynamically allocate a buffer for the supervisor state component.
* - Do manually invoke the XSAVES/XRSTORS instruction to save/restore the
* state component to/from the buffer.
* - Don't set the bit corresponding to the dynamic supervisor feature in
* - Don't set the bit corresponding to the independent supervisor feature in
* IA32_XSS at run time, since it has been set at boot time.
*/
#define XFEATURE_MASK_DYNAMIC (XFEATURE_MASK_LBR)
#define XFEATURE_MASK_INDEPENDENT (XFEATURE_MASK_LBR)
/*
* Unsupported supervisor features. When a supervisor feature in this mask is
......@@ -66,7 +75,7 @@
/* All supervisor states including supported and unsupported states. */
#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \
XFEATURE_MASK_DYNAMIC | \
XFEATURE_MASK_INDEPENDENT | \
XFEATURE_MASK_SUPERVISOR_UNSUPPORTED)
#ifdef CONFIG_X86_64
......@@ -82,17 +91,42 @@ static inline u64 xfeatures_mask_supervisor(void)
return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED;
}
static inline u64 xfeatures_mask_user(void)
/*
* The xfeatures which are enabled in XCR0 and expected to be in ptrace
* buffers and signal frames.
*/
static inline u64 xfeatures_mask_uabi(void)
{
return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED;
}
static inline u64 xfeatures_mask_dynamic(void)
/*
* The xfeatures which are restored by the kernel when returning to user
* mode. This is not necessarily the same as xfeatures_mask_uabi() as the
* kernel does not manage all XCR0 enabled features via xsave/xrstor as
* some of them have to be switched eagerly on context switch and exec().
*/
static inline u64 xfeatures_mask_restore_user(void)
{
return xfeatures_mask_all & XFEATURE_MASK_USER_RESTORE;
}
/*
* Like xfeatures_mask_restore_user() but additionally restors the
* supported supervisor states.
*/
static inline u64 xfeatures_mask_fpstate(void)
{
return xfeatures_mask_all & \
(XFEATURE_MASK_USER_RESTORE | XFEATURE_MASK_SUPERVISOR_SUPPORTED);
}
static inline u64 xfeatures_mask_independent(void)
{
if (!boot_cpu_has(X86_FEATURE_ARCH_LBR))
return XFEATURE_MASK_DYNAMIC & ~XFEATURE_MASK_LBR;
return XFEATURE_MASK_INDEPENDENT & ~XFEATURE_MASK_LBR;
return XFEATURE_MASK_DYNAMIC;
return XFEATURE_MASK_INDEPENDENT;
}
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
......@@ -101,19 +135,21 @@ extern void __init update_regset_xstate_info(unsigned int size,
u64 xstate_mask);
void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
const void *get_xsave_field_ptr(int xfeature_nr);
int using_compacted_format(void);
int xfeature_size(int xfeature_nr);
struct membuf;
void copy_xstate_to_kernel(struct membuf to, struct xregs_state *xsave);
int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
void copy_supervisor_to_kernel(struct xregs_state *xsave);
void copy_dynamic_supervisor_to_kernel(struct xregs_state *xstate, u64 mask);
void copy_kernel_to_dynamic_supervisor(struct xregs_state *xstate, u64 mask);
int copy_uabi_from_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf);
int copy_sigframe_from_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf);
void xsaves(struct xregs_state *xsave, u64 mask);
void xrstors(struct xregs_state *xsave, u64 mask);
/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */
int validate_user_xstate_header(const struct xstate_header *hdr);
enum xstate_copy_mode {
XSTATE_COPY_FP,
XSTATE_COPY_FX,
XSTATE_COPY_XSAVE,
};
struct membuf;
void copy_xstate_to_uabi_buf(struct membuf to, struct task_struct *tsk,
enum xstate_copy_mode mode);
#endif
......@@ -23,7 +23,7 @@
#ifndef __ASSEMBLY__
#include <asm/x86_init.h>
#include <asm/fpu/xstate.h>
#include <asm/pkru.h>
#include <asm/fpu/api.h>
#include <asm-generic/pgtable_uffd.h>
......@@ -126,35 +126,6 @@ static inline int pte_dirty(pte_t pte)
return pte_flags(pte) & _PAGE_DIRTY;
}
static inline u32 read_pkru(void)
{
if (boot_cpu_has(X86_FEATURE_OSPKE))
return rdpkru();
return 0;
}
static inline void write_pkru(u32 pkru)
{
struct pkru_state *pk;
if (!boot_cpu_has(X86_FEATURE_OSPKE))
return;
pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU);
/*
* The PKRU value in xstate needs to be in sync with the value that is
* written to the CPU. The FPU restore on return to userland would
* otherwise load the previous value again.
*/
fpregs_lock();
if (pk)
pk->pkru = pkru;
__write_pkru(pkru);
fpregs_unlock();
}
static inline int pte_young(pte_t pte)
{
return pte_flags(pte) & _PAGE_ACCESSED;
......@@ -1360,32 +1331,6 @@ static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd)
}
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */
#define PKRU_AD_BIT 0x1
#define PKRU_WD_BIT 0x2
#define PKRU_BITS_PER_PKEY 2
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
extern u32 init_pkru_value;
#else
#define init_pkru_value 0
#endif
static inline bool __pkru_allows_read(u32 pkru, u16 pkey)
{
int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits));
}
static inline bool __pkru_allows_write(u32 pkru, u16 pkey)
{
int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
/*
* Access-disable disables writes too so we need to check
* both bits here.
*/
return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits));
}
static inline u16 pte_flags_pkey(unsigned long pte_flags)
{
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
......
......@@ -9,14 +9,14 @@
* will be necessary to ensure that the types that store key
* numbers and masks have sufficient capacity.
*/
#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
#define arch_max_pkey() (cpu_feature_enabled(X86_FEATURE_OSPKE) ? 16 : 1)
extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val);
static inline bool arch_pkeys_enabled(void)
{
return boot_cpu_has(X86_FEATURE_OSPKE);
return cpu_feature_enabled(X86_FEATURE_OSPKE);
}
/*
......@@ -26,7 +26,7 @@ static inline bool arch_pkeys_enabled(void)
extern int __execute_only_pkey(struct mm_struct *mm);
static inline int execute_only_pkey(struct mm_struct *mm)
{
if (!boot_cpu_has(X86_FEATURE_OSPKE))
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return ARCH_DEFAULT_PKEY;
return __execute_only_pkey(mm);
......@@ -37,7 +37,7 @@ extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma,
static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
int prot, int pkey)
{
if (!boot_cpu_has(X86_FEATURE_OSPKE))
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return 0;
return __arch_override_mprotect_pkey(vma, prot, pkey);
......@@ -124,7 +124,6 @@ extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val);
extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
unsigned long init_val);
extern void copy_init_pkru_to_fpregs(void);
static inline int vma_pkey(struct vm_area_struct *vma)
{
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PKRU_H
#define _ASM_X86_PKRU_H
#include <asm/fpu/xstate.h>
#define PKRU_AD_BIT 0x1
#define PKRU_WD_BIT 0x2
#define PKRU_BITS_PER_PKEY 2
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
extern u32 init_pkru_value;
#define pkru_get_init_value() READ_ONCE(init_pkru_value)
#else
#define init_pkru_value 0
#define pkru_get_init_value() 0
#endif
static inline bool __pkru_allows_read(u32 pkru, u16 pkey)
{
int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits));
}
static inline bool __pkru_allows_write(u32 pkru, u16 pkey)
{
int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
/*
* Access-disable disables writes too so we need to check
* both bits here.
*/
return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits));
}
static inline u32 read_pkru(void)
{
if (cpu_feature_enabled(X86_FEATURE_OSPKE))
return rdpkru();
return 0;
}
static inline void write_pkru(u32 pkru)
{
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return;
/*
* WRPKRU is relatively expensive compared to RDPKRU.
* Avoid WRPKRU when it would not change the value.
*/
if (pkru != rdpkru())
wrpkru(pkru);
}
static inline void pkru_write_default(void)
{
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return;
wrpkru(pkru_get_init_value());
}
#endif
......@@ -518,6 +518,15 @@ struct thread_struct {
unsigned int sig_on_uaccess_err:1;
/*
* Protection Keys Register for Userspace. Loaded immediately on
* context switch. Store it in thread_struct to avoid a lookup in
* the tasks's FPU xstate buffer. This value is only valid when a
* task is scheduled out. For 'current' the authoritative source of
* PKRU is the hardware itself.
*/
u32 pkru;
/* Floating point and extended processor state */
struct fpu fpu;
/*
......
......@@ -85,4 +85,6 @@ struct rt_sigframe_x32 {
#endif /* CONFIG_X86_64 */
void __init init_sigframe_size(void);
#endif /* _ASM_X86_SIGFRAME_H */
......@@ -104,25 +104,13 @@ static inline void wrpkru(u32 pkru)
: : "a" (pkru), "c"(ecx), "d"(edx));
}
static inline void __write_pkru(u32 pkru)
{
/*
* WRPKRU is relatively expensive compared to RDPKRU.
* Avoid WRPKRU when it would not change the value.
*/
if (pkru == rdpkru())
return;
wrpkru(pkru);
}
#else
static inline u32 rdpkru(void)
{
return 0;
}
static inline void __write_pkru(u32 pkru)
static inline void wrpkru(u32 pkru)
{
}
#endif
......
......@@ -12,9 +12,9 @@
/* entries in ARCH_DLINFO: */
#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
# define AT_VECTOR_SIZE_ARCH 2
# define AT_VECTOR_SIZE_ARCH 3
#else /* else it's non-compat x86-64 */
# define AT_VECTOR_SIZE_ARCH 1
# define AT_VECTOR_SIZE_ARCH 2
#endif
#endif /* _ASM_X86_AUXVEC_H */
......@@ -58,6 +58,7 @@
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
#include <asm/uv/uv.h>
#include <asm/sigframe.h>
#include "cpu.h"
......@@ -465,27 +466,22 @@ static bool pku_disabled;
static __always_inline void setup_pku(struct cpuinfo_x86 *c)
{
struct pkru_state *pk;
if (c == &boot_cpu_data) {
if (pku_disabled || !cpu_feature_enabled(X86_FEATURE_PKU))
return;
/*
* Setting CR4.PKE will cause the X86_FEATURE_OSPKE cpuid
* bit to be set. Enforce it.
*/
setup_force_cpu_cap(X86_FEATURE_OSPKE);
/* check the boot processor, plus compile options for PKU: */
if (!cpu_feature_enabled(X86_FEATURE_PKU))
return;
/* checks the actual processor's cpuid bits: */
if (!cpu_has(c, X86_FEATURE_PKU))
return;
if (pku_disabled)
} else if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) {
return;
}
cr4_set_bits(X86_CR4_PKE);
pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
if (pk)
pk->pkru = init_pkru_value;
/*
* Setting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
* cpuid bit to be set. We need to ensure that we
* update that bit in this CPU's "cpu_info".
*/
set_cpu_cap(c, X86_FEATURE_OSPKE);
/* Load the default PKRU value */
pkru_write_default();
}
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
......@@ -1332,6 +1328,8 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
fpu__init_system(c);
init_sigframe_size();
#ifdef CONFIG_X86_32
/*
* Regardless of whether PCID is enumerated, the SDM says
......@@ -1717,9 +1715,8 @@ void print_cpu_info(struct cpuinfo_x86 *c)
}
/*
* clearcpuid= was already parsed in fpu__init_parse_early_param.
* But we need to keep a dummy __setup around otherwise it would
* show up as an environment variable for init.
* clearcpuid= was already parsed in cpu_parse_early_param(). This dummy
* function prevents it from becoming an environment variable for init.
*/
static __init int setup_clearcpuid(char *arg)
{
......
This diff is collapsed.
......@@ -89,7 +89,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
/*
* Boot time FPU feature detection code:
*/
unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int mxcsr_feature_mask __ro_after_init = 0xffffffffu;
EXPORT_SYMBOL_GPL(mxcsr_feature_mask);
static void __init fpu__init_system_mxcsr(void)
......@@ -135,7 +135,7 @@ static void __init fpu__init_system_generic(void)
* This is inherent to the XSAVE architecture which puts all state
* components into a single, continuous memory block:
*/
unsigned int fpu_kernel_xstate_size;
unsigned int fpu_kernel_xstate_size __ro_after_init;
EXPORT_SYMBOL_GPL(fpu_kernel_xstate_size);
/* Get alignment of the TYPE. */
......@@ -216,17 +216,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
fpu_user_xstate_size = fpu_kernel_xstate_size;
}
/*
* Find supported xfeatures based on cpu features and command-line input.
* This must be called after fpu__init_parse_early_param() is called and
* xfeatures_mask is enumerated.
*/
u64 __init fpu__get_supported_xfeatures_mask(void)
{
return XFEATURE_MASK_USER_SUPPORTED |
XFEATURE_MASK_SUPERVISOR_SUPPORTED;
}
/* Legacy code to initialize eager fpu mode. */
static void __init fpu__init_system_ctx_switch(void)
{
......
......@@ -2,11 +2,13 @@
/*
* FPU register's regset abstraction, for ptrace, core dumps, etc.
*/
#include <linux/sched/task_stack.h>
#include <linux/vmalloc.h>
#include <asm/fpu/internal.h>
#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/xstate.h>
#include <linux/sched/task_stack.h>
/*
* The xstateregs_active() routine is the same as the regset_fpregs_active() routine,
......@@ -26,18 +28,58 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r
return 0;
}
/*
* The regset get() functions are invoked from:
*
* - coredump to dump the current task's fpstate. If the current task
* owns the FPU then the memory state has to be synchronized and the
* FPU register state preserved. Otherwise fpstate is already in sync.
*
* - ptrace to dump fpstate of a stopped task, in which case the registers
* have already been saved to fpstate on context switch.
*/
static void sync_fpstate(struct fpu *fpu)
{
if (fpu == &current->thread.fpu)
fpu_sync_fpstate(fpu);
}
/*
* Invalidate cached FPU registers before modifying the stopped target
* task's fpstate.
*
* This forces the target task on resume to restore the FPU registers from
* modified fpstate. Otherwise the task might skip the restore and operate
* with the cached FPU registers which discards the modifications.
*/
static void fpu_force_restore(struct fpu *fpu)
{
/*
* Only stopped child tasks can be used to modify the FPU
* state in the fpstate buffer:
*/
WARN_ON_FPU(fpu == &current->thread.fpu);
__fpu_invalidate_fpregs_state(fpu);
}
int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
struct fpu *fpu = &target->thread.fpu;
if (!boot_cpu_has(X86_FEATURE_FXSR))
if (!cpu_feature_enabled(X86_FEATURE_FXSR))
return -ENODEV;
fpu__prepare_read(fpu);
fpstate_sanitize_xstate(fpu);
sync_fpstate(fpu);
if (!use_xsave()) {
return membuf_write(&to, &fpu->state.fxsave,
sizeof(fpu->state.fxsave));
}
return membuf_write(&to, &fpu->state.fxsave, sizeof(struct fxregs_state));
copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_FX);
return 0;
}
int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
......@@ -45,62 +87,52 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
struct fpu *fpu = &target->thread.fpu;
struct user32_fxsr_struct newstate;
int ret;
if (!boot_cpu_has(X86_FEATURE_FXSR))
BUILD_BUG_ON(sizeof(newstate) != sizeof(struct fxregs_state));
if (!cpu_feature_enabled(X86_FEATURE_FXSR))
return -ENODEV;
fpu__prepare_write(fpu);
fpstate_sanitize_xstate(fpu);
/* No funny business with partial or oversized writes is permitted. */
if (pos != 0 || count != sizeof(newstate))
return -EINVAL;
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&fpu->state.fxsave, 0, -1);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &newstate, 0, -1);
if (ret)
return ret;
/*
* mxcsr reserved bits must be masked to zero for security reasons.
*/
fpu->state.fxsave.mxcsr &= mxcsr_feature_mask;
/* Do not allow an invalid MXCSR value. */
if (newstate.mxcsr & ~mxcsr_feature_mask)
return -EINVAL;
/*
* update the header bits in the xsave header, indicating the
* presence of FP and SSE state.
*/
if (boot_cpu_has(X86_FEATURE_XSAVE))
fpu_force_restore(fpu);
/* Copy the state */
memcpy(&fpu->state.fxsave, &newstate, sizeof(newstate));
/* Clear xmm8..15 */
BUILD_BUG_ON(sizeof(fpu->state.fxsave.xmm_space) != 16 * 16);
memset(&fpu->state.fxsave.xmm_space[8], 0, 8 * 16);
/* Mark FP and SSE as in use when XSAVE is enabled */
if (use_xsave())
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
return ret;
return 0;
}
int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
struct fpu *fpu = &target->thread.fpu;
struct xregs_state *xsave;
if (!boot_cpu_has(X86_FEATURE_XSAVE))
if (!cpu_feature_enabled(X86_FEATURE_XSAVE))
return -ENODEV;
xsave = &fpu->state.xsave;
fpu__prepare_read(fpu);
sync_fpstate(&target->thread.fpu);
if (using_compacted_format()) {
copy_xstate_to_kernel(to, xsave);
return 0;
} else {
fpstate_sanitize_xstate(fpu);
/*
* Copy the 48 bytes defined by the software into the xsave
* area in the thread struct, so that we can copy the whole
* area to user using one user_regset_copyout().
*/
memcpy(&xsave->i387.sw_reserved, xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
/*
* Copy the xstate memory layout.
*/
return membuf_write(&to, xsave, fpu_user_xstate_size);
}
copy_xstate_to_uabi_buf(to, target, XSTATE_COPY_XSAVE);
return 0;
}
int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
......@@ -108,44 +140,34 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
const void *kbuf, const void __user *ubuf)
{
struct fpu *fpu = &target->thread.fpu;
struct xregs_state *xsave;
struct xregs_state *tmpbuf = NULL;
int ret;
if (!boot_cpu_has(X86_FEATURE_XSAVE))
if (!cpu_feature_enabled(X86_FEATURE_XSAVE))
return -ENODEV;
/*
* A whole standard-format XSAVE buffer is needed:
*/
if ((pos != 0) || (count < fpu_user_xstate_size))
if (pos != 0 || count != fpu_user_xstate_size)
return -EFAULT;
xsave = &fpu->state.xsave;
fpu__prepare_write(fpu);
if (!kbuf) {
tmpbuf = vmalloc(count);
if (!tmpbuf)
return -ENOMEM;
if (using_compacted_format()) {
if (kbuf)
ret = copy_kernel_to_xstate(xsave, kbuf);
else
ret = copy_user_to_xstate(xsave, ubuf);
} else {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
if (!ret)
ret = validate_user_xstate_header(&xsave->header);
if (copy_from_user(tmpbuf, ubuf, count)) {
ret = -EFAULT;
goto out;
}
}
/*
* mxcsr reserved bits must be masked to zero for security reasons.
*/
xsave->i387.mxcsr &= mxcsr_feature_mask;
/*
* In case of failure, mark all states as init:
*/
if (ret)
fpstate_init(&fpu->state);
fpu_force_restore(fpu);
ret = copy_uabi_from_kernel_to_xstate(&fpu->state.xsave, kbuf ?: tmpbuf);
out:
vfree(tmpbuf);
return ret;
}
......@@ -221,10 +243,10 @@ static inline u32 twd_fxsr_to_i387(struct fxregs_state *fxsave)
* FXSR floating point environment conversions.
*/
void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
static void __convert_from_fxsr(struct user_i387_ia32_struct *env,
struct task_struct *tsk,
struct fxregs_state *fxsave)
{
struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave;
struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
int i;
......@@ -258,6 +280,12 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
memcpy(&to[i], &from[i], sizeof(to[0]));
}
void
convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
{
__convert_from_fxsr(env, tsk, &tsk->thread.fpu.state.fxsave);
}
void convert_to_fxsr(struct fxregs_state *fxsave,
const struct user_i387_ia32_struct *env)
......@@ -290,25 +318,29 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
{
struct fpu *fpu = &target->thread.fpu;
struct user_i387_ia32_struct env;
struct fxregs_state fxsave, *fx;
fpu__prepare_read(fpu);
sync_fpstate(fpu);
if (!boot_cpu_has(X86_FEATURE_FPU))
if (!cpu_feature_enabled(X86_FEATURE_FPU))
return fpregs_soft_get(target, regset, to);
if (!boot_cpu_has(X86_FEATURE_FXSR)) {
if (!cpu_feature_enabled(X86_FEATURE_FXSR)) {
return membuf_write(&to, &fpu->state.fsave,
sizeof(struct fregs_state));
}
fpstate_sanitize_xstate(fpu);
if (use_xsave()) {
struct membuf mb = { .p = &fxsave, .left = sizeof(fxsave) };
if (to.left == sizeof(env)) {
convert_from_fxsr(to.p, target);
return 0;
/* Handle init state optimized xstate correctly */
copy_xstate_to_uabi_buf(mb, target, XSTATE_COPY_FP);
fx = &fxsave;
} else {
fx = &fpu->state.fxsave;
}
convert_from_fxsr(&env, target);
__convert_from_fxsr(&env, target, fx);
return membuf_write(&to, &env, sizeof(env));
}
......@@ -320,31 +352,32 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
struct user_i387_ia32_struct env;
int ret;
fpu__prepare_write(fpu);
fpstate_sanitize_xstate(fpu);
/* No funny business with partial or oversized writes is permitted. */
if (pos != 0 || count != sizeof(struct user_i387_ia32_struct))
return -EINVAL;
if (!boot_cpu_has(X86_FEATURE_FPU))
if (!cpu_feature_enabled(X86_FEATURE_FPU))
return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
if (!boot_cpu_has(X86_FEATURE_FXSR))
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&fpu->state.fsave, 0,
-1);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
if (ret)
return ret;
if (pos > 0 || count < sizeof(env))
convert_from_fxsr(&env, target);
fpu_force_restore(fpu);
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
if (!ret)
convert_to_fxsr(&target->thread.fpu.state.fxsave, &env);
if (cpu_feature_enabled(X86_FEATURE_FXSR))
convert_to_fxsr(&fpu->state.fxsave, &env);
else
memcpy(&fpu->state.fsave, &env, sizeof(env));
/*
* update the header bit in the xsave header, indicating the
* Update the header bit in the xsave header, indicating the
* presence of FP.
*/
if (boot_cpu_has(X86_FEATURE_XSAVE))
if (cpu_feature_enabled(X86_FEATURE_XSAVE))
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
return ret;
return 0;
}
#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
This diff is collapsed.
This diff is collapsed.
......@@ -87,8 +87,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
#ifdef CONFIG_VM86
dst->thread.vm86 = NULL;
#endif
return fpu__copy(dst, src);
return fpu_clone(dst);
}
/*
......@@ -157,11 +156,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
/* Kernel thread ? */
if (unlikely(p->flags & PF_KTHREAD)) {
p->thread.pkru = pkru_get_init_value();
memset(childregs, 0, sizeof(struct pt_regs));
kthread_frame_init(frame, sp, arg);
return 0;
}
/*
* Clone current's PKRU value from hardware. tsk->thread.pkru
* is only valid when scheduled out.
*/
p->thread.pkru = read_pkru();
frame->bx = 0;
*childregs = *current_pt_regs();
childregs->ax = 0;
......@@ -199,6 +205,15 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
return ret;
}
static void pkru_flush_thread(void)
{
/*
* If PKRU is enabled the default PKRU value has to be loaded into
* the hardware right here (similar to context switch).
*/
pkru_write_default();
}
void flush_thread(void)
{
struct task_struct *tsk = current;
......@@ -206,7 +221,8 @@ void flush_thread(void)
flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
fpu__clear_all(&tsk->thread.fpu);
fpu_flush_thread();
pkru_flush_thread();
}
void disable_TSC(void)
......
......@@ -41,6 +41,7 @@
#include <linux/syscalls.h>
#include <asm/processor.h>
#include <asm/pkru.h>
#include <asm/fpu/internal.h>
#include <asm/mmu_context.h>
#include <asm/prctl.h>
......@@ -136,7 +137,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
log_lvl, d3, d6, d7);
}
if (boot_cpu_has(X86_FEATURE_OSPKE))
if (cpu_feature_enabled(X86_FEATURE_OSPKE))
printk("%sPKRU: %08x\n", log_lvl, read_pkru());
}
......@@ -339,6 +340,29 @@ static __always_inline void load_seg_legacy(unsigned short prev_index,
}
}
/*
* Store prev's PKRU value and load next's PKRU value if they differ. PKRU
* is not XSTATE managed on context switch because that would require a
* lookup in the task's FPU xsave buffer and require to keep that updated
* in various places.
*/
static __always_inline void x86_pkru_load(struct thread_struct *prev,
struct thread_struct *next)
{
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return;
/* Stash the prev task's value: */
prev->pkru = rdpkru();
/*
* PKRU writes are slightly expensive. Avoid them when not
* strictly necessary:
*/
if (prev->pkru != next->pkru)
wrpkru(next->pkru);
}
static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
struct thread_struct *next)
{
......@@ -588,6 +612,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
x86_fsgsbase_load(prev, next);
x86_pkru_load(prev, next);
/*
* Switch the PDA and FPU contexts.
*/
......
......@@ -212,6 +212,11 @@ do { \
* Set up a signal frame.
*/
/* x86 ABI requires 16-byte alignment */
#define FRAME_ALIGNMENT 16UL
#define MAX_FRAME_PADDING (FRAME_ALIGNMENT - 1)
/*
* Determine which stack to use..
*/
......@@ -222,9 +227,9 @@ static unsigned long align_sigframe(unsigned long sp)
* Align the stack pointer according to the i386 ABI,
* i.e. so that on function entry ((sp + 4) & 15) == 0.
*/
sp = ((sp + 4) & -16ul) - 4;
sp = ((sp + 4) & -FRAME_ALIGNMENT) - 4;
#else /* !CONFIG_X86_32 */
sp = round_down(sp, 16) - 8;
sp = round_down(sp, FRAME_ALIGNMENT) - 8;
#endif
return sp;
}
......@@ -234,10 +239,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
void __user **fpstate)
{
/* Default to using normal stack */
bool nested_altstack = on_sig_stack(regs->sp);
bool entering_altstack = false;
unsigned long math_size = 0;
unsigned long sp = regs->sp;
unsigned long buf_fx = 0;
int onsigstack = on_sig_stack(sp);
int ret;
/* redzone */
......@@ -246,15 +252,23 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
/* This is the X/Open sanctioned signal stack switching. */
if (ka->sa.sa_flags & SA_ONSTACK) {
if (sas_ss_flags(sp) == 0)
/*
* This checks nested_altstack via sas_ss_flags(). Sensible
* programs use SS_AUTODISARM, which disables that check, and
* programs that don't use SS_AUTODISARM get compatible.
*/
if (sas_ss_flags(sp) == 0) {
sp = current->sas_ss_sp + current->sas_ss_size;
entering_altstack = true;
}
} else if (IS_ENABLED(CONFIG_X86_32) &&
!onsigstack &&
!nested_altstack &&
regs->ss != __USER_DS &&
!(ka->sa.sa_flags & SA_RESTORER) &&
ka->sa.sa_restorer) {
/* This is the legacy signal stack switching. */
sp = (unsigned long) ka->sa.sa_restorer;
entering_altstack = true;
}
sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
......@@ -267,8 +281,15 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
* If we are on the alternate signal stack and would overflow it, don't.
* Return an always-bogus address instead so we will die with SIGSEGV.
*/
if (onsigstack && !likely(on_sig_stack(sp)))
if (unlikely((nested_altstack || entering_altstack) &&
!__on_sig_stack(sp))) {
if (show_unhandled_signals && printk_ratelimit())
pr_info("%s[%d] overflowed sigaltstack\n",
current->comm, task_pid_nr(current));
return (void __user *)-1L;
}
/* save i387 and extended state */
ret = copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size);
......@@ -663,6 +684,61 @@ SYSCALL_DEFINE0(rt_sigreturn)
return 0;
}
/*
* There are four different struct types for signal frame: sigframe_ia32,
* rt_sigframe_ia32, rt_sigframe_x32, and rt_sigframe. Use the worst case
* -- the largest size. It means the size for 64-bit apps is a bit more
* than needed, but this keeps the code simple.
*/
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
# define MAX_FRAME_SIGINFO_UCTXT_SIZE sizeof(struct sigframe_ia32)
#else
# define MAX_FRAME_SIGINFO_UCTXT_SIZE sizeof(struct rt_sigframe)
#endif
/*
* The FP state frame contains an XSAVE buffer which must be 64-byte aligned.
* If a signal frame starts at an unaligned address, extra space is required.
* This is the max alignment padding, conservatively.
*/
#define MAX_XSAVE_PADDING 63UL
/*
* The frame data is composed of the following areas and laid out as:
*
* -------------------------
* | alignment padding |
* -------------------------
* | (f)xsave frame |
* -------------------------
* | fsave header |
* -------------------------
* | alignment padding |
* -------------------------
* | siginfo + ucontext |
* -------------------------
*/
/* max_frame_size tells userspace the worst case signal stack size. */
static unsigned long __ro_after_init max_frame_size;
void __init init_sigframe_size(void)
{
max_frame_size = MAX_FRAME_SIGINFO_UCTXT_SIZE + MAX_FRAME_PADDING;
max_frame_size += fpu__get_fpstate_size() + MAX_XSAVE_PADDING;
/* Userspace expects an aligned size. */
max_frame_size = round_up(max_frame_size, FRAME_ALIGNMENT);
pr_info("max sigframe size: %lu\n", max_frame_size);
}
unsigned long get_sigframe_size(void)
{
return max_frame_size;
}
static inline int is_ia32_compat_frame(struct ksignal *ksig)
{
return IS_ENABLED(CONFIG_IA32_EMULATION) &&
......
......@@ -1046,9 +1046,10 @@ static void math_error(struct pt_regs *regs, int trapnr)
}
/*
* Save the info for the exception handler and clear the error.
* Synchronize the FPU register state to the memory register state
* if necessary. This allows the exception handler to inspect it.
*/
fpu__save(fpu);
fpu_sync_fpstate(fpu);
task->thread.trap_nr = trapnr;
task->thread.error_code = 0;
......
......@@ -19,6 +19,7 @@
#include <linux/trace_events.h>
#include <asm/fpu/internal.h>
#include <asm/pkru.h>
#include <asm/trapnr.h>
#include "x86.h"
......
......@@ -66,6 +66,7 @@
#include <asm/msr.h>
#include <asm/desc.h>
#include <asm/mce.h>
#include <asm/pkru.h>
#include <linux/kernel_stat.h>
#include <asm/fpu/internal.h> /* Ugh! */
#include <asm/pvclock.h>
......@@ -939,7 +940,7 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
(kvm_read_cr4_bits(vcpu, X86_CR4_PKE) ||
(vcpu->arch.xcr0 & XFEATURE_MASK_PKRU)) &&
vcpu->arch.pkru != vcpu->arch.host_pkru)
__write_pkru(vcpu->arch.pkru);
write_pkru(vcpu->arch.pkru);
}
EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
......@@ -953,7 +954,7 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
(vcpu->arch.xcr0 & XFEATURE_MASK_PKRU))) {
vcpu->arch.pkru = rdpkru();
if (vcpu->arch.pkru != vcpu->arch.host_pkru)
__write_pkru(vcpu->arch.host_pkru);
write_pkru(vcpu->arch.host_pkru);
}
if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) {
......@@ -4704,20 +4705,21 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
*/
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
while (valid) {
u32 size, offset, ecx, edx;
u64 xfeature_mask = valid & -valid;
int xfeature_nr = fls64(xfeature_mask) - 1;
void *src = get_xsave_addr(xsave, xfeature_nr);
if (src) {
u32 size, offset, ecx, edx;
cpuid_count(XSTATE_CPUID, xfeature_nr,
&size, &offset, &ecx, &edx);
if (xfeature_nr == XFEATURE_PKRU)
memcpy(dest + offset, &vcpu->arch.pkru,
sizeof(vcpu->arch.pkru));
else
memcpy(dest + offset, src, size);
void *src;
cpuid_count(XSTATE_CPUID, xfeature_nr,
&size, &offset, &ecx, &edx);
if (xfeature_nr == XFEATURE_PKRU) {
memcpy(dest + offset, &vcpu->arch.pkru,
sizeof(vcpu->arch.pkru));
} else {
src = get_xsave_addr(xsave, xfeature_nr);
if (src)
memcpy(dest + offset, src, size);
}
valid -= xfeature_mask;
......@@ -4747,18 +4749,20 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
*/
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
while (valid) {
u32 size, offset, ecx, edx;
u64 xfeature_mask = valid & -valid;
int xfeature_nr = fls64(xfeature_mask) - 1;
void *dest = get_xsave_addr(xsave, xfeature_nr);
if (dest) {
u32 size, offset, ecx, edx;
cpuid_count(XSTATE_CPUID, xfeature_nr,
&size, &offset, &ecx, &edx);
if (xfeature_nr == XFEATURE_PKRU)
memcpy(&vcpu->arch.pkru, src + offset,
sizeof(vcpu->arch.pkru));
else
cpuid_count(XSTATE_CPUID, xfeature_nr,
&size, &offset, &ecx, &edx);
if (xfeature_nr == XFEATURE_PKRU) {
memcpy(&vcpu->arch.pkru, src + offset,
sizeof(vcpu->arch.pkru));
} else {
void *dest = get_xsave_addr(xsave, xfeature_nr);
if (dest)
memcpy(dest, src + offset, size);
}
......@@ -9885,7 +9889,7 @@ static void kvm_save_current_fpu(struct fpu *fpu)
memcpy(&fpu->state, &current->thread.fpu.state,
fpu_kernel_xstate_size);
else
copy_fpregs_to_fpstate(fpu);
save_fpregs_to_fpstate(fpu);
}
/* Swap (qemu) user FPU context for the guest FPU context. */
......@@ -9901,7 +9905,7 @@ static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
*/
if (vcpu->arch.guest_fpu)
/* PKRU is separately restored in kvm_x86_ops.run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
__restore_fpregs_from_fpstate(&vcpu->arch.guest_fpu->state,
~XFEATURE_MASK_PKRU);
fpregs_mark_activate();
......@@ -9922,7 +9926,7 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
if (vcpu->arch.guest_fpu)
kvm_save_current_fpu(vcpu->arch.guest_fpu);
copy_kernel_to_fpregs(&vcpu->arch.user_fpu->state);
restore_fpregs_from_fpstate(&vcpu->arch.user_fpu->state);
fpregs_mark_activate();
fpregs_unlock();
......
......@@ -144,7 +144,7 @@ extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, short __user *d);
extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, u_char __user *d);
extern int FPU_round_to_int(FPU_REG *r, u_char tag);
extern u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s);
extern void frstor(fpu_addr_modes addr_modes, u_char __user *data_address);
extern void FPU_frstor(fpu_addr_modes addr_modes, u_char __user *data_address);
extern u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d);
extern void fsave(fpu_addr_modes addr_modes, u_char __user *data_address);
extern int FPU_tagof(FPU_REG *ptr);
......
......@@ -240,7 +240,7 @@ int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
fix-up operations. */
return 1;
case 022: /* frstor m94/108byte */
frstor(addr_modes, (u_char __user *) data_address);
FPU_frstor(addr_modes, (u_char __user *) data_address);
/* Ensure that the values just loaded are not changed by
fix-up operations. */
return 1;
......
......@@ -1117,7 +1117,7 @@ u_char __user *fldenv(fpu_addr_modes addr_modes, u_char __user *s)
return s;
}
void frstor(fpu_addr_modes addr_modes, u_char __user *data_address)
void FPU_frstor(fpu_addr_modes addr_modes, u_char __user *data_address)
{
int i, regnr;
u_char __user *s = fldenv(addr_modes, data_address);
......
......@@ -65,7 +65,7 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
WARN_ONCE(1, "Bad FPU state detected at %pB, reinitializing FPU registers.",
(void *)instruction_pointer(regs));
__copy_kernel_to_fpregs(&init_fpstate, -1);
__restore_fpregs_from_fpstate(&init_fpstate, xfeatures_mask_fpstate());
return true;
}
EXPORT_SYMBOL_GPL(ex_handler_fprestore);
......
......@@ -875,7 +875,7 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
/* This code is always called on the current mm */
bool foreign = false;
if (!boot_cpu_has(X86_FEATURE_OSPKE))
if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
return false;
if (error_code & X86_PF_PK)
return true;
......
......@@ -10,7 +10,6 @@
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
#include <asm/mmu_context.h> /* vma_pkey() */
#include <asm/fpu/internal.h> /* init_fpstate */
int __execute_only_pkey(struct mm_struct *mm)
{
......@@ -125,22 +124,6 @@ u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
PKRU_AD_KEY(10) | PKRU_AD_KEY(11) | PKRU_AD_KEY(12) |
PKRU_AD_KEY(13) | PKRU_AD_KEY(14) | PKRU_AD_KEY(15);
/*
* Called from the FPU code when creating a fresh set of FPU
* registers. This is called from a very specific context where
* we know the FPU registers are safe for use and we can use PKRU
* directly.
*/
void copy_init_pkru_to_fpregs(void)
{
u32 init_pkru_value_snapshot = READ_ONCE(init_pkru_value);
/*
* Override the PKRU state that came from 'init_fpstate'
* with the baseline from the process.
*/
write_pkru(init_pkru_value_snapshot);
}
static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
......@@ -154,7 +137,6 @@ static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
static ssize_t init_pkru_write_file(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
struct pkru_state *pk;
char buf[32];
ssize_t len;
u32 new_init_pkru;
......@@ -177,10 +159,6 @@ static ssize_t init_pkru_write_file(struct file *file,
return -EINVAL;
WRITE_ONCE(init_pkru_value, new_init_pkru);
pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
if (!pk)
return -EINVAL;
pk->pkru = new_init_pkru;
return count;
}
......
......@@ -44,10 +44,6 @@ static inline bool arch_pkeys_enabled(void)
return false;
}
static inline void copy_init_pkru_to_fpregs(void)
{
}
#endif /* ! CONFIG_ARCH_HAS_PKEYS */
#endif /* _LINUX_PKEYS_H */
......@@ -538,6 +538,17 @@ static inline int kill_cad_pid(int sig, int priv)
#define SEND_SIG_NOINFO ((struct kernel_siginfo *) 0)
#define SEND_SIG_PRIV ((struct kernel_siginfo *) 1)
static inline int __on_sig_stack(unsigned long sp)
{
#ifdef CONFIG_STACK_GROWSUP
return sp >= current->sas_ss_sp &&
sp - current->sas_ss_sp < current->sas_ss_size;
#else
return sp > current->sas_ss_sp &&
sp - current->sas_ss_sp <= current->sas_ss_size;
#endif
}
/*
* True if we are on the alternate signal stack.
*/
......@@ -555,13 +566,7 @@ static inline int on_sig_stack(unsigned long sp)
if (current->sas_ss_flags & SS_AUTODISARM)
return 0;
#ifdef CONFIG_STACK_GROWSUP
return sp >= current->sas_ss_sp &&
sp - current->sas_ss_sp < current->sas_ss_size;
#else
return sp > current->sas_ss_sp &&
sp - current->sas_ss_sp <= current->sas_ss_size;
#endif
return __on_sig_stack(sp);
}
static inline int sas_ss_flags(unsigned long sp)
......
......@@ -33,5 +33,8 @@
#define AT_EXECFN 31 /* filename of program */
#ifndef AT_MINSIGSTKSZ
#define AT_MINSIGSTKSZ 51 /* minimal stack size for signal delivery */
#endif
#endif /* _UAPI_LINUX_AUXVEC_H */
......@@ -17,6 +17,7 @@
#include <string.h>
#include <assert.h>
#include <errno.h>
#include <sys/auxv.h>
#include "../kselftest.h"
......@@ -24,6 +25,11 @@
#define SS_AUTODISARM (1U << 31)
#endif
#ifndef AT_MINSIGSTKSZ
#define AT_MINSIGSTKSZ 51
#endif
static unsigned int stack_size;
static void *sstack, *ustack;
static ucontext_t uc, sc;
static const char *msg = "[OK]\tStack preserved";
......@@ -47,7 +53,7 @@ void my_usr1(int sig, siginfo_t *si, void *u)
#endif
if (sp < (unsigned long)sstack ||
sp >= (unsigned long)sstack + SIGSTKSZ) {
sp >= (unsigned long)sstack + stack_size) {
ksft_exit_fail_msg("SP is not on sigaltstack\n");
}
/* put some data on stack. other sighandler will try to overwrite it */
......@@ -108,6 +114,10 @@ int main(void)
stack_t stk;
int err;
/* Make sure more than the required minimum. */
stack_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ;
ksft_print_msg("[NOTE]\tthe stack size is %lu\n", stack_size);
ksft_print_header();
ksft_set_plan(3);
......@@ -117,7 +127,7 @@ int main(void)
sigaction(SIGUSR1, &act, NULL);
act.sa_sigaction = my_usr2;
sigaction(SIGUSR2, &act, NULL);
sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
sstack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if (sstack == MAP_FAILED) {
ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
......@@ -139,7 +149,7 @@ int main(void)
}
stk.ss_sp = sstack;
stk.ss_size = SIGSTKSZ;
stk.ss_size = stack_size;
stk.ss_flags = SS_ONSTACK | SS_AUTODISARM;
err = sigaltstack(&stk, NULL);
if (err) {
......@@ -161,7 +171,7 @@ int main(void)
}
}
ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
ustack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if (ustack == MAP_FAILED) {
ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
......@@ -170,7 +180,7 @@ int main(void)
getcontext(&uc);
uc.uc_link = NULL;
uc.uc_stack.ss_sp = ustack;
uc.uc_stack.ss_size = SIGSTKSZ;
uc.uc_stack.ss_size = stack_size;
makecontext(&uc, switch_fn, 0);
raise(SIGUSR1);
......
......@@ -13,11 +13,12 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
test_vsyscall mov_ss_trap \
syscall_arg_fault fsgsbase_restore
syscall_arg_fault fsgsbase_restore sigaltstack
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
corrupt_xstate_header
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* Corrupt the XSTATE header in a signal frame
*
* Based on analysis and a test case from Thomas Gleixner.
*/
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sched.h>
#include <signal.h>
#include <err.h>
#include <unistd.h>
#include <stdint.h>
#include <sys/wait.h>
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
asm volatile(
"cpuid;"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (*eax), "2" (*ecx));
}
static inline int xsave_enabled(void)
{
unsigned int eax, ebx, ecx, edx;
eax = 0x1;
ecx = 0x0;
__cpuid(&eax, &ebx, &ecx, &edx);
/* Is CR4.OSXSAVE enabled ? */
return ecx & (1U << 27);
}
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_sigaction = handler;
sa.sa_flags = SA_SIGINFO | flags;
sigemptyset(&sa.sa_mask);
if (sigaction(sig, &sa, 0))
err(1, "sigaction");
}
static void sigusr1(int sig, siginfo_t *info, void *uc_void)
{
ucontext_t *uc = uc_void;
uint8_t *fpstate = (uint8_t *)uc->uc_mcontext.fpregs;
uint64_t *xfeatures = (uint64_t *)(fpstate + 512);
printf("\tWreck XSTATE header\n");
/* Wreck the first reserved bytes in the header */
*(xfeatures + 2) = 0xfffffff;
}
static void sigsegv(int sig, siginfo_t *info, void *uc_void)
{
printf("\tGot SIGSEGV\n");
}
int main(void)
{
cpu_set_t set;
sethandler(SIGUSR1, sigusr1, 0);
sethandler(SIGSEGV, sigsegv, 0);
if (!xsave_enabled()) {
printf("[SKIP] CR4.OSXSAVE disabled.\n");
return 0;
}
CPU_ZERO(&set);
CPU_SET(0, &set);
/*
* Enforce that the child runs on the same CPU
* which in turn forces a schedule.
*/
sched_setaffinity(getpid(), sizeof(set), &set);
printf("[RUN]\tSend ourselves a signal\n");
raise(SIGUSR1);
printf("[OK]\tBack from the signal. Now schedule.\n");
pid_t child = fork();
if (child < 0)
err(1, "fork");
if (child == 0)
return 0;
if (child)
waitpid(child, NULL, 0);
printf("[OK]\tBack in the main thread.\n");
/*
* We could try to confirm that extended state is still preserved
* when we schedule. For now, the only indication of failure is
* a warning in the kernel logs.
*/
return 0;
}
// SPDX-License-Identifier: GPL-2.0-only
#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <stdbool.h>
#include <string.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <sys/mman.h>
#include <sys/auxv.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <setjmp.h>
/* sigaltstack()-enforced minimum stack */
#define ENFORCED_MINSIGSTKSZ 2048
#ifndef AT_MINSIGSTKSZ
# define AT_MINSIGSTKSZ 51
#endif
static int nerrs;
static bool sigalrm_expected;
static unsigned long at_minstack_size;
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_sigaction = handler;
sa.sa_flags = SA_SIGINFO | flags;
sigemptyset(&sa.sa_mask);
if (sigaction(sig, &sa, 0))
err(1, "sigaction");
}
static void clearhandler(int sig)
{
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_handler = SIG_DFL;
sigemptyset(&sa.sa_mask);
if (sigaction(sig, &sa, 0))
err(1, "sigaction");
}
static int setup_altstack(void *start, unsigned long size)
{
stack_t ss;
memset(&ss, 0, sizeof(ss));
ss.ss_size = size;
ss.ss_sp = start;
return sigaltstack(&ss, NULL);
}
static jmp_buf jmpbuf;
static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
{
if (sigalrm_expected) {
printf("[FAIL]\tWrong signal delivered: SIGSEGV (expected SIGALRM).");
nerrs++;
} else {
printf("[OK]\tSIGSEGV signal delivered.\n");
}
siglongjmp(jmpbuf, 1);
}
static void sigalrm(int sig, siginfo_t *info, void *ctx_void)
{
if (!sigalrm_expected) {
printf("[FAIL]\tWrong signal delivered: SIGALRM (expected SIGSEGV).");
nerrs++;
} else {
printf("[OK]\tSIGALRM signal delivered.\n");
}
}
static void test_sigaltstack(void *altstack, unsigned long size)
{
if (setup_altstack(altstack, size))
err(1, "sigaltstack()");
sigalrm_expected = (size > at_minstack_size) ? true : false;
sethandler(SIGSEGV, sigsegv, 0);
sethandler(SIGALRM, sigalrm, SA_ONSTACK);
if (!sigsetjmp(jmpbuf, 1)) {
printf("[RUN]\tTest an alternate signal stack of %ssufficient size.\n",
sigalrm_expected ? "" : "in");
printf("\tRaise SIGALRM. %s is expected to be delivered.\n",
sigalrm_expected ? "It" : "SIGSEGV");
raise(SIGALRM);
}
clearhandler(SIGALRM);
clearhandler(SIGSEGV);
}
int main(void)
{
void *altstack;
at_minstack_size = getauxval(AT_MINSIGSTKSZ);
altstack = mmap(NULL, at_minstack_size + SIGSTKSZ, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if (altstack == MAP_FAILED)
err(1, "mmap()");
if ((ENFORCED_MINSIGSTKSZ + 1) < at_minstack_size)
test_sigaltstack(altstack, ENFORCED_MINSIGSTKSZ + 1);
test_sigaltstack(altstack, at_minstack_size + SIGSTKSZ);
return nerrs == 0 ? 0 : 1;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment