Commit ae821d21 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'x86_mm_for_v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 mm cleanups from Borislav Petkov:

 - PTRACE_GETREGS/PTRACE_PUTREGS regset selection cleanup

 - Another initial cleanup - more to follow - to the fault handling
   code.

 - Other minor cleanups and corrections.

* tag 'x86_mm_for_v5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
  x86/{fault,efi}: Fix and rename efi_recover_from_page_fault()
  x86/fault: Don't run fixups for SMAP violations
  x86/fault: Don't look for extable entries for SMEP violations
  x86/fault: Rename no_context() to kernelmode_fixup_or_oops()
  x86/fault: Bypass no_context() for implicit kernel faults from usermode
  x86/fault: Split the OOPS code out from no_context()
  x86/fault: Improve kernel-executing-user-memory handling
  x86/fault: Correct a few user vs kernel checks wrt WRUSS
  x86/fault: Document the locking in the fault_signal_pending() path
  x86/fault/32: Move is_f00f_bug() to do_kern_addr_fault()
  x86/fault: Fold mm_fault_error() into do_user_addr_fault()
  x86/fault: Skip the AMD erratum #91 workaround on unaffected CPUs
  x86/fault: Fix AMD erratum #91 errata fixup for user code
  x86/Kconfig: Remove HPET_EMULATE_RTC depends on RTC
  x86/asm: Fixup TASK_SIZE_MAX comment
  x86/ptrace: Clean up PTRACE_GETREGS/PTRACE_PUTREGS regset selection
  x86/vm86/32: Remove VM86_SCREEN_BITMAP support
  x86: Remove definition of DEBUG
  x86/entry: Remove now unused do_IRQ() declaration
  x86/mm: Remove duplicate definition of _PAGE_PAT_LARGE
  ...
parents 1255f440 40c1fa52
......@@ -890,7 +890,7 @@ config HPET_TIMER
config HPET_EMULATE_RTC
def_bool y
depends on HPET_TIMER && (RTC=y || RTC=m || RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
depends on HPET_TIMER && (RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
config APB_TIMER
def_bool y if X86_INTEL_MID
......
......@@ -139,7 +139,7 @@ extern void __init efi_dump_pagetable(void);
extern void __init efi_apply_memmap_quirks(void);
extern int __init efi_reuse_config(u64 tables, int nr_tables);
extern void efi_delete_dummy_variable(void);
extern void efi_recover_from_page_fault(unsigned long phys_addr);
extern void efi_crash_gracefully_on_page_fault(unsigned long phys_addr);
extern void efi_free_boot_services(void);
void efi_enter_mm(void);
......
......@@ -40,8 +40,6 @@ extern void native_init_IRQ(void);
extern void __handle_irq(struct irq_desc *desc, struct pt_regs *regs);
extern __visible void do_IRQ(struct pt_regs *regs, unsigned long vector);
extern void init_ISA_irqs(void);
extern void __init init_IRQ(void);
......
......@@ -66,7 +66,7 @@
* On Intel CPUs, if a SYSCALL instruction is at the highest canonical
* address, then that syscall will enter the kernel with a
* non-canonical return address, and SYSRET will explode dangerously.
* We avoid this particular problem by preventing anything executable
* We avoid this particular problem by preventing anything
* from being mapped at the maximum canonical address.
*
* On AMD CPUs in the Ryzen family, there's a nasty bug in which the
......
......@@ -177,8 +177,6 @@ enum page_cache_mode {
#define __pgprot(x) ((pgprot_t) { (x) } )
#define __pg(x) __pgprot(x)
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
#define PAGE_NONE __pg( 0| 0| 0|___A| 0| 0| 0|___G)
#define PAGE_SHARED __pg(__PP|__RW|_USR|___A|__NX| 0| 0| 0)
#define PAGE_SHARED_EXEC __pg(__PP|__RW|_USR|___A| 0| 0| 0| 0)
......
......@@ -36,7 +36,6 @@ struct vm86 {
unsigned long saved_sp0;
unsigned long flags;
unsigned long screen_bitmap;
unsigned long cpu_type;
struct revectored_struct int_revectored;
struct revectored_struct int21_revectored;
......
......@@ -97,7 +97,7 @@ struct revectored_struct {
struct vm86_struct {
struct vm86_regs regs;
unsigned long flags;
unsigned long screen_bitmap;
unsigned long screen_bitmap; /* unused, preserved by vm86() */
unsigned long cpu_type;
struct revectored_struct int_revectored;
struct revectored_struct int21_revectored;
......@@ -106,7 +106,7 @@ struct vm86_struct {
/*
* flags masks
*/
#define VM86_SCREEN_BITMAP 0x0001
#define VM86_SCREEN_BITMAP 0x0001 /* no longer supported */
struct vm86plus_info_struct {
unsigned long force_return_for_pic:1;
......
......@@ -537,9 +537,9 @@ static void __init print_out_mtrr_range_state(void)
if (!size_base)
continue;
size_base = to_size_factor(size_base, &size_factor),
size_base = to_size_factor(size_base, &size_factor);
start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10);
start_base = to_size_factor(start_base, &start_factor),
start_base = to_size_factor(start_base, &start_factor);
type = range_state[i].type;
pr_debug("reg %d, base: %ld%cB, range: %ld%cB, type %s\n",
......
......@@ -3,7 +3,6 @@
* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
* because MTRRs can span up to 40 bits (36bits on most modern x86)
*/
#define DEBUG
#include <linux/export.h>
#include <linux/init.h>
......
......@@ -31,8 +31,6 @@
System Programming Guide; Section 9.11. (1997 edition - PPro).
*/
#define DEBUG
#include <linux/types.h> /* FIXME: kvm_para.h needs this */
#include <linux/stop_machine.h>
......
......@@ -4,9 +4,6 @@
#include <linux/string.h>
#include <linux/kallsyms.h>
#define DEBUG 1
static struct iommu_table_entry * __init
find_dependents_of(struct iommu_table_entry *start,
struct iommu_table_entry *finish,
......
......@@ -704,6 +704,9 @@ void ptrace_disable(struct task_struct *child)
#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
static const struct user_regset_view user_x86_32_view; /* Initialized below. */
#endif
#ifdef CONFIG_X86_64
static const struct user_regset_view user_x86_64_view; /* Initialized below. */
#endif
long arch_ptrace(struct task_struct *child, long request,
unsigned long addr, unsigned long data)
......@@ -711,6 +714,14 @@ long arch_ptrace(struct task_struct *child, long request,
int ret;
unsigned long __user *datap = (unsigned long __user *)data;
#ifdef CONFIG_X86_64
/* This is native 64-bit ptrace() */
const struct user_regset_view *regset_view = &user_x86_64_view;
#else
/* This is native 32-bit ptrace() */
const struct user_regset_view *regset_view = &user_x86_32_view;
#endif
switch (request) {
/* read the word at location addr in the USER area. */
case PTRACE_PEEKUSR: {
......@@ -749,28 +760,28 @@ long arch_ptrace(struct task_struct *child, long request,
case PTRACE_GETREGS: /* Get all gp regs from the child. */
return copy_regset_to_user(child,
task_user_regset_view(current),
regset_view,
REGSET_GENERAL,
0, sizeof(struct user_regs_struct),
datap);
case PTRACE_SETREGS: /* Set all gp regs in the child. */
return copy_regset_from_user(child,
task_user_regset_view(current),
regset_view,
REGSET_GENERAL,
0, sizeof(struct user_regs_struct),
datap);
case PTRACE_GETFPREGS: /* Get the child FPU state. */
return copy_regset_to_user(child,
task_user_regset_view(current),
regset_view,
REGSET_FP,
0, sizeof(struct user_i387_struct),
datap);
case PTRACE_SETFPREGS: /* Set the child FPU state. */
return copy_regset_from_user(child,
task_user_regset_view(current),
regset_view,
REGSET_FP,
0, sizeof(struct user_i387_struct),
datap);
......@@ -1152,28 +1163,28 @@ static long x32_arch_ptrace(struct task_struct *child,
case PTRACE_GETREGS: /* Get all gp regs from the child. */
return copy_regset_to_user(child,
task_user_regset_view(current),
&user_x86_64_view,
REGSET_GENERAL,
0, sizeof(struct user_regs_struct),
datap);
case PTRACE_SETREGS: /* Set all gp regs in the child. */
return copy_regset_from_user(child,
task_user_regset_view(current),
&user_x86_64_view,
REGSET_GENERAL,
0, sizeof(struct user_regs_struct),
datap);
case PTRACE_GETFPREGS: /* Get the child FPU state. */
return copy_regset_to_user(child,
task_user_regset_view(current),
&user_x86_64_view,
REGSET_FP,
0, sizeof(struct user_i387_struct),
datap);
case PTRACE_SETFPREGS: /* Set the child FPU state. */
return copy_regset_from_user(child,
task_user_regset_view(current),
&user_x86_64_view,
REGSET_FP,
0, sizeof(struct user_i387_struct),
datap);
......@@ -1309,6 +1320,25 @@ void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask;
}
/*
* This is used by the core dump code to decide which regset to dump. The
* core dump code writes out the resulting .e_machine and the corresponding
* regsets. This is suboptimal if the task is messing around with its CS.L
* field, but at worst the core dump will end up missing some information.
*
* Unfortunately, it is also used by the broken PTRACE_GETREGSET and
* PTRACE_SETREGSET APIs. These APIs look at the .regsets field but have
* no way to make sure that the e_machine they use matches the caller's
* expectations. The result is that the data format returned by
* PTRACE_GETREGSET depends on the returned CS field (and even the offset
* of the returned CS field depends on its value!) and the data format
* accepted by PTRACE_SETREGSET is determined by the old CS value. The
* upshot is that it is basically impossible to use these APIs correctly.
*
* The best way to fix it in the long run would probably be to add new
* improved ptrace() APIs to read and write registers reliably, possibly by
* allowing userspace to select the ELF e_machine variant that they expect.
*/
const struct user_regset_view *task_user_regset_view(struct task_struct *task)
{
#ifdef CONFIG_IA32_EMULATION
......
......@@ -90,14 +90,10 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
unsigned long, fd, unsigned long, off)
{
long error;
error = -EINVAL;
if (off & ~PAGE_MASK)
goto out;
return -EINVAL;
error = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
out:
return error;
return ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
}
static void find_start_end(unsigned long addr, unsigned long flags,
......
......@@ -134,7 +134,11 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
unsafe_put_user(regs->ds, &user->regs.ds, Efault_end);
unsafe_put_user(regs->fs, &user->regs.fs, Efault_end);
unsafe_put_user(regs->gs, &user->regs.gs, Efault_end);
unsafe_put_user(vm86->screen_bitmap, &user->screen_bitmap, Efault_end);
/*
* Don't write screen_bitmap in case some user had a value there
* and expected it to remain unchanged.
*/
user_access_end();
......@@ -160,49 +164,6 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
do_exit(SIGSEGV);
}
static void mark_screen_rdonly(struct mm_struct *mm)
{
struct vm_area_struct *vma;
spinlock_t *ptl;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
int i;
mmap_write_lock(mm);
pgd = pgd_offset(mm, 0xA0000);
if (pgd_none_or_clear_bad(pgd))
goto out;
p4d = p4d_offset(pgd, 0xA0000);
if (p4d_none_or_clear_bad(p4d))
goto out;
pud = pud_offset(p4d, 0xA0000);
if (pud_none_or_clear_bad(pud))
goto out;
pmd = pmd_offset(pud, 0xA0000);
if (pmd_trans_huge(*pmd)) {
vma = find_vma(mm, 0xA0000);
split_huge_pmd(vma, pmd, 0xA0000);
}
if (pmd_none_or_clear_bad(pmd))
goto out;
pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
for (i = 0; i < 32; i++) {
if (pte_present(*pte))
set_pte(pte, pte_wrprotect(*pte));
pte++;
}
pte_unmap_unlock(pte, ptl);
out:
mmap_write_unlock(mm);
flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, PAGE_SHIFT, false);
}
static int do_vm86_irq_handling(int subfunction, int irqnumber);
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);
......@@ -282,6 +243,15 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
offsetof(struct vm86_struct, int_revectored)))
return -EFAULT;
/* VM86_SCREEN_BITMAP had numerous bugs and appears to have no users. */
if (v.flags & VM86_SCREEN_BITMAP) {
char comm[TASK_COMM_LEN];
pr_info_once("vm86: '%s' uses VM86_SCREEN_BITMAP, which is no longer supported\n", get_task_comm(comm, current));
return -EINVAL;
}
memset(&vm86regs, 0, sizeof(vm86regs));
vm86regs.pt.bx = v.regs.ebx;
......@@ -302,7 +272,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
vm86regs.gs = v.regs.gs;
vm86->flags = v.flags;
vm86->screen_bitmap = v.screen_bitmap;
vm86->cpu_type = v.cpu_type;
if (copy_from_user(&vm86->int_revectored,
......@@ -370,9 +339,6 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
update_task_stack(tsk);
preempt_enable();
if (vm86->flags & VM86_SCREEN_BITMAP)
mark_screen_rdonly(tsk->mm);
memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
return regs->ax;
}
......
This diff is collapsed.
......@@ -157,16 +157,25 @@ __ref void *alloc_low_pages(unsigned int num)
}
/*
* By default need 3 4k for initial PMD_SIZE, 3 4k for 0-ISA_END_ADDRESS.
* With KASLR memory randomization, depending on the machine e820 memory
* and the PUD alignment. We may need twice more pages when KASLR memory
* By default need to be able to allocate page tables below PGD firstly for
* the 0-ISA_END_ADDRESS range and secondly for the initial PMD_SIZE mapping.
* With KASLR memory randomization, depending on the machine e820 memory and the
* PUD alignment, twice that many pages may be needed when KASLR memory
* randomization is enabled.
*/
#ifndef CONFIG_X86_5LEVEL
#define INIT_PGD_PAGE_TABLES 3
#else
#define INIT_PGD_PAGE_TABLES 4
#endif
#ifndef CONFIG_RANDOMIZE_MEMORY
#define INIT_PGD_PAGE_COUNT 6
#define INIT_PGD_PAGE_COUNT (2 * INIT_PGD_PAGE_TABLES)
#else
#define INIT_PGD_PAGE_COUNT 12
#define INIT_PGD_PAGE_COUNT (4 * INIT_PGD_PAGE_TABLES)
#endif
#define INIT_PGT_BUF_SIZE (INIT_PGD_PAGE_COUNT * PAGE_SIZE)
RESERVE_BRK(early_pgt_alloc, INIT_PGT_BUF_SIZE);
void __init early_alloc_pgt_buf(void)
......
......@@ -10,8 +10,6 @@
#define pr_fmt(fmt) "mmiotrace: " fmt
#define DEBUG 1
#include <linux/moduleparam.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
......
......@@ -687,15 +687,25 @@ int efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
* @return: Returns, if the page fault is not handled. This function
* will never return if the page fault is handled successfully.
*/
void efi_recover_from_page_fault(unsigned long phys_addr)
void efi_crash_gracefully_on_page_fault(unsigned long phys_addr)
{
if (!IS_ENABLED(CONFIG_X86_64))
return;
/*
* If we get an interrupt/NMI while processing an EFI runtime service
* then this is a regular OOPS, not an EFI failure.
*/
if (in_interrupt())
return;
/*
* Make sure that an efi runtime service caused the page fault.
* READ_ONCE() because we might be OOPSing in a different thread,
* and we don't want to trip KTSAN while trying to OOPS.
*/
if (efi_rts_work.efi_rts_id == EFI_NONE)
if (READ_ONCE(efi_rts_work.efi_rts_id) == EFI_NONE ||
current_work() != &efi_rts_work.work)
return;
/*
......@@ -747,6 +757,4 @@ void efi_recover_from_page_fault(unsigned long phys_addr)
set_current_state(TASK_IDLE);
schedule();
}
return;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment