Commit 51d2e09b authored by Linus Torvalds

Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "Fallout from the recent NMI fixes: make x86 LDT handling more robust.

  Also some EFI fixes"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/ldt: Make modify_ldt synchronous
  x86/xen: Probe target addresses in set_aliased_prot() before the hypercall
  x86/irq: Use the caller provided polarity setting in mp_check_pin_attr()
  efi: Check for NULL efi kernel parameters
  x86/efi: Use all 64 bit of efi_memmap in setup_e820()
parents 7c764cec 37868fe1
......@@ -1193,6 +1193,10 @@ static efi_status_t setup_e820(struct boot_params *params,
unsigned int e820_type = 0;
unsigned long m = efi->efi_memmap;
#ifdef CONFIG_X86_64
m |= (u64)efi->efi_memmap_hi << 32;
#endif
d = (efi_memory_desc_t *)(m + (i * efi->efi_memdesc_size));
switch (d->type) {
case EFI_RESERVED_TYPE:
......
......@@ -280,21 +280,6 @@ static inline void clear_LDT(void)
set_ldt(NULL, 0);
}
/*
* Load one particular LDT into the current CPU.
*
* Passes the LDT pointer and size stored in the mm context @pc straight
* to set_ldt(). This function itself takes no lock and does not disable
* preemption; load_LDT() is the variant that brackets this call with
* preempt_disable()/preempt_enable().
*/
static inline void load_LDT_nolock(mm_context_t *pc)
{
set_ldt(pc->ldt, pc->size);
}
/*
* Wrapper around load_LDT_nolock() that keeps preemption disabled for
* the duration of the load and re-enables it afterwards.
*/
static inline void load_LDT(mm_context_t *pc)
{
preempt_disable();
load_LDT_nolock(pc);
preempt_enable();
}
static inline unsigned long get_desc_base(const struct desc_struct *desc)
{
return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
......
......@@ -9,8 +9,7 @@
* we put the segment information here.
*/
typedef struct {
void *ldt;
int size;
struct ldt_struct *ldt;
#ifdef CONFIG_X86_64
/* True if mm supports a task running in 32 bit compatibility mode. */
......
......@@ -33,6 +33,50 @@ static inline void load_mm_cr4(struct mm_struct *mm)
static inline void load_mm_cr4(struct mm_struct *mm) {}
#endif
/*
* ldt_structs can be allocated, used, and freed, but they are never
* modified while live.
*
* One ldt_struct describes a single LDT: a pointer to the descriptor
* array plus the number of descriptors in it.
*/
struct ldt_struct {
/*
* Xen requires page-aligned LDTs with special permissions. This is
* needed to prevent us from installing evil descriptors such as
* call gates. On native, we could merge the ldt_struct and LDT
* allocations, but it's not worth trying to optimize.
*/
struct desc_struct *entries;
/* Number of descriptors in entries (a count, not a byte length). */
int size;
};
/*
 * Make the LDT belonging to @mm the active LDT on the current CPU, or
 * clear the CPU's LDT when @mm has none.  Callers are expected to run
 * with preemption disabled; this is checked at the end of the function.
 */
static inline void load_mm_ldt(struct mm_struct *mm)
{
	/*
	 * Read the pointer exactly once.  lockless_dereference() pairs
	 * with the smp_store_release() used to publish a new ldt_struct.
	 */
	struct ldt_struct *snapshot = lockless_dereference(mm->context.ldt);

	/*
	 * Why a lockless snapshot is sufficient:
	 *
	 * Every change to mm->context.ldt is followed by an IPI to all
	 * CPUs that have the mm active, and the old LDT is not freed
	 * until every such CPU has handled that IPI.  So even if the
	 * ldt_struct is replaced before we return, what we read here is
	 * still safe to use, and the replacement will be loaded before
	 * any user code runs.
	 *
	 * NB: converting this to RCU needs extreme care.  IRQs would
	 * still have to be off, because the local LDT must not be
	 * changed after an IPI has already loaded a newer value than
	 * the one visible to us.
	 */
	if (unlikely(snapshot)) {
		set_ldt(snapshot->entries, snapshot->size);
	} else {
		clear_LDT();
	}

	DEBUG_LOCKS_WARN_ON(preemptible());
}
/*
* Used for LDT copy/destruction.
*/
......@@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* was called and then modify_ldt changed
* prev->context.ldt but suppressed an IPI to this CPU.
* In this case, prev->context.ldt != NULL, because we
* never free an LDT while the mm still exists. That
* means that next->context.ldt != prev->context.ldt,
* because mms never share an LDT.
* never set context.ldt to NULL while the mm still
* exists. That means that next->context.ldt !=
* prev->context.ldt, because mms never share an LDT.
*/
if (unlikely(prev->context.ldt != next->context.ldt))
load_LDT_nolock(&next->context);
load_mm_ldt(next);
}
#ifdef CONFIG_SMP
else {
......@@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
load_mm_cr4(next);
load_LDT_nolock(&next->context);
load_mm_ldt(next);
}
}
#endif
......
......@@ -943,7 +943,7 @@ static bool mp_check_pin_attr(int irq, struct irq_alloc_info *info)
*/
if (irq < nr_legacy_irqs() && data->count == 1) {
if (info->ioapic_trigger != data->trigger)
mp_register_handler(irq, data->trigger);
mp_register_handler(irq, info->ioapic_trigger);
data->entry.trigger = data->trigger = info->ioapic_trigger;
data->entry.polarity = data->polarity = info->ioapic_polarity;
}
......
......@@ -1410,7 +1410,7 @@ void cpu_init(void)
load_sp0(t, &current->thread);
set_tss_desc(cpu, t);
load_TR_desc();
load_LDT(&init_mm.context);
load_mm_ldt(&init_mm);
clear_all_debug_regs();
dbg_restore_debug_regs();
......@@ -1459,7 +1459,7 @@ void cpu_init(void)
load_sp0(t, thread);
set_tss_desc(cpu, t);
load_TR_desc();
load_LDT(&init_mm.context);
load_mm_ldt(&init_mm);
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
......
......@@ -2179,21 +2179,25 @@ static unsigned long get_segment_base(unsigned int segment)
int idx = segment >> 3;
if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
struct ldt_struct *ldt;
if (idx > LDT_ENTRIES)
return 0;
if (idx > current->active_mm->context.size)
/* IRQs are off, so this synchronizes with smp_store_release */
ldt = lockless_dereference(current->active_mm->context.ldt);
if (!ldt || idx > ldt->size)
return 0;
desc = current->active_mm->context.ldt;
desc = &ldt->entries[idx];
} else {
if (idx > GDT_ENTRIES)
return 0;
desc = raw_cpu_ptr(gdt_page.gdt);
desc = raw_cpu_ptr(gdt_page.gdt) + idx;
}
return get_desc_base(desc + idx);
return get_desc_base(desc);
}
#ifdef CONFIG_COMPAT
......
......@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
......@@ -20,82 +21,82 @@
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
#ifdef CONFIG_SMP
/* context.lock is held for us, so we don't need any locking. */
static void flush_ldt(void *current_mm)
{
if (current->active_mm == current_mm)
load_LDT(&current->active_mm->context);
mm_context_t *pc;
if (current->active_mm != current_mm)
return;
pc = &current->active_mm->context;
set_ldt(pc->ldt->entries, pc->ldt->size);
}
#endif
static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
/* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
static struct ldt_struct *alloc_ldt_struct(int size)
{
void *oldldt, *newldt;
int oldsize;
if (mincount <= pc->size)
return 0;
oldsize = pc->size;
mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) &
(~(PAGE_SIZE / LDT_ENTRY_SIZE - 1));
if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE)
newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
struct ldt_struct *new_ldt;
int alloc_size;
if (size > LDT_ENTRIES)
return NULL;
new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
if (!new_ldt)
return NULL;
BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
alloc_size = size * LDT_ENTRY_SIZE;
/*
* Xen is very picky: it requires a page-aligned LDT that has no
* trailing nonzero bytes in any page that contains LDT descriptors.
* Keep it simple: zero the whole allocation and never allocate less
* than PAGE_SIZE.
*/
if (alloc_size > PAGE_SIZE)
new_ldt->entries = vzalloc(alloc_size);
else
newldt = (void *)__get_free_page(GFP_KERNEL);
if (!newldt)
return -ENOMEM;
new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (oldsize)
memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE);
oldldt = pc->ldt;
memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
(mincount - oldsize) * LDT_ENTRY_SIZE);
if (!new_ldt->entries) {
kfree(new_ldt);
return NULL;
}
paravirt_alloc_ldt(newldt, mincount);
new_ldt->size = size;
return new_ldt;
}
#ifdef CONFIG_X86_64
/* CHECKME: Do we really need this ? */
wmb();
#endif
pc->ldt = newldt;
wmb();
pc->size = mincount;
wmb();
if (reload) {
#ifdef CONFIG_SMP
preempt_disable();
load_LDT(pc);
if (!cpumask_equal(mm_cpumask(current->mm),
cpumask_of(smp_processor_id())))
smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else
load_LDT(pc);
#endif
}
if (oldsize) {
paravirt_free_ldt(oldldt, oldsize);
if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
else
put_page(virt_to_page(oldldt));
}
return 0;
/*
* After calling this, the LDT is immutable.
*
* Hands the descriptor array and its entry count to paravirt_alloc_ldt().
* NOTE(review): presumably this is the point where a paravirt backend
* gets to apply its own requirements to the LDT pages - confirm against
* the paravirt_alloc_ldt() implementation.
*/
static void finalize_ldt_struct(struct ldt_struct *ldt)
{
paravirt_alloc_ldt(ldt->entries, ldt->size);
}
static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
/* context.lock is held */
static void install_ldt(struct mm_struct *current_mm,
struct ldt_struct *ldt)
{
int err = alloc_ldt(new, old->size, 0);
int i;
/* Synchronizes with lockless_dereference in load_mm_ldt. */
smp_store_release(&current_mm->context.ldt, ldt);
/* Activate the LDT for all CPUs using current_mm. */
on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
}
if (err < 0)
return err;
static void free_ldt_struct(struct ldt_struct *ldt)
{
if (likely(!ldt))
return;
for (i = 0; i < old->size; i++)
write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
return 0;
paravirt_free_ldt(ldt->entries, ldt->size);
if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(ldt->entries);
else
kfree(ldt->entries);
kfree(ldt);
}
/*
......@@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
*/
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
struct ldt_struct *new_ldt;
struct mm_struct *old_mm;
int retval = 0;
mutex_init(&mm->context.lock);
mm->context.size = 0;
old_mm = current->mm;
if (old_mm && old_mm->context.size > 0) {
mutex_lock(&old_mm->context.lock);
retval = copy_ldt(&mm->context, &old_mm->context);
mutex_unlock(&old_mm->context.lock);
if (!old_mm) {
mm->context.ldt = NULL;
return 0;
}
mutex_lock(&old_mm->context.lock);
if (!old_mm->context.ldt) {
mm->context.ldt = NULL;
goto out_unlock;
}
new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
if (!new_ldt) {
retval = -ENOMEM;
goto out_unlock;
}
memcpy(new_ldt->entries, old_mm->context.ldt->entries,
new_ldt->size * LDT_ENTRY_SIZE);
finalize_ldt_struct(new_ldt);
mm->context.ldt = new_ldt;
out_unlock:
mutex_unlock(&old_mm->context.lock);
return retval;
}
......@@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
*/
void destroy_context(struct mm_struct *mm)
{
if (mm->context.size) {
#ifdef CONFIG_X86_32
/* CHECKME: Can this ever happen ? */
if (mm == current->active_mm)
clear_LDT();
#endif
paravirt_free_ldt(mm->context.ldt, mm->context.size);
if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(mm->context.ldt);
else
put_page(virt_to_page(mm->context.ldt));
mm->context.size = 0;
}
free_ldt_struct(mm->context.ldt);
mm->context.ldt = NULL;
}
static int read_ldt(void __user *ptr, unsigned long bytecount)
{
int err;
int retval;
unsigned long size;
struct mm_struct *mm = current->mm;
if (!mm->context.size)
return 0;
mutex_lock(&mm->context.lock);
if (!mm->context.ldt) {
retval = 0;
goto out_unlock;
}
if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
mutex_lock(&mm->context.lock);
size = mm->context.size * LDT_ENTRY_SIZE;
size = mm->context.ldt->size * LDT_ENTRY_SIZE;
if (size > bytecount)
size = bytecount;
err = 0;
if (copy_to_user(ptr, mm->context.ldt, size))
err = -EFAULT;
mutex_unlock(&mm->context.lock);
if (err < 0)
goto error_return;
if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
retval = -EFAULT;
goto out_unlock;
}
if (size != bytecount) {
/* zero-fill the rest */
if (clear_user(ptr + size, bytecount - size) != 0) {
err = -EFAULT;
goto error_return;
/* Zero-fill the rest and pretend we read bytecount bytes. */
if (clear_user(ptr + size, bytecount - size)) {
retval = -EFAULT;
goto out_unlock;
}
}
return bytecount;
error_return:
return err;
retval = bytecount;
out_unlock:
mutex_unlock(&mm->context.lock);
return retval;
}
static int read_default_ldt(void __user *ptr, unsigned long bytecount)
......@@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
struct desc_struct ldt;
int error;
struct user_desc ldt_info;
int oldsize, newsize;
struct ldt_struct *new_ldt, *old_ldt;
error = -EINVAL;
if (bytecount != sizeof(ldt_info))
......@@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
goto out;
}
mutex_lock(&mm->context.lock);
if (ldt_info.entry_number >= mm->context.size) {
error = alloc_ldt(&current->mm->context,
ldt_info.entry_number + 1, 1);
if (error < 0)
goto out_unlock;
}
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
if (oldmode || LDT_empty(&ldt_info)) {
memset(&ldt, 0, sizeof(ldt));
goto install;
if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
LDT_empty(&ldt_info)) {
/* The user wants to clear the entry. */
memset(&ldt, 0, sizeof(ldt));
} else {
if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
error = -EINVAL;
goto out;
}
fill_ldt(&ldt, &ldt_info);
if (oldmode)
ldt.avl = 0;
}
if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
error = -EINVAL;
mutex_lock(&mm->context.lock);
old_ldt = mm->context.ldt;
oldsize = old_ldt ? old_ldt->size : 0;
newsize = max((int)(ldt_info.entry_number + 1), oldsize);
error = -ENOMEM;
new_ldt = alloc_ldt_struct(newsize);
if (!new_ldt)
goto out_unlock;
}
fill_ldt(&ldt, &ldt_info);
if (oldmode)
ldt.avl = 0;
if (old_ldt)
memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE);
new_ldt->entries[ldt_info.entry_number] = ldt;
finalize_ldt_struct(new_ldt);
/* Install the new entry ... */
install:
write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
install_ldt(mm, new_ldt);
free_ldt_struct(old_ldt);
error = 0;
out_unlock:
......
......@@ -121,11 +121,11 @@ void __show_regs(struct pt_regs *regs, int all)
void release_thread(struct task_struct *dead_task)
{
if (dead_task->mm) {
if (dead_task->mm->context.size) {
if (dead_task->mm->context.ldt) {
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
dead_task->comm,
dead_task->mm->context.ldt,
dead_task->mm->context.size);
dead_task->mm->context.ldt->size);
BUG();
}
}
......
......@@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <linux/ptrace.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
{
......@@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
seg &= ~7UL;
mutex_lock(&child->mm->context.lock);
if (unlikely((seg >> 3) >= child->mm->context.size))
if (unlikely(!child->mm->context.ldt ||
(seg >> 3) >= child->mm->context.ldt->size))
addr = -1L; /* bogus selector, access would fault */
else {
desc = child->mm->context.ldt + seg;
desc = &child->mm->context.ldt->entries[seg];
base = get_desc_base(desc);
/* 16-bit code segment? */
......
......@@ -972,6 +972,11 @@ u64 efi_mem_attributes(unsigned long phys_addr)
static int __init arch_parse_efi_cmdline(char *str)
{
if (!str) {
pr_warn("need at least one option\n");
return -EINVAL;
}
if (parse_option_str(str, "old_map"))
set_bit(EFI_OLD_MEMMAP, &efi.flags);
if (parse_option_str(str, "debug"))
......
......@@ -22,6 +22,7 @@
#include <asm/fpu/internal.h>
#include <asm/debugreg.h>
#include <asm/cpu.h>
#include <asm/mmu_context.h>
#ifdef CONFIG_X86_32
__visible unsigned long saved_context_ebx;
......@@ -153,7 +154,7 @@ static void fix_processor_context(void)
syscall_init(); /* This sets MSR_*STAR and related */
#endif
load_TR_desc(); /* This does ltr */
load_LDT(&current->active_mm->context); /* This does lldt */
load_mm_ldt(current->active_mm); /* This does lldt */
fpu__resume_cpu();
}
......
......@@ -483,6 +483,7 @@ static void set_aliased_prot(void *v, pgprot_t prot)
pte_t pte;
unsigned long pfn;
struct page *page;
unsigned char dummy;
ptep = lookup_address((unsigned long)v, &level);
BUG_ON(ptep == NULL);
......@@ -492,6 +493,32 @@ static void set_aliased_prot(void *v, pgprot_t prot)
pte = pfn_pte(pfn, prot);
/*
* Careful: update_va_mapping() will fail if the virtual address
* we're poking isn't populated in the page tables. We don't
* need to worry about the direct map (that's always in the page
* tables), but we need to be careful about vmap space. In
* particular, the top level page table can lazily propagate
* entries between processes, so if we've switched mms since we
* vmapped the target in the first place, we might not have the
* top-level page table entry populated.
*
* We disable preemption because we want the same mm active when
* we probe the target and when we issue the hypercall. We'll
* have the same nominal mm, but if we're a kernel thread, lazy
* mm dropping could change our pgd.
*
* Out of an abundance of caution, this uses __get_user() to fault
* in the target address just in case there's some obscure case
* in which the target address isn't readable.
*/
preempt_disable();
pagefault_disable(); /* Avoid warnings due to being atomic. */
__get_user(dummy, (unsigned char __user __force *)v);
pagefault_enable();
if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
BUG();
......@@ -503,6 +530,8 @@ static void set_aliased_prot(void *v, pgprot_t prot)
BUG();
} else
kmap_flush_unused();
preempt_enable();
}
static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
......@@ -510,6 +539,17 @@ static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
int i;
/*
* We need to mark all the aliases of the LDT pages RO. We
* don't need to call vm_flush_aliases(), though, since that's
* only responsible for flushing aliases out of the TLBs, not the
* page tables, and Xen will flush the TLB for us if needed.
*
* To avoid confusing future readers: none of this is necessary
* to load the LDT. The hypervisor only checks this when the
* LDT is faulted in due to subsequent descriptor access.
*/
for(i = 0; i < entries; i += entries_per_page)
set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
}
......
......@@ -58,6 +58,11 @@ bool efi_runtime_disabled(void)
static int __init parse_efi_cmdline(char *str)
{
if (!str) {
pr_warn("need at least one option\n");
return -EINVAL;
}
if (parse_option_str(str, "noruntime"))
disable_runtime = true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment