Commit 37868fe1 authored by Andy Lutomirski's avatar Andy Lutomirski Committed by Ingo Molnar

x86/ldt: Make modify_ldt synchronous

modify_ldt() has questionable locking and does not synchronize
threads.  Improve it: redesign the locking and synchronize all
threads' LDTs using an IPI on all modifications.

This will dramatically slow down modify_ldt in multithreaded
programs, but there shouldn't be any multithreaded programs that
care about modify_ldt's performance in the first place.

This fixes some fallout from the CVE-2015-5157 fixes.
Signed-off-by: default avatarAndy Lutomirski <luto@kernel.org>
Reviewed-by: default avatarBorislav Petkov <bp@suse.de>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: security@kernel.org <security@kernel.org>
Cc: <stable@vger.kernel.org>
Cc: xen-devel <xen-devel@lists.xen.org>
Link: http://lkml.kernel.org/r/4c6978476782160600471bd865b318db34c7b628.1438291540.git.luto@kernel.orgSigned-off-by: default avatarIngo Molnar <mingo@kernel.org>
parent aa1acff3
...@@ -280,21 +280,6 @@ static inline void clear_LDT(void) ...@@ -280,21 +280,6 @@ static inline void clear_LDT(void)
set_ldt(NULL, 0); set_ldt(NULL, 0);
} }
/*
* load one particular LDT into the current CPU
*/
static inline void load_LDT_nolock(mm_context_t *pc)
{
set_ldt(pc->ldt, pc->size);
}
static inline void load_LDT(mm_context_t *pc)
{
preempt_disable();
load_LDT_nolock(pc);
preempt_enable();
}
static inline unsigned long get_desc_base(const struct desc_struct *desc) static inline unsigned long get_desc_base(const struct desc_struct *desc)
{ {
return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
......
...@@ -9,8 +9,7 @@ ...@@ -9,8 +9,7 @@
* we put the segment information here. * we put the segment information here.
*/ */
typedef struct { typedef struct {
void *ldt; struct ldt_struct *ldt;
int size;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* True if mm supports a task running in 32 bit compatibility mode. */ /* True if mm supports a task running in 32 bit compatibility mode. */
......
...@@ -33,6 +33,50 @@ static inline void load_mm_cr4(struct mm_struct *mm) ...@@ -33,6 +33,50 @@ static inline void load_mm_cr4(struct mm_struct *mm)
static inline void load_mm_cr4(struct mm_struct *mm) {} static inline void load_mm_cr4(struct mm_struct *mm) {}
#endif #endif
/*
* ldt_structs can be allocated, used, and freed, but they are never
* modified while live.
*/
struct ldt_struct {
/*
* Xen requires page-aligned LDTs with special permissions. This is
* needed to prevent us from installing evil descriptors such as
* call gates. On native, we could merge the ldt_struct and LDT
* allocations, but it's not worth trying to optimize.
*/
struct desc_struct *entries;
int size;
};
static inline void load_mm_ldt(struct mm_struct *mm)
{
struct ldt_struct *ldt;
/* lockless_dereference synchronizes with smp_store_release */
ldt = lockless_dereference(mm->context.ldt);
/*
* Any change to mm->context.ldt is followed by an IPI to all
* CPUs with the mm active. The LDT will not be freed until
* after the IPI is handled by all such CPUs. This means that,
* if the ldt_struct changes before we return, the values we see
* will be safe, and the new values will be loaded before we run
* any user code.
*
* NB: don't try to convert this to use RCU without extreme care.
* We would still need IRQs off, because we don't want to change
* the local LDT after an IPI loaded a newer value than the one
* that we can see.
*/
if (unlikely(ldt))
set_ldt(ldt->entries, ldt->size);
else
clear_LDT();
DEBUG_LOCKS_WARN_ON(preemptible());
}
/* /*
* Used for LDT copy/destruction. * Used for LDT copy/destruction.
*/ */
...@@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, ...@@ -78,12 +122,12 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* was called and then modify_ldt changed * was called and then modify_ldt changed
* prev->context.ldt but suppressed an IPI to this CPU. * prev->context.ldt but suppressed an IPI to this CPU.
* In this case, prev->context.ldt != NULL, because we * In this case, prev->context.ldt != NULL, because we
* never free an LDT while the mm still exists. That * never set context.ldt to NULL while the mm still
* means that next->context.ldt != prev->context.ldt, * exists. That means that next->context.ldt !=
* because mms never share an LDT. * prev->context.ldt, because mms never share an LDT.
*/ */
if (unlikely(prev->context.ldt != next->context.ldt)) if (unlikely(prev->context.ldt != next->context.ldt))
load_LDT_nolock(&next->context); load_mm_ldt(next);
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
else { else {
...@@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, ...@@ -106,7 +150,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
load_cr3(next->pgd); load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
load_mm_cr4(next); load_mm_cr4(next);
load_LDT_nolock(&next->context); load_mm_ldt(next);
} }
} }
#endif #endif
......
...@@ -1410,7 +1410,7 @@ void cpu_init(void) ...@@ -1410,7 +1410,7 @@ void cpu_init(void)
load_sp0(t, &current->thread); load_sp0(t, &current->thread);
set_tss_desc(cpu, t); set_tss_desc(cpu, t);
load_TR_desc(); load_TR_desc();
load_LDT(&init_mm.context); load_mm_ldt(&init_mm);
clear_all_debug_regs(); clear_all_debug_regs();
dbg_restore_debug_regs(); dbg_restore_debug_regs();
...@@ -1459,7 +1459,7 @@ void cpu_init(void) ...@@ -1459,7 +1459,7 @@ void cpu_init(void)
load_sp0(t, thread); load_sp0(t, thread);
set_tss_desc(cpu, t); set_tss_desc(cpu, t);
load_TR_desc(); load_TR_desc();
load_LDT(&init_mm.context); load_mm_ldt(&init_mm);
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
......
...@@ -2179,21 +2179,25 @@ static unsigned long get_segment_base(unsigned int segment) ...@@ -2179,21 +2179,25 @@ static unsigned long get_segment_base(unsigned int segment)
int idx = segment >> 3; int idx = segment >> 3;
if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) { if ((segment & SEGMENT_TI_MASK) == SEGMENT_LDT) {
struct ldt_struct *ldt;
if (idx > LDT_ENTRIES) if (idx > LDT_ENTRIES)
return 0; return 0;
if (idx > current->active_mm->context.size) /* IRQs are off, so this synchronizes with smp_store_release */
ldt = lockless_dereference(current->active_mm->context.ldt);
if (!ldt || idx > ldt->size)
return 0; return 0;
desc = current->active_mm->context.ldt; desc = &ldt->entries[idx];
} else { } else {
if (idx > GDT_ENTRIES) if (idx > GDT_ENTRIES)
return 0; return 0;
desc = raw_cpu_ptr(gdt_page.gdt); desc = raw_cpu_ptr(gdt_page.gdt) + idx;
} }
return get_desc_base(desc + idx); return get_desc_base(desc);
} }
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/string.h> #include <linux/string.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/smp.h> #include <linux/smp.h>
#include <linux/slab.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
...@@ -20,82 +21,82 @@ ...@@ -20,82 +21,82 @@
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
#include <asm/syscalls.h> #include <asm/syscalls.h>
#ifdef CONFIG_SMP /* context.lock is held for us, so we don't need any locking. */
static void flush_ldt(void *current_mm) static void flush_ldt(void *current_mm)
{ {
if (current->active_mm == current_mm) mm_context_t *pc;
load_LDT(&current->active_mm->context);
if (current->active_mm != current_mm)
return;
pc = &current->active_mm->context;
set_ldt(pc->ldt->entries, pc->ldt->size);
} }
#endif
static int alloc_ldt(mm_context_t *pc, int mincount, int reload) /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
static struct ldt_struct *alloc_ldt_struct(int size)
{ {
void *oldldt, *newldt; struct ldt_struct *new_ldt;
int oldsize; int alloc_size;
if (mincount <= pc->size) if (size > LDT_ENTRIES)
return 0; return NULL;
oldsize = pc->size;
mincount = (mincount + (PAGE_SIZE / LDT_ENTRY_SIZE - 1)) & new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
(~(PAGE_SIZE / LDT_ENTRY_SIZE - 1)); if (!new_ldt)
if (mincount * LDT_ENTRY_SIZE > PAGE_SIZE) return NULL;
newldt = vmalloc(mincount * LDT_ENTRY_SIZE);
BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
alloc_size = size * LDT_ENTRY_SIZE;
/*
* Xen is very picky: it requires a page-aligned LDT that has no
* trailing nonzero bytes in any page that contains LDT descriptors.
* Keep it simple: zero the whole allocation and never allocate less
* than PAGE_SIZE.
*/
if (alloc_size > PAGE_SIZE)
new_ldt->entries = vzalloc(alloc_size);
else else
newldt = (void *)__get_free_page(GFP_KERNEL); new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!newldt)
return -ENOMEM;
if (oldsize) if (!new_ldt->entries) {
memcpy(newldt, pc->ldt, oldsize * LDT_ENTRY_SIZE); kfree(new_ldt);
oldldt = pc->ldt; return NULL;
memset(newldt + oldsize * LDT_ENTRY_SIZE, 0, }
(mincount - oldsize) * LDT_ENTRY_SIZE);
paravirt_alloc_ldt(newldt, mincount); new_ldt->size = size;
return new_ldt;
}
#ifdef CONFIG_X86_64 /* After calling this, the LDT is immutable. */
/* CHECKME: Do we really need this ? */ static void finalize_ldt_struct(struct ldt_struct *ldt)
wmb(); {
#endif paravirt_alloc_ldt(ldt->entries, ldt->size);
pc->ldt = newldt;
wmb();
pc->size = mincount;
wmb();
if (reload) {
#ifdef CONFIG_SMP
preempt_disable();
load_LDT(pc);
if (!cpumask_equal(mm_cpumask(current->mm),
cpumask_of(smp_processor_id())))
smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#else
load_LDT(pc);
#endif
}
if (oldsize) {
paravirt_free_ldt(oldldt, oldsize);
if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(oldldt);
else
put_page(virt_to_page(oldldt));
}
return 0;
} }
static inline int copy_ldt(mm_context_t *new, mm_context_t *old) /* context.lock is held */
static void install_ldt(struct mm_struct *current_mm,
struct ldt_struct *ldt)
{ {
int err = alloc_ldt(new, old->size, 0); /* Synchronizes with lockless_dereference in load_mm_ldt. */
int i; smp_store_release(&current_mm->context.ldt, ldt);
/* Activate the LDT for all CPUs using current_mm. */
on_each_cpu_mask(mm_cpumask(current_mm), flush_ldt, current_mm, true);
}
if (err < 0) static void free_ldt_struct(struct ldt_struct *ldt)
return err; {
if (likely(!ldt))
return;
for (i = 0; i < old->size; i++) paravirt_free_ldt(ldt->entries, ldt->size);
write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
return 0; vfree(ldt->entries);
else
kfree(ldt->entries);
kfree(ldt);
} }
/* /*
...@@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) ...@@ -104,17 +105,37 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
*/ */
int init_new_context(struct task_struct *tsk, struct mm_struct *mm) int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{ {
struct ldt_struct *new_ldt;
struct mm_struct *old_mm; struct mm_struct *old_mm;
int retval = 0; int retval = 0;
mutex_init(&mm->context.lock); mutex_init(&mm->context.lock);
mm->context.size = 0;
old_mm = current->mm; old_mm = current->mm;
if (old_mm && old_mm->context.size > 0) { if (!old_mm) {
mutex_lock(&old_mm->context.lock); mm->context.ldt = NULL;
retval = copy_ldt(&mm->context, &old_mm->context); return 0;
mutex_unlock(&old_mm->context.lock);
} }
mutex_lock(&old_mm->context.lock);
if (!old_mm->context.ldt) {
mm->context.ldt = NULL;
goto out_unlock;
}
new_ldt = alloc_ldt_struct(old_mm->context.ldt->size);
if (!new_ldt) {
retval = -ENOMEM;
goto out_unlock;
}
memcpy(new_ldt->entries, old_mm->context.ldt->entries,
new_ldt->size * LDT_ENTRY_SIZE);
finalize_ldt_struct(new_ldt);
mm->context.ldt = new_ldt;
out_unlock:
mutex_unlock(&old_mm->context.lock);
return retval; return retval;
} }
...@@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) ...@@ -125,53 +146,47 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
*/ */
void destroy_context(struct mm_struct *mm) void destroy_context(struct mm_struct *mm)
{ {
if (mm->context.size) { free_ldt_struct(mm->context.ldt);
#ifdef CONFIG_X86_32 mm->context.ldt = NULL;
/* CHECKME: Can this ever happen ? */
if (mm == current->active_mm)
clear_LDT();
#endif
paravirt_free_ldt(mm->context.ldt, mm->context.size);
if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
vfree(mm->context.ldt);
else
put_page(virt_to_page(mm->context.ldt));
mm->context.size = 0;
}
} }
static int read_ldt(void __user *ptr, unsigned long bytecount) static int read_ldt(void __user *ptr, unsigned long bytecount)
{ {
int err; int retval;
unsigned long size; unsigned long size;
struct mm_struct *mm = current->mm; struct mm_struct *mm = current->mm;
if (!mm->context.size) mutex_lock(&mm->context.lock);
return 0;
if (!mm->context.ldt) {
retval = 0;
goto out_unlock;
}
if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES) if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES; bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
mutex_lock(&mm->context.lock); size = mm->context.ldt->size * LDT_ENTRY_SIZE;
size = mm->context.size * LDT_ENTRY_SIZE;
if (size > bytecount) if (size > bytecount)
size = bytecount; size = bytecount;
err = 0; if (copy_to_user(ptr, mm->context.ldt->entries, size)) {
if (copy_to_user(ptr, mm->context.ldt, size)) retval = -EFAULT;
err = -EFAULT; goto out_unlock;
mutex_unlock(&mm->context.lock); }
if (err < 0)
goto error_return;
if (size != bytecount) { if (size != bytecount) {
/* zero-fill the rest */ /* Zero-fill the rest and pretend we read bytecount bytes. */
if (clear_user(ptr + size, bytecount - size) != 0) { if (clear_user(ptr + size, bytecount - size)) {
err = -EFAULT; retval = -EFAULT;
goto error_return; goto out_unlock;
} }
} }
return bytecount; retval = bytecount;
error_return:
return err; out_unlock:
mutex_unlock(&mm->context.lock);
return retval;
} }
static int read_default_ldt(void __user *ptr, unsigned long bytecount) static int read_default_ldt(void __user *ptr, unsigned long bytecount)
...@@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) ...@@ -195,6 +210,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
struct desc_struct ldt; struct desc_struct ldt;
int error; int error;
struct user_desc ldt_info; struct user_desc ldt_info;
int oldsize, newsize;
struct ldt_struct *new_ldt, *old_ldt;
error = -EINVAL; error = -EINVAL;
if (bytecount != sizeof(ldt_info)) if (bytecount != sizeof(ldt_info))
...@@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) ...@@ -213,34 +230,39 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
goto out; goto out;
} }
mutex_lock(&mm->context.lock); if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
if (ldt_info.entry_number >= mm->context.size) { LDT_empty(&ldt_info)) {
error = alloc_ldt(&current->mm->context, /* The user wants to clear the entry. */
ldt_info.entry_number + 1, 1); memset(&ldt, 0, sizeof(ldt));
if (error < 0) } else {
goto out_unlock; if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
} error = -EINVAL;
goto out;
/* Allow LDTs to be cleared by the user. */
if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
if (oldmode || LDT_empty(&ldt_info)) {
memset(&ldt, 0, sizeof(ldt));
goto install;
} }
fill_ldt(&ldt, &ldt_info);
if (oldmode)
ldt.avl = 0;
} }
if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) { mutex_lock(&mm->context.lock);
error = -EINVAL;
old_ldt = mm->context.ldt;
oldsize = old_ldt ? old_ldt->size : 0;
newsize = max((int)(ldt_info.entry_number + 1), oldsize);
error = -ENOMEM;
new_ldt = alloc_ldt_struct(newsize);
if (!new_ldt)
goto out_unlock; goto out_unlock;
}
fill_ldt(&ldt, &ldt_info); if (old_ldt)
if (oldmode) memcpy(new_ldt->entries, old_ldt->entries, oldsize * LDT_ENTRY_SIZE);
ldt.avl = 0; new_ldt->entries[ldt_info.entry_number] = ldt;
finalize_ldt_struct(new_ldt);
/* Install the new entry ... */ install_ldt(mm, new_ldt);
install: free_ldt_struct(old_ldt);
write_ldt_entry(mm->context.ldt, ldt_info.entry_number, &ldt);
error = 0; error = 0;
out_unlock: out_unlock:
......
...@@ -121,11 +121,11 @@ void __show_regs(struct pt_regs *regs, int all) ...@@ -121,11 +121,11 @@ void __show_regs(struct pt_regs *regs, int all)
void release_thread(struct task_struct *dead_task) void release_thread(struct task_struct *dead_task)
{ {
if (dead_task->mm) { if (dead_task->mm) {
if (dead_task->mm->context.size) { if (dead_task->mm->context.ldt) {
pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n", pr_warn("WARNING: dead process %s still has LDT? <%p/%d>\n",
dead_task->comm, dead_task->comm,
dead_task->mm->context.ldt, dead_task->mm->context.ldt,
dead_task->mm->context.size); dead_task->mm->context.ldt->size);
BUG(); BUG();
} }
} }
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/ptrace.h> #include <linux/ptrace.h>
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/mmu_context.h>
unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
{ {
...@@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re ...@@ -30,10 +31,11 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
seg &= ~7UL; seg &= ~7UL;
mutex_lock(&child->mm->context.lock); mutex_lock(&child->mm->context.lock);
if (unlikely((seg >> 3) >= child->mm->context.size)) if (unlikely(!child->mm->context.ldt ||
(seg >> 3) >= child->mm->context.ldt->size))
addr = -1L; /* bogus selector, access would fault */ addr = -1L; /* bogus selector, access would fault */
else { else {
desc = child->mm->context.ldt + seg; desc = &child->mm->context.ldt->entries[seg];
base = get_desc_base(desc); base = get_desc_base(desc);
/* 16-bit code segment? */ /* 16-bit code segment? */
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <asm/fpu/internal.h> #include <asm/fpu/internal.h>
#include <asm/debugreg.h> #include <asm/debugreg.h>
#include <asm/cpu.h> #include <asm/cpu.h>
#include <asm/mmu_context.h>
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
__visible unsigned long saved_context_ebx; __visible unsigned long saved_context_ebx;
...@@ -153,7 +154,7 @@ static void fix_processor_context(void) ...@@ -153,7 +154,7 @@ static void fix_processor_context(void)
syscall_init(); /* This sets MSR_*STAR and related */ syscall_init(); /* This sets MSR_*STAR and related */
#endif #endif
load_TR_desc(); /* This does ltr */ load_TR_desc(); /* This does ltr */
load_LDT(&current->active_mm->context); /* This does lldt */ load_mm_ldt(current->active_mm); /* This does lldt */
fpu__resume_cpu(); fpu__resume_cpu();
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment