Commit f120f13e authored by Jeremy Fitzhardinge, committed by Jeremy Fitzhardinge

xen: Add support for preemption

Add Xen support for preemption.  This is mostly a cleanup of existing
preempt_enable/disable calls, or just comments to explain the current
usage.
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Signed-off-by: Chris Wright <chrisw@sous-sol.org>
parent f87e4cac
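
The pattern behind most of the hunks below is worth spelling out. Where a function touches per-cpu Xen state and can tolerate being preempted afterwards, the patch brackets just the per-cpu access with preempt_disable()/preempt_enable_no_resched(); where a function is only ever reached with preemption already off (multicall flushing, cr3 writes, lazy-mode switching), it asserts that with BUG_ON(preemptible()) and switches to __get_cpu_var(). The fragment below is an illustrative sketch only, not part of the commit: it assumes kernel context, sketch_mask_events() and sketch_assert_ctx() are invented names, and xen_vcpu and mc_buffer are the patch's own per-cpu variables, referenced purely as examples.

/* Illustrative sketch only -- not part of the commit. */
#include <linux/preempt.h>
#include <linux/hardirq.h>	/* preemptible() */
#include <linux/percpu.h>

static void sketch_mask_events(void)
{
	/* Idiom 1: close the one-instruction preempt window so we cannot
	   migrate CPUs between reading the per-cpu vcpu pointer and
	   updating its event mask, then re-enable preemption without
	   forcing a reschedule. */
	preempt_disable();
	x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
	preempt_enable_no_resched();
}

static void sketch_assert_ctx(void)
{
	/* Idiom 2: paths such as xen_mc_flush() are only entered with
	   preemption already disabled, so they assert the invariant and
	   use __get_cpu_var() instead of get_cpu_var()/put_cpu_var(). */
	BUG_ON(preemptible());
	/* ... operate on __get_cpu_var(mc_buffer) ... */
}
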
@@ -4,7 +4,7 @@
 config XEN
 	bool "Enable support for Xen hypervisor"
-	depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !(PREEMPT || NEED_MULTIPLE_NODES)
+	depends on PARAVIRT && X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES
 	help
 	  This is the Linux Xen port. Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
...
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/preempt.h>
+#include <linux/hardirq.h>
 #include <linux/percpu.h>
 #include <linux/delay.h>
 #include <linux/start_kernel.h>
@@ -108,11 +109,10 @@ static unsigned long xen_save_fl(void)
 	struct vcpu_info *vcpu;
 	unsigned long flags;

-	preempt_disable();
 	vcpu = x86_read_percpu(xen_vcpu);
+
 	/* flag has opposite sense of mask */
 	flags = !vcpu->evtchn_upcall_mask;
-	preempt_enable();

 	/* convert to IF type flag
 	   -0 -> 0x00000000
@@ -125,32 +125,35 @@ static void xen_restore_fl(unsigned long flags)
 {
 	struct vcpu_info *vcpu;

-	preempt_disable();
-
 	/* convert from IF type flag */
 	flags = !(flags & X86_EFLAGS_IF);
+
+	/* There's a one instruction preempt window here. We need to
+	   make sure we're don't switch CPUs between getting the vcpu
+	   pointer and updating the mask. */
+	preempt_disable();
 	vcpu = x86_read_percpu(xen_vcpu);
 	vcpu->evtchn_upcall_mask = flags;
+	preempt_enable_no_resched();

-	if (flags == 0) {
-		/* Unmask then check (avoid races). We're only protecting
-		   against updates by this CPU, so there's no need for
-		   anything stronger. */
-		barrier();
+	/* Doesn't matter if we get preempted here, because any
+	   pending event will get dealt with anyway. */

+	if (flags == 0) {
+		preempt_check_resched();
+		barrier(); /* unmask then check (avoid races) */
 		if (unlikely(vcpu->evtchn_upcall_pending))
 			force_evtchn_callback();
-		preempt_enable();
-	} else
-		preempt_enable_no_resched();
+	}
 }

 static void xen_irq_disable(void)
 {
-	struct vcpu_info *vcpu;
-
+	/* There's a one instruction preempt window here. We need to
+	   make sure we're don't switch CPUs between getting the vcpu
+	   pointer and updating the mask. */
 	preempt_disable();
-	vcpu = x86_read_percpu(xen_vcpu);
-	vcpu->evtchn_upcall_mask = 1;
+	x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
 	preempt_enable_no_resched();
 }
@@ -158,18 +161,20 @@ static void xen_irq_enable(void)
 {
 	struct vcpu_info *vcpu;

+	/* There's a one instruction preempt window here. We need to
+	   make sure we're don't switch CPUs between getting the vcpu
+	   pointer and updating the mask. */
 	preempt_disable();
 	vcpu = x86_read_percpu(xen_vcpu);
 	vcpu->evtchn_upcall_mask = 0;
+	preempt_enable_no_resched();

-	/* Unmask then check (avoid races). We're only protecting
-	   against updates by this CPU, so there's no need for
-	   anything stronger. */
-	barrier();
+	/* Doesn't matter if we get preempted here, because any
+	   pending event will get dealt with anyway. */

+	barrier(); /* unmask then check (avoid races) */
 	if (unlikely(vcpu->evtchn_upcall_pending))
 		force_evtchn_callback();
-
-	preempt_enable();
 }

 static void xen_safe_halt(void)
@@ -189,6 +194,8 @@ static void xen_halt(void)
 static void xen_set_lazy_mode(enum paravirt_lazy_mode mode)
 {
+	BUG_ON(preemptible());
+
 	switch (mode) {
 	case PARAVIRT_LAZY_NONE:
 		BUG_ON(x86_read_percpu(xen_lazy_mode) == PARAVIRT_LAZY_NONE);
@@ -293,9 +300,13 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 	xmaddr_t mach_lp = virt_to_machine(lp);
 	u64 entry = (u64)high << 32 | low;

+	preempt_disable();
+
 	xen_mc_flush();
 	if (HYPERVISOR_update_descriptor(mach_lp.maddr, entry))
 		BUG();
+
+	preempt_enable();
 }

 static int cvt_gate_to_trap(int vector, u32 low, u32 high,
@@ -328,11 +339,13 @@ static DEFINE_PER_CPU(struct Xgt_desc_struct, idt_desc);
 static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
 				u32 low, u32 high)
 {
-	int cpu = smp_processor_id();
 	unsigned long p = (unsigned long)&dt[entrynum];
-	unsigned long start = per_cpu(idt_desc, cpu).address;
-	unsigned long end = start + per_cpu(idt_desc, cpu).size + 1;
+	unsigned long start, end;
+
+	preempt_disable();
+
+	start = __get_cpu_var(idt_desc).address;
+	end = start + __get_cpu_var(idt_desc).size + 1;

 	xen_mc_flush();
@@ -347,6 +360,8 @@ static void xen_write_idt_entry(struct desc_struct *dt, int entrynum,
 		if (HYPERVISOR_set_trap_table(info))
 			BUG();
 	}
+
+	preempt_enable();
 }

 static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
@@ -368,11 +383,9 @@ static void xen_convert_trap_info(const struct Xgt_desc_struct *desc,
 void xen_copy_trap_info(struct trap_info *traps)
 {
-	const struct Xgt_desc_struct *desc = &get_cpu_var(idt_desc);
+	const struct Xgt_desc_struct *desc = &__get_cpu_var(idt_desc);

 	xen_convert_trap_info(desc, traps);
-
-	put_cpu_var(idt_desc);
 }

 /* Load a new IDT into Xen. In principle this can be per-CPU, so we
@@ -382,12 +395,11 @@ static void xen_load_idt(const struct Xgt_desc_struct *desc)
 {
 	static DEFINE_SPINLOCK(lock);
 	static struct trap_info traps[257];
-	int cpu = smp_processor_id();
-
-	per_cpu(idt_desc, cpu) = *desc;

 	spin_lock(&lock);

+	__get_cpu_var(idt_desc) = *desc;
+
 	xen_convert_trap_info(desc, traps);

 	xen_mc_flush();
@@ -402,6 +414,8 @@ static void xen_load_idt(const struct Xgt_desc_struct *desc)
 static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
 				u32 low, u32 high)
 {
+	preempt_disable();
+
 	switch ((high >> 8) & 0xff) {
 	case DESCTYPE_LDT:
 	case DESCTYPE_TSS:
@@ -418,10 +432,12 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
 		}
 	}
+
+	preempt_enable();
 }

 static void xen_load_esp0(struct tss_struct *tss,
 			  struct thread_struct *thread)
 {
 	struct multicall_space mcs = xen_mc_entry(0);
 	MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->esp0);
@@ -525,6 +541,8 @@ static unsigned long xen_read_cr3(void)
 static void xen_write_cr3(unsigned long cr3)
 {
+	BUG_ON(preemptible());
+
 	if (cr3 == x86_read_percpu(xen_cr3)) {
 		/* just a simple tlb flush */
 		xen_flush_tlb();
...
@@ -38,6 +38,7 @@
  *
  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
  */
+#include <linux/sched.h>
 #include <linux/highmem.h>
 #include <linux/bug.h>
 #include <linux/sched.h>
@@ -531,5 +532,7 @@ void xen_exit_mmap(struct mm_struct *mm)
 	drop_mm_ref(mm);
 	put_cpu();

+	spin_lock(&mm->page_table_lock);
 	xen_pgd_unpin(mm->pgd);
+	spin_unlock(&mm->page_table_lock);
 }
...
@@ -20,6 +20,7 @@
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
 #include <linux/percpu.h>
+#include <linux/hardirq.h>

 #include <asm/xen/hypercall.h>
@@ -39,10 +40,12 @@ DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
 void xen_mc_flush(void)
 {
-	struct mc_buffer *b = &get_cpu_var(mc_buffer);
+	struct mc_buffer *b = &__get_cpu_var(mc_buffer);
 	int ret = 0;
 	unsigned long flags;

+	BUG_ON(preemptible());
+
 	/* Disable interrupts in case someone comes in and queues
 	   something in the middle */
 	local_irq_save(flags);
@@ -60,7 +63,6 @@ void xen_mc_flush(void)
 	} else
 		BUG_ON(b->argidx != 0);

-	put_cpu_var(mc_buffer);
 	local_irq_restore(flags);

 	BUG_ON(ret);
@@ -68,10 +70,11 @@ void xen_mc_flush(void)
 struct multicall_space __xen_mc_entry(size_t args)
 {
-	struct mc_buffer *b = &get_cpu_var(mc_buffer);
+	struct mc_buffer *b = &__get_cpu_var(mc_buffer);
 	struct multicall_space ret;
 	unsigned argspace = (args + sizeof(u64) - 1) / sizeof(u64);

+	BUG_ON(preemptible());
 	BUG_ON(argspace > MC_ARGS);

 	if (b->mcidx == MC_BATCH ||
@@ -83,7 +86,5 @@ struct multicall_space __xen_mc_entry(size_t args)
 	ret.args = &b->args[b->argidx];
 	b->argidx += argspace;

-	put_cpu_var(mc_buffer);
-
 	return ret;
 }
...
@@ -88,7 +88,7 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
 	u64 state_time;
 	struct vcpu_runstate_info *state;

-	preempt_disable();
+	BUG_ON(preemptible());

 	state = &__get_cpu_var(runstate);
@@ -103,8 +103,6 @@ static void get_runstate_snapshot(struct vcpu_runstate_info *res)
 		*res = *state;
 		barrier();
 	} while (get64(&state->state_entry_time) != state_time);
-
-	preempt_enable();
 }

 static void setup_runstate_info(int cpu)
@@ -179,9 +177,19 @@ static void do_stolen_accounting(void)
 unsigned long long xen_sched_clock(void)
 {
 	struct vcpu_runstate_info state;
-	cycle_t now = xen_clocksource_read();
+	cycle_t now;
+	u64 ret;
 	s64 offset;

+	/*
+	 * Ideally sched_clock should be called on a per-cpu basis
+	 * anyway, so preempt should already be disabled, but that's
+	 * not current practice at the moment.
+	 */
+	preempt_disable();
+
+	now = xen_clocksource_read();
+
 	get_runstate_snapshot(&state);

 	WARN_ON(state.state != RUNSTATE_running);
@@ -190,9 +198,13 @@ unsigned long long xen_sched_clock(void)
 	if (offset < 0)
 		offset = 0;

-	return state.time[RUNSTATE_blocked] +
+	ret = state.time[RUNSTATE_blocked] +
 		state.time[RUNSTATE_running] +
 		offset;
+
+	preempt_enable();
+
+	return ret;
 }
...