Commit 49c13b51 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm: (80 commits)
  KVM: Use CPU_DYING for disabling virtualization
  KVM: Tune hotplug/suspend IPIs
  KVM: Keep track of which cpus have virtualization enabled
  SMP: Allow smp_call_function_single() to current cpu
  i386: Allow smp_call_function_single() to current cpu
  x86_64: Allow smp_call_function_single() to current cpu
  HOTPLUG: Adapt thermal throttle to CPU_DYING
  HOTPLUG: Adapt cpuset hotplug callback to CPU_DYING
  HOTPLUG: Add CPU_DYING notifier
  KVM: Clean up #includes
  KVM: Remove kvmfs in favor of the anonymous inodes source
  KVM: SVM: Reliably detect if SVM was disabled by BIOS
  KVM: VMX: Remove unnecessary code in vmx_tlb_flush()
  KVM: MMU: Fix Wrong tlb flush order
  KVM: VMX: Reinitialize the real-mode tss when entering real mode
  KVM: Avoid useless memory write when possible
  KVM: Fix x86 emulator writeback
  KVM: Add support for in-kernel pio handlers
  KVM: VMX: Fix interrupt checking on lightweight exit
  KVM: Adds support for in-kernel mmio handlers
  ...
parents 492559af cec9ad27
...@@ -134,19 +134,21 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, ...@@ -134,19 +134,21 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
int err; int err;
sys_dev = get_cpu_sysdev(cpu); sys_dev = get_cpu_sysdev(cpu);
mutex_lock(&therm_cpu_lock);
switch (action) { switch (action) {
case CPU_ONLINE: case CPU_ONLINE:
case CPU_ONLINE_FROZEN: case CPU_ONLINE_FROZEN:
mutex_lock(&therm_cpu_lock);
err = thermal_throttle_add_dev(sys_dev); err = thermal_throttle_add_dev(sys_dev);
mutex_unlock(&therm_cpu_lock);
WARN_ON(err); WARN_ON(err);
break; break;
case CPU_DEAD: case CPU_DEAD:
case CPU_DEAD_FROZEN: case CPU_DEAD_FROZEN:
mutex_lock(&therm_cpu_lock);
thermal_throttle_remove_dev(sys_dev); thermal_throttle_remove_dev(sys_dev);
mutex_unlock(&therm_cpu_lock);
break; break;
} }
mutex_unlock(&therm_cpu_lock);
return NOTIFY_OK; return NOTIFY_OK;
} }
......
...@@ -47,7 +47,7 @@ int smp_call_function(void (*func) (void *info), void *info, int nonatomic, ...@@ -47,7 +47,7 @@ int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
EXPORT_SYMBOL(smp_call_function); EXPORT_SYMBOL(smp_call_function);
/** /**
* smp_call_function_single - Run a function on another CPU * smp_call_function_single - Run a function on a specific CPU
* @cpu: The target CPU. Cannot be the calling CPU. * @cpu: The target CPU. Cannot be the calling CPU.
* @func: The function to run. This must be fast and non-blocking. * @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function. * @info: An arbitrary pointer to pass to the function.
...@@ -66,9 +66,11 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info, ...@@ -66,9 +66,11 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
int ret; int ret;
int me = get_cpu(); int me = get_cpu();
if (cpu == me) { if (cpu == me) {
WARN_ON(1); local_irq_disable();
func(info);
local_irq_enable();
put_cpu(); put_cpu();
return -EBUSY; return 0;
} }
ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
......
...@@ -357,7 +357,7 @@ __smp_call_function_single(int cpu, void (*func) (void *info), void *info, ...@@ -357,7 +357,7 @@ __smp_call_function_single(int cpu, void (*func) (void *info), void *info,
} }
/* /*
* smp_call_function_single - Run a function on another CPU * smp_call_function_single - Run a function on a specific CPU
* @func: The function to run. This must be fast and non-blocking. * @func: The function to run. This must be fast and non-blocking.
* @info: An arbitrary pointer to pass to the function. * @info: An arbitrary pointer to pass to the function.
* @nonatomic: Currently unused. * @nonatomic: Currently unused.
...@@ -374,14 +374,18 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, ...@@ -374,14 +374,18 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
{ {
/* prevent preemption and reschedule on another processor */ /* prevent preemption and reschedule on another processor */
int me = get_cpu(); int me = get_cpu();
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
if (cpu == me) { if (cpu == me) {
local_irq_disable();
func(info);
local_irq_enable();
put_cpu(); put_cpu();
return 0; return 0;
} }
/* Can deadlock when called with interrupts disabled */
WARN_ON(irqs_disabled());
spin_lock_bh(&call_lock); spin_lock_bh(&call_lock);
__smp_call_function_single(cpu, func, info, nonatomic, wait); __smp_call_function_single(cpu, func, info, nonatomic, wait);
spin_unlock_bh(&call_lock); spin_unlock_bh(&call_lock);
......
# #
# KVM configuration # KVM configuration
# #
menu "Virtualization" menuconfig VIRTUALIZATION
bool "Virtualization"
depends on X86 depends on X86
default y
if VIRTUALIZATION
config KVM config KVM
tristate "Kernel-based Virtual Machine (KVM) support" tristate "Kernel-based Virtual Machine (KVM) support"
depends on X86 && EXPERIMENTAL depends on X86 && EXPERIMENTAL
depends on X86_CMPXCHG64 || 64BIT
---help--- ---help---
Support hosting fully virtualized guest machines using hardware Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent virtualization extensions. You will need a fairly recent
...@@ -35,4 +40,4 @@ config KVM_AMD ...@@ -35,4 +40,4 @@ config KVM_AMD
Provides support for KVM on AMD processors equipped with the AMD-V Provides support for KVM on AMD processors equipped with the AMD-V
(SVM) extensions. (SVM) extensions.
endmenu endif # VIRTUALIZATION
...@@ -10,6 +10,8 @@ ...@@ -10,6 +10,8 @@
#include <linux/list.h> #include <linux/list.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <asm/signal.h> #include <asm/signal.h>
...@@ -18,6 +20,7 @@ ...@@ -18,6 +20,7 @@
#include <linux/kvm_para.h> #include <linux/kvm_para.h>
#define CR0_PE_MASK (1ULL << 0) #define CR0_PE_MASK (1ULL << 0)
#define CR0_MP_MASK (1ULL << 1)
#define CR0_TS_MASK (1ULL << 3) #define CR0_TS_MASK (1ULL << 3)
#define CR0_NE_MASK (1ULL << 5) #define CR0_NE_MASK (1ULL << 5)
#define CR0_WP_MASK (1ULL << 16) #define CR0_WP_MASK (1ULL << 16)
...@@ -42,7 +45,8 @@ ...@@ -42,7 +45,8 @@
(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \ (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK \
| CR0_NW_MASK | CR0_CD_MASK) | CR0_NW_MASK | CR0_CD_MASK)
#define KVM_VM_CR0_ALWAYS_ON \ #define KVM_VM_CR0_ALWAYS_ON \
(CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK) (CR0_PG_MASK | CR0_PE_MASK | CR0_WP_MASK | CR0_NE_MASK | CR0_TS_MASK \
| CR0_MP_MASK)
#define KVM_GUEST_CR4_MASK \ #define KVM_GUEST_CR4_MASK \
(CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK) (CR4_PSE_MASK | CR4_PAE_MASK | CR4_PGE_MASK | CR4_VMXE_MASK | CR4_VME_MASK)
#define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK) #define KVM_PMODE_VM_CR4_ALWAYS_ON (CR4_VMXE_MASK | CR4_PAE_MASK)
...@@ -51,10 +55,10 @@ ...@@ -51,10 +55,10 @@
#define INVALID_PAGE (~(hpa_t)0) #define INVALID_PAGE (~(hpa_t)0)
#define UNMAPPED_GVA (~(gpa_t)0) #define UNMAPPED_GVA (~(gpa_t)0)
#define KVM_MAX_VCPUS 1 #define KVM_MAX_VCPUS 4
#define KVM_ALIAS_SLOTS 4 #define KVM_ALIAS_SLOTS 4
#define KVM_MEMORY_SLOTS 4 #define KVM_MEMORY_SLOTS 4
#define KVM_NUM_MMU_PAGES 256 #define KVM_NUM_MMU_PAGES 1024
#define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25 #define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 40 #define KVM_MAX_CPUID_ENTRIES 40
...@@ -79,6 +83,11 @@ ...@@ -79,6 +83,11 @@
#define KVM_PIO_PAGE_OFFSET 1 #define KVM_PIO_PAGE_OFFSET 1
/*
* vcpu->requests bit members
*/
#define KVM_TLB_FLUSH 0
/* /*
* Address types: * Address types:
* *
...@@ -137,7 +146,7 @@ struct kvm_mmu_page { ...@@ -137,7 +146,7 @@ struct kvm_mmu_page {
gfn_t gfn; gfn_t gfn;
union kvm_mmu_page_role role; union kvm_mmu_page_role role;
hpa_t page_hpa; u64 *spt;
unsigned long slot_bitmap; /* One bit set per slot which has memory unsigned long slot_bitmap; /* One bit set per slot which has memory
* in this shadow page. * in this shadow page.
*/ */
...@@ -232,6 +241,7 @@ struct kvm_pio_request { ...@@ -232,6 +241,7 @@ struct kvm_pio_request {
struct page *guest_pages[2]; struct page *guest_pages[2];
unsigned guest_page_offset; unsigned guest_page_offset;
int in; int in;
int port;
int size; int size;
int string; int string;
int down; int down;
...@@ -252,8 +262,70 @@ struct kvm_stat { ...@@ -252,8 +262,70 @@ struct kvm_stat {
u32 halt_exits; u32 halt_exits;
u32 request_irq_exits; u32 request_irq_exits;
u32 irq_exits; u32 irq_exits;
u32 light_exits;
u32 efer_reload;
};
/*
 * kvm_io_device - callback table describing one I/O device.
 *
 * Each callback receives the device itself as @this.  The kvm_iodevice_*()
 * inline helpers that follow are the intended way to invoke the callbacks.
 */
struct kvm_io_device {
/*
 * Read access of @len bytes at @addr; @val is the (writable) data buffer.
 * Called unconditionally by kvm_iodevice_read(), so it must not be NULL
 * for any device reached through that helper.
 */
void (*read)(struct kvm_io_device *this,
gpa_t addr,
int len,
void *val);
/*
 * Write access of @len bytes at @addr; @val is the (read-only) data buffer.
 * Called unconditionally by kvm_iodevice_write().
 */
void (*write)(struct kvm_io_device *this,
gpa_t addr,
int len,
const void *val);
/*
 * Queried with an address via kvm_iodevice_inrange().
 * NOTE(review): presumably returns nonzero when @addr belongs to this
 * device -- confirm against kvm_io_bus_find_dev().
 */
int (*in_range)(struct kvm_io_device *this, gpa_t addr);
/* Optional: kvm_iodevice_destructor() skips the call when this is NULL. */
void (*destructor)(struct kvm_io_device *this);
/* Opaque pointer; not interpreted by any code visible in this header. */
void *private;
};
/*
 * Forward a read of @len bytes at @addr to @dev's ->read callback, passing
 * @val through unchanged.  The callback pointer is not checked for NULL.
 */
static inline void kvm_iodevice_read(struct kvm_io_device *dev,
gpa_t addr,
int len,
void *val)
{
dev->read(dev, addr, len, val);
}
/*
 * Forward a write of @len bytes at @addr to @dev's ->write callback, passing
 * @val through unchanged.  The callback pointer is not checked for NULL.
 */
static inline void kvm_iodevice_write(struct kvm_io_device *dev,
gpa_t addr,
int len,
const void *val)
{
dev->write(dev, addr, len, val);
}
/*
 * Return whatever @dev's ->in_range callback reports for @addr.
 * The callback pointer is not checked for NULL.
 */
static inline int kvm_iodevice_inrange(struct kvm_io_device *dev, gpa_t addr)
{
return dev->in_range(dev, addr);
}
/*
 * Invoke @dev's ->destructor callback if one is set.  Unlike the other
 * kvm_iodevice_*() helpers, a NULL callback is tolerated here, which makes
 * the destructor optional for device implementations.
 */
static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
{
if (dev->destructor)
dev->destructor(dev);
}
/*
* It would be nice to use something smarter than a linear search, TBD...
* Thankfully we don't expect many devices to register (famous last words :),
* so until then it will suffice. At least it's abstracted so we can change
* in one place.
*/
struct kvm_io_bus {
int dev_count;
#define NR_IOBUS_DEVS 6
struct kvm_io_device *devs[NR_IOBUS_DEVS];
}; };
void kvm_io_bus_init(struct kvm_io_bus *bus);
void kvm_io_bus_destroy(struct kvm_io_bus *bus);
struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, gpa_t addr);
void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
struct kvm_io_device *dev);
struct kvm_vcpu { struct kvm_vcpu {
struct kvm *kvm; struct kvm *kvm;
union { union {
...@@ -266,6 +338,8 @@ struct kvm_vcpu { ...@@ -266,6 +338,8 @@ struct kvm_vcpu {
u64 host_tsc; u64 host_tsc;
struct kvm_run *run; struct kvm_run *run;
int interrupt_window_open; int interrupt_window_open;
int guest_mode;
unsigned long requests;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) #define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
unsigned long irq_pending[NR_IRQ_WORDS]; unsigned long irq_pending[NR_IRQ_WORDS];
...@@ -285,15 +359,20 @@ struct kvm_vcpu { ...@@ -285,15 +359,20 @@ struct kvm_vcpu {
u64 apic_base; u64 apic_base;
u64 ia32_misc_enable_msr; u64 ia32_misc_enable_msr;
int nmsrs; int nmsrs;
int save_nmsrs;
int msr_offset_efer;
#ifdef CONFIG_X86_64
int msr_offset_kernel_gs_base;
#endif
struct vmx_msr_entry *guest_msrs; struct vmx_msr_entry *guest_msrs;
struct vmx_msr_entry *host_msrs; struct vmx_msr_entry *host_msrs;
struct list_head free_pages;
struct kvm_mmu_page page_header_buf[KVM_NUM_MMU_PAGES];
struct kvm_mmu mmu; struct kvm_mmu mmu;
struct kvm_mmu_memory_cache mmu_pte_chain_cache; struct kvm_mmu_memory_cache mmu_pte_chain_cache;
struct kvm_mmu_memory_cache mmu_rmap_desc_cache; struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
struct kvm_mmu_memory_cache mmu_page_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
gfn_t last_pt_write_gfn; gfn_t last_pt_write_gfn;
int last_pt_write_count; int last_pt_write_count;
...@@ -305,6 +384,11 @@ struct kvm_vcpu { ...@@ -305,6 +384,11 @@ struct kvm_vcpu {
char *guest_fx_image; char *guest_fx_image;
int fpu_active; int fpu_active;
int guest_fpu_loaded; int guest_fpu_loaded;
struct vmx_host_state {
int loaded;
u16 fs_sel, gs_sel, ldt_sel;
int fs_gs_ldt_reload_needed;
} vmx_host_state;
int mmio_needed; int mmio_needed;
int mmio_read_completed; int mmio_read_completed;
...@@ -331,6 +415,7 @@ struct kvm_vcpu { ...@@ -331,6 +415,7 @@ struct kvm_vcpu {
u32 ar; u32 ar;
} tr, es, ds, fs, gs; } tr, es, ds, fs, gs;
} rmode; } rmode;
int halt_request; /* real mode on Intel only */
int cpuid_nent; int cpuid_nent;
struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES]; struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
...@@ -362,12 +447,15 @@ struct kvm { ...@@ -362,12 +447,15 @@ struct kvm {
struct list_head active_mmu_pages; struct list_head active_mmu_pages;
int n_free_mmu_pages; int n_free_mmu_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
int nvcpus;
struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
int memory_config_version; int memory_config_version;
int busy; int busy;
unsigned long rmap_overflow; unsigned long rmap_overflow;
struct list_head vm_list; struct list_head vm_list;
struct file *filp; struct file *filp;
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
}; };
struct descriptor_table { struct descriptor_table {
...@@ -488,6 +576,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, ...@@ -488,6 +576,7 @@ int kvm_setup_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned long count, int string, int down, int size, unsigned long count, int string, int down,
gva_t address, int rep, unsigned port); gva_t address, int rep, unsigned port);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address); int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
int emulate_clts(struct kvm_vcpu *vcpu); int emulate_clts(struct kvm_vcpu *vcpu);
int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr, int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr,
...@@ -511,6 +600,7 @@ void save_msrs(struct vmx_msr_entry *e, int n); ...@@ -511,6 +600,7 @@ void save_msrs(struct vmx_msr_entry *e, int n);
void kvm_resched(struct kvm_vcpu *vcpu); void kvm_resched(struct kvm_vcpu *vcpu);
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_flush_remote_tlbs(struct kvm *kvm);
int kvm_read_guest(struct kvm_vcpu *vcpu, int kvm_read_guest(struct kvm_vcpu *vcpu,
gva_t addr, gva_t addr,
...@@ -524,10 +614,12 @@ int kvm_write_guest(struct kvm_vcpu *vcpu, ...@@ -524,10 +614,12 @@ int kvm_write_guest(struct kvm_vcpu *vcpu,
unsigned long segment_base(u16 selector); unsigned long segment_base(u16 selector);
void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes); void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes); const u8 *old, const u8 *new, int bytes);
int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run); int kvm_hypercall(struct kvm_vcpu *vcpu, struct kvm_run *run);
...@@ -539,6 +631,14 @@ static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, ...@@ -539,6 +631,14 @@ static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
return vcpu->mmu.page_fault(vcpu, gva, error_code); return vcpu->mmu.page_fault(vcpu, gva, error_code);
} }
/*
 * Make sure the vcpu has an MMU root.
 *
 * Returns 0 without doing any work when vcpu->mmu.root_hpa is already set
 * (i.e. differs from INVALID_PAGE), which is hinted as the common case.
 * Otherwise the result of kvm_mmu_load() is returned to the caller.
 */
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
	int ret = 0;

	/* Slow path only: the root has not been loaded (or was dropped). */
	if (!likely(vcpu->mmu.root_hpa != INVALID_PAGE))
		ret = kvm_mmu_load(vcpu);

	return ret;
}
static inline int is_long_mode(struct kvm_vcpu *vcpu) static inline int is_long_mode(struct kvm_vcpu *vcpu)
{ {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -14,16 +14,17 @@ ...@@ -14,16 +14,17 @@
* *
*/ */
#include "kvm_svm.h"
#include "x86_emulate.h"
#include <linux/module.h> #include <linux/module.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/highmem.h> #include <linux/highmem.h>
#include <linux/profile.h> #include <linux/profile.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <asm/desc.h>
#include "kvm_svm.h" #include <asm/desc.h>
#include "x86_emulate.h"
MODULE_AUTHOR("Qumranet"); MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
...@@ -378,7 +379,7 @@ static __init int svm_hardware_setup(void) ...@@ -378,7 +379,7 @@ static __init int svm_hardware_setup(void)
int cpu; int cpu;
struct page *iopm_pages; struct page *iopm_pages;
struct page *msrpm_pages; struct page *msrpm_pages;
void *msrpm_va; void *iopm_va, *msrpm_va;
int r; int r;
kvm_emulator_want_group7_invlpg(); kvm_emulator_want_group7_invlpg();
...@@ -387,8 +388,10 @@ static __init int svm_hardware_setup(void) ...@@ -387,8 +388,10 @@ static __init int svm_hardware_setup(void)
if (!iopm_pages) if (!iopm_pages)
return -ENOMEM; return -ENOMEM;
memset(page_address(iopm_pages), 0xff,
PAGE_SIZE * (1 << IOPM_ALLOC_ORDER)); iopm_va = page_address(iopm_pages);
memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
clear_bit(0x80, iopm_va); /* allow direct access to PC debug port */
iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
...@@ -579,7 +582,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) ...@@ -579,7 +582,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
goto out2; goto out2;
vcpu->svm->vmcb = page_address(page); vcpu->svm->vmcb = page_address(page);
memset(vcpu->svm->vmcb, 0, PAGE_SIZE); clear_page(vcpu->svm->vmcb);
vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; vcpu->svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
vcpu->svm->asid_generation = 0; vcpu->svm->asid_generation = 0;
memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs)); memset(vcpu->svm->db_regs, 0, sizeof(vcpu->svm->db_regs));
...@@ -587,9 +590,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) ...@@ -587,9 +590,9 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
fx_init(vcpu); fx_init(vcpu);
vcpu->fpu_active = 1; vcpu->fpu_active = 1;
vcpu->apic_base = 0xfee00000 | vcpu->apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
/*for vcpu 0*/ MSR_IA32_APICBASE_BSP | if (vcpu == &vcpu->kvm->vcpus[0])
MSR_IA32_APICBASE_ENABLE; vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
return 0; return 0;
...@@ -955,7 +958,7 @@ static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -955,7 +958,7 @@ static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
* VMCB is undefined after a SHUTDOWN intercept * VMCB is undefined after a SHUTDOWN intercept
* so reinitialize it. * so reinitialize it.
*/ */
memset(vcpu->svm->vmcb, 0, PAGE_SIZE); clear_page(vcpu->svm->vmcb);
init_vmcb(vcpu->svm->vmcb); init_vmcb(vcpu->svm->vmcb);
kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
...@@ -1113,12 +1116,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1113,12 +1116,7 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1; vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
skip_emulated_instruction(vcpu); skip_emulated_instruction(vcpu);
if (vcpu->irq_summary) return kvm_emulate_halt(vcpu);
return 1;
kvm_run->exit_reason = KVM_EXIT_HLT;
++vcpu->stat.halt_exits;
return 0;
} }
static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int vmmcall_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
...@@ -1473,6 +1471,11 @@ static void load_db_regs(unsigned long *db_regs) ...@@ -1473,6 +1471,11 @@ static void load_db_regs(unsigned long *db_regs)
asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3])); asm volatile ("mov %0, %%dr3" : : "r"(db_regs[3]));
} }
/*
 * Flush the guest TLB for @vcpu.  On SVM this is done solely by calling
 * force_new_asid(); svm_vcpu_run() invokes it when the KVM_TLB_FLUSH bit
 * is set in vcpu->requests.
 */
static void svm_flush_tlb(struct kvm_vcpu *vcpu)
{
force_new_asid(vcpu);
}
static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{ {
u16 fs_selector; u16 fs_selector;
...@@ -1481,11 +1484,20 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1481,11 +1484,20 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
int r; int r;
again: again:
r = kvm_mmu_reload(vcpu);
if (unlikely(r))
return r;
if (!vcpu->mmio_read_completed) if (!vcpu->mmio_read_completed)
do_interrupt_requests(vcpu, kvm_run); do_interrupt_requests(vcpu, kvm_run);
clgi(); clgi();
vcpu->guest_mode = 1;
if (vcpu->requests)
if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests))
svm_flush_tlb(vcpu);
pre_svm_run(vcpu); pre_svm_run(vcpu);
save_host_msrs(vcpu); save_host_msrs(vcpu);
...@@ -1617,6 +1629,8 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1617,6 +1629,8 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
#endif #endif
: "cc", "memory" ); : "cc", "memory" );
vcpu->guest_mode = 0;
if (vcpu->fpu_active) { if (vcpu->fpu_active) {
fx_save(vcpu->guest_fx_image); fx_save(vcpu->guest_fx_image);
fx_restore(vcpu->host_fx_image); fx_restore(vcpu->host_fx_image);
...@@ -1681,11 +1695,6 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) ...@@ -1681,11 +1695,6 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return r; return r;
} }
static void svm_flush_tlb(struct kvm_vcpu *vcpu)
{
force_new_asid(vcpu);
}
static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
{ {
vcpu->svm->vmcb->save.cr3 = root; vcpu->svm->vmcb->save.cr3 = root;
...@@ -1727,6 +1736,12 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu, ...@@ -1727,6 +1736,12 @@ static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
static int is_disabled(void) static int is_disabled(void)
{ {
u64 vm_cr;
rdmsrl(MSR_VM_CR, vm_cr);
if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
return 1;
return 0; return 0;
} }
......
...@@ -175,8 +175,11 @@ struct __attribute__ ((__packed__)) vmcb { ...@@ -175,8 +175,11 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_CPUID_FUNC 0x8000000a #define SVM_CPUID_FUNC 0x8000000a
#define MSR_EFER_SVME_MASK (1ULL << 12) #define MSR_EFER_SVME_MASK (1ULL << 12)
#define MSR_VM_CR 0xc0010114
#define MSR_VM_HSAVE_PA 0xc0010117ULL #define MSR_VM_HSAVE_PA 0xc0010117ULL
#define SVM_VM_CR_SVM_DISABLE 4
#define SVM_SELECTOR_S_SHIFT 4 #define SVM_SELECTOR_S_SHIFT 4
#define SVM_SELECTOR_DPL_SHIFT 5 #define SVM_SELECTOR_DPL_SHIFT 5
#define SVM_SELECTOR_P_SHIFT 7 #define SVM_SELECTOR_P_SHIFT 7
......
This diff is collapsed.
...@@ -98,8 +98,11 @@ static u8 opcode_table[256] = { ...@@ -98,8 +98,11 @@ static u8 opcode_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0,
/* 0x40 - 0x4F */ /* 0x40 - 0x4F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x50 - 0x5F */ /* 0x50 - 0x57 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0x58 - 0x5F */
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
/* 0x60 - 0x6F */ /* 0x60 - 0x6F */
0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
...@@ -128,9 +131,9 @@ static u8 opcode_table[256] = { ...@@ -128,9 +131,9 @@ static u8 opcode_table[256] = {
/* 0xB0 - 0xBF */ /* 0xB0 - 0xBF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xC0 - 0xC7 */ /* 0xC0 - 0xC7 */
ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, 0, 0, ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM,
0, 0, ByteOp | DstMem | SrcImm | ModRM | Mov, 0, ImplicitOps, 0, 0,
DstMem | SrcImm | ModRM | Mov, ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov,
/* 0xC8 - 0xCF */ /* 0xC8 - 0xCF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xD0 - 0xD7 */ /* 0xD0 - 0xD7 */
...@@ -143,7 +146,8 @@ static u8 opcode_table[256] = { ...@@ -143,7 +146,8 @@ static u8 opcode_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 0xF0 - 0xF7 */ /* 0xF0 - 0xF7 */
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, ImplicitOps, 0,
ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
/* 0xF8 - 0xFF */ /* 0xF8 - 0xFF */
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM 0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM
...@@ -152,7 +156,7 @@ static u8 opcode_table[256] = { ...@@ -152,7 +156,7 @@ static u8 opcode_table[256] = {
static u16 twobyte_table[256] = { static u16 twobyte_table[256] = {
/* 0x00 - 0x0F */ /* 0x00 - 0x0F */
0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0, 0, SrcMem | ModRM | DstReg, 0, 0, 0, 0, ImplicitOps, 0,
0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
/* 0x10 - 0x1F */ /* 0x10 - 0x1F */
0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
/* 0x20 - 0x2F */ /* 0x20 - 0x2F */
...@@ -481,6 +485,7 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) ...@@ -481,6 +485,7 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
int mode = ctxt->mode; int mode = ctxt->mode;
unsigned long modrm_ea; unsigned long modrm_ea;
int use_modrm_ea, index_reg = 0, base_reg = 0, scale, rip_relative = 0; int use_modrm_ea, index_reg = 0, base_reg = 0, scale, rip_relative = 0;
int no_wb = 0;
/* Shadow copy of register state. Committed on successful emulation. */ /* Shadow copy of register state. Committed on successful emulation. */
unsigned long _regs[NR_VCPU_REGS]; unsigned long _regs[NR_VCPU_REGS];
...@@ -1047,7 +1052,7 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) ...@@ -1047,7 +1052,7 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
_regs[VCPU_REGS_RSP]), _regs[VCPU_REGS_RSP]),
&dst.val, dst.bytes, ctxt)) != 0) &dst.val, dst.bytes, ctxt)) != 0)
goto done; goto done;
dst.val = dst.orig_val; /* skanky: disable writeback */ no_wb = 1;
break; break;
default: default:
goto cannot_emulate; goto cannot_emulate;
...@@ -1056,7 +1061,7 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) ...@@ -1056,7 +1061,7 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
} }
writeback: writeback:
if ((d & Mov) || (dst.orig_val != dst.val)) { if (!no_wb) {
switch (dst.type) { switch (dst.type) {
case OP_REG: case OP_REG:
/* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
...@@ -1149,6 +1154,23 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) ...@@ -1149,6 +1154,23 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
case 0xae ... 0xaf: /* scas */ case 0xae ... 0xaf: /* scas */
DPRINTF("Urk! I don't handle SCAS.\n"); DPRINTF("Urk! I don't handle SCAS.\n");
goto cannot_emulate; goto cannot_emulate;
case 0xf4: /* hlt */
ctxt->vcpu->halt_request = 1;
goto done;
case 0xc3: /* ret */
dst.ptr = &_eip;
goto pop_instruction;
case 0x58 ... 0x5f: /* pop reg */
dst.ptr = (unsigned long *)&_regs[b & 0x7];
pop_instruction:
if ((rc = ops->read_std(register_address(ctxt->ss_base,
_regs[VCPU_REGS_RSP]), dst.ptr, op_bytes, ctxt)) != 0)
goto done;
register_address_increment(_regs[VCPU_REGS_RSP], op_bytes);
no_wb = 1; /* Disable writeback. */
break;
} }
goto writeback; goto writeback;
...@@ -1302,8 +1324,10 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) ...@@ -1302,8 +1324,10 @@ x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
twobyte_special_insn: twobyte_special_insn:
/* Disable writeback. */ /* Disable writeback. */
dst.orig_val = dst.val; no_wb = 1;
switch (b) { switch (b) {
case 0x09: /* wbinvd */
break;
case 0x0d: /* GrpP (prefetch) */ case 0x0d: /* GrpP (prefetch) */
case 0x18: /* Grp16 (prefetch/nop) */ case 0x18: /* Grp16 (prefetch/nop) */
break; break;
......
...@@ -139,6 +139,7 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile, ...@@ -139,6 +139,7 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
put_filp(file); put_filp(file);
return error; return error;
} }
EXPORT_SYMBOL_GPL(anon_inode_getfd);
/* /*
* A single inode exists for all anon_inode files. Contrary to pipes, * A single inode exists for all anon_inode files. Contrary to pipes,
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
#define HPFS_SUPER_MAGIC 0xf995e849 #define HPFS_SUPER_MAGIC 0xf995e849
#define ISOFS_SUPER_MAGIC 0x9660 #define ISOFS_SUPER_MAGIC 0x9660
#define JFFS2_SUPER_MAGIC 0x72b6 #define JFFS2_SUPER_MAGIC 0x72b6
#define KVMFS_SUPER_MAGIC 0x19700426
#define ANON_INODE_FS_MAGIC 0x09041934 #define ANON_INODE_FS_MAGIC 0x09041934
#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ #define MINIX_SUPER_MAGIC 0x137F /* original minix fs */
......
...@@ -196,6 +196,8 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, ...@@ -196,6 +196,8 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ #define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
#define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */ #define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */
#define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */ #define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */
#define CPU_DYING 0x000A /* CPU (unsigned)v not running any task,
* not handling interrupts, soon dead */
/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
* operation in progress * operation in progress
...@@ -208,6 +210,7 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, ...@@ -208,6 +210,7 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) #define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) #define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) #define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN)
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_NOTIFIER_H */ #endif /* _LINUX_NOTIFIER_H */
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
*/ */
#include <linux/errno.h> #include <linux/errno.h>
#include <asm/system.h>
extern void cpu_idle(void); extern void cpu_idle(void);
...@@ -102,7 +103,11 @@ static inline void smp_send_reschedule(int cpu) { } ...@@ -102,7 +103,11 @@ static inline void smp_send_reschedule(int cpu) { }
static inline int smp_call_function_single(int cpuid, void (*func) (void *info), static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
void *info, int retry, int wait) void *info, int retry, int wait)
{ {
return -EBUSY; WARN_ON(cpuid != 0);
local_irq_disable();
func(info);
local_irq_enable();
return 0;
} }
#endif /* !SMP */ #endif /* !SMP */
......
...@@ -103,11 +103,19 @@ static inline void check_for_tasks(int cpu) ...@@ -103,11 +103,19 @@ static inline void check_for_tasks(int cpu)
write_unlock_irq(&tasklist_lock); write_unlock_irq(&tasklist_lock);
} }
/*
 * Argument bundle that _cpu_down() hands to take_cpu_down() through
 * __stop_machine_run(), so the CPU_DYING notification can be raised with
 * the same modifier and CPU cookie as the other hotplug events.
 */
struct take_cpu_down_param {
/* OR-ed into CPU_DYING; _cpu_down() sets it to CPU_TASKS_FROZEN or 0. */
unsigned long mod;
/* Notifier payload; _cpu_down() sets it to the CPU number cast to a pointer. */
void *hcpu;
};
/* Take this CPU down. */ /* Take this CPU down. */
static int take_cpu_down(void *unused) static int take_cpu_down(void *_param)
{ {
struct take_cpu_down_param *param = _param;
int err; int err;
raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
param->hcpu);
/* Ensure this CPU doesn't handle any more interrupts. */ /* Ensure this CPU doesn't handle any more interrupts. */
err = __cpu_disable(); err = __cpu_disable();
if (err < 0) if (err < 0)
...@@ -127,6 +135,10 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) ...@@ -127,6 +135,10 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
cpumask_t old_allowed, tmp; cpumask_t old_allowed, tmp;
void *hcpu = (void *)(long)cpu; void *hcpu = (void *)(long)cpu;
unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
struct take_cpu_down_param tcd_param = {
.mod = mod,
.hcpu = hcpu,
};
if (num_online_cpus() == 1) if (num_online_cpus() == 1)
return -EBUSY; return -EBUSY;
...@@ -153,7 +165,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen) ...@@ -153,7 +165,7 @@ static int _cpu_down(unsigned int cpu, int tasks_frozen)
set_cpus_allowed(current, tmp); set_cpus_allowed(current, tmp);
mutex_lock(&cpu_bitmask_lock); mutex_lock(&cpu_bitmask_lock);
p = __stop_machine_run(take_cpu_down, NULL, cpu); p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
mutex_unlock(&cpu_bitmask_lock); mutex_unlock(&cpu_bitmask_lock);
if (IS_ERR(p) || cpu_online(cpu)) { if (IS_ERR(p) || cpu_online(cpu)) {
......
...@@ -2138,6 +2138,9 @@ static void common_cpu_mem_hotplug_unplug(void) ...@@ -2138,6 +2138,9 @@ static void common_cpu_mem_hotplug_unplug(void)
static int cpuset_handle_cpuhp(struct notifier_block *nb, static int cpuset_handle_cpuhp(struct notifier_block *nb,
unsigned long phase, void *cpu) unsigned long phase, void *cpu)
{ {
if (phase == CPU_DYING || phase == CPU_DYING_FROZEN)
return NOTIFY_DONE;
common_cpu_mem_hotplug_unplug(); common_cpu_mem_hotplug_unplug();
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment