Commit 6cd8e300 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.31' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (138 commits)
  KVM: Prevent overflow in largepages calculation
  KVM: Disable large pages on misaligned memory slots
  KVM: Add VT-x machine check support
  KVM: VMX: Rename rmode.active to rmode.vm86_active
  KVM: Move "exit due to NMI" handling into vmx_complete_interrupts()
  KVM: Disable CR8 intercept if tpr patching is active
  KVM: Do not migrate pending software interrupts.
  KVM: inject NMI after IRET from a previous NMI, not before.
  KVM: Always request IRQ/NMI window if an interrupt is pending
  KVM: Do not re-execute INTn instruction.
  KVM: skip_emulated_instruction() decode instruction if size is not known
  KVM: Remove irq_pending bitmap
  KVM: Do not allow interrupt injection from userspace if there is a pending event.
  KVM: Unprotect a page if #PF happens during NMI injection.
  KVM: s390: Verify memory in kvm run
  KVM: s390: Sanity check on validity intercept
  KVM: s390: Unlink vcpu on destroy - v2
  KVM: s390: optimize float int lock: spin_lock_bh --> spin_lock
  KVM: s390: use hrtimer for clock wakeup from idle - v2
  KVM: s390: Fix memory slot versus run - v3
  ...
parents ddbb8684 09f8ca74
......@@ -371,6 +371,7 @@ struct kvm_vcpu_arch {
int last_run_cpu;
int vmm_tr_slot;
int vm_tr_slot;
int sn_rtc_tr_slot;
#define KVM_MP_STATE_RUNNABLE 0
#define KVM_MP_STATE_UNINITIALIZED 1
......@@ -465,6 +466,7 @@ struct kvm_arch {
unsigned long vmm_init_rr;
int online_vcpus;
int is_sn2;
struct kvm_ioapic *vioapic;
struct kvm_vm_stat stat;
......@@ -472,6 +474,7 @@ struct kvm_arch {
struct list_head assigned_dev_head;
struct iommu_domain *iommu_domain;
int iommu_flags;
struct hlist_head irq_ack_notifier_list;
unsigned long irq_sources_bitmap;
......@@ -578,6 +581,8 @@ struct kvm_vmm_info{
kvm_vmm_entry *vmm_entry;
kvm_tramp_entry *tramp_entry;
unsigned long vmm_ivt;
unsigned long patch_mov_ar;
unsigned long patch_mov_ar_sn2;
};
int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
......@@ -585,7 +590,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu);
int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
void kvm_sal_emul(struct kvm_vcpu *vcpu);
static inline void kvm_inject_nmi(struct kvm_vcpu *vcpu) {}
#endif /* __ASSEMBLY__*/
#endif
......@@ -146,6 +146,8 @@
#define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX)
#define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX)
#define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX)
#define PAGE_KERNEL_UC __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX | \
_PAGE_MA_UC)
# ifndef __ASSEMBLY__
......
......@@ -610,6 +610,9 @@ static struct irqaction ipi_irqaction = {
.name = "IPI"
};
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
static struct irqaction resched_irqaction = {
.handler = dummy_handler,
.flags = IRQF_DISABLED,
......
......@@ -23,7 +23,7 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM && EXPERIMENTAL
depends on HAVE_KVM && MODULES && EXPERIMENTAL
# for device assignment:
depends on PCI
select PREEMPT_NOTIFIERS
......
This diff is collapsed.
......@@ -21,6 +21,9 @@
#include <linux/kvm_host.h>
#include <linux/smp.h>
#include <asm/sn/addrs.h>
#include <asm/sn/clksupport.h>
#include <asm/sn/shub_mmr.h>
#include "vti.h"
#include "misc.h"
......@@ -188,12 +191,35 @@ static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
return result;
}
static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
/*
* On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
* RTC is used instead. This function patches the ratios from SAL
* to match the RTC before providing them to the guest.
*/
static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
{
struct pal_freq_ratio *ratio;
unsigned long sal_freq, sal_drift, factor;
result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
&sal_freq, &sal_drift);
ratio = (struct pal_freq_ratio *)&result->v2;
factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
sn_rtc_cycles_per_second;
ratio->num = 3;
ratio->den = factor;
}
static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
{
struct ia64_pal_retval result;
PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
if (vcpu->kvm->arch.is_sn2)
sn2_patch_itc_freq_ratios(&result);
return result;
}
......
......@@ -20,6 +20,10 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, int dest, int dest_mode);
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
#define kvm_apic_present(x) (true)
#endif
......@@ -11,6 +11,7 @@
#include <asm/asmmacro.h>
#include <asm/processor.h>
#include <asm/kvm_host.h>
#include "vti.h"
#include "asm-offsets.h"
......@@ -140,6 +141,35 @@ GLOBAL_ENTRY(kvm_asm_mov_from_ar)
;;
END(kvm_asm_mov_from_ar)
/*
* Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
* clock as it's source for emulating the ITC. This version will be
* copied on top of the original version if the host is determined to
* be an SN2.
*/
GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
extr.u r17=r25,6,7
mov r24=b0
;;
ld8 r18=[r18]
ld8 r19=[r19]
addl r20=@gprel(asm_mov_to_reg),gp
;;
add r19=r19,r18
shladd r17=r17,4,r20
;;
adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
st8 [r16] = r19
mov b0=r17
br.sptk.few b0
;;
END(kvm_asm_mov_from_ar_sn2)
// mov r1=rr[r3]
GLOBAL_ENTRY(kvm_asm_mov_from_rr)
......
......@@ -652,20 +652,25 @@ void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
unsigned long isr, unsigned long iim)
{
struct kvm_vcpu *v = current_vcpu;
long psr;
if (ia64_psr(regs)->cpl == 0) {
/* Allow hypercalls only when cpl = 0. */
if (iim == DOMN_PAL_REQUEST) {
local_irq_save(psr);
set_pal_call_data(v);
vmm_transition(v);
get_pal_call_result(v);
vcpu_increment_iip(v);
local_irq_restore(psr);
return;
} else if (iim == DOMN_SAL_REQUEST) {
local_irq_save(psr);
set_sal_call_data(v);
vmm_transition(v);
get_sal_call_result(v);
vcpu_increment_iip(v);
local_irq_restore(psr);
return;
}
}
......
......@@ -788,13 +788,29 @@ void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
setfpreg(reg, val, regs); /* FIXME: handle NATs later*/
}
/*
* The Altix RTC is mapped specially here for the vmm module
*/
#define SN_RTC_BASE (u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
static long kvm_get_itc(struct kvm_vcpu *vcpu)
{
#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
if (kvm->arch.is_sn2)
return (*SN_RTC_BASE);
else
#endif
return ia64_getreg(_IA64_REG_AR_ITC);
}
/************************************************************************
* lsapic timer
***********************************************************************/
u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
{
unsigned long guest_itc;
guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC);
guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
if (guest_itc >= VMX(vcpu, last_itc)) {
VMX(vcpu, last_itc) = guest_itc;
......@@ -809,7 +825,7 @@ static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
struct kvm_vcpu *v;
struct kvm *kvm;
int i;
long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC);
long itc_offset = val - kvm_get_itc(vcpu);
unsigned long vitv = VCPU(vcpu, itv);
kvm = (struct kvm *)KVM_VM_BASE;
......
......@@ -30,15 +30,19 @@ MODULE_AUTHOR("Intel");
MODULE_LICENSE("GPL");
extern char kvm_ia64_ivt;
extern char kvm_asm_mov_from_ar;
extern char kvm_asm_mov_from_ar_sn2;
extern fpswa_interface_t *vmm_fpswa_interface;
long vmm_sanity = 1;
struct kvm_vmm_info vmm_info = {
.module = THIS_MODULE,
.vmm_entry = vmm_entry,
.tramp_entry = vmm_trampoline,
.vmm_ivt = (unsigned long)&kvm_ia64_ivt,
.module = THIS_MODULE,
.vmm_entry = vmm_entry,
.tramp_entry = vmm_trampoline,
.vmm_ivt = (unsigned long)&kvm_ia64_ivt,
.patch_mov_ar = (unsigned long)&kvm_asm_mov_from_ar,
.patch_mov_ar_sn2 = (unsigned long)&kvm_asm_mov_from_ar_sn2,
};
static int __init kvm_vmm_init(void)
......
......@@ -95,7 +95,7 @@ GLOBAL_ENTRY(kvm_vmm_panic)
;;
srlz.i // guarantee that interruption collection is on
;;
//(p15) ssm psr.i // restore psr.i
(p15) ssm psr.i // restore psr.
addl r14=@gprel(ia64_leave_hypervisor),gp
;;
KVM_SAVE_REST
......@@ -249,7 +249,7 @@ ENTRY(kvm_break_fault)
;;
srlz.i // guarantee that interruption collection is on
;;
//(p15)ssm psr.i // restore psr.i
(p15)ssm psr.i // restore psr.i
addl r14=@gprel(ia64_leave_hypervisor),gp
;;
KVM_SAVE_REST
......@@ -439,7 +439,7 @@ kvm_dispatch_vexirq:
;;
srlz.i // guarantee that interruption collection is on
;;
//(p15) ssm psr.i // restore psr.i
(p15) ssm psr.i // restore psr.i
adds r3=8,r2 // set up second base pointer
;;
KVM_SAVE_REST
......@@ -819,7 +819,7 @@ ENTRY(kvm_dtlb_miss_dispatch)
;;
srlz.i // guarantee that interruption collection is on
;;
//(p15) ssm psr.i // restore psr.i
(p15) ssm psr.i // restore psr.i
addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
;;
KVM_SAVE_REST
......@@ -842,7 +842,7 @@ ENTRY(kvm_itlb_miss_dispatch)
;;
srlz.i // guarantee that interruption collection is on
;;
//(p15) ssm psr.i // restore psr.i
(p15) ssm psr.i // restore psr.i
addl r14=@gprel(ia64_leave_hypervisor),gp
;;
KVM_SAVE_REST
......@@ -871,7 +871,7 @@ ENTRY(kvm_dispatch_reflection)
;;
srlz.i // guarantee that interruption collection is on
;;
//(p15) ssm psr.i // restore psr.i
(p15) ssm psr.i // restore psr.i
addl r14=@gprel(ia64_leave_hypervisor),gp
;;
KVM_SAVE_REST
......@@ -898,7 +898,7 @@ ENTRY(kvm_dispatch_virtualization_fault)
;;
srlz.i // guarantee that interruption collection is on
;;
//(p15) ssm psr.i // restore psr.i
(p15) ssm psr.i // restore psr.i
addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
;;
KVM_SAVE_REST
......@@ -920,7 +920,7 @@ ENTRY(kvm_dispatch_interrupt)
;;
srlz.i
;;
//(p15) ssm psr.i
(p15) ssm psr.i
addl r14=@gprel(ia64_leave_hypervisor),gp
;;
KVM_SAVE_REST
......@@ -1333,7 +1333,7 @@ hostret = r24
;;
(p7) srlz.i
;;
//(p6) ssm psr.i
(p6) ssm psr.i
;;
mov rp=rpsave
mov ar.pfs=pfssave
......
......@@ -254,7 +254,8 @@ u64 guest_vhpt_lookup(u64 iha, u64 *pte)
"(p7) st8 [%2]=r9;;"
"ssm psr.ic;;"
"srlz.d;;"
/* "ssm psr.i;;" Once interrupts in vmm open, need fix*/
"ssm psr.i;;"
"srlz.d;;"
: "=r"(ret) : "r"(iha), "r"(pte):"memory");
return ret;
......
......@@ -41,6 +41,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
return !!(v->arch.pending_exceptions);
}
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
/* do real check here */
return 1;
}
int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
{
return !(v->arch.msr & MSR_WE);
......
......@@ -13,6 +13,8 @@
#ifndef ASM_KVM_HOST_H
#define ASM_KVM_HOST_H
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kvm_host.h>
#include <asm/debug.h>
#include <asm/cpuid.h>
......@@ -210,7 +212,8 @@ struct kvm_vcpu_arch {
s390_fp_regs guest_fpregs;
unsigned int guest_acrs[NUM_ACRS];
struct kvm_s390_local_interrupt local_int;
struct timer_list ckc_timer;
struct hrtimer ckc_timer;
struct tasklet_struct tasklet;
union {
cpuid_t cpu_id;
u64 stidp_data;
......
......@@ -154,17 +154,25 @@ static int handle_stop(struct kvm_vcpu *vcpu)
static int handle_validity(struct kvm_vcpu *vcpu)
{
int viwhy = vcpu->arch.sie_block->ipb >> 16;
int rc;
vcpu->stat.exit_validity++;
if (viwhy == 0x37) {
fault_in_pages_writeable((char __user *)
vcpu->kvm->arch.guest_origin +
vcpu->arch.sie_block->prefix,
PAGE_SIZE);
return 0;
}
VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
viwhy);
return -ENOTSUPP;
if ((viwhy == 0x37) && (vcpu->arch.sie_block->prefix
<= vcpu->kvm->arch.guest_memsize - 2*PAGE_SIZE)){
rc = fault_in_pages_writeable((char __user *)
vcpu->kvm->arch.guest_origin +
vcpu->arch.sie_block->prefix,
2*PAGE_SIZE);
if (rc)
/* user will receive sigsegv, exit to user */
rc = -ENOTSUPP;
} else
rc = -ENOTSUPP;
if (rc)
VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d",
viwhy);
return rc;
}
static int handle_instruction(struct kvm_vcpu *vcpu)
......
......@@ -12,6 +12,8 @@
#include <asm/lowcore.h>
#include <asm/uaccess.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kvm_host.h>
#include <linux/signal.h>
#include "kvm-s390.h"
......@@ -299,13 +301,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
}
if ((!rc) && atomic_read(&fi->active)) {
spin_lock_bh(&fi->lock);
spin_lock(&fi->lock);
list_for_each_entry(inti, &fi->list, list)
if (__interrupt_is_deliverable(vcpu, inti)) {
rc = 1;
break;
}
spin_unlock_bh(&fi->lock);
spin_unlock(&fi->lock);
}
if ((!rc) && (vcpu->arch.sie_block->ckc <
......@@ -318,6 +320,12 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
return rc;
}
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
/* do real check here */
return 1;
}
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
return 0;
......@@ -355,14 +363,12 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
return 0;
}
sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1;
sltime = ((vcpu->arch.sie_block->ckc - now)*125)>>9;
vcpu->arch.ckc_timer.expires = jiffies + sltime;
add_timer(&vcpu->arch.ckc_timer);
VCPU_EVENT(vcpu, 5, "enabled wait timer:%llx jiffies", sltime);
hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
no_timer:
spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
spin_lock(&vcpu->arch.local_int.float_int->lock);
spin_lock_bh(&vcpu->arch.local_int.lock);
add_wait_queue(&vcpu->arch.local_int.wq, &wait);
while (list_empty(&vcpu->arch.local_int.list) &&
......@@ -371,33 +377,46 @@ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
!signal_pending(current)) {
set_current_state(TASK_INTERRUPTIBLE);
spin_unlock_bh(&vcpu->arch.local_int.lock);
spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
spin_unlock(&vcpu->arch.local_int.float_int->lock);
vcpu_put(vcpu);
schedule();
vcpu_load(vcpu);
spin_lock_bh(&vcpu->arch.local_int.float_int->lock);
spin_lock(&vcpu->arch.local_int.float_int->lock);
spin_lock_bh(&vcpu->arch.local_int.lock);
}
__unset_cpu_idle(vcpu);
__set_current_state(TASK_RUNNING);
remove_wait_queue(&vcpu->wq, &wait);
spin_unlock_bh(&vcpu->arch.local_int.lock);
spin_unlock_bh(&vcpu->arch.local_int.float_int->lock);
del_timer(&vcpu->arch.ckc_timer);
spin_unlock(&vcpu->arch.local_int.float_int->lock);
hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
return 0;
}
void kvm_s390_idle_wakeup(unsigned long data)
void kvm_s390_tasklet(unsigned long parm)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm;
spin_lock_bh(&vcpu->arch.local_int.lock);
spin_lock(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.timer_due = 1;
if (waitqueue_active(&vcpu->arch.local_int.wq))
wake_up_interruptible(&vcpu->arch.local_int.wq);
spin_unlock_bh(&vcpu->arch.local_int.lock);
spin_unlock(&vcpu->arch.local_int.lock);
}
/*
* low level hrtimer wake routine. Because this runs in hardirq context
* we schedule a tasklet to do the real work.
*/
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
tasklet_schedule(&vcpu->arch.tasklet);
return HRTIMER_NORESTART;
}
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
{
......@@ -436,7 +455,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
if (atomic_read(&fi->active)) {
do {
deliver = 0;
spin_lock_bh(&fi->lock);
spin_lock(&fi->lock);
list_for_each_entry_safe(inti, n, &fi->list, list) {
if (__interrupt_is_deliverable(vcpu, inti)) {
list_del(&inti->list);
......@@ -447,7 +466,7 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
}
if (list_empty(&fi->list))
atomic_set(&fi->active, 0);
spin_unlock_bh(&fi->lock);
spin_unlock(&fi->lock);
if (deliver) {
__do_deliver_interrupt(vcpu, inti);
kfree(inti);
......@@ -512,7 +531,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
mutex_lock(&kvm->lock);
fi = &kvm->arch.float_int;
spin_lock_bh(&fi->lock);
spin_lock(&fi->lock);
list_add_tail(&inti->list, &fi->list);
atomic_set(&fi->active, 1);
sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
......@@ -529,7 +548,7 @@ int kvm_s390_inject_vm(struct kvm *kvm,
if (waitqueue_active(&li->wq))
wake_up_interruptible(&li->wq);
spin_unlock_bh(&li->lock);
spin_unlock_bh(&fi->lock);
spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
return 0;
}
......
......@@ -15,6 +15,7 @@
#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
......@@ -195,6 +196,10 @@ struct kvm *kvm_arch_create_vm(void)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
(__u64) vcpu->arch.sie_block)
vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
smp_mb();
free_page((unsigned long)(vcpu->arch.sie_block));
kvm_vcpu_uninit(vcpu);
kfree(vcpu);
......@@ -283,8 +288,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gmsor = vcpu->kvm->arch.guest_origin;
vcpu->arch.sie_block->ecb = 2;
vcpu->arch.sie_block->eca = 0xC1002001U;
setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup,
(unsigned long) vcpu);
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
(unsigned long) vcpu);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
get_cpu_id(&vcpu->arch.cpu_id);
vcpu->arch.cpu_id.version = 0xff;
return 0;
......@@ -307,19 +314,21 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block->icpua = id;
BUG_ON(!kvm->arch.sca);
BUG_ON(kvm->arch.sca->cpu[id].sda);
kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
if (!kvm->arch.sca->cpu[id].sda)
kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block;
else
BUG_ON(!kvm->vcpus[id]); /* vcpu does already exist */
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
spin_lock_init(&vcpu->arch.local_int.lock);
INIT_LIST_HEAD(&vcpu->arch.local_int.list);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
spin_lock_bh(&kvm->arch.float_int.lock);
spin_lock(&kvm->arch.float_int.lock);
kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
init_waitqueue_head(&vcpu->arch.local_int.wq);
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
spin_unlock_bh(&kvm->arch.float_int.lock);
spin_unlock(&kvm->arch.float_int.lock);
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
......@@ -478,6 +487,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
vcpu_load(vcpu);
/* verify, that memory has been registered */
if (!vcpu->kvm->arch.guest_memsize) {
vcpu_put(vcpu);
return -EINVAL;
}
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
......@@ -657,6 +672,8 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
struct kvm_memory_slot old,
int user_alloc)
{
int i;
/* A few sanity checks. We can have exactly one memory slot which has
to start at guest virtual zero and which has to be located at a
page boundary in userland and which has to end at a page boundary.
......@@ -664,7 +681,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
vmas. It is okay to mmap() and munmap() stuff in this slot after
doing this call at any time */
if (mem->slot)
if (mem->slot || kvm->arch.guest_memsize)
return -EINVAL;
if (mem->guest_phys_addr)
......@@ -676,15 +693,39 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
if (mem->memory_size & (PAGE_SIZE - 1))
return -EINVAL;
if (!user_alloc)
return -EINVAL;
/* lock all vcpus */
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (!kvm->vcpus[i])
continue;
if (!mutex_trylock(&kvm->vcpus[i]->mutex))
goto fail_out;
}
kvm->arch.guest_origin = mem->userspace_addr;
kvm->arch.guest_memsize = mem->memory_size;
/* FIXME: we do want to interrupt running CPUs and update their memory
configuration now to avoid race conditions. But hey, changing the
memory layout while virtual CPUs are running is usually bad
programming practice. */
/* update sie control blocks, and unlock all vcpus */
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
kvm->vcpus[i]->arch.sie_block->gmsor =
kvm->arch.guest_origin;
kvm->vcpus[i]->arch.sie_block->gmslm =
kvm->arch.guest_memsize +
kvm->arch.guest_origin +
VIRTIODESCSPACE - 1ul;
mutex_unlock(&kvm->vcpus[i]->mutex);
}
}
return 0;
fail_out:
for (; i >= 0; i--)
mutex_unlock(&kvm->vcpus[i]->mutex);
return -EINVAL;
}
void kvm_arch_flush_shadow(struct kvm *kvm)
......
......@@ -14,6 +14,7 @@
#ifndef ARCH_S390_KVM_S390_H
#define ARCH_S390_KVM_S390_H
#include <linux/hrtimer.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
......@@ -41,7 +42,8 @@ static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
}
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
void kvm_s390_idle_wakeup(unsigned long data);
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
void kvm_s390_tasklet(unsigned long parm);
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
int kvm_s390_inject_vm(struct kvm *kvm,
struct kvm_s390_interrupt *s390int);
......
......@@ -204,11 +204,11 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
int cpus = 0;
int n;
spin_lock_bh(&fi->lock);
spin_lock(&fi->lock);
for (n = 0; n < KVM_MAX_VCPUS; n++)
if (fi->local_int[n])
cpus++;
spin_unlock_bh(&fi->lock);
spin_unlock(&fi->lock);
/* deal with other level 3 hypervisors */
if (stsi(mem, 3, 2, 2) == -ENOSYS)
......
......@@ -52,7 +52,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
if (cpu_addr >= KVM_MAX_VCPUS)
return 3; /* not operational */
spin_lock_bh(&fi->lock);
spin_lock(&fi->lock);
if (fi->local_int[cpu_addr] == NULL)
rc = 3; /* not operational */
else if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
......@@ -64,7 +64,7 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
*reg |= SIGP_STAT_STOPPED;
rc = 1; /* status stored */
}
spin_unlock_bh(&fi->lock);
spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
return rc;
......@@ -86,7 +86,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
inti->type = KVM_S390_INT_EMERGENCY;
spin_lock_bh(&fi->lock);
spin_lock(&fi->lock);
li = fi->local_int[cpu_addr];
if (li == NULL) {
rc = 3; /* not operational */
......@@ -102,7 +102,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
spin_unlock_bh(&li->lock);
rc = 0; /* order accepted */
unlock:
spin_unlock_bh(&fi->lock);
spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
return rc;
}
......@@ -123,7 +123,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
inti->type = KVM_S390_SIGP_STOP;
spin_lock_bh(&fi->lock);
spin_lock(&fi->lock);
li = fi->local_int[cpu_addr];
if (li == NULL) {
rc = 3; /* not operational */
......@@ -142,7 +142,7 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store)
spin_unlock_bh(&li->lock);
rc = 0; /* order accepted */
unlock:
spin_unlock_bh(&fi->lock);
spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
return rc;
}
......@@ -188,7 +188,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
if (!inti)
return 2; /* busy */
spin_lock_bh(&fi->lock);
spin_lock(&fi->lock);
li = fi->local_int[cpu_addr];
if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) {
......@@ -220,7 +220,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
out_li:
spin_unlock_bh(&li->lock);
out_fi:
spin_unlock_bh(&fi->lock);
spin_unlock(&fi->lock);
return rc;
}
......
......@@ -116,6 +116,8 @@
#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */
#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */
#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */
#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
#define X86_FEATURE_AES (4*32+25) /* AES instructions */
#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
......
......@@ -16,6 +16,7 @@
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_MSIX
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
......
......@@ -185,6 +185,7 @@ union kvm_mmu_page_role {
unsigned access:3;
unsigned invalid:1;
unsigned cr4_pge:1;
unsigned nxe:1;
};
};
......@@ -212,7 +213,6 @@ struct kvm_mmu_page {
int multimapped; /* More than one parent_pte? */
int root_count; /* Currently serving as active root */
bool unsync;
bool global;
unsigned int unsync_children;
union {
u64 *parent_pte; /* !multimapped */
......@@ -261,13 +261,11 @@ struct kvm_mmu {
union kvm_mmu_page_role base_role;
u64 *pae_root;
u64 rsvd_bits_mask[2][4];
};
struct kvm_vcpu_arch {
u64 host_tsc;
int interrupt_window_open;
unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
/*
* rip and regs accesses must go through
* kvm_{register,rip}_{read,write} functions.
......@@ -286,6 +284,7 @@ struct kvm_vcpu_arch {
u64 shadow_efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
int32_t apic_arb_prio;
int mp_state;
int sipi_vector;
u64 ia32_misc_enable_msr;
......@@ -320,6 +319,8 @@ struct kvm_vcpu_arch {
struct kvm_pio_request pio;
void *pio_data;
u8 event_exit_inst_len;
struct kvm_queued_exception {
bool pending;
bool has_error_code;
......@@ -329,11 +330,12 @@ struct kvm_vcpu_arch {
struct kvm_queued_interrupt {
bool pending;
bool soft;
u8 nr;
} interrupt;
struct {
int active;
int vm86_active;
u8 save_iopl;
struct kvm_save_segment {
u16 selector;
......@@ -356,9 +358,9 @@ struct kvm_vcpu_arch {
unsigned int time_offset;
struct page *time_page;
bool singlestep; /* guest is single stepped by KVM */
bool nmi_pending;
bool nmi_injected;
bool nmi_window_open;
struct mtrr_state_type mtrr_state;
u32 pat;
......@@ -392,15 +394,14 @@ struct kvm_arch{
*/
struct list_head active_mmu_pages;
struct list_head assigned_dev_head;
struct list_head oos_global_pages;
struct iommu_domain *iommu_domain;
int iommu_flags;
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
struct kvm_pit *vpit;
struct hlist_head irq_ack_notifier_list;
int vapics_in_nmi_mode;
int round_robin_prev_vcpu;
unsigned int tss_addr;
struct page *apic_access_page;
......@@ -423,7 +424,6 @@ struct kvm_vm_stat {
u32 mmu_recycled;
u32 mmu_cache_miss;
u32 mmu_unsync;
u32 mmu_unsync_global;
u32 remote_tlb_flush;
u32 lpages;
};
......@@ -443,7 +443,6 @@ struct kvm_vcpu_stat {
u32 halt_exits;
u32 halt_wakeup;
u32 request_irq_exits;
u32 request_nmi_exits;
u32 irq_exits;
u32 host_state_reload;
u32 efer_reload;
......@@ -511,20 +510,22 @@ struct kvm_x86_ops {
void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run);
int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu);
void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
void (*patch_hypercall)(struct kvm_vcpu *vcpu,
unsigned char *hypercall_addr);
int (*get_irq)(struct kvm_vcpu *vcpu);
void (*set_irq)(struct kvm_vcpu *vcpu, int vec);
void (*set_irq)(struct kvm_vcpu *vcpu);
void (*set_nmi)(struct kvm_vcpu *vcpu);
void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
bool has_error_code, u32 error_code);
bool (*exception_injected)(struct kvm_vcpu *vcpu);
void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
struct kvm_run *run);
int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
int (*nmi_allowed)(struct kvm_vcpu *vcpu);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
int (*get_mt_mask_shift)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
};
extern struct kvm_x86_ops *kvm_x86_ops;
......@@ -538,7 +539,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
void kvm_mmu_set_base_ptes(u64 base_pte);
void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask);
u64 dirty_mask, u64 nx_mask, u64 x_mask);
int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
......@@ -552,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
const void *val, int bytes);
int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
gpa_t addr, unsigned long *ret);
u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
extern bool tdp_enabled;
......@@ -563,6 +565,7 @@ enum emulation_result {
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
unsigned long cr2, u16 error_code, int emulation_type);
void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
......@@ -638,7 +641,6 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_global(struct kvm_vcpu *vcpu);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
......@@ -769,6 +771,8 @@ enum {
#define HF_GIF_MASK (1 << 0)
#define HF_HIF_MASK (1 << 1)
#define HF_VINTR_MASK (1 << 2)
#define HF_NMI_MASK (1 << 3)
#define HF_IRET_MASK (1 << 4)
/*
* Hardware virtualization extension instructions may fault if a
......@@ -791,5 +795,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_age_hva(struct kvm *kvm, unsigned long hva);
int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
#endif /* _ASM_X86_KVM_HOST_H */
......@@ -143,6 +143,9 @@ struct decode_cache {
struct fetch_cache fetch;
};
#define X86_SHADOW_INT_MOV_SS 1
#define X86_SHADOW_INT_STI 2
struct x86_emulate_ctxt {
/* Register state before/after emulation. */
struct kvm_vcpu *vcpu;
......@@ -152,6 +155,9 @@ struct x86_emulate_ctxt {
int mode;
u32 cs_base;
/* interruptibility state, as a result of execution of STI or MOV SS */
int interruptibility;
/* decode cache */
struct decode_cache decode;
};
......
......@@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_EVTINJ_VALID_ERR (1 << 11)
#define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK
#define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK
#define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR
#define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI
......
......@@ -247,6 +247,7 @@ enum vmcs_field {
#define EXIT_REASON_MSR_READ 31
#define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_MWAIT_INSTRUCTION 36
#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_EPT_VIOLATION 48
......
......@@ -420,6 +420,7 @@ void do_machine_check(struct pt_regs * regs, long error_code)
out2:
atomic_dec(&mce_entry);
}
EXPORT_SYMBOL_GPL(do_machine_check);
#ifdef CONFIG_X86_MCE_INTEL
/***
......
......@@ -27,6 +27,7 @@
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hardirq.h>
#include <asm/timer.h>
#define MMU_QUEUE_SIZE 1024
......@@ -230,6 +231,9 @@ static void paravirt_ops_setup(void)
pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
}
#ifdef CONFIG_X86_IO_APIC
no_timer_check = 1;
#endif
}
void __init kvm_guest_init(void)
......
......@@ -172,6 +172,9 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
{
ack_APIC_irq();
inc_irq_stat(irq_resched_count);
/*
* KVM uses this interrupt to force a cpu out of guest mode
*/
}
void smp_call_function_interrupt(struct pt_regs *regs)
......
......@@ -50,6 +50,9 @@ config KVM_INTEL
Provides support for KVM on Intel processors equipped with the VT
extensions.
To compile this as a module, choose M here: the module
will be called kvm-intel.
config KVM_AMD
tristate "KVM for AMD processors support"
depends on KVM
......@@ -57,6 +60,9 @@ config KVM_AMD
Provides support for KVM on AMD processors equipped with the AMD-V
(SVM) extensions.
To compile this as a module, choose M here: the module
will be called kvm-amd.
config KVM_TRACE
bool "KVM trace support"
depends on KVM && SYSFS
......
......@@ -14,7 +14,7 @@ endif
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
i8254.o
i8254.o timer.o
obj-$(CONFIG_KVM) += kvm.o
kvm-intel-objs = vmx.o
obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
......
......@@ -98,6 +98,37 @@ static int pit_get_gate(struct kvm *kvm, int channel)
return kvm->arch.vpit->pit_state.channels[channel].gate;
}
static s64 __kpit_elapsed(struct kvm *kvm)
{
s64 elapsed;
ktime_t remaining;
struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
/*
* The Counter does not stop when it reaches zero. In
* Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
* the highest count, either FFFF hex for binary counting
* or 9999 for BCD counting, and continues counting.
* Modes 2 and 3 are periodic; the Counter reloads
* itself with the initial count and continues counting
* from there.
*/
remaining = hrtimer_expires_remaining(&ps->pit_timer.timer);
elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
elapsed = mod_64(elapsed, ps->pit_timer.period);
return elapsed;
}
static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c,
int channel)
{
if (channel == 0)
return __kpit_elapsed(kvm);
return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
}
static int pit_get_count(struct kvm *kvm, int channel)
{
struct kvm_kpit_channel_state *c =
......@@ -107,7 +138,7 @@ static int pit_get_count(struct kvm *kvm, int channel)
WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
t = kpit_elapsed(kvm, c, channel);
d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
switch (c->mode) {
......@@ -137,7 +168,7 @@ static int pit_get_out(struct kvm *kvm, int channel)
WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock));
t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time));
t = kpit_elapsed(kvm, c, channel);
d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC);
switch (c->mode) {
......@@ -193,28 +224,6 @@ static void pit_latch_status(struct kvm *kvm, int channel)
}
}
static int __pit_timer_fn(struct kvm_kpit_state *ps)
{
struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0];
struct kvm_kpit_timer *pt = &ps->pit_timer;
if (!atomic_inc_and_test(&pt->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
if (!pt->reinject)
atomic_set(&pt->pending, 1);
if (vcpu0 && waitqueue_active(&vcpu0->wq))
wake_up_interruptible(&vcpu0->wq);
hrtimer_add_expires_ns(&pt->timer, pt->period);
pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
if (pt->period)
ps->channels[0].count_load_time = ktime_get();
return (pt->period == 0 ? 0 : 1);
}
int pit_has_pending_timer(struct kvm_vcpu *vcpu)
{
struct kvm_pit *pit = vcpu->kvm->arch.vpit;
......@@ -235,21 +244,6 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
spin_unlock(&ps->inject_lock);
}
static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
{
struct kvm_kpit_state *ps;
int restart_timer = 0;
ps = container_of(data, struct kvm_kpit_state, pit_timer.timer);
restart_timer = __pit_timer_fn(ps);
if (restart_timer)
return HRTIMER_RESTART;
else
return HRTIMER_NORESTART;
}
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
{
struct kvm_pit *pit = vcpu->kvm->arch.vpit;
......@@ -263,15 +257,26 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
}
static void destroy_pit_timer(struct kvm_kpit_timer *pt)
static void destroy_pit_timer(struct kvm_timer *pt)
{
pr_debug("pit: execute del timer!\n");
hrtimer_cancel(&pt->timer);
}
static bool kpit_is_periodic(struct kvm_timer *ktimer)
{
struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
pit_timer);
return ps->is_periodic;
}
static struct kvm_timer_ops kpit_ops = {
.is_periodic = kpit_is_periodic,
};
static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
{
struct kvm_kpit_timer *pt = &ps->pit_timer;
struct kvm_timer *pt = &ps->pit_timer;
s64 interval;
interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ);
......@@ -280,8 +285,14 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period)
/* TODO The new value only affected after the retriggered */
hrtimer_cancel(&pt->timer);
pt->period = (is_period == 0) ? 0 : interval;
pt->timer.function = pit_timer_fn;
pt->period = interval;
ps->is_periodic = is_period;
pt->timer.function = kvm_timer_fn;
pt->t_ops = &kpit_ops;
pt->kvm = ps->pit->kvm;
pt->vcpu_id = 0;
atomic_set(&pt->pending, 0);
ps->irq_ack = 1;
......@@ -298,23 +309,23 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
pr_debug("pit: load_count val is %d, channel is %d\n", val, channel);
/*
* Though spec said the state of 8254 is undefined after power-up,
* seems some tricky OS like Windows XP depends on IRQ0 interrupt
* when booting up.
* So here setting initialize rate for it, and not a specific number
* The largest possible initial count is 0; this is equivalent
* to 216 for binary counting and 104 for BCD counting.
*/
if (val == 0)
val = 0x10000;
ps->channels[channel].count_load_time = ktime_get();
ps->channels[channel].count = val;
if (channel != 0)
if (channel != 0) {
ps->channels[channel].count_load_time = ktime_get();
return;
}
/* Two types of timer
* mode 1 is one shot, mode 2 is period, otherwise del timer */
switch (ps->channels[0].mode) {
case 0:
case 1:
/* FIXME: enhance mode 4 precision */
case 4:
......
......@@ -3,15 +3,6 @@
#include "iodev.h"
struct kvm_kpit_timer {
struct hrtimer timer;
int irq;
s64 period; /* unit: ns */
s64 scheduled;
atomic_t pending;
bool reinject;
};
struct kvm_kpit_channel_state {
u32 count; /* can be 65536 */
u16 latched_count;
......@@ -30,7 +21,8 @@ struct kvm_kpit_channel_state {
struct kvm_kpit_state {
struct kvm_kpit_channel_state channels[3];
struct kvm_kpit_timer pit_timer;
struct kvm_timer pit_timer;
bool is_periodic;
u32 speaker_data_on;
struct mutex lock;
struct kvm_pit *pit;
......
......@@ -24,6 +24,7 @@
#include "irq.h"
#include "i8254.h"
#include "x86.h"
/*
* check if there are pending timer events
......@@ -48,6 +49,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
struct kvm_pic *s;
if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.pending;
if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */
if (kvm_apic_accept_pic_intr(v)) {
s = pic_irqchip(v->kvm); /* PIC */
......@@ -67,6 +71,9 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
struct kvm_pic *s;
int vector;
if (!irqchip_in_kernel(v->kvm))
return v->arch.interrupt.nr;
vector = kvm_get_apic_interrupt(v); /* APIC */
if (vector == -1) {
if (kvm_apic_accept_pic_intr(v)) {
......
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
atomic_t pending; /* accumulated triggered timers */
bool reinject;
struct kvm_timer_ops *t_ops;
struct kvm *kvm;
int vcpu_id;
};
struct kvm_timer_ops {
bool (*is_periodic)(struct kvm_timer *);
};
enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
This diff is collapsed.
......@@ -2,18 +2,15 @@
#define __KVM_X86_LAPIC_H
#include "iodev.h"
#include "kvm_timer.h"
#include <linux/kvm_host.h>
struct kvm_lapic {
unsigned long base_address;
struct kvm_io_device dev;
struct {
atomic_t pending;
s64 period; /* unit: ns */
u32 divide_count;
struct hrtimer dev;
} timer;
struct kvm_timer lapic_timer;
u32 divide_count;
struct kvm_vcpu *vcpu;
struct page *regs_page;
void *regs;
......@@ -34,12 +31,13 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig);
int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
bool kvm_apic_present(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
......
This diff is collapsed.
......@@ -75,4 +75,9 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
return vcpu->arch.cr0 & X86_CR0_PG;
}
static inline int is_present_pte(unsigned long pte)
{
return pte & PT_PRESENT_MASK;
}
#endif
......@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
gfn_t table_gfn;
unsigned index, pt_access, pte_access;
gpa_t pte_gpa;
int rsvd_fault = 0;
pgprintk("%s: addr %lx\n", __func__, addr);
walk:
......@@ -157,6 +158,10 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
if (!is_present_pte(pte))
goto not_present;
rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level);
if (rsvd_fault)
goto access_error;
if (write_fault && !is_writeble_pte(pte))
if (user_fault || is_write_protection(vcpu))
goto access_error;
......@@ -209,7 +214,6 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
if (ret)
goto walk;
pte |= PT_DIRTY_MASK;
kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0);
walker->ptes[walker->level - 1] = pte;
}
......@@ -233,6 +237,8 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
walker->error_code |= PFERR_USER_MASK;
if (fetch_fault)
walker->error_code |= PFERR_FETCH_MASK;
if (rsvd_fault)
walker->error_code |= PFERR_RSVD_MASK;
return 0;
}
......@@ -262,8 +268,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
kvm_get_pfn(pfn);
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, largepage,
gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte),
pfn, true);
gpte_to_gfn(gpte), pfn, true);
}
/*
......@@ -297,7 +302,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
user_fault, write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK,
ptwrite, largepage,
gw->ptes[gw->level-1] & PT_GLOBAL_MASK,
gw->gfn, pfn, false);
break;
}
......@@ -380,7 +384,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
return r;
/*
* Look up the shadow pte for the faulting address.
* Look up the guest pte for the faulting address.
*/
r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault,
fetch_fault);
......@@ -586,7 +590,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn,
is_dirty_pte(gpte), 0, gfn,
spte_to_pfn(sp->spt[i]), true, false);
}
......
This diff is collapsed.
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/hrtimer.h>
#include <asm/atomic.h>
#include "kvm_timer.h"
static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer)
{
int restart_timer = 0;
wait_queue_head_t *q = &vcpu->wq;
/* FIXME: this code should not know anything about vcpus */
if (!atomic_inc_and_test(&ktimer->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
if (!ktimer->reinject)
atomic_set(&ktimer->pending, 1);
if (waitqueue_active(q))
wake_up_interruptible(q);
if (ktimer->t_ops->is_periodic(ktimer)) {
hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
restart_timer = 1;
}
return restart_timer;
}
enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
{
int restart_timer;
struct kvm_vcpu *vcpu;
struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id];
if (!vcpu)
return HRTIMER_NORESTART;
restart_timer = __kvm_timer_fn(vcpu, ktimer);
if (restart_timer)
return HRTIMER_RESTART;
else
return HRTIMER_NORESTART;
}
This diff is collapsed.
This diff is collapsed.
......@@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
vcpu->arch.exception.pending = false;
}
static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector)
static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector,
bool soft)
{
vcpu->arch.interrupt.pending = true;
vcpu->arch.interrupt.soft = soft;
vcpu->arch.interrupt.nr = vector;
}
......@@ -19,4 +21,14 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu)
vcpu->arch.interrupt.pending = false;
}
static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu)
{
return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending ||
vcpu->arch.nmi_injected;
}
static inline bool kvm_exception_is_soft(unsigned int nr)
{
return (nr == BP_VECTOR) || (nr == OF_VECTOR);
}
#endif
This diff is collapsed.
......@@ -119,7 +119,7 @@ struct kvm_run {
__u32 error_code;
} ex;
/* KVM_EXIT_IO */
struct kvm_io {
struct {
#define KVM_EXIT_IO_IN 0
#define KVM_EXIT_IO_OUT 1
__u8 direction;
......@@ -224,10 +224,10 @@ struct kvm_interrupt {
/* for KVM_GET_DIRTY_LOG */
struct kvm_dirty_log {
__u32 slot;
__u32 padding;
__u32 padding1;
union {
void __user *dirty_bitmap; /* one bit per page */
__u64 padding;
__u64 padding2;
};
};
......@@ -409,6 +409,10 @@ struct kvm_trace_rec {
#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
#define KVM_CAP_DEVICE_DEASSIGNMENT 27
#endif
#ifdef __KVM_HAVE_MSIX
#define KVM_CAP_DEVICE_MSIX 28
#endif
#define KVM_CAP_ASSIGN_DEV_IRQ 29
/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
......@@ -482,11 +486,18 @@ struct kvm_irq_routing {
#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
struct kvm_assigned_pci_dev)
#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
#define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \
struct kvm_assigned_irq)
#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
struct kvm_assigned_pci_dev)
#define KVM_ASSIGN_SET_MSIX_NR \
_IOW(KVMIO, 0x73, struct kvm_assigned_msix_nr)
#define KVM_ASSIGN_SET_MSIX_ENTRY \
_IOW(KVMIO, 0x74, struct kvm_assigned_msix_entry)
#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
/*
* ioctls for vcpu fds
......@@ -577,6 +588,8 @@ struct kvm_debug_guest {
#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
struct kvm_assigned_pci_dev {
__u32 assigned_dev_id;
__u32 busnr;
......@@ -587,6 +600,17 @@ struct kvm_assigned_pci_dev {
};
};
#define KVM_DEV_IRQ_HOST_INTX (1 << 0)
#define KVM_DEV_IRQ_HOST_MSI (1 << 1)
#define KVM_DEV_IRQ_HOST_MSIX (1 << 2)
#define KVM_DEV_IRQ_GUEST_INTX (1 << 8)
#define KVM_DEV_IRQ_GUEST_MSI (1 << 9)
#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10)
#define KVM_DEV_IRQ_HOST_MASK 0x00ff
#define KVM_DEV_IRQ_GUEST_MASK 0xff00
struct kvm_assigned_irq {
__u32 assigned_dev_id;
__u32 host_irq;
......@@ -602,9 +626,19 @@ struct kvm_assigned_irq {
};
};
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
#define KVM_DEV_IRQ_ASSIGN_MSI_ACTION KVM_DEV_IRQ_ASSIGN_ENABLE_MSI
#define KVM_DEV_IRQ_ASSIGN_ENABLE_MSI (1 << 0)
struct kvm_assigned_msix_nr {
__u32 assigned_dev_id;
__u16 entry_nr;
__u16 padding;
};
#define KVM_MAX_MSIX_PER_DEV 512
struct kvm_assigned_msix_entry {
__u32 assigned_dev_id;
__u32 gsi;
__u16 entry; /* The index of entry in the MSI-X table */
__u16 padding[3];
};
#endif
......@@ -38,6 +38,7 @@
#define KVM_REQ_UNHALT 6
#define KVM_REQ_MMU_SYNC 7
#define KVM_REQ_KVMCLOCK_UPDATE 8
#define KVM_REQ_KICK 9
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
......@@ -72,7 +73,6 @@ struct kvm_vcpu {
struct mutex mutex;
int cpu;
struct kvm_run *run;
int guest_mode;
unsigned long requests;
unsigned long guest_debug;
int fpu_active;
......@@ -298,6 +298,7 @@ int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
void kvm_free_physmem(struct kvm *kvm);
......@@ -319,6 +320,13 @@ struct kvm_irq_ack_notifier {
void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
};
#define KVM_ASSIGNED_MSIX_PENDING 0x1
struct kvm_guest_msix_entry {
u32 vector;
u16 entry;
u16 flags;
};
struct kvm_assigned_dev_kernel {
struct kvm_irq_ack_notifier ack_notifier;
struct work_struct interrupt_work;
......@@ -326,18 +334,18 @@ struct kvm_assigned_dev_kernel {
int assigned_dev_id;
int host_busnr;
int host_devfn;
unsigned int entries_nr;
int host_irq;
bool host_irq_disabled;
struct msix_entry *host_msix_entries;
int guest_irq;
#define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0)
#define KVM_ASSIGNED_DEV_GUEST_MSI (1 << 1)
#define KVM_ASSIGNED_DEV_HOST_INTX (1 << 8)
#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9)
struct kvm_guest_msix_entry *guest_msix_entries;
unsigned long irq_requested_type;
int irq_source_id;
int flags;
struct pci_dev *dev;
struct kvm *kvm;
spinlock_t assigned_dev_lock;
};
struct kvm_irq_mask_notifier {
......@@ -360,6 +368,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm_irq_ack_notifier *kian);
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
/* For vcpu->arch.iommu_flags */
#define KVM_IOMMU_CACHE_COHERENCY 0x1
#ifdef CONFIG_IOMMU_API
int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
unsigned long npages);
......
......@@ -40,4 +40,31 @@ typedef unsigned long hfn_t;
typedef hfn_t pfn_t;
union kvm_ioapic_redirect_entry {
u64 bits;
struct {
u8 vector;
u8 delivery_mode:3;
u8 dest_mode:1;
u8 delivery_status:1;
u8 polarity:1;
u8 remote_irr:1;
u8 trig_mode:1;
u8 mask:1;
u8 reserve:7;
u8 reserved[4];
u8 dest_id;
} fields;
};
struct kvm_lapic_irq {
u32 vector;
u32 delivery_mode;
u32 dest_mode;
u32 level;
u32 trig_mode;
u32 shorthand;
u32 dest_id;
};
#endif /* __KVM_TYPES_H__ */
This diff is collapsed.
......@@ -40,22 +40,7 @@ struct kvm_ioapic {
u32 id;
u32 irr;
u32 pad;
union ioapic_redir_entry {
u64 bits;
struct {
u8 vector;
u8 delivery_mode:3;
u8 dest_mode:1;
u8 delivery_status:1;
u8 polarity:1;
u8 remote_irr:1;
u8 trig_mode:1;
u8 mask:1;
u8 reserve:7;
u8 reserved[4];
u8 dest_id;
} fields;
} redirtbl[IOAPIC_NUM_PINS];
union kvm_ioapic_redirect_entry redirtbl[IOAPIC_NUM_PINS];
struct kvm_io_device dev;
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
......@@ -79,13 +64,13 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
return kvm->arch.vioapic;
}
struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
unsigned long bitmap);
int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, int dest, int dest_mode);
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
int kvm_ioapic_init(struct kvm *kvm);
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
u32 kvm_ioapic_get_delivery_bitmask(struct kvm_ioapic *ioapic, u8 dest,
u8 dest_mode);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq);
#endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment