Commit 39a65762 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'kvm-updates/2.6.29' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.29' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: VMX: Flush volatile msrs before emulating rdmsr
  KVM: Fix assigned devices circular locking dependency
  KVM: x86: fix LAPIC pending count calculation
  KVM: Fix INTx for device assignment
  KVM: MMU: Map device MMIO as UC in EPT
  KVM: x86: disable kvmclock on non constant TSC hosts
  KVM: PIT: fix i8254 pending count read
  KVM: Fix racy in kvm_free_assigned_irq
  KVM: Add kvm_arch_sync_events to sync with asynchronize events
  KVM: mmu_notifiers release method
  KVM: Avoid using CONFIG_ in userspace visible headers
  KVM: ia64: fix fp fault/trap handler
parents 643aac1e 516a1a7e
......@@ -25,6 +25,10 @@
#include <linux/ioctl.h>
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_IOAPIC
#define __KVM_HAVE_DEVICE_ASSIGNMENT
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
......
......@@ -1337,6 +1337,10 @@ static void kvm_release_vm_pages(struct kvm *kvm)
}
}
void kvm_arch_sync_events(struct kvm *kvm)
{
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
......
......@@ -455,13 +455,18 @@ fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
if (!vmm_fpswa_interface)
return (fpswa_ret_t) {-1, 0, 0, 0};
/*
* Just let fpswa driver to use hardware fp registers.
* No fp register is valid in memory.
*/
memset(&fp_state, 0, sizeof(fp_state_t));
/*
* compute fp_state. only FP registers f6 - f11 are used by the
* vmm, so set those bits in the mask and set the low volatile
* pointer to point to these registers.
*/
fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */
fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
/*
* unsigned long (*EFI_FPSWA) (
* unsigned long trap_type,
* void *Bundle,
......@@ -545,10 +550,6 @@ void reflect_interruption(u64 ifa, u64 isr, u64 iim,
status = vmm_handle_fpu_swa(0, regs, isr);
if (!status)
return ;
else if (-EAGAIN == status) {
vcpu_decrement_iip(vcpu);
return ;
}
break;
}
......
......@@ -125,6 +125,10 @@ static void kvmppc_free_vcpus(struct kvm *kvm)
}
}
void kvm_arch_sync_events(struct kvm *kvm)
{
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvmppc_free_vcpus(kvm);
......
......@@ -212,6 +212,10 @@ static void kvm_free_vcpus(struct kvm *kvm)
}
}
void kvm_arch_sync_events(struct kvm *kvm)
{
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
......
......@@ -9,6 +9,13 @@
#include <linux/types.h>
#include <linux/ioctl.h>
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_PIT
#define __KVM_HAVE_IOAPIC
#define __KVM_HAVE_DEVICE_ASSIGNMENT
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
......
......@@ -207,7 +207,7 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
hrtimer_add_expires_ns(&pt->timer, pt->period);
pt->scheduled = hrtimer_get_expires_ns(&pt->timer);
if (pt->period)
ps->channels[0].count_load_time = hrtimer_get_expires(&pt->timer);
ps->channels[0].count_load_time = ktime_get();
return (pt->period == 0 ? 0 : 1);
}
......
......@@ -87,13 +87,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
kvm_apic_timer_intr_post(vcpu, vec);
/* TODO: PIT, RTC etc. */
}
EXPORT_SYMBOL_GPL(kvm_timer_intr_post);
void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
{
__kvm_migrate_apic_timer(vcpu);
......
......@@ -89,7 +89,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
void kvm_pic_reset(struct kvm_kpic_state *s);
void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
......
......@@ -35,6 +35,12 @@
#include "kvm_cache_regs.h"
#include "irq.h"
#ifndef CONFIG_X86_64
#define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
#else
#define mod_64(x, y) ((x) % (y))
#endif
#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
......@@ -511,52 +517,22 @@ static void apic_send_ipi(struct kvm_lapic *apic)
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
u64 counter_passed;
ktime_t passed, now;
ktime_t remaining;
s64 ns;
u32 tmcct;
ASSERT(apic != NULL);
now = apic->timer.dev.base->get_time();
tmcct = apic_get_reg(apic, APIC_TMICT);
/* if initial count is 0, current count should also be 0 */
if (tmcct == 0)
if (apic_get_reg(apic, APIC_TMICT) == 0)
return 0;
if (unlikely(ktime_to_ns(now) <=
ktime_to_ns(apic->timer.last_update))) {
/* Wrap around */
passed = ktime_add(( {
(ktime_t) {
.tv64 = KTIME_MAX -
(apic->timer.last_update).tv64}; }
), now);
apic_debug("time elapsed\n");
} else
passed = ktime_sub(now, apic->timer.last_update);
counter_passed = div64_u64(ktime_to_ns(passed),
(APIC_BUS_CYCLE_NS * apic->timer.divide_count));
if (counter_passed > tmcct) {
if (unlikely(!apic_lvtt_period(apic))) {
/* one-shot timers stick at 0 until reset */
tmcct = 0;
} else {
/*
* periodic timers reset to APIC_TMICT when they
* hit 0. The while loop simulates this happening N
* times. (counter_passed %= tmcct) would also work,
* but might be slower or not work on 32-bit??
*/
while (counter_passed > tmcct)
counter_passed -= tmcct;
tmcct -= counter_passed;
}
} else {
tmcct -= counter_passed;
}
remaining = hrtimer_expires_remaining(&apic->timer.dev);
if (ktime_to_ns(remaining) < 0)
remaining = ktime_set(0, 0);
ns = mod_64(ktime_to_ns(remaining), apic->timer.period);
tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count));
return tmcct;
}
......@@ -653,8 +629,6 @@ static void start_apic_timer(struct kvm_lapic *apic)
{
ktime_t now = apic->timer.dev.base->get_time();
apic->timer.last_update = now;
apic->timer.period = apic_get_reg(apic, APIC_TMICT) *
APIC_BUS_CYCLE_NS * apic->timer.divide_count;
atomic_set(&apic->timer.pending, 0);
......@@ -1110,16 +1084,6 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
}
}
void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec)
{
struct kvm_lapic *apic = vcpu->arch.apic;
if (apic && apic_lvt_vector(apic, APIC_LVTT) == vec)
apic->timer.last_update = ktime_add_ns(
apic->timer.last_update,
apic->timer.period);
}
int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
{
int vector = kvm_apic_has_interrupt(vcpu);
......
......@@ -12,7 +12,6 @@ struct kvm_lapic {
atomic_t pending;
s64 period; /* unit: ns */
u32 divide_count;
ktime_t last_update;
struct hrtimer dev;
} timer;
struct kvm_vcpu *vcpu;
......@@ -42,7 +41,6 @@ void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr);
void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu);
......
......@@ -1698,8 +1698,13 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
if (largepage)
spte |= PT_PAGE_SIZE_MASK;
if (mt_mask) {
mt_mask = get_memory_type(vcpu, gfn) <<
kvm_x86_ops->get_mt_mask_shift();
if (!kvm_is_mmio_pfn(pfn)) {
mt_mask = get_memory_type(vcpu, gfn) <<
kvm_x86_ops->get_mt_mask_shift();
mt_mask |= VMX_EPT_IGMT_BIT;
} else
mt_mask = MTRR_TYPE_UNCACHABLE <<
kvm_x86_ops->get_mt_mask_shift();
spte |= mt_mask;
}
......
......@@ -1600,7 +1600,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
/* Okay, we can deliver the interrupt: grab it and update PIC state. */
intr_vector = kvm_cpu_get_interrupt(vcpu);
svm_inject_irq(svm, intr_vector);
kvm_timer_intr_post(vcpu, intr_vector);
out:
update_cr8_intercept(vcpu);
}
......
......@@ -903,6 +903,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
default:
vmx_load_host_state(to_vmx(vcpu));
msr = find_msr_entry(to_vmx(vcpu), msr_index);
if (msr) {
data = msr->data;
......@@ -3285,7 +3286,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
}
if (vcpu->arch.interrupt.pending) {
vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr);
kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr);
if (kvm_cpu_has_interrupt(vcpu))
enable_irq_window(vcpu);
}
......@@ -3687,8 +3687,7 @@ static int __init vmx_init(void)
if (vm_need_ept()) {
bypass_guest_pf = 0;
kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
VMX_EPT_WRITABLE_MASK |
VMX_EPT_IGMT_BIT);
VMX_EPT_WRITABLE_MASK);
kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
VMX_EPT_EXECUTABLE_MASK,
VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
......
......@@ -967,7 +967,6 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_SET_TSS_ADDR:
case KVM_CAP_EXT_CPUID:
case KVM_CAP_CLOCKSOURCE:
case KVM_CAP_PIT:
case KVM_CAP_NOP_IO_DELAY:
case KVM_CAP_MP_STATE:
......@@ -992,6 +991,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IOMMU:
r = iommu_found();
break;
case KVM_CAP_CLOCKSOURCE:
r = boot_cpu_has(X86_FEATURE_CONSTANT_TSC);
break;
default:
r = 0;
break;
......@@ -4127,9 +4129,13 @@ static void kvm_free_vcpus(struct kvm *kvm)
}
void kvm_arch_destroy_vm(struct kvm *kvm)
void kvm_arch_sync_events(struct kvm *kvm)
{
kvm_free_all_assigned_devices(kvm);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_iommu_unmap_guest(kvm);
kvm_free_pit(kvm);
kfree(kvm->arch.vpic);
......
......@@ -58,10 +58,10 @@ struct kvm_irqchip {
__u32 pad;
union {
char dummy[512]; /* reserving space */
#ifdef CONFIG_X86
#ifdef __KVM_HAVE_PIT
struct kvm_pic_state pic;
#endif
#if defined(CONFIG_X86) || defined(CONFIG_IA64)
#ifdef __KVM_HAVE_IOAPIC
struct kvm_ioapic_state ioapic;
#endif
} chip;
......@@ -384,16 +384,16 @@ struct kvm_trace_rec {
#define KVM_CAP_MP_STATE 14
#define KVM_CAP_COALESCED_MMIO 15
#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */
#if defined(CONFIG_X86)||defined(CONFIG_IA64)
#ifdef __KVM_HAVE_DEVICE_ASSIGNMENT
#define KVM_CAP_DEVICE_ASSIGNMENT 17
#endif
#define KVM_CAP_IOMMU 18
#if defined(CONFIG_X86)
#ifdef __KVM_HAVE_MSI
#define KVM_CAP_DEVICE_MSI 20
#endif
/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
#if defined(CONFIG_X86)
#ifdef __KVM_HAVE_USER_NMI
#define KVM_CAP_USER_NMI 22
#endif
......
......@@ -285,6 +285,7 @@ void kvm_free_physmem(struct kvm *kvm);
struct kvm *kvm_arch_create_vm(void);
void kvm_arch_destroy_vm(struct kvm *kvm);
void kvm_free_all_assigned_devices(struct kvm *kvm);
void kvm_arch_sync_events(struct kvm *kvm);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
......
......@@ -73,14 +73,13 @@ static int kvm_iommu_map_memslots(struct kvm *kvm)
{
int i, r = 0;
down_read(&kvm->slots_lock);
for (i = 0; i < kvm->nmemslots; i++) {
r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
kvm->memslots[i].npages);
if (r)
break;
}
up_read(&kvm->slots_lock);
return r;
}
......@@ -190,12 +189,11 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
static int kvm_iommu_unmap_memslots(struct kvm *kvm)
{
int i;
down_read(&kvm->slots_lock);
for (i = 0; i < kvm->nmemslots; i++) {
kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
kvm->memslots[i].npages);
}
up_read(&kvm->slots_lock);
return 0;
}
......
......@@ -173,7 +173,6 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
assigned_dev->host_irq_disabled = false;
}
mutex_unlock(&assigned_dev->kvm->lock);
kvm_put_kvm(assigned_dev->kvm);
}
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
......@@ -181,8 +180,6 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
struct kvm_assigned_dev_kernel *assigned_dev =
(struct kvm_assigned_dev_kernel *) dev_id;
kvm_get_kvm(assigned_dev->kvm);
schedule_work(&assigned_dev->interrupt_work);
disable_irq_nosync(irq);
......@@ -213,6 +210,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
}
}
/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
static void kvm_free_assigned_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
......@@ -228,11 +226,24 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
if (!assigned_dev->irq_requested_type)
return;
if (cancel_work_sync(&assigned_dev->interrupt_work))
/* We had pending work. That means we will have to take
* care of kvm_put_kvm.
*/
kvm_put_kvm(kvm);
/*
* In kvm_free_device_irq, cancel_work_sync return true if:
* 1. work is scheduled, and then cancelled.
* 2. work callback is executed.
*
* The first one ensured that the irq is disabled and no more events
* would happen. But for the second one, the irq may be enabled (e.g.
* for MSI). So we disable irq here to prevent further events.
*
* Notice this maybe result in nested disable if the interrupt type is
* INTx, but it's OK for we are going to free it.
*
* If this function is a part of VM destroy, please ensure that till
* now, the kvm state is still legal for probably we also have to wait
* interrupt_work done.
*/
disable_irq_nosync(assigned_dev->host_irq);
cancel_work_sync(&assigned_dev->interrupt_work);
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
......@@ -285,8 +296,8 @@ static int assigned_device_update_intx(struct kvm *kvm,
if (irqchip_in_kernel(kvm)) {
if (!msi2intx &&
adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI) {
free_irq(adev->host_irq, (void *)kvm);
(adev->irq_requested_type & KVM_ASSIGNED_DEV_HOST_MSI)) {
free_irq(adev->host_irq, (void *)adev);
pci_disable_msi(adev->dev);
}
......@@ -455,6 +466,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
down_read(&kvm->slots_lock);
mutex_lock(&kvm->lock);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
......@@ -516,6 +528,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
out:
mutex_unlock(&kvm->lock);
up_read(&kvm->slots_lock);
return r;
out_list_del:
list_del(&match->list);
......@@ -527,6 +540,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
out_free:
kfree(match);
mutex_unlock(&kvm->lock);
up_read(&kvm->slots_lock);
return r;
}
#endif
......@@ -789,11 +803,19 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
return young;
}
static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
kvm_arch_flush_shadow(kvm);
}
static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.invalidate_page = kvm_mmu_notifier_invalidate_page,
.invalidate_range_start = kvm_mmu_notifier_invalidate_range_start,
.invalidate_range_end = kvm_mmu_notifier_invalidate_range_end,
.clear_flush_young = kvm_mmu_notifier_clear_flush_young,
.release = kvm_mmu_notifier_release,
};
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
......@@ -883,6 +905,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
{
struct mm_struct *mm = kvm->mm;
kvm_arch_sync_events(kvm);
spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment