Commit dce9ce36 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
 "ARM:
   - Fix handling of the 32bit cycle counter
   - Fix cycle counter filtering

  x86:
   - Fix a race leading to double unregistering of user notifiers
   - Amend oversight in kvm_arch_set_irq that turned Hyper-V code dead
   - Use SRCU around kvm_lapic_set_vapic_addr
   - Avoid recursive flushing of asynchronous page faults
   - Do not rely on deferred update in KVM_GET_CLOCK, which fixes #GP
   - Let userspace know that KVM_GET_CLOCK is useful with master clock;
     4.9 changed the return value to better match the guest clock, but
     didn't provide means to let guests take advantage of it"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm: x86: merge kvm_arch_set_irq and kvm_arch_set_irq_inatomic
  KVM: x86: fix missed SRCU usage in kvm_lapic_set_vapic_addr
  KVM: async_pf: avoid recursive flushing of work items
  kvm: kvmclock: let KVM_GET_CLOCK return whether the master clock is in use
  KVM: Disable irq while unregistering user notifier
  KVM: x86: do not go through vcpu in __get_kvmclock_ns
  KVM: arm64: Fix the issues when guest PMCCFILTR is configured
  arm64: KVM: pmu: Fix AArch32 cycle counter access
parents f6918382 a2b07739
......@@ -777,6 +777,17 @@ Gets the current timestamp of kvmclock as seen by the current guest. In
conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios
such as migration.
When KVM_CAP_ADJUST_CLOCK is passed to KVM_CHECK_EXTENSION, it returns the
set of bits that KVM can return in struct kvm_clock_data's flag member.
The only flag defined now is KVM_CLOCK_TSC_STABLE. If set, the returned
value is the exact kvmclock value seen by all VCPUs at the instant
when KVM_GET_CLOCK was called. If clear, the returned value is simply
CLOCK_MONOTONIC plus a constant offset; the offset can be modified
with KVM_SET_CLOCK. KVM will try to make all VCPUs follow this clock,
but the exact value read by each VCPU could differ, because the host
TSC is not stable.
struct kvm_clock_data {
__u64 clock; /* kvmclock current value */
__u32 flags;
......
......@@ -46,7 +46,15 @@
#define ARMV8_PMU_EVTYPE_MASK 0xc800ffff /* Mask for writable bits */
#define ARMV8_PMU_EVTYPE_EVENT 0xffff /* Mask for EVENT bits */
#define ARMV8_PMU_EVTYPE_EVENT_SW_INCR 0 /* Software increment event */
/*
* PMUv3 event types: required events
*/
#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
/*
* Event filters for PMUv3
......
......@@ -31,17 +31,9 @@
/*
* ARMv8 PMUv3 Performance Events handling code.
* Common event types.
* Common event types (some are defined in asm/perf_event.h).
*/
/* Required events. */
#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03
#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04
#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10
#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11
#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12
/* At least one of the following is required. */
#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08
#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B
......
......@@ -597,8 +597,14 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
idx = ARMV8_PMU_CYCLE_IDX;
} else {
BUG();
return false;
}
} else if (r->CRn == 0 && r->CRm == 9) {
/* PMCCNTR */
if (pmu_access_event_counter_el0_disabled(vcpu))
return false;
idx = ARMV8_PMU_CYCLE_IDX;
} else if (r->CRn == 14 && (r->CRm & 12) == 8) {
/* PMEVCNTRn_EL0 */
if (pmu_access_event_counter_el0_disabled(vcpu))
......@@ -606,7 +612,7 @@ static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
} else {
BUG();
return false;
}
if (!pmu_counter_idx_valid(vcpu, idx))
......
......@@ -156,6 +156,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
}
static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
if (!level)
return -1;
return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
}
int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
......@@ -163,18 +173,26 @@ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
struct kvm_lapic_irq irq;
int r;
if (unlikely(e->type != KVM_IRQ_ROUTING_MSI))
return -EWOULDBLOCK;
switch (e->type) {
case KVM_IRQ_ROUTING_HV_SINT:
return kvm_hv_set_sint(e, kvm, irq_source_id, level,
line_status);
if (kvm_msi_route_invalid(kvm, e))
return -EINVAL;
case KVM_IRQ_ROUTING_MSI:
if (kvm_msi_route_invalid(kvm, e))
return -EINVAL;
kvm_set_msi_irq(kvm, e, &irq);
kvm_set_msi_irq(kvm, e, &irq);
if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
return r;
else
return -EWOULDBLOCK;
if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL))
return r;
break;
default:
break;
}
return -EWOULDBLOCK;
}
int kvm_request_irq_source_id(struct kvm *kvm)
......@@ -254,16 +272,6 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
srcu_read_unlock(&kvm->irq_srcu, idx);
}
static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
if (!level)
return -1;
return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint);
}
int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
......@@ -423,18 +431,6 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
srcu_read_unlock(&kvm->irq_srcu, idx);
}
int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm,
int irq_source_id, int level, bool line_status)
{
switch (irq->type) {
case KVM_IRQ_ROUTING_HV_SINT:
return kvm_hv_set_sint(irq, kvm, irq_source_id, level,
line_status);
default:
return -EWOULDBLOCK;
}
}
void kvm_arch_irq_routing_update(struct kvm *kvm)
{
kvm_hv_irq_routing_update(kvm);
......
......@@ -210,7 +210,18 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
struct kvm_shared_msrs *locals
= container_of(urn, struct kvm_shared_msrs, urn);
struct kvm_shared_msr_values *values;
unsigned long flags;
/*
* Disabling irqs at this point since the following code could be
* interrupted and executed through kvm_arch_hardware_disable()
*/
local_irq_save(flags);
if (locals->registered) {
locals->registered = false;
user_return_notifier_unregister(urn);
}
local_irq_restore(flags);
for (slot = 0; slot < shared_msrs_global.nr; ++slot) {
values = &locals->values[slot];
if (values->host != values->curr) {
......@@ -218,8 +229,6 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
values->curr = values->host;
}
}
locals->registered = false;
user_return_notifier_unregister(urn);
}
static void shared_msr_update(unsigned slot, u32 msr)
......@@ -1724,18 +1733,23 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
static u64 __get_kvmclock_ns(struct kvm *kvm)
{
struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, 0);
struct kvm_arch *ka = &kvm->arch;
s64 ns;
struct pvclock_vcpu_time_info hv_clock;
if (vcpu->arch.hv_clock.flags & PVCLOCK_TSC_STABLE_BIT) {
u64 tsc = kvm_read_l1_tsc(vcpu, rdtsc());
ns = __pvclock_read_cycles(&vcpu->arch.hv_clock, tsc);
} else {
ns = ktime_get_boot_ns() + ka->kvmclock_offset;
spin_lock(&ka->pvclock_gtod_sync_lock);
if (!ka->use_master_clock) {
spin_unlock(&ka->pvclock_gtod_sync_lock);
return ktime_get_boot_ns() + ka->kvmclock_offset;
}
return ns;
hv_clock.tsc_timestamp = ka->master_cycle_now;
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
spin_unlock(&ka->pvclock_gtod_sync_lock);
kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
&hv_clock.tsc_shift,
&hv_clock.tsc_to_system_mul);
return __pvclock_read_cycles(&hv_clock, rdtsc());
}
u64 get_kvmclock_ns(struct kvm *kvm)
......@@ -2596,7 +2610,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PIT_STATE2:
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
case KVM_CAP_XEN_HVM:
case KVM_CAP_ADJUST_CLOCK:
case KVM_CAP_VCPU_EVENTS:
case KVM_CAP_HYPERV:
case KVM_CAP_HYPERV_VAPIC:
......@@ -2623,6 +2636,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#endif
r = 1;
break;
case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_TSC_STABLE;
break;
case KVM_CAP_X86_SMM:
/* SMBASE is usually relocated above 1M on modern chipsets,
* and SMM handlers might indeed rely on 4G segment limits,
......@@ -3415,6 +3431,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
};
case KVM_SET_VAPIC_ADDR: {
struct kvm_vapic_addr va;
int idx;
r = -EINVAL;
if (!lapic_in_kernel(vcpu))
......@@ -3422,7 +3439,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&va, argp, sizeof va))
goto out;
idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_lapic_set_vapic_addr(vcpu, va.vapic_addr);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
break;
}
case KVM_X86_SETUP_MCE: {
......@@ -4103,9 +4122,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
struct kvm_clock_data user_ns;
u64 now_ns;
now_ns = get_kvmclock_ns(kvm);
local_irq_disable();
now_ns = __get_kvmclock_ns(kvm);
user_ns.clock = now_ns;
user_ns.flags = 0;
user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
local_irq_enable();
memset(&user_ns.pad, 0, sizeof(user_ns.pad));
r = -EFAULT;
......
......@@ -972,12 +972,19 @@ struct kvm_irqfd {
__u8 pad[16];
};
/* For KVM_CAP_ADJUST_CLOCK */
/* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */
#define KVM_CLOCK_TSC_STABLE 2
struct kvm_clock_data {
__u64 clock;
__u32 flags;
__u32 pad[9];
};
/* For KVM_CAP_SW_TLB */
#define KVM_MMU_FSL_BOOKE_NOHV 0
#define KVM_MMU_FSL_BOOKE_HV 1
......
......@@ -305,7 +305,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
continue;
type = vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i)
& ARMV8_PMU_EVTYPE_EVENT;
if ((type == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
if ((type == ARMV8_PMUV3_PERFCTR_SW_INCR)
&& (enable & BIT(i))) {
reg = vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
reg = lower_32_bits(reg);
......@@ -379,7 +379,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
/* Software increment event does't need to be backed by a perf event */
if (eventsel == ARMV8_PMU_EVTYPE_EVENT_SW_INCR)
if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
select_idx != ARMV8_PMU_CYCLE_IDX)
return;
memset(&attr, 0, sizeof(struct perf_event_attr));
......@@ -391,7 +392,8 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
attr.exclude_hv = 1; /* Don't count EL2 events */
attr.exclude_host = 1; /* Don't count host events */
attr.config = eventsel;
attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
counter = kvm_pmu_get_counter_value(vcpu, select_idx);
/* The initial sample period (overflow count) of an event. */
......
......@@ -91,6 +91,7 @@ static void async_pf_execute(struct work_struct *work)
spin_lock(&vcpu->async_pf.lock);
list_add_tail(&apf->link, &vcpu->async_pf.done);
apf->vcpu = NULL;
spin_unlock(&vcpu->async_pf.lock);
/*
......@@ -113,6 +114,8 @@ static void async_pf_execute(struct work_struct *work)
void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
{
spin_lock(&vcpu->async_pf.lock);
/* cancel outstanding work queue item */
while (!list_empty(&vcpu->async_pf.queue)) {
struct kvm_async_pf *work =
......@@ -120,6 +123,14 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
typeof(*work), queue);
list_del(&work->queue);
/*
* We know it's present in vcpu->async_pf.done, do
* nothing here.
*/
if (!work->vcpu)
continue;
spin_unlock(&vcpu->async_pf.lock);
#ifdef CONFIG_KVM_ASYNC_PF_SYNC
flush_work(&work->work);
#else
......@@ -129,9 +140,9 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
kmem_cache_free(async_pf_cache, work);
}
#endif
spin_lock(&vcpu->async_pf.lock);
}
spin_lock(&vcpu->async_pf.lock);
while (!list_empty(&vcpu->async_pf.done)) {
struct kvm_async_pf *work =
list_first_entry(&vcpu->async_pf.done,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment