Commit ecd8ee7f authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "Fixes for kvm on x86:

   - new selftests

   - fixes for migration with HyperV re-enlightenment enabled

   - fix RCU/SRCU usage

   - fixes for local_irq_restore misuse false positive"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  documentation/kvm: additional explanations on KVM_SET_BOOT_CPU_ID
  x86/kvm: Fix broken irq restoration in kvm_wait
  KVM: X86: Fix missing local pCPU when executing wbinvd on all dirty pCPUs
  KVM: x86: Protect userspace MSR filter with SRCU, and set atomically-ish
  selftests: kvm: add set_boot_cpu_id test
  selftests: kvm: add _vm_ioctl
  selftests: kvm: add get_msr_index_features
  selftests: kvm: Add basic Hyper-V clocksources tests
  KVM: x86: hyper-v: Don't touch TSC page values when guest opted for re-enlightenment
  KVM: x86: hyper-v: Track Hyper-V TSC page status
  KVM: x86: hyper-v: Prevent using not-yet-updated TSC page by secondary CPUs
  KVM: x86: hyper-v: Limit guest to writing zero to HV_X64_MSR_TSC_EMULATION_STATUS
  KVM: x86/mmu: Store the address space ID in the TDP iterator
  KVM: x86/mmu: Factor out tdp_iter_return_to_root
  KVM: x86/mmu: Fix RCU usage when atomically zapping SPTEs
  KVM: x86/mmu: Fix RCU usage in handle_removed_tdp_mmu_page
parents 3149860d 9ce3746d
@@ -1495,7 +1495,8 @@ Fails if any VCPU has already been created.
Define which vcpu is the Bootstrap Processor (BSP). Values are the same
as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default
-is vcpu 0.
+is vcpu 0. This ioctl has to be called before vcpu creation,
+otherwise it will return EBUSY error.
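
A minimal userspace sketch of the required ordering (illustrative only, not part of this patch; standard /dev/kvm ioctls, error handling omitted):

/* Illustrative only: the BSP must be chosen before any vCPU is created. */
#include <errno.h>
#include <fcntl.h>
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);

	/* Allowed: no vCPU exists yet, so vCPU 1 becomes the BSP. */
	if (ioctl(vm, KVM_SET_BOOT_CPU_ID, 1UL) < 0)
		perror("KVM_SET_BOOT_CPU_ID");

	int vcpu1 = ioctl(vm, KVM_CREATE_VCPU, 1UL);

	/* Too late: a vCPU already exists, so EBUSY is expected here. */
	if (ioctl(vm, KVM_SET_BOOT_CPU_ID, 0UL) < 0 && errno == EBUSY)
		printf("got expected EBUSY (vcpu fd: %d)\n", vcpu1);

	return 0;
}
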
4.42 KVM_GET_XSAVE
@@ -4806,8 +4807,10 @@ If an MSR access is not permitted through the filtering, it generates a
allows user space to deflect and potentially handle various MSR accesses
into user space.
-If a vCPU is in running state while this ioctl is invoked, the vCPU may
-experience inconsistent filtering behavior on MSR accesses.
+Note, invoking this ioctl while a vCPU is running is inherently racy. However,
+KVM does guarantee that vCPUs will see either the previous filter or the new
+filter, e.g. MSRs with identical settings in both the old and new filter will
+have deterministic behavior.
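
For illustration (not part of this patch), a VMM might install a deny-by-default filter and later replace it while vCPUs are running, relying on the old-or-new guarantee above. Field and flag names below are the ones exposed by the uapi <linux/kvm.h>; error handling is omitted:

/* Illustrative sketch: deny all MSR accesses by default, but allow reads of
 * MSR 0x10 (IA32_TSC). vm_fd is an existing VM file descriptor. The same call
 * can be repeated later to swap in a new filter while vCPUs keep running. */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int install_tsc_read_only_filter(int vm_fd)
{
	static __u8 bitmap[1] = { 0x01 };	/* bit 0 set: MSR base + 0 is allowed */
	struct kvm_msr_filter filter;

	memset(&filter, 0, sizeof(filter));
	filter.flags = KVM_MSR_FILTER_DEFAULT_DENY;
	filter.ranges[0].flags = KVM_MSR_FILTER_READ;
	filter.ranges[0].base = 0x10;		/* IA32_TSC */
	filter.ranges[0].nmsrs = 1;
	filter.ranges[0].bitmap = bitmap;

	return ioctl(vm_fd, KVM_X86_SET_MSR_FILTER, &filter);
}
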
4.127 KVM_XEN_HVM_SET_ATTR
--------------------------
......
@@ -884,12 +884,29 @@ struct kvm_hv_syndbg {
u64 options;
};
/* Current state of Hyper-V TSC page clocksource */
enum hv_tsc_page_status {
/* TSC page was not set up or disabled */
HV_TSC_PAGE_UNSET = 0,
/* TSC page MSR was written by the guest, update pending */
HV_TSC_PAGE_GUEST_CHANGED,
/* TSC page MSR was written by KVM userspace, update pending */
HV_TSC_PAGE_HOST_CHANGED,
/* TSC page was properly set up and is currently active */
HV_TSC_PAGE_SET,
/* TSC page is currently being updated and therefore is inactive */
HV_TSC_PAGE_UPDATING,
/* TSC page was set up with an inaccessible GPA */
HV_TSC_PAGE_BROKEN,
};
/* Hyper-V emulation context */
struct kvm_hv {
struct mutex hv_lock;
u64 hv_guest_os_id;
u64 hv_hypercall;
u64 hv_tsc_page;
enum hv_tsc_page_status hv_tsc_page_status;
/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
@@ -931,6 +948,12 @@ enum kvm_irqchip_mode {
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
struct kvm_x86_msr_filter {
u8 count;
bool default_allow:1;
struct msr_bitmap_range ranges[16];
};
#define APICV_INHIBIT_REASON_DISABLE 0
#define APICV_INHIBIT_REASON_HYPERV 1
#define APICV_INHIBIT_REASON_NESTED 2
@@ -1025,16 +1048,11 @@ struct kvm_arch {
bool guest_can_read_msr_platform_info;
bool exception_payload_enabled;
+bool bus_lock_detection_enabled;
/* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
u32 user_space_msr_mask;
+struct kvm_x86_msr_filter __rcu *msr_filter;
-struct {
-u8 count;
-bool default_allow:1;
-struct msr_bitmap_range ranges[16];
-} msr_filter;
-bool bus_lock_detection_enabled;
struct kvm_pmu_event_filter __rcu *pmu_event_filter;
struct task_struct *nx_lpage_recovery_thread;
......
@@ -836,28 +836,25 @@ static void kvm_kick_cpu(int cpu)
static void kvm_wait(u8 *ptr, u8 val)
{
-unsigned long flags;
if (in_nmi())
return;
-local_irq_save(flags);
-if (READ_ONCE(*ptr) != val)
-goto out;
/*
* halt until it's our turn and kicked. Note that we do safe halt
* for irq enabled case to avoid hang when lock info is overwritten
* in irq spinlock slowpath and no spurious interrupt occur to save us.
*/
-if (arch_irqs_disabled_flags(flags))
+if (irqs_disabled()) {
+if (READ_ONCE(*ptr) == val)
halt();
-else
+} else {
+local_irq_disable();
+if (READ_ONCE(*ptr) == val)
safe_halt();
-out:
-local_irq_restore(flags);
+local_irq_enable();
+}
}
#ifdef CONFIG_X86_32
......
@@ -520,10 +520,10 @@ static u64 get_time_ref_counter(struct kvm *kvm)
u64 tsc;
/*
-* The guest has not set up the TSC page or the clock isn't
-* stable, fall back to get_kvmclock_ns.
+* Fall back to get_kvmclock_ns() when TSC page hasn't been set up,
+* is broken, disabled or being updated.
*/
-if (!hv->tsc_ref.tsc_sequence)
+if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET)
return div_u64(get_kvmclock_ns(kvm), 100);
vcpu = kvm_get_vcpu(kvm, 0);
@@ -1077,6 +1077,21 @@ static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
return true;
}
/*
* Don't touch TSC page values if the guest has opted for TSC emulation after
* migration. KVM doesn't fully support reenlightenment notifications and TSC
* access emulation and Hyper-V is known to expect the values in TSC page to
* stay constant before TSC access emulation is disabled from guest side
* (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC
* frequency and guest visible TSC value across migration (and prevent it when
* TSC scaling is unsupported).
*/
static inline bool tsc_page_update_unsafe(struct kvm_hv *hv)
{
return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) &&
hv->hv_tsc_emulation_control;
}
void kvm_hv_setup_tsc_page(struct kvm *kvm,
struct pvclock_vcpu_time_info *hv_clock)
{
@@ -1087,7 +1102,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
-if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
+hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET)
return;
mutex_lock(&hv->hv_lock);
@@ -1101,7 +1117,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
*/
if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
&tsc_seq, sizeof(tsc_seq))))
-goto out_unlock;
+goto out_err;
+if (tsc_seq && tsc_page_update_unsafe(hv)) {
+if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
+goto out_err;
+hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
+goto out_unlock;
+}
/*
* While we're computing and writing the parameters, force the
@@ -1110,15 +1134,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
hv->tsc_ref.tsc_sequence = 0;
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
-goto out_unlock;
+goto out_err;
if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
-goto out_unlock;
+goto out_err;
/* Ensure sequence is zero before writing the rest of the struct. */
smp_wmb();
if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
-goto out_unlock;
+goto out_err;
/*
* Now switch to the TSC page mechanism by writing the sequence.
@@ -1131,8 +1155,45 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
smp_wmb();
hv->tsc_ref.tsc_sequence = tsc_seq;
-kvm_write_guest(kvm, gfn_to_gpa(gfn),
-&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
+if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
+&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
goto out_err;
hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
goto out_unlock;
out_err:
hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
out_unlock:
mutex_unlock(&hv->hv_lock);
}
void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
{
struct kvm_hv *hv = to_kvm_hv(kvm);
u64 gfn;
if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET ||
tsc_page_update_unsafe(hv))
return;
mutex_lock(&hv->hv_lock);
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
goto out_unlock;
/* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */
if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET)
hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING;
gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
hv->tsc_ref.tsc_sequence = 0;
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
out_unlock:
mutex_unlock(&hv->hv_lock);
}
@@ -1193,8 +1254,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
}
case HV_X64_MSR_REFERENCE_TSC:
hv->hv_tsc_page = data;
-if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
+if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
+if (!host)
+hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED;
+else
+hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+} else {
+hv->hv_tsc_page_status = HV_TSC_PAGE_UNSET;
+}
break;
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
return kvm_hv_msr_set_crash_data(kvm,
@@ -1229,6 +1297,9 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
hv->hv_tsc_emulation_control = data;
break;
case HV_X64_MSR_TSC_EMULATION_STATUS:
if (data && !host)
return 1;
hv->hv_tsc_emulation_status = data;
break;
case HV_X64_MSR_TIME_REF_COUNT:
......
@@ -133,6 +133,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
void kvm_hv_setup_tsc_page(struct kvm *kvm,
struct pvclock_vcpu_time_info *hv_clock);
void kvm_hv_invalidate_tsc_page(struct kvm *kvm);
void kvm_hv_init_vm(struct kvm *kvm);
void kvm_hv_destroy_vm(struct kvm *kvm);
......
@@ -78,6 +78,11 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
return to_shadow_page(__pa(sptep));
}
static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
{
return sp->role.smm ? 1 : 0;
}
static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
{
/*
......
@@ -20,6 +20,21 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
return gfn & -KVM_PAGES_PER_HPAGE(level);
}
/*
* Return the TDP iterator to the root PT and allow it to continue its
* traversal over the paging structure from there.
*/
void tdp_iter_restart(struct tdp_iter *iter)
{
iter->yielded_gfn = iter->next_last_level_gfn;
iter->level = iter->root_level;
iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
tdp_iter_refresh_sptep(iter);
iter->valid = true;
}
/*
* Sets a TDP iterator to walk a pre-order traversal of the paging structure
* rooted at root_pt, starting with the walk to translate next_last_level_gfn.
@@ -31,16 +46,12 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
iter->next_last_level_gfn = next_last_level_gfn;
-iter->yielded_gfn = iter->next_last_level_gfn;
iter->root_level = root_level;
iter->min_level = min_level;
-iter->level = root_level;
-iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt;
-iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
-tdp_iter_refresh_sptep(iter);
-iter->valid = true;
+iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root_pt;
+iter->as_id = kvm_mmu_page_as_id(sptep_to_sp(root_pt));
+tdp_iter_restart(iter);
}
/*
@@ -159,8 +170,3 @@ void tdp_iter_next(struct tdp_iter *iter)
iter->valid = false;
}
-tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter)
-{
-return iter->pt_path[iter->root_level - 1];
-}
@@ -36,6 +36,8 @@ struct tdp_iter {
int min_level;
/* The iterator's current level within the paging structure */
int level;
/* The address space ID, i.e. SMM vs. regular. */
int as_id;
/* A snapshot of the value at sptep */
u64 old_spte;
/*
@@ -62,6 +64,6 @@ tdp_ptep_t spte_to_child_pt(u64 pte, int level);
void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
int min_level, gfn_t next_last_level_gfn);
void tdp_iter_next(struct tdp_iter *iter);
-tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter);
+void tdp_iter_restart(struct tdp_iter *iter);
#endif /* __KVM_X86_MMU_TDP_ITER_H */
@@ -203,11 +203,6 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
u64 old_spte, u64 new_spte, int level,
bool shared);
-static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
-{
-return sp->role.smm ? 1 : 0;
-}
static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
{
bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
@@ -301,11 +296,16 @@ static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp,
*
* Given a page table that has been removed from the TDP paging structure,
* iterates through the page table to clear SPTEs and free child page tables.
*
* Note that pt is passed in as a tdp_ptep_t, but it does not need RCU
* protection. Since this thread removed it from the paging structure,
* this thread will be responsible for ensuring the page is freed. Hence the
* early rcu_dereferences in the function.
*/
-static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
+static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
bool shared)
{
-struct kvm_mmu_page *sp = sptep_to_sp(pt);
+struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
int level = sp->role.level;
gfn_t base_gfn = sp->gfn;
u64 old_child_spte;
@@ -318,7 +318,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
tdp_mmu_unlink_page(kvm, sp, shared);
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
-sptep = pt + i;
+sptep = rcu_dereference(pt) + i;
gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1));
if (shared) {
@@ -492,10 +492,6 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
struct tdp_iter *iter,
u64 new_spte)
{
-u64 *root_pt = tdp_iter_root_pt(iter);
-struct kvm_mmu_page *root = sptep_to_sp(root_pt);
-int as_id = kvm_mmu_page_as_id(root);
lockdep_assert_held_read(&kvm->mmu_lock);
/*
@@ -509,8 +505,8 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
new_spte) != iter->old_spte)
return false;
-handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
-iter->level, true);
+handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+new_spte, iter->level, true);
return true;
}
@@ -538,7 +534,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
* here since the SPTE is going from non-present
* to non-present.
*/
-WRITE_ONCE(*iter->sptep, 0);
+WRITE_ONCE(*rcu_dereference(iter->sptep), 0);
return true;
}
@@ -564,10 +560,6 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
u64 new_spte, bool record_acc_track,
bool record_dirty_log)
{
-tdp_ptep_t root_pt = tdp_iter_root_pt(iter);
-struct kvm_mmu_page *root = sptep_to_sp(root_pt);
-int as_id = kvm_mmu_page_as_id(root);
lockdep_assert_held_write(&kvm->mmu_lock);
/*
@@ -581,13 +573,13 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte);
-__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
-iter->level, false);
+__handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+new_spte, iter->level, false);
if (record_acc_track)
handle_changed_spte_acc_track(iter->old_spte, new_spte,
iter->level);
if (record_dirty_log)
-handle_changed_spte_dirty_log(kvm, as_id, iter->gfn,
+handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
iter->old_spte, new_spte,
iter->level);
}
@@ -659,9 +651,7 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
WARN_ON(iter->gfn > iter->next_last_level_gfn);
-tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
-iter->root_level, iter->min_level,
-iter->next_last_level_gfn);
+tdp_iter_restart(iter);
return true;
}
......
@@ -1526,35 +1526,44 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
{
+struct kvm_x86_msr_filter *msr_filter;
+struct msr_bitmap_range *ranges;
struct kvm *kvm = vcpu->kvm;
-struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
-u32 count = kvm->arch.msr_filter.count;
-u32 i;
-bool r = kvm->arch.msr_filter.default_allow;
+bool allowed;
int idx;
+u32 i;
-/* MSR filtering not set up or x2APIC enabled, allow everything */
-if (!count || (index >= 0x800 && index <= 0x8ff))
+/* x2APIC MSRs do not support filtering. */
+if (index >= 0x800 && index <= 0x8ff)
return true;
-/* Prevent collision with set_msr_filter */
idx = srcu_read_lock(&kvm->srcu);
-for (i = 0; i < count; i++) {
+msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu);
+if (!msr_filter) {
+allowed = true;
+goto out;
+}
+allowed = msr_filter->default_allow;
+ranges = msr_filter->ranges;
+for (i = 0; i < msr_filter->count; i++) {
u32 start = ranges[i].base;
u32 end = start + ranges[i].nmsrs;
u32 flags = ranges[i].flags;
unsigned long *bitmap = ranges[i].bitmap;
if ((index >= start) && (index < end) && (flags & type)) {
-r = !!test_bit(index - start, bitmap);
+allowed = !!test_bit(index - start, bitmap);
break;
}
}
+out:
srcu_read_unlock(&kvm->srcu, idx);
-return r;
+return allowed;
}
EXPORT_SYMBOL_GPL(kvm_msr_allowed);
@@ -2551,6 +2560,8 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
struct kvm_vcpu *vcpu;
struct kvm_arch *ka = &kvm->arch;
kvm_hv_invalidate_tsc_page(kvm);
spin_lock(&ka->pvclock_gtod_sync_lock);
kvm_make_mclock_inprogress_request(kvm);
/* no guest entries from this point */
@@ -5352,25 +5363,34 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
return r;
}
-static void kvm_clear_msr_filter(struct kvm *kvm)
+static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow)
{
struct kvm_x86_msr_filter *msr_filter;
msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT);
if (!msr_filter)
return NULL;
msr_filter->default_allow = default_allow;
return msr_filter;
}
static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
{
u32 i;
-u32 count = kvm->arch.msr_filter.count;
-struct msr_bitmap_range ranges[16];
-mutex_lock(&kvm->lock);
-kvm->arch.msr_filter.count = 0;
-memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0]));
-mutex_unlock(&kvm->lock);
-synchronize_srcu(&kvm->srcu);
-for (i = 0; i < count; i++)
-kfree(ranges[i].bitmap);
+if (!msr_filter)
+return;
+for (i = 0; i < msr_filter->count; i++)
+kfree(msr_filter->ranges[i].bitmap);
+kfree(msr_filter);
}
-static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range)
+static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
+struct kvm_msr_filter_range *user_range)
{
-struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
struct msr_bitmap_range range;
unsigned long *bitmap = NULL;
size_t bitmap_size;
@@ -5404,11 +5424,9 @@ static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user
goto err;
}
-/* Everything ok, add this range identifier to our global pool */
-ranges[kvm->arch.msr_filter.count] = range;
-/* Make sure we filled the array before we tell anyone to walk it */
-smp_wmb();
-kvm->arch.msr_filter.count++;
+/* Everything ok, add this range identifier. */
+msr_filter->ranges[msr_filter->count] = range;
+msr_filter->count++;
return 0;
err:
@@ -5419,10 +5437,11 @@ static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user
static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
{
struct kvm_msr_filter __user *user_msr_filter = argp;
+struct kvm_x86_msr_filter *new_filter, *old_filter;
struct kvm_msr_filter filter;
bool default_allow;
-int r = 0;
bool empty = true;
+int r = 0;
u32 i;
if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
@@ -5435,25 +5454,32 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
if (empty && !default_allow)
return -EINVAL;
-kvm_clear_msr_filter(kvm);
-kvm->arch.msr_filter.default_allow = default_allow;
-/*
-* Protect from concurrent calls to this function that could trigger
-* a TOCTOU violation on kvm->arch.msr_filter.count.
-*/
-mutex_lock(&kvm->lock);
+new_filter = kvm_alloc_msr_filter(default_allow);
+if (!new_filter)
+return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
-r = kvm_add_msr_filter(kvm, &filter.ranges[i]);
-if (r)
-break;
+r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
+if (r) {
+kvm_free_msr_filter(new_filter);
+return r;
+}
}
+mutex_lock(&kvm->lock);
+/* The per-VM filter is protected by kvm->lock... */
+old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1);
+rcu_assign_pointer(kvm->arch.msr_filter, new_filter);
+synchronize_srcu(&kvm->srcu);
+kvm_free_msr_filter(old_filter);
kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
mutex_unlock(&kvm->lock);
-return r;
+return 0;
}
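
For readers less familiar with the publish-and-wait pattern used above (srcu_dereference() on the read side, rcu_assign_pointer() plus synchronize_srcu() on the write side), the following is a rough userspace analogue built from C11 atomics. It is a teaching sketch only, not the kernel's SRCU: the busy-wait stand-in for the grace period can spin indefinitely under constant reader traffic, which real SRCU avoids.

/* Rough userspace analogue of the filter swap above, using C11 atomics.
 * Readers pin the filter with a counter (like srcu_read_lock/unlock);
 * the writer publishes a new pointer, waits out in-flight readers, then
 * frees the old one (like rcu_assign_pointer + synchronize_srcu + kfree). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct filter { bool default_allow; };

static _Atomic(struct filter *) cur_filter;
static atomic_int readers;

static bool msr_allowed(void)
{
	bool allowed = true;
	struct filter *f;

	atomic_fetch_add(&readers, 1);		/* "srcu_read_lock" */
	f = atomic_load(&cur_filter);		/* "srcu_dereference" */
	if (f)
		allowed = f->default_allow;
	atomic_fetch_sub(&readers, 1);		/* "srcu_read_unlock" */
	return allowed;
}

static void set_filter(bool default_allow)
{
	struct filter *newf = malloc(sizeof(*newf));
	struct filter *oldf;

	newf->default_allow = default_allow;
	oldf = atomic_exchange(&cur_filter, newf);	/* publish new filter */
	while (atomic_load(&readers))			/* crude grace period */
		;
	free(oldf);					/* old filter now unused */
}

int main(void)
{
	set_filter(true);
	printf("allowed: %d\n", msr_allowed());
	set_filter(false);
	printf("allowed: %d\n", msr_allowed());
	return 0;
}
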
long kvm_arch_vm_ioctl(struct file *filp,
@@ -6603,7 +6629,7 @@ static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
int cpu = get_cpu();
cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
-smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
+on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask,
wbinvd_ipi, NULL, 1);
put_cpu();
cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
@@ -10634,8 +10660,6 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
-u32 i;
if (current->mm == kvm->mm) {
/*
* Free memory regions allocated on behalf of userspace,
@@ -10651,8 +10675,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
mutex_unlock(&kvm->slots_lock);
}
static_call_cond(kvm_x86_vm_destroy)(kvm);
-for (i = 0; i < kvm->arch.msr_filter.count; i++)
-kfree(kvm->arch.msr_filter.ranges[i].bitmap);
+kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
kvm_free_vcpus(kvm);
......
@@ -8,10 +8,13 @@
/x86_64/debug_regs
/x86_64/evmcs_test
/x86_64/get_cpuid_test
/x86_64/get_msr_index_features
/x86_64/kvm_pv_test
/x86_64/hyperv_clock
/x86_64/hyperv_cpuid
/x86_64/mmio_warning_test
/x86_64/platform_info_test
/x86_64/set_boot_cpu_id
/x86_64/set_sregs_test
/x86_64/smm_test
/x86_64/state_test
......
@@ -39,12 +39,15 @@ LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
......
@@ -16,6 +16,7 @@
#include "sparsebit.h"
#define KVM_DEV_PATH "/dev/kvm"
#define KVM_MAX_VCPUS 512
/*
@@ -133,6 +134,7 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
void *arg);
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
......
@@ -1697,11 +1697,16 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
int ret;
-ret = ioctl(vm->fd, cmd, arg);
+ret = _vm_ioctl(vm, cmd, arg);
TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
cmd, ret, errno, strerror(errno));
}
int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
return ioctl(vm->fd, cmd, arg);
}
/*
* KVM system ioctl
*
......
@@ -10,8 +10,6 @@
#include "sparsebit.h"
-#define KVM_DEV_PATH "/dev/kvm"
struct userspace_mem_region {
struct kvm_userspace_memory_region region;
struct sparsebit *unused_phy_pages;
......
// SPDX-License-Identifier: GPL-2.0
/*
* Test that KVM_GET_MSR_INDEX_LIST and
* KVM_GET_MSR_FEATURE_INDEX_LIST work as intended
*
* Copyright (C) 2020, Red Hat, Inc.
*/
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
static int kvm_num_index_msrs(int kvm_fd, int nmsrs)
{
struct kvm_msr_list *list;
int r;
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
list->nmsrs = nmsrs;
r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
TEST_ASSERT(r == -1 && errno == E2BIG,
"Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
r);
r = list->nmsrs;
free(list);
return r;
}
static void test_get_msr_index(void)
{
int old_res, res, kvm_fd, r;
struct kvm_msr_list *list;
kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
if (kvm_fd < 0)
exit(KSFT_SKIP);
old_res = kvm_num_index_msrs(kvm_fd, 0);
TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
if (old_res != 1) {
res = kvm_num_index_msrs(kvm_fd, 1);
TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1");
TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical");
}
list = malloc(sizeof(*list) + old_res * sizeof(list->indices[0]));
list->nmsrs = old_res;
r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
TEST_ASSERT(r == 0,
"Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i",
r);
TEST_ASSERT(list->nmsrs == old_res, "Expecting nmsrs to be identical");
free(list);
close(kvm_fd);
}
static int kvm_num_feature_msrs(int kvm_fd, int nmsrs)
{
struct kvm_msr_list *list;
int r;
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
list->nmsrs = nmsrs;
r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
TEST_ASSERT(r == -1 && errno == E2BIG,
"Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST probe, r: %i",
r);
r = list->nmsrs;
free(list);
return r;
}
struct kvm_msr_list *kvm_get_msr_feature_list(int kvm_fd, int nmsrs)
{
struct kvm_msr_list *list;
int r;
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
list->nmsrs = nmsrs;
r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
TEST_ASSERT(r == 0,
"Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i",
r);
return list;
}
static void test_get_msr_feature(void)
{
int res, old_res, i, kvm_fd;
struct kvm_msr_list *feature_list;
kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
if (kvm_fd < 0)
exit(KSFT_SKIP);
old_res = kvm_num_feature_msrs(kvm_fd, 0);
TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
if (old_res != 1) {
res = kvm_num_feature_msrs(kvm_fd, 1);
TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1");
TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical");
}
feature_list = kvm_get_msr_feature_list(kvm_fd, old_res);
TEST_ASSERT(old_res == feature_list->nmsrs,
"Unmatching number of msr indexes");
for (i = 0; i < feature_list->nmsrs; i++)
kvm_get_feature_msr(feature_list->indices[i]);
free(feature_list);
close(kvm_fd);
}
int main(int argc, char *argv[])
{
if (kvm_check_cap(KVM_CAP_GET_MSR_FEATURES))
test_get_msr_feature();
test_get_msr_index();
}
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2021, Red Hat, Inc.
*
* Tests for Hyper-V clocksources
*/
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
struct ms_hyperv_tsc_page {
volatile u32 tsc_sequence;
u32 reserved1;
volatile u64 tsc_scale;
volatile s64 tsc_offset;
} __packed;
#define HV_X64_MSR_GUEST_OS_ID 0x40000000
#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
#define HV_X64_MSR_REFERENCE_TSC 0x40000021
#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
/* Simplified mul_u64_u64_shr() */
static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
{
union {
u64 ll;
struct {
u32 low, high;
} l;
} rm, rn, rh, a0, b0;
u64 c;
a0.ll = a;
b0.ll = b;
rm.ll = (u64)a0.l.low * b0.l.high;
rn.ll = (u64)a0.l.high * b0.l.low;
rh.ll = (u64)a0.l.high * b0.l.high;
rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
rh.l.high = (c >> 32) + rh.l.high;
return rh.ll;
}
static inline void nop_loop(void)
{
int i;
for (i = 0; i < 1000000; i++)
asm volatile("nop");
}
static inline void check_tsc_msr_rdtsc(void)
{
u64 tsc_freq, r1, r2, t1, t2;
s64 delta_ns;
tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
GUEST_ASSERT(tsc_freq > 0);
/* First, check MSR-based clocksource */
r1 = rdtsc();
t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
nop_loop();
r2 = rdtsc();
t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
GUEST_ASSERT(r2 > r1 && t2 > t1);
/* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
if (delta_ns < 0)
delta_ns = -delta_ns;
/* 1% tolerance */
GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
}
static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
{
u64 r1, r2, t1, t2;
/* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
t1 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
/* 10 ms tolerance */
GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
nop_loop();
t2 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
}
static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
{
u64 tsc_scale, tsc_offset;
/* Set Guest OS id to enable Hyper-V emulation */
GUEST_SYNC(1);
wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48);
GUEST_SYNC(2);
check_tsc_msr_rdtsc();
GUEST_SYNC(3);
/* Set up TSC page in disabled state, check that it's clean */
wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
GUEST_ASSERT(tsc_page->tsc_sequence == 0);
GUEST_ASSERT(tsc_page->tsc_scale == 0);
GUEST_ASSERT(tsc_page->tsc_offset == 0);
GUEST_SYNC(4);
/* Set up TSC page in enabled state */
wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
GUEST_ASSERT(tsc_page->tsc_sequence != 0);
GUEST_SYNC(5);
check_tsc_msr_tsc_page(tsc_page);
GUEST_SYNC(6);
tsc_offset = tsc_page->tsc_offset;
/* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
GUEST_SYNC(7);
GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
nop_loop();
/*
* Enable Re-enlightenment and check that TSC page stays constant across
* KVM_SET_CLOCK.
*/
wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
tsc_offset = tsc_page->tsc_offset;
tsc_scale = tsc_page->tsc_scale;
GUEST_SYNC(8);
GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
GUEST_SYNC(9);
check_tsc_msr_tsc_page(tsc_page);
/*
* Disable re-enlightenment and TSC page, check that KVM doesn't update
* it anymore.
*/
wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
memset(tsc_page, 0, sizeof(*tsc_page));
GUEST_SYNC(10);
GUEST_ASSERT(tsc_page->tsc_sequence == 0);
GUEST_ASSERT(tsc_page->tsc_offset == 0);
GUEST_ASSERT(tsc_page->tsc_scale == 0);
GUEST_DONE();
}
#define VCPU_ID 0
static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
{
u64 tsc_freq, r1, r2, t1, t2;
s64 delta_ns;
tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
/* First, check MSR-based clocksource */
r1 = rdtsc();
t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
nop_loop();
r2 = rdtsc();
t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
/* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
if (delta_ns < 0)
delta_ns = -delta_ns;
/* 1% tolerance */
TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
"Elapsed time does not match (MSR=%ld, TSC=%ld)",
(t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
}
int main(void)
{
struct kvm_vm *vm;
struct kvm_run *run;
struct ucall uc;
vm_vaddr_t tsc_page_gva;
int stage;
vm = vm_create_default(VCPU_ID, 0, guest_main);
run = vcpu_state(vm, VCPU_ID);
vcpu_set_hv_cpuid(vm, VCPU_ID);
tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
"TSC page has to be page aligned\n");
vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
host_check_tsc_msr_rdtsc(vm);
for (stage = 1;; stage++) {
_vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Stage %d: unexpected exit reason: %u (%s),\n",
stage, run->exit_reason,
exit_reason_str(run->exit_reason));
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
__FILE__, uc.args[1]);
/* NOT REACHED */
case UCALL_SYNC:
break;
case UCALL_DONE:
/* Keep in sync with guest_main() */
TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n",
stage);
goto out;
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
uc.args[1] == stage,
"Stage %d: Unexpected register values vmexit, got %lx",
stage, (ulong)uc.args[1]);
/* Reset kvmclock triggering TSC page update */
if (stage == 7 || stage == 8 || stage == 10) {
struct kvm_clock_data clock = {0};
vm_ioctl(vm, KVM_SET_CLOCK, &clock);
}
}
out:
kvm_vm_free(vm);
}
// SPDX-License-Identifier: GPL-2.0
/*
* Test that KVM_SET_BOOT_CPU_ID works as intended
*
* Copyright (C) 2020, Red Hat, Inc.
*/
#define _GNU_SOURCE /* for program_invocation_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
#define N_VCPU 2
#define VCPU_ID0 0
#define VCPU_ID1 1
static uint32_t get_bsp_flag(void)
{
return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
}
static void guest_bsp_vcpu(void *arg)
{
GUEST_SYNC(1);
GUEST_ASSERT(get_bsp_flag() != 0);
GUEST_DONE();
}
static void guest_not_bsp_vcpu(void *arg)
{
GUEST_SYNC(1);
GUEST_ASSERT(get_bsp_flag() == 0);
GUEST_DONE();
}
static void test_set_boot_busy(struct kvm_vm *vm)
{
int res;
res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID0);
TEST_ASSERT(res == -1 && errno == EBUSY,
"KVM_SET_BOOT_CPU_ID set while running vm");
}
static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
{
struct ucall uc;
int stage;
for (stage = 0; stage < 2; stage++) {
vcpu_run(vm, vcpuid);
switch (get_ucall(vm, vcpuid, &uc)) {
case UCALL_SYNC:
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
uc.args[1] == stage + 1,
"Stage %d: Unexpected register values vmexit, got %lx",
stage + 1, (ulong)uc.args[1]);
test_set_boot_busy(vm);
break;
case UCALL_DONE:
TEST_ASSERT(stage == 1,
"Expected GUEST_DONE in stage 2, got stage %d",
stage);
break;
case UCALL_ABORT:
TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx",
(const char *)uc.args[0], __FILE__,
uc.args[1], uc.args[2], uc.args[3]);
default:
TEST_ASSERT(false, "Unexpected exit: %s",
exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
}
}
}
static struct kvm_vm *create_vm(void)
{
struct kvm_vm *vm;
uint64_t vcpu_pages = (DEFAULT_STACK_PGS) * 2;
uint64_t extra_pg_pages = vcpu_pages / PTES_PER_MIN_PAGE * N_VCPU;
uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages);
vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR);
kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
vm_create_irqchip(vm);
return vm;
}
static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code)
{
if (bsp_code)
vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu);
else
vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu);
vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
}
static void run_vm_bsp(uint32_t bsp_vcpu)
{
struct kvm_vm *vm;
bool is_bsp_vcpu1 = bsp_vcpu == VCPU_ID1;
vm = create_vm();
if (is_bsp_vcpu1)
vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
add_x86_vcpu(vm, VCPU_ID0, !is_bsp_vcpu1);
add_x86_vcpu(vm, VCPU_ID1, is_bsp_vcpu1);
run_vcpu(vm, VCPU_ID0);
run_vcpu(vm, VCPU_ID1);
kvm_vm_free(vm);
}
static void check_set_bsp_busy(void)
{
struct kvm_vm *vm;
int res;
vm = create_vm();
add_x86_vcpu(vm, VCPU_ID0, true);
add_x86_vcpu(vm, VCPU_ID1, false);
res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set after adding vcpu");
run_vcpu(vm, VCPU_ID0);
run_vcpu(vm, VCPU_ID1);
res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set to a terminated vcpu");
kvm_vm_free(vm);
}
int main(int argc, char *argv[])
{
if (!kvm_check_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
print_skip("set_boot_cpu_id not available");
return 0;
}
run_vm_bsp(VCPU_ID0);
run_vm_bsp(VCPU_ID1);
run_vm_bsp(VCPU_ID0);
check_set_bsp_busy();
}