Commit 8a511e7e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
"ARM:

   - Fix EL2 Stage-1 MMIO mappings where a random address was used

   - Fix SMCCC function number comparison when the SVE hint is set

  RISC-V:

   - Fix KVM_GET_REG_LIST API for ISA_EXT registers

   - Fix reading ISA_EXT register of a missing extension

   - Fix ISA_EXT register handling in get-reg-list test

   - Fix filtering of AIA registers in get-reg-list test

  x86:

   - Fixes for TSC_AUX virtualization

   - Stop zapping page tables asynchronously, since we don't zap them as
     often as before"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: SVM: Do not use user return MSR support for virtualized TSC_AUX
  KVM: SVM: Fix TSC_AUX virtualization setup
  KVM: SVM: INTERCEPT_RDTSCP is never intercepted anyway
  KVM: x86/mmu: Stop zapping invalidated TDP MMU roots asynchronously
  KVM: x86/mmu: Do not filter address spaces in for_each_tdp_mmu_root_yield_safe()
  KVM: x86/mmu: Open code leaf invalidation from mmu_notifier
  KVM: riscv: selftests: Selectively filter-out AIA registers
  KVM: riscv: selftests: Fix ISA_EXT register handling in get-reg-list
  RISC-V: KVM: Fix riscv_vcpu_get_isa_ext_single() for missing extensions
  RISC-V: KVM: Fix KVM_GET_REG_LIST API for ISA_EXT registers
  KVM: selftests: Assert that vasprintf() is successful
  KVM: arm64: nvhe: Ignore SVE hint in SMCCC function ID
  KVM: arm64: Properly return allocated EL2 VA from hyp_alloc_private_va_range()
parents 5edc6bb3 5804c19b
......@@ -118,7 +118,7 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu);
u64 __guest_enter(struct kvm_vcpu *vcpu);
bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt);
bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
#ifdef __KVM_NVHE_HYPERVISOR__
void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
......
......@@ -12,6 +12,6 @@
#define FFA_MAX_FUNC_NUM 0x7F
int hyp_ffa_init(void *pages);
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt);
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id);
#endif /* __KVM_HYP_FFA_H */
......@@ -634,9 +634,8 @@ static bool do_ffa_features(struct arm_smccc_res *res,
return true;
}
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt)
bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
{
DECLARE_REG(u64, func_id, host_ctxt, 0);
struct arm_smccc_res res;
/*
......
......@@ -57,6 +57,7 @@ __do_hyp_init:
cmp x0, #HVC_STUB_HCALL_NR
b.lo __kvm_handle_stub_hvc
bic x0, x0, #ARM_SMCCC_CALL_HINTS
mov x3, #KVM_HOST_SMCCC_FUNC(__kvm_hyp_init)
cmp x0, x3
b.eq 1f
......
......@@ -368,6 +368,7 @@ static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
if (static_branch_unlikely(&kvm_protected_mode_initialized))
hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize;
id &= ~ARM_SMCCC_CALL_HINTS;
id -= KVM_HOST_SMCCC_ID(0);
if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall)))
......@@ -392,11 +393,14 @@ static void default_host_smc_handler(struct kvm_cpu_context *host_ctxt)
static void handle_host_smc(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(u64, func_id, host_ctxt, 0);
bool handled;
handled = kvm_host_psci_handler(host_ctxt);
func_id &= ~ARM_SMCCC_CALL_HINTS;
handled = kvm_host_psci_handler(host_ctxt, func_id);
if (!handled)
handled = kvm_host_ffa_handler(host_ctxt);
handled = kvm_host_ffa_handler(host_ctxt, func_id);
if (!handled)
default_host_smc_handler(host_ctxt);
......
......@@ -273,9 +273,8 @@ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_
}
}
bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt)
bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
{
DECLARE_REG(u64, func_id, host_ctxt, 0);
unsigned long ret;
switch (kvm_host_psci_config.version) {
......
......@@ -652,6 +652,9 @@ int hyp_alloc_private_va_range(size_t size, unsigned long *haddr)
mutex_unlock(&kvm_hyp_pgd_mutex);
if (!ret)
*haddr = base;
return ret;
}
......
......@@ -460,8 +460,11 @@ static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu,
reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
return -ENOENT;
*reg_val = 0;
host_isa_ext = kvm_isa_ext_arr[reg_num];
if (!__riscv_isa_extension_available(NULL, host_isa_ext))
return -ENOENT;
*reg_val = 0;
if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
*reg_val = 1; /* Mark the given extension as available */
......@@ -842,7 +845,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i;
isa_ext = kvm_isa_ext_arr[i];
if (!__riscv_isa_extension_available(vcpu->arch.isa, isa_ext))
if (!__riscv_isa_extension_available(NULL, isa_ext))
continue;
if (uindices) {
......
......@@ -1419,7 +1419,6 @@ struct kvm_arch {
* the thread holds the MMU lock in write mode.
*/
spinlock_t tdp_mmu_pages_lock;
struct workqueue_struct *tdp_mmu_zap_wq;
#endif /* CONFIG_X86_64 */
/*
......@@ -1835,7 +1834,7 @@ void kvm_mmu_vendor_module_exit(void);
void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
int kvm_mmu_create(struct kvm_vcpu *vcpu);
int kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_init_vm(struct kvm *kvm);
void kvm_mmu_uninit_vm(struct kvm *kvm);
void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
......
......@@ -6167,20 +6167,15 @@ static bool kvm_has_zapped_obsolete_pages(struct kvm *kvm)
return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
}
int kvm_mmu_init_vm(struct kvm *kvm)
void kvm_mmu_init_vm(struct kvm *kvm)
{
int r;
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
if (tdp_mmu_enabled) {
r = kvm_mmu_init_tdp_mmu(kvm);
if (r < 0)
return r;
}
if (tdp_mmu_enabled)
kvm_mmu_init_tdp_mmu(kvm);
kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
......@@ -6189,8 +6184,6 @@ int kvm_mmu_init_vm(struct kvm *kvm)
kvm->arch.split_desc_cache.kmem_cache = pte_list_desc_cache;
kvm->arch.split_desc_cache.gfp_zero = __GFP_ZERO;
return 0;
}
static void mmu_free_vm_memory_caches(struct kvm *kvm)
......@@ -6246,7 +6239,6 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e
void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
{
bool flush;
int i;
if (WARN_ON_ONCE(gfn_end <= gfn_start))
return;
......@@ -6257,11 +6249,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
if (tdp_mmu_enabled) {
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
flush = kvm_tdp_mmu_zap_leafs(kvm, i, gfn_start,
gfn_end, true, flush);
}
if (tdp_mmu_enabled)
flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end, flush);
if (flush)
kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start);
......
......@@ -58,8 +58,13 @@ struct kvm_mmu_page {
bool tdp_mmu_page;
bool unsync;
union {
u8 mmu_valid_gen;
/* Only accessed under slots_lock. */
bool tdp_mmu_scheduled_root_to_zap;
};
/*
* The shadow page can't be replaced by an equivalent huge page
* because it is being used to map an executable page in the guest
......@@ -100,13 +105,7 @@ struct kvm_mmu_page {
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
tdp_ptep_t ptep;
};
union {
DECLARE_BITMAP(unsync_child_bitmap, 512);
struct {
struct work_struct tdp_mmu_async_work;
void *tdp_mmu_async_data;
};
};
/*
* Tracks shadow pages that, if zapped, would allow KVM to create an NX
......
......@@ -12,18 +12,10 @@
#include <trace/events/kvm.h>
/* Initializes the TDP MMU for the VM, if enabled. */
int kvm_mmu_init_tdp_mmu(struct kvm *kvm)
void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
{
struct workqueue_struct *wq;
wq = alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0);
if (!wq)
return -ENOMEM;
INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
kvm->arch.tdp_mmu_zap_wq = wq;
return 1;
}
/* Arbitrarily returns true so that this may be used in if statements. */
......@@ -46,20 +38,15 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
* ultimately frees all roots.
*/
kvm_tdp_mmu_invalidate_all_roots(kvm);
/*
* Destroying a workqueue also first flushes the workqueue, i.e. no
* need to invoke kvm_tdp_mmu_zap_invalidated_roots().
*/
destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
kvm_tdp_mmu_zap_invalidated_roots(kvm);
WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
/*
* Ensure that all the outstanding RCU callbacks to free shadow pages
* can run before the VM is torn down. Work items on tdp_mmu_zap_wq
* can call kvm_tdp_mmu_put_root and create new callbacks.
* can run before the VM is torn down. Putting the last reference to
* zapped roots will create new callbacks.
*/
rcu_barrier();
}
......@@ -86,46 +73,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
tdp_mmu_free_sp(sp);
}
static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
bool shared);
static void tdp_mmu_zap_root_work(struct work_struct *work)
{
struct kvm_mmu_page *root = container_of(work, struct kvm_mmu_page,
tdp_mmu_async_work);
struct kvm *kvm = root->tdp_mmu_async_data;
read_lock(&kvm->mmu_lock);
/*
* A TLB flush is not necessary as KVM performs a local TLB flush when
* allocating a new root (see kvm_mmu_load()), and when migrating vCPU
* to a different pCPU. Note, the local TLB flush on reuse also
* invalidates any paging-structure-cache entries, i.e. TLB entries for
* intermediate paging structures, that may be zapped, as such entries
* are associated with the ASID on both VMX and SVM.
*/
tdp_mmu_zap_root(kvm, root, true);
/*
* Drop the refcount using kvm_tdp_mmu_put_root() to test its logic for
* avoiding an infinite loop. By design, the root is reachable while
* it's being asynchronously zapped, thus a different task can put its
* last reference, i.e. flowing through kvm_tdp_mmu_put_root() for an
* asynchronously zapped root is unavoidable.
*/
kvm_tdp_mmu_put_root(kvm, root, true);
read_unlock(&kvm->mmu_lock);
}
static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root)
{
root->tdp_mmu_async_data = kvm;
INIT_WORK(&root->tdp_mmu_async_work, tdp_mmu_zap_root_work);
queue_work(kvm->arch.tdp_mmu_zap_wq, &root->tdp_mmu_async_work);
}
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
bool shared)
{
......@@ -211,8 +158,12 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \
__for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true)
#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id) \
__for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, false, false)
#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _shared) \
for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, false); \
_root; \
_root = tdp_mmu_next_root(_kvm, _root, _shared, false)) \
if (!kvm_lockdep_assert_mmu_lock_held(_kvm, _shared)) { \
} else
/*
* Iterate over all TDP MMU roots. Requires that mmu_lock be held for write,
......@@ -292,7 +243,7 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
* by a memslot update or by the destruction of the VM. Initialize the
* refcount to two; one reference for the vCPU, and one reference for
* the TDP MMU itself, which is held until the root is invalidated and
* is ultimately put by tdp_mmu_zap_root_work().
* is ultimately put by kvm_tdp_mmu_zap_invalidated_roots().
*/
refcount_set(&root->tdp_mmu_root_count, 2);
......@@ -877,13 +828,12 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
* true if a TLB flush is needed before releasing the MMU lock, i.e. if one or
* more SPTEs were zapped since the MMU lock was last acquired.
*/
bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
bool can_yield, bool flush)
bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
{
struct kvm_mmu_page *root;
for_each_tdp_mmu_root_yield_safe(kvm, root, as_id)
flush = tdp_mmu_zap_leafs(kvm, root, start, end, can_yield, flush);
for_each_tdp_mmu_root_yield_safe(kvm, root, false)
flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
return flush;
}
......@@ -891,7 +841,6 @@ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
struct kvm_mmu_page *root;
int i;
/*
* Zap all roots, including invalid roots, as all SPTEs must be dropped
......@@ -905,10 +854,8 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
* is being destroyed or the userspace VMM has exited. In both cases,
* KVM_RUN is unreachable, i.e. no vCPUs will ever service the request.
*/
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
for_each_tdp_mmu_root_yield_safe(kvm, root, i)
for_each_tdp_mmu_root_yield_safe(kvm, root, false)
tdp_mmu_zap_root(kvm, root, false);
}
}
/*
......@@ -917,18 +864,47 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
*/
void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
{
flush_workqueue(kvm->arch.tdp_mmu_zap_wq);
struct kvm_mmu_page *root;
read_lock(&kvm->mmu_lock);
for_each_tdp_mmu_root_yield_safe(kvm, root, true) {
if (!root->tdp_mmu_scheduled_root_to_zap)
continue;
root->tdp_mmu_scheduled_root_to_zap = false;
KVM_BUG_ON(!root->role.invalid, kvm);
/*
* A TLB flush is not necessary as KVM performs a local TLB
* flush when allocating a new root (see kvm_mmu_load()), and
* when migrating a vCPU to a different pCPU. Note, the local
* TLB flush on reuse also invalidates paging-structure-cache
* entries, i.e. TLB entries for intermediate paging structures,
* that may be zapped, as such entries are associated with the
* ASID on both VMX and SVM.
*/
tdp_mmu_zap_root(kvm, root, true);
/*
* The referenced needs to be put *after* zapping the root, as
* the root must be reachable by mmu_notifiers while it's being
* zapped
*/
kvm_tdp_mmu_put_root(kvm, root, true);
}
read_unlock(&kvm->mmu_lock);
}
/*
* Mark each TDP MMU root as invalid to prevent vCPUs from reusing a root that
* is about to be zapped, e.g. in response to a memslots update. The actual
* zapping is performed asynchronously. Using a separate workqueue makes it
* easy to ensure that the destruction is performed before the "fast zap"
* completes, without keeping a separate list of invalidated roots; the list is
* effectively the list of work items in the workqueue.
* zapping is done separately so that it happens with mmu_lock with read,
* whereas invalidating roots must be done with mmu_lock held for write (unless
* the VM is being destroyed).
*
* Note, the asynchronous worker is gifted the TDP MMU's reference.
* Note, kvm_tdp_mmu_zap_invalidated_roots() is gifted the TDP MMU's reference.
* See kvm_tdp_mmu_get_vcpu_root_hpa().
*/
void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
......@@ -953,19 +929,20 @@ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
/*
* As above, mmu_lock isn't held when destroying the VM! There can't
* be other references to @kvm, i.e. nothing else can invalidate roots
* or be consuming roots, but walking the list of roots does need to be
* guarded against roots being deleted by the asynchronous zap worker.
* or get/put references to roots.
*/
list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) {
/*
* Note, invalid roots can outlive a memslot update! Invalid
* roots must be *zapped* before the memslot update completes,
* but a different task can acquire a reference and keep the
* root alive after its been zapped.
*/
rcu_read_lock();
list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link) {
if (!root->role.invalid) {
root->tdp_mmu_scheduled_root_to_zap = true;
root->role.invalid = true;
tdp_mmu_schedule_zap_root(kvm, root);
}
}
rcu_read_unlock();
}
/*
......@@ -1146,8 +1123,13 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
bool flush)
{
return kvm_tdp_mmu_zap_leafs(kvm, range->slot->as_id, range->start,
range->end, range->may_block, flush);
struct kvm_mmu_page *root;
__for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false, false)
flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end,
range->may_block, flush);
return flush;
}
typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter,
......
......@@ -7,7 +7,7 @@
#include "spte.h"
int kvm_mmu_init_tdp_mmu(struct kvm *kvm);
void kvm_mmu_init_tdp_mmu(struct kvm *kvm);
void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm);
hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
......@@ -20,8 +20,7 @@ __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
bool shared);
bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start,
gfn_t end, bool can_yield, bool flush);
bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
void kvm_tdp_mmu_zap_all(struct kvm *kvm);
void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm);
......
......@@ -2962,6 +2962,32 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
count, in);
}
static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
bool v_tsc_aux = guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux);
}
}
void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
struct kvm_cpuid_entry2 *best;
/* For sev guests, the memory encryption bit is not reserved in CR3. */
best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
if (best)
vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
if (sev_es_guest(svm->vcpu.kvm))
sev_es_vcpu_after_set_cpuid(svm);
}
static void sev_es_init_vmcb(struct vcpu_svm *svm)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
......@@ -3024,14 +3050,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) &&
(guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP) ||
guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDPID))) {
set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, 1, 1);
if (guest_cpuid_has(&svm->vcpu, X86_FEATURE_RDTSCP))
svm_clr_intercept(svm, INTERCEPT_RDTSCP);
}
}
void sev_init_vmcb(struct vcpu_svm *svm)
......
......@@ -683,6 +683,21 @@ static int svm_hardware_enable(void)
amd_pmu_enable_virt();
/*
* If TSC_AUX virtualization is supported, TSC_AUX becomes a swap type
* "B" field (see sev_es_prepare_switch_to_guest()) for SEV-ES guests.
* Since Linux does not change the value of TSC_AUX once set, prime the
* TSC_AUX field now to avoid a RDMSR on every vCPU run.
*/
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX)) {
struct sev_es_save_area *hostsa;
u32 msr_hi;
hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);
rdmsr(MSR_TSC_AUX, hostsa->tsc_aux, msr_hi);
}
return 0;
}
......@@ -1532,7 +1547,14 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
if (tsc_scaling)
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
if (likely(tsc_aux_uret_slot >= 0))
/*
* TSC_AUX is always virtualized for SEV-ES guests when the feature is
* available. The user return MSR support is not required in this case
* because TSC_AUX is restored on #VMEXIT from the host save area
* (which has been initialized in svm_hardware_enable()).
*/
if (likely(tsc_aux_uret_slot >= 0) &&
(!boot_cpu_has(X86_FEATURE_V_TSC_AUX) || !sev_es_guest(vcpu->kvm)))
kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
svm->guest_state_loaded = true;
......@@ -3086,6 +3108,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
break;
case MSR_TSC_AUX:
/*
* TSC_AUX is always virtualized for SEV-ES guests when the
* feature is available. The user return MSR support is not
* required in this case because TSC_AUX is restored on #VMEXIT
* from the host save area (which has been initialized in
* svm_hardware_enable()).
*/
if (boot_cpu_has(X86_FEATURE_V_TSC_AUX) && sev_es_guest(vcpu->kvm))
break;
/*
* TSC_AUX is usually changed only during boot and never read
* directly. Intercept TSC_AUX instead of exposing it to the
......@@ -4284,7 +4316,6 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_cpuid_entry2 *best;
/*
* SVM doesn't provide a way to disable just XSAVES in the guest, KVM
......@@ -4328,12 +4359,8 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_FLUSH_CMD, 0,
!!guest_cpuid_has(vcpu, X86_FEATURE_FLUSH_L1D));
/* For sev guests, the memory encryption bit is not reserved in CR3. */
if (sev_guest(vcpu->kvm)) {
best = kvm_find_cpuid_entry(vcpu, 0x8000001F);
if (best)
vcpu->arch.reserved_gpa_bits &= ~(1UL << (best->ebx & 0x3f));
}
if (sev_guest(vcpu->kvm))
sev_vcpu_after_set_cpuid(svm);
init_vmcb_after_set_cpuid(vcpu);
}
......
......@@ -684,6 +684,7 @@ void __init sev_hardware_setup(void);
void sev_hardware_unsetup(void);
int sev_cpu_init(struct svm_cpu_data *sd);
void sev_init_vmcb(struct vcpu_svm *svm);
void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm);
void sev_free_vcpu(struct kvm_vcpu *vcpu);
int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
......
......@@ -12308,9 +12308,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out;
ret = kvm_mmu_init_vm(kvm);
if (ret)
goto out_page_track;
kvm_mmu_init_vm(kvm);
ret = static_call(kvm_x86_vm_init)(kvm);
if (ret)
......@@ -12355,7 +12353,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
out_uninit_mmu:
kvm_mmu_uninit_vm(kvm);
out_page_track:
kvm_page_track_cleanup(kvm);
out:
return ret;
......
......@@ -67,6 +67,8 @@
#define ARM_SMCCC_VERSION_1_3 0x10003
#define ARM_SMCCC_1_3_SVE_HINT 0x10000
#define ARM_SMCCC_CALL_HINTS ARM_SMCCC_1_3_SVE_HINT
#define ARM_SMCCC_VERSION_FUNC_ID \
ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
......
......@@ -387,7 +387,7 @@ char *strdup_printf(const char *fmt, ...)
char *str;
va_start(ap, fmt);
vasprintf(&str, fmt, ap);
TEST_ASSERT(vasprintf(&str, fmt, ap) >= 0, "vasprintf() failed");
va_end(ap);
return str;
......
......@@ -12,19 +12,37 @@
#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK)
static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX];
bool filter_reg(__u64 reg)
{
switch (reg & ~REG_MASK) {
/*
* Some ISA extensions are optional and not present on all host,
* but they can't be disabled through ISA_EXT registers when present.
* So, to make life easy, just filtering out these kind of registers.
* Same set of ISA_EXT registers are not present on all host because
* ISA_EXT registers are visible to the KVM user space based on the
* ISA extensions available on the host. Also, disabling an ISA
* extension using corresponding ISA_EXT register does not affect
* the visibility of the ISA_EXT register itself.
*
* Based on above, we should filter-out all ISA_EXT registers.
*/
switch (reg & ~REG_MASK) {
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_D:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_F:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_V:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR:
......@@ -32,6 +50,15 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM:
return true;
/* AIA registers are always available when Ssaia can't be disabled */
case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect):
case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1):
case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2):
case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh):
case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph):
case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h):
case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h):
return isa_ext_cant_disable[KVM_RISCV_ISA_EXT_SSAIA];
default:
break;
}
......@@ -50,24 +77,27 @@ static inline bool vcpu_has_ext(struct kvm_vcpu *vcpu, int ext)
unsigned long value;
ret = __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(ext), &value);
if (ret) {
printf("Failed to get ext %d", ext);
return false;
}
return !!value;
return (ret) ? false : !!value;
}
void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
{
unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 };
struct vcpu_reg_sublist *s;
int rc;
for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++)
__vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(i), &isa_ext_state[i]);
/*
* Disable all extensions which were enabled by default
* if they were available in the risc-v host.
*/
for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++)
__vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0);
for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
rc = __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0);
if (rc && isa_ext_state[i])
isa_ext_cant_disable[i] = true;
}
for_each_sublist(c, s) {
if (!s->feature)
......@@ -506,10 +536,6 @@ static __u64 base_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time),
KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare),
KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A,
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C,
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I,
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M,
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01,
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME,
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment