Commit 527d5cd7 authored by Sean Christopherson, committed by Paolo Bonzini

KVM: x86/mmu: Zap only obsolete roots if a root shadow page is zapped

Zap only obsolete roots when responding to zapping a single root shadow
page.  Because KVM keeps root_count elevated when stuffing a previous
root into its PGD cache, shadowing a 64-bit guest means that zapping any
root causes all vCPUs to reload all roots, even if their current root is
not affected by the zap.

For many kernels, zapping a single root is a frequent operation; in Linux,
for example, it happens whenever an mm is dropped, such as when a process
exits.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
Message-Id: <20220225182248.3812651-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 2f6f66cc
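
Before diving into the hunks below, here is the shape of the change: the old
code broadcast KVM_REQ_MMU_RELOAD whenever a root shadow page was zapped, so
every vCPU unloaded its entire MMU and dropped every cached root; the new code
broadcasts KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, and each vCPU frees only the roots
that are actually stale. The user-space toy model below sketches that per-root
check; it is not kernel code, and struct root, struct vcpu_mmu, and
NUM_PREV_ROOTS are invented stand-ins for KVM's root caches.

/*
 * Toy model of "free only obsolete roots" (user-space C, NOT kernel
 * code).  The types and names here are illustrative stand-ins.
 */
#include <stdbool.h>
#include <stdio.h>

#define NUM_PREV_ROOTS 3	/* KVM_MMU_NUM_PREV_ROOTS stand-in */

struct root {
	bool valid;		/* VALID_PAGE() stand-in */
	bool invalid;		/* sp->role.invalid stand-in */
};

struct vcpu_mmu {
	struct root current_root;		/* mmu->root stand-in */
	struct root prev_roots[NUM_PREV_ROOTS];	/* PGD cache stand-in */
};

/* Analogue of is_obsolete_root(): the root is cached but was zapped. */
static bool root_is_obsolete(const struct root *r)
{
	return r->valid && r->invalid;
}

/*
 * Analogue of __kvm_mmu_free_obsolete_roots(): free only stale roots.
 * The old KVM_REQ_MMU_RELOAD handling (kvm_mmu_unload()) would have
 * freed all of them unconditionally.
 */
static void free_obsolete_roots(struct vcpu_mmu *mmu)
{
	int i;

	if (root_is_obsolete(&mmu->current_root))
		mmu->current_root.valid = false;
	for (i = 0; i < NUM_PREV_ROOTS; i++)
		if (root_is_obsolete(&mmu->prev_roots[i]))
			mmu->prev_roots[i].valid = false;
}

int main(void)
{
	struct vcpu_mmu mmu = {
		.current_root = { .valid = true },
		.prev_roots   = { [0] = { .valid = true } },
	};

	/* Zap side: mark the cached root invalid first, then (in KVM)
	 * broadcast KVM_REQ_MMU_FREE_OBSOLETE_ROOTS to all vCPUs. */
	mmu.prev_roots[0].invalid = true;

	/* Consume side: only the zapped root is freed. */
	free_obsolete_roots(&mmu);
	printf("current valid: %d, prev[0] valid: %d\n",
	       mmu.current_root.valid, mmu.prev_roots[0].valid);
	return 0;	/* prints "current valid: 1, prev[0] valid: 0" */
}

Note the ordering in the real patch: the root is marked invalid before the
request is broadcast, so a vCPU that services the request immediately still
sees the root as obsolete; the toy model is single-threaded and only
illustrates the per-root check.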
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -102,6 +102,8 @@
 #define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
 #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
 	KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
+	KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 
 #define CR0_RESERVED_BITS                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -80,6 +80,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
+void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2310,7 +2310,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
 				       struct list_head *invalid_list,
 				       int *nr_zapped)
 {
-	bool list_unstable;
+	bool list_unstable, zapped_root = false;
 
 	trace_kvm_mmu_prepare_zap_page(sp);
 	++kvm->stat.mmu_shadow_zapped;
@@ -2352,14 +2352,20 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
 		 * in kvm_mmu_zap_all_fast().  Note, is_obsolete_sp() also
 		 * treats invalid shadow pages as being obsolete.
 		 */
-		if (!is_obsolete_sp(kvm, sp))
-			kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+		zapped_root = !is_obsolete_sp(kvm, sp);
 	}
 
 	if (sp->lpage_disallowed)
 		unaccount_huge_nx_page(kvm, sp);
 
 	sp->role.invalid = 1;
+
+	/*
+	 * Make the request to free obsolete roots after marking the root
+	 * invalid, otherwise other vCPUs may not see it as invalid.
+	 */
+	if (zapped_root)
+		kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS);
+
 	return list_unstable;
 }
@@ -3947,7 +3953,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
 	 * previous root, then __kvm_mmu_prepare_zap_page() signals all vCPUs
 	 * to reload even if no vCPU is actively using the root.
 	 */
-	if (!sp && kvm_test_request(KVM_REQ_MMU_RELOAD, vcpu))
+	if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
 		return true;
 
 	return fault->slot &&
@@ -4180,8 +4186,8 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd)
 	/*
 	 * It's possible that the cached previous root page is obsolete because
 	 * of a change in the MMU generation number. However, changing the
-	 * generation number is accompanied by KVM_REQ_MMU_RELOAD, which will
-	 * free the root set here and allocate a new one.
+	 * generation number is accompanied by KVM_REQ_MMU_FREE_OBSOLETE_ROOTS,
+	 * which will free the root set here and allocate a new one.
 	 */
 	kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
@@ -5085,6 +5091,51 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 	vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
 }
 
+static bool is_obsolete_root(struct kvm *kvm, hpa_t root_hpa)
+{
+	struct kvm_mmu_page *sp;
+
+	if (!VALID_PAGE(root_hpa))
+		return false;
+
+	/*
+	 * When freeing obsolete roots, treat roots as obsolete if they don't
+	 * have an associated shadow page.  This does mean KVM will get false
+	 * positives and free roots that don't strictly need to be freed, but
+	 * such false positives are relatively rare:
+	 *
+	 *  (a) only PAE paging and nested NPT has roots without shadow pages
+	 *  (b) remote reloads due to a memslot update obsoletes _all_ roots
+	 *  (c) KVM doesn't track previous roots for PAE paging, and the guest
+	 *      is unlikely to zap an in-use PGD.
+	 */
+	sp = to_shadow_page(root_hpa);
+	return !sp || is_obsolete_sp(kvm, sp);
+}
+
+static void __kvm_mmu_free_obsolete_roots(struct kvm *kvm, struct kvm_mmu *mmu)
+{
+	unsigned long roots_to_free = 0;
+	int i;
+
+	if (is_obsolete_root(kvm, mmu->root.hpa))
+		roots_to_free |= KVM_MMU_ROOT_CURRENT;
+
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+		if (is_obsolete_root(kvm, mmu->prev_roots[i].hpa))
+			roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+	}
+
+	if (roots_to_free)
+		kvm_mmu_free_roots(kvm, mmu, roots_to_free);
+}
+
+void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu)
+{
+	__kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.root_mmu);
+	__kvm_mmu_free_obsolete_roots(vcpu->kvm, &vcpu->arch.guest_mmu);
+}
+
 static bool need_remote_flush(u64 old, u64 new)
 {
 	if (!is_shadow_present_pte(old))
@@ -5656,7 +5707,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
 	 * Note: we need to do this under the protection of mmu_lock,
 	 * otherwise, vcpu would purge shadow page but miss tlb flush.
 	 */
-	kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+	kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS);
 
 	kvm_zap_obsolete_pages(kvm);
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9866,8 +9866,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 				goto out;
 			}
 		}
-		if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
-			kvm_mmu_unload(vcpu);
+		if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
+			kvm_mmu_free_obsolete_roots(vcpu);
 		if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
 			__kvm_migrate_timers(vcpu);
 		if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))