Commit 7c390d35 authored by Junaid Shahid, committed by Paolo Bonzini

kvm: x86: Add fast CR3 switch code path

When using shadow paging, a CR3 switch in the guest results in a VM Exit.
In the common case, that VM exit doesn't require much processing by KVM.
However, it does acquire the MMU lock, which can start showing signs of
contention under some workloads even on a 2 VCPU VM when the guest is
using KPTI. Therefore, we add a fast path that avoids acquiring the MMU
lock in the most common cases, e.g. when switching back and forth between
the kernel and user mode CR3s used by KPTI with no guest page table
changes in between.

For now, this fast path is implemented only for 64-bit guests and hosts
to avoid the handling of PDPTEs, but it can be extended later to 32-bit
guests and/or hosts as well.
Signed-off-by: Junaid Shahid <junaids@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 578e1c4d
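
In outline, the patch keeps a one-entry cache of the previous shadow root and swaps it with the current one on every guest CR3 write; if the new CR3 is the one the cached root was built for, that root is reused and the MMU lock is never taken. Below is a standalone, simplified sketch of just that caching idea (plain user-space C with made-up stand-in names such as struct root_cache and fast_switch; it deliberately elides the real code's locking, mmu_check_root() validation, KVM_REQ_MMU_SYNC and write-flood bookkeeping):

/*
 * Illustration only, not kernel code: a two-slot model of the
 * current/previous shadow-root cache that fast_cr3_switch() implements.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t gpa_t;	/* guest-physical CR3 value */
typedef uint64_t hpa_t;	/* host-physical shadow root page */

#define INVALID_PAGE ((hpa_t)-1)

struct root_cache {
	gpa_t cur_cr3;	/* CR3 the guest is currently running on */
	hpa_t cur_hpa;	/* shadow root built for cur_cr3 */
	gpa_t prev_cr3;	/* CR3 the guest last switched away from */
	hpa_t prev_hpa;	/* its still-intact shadow root, if any */
};

/* Returns true if the CR3 switch was handled without rebuilding a root. */
static bool fast_switch(struct root_cache *rc, gpa_t new_cr3)
{
	gpa_t old_prev_cr3 = rc->prev_cr3;
	hpa_t tmp;

	/*
	 * Swap the current and previous slots so the root we are leaving
	 * stays cached, and remember which CR3 it belonged to.
	 */
	tmp = rc->cur_hpa;
	rc->cur_hpa = rc->prev_hpa;
	rc->prev_hpa = tmp;
	rc->prev_cr3 = rc->cur_cr3;
	rc->cur_cr3 = new_cr3;

	/* Hit: the guest returned to the CR3 whose root we had cached. */
	return new_cr3 == old_prev_cr3 && rc->cur_hpa != INVALID_PAGE;
}

int main(void)
{
	/* KPTI-style ping-pong between a kernel CR3 and a user CR3. */
	struct root_cache rc = { 0x1000, 0xaaaa, INVALID_PAGE, INVALID_PAGE };

	printf("%d\n", fast_switch(&rc, 0x2000)); /* 0: miss, slow path builds a root */
	rc.cur_hpa = 0xbbbb;			  /* simulate the slow-path allocation */
	printf("%d\n", fast_switch(&rc, 0x1000)); /* 1: hit, reuses root 0xaaaa */
	printf("%d\n", fast_switch(&rc, 0x2000)); /* 1: hit, reuses root 0xbbbb */
	return 0;
}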
arch/x86/include/asm/kvm_host.h
@@ -326,6 +326,14 @@ struct rsvd_bits_validate {
 	u64 bad_mt_xwr;
 };
 
+struct kvm_mmu_root_info {
+	gpa_t cr3;
+	hpa_t hpa;
+};
+
+#define KVM_MMU_ROOT_INFO_INVALID \
+	((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE })
+
 /*
  * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
  * and 2-level 32-bit).  The kvm_mmu structure abstracts the details of the
@@ -354,6 +362,7 @@ struct kvm_mmu {
 	u8 shadow_root_level;
 	u8 ept_ad;
 	bool direct_map;
+	struct kvm_mmu_root_info prev_root;
 
 	/*
 	 * Bitmap; bit set = permission fault
@@ -1288,7 +1297,7 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
 void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, bool free_prev_root);
 gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
 			   struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
@@ -1307,7 +1316,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u64 error_code,
 		       void *insn, int insn_len);
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu);
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3);
 
 void kvm_enable_tdp(void);
 void kvm_disable_tdp(void);
arch/x86/kvm/mmu.c
@@ -3405,17 +3405,22 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 	*root_hpa = INVALID_PAGE;
 }
 
-void kvm_mmu_free_roots(struct kvm_vcpu *vcpu)
+void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, bool free_prev_root)
 {
 	int i;
 	LIST_HEAD(invalid_list);
 	struct kvm_mmu *mmu = &vcpu->arch.mmu;
 
-	if (!VALID_PAGE(mmu->root_hpa))
+	if (!VALID_PAGE(mmu->root_hpa) &&
+	    (!VALID_PAGE(mmu->prev_root.hpa) || !free_prev_root))
 		return;
 
 	spin_lock(&vcpu->kvm->mmu_lock);
 
+	if (free_prev_root)
+		mmu_free_root_page(vcpu->kvm, &mmu->prev_root.hpa,
+				   &invalid_list);
+
 	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
 	    (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
 		mmu_free_root_page(vcpu->kvm, &mmu->root_hpa, &invalid_list);
@@ -4015,13 +4020,56 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu,
 	context->root_level = 0;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->root_hpa = INVALID_PAGE;
+	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	context->direct_map = true;
 	context->nx = false;
 }
 
-void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu)
+static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3)
+{
+	struct kvm_mmu *mmu = &vcpu->arch.mmu;
+
+	/*
+	 * For now, limit the fast switch to 64-bit hosts+VMs in order to avoid
+	 * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
+	 * later if necessary.
+	 */
+	if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
+	    mmu->root_level >= PT64_ROOT_4LEVEL) {
+		gpa_t prev_cr3 = mmu->prev_root.cr3;
+
+		if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT))
+			return false;
+
+		swap(mmu->root_hpa, mmu->prev_root.hpa);
+		mmu->prev_root.cr3 = kvm_read_cr3(vcpu);
+
+		if (new_cr3 == prev_cr3 && VALID_PAGE(mmu->root_hpa)) {
+			/*
+			 * It is possible that the cached previous root page is
+			 * obsolete because of a change in the MMU
+			 * generation number. However, that is accompanied by
+			 * KVM_REQ_MMU_RELOAD, which will free the root that we
+			 * have set here and allocate a new one.
+			 */
+			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+			__clear_sp_write_flooding_count(
+				page_header(mmu->root_hpa));
+			mmu->set_cr3(vcpu, mmu->root_hpa);
+
+			return true;
+		}
+	}
+
+	return false;
+}
+
+void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3)
 {
-	kvm_mmu_free_roots(vcpu);
+	if (!fast_cr3_switch(vcpu, new_cr3))
+		kvm_mmu_free_roots(vcpu, false);
 }
 
 static unsigned long get_cr3(struct kvm_vcpu *vcpu)
@@ -4499,6 +4547,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->update_pte = paging64_update_pte;
 	context->shadow_root_level = level;
 	context->root_hpa = INVALID_PAGE;
+	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	context->direct_map = false;
 }
 
@@ -4529,6 +4578,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu,
 	context->update_pte = paging32_update_pte;
 	context->shadow_root_level = PT32E_ROOT_LEVEL;
 	context->root_hpa = INVALID_PAGE;
+	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	context->direct_map = false;
 }
 
@@ -4552,6 +4602,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	context->update_pte = nonpaging_update_pte;
 	context->shadow_root_level = kvm_x86_ops->get_tdp_level(vcpu);
 	context->root_hpa = INVALID_PAGE;
+	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	context->direct_map = true;
 	context->set_cr3 = kvm_x86_ops->set_tdp_cr3;
 	context->get_cr3 = get_cr3;
@@ -4634,6 +4685,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
 	context->update_pte = ept_update_pte;
 	context->root_level = PT64_ROOT_4LEVEL;
 	context->root_hpa = INVALID_PAGE;
+	context->prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	context->direct_map = false;
 	context->base_role.ad_disabled = !accessed_dirty;
 	context->base_role.guest_mode = 1;
@@ -4736,7 +4788,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_load);
 
 void kvm_mmu_unload(struct kvm_vcpu *vcpu)
 {
-	kvm_mmu_free_roots(vcpu);
+	kvm_mmu_free_roots(vcpu, true);
 	WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_unload);
@@ -5116,6 +5168,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.walk_mmu = &vcpu->arch.mmu;
 	vcpu->arch.mmu.root_hpa = INVALID_PAGE;
+	vcpu->arch.mmu.prev_root = KVM_MMU_ROOT_INFO_INVALID;
 	vcpu->arch.mmu.translate_gpa = translate_gpa;
 	vcpu->arch.nested_mmu.translate_gpa = translate_nested_gpa;
arch/x86/kvm/x86.c
@@ -867,9 +867,10 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 	    !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
 		return 1;
 
+	kvm_mmu_new_cr3(vcpu, cr3);
 	vcpu->arch.cr3 = cr3;
 	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);
-	kvm_mmu_new_cr3(vcpu);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_cr3);
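
A note on the kvm_set_cr3() hunk above: kvm_mmu_new_cr3() is now called before vcpu->arch.cr3 is updated, presumably because fast_cr3_switch() records the outgoing root's CR3 via kvm_read_cr3(vcpu); if the call came after the write, the cached previous root would be tagged with the new CR3 rather than the one it was actually built for. The resulting sequence is:

	kvm_mmu_new_cr3(vcpu, cr3);	/* may reuse the cached previous root */
	vcpu->arch.cr3 = cr3;		/* kvm_read_cr3() sees the old value until here */
	__set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail);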