Commit 852e3c19 authored by Joerg Roedel's avatar Joerg Roedel Committed by Avi Kivity

KVM: MMU: make direct mapping paths aware of mapping levels

Signed-off-by: default avatarJoerg Roedel <joerg.roedel@amd.com>
Signed-off-by: default avatarAvi Kivity <avi@redhat.com>
parent d25797b2
...@@ -315,7 +315,7 @@ struct kvm_vcpu_arch { ...@@ -315,7 +315,7 @@ struct kvm_vcpu_arch {
struct { struct {
gfn_t gfn; /* presumed gfn during guest pte update */ gfn_t gfn; /* presumed gfn during guest pte update */
pfn_t pfn; /* pfn corresponding to that gfn */ pfn_t pfn; /* pfn corresponding to that gfn */
int largepage; int level;
unsigned long mmu_seq; unsigned long mmu_seq;
} update_pte; } update_pte;
......
...@@ -257,7 +257,7 @@ static int is_last_spte(u64 pte, int level) ...@@ -257,7 +257,7 @@ static int is_last_spte(u64 pte, int level)
{ {
if (level == PT_PAGE_TABLE_LEVEL) if (level == PT_PAGE_TABLE_LEVEL)
return 1; return 1;
if (level == PT_DIRECTORY_LEVEL && is_large_pte(pte)) if (is_large_pte(pte))
return 1; return 1;
return 0; return 0;
} }
...@@ -753,7 +753,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) ...@@ -753,7 +753,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp)
static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
int (*handler)(struct kvm *kvm, unsigned long *rmapp)) int (*handler)(struct kvm *kvm, unsigned long *rmapp))
{ {
int i; int i, j;
int retval = 0; int retval = 0;
/* /*
...@@ -772,11 +772,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, ...@@ -772,11 +772,15 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
end = start + (memslot->npages << PAGE_SHIFT); end = start + (memslot->npages << PAGE_SHIFT);
if (hva >= start && hva < end) { if (hva >= start && hva < end) {
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
int idx = gfn_offset /
KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL);
retval |= handler(kvm, &memslot->rmap[gfn_offset]); retval |= handler(kvm, &memslot->rmap[gfn_offset]);
retval |= handler(kvm,
&memslot->lpage_info[0][idx].rmap_pde); for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
int idx = gfn_offset;
idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
retval |= handler(kvm,
&memslot->lpage_info[j][idx].rmap_pde);
}
} }
} }
...@@ -814,12 +818,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) ...@@ -814,12 +818,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
#define RMAP_RECYCLE_THRESHOLD 1000 #define RMAP_RECYCLE_THRESHOLD 1000
static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage) static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
{ {
unsigned long *rmapp; unsigned long *rmapp;
struct kvm_mmu_page *sp;
sp = page_header(__pa(spte));
gfn = unalias_gfn(vcpu->kvm, gfn); gfn = unalias_gfn(vcpu->kvm, gfn);
rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage); rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
kvm_unmap_rmapp(vcpu->kvm, rmapp); kvm_unmap_rmapp(vcpu->kvm, rmapp);
kvm_flush_remote_tlbs(vcpu->kvm); kvm_flush_remote_tlbs(vcpu->kvm);
...@@ -1734,7 +1741,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, ...@@ -1734,7 +1741,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pte_access, int user_fault, unsigned pte_access, int user_fault,
int write_fault, int dirty, int largepage, int write_fault, int dirty, int level,
gfn_t gfn, pfn_t pfn, bool speculative, gfn_t gfn, pfn_t pfn, bool speculative,
bool can_unsync) bool can_unsync)
{ {
...@@ -1757,7 +1764,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, ...@@ -1757,7 +1764,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte |= shadow_nx_mask; spte |= shadow_nx_mask;
if (pte_access & ACC_USER_MASK) if (pte_access & ACC_USER_MASK)
spte |= shadow_user_mask; spte |= shadow_user_mask;
if (largepage) if (level > PT_PAGE_TABLE_LEVEL)
spte |= PT_PAGE_SIZE_MASK; spte |= PT_PAGE_SIZE_MASK;
if (tdp_enabled) if (tdp_enabled)
spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
...@@ -1768,7 +1775,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, ...@@ -1768,7 +1775,8 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if ((pte_access & ACC_WRITE_MASK) if ((pte_access & ACC_WRITE_MASK)
|| (write_fault && !is_write_protection(vcpu) && !user_fault)) { || (write_fault && !is_write_protection(vcpu) && !user_fault)) {
if (largepage && has_wrprotected_page(vcpu->kvm, gfn, 1)) { if (level > PT_PAGE_TABLE_LEVEL &&
has_wrprotected_page(vcpu->kvm, gfn, level)) {
ret = 1; ret = 1;
spte = shadow_trap_nonpresent_pte; spte = shadow_trap_nonpresent_pte;
goto set_pte; goto set_pte;
...@@ -1806,7 +1814,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, ...@@ -1806,7 +1814,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pt_access, unsigned pte_access, unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty, int user_fault, int write_fault, int dirty,
int *ptwrite, int largepage, gfn_t gfn, int *ptwrite, int level, gfn_t gfn,
pfn_t pfn, bool speculative) pfn_t pfn, bool speculative)
{ {
int was_rmapped = 0; int was_rmapped = 0;
...@@ -1823,7 +1831,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, ...@@ -1823,7 +1831,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
* If we overwrite a PTE page pointer with a 2MB PMD, unlink * If we overwrite a PTE page pointer with a 2MB PMD, unlink
* the parent of the now unreachable PTE. * the parent of the now unreachable PTE.
*/ */
if (largepage && !is_large_pte(*sptep)) { if (level > PT_PAGE_TABLE_LEVEL &&
!is_large_pte(*sptep)) {
struct kvm_mmu_page *child; struct kvm_mmu_page *child;
u64 pte = *sptep; u64 pte = *sptep;
...@@ -1836,8 +1845,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, ...@@ -1836,8 +1845,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
} else } else
was_rmapped = 1; was_rmapped = 1;
} }
if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
dirty, largepage, gfn, pfn, speculative, true)) { dirty, level, gfn, pfn, speculative, true)) {
if (write_fault) if (write_fault)
*ptwrite = 1; *ptwrite = 1;
kvm_x86_ops->tlb_flush(vcpu); kvm_x86_ops->tlb_flush(vcpu);
...@@ -1857,7 +1867,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, ...@@ -1857,7 +1867,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (!is_rmap_spte(*sptep)) if (!is_rmap_spte(*sptep))
kvm_release_pfn_clean(pfn); kvm_release_pfn_clean(pfn);
if (rmap_count > RMAP_RECYCLE_THRESHOLD) if (rmap_count > RMAP_RECYCLE_THRESHOLD)
rmap_recycle(vcpu, gfn, largepage); rmap_recycle(vcpu, sptep, gfn);
} else { } else {
if (was_writeble) if (was_writeble)
kvm_release_pfn_dirty(pfn); kvm_release_pfn_dirty(pfn);
...@@ -1875,7 +1885,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) ...@@ -1875,7 +1885,7 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
} }
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
int largepage, gfn_t gfn, pfn_t pfn) int level, gfn_t gfn, pfn_t pfn)
{ {
struct kvm_shadow_walk_iterator iterator; struct kvm_shadow_walk_iterator iterator;
struct kvm_mmu_page *sp; struct kvm_mmu_page *sp;
...@@ -1883,11 +1893,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, ...@@ -1883,11 +1893,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
gfn_t pseudo_gfn; gfn_t pseudo_gfn;
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
if (iterator.level == PT_PAGE_TABLE_LEVEL if (iterator.level == level) {
|| (largepage && iterator.level == PT_DIRECTORY_LEVEL)) {
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
0, write, 1, &pt_write, 0, write, 1, &pt_write,
largepage, gfn, pfn, false); level, gfn, pfn, false);
++vcpu->stat.pf_fixed; ++vcpu->stat.pf_fixed;
break; break;
} }
...@@ -1915,14 +1924,20 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, ...@@ -1915,14 +1924,20 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
{ {
int r; int r;
int largepage = 0; int level;
pfn_t pfn; pfn_t pfn;
unsigned long mmu_seq; unsigned long mmu_seq;
if (mapping_level(vcpu, gfn) == PT_DIRECTORY_LEVEL) { level = mapping_level(vcpu, gfn);
gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
largepage = 1; /*
} * This path builds a PAE pagetable - so we can map 2mb pages at
* maximum. Therefore check if the level is larger than that.
*/
if (level > PT_DIRECTORY_LEVEL)
level = PT_DIRECTORY_LEVEL;
gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
mmu_seq = vcpu->kvm->mmu_notifier_seq; mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb(); smp_rmb();
...@@ -1938,7 +1953,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) ...@@ -1938,7 +1953,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn)
if (mmu_notifier_retry(vcpu, mmu_seq)) if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock; goto out_unlock;
kvm_mmu_free_some_pages(vcpu); kvm_mmu_free_some_pages(vcpu);
r = __direct_map(vcpu, v, write, largepage, gfn, pfn); r = __direct_map(vcpu, v, write, level, gfn, pfn);
spin_unlock(&vcpu->kvm->mmu_lock); spin_unlock(&vcpu->kvm->mmu_lock);
...@@ -2114,7 +2129,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, ...@@ -2114,7 +2129,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
{ {
pfn_t pfn; pfn_t pfn;
int r; int r;
int largepage = 0; int level;
gfn_t gfn = gpa >> PAGE_SHIFT; gfn_t gfn = gpa >> PAGE_SHIFT;
unsigned long mmu_seq; unsigned long mmu_seq;
...@@ -2125,10 +2140,10 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, ...@@ -2125,10 +2140,10 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
if (r) if (r)
return r; return r;
if (mapping_level(vcpu, gfn) == PT_DIRECTORY_LEVEL) { level = mapping_level(vcpu, gfn);
gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
largepage = 1; gfn &= ~(KVM_PAGES_PER_HPAGE(level) - 1);
}
mmu_seq = vcpu->kvm->mmu_notifier_seq; mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb(); smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, gfn); pfn = gfn_to_pfn(vcpu->kvm, gfn);
...@@ -2141,7 +2156,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, ...@@ -2141,7 +2156,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
goto out_unlock; goto out_unlock;
kvm_mmu_free_some_pages(vcpu); kvm_mmu_free_some_pages(vcpu);
r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
largepage, gfn, pfn); level, gfn, pfn);
spin_unlock(&vcpu->kvm->mmu_lock); spin_unlock(&vcpu->kvm->mmu_lock);
return r; return r;
...@@ -2448,7 +2463,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, ...@@ -2448,7 +2463,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
const void *new) const void *new)
{ {
if (sp->role.level != PT_PAGE_TABLE_LEVEL) { if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
if (!vcpu->arch.update_pte.largepage || if (vcpu->arch.update_pte.level == PT_PAGE_TABLE_LEVEL ||
sp->role.glevels == PT32_ROOT_LEVEL) { sp->role.glevels == PT32_ROOT_LEVEL) {
++vcpu->kvm->stat.mmu_pde_zapped; ++vcpu->kvm->stat.mmu_pde_zapped;
return; return;
...@@ -2498,7 +2513,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ...@@ -2498,7 +2513,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
u64 gpte = 0; u64 gpte = 0;
pfn_t pfn; pfn_t pfn;
vcpu->arch.update_pte.largepage = 0; vcpu->arch.update_pte.level = PT_PAGE_TABLE_LEVEL;
if (bytes != 4 && bytes != 8) if (bytes != 4 && bytes != 8)
return; return;
...@@ -2530,7 +2545,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ...@@ -2530,7 +2545,7 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
if (is_large_pte(gpte) && if (is_large_pte(gpte) &&
(mapping_level(vcpu, gfn) == PT_DIRECTORY_LEVEL)) { (mapping_level(vcpu, gfn) == PT_DIRECTORY_LEVEL)) {
gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1); gfn &= ~(KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL) - 1);
vcpu->arch.update_pte.largepage = 1; vcpu->arch.update_pte.level = PT_DIRECTORY_LEVEL;
} }
vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb(); smp_rmb();
......
...@@ -253,7 +253,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, ...@@ -253,7 +253,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
pt_element_t gpte; pt_element_t gpte;
unsigned pte_access; unsigned pte_access;
pfn_t pfn; pfn_t pfn;
int largepage = vcpu->arch.update_pte.largepage; int level = vcpu->arch.update_pte.level;
gpte = *(const pt_element_t *)pte; gpte = *(const pt_element_t *)pte;
if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) { if (~gpte & (PT_PRESENT_MASK | PT_ACCESSED_MASK)) {
...@@ -272,7 +272,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, ...@@ -272,7 +272,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page,
return; return;
kvm_get_pfn(pfn); kvm_get_pfn(pfn);
mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0,
gpte & PT_DIRTY_MASK, NULL, largepage, gpte & PT_DIRTY_MASK, NULL, level,
gpte_to_gfn(gpte), pfn, true); gpte_to_gfn(gpte), pfn, true);
} }
...@@ -306,7 +306,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -306,7 +306,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
gw->pte_access & access, gw->pte_access & access,
user_fault, write_fault, user_fault, write_fault,
gw->ptes[gw->level-1] & PT_DIRTY_MASK, gw->ptes[gw->level-1] & PT_DIRTY_MASK,
ptwrite, largepage, ptwrite, level,
gw->gfn, pfn, false); gw->gfn, pfn, false);
break; break;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment