Commit 640d9b0d, authored by Xiao Guangrong, committed by Avi Kivity

KVM: MMU: optimize to handle dirty bit

If the dirty bit is not set, we can make the pte access read-only to avoid
handling the dirty bit everywhere.
Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent bebb106a
...@@ -1923,7 +1923,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, ...@@ -1923,7 +1923,7 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pte_access, int user_fault, unsigned pte_access, int user_fault,
int write_fault, int dirty, int level, int write_fault, int level,
gfn_t gfn, pfn_t pfn, bool speculative, gfn_t gfn, pfn_t pfn, bool speculative,
bool can_unsync, bool host_writable) bool can_unsync, bool host_writable)
{ {
...@@ -1938,8 +1938,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, ...@@ -1938,8 +1938,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
spte = PT_PRESENT_MASK; spte = PT_PRESENT_MASK;
if (!speculative) if (!speculative)
spte |= shadow_accessed_mask; spte |= shadow_accessed_mask;
if (!dirty)
pte_access &= ~ACC_WRITE_MASK;
if (pte_access & ACC_EXEC_MASK) if (pte_access & ACC_EXEC_MASK)
spte |= shadow_x_mask; spte |= shadow_x_mask;
else else
...@@ -2023,7 +2022,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, ...@@ -2023,7 +2022,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
unsigned pt_access, unsigned pte_access, unsigned pt_access, unsigned pte_access,
int user_fault, int write_fault, int dirty, int user_fault, int write_fault,
int *ptwrite, int level, gfn_t gfn, int *ptwrite, int level, gfn_t gfn,
pfn_t pfn, bool speculative, pfn_t pfn, bool speculative,
bool host_writable) bool host_writable)
...@@ -2059,7 +2058,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, ...@@ -2059,7 +2058,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
} }
if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault, if (set_spte(vcpu, sptep, pte_access, user_fault, write_fault,
dirty, level, gfn, pfn, speculative, true, level, gfn, pfn, speculative, true,
host_writable)) { host_writable)) {
if (write_fault) if (write_fault)
*ptwrite = 1; *ptwrite = 1;
...@@ -2129,7 +2128,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, ...@@ -2129,7 +2128,7 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
for (i = 0; i < ret; i++, gfn++, start++) for (i = 0; i < ret; i++, gfn++, start++)
mmu_set_spte(vcpu, start, ACC_ALL, mmu_set_spte(vcpu, start, ACC_ALL,
access, 0, 0, 1, NULL, access, 0, 0, NULL,
sp->role.level, gfn, sp->role.level, gfn,
page_to_pfn(pages[i]), true, true); page_to_pfn(pages[i]), true, true);
...@@ -2193,7 +2192,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, ...@@ -2193,7 +2192,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
unsigned pte_access = ACC_ALL; unsigned pte_access = ACC_ALL;
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access, mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, pte_access,
0, write, 1, &pt_write, 0, write, &pt_write,
level, gfn, pfn, prefault, map_writable); level, gfn, pfn, prefault, map_writable);
direct_pte_prefetch(vcpu, iterator.sptep); direct_pte_prefetch(vcpu, iterator.sptep);
++vcpu->stat.pf_fixed; ++vcpu->stat.pf_fixed;
......
...@@ -101,11 +101,15 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, ...@@ -101,11 +101,15 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
return (ret != orig_pte); return (ret != orig_pte);
} }
static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte,
bool last)
{ {
unsigned access; unsigned access;
access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
if (last && !is_dirty_gpte(gpte))
access &= ~ACC_WRITE_MASK;
#if PTTYPE == 64 #if PTTYPE == 64
if (vcpu->arch.mmu.nx) if (vcpu->arch.mmu.nx)
access &= ~(gpte >> PT64_NX_SHIFT); access &= ~(gpte >> PT64_NX_SHIFT);
...@@ -232,8 +236,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, ...@@ -232,8 +236,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
pte |= PT_ACCESSED_MASK; pte |= PT_ACCESSED_MASK;
} }
pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
walker->ptes[walker->level - 1] = pte; walker->ptes[walker->level - 1] = pte;
if (FNAME(is_last_gpte)(walker, vcpu, mmu, pte)) { if (FNAME(is_last_gpte)(walker, vcpu, mmu, pte)) {
...@@ -268,7 +270,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, ...@@ -268,7 +270,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
break; break;
} }
pt_access = pte_access; pt_access &= FNAME(gpte_access)(vcpu, pte, false);
--walker->level; --walker->level;
} }
...@@ -293,6 +295,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, ...@@ -293,6 +295,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
walker->ptes[walker->level - 1] = pte; walker->ptes[walker->level - 1] = pte;
} }
pte_access = pt_access & FNAME(gpte_access)(vcpu, pte, true);
walker->pt_access = pt_access; walker->pt_access = pt_access;
walker->pte_access = pte_access; walker->pte_access = pte_access;
pgprintk("%s: pte %llx pte_access %x pt_access %x\n", pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
...@@ -367,7 +370,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, ...@@ -367,7 +370,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
return; return;
pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte); pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true);
pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte)); pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
if (is_error_pfn(pfn)) { if (is_error_pfn(pfn)) {
kvm_release_pfn_clean(pfn); kvm_release_pfn_clean(pfn);
...@@ -379,7 +382,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, ...@@ -379,7 +382,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
* vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1). * vcpu->arch.update_pte.pfn was fetched from get_user_pages(write = 1).
*/ */
mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
is_dirty_gpte(gpte), NULL, PT_PAGE_TABLE_LEVEL, NULL, PT_PAGE_TABLE_LEVEL,
gpte_to_gfn(gpte), pfn, true, true); gpte_to_gfn(gpte), pfn, true, true);
} }
...@@ -430,7 +433,6 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, ...@@ -430,7 +433,6 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
unsigned pte_access; unsigned pte_access;
gfn_t gfn; gfn_t gfn;
pfn_t pfn; pfn_t pfn;
bool dirty;
if (spte == sptep) if (spte == sptep)
continue; continue;
...@@ -443,18 +445,18 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, ...@@ -443,18 +445,18 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte)) if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
continue; continue;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte,
true);
gfn = gpte_to_gfn(gpte); gfn = gpte_to_gfn(gpte);
dirty = is_dirty_gpte(gpte);
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
(pte_access & ACC_WRITE_MASK) && dirty); pte_access & ACC_WRITE_MASK);
if (is_error_pfn(pfn)) { if (is_error_pfn(pfn)) {
kvm_release_pfn_clean(pfn); kvm_release_pfn_clean(pfn);
break; break;
} }
mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
dirty, NULL, PT_PAGE_TABLE_LEVEL, gfn, NULL, PT_PAGE_TABLE_LEVEL, gfn,
pfn, true, true); pfn, true, true);
} }
} }
...@@ -470,7 +472,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -470,7 +472,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
{ {
unsigned access = gw->pt_access; unsigned access = gw->pt_access;
struct kvm_mmu_page *sp = NULL; struct kvm_mmu_page *sp = NULL;
bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]);
int top_level; int top_level;
unsigned direct_access; unsigned direct_access;
struct kvm_shadow_walk_iterator it; struct kvm_shadow_walk_iterator it;
...@@ -479,8 +480,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -479,8 +480,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
return NULL; return NULL;
direct_access = gw->pt_access & gw->pte_access; direct_access = gw->pt_access & gw->pte_access;
if (!dirty)
direct_access &= ~ACC_WRITE_MASK;
top_level = vcpu->arch.mmu.root_level; top_level = vcpu->arch.mmu.root_level;
if (top_level == PT32E_ROOT_LEVEL) if (top_level == PT32E_ROOT_LEVEL)
...@@ -539,7 +538,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, ...@@ -539,7 +538,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
} }
mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access,
user_fault, write_fault, dirty, ptwrite, it.level, user_fault, write_fault, ptwrite, it.level,
gw->gfn, pfn, prefault, map_writable); gw->gfn, pfn, prefault, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep); FNAME(pte_prefetch)(vcpu, gw, it.sptep);
...@@ -622,17 +621,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, ...@@ -622,17 +621,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
return 0; return 0;
/* mmio */ /* mmio */
if (is_error_pfn(pfn)) { if (is_error_pfn(pfn))
unsigned access = walker.pte_access;
bool dirty = is_dirty_gpte(walker.ptes[walker.level - 1]);
if (!dirty)
access &= ~ACC_WRITE_MASK;
return kvm_handle_bad_page(vcpu, mmu_is_nested(vcpu) ? 0 : return kvm_handle_bad_page(vcpu, mmu_is_nested(vcpu) ? 0 :
addr, access, walker.gfn, pfn); addr, walker.pte_access, walker.gfn, pfn);
}
spin_lock(&vcpu->kvm->mmu_lock); spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu, mmu_seq)) if (mmu_notifier_retry(vcpu, mmu_seq))
goto out_unlock; goto out_unlock;
...@@ -849,11 +840,12 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) ...@@ -849,11 +840,12 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
} }
nr_present++; nr_present++;
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte,
true);
host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
is_dirty_gpte(gpte), PT_PAGE_TABLE_LEVEL, gfn, PT_PAGE_TABLE_LEVEL, gfn,
spte_to_pfn(sp->spt[i]), true, false, spte_to_pfn(sp->spt[i]), true, false,
host_writable); host_writable);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment