Commit 65855ed8 authored by Lai Jiangshan, committed by Paolo Bonzini

KVM: X86: Synchronize the shadow pagetable before linking it

If the gpte is changed from non-present to present, the guest doesn't need
to flush the TLB per the SDM.  So the host must synchronize the sp before
linking it.  Otherwise the guest might use a wrong mapping.

For example: the guest first changes a level-1 pagetable, and then
links its parent to a new place where the original gpte is non-present.
Finally the guest can access the remapped area without flushing
the TLB.  This guest behavior is allowed per the SDM, but the host
KVM MMU gets it wrong.
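
To illustrate, the guest-side sequence is roughly the following (a
hedged sketch: the table names, constants, and helper below are
illustrative stand-ins, not taken from this commit or the kernel):

	/* A level-1 (4K) guest pagetable and a parent table. */
	extern unsigned long pt_a[512];		/* already linked and in use */
	extern unsigned long pmd_b[512];	/* slot 0 is non-present */
	#define G_PRESENT	0x1UL

	void guest_remap_without_flush(unsigned long new_pfn,
				       unsigned long pt_a_pa,
				       volatile char *remapped_va)
	{
		/* 1. Change the level-1 pagetable; KVM's shadow page
		 *    for pt_a becomes unsync. */
		pt_a[0] = (new_pfn << 12) | G_PRESENT;

		/* 2. Link its parent to a new place whose gpte was
		 *    non-present; non-present -> present requires no
		 *    TLB flush per the SDM. */
		pmd_b[0] = pt_a_pa | G_PRESENT;

		/* 3. Access the remapped area without INVLPG.  With the
		 *    bug, KVM links the stale unsynced shadow page at
		 *    step 2 and translates to the old mapping. */
		(void)*remapped_va;
	}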

Fixes: 4731d4c7 ("KVM: MMU: out of sync shadow core")
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
Message-Id: <20210918005636.3675-3-jiangshanlai@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent f8160295

--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2027,8 +2027,8 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
 	} while (!sp->unsync_children);
 }
 
-static void mmu_sync_children(struct kvm_vcpu *vcpu,
-			      struct kvm_mmu_page *parent)
+static int mmu_sync_children(struct kvm_vcpu *vcpu,
+			     struct kvm_mmu_page *parent, bool can_yield)
 {
 	int i;
 	struct kvm_mmu_page *sp;
@@ -2055,12 +2055,18 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
 		}
 
 		if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
 			kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+			if (!can_yield) {
+				kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
+				return -EINTR;
+			}
+
 			cond_resched_rwlock_write(&vcpu->kvm->mmu_lock);
 			flush = false;
 		}
 	}
 
 	kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+	return 0;
 }
 
 static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
@@ -2146,9 +2152,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
 			kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
 		}
 
-		if (sp->unsync_children)
-			kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
-
 		__clear_sp_write_flooding_count(sp);
 
 trace_get_page:
@@ -3684,7 +3687,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 		write_lock(&vcpu->kvm->mmu_lock);
 		kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
-		mmu_sync_children(vcpu, sp);
+		mmu_sync_children(vcpu, sp, true);
 		kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
 		write_unlock(&vcpu->kvm->mmu_lock);
@@ -3700,7 +3703,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 		if (IS_VALID_PAE_ROOT(root)) {
 			root &= PT64_BASE_ADDR_MASK;
 			sp = to_shadow_page(root);
-			mmu_sync_children(vcpu, sp);
+			mmu_sync_children(vcpu, sp, true);
 		}
 	}
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -707,8 +707,27 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
 		if (!is_shadow_present_pte(*it.sptep)) {
 			table_gfn = gw->table_gfn[it.level - 2];
 			access = gw->pt_access[it.level - 2];
-			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
-					      false, access);
+			sp = kvm_mmu_get_page(vcpu, table_gfn, addr,
+					      it.level-1, false, access);
+			/*
+			 * We must synchronize the pagetable before linking it
+			 * because the guest doesn't need to flush tlb when
+			 * the gpte is changed from non-present to present.
+			 * Otherwise, the guest may use the wrong mapping.
+			 *
+			 * For PG_LEVEL_4K, kvm_mmu_get_page() has already
+			 * synchronized it transiently via kvm_sync_page().
+			 *
+			 * For higher level pagetable, we synchronize it via
+			 * the slower mmu_sync_children().  If it needs to
+			 * break, some progress has been made; return
+			 * RET_PF_RETRY and retry on the next #PF.
+			 * KVM_REQ_MMU_SYNC is not necessary but it
+			 * expedites the process.
+			 */
+			if (sp->unsync_children &&
+			    mmu_sync_children(vcpu, sp, false))
+				return RET_PF_RETRY;
 		}
 
 		/*
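
Usage note: after this change mmu_sync_children() can fail when it is
not allowed to yield.  A minimal standalone sketch of the resulting
contract (illustrative stand-ins only, not kernel code):

	#include <stdbool.h>

	#define RET_PF_RETRY	(-2)	/* stand-in for KVM's RET_PF_RETRY */
	#define ERR_INTR	(-1)	/* stand-in for -EINTR */

	/*
	 * Stand-in for mmu_sync_children(): with can_yield == false it
	 * must not drop the MMU lock to reschedule, so under contention
	 * it bails out after making partial progress.
	 */
	static int sync_children(bool can_yield, bool contended)
	{
		if (contended && !can_yield)
			return ERR_INTR;
		/* ... sync all unsync children, possibly yielding ... */
		return 0;
	}

	/* kvm_mmu_sync_roots()-style caller: yielding is safe. */
	static void sync_roots(bool contended)
	{
		(void)sync_children(true, contended);	/* always 0 */
	}

	/*
	 * FNAME(fetch)-style caller: a shadow-page walk is in progress
	 * and must not be resumed after dropping the lock, so the whole
	 * page fault is replayed and the sync finishes on the next #PF.
	 */
	static int fetch(bool unsync_children, bool contended)
	{
		if (unsync_children && sync_children(false, contended))
			return RET_PF_RETRY;
		return 0;	/* now safe to link the shadow page */
	}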