Commit 24ae4cfa authored by Ben Gardon, committed by Paolo Bonzini

KVM: x86/mmu: Allow enabling/disabling dirty logging under MMU read lock

To reduce lock contention and interference with page fault handlers,
allow the TDP MMU functions which enable and disable dirty logging
to operate under the MMU read lock.
Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <20210401233736.638171-12-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 2db6f772
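
The resulting control flow in the two slot-level callers is easier to see outside the diff. Below is a minimal sketch of kvm_mmu_slot_remove_write_access() after this change; it is simplified (declarations trimmed, and the exact TLB-flush helper lies outside the hunks shown below, so it is approximated here), so treat it as an illustration of the locking pattern rather than the verbatim kernel code.

/* Simplified sketch of the locking pattern after this commit; not verbatim. */
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
				      struct kvm_memory_slot *memslot,
				      int start_level)
{
	bool flush;

	/* The legacy/shadow MMU rmap walk still needs the write lock. */
	write_lock(&kvm->mmu_lock);
	flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
				  start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
	write_unlock(&kvm->mmu_lock);

	/*
	 * The TDP MMU walk now runs under the read lock, so it no longer
	 * blocks page fault handlers, which take mmu_lock for read as well.
	 */
	if (is_tdp_mmu_enabled(kvm)) {
		read_lock(&kvm->mmu_lock);
		flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
		read_unlock(&kvm->mmu_lock);
	}

	/* TLBs are flushed outside mmu_lock (exact helper elided here). */
	if (flush)
		kvm_flush_remote_tlbs(kvm);
}
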
arch/x86/kvm/mmu/mmu.c

@@ -5532,10 +5532,14 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 	write_lock(&kvm->mmu_lock);
 	flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
 				  start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
-	if (is_tdp_mmu_enabled(kvm))
-		flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
 	write_unlock(&kvm->mmu_lock);
 
+	if (is_tdp_mmu_enabled(kvm)) {
+		read_lock(&kvm->mmu_lock);
+		flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, start_level);
+		read_unlock(&kvm->mmu_lock);
+	}
+
 	/*
 	 * We can flush all the TLBs out of the mmu lock without TLB
 	 * corruption since we just change the spte from writable to
@@ -5638,10 +5642,14 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 
 	write_lock(&kvm->mmu_lock);
 	flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
-	if (is_tdp_mmu_enabled(kvm))
-		flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
 	write_unlock(&kvm->mmu_lock);
 
+	if (is_tdp_mmu_enabled(kvm)) {
+		read_lock(&kvm->mmu_lock);
+		flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
+		read_unlock(&kvm->mmu_lock);
+	}
+
 	/*
 	 * It's also safe to flush TLBs out of mmu lock here as currently this
 	 * function is only used for dirty logging, in which case flushing TLB
arch/x86/kvm/mmu/tdp_mmu.c

@@ -495,8 +495,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 }
 
 /*
- * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically and handle the
- * associated bookkeeping
+ * tdp_mmu_set_spte_atomic_no_dirty_log - Set a TDP MMU SPTE atomically
+ * and handle the associated bookkeeping, but do not mark the page dirty
+ * in KVM's dirty bitmaps.
  *
  * @kvm: kvm instance
  * @iter: a tdp_iter instance currently on the SPTE that should be set
@@ -504,9 +505,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
  * Returns: true if the SPTE was set, false if it was not. If false is returned,
  * this function will have no side-effects.
  */
-static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
-					   struct tdp_iter *iter,
-					   u64 new_spte)
+static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm,
+							struct tdp_iter *iter,
+							u64 new_spte)
 {
 	lockdep_assert_held_read(&kvm->mmu_lock);
 
@@ -521,12 +522,25 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
 		      new_spte) != iter->old_spte)
 		return false;
 
-	handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
-			    new_spte, iter->level, true);
+	__handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+			      new_spte, iter->level, true);
+	handle_changed_spte_acc_track(iter->old_spte, new_spte, iter->level);
 
 	return true;
 }
 
+static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
+					   struct tdp_iter *iter,
+					   u64 new_spte)
+{
+	if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, new_spte))
+		return false;
+
+	handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
+				      iter->old_spte, new_spte, iter->level);
+	return true;
+}
+
 static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
 					   struct tdp_iter *iter)
 {
@@ -1082,7 +1096,8 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
 	for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
 				   min_level, start, end) {
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, false))
+retry:
+		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
 			continue;
 
 		if (!is_shadow_present_pte(iter.old_spte) ||
@@ -1092,7 +1107,15 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 
 		new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
 
-		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+		if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter,
+							  new_spte)) {
+			/*
+			 * The iter must explicitly re-read the SPTE because
+			 * the atomic cmpxchg failed.
+			 */
+			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+			goto retry;
+		}
 		spte_set = true;
 	}
 
@@ -1111,7 +1134,9 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
 	struct kvm_mmu_page *root;
 	bool spte_set = false;
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, false)
+	lockdep_assert_held_read(&kvm->mmu_lock);
+
+	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
 		spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
 			     slot->base_gfn + slot->npages, min_level);
 
@@ -1135,7 +1160,8 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 	rcu_read_lock();
 
 	tdp_root_for_each_leaf_pte(iter, root, start, end) {
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, false))
+retry:
+		if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
 			continue;
 
 		if (spte_ad_need_write_protect(iter.old_spte)) {
@@ -1150,7 +1176,15 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
 			continue;
 		}
 
-		tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+		if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter,
+							  new_spte)) {
+			/*
+			 * The iter must explicitly re-read the SPTE because
+			 * the atomic cmpxchg failed.
+			 */
+			iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+			goto retry;
+		}
 		spte_set = true;
 	}
 
@@ -1170,7 +1204,9 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
 	struct kvm_mmu_page *root;
 	bool spte_set = false;
 
-	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, false)
+	lockdep_assert_held_read(&kvm->mmu_lock);
+
+	for_each_tdp_mmu_root_yield_safe(kvm, root, slot->as_id, true)
 		spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
 				slot->base_gfn + slot->npages);
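
Because the TDP MMU walks above now run while other mmu_lock readers (such as page fault handlers) may be modifying SPTEs concurrently, the plain tdp_mmu_set_spte_no_dirty_log() call is replaced with a cmpxchg-based helper plus a retry loop. The standalone snippet below illustrates that compare-and-exchange retry pattern using C11 atomics; the names (set_spte_atomic, fake_spte, SPTE_WRITABLE) and the bit layout are invented for this sketch and are not KVM's helpers.

/*
 * Standalone illustration of the cmpxchg-and-retry pattern used above for
 * lockless SPTE updates, written with C11 atomics. All names and the bit
 * layout here are invented for the sketch; they are not KVM's helpers.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SPTE_WRITABLE	(1ull << 1)	/* illustrative "writable" bit */

static _Atomic uint64_t fake_spte = SPTE_WRITABLE | 0x1000;

/*
 * Analogue of tdp_mmu_set_spte_atomic_no_dirty_log(): install new_spte only
 * if the SPTE still holds the value the caller last observed. On failure,
 * atomic_compare_exchange_strong() writes the current value back into
 * *old_spte, which mirrors the explicit READ_ONCE() re-read in the diff.
 */
static bool set_spte_atomic(uint64_t *old_spte, uint64_t new_spte)
{
	return atomic_compare_exchange_strong(&fake_spte, old_spte, new_spte);
}

int main(void)
{
	uint64_t old_spte = atomic_load(&fake_spte);

	/*
	 * Write-protect the page: clear the writable bit, retrying if a
	 * concurrent updater (e.g. a page fault handler) changed the SPTE
	 * between the read and the cmpxchg.
	 */
	while (!set_spte_atomic(&old_spte, old_spte & ~SPTE_WRITABLE))
		;	/* old_spte was refreshed; recompute and retry */

	printf("spte now: %#llx\n", (unsigned long long)atomic_load(&fake_spte));
	return 0;
}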