Commit b6e16ae5 authored by Sean Christopherson, committed by Paolo Bonzini

KVM: x86/mmu: Don't set dirty bits when disabling dirty logging w/ PML

Stop setting dirty bits for MMU pages when dirty logging is disabled for
a memslot, as PML is now completely disabled when there are no memslots
with dirty logging enabled.

This means that spurious PML entries will be created for memslots with
dirty logging disabled if at least one other memslot has dirty logging
enabled.  However, spurious PML entries are already possible since dirty
bits are set only when dirty logging is turned off, i.e. memslots that are
never dirty logged will have their dirty bits left clear.

In the end, it's faster overall to eat a few spurious PML entries in the
window where dirty logging is being disabled across all memslots.
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20210213005015.1651772-13-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent a85863c2
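
For readers unfamiliar with PML (Page Modification Logging): the hardware
appends a guest physical address to the PML buffer only when a write
transitions an EPT SPTE's dirty bit from 0 to 1; an SPTE whose D-bit is
already set never generates a PML entry, which is why the removed code
pre-set dirty bits when logging was turned off.  A minimal toy model in C,
not kernel code; only the EPT D-bit position (bit 9) is real, everything
else is illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define EPT_DIRTY_BIT (1ULL << 9)	/* D-bit in an EPT leaf entry */

/* Toy model: the CPU logs the GPA to the PML buffer only on a 0 -> 1
 * D-bit transition, so pre-setting the bit suppresses logging entirely.
 */
static bool write_generates_pml_entry(uint64_t *spte)
{
	if (*spte & EPT_DIRTY_BIT)
		return false;		/* already dirty: no PML entry */
	*spte |= EPT_DIRTY_BIT;		/* first write: GPA is logged */
	return true;
}

int main(void)
{
	uint64_t spte = 0;

	printf("1st write logged: %d\n", write_generates_pml_entry(&spte));
	printf("2nd write logged: %d\n", write_generates_pml_entry(&spte));
	return 0;
}

With the pre-setting removed, writes to non-logged slots can produce the
spurious PML entries the message accepts as a tradeoff; entries for slots
without dirty logging are simply ignored when the buffer is drained.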
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1421,8 +1421,6 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 				   struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
 					struct kvm_memory_slot *memslot);
-void kvm_mmu_slot_set_dirty(struct kvm *kvm,
-			    struct kvm_memory_slot *memslot);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
 unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1181,36 +1181,6 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
 	return flush;
 }
 
-static bool spte_set_dirty(u64 *sptep)
-{
-	u64 spte = *sptep;
-
-	rmap_printk("spte %p %llx\n", sptep, *sptep);
-
-	/*
-	 * Similar to the !kvm_x86_ops.slot_disable_log_dirty case,
-	 * do not bother adding back write access to pages marked
-	 * SPTE_AD_WRPROT_ONLY_MASK.
-	 */
-	spte |= shadow_dirty_mask;
-
-	return mmu_spte_update(sptep, spte);
-}
-
-static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
-			     struct kvm_memory_slot *slot)
-{
-	u64 *sptep;
-	struct rmap_iterator iter;
-	bool flush = false;
-
-	for_each_rmap_spte(rmap_head, &iter, sptep)
-		if (spte_ad_enabled(*sptep))
-			flush |= spte_set_dirty(sptep);
-
-	return flush;
-}
-
 /**
  * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
  * @kvm: kvm instance
@@ -5630,21 +5600,6 @@ void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
 		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
 
-void kvm_mmu_slot_set_dirty(struct kvm *kvm,
-			    struct kvm_memory_slot *memslot)
-{
-	bool flush;
-
-	write_lock(&kvm->mmu_lock);
-	flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
-	if (is_tdp_mmu_enabled(kvm))
-		flush |= kvm_tdp_mmu_slot_set_dirty(kvm, memslot);
-	write_unlock(&kvm->mmu_lock);
-
-	if (flush)
-		kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
-}
-
 void kvm_mmu_zap_all(struct kvm *kvm)
 {
 	struct kvm_mmu_page *sp, *node;
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1268,60 +1268,6 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 	}
 }
 
-/*
- * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is
- * only used for PML, and so will involve setting the dirty bit on each SPTE.
- * Returns true if an SPTE has been changed and the TLBs need to be flushed.
- */
-static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
-				gfn_t start, gfn_t end)
-{
-	struct tdp_iter iter;
-	u64 new_spte;
-	bool spte_set = false;
-
-	rcu_read_lock();
-
-	tdp_root_for_each_pte(iter, root, start, end) {
-		if (tdp_mmu_iter_cond_resched(kvm, &iter, false))
-			continue;
-
-		if (!is_shadow_present_pte(iter.old_spte) ||
-		    iter.old_spte & shadow_dirty_mask)
-			continue;
-
-		new_spte = iter.old_spte | shadow_dirty_mask;
-
-		tdp_mmu_set_spte(kvm, &iter, new_spte);
-		spte_set = true;
-	}
-
-	rcu_read_unlock();
-	return spte_set;
-}
-
-/*
- * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is
- * only used for PML, and so will involve setting the dirty bit on each SPTE.
- * Returns true if an SPTE has been changed and the TLBs need to be flushed.
- */
-bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
-	struct kvm_mmu_page *root;
-	int root_as_id;
-	bool spte_set = false;
-
-	for_each_tdp_mmu_root_yield_safe(kvm, root) {
-		root_as_id = kvm_mmu_page_as_id(root);
-		if (root_as_id != slot->as_id)
-			continue;
-
-		spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn,
-				slot->base_gfn + slot->npages);
-	}
-
-	return spte_set;
-}
-
 /*
  * Clear leaf entries which could be replaced by large mappings, for
  * GFNs within the slot.
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -33,7 +33,6 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 				       struct kvm_memory_slot *slot,
 				       gfn_t gfn, unsigned long mask,
 				       bool wrprot);
-bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
 				       struct kvm_memory_slot *slot);
 
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10789,7 +10789,18 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 	/*
 	 * Nothing more to do for RO slots (which can't be dirtied and can't be
-	 * made writable) or CREATE/MOVE/DELETE of a slot.  See comments below.
+	 * made writable) or CREATE/MOVE/DELETE of a slot.
+	 *
+	 * For a memslot with dirty logging disabled:
+	 * CREATE:      No dirty mappings will already exist.
+	 * MOVE/DELETE: The old mappings will already have been cleaned up by
+	 *		kvm_arch_flush_shadow_memslot()
+	 *
+	 * For a memslot with dirty logging enabled:
+	 * CREATE:      No shadow pages exist, thus nothing to write-protect
+	 *		and no dirty bits to clear.
+	 * MOVE/DELETE: The old mappings will already have been cleaned up by
+	 *		kvm_arch_flush_shadow_memslot().
 	 */
 	if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
 		return;
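
As a hedged aside, not part of the patch: the KVM_MR_FLAGS_ONLY case checked
above corresponds to userspace re-registering an existing memslot with
different flags.  A sketch of how a VMM would toggle dirty logging, assuming
vm_fd and the slot geometry (slot, gpa, size, hva are placeholders) already
exist:

#include <stdbool.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch: toggle KVM_MEM_LOG_DIRTY_PAGES on an existing memslot.  Changing
 * only .flags while slot, gpa, size and hva stay the same is what KVM
 * classifies as a KVM_MR_FLAGS_ONLY change in kvm_mmu_slot_apply_flags().
 */
static int set_dirty_logging(int vm_fd, uint32_t slot, uint64_t gpa,
			     uint64_t size, void *hva, bool enable)
{
	struct kvm_userspace_memory_region region = {
		.slot = slot,
		.flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0,
		.guest_phys_addr = gpa,
		.memory_size = size,
		.userspace_addr = (uint64_t)(uintptr_t)hva,
	};

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}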
@@ -10802,55 +10813,31 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 	if (WARN_ON_ONCE(!((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)))
 		return;
 
-	/*
-	 * Dirty logging tracks sptes in 4k granularity, meaning that large
-	 * sptes have to be split.  If live migration is successful, the guest
-	 * in the source machine will be destroyed and large sptes will be
-	 * created in the destination. However, if the guest continues to run
-	 * in the source machine (for example if live migration fails), small
-	 * sptes will remain around and cause bad performance.
-	 *
-	 * Scan sptes if dirty logging has been stopped, dropping those
-	 * which can be collapsed into a single large-page spte.  Later
-	 * page faults will create the large-page sptes.
-	 *
-	 * There is no need to do this in any of the following cases:
-	 * CREATE:      No dirty mappings will already exist.
-	 * MOVE/DELETE: The old mappings will already have been cleaned up by
-	 *		kvm_arch_flush_shadow_memslot()
-	 */
-	if (!log_dirty_pages)
+	if (!log_dirty_pages) {
+		/*
+		 * Dirty logging tracks sptes in 4k granularity, meaning that
+		 * large sptes have to be split.  If live migration succeeds,
+		 * the guest in the source machine will be destroyed and large
+		 * sptes will be created in the destination.  However, if the
+		 * guest continues to run in the source machine (for example if
+		 * live migration fails), small sptes will remain around and
+		 * cause bad performance.
+		 *
+		 * Scan sptes if dirty logging has been stopped, dropping those
+		 * which can be collapsed into a single large-page spte.  Later
+		 * page faults will create the large-page sptes.
+		 */
 		kvm_mmu_zap_collapsible_sptes(kvm, new);
-
-	/*
-	 * Enable or disable dirty logging for the slot.
-	 *
-	 * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of the old
-	 * slot have been zapped so no dirty logging updates are needed for
-	 * the old slot.
-	 * For KVM_MR_CREATE and KVM_MR_MOVE, once the new slot is visible
-	 * any mappings that might be created in it will consume the
-	 * properties of the new slot and do not need to be updated here.
-	 *
-	 * When PML is enabled, the kvm_x86_ops dirty logging hooks are
-	 * called to enable/disable dirty logging.
-	 *
-	 * When disabling dirty logging with PML enabled, the D-bit is set
-	 * for sptes in the slot in order to prevent unnecessary GPA
-	 * logging in the PML buffer (and potential PML buffer full VMEXIT).
-	 * This guarantees leaving PML enabled for the guest's lifetime
-	 * won't have any additional overhead from PML when the guest is
-	 * running with dirty logging disabled.
-	 *
-	 * When enabling dirty logging, large sptes are write-protected
-	 * so they can be split on first write.  New large sptes cannot
-	 * be created for this slot until the end of the logging.
-	 * See the comments in fast_page_fault().
-	 * For small sptes, nothing is done if the dirty log is in the
-	 * initial-all-set state.  Otherwise, depending on whether pml
-	 * is enabled the D-bit or the W-bit will be cleared.
-	 */
-	if (log_dirty_pages) {
+	} else {
+		/*
+		 * Large sptes are write-protected so they can be split on first
+		 * write.  New large sptes cannot be created for this slot until
+		 * the end of the logging.  See the comments in fast_page_fault().
+		 *
+		 * For small sptes, nothing is done if the dirty log is in the
+		 * initial-all-set state.  Otherwise, depending on whether pml
+		 * is enabled the D-bit or the W-bit will be cleared.
+		 */
 		if (kvm_x86_ops.cpu_dirty_log_size) {
 			if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
 				kvm_mmu_slot_leaf_clear_dirty(kvm, new);
@@ -10870,8 +10857,6 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
 			 */
 			kvm_mmu_slot_remove_write_access(kvm, new, level);
 		}
-	} else if (kvm_x86_ops.cpu_dirty_log_size) {
-		kvm_mmu_slot_set_dirty(kvm, new);
 	}
 }
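
The rewritten comment in the hunks above notes that, when enabling dirty
logging, either the D-bit (PML) or the W-bit (write protection) is cleared
on small sptes.  A toy C sketch of the two strategies; the masks are
illustrative EPT bit positions, not KVM's actual spte definitions:

#include <stdbool.h>
#include <stdint.h>

#define SPTE_W_BIT (1ULL << 1)	/* illustrative: EPT write permission */
#define SPTE_D_BIT (1ULL << 9)	/* illustrative: EPT dirty bit */

/* Toy model: with PML, clear the D-bit so the next write is logged by
 * hardware; without PML, clear the W-bit so the next write faults and
 * software marks the page dirty.  This patch removes only the inverse
 * operation (re-setting D-bits when logging is turned off).
 */
static uint64_t start_dirty_tracking(uint64_t spte, bool pml_enabled)
{
	return pml_enabled ? (spte & ~SPTE_D_BIT) : (spte & ~SPTE_W_BIT);
}

int main(void)
{
	uint64_t spte = SPTE_W_BIT | SPTE_D_BIT;

	spte = start_dirty_tracking(spte, true);	/* PML: D-bit cleared */
	return !(spte & SPTE_W_BIT);			/* W-bit untouched -> 0 */
}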