Commit 34b69ede authored by Paolo Bonzini

Merge tag 'kvm-x86-mmu-6.11' of https://github.com/kvm-x86/linux into HEAD

KVM x86 MMU changes for 6.11

 - Don't allocate kvm_mmu_page.shadowed_translation for shadow pages that can't
   hold leaf SPTEs (see the first sketch below).

 - Unconditionally drop mmu_lock when allocating TDP MMU page tables for eager
   page splitting to avoid stalling vCPUs when splitting huge pages (see the
   second sketch below).

 - Misc cleanups
parents 5dcc1e76 0089c055
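
The two sketches below illustrate the main ideas of this merge. Both are minimal
userspace models written for this summary, not KVM code; every struct, constant,
and helper name in them (shadow_page, MAX_LEAF_LEVEL, alloc_shadow_page, and so
on) is invented for illustration.

First sketch, for the shadowed_translation change: allocate the auxiliary
gfn/access array only for pages that can actually hold leaf entries, and have
readers key off the pointer being non-NULL instead of off a role bit, so pages
without the array still resolve gfns correctly.

/*
 * Userspace model of making the auxiliary translation array optional.
 * NOT kernel code: all names and constants here are illustrative stand-ins.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_LEAF_LEVEL		3	/* stand-in for KVM_MAX_HUGEPAGE_LEVEL */
#define ENTRIES_PER_TABLE	512
#define PAGE_SHIFT_BITS		12

struct shadow_page {
	int		level;
	int		direct;
	uint64_t	base_gfn;
	uint64_t	*translation;	/* NULL when the page can't hold leaf entries */
};

static struct shadow_page *alloc_shadow_page(int level, int direct, uint64_t base_gfn)
{
	struct shadow_page *sp = calloc(1, sizeof(*sp));

	if (!sp)
		return NULL;
	sp->level = level;
	sp->direct = direct;
	sp->base_gfn = base_gfn;

	/* Allocate the array only when it can actually be consumed. */
	if (!direct && level <= MAX_LEAF_LEVEL)
		sp->translation = calloc(ENTRIES_PER_TABLE, sizeof(uint64_t));
	return sp;
}

static uint64_t page_get_gfn(const struct shadow_page *sp, int index)
{
	/* Readers check the pointer, not the role, so both cases stay correct. */
	if (sp->translation)
		return sp->translation[index] >> PAGE_SHIFT_BITS;
	return sp->base_gfn + ((uint64_t)index << ((sp->level - 1) * 9));
}

int main(void)
{
	/* A low-level page can hold leaf entries, so it gets the array ... */
	struct shadow_page *leaf_sp = alloc_shadow_page(1, 0, 0x1000);
	/* ... while a level-5 page can't, so the allocation is skipped. */
	struct shadow_page *root_sp = alloc_shadow_page(5, 0, 0);

	if (!leaf_sp || !leaf_sp->translation || !root_sp)
		return 1;

	leaf_sp->translation[7] = (0x42ULL << PAGE_SHIFT_BITS) | 0x7;
	printf("leaf gfn: %#llx\n", (unsigned long long)page_get_gfn(leaf_sp, 7));
	printf("root gfn: %#llx\n", (unsigned long long)page_get_gfn(root_sp, 7));

	free(leaf_sp->translation);
	free(leaf_sp);
	free(root_sp->translation);	/* free(NULL) is a no-op, as in the patch */
	free(root_sp);
	return 0;
}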
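
Second sketch, for the eager-split allocation change: when the splitting loop
needs a fresh page table, drop the lock unconditionally, do a blocking
allocation, reacquire the lock, and revisit the current entry. A pthread rwlock
and calloc() stand in for mmu_lock and the kernel allocator; split_huge_pages()
and alloc_table_blocking() are made-up names.

/*
 * Userspace model of "drop the lock, allocate, reacquire, retry the entry".
 * NOT kernel code: locking and allocation primitives are stand-ins.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdlib.h>

static pthread_rwlock_t mmu_lock = PTHREAD_RWLOCK_INITIALIZER;

struct page_table {
	unsigned long entries[512];
};

/* May sleep/block; must therefore be called with mmu_lock dropped. */
static struct page_table *alloc_table_blocking(void)
{
	return calloc(1, sizeof(struct page_table));
}

static int split_huge_pages(bool shared, int nr_huge_pages)
{
	struct page_table *spare = NULL;
	int ret = 0, i = 0;

	if (shared)
		pthread_rwlock_rdlock(&mmu_lock);
	else
		pthread_rwlock_wrlock(&mmu_lock);

	while (i < nr_huge_pages) {
		if (!spare) {
			/*
			 * Unconditionally drop the lock instead of first trying
			 * a non-blocking allocation under it.
			 */
			pthread_rwlock_unlock(&mmu_lock);

			spare = alloc_table_blocking();

			if (shared)
				pthread_rwlock_rdlock(&mmu_lock);
			else
				pthread_rwlock_wrlock(&mmu_lock);

			if (!spare) {
				ret = -1;	/* -ENOMEM in the real code */
				break;
			}
			/* The lock was dropped, so re-examine entry i. */
			continue;
		}

		/* ... install the spare table as the split table for entry i ... */
		spare = NULL;
		i++;
	}

	pthread_rwlock_unlock(&mmu_lock);
	free(spare);	/* free(NULL) is a no-op */
	return ret;
}

int main(void)
{
	return split_huge_pages(true, 4) ? 1 : 0;
}

Compared with the GFP_NOWAIT-then-retry helper removed in the diff below, always
dropping the lock keeps a single allocation path, which is the simplification
the second bullet describes.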
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -722,7 +722,7 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
 	if (sp->role.passthrough)
 		return sp->gfn;
 
-	if (!sp->role.direct)
+	if (sp->shadowed_translation)
 		return sp->shadowed_translation[index] >> PAGE_SHIFT;
 
 	return sp->gfn + (index << ((sp->role.level - 1) * SPTE_LEVEL_BITS));
@@ -736,7 +736,7 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
  */
 static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
 {
-	if (sp_has_gptes(sp))
+	if (sp->shadowed_translation)
 		return sp->shadowed_translation[index] & ACC_ALL;
 
 	/*
@@ -757,7 +757,7 @@ static u32 kvm_mmu_page_get_access(struct kvm_mmu_page *sp, int index)
 static void kvm_mmu_page_set_translation(struct kvm_mmu_page *sp, int index,
 					 gfn_t gfn, unsigned int access)
 {
-	if (sp_has_gptes(sp)) {
+	if (sp->shadowed_translation) {
 		sp->shadowed_translation[index] = (gfn << PAGE_SHIFT) | access;
 		return;
 	}
@@ -1700,8 +1700,7 @@ static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
 	hlist_del(&sp->hash_link);
 	list_del(&sp->link);
 	free_page((unsigned long)sp->spt);
-	if (!sp->role.direct)
-		free_page((unsigned long)sp->shadowed_translation);
+	free_page((unsigned long)sp->shadowed_translation);
 	kmem_cache_free(mmu_page_header_cache, sp);
 }
@@ -2203,7 +2202,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
 	sp = kvm_mmu_memory_cache_alloc(caches->page_header_cache);
 	sp->spt = kvm_mmu_memory_cache_alloc(caches->shadow_page_cache);
-	if (!role.direct)
+	if (!role.direct && role.level <= KVM_MAX_HUGEPAGE_LEVEL)
 		sp->shadowed_translation = kvm_mmu_memory_cache_alloc(caches->shadowed_info_cache);
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
@@ -4609,7 +4608,10 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
 	if (WARN_ON_ONCE(error_code >> 32))
 		error_code = lower_32_bits(error_code);
 
-	/* Ensure the above sanity check also covers KVM-defined flags. */
+	/*
+	 * Restrict KVM-defined flags to bits 63:32 so that it's impossible for
+	 * them to conflict with #PF error codes, which are limited to 32 bits.
+	 */
 	BUILD_BUG_ON(lower_32_bits(PFERR_SYNTHETIC_MASK));
 
 	vcpu->arch.l1tf_flush_l1d = true;
@@ -7049,7 +7051,6 @@ static unsigned long mmu_shrink_scan(struct shrinker *shrink,
 	list_for_each_entry(kvm, &vm_list, vm_list) {
 		int idx;
-		LIST_HEAD(invalid_list);
 
 		/*
 		 * Never scan more than sc->nr_to_scan VM instances.
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -911,7 +911,8 @@ static int FNAME(sync_spte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, int
 	gpa_t pte_gpa;
 	gfn_t gfn;
 
-	if (WARN_ON_ONCE(sp->spt[i] == SHADOW_NONPRESENT_VALUE))
+	if (WARN_ON_ONCE(sp->spt[i] == SHADOW_NONPRESENT_VALUE ||
+			 !sp->shadowed_translation))
 		return 0;
 
 	first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1340,17 +1340,15 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm,
 	return spte_set;
 }
 
-static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
+static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(void)
 {
 	struct kvm_mmu_page *sp;
 
-	gfp |= __GFP_ZERO;
-
-	sp = kmem_cache_alloc(mmu_page_header_cache, gfp);
+	sp = kmem_cache_zalloc(mmu_page_header_cache, GFP_KERNEL_ACCOUNT);
 	if (!sp)
 		return NULL;
 
-	sp->spt = (void *)__get_free_page(gfp);
+	sp->spt = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
 	if (!sp->spt) {
 		kmem_cache_free(mmu_page_header_cache, sp);
 		return NULL;
@@ -1359,47 +1357,6 @@ static struct kvm_mmu_page *__tdp_mmu_alloc_sp_for_split(gfp_t gfp)
 	return sp;
 }
 
-static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
-						       struct tdp_iter *iter,
-						       bool shared)
-{
-	struct kvm_mmu_page *sp;
-
-	kvm_lockdep_assert_mmu_lock_held(kvm, shared);
-
-	/*
-	 * Since we are allocating while under the MMU lock we have to be
-	 * careful about GFP flags. Use GFP_NOWAIT to avoid blocking on direct
-	 * reclaim and to avoid making any filesystem callbacks (which can end
-	 * up invoking KVM MMU notifiers, resulting in a deadlock).
-	 *
-	 * If this allocation fails we drop the lock and retry with reclaim
-	 * allowed.
-	 */
-	sp = __tdp_mmu_alloc_sp_for_split(GFP_NOWAIT | __GFP_ACCOUNT);
-	if (sp)
-		return sp;
-
-	rcu_read_unlock();
-
-	if (shared)
-		read_unlock(&kvm->mmu_lock);
-	else
-		write_unlock(&kvm->mmu_lock);
-
-	iter->yielded = true;
-	sp = __tdp_mmu_alloc_sp_for_split(GFP_KERNEL_ACCOUNT);
-
-	if (shared)
-		read_lock(&kvm->mmu_lock);
-	else
-		write_lock(&kvm->mmu_lock);
-
-	rcu_read_lock();
-
-	return sp;
-}
-
 /* Note, the caller is responsible for initializing @sp. */
 static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
 				   struct kvm_mmu_page *sp, bool shared)
@@ -1446,7 +1403,6 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
 {
 	struct kvm_mmu_page *sp = NULL;
 	struct tdp_iter iter;
-	int ret = 0;
 
 	rcu_read_lock();
@@ -1470,17 +1426,31 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
 			continue;
 
 		if (!sp) {
-			sp = tdp_mmu_alloc_sp_for_split(kvm, &iter, shared);
+			rcu_read_unlock();
+
+			if (shared)
+				read_unlock(&kvm->mmu_lock);
+			else
+				write_unlock(&kvm->mmu_lock);
+
+			sp = tdp_mmu_alloc_sp_for_split();
+
+			if (shared)
+				read_lock(&kvm->mmu_lock);
+			else
+				write_lock(&kvm->mmu_lock);
+
 			if (!sp) {
-				ret = -ENOMEM;
 				trace_kvm_mmu_split_huge_page(iter.gfn,
 							      iter.old_spte,
-							      iter.level, ret);
-				break;
+							      iter.level, -ENOMEM);
+				return -ENOMEM;
 			}
 
-			if (iter.yielded)
-				continue;
+			rcu_read_lock();
+
+			iter.yielded = true;
+			continue;
 		}
 
 		tdp_mmu_init_child_sp(sp, &iter);
@@ -1501,7 +1471,7 @@ static int tdp_mmu_split_huge_pages_root(struct kvm *kvm,
 	if (sp)
 		tdp_mmu_free_sp(sp);
 
-	return ret;
+	return 0;
 }