Commit f0d8690a authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "This includes a fix for two oopses, one on PPC and on x86.

  The rest is fixes for bugs with newer Intel processors"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  kvm/fpu: Enable eager restore kvm FPU for MPX
  Revert "KVM: x86: drop fpu_activate hook"
  kvm: fix crash in kvm_vcpu_reload_apic_access_page
  KVM: MMU: fix SMAP virtualization
  KVM: MMU: fix CR4.SMEP=1, CR0.WP=0 with shadow pages
  KVM: MMU: fix smap permission check
  KVM: PPC: Book3S HV: Fix list traversal in error case
parents 2f8126e3 c447e76b
...@@ -169,6 +169,10 @@ Shadow pages contain the following information: ...@@ -169,6 +169,10 @@ Shadow pages contain the following information:
Contains the value of cr4.smep && !cr0.wp for which the page is valid Contains the value of cr4.smep && !cr0.wp for which the page is valid
(pages for which this is true are different from other pages; see the (pages for which this is true are different from other pages; see the
treatment of cr0.wp=0 below). treatment of cr0.wp=0 below).
role.smap_andnot_wp:
Contains the value of cr4.smap && !cr0.wp for which the page is valid
(pages for which this is true are different from other pages; see the
treatment of cr0.wp=0 below).
gfn: gfn:
Either the guest page table containing the translations shadowed by this Either the guest page table containing the translations shadowed by this
page, or the base page frame for linear translations. See role.direct. page, or the base page frame for linear translations. See role.direct.
...@@ -344,10 +348,16 @@ on fault type: ...@@ -344,10 +348,16 @@ on fault type:
(user write faults generate a #PF) (user write faults generate a #PF)
In the first case there is an additional complication if CR4.SMEP is In the first case there are two additional complications:
enabled: since we've turned the page into a kernel page, the kernel may now - if CR4.SMEP is enabled: since we've turned the page into a kernel page,
execute it. We handle this by also setting spte.nx. If we get a user the kernel may now execute it. We handle this by also setting spte.nx.
fetch or read fault, we'll change spte.u=1 and spte.nx=gpte.nx back. If we get a user fetch or read fault, we'll change spte.u=1 and
spte.nx=gpte.nx back.
- if CR4.SMAP is disabled: since the page has been changed to a kernel
page, it can not be reused when CR4.SMAP is enabled. We set
CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note,
here we do not care the case that CR4.SMAP is enabled since KVM will
directly inject #PF to guest due to failed permission check.
To prevent an spte that was converted into a kernel page with cr0.wp=0 To prevent an spte that was converted into a kernel page with cr0.wp=0
from being written by the kernel after cr0.wp has changed to 1, we make from being written by the kernel after cr0.wp has changed to 1, we make
......
...@@ -1952,7 +1952,7 @@ static void post_guest_process(struct kvmppc_vcore *vc) ...@@ -1952,7 +1952,7 @@ static void post_guest_process(struct kvmppc_vcore *vc)
*/ */
static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
{ {
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu, *vnext;
int i; int i;
int srcu_idx; int srcu_idx;
...@@ -1982,7 +1982,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) ...@@ -1982,7 +1982,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/ */
if ((threads_per_core > 1) && if ((threads_per_core > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) { ((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) { list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
arch.run_list) {
vcpu->arch.ret = -EBUSY; vcpu->arch.ret = -EBUSY;
kvmppc_remove_runnable(vc, vcpu); kvmppc_remove_runnable(vc, vcpu);
wake_up(&vcpu->arch.cpu_run); wake_up(&vcpu->arch.cpu_run);
......
...@@ -207,6 +207,7 @@ union kvm_mmu_page_role { ...@@ -207,6 +207,7 @@ union kvm_mmu_page_role {
unsigned nxe:1; unsigned nxe:1;
unsigned cr0_wp:1; unsigned cr0_wp:1;
unsigned smep_andnot_wp:1; unsigned smep_andnot_wp:1;
unsigned smap_andnot_wp:1;
}; };
}; };
...@@ -400,6 +401,7 @@ struct kvm_vcpu_arch { ...@@ -400,6 +401,7 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_page_header_cache; struct kvm_mmu_memory_cache mmu_page_header_cache;
struct fpu guest_fpu; struct fpu guest_fpu;
bool eager_fpu;
u64 xcr0; u64 xcr0;
u64 guest_supported_xcr0; u64 guest_supported_xcr0;
u32 guest_xstate_size; u32 guest_xstate_size;
...@@ -743,6 +745,7 @@ struct kvm_x86_ops { ...@@ -743,6 +745,7 @@ struct kvm_x86_ops {
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
void (*fpu_activate)(struct kvm_vcpu *vcpu);
void (*fpu_deactivate)(struct kvm_vcpu *vcpu); void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
void (*tlb_flush)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu);
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/vmalloc.h> #include <linux/vmalloc.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <asm/i387.h> /* For use_eager_fpu. Ugh! */
#include <asm/fpu-internal.h> /* For use_eager_fpu. Ugh! */
#include <asm/user.h> #include <asm/user.h>
#include <asm/xsave.h> #include <asm/xsave.h>
#include "cpuid.h" #include "cpuid.h"
...@@ -95,6 +97,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) ...@@ -95,6 +97,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true); best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu);
/* /*
* The existing code assumes virtual address is 48-bit in the canonical * The existing code assumes virtual address is 48-bit in the canonical
* address checks; exit if it is ever changed. * address checks; exit if it is ever changed.
......
...@@ -117,4 +117,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu) ...@@ -117,4 +117,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
best = kvm_find_cpuid_entry(vcpu, 7, 0); best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_RTM)); return best && (best->ebx & bit(X86_FEATURE_RTM));
} }
static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->ebx & bit(X86_FEATURE_MPX));
}
#endif #endif
...@@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu, ...@@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
} }
} }
void update_permission_bitmask(struct kvm_vcpu *vcpu, static void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept) struct kvm_mmu *mmu, bool ept)
{ {
unsigned bit, byte, pfec; unsigned bit, byte, pfec;
u8 map; u8 map;
...@@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) ...@@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
{ {
bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP); bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
struct kvm_mmu *context = &vcpu->arch.mmu; struct kvm_mmu *context = &vcpu->arch.mmu;
MMU_WARN_ON(VALID_PAGE(context->root_hpa)); MMU_WARN_ON(VALID_PAGE(context->root_hpa));
...@@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) ...@@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
context->base_role.cr0_wp = is_write_protection(vcpu); context->base_role.cr0_wp = is_write_protection(vcpu);
context->base_role.smep_andnot_wp context->base_role.smep_andnot_wp
= smep && !is_write_protection(vcpu); = smep && !is_write_protection(vcpu);
context->base_role.smap_andnot_wp
= smap && !is_write_protection(vcpu);
} }
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
...@@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ...@@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes) const u8 *new, int bytes)
{ {
gfn_t gfn = gpa >> PAGE_SHIFT; gfn_t gfn = gpa >> PAGE_SHIFT;
union kvm_mmu_page_role mask = { .word = 0 };
struct kvm_mmu_page *sp; struct kvm_mmu_page *sp;
LIST_HEAD(invalid_list); LIST_HEAD(invalid_list);
u64 entry, gentry, *spte; u64 entry, gentry, *spte;
int npte; int npte;
bool remote_flush, local_flush, zap_page; bool remote_flush, local_flush, zap_page;
union kvm_mmu_page_role mask = (union kvm_mmu_page_role) {
.cr0_wp = 1,
.cr4_pae = 1,
.nxe = 1,
.smep_andnot_wp = 1,
.smap_andnot_wp = 1,
};
/* /*
* If we don't have indirect shadow pages, it means no page is * If we don't have indirect shadow pages, it means no page is
...@@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, ...@@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
++vcpu->kvm->stat.mmu_pte_write; ++vcpu->kvm->stat.mmu_pte_write;
kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) { for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
if (detect_write_misaligned(sp, gpa, bytes) || if (detect_write_misaligned(sp, gpa, bytes) ||
detect_write_flooding(sp)) { detect_write_flooding(sp)) {
......
...@@ -71,8 +71,6 @@ enum { ...@@ -71,8 +71,6 @@ enum {
int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct); int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
bool ept);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm) static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{ {
...@@ -166,6 +164,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, ...@@ -166,6 +164,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
int index = (pfec >> 1) + int index = (pfec >> 1) +
(smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1)); (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
WARN_ON(pfec & PFERR_RSVD_MASK);
return (mmu->permissions[index] >> pte_access) & 1; return (mmu->permissions[index] >> pte_access) & 1;
} }
......
...@@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code, ...@@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
mmu_is_nested(vcpu)); mmu_is_nested(vcpu));
if (likely(r != RET_MMIO_PF_INVALID)) if (likely(r != RET_MMIO_PF_INVALID))
return r; return r;
/*
* page fault with PFEC.RSVD = 1 is caused by shadow
* page fault, should not be used to walk guest page
* table.
*/
error_code &= ~PFERR_RSVD_MASK;
}; };
r = mmu_topup_memory_caches(vcpu); r = mmu_topup_memory_caches(vcpu);
......
...@@ -4381,6 +4381,7 @@ static struct kvm_x86_ops svm_x86_ops = { ...@@ -4381,6 +4381,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.cache_reg = svm_cache_reg, .cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags, .get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags, .set_rflags = svm_set_rflags,
.fpu_activate = svm_fpu_activate,
.fpu_deactivate = svm_fpu_deactivate, .fpu_deactivate = svm_fpu_deactivate,
.tlb_flush = svm_flush_tlb, .tlb_flush = svm_flush_tlb,
......
...@@ -10185,6 +10185,7 @@ static struct kvm_x86_ops vmx_x86_ops = { ...@@ -10185,6 +10185,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.cache_reg = vmx_cache_reg, .cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags, .get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags, .set_rflags = vmx_set_rflags,
.fpu_activate = vmx_fpu_activate,
.fpu_deactivate = vmx_fpu_deactivate, .fpu_deactivate = vmx_fpu_deactivate,
.tlb_flush = vmx_flush_tlb, .tlb_flush = vmx_flush_tlb,
......
...@@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr); ...@@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{ {
unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long old_cr4 = kvm_read_cr4(vcpu);
unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
X86_CR4_PAE | X86_CR4_SMEP; X86_CR4_SMEP | X86_CR4_SMAP;
if (cr4 & CR4_RESERVED_BITS) if (cr4 & CR4_RESERVED_BITS)
return 1; return 1;
...@@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) ...@@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu); kvm_mmu_reset_context(vcpu);
if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE) if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
kvm_update_cpuid(vcpu); kvm_update_cpuid(vcpu);
...@@ -6197,6 +6195,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) ...@@ -6197,6 +6195,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
return; return;
page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
if (is_error_page(page))
return;
kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page)); kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
/* /*
...@@ -7060,7 +7060,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) ...@@ -7060,7 +7060,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
fpu_save_init(&vcpu->arch.guest_fpu); fpu_save_init(&vcpu->arch.guest_fpu);
__kernel_fpu_end(); __kernel_fpu_end();
++vcpu->stat.fpu_reload; ++vcpu->stat.fpu_reload;
kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); if (!vcpu->arch.eager_fpu)
kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
trace_kvm_fpu(0); trace_kvm_fpu(0);
} }
...@@ -7076,11 +7078,21 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) ...@@ -7076,11 +7078,21 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
unsigned int id) unsigned int id)
{ {
struct kvm_vcpu *vcpu;
if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
printk_once(KERN_WARNING printk_once(KERN_WARNING
"kvm: SMP vm created on host with unstable TSC; " "kvm: SMP vm created on host with unstable TSC; "
"guest TSC will not be reliable\n"); "guest TSC will not be reliable\n");
return kvm_x86_ops->vcpu_create(kvm, id);
vcpu = kvm_x86_ops->vcpu_create(kvm, id);
/*
* Activate fpu unconditionally in case the guest needs eager FPU. It will be
* deactivated soon if it doesn't.
*/
kvm_x86_ops->fpu_activate(vcpu);
return vcpu;
} }
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment