Commit be94f6b7 authored by Huaitong Han, committed by Paolo Bonzini

KVM, pkeys: add pkeys support for permission_fault

Protection keys define a new 4-bit protection key field (PKEY) in bits
62:59 of leaf entries of the page tables. The PKEY is an index into the
PKRU register, which holds 16 protection domains; each domain has two
bits (a write-disable bit and an access-disable bit).

The static logic was set up in update_pkru_bitmask; the dynamic logic
must read the pkey from the page-table entry, fetch the PKRU value, and
deduce the correct result.
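To make that layout concrete, here is a minimal standalone C sketch of the
bit arithmetic described above. It is not part of this commit: the helper
names pte_pkey, pkru_ad and pkru_wd are hypothetical (the kernel derives
the key via pte_flags_pkey() instead, as in the paging_tmpl.h hunk below).

#include <stdint.h>
#include <stdio.h>

/* Bits 62:59 of a leaf page-table entry hold the 4-bit protection key. */
static unsigned pte_pkey(uint64_t pte)
{
	return (unsigned)((pte >> 59) & 0xf);
}

/*
 * PKRU packs 16 domains of 2 bits each: for domain k, bit 2k is the
 * access-disable (AD) bit and bit 2k + 1 is the write-disable (WD) bit.
 */
static unsigned pkru_ad(uint32_t pkru, unsigned pkey)
{
	return (pkru >> (pkey * 2)) & 1;
}

static unsigned pkru_wd(uint32_t pkru, unsigned pkey)
{
	return (pkru >> (pkey * 2 + 1)) & 1;
}

int main(void)
{
	uint64_t pte = (5ull << 59) | 1;  /* present leaf PTE tagged with pkey 5 */
	uint32_t pkru = 2u << (5 * 2);    /* domain 5: WD set, AD clear */
	unsigned key = pte_pkey(pte);

	printf("pkey=%u AD=%u WD=%u\n", key, pkru_ad(pkru, key), pkru_wd(pkru, key));
	return 0;
}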

[ Huaitong: Xiao helped modify many sections. ]
Signed-off-by: Huaitong Han <huaitong.han@intel.com>
Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 2d344105
@@ -187,12 +187,14 @@ enum {
 #define PFERR_USER_BIT 2
 #define PFERR_RSVD_BIT 3
 #define PFERR_FETCH_BIT 4
+#define PFERR_PK_BIT 5

 #define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
 #define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
 #define PFERR_USER_MASK (1U << PFERR_USER_BIT)
 #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
 #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
+#define PFERR_PK_MASK (1U << PFERR_PK_BIT)

 /* apic attention bits */
 #define KVM_APIC_CHECK_VAPIC 0
@@ -882,6 +884,7 @@ struct kvm_x86_ops {
 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+	u32 (*get_pkru)(struct kvm_vcpu *vcpu);
 	void (*fpu_activate)(struct kvm_vcpu *vcpu);
 	void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
...
@@ -84,6 +84,11 @@ static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu)
 		| ((u64)(kvm_register_read(vcpu, VCPU_REGS_RDX) & -1u) << 32);
 }

+static inline u32 kvm_read_pkru(struct kvm_vcpu *vcpu)
+{
+	return kvm_x86_ops->get_pkru(vcpu);
+}
+
 static inline void enter_guest_mode(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hflags |= HF_GUEST_MASK;
...
@@ -10,10 +10,11 @@
 #define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)

 #define PT_WRITABLE_SHIFT 1
+#define PT_USER_SHIFT 2

 #define PT_PRESENT_MASK (1ULL << 0)
 #define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-#define PT_USER_MASK (1ULL << 2)
+#define PT_USER_MASK (1ULL << PT_USER_SHIFT)
 #define PT_PWT_MASK (1ULL << 3)
 #define PT_PCD_MASK (1ULL << 4)
 #define PT_ACCESSED_SHIFT 5
@@ -149,7 +150,8 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
  * if the access faults.
  */
 static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-				  unsigned pte_access, unsigned pfec)
+				  unsigned pte_access, unsigned pte_pkey,
+				  unsigned pfec)
 {
 	int cpl = kvm_x86_ops->get_cpl(vcpu);
 	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
@@ -170,11 +172,32 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
 	int index = (pfec >> 1) +
 		    (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+	bool fault = (mmu->permissions[index] >> pte_access) & 1;

-	WARN_ON(pfec & PFERR_RSVD_MASK);
+	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
 	pfec |= PFERR_PRESENT_MASK;
-	return -((mmu->permissions[index] >> pte_access) & 1) & pfec;
+
+	if (unlikely(mmu->pkru_mask)) {
+		u32 pkru_bits, offset;
+
+		/*
+		 * PKRU defines 32 bits; there are 16 domains and 2
+		 * attribute bits per domain in PKRU. pte_pkey is the
+		 * index of the protection domain, so pte_pkey * 2 is
+		 * the index of the first bit for the domain.
+		 */
+		pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
+
+		/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
+		offset = pfec - 1 +
+			((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
+
+		pkru_bits &= mmu->pkru_mask >> offset;
+
+		pfec |= -pkru_bits & PFERR_PK_MASK;
+		fault |= (pkru_bits != 0);
+	}
+
+	return -(uint32_t)fault & pfec;
 }

 void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
...
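The offset arithmetic in that pkey branch is dense, so here is a hedged
standalone sketch of the same deduction. It is illustrative only:
pkey_fault_bits is a hypothetical name, and pkru_mask is assumed to be
precomputed by update_pkru_bitmask (from the earlier patch in this series)
so that each fault-type/PTE-user combination stores, at twice its
permission index, which of the domain's AD/WD bits deny the access.

#include <stdint.h>
#include <stdio.h>

/* Illustrative copies of the kernel constants used above. */
#define PFERR_PRESENT_MASK (1u << 0)
#define PFERR_WRITE_MASK   (1u << 1)
#define PFERR_USER_MASK    (1u << 2)
#define PFERR_RSVD_BIT     3
#define PT_USER_SHIFT      2
#define PT_USER_MASK       (1u << PT_USER_SHIFT)

/*
 * Hypothetical standalone version of the pkey check in permission_fault().
 * pfec must already have PFERR_PRESENT_MASK set, as in the kernel code.
 */
static uint32_t pkey_fault_bits(uint32_t pkru_mask, uint32_t pkru,
                                uint32_t pte_access, uint32_t pte_pkey,
                                uint32_t pfec)
{
	/* The faulting domain's two disable bits: bit 0 = AD, bit 1 = WD. */
	uint32_t pkru_bits = (pkru >> (pte_pkey * 2)) & 3;

	/*
	 * Index into pkru_mask: pfec - 1 clears the present bit (bit 0),
	 * and the PTE's user/supervisor bit is shifted from bit 2 up to
	 * bit 3, the slot PFEC.RSVD would otherwise occupy.
	 */
	uint32_t offset = pfec - 1 +
		((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));

	/* Keep only the disable bits that apply to this kind of access. */
	return pkru_bits & (pkru_mask >> offset);
}

int main(void)
{
	/* User-mode write to a user page tagged with pkey 5, WD set. */
	uint32_t pfec = PFERR_PRESENT_MASK | PFERR_WRITE_MASK | PFERR_USER_MASK;
	uint32_t pkru = 2u << (5 * 2);   /* WD=1, AD=0 for domain 5 */
	uint32_t pkru_mask = 3u << 14;   /* AD and WD both deny this access */
	uint32_t bits = pkey_fault_bits(pkru_mask, pkru, PT_USER_MASK, 5, pfec);

	printf("pkey fault: %s\n", bits ? "yes (PFERR_PK set)" : "no");
	return 0;
}

Compiled as-is, the example reports a pkey fault: the write-disable bit of
domain 5 is set, and the assumed pkru_mask marks WD as applicable to a
user-mode write.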
@@ -257,6 +257,17 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
 	return 0;
 }

+static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
+{
+	unsigned pkeys = 0;
+#if PTTYPE == 64
+	pte_t pte = {.pte = gpte};
+
+	pkeys = pte_flags_pkey(pte_flags(pte));
+#endif
+	return pkeys;
+}
+
 /*
  * Fetch a guest pte for a guest virtual address
  */
@@ -268,7 +279,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, pte_access, accessed_dirty;
+	unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
 	gpa_t pte_gpa;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
@@ -359,7 +370,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 		walker->ptes[walker->level - 1] = pte;
 	} while (!is_last_gpte(mmu, walker->level, pte));

-	errcode = permission_fault(vcpu, mmu, pte_access, access);
+	pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
+	errcode = permission_fault(vcpu, mmu, pte_access, pte_pkey, access);
 	if (unlikely(errcode))
 		goto error;
...
@@ -1280,6 +1280,11 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	to_svm(vcpu)->vmcb->save.rflags = rflags;
 }

+static u32 svm_get_pkru(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 {
 	switch (reg) {
@@ -4347,6 +4352,9 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.cache_reg = svm_cache_reg,
 	.get_rflags = svm_get_rflags,
 	.set_rflags = svm_set_rflags,
+
+	.get_pkru = svm_get_pkru,
+
 	.fpu_activate = svm_fpu_activate,
 	.fpu_deactivate = svm_fpu_deactivate,
...
@@ -2292,6 +2292,11 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	vmcs_writel(GUEST_RFLAGS, rflags);
 }

+static u32 vmx_get_pkru(struct kvm_vcpu *vcpu)
+{
+	return to_vmx(vcpu)->guest_pkru;
+}
+
 static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 {
 	u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -10925,6 +10930,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.cache_reg = vmx_cache_reg,
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
+
+	.get_pkru = vmx_get_pkru,
+
 	.fpu_activate = vmx_fpu_activate,
 	.fpu_deactivate = vmx_fpu_deactivate,
...
@@ -4326,9 +4326,14 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 	u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
 		| (write ? PFERR_WRITE_MASK : 0);

+	/*
+	 * Currently, PKRU is only applied to EPT-enabled guests, so
+	 * there is no pkey in the EPT page table for an L1 guest or
+	 * in the EPT shadow page table for an L2 guest.
+	 */
 	if (vcpu_match_mmio_gva(vcpu, gva)
 	    && !permission_fault(vcpu, vcpu->arch.walk_mmu,
-				 vcpu->arch.access, access)) {
+				 vcpu->arch.access, 0, access)) {
 		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
 			(gva & (PAGE_SIZE - 1));

 		trace_vcpu_match_mmio(gva, *gpa, write, false);
...