Commit 9f03db66 authored by Marc Zyngier

Merge branch kvm-arm64/mmu/mte into kvmarm-master/next

KVM/arm64 support for MTE, courtesy of Steven Price.
It allows the guest to use memory tagging, and offers
a new userspace API to save/restore the tags.

* kvm-arm64/mmu/mte:
  KVM: arm64: Document MTE capability and ioctl
  KVM: arm64: Add ioctl to fetch/store tags in a guest
  KVM: arm64: Expose KVM_ARM_CAP_MTE
  KVM: arm64: Save/restore MTE registers
  KVM: arm64: Introduce MTE VM feature
  arm64: mte: Sync tags for pages where PTE is untagged
Signed-off-by: Marc Zyngier <maz@kernel.org>
parents 2fea6cf7 04c02c20
......@@ -5034,6 +5034,43 @@ see KVM_XEN_VCPU_SET_ATTR above.
The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
with the KVM_XEN_VCPU_GET_ATTR ioctl.

4.130 KVM_ARM_MTE_COPY_TAGS
---------------------------

:Capability: KVM_CAP_ARM_MTE
:Architectures: arm64
:Type: vm ioctl
:Parameters: struct kvm_arm_copy_mte_tags
:Returns: number of bytes copied, < 0 on error (-EINVAL for incorrect
          arguments, -EFAULT if memory cannot be accessed).

::

  struct kvm_arm_copy_mte_tags {
          __u64 guest_ipa;
          __u64 length;
          void __user *addr;
          __u64 flags;
          __u64 reserved[2];
  };

Copies Memory Tagging Extension (MTE) tags to/from guest tag memory. The
``guest_ipa`` and ``length`` fields must be ``PAGE_SIZE`` aligned. The ``addr``
field must point to a buffer which the tags will be copied to or from.

``flags`` specifies the direction of copy, either ``KVM_ARM_TAGS_TO_GUEST`` or
``KVM_ARM_TAGS_FROM_GUEST``.

The size of the buffer to store the tags is ``(length / 16)`` bytes
(granules in MTE are 16 bytes long). Each byte contains a single tag
value. This matches the format of ``PTRACE_PEEKMTETAGS`` and
``PTRACE_POKEMTETAGS``.

If an error occurs before any data is copied then a negative error code is
returned. If some tags have been copied before an error occurs then the number
of bytes successfully copied is returned. If the call completes successfully
then ``length`` is returned.
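
As an aside (this sketch is not part of the patch), a VMM could use the ioctl
to read back the tags for a single page of guest IPA space roughly as follows.
The ``vm_fd`` and ``guest_ipa`` values, the page size and the helper name
``dump_guest_tags`` are illustrative assumptions::

  #include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Fetch the MTE tags for one page of guest IPA space. */
  static int dump_guest_tags(int vm_fd, uint64_t guest_ipa, size_t page_size)
  {
          /* One tag byte per 16-byte granule, i.e. length / 16 bytes. */
          size_t tag_bytes = page_size / 16;
          uint8_t *tags = malloc(tag_bytes);
          struct kvm_arm_copy_mte_tags copy = {
                  .guest_ipa = guest_ipa,   /* must be PAGE_SIZE aligned */
                  .length    = page_size,   /* must be PAGE_SIZE aligned */
                  .addr      = tags,
                  .flags     = KVM_ARM_TAGS_FROM_GUEST,
          };
          int ret;

          if (!tags)
                  return -1;

          /* On success the ioctl returns the number of tag bytes copied. */
          ret = ioctl(vm_fd, KVM_ARM_MTE_COPY_TAGS, &copy);
          if (ret < 0) {
                  perror("KVM_ARM_MTE_COPY_TAGS");
                  free(tags);
                  return -1;
          }

          printf("copied %d tag bytes from IPA 0x%llx\n", ret,
                 (unsigned long long)guest_ipa);
          free(tags);
          return 0;
  }

Writing tags into the guest is the same call with ``KVM_ARM_TAGS_TO_GUEST``
and a buffer the VMM has filled, e.g. when restoring a migrated VM.
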
5. The kvm_run structure
========================
......@@ -6362,6 +6399,30 @@ default.
See Documentation/x86/sgx/2.Kernel-internals.rst for more details.

7.26 KVM_CAP_ARM_MTE
--------------------

:Architectures: arm64
:Parameters: none

This capability indicates that KVM (and the hardware) supports exposing the
Memory Tagging Extension (MTE) to the guest. It must also be enabled by the
VMM before creating any VCPUs to allow the guest access. Note that MTE is only
available to a guest running in AArch64 mode and enabling this capability will
cause attempts to create AArch32 VCPUs to fail.

When enabled the guest is able to access tags associated with any memory given
to the guest. KVM will ensure that the tags are maintained during swap or
hibernation of the host; however the VMM needs to manually save/restore the
tags as appropriate if the VM is migrated.

When this capability is enabled all memory in memslots must be mapped as
not-shareable (no MAP_SHARED); attempts to create a memslot with a
MAP_SHARED mmap will result in an -EINVAL return.

When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to
perform a bulk copy of tags to/from the guest.
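
As an aside (not part of the patch), a VMM would typically probe and enable
the capability immediately after ``KVM_CREATE_VM`` and before the first
``KVM_CREATE_VCPU``; the ``vm_fd`` parameter and the ``enable_mte`` helper
name are illustrative assumptions::

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int enable_mte(int vm_fd)
  {
          struct kvm_enable_cap cap = {
                  .cap = KVM_CAP_ARM_MTE,
          };

          /* Check that KVM and the hardware support MTE at all. */
          if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_MTE) <= 0)
                  return -1;

          /* Enabling after a vCPU has been created fails with -EINVAL. */
          return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
  }
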
8. Other capabilities.
======================
......
......@@ -12,7 +12,8 @@
#include <asm/types.h>
/* Hyp Configuration Register (HCR) bits */
#define HCR_ATA (UL(1) << 56)
#define HCR_ATA_SHIFT 56
#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
#define HCR_FWB (UL(1) << 46)
#define HCR_API (UL(1) << 41)
#define HCR_APK (UL(1) << 40)
......
......@@ -84,6 +84,9 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
vcpu_el1_is_32bit(vcpu))
vcpu->arch.hcr_el2 |= HCR_TID2;
if (kvm_has_mte(vcpu->kvm))
vcpu->arch.hcr_el2 |= HCR_ATA;
}
static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
......
......@@ -133,6 +133,9 @@ struct kvm_arch {
u8 pfr0_csv2;
u8 pfr0_csv3;
/* Memory Tagging Extension enabled for the guest */
bool mte_enabled;
};
struct kvm_vcpu_fault_info {
......@@ -207,6 +210,12 @@ enum vcpu_sysreg {
CNTP_CVAL_EL0,
CNTP_CTL_EL0,
/* Memory Tagging Extension registers */
RGSR_EL1, /* Random Allocation Tag Seed Register */
GCR_EL1, /* Tag Control Register */
TFSR_EL1, /* Tag Fault Status Register (EL1) */
TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
/* 32bit specific registers. Keep them at the end of the range */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
......@@ -722,6 +731,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
struct kvm_arm_copy_mte_tags *copy_tags);
/* Guest/host FPSIMD coordination helpers */
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu);
void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu);
......@@ -770,6 +782,7 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
#define kvm_arm_vcpu_sve_finalized(vcpu) \
((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED)
#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled)
#define kvm_vcpu_has_pmu(vcpu) \
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
......
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2020-2021 ARM Ltd.
*/
#ifndef __ASM_KVM_MTE_H
#define __ASM_KVM_MTE_H
#ifdef __ASSEMBLY__
#include <asm/sysreg.h>
#ifdef CONFIG_ARM64_MTE
.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
alternative_if_not ARM64_MTE
b .L__skip_switch\@
alternative_else_nop_endif
mrs \reg1, hcr_el2
tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
mrs_s \reg1, SYS_RGSR_EL1
str \reg1, [\h_ctxt, #CPU_RGSR_EL1]
mrs_s \reg1, SYS_GCR_EL1
str \reg1, [\h_ctxt, #CPU_GCR_EL1]
ldr \reg1, [\g_ctxt, #CPU_RGSR_EL1]
msr_s SYS_RGSR_EL1, \reg1
ldr \reg1, [\g_ctxt, #CPU_GCR_EL1]
msr_s SYS_GCR_EL1, \reg1
.L__skip_switch\@:
.endm
.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
alternative_if_not ARM64_MTE
b .L__skip_switch\@
alternative_else_nop_endif
mrs \reg1, hcr_el2
tbz \reg1, #(HCR_ATA_SHIFT), .L__skip_switch\@
mrs_s \reg1, SYS_RGSR_EL1
str \reg1, [\g_ctxt, #CPU_RGSR_EL1]
mrs_s \reg1, SYS_GCR_EL1
str \reg1, [\g_ctxt, #CPU_GCR_EL1]
ldr \reg1, [\h_ctxt, #CPU_RGSR_EL1]
msr_s SYS_RGSR_EL1, \reg1
ldr \reg1, [\h_ctxt, #CPU_GCR_EL1]
msr_s SYS_GCR_EL1, \reg1
isb
.L__skip_switch\@:
.endm
#else /* !CONFIG_ARM64_MTE */
.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
.endm
.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
.endm
#endif /* CONFIG_ARM64_MTE */
#endif /* __ASSEMBLY__ */
#endif /* __ASM_KVM_MTE_H */
......@@ -7,6 +7,7 @@
#define MTE_GRANULE_SIZE UL(16)
#define MTE_GRANULE_MASK (~(MTE_GRANULE_SIZE - 1))
#define MTE_GRANULES_PER_PAGE (PAGE_SIZE / MTE_GRANULE_SIZE)
#define MTE_TAG_SHIFT 56
#define MTE_TAG_SIZE 4
#define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
......
......@@ -37,7 +37,7 @@ void mte_free_tag_storage(char *storage);
/* track which pages have valid allocation tags */
#define PG_mte_tagged PG_arch_2
void mte_sync_tags(pte_t *ptep, pte_t pte);
void mte_sync_tags(pte_t old_pte, pte_t pte);
void mte_copy_page_tags(void *kto, const void *kfrom);
void mte_thread_init_user(void);
void mte_thread_switch(struct task_struct *next);
......@@ -53,7 +53,7 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
/* unused if !CONFIG_ARM64_MTE, silence the compiler */
#define PG_mte_tagged 0
static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
static inline void mte_sync_tags(pte_t old_pte, pte_t pte)
{
}
static inline void mte_copy_page_tags(void *kto, const void *kfrom)
......
......@@ -314,9 +314,25 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
__sync_icache_dcache(pte);
if (system_supports_mte() &&
pte_present(pte) && pte_tagged(pte) && !pte_special(pte))
mte_sync_tags(ptep, pte);
/*
* If the PTE would provide user space access to the tags associated
* with it then ensure that the MTE tags are synchronised. Although
* pte_access_permitted() returns false for exec only mappings, they
* don't expose tags (instruction fetches don't check tags).
*/
if (system_supports_mte() && pte_access_permitted(pte, false) &&
!pte_special(pte)) {
pte_t old_pte = READ_ONCE(*ptep);
/*
* We only need to synchronise if the new PTE has tags enabled
* or if swapping in (in which case another mapping may have
* set tags in the past even if this PTE isn't tagged).
* (!pte_none() && !pte_present()) is an open coded version of
* is_swap_pte()
*/
if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte)))
mte_sync_tags(old_pte, pte);
}
__check_racy_pte_update(mm, ptep, pte);
......
......@@ -651,7 +651,8 @@
#define INIT_SCTLR_EL2_MMU_ON \
(SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I | \
SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | \
SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
#define INIT_SCTLR_EL2_MMU_OFF \
(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
......
......@@ -184,6 +184,17 @@ struct kvm_vcpu_events {
__u32 reserved[12];
};
struct kvm_arm_copy_mte_tags {
__u64 guest_ipa;
__u64 length;
void __user *addr;
__u64 flags;
__u64 reserved[2];
};
#define KVM_ARM_TAGS_TO_GUEST 0
#define KVM_ARM_TAGS_FROM_GUEST 1
/* If you need to interpret the index values, here is the key: */
#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
#define KVM_REG_ARM_COPROC_SHIFT 16
......
......@@ -111,6 +111,8 @@ int main(void)
DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags));
DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2));
DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs));
DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
DEFINE(CPU_GCR_EL1, offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
DEFINE(CPU_APIAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
DEFINE(CPU_APIBKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
DEFINE(CPU_APDAKEYLO_EL1, offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
......
......@@ -32,10 +32,9 @@ DEFINE_STATIC_KEY_FALSE(mte_async_mode);
EXPORT_SYMBOL_GPL(mte_async_mode);
#endif
static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
static void mte_sync_page_tags(struct page *page, pte_t old_pte,
bool check_swap, bool pte_is_tagged)
{
pte_t old_pte = READ_ONCE(*ptep);
if (check_swap && is_swap_pte(old_pte)) {
swp_entry_t entry = pte_to_swp_entry(old_pte);
......@@ -43,6 +42,9 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
return;
}
if (!pte_is_tagged)
return;
page_kasan_tag_reset(page);
/*
* We need smp_wmb() in between setting the flags and clearing the
......@@ -55,16 +57,22 @@ static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
mte_clear_page_tags(page_address(page));
}
void mte_sync_tags(pte_t *ptep, pte_t pte)
void mte_sync_tags(pte_t old_pte, pte_t pte)
{
struct page *page = pte_page(pte);
long i, nr_pages = compound_nr(page);
bool check_swap = nr_pages == 1;
bool pte_is_tagged = pte_tagged(pte);
/* Early out if there's nothing to do */
if (!check_swap && !pte_is_tagged)
return;
/* if PG_mte_tagged is set, tags have already been initialised */
for (i = 0; i < nr_pages; i++, page++) {
if (!test_and_set_bit(PG_mte_tagged, &page->flags))
mte_sync_page_tags(page, ptep, check_swap);
mte_sync_page_tags(page, old_pte, check_swap,
pte_is_tagged);
}
}
......
......@@ -93,6 +93,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
r = 0;
kvm->arch.return_nisv_io_abort_to_user = true;
break;
case KVM_CAP_ARM_MTE:
if (!system_supports_mte() || kvm->created_vcpus)
return -EINVAL;
r = 0;
kvm->arch.mte_enabled = true;
break;
default:
r = -EINVAL;
break;
......@@ -237,6 +243,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
*/
r = 1;
break;
case KVM_CAP_ARM_MTE:
r = system_supports_mte();
break;
case KVM_CAP_STEAL_TIME:
r = kvm_arm_pvtime_supported();
break;
......@@ -1354,6 +1363,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
return 0;
}
case KVM_ARM_MTE_COPY_TAGS: {
struct kvm_arm_copy_mte_tags copy_tags;
if (copy_from_user(&copy_tags, argp, sizeof(copy_tags)))
return -EFAULT;
return kvm_vm_ioctl_mte_copy_tags(kvm, &copy_tags);
}
default:
return -EINVAL;
}
......
......@@ -995,3 +995,85 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
return ret;
}
long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
struct kvm_arm_copy_mte_tags *copy_tags)
{
gpa_t guest_ipa = copy_tags->guest_ipa;
size_t length = copy_tags->length;
void __user *tags = copy_tags->addr;
gpa_t gfn;
bool write = !(copy_tags->flags & KVM_ARM_TAGS_FROM_GUEST);
int ret = 0;
if (!kvm_has_mte(kvm))
return -EINVAL;
if (copy_tags->reserved[0] || copy_tags->reserved[1])
return -EINVAL;
if (copy_tags->flags & ~KVM_ARM_TAGS_FROM_GUEST)
return -EINVAL;
if (length & ~PAGE_MASK || guest_ipa & ~PAGE_MASK)
return -EINVAL;
gfn = gpa_to_gfn(guest_ipa);
mutex_lock(&kvm->slots_lock);
while (length > 0) {
kvm_pfn_t pfn = gfn_to_pfn_prot(kvm, gfn, write, NULL);
void *maddr;
unsigned long num_tags;
struct page *page;
if (is_error_noslot_pfn(pfn)) {
ret = -EFAULT;
goto out;
}
page = pfn_to_online_page(pfn);
if (!page) {
/* Reject ZONE_DEVICE memory */
ret = -EFAULT;
goto out;
}
maddr = page_address(page);
if (!write) {
if (test_bit(PG_mte_tagged, &page->flags))
num_tags = mte_copy_tags_to_user(tags, maddr,
MTE_GRANULES_PER_PAGE);
else
/* No tags in memory, so write zeros */
num_tags = MTE_GRANULES_PER_PAGE -
clear_user(tags, MTE_GRANULES_PER_PAGE);
kvm_release_pfn_clean(pfn);
} else {
num_tags = mte_copy_tags_from_user(maddr, tags,
MTE_GRANULES_PER_PAGE);
kvm_release_pfn_dirty(pfn);
}
if (num_tags != MTE_GRANULES_PER_PAGE) {
ret = -EFAULT;
goto out;
}
/* Set the flag after checking the write completed fully */
if (write)
set_bit(PG_mte_tagged, &page->flags);
gfn++;
tags += num_tags;
length -= PAGE_SIZE;
}
out:
mutex_unlock(&kvm->slots_lock);
/* If some data has been copied report the number of bytes copied */
if (length != copy_tags->length)
return copy_tags->length - length;
return ret;
}
......@@ -13,6 +13,7 @@
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_mte.h>
#include <asm/kvm_ptrauth.h>
.text
......@@ -51,6 +52,9 @@ alternative_else_nop_endif
add x29, x0, #VCPU_CONTEXT
// mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
mte_switch_to_guest x29, x1, x2
// Macro ptrauth_switch_to_guest format:
// ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
// The below macro to restore guest keys is not implemented in C code
......@@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
// when this feature is enabled for kernel code.
ptrauth_switch_to_hyp x1, x2, x3, x4, x5
// mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
mte_switch_to_hyp x1, x2, x3
// Restore hyp's sp_el0
restore_sp_el0 x2, x3
......
......@@ -112,7 +112,8 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode,
new |= (old & PSR_C_BIT);
new |= (old & PSR_V_BIT);
// TODO: TCO (if/when ARMv8.5-MemTag is exposed to guests)
if (kvm_has_mte(vcpu->kvm))
new |= PSR_TCO_BIT;
new |= (old & PSR_DIT_BIT);
......
......@@ -14,6 +14,7 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
{
......@@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0);
}
static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
{
struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
if (!vcpu)
vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
return kvm_has_mte(kern_hyp_va(vcpu->kvm));
}
static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1);
......@@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par();
ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1);
if (ctxt_has_mte(ctxt)) {
ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
}
ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1);
ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR);
ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR);
......@@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1);
write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1);
if (ctxt_has_mte(ctxt)) {
write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
}
if (!has_vhe() &&
cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
ctxt->__hyp_running_vcpu) {
......
......@@ -853,6 +853,45 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
return PAGE_SHIFT;
}
/*
* The page will be mapped in stage 2 as Normal Cacheable, so the VM will be
* able to see the page's tags and therefore they must be initialised first. If
* PG_mte_tagged is set, tags have already been initialised.
*
* The race in the test/set of the PG_mte_tagged flag is handled by:
* - preventing VM_SHARED mappings in a memslot with MTE preventing two VMs
 *   racing to sanitise the same page
* - mmap_lock protects between a VM faulting a page in and the VMM performing
* an mprotect() to add VM_MTE
*/
static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
unsigned long size)
{
unsigned long i, nr_pages = size >> PAGE_SHIFT;
struct page *page;
if (!kvm_has_mte(kvm))
return 0;
/*
* pfn_to_online_page() is used to reject ZONE_DEVICE pages
* that may not support tags.
*/
page = pfn_to_online_page(pfn);
if (!page)
return -EFAULT;
for (i = 0; i < nr_pages; i++, page++) {
if (!test_bit(PG_mte_tagged, &page->flags)) {
mte_clear_page_tags(page_address(page));
set_bit(PG_mte_tagged, &page->flags);
}
}
return 0;
}
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_memory_slot *memslot, unsigned long hva,
unsigned long fault_status)
......@@ -861,6 +900,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
bool write_fault, writable, force_pte = false;
bool exec_fault;
bool device = false;
bool shared;
unsigned long mmu_seq;
struct kvm *kvm = vcpu->kvm;
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
......@@ -907,6 +947,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
vma_shift = get_vma_page_shift(vma, hva);
}
shared = (vma->vm_flags & VM_PFNMAP);
switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
case PUD_SHIFT:
......@@ -1011,6 +1053,17 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (vma_pagesize == PAGE_SIZE && !(force_pte || device))
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
&pfn, &fault_ipa);
if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new VM_SHARED VMA */
if (!shared)
ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
else
ret = -EFAULT;
if (ret)
goto out_unlock;
}
if (writable)
prot |= KVM_PGTABLE_PROT_W;
......@@ -1206,12 +1259,17 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
kvm_pfn_t pfn = pte_pfn(range->pte);
int ret;
if (!kvm->arch.mmu.pgt)
return false;
WARN_ON(range->end - range->start != 1);
ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
if (ret)
return false;
/*
* We've moved a page around, probably through CoW, so let's treat
* it just like a translation fault and the map handler will clean
......@@ -1414,6 +1472,14 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (!vma)
break;
/*
* VM_SHARED mappings are not allowed with MTE to avoid races
* when updating the PG_mte_tagged page flag, see
* sanitise_mte_tags for more details.
*/
if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED)
return -EINVAL;
if (vma->vm_flags & VM_PFNMAP) {
/* IO region dirty page logging not allowed */
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
......
......@@ -176,6 +176,10 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu)
if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1) && is32bit)
return false;
/* MTE is incompatible with AArch32 */
if (kvm_has_mte(vcpu->kvm) && is32bit)
return false;
/* Check that the vcpus are either all 32bit or all 64bit */
kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
if (vcpu_has_feature(tmp, KVM_ARM_VCPU_EL1_32BIT) != is32bit)
......
......@@ -1047,6 +1047,13 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu,
break;
case SYS_ID_AA64PFR1_EL1:
val &= ~FEATURE(ID_AA64PFR1_MTE);
if (kvm_has_mte(vcpu->kvm)) {
u64 pfr, mte;
pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT);
val |= FIELD_PREP(FEATURE(ID_AA64PFR1_MTE), mte);
}
break;
case SYS_ID_AA64ISAR1_EL1:
if (!vcpu_has_ptrauth(vcpu))
......@@ -1302,6 +1309,23 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
return true;
}
static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
{
if (kvm_has_mte(vcpu->kvm))
return 0;
return REG_HIDDEN;
}
#define MTE_REG(name) { \
SYS_DESC(SYS_##name), \
.access = undef_access, \
.reset = reset_unknown, \
.reg = name, \
.visibility = mte_visibility, \
}
/* sys_reg_desc initialiser for known cpufeature ID registers */
#define ID_SANITISED(name) { \
SYS_DESC(SYS_##name), \
......@@ -1470,8 +1494,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
{ SYS_DESC(SYS_RGSR_EL1), undef_access },
{ SYS_DESC(SYS_GCR_EL1), undef_access },
MTE_REG(RGSR_EL1),
MTE_REG(GCR_EL1),
{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
......@@ -1498,8 +1522,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
{ SYS_DESC(SYS_TFSR_EL1), undef_access },
{ SYS_DESC(SYS_TFSRE0_EL1), undef_access },
MTE_REG(TFSR_EL1),
MTE_REG(TFSRE0_EL1),
{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
......
......@@ -1083,6 +1083,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_SGX_ATTRIBUTE 196
#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
#define KVM_CAP_PTP_KVM 198
#define KVM_CAP_ARM_MTE 199
#ifdef KVM_CAP_IRQ_ROUTING
......@@ -1428,6 +1429,7 @@ struct kvm_s390_ucas_mapping {
/* Available with KVM_CAP_PMU_EVENT_FILTER */
#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
#define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3)
#define KVM_ARM_MTE_COPY_TAGS _IOR(KVMIO, 0xb4, struct kvm_arm_copy_mte_tags)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
......