Commit 49d57592 authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm updates from Paolo Bonzini:
 "ARM:

   - Provide a virtual cache topology to the guest to avoid
     inconsistencies with migration on heterogenous systems. Non secure
     software has no practical need to traverse the caches by set/way in
     the first place

   - Add support for taking stage-2 access faults in parallel. This was
     an accidental omission in the original parallel faults
     implementation, but should provide a marginal improvement to
     machines w/o FEAT_HAFDBS (such as hardware from the fruit company)

   - A preamble to adding support for nested virtualization to KVM,
     including vEL2 register state, rudimentary nested exception
     handling and masking unsupported features for nested guests

   - Fixes to the PSCI relay that avoid an unexpected host SVE trap when
     resuming a CPU when running pKVM

   - VGIC maintenance interrupt support for the AIC

   - Improvements to the arch timer emulation, primarily aimed at
     reducing the trap overhead of running nested

   - Add CONFIG_USERFAULTFD to the KVM selftests config fragment in the
     interest of CI systems

   - Avoid VM-wide stop-the-world operations when a vCPU accesses its
     own redistributor

   - Serialize when toggling CPACR_EL1.SMEN to avoid unexpected
     exceptions in the host

   - Aesthetic and comment/kerneldoc fixes

   - Drop the vestiges of the old Columbia mailing list and add [Oliver]
     as co-maintainer

  RISC-V:

   - Fix wrong usage of PGDIR_SIZE instead of PUD_SIZE

   - Correctly place the guest in S-mode after redirecting a trap to the
     guest

   - Redirect illegal instruction traps to guest

   - SBI PMU support for guest

  s390:

   - Sort out confusion between virtual and physical addresses, which
     currently are the same on s390

   - A new ioctl that performs cmpxchg on guest memory

   - A few fixes

  x86:

   - Change tdp_mmu to a read-only parameter

   - Separate TDP and shadow MMU page fault paths

   - Enable Hyper-V invariant TSC control

   - Fix a variety of APICv and AVIC bugs, some of them real-world, some
     of them affecting architecurally legal but unlikely to happen in
     practice

   - Mark APIC timer as expired if its in one-shot mode and the count
     underflows while the vCPU task was being migrated

   - Advertise support for Intel's new fast REP string features

   - Fix a double-shootdown issue in the emergency reboot code

   - Ensure GIF=1 and disable SVM during an emergency reboot, i.e. give
     SVM similar treatment to VMX

   - Update Xen's TSC info CPUID sub-leaves as appropriate

   - Add support for Hyper-V's extended hypercalls, where "support" at
     this point is just forwarding the hypercalls to userspace

   - Clean up the kvm->lock vs. kvm->srcu sequences when updating the
     PMU and MSR filters

   - One-off fixes and cleanups

   - Fix and cleanup the range-based TLB flushing code, used when KVM is
     running on Hyper-V

   - Add support for filtering PMU events using a mask. If userspace
     wants to restrict heavily what events the guest can use, it can now
     do so without needing an absurd number of filter entries

   - Clean up KVM's handling of "PMU MSRs to save", especially when vPMU
     support is disabled

   - Add PEBS support for Intel Sapphire Rapids

   - Fix a mostly benign overflow bug in SEV's
     send|receive_update_data()

   - Move several SVM-specific flags into vcpu_svm

  x86 Intel:

   - Handle NMI VM-Exits before leaving the noinstr region

   - A few trivial cleanups in the VM-Enter flows

   - Stop enabling VMFUNC for L1 purely to document that KVM doesn't
     support EPTP switching (or any other VM function) for L1

   - Fix a crash when using eVMCS's enlighted MSR bitmaps

  Generic:

   - Clean up the hardware enable and initialization flow, which was
     scattered around multiple arch-specific hooks. Instead, just let
     the arch code call into generic code. Both x86 and ARM should
     benefit from not having to fight common KVM code's notion of how to
     do initialization

   - Account allocations in generic kvm_arch_alloc_vm()

   - Fix a memory leak if coalesced MMIO unregistration fails

  selftests:

   - On x86, cache the CPU vendor (AMD vs. Intel) and use the info to
     emit the correct hypercall instruction instead of relying on KVM to
     patch in VMMCALL

   - Use TAP interface for kvm_binary_stats_test and tsc_msrs_test"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (325 commits)
  KVM: SVM: hyper-v: placate modpost section mismatch error
  KVM: x86/mmu: Make tdp_mmu_allowed static
  KVM: arm64: nv: Use reg_to_encoding() to get sysreg ID
  KVM: arm64: nv: Only toggle cache for virtual EL2 when SCTLR_EL2 changes
  KVM: arm64: nv: Filter out unsupported features from ID regs
  KVM: arm64: nv: Emulate EL12 register accesses from the virtual EL2
  KVM: arm64: nv: Allow a sysreg to be hidden from userspace only
  KVM: arm64: nv: Emulate PSTATE.M for a guest hypervisor
  KVM: arm64: nv: Add accessors for SPSR_EL1, ELR_EL1 and VBAR_EL1 from virtual EL2
  KVM: arm64: nv: Handle SMCs taken from virtual EL2
  KVM: arm64: nv: Handle trapped ERET from virtual EL2
  KVM: arm64: nv: Inject HVC exceptions to the virtual EL2
  KVM: arm64: nv: Support virtual EL2 exceptions
  KVM: arm64: nv: Handle HCR_EL2.NV system register traps
  KVM: arm64: nv: Add nested virt VCPU primitives for vEL2 VCPU state
  KVM: arm64: nv: Add EL2 system registers to vcpu context
  KVM: arm64: nv: Allow userspace to set PSR_MODE_EL2x
  KVM: arm64: nv: Reset VCPU to EL2 registers if VCPU nested virt is set
  KVM: arm64: nv: Introduce nested virtualization VCPU feature
  KVM: arm64: Use the S2 MMU context to iterate over S2 table
  ...
parents 01687e7c 45dd9bc7
......@@ -2536,9 +2536,14 @@
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
nested: VHE-based mode with support for nested
virtualization. Requires at least ARMv8.3
hardware.
Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation
for the host.
for the host. "nested" is experimental and should be
used with extreme caution.
kvm-arm.vgic_v3_group0_trap=
[KVM,ARM] Trap guest accesses to GICv3 group-0
......
......@@ -3736,7 +3736,7 @@ The fields in each entry are defined as follows:
:Parameters: struct kvm_s390_mem_op (in)
:Returns: = 0 on success,
< 0 on generic error (e.g. -EFAULT or -ENOMEM),
> 0 if an exception occurred while walking the page tables
16 bit program exception code if the access causes such an exception
Read or write data from/to the VM's memory.
The KVM_CAP_S390_MEM_OP_EXTENSION capability specifies what functionality is
......@@ -3754,6 +3754,8 @@ Parameters are specified via the following structure::
struct {
__u8 ar; /* the access register number */
__u8 key; /* access key, ignored if flag unset */
__u8 pad1[6]; /* ignored */
__u64 old_addr; /* ignored if flag unset */
};
__u32 sida_offset; /* offset into the sida */
__u8 reserved[32]; /* ignored */
......@@ -3781,6 +3783,7 @@ Possible operations are:
* ``KVM_S390_MEMOP_ABSOLUTE_WRITE``
* ``KVM_S390_MEMOP_SIDA_READ``
* ``KVM_S390_MEMOP_SIDA_WRITE``
* ``KVM_S390_MEMOP_ABSOLUTE_CMPXCHG``
Logical read/write:
^^^^^^^^^^^^^^^^^^^
......@@ -3829,7 +3832,7 @@ the checks required for storage key protection as one operation (as opposed to
user space getting the storage keys, performing the checks, and accessing
memory thereafter, which could lead to a delay between check and access).
Absolute accesses are permitted for the VM ioctl if KVM_CAP_S390_MEM_OP_EXTENSION
is > 0.
has the KVM_S390_MEMOP_EXTENSION_CAP_BASE bit set.
Currently absolute accesses are not permitted for VCPU ioctls.
Absolute accesses are permitted for non-protected guests only.
......@@ -3837,7 +3840,26 @@ Supported flags:
* ``KVM_S390_MEMOP_F_CHECK_ONLY``
* ``KVM_S390_MEMOP_F_SKEY_PROTECTION``
The semantics of the flags are as for logical accesses.
The semantics of the flags common with logical accesses are as for logical
accesses.
Absolute cmpxchg:
^^^^^^^^^^^^^^^^^
Perform cmpxchg on absolute guest memory. Intended for use with the
KVM_S390_MEMOP_F_SKEY_PROTECTION flag.
Instead of doing an unconditional write, the access occurs only if the target
location contains the value pointed to by "old_addr".
This is performed as an atomic cmpxchg with the length specified by the "size"
parameter. "size" must be a power of two up to and including 16.
If the exchange did not take place because the target value doesn't match the
old value, the value "old_addr" points to is replaced by the target value.
User space can tell if an exchange took place by checking if this replacement
occurred. The cmpxchg op is permitted for the VM ioctl if
KVM_CAP_S390_MEM_OP_EXTENSION has flag KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG set.
Supported flags:
* ``KVM_S390_MEMOP_F_SKEY_PROTECTION``
SIDA read/write:
^^^^^^^^^^^^^^^^
......@@ -4457,6 +4479,18 @@ not holding a previously reported uncorrected error).
:Parameters: struct kvm_s390_cmma_log (in, out)
:Returns: 0 on success, a negative value on error
Errors:
====== =============================================================
ENOMEM not enough memory can be allocated to complete the task
ENXIO if CMMA is not enabled
EINVAL if KVM_S390_CMMA_PEEK is not set but migration mode was not enabled
EINVAL if KVM_S390_CMMA_PEEK is not set but dirty tracking has been
disabled (and thus migration mode was automatically disabled)
EFAULT if the userspace address is invalid or if no page table is
present for the addresses (e.g. when using hugepages).
====== =============================================================
This ioctl is used to get the values of the CMMA bits on the s390
architecture. It is meant to be used in two scenarios:
......@@ -4537,12 +4571,6 @@ mask is unused.
values points to the userspace buffer where the result will be stored.
This ioctl can fail with -ENOMEM if not enough memory can be allocated to
complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if
KVM_S390_CMMA_PEEK is not set but migration mode was not enabled, with
-EFAULT if the userspace address is invalid or if no page table is
present for the addresses (e.g. when using hugepages).
4.108 KVM_S390_SET_CMMA_BITS
----------------------------
......@@ -5005,6 +5033,15 @@ using this ioctl.
:Parameters: struct kvm_pmu_event_filter (in)
:Returns: 0 on success, -1 on error
Errors:
====== ============================================================
EFAULT args[0] cannot be accessed
EINVAL args[0] contains invalid data in the filter or filter events
E2BIG nevents is too large
EBUSY not enough memory to allocate the filter
====== ============================================================
::
struct kvm_pmu_event_filter {
......@@ -5016,14 +5053,69 @@ using this ioctl.
__u64 events[0];
};
This ioctl restricts the set of PMU events that the guest can program.
The argument holds a list of events which will be allowed or denied.
The eventsel+umask of each event the guest attempts to program is compared
against the events field to determine whether the guest should have access.
The events field only controls general purpose counters; fixed purpose
counters are controlled by the fixed_counter_bitmap.
This ioctl restricts the set of PMU events the guest can program by limiting
which event select and unit mask combinations are permitted.
The argument holds a list of filter events which will be allowed or denied.
Filter events only control general purpose counters; fixed purpose counters
are controlled by the fixed_counter_bitmap.
Valid values for 'flags'::
``0``
To use this mode, clear the 'flags' field.
In this mode each event will contain an event select + unit mask.
When the guest attempts to program the PMU the guest's event select +
unit mask is compared against the filter events to determine whether the
guest should have access.
``KVM_PMU_EVENT_FLAG_MASKED_EVENTS``
:Capability: KVM_CAP_PMU_EVENT_MASKED_EVENTS
In this mode each filter event will contain an event select, mask, match, and
exclude value. To encode a masked event use::
KVM_PMU_ENCODE_MASKED_ENTRY()
An encoded event will follow this layout::
Bits Description
---- -----------
7:0 event select (low bits)
15:8 umask match
31:16 unused
35:32 event select (high bits)
36:54 unused
55 exclude bit
63:56 umask mask
When the guest attempts to program the PMU, these steps are followed in
determining if the guest should have access:
1. Match the event select from the guest against the filter events.
2. If a match is found, match the guest's unit mask to the mask and match
values of the included filter events.
I.e. (unit mask & mask) == match && !exclude.
3. If a match is found, match the guest's unit mask to the mask and match
values of the excluded filter events.
I.e. (unit mask & mask) == match && exclude.
4.
a. If an included match is found and an excluded match is not found, filter
the event.
b. For everything else, do not filter the event.
5.
a. If the event is filtered and it's an allow list, allow the guest to
program the event.
b. If the event is filtered and it's a deny list, do not allow the guest to
program the event.
No flags are defined yet, the field must be zero.
When setting a new pmu event filter, -EINVAL will be returned if any of the
unused fields are set or if any of the high bits (35:32) in the event
select are set when called on Intel.
Valid values for 'action'::
......
......@@ -302,6 +302,10 @@ Allows userspace to start migration mode, needed for PGSTE migration.
Setting this attribute when migration mode is already active will have
no effects.
Dirty tracking must be enabled on all memslots, else -EINVAL is returned. When
dirty tracking is disabled on any memslot, migration mode is automatically
stopped.
:Parameters: none
:Returns: -ENOMEM if there is not enough free memory to start migration mode;
-EINVAL if the state of the VM is invalid (e.g. no memory defined);
......
......@@ -9,6 +9,8 @@ KVM Lock Overview
The acquisition orders for mutexes are as follows:
- cpus_read_lock() is taken outside kvm_lock
- kvm->lock is taken outside vcpu->mutex
- kvm->lock is taken outside kvm->slots_lock and kvm->irq_lock
......@@ -226,15 +228,10 @@ time it will be set using the Dirty tracking mechanism described above.
:Type: mutex
:Arch: any
:Protects: - vm_list
``kvm_count_lock``
^^^^^^^^^^^^^^^^^^
:Type: raw_spinlock_t
:Arch: any
:Protects: - hardware virtualization enable/disable
:Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
migration.
- kvm_usage_count
- hardware virtualization enable/disable
:Comment: KVM also disables CPU hotplug via cpus_read_lock() during
enable/disable.
``kvm->mn_invalidate_lock``
^^^^^^^^^^^^^^^^^^^^^^^^^^^
......@@ -292,3 +289,13 @@ time it will be set using the Dirty tracking mechanism described above.
wakeup notification event since external interrupts from the
assigned devices happens, we will find the vCPU on the list to
wakeup.
``vendor_module_lock``
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
:Type: mutex
:Arch: x86
:Protects: loading a vendor module (kvm_amd or kvm_intel)
:Comment: Exists because using kvm_lock leads to deadlock. cpu_hotplug_lock is
taken outside of kvm_lock, e.g. in KVM's CPU online/offline callbacks, and
many operations need to take cpu_hotplug_lock when loading a vendor module,
e.g. updating static calls.
......@@ -37,3 +37,14 @@ Nested virtualization features
------------------------------
TBD
x2APIC
------
When KVM_X2APIC_API_USE_32BIT_IDS is enabled, KVM activates a hack/quirk that
allows sending events to a single vCPU using its x2APIC ID even if the target
vCPU has legacy xAPIC enabled, e.g. to bring up hotplugged vCPUs via INIT-SIPI
on VMs with > 255 vCPUs. A side effect of the quirk is that, if multiple vCPUs
have the same physical APIC ID, KVM will deliver events targeting that APIC ID
only to the vCPU with the lowest vCPU ID. If KVM_X2APIC_API_USE_32BIT_IDS is
not enabled, KVM follows x86 architecture when processing interrupts (all vCPUs
matching the target APIC ID receive the interrupt).
......@@ -11269,13 +11269,12 @@ F: virt/kvm/*
 
KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
M: Marc Zyngier <maz@kernel.org>
M: Oliver Upton <oliver.upton@linux.dev>
R: James Morse <james.morse@arm.com>
R: Suzuki K Poulose <suzuki.poulose@arm.com>
R: Oliver Upton <oliver.upton@linux.dev>
R: Zenghui Yu <yuzenghui@huawei.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: kvmarm@lists.linux.dev
L: kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers)
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
F: arch/arm64/include/asm/kvm*
......
......@@ -16,6 +16,15 @@
#define CLIDR_LOC(clidr) (((clidr) >> CLIDR_LOC_SHIFT) & 0x7)
#define CLIDR_LOUIS(clidr) (((clidr) >> CLIDR_LOUIS_SHIFT) & 0x7)
/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
#define CLIDR_CTYPE(clidr, level) \
(((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
/* Ttypen, bits [2(n - 1) + 34 : 2(n - 1) + 33], for n = 1 to 7 */
#define CLIDR_TTYPE_SHIFT(level) (2 * ((level) - 1) + CLIDR_EL1_Ttypen_SHIFT)
/*
* Memory returned by kmalloc() may be used for DMA, so we must make
* sure that all such allocations are cache aligned. Otherwise,
......
......@@ -196,4 +196,103 @@
__init_el2_nvhe_prepare_eret
.endm
#ifndef __KVM_NVHE_HYPERVISOR__
// This will clobber tmp1 and tmp2, and expect tmp1 to contain
// the id register value as read from the HW
.macro __check_override idreg, fld, width, pass, fail, tmp1, tmp2
ubfx \tmp1, \tmp1, #\fld, #\width
cbz \tmp1, \fail
adr_l \tmp1, \idreg\()_override
ldr \tmp2, [\tmp1, FTR_OVR_VAL_OFFSET]
ldr \tmp1, [\tmp1, FTR_OVR_MASK_OFFSET]
ubfx \tmp2, \tmp2, #\fld, #\width
ubfx \tmp1, \tmp1, #\fld, #\width
cmp \tmp1, xzr
and \tmp2, \tmp2, \tmp1
csinv \tmp2, \tmp2, xzr, ne
cbnz \tmp2, \pass
b \fail
.endm
// This will clobber tmp1 and tmp2
.macro check_override idreg, fld, pass, fail, tmp1, tmp2
mrs \tmp1, \idreg\()_el1
__check_override \idreg \fld 4 \pass \fail \tmp1 \tmp2
.endm
#else
// This will clobber tmp
.macro __check_override idreg, fld, width, pass, fail, tmp, ignore
ldr_l \tmp, \idreg\()_el1_sys_val
ubfx \tmp, \tmp, #\fld, #\width
cbnz \tmp, \pass
b \fail
.endm
.macro check_override idreg, fld, pass, fail, tmp, ignore
__check_override \idreg \fld 4 \pass \fail \tmp \ignore
.endm
#endif
.macro finalise_el2_state
check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
.Linit_sve_\@: /* SVE register access */
mrs x0, cptr_el2 // Disable SVE traps
bic x0, x0, #CPTR_EL2_TZ
msr cptr_el2, x0
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL2, x1 // length for EL1.
.Lskip_sve_\@:
check_override id_aa64pfr1, ID_AA64PFR1_EL1_SME_SHIFT, .Linit_sme_\@, .Lskip_sme_\@, x1, x2
.Linit_sme_\@: /* SME register access and priority mapping */
mrs x0, cptr_el2 // Disable SME traps
bic x0, x0, #CPTR_EL2_TSM
msr cptr_el2, x0
isb
mrs x1, sctlr_el2
orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
msr sctlr_el2, x1
isb
mov x0, #0 // SMCR controls
// Full FP in SM?
mrs_s x1, SYS_ID_AA64SMFR0_EL1
__check_override id_aa64smfr0, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, .Linit_sme_fa64_\@, .Lskip_sme_fa64_\@, x1, x2
.Linit_sme_fa64_\@:
orr x0, x0, SMCR_ELx_FA64_MASK
.Lskip_sme_fa64_\@:
// ZT0 available?
mrs_s x1, SYS_ID_AA64SMFR0_EL1
__check_override id_aa64smfr0, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, .Linit_sme_zt0_\@, .Lskip_sme_zt0_\@, x1, x2
.Linit_sme_zt0_\@:
orr x0, x0, SMCR_ELx_EZT0_MASK
.Lskip_sme_zt0_\@:
orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector
msr_s SYS_SMCR_EL2, x0 // length for EL1.
mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
cbz x1, .Lskip_sme_\@
msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present?
ubfx x1, x1, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
cbz x1, .Lskip_sme_\@
mrs_s x1, SYS_HCRX_EL2
orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping
msr_s SYS_HCRX_EL2, x1
.Lskip_sme_\@:
.endm
#endif /* __ARM_KVM_INIT_H__ */
......@@ -272,6 +272,10 @@
(((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \
ESR_ELx_SYS64_ISS_OP2_SHIFT))
/* ISS field definitions for ERET/ERETAA/ERETAB trapping */
#define ESR_ELx_ERET_ISS_ERET 0x2
#define ESR_ELx_ERET_ISS_ERETA 0x1
/*
* ISS field definitions for floating-point exception traps
* (FP_EXC_32/FP_EXC_64).
......
......@@ -81,11 +81,12 @@
* SWIO: Turn set/way invalidates into set/way clean+invalidate
* PTW: Take a stage2 fault if a stage1 walk steps in device memory
* TID3: Trap EL1 reads of group 3 ID registers
* TID2: Trap CTR_EL0, CCSIDR2_EL1, CLIDR_EL1, and CSSELR_EL1
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
HCR_BSU_IS | HCR_FB | HCR_TACR | \
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 )
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3 | HCR_TID2)
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
......@@ -344,10 +345,26 @@
ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
ECN(BKPT32), ECN(VECTOR32), ECN(BRK64), ECN(ERET)
#define CPACR_EL1_TTA (1 << 28)
#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\
CPACR_EL1_ZEN_EL1EN)
#define kvm_mode_names \
{ PSR_MODE_EL0t, "EL0t" }, \
{ PSR_MODE_EL1t, "EL1t" }, \
{ PSR_MODE_EL1h, "EL1h" }, \
{ PSR_MODE_EL2t, "EL2t" }, \
{ PSR_MODE_EL2h, "EL2h" }, \
{ PSR_MODE_EL3t, "EL3t" }, \
{ PSR_MODE_EL3h, "EL3h" }, \
{ PSR_AA32_MODE_USR, "32-bit USR" }, \
{ PSR_AA32_MODE_FIQ, "32-bit FIQ" }, \
{ PSR_AA32_MODE_IRQ, "32-bit IRQ" }, \
{ PSR_AA32_MODE_SVC, "32-bit SVC" }, \
{ PSR_AA32_MODE_ABT, "32-bit ABT" }, \
{ PSR_AA32_MODE_HYP, "32-bit HYP" }, \
{ PSR_AA32_MODE_UND, "32-bit UND" }, \
{ PSR_AA32_MODE_SYS, "32-bit SYS" }
#endif /* __ARM64_KVM_ARM_H__ */
......@@ -33,6 +33,12 @@ enum exception_type {
except_type_serror = 0x180,
};
#define kvm_exception_type_names \
{ except_type_sync, "SYNC" }, \
{ except_type_irq, "IRQ" }, \
{ except_type_fiq, "FIQ" }, \
{ except_type_serror, "SERROR" }
bool kvm_condition_valid32(const struct kvm_vcpu *vcpu);
void kvm_skip_instr32(struct kvm_vcpu *vcpu);
......@@ -44,6 +50,10 @@ void kvm_inject_size_fault(struct kvm_vcpu *vcpu);
void kvm_vcpu_wfi(struct kvm_vcpu *vcpu);
void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
{
......@@ -88,10 +98,6 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
if (vcpu_el1_is_32bit(vcpu))
vcpu->arch.hcr_el2 &= ~HCR_RW;
if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE) ||
vcpu_el1_is_32bit(vcpu))
vcpu->arch.hcr_el2 |= HCR_TID2;
if (kvm_has_mte(vcpu->kvm))
vcpu->arch.hcr_el2 |= HCR_ATA;
}
......@@ -183,6 +189,62 @@ static __always_inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num,
vcpu_gp_regs(vcpu)->regs[reg_num] = val;
}
static inline bool vcpu_is_el2_ctxt(const struct kvm_cpu_context *ctxt)
{
switch (ctxt->regs.pstate & (PSR_MODE32_BIT | PSR_MODE_MASK)) {
case PSR_MODE_EL2h:
case PSR_MODE_EL2t:
return true;
default:
return false;
}
}
static inline bool vcpu_is_el2(const struct kvm_vcpu *vcpu)
{
return vcpu_is_el2_ctxt(&vcpu->arch.ctxt);
}
static inline bool __vcpu_el2_e2h_is_set(const struct kvm_cpu_context *ctxt)
{
return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_E2H;
}
static inline bool vcpu_el2_e2h_is_set(const struct kvm_vcpu *vcpu)
{
return __vcpu_el2_e2h_is_set(&vcpu->arch.ctxt);
}
static inline bool __vcpu_el2_tge_is_set(const struct kvm_cpu_context *ctxt)
{
return ctxt_sys_reg(ctxt, HCR_EL2) & HCR_TGE;
}
static inline bool vcpu_el2_tge_is_set(const struct kvm_vcpu *vcpu)
{
return __vcpu_el2_tge_is_set(&vcpu->arch.ctxt);
}
static inline bool __is_hyp_ctxt(const struct kvm_cpu_context *ctxt)
{
/*
* We are in a hypervisor context if the vcpu mode is EL2 or
* E2H and TGE bits are set. The latter means we are in the user space
* of the VHE kernel. ARMv8.1 ARM describes this as 'InHost'
*
* Note that the HCR_EL2.{E2H,TGE}={0,1} isn't really handled in the
* rest of the KVM code, and will result in a misbehaving guest.
*/
return vcpu_is_el2_ctxt(ctxt) ||
(__vcpu_el2_e2h_is_set(ctxt) && __vcpu_el2_tge_is_set(ctxt)) ||
__vcpu_el2_tge_is_set(ctxt);
}
static inline bool is_hyp_ctxt(const struct kvm_vcpu *vcpu)
{
return __is_hyp_ctxt(&vcpu->arch.ctxt);
}
/*
* The layout of SPSR for an AArch32 state is different when observed from an
* AArch64 SPSR_ELx or an AArch32 SPSR_*. This function generates the AArch32
......
......@@ -60,14 +60,19 @@
enum kvm_mode {
KVM_MODE_DEFAULT,
KVM_MODE_PROTECTED,
KVM_MODE_NV,
KVM_MODE_NONE,
};
#ifdef CONFIG_KVM
enum kvm_mode kvm_get_mode(void);
#else
static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
#endif
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern unsigned int kvm_sve_max_vl;
int kvm_arm_init_sve(void);
extern unsigned int __ro_after_init kvm_sve_max_vl;
int __init kvm_arm_init_sve(void);
u32 __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
......@@ -252,6 +257,7 @@ struct kvm_vcpu_fault_info {
enum vcpu_sysreg {
__INVALID_SYSREG__, /* 0 is reserved as an invalid value */
MPIDR_EL1, /* MultiProcessor Affinity Register */
CLIDR_EL1, /* Cache Level ID Register */
CSSELR_EL1, /* Cache Size Selection Register */
SCTLR_EL1, /* System Control Register */
ACTLR_EL1, /* Auxiliary Control Register */
......@@ -320,12 +326,43 @@ enum vcpu_sysreg {
TFSR_EL1, /* Tag Fault Status Register (EL1) */
TFSRE0_EL1, /* Tag Fault Status Register (EL0) */
/* 32bit specific registers. Keep them at the end of the range */
/* 32bit specific registers. */
DACR32_EL2, /* Domain Access Control Register */
IFSR32_EL2, /* Instruction Fault Status Register */
FPEXC32_EL2, /* Floating-Point Exception Control Register */
DBGVCR32_EL2, /* Debug Vector Catch Register */
/* EL2 registers */
VPIDR_EL2, /* Virtualization Processor ID Register */
VMPIDR_EL2, /* Virtualization Multiprocessor ID Register */
SCTLR_EL2, /* System Control Register (EL2) */
ACTLR_EL2, /* Auxiliary Control Register (EL2) */
HCR_EL2, /* Hypervisor Configuration Register */
MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
CPTR_EL2, /* Architectural Feature Trap Register (EL2) */
HSTR_EL2, /* Hypervisor System Trap Register */
HACR_EL2, /* Hypervisor Auxiliary Control Register */
TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
TCR_EL2, /* Translation Control Register (EL2) */
VTTBR_EL2, /* Virtualization Translation Table Base Register */
VTCR_EL2, /* Virtualization Translation Control Register */
SPSR_EL2, /* EL2 saved program status register */
ELR_EL2, /* EL2 exception link register */
AFSR0_EL2, /* Auxiliary Fault Status Register 0 (EL2) */
AFSR1_EL2, /* Auxiliary Fault Status Register 1 (EL2) */
ESR_EL2, /* Exception Syndrome Register (EL2) */
FAR_EL2, /* Fault Address Register (EL2) */
HPFAR_EL2, /* Hypervisor IPA Fault Address Register */
MAIR_EL2, /* Memory Attribute Indirection Register (EL2) */
AMAIR_EL2, /* Auxiliary Memory Attribute Indirection Register (EL2) */
VBAR_EL2, /* Vector Base Address Register (EL2) */
RVBAR_EL2, /* Reset Vector Base Address Register */
CONTEXTIDR_EL2, /* Context ID Register (EL2) */
TPIDR_EL2, /* EL2 Software Thread ID Register */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
NR_SYS_REGS /* Nothing after this line! */
};
......@@ -501,6 +538,9 @@ struct kvm_vcpu_arch {
u64 last_steal;
gpa_t base;
} steal;
/* Per-vcpu CCSIDR override or NULL */
u32 *ccsidr;
};
/*
......@@ -598,7 +638,7 @@ struct kvm_vcpu_arch {
#define EXCEPT_AA64_EL1_IRQ __vcpu_except_flags(1)
#define EXCEPT_AA64_EL1_FIQ __vcpu_except_flags(2)
#define EXCEPT_AA64_EL1_SERR __vcpu_except_flags(3)
/* For AArch64 with NV (one day): */
/* For AArch64 with NV: */
#define EXCEPT_AA64_EL2_SYNC __vcpu_except_flags(4)
#define EXCEPT_AA64_EL2_IRQ __vcpu_except_flags(5)
#define EXCEPT_AA64_EL2_FIQ __vcpu_except_flags(6)
......@@ -609,6 +649,8 @@ struct kvm_vcpu_arch {
#define DEBUG_STATE_SAVE_SPE __vcpu_single_flag(iflags, BIT(5))
/* Save TRBE context if active */
#define DEBUG_STATE_SAVE_TRBE __vcpu_single_flag(iflags, BIT(6))
/* vcpu running in HYP context */
#define VCPU_HYP_CONTEXT __vcpu_single_flag(iflags, BIT(7))
/* SVE enabled for host EL0 */
#define HOST_SVE_ENABLED __vcpu_single_flag(sflags, BIT(0))
......@@ -705,7 +747,6 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
return false;
switch (reg) {
case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break;
case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break;
case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break;
case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break;
......@@ -750,7 +791,6 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
return false;
switch (reg) {
case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break;
case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break;
case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break;
case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break;
......@@ -877,7 +917,7 @@ int kvm_handle_cp10_id(struct kvm_vcpu *vcpu);
void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
int kvm_sys_reg_table_init(void);
int __init kvm_sys_reg_table_init(void);
/* MMIO helpers */
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
......@@ -908,20 +948,20 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
extern unsigned int kvm_arm_vmid_bits;
int kvm_arm_vmid_alloc_init(void);
void kvm_arm_vmid_alloc_free(void);
extern unsigned int __ro_after_init kvm_arm_vmid_bits;
int __init kvm_arm_vmid_alloc_init(void);
void __init kvm_arm_vmid_alloc_free(void);
void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
void kvm_arm_vmid_clear_active(void);
static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
{
vcpu_arch->steal.base = GPA_INVALID;
vcpu_arch->steal.base = INVALID_GPA;
}
static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
{
return (vcpu_arch->steal.base != GPA_INVALID);
return (vcpu_arch->steal.base != INVALID_GPA);
}
void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
......@@ -943,7 +983,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void)
void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
......@@ -994,7 +1033,7 @@ static inline void kvm_clr_pmu_events(u32 clr) {}
void kvm_vcpu_load_sysregs_vhe(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_sysregs_vhe(struct kvm_vcpu *vcpu);
int kvm_set_ipa_limit(void);
int __init kvm_set_ipa_limit(void);
#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
......
......@@ -122,6 +122,7 @@ extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
extern unsigned long kvm_nvhe_sym(__icache_flags);
extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
......
......@@ -115,6 +115,7 @@ alternative_cb_end
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
void kvm_update_va_mask(struct alt_instr *alt,
......@@ -163,7 +164,7 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size,
void __iomem **haddr);
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void **haddr);
void free_hyp_pgds(void);
void __init free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
......@@ -175,7 +176,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
phys_addr_t kvm_mmu_get_httbr(void);
phys_addr_t kvm_get_idmap_vector(void);
int kvm_mmu_init(u32 *hyp_va_bits);
int __init kvm_mmu_init(u32 *hyp_va_bits);
static inline void *__kvm_vector_slot2addr(void *base,
enum arm64_hyp_spectre_vector slot)
......@@ -192,7 +193,15 @@ struct kvm;
static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu)
{
return (vcpu_read_sys_reg(vcpu, SCTLR_EL1) & 0b101) == 0b101;
u64 cache_bits = SCTLR_ELx_M | SCTLR_ELx_C;
int reg;
if (vcpu_is_el2(vcpu))
reg = SCTLR_EL2;
else
reg = SCTLR_EL1;
return (vcpu_read_sys_reg(vcpu, reg) & cache_bits) == cache_bits;
}
static inline void __clean_dcache_guest_page(void *va, size_t size)
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ARM64_KVM_NESTED_H
#define __ARM64_KVM_NESTED_H
#include <linux/kvm_host.h>
static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
{
return (!__is_defined(__KVM_NVHE_HYPERVISOR__) &&
cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) &&
test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
}
struct sys_reg_params;
struct sys_reg_desc;
void access_nested_id_reg(struct kvm_vcpu *v, struct sys_reg_params *p,
const struct sys_reg_desc *r);
#endif /* __ARM64_KVM_NESTED_H */
......@@ -71,6 +71,11 @@ static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
return pte;
}
static inline kvm_pfn_t kvm_pte_to_pfn(kvm_pte_t pte)
{
return __phys_to_pfn(kvm_pte_to_phys(pte));
}
static inline u64 kvm_granule_shift(u32 level)
{
/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
......@@ -188,12 +193,15 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
* children.
* @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared
* with other software walkers.
* @KVM_PGTABLE_WALK_HANDLE_FAULT: Indicates the page-table walk was
* invoked from a fault handler.
*/
enum kvm_pgtable_walk_flags {
KVM_PGTABLE_WALK_LEAF = BIT(0),
KVM_PGTABLE_WALK_TABLE_PRE = BIT(1),
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
KVM_PGTABLE_WALK_SHARED = BIT(3),
KVM_PGTABLE_WALK_HANDLE_FAULT = BIT(4),
};
struct kvm_pgtable_visit_ctx {
......
......@@ -325,7 +325,6 @@
#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
#define SYS_CCSIDR_EL1 sys_reg(3, 1, 0, 0, 0)
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
#define SYS_RNDR_EL0 sys_reg(3, 3, 2, 4, 0)
......@@ -411,23 +410,51 @@
#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7)
#define SYS_VPIDR_EL2 sys_reg(3, 4, 0, 0, 0)
#define SYS_VMPIDR_EL2 sys_reg(3, 4, 0, 0, 5)
#define SYS_SCTLR_EL2 sys_reg(3, 4, 1, 0, 0)
#define SYS_ACTLR_EL2 sys_reg(3, 4, 1, 0, 1)
#define SYS_HCR_EL2 sys_reg(3, 4, 1, 1, 0)
#define SYS_MDCR_EL2 sys_reg(3, 4, 1, 1, 1)
#define SYS_CPTR_EL2 sys_reg(3, 4, 1, 1, 2)
#define SYS_HSTR_EL2 sys_reg(3, 4, 1, 1, 3)
#define SYS_HFGRTR_EL2 sys_reg(3, 4, 1, 1, 4)
#define SYS_HFGWTR_EL2 sys_reg(3, 4, 1, 1, 5)
#define SYS_HFGITR_EL2 sys_reg(3, 4, 1, 1, 6)
#define SYS_HACR_EL2 sys_reg(3, 4, 1, 1, 7)
#define SYS_TTBR0_EL2 sys_reg(3, 4, 2, 0, 0)
#define SYS_TTBR1_EL2 sys_reg(3, 4, 2, 0, 1)
#define SYS_TCR_EL2 sys_reg(3, 4, 2, 0, 2)
#define SYS_VTTBR_EL2 sys_reg(3, 4, 2, 1, 0)
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4)
#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5)
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
#define SYS_SP_EL1 sys_reg(3, 4, 4, 1, 0)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
#define SYS_AFSR0_EL2 sys_reg(3, 4, 5, 1, 0)
#define SYS_AFSR1_EL2 sys_reg(3, 4, 5, 1, 1)
#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0)
#define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3)
#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0)
#define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0)
#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0)
#define SYS_HPFAR_EL2 sys_reg(3, 4, 6, 0, 4)
#define SYS_MAIR_EL2 sys_reg(3, 4, 10, 2, 0)
#define SYS_AMAIR_EL2 sys_reg(3, 4, 10, 3, 0)
#define SYS_VBAR_EL2 sys_reg(3, 4, 12, 0, 0)
#define SYS_RVBAR_EL2 sys_reg(3, 4, 12, 0, 1)
#define SYS_RMR_EL2 sys_reg(3, 4, 12, 0, 2)
#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x)
#define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0)
#define SYS_ICH_AP0R1_EL2 __SYS__AP0Rx_EL2(1)
......@@ -469,6 +496,12 @@
#define SYS_ICH_LR14_EL2 __SYS__LR8_EL2(6)
#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
#define SYS_CONTEXTIDR_EL2 sys_reg(3, 4, 13, 0, 1)
#define SYS_TPIDR_EL2 sys_reg(3, 4, 13, 0, 2)
#define SYS_CNTVOFF_EL2 sys_reg(3, 4, 14, 0, 3)
#define SYS_CNTHCTL_EL2 sys_reg(3, 4, 14, 1, 0)
/* VHE encodings for architectural EL0/1 system registers */
#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
......@@ -491,6 +524,8 @@
#define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1)
#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2)
#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0)
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_ENTP2 (BIT(60))
#define SCTLR_ELx_DSSBS (BIT(44))
......
......@@ -109,6 +109,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_SVE 4 /* enable SVE for this CPU */
#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */
#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */
#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */
struct kvm_vcpu_init {
__u32 target;
......
......@@ -11,11 +11,6 @@
#include <linux/of.h>
#define MAX_CACHE_LEVEL 7 /* Max 7 level supported */
/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
#define CLIDR_CTYPE(clidr, level) \
(((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
int cache_line_size(void)
{
......
......@@ -1967,6 +1967,20 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused)
write_sysreg(read_sysreg(tpidr_el1), tpidr_el2);
}
static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
int scope)
{
if (kvm_get_mode() != KVM_MODE_NV)
return false;
if (!has_cpuid_feature(cap, scope)) {
pr_warn("unavailable: %s\n", cap->desc);
return false;
}
return true;
}
#ifdef CONFIG_ARM64_PAN
static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused)
{
......@@ -2262,6 +2276,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = runs_at_el2,
.cpu_enable = cpu_copy_el2regs,
},
{
.desc = "Nested Virtualization Support",
.capability = ARM64_HAS_NESTED_VIRT,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_nested_virt_support,
.sys_reg = SYS_ID_AA64MMFR2_EL1,
.sign = FTR_UNSIGNED,
.field_pos = ID_AA64MMFR2_EL1_NV_SHIFT,
.field_width = 4,
.min_field_value = ID_AA64MMFR2_EL1_NV_IMP,
},
{
.capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment