Commit a0e50aa3, authored by Christoffer Dall, committed by Marc Zyngier

KVM: arm64: Factor out stage 2 page table data from struct kvm

As we are about to reuse our stage 2 page table manipulation code for
shadow stage 2 page tables in the context of nested virtualization, we
are going to manage multiple stage 2 page tables for a single VM.

This requires some pretty invasive changes to our data structures,
which move the vmid and pgd pointers into a separate structure and
change pretty much all of our mmu code to operate on this structure
instead.

The new structure is called struct kvm_s2_mmu.

There is no intended functional change by this patch alone.
Reviewed-by: James Morse <james.morse@arm.com>
Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>
[Designed data structure layout in collaboration]
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Co-developed-by: Marc Zyngier <maz@kernel.org>
[maz: Moved the last_vcpu_ran down to the S2 MMU structure as well]
Signed-off-by: Marc Zyngier <maz@kernel.org>
parent ae4bffb5
......@@ -77,6 +77,7 @@
struct kvm;
struct kvm_vcpu;
struct kvm_s2_mmu;
DECLARE_KVM_NVHE_SYM(__kvm_hyp_init);
DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
......@@ -90,9 +91,9 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
#endif
extern void __kvm_flush_vm_context(void);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa);
extern void __kvm_tlb_flush_vmid(struct kvm *kvm);
extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa);
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
......
......@@ -66,19 +66,34 @@ struct kvm_vmid {
u32 vmid;
};
struct kvm_arch {
struct kvm_s2_mmu {
struct kvm_vmid vmid;
/* stage2 entry level table */
pgd_t *pgd;
phys_addr_t pgd_phys;
/* VTCR_EL2 value for this VM */
u64 vtcr;
/*
* stage2 entry level table
*
* Two kvm_s2_mmu structures in the same VM can point to the same
* pgd here. This happens when running a guest using a
* translation regime that isn't affected by its own stage-2
* translation, such as a non-VHE hypervisor running at vEL2, or
* for vEL1/EL0 with vHCR_EL2.VM == 0. In that case, we use the
* canonical stage-2 page tables.
*/
pgd_t *pgd;
phys_addr_t pgd_phys;
/* The last vcpu id that ran on each physical CPU */
int __percpu *last_vcpu_ran;
struct kvm *kvm;
};
struct kvm_arch {
struct kvm_s2_mmu mmu;
/* VTCR_EL2 value for this VM */
u64 vtcr;
/* The maximum number of vCPUs depends on the used GIC model */
int max_vcpus;
......@@ -254,6 +269,9 @@ struct kvm_vcpu_arch {
void *sve_state;
unsigned int sve_max_vl;
/* Stage 2 paging state used by the hardware on next switch */
struct kvm_s2_mmu *hw_mmu;
/* HYP configuration */
u64 hcr_el2;
u32 mdcr_el2;
......
......@@ -134,8 +134,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
int kvm_alloc_stage2_pgd(struct kvm *kvm);
void kvm_free_stage2_pgd(struct kvm *kvm);
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu);
void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
phys_addr_t pa, unsigned long size, bool writable);
......@@ -577,13 +577,13 @@ static inline u64 kvm_vttbr_baddr_mask(struct kvm *kvm)
return vttbr_baddr_mask(kvm_phys_shift(kvm), kvm_stage2_levels(kvm));
}
static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
{
struct kvm_vmid *vmid = &kvm->arch.vmid;
struct kvm_vmid *vmid = &mmu->vmid;
u64 vmid_field, baddr;
u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
baddr = kvm->arch.pgd_phys;
baddr = mmu->pgd_phys;
vmid_field = (u64)vmid->vmid << VTTBR_VMID_SHIFT;
return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
}
......@@ -592,10 +592,10 @@ static __always_inline u64 kvm_get_vttbr(struct kvm *kvm)
* Must be called from hyp code running at EL2 with an updated VTTBR
* and interrupts disabled.
*/
static __always_inline void __load_guest_stage2(struct kvm *kvm)
static __always_inline void __load_guest_stage2(struct kvm_s2_mmu *mmu)
{
write_sysreg(kvm->arch.vtcr, vtcr_el2);
write_sysreg(kvm_get_vttbr(kvm), vttbr_el2);
write_sysreg(kern_hyp_va(mmu->kvm)->arch.vtcr, vtcr_el2);
write_sysreg(kvm_get_vttbr(mmu), vttbr_el2);
/*
* ARM errata 1165522 and 1530923 require the actual execution of the
......
......@@ -106,22 +106,15 @@ static int kvm_arm_default_max_vcpus(void)
*/
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int ret, cpu;
int ret;
ret = kvm_arm_setup_stage2(kvm, type);
if (ret)
return ret;
kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran));
if (!kvm->arch.last_vcpu_ran)
return -ENOMEM;
for_each_possible_cpu(cpu)
*per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1;
ret = kvm_alloc_stage2_pgd(kvm);
ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu);
if (ret)
goto out_fail_alloc;
return ret;
ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP);
if (ret)
......@@ -129,18 +122,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_vgic_early_init(kvm);
/* Mark the initial VMID generation invalid */
kvm->arch.vmid.vmid_gen = 0;
/* The maximum number of VCPUs is limited by the host's GIC model */
kvm->arch.max_vcpus = kvm_arm_default_max_vcpus();
return ret;
out_free_stage2_pgd:
kvm_free_stage2_pgd(kvm);
out_fail_alloc:
free_percpu(kvm->arch.last_vcpu_ran);
kvm->arch.last_vcpu_ran = NULL;
kvm_free_stage2_pgd(&kvm->arch.mmu);
return ret;
}
......@@ -160,9 +147,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_vgic_destroy(kvm);
free_percpu(kvm->arch.last_vcpu_ran);
kvm->arch.last_vcpu_ran = NULL;
for (i = 0; i < KVM_MAX_VCPUS; ++i) {
if (kvm->vcpus[i]) {
kvm_vcpu_destroy(kvm->vcpus[i]);
......@@ -279,6 +263,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
kvm_arm_pvtime_vcpu_init(&vcpu->arch);
vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
err = kvm_vgic_vcpu_init(vcpu);
if (err)
return err;
......@@ -334,16 +320,18 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct kvm_s2_mmu *mmu;
int *last_ran;
last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran);
mmu = vcpu->arch.hw_mmu;
last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
/*
* We might get preempted before the vCPU actually runs, but
* over-invalidation doesn't affect correctness.
*/
if (*last_ran != vcpu->vcpu_id) {
kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu);
kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu);
*last_ran = vcpu->vcpu_id;
}
......@@ -680,7 +668,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
cond_resched();
update_vmid(&vcpu->kvm->arch.vmid);
update_vmid(&vcpu->arch.hw_mmu->vmid);
check_vcpu_requests(vcpu);
......@@ -729,7 +717,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
smp_store_mb(vcpu->mode, IN_GUEST_MODE);
if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) ||
if (ret <= 0 || need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
kvm_request_pending(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
isb(); /* Ensure work in x_flush_hwstate is committed */
......
......@@ -122,9 +122,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
}
}
static inline void __activate_vm(struct kvm *kvm)
static inline void __activate_vm(struct kvm_s2_mmu *mmu)
{
__load_guest_stage2(kvm);
__load_guest_stage2(mmu);
}
static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
......
......@@ -194,7 +194,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg32_restore_state(vcpu);
__sysreg_restore_state_nvhe(guest_ctxt);
__activate_vm(kern_hyp_va(vcpu->kvm));
__activate_vm(kern_hyp_va(vcpu->arch.hw_mmu));
__activate_traps(vcpu);
__hyp_vgic_restore_state(vcpu);
......
......@@ -12,7 +12,8 @@ struct tlb_inv_context {
u64 tcr;
};
static void __tlb_switch_to_guest(struct kvm *kvm, struct tlb_inv_context *cxt)
static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
struct tlb_inv_context *cxt)
{
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
u64 val;
......@@ -30,12 +31,10 @@ static void __tlb_switch_to_guest(struct kvm *kvm, struct tlb_inv_context *cxt)
isb();
}
/* __load_guest_stage2() includes an ISB for the workaround. */
__load_guest_stage2(kvm);
asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
__load_guest_stage2(mmu);
}
static void __tlb_switch_to_host(struct kvm *kvm, struct tlb_inv_context *cxt)
static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
{
write_sysreg(0, vttbr_el2);
......@@ -47,15 +46,15 @@ static void __tlb_switch_to_host(struct kvm *kvm, struct tlb_inv_context *cxt)
}
}
void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa)
{
struct tlb_inv_context cxt;
dsb(ishst);
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
__tlb_switch_to_guest(kvm, &cxt);
mmu = kern_hyp_va(mmu);
__tlb_switch_to_guest(mmu, &cxt);
/*
* We could do so much better if we had the VA as well.
......@@ -98,39 +97,39 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
if (icache_is_vpipt())
__flush_icache_all();
__tlb_switch_to_host(kvm, &cxt);
__tlb_switch_to_host(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm *kvm)
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;
dsb(ishst);
/* Switch to requested VMID */
kvm = kern_hyp_va(kvm);
__tlb_switch_to_guest(kvm, &cxt);
mmu = kern_hyp_va(mmu);
__tlb_switch_to_guest(mmu, &cxt);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
__tlb_switch_to_host(kvm, &cxt);
__tlb_switch_to_host(&cxt);
}
void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
{
struct kvm *kvm = kern_hyp_va(kern_hyp_va(vcpu)->kvm);
struct tlb_inv_context cxt;
/* Switch to requested VMID */
__tlb_switch_to_guest(kvm, &cxt);
mmu = kern_hyp_va(mmu);
__tlb_switch_to_guest(mmu, &cxt);
__tlbi(vmalle1);
dsb(nsh);
isb();
__tlb_switch_to_host(kvm, &cxt);
__tlb_switch_to_host(&cxt);
}
void __kvm_flush_vm_context(void)
......
......@@ -125,7 +125,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
* stage 2 translation, and __activate_traps clear HCR_EL2.TGE
* (among other things).
*/
__activate_vm(vcpu->kvm);
__activate_vm(vcpu->arch.hw_mmu);
__activate_traps(vcpu);
sysreg_restore_guest_state_vhe(guest_ctxt);
......
......@@ -16,7 +16,8 @@ struct tlb_inv_context {
u64 sctlr;
};
static void __tlb_switch_to_guest(struct kvm *kvm, struct tlb_inv_context *cxt)
static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
struct tlb_inv_context *cxt)
{
u64 val;
......@@ -52,14 +53,14 @@ static void __tlb_switch_to_guest(struct kvm *kvm, struct tlb_inv_context *cxt)
* place before clearing TGE. __load_guest_stage2() already
* has an ISB in order to deal with this.
*/
__load_guest_stage2(kvm);
__load_guest_stage2(mmu);
val = read_sysreg(hcr_el2);
val &= ~HCR_TGE;
write_sysreg(val, hcr_el2);
isb();
}
static void __tlb_switch_to_host(struct kvm *kvm, struct tlb_inv_context *cxt)
static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
{
/*
* We're done with the TLB operation, let's restore the host's
......@@ -78,14 +79,14 @@ static void __tlb_switch_to_host(struct kvm *kvm, struct tlb_inv_context *cxt)
local_irq_restore(cxt->flags);
}
void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa)
{
struct tlb_inv_context cxt;
dsb(ishst);
/* Switch to requested VMID */
__tlb_switch_to_guest(kvm, &cxt);
__tlb_switch_to_guest(mmu, &cxt);
/*
* We could do so much better if we had the VA as well.
......@@ -106,38 +107,37 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
dsb(ish);
isb();
__tlb_switch_to_host(kvm, &cxt);
__tlb_switch_to_host(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm *kvm)
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;
dsb(ishst);
/* Switch to requested VMID */
__tlb_switch_to_guest(kvm, &cxt);
__tlb_switch_to_guest(mmu, &cxt);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
__tlb_switch_to_host(kvm, &cxt);
__tlb_switch_to_host(&cxt);
}
void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
{
struct kvm *kvm = vcpu->kvm;
struct tlb_inv_context cxt;
/* Switch to requested VMID */
__tlb_switch_to_guest(kvm, &cxt);
__tlb_switch_to_guest(mmu, &cxt);
__tlbi(vmalle1);
dsb(nsh);
isb();
__tlb_switch_to_host(kvm, &cxt);
__tlb_switch_to_host(&cxt);
}
void __kvm_flush_vm_context(void)
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment