Commit 542380a2 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
 "ARM:
   - Fix a problem with GICv3 userspace save/restore
   - Clarify GICv2 userspace save/restore ABI
   - Be more careful in clearing GIC LRs
   - Add missing synchronization primitive to our MMU handling code

  PPC:
   - Check for a NULL return from kzalloc

  s390:
   - Prevent translation exception errors on valid page tables for the
     instruction-exection-protection support

  x86:
   - Fix Page-Modification Logging when running a nested guest"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: PPC: Book3S HV: Check for kmalloc errors in ioctl
  KVM: nVMX: initialize PML fields in vmcs02
  KVM: nVMX: do not leak PML full vmexit to L1
  KVM: arm/arm64: vgic: Fix GICC_PMR uaccess on GICv3 and clarify ABI
  KVM: arm64: Ensure LRs are clear when they should be
  kvm: arm/arm64: Fix locking for kvm_free_stage2_pgd
  KVM: s390: remove change-recording override support
  arm/arm64: KVM: Take mmap_sem in kvm_arch_prepare_memory_region
  arm/arm64: KVM: Take mmap_sem in stage2_unmap_vm
parents 62fedca5 8786fa66
......@@ -83,6 +83,12 @@ Groups:
Bits for undefined preemption levels are RAZ/WI.
For historical reasons and to provide ABI compatibility with userspace we
export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask
field in the lower 5 bits of a word, meaning that userspace must always
use the lower 5 bits to communicate with the KVM device and must shift the
value left by 3 places to obtain the actual priority mask level.
Limitations:
- Priorities are not implemented, and registers are RAZ/WI
- Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
......
......@@ -1124,6 +1124,9 @@ static void cpu_hyp_reinit(void)
if (__hyp_get_vectors() == hyp_default_vectors)
cpu_init_hyp_mode(NULL);
}
if (vgic_present)
kvm_vgic_init_cpu_hardware();
}
static void cpu_hyp_reset(void)
......
......@@ -292,11 +292,18 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
phys_addr_t addr = start, end = start + size;
phys_addr_t next;
assert_spin_locked(&kvm->mmu_lock);
pgd = kvm->arch.pgd + stage2_pgd_index(addr);
do {
next = stage2_pgd_addr_end(addr, end);
if (!stage2_pgd_none(*pgd))
unmap_stage2_puds(kvm, pgd, addr, next);
/*
* If the range is too large, release the kvm->mmu_lock
* to prevent starvation and lockup detector warnings.
*/
if (next != end)
cond_resched_lock(&kvm->mmu_lock);
} while (pgd++, addr = next, addr != end);
}
......@@ -803,6 +810,7 @@ void stage2_unmap_vm(struct kvm *kvm)
int idx;
idx = srcu_read_lock(&kvm->srcu);
down_read(&current->mm->mmap_sem);
spin_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
......@@ -810,6 +818,7 @@ void stage2_unmap_vm(struct kvm *kvm)
stage2_unmap_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
up_read(&current->mm->mmap_sem);
srcu_read_unlock(&kvm->srcu, idx);
}
......@@ -829,7 +838,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
if (kvm->arch.pgd == NULL)
return;
spin_lock(&kvm->mmu_lock);
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
spin_unlock(&kvm->mmu_lock);
/* Free the HW pgd, one page at a time */
free_pages_exact(kvm->arch.pgd, S2_PGD_SIZE);
kvm->arch.pgd = NULL;
......@@ -1801,6 +1813,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
(KVM_PHYS_SIZE >> PAGE_SHIFT))
return -EFAULT;
down_read(&current->mm->mmap_sem);
/*
* A memory region could potentially cover multiple VMAs, and any holes
* between them, so iterate over all of them to find out if we can map
......@@ -1844,8 +1857,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
pa += vm_start - vma->vm_start;
/* IO region dirty page logging not allowed */
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
return -EINVAL;
if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) {
ret = -EINVAL;
goto out;
}
ret = kvm_phys_addr_ioremap(kvm, gpa, pa,
vm_end - vm_start,
......@@ -1857,7 +1872,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
} while (hva < reg_end);
if (change == KVM_MR_FLAGS_ONLY)
return ret;
goto out;
spin_lock(&kvm->mmu_lock);
if (ret)
......@@ -1865,6 +1880,8 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
else
stage2_flush_memslot(kvm, memslot);
spin_unlock(&kvm->mmu_lock);
out:
up_read(&current->mm->mmap_sem);
return ret;
}
......
......@@ -1487,6 +1487,10 @@ long kvm_vm_ioctl_resize_hpt_prepare(struct kvm *kvm,
/* start new resize */
resize = kzalloc(sizeof(*resize), GFP_KERNEL);
if (!resize) {
ret = -ENOMEM;
goto out;
}
resize->order = shift;
resize->kvm = kvm;
INIT_WORK(&resize->work, resize_hpt_prepare_work);
......
......@@ -168,8 +168,7 @@ union page_table_entry {
unsigned long z : 1; /* Zero Bit */
unsigned long i : 1; /* Page-Invalid Bit */
unsigned long p : 1; /* DAT-Protection Bit */
unsigned long co : 1; /* Change-Recording Override */
unsigned long : 8;
unsigned long : 9;
};
};
......@@ -745,8 +744,6 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
return PGM_PAGE_TRANSLATION;
if (pte.z)
return PGM_TRANSLATION_SPEC;
if (pte.co && !edat1)
return PGM_TRANSLATION_SPEC;
dat_protection |= pte.p;
raddr.pfra = pte.pfra;
real_address:
......@@ -1182,7 +1179,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
if (!rc && pte.i)
rc = PGM_PAGE_TRANSLATION;
if (!rc && (pte.z || (pte.co && sg->edat_level < 1)))
if (!rc && pte.z)
rc = PGM_TRANSLATION_SPEC;
shadow_page:
pte.p |= dat_protection;
......
......@@ -8198,6 +8198,9 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
case EXIT_REASON_PREEMPTION_TIMER:
return false;
case EXIT_REASON_PML_FULL:
/* We don't expose PML support to L1. */
return false;
default:
return true;
}
......@@ -10267,6 +10270,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
if (enable_pml) {
/*
* Conceptually we want to copy the PML address and index from
* vmcs01 here, and then back to vmcs01 on nested vmexit. But,
* since we always flush the log on each vmexit, this happens
* to be equivalent to simply resetting the fields in vmcs02.
*/
ASSERT(vmx->pml_pg);
vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
}
if (nested_cpu_has_ept(vmcs12)) {
kvm_mmu_unload(vcpu);
nested_ept_init_mmu_context(vcpu);
......
......@@ -295,6 +295,7 @@ void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu);
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
int kvm_vgic_map_resources(struct kvm *kvm);
int kvm_vgic_hyp_init(void);
void kvm_vgic_init_cpu_hardware(void);
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
bool level);
......
......@@ -96,6 +96,9 @@
#define GICH_MISR_EOI (1 << 0)
#define GICH_MISR_U (1 << 1)
#define GICV_PMR_PRIORITY_SHIFT 3
#define GICV_PMR_PRIORITY_MASK (0x1f << GICV_PMR_PRIORITY_SHIFT)
#ifndef __ASSEMBLY__
#include <linux/irqdomain.h>
......
......@@ -391,6 +391,25 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
return IRQ_HANDLED;
}
/**
* kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware
*
* For a specific CPU, initialize the GIC VE hardware.
*/
void kvm_vgic_init_cpu_hardware(void)
{
BUG_ON(preemptible());
/*
* We want to make sure the list registers start out clear so that we
* only have the program the used registers.
*/
if (kvm_vgic_global_state.type == VGIC_V2)
vgic_v2_init_lrs();
else
kvm_call_hyp(__vgic_v3_init_lrs);
}
/**
* kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
* according to the host GIC model. Accordingly calls either
......
......@@ -229,7 +229,15 @@ static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
val = vmcr.ctlr;
break;
case GIC_CPU_PRIMASK:
val = vmcr.pmr;
/*
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
* the PMR field as GICH_VMCR.VMPriMask rather than
* GICC_PMR.Priority, so we expose the upper five bits of
* priority mask to userspace using the lower bits in the
* unsigned long.
*/
val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >>
GICV_PMR_PRIORITY_SHIFT;
break;
case GIC_CPU_BINPOINT:
val = vmcr.bpr;
......@@ -262,7 +270,15 @@ static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
vmcr.ctlr = val;
break;
case GIC_CPU_PRIMASK:
vmcr.pmr = val;
/*
* Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the
* the PMR field as GICH_VMCR.VMPriMask rather than
* GICC_PMR.Priority, so we expose the upper five bits of
* priority mask to userspace using the lower bits in the
* unsigned long.
*/
vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) &
GICV_PMR_PRIORITY_MASK;
break;
case GIC_CPU_BINPOINT:
vmcr.bpr = val;
......
......@@ -36,6 +36,21 @@ static unsigned long *u64_to_bitmask(u64 *val)
return (unsigned long *)val;
}
static inline void vgic_v2_write_lr(int lr, u32 val)
{
void __iomem *base = kvm_vgic_global_state.vctrl_base;
writel_relaxed(val, base + GICH_LR0 + (lr * 4));
}
void vgic_v2_init_lrs(void)
{
int i;
for (i = 0; i < kvm_vgic_global_state.nr_lr; i++)
vgic_v2_write_lr(i, 0);
}
void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
......@@ -191,8 +206,8 @@ void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GICH_VMCR_ALIAS_BINPOINT_MASK;
vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
GICH_VMCR_BINPOINT_MASK;
vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) &
GICH_VMCR_PRIMASK_MASK;
vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) <<
GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK;
vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
}
......@@ -207,8 +222,8 @@ void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
GICH_VMCR_ALIAS_BINPOINT_SHIFT;
vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
GICH_VMCR_BINPOINT_SHIFT;
vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >>
GICH_VMCR_PRIMASK_SHIFT;
vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >>
GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT;
}
void vgic_v2_enable(struct kvm_vcpu *vcpu)
......
......@@ -81,11 +81,18 @@ static inline bool irq_is_pending(struct vgic_irq *irq)
return irq->pending_latch || irq->line_level;
}
/*
* This struct provides an intermediate representation of the fields contained
* in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC
* state to userspace can generate either GICv2 or GICv3 CPU interface
* registers regardless of the hardware backed GIC used.
*/
struct vgic_vmcr {
u32 ctlr;
u32 abpr;
u32 bpr;
u32 pmr;
u32 pmr; /* Priority mask field in the GICC_PMR and
* ICC_PMR_EL1 priority field format */
/* Below member variable are valid only for GICv3 */
u32 grpen0;
u32 grpen1;
......@@ -130,6 +137,8 @@ int vgic_v2_map_resources(struct kvm *kvm);
int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
enum vgic_type);
void vgic_v2_init_lrs(void);
static inline void vgic_get_irq_kref(struct vgic_irq *irq)
{
if (irq->intid < VGIC_MIN_LPI)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment