Commit eadf16a9 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull second batch of KVM changes from Paolo Bonzini:
 "This mostly includes the PPC changes for 4.1, which this time cover
  Book3S HV only (debugging aids, minor performance improvements and
  some cleanups).  But there are also bug fixes and small cleanups for
  ARM, x86 and s390.

  The task_migration_notifier revert and real fix is still pending
  review, but I'll send it as soon as possible after -rc1"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (29 commits)
  KVM: arm/arm64: check IRQ number on userland injection
  KVM: arm: irqfd: fix value returned by kvm_irq_map_gsi
  KVM: VMX: Preserve host CR4.MCE value while in guest mode.
  KVM: PPC: Book3S HV: Use msgsnd for signalling threads on POWER8
  KVM: PPC: Book3S HV: Translate kvmhv_commence_exit to C
  KVM: PPC: Book3S HV: Streamline guest entry and exit
  KVM: PPC: Book3S HV: Use bitmap of active threads rather than count
  KVM: PPC: Book3S HV: Use decrementer to wake napping threads
  KVM: PPC: Book3S HV: Don't wake thread with no vcpu on guest IPI
  KVM: PPC: Book3S HV: Get rid of vcore nap_count and n_woken
  KVM: PPC: Book3S HV: Move vcore preemption point up into kvmppc_run_vcpu
  KVM: PPC: Book3S HV: Minor cleanups
  KVM: PPC: Book3S HV: Simplify handling of VCPUs that need a VPA update
  KVM: PPC: Book3S HV: Accumulate timing information for real-mode code
  KVM: PPC: Book3S HV: Create debugfs file for each guest's HPT
  KVM: PPC: Book3S HV: Add ICP real mode counters
  KVM: PPC: Book3S HV: Move virtual mode ICP functions to real-mode
  KVM: PPC: Book3S HV: Convert ICS mutex lock to spin lock
  KVM: PPC: Book3S HV: Add guest->host real mode completion counters
  KVM: PPC: Book3S HV: Add helpers for lock/unlock hpte
  ...
parents 4a655466 2fa462f8
......@@ -3573,3 +3573,20 @@ struct {
@ar - access register number
KVM handlers should exit to userspace with rc = -EREMOTE.
8. Other capabilities.
----------------------
This section lists capabilities that give information about other
features of the KVM implementation.
8.1 KVM_CAP_PPC_HWRNG
Architectures: ppc
This capability, if KVM_CHECK_EXTENSION indicates that it is
available, means that that the kernel has an implementation of the
H_RANDOM hypercall backed by a hardware random-number generator.
If present, the kernel H_RANDOM handler can be enabled for guest use
with the KVM_CAP_PPC_ENABLE_HCALL capability.
......@@ -195,8 +195,14 @@ struct kvm_arch_memory_slot {
#define KVM_ARM_IRQ_CPU_IRQ 0
#define KVM_ARM_IRQ_CPU_FIQ 1
/* Highest supported SPI, from VGIC_NR_IRQS */
/*
* This used to hold the highest supported SPI, but it is now obsolete
* and only here to provide source code level compatibility with older
* userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
*/
#ifndef __KERNEL__
#define KVM_ARM_IRQ_GIC_MAX 127
#endif
/* One single KVM irqchip, ie. the VGIC */
#define KVM_NR_IRQCHIPS 1
......
......@@ -671,8 +671,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
if (!irqchip_in_kernel(kvm))
return -ENXIO;
if (irq_num < VGIC_NR_PRIVATE_IRQS ||
irq_num > KVM_ARM_IRQ_GIC_MAX)
if (irq_num < VGIC_NR_PRIVATE_IRQS)
return -EINVAL;
return kvm_vgic_inject_irq(kvm, 0, irq_num, level);
......
......@@ -188,8 +188,14 @@ struct kvm_arch_memory_slot {
#define KVM_ARM_IRQ_CPU_IRQ 0
#define KVM_ARM_IRQ_CPU_FIQ 1
/* Highest supported SPI, from VGIC_NR_IRQS */
/*
* This used to hold the highest supported SPI, but it is now obsolete
* and only here to provide source code level compatibility with older
* userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
*/
#ifndef __KERNEL__
#define KVM_ARM_IRQ_GIC_MAX 127
#endif
/* One single KVM irqchip, ie. the VGIC */
#define KVM_NR_IRQCHIPS 1
......
......@@ -30,8 +30,6 @@ static inline int arch_has_random(void)
return !!ppc_md.get_random_long;
}
int powernv_get_random_long(unsigned long *v);
static inline int arch_get_random_seed_long(unsigned long *v)
{
return 0;
......@@ -47,4 +45,13 @@ static inline int arch_has_random_seed(void)
#endif /* CONFIG_ARCH_RANDOM */
#ifdef CONFIG_PPC_POWERNV
int powernv_hwrng_present(void);
int powernv_get_random_long(unsigned long *v);
int powernv_get_random_real_mode(unsigned long *v);
#else
static inline int powernv_hwrng_present(void) { return 0; }
static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
#endif
#endif /* _ASM_POWERPC_ARCHRANDOM_H */
......@@ -288,6 +288,9 @@ static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
return !is_kvmppc_hv_enabled(vcpu->kvm);
}
extern int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu);
extern int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu);
/* Magic register values loaded into r3 and r4 before the 'sc' assembly
* instruction for the OSI hypercalls */
#define OSI_SC_MAGIC_R3 0x113724FA
......
......@@ -85,6 +85,20 @@ static inline long try_lock_hpte(__be64 *hpte, unsigned long bits)
return old == 0;
}
static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
{
hpte_v &= ~HPTE_V_HVLOCK;
asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
hpte[0] = cpu_to_be64(hpte_v);
}
/* Without barrier */
static inline void __unlock_hpte(__be64 *hpte, unsigned long hpte_v)
{
hpte_v &= ~HPTE_V_HVLOCK;
hpte[0] = cpu_to_be64(hpte_v);
}
static inline int __hpte_actual_psize(unsigned int lp, int psize)
{
int i, shift;
......@@ -424,6 +438,10 @@ static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
return rcu_dereference_raw_notrace(kvm->memslots);
}
extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
extern void kvmhv_rm_send_ipi(int cpu);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#endif /* __ASM_KVM_BOOK3S_64_H__ */
......@@ -227,10 +227,8 @@ struct kvm_arch {
unsigned long host_sdr1;
int tlbie_lock;
unsigned long lpcr;
unsigned long rmor;
struct kvm_rma_info *rma;
unsigned long vrma_slb_v;
int rma_setup_done;
int hpte_setup_done;
u32 hpt_order;
atomic_t vcpus_running;
u32 online_vcores;
......@@ -239,6 +237,8 @@ struct kvm_arch {
atomic_t hpte_mod_interest;
cpumask_t need_tlb_flush;
int hpt_cma_alloc;
struct dentry *debugfs_dir;
struct dentry *htab_dentry;
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
struct mutex hpt_mutex;
......@@ -263,18 +263,15 @@ struct kvm_arch {
/*
* Struct for a virtual core.
* Note: entry_exit_count combines an entry count in the bottom 8 bits
* and an exit count in the next 8 bits. This is so that we can
* atomically increment the entry count iff the exit count is 0
* without taking the lock.
* Note: entry_exit_map combines a bitmap of threads that have entered
* in the bottom 8 bits and a bitmap of threads that have exited in the
* next 8 bits. This is so that we can atomically set the entry bit
* iff the exit map is 0 without taking a lock.
*/
struct kvmppc_vcore {
int n_runnable;
int n_busy;
int num_threads;
int entry_exit_count;
int n_woken;
int nap_count;
int entry_exit_map;
int napping_threads;
int first_vcpuid;
u16 pcpu;
......@@ -299,13 +296,14 @@ struct kvmppc_vcore {
ulong conferring_threads;
};
#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
#define VCORE_EXIT_COUNT(vc) ((vc)->entry_exit_count >> 8)
#define VCORE_ENTRY_MAP(vc) ((vc)->entry_exit_map & 0xff)
#define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8)
#define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0)
/* Values for vcore_state */
#define VCORE_INACTIVE 0
#define VCORE_SLEEPING 1
#define VCORE_STARTING 2
#define VCORE_PREEMPT 2
#define VCORE_RUNNING 3
#define VCORE_EXITING 4
......@@ -368,6 +366,14 @@ struct kvmppc_slb {
u8 base_page_size; /* MMU_PAGE_xxx */
};
/* Struct used to accumulate timing information in HV real mode code */
struct kvmhv_tb_accumulator {
u64 seqcount; /* used to synchronize access, also count * 2 */
u64 tb_total; /* total time in timebase ticks */
u64 tb_min; /* min time */
u64 tb_max; /* max time */
};
# ifdef CONFIG_PPC_FSL_BOOK3E
#define KVMPPC_BOOKE_IAC_NUM 2
#define KVMPPC_BOOKE_DAC_NUM 2
......@@ -656,6 +662,19 @@ struct kvm_vcpu_arch {
u32 emul_inst;
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
struct kvmhv_tb_accumulator *cur_activity; /* What we're timing */
u64 cur_tb_start; /* when it started */
struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */
struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */
struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */
struct kvmhv_tb_accumulator guest_time; /* guest execution */
struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */
struct dentry *debugfs_dir;
struct dentry *debugfs_timings;
#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
};
#define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
......
......@@ -302,6 +302,8 @@ static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
return kvm->arch.kvm_ops == kvmppc_hv_ops;
}
extern int kvmppc_hwrng_present(void);
/*
* Cuts out inst bits with ordering according to spec.
* That means the leftmost bit is zero. All given bits are included.
......
......@@ -211,5 +211,8 @@ extern void secondary_cpu_time_init(void);
DECLARE_PER_CPU(u64, decrementers_next_tb);
/* Convert timebase ticks to nanoseconds */
unsigned long long tb_to_ns(unsigned long long tb_ticks);
#endif /* __KERNEL__ */
#endif /* __POWERPC_TIME_H */
......@@ -37,6 +37,7 @@
#include <asm/thread_info.h>
#include <asm/rtas.h>
#include <asm/vdso_datapage.h>
#include <asm/dbell.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
#include <asm/lppaca.h>
......@@ -458,6 +459,19 @@ int main(void)
DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1));
DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
DEFINE(VCPU_TB_RMENTRY, offsetof(struct kvm_vcpu, arch.rm_entry));
DEFINE(VCPU_TB_RMINTR, offsetof(struct kvm_vcpu, arch.rm_intr));
DEFINE(VCPU_TB_RMEXIT, offsetof(struct kvm_vcpu, arch.rm_exit));
DEFINE(VCPU_TB_GUEST, offsetof(struct kvm_vcpu, arch.guest_time));
DEFINE(VCPU_TB_CEDE, offsetof(struct kvm_vcpu, arch.cede_time));
DEFINE(VCPU_CUR_ACTIVITY, offsetof(struct kvm_vcpu, arch.cur_activity));
DEFINE(VCPU_ACTIVITY_START, offsetof(struct kvm_vcpu, arch.cur_tb_start));
DEFINE(TAS_SEQCOUNT, offsetof(struct kvmhv_tb_accumulator, seqcount));
DEFINE(TAS_TOTAL, offsetof(struct kvmhv_tb_accumulator, tb_total));
DEFINE(TAS_MIN, offsetof(struct kvmhv_tb_accumulator, tb_min));
DEFINE(TAS_MAX, offsetof(struct kvmhv_tb_accumulator, tb_max));
#endif
DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
......@@ -492,7 +506,6 @@ int main(void)
DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
DEFINE(VCPU_DSISR, offsetof(struct kvm_vcpu, arch.shregs.dsisr));
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
......@@ -550,8 +563,7 @@ int main(void)
DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_count));
DEFINE(VCORE_NAP_COUNT, offsetof(struct kvmppc_vcore, nap_count));
DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map));
DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
DEFINE(VCORE_KVM, offsetof(struct kvmppc_vcore, kvm));
......@@ -748,5 +760,7 @@ int main(void)
offsetof(struct paca_struct, subcore_sibling_mask));
#endif
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
return 0;
}
......@@ -608,6 +608,12 @@ void arch_suspend_enable_irqs(void)
}
#endif
unsigned long long tb_to_ns(unsigned long long ticks)
{
return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
}
EXPORT_SYMBOL_GPL(tb_to_ns);
/*
* Scheduler clock - returns current time in nanosec units.
*
......
......@@ -110,6 +110,20 @@ config KVM_BOOK3S_64_PR
processor, including emulating 32-bit processors on a 64-bit
host.
config KVM_BOOK3S_HV_EXIT_TIMING
bool "Detailed timing for hypervisor real-mode code"
depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
---help---
Calculate time taken for each vcpu in the real-mode guest entry,
exit, and interrupt handling code, plus time spent in the guest
and in nap mode due to idle (cede) while other threads are still
in the guest. The total, minimum and maximum times in nanoseconds
together with the number of executions are reported in debugfs in
kvm/vm#/vcpu#/timings. The overhead is of the order of 30 - 40
ns per exit on POWER8.
If unsure, say N.
config KVM_BOOKE_HV
bool
......
......@@ -821,6 +821,82 @@ void kvmppc_core_destroy_vm(struct kvm *kvm)
#endif
}
int kvmppc_h_logical_ci_load(struct kvm_vcpu *vcpu)
{
unsigned long size = kvmppc_get_gpr(vcpu, 4);
unsigned long addr = kvmppc_get_gpr(vcpu, 5);
u64 buf;
int ret;
if (!is_power_of_2(size) || (size > sizeof(buf)))
return H_TOO_HARD;
ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, size, &buf);
if (ret != 0)
return H_TOO_HARD;
switch (size) {
case 1:
kvmppc_set_gpr(vcpu, 4, *(u8 *)&buf);
break;
case 2:
kvmppc_set_gpr(vcpu, 4, be16_to_cpu(*(__be16 *)&buf));
break;
case 4:
kvmppc_set_gpr(vcpu, 4, be32_to_cpu(*(__be32 *)&buf));
break;
case 8:
kvmppc_set_gpr(vcpu, 4, be64_to_cpu(*(__be64 *)&buf));
break;
default:
BUG();
}
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_load);
int kvmppc_h_logical_ci_store(struct kvm_vcpu *vcpu)
{
unsigned long size = kvmppc_get_gpr(vcpu, 4);
unsigned long addr = kvmppc_get_gpr(vcpu, 5);
unsigned long val = kvmppc_get_gpr(vcpu, 6);
u64 buf;
int ret;
switch (size) {
case 1:
*(u8 *)&buf = val;
break;
case 2:
*(__be16 *)&buf = cpu_to_be16(val);
break;
case 4:
*(__be32 *)&buf = cpu_to_be32(val);
break;
case 8:
*(__be64 *)&buf = cpu_to_be64(val);
break;
default:
return H_TOO_HARD;
}
ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, size, &buf);
if (ret != 0)
return H_TOO_HARD;
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_logical_ci_store);
int kvmppc_core_check_processor_compat(void)
{
/*
......
......@@ -27,6 +27,7 @@
#include <linux/srcu.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/debugfs.h>
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
......@@ -116,12 +117,12 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
long order;
mutex_lock(&kvm->lock);
if (kvm->arch.rma_setup_done) {
kvm->arch.rma_setup_done = 0;
/* order rma_setup_done vs. vcpus_running */
if (kvm->arch.hpte_setup_done) {
kvm->arch.hpte_setup_done = 0;
/* order hpte_setup_done vs. vcpus_running */
smp_mb();
if (atomic_read(&kvm->arch.vcpus_running)) {
kvm->arch.rma_setup_done = 1;
kvm->arch.hpte_setup_done = 1;
goto out;
}
}
......@@ -338,9 +339,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
gr = kvm->arch.revmap[index].guest_rpte;
/* Unlock the HPTE */
asm volatile("lwsync" : : : "memory");
hptep[0] = cpu_to_be64(v);
unlock_hpte(hptep, v);
preempt_enable();
gpte->eaddr = eaddr;
......@@ -469,8 +468,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
hpte[1] = be64_to_cpu(hptep[1]);
hpte[2] = r = rev->guest_rpte;
asm volatile("lwsync" : : : "memory");
hptep[0] = cpu_to_be64(hpte[0]);
unlock_hpte(hptep, hpte[0]);
preempt_enable();
if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
......@@ -621,7 +619,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
hptep[1] = cpu_to_be64(r);
eieio();
hptep[0] = cpu_to_be64(hpte[0]);
__unlock_hpte(hptep, hpte[0]);
asm volatile("ptesync" : : : "memory");
preempt_enable();
if (page && hpte_is_writable(r))
......@@ -642,7 +640,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
return ret;
out_unlock:
hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
preempt_enable();
goto out_put;
}
......@@ -771,7 +769,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
}
}
unlock_rmap(rmapp);
hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
}
return 0;
}
......@@ -857,7 +855,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
}
ret = 1;
}
hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
} while ((i = j) != head);
unlock_rmap(rmapp);
......@@ -974,8 +972,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
/* Now check and modify the HPTE */
if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
/* unlock and continue */
hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
continue;
}
......@@ -996,9 +993,9 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
npages_dirty = n;
eieio();
}
v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK);
v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
hptep[0] = cpu_to_be64(v);
__unlock_hpte(hptep, v);
} while ((i = j) != head);
unlock_rmap(rmapp);
......@@ -1218,8 +1215,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
r &= ~HPTE_GR_MODIFIED;
revp->guest_rpte = r;
}
asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
hptp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
unlock_hpte(hptp, be64_to_cpu(hptp[0]));
preempt_enable();
if (!(valid == want_valid && (first_pass || dirty)))
ok = 0;
......@@ -1339,20 +1335,20 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
unsigned long tmp[2];
ssize_t nb;
long int err, ret;
int rma_setup;
int hpte_setup;
if (!access_ok(VERIFY_READ, buf, count))
return -EFAULT;
/* lock out vcpus from running while we're doing this */
mutex_lock(&kvm->lock);
rma_setup = kvm->arch.rma_setup_done;
if (rma_setup) {
kvm->arch.rma_setup_done = 0; /* temporarily */
/* order rma_setup_done vs. vcpus_running */
hpte_setup = kvm->arch.hpte_setup_done;
if (hpte_setup) {
kvm->arch.hpte_setup_done = 0; /* temporarily */
/* order hpte_setup_done vs. vcpus_running */
smp_mb();
if (atomic_read(&kvm->arch.vcpus_running)) {
kvm->arch.rma_setup_done = 1;
kvm->arch.hpte_setup_done = 1;
mutex_unlock(&kvm->lock);
return -EBUSY;
}
......@@ -1405,7 +1401,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
"r=%lx\n", ret, i, v, r);
goto out;
}
if (!rma_setup && is_vrma_hpte(v)) {
if (!hpte_setup && is_vrma_hpte(v)) {
unsigned long psize = hpte_base_page_size(v, r);
unsigned long senc = slb_pgsize_encoding(psize);
unsigned long lpcr;
......@@ -1414,7 +1410,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
(VRMA_VSID << SLB_VSID_SHIFT_1T);
lpcr = senc << (LPCR_VRMASD_SH - 4);
kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
rma_setup = 1;
hpte_setup = 1;
}
++i;
hptp += 2;
......@@ -1430,9 +1426,9 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
}
out:
/* Order HPTE updates vs. rma_setup_done */
/* Order HPTE updates vs. hpte_setup_done */
smp_wmb();
kvm->arch.rma_setup_done = rma_setup;
kvm->arch.hpte_setup_done = hpte_setup;
mutex_unlock(&kvm->lock);
if (err)
......@@ -1495,6 +1491,141 @@ int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
return ret;
}
struct debugfs_htab_state {
struct kvm *kvm;
struct mutex mutex;
unsigned long hpt_index;
int chars_left;
int buf_index;
char buf[64];
};
static int debugfs_htab_open(struct inode *inode, struct file *file)
{
struct kvm *kvm = inode->i_private;
struct debugfs_htab_state *p;
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
kvm_get_kvm(kvm);
p->kvm = kvm;
mutex_init(&p->mutex);
file->private_data = p;
return nonseekable_open(inode, file);
}
static int debugfs_htab_release(struct inode *inode, struct file *file)
{
struct debugfs_htab_state *p = file->private_data;
kvm_put_kvm(p->kvm);
kfree(p);
return 0;
}
static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
size_t len, loff_t *ppos)
{
struct debugfs_htab_state *p = file->private_data;
ssize_t ret, r;
unsigned long i, n;
unsigned long v, hr, gr;
struct kvm *kvm;
__be64 *hptp;
ret = mutex_lock_interruptible(&p->mutex);
if (ret)
return ret;
if (p->chars_left) {
n = p->chars_left;
if (n > len)
n = len;
r = copy_to_user(buf, p->buf + p->buf_index, n);
n -= r;
p->chars_left -= n;
p->buf_index += n;
buf += n;
len -= n;
ret = n;
if (r) {
if (!n)
ret = -EFAULT;
goto out;
}
}
kvm = p->kvm;
i = p->hpt_index;
hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
for (; len != 0 && i < kvm->arch.hpt_npte; ++i, hptp += 2) {
if (!(be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)))
continue;
/* lock the HPTE so it's stable and read it */
preempt_disable();
while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
cpu_relax();
v = be64_to_cpu(hptp[0]) & ~HPTE_V_HVLOCK;
hr = be64_to_cpu(hptp[1]);
gr = kvm->arch.revmap[i].guest_rpte;
unlock_hpte(hptp, v);
preempt_enable();
if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
continue;
n = scnprintf(p->buf, sizeof(p->buf),
"%6lx %.16lx %.16lx %.16lx\n",
i, v, hr, gr);
p->chars_left = n;
if (n > len)
n = len;
r = copy_to_user(buf, p->buf, n);
n -= r;
p->chars_left -= n;
p->buf_index = n;
buf += n;
len -= n;
ret += n;
if (r) {
if (!ret)
ret = -EFAULT;
goto out;
}
}
p->hpt_index = i;
out:
mutex_unlock(&p->mutex);
return ret;
}
ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
return -EACCES;
}
static const struct file_operations debugfs_htab_fops = {
.owner = THIS_MODULE,
.open = debugfs_htab_open,
.release = debugfs_htab_release,
.read = debugfs_htab_read,
.write = debugfs_htab_write,
.llseek = generic_file_llseek,
};
void kvmppc_mmu_debugfs_init(struct kvm *kvm)
{
kvm->arch.htab_dentry = debugfs_create_file("htab", 0400,
kvm->arch.debugfs_dir, kvm,
&debugfs_htab_fops);
}
void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
{
struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
......
This diff is collapsed.
......@@ -21,6 +21,10 @@
#include <asm/cputable.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s.h>
#include <asm/archrandom.h>
#include <asm/xics.h>
#include <asm/dbell.h>
#include <asm/cputhreads.h>
#define KVM_CMA_CHUNK_ORDER 18
......@@ -114,11 +118,11 @@ long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
int rv = H_SUCCESS; /* => don't yield */
set_bit(vcpu->arch.ptid, &vc->conferring_threads);
while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
threads_running = VCORE_ENTRY_COUNT(vc);
threads_ceded = hweight32(vc->napping_threads);
threads_conferring = hweight32(vc->conferring_threads);
if (threads_ceded + threads_conferring >= threads_running) {
while ((get_tb() < stop) && !VCORE_IS_EXITING(vc)) {
threads_running = VCORE_ENTRY_MAP(vc);
threads_ceded = vc->napping_threads;
threads_conferring = vc->conferring_threads;
if ((threads_ceded | threads_conferring) == threads_running) {
rv = H_TOO_HARD; /* => do yield */
break;
}
......@@ -169,3 +173,89 @@ int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
return 0;
}
EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
int kvmppc_hwrng_present(void)
{
return powernv_hwrng_present();
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_h_random(struct kvm_vcpu *vcpu)
{
if (powernv_get_random_real_mode(&vcpu->arch.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
}
static inline void rm_writeb(unsigned long paddr, u8 val)
{
__asm__ __volatile__("stbcix %0,0,%1"
: : "r" (val), "r" (paddr) : "memory");
}
/*
* Send an interrupt or message to another CPU.
* This can only be called in real mode.
* The caller needs to include any barrier needed to order writes
* to memory vs. the IPI/message.
*/
void kvmhv_rm_send_ipi(int cpu)
{
unsigned long xics_phys;
/* On POWER8 for IPIs to threads in the same core, use msgsnd */
if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
cpu_first_thread_sibling(cpu) ==
cpu_first_thread_sibling(raw_smp_processor_id())) {
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
msg |= cpu_thread_in_core(cpu);
__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
return;
}
/* Else poke the target with an IPI */
xics_phys = paca[cpu].kvm_hstate.xics_phys;
rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
}
/*
* The following functions are called from the assembly code
* in book3s_hv_rmhandlers.S.
*/
static void kvmhv_interrupt_vcore(struct kvmppc_vcore *vc, int active)
{
int cpu = vc->pcpu;
/* Order setting of exit map vs. msgsnd/IPI */
smp_mb();
for (; active; active >>= 1, ++cpu)
if (active & 1)
kvmhv_rm_send_ipi(cpu);
}
void kvmhv_commence_exit(int trap)
{
struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
int ptid = local_paca->kvm_hstate.ptid;
int me, ee;
/* Set our bit in the threads-exiting-guest map in the 0xff00
bits of vcore->entry_exit_map */
me = 0x100 << ptid;
do {
ee = vc->entry_exit_map;
} while (cmpxchg(&vc->entry_exit_map, ee, ee | me) != ee);
/* Are we the first here? */
if ((ee >> 8) != 0)
return;
/*
* Trigger the other threads in this vcore to exit the guest.
* If this is a hypervisor decrementer interrupt then they
* will be already on their way out of the guest.
*/
if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
}
......@@ -150,12 +150,6 @@ static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva,
return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift);
}
static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v)
{
asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
hpte[0] = cpu_to_be64(hpte_v);
}
long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel,
pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
......@@ -271,10 +265,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
u64 pte;
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
pte = be64_to_cpu(*hpte);
pte = be64_to_cpu(hpte[0]);
if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT)))
break;
*hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
__unlock_hpte(hpte, pte);
hpte += 2;
}
if (i == 8)
......@@ -290,9 +284,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
pte = be64_to_cpu(*hpte);
pte = be64_to_cpu(hpte[0]);
if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) {
*hpte &= ~cpu_to_be64(HPTE_V_HVLOCK);
__unlock_hpte(hpte, pte);
return H_PTEG_FULL;
}
}
......@@ -331,7 +325,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
/* Write the first HPTE dword, unlocking the HPTE and making it valid */
eieio();
hpte[0] = cpu_to_be64(pteh);
__unlock_hpte(hpte, pteh);
asm volatile("ptesync" : : : "memory");
*pte_idx_ret = pte_index;
......@@ -412,7 +406,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
((flags & H_ANDCOND) && (pte & avpn) != 0)) {
hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
__unlock_hpte(hpte, pte);
return H_NOT_FOUND;
}
......@@ -548,7 +542,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
be64_to_cpu(hp[0]), be64_to_cpu(hp[1]));
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
args[j] |= rcbits << (56 - 5);
hp[0] = 0;
__unlock_hpte(hp, 0);
}
}
......@@ -574,7 +568,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
pte = be64_to_cpu(hpte[0]);
if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
__unlock_hpte(hpte, pte);
return H_NOT_FOUND;
}
......@@ -755,8 +749,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
/* Return with the HPTE still locked */
return (hash << 3) + (i >> 1);
/* Unlock and move on */
hpte[i] = cpu_to_be64(v);
__unlock_hpte(&hpte[i], v);
}
if (val & HPTE_V_SECONDARY)
......
......@@ -23,17 +23,37 @@
#define DEBUG_PASSUP
static inline void rm_writeb(unsigned long paddr, u8 val)
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq);
/* -- ICS routines -- */
static void ics_rm_check_resend(struct kvmppc_xics *xics,
struct kvmppc_ics *ics, struct kvmppc_icp *icp)
{
__asm__ __volatile__("sync; stbcix %0,0,%1"
: : "r" (val), "r" (paddr) : "memory");
int i;
arch_spin_lock(&ics->lock);
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
struct ics_irq_state *state = &ics->irq_state[i];
if (!state->resend)
continue;
arch_spin_unlock(&ics->lock);
icp_rm_deliver_irq(xics, icp, state->number);
arch_spin_lock(&ics->lock);
}
arch_spin_unlock(&ics->lock);
}
/* -- ICP routines -- */
static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
struct kvm_vcpu *this_vcpu)
{
struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
unsigned long xics_phys;
int cpu;
/* Mark the target VCPU as having an interrupt pending */
......@@ -56,9 +76,8 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
/* In SMT cpu will always point to thread 0, we adjust it */
cpu += vcpu->arch.ptid;
/* Not too hard, then poke the target */
xics_phys = paca[cpu].kvm_hstate.xics_phys;
rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
smp_mb();
kvmhv_rm_send_ipi(cpu);
}
static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
......@@ -116,6 +135,180 @@ static inline int check_too_hard(struct kvmppc_xics *xics,
return (xics->real_mode_dbg || icp->rm_action) ? H_TOO_HARD : H_SUCCESS;
}
static void icp_rm_check_resend(struct kvmppc_xics *xics,
struct kvmppc_icp *icp)
{
u32 icsid;
/* Order this load with the test for need_resend in the caller */
smp_rmb();
for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
struct kvmppc_ics *ics = xics->ics[icsid];
if (!test_and_clear_bit(icsid, icp->resend_map))
continue;
if (!ics)
continue;
ics_rm_check_resend(xics, ics, icp);
}
}
static bool icp_rm_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
u32 *reject)
{
union kvmppc_icp_state old_state, new_state;
bool success;
do {
old_state = new_state = READ_ONCE(icp->state);
*reject = 0;
/* See if we can deliver */
success = new_state.cppr > priority &&
new_state.mfrr > priority &&
new_state.pending_pri > priority;
/*
* If we can, check for a rejection and perform the
* delivery
*/
if (success) {
*reject = new_state.xisr;
new_state.xisr = irq;
new_state.pending_pri = priority;
} else {
/*
* If we failed to deliver we set need_resend
* so a subsequent CPPR state change causes us
* to try a new delivery.
*/
new_state.need_resend = true;
}
} while (!icp_rm_try_update(icp, old_state, new_state));
return success;
}
static void icp_rm_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u32 new_irq)
{
struct ics_irq_state *state;
struct kvmppc_ics *ics;
u32 reject;
u16 src;
/*
* This is used both for initial delivery of an interrupt and
* for subsequent rejection.
*
* Rejection can be racy vs. resends. We have evaluated the
* rejection in an atomic ICP transaction which is now complete,
* so potentially the ICP can already accept the interrupt again.
*
* So we need to retry the delivery. Essentially the reject path
* boils down to a failed delivery. Always.
*
* Now the interrupt could also have moved to a different target,
* thus we may need to re-do the ICP lookup as well
*/
again:
/* Get the ICS state and lock it */
ics = kvmppc_xics_find_ics(xics, new_irq, &src);
if (!ics) {
/* Unsafe increment, but this does not need to be accurate */
xics->err_noics++;
return;
}
state = &ics->irq_state[src];
/* Get a lock on the ICS */
arch_spin_lock(&ics->lock);
/* Get our server */
if (!icp || state->server != icp->server_num) {
icp = kvmppc_xics_find_server(xics->kvm, state->server);
if (!icp) {
/* Unsafe increment again*/
xics->err_noicp++;
goto out;
}
}
/* Clear the resend bit of that interrupt */
state->resend = 0;
/*
* If masked, bail out
*
* Note: PAPR doesn't mention anything about masked pending
* when doing a resend, only when doing a delivery.
*
* However that would have the effect of losing a masked
* interrupt that was rejected and isn't consistent with
* the whole masked_pending business which is about not
* losing interrupts that occur while masked.
*
* I don't differentiate normal deliveries and resends, this
* implementation will differ from PAPR and not lose such
* interrupts.
*/
if (state->priority == MASKED) {
state->masked_pending = 1;
goto out;
}
/*
* Try the delivery, this will set the need_resend flag
* in the ICP as part of the atomic transaction if the
* delivery is not possible.
*
* Note that if successful, the new delivery might have itself
* rejected an interrupt that was "delivered" before we took the
* ics spin lock.
*
* In this case we do the whole sequence all over again for the
* new guy. We cannot assume that the rejected interrupt is less
* favored than the new one, and thus doesn't need to be delivered,
* because by the time we exit icp_rm_try_to_deliver() the target
* processor may well have already consumed & completed it, and thus
* the rejected interrupt might actually be already acceptable.
*/
if (icp_rm_try_to_deliver(icp, new_irq, state->priority, &reject)) {
/*
* Delivery was successful, did we reject somebody else ?
*/
if (reject && reject != XICS_IPI) {
arch_spin_unlock(&ics->lock);
new_irq = reject;
goto again;
}
} else {
/*
* We failed to deliver the interrupt we need to set the
* resend map bit and mark the ICS state as needing a resend
*/
set_bit(ics->icsid, icp->resend_map);
state->resend = 1;
/*
* If the need_resend flag got cleared in the ICP some time
* between icp_rm_try_to_deliver() atomic update and now, then
* we know it might have missed the resend_map bit. So we
* retry
*/
smp_mb();
if (!icp->state.need_resend) {
arch_spin_unlock(&ics->lock);
goto again;
}
}
out:
arch_spin_unlock(&ics->lock);
}
static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
u8 new_cppr)
{
......@@ -184,8 +377,8 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* separately here as well.
*/
if (resend) {
icp->rm_action |= XICS_RM_CHECK_RESEND;
icp->rm_resend_icp = icp;
icp->n_check_resend++;
icp_rm_check_resend(xics, icp);
}
}
......@@ -300,16 +493,16 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
}
} while (!icp_rm_try_update(icp, old_state, new_state));
/* Pass rejects to virtual mode */
/* Handle reject in real mode */
if (reject && reject != XICS_IPI) {
this_icp->rm_action |= XICS_RM_REJECT;
this_icp->rm_reject = reject;
this_icp->n_reject++;
icp_rm_deliver_irq(xics, icp, reject);
}
/* Pass resends to virtual mode */
/* Handle resends in real mode */
if (resend) {
this_icp->rm_action |= XICS_RM_CHECK_RESEND;
this_icp->rm_resend_icp = icp;
this_icp->n_check_resend++;
icp_rm_check_resend(xics, icp);
}
return check_too_hard(xics, this_icp);
......@@ -365,10 +558,13 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
} while (!icp_rm_try_update(icp, old_state, new_state));
/* Pass rejects to virtual mode */
/*
* Check for rejects. They are handled by doing a new delivery
* attempt (see comments in icp_rm_deliver_irq).
*/
if (reject && reject != XICS_IPI) {
icp->rm_action |= XICS_RM_REJECT;
icp->rm_reject = reject;
icp->n_reject++;
icp_rm_deliver_irq(xics, icp, reject);
}
bail:
return check_too_hard(xics, icp);
......@@ -416,10 +612,10 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
goto bail;
state = &ics->irq_state[src];
/* Still asserted, resend it, we make it look like a reject */
/* Still asserted, resend it */
if (state->asserted) {
icp->rm_action |= XICS_RM_REJECT;
icp->rm_reject = irq;
icp->n_reject++;
icp_rm_deliver_irq(xics, icp, irq);
}
if (!hlist_empty(&vcpu->kvm->irq_ack_notifier_list)) {
......
This diff is collapsed.
......@@ -258,6 +258,28 @@ static int kvmppc_h_pr_put_tce(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
static int kvmppc_h_pr_logical_ci_load(struct kvm_vcpu *vcpu)
{
long rc;
rc = kvmppc_h_logical_ci_load(vcpu);
if (rc == H_TOO_HARD)
return EMULATE_FAIL;
kvmppc_set_gpr(vcpu, 3, rc);
return EMULATE_DONE;
}
static int kvmppc_h_pr_logical_ci_store(struct kvm_vcpu *vcpu)
{
long rc;
rc = kvmppc_h_logical_ci_store(vcpu);
if (rc == H_TOO_HARD)
return EMULATE_FAIL;
kvmppc_set_gpr(vcpu, 3, rc);
return EMULATE_DONE;
}
static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{
long rc = kvmppc_xics_hcall(vcpu, cmd);
......@@ -290,6 +312,10 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
vcpu->stat.halt_wakeup++;
return EMULATE_DONE;
case H_LOGICAL_CI_LOAD:
return kvmppc_h_pr_logical_ci_load(vcpu);
case H_LOGICAL_CI_STORE:
return kvmppc_h_pr_logical_ci_store(vcpu);
case H_XIRR:
case H_CPPR:
case H_EOI:
......@@ -323,6 +349,8 @@ int kvmppc_hcall_impl_pr(unsigned long cmd)
case H_BULK_REMOVE:
case H_PUT_TCE:
case H_CEDE:
case H_LOGICAL_CI_LOAD:
case H_LOGICAL_CI_STORE:
#ifdef CONFIG_KVM_XICS
case H_XIRR:
case H_CPPR:
......
......@@ -20,6 +20,7 @@
#include <asm/xics.h>
#include <asm/debug.h>
#include <asm/time.h>
#include <asm/spinlock.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
......@@ -39,7 +40,7 @@
* LOCKING
* =======
*
* Each ICS has a mutex protecting the information about the IRQ
* Each ICS has a spin lock protecting the information about the IRQ
* sources and avoiding simultaneous deliveries if the same interrupt.
*
* ICP operations are done via a single compare & swap transaction
......@@ -109,7 +110,10 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
{
int i;
mutex_lock(&ics->lock);
unsigned long flags;
local_irq_save(flags);
arch_spin_lock(&ics->lock);
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
struct ics_irq_state *state = &ics->irq_state[i];
......@@ -120,12 +124,15 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
XICS_DBG("resend %#x prio %#x\n", state->number,
state->priority);
mutex_unlock(&ics->lock);
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
icp_deliver_irq(xics, icp, state->number);
mutex_lock(&ics->lock);
local_irq_save(flags);
arch_spin_lock(&ics->lock);
}
mutex_unlock(&ics->lock);
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
}
static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
......@@ -133,8 +140,10 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
u32 server, u32 priority, u32 saved_priority)
{
bool deliver;
unsigned long flags;
mutex_lock(&ics->lock);
local_irq_save(flags);
arch_spin_lock(&ics->lock);
state->server = server;
state->priority = priority;
......@@ -145,7 +154,8 @@ static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
deliver = true;
}
mutex_unlock(&ics->lock);
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
return deliver;
}
......@@ -186,6 +196,7 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
struct kvmppc_ics *ics;
struct ics_irq_state *state;
u16 src;
unsigned long flags;
if (!xics)
return -ENODEV;
......@@ -195,10 +206,12 @@ int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
return -EINVAL;
state = &ics->irq_state[src];
mutex_lock(&ics->lock);
local_irq_save(flags);
arch_spin_lock(&ics->lock);
*server = state->server;
*priority = state->priority;
mutex_unlock(&ics->lock);
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
return 0;
}
......@@ -365,6 +378,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
struct kvmppc_ics *ics;
u32 reject;
u16 src;
unsigned long flags;
/*
* This is used both for initial delivery of an interrupt and
......@@ -391,7 +405,8 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
state = &ics->irq_state[src];
/* Get a lock on the ICS */
mutex_lock(&ics->lock);
local_irq_save(flags);
arch_spin_lock(&ics->lock);
/* Get our server */
if (!icp || state->server != icp->server_num) {
......@@ -434,7 +449,7 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
*
* Note that if successful, the new delivery might have itself
* rejected an interrupt that was "delivered" before we took the
* icp mutex.
* ics spin lock.
*
* In this case we do the whole sequence all over again for the
* new guy. We cannot assume that the rejected interrupt is less
......@@ -448,7 +463,8 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* Delivery was successful, did we reject somebody else ?
*/
if (reject && reject != XICS_IPI) {
mutex_unlock(&ics->lock);
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
new_irq = reject;
goto again;
}
......@@ -468,12 +484,14 @@ static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
*/
smp_mb();
if (!icp->state.need_resend) {
mutex_unlock(&ics->lock);
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
goto again;
}
}
out:
mutex_unlock(&ics->lock);
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
}
static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
......@@ -802,14 +820,22 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
if (icp->rm_action & XICS_RM_KICK_VCPU)
if (icp->rm_action & XICS_RM_KICK_VCPU) {
icp->n_rm_kick_vcpu++;
kvmppc_fast_vcpu_kick(icp->rm_kick_target);
if (icp->rm_action & XICS_RM_CHECK_RESEND)
}
if (icp->rm_action & XICS_RM_CHECK_RESEND) {
icp->n_rm_check_resend++;
icp_check_resend(xics, icp->rm_resend_icp);
if (icp->rm_action & XICS_RM_REJECT)
}
if (icp->rm_action & XICS_RM_REJECT) {
icp->n_rm_reject++;
icp_deliver_irq(xics, icp, icp->rm_reject);
if (icp->rm_action & XICS_RM_NOTIFY_EOI)
}
if (icp->rm_action & XICS_RM_NOTIFY_EOI) {
icp->n_rm_notify_eoi++;
kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq);
}
icp->rm_action = 0;
......@@ -872,10 +898,21 @@ static int xics_debug_show(struct seq_file *m, void *private)
struct kvm *kvm = xics->kvm;
struct kvm_vcpu *vcpu;
int icsid, i;
unsigned long flags;
unsigned long t_rm_kick_vcpu, t_rm_check_resend;
unsigned long t_rm_reject, t_rm_notify_eoi;
unsigned long t_reject, t_check_resend;
if (!kvm)
return 0;
t_rm_kick_vcpu = 0;
t_rm_notify_eoi = 0;
t_rm_check_resend = 0;
t_rm_reject = 0;
t_check_resend = 0;
t_reject = 0;
seq_printf(m, "=========\nICP state\n=========\n");
kvm_for_each_vcpu(i, vcpu, kvm) {
......@@ -890,8 +927,19 @@ static int xics_debug_show(struct seq_file *m, void *private)
icp->server_num, state.xisr,
state.pending_pri, state.cppr, state.mfrr,
state.out_ee, state.need_resend);
t_rm_kick_vcpu += icp->n_rm_kick_vcpu;
t_rm_notify_eoi += icp->n_rm_notify_eoi;
t_rm_check_resend += icp->n_rm_check_resend;
t_rm_reject += icp->n_rm_reject;
t_check_resend += icp->n_check_resend;
t_reject += icp->n_reject;
}
seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu reject=%lu notify_eoi=%lu\n",
t_rm_kick_vcpu, t_rm_check_resend,
t_rm_reject, t_rm_notify_eoi);
seq_printf(m, "ICP Real Mode totals: check_resend=%lu resend=%lu\n",
t_check_resend, t_reject);
for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
struct kvmppc_ics *ics = xics->ics[icsid];
......@@ -901,7 +949,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
icsid);
mutex_lock(&ics->lock);
local_irq_save(flags);
arch_spin_lock(&ics->lock);
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
struct ics_irq_state *irq = &ics->irq_state[i];
......@@ -912,7 +961,8 @@ static int xics_debug_show(struct seq_file *m, void *private)
irq->resend, irq->masked_pending);
}
mutex_unlock(&ics->lock);
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
}
return 0;
}
......@@ -965,7 +1015,6 @@ static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
if (!ics)
goto out;
mutex_init(&ics->lock);
ics->icsid = icsid;
for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
......@@ -1107,13 +1156,15 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
u64 __user *ubufp = (u64 __user *) addr;
u16 idx;
u64 val, prio;
unsigned long flags;
ics = kvmppc_xics_find_ics(xics, irq, &idx);
if (!ics)
return -ENOENT;
irqp = &ics->irq_state[idx];
mutex_lock(&ics->lock);
local_irq_save(flags);
arch_spin_lock(&ics->lock);
ret = -ENOENT;
if (irqp->exists) {
val = irqp->server;
......@@ -1129,7 +1180,8 @@ static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
val |= KVM_XICS_PENDING;
ret = 0;
}
mutex_unlock(&ics->lock);
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
if (!ret && put_user(val, ubufp))
ret = -EFAULT;
......@@ -1146,6 +1198,7 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
u64 val;
u8 prio;
u32 server;
unsigned long flags;
if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
return -ENOENT;
......@@ -1166,7 +1219,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
kvmppc_xics_find_server(xics->kvm, server) == NULL)
return -EINVAL;
mutex_lock(&ics->lock);
local_irq_save(flags);
arch_spin_lock(&ics->lock);
irqp->server = server;
irqp->saved_priority = prio;
if (val & KVM_XICS_MASKED)
......@@ -1178,7 +1232,8 @@ static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
if ((val & KVM_XICS_PENDING) && (val & KVM_XICS_LEVEL_SENSITIVE))
irqp->asserted = 1;
irqp->exists = 1;
mutex_unlock(&ics->lock);
arch_spin_unlock(&ics->lock);
local_irq_restore(flags);
if (val & KVM_XICS_PENDING)
icp_deliver_irq(xics, NULL, irqp->number);
......
......@@ -78,13 +78,22 @@ struct kvmppc_icp {
u32 rm_reject;
u32 rm_eoied_irq;
/* Counters for each reason we exited real mode */
unsigned long n_rm_kick_vcpu;
unsigned long n_rm_check_resend;
unsigned long n_rm_reject;
unsigned long n_rm_notify_eoi;
/* Counters for handling ICP processing in real mode */
unsigned long n_check_resend;
unsigned long n_reject;
/* Debug stuff for real mode */
union kvmppc_icp_state rm_dbgstate;
struct kvm_vcpu *rm_dbgtgt;
};
struct kvmppc_ics {
struct mutex lock;
arch_spinlock_t lock;
u16 icsid;
struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
};
......@@ -96,6 +105,8 @@ struct kvmppc_xics {
u32 max_icsid;
bool real_mode;
bool real_mode_dbg;
u32 err_noics;
u32 err_noicp;
struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
};
......
......@@ -529,6 +529,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PPC_RMA:
r = 0;
break;
case KVM_CAP_PPC_HWRNG:
r = kvmppc_hwrng_present();
break;
#endif
case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
......
......@@ -24,12 +24,22 @@
struct powernv_rng {
void __iomem *regs;
void __iomem *regs_real;
unsigned long mask;
};
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
int powernv_hwrng_present(void)
{
struct powernv_rng *rng;
rng = get_cpu_var(powernv_rng);
put_cpu_var(rng);
return rng != NULL;
}
static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
{
unsigned long parity;
......@@ -46,6 +56,17 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
return val;
}
int powernv_get_random_real_mode(unsigned long *v)
{
struct powernv_rng *rng;
rng = raw_cpu_read(powernv_rng);
*v = rng_whiten(rng, in_rm64(rng->regs_real));
return 1;
}
int powernv_get_random_long(unsigned long *v)
{
struct powernv_rng *rng;
......@@ -80,12 +101,20 @@ static __init void rng_init_per_cpu(struct powernv_rng *rng,
static __init int rng_create(struct device_node *dn)
{
struct powernv_rng *rng;
struct resource res;
unsigned long val;
rng = kzalloc(sizeof(*rng), GFP_KERNEL);
if (!rng)
return -ENOMEM;
if (of_address_to_resource(dn, 0, &res)) {
kfree(rng);
return -ENXIO;
}
rng->regs_real = (void __iomem *)res.start;
rng->regs = of_iomap(dn, 0);
if (!rng->regs) {
kfree(rng);
......
......@@ -110,7 +110,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
0xffe6fffbfcfdfc40UL,
0x205c800000000000UL,
0x005c800000000000UL,
};
unsigned long kvm_s390_fac_list_mask_size(void)
......
......@@ -683,8 +683,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
unsigned long bitmap = 1;
struct kvm_lapic **dst;
int i;
bool ret = false;
bool x2apic_ipi = src && apic_x2apic_mode(src);
bool ret, x2apic_ipi;
*r = -1;
......@@ -696,16 +695,18 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
if (irq->shorthand)
return false;
x2apic_ipi = src && apic_x2apic_mode(src);
if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
return false;
ret = true;
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
if (!map)
if (!map) {
ret = false;
goto out;
ret = true;
}
if (irq->dest_mode == APIC_DEST_PHYSICAL) {
if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
......
......@@ -4481,9 +4481,11 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
pfn = spte_to_pfn(*sptep);
/*
* Only EPT supported for now; otherwise, one would need to
* find out efficiently whether the guest page tables are
* also using huge pages.
* We cannot do huge page mapping for indirect shadow pages,
* which are found on the last rmap (level = 1) when not using
* tdp; such shadow pages are synced with the page table in
* the guest, and the guest page table is using 4K page size
* mapping if the indirect sp has level = 1.
*/
if (sp->role.direct &&
!kvm_is_reserved_pfn(pfn) &&
......@@ -4504,19 +4506,12 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
bool flush = false;
unsigned long *rmapp;
unsigned long last_index, index;
gfn_t gfn_start, gfn_end;
spin_lock(&kvm->mmu_lock);
gfn_start = memslot->base_gfn;
gfn_end = memslot->base_gfn + memslot->npages - 1;
if (gfn_start >= gfn_end)
goto out;
rmapp = memslot->arch.rmap[0];
last_index = gfn_to_index(gfn_end, memslot->base_gfn,
PT_PAGE_TABLE_LEVEL);
last_index = gfn_to_index(memslot->base_gfn + memslot->npages - 1,
memslot->base_gfn, PT_PAGE_TABLE_LEVEL);
for (index = 0; index <= last_index; ++index, ++rmapp) {
if (*rmapp)
......@@ -4534,7 +4529,6 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
if (flush)
kvm_flush_remote_tlbs(kvm);
out:
spin_unlock(&kvm->mmu_lock);
}
......
......@@ -3622,8 +3622,16 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
unsigned long hw_cr4 = cr4 | (to_vmx(vcpu)->rmode.vm86_active ?
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
/*
* Pass through host's Machine Check Enable value to hw_cr4, which
* is in force while we are in guest mode. Do not let guests control
* this bit, even if host CR4.MCE == 0.
*/
unsigned long hw_cr4 =
(cr4_read_shadow() & X86_CR4_MCE) |
(cr4 & ~X86_CR4_MCE) |
(to_vmx(vcpu)->rmode.vm86_active ?
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
if (cr4 & X86_CR4_VMXE) {
/*
......
......@@ -5799,7 +5799,6 @@ int kvm_arch_init(void *opaque)
kvm_set_mmio_spte_mask();
kvm_x86_ops = ops;
kvm_init_msr_list();
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
PT_DIRTY_MASK, PT64_NX_MASK, 0);
......@@ -7253,7 +7252,14 @@ void kvm_arch_hardware_disable(void)
int kvm_arch_hardware_setup(void)
{
return kvm_x86_ops->hardware_setup();
int r;
r = kvm_x86_ops->hardware_setup();
if (r != 0)
return r;
kvm_init_msr_list();
return 0;
}
void kvm_arch_hardware_unsetup(void)
......
......@@ -813,6 +813,7 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_MIPS_MSA 112
#define KVM_CAP_S390_INJECT_IRQ 113
#define KVM_CAP_S390_IRQ_STATE 114
#define KVM_CAP_PPC_HWRNG 115
#ifdef KVM_CAP_IRQ_ROUTING
......
......@@ -1561,6 +1561,9 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
goto out;
}
if (irq_num >= kvm->arch.vgic.nr_irqs)
return -EINVAL;
vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
if (vcpu_id >= 0) {
/* kick the specified vcpu */
......@@ -2141,7 +2144,7 @@ int kvm_irq_map_gsi(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *entries,
int gsi)
{
return gsi;
return 0;
}
int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
......
......@@ -89,6 +89,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
static __read_mostly struct preempt_ops kvm_preempt_ops;
struct dentry *kvm_debugfs_dir;
EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment