Commit 2c4aa55a authored by Paolo Bonzini's avatar Paolo Bonzini

Merge tag 'signed-kvm-ppc-next' of git://github.com/agraf/linux-2.6 into HEAD

Patch queue for ppc - 2014-12-18

Highights this time around:

  - Removal of HV support for 970. It became a maintenance burden and received
    practically no testing. POWER8 with HV is available now, so just grab one
    of those boxes if PR isn't enough for you.
  - Some bug fixes and performance improvements
  - Tracepoints for book3s_hv
parents cb5281a5 476ce5ef
......@@ -170,8 +170,6 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
unsigned long *nb_ret);
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
unsigned long gpa, bool dirty);
extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel,
pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
......
......@@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
extern unsigned long kvm_rma_pages;
#endif
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
......@@ -148,7 +147,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
/* This covers 14..54 bits of va*/
rb = (v & ~0x7fUL) << 16; /* AVA field */
rb |= v >> (62 - 8); /* B field */
rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */
/*
* AVA in v had cleared lower 23 bits. We need to derive
* that from pteg index
......
......@@ -180,11 +180,6 @@ struct kvmppc_spapr_tce_table {
struct page *pages[0];
};
struct kvm_rma_info {
atomic_t use_count;
unsigned long base_pfn;
};
/* XICS components, defined in book3s_xics.c */
struct kvmppc_xics;
struct kvmppc_icp;
......@@ -214,16 +209,9 @@ struct revmap_entry {
#define KVMPPC_RMAP_PRESENT 0x100000000ul
#define KVMPPC_RMAP_INDEX 0xfffffffful
/* Low-order bits in memslot->arch.slot_phys[] */
#define KVMPPC_PAGE_ORDER_MASK 0x1f
#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */
#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */
#define KVMPPC_GOT_PAGE 0x80
struct kvm_arch_memory_slot {
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
unsigned long *rmap;
unsigned long *slot_phys;
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
};
......@@ -242,14 +230,12 @@ struct kvm_arch {
struct kvm_rma_info *rma;
unsigned long vrma_slb_v;
int rma_setup_done;
int using_mmu_notifiers;
u32 hpt_order;
atomic_t vcpus_running;
u32 online_vcores;
unsigned long hpt_npte;
unsigned long hpt_mask;
atomic_t hpte_mod_interest;
spinlock_t slot_phys_lock;
cpumask_t need_tlb_flush;
int hpt_cma_alloc;
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
......@@ -297,6 +283,7 @@ struct kvmppc_vcore {
struct list_head runnable_threads;
spinlock_t lock;
wait_queue_head_t wq;
spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
struct kvm_vcpu *runner;
......@@ -308,6 +295,7 @@ struct kvmppc_vcore {
ulong dpdes; /* doorbell state (POWER8) */
void *mpp_buffer; /* Micro Partition Prefetch buffer */
bool mpp_buffer_is_valid;
ulong conferring_threads;
};
#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
......@@ -664,6 +652,8 @@ struct kvm_vcpu_arch {
spinlock_t tbacct_lock;
u64 busy_stolen;
u64 busy_preempt;
u32 emul_inst;
#endif
};
......
......@@ -170,8 +170,6 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba);
extern struct kvm_rma_info *kvm_alloc_rma(void);
extern void kvm_release_rma(struct kvm_rma_info *ri);
extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
extern int kvmppc_core_init_vm(struct kvm *kvm);
......
......@@ -489,7 +489,6 @@ int main(void)
DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
......@@ -499,6 +498,7 @@ int main(void)
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
#endif
#ifdef CONFIG_PPC_BOOK3S
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
......
......@@ -172,6 +172,7 @@ config KVM_XICS
depends on KVM_BOOK3S_64 && !KVM_MPIC
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
default y
---help---
Include support for the XICS (eXternal Interrupt Controller
Specification) interrupt controller architecture used on
......
......@@ -64,14 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
{
}
void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
{
}
void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
......
......@@ -78,11 +78,6 @@ static inline bool sr_kp(u32 sr_raw)
return (sr_raw & 0x20000000) ? true: false;
}
static inline bool sr_nx(u32 sr_raw)
{
return (sr_raw & 0x10000000) ? true: false;
}
static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *pte, bool data,
bool iswrite);
......
This diff is collapsed.
This diff is collapsed.
......@@ -17,6 +17,7 @@
#include <linux/memblock.h>
#include <linux/sizes.h>
#include <linux/cma.h>
#include <linux/bitops.h>
#include <asm/cputable.h>
#include <asm/kvm_ppc.h>
......@@ -33,95 +34,9 @@
* By default we reserve 5% of memory for hash pagetable allocation.
*/
static unsigned long kvm_cma_resv_ratio = 5;
/*
* We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
* Each RMA has to be physically contiguous and of a size that the
* hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
* and other larger sizes. Since we are unlikely to be allocate that
* much physically contiguous memory after the system is up and running,
* we preallocate a set of RMAs in early boot using CMA.
* should be power of 2.
*/
unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
EXPORT_SYMBOL_GPL(kvm_rma_pages);
static struct cma *kvm_cma;
/* Work out RMLS (real mode limit selector) field value for a given RMA size.
Assumes POWER7 or PPC970. */
static inline int lpcr_rmls(unsigned long rma_size)
{
switch (rma_size) {
case 32ul << 20: /* 32 MB */
if (cpu_has_feature(CPU_FTR_ARCH_206))
return 8; /* only supported on POWER7 */
return -1;
case 64ul << 20: /* 64 MB */
return 3;
case 128ul << 20: /* 128 MB */
return 7;
case 256ul << 20: /* 256 MB */
return 4;
case 1ul << 30: /* 1 GB */
return 2;
case 16ul << 30: /* 16 GB */
return 1;
case 256ul << 30: /* 256 GB */
return 0;
default:
return -1;
}
}
static int __init early_parse_rma_size(char *p)
{
unsigned long kvm_rma_size;
pr_debug("%s(%s)\n", __func__, p);
if (!p)
return -EINVAL;
kvm_rma_size = memparse(p, &p);
/*
* Check that the requested size is one supported in hardware
*/
if (lpcr_rmls(kvm_rma_size) < 0) {
pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
return -EINVAL;
}
kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
return 0;
}
early_param("kvm_rma_size", early_parse_rma_size);
struct kvm_rma_info *kvm_alloc_rma()
{
struct page *page;
struct kvm_rma_info *ri;
ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
if (!ri)
return NULL;
page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
if (!page)
goto err_out;
atomic_set(&ri->use_count, 1);
ri->base_pfn = page_to_pfn(page);
return ri;
err_out:
kfree(ri);
return NULL;
}
EXPORT_SYMBOL_GPL(kvm_alloc_rma);
void kvm_release_rma(struct kvm_rma_info *ri)
{
if (atomic_dec_and_test(&ri->use_count)) {
cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
kfree(ri);
}
}
EXPORT_SYMBOL_GPL(kvm_release_rma);
static int __init early_parse_kvm_cma_resv(char *p)
{
pr_debug("%s(%s)\n", __func__, p);
......@@ -133,14 +48,9 @@ early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
struct page *kvm_alloc_hpt(unsigned long nr_pages)
{
unsigned long align_pages = HPT_ALIGN_PAGES;
VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
/* Old CPUs require HPT aligned on a multiple of its size */
if (!cpu_has_feature(CPU_FTR_ARCH_206))
align_pages = nr_pages;
return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
......@@ -181,21 +91,43 @@ void __init kvm_cma_reserve(void)
if (selected_size) {
pr_debug("%s: reserving %ld MiB for global area\n", __func__,
(unsigned long)selected_size / SZ_1M);
/*
* Old CPUs require HPT aligned on a multiple of its size. So for them
* make the alignment as max size we could request.
*/
if (!cpu_has_feature(CPU_FTR_ARCH_206))
align_size = __rounddown_pow_of_two(selected_size);
else
align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
cma_declare_contiguous(0, selected_size, 0, align_size,
KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
}
}
/*
* Real-mode H_CONFER implementation.
* We check if we are the only vcpu out of this virtual core
* still running in the guest and not ceded. If so, we pop up
* to the virtual-mode implementation; if not, just return to
* the guest.
*/
long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
unsigned int yield_count)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
int threads_running;
int threads_ceded;
int threads_conferring;
u64 stop = get_tb() + 10 * tb_ticks_per_usec;
int rv = H_SUCCESS; /* => don't yield */
set_bit(vcpu->arch.ptid, &vc->conferring_threads);
while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
threads_running = VCORE_ENTRY_COUNT(vc);
threads_ceded = hweight32(vc->napping_threads);
threads_conferring = hweight32(vc->conferring_threads);
if (threads_ceded + threads_conferring >= threads_running) {
rv = H_TOO_HARD; /* => do yield */
break;
}
}
clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
return rv;
}
/*
* When running HV mode KVM we need to block certain operations while KVM VMs
* exist in the system. We use a counter of VMs to track this.
......
......@@ -52,10 +52,8 @@ _GLOBAL(__kvmppc_vcore_entry)
std r3, _CCR(r1)
/* Save host DSCR */
BEGIN_FTR_SECTION
mfspr r3, SPRN_DSCR
std r3, HSTATE_DSCR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
BEGIN_FTR_SECTION
/* Save host DABR */
......@@ -84,11 +82,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfspr r7, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
mfspr r6, SPRN_MMCRA
BEGIN_FTR_SECTION
/* On P7, clear MMCRA in order to disable SDAR updates */
/* Clear MMCRA in order to disable SDAR updates */
li r5, 0
mtspr SPRN_MMCRA, r5
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
isync
ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
lbz r5, LPPACA_PMCINUSE(r3)
......@@ -113,20 +109,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfspr r7, SPRN_PMC4
mfspr r8, SPRN_PMC5
mfspr r9, SPRN_PMC6
BEGIN_FTR_SECTION
mfspr r10, SPRN_PMC7
mfspr r11, SPRN_PMC8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
stw r3, HSTATE_PMC(r13)
stw r5, HSTATE_PMC + 4(r13)
stw r6, HSTATE_PMC + 8(r13)
stw r7, HSTATE_PMC + 12(r13)
stw r8, HSTATE_PMC + 16(r13)
stw r9, HSTATE_PMC + 20(r13)
BEGIN_FTR_SECTION
stw r10, HSTATE_PMC + 24(r13)
stw r11, HSTATE_PMC + 28(r13)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
31:
/*
......@@ -140,31 +128,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
#ifdef CONFIG_SMP
/*
* On PPC970, if the guest vcpu has an external interrupt pending,
* send ourselves an IPI so as to interrupt the guest once it
* enables interrupts. (It must have interrupts disabled,
* otherwise we would already have delivered the interrupt.)
*
* XXX If this is a UP build, smp_send_reschedule is not available,
* so the interrupt will be delayed until the next time the vcpu
* enters the guest with interrupts enabled.
*/
BEGIN_FTR_SECTION
ld r4, HSTATE_KVM_VCPU(r13)
ld r0, VCPU_PENDING_EXC(r4)
li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
and. r0, r0, r7
beq 32f
lhz r3, PACAPACAINDEX(r13)
bl smp_send_reschedule
nop
32:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
#endif /* CONFIG_SMP */
/* Jump to partition switch code */
bl kvmppc_hv_entry_trampoline
nop
......
......@@ -138,8 +138,5 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
{
if (cpu_has_feature(CPU_FTR_ARCH_206))
return kvmppc_realmode_mc_power7(vcpu);
return 0;
}
......@@ -45,16 +45,12 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
* as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
* we can use tlbiel as long as we mark all other physical
* cores as potentially having stale TLB entries for this lpid.
* If we're not using MMU notifiers, we never take pages away
* from the guest, so we can use tlbiel if requested.
* Otherwise, don't use tlbiel.
*/
if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
global = 0;
else if (kvm->arch.using_mmu_notifiers)
global = 1;
else
global = !(flags & H_LOCAL);
global = 1;
if (!global) {
/* any other core might now have stale TLB entries... */
......@@ -170,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
struct revmap_entry *rev;
unsigned long g_ptel;
struct kvm_memory_slot *memslot;
unsigned long *physp, pte_size;
unsigned long pte_size;
unsigned long is_io;
unsigned long *rmap;
pte_t pte;
......@@ -198,9 +194,6 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
is_io = ~0ul;
rmap = NULL;
if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
/* PPC970 can't do emulated MMIO */
if (!cpu_has_feature(CPU_FTR_ARCH_206))
return H_PARAMETER;
/* Emulated MMIO - mark this with key=31 */
pteh |= HPTE_V_ABSENT;
ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
......@@ -213,28 +206,12 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
slot_fn = gfn - memslot->base_gfn;
rmap = &memslot->arch.rmap[slot_fn];
if (!kvm->arch.using_mmu_notifiers) {
physp = memslot->arch.slot_phys;
if (!physp)
return H_PARAMETER;
physp += slot_fn;
if (realmode)
physp = real_vmalloc_addr(physp);
pa = *physp;
if (!pa)
return H_TOO_HARD;
is_io = pa & (HPTE_R_I | HPTE_R_W);
pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
pa &= PAGE_MASK;
pa |= gpa & ~PAGE_MASK;
} else {
/* Translate to host virtual address */
hva = __gfn_to_hva_memslot(memslot, gfn);
/* Look up the Linux PTE for the backing page */
pte_size = psize;
pte = lookup_linux_pte_and_update(pgdir, hva, writing,
&pte_size);
pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
if (pte_present(pte) && !pte_numa(pte)) {
if (writing && !pte_write(pte))
/* make the actual HPTE be read-only */
......@@ -244,7 +221,6 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
pa |= hva & (pte_size - 1);
pa |= gpa & ~PAGE_MASK;
}
}
if (pte_size < psize)
return H_PARAMETER;
......@@ -337,8 +313,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
rmap = real_vmalloc_addr(rmap);
lock_rmap(rmap);
/* Check for pending invalidations under the rmap chain lock */
if (kvm->arch.using_mmu_notifiers &&
mmu_notifier_retry(kvm, mmu_seq)) {
if (mmu_notifier_retry(kvm, mmu_seq)) {
/* inval in progress, write a non-present HPTE */
pteh |= HPTE_V_ABSENT;
pteh &= ~HPTE_V_VALID;
......@@ -395,61 +370,11 @@ static inline int try_lock_tlbie(unsigned int *lock)
return old == 0;
}
/*
* tlbie/tlbiel is a bit different on the PPC970 compared to later
* processors such as POWER7; the large page bit is in the instruction
* not RB, and the top 16 bits and the bottom 12 bits of the VA
* in RB must be 0.
*/
static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
long npages, int global, bool need_sync)
{
long i;
if (global) {
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
if (need_sync)
asm volatile("ptesync" : : : "memory");
for (i = 0; i < npages; ++i) {
unsigned long rb = rbvalues[i];
if (rb & 1) /* large page */
asm volatile("tlbie %0,1" : :
"r" (rb & 0x0000fffffffff000ul));
else
asm volatile("tlbie %0,0" : :
"r" (rb & 0x0000fffffffff000ul));
}
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
} else {
if (need_sync)
asm volatile("ptesync" : : : "memory");
for (i = 0; i < npages; ++i) {
unsigned long rb = rbvalues[i];
if (rb & 1) /* large page */
asm volatile("tlbiel %0,1" : :
"r" (rb & 0x0000fffffffff000ul));
else
asm volatile("tlbiel %0,0" : :
"r" (rb & 0x0000fffffffff000ul));
}
asm volatile("ptesync" : : : "memory");
}
}
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
long npages, int global, bool need_sync)
{
long i;
if (cpu_has_feature(CPU_FTR_ARCH_201)) {
/* PPC970 tlbie instruction is a bit different */
do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
return;
}
if (global) {
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
......@@ -667,40 +592,29 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
rev->guest_rpte = r;
note_hpte_modification(kvm, rev);
}
r = (be64_to_cpu(hpte[1]) & ~mask) | bits;
/* Update HPTE */
if (v & HPTE_V_VALID) {
rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
/*
* If the host has this page as readonly but the guest
* wants to make it read/write, reduce the permissions.
* Checking the host permissions involves finding the
* memslot and then the Linux PTE for the page.
* If the page is valid, don't let it transition from
* readonly to writable. If it should be writable, we'll
* take a trap and let the page fault code sort it out.
*/
if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
unsigned long psize, gfn, hva;
struct kvm_memory_slot *memslot;
pgd_t *pgdir = vcpu->arch.pgdir;
pte_t pte;
psize = hpte_page_size(v, r);
gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
if (memslot) {
hva = __gfn_to_hva_memslot(memslot, gfn);
pte = lookup_linux_pte_and_update(pgdir, hva,
1, &psize);
if (pte_present(pte) && !pte_write(pte))
pte = be64_to_cpu(hpte[1]);
r = (pte & ~mask) | bits;
if (hpte_is_writable(r) && !hpte_is_writable(pte))
r = hpte_make_readonly(r);
/* If the PTE is changing, invalidate it first */
if (r != pte) {
rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
HPTE_V_ABSENT);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
true);
hpte[1] = cpu_to_be64(r);
}
}
}
hpte[1] = cpu_to_be64(r);
eieio();
hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK);
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
asm volatile("ptesync" : : : "memory");
return H_SUCCESS;
}
......
......@@ -183,8 +183,10 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* state update in HW (ie bus transactions) so we can handle them
* separately here as well.
*/
if (resend)
if (resend) {
icp->rm_action |= XICS_RM_CHECK_RESEND;
icp->rm_resend_icp = icp;
}
}
......@@ -254,10 +256,25 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
* nothing needs to be done as there can be no XISR to
* reject.
*
* ICP state: Check_IPI
*
* If the CPPR is less favored, then we might be replacing
* an interrupt, and thus need to possibly reject it as in
* an interrupt, and thus need to possibly reject it.
*
* ICP state: Check_IPI
* ICP State: IPI
*
* Besides rejecting any pending interrupts, we also
* update XISR and pending_pri to mark IPI as pending.
*
* PAPR does not describe this state, but if the MFRR is being
* made less favored than its earlier value, there might be
* a previously-rejected interrupt needing to be resent.
* Ideally, we would want to resend only if
* prio(pending_interrupt) < mfrr &&
* prio(pending_interrupt) < cppr
* where pending interrupt is the one that was rejected. But
* we don't have that state, so we simply trigger a resend
* whenever the MFRR is made less favored.
*/
do {
old_state = new_state = ACCESS_ONCE(icp->state);
......@@ -270,13 +287,14 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
resend = false;
if (mfrr < new_state.cppr) {
/* Reject a pending interrupt if not an IPI */
if (mfrr <= new_state.pending_pri)
if (mfrr <= new_state.pending_pri) {
reject = new_state.xisr;
new_state.pending_pri = mfrr;
new_state.xisr = XICS_IPI;
}
}
if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
if (mfrr > old_state.mfrr) {
resend = new_state.need_resend;
new_state.need_resend = 0;
}
......@@ -289,8 +307,10 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
}
/* Pass resends to virtual mode */
if (resend)
if (resend) {
this_icp->rm_action |= XICS_RM_CHECK_RESEND;
this_icp->rm_resend_icp = icp;
}
return check_too_hard(xics, this_icp);
}
......
This diff is collapsed.
......@@ -352,14 +352,6 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
return kvmppc_get_field(inst, msb + 32, lsb + 32);
}
/*
* Replaces inst bits with ordering according to spec.
*/
static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value)
{
return kvmppc_set_field(inst, msb + 32, lsb + 32, value);
}
bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
{
if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
......
......@@ -644,11 +644,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
return r;
}
static inline int get_fpr_index(int i)
{
return i * TS_FPRWIDTH;
}
/* Give up external provider (FPU, Altivec, VSX) */
void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
{
......
......@@ -613,10 +613,25 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
* there might be a previously-rejected interrupt needing
* to be resent.
*
* ICP state: Check_IPI
*
* If the CPPR is less favored, then we might be replacing
* an interrupt, and thus need to possibly reject it as in
* an interrupt, and thus need to possibly reject it.
*
* ICP state: Check_IPI
* ICP State: IPI
*
* Besides rejecting any pending interrupts, we also
* update XISR and pending_pri to mark IPI as pending.
*
* PAPR does not describe this state, but if the MFRR is being
* made less favored than its earlier value, there might be
* a previously-rejected interrupt needing to be resent.
* Ideally, we would want to resend only if
* prio(pending_interrupt) < mfrr &&
* prio(pending_interrupt) < cppr
* where pending interrupt is the one that was rejected. But
* we don't have that state, so we simply trigger a resend
* whenever the MFRR is made less favored.
*/
do {
old_state = new_state = ACCESS_ONCE(icp->state);
......@@ -629,13 +644,14 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
resend = false;
if (mfrr < new_state.cppr) {
/* Reject a pending interrupt if not an IPI */
if (mfrr <= new_state.pending_pri)
if (mfrr <= new_state.pending_pri) {
reject = new_state.xisr;
new_state.pending_pri = mfrr;
new_state.xisr = XICS_IPI;
}
}
if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
if (mfrr > old_state.mfrr) {
resend = new_state.need_resend;
new_state.need_resend = 0;
}
......@@ -789,7 +805,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
if (icp->rm_action & XICS_RM_KICK_VCPU)
kvmppc_fast_vcpu_kick(icp->rm_kick_target);
if (icp->rm_action & XICS_RM_CHECK_RESEND)
icp_check_resend(xics, icp);
icp_check_resend(xics, icp->rm_resend_icp);
if (icp->rm_action & XICS_RM_REJECT)
icp_deliver_irq(xics, icp, icp->rm_reject);
if (icp->rm_action & XICS_RM_NOTIFY_EOI)
......
......@@ -74,6 +74,7 @@ struct kvmppc_icp {
#define XICS_RM_NOTIFY_EOI 0x8
u32 rm_action;
struct kvm_vcpu *rm_kick_target;
struct kvmppc_icp *rm_resend_icp;
u32 rm_reject;
u32 rm_eoied_irq;
......
......@@ -299,14 +299,6 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
}
void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
{
}
void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
{
}
static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
{
kvmppc_booke_vcpu_load(vcpu, cpu);
......
......@@ -527,18 +527,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 0;
break;
case KVM_CAP_PPC_RMA:
r = hv_enabled;
/* PPC970 requires an RMA */
if (r && cpu_has_feature(CPU_FTR_ARCH_201))
r = 2;
r = 0;
break;
#endif
case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (hv_enabled)
r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
else
r = 0;
r = hv_enabled;
#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
r = 1;
#else
......
#if !defined(_TRACE_KVM_BOOK3S_H)
#define _TRACE_KVM_BOOK3S_H
/*
* Common defines used by the trace macros in trace_pr.h and trace_hv.h
*/
#define kvm_trace_symbol_exit \
{0x100, "SYSTEM_RESET"}, \
{0x200, "MACHINE_CHECK"}, \
{0x300, "DATA_STORAGE"}, \
{0x380, "DATA_SEGMENT"}, \
{0x400, "INST_STORAGE"}, \
{0x480, "INST_SEGMENT"}, \
{0x500, "EXTERNAL"}, \
{0x501, "EXTERNAL_LEVEL"}, \
{0x502, "EXTERNAL_HV"}, \
{0x600, "ALIGNMENT"}, \
{0x700, "PROGRAM"}, \
{0x800, "FP_UNAVAIL"}, \
{0x900, "DECREMENTER"}, \
{0x980, "HV_DECREMENTER"}, \
{0xc00, "SYSCALL"}, \
{0xd00, "TRACE"}, \
{0xe00, "H_DATA_STORAGE"}, \
{0xe20, "H_INST_STORAGE"}, \
{0xe40, "H_EMUL_ASSIST"}, \
{0xf00, "PERFMON"}, \
{0xf20, "ALTIVEC"}, \
{0xf40, "VSX"}
#endif
......@@ -151,6 +151,47 @@ TRACE_EVENT(kvm_booke206_ref_release,
__entry->pfn, __entry->flags)
);
#ifdef CONFIG_SPE_POSSIBLE
#define kvm_trace_symbol_irqprio_spe \
{BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \
{BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \
{BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"},
#else
#define kvm_trace_symbol_irqprio_spe
#endif
#ifdef CONFIG_PPC_E500MC
#define kvm_trace_symbol_irqprio_e500mc \
{BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \
{BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"},
#else
#define kvm_trace_symbol_irqprio_e500mc
#endif
#define kvm_trace_symbol_irqprio \
kvm_trace_symbol_irqprio_spe \
kvm_trace_symbol_irqprio_e500mc \
{BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \
{BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \
{BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \
{BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \
{BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \
{BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \
{BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \
{BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \
{BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \
{BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \
{BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \
{BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \
{BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \
{BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \
{BOOKE_IRQPRIO_FIT, "FIT"}, \
{BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \
{BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \
{BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \
{BOOKE_IRQPRIO_DBELL, "DBELL"}, \
{BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"} \
TRACE_EVENT(kvm_booke_queue_irqprio,
TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
TP_ARGS(vcpu, priority),
......@@ -167,8 +208,10 @@ TRACE_EVENT(kvm_booke_queue_irqprio,
__entry->pending = vcpu->arch.pending_exceptions;
),
TP_printk("vcpu=%x prio=%x pending=%lx",
__entry->cpu_nr, __entry->priority, __entry->pending)
TP_printk("vcpu=%x prio=%s pending=%lx",
__entry->cpu_nr,
__print_symbolic(__entry->priority, kvm_trace_symbol_irqprio),
__entry->pending)
);
#endif
......
This diff is collapsed.
......@@ -3,36 +3,13 @@
#define _TRACE_KVM_PR_H
#include <linux/tracepoint.h>
#include "trace_book3s.h"
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm_pr
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_pr
#define kvm_trace_symbol_exit \
{0x100, "SYSTEM_RESET"}, \
{0x200, "MACHINE_CHECK"}, \
{0x300, "DATA_STORAGE"}, \
{0x380, "DATA_SEGMENT"}, \
{0x400, "INST_STORAGE"}, \
{0x480, "INST_SEGMENT"}, \
{0x500, "EXTERNAL"}, \
{0x501, "EXTERNAL_LEVEL"}, \
{0x502, "EXTERNAL_HV"}, \
{0x600, "ALIGNMENT"}, \
{0x700, "PROGRAM"}, \
{0x800, "FP_UNAVAIL"}, \
{0x900, "DECREMENTER"}, \
{0x980, "HV_DECREMENTER"}, \
{0xc00, "SYSCALL"}, \
{0xd00, "TRACE"}, \
{0xe00, "H_DATA_STORAGE"}, \
{0xe20, "H_INST_STORAGE"}, \
{0xe40, "H_EMUL_ASSIST"}, \
{0xf00, "PERFMON"}, \
{0xf20, "ALTIVEC"}, \
{0xf40, "VSX"}
TRACE_EVENT(kvm_book3s_reenter,
TP_PROTO(int r, struct kvm_vcpu *vcpu),
TP_ARGS(r, vcpu),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment