Commit 3c536423 authored by Michael Ellerman's avatar Michael Ellerman

Merge branch 'topic/ppc-kvm' into next

Merge some powerpc KVM patches from our topic branch.

In particular this brings in Nick's big series rewriting parts of the
guest entry/exit path in C.

Conflicts:
	arch/powerpc/kernel/security.c
	arch/powerpc/kvm/book3s_hv_rmhandlers.S
parents 07d8ad6f fae5c9f3
......@@ -120,6 +120,7 @@ extern s32 patch__call_flush_branch_caches3;
extern s32 patch__flush_count_cache_return;
extern s32 patch__flush_link_stack_return;
extern s32 patch__call_kvm_flush_link_stack;
extern s32 patch__call_kvm_flush_link_stack_p9;
extern s32 patch__memset_nocache, patch__memcpy_nocache;
extern long flush_branch_caches;
......@@ -140,7 +141,7 @@ void kvmhv_load_host_pmu(void);
void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
......
......@@ -35,6 +35,19 @@
/* PACA save area size in u64 units (exgen, exmc, etc) */
#define EX_SIZE 10
/* PACA save area offsets */
#define EX_R9 0
#define EX_R10 8
#define EX_R11 16
#define EX_R12 24
#define EX_R13 32
#define EX_DAR 40
#define EX_DSISR 48
#define EX_CCR 52
#define EX_CFAR 56
#define EX_PPR 64
#define EX_CTR 72
/*
* maximum recursive depth of MCE exceptions
*/
......
......@@ -147,6 +147,7 @@
#define KVM_GUEST_MODE_SKIP 2
#define KVM_GUEST_MODE_GUEST_HV 3
#define KVM_GUEST_MODE_HOST_HV 4
#define KVM_GUEST_MODE_HV_P9 5 /* ISA >= v3.0 path */
#define KVM_INST_FETCH_FAILED -1
......
......@@ -153,9 +153,17 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
return radix;
}
int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr);
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
/*
* Invalid HDSISR value which is used to indicate when HW has not set the reg.
* Used to work around an errata.
*/
#define HDSISR_CANARY 0x7fff
/*
* We use a lock bit in HPTE dword 0 to synchronize updates and
* accesses to each HPTE, and another bit to indicate non-present
......
......@@ -297,7 +297,6 @@ struct kvm_arch {
u8 fwnmi_enabled;
u8 secure_guest;
u8 svm_enabled;
bool threads_indep;
bool nested_enable;
bool dawr1_enabled;
pgd_t *pgtable;
......@@ -683,7 +682,12 @@ struct kvm_vcpu_arch {
ulong fault_dar;
u32 fault_dsisr;
unsigned long intr_msr;
ulong fault_gpa; /* guest real address of page fault (POWER9) */
/*
* POWER9 and later: fault_gpa contains the guest real address of page
* fault for a radix guest, or segment descriptor (equivalent to result
* from slbmfev of SLB entry that translated the EA) for hash guests.
*/
ulong fault_gpa;
#endif
#ifdef CONFIG_BOOKE
......
......@@ -129,6 +129,7 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
......@@ -606,6 +607,7 @@ extern void kvmppc_free_pimap(struct kvm *kvm);
extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
......@@ -638,6 +640,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
{ return 0; }
#endif
#ifdef CONFIG_KVM_XIVE
......@@ -655,8 +659,6 @@ extern int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority);
extern int kvmppc_xive_int_on(struct kvm *kvm, u32 irq);
extern int kvmppc_xive_int_off(struct kvm *kvm, u32 irq);
extern void kvmppc_xive_init_module(void);
extern void kvmppc_xive_exit_module(void);
extern int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
......@@ -671,6 +673,8 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
extern void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu);
static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{
......@@ -680,8 +684,6 @@ static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
extern int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu);
extern void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu);
extern void kvmppc_xive_native_init_module(void);
extern void kvmppc_xive_native_exit_module(void);
extern int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val);
extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
......@@ -695,8 +697,6 @@ static inline int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
u32 *priority) { return -1; }
static inline int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) { return -1; }
static inline int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) { return -1; }
static inline void kvmppc_xive_init_module(void) { }
static inline void kvmppc_xive_exit_module(void) { }
static inline int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
......@@ -711,14 +711,14 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
static inline void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
static inline int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
struct kvm_vcpu *vcpu, u32 cpu) { return -EBUSY; }
static inline void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu) { }
static inline void kvmppc_xive_native_init_module(void) { }
static inline void kvmppc_xive_native_exit_module(void) { }
static inline int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val)
{ return 0; }
......@@ -754,7 +754,7 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
unsigned long tce_value, unsigned long npages);
long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
unsigned int yield_count);
long kvmppc_h_random(struct kvm_vcpu *vcpu);
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu);
void kvmhv_commence_exit(int trap);
void kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
void kvmppc_subcore_enter_guest(void);
......
......@@ -121,12 +121,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
}
#endif
#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
#else
static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
#endif
extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
......
......@@ -97,6 +97,18 @@ extern void div128_by_32(u64 dividend_high, u64 dividend_low,
extern void secondary_cpu_time_init(void);
extern void __init time_init(void);
#ifdef CONFIG_PPC64
static inline unsigned long test_irq_work_pending(void)
{
unsigned long x;
asm volatile("lbz %0,%1(13)"
: "=r" (x)
: "i" (offsetof(struct paca_struct, irq_work_pending)));
return x;
}
#endif
DECLARE_PER_CPU(u64, decrementers_next_tb);
/* Convert timebase ticks to nanoseconds */
......
......@@ -473,7 +473,6 @@ int main(void)
OFFSET(VCPU_SLB_NR, kvm_vcpu, arch.slb_nr);
OFFSET(VCPU_FAULT_DSISR, kvm_vcpu, arch.fault_dsisr);
OFFSET(VCPU_FAULT_DAR, kvm_vcpu, arch.fault_dar);
OFFSET(VCPU_FAULT_GPA, kvm_vcpu, arch.fault_gpa);
OFFSET(VCPU_INTR_MSR, kvm_vcpu, arch.intr_msr);
OFFSET(VCPU_LAST_INST, kvm_vcpu, arch.last_inst);
OFFSET(VCPU_TRAP, kvm_vcpu, arch.trap);
......
......@@ -21,22 +21,6 @@
#include <asm/feature-fixups.h>
#include <asm/kup.h>
/* PACA save area offsets (exgen, exmc, etc) */
#define EX_R9 0
#define EX_R10 8
#define EX_R11 16
#define EX_R12 24
#define EX_R13 32
#define EX_DAR 40
#define EX_DSISR 48
#define EX_CCR 52
#define EX_CFAR 56
#define EX_PPR 64
#define EX_CTR 72
.if EX_SIZE != 10
.error "EX_SIZE is wrong"
.endif
/*
* Following are fixed section helper macros.
*
......@@ -133,7 +117,6 @@ name:
#define IBRANCH_TO_COMMON .L_IBRANCH_TO_COMMON_\name\() /* ENTRY branch to common */
#define IREALMODE_COMMON .L_IREALMODE_COMMON_\name\() /* Common runs in realmode */
#define IMASK .L_IMASK_\name\() /* IRQ soft-mask bit */
#define IKVM_SKIP .L_IKVM_SKIP_\name\() /* Generate KVM skip handler */
#define IKVM_REAL .L_IKVM_REAL_\name\() /* Real entry tests KVM */
#define __IKVM_REAL(name) .L_IKVM_REAL_ ## name
#define IKVM_VIRT .L_IKVM_VIRT_\name\() /* Virt entry tests KVM */
......@@ -190,9 +173,6 @@ do_define_int n
.ifndef IMASK
IMASK=0
.endif
.ifndef IKVM_SKIP
IKVM_SKIP=0
.endif
.ifndef IKVM_REAL
IKVM_REAL=0
.endif
......@@ -207,8 +187,6 @@ do_define_int n
.endif
.endm
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
* All interrupts which set HSRR registers, as well as SRESET and MCE and
* syscall when invoked with "sc 1" switch to MSR[HV]=1 (HVMODE) to be taken,
......@@ -238,88 +216,28 @@ do_define_int n
/*
* If an interrupt is taken while a guest is running, it is immediately routed
* to KVM to handle. If both HV and PR KVM arepossible, KVM interrupts go first
* to kvmppc_interrupt_hv, which handles the PR guest case.
* to KVM to handle.
*/
#define kvmppc_interrupt kvmppc_interrupt_hv
#else
#define kvmppc_interrupt kvmppc_interrupt_pr
#endif
.macro KVMTEST name
.macro KVMTEST name handler
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,0
bne \name\()_kvm
.endm
.macro GEN_KVM name
.balign IFETCH_ALIGN_BYTES
\name\()_kvm:
.if IKVM_SKIP
cmpwi r10,KVM_GUEST_MODE_SKIP
beq 89f
.else
BEGIN_FTR_SECTION
ld r10,IAREA+EX_CFAR(r13)
std r10,HSTATE_CFAR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
.endif
ld r10,IAREA+EX_CTR(r13)
mtctr r10
BEGIN_FTR_SECTION
ld r10,IAREA+EX_PPR(r13)
std r10,HSTATE_PPR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r11,IAREA+EX_R11(r13)
ld r12,IAREA+EX_R12(r13)
std r12,HSTATE_SCRATCH0(r13)
sldi r12,r9,32
ld r9,IAREA+EX_R9(r13)
ld r10,IAREA+EX_R10(r13)
/* HSRR variants have the 0x2 bit added to their trap number */
.if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
ori r12,r12,(IVEC + 0x2)
FTR_SECTION_ELSE
ori r12,r12,(IVEC)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
.elseif IHSRR
ori r12,r12,(IVEC+ 0x2)
.else
ori r12,r12,(IVEC)
.endif
b kvmppc_interrupt
.if IKVM_SKIP
89: mtocrf 0x80,r9
ld r10,IAREA+EX_CTR(r13)
mtctr r10
ld r9,IAREA+EX_R9(r13)
ld r10,IAREA+EX_R10(r13)
ld r11,IAREA+EX_R11(r13)
ld r12,IAREA+EX_R12(r13)
.if IHSRR_IF_HVMODE
BEGIN_FTR_SECTION
b kvmppc_skip_Hinterrupt
li r10,(IVEC + 0x2)
FTR_SECTION_ELSE
b kvmppc_skip_interrupt
li r10,(IVEC)
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
.elseif IHSRR
b kvmppc_skip_Hinterrupt
li r10,(IVEC + 0x2)
.else
b kvmppc_skip_interrupt
li r10,(IVEC)
.endif
.endif
.endm
#else
.macro KVMTEST name
.endm
.macro GEN_KVM name
.endm
bne \handler
#endif
.endm
/*
* This is the BOOK3S interrupt entry code macro.
......@@ -461,7 +379,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
DEFINE_FIXED_SYMBOL(\name\()_common_real)
\name\()_common_real:
.if IKVM_REAL
KVMTEST \name
KVMTEST \name kvm_interrupt
.endif
ld r10,PACAKMSR(r13) /* get MSR value for kernel */
......@@ -484,7 +402,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
DEFINE_FIXED_SYMBOL(\name\()_common_virt)
\name\()_common_virt:
.if IKVM_VIRT
KVMTEST \name
KVMTEST \name kvm_interrupt
1:
.endif
.endif /* IVIRT */
......@@ -498,7 +416,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_virt)
DEFINE_FIXED_SYMBOL(\name\()_common_real)
\name\()_common_real:
.if IKVM_REAL
KVMTEST \name
KVMTEST \name kvm_interrupt
.endif
.endm
......@@ -1000,8 +918,6 @@ EXC_COMMON_BEGIN(system_reset_common)
EXCEPTION_RESTORE_REGS
RFI_TO_USER_OR_KERNEL
GEN_KVM system_reset
/**
* Interrupt 0x200 - Machine Check Interrupt (MCE).
......@@ -1070,7 +986,6 @@ INT_DEFINE_BEGIN(machine_check)
ISET_RI=0
IDAR=1
IDSISR=1
IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(machine_check)
......@@ -1166,7 +1081,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
/*
* Check if we are coming from guest. If yes, then run the normal
* exception handler which will take the
* machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
* machine_check_kvm->kvm_interrupt branch to deliver the MC event
* to guest.
*/
lbz r11,HSTATE_IN_GUEST(r13)
......@@ -1236,8 +1151,6 @@ EXC_COMMON_BEGIN(machine_check_common)
bl machine_check_exception
b interrupt_return
GEN_KVM machine_check
#ifdef CONFIG_PPC_P7_NAP
/*
......@@ -1342,7 +1255,6 @@ INT_DEFINE_BEGIN(data_access)
IVEC=0x300
IDAR=1
IDSISR=1
IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(data_access)
......@@ -1373,8 +1285,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
REST_NVGPRS(r1)
b interrupt_return
GEN_KVM data_access
/**
* Interrupt 0x380 - Data Segment Interrupt (DSLB).
......@@ -1396,7 +1306,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
INT_DEFINE_BEGIN(data_access_slb)
IVEC=0x380
IDAR=1
IKVM_SKIP=1
IKVM_REAL=1
INT_DEFINE_END(data_access_slb)
......@@ -1425,8 +1334,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
bl do_bad_slb_fault
b interrupt_return
GEN_KVM data_access_slb
/**
* Interrupt 0x400 - Instruction Storage Interrupt (ISI).
......@@ -1463,8 +1370,6 @@ MMU_FTR_SECTION_ELSE
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
b interrupt_return
GEN_KVM instruction_access
/**
* Interrupt 0x480 - Instruction Segment Interrupt (ISLB).
......@@ -1509,8 +1414,6 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
bl do_bad_slb_fault
b interrupt_return
GEN_KVM instruction_access_slb
/**
* Interrupt 0x500 - External Interrupt.
......@@ -1555,8 +1458,6 @@ EXC_COMMON_BEGIN(hardware_interrupt_common)
bl do_IRQ
b interrupt_return
GEN_KVM hardware_interrupt
/**
* Interrupt 0x600 - Alignment Interrupt
......@@ -1584,8 +1485,6 @@ EXC_COMMON_BEGIN(alignment_common)
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b interrupt_return
GEN_KVM alignment
/**
* Interrupt 0x700 - Program Interrupt (program check).
......@@ -1693,8 +1592,6 @@ EXC_COMMON_BEGIN(program_check_common)
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b interrupt_return
GEN_KVM program_check
/*
* Interrupt 0x800 - Floating-Point Unavailable Interrupt.
......@@ -1744,8 +1641,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
b interrupt_return
#endif
GEN_KVM fp_unavailable
/**
* Interrupt 0x900 - Decrementer Interrupt.
......@@ -1784,8 +1679,6 @@ EXC_COMMON_BEGIN(decrementer_common)
bl timer_interrupt
b interrupt_return
GEN_KVM decrementer
/**
* Interrupt 0x980 - Hypervisor Decrementer Interrupt.
......@@ -1831,8 +1724,6 @@ EXC_COMMON_BEGIN(hdecrementer_common)
ld r13,PACA_EXGEN+EX_R13(r13)
HRFI_TO_KERNEL
GEN_KVM hdecrementer
/**
* Interrupt 0xa00 - Directed Privileged Doorbell Interrupt.
......@@ -1872,8 +1763,6 @@ EXC_COMMON_BEGIN(doorbell_super_common)
#endif
b interrupt_return
GEN_KVM doorbell_super
EXC_REAL_NONE(0xb00, 0x100)
EXC_VIRT_NONE(0x4b00, 0x100)
......@@ -1923,7 +1812,7 @@ INT_DEFINE_END(system_call)
GET_PACA(r13)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
KVMTEST system_call /* uses r10, branch to system_call_kvm */
KVMTEST system_call kvm_hcall /* uses r10, branch to kvm_hcall */
mfctr r9
#else
mr r9,r13
......@@ -1979,14 +1868,16 @@ EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
EXC_VIRT_END(system_call, 0x4c00, 0x100)
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
TRAMP_REAL_BEGIN(system_call_kvm)
/*
* This is a hcall, so register convention is as above, with these
* differences:
* r13 = PACA
* ctr = orig r13
* orig r10 saved in PACA
*/
TRAMP_REAL_BEGIN(kvm_hcall)
std r9,PACA_EXGEN+EX_R9(r13)
std r11,PACA_EXGEN+EX_R11(r13)
std r12,PACA_EXGEN+EX_R12(r13)
mfcr r9
mfctr r10
std r10,PACA_EXGEN+EX_R13(r13)
li r10,0
std r10,PACA_EXGEN+EX_CFAR(r13)
std r10,PACA_EXGEN+EX_CTR(r13)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
......@@ -1994,31 +1885,24 @@ TRAMP_REAL_BEGIN(system_call_kvm)
*/
BEGIN_FTR_SECTION
mfspr r10,SPRN_PPR
std r10,HSTATE_PPR(r13)
std r10,PACA_EXGEN+EX_PPR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
HMT_MEDIUM
mfctr r10
SET_SCRATCH0(r10)
mfcr r10
std r12,HSTATE_SCRATCH0(r13)
sldi r12,r10,32
ori r12,r12,0xc00
#ifdef CONFIG_RELOCATABLE
/*
* Requires __LOAD_FAR_HANDLER beause kvmppc_interrupt lives
* Requires __LOAD_FAR_HANDLER beause kvmppc_hcall lives
* outside the head section.
*/
__LOAD_FAR_HANDLER(r10, kvmppc_interrupt)
__LOAD_FAR_HANDLER(r10, kvmppc_hcall)
mtctr r10
ld r10,PACA_EXGEN+EX_R10(r13)
bctr
#else
ld r10,PACA_EXGEN+EX_R10(r13)
b kvmppc_interrupt
b kvmppc_hcall
#endif
#endif
/**
* Interrupt 0xd00 - Trace Interrupt.
* This is a synchronous interrupt in response to instruction step or
......@@ -2043,8 +1927,6 @@ EXC_COMMON_BEGIN(single_step_common)
bl single_step_exception
b interrupt_return
GEN_KVM single_step
/**
* Interrupt 0xe00 - Hypervisor Data Storage Interrupt (HDSI).
......@@ -2063,7 +1945,6 @@ INT_DEFINE_BEGIN(h_data_storage)
IHSRR=1
IDAR=1
IDSISR=1
IKVM_SKIP=1
IKVM_REAL=1
IKVM_VIRT=1
INT_DEFINE_END(h_data_storage)
......@@ -2084,8 +1965,6 @@ MMU_FTR_SECTION_ELSE
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
b interrupt_return
GEN_KVM h_data_storage
/**
* Interrupt 0xe20 - Hypervisor Instruction Storage Interrupt (HISI).
......@@ -2111,8 +1990,6 @@ EXC_COMMON_BEGIN(h_instr_storage_common)
bl unknown_exception
b interrupt_return
GEN_KVM h_instr_storage
/**
* Interrupt 0xe40 - Hypervisor Emulation Assistance Interrupt.
......@@ -2137,8 +2014,6 @@ EXC_COMMON_BEGIN(emulation_assist_common)
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b interrupt_return
GEN_KVM emulation_assist
/**
* Interrupt 0xe60 - Hypervisor Maintenance Interrupt (HMI).
......@@ -2210,16 +2085,12 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
EXCEPTION_RESTORE_REGS hsrr=1
GEN_INT_ENTRY hmi_exception, virt=0
GEN_KVM hmi_exception_early
EXC_COMMON_BEGIN(hmi_exception_common)
GEN_COMMON hmi_exception
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
b interrupt_return
GEN_KVM hmi_exception
/**
* Interrupt 0xe80 - Directed Hypervisor Doorbell Interrupt.
......@@ -2250,8 +2121,6 @@ EXC_COMMON_BEGIN(h_doorbell_common)
#endif
b interrupt_return
GEN_KVM h_doorbell
/**
* Interrupt 0xea0 - Hypervisor Virtualization Interrupt.
......@@ -2278,8 +2147,6 @@ EXC_COMMON_BEGIN(h_virt_irq_common)
bl do_IRQ
b interrupt_return
GEN_KVM h_virt_irq
EXC_REAL_NONE(0xec0, 0x20)
EXC_VIRT_NONE(0x4ec0, 0x20)
......@@ -2323,8 +2190,6 @@ EXC_COMMON_BEGIN(performance_monitor_common)
bl performance_monitor_exception
b interrupt_return
GEN_KVM performance_monitor
/**
* Interrupt 0xf20 - Vector Unavailable Interrupt.
......@@ -2374,8 +2239,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
bl altivec_unavailable_exception
b interrupt_return
GEN_KVM altivec_unavailable
/**
* Interrupt 0xf40 - VSX Unavailable Interrupt.
......@@ -2424,8 +2287,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
bl vsx_unavailable_exception
b interrupt_return
GEN_KVM vsx_unavailable
/**
* Interrupt 0xf60 - Facility Unavailable Interrupt.
......@@ -2454,8 +2315,6 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
REST_NVGPRS(r1) /* instruction emulation may change GPRs */
b interrupt_return
GEN_KVM facility_unavailable
/**
* Interrupt 0xf60 - Hypervisor Facility Unavailable Interrupt.
......@@ -2484,8 +2343,6 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common)
REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
b interrupt_return
GEN_KVM h_facility_unavailable
EXC_REAL_NONE(0xfa0, 0x20)
EXC_VIRT_NONE(0x4fa0, 0x20)
......@@ -2515,8 +2372,6 @@ EXC_COMMON_BEGIN(cbe_system_error_common)
bl cbe_system_error_exception
b interrupt_return
GEN_KVM cbe_system_error
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
......@@ -2548,8 +2403,6 @@ EXC_COMMON_BEGIN(instruction_breakpoint_common)
bl instruction_breakpoint_exception
b interrupt_return
GEN_KVM instruction_breakpoint
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
......@@ -2670,8 +2523,6 @@ EXC_COMMON_BEGIN(denorm_exception_common)
bl unknown_exception
b interrupt_return
GEN_KVM denorm_exception
#ifdef CONFIG_CBE_RAS
INT_DEFINE_BEGIN(cbe_maintenance)
......@@ -2689,8 +2540,6 @@ EXC_COMMON_BEGIN(cbe_maintenance_common)
bl cbe_maintenance_exception
b interrupt_return
GEN_KVM cbe_maintenance
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
......@@ -2721,8 +2570,6 @@ EXC_COMMON_BEGIN(altivec_assist_common)
#endif
b interrupt_return
GEN_KVM altivec_assist
#ifdef CONFIG_CBE_RAS
INT_DEFINE_BEGIN(cbe_thermal)
......@@ -2740,8 +2587,6 @@ EXC_COMMON_BEGIN(cbe_thermal_common)
bl cbe_thermal_exception
b interrupt_return
GEN_KVM cbe_thermal
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
......@@ -2994,6 +2839,15 @@ TRAMP_REAL_BEGIN(rfscv_flush_fallback)
USE_TEXT_SECTION()
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
kvm_interrupt:
/*
* The conditional branch in KVMTEST can't reach all the way,
* make a stub.
*/
b kvmppc_interrupt
#endif
_GLOBAL(do_uaccess_flush)
UACCESS_FLUSH_FIXUP_SECTION
nop
......@@ -3009,32 +2863,6 @@ EXPORT_SYMBOL(do_uaccess_flush)
MASKED_INTERRUPT
MASKED_INTERRUPT hsrr=1
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
kvmppc_skip_interrupt:
/*
* Here all GPRs are unchanged from when the interrupt happened
* except for r13, which is saved in SPRG_SCRATCH0.
*/
mfspr r13, SPRN_SRR0
addi r13, r13, 4
mtspr SPRN_SRR0, r13
GET_SCRATCH0(r13)
RFI_TO_KERNEL
b .
kvmppc_skip_Hinterrupt:
/*
* Here all GPRs are unchanged from when the interrupt happened
* except for r13, which is saved in SPRG_SCRATCH0.
*/
mfspr r13, SPRN_HSRR0
addi r13, r13, 4
mtspr SPRN_HSRR0, r13
GET_SCRATCH0(r13)
HRFI_TO_KERNEL
b .
#endif
/*
* Relocation-on interrupts: A subset of the interrupts can be delivered
* with IR=1/DR=1, if AIL==2 and MSR.HV won't be changed by delivering
......
......@@ -432,16 +432,19 @@ device_initcall(stf_barrier_debugfs_init);
static void update_branch_cache_flush(void)
{
u32 *site;
u32 *site, __maybe_unused *site2;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
site = &patch__call_kvm_flush_link_stack;
site2 = &patch__call_kvm_flush_link_stack_p9;
// This controls the branch from guest_exit_cont to kvm_flush_link_stack
if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
patch_instruction_site(site, ppc_inst(PPC_RAW_NOP()));
patch_instruction_site(site2, ppc_inst(PPC_RAW_NOP()));
} else {
// Could use HW flush, but that could also flush count cache
patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
patch_branch_site(site2, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
}
#endif
......
......@@ -508,16 +508,6 @@ EXPORT_SYMBOL(profile_pc);
* 64-bit uses a byte in the PACA, 32-bit uses a per-cpu variable...
*/
#ifdef CONFIG_PPC64
static inline unsigned long test_irq_work_pending(void)
{
unsigned long x;
asm volatile("lbz %0,%1(13)"
: "=r" (x)
: "i" (offsetof(struct paca_struct, irq_work_pending)));
return x;
}
static inline void set_irq_work_pending_flag(void)
{
asm volatile("stb %0,%1(13)" : :
......
......@@ -57,6 +57,7 @@ kvm-pr-y := \
book3s_32_mmu.o
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_64_entry.o \
tm.o
ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
......@@ -86,6 +87,7 @@ kvm-book3s_64-builtin-tm-objs-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_hv_hmi.o \
book3s_hv_p9_entry.o \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_hv_ras.o \
......
......@@ -171,6 +171,12 @@ void kvmppc_core_queue_machine_check(struct kvm_vcpu *vcpu, ulong flags)
}
EXPORT_SYMBOL_GPL(kvmppc_core_queue_machine_check);
void kvmppc_core_queue_syscall(struct kvm_vcpu *vcpu)
{
kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_SYSCALL, 0);
}
EXPORT_SYMBOL(kvmppc_core_queue_syscall);
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
/* might as well deliver this straight away */
......@@ -1044,13 +1050,10 @@ static int kvmppc_book3s_init(void)
#ifdef CONFIG_KVM_XICS
#ifdef CONFIG_KVM_XIVE
if (xics_on_xive()) {
kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
if (kvmppc_xive_native_supported()) {
kvmppc_xive_native_init_module();
if (kvmppc_xive_native_supported())
kvm_register_device_ops(&kvm_xive_native_ops,
KVM_DEV_TYPE_XIVE);
}
} else
#endif
kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
......@@ -1060,12 +1063,6 @@ static int kvmppc_book3s_init(void)
static void kvmppc_book3s_exit(void)
{
#ifdef CONFIG_KVM_XICS
if (xics_on_xive()) {
kvmppc_xive_exit_module();
kvmppc_xive_native_exit_module();
}
#endif
#ifdef CONFIG_KVM_BOOK3S_32_HANDLER
kvmppc_book3s_exit_pr();
#endif
......
/* SPDX-License-Identifier: GPL-2.0-only */
#include <asm/asm-offsets.h>
#include <asm/cache.h>
#include <asm/code-patching-asm.h>
#include <asm/exception-64s.h>
#include <asm/export.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_book3s_asm.h>
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/ultravisor-api.h>
/*
* These are branched to from interrupt handlers in exception-64s.S which set
* IKVM_REAL or IKVM_VIRT, if HSTATE_IN_GUEST was found to be non-zero.
*/
/*
* This is a hcall, so register convention is as
* Documentation/powerpc/papr_hcalls.rst.
*
* This may also be a syscall from PR-KVM userspace that is to be
* reflected to the PR guest kernel, so registers may be set up for
* a system call rather than hcall. We don't currently clobber
* anything here, but the 0xc00 handler has already clobbered CTR
* and CR0, so PR-KVM can not support a guest kernel that preserves
* those registers across its system calls.
*
* The state of registers is as kvmppc_interrupt, except CFAR is not
* saved, R13 is not in SCRATCH0, and R10 does not contain the trap.
*/
.global kvmppc_hcall
.balign IFETCH_ALIGN_BYTES
kvmppc_hcall:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,KVM_GUEST_MODE_HV_P9
beq kvmppc_p9_exit_hcall
#endif
ld r10,PACA_EXGEN+EX_R13(r13)
SET_SCRATCH0(r10)
li r10,0xc00
/* Now we look like kvmppc_interrupt */
li r11,PACA_EXGEN
b .Lgot_save_area
/*
* KVM interrupt entry occurs after GEN_INT_ENTRY runs, and follows that
* call convention:
*
* guest R9-R13, CTR, CFAR, PPR saved in PACA EX_xxx save area
* guest (H)DAR, (H)DSISR are also in the save area for relevant interrupts
* guest R13 also saved in SCRATCH0
* R13 = PACA
* R11 = (H)SRR0
* R12 = (H)SRR1
* R9 = guest CR
* PPR is set to medium
*
* With the addition for KVM:
* R10 = trap vector
*/
.global kvmppc_interrupt
.balign IFETCH_ALIGN_BYTES
kvmppc_interrupt:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
std r10,HSTATE_SCRATCH0(r13)
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,KVM_GUEST_MODE_HV_P9
beq kvmppc_p9_exit_interrupt
ld r10,HSTATE_SCRATCH0(r13)
#endif
li r11,PACA_EXGEN
cmpdi r10,0x200
bgt+ .Lgot_save_area
li r11,PACA_EXMC
beq .Lgot_save_area
li r11,PACA_EXNMI
.Lgot_save_area:
add r11,r11,r13
BEGIN_FTR_SECTION
ld r12,EX_CFAR(r11)
std r12,HSTATE_CFAR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld r12,EX_CTR(r11)
mtctr r12
BEGIN_FTR_SECTION
ld r12,EX_PPR(r11)
std r12,HSTATE_PPR(r13)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r12,EX_R12(r11)
std r12,HSTATE_SCRATCH0(r13)
sldi r12,r9,32
or r12,r12,r10
ld r9,EX_R9(r11)
ld r10,EX_R10(r11)
ld r11,EX_R11(r11)
/*
* Hcalls and other interrupts come here after normalising register
* contents and save locations:
*
* R12 = (guest CR << 32) | interrupt vector
* R13 = PACA
* guest R12 saved in shadow HSTATE_SCRATCH0
* guest R13 saved in SPRN_SCRATCH0
*/
std r9,HSTATE_SCRATCH2(r13)
lbz r9,HSTATE_IN_GUEST(r13)
cmpwi r9,KVM_GUEST_MODE_SKIP
beq- .Lmaybe_skip
.Lno_skip:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
cmpwi r9,KVM_GUEST_MODE_GUEST
beq kvmppc_interrupt_pr
#endif
b kvmppc_interrupt_hv
#else
b kvmppc_interrupt_pr
#endif
/*
* "Skip" interrupts are part of a trick KVM uses a with hash guests to load
* the faulting instruction in guest memory from the the hypervisor without
* walking page tables.
*
* When the guest takes a fault that requires the hypervisor to load the
* instruction (e.g., MMIO emulation), KVM is running in real-mode with HV=1
* and the guest MMU context loaded. It sets KVM_GUEST_MODE_SKIP, and sets
* MSR[DR]=1 while leaving MSR[IR]=0, so it continues to fetch HV instructions
* but loads and stores will access the guest context. This is used to load
* the faulting instruction using the faulting guest effective address.
*
* However the guest context may not be able to translate, or it may cause a
* machine check or other issue, which results in a fault in the host
* (even with KVM-HV).
*
* These faults come here because KVM_GUEST_MODE_SKIP was set, so if they
* are (or are likely) caused by that load, the instruction is skipped by
* just returning with the PC advanced +4, where it is noticed the load did
* not execute and it goes to the slow path which walks the page tables to
* read guest memory.
*/
.Lmaybe_skip:
cmpwi r12,BOOK3S_INTERRUPT_MACHINE_CHECK
beq 1f
cmpwi r12,BOOK3S_INTERRUPT_DATA_STORAGE
beq 1f
cmpwi r12,BOOK3S_INTERRUPT_DATA_SEGMENT
beq 1f
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/* HSRR interrupts get 2 added to interrupt number */
cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE | 0x2
beq 2f
#endif
b .Lno_skip
1: mfspr r9,SPRN_SRR0
addi r9,r9,4
mtspr SPRN_SRR0,r9
ld r12,HSTATE_SCRATCH0(r13)
ld r9,HSTATE_SCRATCH2(r13)
GET_SCRATCH0(r13)
RFI_TO_KERNEL
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
2: mfspr r9,SPRN_HSRR0
addi r9,r9,4
mtspr SPRN_HSRR0,r9
ld r12,HSTATE_SCRATCH0(r13)
ld r9,HSTATE_SCRATCH2(r13)
GET_SCRATCH0(r13)
HRFI_TO_KERNEL
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/* Stack frame offsets for kvmppc_p9_enter_guest */
#define SFS (144 + STACK_FRAME_MIN_SIZE)
#define STACK_SLOT_NVGPRS (SFS - 144) /* 18 gprs */
/*
* void kvmppc_p9_enter_guest(struct vcpu *vcpu);
*
* Enter the guest on a ISAv3.0 or later system.
*/
.balign IFETCH_ALIGN_BYTES
_GLOBAL(kvmppc_p9_enter_guest)
EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest)
mflr r0
std r0,PPC_LR_STKOFF(r1)
stdu r1,-SFS(r1)
std r1,HSTATE_HOST_R1(r13)
mfcr r4
stw r4,SFS+8(r1)
reg = 14
.rept 18
std reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
reg = reg + 1
.endr
ld r4,VCPU_LR(r3)
mtlr r4
ld r4,VCPU_CTR(r3)
mtctr r4
ld r4,VCPU_XER(r3)
mtspr SPRN_XER,r4
ld r1,VCPU_CR(r3)
BEGIN_FTR_SECTION
ld r4,VCPU_CFAR(r3)
mtspr SPRN_CFAR,r4
END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
BEGIN_FTR_SECTION
ld r4,VCPU_PPR(r3)
mtspr SPRN_PPR,r4
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
reg = 4
.rept 28
ld reg,__VCPU_GPR(reg)(r3)
reg = reg + 1
.endr
ld r4,VCPU_KVM(r3)
lbz r4,KVM_SECURE_GUEST(r4)
cmpdi r4,0
ld r4,VCPU_GPR(R4)(r3)
bne .Lret_to_ultra
mtcr r1
ld r0,VCPU_GPR(R0)(r3)
ld r1,VCPU_GPR(R1)(r3)
ld r2,VCPU_GPR(R2)(r3)
ld r3,VCPU_GPR(R3)(r3)
HRFI_TO_GUEST
b .
/*
* Use UV_RETURN ultracall to return control back to the Ultravisor
* after processing an hypercall or interrupt that was forwarded
* (a.k.a. reflected) to the Hypervisor.
*
* All registers have already been reloaded except the ucall requires:
* R0 = hcall result
* R2 = SRR1, so UV can detect a synthesized interrupt (if any)
* R3 = UV_RETURN
*/
.Lret_to_ultra:
mtcr r1
ld r1,VCPU_GPR(R1)(r3)
ld r0,VCPU_GPR(R3)(r3)
mfspr r2,SPRN_SRR1
LOAD_REG_IMMEDIATE(r3, UV_RETURN)
sc 2
/*
* kvmppc_p9_exit_hcall and kvmppc_p9_exit_interrupt are branched to from
* above if the interrupt was taken for a guest that was entered via
* kvmppc_p9_enter_guest().
*
* The exit code recovers the host stack and vcpu pointer, saves all guest GPRs
* and CR, LR, XER as well as guest MSR and NIA into the VCPU, then re-
* establishes the host stack and registers to return from the
* kvmppc_p9_enter_guest() function, which saves CTR and other guest registers
* (SPRs and FP, VEC, etc).
*/
.balign IFETCH_ALIGN_BYTES
kvmppc_p9_exit_hcall:
mfspr r11,SPRN_SRR0
mfspr r12,SPRN_SRR1
li r10,0xc00
std r10,HSTATE_SCRATCH0(r13)
.balign IFETCH_ALIGN_BYTES
kvmppc_p9_exit_interrupt:
/*
* If set to KVM_GUEST_MODE_HV_P9 but we're still in the
* hypervisor, that means we can't return from the entry stack.
*/
rldicl. r10,r12,64-MSR_HV_LG,63
bne- kvmppc_p9_bad_interrupt
std r1,HSTATE_SCRATCH1(r13)
std r3,HSTATE_SCRATCH2(r13)
ld r1,HSTATE_HOST_R1(r13)
ld r3,HSTATE_KVM_VCPU(r13)
std r9,VCPU_CR(r3)
1:
std r11,VCPU_PC(r3)
std r12,VCPU_MSR(r3)
reg = 14
.rept 18
std reg,__VCPU_GPR(reg)(r3)
reg = reg + 1
.endr
/* r1, r3, r9-r13 are saved to vcpu by C code */
std r0,VCPU_GPR(R0)(r3)
std r2,VCPU_GPR(R2)(r3)
reg = 4
.rept 5
std reg,__VCPU_GPR(reg)(r3)
reg = reg + 1
.endr
ld r2,PACATOC(r13)
mflr r4
std r4,VCPU_LR(r3)
mfspr r4,SPRN_XER
std r4,VCPU_XER(r3)
reg = 14
.rept 18
ld reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
reg = reg + 1
.endr
lwz r4,SFS+8(r1)
mtcr r4
/*
* Flush the link stack here, before executing the first blr on the
* way out of the guest.
*
* The link stack won't match coming out of the guest anyway so the
* only cost is the flush itself. The call clobbers r0.
*/
1: nop
patch_site 1b patch__call_kvm_flush_link_stack_p9
addi r1,r1,SFS
ld r0,PPC_LR_STKOFF(r1)
mtlr r0
blr
/*
* Took an interrupt somewhere right before HRFID to guest, so registers are
* in a bad way. Return things hopefully enough to run host virtual code and
* run the Linux interrupt handler (SRESET or MCE) to print something useful.
*
* We could be really clever and save all host registers in known locations
* before setting HSTATE_IN_GUEST, then restoring them all here, and setting
* return address to a fixup that sets them up again. But that's a lot of
* effort for a small bit of code. Lots of other things to do first.
*/
kvmppc_p9_bad_interrupt:
BEGIN_MMU_FTR_SECTION
/*
* Hash host doesn't try to recover MMU (requires host SLB reload)
*/
b .
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
/*
* Clean up guest registers to give host a chance to run.
*/
li r10,0
mtspr SPRN_AMR,r10
mtspr SPRN_IAMR,r10
mtspr SPRN_CIABR,r10
mtspr SPRN_DAWRX0,r10
BEGIN_FTR_SECTION
mtspr SPRN_DAWRX1,r10
END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
mtspr SPRN_PID,r10
/*
* Switch to host MMU mode
*/
ld r10, HSTATE_KVM_VCPU(r13)
ld r10, VCPU_KVM(r10)
lwz r10, KVM_HOST_LPID(r10)
mtspr SPRN_LPID,r10
ld r10, HSTATE_KVM_VCPU(r13)
ld r10, VCPU_KVM(r10)
ld r10, KVM_HOST_LPCR(r10)
mtspr SPRN_LPCR,r10
/*
* Set GUEST_MODE_NONE so the handler won't branch to KVM, and clear
* MSR_RI in r12 ([H]SRR1) so the handler won't try to return.
*/
li r10,KVM_GUEST_MODE_NONE
stb r10,HSTATE_IN_GUEST(r13)
li r10,MSR_RI
andc r12,r12,r10
/*
* Go back to interrupt handler. MCE and SRESET have their specific
* PACA save area so they should be used directly. They set up their
* own stack. The other handlers all use EXGEN. They will use the
* guest r1 if it looks like a kernel stack, so just load the
* emergency stack and go to program check for all other interrupts.
*/
ld r10,HSTATE_SCRATCH0(r13)
cmpwi r10,BOOK3S_INTERRUPT_MACHINE_CHECK
beq machine_check_common
cmpwi r10,BOOK3S_INTERRUPT_SYSTEM_RESET
beq system_reset_common
b .
#endif
......@@ -391,10 +391,6 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
/* For radix, we might be in virtual mode, so punt */
if (kvm_is_radix(vcpu->kvm))
return H_TOO_HARD;
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
......@@ -489,10 +485,6 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
bool prereg = false;
struct kvmppc_spapr_tce_iommu_table *stit;
/* For radix, we might be in virtual mode, so punt */
if (kvm_is_radix(vcpu->kvm))
return H_TOO_HARD;
/*
* used to check for invalidations in progress
*/
......@@ -602,10 +594,6 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
long i, ret;
struct kvmppc_spapr_tce_iommu_table *stit;
/* For radix, we might be in virtual mode, so punt */
if (kvm_is_radix(vcpu->kvm))
return H_TOO_HARD;
stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
......
......@@ -103,13 +103,9 @@ static int target_smt_mode;
module_param(target_smt_mode, int, 0644);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
static bool indep_threads_mode = true;
module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
static bool one_vm_per_core;
module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires POWER8 or older)");
#ifdef CONFIG_KVM_XICS
static const struct kernel_param_ops module_param_ops = {
......@@ -134,9 +130,6 @@ static inline bool nesting_enabled(struct kvm *kvm)
return kvm->arch.nested_enable && kvm_is_radix(kvm);
}
/* If set, the threads on each CPU core have to be in the same MMU mode */
static bool no_mixing_hpt_and_radix __read_mostly;
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
/*
......@@ -807,7 +800,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
* KVM does not support mflags=2 (AIL=2) and AIL=1 is reserved.
* Keep this in synch with kvmppc_filter_guest_lpcr_hv.
*/
if (mflags != 0 && mflags != 3)
if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
return H_UNSUPPORTED_FLAG_START;
return H_TOO_HARD;
default:
......@@ -899,6 +893,10 @@ static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
* H_SUCCESS if the source vcore wasn't idle (e.g. if it may
* have useful work to do and should not confer) so we don't
* recheck that here.
*
* In the case of the P9 single vcpu per vcore case, the real
* mode handler is not called but no other threads are in the
* source vcore.
*/
spin_lock(&vcore->lock);
......@@ -926,6 +924,7 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
int yield_count;
......@@ -937,11 +936,57 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
return RESUME_HOST;
switch (req) {
case H_REMOVE:
ret = kvmppc_h_remove(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_ENTER:
ret = kvmppc_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6),
kvmppc_get_gpr(vcpu, 7));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_READ:
ret = kvmppc_h_read(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_CLEAR_MOD:
ret = kvmppc_h_clear_mod(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_CLEAR_REF:
ret = kvmppc_h_clear_ref(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_PROTECT:
ret = kvmppc_h_protect(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_BULK_REMOVE:
ret = kvmppc_h_bulk_remove(vcpu);
if (ret == H_TOO_HARD)
return RESUME_HOST;
break;
case H_CEDE:
break;
case H_PROD:
target = kvmppc_get_gpr(vcpu, 4);
tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
......@@ -955,7 +1000,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
target = kvmppc_get_gpr(vcpu, 4);
if (target == -1)
break;
tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
......@@ -971,12 +1016,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 6));
break;
case H_RTAS:
if (list_empty(&vcpu->kvm->arch.rtas_tokens))
if (list_empty(&kvm->arch.rtas_tokens))
return RESUME_HOST;
idx = srcu_read_lock(&vcpu->kvm->srcu);
idx = srcu_read_lock(&kvm->srcu);
rc = kvmppc_rtas_hcall(vcpu);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
srcu_read_unlock(&kvm->srcu, idx);
if (rc == -ENOENT)
return RESUME_HOST;
......@@ -1063,12 +1108,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SET_PARTITION_TABLE:
ret = H_FUNCTION;
if (nesting_enabled(vcpu->kvm))
if (nesting_enabled(kvm))
ret = kvmhv_set_partition_table(vcpu);
break;
case H_ENTER_NESTED:
ret = H_FUNCTION;
if (!nesting_enabled(vcpu->kvm))
if (!nesting_enabled(kvm))
break;
ret = kvmhv_enter_nested_guest(vcpu);
if (ret == H_INTERRUPT) {
......@@ -1083,12 +1128,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
break;
case H_TLB_INVALIDATE:
ret = H_FUNCTION;
if (nesting_enabled(vcpu->kvm))
if (nesting_enabled(kvm))
ret = kvmhv_do_nested_tlbie(vcpu);
break;
case H_COPY_TOFROM_GUEST:
ret = H_FUNCTION;
if (nesting_enabled(vcpu->kvm))
if (nesting_enabled(kvm))
ret = kvmhv_copy_tofrom_guest_nested(vcpu);
break;
case H_PAGE_INIT:
......@@ -1099,7 +1144,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_PAGE_IN:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
ret = kvmppc_h_svm_page_in(vcpu->kvm,
ret = kvmppc_h_svm_page_in(kvm,
kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
......@@ -1107,7 +1152,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_PAGE_OUT:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
ret = kvmppc_h_svm_page_out(vcpu->kvm,
ret = kvmppc_h_svm_page_out(kvm,
kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
......@@ -1115,12 +1160,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_INIT_START:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
ret = kvmppc_h_svm_init_start(vcpu->kvm);
ret = kvmppc_h_svm_init_start(kvm);
break;
case H_SVM_INIT_DONE:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
ret = kvmppc_h_svm_init_done(vcpu->kvm);
ret = kvmppc_h_svm_init_done(kvm);
break;
case H_SVM_INIT_ABORT:
/*
......@@ -1130,24 +1175,26 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
* Instead the kvm->arch.secure_guest flag is checked inside
* kvmppc_h_svm_init_abort().
*/
ret = kvmppc_h_svm_init_abort(vcpu->kvm);
ret = kvmppc_h_svm_init_abort(kvm);
break;
default:
return RESUME_HOST;
}
WARN_ON_ONCE(ret == H_TOO_HARD);
kvmppc_set_gpr(vcpu, 3, ret);
vcpu->arch.hcall_needed = 0;
return RESUME_GUEST;
}
/*
* Handle H_CEDE in the nested virtualization case where we haven't
* called the real-mode hcall handlers in book3s_hv_rmhandlers.S.
* Handle H_CEDE in the P9 path where we don't call the real-mode hcall
* handlers in book3s_hv_rmhandlers.S.
*
* This has to be done early, not in kvmppc_pseries_do_hcall(), so
* that the cede logic in kvmppc_run_single_vcpu() works properly.
*/
static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
static void kvmppc_cede(struct kvm_vcpu *vcpu)
{
vcpu->arch.shregs.msr |= MSR_EE;
vcpu->arch.ceded = 1;
......@@ -1400,13 +1447,39 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
}
case BOOK3S_INTERRUPT_SYSCALL:
{
/* hcall - punt to userspace */
int i;
/* hypercall with MSR_PR has already been handled in rmode,
* and never reaches here.
*/
if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
/*
* Guest userspace executed sc 1. This can only be
* reached by the P9 path because the old path
* handles this case in realmode hcall handlers.
*/
if (!kvmhv_vcpu_is_radix(vcpu)) {
/*
* A guest could be running PR KVM, so this
* may be a PR KVM hcall. It must be reflected
* to the guest kernel as a sc interrupt.
*/
kvmppc_core_queue_syscall(vcpu);
} else {
/*
* Radix guests can not run PR KVM or nested HV
* hash guests which might run PR KVM, so this
* is always a privilege fault. Send a program
* check to guest kernel.
*/
kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
}
r = RESUME_GUEST;
break;
}
/*
* hcall - gather args and set exit_reason. This will next be
* handled by kvmppc_pseries_do_hcall which may be able to deal
* with it and resume guest, or may punt to userspace.
*/
run->papr_hcall.nr = kvmppc_get_gpr(vcpu, 3);
for (i = 0; i < 9; ++i)
run->papr_hcall.args[i] = kvmppc_get_gpr(vcpu, 4 + i);
......@@ -1419,20 +1492,102 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* We get these next two if the guest accesses a page which it thinks
* it has mapped but which is not actually present, either because
* it is for an emulated I/O device or because the corresonding
* host page has been paged out. Any other HDSI/HISI interrupts
* have been handled already.
* host page has been paged out.
*
* Any other HDSI/HISI interrupts have been handled already for P7/8
* guests. For POWER9 hash guests not using rmhandlers, basic hash
* fault handling is done here.
*/
case BOOK3S_INTERRUPT_H_DATA_STORAGE:
r = RESUME_PAGE_FAULT;
case BOOK3S_INTERRUPT_H_DATA_STORAGE: {
unsigned long vsid;
long err;
if (vcpu->arch.fault_dsisr == HDSISR_CANARY) {
r = RESUME_GUEST; /* Just retry if it's the canary */
break;
}
if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
/*
* Radix doesn't require anything, and pre-ISAv3.0 hash
* already attempted to handle this in rmhandlers. The
* hash fault handling below is v3 only (it uses ASDR
* via fault_gpa).
*/
r = RESUME_PAGE_FAULT;
break;
}
if (!(vcpu->arch.fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT))) {
kvmppc_core_queue_data_storage(vcpu,
vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
r = RESUME_GUEST;
break;
}
if (!(vcpu->arch.shregs.msr & MSR_DR))
vsid = vcpu->kvm->arch.vrma_slb_v;
else
vsid = vcpu->arch.fault_gpa;
err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
vsid, vcpu->arch.fault_dsisr, true);
if (err == 0) {
r = RESUME_GUEST;
} else if (err == -1 || err == -2) {
r = RESUME_PAGE_FAULT;
} else {
kvmppc_core_queue_data_storage(vcpu,
vcpu->arch.fault_dar, err);
r = RESUME_GUEST;
}
break;
case BOOK3S_INTERRUPT_H_INST_STORAGE:
}
case BOOK3S_INTERRUPT_H_INST_STORAGE: {
unsigned long vsid;
long err;
vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
DSISR_SRR1_MATCH_64S;
if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
r = RESUME_PAGE_FAULT;
if (kvm_is_radix(vcpu->kvm) || !cpu_has_feature(CPU_FTR_ARCH_300)) {
/*
* Radix doesn't require anything, and pre-ISAv3.0 hash
* already attempted to handle this in rmhandlers. The
* hash fault handling below is v3 only (it uses ASDR
* via fault_gpa).
*/
if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
r = RESUME_PAGE_FAULT;
break;
}
if (!(vcpu->arch.fault_dsisr & SRR1_ISI_NOPT)) {
kvmppc_core_queue_inst_storage(vcpu,
vcpu->arch.fault_dsisr);
r = RESUME_GUEST;
break;
}
if (!(vcpu->arch.shregs.msr & MSR_IR))
vsid = vcpu->kvm->arch.vrma_slb_v;
else
vsid = vcpu->arch.fault_gpa;
err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
vsid, vcpu->arch.fault_dsisr, false);
if (err == 0) {
r = RESUME_GUEST;
} else if (err == -1) {
r = RESUME_PAGE_FAULT;
} else {
kvmppc_core_queue_inst_storage(vcpu, err);
r = RESUME_GUEST;
}
break;
}
/*
* This occurs if the guest executes an illegal instruction.
* If the guest debug is disabled, generate a program interrupt
......@@ -1654,6 +1809,14 @@ unsigned long kvmppc_filter_lpcr_hv(struct kvm *kvm, unsigned long lpcr)
lpcr &= ~LPCR_AIL;
if ((lpcr & LPCR_AIL) != LPCR_AIL_3)
lpcr &= ~LPCR_AIL; /* LPCR[AIL]=1/2 is disallowed */
/*
* On some POWER9s we force AIL off for radix guests to prevent
* executing in MSR[HV]=1 mode with the MMU enabled and PIDR set to
* guest, which can result in Q0 translations with LPID=0 PID=PIDR to
* be cached, which the host TLB management does not expect.
*/
if (kvm_is_radix(kvm) && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
lpcr &= ~LPCR_AIL;
/*
* On POWER9, allow userspace to enable large decrementer for the
......@@ -2233,7 +2396,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*/
static int threads_per_vcore(struct kvm *kvm)
{
if (kvm->arch.threads_indep)
if (cpu_has_feature(CPU_FTR_ARCH_300))
return 1;
return threads_per_subcore;
}
......@@ -2967,9 +3130,6 @@ static void prepare_threads(struct kvmppc_vcore *vc)
for_each_runnable_thread(i, vcpu, vc) {
if (signal_pending(vcpu->arch.run_task))
vcpu->arch.ret = -EINTR;
else if (no_mixing_hpt_and_radix &&
kvm_is_radix(vc->kvm) != radix_enabled())
vcpu->arch.ret = -EINVAL;
else if (vcpu->arch.vpa.update_pending ||
vcpu->arch.slb_shadow.update_pending ||
vcpu->arch.dtl.update_pending)
......@@ -3176,6 +3336,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int trap;
bool is_power8;
if (WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)))
return;
/*
* Remove from the list any threads that have a signal pending
* or need a VPA update done
......@@ -3203,9 +3366,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
* Make sure we are running on primary threads, and that secondary
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
* On POWER9, we need to be not in independent-threads mode if
* this is a HPT guest on a radix host machine where the
* CPU threads may not be in different MMU modes.
*/
if ((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
......@@ -3229,18 +3389,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
if (vc->num_threads < target_threads)
collect_piggybacks(&core_info, target_threads);
/*
* On radix, arrange for TLB flushing if necessary.
* This has to be done before disabling interrupts since
* it uses smp_call_function().
*/
pcpu = smp_processor_id();
if (kvm_is_radix(vc->kvm)) {
for (sub = 0; sub < core_info.n_subcores; ++sub)
for_each_runnable_thread(i, vcpu, core_info.vc[sub])
kvmppc_prepare_radix_vcpu(vcpu, pcpu);
}
/*
* Hard-disable interrupts, and check resched flag and signals.
* If we need to reschedule or deliver a signal, clean up
......@@ -3273,8 +3421,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
cmd_bit = stat_bit = 0;
split = core_info.n_subcores;
sip = NULL;
is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
&& !cpu_has_feature(CPU_FTR_ARCH_300);
is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S);
if (split > 1) {
sip = &split_info;
......@@ -3478,184 +3625,113 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1);
}
/*
* Load up hypervisor-mode registers on P9.
*/
static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
static void load_spr_state(struct kvm_vcpu *vcpu)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
s64 hdec;
u64 tb, purr, spurr;
int trap;
unsigned long host_hfscr = mfspr(SPRN_HFSCR);
unsigned long host_ciabr = mfspr(SPRN_CIABR);
unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
unsigned long host_psscr = mfspr(SPRN_PSSCR);
unsigned long host_pidr = mfspr(SPRN_PID);
unsigned long host_dawr1 = 0;
unsigned long host_dawrx1 = 0;
if (cpu_has_feature(CPU_FTR_DAWR1)) {
host_dawr1 = mfspr(SPRN_DAWR1);
host_dawrx1 = mfspr(SPRN_DAWRX1);
}
mtspr(SPRN_DSCR, vcpu->arch.dscr);
mtspr(SPRN_IAMR, vcpu->arch.iamr);
mtspr(SPRN_PSPB, vcpu->arch.pspb);
mtspr(SPRN_FSCR, vcpu->arch.fscr);
mtspr(SPRN_TAR, vcpu->arch.tar);
mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
mtspr(SPRN_BESCR, vcpu->arch.bescr);
mtspr(SPRN_WORT, vcpu->arch.wort);
mtspr(SPRN_TIDR, vcpu->arch.tid);
mtspr(SPRN_AMR, vcpu->arch.amr);
mtspr(SPRN_UAMOR, vcpu->arch.uamor);
/*
* P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0,
* so set HDICE before writing HDEC.
* DAR, DSISR, and for nested HV, SPRGs must be set with MSR[RI]
* clear (or hstate set appropriately to catch those registers
* being clobbered if we take a MCE or SRESET), so those are done
* later.
*/
mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr | LPCR_HDICE);
isync();
hdec = time_limit - mftb();
if (hdec < 0) {
mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
isync();
return BOOK3S_INTERRUPT_HV_DECREMENTER;
}
mtspr(SPRN_HDEC, hdec);
if (vc->tb_offset) {
u64 new_tb = mftb() + vc->tb_offset;
mtspr(SPRN_TBU40, new_tb);
tb = mftb();
if ((tb & 0xffffff) < (new_tb & 0xffffff))
mtspr(SPRN_TBU40, new_tb + 0x1000000);
vc->tb_offset_applied = vc->tb_offset;
}
if (vc->pcr)
mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
mtspr(SPRN_DPDES, vc->dpdes);
mtspr(SPRN_VTB, vc->vtb);
local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
mtspr(SPRN_PURR, vcpu->arch.purr);
mtspr(SPRN_SPURR, vcpu->arch.spurr);
if (dawr_enabled()) {
mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
}
}
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
mtspr(SPRN_IC, vcpu->arch.ic);
mtspr(SPRN_PID, vcpu->arch.pid);
mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
mtspr(SPRN_AMOR, ~0UL);
mtspr(SPRN_LPCR, lpcr);
isync();
kvmppc_xive_push_vcpu(vcpu);
mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
trap = __kvmhv_vcpu_entry_p9(vcpu);
/* Advance host PURR/SPURR by the amount used by guest */
purr = mfspr(SPRN_PURR);
spurr = mfspr(SPRN_SPURR);
mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
purr - vcpu->arch.purr);
mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
spurr - vcpu->arch.spurr);
vcpu->arch.purr = purr;
vcpu->arch.spurr = spurr;
if (!(vcpu->arch.ctrl & 1))
mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
}
vcpu->arch.ic = mfspr(SPRN_IC);
vcpu->arch.pid = mfspr(SPRN_PID);
vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
static void store_spr_state(struct kvm_vcpu *vcpu)
{
vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
vcpu->arch.iamr = mfspr(SPRN_IAMR);
vcpu->arch.pspb = mfspr(SPRN_PSPB);
vcpu->arch.fscr = mfspr(SPRN_FSCR);
vcpu->arch.tar = mfspr(SPRN_TAR);
vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
vcpu->arch.bescr = mfspr(SPRN_BESCR);
vcpu->arch.wort = mfspr(SPRN_WORT);
vcpu->arch.tid = mfspr(SPRN_TIDR);
vcpu->arch.amr = mfspr(SPRN_AMR);
vcpu->arch.uamor = mfspr(SPRN_UAMOR);
vcpu->arch.dscr = mfspr(SPRN_DSCR);
}
/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
mtspr(SPRN_PSSCR, host_psscr |
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, host_hfscr);
mtspr(SPRN_CIABR, host_ciabr);
mtspr(SPRN_DAWR0, host_dawr0);
mtspr(SPRN_DAWRX0, host_dawrx0);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
mtspr(SPRN_DAWR1, host_dawr1);
mtspr(SPRN_DAWRX1, host_dawrx1);
}
mtspr(SPRN_PID, host_pidr);
/*
* Privileged (non-hypervisor) host registers to save.
*/
struct p9_host_os_sprs {
unsigned long dscr;
unsigned long tidr;
unsigned long iamr;
unsigned long amr;
unsigned long fscr;
};
/*
* Since this is radix, do a eieio; tlbsync; ptesync sequence in
* case we interrupted the guest between a tlbie and a ptesync.
*/
asm volatile("eieio; tlbsync; ptesync");
static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
{
host_os_sprs->dscr = mfspr(SPRN_DSCR);
host_os_sprs->tidr = mfspr(SPRN_TIDR);
host_os_sprs->iamr = mfspr(SPRN_IAMR);
host_os_sprs->amr = mfspr(SPRN_AMR);
host_os_sprs->fscr = mfspr(SPRN_FSCR);
}
/*
* cp_abort is required if the processor supports local copy-paste
* to clear the copy buffer that was under control of the guest.
*/
if (cpu_has_feature(CPU_FTR_ARCH_31))
asm volatile(PPC_CP_ABORT);
/* vcpu guest regs must already be saved */
static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
struct p9_host_os_sprs *host_os_sprs)
{
mtspr(SPRN_PSPB, 0);
mtspr(SPRN_WORT, 0);
mtspr(SPRN_UAMOR, 0);
mtspr(SPRN_LPID, vcpu->kvm->arch.host_lpid); /* restore host LPID */
isync();
mtspr(SPRN_DSCR, host_os_sprs->dscr);
mtspr(SPRN_TIDR, host_os_sprs->tidr);
mtspr(SPRN_IAMR, host_os_sprs->iamr);
vc->dpdes = mfspr(SPRN_DPDES);
vc->vtb = mfspr(SPRN_VTB);
mtspr(SPRN_DPDES, 0);
if (vc->pcr)
mtspr(SPRN_PCR, PCR_MASK);
if (host_os_sprs->amr != vcpu->arch.amr)
mtspr(SPRN_AMR, host_os_sprs->amr);
if (vc->tb_offset_applied) {
u64 new_tb = mftb() - vc->tb_offset_applied;
mtspr(SPRN_TBU40, new_tb);
tb = mftb();
if ((tb & 0xffffff) < (new_tb & 0xffffff))
mtspr(SPRN_TBU40, new_tb + 0x1000000);
vc->tb_offset_applied = 0;
}
if (host_os_sprs->fscr != vcpu->arch.fscr)
mtspr(SPRN_FSCR, host_os_sprs->fscr);
mtspr(SPRN_HDEC, 0x7fffffff);
mtspr(SPRN_LPCR, vcpu->kvm->arch.host_lpcr);
/* Save guest CTRL register, set runlatch to 1 */
if (!(vcpu->arch.ctrl & 1))
mtspr(SPRN_CTRLT, 1);
}
return trap;
static inline bool hcall_is_xics(unsigned long req)
{
return req == H_EOI || req == H_CPPR || req == H_IPI ||
req == H_IPOLL || req == H_XIRR || req == H_XIRR_X;
}
/*
* Virtual-mode guest entry for POWER9 and later when the host and
* guest are both using the radix MMU. The LPIDR has already been set.
* Guest entry for POWER9 and later CPUs.
*/
static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long host_dscr = mfspr(SPRN_DSCR);
unsigned long host_tidr = mfspr(SPRN_TIDR);
unsigned long host_iamr = mfspr(SPRN_IAMR);
unsigned long host_amr = mfspr(SPRN_AMR);
unsigned long host_fscr = mfspr(SPRN_FSCR);
struct p9_host_os_sprs host_os_sprs;
s64 dec;
u64 tb;
int trap, save_pmu;
WARN_ON_ONCE(vcpu->arch.ceded);
dec = mfspr(SPRN_DEC);
tb = mftb();
if (dec < 0)
......@@ -3664,7 +3740,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
if (local_paca->kvm_hstate.dec_expires < time_limit)
time_limit = local_paca->kvm_hstate.dec_expires;
vcpu->arch.ceded = 0;
save_p9_host_os_sprs(&host_os_sprs);
kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
......@@ -3693,24 +3769,20 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
#endif
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
mtspr(SPRN_DSCR, vcpu->arch.dscr);
mtspr(SPRN_IAMR, vcpu->arch.iamr);
mtspr(SPRN_PSPB, vcpu->arch.pspb);
mtspr(SPRN_FSCR, vcpu->arch.fscr);
mtspr(SPRN_TAR, vcpu->arch.tar);
mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
mtspr(SPRN_BESCR, vcpu->arch.bescr);
mtspr(SPRN_WORT, vcpu->arch.wort);
mtspr(SPRN_TIDR, vcpu->arch.tid);
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
mtspr(SPRN_AMR, vcpu->arch.amr);
mtspr(SPRN_UAMOR, vcpu->arch.uamor);
if (!(vcpu->arch.ctrl & 1))
mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
load_spr_state(vcpu);
/*
* When setting DEC, we must always deal with irq_work_raise via NMI vs
* setting DEC. The problem occurs right as we switch into guest mode
* if a NMI hits and sets pending work and sets DEC, then that will
* apply to the guest and not bring us back to the host.
*
* irq_work_raise could check a flag (or possibly LPCR[HDICE] for
* example) and set HDEC to 1? That wouldn't solve the nested hv
* case which needs to abort the hcall or zero the time limit.
*
* XXX: Another day's problem.
*/
mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
if (kvmhv_on_pseries()) {
......@@ -3718,7 +3790,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
* We need to save and restore the guest visible part of the
* psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
* doesn't do this for us. Note only required if pseries since
* this is done in kvmhv_load_hv_regs_and_go() below otherwise.
* this is done in kvmhv_vcpu_entry_p9() below otherwise.
*/
unsigned long host_psscr;
/* call our hypervisor to load up HV regs and go */
......@@ -3738,6 +3810,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
hvregs.vcpu_token = vcpu->vcpu_id;
}
hvregs.hdec_expiry = time_limit;
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
__pa(&vcpu->arch.regs));
kvmhv_restore_hv_return_state(vcpu, &hvregs);
......@@ -3750,15 +3824,41 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
kvmppc_nested_cede(vcpu);
kvmppc_cede(vcpu);
kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
} else {
trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
kvmppc_xive_push_vcpu(vcpu);
trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr);
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
!(vcpu->arch.shregs.msr & MSR_PR)) {
unsigned long req = kvmppc_get_gpr(vcpu, 3);
/* H_CEDE has to be handled now, not later */
if (req == H_CEDE) {
kvmppc_cede(vcpu);
kvmppc_xive_rearm_escalation(vcpu); /* may un-cede */
kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
/* XICS hcalls must be handled before xive is pulled */
} else if (hcall_is_xics(req)) {
int ret;
ret = kvmppc_xive_xics_hcall(vcpu, req);
if (ret != H_TOO_HARD) {
kvmppc_set_gpr(vcpu, 3, ret);
trap = 0;
}
}
}
kvmppc_xive_pull_vcpu(vcpu);
if (kvm_is_radix(vcpu->kvm))
vcpu->arch.slb_max = 0;
}
vcpu->arch.slb_max = 0;
dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
......@@ -3766,36 +3866,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
/* Save guest CTRL register, set runlatch to 1 */
vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
if (!(vcpu->arch.ctrl & 1))
mtspr(SPRN_CTRLT, vcpu->arch.ctrl | 1);
vcpu->arch.iamr = mfspr(SPRN_IAMR);
vcpu->arch.pspb = mfspr(SPRN_PSPB);
vcpu->arch.fscr = mfspr(SPRN_FSCR);
vcpu->arch.tar = mfspr(SPRN_TAR);
vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
vcpu->arch.bescr = mfspr(SPRN_BESCR);
vcpu->arch.wort = mfspr(SPRN_WORT);
vcpu->arch.tid = mfspr(SPRN_TIDR);
vcpu->arch.amr = mfspr(SPRN_AMR);
vcpu->arch.uamor = mfspr(SPRN_UAMOR);
vcpu->arch.dscr = mfspr(SPRN_DSCR);
mtspr(SPRN_PSPB, 0);
mtspr(SPRN_WORT, 0);
mtspr(SPRN_UAMOR, 0);
mtspr(SPRN_DSCR, host_dscr);
mtspr(SPRN_TIDR, host_tidr);
mtspr(SPRN_IAMR, host_iamr);
store_spr_state(vcpu);
if (host_amr != vcpu->arch.amr)
mtspr(SPRN_AMR, host_amr);
if (host_fscr != vcpu->arch.fscr)
mtspr(SPRN_FSCR, host_fscr);
restore_p9_host_os_sprs(vcpu, &host_os_sprs);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
store_fp_state(&vcpu->arch.fp);
......@@ -3825,6 +3899,9 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vc->in_guest = 0;
mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
/* We may have raced with new irq work */
if (test_irq_work_pending())
set_dec(1);
mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
kvmhv_load_host_pmu();
......@@ -4014,7 +4091,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
/*
* This never fails for a radix guest, as none of the operations it does
* for a radix guest can fail or have a way to report failure.
* kvmhv_run_single_vcpu() relies on this fact.
*/
static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
{
......@@ -4170,7 +4246,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
{
struct kvm_run *run = vcpu->run;
int trap, r, pcpu;
int srcu_idx, lpid;
int srcu_idx;
struct kvmppc_vcore *vc;
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
......@@ -4193,8 +4269,15 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc->runner = vcpu;
/* See if the MMU is ready to go */
if (!kvm->arch.mmu_ready)
kvmhv_setup_mmu(vcpu);
if (!kvm->arch.mmu_ready) {
r = kvmhv_setup_mmu(vcpu);
if (r) {
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
run->fail_entry.hardware_entry_failure_reason = 0;
vcpu->arch.ret = r;
return r;
}
}
if (need_resched())
cond_resched();
......@@ -4207,7 +4290,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
preempt_disable();
pcpu = smp_processor_id();
vc->pcpu = pcpu;
kvmppc_prepare_radix_vcpu(vcpu, pcpu);
if (kvm_is_radix(kvm))
kvmppc_prepare_radix_vcpu(vcpu, pcpu);
local_irq_disable();
hard_irq_disable();
......@@ -4244,13 +4328,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc->vcore_state = VCORE_RUNNING;
trace_kvmppc_run_core(vc, 0);
if (cpu_has_feature(CPU_FTR_HVMODE)) {
lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
mtspr(SPRN_LPID, lpid);
isync();
kvmppc_check_need_tlb_flush(kvm, pcpu, nested);
}
guest_enter_irqoff();
srcu_idx = srcu_read_lock(&kvm->srcu);
......@@ -4269,11 +4346,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
srcu_read_unlock(&kvm->srcu, srcu_idx);
if (cpu_has_feature(CPU_FTR_HVMODE)) {
mtspr(SPRN_LPID, kvm->arch.host_lpid);
isync();
}
set_irq_happened(trap);
kvmppc_set_host_core(pcpu);
......@@ -4419,19 +4491,23 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
do {
/*
* The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
* path, which also handles hash and dependent threads mode.
*/
if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
if (cpu_has_feature(CPU_FTR_ARCH_300))
r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
vcpu->arch.vcore->lpcr);
else
r = kvmppc_run_vcpu(vcpu);
if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
!(vcpu->arch.shregs.msr & MSR_PR)) {
if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_PR)) {
/*
* These should have been caught reflected
* into the guest by now. Final sanity check:
* don't allow userspace to execute hcalls in
* the hypervisor.
*/
r = RESUME_GUEST;
continue;
}
trace_kvm_hcall_enter(vcpu);
r = kvmppc_pseries_do_hcall(vcpu);
trace_kvm_hcall_exit(vcpu, r);
......@@ -5038,18 +5114,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
/*
* Track that we now have a HV mode VM active. This blocks secondary
* CPU threads from coming online.
* On POWER9, we only need to do this if the "indep_threads_mode"
* module parameter has been set to N.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
kvm->arch.threads_indep = true;
} else {
kvm->arch.threads_indep = indep_threads_mode;
}
}
if (!kvm->arch.threads_indep)
if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_activated();
/*
......@@ -5090,7 +5156,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
{
debugfs_remove_recursive(kvm->arch.debugfs_dir);
if (!kvm->arch.threads_indep)
if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_deactivated();
kvmppc_free_vcores(kvm);
......@@ -5511,7 +5577,9 @@ static int kvmhv_enable_nested(struct kvm *kvm)
{
if (!nested)
return -EPERM;
if (!cpu_has_feature(CPU_FTR_ARCH_300) || no_mixing_hpt_and_radix)
if (!cpu_has_feature(CPU_FTR_ARCH_300))
return -ENODEV;
if (!radix_enabled())
return -ENODEV;
/* kvm == NULL means the caller is testing if the capability exists */
......@@ -5674,11 +5742,25 @@ static int kvmhv_enable_dawr1(struct kvm *kvm)
static bool kvmppc_hash_v3_possible(void)
{
if (radix_enabled() && no_mixing_hpt_and_radix)
if (!cpu_has_feature(CPU_FTR_ARCH_300))
return false;
return cpu_has_feature(CPU_FTR_ARCH_300) &&
cpu_has_feature(CPU_FTR_HVMODE);
if (!cpu_has_feature(CPU_FTR_HVMODE))
return false;
/*
* POWER9 chips before version 2.02 can't have some threads in
* HPT mode and some in radix mode on the same core.
*/
if (radix_enabled()) {
unsigned int pvr = mfspr(SPRN_PVR);
if ((pvr >> 16) == PVR_POWER9 &&
(((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
return false;
}
return true;
}
static struct kvmppc_ops kvm_ops_hv = {
......@@ -5822,18 +5904,6 @@ static int kvmppc_book3s_init_hv(void)
if (kvmppc_radix_possible())
r = kvmppc_radix_init();
/*
* POWER9 chips before version 2.02 can't have some threads in
* HPT mode and some in radix mode on the same core.
*/
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
unsigned int pvr = mfspr(SPRN_PVR);
if ((pvr >> 16) == PVR_POWER9 &&
(((pvr & 0xe000) == 0 && (pvr & 0xfff) < 0x202) ||
((pvr & 0xe000) == 0x2000 && (pvr & 0xfff) < 0x101)))
no_mixing_hpt_and_radix = true;
}
r = kvmppc_uvmem_init();
if (r < 0)
pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
......
......@@ -34,21 +34,6 @@
#include "book3s_xics.h"
#include "book3s_xive.h"
/*
* The XIVE module will populate these when it loads
*/
unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr);
int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
EXPORT_SYMBOL_GPL(__xive_vm_h_xirr);
EXPORT_SYMBOL_GPL(__xive_vm_h_ipoll);
EXPORT_SYMBOL_GPL(__xive_vm_h_ipi);
EXPORT_SYMBOL_GPL(__xive_vm_h_cppr);
EXPORT_SYMBOL_GPL(__xive_vm_h_eoi);
/*
* Hash page table alignment on newer cpus(CPU_FTR_ARCH_206)
* should be power of 2.
......@@ -196,16 +181,9 @@ int kvmppc_hwrng_present(void)
}
EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
long kvmppc_h_random(struct kvm_vcpu *vcpu)
long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
{
int r;
/* Only need to do the expensive mfmsr() on radix */
if (kvm_is_radix(vcpu->kvm) && (mfmsr() & MSR_IR))
r = powernv_get_random_long(&vcpu->arch.regs.gpr[4]);
else
r = powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]);
if (r)
if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
return H_SUCCESS;
return H_HARDWARE;
......@@ -221,15 +199,6 @@ void kvmhv_rm_send_ipi(int cpu)
void __iomem *xics_phys;
unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
/* For a nested hypervisor, use the XICS via hcall */
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
plpar_hcall_raw(H_IPI, retbuf, get_hard_smp_processor_id(cpu),
IPI_PRIORITY);
return;
}
/* On POWER9 we can use msgsnd for any destination cpu. */
if (cpu_has_feature(CPU_FTR_ARCH_300)) {
msg |= get_hard_smp_processor_id(cpu);
......@@ -442,19 +411,12 @@ static long kvmppc_read_one_intr(bool *again)
return 1;
/* Now read the interrupt from the ICP */
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
rc = plpar_hcall_raw(H_XIRR, retbuf, 0xFF);
xirr = cpu_to_be32(retbuf[0]);
} else {
xics_phys = local_paca->kvm_hstate.xics_phys;
rc = 0;
if (!xics_phys)
rc = opal_int_get_xirr(&xirr, false);
else
xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
}
xics_phys = local_paca->kvm_hstate.xics_phys;
rc = 0;
if (!xics_phys)
rc = opal_int_get_xirr(&xirr, false);
else
xirr = __raw_rm_readl(xics_phys + XICS_XIRR);
if (rc < 0)
return 1;
......@@ -483,13 +445,7 @@ static long kvmppc_read_one_intr(bool *again)
*/
if (xisr == XICS_IPI) {
rc = 0;
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
plpar_hcall_raw(H_IPI, retbuf,
hard_smp_processor_id(), 0xff);
plpar_hcall_raw(H_EOI, retbuf, h_xirr);
} else if (xics_phys) {
if (xics_phys) {
__raw_rm_writeb(0xff, xics_phys + XICS_MFRR);
__raw_rm_writel(xirr, xics_phys + XICS_XIRR);
} else {
......@@ -515,13 +471,7 @@ static long kvmppc_read_one_intr(bool *again)
/* We raced with the host,
* we need to resend that IPI, bummer
*/
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
plpar_hcall_raw(H_IPI, retbuf,
hard_smp_processor_id(),
IPI_PRIORITY);
} else if (xics_phys)
if (xics_phys)
__raw_rm_writeb(IPI_PRIORITY,
xics_phys + XICS_MFRR);
else
......@@ -541,22 +491,13 @@ static long kvmppc_read_one_intr(bool *again)
}
#ifdef CONFIG_KVM_XICS
static inline bool is_rm(void)
{
return !(mfmsr() & MSR_DR);
}
unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
if (xics_on_xive()) {
if (is_rm())
return xive_rm_h_xirr(vcpu);
if (unlikely(!__xive_vm_h_xirr))
return H_NOT_AVAILABLE;
return __xive_vm_h_xirr(vcpu);
} else
if (xics_on_xive())
return xive_rm_h_xirr(vcpu);
else
return xics_rm_h_xirr(vcpu);
}
......@@ -565,13 +506,9 @@ unsigned long kvmppc_rm_h_xirr_x(struct kvm_vcpu *vcpu)
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
vcpu->arch.regs.gpr[5] = get_tb();
if (xics_on_xive()) {
if (is_rm())
return xive_rm_h_xirr(vcpu);
if (unlikely(!__xive_vm_h_xirr))
return H_NOT_AVAILABLE;
return __xive_vm_h_xirr(vcpu);
} else
if (xics_on_xive())
return xive_rm_h_xirr(vcpu);
else
return xics_rm_h_xirr(vcpu);
}
......@@ -579,13 +516,9 @@ unsigned long kvmppc_rm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
if (xics_on_xive()) {
if (is_rm())
return xive_rm_h_ipoll(vcpu, server);
if (unlikely(!__xive_vm_h_ipoll))
return H_NOT_AVAILABLE;
return __xive_vm_h_ipoll(vcpu, server);
} else
if (xics_on_xive())
return xive_rm_h_ipoll(vcpu, server);
else
return H_TOO_HARD;
}
......@@ -594,13 +527,9 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
if (xics_on_xive()) {
if (is_rm())
return xive_rm_h_ipi(vcpu, server, mfrr);
if (unlikely(!__xive_vm_h_ipi))
return H_NOT_AVAILABLE;
return __xive_vm_h_ipi(vcpu, server, mfrr);
} else
if (xics_on_xive())
return xive_rm_h_ipi(vcpu, server, mfrr);
else
return xics_rm_h_ipi(vcpu, server, mfrr);
}
......@@ -608,13 +537,9 @@ int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
if (xics_on_xive()) {
if (is_rm())
return xive_rm_h_cppr(vcpu, cppr);
if (unlikely(!__xive_vm_h_cppr))
return H_NOT_AVAILABLE;
return __xive_vm_h_cppr(vcpu, cppr);
} else
if (xics_on_xive())
return xive_rm_h_cppr(vcpu, cppr);
else
return xics_rm_h_cppr(vcpu, cppr);
}
......@@ -622,13 +547,9 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
if (xics_on_xive()) {
if (is_rm())
return xive_rm_h_eoi(vcpu, xirr);
if (unlikely(!__xive_vm_h_eoi))
return H_NOT_AVAILABLE;
return __xive_vm_h_eoi(vcpu, xirr);
} else
if (xics_on_xive())
return xive_rm_h_eoi(vcpu, xirr);
else
return xics_rm_h_eoi(vcpu, xirr);
}
#endif /* CONFIG_KVM_XICS */
......
......@@ -58,7 +58,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/*
* Put whatever is in the decrementer into the
* hypervisor decrementer.
* Because of a hardware deviation in P8 and P9,
* Because of a hardware deviation in P8,
* we need to set LPCR[HDICE] before writing HDEC.
*/
ld r5, HSTATE_KVM_VCORE(r13)
......@@ -67,15 +67,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ori r8, r9, LPCR_HDICE
mtspr SPRN_LPCR, r8
isync
andis. r0, r9, LPCR_LD@h
mfspr r8,SPRN_DEC
mftb r7
BEGIN_FTR_SECTION
/* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
bne 32f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r8,r8
32: mtspr SPRN_HDEC,r8
mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
......
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <asm/asm-prototypes.h>
#include <asm/dbell.h>
#include <asm/kvm_ppc.h>
#include <asm/ppc-opcode.h>
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
u64 tb = mftb() - vc->tb_offset_applied;
vcpu->arch.cur_activity = next;
vcpu->arch.cur_tb_start = tb;
}
static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
struct kvmhv_tb_accumulator *curr;
u64 tb = mftb() - vc->tb_offset_applied;
u64 prev_tb;
u64 delta;
u64 seq;
curr = vcpu->arch.cur_activity;
vcpu->arch.cur_activity = next;
prev_tb = vcpu->arch.cur_tb_start;
vcpu->arch.cur_tb_start = tb;
if (!curr)
return;
delta = tb - prev_tb;
seq = curr->seqcount;
curr->seqcount = seq + 1;
smp_wmb();
curr->tb_total += delta;
if (seq == 0 || delta < curr->tb_min)
curr->tb_min = delta;
if (delta > curr->tb_max)
curr->tb_max = delta;
smp_wmb();
curr->seqcount = seq + 2;
}
#define start_timing(vcpu, next) __start_timing(vcpu, next)
#define end_timing(vcpu) __start_timing(vcpu, NULL)
#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next)
#else
#define start_timing(vcpu, next) do {} while (0)
#define end_timing(vcpu) do {} while (0)
#define accumulate_time(vcpu, next) do {} while (0)
#endif
static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
{
asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx));
asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx));
}
static inline void mtslb(u64 slbee, u64 slbev)
{
asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
}
static inline void clear_slb_entry(unsigned int idx)
{
mtslb(idx, 0);
}
static inline void slb_clear_invalidate_partition(void)
{
clear_slb_entry(0);
asm volatile(PPC_SLBIA(6));
}
/*
* Malicious or buggy radix guests may have inserted SLB entries
* (only 0..3 because radix always runs with UPRT=1), so these must
* be cleared here to avoid side-channels. slbmte is used rather
* than slbia, as it won't clear cached translations.
*/
static void radix_clear_slb(void)
{
int i;
for (i = 0; i < 4; i++)
clear_slb_entry(i);
}
static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
{
struct kvm_nested_guest *nested = vcpu->arch.nested;
u32 lpid;
lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
/*
* All the isync()s are overkill but trivially follow the ISA
* requirements. Some can likely be replaced with justification
* comment for why they are not needed.
*/
isync();
mtspr(SPRN_LPID, lpid);
isync();
mtspr(SPRN_LPCR, lpcr);
isync();
mtspr(SPRN_PID, vcpu->arch.pid);
isync();
}
static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
{
u32 lpid;
int i;
lpid = kvm->arch.lpid;
mtspr(SPRN_LPID, lpid);
mtspr(SPRN_LPCR, lpcr);
mtspr(SPRN_PID, vcpu->arch.pid);
for (i = 0; i < vcpu->arch.slb_max; i++)
mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
isync();
}
static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
{
isync();
mtspr(SPRN_PID, pid);
isync();
mtspr(SPRN_LPID, kvm->arch.host_lpid);
isync();
mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
isync();
if (!radix_enabled())
slb_restore_bolted_realmode();
}
static void save_clear_host_mmu(struct kvm *kvm)
{
if (!radix_enabled()) {
/*
* Hash host could save and restore host SLB entries to
* reduce SLB fault overheads of VM exits, but for now the
* existing code clears all entries and restores just the
* bolted ones when switching back to host.
*/
slb_clear_invalidate_partition();
}
}
static void save_clear_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
if (kvm_is_radix(kvm)) {
radix_clear_slb();
} else {
int i;
int nr = 0;
/*
* This must run before switching to host (radix host can't
* access all SLBs).
*/
for (i = 0; i < vcpu->arch.slb_nr; i++) {
u64 slbee, slbev;
mfslb(i, &slbee, &slbev);
if (slbee & SLB_ESID_V) {
vcpu->arch.slb[nr].orige = slbee | i;
vcpu->arch.slb[nr].origv = slbev;
nr++;
}
}
vcpu->arch.slb_max = nr;
slb_clear_invalidate_partition();
}
}
int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
{
struct kvm *kvm = vcpu->kvm;
struct kvm_nested_guest *nested = vcpu->arch.nested;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
s64 hdec;
u64 tb, purr, spurr;
u64 *exsave;
bool ri_set;
int trap;
unsigned long msr;
unsigned long host_hfscr;
unsigned long host_ciabr;
unsigned long host_dawr0;
unsigned long host_dawrx0;
unsigned long host_psscr;
unsigned long host_pidr;
unsigned long host_dawr1;
unsigned long host_dawrx1;
hdec = time_limit - mftb();
if (hdec < 0)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
start_timing(vcpu, &vcpu->arch.rm_entry);
vcpu->arch.ceded = 0;
if (vc->tb_offset) {
u64 new_tb = mftb() + vc->tb_offset;
mtspr(SPRN_TBU40, new_tb);
tb = mftb();
if ((tb & 0xffffff) < (new_tb & 0xffffff))
mtspr(SPRN_TBU40, new_tb + 0x1000000);
vc->tb_offset_applied = vc->tb_offset;
}
msr = mfmsr();
host_hfscr = mfspr(SPRN_HFSCR);
host_ciabr = mfspr(SPRN_CIABR);
host_dawr0 = mfspr(SPRN_DAWR0);
host_dawrx0 = mfspr(SPRN_DAWRX0);
host_psscr = mfspr(SPRN_PSSCR);
host_pidr = mfspr(SPRN_PID);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
host_dawr1 = mfspr(SPRN_DAWR1);
host_dawrx1 = mfspr(SPRN_DAWRX1);
}
if (vc->pcr)
mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
mtspr(SPRN_DPDES, vc->dpdes);
mtspr(SPRN_VTB, vc->vtb);
local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
mtspr(SPRN_PURR, vcpu->arch.purr);
mtspr(SPRN_SPURR, vcpu->arch.spurr);
if (dawr_enabled()) {
mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
}
}
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
mtspr(SPRN_IC, vcpu->arch.ic);
mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
/*
* On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
* Interrupt (HDSI) the HDSISR is not be updated at all.
*
* To work around this we put a canary value into the HDSISR before
* returning to a guest and then check for this canary when we take a
* HDSI. If we find the canary on a HDSI, we know the hardware didn't
* update the HDSISR. In this case we return to the guest to retake the
* HDSI which should correctly update the HDSISR the second time HDSI
* entry.
*
* Just do this on all p9 processors for now.
*/
mtspr(SPRN_HDSISR, HDSISR_CANARY);
mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
mtspr(SPRN_AMOR, ~0UL);
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_HV_P9;
/*
* Hash host, hash guest, or radix guest with prefetch bug, all have
* to disable the MMU before switching to guest MMU state.
*/
if (!radix_enabled() || !kvm_is_radix(kvm) ||
cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
__mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
save_clear_host_mmu(kvm);
if (kvm_is_radix(kvm)) {
switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
__mtmsrd(0, 1); /* clear RI */
} else {
switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
}
/* TLBIEL uses LPID=LPIDR, so run this after setting guest LPID */
kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
/*
* P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
* so set guest LPCR (with HDICE) before writing HDEC.
*/
mtspr(SPRN_HDEC, hdec);
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
accumulate_time(vcpu, &vcpu->arch.guest_time);
kvmppc_p9_enter_guest(vcpu);
accumulate_time(vcpu, &vcpu->arch.rm_intr);
/* XXX: Could get these from r11/12 and paca exsave instead */
vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
/* 0x2 bit for HSRR is only used by PR and P7/8 HV paths, clear it */
trap = local_paca->kvm_hstate.scratch0 & ~0x2;
/* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */
ri_set = false;
if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
if (trap != BOOK3S_INTERRUPT_SYSCALL &&
(vcpu->arch.shregs.msr & MSR_RI))
ri_set = true;
exsave = local_paca->exgen;
} else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
exsave = local_paca->exnmi;
} else { /* trap == 0x200 */
exsave = local_paca->exmc;
}
vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
/*
* Only set RI after reading machine check regs (DAR, DSISR, SRR0/1)
* and hstate scratch (which we need to move into exsave to make
* re-entrant vs SRESET/MCE)
*/
if (ri_set) {
if (unlikely(!(mfmsr() & MSR_RI))) {
__mtmsrd(MSR_RI, 1);
WARN_ON_ONCE(1);
}
} else {
WARN_ON_ONCE(mfmsr() & MSR_RI);
__mtmsrd(MSR_RI, 1);
}
vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];
vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) {
vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
kvmppc_realmode_machine_check(vcpu);
} else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
kvmppc_realmode_hmi_handler();
} else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
} else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) {
vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
} else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
} else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) {
vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
* Softpatch interrupt for transactional memory emulation cases
* on POWER9 DD2.2. This is early in the guest exit path - we
* haven't saved registers or done a treclaim yet.
*/
} else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
/*
* The cases we want to handle here are those where the guest
* is in real suspend mode and is trying to transition to
* transactional mode.
*/
if (local_paca->kvm_hstate.fake_suspend &&
(vcpu->arch.shregs.msr & MSR_TS_S)) {
if (kvmhv_p9_tm_emulation_early(vcpu)) {
/* Prevent it being handled again. */
trap = 0;
}
}
#endif
}
accumulate_time(vcpu, &vcpu->arch.rm_exit);
/* Advance host PURR/SPURR by the amount used by guest */
purr = mfspr(SPRN_PURR);
spurr = mfspr(SPRN_SPURR);
mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
purr - vcpu->arch.purr);
mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
spurr - vcpu->arch.spurr);
vcpu->arch.purr = purr;
vcpu->arch.spurr = spurr;
vcpu->arch.ic = mfspr(SPRN_IC);
vcpu->arch.pid = mfspr(SPRN_PID);
vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
/* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
mtspr(SPRN_PSSCR, host_psscr |
(local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, host_hfscr);
mtspr(SPRN_CIABR, host_ciabr);
mtspr(SPRN_DAWR0, host_dawr0);
mtspr(SPRN_DAWRX0, host_dawrx0);
if (cpu_has_feature(CPU_FTR_DAWR1)) {
mtspr(SPRN_DAWR1, host_dawr1);
mtspr(SPRN_DAWRX1, host_dawrx1);
}
if (kvm_is_radix(kvm)) {
/*
* Since this is radix, do a eieio; tlbsync; ptesync sequence
* in case we interrupted the guest between a tlbie and a
* ptesync.
*/
asm volatile("eieio; tlbsync; ptesync");
}
/*
* cp_abort is required if the processor supports local copy-paste
* to clear the copy buffer that was under control of the guest.
*/
if (cpu_has_feature(CPU_FTR_ARCH_31))
asm volatile(PPC_CP_ABORT);
vc->dpdes = mfspr(SPRN_DPDES);
vc->vtb = mfspr(SPRN_VTB);
mtspr(SPRN_DPDES, 0);
if (vc->pcr)
mtspr(SPRN_PCR, PCR_MASK);
if (vc->tb_offset_applied) {
u64 new_tb = mftb() - vc->tb_offset_applied;
mtspr(SPRN_TBU40, new_tb);
tb = mftb();
if ((tb & 0xffffff) < (new_tb & 0xffffff))
mtspr(SPRN_TBU40, new_tb + 0x1000000);
vc->tb_offset_applied = 0;
}
mtspr(SPRN_HDEC, 0x7fffffff);
save_clear_guest_mmu(kvm, vcpu);
switch_mmu_to_host(kvm, host_pidr);
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
/*
* If we are in real mode, only switch MMU on after the MMU is
* switched to host, to avoid the P9_RADIX_PREFETCH_BUG.
*/
__mtmsrd(msr, 0);
end_timing(vcpu);
return trap;
}
EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);
......@@ -46,6 +46,10 @@ static int global_invalidates(struct kvm *kvm)
else
global = 1;
/* LPID has been switched to host if in virt mode so can't do local */
if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
global = 1;
if (!global) {
/* any other core might now have stale TLB entries... */
smp_wmb();
......@@ -398,6 +402,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
vcpu->arch.pgdir, true,
&vcpu->arch.regs.gpr[4]);
}
EXPORT_SYMBOL_GPL(kvmppc_h_enter);
#ifdef __BIG_ENDIAN__
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
......@@ -542,6 +547,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
&vcpu->arch.regs.gpr[4]);
}
EXPORT_SYMBOL_GPL(kvmppc_h_remove);
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
......@@ -660,6 +666,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_bulk_remove);
long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index, unsigned long avpn)
......@@ -730,6 +737,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_protect);
long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
......@@ -770,6 +778,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
}
return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_h_read);
long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
......@@ -818,6 +827,7 @@ long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_clear_ref);
long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long pte_index)
......@@ -865,6 +875,7 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
return ret;
}
EXPORT_SYMBOL_GPL(kvmppc_h_clear_mod);
static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq,
unsigned long gpa, int writing, unsigned long *hpa,
......@@ -1283,3 +1294,4 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
return -1; /* send fault up to host kernel mode */
}
EXPORT_SYMBOL_GPL(kvmppc_hpte_hv_fault);
......@@ -141,13 +141,6 @@ static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
return;
}
if (xive_enabled() && kvmhv_on_pseries()) {
/* No XICS access or hypercalls available, too hard */
this_icp->rm_action |= XICS_RM_KICK_VCPU;
this_icp->rm_kick_target = vcpu;
return;
}
/*
* Check if the core is loaded,
* if not, find an available host core to post to wake the VCPU,
......@@ -771,14 +764,6 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
void __iomem *xics_phys;
int64_t rc;
if (kvmhv_on_pseries()) {
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
iosync();
plpar_hcall_raw(H_EOI, retbuf, hwirq);
return;
}
rc = pnv_opal_pci_msi_eoi(c, hwirq);
if (rc)
......
......@@ -25,18 +25,10 @@
#include <asm/export.h>
#include <asm/tm.h>
#include <asm/opal.h>
#include <asm/xive-regs.h>
#include <asm/thread_info.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
#include <asm/cpuidle.h>
#include <asm/ultravisor-api.h>
/* Sign-extend HDEC if not on POWER9 */
#define EXTEND_HDEC(reg) \
BEGIN_FTR_SECTION; \
extsw reg, reg; \
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
......@@ -44,9 +36,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define NAPPING_UNSPLIT 3
/* Stack frame offsets for kvmppc_hv_entry */
#define SFS 208
#define SFS 160
#define STACK_SLOT_TRAP (SFS-4)
#define STACK_SLOT_SHORT_PATH (SFS-8)
#define STACK_SLOT_TID (SFS-16)
#define STACK_SLOT_PSSCR (SFS-24)
#define STACK_SLOT_PID (SFS-32)
......@@ -57,11 +48,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_HFSCR (SFS-72)
#define STACK_SLOT_AMR (SFS-80)
#define STACK_SLOT_UAMOR (SFS-88)
#define STACK_SLOT_DAWR1 (SFS-96)
#define STACK_SLOT_DAWRX1 (SFS-104)
#define STACK_SLOT_FSCR (SFS-112)
/* the following is used by the P9 short path */
#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
#define STACK_SLOT_FSCR (SFS-96)
/*
* Call kvmppc_hv_entry in real mode.
......@@ -137,15 +124,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Return the trap number on this thread as the return value */
mr r3, r12
/*
* If we came back from the guest via a relocation-on interrupt,
* we will be in virtual mode at this point, which makes it a
* little easier to get back to the caller.
*/
mfmsr r0
andi. r0, r0, MSR_IR /* in real mode? */
bne .Lvirt_return
/* RFI into the highmem handler */
mfmsr r6
li r0, MSR_RI
......@@ -155,11 +133,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtsrr1 r7
RFI_TO_KERNEL
/* Virtual-mode return */
.Lvirt_return:
mtlr r8
blr
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
......@@ -246,7 +219,7 @@ kvm_novcpu_wakeup:
/* See if our timeslice has expired (HDEC is negative) */
mfspr r0, SPRN_HDEC
EXTEND_HDEC(r0)
extsw r0, r0
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
cmpdi r0, 0
blt kvm_novcpu_exit
......@@ -348,10 +321,8 @@ kvm_secondary_got_guest:
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
LOAD_REG_ADDR(r6, decrementer_max)
ld r6, 0(r6)
lis r6,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC, r6
BEGIN_FTR_SECTION
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
cmpdi r6, 0
......@@ -363,7 +334,6 @@ BEGIN_FTR_SECTION
ld r0, KVM_SPLIT_LDBAR(r6)
mtspr SPRN_LDBAR, r0
isync
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
63:
/* Order load of vcpu after load of vcore */
lwsync
......@@ -434,7 +404,6 @@ kvm_no_guest:
blr
53:
BEGIN_FTR_SECTION
HMT_LOW
ld r5, HSTATE_KVM_VCORE(r13)
cmpdi r5, 0
......@@ -449,14 +418,6 @@ BEGIN_FTR_SECTION
b kvm_unsplit_nap
60: HMT_MEDIUM
b kvm_secondary_got_guest
FTR_SECTION_ELSE
HMT_LOW
ld r5, HSTATE_KVM_VCORE(r13)
cmpdi r5, 0
beq kvm_no_guest
HMT_MEDIUM
b kvm_secondary_got_guest
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
54: li r0, KVM_HWTHREAD_IN_KVM
stb r0, HSTATE_HWTHREAD_STATE(r13)
......@@ -582,13 +543,11 @@ kvmppc_hv_entry:
bne 10f
lwz r7,KVM_LPID(r9)
BEGIN_FTR_SECTION
ld r6,KVM_SDR1(r9)
li r0,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r0
ptesync
mtspr SPRN_SDR1,r6 /* switch to partition page table */
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
......@@ -668,16 +627,6 @@ kvmppc_got_guest:
mtspr SPRN_SPURR,r8
/* Save host values of some registers */
BEGIN_FTR_SECTION
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
mfspr r7, SPRN_PID
std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
mfspr r5, SPRN_HFSCR
std r5, STACK_SLOT_HFSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
mfspr r5, SPRN_CIABR
mfspr r6, SPRN_DAWR0
......@@ -690,12 +639,6 @@ BEGIN_FTR_SECTION
mfspr r5, SPRN_FSCR
std r5, STACK_SLOT_FSCR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
mfspr r6, SPRN_DAWR1
mfspr r7, SPRN_DAWRX1
std r6, STACK_SLOT_DAWR1(r1)
std r7, STACK_SLOT_DAWRX1(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
mfspr r5, SPRN_AMR
std r5, STACK_SLOT_AMR(r1)
......@@ -713,13 +656,9 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
* Branch around the call if both CPU_FTR_TM and
* CPU_FTR_P9_TM_HV_ASSIST are off.
*/
BEGIN_FTR_SECTION
b 91f
END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
......@@ -786,12 +725,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ld r6, VCPU_DAWRX0(r4)
mtspr SPRN_DAWR0, r5
mtspr SPRN_DAWRX0, r6
BEGIN_FTR_SECTION
ld r5, VCPU_DAWR1(r4)
ld r6, VCPU_DAWRX1(r4)
mtspr SPRN_DAWR1, r5
mtspr SPRN_DAWRX1, r6
END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
1:
ld r7, VCPU_CIABR(r4)
ld r8, VCPU_TAR(r4)
......@@ -809,7 +742,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
mtspr SPRN_BESCR, r6
mtspr SPRN_PID, r7
mtspr SPRN_WORT, r8
BEGIN_FTR_SECTION
/* POWER8-only registers */
ld r5, VCPU_TCSCR(r4)
ld r6, VCPU_ACOP(r4)
......@@ -820,18 +752,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_CSIGR, r7
mtspr SPRN_TACR, r8
nop
FTR_SECTION_ELSE
/* POWER9-only registers */
ld r5, VCPU_TID(r4)
ld r6, VCPU_PSSCR(r4)
lbz r8, HSTATE_FAKE_SUSPEND(r13)
oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
ld r7, VCPU_HFSCR(r4)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
ld r5, VCPU_SPRG0(r4)
......@@ -901,23 +821,15 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
EXTEND_HDEC(r3)
extsw r3, r3
cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
ld r6, VCPU_KVM(r4)
lbz r0, KVM_RADIX(r6)
cmpwi r0, 0
bne 9f
/* For hash guest, clear out and reload the SLB */
BEGIN_MMU_FTR_SECTION
/* Radix host won't have populated the SLB, so no need to clear */
/* Clear out and reload the SLB */
li r6, 0
slbmte r6, r6
PPC_SLBIA(6)
ptesync
END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
lwz r5,VCPU_SLB_MAX(r4)
......@@ -932,96 +844,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
bdnz 1b
9:
#ifdef CONFIG_KVM_XICS
/* We are entering the guest on that thread, push VCPU to XIVE */
ld r11, VCPU_XIVE_SAVED_STATE(r4)
li r9, TM_QW1_OS
lwz r8, VCPU_XIVE_CAM_WORD(r4)
cmpwi r8, 0
beq no_xive
li r7, TM_QW1_OS + TM_WORD2
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
beq 2f
ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
cmpldi cr1, r10, 0
beq cr1, no_xive
eieio
stdx r11,r9,r10
stwx r8,r7,r10
b 3f
2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
cmpldi cr1, r10, 0
beq cr1, no_xive
eieio
stdcix r11,r9,r10
stwcix r8,r7,r10
3: li r9, 1
stb r9, VCPU_XIVE_PUSHED(r4)
eieio
/*
* We clear the irq_pending flag. There is a small chance of a
* race vs. the escalation interrupt happening on another
* processor setting it again, but the only consequence is to
* cause a spurrious wakeup on the next H_CEDE which is not an
* issue.
*/
li r0,0
stb r0, VCPU_IRQ_PENDING(r4)
/*
* In single escalation mode, if the escalation interrupt is
* on, we mask it.
*/
lbz r0, VCPU_XIVE_ESC_ON(r4)
cmpwi cr1, r0,0
beq cr1, 1f
li r9, XIVE_ESB_SET_PQ_01
beq 4f /* in real mode? */
ld r10, VCPU_XIVE_ESC_VADDR(r4)
ldx r0, r10, r9
b 5f
4: ld r10, VCPU_XIVE_ESC_RADDR(r4)
ldcix r0, r10, r9
5: sync
/* We have a possible subtle race here: The escalation interrupt might
* have fired and be on its way to the host queue while we mask it,
* and if we unmask it early enough (re-cede right away), there is
* a theorical possibility that it fires again, thus landing in the
* target queue more than once which is a big no-no.
*
* Fortunately, solving this is rather easy. If the above load setting
* PQ to 01 returns a previous value where P is set, then we know the
* escalation interrupt is somewhere on its way to the host. In that
* case we simply don't clear the xive_esc_on flag below. It will be
* eventually cleared by the handler for the escalation interrupt.
*
* Then, when doing a cede, we check that flag again before re-enabling
* the escalation interrupt, and if set, we abort the cede.
*/
andi. r0, r0, XIVE_ESB_VAL_P
bne- 1f
/* Now P is 0, we can clear the flag */
li r0, 0
stb r0, VCPU_XIVE_ESC_ON(r4)
1:
no_xive:
#endif /* CONFIG_KVM_XICS */
li r0, 0
stw r0, STACK_SLOT_SHORT_PATH(r1)
deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */
/* Check if we can deliver an external or decrementer interrupt now */
ld r0, VCPU_PENDING_EXC(r4)
BEGIN_FTR_SECTION
/* On POWER9, also check for emulated doorbell interrupt */
lbz r3, VCPU_DBELL_REQ(r4)
or r0, r0, r3
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
cmpdi r0, 0
beq 71f
mr r3, r4
......@@ -1033,7 +858,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
fast_guest_entry_c:
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4)
/* r11 = vcpu->arch.msr & ~MSR_HV */
......@@ -1095,18 +919,8 @@ BEGIN_FTR_SECTION
mtspr SPRN_PPR, r0
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Move canary into DSISR to check for later */
BEGIN_FTR_SECTION
li r0, 0x7fff
mtspr SPRN_HDSISR, r0
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r6, VCPU_KVM(r4)
lbz r7, KVM_SECURE_GUEST(r6)
cmpdi r7, 0
ld r6, VCPU_GPR(R6)(r4)
ld r7, VCPU_GPR(R7)(r4)
bne ret_to_ultra
ld r0, VCPU_CR(r4)
mtcr r0
......@@ -1117,117 +931,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r4, VCPU_GPR(R4)(r4)
HRFI_TO_GUEST
b .
/*
* Use UV_RETURN ultracall to return control back to the Ultravisor after
* processing an hypercall or interrupt that was forwarded (a.k.a. reflected)
* to the Hypervisor.
*
* All registers have already been loaded, except:
* R0 = hcall result
* R2 = SRR1, so UV can detect a synthesized interrupt (if any)
* R3 = UV_RETURN
*/
ret_to_ultra:
ld r0, VCPU_CR(r4)
mtcr r0
ld r0, VCPU_GPR(R3)(r4)
mfspr r2, SPRN_SRR1
li r3, 0
ori r3, r3, UV_RETURN
ld r4, VCPU_GPR(R4)(r4)
sc 2
/*
* Enter the guest on a P9 or later system where we have exactly
* one vcpu per vcore and we don't need to go to real mode
* (which implies that host and guest are both using radix MMU mode).
* r3 = vcpu pointer
* Most SPRs and all the VSRs have been loaded already.
*/
_GLOBAL(__kvmhv_vcpu_entry_p9)
EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
mflr r0
std r0, PPC_LR_STKOFF(r1)
stdu r1, -SFS(r1)
li r0, 1
stw r0, STACK_SLOT_SHORT_PATH(r1)
std r3, HSTATE_KVM_VCPU(r13)
mfcr r4
stw r4, SFS+8(r1)
std r1, HSTATE_HOST_R1(r13)
reg = 14
.rept 18
std reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
reg = reg + 1
.endr
reg = 14
.rept 18
ld reg, __VCPU_GPR(reg)(r3)
reg = reg + 1
.endr
mfmsr r10
std r10, HSTATE_HOST_MSR(r13)
mr r4, r3
b fast_guest_entry_c
guest_exit_short_path:
/*
* Malicious or buggy radix guests may have inserted SLB entries
* (only 0..3 because radix always runs with UPRT=1), so these must
* be cleared here to avoid side-channels. slbmte is used rather
* than slbia, as it won't clear cached translations.
*/
li r0,0
slbmte r0,r0
li r4,1
slbmte r0,r4
li r4,2
slbmte r0,r4
li r4,3
slbmte r0,r4
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
reg = 14
.rept 18
std reg, __VCPU_GPR(reg)(r9)
reg = reg + 1
.endr
reg = 14
.rept 18
ld reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
reg = reg + 1
.endr
lwz r4, SFS+8(r1)
mtcr r4
mr r3, r12 /* trap number */
addi r1, r1, SFS
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
/* If we are in real mode, do a rfid to get back to the caller */
mfmsr r4
andi. r5, r4, MSR_IR
bnelr
rldicl r5, r4, 64 - MSR_TS_S_LG, 62 /* extract TS field */
mtspr SPRN_SRR0, r0
ld r10, HSTATE_HOST_MSR(r13)
rldimi r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
mtspr SPRN_SRR1, r10
RFI_TO_KERNEL
b .
secondary_too_late:
li r12, 0
......@@ -1268,21 +971,16 @@ hdec_soon:
kvmppc_interrupt_hv:
/*
* Register contents:
* R9 = HSTATE_IN_GUEST
* R12 = (guest CR << 32) | interrupt vector
* R13 = PACA
* guest R12 saved in shadow VCPU SCRATCH0
* guest R13 saved in SPRN_SCRATCH0
* guest R9 saved in HSTATE_SCRATCH2
*/
std r9, HSTATE_SCRATCH2(r13)
lbz r9, HSTATE_IN_GUEST(r13)
cmpwi r9, KVM_GUEST_MODE_HOST_HV
beq kvmppc_bad_host_intr
#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
cmpwi r9, KVM_GUEST_MODE_GUEST
ld r9, HSTATE_SCRATCH2(r13)
beq kvmppc_interrupt_pr
#endif
/* We're now back in the host but in guest MMU context */
cmpwi r9,KVM_GUEST_MODE_HOST_HV
beq kvmppc_bad_host_intr
li r9, KVM_GUEST_MODE_HOST_HV
stb r9, HSTATE_IN_GUEST(r13)
......@@ -1400,7 +1098,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
bne 2f
mfspr r3,SPRN_HDEC
EXTEND_HDEC(r3)
extsw r3, r3
cmpdi r3,0
mr r4,r9
bge fast_guest_return
......@@ -1412,14 +1110,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Hypervisor doorbell - exit only if host IPI flag set */
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
bne 3f
BEGIN_FTR_SECTION
PPC_MSGSYNC
lwsync
/* always exit if we're running a nested guest */
ld r0, VCPU_NESTED(r9)
cmpdi r0, 0
bne guest_exit_cont
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
beq maybe_reenter_guest
......@@ -1449,62 +1139,16 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
mr r4, r9
bl kvmhv_accumulate_time
#endif
#ifdef CONFIG_KVM_XICS
/* We are exiting, pull the VP from the XIVE */
lbz r0, VCPU_XIVE_PUSHED(r9)
cmpwi cr0, r0, 0
beq 1f
li r7, TM_SPC_PULL_OS_CTX
li r6, TM_QW1_OS
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
beq 2f
ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
cmpldi cr0, r10, 0
beq 1f
/* First load to pull the context, we ignore the value */
eieio
lwzx r11, r7, r10
/* Second load to recover the context state (Words 0 and 1) */
ldx r11, r6, r10
b 3f
2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
cmpldi cr0, r10, 0
beq 1f
/* First load to pull the context, we ignore the value */
eieio
lwzcix r11, r7, r10
/* Second load to recover the context state (Words 0 and 1) */
ldcix r11, r6, r10
3: std r11, VCPU_XIVE_SAVED_STATE(r9)
/* Fixup some of the state for the next load */
li r10, 0
li r0, 0xff
stb r10, VCPU_XIVE_PUSHED(r9)
stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
eieio
1:
#endif /* CONFIG_KVM_XICS */
/*
* Possibly flush the link stack here, before we do a blr in
* guest_exit_short_path.
* kvmhv_switch_to_host.
*/
1: nop
patch_site 1b patch__call_kvm_flush_link_stack
/* If we came in through the P9 short path, go back out to C now */
lwz r0, STACK_SLOT_SHORT_PATH(r1)
cmpwi r0, 0
bne guest_exit_short_path
/* For hash guest, read the guest SLB and save it away */
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
li r5, 0
cmpwi r0, 0
bne 0f /* for radix, save 0 entries */
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
mtctr r0
li r6,0
......@@ -1528,9 +1172,6 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
stw r5,VCPU_SLB_MAX(r9)
/* load host SLB entries */
BEGIN_MMU_FTR_SECTION
b guest_bypass
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
ld r8,PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED
......@@ -1543,21 +1184,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
slbmte r6,r5
1: addi r8,r8,16
.endr
b guest_bypass
0: /*
* Sanitise radix guest SLB, see guest_exit_short_path comment.
* We clear vcpu->arch.slb_max to match earlier behaviour.
*/
li r0,0
stw r0,VCPU_SLB_MAX(r9)
slbmte r0,r0
li r4,1
slbmte r0,r4
li r4,2
slbmte r0,r4
li r4,3
slbmte r0,r4
guest_bypass:
stw r12, STACK_SLOT_TRAP(r1)
......@@ -1567,12 +1193,6 @@ guest_bypass:
ld r3, HSTATE_KVM_VCORE(r13)
mfspr r5,SPRN_DEC
mftb r6
/* On P9, if the guest has large decr enabled, don't sign extend */
BEGIN_FTR_SECTION
ld r4, VCORE_LPCR(r3)
andis. r4, r4, LPCR_LD@h
bne 16f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r5,r5
16: add r5,r5,r6
/* r5 is a guest timebase value here, convert to host TB */
......@@ -1646,7 +1266,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
std r6, VCPU_BESCR(r9)
stw r7, VCPU_GUEST_PID(r9)
std r8, VCPU_WORT(r9)
BEGIN_FTR_SECTION
mfspr r5, SPRN_TCSCR
mfspr r6, SPRN_ACOP
mfspr r7, SPRN_CSIGR
......@@ -1655,17 +1274,6 @@ BEGIN_FTR_SECTION
std r6, VCPU_ACOP(r9)
std r7, VCPU_CSIGR(r9)
std r8, VCPU_TACR(r9)
FTR_SECTION_ELSE
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
std r5, VCPU_TID(r9)
rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
rotldi r6, r6, 60
std r6, VCPU_PSSCR(r9)
/* Restore host HFSCR value */
ld r7, STACK_SLOT_HFSCR(r1)
mtspr SPRN_HFSCR, r7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_FSCR(r1)
mtspr SPRN_FSCR, r5
......@@ -1677,13 +1285,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
li r0, 0
mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
BEGIN_FTR_SECTION
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
sldi r0, r0, 31
mtspr SPRN_MMCRS, r0
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Save and restore AMR, IAMR and UAMOR before turning on the MMU */
ld r8, STACK_SLOT_IAMR(r1)
......@@ -1740,13 +1346,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
* Branch around the call if both CPU_FTR_TM and
* CPU_FTR_P9_TM_HV_ASSIST are off.
*/
BEGIN_FTR_SECTION
b 91f
END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
......@@ -1792,80 +1394,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_DAWR0, r6
mtspr SPRN_DAWRX0, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
ld r6, STACK_SLOT_DAWR1(r1)
ld r7, STACK_SLOT_DAWRX1(r1)
mtspr SPRN_DAWR1, r6
mtspr SPRN_DAWRX1, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1)
ld r7, STACK_SLOT_PID(r1)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_PID, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
#ifdef CONFIG_PPC_RADIX_MMU
/*
* Are we running hash or radix ?
*/
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
cmpwi cr2, r0, 0
beq cr2, 2f
/*
* Radix: do eieio; tlbsync; ptesync sequence in case we
* interrupted the guest between a tlbie and a ptesync.
*/
eieio
tlbsync
ptesync
BEGIN_FTR_SECTION
/* Radix: Handle the case where the guest used an illegal PID */
LOAD_REG_ADDR(r4, mmu_base_pid)
lwz r3, VCPU_GUEST_PID(r9)
lwz r5, 0(r4)
cmpw cr0,r3,r5
blt 2f
/*
* Illegal PID, the HW might have prefetched and cached in the TLB
* some translations for the LPID 0 / guest PID combination which
* Linux doesn't know about, so we need to flush that PID out of
* the TLB. First we need to set LPIDR to 0 so tlbiel applies to
* the right context.
*/
li r0,0
mtspr SPRN_LPID,r0
isync
/* Then do a congruence class local flush */
ld r6,VCPU_KVM(r9)
lwz r0,KVM_TLB_SETS(r6)
mtctr r0
li r7,0x400 /* IS field = 0b01 */
ptesync
sldi r0,r3,32 /* RS has PID */
1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */
addi r7,r7,0x1000
bdnz 1b
ptesync
END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
2:
#endif /* CONFIG_PPC_RADIX_MMU */
/*
* cp_abort is required if the processor supports local copy-paste
* to clear the copy buffer that was under control of the guest.
*/
BEGIN_FTR_SECTION
PPC_CP_ABORT
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
/*
* POWER7/POWER8 guest -> host partition switch code.
......@@ -1902,13 +1430,11 @@ kvmhv_switch_to_host:
/* Primary thread switches back to host partition */
lwz r7,KVM_HOST_LPID(r4)
BEGIN_FTR_SECTION
ld r6,KVM_HOST_SDR1(r4)
li r8,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r8
ptesync
mtspr SPRN_SDR1,r6 /* switch to host page table */
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
......@@ -2117,26 +1643,13 @@ kvmppc_tm_emul:
* reflect the HDSI to the guest as a DSI.
*/
kvmppc_hdsi:
ld r3, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r3)
mfspr r4, SPRN_HDAR
mfspr r6, SPRN_HDSISR
BEGIN_FTR_SECTION
/* Look for DSISR canary. If we find it, retry instruction */
cmpdi r6, 0x7fff
beq 6f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
cmpwi r0, 0
bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */
/* HPTE not found fault or protection fault? */
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
beq 1f /* if not, send it to the guest */
andi. r0, r11, MSR_DR /* data relocation enabled? */
beq 3f
BEGIN_FTR_SECTION
mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
b 4f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
clrrdi r0, r4, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
li r0, BOOK3S_INTERRUPT_DATA_SEGMENT
......@@ -2204,31 +1717,15 @@ fast_interrupt_c_return:
stb r0, HSTATE_IN_GUEST(r13)
b guest_exit_cont
.Lradix_hdsi:
std r4, VCPU_FAULT_DAR(r9)
stw r6, VCPU_FAULT_DSISR(r9)
.Lradix_hisi:
mfspr r5, SPRN_ASDR
std r5, VCPU_FAULT_GPA(r9)
b guest_exit_cont
/*
* Similarly for an HISI, reflect it to the guest as an ISI unless
* it is an HPTE not found fault for a page that we have paged out.
*/
kvmppc_hisi:
ld r3, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r3)
cmpwi r0, 0
bne .Lradix_hisi /* for radix, just save ASDR */
andis. r0, r11, SRR1_ISI_NOPT@h
beq 1f
andi. r0, r11, MSR_IR /* instruction relocation enabled? */
beq 3f
BEGIN_FTR_SECTION
mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
b 4f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
clrrdi r0, r10, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
li r0, BOOK3S_INTERRUPT_INST_SEGMENT
......@@ -2276,10 +1773,6 @@ hcall_try_real_mode:
andi. r0,r11,MSR_PR
/* sc 1 from userspace - reflect to guest syscall */
bne sc_1_fast_return
/* sc 1 from nested guest - give it to L1 to handle */
ld r0, VCPU_NESTED(r9)
cmpdi r0, 0
bne guest_exit_cont
clrrdi r3,r3,2
cmpldi r3,hcall_real_table_end - hcall_real_table
bge guest_exit_cont
......@@ -2544,7 +2037,7 @@ hcall_real_table:
#else
.long 0 /* 0x2fc - H_XIRR_X*/
#endif
.long DOTSYM(kvmppc_h_random) - hcall_real_table
.long DOTSYM(kvmppc_rm_h_random) - hcall_real_table
.globl hcall_real_table_end
hcall_real_table_end:
......@@ -2675,13 +2168,9 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
* Branch around the call if both CPU_FTR_TM and
* CPU_FTR_P9_TM_HV_ASSIST are off.
*/
BEGIN_FTR_SECTION
b 91f
END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
......@@ -2701,15 +2190,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
BEGIN_FTR_SECTION
/* On P9 check whether the guest has large decrementer mode enabled */
ld r6, HSTATE_KVM_VCORE(r13)
ld r6, VCORE_LPCR(r6)
andis. r6, r6, LPCR_LD@h
bne 68f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r3, r3
68: EXTEND_HDEC(r4)
extsw r4, r4
cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
......@@ -2754,28 +2236,11 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
kvm_nap_sequence: /* desired LPCR value in r5 */
BEGIN_FTR_SECTION
/*
* PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset)
* enable state loss = 1 (allow SMT mode switch)
* requested level = 0 (just stop dispatching)
*/
lis r3, (PSSCR_EC | PSSCR_ESL)@h
/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
li r4, LPCR_PECE_HVEE@higher
sldi r4, r4, 32
or r5, r5, r4
FTR_SECTION_ELSE
li r3, PNV_THREAD_NAP
ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_LPCR,r5
isync
BEGIN_FTR_SECTION
bl isa300_idle_stop_mayloss
FTR_SECTION_ELSE
bl isa206_idle_insn_mayloss
ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mfspr r0, SPRN_CTRLF
ori r0, r0, 1
......@@ -2794,10 +2259,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
beq kvm_end_cede
cmpwi r0, NAPPING_NOVCPU
beq kvm_novcpu_wakeup
BEGIN_FTR_SECTION
cmpwi r0, NAPPING_UNSPLIT
beq kvm_unsplit_wakeup
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
twi 31,0,0 /* Nap state must not be zero */
33: mr r4, r3
......@@ -2817,13 +2280,9 @@ kvm_end_cede:
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
/*
* Branch around the call if both CPU_FTR_TM and
* CPU_FTR_P9_TM_HV_ASSIST are off.
*/
BEGIN_FTR_SECTION
b 91f
END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
......@@ -2913,47 +2372,7 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
#ifdef CONFIG_KVM_XICS
/* are we using XIVE with single escalation? */
ld r10, VCPU_XIVE_ESC_VADDR(r9)
cmpdi r10, 0
beq 3f
li r6, XIVE_ESB_SET_PQ_00
/*
* If we still have a pending escalation, abort the cede,
* and we must set PQ to 10 rather than 00 so that we don't
* potentially end up with two entries for the escalation
* interrupt in the XIVE interrupt queue. In that case
* we also don't want to set xive_esc_on to 1 here in
* case we race with xive_esc_irq().
*/
lbz r5, VCPU_XIVE_ESC_ON(r9)
cmpwi r5, 0
beq 4f
li r0, 0
stb r0, VCPU_CEDED(r9)
/*
* The escalation interrupts are special as we don't EOI them.
* There is no need to use the load-after-store ordering offset
* to set PQ to 10 as we won't use StoreEOI.
*/
li r6, XIVE_ESB_SET_PQ_10
b 5f
4: li r0, 1
stb r0, VCPU_XIVE_ESC_ON(r9)
/* make sure store to xive_esc_on is seen before xive_esc_irq runs */
sync
5: /* Enable XIVE escalation */
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
beq 1f
ldx r0, r10, r6
b 2f
1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
ldcix r0, r10, r6
2: sync
#endif /* CONFIG_KVM_XICS */
3: b guest_exit_cont
b guest_exit_cont
/* Try to do machine check recovery in real mode */
machine_check_realmode:
......@@ -3030,10 +2449,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
PPC_MSGCLR(6)
/* see if it's a host IPI */
li r3, 1
BEGIN_FTR_SECTION
PPC_MSGSYNC
lwsync
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
bnelr
......@@ -3342,73 +2757,12 @@ kvmppc_bad_host_intr:
std r3, STACK_FRAME_OVERHEAD-16(r1)
/*
* On POWER9 do a minimal restore of the MMU and call C code,
* which will print a message and panic.
* XXX On POWER7 and POWER8, we just spin here since we don't
* know what the other threads are doing (and we don't want to
* coordinate with them) - but at least we now have register state
* in memory that we might be able to look at from another CPU.
*/
BEGIN_FTR_SECTION
b .
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
ld r9, HSTATE_KVM_VCPU(r13)
ld r10, VCPU_KVM(r9)
li r0, 0
mtspr SPRN_AMR, r0
mtspr SPRN_IAMR, r0
mtspr SPRN_CIABR, r0
mtspr SPRN_DAWRX0, r0
BEGIN_FTR_SECTION
mtspr SPRN_DAWRX1, r0
END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
/* Clear hash and radix guest SLB, see guest_exit_short_path comment. */
slbmte r0, r0
PPC_SLBIA(6)
BEGIN_MMU_FTR_SECTION
b 4f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
ptesync
ld r8, PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED
li r3, SLBSHADOW_SAVEAREA
LDX_BE r5, r8, r3
addi r3, r3, 8
LDX_BE r6, r8, r3
andis. r7, r5, SLB_ESID_V@h
beq 3f
slbmte r6, r5
3: addi r8, r8, 16
.endr
4: lwz r7, KVM_HOST_LPID(r10)
mtspr SPRN_LPID, r7
mtspr SPRN_PID, r0
ld r8, KVM_HOST_LPCR(r10)
mtspr SPRN_LPCR, r8
isync
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
/*
* Turn on the MMU and jump to C code
*/
bcl 20, 31, .+4
5: mflr r3
addi r3, r3, 9f - 5b
li r4, -1
rldimi r3, r4, 62, 0 /* ensure 0xc000000000000000 bits are set */
ld r4, PACAKMSR(r13)
mtspr SPRN_SRR0, r3
mtspr SPRN_SRR1, r4
RFI_TO_KERNEL
9: addi r3, r1, STACK_FRAME_OVERHEAD
bl kvmppc_bad_interrupt
b 9b
/*
* This mimics the MSR transition on IRQ delivery. The new guest MSR is taken
......
......@@ -164,12 +164,15 @@ kvmppc_interrupt_pr:
/* 64-bit entry. Register usage at this point:
*
* SPRG_SCRATCH0 = guest R13
* R9 = HSTATE_IN_GUEST
* R12 = (guest CR << 32) | exit handler id
* R13 = PACA
* HSTATE.SCRATCH0 = guest R12
* HSTATE.SCRATCH2 = guest R9
*/
#ifdef CONFIG_PPC64
/* Match 32-bit entry */
ld r9,HSTATE_SCRATCH2(r13)
rotldi r12, r12, 32 /* Flip R12 halves for stw */
stw r12, HSTATE_SCRATCH1(r13) /* CR is now in the low half */
srdi r12, r12, 32 /* shift trap into low half */
......
......@@ -127,6 +127,71 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
/*
* Pull a vcpu's context from the XIVE on guest exit.
* This assumes we are in virtual mode (MMU on)
*/
void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
{
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
if (!vcpu->arch.xive_pushed)
return;
/*
* Should not have been pushed if there is no tima
*/
if (WARN_ON(!tima))
return;
eieio();
/* First load to pull the context, we ignore the value */
__raw_readl(tima + TM_SPC_PULL_OS_CTX);
/* Second load to recover the context state (Words 0 and 1) */
vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
/* Fixup some of the state for the next load */
vcpu->arch.xive_saved_state.lsmfb = 0;
vcpu->arch.xive_saved_state.ack = 0xff;
vcpu->arch.xive_pushed = 0;
eieio();
}
EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
{
void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
if (!esc_vaddr)
return;
/* we are using XIVE with single escalation */
if (vcpu->arch.xive_esc_on) {
/*
* If we still have a pending escalation, abort the cede,
* and we must set PQ to 10 rather than 00 so that we don't
* potentially end up with two entries for the escalation
* interrupt in the XIVE interrupt queue. In that case
* we also don't want to set xive_esc_on to 1 here in
* case we race with xive_esc_irq().
*/
vcpu->arch.ceded = 0;
/*
* The escalation interrupts are special as we don't EOI them.
* There is no need to use the load-after-store ordering offset
* to set PQ to 10 as we won't use StoreEOI.
*/
__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
} else {
vcpu->arch.xive_esc_on = true;
mb();
__raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
}
mb();
}
EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
/*
* This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page
......@@ -2075,6 +2140,36 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
return 0;
}
int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
/* The VM should have configured XICS mode before doing XICS hcalls. */
if (!kvmppc_xics_enabled(vcpu))
return H_TOO_HARD;
switch (req) {
case H_XIRR:
return xive_vm_h_xirr(vcpu);
case H_CPPR:
return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
case H_EOI:
return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
case H_IPI:
return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5));
case H_IPOLL:
return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
case H_XIRR_X:
xive_vm_h_xirr(vcpu);
kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset);
return H_SUCCESS;
}
return H_UNSUPPORTED;
}
EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
......@@ -2257,21 +2352,3 @@ struct kvm_device_ops kvm_xive_ops = {
.get_attr = xive_get_attr,
.has_attr = xive_has_attr,
};
void kvmppc_xive_init_module(void)
{
__xive_vm_h_xirr = xive_vm_h_xirr;
__xive_vm_h_ipoll = xive_vm_h_ipoll;
__xive_vm_h_ipi = xive_vm_h_ipi;
__xive_vm_h_cppr = xive_vm_h_cppr;
__xive_vm_h_eoi = xive_vm_h_eoi;
}
void kvmppc_xive_exit_module(void)
{
__xive_vm_h_xirr = NULL;
__xive_vm_h_ipoll = NULL;
__xive_vm_h_ipi = NULL;
__xive_vm_h_cppr = NULL;
__xive_vm_h_eoi = NULL;
}
......@@ -289,13 +289,6 @@ extern int xive_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
extern int xive_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
extern int xive_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
extern unsigned long (*__xive_vm_h_xirr)(struct kvm_vcpu *vcpu);
extern unsigned long (*__xive_vm_h_ipoll)(struct kvm_vcpu *vcpu, unsigned long server);
extern int (*__xive_vm_h_ipi)(struct kvm_vcpu *vcpu, unsigned long server,
unsigned long mfrr);
extern int (*__xive_vm_h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr);
extern int (*__xive_vm_h_eoi)(struct kvm_vcpu *vcpu, unsigned long xirr);
/*
* Common Xive routines for XICS-over-XIVE and XIVE native
*/
......
......@@ -1281,13 +1281,3 @@ struct kvm_device_ops kvm_xive_native_ops = {
.has_attr = kvmppc_xive_native_has_attr,
.mmap = kvmppc_xive_native_mmap,
};
void kvmppc_xive_native_init_module(void)
{
;
}
void kvmppc_xive_native_exit_module(void)
{
;
}
......@@ -357,30 +357,19 @@ static void __init radix_init_pgtable(void)
}
/* Find out how many PID bits are supported */
if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
if (!mmu_pid_bits)
mmu_pid_bits = 20;
mmu_base_pid = 1;
} else if (cpu_has_feature(CPU_FTR_HVMODE)) {
if (!mmu_pid_bits)
mmu_pid_bits = 20;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
if (!cpu_has_feature(CPU_FTR_HVMODE) &&
cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
/*
* When KVM is possible, we only use the top half of the
* PID space to avoid collisions between host and guest PIDs
* which can cause problems due to prefetch when exiting the
* guest with AIL=3
* Older versions of KVM on these machines perfer if the
* guest only uses the low 19 PID bits.
*/
mmu_base_pid = 1 << (mmu_pid_bits - 1);
#else
mmu_base_pid = 1;
#endif
} else {
/* The guest uses the bottom half of the PID space */
if (!mmu_pid_bits)
mmu_pid_bits = 19;
mmu_base_pid = 1;
} else {
if (!mmu_pid_bits)
mmu_pid_bits = 20;
}
mmu_base_pid = 1;
/*
* Allocate Partition table and process table for the
......
......@@ -1344,49 +1344,3 @@ void radix__flush_tlb_all(void)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
{
unsigned long pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
return;
if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
return;
/*
* If this context hasn't run on that CPU before and KVM is
* around, there's a slim chance that the guest on another
* CPU just brought in obsolete translation into the TLB of
* this CPU due to a bad prefetch using the guest PID on
* the way into the hypervisor.
*
* We work around this here. If KVM is possible, we check if
* any sibling thread is in KVM. If it is, the window may exist
* and thus we flush that PID from the core.
*
* A potential future improvement would be to mark which PIDs
* have never been used on the system and avoid it if the PID
* is new and the process has no other cpumask bit set.
*/
if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
int cpu = smp_processor_id();
int sib = cpu_first_thread_sibling(cpu);
bool flush = false;
for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
if (sib == cpu)
continue;
if (!cpu_possible(sib))
continue;
if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
flush = true;
}
if (flush)
_tlbiel_pid(pid, RIC_FLUSH_ALL);
}
}
EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
......@@ -83,9 +83,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (cpu_has_feature(CPU_FTR_ALTIVEC))
asm volatile ("dssall");
if (new_on_cpu)
radix_kvm_prefetch_workaround(next);
else
if (!new_on_cpu)
membarrier_arch_switch_mm(prev, next, tsk);
/*
......
......@@ -604,7 +604,7 @@ struct p9_sprs {
u64 uamor;
};
static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
static unsigned long power9_idle_stop(unsigned long psscr)
{
int cpu = raw_smp_processor_id();
int first = cpu_first_thread_sibling(cpu);
......@@ -620,8 +620,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
/* EC=ESL=0 case */
BUG_ON(!mmu_on);
/*
* Wake synchronously. SRESET via xscom may still cause
* a 0x100 powersave wakeup with SRR1 reason!
......@@ -803,8 +801,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
__slb_restore_bolted_realmode();
out:
if (mmu_on)
mtmsr(MSR_KERNEL);
mtmsr(MSR_KERNEL);
return srr1;
}
......@@ -895,7 +892,7 @@ struct p10_sprs {
*/
};
static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
static unsigned long power10_idle_stop(unsigned long psscr)
{
int cpu = raw_smp_processor_id();
int first = cpu_first_thread_sibling(cpu);
......@@ -909,8 +906,6 @@ static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
/* EC=ESL=0 case */
BUG_ON(!mmu_on);
/*
* Wake synchronously. SRESET via xscom may still cause
* a 0x100 powersave wakeup with SRR1 reason!
......@@ -991,8 +986,7 @@ static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
__slb_restore_bolted_realmode();
out:
if (mmu_on)
mtmsr(MSR_KERNEL);
mtmsr(MSR_KERNEL);
return srr1;
}
......@@ -1002,40 +996,10 @@ static unsigned long arch300_offline_stop(unsigned long psscr)
{
unsigned long srr1;
#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
__ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
srr1 = power10_idle_stop(psscr, true);
srr1 = power10_idle_stop(psscr);
else
srr1 = power9_idle_stop(psscr, true);
__ppc64_runlatch_on();
#else
/*
* Tell KVM we're entering idle.
* This does not have to be done in real mode because the P9 MMU
* is independent per-thread. Some steppings share radix/hash mode
* between threads, but in that case KVM has a barrier sync in real
* mode before and after switching between radix and hash.
*
* kvm_start_guest must still be called in real mode though, hence
* the false argument.
*/
local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
__ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
srr1 = power10_idle_stop(psscr, false);
else
srr1 = power9_idle_stop(psscr, false);
__ppc64_runlatch_on();
local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
/* Order setting hwthread_state vs. testing hwthread_req */
smp_mb();
if (local_paca->kvm_hstate.hwthread_req)
srr1 = idle_kvm_start_guest(srr1);
mtmsr(MSR_KERNEL);
#endif
srr1 = power9_idle_stop(psscr);
return srr1;
}
......@@ -1055,9 +1019,9 @@ void arch300_idle_type(unsigned long stop_psscr_val,
__ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
srr1 = power10_idle_stop(psscr, true);
srr1 = power10_idle_stop(psscr);
else
srr1 = power9_idle_stop(psscr, true);
srr1 = power9_idle_stop(psscr);
__ppc64_runlatch_on();
fini_irq_for_idle_irqsoff();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment