Commit 95a6432c authored by Paul Mackerras, committed by Michael Ellerman

KVM: PPC: Book3S HV: Streamlined guest entry/exit path on P9 for radix guests

This creates an alternative guest entry/exit path which is used for
radix guests on POWER9 systems when we have indep_threads_mode=Y.  In
these circumstances there is exactly one vcpu per vcore and there is
no coordination required between vcpus or vcores; the vcpu can enter
the guest without needing to synchronize with anything else.

The new fast path is implemented almost entirely in C in book3s_hv.c
and runs with the MMU on until the guest is entered.  On guest exit
we use the existing path until the point where we are committed to
exiting the guest (as distinct from handling an interrupt in the
low-level code and returning to the guest) and we have pulled the
guest context from the XIVE.  At that point we check a flag in the
stack frame to see whether we came in via the old path or the new
path; if we came in via the new path then we go back to C code to do
the rest of the process of saving the guest context and restoring the
host context.

The C code is split into separate functions for handling the
OS-accessible state and the hypervisor state, with the idea that the
latter can be replaced by a hypercall when we implement nested
virtualization.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
[mpe: Fix CONFIG_ALTIVEC=n build]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 53655ddd
......@@ -165,4 +165,6 @@ void kvmhv_load_host_pmu(void);
void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
......@@ -583,6 +583,7 @@ extern int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
#else
/* Stub on the #else side of the XIVE declarations: does nothing and returns -1. */
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
u32 priority) { return -1; }
......@@ -605,6 +606,7 @@ static inline int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) { retur
/* Stub used before the CONFIG_KVM_XIVE #endif: does nothing and returns -ENODEV. */
static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status) { return -ENODEV; }
/* Empty stub for kvmppc_xive_push_vcpu(); it has no effect on the vcpu. */
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
#endif /* CONFIG_KVM_XIVE */
/*
......
This diff is collapsed.
......@@ -177,6 +177,7 @@ void kvmppc_subcore_enter_guest(void)
local_paca->sibling_subcore_state->in_guest[subcore_id] = 1;
}
EXPORT_SYMBOL_GPL(kvmppc_subcore_enter_guest);
void kvmppc_subcore_exit_guest(void)
{
......@@ -187,6 +188,7 @@ void kvmppc_subcore_exit_guest(void)
local_paca->sibling_subcore_state->in_guest[subcore_id] = 0;
}
EXPORT_SYMBOL_GPL(kvmppc_subcore_exit_guest);
static bool kvmppc_tb_resync_required(void)
{
......
......@@ -47,8 +47,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define NAPPING_NOVCPU 2
/* Stack frame offsets for kvmppc_hv_entry */
#define SFS 160
#define SFS 208
#define STACK_SLOT_TRAP (SFS-4)
#define STACK_SLOT_SHORT_PATH (SFS-8)
#define STACK_SLOT_TID (SFS-16)
#define STACK_SLOT_PSSCR (SFS-24)
#define STACK_SLOT_PID (SFS-32)
......@@ -57,6 +58,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_DAWR (SFS-56)
#define STACK_SLOT_DAWRX (SFS-64)
#define STACK_SLOT_HFSCR (SFS-72)
/* the following is used by the P9 short path */
#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
/*
* Call kvmppc_hv_entry in real mode.
......@@ -1020,6 +1023,9 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
no_xive:
#endif /* CONFIG_KVM_XICS */
li r0, 0
stw r0, STACK_SLOT_SHORT_PATH(r1)
deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */
/* Check if we can deliver an external or decrementer interrupt now */
ld r0, VCPU_PENDING_EXC(r4)
......@@ -1034,13 +1040,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
bl kvmppc_guest_entry_inject_int
ld r4, HSTATE_KVM_VCPU(r13)
71:
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4)
ld r6, VCPU_SRR0(r4)
ld r7, VCPU_SRR1(r4)
mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
fast_guest_entry_c:
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4)
/* r11 = vcpu->arch.msr & ~MSR_HV */
rldicl r11, r11, 63 - MSR_HV_LG, 1
rotldi r11, r11, 1 + MSR_HV_LG
......@@ -1117,6 +1124,83 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
HRFI_TO_GUEST
b .
/*
* Enter the guest on a P9 or later system where we have exactly
* one vcpu per vcore and we don't need to go to real mode
* (which implies that host and guest are both using radix MMU mode).
* r3 = vcpu pointer
* Most SPRs and all the VSRs have been loaded already.
*
* An SFS-byte stack frame is created and STACK_SLOT_SHORT_PATH in it
* is set to 1.  Control then joins the common entry code at
* fast_guest_entry_c with r4 = vcpu.  The matching exit code is
* guest_exit_short_path below, which returns to the caller of this
* function with the trap number in r3.
*/
_GLOBAL(__kvmhv_vcpu_entry_p9)
EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
/* Save LR in the caller's frame, then allocate our own frame */
mflr r0
std r0, PPC_LR_STKOFF(r1)
stdu r1, -SFS(r1)
/* Flag this frame as having been entered through the short path */
li r0, 1
stw r0, STACK_SLOT_SHORT_PATH(r1)
/* Record the vcpu pointer in the per-cpu host state addressed by r13 */
std r3, HSTATE_KVM_VCPU(r13)
/* Save CR at SFS+8 from the new r1, i.e. 8 bytes into the caller's frame */
mfcr r4
stw r4, SFS+8(r1)
/* Remember the stack pointer so the exit path can find this frame */
std r1, HSTATE_HOST_R1(r13)
/* Save host r14..r31 (18 registers) into the STACK_SLOT_NVGPRS area */
reg = 14
.rept 18
std reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
reg = reg + 1
.endr
/* Load guest r14..r31 from the vcpu */
reg = 14
.rept 18
ld reg, __VCPU_GPR(reg)(r3)
reg = reg + 1
.endr
/* Save the current MSR; the exit path uses it to rebuild SRR1 */
mfmsr r10
std r10, HSTATE_HOST_MSR(r13)
/* The common entry code reads the vcpu through r4 */
mr r4, r3
b fast_guest_entry_c
/*
* Short-path exit.  Reached from guest_exit_cont when
* STACK_SLOT_SHORT_PATH is non-zero; that label documents
* r9 = vcpu, r12 = trap, r13 = paca.
*/
guest_exit_short_path:
/* Store KVM_GUEST_MODE_NONE into the in_guest state byte */
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
/* Save guest r14..r31 into the vcpu */
reg = 14
.rept 18
std reg, __VCPU_GPR(reg)(r9)
reg = reg + 1
.endr
/* Restore host r14..r31 from the stack frame */
reg = 14
.rept 18
ld reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
reg = reg + 1
.endr
/* Restore the CR value saved on entry */
lwz r4, SFS+8(r1)
mtcr r4
mr r3, r12 /* trap number */
/* Pop our frame and reload the caller's return address into LR */
addi r1, r1, SFS
ld r0, PPC_LR_STKOFF(r1)
mtlr r0
/*
* If we are in real mode, do a rfid to get back to the caller.
* If MSR_IR is set in the current MSR, bnelr returns directly instead.
*/
mfmsr r4
andi. r5, r4, MSR_IR
bnelr
/*
* MSR_IR is clear: set SRR0 to the return address and SRR1 to the
* host MSR saved on entry, with the TS field of the current MSR
* merged in, then return with RFI_TO_KERNEL.
*/
rldicl r5, r4, 64 - MSR_TS_S_LG, 62 /* extract TS field */
mtspr SPRN_SRR0, r0
ld r10, HSTATE_HOST_MSR(r13)
rldimi r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
mtspr SPRN_SRR1, r10
RFI_TO_KERNEL
b .
secondary_too_late:
li r12, 0
stw r12, STACK_SLOT_TRAP(r1)
......@@ -1377,6 +1461,11 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1:
#endif /* CONFIG_KVM_XICS */
/* If we came in through the P9 short path, go back out to C now */
lwz r0, STACK_SLOT_SHORT_PATH(r1)
cmpwi r0, 0
bne guest_exit_short_path
/* For hash guest, read the guest SLB and save it away */
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
......
......@@ -61,6 +61,69 @@
*/
#define XIVE_Q_GAP 2
/*
* Push a vcpu's context to the XIVE on guest entry.
* This assumes we are in virtual mode (MMU on)
*
* @vcpu: the vcpu whose saved XIVE state (xive_saved_state.w01 and
*        xive_cam_word) is written to the TIMA.
*
* Does nothing if this CPU's local_paca->kvm_hstate.xive_tima_virt is
* NULL.  Otherwise it sets vcpu->arch.xive_pushed, clears
* vcpu->arch.irq_pending and, if the escalation interrupt is on,
* masks it and possibly clears vcpu->arch.xive_esc_on.
*
* NOTE(review): nothing here checks that vcpu->arch.xive_cam_word is
* non-zero before pushing; confirm that callers only reach this with a
* vcpu that has a XIVE context set up.
*/
void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
{
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
u64 pq;
/* No TIMA mapping for this CPU: nothing to push */
if (!tima)
return;
/*
* Write the saved w01 words and the CAM word at the TM_QW1_OS
* offsets of the TIMA.  The raw MMIO stores are bracketed by
* eieio() barriers.
*/
eieio();
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
/* Record that the context has been pushed */
vcpu->arch.xive_pushed = 1;
eieio();
/*
* We clear the irq_pending flag. There is a small chance of a
* race vs. the escalation interrupt happening on another
* processor setting it again, but the only consequence is to
* cause a spurious wakeup on the next H_CEDE, which is not an
* issue.
*/
vcpu->arch.irq_pending = 0;
/*
* In single escalation mode, if the escalation interrupt is
* on, we mask it.
*/
if (vcpu->arch.xive_esc_on) {
/*
* This load at the XIVE_ESB_SET_PQ_01 offset does the masking;
* the value it returns is the previous PQ state (see below).
*/
pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
XIVE_ESB_SET_PQ_01));
mb();
/*
* We have a possible subtle race here: The escalation
* interrupt might have fired and be on its way to the
* host queue while we mask it, and if we unmask it
* early enough (re-cede right away), there is a
* theoretical possibility that it fires again, thus
* landing in the target queue more than once which is
* a big no-no.
*
* Fortunately, solving this is rather easy. If the
* above load setting PQ to 01 returns a previous
* value where P is set, then we know the escalation
* interrupt is somewhere on its way to the host. In
* that case we simply don't clear the xive_esc_on
* flag below. It will be eventually cleared by the
* handler for the escalation interrupt.
*
* Then, when doing a cede, we check that flag again
* before re-enabling the escalation interrupt, and if
* set, we abort the cede.
*/
if (!(pq & XIVE_ESB_VAL_P))
/* Now P is 0, we can clear the flag */
vcpu->arch.xive_esc_on = 0;
}
}
EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
/*
* This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment