Commit b4deba5c authored by Paul Mackerras, committed by Alexander Graf

KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8

This builds on the ability to run more than one vcore on a physical
core by using the micro-threading (split-core) modes of the POWER8
chip.  Previously, only vcores from the same VM could be run together,
and (on POWER8) only if they had just one thread per core.  With the
ability to split the core on guest entry and unsplit it on guest exit,
we can run up to 8 vcpu threads from up to 4 different VMs, and we can
run multiple vcores with 2 or 4 vcpus per vcore.

Dynamic micro-threading is only available if the static configuration
of the cores is whole-core mode (unsplit), and only on POWER8.

To manage this, we introduce a new kvm_split_mode struct which is
shared across all of the subcores in the core, with a pointer in the
paca on each thread.  In addition we extend the core_info struct to
have information on each subcore.  When deciding whether to add a
vcore to the set already on the core, we now have two possibilities:
(a) piggyback the vcore onto an existing subcore, or (b) start a new
subcore.

Currently, when any vcpu needs to exit the guest and switch to host
virtual mode, we interrupt all the threads in all subcores and switch
the core back to whole-core mode.  It may be possible in future to
allow some of the subcores to keep executing in the guest while
subcore 0 switches to the host, but that is not implemented in this
patch.

This adds a module parameter called dynamic_mt_modes which controls
which micro-threading (split-core) modes the code will consider, as a
bitmap.  In other words, if it is 0, no micro-threading mode is
considered; if it is 2, only 2-way micro-threading is considered; if
it is 4, only 4-way, and if it is 6, both 2-way and 4-way
micro-threading mode will be considered.  The default is 6.

With this, we now have secondary threads which are the primary thread
for their subcore and therefore need to do the MMU switch.  These
threads will need to be started even if they have no vcpu to run, so
we use the vcore pointer in the PACA rather than the vcpu pointer to
trigger them.

It is now possible for thread 0 to find that an exit has been
requested before it gets to switch the subcore state to the guest.  In
that case we haven't added the guest's timebase offset to the
timebase, so we need to be careful not to subtract the offset in the
guest exit path.  In fact we just skip the whole path that switches
back to host context, since we haven't switched to the guest context.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
parent ec257165
...@@ -25,6 +25,12 @@ ...@@ -25,6 +25,12 @@
#define XICS_MFRR 0xc #define XICS_MFRR 0xc
#define XICS_IPI 2 /* interrupt source # for IPIs */ #define XICS_IPI 2 /* interrupt source # for IPIs */
/* Maximum number of threads per physical core */
#define MAX_SMT_THREADS 8
/* Maximum number of subcores per physical core */
#define MAX_SUBCORES 4
#ifdef __ASSEMBLY__ #ifdef __ASSEMBLY__
#ifdef CONFIG_KVM_BOOK3S_HANDLER #ifdef CONFIG_KVM_BOOK3S_HANDLER
...@@ -65,6 +71,19 @@ kvmppc_resume_\intno: ...@@ -65,6 +71,19 @@ kvmppc_resume_\intno:
#else /*__ASSEMBLY__ */ #else /*__ASSEMBLY__ */
struct kvmppc_vcore;
/*
 * Struct used for coordinating micro-threading (split-core) mode changes.
 *
 * A single instance is shared across all of the subcores in a physical
 * core; each thread reaches it through the kvm_split_mode pointer in its
 * kvmppc_host_state (in the PACA).  The layout is consumed from assembly
 * via the KVM_SPLIT_* offsets generated with offsetof(), so fields must
 * not be reordered or resized without regenerating those offsets.
 */
struct kvm_split_mode {
/*
 * Values that thread 0 of a subcore writes to SPRN_RPR, SPRN_PMMAR and
 * SPRN_LDBAR before entering the guest when dynamic micro-threading is
 * in use.
 */
unsigned long rpr;
unsigned long pmmar;
unsigned long ldbar;
/*
 * NOTE(review): exported to assembly as KVM_SPLIT_SIZE, but its users are
 * not visible here; presumably the number of threads per subcore -- confirm.
 */
u8 subcore_size;
/*
 * Non-zero while the primary thread is trying to return the core to
 * whole-core mode.  A secondary thread with no vcore to run that sees
 * this set marks itself in napped[] and naps (kvm_unsplit_nap).
 */
u8 do_nap;
/*
 * Per-thread flag, indexed by the low 3 bits of the thread's PACA index.
 * Set to 1 by a thread just before it re-checks do_nap and naps; cleared
 * again by that thread if do_nap turned out to be zero.
 */
u8 napped[MAX_SMT_THREADS];
/*
 * Vcores to be pulled out of the guest when any thread commences an exit:
 * kvmhv_commence_exit() walks this array up to the first NULL entry, sets
 * VCORE_EXIT_REQ in each vcore's entry_exit_map and interrupts it.
 * Presumably one entry per active subcore -- confirm against the code
 * that fills it in.
 */
struct kvmppc_vcore *master_vcs[MAX_SUBCORES];
};
/* /*
* This struct goes in the PACA on 64-bit processors. It is used * This struct goes in the PACA on 64-bit processors. It is used
* to store host state that needs to be saved when we enter a guest * to store host state that needs to be saved when we enter a guest
...@@ -100,6 +119,7 @@ struct kvmppc_host_state { ...@@ -100,6 +119,7 @@ struct kvmppc_host_state {
u64 host_spurr; u64 host_spurr;
u64 host_dscr; u64 host_dscr;
u64 dec_expires; u64 dec_expires;
struct kvm_split_mode *kvm_split_mode;
#endif #endif
#ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_BOOK3S_64
u64 cfar; u64 cfar;
......
...@@ -302,6 +302,9 @@ struct kvmppc_vcore { ...@@ -302,6 +302,9 @@ struct kvmppc_vcore {
#define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8) #define VCORE_EXIT_MAP(vc) ((vc)->entry_exit_map >> 8)
#define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0) #define VCORE_IS_EXITING(vc) (VCORE_EXIT_MAP(vc) != 0)
/* This bit is used when a vcore exit is triggered from outside the vcore */
#define VCORE_EXIT_REQ 0x10000
/* /*
* Values for vcore_state. * Values for vcore_state.
* Note that these are arranged such that lower values * Note that these are arranged such that lower values
......
...@@ -676,7 +676,14 @@ int main(void) ...@@ -676,7 +676,14 @@ int main(void)
HSTATE_FIELD(HSTATE_DSCR, host_dscr); HSTATE_FIELD(HSTATE_DSCR, host_dscr);
HSTATE_FIELD(HSTATE_DABR, dabr); HSTATE_FIELD(HSTATE_DABR, dabr);
HSTATE_FIELD(HSTATE_DECEXP, dec_expires); HSTATE_FIELD(HSTATE_DECEXP, dec_expires);
HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode);
DEFINE(IPI_PRIORITY, IPI_PRIORITY); DEFINE(IPI_PRIORITY, IPI_PRIORITY);
DEFINE(KVM_SPLIT_RPR, offsetof(struct kvm_split_mode, rpr));
DEFINE(KVM_SPLIT_PMMAR, offsetof(struct kvm_split_mode, pmmar));
DEFINE(KVM_SPLIT_LDBAR, offsetof(struct kvm_split_mode, ldbar));
DEFINE(KVM_SPLIT_SIZE, offsetof(struct kvm_split_mode, subcore_size));
DEFINE(KVM_SPLIT_DO_NAP, offsetof(struct kvm_split_mode, do_nap));
DEFINE(KVM_SPLIT_NAPPED, offsetof(struct kvm_split_mode, napped));
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
#ifdef CONFIG_PPC_BOOK3S_64 #ifdef CONFIG_PPC_BOOK3S_64
......
This diff is collapsed.
...@@ -239,7 +239,8 @@ void kvmhv_commence_exit(int trap) ...@@ -239,7 +239,8 @@ void kvmhv_commence_exit(int trap)
{ {
struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore; struct kvmppc_vcore *vc = local_paca->kvm_hstate.kvm_vcore;
int ptid = local_paca->kvm_hstate.ptid; int ptid = local_paca->kvm_hstate.ptid;
int me, ee; struct kvm_split_mode *sip = local_paca->kvm_hstate.kvm_split_mode;
int me, ee, i;
/* Set our bit in the threads-exiting-guest map in the 0xff00 /* Set our bit in the threads-exiting-guest map in the 0xff00
bits of vcore->entry_exit_map */ bits of vcore->entry_exit_map */
...@@ -259,4 +260,26 @@ void kvmhv_commence_exit(int trap) ...@@ -259,4 +260,26 @@ void kvmhv_commence_exit(int trap)
*/ */
if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER) if (trap != BOOK3S_INTERRUPT_HV_DECREMENTER)
kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid)); kvmhv_interrupt_vcore(vc, ee & ~(1 << ptid));
/*
* If we are doing dynamic micro-threading, interrupt the other
* subcores to pull them out of their guests too.
*/
if (!sip)
return;
for (i = 0; i < MAX_SUBCORES; ++i) {
vc = sip->master_vcs[i];
if (!vc)
break;
do {
ee = vc->entry_exit_map;
/* Already asked to exit? */
if ((ee >> 8) != 0)
break;
} while (cmpxchg(&vc->entry_exit_map, ee,
ee | VCORE_EXIT_REQ) != ee);
if ((ee >> 8) == 0)
kvmhv_interrupt_vcore(vc, ee);
}
} }
...@@ -128,6 +128,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -128,6 +128,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
subf r4, r4, r3 subf r4, r4, r3
mtspr SPRN_DEC, r4 mtspr SPRN_DEC, r4
/* hwthread_req may have got set by cede or no vcpu, so clear it */
li r0, 0
stb r0, HSTATE_HWTHREAD_REQ(r13)
/* /*
* For external and machine check interrupts, we need * For external and machine check interrupts, we need
* to call the Linux handler to process the interrupt. * to call the Linux handler to process the interrupt.
...@@ -215,7 +219,6 @@ kvm_novcpu_wakeup: ...@@ -215,7 +219,6 @@ kvm_novcpu_wakeup:
ld r5, HSTATE_KVM_VCORE(r13) ld r5, HSTATE_KVM_VCORE(r13)
li r0, 0 li r0, 0
stb r0, HSTATE_NAPPING(r13) stb r0, HSTATE_NAPPING(r13)
stb r0, HSTATE_HWTHREAD_REQ(r13)
/* check the wake reason */ /* check the wake reason */
bl kvmppc_check_wake_reason bl kvmppc_check_wake_reason
...@@ -315,10 +318,10 @@ kvm_start_guest: ...@@ -315,10 +318,10 @@ kvm_start_guest:
cmpdi r3, 0 cmpdi r3, 0
bge kvm_no_guest bge kvm_no_guest
/* get vcpu pointer, NULL if we have no vcpu to run */ /* get vcore pointer, NULL if we have nothing to run */
ld r4,HSTATE_KVM_VCPU(r13) ld r5,HSTATE_KVM_VCORE(r13)
cmpdi r4,0 cmpdi r5,0
/* if we have no vcpu to run, go back to sleep */ /* if we have no vcore to run, go back to sleep */
beq kvm_no_guest beq kvm_no_guest
kvm_secondary_got_guest: kvm_secondary_got_guest:
...@@ -327,21 +330,42 @@ kvm_secondary_got_guest: ...@@ -327,21 +330,42 @@ kvm_secondary_got_guest:
ld r6, PACA_DSCR_DEFAULT(r13) ld r6, PACA_DSCR_DEFAULT(r13)
std r6, HSTATE_DSCR(r13) std r6, HSTATE_DSCR(r13)
/* Order load of vcore, ptid etc. after load of vcpu */ /* On thread 0 of a subcore, set HDEC to max */
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
lis r6, 0x7fff
ori r6, r6, 0xffff
mtspr SPRN_HDEC, r6
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
cmpdi r6, 0
beq 63f
ld r0, KVM_SPLIT_RPR(r6)
mtspr SPRN_RPR, r0
ld r0, KVM_SPLIT_PMMAR(r6)
mtspr SPRN_PMMAR, r0
ld r0, KVM_SPLIT_LDBAR(r6)
mtspr SPRN_LDBAR, r0
isync
63:
/* Order load of vcpu after load of vcore */
lwsync lwsync
ld r4, HSTATE_KVM_VCPU(r13)
bl kvmppc_hv_entry bl kvmppc_hv_entry
/* Back from the guest, go back to nap */ /* Back from the guest, go back to nap */
/* Clear our vcpu pointer so we don't come back in early */ /* Clear our vcpu and vcore pointers so we don't come back in early */
li r0, 0 li r0, 0
std r0, HSTATE_KVM_VCPU(r13)
/* /*
* Once we clear HSTATE_KVM_VCPU(r13), the code in * Once we clear HSTATE_KVM_VCORE(r13), the code in
* kvmppc_run_core() is going to assume that all our vcpu * kvmppc_run_core() is going to assume that all our vcpu
* state is visible in memory. This lwsync makes sure * state is visible in memory. This lwsync makes sure
* that that is true. * that that is true.
*/ */
lwsync lwsync
std r0, HSTATE_KVM_VCPU(r13) std r0, HSTATE_KVM_VCORE(r13)
/* /*
* At this point we have finished executing in the guest. * At this point we have finished executing in the guest.
...@@ -374,16 +398,63 @@ kvm_no_guest: ...@@ -374,16 +398,63 @@ kvm_no_guest:
b power7_wakeup_loss b power7_wakeup_loss
53: HMT_LOW 53: HMT_LOW
ld r4, HSTATE_KVM_VCPU(r13) ld r5, HSTATE_KVM_VCORE(r13)
cmpdi r4, 0 cmpdi r5, 0
bne 60f
ld r3, HSTATE_SPLIT_MODE(r13)
cmpdi r3, 0
beq kvm_no_guest
lbz r0, KVM_SPLIT_DO_NAP(r3)
cmpwi r0, 0
beq kvm_no_guest beq kvm_no_guest
HMT_MEDIUM HMT_MEDIUM
b kvm_unsplit_nap
60: HMT_MEDIUM
b kvm_secondary_got_guest b kvm_secondary_got_guest
54: li r0, KVM_HWTHREAD_IN_KVM 54: li r0, KVM_HWTHREAD_IN_KVM
stb r0, HSTATE_HWTHREAD_STATE(r13) stb r0, HSTATE_HWTHREAD_STATE(r13)
b kvm_no_guest b kvm_no_guest
/*
* Here the primary thread is trying to return the core to
* whole-core mode, so we need to nap.
*/
kvm_unsplit_nap:
/* clear any pending message */
BEGIN_FTR_SECTION
lis r6, (PPC_DBELL_SERVER << (63-36))@h
PPC_MSGCLR(6)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
/* Set kvm_split_mode.napped[tid] = 1 */
ld r3, HSTATE_SPLIT_MODE(r13)
li r0, 1
lhz r4, PACAPACAINDEX(r13)
clrldi r4, r4, 61 /* micro-threading => P8 => 8 threads/core */
addi r4, r4, KVM_SPLIT_NAPPED
stbx r0, r3, r4
/* Check the do_nap flag again after setting napped[] */
sync
lbz r0, KVM_SPLIT_DO_NAP(r3)
cmpwi r0, 0
beq 57f
li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
mtspr SPRN_LPCR, r4
isync
std r0, HSTATE_SCRATCH0(r13)
ptesync
ld r0, HSTATE_SCRATCH0(r13)
1: cmpd r0, r0
bne 1b
nap
b .
57: li r0, 0
stbx r0, r3, r4
b kvm_no_guest
/****************************************************************************** /******************************************************************************
* * * *
* Entry code * * Entry code *
...@@ -854,7 +925,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ...@@ -854,7 +925,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
cmpwi r0, 0 cmpwi r0, 0
bne 21f bne 21f
HMT_LOW HMT_LOW
20: lbz r0, VCORE_IN_GUEST(r5) 20: lwz r3, VCORE_ENTRY_EXIT(r5)
cmpwi r3, 0x100
bge no_switch_exit
lbz r0, VCORE_IN_GUEST(r5)
cmpwi r0, 0 cmpwi r0, 0
beq 20b beq 20b
HMT_MEDIUM HMT_MEDIUM
...@@ -985,9 +1059,13 @@ secondary_too_late: ...@@ -985,9 +1059,13 @@ secondary_too_late:
#endif #endif
11: b kvmhv_switch_to_host 11: b kvmhv_switch_to_host
no_switch_exit:
HMT_MEDIUM
li r12, 0
b 12f
hdec_soon: hdec_soon:
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
stw r12, VCPU_TRAP(r4) 12: stw r12, VCPU_TRAP(r4)
mr r9, r4 mr r9, r4
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
addi r3, r4, VCPU_TB_RMEXIT addi r3, r4, VCPU_TB_RMEXIT
...@@ -1545,12 +1623,17 @@ kvmhv_switch_to_host: ...@@ -1545,12 +1623,17 @@ kvmhv_switch_to_host:
/* Primary thread waits for all the secondaries to exit guest */ /* Primary thread waits for all the secondaries to exit guest */
15: lwz r3,VCORE_ENTRY_EXIT(r5) 15: lwz r3,VCORE_ENTRY_EXIT(r5)
srwi r0,r3,8 rlwinm r0,r3,32-8,0xff
clrldi r3,r3,56 clrldi r3,r3,56
cmpw r3,r0 cmpw r3,r0
bne 15b bne 15b
isync isync
/* Did we actually switch to the guest at all? */
lbz r6, VCORE_IN_GUEST(r5)
cmpwi r6, 0
beq 19f
/* Primary thread switches back to host partition */ /* Primary thread switches back to host partition */
ld r6,KVM_HOST_SDR1(r4) ld r6,KVM_HOST_SDR1(r4)
lwz r7,KVM_HOST_LPID(r4) lwz r7,KVM_HOST_LPID(r4)
...@@ -1594,7 +1677,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) ...@@ -1594,7 +1677,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
18: 18:
/* Signal secondary CPUs to continue */ /* Signal secondary CPUs to continue */
stb r0,VCORE_IN_GUEST(r5) stb r0,VCORE_IN_GUEST(r5)
lis r8,0x7fff /* MAX_INT@h */ 19: lis r8,0x7fff /* MAX_INT@h */
mtspr SPRN_HDEC,r8 mtspr SPRN_HDEC,r8
16: ld r8,KVM_HOST_LPCR(r4) 16: ld r8,KVM_HOST_LPCR(r4)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment