Commit 7924bd41 authored by Hollis Blanchard, committed by Avi Kivity

KVM: ppc: directly insert shadow mappings into the hardware TLB

Formerly, we maintained a per-vcpu shadow TLB and loaded that array into the
hardware TLB on every entry to the guest. This consumed 1280 bytes of memory
(64 entries of 16 bytes plus a struct page pointer each), and also required
some assembly to loop over the array on every entry.
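
To make that arithmetic concrete, here is a minimal standalone sketch (not
kernel code: it assumes 32-bit host pointers, and the struct below is only a
stand-in for the real 16-byte kvmppc_44x_tlbe):

#include <stdio.h>

#define PPC44x_TLB_SIZE 64

/* Illustrative stand-in for the 16-byte TLB entry (four 32-bit words). */
struct tlbe {
	unsigned int tid;
	unsigned int word0;
	unsigned int word1;
	unsigned int word2;
};

int main(void)
{
	unsigned long shadow_copy = PPC44x_TLB_SIZE * sizeof(struct tlbe); /* 1024 */
	unsigned long page_ptrs   = PPC44x_TLB_SIZE * sizeof(void *);      /* 256 on a 32-bit host */

	printf("%lu bytes\n", shadow_copy + page_ptrs); /* 1280 */
	return 0;
}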

Instead of saving a copy in memory, we can just store shadow mappings directly
into the hardware TLB, accepting that the host kernel will clobber some of
them as part of the normal 440 TLB round robin. This needs less than half the
memory, and it decreases exit handling time for all guest exits, at the cost
of an increased number of TLB misses because the host overwrites some guest
entries.
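
As a toy model of the idea (purely illustrative; it is not the 44x_tlb.c code,
whose diff is collapsed below): each hardware TLB slot gets a small record of
which guest TLB entry it currently shadows, entries are written straight into
whichever slot a round robin hands out, and the host may overwrite them later,
in which case the next miss simply re-faults the mapping.

#include <stdio.h>

#define HW_TLB_SIZE 64

/* Per hardware slot: which guest TLB entry it shadows, -1 if none. */
static int shadow_gtlb_index[HW_TLB_SIZE];
static int next_victim;	/* toy round-robin pointer */

/* Install guest TLB entry 'gtlb_index' into the next hardware slot,
 * silently evicting whatever was there before. */
static int map_guest_entry(int gtlb_index)
{
	int slot = next_victim;

	next_victim = (next_victim + 1) % HW_TLB_SIZE;
	shadow_gtlb_index[slot] = gtlb_index;	/* remember the owner */
	return slot;
}

int main(void)
{
	int i;

	for (i = 0; i < HW_TLB_SIZE; i++)
		shadow_gtlb_index[i] = -1;	/* shadows nothing yet */

	printf("guest entry 5 -> hw slot %d\n", map_guest_entry(5));
	printf("guest entry 9 -> hw slot %d\n", map_guest_entry(9));
	return 0;
}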

These savings will grow on processors with larger TLBs, or on processors that
implement intelligent flush instructions like tlbivax (which will avoid the
need to walk arrays in software).

Beyond that memory saving and the code simplification, we also stand a greater
chance of leaving other host userspace mappings in the TLB, instead of forcing
all subsequent tasks to re-fault all their mappings.
Signed-off-by: Hollis Blanchard <hollisb@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
parent c0ca609c
@@ -22,19 +22,25 @@
 #include <linux/kvm_host.h>
 
+/* XXX Can't include mmu-44x.h because it redefines struct mm_context. */
 #define PPC44x_TLB_SIZE 64
 
+/* If the guest is expecting it, this can be as large as we like; we'd just
+ * need to find some way of advertising it. */
+#define KVM44x_GUEST_TLB_SIZE 64
+
+struct kvmppc_44x_shadow_ref {
+	struct page *page;
+	u16 gtlb_index;
+	u8 writeable;
+	u8 tid;
+};
+
 struct kvmppc_vcpu_44x {
 	/* Unmodified copy of the guest's TLB. */
-	struct kvmppc_44x_tlbe guest_tlb[PPC44x_TLB_SIZE];
-	/* TLB that's actually used when the guest is running. */
-	struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE];
-	/* Pages which are referenced in the shadow TLB. */
-	struct page *shadow_pages[PPC44x_TLB_SIZE];
-
-	/* Track which TLB entries we've modified in the current exit. */
-	u8 shadow_tlb_mod[PPC44x_TLB_SIZE];
+	struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE];
+
+	/* References to guest pages in the hardware TLB. */
+	struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE];
 
 	struct kvm_vcpu vcpu;
 };
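
For scale, a quick sizeof sketch of the new bookkeeping above (standalone and
only illustrative: kernel types are swapped for standard fixed-width ones, and
a 32-bit host is assumed so that struct page * is 4 bytes):

#include <stdint.h>
#include <stdio.h>

#define PPC44x_TLB_SIZE 64

/* Stand-in for the kvmppc_44x_shadow_ref introduced in the hunk above. */
struct shadow_ref {
	void *page;		/* struct page * in the kernel */
	uint16_t gtlb_index;
	uint8_t writeable;
	uint8_t tid;
};

int main(void)
{
	/* 8 bytes per hardware TLB slot on a 32-bit host, so 512 bytes for
	 * all 64 slots -- less than half of the ~1280 bytes the old
	 * shadow_tlb/shadow_pages copies needed. */
	printf("%zu bytes\n", PPC44x_TLB_SIZE * sizeof(struct shadow_ref));
	return 0;
}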
......
@@ -53,7 +53,8 @@ extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu);
 extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 
 extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
-                           u64 asid, u32 flags, u32 max_bytes);
+                           u64 asid, u32 flags, u32 max_bytes,
+                           unsigned int gtlb_idx);
 extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode);
 extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid);
......
@@ -359,12 +359,6 @@ int main(void)
 #ifdef CONFIG_KVM
 	DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe));
 
-	DEFINE(VCPU_TO_44X, offsetof(struct kvmppc_vcpu_44x, vcpu));
-	DEFINE(VCPU44x_SHADOW_TLB,
-	       offsetof(struct kvmppc_vcpu_44x, shadow_tlb));
-	DEFINE(VCPU44x_SHADOW_MOD,
-	       offsetof(struct kvmppc_vcpu_44x, shadow_tlb_mod));
-
 	DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack));
 	DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
 	DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
......
@@ -96,21 +96,14 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
-	int i;
-
-	/* Mark every guest entry in the shadow TLB entry modified, so that they
-	 * will all be reloaded on the next vcpu run (instead of being
-	 * demand-faulted). */
-	for (i = 0; i <= tlb_44x_hwater; i++)
-		kvmppc_tlbe_set_modified(vcpu, i);
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
-	/* Don't leave guest TLB entries resident when being de-scheduled. */
-	/* XXX It would be nice to differentiate between heavyweight exit and
-	 * sched_out here, since we could avoid the TLB flush for heavyweight
-	 * exits. */
+	/* XXX Since every guest uses TS=1 TID=0/1 mappings, we can't leave any TLB
+	 * entries around when we're descheduled, so we must completely flush the
+	 * TLB of all guest mappings. On the other hand, if there is only one
+	 * guest, this flush is completely unnecessary. */
 	_tlbia();
 }
@@ -130,6 +123,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 	struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0];
+	int i;
 
 	tlbe->tid = 0;
 	tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID;
@@ -148,6 +142,9 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 	 * CCR1[TCS]. */
 	vcpu->arch.ccr1 = mfspr(SPRN_CCR1);
 
+	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++)
+		vcpu_44x->shadow_refs[i].gtlb_index = -1;
+
 	return 0;
 }
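
Why -1: gtlb_index is a u16, so the -1 stored here reads back as 0xFFFF, which
can never equal a valid guest TLB index (0..63); an empty hardware slot is
therefore never mistaken for one that shadows a real guest entry. A hedged,
kernel-context sketch of the kind of scan this protects (the real invalidation
code lives in the collapsed 44x_tlb.c diff; the function name below is made
up, and it assumes linux/kernel.h for ARRAY_SIZE plus the structs from the
header hunk above):

/* Drop every hardware TLB slot that shadows guest entry 'gtlb_index'.
 * Freshly initialized slots hold (u16)-1 and therefore never match. */
static void invalidate_guest_entry(struct kvmppc_vcpu_44x *vcpu_44x,
				   unsigned int gtlb_index)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) {
		struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i];

		if (ref->gtlb_index == gtlb_index)
			ref->gtlb_index = -1;	/* plus the real eviction work */
	}
}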
......
This diff is collapsed.
@@ -25,11 +25,8 @@
 
 extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr,
                                 unsigned int pid, unsigned int as);
-extern struct kvmppc_44x_tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu,
-                                                      gva_t eaddr);
-extern struct kvmppc_44x_tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu,
-                                                      gva_t eaddr);
-extern void kvmppc_tlbe_set_modified(struct kvm_vcpu *vcpu, unsigned int i);
+extern int kvmppc_44x_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
+extern int kvmppc_44x_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr);
 
 extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb,
                                  u8 rc);
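
A short usage sketch of the new index-returning lookups (this just mirrors
what the booke.c hunks below do; error handling and statistics are trimmed,
and the fragment assumes the surrounding kvmppc_handle_exit() context):

	int gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);

	if (gtlb_index < 0) {
		/* No guest mapping: reflect the DTLB miss back into the guest. */
	} else {
		struct kvmppc_44x_tlbe *gtlbe = &to_44x(vcpu)->guest_tlb[gtlb_index];

		/* The same index is handed to kvmppc_mmu_map() so the new
		 * shadow_refs[] bookkeeping can record which guest entry the
		 * hardware TLB slot ends up shadowing. */
		kvmppc_mmu_map(vcpu, eaddr, tlb_xlate(gtlbe, eaddr), gtlbe->tid,
			       gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
	}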
......
@@ -24,10 +24,12 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
+
 #include <asm/cputable.h>
 #include <asm/uaccess.h>
 #include <asm/kvm_ppc.h>
 #include <asm/cacheflush.h>
+#include <asm/kvm_44x.h>
 
 #include "booke.h"
 #include "44x_tlb.h"
@@ -207,10 +209,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		 * handled this interrupt the moment we enabled interrupts.
 		 * Now we just offer it a chance to reschedule the guest. */
 
-		/* XXX At this point the TLB still holds our shadow TLB, so if
-		 * we do reschedule the host will fault over it. Perhaps we
-		 * should politely restore the host's entries to minimize
-		 * misses before ceding control. */
 		vcpu->stat.dec_exits++;
 		if (need_resched())
 			cond_resched();
@@ -281,14 +279,17 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		r = RESUME_GUEST;
 		break;
 
+	/* XXX move to a 440-specific file. */
 	case BOOKE_INTERRUPT_DTLB_MISS: {
+		struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.fault_dear;
+		int gtlb_index;
 		gfn_t gfn;
 
 		/* Check the guest TLB. */
-		gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr);
-		if (!gtlbe) {
+		gtlb_index = kvmppc_44x_dtlb_index(vcpu, eaddr);
+		if (gtlb_index < 0) {
 			/* The guest didn't have a mapping for it. */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
 			vcpu->arch.dear = vcpu->arch.fault_dear;
@@ -298,6 +299,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			break;
 		}
 
+		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
 		vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr);
 		gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT;
@@ -309,7 +311,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
 			kvmppc_mmu_map(vcpu, eaddr, vcpu->arch.paddr_accessed, gtlbe->tid,
-			               gtlbe->word2, get_tlb_bytes(gtlbe));
+			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
 			vcpu->stat.dtlb_virt_miss_exits++;
 			r = RESUME_GUEST;
 		} else {
@@ -322,17 +324,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		break;
 	}
 
+	/* XXX move to a 440-specific file. */
 	case BOOKE_INTERRUPT_ITLB_MISS: {
+		struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu);
 		struct kvmppc_44x_tlbe *gtlbe;
 		unsigned long eaddr = vcpu->arch.pc;
 		gpa_t gpaddr;
 		gfn_t gfn;
+		int gtlb_index;
 
 		r = RESUME_GUEST;
 
 		/* Check the guest TLB. */
-		gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr);
-		if (!gtlbe) {
+		gtlb_index = kvmppc_44x_itlb_index(vcpu, eaddr);
+		if (gtlb_index < 0) {
 			/* The guest didn't have a mapping for it. */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS);
 			vcpu->stat.itlb_real_miss_exits++;
@@ -341,6 +346,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 		vcpu->stat.itlb_virt_miss_exits++;
 
+		gtlbe = &vcpu_44x->guest_tlb[gtlb_index];
 		gpaddr = tlb_xlate(gtlbe, eaddr);
 		gfn = gpaddr >> PAGE_SHIFT;
@@ -352,7 +358,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			 * Either way, we need to satisfy the fault without
 			 * invoking the guest. */
 			kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlbe->tid,
-			               gtlbe->word2, get_tlb_bytes(gtlbe));
+			               gtlbe->word2, get_tlb_bytes(gtlbe), gtlb_index);
 		} else {
 			/* Guest mapped and leaped at non-RAM! */
 			kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_MACHINE_CHECK);
......
@@ -335,54 +335,6 @@ lightweight_exit:
 	lwz	r3, VCPU_SHADOW_PID(r4)
 	mtspr	SPRN_PID, r3
 
-	/* Prevent all asynchronous TLB updates. */
-	mfmsr	r5
-	lis	r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@h
-	ori	r6, r6, (MSR_EE|MSR_CE|MSR_ME|MSR_DE)@l
-	andc	r6, r5, r6
-	mtmsr	r6
-
-	/* Load the guest mappings, leaving the host's "pinned" kernel mappings
-	 * in place. */
-	mfspr	r10, SPRN_MMUCR			/* Save host MMUCR. */
-	li	r5, PPC44x_TLB_SIZE
-	lis	r5, tlb_44x_hwater@ha
-	lwz	r5, tlb_44x_hwater@l(r5)
-	mtctr	r5
-	addi	r9, r4, -VCPU_TO_44X + VCPU44x_SHADOW_TLB
-	addi	r5, r4, -VCPU_TO_44X + VCPU44x_SHADOW_MOD
-	li	r3, 0
-1:
-	lbzx	r7, r3, r5
-	cmpwi	r7, 0
-	beq	3f
-
-	/* Load guest entry. */
-	mulli	r11, r3, TLBE_BYTES
-	add	r11, r11, r9
-	lwz	r7, 0(r11)
-	mtspr	SPRN_MMUCR, r7
-	lwz	r7, 4(r11)
-	tlbwe	r7, r3, PPC44x_TLB_PAGEID
-	lwz	r7, 8(r11)
-	tlbwe	r7, r3, PPC44x_TLB_XLAT
-	lwz	r7, 12(r11)
-	tlbwe	r7, r3, PPC44x_TLB_ATTRIB
-3:
-	addi	r3, r3, 1			/* Increment index. */
-	bdnz	1b
-
-	mtspr	SPRN_MMUCR, r10			/* Restore host MMUCR. */
-
-	/* Clear bitmap of modified TLB entries */
-	li	r5, PPC44x_TLB_SIZE>>2
-	mtctr	r5
-	addi	r5, r4, -VCPU_TO_44X + VCPU44x_SHADOW_MOD - 4
-	li	r6, 0
-1:
-	stwu	r6, 4(r5)
-	bdnz	1b
-
 	iccci	0, 0 /* XXX hack */
 
 	/* Load some guest volatiles. */
......