Commit ab9cf499 authored by Michael S. Tsirkin's avatar Michael S. Tsirkin Committed by Avi Kivity

KVM guest: guest side for eoi avoidance

The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
Guest tests it using a single est and clear operation - this is
necessary so that host can detect interrupt nesting - and if set, it can
skip the EOI MSR.

I run a simple microbenchmark to show exit reduction
(note: for testing, need to apply follow-up patch
'kvm: host side for eoi optimization' + a qemu patch
 I posted separately, on host):

Before:

Performance counter stats for 'sleep 1s':

            47,357 kvm:kvm_entry                                                [99.98%]
                 0 kvm:kvm_hypercall                                            [99.98%]
                 0 kvm:kvm_hv_hypercall                                         [99.98%]
             5,001 kvm:kvm_pio                                                  [99.98%]
                 0 kvm:kvm_cpuid                                                [99.98%]
            22,124 kvm:kvm_apic                                                 [99.98%]
            49,849 kvm:kvm_exit                                                 [99.98%]
            21,115 kvm:kvm_inj_virq                                             [99.98%]
                 0 kvm:kvm_inj_exception                                        [99.98%]
                 0 kvm:kvm_page_fault                                           [99.98%]
            22,937 kvm:kvm_msr                                                  [99.98%]
                 0 kvm:kvm_cr                                                   [99.98%]
                 0 kvm:kvm_pic_set_irq                                          [99.98%]
                 0 kvm:kvm_apic_ipi                                             [99.98%]
            22,207 kvm:kvm_apic_accept_irq                                      [99.98%]
            22,421 kvm:kvm_eoi                                                  [99.98%]
                 0 kvm:kvm_pv_eoi                                               [99.99%]
                 0 kvm:kvm_nested_vmrun                                         [99.99%]
                 0 kvm:kvm_nested_intercepts                                    [99.99%]
                 0 kvm:kvm_nested_vmexit                                        [99.99%]
                 0 kvm:kvm_nested_vmexit_inject                                    [99.99%]
                 0 kvm:kvm_nested_intr_vmexit                                    [99.99%]
                 0 kvm:kvm_invlpga                                              [99.99%]
                 0 kvm:kvm_skinit                                               [99.99%]
                57 kvm:kvm_emulate_insn                                         [99.99%]
                 0 kvm:vcpu_match_mmio                                          [99.99%]
                 0 kvm:kvm_userspace_exit                                       [99.99%]
                 2 kvm:kvm_set_irq                                              [99.99%]
                 2 kvm:kvm_ioapic_set_irq                                       [99.99%]
            23,609 kvm:kvm_msi_set_irq                                          [99.99%]
                 1 kvm:kvm_ack_irq                                              [99.99%]
               131 kvm:kvm_mmio                                                 [99.99%]
               226 kvm:kvm_fpu                                                  [100.00%]
                 0 kvm:kvm_age_page                                             [100.00%]
                 0 kvm:kvm_try_async_get_page                                    [100.00%]
                 0 kvm:kvm_async_pf_doublefault                                    [100.00%]
                 0 kvm:kvm_async_pf_not_present                                    [100.00%]
                 0 kvm:kvm_async_pf_ready                                       [100.00%]
                 0 kvm:kvm_async_pf_completed

       1.002100578 seconds time elapsed

After:

 Performance counter stats for 'sleep 1s':

            28,354 kvm:kvm_entry                                                [99.98%]
                 0 kvm:kvm_hypercall                                            [99.98%]
                 0 kvm:kvm_hv_hypercall                                         [99.98%]
             1,347 kvm:kvm_pio                                                  [99.98%]
                 0 kvm:kvm_cpuid                                                [99.98%]
             1,931 kvm:kvm_apic                                                 [99.98%]
            29,595 kvm:kvm_exit                                                 [99.98%]
            24,884 kvm:kvm_inj_virq                                             [99.98%]
                 0 kvm:kvm_inj_exception                                        [99.98%]
                 0 kvm:kvm_page_fault                                           [99.98%]
             1,986 kvm:kvm_msr                                                  [99.98%]
                 0 kvm:kvm_cr                                                   [99.98%]
                 0 kvm:kvm_pic_set_irq                                          [99.98%]
                 0 kvm:kvm_apic_ipi                                             [99.99%]
            25,953 kvm:kvm_apic_accept_irq                                      [99.99%]
            26,132 kvm:kvm_eoi                                                  [99.99%]
            26,593 kvm:kvm_pv_eoi                                               [99.99%]
                 0 kvm:kvm_nested_vmrun                                         [99.99%]
                 0 kvm:kvm_nested_intercepts                                    [99.99%]
                 0 kvm:kvm_nested_vmexit                                        [99.99%]
                 0 kvm:kvm_nested_vmexit_inject                                    [99.99%]
                 0 kvm:kvm_nested_intr_vmexit                                    [99.99%]
                 0 kvm:kvm_invlpga                                              [99.99%]
                 0 kvm:kvm_skinit                                               [99.99%]
               284 kvm:kvm_emulate_insn                                         [99.99%]
                68 kvm:vcpu_match_mmio                                          [99.99%]
                68 kvm:kvm_userspace_exit                                       [99.99%]
                 2 kvm:kvm_set_irq                                              [99.99%]
                 2 kvm:kvm_ioapic_set_irq                                       [99.99%]
            28,288 kvm:kvm_msi_set_irq                                          [99.99%]
                 1 kvm:kvm_ack_irq                                              [99.99%]
               131 kvm:kvm_mmio                                                 [100.00%]
               588 kvm:kvm_fpu                                                  [100.00%]
                 0 kvm:kvm_age_page                                             [100.00%]
                 0 kvm:kvm_try_async_get_page                                    [100.00%]
                 0 kvm:kvm_async_pf_doublefault                                    [100.00%]
                 0 kvm:kvm_async_pf_not_present                                    [100.00%]
                 0 kvm:kvm_async_pf_ready                                       [100.00%]
                 0 kvm:kvm_async_pf_completed

       1.002039622 seconds time elapsed

We see that # of exits is almost halved.
Signed-off-by: default avatarMichael S. Tsirkin <mst@redhat.com>
Signed-off-by: default avatarAvi Kivity <avi@redhat.com>
parent 8680b94b
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#define KVM_FEATURE_CLOCKSOURCE2 3 #define KVM_FEATURE_CLOCKSOURCE2 3
#define KVM_FEATURE_ASYNC_PF 4 #define KVM_FEATURE_ASYNC_PF 4
#define KVM_FEATURE_STEAL_TIME 5 #define KVM_FEATURE_STEAL_TIME 5
#define KVM_FEATURE_PV_EOI 6
/* The last 8 bits are used to indicate how to interpret the flags field /* The last 8 bits are used to indicate how to interpret the flags field
* in pvclock structure. If no bits are set, all flags are ignored. * in pvclock structure. If no bits are set, all flags are ignored.
...@@ -37,6 +38,7 @@ ...@@ -37,6 +38,7 @@
#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
struct kvm_steal_time { struct kvm_steal_time {
__u64 steal; __u64 steal;
...@@ -89,6 +91,11 @@ struct kvm_vcpu_pv_apf_data { ...@@ -89,6 +91,11 @@ struct kvm_vcpu_pv_apf_data {
__u32 enabled; __u32 enabled;
}; };
#define KVM_PV_EOI_BIT 0
#define KVM_PV_EOI_MASK (0x1 << KVM_PV_EOI_BIT)
#define KVM_PV_EOI_ENABLED KVM_PV_EOI_MASK
#define KVM_PV_EOI_DISABLED 0x0
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <asm/processor.h> #include <asm/processor.h>
......
...@@ -39,6 +39,8 @@ ...@@ -39,6 +39,8 @@
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/idle.h> #include <asm/idle.h>
#include <asm/apic.h>
#include <asm/apicdef.h>
static int kvmapf = 1; static int kvmapf = 1;
...@@ -283,6 +285,22 @@ static void kvm_register_steal_time(void) ...@@ -283,6 +285,22 @@ static void kvm_register_steal_time(void)
cpu, __pa(st)); cpu, __pa(st));
} }
static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;
static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
{
/**
* This relies on __test_and_clear_bit to modify the memory
* in a way that is atomic with respect to the local CPU.
* The hypervisor only accesses this memory from the local CPU so
* there's no need for lock or memory barriers.
* An optimization barrier is implied in apic write.
*/
if (__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi)))
return;
apic->write(APIC_EOI, APIC_EOI_ACK);
}
void __cpuinit kvm_guest_cpu_init(void) void __cpuinit kvm_guest_cpu_init(void)
{ {
if (!kvm_para_available()) if (!kvm_para_available())
...@@ -300,11 +318,20 @@ void __cpuinit kvm_guest_cpu_init(void) ...@@ -300,11 +318,20 @@ void __cpuinit kvm_guest_cpu_init(void)
smp_processor_id()); smp_processor_id());
} }
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
unsigned long pa;
/* Size alignment is implied but just to make it explicit. */
BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
__get_cpu_var(kvm_apic_eoi) = 0;
pa = __pa(&__get_cpu_var(kvm_apic_eoi)) | KVM_MSR_ENABLED;
wrmsrl(MSR_KVM_PV_EOI_EN, pa);
}
if (has_steal_clock) if (has_steal_clock)
kvm_register_steal_time(); kvm_register_steal_time();
} }
static void kvm_pv_disable_apf(void *unused) static void kvm_pv_disable_apf(void)
{ {
if (!__get_cpu_var(apf_reason).enabled) if (!__get_cpu_var(apf_reason).enabled)
return; return;
...@@ -316,11 +343,23 @@ static void kvm_pv_disable_apf(void *unused) ...@@ -316,11 +343,23 @@ static void kvm_pv_disable_apf(void *unused)
smp_processor_id()); smp_processor_id());
} }
static void kvm_pv_guest_cpu_reboot(void *unused)
{
/*
* We disable PV EOI before we load a new kernel by kexec,
* since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
* New kernel can re-enable when it boots.
*/
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
kvm_pv_disable_apf();
}
static int kvm_pv_reboot_notify(struct notifier_block *nb, static int kvm_pv_reboot_notify(struct notifier_block *nb,
unsigned long code, void *unused) unsigned long code, void *unused)
{ {
if (code == SYS_RESTART) if (code == SYS_RESTART)
on_each_cpu(kvm_pv_disable_apf, NULL, 1); on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
return NOTIFY_DONE; return NOTIFY_DONE;
} }
...@@ -371,7 +410,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy) ...@@ -371,7 +410,9 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
static void kvm_guest_cpu_offline(void *dummy) static void kvm_guest_cpu_offline(void *dummy)
{ {
kvm_disable_steal_time(); kvm_disable_steal_time();
kvm_pv_disable_apf(NULL); if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
kvm_pv_disable_apf();
apf_task_wake_all(); apf_task_wake_all();
} }
...@@ -424,6 +465,16 @@ void __init kvm_guest_init(void) ...@@ -424,6 +465,16 @@ void __init kvm_guest_init(void)
pv_time_ops.steal_clock = kvm_steal_clock; pv_time_ops.steal_clock = kvm_steal_clock;
} }
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
struct apic **drv;
for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
/* Should happen once for each apic */
WARN_ON((*drv)->eoi_write == kvm_guest_apic_eoi_write);
(*drv)->eoi_write = kvm_guest_apic_eoi_write;
}
}
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
register_cpu_notifier(&kvm_cpu_notifier); register_cpu_notifier(&kvm_cpu_notifier);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment