Commit 8d14695f authored by Yang Zhang's avatar Yang Zhang Committed by Gleb Natapov

x86, apicv: add virtual x2apic support

basically to benefit from apicv, we need to enable virtualized x2apic mode.
Currently, we only enable it when guest is really using x2apic.

Also, clear MSR bitmap for corresponding x2apic MSRs when guest enabled x2apic:
0x800 - 0x8ff: no read intercept for apicv register virtualization,
               except APIC ID and TMCCT which need software's assistance to
               get right value.
Reviewed-by: default avatarMarcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: default avatarKevin Tian <kevin.tian@intel.com>
Signed-off-by: default avatarYang Zhang <yang.z.zhang@Intel.com>
Signed-off-by: default avatarGleb Natapov <gleb@redhat.com>
parent 83d4c286
......@@ -699,6 +699,7 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set);
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
......
......@@ -139,6 +139,7 @@
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
#define SECONDARY_EXEC_RDTSCP 0x00000008
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
......
......@@ -140,11 +140,6 @@ static inline int apic_enabled(struct kvm_lapic *apic)
(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
static inline int apic_x2apic_mode(struct kvm_lapic *apic)
{
return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
}
static inline int kvm_apic_id(struct kvm_lapic *apic)
{
return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
......@@ -1303,6 +1298,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
{
u64 old_value = vcpu->arch.apic_base;
struct kvm_lapic *apic = vcpu->arch.apic;
if (!apic) {
......@@ -1324,11 +1320,16 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
value &= ~MSR_IA32_APICBASE_BSP;
vcpu->arch.apic_base = value;
if (apic_x2apic_mode(apic)) {
u32 id = kvm_apic_id(apic);
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
kvm_apic_set_ldr(apic, ldr);
if ((old_value ^ value) & X2APIC_ENABLE) {
if (value & X2APIC_ENABLE) {
u32 id = kvm_apic_id(apic);
u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf));
kvm_apic_set_ldr(apic, ldr);
kvm_x86_ops->set_virtual_x2apic_mode(vcpu, true);
} else
kvm_x86_ops->set_virtual_x2apic_mode(vcpu, false);
}
apic->base_address = apic->vcpu->arch.apic_base &
MSR_IA32_APICBASE_BASE;
......
......@@ -126,4 +126,9 @@ static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
}
static inline int apic_x2apic_mode(struct kvm_lapic *apic)
{
return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
}
#endif
......@@ -3571,6 +3571,11 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
}
static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
{
return;
}
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
......@@ -4290,6 +4295,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.enable_nmi_window = enable_nmi_window,
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
.set_tss_addr = svm_set_tss_addr,
.get_tdp_level = get_npt_level,
......
......@@ -643,6 +643,8 @@ static unsigned long *vmx_io_bitmap_a;
static unsigned long *vmx_io_bitmap_b;
static unsigned long *vmx_msr_bitmap_legacy;
static unsigned long *vmx_msr_bitmap_longmode;
static unsigned long *vmx_msr_bitmap_legacy_x2apic;
static unsigned long *vmx_msr_bitmap_longmode_x2apic;
static bool cpu_has_load_ia32_efer;
static bool cpu_has_load_perf_global_ctrl;
......@@ -767,6 +769,12 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
}
static inline bool cpu_has_vmx_virtualize_x2apic_mode(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
}
static inline bool cpu_has_vmx_apic_register_virt(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
......@@ -1830,6 +1838,25 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
vmx->guest_msrs[from] = tmp;
}
static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
{
unsigned long *msr_bitmap;
if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
if (is_long_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
} else {
if (is_long_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_longmode;
else
msr_bitmap = vmx_msr_bitmap_legacy;
}
vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
}
/*
* Set up the vmcs to automatically save and restore system
* msrs. Don't touch the 64-bit msrs if the guest is in legacy
......@@ -1838,7 +1865,6 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
static void setup_msrs(struct vcpu_vmx *vmx)
{
int save_nmsrs, index;
unsigned long *msr_bitmap;
save_nmsrs = 0;
#ifdef CONFIG_X86_64
......@@ -1870,14 +1896,8 @@ static void setup_msrs(struct vcpu_vmx *vmx)
vmx->save_nmsrs = save_nmsrs;
if (cpu_has_vmx_msr_bitmap()) {
if (is_long_mode(&vmx->vcpu))
msr_bitmap = vmx_msr_bitmap_longmode;
else
msr_bitmap = vmx_msr_bitmap_legacy;
vmcs_write64(MSR_BITMAP, __pa(msr_bitmap));
}
if (cpu_has_vmx_msr_bitmap())
vmx_set_msr_bitmap(&vmx->vcpu);
}
/*
......@@ -2543,6 +2563,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
min2 = 0;
opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_WBINVD_EXITING |
SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_ENABLE_EPT |
......@@ -2564,7 +2585,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
if (!(_cpu_based_exec_control & CPU_BASED_TPR_SHADOW))
_cpu_based_2nd_exec_control &= ~(
SECONDARY_EXEC_APIC_REGISTER_VIRT);
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
/* CR3 accesses and invlpg don't need to cause VM Exits when EPT
......@@ -3725,7 +3747,10 @@ static void free_vpid(struct vcpu_vmx *vmx)
spin_unlock(&vmx_vpid_lock);
}
static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
u32 msr, int type)
{
int f = sizeof(unsigned long);
......@@ -3738,20 +3763,93 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr)
* We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
*/
if (msr <= 0x1fff) {
__clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */
__clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */
if (type & MSR_TYPE_R)
/* read-low */
__clear_bit(msr, msr_bitmap + 0x000 / f);
if (type & MSR_TYPE_W)
/* write-low */
__clear_bit(msr, msr_bitmap + 0x800 / f);
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
msr &= 0x1fff;
__clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */
__clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */
if (type & MSR_TYPE_R)
/* read-high */
__clear_bit(msr, msr_bitmap + 0x400 / f);
if (type & MSR_TYPE_W)
/* write-high */
__clear_bit(msr, msr_bitmap + 0xc00 / f);
}
}
static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
u32 msr, int type)
{
int f = sizeof(unsigned long);
if (!cpu_has_vmx_msr_bitmap())
return;
/*
* See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
* have the write-low and read-high bitmap offsets the wrong way round.
* We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
*/
if (msr <= 0x1fff) {
if (type & MSR_TYPE_R)
/* read-low */
__set_bit(msr, msr_bitmap + 0x000 / f);
if (type & MSR_TYPE_W)
/* write-low */
__set_bit(msr, msr_bitmap + 0x800 / f);
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
msr &= 0x1fff;
if (type & MSR_TYPE_R)
/* read-high */
__set_bit(msr, msr_bitmap + 0x400 / f);
if (type & MSR_TYPE_W)
/* write-high */
__set_bit(msr, msr_bitmap + 0xc00 / f);
}
}
static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
{
if (!longmode_only)
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr);
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr);
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy,
msr, MSR_TYPE_R | MSR_TYPE_W);
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode,
msr, MSR_TYPE_R | MSR_TYPE_W);
}
static void vmx_enable_intercept_msr_read_x2apic(u32 msr)
{
__vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
msr, MSR_TYPE_R);
__vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
msr, MSR_TYPE_R);
}
static void vmx_disable_intercept_msr_read_x2apic(u32 msr)
{
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
msr, MSR_TYPE_R);
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
msr, MSR_TYPE_R);
}
static void vmx_disable_intercept_msr_write_x2apic(u32 msr)
{
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
msr, MSR_TYPE_W);
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
msr, MSR_TYPE_W);
}
/*
......@@ -3849,6 +3947,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
if (!enable_apicv_reg || !irqchip_in_kernel(vmx->vcpu.kvm))
exec_control &= ~SECONDARY_EXEC_APIC_REGISTER_VIRT;
exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
return exec_control;
}
......@@ -6101,6 +6200,34 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
vmcs_write32(TPR_THRESHOLD, irr);
}
static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
{
u32 sec_exec_control;
/*
* There is not point to enable virtualize x2apic without enable
* apicv
*/
if (!cpu_has_vmx_virtualize_x2apic_mode() || !enable_apicv_reg)
return;
if (!vm_need_tpr_shadow(vcpu->kvm))
return;
sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
if (set) {
sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
} else {
sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
vmx_set_msr_bitmap(vcpu);
}
static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
{
u32 exit_intr_info;
......@@ -7364,6 +7491,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.enable_nmi_window = enable_nmi_window,
.enable_irq_window = enable_irq_window,
.update_cr8_intercept = update_cr8_intercept,
.set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode,
.set_tss_addr = vmx_set_tss_addr,
.get_tdp_level = get_ept_level,
......@@ -7396,7 +7524,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
static int __init vmx_init(void)
{
int r, i;
int r, i, msr;
rdmsrl_safe(MSR_EFER, &host_efer);
......@@ -7417,11 +7545,19 @@ static int __init vmx_init(void)
if (!vmx_msr_bitmap_legacy)
goto out1;
vmx_msr_bitmap_legacy_x2apic =
(unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_legacy_x2apic)
goto out2;
vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_longmode)
goto out2;
goto out3;
vmx_msr_bitmap_longmode_x2apic =
(unsigned long *)__get_free_page(GFP_KERNEL);
if (!vmx_msr_bitmap_longmode_x2apic)
goto out4;
/*
* Allow direct access to the PC debug port (it is often used for I/O
......@@ -7453,6 +7589,24 @@ static int __init vmx_init(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
memcpy(vmx_msr_bitmap_legacy_x2apic,
vmx_msr_bitmap_legacy, PAGE_SIZE);
memcpy(vmx_msr_bitmap_longmode_x2apic,
vmx_msr_bitmap_longmode, PAGE_SIZE);
if (enable_apicv_reg) {
for (msr = 0x800; msr <= 0x8ff; msr++)
vmx_disable_intercept_msr_read_x2apic(msr);
/* According SDM, in x2apic mode, the whole id reg is used.
* But in KVM, it only use the highest eight bits. Need to
* intercept it */
vmx_enable_intercept_msr_read_x2apic(0x802);
/* TMCCT */
vmx_enable_intercept_msr_read_x2apic(0x839);
/* TPR */
vmx_disable_intercept_msr_write_x2apic(0x808);
}
if (enable_ept) {
kvm_mmu_set_mask_ptes(0ull,
......@@ -7466,8 +7620,10 @@ static int __init vmx_init(void)
return 0;
out3:
out4:
free_page((unsigned long)vmx_msr_bitmap_longmode);
out3:
free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
out2:
free_page((unsigned long)vmx_msr_bitmap_legacy);
out1:
......@@ -7479,6 +7635,8 @@ static int __init vmx_init(void)
static void __exit vmx_exit(void)
{
free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
free_page((unsigned long)vmx_msr_bitmap_legacy);
free_page((unsigned long)vmx_msr_bitmap_longmode);
free_page((unsigned long)vmx_io_bitmap_b);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment