Commit 1c6f0b47 authored by Sean Christopherson, committed by Paolo Bonzini

KVM: nVMX: Track vmcs12 offsets for shadowed VMCS fields

The vmcs12 fields' offsets are constant and known at compile time.  Store
the associated offset for each shadowed field to avoid the costly lookup
in vmcs_field_to_offset() when copying between vmcs12 and the shadow
VMCS.  Avoiding the costly lookup reduces the latency of copying by
~100 cycles in each direction.
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent b6437805
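
For readers skimming the diff, here is a minimal standalone sketch (editor's illustration, not the kernel code) of the technique the patch applies: each shadowed field's VMCS encoding is paired at table-definition time with a compile-time offsetof() into a vmcs12-like structure, so the copy loop can index that structure directly instead of calling a per-field lookup such as vmcs_field_to_offset(). The struct fake_vmcs12 layout, the placeholder encodings, and the fake_vmcs_read() stub are assumptions made only for this sketch; the real definitions live in the KVM sources touched below.

/*
 * Editor's sketch: pair each shadowed field's encoding with a
 * precomputed struct offset so the copy loop avoids a per-field
 * lookup.  All names and values here are simplified placeholders.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct fake_vmcs12 {                    /* stand-in for struct vmcs12 */
	uint32_t exception_bitmap;
	uint32_t tpr_threshold;
	uint64_t guest_rip;
};

struct shadow_vmcs_field {              /* encoding + compile-time offset */
	uint16_t encoding;
	uint16_t offset;
};

/* Placeholder encodings; the real values come from the VMX spec. */
enum { EXCEPTION_BITMAP = 0x4004, TPR_THRESHOLD = 0x401c, GUEST_RIP = 0x681e };

static const struct shadow_vmcs_field shadow_rw_fields[] = {
	{ EXCEPTION_BITMAP, offsetof(struct fake_vmcs12, exception_bitmap) },
	{ TPR_THRESHOLD,    offsetof(struct fake_vmcs12, tpr_threshold)    },
	{ GUEST_RIP,        offsetof(struct fake_vmcs12, guest_rip)        },
};

/* Stub for the hardware VMREAD; returns a dummy value per encoding. */
static uint64_t fake_vmcs_read(uint16_t encoding)
{
	return 0x1000u + encoding;
}

/* Copy the "shadow VMCS" into the struct through the cached offsets. */
static void sketch_copy_shadow_to_vmcs12(struct fake_vmcs12 *vmcs12)
{
	size_t i;

	for (i = 0; i < sizeof(shadow_rw_fields) / sizeof(shadow_rw_fields[0]); i++) {
		const struct shadow_vmcs_field *f = &shadow_rw_fields[i];
		uint64_t val = fake_vmcs_read(f->encoding);
		char *p = (char *)vmcs12 + f->offset;

		if (f->encoding == GUEST_RIP)   /* 64-bit placeholder field */
			*(uint64_t *)p = val;
		else                            /* 32-bit placeholder fields */
			*(uint32_t *)p = (uint32_t)val;
	}
}

int main(void)
{
	struct fake_vmcs12 vmcs12 = { 0 };

	sketch_copy_shadow_to_vmcs12(&vmcs12);
	printf("exception_bitmap=%#x guest_rip=%#llx\n",
	       (unsigned int)vmcs12.exception_bitmap,
	       (unsigned long long)vmcs12.guest_rip);
	return 0;
}

The same pattern appears in the diff below: the SHADOW_FIELD_RO/RW macros now take a second argument naming the vmcs12 member, and copy_shadow_to_vmcs12()/copy_vmcs12_to_shadow() consume the cached offset instead of performing the lookup on every field.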
@@ -41,15 +41,19 @@ static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
static u16 shadow_read_only_fields[] = {
#define SHADOW_FIELD_RO(x) x,
struct shadow_vmcs_field {
u16 encoding;
u16 offset;
};
static struct shadow_vmcs_field shadow_read_only_fields[] = {
#define SHADOW_FIELD_RO(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_only_fields =
ARRAY_SIZE(shadow_read_only_fields);
static u16 shadow_read_write_fields[] = {
#define SHADOW_FIELD_RW(x) x,
static struct shadow_vmcs_field shadow_read_write_fields[] = {
#define SHADOW_FIELD_RW(x, y) { x, offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};
static int max_shadow_read_write_fields =
@@ -63,37 +67,39 @@ static void init_vmcs_shadow_fields(void)
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
for (i = j = 0; i < max_shadow_read_only_fields; i++) {
u16 field = shadow_read_only_fields[i];
struct shadow_vmcs_field entry = shadow_read_only_fields[i];
u16 field = entry.encoding;
if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
(i + 1 == max_shadow_read_only_fields ||
shadow_read_only_fields[i + 1] != field + 1))
shadow_read_only_fields[i + 1].encoding != field + 1))
pr_err("Missing field from shadow_read_only_field %x\n",
field + 1);
clear_bit(field, vmx_vmread_bitmap);
#ifdef CONFIG_X86_64
if (field & 1)
#ifdef CONFIG_X86_64
continue;
#else
entry.offset += sizeof(u32);
#endif
if (j < i)
shadow_read_only_fields[j] = field;
j++;
shadow_read_only_fields[j++] = entry;
}
max_shadow_read_only_fields = j;
for (i = j = 0; i < max_shadow_read_write_fields; i++) {
u16 field = shadow_read_write_fields[i];
struct shadow_vmcs_field entry = shadow_read_write_fields[i];
u16 field = entry.encoding;
if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
(i + 1 == max_shadow_read_write_fields ||
shadow_read_write_fields[i + 1] != field + 1))
shadow_read_write_fields[i + 1].encoding != field + 1))
pr_err("Missing field from shadow_read_write_field %x\n",
field + 1);
WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
field <= GUEST_TR_AR_BYTES,
"Update vmcs12_write_any() to expose AR_BYTES RW");
"Update vmcs12_write_any() to drop reserved bits from AR_BYTES");
/*
* PML and the preemption timer can be emulated, but the
@@ -119,13 +125,13 @@ static void init_vmcs_shadow_fields(void)
clear_bit(field, vmx_vmwrite_bitmap);
clear_bit(field, vmx_vmread_bitmap);
#ifdef CONFIG_X86_64
if (field & 1)
#ifdef CONFIG_X86_64
continue;
#else
entry.offset += sizeof(u32);
#endif
if (j < i)
shadow_read_write_fields[j] = field;
j++;
shadow_read_write_fields[j++] = entry;
}
max_shadow_read_write_fields = j;
}
@@ -1308,7 +1314,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
{
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
unsigned long field;
struct shadow_vmcs_field field;
unsigned long val;
int i;
preempt_disable();
@@ -1317,7 +1324,8 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
for (i = 0; i < max_shadow_read_write_fields; i++) {
field = shadow_read_write_fields[i];
vmcs12_write_any(vmcs12, field, __vmcs_readl(field));
val = __vmcs_readl(field.encoding);
vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
}
vmcs_clear(shadow_vmcs);
@@ -1328,7 +1336,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
{
const u16 *fields[] = {
const struct shadow_vmcs_field *fields[] = {
shadow_read_write_fields,
shadow_read_only_fields
};
@@ -1336,18 +1344,20 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
max_shadow_read_write_fields,
max_shadow_read_only_fields
};
int i, q;
unsigned long field;
u64 field_value = 0;
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
struct shadow_vmcs_field field;
unsigned long val;
int i, q;
vmcs_load(shadow_vmcs);
for (q = 0; q < ARRAY_SIZE(fields); q++) {
for (i = 0; i < max_fields[q]; i++) {
field = fields[q][i];
vmcs12_read_any(get_vmcs12(&vmx->vcpu), field, &field_value);
__vmcs_writel(field, field_value);
val = vmcs12_read_any(vmcs12, field.encoding,
field.offset);
__vmcs_writel(field.encoding, val);
}
}
@@ -2144,6 +2154,8 @@ static void prepare_vmcs02_full(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
@@ -2240,23 +2252,12 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
u32 *entry_failure_code)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs) {
prepare_vmcs02_full(vmx, vmcs12);
vmx->nested.dirty_vmcs12 = false;
}
/*
* First, the fields that are shadowed. This must be kept in sync
* with vmcs_shadow_fields.h.
*/
if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
}
if (vmx->nested.nested_run_pending &&
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
@@ -4372,6 +4373,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
int len;
gva_t gva = 0;
struct vmcs12 *vmcs12;
short offset;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -4393,11 +4395,15 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
/* Decode instruction info and find the field to read */
field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
/* Read the field, zero-extended to a u64 field_value */
if (vmcs12_read_any(vmcs12, field, &field_value) < 0)
offset = vmcs_field_to_offset(field);
if (offset < 0)
return nested_vmx_failValid(vcpu,
VMXERR_UNSUPPORTED_VMCS_COMPONENT);
/* Read the field, zero-extended to a u64 field_value */
field_value = vmcs12_read_any(vmcs12, field, offset);
/*
* Now copy part of this value to register or memory, as requested.
* Note that the number of bits actually copied is 32 or 64 depending
@@ -4437,6 +4443,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
u64 field_value = 0;
struct x86_exception e;
struct vmcs12 *vmcs12;
short offset;
if (!nested_vmx_check_permission(vcpu))
return 1;
@@ -4481,6 +4488,11 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
vmcs12 = get_shadow_vmcs12(vcpu);
}
offset = vmcs_field_to_offset(field);
if (offset < 0)
return nested_vmx_failValid(vcpu,
VMXERR_UNSUPPORTED_VMCS_COMPONENT);
/*
* Some Intel CPUs intentionally drop the reserved bits of the AR byte
* fields on VMWRITE. Emulate this behavior to ensure consistent KVM
@@ -4492,9 +4504,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
field_value &= 0x1f0ff;
if (vmcs12_write_any(vmcs12, field, field_value) < 0)
return nested_vmx_failValid(vcpu,
VMXERR_UNSUPPORTED_VMCS_COMPONENT);
vmcs12_write_any(vmcs12, field, offset, field_value);
/*
* Do not track vmcs12 dirty-state if in guest-mode
@@ -4502,7 +4512,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
*/
if (!is_guest_mode(vcpu)) {
switch (field) {
#define SHADOW_FIELD_RW(x) case x:
#define SHADOW_FIELD_RW(x, y) case x:
#include "vmcs_shadow_fields.h"
/*
* The fields that can be updated by L1 without a vmexit are
@@ -4511,7 +4521,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
*/
break;
#define SHADOW_FIELD_RO(x) case x:
#define SHADOW_FIELD_RO(x, y) case x:
#include "vmcs_shadow_fields.h"
/*
* L1 can read these fields without exiting, ensure the
@@ -394,69 +394,48 @@ static inline short vmcs_field_to_offset(unsigned long field)
#undef ROL16
/*
* Read a vmcs12 field. Since these can have varying lengths and we return
* one type, we chose the biggest type (u64) and zero-extend the return value
* to that size. Note that the caller, handle_vmread, might need to use only
* some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
* 64-bit fields are to be returned).
*/
static inline int vmcs12_read_any(struct vmcs12 *vmcs12,
unsigned long field, u64 *ret)
static inline u64 vmcs12_read_any(struct vmcs12 *vmcs12, unsigned long field,
u16 offset)
{
short offset = vmcs_field_to_offset(field);
char *p;
if (offset < 0)
return offset;
p = (char *)vmcs12 + offset;
char *p = (char *)vmcs12 + offset;
switch (vmcs_field_width(field)) {
case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
*ret = *((natural_width *)p);
return 0;
return *((natural_width *)p);
case VMCS_FIELD_WIDTH_U16:
*ret = *((u16 *)p);
return 0;
return *((u16 *)p);
case VMCS_FIELD_WIDTH_U32:
*ret = *((u32 *)p);
return 0;
return *((u32 *)p);
case VMCS_FIELD_WIDTH_U64:
*ret = *((u64 *)p);
return 0;
return *((u64 *)p);
default:
WARN_ON(1);
return -ENOENT;
WARN_ON_ONCE(1);
return -1;
}
}
static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
unsigned long field, u64 field_value){
short offset = vmcs_field_to_offset(field);
static inline void vmcs12_write_any(struct vmcs12 *vmcs12, unsigned long field,
u16 offset, u64 field_value)
{
char *p = (char *)vmcs12 + offset;
if (offset < 0)
return offset;
switch (vmcs_field_width(field)) {
case VMCS_FIELD_WIDTH_U16:
*(u16 *)p = field_value;
return 0;
break;
case VMCS_FIELD_WIDTH_U32:
*(u32 *)p = field_value;
return 0;
break;
case VMCS_FIELD_WIDTH_U64:
*(u64 *)p = field_value;
return 0;
break;
case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
*(natural_width *)p = field_value;
return 0;
break;
default:
WARN_ON(1);
return -ENOENT;
WARN_ON_ONCE(1);
break;
}
}
#endif /* __KVM_X86_VMX_VMCS12_H */
#ifndef SHADOW_FIELD_RO
#define SHADOW_FIELD_RO(x)
#define SHADOW_FIELD_RO(x, y)
#endif
#ifndef SHADOW_FIELD_RW
#define SHADOW_FIELD_RW(x)
#define SHADOW_FIELD_RW(x, y)
#endif
/*
@@ -28,47 +28,47 @@
*/
/* 16-bits */
SHADOW_FIELD_RW(GUEST_INTR_STATUS)
SHADOW_FIELD_RW(GUEST_PML_INDEX)
SHADOW_FIELD_RW(HOST_FS_SELECTOR)
SHADOW_FIELD_RW(HOST_GS_SELECTOR)
SHADOW_FIELD_RW(GUEST_INTR_STATUS, guest_intr_status)
SHADOW_FIELD_RW(GUEST_PML_INDEX, guest_pml_index)
SHADOW_FIELD_RW(HOST_FS_SELECTOR, host_fs_selector)
SHADOW_FIELD_RW(HOST_GS_SELECTOR, host_gs_selector)
/* 32-bits */
SHADOW_FIELD_RO(VM_EXIT_REASON)
SHADOW_FIELD_RO(VM_EXIT_INTR_INFO)
SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN)
SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD)
SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE)
SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE)
SHADOW_FIELD_RO(GUEST_CS_AR_BYTES)
SHADOW_FIELD_RO(GUEST_SS_AR_BYTES)
SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL)
SHADOW_FIELD_RW(EXCEPTION_BITMAP)
SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
SHADOW_FIELD_RW(TPR_THRESHOLD)
SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
SHADOW_FIELD_RO(VM_EXIT_REASON, vm_exit_reason)
SHADOW_FIELD_RO(VM_EXIT_INTR_INFO, vm_exit_intr_info)
SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len)
SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field)
SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code)
SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code)
SHADOW_FIELD_RO(GUEST_CS_AR_BYTES, guest_cs_ar_bytes)
SHADOW_FIELD_RO(GUEST_SS_AR_BYTES, guest_ss_ar_bytes)
SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control)
SHADOW_FIELD_RW(EXCEPTION_BITMAP, exception_bitmap)
SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code)
SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field)
SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len)
SHADOW_FIELD_RW(TPR_THRESHOLD, tpr_threshold)
SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info)
SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value)
/* Natural width */
SHADOW_FIELD_RO(EXIT_QUALIFICATION)
SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS)
SHADOW_FIELD_RW(GUEST_RIP)
SHADOW_FIELD_RW(GUEST_RSP)
SHADOW_FIELD_RW(GUEST_CR0)
SHADOW_FIELD_RW(GUEST_CR3)
SHADOW_FIELD_RW(GUEST_CR4)
SHADOW_FIELD_RW(GUEST_RFLAGS)
SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
SHADOW_FIELD_RW(CR0_READ_SHADOW)
SHADOW_FIELD_RW(CR4_READ_SHADOW)
SHADOW_FIELD_RW(HOST_FS_BASE)
SHADOW_FIELD_RW(HOST_GS_BASE)
SHADOW_FIELD_RO(EXIT_QUALIFICATION, exit_qualification)
SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS, guest_linear_address)
SHADOW_FIELD_RW(GUEST_RIP, guest_rip)
SHADOW_FIELD_RW(GUEST_RSP, guest_rsp)
SHADOW_FIELD_RW(GUEST_CR0, guest_cr0)
SHADOW_FIELD_RW(GUEST_CR3, guest_cr3)
SHADOW_FIELD_RW(GUEST_CR4, guest_cr4)
SHADOW_FIELD_RW(GUEST_RFLAGS, guest_rflags)
SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK, cr0_guest_host_mask)
SHADOW_FIELD_RW(CR0_READ_SHADOW, cr0_read_shadow)
SHADOW_FIELD_RW(CR4_READ_SHADOW, cr4_read_shadow)
SHADOW_FIELD_RW(HOST_FS_BASE, host_fs_base)
SHADOW_FIELD_RW(HOST_GS_BASE, host_gs_base)
/* 64-bit */
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS)
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH)
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS, guest_physical_address)
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH, guest_physical_address)
#undef SHADOW_FIELD_RO
#undef SHADOW_FIELD_RW