Commit bf8c55d8 authored by Chao Peng's avatar Chao Peng Committed by Paolo Bonzini

KVM: x86: Implement Intel PT MSRs read/write emulation

This patch implement Intel Processor Trace MSRs read/write
emulation.
Intel PT MSRs read/write need to be emulated when Intel PT
MSRs is intercepted in guest and during live migration.
Signed-off-by: default avatarChao Peng <chao.p.peng@linux.intel.com>
Signed-off-by: default avatarLuwei Kang <luwei.kang@intel.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent 6c0f0bba
...@@ -140,6 +140,14 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); ...@@ -140,6 +140,14 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO);
#define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM))
#define MSR_IA32_RTIT_STATUS_MASK (~(RTIT_STATUS_FILTEREN | \
RTIT_STATUS_CONTEXTEN | RTIT_STATUS_TRIGGEREN | \
RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
RTIT_STATUS_BYTECNT))
#define MSR_IA32_RTIT_OUTPUT_BASE_MASK \
(~((1UL << cpuid_query_maxphyaddr(vcpu)) - 1) | 0x7f)
/* /*
* These 2 parameters are used to config the controls for Pause-Loop Exiting: * These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive * ple_gap: upper bound on the amount of time between two successive
...@@ -1354,6 +1362,79 @@ void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) ...@@ -1354,6 +1362,79 @@ void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility);
} }
static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long value;
/*
* Any MSR write that attempts to change bits marked reserved will
* case a #GP fault.
*/
if (data & vmx->pt_desc.ctl_bitmask)
return 1;
/*
* Any attempt to modify IA32_RTIT_CTL while TraceEn is set will
* result in a #GP unless the same write also clears TraceEn.
*/
if ((vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) &&
((vmx->pt_desc.guest.ctl ^ data) & ~RTIT_CTL_TRACEEN))
return 1;
/*
* WRMSR to IA32_RTIT_CTL that sets TraceEn but clears this bit
* and FabricEn would cause #GP, if
* CPUID.(EAX=14H, ECX=0):ECX.SNGLRGNOUT[bit 2] = 0
*/
if ((data & RTIT_CTL_TRACEEN) && !(data & RTIT_CTL_TOPA) &&
!(data & RTIT_CTL_FABRIC_EN) &&
!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_single_range_output))
return 1;
/*
* MTCFreq, CycThresh and PSBFreq encodings check, any MSR write that
* utilize encodings marked reserved will casue a #GP fault.
*/
value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc_periods);
if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_mtc) &&
!test_bit((data & RTIT_CTL_MTC_RANGE) >>
RTIT_CTL_MTC_RANGE_OFFSET, &value))
return 1;
value = intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_cycle_thresholds);
if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
!test_bit((data & RTIT_CTL_CYC_THRESH) >>
RTIT_CTL_CYC_THRESH_OFFSET, &value))
return 1;
value = intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_periods);
if (intel_pt_validate_cap(vmx->pt_desc.caps, PT_CAP_psb_cyc) &&
!test_bit((data & RTIT_CTL_PSB_FREQ) >>
RTIT_CTL_PSB_FREQ_OFFSET, &value))
return 1;
/*
* If ADDRx_CFG is reserved or the encodings is >2 will
* cause a #GP fault.
*/
value = (data & RTIT_CTL_ADDR0) >> RTIT_CTL_ADDR0_OFFSET;
if ((value && (vmx->pt_desc.addr_range < 1)) || (value > 2))
return 1;
value = (data & RTIT_CTL_ADDR1) >> RTIT_CTL_ADDR1_OFFSET;
if ((value && (vmx->pt_desc.addr_range < 2)) || (value > 2))
return 1;
value = (data & RTIT_CTL_ADDR2) >> RTIT_CTL_ADDR2_OFFSET;
if ((value && (vmx->pt_desc.addr_range < 3)) || (value > 2))
return 1;
value = (data & RTIT_CTL_ADDR3) >> RTIT_CTL_ADDR3_OFFSET;
if ((value && (vmx->pt_desc.addr_range < 4)) || (value > 2))
return 1;
return 0;
}
static void skip_emulated_instruction(struct kvm_vcpu *vcpu) static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
{ {
unsigned long rip; unsigned long rip;
...@@ -1555,6 +1636,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -1555,6 +1636,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{ {
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
struct shared_msr_entry *msr; struct shared_msr_entry *msr;
u32 index;
switch (msr_info->index) { switch (msr_info->index) {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
...@@ -1619,6 +1701,52 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -1619,6 +1701,52 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1; return 1;
msr_info->data = vcpu->arch.ia32_xss; msr_info->data = vcpu->arch.ia32_xss;
break; break;
case MSR_IA32_RTIT_CTL:
if (pt_mode != PT_MODE_HOST_GUEST)
return 1;
msr_info->data = vmx->pt_desc.guest.ctl;
break;
case MSR_IA32_RTIT_STATUS:
if (pt_mode != PT_MODE_HOST_GUEST)
return 1;
msr_info->data = vmx->pt_desc.guest.status;
break;
case MSR_IA32_RTIT_CR3_MATCH:
if ((pt_mode != PT_MODE_HOST_GUEST) ||
!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_cr3_filtering))
return 1;
msr_info->data = vmx->pt_desc.guest.cr3_match;
break;
case MSR_IA32_RTIT_OUTPUT_BASE:
if ((pt_mode != PT_MODE_HOST_GUEST) ||
(!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_topa_output) &&
!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_single_range_output)))
return 1;
msr_info->data = vmx->pt_desc.guest.output_base;
break;
case MSR_IA32_RTIT_OUTPUT_MASK:
if ((pt_mode != PT_MODE_HOST_GUEST) ||
(!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_topa_output) &&
!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_single_range_output)))
return 1;
msr_info->data = vmx->pt_desc.guest.output_mask;
break;
case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
if ((pt_mode != PT_MODE_HOST_GUEST) ||
(index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_num_address_ranges)))
return 1;
if (index % 2)
msr_info->data = vmx->pt_desc.guest.addr_b[index / 2];
else
msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
break;
case MSR_TSC_AUX: case MSR_TSC_AUX:
if (!msr_info->host_initiated && if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
...@@ -1648,6 +1776,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -1648,6 +1776,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
int ret = 0; int ret = 0;
u32 msr_index = msr_info->index; u32 msr_index = msr_info->index;
u64 data = msr_info->data; u64 data = msr_info->data;
u32 index;
switch (msr_index) { switch (msr_index) {
case MSR_EFER: case MSR_EFER:
...@@ -1799,6 +1928,61 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -1799,6 +1928,61 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
else else
clear_atomic_switch_msr(vmx, MSR_IA32_XSS); clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
break; break;
case MSR_IA32_RTIT_CTL:
if ((pt_mode != PT_MODE_HOST_GUEST) ||
vmx_rtit_ctl_check(vcpu, data))
return 1;
vmcs_write64(GUEST_IA32_RTIT_CTL, data);
vmx->pt_desc.guest.ctl = data;
break;
case MSR_IA32_RTIT_STATUS:
if ((pt_mode != PT_MODE_HOST_GUEST) ||
(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
(data & MSR_IA32_RTIT_STATUS_MASK))
return 1;
vmx->pt_desc.guest.status = data;
break;
case MSR_IA32_RTIT_CR3_MATCH:
if ((pt_mode != PT_MODE_HOST_GUEST) ||
(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_cr3_filtering))
return 1;
vmx->pt_desc.guest.cr3_match = data;
break;
case MSR_IA32_RTIT_OUTPUT_BASE:
if ((pt_mode != PT_MODE_HOST_GUEST) ||
(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
(!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_topa_output) &&
!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_single_range_output)) ||
(data & MSR_IA32_RTIT_OUTPUT_BASE_MASK))
return 1;
vmx->pt_desc.guest.output_base = data;
break;
case MSR_IA32_RTIT_OUTPUT_MASK:
if ((pt_mode != PT_MODE_HOST_GUEST) ||
(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
(!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_topa_output) &&
!intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_single_range_output)))
return 1;
vmx->pt_desc.guest.output_mask = data;
break;
case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
if ((pt_mode != PT_MODE_HOST_GUEST) ||
(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
(index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
PT_CAP_num_address_ranges)))
return 1;
if (index % 2)
vmx->pt_desc.guest.addr_b[index / 2] = data;
else
vmx->pt_desc.guest.addr_a[index / 2] = data;
break;
case MSR_TSC_AUX: case MSR_TSC_AUX:
if (!msr_info->host_initiated && if (!msr_info->host_initiated &&
!guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
......
...@@ -69,6 +69,7 @@ ...@@ -69,6 +69,7 @@
#include <asm/irq_remapping.h> #include <asm/irq_remapping.h>
#include <asm/mshyperv.h> #include <asm/mshyperv.h>
#include <asm/hypervisor.h> #include <asm/hypervisor.h>
#include <asm/intel_pt.h>
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include "trace.h" #include "trace.h"
...@@ -1124,7 +1125,13 @@ static u32 msrs_to_save[] = { ...@@ -1124,7 +1125,13 @@ static u32 msrs_to_save[] = {
#endif #endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES,
MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH,
MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK,
MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B,
MSR_IA32_RTIT_ADDR1_A, MSR_IA32_RTIT_ADDR1_B,
MSR_IA32_RTIT_ADDR2_A, MSR_IA32_RTIT_ADDR2_B,
MSR_IA32_RTIT_ADDR3_A, MSR_IA32_RTIT_ADDR3_B,
}; };
static unsigned num_msrs_to_save; static unsigned num_msrs_to_save;
...@@ -4884,6 +4891,30 @@ static void kvm_init_msr_list(void) ...@@ -4884,6 +4891,30 @@ static void kvm_init_msr_list(void)
if (!kvm_x86_ops->rdtscp_supported()) if (!kvm_x86_ops->rdtscp_supported())
continue; continue;
break; break;
case MSR_IA32_RTIT_CTL:
case MSR_IA32_RTIT_STATUS:
if (!kvm_x86_ops->pt_supported())
continue;
break;
case MSR_IA32_RTIT_CR3_MATCH:
if (!kvm_x86_ops->pt_supported() ||
!intel_pt_validate_hw_cap(PT_CAP_cr3_filtering))
continue;
break;
case MSR_IA32_RTIT_OUTPUT_BASE:
case MSR_IA32_RTIT_OUTPUT_MASK:
if (!kvm_x86_ops->pt_supported() ||
(!intel_pt_validate_hw_cap(PT_CAP_topa_output) &&
!intel_pt_validate_hw_cap(PT_CAP_single_range_output)))
continue;
break;
case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: {
if (!kvm_x86_ops->pt_supported() ||
msrs_to_save[i] - MSR_IA32_RTIT_ADDR0_A >=
intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2)
continue;
break;
}
default: default:
break; break;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment