Commit 8566ac8b authored by Babu Moger's avatar Babu Moger Committed by Radim Krčmář

KVM: SVM: Implement pause loop exit logic in SVM

Bring the PLE(pause loop exit) logic to AMD svm driver.

While testing, we found this helping in situations where numerous
pauses are generated. Without these patches we could see continuos
VMEXITS due to pause interceptions. Tested it on AMD EPYC server with
boot parameter idle=poll on a VM with 32 vcpus to simulate extensive
pause behaviour. Here are VMEXITS in 10 seconds interval.

Pauses                  810199                  504
Total                   882184                  325415
Signed-off-by: default avatarBabu Moger <babu.moger@amd.com>
[Prevented the window from dropping below the initial value. - Radim]
Signed-off-by: default avatarRadim Krčmář <rkrcmar@redhat.com>
parent 1d8fb44a
...@@ -299,6 +299,54 @@ static bool npt_enabled = true; ...@@ -299,6 +299,54 @@ static bool npt_enabled = true;
static bool npt_enabled; static bool npt_enabled;
#endif #endif
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* pause_filter_count: On processors that support Pause filtering(indicated
* by CPUID Fn8000_000A_EDX), the VMCB provides a 16 bit pause filter
* count value. On VMRUN this value is loaded into an internal counter.
* Each time a pause instruction is executed, this counter is decremented
* until it reaches zero at which time a #VMEXIT is generated if pause
* intercept is enabled. Refer to AMD APM Vol 2 Section 15.14.4 Pause
* Intercept Filtering for more details.
* This also indicate if ple logic enabled.
*
* pause_filter_thresh: In addition, some processor families support advanced
* pause filtering (indicated by CPUID Fn8000_000A_EDX) upper bound on
* the amount of time a guest is allowed to execute in a pause loop.
* In this mode, a 16-bit pause filter threshold field is added in the
* VMCB. The threshold value is a cycle count that is used to reset the
* pause counter. As with simple pause filtering, VMRUN loads the pause
* count value from VMCB into an internal counter. Then, on each pause
* instruction the hardware checks the elapsed number of cycles since
* the most recent pause instruction against the pause filter threshold.
* If the elapsed cycle count is greater than the pause filter threshold,
* then the internal pause count is reloaded from the VMCB and execution
* continues. If the elapsed cycle count is less than the pause filter
* threshold, then the internal pause count is decremented. If the count
* value is less than zero and PAUSE intercept is enabled, a #VMEXIT is
* triggered. If advanced pause filtering is supported and pause filter
* threshold field is set to zero, the filter will operate in the simpler,
* count only mode.
*/
static unsigned short pause_filter_thresh = KVM_DEFAULT_PLE_GAP;
module_param(pause_filter_thresh, ushort, 0444);
static unsigned short pause_filter_count = KVM_SVM_DEFAULT_PLE_WINDOW;
module_param(pause_filter_count, ushort, 0444);
/* Default doubles per-vcpu window every exit. */
static unsigned short pause_filter_count_grow = KVM_DEFAULT_PLE_WINDOW_GROW;
module_param(pause_filter_count_grow, ushort, 0444);
/* Default resets per-vcpu window every exit to pause_filter_count. */
static unsigned short pause_filter_count_shrink = KVM_DEFAULT_PLE_WINDOW_SHRINK;
module_param(pause_filter_count_shrink, ushort, 0444);
/* Default is to compute the maximum so we can never overflow. */
static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
module_param(pause_filter_count_max, ushort, 0444);
/* allow nested paging (virtualized MMU) for all guests */ /* allow nested paging (virtualized MMU) for all guests */
static int npt = true; static int npt = true;
module_param(npt, int, S_IRUGO); module_param(npt, int, S_IRUGO);
...@@ -1198,6 +1246,42 @@ static __init int sev_hardware_setup(void) ...@@ -1198,6 +1246,42 @@ static __init int sev_hardware_setup(void)
return rc; return rc;
} }
static void grow_ple_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
control->pause_filter_count = __grow_ple_window(old,
pause_filter_count,
pause_filter_count_grow,
pause_filter_count_max);
if (control->pause_filter_count != old)
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
trace_kvm_ple_window_grow(vcpu->vcpu_id,
control->pause_filter_count, old);
}
static void shrink_ple_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
control->pause_filter_count =
__shrink_ple_window(old,
pause_filter_count,
pause_filter_count_shrink,
pause_filter_count);
if (control->pause_filter_count != old)
mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
trace_kvm_ple_window_shrink(vcpu->vcpu_id,
control->pause_filter_count, old);
}
static __init int svm_hardware_setup(void) static __init int svm_hardware_setup(void)
{ {
int cpu; int cpu;
...@@ -1228,6 +1312,14 @@ static __init int svm_hardware_setup(void) ...@@ -1228,6 +1312,14 @@ static __init int svm_hardware_setup(void)
kvm_tsc_scaling_ratio_frac_bits = 32; kvm_tsc_scaling_ratio_frac_bits = 32;
} }
/* Check for pause filtering support */
if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
pause_filter_count = 0;
pause_filter_thresh = 0;
} else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) {
pause_filter_thresh = 0;
}
if (nested) { if (nested) {
printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
...@@ -1485,10 +1577,13 @@ static void init_vmcb(struct vcpu_svm *svm) ...@@ -1485,10 +1577,13 @@ static void init_vmcb(struct vcpu_svm *svm)
svm->nested.vmcb = 0; svm->nested.vmcb = 0;
svm->vcpu.arch.hflags = 0; svm->vcpu.arch.hflags = 0;
if (boot_cpu_has(X86_FEATURE_PAUSEFILTER) && if (pause_filter_count) {
!kvm_pause_in_guest(svm->vcpu.kvm)) { control->pause_filter_count = pause_filter_count;
control->pause_filter_count = 3000; if (pause_filter_thresh)
control->pause_filter_thresh = pause_filter_thresh;
set_intercept(svm, INTERCEPT_PAUSE); set_intercept(svm, INTERCEPT_PAUSE);
} else {
clr_intercept(svm, INTERCEPT_PAUSE);
} }
if (kvm_vcpu_apicv_active(&svm->vcpu)) if (kvm_vcpu_apicv_active(&svm->vcpu))
...@@ -4288,6 +4383,9 @@ static int pause_interception(struct vcpu_svm *svm) ...@@ -4288,6 +4383,9 @@ static int pause_interception(struct vcpu_svm *svm)
struct kvm_vcpu *vcpu = &svm->vcpu; struct kvm_vcpu *vcpu = &svm->vcpu;
bool in_kernel = (svm_get_cpl(vcpu) == 0); bool in_kernel = (svm_get_cpl(vcpu) == 0);
if (pause_filter_thresh)
grow_ple_window(vcpu);
kvm_vcpu_on_spin(vcpu, in_kernel); kvm_vcpu_on_spin(vcpu, in_kernel);
return 1; return 1;
} }
...@@ -5984,6 +6082,8 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu) ...@@ -5984,6 +6082,8 @@ static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
{ {
if (pause_filter_thresh)
shrink_ple_window(vcpu);
} }
static inline void avic_post_state_restore(struct kvm_vcpu *vcpu) static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
......
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#define KVM_DEFAULT_PLE_WINDOW_GROW 2 #define KVM_DEFAULT_PLE_WINDOW_GROW 2
#define KVM_DEFAULT_PLE_WINDOW_SHRINK 0 #define KVM_DEFAULT_PLE_WINDOW_SHRINK 0
#define KVM_VMX_DEFAULT_PLE_WINDOW_MAX UINT_MAX #define KVM_VMX_DEFAULT_PLE_WINDOW_MAX UINT_MAX
#define KVM_SVM_DEFAULT_PLE_WINDOW_MAX USHRT_MAX
#define KVM_SVM_DEFAULT_PLE_WINDOW 3000
static inline unsigned int __grow_ple_window(unsigned int val, static inline unsigned int __grow_ple_window(unsigned int val,
unsigned int base, unsigned int modifier, unsigned int max) unsigned int base, unsigned int modifier, unsigned int max)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment