Commit de0f6195 authored by Like Xu, committed by Paolo Bonzini

KVM: x86/pmu: Defer counter emulated overflow via pmc->prev_counter

Defer reprogramming counters and handling overflow via KVM_REQ_PMU
when incrementing counters.  KVM skips emulated WRMSR in the VM-Exit
fastpath, the fastpath runs with IRQs disabled, skipping instructions
can increment and reprogram counters, reprogramming counters can
sleep, and sleeping is disallowed while IRQs are disabled.

 [*] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580
 [*] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2981888, name: CPU 15/KVM
 [*] preempt_count: 1, expected: 0
 [*] RCU nest depth: 0, expected: 0
 [*] INFO: lockdep is turned off.
 [*] irq event stamp: 0
 [*] hardirqs last  enabled at (0): [<0000000000000000>] 0x0
 [*] hardirqs last disabled at (0): [<ffffffff8121222a>] copy_process+0x146a/0x62d0
 [*] softirqs last  enabled at (0): [<ffffffff81212269>] copy_process+0x14a9/0x62d0
 [*] softirqs last disabled at (0): [<0000000000000000>] 0x0
 [*] Preemption disabled at:
 [*] [<ffffffffc2063fc1>] vcpu_enter_guest+0x1001/0x3dc0 [kvm]
 [*] CPU: 17 PID: 2981888 Comm: CPU 15/KVM Kdump: 5.19.0-rc1-g239111db364c-dirty #2
 [*] Call Trace:
 [*]  <TASK>
 [*]  dump_stack_lvl+0x6c/0x9b
 [*]  __might_resched.cold+0x22e/0x297
 [*]  __mutex_lock+0xc0/0x23b0
 [*]  perf_event_ctx_lock_nested+0x18f/0x340
 [*]  perf_event_pause+0x1a/0x110
 [*]  reprogram_counter+0x2af/0x1490 [kvm]
 [*]  kvm_pmu_trigger_event+0x429/0x950 [kvm]
 [*]  kvm_skip_emulated_instruction+0x48/0x90 [kvm]
 [*]  handle_fastpath_set_msr_irqoff+0x349/0x3b0 [kvm]
 [*]  vmx_vcpu_run+0x268e/0x3b80 [kvm_intel]
 [*]  vcpu_enter_guest+0x1d22/0x3dc0 [kvm]

Add a field to kvm_pmc to track the previous counter value in order
to defer overflow detection to kvm_pmu_handle_event() (the counter must
be paused before handling overflow, and that may increment the counter).

Opportunistically shrink sizeof(struct kvm_pmc) a bit.
Suggested-by: Wanpeng Li <wanpengli@tencent.com>
Fixes: 9cd803d4 ("KVM: x86: Update vPMCs when retiring instructions")
Signed-off-by: Like Xu <likexu@tencent.com>
Link: https://lore.kernel.org/r/20220831085328.45489-6-likexu@tencent.com
[sean: avoid re-triggering KVM_REQ_PMU on overflow, tweak changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220923001355.3741194-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 68fb4757
@@ -491,7 +491,10 @@ enum pmc_type {
 struct kvm_pmc {
 	enum pmc_type type;
 	u8 idx;
+	bool is_paused;
+	bool intr;
 	u64 counter;
+	u64 prev_counter;
 	u64 eventsel;
 	struct perf_event *perf_event;
 	struct kvm_vcpu *vcpu;
@@ -501,8 +504,6 @@ struct kvm_pmc {
 	 * ctrl value for fixed counters.
 	 */
 	u64 current_config;
-	bool is_paused;
-	bool intr;
 };
 /* More counters may conflict with other existing Architectural MSRs */
......
@@ -101,14 +101,6 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	bool skip_pmi = false;
 
-	/*
-	 * Ignore overflow events for counters that are scheduled to be
-	 * reprogrammed, e.g. if a PMI for the previous event races with KVM's
-	 * handling of a related guest WRMSR.
-	 */
-	if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
-		return;
-
 	if (pmc->perf_event && pmc->perf_event->attr.precise_ip) {
 		if (!in_pmi) {
 			/*
@@ -126,7 +118,6 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
 	} else {
 		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
 	}
-	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
 
 	if (!pmc->intr || skip_pmi)
 		return;
@@ -151,7 +142,17 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
 {
 	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
 
+	/*
+	 * Ignore overflow events for counters that are scheduled to be
+	 * reprogrammed, e.g. if a PMI for the previous event races with KVM's
+	 * handling of a related guest WRMSR.
+	 */
+	if (test_and_set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi))
+		return;
+
 	__kvm_perf_overflow(pmc, true);
+
+	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
 }
 
 static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
@@ -311,6 +312,9 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 	if (!check_pmu_event_filter(pmc))
 		goto reprogram_complete;
 
+	if (pmc->counter < pmc->prev_counter)
+		__kvm_perf_overflow(pmc, false);
+
 	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
 		printk_once("kvm pmu: pin control bit is ignored\n");
@@ -348,6 +352,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 
 reprogram_complete:
 	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
+	pmc->prev_counter = 0;
 }
 
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
@@ -536,14 +541,9 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
 
 static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
 {
-	u64 prev_count;
-
-	prev_count = pmc->counter;
+	pmc->prev_counter = pmc->counter;
 	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
-
-	reprogram_counter(pmc);
-	if (pmc->counter < prev_count)
-		__kvm_perf_overflow(pmc, false);
+	kvm_pmu_request_counter_reprogam(pmc);
 }
 
 static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
......
@@ -212,7 +212,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
 		struct kvm_pmc *pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
-		pmc->counter = pmc->eventsel = 0;
+		pmc->counter = pmc->prev_counter = pmc->eventsel = 0;
 	}
 }
......
@@ -646,14 +646,14 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
 		pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
-		pmc->counter = pmc->eventsel = 0;
+		pmc->counter = pmc->prev_counter = pmc->eventsel = 0;
 	}
 
 	for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
 		pmc = &pmu->fixed_counters[i];
 
 		pmc_stop_counter(pmc);
-		pmc->counter = 0;
+		pmc->counter = pmc->prev_counter = 0;
 	}
 
 	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment