Commit 8228c77d authored by David Woodhouse's avatar David Woodhouse Committed by Paolo Bonzini

KVM: x86: switch pvclock_gtod_sync_lock to a raw spinlock

On the preemption path when updating a Xen guest's runstate times, this
lock is taken inside the scheduler rq->lock, which is a raw spinlock.
This was shown in a lockdep warning:

[   89.138354] =============================
[   89.138356] [ BUG: Invalid wait context ]
[   89.138358] 5.15.0-rc5+ #834 Tainted: G S        I E
[   89.138360] -----------------------------
[   89.138361] xen_shinfo_test/2575 is trying to lock:
[   89.138363] ffffa34a0364efd8 (&kvm->arch.pvclock_gtod_sync_lock){....}-{3:3}, at: get_kvmclock_ns+0x1f/0x130 [kvm]
[   89.138442] other info that might help us debug this:
[   89.138444] context-{5:5}
[   89.138445] 4 locks held by xen_shinfo_test/2575:
[   89.138447]  #0: ffff972bdc3b8108 (&vcpu->mutex){+.+.}-{4:4}, at: kvm_vcpu_ioctl+0x77/0x6f0 [kvm]
[   89.138483]  #1: ffffa34a03662e90 (&kvm->srcu){....}-{0:0}, at: kvm_arch_vcpu_ioctl_run+0xdc/0x8b0 [kvm]
[   89.138526]  #2: ffff97331fdbac98 (&rq->__lock){-.-.}-{2:2}, at: __schedule+0xff/0xbd0
[   89.138534]  #3: ffffa34a03662e90 (&kvm->srcu){....}-{0:0}, at: kvm_arch_vcpu_put+0x26/0x170 [kvm]
...
[   89.138695]  get_kvmclock_ns+0x1f/0x130 [kvm]
[   89.138734]  kvm_xen_update_runstate+0x14/0x90 [kvm]
[   89.138783]  kvm_xen_update_runstate_guest+0x15/0xd0 [kvm]
[   89.138830]  kvm_arch_vcpu_put+0xe6/0x170 [kvm]
[   89.138870]  kvm_sched_out+0x2f/0x40 [kvm]
[   89.138900]  __schedule+0x5de/0xbd0

Cc: stable@vger.kernel.org
Reported-by: syzbot+b282b65c2c68492df769@syzkaller.appspotmail.com
Fixes: 30b5c851 ("KVM: x86/xen: Add support for vCPU runstate information")
Signed-off-by: default avatarDavid Woodhouse <dwmw@amazon.co.uk>
Message-Id: <1b02a06421c17993df337493a68ba923f3bd5c0f.camel@infradead.org>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent fa13843d
...@@ -1097,7 +1097,7 @@ struct kvm_arch { ...@@ -1097,7 +1097,7 @@ struct kvm_arch {
u64 cur_tsc_generation; u64 cur_tsc_generation;
int nr_vcpus_matched_tsc; int nr_vcpus_matched_tsc;
spinlock_t pvclock_gtod_sync_lock; raw_spinlock_t pvclock_gtod_sync_lock;
bool use_master_clock; bool use_master_clock;
u64 master_kernel_ns; u64 master_kernel_ns;
u64 master_cycle_now; u64 master_cycle_now;
......
...@@ -2542,7 +2542,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) ...@@ -2542,7 +2542,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
kvm_vcpu_write_tsc_offset(vcpu, offset); kvm_vcpu_write_tsc_offset(vcpu, offset);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags); raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags); raw_spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
if (!matched) { if (!matched) {
kvm->arch.nr_vcpus_matched_tsc = 0; kvm->arch.nr_vcpus_matched_tsc = 0;
} else if (!already_matched) { } else if (!already_matched) {
...@@ -2550,7 +2550,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) ...@@ -2550,7 +2550,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
} }
kvm_track_tsc_matching(vcpu); kvm_track_tsc_matching(vcpu);
spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags); raw_spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
} }
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
...@@ -2780,9 +2780,9 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) ...@@ -2780,9 +2780,9 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
kvm_make_mclock_inprogress_request(kvm); kvm_make_mclock_inprogress_request(kvm);
/* no guest entries from this point */ /* no guest entries from this point */
spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
pvclock_update_vm_gtod_copy(kvm); pvclock_update_vm_gtod_copy(kvm);
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
kvm_for_each_vcpu(i, vcpu, kvm) kvm_for_each_vcpu(i, vcpu, kvm)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
...@@ -2800,15 +2800,15 @@ u64 get_kvmclock_ns(struct kvm *kvm) ...@@ -2800,15 +2800,15 @@ u64 get_kvmclock_ns(struct kvm *kvm)
unsigned long flags; unsigned long flags;
u64 ret; u64 ret;
spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
if (!ka->use_master_clock) { if (!ka->use_master_clock) {
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
return get_kvmclock_base_ns() + ka->kvmclock_offset; return get_kvmclock_base_ns() + ka->kvmclock_offset;
} }
hv_clock.tsc_timestamp = ka->master_cycle_now; hv_clock.tsc_timestamp = ka->master_cycle_now;
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
/* both __this_cpu_read() and rdtsc() should be on the same cpu */ /* both __this_cpu_read() and rdtsc() should be on the same cpu */
get_cpu(); get_cpu();
...@@ -2902,13 +2902,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) ...@@ -2902,13 +2902,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
* If the host uses TSC clock, then passthrough TSC as stable * If the host uses TSC clock, then passthrough TSC as stable
* to the guest. * to the guest.
*/ */
spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
use_master_clock = ka->use_master_clock; use_master_clock = ka->use_master_clock;
if (use_master_clock) { if (use_master_clock) {
host_tsc = ka->master_cycle_now; host_tsc = ka->master_cycle_now;
kernel_ns = ka->master_kernel_ns; kernel_ns = ka->master_kernel_ns;
} }
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
/* Keep irq disabled to prevent changes to the clock */ /* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags); local_irq_save(flags);
...@@ -6100,13 +6100,13 @@ long kvm_arch_vm_ioctl(struct file *filp, ...@@ -6100,13 +6100,13 @@ long kvm_arch_vm_ioctl(struct file *filp,
* is slightly ahead) here we risk going negative on unsigned * is slightly ahead) here we risk going negative on unsigned
* 'system_time' when 'user_ns.clock' is very small. * 'system_time' when 'user_ns.clock' is very small.
*/ */
spin_lock_irq(&ka->pvclock_gtod_sync_lock); raw_spin_lock_irq(&ka->pvclock_gtod_sync_lock);
if (kvm->arch.use_master_clock) if (kvm->arch.use_master_clock)
now_ns = ka->master_kernel_ns; now_ns = ka->master_kernel_ns;
else else
now_ns = get_kvmclock_base_ns(); now_ns = get_kvmclock_base_ns();
ka->kvmclock_offset = user_ns.clock - now_ns; ka->kvmclock_offset = user_ns.clock - now_ns;
spin_unlock_irq(&ka->pvclock_gtod_sync_lock); raw_spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
break; break;
...@@ -8139,9 +8139,9 @@ static void kvm_hyperv_tsc_notifier(void) ...@@ -8139,9 +8139,9 @@ static void kvm_hyperv_tsc_notifier(void)
list_for_each_entry(kvm, &vm_list, vm_list) { list_for_each_entry(kvm, &vm_list, vm_list) {
struct kvm_arch *ka = &kvm->arch; struct kvm_arch *ka = &kvm->arch;
spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); raw_spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
pvclock_update_vm_gtod_copy(kvm); pvclock_update_vm_gtod_copy(kvm);
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); raw_spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
kvm_for_each_vcpu(cpu, vcpu, kvm) kvm_for_each_vcpu(cpu, vcpu, kvm)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
...@@ -11182,7 +11182,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ...@@ -11182,7 +11182,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock); raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock); mutex_init(&kvm->arch.apic_map_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); raw_spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock);
kvm->arch.kvmclock_offset = -get_kvmclock_base_ns(); kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
pvclock_update_vm_gtod_copy(kvm); pvclock_update_vm_gtod_copy(kvm);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment