Commit 869b4421 authored by Paolo Bonzini's avatar Paolo Bonzini

kvm: x86: protect masterclock with a seqcount

Protect the reference point for kvmclock with a seqcount, so that
kvmclock updates for all vCPUs can proceed in parallel.  Xen runstate
updates will also run in parallel and not bounce the kvmclock cacheline.

Of the variables that were protected by pvclock_gtod_sync_lock,
nr_vcpus_matched_tsc is different because it is updated outside
pvclock_update_vm_gtod_copy and read inside it.  Therefore, we
need to keep it protected by a spinlock.  In fact it must now
be a raw spinlock, because pvclock_update_vm_gtod_copy, being the
write-side of a seqcount, is non-preemptible.  Since we already
have tsc_write_lock which is a raw spinlock, we can just use
tsc_write_lock as the lock that protects the write-side of the
seqcount.
Co-developed-by: default avatarOliver Upton <oupton@google.com>
Message-Id: <20210916181538.968978-6-oupton@google.com>
Signed-off-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
parent c68dc1b5
...@@ -1086,6 +1086,11 @@ struct kvm_arch { ...@@ -1086,6 +1086,11 @@ struct kvm_arch {
unsigned long irq_sources_bitmap; unsigned long irq_sources_bitmap;
s64 kvmclock_offset; s64 kvmclock_offset;
/*
* This also protects nr_vcpus_matched_tsc which is read from a
* preemption-disabled region, so it must be a raw spinlock.
*/
raw_spinlock_t tsc_write_lock; raw_spinlock_t tsc_write_lock;
u64 last_tsc_nsec; u64 last_tsc_nsec;
u64 last_tsc_write; u64 last_tsc_write;
...@@ -1096,7 +1101,7 @@ struct kvm_arch { ...@@ -1096,7 +1101,7 @@ struct kvm_arch {
u64 cur_tsc_generation; u64 cur_tsc_generation;
int nr_vcpus_matched_tsc; int nr_vcpus_matched_tsc;
spinlock_t pvclock_gtod_sync_lock; seqcount_raw_spinlock_t pvclock_sc;
bool use_master_clock; bool use_master_clock;
u64 master_kernel_ns; u64 master_kernel_ns;
u64 master_cycle_now; u64 master_cycle_now;
......
...@@ -2521,9 +2521,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) ...@@ -2521,9 +2521,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write; vcpu->arch.this_tsc_write = kvm->arch.cur_tsc_write;
kvm_vcpu_write_tsc_offset(vcpu, offset); kvm_vcpu_write_tsc_offset(vcpu, offset);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
if (!matched) { if (!matched) {
kvm->arch.nr_vcpus_matched_tsc = 0; kvm->arch.nr_vcpus_matched_tsc = 0;
} else if (!already_matched) { } else if (!already_matched) {
...@@ -2531,7 +2529,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data) ...@@ -2531,7 +2529,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
} }
kvm_track_tsc_matching(vcpu); kvm_track_tsc_matching(vcpu);
spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags); raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
} }
static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
...@@ -2719,6 +2717,7 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) ...@@ -2719,6 +2717,7 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
int vclock_mode; int vclock_mode;
bool host_tsc_clocksource, vcpus_matched; bool host_tsc_clocksource, vcpus_matched;
lockdep_assert_held(&kvm->arch.tsc_write_lock);
vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 == vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
atomic_read(&kvm->online_vcpus)); atomic_read(&kvm->online_vcpus));
...@@ -2748,14 +2747,18 @@ static void kvm_make_mclock_inprogress_request(struct kvm *kvm) ...@@ -2748,14 +2747,18 @@ static void kvm_make_mclock_inprogress_request(struct kvm *kvm)
kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
} }
static void kvm_start_pvclock_update(struct kvm *kvm) static void __kvm_start_pvclock_update(struct kvm *kvm)
{ {
struct kvm_arch *ka = &kvm->arch; raw_spin_lock_irq(&kvm->arch.tsc_write_lock);
write_seqcount_begin(&kvm->arch.pvclock_sc);
}
static void kvm_start_pvclock_update(struct kvm *kvm)
{
kvm_make_mclock_inprogress_request(kvm); kvm_make_mclock_inprogress_request(kvm);
/* no guest entries from this point */ /* no guest entries from this point */
spin_lock_irq(&ka->pvclock_gtod_sync_lock); __kvm_start_pvclock_update(kvm);
} }
static void kvm_end_pvclock_update(struct kvm *kvm) static void kvm_end_pvclock_update(struct kvm *kvm)
...@@ -2764,7 +2767,8 @@ static void kvm_end_pvclock_update(struct kvm *kvm) ...@@ -2764,7 +2767,8 @@ static void kvm_end_pvclock_update(struct kvm *kvm)
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
int i; int i;
spin_unlock_irq(&ka->pvclock_gtod_sync_lock); write_seqcount_end(&ka->pvclock_sc);
raw_spin_unlock_irq(&ka->tsc_write_lock);
kvm_for_each_vcpu(i, vcpu, kvm) kvm_for_each_vcpu(i, vcpu, kvm)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
...@@ -2781,29 +2785,17 @@ static void kvm_update_masterclock(struct kvm *kvm) ...@@ -2781,29 +2785,17 @@ static void kvm_update_masterclock(struct kvm *kvm)
kvm_end_pvclock_update(kvm); kvm_end_pvclock_update(kvm);
} }
static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) /* Called within read_seqcount_begin/retry for kvm->pvclock_sc. */
static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
{ {
struct kvm_arch *ka = &kvm->arch; struct kvm_arch *ka = &kvm->arch;
struct pvclock_vcpu_time_info hv_clock; struct pvclock_vcpu_time_info hv_clock;
unsigned long flags;
data->flags = 0;
spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
if (!ka->use_master_clock) {
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
data->clock = get_kvmclock_base_ns() + ka->kvmclock_offset;
return;
}
data->flags |= KVM_CLOCK_TSC_STABLE;
hv_clock.tsc_timestamp = ka->master_cycle_now;
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
/* both __this_cpu_read() and rdtsc() should be on the same cpu */ /* both __this_cpu_read() and rdtsc() should be on the same cpu */
get_cpu(); get_cpu();
if (__this_cpu_read(cpu_tsc_khz)) { data->flags = 0;
if (ka->use_master_clock && __this_cpu_read(cpu_tsc_khz)) {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
struct timespec64 ts; struct timespec64 ts;
...@@ -2814,6 +2806,9 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) ...@@ -2814,6 +2806,9 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
#endif #endif
data->host_tsc = rdtsc(); data->host_tsc = rdtsc();
data->flags |= KVM_CLOCK_TSC_STABLE;
hv_clock.tsc_timestamp = ka->master_cycle_now;
hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
&hv_clock.tsc_shift, &hv_clock.tsc_shift,
&hv_clock.tsc_to_system_mul); &hv_clock.tsc_to_system_mul);
...@@ -2825,6 +2820,17 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) ...@@ -2825,6 +2820,17 @@ static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
put_cpu(); put_cpu();
} }
static void get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
{
struct kvm_arch *ka = &kvm->arch;
unsigned seq;
do {
seq = read_seqcount_begin(&ka->pvclock_sc);
__get_kvmclock(kvm, data);
} while (read_seqcount_retry(&ka->pvclock_sc, seq));
}
u64 get_kvmclock_ns(struct kvm *kvm) u64 get_kvmclock_ns(struct kvm *kvm)
{ {
struct kvm_clock_data data; struct kvm_clock_data data;
...@@ -2895,6 +2901,7 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v, ...@@ -2895,6 +2901,7 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v,
static int kvm_guest_time_update(struct kvm_vcpu *v) static int kvm_guest_time_update(struct kvm_vcpu *v)
{ {
unsigned long flags, tgt_tsc_khz; unsigned long flags, tgt_tsc_khz;
unsigned seq;
struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_vcpu_arch *vcpu = &v->arch;
struct kvm_arch *ka = &v->kvm->arch; struct kvm_arch *ka = &v->kvm->arch;
s64 kernel_ns; s64 kernel_ns;
...@@ -2909,13 +2916,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) ...@@ -2909,13 +2916,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
* If the host uses TSC clock, then passthrough TSC as stable * If the host uses TSC clock, then passthrough TSC as stable
* to the guest. * to the guest.
*/ */
spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags); do {
seq = read_seqcount_begin(&ka->pvclock_sc);
use_master_clock = ka->use_master_clock; use_master_clock = ka->use_master_clock;
if (use_master_clock) { if (use_master_clock) {
host_tsc = ka->master_cycle_now; host_tsc = ka->master_cycle_now;
kernel_ns = ka->master_kernel_ns; kernel_ns = ka->master_kernel_ns;
} }
spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags); } while (read_seqcount_retry(&ka->pvclock_sc, seq));
/* Keep irq disabled to prevent changes to the clock */ /* Keep irq disabled to prevent changes to the clock */
local_irq_save(flags); local_irq_save(flags);
...@@ -5838,9 +5846,8 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state) ...@@ -5838,9 +5846,8 @@ int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state)
static int kvm_vm_ioctl_get_clock(struct kvm *kvm, void __user *argp) static int kvm_vm_ioctl_get_clock(struct kvm *kvm, void __user *argp)
{ {
struct kvm_clock_data data; struct kvm_clock_data data = { 0 };
memset(&data, 0, sizeof(data));
get_kvmclock(kvm, &data); get_kvmclock(kvm, &data);
if (copy_to_user(argp, &data, sizeof(data))) if (copy_to_user(argp, &data, sizeof(data)))
return -EFAULT; return -EFAULT;
...@@ -8153,9 +8160,7 @@ static void kvm_hyperv_tsc_notifier(void) ...@@ -8153,9 +8160,7 @@ static void kvm_hyperv_tsc_notifier(void)
kvm_max_guest_tsc_khz = tsc_khz; kvm_max_guest_tsc_khz = tsc_khz;
list_for_each_entry(kvm, &vm_list, vm_list) { list_for_each_entry(kvm, &vm_list, vm_list) {
struct kvm_arch *ka = &kvm->arch; __kvm_start_pvclock_update(kvm);
spin_lock_irq(&ka->pvclock_gtod_sync_lock);
pvclock_update_vm_gtod_copy(kvm); pvclock_update_vm_gtod_copy(kvm);
kvm_end_pvclock_update(kvm); kvm_end_pvclock_update(kvm);
} }
...@@ -11156,6 +11161,7 @@ void kvm_arch_free_vm(struct kvm *kvm) ...@@ -11156,6 +11161,7 @@ void kvm_arch_free_vm(struct kvm *kvm)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{ {
int ret; int ret;
unsigned long flags;
if (type) if (type)
return -EINVAL; return -EINVAL;
...@@ -11179,10 +11185,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ...@@ -11179,10 +11185,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_lock_init(&kvm->arch.tsc_write_lock); raw_spin_lock_init(&kvm->arch.tsc_write_lock);
mutex_init(&kvm->arch.apic_map_lock); mutex_init(&kvm->arch.apic_map_lock);
spin_lock_init(&kvm->arch.pvclock_gtod_sync_lock); seqcount_raw_spinlock_init(&kvm->arch.pvclock_sc, &kvm->arch.tsc_write_lock);
kvm->arch.kvmclock_offset = -get_kvmclock_base_ns(); kvm->arch.kvmclock_offset = -get_kvmclock_base_ns();
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
pvclock_update_vm_gtod_copy(kvm); pvclock_update_vm_gtod_copy(kvm);
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
kvm->arch.guest_can_read_msr_platform_info = true; kvm->arch.guest_can_read_msr_platform_info = true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment