Commit aa2a4b65 authored by Linus Torvalds

Merge branch 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 vdso updates from Ingo Molnar:
 "Add support for vDSO acceleration of the "Hyper-V TSC page", to speed
  up clock reading on Hyper-V guests"

* 'x86-vdso-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vdso: Add VCLOCK_HVCLOCK vDSO clock read method
  x86/hyperv: Move TSC reading method to asm/mshyperv.h
  x86/hyperv: Implement hv_get_tsc_page()
parents d19458a4 90b20432
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <asm/unistd.h> #include <asm/unistd.h>
#include <asm/msr.h> #include <asm/msr.h>
#include <asm/pvclock.h> #include <asm/pvclock.h>
#include <asm/mshyperv.h>
#include <linux/math64.h> #include <linux/math64.h>
#include <linux/time.h> #include <linux/time.h>
#include <linux/kernel.h> #include <linux/kernel.h>
...@@ -32,6 +33,11 @@ extern u8 pvclock_page ...@@ -32,6 +33,11 @@ extern u8 pvclock_page
__attribute__((visibility("hidden"))); __attribute__((visibility("hidden")));
#endif #endif
#ifdef CONFIG_HYPERV_TSCPAGE
extern u8 hvclock_page
__attribute__((visibility("hidden")));
#endif
#ifndef BUILD_VDSO32 #ifndef BUILD_VDSO32
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
...@@ -141,6 +147,20 @@ static notrace u64 vread_pvclock(int *mode) ...@@ -141,6 +147,20 @@ static notrace u64 vread_pvclock(int *mode)
return last; return last;
} }
#endif #endif
#ifdef CONFIG_HYPERV_TSCPAGE
/*
 * Read the clock through the Hyper-V TSC page exposed to the vDSO at
 * hvclock_page.
 *
 * @mode: set to VCLOCK_NONE when the TSC page cannot be used, so the
 *        caller can tell that no vDSO reading was produced.
 *
 * Returns the value computed by hv_read_tsc_page(), or 0 (with *mode
 * updated) when that helper reports U64_MAX.
 */
static notrace u64 vread_hvclock(int *mode)
{
	const struct ms_hyperv_tsc_page *page =
		(const struct ms_hyperv_tsc_page *)&hvclock_page;
	u64 now = hv_read_tsc_page(page);

	/* U64_MAX is the helper's "page not usable" marker. */
	if (now == U64_MAX) {
		*mode = VCLOCK_NONE;
		return 0;
	}

	return now;
}
#endif
notrace static u64 vread_tsc(void) notrace static u64 vread_tsc(void)
{ {
...@@ -172,6 +192,10 @@ notrace static inline u64 vgetsns(int *mode) ...@@ -172,6 +192,10 @@ notrace static inline u64 vgetsns(int *mode)
#ifdef CONFIG_PARAVIRT_CLOCK #ifdef CONFIG_PARAVIRT_CLOCK
else if (gtod->vclock_mode == VCLOCK_PVCLOCK) else if (gtod->vclock_mode == VCLOCK_PVCLOCK)
cycles = vread_pvclock(mode); cycles = vread_pvclock(mode);
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
else if (gtod->vclock_mode == VCLOCK_HVCLOCK)
cycles = vread_hvclock(mode);
#endif #endif
else else
return 0; return 0;
......
...@@ -25,7 +25,7 @@ SECTIONS ...@@ -25,7 +25,7 @@ SECTIONS
* segment. * segment.
*/ */
vvar_start = . - 2 * PAGE_SIZE; vvar_start = . - 3 * PAGE_SIZE;
vvar_page = vvar_start; vvar_page = vvar_start;
/* Place all vvars at the offsets in asm/vvar.h. */ /* Place all vvars at the offsets in asm/vvar.h. */
...@@ -36,6 +36,7 @@ SECTIONS ...@@ -36,6 +36,7 @@ SECTIONS
#undef EMIT_VVAR #undef EMIT_VVAR
pvclock_page = vvar_start + PAGE_SIZE; pvclock_page = vvar_start + PAGE_SIZE;
hvclock_page = vvar_start + 2 * PAGE_SIZE;
. = SIZEOF_HEADERS; . = SIZEOF_HEADERS;
......
...@@ -74,6 +74,7 @@ enum { ...@@ -74,6 +74,7 @@ enum {
sym_vvar_page, sym_vvar_page,
sym_hpet_page, sym_hpet_page,
sym_pvclock_page, sym_pvclock_page,
sym_hvclock_page,
sym_VDSO_FAKE_SECTION_TABLE_START, sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END, sym_VDSO_FAKE_SECTION_TABLE_END,
}; };
...@@ -82,6 +83,7 @@ const int special_pages[] = { ...@@ -82,6 +83,7 @@ const int special_pages[] = {
sym_vvar_page, sym_vvar_page,
sym_hpet_page, sym_hpet_page,
sym_pvclock_page, sym_pvclock_page,
sym_hvclock_page,
}; };
struct vdso_sym { struct vdso_sym {
...@@ -94,6 +96,7 @@ struct vdso_sym required_syms[] = { ...@@ -94,6 +96,7 @@ struct vdso_sym required_syms[] = {
[sym_vvar_page] = {"vvar_page", true}, [sym_vvar_page] = {"vvar_page", true},
[sym_hpet_page] = {"hpet_page", true}, [sym_hpet_page] = {"hpet_page", true},
[sym_pvclock_page] = {"pvclock_page", true}, [sym_pvclock_page] = {"pvclock_page", true},
[sym_hvclock_page] = {"hvclock_page", true},
[sym_VDSO_FAKE_SECTION_TABLE_START] = { [sym_VDSO_FAKE_SECTION_TABLE_START] = {
"VDSO_FAKE_SECTION_TABLE_START", false "VDSO_FAKE_SECTION_TABLE_START", false
}, },
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <asm/page.h> #include <asm/page.h>
#include <asm/desc.h> #include <asm/desc.h>
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/mshyperv.h>
#if defined(CONFIG_X86_64) #if defined(CONFIG_X86_64)
unsigned int __read_mostly vdso64_enabled = 1; unsigned int __read_mostly vdso64_enabled = 1;
...@@ -121,6 +122,12 @@ static int vvar_fault(const struct vm_special_mapping *sm, ...@@ -121,6 +122,12 @@ static int vvar_fault(const struct vm_special_mapping *sm,
vmf->address, vmf->address,
__pa(pvti) >> PAGE_SHIFT); __pa(pvti) >> PAGE_SHIFT);
} }
} else if (sym_offset == image->sym_hvclock_page) {
struct ms_hyperv_tsc_page *tsc_pg = hv_get_tsc_page();
if (tsc_pg && vclock_was_used(VCLOCK_HVCLOCK))
ret = vm_insert_pfn(vma, vmf->address,
vmalloc_to_pfn(tsc_pg));
} }
if (ret == 0 || ret == -EBUSY) if (ret == 0 || ret == -EBUSY)
......
...@@ -27,45 +27,22 @@ ...@@ -27,45 +27,22 @@
#include <linux/clockchips.h> #include <linux/clockchips.h>
#ifdef CONFIG_X86_64 #ifdef CONFIG_HYPERV_TSCPAGE
static struct ms_hyperv_tsc_page *tsc_pg; static struct ms_hyperv_tsc_page *tsc_pg;
/*
 * Accessor for the file-local tsc_pg pointer, so code outside this file
 * can reach the Hyper-V TSC page.
 *
 * NOTE(review): the assignment of tsc_pg is not visible here; presumably
 * it stays NULL until hyperv_init() has set the page up - callers should
 * check the result for NULL.
 */
struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
return tsc_pg;
}
static u64 read_hv_clock_tsc(struct clocksource *arg) static u64 read_hv_clock_tsc(struct clocksource *arg)
{ {
u64 current_tick; u64 current_tick = hv_read_tsc_page(tsc_pg);
if (current_tick == U64_MAX)
rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
if (tsc_pg->tsc_sequence != 0) {
/*
* Use the tsc page to compute the value.
*/
while (1) {
u64 tmp;
u32 sequence = tsc_pg->tsc_sequence;
u64 cur_tsc;
u64 scale = tsc_pg->tsc_scale;
s64 offset = tsc_pg->tsc_offset;
rdtscll(cur_tsc);
/* current_tick = ((cur_tsc *scale) >> 64) + offset */
asm("mulq %3"
: "=d" (current_tick), "=a" (tmp)
: "a" (cur_tsc), "r" (scale));
current_tick += offset;
if (tsc_pg->tsc_sequence == sequence)
return current_tick;
if (tsc_pg->tsc_sequence != 0)
continue;
/*
* Fallback using MSR method.
*/
break;
}
}
rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick);
return current_tick; return current_tick;
} }
...@@ -139,7 +116,7 @@ void hyperv_init(void) ...@@ -139,7 +116,7 @@ void hyperv_init(void)
/* /*
* Register Hyper-V specific clocksource. * Register Hyper-V specific clocksource.
*/ */
#ifdef CONFIG_X86_64 #ifdef CONFIG_HYPERV_TSCPAGE
if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) {
union hv_x64_msr_hypercall_contents tsc_msr; union hv_x64_msr_hypercall_contents tsc_msr;
...@@ -155,6 +132,9 @@ void hyperv_init(void) ...@@ -155,6 +132,9 @@ void hyperv_init(void)
tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg); tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg);
wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64);
hyperv_cs_tsc.archdata.vclock_mode = VCLOCK_HVCLOCK;
clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
return; return;
} }
......
...@@ -6,7 +6,8 @@ ...@@ -6,7 +6,8 @@
#define VCLOCK_NONE 0 /* No vDSO clock available. */ #define VCLOCK_NONE 0 /* No vDSO clock available. */
#define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ #define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */
#define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */ #define VCLOCK_PVCLOCK 2 /* vDSO should use vread_pvclock. */
#define VCLOCK_MAX 2 #define VCLOCK_HVCLOCK 3 /* vDSO should use vread_hvclock. */
#define VCLOCK_MAX 3
struct arch_clocksource_data { struct arch_clocksource_data {
int vclock_mode; int vclock_mode;
......
...@@ -176,4 +176,58 @@ void hyperv_report_panic(struct pt_regs *regs); ...@@ -176,4 +176,58 @@ void hyperv_report_panic(struct pt_regs *regs);
bool hv_is_hypercall_page_setup(void); bool hv_is_hypercall_page_setup(void);
void hyperv_cleanup(void); void hyperv_cleanup(void);
#endif #endif
#ifdef CONFIG_HYPERV_TSCPAGE
struct ms_hyperv_tsc_page *hv_get_tsc_page(void);
/*
 * hv_read_tsc_page - compute the reference time from a Hyper-V TSC page.
 * @tsc_pg: TSC page to read from; it is only read, never written.
 *
 * Returns ((TSC * tsc_scale) >> 64) + tsc_offset, using scale/offset/TSC
 * values read while tsc_sequence stayed unchanged, or U64_MAX when
 * tsc_sequence is 0 and the caller has to use a different time source.
 *
 * NOTE(review): U64_MAX doubles as the failure marker, so a computed
 * reference time equal to U64_MAX would be indistinguishable from
 * failure - presumably unreachable in practice; confirm.
 */
static inline u64 hv_read_tsc_page(const struct ms_hyperv_tsc_page *tsc_pg)
{
u64 scale, offset, cur_tsc;
u32 sequence;
/*
* The protocol for reading Hyper-V TSC page is specified in Hypervisor
* Top-Level Functional Specification ver. 3.0 and above. To get the
* reference time we must do the following:
* - READ ReferenceTscSequence
* A special '0' value indicates the time source is unreliable and we
* need to use something else. The currently published specification
* versions (up to 4.0b) contain a mistake and wrongly claim '-1'
* instead of '0' as the special value, see commit c35b82ef0294.
* - ReferenceTime =
* ((RDTSC() * ReferenceTscScale) >> 64) + ReferenceTscOffset
* - READ ReferenceTscSequence again. In case its value has changed
* since our first reading we need to discard ReferenceTime and repeat
* the whole sequence as the hypervisor was updating the page in
* between.
*/
do {
sequence = READ_ONCE(tsc_pg->tsc_sequence);
if (!sequence)
return U64_MAX;
/*
* Make sure we read sequence before we read other values from
* TSC page.
*/
smp_rmb();
scale = READ_ONCE(tsc_pg->tsc_scale);
offset = READ_ONCE(tsc_pg->tsc_offset);
/*
 * The TSC is sampled inside the retry loop, between the two sequence
 * reads, so it is re-read together with scale/offset on every retry.
 */
cur_tsc = rdtsc_ordered();
/*
* Make sure we read sequence after we read all other values
* from TSC page.
*/
smp_rmb();
} while (READ_ONCE(tsc_pg->tsc_sequence) != sequence);
return mul_u64_u64_shr(cur_tsc, scale, 64) + offset;
}
#else
/*
 * Stub for builds without CONFIG_HYPERV_TSCPAGE: there is no TSC page,
 * so callers always get NULL.
 */
static inline struct ms_hyperv_tsc_page *hv_get_tsc_page(void)
{
return NULL;
}
#endif
#endif #endif
...@@ -20,6 +20,7 @@ struct vdso_image { ...@@ -20,6 +20,7 @@ struct vdso_image {
long sym_vvar_page; long sym_vvar_page;
long sym_hpet_page; long sym_hpet_page;
long sym_pvclock_page; long sym_pvclock_page;
long sym_hvclock_page;
long sym_VDSO32_NOTE_MASK; long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn; long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn; long sym___kernel_rt_sigreturn;
......
...@@ -7,6 +7,9 @@ config HYPERV ...@@ -7,6 +7,9 @@ config HYPERV
Select this option to run Linux as a Hyper-V client operating Select this option to run Linux as a Hyper-V client operating
system. system.
config HYPERV_TSCPAGE
def_bool HYPERV && X86_64
config HYPERV_UTILS config HYPERV_UTILS
tristate "Microsoft Hyper-V Utilities driver" tristate "Microsoft Hyper-V Utilities driver"
depends on HYPERV && CONNECTOR && NLS depends on HYPERV && CONNECTOR && NLS
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment