Commit 688340ea authored by Jeremy Fitzhardinge, committed by Jeremy Fitzhardinge

Add a sched_clock paravirt_op

The TSC-based get_scheduled_cycles interface is not a good match for
Xen's runstate accounting, which reports everything in nanoseconds.

This patch replaces it with a sched_clock interface, which matches the
requirements of both Xen and VMI.

In order to do this, we:
   1. replace get_scheduled_cycles with sched_clock
   2. hoist cycles_2_ns into a common header
   3. update vmi accordingly

One thing to note: because sched_clock is implemented as a weak
function in kernel/sched.c, we must define a real function in order to
override this weak binding.  This means the usual paravirt_ops
technique of using an inline function won't work in this case.
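
To make the weak-binding point concrete, here is a standalone sketch (not part of the patch; the kernel/sched.c body is paraphrased, and the call into paravirt_sched_clock assumes CONFIG_PARAVIRT):

/* Standalone sketch, not part of this patch: overriding a weak symbol.
 * kernel/sched.c supplies roughly this jiffies-based default:
 *
 *	unsigned long long __attribute__((weak)) sched_clock(void)
 *	{
 *		return (unsigned long long)(jiffies - INITIAL_JIFFIES)
 *			* (NSEC_PER_SEC / HZ);
 *	}
 *
 * Any strong (ordinary, out-of-line) definition of sched_clock in
 * another object file replaces it at link time: */
unsigned long long sched_clock(void)
{
	return paravirt_sched_clock();
}
/* An inline function or a macro emits no symbol named "sched_clock",
 * so it cannot displace the weak definition -- hence the real function
 * added in tsc.c below. */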
Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Zachary Amsden <zach@vmware.com>
Cc: Dan Hecht <dhecht@vmware.com>
Cc: john stultz <johnstul@us.ibm.com>
parent d572929c
@@ -302,7 +302,7 @@ struct paravirt_ops paravirt_ops = {
 	.write_msr = native_write_msr_safe,
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
-	.get_scheduled_cycles = native_read_tsc,
+	.sched_clock = native_sched_clock,
 	.get_cpu_khz = native_calculate_cpu_khz,
 	.load_tr_desc = native_load_tr_desc,
 	.set_ldt = native_set_ldt,
@@ -84,7 +84,7 @@ static inline int check_tsc_unstable(void)
  *
  *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
  */
-static unsigned long cyc2ns_scale __read_mostly;
+unsigned long cyc2ns_scale __read_mostly;
 
 #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
 
@@ -93,15 +93,10 @@ static inline void set_cyc2ns_scale(unsigned long cpu_khz)
 	cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz;
 }
 
-static inline unsigned long long cycles_2_ns(unsigned long long cyc)
-{
-	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
-}
-
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
-unsigned long long sched_clock(void)
+unsigned long long native_sched_clock(void)
 {
 	unsigned long long this_offset;
 
@@ -118,12 +113,24 @@ unsigned long long sched_clock(void)
 		return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
 
 	/* read the Time Stamp Counter: */
-	get_scheduled_cycles(this_offset);
+	rdtscll(this_offset);
 
 	/* return the value in ns */
 	return cycles_2_ns(this_offset);
 }
 
+/* We need to define a real function for sched_clock, to override the
+   weak default version */
+#ifdef CONFIG_PARAVIRT
+unsigned long long sched_clock(void)
+{
+	return paravirt_sched_clock();
+}
+#else
+unsigned long long sched_clock(void)
+	__attribute__((alias("native_sched_clock")));
+#endif
+
 unsigned long native_calculate_cpu_khz(void)
 {
 	unsigned long long start, end;
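
The !CONFIG_PARAVIRT branch above avoids even a wrapper call by using GCC's alias attribute. A minimal standalone sketch of the mechanism (hypothetical names, not kernel code):

/* alias_demo.c -- build with: gcc -c alias_demo.c
 * "alias" makes public_clock and native_impl the same function body,
 * so callers of public_clock pay no extra indirection. */
static unsigned long long native_impl(void)
{
	return 42ULL;	/* stand-in for a real TSC-derived timestamp */
}

/* The alias target must be defined in this same translation unit. */
unsigned long long public_clock(void)
	__attribute__((alias("native_impl")));

Both symbols resolve to the same address, so unlike the CONFIG_PARAVIRT wrapper there is no extra call frame at all.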
@@ -891,7 +891,7 @@ static inline int __init activate_vmi(void)
 	paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
 	paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
 #endif
-	paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
+	paravirt_ops.sched_clock = vmi_sched_clock;
 	paravirt_ops.get_cpu_khz = vmi_cpu_khz;
 
 	/* We have true wallclock functions; disable CMOS clock sync */
@@ -64,10 +64,10 @@ int vmi_set_wallclock(unsigned long now)
 	return 0;
 }
 
-/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */
-unsigned long long vmi_get_sched_cycles(void)
+/* paravirt_ops.sched_clock = vmi_sched_clock */
+unsigned long long vmi_sched_clock(void)
 {
-	return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
+	return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
 }
 
 /* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
@@ -118,7 +118,7 @@ struct paravirt_ops
 	u64 (*read_tsc)(void);
 	u64 (*read_pmc)(void);
 
-	u64 (*get_scheduled_cycles)(void);
+	unsigned long long (*sched_clock)(void);
 	unsigned long (*get_cpu_khz)(void);
 
 	/* Segment descriptor handling */
@@ -566,7 +566,10 @@ static inline u64 paravirt_read_tsc(void)
 
 #define rdtscll(val) (val = paravirt_read_tsc())
 
-#define get_scheduled_cycles(val) (val = paravirt_ops.get_scheduled_cycles())
+static inline unsigned long long paravirt_sched_clock(void)
+{
+	return PVOP_CALL0(unsigned long long, sched_clock);
+}
 #define calculate_cpu_khz() (paravirt_ops.get_cpu_khz())
 
 #define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
@@ -15,8 +15,38 @@ extern int no_sync_cmos_clock;
 extern int recalibrate_cpu_khz(void);
 
 #ifndef CONFIG_PARAVIRT
-#define get_scheduled_cycles(val) rdtscll(val)
 #define calculate_cpu_khz() native_calculate_cpu_khz()
 #endif
 
+/* Accelerators for sched_clock()
+ * convert from cycles (64 bits) => nanoseconds (64 bits)
+ * basic equation:
+ *		ns = cycles / (freq / ns_per_sec)
+ *		ns = cycles * (ns_per_sec / freq)
+ *		ns = cycles * (10^9 / (cpu_khz * 10^3))
+ *		ns = cycles * (10^6 / cpu_khz)
+ *
+ *	Then we use scaling math (suggested by george@mvista.com) to get:
+ *		ns = cycles * (10^6 * SC / cpu_khz) / SC
+ *		ns = cycles * cyc2ns_scale / SC
+ *
+ *	And since SC is a constant power of two, we can convert the div
+ *	into a shift.
+ *
+ *	We can use a khz divisor instead of mhz to keep better precision,
+ *	since cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
+ *	(mathieu.desnoyers@polymtl.ca)
+ *
+ *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+extern unsigned long cyc2ns_scale __read_mostly;
+
+#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{
+	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+
 #endif
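
As a quick numeric check of the scaling comment above, a hypothetical standalone program (values chosen for illustration; main/assert are of course not kernel code): at 2 GHz one cycle is 0.5 ns, and the multiply-and-shift reproduces that exactly.

#include <assert.h>

#define CYC2NS_SCALE_FACTOR 10	/* 2^10, as in timer.h */

int main(void)
{
	unsigned long cpu_khz = 2000000;	/* 2 GHz CPU */

	/* cyc2ns_scale = 10^6 * 2^10 / cpu_khz = 512 for this CPU */
	unsigned long cyc2ns_scale =
		(1000000 << CYC2NS_SCALE_FACTOR) / cpu_khz;

	/* 4000 cycles at 2 GHz should be 2000 ns (2 microseconds) */
	unsigned long long ns =
		(4000ULL * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;

	assert(cyc2ns_scale == 512);
	assert(ns == 2000);
	return 0;
}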
@@ -49,7 +49,7 @@ extern struct vmi_timer_ops {
 extern void __init vmi_time_init(void);
 extern unsigned long vmi_get_wallclock(void);
 extern int vmi_set_wallclock(unsigned long now);
-extern unsigned long long vmi_get_sched_cycles(void);
+extern unsigned long long vmi_sched_clock(void);
 extern unsigned long vmi_cpu_khz(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC