Commit 128c434a authored by Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Thomas Gleixner:
 "This update provides:

   - make the scheduler clock switch to unstable mode smooth so the
     timestamps stay at microsecond granularity instead of switching to
     tick granularity.

   - unbreak perf test tsc by taking the new offset into account which
     was added in order to provide better sched clock continuity

   - switching sched clock to unstable mode runs all clock-related
     computations, which affect the sched clock output itself, from a
     workqueue. In case of preemption, sched clock uses half-updated data
     and provides wrong timestamps. Keep the math in the protected
     context and delegate only the static key switch to workqueue
     context.

   - remove a duplicate header include"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/headers: Remove duplicate #include <linux/sched/debug.h> line
  sched/clock: Fix broken stable to unstable transfer
  sched/clock, x86/perf: Fix "perf test tsc"
  sched/clock: Fix clear_sched_clock_stable() preempt wobbly
parents 0a89b5eb 658b2995
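
For reference, the invariant the kernel/sched/clock.c changes below maintain is:

        ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset

A minimal standalone sketch (not kernel code; the tick_raw/tick_gtod sample values are made up) of how solving that equality at the switch point yields the two offset updates seen in __set_sched_clock_stable() and __clear_sched_clock_stable():

/*
 * Sketch only: the continuity math behind the offsets, not kernel code.
 * The tick snapshot values are hypothetical.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Per-CPU snapshot taken at the last tick (made-up values, in ns). */
        uint64_t tick_raw  = 1000000;   /* raw sched_clock() reading */
        uint64_t tick_gtod = 4000000;   /* ktime_get_ns() reading */

        uint64_t sched_clock_offset = 0;
        uint64_t gtod_offset = 0;

        /*
         * unstable -> stable: from now on the clock is tick_raw-based
         * (sched_clock() + offset), so pick the offset that makes it
         * equal to the gtod-based value at the switch point.
         */
        sched_clock_offset = (tick_gtod + gtod_offset) - tick_raw;
        assert(tick_raw + sched_clock_offset == tick_gtod + gtod_offset);

        /*
         * stable -> unstable: the roles swap, so recompute the gtod-side
         * offset from the same equality.
         */
        gtod_offset = (tick_raw + sched_clock_offset) - tick_gtod;
        assert(tick_gtod + gtod_offset == tick_raw + sched_clock_offset);

        printf("offsets: %llu %llu ns\n",
               (unsigned long long)sched_clock_offset,
               (unsigned long long)gtod_offset);
        return 0;
}

Deriving both offsets from the same equality is what keeps the stable<->unstable switch continuous instead of jumping to tick granularity.
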
@@ -2256,6 +2256,7 @@ void arch_perf_update_userpage(struct perf_event *event,
                                struct perf_event_mmap_page *userpg, u64 now)
 {
         struct cyc2ns_data *data;
+        u64 offset;
 
         userpg->cap_user_time = 0;
         userpg->cap_user_time_zero = 0;
@@ -2263,11 +2264,13 @@ void arch_perf_update_userpage(struct perf_event *event,
                 !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED);
         userpg->pmc_width = x86_pmu.cntval_bits;
 
-        if (!sched_clock_stable())
+        if (!using_native_sched_clock() || !sched_clock_stable())
                 return;
 
         data = cyc2ns_read_begin();
 
+        offset = data->cyc2ns_offset + __sched_clock_offset;
+
         /*
          * Internal timekeeping for enabled/running/stopped times
          * is always in the local_clock domain.
@@ -2275,7 +2278,7 @@ void arch_perf_update_userpage(struct perf_event *event,
         userpg->cap_user_time = 1;
         userpg->time_mult = data->cyc2ns_mul;
         userpg->time_shift = data->cyc2ns_shift;
-        userpg->time_offset = data->cyc2ns_offset - now;
+        userpg->time_offset = offset - now;
 
         /*
          * cap_user_time_zero doesn't make sense when we're using a different
@@ -2283,7 +2286,7 @@ void arch_perf_update_userpage(struct perf_event *event,
          */
         if (!event->attr.use_clockid) {
                 userpg->cap_user_time_zero = 1;
-                userpg->time_zero = data->cyc2ns_offset;
+                userpg->time_zero = offset;
         }
 
         cyc2ns_read_end(data);
...
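
The hunk above folds __sched_clock_offset into the time_offset/time_zero fields that perf exports through the mmap'ed event page, so userspace conversions line up with the kernel's local_clock() domain again. A rough sketch of the userspace side, following the conversion scheme documented for struct perf_event_mmap_page (the TSC sample and mult/shift/zero values below are hypothetical):

/*
 * Sketch only: userspace consumption of the exported fields, following
 * the conversion documented for struct perf_event_mmap_page.  All field
 * values and the TSC sample below are hypothetical.
 */
#include <stdint.h>
#include <stdio.h>

/* Convert a cycle count to nanoseconds with the exported mult/shift. */
static uint64_t cyc_to_ns(uint64_t cyc, uint32_t time_mult, uint16_t time_shift)
{
        uint64_t quot = cyc >> time_shift;
        uint64_t rem  = cyc & (((uint64_t)1 << time_shift) - 1);

        return quot * time_mult + ((rem * time_mult) >> time_shift);
}

int main(void)
{
        /* As if read from the mmap'ed page; values are made up. */
        uint32_t time_mult  = 838860800;        /* for a hypothetical 2.56 GHz TSC */
        uint16_t time_shift = 31;
        uint64_t time_zero  = 123456789;        /* with the fix, includes __sched_clock_offset */
        uint64_t tsc        = 5000000000ull;    /* a raw TSC sample */

        /*
         * Because time_zero now carries the extra __sched_clock_offset,
         * the result is directly comparable with the local_clock()-domain
         * timestamps perf records, which is what "perf test tsc" checks.
         */
        uint64_t ns = time_zero + cyc_to_ns(tsc, time_mult, time_shift);

        printf("tsc %llu -> %llu ns\n",
               (unsigned long long)tsc, (unsigned long long)ns);
        return 0;
}
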
@@ -12,6 +12,8 @@ extern int recalibrate_cpu_khz(void);
 
 extern int no_timer_check;
 
+extern bool using_native_sched_clock(void);
+
 /*
  * We use the full linear equation: f(x) = a + b*x, in order to allow
  * a continuous function in the face of dynamic freq changes.
...
@@ -328,7 +328,7 @@ unsigned long long sched_clock(void)
         return paravirt_sched_clock();
 }
 
-static inline bool using_native_sched_clock(void)
+bool using_native_sched_clock(void)
 {
         return pv_time_ops.sched_clock == native_sched_clock;
 }
@@ -336,7 +336,7 @@ static inline bool using_native_sched_clock(void)
 unsigned long long
 sched_clock(void) __attribute__((alias("native_sched_clock")));
 
-static inline bool using_native_sched_clock(void) { return true; }
+bool using_native_sched_clock(void) { return true; }
 #endif
 
 int check_tsc_unstable(void)
...
@@ -28,7 +28,6 @@
 #include <linux/module.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/debug.h>
-#include <linux/sched/debug.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
 #include <linux/mm.h>
...
@@ -54,15 +54,16 @@ static inline u64 local_clock(void)
 }
 #else
 extern void sched_clock_init_late(void);
-/*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
- */
 extern int sched_clock_stable(void);
 extern void clear_sched_clock_stable(void);
 
+/*
+ * When sched_clock_stable(), __sched_clock_offset provides the offset
+ * between local_clock() and sched_clock().
+ */
+extern u64 __sched_clock_offset;
+
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
...
@@ -96,10 +96,10 @@ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable);
 static int __sched_clock_stable_early = 1;
 
 /*
- * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset
+ * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset
  */
-static __read_mostly u64 raw_offset;
-static __read_mostly u64 gtod_offset;
+__read_mostly u64 __sched_clock_offset;
+static __read_mostly u64 __gtod_offset;
 
 struct sched_clock_data {
         u64 tick_raw;
@@ -131,17 +131,24 @@ static void __set_sched_clock_stable(void)
         /*
          * Attempt to make the (initial) unstable->stable transition continuous.
          */
-        raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw);
+        __sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw);
 
         printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n",
-                        scd->tick_gtod, gtod_offset,
-                        scd->tick_raw, raw_offset);
+                        scd->tick_gtod, __gtod_offset,
+                        scd->tick_raw, __sched_clock_offset);
 
         static_branch_enable(&__sched_clock_stable);
         tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE);
 }
 
-static void __clear_sched_clock_stable(struct work_struct *work)
+static void __sched_clock_work(struct work_struct *work)
+{
+        static_branch_disable(&__sched_clock_stable);
+}
+
+static DECLARE_WORK(sched_clock_work, __sched_clock_work);
+
+static void __clear_sched_clock_stable(void)
 {
         struct sched_clock_data *scd = this_scd();
@@ -154,17 +161,17 @@ static void __clear_sched_clock_stable(struct work_struct *work)
          *
          * Still do what we can.
          */
-        gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod);
+        __gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod);
 
         printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n",
-                        scd->tick_gtod, gtod_offset,
-                        scd->tick_raw, raw_offset);
+                        scd->tick_gtod, __gtod_offset,
+                        scd->tick_raw, __sched_clock_offset);
 
-        static_branch_disable(&__sched_clock_stable);
         tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE);
-}
 
-static DECLARE_WORK(sched_clock_work, __clear_sched_clock_stable);
+        if (sched_clock_stable())
+                schedule_work(&sched_clock_work);
+}
 
 void clear_sched_clock_stable(void)
 {
@@ -173,7 +180,7 @@ void clear_sched_clock_stable(void)
         smp_mb(); /* matches sched_clock_init_late() */
 
         if (sched_clock_running == 2)
-                schedule_work(&sched_clock_work);
+                __clear_sched_clock_stable();
 }
 
 void sched_clock_init_late(void)
@@ -214,7 +221,7 @@ static inline u64 wrap_max(u64 x, u64 y)
  */
 static u64 sched_clock_local(struct sched_clock_data *scd)
 {
-        u64 now, clock, old_clock, min_clock, max_clock;
+        u64 now, clock, old_clock, min_clock, max_clock, gtod;
         s64 delta;
 
 again:
@@ -231,9 +238,10 @@ static u64 sched_clock_local(struct sched_clock_data *scd)
          *                    scd->tick_gtod + TICK_NSEC);
          */
 
-        clock = scd->tick_gtod + gtod_offset + delta;
-        min_clock = wrap_max(scd->tick_gtod, old_clock);
-        max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC);
+        gtod = scd->tick_gtod + __gtod_offset;
+        clock = gtod + delta;
+        min_clock = wrap_max(gtod, old_clock);
+        max_clock = wrap_max(old_clock, gtod + TICK_NSEC);
 
         clock = wrap_max(clock, min_clock);
         clock = wrap_min(clock, max_clock);
@@ -317,7 +325,7 @@ u64 sched_clock_cpu(int cpu)
         u64 clock;
 
         if (sched_clock_stable())
-                return sched_clock() + raw_offset;
+                return sched_clock() + __sched_clock_offset;
 
         if (unlikely(!sched_clock_running))
                 return 0ull;
...
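
In the sched_clock_local() hunk above, the clamp window is now derived from a single GTOD base, gtod = scd->tick_gtod + __gtod_offset, instead of mixing offset and non-offset values. A standalone sketch of that clamping with made-up numbers, using wrap_max()/wrap_min() helpers written in the same signed-delta style as the kernel's:

/*
 * Sketch only: the clamp applied by sched_clock_local() after the rework,
 * with made-up sample values.  Assumes TICK_NSEC of 1 ms (HZ=1000).
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define TICK_NSEC 1000000ull

/* Signed-delta min/max so the comparison stays valid across u64 wraparound. */
static uint64_t wrap_max(uint64_t x, uint64_t y)
{
        return (int64_t)(x - y) > 0 ? x : y;
}

static uint64_t wrap_min(uint64_t x, uint64_t y)
{
        return (int64_t)(x - y) < 0 ? x : y;
}

int main(void)
{
        /* Hypothetical per-CPU state, in ns. */
        uint64_t tick_raw    = 100000000;       /* raw clock at the last tick */
        uint64_t tick_gtod   = 100200000;       /* GTOD at the last tick */
        uint64_t gtod_offset = 0;
        uint64_t old_clock   = 100200000;       /* last value handed out */
        uint64_t now         = tick_raw + 5 * TICK_NSEC;  /* raw clock ran ahead */

        uint64_t delta = now - tick_raw;
        uint64_t gtod  = tick_gtod + gtod_offset;
        uint64_t clock = gtod + delta;

        /* Never go backwards, never run more than one tick past the GTOD base. */
        uint64_t min_clock = wrap_max(gtod, old_clock);
        uint64_t max_clock = wrap_max(old_clock, gtod + TICK_NSEC);

        clock = wrap_max(clock, min_clock);
        clock = wrap_min(clock, max_clock);

        printf("clamped clock: %" PRIu64 " ns (base %" PRIu64 ")\n", clock, gtod);
        return 0;
}

The signed-delta comparisons keep the min/max selection correct even if the u64 values wrap, and the clamp bounds the per-CPU clock to at most one tick past the GTOD base while never letting it go backwards.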