Commit 6e01f86f authored by Linus Torvalds

Merge tag 'timers-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull timer and timekeeping updates from Thomas Gleixner:

 - Expose CLOCK_TAI to instrumentation to aid with TSN debugging.

 - Ensure that the clockevent is stopped when there is no timer armed to
   avoid pointless wakeups.

 - Make the sched clock frequency handling and rounding consistent.

 - Provide a better debugobject hint for delayed works. The timer
   callback is always the same, which makes it difficult to identify the
   underlying work. Use the work function as a hint instead.

 - Move the timer specific sysctl code into the timer subsystem.

 - The usual set of improvements and cleanups.

* tag 'timers-core-2022-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  timers: Provide a better debugobjects hint for delayed works
  time/sched_clock: Fix formatting of frequency reporting code
  time/sched_clock: Use Hz as the unit for clock rate reporting below 4kHz
  time/sched_clock: Round the frequency reported to nearest rather than down
  timekeeping: Consolidate fast timekeeper
  timekeeping: Annotate ktime_get_boot_fast_ns() with data_race()
  timers/nohz: Switch to ONESHOT_STOPPED in the low-res handler when the tick is stopped
  timekeeping: Introduce fast accessor to clock tai
  tracing/timer: Add missing argument documentation of trace points
  clocksource: Replace cpumask_weight() with cpumask_empty()
  timers: Move timer sysctl into the timer code
  clockevents: Use dedicated list iterator variable
  timers: Simplify calc_index()
  timers: Initialize base::next_expiry_recalc in timers_prepare_cpu()
parents fcfde8a7 317f29c1
...@@ -132,6 +132,7 @@ Some additional variants exist for more specialized cases: ...@@ -132,6 +132,7 @@ Some additional variants exist for more specialized cases:
.. c:function:: u64 ktime_get_mono_fast_ns( void ) .. c:function:: u64 ktime_get_mono_fast_ns( void )
u64 ktime_get_raw_fast_ns( void ) u64 ktime_get_raw_fast_ns( void )
u64 ktime_get_boot_fast_ns( void ) u64 ktime_get_boot_fast_ns( void )
u64 ktime_get_tai_fast_ns( void )
u64 ktime_get_real_fast_ns( void ) u64 ktime_get_real_fast_ns( void )
These variants are safe to call from any context, including from These variants are safe to call from any context, including from
......
...@@ -177,6 +177,7 @@ static inline u64 ktime_get_raw_ns(void) ...@@ -177,6 +177,7 @@ static inline u64 ktime_get_raw_ns(void)
extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_mono_fast_ns(void);
extern u64 ktime_get_raw_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void);
extern u64 ktime_get_boot_fast_ns(void); extern u64 ktime_get_boot_fast_ns(void);
extern u64 ktime_get_tai_fast_ns(void);
extern u64 ktime_get_real_fast_ns(void); extern u64 ktime_get_real_fast_ns(void);
/* /*
......
...@@ -196,14 +196,6 @@ extern void init_timers(void); ...@@ -196,14 +196,6 @@ extern void init_timers(void);
struct hrtimer; struct hrtimer;
extern enum hrtimer_restart it_real_fn(struct hrtimer *); extern enum hrtimer_restart it_real_fn(struct hrtimer *);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
struct ctl_table;
extern unsigned int sysctl_timer_migration;
int timer_migration_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
#endif
unsigned long __round_jiffies(unsigned long j, int cpu); unsigned long __round_jiffies(unsigned long j, int cpu);
unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long __round_jiffies_relative(unsigned long j, int cpu);
unsigned long round_jiffies(unsigned long j); unsigned long round_jiffies(unsigned long j);
......
...@@ -48,6 +48,7 @@ DEFINE_EVENT(timer_class, timer_init, ...@@ -48,6 +48,7 @@ DEFINE_EVENT(timer_class, timer_init,
* timer_start - called when the timer is started * timer_start - called when the timer is started
* @timer: pointer to struct timer_list * @timer: pointer to struct timer_list
* @expires: the timers expiry time * @expires: the timers expiry time
* @flags: the timers flags
*/ */
TRACE_EVENT(timer_start, TRACE_EVENT(timer_start,
...@@ -84,6 +85,7 @@ TRACE_EVENT(timer_start, ...@@ -84,6 +85,7 @@ TRACE_EVENT(timer_start,
/** /**
* timer_expire_entry - called immediately before the timer callback * timer_expire_entry - called immediately before the timer callback
* @timer: pointer to struct timer_list * @timer: pointer to struct timer_list
* @baseclk: value of timer_base::clk when timer expires
* *
* Allows to determine the timer latency. * Allows to determine the timer latency.
*/ */
...@@ -191,6 +193,7 @@ TRACE_EVENT(hrtimer_init, ...@@ -191,6 +193,7 @@ TRACE_EVENT(hrtimer_init,
/** /**
* hrtimer_start - called when the hrtimer is started * hrtimer_start - called when the hrtimer is started
* @hrtimer: pointer to struct hrtimer * @hrtimer: pointer to struct hrtimer
* @mode: the hrtimers mode
*/ */
TRACE_EVENT(hrtimer_start, TRACE_EVENT(hrtimer_start,
......
...@@ -2288,17 +2288,6 @@ static struct ctl_table kern_table[] = { ...@@ -2288,17 +2288,6 @@ static struct ctl_table kern_table[] = {
.extra1 = SYSCTL_ZERO, .extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE, .extra2 = SYSCTL_ONE,
}, },
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
{
.procname = "timer_migration",
.data = &sysctl_timer_migration,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = timer_migration_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
#endif
#ifdef CONFIG_BPF_SYSCALL #ifdef CONFIG_BPF_SYSCALL
{ {
.procname = "unprivileged_bpf_disabled", .procname = "unprivileged_bpf_disabled",
......
...@@ -690,7 +690,7 @@ static ssize_t unbind_device_store(struct device *dev, ...@@ -690,7 +690,7 @@ static ssize_t unbind_device_store(struct device *dev,
{ {
char name[CS_NAME_LEN]; char name[CS_NAME_LEN];
ssize_t ret = sysfs_get_uname(buf, name, count); ssize_t ret = sysfs_get_uname(buf, name, count);
struct clock_event_device *ce; struct clock_event_device *ce = NULL, *iter;
if (ret < 0) if (ret < 0)
return ret; return ret;
...@@ -698,9 +698,10 @@ static ssize_t unbind_device_store(struct device *dev, ...@@ -698,9 +698,10 @@ static ssize_t unbind_device_store(struct device *dev,
ret = -ENODEV; ret = -ENODEV;
mutex_lock(&clockevents_mutex); mutex_lock(&clockevents_mutex);
raw_spin_lock_irq(&clockevents_lock); raw_spin_lock_irq(&clockevents_lock);
list_for_each_entry(ce, &clockevent_devices, list) { list_for_each_entry(iter, &clockevent_devices, list) {
if (!strcmp(ce->name, name)) { if (!strcmp(iter->name, name)) {
ret = __clockevents_try_unbind(ce, dev->id); ret = __clockevents_try_unbind(iter, dev->id);
ce = iter;
break; break;
} }
} }
......
...@@ -343,7 +343,7 @@ void clocksource_verify_percpu(struct clocksource *cs) ...@@ -343,7 +343,7 @@ void clocksource_verify_percpu(struct clocksource *cs)
cpus_read_lock(); cpus_read_lock();
preempt_disable(); preempt_disable();
clocksource_verify_choose_cpus(); clocksource_verify_choose_cpus();
if (cpumask_weight(&cpus_chosen) == 0) { if (cpumask_empty(&cpus_chosen)) {
preempt_enable(); preempt_enable();
cpus_read_unlock(); cpus_read_unlock();
pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name); pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <linux/jiffies.h> #include <linux/jiffies.h>
#include <linux/ktime.h> #include <linux/ktime.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/math.h>
#include <linux/moduleparam.h> #include <linux/moduleparam.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/clock.h> #include <linux/sched/clock.h>
...@@ -199,16 +200,14 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate) ...@@ -199,16 +200,14 @@ sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
r = rate; r = rate;
if (r >= 4000000) { if (r >= 4000000) {
r /= 1000000; r = DIV_ROUND_CLOSEST(r, 1000000);
r_unit = 'M'; r_unit = 'M';
} else { } else if (r >= 4000) {
if (r >= 1000) { r = DIV_ROUND_CLOSEST(r, 1000);
r /= 1000;
r_unit = 'k'; r_unit = 'k';
} else { } else {
r_unit = ' '; r_unit = ' ';
} }
}
/* Calculate the ns resolution of this counter */ /* Calculate the ns resolution of this counter */
res = cyc_to_ns(1ULL, new_mult, new_shift); res = cyc_to_ns(1ULL, new_mult, new_shift);
......
...@@ -928,6 +928,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu) ...@@ -928,6 +928,8 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
if (unlikely(expires == KTIME_MAX)) { if (unlikely(expires == KTIME_MAX)) {
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
hrtimer_cancel(&ts->sched_timer); hrtimer_cancel(&ts->sched_timer);
else
tick_program_event(KTIME_MAX, 1);
return; return;
} }
...@@ -1364,9 +1366,15 @@ static void tick_nohz_handler(struct clock_event_device *dev) ...@@ -1364,9 +1366,15 @@ static void tick_nohz_handler(struct clock_event_device *dev)
tick_sched_do_timer(ts, now); tick_sched_do_timer(ts, now);
tick_sched_handle(ts, regs); tick_sched_handle(ts, regs);
/* No need to reprogram if we are running tickless */ if (unlikely(ts->tick_stopped)) {
if (unlikely(ts->tick_stopped)) /*
* The clockevent device is not reprogrammed, so change the
* clock event device to ONESHOT_STOPPED to avoid spurious
* interrupts on devices which might not be truly one shot.
*/
tick_program_event(KTIME_MAX, 1);
return; return;
}
hrtimer_forward(&ts->sched_timer, now, TICK_NSEC); hrtimer_forward(&ts->sched_timer, now, TICK_NSEC);
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1); tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
......
...@@ -429,6 +429,14 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr, ...@@ -429,6 +429,14 @@ static void update_fast_timekeeper(const struct tk_read_base *tkr,
memcpy(base + 1, base, sizeof(*base)); memcpy(base + 1, base, sizeof(*base));
} }
/*
 * Read the clock of @tkr, compute the masked cycle delta against
 * tkr->cycle_last and return that delta converted by
 * timekeeping_delta_to_ns().
 */
static __always_inline u64 fast_tk_get_delta_ns(struct tk_read_base *tkr)
{
	const u64 now_cycles = tk_clock_read(tkr);
	const u64 elapsed = clocksource_delta(now_cycles, tkr->cycle_last,
					      tkr->mask);

	return timekeeping_delta_to_ns(tkr, elapsed);
}
static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
{ {
struct tk_read_base *tkr; struct tk_read_base *tkr;
...@@ -439,12 +447,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) ...@@ -439,12 +447,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
seq = raw_read_seqcount_latch(&tkf->seq); seq = raw_read_seqcount_latch(&tkf->seq);
tkr = tkf->base + (seq & 0x01); tkr = tkf->base + (seq & 0x01);
now = ktime_to_ns(tkr->base); now = ktime_to_ns(tkr->base);
now += fast_tk_get_delta_ns(tkr);
now += timekeeping_delta_to_ns(tkr,
clocksource_delta(
tk_clock_read(tkr),
tkr->cycle_last,
tkr->mask));
} while (read_seqcount_latch_retry(&tkf->seq, seq)); } while (read_seqcount_latch_retry(&tkf->seq, seq));
return now; return now;
...@@ -528,10 +531,27 @@ u64 notrace ktime_get_boot_fast_ns(void) ...@@ -528,10 +531,27 @@ u64 notrace ktime_get_boot_fast_ns(void)
{ {
struct timekeeper *tk = &tk_core.timekeeper; struct timekeeper *tk = &tk_core.timekeeper;
return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot)); return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_boot)));
} }
EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
/**
* ktime_get_tai_fast_ns - NMI safe and fast access to tai clock.
*
* The same limitations as described for ktime_get_boot_fast_ns() apply. The
* mono time and the TAI offset are not read atomically which may yield wrong
* readouts. However, an update of the TAI offset is a rare event, e.g. caused
* by settime or adjtimex with an offset. The user of this function has to deal
* with the possibility of wrong timestamps in post processing.
*
* Return: the ktime_get_mono_fast_ns() reading plus the timekeeper's
* offs_tai offset converted with ktime_to_ns().
*/
u64 notrace ktime_get_tai_fast_ns(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
/*
* offs_tai is read without any locking visible in this function; the
* data_race() wrapper presumably marks that unsynchronized read as
* intentional (see the atomicity caveat in the header comment).
*/
return (ktime_get_mono_fast_ns() + ktime_to_ns(data_race(tk->offs_tai)));
}
EXPORT_SYMBOL_GPL(ktime_get_tai_fast_ns);
static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono) static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
{ {
struct tk_read_base *tkr; struct tk_read_base *tkr;
...@@ -543,10 +563,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono) ...@@ -543,10 +563,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono)
tkr = tkf->base + (seq & 0x01); tkr = tkf->base + (seq & 0x01);
basem = ktime_to_ns(tkr->base); basem = ktime_to_ns(tkr->base);
baser = ktime_to_ns(tkr->base_real); baser = ktime_to_ns(tkr->base_real);
delta = fast_tk_get_delta_ns(tkr);
delta = timekeeping_delta_to_ns(tkr,
clocksource_delta(tk_clock_read(tkr),
tkr->cycle_last, tkr->mask));
} while (read_seqcount_latch_retry(&tkf->seq, seq)); } while (read_seqcount_latch_retry(&tkf->seq, seq));
if (mono) if (mono)
......
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/compat.h> #include <linux/compat.h>
#include <linux/random.h> #include <linux/random.h>
#include <linux/sysctl.h>
#include <linux/uaccess.h> #include <linux/uaccess.h>
#include <asm/unistd.h> #include <asm/unistd.h>
...@@ -223,7 +224,7 @@ static void timer_update_keys(struct work_struct *work); ...@@ -223,7 +224,7 @@ static void timer_update_keys(struct work_struct *work);
static DECLARE_WORK(timer_update_work, timer_update_keys); static DECLARE_WORK(timer_update_work, timer_update_keys);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
unsigned int sysctl_timer_migration = 1; static unsigned int sysctl_timer_migration = 1;
DEFINE_STATIC_KEY_FALSE(timers_migration_enabled); DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);
...@@ -234,34 +235,56 @@ static void timers_update_migration(void) ...@@ -234,34 +235,56 @@ static void timers_update_migration(void)
else else
static_branch_disable(&timers_migration_enabled); static_branch_disable(&timers_migration_enabled);
} }
#else
static inline void timers_update_migration(void) { }
#endif /* !CONFIG_SMP */
static void timer_update_keys(struct work_struct *work) #ifdef CONFIG_SYSCTL
static int timer_migration_handler(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{ {
int ret;
mutex_lock(&timer_keys_mutex); mutex_lock(&timer_keys_mutex);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
timers_update_migration(); timers_update_migration();
static_branch_enable(&timers_nohz_active);
mutex_unlock(&timer_keys_mutex); mutex_unlock(&timer_keys_mutex);
return ret;
} }
void timers_update_nohz(void) static struct ctl_table timer_sysctl[] = {
{
.procname = "timer_migration",
.data = &sysctl_timer_migration,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = timer_migration_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{}
};
static int __init timer_sysctl_init(void)
{ {
schedule_work(&timer_update_work); register_sysctl("kernel", timer_sysctl);
return 0;
} }
device_initcall(timer_sysctl_init);
#endif /* CONFIG_SYSCTL */
#else /* CONFIG_SMP */
static inline void timers_update_migration(void) { }
#endif /* !CONFIG_SMP */
int timer_migration_handler(struct ctl_table *table, int write, static void timer_update_keys(struct work_struct *work)
void *buffer, size_t *lenp, loff_t *ppos)
{ {
int ret;
mutex_lock(&timer_keys_mutex); mutex_lock(&timer_keys_mutex);
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write)
timers_update_migration(); timers_update_migration();
static_branch_enable(&timers_nohz_active);
mutex_unlock(&timer_keys_mutex); mutex_unlock(&timer_keys_mutex);
return ret; }
void timers_update_nohz(void)
{
schedule_work(&timer_update_work);
} }
static inline bool is_timers_nohz_active(void) static inline bool is_timers_nohz_active(void)
...@@ -502,7 +525,7 @@ static inline unsigned calc_index(unsigned long expires, unsigned lvl, ...@@ -502,7 +525,7 @@ static inline unsigned calc_index(unsigned long expires, unsigned lvl,
* *
* Round up with level granularity to prevent this. * Round up with level granularity to prevent this.
*/ */
expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl); expires = (expires >> LVL_SHIFT(lvl)) + 1;
*bucket_expiry = expires << LVL_SHIFT(lvl); *bucket_expiry = expires << LVL_SHIFT(lvl);
return LVL_OFFS(lvl) + (expires & LVL_MASK); return LVL_OFFS(lvl) + (expires & LVL_MASK);
} }
...@@ -615,9 +638,39 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer ...@@ -615,9 +638,39 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
static const struct debug_obj_descr timer_debug_descr; static const struct debug_obj_descr timer_debug_descr;
/*
* Pairs a timer callback with the location of a function pointer that
* timer_debug_hint() reports instead of the callback itself.
*/
struct timer_hint {
/* Timer callback that is compared against timer_list::function */
void (*function)(struct timer_list *t);
/* Offset from the timer_list address to the function pointer to report */
long offset;
};
/*
* Build a struct timer_hint initializer.
*
* @fn:        timer callback stored in .function
* @container: type that embeds both the timer and the hint function pointer
* @timr:      member of @container holding the timer
* @hintfn:    member of @container holding the function pointer to report
*
* .offset is the distance from @timr to @hintfn inside @container, so it can
* be applied directly to the address of the embedded timer.
*/
#define TIMER_HINT(fn, container, timr, hintfn) \
{ \
.function = fn, \
.offset = offsetof(container, hintfn) - \
offsetof(container, timr) \
}
/*
* Timer callbacks for which timer_debug_hint() reports the work function
* (work.func of the containing delayed work) rather than the callback.
* Listed here: delayed_work_timer_fn for struct delayed_work and
* kthread_delayed_work_timer_fn for struct kthread_delayed_work.
*/
static const struct timer_hint timer_hints[] = {
TIMER_HINT(delayed_work_timer_fn,
struct delayed_work, timer, work.func),
TIMER_HINT(kthread_delayed_work_timer_fn,
struct kthread_delayed_work, timer, work.func),
};
static void *timer_debug_hint(void *addr) static void *timer_debug_hint(void *addr)
{ {
return ((struct timer_list *) addr)->function; struct timer_list *timer = addr;
int i;
for (i = 0; i < ARRAY_SIZE(timer_hints); i++) {
if (timer_hints[i].function == timer->function) {
void (**fn)(void) = addr + timer_hints[i].offset;
return *fn;
}
}
return timer->function;
} }
static bool timer_is_static_object(void *addr) static bool timer_is_static_object(void *addr)
...@@ -1953,6 +2006,7 @@ int timers_prepare_cpu(unsigned int cpu) ...@@ -1953,6 +2006,7 @@ int timers_prepare_cpu(unsigned int cpu)
base = per_cpu_ptr(&timer_bases[b], cpu); base = per_cpu_ptr(&timer_bases[b], cpu);
base->clk = jiffies; base->clk = jiffies;
base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA;
base->next_expiry_recalc = false;
base->timers_pending = false; base->timers_pending = false;
base->is_idle = false; base->is_idle = false;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment