Commit 60f2415e authored by Yafang Shao, committed by Peter Zijlstra

sched: Make schedstats helpers independent of fair sched class

The original prototype of the schedstats helpers is

  update_stats_wait_*(struct cfs_rq *cfs_rq, struct sched_entity *se)

The cfs_rq in these helpers is used to get the rq_clock, and the se is
used to get the struct sched_statistics and the struct task_struct. In
order to make these helpers available to all sched classes, we pass the
rq, sched_statistics and task_struct directly instead.

Then the new helpers are

  update_stats_wait_*(struct rq *rq, struct task_struct *p,
                      struct sched_statistics *stats)

which are independent of fair sched class.

To avoid vmlinux growing too large and to avoid introducing overhead when
!schedstat_enabled(), some new helpers that are only called after the
schedstat_enabled() check are also introduced, as suggested by Mel. These
helpers are in sched/stats.c,

  __update_stats_wait_*(struct rq *rq, struct task_struct *p,
                        struct sched_statistics *stats)
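
As an illustration only (not part of this patch), another sched class
could wrap the new helpers roughly like this; the wrapper name is
hypothetical and it assumes the per-task statistics added by the previous
patch in this series are reachable via &p->stats:

  /* Illustrative sketch of a non-fair caller; not from this patch. */
  static inline void
  update_stats_wait_start_rt(struct rq *rq, struct task_struct *p)
  {
          /* Assumption: task stats live at p->stats (previous patch). */
          struct sched_statistics *stats = &p->stats;

          if (!schedstat_enabled())
                  return;

          __update_stats_wait_start(rq, p, stats);
  }

The cheap schedstat_enabled() check stays in the inline wrapper, so the
out-of-line helper in sched/stats.c is only reached when schedstats is
actually enabled.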

The size of vmlinux is as follows,
                      Before          After
  Size of vmlinux     826308552       826304640
The size is a little smaller because some functions are no longer inlined
after the change.

I also compared the sched performance with 'perf bench sched pipe', as
suggested by Mel. The results are as follows (in usecs/op),
                             Before                After
  kernel.sched_schedstats=0  5.2~5.4               5.2~5.4
  kernel.sched_schedstats=1  5.3~5.5               5.3~5.5

[These numbers differ slightly from the previous version because my old
test machine was destroyed, so I had to use a different test machine.]
Almost no difference.

No functional change.

[lkp@intel.com: reported build failure in prev version]
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: https://lore.kernel.org/r/20210905143547.4668-4-laoar.shao@gmail.com
parent ceeadb83
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -887,32 +887,27 @@ static void update_curr_fair(struct rq *rq)
 }
 
 static inline void
-update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_wait_start_fair(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-        u64 wait_start, prev_wait_start;
         struct sched_statistics *stats;
+        struct task_struct *p = NULL;
 
         if (!schedstat_enabled())
                 return;
 
         stats = __schedstats_from_se(se);
 
-        wait_start = rq_clock(rq_of(cfs_rq));
-        prev_wait_start = schedstat_val(stats->wait_start);
+        if (entity_is_task(se))
+                p = task_of(se);
 
-        if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
-            likely(wait_start > prev_wait_start))
-                wait_start -= prev_wait_start;
-
-        __schedstat_set(stats->wait_start, wait_start);
+        __update_stats_wait_start(rq_of(cfs_rq), p, stats);
 }
 
 static inline void
-update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_wait_end_fair(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
         struct sched_statistics *stats;
         struct task_struct *p = NULL;
-        u64 delta;
 
         if (!schedstat_enabled())
                 return;
@@ -928,105 +923,34 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
         if (unlikely(!schedstat_val(stats->wait_start)))
                 return;
 
-        delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(stats->wait_start);
-
-        if (entity_is_task(se)) {
+        if (entity_is_task(se))
                 p = task_of(se);
-                if (task_on_rq_migrating(p)) {
-                        /*
-                         * Preserve migrating task's wait time so wait_start
-                         * time stamp can be adjusted to accumulate wait time
-                         * prior to migration.
-                         */
-                        __schedstat_set(stats->wait_start, delta);
-                        return;
-                }
-                trace_sched_stat_wait(p, delta);
-        }
 
-        __schedstat_set(stats->wait_max,
-                        max(schedstat_val(stats->wait_max), delta));
-        __schedstat_inc(stats->wait_count);
-        __schedstat_add(stats->wait_sum, delta);
-        __schedstat_set(stats->wait_start, 0);
+        __update_stats_wait_end(rq_of(cfs_rq), p, stats);
 }
 
 static inline void
-update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
+update_stats_enqueue_sleeper_fair(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
         struct sched_statistics *stats;
         struct task_struct *tsk = NULL;
-        u64 sleep_start, block_start;
 
         if (!schedstat_enabled())
                 return;
 
         stats = __schedstats_from_se(se);
 
-        sleep_start = schedstat_val(stats->sleep_start);
-        block_start = schedstat_val(stats->block_start);
-
         if (entity_is_task(se))
                 tsk = task_of(se);
 
-        if (sleep_start) {
-                u64 delta = rq_clock(rq_of(cfs_rq)) - sleep_start;
-
-                if ((s64)delta < 0)
-                        delta = 0;
-
-                if (unlikely(delta > schedstat_val(stats->sleep_max)))
-                        __schedstat_set(stats->sleep_max, delta);
-
-                __schedstat_set(stats->sleep_start, 0);
-                __schedstat_add(stats->sum_sleep_runtime, delta);
-
-                if (tsk) {
-                        account_scheduler_latency(tsk, delta >> 10, 1);
-                        trace_sched_stat_sleep(tsk, delta);
-                }
-        }
-        if (block_start) {
-                u64 delta = rq_clock(rq_of(cfs_rq)) - block_start;
-
-                if ((s64)delta < 0)
-                        delta = 0;
-
-                if (unlikely(delta > schedstat_val(stats->block_max)))
-                        __schedstat_set(stats->block_max, delta);
-
-                __schedstat_set(stats->block_start, 0);
-                __schedstat_add(stats->sum_sleep_runtime, delta);
-
-                if (tsk) {
-                        if (tsk->in_iowait) {
-                                __schedstat_add(stats->iowait_sum, delta);
-                                __schedstat_inc(stats->iowait_count);
-                                trace_sched_stat_iowait(tsk, delta);
-                        }
-
-                        trace_sched_stat_blocked(tsk, delta);
-
-                        /*
-                         * Blocking time is in units of nanosecs, so shift by
-                         * 20 to get a milliseconds-range estimation of the
-                         * amount of time that the task spent sleeping:
-                         */
-                        if (unlikely(prof_on == SLEEP_PROFILING)) {
-                                profile_hits(SLEEP_PROFILING,
-                                             (void *)get_wchan(tsk),
-                                             delta >> 20);
-                        }
-                        account_scheduler_latency(tsk, delta >> 10, 0);
-                }
-        }
+        __update_stats_enqueue_sleeper(rq_of(cfs_rq), tsk, stats);
 }
 
 /*
  * Task is being enqueued - update stats:
  */
 static inline void
-update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+update_stats_enqueue_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
         if (!schedstat_enabled())
                 return;
@@ -1036,14 +960,14 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
          * a dequeue/enqueue event is a NOP)
          */
         if (se != cfs_rq->curr)
-                update_stats_wait_start(cfs_rq, se);
+                update_stats_wait_start_fair(cfs_rq, se);
 
         if (flags & ENQUEUE_WAKEUP)
-                update_stats_enqueue_sleeper(cfs_rq, se);
+                update_stats_enqueue_sleeper_fair(cfs_rq, se);
 }
 
 static inline void
-update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+update_stats_dequeue_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
         if (!schedstat_enabled())
@@ -1054,7 +978,7 @@ update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
          * waiting task:
          */
         if (se != cfs_rq->curr)
-                update_stats_wait_end(cfs_rq, se);
+                update_stats_wait_end_fair(cfs_rq, se);
 
         if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) {
                 struct task_struct *tsk = task_of(se);
@@ -4267,26 +4191,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
 
-static inline void check_schedstat_required(void)
-{
-#ifdef CONFIG_SCHEDSTATS
-        if (schedstat_enabled())
-                return;
-
-        /* Force schedstat enabled if a dependent tracepoint is active */
-        if (trace_sched_stat_wait_enabled() ||
-            trace_sched_stat_sleep_enabled() ||
-            trace_sched_stat_iowait_enabled() ||
-            trace_sched_stat_blocked_enabled() ||
-            trace_sched_stat_runtime_enabled()) {
-                printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
-                                     "stat_blocked and stat_runtime require the "
-                                     "kernel parameter schedstats=enable or "
-                                     "kernel.sched_schedstats=1\n");
-        }
-#endif
-}
-
 static inline bool cfs_bandwidth_used(void);
 
 /*
@@ -4360,7 +4264,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
                 place_entity(cfs_rq, se, 0);
 
         check_schedstat_required();
-        update_stats_enqueue(cfs_rq, se, flags);
+        update_stats_enqueue_fair(cfs_rq, se, flags);
         check_spread(cfs_rq, se);
         if (!curr)
                 __enqueue_entity(cfs_rq, se);
@@ -4444,7 +4348,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
         update_load_avg(cfs_rq, se, UPDATE_TG);
         se_update_runnable(se);
 
-        update_stats_dequeue(cfs_rq, se, flags);
+        update_stats_dequeue_fair(cfs_rq, se, flags);
 
         clear_buddies(cfs_rq, se);
@@ -4529,7 +4433,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
                  * a CPU. So account for the time it spent waiting on the
                  * runqueue.
                  */
-                update_stats_wait_end(cfs_rq, se);
+                update_stats_wait_end_fair(cfs_rq, se);
                 __dequeue_entity(cfs_rq, se);
                 update_load_avg(cfs_rq, se, UPDATE_TG);
         }
@@ -4631,7 +4535,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
         check_spread(cfs_rq, prev);
 
         if (prev->on_rq) {
-                update_stats_wait_start(cfs_rq, prev);
+                update_stats_wait_start_fair(cfs_rq, prev);
                 /* Put 'current' back into the tree. */
                 __enqueue_entity(cfs_rq, prev);
                 /* in !on_rq case, update occurred at dequeue */
--- a/kernel/sched/stats.c
+++ b/kernel/sched/stats.c
@@ -4,6 +4,109 @@
  */
 #include "sched.h"
 
+void __update_stats_wait_start(struct rq *rq, struct task_struct *p,
+                               struct sched_statistics *stats)
+{
+        u64 wait_start, prev_wait_start;
+
+        wait_start = rq_clock(rq);
+        prev_wait_start = schedstat_val(stats->wait_start);
+
+        if (p && likely(wait_start > prev_wait_start))
+                wait_start -= prev_wait_start;
+
+        __schedstat_set(stats->wait_start, wait_start);
+}
+
+void __update_stats_wait_end(struct rq *rq, struct task_struct *p,
+                             struct sched_statistics *stats)
+{
+        u64 delta = rq_clock(rq) - schedstat_val(stats->wait_start);
+
+        if (p) {
+                if (task_on_rq_migrating(p)) {
+                        /*
+                         * Preserve migrating task's wait time so wait_start
+                         * time stamp can be adjusted to accumulate wait time
+                         * prior to migration.
+                         */
+                        __schedstat_set(stats->wait_start, delta);
+                        return;
+                }
+                trace_sched_stat_wait(p, delta);
+        }
+
+        __schedstat_set(stats->wait_max,
+                        max(schedstat_val(stats->wait_max), delta));
+        __schedstat_inc(stats->wait_count);
+        __schedstat_add(stats->wait_sum, delta);
+        __schedstat_set(stats->wait_start, 0);
+}
+
+void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p,
+                                    struct sched_statistics *stats)
+{
+        u64 sleep_start, block_start;
+
+        sleep_start = schedstat_val(stats->sleep_start);
+        block_start = schedstat_val(stats->block_start);
+
+        if (sleep_start) {
+                u64 delta = rq_clock(rq) - sleep_start;
+
+                if ((s64)delta < 0)
+                        delta = 0;
+
+                if (unlikely(delta > schedstat_val(stats->sleep_max)))
+                        __schedstat_set(stats->sleep_max, delta);
+
+                __schedstat_set(stats->sleep_start, 0);
+                __schedstat_add(stats->sum_sleep_runtime, delta);
+
+                if (p) {
+                        account_scheduler_latency(p, delta >> 10, 1);
+                        trace_sched_stat_sleep(p, delta);
+                }
+        }
+
+        if (block_start) {
+                u64 delta = rq_clock(rq) - block_start;
+
+                if ((s64)delta < 0)
+                        delta = 0;
+
+                if (unlikely(delta > schedstat_val(stats->block_max)))
+                        __schedstat_set(stats->block_max, delta);
+
+                __schedstat_set(stats->block_start, 0);
+                __schedstat_add(stats->sum_sleep_runtime, delta);
+
+                if (p) {
+                        if (p->in_iowait) {
+                                __schedstat_add(stats->iowait_sum, delta);
+                                __schedstat_inc(stats->iowait_count);
+                                trace_sched_stat_iowait(p, delta);
+                        }
+
+                        trace_sched_stat_blocked(p, delta);
+
+                        /*
+                         * Blocking time is in units of nanosecs, so shift by
+                         * 20 to get a milliseconds-range estimation of the
+                         * amount of time that the task spent sleeping:
+                         */
+                        if (unlikely(prof_on == SLEEP_PROFILING)) {
+                                profile_hits(SLEEP_PROFILING,
+                                             (void *)get_wchan(p),
+                                             delta >> 20);
+                        }
+                        account_scheduler_latency(p, delta >> 10, 0);
+                }
+        }
+}
+
 /*
  * Current schedstat API version.
  *
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -2,6 +2,8 @@
 
 #ifdef CONFIG_SCHEDSTATS
 
+extern struct static_key_false sched_schedstats;
+
 /*
  * Expects runqueue lock to be held for atomicity of update
  */
@@ -40,6 +42,29 @@ rq_sched_info_dequeue(struct rq *rq, unsigned long long delta)
 #define   schedstat_val(var)            (var)
 #define   schedstat_val_or_zero(var)    ((schedstat_enabled()) ? (var) : 0)
 
+void __update_stats_wait_start(struct rq *rq, struct task_struct *p,
+                               struct sched_statistics *stats);
+void __update_stats_wait_end(struct rq *rq, struct task_struct *p,
+                             struct sched_statistics *stats);
+void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p,
+                                    struct sched_statistics *stats);
+
+static inline void
+check_schedstat_required(void)
+{
+        if (schedstat_enabled())
+                return;
+
+        /* Force schedstat enabled if a dependent tracepoint is active */
+        if (trace_sched_stat_wait_enabled() ||
+            trace_sched_stat_sleep_enabled() ||
+            trace_sched_stat_iowait_enabled() ||
+            trace_sched_stat_blocked_enabled() ||
+            trace_sched_stat_runtime_enabled())
+                printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, stat_blocked and stat_runtime require the kernel parameter schedstats=enable or kernel.sched_schedstats=1\n");
+}
+
 #else /* !CONFIG_SCHEDSTATS: */
 
 static inline void rq_sched_info_arrive  (struct rq *rq, unsigned long long delta) { }
@@ -55,6 +80,11 @@ static inline void rq_sched_info_depart  (struct rq *rq, unsigned long long delt
 # define   schedstat_val(var)           0
 # define   schedstat_val_or_zero(var)   0
 
+# define __update_stats_wait_start(rq, p, stats)       do { } while (0)
+# define __update_stats_wait_end(rq, p, stats)         do { } while (0)
+# define __update_stats_enqueue_sleeper(rq, p, stats)  do { } while (0)
+# define check_schedstat_required()                    do { } while (0)
+
 #endif /* CONFIG_SCHEDSTATS */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED