Commit 22a093b2 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "Debug info and other statistics fixes and related enhancements"

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/numa: Fix numa balancing stats in /proc/pid/sched
  sched/numa: Show numa_group ID in /proc/sched_debug task listings
  sched/debug: Move print_cfs_rq() declaration to kernel/sched/sched.h
  sched/stat: Expose /proc/pid/schedstat if CONFIG_SCHED_INFO=y
  sched/stat: Simplify the sched_info accounting dependency
parents c1776a18 397f2378
...@@ -491,14 +491,17 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, ...@@ -491,14 +491,17 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
} }
#endif #endif
#ifdef CONFIG_SCHEDSTATS #ifdef CONFIG_SCHED_INFO
/* /*
* Provides /proc/PID/schedstat * Provides /proc/PID/schedstat
*/ */
static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task) struct pid *pid, struct task_struct *task)
{ {
seq_printf(m, "%llu %llu %lu\n", if (unlikely(!sched_info_on()))
seq_printf(m, "0 0 0\n");
else
seq_printf(m, "%llu %llu %lu\n",
(unsigned long long)task->se.sum_exec_runtime, (unsigned long long)task->se.sum_exec_runtime,
(unsigned long long)task->sched_info.run_delay, (unsigned long long)task->sched_info.run_delay,
task->sched_info.pcount); task->sched_info.pcount);
...@@ -2787,7 +2790,7 @@ static const struct pid_entry tgid_base_stuff[] = { ...@@ -2787,7 +2790,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_STACKTRACE #ifdef CONFIG_STACKTRACE
ONE("stack", S_IRUSR, proc_pid_stack), ONE("stack", S_IRUSR, proc_pid_stack),
#endif #endif
#ifdef CONFIG_SCHEDSTATS #ifdef CONFIG_SCHED_INFO
ONE("schedstat", S_IRUGO, proc_pid_schedstat), ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif #endif
#ifdef CONFIG_LATENCYTOP #ifdef CONFIG_LATENCYTOP
...@@ -3135,7 +3138,7 @@ static const struct pid_entry tid_base_stuff[] = { ...@@ -3135,7 +3138,7 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_STACKTRACE #ifdef CONFIG_STACKTRACE
ONE("stack", S_IRUSR, proc_pid_stack), ONE("stack", S_IRUSR, proc_pid_stack),
#endif #endif
#ifdef CONFIG_SCHEDSTATS #ifdef CONFIG_SCHED_INFO
ONE("schedstat", S_IRUGO, proc_pid_schedstat), ONE("schedstat", S_IRUGO, proc_pid_schedstat),
#endif #endif
#ifdef CONFIG_LATENCYTOP #ifdef CONFIG_LATENCYTOP
......
...@@ -192,8 +192,6 @@ struct task_group; ...@@ -192,8 +192,6 @@ struct task_group;
#ifdef CONFIG_SCHED_DEBUG #ifdef CONFIG_SCHED_DEBUG
extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
extern void proc_sched_set_task(struct task_struct *p); extern void proc_sched_set_task(struct task_struct *p);
extern void
print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
#endif #endif
/* /*
...@@ -838,7 +836,7 @@ extern struct user_struct root_user; ...@@ -838,7 +836,7 @@ extern struct user_struct root_user;
struct backing_dev_info; struct backing_dev_info;
struct reclaim_state; struct reclaim_state;
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) #ifdef CONFIG_SCHED_INFO
struct sched_info { struct sched_info {
/* cumulative counters */ /* cumulative counters */
unsigned long pcount; /* # of times run on this cpu */ unsigned long pcount; /* # of times run on this cpu */
...@@ -848,7 +846,7 @@ struct sched_info { ...@@ -848,7 +846,7 @@ struct sched_info {
unsigned long long last_arrival,/* when we last ran on a cpu */ unsigned long long last_arrival,/* when we last ran on a cpu */
last_queued; /* when we were last queued to run */ last_queued; /* when we were last queued to run */
}; };
#endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */ #endif /* CONFIG_SCHED_INFO */
#ifdef CONFIG_TASK_DELAY_ACCT #ifdef CONFIG_TASK_DELAY_ACCT
struct task_delay_info { struct task_delay_info {
...@@ -1397,7 +1395,7 @@ struct task_struct { ...@@ -1397,7 +1395,7 @@ struct task_struct {
int rcu_tasks_idle_cpu; int rcu_tasks_idle_cpu;
#endif /* #ifdef CONFIG_TASKS_RCU */ #endif /* #ifdef CONFIG_TASKS_RCU */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) #ifdef CONFIG_SCHED_INFO
struct sched_info sched_info; struct sched_info sched_info;
#endif #endif
......
...@@ -435,6 +435,7 @@ config TASKSTATS ...@@ -435,6 +435,7 @@ config TASKSTATS
config TASK_DELAY_ACCT config TASK_DELAY_ACCT
bool "Enable per-task delay accounting" bool "Enable per-task delay accounting"
depends on TASKSTATS depends on TASKSTATS
select SCHED_INFO
help help
Collect information on time spent by a task waiting for system Collect information on time spent by a task waiting for system
resources like cpu, synchronous block I/O completion and swapping resources like cpu, synchronous block I/O completion and swapping
......
...@@ -2164,7 +2164,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) ...@@ -2164,7 +2164,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
set_task_cpu(p, cpu); set_task_cpu(p, cpu);
raw_spin_unlock_irqrestore(&p->pi_lock, flags); raw_spin_unlock_irqrestore(&p->pi_lock, flags);
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) #ifdef CONFIG_SCHED_INFO
if (likely(sched_info_on())) if (likely(sched_info_on()))
memset(&p->sched_info, 0, sizeof(p->sched_info)); memset(&p->sched_info, 0, sizeof(p->sched_info));
#endif #endif
......
...@@ -142,7 +142,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) ...@@ -142,7 +142,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
0LL, 0L); 0LL, 0L);
#endif #endif
#ifdef CONFIG_NUMA_BALANCING #ifdef CONFIG_NUMA_BALANCING
SEQ_printf(m, " %d", task_node(p)); SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
#endif #endif
#ifdef CONFIG_CGROUP_SCHED #ifdef CONFIG_CGROUP_SCHED
SEQ_printf(m, " %s", task_group_path(task_group(p))); SEQ_printf(m, " %s", task_group_path(task_group(p)));
...@@ -517,11 +517,21 @@ __initcall(init_sched_debug_procfs); ...@@ -517,11 +517,21 @@ __initcall(init_sched_debug_procfs);
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F)) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
#ifdef CONFIG_NUMA_BALANCING
void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
unsigned long tpf, unsigned long gsf, unsigned long gpf)
{
SEQ_printf(m, "numa_faults node=%d ", node);
SEQ_printf(m, "task_private=%lu task_shared=%lu ", tsf, tpf);
SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gsf, gpf);
}
#endif
static void sched_show_numa(struct task_struct *p, struct seq_file *m) static void sched_show_numa(struct task_struct *p, struct seq_file *m)
{ {
#ifdef CONFIG_NUMA_BALANCING #ifdef CONFIG_NUMA_BALANCING
struct mempolicy *pol; struct mempolicy *pol;
int node, i;
if (p->mm) if (p->mm)
P(mm->numa_scan_seq); P(mm->numa_scan_seq);
...@@ -533,26 +543,12 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m) ...@@ -533,26 +543,12 @@ static void sched_show_numa(struct task_struct *p, struct seq_file *m)
mpol_get(pol); mpol_get(pol);
task_unlock(p); task_unlock(p);
SEQ_printf(m, "numa_migrations, %ld\n", xchg(&p->numa_pages_migrated, 0)); P(numa_pages_migrated);
P(numa_preferred_nid);
for_each_online_node(node) { P(total_numa_faults);
for (i = 0; i < 2; i++) { SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
unsigned long nr_faults = -1; task_node(p), task_numa_group_id(p));
int cpu_current, home_node; show_numa_stats(p, m);
if (p->numa_faults)
nr_faults = p->numa_faults[2*node + i];
cpu_current = !i ? (task_node(p) == node) :
(pol && node_isset(node, pol->v.nodes));
home_node = (p->numa_preferred_nid == node);
SEQ_printf(m, "numa_faults_memory, %d, %d, %d, %d, %ld\n",
i, node, cpu_current, home_node, nr_faults);
}
}
mpol_put(pol); mpol_put(pol);
#endif #endif
} }
......
...@@ -8473,7 +8473,27 @@ void print_cfs_stats(struct seq_file *m, int cpu) ...@@ -8473,7 +8473,27 @@ void print_cfs_stats(struct seq_file *m, int cpu)
print_cfs_rq(m, cpu, cfs_rq); print_cfs_rq(m, cpu, cfs_rq);
rcu_read_unlock(); rcu_read_unlock();
} }
#endif
#ifdef CONFIG_NUMA_BALANCING
void show_numa_stats(struct task_struct *p, struct seq_file *m)
{
int node;
unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
for_each_online_node(node) {
if (p->numa_faults) {
tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
}
if (p->numa_group) {
gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)],
gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
}
print_numa_stats(m, node, tsf, tpf, gsf, gpf);
}
}
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
__init void init_sched_fair_class(void) __init void init_sched_fair_class(void)
{ {
......
...@@ -1689,9 +1689,22 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) ...@@ -1689,9 +1689,22 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
#ifdef CONFIG_SCHED_DEBUG
extern void print_cfs_stats(struct seq_file *m, int cpu); extern void print_cfs_stats(struct seq_file *m, int cpu);
extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_rt_stats(struct seq_file *m, int cpu);
extern void print_dl_stats(struct seq_file *m, int cpu); extern void print_dl_stats(struct seq_file *m, int cpu);
extern void
print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
#ifdef CONFIG_NUMA_BALANCING
extern void
show_numa_stats(struct task_struct *p, struct seq_file *m);
extern void
print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
unsigned long tpf, unsigned long gsf, unsigned long gpf);
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
extern void init_cfs_rq(struct cfs_rq *cfs_rq); extern void init_cfs_rq(struct cfs_rq *cfs_rq);
extern void init_rt_rq(struct rt_rq *rt_rq); extern void init_rt_rq(struct rt_rq *rt_rq);
......
...@@ -47,7 +47,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) ...@@ -47,7 +47,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta)
# define schedstat_set(var, val) do { } while (0) # define schedstat_set(var, val) do { } while (0)
#endif #endif
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) #ifdef CONFIG_SCHED_INFO
static inline void sched_info_reset_dequeued(struct task_struct *t) static inline void sched_info_reset_dequeued(struct task_struct *t)
{ {
t->sched_info.last_queued = 0; t->sched_info.last_queued = 0;
...@@ -156,7 +156,7 @@ sched_info_switch(struct rq *rq, ...@@ -156,7 +156,7 @@ sched_info_switch(struct rq *rq,
#define sched_info_depart(rq, t) do { } while (0) #define sched_info_depart(rq, t) do { } while (0)
#define sched_info_arrive(rq, next) do { } while (0) #define sched_info_arrive(rq, next) do { } while (0)
#define sched_info_switch(rq, t, next) do { } while (0) #define sched_info_switch(rq, t, next) do { } while (0)
#endif /* CONFIG_SCHEDSTATS || CONFIG_TASK_DELAY_ACCT */ #endif /* CONFIG_SCHED_INFO */
/* /*
* The following are functions that support scheduler-internal time accounting. * The following are functions that support scheduler-internal time accounting.
......
...@@ -841,9 +841,14 @@ config SCHED_DEBUG ...@@ -841,9 +841,14 @@ config SCHED_DEBUG
that can help debug the scheduler. The runtime overhead of this that can help debug the scheduler. The runtime overhead of this
option is minimal. option is minimal.
config SCHED_INFO
bool
default n
config SCHEDSTATS config SCHEDSTATS
bool "Collect scheduler statistics" bool "Collect scheduler statistics"
depends on DEBUG_KERNEL && PROC_FS depends on DEBUG_KERNEL && PROC_FS
select SCHED_INFO
help help
If you say Y here, additional code will be inserted into the If you say Y here, additional code will be inserted into the
scheduler and related routines to collect statistics about scheduler and related routines to collect statistics about
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment