Commit 2d722f6d authored by Linus Torvalds

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The main changes:

   - load-calculation cleanups and improvements, by Alex Shi
   - various nohz related tidying up of statistics, by Frederic
     Weisbecker
   - factor out /proc functions to kernel/sched/proc.c, by Paul
     Gortmaker
   - simplify the RT policy scheduler, by Kirill Tkhai
   - various fixes and cleanups"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (42 commits)
  sched/debug: Remove CONFIG_FAIR_GROUP_SCHED mask
  sched/debug: Fix formatting of /proc/<PID>/sched
  sched: Fix typo in struct sched_avg member description
  sched/fair: Fix typo describing flags in enqueue_entity
  sched/debug: Add load-tracking statistics to task
  sched: Change get_rq_runnable_load() to static and inline
  sched/tg: Remove tg.load_weight
  sched/cfs_rq: Change atomic64_t removed_load to atomic_long_t
  sched/tg: Use 'unsigned long' for load variable in task group
  sched: Change cfs_rq load avg to unsigned long
  sched: Consider runnable load average in move_tasks()
  sched: Compute runnable load avg in cpu_load and cpu_avg_load_per_task
  sched: Update cpu load after task_tick
  sched: Fix sleep time double accounting in enqueue entity
  sched: Set an initial value of runnable avg for new forked task
  sched: Move a few runnable tg variables into CONFIG_SMP
  Revert "sched: Introduce temporary FAIR_GROUP_SCHED dependency for load-tracking"
  sched: Don't mix use of typedef ctl_table and struct ctl_table
  sched: Remove WARN_ON(!sd) from init_sched_groups_power()
  sched: Fix memory leakage in build_sched_groups()
  ...
parents f0bb4c0a 2fd1b487
......@@ -373,7 +373,7 @@ can become very uneven.
1.7 What is sched_load_balance ?
--------------------------------
The kernel scheduler (kernel/sched.c) automatically load balances
The kernel scheduler (kernel/sched/core.c) automatically load balances
tasks. If one CPU is underutilized, kernel code running on that
CPU will look for tasks on other more overloaded CPUs and move those
tasks to itself, within the constraints of such placement mechanisms
......
......@@ -384,7 +384,7 @@ priority back.
__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the
result does not equal the task's current priority, then rt_mutex_setprio
is called to adjust the priority of the task to the new priority.
Note that rt_mutex_setprio is defined in kernel/sched.c to implement the
Note that rt_mutex_setprio is defined in kernel/sched/core.c to implement the
actual change in priority.
It is interesting to note that __rt_mutex_adjust_prio can either increase
......
......@@ -25,7 +25,7 @@ is treated as one entity. The load of a group is defined as the sum of the
load of each of its member CPUs, and only when the load of a group becomes
out of balance are tasks moved between groups.
In kernel/sched.c, trigger_load_balance() is run periodically on each CPU
In kernel/sched/core.c, trigger_load_balance() is run periodically on each CPU
through scheduler_tick(). It raises a softirq after the next regularly scheduled
rebalancing event for the current runqueue has arrived. The actual load
balancing workhorse, run_rebalance_domains()->rebalance_domains(), is then run
......@@ -62,7 +62,7 @@ struct sched_domain fields, SD_FLAG_*, SD_*_INIT to get an idea of
the specifics and what to tune.
Architectures may retain the regular override the default SD_*_INIT flags
while using the generic domain builder in kernel/sched.c if they wish to
while using the generic domain builder in kernel/sched/core.c if they wish to
retain the traditional SMT->SMP->NUMA topology (or some subset of that). This
can be done by #define'ing ARCH_HASH_SCHED_TUNE.
......
......@@ -137,7 +137,7 @@ don't block on each other (and thus there is no dead-lock wrt interrupts.
But when you do the write-lock, you have to use the irq-safe version.
For an example of being clever with rw-locks, see the "waitqueue_lock"
handling in kernel/sched.c - nothing ever _changes_ a wait-queue from
handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from
within an interrupt, they only read the queue in order to know whom to
wake up. So read-locks are safe (which is good: they are very common
indeed), while write-locks need to protect themselves against interrupts.
......
......@@ -3127,7 +3127,7 @@
at process_kern.c:156
#3 0x1006a052 in switch_to (prev=0x50072000, next=0x507e8000, last=0x50072000)
at process_kern.c:161
#4 0x10001d12 in schedule () at sched.c:777
#4 0x10001d12 in schedule () at core.c:777
#5 0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71
#6 0x1006aa10 in __down_failed () at semaphore.c:157
#7 0x1006c5d8 in segv_handler (sc=0x5006e940) at trap_user.c:174
......@@ -3191,7 +3191,7 @@
at process_kern.c:161
161 _switch_to(prev, next);
(gdb)
#4 0x10001d12 in schedule () at sched.c:777
#4 0x10001d12 in schedule () at core.c:777
777 switch_to(prev, next, prev);
(gdb)
#5 0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71
......
......@@ -341,7 +341,7 @@ unsigned long get_wchan(struct task_struct *p)
* is actually quite ugly. It might be possible to
* determine the frame size automatically at build
* time by doing this:
* - compile sched.c
* - compile sched/core.c
* - disassemble the resulting sched.o
* - look for 'sub sp,??' shortly after '<schedule>:'
*/
......
......@@ -17,7 +17,7 @@ static inline unsigned long cris_swapnwbrlz(unsigned long w)
in another register:
! __asm__ ("swapnwbr %2\n\tlz %2,%0"
! : "=r,r" (res), "=r,X" (dummy) : "1,0" (w));
confuses gcc (sched.c, gcc from cris-dist-1.14). */
confuses gcc (core.c, gcc from cris-dist-1.14). */
unsigned long res;
__asm__ ("swapnwbr %0 \n\t"
......
......@@ -1035,7 +1035,7 @@ END(ia64_delay_loop)
* Return a CPU-local timestamp in nano-seconds. This timestamp is
* NOT synchronized across CPUs its return value must never be
* compared against the values returned on another CPU. The usage in
* kernel/sched.c ensures that.
* kernel/sched/core.c ensures that.
*
* The return-value of sched_clock() is NOT supposed to wrap-around.
* If it did, it would cause some scheduling hiccups (at the worst).
......
......@@ -27,12 +27,12 @@ unsigned long mt_fpemul_threshold;
* FPU affinity with the user's requested processor affinity.
* This code is 98% identical with the sys_sched_setaffinity()
* and sys_sched_getaffinity() system calls, and should be
* updated when kernel/sched.c changes.
* updated when kernel/sched/core.c changes.
*/
/*
* find_process_by_pid - find a process with a matching PID value.
* used in sys_sched_set/getaffinity() in kernel/sched.c, so
* used in sys_sched_set/getaffinity() in kernel/sched/core.c, so
* cloned here.
*/
static inline struct task_struct *find_process_by_pid(pid_t pid)
......
......@@ -476,8 +476,9 @@ einval: li v0, -ENOSYS
/*
* For FPU affinity scheduling on MIPS MT processors, we need to
* intercept sys_sched_xxxaffinity() calls until we get a proper hook
* in kernel/sched.c. Considered only temporary we only support these
* hooks for the 32-bit kernel - there is no MIPS64 MT processor atm.
* in kernel/sched/core.c. Considered only temporary we only support
* these hooks for the 32-bit kernel - there is no MIPS64 MT processor
* atm.
*/
sys mipsmt_sys_sched_setaffinity 3
sys mipsmt_sys_sched_getaffinity 3
......
......@@ -38,7 +38,7 @@ extern void drop_cop(unsigned long acop, struct mm_struct *mm);
/*
* switch_mm is the entry point called from the architecture independent
* code in kernel/sched.c
* code in kernel/sched/core.c
*/
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
......
......@@ -225,7 +225,7 @@ extern int do_work_pending(struct pt_regs *regs, u32 flags);
/*
* Return saved (kernel) PC of a blocked thread.
* Only used in a printk() in kernel/sched.c, so don't work too hard.
* Only used in a printk() in kernel/sched/core.c, so don't work too hard.
*/
#define thread_saved_pc(t) ((t)->thread.pc)
......
......@@ -442,7 +442,7 @@ void _KBacktraceIterator_init_current(struct KBacktraceIterator *kbt, ulong pc,
regs_to_pt_regs(&regs, pc, lr, sp, r52));
}
/* This is called only from kernel/sched.c, with esp == NULL */
/* This is called only from kernel/sched/core.c, with esp == NULL */
void show_stack(struct task_struct *task, unsigned long *esp)
{
struct KBacktraceIterator kbt;
......
......@@ -39,7 +39,7 @@ void show_trace(struct task_struct *task, unsigned long * stack)
static const int kstack_depth_to_print = 24;
/* This recently started being used in arch-independent code too, as in
* kernel/sched.c.*/
* kernel/sched/core.c.*/
void show_stack(struct task_struct *task, unsigned long *esp)
{
unsigned long *stack;
......
......@@ -5,7 +5,7 @@
* (C) Copyright 2001 Linus Torvalds
*
* Atomic wait-for-completion handler data structures.
* See kernel/sched.c for details.
* See kernel/sched/core.c for details.
*/
#include <linux/wait.h>
......
......@@ -822,7 +822,7 @@ static inline void perf_restore_debug_store(void) { }
#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x))
/*
* This has to have a higher priority than migration_notifier in sched.c.
* This has to have a higher priority than migration_notifier in sched/core.c.
*/
#define perf_cpu_notifier(fn) \
do { \
......
......@@ -924,7 +924,7 @@ struct load_weight {
struct sched_avg {
/*
* These sums represent an infinite geometric series and so are bound
* above by 1024/(1-y). Thus we only need a u32 to store them for for all
* above by 1024/(1-y). Thus we only need a u32 to store them for all
* choices of y < 1-2^(-32)*1024.
*/
u32 runnable_avg_sum, runnable_avg_period;
......@@ -994,12 +994,7 @@ struct sched_entity {
struct cfs_rq *my_q;
#endif
/*
* Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
* removed when useful for applications beyond shares distribution (e.g.
* load-balance).
*/
#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
#ifdef CONFIG_SMP
/* Per-entity load-tracking */
struct sched_avg avg;
#endif
......
......@@ -67,7 +67,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
#else /* DEBUG_SPINLOCK */
#define arch_spin_is_locked(lock) ((void)(lock), 0)
/* for sched.c and kernel_lock.c: */
/* for sched/core.c and kernel_lock.c: */
# define arch_spin_lock(lock) do { barrier(); (void)(lock); } while (0)
# define arch_spin_lock_flags(lock, flags) do { barrier(); (void)(lock); } while (0)
# define arch_spin_unlock(lock) do { barrier(); (void)(lock); } while (0)
......
......@@ -361,7 +361,7 @@ __SYSCALL(__NR_syslog, sys_syslog)
#define __NR_ptrace 117
__SYSCALL(__NR_ptrace, sys_ptrace)
/* kernel/sched.c */
/* kernel/sched/core.c */
#define __NR_sched_setparam 118
__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
#define __NR_sched_setscheduler 119
......
......@@ -540,7 +540,7 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
* This function builds a partial partition of the systems CPUs
* A 'partial partition' is a set of non-overlapping subsets whose
* union is a subset of that set.
* The output of this function needs to be passed to kernel/sched.c
* The output of this function needs to be passed to kernel/sched/core.c
* partition_sched_domains() routine, which will rebuild the scheduler's
* load balancing domains (sched domains) as specified by that partial
* partition.
......@@ -569,7 +569,7 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
* is a subset of one of these domains, while there are as
* many such domains as possible, each as small as possible.
* doms - Conversion of 'csa' to an array of cpumasks, for passing to
* the kernel/sched.c routine partition_sched_domains() in a
* the kernel/sched/core.c routine partition_sched_domains() in a
* convenient format, that can be easily compared to the prior
* value to determine what partition elements (sched domains)
* were changed (added or removed.)
......
......@@ -11,7 +11,7 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
endif
obj-y += core.o clock.o cputime.o idle_task.o fair.o rt.o stop_task.o
obj-y += core.o proc.o clock.o cputime.o idle_task.o fair.o rt.o stop_task.o
obj-$(CONFIG_SMP) += cpupri.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
obj-$(CONFIG_SCHEDSTATS) += stats.o
......
......@@ -77,8 +77,6 @@ static inline struct autogroup *autogroup_create(void)
if (IS_ERR(tg))
goto out_free;
sched_online_group(tg, &root_task_group);
kref_init(&ag->kref);
init_rwsem(&ag->lock);
ag->id = atomic_inc_return(&autogroup_seq_nr);
......@@ -98,6 +96,7 @@ static inline struct autogroup *autogroup_create(void)
#endif
tg->autogroup = ag;
sched_online_group(tg, &root_task_group);
return ag;
out_free:
......
This diff is collapsed.
......@@ -515,9 +515,8 @@ static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
for (;;) {
/* Make sure "rtime" is the bigger of stime/rtime */
if (stime > rtime) {
u64 tmp = rtime; rtime = stime; stime = tmp;
}
if (stime > rtime)
swap(rtime, stime);
/* Make sure 'total' fits in 32 bits */
if (total >> 32)
......
......@@ -209,22 +209,24 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
cfs_rq->nr_spread_over);
SEQ_printf(m, " .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP
SEQ_printf(m, " .%-30s: %lld\n", "runnable_load_avg",
SEQ_printf(m, " .%-30s: %ld\n", "runnable_load_avg",
cfs_rq->runnable_load_avg);
SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg",
SEQ_printf(m, " .%-30s: %ld\n", "blocked_load_avg",
cfs_rq->blocked_load_avg);
SEQ_printf(m, " .%-30s: %lld\n", "tg_load_avg",
(unsigned long long)atomic64_read(&cfs_rq->tg->load_avg));
SEQ_printf(m, " .%-30s: %lld\n", "tg_load_contrib",
#ifdef CONFIG_FAIR_GROUP_SCHED
SEQ_printf(m, " .%-30s: %ld\n", "tg_load_contrib",
cfs_rq->tg_load_contrib);
SEQ_printf(m, " .%-30s: %d\n", "tg_runnable_contrib",
cfs_rq->tg_runnable_contrib);
SEQ_printf(m, " .%-30s: %ld\n", "tg_load_avg",
atomic_long_read(&cfs_rq->tg->load_avg));
SEQ_printf(m, " .%-30s: %d\n", "tg->runnable_avg",
atomic_read(&cfs_rq->tg->runnable_avg));
#endif
#endif
#ifdef CONFIG_FAIR_GROUP_SCHED
print_cfs_group_stats(m, cpu, cfs_rq->tg);
#endif
}
......@@ -493,15 +495,16 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid,
get_nr_threads(p));
SEQ_printf(m,
"---------------------------------------------------------\n");
"---------------------------------------------------------"
"----------\n");
#define __P(F) \
SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)F)
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
#define P(F) \
SEQ_printf(m, "%-35s:%21Ld\n", #F, (long long)p->F)
SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
#define __PN(F) \
SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN(F) \
SEQ_printf(m, "%-35s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
PN(se.exec_start);
PN(se.vruntime);
......@@ -560,12 +563,18 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
}
#endif
__P(nr_switches);
SEQ_printf(m, "%-35s:%21Ld\n",
SEQ_printf(m, "%-45s:%21Ld\n",
"nr_voluntary_switches", (long long)p->nvcsw);
SEQ_printf(m, "%-35s:%21Ld\n",
SEQ_printf(m, "%-45s:%21Ld\n",
"nr_involuntary_switches", (long long)p->nivcsw);
P(se.load.weight);
#ifdef CONFIG_SMP
P(se.avg.runnable_avg_sum);
P(se.avg.runnable_avg_period);
P(se.avg.load_avg_contrib);
P(se.avg.decay_count);
#endif
P(policy);
P(prio);
#undef PN
......@@ -579,7 +588,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
t0 = cpu_clock(this_cpu);
t1 = cpu_clock(this_cpu);
SEQ_printf(m, "%-35s:%21Ld\n",
SEQ_printf(m, "%-45s:%21Ld\n",
"clock-delta", (long long)(t1-t0));
}
}
......
This diff is collapsed.
This diff is collapsed.
......@@ -399,20 +399,6 @@ static inline struct task_group *next_task_group(struct task_group *tg)
(iter = next_task_group(iter)) && \
(rt_rq = iter->rt_rq[cpu_of(rq)]);)
static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
{
list_add_rcu(&rt_rq->leaf_rt_rq_list,
&rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
}
static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
{
list_del_rcu(&rt_rq->leaf_rt_rq_list);
}
#define for_each_leaf_rt_rq(rt_rq, rq) \
list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
#define for_each_sched_rt_entity(rt_se) \
for (; rt_se; rt_se = rt_se->parent)
......@@ -472,7 +458,7 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se)
#ifdef CONFIG_SMP
static inline const struct cpumask *sched_rt_period_mask(void)
{
return cpu_rq(smp_processor_id())->rd->span;
return this_rq()->rd->span;
}
#else
static inline const struct cpumask *sched_rt_period_mask(void)
......@@ -509,17 +495,6 @@ typedef struct rt_rq *rt_rq_iter_t;
#define for_each_rt_rq(rt_rq, iter, rq) \
for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
{
}
static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
{
}
#define for_each_leaf_rt_rq(rt_rq, rq) \
for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
#define for_each_sched_rt_entity(rt_se) \
for (; rt_se; rt_se = NULL)
......@@ -699,15 +674,6 @@ static void __disable_runtime(struct rq *rq)
}
}
static void disable_runtime(struct rq *rq)
{
unsigned long flags;
raw_spin_lock_irqsave(&rq->lock, flags);
__disable_runtime(rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
static void __enable_runtime(struct rq *rq)
{
rt_rq_iter_t iter;
......@@ -732,37 +698,6 @@ static void __enable_runtime(struct rq *rq)
}
}
static void enable_runtime(struct rq *rq)
{
unsigned long flags;
raw_spin_lock_irqsave(&rq->lock, flags);
__enable_runtime(rq);
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
int cpu = (int)(long)hcpu;
switch (action) {
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
disable_runtime(cpu_rq(cpu));
return NOTIFY_OK;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
enable_runtime(cpu_rq(cpu));
return NOTIFY_OK;
default:
return NOTIFY_DONE;
}
}
static int balance_runtime(struct rt_rq *rt_rq)
{
int more = 0;
......@@ -926,7 +861,7 @@ static void update_curr_rt(struct rq *rq)
if (curr->sched_class != &rt_sched_class)
return;
delta_exec = rq->clock_task - curr->se.exec_start;
delta_exec = rq_clock_task(rq) - curr->se.exec_start;
if (unlikely((s64)delta_exec <= 0))
return;
......@@ -936,7 +871,7 @@ static void update_curr_rt(struct rq *rq)
curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
curr->se.exec_start = rq->clock_task;
curr->se.exec_start = rq_clock_task(rq);
cpuacct_charge(curr, delta_exec);
sched_rt_avg_update(rq, delta_exec);
......@@ -1106,9 +1041,6 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
return;
if (!rt_rq->rt_nr_running)
list_add_leaf_rt_rq(rt_rq);
if (head)
list_add(&rt_se->run_list, queue);
else
......@@ -1128,8 +1060,6 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
__clear_bit(rt_se_prio(rt_se), array->bitmap);
dec_rt_tasks(rt_se, rt_rq);
if (!rt_rq->rt_nr_running)
list_del_leaf_rt_rq(rt_rq);
}
/*
......@@ -1385,7 +1315,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
} while (rt_rq);
p = rt_task_of(rt_se);
p->se.exec_start = rq->clock_task;
p->se.exec_start = rq_clock_task(rq);
return p;
}
......@@ -1434,42 +1364,24 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
return 0;
}
/* Return the second highest RT task, NULL otherwise */
static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
/*
* Return the highest pushable rq's task, which is suitable to be executed
* on the cpu, NULL otherwise
*/
static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
{
struct task_struct *next = NULL;
struct sched_rt_entity *rt_se;
struct rt_prio_array *array;
struct rt_rq *rt_rq;
int idx;
for_each_leaf_rt_rq(rt_rq, rq) {
array = &rt_rq->active;
idx = sched_find_first_bit(array->bitmap);
next_idx:
if (idx >= MAX_RT_PRIO)
continue;
if (next && next->prio <= idx)
continue;
list_for_each_entry(rt_se, array->queue + idx, run_list) {
struct plist_head *head = &rq->rt.pushable_tasks;
struct task_struct *p;
if (!rt_entity_is_task(rt_se))
continue;
if (!has_pushable_tasks(rq))
return NULL;
p = rt_task_of(rt_se);
if (pick_rt_task(rq, p, cpu)) {
next = p;
break;
}
}
if (!next) {
idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
goto next_idx;
}
plist_for_each_entry(p, head, pushable_tasks) {
if (pick_rt_task(rq, p, cpu))
return p;
}
return next;
return NULL;
}
static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);
......@@ -1743,12 +1655,10 @@ static int pull_rt_task(struct rq *this_rq)
double_lock_balance(this_rq, src_rq);
/*
* Are there still pullable RT tasks?
* We can pull only a task, which is pushable
* on its rq, and no others.
*/
if (src_rq->rt.rt_nr_running <= 1)
goto skip;
p = pick_next_highest_task_rt(src_rq, this_cpu);
p = pick_highest_pushable_task(src_rq, this_cpu);
/*
* Do we have an RT task that preempts
......@@ -2037,7 +1947,7 @@ static void set_curr_task_rt(struct rq *rq)
{
struct task_struct *p = rq->curr;
p->se.exec_start = rq->clock_task;
p->se.exec_start = rq_clock_task(rq);
/* The running task is never eligible for pushing */
dequeue_pushable_task(rq, p);
......
......@@ -10,8 +10,16 @@
#include "cpupri.h"
#include "cpuacct.h"
struct rq;
extern __read_mostly int scheduler_running;
extern unsigned long calc_load_update;
extern atomic_long_t calc_load_tasks;
extern long calc_load_fold_active(struct rq *this_rq);
extern void update_cpu_load_active(struct rq *this_rq);
/*
* Convert user-nice values [ -20 ... 0 ... 19 ]
* to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
......@@ -140,10 +148,11 @@ struct task_group {
struct cfs_rq **cfs_rq;
unsigned long shares;
atomic_t load_weight;
atomic64_t load_avg;
#ifdef CONFIG_SMP
atomic_long_t load_avg;
atomic_t runnable_avg;
#endif
#endif
#ifdef CONFIG_RT_GROUP_SCHED
struct sched_rt_entity **rt_se;
......@@ -261,26 +270,21 @@ struct cfs_rq {
#endif
#ifdef CONFIG_SMP
/*
* Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
* removed when useful for applications beyond shares distribution (e.g.
* load-balance).
*/
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
* CFS Load tracking
* Under CFS, load is tracked on a per-entity basis and aggregated up.
* This allows for the description of both thread and group usage (in
* the FAIR_GROUP_SCHED case).
*/
u64 runnable_load_avg, blocked_load_avg;
atomic64_t decay_counter, removed_load;
unsigned long runnable_load_avg, blocked_load_avg;
atomic64_t decay_counter;
u64 last_decay;
#endif /* CONFIG_FAIR_GROUP_SCHED */
/* These always depend on CONFIG_FAIR_GROUP_SCHED */
atomic_long_t removed_load;
#ifdef CONFIG_FAIR_GROUP_SCHED
/* Required to track per-cpu representation of a task_group */
u32 tg_runnable_contrib;
u64 tg_load_contrib;
unsigned long tg_load_contrib;
#endif /* CONFIG_FAIR_GROUP_SCHED */
/*
......@@ -353,7 +357,6 @@ struct rt_rq {
unsigned long rt_nr_boosted;
struct rq *rq;
struct list_head leaf_rt_rq_list;
struct task_group *tg;
#endif
};
......@@ -540,6 +543,16 @@ DECLARE_PER_CPU(struct rq, runqueues);
#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define raw_rq() (&__raw_get_cpu_var(runqueues))
static inline u64 rq_clock(struct rq *rq)
{
return rq->clock;
}
static inline u64 rq_clock_task(struct rq *rq)
{
return rq->clock_task;
}
#ifdef CONFIG_SMP
#define rcu_dereference_check_sched_domain(p) \
......@@ -884,24 +897,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
#define WF_FORK 0x02 /* child wakeup after fork */
#define WF_MIGRATED 0x4 /* internal use, task got migrated */
static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
lw->weight += inc;
lw->inv_weight = 0;
}
static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
{
lw->weight -= dec;
lw->inv_weight = 0;
}
static inline void update_load_set(struct load_weight *lw, unsigned long w)
{
lw->weight = w;
lw->inv_weight = 0;
}
/*
* To aid in avoiding the subversion of "niceness" due to uneven distribution
* of tasks with abnormal "nice" values across CPUs the contribution that
......@@ -1028,17 +1023,8 @@ extern void update_group_power(struct sched_domain *sd, int cpu);
extern void trigger_load_balance(struct rq *rq, int cpu);
extern void idle_balance(int this_cpu, struct rq *this_rq);
/*
* Only depends on SMP, FAIR_GROUP_SCHED may be removed when runnable_avg
* becomes useful in lb
*/
#if defined(CONFIG_FAIR_GROUP_SCHED)
extern void idle_enter_fair(struct rq *this_rq);
extern void idle_exit_fair(struct rq *this_rq);
#else
static inline void idle_enter_fair(struct rq *this_rq) {}
static inline void idle_exit_fair(struct rq *this_rq) {}
#endif
#else /* CONFIG_SMP */
......@@ -1051,7 +1037,6 @@ static inline void idle_balance(int cpu, struct rq *rq)
extern void sysrq_sched_debug_show(void);
extern void sched_init_granularity(void);
extern void update_max_interval(void);
extern int update_runtime(struct notifier_block *nfb, unsigned long action, void *hcpu);
extern void init_sched_rt_class(void);
extern void init_sched_fair_class(void);
......@@ -1063,6 +1048,8 @@ extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime
extern void update_idle_cpu_load(struct rq *this_rq);
extern void init_task_runnable_average(struct task_struct *p);
#ifdef CONFIG_PARAVIRT
static inline u64 steal_ticks(u64 steal)
{
......
......@@ -61,7 +61,7 @@ static inline void sched_info_reset_dequeued(struct task_struct *t)
*/
static inline void sched_info_dequeued(struct task_struct *t)
{
unsigned long long now = task_rq(t)->clock, delta = 0;
unsigned long long now = rq_clock(task_rq(t)), delta = 0;
if (unlikely(sched_info_on()))
if (t->sched_info.last_queued)
......@@ -79,7 +79,7 @@ static inline void sched_info_dequeued(struct task_struct *t)
*/
static void sched_info_arrive(struct task_struct *t)
{
unsigned long long now = task_rq(t)->clock, delta = 0;
unsigned long long now = rq_clock(task_rq(t)), delta = 0;
if (t->sched_info.last_queued)
delta = now - t->sched_info.last_queued;
......@@ -100,7 +100,7 @@ static inline void sched_info_queued(struct task_struct *t)
{
if (unlikely(sched_info_on()))
if (!t->sched_info.last_queued)
t->sched_info.last_queued = task_rq(t)->clock;
t->sched_info.last_queued = rq_clock(task_rq(t));
}
/*
......@@ -112,7 +112,7 @@ static inline void sched_info_queued(struct task_struct *t)
*/
static inline void sched_info_depart(struct task_struct *t)
{
unsigned long long delta = task_rq(t)->clock -
unsigned long long delta = rq_clock(task_rq(t)) -
t->sched_info.last_arrival;
rq_sched_info_depart(task_rq(t), delta);
......
......@@ -28,7 +28,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
struct task_struct *stop = rq->stop;
if (stop && stop->on_rq) {
stop->se.exec_start = rq->clock_task;
stop->se.exec_start = rq_clock_task(rq);
return stop;
}
......@@ -57,7 +57,7 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
struct task_struct *curr = rq->curr;
u64 delta_exec;
delta_exec = rq->clock_task - curr->se.exec_start;
delta_exec = rq_clock_task(rq) - curr->se.exec_start;
if (unlikely((s64)delta_exec < 0))
delta_exec = 0;
......@@ -67,7 +67,7 @@ static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
curr->se.sum_exec_runtime += delta_exec;
account_group_exec_runtime(curr, delta_exec);
curr->se.exec_start = rq->clock_task;
curr->se.exec_start = rq_clock_task(rq);
cpuacct_charge(curr, delta_exec);
}
......@@ -79,7 +79,7 @@ static void set_curr_task_stop(struct rq *rq)
{
struct task_struct *stop = rq->stop;
stop->se.exec_start = rq->clock_task;
stop->se.exec_start = rq_clock_task(rq);
}
static void switched_to_stop(struct rq *rq, struct task_struct *p)
......
......@@ -11,7 +11,7 @@
* Modification history kernel/time.c
*
* 1993-09-02 Philip Gladstone
* Created file with time related functions from sched.c and adjtimex()
* Created file with time related functions from sched/core.c and adjtimex()
* 1993-10-08 Torsten Duwe
* adjtime interface update and CMOS clock write code
* 1995-08-13 Torsten Duwe
......
......@@ -64,7 +64,7 @@ static inline struct worker *current_wq_worker(void)
/*
* Scheduler hooks for concurrency managed workqueue. Only to be used from
* sched.c and workqueue.c.
* sched/core.c and workqueue.c.
*/
void wq_worker_waking_up(struct task_struct *task, int cpu);
struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment