Commit af345201 authored by Linus Torvalds

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The main changes in this cycle were:

   - tickless load average calculation enhancements (Byungchul Park)

   - vtime handling enhancements (Frederic Weisbecker)

   - scalability improvement via properly aligning a key structure field
     (Jiri Olsa)

   - various stop_machine() fixes (Oleg Nesterov)

   - sched/numa enhancement (Rik van Riel)

   - various fixes and improvements (Andi Kleen, Dietmar Eggemann,
     Geliang Tang, Hiroshi Shimamoto, Joonwoo Park, Peter Zijlstra,
     Waiman Long, Wanpeng Li, Yuyang Du)"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (32 commits)
  sched/fair: Fix new task's load avg removed from source CPU in wake_up_new_task()
  sched/core: Move sched_entity::avg into separate cache line
  x86/fpu: Properly align size in CHECK_MEMBER_AT_END_OF() macro
  sched/deadline: Fix the earliest_dl.next logic
  sched/fair: Disable the task group load_avg update for the root_task_group
  sched/fair: Move the cache-hot 'load_avg' variable into its own cacheline
  sched/fair: Avoid redundant idle_cpu() call in update_sg_lb_stats()
  sched/core: Move the sched_to_prio[] arrays out of line
  sched/cputime: Convert vtime_seqlock to seqcount
  sched/cputime: Introduce vtime accounting check for readers
  sched/cputime: Rename vtime_accounting_enabled() to vtime_accounting_cpu_enabled()
  sched/cputime: Correctly handle task guest time on housekeepers
  sched/cputime: Clarify vtime symbols and document them
  sched/cputime: Remove extra cost in task_cputime()
  sched/fair: Make it possible to account fair load avg consistently
  sched/fair: Modify the comment about lock assumptions in migrate_task_rq_fair()
  stop_machine: Clean up the usage of the preemption counter in cpu_stopper_thread()
  stop_machine: Shift the 'done != NULL' check from cpu_stop_signal_done() to callers
  stop_machine: Kill cpu_stop_done->executed
  stop_machine: Change __stop_cpus() to rely on cpu_stop_queue_work()
  ...
parents 4bd20db2 0905f04e
...@@ -143,9 +143,18 @@ static void __init fpu__init_system_generic(void) ...@@ -143,9 +143,18 @@ static void __init fpu__init_system_generic(void)
unsigned int xstate_size; unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size); EXPORT_SYMBOL_GPL(xstate_size);
/* Enforce that 'MEMBER' is the last field of 'TYPE': */ /* Get alignment of the TYPE. */
#define TYPE_ALIGN(TYPE) offsetof(struct { char x; TYPE test; }, test)
/*
* Enforce that 'MEMBER' is the last field of 'TYPE'.
*
* Align the computed size with alignment of the TYPE,
* because that's how C aligns structs.
*/
#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \ #define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER)) BUILD_BUG_ON(sizeof(TYPE) != ALIGN(offsetofend(TYPE, MEMBER), \
TYPE_ALIGN(TYPE)))
/* /*
* We append the 'struct fpu' to the task_struct: * We append the 'struct fpu' to the task_struct:
......
...@@ -86,7 +86,7 @@ static inline void context_tracking_init(void) { } ...@@ -86,7 +86,7 @@ static inline void context_tracking_init(void) { }
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static inline void guest_enter(void) static inline void guest_enter(void)
{ {
if (vtime_accounting_enabled()) if (vtime_accounting_cpu_enabled())
vtime_guest_enter(current); vtime_guest_enter(current);
else else
current->flags |= PF_VCPU; current->flags |= PF_VCPU;
...@@ -100,7 +100,7 @@ static inline void guest_exit(void) ...@@ -100,7 +100,7 @@ static inline void guest_exit(void)
if (context_tracking_is_enabled()) if (context_tracking_is_enabled())
__context_tracking_exit(CONTEXT_GUEST); __context_tracking_exit(CONTEXT_GUEST);
if (vtime_accounting_enabled()) if (vtime_accounting_cpu_enabled())
vtime_guest_exit(current); vtime_guest_exit(current);
else else
current->flags &= ~PF_VCPU; current->flags &= ~PF_VCPU;
......
...@@ -150,7 +150,7 @@ extern struct task_group root_task_group; ...@@ -150,7 +150,7 @@ extern struct task_group root_task_group;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
# define INIT_VTIME(tsk) \ # define INIT_VTIME(tsk) \
.vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \ .vtime_seqcount = SEQCNT_ZERO(tsk.vtime_seqcount), \
.vtime_snap = 0, \ .vtime_snap = 0, \
.vtime_snap_whence = VTIME_SYS, .vtime_snap_whence = VTIME_SYS,
#else #else
......
...@@ -177,9 +177,9 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); ...@@ -177,9 +177,9 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
extern void calc_global_load(unsigned long ticks); extern void calc_global_load(unsigned long ticks);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
extern void update_cpu_load_nohz(void); extern void update_cpu_load_nohz(int active);
#else #else
static inline void update_cpu_load_nohz(void) { } static inline void update_cpu_load_nohz(int active) { }
#endif #endif
extern unsigned long get_parent_ip(unsigned long addr); extern unsigned long get_parent_ip(unsigned long addr);
...@@ -1268,8 +1268,13 @@ struct sched_entity { ...@@ -1268,8 +1268,13 @@ struct sched_entity {
#endif #endif
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* Per entity load average tracking */ /*
struct sched_avg avg; * Per entity load average tracking.
*
* Put into separate cache line so it does not
* collide with read-mostly values above.
*/
struct sched_avg avg ____cacheline_aligned_in_smp;
#endif #endif
}; };
...@@ -1520,11 +1525,14 @@ struct task_struct { ...@@ -1520,11 +1525,14 @@ struct task_struct {
cputime_t gtime; cputime_t gtime;
struct prev_cputime prev_cputime; struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
seqlock_t vtime_seqlock; seqcount_t vtime_seqcount;
unsigned long long vtime_snap; unsigned long long vtime_snap;
enum { enum {
VTIME_SLEEPING = 0, /* Task is sleeping or running in a CPU with VTIME inactive */
VTIME_INACTIVE = 0,
/* Task runs in userspace in a CPU with VTIME active */
VTIME_USER, VTIME_USER,
/* Task runs in kernelspace in a CPU with VTIME active */
VTIME_SYS, VTIME_SYS,
} vtime_snap_whence; } vtime_snap_whence;
#endif #endif
......
...@@ -29,7 +29,7 @@ struct cpu_stop_work { ...@@ -29,7 +29,7 @@ struct cpu_stop_work {
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg); int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg); int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *arg);
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf); struct cpu_stop_work *work_buf);
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
...@@ -65,7 +65,7 @@ static void stop_one_cpu_nowait_workfn(struct work_struct *work) ...@@ -65,7 +65,7 @@ static void stop_one_cpu_nowait_workfn(struct work_struct *work)
preempt_enable(); preempt_enable();
} }
static inline void stop_one_cpu_nowait(unsigned int cpu, static inline bool stop_one_cpu_nowait(unsigned int cpu,
cpu_stop_fn_t fn, void *arg, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf) struct cpu_stop_work *work_buf)
{ {
...@@ -74,7 +74,10 @@ static inline void stop_one_cpu_nowait(unsigned int cpu, ...@@ -74,7 +74,10 @@ static inline void stop_one_cpu_nowait(unsigned int cpu,
work_buf->fn = fn; work_buf->fn = fn;
work_buf->arg = arg; work_buf->arg = arg;
schedule_work(&work_buf->work); schedule_work(&work_buf->work);
return true;
} }
return false;
} }
static inline int stop_cpus(const struct cpumask *cpumask, static inline int stop_cpus(const struct cpumask *cpumask,
......
...@@ -10,16 +10,27 @@ ...@@ -10,16 +10,27 @@
struct task_struct; struct task_struct;
/* /*
* vtime_accounting_enabled() definitions/declarations * vtime_accounting_cpu_enabled() definitions/declarations
*/ */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline bool vtime_accounting_enabled(void) { return true; } static inline bool vtime_accounting_cpu_enabled(void) { return true; }
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
/*
* Checks if vtime is enabled on some CPU. Cputime readers want to be careful
* in that case and compute the tickless cputime.
* For now vtime state is tied to context tracking. We might want to decouple
* those later if necessary.
*/
static inline bool vtime_accounting_enabled(void) static inline bool vtime_accounting_enabled(void)
{ {
if (context_tracking_is_enabled()) { return context_tracking_is_enabled();
}
static inline bool vtime_accounting_cpu_enabled(void)
{
if (vtime_accounting_enabled()) {
if (context_tracking_cpu_is_enabled()) if (context_tracking_cpu_is_enabled())
return true; return true;
} }
...@@ -29,7 +40,7 @@ static inline bool vtime_accounting_enabled(void) ...@@ -29,7 +40,7 @@ static inline bool vtime_accounting_enabled(void)
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
#ifndef CONFIG_VIRT_CPU_ACCOUNTING #ifndef CONFIG_VIRT_CPU_ACCOUNTING
static inline bool vtime_accounting_enabled(void) { return false; } static inline bool vtime_accounting_cpu_enabled(void) { return false; }
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
...@@ -44,7 +55,7 @@ extern void vtime_task_switch(struct task_struct *prev); ...@@ -44,7 +55,7 @@ extern void vtime_task_switch(struct task_struct *prev);
extern void vtime_common_task_switch(struct task_struct *prev); extern void vtime_common_task_switch(struct task_struct *prev);
static inline void vtime_task_switch(struct task_struct *prev) static inline void vtime_task_switch(struct task_struct *prev)
{ {
if (vtime_accounting_enabled()) if (vtime_accounting_cpu_enabled())
vtime_common_task_switch(prev); vtime_common_task_switch(prev);
} }
#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */ #endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
...@@ -59,7 +70,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk); ...@@ -59,7 +70,7 @@ extern void vtime_account_irq_enter(struct task_struct *tsk);
extern void vtime_common_account_irq_enter(struct task_struct *tsk); extern void vtime_common_account_irq_enter(struct task_struct *tsk);
static inline void vtime_account_irq_enter(struct task_struct *tsk) static inline void vtime_account_irq_enter(struct task_struct *tsk)
{ {
if (vtime_accounting_enabled()) if (vtime_accounting_cpu_enabled())
vtime_common_account_irq_enter(tsk); vtime_common_account_irq_enter(tsk);
} }
#endif /* __ARCH_HAS_VTIME_ACCOUNT */ #endif /* __ARCH_HAS_VTIME_ACCOUNT */
...@@ -78,7 +89,7 @@ extern void vtime_gen_account_irq_exit(struct task_struct *tsk); ...@@ -78,7 +89,7 @@ extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
static inline void vtime_account_irq_exit(struct task_struct *tsk) static inline void vtime_account_irq_exit(struct task_struct *tsk)
{ {
if (vtime_accounting_enabled()) if (vtime_accounting_cpu_enabled())
vtime_gen_account_irq_exit(tsk); vtime_gen_account_irq_exit(tsk);
} }
......
...@@ -102,6 +102,36 @@ init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func) ...@@ -102,6 +102,36 @@ init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func)
q->func = func; q->func = func;
} }
/**
* waitqueue_active -- locklessly test for waiters on the queue
* @q: the waitqueue to test for waiters
*
* returns true if the wait list is not empty
*
* NOTE: this function is lockless and requires care, incorrect usage _will_
* lead to sporadic and non-obvious failure.
*
* Use either while holding wait_queue_head_t::lock or when used for wakeups
* with an extra smp_mb() like:
*
* CPU0 - waker CPU1 - waiter
*
* for (;;) {
* @cond = true; prepare_to_wait(&wq, &wait, state);
* smp_mb(); // smp_mb() from set_current_state()
* if (waitqueue_active(wq)) if (@cond)
* wake_up(wq); break;
* schedule();
* }
* finish_wait(&wq, &wait);
*
* Because without the explicit smp_mb() it's possible for the
* waitqueue_active() load to get hoisted over the @cond store such that we'll
* observe an empty wait list while the waiter might not observe @cond.
*
* Also note that this 'optimization' trades a spin_lock() for an smp_mb(),
* which (when the lock is uncontended) are of roughly equal cost.
*/
static inline int waitqueue_active(wait_queue_head_t *q) static inline int waitqueue_active(wait_queue_head_t *q)
{ {
return !list_empty(&q->task_list); return !list_empty(&q->task_list);
......
...@@ -1349,9 +1349,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, ...@@ -1349,9 +1349,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
prev_cputime_init(&p->prev_cputime); prev_cputime_init(&p->prev_cputime);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
seqlock_init(&p->vtime_seqlock); seqcount_init(&p->vtime_seqcount);
p->vtime_snap = 0; p->vtime_snap = 0;
p->vtime_snap_whence = VTIME_SLEEPING; p->vtime_snap_whence = VTIME_INACTIVE;
#endif #endif
#if defined(SPLIT_RSS_COUNTING) #if defined(SPLIT_RSS_COUNTING)
......
...@@ -212,7 +212,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) ...@@ -212,7 +212,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
ag = autogroup_task_get(p); ag = autogroup_task_get(p);
down_write(&ag->lock); down_write(&ag->lock);
err = sched_group_set_shares(ag->tg, prio_to_weight[nice + 20]); err = sched_group_set_shares(ag->tg, sched_prio_to_weight[nice + 20]);
if (!err) if (!err)
ag->nice = nice; ag->nice = nice;
up_write(&ag->lock); up_write(&ag->lock);
......
...@@ -731,7 +731,7 @@ bool sched_can_stop_tick(void) ...@@ -731,7 +731,7 @@ bool sched_can_stop_tick(void)
if (current->policy == SCHED_RR) { if (current->policy == SCHED_RR) {
struct sched_rt_entity *rt_se = &current->rt; struct sched_rt_entity *rt_se = &current->rt;
return rt_se->run_list.prev == rt_se->run_list.next; return list_is_singular(&rt_se->run_list);
} }
/* /*
...@@ -823,8 +823,8 @@ static void set_load_weight(struct task_struct *p) ...@@ -823,8 +823,8 @@ static void set_load_weight(struct task_struct *p)
return; return;
} }
load->weight = scale_load(prio_to_weight[prio]); load->weight = scale_load(sched_prio_to_weight[prio]);
load->inv_weight = prio_to_wmult[prio]; load->inv_weight = sched_prio_to_wmult[prio];
} }
static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
...@@ -1071,8 +1071,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new ...@@ -1071,8 +1071,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
{ {
lockdep_assert_held(&rq->lock); lockdep_assert_held(&rq->lock);
dequeue_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_MIGRATING; p->on_rq = TASK_ON_RQ_MIGRATING;
dequeue_task(rq, p, 0);
set_task_cpu(p, new_cpu); set_task_cpu(p, new_cpu);
raw_spin_unlock(&rq->lock); raw_spin_unlock(&rq->lock);
...@@ -1080,8 +1080,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new ...@@ -1080,8 +1080,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
raw_spin_lock(&rq->lock); raw_spin_lock(&rq->lock);
BUG_ON(task_cpu(p) != new_cpu); BUG_ON(task_cpu(p) != new_cpu);
p->on_rq = TASK_ON_RQ_QUEUED;
enqueue_task(rq, p, 0); enqueue_task(rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
check_preempt_curr(rq, p, 0); check_preempt_curr(rq, p, 0);
return rq; return rq;
...@@ -1274,6 +1274,15 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) ...@@ -1274,6 +1274,15 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
!p->on_rq); !p->on_rq);
/*
* Migrating fair class task must have p->on_rq = TASK_ON_RQ_MIGRATING,
* because schedstat_wait_{start,end} rebase migrating task's wait_start
* time relying on p->on_rq.
*/
WARN_ON_ONCE(p->state == TASK_RUNNING &&
p->sched_class == &fair_sched_class &&
(p->on_rq && !task_on_rq_migrating(p)));
#ifdef CONFIG_LOCKDEP #ifdef CONFIG_LOCKDEP
/* /*
* The caller should hold either p->pi_lock or rq->lock, when changing * The caller should hold either p->pi_lock or rq->lock, when changing
...@@ -1310,9 +1319,11 @@ static void __migrate_swap_task(struct task_struct *p, int cpu) ...@@ -1310,9 +1319,11 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
src_rq = task_rq(p); src_rq = task_rq(p);
dst_rq = cpu_rq(cpu); dst_rq = cpu_rq(cpu);
p->on_rq = TASK_ON_RQ_MIGRATING;
deactivate_task(src_rq, p, 0); deactivate_task(src_rq, p, 0);
set_task_cpu(p, cpu); set_task_cpu(p, cpu);
activate_task(dst_rq, p, 0); activate_task(dst_rq, p, 0);
p->on_rq = TASK_ON_RQ_QUEUED;
check_preempt_curr(dst_rq, p, 0); check_preempt_curr(dst_rq, p, 0);
} else { } else {
/* /*
...@@ -2194,6 +2205,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) ...@@ -2194,6 +2205,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->se.vruntime = 0; p->se.vruntime = 0;
INIT_LIST_HEAD(&p->se.group_node); INIT_LIST_HEAD(&p->se.group_node);
#ifdef CONFIG_FAIR_GROUP_SCHED
p->se.cfs_rq = NULL;
#endif
#ifdef CONFIG_SCHEDSTATS #ifdef CONFIG_SCHEDSTATS
memset(&p->se.statistics, 0, sizeof(p->se.statistics)); memset(&p->se.statistics, 0, sizeof(p->se.statistics));
#endif #endif
...@@ -7442,6 +7457,9 @@ int in_sched_functions(unsigned long addr) ...@@ -7442,6 +7457,9 @@ int in_sched_functions(unsigned long addr)
*/ */
struct task_group root_task_group; struct task_group root_task_group;
LIST_HEAD(task_groups); LIST_HEAD(task_groups);
/* Cacheline aligned slab cache for task_group */
static struct kmem_cache *task_group_cache __read_mostly;
#endif #endif
DECLARE_PER_CPU(cpumask_var_t, load_balance_mask); DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
...@@ -7499,11 +7517,12 @@ void __init sched_init(void) ...@@ -7499,11 +7517,12 @@ void __init sched_init(void)
#endif /* CONFIG_RT_GROUP_SCHED */ #endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_CGROUP_SCHED #ifdef CONFIG_CGROUP_SCHED
task_group_cache = KMEM_CACHE(task_group, 0);
list_add(&root_task_group.list, &task_groups); list_add(&root_task_group.list, &task_groups);
INIT_LIST_HEAD(&root_task_group.children); INIT_LIST_HEAD(&root_task_group.children);
INIT_LIST_HEAD(&root_task_group.siblings); INIT_LIST_HEAD(&root_task_group.siblings);
autogroup_init(&init_task); autogroup_init(&init_task);
#endif /* CONFIG_CGROUP_SCHED */ #endif /* CONFIG_CGROUP_SCHED */
for_each_possible_cpu(i) { for_each_possible_cpu(i) {
...@@ -7784,7 +7803,7 @@ static void free_sched_group(struct task_group *tg) ...@@ -7784,7 +7803,7 @@ static void free_sched_group(struct task_group *tg)
free_fair_sched_group(tg); free_fair_sched_group(tg);
free_rt_sched_group(tg); free_rt_sched_group(tg);
autogroup_free(tg); autogroup_free(tg);
kfree(tg); kmem_cache_free(task_group_cache, tg);
} }
/* allocate runqueue etc for a new task group */ /* allocate runqueue etc for a new task group */
...@@ -7792,7 +7811,7 @@ struct task_group *sched_create_group(struct task_group *parent) ...@@ -7792,7 +7811,7 @@ struct task_group *sched_create_group(struct task_group *parent)
{ {
struct task_group *tg; struct task_group *tg;
tg = kzalloc(sizeof(*tg), GFP_KERNEL); tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO);
if (!tg) if (!tg)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
...@@ -8697,3 +8716,44 @@ void dump_cpu_task(int cpu) ...@@ -8697,3 +8716,44 @@ void dump_cpu_task(int cpu)
pr_info("Task dump for CPU %d:\n", cpu); pr_info("Task dump for CPU %d:\n", cpu);
sched_show_task(cpu_curr(cpu)); sched_show_task(cpu_curr(cpu));
} }
/*
* Nice levels are multiplicative, with a gentle 10% change for every
* nice level changed. I.e. when a CPU-bound task goes from nice 0 to
* nice 1, it will get ~10% less CPU time than another CPU-bound task
* that remained on nice 0.
*
* The "10% effect" is relative and cumulative: from _any_ nice level,
* if you go up 1 level, it's -10% CPU usage, if you go down 1 level
* it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
* If a task goes up by ~10% and another task goes down by ~10% then
* the relative distance between them is ~25%.)
*
* Table layout: 40 entries, five per row; the comment at the start of
* each row is the nice level of that row's first entry (-20 ... 15).
* proc_sched_autogroup_set_nice() indexes the table with nice + 20, so
* nice 0 selects the entry 1024.
*/
const int sched_prio_to_weight[40] = {
/* -20 */ 88761, 71755, 56483, 46273, 36291,
/* -15 */ 29154, 23254, 18705, 14949, 11916,
/* -10 */ 9548, 7620, 6100, 4904, 3906,
/* -5 */ 3121, 2501, 1991, 1586, 1277,
/* 0 */ 1024, 820, 655, 526, 423,
/* 5 */ 335, 272, 215, 172, 137,
/* 10 */ 110, 87, 70, 56, 45,
/* 15 */ 36, 29, 23, 18, 15,
};
/*
* Inverse (2^32/x) values of the sched_prio_to_weight[] array, precalculated.
*
* In cases where the weight does not change often, we can use the
* precalculated inverse to speed up arithmetic by turning divisions
* into multiplications.
*
* Same layout as sched_prio_to_weight[]: 40 entries, five per row, each
* row labelled with the nice level of its first entry. For example the
* nice 0 entry is 4194304 == 2^32 / 1024. set_load_weight() stores the
* selected entry in load->inv_weight.
*/
const u32 sched_prio_to_wmult[40] = {
/* -20 */ 48388, 59856, 76040, 92818, 118348,
/* -15 */ 147320, 184698, 229616, 287308, 360437,
/* -10 */ 449829, 563644, 704093, 875809, 1099582,
/* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
/* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
/* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
/* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
...@@ -466,7 +466,7 @@ void account_process_tick(struct task_struct *p, int user_tick) ...@@ -466,7 +466,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq(); struct rq *rq = this_rq();
if (vtime_accounting_enabled()) if (vtime_accounting_cpu_enabled())
return; return;
if (sched_clock_irqtime) { if (sched_clock_irqtime) {
...@@ -680,7 +680,7 @@ static cputime_t get_vtime_delta(struct task_struct *tsk) ...@@ -680,7 +680,7 @@ static cputime_t get_vtime_delta(struct task_struct *tsk)
{ {
unsigned long long delta = vtime_delta(tsk); unsigned long long delta = vtime_delta(tsk);
WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING); WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
tsk->vtime_snap += delta; tsk->vtime_snap += delta;
/* CHECKME: always safe to convert nsecs to cputime? */ /* CHECKME: always safe to convert nsecs to cputime? */
...@@ -696,37 +696,37 @@ static void __vtime_account_system(struct task_struct *tsk) ...@@ -696,37 +696,37 @@ static void __vtime_account_system(struct task_struct *tsk)
void vtime_account_system(struct task_struct *tsk) void vtime_account_system(struct task_struct *tsk)
{ {
write_seqlock(&tsk->vtime_seqlock); write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk); __vtime_account_system(tsk);
write_sequnlock(&tsk->vtime_seqlock); write_seqcount_end(&tsk->vtime_seqcount);
} }
void vtime_gen_account_irq_exit(struct task_struct *tsk) void vtime_gen_account_irq_exit(struct task_struct *tsk)
{ {
write_seqlock(&tsk->vtime_seqlock); write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk); __vtime_account_system(tsk);
if (context_tracking_in_user()) if (context_tracking_in_user())
tsk->vtime_snap_whence = VTIME_USER; tsk->vtime_snap_whence = VTIME_USER;
write_sequnlock(&tsk->vtime_seqlock); write_seqcount_end(&tsk->vtime_seqcount);
} }
void vtime_account_user(struct task_struct *tsk) void vtime_account_user(struct task_struct *tsk)
{ {
cputime_t delta_cpu; cputime_t delta_cpu;
write_seqlock(&tsk->vtime_seqlock); write_seqcount_begin(&tsk->vtime_seqcount);
delta_cpu = get_vtime_delta(tsk); delta_cpu = get_vtime_delta(tsk);
tsk->vtime_snap_whence = VTIME_SYS; tsk->vtime_snap_whence = VTIME_SYS;
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
write_sequnlock(&tsk->vtime_seqlock); write_seqcount_end(&tsk->vtime_seqcount);
} }
void vtime_user_enter(struct task_struct *tsk) void vtime_user_enter(struct task_struct *tsk)
{ {
write_seqlock(&tsk->vtime_seqlock); write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk); __vtime_account_system(tsk);
tsk->vtime_snap_whence = VTIME_USER; tsk->vtime_snap_whence = VTIME_USER;
write_sequnlock(&tsk->vtime_seqlock); write_seqcount_end(&tsk->vtime_seqcount);
} }
void vtime_guest_enter(struct task_struct *tsk) void vtime_guest_enter(struct task_struct *tsk)
...@@ -738,19 +738,19 @@ void vtime_guest_enter(struct task_struct *tsk) ...@@ -738,19 +738,19 @@ void vtime_guest_enter(struct task_struct *tsk)
* synchronization against the reader (task_gtime()) * synchronization against the reader (task_gtime())
* that can thus safely catch up with a tickless delta. * that can thus safely catch up with a tickless delta.
*/ */
write_seqlock(&tsk->vtime_seqlock); write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk); __vtime_account_system(tsk);
current->flags |= PF_VCPU; current->flags |= PF_VCPU;
write_sequnlock(&tsk->vtime_seqlock); write_seqcount_end(&tsk->vtime_seqcount);
} }
EXPORT_SYMBOL_GPL(vtime_guest_enter); EXPORT_SYMBOL_GPL(vtime_guest_enter);
void vtime_guest_exit(struct task_struct *tsk) void vtime_guest_exit(struct task_struct *tsk)
{ {
write_seqlock(&tsk->vtime_seqlock); write_seqcount_begin(&tsk->vtime_seqcount);
__vtime_account_system(tsk); __vtime_account_system(tsk);
current->flags &= ~PF_VCPU; current->flags &= ~PF_VCPU;
write_sequnlock(&tsk->vtime_seqlock); write_seqcount_end(&tsk->vtime_seqcount);
} }
EXPORT_SYMBOL_GPL(vtime_guest_exit); EXPORT_SYMBOL_GPL(vtime_guest_exit);
...@@ -763,24 +763,26 @@ void vtime_account_idle(struct task_struct *tsk) ...@@ -763,24 +763,26 @@ void vtime_account_idle(struct task_struct *tsk)
void arch_vtime_task_switch(struct task_struct *prev) void arch_vtime_task_switch(struct task_struct *prev)
{ {
write_seqlock(&prev->vtime_seqlock); write_seqcount_begin(&prev->vtime_seqcount);
prev->vtime_snap_whence = VTIME_SLEEPING; prev->vtime_snap_whence = VTIME_INACTIVE;
write_sequnlock(&prev->vtime_seqlock); write_seqcount_end(&prev->vtime_seqcount);
write_seqlock(&current->vtime_seqlock); write_seqcount_begin(&current->vtime_seqcount);
current->vtime_snap_whence = VTIME_SYS; current->vtime_snap_whence = VTIME_SYS;
current->vtime_snap = sched_clock_cpu(smp_processor_id()); current->vtime_snap = sched_clock_cpu(smp_processor_id());
write_sequnlock(&current->vtime_seqlock); write_seqcount_end(&current->vtime_seqcount);
} }
void vtime_init_idle(struct task_struct *t, int cpu) void vtime_init_idle(struct task_struct *t, int cpu)
{ {
unsigned long flags; unsigned long flags;
write_seqlock_irqsave(&t->vtime_seqlock, flags); local_irq_save(flags);
write_seqcount_begin(&t->vtime_seqcount);
t->vtime_snap_whence = VTIME_SYS; t->vtime_snap_whence = VTIME_SYS;
t->vtime_snap = sched_clock_cpu(cpu); t->vtime_snap = sched_clock_cpu(cpu);
write_sequnlock_irqrestore(&t->vtime_seqlock, flags); write_seqcount_end(&t->vtime_seqcount);
local_irq_restore(flags);
} }
cputime_t task_gtime(struct task_struct *t) cputime_t task_gtime(struct task_struct *t)
...@@ -788,17 +790,17 @@ cputime_t task_gtime(struct task_struct *t) ...@@ -788,17 +790,17 @@ cputime_t task_gtime(struct task_struct *t)
unsigned int seq; unsigned int seq;
cputime_t gtime; cputime_t gtime;
if (!context_tracking_is_enabled()) if (!vtime_accounting_enabled())
return t->gtime; return t->gtime;
do { do {
seq = read_seqbegin(&t->vtime_seqlock); seq = read_seqcount_begin(&t->vtime_seqcount);
gtime = t->gtime; gtime = t->gtime;
if (t->flags & PF_VCPU) if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
gtime += vtime_delta(t); gtime += vtime_delta(t);
} while (read_seqretry(&t->vtime_seqlock, seq)); } while (read_seqcount_retry(&t->vtime_seqcount, seq));
return gtime; return gtime;
} }
...@@ -821,7 +823,7 @@ fetch_task_cputime(struct task_struct *t, ...@@ -821,7 +823,7 @@ fetch_task_cputime(struct task_struct *t,
*udelta = 0; *udelta = 0;
*sdelta = 0; *sdelta = 0;
seq = read_seqbegin(&t->vtime_seqlock); seq = read_seqcount_begin(&t->vtime_seqcount);
if (u_dst) if (u_dst)
*u_dst = *u_src; *u_dst = *u_src;
...@@ -829,7 +831,7 @@ fetch_task_cputime(struct task_struct *t, ...@@ -829,7 +831,7 @@ fetch_task_cputime(struct task_struct *t,
*s_dst = *s_src; *s_dst = *s_src;
/* Task is sleeping, nothing to add */ /* Task is sleeping, nothing to add */
if (t->vtime_snap_whence == VTIME_SLEEPING || if (t->vtime_snap_whence == VTIME_INACTIVE ||
is_idle_task(t)) is_idle_task(t))
continue; continue;
...@@ -845,7 +847,7 @@ fetch_task_cputime(struct task_struct *t, ...@@ -845,7 +847,7 @@ fetch_task_cputime(struct task_struct *t,
if (t->vtime_snap_whence == VTIME_SYS) if (t->vtime_snap_whence == VTIME_SYS)
*sdelta = delta; *sdelta = delta;
} }
} while (read_seqretry(&t->vtime_seqlock, seq)); } while (read_seqcount_retry(&t->vtime_seqcount, seq));
} }
...@@ -853,6 +855,14 @@ void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) ...@@ -853,6 +855,14 @@ void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{ {
cputime_t udelta, sdelta; cputime_t udelta, sdelta;
if (!vtime_accounting_enabled()) {
if (utime)
*utime = t->utime;
if (stime)
*stime = t->stime;
return;
}
fetch_task_cputime(t, utime, stime, &t->utime, fetch_task_cputime(t, utime, stime, &t->utime,
&t->stime, &udelta, &sdelta); &t->stime, &udelta, &sdelta);
if (utime) if (utime)
...@@ -866,6 +876,14 @@ void task_cputime_scaled(struct task_struct *t, ...@@ -866,6 +876,14 @@ void task_cputime_scaled(struct task_struct *t,
{ {
cputime_t udelta, sdelta; cputime_t udelta, sdelta;
if (!vtime_accounting_enabled()) {
if (utimescaled)
*utimescaled = t->utimescaled;
if (stimescaled)
*stimescaled = t->stimescaled;
return;
}
fetch_task_cputime(t, utimescaled, stimescaled, fetch_task_cputime(t, utimescaled, stimescaled,
&t->utimescaled, &t->stimescaled, &udelta, &sdelta); &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
if (utimescaled) if (utimescaled)
......
...@@ -176,8 +176,10 @@ static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p) ...@@ -176,8 +176,10 @@ static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
} }
} }
if (leftmost) if (leftmost) {
dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks; dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks;
dl_rq->earliest_dl.next = p->dl.deadline;
}
rb_link_node(&p->pushable_dl_tasks, parent, link); rb_link_node(&p->pushable_dl_tasks, parent, link);
rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
...@@ -195,6 +197,10 @@ static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p) ...@@ -195,6 +197,10 @@ static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
next_node = rb_next(&p->pushable_dl_tasks); next_node = rb_next(&p->pushable_dl_tasks);
dl_rq->pushable_dl_tasks_leftmost = next_node; dl_rq->pushable_dl_tasks_leftmost = next_node;
if (next_node) {
dl_rq->earliest_dl.next = rb_entry(next_node,
struct task_struct, pushable_dl_tasks)->dl.deadline;
}
} }
rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root); rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
...@@ -782,42 +788,14 @@ static void update_curr_dl(struct rq *rq) ...@@ -782,42 +788,14 @@ static void update_curr_dl(struct rq *rq)
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu);
static inline u64 next_deadline(struct rq *rq)
{
struct task_struct *next = pick_next_earliest_dl_task(rq, rq->cpu);
if (next && dl_prio(next->prio))
return next->dl.deadline;
else
return 0;
}
static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
{ {
struct rq *rq = rq_of_dl_rq(dl_rq); struct rq *rq = rq_of_dl_rq(dl_rq);
if (dl_rq->earliest_dl.curr == 0 || if (dl_rq->earliest_dl.curr == 0 ||
dl_time_before(deadline, dl_rq->earliest_dl.curr)) { dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
/*
* If the dl_rq had no -deadline tasks, or if the new task
* has shorter deadline than the current one on dl_rq, we
* know that the previous earliest becomes our next earliest,
* as the new task becomes the earliest itself.
*/
dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
dl_rq->earliest_dl.curr = deadline; dl_rq->earliest_dl.curr = deadline;
cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1); cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
} else if (dl_rq->earliest_dl.next == 0 ||
dl_time_before(deadline, dl_rq->earliest_dl.next)) {
/*
* On the other hand, if the new -deadline task has
* a later deadline than the earliest one on dl_rq, but
* it is earlier than the next (if any), we must
* recompute the next-earliest.
*/
dl_rq->earliest_dl.next = next_deadline(rq);
} }
} }
...@@ -839,7 +817,6 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) ...@@ -839,7 +817,6 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
entry = rb_entry(leftmost, struct sched_dl_entity, rb_node); entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
dl_rq->earliest_dl.curr = entry->deadline; dl_rq->earliest_dl.curr = entry->deadline;
dl_rq->earliest_dl.next = next_deadline(rq);
cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1); cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
} }
} }
...@@ -1274,28 +1251,6 @@ static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu) ...@@ -1274,28 +1251,6 @@ static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
return 0; return 0;
} }
/* Returns the second earliest -deadline task, NULL otherwise */
static struct task_struct *pick_next_earliest_dl_task(struct rq *rq, int cpu)
{
struct rb_node *next_node = rq->dl.rb_leftmost;
struct sched_dl_entity *dl_se;
struct task_struct *p = NULL;
next_node:
next_node = rb_next(next_node);
if (next_node) {
dl_se = rb_entry(next_node, struct sched_dl_entity, rb_node);
p = dl_task_of(dl_se);
if (pick_dl_task(rq, p, cpu))
return p;
goto next_node;
}
return NULL;
}
/* /*
* Return the earliest pushable rq's task, which is suitable to be executed * Return the earliest pushable rq's task, which is suitable to be executed
* on the CPU, NULL otherwise: * on the CPU, NULL otherwise:
......
This diff is collapsed.
...@@ -47,7 +47,6 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags) ...@@ -47,7 +47,6 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{ {
idle_exit_fair(rq);
rq_last_tick_reset(rq); rq_last_tick_reset(rq);
} }
......
...@@ -248,7 +248,12 @@ struct task_group { ...@@ -248,7 +248,12 @@ struct task_group {
unsigned long shares; unsigned long shares;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
atomic_long_t load_avg; /*
* load_avg can be heavily contended at clock tick time, so put
* it in its own cacheline separated from the fields above which
* will also be accessed at each tick.
*/
atomic_long_t load_avg ____cacheline_aligned;
#endif #endif
#endif #endif
...@@ -335,7 +340,15 @@ extern void sched_move_task(struct task_struct *tsk); ...@@ -335,7 +340,15 @@ extern void sched_move_task(struct task_struct *tsk);
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares); extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
#endif
#ifdef CONFIG_SMP
extern void set_task_rq_fair(struct sched_entity *se,
struct cfs_rq *prev, struct cfs_rq *next);
#else /* !CONFIG_SMP */
static inline void set_task_rq_fair(struct sched_entity *se,
struct cfs_rq *prev, struct cfs_rq *next) { }
#endif /* CONFIG_SMP */
#endif /* CONFIG_FAIR_GROUP_SCHED */
#else /* CONFIG_CGROUP_SCHED */ #else /* CONFIG_CGROUP_SCHED */
...@@ -933,6 +946,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) ...@@ -933,6 +946,7 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
#endif #endif
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
p->se.cfs_rq = tg->cfs_rq[cpu]; p->se.cfs_rq = tg->cfs_rq[cpu];
p->se.parent = tg->se[cpu]; p->se.parent = tg->se[cpu];
#endif #endif
...@@ -1113,46 +1127,8 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) ...@@ -1113,46 +1127,8 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
#define WEIGHT_IDLEPRIO 3 #define WEIGHT_IDLEPRIO 3
#define WMULT_IDLEPRIO 1431655765 #define WMULT_IDLEPRIO 1431655765
/* extern const int sched_prio_to_weight[40];
* Nice levels are multiplicative, with a gentle 10% change for every extern const u32 sched_prio_to_wmult[40];
* nice level changed. I.e. when a CPU-bound task goes from nice 0 to
* nice 1, it will get ~10% less CPU time than another CPU-bound task
* that remained on nice 0.
*
* The "10% effect" is relative and cumulative: from _any_ nice level,
* if you go up 1 level, it's -10% CPU usage, if you go down 1 level
* it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
* If a task goes up by ~10% and another task goes down by ~10% then
* the relative distance between them is ~25%.)
*/
static const int prio_to_weight[40] = {
/* -20 */ 88761, 71755, 56483, 46273, 36291,
/* -15 */ 29154, 23254, 18705, 14949, 11916,
/* -10 */ 9548, 7620, 6100, 4904, 3906,
/* -5 */ 3121, 2501, 1991, 1586, 1277,
/* 0 */ 1024, 820, 655, 526, 423,
/* 5 */ 335, 272, 215, 172, 137,
/* 10 */ 110, 87, 70, 56, 45,
/* 15 */ 36, 29, 23, 18, 15,
};
/*
* Inverse (2^32/x) values of the prio_to_weight[] array, precalculated.
*
* In cases where the weight does not change often, we can use the
* precalculated inverse to speed up arithmetics by turning divisions
* into multiplications:
*/
static const u32 prio_to_wmult[40] = {
/* -20 */ 48388, 59856, 76040, 92818, 118348,
/* -15 */ 147320, 184698, 229616, 287308, 360437,
/* -10 */ 449829, 563644, 704093, 875809, 1099582,
/* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
/* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
/* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
/* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
};
#define ENQUEUE_WAKEUP 0x01 #define ENQUEUE_WAKEUP 0x01
#define ENQUEUE_HEAD 0x02 #define ENQUEUE_HEAD 0x02
...@@ -1252,16 +1228,8 @@ extern void update_group_capacity(struct sched_domain *sd, int cpu); ...@@ -1252,16 +1228,8 @@ extern void update_group_capacity(struct sched_domain *sd, int cpu);
extern void trigger_load_balance(struct rq *rq); extern void trigger_load_balance(struct rq *rq);
extern void idle_enter_fair(struct rq *this_rq);
extern void idle_exit_fair(struct rq *this_rq);
extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask); extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
#else
static inline void idle_enter_fair(struct rq *rq) { }
static inline void idle_exit_fair(struct rq *rq) { }
#endif #endif
#ifdef CONFIG_CPU_IDLE #ifdef CONFIG_CPU_IDLE
......
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
*/ */
struct cpu_stop_done { struct cpu_stop_done {
atomic_t nr_todo; /* nr left to execute */ atomic_t nr_todo; /* nr left to execute */
bool executed; /* actually executed? */
int ret; /* collected return value */ int ret; /* collected return value */
struct completion completion; /* fired if nr_todo reaches 0 */ struct completion completion; /* fired if nr_todo reaches 0 */
}; };
...@@ -63,14 +62,10 @@ static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) ...@@ -63,14 +62,10 @@ static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
} }
/* signal completion unless @done is NULL */ /* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed) static void cpu_stop_signal_done(struct cpu_stop_done *done)
{ {
if (done) { if (atomic_dec_and_test(&done->nr_todo))
if (executed) complete(&done->completion);
done->executed = true;
if (atomic_dec_and_test(&done->nr_todo))
complete(&done->completion);
}
} }
static void __cpu_stop_queue_work(struct cpu_stopper *stopper, static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
...@@ -81,17 +76,21 @@ static void __cpu_stop_queue_work(struct cpu_stopper *stopper, ...@@ -81,17 +76,21 @@ static void __cpu_stop_queue_work(struct cpu_stopper *stopper,
} }
/* queue @work to @stopper. if offline, @work is completed immediately */ /* queue @work to @stopper. if offline, @work is completed immediately */
static void cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
{ {
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
unsigned long flags; unsigned long flags;
bool enabled;
spin_lock_irqsave(&stopper->lock, flags); spin_lock_irqsave(&stopper->lock, flags);
if (stopper->enabled) enabled = stopper->enabled;
if (enabled)
__cpu_stop_queue_work(stopper, work); __cpu_stop_queue_work(stopper, work);
else else if (work->done)
cpu_stop_signal_done(work->done, false); cpu_stop_signal_done(work->done);
spin_unlock_irqrestore(&stopper->lock, flags); spin_unlock_irqrestore(&stopper->lock, flags);
return enabled;
} }
/** /**
...@@ -124,9 +123,10 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) ...@@ -124,9 +123,10 @@ int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done }; struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
cpu_stop_init_done(&done, 1); cpu_stop_init_done(&done, 1);
cpu_stop_queue_work(cpu, &work); if (!cpu_stop_queue_work(cpu, &work))
return -ENOENT;
wait_for_completion(&done.completion); wait_for_completion(&done.completion);
return done.executed ? done.ret : -ENOENT; return done.ret;
} }
/* This controls the threads on each CPU. */ /* This controls the threads on each CPU. */
...@@ -258,7 +258,6 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * ...@@ -258,7 +258,6 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
struct cpu_stop_work work1, work2; struct cpu_stop_work work1, work2;
struct multi_stop_data msdata; struct multi_stop_data msdata;
preempt_disable();
msdata = (struct multi_stop_data){ msdata = (struct multi_stop_data){
.fn = fn, .fn = fn,
.data = arg, .data = arg,
...@@ -277,16 +276,11 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * ...@@ -277,16 +276,11 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
if (cpu1 > cpu2) if (cpu1 > cpu2)
swap(cpu1, cpu2); swap(cpu1, cpu2);
if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2)) { if (cpu_stop_queue_two_works(cpu1, &work1, cpu2, &work2))
preempt_enable();
return -ENOENT; return -ENOENT;
}
preempt_enable();
wait_for_completion(&done.completion); wait_for_completion(&done.completion);
return done.ret;
return done.executed ? done.ret : -ENOENT;
} }
/** /**
...@@ -302,23 +296,28 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void * ...@@ -302,23 +296,28 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
* *
* CONTEXT: * CONTEXT:
* Don't care. * Don't care.
*
* RETURNS:
* true if cpu_stop_work was queued successfully and @fn will be called,
* false otherwise.
*/ */
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_work *work_buf) struct cpu_stop_work *work_buf)
{ {
*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, }; *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
cpu_stop_queue_work(cpu, work_buf); return cpu_stop_queue_work(cpu, work_buf);
} }
/* static data for stop_cpus */ /* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex); static DEFINE_MUTEX(stop_cpus_mutex);
static void queue_stop_cpus_work(const struct cpumask *cpumask, static bool queue_stop_cpus_work(const struct cpumask *cpumask,
cpu_stop_fn_t fn, void *arg, cpu_stop_fn_t fn, void *arg,
struct cpu_stop_done *done) struct cpu_stop_done *done)
{ {
struct cpu_stop_work *work; struct cpu_stop_work *work;
unsigned int cpu; unsigned int cpu;
bool queued = false;
/* /*
* Disable preemption while queueing to avoid getting * Disable preemption while queueing to avoid getting
...@@ -331,9 +330,12 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask, ...@@ -331,9 +330,12 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
work->fn = fn; work->fn = fn;
work->arg = arg; work->arg = arg;
work->done = done; work->done = done;
cpu_stop_queue_work(cpu, work); if (cpu_stop_queue_work(cpu, work))
queued = true;
} }
lg_global_unlock(&stop_cpus_lock); lg_global_unlock(&stop_cpus_lock);
return queued;
} }
static int __stop_cpus(const struct cpumask *cpumask, static int __stop_cpus(const struct cpumask *cpumask,
...@@ -342,9 +344,10 @@ static int __stop_cpus(const struct cpumask *cpumask, ...@@ -342,9 +344,10 @@ static int __stop_cpus(const struct cpumask *cpumask,
struct cpu_stop_done done; struct cpu_stop_done done;
cpu_stop_init_done(&done, cpumask_weight(cpumask)); cpu_stop_init_done(&done, cpumask_weight(cpumask));
queue_stop_cpus_work(cpumask, fn, arg, &done); if (!queue_stop_cpus_work(cpumask, fn, arg, &done))
return -ENOENT;
wait_for_completion(&done.completion); wait_for_completion(&done.completion);
return done.executed ? done.ret : -ENOENT; return done.ret;
} }
/** /**
...@@ -432,7 +435,6 @@ static void cpu_stopper_thread(unsigned int cpu) ...@@ -432,7 +435,6 @@ static void cpu_stopper_thread(unsigned int cpu)
{ {
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
struct cpu_stop_work *work; struct cpu_stop_work *work;
int ret;
repeat: repeat:
work = NULL; work = NULL;
...@@ -448,23 +450,19 @@ static void cpu_stopper_thread(unsigned int cpu) ...@@ -448,23 +450,19 @@ static void cpu_stopper_thread(unsigned int cpu)
cpu_stop_fn_t fn = work->fn; cpu_stop_fn_t fn = work->fn;
void *arg = work->arg; void *arg = work->arg;
struct cpu_stop_done *done = work->done; struct cpu_stop_done *done = work->done;
char ksym_buf[KSYM_NAME_LEN] __maybe_unused; int ret;
/* cpu stop callbacks are not allowed to sleep */
preempt_disable();
/* cpu stop callbacks must not sleep, make in_atomic() == T */
preempt_count_inc();
ret = fn(arg); ret = fn(arg);
if (ret) if (done) {
done->ret = ret; if (ret)
done->ret = ret;
/* restore preemption and check it's still balanced */ cpu_stop_signal_done(done);
preempt_enable(); }
preempt_count_dec();
WARN_ONCE(preempt_count(), WARN_ONCE(preempt_count(),
"cpu_stop: %s(%p) leaked preempt count\n", "cpu_stop: %pf(%p) leaked preempt count\n", fn, arg);
kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
ksym_buf), arg);
cpu_stop_signal_done(done, true);
goto repeat; goto repeat;
} }
} }
......
...@@ -694,11 +694,11 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, ...@@ -694,11 +694,11 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
return tick; return tick;
} }
static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int active)
{ {
/* Update jiffies first */ /* Update jiffies first */
tick_do_update_jiffies64(now); tick_do_update_jiffies64(now);
update_cpu_load_nohz(); update_cpu_load_nohz(active);
calc_load_exit_idle(); calc_load_exit_idle();
touch_softlockup_watchdog(); touch_softlockup_watchdog();
...@@ -725,7 +725,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts) ...@@ -725,7 +725,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (can_stop_full_tick()) if (can_stop_full_tick())
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu); tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped) else if (ts->tick_stopped)
tick_nohz_restart_sched_tick(ts, ktime_get()); tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
#endif #endif
} }
...@@ -875,7 +875,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts) ...@@ -875,7 +875,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
unsigned long ticks; unsigned long ticks;
if (vtime_accounting_enabled()) if (vtime_accounting_cpu_enabled())
return; return;
/* /*
* We stopped the tick in idle. Update process times would miss the * We stopped the tick in idle. Update process times would miss the
...@@ -916,7 +916,7 @@ void tick_nohz_idle_exit(void) ...@@ -916,7 +916,7 @@ void tick_nohz_idle_exit(void)
tick_nohz_stop_idle(ts, now); tick_nohz_stop_idle(ts, now);
if (ts->tick_stopped) { if (ts->tick_stopped) {
tick_nohz_restart_sched_tick(ts, now); tick_nohz_restart_sched_tick(ts, now, 0);
tick_nohz_account_idle_ticks(ts); tick_nohz_account_idle_ticks(ts);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment