Commit 97fb7a0a authored by Ingo Molnar

sched: Clean up and harmonize the coding style of the scheduler code base

A good number of small style inconsistencies have accumulated
in the scheduler core, so do a pass over them to harmonize
all these details:

 - fix spelling in comments,

 - use curly braces for multi-line statements,

 - remove unnecessary parentheses from integer literals,

 - capitalize consistently,

 - remove stray newlines,

 - add comments where necessary,

 - remove invalid/unnecessary comments,

 - align structure definitions and other data types vertically,

 - add missing newlines for increased readability,

 - fix vertical tabulation where it's misaligned,

 - harmonize preprocessor conditional block labeling
   and vertical alignment,

 - remove line-breaks where they uglify the code,

 - add newline after local variable definitions,

No change in functionality:

  md5:
     1191fa0a890cfa8132156d2959d7e9e2  built-in.o.before.asm
     1191fa0a890cfa8132156d2959d7e9e2  built-in.o.after.asm

Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent c2e51382
kernel/sched/autogroup.c

@@ -168,18 +168,19 @@ autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 	autogroup_kref_put(prev);
 }

-/* Allocates GFP_KERNEL, cannot be called under any spinlock */
+/* Allocates GFP_KERNEL, cannot be called under any spinlock: */
 void sched_autogroup_create_attach(struct task_struct *p)
 {
 	struct autogroup *ag = autogroup_create();

 	autogroup_move_group(p, ag);
-	/* drop extra reference added by autogroup_create() */
+	/* Drop extra reference added by autogroup_create(): */
 	autogroup_kref_put(ag);
 }
 EXPORT_SYMBOL(sched_autogroup_create_attach);

-/* Cannot be called under siglock. Currently has no users */
+/* Cannot be called under siglock. Currently has no users: */
 void sched_autogroup_detach(struct task_struct *p)
 {
 	autogroup_move_group(p, &autogroup_default);
@@ -202,7 +203,6 @@ static int __init setup_autogroup(char *str)
 	return 1;
 }
 __setup("noautogroup", setup_autogroup);

 #ifdef CONFIG_PROC_FS
@@ -224,7 +224,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice)
 	if (nice < 0 && !can_nice(current, nice))
 		return -EPERM;

-	/* this is a heavy operation taking global locks.. */
+	/* This is a heavy operation, taking global locks.. */
 	if (!capable(CAP_SYS_ADMIN) && time_before(jiffies, next))
 		return -EAGAIN;
@@ -267,4 +267,4 @@ int autogroup_path(struct task_group *tg, char *buf, int buflen)
 	return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id);
 }
-#endif /* CONFIG_SCHED_DEBUG */
+#endif
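The create-attach path above follows the standard kref lifetime pattern: autogroup_create() returns with a reference held for the creator, autogroup_move_group() takes its own reference, and the creator's extra reference is then dropped. A minimal sketch of that pattern, with illustrative names that are not part of this commit:

    #include <linux/kref.h>
    #include <linux/slab.h>

    /* Illustrative sketch, not from the commit. */
    struct obj {
            struct kref kref;
    };

    static void obj_release(struct kref *kref)
    {
            kfree(container_of(kref, struct obj, kref));
    }

    static struct obj *obj_create(void)
    {
            struct obj *o = kzalloc(sizeof(*o), GFP_KERNEL);

            if (o)
                    kref_init(&o->kref);    /* creator holds one reference */
            return o;
    }

    static void obj_attach(struct obj *o)
    {
            kref_get(&o->kref);             /* attachment holds its own reference */
    }

    /* Create + attach, then drop the creator's extra reference: */
    static void obj_create_attach(void)
    {
            struct obj *o = obj_create();

            if (!o)
                    return;
            obj_attach(o);
            kref_put(&o->kref, obj_release);
    }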
kernel/sched/autogroup.h

@@ -7,9 +7,9 @@
 struct autogroup {
 	/*
-	 * reference doesn't mean how many thread attach to this
-	 * autogroup now. It just stands for the number of task
-	 * could use this autogroup.
+	 * Reference doesn't mean how many threads attach to this
+	 * autogroup now. It just stands for the number of tasks
+	 * which could use this autogroup.
 	 */
 	struct kref		kref;
 	struct task_group	*tg;
@@ -56,11 +56,9 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg)
 	return tg;
 }

-#ifdef CONFIG_SCHED_DEBUG
 static inline int autogroup_path(struct task_group *tg, char *buf, int buflen)
 {
 	return 0;
 }
-#endif

 #endif /* CONFIG_SCHED_AUTOGROUP */
kernel/sched/clock.c

 /*
- * sched_clock for unstable cpu clocks
+ * sched_clock() for unstable CPU clocks
  *
  *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra
  *
@@ -11,7 +11,7 @@
  *   Guillaume Chazarain <guichaz@gmail.com>
  *
  *
- * What:
+ * What this file implements:
  *
  * cpu_clock(i) provides a fast (execution time) high resolution
  * clock with bounded drift between CPUs. The value of cpu_clock(i)
@@ -26,11 +26,11 @@
  * at 0 on boot (but people really shouldn't rely on that).
  *
  * cpu_clock(i) -- can be used from any context, including NMI.
- * local_clock() -- is cpu_clock() on the current cpu.
+ * local_clock() -- is cpu_clock() on the current CPU.
  *
  * sched_clock_cpu(i)
  *
- * How:
+ * How it is implemented:
  *
  * The implementation either uses sched_clock() when
  * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the
@@ -302,21 +302,21 @@ static u64 sched_clock_remote(struct sched_clock_data *scd)
 	 * cmpxchg64 below only protects one readout.
 	 *
 	 * We must reread via sched_clock_local() in the retry case on
-	 * 32bit as an NMI could use sched_clock_local() via the
+	 * 32-bit kernels as an NMI could use sched_clock_local() via the
 	 * tracer and hit between the readout of
-	 * the low32bit and the high 32bit portion.
+	 * the low 32-bit and the high 32-bit portion.
 	 */
 	this_clock = sched_clock_local(my_scd);
 	/*
-	 * We must enforce atomic readout on 32bit, otherwise the
-	 * update on the remote cpu can hit inbetween the readout of
-	 * the low32bit and the high 32bit portion.
+	 * We must enforce atomic readout on 32-bit, otherwise the
+	 * update on the remote CPU can hit inbetween the readout of
+	 * the low 32-bit and the high 32-bit portion.
 	 */
 	remote_clock = cmpxchg64(&scd->clock, 0, 0);
 #else
 	/*
-	 * On 64bit the read of [my]scd->clock is atomic versus the
-	 * update, so we can avoid the above 32bit dance.
+	 * On 64-bit kernels the read of [my]scd->clock is atomic versus the
+	 * update, so we can avoid the above 32-bit dance.
 	 */
 	sched_clock_local(my_scd);
 again:
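The clock.c comments above hinge on one detail: a 64-bit value cannot be loaded atomically on 32-bit, so a plain read can tear between the low and high halves while an update runs on another CPU. The cmpxchg64(&scd->clock, 0, 0) in the hunk is the usual idiom for an atomic 64-bit read: it atomically compares the value against 0 and always returns the old value, its only side effect being a rewrite of 0 with 0. A minimal sketch of the idiom with an illustrative helper, not from this commit:

    #include <linux/atomic.h>
    #include <linux/types.h>

    /* Illustrative: atomically read a u64 on 32-bit via a 64-bit cmpxchg. */
    static u64 read_u64_atomic(u64 *val)
    {
            /*
             * Compare *val with 0: on mismatch (the common case for a
             * running clock) nothing is stored; on match, 0 is stored
             * over 0 -- either way the old value comes back as one
             * atomic 64-bit readout.
             */
            return cmpxchg64(val, 0, 0);
    }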
kernel/sched/core.c

@@ -135,7 +135,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
 	 *					[L] ->on_rq
 	 *	RELEASE (rq->lock)
 	 *
-	 * If we observe the old cpu in task_rq_lock, the acquire of
+	 * If we observe the old CPU in task_rq_lock, the acquire of
 	 * the old rq->lock will fully serialize against the stores.
 	 *
 	 * If we observe the new CPU in task_rq_lock, the acquire will
@@ -1457,7 +1457,7 @@ EXPORT_SYMBOL_GPL(kick_process);
 *
 * - cpu_active must be a subset of cpu_online
 *
- * - on cpu-up we allow per-cpu kthreads on the online && !active cpu,
+ * - on CPU-up we allow per-CPU kthreads on the online && !active CPU,
 *   see __set_cpus_allowed_ptr(). At this point the newly online
 *   CPU isn't yet part of the sched domains, and balancing will not
 *   see it.
@@ -3037,7 +3037,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 #if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
 	/*
-	 * 64-bit doesn't need locks to atomically read a 64bit value.
+	 * 64-bit doesn't need locks to atomically read a 64-bit value.
 	 * So we have a optimization chance when the task's delta_exec is 0.
 	 * Reading ->on_cpu is racy, but this is ok.
 	 *
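The task_rq_lock() hunk above is part of a memory-ordering argument: a RELEASE operation publishes all stores that precede it, and an ACQUIRE that observes the released value is guaranteed to observe those prior stores as well. A minimal sketch of that generic pairing, with illustrative variables rather than the scheduler's own fields:

    /* Illustrative sketch of the RELEASE/ACQUIRE pairing, not from the commit. */
    int data;
    int flag;

    static void writer(void)
    {
            data = 42;                      /* store before the release */
            smp_store_release(&flag, 1);    /* publish */
    }

    static void reader(void)
    {
            if (smp_load_acquire(&flag))    /* observed the released value... */
                    WARN_ON(data != 42);    /* ...so the earlier store is visible */
    }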
kernel/sched/cpuacct.c

@@ -18,7 +18,7 @@
  * (balbir@in.ibm.com).
  */

-/* Time spent by the tasks of the cpu accounting group executing in ... */
+/* Time spent by the tasks of the CPU accounting group executing in ... */
 enum cpuacct_stat_index {
 	CPUACCT_STAT_USER,	/* ... user mode */
 	CPUACCT_STAT_SYSTEM,	/* ... kernel mode */
@@ -35,12 +35,12 @@ struct cpuacct_usage {
 	u64	usages[CPUACCT_STAT_NSTATS];
 };

-/* track cpu usage of a group of tasks and its child groups */
+/* track CPU usage of a group of tasks and its child groups */
 struct cpuacct {
 	struct cgroup_subsys_state	css;
-	/* cpuusage holds pointer to a u64-type object on every cpu */
+	/* cpuusage holds pointer to a u64-type object on every CPU */
 	struct cpuacct_usage __percpu	*cpuusage;
 	struct kernel_cpustat __percpu	*cpustat;
 };

 static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
@@ -48,7 +48,7 @@ static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css)
 	return css ? container_of(css, struct cpuacct, css) : NULL;
 }

-/* return cpu accounting group to which this task belongs */
+/* Return CPU accounting group to which this task belongs */
 static inline struct cpuacct *task_ca(struct task_struct *tsk)
 {
 	return css_ca(task_css(tsk, cpuacct_cgrp_id));
@@ -65,7 +65,7 @@ static struct cpuacct root_cpuacct = {
 	.cpuusage	= &root_cpuacct_cpuusage,
 };

-/* create a new cpu accounting group */
+/* Create a new CPU accounting group */
 static struct cgroup_subsys_state *
 cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
 {
@@ -96,7 +96,7 @@ cpuacct_css_alloc(struct cgroup_subsys_state *parent_css)
 	return ERR_PTR(-ENOMEM);
 }

-/* destroy an existing cpu accounting group */
+/* Destroy an existing CPU accounting group */
 static void cpuacct_css_free(struct cgroup_subsys_state *css)
 {
 	struct cpuacct *ca = css_ca(css);
@@ -162,7 +162,7 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val)
 #endif
 }

-/* return total cpu usage (in nanoseconds) of a group */
+/* Return total CPU usage (in nanoseconds) of a group */
 static u64 __cpuusage_read(struct cgroup_subsys_state *css,
 			   enum cpuacct_stat_index index)
 {
kernel/sched/cpudeadline.c

@@ -10,7 +10,6 @@
  * as published by the Free Software Foundation; version 2
  * of the License.
  */
 #include <linux/gfp.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
@@ -147,9 +146,9 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
 }

 /*
- * cpudl_clear - remove a cpu from the cpudl max-heap
+ * cpudl_clear - remove a CPU from the cpudl max-heap
  * @cp: the cpudl max-heap context
- * @cpu: the target cpu
+ * @cpu: the target CPU
  *
  * Notes: assumes cpu_rq(cpu)->lock is locked
  *
@@ -188,8 +187,8 @@ void cpudl_clear(struct cpudl *cp, int cpu)
 /*
  * cpudl_set - update the cpudl max-heap
  * @cp: the cpudl max-heap context
- * @cpu: the target cpu
- * @dl: the new earliest deadline for this cpu
+ * @cpu: the target CPU
+ * @dl: the new earliest deadline for this CPU
  *
  * Notes: assumes cpu_rq(cpu)->lock is locked
  *
@@ -224,7 +223,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl)
 /*
  * cpudl_set_freecpu - Set the cpudl.free_cpus
  * @cp: the cpudl max-heap context
- * @cpu: rd attached cpu
+ * @cpu: rd attached CPU
  */
 void cpudl_set_freecpu(struct cpudl *cp, int cpu)
 {
@@ -234,7 +233,7 @@ void cpudl_set_freecpu(struct cpudl *cp, int cpu)
 /*
  * cpudl_clear_freecpu - Clear the cpudl.free_cpus
  * @cp: the cpudl max-heap context
- * @cpu: rd attached cpu
+ * @cpu: rd attached CPU
  */
 void cpudl_clear_freecpu(struct cpudl *cp, int cpu)
 {
kernel/sched/cpudeadline.h

 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_CPUDL_H
-#define _LINUX_CPUDL_H
 #include <linux/sched.h>
 #include <linux/sched/deadline.h>

 #define IDX_INVALID	-1

 struct cpudl_item {
 	u64			dl;
 	int			cpu;
 	int			idx;
 };

 struct cpudl {
 	raw_spinlock_t		lock;
 	int			size;
 	cpumask_var_t		free_cpus;
 	struct cpudl_item	*elements;
 };

 #ifdef CONFIG_SMP
-int cpudl_find(struct cpudl *cp, struct task_struct *p,
-	       struct cpumask *later_mask);
+int cpudl_find(struct cpudl *cp, struct task_struct *p, struct cpumask *later_mask);
 void cpudl_set(struct cpudl *cp, int cpu, u64 dl);
 void cpudl_clear(struct cpudl *cp, int cpu);
 int cpudl_init(struct cpudl *cp);
 void cpudl_set_freecpu(struct cpudl *cp, int cpu);
 void cpudl_clear_freecpu(struct cpudl *cp, int cpu);
 void cpudl_cleanup(struct cpudl *cp);
 #endif /* CONFIG_SMP */
-#endif /* _LINUX_CPUDL_H */
kernel/sched/cpufreq_schedutil.c

@@ -20,52 +20,52 @@
 #include "sched.h"

 struct sugov_tunables {
 	struct gov_attr_set	attr_set;
 	unsigned int		rate_limit_us;
 };

 struct sugov_policy {
 	struct cpufreq_policy	*policy;
 	struct sugov_tunables	*tunables;
 	struct list_head	tunables_hook;

 	raw_spinlock_t		update_lock;	/* For shared policies */
 	u64			last_freq_update_time;
 	s64			freq_update_delay_ns;
 	unsigned int		next_freq;
 	unsigned int		cached_raw_freq;

-	/* The next fields are only needed if fast switch cannot be used. */
+	/* The next fields are only needed if fast switch cannot be used: */
 	struct irq_work		irq_work;
 	struct kthread_work	work;
 	struct mutex		work_lock;
 	struct kthread_worker	worker;
 	struct task_struct	*thread;
 	bool			work_in_progress;

 	bool			need_freq_update;
 };

 struct sugov_cpu {
 	struct update_util_data	update_util;
 	struct sugov_policy	*sg_policy;
 	unsigned int		cpu;

 	bool			iowait_boost_pending;
 	unsigned int		iowait_boost;
 	unsigned int		iowait_boost_max;
 	u64			last_update;

-	/* The fields below are only needed when sharing a policy. */
+	/* The fields below are only needed when sharing a policy: */
 	unsigned long		util_cfs;
 	unsigned long		util_dl;
 	unsigned long		max;
 	unsigned int		flags;

-	/* The field below is for single-CPU policies only. */
+	/* The field below is for single-CPU policies only: */
 #ifdef CONFIG_NO_HZ_COMMON
 	unsigned long		saved_idle_calls;
 #endif
 };
@@ -79,9 +79,9 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
 	/*
 	 * Since cpufreq_update_util() is called with rq->lock held for
-	 * the @target_cpu, our per-cpu data is fully serialized.
+	 * the @target_cpu, our per-CPU data is fully serialized.
 	 *
-	 * However, drivers cannot in general deal with cross-cpu
+	 * However, drivers cannot in general deal with cross-CPU
 	 * requests, so while get_next_freq() will work, our
 	 * sugov_update_commit() call may not for the fast switching platforms.
 	 *
@@ -111,6 +111,7 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
 	}

 	delta_ns = time - sg_policy->last_freq_update_time;
+
 	return delta_ns >= sg_policy->freq_update_delay_ns;
 }
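The tail of sugov_should_update_freq() above is a plain rate limiter: a frequency update is allowed only once freq_update_delay_ns has elapsed since the last committed update. The same idiom in isolation, with an illustrative struct rather than the governor's real one:

    /* Illustrative sketch of the rate-limit check, not from the commit. */
    struct rate_limit {
            u64 last_update_time;   /* ns timestamp of the last committed update */
            s64 delay_ns;           /* minimum spacing between updates */
    };

    static bool should_update(struct rate_limit *rl, u64 now)
    {
            s64 delta_ns = now - rl->last_update_time;

            return delta_ns >= rl->delay_ns;
    }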
@@ -345,8 +346,8 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 	return get_next_freq(sg_policy, util, max);
 }

-static void sugov_update_shared(struct update_util_data *hook, u64 time,
-				unsigned int flags)
+static void
+sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
 {
 	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
@@ -423,8 +424,8 @@ static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
 	return sprintf(buf, "%u\n", tunables->rate_limit_us);
 }

-static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
-				   size_t count)
+static ssize_t
+rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
 {
 	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
 	struct sugov_policy *sg_policy;
@@ -479,11 +480,11 @@ static int sugov_kthread_create(struct sugov_policy *sg_policy)
 {
 	struct task_struct *thread;
 	struct sched_attr attr = {
 		.size		= sizeof(struct sched_attr),
 		.sched_policy	= SCHED_DEADLINE,
 		.sched_flags	= SCHED_FLAG_SUGOV,
 		.sched_nice	= 0,
 		.sched_priority	= 0,
 		/*
 		 * Fake (unused) bandwidth; workaround to "fix"
 		 * priority inheritance.
@@ -663,21 +664,21 @@ static int sugov_start(struct cpufreq_policy *policy)
 	struct sugov_policy *sg_policy = policy->governor_data;
 	unsigned int cpu;

 	sg_policy->freq_update_delay_ns		= sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
 	sg_policy->last_freq_update_time	= 0;
 	sg_policy->next_freq			= UINT_MAX;
 	sg_policy->work_in_progress		= false;
 	sg_policy->need_freq_update		= false;
 	sg_policy->cached_raw_freq		= 0;

 	for_each_cpu(cpu, policy->cpus) {
 		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

 		memset(sg_cpu, 0, sizeof(*sg_cpu));
 		sg_cpu->cpu			= cpu;
 		sg_cpu->sg_policy		= sg_policy;
 		sg_cpu->flags			= 0;
 		sg_cpu->iowait_boost_max	= policy->cpuinfo.max_freq;
 	}

 	for_each_cpu(cpu, policy->cpus) {
@@ -721,14 +722,14 @@ static void sugov_limits(struct cpufreq_policy *policy)
 }

 static struct cpufreq_governor schedutil_gov = {
 	.name			= "schedutil",
 	.owner			= THIS_MODULE,
 	.dynamic_switching	= true,
 	.init			= sugov_init,
 	.exit			= sugov_exit,
 	.start			= sugov_start,
 	.stop			= sugov_stop,
 	.limits			= sugov_limits,
 };

 #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
kernel/sched/cpupri.c

@@ -14,7 +14,7 @@
 *
 * going from the lowest priority to the highest. CPUs in the INVALID state
 * are not eligible for routing. The system maintains this state with
- * a 2 dimensional bitmap (the first for priority class, the second for cpus
+ * a 2 dimensional bitmap (the first for priority class, the second for CPUs
 * in that class). Therefore a typical application without affinity
 * restrictions can find a suitable CPU with O(1) complexity (e.g. two bit
 * searches). For tasks with affinity restrictions, the algorithm has a
@@ -26,7 +26,6 @@
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
 #include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/sched/rt.h>
@@ -128,9 +127,9 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 }

 /**
- * cpupri_set - update the cpu priority setting
+ * cpupri_set - update the CPU priority setting
  * @cp: The cpupri context
- * @cpu: The target cpu
+ * @cpu: The target CPU
  * @newpri: The priority (INVALID-RT99) to assign to this CPU
  *
  * Note: Assumes cpu_rq(cpu)->lock is locked
@@ -151,7 +150,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 		return;

 	/*
-	 * If the cpu was currently mapped to a different value, we
+	 * If the CPU was currently mapped to a different value, we
 	 * need to map it to the new value then remove the old value.
 	 * Note, we must add the new value first, otherwise we risk the
 	 * cpu being missed by the priority loop in cpupri_find.
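The cpupri header comment above promises an O(1) lookup via "two bit searches": one over a bitmap of non-empty priority classes, then one over the CPU mask of the chosen class. A much-simplified sketch of that two-step search, capped at one machine word of classes for brevity (the real cpupri has 102 classes and uses per-class cpumasks with atomic counters; all names here are illustrative):

    /* Illustrative sketch of the two-level bit search, not from the commit. */
    #define NR_CLASSES 64                           /* simplified; cpupri has 102 */

    struct prio_map {
            unsigned long nonempty;                 /* bit n set: class n has CPUs */
            unsigned long cpus[NR_CLASSES];         /* one word-sized CPU mask per class */
    };

    static int find_cpu(const struct prio_map *m)
    {
            int cls = __builtin_ffsl(m->nonempty);  /* first bit search: the class */

            if (!cls)
                    return -1;                      /* no eligible class at all */
            /* second bit search: a CPU within that class (or -1 if empty) */
            return __builtin_ffsl(m->cpus[cls - 1]) - 1;
    }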
kernel/sched/cpupri.h

 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_CPUPRI_H
-#define _LINUX_CPUPRI_H
 #include <linux/sched.h>

 #define CPUPRI_NR_PRIORITIES	(MAX_RT_PRIO + 2)

 #define CPUPRI_INVALID		-1
 #define CPUPRI_IDLE		0
 #define CPUPRI_NORMAL		1
 /* values 2-101 are RT priorities 0-99 */

 struct cpupri_vec {
 	atomic_t		count;
 	cpumask_var_t		mask;
 };

 struct cpupri {
 	struct cpupri_vec	pri_to_cpu[CPUPRI_NR_PRIORITIES];
 	int			*cpu_to_pri;
 };

 #ifdef CONFIG_SMP
-int cpupri_find(struct cpupri *cp,
-		struct task_struct *p, struct cpumask *lowest_mask);
+int cpupri_find(struct cpupri *cp, struct task_struct *p, struct cpumask *lowest_mask);
 void cpupri_set(struct cpupri *cp, int cpu, int pri);
 int cpupri_init(struct cpupri *cp);
 void cpupri_cleanup(struct cpupri *cp);
 #endif
-#endif /* _LINUX_CPUPRI_H */
kernel/sched/cputime.c

@@ -113,9 +113,9 @@ static inline void task_group_account_field(struct task_struct *p, int index,
 }

 /*
- * Account user cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @cputime: the cpu time spent in user space since the last update
+ * Account user CPU time to a process.
+ * @p: the process that the CPU time gets accounted to
+ * @cputime: the CPU time spent in user space since the last update
  */
 void account_user_time(struct task_struct *p, u64 cputime)
 {
@@ -135,9 +135,9 @@ void account_user_time(struct task_struct *p, u64 cputime)
 }

 /*
- * Account guest cpu time to a process.
- * @p: the process that the cpu time gets accounted to
- * @cputime: the cpu time spent in virtual machine since the last update
+ * Account guest CPU time to a process.
+ * @p: the process that the CPU time gets accounted to
+ * @cputime: the CPU time spent in virtual machine since the last update
  */
 void account_guest_time(struct task_struct *p, u64 cputime)
 {
@@ -159,9 +159,9 @@ void account_guest_time(struct task_struct *p, u64 cputime)
 }

 /*
- * Account system cpu time to a process and desired cpustat field
- * @p: the process that the cpu time gets accounted to
- * @cputime: the cpu time spent in kernel space since the last update
+ * Account system CPU time to a process and desired cpustat field
+ * @p: the process that the CPU time gets accounted to
+ * @cputime: the CPU time spent in kernel space since the last update
  * @index: pointer to cpustat field that has to be updated
  */
 void account_system_index_time(struct task_struct *p,
@@ -179,10 +179,10 @@ void account_system_index_time(struct task_struct *p,
 }

 /*
- * Account system cpu time to a process.
- * @p: the process that the cpu time gets accounted to
+ * Account system CPU time to a process.
+ * @p: the process that the CPU time gets accounted to
  * @hardirq_offset: the offset to subtract from hardirq_count()
- * @cputime: the cpu time spent in kernel space since the last update
+ * @cputime: the CPU time spent in kernel space since the last update
  */
 void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
 {
@@ -205,7 +205,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
 /*
  * Account for involuntary wait time.
- * @cputime: the cpu time spent in involuntary wait
+ * @cputime: the CPU time spent in involuntary wait
  */
 void account_steal_time(u64 cputime)
 {
@@ -216,7 +216,7 @@ void account_steal_time(u64 cputime)
 /*
  * Account for idle time.
- * @cputime: the cpu time spent in idle wait
+ * @cputime: the CPU time spent in idle wait
  */
 void account_idle_time(u64 cputime)
 {
@@ -338,7 +338,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 /*
  * Account a tick to a process and cpustat
- * @p: the process that the cpu time gets accounted to
+ * @p: the process that the CPU time gets accounted to
  * @user_tick: is the tick from userspace
  * @rq: the pointer to rq
  *
@@ -400,17 +400,16 @@ static void irqtime_account_idle_ticks(int ticks)
 	irqtime_account_process_tick(current, 0, rq, ticks);
 }
 #else /* CONFIG_IRQ_TIME_ACCOUNTING */
-static inline void irqtime_account_idle_ticks(int ticks) {}
+static inline void irqtime_account_idle_ticks(int ticks) { }
 static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
-						struct rq *rq, int nr_ticks) {}
+						struct rq *rq, int nr_ticks) { }
 #endif /* CONFIG_IRQ_TIME_ACCOUNTING */

 /*
  * Use precise platform statistics if available:
  */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
-#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
+# ifndef __ARCH_HAS_VTIME_TASK_SWITCH
 void vtime_common_task_switch(struct task_struct *prev)
 {
 	if (is_idle_task(prev))
@@ -421,8 +420,7 @@ void vtime_common_task_switch(struct task_struct *prev)
 	vtime_flush(prev);
 	arch_vtime_task_switch(prev);
 }
-#endif
+# endif
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
@@ -469,10 +467,12 @@ void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
 	*ut = cputime.utime;
 	*st = cputime.stime;
 }
-#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */
+
 /*
- * Account a single tick of cpu time.
- * @p: the process that the cpu time gets accounted to
+ * Account a single tick of CPU time.
+ * @p: the process that the CPU time gets accounted to
  * @user_tick: indicates if the tick is a user or a system tick
  */
 void account_process_tick(struct task_struct *p, int user_tick)
kernel/sched/deadline.c

@@ -539,12 +539,12 @@ static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p
 		/*
 		 * If we cannot preempt any rq, fall back to pick any
-		 * online cpu.
+		 * online CPU:
 		 */
 		cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
 		if (cpu >= nr_cpu_ids) {
 			/*
-			 * Fail to find any suitable cpu.
+			 * Failed to find any suitable CPU.
 			 * The task will never come back!
 			 */
 			BUG_ON(dl_bandwidth_enabled());
@@ -608,8 +608,7 @@ static inline void queue_pull_task(struct rq *rq)

 static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
 static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
-static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
-				  int flags);
+static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p, int flags);

 /*
  * We are being explicitly informed that a new instance is starting,
@@ -1873,7 +1872,7 @@ static int find_later_rq(struct task_struct *task)
 	/*
 	 * We have to consider system topology and task affinity
-	 * first, then we can look for a suitable cpu.
+	 * first, then we can look for a suitable CPU.
 	 */
 	if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
 		return -1;
@@ -1887,7 +1886,7 @@ static int find_later_rq(struct task_struct *task)
 	 * Now we check how well this matches with task's
 	 * affinity and system topology.
 	 *
-	 * The last cpu where the task run is our first
+	 * The last CPU where the task run is our first
 	 * guess, since it is most likely cache-hot there.
 	 */
 	if (cpumask_test_cpu(cpu, later_mask))
@@ -1917,9 +1916,9 @@ static int find_later_rq(struct task_struct *task)
 			best_cpu = cpumask_first_and(later_mask,
 						     sched_domain_span(sd));
 			/*
-			 * Last chance: if a cpu being in both later_mask
+			 * Last chance: if a CPU being in both later_mask
 			 * and current sd span is valid, that becomes our
-			 * choice. Of course, the latest possible cpu is
+			 * choice. Of course, the latest possible CPU is
 			 * already under consideration through later_mask.
 			 */
 			if (best_cpu < nr_cpu_ids) {
@@ -2075,7 +2074,7 @@ static int push_dl_task(struct rq *rq)
 		if (task == next_task) {
 			/*
 			 * The task is still there. We don't try
-			 * again, some other cpu will pull it when ready.
+			 * again, some other CPU will pull it when ready.
 			 */
 			goto out;
 		}
@@ -2308,7 +2307,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
 	/*
 	 * Since this might be the only -deadline task on the rq,
 	 * this is the right place to try to pull some other one
-	 * from an overloaded cpu, if any.
+	 * from an overloaded CPU, if any.
 	 */
 	if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
 		return;
@@ -2634,17 +2633,17 @@ void __dl_clear_params(struct task_struct *p)
 {
 	struct sched_dl_entity *dl_se = &p->dl;

 	dl_se->dl_runtime		= 0;
 	dl_se->dl_deadline		= 0;
 	dl_se->dl_period		= 0;
 	dl_se->flags			= 0;
 	dl_se->dl_bw			= 0;
 	dl_se->dl_density		= 0;

 	dl_se->dl_throttled		= 0;
 	dl_se->dl_yielded		= 0;
 	dl_se->dl_non_contending	= 0;
 	dl_se->dl_overrun		= 0;
 }

 bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
@@ -2663,21 +2662,22 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
 #ifdef CONFIG_SMP
 int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed)
 {
-	unsigned int dest_cpu = cpumask_any_and(cpu_active_mask,
-						cs_cpus_allowed);
+	unsigned int dest_cpu;
 	struct dl_bw *dl_b;
 	bool overflow;
 	int cpus, ret;
 	unsigned long flags;

+	dest_cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed);
+
 	rcu_read_lock_sched();
 	dl_b = dl_bw_of(dest_cpu);
 	raw_spin_lock_irqsave(&dl_b->lock, flags);
 	cpus = dl_bw_cpus(dest_cpu);
 	overflow = __dl_overflow(dl_b, cpus, 0, p->dl.dl_bw);
-	if (overflow)
+	if (overflow) {
 		ret = -EBUSY;
-	else {
+	} else {
 		/*
 		 * We reserve space for this task in the destination
 		 * root_domain, as we can't fail after this point.
@@ -2689,6 +2689,7 @@ int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allo
 	}
 	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
 	rcu_read_unlock_sched();
+
 	return ret;
 }
@@ -2709,6 +2710,7 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
 	ret = 0;
 	raw_spin_unlock_irqrestore(&cur_dl_b->lock, flags);
 	rcu_read_unlock_sched();
+
 	return ret;
 }
@@ -2726,6 +2728,7 @@ bool dl_cpu_busy(unsigned int cpu)
 	overflow = __dl_overflow(dl_b, cpus, 0, 0);
 	raw_spin_unlock_irqrestore(&dl_b->lock, flags);
 	rcu_read_unlock_sched();
+
 	return overflow;
 }
 #endif
kernel/sched/debug.c

@@ -9,7 +9,6 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 #include <linux/proc_fs.h>
 #include <linux/sched/mm.h>
 #include <linux/sched/task.h>
@@ -274,34 +273,19 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
 	if (table == NULL)
 		return NULL;

-	set_table_entry(&table[0], "min_interval", &sd->min_interval,
-		sizeof(long), 0644, proc_doulongvec_minmax, false);
-	set_table_entry(&table[1], "max_interval", &sd->max_interval,
-		sizeof(long), 0644, proc_doulongvec_minmax, false);
-	set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
-		sizeof(int), 0644, proc_dointvec_minmax, true);
-	set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
-		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
-		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[9], "cache_nice_tries",
-		&sd->cache_nice_tries,
-		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[10], "flags", &sd->flags,
-		sizeof(int), 0644, proc_dointvec_minmax, false);
-	set_table_entry(&table[11], "max_newidle_lb_cost",
-		&sd->max_newidle_lb_cost,
-		sizeof(long), 0644, proc_doulongvec_minmax, false);
-	set_table_entry(&table[12], "name", sd->name,
-		CORENAME_MAX_SIZE, 0444, proc_dostring, false);
+	set_table_entry(&table[0] , "min_interval",        &sd->min_interval,        sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[1] , "max_interval",        &sd->max_interval,        sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[2] , "busy_idx",            &sd->busy_idx,            sizeof(int) , 0644, proc_dointvec_minmax,   true );
+	set_table_entry(&table[3] , "idle_idx",            &sd->idle_idx,            sizeof(int) , 0644, proc_dointvec_minmax,   true );
+	set_table_entry(&table[4] , "newidle_idx",         &sd->newidle_idx,         sizeof(int) , 0644, proc_dointvec_minmax,   true );
+	set_table_entry(&table[5] , "wake_idx",            &sd->wake_idx,            sizeof(int) , 0644, proc_dointvec_minmax,   true );
+	set_table_entry(&table[6] , "forkexec_idx",        &sd->forkexec_idx,        sizeof(int) , 0644, proc_dointvec_minmax,   true );
+	set_table_entry(&table[7] , "busy_factor",         &sd->busy_factor,         sizeof(int) , 0644, proc_dointvec_minmax,   false);
+	set_table_entry(&table[8] , "imbalance_pct",       &sd->imbalance_pct,       sizeof(int) , 0644, proc_dointvec_minmax,   false);
+	set_table_entry(&table[9] , "cache_nice_tries",    &sd->cache_nice_tries,    sizeof(int) , 0644, proc_dointvec_minmax,   false);
+	set_table_entry(&table[10], "flags",               &sd->flags,               sizeof(int) , 0644, proc_dointvec_minmax,   false);
+	set_table_entry(&table[11], "max_newidle_lb_cost", &sd->max_newidle_lb_cost, sizeof(long), 0644, proc_doulongvec_minmax, false);
+	set_table_entry(&table[12], "name",                sd->name,                 CORENAME_MAX_SIZE, 0444, proc_dostring,     false);
 	/* &table[13] is terminator */

 	return table;
@@ -332,8 +316,8 @@ static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
 	return table;
 }

 static cpumask_var_t		sd_sysctl_cpus;
 static struct ctl_table_header	*sd_sysctl_header;

 void register_sched_domain_sysctl(void)
 {
@@ -413,14 +397,10 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 {
 	struct sched_entity *se = tg->se[cpu];

-#define P(F) \
-	SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F)
-#define P_SCHEDSTAT(F) \
-	SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)schedstat_val(F))
-#define PN(F) \
-	SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
-#define PN_SCHEDSTAT(F) \
-	SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))
+#define P(F)		SEQ_printf(m, " .%-30s: %lld\n",	#F, (long long)F)
+#define P_SCHEDSTAT(F)	SEQ_printf(m, " .%-30s: %lld\n",	#F, (long long)schedstat_val(F))
+#define PN(F)		SEQ_printf(m, " .%-30s: %lld.%06ld\n",	#F, SPLIT_NS((long long)F))
+#define PN_SCHEDSTAT(F)	SEQ_printf(m, " .%-30s: %lld.%06ld\n",	#F, SPLIT_NS((long long)schedstat_val(F)))

 	if (!se)
 		return;
@@ -428,6 +408,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 	PN(se->exec_start);
 	PN(se->vruntime);
 	PN(se->sum_exec_runtime);
+
 	if (schedstat_enabled()) {
 		PN_SCHEDSTAT(se->statistics.wait_start);
 		PN_SCHEDSTAT(se->statistics.sleep_start);
@@ -440,6 +421,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
 		PN_SCHEDSTAT(se->statistics.wait_sum);
 		P_SCHEDSTAT(se->statistics.wait_count);
 	}
+
 	P(se->load.weight);
 	P(se->runnable_weight);
 #ifdef CONFIG_SMP
@@ -464,6 +446,7 @@ static char *task_group_path(struct task_group *tg)
 		return group_path;

 	cgroup_path(tg->css.cgroup, group_path, PATH_MAX);
+
 	return group_path;
 }
 #endif
@@ -799,9 +782,9 @@ void sysrq_sched_debug_show(void)
 /*
  * This itererator needs some explanation.
  * It returns 1 for the header position.
- * This means 2 is cpu 0.
- * In a hotplugged system some cpus, including cpu 0, may be missing so we have
- * to use cpumask_* to iterate over the cpus.
+ * This means 2 is CPU 0.
+ * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
+ * to use cpumask_* to iterate over the CPUs.
 */
 static void *sched_debug_start(struct seq_file *file, loff_t *offset)
 {
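The iterator comment above encodes positions into the seq_file cursor: 1 is the header row and n + 2 is CPU n, which lets a cpumask-based walk skip CPUs that are missing on hotplugged systems. The encoding in isolation, with illustrative helpers that are not part of this commit:

    /* Illustrative sketch of the position encoding, not from the commit. */
    static void *pos_to_token(unsigned long n)
    {
            return n ? (void *)n : NULL;            /* 1 == header, n + 2 == CPU n */
    }

    static int token_to_cpu(void *v)
    {
            unsigned long n = (unsigned long)v;

            return n <= 1 ? -1 : (int)(n - 2);      /* -1 means "header row" */
    }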
@@ -821,6 +804,7 @@ static void *sched_debug_start(struct seq_file *file, loff_t *offset)
 	if (n < nr_cpu_ids)
 		return (void *)(unsigned long)(n + 2);
+
 	return NULL;
 }
@@ -835,10 +819,10 @@ static void sched_debug_stop(struct seq_file *file, void *data)
 }

 static const struct seq_operations sched_debug_sops = {
 	.start		= sched_debug_start,
 	.next		= sched_debug_next,
 	.stop		= sched_debug_stop,
 	.show		= sched_debug_show,
 };

 static int sched_debug_release(struct inode *inode, struct file *file)
@@ -876,14 +860,10 @@ static int __init init_sched_debug_procfs(void)
 __initcall(init_sched_debug_procfs);

-#define __P(F) \
-	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
-#define P(F) \
-	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
-#define __PN(F) \
-	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
-#define PN(F) \
-	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
+#define __P(F)	SEQ_printf(m, "%-45s:%21Ld\n",	     #F, (long long)F)
+#define P(F)	SEQ_printf(m, "%-45s:%21Ld\n",	     #F, (long long)p->F)
+#define __PN(F)	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
+#define PN(F)	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))

 #ifdef CONFIG_NUMA_BALANCING
...@@ -20,7 +20,6 @@ ...@@ -20,7 +20,6 @@
* Adaptive scheduling granularity, math enhancements by Peter Zijlstra * Adaptive scheduling granularity, math enhancements by Peter Zijlstra
* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
*/ */
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/sched/topology.h> #include <linux/sched/topology.h>
...@@ -103,7 +102,7 @@ const_debug unsigned int sysctl_sched_migration_cost = 500000UL; ...@@ -103,7 +102,7 @@ const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* /*
* For asym packing, by default the lower numbered cpu has higher priority. * For asym packing, by default the lower numbered CPU has higher priority.
*/ */
int __weak arch_asym_cpu_priority(int cpu) int __weak arch_asym_cpu_priority(int cpu)
{ {
...@@ -1181,7 +1180,7 @@ pid_t task_numa_group_id(struct task_struct *p) ...@@ -1181,7 +1180,7 @@ pid_t task_numa_group_id(struct task_struct *p)
} }
/* /*
* The averaged statistics, shared & private, memory & cpu, * The averaged statistics, shared & private, memory & CPU,
* occupy the first half of the array. The second half of the * occupy the first half of the array. The second half of the
* array is for current counters, which are averaged into the * array is for current counters, which are averaged into the
* first set by task_numa_placement. * first set by task_numa_placement.
...@@ -1587,7 +1586,7 @@ static void task_numa_compare(struct task_numa_env *env, ...@@ -1587,7 +1586,7 @@ static void task_numa_compare(struct task_numa_env *env,
* be incurred if the tasks were swapped. * be incurred if the tasks were swapped.
*/ */
if (cur) { if (cur) {
/* Skip this swap candidate if cannot move to the source cpu */ /* Skip this swap candidate if cannot move to the source CPU: */
if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed)) if (!cpumask_test_cpu(env->src_cpu, &cur->cpus_allowed))
goto unlock; goto unlock;
...@@ -1631,7 +1630,7 @@ static void task_numa_compare(struct task_numa_env *env, ...@@ -1631,7 +1630,7 @@ static void task_numa_compare(struct task_numa_env *env,
goto balance; goto balance;
} }
/* Balance doesn't matter much if we're running a task per cpu */ /* Balance doesn't matter much if we're running a task per CPU: */
if (imp > env->best_imp && src_rq->nr_running == 1 && if (imp > env->best_imp && src_rq->nr_running == 1 &&
dst_rq->nr_running == 1) dst_rq->nr_running == 1)
goto assign; goto assign;
...@@ -1676,7 +1675,7 @@ static void task_numa_compare(struct task_numa_env *env, ...@@ -1676,7 +1675,7 @@ static void task_numa_compare(struct task_numa_env *env,
*/ */
if (!cur) { if (!cur) {
/* /*
* select_idle_siblings() uses an per-cpu cpumask that * select_idle_siblings() uses an per-CPU cpumask that
* can be used from IRQ context. * can be used from IRQ context.
*/ */
local_irq_disable(); local_irq_disable();
...@@ -3362,7 +3361,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) ...@@ -3362,7 +3361,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force)
} }
/* /*
* Called within set_task_rq() right before setting a task's cpu. The * Called within set_task_rq() right before setting a task's CPU. The
* caller only guarantees p->pi_lock is held; no other assumptions, * caller only guarantees p->pi_lock is held; no other assumptions,
* including the state of rq->lock, should be made. * including the state of rq->lock, should be made.
*/ */
...@@ -3541,7 +3540,7 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf ...@@ -3541,7 +3540,7 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf
/* /*
* runnable_sum can't be lower than running_sum * runnable_sum can't be lower than running_sum
* As running sum is scale with cpu capacity wehreas the runnable sum * As running sum is scale with CPU capacity wehreas the runnable sum
* is not we rescale running_sum 1st * is not we rescale running_sum 1st
*/ */
running_sum = se->avg.util_sum / running_sum = se->avg.util_sum /
...@@ -4688,7 +4687,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) ...@@ -4688,7 +4687,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
if (!se) if (!se)
add_nr_running(rq, task_delta); add_nr_running(rq, task_delta);
/* determine whether we need to wake up potentially idle cpu */ /* Determine whether we need to wake up potentially idle CPU: */
if (rq->curr == rq->idle && rq->cfs.nr_running) if (rq->curr == rq->idle && rq->cfs.nr_running)
resched_curr(rq); resched_curr(rq);
} }
...@@ -5053,7 +5052,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) ...@@ -5053,7 +5052,7 @@ static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
} }
/* /*
* Both these cpu hotplug callbacks race against unregister_fair_sched_group() * Both these CPU hotplug callbacks race against unregister_fair_sched_group()
* *
* The race is harmless, since modifying bandwidth settings of unhooked group * The race is harmless, since modifying bandwidth settings of unhooked group
* bits doesn't do much. * bits doesn't do much.
...@@ -5098,7 +5097,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) ...@@ -5098,7 +5097,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
*/ */
cfs_rq->runtime_remaining = 1; cfs_rq->runtime_remaining = 1;
/* /*
* Offline rq is schedulable till cpu is completely disabled * Offline rq is schedulable till the CPU is completely disabled
* in take_cpu_down(), so we prevent new cfs throttling here. * in take_cpu_down(), so we prevent new cfs throttling here.
*/ */
cfs_rq->runtime_enabled = 0; cfs_rq->runtime_enabled = 0;
...@@ -5335,8 +5334,8 @@ DEFINE_PER_CPU(cpumask_var_t, select_idle_mask); ...@@ -5335,8 +5334,8 @@ DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
* *
* load' = (1 - 1/2^i) * load + (1/2^i) * cur_load * load' = (1 - 1/2^i) * load + (1/2^i) * cur_load
* *
* If a cpu misses updates for n ticks (as it was idle) and update gets * If a CPU misses updates for n ticks (as it was idle) and update gets
* called on the n+1-th tick when cpu may be busy, then we have: * called on the n+1-th tick when the CPU may be busy, then we have:
* *
* load_n = (1 - 1/2^i)^n * load_0 * load_n = (1 - 1/2^i)^n * load_0
* load_n+1 = (1 - 1/2^i) * load_n + (1/2^i) * cur_load * load_n+1 = (1 - 1/2^i) * load_n + (1/2^i) * cur_load
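The closed form above is what lets an idle CPU catch up without replaying every missed tick: n decay steps collapse into a single multiplication by (1 - 1/2^i)^n. A minimal sketch of that catch-up, with a hypothetical helper name (the kernel's actual helper for this, decay_load_missed(), avoids the loop with precomputed degrade factors):

        /*
         * Sketch: apply n missed decay steps for scale index i in one call,
         * per load_n = (1 - 1/2^i)^n * load_0 above. The loop is kept for
         * clarity; a real implementation would use a lookup table.
         */
        static unsigned long decay_load_n(unsigned long load, unsigned int i,
                                          unsigned int n)
        {
                while (n--)
                        load -= load >> i;      /* load *= (1 - 1/2^i) */
                return load;
        }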
...@@ -5480,7 +5479,7 @@ static unsigned long weighted_cpuload(struct rq *rq) ...@@ -5480,7 +5479,7 @@ static unsigned long weighted_cpuload(struct rq *rq)
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
/* /*
* There is no sane way to deal with nohz on smp when using jiffies because the * There is no sane way to deal with nohz on smp when using jiffies because the
* cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading * CPU doing the jiffies update might drift wrt the CPU doing the jiffy reading
* causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
* *
* Therefore we need to avoid the delta approach from the regular tick when * Therefore we need to avoid the delta approach from the regular tick when
...@@ -5591,7 +5590,7 @@ void cpu_load_update_active(struct rq *this_rq) ...@@ -5591,7 +5590,7 @@ void cpu_load_update_active(struct rq *this_rq)
} }
/* /*
* Return a low guess at the load of a migration-source cpu weighted * Return a low guess at the load of a migration-source CPU weighted
* according to the scheduling class and "nice" value. * according to the scheduling class and "nice" value.
* *
* We want to under-estimate the load of migration sources, to * We want to under-estimate the load of migration sources, to
...@@ -5609,7 +5608,7 @@ static unsigned long source_load(int cpu, int type) ...@@ -5609,7 +5608,7 @@ static unsigned long source_load(int cpu, int type)
} }
/* /*
* Return a high guess at the load of a migration-target cpu weighted * Return a high guess at the load of a migration-target CPU weighted
* according to the scheduling class and "nice" value. * according to the scheduling class and "nice" value.
*/ */
static unsigned long target_load(int cpu, int type) static unsigned long target_load(int cpu, int type)
...@@ -5889,7 +5888,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, ...@@ -5889,7 +5888,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
max_spare_cap = 0; max_spare_cap = 0;
for_each_cpu(i, sched_group_span(group)) { for_each_cpu(i, sched_group_span(group)) {
/* Bias balancing toward cpus of our domain */ /* Bias balancing toward CPUs of our domain */
if (local_group) if (local_group)
load = source_load(i, load_idx); load = source_load(i, load_idx);
else else
...@@ -5919,7 +5918,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, ...@@ -5919,7 +5918,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
if (min_runnable_load > (runnable_load + imbalance)) { if (min_runnable_load > (runnable_load + imbalance)) {
/* /*
* The runnable load is significantly smaller * The runnable load is significantly smaller
* so we can pick this new cpu * so we can pick this new CPU:
*/ */
min_runnable_load = runnable_load; min_runnable_load = runnable_load;
min_avg_load = avg_load; min_avg_load = avg_load;
...@@ -5928,7 +5927,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, ...@@ -5928,7 +5927,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
(100*min_avg_load > imbalance_scale*avg_load)) { (100*min_avg_load > imbalance_scale*avg_load)) {
/* /*
* The runnable loads are close so take the * The runnable loads are close so take the
* blocked load into account through avg_load. * blocked load into account through avg_load:
*/ */
min_avg_load = avg_load; min_avg_load = avg_load;
idlest = group; idlest = group;
...@@ -5989,7 +5988,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, ...@@ -5989,7 +5988,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
} }
/* /*
* find_idlest_group_cpu - find the idlest cpu among the cpus in group. * find_idlest_group_cpu - find the idlest CPU among the CPUs in the group.
*/ */
static int static int
find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
...@@ -6067,12 +6066,12 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p ...@@ -6067,12 +6066,12 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
new_cpu = find_idlest_group_cpu(group, p, cpu); new_cpu = find_idlest_group_cpu(group, p, cpu);
if (new_cpu == cpu) { if (new_cpu == cpu) {
/* Now try balancing at a lower domain level of cpu */ /* Now try balancing at a lower domain level of 'cpu': */
sd = sd->child; sd = sd->child;
continue; continue;
} }
/* Now try balancing at a lower domain level of new_cpu */ /* Now try balancing at a lower domain level of 'new_cpu': */
cpu = new_cpu; cpu = new_cpu;
weight = sd->span_weight; weight = sd->span_weight;
sd = NULL; sd = NULL;
...@@ -6082,7 +6081,6 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p ...@@ -6082,7 +6081,6 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
if (tmp->flags & sd_flag) if (tmp->flags & sd_flag)
sd = tmp; sd = tmp;
} }
/* while loop will break here if sd == NULL */
} }
return new_cpu; return new_cpu;
...@@ -6278,12 +6276,12 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) ...@@ -6278,12 +6276,12 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
return target; return target;
/* /*
* If the previous cpu is cache affine and idle, don't be stupid. * If the previous CPU is cache affine and idle, don't be stupid:
*/ */
if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev)) if (prev != target && cpus_share_cache(prev, target) && idle_cpu(prev))
return prev; return prev;
/* Check a recently used CPU as a potential idle candidate */ /* Check a recently used CPU as a potential idle candidate: */
recent_used_cpu = p->recent_used_cpu; recent_used_cpu = p->recent_used_cpu;
if (recent_used_cpu != prev && if (recent_used_cpu != prev &&
recent_used_cpu != target && recent_used_cpu != target &&
...@@ -6292,7 +6290,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) ...@@ -6292,7 +6290,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) { cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
/* /*
* Replace recent_used_cpu with prev as it is a potential * Replace recent_used_cpu with prev as it is a potential
* candidate for the next wake. * candidate for the next wake:
*/ */
p->recent_used_cpu = prev; p->recent_used_cpu = prev;
return recent_used_cpu; return recent_used_cpu;
...@@ -6357,7 +6355,7 @@ static inline unsigned long task_util(struct task_struct *p) ...@@ -6357,7 +6355,7 @@ static inline unsigned long task_util(struct task_struct *p)
} }
/* /*
* cpu_util_wake: Compute cpu utilization with any contributions from * cpu_util_wake: Compute CPU utilization with any contributions from
* the waking task p removed. * the waking task p removed.
*/ */
static unsigned long cpu_util_wake(int cpu, struct task_struct *p) static unsigned long cpu_util_wake(int cpu, struct task_struct *p)
...@@ -6403,10 +6401,10 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu) ...@@ -6403,10 +6401,10 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
* that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE, * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
* SD_BALANCE_FORK, or SD_BALANCE_EXEC. * SD_BALANCE_FORK, or SD_BALANCE_EXEC.
* *
* Balances load by selecting the idlest cpu in the idlest group, or under * Balances load by selecting the idlest CPU in the idlest group, or under
* certain conditions an idle sibling cpu if the domain has SD_WAKE_AFFINE set. * certain conditions an idle sibling CPU if the domain has SD_WAKE_AFFINE set.
* *
* Returns the target cpu number. * Returns the target CPU number.
* *
* preempt must be disabled. * preempt must be disabled.
*/ */
...@@ -6431,7 +6429,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f ...@@ -6431,7 +6429,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
break; break;
/* /*
* If both cpu and prev_cpu are part of this domain, * If both 'cpu' and 'prev_cpu' are part of this domain,
* 'cpu' is a valid SD_WAKE_AFFINE target. * 'cpu' is a valid SD_WAKE_AFFINE target.
*/ */
if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
...@@ -6482,9 +6480,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f ...@@ -6482,9 +6480,9 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
static void detach_entity_cfs_rq(struct sched_entity *se); static void detach_entity_cfs_rq(struct sched_entity *se);
/* /*
* Called immediately before a task is migrated to a new cpu; task_cpu(p) and * Called immediately before a task is migrated to a new CPU; task_cpu(p) and
* cfs_rq_of(p) references at time of call are still valid and identify the * cfs_rq_of(p) references at time of call are still valid and identify the
* previous cpu. The caller guarantees p->pi_lock or task_rq(p)->lock is held. * previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held.
*/ */
static void migrate_task_rq_fair(struct task_struct *p) static void migrate_task_rq_fair(struct task_struct *p)
{ {
...@@ -6918,17 +6916,17 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp ...@@ -6918,17 +6916,17 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
* BASICS * BASICS
* *
* The purpose of load-balancing is to achieve the same basic fairness the * The purpose of load-balancing is to achieve the same basic fairness the
* per-cpu scheduler provides, namely provide a proportional amount of compute * per-CPU scheduler provides, namely provide a proportional amount of compute
* time to each task. This is expressed in the following equation: * time to each task. This is expressed in the following equation:
* *
* W_i,n/P_i == W_j,n/P_j for all i,j (1) * W_i,n/P_i == W_j,n/P_j for all i,j (1)
* *
* Where W_i,n is the n-th weight average for cpu i. The instantaneous weight * Where W_i,n is the n-th weight average for CPU i. The instantaneous weight
* W_i,0 is defined as: * W_i,0 is defined as:
* *
* W_i,0 = \Sum_j w_i,j (2) * W_i,0 = \Sum_j w_i,j (2)
* *
* Where w_i,j is the weight of the j-th runnable task on cpu i. This weight * Where w_i,j is the weight of the j-th runnable task on CPU i. This weight
* is derived from the nice value as per sched_prio_to_weight[]. * is derived from the nice value as per sched_prio_to_weight[].
* *
* The weight average is an exponential decay average of the instantaneous * The weight average is an exponential decay average of the instantaneous
...@@ -6936,7 +6934,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp ...@@ -6936,7 +6934,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
* *
* W'_i,n = (2^n - 1) / 2^n * W_i,n + 1 / 2^n * W_i,0 (3) * W'_i,n = (2^n - 1) / 2^n * W_i,n + 1 / 2^n * W_i,0 (3)
* *
* C_i is the compute capacity of cpu i, typically it is the * C_i is the compute capacity of CPU i, typically it is the
* fraction of 'recent' time available for SCHED_OTHER task execution. But it * fraction of 'recent' time available for SCHED_OTHER task execution. But it
* can also include other factors [XXX]. * can also include other factors [XXX].
* *
...@@ -6957,11 +6955,11 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp ...@@ -6957,11 +6955,11 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
* SCHED DOMAINS * SCHED DOMAINS
* *
* In order to solve the imbalance equation (4), and avoid the obvious O(n^2) * In order to solve the imbalance equation (4), and avoid the obvious O(n^2)
* for all i,j solution, we create a tree of cpus that follows the hardware * for all i,j solution, we create a tree of CPUs that follows the hardware
* topology where each level pairs two lower groups (or better). This results * topology where each level pairs two lower groups (or better). This results
* in O(log n) layers. Furthermore we reduce the number of cpus going up the * in O(log n) layers. Furthermore we reduce the number of CPUs going up the
* tree to only the first of the previous level and we decrease the frequency * tree to only the first of the previous level and we decrease the frequency
* of load-balance at each level inv. proportional to the number of cpus in * of load-balance at each level inv. proportional to the number of CPUs in
* the groups. * the groups.
* *
* This yields: * This yields:
...@@ -6970,7 +6968,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp ...@@ -6970,7 +6968,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
* \Sum { --- * --- * 2^i } = O(n) (5) * \Sum { --- * --- * 2^i } = O(n) (5)
* i = 0 2^i 2^i * i = 0 2^i 2^i
* `- size of each group * `- size of each group
* | | `- number of cpus doing load-balance * | | `- number of CPUs doing load-balance
* | `- freq * | `- freq
* `- sum over all levels * `- sum over all levels
* *
...@@ -6978,7 +6976,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp ...@@ -6978,7 +6976,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
* this makes (5) the runtime complexity of the balancer. * this makes (5) the runtime complexity of the balancer.
* *
* An important property here is that each CPU is still (indirectly) connected * An important property here is that each CPU is still (indirectly) connected
* to every other cpu in at most O(log n) steps: * to every other CPU in at most O(log n) steps:
* *
* The adjacency matrix of the resulting graph is given by: * The adjacency matrix of the resulting graph is given by:
* *
...@@ -6990,7 +6988,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp ...@@ -6990,7 +6988,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
* *
* A^(log_2 n)_i,j != 0 for all i,j (7) * A^(log_2 n)_i,j != 0 for all i,j (7)
* *
* Showing there's indeed a path between every cpu in at most O(log n) steps. * Showing there's indeed a path between every CPU in at most O(log n) steps.
* The task movement gives a factor of O(m), giving a convergence complexity * The task movement gives a factor of O(m), giving a convergence complexity
* of: * of:
* *
...@@ -7000,7 +6998,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp ...@@ -7000,7 +6998,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
* WORK CONSERVING * WORK CONSERVING
* *
* In order to avoid CPUs going idle while there's still work to do, new idle * In order to avoid CPUs going idle while there's still work to do, new idle
* balancing is more aggressive and has the newly idle cpu iterate up the domain * balancing is more aggressive and has the newly idle CPU iterate up the domain
* tree itself instead of relying on other CPUs to bring it work. * tree itself instead of relying on other CPUs to bring it work.
* *
* This adds some complexity to both (5) and (8) but it reduces the total idle * This adds some complexity to both (5) and (8) but it reduces the total idle
...@@ -7021,7 +7019,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp ...@@ -7021,7 +7019,7 @@ static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preemp
* *
* s_k,i = \Sum_j w_i,j,k and S_k = \Sum_i s_k,i (10) * s_k,i = \Sum_j w_i,j,k and S_k = \Sum_i s_k,i (10)
* *
* w_i,j,k is the weight of the j-th runnable task in the k-th cgroup on cpu i. * w_i,j,k is the weight of the j-th runnable task in the k-th cgroup on CPU i.
* *
* The big problem is S_k, it's a global sum needed to compute a local (W_i) * The big problem is S_k, it's a global sum needed to compute a local (W_i)
* property. * property.
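To make (3) concrete: for a fixed n it is a plain exponential moving average, where the previous average keeps weight (2^n - 1)/2^n and the fresh instantaneous weight W_i,0 contributes 1/2^n. A hedged one-liner of that update (illustrative sketch, not kernel code):

        /* Sketch of (3): W' = (2^n - 1)/2^n * W + 1/2^n * W_0, done in shifts. */
        static unsigned long weight_avg(unsigned long W, unsigned long W_0,
                                        unsigned int n)
        {
                return W - (W >> n) + (W_0 >> n);
        }

For n = 3, W = 800 and W_0 = 1600 this yields 800 - 100 + 200 = 900, i.e. the average moves 1/8th of the way toward the instantaneous weight.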
...@@ -7185,7 +7183,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) ...@@ -7185,7 +7183,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
env->flags |= LBF_SOME_PINNED; env->flags |= LBF_SOME_PINNED;
/* /*
* Remember if this task can be migrated to any other cpu in * Remember if this task can be migrated to any other CPU in
* our sched_group. We may want to revisit it if we couldn't * our sched_group. We may want to revisit it if we couldn't
* meet load balance goals by pulling other tasks on src_cpu. * meet load balance goals by pulling other tasks on src_cpu.
* *
...@@ -7195,7 +7193,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) ...@@ -7195,7 +7193,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
if (env->idle == CPU_NEWLY_IDLE || (env->flags & LBF_DST_PINNED)) if (env->idle == CPU_NEWLY_IDLE || (env->flags & LBF_DST_PINNED))
return 0; return 0;
/* Prevent to re-select dst_cpu via env's cpus */ /* Prevent re-selecting dst_cpu via env's CPUs: */
for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) { for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
if (cpumask_test_cpu(cpu, &p->cpus_allowed)) { if (cpumask_test_cpu(cpu, &p->cpus_allowed)) {
env->flags |= LBF_DST_PINNED; env->flags |= LBF_DST_PINNED;
...@@ -7769,8 +7767,8 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd) ...@@ -7769,8 +7767,8 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
* Group imbalance indicates (and tries to solve) the problem where balancing * Group imbalance indicates (and tries to solve) the problem where balancing
* groups is inadequate due to ->cpus_allowed constraints. * groups is inadequate due to ->cpus_allowed constraints.
* *
* Imagine a situation of two groups of 4 cpus each and 4 tasks each with a * Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a
* cpumask covering 1 cpu of the first group and 3 cpus of the second group. * cpumask covering 1 CPU of the first group and 3 CPUs of the second group.
* Something like: * Something like:
* *
* { 0 1 2 3 } { 4 5 6 7 } * { 0 1 2 3 } { 4 5 6 7 }
...@@ -7778,7 +7776,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd) ...@@ -7778,7 +7776,7 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
* *
* If we were to balance group-wise we'd place two tasks in the first group and * If we were to balance group-wise we'd place two tasks in the first group and
* two tasks in the second group. Clearly this is undesired as it will overload * two tasks in the second group. Clearly this is undesired as it will overload
* cpu 3 and leave one of the cpus in the second group unused. * CPU 3 and leave one of the CPUs in the second group unused.
* *
* The current solution to this issue is detecting the skew in the first group * The current solution to this issue is detecting the skew in the first group
* by noticing the lower domain failed to reach balance and had difficulty * by noticing the lower domain failed to reach balance and had difficulty
...@@ -7891,7 +7889,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, ...@@ -7891,7 +7889,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
for_each_cpu_and(i, sched_group_span(group), env->cpus) { for_each_cpu_and(i, sched_group_span(group), env->cpus) {
struct rq *rq = cpu_rq(i); struct rq *rq = cpu_rq(i);
/* Bias balancing toward cpus of our domain */ /* Bias balancing toward CPUs of our domain: */
if (local_group) if (local_group)
load = target_load(i, load_idx); load = target_load(i, load_idx);
else else
...@@ -7977,7 +7975,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, ...@@ -7977,7 +7975,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
if (!(env->sd->flags & SD_ASYM_PACKING)) if (!(env->sd->flags & SD_ASYM_PACKING))
return true; return true;
/* No ASYM_PACKING if target cpu is already busy */ /* No ASYM_PACKING if target CPU is already busy */
if (env->idle == CPU_NOT_IDLE) if (env->idle == CPU_NOT_IDLE)
return true; return true;
/* /*
...@@ -7990,7 +7988,7 @@ static bool update_sd_pick_busiest(struct lb_env *env, ...@@ -7990,7 +7988,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
if (!sds->busiest) if (!sds->busiest)
return true; return true;
/* Prefer to move from lowest priority cpu's work */ /* Prefer to move work away from the lowest priority CPU */
if (sched_asym_prefer(sds->busiest->asym_prefer_cpu, if (sched_asym_prefer(sds->busiest->asym_prefer_cpu,
sg->asym_prefer_cpu)) sg->asym_prefer_cpu))
return true; return true;
...@@ -8243,7 +8241,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s ...@@ -8243,7 +8241,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
if (busiest->group_type == group_imbalanced) { if (busiest->group_type == group_imbalanced) {
/* /*
* In the group_imb case we cannot rely on group-wide averages * In the group_imb case we cannot rely on group-wide averages
* to ensure cpu-load equilibrium, look at wider averages. XXX * to ensure CPU-load equilibrium, look at wider averages. XXX
*/ */
busiest->load_per_task = busiest->load_per_task =
min(busiest->load_per_task, sds->avg_load); min(busiest->load_per_task, sds->avg_load);
...@@ -8262,7 +8260,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s ...@@ -8262,7 +8260,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
} }
/* /*
* If there aren't any idle cpus, avoid creating some. * If there aren't any idle CPUs, avoid creating some.
*/ */
if (busiest->group_type == group_overloaded && if (busiest->group_type == group_overloaded &&
local->group_type == group_overloaded) { local->group_type == group_overloaded) {
...@@ -8276,9 +8274,9 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s ...@@ -8276,9 +8274,9 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
} }
/* /*
* We're trying to get all the cpus to the average_load, so we don't * We're trying to get all the CPUs to the average_load, so we don't
* want to push ourselves above the average load, nor do we wish to * want to push ourselves above the average load, nor do we wish to
* reduce the max loaded cpu below the average load. At the same time, * reduce the max loaded CPU below the average load. At the same time,
* we also don't want to reduce the group load below the group * we also don't want to reduce the group load below the group
* capacity. Thus we look for the minimum possible imbalance. * capacity. Thus we look for the minimum possible imbalance.
*/ */
...@@ -8372,9 +8370,9 @@ static struct sched_group *find_busiest_group(struct lb_env *env) ...@@ -8372,9 +8370,9 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
if (env->idle == CPU_IDLE) { if (env->idle == CPU_IDLE) {
/* /*
* This cpu is idle. If the busiest group is not overloaded * This CPU is idle. If the busiest group is not overloaded
* and there is no imbalance between this and busiest group * and there is no imbalance between this and busiest group
* wrt idle cpus, it is balanced. The imbalance becomes * wrt idle CPUs, it is balanced. The imbalance becomes
* significant if the diff is greater than 1, otherwise we * significant if the diff is greater than 1, otherwise we
* might end up just moving the imbalance to another group * might end up just moving the imbalance to another group
*/ */
...@@ -8402,7 +8400,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) ...@@ -8402,7 +8400,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
} }
/* /*
* find_busiest_queue - find the busiest runqueue among the cpus in group. * find_busiest_queue - find the busiest runqueue among the CPUs in the group.
*/ */
static struct rq *find_busiest_queue(struct lb_env *env, static struct rq *find_busiest_queue(struct lb_env *env,
struct sched_group *group) struct sched_group *group)
...@@ -8446,7 +8444,7 @@ static struct rq *find_busiest_queue(struct lb_env *env, ...@@ -8446,7 +8444,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
/* /*
* When comparing with imbalance, use weighted_cpuload() * When comparing with imbalance, use weighted_cpuload()
* which is not scaled with the cpu capacity. * which is not scaled with the CPU capacity.
*/ */
if (rq->nr_running == 1 && wl > env->imbalance && if (rq->nr_running == 1 && wl > env->imbalance &&
...@@ -8454,9 +8452,9 @@ static struct rq *find_busiest_queue(struct lb_env *env, ...@@ -8454,9 +8452,9 @@ static struct rq *find_busiest_queue(struct lb_env *env,
continue; continue;
/* /*
* For the load comparisons with the other cpu's, consider * For the load comparisons with the other CPUs, consider
* the weighted_cpuload() scaled with the cpu capacity, so * the weighted_cpuload() scaled with the CPU capacity, so
* that the load can be moved away from the cpu that is * that the load can be moved away from the CPU that is
* potentially running at a lower capacity. * potentially running at a lower capacity.
* *
* Thus we're looking for max(wl_i / capacity_i), crosswise * Thus we're looking for max(wl_i / capacity_i), crosswise
...@@ -8527,13 +8525,13 @@ static int should_we_balance(struct lb_env *env) ...@@ -8527,13 +8525,13 @@ static int should_we_balance(struct lb_env *env)
return 0; return 0;
/* /*
* In the newly idle case, we will allow all the cpu's * In the newly idle case, we will allow all the CPUs
* to do the newly idle load balance. * to do the newly idle load balance.
*/ */
if (env->idle == CPU_NEWLY_IDLE) if (env->idle == CPU_NEWLY_IDLE)
return 1; return 1;
/* Try to find first idle cpu */ /* Try to find the first idle CPU */
for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) { for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
if (!idle_cpu(cpu)) if (!idle_cpu(cpu))
continue; continue;
...@@ -8546,7 +8544,7 @@ static int should_we_balance(struct lb_env *env) ...@@ -8546,7 +8544,7 @@ static int should_we_balance(struct lb_env *env)
balance_cpu = group_balance_cpu(sg); balance_cpu = group_balance_cpu(sg);
/* /*
* First idle cpu or the first cpu(busiest) in this sched group * First idle CPU or the first CPU (busiest) in this sched group
* is eligible for doing load balancing at this and above domains. * is eligible for doing load balancing at this and above domains.
*/ */
return balance_cpu == env->dst_cpu; return balance_cpu == env->dst_cpu;
...@@ -8655,7 +8653,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, ...@@ -8655,7 +8653,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
* Revisit (affine) tasks on src_cpu that couldn't be moved to * Revisit (affine) tasks on src_cpu that couldn't be moved to
* us and move them to an alternate dst_cpu in our sched_group * us and move them to an alternate dst_cpu in our sched_group
* where they can run. The upper limit on how many times we * where they can run. The upper limit on how many times we
* iterate on same src_cpu is dependent on number of cpus in our * iterate on same src_cpu is dependent on number of CPUs in our
* sched_group. * sched_group.
* *
* This changes load balance semantics a bit on who can move * This changes load balance semantics a bit on who can move
...@@ -8672,7 +8670,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, ...@@ -8672,7 +8670,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
*/ */
if ((env.flags & LBF_DST_PINNED) && env.imbalance > 0) { if ((env.flags & LBF_DST_PINNED) && env.imbalance > 0) {
/* Prevent to re-select dst_cpu via env's cpus */ /* Prevent re-selecting dst_cpu via env's CPUs */
cpumask_clear_cpu(env.dst_cpu, env.cpus); cpumask_clear_cpu(env.dst_cpu, env.cpus);
env.dst_rq = cpu_rq(env.new_dst_cpu); env.dst_rq = cpu_rq(env.new_dst_cpu);
...@@ -8734,9 +8732,10 @@ static int load_balance(int this_cpu, struct rq *this_rq, ...@@ -8734,9 +8732,10 @@ static int load_balance(int this_cpu, struct rq *this_rq,
raw_spin_lock_irqsave(&busiest->lock, flags); raw_spin_lock_irqsave(&busiest->lock, flags);
/* don't kick the active_load_balance_cpu_stop, /*
* if the curr task on busiest cpu can't be * Don't kick the active_load_balance_cpu_stop,
* moved to this_cpu * if the curr task on busiest CPU can't be
* moved to this_cpu:
*/ */
if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) { if (!cpumask_test_cpu(this_cpu, &busiest->curr->cpus_allowed)) {
raw_spin_unlock_irqrestore(&busiest->lock, raw_spin_unlock_irqrestore(&busiest->lock,
...@@ -8962,7 +8961,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) ...@@ -8962,7 +8961,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf)
} }
/* /*
* active_load_balance_cpu_stop is run by cpu stopper. It pushes * active_load_balance_cpu_stop is run by the CPU stopper. It pushes
* running tasks off the busiest CPU onto idle CPUs. It requires at * running tasks off the busiest CPU onto idle CPUs. It requires at
* least 1 task to be running on each physical CPU where possible, and * least 1 task to be running on each physical CPU where possible, and
* avoids physical / logical imbalances. * avoids physical / logical imbalances.
...@@ -8986,7 +8985,7 @@ static int active_load_balance_cpu_stop(void *data) ...@@ -8986,7 +8985,7 @@ static int active_load_balance_cpu_stop(void *data)
if (!cpu_active(busiest_cpu) || !cpu_active(target_cpu)) if (!cpu_active(busiest_cpu) || !cpu_active(target_cpu))
goto out_unlock; goto out_unlock;
/* make sure the requested cpu hasn't gone down in the meantime */ /* Make sure the requested CPU hasn't gone down in the meantime: */
if (unlikely(busiest_cpu != smp_processor_id() || if (unlikely(busiest_cpu != smp_processor_id() ||
!busiest_rq->active_balance)) !busiest_rq->active_balance))
goto out_unlock; goto out_unlock;
...@@ -8998,7 +8997,7 @@ static int active_load_balance_cpu_stop(void *data) ...@@ -8998,7 +8997,7 @@ static int active_load_balance_cpu_stop(void *data)
/* /*
* This condition is "impossible", if it occurs * This condition is "impossible", if it occurs
* we need to fix it. Originally reported by * we need to fix it. Originally reported by
* Bjorn Helgaas on a 128-cpu setup. * Bjorn Helgaas on a 128-CPU setup.
*/ */
BUG_ON(busiest_rq == target_rq); BUG_ON(busiest_rq == target_rq);
...@@ -9100,7 +9099,7 @@ static void nohz_balancer_kick(void) ...@@ -9100,7 +9099,7 @@ static void nohz_balancer_kick(void)
return; return;
/* /*
* Use smp_send_reschedule() instead of resched_cpu(). * Use smp_send_reschedule() instead of resched_cpu().
* This way we generate a sched IPI on the target cpu which * This way we generate a sched IPI on the target CPU which
* is idle. And the softirq performing nohz idle load balance * is idle. And the softirq performing nohz idle load balance
* will be run before returning from the IPI. * will be run before returning from the IPI.
*/ */
...@@ -9157,14 +9156,12 @@ void set_cpu_sd_state_idle(void) ...@@ -9157,14 +9156,12 @@ void set_cpu_sd_state_idle(void)
} }
/* /*
* This routine will record that the cpu is going idle with tick stopped. * This routine will record that the CPU is going idle with tick stopped.
* This info will be used in performing idle load balancing in the future. * This info will be used in performing idle load balancing in the future.
*/ */
void nohz_balance_enter_idle(int cpu) void nohz_balance_enter_idle(int cpu)
{ {
/* /* If this CPU is going down, then nothing needs to be done: */
* If this cpu is going down, then nothing needs to be done.
*/
if (!cpu_active(cpu)) if (!cpu_active(cpu))
return; return;
...@@ -9175,9 +9172,7 @@ void nohz_balance_enter_idle(int cpu) ...@@ -9175,9 +9172,7 @@ void nohz_balance_enter_idle(int cpu)
if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu))) if (test_bit(NOHZ_TICK_STOPPED, nohz_flags(cpu)))
return; return;
/* /* If we're a completely isolated CPU, we don't play: */
* If we're a completely isolated CPU, we don't play.
*/
if (on_null_domain(cpu_rq(cpu))) if (on_null_domain(cpu_rq(cpu)))
return; return;
...@@ -9286,7 +9281,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) ...@@ -9286,7 +9281,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
/* /*
* next_balance will be updated only when there is a need. * next_balance will be updated only when there is a need.
* When the cpu is attached to null domain for ex, it will not be * When the CPU is attached to a null domain, for example, it will not be
* updated. * updated.
*/ */
if (likely(update_next_balance)) { if (likely(update_next_balance)) {
...@@ -9310,7 +9305,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle) ...@@ -9310,7 +9305,7 @@ static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
/* /*
* In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the * In CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
* rebalancing for all the cpus for whom scheduler ticks are stopped. * rebalancing for all the CPUs for whom scheduler ticks are stopped.
*/ */
static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
{ {
...@@ -9330,8 +9325,8 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) ...@@ -9330,8 +9325,8 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
continue; continue;
/* /*
* If this cpu gets work to do, stop the load balancing * If this CPU gets work to do, stop the load balancing
* work being done for other cpus. Next load * work being done for other CPUs. Next load
* balancing owner will pick it up. * balancing owner will pick it up.
*/ */
if (need_resched()) if (need_resched())
...@@ -9373,13 +9368,13 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) ...@@ -9373,13 +9368,13 @@ static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
/* /*
* Current heuristic for kicking the idle load balancer in the presence * Current heuristic for kicking the idle load balancer in the presence
* of an idle cpu in the system. * of an idle CPU in the system.
* - This rq has more than one task. * - This rq has more than one task.
* - This rq has at least one CFS task and the capacity of the CPU is * - This rq has at least one CFS task and the capacity of the CPU is
* significantly reduced because of RT tasks or IRQs. * significantly reduced because of RT tasks or IRQs.
* - At parent of LLC scheduler domain level, this cpu's scheduler group has * - At parent of LLC scheduler domain level, this CPU's scheduler group has
* multiple busy cpu. * multiple busy CPUs.
* - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler * - For SD_ASYM_PACKING, if the lower numbered CPUs in the scheduler
* domain span are idle. * domain span are idle.
*/ */
static inline bool nohz_kick_needed(struct rq *rq) static inline bool nohz_kick_needed(struct rq *rq)
...@@ -9469,10 +9464,10 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h) ...@@ -9469,10 +9464,10 @@ static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
CPU_IDLE : CPU_NOT_IDLE; CPU_IDLE : CPU_NOT_IDLE;
/* /*
* If this cpu has a pending nohz_balance_kick, then do the * If this CPU has a pending nohz_balance_kick, then do the
* balancing on behalf of the other idle cpus whose ticks are * balancing on behalf of the other idle CPUs whose ticks are
* stopped. Do nohz_idle_balance *before* rebalance_domains to * stopped. Do nohz_idle_balance *before* rebalance_domains to
* give the idle cpus a chance to load balance. Else we may * give the idle CPUs a chance to load balance. Else we may
* load balance only within the local sched_domain hierarchy * load balance only within the local sched_domain hierarchy
* and abort nohz_idle_balance altogether if we pull some load. * and abort nohz_idle_balance altogether if we pull some load.
*/ */
......
/* /*
* Generic entry point for the idle threads * Generic entry points for the idle threads
*/ */
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/idle.h> #include <linux/sched/idle.h>
...@@ -332,8 +332,8 @@ void cpu_startup_entry(enum cpuhp_state state) ...@@ -332,8 +332,8 @@ void cpu_startup_entry(enum cpuhp_state state)
{ {
/* /*
* This #ifdef needs to die, but it's too late in the cycle to * This #ifdef needs to die, but it's too late in the cycle to
* make this generic (arm and sh have never invoked the canary * make this generic (ARM and SH have never invoked the canary
* init for the non boot cpus!). Will be fixed in 3.11 * init for the non boot CPUs!). Will be fixed in 3.11
*/ */
#ifdef CONFIG_X86 #ifdef CONFIG_X86
/* /*
......
...@@ -14,7 +14,7 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags) ...@@ -14,7 +14,7 @@ select_task_rq_idle(struct task_struct *p, int cpu, int sd_flag, int flags)
{ {
return task_cpu(p); /* IDLE tasks are never migrated */ return task_cpu(p); /* IDLE tasks are never migrated */
} }
#endif /* CONFIG_SMP */ #endif
/* /*
* Idle tasks are unconditionally rescheduled: * Idle tasks are unconditionally rescheduled:
...@@ -30,6 +30,7 @@ pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf ...@@ -30,6 +30,7 @@ pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
put_prev_task(rq, prev); put_prev_task(rq, prev);
update_idle_core(rq); update_idle_core(rq);
schedstat_inc(rq->sched_goidle); schedstat_inc(rq->sched_goidle);
return rq->idle; return rq->idle;
} }
......
...@@ -6,13 +6,13 @@ ...@@ -6,13 +6,13 @@
* Copyright (C) 2017-2018 SUSE, Frederic Weisbecker * Copyright (C) 2017-2018 SUSE, Frederic Weisbecker
* *
*/ */
#include <linux/sched/isolation.h> #include <linux/sched/isolation.h>
#include <linux/tick.h> #include <linux/tick.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/static_key.h> #include <linux/static_key.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include "sched.h" #include "sched.h"
DEFINE_STATIC_KEY_FALSE(housekeeping_overriden); DEFINE_STATIC_KEY_FALSE(housekeeping_overriden);
......
...@@ -32,29 +32,29 @@ ...@@ -32,29 +32,29 @@
* Due to a number of reasons the above turns into the mess below: * Due to a number of reasons the above turns into the mess below:
* *
* - for_each_possible_cpu() is prohibitively expensive on machines with * - for_each_possible_cpu() is prohibitively expensive on machines with
* serious number of cpus, therefore we need to take a distributed approach * serious number of CPUs, therefore we need to take a distributed approach
* to calculating nr_active. * to calculating nr_active.
* *
* \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0 * \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0
* = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) } * = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) }
* *
* So assuming nr_active := 0 when we start out -- true per definition, we * So assuming nr_active := 0 when we start out -- true per definition, we
* can simply take per-cpu deltas and fold those into a global accumulate * can simply take per-CPU deltas and fold those into a global accumulate
* to obtain the same result. See calc_load_fold_active(). * to obtain the same result. See calc_load_fold_active().
* *
* Furthermore, in order to avoid synchronizing all per-cpu delta folding * Furthermore, in order to avoid synchronizing all per-CPU delta folding
* across the machine, we assume 10 ticks is sufficient time for every * across the machine, we assume 10 ticks is sufficient time for every
* cpu to have completed this task. * CPU to have completed this task.
* *
* This places an upper-bound on the IRQ-off latency of the machine. Then * This places an upper-bound on the IRQ-off latency of the machine. Then
* again, being late doesn't lose the delta, just wrecks the sample. * again, being late doesn't lose the delta, just wrecks the sample.
* *
* - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because * - cpu_rq()->nr_uninterruptible isn't accurately tracked per-CPU because
* this would add another cross-cpu cacheline miss and atomic operation * this would add another cross-CPU cacheline miss and atomic operation
* to the wakeup path. Instead we increment on whatever cpu the task ran * to the wakeup path. Instead we increment on whatever CPU the task ran
* when it went into uninterruptible state and decrement on whatever cpu * when it went into uninterruptible state and decrement on whatever CPU
* did the wakeup. This means that only the sum of nr_uninterruptible over * did the wakeup. This means that only the sum of nr_uninterruptible over
* all cpus yields the correct result. * all CPUs yields the correct result.
* *
* This covers the NO_HZ=n code, for extra head-aches, see the comment below. * This covers the NO_HZ=n code, for extra head-aches, see the comment below.
*/ */
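A minimal sketch of the delta-folding described above (hypothetical helper, simplified from what calc_load_fold_active() does; the real one also folds nr_uninterruptible, and the caller adds the delta to the global accumulator atomically):

        /*
         * Sketch: each CPU remembers its last reported active count and
         * contributes only the difference, so the global sum never needs
         * a cross-CPU read of any x_i(t).
         */
        static long fold_active(long *calc_load_active, long nr_active)
        {
                long delta = nr_active - *calc_load_active;

                *calc_load_active = nr_active;
                return delta;   /* caller adds this to the global accumulator */
        }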
...@@ -115,11 +115,11 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) ...@@ -115,11 +115,11 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
* Handle NO_HZ for the global load-average. * Handle NO_HZ for the global load-average.
* *
* Since the above described distributed algorithm to compute the global * Since the above described distributed algorithm to compute the global
* load-average relies on per-cpu sampling from the tick, it is affected by * load-average relies on per-CPU sampling from the tick, it is affected by
* NO_HZ. * NO_HZ.
* *
* The basic idea is to fold the nr_active delta into a global NO_HZ-delta upon * The basic idea is to fold the nr_active delta into a global NO_HZ-delta upon
* entering NO_HZ state such that we can include this as an 'extra' cpu delta * entering NO_HZ state such that we can include this as an 'extra' CPU delta
* when we read the global state. * when we read the global state.
* *
* Obviously reality has to ruin such a delightfully simple scheme: * Obviously reality has to ruin such a delightfully simple scheme:
...@@ -146,9 +146,9 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) ...@@ -146,9 +146,9 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
* busy state. * busy state.
* *
* This is solved by pushing the window forward, and thus skipping the * This is solved by pushing the window forward, and thus skipping the
* sample, for this cpu (effectively using the NO_HZ-delta for this cpu which * sample, for this CPU (effectively using the NO_HZ-delta for this CPU which
* was in effect at the time the window opened). This also solves the issue * was in effect at the time the window opened). This also solves the issue
* of having to deal with a cpu having been in NO_HZ for multiple LOAD_FREQ * of having to deal with a CPU having been in NO_HZ for multiple LOAD_FREQ
* intervals. * intervals.
* *
* When making the ILB scale, we should try to pull this in as well. * When making the ILB scale, we should try to pull this in as well.
...@@ -299,7 +299,7 @@ calc_load_n(unsigned long load, unsigned long exp, ...@@ -299,7 +299,7 @@ calc_load_n(unsigned long load, unsigned long exp,
} }
/* /*
* NO_HZ can leave us missing all per-cpu ticks calling * NO_HZ can leave us missing all per-CPU ticks calling
* calc_load_fold_active(), but since a NO_HZ CPU folds its delta into * calc_load_fold_active(), but since a NO_HZ CPU folds its delta into
* calc_load_nohz per calc_load_nohz_start(), all we need to do is fold * calc_load_nohz per calc_load_nohz_start(), all we need to do is fold
* in the pending NO_HZ delta if our NO_HZ period crossed a load cycle boundary. * in the pending NO_HZ delta if our NO_HZ period crossed a load cycle boundary.
...@@ -363,7 +363,7 @@ void calc_global_load(unsigned long ticks) ...@@ -363,7 +363,7 @@ void calc_global_load(unsigned long ticks)
return; return;
/* /*
* Fold the 'old' NO_HZ-delta to include all NO_HZ cpus. * Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
*/ */
delta = calc_load_nohz_fold(); delta = calc_load_nohz_fold();
if (delta) if (delta)
......
...@@ -27,18 +27,18 @@ ...@@ -27,18 +27,18 @@
* except MEMBARRIER_CMD_QUERY. * except MEMBARRIER_CMD_QUERY.
*/ */
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE #ifdef CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \ #define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \
(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE \ (MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE \
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE) | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE)
#else #else
#define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK 0 #define MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK 0
#endif #endif
#define MEMBARRIER_CMD_BITMASK \ #define MEMBARRIER_CMD_BITMASK \
(MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \ (MEMBARRIER_CMD_GLOBAL | MEMBARRIER_CMD_GLOBAL_EXPEDITED \
| MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \ | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \
| MEMBARRIER_CMD_PRIVATE_EXPEDITED \ | MEMBARRIER_CMD_PRIVATE_EXPEDITED \
| MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \
| MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK) | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK)
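One payoff of building MEMBARRIER_CMD_BITMASK this way is that command validation stays a single mask test, with the sync-core commands accepted only where CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE is defined. A hedged sketch of that check (illustrative helper, not the syscall's actual control flow):

        /* Sketch: any command bit outside the supported set is rejected. */
        static int membarrier_cmd_supported(int cmd)
        {
                if (cmd & ~MEMBARRIER_CMD_BITMASK)
                        return -EINVAL;
                return 0;
        }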
static void ipi_mb(void *info) static void ipi_mb(void *info)
...@@ -85,6 +85,7 @@ static int membarrier_global_expedited(void) ...@@ -85,6 +85,7 @@ static int membarrier_global_expedited(void)
*/ */
if (cpu == raw_smp_processor_id()) if (cpu == raw_smp_processor_id())
continue; continue;
rcu_read_lock(); rcu_read_lock();
p = task_rcu_dereference(&cpu_rq(cpu)->curr); p = task_rcu_dereference(&cpu_rq(cpu)->curr);
if (p && p->mm && (atomic_read(&p->mm->membarrier_state) & if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
...@@ -188,6 +189,7 @@ static int membarrier_private_expedited(int flags) ...@@ -188,6 +189,7 @@ static int membarrier_private_expedited(int flags)
* rq->curr modification in scheduler. * rq->curr modification in scheduler.
*/ */
smp_mb(); /* exit from system call is not a mb */ smp_mb(); /* exit from system call is not a mb */
return 0; return 0;
} }
...@@ -219,6 +221,7 @@ static int membarrier_register_global_expedited(void) ...@@ -219,6 +221,7 @@ static int membarrier_register_global_expedited(void)
} }
atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY, atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
&mm->membarrier_state); &mm->membarrier_state);
return 0; return 0;
} }
...@@ -253,6 +256,7 @@ static int membarrier_register_private_expedited(int flags) ...@@ -253,6 +256,7 @@ static int membarrier_register_private_expedited(int flags)
synchronize_sched(); synchronize_sched();
} }
atomic_or(state, &mm->membarrier_state); atomic_or(state, &mm->membarrier_state);
return 0; return 0;
} }
......
...@@ -1453,9 +1453,9 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) ...@@ -1453,9 +1453,9 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
return; return;
/* /*
* There appears to be other cpus that can accept * There appear to be other CPUs that can accept
* current and none to run 'p', so lets reschedule * the current task but none can run 'p', so let's reschedule
* to try and push current away: * to try and push the current task away:
*/ */
requeue_task_rt(rq, p, 1); requeue_task_rt(rq, p, 1);
resched_curr(rq); resched_curr(rq);
...@@ -1596,12 +1596,13 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) ...@@ -1596,12 +1596,13 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
if (!task_running(rq, p) && if (!task_running(rq, p) &&
cpumask_test_cpu(cpu, &p->cpus_allowed)) cpumask_test_cpu(cpu, &p->cpus_allowed))
return 1; return 1;
return 0; return 0;
} }
/* /*
* Return the highest pushable rq's task, which is suitable to be executed * Return the highest pushable rq's task, which is suitable to be executed
* on the cpu, NULL otherwise * on the CPU, or NULL otherwise
*/ */
static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
{ {
...@@ -1639,11 +1640,11 @@ static int find_lowest_rq(struct task_struct *task) ...@@ -1639,11 +1640,11 @@ static int find_lowest_rq(struct task_struct *task)
return -1; /* No targets found */ return -1; /* No targets found */
/* /*
* At this point we have built a mask of cpus representing the * At this point we have built a mask of CPUs representing the
* lowest priority tasks in the system. Now we want to elect * lowest priority tasks in the system. Now we want to elect
* the best one based on our affinity and topology. * the best one based on our affinity and topology.
* *
* We prioritize the last cpu that the task executed on since * We prioritize the last CPU that the task executed on since
* it is most likely cache-hot in that location. * it is most likely cache-hot in that location.
*/ */
if (cpumask_test_cpu(cpu, lowest_mask)) if (cpumask_test_cpu(cpu, lowest_mask))
...@@ -1651,7 +1652,7 @@ static int find_lowest_rq(struct task_struct *task) ...@@ -1651,7 +1652,7 @@ static int find_lowest_rq(struct task_struct *task)
/* /*
* Otherwise, we consult the sched_domains span maps to figure * Otherwise, we consult the sched_domains span maps to figure
* out which cpu is logically closest to our hot cache data. * out which CPU is logically closest to our hot cache data.
*/ */
if (!cpumask_test_cpu(this_cpu, lowest_mask)) if (!cpumask_test_cpu(this_cpu, lowest_mask))
this_cpu = -1; /* Skip this_cpu opt if not among lowest */ this_cpu = -1; /* Skip this_cpu opt if not among lowest */
...@@ -1692,6 +1693,7 @@ static int find_lowest_rq(struct task_struct *task) ...@@ -1692,6 +1693,7 @@ static int find_lowest_rq(struct task_struct *task)
cpu = cpumask_any(lowest_mask); cpu = cpumask_any(lowest_mask);
if (cpu < nr_cpu_ids) if (cpu < nr_cpu_ids)
return cpu; return cpu;
return -1; return -1;
} }
...@@ -1827,7 +1829,7 @@ static int push_rt_task(struct rq *rq) ...@@ -1827,7 +1829,7 @@ static int push_rt_task(struct rq *rq)
* The task hasn't migrated, and is still the next * The task hasn't migrated, and is still the next
* eligible task, but we failed to find a run-queue * eligible task, but we failed to find a run-queue
* to push it to. Do not retry in this case, since * to push it to. Do not retry in this case, since
* other cpus will pull from us when ready. * other CPUs will pull from us when ready.
*/ */
goto out; goto out;
} }
...@@ -1919,7 +1921,7 @@ static int rto_next_cpu(struct root_domain *rd) ...@@ -1919,7 +1921,7 @@ static int rto_next_cpu(struct root_domain *rd)
* rt_next_cpu() will simply return the first CPU found in * rt_next_cpu() will simply return the first CPU found in
* the rto_mask. * the rto_mask.
* *
* If rto_next_cpu() is called with rto_cpu is a valid cpu, it * If rto_next_cpu() is called with rto_cpu set to a valid CPU, it
* will return the next CPU found in the rto_mask. * will return the next CPU found in the rto_mask.
* *
* If there are no more CPUs left in the rto_mask, then a check is made * If there are no more CPUs left in the rto_mask, then a check is made
...@@ -1980,7 +1982,7 @@ static void tell_cpu_to_push(struct rq *rq) ...@@ -1980,7 +1982,7 @@ static void tell_cpu_to_push(struct rq *rq)
raw_spin_lock(&rq->rd->rto_lock); raw_spin_lock(&rq->rd->rto_lock);
/* /*
* The rto_cpu is updated under the lock, if it has a valid cpu * The rto_cpu is updated under the lock, if it has a valid CPU
* then the IPI is still running and will continue due to the * then the IPI is still running and will continue due to the
* update to loop_next, and nothing needs to be done here. * update to loop_next, and nothing needs to be done here.
* Otherwise it is finishing up and an IPI needs to be sent. * Otherwise it is finishing up and an IPI needs to be sent.
...@@ -2105,7 +2107,7 @@ static void pull_rt_task(struct rq *this_rq) ...@@ -2105,7 +2107,7 @@ static void pull_rt_task(struct rq *this_rq)
/* /*
* There's a chance that p is higher in priority * There's a chance that p is higher in priority
* than what's currently running on its cpu. * than what's currently running on its CPU.
* This is just that p is waking up and hasn't * This is just that p is waking up and hasn't
* had a chance to schedule. We only pull * had a chance to schedule. We only pull
* p if it is lower in priority than the * p if it is lower in priority than the
...@@ -2693,6 +2695,7 @@ int sched_rr_handler(struct ctl_table *table, int write, ...@@ -2693,6 +2695,7 @@ int sched_rr_handler(struct ctl_table *table, int write,
msecs_to_jiffies(sysctl_sched_rr_timeslice); msecs_to_jiffies(sysctl_sched_rr_timeslice);
} }
mutex_unlock(&mutex); mutex_unlock(&mutex);
return ret; return ret;
} }
......
/* SPDX-License-Identifier: GPL-2.0 */ /* SPDX-License-Identifier: GPL-2.0 */
/*
* Scheduler internal types and methods:
*/
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/autogroup.h> #include <linux/sched/autogroup.h>
#include <linux/sched/sysctl.h> #include <linux/sched/sysctl.h>
...@@ -79,11 +81,11 @@ static inline void cpu_load_update_active(struct rq *this_rq) { } ...@@ -79,11 +81,11 @@ static inline void cpu_load_update_active(struct rq *this_rq) { }
* and does not change the user-interface for setting shares/weights. * and does not change the user-interface for setting shares/weights.
* *
* We increase resolution only if we have enough bits to allow this increased * We increase resolution only if we have enough bits to allow this increased
* resolution (i.e. 64bit). The costs for increasing resolution when 32bit are * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
* pretty high and the returns do not justify the increased costs. * are pretty high and the returns do not justify the increased costs.
* *
* Really only required when CONFIG_FAIR_GROUP_SCHED is also set, but to * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
* increase coverage and consistency always enable it on 64bit platforms. * increase coverage and consistency always enable it on 64-bit platforms.
*/ */
#ifdef CONFIG_64BIT #ifdef CONFIG_64BIT
# define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT) # define NICE_0_LOAD_SHIFT (SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
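In practice this means user-visible weights (e.g. the NICE_0 weight of 1024) are shifted up by SCHED_FIXEDPOINT_SHIFT for internal 64-bit load arithmetic and shifted back down at the user-interface boundary. A hedged sketch of the widening step, assuming the usual shift of 10 (the kernel pairs NICE_0_LOAD_SHIFT with matching scale_load()/scale_load_down() helpers):

        #define FIXEDPOINT_SHIFT        10      /* assumption: mirrors SCHED_FIXEDPOINT_SHIFT */

        /* Sketch: widen a user-visible weight for internal load math. */
        static inline unsigned long widen_load(unsigned long w)
        {
                return w << FIXEDPOINT_SHIFT;   /* 1024 -> 1048576 for NICE_0 */
        }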
...@@ -111,16 +113,12 @@ static inline void cpu_load_update_active(struct rq *this_rq) { } ...@@ -111,16 +113,12 @@ static inline void cpu_load_update_active(struct rq *this_rq) { }
* 10 -> just above 1us * 10 -> just above 1us
* 9 -> just above 0.5us * 9 -> just above 0.5us
*/ */
#define DL_SCALE (10) #define DL_SCALE 10
/*
* These are the 'tuning knobs' of the scheduler:
*/
/* /*
* single value that denotes runtime == period, ie unlimited time. * Single value that denotes runtime == period, i.e. unlimited time.
*/ */
#define RUNTIME_INF ((u64)~0ULL) #define RUNTIME_INF ((u64)~0ULL)
static inline int idle_policy(int policy) static inline int idle_policy(int policy)
{ {
...@@ -235,9 +233,9 @@ void __dl_clear_params(struct task_struct *p); ...@@ -235,9 +233,9 @@ void __dl_clear_params(struct task_struct *p);
* control. * control.
*/ */
struct dl_bandwidth { struct dl_bandwidth {
raw_spinlock_t dl_runtime_lock; raw_spinlock_t dl_runtime_lock;
u64 dl_runtime; u64 dl_runtime;
u64 dl_period; u64 dl_period;
}; };
static inline int dl_bandwidth_enabled(void) static inline int dl_bandwidth_enabled(void)
...@@ -246,8 +244,9 @@ static inline int dl_bandwidth_enabled(void) ...@@ -246,8 +244,9 @@ static inline int dl_bandwidth_enabled(void)
} }
struct dl_bw { struct dl_bw {
raw_spinlock_t lock; raw_spinlock_t lock;
u64 bw, total_bw; u64 bw;
u64 total_bw;
}; };
static inline void __dl_update(struct dl_bw *dl_b, s64 bw); static inline void __dl_update(struct dl_bw *dl_b, s64 bw);
...@@ -273,20 +272,17 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw) ...@@ -273,20 +272,17 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw; dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
} }
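Read as plain arithmetic, the predicate above says: admitting the change overflows iff the per-CPU bandwidth cap times the number of CPUs can no longer cover the new total. A hedged user-space rendering with invented numbers (BW_SHIFT/BW_UNIT are the fixed-point constants defined later in this header):

	#include <stdio.h>
	#include <stdint.h>

	#define BW_SHIFT	20
	#define BW_UNIT		(1 << BW_SHIFT)	/* 1.0 in fixed point */

	/* Sketch of the check above: nonzero means "would overflow". */
	static int dl_overflow(uint64_t bw, int cpus, uint64_t total_bw,
			       uint64_t old_bw, uint64_t new_bw)
	{
		return bw * cpus < total_bw - old_bw + new_bw;
	}

	int main(void)
	{
		uint64_t cap   = BW_UNIT * 95 / 100;	/* 95% of each CPU for -deadline */
		uint64_t total = BW_UNIT * 3;		/* 3.0 CPUs' worth already admitted */
		uint64_t want  = BW_UNIT / 2;		/* new task asks for 0.5 of a CPU */

		/* 0.95 * 4 = 3.8 >= 3.0 + 0.5, so this prints 0 (admitted) */
		printf("overflow on 4 CPUs? %d\n", dl_overflow(cap, 4, total, 0, want));
		return 0;
	}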
void dl_change_utilization(struct task_struct *p, u64 new_bw); extern void dl_change_utilization(struct task_struct *p, u64 new_bw);
extern void init_dl_bw(struct dl_bw *dl_b); extern void init_dl_bw(struct dl_bw *dl_b);
extern int sched_dl_global_validate(void); extern int sched_dl_global_validate(void);
extern void sched_dl_do_global(void); extern void sched_dl_do_global(void);
extern int sched_dl_overflow(struct task_struct *p, int policy, extern int sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr);
const struct sched_attr *attr);
extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr); extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr); extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
extern bool __checkparam_dl(const struct sched_attr *attr); extern bool __checkparam_dl(const struct sched_attr *attr);
extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr); extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
extern int dl_task_can_attach(struct task_struct *p, extern int dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
const struct cpumask *cs_cpus_allowed); extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
const struct cpumask *trial);
extern bool dl_cpu_busy(unsigned int cpu); extern bool dl_cpu_busy(unsigned int cpu);
#ifdef CONFIG_CGROUP_SCHED #ifdef CONFIG_CGROUP_SCHED
...@@ -300,32 +296,36 @@ extern struct list_head task_groups; ...@@ -300,32 +296,36 @@ extern struct list_head task_groups;
struct cfs_bandwidth { struct cfs_bandwidth {
#ifdef CONFIG_CFS_BANDWIDTH #ifdef CONFIG_CFS_BANDWIDTH
raw_spinlock_t lock; raw_spinlock_t lock;
ktime_t period; ktime_t period;
u64 quota, runtime; u64 quota;
s64 hierarchical_quota; u64 runtime;
u64 runtime_expires; s64 hierarchical_quota;
u64 runtime_expires;
int idle, period_active;
struct hrtimer period_timer, slack_timer; int idle;
struct list_head throttled_cfs_rq; int period_active;
struct hrtimer period_timer;
/* statistics */ struct hrtimer slack_timer;
int nr_periods, nr_throttled; struct list_head throttled_cfs_rq;
u64 throttled_time;
/* Statistics: */
int nr_periods;
int nr_throttled;
u64 throttled_time;
#endif #endif
}; };
/* task group related information */ /* Task group related information */
struct task_group { struct task_group {
struct cgroup_subsys_state css; struct cgroup_subsys_state css;
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
/* schedulable entities of this group on each cpu */ /* schedulable entities of this group on each CPU */
struct sched_entity **se; struct sched_entity **se;
/* runqueue "owned" by this group on each cpu */ /* runqueue "owned" by this group on each CPU */
struct cfs_rq **cfs_rq; struct cfs_rq **cfs_rq;
unsigned long shares; unsigned long shares;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* /*
...@@ -333,29 +333,29 @@ struct task_group { ...@@ -333,29 +333,29 @@ struct task_group {
* it in its own cacheline separated from the fields above which * it in its own cacheline separated from the fields above which
* will also be accessed at each tick. * will also be accessed at each tick.
*/ */
atomic_long_t load_avg ____cacheline_aligned; atomic_long_t load_avg ____cacheline_aligned;
#endif #endif
#endif #endif
#ifdef CONFIG_RT_GROUP_SCHED #ifdef CONFIG_RT_GROUP_SCHED
struct sched_rt_entity **rt_se; struct sched_rt_entity **rt_se;
struct rt_rq **rt_rq; struct rt_rq **rt_rq;
struct rt_bandwidth rt_bandwidth; struct rt_bandwidth rt_bandwidth;
#endif #endif
struct rcu_head rcu; struct rcu_head rcu;
struct list_head list; struct list_head list;
struct task_group *parent; struct task_group *parent;
struct list_head siblings; struct list_head siblings;
struct list_head children; struct list_head children;
#ifdef CONFIG_SCHED_AUTOGROUP #ifdef CONFIG_SCHED_AUTOGROUP
struct autogroup *autogroup; struct autogroup *autogroup;
#endif #endif
struct cfs_bandwidth cfs_bandwidth; struct cfs_bandwidth cfs_bandwidth;
}; };
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
...@@ -369,8 +369,8 @@ struct task_group { ...@@ -369,8 +369,8 @@ struct task_group {
* (The default weight is 1024 - so there's no practical * (The default weight is 1024 - so there's no practical
* limitation from this.) * limitation from this.)
*/ */
#define MIN_SHARES (1UL << 1) #define MIN_SHARES (1UL << 1)
#define MAX_SHARES (1UL << 18) #define MAX_SHARES (1UL << 18)
#endif #endif
typedef int (*tg_visitor)(struct task_group *, void *); typedef int (*tg_visitor)(struct task_group *, void *);
...@@ -443,35 +443,39 @@ struct cfs_bandwidth { }; ...@@ -443,35 +443,39 @@ struct cfs_bandwidth { };
/* CFS-related fields in a runqueue */ /* CFS-related fields in a runqueue */
struct cfs_rq { struct cfs_rq {
struct load_weight load; struct load_weight load;
unsigned long runnable_weight; unsigned long runnable_weight;
unsigned int nr_running, h_nr_running; unsigned int nr_running;
unsigned int h_nr_running;
u64 exec_clock; u64 exec_clock;
u64 min_vruntime; u64 min_vruntime;
#ifndef CONFIG_64BIT #ifndef CONFIG_64BIT
u64 min_vruntime_copy; u64 min_vruntime_copy;
#endif #endif
struct rb_root_cached tasks_timeline; struct rb_root_cached tasks_timeline;
/* /*
* 'curr' points to currently running entity on this cfs_rq. * 'curr' points to currently running entity on this cfs_rq.
* It is set to NULL otherwise (i.e when none are currently running). * It is set to NULL otherwise (i.e when none are currently running).
*/ */
struct sched_entity *curr, *next, *last, *skip; struct sched_entity *curr;
struct sched_entity *next;
struct sched_entity *last;
struct sched_entity *skip;
#ifdef CONFIG_SCHED_DEBUG #ifdef CONFIG_SCHED_DEBUG
unsigned int nr_spread_over; unsigned int nr_spread_over;
#endif #endif
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* /*
* CFS load tracking * CFS load tracking
*/ */
struct sched_avg avg; struct sched_avg avg;
#ifndef CONFIG_64BIT #ifndef CONFIG_64BIT
u64 load_last_update_time_copy; u64 load_last_update_time_copy;
#endif #endif
struct { struct {
raw_spinlock_t lock ____cacheline_aligned; raw_spinlock_t lock ____cacheline_aligned;
...@@ -482,9 +486,9 @@ struct cfs_rq { ...@@ -482,9 +486,9 @@ struct cfs_rq {
} removed; } removed;
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
unsigned long tg_load_avg_contrib; unsigned long tg_load_avg_contrib;
long propagate; long propagate;
long prop_runnable_sum; long prop_runnable_sum;
/* /*
* h_load = weight * f(tg) * h_load = weight * f(tg)
...@@ -492,36 +496,38 @@ struct cfs_rq { ...@@ -492,36 +496,38 @@ struct cfs_rq {
* Where f(tg) is the recursive weight fraction assigned to * Where f(tg) is the recursive weight fraction assigned to
* this group. * this group.
*/ */
unsigned long h_load; unsigned long h_load;
u64 last_h_load_update; u64 last_h_load_update;
struct sched_entity *h_load_next; struct sched_entity *h_load_next;
#endif /* CONFIG_FAIR_GROUP_SCHED */ #endif /* CONFIG_FAIR_GROUP_SCHED */
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ struct rq *rq; /* CPU runqueue to which this cfs_rq is attached */
/* /*
* leaf cfs_rqs are those that hold tasks (lowest schedulable entity in * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
* a hierarchy). Non-leaf lrqs hold other higher schedulable entities * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
* (like users, containers etc.) * (like users, containers etc.)
* *
* leaf_cfs_rq_list ties together list of leaf cfs_rq's in a cpu. This * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU.
* list is used during load balance. * This list is used during load balance.
*/ */
int on_list; int on_list;
struct list_head leaf_cfs_rq_list; struct list_head leaf_cfs_rq_list;
struct task_group *tg; /* group that "owns" this runqueue */ struct task_group *tg; /* group that "owns" this runqueue */
#ifdef CONFIG_CFS_BANDWIDTH #ifdef CONFIG_CFS_BANDWIDTH
int runtime_enabled; int runtime_enabled;
u64 runtime_expires; u64 runtime_expires;
s64 runtime_remaining; s64 runtime_remaining;
u64 throttled_clock, throttled_clock_task; u64 throttled_clock;
u64 throttled_clock_task_time; u64 throttled_clock_task;
int throttled, throttle_count; u64 throttled_clock_task_time;
struct list_head throttled_list; int throttled;
int throttle_count;
struct list_head throttled_list;
#endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_CFS_BANDWIDTH */
#endif /* CONFIG_FAIR_GROUP_SCHED */ #endif /* CONFIG_FAIR_GROUP_SCHED */
}; };
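The h_load relation documented in the struct is a product of weight fractions down the group hierarchy. A small sketch with invented numbers, purely to illustrate the recursion (not kernel code):

	#include <stdio.h>

	int main(void)
	{
		double root_load  = 1024.0;		/* cfs_rq load at the root */
		double tg_a_share = 512.0 / 1024.0;	/* group A owns half of root */
		double tg_b_share = 256.0 / 512.0;	/* group B owns half of A */

		/* h_load = weight * f(tg), f(tg) multiplying down the hierarchy */
		double h_load = root_load * tg_a_share * tg_b_share;

		printf("h_load of B's queue: %.0f\n", h_load);	/* 256 */
		return 0;
	}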
...@@ -538,45 +544,45 @@ static inline int rt_bandwidth_enabled(void) ...@@ -538,45 +544,45 @@ static inline int rt_bandwidth_enabled(void)
/* Real-Time classes' related field in a runqueue: */ /* Real-Time classes' related field in a runqueue: */
struct rt_rq { struct rt_rq {
struct rt_prio_array active; struct rt_prio_array active;
unsigned int rt_nr_running; unsigned int rt_nr_running;
unsigned int rr_nr_running; unsigned int rr_nr_running;
#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
struct { struct {
int curr; /* highest queued rt task prio */ int curr; /* highest queued rt task prio */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
int next; /* next highest */ int next; /* next highest */
#endif #endif
} highest_prio; } highest_prio;
#endif #endif
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
unsigned long rt_nr_migratory; unsigned long rt_nr_migratory;
unsigned long rt_nr_total; unsigned long rt_nr_total;
int overloaded; int overloaded;
struct plist_head pushable_tasks; struct plist_head pushable_tasks;
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
int rt_queued; int rt_queued;
int rt_throttled; int rt_throttled;
u64 rt_time; u64 rt_time;
u64 rt_runtime; u64 rt_runtime;
/* Nests inside the rq lock: */ /* Nests inside the rq lock: */
raw_spinlock_t rt_runtime_lock; raw_spinlock_t rt_runtime_lock;
#ifdef CONFIG_RT_GROUP_SCHED #ifdef CONFIG_RT_GROUP_SCHED
unsigned long rt_nr_boosted; unsigned long rt_nr_boosted;
struct rq *rq; struct rq *rq;
struct task_group *tg; struct task_group *tg;
#endif #endif
}; };
/* Deadline class' related fields in a runqueue */ /* Deadline class' related fields in a runqueue */
struct dl_rq { struct dl_rq {
/* runqueue is an rbtree, ordered by deadline */ /* runqueue is an rbtree, ordered by deadline */
struct rb_root_cached root; struct rb_root_cached root;
unsigned long dl_nr_running; unsigned long dl_nr_running;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
/* /*
...@@ -586,28 +592,28 @@ struct dl_rq { ...@@ -586,28 +592,28 @@ struct dl_rq {
* should migrate somewhere else. * should migrate somewhere else.
*/ */
struct { struct {
u64 curr; u64 curr;
u64 next; u64 next;
} earliest_dl; } earliest_dl;
unsigned long dl_nr_migratory; unsigned long dl_nr_migratory;
int overloaded; int overloaded;
/* /*
* Tasks on this rq that can be pushed away. They are kept in * Tasks on this rq that can be pushed away. They are kept in
* an rb-tree, ordered by tasks' deadlines, with caching * an rb-tree, ordered by tasks' deadlines, with caching
* of the leftmost (earliest deadline) element. * of the leftmost (earliest deadline) element.
*/ */
struct rb_root_cached pushable_dl_tasks_root; struct rb_root_cached pushable_dl_tasks_root;
#else #else
struct dl_bw dl_bw; struct dl_bw dl_bw;
#endif #endif
/* /*
* "Active utilization" for this runqueue: increased when a * "Active utilization" for this runqueue: increased when a
* task wakes up (becomes TASK_RUNNING) and decreased when a * task wakes up (becomes TASK_RUNNING) and decreased when a
* task blocks * task blocks
*/ */
u64 running_bw; u64 running_bw;
/* /*
* Utilization of the tasks "assigned" to this runqueue (including * Utilization of the tasks "assigned" to this runqueue (including
...@@ -618,14 +624,14 @@ struct dl_rq { ...@@ -618,14 +624,14 @@ struct dl_rq {
* This is needed to compute the "inactive utilization" for the * This is needed to compute the "inactive utilization" for the
* runqueue (inactive utilization = this_bw - running_bw). * runqueue (inactive utilization = this_bw - running_bw).
*/ */
u64 this_bw; u64 this_bw;
u64 extra_bw; u64 extra_bw;
/* /*
* Inverse of the fraction of CPU utilization that can be reclaimed * Inverse of the fraction of CPU utilization that can be reclaimed
* by the GRUB algorithm. * by the GRUB algorithm.
*/ */
u64 bw_ratio; u64 bw_ratio;
}; };
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
...@@ -638,51 +644,51 @@ static inline bool sched_asym_prefer(int a, int b) ...@@ -638,51 +644,51 @@ static inline bool sched_asym_prefer(int a, int b)
/* /*
* We add the notion of a root-domain which will be used to define per-domain * We add the notion of a root-domain which will be used to define per-domain
* variables. Each exclusive cpuset essentially defines an island domain by * variables. Each exclusive cpuset essentially defines an island domain by
* fully partitioning the member cpus from any other cpuset. Whenever a new * fully partitioning the member CPUs from any other cpuset. Whenever a new
* exclusive cpuset is created, we also create and attach a new root-domain * exclusive cpuset is created, we also create and attach a new root-domain
* object. * object.
* *
*/ */
struct root_domain { struct root_domain {
atomic_t refcount; atomic_t refcount;
atomic_t rto_count; atomic_t rto_count;
struct rcu_head rcu; struct rcu_head rcu;
cpumask_var_t span; cpumask_var_t span;
cpumask_var_t online; cpumask_var_t online;
/* Indicate more than one runnable task for any CPU */ /* Indicate more than one runnable task for any CPU */
bool overload; bool overload;
/* /*
* The bit corresponding to a CPU gets set here if such CPU has more * The bit corresponding to a CPU gets set here if such CPU has more
* than one runnable -deadline task (as it is below for RT tasks). * than one runnable -deadline task (as it is below for RT tasks).
*/ */
cpumask_var_t dlo_mask; cpumask_var_t dlo_mask;
atomic_t dlo_count; atomic_t dlo_count;
struct dl_bw dl_bw; struct dl_bw dl_bw;
struct cpudl cpudl; struct cpudl cpudl;
#ifdef HAVE_RT_PUSH_IPI #ifdef HAVE_RT_PUSH_IPI
/* /*
* For IPI pull requests, loop across the rto_mask. * For IPI pull requests, loop across the rto_mask.
*/ */
struct irq_work rto_push_work; struct irq_work rto_push_work;
raw_spinlock_t rto_lock; raw_spinlock_t rto_lock;
/* These are only updated and read within rto_lock */ /* These are only updated and read within rto_lock */
int rto_loop; int rto_loop;
int rto_cpu; int rto_cpu;
/* These atomics are updated outside of a lock */ /* These atomics are updated outside of a lock */
atomic_t rto_loop_next; atomic_t rto_loop_next;
atomic_t rto_loop_start; atomic_t rto_loop_start;
#endif #endif
/* /*
* The "RT overload" flag: it gets set if a CPU has more than * The "RT overload" flag: it gets set if a CPU has more than
* one runnable RT task. * one runnable RT task.
*/ */
cpumask_var_t rto_mask; cpumask_var_t rto_mask;
struct cpupri cpupri; struct cpupri cpupri;
unsigned long max_cpu_capacity; unsigned long max_cpu_capacity;
}; };
extern struct root_domain def_root_domain; extern struct root_domain def_root_domain;
...@@ -708,39 +714,39 @@ extern void rto_push_irq_work_func(struct irq_work *work); ...@@ -708,39 +714,39 @@ extern void rto_push_irq_work_func(struct irq_work *work);
*/ */
struct rq { struct rq {
/* runqueue lock: */ /* runqueue lock: */
raw_spinlock_t lock; raw_spinlock_t lock;
/* /*
* nr_running and cpu_load should be in the same cacheline because * nr_running and cpu_load should be in the same cacheline because
* remote CPUs use both these fields when doing load calculation. * remote CPUs use both these fields when doing load calculation.
*/ */
unsigned int nr_running; unsigned int nr_running;
#ifdef CONFIG_NUMA_BALANCING #ifdef CONFIG_NUMA_BALANCING
unsigned int nr_numa_running; unsigned int nr_numa_running;
unsigned int nr_preferred_running; unsigned int nr_preferred_running;
#endif #endif
#define CPU_LOAD_IDX_MAX 5 #define CPU_LOAD_IDX_MAX 5
unsigned long cpu_load[CPU_LOAD_IDX_MAX]; unsigned long cpu_load[CPU_LOAD_IDX_MAX];
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
unsigned long last_load_update_tick; unsigned long last_load_update_tick;
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
unsigned long nohz_flags; unsigned long nohz_flags;
#endif /* CONFIG_NO_HZ_COMMON */ #endif /* CONFIG_NO_HZ_COMMON */
/* capture load from *all* tasks on this cpu: */ /* capture load from *all* tasks on this CPU: */
struct load_weight load; struct load_weight load;
unsigned long nr_load_updates; unsigned long nr_load_updates;
u64 nr_switches; u64 nr_switches;
struct cfs_rq cfs; struct cfs_rq cfs;
struct rt_rq rt; struct rt_rq rt;
struct dl_rq dl; struct dl_rq dl;
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */ /* list of leaf cfs_rq on this CPU: */
struct list_head leaf_cfs_rq_list; struct list_head leaf_cfs_rq_list;
struct list_head *tmp_alone_branch; struct list_head *tmp_alone_branch;
#endif /* CONFIG_FAIR_GROUP_SCHED */ #endif /* CONFIG_FAIR_GROUP_SCHED */
/* /*
...@@ -749,94 +755,98 @@ struct rq { ...@@ -749,94 +755,98 @@ struct rq {
* one CPU and if it got migrated afterwards it may decrease * one CPU and if it got migrated afterwards it may decrease
* it on another CPU. Always updated under the runqueue lock: * it on another CPU. Always updated under the runqueue lock:
*/ */
unsigned long nr_uninterruptible; unsigned long nr_uninterruptible;
struct task_struct *curr, *idle, *stop; struct task_struct *curr;
unsigned long next_balance; struct task_struct *idle;
struct mm_struct *prev_mm; struct task_struct *stop;
unsigned long next_balance;
struct mm_struct *prev_mm;
unsigned int clock_update_flags; unsigned int clock_update_flags;
u64 clock; u64 clock;
u64 clock_task; u64 clock_task;
atomic_t nr_iowait; atomic_t nr_iowait;
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
struct root_domain *rd; struct root_domain *rd;
struct sched_domain *sd; struct sched_domain *sd;
unsigned long cpu_capacity;
unsigned long cpu_capacity_orig;
unsigned long cpu_capacity; struct callback_head *balance_callback;
unsigned long cpu_capacity_orig;
struct callback_head *balance_callback; unsigned char idle_balance;
unsigned char idle_balance;
/* For active balancing */ /* For active balancing */
int active_balance; int active_balance;
int push_cpu; int push_cpu;
struct cpu_stop_work active_balance_work; struct cpu_stop_work active_balance_work;
/* cpu of this runqueue: */
int cpu; /* CPU of this runqueue: */
int online; int cpu;
int online;
struct list_head cfs_tasks; struct list_head cfs_tasks;
u64 rt_avg; u64 rt_avg;
u64 age_stamp; u64 age_stamp;
u64 idle_stamp; u64 idle_stamp;
u64 avg_idle; u64 avg_idle;
/* This is used to determine avg_idle's max value */ /* This is used to determine avg_idle's max value */
u64 max_idle_balance_cost; u64 max_idle_balance_cost;
#endif #endif
#ifdef CONFIG_IRQ_TIME_ACCOUNTING #ifdef CONFIG_IRQ_TIME_ACCOUNTING
u64 prev_irq_time; u64 prev_irq_time;
#endif #endif
#ifdef CONFIG_PARAVIRT #ifdef CONFIG_PARAVIRT
u64 prev_steal_time; u64 prev_steal_time;
#endif #endif
#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
u64 prev_steal_time_rq; u64 prev_steal_time_rq;
#endif #endif
/* calc_load related fields */ /* calc_load related fields */
unsigned long calc_load_update; unsigned long calc_load_update;
long calc_load_active; long calc_load_active;
#ifdef CONFIG_SCHED_HRTICK #ifdef CONFIG_SCHED_HRTICK
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
int hrtick_csd_pending; int hrtick_csd_pending;
call_single_data_t hrtick_csd; call_single_data_t hrtick_csd;
#endif #endif
struct hrtimer hrtick_timer; struct hrtimer hrtick_timer;
#endif #endif
#ifdef CONFIG_SCHEDSTATS #ifdef CONFIG_SCHEDSTATS
/* latency stats */ /* latency stats */
struct sched_info rq_sched_info; struct sched_info rq_sched_info;
unsigned long long rq_cpu_time; unsigned long long rq_cpu_time;
/* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
/* sys_sched_yield() stats */ /* sys_sched_yield() stats */
unsigned int yld_count; unsigned int yld_count;
/* schedule() stats */ /* schedule() stats */
unsigned int sched_count; unsigned int sched_count;
unsigned int sched_goidle; unsigned int sched_goidle;
/* try_to_wake_up() stats */ /* try_to_wake_up() stats */
unsigned int ttwu_count; unsigned int ttwu_count;
unsigned int ttwu_local; unsigned int ttwu_local;
#endif #endif
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
struct llist_head wake_list; struct llist_head wake_list;
#endif #endif
#ifdef CONFIG_CPU_IDLE #ifdef CONFIG_CPU_IDLE
/* Must be inspected within a rcu lock section */ /* Must be inspected within a rcu lock section */
struct cpuidle_state *idle_state; struct cpuidle_state *idle_state;
#endif #endif
}; };
...@@ -902,9 +912,9 @@ static inline u64 __rq_clock_broken(struct rq *rq) ...@@ -902,9 +912,9 @@ static inline u64 __rq_clock_broken(struct rq *rq)
* one position though, because the next rq_unpin_lock() will shift it * one position though, because the next rq_unpin_lock() will shift it
* back. * back.
*/ */
#define RQCF_REQ_SKIP 0x01 #define RQCF_REQ_SKIP 0x01
#define RQCF_ACT_SKIP 0x02 #define RQCF_ACT_SKIP 0x02
#define RQCF_UPDATED 0x04 #define RQCF_UPDATED 0x04
static inline void assert_clock_updated(struct rq *rq) static inline void assert_clock_updated(struct rq *rq)
{ {
...@@ -1057,12 +1067,12 @@ extern void sched_ttwu_pending(void); ...@@ -1057,12 +1067,12 @@ extern void sched_ttwu_pending(void);
/** /**
* highest_flag_domain - Return highest sched_domain containing flag. * highest_flag_domain - Return highest sched_domain containing flag.
* @cpu: The cpu whose highest level of sched domain is to * @cpu: The CPU whose highest level of sched domain is to
* be returned. * be returned.
* @flag: The flag to check for the highest sched_domain * @flag: The flag to check for the highest sched_domain
* for the given cpu. * for the given CPU.
* *
* Returns the highest sched_domain of a cpu which contains the given flag. * Returns the highest sched_domain of a CPU which contains the given flag.
*/ */
static inline struct sched_domain *highest_flag_domain(int cpu, int flag) static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
{ {
...@@ -1097,30 +1107,30 @@ DECLARE_PER_CPU(struct sched_domain *, sd_numa); ...@@ -1097,30 +1107,30 @@ DECLARE_PER_CPU(struct sched_domain *, sd_numa);
DECLARE_PER_CPU(struct sched_domain *, sd_asym); DECLARE_PER_CPU(struct sched_domain *, sd_asym);
struct sched_group_capacity { struct sched_group_capacity {
atomic_t ref; atomic_t ref;
/* /*
* CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
* for a single CPU. * for a single CPU.
*/ */
unsigned long capacity; unsigned long capacity;
unsigned long min_capacity; /* Min per-CPU capacity in group */ unsigned long min_capacity; /* Min per-CPU capacity in group */
unsigned long next_update; unsigned long next_update;
int imbalance; /* XXX unrelated to capacity but shared group state */ int imbalance; /* XXX unrelated to capacity but shared group state */
#ifdef CONFIG_SCHED_DEBUG #ifdef CONFIG_SCHED_DEBUG
int id; int id;
#endif #endif
unsigned long cpumask[0]; /* balance mask */ unsigned long cpumask[0]; /* Balance mask */
}; };
struct sched_group { struct sched_group {
struct sched_group *next; /* Must be a circular list */ struct sched_group *next; /* Must be a circular list */
atomic_t ref; atomic_t ref;
unsigned int group_weight; unsigned int group_weight;
struct sched_group_capacity *sgc; struct sched_group_capacity *sgc;
int asym_prefer_cpu; /* cpu of highest priority in group */ int asym_prefer_cpu; /* CPU of highest priority in group */
/* /*
* The CPUs this group covers. * The CPUs this group covers.
...@@ -1129,7 +1139,7 @@ struct sched_group { ...@@ -1129,7 +1139,7 @@ struct sched_group {
* by attaching extra space to the end of the structure, * by attaching extra space to the end of the structure,
* depending on how many CPUs the kernel has booted up with) * depending on how many CPUs the kernel has booted up with)
*/ */
unsigned long cpumask[0]; unsigned long cpumask[0];
}; };
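The cpumask[0] member is the classic attached-storage trick the comment describes: the mask lives directly behind the structure, carved out of the same allocation. A stand-alone user-space sketch, with a made-up nr_cpu_ids:

	#include <stdio.h>
	#include <stdlib.h>

	struct group_demo {
		unsigned int	group_weight;
		unsigned long	cpumask[];	/* space attached at allocation time */
	};

	int main(void)
	{
		size_t nr_cpu_ids = 128;		/* invented for the demo */
		size_t words = (nr_cpu_ids + 63) / 64;
		struct group_demo *sg;

		sg = calloc(1, sizeof(*sg) + words * sizeof(unsigned long));
		if (!sg)
			return 1;

		sg->cpumask[0] |= 1UL << 3;		/* mark CPU 3 as a member */
		printf("word0 = %#lx\n", sg->cpumask[0]);
		free(sg);
		return 0;
	}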
static inline struct cpumask *sched_group_span(struct sched_group *sg) static inline struct cpumask *sched_group_span(struct sched_group *sg)
...@@ -1146,8 +1156,8 @@ static inline struct cpumask *group_balance_mask(struct sched_group *sg) ...@@ -1146,8 +1156,8 @@ static inline struct cpumask *group_balance_mask(struct sched_group *sg)
} }
/** /**
* group_first_cpu - Returns the first cpu in the cpumask of a sched_group. * group_first_cpu - Returns the first CPU in the cpumask of a sched_group.
* @group: The group whose first cpu is to be returned. * @group: The group whose first CPU is to be returned.
*/ */
static inline unsigned int group_first_cpu(struct sched_group *group) static inline unsigned int group_first_cpu(struct sched_group *group)
{ {
...@@ -1357,9 +1367,9 @@ static inline int task_on_rq_migrating(struct task_struct *p) ...@@ -1357,9 +1367,9 @@ static inline int task_on_rq_migrating(struct task_struct *p)
/* /*
* wake flags * wake flags
*/ */
#define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ #define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */
#define WF_FORK 0x02 /* child wakeup after fork */ #define WF_FORK 0x02 /* Child wakeup after fork */
#define WF_MIGRATED 0x4 /* internal use, task got migrated */ #define WF_MIGRATED 0x4 /* Internal use, task got migrated */
/* /*
* To aid in avoiding the subversion of "niceness" due to uneven distribution * To aid in avoiding the subversion of "niceness" due to uneven distribution
...@@ -1370,11 +1380,11 @@ static inline int task_on_rq_migrating(struct task_struct *p) ...@@ -1370,11 +1380,11 @@ static inline int task_on_rq_migrating(struct task_struct *p)
* slice expiry etc. * slice expiry etc.
*/ */
#define WEIGHT_IDLEPRIO 3 #define WEIGHT_IDLEPRIO 3
#define WMULT_IDLEPRIO 1431655765 #define WMULT_IDLEPRIO 1431655765
extern const int sched_prio_to_weight[40]; extern const int sched_prio_to_weight[40];
extern const u32 sched_prio_to_wmult[40]; extern const u32 sched_prio_to_wmult[40];
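WMULT_IDLEPRIO is the precomputed inverse weight (2^32/weight) that lets the scheduler replace divisions by a weight with a multiply and a shift; for WEIGHT_IDLEPRIO == 3 that is 4294967296 / 3 = 1431655765. A quick check:

	#include <stdio.h>

	int main(void)
	{
		unsigned long long wmult = (1ULL << 32) / 3;	/* WEIGHT_IDLEPRIO == 3 */
		printf("%llu\n", wmult);			/* prints 1431655765 */
		return 0;
	}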
/* /*
* {de,en}queue flags: * {de,en}queue flags:
...@@ -1396,9 +1406,9 @@ extern const u32 sched_prio_to_wmult[40]; ...@@ -1396,9 +1406,9 @@ extern const u32 sched_prio_to_wmult[40];
*/ */
#define DEQUEUE_SLEEP 0x01 #define DEQUEUE_SLEEP 0x01
#define DEQUEUE_SAVE 0x02 /* matches ENQUEUE_RESTORE */ #define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */
#define DEQUEUE_MOVE 0x04 /* matches ENQUEUE_MOVE */ #define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */
#define DEQUEUE_NOCLOCK 0x08 /* matches ENQUEUE_NOCLOCK */ #define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */
#define ENQUEUE_WAKEUP 0x01 #define ENQUEUE_WAKEUP 0x01
#define ENQUEUE_RESTORE 0x02 #define ENQUEUE_RESTORE 0x02
...@@ -1420,10 +1430,10 @@ struct sched_class { ...@@ -1420,10 +1430,10 @@ struct sched_class {
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags); void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags); void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
void (*yield_task) (struct rq *rq); void (*yield_task) (struct rq *rq);
bool (*yield_to_task) (struct rq *rq, struct task_struct *p, bool preempt); bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
/* /*
* It is the responsibility of the pick_next_task() method that will * It is the responsibility of the pick_next_task() method that will
...@@ -1433,16 +1443,16 @@ struct sched_class { ...@@ -1433,16 +1443,16 @@ struct sched_class {
* May return RETRY_TASK when it finds a higher prio class has runnable * May return RETRY_TASK when it finds a higher prio class has runnable
* tasks. * tasks.
*/ */
struct task_struct * (*pick_next_task) (struct rq *rq, struct task_struct * (*pick_next_task)(struct rq *rq,
struct task_struct *prev, struct task_struct *prev,
struct rq_flags *rf); struct rq_flags *rf);
void (*put_prev_task) (struct rq *rq, struct task_struct *p); void (*put_prev_task)(struct rq *rq, struct task_struct *p);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags); int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p); void (*migrate_task_rq)(struct task_struct *p);
void (*task_woken) (struct rq *this_rq, struct task_struct *task); void (*task_woken)(struct rq *this_rq, struct task_struct *task);
void (*set_cpus_allowed)(struct task_struct *p, void (*set_cpus_allowed)(struct task_struct *p,
const struct cpumask *newmask); const struct cpumask *newmask);
...@@ -1451,31 +1461,31 @@ struct sched_class { ...@@ -1451,31 +1461,31 @@ struct sched_class {
void (*rq_offline)(struct rq *rq); void (*rq_offline)(struct rq *rq);
#endif #endif
void (*set_curr_task) (struct rq *rq); void (*set_curr_task)(struct rq *rq);
void (*task_tick) (struct rq *rq, struct task_struct *p, int queued); void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
void (*task_fork) (struct task_struct *p); void (*task_fork)(struct task_struct *p);
void (*task_dead) (struct task_struct *p); void (*task_dead)(struct task_struct *p);
/* /*
* The switched_from() call is allowed to drop rq->lock, therefore we * The switched_from() call is allowed to drop rq->lock, therefore we
* cannot assume the switched_from/switched_to pair is serialized by * cannot assume the switched_from/switched_to pair is serialized by
* rq->lock. They are however serialized by p->pi_lock. * rq->lock. They are however serialized by p->pi_lock.
*/ */
void (*switched_from) (struct rq *this_rq, struct task_struct *task); void (*switched_from)(struct rq *this_rq, struct task_struct *task);
void (*switched_to) (struct rq *this_rq, struct task_struct *task); void (*switched_to) (struct rq *this_rq, struct task_struct *task);
void (*prio_changed) (struct rq *this_rq, struct task_struct *task, void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
int oldprio); int oldprio);
unsigned int (*get_rr_interval) (struct rq *rq, unsigned int (*get_rr_interval)(struct rq *rq,
struct task_struct *task); struct task_struct *task);
void (*update_curr) (struct rq *rq); void (*update_curr)(struct rq *rq);
#define TASK_SET_GROUP 0 #define TASK_SET_GROUP 0
#define TASK_MOVE_GROUP 1 #define TASK_MOVE_GROUP 1
#ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_FAIR_GROUP_SCHED
void (*task_change_group) (struct task_struct *p, int type); void (*task_change_group)(struct task_struct *p, int type);
#endif #endif
}; };
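struct sched_class is the classic C "ops table" pattern: each scheduling class fills in function pointers and the core dispatches through them without knowing which policy it is driving. A toy user-space analogue (all names invented):

	#include <stdio.h>

	struct task { const char *name; };

	struct sched_class_demo {
		const char *name;
		void (*enqueue_task)(struct task *p);
		void (*task_tick)(struct task *p);
	};

	static void fair_enqueue(struct task *p) { printf("cfs: enqueue %s\n", p->name); }
	static void fair_tick(struct task *p)    { printf("cfs: tick %s\n", p->name); }

	static const struct sched_class_demo fair_class = {
		.name		= "fair",
		.enqueue_task	= fair_enqueue,
		.task_tick	= fair_tick,
	};

	int main(void)
	{
		struct task t = { "demo" };
		const struct sched_class_demo *class = &fair_class;

		/* The core only ever sees the method table: */
		class->enqueue_task(&t);
		class->task_tick(&t);
		return 0;
	}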
...@@ -1524,6 +1534,7 @@ static inline void idle_set_state(struct rq *rq, ...@@ -1524,6 +1534,7 @@ static inline void idle_set_state(struct rq *rq,
static inline struct cpuidle_state *idle_get_state(struct rq *rq) static inline struct cpuidle_state *idle_get_state(struct rq *rq)
{ {
SCHED_WARN_ON(!rcu_read_lock_held()); SCHED_WARN_ON(!rcu_read_lock_held());
return rq->idle_state; return rq->idle_state;
} }
#else #else
...@@ -1562,9 +1573,9 @@ extern void init_dl_task_timer(struct sched_dl_entity *dl_se); ...@@ -1562,9 +1573,9 @@ extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se); extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq); extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
#define BW_SHIFT 20 #define BW_SHIFT 20
#define BW_UNIT (1 << BW_SHIFT) #define BW_UNIT (1 << BW_SHIFT)
#define RATIO_SHIFT 8 #define RATIO_SHIFT 8
unsigned long to_ratio(u64 period, u64 runtime); unsigned long to_ratio(u64 period, u64 runtime);
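to_ratio() expresses runtime/period in this BW_SHIFT fixed point. A sketch of that conversion, with plain division standing in for the kernel's div64_u64():

	#include <stdio.h>
	#include <stdint.h>

	#define BW_SHIFT	20
	#define BW_UNIT		(1 << BW_SHIFT)		/* represents a ratio of 1.0 */

	/* runtime/period scaled to BW_UNIT fixed point */
	static uint64_t to_ratio_demo(uint64_t period, uint64_t runtime)
	{
		return (runtime << BW_SHIFT) / period;
	}

	int main(void)
	{
		/* 10ms of runtime every 100ms -> 0.1 of a CPU */
		uint64_t r = to_ratio_demo(100 * 1000 * 1000ULL, 10 * 1000 * 1000ULL);

		printf("ratio = %llu (~%.2f)\n",
		       (unsigned long long)r, (double)r / BW_UNIT);
		return 0;
	}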
extern void init_entity_runnable_average(struct sched_entity *se); extern void init_entity_runnable_average(struct sched_entity *se);
...@@ -1814,8 +1825,8 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) ...@@ -1814,8 +1825,8 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
/* /*
* Unfair double_lock_balance: Optimizes throughput at the expense of * Unfair double_lock_balance: Optimizes throughput at the expense of
* latency by eliminating extra atomic operations when the locks are * latency by eliminating extra atomic operations when the locks are
* already in proper order on entry. This favors lower cpu-ids and will * already in proper order on entry. This favors lower CPU-ids and will
* grant the double lock to lower cpus over higher ids under contention, * grant the double lock to lower CPUs over higher ids under contention,
* regardless of entry order into the function. * regardless of entry order into the function.
*/ */
static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
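The deadlock-avoidance rule being described is plain ordered locking: under contention, everyone agrees to take the "lower" lock first. A generic pthread sketch of that discipline, ordering by address as the analogue of CPU id (not the kernel code):

	#include <pthread.h>

	/* Take two locks in a globally consistent order so that two threads
	 * locking the same pair can never deadlock against each other. */
	static void double_lock(pthread_mutex_t *a, pthread_mutex_t *b)
	{
		if (a == b) {
			pthread_mutex_lock(a);
			return;
		}
		if (a < b) {
			pthread_mutex_lock(a);
			pthread_mutex_lock(b);
		} else {
			pthread_mutex_lock(b);
			pthread_mutex_lock(a);
		}
	}

	static pthread_mutex_t m1 = PTHREAD_MUTEX_INITIALIZER;
	static pthread_mutex_t m2 = PTHREAD_MUTEX_INITIALIZER;

	int main(void)
	{
		double_lock(&m2, &m1);	/* acquired in a fixed order regardless */
		pthread_mutex_unlock(&m1);
		pthread_mutex_unlock(&m2);
		return 0;
	}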
...@@ -1847,7 +1858,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) ...@@ -1847,7 +1858,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest) static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
{ {
if (unlikely(!irqs_disabled())) { if (unlikely(!irqs_disabled())) {
/* printk() doesn't work good under rq->lock */ /* printk() doesn't work well under rq->lock */
raw_spin_unlock(&this_rq->lock); raw_spin_unlock(&this_rq->lock);
BUG_ON(1); BUG_ON(1);
} }
...@@ -2106,15 +2117,14 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {} ...@@ -2106,15 +2117,14 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */ #endif /* CONFIG_CPU_FREQ */
#ifdef arch_scale_freq_capacity #ifdef arch_scale_freq_capacity
#ifndef arch_scale_freq_invariant # ifndef arch_scale_freq_invariant
#define arch_scale_freq_invariant() (true) # define arch_scale_freq_invariant() true
#endif # endif
#else /* arch_scale_freq_capacity */ #else
#define arch_scale_freq_invariant() (false) # define arch_scale_freq_invariant() false
#endif #endif
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL #ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
static inline unsigned long cpu_util_dl(struct rq *rq) static inline unsigned long cpu_util_dl(struct rq *rq)
{ {
return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT; return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
...@@ -2124,5 +2134,4 @@ static inline unsigned long cpu_util_cfs(struct rq *rq) ...@@ -2124,5 +2134,4 @@ static inline unsigned long cpu_util_cfs(struct rq *rq)
{ {
return rq->cfs.avg.util_avg; return rq->cfs.avg.util_avg;
} }
#endif #endif
...@@ -78,8 +78,8 @@ static int show_schedstat(struct seq_file *seq, void *v) ...@@ -78,8 +78,8 @@ static int show_schedstat(struct seq_file *seq, void *v)
* This iterator needs some explanation. * This iterator needs some explanation.
* It returns 1 for the header position. * It returns 1 for the header position.
* This means 2 is cpu 0. * This means 2 is cpu 0.
* In a hotplugged system some cpus, including cpu 0, may be missing so we have * In a hotplugged system some CPUs, including cpu 0, may be missing so we have
* to use cpumask_* to iterate over the cpus. * to use cpumask_* to iterate over the CPUs.
*/ */
static void *schedstat_start(struct seq_file *file, loff_t *offset) static void *schedstat_start(struct seq_file *file, loff_t *offset)
{ {
...@@ -99,12 +99,14 @@ static void *schedstat_start(struct seq_file *file, loff_t *offset) ...@@ -99,12 +99,14 @@ static void *schedstat_start(struct seq_file *file, loff_t *offset)
if (n < nr_cpu_ids) if (n < nr_cpu_ids)
return (void *)(unsigned long)(n + 2); return (void *)(unsigned long)(n + 2);
return NULL; return NULL;
} }
static void *schedstat_next(struct seq_file *file, void *data, loff_t *offset) static void *schedstat_next(struct seq_file *file, void *data, loff_t *offset)
{ {
(*offset)++; (*offset)++;
return schedstat_start(file, offset); return schedstat_start(file, offset);
} }
...@@ -134,6 +136,7 @@ static const struct file_operations proc_schedstat_operations = { ...@@ -134,6 +136,7 @@ static const struct file_operations proc_schedstat_operations = {
static int __init proc_schedstat_init(void) static int __init proc_schedstat_init(void)
{ {
proc_create("schedstat", 0, NULL, &proc_schedstat_operations); proc_create("schedstat", 0, NULL, &proc_schedstat_operations);
return 0; return 0;
} }
subsys_initcall(proc_schedstat_init); subsys_initcall(proc_schedstat_init);
...@@ -30,35 +30,29 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) ...@@ -30,35 +30,29 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta)
if (rq) if (rq)
rq->rq_sched_info.run_delay += delta; rq->rq_sched_info.run_delay += delta;
} }
#define schedstat_enabled() static_branch_unlikely(&sched_schedstats) #define schedstat_enabled() static_branch_unlikely(&sched_schedstats)
#define __schedstat_inc(var) do { var++; } while (0) #define __schedstat_inc(var) do { var++; } while (0)
#define schedstat_inc(var) do { if (schedstat_enabled()) { var++; } } while (0) #define schedstat_inc(var) do { if (schedstat_enabled()) { var++; } } while (0)
#define __schedstat_add(var, amt) do { var += (amt); } while (0) #define __schedstat_add(var, amt) do { var += (amt); } while (0)
#define schedstat_add(var, amt) do { if (schedstat_enabled()) { var += (amt); } } while (0) #define schedstat_add(var, amt) do { if (schedstat_enabled()) { var += (amt); } } while (0)
#define __schedstat_set(var, val) do { var = (val); } while (0) #define __schedstat_set(var, val) do { var = (val); } while (0)
#define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0) #define schedstat_set(var, val) do { if (schedstat_enabled()) { var = (val); } } while (0)
#define schedstat_val(var) (var) #define schedstat_val(var) (var)
#define schedstat_val_or_zero(var) ((schedstat_enabled()) ? (var) : 0) #define schedstat_val_or_zero(var) ((schedstat_enabled()) ? (var) : 0)
#else /* !CONFIG_SCHEDSTATS */ #else /* !CONFIG_SCHEDSTATS: */
static inline void static inline void rq_sched_info_arrive (struct rq *rq, unsigned long long delta) { }
rq_sched_info_arrive(struct rq *rq, unsigned long long delta) static inline void rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) { }
{} static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delta) { }
static inline void # define schedstat_enabled() 0
rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) # define __schedstat_inc(var) do { } while (0)
{} # define schedstat_inc(var) do { } while (0)
static inline void # define __schedstat_add(var, amt) do { } while (0)
rq_sched_info_depart(struct rq *rq, unsigned long long delta) # define schedstat_add(var, amt) do { } while (0)
{} # define __schedstat_set(var, val) do { } while (0)
#define schedstat_enabled() 0 # define schedstat_set(var, val) do { } while (0)
#define __schedstat_inc(var) do { } while (0) # define schedstat_val(var) 0
#define schedstat_inc(var) do { } while (0) # define schedstat_val_or_zero(var) 0
#define __schedstat_add(var, amt) do { } while (0)
#define schedstat_add(var, amt) do { } while (0)
#define __schedstat_set(var, val) do { } while (0)
#define schedstat_set(var, val) do { } while (0)
#define schedstat_val(var) 0
#define schedstat_val_or_zero(var) 0
#endif /* CONFIG_SCHEDSTATS */ #endif /* CONFIG_SCHEDSTATS */
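The pattern above, a raw `__schedstat_*()` twin next to a runtime-gated one, can be mimicked in plain C with a global flag standing in for the jump-label static branch; a hedged sketch:

	#include <stdio.h>

	static int schedstats_enabled;	/* stand-in for the static branch */

	#define demo_stat_inc(var)	do { if (schedstats_enabled) { (var)++; } } while (0)

	int main(void)
	{
		unsigned int yld_count = 0;

		demo_stat_inc(yld_count);	/* no-op: collection disabled */
		schedstats_enabled = 1;
		demo_stat_inc(yld_count);	/* now it counts */
		printf("yld_count = %u\n", yld_count);	/* prints 1 */
		return 0;
	}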
#ifdef CONFIG_SCHED_INFO #ifdef CONFIG_SCHED_INFO
...@@ -69,9 +63,9 @@ static inline void sched_info_reset_dequeued(struct task_struct *t) ...@@ -69,9 +63,9 @@ static inline void sched_info_reset_dequeued(struct task_struct *t)
/* /*
* We are interested in knowing how long it was from the *first* time a * We are interested in knowing how long it was from the *first* time a
* task was queued to the time that it finally hit a cpu, we call this routine * task was queued to the time that it finally hit a CPU, we call this routine
* from dequeue_task() to account for possible rq->clock skew across cpus. The * from dequeue_task() to account for possible rq->clock skew across CPUs. The
* delta taken on each cpu would annul the skew. * delta taken on each CPU would annul the skew.
*/ */
static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t) static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t)
{ {
...@@ -87,7 +81,7 @@ static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t) ...@@ -87,7 +81,7 @@ static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t)
} }
/* /*
* Called when a task finally hits the cpu. We can now calculate how * Called when a task finally hits the CPU. We can now calculate how
* long it was waiting to run. We also note when it began so that we * long it was waiting to run. We also note when it began so that we
* can keep stats on how long its timeslice is. * can keep stats on how long its timeslice is.
*/ */
...@@ -112,9 +106,10 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t) ...@@ -112,9 +106,10 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t)
*/ */
static inline void sched_info_queued(struct rq *rq, struct task_struct *t) static inline void sched_info_queued(struct rq *rq, struct task_struct *t)
{ {
if (unlikely(sched_info_on())) if (unlikely(sched_info_on())) {
if (!t->sched_info.last_queued) if (!t->sched_info.last_queued)
t->sched_info.last_queued = rq_clock(rq); t->sched_info.last_queued = rq_clock(rq);
}
} }
/* /*
...@@ -127,8 +122,7 @@ static inline void sched_info_queued(struct rq *rq, struct task_struct *t) ...@@ -127,8 +122,7 @@ static inline void sched_info_queued(struct rq *rq, struct task_struct *t)
*/ */
static inline void sched_info_depart(struct rq *rq, struct task_struct *t) static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
{ {
unsigned long long delta = rq_clock(rq) - unsigned long long delta = rq_clock(rq) - t->sched_info.last_arrival;
t->sched_info.last_arrival;
rq_sched_info_depart(rq, delta); rq_sched_info_depart(rq, delta);
...@@ -142,11 +136,10 @@ static inline void sched_info_depart(struct rq *rq, struct task_struct *t) ...@@ -142,11 +136,10 @@ static inline void sched_info_depart(struct rq *rq, struct task_struct *t)
* the idle task.) We are only called when prev != next. * the idle task.) We are only called when prev != next.
*/ */
static inline void static inline void
__sched_info_switch(struct rq *rq, __sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
struct task_struct *prev, struct task_struct *next)
{ {
/* /*
* prev now departs the cpu. It's not interesting to record * prev now departs the CPU. It's not interesting to record
* stats about how efficient we were at scheduling the idle * stats about how efficient we were at scheduling the idle
* process, however. * process, however.
*/ */
...@@ -156,18 +149,19 @@ __sched_info_switch(struct rq *rq, ...@@ -156,18 +149,19 @@ __sched_info_switch(struct rq *rq,
if (next != rq->idle) if (next != rq->idle)
sched_info_arrive(rq, next); sched_info_arrive(rq, next);
} }
static inline void static inline void
sched_info_switch(struct rq *rq, sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next)
struct task_struct *prev, struct task_struct *next)
{ {
if (unlikely(sched_info_on())) if (unlikely(sched_info_on()))
__sched_info_switch(rq, prev, next); __sched_info_switch(rq, prev, next);
} }
#else
#define sched_info_queued(rq, t) do { } while (0) #else /* !CONFIG_SCHED_INFO: */
#define sched_info_reset_dequeued(t) do { } while (0) # define sched_info_queued(rq, t) do { } while (0)
#define sched_info_dequeued(rq, t) do { } while (0) # define sched_info_reset_dequeued(t) do { } while (0)
#define sched_info_depart(rq, t) do { } while (0) # define sched_info_dequeued(rq, t) do { } while (0)
#define sched_info_arrive(rq, next) do { } while (0) # define sched_info_depart(rq, t) do { } while (0)
#define sched_info_switch(rq, t, next) do { } while (0) # define sched_info_arrive(rq, next) do { } while (0)
# define sched_info_switch(rq, t, next) do { } while (0)
#endif /* CONFIG_SCHED_INFO */ #endif /* CONFIG_SCHED_INFO */
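The sched_info bookkeeping above is timestamp deltas: stamp when the task is queued, subtract when it first gets the CPU. A user-space sketch with CLOCK_MONOTONIC standing in for rq_clock():

	#include <stdio.h>
	#include <time.h>

	static unsigned long long now_ns(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
	}

	struct sched_info_demo {
		unsigned long long last_queued;	/* when the task hit the runqueue */
		unsigned long long run_delay;	/* accumulated wait time */
	};

	int main(void)
	{
		struct sched_info_demo si = { 0, 0 };

		si.last_queued = now_ns();			/* sched_info_queued() */
		/* ... task waits for a CPU ... */
		si.run_delay += now_ns() - si.last_queued;	/* sched_info_arrive() */
		si.last_queued = 0;

		printf("waited %llu ns\n", si.run_delay);
		return 0;
	}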
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include "sched.h"
/* /*
* stop-task scheduling class. * stop-task scheduling class.
* *
...@@ -9,6 +7,7 @@ ...@@ -9,6 +7,7 @@
* *
* See kernel/stop_machine.c * See kernel/stop_machine.c
*/ */
#include "sched.h"
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
static int static int
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
/*
* <linux/swait.h> (simple wait queues) implementation:
*/
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/swait.h> #include <linux/swait.h>
......
...@@ -41,8 +41,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, ...@@ -41,8 +41,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
if (!(sd->flags & SD_LOAD_BALANCE)) { if (!(sd->flags & SD_LOAD_BALANCE)) {
printk("does not load-balance\n"); printk("does not load-balance\n");
if (sd->parent) if (sd->parent)
printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain" printk(KERN_ERR "ERROR: !SD_LOAD_BALANCE domain has parent");
" has parent");
return -1; return -1;
} }
...@@ -50,12 +49,10 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, ...@@ -50,12 +49,10 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
cpumask_pr_args(sched_domain_span(sd)), sd->name); cpumask_pr_args(sched_domain_span(sd)), sd->name);
if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) { if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
printk(KERN_ERR "ERROR: domain->span does not contain " printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu);
"CPU%d\n", cpu);
} }
if (!cpumask_test_cpu(cpu, sched_group_span(group))) { if (!cpumask_test_cpu(cpu, sched_group_span(group))) {
printk(KERN_ERR "ERROR: domain->groups does not contain" printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);
" CPU%d\n", cpu);
} }
printk(KERN_DEBUG "%*s groups:", level + 1, ""); printk(KERN_DEBUG "%*s groups:", level + 1, "");
...@@ -115,8 +112,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, ...@@ -115,8 +112,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
if (sd->parent && if (sd->parent &&
!cpumask_subset(groupmask, sched_domain_span(sd->parent))) !cpumask_subset(groupmask, sched_domain_span(sd->parent)))
printk(KERN_ERR "ERROR: parent span is not a superset " printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n");
"of domain->span\n");
return 0; return 0;
} }
...@@ -595,7 +591,7 @@ int group_balance_cpu(struct sched_group *sg) ...@@ -595,7 +591,7 @@ int group_balance_cpu(struct sched_group *sg)
* are not. * are not.
* *
* This leads to a few particularly weird cases where the sched_domain's are * This leads to a few particularly weird cases where the sched_domain's are
* not of the same number for each cpu. Consider: * not of the same number for each CPU. Consider:
* *
* NUMA-2 0-3 0-3 * NUMA-2 0-3 0-3
* groups: {0-2},{1-3} {1-3},{0-2} * groups: {0-2},{1-3} {1-3},{0-2}
...@@ -780,7 +776,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) ...@@ -780,7 +776,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
* ^ ^ ^ ^ * ^ ^ ^ ^
* `-' `-' * `-' `-'
* *
* The sched_domains are per-cpu and have a two way link (parent & child) and * The sched_domains are per-CPU and have a two way link (parent & child) and
* denote the ever growing mask of CPUs belonging to that level of topology. * denote the ever growing mask of CPUs belonging to that level of topology.
* *
* Each sched_domain has a circular (double) linked list of sched_group's, each * Each sched_domain has a circular (double) linked list of sched_group's, each
...@@ -1021,6 +1017,7 @@ __visit_domain_allocation_hell(struct s_data *d, const struct cpumask *cpu_map) ...@@ -1021,6 +1017,7 @@ __visit_domain_allocation_hell(struct s_data *d, const struct cpumask *cpu_map)
d->rd = alloc_rootdomain(); d->rd = alloc_rootdomain();
if (!d->rd) if (!d->rd)
return sa_sd; return sa_sd;
return sa_rootdomain; return sa_rootdomain;
} }
...@@ -1047,12 +1044,14 @@ static void claim_allocations(int cpu, struct sched_domain *sd) ...@@ -1047,12 +1044,14 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
} }
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
static int sched_domains_numa_levels;
enum numa_topology_type sched_numa_topology_type; enum numa_topology_type sched_numa_topology_type;
static int *sched_domains_numa_distance;
int sched_max_numa_distance; static int sched_domains_numa_levels;
static struct cpumask ***sched_domains_numa_masks; static int sched_domains_curr_level;
static int sched_domains_curr_level;
int sched_max_numa_distance;
static int *sched_domains_numa_distance;
static struct cpumask ***sched_domains_numa_masks;
#endif #endif
/* /*
...@@ -1074,11 +1073,11 @@ static int sched_domains_curr_level; ...@@ -1074,11 +1073,11 @@ static int sched_domains_curr_level;
* SD_ASYM_PACKING - describes SMT quirks * SD_ASYM_PACKING - describes SMT quirks
*/ */
#define TOPOLOGY_SD_FLAGS \ #define TOPOLOGY_SD_FLAGS \
(SD_SHARE_CPUCAPACITY | \ (SD_SHARE_CPUCAPACITY | \
SD_SHARE_PKG_RESOURCES | \ SD_SHARE_PKG_RESOURCES | \
SD_NUMA | \ SD_NUMA | \
SD_ASYM_PACKING | \ SD_ASYM_PACKING | \
SD_ASYM_CPUCAPACITY | \ SD_ASYM_CPUCAPACITY | \
SD_SHARE_POWERDOMAIN) SD_SHARE_POWERDOMAIN)
static struct sched_domain * static struct sched_domain *
...@@ -1628,7 +1627,7 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve ...@@ -1628,7 +1627,7 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
pr_err(" the %s domain not a subset of the %s domain\n", pr_err(" the %s domain not a subset of the %s domain\n",
child->name, sd->name); child->name, sd->name);
#endif #endif
/* Fixup, ensure @sd has at least @child cpus. */ /* Fixup, ensure @sd has at least @child CPUs. */
cpumask_or(sched_domain_span(sd), cpumask_or(sched_domain_span(sd),
sched_domain_span(sd), sched_domain_span(sd),
sched_domain_span(child)); sched_domain_span(child));
...@@ -1720,6 +1719,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att ...@@ -1720,6 +1719,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
ret = 0; ret = 0;
error: error:
__free_domain_allocs(&d, alloc_state, cpu_map); __free_domain_allocs(&d, alloc_state, cpu_map);
return ret; return ret;
} }
...@@ -1824,6 +1824,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, ...@@ -1824,6 +1824,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
return 1; return 1;
tmp = SD_ATTR_INIT; tmp = SD_ATTR_INIT;
return !memcmp(cur ? (cur + idx_cur) : &tmp, return !memcmp(cur ? (cur + idx_cur) : &tmp,
new ? (new + idx_new) : &tmp, new ? (new + idx_new) : &tmp,
sizeof(struct sched_domain_attr)); sizeof(struct sched_domain_attr));
...@@ -1929,4 +1930,3 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], ...@@ -1929,4 +1930,3 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
mutex_unlock(&sched_domains_mutex); mutex_unlock(&sched_domains_mutex);
} }
...@@ -107,6 +107,7 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode, ...@@ -107,6 +107,7 @@ static int __wake_up_common(struct wait_queue_head *wq_head, unsigned int mode,
break; break;
} }
} }
return nr_exclusive; return nr_exclusive;
} }
...@@ -317,6 +318,7 @@ int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait) ...@@ -317,6 +318,7 @@ int do_wait_intr(wait_queue_head_t *wq, wait_queue_entry_t *wait)
spin_unlock(&wq->lock); spin_unlock(&wq->lock);
schedule(); schedule();
spin_lock(&wq->lock); spin_lock(&wq->lock);
return 0; return 0;
} }
EXPORT_SYMBOL(do_wait_intr); EXPORT_SYMBOL(do_wait_intr);
...@@ -333,6 +335,7 @@ int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait) ...@@ -333,6 +335,7 @@ int do_wait_intr_irq(wait_queue_head_t *wq, wait_queue_entry_t *wait)
spin_unlock_irq(&wq->lock); spin_unlock_irq(&wq->lock);
schedule(); schedule();
spin_lock_irq(&wq->lock); spin_lock_irq(&wq->lock);
return 0; return 0;
} }
EXPORT_SYMBOL(do_wait_intr_irq); EXPORT_SYMBOL(do_wait_intr_irq);
...@@ -378,6 +381,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i ...@@ -378,6 +381,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
if (ret) if (ret)
list_del_init(&wq_entry->entry); list_del_init(&wq_entry->entry);
return ret; return ret;
} }
EXPORT_SYMBOL(autoremove_wake_function); EXPORT_SYMBOL(autoremove_wake_function);
......
...@@ -29,8 +29,8 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync ...@@ -29,8 +29,8 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync
wait_bit->key.bit_nr != key->bit_nr || wait_bit->key.bit_nr != key->bit_nr ||
test_bit(key->bit_nr, key->flags)) test_bit(key->bit_nr, key->flags))
return 0; return 0;
else
return autoremove_wake_function(wq_entry, mode, sync, key); return autoremove_wake_function(wq_entry, mode, sync, key);
} }
EXPORT_SYMBOL(wake_bit_function); EXPORT_SYMBOL(wake_bit_function);
...@@ -50,7 +50,9 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_ ...@@ -50,7 +50,9 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_
if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags)) if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
ret = (*action)(&wbq_entry->key, mode); ret = (*action)(&wbq_entry->key, mode);
} while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret); } while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
finish_wait(wq_head, &wbq_entry->wq_entry); finish_wait(wq_head, &wbq_entry->wq_entry);
return ret; return ret;
} }
EXPORT_SYMBOL(__wait_on_bit); EXPORT_SYMBOL(__wait_on_bit);
...@@ -73,6 +75,7 @@ int __sched out_of_line_wait_on_bit_timeout( ...@@ -73,6 +75,7 @@ int __sched out_of_line_wait_on_bit_timeout(
DEFINE_WAIT_BIT(wq_entry, word, bit); DEFINE_WAIT_BIT(wq_entry, word, bit);
wq_entry.key.timeout = jiffies + timeout; wq_entry.key.timeout = jiffies + timeout;
return __wait_on_bit(wq_head, &wq_entry, action, mode); return __wait_on_bit(wq_head, &wq_entry, action, mode);
} }
EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout); EXPORT_SYMBOL_GPL(out_of_line_wait_on_bit_timeout);
@@ -120,6 +123,7 @@ EXPORT_SYMBOL(out_of_line_wait_on_bit_lock);
 void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit)
 {
 	struct wait_bit_key key = __WAIT_BIT_KEY_INITIALIZER(word, bit);
+
 	if (waitqueue_active(wq_head))
 		__wake_up(wq_head, TASK_NORMAL, 1, &key);
 }
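__wake_up_bit() skips the wakeup when the waitqueue looks empty, so a waker must order its bit update before that waitqueue_active() check. A sketch of the canonical waker side, assuming a hypothetical MY_BIT in an unsigned long flags word:

	clear_bit(MY_BIT, &flags);
	smp_mb__after_atomic();		/* order the clear before the waiter check */
	wake_up_bit(&flags, MY_BIT);	/* ends up in __wake_up_bit() above */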
@@ -157,6 +161,7 @@ static inline wait_queue_head_t *atomic_t_waitqueue(atomic_t *p)
 {
 	if (BITS_PER_LONG == 64) {
 		unsigned long q = (unsigned long)p;
+
 		return bit_waitqueue((void *)(q & ~1), q & 1);
 	}
 	return bit_waitqueue(p, 0);
@@ -173,6 +178,7 @@ static int wake_atomic_t_function(struct wait_queue_entry *wq_entry, unsigned mo
 	    wait_bit->key.bit_nr != key->bit_nr ||
 	    atomic_read(val) != 0)
 		return 0;
+
 	return autoremove_wake_function(wq_entry, mode, sync, key);
 }
@@ -196,6 +202,7 @@ int __wait_on_atomic_t(struct wait_queue_head *wq_head, struct wait_bit_queue_en
 			ret = (*action)(val, mode);
 	} while (!ret && atomic_read(val) != 0);
 	finish_wait(wq_head, &wbq_entry->wq_entry);
+
 	return ret;
 }
@@ -226,6 +233,7 @@ __sched int atomic_t_wait(atomic_t *counter, unsigned int mode)
 	schedule();
 	if (signal_pending_state(mode, current))
 		return -EINTR;
+
 	return 0;
 }
 EXPORT_SYMBOL(atomic_t_wait);
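atomic_t_wait() is the stock action passed to wait_on_atomic_t(), which sleeps until an atomic_t reaches zero. A hedged usage sketch, assuming a refcount-style atomic_t v:

	/* Waiter: block until v drops to zero: */
	wait_on_atomic_t(&v, atomic_t_wait, TASK_UNINTERRUPTIBLE);

	/* Waker: after the final put, notify the queue: */
	if (atomic_dec_and_test(&v))
		wake_up_atomic_t(&v);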
@@ -250,6 +258,7 @@ __sched int bit_wait(struct wait_bit_key *word, int mode)
 	schedule();
 	if (signal_pending_state(mode, current))
 		return -EINTR;
+
 	return 0;
 }
 EXPORT_SYMBOL(bit_wait);
@@ -259,6 +268,7 @@ __sched int bit_wait_io(struct wait_bit_key *word, int mode)
 	io_schedule();
 	if (signal_pending_state(mode, current))
 		return -EINTR;
+
 	return 0;
 }
 EXPORT_SYMBOL(bit_wait_io);
@@ -266,11 +276,13 @@ EXPORT_SYMBOL(bit_wait_io);
 __sched int bit_wait_timeout(struct wait_bit_key *word, int mode)
 {
 	unsigned long now = READ_ONCE(jiffies);
+
 	if (time_after_eq(now, word->timeout))
 		return -EAGAIN;
 	schedule_timeout(word->timeout - now);
 	if (signal_pending_state(mode, current))
 		return -EINTR;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(bit_wait_timeout);
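The time_after_eq() test above keeps the timeout comparison correct across jiffies wraparound; modulo type-checking, it boils down to a signed subtraction:

	#define time_after_eq(a, b)	((long)((a) - (b)) >= 0)	/* wrap-safe */

so "word->timeout - now" is only computed once "now" is known to be before the deadline.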
@@ -278,11 +290,13 @@ EXPORT_SYMBOL_GPL(bit_wait_timeout);
 __sched int bit_wait_io_timeout(struct wait_bit_key *word, int mode)
 {
 	unsigned long now = READ_ONCE(jiffies);
+
 	if (time_after_eq(now, word->timeout))
 		return -EAGAIN;
 	io_schedule_timeout(word->timeout - now);
 	if (signal_pending_state(mode, current))
 		return -EINTR;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(bit_wait_io_timeout);
...
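For reference, bit_wait_timeout() above is the action behind the wait_on_bit_timeout() helper; a caller sketch, with MY_BIT and flags hypothetical:

	/* 0 if the bit cleared, -EAGAIN on timeout, -EINTR on a signal: */
	err = wait_on_bit_timeout(&flags, MY_BIT, TASK_INTERRUPTIBLE, HZ);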