Commit c94d89fa authored by Peter Zijlstra

Merge branch 'sched/core'

parents 7c60610d 234b8ab6
@@ -15,6 +15,7 @@
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/mm.h>
+#include <linux/mmu_context.h>
#include <linux/jump_label.h>
#ifdef CONFIG_CPUSETS
@@ -58,7 +59,7 @@ extern void cpuset_wait_for_hotplug(void);
extern void cpuset_read_lock(void);
extern void cpuset_read_unlock(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
-extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
+extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
#define cpuset_current_mems_allowed (current->mems_allowed)
void cpuset_init_current_mems_allowed(void);
@@ -184,11 +185,12 @@ static inline void cpuset_read_unlock(void) { }
static inline void cpuset_cpus_allowed(struct task_struct *p,
                                       struct cpumask *mask)
{
-    cpumask_copy(mask, cpu_possible_mask);
+    cpumask_copy(mask, task_cpu_possible_mask(p));
}
-static inline void cpuset_cpus_allowed_fallback(struct task_struct *p)
+static inline bool cpuset_cpus_allowed_fallback(struct task_struct *p)
{
+    return false;
}
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
...
@@ -14,4 +14,18 @@
static inline void leave_mm(int cpu) { }
#endif
+/*
+ * CPUs that are capable of running user task @p. Must contain at least one
+ * active CPU. It is assumed that the kernel can run on all CPUs, so calling
+ * this for a kernel thread is pointless.
+ *
+ * By default, we assume a sane, homogeneous system.
+ */
+#ifndef task_cpu_possible_mask
+# define task_cpu_possible_mask(p)	cpu_possible_mask
+# define task_cpu_possible(cpu, p)	true
+#else
+# define task_cpu_possible(cpu, p)	cpumask_test_cpu((cpu), task_cpu_possible_mask(p))
+#endif
#endif
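For context: an architecture whose CPUs cannot all run every user task would override the default above from its own asm/mmu_context.h. Below is a minimal sketch of such an override, assuming hypothetical helpers (is_restricted_thread(), system_restricted_cpumask()) that are not part of this diff.

/* Hypothetical arch override sketch, not taken from this commit: */
static inline const struct cpumask *
example_task_cpu_possible_mask(struct task_struct *p)
{
	/* Restricted tasks may only run on a subset of the possible CPUs. */
	if (is_restricted_thread(task_thread_info(p)))	/* hypothetical helper */
		return system_restricted_cpumask();	/* hypothetical helper */

	return cpu_possible_mask;
}
#define task_cpu_possible_mask(p)	example_task_cpu_possible_mask(p)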
@@ -748,6 +748,7 @@ struct task_struct {
    unsigned int policy;
    int nr_cpus_allowed;
    const cpumask_t *cpus_ptr;
+   cpumask_t *user_cpus_ptr;
    cpumask_t cpus_mask;
    void *migration_pending;
#ifdef CONFIG_SMP
@@ -1705,6 +1706,11 @@ extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
+extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
+extern void release_user_cpus_ptr(struct task_struct *p);
+extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask);
+extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
+extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);
#else
static inline void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
@@ -1715,6 +1721,21 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpuma
        return -EINVAL;
    return 0;
}
+static inline int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node)
+{
+   if (src->user_cpus_ptr)
+       return -EINVAL;
+   return 0;
+}
+static inline void release_user_cpus_ptr(struct task_struct *p)
+{
+   WARN_ON(p->user_cpus_ptr);
+}
+static inline int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
+{
+   return 0;
+}
#endif
extern int yield_to(struct task_struct *p, bool preempt);
...
@@ -28,30 +28,12 @@ enum { sysctl_hung_task_timeout_secs = 0 };
extern unsigned int sysctl_sched_child_runs_first;
-extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
-extern unsigned int sysctl_sched_wakeup_granularity;
enum sched_tunable_scaling {
    SCHED_TUNABLESCALING_NONE,
    SCHED_TUNABLESCALING_LOG,
    SCHED_TUNABLESCALING_LINEAR,
    SCHED_TUNABLESCALING_END,
};
-extern unsigned int sysctl_sched_tunable_scaling;
-extern unsigned int sysctl_numa_balancing_scan_delay;
-extern unsigned int sysctl_numa_balancing_scan_period_min;
-extern unsigned int sysctl_numa_balancing_scan_period_max;
-extern unsigned int sysctl_numa_balancing_scan_size;
-#ifdef CONFIG_SCHED_DEBUG
-extern __read_mostly unsigned int sysctl_sched_migration_cost;
-extern __read_mostly unsigned int sysctl_sched_nr_migrate;
-extern int sysctl_resched_latency_warn_ms;
-extern int sysctl_resched_latency_warn_once;
-#endif
/*
 * control realtime throttling:
...
@@ -56,7 +56,7 @@ struct task_struct;
#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \
    .lock = __SPIN_LOCK_UNLOCKED(name.lock), \
-   .head = { &(name).head, &(name).head } }
+   .head = LIST_HEAD_INIT(name.head) }
#define DECLARE_WAIT_QUEUE_HEAD(name) \
    struct wait_queue_head name = __WAIT_QUEUE_HEAD_INITIALIZER(name)
...
@@ -80,6 +80,7 @@ struct task_struct init_task
    .normal_prio    = MAX_PRIO - 20,
    .policy         = SCHED_NORMAL,
    .cpus_ptr       = &init_task.cpus_mask,
+   .user_cpus_ptr  = NULL,
    .cpus_mask      = CPU_MASK_ALL,
    .nr_cpus_allowed= NR_CPUS,
    .mm             = NULL,
...
@@ -372,18 +372,29 @@ static inline bool is_in_v2_mode(void)
}
/*
- * Return in pmask the portion of a cpusets's cpus_allowed that
- * are online. If none are online, walk up the cpuset hierarchy
- * until we find one that does have some online cpus.
+ * Return in pmask the portion of a task's cpusets's cpus_allowed that
+ * are online and are capable of running the task. If none are found,
+ * walk up the cpuset hierarchy until we find one that does have some
+ * appropriate cpus.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of cpu_online_mask.
 *
 * Call with callback_lock or cpuset_mutex held.
 */
-static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
+static void guarantee_online_cpus(struct task_struct *tsk,
+                                  struct cpumask *pmask)
{
-   while (!cpumask_intersects(cs->effective_cpus, cpu_online_mask)) {
+   const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
+   struct cpuset *cs;
+
+   if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_online_mask)))
+       cpumask_copy(pmask, cpu_online_mask);
+
+   rcu_read_lock();
+   cs = task_cs(tsk);
+
+   while (!cpumask_intersects(cs->effective_cpus, pmask)) {
        cs = parent_cs(cs);
        if (unlikely(!cs)) {
            /*
@@ -393,11 +404,13 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
             * cpuset's effective_cpus is on its way to be
             * identical to cpu_online_mask.
             */
-           cpumask_copy(pmask, cpu_online_mask);
-           return;
+           goto out_unlock;
        }
    }
-   cpumask_and(pmask, cs->effective_cpus, cpu_online_mask);
+   cpumask_and(pmask, pmask, cs->effective_cpus);
+
+out_unlock:
+   rcu_read_unlock();
}
/*
@@ -2199,15 +2212,13 @@ static void cpuset_attach(struct cgroup_taskset *tset)
    percpu_down_write(&cpuset_rwsem);
-   /* prepare for attach */
-   if (cs == &top_cpuset)
-       cpumask_copy(cpus_attach, cpu_possible_mask);
-   else
-       guarantee_online_cpus(cs, cpus_attach);
    guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
    cgroup_taskset_for_each(task, css, tset) {
+       if (cs != &top_cpuset)
+           guarantee_online_cpus(task, cpus_attach);
+       else
+           cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
        /*
         * can_attach beforehand should guarantee that this doesn't
         * fail. TODO: have a better way to handle failure here
@@ -3302,9 +3313,7 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
    unsigned long flags;
    spin_lock_irqsave(&callback_lock, flags);
-   rcu_read_lock();
-   guarantee_online_cpus(task_cs(tsk), pmask);
-   rcu_read_unlock();
+   guarantee_online_cpus(tsk, pmask);
    spin_unlock_irqrestore(&callback_lock, flags);
}
@@ -3318,13 +3327,22 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 * which will not contain a sane cpumask during cases such as cpu hotplugging.
 * This is the absolute last resort for the scheduler and it is only used if
 * _every_ other avenue has been traveled.
+ *
+ * Returns true if the affinity of @tsk was changed, false otherwise.
 **/
-void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+bool cpuset_cpus_allowed_fallback(struct task_struct *tsk)
{
+   const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
+   const struct cpumask *cs_mask;
+   bool changed = false;
+
    rcu_read_lock();
-   do_set_cpus_allowed(tsk, is_in_v2_mode() ?
-       task_cs(tsk)->cpus_allowed : cpu_possible_mask);
+   cs_mask = task_cs(tsk)->cpus_allowed;
+   if (is_in_v2_mode() && cpumask_subset(cs_mask, possible_mask)) {
+       do_set_cpus_allowed(tsk, cs_mask);
+       changed = true;
+   }
    rcu_read_unlock();
    /*
@@ -3344,6 +3362,7 @@ void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
     * select_fallback_rq() will fix things ups and set cpu_possible_mask
     * if required.
     */
+   return changed;
}
void __init cpuset_init_current_mems_allowed(void)
...
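The new bool return value is consumed by the scheduler's fallback path in kernel/sched/core.c, whose diff is collapsed in this view. Below is only a hedged sketch of how such a caller can use it, not a reproduction of the collapsed hunk.

/* Illustrative caller logic (sketch, assuming a task pointer p): */
if (cpuset_cpus_allowed_fallback(p)) {
	/* Affinity was reset to the task's cpuset; retry CPU selection. */
} else {
	/* Nothing changed; widen further, e.g. toward task_cpu_possible_mask(p). */
}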
@@ -446,6 +446,7 @@ void put_task_stack(struct task_struct *tsk)
void free_task(struct task_struct *tsk)
{
+   release_user_cpus_ptr(tsk);
    scs_release(tsk);
#ifndef CONFIG_THREAD_INFO_IN_TASK
@@ -924,6 +925,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#endif
    if (orig->cpus_ptr == &orig->cpus_mask)
        tsk->cpus_ptr = &tsk->cpus_mask;
+   dup_user_cpus_ptr(tsk, orig, node);
    /*
     * One for the user space visible state that goes away when reaped.
...
This diff is collapsed.
@@ -1733,6 +1733,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
     */
    raw_spin_rq_lock(rq);
    if (p->dl.dl_non_contending) {
+       update_rq_clock(rq);
        sub_running_bw(&p->dl, &rq->dl);
        p->dl.dl_non_contending = 0;
        /*
@@ -2741,7 +2742,7 @@ void __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
    dl_se->dl_runtime = attr->sched_runtime;
    dl_se->dl_deadline = attr->sched_deadline;
    dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
-   dl_se->flags = attr->sched_flags;
+   dl_se->flags = attr->sched_flags & SCHED_DL_FLAGS;
    dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
    dl_se->dl_density = to_ratio(dl_se->dl_deadline, dl_se->dl_runtime);
}
@@ -2754,7 +2755,8 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
    attr->sched_runtime = dl_se->dl_runtime;
    attr->sched_deadline = dl_se->dl_deadline;
    attr->sched_period = dl_se->dl_period;
-   attr->sched_flags = dl_se->flags;
+   attr->sched_flags &= ~SCHED_DL_FLAGS;
+   attr->sched_flags |= dl_se->flags;
}
/*
@@ -2851,7 +2853,7 @@ bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr)
    if (dl_se->dl_runtime != attr->sched_runtime ||
        dl_se->dl_deadline != attr->sched_deadline ||
        dl_se->dl_period != attr->sched_period ||
-       dl_se->flags != attr->sched_flags)
+       dl_se->flags != (attr->sched_flags & SCHED_DL_FLAGS))
        return true;
    return false;
...
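The effect of masking with SCHED_DL_FLAGS (defined later in this diff in kernel/sched/sched.h) can be seen with a small worked example. The UAPI flag values below are assumed from include/uapi/linux/sched.h and are quoted for illustration only.

/* Assumed UAPI values (include/uapi/linux/sched.h), for illustration: */
#define SCHED_FLAG_RESET_ON_FORK	0x01
#define SCHED_FLAG_RECLAIM		0x02
#define SCHED_FLAG_DL_OVERRUN		0x04
/* SCHED_FLAG_SUGOV (0x10000000) is kernel-internal; see the sched.h hunk below. */

/*
 * With sched_flags = RESET_ON_FORK | RECLAIM (0x03):
 *   __setparam_dl() now stores 0x03 & SCHED_DL_FLAGS == 0x02, so bits that are
 *   unrelated to deadline scheduling no longer leak into dl_se->flags;
 *   __getparam_dl() clears only the SCHED_DL_FLAGS bits of attr->sched_flags
 *   before OR-ing dl_se->flags back in, leaving bits such as RESET_ON_FORK
 *   untouched.
 */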
@@ -388,6 +388,13 @@ void update_sched_domain_debugfs(void)
{
    int cpu, i;
+   /*
+    * This can unfortunately be invoked before sched_debug_init() creates
+    * the debug directory. Don't touch sd_sysctl_cpus until then.
+    */
+   if (!debugfs_sched)
+       return;
+
    if (!cpumask_available(sd_sysctl_cpus)) {
        if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
            return;
@@ -600,6 +607,9 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
    SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
            cfs_rq->nr_spread_over);
    SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
+   SEQ_printf(m, "  .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
+   SEQ_printf(m, "  .%-30s: %d\n", "idle_h_nr_running",
+           cfs_rq->idle_h_nr_running);
    SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_SMP
    SEQ_printf(m, "  .%-30s: %lu\n", "load_avg",
...
This diff is collapsed.
@@ -227,6 +227,8 @@ static inline void update_avg(u64 *avg, u64 sample)
 */
#define SCHED_FLAG_SUGOV 0x10000000
+#define SCHED_DL_FLAGS (SCHED_FLAG_RECLAIM | SCHED_FLAG_DL_OVERRUN | SCHED_FLAG_SUGOV)
static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se)
{
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
@@ -394,6 +396,9 @@ struct task_group {
    struct cfs_rq **cfs_rq;
    unsigned long shares;
+   /* A positive value indicates that this is a SCHED_IDLE group. */
+   int idle;
#ifdef CONFIG_SMP
    /*
     * load_avg can be heavily contended at clock tick time, so put
@@ -503,6 +508,8 @@ extern void sched_move_task(struct task_struct *tsk);
#ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
+extern int sched_group_set_idle(struct task_group *tg, long idle);
#ifdef CONFIG_SMP
extern void set_task_rq_fair(struct sched_entity *se,
                 struct cfs_rq *prev, struct cfs_rq *next);
@@ -599,6 +606,9 @@ struct cfs_rq {
    struct list_head leaf_cfs_rq_list;
    struct task_group *tg; /* group that "owns" this runqueue */
+   /* Locally cached copy of our task_group's idle value */
+   int idle;
#ifdef CONFIG_CFS_BANDWIDTH
    int runtime_enabled;
    s64 runtime_remaining;
@@ -2234,6 +2244,7 @@ extern struct task_struct *pick_next_task_idle(struct rq *rq);
#define SCA_CHECK		0x01
#define SCA_MIGRATE_DISABLE	0x02
#define SCA_MIGRATE_ENABLE	0x04
+#define SCA_USER		0x08
#ifdef CONFIG_SMP
@@ -2385,6 +2396,21 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
extern const_debug unsigned int sysctl_sched_nr_migrate;
extern const_debug unsigned int sysctl_sched_migration_cost;
+#ifdef CONFIG_SCHED_DEBUG
+extern unsigned int sysctl_sched_latency;
+extern unsigned int sysctl_sched_min_granularity;
+extern unsigned int sysctl_sched_wakeup_granularity;
+extern int sysctl_resched_latency_warn_ms;
+extern int sysctl_resched_latency_warn_once;
+extern unsigned int sysctl_sched_tunable_scaling;
+extern unsigned int sysctl_numa_balancing_scan_delay;
+extern unsigned int sysctl_numa_balancing_scan_period_min;
+extern unsigned int sysctl_numa_balancing_scan_period_max;
+extern unsigned int sysctl_numa_balancing_scan_size;
+#endif
#ifdef CONFIG_SCHED_HRTICK
/*
...
@@ -1482,6 +1482,8 @@ int sched_max_numa_distance;
static int *sched_domains_numa_distance;
static struct cpumask ***sched_domains_numa_masks;
int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
+
+static unsigned long __read_mostly *sched_numa_onlined_nodes;
#endif
/*
@@ -1833,6 +1835,16 @@
            sched_domains_numa_masks[i][j] = mask;
            for_each_node(k) {
+               /*
+                * Distance information can be unreliable for
+                * offline nodes, defer building the node
+                * masks to its bringup.
+                * This relies on all unique distance values
+                * still being visible at init time.
+                */
+               if (!node_online(j))
+                   continue;
+
                if (sched_debug() && (node_distance(j, k) != node_distance(k, j)))
                    sched_numa_warn("Node-distance not symmetric");
@@ -1886,6 +1898,53 @@ void sched_init_numa(void)
    sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1];
    init_numa_topology_type();
+
+   sched_numa_onlined_nodes = bitmap_alloc(nr_node_ids, GFP_KERNEL);
+   if (!sched_numa_onlined_nodes)
+       return;
+
+   bitmap_zero(sched_numa_onlined_nodes, nr_node_ids);
+   for_each_online_node(i)
+       bitmap_set(sched_numa_onlined_nodes, i, 1);
+}
+
+static void __sched_domains_numa_masks_set(unsigned int node)
+{
+   int i, j;
+
+   /*
+    * NUMA masks are not built for offline nodes in sched_init_numa().
+    * Thus, when a CPU of a never-onlined-before node gets plugged in,
+    * adding that new CPU to the right NUMA masks is not sufficient: the
+    * masks of that CPU's node must also be updated.
+    */
+   if (test_bit(node, sched_numa_onlined_nodes))
+       return;
+
+   bitmap_set(sched_numa_onlined_nodes, node, 1);
+
+   for (i = 0; i < sched_domains_numa_levels; i++) {
+       for (j = 0; j < nr_node_ids; j++) {
+           if (!node_online(j) || node == j)
+               continue;
+
+           if (node_distance(j, node) > sched_domains_numa_distance[i])
+               continue;
+
+           /* Add remote nodes in our masks */
+           cpumask_or(sched_domains_numa_masks[i][node],
+                  sched_domains_numa_masks[i][node],
+                  sched_domains_numa_masks[0][j]);
+       }
+   }
+
+   /*
+    * A new node has been brought up, potentially changing the topology
+    * classification.
+    *
+    * Note that this is racy vs any use of sched_numa_topology_type :/
+    */
+   init_numa_topology_type();
}
void sched_domains_numa_masks_set(unsigned int cpu)
@@ -1893,8 +1952,14 @@ void sched_domains_numa_masks_set(unsigned int cpu)
    int node = cpu_to_node(cpu);
    int i, j;
+   __sched_domains_numa_masks_set(node);
    for (i = 0; i < sched_domains_numa_levels; i++) {
        for (j = 0; j < nr_node_ids; j++) {
+           if (!node_online(j))
+               continue;
+
+           /* Set ourselves in the remote node's masks */
            if (node_distance(j, node) <= sched_domains_numa_distance[i])
                cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
        }
...