Commit 12fa97c6 authored by Peter Zijlstra

Merge branch 'sched/migrate-disable'

parents b6d37a76 c777d847
@@ -382,9 +382,9 @@ static inline void task_context_switch_counts(struct seq_file *m,
 static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
 {
         seq_printf(m, "Cpus_allowed:\t%*pb\n",
-                   cpumask_pr_args(task->cpus_ptr));
+                   cpumask_pr_args(&task->cpus_mask));
         seq_printf(m, "Cpus_allowed_list:\t%*pbl\n",
-                   cpumask_pr_args(task->cpus_ptr));
+                   cpumask_pr_args(&task->cpus_mask));
 }

 static inline void task_core_dumping(struct seq_file *m, struct mm_struct *mm)
...
@@ -152,6 +152,7 @@ enum cpuhp_state {
         CPUHP_AP_ONLINE,
         CPUHP_TEARDOWN_CPU,
         CPUHP_AP_ONLINE_IDLE,
+        CPUHP_AP_SCHED_WAIT_EMPTY,
         CPUHP_AP_SMPBOOT_THREADS,
         CPUHP_AP_X86_VDSO_VMA_ONLINE,
         CPUHP_AP_IRQ_AFFINITY_ONLINE,
...
@@ -199,6 +199,11 @@ static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
         return cpumask_next_and(-1, src1p, src2p);
 }

+static inline int cpumask_any_distribute(const struct cpumask *srcp)
+{
+        return cpumask_first(srcp);
+}
+
 #define for_each_cpu(cpu, mask)                 \
         for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
 #define for_each_cpu_not(cpu, mask)             \
@@ -252,6 +257,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
 unsigned int cpumask_local_spread(unsigned int i, int node);
 int cpumask_any_and_distribute(const struct cpumask *src1p,
                                const struct cpumask *src2p);
+int cpumask_any_distribute(const struct cpumask *srcp);

 /**
  * for_each_cpu - iterate over every cpu in a mask
...
@@ -322,6 +322,73 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,

 #endif

+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+
+/*
+ * Migrate-Disable and why it is undesired.
+ *
+ * When a preempted task becomes eligible to run under the ideal model (IOW it
+ * becomes one of the M highest priority tasks), it might still have to wait
+ * for the preemptee's migrate_disable() section to complete. Thereby suffering
+ * a reduction in bandwidth for the exact duration of the migrate_disable()
+ * section.
+ *
+ * Per this argument, the change from preempt_disable() to migrate_disable()
+ * gets us:
+ *
+ * - a higher priority task gains reduced wake-up latency; with preempt_disable()
+ *   it would have had to wait for the lower priority task.
+ *
+ * - a lower priority task, which under preempt_disable() could've instantly
+ *   migrated away when another CPU becomes available, is now constrained by
+ *   the ability to push the higher priority task away, which might itself be
+ *   in a migrate_disable() section, reducing its available bandwidth.
+ *
+ * IOW it trades latency / moves the interference term, but the term stays in
+ * the system, and as long as it remains unbounded, the system is not fully
+ * deterministic.
+ *
+ *
+ * The reason we have it anyway.
+ *
+ * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a
+ * number of primitives into becoming preemptible, they would also allow
+ * migration. This turns out to break a bunch of per-cpu usage. To this end,
+ * all these primitives employ migrate_disable() to restore this implicit
+ * assumption.
+ *
+ * This is a 'temporary' work-around at best. The correct solution is getting
+ * rid of the above assumptions and reworking the code to employ explicit
+ * per-cpu locking or short preempt-disable regions.
+ *
+ * The end goal must be to get rid of migrate_disable(); alternatively we need
+ * a schedulability theory that does not depend on arbitrary migration.
+ *
+ *
+ * Notes on the implementation.
+ *
+ * The implementation is particularly tricky since existing code patterns
+ * dictate that neither migrate_disable() nor migrate_enable() is allowed to
+ * block. This means it cannot use cpus_read_lock() to serialize against
+ * hotplug, nor can it easily migrate itself into a pending affinity-mask
+ * change on migrate_enable().
+ *
+ *
+ * Note: even non-work-conserving schedulers like semi-partitioned depend on
+ *       migration, so migrate_disable() is not only a problem for
+ *       work-conserving schedulers.
+ */
+extern void migrate_disable(void);
+extern void migrate_enable(void);
+
+#elif defined(CONFIG_PREEMPT_RT)
+
+static inline void migrate_disable(void) { }
+static inline void migrate_enable(void) { }
+
+#else /* !CONFIG_PREEMPT_RT */
+
 /**
  * migrate_disable - Prevent migration of the current task
  *
@@ -352,4 +419,6 @@ static __always_inline void migrate_enable(void)
         preempt_enable();
 }

+#endif /* CONFIG_SMP && CONFIG_PREEMPT_RT */
+
 #endif /* __LINUX_PREEMPT_H */
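For context, a minimal usage sketch of the interface declared above (illustration only, not part of this commit; the function name is hypothetical). On PREEMPT_RT, migrate_disable() pins the current task to its CPU while leaving it preemptible, so smp_processor_id() and this_cpu_ptr() stay stable across the section; it does not, however, provide exclusion against other tasks that preempt and run on the same CPU.

static void example_pinned_section(void)
{
        migrate_disable();      /* stay on this CPU, but remain preemptible */
        pr_info("pinned on CPU%d\n", smp_processor_id());
        migrate_enable();       /* migration is allowed again */
}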
@@ -714,6 +714,11 @@ struct task_struct {
         int nr_cpus_allowed;
         const cpumask_t *cpus_ptr;
         cpumask_t cpus_mask;
+        void *migration_pending;
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+        unsigned short migration_disabled;
+#endif
+        unsigned short migration_flags;

 #ifdef CONFIG_PREEMPT_RCU
         int rcu_read_lock_nesting;
...
@@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu);
 extern int sched_cpu_deactivate(unsigned int cpu);

 #ifdef CONFIG_HOTPLUG_CPU
+extern int sched_cpu_wait_empty(unsigned int cpu);
 extern int sched_cpu_dying(unsigned int cpu);
 #else
+# define sched_cpu_wait_empty NULL
 # define sched_cpu_dying NULL
 #endif
...
@@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg);
 struct cpu_stop_work {
         struct list_head list;          /* cpu_stopper->works */
         cpu_stop_fn_t fn;
+        unsigned long caller;
         void *arg;
         struct cpu_stop_done *done;
 };
@@ -36,6 +37,8 @@ void stop_machine_park(int cpu);
 void stop_machine_unpark(int cpu);
 void stop_machine_yield(const struct cpumask *cpumask);

+extern void print_stop_info(const char *log_lvl, struct task_struct *task);
+
 #else /* CONFIG_SMP */

 #include <linux/workqueue.h>
@@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu,
         return false;
 }

+static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { }
+
 #endif /* CONFIG_SMP */

 /*
...
@@ -1602,7 +1602,7 @@ static struct cpuhp_step cpuhp_hp_states[] = {
                 .name = "ap:online",
         },
         /*
-         * Handled on controll processor until the plugged processor manages
+         * Handled on control processor until the plugged processor manages
          * this itself.
          */
         [CPUHP_TEARDOWN_CPU] = {
@@ -1611,6 +1611,13 @@ static struct cpuhp_step cpuhp_hp_states[] = {
                 .teardown.single = takedown_cpu,
                 .cant_stop = true,
         },
+
+        [CPUHP_AP_SCHED_WAIT_EMPTY] = {
+                .name = "sched:waitempty",
+                .startup.single = NULL,
+                .teardown.single = sched_cpu_wait_empty,
+        },
+
         /* Handle smpboot threads park/unpark */
         [CPUHP_AP_SMPBOOT_THREADS] = {
                 .name = "smpboot/threads:online",
...
This diff is collapsed.
@@ -120,7 +120,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
                 const struct sched_dl_entity *dl_se = &p->dl;

                 if (later_mask &&
-                    cpumask_and(later_mask, cp->free_cpus, p->cpus_ptr)) {
+                    cpumask_and(later_mask, cp->free_cpus, &p->cpus_mask)) {
                         unsigned long cap, max_cap = 0;
                         int cpu, max_cpu = -1;

@@ -151,7 +151,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,
                 WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));

-                if (cpumask_test_cpu(best_cpu, p->cpus_ptr) &&
+                if (cpumask_test_cpu(best_cpu, &p->cpus_mask) &&
                     dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
                         if (later_mask)
                                 cpumask_set_cpu(best_cpu, later_mask);
...
@@ -97,11 +97,11 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
         if (skip)
                 return 0;

-        if (cpumask_any_and(p->cpus_ptr, vec->mask) >= nr_cpu_ids)
+        if (cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids)
                 return 0;

         if (lowest_mask) {
-                cpumask_and(lowest_mask, p->cpus_ptr, vec->mask);
+                cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);

                 /*
                  * We have to ensure that we have at least one bit
...
@@ -559,7 +559,7 @@ static int push_dl_task(struct rq *rq);
 static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
 {
-        return dl_task(prev);
+        return rq->online && dl_task(prev);
 }

 static DEFINE_PER_CPU(struct callback_head, dl_push_head);
@@ -1931,7 +1931,7 @@ static void task_fork_dl(struct task_struct *p)
 static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
 {
         if (!task_running(rq, p) &&
-            cpumask_test_cpu(cpu, p->cpus_ptr))
+            cpumask_test_cpu(cpu, &p->cpus_mask))
                 return 1;
         return 0;
 }
@@ -2021,8 +2021,8 @@ static int find_later_rq(struct task_struct *task)
                         return this_cpu;
                 }

-                best_cpu = cpumask_first_and(later_mask,
-                                             sched_domain_span(sd));
+                best_cpu = cpumask_any_and_distribute(later_mask,
+                                                      sched_domain_span(sd));
                 /*
                  * Last chance: if a CPU being in both later_mask
                  * and current sd span is valid, that becomes our
@@ -2044,7 +2044,7 @@ static int find_later_rq(struct task_struct *task)
         if (this_cpu != -1)
                 return this_cpu;

-        cpu = cpumask_any(later_mask);
+        cpu = cpumask_any_distribute(later_mask);
         if (cpu < nr_cpu_ids)
                 return cpu;
@@ -2081,7 +2081,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
                 /* Retry if something changed. */
                 if (double_lock_balance(rq, later_rq)) {
                         if (unlikely(task_rq(task) != rq ||
-                                     !cpumask_test_cpu(later_rq->cpu, task->cpus_ptr) ||
+                                     !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
                                      task_running(rq, task) ||
                                      !dl_task(task) ||
                                      !task_on_rq_queued(task))) {
@@ -2148,6 +2148,9 @@ static int push_dl_task(struct rq *rq)
                 return 0;

 retry:
+        if (is_migration_disabled(next_task))
+                return 0;
+
         if (WARN_ON(next_task == rq->curr))
                 return 0;
@@ -2225,7 +2228,7 @@ static void push_dl_tasks(struct rq *rq)
 static void pull_dl_task(struct rq *this_rq)
 {
         int this_cpu = this_rq->cpu, cpu;
-        struct task_struct *p;
+        struct task_struct *p, *push_task;
         bool resched = false;
         struct rq *src_rq;
         u64 dmin = LONG_MAX;
@@ -2255,6 +2258,7 @@ static void pull_dl_task(struct rq *this_rq)
                         continue;

                 /* Might drop this_rq->lock */
+                push_task = NULL;
                 double_lock_balance(this_rq, src_rq);

                 /*
@@ -2286,17 +2290,27 @@ static void pull_dl_task(struct rq *this_rq)
                                            src_rq->curr->dl.deadline))
                                 goto skip;

-                        resched = true;
-
-                        deactivate_task(src_rq, p, 0);
-                        set_task_cpu(p, this_cpu);
-                        activate_task(this_rq, p, 0);
-                        dmin = p->dl.deadline;
+                        if (is_migration_disabled(p)) {
+                                push_task = get_push_task(src_rq);
+                        } else {
+                                deactivate_task(src_rq, p, 0);
+                                set_task_cpu(p, this_cpu);
+                                activate_task(this_rq, p, 0);
+                                dmin = p->dl.deadline;
+                                resched = true;
+                        }

                         /* Is there any other task even earlier? */
                 }
 skip:
                 double_unlock_balance(this_rq, src_rq);
+
+                if (push_task) {
+                        raw_spin_unlock(&this_rq->lock);
+                        stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
+                                            push_task, &src_rq->push_work);
+                        raw_spin_lock(&this_rq->lock);
+                }
         }

         if (resched)
@@ -2320,7 +2334,8 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
 }

 static void set_cpus_allowed_dl(struct task_struct *p,
-                                const struct cpumask *new_mask)
+                                const struct cpumask *new_mask,
+                                u32 flags)
 {
         struct root_domain *src_rd;
         struct rq *rq;
@@ -2349,7 +2364,7 @@ static void set_cpus_allowed_dl(struct task_struct *p,
                 raw_spin_unlock(&src_dl_b->lock);
         }

-        set_cpus_allowed_common(p, new_mask);
+        set_cpus_allowed_common(p, new_mask, flags);
 }

 /* Assumes rq->lock is held */
@@ -2542,6 +2557,7 @@ DEFINE_SCHED_CLASS(dl) = {
         .rq_online = rq_online_dl,
         .rq_offline = rq_offline_dl,
         .task_woken = task_woken_dl,
+        .find_lock_rq = find_lock_later_rq,
 #endif

         .task_tick = task_tick_dl,
...
@@ -265,7 +265,7 @@ static void pull_rt_task(struct rq *this_rq);
 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
 {
         /* Try to pull RT tasks here if we lower this rq's prio */
-        return rq->rt.highest_prio.curr > prev->prio;
+        return rq->online && rq->rt.highest_prio.curr > prev->prio;
 }

 static inline int rt_overloaded(struct rq *rq)
@@ -1660,7 +1660,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
         if (!task_running(rq, p) &&
-            cpumask_test_cpu(cpu, p->cpus_ptr))
+            cpumask_test_cpu(cpu, &p->cpus_mask))
                 return 1;

         return 0;
@@ -1754,8 +1754,8 @@ static int find_lowest_rq(struct task_struct *task)
                         return this_cpu;
                 }

-                best_cpu = cpumask_first_and(lowest_mask,
-                                             sched_domain_span(sd));
+                best_cpu = cpumask_any_and_distribute(lowest_mask,
+                                                      sched_domain_span(sd));
                 if (best_cpu < nr_cpu_ids) {
                         rcu_read_unlock();
                         return best_cpu;
@@ -1772,7 +1772,7 @@ static int find_lowest_rq(struct task_struct *task)
         if (this_cpu != -1)
                 return this_cpu;

-        cpu = cpumask_any(lowest_mask);
+        cpu = cpumask_any_distribute(lowest_mask);
         if (cpu < nr_cpu_ids)
                 return cpu;
@@ -1813,7 +1813,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
                          * Also make sure that it wasn't scheduled on its rq.
                          */
                         if (unlikely(task_rq(task) != rq ||
-                                     !cpumask_test_cpu(lowest_rq->cpu, task->cpus_ptr) ||
+                                     !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
                                      task_running(rq, task) ||
                                      !rt_task(task) ||
                                      !task_on_rq_queued(task))) {
@@ -1861,7 +1861,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
  * running task can migrate over to a CPU that is running a task
  * of lesser priority.
  */
-static int push_rt_task(struct rq *rq)
+static int push_rt_task(struct rq *rq, bool pull)
 {
         struct task_struct *next_task;
         struct rq *lowest_rq;
@@ -1875,6 +1875,34 @@ static int push_rt_task(struct rq *rq)
                 return 0;

 retry:
+        if (is_migration_disabled(next_task)) {
+                struct task_struct *push_task = NULL;
+                int cpu;
+
+                if (!pull || rq->push_busy)
+                        return 0;
+
+                cpu = find_lowest_rq(rq->curr);
+                if (cpu == -1 || cpu == rq->cpu)
+                        return 0;
+
+                /*
+                 * Given we found a CPU with lower priority than @next_task,
+                 * therefore it should be running. However we cannot migrate it
+                 * to this other CPU, instead attempt to push the current
+                 * running task on this CPU away.
+                 */
+                push_task = get_push_task(rq);
+                if (push_task) {
+                        raw_spin_unlock(&rq->lock);
+                        stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
+                                            push_task, &rq->push_work);
+                        raw_spin_lock(&rq->lock);
+                }
+
+                return 0;
+        }
+
         if (WARN_ON(next_task == rq->curr))
                 return 0;
@@ -1929,12 +1957,10 @@ static int push_rt_task(struct rq *rq)
         deactivate_task(rq, next_task, 0);
         set_task_cpu(next_task, lowest_rq->cpu);
         activate_task(lowest_rq, next_task, 0);
-        ret = 1;
         resched_curr(lowest_rq);
+        ret = 1;

         double_unlock_balance(rq, lowest_rq);
 out:
         put_task_struct(next_task);
@@ -1944,7 +1970,7 @@ static int push_rt_task(struct rq *rq)
 static void push_rt_tasks(struct rq *rq)
 {
         /* push_rt_task will return true if it moved an RT */
-        while (push_rt_task(rq))
+        while (push_rt_task(rq, false))
                 ;
 }
@@ -2097,7 +2123,8 @@ void rto_push_irq_work_func(struct irq_work *work)
          */
         if (has_pushable_tasks(rq)) {
                 raw_spin_lock(&rq->lock);
-                push_rt_tasks(rq);
+                while (push_rt_task(rq, true))
+                        ;
                 raw_spin_unlock(&rq->lock);
         }
@@ -2122,7 +2149,7 @@ static void pull_rt_task(struct rq *this_rq)
 {
         int this_cpu = this_rq->cpu, cpu;
         bool resched = false;
-        struct task_struct *p;
+        struct task_struct *p, *push_task;
         struct rq *src_rq;
         int rt_overload_count = rt_overloaded(this_rq);
@@ -2169,6 +2196,7 @@ static void pull_rt_task(struct rq *this_rq)
                  * double_lock_balance, and another CPU could
                  * alter this_rq
                  */
+                push_task = NULL;
                 double_lock_balance(this_rq, src_rq);

                 /*
@@ -2196,11 +2224,14 @@ static void pull_rt_task(struct rq *this_rq)
                         if (p->prio < src_rq->curr->prio)
                                 goto skip;

-                        resched = true;
-
-                        deactivate_task(src_rq, p, 0);
-                        set_task_cpu(p, this_cpu);
-                        activate_task(this_rq, p, 0);
+                        if (is_migration_disabled(p)) {
+                                push_task = get_push_task(src_rq);
+                        } else {
+                                deactivate_task(src_rq, p, 0);
+                                set_task_cpu(p, this_cpu);
+                                activate_task(this_rq, p, 0);
+                                resched = true;
+                        }
                         /*
                          * We continue with the search, just in
                          * case there's an even higher prio task
@@ -2210,6 +2241,13 @@ static void pull_rt_task(struct rq *this_rq)
                 }
 skip:
                 double_unlock_balance(this_rq, src_rq);
+
+                if (push_task) {
+                        raw_spin_unlock(&this_rq->lock);
+                        stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
+                                            push_task, &src_rq->push_work);
+                        raw_spin_lock(&this_rq->lock);
+                }
         }

         if (resched)
@@ -2451,6 +2489,7 @@ DEFINE_SCHED_CLASS(rt) = {
         .rq_offline = rq_offline_rt,
         .task_woken = task_woken_rt,
         .switched_from = switched_from_rt,
+        .find_lock_rq = find_lock_lowest_rq,
 #endif

         .task_tick = task_tick_rt,
...
@@ -975,6 +975,7 @@ struct rq {
         unsigned long cpu_capacity_orig;

         struct callback_head *balance_callback;
+        unsigned char balance_flags;

         unsigned char nohz_idle_balance;
         unsigned char idle_balance;
@@ -1005,6 +1006,10 @@ struct rq {
         /* This is used to determine avg_idle's max value */
         u64 max_idle_balance_cost;

+#ifdef CONFIG_HOTPLUG_CPU
+        struct rcuwait hotplug_wait;
+#endif
 #endif /* CONFIG_SMP */

 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -1050,6 +1055,12 @@ struct rq {
         /* Must be inspected within a rcu lock section */
         struct cpuidle_state *idle_state;
 #endif
+
+#if defined(CONFIG_PREEMPT_RT) && defined(CONFIG_SMP)
+        unsigned int nr_pinned;
+#endif
+        unsigned int push_busy;
+        struct cpu_stop_work push_work;
 };

 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1077,6 +1088,16 @@ static inline int cpu_of(struct rq *rq)
 #endif
 }

+#define MDF_PUSH        0x01
+
+static inline bool is_migration_disabled(struct task_struct *p)
+{
+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+        return p->migration_disabled;
+#else
+        return false;
+#endif
+}
+
 #ifdef CONFIG_SCHED_SMT
 extern void __update_idle_core(struct rq *rq);
@@ -1223,6 +1244,9 @@ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
         rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
         rf->clock_update_flags = 0;
 #endif
+#ifdef CONFIG_SMP
+        SCHED_WARN_ON(rq->balance_callback);
+#endif
 }

 static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
@@ -1384,6 +1408,9 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p)

 #ifdef CONFIG_SMP

+#define BALANCE_WORK    0x01
+#define BALANCE_PUSH    0x02
+
 static inline void
 queue_balance_callback(struct rq *rq,
                        struct callback_head *head,
@@ -1391,12 +1418,13 @@ queue_balance_callback(struct rq *rq,
 {
         lockdep_assert_held(&rq->lock);

-        if (unlikely(head->next))
+        if (unlikely(head->next || (rq->balance_flags & BALANCE_PUSH)))
                 return;

         head->func = (void (*)(struct callback_head *))func;
         head->next = rq->balance_callback;
         rq->balance_callback = head;
+        rq->balance_flags |= BALANCE_WORK;
 }

 #define rcu_dereference_check_sched_domain(p) \
@@ -1804,10 +1832,13 @@ struct sched_class {
         void (*task_woken)(struct rq *this_rq, struct task_struct *task);

         void (*set_cpus_allowed)(struct task_struct *p,
-                                 const struct cpumask *newmask);
+                                 const struct cpumask *newmask,
+                                 u32 flags);

         void (*rq_online)(struct rq *rq);
         void (*rq_offline)(struct rq *rq);
+
+        struct rq *(*find_lock_rq)(struct task_struct *p, struct rq *rq);
 #endif

         void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
@@ -1905,13 +1936,35 @@ static inline bool sched_fair_runnable(struct rq *rq)
 extern struct task_struct *pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf);
 extern struct task_struct *pick_next_task_idle(struct rq *rq);

+#define SCA_CHECK               0x01
+#define SCA_MIGRATE_DISABLE     0x02
+#define SCA_MIGRATE_ENABLE      0x04
+
 #ifdef CONFIG_SMP

 extern void update_group_capacity(struct sched_domain *sd, int cpu);

 extern void trigger_load_balance(struct rq *rq);

-extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
+extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
+
+static inline struct task_struct *get_push_task(struct rq *rq)
+{
+        struct task_struct *p = rq->curr;
+
+        lockdep_assert_held(&rq->lock);
+
+        if (rq->push_busy)
+                return NULL;
+
+        if (p->nr_cpus_allowed == 1)
+                return NULL;
+
+        rq->push_busy = true;
+        return get_task_struct(p);
+}
+
+extern int push_cpu_stop(void *arg);

 #endif
...
@@ -42,11 +42,27 @@ struct cpu_stopper {
         struct list_head works;         /* list of pending works */

         struct cpu_stop_work stop_work; /* for stop_cpus */
+        unsigned long caller;
+        cpu_stop_fn_t fn;
 };

 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
 static bool stop_machine_initialized = false;

+void print_stop_info(const char *log_lvl, struct task_struct *task)
+{
+        /*
+         * If @task is a stopper task, it cannot migrate and task_cpu() is
+         * stable.
+         */
+        struct cpu_stopper *stopper = per_cpu_ptr(&cpu_stopper, task_cpu(task));
+
+        if (task != stopper->thread)
+                return;
+
+        printk("%sStopper: %pS <- %pS\n", log_lvl, stopper->fn, (void *)stopper->caller);
+}
+
 /* static data for stop_cpus */
 static DEFINE_MUTEX(stop_cpus_mutex);
 static bool stop_cpus_in_progress;
@@ -123,7 +139,7 @@ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work)
 int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
 {
         struct cpu_stop_done done;
-        struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
+        struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done, .caller = _RET_IP_ };

         cpu_stop_init_done(&done, 1);
         if (!cpu_stop_queue_work(cpu, &work))
@@ -331,7 +347,8 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
         work1 = work2 = (struct cpu_stop_work){
                 .fn = multi_cpu_stop,
                 .arg = &msdata,
-                .done = &done
+                .done = &done,
+                .caller = _RET_IP_,
         };

         cpu_stop_init_done(&done, 2);
@@ -367,7 +384,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
 bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
                          struct cpu_stop_work *work_buf)
 {
-        *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
+        *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, .caller = _RET_IP_, };
         return cpu_stop_queue_work(cpu, work_buf);
 }
@@ -487,6 +504,8 @@ static void cpu_stopper_thread(unsigned int cpu)
         int ret;

         /* cpu stop callbacks must not sleep, make in_atomic() == T */
+        stopper->caller = work->caller;
+        stopper->fn = fn;
         preempt_count_inc();
         ret = fn(arg);
         if (done) {
@@ -495,6 +514,8 @@ static void cpu_stopper_thread(unsigned int cpu)
                 cpu_stop_signal_done(done);
         }
         preempt_count_dec();
+        stopper->fn = NULL;
+        stopper->caller = 0;
         WARN_ONCE(preempt_count(),
                   "cpu_stop: %ps(%p) leaked preempt count\n", fn, arg);
         goto repeat;
...
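For context, a sketch of how the recorded caller surfaces (illustration only; my_stop_fn() and example_caller() are hypothetical). stop_one_cpu() stores the _RET_IP_ of its caller in the work item, cpu_stopper_thread() copies it into the per-CPU stopper while the callback runs, and print_stop_info() can then report both the stop function and its call site.

static int my_stop_fn(void *arg)
{
        /* runs on the target CPU in stopper context */
        return 0;
}

static void example_caller(unsigned int cpu)
{
        /*
         * The _RET_IP_ recorded inside stop_one_cpu() points back here; a
         * dump taken while my_stop_fn() runs would show roughly
         * "Stopper: my_stop_fn+0x.../0x... <- example_caller+0x.../0x...".
         */
        stop_one_cpu(cpu, my_stop_fn, NULL);
}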
@@ -4908,6 +4908,10 @@ static void unbind_workers(int cpu)
                 pool->flags |= POOL_DISASSOCIATED;
                 raw_spin_unlock_irq(&pool->lock);

+                for_each_pool_worker(worker, pool)
+                        WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_active_mask) < 0);
+
                 mutex_unlock(&wq_pool_attach_mutex);

                 /*
...
@@ -267,3 +267,21 @@ int cpumask_any_and_distribute(const struct cpumask *src1p,
         return next;
 }
 EXPORT_SYMBOL(cpumask_any_and_distribute);
+
+int cpumask_any_distribute(const struct cpumask *srcp)
+{
+        int next, prev;
+
+        /* NOTE: our first selection will skip 0. */
+        prev = __this_cpu_read(distribute_cpu_mask_prev);
+
+        next = cpumask_next(prev, srcp);
+        if (next >= nr_cpu_ids)
+                next = cpumask_first(srcp);
+
+        if (next < nr_cpu_ids)
+                __this_cpu_write(distribute_cpu_mask_prev, next);
+
+        return next;
+}
+EXPORT_SYMBOL(cpumask_any_distribute);
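For context, a small sketch of how the new helper is meant to be used (illustration only; pick_target() is a hypothetical caller). Unlike cpumask_any(), repeated calls to cpumask_any_distribute() continue from the previously returned CPU recorded in the per-CPU distribute_cpu_mask_prev, so successive picks rotate through the mask instead of always landing on its first set bit, which is what spreads pushed tasks across candidate CPUs in find_later_rq()/find_lowest_rq() above.

static int pick_target(const struct cpumask *candidates)
{
        int cpu = cpumask_any_distribute(candidates);

        /* cpumask_any_distribute() returns >= nr_cpu_ids for an empty mask */
        return cpu < nr_cpu_ids ? cpu : -1;
}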
@@ -12,6 +12,7 @@
 #include <linux/atomic.h>
 #include <linux/kexec.h>
 #include <linux/utsname.h>
+#include <linux/stop_machine.h>

 static char dump_stack_arch_desc_str[128];
@@ -57,6 +58,7 @@ void dump_stack_print_info(const char *log_lvl)
                        log_lvl, dump_stack_arch_desc_str);

         print_worker_info(log_lvl, current);
+        print_stop_info(log_lvl, current);
 }

 /**
...
@@ -26,6 +26,11 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2)
         if (current->nr_cpus_allowed == 1)
                 goto out;

+#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT_RT)
+        if (current->migration_disabled)
+                goto out;
+#endif
+
         /*
          * It is valid to assume CPU-locality during early bootup:
          */
...