Commit df7c8e84, authored by Rusty Russell, committed by Ingo Molnar

cpumask: remove cpumask allocation from idle_balance

Impact: fix circular locking

Steven reports a circular locking dependency caused by
alloc_cpumask_var doing a wakeup. We get rid of this using the
tried-and-true technique of using a per-cpu cpumask_var_t rather
than doing an alloc every time.

Simpler and more robust than a rare, implicit allocation within
an atomic codepath.
Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
LKML-Reference: <alpine.DEB.2.00.0903181729360.31583@gandalf.stny.rr.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent c38da569
...@@ -3448,19 +3448,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, ...@@ -3448,19 +3448,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
*/ */
#define MAX_PINNED_INTERVAL 512 #define MAX_PINNED_INTERVAL 512
/* Working cpumask for load_balance and load_balance_newidle. */
static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
/* /*
* Check this_cpu to ensure it is balanced within domain. Attempt to move * Check this_cpu to ensure it is balanced within domain. Attempt to move
* tasks if there is an imbalance. * tasks if there is an imbalance.
*/ */
static int load_balance(int this_cpu, struct rq *this_rq, static int load_balance(int this_cpu, struct rq *this_rq,
struct sched_domain *sd, enum cpu_idle_type idle, struct sched_domain *sd, enum cpu_idle_type idle,
int *balance, struct cpumask *cpus) int *balance)
{ {
int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
struct sched_group *group; struct sched_group *group;
unsigned long imbalance; unsigned long imbalance;
struct rq *busiest; struct rq *busiest;
unsigned long flags; unsigned long flags;
struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
cpumask_setall(cpus); cpumask_setall(cpus);
...@@ -3615,8 +3619,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, ...@@ -3615,8 +3619,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
* this_rq is locked. * this_rq is locked.
*/ */
static int static int
load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
struct cpumask *cpus)
{ {
struct sched_group *group; struct sched_group *group;
struct rq *busiest = NULL; struct rq *busiest = NULL;
...@@ -3624,6 +3627,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, ...@@ -3624,6 +3627,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
int ld_moved = 0; int ld_moved = 0;
int sd_idle = 0; int sd_idle = 0;
int all_pinned = 0; int all_pinned = 0;
struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
cpumask_setall(cpus); cpumask_setall(cpus);
...@@ -3764,10 +3768,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq) ...@@ -3764,10 +3768,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
struct sched_domain *sd; struct sched_domain *sd;
int pulled_task = 0; int pulled_task = 0;
unsigned long next_balance = jiffies + HZ; unsigned long next_balance = jiffies + HZ;
cpumask_var_t tmpmask;
if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
return;
for_each_domain(this_cpu, sd) { for_each_domain(this_cpu, sd) {
unsigned long interval; unsigned long interval;
...@@ -3778,7 +3778,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq) ...@@ -3778,7 +3778,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
if (sd->flags & SD_BALANCE_NEWIDLE) if (sd->flags & SD_BALANCE_NEWIDLE)
/* If we've pulled tasks over stop searching: */ /* If we've pulled tasks over stop searching: */
pulled_task = load_balance_newidle(this_cpu, this_rq, pulled_task = load_balance_newidle(this_cpu, this_rq,
sd, tmpmask); sd);
interval = msecs_to_jiffies(sd->balance_interval); interval = msecs_to_jiffies(sd->balance_interval);
if (time_after(next_balance, sd->last_balance + interval)) if (time_after(next_balance, sd->last_balance + interval))
...@@ -3793,7 +3793,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq) ...@@ -3793,7 +3793,6 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
*/ */
this_rq->next_balance = next_balance; this_rq->next_balance = next_balance;
} }
free_cpumask_var(tmpmask);
} }
/* /*
...@@ -3943,11 +3942,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) ...@@ -3943,11 +3942,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
unsigned long next_balance = jiffies + 60*HZ; unsigned long next_balance = jiffies + 60*HZ;
int update_next_balance = 0; int update_next_balance = 0;
int need_serialize; int need_serialize;
cpumask_var_t tmp;
/* Fails alloc? Rebalancing probably not a priority right now. */
if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
return;
for_each_domain(cpu, sd) { for_each_domain(cpu, sd) {
if (!(sd->flags & SD_LOAD_BALANCE)) if (!(sd->flags & SD_LOAD_BALANCE))
...@@ -3972,7 +3966,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) ...@@ -3972,7 +3966,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
} }
if (time_after_eq(jiffies, sd->last_balance + interval)) { if (time_after_eq(jiffies, sd->last_balance + interval)) {
if (load_balance(cpu, rq, sd, idle, &balance, tmp)) { if (load_balance(cpu, rq, sd, idle, &balance)) {
/* /*
* We've pulled tasks over so either we're no * We've pulled tasks over so either we're no
* longer idle, or one of our SMT siblings is * longer idle, or one of our SMT siblings is
...@@ -4006,8 +4000,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) ...@@ -4006,8 +4000,6 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
*/ */
if (likely(update_next_balance)) if (likely(update_next_balance))
rq->next_balance = next_balance; rq->next_balance = next_balance;
free_cpumask_var(tmp);
} }
/* /*
...@@ -8303,6 +8295,9 @@ void __init sched_init(void) ...@@ -8303,6 +8295,9 @@ void __init sched_init(void)
#endif #endif
#ifdef CONFIG_USER_SCHED #ifdef CONFIG_USER_SCHED
alloc_size *= 2; alloc_size *= 2;
#endif
#ifdef CONFIG_CPUMASK_OFFSTACK
alloc_size *= num_possible_cpus() * cpumask_size();
#endif #endif
/* /*
* As sched_init() is called before page_alloc is setup, * As sched_init() is called before page_alloc is setup,
...@@ -8341,6 +8336,12 @@ void __init sched_init(void) ...@@ -8341,6 +8336,12 @@ void __init sched_init(void)
ptr += nr_cpu_ids * sizeof(void **); ptr += nr_cpu_ids * sizeof(void **);
#endif /* CONFIG_USER_SCHED */ #endif /* CONFIG_USER_SCHED */
#endif /* CONFIG_RT_GROUP_SCHED */ #endif /* CONFIG_RT_GROUP_SCHED */
#ifdef CONFIG_CPUMASK_OFFSTACK
for_each_possible_cpu(i) {
per_cpu(load_balance_tmpmask, i) = (void *)ptr;
ptr += cpumask_size();
}
#endif /* CONFIG_CPUMASK_OFFSTACK */
} }
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment