Commit 699140ba authored by Tejun Heo

cpuset: drop async_rebuild_sched_domains()

In general, we want to make cgroup_mutex one of the outermost locks
and be able to use get_online_cpus() and friends from cgroup methods.
With cpuset hotplug made async, get_online_cpus() can now be nested
inside cgroup_mutex.

Currently, cpuset avoids nesting get_online_cpus() inside cgroup_mutex
by bouncing sched_domain rebuilding to a work item.  As such nesting
is allowed now, remove the workqueue bouncing code and always rebuild
sched_domains synchronously.  This also nests sched_domains_mutex
inside cgroup_mutex, which is intended and should be okay.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
parent 3a5a6d0c
...@@ -60,14 +60,6 @@ ...@@ -60,14 +60,6 @@
#include <linux/workqueue.h> #include <linux/workqueue.h>
#include <linux/cgroup.h> #include <linux/cgroup.h>
/*
* Workqueue for cpuset related tasks.
*
* Using kevent workqueue may cause deadlock when memory_migrate
* is set. So we create a separate workqueue thread for cpuset.
*/
static struct workqueue_struct *cpuset_wq;
/* /*
* Tracks how many cpusets are currently defined in system. * Tracks how many cpusets are currently defined in system.
* When there is only one cpuset (the root cpuset) we can * When there is only one cpuset (the root cpuset) we can
...@@ -753,25 +745,25 @@ static int generate_sched_domains(cpumask_var_t **domains, ...@@ -753,25 +745,25 @@ static int generate_sched_domains(cpumask_var_t **domains,
/* /*
* Rebuild scheduler domains. * Rebuild scheduler domains.
* *
* Call with neither cgroup_mutex held nor within get_online_cpus(). * If the flag 'sched_load_balance' of any cpuset with non-empty
* Takes both cgroup_mutex and get_online_cpus(). * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
* which has that flag enabled, or if any cpuset with a non-empty
* 'cpus' is removed, then call this routine to rebuild the
* scheduler's dynamic sched domains.
* *
* Cannot be directly called from cpuset code handling changes * Call with cgroup_mutex held. Takes get_online_cpus().
* to the cpuset pseudo-filesystem, because it cannot be called
* from code that already holds cgroup_mutex.
*/ */
static void do_rebuild_sched_domains(struct work_struct *unused) static void rebuild_sched_domains_locked(void)
{ {
struct sched_domain_attr *attr; struct sched_domain_attr *attr;
cpumask_var_t *doms; cpumask_var_t *doms;
int ndoms; int ndoms;
WARN_ON_ONCE(!cgroup_lock_is_held());
get_online_cpus(); get_online_cpus();
/* Generate domain masks and attrs */ /* Generate domain masks and attrs */
cgroup_lock();
ndoms = generate_sched_domains(&doms, &attr); ndoms = generate_sched_domains(&doms, &attr);
cgroup_unlock();
/* Have scheduler rebuild the domains */ /* Have scheduler rebuild the domains */
partition_sched_domains(ndoms, doms, attr); partition_sched_domains(ndoms, doms, attr);
...@@ -779,7 +771,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused) ...@@ -779,7 +771,7 @@ static void do_rebuild_sched_domains(struct work_struct *unused)
put_online_cpus(); put_online_cpus();
} }
#else /* !CONFIG_SMP */ #else /* !CONFIG_SMP */
static void do_rebuild_sched_domains(struct work_struct *unused) static void rebuild_sched_domains_locked(void)
{ {
} }
...@@ -791,44 +783,11 @@ static int generate_sched_domains(cpumask_var_t **domains, ...@@ -791,44 +783,11 @@ static int generate_sched_domains(cpumask_var_t **domains,
} }
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
/*
* Rebuild scheduler domains, asynchronously via workqueue.
*
* If the flag 'sched_load_balance' of any cpuset with non-empty
* 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
* which has that flag enabled, or if any cpuset with a non-empty
* 'cpus' is removed, then call this routine to rebuild the
* scheduler's dynamic sched domains.
*
* The rebuild_sched_domains() and partition_sched_domains()
* routines must nest cgroup_lock() inside get_online_cpus(),
* but such cpuset changes as these must nest that locking the
* other way, holding cgroup_lock() for much of the code.
*
* So in order to avoid an ABBA deadlock, the cpuset code handling
* these user changes delegates the actual sched domain rebuilding
* to a separate workqueue thread, which ends up processing the
* above do_rebuild_sched_domains() function.
*/
static void async_rebuild_sched_domains(void)
{
queue_work(cpuset_wq, &rebuild_sched_domains_work);
}
/*
* Accomplishes the same scheduler domain rebuild as the above
* async_rebuild_sched_domains(), however it directly calls the
* rebuild routine synchronously rather than calling it via an
* asynchronous work thread.
*
* This can only be called from code that is not holding
* cgroup_mutex (not nested in a cgroup_lock() call.)
*/
void rebuild_sched_domains(void) void rebuild_sched_domains(void)
{ {
do_rebuild_sched_domains(NULL); cgroup_lock();
rebuild_sched_domains_locked();
cgroup_unlock();
} }
/** /**
...@@ -948,7 +907,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, ...@@ -948,7 +907,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
heap_free(&heap); heap_free(&heap);
if (is_load_balanced) if (is_load_balanced)
async_rebuild_sched_domains(); rebuild_sched_domains_locked();
return 0; return 0;
} }
...@@ -1196,7 +1155,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) ...@@ -1196,7 +1155,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
cs->relax_domain_level = val; cs->relax_domain_level = val;
if (!cpumask_empty(cs->cpus_allowed) && if (!cpumask_empty(cs->cpus_allowed) &&
is_sched_load_balance(cs)) is_sched_load_balance(cs))
async_rebuild_sched_domains(); rebuild_sched_domains_locked();
} }
return 0; return 0;
...@@ -1288,7 +1247,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, ...@@ -1288,7 +1247,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
mutex_unlock(&callback_mutex); mutex_unlock(&callback_mutex);
if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
async_rebuild_sched_domains(); rebuild_sched_domains_locked();
if (spread_flag_changed) if (spread_flag_changed)
update_tasks_flags(cs, &heap); update_tasks_flags(cs, &heap);
...@@ -1925,7 +1884,7 @@ static void cpuset_css_offline(struct cgroup *cgrp) ...@@ -1925,7 +1884,7 @@ static void cpuset_css_offline(struct cgroup *cgrp)
/* /*
* If the cpuset being removed has its flag 'sched_load_balance' * If the cpuset being removed has its flag 'sched_load_balance'
* enabled, then simulate turning sched_load_balance off, which * enabled, then simulate turning sched_load_balance off, which
* will call async_rebuild_sched_domains(). * will call rebuild_sched_domains_locked().
*/ */
static void cpuset_css_free(struct cgroup *cont) static void cpuset_css_free(struct cgroup *cont)
...@@ -2237,9 +2196,6 @@ void __init cpuset_init_smp(void) ...@@ -2237,9 +2196,6 @@ void __init cpuset_init_smp(void)
top_cpuset.mems_allowed = node_states[N_MEMORY]; top_cpuset.mems_allowed = node_states[N_MEMORY];
hotplug_memory_notifier(cpuset_track_online_nodes, 10); hotplug_memory_notifier(cpuset_track_online_nodes, 10);
cpuset_wq = create_singlethread_workqueue("cpuset");
BUG_ON(!cpuset_wq);
} }
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment