Commit fc560a26 authored by Tejun Heo

cpuset: replace cpuset->stack_list with cpuset_for_each_descendant_pre()

Implement cpuset_for_each_descendant_pre() and replace the
cpuset-specific tree walking using cpuset->stack_list with it.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Acked-by: Li Zefan <lizefan@huawei.com>
parent 5d21cc2d
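
The patch converts three hand-rolled hierarchy walks, each seeded through the embedded stack_list queue, into a single pre-order iterator built on the cgroup descendant walk. As a rough illustration of the traversal pattern only (not kernel code), here is a minimal, self-contained user-space C sketch: struct node, next_descendant_pre() and rightmost_descendant() are simplified stand-ins for the cgroup helpers used in the diff, and the real cpuset_for_each_descendant_pre() additionally filters offline cpusets and must run under rcu_read_lock().

#include <stdio.h>

struct node {
        const char *name;
        int skip_subtree;               /* analogue of "no CPUs here, prune" */
        struct node *parent;
        struct node *first_child;
        struct node *next_sibling;
};

/*
 * Pre-order successor of @pos among @root's proper descendants, or NULL
 * when the walk is finished.  The first call passes pos == NULL.
 */
static struct node *next_descendant_pre(struct node *pos, struct node *root)
{
        if (!pos)                       /* pretend we just visited @root */
                pos = root;

        if (pos->first_child)           /* visit the first child if any */
                return pos->first_child;

        while (pos != root) {           /* else the closest ancestor's next sibling */
                if (pos->next_sibling)
                        return pos->next_sibling;
                pos = pos->parent;
        }
        return NULL;
}

/* Last pre-order node of @pos's subtree; stepping from it leaves the subtree. */
static struct node *rightmost_descendant(struct node *pos)
{
        while (pos->first_child) {
                pos = pos->first_child;
                while (pos->next_sibling)
                        pos = pos->next_sibling;
        }
        return pos;
}

#define for_each_descendant_pre(pos, root)                              \
        for ((pos) = next_descendant_pre(NULL, (root)); (pos);          \
             (pos) = next_descendant_pre((pos), (root)))

int main(void)
{
        /* root -> { a (pruned) -> { a1 }, b -> { b1 } } */
        struct node a1 = { "a1", 0, NULL, NULL, NULL };
        struct node b1 = { "b1", 0, NULL, NULL, NULL };
        struct node a  = { "a",  1, NULL, &a1, NULL };
        struct node b  = { "b",  0, NULL, &b1, NULL };
        struct node root = { "root", 0, NULL, &a, NULL };
        struct node *pos;

        a1.parent = &a;
        b1.parent = &b;
        a.parent = &root;
        a.next_sibling = &b;
        b.parent = &root;

        for_each_descendant_pre(pos, &root) {
                if (pos->skip_subtree) {
                        /* jump to the subtree's last node; the next step leaves it */
                        pos = rightmost_descendant(pos);
                        continue;
                }
                printf("visit %s\n", pos->name);        /* prints: b, b1 */
        }
        return 0;
}

Pruning works because advancing the cursor to the last pre-order node of a subtree makes the next iteration step past that entire subtree; this is exactly how the converted functions below skip cpusets that have no CPUs.
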
@@ -103,9 +103,6 @@ struct cpuset {
         /* for custom sched domain */
         int relax_domain_level;
 
-        /* used for walking a cpuset hierarchy */
-        struct list_head stack_list;
-
         struct work_struct hotplug_work;
 };
@@ -207,6 +204,20 @@ static struct cpuset top_cpuset = {
         cgroup_for_each_child((pos_cgrp), (parent_cs)->css.cgroup)     \
                 if (is_cpuset_online(((child_cs) = cgroup_cs((pos_cgrp)))))
 
+/**
+ * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
+ * @des_cs: loop cursor pointing to the current descendant
+ * @pos_cgrp: used for iteration
+ * @root_cs: target cpuset to walk ancestor of
+ *
+ * Walk @des_cs through the online descendants of @root_cs.  Must be used
+ * with RCU read locked.  The caller may modify @pos_cgrp by calling
+ * cgroup_rightmost_descendant() to skip subtree.
+ */
+#define cpuset_for_each_descendant_pre(des_cs, pos_cgrp, root_cs)      \
+        cgroup_for_each_descendant_pre((pos_cgrp), (root_cs)->css.cgroup) \
+                if (is_cpuset_online(((des_cs) = cgroup_cs((pos_cgrp)))))
+
 /*
  * There are two global mutexes guarding cpuset structures - cpuset_mutex
  * and callback_mutex.  The latter may nest inside the former.  We also
@@ -507,31 +518,24 @@ update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
         return;
 }
 
-static void
-update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
+static void update_domain_attr_tree(struct sched_domain_attr *dattr,
+                                    struct cpuset *root_cs)
 {
-        LIST_HEAD(q);
-
-        list_add(&c->stack_list, &q);
-        while (!list_empty(&q)) {
-                struct cpuset *cp;
-                struct cgroup *cont;
-                struct cpuset *child;
-
-                cp = list_first_entry(&q, struct cpuset, stack_list);
-                list_del(q.next);
+        struct cpuset *cp;
+        struct cgroup *pos_cgrp;
 
-                if (cpumask_empty(cp->cpus_allowed))
-                        continue;
+        rcu_read_lock();
+        cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
+                /* skip the whole subtree if @cp doesn't have any CPU */
+                if (cpumask_empty(cp->cpus_allowed)) {
+                        pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
+                        continue;
+                }
 
                 if (is_sched_load_balance(cp))
                         update_domain_attr(dattr, cp);
-
-                rcu_read_lock();
-                cpuset_for_each_child(child, cont, cp)
-                        list_add_tail(&child->stack_list, &q);
-                rcu_read_unlock();
         }
+        rcu_read_unlock();
 }
 
 /*
@@ -591,7 +595,6 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
 static int generate_sched_domains(cpumask_var_t **domains,
                         struct sched_domain_attr **attributes)
 {
-        LIST_HEAD(q);           /* queue of cpusets to be scanned */
         struct cpuset *cp;      /* scans q */
         struct cpuset **csa;    /* array of all cpuset ptrs */
         int csn;                /* how many cpuset ptrs in csa so far */
@@ -600,6 +603,7 @@ static int generate_sched_domains(cpumask_var_t **domains,
         struct sched_domain_attr *dattr;  /* attributes for custom domains */
         int ndoms = 0;          /* number of sched domains in result */
         int nslot;              /* next empty doms[] struct cpumask slot */
+        struct cgroup *pos_cgrp;
 
         doms = NULL;
         dattr = NULL;
@@ -627,33 +631,27 @@ static int generate_sched_domains(cpumask_var_t **domains,
                 goto done;
         csn = 0;
 
-        list_add(&top_cpuset.stack_list, &q);
-        while (!list_empty(&q)) {
-                struct cgroup *cont;
-                struct cpuset *child;   /* scans child cpusets of cp */
-
-                cp = list_first_entry(&q, struct cpuset, stack_list);
-                list_del(q.next);
-
-                if (cpumask_empty(cp->cpus_allowed))
-                        continue;
-
-                /*
-                 * All child cpusets contain a subset of the parent's cpus, so
-                 * just skip them, and then we call update_domain_attr_tree()
-                 * to calc relax_domain_level of the corresponding sched
-                 * domain.
-                 */
-                if (is_sched_load_balance(cp)) {
-                        csa[csn++] = cp;
-                        continue;
-                }
+        rcu_read_lock();
+        cpuset_for_each_descendant_pre(cp, pos_cgrp, &top_cpuset) {
+                /*
+                 * Continue traversing beyond @cp iff @cp has some CPUs and
+                 * isn't load balancing.  The former is obvious.  The
+                 * latter: All child cpusets contain a subset of the
+                 * parent's cpus, so just skip them, and then we call
+                 * update_domain_attr_tree() to calc relax_domain_level of
+                 * the corresponding sched domain.
+                 */
+                if (!cpumask_empty(cp->cpus_allowed) &&
+                    !is_sched_load_balance(cp))
+                        continue;
 
-                rcu_read_lock();
-                cpuset_for_each_child(child, cont, cp)
-                        list_add_tail(&child->stack_list, &q);
-                rcu_read_unlock();
+                if (is_sched_load_balance(cp))
+                        csa[csn++] = cp;
+
+                /* skip @cp's subtree */
+                pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
         }
+        rcu_read_unlock();
 
         for (i = 0; i < csn; i++)
                 csa[i]->pn = i;
@@ -2068,31 +2066,6 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
         move_member_tasks_to_cpuset(cs, parent);
 }
 
-/*
- * Helper function to traverse cpusets.
- * It can be used to walk the cpuset tree from top to bottom, completing
- * one layer before dropping down to the next (thus always processing a
- * node before any of its children).
- */
-static struct cpuset *cpuset_next(struct list_head *queue)
-{
-        struct cpuset *cp;
-        struct cpuset *child;   /* scans child cpusets of cp */
-        struct cgroup *cont;
-
-        if (list_empty(queue))
-                return NULL;
-
-        cp = list_first_entry(queue, struct cpuset, stack_list);
-        list_del(queue->next);
-        rcu_read_lock();
-        cpuset_for_each_child(child, cont, cp)
-                list_add_tail(&child->stack_list, queue);
-        rcu_read_unlock();
-
-        return cp;
-}
-
 /**
  * cpuset_propagate_hotplug_workfn - propagate CPU/memory hotplug to a cpuset
  * @cs: cpuset in interest
@@ -2229,12 +2202,12 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
         /* if cpus or mems went down, we need to propagate to descendants */
         if (cpus_offlined || mems_offlined) {
                 struct cpuset *cs;
-                LIST_HEAD(queue);
+                struct cgroup *pos_cgrp;
 
-                list_add_tail(&top_cpuset.stack_list, &queue);
-                while ((cs = cpuset_next(&queue)))
-                        if (cs != &top_cpuset)
-                                schedule_cpuset_propagate_hotplug(cs);
+                rcu_read_lock();
+                cpuset_for_each_descendant_pre(cs, pos_cgrp, &top_cpuset)
+                        schedule_cpuset_propagate_hotplug(cs);
+                rcu_read_unlock();
         }
 
         mutex_unlock(&cpuset_mutex);
...
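
Every converted call site above follows the same caller pattern: take rcu_read_lock(), iterate with cpuset_for_each_descendant_pre(), and prune an uninteresting subtree by advancing the cursor cgroup with cgroup_rightmost_descendant() before continuing. Condensed, with should_prune() and process() as hypothetical placeholders rather than kernel functions:

        rcu_read_lock();
        cpuset_for_each_descendant_pre(cp, pos_cgrp, root_cs) {
                if (should_prune(cp)) {
                        /* step over @cp's entire subtree */
                        pos_cgrp = cgroup_rightmost_descendant(pos_cgrp);
                        continue;
                }
                process(cp);
        }
        rcu_read_unlock();

Dropping stack_list removes the need for each cpuset to link itself into a caller-owned queue; the iterator walks the cgroup hierarchy instead, which is why, as the new macro's comment notes, every converted site now brackets the loop with rcu_read_lock()/rcu_read_unlock().
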