Commit bd8815a6 authored by Tejun Heo

cgroup: make css_for_each_descendant() and friends include the origin css in the iteration

Previously, all css descendant iterators didn't include the origin
(root of subtree) css in the iteration.  The reasons were maintaining
consistency with css_for_each_child() and that at the time of
introduction more use cases needed skipping the origin anyway;
however, given that css_is_descendant() considers self to be a
descendant, omitting the origin css has become more confusing and
looking at the accumulated use cases rather clearly indicates that
including origin would result in simpler code overall.

While this is a change which can easily lead to subtle bugs, the cgroup
API, including the iterators, has recently gone through major
restructuring and no out-of-tree changes will be applicable without
adjustments, making this a relatively acceptable opportunity for this
type of change.

The conversions are mostly straight-forward.  If the iteration block
had explicit origin handling before or after, it's moved inside the
iteration.  If not, "if (pos == origin) continue;" is added.  Some
conversions add extra reference get/put around origin handling by
consolidating origin handling and the rest.  While the extra ref
operations aren't strictly necessary, this shouldn't cause any
noticeable difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
Acked-by: Aristeu Rozanski <aris@redhat.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
parent 95109b62
...@@ -615,12 +615,10 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off) ...@@ -615,12 +615,10 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
struct blkcg_policy *pol = blkcg_policy[pd->plid]; struct blkcg_policy *pol = blkcg_policy[pd->plid];
struct blkcg_gq *pos_blkg; struct blkcg_gq *pos_blkg;
struct cgroup_subsys_state *pos_css; struct cgroup_subsys_state *pos_css;
u64 sum; u64 sum = 0;
lockdep_assert_held(pd->blkg->q->queue_lock); lockdep_assert_held(pd->blkg->q->queue_lock);
sum = blkg_stat_read((void *)pd + off);
rcu_read_lock(); rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) { blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
...@@ -650,13 +648,11 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, ...@@ -650,13 +648,11 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
struct blkcg_policy *pol = blkcg_policy[pd->plid]; struct blkcg_policy *pol = blkcg_policy[pd->plid];
struct blkcg_gq *pos_blkg; struct blkcg_gq *pos_blkg;
struct cgroup_subsys_state *pos_css; struct cgroup_subsys_state *pos_css;
struct blkg_rwstat sum; struct blkg_rwstat sum = { };
int i; int i;
lockdep_assert_held(pd->blkg->q->queue_lock); lockdep_assert_held(pd->blkg->q->queue_lock);
sum = blkg_rwstat_read((void *)pd + off);
rcu_read_lock(); rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) { blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
......
...@@ -291,6 +291,7 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, ...@@ -291,6 +291,7 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
* read locked. If called under either blkcg or queue lock, the iteration * read locked. If called under either blkcg or queue lock, the iteration
* is guaranteed to include all and only online blkgs. The caller may * is guaranteed to include all and only online blkgs. The caller may
* update @pos_css by calling css_rightmost_descendant() to skip subtree. * update @pos_css by calling css_rightmost_descendant() to skip subtree.
* @p_blkg is included in the iteration and the first node to be visited.
*/ */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \
css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \
...@@ -304,7 +305,8 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, ...@@ -304,7 +305,8 @@ struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
* @p_blkg: target blkg to walk descendants of * @p_blkg: target blkg to walk descendants of
* *
* Similar to blkg_for_each_descendant_pre() but performs post-order * Similar to blkg_for_each_descendant_pre() but performs post-order
* traversal instead. Synchronization rules are the same. * traversal instead. Synchronization rules are the same. @p_blkg is
* included in the iteration and the last node to be visited.
*/ */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \
css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \
......
...@@ -1379,7 +1379,6 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft, ...@@ -1379,7 +1379,6 @@ static int tg_set_conf(struct cgroup_subsys_state *css, struct cftype *cft,
* restrictions in the whole hierarchy and allows them to bypass * restrictions in the whole hierarchy and allows them to bypass
* blk-throttle. * blk-throttle.
*/ */
tg_update_has_rules(tg);
blkg_for_each_descendant_pre(blkg, pos_css, ctx.blkg) blkg_for_each_descendant_pre(blkg, pos_css, ctx.blkg)
tg_update_has_rules(blkg_to_tg(blkg)); tg_update_has_rules(blkg_to_tg(blkg));
...@@ -1639,8 +1638,6 @@ void blk_throtl_drain(struct request_queue *q) ...@@ -1639,8 +1638,6 @@ void blk_throtl_drain(struct request_queue *q)
blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg)
tg_drain_bios(&blkg_to_tg(blkg)->service_queue); tg_drain_bios(&blkg_to_tg(blkg)->service_queue);
tg_drain_bios(&td_root_tg(td)->service_queue);
/* finally, transfer bios from top-level tg's into the td */ /* finally, transfer bios from top-level tg's into the td */
tg_drain_bios(&td->service_queue); tg_drain_bios(&td->service_queue);
......
...@@ -798,7 +798,8 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos); ...@@ -798,7 +798,8 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos);
* @pos: the css * to use as the loop cursor * @pos: the css * to use as the loop cursor
* @root: css whose descendants to walk * @root: css whose descendants to walk
* *
* Walk @root's descendants. Must be called under rcu_read_lock(). A * Walk @root's descendants. @root is included in the iteration and the
* first node to be visited. Must be called under rcu_read_lock(). A
* descendant css which hasn't finished ->css_online() or already has * descendant css which hasn't finished ->css_online() or already has
* finished ->css_offline() may show up during traversal and it's each * finished ->css_offline() may show up during traversal and it's each
* subsystem's responsibility to verify that each @pos is alive. * subsystem's responsibility to verify that each @pos is alive.
...@@ -820,13 +821,12 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos); ...@@ -820,13 +821,12 @@ css_rightmost_descendant(struct cgroup_subsys_state *pos);
* *
* my_update_state(@css) * my_update_state(@css)
* { * {
* Lock @css;
* Update @css's state;
* Unlock @css;
*
* css_for_each_descendant_pre(@pos, @css) { * css_for_each_descendant_pre(@pos, @css) {
* Lock @pos; * Lock @pos;
* Verify @pos is alive and inherit state from @pos's parent; * if (@pos == @css)
* Update @css's state;
* else
* Verify @pos is alive and inherit state from its parent;
* Unlock @pos; * Unlock @pos;
* } * }
* } * }
...@@ -864,8 +864,9 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, ...@@ -864,8 +864,9 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
* @css: css whose descendants to walk * @css: css whose descendants to walk
* *
* Similar to css_for_each_descendant_pre() but performs post-order * Similar to css_for_each_descendant_pre() but performs post-order
* traversal instead. Note that the walk visibility guarantee described in * traversal instead. @root is included in the iteration and the last
* pre-order walk doesn't apply the same to post-order walks. * node to be visited. Note that the walk visibility guarantee described
* in pre-order walk doesn't apply the same to post-order walks.
*/ */
#define css_for_each_descendant_post(pos, css) \ #define css_for_each_descendant_post(pos, css) \
for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \
......
...@@ -2868,17 +2868,6 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add) ...@@ -2868,17 +2868,6 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
mutex_unlock(&cgroup_mutex); mutex_unlock(&cgroup_mutex);
/* @root always needs to be updated */
inode = root->dentry->d_inode;
mutex_lock(&inode->i_mutex);
mutex_lock(&cgroup_mutex);
ret = cgroup_addrm_files(root, cfts, is_add);
mutex_unlock(&cgroup_mutex);
mutex_unlock(&inode->i_mutex);
if (ret)
goto out_deact;
/* add/rm files for all cgroups created before */ /* add/rm files for all cgroups created before */
rcu_read_lock(); rcu_read_lock();
css_for_each_descendant_pre(css, cgroup_css(root, ss->subsys_id)) { css_for_each_descendant_pre(css, cgroup_css(root, ss->subsys_id)) {
...@@ -2907,7 +2896,6 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add) ...@@ -2907,7 +2896,6 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)
} }
rcu_read_unlock(); rcu_read_unlock();
dput(prev); dput(prev);
out_deact:
deactivate_super(sb); deactivate_super(sb);
return ret; return ret;
} }
...@@ -3099,7 +3087,8 @@ EXPORT_SYMBOL_GPL(css_next_child); ...@@ -3099,7 +3087,8 @@ EXPORT_SYMBOL_GPL(css_next_child);
* @root: css whose descendants to walk * @root: css whose descendants to walk
* *
* To be used by css_for_each_descendant_pre(). Find the next descendant * To be used by css_for_each_descendant_pre(). Find the next descendant
* to visit for pre-order traversal of @root's descendants. * to visit for pre-order traversal of @root's descendants. @root is
* included in the iteration and the first node to be visited.
* *
* While this function requires RCU read locking, it doesn't require the * While this function requires RCU read locking, it doesn't require the
* whole traversal to be contained in a single RCU critical section. This * whole traversal to be contained in a single RCU critical section. This
...@@ -3114,9 +3103,9 @@ css_next_descendant_pre(struct cgroup_subsys_state *pos, ...@@ -3114,9 +3103,9 @@ css_next_descendant_pre(struct cgroup_subsys_state *pos,
WARN_ON_ONCE(!rcu_read_lock_held()); WARN_ON_ONCE(!rcu_read_lock_held());
/* if first iteration, pretend we just visited @root */ /* if first iteration, visit @root */
if (!pos) if (!pos)
pos = root; return root;
/* visit the first child if exists */ /* visit the first child if exists */
next = css_next_child(NULL, pos); next = css_next_child(NULL, pos);
...@@ -3186,7 +3175,8 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos) ...@@ -3186,7 +3175,8 @@ css_leftmost_descendant(struct cgroup_subsys_state *pos)
* @root: css whose descendants to walk * @root: css whose descendants to walk
* *
* To be used by css_for_each_descendant_post(). Find the next descendant * To be used by css_for_each_descendant_post(). Find the next descendant
* to visit for post-order traversal of @root's descendants. * to visit for post-order traversal of @root's descendants. @root is
* included in the iteration and the last node to be visited.
* *
* While this function requires RCU read locking, it doesn't require the * While this function requires RCU read locking, it doesn't require the
* whole traversal to be contained in a single RCU critical section. This * whole traversal to be contained in a single RCU critical section. This
...@@ -3207,14 +3197,17 @@ css_next_descendant_post(struct cgroup_subsys_state *pos, ...@@ -3207,14 +3197,17 @@ css_next_descendant_post(struct cgroup_subsys_state *pos,
return next != root ? next : NULL; return next != root ? next : NULL;
} }
/* if we visited @root, we're done */
if (pos == root)
return NULL;
/* if there's an unvisited sibling, visit its leftmost descendant */ /* if there's an unvisited sibling, visit its leftmost descendant */
next = css_next_child(pos, css_parent(pos)); next = css_next_child(pos, css_parent(pos));
if (next) if (next)
return css_leftmost_descendant(next); return css_leftmost_descendant(next);
/* no sibling left, visit parent */ /* no sibling left, visit parent */
next = css_parent(pos); return css_parent(pos);
return next != root ? next : NULL;
} }
EXPORT_SYMBOL_GPL(css_next_descendant_post); EXPORT_SYMBOL_GPL(css_next_descendant_post);
......
...@@ -311,7 +311,6 @@ static int freezer_read(struct cgroup_subsys_state *css, struct cftype *cft, ...@@ -311,7 +311,6 @@ static int freezer_read(struct cgroup_subsys_state *css, struct cftype *cft,
/* update states bottom-up */ /* update states bottom-up */
css_for_each_descendant_post(pos, css) css_for_each_descendant_post(pos, css)
update_if_frozen(pos); update_if_frozen(pos);
update_if_frozen(css);
rcu_read_unlock(); rcu_read_unlock();
...@@ -391,11 +390,6 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) ...@@ -391,11 +390,6 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
{ {
struct cgroup_subsys_state *pos; struct cgroup_subsys_state *pos;
/* update @freezer */
spin_lock_irq(&freezer->lock);
freezer_apply_state(freezer, freeze, CGROUP_FREEZING_SELF);
spin_unlock_irq(&freezer->lock);
/* /*
* Update all its descendants in pre-order traversal. Each * Update all its descendants in pre-order traversal. Each
* descendant will try to inherit its parent's FREEZING state as * descendant will try to inherit its parent's FREEZING state as
...@@ -406,14 +400,23 @@ static void freezer_change_state(struct freezer *freezer, bool freeze) ...@@ -406,14 +400,23 @@ static void freezer_change_state(struct freezer *freezer, bool freeze)
struct freezer *pos_f = css_freezer(pos); struct freezer *pos_f = css_freezer(pos);
struct freezer *parent = parent_freezer(pos_f); struct freezer *parent = parent_freezer(pos_f);
spin_lock_irq(&pos_f->lock);
if (pos_f == freezer) {
freezer_apply_state(pos_f, freeze,
CGROUP_FREEZING_SELF);
} else {
/* /*
* Our update to @parent->state is already visible which is * Our update to @parent->state is already visible
* all we need. No need to lock @parent. For more info on * which is all we need. No need to lock @parent.
* synchronization, see freezer_post_create(). * For more info on synchronization, see
* freezer_post_create().
*/ */
spin_lock_irq(&pos_f->lock); freezer_apply_state(pos_f,
freezer_apply_state(pos_f, parent->state & CGROUP_FREEZING, parent->state & CGROUP_FREEZING,
CGROUP_FREEZING_PARENT); CGROUP_FREEZING_PARENT);
}
spin_unlock_irq(&pos_f->lock); spin_unlock_irq(&pos_f->lock);
} }
rcu_read_unlock(); rcu_read_unlock();
......
...@@ -222,7 +222,8 @@ static struct cpuset top_cpuset = { ...@@ -222,7 +222,8 @@ static struct cpuset top_cpuset = {
* *
* Walk @des_cs through the online descendants of @root_cs. Must be used * Walk @des_cs through the online descendants of @root_cs. Must be used
* with RCU read locked. The caller may modify @pos_css by calling * with RCU read locked. The caller may modify @pos_css by calling
* css_rightmost_descendant() to skip subtree. * css_rightmost_descendant() to skip subtree. @root_cs is included in the
* iteration and the first node to be visited.
*/ */
#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs) \ #define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs) \
css_for_each_descendant_pre((pos_css), &(root_cs)->css) \ css_for_each_descendant_pre((pos_css), &(root_cs)->css) \
...@@ -506,6 +507,9 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr, ...@@ -506,6 +507,9 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
rcu_read_lock(); rcu_read_lock();
cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
if (cp == root_cs)
continue;
/* skip the whole subtree if @cp doesn't have any CPU */ /* skip the whole subtree if @cp doesn't have any CPU */
if (cpumask_empty(cp->cpus_allowed)) { if (cpumask_empty(cp->cpus_allowed)) {
pos_css = css_rightmost_descendant(pos_css); pos_css = css_rightmost_descendant(pos_css);
...@@ -613,6 +617,8 @@ static int generate_sched_domains(cpumask_var_t **domains, ...@@ -613,6 +617,8 @@ static int generate_sched_domains(cpumask_var_t **domains,
rcu_read_lock(); rcu_read_lock();
cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) { cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
if (cp == &top_cpuset)
continue;
/* /*
* Continue traversing beyond @cp iff @cp has some CPUs and * Continue traversing beyond @cp iff @cp has some CPUs and
* isn't load balancing. The former is obvious. The * isn't load balancing. The former is obvious. The
...@@ -875,16 +881,18 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs, ...@@ -875,16 +881,18 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs,
struct cpuset *cp; struct cpuset *cp;
struct cgroup_subsys_state *pos_css; struct cgroup_subsys_state *pos_css;
if (update_root)
update_tasks_cpumask(root_cs, heap);
rcu_read_lock(); rcu_read_lock();
cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
if (cp == root_cs) {
if (!update_root)
continue;
} else {
/* skip the whole subtree if @cp have some CPU */ /* skip the whole subtree if @cp have some CPU */
if (!cpumask_empty(cp->cpus_allowed)) { if (!cpumask_empty(cp->cpus_allowed)) {
pos_css = css_rightmost_descendant(pos_css); pos_css = css_rightmost_descendant(pos_css);
continue; continue;
} }
}
if (!css_tryget(&cp->css)) if (!css_tryget(&cp->css))
continue; continue;
rcu_read_unlock(); rcu_read_unlock();
...@@ -1130,16 +1138,18 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs, ...@@ -1130,16 +1138,18 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs,
struct cpuset *cp; struct cpuset *cp;
struct cgroup_subsys_state *pos_css; struct cgroup_subsys_state *pos_css;
if (update_root)
update_tasks_nodemask(root_cs, heap);
rcu_read_lock(); rcu_read_lock();
cpuset_for_each_descendant_pre(cp, pos_css, root_cs) { cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
if (cp == root_cs) {
if (!update_root)
continue;
} else {
/* skip the whole subtree if @cp have some CPU */ /* skip the whole subtree if @cp have some CPU */
if (!nodes_empty(cp->mems_allowed)) { if (!nodes_empty(cp->mems_allowed)) {
pos_css = css_rightmost_descendant(pos_css); pos_css = css_rightmost_descendant(pos_css);
continue; continue;
} }
}
if (!css_tryget(&cp->css)) if (!css_tryget(&cp->css))
continue; continue;
rcu_read_unlock(); rcu_read_unlock();
...@@ -2237,7 +2247,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) ...@@ -2237,7 +2247,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
rcu_read_lock(); rcu_read_lock();
cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) { cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
if (!css_tryget(&cs->css)) if (cs == &top_cpuset || !css_tryget(&cs->css))
continue; continue;
rcu_read_unlock(); rcu_read_unlock();
......
...@@ -1079,14 +1079,7 @@ static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root, ...@@ -1079,14 +1079,7 @@ static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
{ {
struct cgroup_subsys_state *prev_css, *next_css; struct cgroup_subsys_state *prev_css, *next_css;
/* prev_css = last_visited ? &last_visited->css : NULL;
* Root is not visited by cgroup iterators so it needs an
* explicit visit.
*/
if (!last_visited)
return root;
prev_css = (last_visited == root) ? NULL : &last_visited->css;
skip_node: skip_node:
next_css = css_next_descendant_pre(prev_css, &root->css); next_css = css_next_descendant_pre(prev_css, &root->css);
......
...@@ -456,7 +456,7 @@ static int propagate_exception(struct dev_cgroup *devcg_root, ...@@ -456,7 +456,7 @@ static int propagate_exception(struct dev_cgroup *devcg_root,
* methods), and online ones are safe to access outside RCU * methods), and online ones are safe to access outside RCU
* read lock without bumping refcnt. * read lock without bumping refcnt.
*/ */
if (!is_devcg_online(devcg)) if (pos == &devcg_root->css || !is_devcg_online(devcg))
continue; continue;
rcu_read_unlock(); rcu_read_unlock();
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment