Commit f9fac98f authored by Tejun Heo, committed by Sasha Levin

blkcg: always create the blkcg_gq for the root blkcg

[ Upstream commit ec13b1d6 ]

Currently, blkcg does a minor optimization where the root blkcg is
created when the first blkcg policy is activated on a queue and
destroyed on the deactivation of the last.  On systems where blkcg is
configured but not used, this saves one blkcg_gq struct per queue.  On
systems where blkcg is actually used, there's no difference.  The only
case where this can lead to any meaningful, albeit still minute, savings
in memory consumption is when all blkcg policies are deactivated after
being widely used in the system, which is a highly unlikely scenario.

The conditional existence of root blkcg_gq has already created several
bugs in blkcg and became an issue once again for the new per-cgroup
wb_congested mechanism for cgroup writeback support leading to a NULL
dereference when no blkcg policy is active.  This is really not worth
bothering with.  This patch makes blkcg always allocate and link the
root blkcg_gq and release it only on queue destruction.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
parent 712b6a6d
...@@ -235,13 +235,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, ...@@ -235,13 +235,8 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg->online = true; blkg->online = true;
spin_unlock(&blkcg->lock); spin_unlock(&blkcg->lock);
if (!ret) { if (!ret)
if (blkcg == &blkcg_root) {
q->root_blkg = blkg;
q->root_rl.blkg = blkg;
}
return blkg; return blkg;
}
/* @blkg failed fully initialized, use the usual release path */ /* @blkg failed fully initialized, use the usual release path */
blkg_put(blkg); blkg_put(blkg);
...@@ -339,15 +334,6 @@ static void blkg_destroy(struct blkcg_gq *blkg) ...@@ -339,15 +334,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
if (rcu_access_pointer(blkcg->blkg_hint) == blkg) if (rcu_access_pointer(blkcg->blkg_hint) == blkg)
rcu_assign_pointer(blkcg->blkg_hint, NULL); rcu_assign_pointer(blkcg->blkg_hint, NULL);
/*
* If root blkg is destroyed. Just clear the pointer since root_rl
* does not take reference on root blkg.
*/
if (blkcg == &blkcg_root) {
blkg->q->root_blkg = NULL;
blkg->q->root_rl.blkg = NULL;
}
/* /*
* Put the reference taken at the time of creation so that when all * Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed. * queues are gone, group can be destroyed.
...@@ -859,9 +845,45 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) ...@@ -859,9 +845,45 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
*/ */
int blkcg_init_queue(struct request_queue *q) int blkcg_init_queue(struct request_queue *q)
{ {
might_sleep(); struct blkcg_gq *new_blkg, *blkg;
bool preloaded;
int ret;
new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
if (!new_blkg)
return -ENOMEM;
preloaded = !radix_tree_preload(GFP_KERNEL);
/*
* Make sure the root blkg exists and count the existing blkgs. As
* @q is bypassing at this point, blkg_lookup_create() can't be
* used. Open code insertion.
*/
rcu_read_lock();
spin_lock_irq(q->queue_lock);
blkg = blkg_create(&blkcg_root, q, new_blkg);
spin_unlock_irq(q->queue_lock);
rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
return blk_throtl_init(q); if (IS_ERR(blkg)) {
kfree(new_blkg);
return PTR_ERR(blkg);
}
q->root_blkg = blkg;
q->root_rl.blkg = blkg;
ret = blk_throtl_init(q);
if (ret) {
spin_lock_irq(q->queue_lock);
blkg_destroy_all(q);
spin_unlock_irq(q->queue_lock);
}
return ret;
} }
/** /**
...@@ -962,52 +984,20 @@ int blkcg_activate_policy(struct request_queue *q, ...@@ -962,52 +984,20 @@ int blkcg_activate_policy(struct request_queue *q,
const struct blkcg_policy *pol) const struct blkcg_policy *pol)
{ {
LIST_HEAD(pds); LIST_HEAD(pds);
struct blkcg_gq *blkg, *new_blkg; struct blkcg_gq *blkg;
struct blkg_policy_data *pd, *n; struct blkg_policy_data *pd, *n;
int cnt = 0, ret; int cnt = 0, ret;
bool preloaded;
if (blkcg_policy_enabled(q, pol)) if (blkcg_policy_enabled(q, pol))
return 0; return 0;
/* preallocations for root blkg */ /* count and allocate policy_data for all existing blkgs */
new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
if (!new_blkg)
return -ENOMEM;
blk_queue_bypass_start(q); blk_queue_bypass_start(q);
preloaded = !radix_tree_preload(GFP_KERNEL);
/*
* Make sure the root blkg exists and count the existing blkgs. As
* @q is bypassing at this point, blkg_lookup_create() can't be
* used. Open code it.
*/
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
rcu_read_lock();
blkg = __blkg_lookup(&blkcg_root, q, false);
if (blkg)
blkg_free(new_blkg);
else
blkg = blkg_create(&blkcg_root, q, new_blkg);
rcu_read_unlock();
if (preloaded)
radix_tree_preload_end();
if (IS_ERR(blkg)) {
ret = PTR_ERR(blkg);
goto out_unlock;
}
list_for_each_entry(blkg, &q->blkg_list, q_node) list_for_each_entry(blkg, &q->blkg_list, q_node)
cnt++; cnt++;
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
/* allocate policy_data for all existing blkgs */
while (cnt--) { while (cnt--) {
pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node); pd = kzalloc_node(pol->pd_size, GFP_KERNEL, q->node);
if (!pd) { if (!pd) {
...@@ -1076,10 +1066,6 @@ void blkcg_deactivate_policy(struct request_queue *q, ...@@ -1076,10 +1066,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
__clear_bit(pol->plid, q->blkcg_pols); __clear_bit(pol->plid, q->blkcg_pols);
/* if no policy is left, no need for blkgs - shoot them down */
if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS))
blkg_destroy_all(q);
list_for_each_entry(blkg, &q->blkg_list, q_node) { list_for_each_entry(blkg, &q->blkg_list, q_node) {
/* grab blkcg lock too while removing @pd from @blkg */ /* grab blkcg lock too while removing @pd from @blkg */
spin_lock(&blkg->blkcg->lock); spin_lock(&blkg->blkcg->lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment