Commit 9d179b86 authored by Tejun Heo's avatar Tejun Heo Committed by Jens Axboe

blkcg: Fix multiple bugs in blkcg_activate_policy()

blkcg_activate_policy() has the following bugs.

* cf09a8ee ("blkcg: pass @q and @blkcg into
  blkcg_pol_alloc_pd_fn()") added @blkcg to ->pd_alloc_fn(); however,
  blkcg_activate_policy() ends up using pd's allocated for the root
  blkcg for all preallocations, so ->pd_init_fn() for non-root blkcgs
  can be passed in pd's which are allocated for the root blkcg.

  For blk-iocost, this means that ->pd_init_fn() can write beyond the
  end of the allocated object as it determines the length of the flex
  array at the end based on the blkcg's nesting level.

* Each pd is initialized as they get allocated.  If alloc fails, the
  policy will get freed with pd's initialized on it.

* After the above partial failure, the partial pds are not freed.

This patch fixes all the above issues by

* Restructuring blkcg_activate_policy() so that alloc and init passes
  are separate.  Init takes place only after all allocs succeeded and
  on failure all allocated pds are freed.

* Unifying and fixing the cleanup of the remaining pd_prealloc.
Signed-off-by: default avatarTejun Heo <tj@kernel.org>
Fixes: cf09a8ee ("blkcg: pass @q and @blkcg into blkcg_pol_alloc_pd_fn()")
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 5da0fb1a
...@@ -1362,7 +1362,7 @@ int blkcg_activate_policy(struct request_queue *q, ...@@ -1362,7 +1362,7 @@ int blkcg_activate_policy(struct request_queue *q,
const struct blkcg_policy *pol) const struct blkcg_policy *pol)
{ {
struct blkg_policy_data *pd_prealloc = NULL; struct blkg_policy_data *pd_prealloc = NULL;
struct blkcg_gq *blkg; struct blkcg_gq *blkg, *pinned_blkg = NULL;
int ret; int ret;
if (blkcg_policy_enabled(q, pol)) if (blkcg_policy_enabled(q, pol))
...@@ -1370,49 +1370,82 @@ int blkcg_activate_policy(struct request_queue *q, ...@@ -1370,49 +1370,82 @@ int blkcg_activate_policy(struct request_queue *q,
if (queue_is_mq(q)) if (queue_is_mq(q))
blk_mq_freeze_queue(q); blk_mq_freeze_queue(q);
pd_prealloc: retry:
if (!pd_prealloc) {
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
if (!pd_prealloc) {
ret = -ENOMEM;
goto out_bypass_end;
}
}
spin_lock_irq(&q->queue_lock); spin_lock_irq(&q->queue_lock);
/* blkg_list is pushed at the head, reverse walk to init parents first */ /* blkg_list is pushed at the head, reverse walk to allocate parents first */
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) { list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
struct blkg_policy_data *pd; struct blkg_policy_data *pd;
if (blkg->pd[pol->plid]) if (blkg->pd[pol->plid])
continue; continue;
pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root); /* If prealloc matches, use it; otherwise try GFP_NOWAIT */
if (!pd) if (blkg == pinned_blkg) {
swap(pd, pd_prealloc); pd = pd_prealloc;
pd_prealloc = NULL;
} else {
pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
blkg->blkcg);
}
if (!pd) { if (!pd) {
/*
* GFP_NOWAIT failed. Free the existing one and
* prealloc for @blkg w/ GFP_KERNEL.
*/
if (pinned_blkg)
blkg_put(pinned_blkg);
blkg_get(blkg);
pinned_blkg = blkg;
spin_unlock_irq(&q->queue_lock); spin_unlock_irq(&q->queue_lock);
goto pd_prealloc;
if (pd_prealloc)
pol->pd_free_fn(pd_prealloc);
pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
blkg->blkcg);
if (pd_prealloc)
goto retry;
else
goto enomem;
} }
blkg->pd[pol->plid] = pd; blkg->pd[pol->plid] = pd;
pd->blkg = blkg; pd->blkg = blkg;
pd->plid = pol->plid; pd->plid = pol->plid;
if (pol->pd_init_fn)
pol->pd_init_fn(pd);
} }
/* all allocated, init in the same order */
if (pol->pd_init_fn)
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
pol->pd_init_fn(blkg->pd[pol->plid]);
__set_bit(pol->plid, q->blkcg_pols); __set_bit(pol->plid, q->blkcg_pols);
ret = 0; ret = 0;
spin_unlock_irq(&q->queue_lock); spin_unlock_irq(&q->queue_lock);
out_bypass_end: out:
if (queue_is_mq(q)) if (queue_is_mq(q))
blk_mq_unfreeze_queue(q); blk_mq_unfreeze_queue(q);
if (pinned_blkg)
blkg_put(pinned_blkg);
if (pd_prealloc) if (pd_prealloc)
pol->pd_free_fn(pd_prealloc); pol->pd_free_fn(pd_prealloc);
return ret; return ret;
enomem:
/* alloc failed, nothing's initialized yet, free everything */
spin_lock_irq(&q->queue_lock);
list_for_each_entry(blkg, &q->blkg_list, q_node) {
if (blkg->pd[pol->plid]) {
pol->pd_free_fn(blkg->pd[pol->plid]);
blkg->pd[pol->plid] = NULL;
}
}
spin_unlock_irq(&q->queue_lock);
ret = -ENOMEM;
goto out;
} }
EXPORT_SYMBOL_GPL(blkcg_activate_policy); EXPORT_SYMBOL_GPL(blkcg_activate_policy);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment