Commit b5f2954d authored by Dennis Zhou's avatar Dennis Zhou Committed by Jens Axboe

blkcg: revert blkcg cleanups series

This reverts a series committed earlier due to null pointer exception
bug report in [1]. It seems there are edge case interactions that I did
not consider and will need some time to understand what causes the
adverse interactions.

The original series can be found in [2] with a follow up series in [3].

[1] https://www.spinics.net/lists/cgroups/msg20719.html
[2] https://lore.kernel.org/lkml/20180911184137.35897-1-dennisszhou@gmail.com/
[3] https://lore.kernel.org/lkml/20181020185612.51587-1-dennis@kernel.org/

This reverts the following commits:
d459d853, b2c3fa54, 101246ec, b3b9f24f, e2b09899,
f0fcb3ec, c839e7a0, bdc24917, 74b7c02a, 5bf9a1f3,
a7b39b4e, 07b05bcc, 49f4c2dc, 27e6fa99Signed-off-by: default avatarDennis Zhou <dennis@kernel.org>
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 153fcd5f
......@@ -1857,10 +1857,8 @@ following two functions.
wbc_init_bio(@wbc, @bio)
Should be called for each bio carrying writeback data and
associates the bio with the inode's owner cgroup and the
corresponding request queue. This must be called after
a queue (device) has been associated with the bio and
before submission.
associates the bio with the inode's owner cgroup. Can be
called anytime between bio allocation and submission.
wbc_account_io(@wbc, @page, @bytes)
Should be called for each data segment being written out.
......@@ -1879,7 +1877,7 @@ the configuration, the bio may be executed at a lower priority and if
the writeback session is holding shared resources, e.g. a journal
entry, may lead to priority inversion. There is no one easy solution
for the problem. Filesystems can try to work around specific problem
cases by skipping wbc_init_bio() or using bio_associate_create_blkg()
cases by skipping wbc_init_bio() or using bio_associate_blkcg()
directly.
......
......@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
uint64_t serial_nr;
rcu_read_lock();
serial_nr = __bio_blkcg(bio)->css.serial_nr;
serial_nr = bio_blkcg(bio)->css.serial_nr;
/*
* Check whether blkcg has changed. The condition may trigger
......@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
goto out;
bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio));
bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
/*
* Update blkg_path for bfq_log_* functions. We cache this
* path, and update it here, for the following
......
......@@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
rcu_read_lock();
bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio));
bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
if (!bfqg) {
bfqq = &bfqd->oom_bfqq;
goto out;
......
......@@ -609,9 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
bio->bi_iter = bio_src->bi_iter;
bio->bi_io_vec = bio_src->bi_io_vec;
bio_clone_blkg_association(bio, bio_src);
blkcg_bio_issue_init(bio);
bio_clone_blkcg_association(bio, bio_src);
}
EXPORT_SYMBOL(__bio_clone_fast);
......@@ -1956,151 +1954,69 @@ EXPORT_SYMBOL(bioset_init_from_src);
#ifdef CONFIG_BLK_CGROUP
/**
* bio_associate_blkg - associate a bio with the a blkg
* @bio: target bio
* @blkg: the blkg to associate
*
* This tries to associate @bio with the specified blkg. Association failure
* is handled by walking up the blkg tree. Therefore, the blkg associated can
* be anything between @blkg and the root_blkg. This situation only happens
* when a cgroup is dying and then the remaining bios will spill to the closest
* alive blkg.
*
* A reference will be taken on the @blkg and will be released when @bio is
* freed.
*/
int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
{
if (unlikely(bio->bi_blkg))
return -EBUSY;
bio->bi_blkg = blkg_tryget_closest(blkg);
return 0;
}
/**
* __bio_associate_blkg_from_css - internal blkg association function
*
* This in the core association function that all association paths rely on.
* A blkg reference is taken which is released upon freeing of the bio.
*/
static int __bio_associate_blkg_from_css(struct bio *bio,
struct cgroup_subsys_state *css)
{
struct request_queue *q = bio->bi_disk->queue;
struct blkcg_gq *blkg;
int ret;
rcu_read_lock();
if (!css || !css->parent)
blkg = q->root_blkg;
else
blkg = blkg_lookup_create(css_to_blkcg(css), q);
ret = bio_associate_blkg(bio, blkg);
rcu_read_unlock();
return ret;
}
/**
* bio_associate_blkg_from_css - associate a bio with a specified css
* @bio: target bio
* @css: target css
*
* Associate @bio with the blkg found by combining the css's blkg and the
* request_queue of the @bio. This falls back to the queue's root_blkg if
* the association fails with the css.
*/
int bio_associate_blkg_from_css(struct bio *bio,
struct cgroup_subsys_state *css)
{
if (unlikely(bio->bi_blkg))
return -EBUSY;
return __bio_associate_blkg_from_css(bio, css);
}
EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
#ifdef CONFIG_MEMCG
/**
* bio_associate_blkg_from_page - associate a bio with the page's blkg
* bio_associate_blkcg_from_page - associate a bio with the page's blkcg
* @bio: target bio
* @page: the page to lookup the blkcg from
*
* Associate @bio with the blkg from @page's owning memcg and the respective
* request_queue. If cgroup_e_css returns NULL, fall back to the queue's
* root_blkg.
*
* Note: this must be called after bio has an associated device.
* Associate @bio with the blkcg from @page's owning memcg. This works like
* every other associate function wrt references.
*/
int bio_associate_blkg_from_page(struct bio *bio, struct page *page)
int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
{
struct cgroup_subsys_state *css;
int ret;
struct cgroup_subsys_state *blkcg_css;
if (unlikely(bio->bi_blkg))
if (unlikely(bio->bi_css))
return -EBUSY;
if (!page->mem_cgroup)
return 0;
rcu_read_lock();
css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
ret = __bio_associate_blkg_from_css(bio, css);
rcu_read_unlock();
return ret;
blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
&io_cgrp_subsys);
bio->bi_css = blkcg_css;
return 0;
}
#endif /* CONFIG_MEMCG */
/**
* bio_associate_create_blkg - associate a bio with a blkg from q
* @q: request_queue where bio is going
* bio_associate_blkcg - associate a bio with the specified blkcg
* @bio: target bio
* @blkcg_css: css of the blkcg to associate
*
* Associate @bio with the blkg found from the bio's css and the request_queue.
* If one is not found, bio_lookup_blkg creates the blkg. This falls back to
* the queue's root_blkg if association fails.
* Associate @bio with the blkcg specified by @blkcg_css. Block layer will
* treat @bio as if it were issued by a task which belongs to the blkcg.
*
* This function takes an extra reference of @blkcg_css which will be put
* when @bio is released. The caller must own @bio and is responsible for
* synchronizing calls to this function.
*/
int bio_associate_create_blkg(struct request_queue *q, struct bio *bio)
int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
{
struct cgroup_subsys_state *css;
int ret = 0;
/* someone has already associated this bio with a blkg */
if (bio->bi_blkg)
return ret;
rcu_read_lock();
css = blkcg_css();
ret = __bio_associate_blkg_from_css(bio, css);
rcu_read_unlock();
return ret;
if (unlikely(bio->bi_css))
return -EBUSY;
css_get(blkcg_css);
bio->bi_css = blkcg_css;
return 0;
}
EXPORT_SYMBOL_GPL(bio_associate_blkcg);
/**
* bio_reassociate_blkg - reassociate a bio with a blkg from q
* @q: request_queue where bio is going
* bio_associate_blkg - associate a bio with the specified blkg
* @bio: target bio
* @blkg: the blkg to associate
*
* When submitting a bio, multiple recursive calls to make_request() may occur.
* This causes the initial associate done in blkcg_bio_issue_check() to be
* incorrect and reference the prior request_queue. This performs reassociation
* when this situation happens.
* Associate @bio with the blkg specified by @blkg. This is the queue specific
* blkcg information associated with the @bio, a reference will be taken on the
* @blkg and will be freed when the bio is freed.
*/
int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
{
if (bio->bi_blkg) {
blkg_put(bio->bi_blkg);
bio->bi_blkg = NULL;
}
return bio_associate_create_blkg(q, bio);
if (unlikely(bio->bi_blkg))
return -EBUSY;
if (!blkg_try_get(blkg))
return -ENODEV;
bio->bi_blkg = blkg;
return 0;
}
/**
......@@ -2113,6 +2029,10 @@ void bio_disassociate_task(struct bio *bio)
put_io_context(bio->bi_ioc);
bio->bi_ioc = NULL;
}
if (bio->bi_css) {
css_put(bio->bi_css);
bio->bi_css = NULL;
}
if (bio->bi_blkg) {
blkg_put(bio->bi_blkg);
bio->bi_blkg = NULL;
......@@ -2120,16 +2040,16 @@ void bio_disassociate_task(struct bio *bio)
}
/**
* bio_clone_blkg_association - clone blkg association from src to dst bio
* bio_clone_blkcg_association - clone blkcg association from src to dst bio
* @dst: destination bio
* @src: source bio
*/
void bio_clone_blkg_association(struct bio *dst, struct bio *src)
void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
{
if (src->bi_blkg)
bio_associate_blkg(dst, src->bi_blkg);
if (src->bi_css)
WARN_ON(bio_associate_blkcg(dst, src->bi_css));
}
EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
#endif /* CONFIG_BLK_CGROUP */
static void __init biovec_init_slabs(void)
......
......@@ -84,37 +84,6 @@ static void blkg_free(struct blkcg_gq *blkg)
kfree(blkg);
}
static void __blkg_release(struct rcu_head *rcu)
{
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
percpu_ref_exit(&blkg->refcnt);
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
if (blkg->parent)
blkg_put(blkg->parent);
wb_congested_put(blkg->wb_congested);
blkg_free(blkg);
}
/*
* A group is RCU protected, but having an rcu lock does not mean that one
* can access all the fields of blkg and assume these are valid. For
* example, don't try to follow throtl_data and request queue links.
*
* Having a reference to blkg under an rcu allows accesses to only values
* local to groups like group stats and group rate limits.
*/
static void blkg_release(struct percpu_ref *ref)
{
struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
call_rcu(&blkg->rcu_head, __blkg_release);
}
/**
* blkg_alloc - allocate a blkg
* @blkcg: block cgroup the new blkg is associated with
......@@ -141,6 +110,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
blkg->q = q;
INIT_LIST_HEAD(&blkg->q_node);
blkg->blkcg = blkcg;
atomic_set(&blkg->refcnt, 1);
/* root blkg uses @q->root_rl, init rl only for !root blkgs */
if (blkcg != &blkcg_root) {
......@@ -247,11 +217,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg_get(blkg->parent);
}
ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
GFP_NOWAIT | __GFP_NOWARN);
if (ret)
goto err_cancel_ref;
/* invoke per-policy init */
for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkcg_policy *pol = blkcg_policy[i];
......@@ -284,8 +249,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg_put(blkg);
return ERR_PTR(ret);
err_cancel_ref:
percpu_ref_exit(&blkg->refcnt);
err_put_congested:
wb_congested_put(wb_congested);
err_put_css:
......@@ -296,7 +259,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
}
/**
* __blkg_lookup_create - lookup blkg, try to create one if not there
* blkg_lookup_create - lookup blkg, try to create one if not there
* @blkcg: blkcg of interest
* @q: request_queue of interest
*
......@@ -305,10 +268,11 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
* that all non-root blkg's have access to the parent blkg. This function
* should be called under RCU read lock and @q->queue_lock.
*
* Returns the blkg or the closest blkg if blkg_create fails as it walks
* down from root.
* Returns pointer to the looked up or created blkg on success, ERR_PTR()
* value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not
* dead and bypassing, returns ERR_PTR(-EBUSY).
*/
struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q)
{
struct blkcg_gq *blkg;
......@@ -321,7 +285,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
* we shouldn't allow anything to go through for a bypassing queue.
*/
if (unlikely(blk_queue_bypass(q)))
return q->root_blkg;
return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
blkg = __blkg_lookup(blkcg, q, true);
if (blkg)
......@@ -329,58 +293,23 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
/*
* Create blkgs walking down from blkcg_root to @blkcg, so that all
* non-root blkgs have access to their parents. Returns the closest
* blkg to the intended blkg should blkg_create() fail.
* non-root blkgs have access to their parents.
*/
while (true) {
struct blkcg *pos = blkcg;
struct blkcg *parent = blkcg_parent(blkcg);
struct blkcg_gq *ret_blkg = q->root_blkg;
while (parent) {
blkg = __blkg_lookup(parent, q, false);
if (blkg) {
/* remember closest blkg */
ret_blkg = blkg;
break;
}
while (parent && !__blkg_lookup(parent, q, false)) {
pos = parent;
parent = blkcg_parent(parent);
}
blkg = blkg_create(pos, q, NULL);
if (IS_ERR(blkg))
return ret_blkg;
if (pos == blkcg)
if (pos == blkcg || IS_ERR(blkg))
return blkg;
}
}
/**
* blkg_lookup_create - find or create a blkg
* @blkcg: target block cgroup
* @q: target request_queue
*
* This looks up or creates the blkg representing the unique pair
* of the blkcg and the request_queue.
*/
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q)
{
struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
unsigned long flags;
if (unlikely(!blkg)) {
spin_lock_irqsave(q->queue_lock, flags);
blkg = __blkg_lookup_create(blkcg, q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
return blkg;
}
static void blkg_destroy(struct blkcg_gq *blkg)
{
struct blkcg *blkcg = blkg->blkcg;
......@@ -424,7 +353,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
*/
percpu_ref_kill(&blkg->refcnt);
blkg_put(blkg);
}
/**
......@@ -451,6 +380,29 @@ static void blkg_destroy_all(struct request_queue *q)
q->root_rl.blkg = NULL;
}
/*
* A group is RCU protected, but having an rcu lock does not mean that one
* can access all the fields of blkg and assume these are valid. For
* example, don't try to follow throtl_data and request queue links.
*
* Having a reference to blkg under an rcu allows accesses to only values
* local to groups like group stats and group rate limits.
*/
void __blkg_release_rcu(struct rcu_head *rcu_head)
{
struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
if (blkg->parent)
blkg_put(blkg->parent);
wb_congested_put(blkg->wb_congested);
blkg_free(blkg);
}
EXPORT_SYMBOL_GPL(__blkg_release_rcu);
/*
* The next function used by blk_queue_for_each_rl(). It's a bit tricky
* because the root blkg uses @q->root_rl instead of its own rl.
......@@ -1796,7 +1748,8 @@ void blkcg_maybe_throttle_current(void)
blkg = blkg_lookup(blkcg, q);
if (!blkg)
goto out;
if (!blkg_tryget(blkg))
blkg = blkg_try_get(blkg);
if (!blkg)
goto out;
rcu_read_unlock();
......
......@@ -2435,7 +2435,6 @@ blk_qc_t generic_make_request(struct bio *bio)
if (q)
blk_queue_exit(q);
q = bio->bi_disk->queue;
bio_reassociate_blkg(q, bio);
flags = 0;
if (bio->bi_opf & REQ_NOWAIT)
flags = BLK_MQ_REQ_NOWAIT;
......
......@@ -480,12 +480,34 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
spinlock_t *lock)
{
struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
struct blkcg_gq *blkg = bio->bi_blkg;
struct blkcg *blkcg;
struct blkcg_gq *blkg;
struct request_queue *q = rqos->q;
bool issue_as_root = bio_issue_as_root_blkg(bio);
if (!blk_iolatency_enabled(blkiolat))
return;
rcu_read_lock();
blkcg = bio_blkcg(bio);
bio_associate_blkcg(bio, &blkcg->css);
blkg = blkg_lookup(blkcg, q);
if (unlikely(!blkg)) {
if (!lock)
spin_lock_irq(q->queue_lock);
blkg = blkg_lookup_create(blkcg, q);
if (IS_ERR(blkg))
blkg = NULL;
if (!lock)
spin_unlock_irq(q->queue_lock);
}
if (!blkg)
goto out;
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
bio_associate_blkg(bio, blkg);
out:
rcu_read_unlock();
while (blkg && blkg->parent) {
struct iolatency_grp *iolat = blkg_to_lat(blkg);
if (!iolat) {
......@@ -706,7 +728,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
* We could be exiting, don't access the pd unless we have a
* ref on the blkg.
*/
if (!blkg_tryget(blkg))
if (!blkg_try_get(blkg))
continue;
iolat = blkg_to_lat(blkg);
......
......@@ -2115,11 +2115,21 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
}
#endif
static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
{
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
/* fallback to root_blkg if we fail to get a blkg ref */
if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
#endif
}
bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
struct bio *bio)
{
struct throtl_qnode *qn = NULL;
struct throtl_grp *tg = blkg_to_tg(blkg);
struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
struct throtl_service_queue *sq;
bool rw = bio_data_dir(bio);
bool throttled = false;
......@@ -2138,6 +2148,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
if (unlikely(blk_queue_bypass(q)))
goto out_unlock;
blk_throtl_assoc_bio(tg, bio);
blk_throtl_update_idletime(tg);
sq = &tg->service_queue;
......
......@@ -276,9 +276,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
}
}
bio_clone_blkg_association(bio, bio_src);
blkcg_bio_issue_init(bio);
bio_clone_blkcg_association(bio, bio_src);
return bio;
}
......
......@@ -3759,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
uint64_t serial_nr;
rcu_read_lock();
serial_nr = __bio_blkcg(bio)->css.serial_nr;
serial_nr = bio_blkcg(bio)->css.serial_nr;
rcu_read_unlock();
/*
......@@ -3824,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
struct cfq_group *cfqg;
rcu_read_lock();
cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio));
cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
if (!cfqg) {
cfqq = &cfqd->oom_cfqq;
goto out;
......
......@@ -77,7 +77,6 @@
#include <linux/falloc.h>
#include <linux/uio.h>
#include <linux/ioprio.h>
#include <linux/blk-cgroup.h>
#include "loop.h"
......@@ -1761,8 +1760,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
/* always use the first bio's css */
#ifdef CONFIG_BLK_CGROUP
if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) {
cmd->css = &bio_blkcg(rq->bio)->css;
if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
cmd->css = rq->bio->bi_css;
css_get(cmd->css);
} else
#endif
......
......@@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
!discard_bio)
continue;
bio_chain(discard_bio, bio);
bio_clone_blkg_association(discard_bio, bio);
bio_clone_blkcg_association(discard_bio, bio);
if (mddev->gendisk)
trace_block_bio_remap(bdev_get_queue(rdev->bdev),
discard_bio, disk_devt(mddev->gendisk),
......
......@@ -3060,6 +3060,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
*/
bio = bio_alloc(GFP_NOIO, 1);
if (wbc) {
wbc_init_bio(wbc, bio);
wbc_account_io(wbc, bh->b_page, bh->b_size);
}
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
bio->bi_write_hint = write_hint;
......@@ -3079,11 +3084,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
op_flags |= REQ_PRIO;
bio_set_op_attrs(bio, op, op_flags);
if (wbc) {
wbc_init_bio(wbc, bio);
wbc_account_io(wbc, bh->b_page, bh->b_size);
}
submit_bio(bio);
return 0;
}
......
......@@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
if (!bio)
return -ENOMEM;
wbc_init_bio(io->io_wbc, bio);
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio_set_dev(bio, bh->b_bdev);
bio->bi_end_io = ext4_end_bio;
bio->bi_private = ext4_get_io_end(io->io_end);
io->io_bio = bio;
io->io_next_block = bh->b_blocknr;
wbc_init_bio(io->io_wbc, bio);
return 0;
}
......
......@@ -503,30 +503,22 @@ do { \
disk_devt((bio)->bi_disk)
#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
int bio_associate_blkg_from_page(struct bio *bio, struct page *page);
int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
#else
static inline int bio_associate_blkg_from_page(struct bio *bio,
static inline int bio_associate_blkcg_from_page(struct bio *bio,
struct page *page) { return 0; }
#endif
#ifdef CONFIG_BLK_CGROUP
int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
int bio_associate_blkg_from_css(struct bio *bio,
struct cgroup_subsys_state *css);
int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
int bio_reassociate_blkg(struct request_queue *q, struct bio *bio);
void bio_disassociate_task(struct bio *bio);
void bio_clone_blkg_association(struct bio *dst, struct bio *src);
void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
#else /* CONFIG_BLK_CGROUP */
static inline int bio_associate_blkg_from_css(struct bio *bio,
struct cgroup_subsys_state *css)
{ return 0; }
static inline int bio_associate_create_blkg(struct request_queue *q,
struct bio *bio) { return 0; }
static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
{ return 0; }
static inline int bio_associate_blkcg(struct bio *bio,
struct cgroup_subsys_state *blkcg_css) { return 0; }
static inline void bio_disassociate_task(struct bio *bio) { }
static inline void bio_clone_blkg_association(struct bio *dst,
static inline void bio_clone_blkcg_association(struct bio *dst,
struct bio *src) { }
#endif /* CONFIG_BLK_CGROUP */
......
......@@ -126,7 +126,7 @@ struct blkcg_gq {
struct request_list rl;
/* reference count */
struct percpu_ref refcnt;
atomic_t refcnt;
/* is this blkg online? protected by both blkcg and q locks */
bool online;
......@@ -184,8 +184,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css;
struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
struct request_queue *q, bool update_hint);
struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
......@@ -232,59 +230,22 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);
/**
* blkcg_css - find the current css
*
* Find the css associated with either the kthread or the current task.
* This may return a dying css, so it is up to the caller to use tryget logic
* to confirm it is alive and well.
*/
static inline struct cgroup_subsys_state *blkcg_css(void)
{
struct cgroup_subsys_state *css;
css = kthread_blkcg();
if (css)
return css;
return task_css(current, io_cgrp_id);
}
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
return css ? container_of(css, struct blkcg, css) : NULL;
}
/**
* __bio_blkcg - internal version of bio_blkcg for bfq and cfq
*
* DO NOT USE.
* There is a flaw using this version of the function. In particular, this was
* used in a broken paradigm where association was called on the given css. It
* is possible though that the returned css from task_css() is in the process
* of dying due to migration of the current task. So it is improper to assume
* *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will
* take additional work to handle more gracefully.
*/
static inline struct blkcg *__bio_blkcg(struct bio *bio)
{
if (bio && bio->bi_blkg)
return bio->bi_blkg->blkcg;
return css_to_blkcg(blkcg_css());
}
/**
* bio_blkcg - grab the blkcg associated with a bio
* @bio: target bio
*
* This returns the blkcg associated with a bio, NULL if not associated.
* Callers are expected to either handle NULL or know association has been
* done prior to calling this.
*/
static inline struct blkcg *bio_blkcg(struct bio *bio)
{
if (bio && bio->bi_blkg)
return bio->bi_blkg->blkcg;
return NULL;
struct cgroup_subsys_state *css;
if (bio && bio->bi_css)
return css_to_blkcg(bio->bi_css);
css = kthread_blkcg();
if (css)
return css_to_blkcg(css);
return css_to_blkcg(task_css(current, io_cgrp_id));
}
static inline bool blk_cgroup_congested(void)
......@@ -490,35 +451,26 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
*/
static inline void blkg_get(struct blkcg_gq *blkg)
{
percpu_ref_get(&blkg->refcnt);
WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
atomic_inc(&blkg->refcnt);
}
/**
* blkg_tryget - try and get a blkg reference
* blkg_try_get - try and get a blkg reference
* @blkg: blkg to get
*
* This is for use when doing an RCU lookup of the blkg. We may be in the midst
* of freeing this blkg, so we can only use it if the refcnt is not zero.
*/
static inline bool blkg_tryget(struct blkcg_gq *blkg)
static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
{
return percpu_ref_tryget(&blkg->refcnt);
if (atomic_inc_not_zero(&blkg->refcnt))
return blkg;
return NULL;
}
/**
* blkg_tryget_closest - try and get a blkg ref on the closet blkg
* @blkg: blkg to get
*
* This walks up the blkg tree to find the closest non-dying blkg and returns
* the blkg that it did association with as it may not be the passed in blkg.
*/
static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
{
while (!percpu_ref_tryget(&blkg->refcnt))
blkg = blkg->parent;
return blkg;
}
void __blkg_release_rcu(struct rcu_head *rcu);
/**
* blkg_put - put a blkg reference
......@@ -526,7 +478,9 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
*/
static inline void blkg_put(struct blkcg_gq *blkg)
{
percpu_ref_put(&blkg->refcnt);
WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
if (atomic_dec_and_test(&blkg->refcnt))
call_rcu(&blkg->rcu_head, __blkg_release_rcu);
}
/**
......@@ -579,36 +533,25 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
rcu_read_lock();
if (bio && bio->bi_blkg) {
blkcg = bio->bi_blkg->blkcg;
if (blkcg == &blkcg_root)
goto rl_use_root;
blkg_get(bio->bi_blkg);
rcu_read_unlock();
return &bio->bi_blkg->rl;
}
blkcg = bio_blkcg(bio);
blkcg = css_to_blkcg(blkcg_css());
/* bypass blkg lookup and use @q->root_rl directly for root */
if (blkcg == &blkcg_root)
goto rl_use_root;
goto root_rl;
/*
* Try to use blkg->rl. blkg lookup may fail under memory pressure
* or if either the blkcg or queue is going away. Fall back to
* root_rl in such cases.
*/
blkg = blkg_lookup(blkcg, q);
if (unlikely(!blkg))
blkg = __blkg_lookup_create(blkcg, q);
if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg))
goto rl_use_root;
goto root_rl;
blkg_get(blkg);
rcu_read_unlock();
return &blkg->rl;
/*
* Each blkg has its own request_list, however, the root blkcg
* uses the request_queue's root_rl. This is to avoid most
* overhead for the root blkcg.
*/
rl_use_root:
root_rl:
rcu_read_unlock();
return &q->root_rl;
}
......@@ -854,26 +797,32 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
struct bio *bio) { return false; }
#endif
static inline void blkcg_bio_issue_init(struct bio *bio)
{
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
}
static inline bool blkcg_bio_issue_check(struct request_queue *q,
struct bio *bio)
{
struct blkcg *blkcg;
struct blkcg_gq *blkg;
bool throtl = false;
rcu_read_lock();
blkcg = bio_blkcg(bio);
/* associate blkcg if bio hasn't attached one */
bio_associate_blkcg(bio, &blkcg->css);
bio_associate_create_blkg(q, bio);
blkg = bio->bi_blkg;
blkg = blkg_lookup(blkcg, q);
if (unlikely(!blkg)) {
spin_lock_irq(q->queue_lock);
blkg = blkg_lookup_create(blkcg, q);
if (IS_ERR(blkg))
blkg = NULL;
spin_unlock_irq(q->queue_lock);
}
throtl = blk_throtl_bio(q, blkg, bio);
if (!throtl) {
blkg = blkg ?: q->root_blkg;
/*
* If the bio is flagged with BIO_QUEUE_ENTERED it means this
* is a split bio and we would have already accounted for the
......@@ -885,8 +834,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
}
blkcg_bio_issue_init(bio);
rcu_read_unlock();
return !throtl;
}
......@@ -983,7 +930,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
static inline void blkcg_deactivate_policy(struct request_queue *q,
const struct blkcg_policy *pol) { }
static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
......@@ -999,7 +945,6 @@ static inline void blk_put_rl(struct request_list *rl) { }
static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline bool blkcg_bio_issue_check(struct request_queue *q,
struct bio *bio) { return true; }
......
......@@ -178,6 +178,7 @@ struct bio {
* release. Read comment on top of bio_associate_current().
*/
struct io_context *bi_ioc;
struct cgroup_subsys_state *bi_css;
struct blkcg_gq *bi_blkg;
struct bio_issue bi_issue;
#endif
......
......@@ -93,8 +93,6 @@ extern struct css_set init_css_set;
bool css_has_online_children(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
struct cgroup_subsys *ss);
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
struct cgroup_subsys *ss);
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
......
......@@ -246,8 +246,7 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
*
* @bio is a part of the writeback in progress controlled by @wbc. Perform
* writeback specific initialization. This is used to apply the cgroup
* writeback context. Must be called after the bio has been associated with
* a device.
* writeback context.
*/
static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
{
......@@ -258,7 +257,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
* regular writeback instead of writing things out itself.
*/
if (wbc->wb)
bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
bio_associate_blkcg(bio, wbc->wb->blkcg_css);
}
#else /* CONFIG_CGROUP_WRITEBACK */
......
......@@ -492,7 +492,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
}
/**
* cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
* cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest (%NULL returns @cgrp->self)
*
......@@ -501,7 +501,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
* enabled. If @ss is associated with the hierarchy @cgrp is on, this
* function is guaranteed to return non-NULL css.
*/
static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
struct cgroup_subsys *ss)
{
lockdep_assert_held(&cgroup_mutex);
......@@ -522,35 +522,6 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
return cgroup_css(cgrp, ss);
}
/**
* cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
* @cgrp: the cgroup of interest
* @ss: the subsystem of interest
*
* Find and get the effective css of @cgrp for @ss. The effective css is
* defined as the matching css of the nearest ancestor including self which
* has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on,
* the root css is returned, so this function always returns a valid css.
*
* The returned css is not guaranteed to be online, and therefore it is the
* callers responsiblity to tryget a reference for it.
*/
struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
struct cgroup_subsys *ss)
{
struct cgroup_subsys_state *css;
do {
css = cgroup_css(cgrp, ss);
if (css)
return css;
cgrp = cgroup_parent(cgrp);
} while (cgrp);
return init_css_set.subsys[ss->id];
}
/**
* cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
* @cgrp: the cgroup of interest
......@@ -635,8 +606,7 @@ EXPORT_SYMBOL_GPL(of_css);
*/
#define for_each_e_css(css, ssid, cgrp) \
for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
if (!((css) = cgroup_e_css_by_mask(cgrp, \
cgroup_subsys[(ssid)]))) \
if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
; \
else
......@@ -1036,7 +1006,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
* @ss is in this hierarchy, so we want the
* effective css from @cgrp.
*/
template[i] = cgroup_e_css_by_mask(cgrp, ss);
template[i] = cgroup_e_css(cgrp, ss);
} else {
/*
* @ss is not in this hierarchy, so we don't want
......@@ -3053,7 +3023,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
return ret;
/*
* At this point, cgroup_e_css_by_mask() results reflect the new csses
* At this point, cgroup_e_css() results reflect the new csses
* making the following cgroup_update_dfl_csses() properly update
* css associations of all tasks in the subtree.
*/
......
......@@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
return NULL;
if (!bio->bi_blkg)
if (!bio->bi_css)
return NULL;
return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup);
return cgroup_get_kernfs_id(bio->bi_css->cgroup);
}
#else
static union kernfs_node_id *
......
......@@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
goto out;
}
bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
bio_associate_blkg_from_page(bio, page);
bio_associate_blkcg_from_page(bio, page);
count_swpout_vm_event(page);
set_page_writeback(page);
unlock_page(page);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment