Commit e56da7e2 authored by Tejun Heo, committed by Jens Axboe

blkcg: don't allow or retain configuration of missing devices

blkcg is very peculiar in that it allows setting and remembering
configurations for non-existent devices by maintaining separate data
structures for configuration.

This behavior is completely out of the usual norms and outright
confusing; furthermore, it uses dev_t number to match the
configuration to devices, which is unpredictable to begin with and
becomes completely unusable if EXT_DEVT is fully used.

It is wholly unnecessary - we already have a fully functional userland
mechanism to program devices being hotplugged which has full access to
device identification, connection topology and filesystem information.

Add a new struct blkio_group_conf which contains all blkcg
configurations to blkio_group and let blkio_group, which can be
created iff the associated device exists and is removed when the
associated device goes away, carry all configurations.

Note that, after this patch, all newly created blkg's will always have
the default configuration (unlimited for throttling and blkcg's weight
for propio).

This patch makes blkio_policy_node meaningless but doesn't remove it.
The next patch will.

-v2: Updated to retry after short sleep if blkg lookup/creation failed
     due to the queue being temporarily bypassed as indicated by
     -EBUSY return.  Pointed out by Vivek.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent cd1604fa
...@@ -855,9 +855,12 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, ...@@ -855,9 +855,12 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg,
} }
static int blkio_policy_parse_and_set(char *buf, static int blkio_policy_parse_and_set(char *buf,
struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid) struct blkio_policy_node *newpn,
enum blkio_policy_id plid, int fileid,
struct blkio_cgroup *blkcg)
{ {
struct gendisk *disk = NULL; struct gendisk *disk = NULL;
struct blkio_group *blkg = NULL;
char *s[4], *p, *major_s = NULL, *minor_s = NULL; char *s[4], *p, *major_s = NULL, *minor_s = NULL;
unsigned long major, minor; unsigned long major, minor;
int i = 0, ret = -EINVAL; int i = 0, ret = -EINVAL;
...@@ -903,11 +906,25 @@ static int blkio_policy_parse_and_set(char *buf, ...@@ -903,11 +906,25 @@ static int blkio_policy_parse_and_set(char *buf,
goto out; goto out;
/* For rule removal, do not check for device presence. */ /* For rule removal, do not check for device presence. */
if (temp) { disk = get_gendisk(dev, &part);
disk = get_gendisk(dev, &part);
if (!disk || part) { if ((!disk || part) && temp) {
ret = -ENODEV; ret = -ENODEV;
goto out; goto out;
}
rcu_read_lock();
if (disk && !part) {
spin_lock_irq(disk->queue->queue_lock);
blkg = blkg_lookup_create(blkcg, disk->queue, plid, false);
spin_unlock_irq(disk->queue->queue_lock);
if (IS_ERR(blkg)) {
ret = PTR_ERR(blkg);
if (ret == -EBUSY)
goto out_unlock;
blkg = NULL;
} }
} }
...@@ -917,25 +934,46 @@ static int blkio_policy_parse_and_set(char *buf, ...@@ -917,25 +934,46 @@ static int blkio_policy_parse_and_set(char *buf,
case BLKIO_POLICY_PROP: case BLKIO_POLICY_PROP:
if ((temp < BLKIO_WEIGHT_MIN && temp > 0) || if ((temp < BLKIO_WEIGHT_MIN && temp > 0) ||
temp > BLKIO_WEIGHT_MAX) temp > BLKIO_WEIGHT_MAX)
goto out; goto out_unlock;
newpn->plid = plid; newpn->plid = plid;
newpn->fileid = fileid; newpn->fileid = fileid;
newpn->val.weight = temp; newpn->val.weight = temp;
if (blkg)
blkg->conf.weight = temp;
break; break;
case BLKIO_POLICY_THROTL: case BLKIO_POLICY_THROTL:
switch(fileid) { switch(fileid) {
case BLKIO_THROTL_read_bps_device: case BLKIO_THROTL_read_bps_device:
if (blkg)
blkg->conf.bps[READ] = temp;
newpn->plid = plid;
newpn->fileid = fileid;
newpn->val.bps = temp;
break;
case BLKIO_THROTL_write_bps_device: case BLKIO_THROTL_write_bps_device:
if (blkg)
blkg->conf.bps[WRITE] = temp;
newpn->plid = plid; newpn->plid = plid;
newpn->fileid = fileid; newpn->fileid = fileid;
newpn->val.bps = temp; newpn->val.bps = temp;
break; break;
case BLKIO_THROTL_read_iops_device: case BLKIO_THROTL_read_iops_device:
if (temp > THROTL_IOPS_MAX)
goto out_unlock;
if (blkg)
blkg->conf.iops[READ] = temp;
newpn->plid = plid;
newpn->fileid = fileid;
newpn->val.iops = (unsigned int)temp;
break;
case BLKIO_THROTL_write_iops_device: case BLKIO_THROTL_write_iops_device:
if (temp > THROTL_IOPS_MAX) if (temp > THROTL_IOPS_MAX)
goto out; goto out_unlock;
if (blkg)
blkg->conf.iops[WRITE] = temp;
newpn->plid = plid; newpn->plid = plid;
newpn->fileid = fileid; newpn->fileid = fileid;
newpn->val.iops = (unsigned int)temp; newpn->val.iops = (unsigned int)temp;
...@@ -946,8 +984,21 @@ static int blkio_policy_parse_and_set(char *buf, ...@@ -946,8 +984,21 @@ static int blkio_policy_parse_and_set(char *buf,
BUG(); BUG();
} }
ret = 0; ret = 0;
out_unlock:
rcu_read_unlock();
out: out:
put_disk(disk); put_disk(disk);
/*
* If queue was bypassing, we should retry. Do so after a short
* msleep(). It isn't strictly necessary but queue can be
* bypassing for some time and it's always nice to avoid busy
* looping.
*/
if (ret == -EBUSY) {
msleep(10);
return restart_syscall();
}
return ret; return ret;
} }
...@@ -1095,26 +1146,29 @@ static void blkio_update_policy_rule(struct blkio_policy_node *oldpn, ...@@ -1095,26 +1146,29 @@ static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg, static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
struct blkio_group *blkg, struct blkio_policy_node *pn) struct blkio_group *blkg, struct blkio_policy_node *pn)
{ {
unsigned int weight, iops; struct blkio_group_conf *conf = &blkg->conf;
u64 bps;
switch(pn->plid) { switch(pn->plid) {
case BLKIO_POLICY_PROP: case BLKIO_POLICY_PROP:
weight = pn->val.weight ? pn->val.weight : blkio_update_group_weight(blkg, conf->weight ?: blkcg->weight);
blkcg->weight;
blkio_update_group_weight(blkg, weight);
break; break;
case BLKIO_POLICY_THROTL: case BLKIO_POLICY_THROTL:
switch(pn->fileid) { switch(pn->fileid) {
case BLKIO_THROTL_read_bps_device: case BLKIO_THROTL_read_bps_device:
blkio_update_group_bps(blkg, conf->bps[READ] ?: -1,
pn->fileid);
break;
case BLKIO_THROTL_write_bps_device: case BLKIO_THROTL_write_bps_device:
bps = pn->val.bps ? pn->val.bps : (-1); blkio_update_group_bps(blkg, conf->bps[WRITE] ?: -1,
blkio_update_group_bps(blkg, bps, pn->fileid); pn->fileid);
break; break;
case BLKIO_THROTL_read_iops_device: case BLKIO_THROTL_read_iops_device:
blkio_update_group_iops(blkg, conf->iops[READ] ?: -1,
pn->fileid);
break;
case BLKIO_THROTL_write_iops_device: case BLKIO_THROTL_write_iops_device:
iops = pn->val.iops ? pn->val.iops : (-1); blkio_update_group_iops(blkg, conf->iops[WRITE] ?: -1,
blkio_update_group_iops(blkg, iops, pn->fileid); pn->fileid);
break; break;
} }
break; break;
...@@ -1152,7 +1206,7 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, ...@@ -1152,7 +1206,7 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
int ret = 0; int ret = 0;
char *buf; char *buf;
struct blkio_policy_node *newpn, *pn; struct blkio_policy_node *newpn, *pn;
struct blkio_cgroup *blkcg; struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
int keep_newpn = 0; int keep_newpn = 0;
enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private); enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
int fileid = BLKIOFILE_ATTR(cft->private); int fileid = BLKIOFILE_ATTR(cft->private);
...@@ -1167,12 +1221,10 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, ...@@ -1167,12 +1221,10 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
goto free_buf; goto free_buf;
} }
ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid); ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid, blkcg);
if (ret) if (ret)
goto free_newpn; goto free_newpn;
blkcg = cgroup_to_blkio_cgroup(cgrp);
spin_lock_irq(&blkcg->lock); spin_lock_irq(&blkcg->lock);
pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid); pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid);
......
...@@ -154,6 +154,12 @@ struct blkio_group_stats_cpu { ...@@ -154,6 +154,12 @@ struct blkio_group_stats_cpu {
struct u64_stats_sync syncp; struct u64_stats_sync syncp;
}; };
/*
 * Per-blkio_group configuration, embedded in struct blkio_group as ->conf.
 *
 * Values are written by blkio_policy_parse_and_set() and read back by
 * blkio_update_blkg_policy().  A field left at zero is treated there as
 * "not configured": weight falls back to blkcg->weight, and the bps/iops
 * limits fall back to -1.
 */
struct blkio_group_conf {
/* Weight set through the BLKIO_POLICY_PROP path; 0 selects blkcg->weight */
unsigned int weight;
/* IO-per-second limits, indexed by READ / WRITE; 0 is replaced by -1 */
unsigned int iops[2];
/* Bytes-per-second limits, indexed by READ / WRITE; 0 is replaced by -1 */
u64 bps[2];
};
struct blkio_group { struct blkio_group {
/* Pointer to the associated request_queue, RCU protected */ /* Pointer to the associated request_queue, RCU protected */
struct request_queue __rcu *q; struct request_queue __rcu *q;
...@@ -166,6 +172,9 @@ struct blkio_group { ...@@ -166,6 +172,9 @@ struct blkio_group {
/* policy which owns this blk group */ /* policy which owns this blk group */
enum blkio_policy_id plid; enum blkio_policy_id plid;
/* Configuration */
struct blkio_group_conf conf;
/* Need to serialize the stats in the case of reset/update */ /* Need to serialize the stats in the case of reset/update */
spinlock_t stats_lock; spinlock_t stats_lock;
struct blkio_group_stats stats; struct blkio_group_stats stats;
......
...@@ -196,10 +196,10 @@ static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q, ...@@ -196,10 +196,10 @@ static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q,
bio_list_init(&tg->bio_lists[1]); bio_list_init(&tg->bio_lists[1]);
tg->limits_changed = false; tg->limits_changed = false;
tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev); tg->bps[READ] = -1;
tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev); tg->bps[WRITE] = -1;
tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev); tg->iops[READ] = -1;
tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev); tg->iops[WRITE] = -1;
/* /*
* Take the initial reference that will be released on destroy * Take the initial reference that will be released on destroy
......
...@@ -1083,7 +1083,7 @@ static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q, ...@@ -1083,7 +1083,7 @@ static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q,
return NULL; return NULL;
cfq_init_cfqg_base(cfqg); cfq_init_cfqg_base(cfqg);
cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); cfqg->weight = blkcg->weight;
/* /*
* Take the initial reference that will be released on destroy * Take the initial reference that will be released on destroy
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment