Commit cd5ab1b0 authored by Shaohua Li's avatar Shaohua Li Committed by Jens Axboe

blk-throttle: add .low interface

Add low limit for cgroup and corresponding cgroup interface. To be
consistent with memcg, we allow users configure .low limit higher than
.max limit. But the internal logic always assumes .low limit is lower
than .max limit. So we add extra bps/iops_conf fields in throtl_grp for
userspace configuration. Old bps/iops fields in throtl_grp will be the
actual limit we use for throttling.
Signed-off-by: default avatarShaohua Li <shli@fb.com>
Signed-off-by: default avatarJens Axboe <axboe@fb.com>
parent 327ffb9b
...@@ -84,6 +84,7 @@ enum tg_state_flags { ...@@ -84,6 +84,7 @@ enum tg_state_flags {
#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node)
enum { enum {
LIMIT_LOW,
LIMIT_MAX, LIMIT_MAX,
LIMIT_CNT, LIMIT_CNT,
}; };
...@@ -124,11 +125,15 @@ struct throtl_grp { ...@@ -124,11 +125,15 @@ struct throtl_grp {
/* are there any throtl rules between this group and td? */ /* are there any throtl rules between this group and td? */
bool has_rules[2]; bool has_rules[2];
/* bytes per second rate limits */ /* internally used bytes per second rate limits */
uint64_t bps[2][LIMIT_CNT]; uint64_t bps[2][LIMIT_CNT];
/* user configured bps limits */
uint64_t bps_conf[2][LIMIT_CNT];
/* IOPS limits */ /* internally used IOPS limits */
unsigned int iops[2][LIMIT_CNT]; unsigned int iops[2][LIMIT_CNT];
/* user configured IOPS limits */
unsigned int iops_conf[2][LIMIT_CNT];
/* Number of bytes disptached in current slice */ /* Number of bytes disptached in current slice */
uint64_t bytes_disp[2]; uint64_t bytes_disp[2];
...@@ -355,6 +360,11 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) ...@@ -355,6 +360,11 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
tg->bps[WRITE][LIMIT_MAX] = U64_MAX; tg->bps[WRITE][LIMIT_MAX] = U64_MAX;
tg->iops[READ][LIMIT_MAX] = UINT_MAX; tg->iops[READ][LIMIT_MAX] = UINT_MAX;
tg->iops[WRITE][LIMIT_MAX] = UINT_MAX; tg->iops[WRITE][LIMIT_MAX] = UINT_MAX;
tg->bps_conf[READ][LIMIT_MAX] = U64_MAX;
tg->bps_conf[WRITE][LIMIT_MAX] = U64_MAX;
tg->iops_conf[READ][LIMIT_MAX] = UINT_MAX;
tg->iops_conf[WRITE][LIMIT_MAX] = UINT_MAX;
/* LIMIT_LOW will have default value 0 */
return &tg->pd; return &tg->pd;
} }
...@@ -412,6 +422,41 @@ static void throtl_pd_online(struct blkg_policy_data *pd) ...@@ -412,6 +422,41 @@ static void throtl_pd_online(struct blkg_policy_data *pd)
tg_update_has_rules(pd_to_tg(pd)); tg_update_has_rules(pd_to_tg(pd));
} }
static void blk_throtl_update_limit_valid(struct throtl_data *td)
{
struct cgroup_subsys_state *pos_css;
struct blkcg_gq *blkg;
bool low_valid = false;
rcu_read_lock();
blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
struct throtl_grp *tg = blkg_to_tg(blkg);
if (tg->bps[READ][LIMIT_LOW] || tg->bps[WRITE][LIMIT_LOW] ||
tg->iops[READ][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW])
low_valid = true;
}
rcu_read_unlock();
td->limit_valid[LIMIT_LOW] = low_valid;
}
static void throtl_pd_offline(struct blkg_policy_data *pd)
{
struct throtl_grp *tg = pd_to_tg(pd);
tg->bps[READ][LIMIT_LOW] = 0;
tg->bps[WRITE][LIMIT_LOW] = 0;
tg->iops[READ][LIMIT_LOW] = 0;
tg->iops[WRITE][LIMIT_LOW] = 0;
blk_throtl_update_limit_valid(tg->td);
if (tg->td->limit_index == LIMIT_LOW &&
!tg->td->limit_valid[LIMIT_LOW])
tg->td->limit_index = LIMIT_MAX;
}
static void throtl_pd_free(struct blkg_policy_data *pd) static void throtl_pd_free(struct blkg_policy_data *pd)
{ {
struct throtl_grp *tg = pd_to_tg(pd); struct throtl_grp *tg = pd_to_tg(pd);
...@@ -1284,48 +1329,58 @@ static struct cftype throtl_legacy_files[] = { ...@@ -1284,48 +1329,58 @@ static struct cftype throtl_legacy_files[] = {
{ } /* terminate */ { } /* terminate */
}; };
static u64 tg_prfill_max(struct seq_file *sf, struct blkg_policy_data *pd, static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
int off) int off)
{ {
struct throtl_grp *tg = pd_to_tg(pd); struct throtl_grp *tg = pd_to_tg(pd);
const char *dname = blkg_dev_name(pd->blkg); const char *dname = blkg_dev_name(pd->blkg);
char bufs[4][21] = { "max", "max", "max", "max" }; char bufs[4][21] = { "max", "max", "max", "max" };
u64 bps_dft;
unsigned int iops_dft;
if (!dname) if (!dname)
return 0; return 0;
if (tg->bps[READ][LIMIT_MAX] == U64_MAX && if (off == LIMIT_LOW) {
tg->bps[WRITE][LIMIT_MAX] == U64_MAX && bps_dft = 0;
tg->iops[READ][LIMIT_MAX] == UINT_MAX && iops_dft = 0;
tg->iops[WRITE][LIMIT_MAX] == UINT_MAX) } else {
bps_dft = U64_MAX;
iops_dft = UINT_MAX;
}
if (tg->bps_conf[READ][off] == bps_dft &&
tg->bps_conf[WRITE][off] == bps_dft &&
tg->iops_conf[READ][off] == iops_dft &&
tg->iops_conf[WRITE][off] == iops_dft)
return 0; return 0;
if (tg->bps[READ][LIMIT_MAX] != U64_MAX) if (tg->bps_conf[READ][off] != bps_dft)
snprintf(bufs[0], sizeof(bufs[0]), "%llu", snprintf(bufs[0], sizeof(bufs[0]), "%llu",
tg->bps[READ][LIMIT_MAX]); tg->bps_conf[READ][off]);
if (tg->bps[WRITE][LIMIT_MAX] != U64_MAX) if (tg->bps_conf[WRITE][off] != bps_dft)
snprintf(bufs[1], sizeof(bufs[1]), "%llu", snprintf(bufs[1], sizeof(bufs[1]), "%llu",
tg->bps[WRITE][LIMIT_MAX]); tg->bps_conf[WRITE][off]);
if (tg->iops[READ][LIMIT_MAX] != UINT_MAX) if (tg->iops_conf[READ][off] != iops_dft)
snprintf(bufs[2], sizeof(bufs[2]), "%u", snprintf(bufs[2], sizeof(bufs[2]), "%u",
tg->iops[READ][LIMIT_MAX]); tg->iops_conf[READ][off]);
if (tg->iops[WRITE][LIMIT_MAX] != UINT_MAX) if (tg->iops_conf[WRITE][off] != iops_dft)
snprintf(bufs[3], sizeof(bufs[3]), "%u", snprintf(bufs[3], sizeof(bufs[3]), "%u",
tg->iops[WRITE][LIMIT_MAX]); tg->iops_conf[WRITE][off]);
seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s\n", seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s\n",
dname, bufs[0], bufs[1], bufs[2], bufs[3]); dname, bufs[0], bufs[1], bufs[2], bufs[3]);
return 0; return 0;
} }
static int tg_print_max(struct seq_file *sf, void *v) static int tg_print_limit(struct seq_file *sf, void *v)
{ {
blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_max, blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_limit,
&blkcg_policy_throtl, seq_cft(sf)->private, false); &blkcg_policy_throtl, seq_cft(sf)->private, false);
return 0; return 0;
} }
static ssize_t tg_set_max(struct kernfs_open_file *of, static ssize_t tg_set_limit(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off) char *buf, size_t nbytes, loff_t off)
{ {
struct blkcg *blkcg = css_to_blkcg(of_css(of)); struct blkcg *blkcg = css_to_blkcg(of_css(of));
...@@ -1333,6 +1388,7 @@ static ssize_t tg_set_max(struct kernfs_open_file *of, ...@@ -1333,6 +1388,7 @@ static ssize_t tg_set_max(struct kernfs_open_file *of,
struct throtl_grp *tg; struct throtl_grp *tg;
u64 v[4]; u64 v[4];
int ret; int ret;
int index = of_cft(of)->private;
ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx); ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx);
if (ret) if (ret)
...@@ -1340,10 +1396,10 @@ static ssize_t tg_set_max(struct kernfs_open_file *of, ...@@ -1340,10 +1396,10 @@ static ssize_t tg_set_max(struct kernfs_open_file *of,
tg = blkg_to_tg(ctx.blkg); tg = blkg_to_tg(ctx.blkg);
v[0] = tg->bps[READ][LIMIT_MAX]; v[0] = tg->bps_conf[READ][index];
v[1] = tg->bps[WRITE][LIMIT_MAX]; v[1] = tg->bps_conf[WRITE][index];
v[2] = tg->iops[READ][LIMIT_MAX]; v[2] = tg->iops_conf[READ][index];
v[3] = tg->iops[WRITE][LIMIT_MAX]; v[3] = tg->iops_conf[WRITE][index];
while (true) { while (true) {
char tok[27]; /* wiops=18446744073709551616 */ char tok[27]; /* wiops=18446744073709551616 */
...@@ -1380,11 +1436,31 @@ static ssize_t tg_set_max(struct kernfs_open_file *of, ...@@ -1380,11 +1436,31 @@ static ssize_t tg_set_max(struct kernfs_open_file *of,
goto out_finish; goto out_finish;
} }
tg->bps[READ][LIMIT_MAX] = v[0]; tg->bps_conf[READ][index] = v[0];
tg->bps[WRITE][LIMIT_MAX] = v[1]; tg->bps_conf[WRITE][index] = v[1];
tg->iops[READ][LIMIT_MAX] = v[2]; tg->iops_conf[READ][index] = v[2];
tg->iops[WRITE][LIMIT_MAX] = v[3]; tg->iops_conf[WRITE][index] = v[3];
if (index == LIMIT_MAX) {
tg->bps[READ][index] = v[0];
tg->bps[WRITE][index] = v[1];
tg->iops[READ][index] = v[2];
tg->iops[WRITE][index] = v[3];
}
tg->bps[READ][LIMIT_LOW] = min(tg->bps_conf[READ][LIMIT_LOW],
tg->bps_conf[READ][LIMIT_MAX]);
tg->bps[WRITE][LIMIT_LOW] = min(tg->bps_conf[WRITE][LIMIT_LOW],
tg->bps_conf[WRITE][LIMIT_MAX]);
tg->iops[READ][LIMIT_LOW] = min(tg->iops_conf[READ][LIMIT_LOW],
tg->iops_conf[READ][LIMIT_MAX]);
tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
tg->iops_conf[WRITE][LIMIT_MAX]);
if (index == LIMIT_LOW) {
blk_throtl_update_limit_valid(tg->td);
if (tg->td->limit_valid[LIMIT_LOW])
tg->td->limit_index = LIMIT_LOW;
}
tg_conf_updated(tg); tg_conf_updated(tg);
ret = 0; ret = 0;
out_finish: out_finish:
...@@ -1393,11 +1469,21 @@ static ssize_t tg_set_max(struct kernfs_open_file *of, ...@@ -1393,11 +1469,21 @@ static ssize_t tg_set_max(struct kernfs_open_file *of,
} }
static struct cftype throtl_files[] = { static struct cftype throtl_files[] = {
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
{
.name = "low",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = tg_print_limit,
.write = tg_set_limit,
.private = LIMIT_LOW,
},
#endif
{ {
.name = "max", .name = "max",
.flags = CFTYPE_NOT_ON_ROOT, .flags = CFTYPE_NOT_ON_ROOT,
.seq_show = tg_print_max, .seq_show = tg_print_limit,
.write = tg_set_max, .write = tg_set_limit,
.private = LIMIT_MAX,
}, },
{ } /* terminate */ { } /* terminate */
}; };
...@@ -1416,6 +1502,7 @@ static struct blkcg_policy blkcg_policy_throtl = { ...@@ -1416,6 +1502,7 @@ static struct blkcg_policy blkcg_policy_throtl = {
.pd_alloc_fn = throtl_pd_alloc, .pd_alloc_fn = throtl_pd_alloc,
.pd_init_fn = throtl_pd_init, .pd_init_fn = throtl_pd_init,
.pd_online_fn = throtl_pd_online, .pd_online_fn = throtl_pd_online,
.pd_offline_fn = throtl_pd_offline,
.pd_free_fn = throtl_pd_free, .pd_free_fn = throtl_pd_free,
}; };
...@@ -1595,6 +1682,7 @@ int blk_throtl_init(struct request_queue *q) ...@@ -1595,6 +1682,7 @@ int blk_throtl_init(struct request_queue *q)
td->queue = q; td->queue = q;
td->limit_valid[LIMIT_MAX] = true; td->limit_valid[LIMIT_MAX] = true;
td->limit_index = LIMIT_MAX;
/* activate policy */ /* activate policy */
ret = blkcg_activate_policy(q, &blkcg_policy_throtl); ret = blkcg_activate_policy(q, &blkcg_policy_throtl);
if (ret) if (ret)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment