Commit b4f428ef authored by Shaohua Li's avatar Shaohua Li Committed by Jens Axboe

blk-throttle: force user to configure all settings for io.low

The default value of the io.low limit is 0. If the user doesn't configure
the limit, the last patch makes the cgroup be throttled to a very tiny
bps/iops, which could stall the system. A cgroup with default settings of
the io.low limit really
means nothing, so we force user to configure all settings, otherwise
io.low limit doesn't take effect. With this strategy, the default setting
of latency/idle isn't important, so just set them to very conservative and
safe values.
Signed-off-by: default avatarShaohua Li <shli@fb.com>
Acked-by: default avatarTejun Heo <tj@kernel.org>
Signed-off-by: default avatarJens Axboe <axboe@fb.com>
parent 9bb67aeb
...@@ -22,13 +22,11 @@ static int throtl_quantum = 32; ...@@ -22,13 +22,11 @@ static int throtl_quantum = 32;
#define DFL_THROTL_SLICE_HD (HZ / 10) #define DFL_THROTL_SLICE_HD (HZ / 10)
#define DFL_THROTL_SLICE_SSD (HZ / 50) #define DFL_THROTL_SLICE_SSD (HZ / 50)
#define MAX_THROTL_SLICE (HZ) #define MAX_THROTL_SLICE (HZ)
#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */
#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */
#define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */ #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
/* default latency target is 0, eg, guarantee IO latency by default */
#define DFL_LATENCY_TARGET (0)
#define MIN_THROTL_BPS (320 * 1024) #define MIN_THROTL_BPS (320 * 1024)
#define MIN_THROTL_IOPS (10) #define MIN_THROTL_IOPS (10)
#define DFL_LATENCY_TARGET (-1L)
#define DFL_IDLE_THRESHOLD (0)
#define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT) #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
...@@ -205,8 +203,6 @@ struct throtl_data ...@@ -205,8 +203,6 @@ struct throtl_data
unsigned int limit_index; unsigned int limit_index;
bool limit_valid[LIMIT_CNT]; bool limit_valid[LIMIT_CNT];
unsigned long dft_idletime_threshold; /* us */
unsigned long low_upgrade_time; unsigned long low_upgrade_time;
unsigned long low_downgrade_time; unsigned long low_downgrade_time;
...@@ -500,6 +496,8 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node) ...@@ -500,6 +496,8 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
tg->latency_target = DFL_LATENCY_TARGET; tg->latency_target = DFL_LATENCY_TARGET;
tg->latency_target_conf = DFL_LATENCY_TARGET; tg->latency_target_conf = DFL_LATENCY_TARGET;
tg->idletime_threshold = DFL_IDLE_THRESHOLD;
tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
return &tg->pd; return &tg->pd;
} }
...@@ -528,9 +526,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd) ...@@ -528,9 +526,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent) if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue; sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
tg->td = td; tg->td = td;
tg->idletime_threshold = td->dft_idletime_threshold;
tg->idletime_threshold_conf = td->dft_idletime_threshold;
} }
/* /*
...@@ -1534,7 +1529,7 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd, ...@@ -1534,7 +1529,7 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
tg->iops_conf[READ][off] == iops_dft && tg->iops_conf[READ][off] == iops_dft &&
tg->iops_conf[WRITE][off] == iops_dft && tg->iops_conf[WRITE][off] == iops_dft &&
(off != LIMIT_LOW || (off != LIMIT_LOW ||
(tg->idletime_threshold_conf == tg->td->dft_idletime_threshold && (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&
tg->latency_target_conf == DFL_LATENCY_TARGET))) tg->latency_target_conf == DFL_LATENCY_TARGET)))
return 0; return 0;
...@@ -1660,16 +1655,31 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, ...@@ -1660,16 +1655,31 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
tg->iops_conf[READ][LIMIT_MAX]); tg->iops_conf[READ][LIMIT_MAX]);
tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW], tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
tg->iops_conf[WRITE][LIMIT_MAX]); tg->iops_conf[WRITE][LIMIT_MAX]);
if (index == LIMIT_LOW) {
blk_throtl_update_limit_valid(tg->td);
if (tg->td->limit_valid[LIMIT_LOW])
tg->td->limit_index = LIMIT_LOW;
tg->idletime_threshold_conf = idle_time; tg->idletime_threshold_conf = idle_time;
tg->idletime_threshold = tg->idletime_threshold_conf;
tg->latency_target_conf = latency_time; tg->latency_target_conf = latency_time;
/* force user to configure all settings for low limit */
if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||
tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||
tg->latency_target_conf == DFL_LATENCY_TARGET) {
tg->bps[READ][LIMIT_LOW] = 0;
tg->bps[WRITE][LIMIT_LOW] = 0;
tg->iops[READ][LIMIT_LOW] = 0;
tg->iops[WRITE][LIMIT_LOW] = 0;
tg->idletime_threshold = DFL_IDLE_THRESHOLD;
tg->latency_target = DFL_LATENCY_TARGET;
} else if (index == LIMIT_LOW) {
tg->idletime_threshold = tg->idletime_threshold_conf;
tg->latency_target = tg->latency_target_conf; tg->latency_target = tg->latency_target_conf;
} }
blk_throtl_update_limit_valid(tg->td);
if (tg->td->limit_valid[LIMIT_LOW]) {
if (index == LIMIT_LOW)
tg->td->limit_index = LIMIT_LOW;
} else
tg->td->limit_index = LIMIT_MAX;
tg_conf_updated(tg, index == LIMIT_LOW && tg_conf_updated(tg, index == LIMIT_LOW &&
tg->td->limit_valid[LIMIT_LOW]); tg->td->limit_valid[LIMIT_LOW]);
ret = 0; ret = 0;
...@@ -1760,15 +1770,17 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg) ...@@ -1760,15 +1770,17 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg)
/* /*
* cgroup is idle if: * cgroup is idle if:
* - single idle is too long, longer than a fixed value (in case user * - single idle is too long, longer than a fixed value (in case user
* configure a too big threshold) or 4 times of slice * configure a too big threshold) or 4 times of idletime threshold
* - average think time is more than threshold * - average think time is more than threshold
* - IO latency is largely below threshold * - IO latency is largely below threshold
*/ */
unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice); unsigned long time;
bool ret; bool ret;
time = min_t(unsigned long, MAX_IDLE_TIME, time); time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
ret = (ktime_get_ns() >> 10) - tg->last_finish_time > time || ret = tg->latency_target == DFL_LATENCY_TARGET ||
tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
(ktime_get_ns() >> 10) - tg->last_finish_time > time ||
tg->avg_idletime > tg->idletime_threshold || tg->avg_idletime > tg->idletime_threshold ||
(tg->latency_target && tg->bio_cnt && (tg->latency_target && tg->bio_cnt &&
tg->bad_bio_cnt * 5 < tg->bio_cnt); tg->bad_bio_cnt * 5 < tg->bio_cnt);
...@@ -2405,19 +2417,14 @@ void blk_throtl_exit(struct request_queue *q) ...@@ -2405,19 +2417,14 @@ void blk_throtl_exit(struct request_queue *q)
void blk_throtl_register_queue(struct request_queue *q) void blk_throtl_register_queue(struct request_queue *q)
{ {
struct throtl_data *td; struct throtl_data *td;
struct cgroup_subsys_state *pos_css;
struct blkcg_gq *blkg;
td = q->td; td = q->td;
BUG_ON(!td); BUG_ON(!td);
if (blk_queue_nonrot(q)) { if (blk_queue_nonrot(q))
td->throtl_slice = DFL_THROTL_SLICE_SSD; td->throtl_slice = DFL_THROTL_SLICE_SSD;
td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD; else
} else {
td->throtl_slice = DFL_THROTL_SLICE_HD; td->throtl_slice = DFL_THROTL_SLICE_HD;
td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD;
}
#ifndef CONFIG_BLK_DEV_THROTTLING_LOW #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
/* if no low limit, use previous default */ /* if no low limit, use previous default */
td->throtl_slice = DFL_THROTL_SLICE_HD; td->throtl_slice = DFL_THROTL_SLICE_HD;
...@@ -2426,19 +2433,6 @@ void blk_throtl_register_queue(struct request_queue *q) ...@@ -2426,19 +2433,6 @@ void blk_throtl_register_queue(struct request_queue *q)
td->track_bio_latency = !q->mq_ops && !q->request_fn; td->track_bio_latency = !q->mq_ops && !q->request_fn;
if (!td->track_bio_latency) if (!td->track_bio_latency)
blk_stat_enable_accounting(q); blk_stat_enable_accounting(q);
/*
* some tg are created before queue is fully initialized, eg, nonrot
* isn't initialized yet
*/
rcu_read_lock();
blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
struct throtl_grp *tg = blkg_to_tg(blkg);
tg->idletime_threshold = td->dft_idletime_threshold;
tg->idletime_threshold_conf = td->dft_idletime_threshold;
}
rcu_read_unlock();
} }
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment