Commit eff2c4f1 authored by Shaohua Li, committed by Jens Axboe

nullb: bandwidth control

In testing, we usually want controllable disk speed. For example, in a
RAID array, we'd like some disks to be fast and some to be slow. MD RAID
actually has a feature for this; to test that feature, we'd like to make
the disks run at specific speeds.

Block throttling could probably be used for this purpose, but it
requires cgroup setup. Here we implement a simple throttling mechanism
directly in the driver. There is slight fluctuation in the mechanism,
but it's good enough for testing.
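
The mechanism is a token bucket: an hrtimer fires TICKS_PER_SEC times
per second, each tick resets a byte budget to mb_per_tick(mbps), and
every request subtracts its size from the budget; when the budget goes
negative, the queue is stopped until the next tick refills it. A minimal
userspace sketch of that arithmetic, mirroring the patch's mb_per_tick()
and cur_bytes accounting (illustration only, not kernel code):

#include <stdio.h>
#include <stdint.h>

#define TICKS_PER_SEC	50ULL	/* the bw timer fires every 20ms */

/* bytes of budget granted on each tick, as in the patch */
static uint64_t mb_per_tick(int mbps)
{
	return (1 << 20) / TICKS_PER_SEC * (uint64_t)mbps;
}

int main(void)
{
	long cur_bytes = mb_per_tick(100);	/* budget for a 100 MB/s cap */

	/* each request consumes budget equal to its size in bytes... */
	cur_bytes -= 512 * 1024;
	/* ...and a negative budget stops the queue until the next tick
	 * resets cur_bytes and restarts the queue */
	if (cur_bytes < 0)
		printf("queue stopped until next tick\n");

	printf("budget per tick at 100 MB/s: %llu bytes\n",
	       (unsigned long long)mb_per_tick(100));
	return 0;
}

At 100 MB/s and 50 ticks per second, each 20ms tick grants roughly 2 MB
of budget, which is why throughput fluctuates slightly at sub-tick
timescales.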

To configure the bandwidth cap, the user sets the 'mbps' attribute,
which is in MB/s.
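
As a usage illustration, the cap would be applied when creating a
device through the null_blk configfs tree. A hedged sketch in userspace
C: the /sys/kernel/config mount point and the instance name "nullb0"
are conventional choices rather than something this patch mandates, and
'power' is the pre-existing attribute that brings the device up:

#include <stdio.h>
#include <sys/stat.h>

/* write a string to a configfs attribute file */
static int write_attr(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	/* create a device instance (configfs must be mounted) */
	mkdir("/sys/kernel/config/nullb/nullb0", 0755);

	/* cap bandwidth at 100 MB/s, then power the device on */
	write_attr("/sys/kernel/config/nullb/nullb0/mbps", "100");
	write_attr("/sys/kernel/config/nullb/nullb0/power", "1");
	return 0;
}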

Based on original patch from Kyungchan Koh
Signed-off-by: Kyungchan Koh <kkc6196@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 306eb6b4
@@ -23,6 +23,14 @@
 #define FREE_BATCH		16
 
+#define TICKS_PER_SEC		50ULL
+#define TIMER_INTERVAL		(NSEC_PER_SEC / TICKS_PER_SEC)
+
+static inline u64 mb_per_tick(int mbps)
+{
+	return (1 << 20) / TICKS_PER_SEC * ((u64) mbps);
+}
+
 struct nullb_cmd {
 	struct list_head list;
 	struct llist_node ll_list;
@@ -49,10 +57,12 @@ struct nullb_queue {
  *
  * CONFIGURED:	Device has been configured and turned on. Cannot reconfigure.
  * UP:		Device is currently on and visible in userspace.
+ * THROTTLED:	Device is being throttled.
  */
 enum nullb_device_flags {
 	NULLB_DEV_FL_CONFIGURED	= 0,
 	NULLB_DEV_FL_UP		= 1,
+	NULLB_DEV_FL_THROTTLED	= 2,
 };
 
 /*
@@ -83,6 +93,7 @@ struct nullb_device {
 	unsigned int irqmode; /* IRQ completion handler */
 	unsigned int hw_queue_depth; /* queue depth */
 	unsigned int index; /* index of the disk, only valid with a disk */
+	unsigned int mbps; /* Bandwidth throttle cap (in MB/s) */
 	bool use_lightnvm; /* register as a LightNVM device */
 	bool blocking; /* blocking blk-mq device */
 	bool use_per_node_hctx; /* use per-node allocation for hardware context */
@@ -100,8 +111,9 @@ struct nullb {
 	struct nvm_dev *ndev;
 	struct blk_mq_tag_set *tag_set;
 	struct blk_mq_tag_set __tag_set;
-	struct hrtimer timer;
 	unsigned int queue_depth;
+	atomic_long_t cur_bytes;
+	struct hrtimer bw_timer;
 	spinlock_t lock;
 
 	struct nullb_queue *queues;
@@ -320,6 +332,7 @@ NULLB_DEVICE_ATTR(blocking, bool);
 NULLB_DEVICE_ATTR(use_per_node_hctx, bool);
 NULLB_DEVICE_ATTR(memory_backed, bool);
 NULLB_DEVICE_ATTR(discard, bool);
+NULLB_DEVICE_ATTR(mbps, uint);
 
 static ssize_t nullb_device_power_show(struct config_item *item, char *page)
 {
@@ -376,6 +389,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
 	&nullb_device_attr_power,
 	&nullb_device_attr_memory_backed,
 	&nullb_device_attr_discard,
+	&nullb_device_attr_mbps,
 	NULL,
 };
@@ -428,7 +442,7 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
 static ssize_t memb_group_features_show(struct config_item *item, char *page)
 {
-	return snprintf(page, PAGE_SIZE, "memory_backed,discard\n");
+	return snprintf(page, PAGE_SIZE, "memory_backed,discard,bandwidth\n");
 }
 CONFIGFS_ATTR_RO(memb_group_, features);
@@ -914,11 +928,65 @@ static int null_handle_bio(struct nullb_cmd *cmd)
 	return 0;
 }
 
+static void null_stop_queue(struct nullb *nullb)
+{
+	struct request_queue *q = nullb->q;
+
+	if (nullb->dev->queue_mode == NULL_Q_MQ)
+		blk_mq_stop_hw_queues(q);
+	else {
+		spin_lock_irq(q->queue_lock);
+		blk_stop_queue(q);
+		spin_unlock_irq(q->queue_lock);
+	}
+}
+
+static void null_restart_queue_async(struct nullb *nullb)
+{
+	struct request_queue *q = nullb->q;
+	unsigned long flags;
+
+	if (nullb->dev->queue_mode == NULL_Q_MQ)
+		blk_mq_start_stopped_hw_queues(q, true);
+	else {
+		spin_lock_irqsave(q->queue_lock, flags);
+		blk_start_queue_async(q);
+		spin_unlock_irqrestore(q->queue_lock, flags);
+	}
+}
+
 static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
 {
 	struct nullb_device *dev = cmd->nq->dev;
+	struct nullb *nullb = dev->nullb;
 	int err = 0;
 
+	if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
+		struct request *rq = cmd->rq;
+
+		if (!hrtimer_active(&nullb->bw_timer))
+			hrtimer_restart(&nullb->bw_timer);
+
+		if (atomic_long_sub_return(blk_rq_bytes(rq),
+				&nullb->cur_bytes) < 0) {
+			null_stop_queue(nullb);
+			/* race with timer */
+			if (atomic_long_read(&nullb->cur_bytes) > 0)
+				null_restart_queue_async(nullb);
+			if (dev->queue_mode == NULL_Q_RQ) {
+				struct request_queue *q = nullb->q;
+
+				spin_lock_irq(q->queue_lock);
+				rq->rq_flags |= RQF_DONTPREP;
+				blk_requeue_request(q, rq);
+				spin_unlock_irq(q->queue_lock);
+				return BLK_STS_OK;
+			} else
+				/* requeue request */
+				return BLK_STS_RESOURCE;
+		}
+	}
+
 	if (dev->memory_backed) {
 		if (dev->queue_mode == NULL_Q_BIO)
 			err = null_handle_bio(cmd);
@@ -954,6 +1022,33 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
 	return BLK_STS_OK;
 }
 
+static enum hrtimer_restart nullb_bwtimer_fn(struct hrtimer *timer)
+{
+	struct nullb *nullb = container_of(timer, struct nullb, bw_timer);
+	ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
+	unsigned int mbps = nullb->dev->mbps;
+
+	if (atomic_long_read(&nullb->cur_bytes) == mb_per_tick(mbps))
+		return HRTIMER_NORESTART;
+
+	atomic_long_set(&nullb->cur_bytes, mb_per_tick(mbps));
+	null_restart_queue_async(nullb);
+
+	hrtimer_forward_now(&nullb->bw_timer, timer_interval);
+
+	return HRTIMER_RESTART;
+}
+
+static void nullb_setup_bwtimer(struct nullb *nullb)
+{
+	ktime_t timer_interval = ktime_set(0, TIMER_INTERVAL);
+
+	hrtimer_init(&nullb->bw_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	nullb->bw_timer.function = nullb_bwtimer_fn;
+	atomic_long_set(&nullb->cur_bytes, mb_per_tick(nullb->dev->mbps));
+	hrtimer_start(&nullb->bw_timer, timer_interval, HRTIMER_MODE_REL);
+}
+
 static struct nullb_queue *nullb_to_queue(struct nullb *nullb)
 {
 	int index = 0;
@@ -1224,6 +1319,13 @@ static void null_del_dev(struct nullb *nullb)
 		null_nvm_unregister(nullb);
 	else
 		del_gendisk(nullb->disk);
+
+	if (test_bit(NULLB_DEV_FL_THROTTLED, &nullb->dev->flags)) {
+		hrtimer_cancel(&nullb->bw_timer);
+		atomic_long_set(&nullb->cur_bytes, LONG_MAX);
+		null_restart_queue_async(nullb);
+	}
+
 	blk_cleanup_queue(nullb->q);
 	if (dev->queue_mode == NULL_Q_MQ &&
 	    nullb->tag_set == &nullb->__tag_set)
@@ -1409,6 +1511,11 @@ static void null_validate_conf(struct nullb_device *dev)
 	/* Do memory allocation, so set blocking */
 	if (dev->memory_backed)
 		dev->blocking = true;
+
+	dev->mbps = min_t(unsigned int, 1024 * 40, dev->mbps);
+	/* can not stop a queue */
+	if (dev->queue_mode == NULL_Q_BIO)
+		dev->mbps = 0;
 }
 
 static int null_add_dev(struct nullb_device *dev)
@@ -1474,6 +1581,11 @@ static int null_add_dev(struct nullb_device *dev)
 		goto out_cleanup_blk_queue;
 	}
 
+	if (dev->mbps) {
+		set_bit(NULLB_DEV_FL_THROTTLED, &dev->flags);
+		nullb_setup_bwtimer(nullb);
+	}
+
 	nullb->q->queuedata = nullb;
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q);
 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);