Commit 0034af03 authored by Jens Axboe

block: make /sys/block/<dev>/queue/discard_max_bytes writeable

Lots of devices support huge discard sizes these days. Depending
on how the device handles them internally, huge discards can
introduce massive latencies (hundreds of msec) on the device side.

We have a sysfs file, discard_max_bytes, that advertises the max
hardware supported discard size. Make this writeable, and split
the settings into a soft and hard limit. This can be set from
'discard_granularity' and up to the hardware limit.

Add a new sysfs file, 'discard_max_hw_bytes', that shows the hw
set limit.
Reviewed-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
parent 2bb4cd5c
...@@ -20,7 +20,7 @@ This shows the size of internal allocation of the device in bytes, if ...@@ -20,7 +20,7 @@ This shows the size of internal allocation of the device in bytes, if
reported by the device. A value of '0' means device does not support reported by the device. A value of '0' means device does not support
the discard functionality. the discard functionality.
discard_max_bytes (RO) discard_max_hw_bytes (RO)
---------------------- ----------------------
Devices that support discard functionality may have internal limits on Devices that support discard functionality may have internal limits on
the number of bytes that can be trimmed or unmapped in a single operation. the number of bytes that can be trimmed or unmapped in a single operation.
...@@ -29,6 +29,14 @@ number of bytes that can be discarded in a single operation. Discard ...@@ -29,6 +29,14 @@ number of bytes that can be discarded in a single operation. Discard
requests issued to the device must not exceed this limit. A discard_max_bytes requests issued to the device must not exceed this limit. A discard_max_bytes
value of 0 means that the device does not support discard functionality. value of 0 means that the device does not support discard functionality.
discard_max_bytes (RW)
----------------------
While discard_max_hw_bytes is the hardware limit for the device, this
setting is the software limit. Some devices exhibit large latencies when
large discards are issued; setting this value lower will make Linux issue
smaller discards and potentially help reduce latencies induced by large
discard operations.
discard_zeroes_data (RO) discard_zeroes_data (RO)
------------------------ ------------------------
When read, this file will show if the discarded block are zeroed by the When read, this file will show if the discarded block are zeroed by the
......
...@@ -116,6 +116,7 @@ void blk_set_default_limits(struct queue_limits *lim) ...@@ -116,6 +116,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->chunk_sectors = 0; lim->chunk_sectors = 0;
lim->max_write_same_sectors = 0; lim->max_write_same_sectors = 0;
lim->max_discard_sectors = 0; lim->max_discard_sectors = 0;
lim->max_hw_discard_sectors = 0;
lim->discard_granularity = 0; lim->discard_granularity = 0;
lim->discard_alignment = 0; lim->discard_alignment = 0;
lim->discard_misaligned = 0; lim->discard_misaligned = 0;
...@@ -303,6 +304,7 @@ EXPORT_SYMBOL(blk_queue_chunk_sectors); ...@@ -303,6 +304,7 @@ EXPORT_SYMBOL(blk_queue_chunk_sectors);
void blk_queue_max_discard_sectors(struct request_queue *q, void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors) unsigned int max_discard_sectors)
{ {
q->limits.max_hw_discard_sectors = max_discard_sectors;
q->limits.max_discard_sectors = max_discard_sectors; q->limits.max_discard_sectors = max_discard_sectors;
} }
EXPORT_SYMBOL(blk_queue_max_discard_sectors); EXPORT_SYMBOL(blk_queue_max_discard_sectors);
...@@ -641,6 +643,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, ...@@ -641,6 +643,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_discard_sectors = min_not_zero(t->max_discard_sectors, t->max_discard_sectors = min_not_zero(t->max_discard_sectors,
b->max_discard_sectors); b->max_discard_sectors);
t->max_hw_discard_sectors = min_not_zero(t->max_hw_discard_sectors,
b->max_hw_discard_sectors);
t->discard_granularity = max(t->discard_granularity, t->discard_granularity = max(t->discard_granularity,
b->discard_granularity); b->discard_granularity);
t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) % t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
......
...@@ -145,12 +145,43 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag ...@@ -145,12 +145,43 @@ static ssize_t queue_discard_granularity_show(struct request_queue *q, char *pag
return queue_var_show(q->limits.discard_granularity, page); return queue_var_show(q->limits.discard_granularity, page);
} }
/*
 * Show the hardware discard limit (sysfs "discard_max_hw_bytes") in bytes.
 *
 * max_hw_discard_sectors is an unsigned int, so it is widened to 64 bits
 * before the sector-to-byte shift. Shifting first in 32 bits would drop the
 * high bits for limits of 4 GiB or more and report a too-small value.
 *
 * Returns the number of characters written to @page.
 */
static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page)
{
	unsigned long long val;

	val = (unsigned long long)q->limits.max_hw_discard_sectors << 9;
	return sprintf(page, "%llu\n", val);
}
static ssize_t queue_discard_max_show(struct request_queue *q, char *page) static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
{ {
return sprintf(page, "%llu\n", return sprintf(page, "%llu\n",
(unsigned long long)q->limits.max_discard_sectors << 9); (unsigned long long)q->limits.max_discard_sectors << 9);
} }
/*
 * Set the software discard limit (sysfs "discard_max_bytes") from a byte
 * count written by the user.
 *
 * The value is checked against the queue's discard granularity with a mask
 * test (which treats the granularity as a power of two), converted to
 * sectors, and rejected if the sector count does not fit in an unsigned int.
 * Values above the hardware limit are clamped to it rather than rejected.
 *
 * Returns the result of queue_var_store() on success, or a negative errno.
 */
static ssize_t queue_discard_max_store(struct request_queue *q,
				       const char *page, size_t count)
{
	unsigned long max_discard;
	ssize_t ret = queue_var_store(&max_discard, page, count);

	if (ret < 0)
		return ret;

	/*
	 * With a granularity of 0, "granularity - 1" wraps to UINT_MAX and the
	 * mask would reject every non-zero value below 4 GiB, so only test
	 * alignment when a granularity is actually set.
	 */
	if (q->limits.discard_granularity &&
	    (max_discard & (q->limits.discard_granularity - 1)))
		return -EINVAL;

	/* Bytes to sectors; the limit fields are unsigned int. */
	max_discard >>= 9;
	if (max_discard > UINT_MAX)
		return -EINVAL;

	/* Never exceed what the hardware advertises. */
	if (max_discard > q->limits.max_hw_discard_sectors)
		max_discard = q->limits.max_hw_discard_sectors;

	q->limits.max_discard_sectors = max_discard;
	return ret;
}
static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page) static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
{ {
return queue_var_show(queue_discard_zeroes_data(q), page); return queue_var_show(queue_discard_zeroes_data(q), page);
...@@ -360,9 +391,15 @@ static struct queue_sysfs_entry queue_discard_granularity_entry = { ...@@ -360,9 +391,15 @@ static struct queue_sysfs_entry queue_discard_granularity_entry = {
.show = queue_discard_granularity_show, .show = queue_discard_granularity_show,
}; };
/*
 * Read-only sysfs attribute "discard_max_hw_bytes": no store handler is
 * provided, and reads are served by queue_discard_max_hw_show().
 */
static struct queue_sysfs_entry queue_discard_max_hw_entry = {
	.attr = {
		.name = "discard_max_hw_bytes",
		.mode = S_IRUGO,
	},
	.show = queue_discard_max_hw_show,
};
static struct queue_sysfs_entry queue_discard_max_entry = { static struct queue_sysfs_entry queue_discard_max_entry = {
.attr = {.name = "discard_max_bytes", .mode = S_IRUGO }, .attr = {.name = "discard_max_bytes", .mode = S_IRUGO | S_IWUSR },
.show = queue_discard_max_show, .show = queue_discard_max_show,
.store = queue_discard_max_store,
}; };
static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
...@@ -421,6 +458,7 @@ static struct attribute *default_attrs[] = { ...@@ -421,6 +458,7 @@ static struct attribute *default_attrs[] = {
&queue_io_opt_entry.attr, &queue_io_opt_entry.attr,
&queue_discard_granularity_entry.attr, &queue_discard_granularity_entry.attr,
&queue_discard_max_entry.attr, &queue_discard_max_entry.attr,
&queue_discard_max_hw_entry.attr,
&queue_discard_zeroes_data_entry.attr, &queue_discard_zeroes_data_entry.attr,
&queue_write_same_max_entry.attr, &queue_write_same_max_entry.attr,
&queue_nonrot_entry.attr, &queue_nonrot_entry.attr,
......
...@@ -268,6 +268,7 @@ struct queue_limits { ...@@ -268,6 +268,7 @@ struct queue_limits {
unsigned int io_min; unsigned int io_min;
unsigned int io_opt; unsigned int io_opt;
unsigned int max_discard_sectors; unsigned int max_discard_sectors;
unsigned int max_hw_discard_sectors;
unsigned int max_write_same_sectors; unsigned int max_write_same_sectors;
unsigned int discard_granularity; unsigned int discard_granularity;
unsigned int discard_alignment; unsigned int discard_alignment;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment