Commit 81adb863 authored by Bart Van Assche's avatar Bart Van Assche Committed by Christoph Hellwig

nvme: set physical block size and optimal I/O size

>From the NVMe 1.4 spec:

NSFEAT bit 4 if set to 1: indicates that the fields NPWG, NPWA, NPDG, NPDA,
and NOWS are defined for this namespace and should be used by the host for
I/O optimization;
[ ... ]
Namespace Preferred Write Granularity (NPWG): This field indicates the
smallest recommended write granularity in logical blocks for this namespace.
This is a 0's based value. The size indicated should be less than or equal
to Maximum Data Transfer Size (MDTS) that is specified in units of minimum
memory page size. The value of this field may change if the namespace is
reformatted. The size should be a multiple of Namespace Preferred Write
Alignment (NPWA). Refer to section 8.25 for how this field is utilized to
improve performance and endurance.
[ ... ]
Each Write, Write Uncorrectable, or Write Zeroes commands should address a
multiple of Namespace Preferred Write Granularity (NPWG) (refer to Figure
245) and Stream Write Size (SWS) (refer to Figure 515) logical blocks (as
expressed in the NLB field), and the SLBA field of the command should be
aligned to Namespace Preferred Write Alignment (NPWA) (refer to Figure 245)
for best performance.
Signed-off-by: default avatarBart Van Assche <bvanassche@acm.org>
Reviewed-by: default avatarHannes Reinecke <hare@suse.com>
Signed-off-by: default avatarChristoph Hellwig <hch@lst.de>
parent 6605bdd5
......@@ -1626,6 +1626,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
{
sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
unsigned short bs = 1 << ns->lba_shift;
u32 atomic_bs, phys_bs, io_opt;
if (ns->lba_shift > PAGE_SHIFT) {
/* unsupported block size, set capacity to 0 later */
......@@ -1634,9 +1635,37 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_freeze_queue(disk->queue);
blk_integrity_unregister(disk);
if (id->nabo == 0) {
/*
* Bit 1 indicates whether NAWUPF is defined for this namespace
* and whether it should be used instead of AWUPF. If NAWUPF ==
* 0 then AWUPF must be used instead.
*/
if (id->nsfeat & (1 << 1) && id->nawupf)
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
else
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
} else {
atomic_bs = bs;
}
phys_bs = bs;
io_opt = bs;
if (id->nsfeat & (1 << 4)) {
/* NPWG = Namespace Preferred Write Granularity */
phys_bs *= 1 + le16_to_cpu(id->npwg);
/* NOWS = Namespace Optimal Write Size */
io_opt *= 1 + le16_to_cpu(id->nows);
}
blk_queue_logical_block_size(disk->queue, bs);
blk_queue_physical_block_size(disk->queue, bs);
blk_queue_io_min(disk->queue, bs);
/*
* Linux filesystems assume writing a single physical block is
* an atomic operation. Hence limit the physical block size to the
* value of the Atomic Write Unit Power Fail parameter.
*/
blk_queue_physical_block_size(disk->queue, min(phys_bs, atomic_bs));
blk_queue_io_min(disk->queue, phys_bs);
blk_queue_io_opt(disk->queue, io_opt);
if (ns->ms && !ns->ext &&
(ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
......@@ -2433,6 +2462,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev));
subsys->vendor_id = le16_to_cpu(id->vid);
subsys->cmic = id->cmic;
subsys->awupf = le16_to_cpu(id->awupf);
#ifdef CONFIG_NVME_MULTIPATH
subsys->iopolicy = NVME_IOPOLICY_NUMA;
#endif
......
......@@ -283,6 +283,7 @@ struct nvme_subsystem {
char firmware_rev[8];
u8 cmic;
u16 vendor_id;
u16 awupf; /* 0's based awupf value. */
struct ida ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
enum nvme_iopolicy iopolicy;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment