Commit a4aea562, authored by Matias Bjørling and committed by Jens Axboe

NVMe: Convert to blk-mq

This converts the NVMe driver to a blk-mq request-based driver.

The NVMe driver is currently bio-based and implements queue logic within
itself.  By using blk-mq, a lot of these responsibilities can be moved
and simplified.

The patch is divided into the following blocks:

 * Per-command data and cmdid have been moved into the struct request
   field. The cmdid_data can be retrieved using blk_mq_rq_to_pdu(), and id
   maintenance is now handled by blk-mq through the rq->tag field.

 * The logic for splitting bios has been moved into the blk-mq layer.
   The driver instead notifies the block layer about limited gap support in
   SG lists.

 * Timeouts are now driven by blk-mq, and the driver's timeout handling
   is reimplemented within nvme_timeout(). This includes both abort
   handling and command cancellation.

 * Assignment of nvme queues to CPUs is replaced with the blk-mq
   version. The current blk-mq strategy is to assign the number of
   mapped queues and CPUs to provide synergy, while the nvme driver
   assigns as many nvme hw queues as possible. This can be implemented in
   blk-mq if needed.

 * NVMe queues are merged with the tags structure of blk-mq.

 * blk-mq takes care of setup/teardown of nvme queues and guards invalid
   accesses. Therefore, RCU-usage for nvme queues can be removed.

 * IO tracing and accounting are handled by blk-mq and therefore removed.

 * Queue suspension logic is replaced with the logic from the block
   layer.

Contributions in this patch from:

  Sam Bradshaw <sbradshaw@micron.com>
  Jens Axboe <axboe@fb.com>
  Keith Busch <keith.busch@intel.com>
  Robert Nelson <rlnelson@google.com>
Acked-by: Keith Busch <keith.busch@intel.com>
Acked-by: Jens Axboe <axboe@fb.com>

Updated for new ->queue_rq() prototype.
Signed-off-by: Jens Axboe <axboe@fb.com>
parent 9dbbfab7
This diff is collapsed.
...@@ -2105,7 +2105,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, ...@@ -2105,7 +2105,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr,
nvme_offset += unit_num_blocks; nvme_offset += unit_num_blocks;
nvme_sc = nvme_submit_io_cmd(dev, &c, NULL); nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL);
if (nvme_sc != NVME_SC_SUCCESS) { if (nvme_sc != NVME_SC_SUCCESS) {
nvme_unmap_user_pages(dev, nvme_unmap_user_pages(dev,
(is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE, (is_write) ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
...@@ -2658,7 +2658,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr, ...@@ -2658,7 +2658,7 @@ static int nvme_trans_start_stop(struct nvme_ns *ns, struct sg_io_hdr *hdr,
c.common.opcode = nvme_cmd_flush; c.common.opcode = nvme_cmd_flush;
c.common.nsid = cpu_to_le32(ns->ns_id); c.common.nsid = cpu_to_le32(ns->ns_id);
nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL); nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL);
res = nvme_trans_status_code(hdr, nvme_sc); res = nvme_trans_status_code(hdr, nvme_sc);
if (res) if (res)
goto out; goto out;
...@@ -2686,7 +2686,7 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns, ...@@ -2686,7 +2686,7 @@ static int nvme_trans_synchronize_cache(struct nvme_ns *ns,
c.common.opcode = nvme_cmd_flush; c.common.opcode = nvme_cmd_flush;
c.common.nsid = cpu_to_le32(ns->ns_id); c.common.nsid = cpu_to_le32(ns->ns_id);
nvme_sc = nvme_submit_io_cmd(ns->dev, &c, NULL); nvme_sc = nvme_submit_io_cmd(ns->dev, ns, &c, NULL);
res = nvme_trans_status_code(hdr, nvme_sc); res = nvme_trans_status_code(hdr, nvme_sc);
if (res) if (res)
...@@ -2894,7 +2894,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr, ...@@ -2894,7 +2894,7 @@ static int nvme_trans_unmap(struct nvme_ns *ns, struct sg_io_hdr *hdr,
c.dsm.nr = cpu_to_le32(ndesc - 1); c.dsm.nr = cpu_to_le32(ndesc - 1);
c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); c.dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
nvme_sc = nvme_submit_io_cmd(dev, &c, NULL); nvme_sc = nvme_submit_io_cmd(dev, ns, &c, NULL);
res = nvme_trans_status_code(hdr, nvme_sc); res = nvme_trans_status_code(hdr, nvme_sc);
dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range), dma_free_coherent(&dev->pci_dev->dev, ndesc * sizeof(*range),
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/miscdevice.h> #include <linux/miscdevice.h>
#include <linux/kref.h> #include <linux/kref.h>
#include <linux/blk-mq.h>
struct nvme_bar { struct nvme_bar {
__u64 cap; /* Controller Capabilities */ __u64 cap; /* Controller Capabilities */
...@@ -71,8 +72,10 @@ extern unsigned char nvme_io_timeout; ...@@ -71,8 +72,10 @@ extern unsigned char nvme_io_timeout;
*/ */
struct nvme_dev { struct nvme_dev {
struct list_head node; struct list_head node;
struct nvme_queue __rcu **queues; struct nvme_queue **queues;
unsigned short __percpu *io_queue; struct request_queue *admin_q;
struct blk_mq_tag_set tagset;
struct blk_mq_tag_set admin_tagset;
u32 __iomem *dbs; u32 __iomem *dbs;
struct pci_dev *pci_dev; struct pci_dev *pci_dev;
struct dma_pool *prp_page_pool; struct dma_pool *prp_page_pool;
...@@ -91,7 +94,6 @@ struct nvme_dev { ...@@ -91,7 +94,6 @@ struct nvme_dev {
struct miscdevice miscdev; struct miscdevice miscdev;
work_func_t reset_workfn; work_func_t reset_workfn;
struct work_struct reset_work; struct work_struct reset_work;
struct work_struct cpu_work;
char name[12]; char name[12];
char serial[20]; char serial[20];
char model[40]; char model[40];
...@@ -135,7 +137,6 @@ struct nvme_iod { ...@@ -135,7 +137,6 @@ struct nvme_iod {
int offset; /* Of PRP list */ int offset; /* Of PRP list */
int nents; /* Used in scatterlist */ int nents; /* Used in scatterlist */
int length; /* Of data, in bytes */ int length; /* Of data, in bytes */
unsigned long start_time;
dma_addr_t first_dma; dma_addr_t first_dma;
struct list_head node; struct list_head node;
struct scatterlist sg[0]; struct scatterlist sg[0];
...@@ -153,12 +154,14 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) ...@@ -153,12 +154,14 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
*/ */
void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod); void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod);
int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int , gfp_t); int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int, gfp_t);
struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
unsigned long addr, unsigned length); unsigned long addr, unsigned length);
void nvme_unmap_user_pages(struct nvme_dev *dev, int write, void nvme_unmap_user_pages(struct nvme_dev *dev, int write,
struct nvme_iod *iod); struct nvme_iod *iod);
int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *); int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_ns *,
struct nvme_command *, u32 *);
int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns);
int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *, int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *,
u32 *result); u32 *result);
int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns, int nvme_identify(struct nvme_dev *, unsigned nsid, unsigned cns,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment