Commit 4e893ca8 authored by Stuart Hayes, committed by Keith Busch

nvme_core: scan namespaces asynchronously

Use async function calls to make namespace scanning happen in parallel.

Without the patch, NVMe namespaces are scanned serially, so it can take
a long time for all of a controller's namespaces to become available,
especially with a slower (TCP) interface with a large number of
namespaces.

It is not uncommon to have large numbers (hundreds or thousands) of
namespaces on nvme-of with storage servers.

The time it took for all namespaces to show up after connecting (via
TCP) to a controller with 1002 namespaces was measured on one system:

network latency   without patch   with patch
     0                 6s            1s
    50ms             210s           10s
   100ms             417s           18s

Measurements taken on another system show the effect of the patch on the
time nvme_scan_work() took to complete, when connecting to a Linux
NVMe-oF target with varying numbers of namespaces, on a network with
400us latency.

namespaces    without patch   with patch
     1            16ms           14ms
     2            24ms           16ms
     4            49ms           22ms
     8           101ms           33ms
    16           207ms           56ms
   100           1.4s           0.6s
  1000          12.9s           2.0s

On the same system, connecting to a local PCIe NVMe drive (a Samsung
PM1733) instead of a network target:

namespaces    without patch   with patch
     1            13ms           12ms
     2            41ms           13ms
Signed-off-by: Stuart Hayes <stuart.w.hayes@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
parent b2261de7
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
* Copyright (c) 2011-2014, Intel Corporation. * Copyright (c) 2011-2014, Intel Corporation.
*/ */
#include <linux/async.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/blk-mq.h> #include <linux/blk-mq.h>
#include <linux/blk-integrity.h> #include <linux/blk-integrity.h>
...@@ -4040,6 +4041,35 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid) ...@@ -4040,6 +4041,35 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
} }
} }
/**
 * struct async_scan_info - shared state for one asynchronous namespace scan
 * @ctrl:      controller whose namespaces are being scanned
 * @next_nsid: index into @ns_list of the next entry that has not yet been
 *             claimed by a scan worker
 * @ns_list:   array of little-endian NSIDs to scan
 *
 * One instance of this structure is handed to every nvme_scan_ns_async()
 * invocation scheduled for a controller.  Each invocation claims its entry
 * by atomically fetching and incrementing @next_nsid, which is what
 * guarantees that no two invocations scan the same NSID.
 */
struct async_scan_info {
	struct nvme_ctrl *ctrl;		/* controller being scanned */
	atomic_t next_nsid;		/* next unclaimed index in ns_list */
	__le32 *ns_list;		/* NSIDs to scan, little-endian */
};
/*
 * Async callback: scan one namespace from the shared NSID list.
 *
 * @data:   pointer to the struct async_scan_info describing the scan
 * @cookie: async cookie supplied by the async framework; not used here
 *
 * The slot to process is claimed with an atomic fetch-and-increment on
 * next_nsid, so an invocation never reads the same list index as another
 * invocation sharing the same scan info.  The NSID stored at that slot is
 * converted from little-endian and passed to nvme_scan_ns().
 */
static void nvme_scan_ns_async(void *data, async_cookie_t cookie)
{
	struct async_scan_info *info = data;
	int slot = (u32)atomic_fetch_inc(&info->next_nsid);

	nvme_scan_ns(info->ctrl, le32_to_cpu(info->ns_list[slot]));
}
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
unsigned nsid) unsigned nsid)
{ {
...@@ -4066,11 +4096,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) ...@@ -4066,11 +4096,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
__le32 *ns_list; __le32 *ns_list;
u32 prev = 0; u32 prev = 0;
int ret = 0, i; int ret = 0, i;
ASYNC_DOMAIN(domain);
struct async_scan_info scan_info;
ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
if (!ns_list) if (!ns_list)
return -ENOMEM; return -ENOMEM;
scan_info.ctrl = ctrl;
scan_info.ns_list = ns_list;
for (;;) { for (;;) {
struct nvme_command cmd = { struct nvme_command cmd = {
.identify.opcode = nvme_admin_identify, .identify.opcode = nvme_admin_identify,
...@@ -4086,19 +4120,23 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) ...@@ -4086,19 +4120,23 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
goto free; goto free;
} }
atomic_set(&scan_info.next_nsid, 0);
for (i = 0; i < nr_entries; i++) { for (i = 0; i < nr_entries; i++) {
u32 nsid = le32_to_cpu(ns_list[i]); u32 nsid = le32_to_cpu(ns_list[i]);
if (!nsid) /* end of the list? */ if (!nsid) /* end of the list? */
goto out; goto out;
nvme_scan_ns(ctrl, nsid); async_schedule_domain(nvme_scan_ns_async, &scan_info,
&domain);
while (++prev < nsid) while (++prev < nsid)
nvme_ns_remove_by_nsid(ctrl, prev); nvme_ns_remove_by_nsid(ctrl, prev);
} }
async_synchronize_full_domain(&domain);
} }
out: out:
nvme_remove_invalid_namespaces(ctrl, prev); nvme_remove_invalid_namespaces(ctrl, prev);
free: free:
async_synchronize_full_domain(&domain);
kfree(ns_list); kfree(ns_list);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment