Commit f87b0f0d authored by Jens Axboe

Merge branch 'nvme-4.19' of git://git.infradead.org/nvme into for-4.19/block2

Pull NVMe changes from Christoph:

"This contains the support for TP4004, Asymmetric Namespace Access,
 which makes NVMe multipathing usable in practice."

* 'nvme-4.19' of git://git.infradead.org/nvme:
  nvmet: use Retain Async Event bit to clear AEN
  nvmet: support configuring ANA groups
  nvmet: add minimal ANA support
  nvmet: track and limit the number of namespaces per subsystem
  nvmet: keep a port pointer in nvmet_ctrl
  nvme: add ANA support
  nvme: remove nvme_req_needs_failover
  nvme: simplify the API for getting log pages
  nvme.h: add ANA definitions
  nvme.h: add support for the log specific field
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parents 05b9ba4b b369b30c

drivers/nvme/host/core.c
@@ -252,7 +252,8 @@ void nvme_complete_rq(struct request *req)
 	trace_nvme_complete_rq(req);
 
 	if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
-		if (nvme_req_needs_failover(req, status)) {
+		if ((req->cmd_flags & REQ_NVME_MPATH) &&
+		    blk_path_error(status)) {
 			nvme_failover_req(req);
 			return;
 		}
@@ -1067,7 +1068,7 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
 EXPORT_SYMBOL_GPL(nvme_set_queue_count);
 
 #define NVME_AEN_SUPPORTED \
-	(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT)
+	(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_FW_ACT | NVME_AEN_CFG_ANA_CHANGE)
 
 static void nvme_enable_aen(struct nvme_ctrl *ctrl)
 {
@@ -2281,21 +2282,16 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	return ret;
 }
 
-int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-		u8 log_page, void *log,
-		size_t size, u64 offset)
+int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
+		void *log, size_t size, u64 offset)
 {
 	struct nvme_command c = { };
 	unsigned long dwlen = size / 4 - 1;
 
 	c.get_log_page.opcode = nvme_admin_get_log_page;
-
-	if (ns)
-		c.get_log_page.nsid = cpu_to_le32(ns->head->ns_id);
-	else
-		c.get_log_page.nsid = cpu_to_le32(NVME_NSID_ALL);
-
+	c.get_log_page.nsid = cpu_to_le32(nsid);
 	c.get_log_page.lid = log_page;
+	c.get_log_page.lsp = lsp;
 	c.get_log_page.numdl = cpu_to_le16(dwlen & ((1 << 16) - 1));
 	c.get_log_page.numdu = cpu_to_le16(dwlen >> 16);
 	c.get_log_page.lpol = cpu_to_le32(lower_32_bits(offset));
@@ -2304,12 +2300,6 @@ int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	return nvme_submit_sync_cmd(ctrl->admin_q, &c, log, size);
 }
 
-static int nvme_get_log(struct nvme_ctrl *ctrl, u8 log_page, void *log,
-			size_t size)
-{
-	return nvme_get_log_ext(ctrl, NULL, log_page, log, size, 0);
-}
-
 static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
 {
 	int ret;
@@ -2320,8 +2310,8 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl)
 	if (!ctrl->effects)
 		return 0;
 
-	ret = nvme_get_log(ctrl, NVME_LOG_CMD_EFFECTS, ctrl->effects,
-					sizeof(*ctrl->effects));
+	ret = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CMD_EFFECTS, 0,
+			ctrl->effects, sizeof(*ctrl->effects), 0);
 	if (ret) {
 		kfree(ctrl->effects);
 		ctrl->effects = NULL;
@@ -2412,6 +2402,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	nvme_set_queue_limits(ctrl, ctrl->admin_q);
 	ctrl->sgls = le32_to_cpu(id->sgls);
 	ctrl->kas = le16_to_cpu(id->kas);
+	ctrl->max_namespaces = le32_to_cpu(id->mnan);
 
 	if (id->rtd3e) {
 		/* us -> s */
@@ -2471,8 +2462,12 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 		ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
 	}
 
+	ret = nvme_mpath_init(ctrl, id);
 	kfree(id);
 
+	if (ret < 0)
+		return ret;
+
 	if (ctrl->apst_enabled && !prev_apst_enabled)
 		dev_pm_qos_expose_latency_tolerance(ctrl->device);
 	else if (!ctrl->apst_enabled && prev_apst_enabled)
@@ -2691,6 +2686,10 @@ static struct attribute *nvme_ns_id_attrs[] = {
 	&dev_attr_nguid.attr,
 	&dev_attr_eui.attr,
 	&dev_attr_nsid.attr,
+#ifdef CONFIG_NVME_MULTIPATH
+	&dev_attr_ana_grpid.attr,
+	&dev_attr_ana_state.attr,
+#endif
 	NULL,
 };
 
@@ -2713,6 +2712,14 @@ static umode_t nvme_ns_id_attrs_are_visible(struct kobject *kobj,
 		if (!memchr_inv(ids->eui64, 0, sizeof(ids->eui64)))
 			return 0;
 	}
+#ifdef CONFIG_NVME_MULTIPATH
+	if (a == &dev_attr_ana_grpid.attr || a == &dev_attr_ana_state.attr) {
+		if (dev_to_disk(dev)->fops != &nvme_fops) /* per-path attr */
+			return 0;
+		if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl))
+			return 0;
+	}
+#endif
 	return a->mode;
 }
 
@@ -3086,8 +3093,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 	nvme_get_ctrl(ctrl);
 
-	kfree(id);
-
 	device_add_disk(ctrl->device, ns->disk);
 	if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj,
 					&nvme_ns_id_attr_group))
@@ -3097,8 +3102,10 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 		pr_warn("%s: failed to register lightnvm sysfs group for identification\n",
 			ns->disk->disk_name);
 
-	nvme_mpath_add_disk(ns->head);
+	nvme_mpath_add_disk(ns, id);
 	nvme_fault_inject_init(ns);
+	kfree(id);
+
 	return;
  out_unlink_ns:
 	mutex_lock(&ctrl->subsys->lock);
@@ -3240,7 +3247,8 @@ static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
 	 * raced with us in reading the log page, which could cause us to miss
 	 * updates.
 	 */
-	error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
+	error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_CHANGED_NS, 0, log,
+			log_size, 0);
 	if (error)
 		dev_warn(ctrl->device,
 			"reading changed ns log failed: %d\n", error);
@@ -3357,9 +3365,9 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl)
 	if (!log)
 		return;
 
-	if (nvme_get_log(ctrl, NVME_LOG_FW_SLOT, log, sizeof(*log)))
-		dev_warn(ctrl->device,
-			"Get FW SLOT INFO log error\n");
+	if (nvme_get_log(ctrl, NVME_NSID_ALL, 0, NVME_LOG_FW_SLOT, log,
+			sizeof(*log), 0))
+		dev_warn(ctrl->device, "Get FW SLOT INFO log error\n");
 	kfree(log);
 }
 
@@ -3405,6 +3413,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 	case NVME_AER_NOTICE_FW_ACT_STARTING:
 		queue_work(nvme_wq, &ctrl->fw_act_work);
 		break;
+#ifdef CONFIG_NVME_MULTIPATH
+	case NVME_AER_NOTICE_ANA:
+		if (!ctrl->ana_log_buf)
+			break;
+		queue_work(nvme_wq, &ctrl->ana_work);
+		break;
+#endif
 	default:
 		dev_warn(ctrl->device, "async event result %08x\n", result);
 	}
@@ -3437,6 +3452,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_async_event);
 
 void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
 {
+	nvme_mpath_stop(ctrl);
 	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
 	flush_work(&ctrl->scan_work);
@@ -3474,6 +3490,7 @@ static void nvme_free_ctrl(struct device *dev)
 
 	ida_simple_remove(&nvme_instance_ida, ctrl->instance);
 	kfree(ctrl->effects);
+	nvme_mpath_uninit(ctrl);
 	if (subsys) {
 		mutex_lock(&subsys->lock);
......
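The hunks above replace the old nvme_get_log()/nvme_get_log_ext() pair with a single helper that takes the namespace ID and the new log specific field (LSP) directly. As a minimal sketch of the resulting calling convention -- illustrative only, not code from this commit -- a controller-wide log read now spells out NVME_NSID_ALL and a zero LSP itself:

/*
 * Illustrative only: reading the SMART log with the consolidated
 * helper.  NVME_NSID_ALL selects the controller-wide log, and lsp is 0
 * because the SMART log defines no log specific field; the ANA reader
 * instead passes NVME_ANA_LOG_RGO there to request group descriptors
 * without namespace lists.
 */
static int example_read_smart_log(struct nvme_ctrl *ctrl,
		struct nvme_smart_log *log)
{
	return nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
			log, sizeof(*log), 0);
}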
drivers/nvme/host/lightnvm.c
@@ -604,8 +604,9 @@ static int nvme_nvm_get_chk_meta(struct nvm_dev *ndev,
 	while (left) {
 		len = min_t(unsigned int, left, max_len);
-		ret = nvme_get_log_ext(ctrl, ns, NVME_NVM_LOG_REPORT_CHUNK,
-				dev_meta, len, offset);
+		ret = nvme_get_log(ctrl, ns->head->ns_id,
+				NVME_NVM_LOG_REPORT_CHUNK, 0, dev_meta, len,
+				offset);
 		if (ret) {
 			dev_err(ctrl->device, "Get REPORT CHUNK log error\n");
 			break;
......
drivers/nvme/host/multipath.c
 /*
- * Copyright (c) 2017 Christoph Hellwig.
+ * Copyright (c) 2017-2018 Christoph Hellwig.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -20,6 +20,11 @@ module_param(multipath, bool, 0444);
 MODULE_PARM_DESC(multipath,
 	"turn on native support for multiple controllers per subsystem");
 
+inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
+{
+	return multipath && (ctrl->subsys->cmic & (1 << 3));
+}
+
 /*
  * If multipathing is enabled we need to always use the subsystem instance
  * number for numbering our devices to avoid conflicts between subsystems that
@@ -45,6 +50,7 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 void nvme_failover_req(struct request *req)
 {
 	struct nvme_ns *ns = req->q->queuedata;
+	u16 status = nvme_req(req)->status;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ns->head->requeue_lock, flags);
@@ -52,15 +58,35 @@ void nvme_failover_req(struct request *req)
 	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
 	blk_mq_end_request(req, 0);
 
-	nvme_reset_ctrl(ns->ctrl);
-	kblockd_schedule_work(&ns->head->requeue_work);
-}
+	switch (status & 0x7ff) {
+	case NVME_SC_ANA_TRANSITION:
+	case NVME_SC_ANA_INACCESSIBLE:
+	case NVME_SC_ANA_PERSISTENT_LOSS:
+		/*
+		 * If we got back an ANA error we know the controller is alive,
+		 * but not ready to serve this namespace.  The spec suggests
+		 * we should update our general state here, but due to the fact
+		 * that the admin and I/O queues are not serialized that is
+		 * fundamentally racy.  So instead just clear the current path,
+		 * mark the path as pending and kick off a re-read of the ANA
+		 * log page ASAP.
+		 */
+		nvme_mpath_clear_current_path(ns);
+		if (ns->ctrl->ana_log_buf) {
+			set_bit(NVME_NS_ANA_PENDING, &ns->flags);
+			queue_work(nvme_wq, &ns->ctrl->ana_work);
+		}
+		break;
+	default:
+		/*
+		 * Reset the controller for any non-ANA error as we don't know
+		 * what caused the error.
+		 */
+		nvme_reset_ctrl(ns->ctrl);
+		break;
+	}
 
-bool nvme_req_needs_failover(struct request *req, blk_status_t error)
-{
-	if (!(req->cmd_flags & REQ_NVME_MPATH))
-		return false;
-	return blk_path_error(error);
+	kblockd_schedule_work(&ns->head->requeue_work);
 }
 
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
@@ -75,25 +101,51 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 	up_read(&ctrl->namespaces_rwsem);
 }
 
+static const char *nvme_ana_state_names[] = {
+	[0]				= "invalid state",
+	[NVME_ANA_OPTIMIZED]		= "optimized",
+	[NVME_ANA_NONOPTIMIZED]		= "non-optimized",
+	[NVME_ANA_INACCESSIBLE]		= "inaccessible",
+	[NVME_ANA_PERSISTENT_LOSS]	= "persistent-loss",
+	[NVME_ANA_CHANGE]		= "change",
+};
+
 static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
 {
-	struct nvme_ns *ns;
+	struct nvme_ns *ns, *fallback = NULL;
 
 	list_for_each_entry_rcu(ns, &head->list, siblings) {
-		if (ns->ctrl->state == NVME_CTRL_LIVE) {
+		if (ns->ctrl->state != NVME_CTRL_LIVE ||
+		    test_bit(NVME_NS_ANA_PENDING, &ns->flags))
+			continue;
+		switch (ns->ana_state) {
+		case NVME_ANA_OPTIMIZED:
 			rcu_assign_pointer(head->current_path, ns);
 			return ns;
+		case NVME_ANA_NONOPTIMIZED:
+			fallback = ns;
+			break;
+		default:
+			break;
 		}
 	}
 
-	return NULL;
+	if (fallback)
+		rcu_assign_pointer(head->current_path, fallback);
+	return fallback;
+}
+
+static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
+{
+	return ns->ctrl->state == NVME_CTRL_LIVE &&
+		ns->ana_state == NVME_ANA_OPTIMIZED;
 }
 
 inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
 {
 	struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu);
 
-	if (unlikely(!ns || ns->ctrl->state != NVME_CTRL_LIVE))
+	if (unlikely(!ns || !nvme_path_is_optimized(ns)))
 		ns = __nvme_find_path(head);
 	return ns;
 }
@@ -142,7 +194,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
 
 	srcu_idx = srcu_read_lock(&head->srcu);
 	ns = srcu_dereference(head->current_path, &head->srcu);
-	if (likely(ns && ns->ctrl->state == NVME_CTRL_LIVE))
+	if (likely(ns && nvme_path_is_optimized(ns)))
 		found = ns->queue->poll_fn(q, qc);
 	srcu_read_unlock(&head->srcu, srcu_idx);
 	return found;
@@ -176,6 +228,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	struct request_queue *q;
 	bool vwc = false;
 
+	mutex_init(&head->lock);
 	bio_list_init(&head->requeue_list);
 	spin_lock_init(&head->requeue_lock);
 	INIT_WORK(&head->requeue_work, nvme_requeue_work);
@@ -220,29 +273,232 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
 	return -ENOMEM;
 }
 
-void nvme_mpath_add_disk(struct nvme_ns_head *head)
+static void nvme_mpath_set_live(struct nvme_ns *ns)
 {
+	struct nvme_ns_head *head = ns->head;
+
+	lockdep_assert_held(&ns->head->lock);
+
 	if (!head->disk)
 		return;
 
-	mutex_lock(&head->subsys->lock);
 	if (!(head->disk->flags & GENHD_FL_UP)) {
 		device_add_disk(&head->subsys->dev, head->disk);
 		if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
 				&nvme_ns_id_attr_group))
-			pr_warn("%s: failed to create sysfs group for identification\n",
-				head->disk->disk_name);
+			dev_warn(&head->subsys->dev,
+				 "failed to create id group.\n");
+	}
+
+	kblockd_schedule_work(&ns->head->requeue_work);
+}
+
+static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
+		int (*cb)(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *,
+			void *))
+{
+	void *base = ctrl->ana_log_buf;
+	size_t offset = sizeof(struct nvme_ana_rsp_hdr);
+	int error, i;
+
+	lockdep_assert_held(&ctrl->ana_lock);
+
+	for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
+		struct nvme_ana_group_desc *desc = base + offset;
+		u32 nr_nsids = le32_to_cpu(desc->nnsids);
+		size_t nsid_buf_size = nr_nsids * sizeof(__le32);
+
+		if (WARN_ON_ONCE(desc->grpid == 0))
+			return -EINVAL;
+		if (WARN_ON_ONCE(le32_to_cpu(desc->grpid) > ctrl->anagrpmax))
+			return -EINVAL;
+		if (WARN_ON_ONCE(desc->state == 0))
+			return -EINVAL;
+		if (WARN_ON_ONCE(desc->state > NVME_ANA_CHANGE))
+			return -EINVAL;
+
+		offset += sizeof(*desc);
+		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - nsid_buf_size))
+			return -EINVAL;
+
+		error = cb(ctrl, desc, data);
+		if (error)
+			return error;
+
+		offset += nsid_buf_size;
+		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static inline bool nvme_state_is_live(enum nvme_ana_state state)
+{
+	return state == NVME_ANA_OPTIMIZED || state == NVME_ANA_NONOPTIMIZED;
+}
+
+static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
+		struct nvme_ns *ns)
+{
+	enum nvme_ana_state old;
+
+	mutex_lock(&ns->head->lock);
+	old = ns->ana_state;
+	ns->ana_grpid = le32_to_cpu(desc->grpid);
+	ns->ana_state = desc->state;
+	clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
+
+	if (nvme_state_is_live(ns->ana_state) && !nvme_state_is_live(old))
+		nvme_mpath_set_live(ns);
+	mutex_unlock(&ns->head->lock);
+}
+
+static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
+		struct nvme_ana_group_desc *desc, void *data)
+{
+	u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
+	unsigned *nr_change_groups = data;
+	struct nvme_ns *ns;
+
+	dev_info(ctrl->device, "ANA group %d: %s.\n",
+			le32_to_cpu(desc->grpid),
+			nvme_ana_state_names[desc->state]);
+
+	if (desc->state == NVME_ANA_CHANGE)
+		(*nr_change_groups)++;
+
+	if (!nr_nsids)
+		return 0;
+
+	down_write(&ctrl->namespaces_rwsem);
+	list_for_each_entry(ns, &ctrl->namespaces, list) {
+		if (ns->head->ns_id != le32_to_cpu(desc->nsids[n]))
+			continue;
+		nvme_update_ns_ana_state(desc, ns);
+		if (++n == nr_nsids)
+			break;
+	}
+	up_write(&ctrl->namespaces_rwsem);
+	WARN_ON_ONCE(n < nr_nsids);
+	return 0;
+}
+
+static int nvme_read_ana_log(struct nvme_ctrl *ctrl, bool groups_only)
+{
+	u32 nr_change_groups = 0;
+	int error;
+
+	mutex_lock(&ctrl->ana_lock);
+	error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA,
+			groups_only ? NVME_ANA_LOG_RGO : 0,
+			ctrl->ana_log_buf, ctrl->ana_log_size, 0);
+	if (error) {
+		dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);
+		goto out_unlock;
+	}
+
+	error = nvme_parse_ana_log(ctrl, &nr_change_groups,
+			nvme_update_ana_state);
+	if (error)
+		goto out_unlock;
+
+	/*
+	 * In theory we should have an ANATT timer per group as they might
+	 * enter the change state at different times.  But that is a lot of
+	 * overhead just to protect against a target that keeps entering new
+	 * change states while never finishing previous ones.  But we'll
+	 * still eventually time out once all groups are in change state, so
+	 * this isn't a big deal.
+	 *
+	 * We also double the ANATT value to provide some slack for transports
+	 * or AEN processing overhead.
+	 */
+	if (nr_change_groups)
+		mod_timer(&ctrl->anatt_timer, ctrl->anatt * HZ * 2 + jiffies);
+	else
+		del_timer_sync(&ctrl->anatt_timer);
+out_unlock:
+	mutex_unlock(&ctrl->ana_lock);
+	return error;
+}
+
+static void nvme_ana_work(struct work_struct *work)
+{
+	struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work);
+
+	nvme_read_ana_log(ctrl, false);
+}
+
+static void nvme_anatt_timeout(struct timer_list *t)
+{
+	struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer);
+
+	dev_info(ctrl->device, "ANATT timeout, resetting controller.\n");
+	nvme_reset_ctrl(ctrl);
+}
+
+void nvme_mpath_stop(struct nvme_ctrl *ctrl)
+{
+	if (!nvme_ctrl_use_ana(ctrl))
+		return;
+	del_timer_sync(&ctrl->anatt_timer);
+	cancel_work_sync(&ctrl->ana_work);
+}
+
+static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	return sprintf(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid);
+}
+DEVICE_ATTR_RO(ana_grpid);
+
+static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);
+
+	return sprintf(buf, "%s\n", nvme_ana_state_names[ns->ana_state]);
+}
+DEVICE_ATTR_RO(ana_state);
+
+static int nvme_set_ns_ana_state(struct nvme_ctrl *ctrl,
+		struct nvme_ana_group_desc *desc, void *data)
+{
+	struct nvme_ns *ns = data;
+
+	if (ns->ana_grpid == le32_to_cpu(desc->grpid)) {
+		nvme_update_ns_ana_state(desc, ns);
+		return -ENXIO; /* just break out of the loop */
+	}
+
+	return 0;
+}
+
+void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
+{
+	if (nvme_ctrl_use_ana(ns->ctrl)) {
+		mutex_lock(&ns->ctrl->ana_lock);
+		ns->ana_grpid = le32_to_cpu(id->anagrpid);
+		nvme_parse_ana_log(ns->ctrl, ns, nvme_set_ns_ana_state);
+		mutex_unlock(&ns->ctrl->ana_lock);
+	} else {
+		mutex_lock(&ns->head->lock);
+		ns->ana_state = NVME_ANA_OPTIMIZED;
+		nvme_mpath_set_live(ns);
+		mutex_unlock(&ns->head->lock);
 	}
-	mutex_unlock(&head->subsys->lock);
 }
 
 void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 {
 	if (!head->disk)
 		return;
-	sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
-			   &nvme_ns_id_attr_group);
-	del_gendisk(head->disk);
+	if (head->disk->flags & GENHD_FL_UP) {
+		sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
+				   &nvme_ns_id_attr_group);
+		del_gendisk(head->disk);
+	}
 	blk_set_queue_dying(head->disk->queue);
 	/* make sure all pending bios are cleaned up */
 	kblockd_schedule_work(&head->requeue_work);
@@ -250,3 +506,52 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
 	blk_cleanup_queue(head->disk->queue);
 	put_disk(head->disk);
 }
+
+int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+	int error;
+
+	if (!nvme_ctrl_use_ana(ctrl))
+		return 0;
+
+	ctrl->anacap = id->anacap;
+	ctrl->anatt = id->anatt;
+	ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);
+	ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);
+
+	mutex_init(&ctrl->ana_lock);
+	timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
+	ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
+		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc);
+	if (!(ctrl->anacap & (1 << 6)))
+		ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32);
+
+	if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) {
+		dev_err(ctrl->device,
+			"ANA log page size (%zd) larger than MDTS (%d).\n",
+			ctrl->ana_log_size,
+			ctrl->max_hw_sectors << SECTOR_SHIFT);
+		dev_err(ctrl->device, "disabling ANA support.\n");
+		return 0;
+	}
+
+	INIT_WORK(&ctrl->ana_work, nvme_ana_work);
+	ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
+	if (!ctrl->ana_log_buf)
+		goto out;
+
+	error = nvme_read_ana_log(ctrl, true);
+	if (error)
+		goto out_free_ana_log_buf;
+	return 0;
+
+out_free_ana_log_buf:
+	kfree(ctrl->ana_log_buf);
+out:
+	return -ENOMEM;
+}
+
+void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
+{
+	kfree(ctrl->ana_log_buf);
+}
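
For a sense of scale of the ana_log_size computation in nvme_mpath_init() above: with the structure layouts added to include/linux/nvme.h below, the header is 16 bytes and each group descriptor 24 bytes. A standalone back-of-the-envelope check, using hypothetical controller values (NANAGRPID = 128, MNAN = 1024, ANACAP bit 6 clear, so every namespace ID must fit in the log):

#include <stdio.h>

int main(void)
{
	size_t hdr = 16;	/* sizeof(struct nvme_ana_rsp_hdr) */
	size_t desc = 24;	/* sizeof(struct nvme_ana_group_desc) */
	size_t nsid = 4;	/* one __le32 per namespace */
	size_t nanagrpid = 128, mnan = 1024;
	size_t ana_log_size = hdr + nanagrpid * desc + mnan * nsid;

	/* prints 7184; the buffer must fit below MDTS or ANA is disabled */
	printf("worst-case ANA log: %zu bytes\n", ana_log_size);
	return 0;
}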
drivers/nvme/host/nvme.h
@@ -183,6 +183,7 @@ struct nvme_ctrl {
	u16 oacs;
	u16 nssa;
	u16 nr_streams;
+	u32 max_namespaces;
	atomic_t abort_limit;
	u8 vwc;
	u32 vs;
@@ -205,6 +206,19 @@ struct nvme_ctrl {
	struct work_struct fw_act_work;
	unsigned long events;

+#ifdef CONFIG_NVME_MULTIPATH
+	/* asymmetric namespace access: */
+	u8 anacap;
+	u8 anatt;
+	u32 anagrpmax;
+	u32 nanagrpid;
+	struct mutex ana_lock;
+	struct nvme_ana_rsp_hdr *ana_log_buf;
+	size_t ana_log_size;
+	struct timer_list anatt_timer;
+	struct work_struct ana_work;
+#endif
+
	/* Power saving configuration */
	u64 ps_max_latency_us;
	bool apst_enabled;
@@ -269,6 +283,7 @@ struct nvme_ns_head {
	struct bio_list requeue_list;
	spinlock_t requeue_lock;
	struct work_struct requeue_work;
+	struct mutex lock;
 #endif
	struct list_head list;
	struct srcu_struct srcu;
@@ -295,6 +310,10 @@ struct nvme_ns {
	struct nvme_ctrl *ctrl;
	struct request_queue *queue;
	struct gendisk *disk;
+#ifdef CONFIG_NVME_MULTIPATH
+	enum nvme_ana_state ana_state;
+	u32 ana_grpid;
+#endif
	struct list_head siblings;
	struct nvm_dev *ndev;
	struct kref kref;
@@ -307,8 +326,9 @@ struct nvme_ns {
	bool ext;
	u8 pi_type;
	unsigned long flags;
 #define NVME_NS_REMOVING 0
 #define NVME_NS_DEAD     1
+#define NVME_NS_ANA_PENDING 2
	u16 noiob;

 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
@@ -436,21 +456,24 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
 int nvme_delete_ctrl(struct nvme_ctrl *ctrl);
 int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl);

-int nvme_get_log_ext(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
-		u8 log_page, void *log, size_t size, u64 offset);
+int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
+		void *log, size_t size, u64 offset);

 extern const struct attribute_group nvme_ns_id_attr_group;
 extern const struct block_device_operations nvme_ns_head_ops;

 #ifdef CONFIG_NVME_MULTIPATH
+bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl);
 void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
			struct nvme_ctrl *ctrl, int *flags);
 void nvme_failover_req(struct request *req);
-bool nvme_req_needs_failover(struct request *req, blk_status_t error);
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
-void nvme_mpath_add_disk(struct nvme_ns_head *head);
+void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
 void nvme_mpath_remove_disk(struct nvme_ns_head *head);
+int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
+void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
+void nvme_mpath_stop(struct nvme_ctrl *ctrl);

 static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 {
@@ -469,7 +492,14 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
		kblockd_schedule_work(&head->requeue_work);
 }

+extern struct device_attribute dev_attr_ana_grpid;
+extern struct device_attribute dev_attr_ana_state;
+
 #else
+static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
+{
+	return false;
+}
 /*
  * Without the multipath code enabled, multiple controller per subsystems are
  * visible as devices and thus we cannot use the subsystem instance.
@@ -483,11 +513,6 @@ static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
 static inline void nvme_failover_req(struct request *req)
 {
 }
-static inline bool nvme_req_needs_failover(struct request *req,
-					   blk_status_t error)
-{
-	return false;
-}
 static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
 {
 }
@@ -496,7 +521,8 @@ static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
 {
	return 0;
 }
-static inline void nvme_mpath_add_disk(struct nvme_ns_head *head)
+static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
+		struct nvme_id_ns *id)
 {
 }
 static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -508,6 +534,17 @@ static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
 static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
 {
 }
+static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
+		struct nvme_id_ctrl *id)
+{
+	return 0;
+}
+static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
+{
+}
+static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
+{
+}
 #endif /* CONFIG_NVME_MULTIPATH */

 #ifdef CONFIG_NVM
......
drivers/nvme/target/admin-cmd.c
@@ -19,6 +19,19 @@
 #include <asm/unaligned.h>
 #include "nvmet.h"

+/*
+ * This helper allows us to clear the AEN based on the RAE bit.
+ * Please use this helper when processing the log pages which are
+ * associated with the AEN.
+ */
+static inline void nvmet_clear_aen(struct nvmet_req *req, u32 aen_bit)
+{
+	int rae = le32_to_cpu(req->cmd->common.cdw10[0]) & 1 << 15;
+
+	if (!rae)
+		clear_bit(aen_bit, &req->sq->ctrl->aen_masked);
+}
+
 u32 nvmet_get_log_page_len(struct nvme_command *cmd)
 {
	u32 len = le16_to_cpu(cmd->get_log_page.numdu);
@@ -176,12 +189,76 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
	if (!status)
		status = nvmet_zero_sgl(req, len, req->data_len - len);
	ctrl->nr_changed_ns = 0;
-	clear_bit(NVME_AEN_CFG_NS_ATTR, &ctrl->aen_masked);
+	nvmet_clear_aen(req, NVME_AEN_CFG_NS_ATTR);
	mutex_unlock(&ctrl->lock);
 out:
	nvmet_req_complete(req, status);
 }

+static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid,
+		struct nvme_ana_group_desc *desc)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvmet_ns *ns;
+	u32 count = 0;
+
+	if (!(req->cmd->get_log_page.lsp & NVME_ANA_LOG_RGO)) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
+			if (ns->anagrpid == grpid)
+				desc->nsids[count++] = cpu_to_le32(ns->nsid);
+		rcu_read_unlock();
+	}
+
+	desc->grpid = cpu_to_le32(grpid);
+	desc->nnsids = cpu_to_le32(count);
+	desc->chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
+	desc->state = req->port->ana_state[grpid];
+	memset(desc->rsvd17, 0, sizeof(desc->rsvd17));
+	return sizeof(struct nvme_ana_group_desc) + count * sizeof(__le32);
+}
+
+static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
+{
+	struct nvme_ana_rsp_hdr hdr = { 0, };
+	struct nvme_ana_group_desc *desc;
+	size_t offset = sizeof(struct nvme_ana_rsp_hdr); /* start beyond hdr */
+	size_t len;
+	u32 grpid;
+	u16 ngrps = 0;
+	u16 status;
+
+	status = NVME_SC_INTERNAL;
+	desc = kmalloc(sizeof(struct nvme_ana_group_desc) +
+			NVMET_MAX_NAMESPACES * sizeof(__le32), GFP_KERNEL);
+	if (!desc)
+		goto out;
+
+	down_read(&nvmet_ana_sem);
+	for (grpid = 1; grpid <= NVMET_MAX_ANAGRPS; grpid++) {
+		if (!nvmet_ana_group_enabled[grpid])
+			continue;
+		len = nvmet_format_ana_group(req, grpid, desc);
+		status = nvmet_copy_to_sgl(req, offset, desc, len);
+		if (status)
+			break;
+		offset += len;
+		ngrps++;
+	}
+
+	hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
+	hdr.ngrps = cpu_to_le16(ngrps);
+	nvmet_clear_aen(req, NVME_AEN_CFG_ANA_CHANGE);
+	up_read(&nvmet_ana_sem);
+
+	kfree(desc);
+
+	/* copy the header last once we know the number of groups */
+	status = nvmet_copy_to_sgl(req, 0, &hdr, sizeof(hdr));
+out:
+	nvmet_req_complete(req, status);
+}
+
 static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 {
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
@@ -213,8 +290,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
	 * the safest is to leave it as zeroes.
	 */

-	/* we support multiple ports and multiples hosts: */
-	id->cmic = (1 << 0) | (1 << 1);
+	/* we support multiple ports, multiple hosts and ANA: */
+	id->cmic = (1 << 0) | (1 << 1) | (1 << 3);

	/* no limit on data transfer sizes for now */
	id->mdts = 0;
@@ -252,6 +329,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
	id->maxcmd = cpu_to_le16(NVMET_MAX_CMD);
	id->nn = cpu_to_le32(ctrl->subsys->max_nsid);
+	id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES);
	id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM |
			NVME_CTRL_ONCS_WRITE_ZEROES);
@@ -281,6 +359,11 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
	id->msdbd = ctrl->ops->msdbd;

+	id->anacap = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
+	id->anatt = 10; /* random value */
+	id->anagrpmax = cpu_to_le32(NVMET_MAX_ANAGRPS);
+	id->nanagrpid = cpu_to_le32(NVMET_MAX_ANAGRPS);
+
	/*
	 * Meh, we don't really support any power state.  Fake up the same
	 * values that qemu does.
@@ -322,8 +405,15 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
	 * nuse = ncap = nsze isn't always true, but we have no way to find
	 * that out from the underlying device.
	 */
-	id->ncap = id->nuse = id->nsze =
-		cpu_to_le64(ns->size >> ns->blksize_shift);
+	id->ncap = id->nsze = cpu_to_le64(ns->size >> ns->blksize_shift);
+	switch (req->port->ana_state[ns->anagrpid]) {
+	case NVME_ANA_INACCESSIBLE:
+	case NVME_ANA_PERSISTENT_LOSS:
+		break;
+	default:
+		id->nuse = id->nsze;
+		break;
+	}

	/*
	 * We just provide a single LBA format that matches what the
@@ -337,6 +427,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
	 * controllers, but also with any other user of the block device.
	 */
	id->nmic = (1 << 0);
+	id->anagrpid = cpu_to_le32(ns->anagrpid);

	memcpy(&id->nguid, &ns->nguid, sizeof(id->nguid));
@@ -619,6 +710,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
		case NVME_LOG_CMD_EFFECTS:
			req->execute = nvmet_execute_get_log_cmd_effects_ns;
			return 0;
+		case NVME_LOG_ANA:
+			req->execute = nvmet_execute_get_log_page_ana;
+			return 0;
		}
		break;
	case nvme_admin_identify:
......
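The nvmet_clear_aen() helper in the section above keys off bit 15 of CDW10, the Retain Asynchronous Event (RAE) flag of Get Log Page: if the host did not ask for the event to be retained, reading the log page re-arms the AEN. A hedged host-side sketch (not part of this commit) of how that dword is packed:

/*
 * Sketch only: assembling CDW10 of Get Log Page on the host side.
 * Layout: LID in bits 7:0, LSP in 11:8, RAE in bit 15, NUMDL in 31:16.
 * Setting rae keeps the controller's AEN armed, which is exactly the
 * bit nvmet_clear_aen() tests before clearing the mask.
 */
static __le32 example_get_log_cdw10(u8 lid, u8 lsp, bool rae, u16 numdl)
{
	u32 dw = lid | ((lsp & 0xf) << 8) | ((u32)numdl << 16);

	if (rae)
		dw |= 1 << 15;
	return cpu_to_le32(dw);
}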
drivers/nvme/target/configfs.c
@@ -411,6 +411,39 @@ static ssize_t nvmet_ns_device_nguid_store(struct config_item *item,
 CONFIGFS_ATTR(nvmet_ns_, device_nguid);

+static ssize_t nvmet_ns_ana_grpid_show(struct config_item *item, char *page)
+{
+	return sprintf(page, "%u\n", to_nvmet_ns(item)->anagrpid);
+}
+
+static ssize_t nvmet_ns_ana_grpid_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_ns *ns = to_nvmet_ns(item);
+	u32 oldgrpid, newgrpid;
+	int ret;
+
+	ret = kstrtou32(page, 0, &newgrpid);
+	if (ret)
+		return ret;
+
+	if (newgrpid < 1 || newgrpid > NVMET_MAX_ANAGRPS)
+		return -EINVAL;
+
+	down_write(&nvmet_ana_sem);
+	oldgrpid = ns->anagrpid;
+	nvmet_ana_group_enabled[newgrpid]++;
+	ns->anagrpid = newgrpid;
+	nvmet_ana_group_enabled[oldgrpid]--;
+	nvmet_ana_chgcnt++;
+	up_write(&nvmet_ana_sem);
+
+	nvmet_send_ana_event(ns->subsys, NULL);
+	return count;
+}
+
+CONFIGFS_ATTR(nvmet_ns_, ana_grpid);
+
 static ssize_t nvmet_ns_enable_show(struct config_item *item, char *page)
 {
	return sprintf(page, "%d\n", to_nvmet_ns(item)->enabled);
@@ -468,6 +501,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
	&nvmet_ns_attr_device_path,
	&nvmet_ns_attr_device_nguid,
	&nvmet_ns_attr_device_uuid,
+	&nvmet_ns_attr_ana_grpid,
	&nvmet_ns_attr_enable,
	&nvmet_ns_attr_buffered_io,
	NULL,
@@ -916,6 +950,134 @@ static const struct config_item_type nvmet_referrals_type = {
	.ct_group_ops = &nvmet_referral_group_ops,
 };

+static struct {
+	enum nvme_ana_state	state;
+	const char		*name;
+} nvmet_ana_state_names[] = {
+	{ NVME_ANA_OPTIMIZED,		"optimized" },
+	{ NVME_ANA_NONOPTIMIZED,	"non-optimized" },
+	{ NVME_ANA_INACCESSIBLE,	"inaccessible" },
+	{ NVME_ANA_PERSISTENT_LOSS,	"persistent-loss" },
+	{ NVME_ANA_CHANGE,		"change" },
+};
+
+static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item,
+		char *page)
+{
+	struct nvmet_ana_group *grp = to_ana_group(item);
+	enum nvme_ana_state state = grp->port->ana_state[grp->grpid];
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
+		if (state != nvmet_ana_state_names[i].state)
+			continue;
+		return sprintf(page, "%s\n", nvmet_ana_state_names[i].name);
+	}
+
+	return sprintf(page, "\n");
+}
+
+static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct nvmet_ana_group *grp = to_ana_group(item);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
+		if (sysfs_streq(page, nvmet_ana_state_names[i].name))
+			goto found;
+	}
+
+	pr_err("Invalid value '%s' for ana_state\n", page);
+	return -EINVAL;
+
+found:
+	down_write(&nvmet_ana_sem);
+	grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state;
+	nvmet_ana_chgcnt++;
+	up_write(&nvmet_ana_sem);
+
+	nvmet_port_send_ana_event(grp->port);
+	return count;
+}
+
+CONFIGFS_ATTR(nvmet_ana_group_, ana_state);
+
+static struct configfs_attribute *nvmet_ana_group_attrs[] = {
+	&nvmet_ana_group_attr_ana_state,
+	NULL,
+};
+
+static void nvmet_ana_group_release(struct config_item *item)
+{
+	struct nvmet_ana_group *grp = to_ana_group(item);
+
+	if (grp == &grp->port->ana_default_group)
+		return;
+
+	down_write(&nvmet_ana_sem);
+	grp->port->ana_state[grp->grpid] = NVME_ANA_INACCESSIBLE;
+	nvmet_ana_group_enabled[grp->grpid]--;
+	up_write(&nvmet_ana_sem);
+
+	nvmet_port_send_ana_event(grp->port);
+	kfree(grp);
+}
+
+static struct configfs_item_operations nvmet_ana_group_item_ops = {
+	.release		= nvmet_ana_group_release,
+};
+
+static const struct config_item_type nvmet_ana_group_type = {
+	.ct_item_ops		= &nvmet_ana_group_item_ops,
+	.ct_attrs		= nvmet_ana_group_attrs,
+	.ct_owner		= THIS_MODULE,
+};
+
+static struct config_group *nvmet_ana_groups_make_group(
+		struct config_group *group, const char *name)
+{
+	struct nvmet_port *port = ana_groups_to_port(&group->cg_item);
+	struct nvmet_ana_group *grp;
+	u32 grpid;
+	int ret;
+
+	ret = kstrtou32(name, 0, &grpid);
+	if (ret)
+		goto out;
+
+	ret = -EINVAL;
+	if (grpid <= 1 || grpid > NVMET_MAX_ANAGRPS)
+		goto out;
+
+	ret = -ENOMEM;
+	grp = kzalloc(sizeof(*grp), GFP_KERNEL);
+	if (!grp)
+		goto out;
+	grp->port = port;
+	grp->grpid = grpid;
+
+	down_write(&nvmet_ana_sem);
+	nvmet_ana_group_enabled[grpid]++;
+	up_write(&nvmet_ana_sem);
+
+	nvmet_port_send_ana_event(grp->port);
+
+	config_group_init_type_name(&grp->group, name, &nvmet_ana_group_type);
+	return &grp->group;
+out:
+	return ERR_PTR(ret);
+}
+
+static struct configfs_group_operations nvmet_ana_groups_group_ops = {
+	.make_group		= nvmet_ana_groups_make_group,
+};
+
+static const struct config_item_type nvmet_ana_groups_type = {
+	.ct_group_ops		= &nvmet_ana_groups_group_ops,
+	.ct_owner		= THIS_MODULE,
+};
+
 /*
  * Ports definitions.
  */
@@ -923,6 +1085,7 @@ static void nvmet_port_release(struct config_item *item)
 {
	struct nvmet_port *port = to_nvmet_port(item);

+	kfree(port->ana_state);
	kfree(port);
 }
@@ -951,6 +1114,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
 {
	struct nvmet_port *port;
	u16 portid;
+	u32 i;

	if (kstrtou16(name, 0, &portid))
		return ERR_PTR(-EINVAL);
@@ -959,6 +1123,20 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
	if (!port)
		return ERR_PTR(-ENOMEM);

+	port->ana_state = kcalloc(NVMET_MAX_ANAGRPS + 1,
+			sizeof(*port->ana_state), GFP_KERNEL);
+	if (!port->ana_state) {
+		kfree(port);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) {
+		if (i == NVMET_DEFAULT_ANA_GRPID)
+			port->ana_state[1] = NVME_ANA_OPTIMIZED;
+		else
+			port->ana_state[i] = NVME_ANA_INACCESSIBLE;
+	}
+
	INIT_LIST_HEAD(&port->entry);
	INIT_LIST_HEAD(&port->subsystems);
	INIT_LIST_HEAD(&port->referrals);
@@ -975,6 +1153,18 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
			"referrals", &nvmet_referrals_type);
	configfs_add_default_group(&port->referrals_group, &port->group);

+	config_group_init_type_name(&port->ana_groups_group,
+			"ana_groups", &nvmet_ana_groups_type);
+	configfs_add_default_group(&port->ana_groups_group, &port->group);
+
+	port->ana_default_group.port = port;
+	port->ana_default_group.grpid = NVMET_DEFAULT_ANA_GRPID;
+	config_group_init_type_name(&port->ana_default_group.group,
+			__stringify(NVMET_DEFAULT_ANA_GRPID),
+			&nvmet_ana_group_type);
+	configfs_add_default_group(&port->ana_default_group.group,
+			&port->ana_groups_group);
+
	return &port->group;
 }
......
drivers/nvme/target/core.c
@@ -40,6 +40,10 @@ static DEFINE_IDA(cntlid_ida);
 */
 DECLARE_RWSEM(nvmet_config_sem);

+u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
+u64 nvmet_ana_chgcnt;
+DECLARE_RWSEM(nvmet_ana_sem);
+
 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
		const char *subsysnqn);
@@ -190,6 +194,33 @@ static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
	}
 }

+void nvmet_send_ana_event(struct nvmet_subsys *subsys,
+		struct nvmet_port *port)
+{
+	struct nvmet_ctrl *ctrl;
+
+	mutex_lock(&subsys->lock);
+	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+		if (port && ctrl->port != port)
+			continue;
+		if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE))
+			continue;
+		nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
+				NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
+	}
+	mutex_unlock(&subsys->lock);
+}
+
+void nvmet_port_send_ana_event(struct nvmet_port *port)
+{
+	struct nvmet_subsys_link *p;
+
+	down_read(&nvmet_config_sem);
+	list_for_each_entry(p, &port->subsystems, entry)
+		nvmet_send_ana_event(p->subsys, port);
+	up_read(&nvmet_config_sem);
+}
+
 int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
 {
	int ret = 0;
@@ -337,9 +368,13 @@ static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
 int nvmet_ns_enable(struct nvmet_ns *ns)
 {
	struct nvmet_subsys *subsys = ns->subsys;
-	int ret = 0;
+	int ret;

	mutex_lock(&subsys->lock);
+	ret = -EMFILE;
+	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
+		goto out_unlock;
+	ret = 0;
	if (ns->enabled)
		goto out_unlock;
@@ -374,6 +409,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
		list_add_tail_rcu(&ns->dev_link, &old->dev_link);
	}
+	subsys->nr_namespaces++;

	nvmet_ns_changed(subsys, ns->nsid);
	ns->enabled = true;
@@ -414,6 +450,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns)
	percpu_ref_exit(&ns->ref);

	mutex_lock(&subsys->lock);
+	subsys->nr_namespaces--;
	nvmet_ns_changed(subsys, ns->nsid);
	nvmet_ns_dev_disable(ns);
 out_unlock:
@@ -424,6 +461,10 @@ void nvmet_ns_free(struct nvmet_ns *ns)
 {
	nvmet_ns_disable(ns);

+	down_write(&nvmet_ana_sem);
+	nvmet_ana_group_enabled[ns->anagrpid]--;
+	up_write(&nvmet_ana_sem);
+
	kfree(ns->device_path);
	kfree(ns);
 }
@@ -441,6 +482,12 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
	ns->nsid = nsid;
	ns->subsys = subsys;

+	down_write(&nvmet_ana_sem);
+	ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
+	nvmet_ana_group_enabled[ns->anagrpid]++;
+	up_write(&nvmet_ana_sem);
+
	uuid_gen(&ns->uuid);
	ns->buffered_io = false;
@@ -548,6 +595,20 @@ int nvmet_sq_init(struct nvmet_sq *sq)
 }
 EXPORT_SYMBOL_GPL(nvmet_sq_init);

+static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
+		struct nvmet_ns *ns)
+{
+	enum nvme_ana_state state = port->ana_state[ns->anagrpid];
+
+	if (unlikely(state == NVME_ANA_INACCESSIBLE))
+		return NVME_SC_ANA_INACCESSIBLE;
+	if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
+		return NVME_SC_ANA_PERSISTENT_LOSS;
+	if (unlikely(state == NVME_ANA_CHANGE))
+		return NVME_SC_ANA_TRANSITION;
+	return 0;
+}
+
 static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 {
	struct nvme_command *cmd = req->cmd;
@@ -560,6 +621,9 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
	req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
	if (unlikely(!req->ns))
		return NVME_SC_INVALID_NS | NVME_SC_DNR;
+	ret = nvmet_check_ana_state(req->port, req->ns);
+	if (unlikely(ret))
+		return ret;

	if (req->ns->file)
		return nvmet_file_parse_io_cmd(req);
@@ -876,6 +940,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
	nvmet_init_cap(ctrl);

+	ctrl->port = req->port;
+
	INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
	INIT_LIST_HEAD(&ctrl->async_events);
@@ -1115,12 +1181,15 @@ static int __init nvmet_init(void)
 {
	int error;

+	nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
+
	buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
			WQ_MEM_RECLAIM, 0);
	if (!buffered_io_wq) {
		error = -ENOMEM;
		goto out;
	}
+
	error = nvmet_init_discovery();
	if (error)
		goto out;
......
drivers/nvme/target/nvmet.h
@@ -30,12 +30,11 @@
 #define NVMET_ASYNC_EVENTS		4
 #define NVMET_ERROR_LOG_SLOTS		128

 /*
  * Supported optional AENs:
  */
 #define NVMET_AEN_CFG_OPTIONAL \
-	NVME_AEN_CFG_NS_ATTR
+	(NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE)

 /*
  * Plus mandatory SMART AENs (we'll never send them, but allow enabling them):
@@ -64,6 +63,7 @@ struct nvmet_ns {
	loff_t			size;
	u8			nguid[16];
	uuid_t			uuid;
+	u32			anagrpid;

	bool			buffered_io;
	bool			enabled;
@@ -98,6 +98,18 @@ struct nvmet_sq {
	struct completion	confirm_done;
 };

+struct nvmet_ana_group {
+	struct config_group	group;
+	struct nvmet_port	*port;
+	u32			grpid;
+};
+
+static inline struct nvmet_ana_group *to_ana_group(struct config_item *item)
+{
+	return container_of(to_config_group(item), struct nvmet_ana_group,
+			group);
+}
+
 /**
  * struct nvmet_port -	Common structure to keep port
  *				information for the target.
@@ -115,6 +127,9 @@ struct nvmet_port {
	struct list_head		subsystems;
	struct config_group		referrals_group;
	struct list_head		referrals;
+	struct config_group		ana_groups_group;
+	struct nvmet_ana_group		ana_default_group;
+	enum nvme_ana_state		*ana_state;
	void				*priv;
	bool				enabled;
	int				inline_data_size;
@@ -126,6 +141,13 @@ static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
			group);
 }

+static inline struct nvmet_port *ana_groups_to_port(
+		struct config_item *item)
+{
+	return container_of(to_config_group(item), struct nvmet_port,
+			ana_groups_group);
+}
+
 struct nvmet_ctrl {
	struct nvmet_subsys	*subsys;
	struct nvmet_cq		**cqs;
@@ -140,6 +162,8 @@ struct nvmet_ctrl {
	u16			cntlid;
	u32			kato;

+	struct nvmet_port	*port;
+
	u32			aen_enabled;
	unsigned long		aen_masked;
	struct nvmet_req	*async_event_cmds[NVMET_ASYNC_EVENTS];
@@ -168,6 +192,7 @@ struct nvmet_subsys {
	struct kref		ref;

	struct list_head	namespaces;
+	unsigned int		nr_namespaces;
	unsigned int		max_nsid;

	struct list_head	ctrls;
@@ -340,6 +365,10 @@ void nvmet_ns_disable(struct nvmet_ns *ns);
 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid);
 void nvmet_ns_free(struct nvmet_ns *ns);

+void nvmet_send_ana_event(struct nvmet_subsys *subsys,
+		struct nvmet_port *port);
+void nvmet_port_send_ana_event(struct nvmet_port *port);
+
 int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
 void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
@@ -360,6 +389,22 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd);
 #define NVMET_QUEUE_SIZE	1024
 #define NVMET_NR_QUEUES		128
 #define NVMET_MAX_CMD		NVMET_QUEUE_SIZE
+
+/*
+ * Nice round number that makes a list of nsids fit into a page.
+ * Should become tunable at some point in the future.
+ */
+#define NVMET_MAX_NAMESPACES	1024
+
+/*
+ * 0 is not a valid ANA group ID, so we start numbering at 1.
+ *
+ * ANA Group 1 exists without manual intervention, has namespaces assigned to
+ * it by default, and is available in an optimized state through all ports.
+ */
+#define NVMET_MAX_ANAGRPS	128
+#define NVMET_DEFAULT_ANA_GRPID	1
+
 #define NVMET_KAS		10
 #define NVMET_DISC_KATO		120
@@ -373,6 +418,10 @@ extern struct nvmet_subsys *nvmet_disc_subsys;
 extern u64 nvmet_genctr;
 extern struct rw_semaphore nvmet_config_sem;

+extern u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
+extern u64 nvmet_ana_chgcnt;
+extern struct rw_semaphore nvmet_ana_sem;
+
 bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
		const char *hostnqn);
......
include/linux/nvme.h
@@ -242,7 +242,12 @@ struct nvme_id_ctrl {
	__le32			sanicap;
	__le32			hmminds;
	__le16			hmmaxd;
-	__u8			rsvd338[174];
+	__u8			rsvd338[4];
+	__u8			anatt;
+	__u8			anacap;
+	__le32			anagrpmax;
+	__le32			nanagrpid;
+	__u8			rsvd352[160];
	__u8			sqes;
	__u8			cqes;
	__le16			maxcmd;
@@ -258,7 +263,8 @@ struct nvme_id_ctrl {
	__le16			acwu;
	__u8			rsvd534[2];
	__le32			sgls;
-	__u8			rsvd540[228];
+	__le32			mnan;
+	__u8			rsvd544[224];
	char			subnqn[256];
	__u8			rsvd1024[768];
	__le32			ioccsz;
@@ -312,7 +318,9 @@ struct nvme_id_ns {
	__le16			nabspf;
	__le16			noiob;
	__u8			nvmcap[16];
-	__u8			rsvd64[40];
+	__u8			rsvd64[28];
+	__le32			anagrpid;
+	__u8			rsvd96[8];
	__u8			nguid[16];
	__u8			eui64[8];
	struct nvme_lbaf	lbaf[16];
@@ -425,6 +433,32 @@ struct nvme_effects_log {
	__u8   resv[2048];
 };

+enum nvme_ana_state {
+	NVME_ANA_OPTIMIZED		= 0x01,
+	NVME_ANA_NONOPTIMIZED		= 0x02,
+	NVME_ANA_INACCESSIBLE		= 0x03,
+	NVME_ANA_PERSISTENT_LOSS	= 0x04,
+	NVME_ANA_CHANGE			= 0x0f,
+};
+
+struct nvme_ana_group_desc {
+	__le32	grpid;
+	__le32	nnsids;
+	__le64	chgcnt;
+	__u8	state;
+	__u8	rsvd17[7];
+	__le32	nsids[];
+};
+
+/* flag for the log specific field of the ANA log */
+#define NVME_ANA_LOG_RGO	(1 << 0)
+
+struct nvme_ana_rsp_hdr {
+	__le64	chgcnt;
+	__le16	ngrps;
+	__le16	rsvd10[3];
+};
+
 enum {
	NVME_SMART_CRIT_SPARE		= 1 << 0,
	NVME_SMART_CRIT_TEMPERATURE	= 1 << 1,
@@ -444,11 +478,13 @@ enum {
 enum {
	NVME_AER_NOTICE_NS_CHANGED	= 0x00,
	NVME_AER_NOTICE_FW_ACT_STARTING = 0x01,
+	NVME_AER_NOTICE_ANA		= 0x03,
 };

 enum {
	NVME_AEN_CFG_NS_ATTR		= 1 << 8,
	NVME_AEN_CFG_FW_ACT		= 1 << 9,
+	NVME_AEN_CFG_ANA_CHANGE		= 1 << 11,
 };

 struct nvme_lba_range_type {
@@ -763,6 +799,7 @@ enum {
	NVME_LOG_FW_SLOT	= 0x03,
	NVME_LOG_CHANGED_NS	= 0x04,
	NVME_LOG_CMD_EFFECTS	= 0x05,
+	NVME_LOG_ANA		= 0x0c,
	NVME_LOG_DISC		= 0x70,
	NVME_LOG_RESERVATION	= 0x80,
	NVME_FWACT_REPL		= (0 << 3),
@@ -885,7 +922,7 @@ struct nvme_get_log_page_command {
	__u64			rsvd2[2];
	union nvme_data_ptr	dptr;
	__u8			lid;
-	__u8			rsvd10;
+	__u8			lsp; /* upper 4 bits reserved */
	__le16			numdl;
	__le16			numdu;
	__u16			rsvd11;
@@ -1185,6 +1222,13 @@ enum {
	NVME_SC_ACCESS_DENIED		= 0x286,
	NVME_SC_UNWRITTEN_BLOCK		= 0x287,

+	/*
+	 * Path-related Errors:
+	 */
+	NVME_SC_ANA_PERSISTENT_LOSS	= 0x301,
+	NVME_SC_ANA_INACCESSIBLE	= 0x302,
+	NVME_SC_ANA_TRANSITION		= 0x303,
+
	NVME_SC_DNR			= 0x4000,
 };
......