Commit 265c5596 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus-20180616' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A collection of fixes that should go into -rc1. This contains:

   - bsg_open vs bsg_unregister race fix (Anatoliy)

   - NVMe pull request from Christoph, with fixes for regressions in
     this window, FC connect/reconnect path code unification, and a
     trace point addition.

   - timeout fix (Christoph)

   - remove a few unused functions (Christoph)

   - blk-mq tag_set reinit fix (Roman)"

* tag 'for-linus-20180616' of git://git.kernel.dk/linux-block:
  bsg: fix race of bsg_open and bsg_unregister
  block: remov blk_queue_invalidate_tags
  nvme-fabrics: fix and refine state checks in __nvmf_check_ready
  nvme-fabrics: handle the admin-only case properly in nvmf_check_ready
  nvme-fabrics: refactor queue ready check
  blk-mq: remove blk_mq_tagset_iter
  nvme: remove nvme_reinit_tagset
  nvme-fc: fix nulling of queue data on reconnect
  nvme-fc: remove reinit_request routine
  blk-mq: don't time out requests again that are in the timeout handler
  nvme-fc: change controllers first connect to use reconnect path
  nvme: don't rely on the changed namespace list log
  nvmet: free smart-log buffer after use
  nvme-rdma: fix error flow during mapping request data
  nvme: add bio remapping tracepoint
  nvme: fix NULL pointer dereference in nvme_init_subsystem
  blk-mq: reinit q->tag_set_list entry only after grace period
parents 5e7b9212 d6c73964
......@@ -752,18 +752,6 @@ completion of the request to the block layer. This means ending tag
operations before calling end_that_request_last()! For an example of a user
of these helpers, see the IDE tagged command queueing support.
Certain hardware conditions may dictate a need to invalidate the block tag
queue. For instance, on IDE any tagged request error needs to clear both
the hardware and software block queue and enable the driver to sanely restart
all the outstanding requests. There's a third helper to do that:
blk_queue_invalidate_tags(struct request_queue *q)
Clear the internal block tag queue and re-add all the pending requests
to the request queue. The driver will receive them again on the
next request_fn run, just like it did the first time it encountered
them.
3.2.5.2 Tag info
Some block functions exist to query current tag status or to go from a
......@@ -805,8 +793,7 @@ Internally, block manages tags in the blk_queue_tag structure:
Most of the above is simple and straight forward, however busy_list may need
a bit of explaining. Normally we don't care too much about request ordering,
but in the event of any barrier requests in the tag queue we need to ensure
that requests are restarted in the order they were queue. This may happen
if the driver needs to use blk_queue_invalidate_tags().
that requests are restarted in the order they were queue.
3.3 I/O Submission
......
......@@ -311,35 +311,6 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
}
EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
int (fn)(void *, struct request *))
{
int i, j, ret = 0;
if (WARN_ON_ONCE(!fn))
goto out;
for (i = 0; i < set->nr_hw_queues; i++) {
struct blk_mq_tags *tags = set->tags[i];
if (!tags)
continue;
for (j = 0; j < tags->nr_tags; j++) {
if (!tags->static_rqs[j])
continue;
ret = fn(data, tags->static_rqs[j]);
if (ret)
goto out;
}
}
out:
return ret;
}
EXPORT_SYMBOL_GPL(blk_mq_tagset_iter);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
void *priv)
{
......
......@@ -671,6 +671,7 @@ static void __blk_mq_requeue_request(struct request *rq)
if (blk_mq_request_started(rq)) {
WRITE_ONCE(rq->state, MQ_RQ_IDLE);
rq->rq_flags &= ~RQF_TIMED_OUT;
if (q->dma_drain_size && blk_rq_bytes(rq))
rq->nr_phys_segments--;
}
......@@ -770,6 +771,7 @@ EXPORT_SYMBOL(blk_mq_tag_to_rq);
static void blk_mq_rq_timed_out(struct request *req, bool reserved)
{
req->rq_flags |= RQF_TIMED_OUT;
if (req->q->mq_ops->timeout) {
enum blk_eh_timer_return ret;
......@@ -779,6 +781,7 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
}
req->rq_flags &= ~RQF_TIMED_OUT;
blk_add_timer(req);
}
......@@ -788,6 +791,8 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
return false;
if (rq->rq_flags & RQF_TIMED_OUT)
return false;
deadline = blk_rq_deadline(rq);
if (time_after_eq(jiffies, deadline))
......@@ -2349,7 +2354,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
mutex_lock(&set->tag_list_lock);
list_del_rcu(&q->tag_set_list);
INIT_LIST_HEAD(&q->tag_set_list);
if (list_is_singular(&set->tag_list)) {
/* just transitioned to unshared */
set->flags &= ~BLK_MQ_F_TAG_SHARED;
......@@ -2357,8 +2361,8 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
blk_mq_update_tag_set_depth(set, false);
}
mutex_unlock(&set->tag_list_lock);
synchronize_rcu();
INIT_LIST_HEAD(&q->tag_set_list);
}
static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
......
......@@ -188,7 +188,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
*/
q->queue_tags = tags;
queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q);
INIT_LIST_HEAD(&q->tag_busy_list);
return 0;
}
EXPORT_SYMBOL(blk_queue_init_tags);
......@@ -374,27 +373,6 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
rq->tag = tag;
bqt->tag_index[tag] = rq;
blk_start_request(rq);
list_add(&rq->queuelist, &q->tag_busy_list);
return 0;
}
EXPORT_SYMBOL(blk_queue_start_tag);
/**
* blk_queue_invalidate_tags - invalidate all pending tags
* @q: the request queue for the device
*
* Description:
* Hardware conditions may dictate a need to stop all pending requests.
* In this case, we will safely clear the block side of the tag queue and
* readd all requests to the request queue in the right order.
**/
void blk_queue_invalidate_tags(struct request_queue *q)
{
struct list_head *tmp, *n;
lockdep_assert_held(q->queue_lock);
list_for_each_safe(tmp, n, &q->tag_busy_list)
blk_requeue_request(q, list_entry_rq(tmp));
}
EXPORT_SYMBOL(blk_queue_invalidate_tags);
......@@ -693,6 +693,8 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
struct bsg_device *bd;
unsigned char buf[32];
lockdep_assert_held(&bsg_mutex);
if (!blk_get_queue(rq))
return ERR_PTR(-ENXIO);
......@@ -707,14 +709,12 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
bsg_set_block(bd, file);
atomic_set(&bd->ref_count, 1);
mutex_lock(&bsg_mutex);
hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode)));
strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1);
bsg_dbg(bd, "bound to <%s>, max queue %d\n",
format_dev_t(buf, inode->i_rdev), bd->max_queue);
mutex_unlock(&bsg_mutex);
return bd;
}
......@@ -722,7 +722,7 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q)
{
struct bsg_device *bd;
mutex_lock(&bsg_mutex);
lockdep_assert_held(&bsg_mutex);
hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) {
if (bd->queue == q) {
......@@ -732,7 +732,6 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q)
}
bd = NULL;
found:
mutex_unlock(&bsg_mutex);
return bd;
}
......@@ -746,17 +745,18 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
*/
mutex_lock(&bsg_mutex);
bcd = idr_find(&bsg_minor_idr, iminor(inode));
mutex_unlock(&bsg_mutex);
if (!bcd)
return ERR_PTR(-ENODEV);
if (!bcd) {
bd = ERR_PTR(-ENODEV);
goto out_unlock;
}
bd = __bsg_get_device(iminor(inode), bcd->queue);
if (bd)
return bd;
bd = bsg_add_device(inode, bcd->queue, file);
if (!bd)
bd = bsg_add_device(inode, bcd->queue, file);
out_unlock:
mutex_unlock(&bsg_mutex);
return bd;
}
......
......@@ -2208,7 +2208,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
* Verify that the subsystem actually supports multiple
* controllers, else bail out.
*/
if (!ctrl->opts->discovery_nqn &&
if (!(ctrl->opts && ctrl->opts->discovery_nqn) &&
nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
dev_err(ctrl->device,
"ignoring ctrl due to duplicate subnqn (%s).\n",
......@@ -3197,40 +3197,28 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
nvme_remove_invalid_namespaces(ctrl, nn);
}
static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl)
static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
{
size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32);
__le32 *log;
int error, i;
bool ret = false;
int error;
log = kzalloc(log_size, GFP_KERNEL);
if (!log)
return false;
return;
/*
* We need to read the log to clear the AEN, but we don't want to rely
* on it for the changed namespace information as userspace could have
* raced with us in reading the log page, which could cause us to miss
* updates.
*/
error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
if (error) {
if (error)
dev_warn(ctrl->device,
"reading changed ns log failed: %d\n", error);
goto out_free_log;
}
if (log[0] == cpu_to_le32(0xffffffff))
goto out_free_log;
for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) {
u32 nsid = le32_to_cpu(log[i]);
if (nsid == 0)
break;
dev_info(ctrl->device, "rescanning namespace %d.\n", nsid);
nvme_validate_ns(ctrl, nsid);
}
ret = true;
out_free_log:
kfree(log);
return ret;
}
static void nvme_scan_work(struct work_struct *work)
......@@ -3246,9 +3234,8 @@ static void nvme_scan_work(struct work_struct *work)
WARN_ON_ONCE(!ctrl->tagset);
if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
if (nvme_scan_changed_ns_log(ctrl))
goto out_sort_namespaces;
dev_info(ctrl->device, "rescanning namespaces.\n");
nvme_clear_changed_ns_log(ctrl);
}
if (nvme_identify_ctrl(ctrl, &id))
......@@ -3263,7 +3250,6 @@ static void nvme_scan_work(struct work_struct *work)
nvme_scan_ns_sequential(ctrl, nn);
out_free_id:
kfree(id);
out_sort_namespaces:
down_write(&ctrl->namespaces_rwsem);
list_sort(NULL, &ctrl->namespaces, ns_cmp);
up_write(&ctrl->namespaces_rwsem);
......@@ -3641,16 +3627,6 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_start_queues);
int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
{
if (!ctrl->ops->reinit_request)
return 0;
return blk_mq_tagset_iter(set, set->driver_data,
ctrl->ops->reinit_request);
}
EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
int __init nvme_core_init(void)
{
int result = -ENOMEM;
......
......@@ -536,67 +536,55 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
return NULL;
}
blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live, bool is_connected)
/*
* For something we're not in a state to send to the device the default action
* is to busy it and retry it after the controller state is recovered. However,
* anything marked for failfast or nvme multipath is immediately failed.
*
* Note: commands used to initialize the controller will be marked for failfast.
* Note: nvme cli/ioctl commands are marked for failfast.
*/
blk_status_t nvmf_fail_nonready_command(struct request *rq)
{
struct nvme_command *cmd = nvme_req(rq)->cmd;
if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;
}
EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);
if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected))
return BLK_STS_OK;
bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live)
{
struct nvme_request *req = nvme_req(rq);
/*
* If we are in some state of setup or teardown only allow
* internally generated commands.
*/
if (!blk_rq_is_passthrough(rq) || (req->flags & NVME_REQ_USERCMD))
return false;
/*
* Only allow commands on a live queue, except for the connect command,
* which is require to set the queue live in the appropinquate states.
*/
switch (ctrl->state) {
case NVME_CTRL_NEW:
case NVME_CTRL_CONNECTING:
case NVME_CTRL_DELETING:
/*
* This is the case of starting a new or deleting an association
* but connectivity was lost before it was fully created or torn
* down. We need to error the commands used to initialize the
* controller so the reconnect can go into a retry attempt. The
* commands should all be marked REQ_FAILFAST_DRIVER, which will
* hit the reject path below. Anything else will be queued while
* the state settles.
*/
if (!is_connected)
break;
/*
* If queue is live, allow only commands that are internally
* generated pass through. These are commands on the admin
* queue to initialize the controller. This will reject any
* ioctl admin cmds received while initializing.
*/
if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD))
return BLK_STS_OK;
/*
* If the queue is not live, allow only a connect command. This
* will reject any ioctl admin cmd as well as initialization
* commands if the controller reverted the queue to non-live.
*/
if (!queue_live && blk_rq_is_passthrough(rq) &&
cmd->common.opcode == nvme_fabrics_command &&
cmd->fabrics.fctype == nvme_fabrics_type_connect)
return BLK_STS_OK;
if (req->cmd->common.opcode == nvme_fabrics_command &&
req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
return true;
break;
default:
break;
case NVME_CTRL_DEAD:
return false;
}
/*
* Any other new io is something we're not in a state to send to the
* device. Default action is to busy it and retry it after the
* controller state is recovered. However, anything marked for failfast
* or nvme multipath is immediately failed. Note: commands used to
* initialize the controller will be marked for failfast.
* Note: nvme cli/ioctl commands are marked for failfast.
*/
if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;
return queue_live;
}
EXPORT_SYMBOL_GPL(nvmf_check_if_ready);
EXPORT_SYMBOL_GPL(__nvmf_check_ready);
static const match_table_t opt_tokens = {
{ NVMF_OPT_TRANSPORT, "transport=%s" },
......
......@@ -162,7 +162,17 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
void nvmf_free_options(struct nvmf_ctrl_options *opts);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl,
struct request *rq, bool queue_live, bool is_connected);
blk_status_t nvmf_fail_nonready_command(struct request *rq);
bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live);
static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
bool queue_live)
{
if (likely(ctrl->state == NVME_CTRL_LIVE ||
ctrl->state == NVME_CTRL_ADMIN_ONLY))
return true;
return __nvmf_check_ready(ctrl, rq, queue_live);
}
#endif /* _NVME_FABRICS_H */
......@@ -142,6 +142,7 @@ struct nvme_fc_ctrl {
struct nvme_fc_rport *rport;
u32 cnum;
bool ioq_live;
bool assoc_active;
u64 association_id;
......@@ -1470,21 +1471,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
static int
nvme_fc_reinit_request(void *data, struct request *rq)
{
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
memset(cmdiu, 0, sizeof(*cmdiu));
cmdiu->scsi_id = NVME_CMD_SCSI_ID;
cmdiu->fc_id = NVME_CMD_FC_ID;
cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
memset(&op->rsp_iu, 0, sizeof(op->rsp_iu));
return 0;
}
static void
__nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
struct nvme_fc_fcp_op *op)
......@@ -1893,6 +1879,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue)
*/
queue->connection_id = 0;
atomic_set(&queue->csn, 1);
}
static void
......@@ -2279,14 +2266,13 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
struct nvme_command *sqe = &cmdiu->sqe;
enum nvmefc_fcp_datadir io_dir;
bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags);
u32 data_len;
blk_status_t ret;
ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
test_bit(NVME_FC_Q_LIVE, &queue->flags),
ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE);
if (unlikely(ret))
return ret;
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
return nvmf_fail_nonready_command(rq);
ret = nvme_setup_cmd(ns, rq, sqe);
if (ret)
......@@ -2463,6 +2449,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
if (ret)
goto out_delete_hw_queues;
ctrl->ioq_live = true;
return 0;
out_delete_hw_queues:
......@@ -2480,7 +2468,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
}
static int
nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
{
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
unsigned int nr_io_queues;
......@@ -2500,12 +2488,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
if (ctrl->ctrl.queue_count == 1)
return 0;
nvme_fc_init_io_queues(ctrl);
ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
if (ret)
goto out_free_io_queues;
ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
if (ret)
goto out_free_io_queues;
......@@ -2603,8 +2585,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
* Create the admin queue
*/
nvme_fc_init_queue(ctrl, 0);
ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
NVME_AQ_DEPTH);
if (ret)
......@@ -2615,8 +2595,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
if (ret)
goto out_delete_hw_queue;
if (ctrl->ctrl.state != NVME_CTRL_NEW)
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
ret = nvmf_connect_admin_queue(&ctrl->ctrl);
if (ret)
......@@ -2689,10 +2668,10 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
*/
if (ctrl->ctrl.queue_count > 1) {
if (ctrl->ctrl.state == NVME_CTRL_NEW)
if (!ctrl->ioq_live)
ret = nvme_fc_create_io_queues(ctrl);
else
ret = nvme_fc_reinit_io_queues(ctrl);
ret = nvme_fc_recreate_io_queues(ctrl);
if (ret)
goto out_term_aen_ops;
}
......@@ -2776,8 +2755,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
* use blk_mq_tagset_busy_itr() and the transport routine to
* terminate the exchanges.
*/
if (ctrl->ctrl.state != NVME_CTRL_NEW)
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
nvme_fc_terminate_exchange, &ctrl->ctrl);
......@@ -2917,7 +2895,6 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_delete_ctrl,
.get_address = nvmf_get_address,
.reinit_request = nvme_fc_reinit_request,
};
static void
......@@ -2934,7 +2911,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
nvme_fc_reconnect_or_delete(ctrl, ret);
else
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller reconnect complete\n",
"NVME-FC{%d}: controller connect complete\n",
ctrl->cnum);
}
......@@ -2982,7 +2959,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
{
struct nvme_fc_ctrl *ctrl;
unsigned long flags;
int ret, idx, retry;
int ret, idx;
if (!(rport->remoteport.port_role &
(FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
......@@ -3009,11 +2986,13 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
}
ctrl->ctrl.opts = opts;
ctrl->ctrl.nr_reconnects = 0;
INIT_LIST_HEAD(&ctrl->ctrl_list);
ctrl->lport = lport;
ctrl->rport = rport;
ctrl->dev = lport->dev;
ctrl->cnum = idx;
ctrl->ioq_live = false;
ctrl->assoc_active = false;
init_waitqueue_head(&ctrl->ioabort_wait);
......@@ -3032,6 +3011,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->ctrl.sqsize = opts->queue_size - 1;
ctrl->ctrl.kato = opts->kato;
ctrl->ctrl.cntlid = 0xffff;
ret = -ENOMEM;
ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
......@@ -3039,6 +3019,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
if (!ctrl->queues)
goto out_free_ida;
nvme_fc_init_queue(ctrl, 0);
memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
......@@ -3081,62 +3063,24 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
spin_unlock_irqrestore(&rport->lock, flags);
/*
* It's possible that transactions used to create the association
* may fail. Examples: CreateAssociation LS or CreateIOConnection
* LS gets dropped/corrupted/fails; or a frame gets dropped or a
* command times out for one of the actions to init the controller
* (Connect, Get/Set_Property, Set_Features, etc). Many of these
* transport errors (frame drop, LS failure) inherently must kill
* the association. The transport is coded so that any command used
* to create the association (prior to a LIVE state transition
* while NEW or CONNECTING) will fail if it completes in error or
* times out.
*
* As such: as the connect request was mostly likely due to a
* udev event that discovered the remote port, meaning there is
* not an admin or script there to restart if the connect
* request fails, retry the initial connection creation up to
* three times before giving up and declaring failure.
*/
for (retry = 0; retry < 3; retry++) {
ret = nvme_fc_create_association(ctrl);
if (!ret)
break;
}
if (ret) {
nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
cancel_work_sync(&ctrl->ctrl.reset_work);
cancel_delayed_work_sync(&ctrl->connect_work);
/* couldn't schedule retry - fail out */
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) ||
!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
ctrl->ctrl.opts = NULL;
"NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
goto fail_ctrl;
}
/* initiate nvme ctrl ref counting teardown */
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_get_ctrl(&ctrl->ctrl);
/* Remove core ctrl ref. */
if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
nvme_put_ctrl(&ctrl->ctrl);
/* as we're past the point where we transition to the ref
* counting teardown path, if we return a bad pointer here,
* the calling routine, thinking it's prior to the
* transition, will do an rport put. Since the teardown
* path also does a rport put, we do an extra get here to
* so proper order/teardown happens.
*/
nvme_fc_rport_get(rport);
if (ret > 0)
ret = -EIO;
return ERR_PTR(ret);
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: failed to schedule initial connect\n",
ctrl->cnum);
goto fail_ctrl;
}
nvme_get_ctrl(&ctrl->ctrl);
flush_delayed_work(&ctrl->connect_work);
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
......@@ -3144,6 +3088,30 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
return &ctrl->ctrl;
fail_ctrl:
nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
cancel_work_sync(&ctrl->ctrl.reset_work);
cancel_delayed_work_sync(&ctrl->connect_work);
ctrl->ctrl.opts = NULL;
/* initiate nvme ctrl ref counting teardown */
nvme_uninit_ctrl(&ctrl->ctrl);
/* Remove core ctrl ref. */
nvme_put_ctrl(&ctrl->ctrl);
/* as we're past the point where we transition to the ref
* counting teardown path, if we return a bad pointer here,
* the calling routine, thinking it's prior to the
* transition, will do an rport put. Since the teardown
* path also does a rport put, we do an extra get here to
* so proper order/teardown happens.
*/
nvme_fc_rport_get(rport);
return ERR_PTR(-EIO);
out_cleanup_admin_q:
blk_cleanup_queue(ctrl->ctrl.admin_q);
out_free_admin_tag_set:
......
......@@ -12,6 +12,7 @@
*/
#include <linux/moduleparam.h>
#include <trace/events/block.h>
#include "nvme.h"
static bool multipath = true;
......@@ -111,6 +112,9 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
if (likely(ns)) {
bio->bi_disk = ns->disk;
bio->bi_opf |= REQ_NVME_MPATH;
trace_block_bio_remap(bio->bi_disk->queue, bio,
disk_devt(ns->head->disk),
bio->bi_iter.bi_sector);
ret = direct_make_request(bio);
} else if (!list_empty_careful(&head->list)) {
dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
......
......@@ -321,7 +321,6 @@ struct nvme_ctrl_ops {
void (*submit_async_event)(struct nvme_ctrl *ctrl);
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
int (*reinit_request)(void *data, struct request *rq);
void (*stop_ctrl)(struct nvme_ctrl *ctrl);
};
......@@ -416,7 +415,6 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
void nvme_start_freeze(struct nvme_ctrl *ctrl);
int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set);
#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
......
......@@ -1189,21 +1189,38 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
if (unlikely(count <= 0)) {
sg_free_table_chained(&req->sg_table, true);
return -EIO;
ret = -EIO;
goto out_free_table;
}
if (count == 1) {
if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
blk_rq_payload_bytes(rq) <=
nvme_rdma_inline_data_size(queue))
return nvme_rdma_map_sg_inline(queue, req, c);
nvme_rdma_inline_data_size(queue)) {
ret = nvme_rdma_map_sg_inline(queue, req, c);
goto out;
}
if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
return nvme_rdma_map_sg_single(queue, req, c);
if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
ret = nvme_rdma_map_sg_single(queue, req, c);
goto out;
}
}
return nvme_rdma_map_sg_fr(queue, req, c, count);
ret = nvme_rdma_map_sg_fr(queue, req, c, count);
out:
if (unlikely(ret))
goto out_unmap_sg;
return 0;
out_unmap_sg:
ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
req->nents, rq_data_dir(rq) ==
WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
out_free_table:
sg_free_table_chained(&req->sg_table, true);
return ret;
}
static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
......@@ -1613,15 +1630,14 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_rdma_qe *sqe = &req->sqe;
struct nvme_command *c = sqe->data;
struct ib_device *dev;
bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags);
blk_status_t ret;
int err;
WARN_ON_ONCE(rq->tag < 0);
ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
test_bit(NVME_RDMA_Q_LIVE, &queue->flags), true);
if (unlikely(ret))
return ret;
if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
return nvmf_fail_nonready_command(rq);
dev = queue->device->dev;
ib_dma_sync_single_for_cpu(dev, sqe->dma,
......
......@@ -119,9 +119,11 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
else
status = nvmet_get_smart_log_nsid(req, log);
if (status)
goto out;
goto out_free_log;
status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
out_free_log:
kfree(log);
out:
nvmet_req_complete(req, status);
}
......
......@@ -158,12 +158,11 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
struct nvme_loop_queue *queue = hctx->driver_data;
struct request *req = bd->rq;
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags);
blk_status_t ret;
ret = nvmf_check_if_ready(&queue->ctrl->ctrl, req,
test_bit(NVME_LOOP_Q_LIVE, &queue->flags), true);
if (unlikely(ret))
return ret;
if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
return nvmf_fail_nonready_command(req);
ret = nvme_setup_cmd(ns, req, &iod->cmd);
if (ret)
......
......@@ -281,8 +281,6 @@ void blk_freeze_queue_start(struct request_queue *q);
void blk_mq_freeze_queue_wait(struct request_queue *q);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout);
int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
int (reinit_request)(void *, struct request *));
int blk_mq_map_queues(struct blk_mq_tag_set *set);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
......
......@@ -127,6 +127,8 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
/* already slept for hybrid poll */
#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20))
/* ->timeout has been called, don't expire again */
#define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21))
/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
......@@ -560,7 +562,6 @@ struct request_queue {
unsigned int dma_alignment;
struct blk_queue_tag *queue_tags;
struct list_head tag_busy_list;
unsigned int nr_sorted;
unsigned int in_flight[2];
......@@ -1373,7 +1374,6 @@ extern void blk_queue_end_tag(struct request_queue *, struct request *);
extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int);
extern void blk_queue_free_tags(struct request_queue *);
extern int blk_queue_resize_tags(struct request_queue *, int);
extern void blk_queue_invalidate_tags(struct request_queue *);
extern struct blk_queue_tag *blk_init_tags(int, int);
extern void blk_free_tags(struct blk_queue_tag *);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment