Commit 8df2a0a6 authored by Linus Torvalds

Merge tag 'block-5.7-2020-04-10' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Here's a set of fixes that should go into this merge window. This
  contains:

   - NVMe pull request from Christoph with various fixes

   - Better discard support for loop (Evan)

   - Only call ->commit_rqs() if we have queued IO (Keith)

   - blkcg offlining fixes (Tejun)

   - fix (and fix the fix) for busy partitions"

* tag 'block-5.7-2020-04-10' of git://git.kernel.dk/linux-block:
  block: fix busy device checking in blk_drop_partitions again
  block: fix busy device checking in blk_drop_partitions
  nvmet-rdma: fix double free of rdma queue
  blk-mq: don't commit_rqs() if none were queued
  nvme-fc: Revert "add module to ops template to allow module references"
  nvme: fix deadlock caused by ANA update wrong locking
  nvmet-rdma: fix bonding failover possible NULL deref
  loop: Better discard support for block devices
  loop: Report EOPNOTSUPP properly
  nvmet: fix NULL dereference when removing a referral
  nvme: inherit stable pages constraint in the mpath stack device
  blkcg: don't offline parent blkcg first
  blkcg: rename blkcg->cgwb_refcnt to ->online_pin and always use it
  nvme-tcp: fix possible crash in recv error flow
  nvme-tcp: don't poll a non-live queue
  nvme-tcp: fix possible crash in write_zeroes processing
  nvmet-fc: fix typo in comment
  nvme-rdma: Replace comma with a semicolon
  nvme-fcloop: fix deallocation of working context
  nvme: fix compat address handling in several ioctls
parents 172edde9 cb6b771b
...@@ -883,8 +883,8 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css) ...@@ -883,8 +883,8 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
/* this prevents anyone from attaching or migrating to this blkcg */ /* this prevents anyone from attaching or migrating to this blkcg */
wb_blkcg_offline(blkcg); wb_blkcg_offline(blkcg);
/* put the base cgwb reference allowing step 2 to be triggered */ /* put the base online pin allowing step 2 to be triggered */
blkcg_cgwb_put(blkcg); blkcg_unpin_online(blkcg);
} }
/** /**
...@@ -983,11 +983,11 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) ...@@ -983,11 +983,11 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
} }
spin_lock_init(&blkcg->lock); spin_lock_init(&blkcg->lock);
refcount_set(&blkcg->online_pin, 1);
INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN); INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_NOWAIT | __GFP_NOWARN);
INIT_HLIST_HEAD(&blkcg->blkg_list); INIT_HLIST_HEAD(&blkcg->blkg_list);
#ifdef CONFIG_CGROUP_WRITEBACK #ifdef CONFIG_CGROUP_WRITEBACK
INIT_LIST_HEAD(&blkcg->cgwb_list); INIT_LIST_HEAD(&blkcg->cgwb_list);
refcount_set(&blkcg->cgwb_refcnt, 1);
#endif #endif
list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs); list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
...@@ -1006,6 +1006,21 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) ...@@ -1006,6 +1006,21 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
return ret; return ret;
} }
static int blkcg_css_online(struct cgroup_subsys_state *css)
{
struct blkcg *blkcg = css_to_blkcg(css);
struct blkcg *parent = blkcg_parent(blkcg);
/*
* blkcg_pin_online() is used to delay blkcg offline so that blkgs
* don't go offline while cgwbs are still active on them. Pin the
* parent so that offline always happens towards the root.
*/
if (parent)
blkcg_pin_online(parent);
return 0;
}
/** /**
* blkcg_init_queue - initialize blkcg part of request queue * blkcg_init_queue - initialize blkcg part of request queue
* @q: request_queue to initialize * @q: request_queue to initialize
...@@ -1199,6 +1214,7 @@ static void blkcg_exit(struct task_struct *tsk) ...@@ -1199,6 +1214,7 @@ static void blkcg_exit(struct task_struct *tsk)
struct cgroup_subsys io_cgrp_subsys = { struct cgroup_subsys io_cgrp_subsys = {
.css_alloc = blkcg_css_alloc, .css_alloc = blkcg_css_alloc,
.css_online = blkcg_css_online,
.css_offline = blkcg_css_offline, .css_offline = blkcg_css_offline,
.css_free = blkcg_css_free, .css_free = blkcg_css_free,
.can_attach = blkcg_can_attach, .can_attach = blkcg_can_attach,
......
...@@ -1289,7 +1289,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, ...@@ -1289,7 +1289,7 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list,
* the driver there was more coming, but that turned out to * the driver there was more coming, but that turned out to
* be a lie. * be a lie.
*/ */
if (q->mq_ops->commit_rqs) if (q->mq_ops->commit_rqs && queued)
q->mq_ops->commit_rqs(hctx); q->mq_ops->commit_rqs(hctx);
spin_lock(&hctx->lock); spin_lock(&hctx->lock);
...@@ -1911,6 +1911,8 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last) ...@@ -1911,6 +1911,8 @@ blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
struct list_head *list) struct list_head *list)
{ {
int queued = 0;
while (!list_empty(list)) { while (!list_empty(list)) {
blk_status_t ret; blk_status_t ret;
struct request *rq = list_first_entry(list, struct request, struct request *rq = list_first_entry(list, struct request,
...@@ -1926,7 +1928,8 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, ...@@ -1926,7 +1928,8 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
break; break;
} }
blk_mq_end_request(rq, ret); blk_mq_end_request(rq, ret);
} } else
queued++;
} }
/* /*
...@@ -1934,7 +1937,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx, ...@@ -1934,7 +1937,7 @@ void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
* the driver there was more coming, but that turned out to * the driver there was more coming, but that turned out to
* be a lie. * be a lie.
*/ */
if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs) if (!list_empty(list) && hctx->queue->mq_ops->commit_rqs && queued)
hctx->queue->mq_ops->commit_rqs(hctx); hctx->queue->mq_ops->commit_rqs(hctx);
} }
......
...@@ -496,7 +496,7 @@ int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev) ...@@ -496,7 +496,7 @@ int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev)
if (!disk_part_scan_enabled(disk)) if (!disk_part_scan_enabled(disk))
return 0; return 0;
if (bdev->bd_part_count || bdev->bd_super) if (bdev->bd_part_count || bdev->bd_openers > 1)
return -EBUSY; return -EBUSY;
res = invalidate_partition(disk, 0); res = invalidate_partition(disk, 0);
if (res) if (res)
......
...@@ -429,11 +429,12 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, ...@@ -429,11 +429,12 @@ static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos,
* information. * information.
*/ */
struct file *file = lo->lo_backing_file; struct file *file = lo->lo_backing_file;
struct request_queue *q = lo->lo_queue;
int ret; int ret;
mode |= FALLOC_FL_KEEP_SIZE; mode |= FALLOC_FL_KEEP_SIZE;
if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) { if (!blk_queue_discard(q)) {
ret = -EOPNOTSUPP; ret = -EOPNOTSUPP;
goto out; goto out;
} }
...@@ -463,7 +464,7 @@ static void lo_complete_rq(struct request *rq) ...@@ -463,7 +464,7 @@ static void lo_complete_rq(struct request *rq)
if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) || if (!cmd->use_aio || cmd->ret < 0 || cmd->ret == blk_rq_bytes(rq) ||
req_op(rq) != REQ_OP_READ) { req_op(rq) != REQ_OP_READ) {
if (cmd->ret < 0) if (cmd->ret < 0)
ret = BLK_STS_IOERR; ret = errno_to_blk_status(cmd->ret);
goto end_io; goto end_io;
} }
...@@ -867,28 +868,47 @@ static void loop_config_discard(struct loop_device *lo) ...@@ -867,28 +868,47 @@ static void loop_config_discard(struct loop_device *lo)
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
struct request_queue *q = lo->lo_queue; struct request_queue *q = lo->lo_queue;
/*
* If the backing device is a block device, mirror its zeroing
* capability. Set the discard sectors to the block device's zeroing
* capabilities because loop discards result in blkdev_issue_zeroout(),
* not blkdev_issue_discard(). This maintains consistent behavior with
* file-backed loop devices: discarded regions read back as zero.
*/
if (S_ISBLK(inode->i_mode) && !lo->lo_encrypt_key_size) {
struct request_queue *backingq;
backingq = bdev_get_queue(inode->i_bdev);
blk_queue_max_discard_sectors(q,
backingq->limits.max_write_zeroes_sectors);
blk_queue_max_write_zeroes_sectors(q,
backingq->limits.max_write_zeroes_sectors);
/* /*
* We use punch hole to reclaim the free space used by the * We use punch hole to reclaim the free space used by the
* image a.k.a. discard. However we do not support discard if * image a.k.a. discard. However we do not support discard if
* encryption is enabled, because it may give an attacker * encryption is enabled, because it may give an attacker
* useful information. * useful information.
*/ */
if ((!file->f_op->fallocate) || } else if (!file->f_op->fallocate || lo->lo_encrypt_key_size) {
lo->lo_encrypt_key_size) {
q->limits.discard_granularity = 0; q->limits.discard_granularity = 0;
q->limits.discard_alignment = 0; q->limits.discard_alignment = 0;
blk_queue_max_discard_sectors(q, 0); blk_queue_max_discard_sectors(q, 0);
blk_queue_max_write_zeroes_sectors(q, 0); blk_queue_max_write_zeroes_sectors(q, 0);
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
return;
}
q->limits.discard_granularity = inode->i_sb->s_blocksize; } else {
q->limits.discard_alignment = 0; q->limits.discard_granularity = inode->i_sb->s_blocksize;
q->limits.discard_alignment = 0;
blk_queue_max_discard_sectors(q, UINT_MAX >> 9);
blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9);
}
blk_queue_max_discard_sectors(q, UINT_MAX >> 9); if (q->limits.max_write_zeroes_sectors)
blk_queue_max_write_zeroes_sectors(q, UINT_MAX >> 9); blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); else
blk_queue_flag_clear(QUEUE_FLAG_DISCARD, q);
} }
static void loop_unprepare_queue(struct loop_device *lo) static void loop_unprepare_queue(struct loop_device *lo)
...@@ -1955,7 +1975,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd) ...@@ -1955,7 +1975,10 @@ static void loop_handle_cmd(struct loop_cmd *cmd)
failed: failed:
/* complete non-aio request */ /* complete non-aio request */
if (!cmd->use_aio || ret) { if (!cmd->use_aio || ret) {
cmd->ret = ret ? -EIO : 0; if (ret == -EOPNOTSUPP)
cmd->ret = ret;
else
cmd->ret = ret ? -EIO : 0;
blk_mq_complete_request(rq); blk_mq_complete_request(rq);
} }
} }
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/blk-mq.h> #include <linux/blk-mq.h>
#include <linux/compat.h>
#include <linux/delay.h> #include <linux/delay.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/hdreg.h> #include <linux/hdreg.h>
...@@ -1252,6 +1253,18 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl) ...@@ -1252,6 +1253,18 @@ static void nvme_enable_aen(struct nvme_ctrl *ctrl)
queue_work(nvme_wq, &ctrl->async_event_work); queue_work(nvme_wq, &ctrl->async_event_work);
} }
/*
* Convert integer values from ioctl structures to user pointers, silently
* ignoring the upper bits in the compat case to match behaviour of 32-bit
* kernels.
*/
static void __user *nvme_to_user_ptr(uintptr_t ptrval)
{
if (in_compat_syscall())
ptrval = (compat_uptr_t)ptrval;
return (void __user *)ptrval;
}
static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
{ {
struct nvme_user_io io; struct nvme_user_io io;
...@@ -1275,7 +1288,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) ...@@ -1275,7 +1288,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
length = (io.nblocks + 1) << ns->lba_shift; length = (io.nblocks + 1) << ns->lba_shift;
meta_len = (io.nblocks + 1) * ns->ms; meta_len = (io.nblocks + 1) * ns->ms;
metadata = (void __user *)(uintptr_t)io.metadata; metadata = nvme_to_user_ptr(io.metadata);
if (ns->ext) { if (ns->ext) {
length += meta_len; length += meta_len;
...@@ -1298,7 +1311,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) ...@@ -1298,7 +1311,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
c.rw.appmask = cpu_to_le16(io.appmask); c.rw.appmask = cpu_to_le16(io.appmask);
return nvme_submit_user_cmd(ns->queue, &c, return nvme_submit_user_cmd(ns->queue, &c,
(void __user *)(uintptr_t)io.addr, length, nvme_to_user_ptr(io.addr), length,
metadata, meta_len, lower_32_bits(io.slba), NULL, 0); metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
} }
...@@ -1418,9 +1431,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ...@@ -1418,9 +1431,9 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
effects = nvme_passthru_start(ctrl, ns, cmd.opcode); effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
(void __user *)(uintptr_t)cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.addr), cmd.data_len,
(void __user *)(uintptr_t)cmd.metadata, nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
cmd.metadata_len, 0, &result, timeout); 0, &result, timeout);
nvme_passthru_end(ctrl, effects); nvme_passthru_end(ctrl, effects);
if (status >= 0) { if (status >= 0) {
...@@ -1465,8 +1478,8 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ...@@ -1465,8 +1478,8 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
effects = nvme_passthru_start(ctrl, ns, cmd.opcode); effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
(void __user *)(uintptr_t)cmd.addr, cmd.data_len, nvme_to_user_ptr(cmd.addr), cmd.data_len,
(void __user *)(uintptr_t)cmd.metadata, cmd.metadata_len, nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
0, &cmd.result, timeout); 0, &cmd.result, timeout);
nvme_passthru_end(ctrl, effects); nvme_passthru_end(ctrl, effects);
...@@ -1884,6 +1897,13 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ...@@ -1884,6 +1897,13 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->head->disk) { if (ns->head->disk) {
nvme_update_disk_info(ns->head->disk, ns, id); nvme_update_disk_info(ns->head->disk, ns, id);
blk_queue_stack_limits(ns->head->disk->queue, ns->queue); blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
struct backing_dev_info *info =
ns->head->disk->queue->backing_dev_info;
info->capabilities |= BDI_CAP_STABLE_WRITES;
}
revalidate_disk(ns->head->disk); revalidate_disk(ns->head->disk);
} }
#endif #endif
......
...@@ -342,8 +342,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, ...@@ -342,8 +342,7 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
!template->ls_req || !template->fcp_io || !template->ls_req || !template->fcp_io ||
!template->ls_abort || !template->fcp_abort || !template->ls_abort || !template->fcp_abort ||
!template->max_hw_queues || !template->max_sgl_segments || !template->max_hw_queues || !template->max_sgl_segments ||
!template->max_dif_sgl_segments || !template->dma_boundary || !template->max_dif_sgl_segments || !template->dma_boundary) {
!template->module) {
ret = -EINVAL; ret = -EINVAL;
goto out_reghost_failed; goto out_reghost_failed;
} }
...@@ -2016,7 +2015,6 @@ nvme_fc_ctrl_free(struct kref *ref) ...@@ -2016,7 +2015,6 @@ nvme_fc_ctrl_free(struct kref *ref)
{ {
struct nvme_fc_ctrl *ctrl = struct nvme_fc_ctrl *ctrl =
container_of(ref, struct nvme_fc_ctrl, ref); container_of(ref, struct nvme_fc_ctrl, ref);
struct nvme_fc_lport *lport = ctrl->lport;
unsigned long flags; unsigned long flags;
if (ctrl->ctrl.tagset) { if (ctrl->ctrl.tagset) {
...@@ -2043,7 +2041,6 @@ nvme_fc_ctrl_free(struct kref *ref) ...@@ -2043,7 +2041,6 @@ nvme_fc_ctrl_free(struct kref *ref)
if (ctrl->ctrl.opts) if (ctrl->ctrl.opts)
nvmf_free_options(ctrl->ctrl.opts); nvmf_free_options(ctrl->ctrl.opts);
kfree(ctrl); kfree(ctrl);
module_put(lport->ops->module);
} }
static void static void
...@@ -3074,15 +3071,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ...@@ -3074,15 +3071,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
goto out_fail; goto out_fail;
} }
if (!try_module_get(lport->ops->module)) {
ret = -EUNATCH;
goto out_free_ctrl;
}
idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL);
if (idx < 0) { if (idx < 0) {
ret = -ENOSPC; ret = -ENOSPC;
goto out_mod_put; goto out_free_ctrl;
} }
ctrl->ctrl.opts = opts; ctrl->ctrl.opts = opts;
...@@ -3232,8 +3224,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ...@@ -3232,8 +3224,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
out_free_ida: out_free_ida:
put_device(ctrl->dev); put_device(ctrl->dev);
ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
out_mod_put:
module_put(lport->ops->module);
out_free_ctrl: out_free_ctrl:
kfree(ctrl); kfree(ctrl);
out_fail: out_fail:
......
...@@ -510,7 +510,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, ...@@ -510,7 +510,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
if (!nr_nsids) if (!nr_nsids)
return 0; return 0;
down_write(&ctrl->namespaces_rwsem); down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list) { list_for_each_entry(ns, &ctrl->namespaces, list) {
unsigned nsid = le32_to_cpu(desc->nsids[n]); unsigned nsid = le32_to_cpu(desc->nsids[n]);
...@@ -521,7 +521,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, ...@@ -521,7 +521,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
if (++n == nr_nsids) if (++n == nr_nsids)
break; break;
} }
up_write(&ctrl->namespaces_rwsem); up_read(&ctrl->namespaces_rwsem);
return 0; return 0;
} }
......
...@@ -1342,7 +1342,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, ...@@ -1342,7 +1342,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
int ret; int ret;
sge->addr = qe->dma; sge->addr = qe->dma;
sge->length = sizeof(struct nvme_command), sge->length = sizeof(struct nvme_command);
sge->lkey = queue->device->pd->local_dma_lkey; sge->lkey = queue->device->pd->local_dma_lkey;
wr.next = NULL; wr.next = NULL;
......
...@@ -174,16 +174,14 @@ static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req) ...@@ -174,16 +174,14 @@ static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req) static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
{ {
struct request *rq; struct request *rq;
unsigned int bytes;
if (unlikely(nvme_tcp_async_req(req))) if (unlikely(nvme_tcp_async_req(req)))
return false; /* async events don't have a request */ return false; /* async events don't have a request */
rq = blk_mq_rq_from_pdu(req); rq = blk_mq_rq_from_pdu(req);
bytes = blk_rq_payload_bytes(rq);
return rq_data_dir(rq) == WRITE && bytes && return rq_data_dir(rq) == WRITE && req->data_len &&
bytes <= nvme_tcp_inline_data_size(req->queue); req->data_len <= nvme_tcp_inline_data_size(req->queue);
} }
static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req) static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
...@@ -1075,7 +1073,7 @@ static void nvme_tcp_io_work(struct work_struct *w) ...@@ -1075,7 +1073,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
if (result > 0) if (result > 0)
pending = true; pending = true;
else if (unlikely(result < 0)) else if (unlikely(result < 0))
break; return;
if (!pending) if (!pending)
return; return;
...@@ -2164,7 +2162,9 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue, ...@@ -2164,7 +2162,9 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
c->common.flags |= NVME_CMD_SGL_METABUF; c->common.flags |= NVME_CMD_SGL_METABUF;
if (rq_data_dir(rq) == WRITE && req->data_len && if (!blk_rq_nr_phys_segments(rq))
nvme_tcp_set_sg_null(c);
else if (rq_data_dir(rq) == WRITE &&
req->data_len <= nvme_tcp_inline_data_size(queue)) req->data_len <= nvme_tcp_inline_data_size(queue))
nvme_tcp_set_sg_inline(queue, c, req->data_len); nvme_tcp_set_sg_inline(queue, c, req->data_len);
else else
...@@ -2191,7 +2191,8 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, ...@@ -2191,7 +2191,8 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
req->data_sent = 0; req->data_sent = 0;
req->pdu_len = 0; req->pdu_len = 0;
req->pdu_sent = 0; req->pdu_sent = 0;
req->data_len = blk_rq_payload_bytes(rq); req->data_len = blk_rq_nr_phys_segments(rq) ?
blk_rq_payload_bytes(rq) : 0;
req->curr_bio = rq->bio; req->curr_bio = rq->bio;
if (rq_data_dir(rq) == WRITE && if (rq_data_dir(rq) == WRITE &&
...@@ -2298,6 +2299,9 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) ...@@ -2298,6 +2299,9 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
struct nvme_tcp_queue *queue = hctx->driver_data; struct nvme_tcp_queue *queue = hctx->driver_data;
struct sock *sk = queue->sock->sk; struct sock *sk = queue->sock->sk;
if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
return 0;
if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
sk_busy_loop(sk, true); sk_busy_loop(sk, true);
nvme_tcp_try_recv(queue); nvme_tcp_try_recv(queue);
......
...@@ -1098,12 +1098,19 @@ static struct configfs_attribute *nvmet_referral_attrs[] = { ...@@ -1098,12 +1098,19 @@ static struct configfs_attribute *nvmet_referral_attrs[] = {
NULL, NULL,
}; };
static void nvmet_referral_release(struct config_item *item) static void nvmet_referral_notify(struct config_group *group,
struct config_item *item)
{ {
struct nvmet_port *parent = to_nvmet_port(item->ci_parent->ci_parent); struct nvmet_port *parent = to_nvmet_port(item->ci_parent->ci_parent);
struct nvmet_port *port = to_nvmet_port(item); struct nvmet_port *port = to_nvmet_port(item);
nvmet_referral_disable(parent, port); nvmet_referral_disable(parent, port);
}
static void nvmet_referral_release(struct config_item *item)
{
struct nvmet_port *port = to_nvmet_port(item);
kfree(port); kfree(port);
} }
...@@ -1134,6 +1141,7 @@ static struct config_group *nvmet_referral_make( ...@@ -1134,6 +1141,7 @@ static struct config_group *nvmet_referral_make(
static struct configfs_group_operations nvmet_referral_group_ops = { static struct configfs_group_operations nvmet_referral_group_ops = {
.make_group = nvmet_referral_make, .make_group = nvmet_referral_make,
.disconnect_notify = nvmet_referral_notify,
}; };
static const struct config_item_type nvmet_referrals_type = { static const struct config_item_type nvmet_referrals_type = {
......
...@@ -684,7 +684,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) ...@@ -684,7 +684,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
disconnect = atomic_xchg(&queue->connected, 0); disconnect = atomic_xchg(&queue->connected, 0);
spin_lock_irqsave(&queue->qlock, flags); spin_lock_irqsave(&queue->qlock, flags);
/* about outstanding io's */ /* abort outstanding io's */
for (i = 0; i < queue->sqsize; fod++, i++) { for (i = 0; i < queue->sqsize; fod++, i++) {
if (fod->active) { if (fod->active) {
spin_lock(&fod->flock); spin_lock(&fod->flock);
......
...@@ -198,10 +198,13 @@ struct fcloop_lport_priv { ...@@ -198,10 +198,13 @@ struct fcloop_lport_priv {
}; };
struct fcloop_rport { struct fcloop_rport {
struct nvme_fc_remote_port *remoteport; struct nvme_fc_remote_port *remoteport;
struct nvmet_fc_target_port *targetport; struct nvmet_fc_target_port *targetport;
struct fcloop_nport *nport; struct fcloop_nport *nport;
struct fcloop_lport *lport; struct fcloop_lport *lport;
spinlock_t lock;
struct list_head ls_list;
struct work_struct ls_work;
}; };
struct fcloop_tport { struct fcloop_tport {
...@@ -224,11 +227,10 @@ struct fcloop_nport { ...@@ -224,11 +227,10 @@ struct fcloop_nport {
}; };
struct fcloop_lsreq { struct fcloop_lsreq {
struct fcloop_tport *tport;
struct nvmefc_ls_req *lsreq; struct nvmefc_ls_req *lsreq;
struct work_struct work;
struct nvmefc_tgt_ls_req tgt_ls_req; struct nvmefc_tgt_ls_req tgt_ls_req;
int status; int status;
struct list_head ls_list; /* fcloop_rport->ls_list */
}; };
struct fcloop_rscn { struct fcloop_rscn {
...@@ -292,21 +294,32 @@ fcloop_delete_queue(struct nvme_fc_local_port *localport, ...@@ -292,21 +294,32 @@ fcloop_delete_queue(struct nvme_fc_local_port *localport,
{ {
} }
/*
* Transmit of LS RSP done (e.g. buffers all set). call back up
* initiator "done" flows.
*/
static void static void
fcloop_tgt_lsrqst_done_work(struct work_struct *work) fcloop_rport_lsrqst_work(struct work_struct *work)
{ {
struct fcloop_lsreq *tls_req = struct fcloop_rport *rport =
container_of(work, struct fcloop_lsreq, work); container_of(work, struct fcloop_rport, ls_work);
struct fcloop_tport *tport = tls_req->tport; struct fcloop_lsreq *tls_req;
struct nvmefc_ls_req *lsreq = tls_req->lsreq;
if (!tport || tport->remoteport) spin_lock(&rport->lock);
lsreq->done(lsreq, tls_req->status); for (;;) {
tls_req = list_first_entry_or_null(&rport->ls_list,
struct fcloop_lsreq, ls_list);
if (!tls_req)
break;
list_del(&tls_req->ls_list);
spin_unlock(&rport->lock);
tls_req->lsreq->done(tls_req->lsreq, tls_req->status);
/*
* callee may free memory containing tls_req.
* do not reference lsreq after this.
*/
spin_lock(&rport->lock);
}
spin_unlock(&rport->lock);
} }
static int static int
...@@ -319,17 +332,18 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, ...@@ -319,17 +332,18 @@ fcloop_ls_req(struct nvme_fc_local_port *localport,
int ret = 0; int ret = 0;
tls_req->lsreq = lsreq; tls_req->lsreq = lsreq;
INIT_WORK(&tls_req->work, fcloop_tgt_lsrqst_done_work); INIT_LIST_HEAD(&tls_req->ls_list);
if (!rport->targetport) { if (!rport->targetport) {
tls_req->status = -ECONNREFUSED; tls_req->status = -ECONNREFUSED;
tls_req->tport = NULL; spin_lock(&rport->lock);
schedule_work(&tls_req->work); list_add_tail(&rport->ls_list, &tls_req->ls_list);
spin_unlock(&rport->lock);
schedule_work(&rport->ls_work);
return ret; return ret;
} }
tls_req->status = 0; tls_req->status = 0;
tls_req->tport = rport->targetport->private;
ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req, ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req,
lsreq->rqstaddr, lsreq->rqstlen); lsreq->rqstaddr, lsreq->rqstlen);
...@@ -337,18 +351,28 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, ...@@ -337,18 +351,28 @@ fcloop_ls_req(struct nvme_fc_local_port *localport,
} }
static int static int
fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
struct nvmefc_tgt_ls_req *tgt_lsreq) struct nvmefc_tgt_ls_req *tgt_lsreq)
{ {
struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq); struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq);
struct nvmefc_ls_req *lsreq = tls_req->lsreq; struct nvmefc_ls_req *lsreq = tls_req->lsreq;
struct fcloop_tport *tport = targetport->private;
struct nvme_fc_remote_port *remoteport = tport->remoteport;
struct fcloop_rport *rport;
memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf, memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf,
((lsreq->rsplen < tgt_lsreq->rsplen) ? ((lsreq->rsplen < tgt_lsreq->rsplen) ?
lsreq->rsplen : tgt_lsreq->rsplen)); lsreq->rsplen : tgt_lsreq->rsplen));
tgt_lsreq->done(tgt_lsreq); tgt_lsreq->done(tgt_lsreq);
schedule_work(&tls_req->work); if (remoteport) {
rport = remoteport->private;
spin_lock(&rport->lock);
list_add_tail(&rport->ls_list, &tls_req->ls_list);
spin_unlock(&rport->lock);
schedule_work(&rport->ls_work);
}
return 0; return 0;
} }
...@@ -834,6 +858,7 @@ fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport) ...@@ -834,6 +858,7 @@ fcloop_remoteport_delete(struct nvme_fc_remote_port *remoteport)
{ {
struct fcloop_rport *rport = remoteport->private; struct fcloop_rport *rport = remoteport->private;
flush_work(&rport->ls_work);
fcloop_nport_put(rport->nport); fcloop_nport_put(rport->nport);
} }
...@@ -850,7 +875,6 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) ...@@ -850,7 +875,6 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
#define FCLOOP_DMABOUND_4G 0xFFFFFFFF #define FCLOOP_DMABOUND_4G 0xFFFFFFFF
static struct nvme_fc_port_template fctemplate = { static struct nvme_fc_port_template fctemplate = {
.module = THIS_MODULE,
.localport_delete = fcloop_localport_delete, .localport_delete = fcloop_localport_delete,
.remoteport_delete = fcloop_remoteport_delete, .remoteport_delete = fcloop_remoteport_delete,
.create_queue = fcloop_create_queue, .create_queue = fcloop_create_queue,
...@@ -1136,6 +1160,9 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr, ...@@ -1136,6 +1160,9 @@ fcloop_create_remote_port(struct device *dev, struct device_attribute *attr,
rport->nport = nport; rport->nport = nport;
rport->lport = nport->lport; rport->lport = nport->lport;
nport->rport = rport; nport->rport = rport;
spin_lock_init(&rport->lock);
INIT_WORK(&rport->ls_work, fcloop_rport_lsrqst_work);
INIT_LIST_HEAD(&rport->ls_list);
return count; return count;
} }
......
...@@ -78,6 +78,7 @@ enum nvmet_rdma_queue_state { ...@@ -78,6 +78,7 @@ enum nvmet_rdma_queue_state {
struct nvmet_rdma_queue { struct nvmet_rdma_queue {
struct rdma_cm_id *cm_id; struct rdma_cm_id *cm_id;
struct ib_qp *qp;
struct nvmet_port *port; struct nvmet_port *port;
struct ib_cq *cq; struct ib_cq *cq;
atomic_t sq_wr_avail; atomic_t sq_wr_avail;
...@@ -105,6 +106,13 @@ struct nvmet_rdma_queue { ...@@ -105,6 +106,13 @@ struct nvmet_rdma_queue {
struct list_head queue_list; struct list_head queue_list;
}; };
struct nvmet_rdma_port {
struct nvmet_port *nport;
struct sockaddr_storage addr;
struct rdma_cm_id *cm_id;
struct delayed_work repair_work;
};
struct nvmet_rdma_device { struct nvmet_rdma_device {
struct ib_device *device; struct ib_device *device;
struct ib_pd *pd; struct ib_pd *pd;
...@@ -461,7 +469,7 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, ...@@ -461,7 +469,7 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
if (ndev->srq) if (ndev->srq)
ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL); ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
else else
ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, NULL); ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
if (unlikely(ret)) if (unlikely(ret))
pr_err("post_recv cmd failed\n"); pr_err("post_recv cmd failed\n");
...@@ -500,7 +508,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) ...@@ -500,7 +508,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail); atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
if (rsp->n_rdma) { if (rsp->n_rdma) {
rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp, rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
queue->cm_id->port_num, rsp->req.sg, queue->cm_id->port_num, rsp->req.sg,
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
} }
...@@ -584,7 +592,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) ...@@ -584,7 +592,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
WARN_ON(rsp->n_rdma <= 0); WARN_ON(rsp->n_rdma <= 0);
atomic_add(rsp->n_rdma, &queue->sq_wr_avail); atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
rdma_rw_ctx_destroy(&rsp->rw, queue->cm_id->qp, rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
queue->cm_id->port_num, rsp->req.sg, queue->cm_id->port_num, rsp->req.sg,
rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
rsp->n_rdma = 0; rsp->n_rdma = 0;
...@@ -739,7 +747,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp) ...@@ -739,7 +747,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp)
} }
if (nvmet_rdma_need_data_in(rsp)) { if (nvmet_rdma_need_data_in(rsp)) {
if (rdma_rw_ctx_post(&rsp->rw, queue->cm_id->qp, if (rdma_rw_ctx_post(&rsp->rw, queue->qp,
queue->cm_id->port_num, &rsp->read_cqe, NULL)) queue->cm_id->port_num, &rsp->read_cqe, NULL))
nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR); nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR);
} else { } else {
...@@ -911,7 +919,8 @@ static void nvmet_rdma_free_dev(struct kref *ref) ...@@ -911,7 +919,8 @@ static void nvmet_rdma_free_dev(struct kref *ref)
static struct nvmet_rdma_device * static struct nvmet_rdma_device *
nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
{ {
struct nvmet_port *port = cm_id->context; struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_port *nport = port->nport;
struct nvmet_rdma_device *ndev; struct nvmet_rdma_device *ndev;
int inline_page_count; int inline_page_count;
int inline_sge_count; int inline_sge_count;
...@@ -928,17 +937,17 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) ...@@ -928,17 +937,17 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
if (!ndev) if (!ndev)
goto out_err; goto out_err;
inline_page_count = num_pages(port->inline_data_size); inline_page_count = num_pages(nport->inline_data_size);
inline_sge_count = max(cm_id->device->attrs.max_sge_rd, inline_sge_count = max(cm_id->device->attrs.max_sge_rd,
cm_id->device->attrs.max_recv_sge) - 1; cm_id->device->attrs.max_recv_sge) - 1;
if (inline_page_count > inline_sge_count) { if (inline_page_count > inline_sge_count) {
pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n", pr_warn("inline_data_size %d cannot be supported by device %s. Reducing to %lu.\n",
port->inline_data_size, cm_id->device->name, nport->inline_data_size, cm_id->device->name,
inline_sge_count * PAGE_SIZE); inline_sge_count * PAGE_SIZE);
port->inline_data_size = inline_sge_count * PAGE_SIZE; nport->inline_data_size = inline_sge_count * PAGE_SIZE;
inline_page_count = inline_sge_count; inline_page_count = inline_sge_count;
} }
ndev->inline_data_size = port->inline_data_size; ndev->inline_data_size = nport->inline_data_size;
ndev->inline_page_count = inline_page_count; ndev->inline_page_count = inline_page_count;
ndev->device = cm_id->device; ndev->device = cm_id->device;
kref_init(&ndev->ref); kref_init(&ndev->ref);
...@@ -1024,6 +1033,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) ...@@ -1024,6 +1033,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
pr_err("failed to create_qp ret= %d\n", ret); pr_err("failed to create_qp ret= %d\n", ret);
goto err_destroy_cq; goto err_destroy_cq;
} }
queue->qp = queue->cm_id->qp;
atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr); atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr);
...@@ -1052,11 +1062,10 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) ...@@ -1052,11 +1062,10 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue) static void nvmet_rdma_destroy_queue_ib(struct nvmet_rdma_queue *queue)
{ {
struct ib_qp *qp = queue->cm_id->qp; ib_drain_qp(queue->qp);
if (queue->cm_id)
ib_drain_qp(qp); rdma_destroy_id(queue->cm_id);
rdma_destroy_id(queue->cm_id); ib_destroy_qp(queue->qp);
ib_destroy_qp(qp);
ib_free_cq(queue->cq); ib_free_cq(queue->cq);
} }
...@@ -1266,6 +1275,7 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id, ...@@ -1266,6 +1275,7 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event) struct rdma_cm_event *event)
{ {
struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_rdma_device *ndev; struct nvmet_rdma_device *ndev;
struct nvmet_rdma_queue *queue; struct nvmet_rdma_queue *queue;
int ret = -EINVAL; int ret = -EINVAL;
...@@ -1281,7 +1291,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ...@@ -1281,7 +1291,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
ret = -ENOMEM; ret = -ENOMEM;
goto put_device; goto put_device;
} }
queue->port = cm_id->context; queue->port = port->nport;
if (queue->host_qid == 0) { if (queue->host_qid == 0) {
/* Let inflight controller teardown complete */ /* Let inflight controller teardown complete */
...@@ -1290,9 +1300,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ...@@ -1290,9 +1300,12 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
if (ret) { if (ret) {
schedule_work(&queue->release_work); /*
/* Destroying rdma_cm id is not needed here */ * Don't destroy the cm_id in free path, as we implicitly
return 0; * destroy the cm_id here with non-zero ret code.
*/
queue->cm_id = NULL;
goto free_queue;
} }
mutex_lock(&nvmet_rdma_queue_mutex); mutex_lock(&nvmet_rdma_queue_mutex);
...@@ -1301,6 +1314,8 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ...@@ -1301,6 +1314,8 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
return 0; return 0;
free_queue:
nvmet_rdma_free_queue(queue);
put_device: put_device:
kref_put(&ndev->ref, nvmet_rdma_free_dev); kref_put(&ndev->ref, nvmet_rdma_free_dev);
...@@ -1406,7 +1421,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, ...@@ -1406,7 +1421,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id,
static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
struct nvmet_rdma_queue *queue) struct nvmet_rdma_queue *queue)
{ {
struct nvmet_port *port; struct nvmet_rdma_port *port;
if (queue) { if (queue) {
/* /*
...@@ -1425,7 +1440,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id, ...@@ -1425,7 +1440,7 @@ static int nvmet_rdma_device_removal(struct rdma_cm_id *cm_id,
* cm_id destroy. use atomic xchg to make sure * cm_id destroy. use atomic xchg to make sure
* we don't compete with remove_port. * we don't compete with remove_port.
*/ */
if (xchg(&port->priv, NULL) != cm_id) if (xchg(&port->cm_id, NULL) != cm_id)
return 0; return 0;
/* /*
...@@ -1456,6 +1471,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, ...@@ -1456,6 +1471,13 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id,
nvmet_rdma_queue_established(queue); nvmet_rdma_queue_established(queue);
break; break;
case RDMA_CM_EVENT_ADDR_CHANGE: case RDMA_CM_EVENT_ADDR_CHANGE:
if (!queue) {
struct nvmet_rdma_port *port = cm_id->context;
schedule_delayed_work(&port->repair_work, 0);
break;
}
/* FALLTHROUGH */
case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_TIMEWAIT_EXIT: case RDMA_CM_EVENT_TIMEWAIT_EXIT:
nvmet_rdma_queue_disconnect(queue); nvmet_rdma_queue_disconnect(queue);
...@@ -1498,42 +1520,19 @@ static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl) ...@@ -1498,42 +1520,19 @@ static void nvmet_rdma_delete_ctrl(struct nvmet_ctrl *ctrl)
mutex_unlock(&nvmet_rdma_queue_mutex); mutex_unlock(&nvmet_rdma_queue_mutex);
} }
static int nvmet_rdma_add_port(struct nvmet_port *port) static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port)
{ {
struct rdma_cm_id *cm_id; struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL);
struct sockaddr_storage addr = { };
__kernel_sa_family_t af;
int ret;
switch (port->disc_addr.adrfam) { if (cm_id)
case NVMF_ADDR_FAMILY_IP4: rdma_destroy_id(cm_id);
af = AF_INET; }
break;
case NVMF_ADDR_FAMILY_IP6:
af = AF_INET6;
break;
default:
pr_err("address family %d not supported\n",
port->disc_addr.adrfam);
return -EINVAL;
}
if (port->inline_data_size < 0) {
port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
} else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
pr_warn("inline_data_size %u is too large, reducing to %u\n",
port->inline_data_size,
NVMET_RDMA_MAX_INLINE_DATA_SIZE);
port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
}
ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
port->disc_addr.trsvcid, &addr); {
if (ret) { struct sockaddr *addr = (struct sockaddr *)&port->addr;
pr_err("malformed ip/port passed: %s:%s\n", struct rdma_cm_id *cm_id;
port->disc_addr.traddr, port->disc_addr.trsvcid); int ret;
return ret;
}
cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port, cm_id = rdma_create_id(&init_net, nvmet_rdma_cm_handler, port,
RDMA_PS_TCP, IB_QPT_RC); RDMA_PS_TCP, IB_QPT_RC);
...@@ -1552,23 +1551,19 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) ...@@ -1552,23 +1551,19 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
goto out_destroy_id; goto out_destroy_id;
} }
ret = rdma_bind_addr(cm_id, (struct sockaddr *)&addr); ret = rdma_bind_addr(cm_id, addr);
if (ret) { if (ret) {
pr_err("binding CM ID to %pISpcs failed (%d)\n", pr_err("binding CM ID to %pISpcs failed (%d)\n", addr, ret);
(struct sockaddr *)&addr, ret);
goto out_destroy_id; goto out_destroy_id;
} }
ret = rdma_listen(cm_id, 128); ret = rdma_listen(cm_id, 128);
if (ret) { if (ret) {
pr_err("listening to %pISpcs failed (%d)\n", pr_err("listening to %pISpcs failed (%d)\n", addr, ret);
(struct sockaddr *)&addr, ret);
goto out_destroy_id; goto out_destroy_id;
} }
pr_info("enabling port %d (%pISpcs)\n", port->cm_id = cm_id;
le16_to_cpu(port->disc_addr.portid), (struct sockaddr *)&addr);
port->priv = cm_id;
return 0; return 0;
out_destroy_id: out_destroy_id:
...@@ -1576,18 +1571,92 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) ...@@ -1576,18 +1571,92 @@ static int nvmet_rdma_add_port(struct nvmet_port *port)
return ret; return ret;
} }
static void nvmet_rdma_remove_port(struct nvmet_port *port) static void nvmet_rdma_repair_port_work(struct work_struct *w)
{ {
struct rdma_cm_id *cm_id = xchg(&port->priv, NULL); struct nvmet_rdma_port *port = container_of(to_delayed_work(w),
struct nvmet_rdma_port, repair_work);
int ret;
if (cm_id) nvmet_rdma_disable_port(port);
rdma_destroy_id(cm_id); ret = nvmet_rdma_enable_port(port);
if (ret)
schedule_delayed_work(&port->repair_work, 5 * HZ);
}
static int nvmet_rdma_add_port(struct nvmet_port *nport)
{
struct nvmet_rdma_port *port;
__kernel_sa_family_t af;
int ret;
port = kzalloc(sizeof(*port), GFP_KERNEL);
if (!port)
return -ENOMEM;
nport->priv = port;
port->nport = nport;
INIT_DELAYED_WORK(&port->repair_work, nvmet_rdma_repair_port_work);
switch (nport->disc_addr.adrfam) {
case NVMF_ADDR_FAMILY_IP4:
af = AF_INET;
break;
case NVMF_ADDR_FAMILY_IP6:
af = AF_INET6;
break;
default:
pr_err("address family %d not supported\n",
nport->disc_addr.adrfam);
ret = -EINVAL;
goto out_free_port;
}
if (nport->inline_data_size < 0) {
nport->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE;
} else if (nport->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) {
pr_warn("inline_data_size %u is too large, reducing to %u\n",
nport->inline_data_size,
NVMET_RDMA_MAX_INLINE_DATA_SIZE);
nport->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE;
}
ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
nport->disc_addr.trsvcid, &port->addr);
if (ret) {
pr_err("malformed ip/port passed: %s:%s\n",
nport->disc_addr.traddr, nport->disc_addr.trsvcid);
goto out_free_port;
}
ret = nvmet_rdma_enable_port(port);
if (ret)
goto out_free_port;
pr_info("enabling port %d (%pISpcs)\n",
le16_to_cpu(nport->disc_addr.portid),
(struct sockaddr *)&port->addr);
return 0;
out_free_port:
kfree(port);
return ret;
}
static void nvmet_rdma_remove_port(struct nvmet_port *nport)
{
struct nvmet_rdma_port *port = nport->priv;
cancel_delayed_work_sync(&port->repair_work);
nvmet_rdma_disable_port(port);
kfree(port);
} }
static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
struct nvmet_port *port, char *traddr) struct nvmet_port *nport, char *traddr)
{ {
struct rdma_cm_id *cm_id = port->priv; struct nvmet_rdma_port *port = nport->priv;
struct rdma_cm_id *cm_id = port->cm_id;
if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) { if (inet_addr_is_any((struct sockaddr *)&cm_id->route.addr.src_addr)) {
struct nvmet_rdma_rsp *rsp = struct nvmet_rdma_rsp *rsp =
...@@ -1597,7 +1666,7 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, ...@@ -1597,7 +1666,7 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
sprintf(traddr, "%pISc", addr); sprintf(traddr, "%pISc", addr);
} else { } else {
memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE); memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
} }
} }
......
...@@ -1985,8 +1985,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, ...@@ -1985,8 +1985,6 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport,
/* Declare and initialization an instance of the FC NVME template. */ /* Declare and initialization an instance of the FC NVME template. */
static struct nvme_fc_port_template lpfc_nvme_template = { static struct nvme_fc_port_template lpfc_nvme_template = {
.module = THIS_MODULE,
/* initiator-based functions */ /* initiator-based functions */
.localport_delete = lpfc_nvme_localport_delete, .localport_delete = lpfc_nvme_localport_delete,
.remoteport_delete = lpfc_nvme_remoteport_delete, .remoteport_delete = lpfc_nvme_remoteport_delete,
......
...@@ -610,7 +610,6 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport) ...@@ -610,7 +610,6 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport)
} }
static struct nvme_fc_port_template qla_nvme_fc_transport = { static struct nvme_fc_port_template qla_nvme_fc_transport = {
.module = THIS_MODULE,
.localport_delete = qla_nvme_localport_delete, .localport_delete = qla_nvme_localport_delete,
.remoteport_delete = qla_nvme_remoteport_delete, .remoteport_delete = qla_nvme_remoteport_delete,
.create_queue = qla_nvme_alloc_queue, .create_queue = qla_nvme_alloc_queue,
......
...@@ -46,6 +46,7 @@ struct blkcg_gq; ...@@ -46,6 +46,7 @@ struct blkcg_gq;
struct blkcg { struct blkcg {
struct cgroup_subsys_state css; struct cgroup_subsys_state css;
spinlock_t lock; spinlock_t lock;
refcount_t online_pin;
struct radix_tree_root blkg_tree; struct radix_tree_root blkg_tree;
struct blkcg_gq __rcu *blkg_hint; struct blkcg_gq __rcu *blkg_hint;
...@@ -56,7 +57,6 @@ struct blkcg { ...@@ -56,7 +57,6 @@ struct blkcg {
struct list_head all_blkcgs_node; struct list_head all_blkcgs_node;
#ifdef CONFIG_CGROUP_WRITEBACK #ifdef CONFIG_CGROUP_WRITEBACK
struct list_head cgwb_list; struct list_head cgwb_list;
refcount_t cgwb_refcnt;
#endif #endif
}; };
...@@ -412,47 +412,38 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) ...@@ -412,47 +412,38 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
extern void blkcg_destroy_blkgs(struct blkcg *blkcg); extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
#ifdef CONFIG_CGROUP_WRITEBACK
/** /**
* blkcg_cgwb_get - get a reference for blkcg->cgwb_list * blkcg_pin_online - pin online state
* @blkcg: blkcg of interest * @blkcg: blkcg of interest
* *
* This is used to track the number of active wb's related to a blkcg. * While pinned, a blkcg is kept online. This is primarily used to
* impedance-match blkg and cgwb lifetimes so that blkg doesn't go offline
* while an associated cgwb is still active.
*/ */
static inline void blkcg_cgwb_get(struct blkcg *blkcg) static inline void blkcg_pin_online(struct blkcg *blkcg)
{ {
refcount_inc(&blkcg->cgwb_refcnt); refcount_inc(&blkcg->online_pin);
} }
/** /**
* blkcg_cgwb_put - put a reference for @blkcg->cgwb_list * blkcg_unpin_online - unpin online state
* @blkcg: blkcg of interest * @blkcg: blkcg of interest
* *
* This is used to track the number of active wb's related to a blkcg. * This is primarily used to impedance-match blkg and cgwb lifetimes so
* When this count goes to zero, all active wb has finished so the * that blkg doesn't go offline while an associated cgwb is still active.
* When this count goes to zero, all active cgwbs have finished so the
* blkcg can continue destruction by calling blkcg_destroy_blkgs(). * blkcg can continue destruction by calling blkcg_destroy_blkgs().
* This work may occur in cgwb_release_workfn() on the cgwb_release
* workqueue.
*/ */
static inline void blkcg_cgwb_put(struct blkcg *blkcg) static inline void blkcg_unpin_online(struct blkcg *blkcg)
{ {
if (refcount_dec_and_test(&blkcg->cgwb_refcnt)) do {
if (!refcount_dec_and_test(&blkcg->online_pin))
break;
blkcg_destroy_blkgs(blkcg); blkcg_destroy_blkgs(blkcg);
blkcg = blkcg_parent(blkcg);
} while (blkcg);
} }
#else
static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
static inline void blkcg_cgwb_put(struct blkcg *blkcg)
{
/* wb isn't being accounted, so trigger destruction right away */
blkcg_destroy_blkgs(blkcg);
}
#endif
/** /**
* blkg_path - format cgroup path of blkg * blkg_path - format cgroup path of blkg
* @blkg: blkg of interest * @blkg: blkg of interest
......
...@@ -270,8 +270,6 @@ struct nvme_fc_remote_port { ...@@ -270,8 +270,6 @@ struct nvme_fc_remote_port {
* *
* Host/Initiator Transport Entrypoints/Parameters: * Host/Initiator Transport Entrypoints/Parameters:
* *
* @module: The LLDD module using the interface
*
* @localport_delete: The LLDD initiates deletion of a localport via * @localport_delete: The LLDD initiates deletion of a localport via
* nvme_fc_deregister_localport(). However, the teardown is * nvme_fc_deregister_localport(). However, the teardown is
* asynchronous. This routine is called upon the completion of the * asynchronous. This routine is called upon the completion of the
...@@ -385,8 +383,6 @@ struct nvme_fc_remote_port { ...@@ -385,8 +383,6 @@ struct nvme_fc_remote_port {
* Value is Mandatory. Allowed to be zero. * Value is Mandatory. Allowed to be zero.
*/ */
struct nvme_fc_port_template { struct nvme_fc_port_template {
struct module *module;
/* initiator-based functions */ /* initiator-based functions */
void (*localport_delete)(struct nvme_fc_local_port *); void (*localport_delete)(struct nvme_fc_local_port *);
void (*remoteport_delete)(struct nvme_fc_remote_port *); void (*remoteport_delete)(struct nvme_fc_remote_port *);
......
...@@ -491,8 +491,8 @@ static void cgwb_release_workfn(struct work_struct *work) ...@@ -491,8 +491,8 @@ static void cgwb_release_workfn(struct work_struct *work)
css_put(wb->blkcg_css); css_put(wb->blkcg_css);
mutex_unlock(&wb->bdi->cgwb_release_mutex); mutex_unlock(&wb->bdi->cgwb_release_mutex);
/* triggers blkg destruction if cgwb_refcnt becomes zero */ /* triggers blkg destruction if no online users left */
blkcg_cgwb_put(blkcg); blkcg_unpin_online(blkcg);
fprop_local_destroy_percpu(&wb->memcg_completions); fprop_local_destroy_percpu(&wb->memcg_completions);
percpu_ref_exit(&wb->refcnt); percpu_ref_exit(&wb->refcnt);
...@@ -592,7 +592,7 @@ static int cgwb_create(struct backing_dev_info *bdi, ...@@ -592,7 +592,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list); list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
list_add(&wb->memcg_node, memcg_cgwb_list); list_add(&wb->memcg_node, memcg_cgwb_list);
list_add(&wb->blkcg_node, blkcg_cgwb_list); list_add(&wb->blkcg_node, blkcg_cgwb_list);
blkcg_cgwb_get(blkcg); blkcg_pin_online(blkcg);
css_get(memcg_css); css_get(memcg_css);
css_get(blkcg_css); css_get(blkcg_css);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment