Commit 8d72e5bd authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus-20190608' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:

 - Allow symlink from the bfq.weight cgroup parameter to the general
   weight (Angelo)

 - Damien is new skd maintainer (Bart)

 - NVMe pull request from Sagi, with a few small fixes.

 - Ensure we set DMA segment size properly, dma-debug is now tripping on
   these (Christoph)

 - Remove useless debugfs_create() return check (Greg)

 - Remove redundant unlikely() check on IS_ERR() (Kefeng)

 - Fixup request freeing on exit (Ming)

* tag 'for-linus-20190608' of git://git.kernel.dk/linux-block:
  block, bfq: add weight symlink to the bfq.weight cgroup parameter
  cgroup: let a symlink too be created with a cftype file
  block: free sched's request pool in blk_cleanup_queue
  nvme-rdma: use dynamic dma mapping per command
  nvme: Fix u32 overflow in the number of namespace list calculation
  mmc: also set max_segment_size in the device
  mtip32xx: also set max_segment_size in the device
  rsxx: don't call dma_set_max_seg_size
  nvme-pci: don't limit DMA segement size
  block: Drop unlikely before IS_ERR(_OR_NULL)
  block: aoe: no need to check return value of debugfs_create functions
  nvmet: fix data_len to 0 for bdev-backed write_zeroes
  MAINTAINERS: Hand over skd maintainership
  nvme-tcp: fix queue mapping when queue count is limited
  nvme-rdma: fix queue mapping when queue count is limited
parents 1b02caa3 6c70f899
...@@ -14995,7 +14995,7 @@ S: Odd Fixes ...@@ -14995,7 +14995,7 @@ S: Odd Fixes
F: drivers/net/ethernet/adaptec/starfire* F: drivers/net/ethernet/adaptec/starfire*
STEC S1220 SKD DRIVER STEC S1220 SKD DRIVER
M: Bart Van Assche <bart.vanassche@wdc.com> M: Damien Le Moal <Damien.LeMoal@wdc.com>
L: linux-block@vger.kernel.org L: linux-block@vger.kernel.org
S: Maintained S: Maintained
F: drivers/block/skd*[ch] F: drivers/block/skd*[ch]
......
...@@ -1046,7 +1046,8 @@ struct blkcg_policy blkcg_policy_bfq = { ...@@ -1046,7 +1046,8 @@ struct blkcg_policy blkcg_policy_bfq = {
struct cftype bfq_blkcg_legacy_files[] = { struct cftype bfq_blkcg_legacy_files[] = {
{ {
.name = "bfq.weight", .name = "bfq.weight",
.flags = CFTYPE_NOT_ON_ROOT, .link_name = "weight",
.flags = CFTYPE_NOT_ON_ROOT | CFTYPE_SYMLINKED,
.seq_show = bfq_io_show_weight, .seq_show = bfq_io_show_weight,
.write_u64 = bfq_io_set_weight_legacy, .write_u64 = bfq_io_set_weight_legacy,
}, },
...@@ -1166,7 +1167,8 @@ struct cftype bfq_blkcg_legacy_files[] = { ...@@ -1166,7 +1167,8 @@ struct cftype bfq_blkcg_legacy_files[] = {
struct cftype bfq_blkg_files[] = { struct cftype bfq_blkg_files[] = {
{ {
.name = "bfq.weight", .name = "bfq.weight",
.flags = CFTYPE_NOT_ON_ROOT, .link_name = "weight",
.flags = CFTYPE_NOT_ON_ROOT | CFTYPE_SYMLINKED,
.seq_show = bfq_io_show_weight, .seq_show = bfq_io_show_weight,
.write = bfq_io_set_weight, .write = bfq_io_set_weight,
}, },
......
...@@ -881,7 +881,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, ...@@ -881,7 +881,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
blkg_free(new_blkg); blkg_free(new_blkg);
} else { } else {
blkg = blkg_create(pos, q, new_blkg); blkg = blkg_create(pos, q, new_blkg);
if (unlikely(IS_ERR(blkg))) { if (IS_ERR(blkg)) {
ret = PTR_ERR(blkg); ret = PTR_ERR(blkg);
goto fail_unlock; goto fail_unlock;
} }
......
...@@ -320,6 +320,19 @@ void blk_cleanup_queue(struct request_queue *q) ...@@ -320,6 +320,19 @@ void blk_cleanup_queue(struct request_queue *q)
if (queue_is_mq(q)) if (queue_is_mq(q))
blk_mq_exit_queue(q); blk_mq_exit_queue(q);
/*
* In theory, request pool of sched_tags belongs to request queue.
* However, the current implementation requires tag_set for freeing
* requests, so free the pool now.
*
* Queue has become frozen, there can't be any in-queue requests, so
* it is safe to free requests now.
*/
mutex_lock(&q->sysfs_lock);
if (q->elevator)
blk_mq_sched_free_requests(q);
mutex_unlock(&q->sysfs_lock);
percpu_ref_exit(&q->q_usage_counter); percpu_ref_exit(&q->q_usage_counter);
/* @q is and will stay empty, shutdown and put */ /* @q is and will stay empty, shutdown and put */
......
...@@ -475,14 +475,18 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q, ...@@ -475,14 +475,18 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
return ret; return ret;
} }
/* called in queue's release handler, tagset has gone away */
static void blk_mq_sched_tags_teardown(struct request_queue *q) static void blk_mq_sched_tags_teardown(struct request_queue *q)
{ {
struct blk_mq_tag_set *set = q->tag_set;
struct blk_mq_hw_ctx *hctx; struct blk_mq_hw_ctx *hctx;
int i; int i;
queue_for_each_hw_ctx(q, hctx, i) queue_for_each_hw_ctx(q, hctx, i) {
blk_mq_sched_free_tags(set, hctx, i); if (hctx->sched_tags) {
blk_mq_free_rq_map(hctx->sched_tags);
hctx->sched_tags = NULL;
}
}
} }
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
...@@ -523,6 +527,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) ...@@ -523,6 +527,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
ret = e->ops.init_hctx(hctx, i); ret = e->ops.init_hctx(hctx, i);
if (ret) { if (ret) {
eq = q->elevator; eq = q->elevator;
blk_mq_sched_free_requests(q);
blk_mq_exit_sched(q, eq); blk_mq_exit_sched(q, eq);
kobject_put(&eq->kobj); kobject_put(&eq->kobj);
return ret; return ret;
...@@ -534,11 +539,30 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) ...@@ -534,11 +539,30 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
return 0; return 0;
err: err:
blk_mq_sched_free_requests(q);
blk_mq_sched_tags_teardown(q); blk_mq_sched_tags_teardown(q);
q->elevator = NULL; q->elevator = NULL;
return ret; return ret;
} }
/*
* called in either blk_queue_cleanup or elevator_switch, tagset
* is required for freeing requests
*/
void blk_mq_sched_free_requests(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
int i;
lockdep_assert_held(&q->sysfs_lock);
WARN_ON(!q->elevator);
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->sched_tags)
blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
}
}
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
{ {
struct blk_mq_hw_ctx *hctx; struct blk_mq_hw_ctx *hctx;
......
...@@ -28,6 +28,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx); ...@@ -28,6 +28,7 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx);
int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e); int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e);
void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e); void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e);
void blk_mq_sched_free_requests(struct request_queue *q);
static inline bool static inline bool
blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio) blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
......
...@@ -850,7 +850,7 @@ static void blk_exit_queue(struct request_queue *q) ...@@ -850,7 +850,7 @@ static void blk_exit_queue(struct request_queue *q)
*/ */
if (q->elevator) { if (q->elevator) {
ioc_clear_queue(q); ioc_clear_queue(q);
elevator_exit(q, q->elevator); __elevator_exit(q, q->elevator);
q->elevator = NULL; q->elevator = NULL;
} }
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include <linux/blk-mq.h> #include <linux/blk-mq.h>
#include <xen/xen.h> #include <xen/xen.h>
#include "blk-mq.h" #include "blk-mq.h"
#include "blk-mq-sched.h"
/* Max future timer expiry for timeouts */ /* Max future timer expiry for timeouts */
#define BLK_MAX_TIMEOUT (5 * HZ) #define BLK_MAX_TIMEOUT (5 * HZ)
...@@ -176,10 +177,17 @@ void blk_insert_flush(struct request *rq); ...@@ -176,10 +177,17 @@ void blk_insert_flush(struct request *rq);
int elevator_init_mq(struct request_queue *q); int elevator_init_mq(struct request_queue *q);
int elevator_switch_mq(struct request_queue *q, int elevator_switch_mq(struct request_queue *q,
struct elevator_type *new_e); struct elevator_type *new_e);
void elevator_exit(struct request_queue *, struct elevator_queue *); void __elevator_exit(struct request_queue *, struct elevator_queue *);
int elv_register_queue(struct request_queue *q); int elv_register_queue(struct request_queue *q);
void elv_unregister_queue(struct request_queue *q); void elv_unregister_queue(struct request_queue *q);
static inline void elevator_exit(struct request_queue *q,
struct elevator_queue *e)
{
blk_mq_sched_free_requests(q);
__elevator_exit(q, e);
}
struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); struct hd_struct *__disk_get_part(struct gendisk *disk, int partno);
#ifdef CONFIG_FAIL_IO_TIMEOUT #ifdef CONFIG_FAIL_IO_TIMEOUT
......
...@@ -178,7 +178,7 @@ static void elevator_release(struct kobject *kobj) ...@@ -178,7 +178,7 @@ static void elevator_release(struct kobject *kobj)
kfree(e); kfree(e);
} }
void elevator_exit(struct request_queue *q, struct elevator_queue *e) void __elevator_exit(struct request_queue *q, struct elevator_queue *e)
{ {
mutex_lock(&e->sysfs_lock); mutex_lock(&e->sysfs_lock);
if (e->type->ops.exit_sched) if (e->type->ops.exit_sched)
......
...@@ -196,7 +196,6 @@ static const struct file_operations aoe_debugfs_fops = { ...@@ -196,7 +196,6 @@ static const struct file_operations aoe_debugfs_fops = {
static void static void
aoedisk_add_debugfs(struct aoedev *d) aoedisk_add_debugfs(struct aoedev *d)
{ {
struct dentry *entry;
char *p; char *p;
if (aoe_debugfs_dir == NULL) if (aoe_debugfs_dir == NULL)
...@@ -207,15 +206,8 @@ aoedisk_add_debugfs(struct aoedev *d) ...@@ -207,15 +206,8 @@ aoedisk_add_debugfs(struct aoedev *d)
else else
p++; p++;
BUG_ON(*p == '\0'); BUG_ON(*p == '\0');
entry = debugfs_create_file(p, 0444, aoe_debugfs_dir, d, d->debugfs = debugfs_create_file(p, 0444, aoe_debugfs_dir, d,
&aoe_debugfs_fops); &aoe_debugfs_fops);
if (IS_ERR_OR_NULL(entry)) {
pr_info("aoe: cannot create debugfs file for %s\n",
d->gd->disk_name);
return;
}
BUG_ON(d->debugfs);
d->debugfs = entry;
} }
void void
aoedisk_rm_debugfs(struct aoedev *d) aoedisk_rm_debugfs(struct aoedev *d)
...@@ -472,10 +464,6 @@ aoeblk_init(void) ...@@ -472,10 +464,6 @@ aoeblk_init(void)
if (buf_pool_cache == NULL) if (buf_pool_cache == NULL)
return -ENOMEM; return -ENOMEM;
aoe_debugfs_dir = debugfs_create_dir("aoe", NULL); aoe_debugfs_dir = debugfs_create_dir("aoe", NULL);
if (IS_ERR_OR_NULL(aoe_debugfs_dir)) {
pr_info("aoe: cannot create debugfs directory\n");
aoe_debugfs_dir = NULL;
}
return 0; return 0;
} }
...@@ -3676,6 +3676,7 @@ static int mtip_block_initialize(struct driver_data *dd) ...@@ -3676,6 +3676,7 @@ static int mtip_block_initialize(struct driver_data *dd)
blk_queue_physical_block_size(dd->queue, 4096); blk_queue_physical_block_size(dd->queue, 4096);
blk_queue_max_hw_sectors(dd->queue, 0xffff); blk_queue_max_hw_sectors(dd->queue, 0xffff);
blk_queue_max_segment_size(dd->queue, 0x400000); blk_queue_max_segment_size(dd->queue, 0x400000);
dma_set_max_seg_size(&dd->pdev->dev, 0x400000);
blk_queue_io_min(dd->queue, 4096); blk_queue_io_min(dd->queue, 4096);
/* Set the capacity of the device in 512 byte sectors. */ /* Set the capacity of the device in 512 byte sectors. */
......
...@@ -767,7 +767,6 @@ static int rsxx_pci_probe(struct pci_dev *dev, ...@@ -767,7 +767,6 @@ static int rsxx_pci_probe(struct pci_dev *dev,
goto failed_enable; goto failed_enable;
pci_set_master(dev); pci_set_master(dev);
dma_set_max_seg_size(&dev->dev, RSXX_HW_BLK_SIZE);
st = dma_set_mask(&dev->dev, DMA_BIT_MASK(64)); st = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
if (st) { if (st) {
......
...@@ -377,6 +377,8 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card) ...@@ -377,6 +377,8 @@ static void mmc_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
blk_queue_max_segment_size(mq->queue, blk_queue_max_segment_size(mq->queue,
round_down(host->max_seg_size, block_size)); round_down(host->max_seg_size, block_size));
dma_set_max_seg_size(mmc_dev(host), queue_max_segment_size(mq->queue));
INIT_WORK(&mq->recovery_work, mmc_mq_recovery_handler); INIT_WORK(&mq->recovery_work, mmc_mq_recovery_handler);
INIT_WORK(&mq->complete_work, mmc_blk_mq_complete_work); INIT_WORK(&mq->complete_work, mmc_blk_mq_complete_work);
......
...@@ -3400,7 +3400,8 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) ...@@ -3400,7 +3400,8 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
{ {
struct nvme_ns *ns; struct nvme_ns *ns;
__le32 *ns_list; __le32 *ns_list;
unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024); unsigned i, j, nsid, prev = 0;
unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024);
int ret = 0; int ret = 0;
ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
......
...@@ -2513,6 +2513,12 @@ static void nvme_reset_work(struct work_struct *work) ...@@ -2513,6 +2513,12 @@ static void nvme_reset_work(struct work_struct *work)
*/ */
dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
dev->ctrl.max_segments = NVME_MAX_SEGS; dev->ctrl.max_segments = NVME_MAX_SEGS;
/*
* Don't limit the IOMMU merged segment size.
*/
dma_set_max_seg_size(dev->dev, 0xffffffff);
mutex_unlock(&dev->shutdown_lock); mutex_unlock(&dev->shutdown_lock);
/* /*
......
...@@ -213,6 +213,11 @@ static struct nvme_rdma_qe *nvme_rdma_alloc_ring(struct ib_device *ibdev, ...@@ -213,6 +213,11 @@ static struct nvme_rdma_qe *nvme_rdma_alloc_ring(struct ib_device *ibdev,
if (!ring) if (!ring)
return NULL; return NULL;
/*
* Bind the CQEs (post recv buffers) DMA mapping to the RDMA queue
* lifetime. It's safe, since any chage in the underlying RDMA device
* will issue error recovery and queue re-creation.
*/
for (i = 0; i < ib_queue_size; i++) { for (i = 0; i < ib_queue_size; i++) {
if (nvme_rdma_alloc_qe(ibdev, &ring[i], capsule_size, dir)) if (nvme_rdma_alloc_qe(ibdev, &ring[i], capsule_size, dir))
goto out_free_ring; goto out_free_ring;
...@@ -274,14 +279,9 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor) ...@@ -274,14 +279,9 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
struct request *rq, unsigned int hctx_idx) struct request *rq, unsigned int hctx_idx)
{ {
struct nvme_rdma_ctrl *ctrl = set->driver_data;
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
struct nvme_rdma_device *dev = queue->device;
nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command), kfree(req->sqe.data);
DMA_TO_DEVICE);
} }
static int nvme_rdma_init_request(struct blk_mq_tag_set *set, static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
...@@ -292,15 +292,11 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, ...@@ -292,15 +292,11 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
int ret;
nvme_req(rq)->ctrl = &ctrl->ctrl; nvme_req(rq)->ctrl = &ctrl->ctrl;
ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command), req->sqe.data = kzalloc(sizeof(struct nvme_command), GFP_KERNEL);
DMA_TO_DEVICE); if (!req->sqe.data)
if (ret) return -ENOMEM;
return ret;
req->queue = queue; req->queue = queue;
...@@ -641,34 +637,16 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) ...@@ -641,34 +637,16 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
{ {
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
struct ib_device *ibdev = ctrl->device->dev; struct ib_device *ibdev = ctrl->device->dev;
unsigned int nr_io_queues; unsigned int nr_io_queues, nr_default_queues;
unsigned int nr_read_queues, nr_poll_queues;
int i, ret; int i, ret;
nr_io_queues = min(opts->nr_io_queues, num_online_cpus()); nr_read_queues = min_t(unsigned int, ibdev->num_comp_vectors,
min(opts->nr_io_queues, num_online_cpus()));
/* nr_default_queues = min_t(unsigned int, ibdev->num_comp_vectors,
* we map queues according to the device irq vectors for min(opts->nr_write_queues, num_online_cpus()));
* optimal locality so we don't need more queues than nr_poll_queues = min(opts->nr_poll_queues, num_online_cpus());
* completion vectors. nr_io_queues = nr_read_queues + nr_default_queues + nr_poll_queues;
*/
nr_io_queues = min_t(unsigned int, nr_io_queues,
ibdev->num_comp_vectors);
if (opts->nr_write_queues) {
ctrl->io_queues[HCTX_TYPE_DEFAULT] =
min(opts->nr_write_queues, nr_io_queues);
nr_io_queues += ctrl->io_queues[HCTX_TYPE_DEFAULT];
} else {
ctrl->io_queues[HCTX_TYPE_DEFAULT] = nr_io_queues;
}
ctrl->io_queues[HCTX_TYPE_READ] = nr_io_queues;
if (opts->nr_poll_queues) {
ctrl->io_queues[HCTX_TYPE_POLL] =
min(opts->nr_poll_queues, num_online_cpus());
nr_io_queues += ctrl->io_queues[HCTX_TYPE_POLL];
}
ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
if (ret) if (ret)
...@@ -681,6 +659,34 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) ...@@ -681,6 +659,34 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
dev_info(ctrl->ctrl.device, dev_info(ctrl->ctrl.device,
"creating %d I/O queues.\n", nr_io_queues); "creating %d I/O queues.\n", nr_io_queues);
if (opts->nr_write_queues && nr_read_queues < nr_io_queues) {
/*
* separate read/write queues
* hand out dedicated default queues only after we have
* sufficient read queues.
*/
ctrl->io_queues[HCTX_TYPE_READ] = nr_read_queues;
nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
ctrl->io_queues[HCTX_TYPE_DEFAULT] =
min(nr_default_queues, nr_io_queues);
nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
} else {
/*
* shared read/write queues
* either no write queues were requested, or we don't have
* sufficient queue count to have dedicated default queues.
*/
ctrl->io_queues[HCTX_TYPE_DEFAULT] =
min(nr_read_queues, nr_io_queues);
nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
}
if (opts->nr_poll_queues && nr_io_queues) {
/* map dedicated poll queues only if we have queues left */
ctrl->io_queues[HCTX_TYPE_POLL] =
min(nr_poll_queues, nr_io_queues);
}
for (i = 1; i < ctrl->ctrl.queue_count; i++) { for (i = 1; i < ctrl->ctrl.queue_count; i++) {
ret = nvme_rdma_alloc_queue(ctrl, i, ret = nvme_rdma_alloc_queue(ctrl, i,
ctrl->ctrl.sqsize + 1); ctrl->ctrl.sqsize + 1);
...@@ -769,6 +775,11 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ...@@ -769,6 +775,11 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
/*
* Bind the async event SQE DMA mapping to the admin queue lifetime.
* It's safe, since any chage in the underlying RDMA device will issue
* error recovery and queue re-creation.
*/
error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe, error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
sizeof(struct nvme_command), DMA_TO_DEVICE); sizeof(struct nvme_command), DMA_TO_DEVICE);
if (error) if (error)
...@@ -1709,12 +1720,20 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -1709,12 +1720,20 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
dev = queue->device->dev; dev = queue->device->dev;
req->sqe.dma = ib_dma_map_single(dev, req->sqe.data,
sizeof(struct nvme_command),
DMA_TO_DEVICE);
err = ib_dma_mapping_error(dev, req->sqe.dma);
if (unlikely(err))
return BLK_STS_RESOURCE;
ib_dma_sync_single_for_cpu(dev, sqe->dma, ib_dma_sync_single_for_cpu(dev, sqe->dma,
sizeof(struct nvme_command), DMA_TO_DEVICE); sizeof(struct nvme_command), DMA_TO_DEVICE);
ret = nvme_setup_cmd(ns, rq, c); ret = nvme_setup_cmd(ns, rq, c);
if (ret) if (ret)
return ret; goto unmap_qe;
blk_mq_start_request(rq); blk_mq_start_request(rq);
...@@ -1739,10 +1758,16 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -1739,10 +1758,16 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
} }
return BLK_STS_OK; return BLK_STS_OK;
err: err:
if (err == -ENOMEM || err == -EAGAIN) if (err == -ENOMEM || err == -EAGAIN)
return BLK_STS_RESOURCE; ret = BLK_STS_RESOURCE;
return BLK_STS_IOERR; else
ret = BLK_STS_IOERR;
unmap_qe:
ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command),
DMA_TO_DEVICE);
return ret;
} }
static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx) static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
...@@ -1755,25 +1780,36 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx) ...@@ -1755,25 +1780,36 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
static void nvme_rdma_complete_rq(struct request *rq) static void nvme_rdma_complete_rq(struct request *rq)
{ {
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_queue *queue = req->queue;
struct ib_device *ibdev = queue->device->dev;
nvme_rdma_unmap_data(req->queue, rq); nvme_rdma_unmap_data(queue, rq);
ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
DMA_TO_DEVICE);
nvme_complete_rq(rq); nvme_complete_rq(rq);
} }
static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
{ {
struct nvme_rdma_ctrl *ctrl = set->driver_data; struct nvme_rdma_ctrl *ctrl = set->driver_data;
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
set->map[HCTX_TYPE_DEFAULT].nr_queues =
ctrl->io_queues[HCTX_TYPE_DEFAULT];
set->map[HCTX_TYPE_READ].nr_queues = ctrl->io_queues[HCTX_TYPE_READ];
if (ctrl->ctrl.opts->nr_write_queues) {
/* separate read/write queues */ /* separate read/write queues */
set->map[HCTX_TYPE_DEFAULT].nr_queues =
ctrl->io_queues[HCTX_TYPE_DEFAULT];
set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
set->map[HCTX_TYPE_READ].nr_queues =
ctrl->io_queues[HCTX_TYPE_READ];
set->map[HCTX_TYPE_READ].queue_offset = set->map[HCTX_TYPE_READ].queue_offset =
ctrl->io_queues[HCTX_TYPE_DEFAULT]; ctrl->io_queues[HCTX_TYPE_DEFAULT];
} else { } else {
/* mixed read/write queues */ /* shared read/write queues */
set->map[HCTX_TYPE_DEFAULT].nr_queues =
ctrl->io_queues[HCTX_TYPE_DEFAULT];
set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
set->map[HCTX_TYPE_READ].nr_queues =
ctrl->io_queues[HCTX_TYPE_DEFAULT];
set->map[HCTX_TYPE_READ].queue_offset = 0; set->map[HCTX_TYPE_READ].queue_offset = 0;
} }
blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT], blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT],
...@@ -1781,16 +1817,22 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set) ...@@ -1781,16 +1817,22 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ], blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
ctrl->device->dev, 0); ctrl->device->dev, 0);
if (ctrl->ctrl.opts->nr_poll_queues) { if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
/* map dedicated poll queues only if we have queues left */
set->map[HCTX_TYPE_POLL].nr_queues = set->map[HCTX_TYPE_POLL].nr_queues =
ctrl->io_queues[HCTX_TYPE_POLL]; ctrl->io_queues[HCTX_TYPE_POLL];
set->map[HCTX_TYPE_POLL].queue_offset = set->map[HCTX_TYPE_POLL].queue_offset =
ctrl->io_queues[HCTX_TYPE_DEFAULT]; ctrl->io_queues[HCTX_TYPE_DEFAULT] +
if (ctrl->ctrl.opts->nr_write_queues) ctrl->io_queues[HCTX_TYPE_READ];
set->map[HCTX_TYPE_POLL].queue_offset +=
ctrl->io_queues[HCTX_TYPE_READ];
blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]); blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
} }
dev_info(ctrl->ctrl.device,
"mapped %d/%d/%d default/read/poll queues.\n",
ctrl->io_queues[HCTX_TYPE_DEFAULT],
ctrl->io_queues[HCTX_TYPE_READ],
ctrl->io_queues[HCTX_TYPE_POLL]);
return 0; return 0;
} }
......
...@@ -111,6 +111,7 @@ struct nvme_tcp_ctrl { ...@@ -111,6 +111,7 @@ struct nvme_tcp_ctrl {
struct work_struct err_work; struct work_struct err_work;
struct delayed_work connect_work; struct delayed_work connect_work;
struct nvme_tcp_request async_req; struct nvme_tcp_request async_req;
u32 io_queues[HCTX_MAX_TYPES];
}; };
static LIST_HEAD(nvme_tcp_ctrl_list); static LIST_HEAD(nvme_tcp_ctrl_list);
...@@ -1564,6 +1565,35 @@ static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl) ...@@ -1564,6 +1565,35 @@ static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
return nr_io_queues; return nr_io_queues;
} }
static void nvme_tcp_set_io_queues(struct nvme_ctrl *nctrl,
unsigned int nr_io_queues)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
struct nvmf_ctrl_options *opts = nctrl->opts;
if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
/*
* separate read/write queues
* hand out dedicated default queues only after we have
* sufficient read queues.
*/
ctrl->io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
ctrl->io_queues[HCTX_TYPE_DEFAULT] =
min(opts->nr_write_queues, nr_io_queues);
nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
} else {
/*
* shared read/write queues
* either no write queues were requested, or we don't have
* sufficient queue count to have dedicated default queues.
*/
ctrl->io_queues[HCTX_TYPE_DEFAULT] =
min(opts->nr_io_queues, nr_io_queues);
nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
}
}
static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
{ {
unsigned int nr_io_queues; unsigned int nr_io_queues;
...@@ -1581,6 +1611,8 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) ...@@ -1581,6 +1611,8 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
dev_info(ctrl->device, dev_info(ctrl->device,
"creating %d I/O queues.\n", nr_io_queues); "creating %d I/O queues.\n", nr_io_queues);
nvme_tcp_set_io_queues(ctrl, nr_io_queues);
return __nvme_tcp_alloc_io_queues(ctrl); return __nvme_tcp_alloc_io_queues(ctrl);
} }
...@@ -2089,23 +2121,34 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, ...@@ -2089,23 +2121,34 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
static int nvme_tcp_map_queues(struct blk_mq_tag_set *set) static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
{ {
struct nvme_tcp_ctrl *ctrl = set->driver_data; struct nvme_tcp_ctrl *ctrl = set->driver_data;
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
set->map[HCTX_TYPE_DEFAULT].queue_offset = 0; if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
set->map[HCTX_TYPE_READ].nr_queues = ctrl->ctrl.opts->nr_io_queues;
if (ctrl->ctrl.opts->nr_write_queues) {
/* separate read/write queues */ /* separate read/write queues */
set->map[HCTX_TYPE_DEFAULT].nr_queues = set->map[HCTX_TYPE_DEFAULT].nr_queues =
ctrl->ctrl.opts->nr_write_queues; ctrl->io_queues[HCTX_TYPE_DEFAULT];
set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
set->map[HCTX_TYPE_READ].nr_queues =
ctrl->io_queues[HCTX_TYPE_READ];
set->map[HCTX_TYPE_READ].queue_offset = set->map[HCTX_TYPE_READ].queue_offset =
ctrl->ctrl.opts->nr_write_queues; ctrl->io_queues[HCTX_TYPE_DEFAULT];
} else { } else {
/* mixed read/write queues */ /* shared read/write queues */
set->map[HCTX_TYPE_DEFAULT].nr_queues = set->map[HCTX_TYPE_DEFAULT].nr_queues =
ctrl->ctrl.opts->nr_io_queues; ctrl->io_queues[HCTX_TYPE_DEFAULT];
set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
set->map[HCTX_TYPE_READ].nr_queues =
ctrl->io_queues[HCTX_TYPE_DEFAULT];
set->map[HCTX_TYPE_READ].queue_offset = 0; set->map[HCTX_TYPE_READ].queue_offset = 0;
} }
blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]); blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
blk_mq_map_queues(&set->map[HCTX_TYPE_READ]); blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
dev_info(ctrl->ctrl.device,
"mapped %d/%d default/read queues.\n",
ctrl->io_queues[HCTX_TYPE_DEFAULT],
ctrl->io_queues[HCTX_TYPE_READ]);
return 0; return 0;
} }
......
...@@ -293,6 +293,7 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) ...@@ -293,6 +293,7 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
return 0; return 0;
case nvme_cmd_write_zeroes: case nvme_cmd_write_zeroes:
req->execute = nvmet_bdev_execute_write_zeroes; req->execute = nvmet_bdev_execute_write_zeroes;
req->data_len = 0;
return 0; return 0;
default: default:
pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
......
...@@ -106,6 +106,8 @@ enum { ...@@ -106,6 +106,8 @@ enum {
CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */
CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */ CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */
CFTYPE_SYMLINKED = (1 << 6), /* pointed to by symlink too */
/* internal flags, do not use outside cgroup core proper */ /* internal flags, do not use outside cgroup core proper */
__CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */
__CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */
...@@ -543,6 +545,7 @@ struct cftype { ...@@ -543,6 +545,7 @@ struct cftype {
* end of cftype array. * end of cftype array.
*/ */
char name[MAX_CFTYPE_NAME]; char name[MAX_CFTYPE_NAME];
char link_name[MAX_CFTYPE_NAME];
unsigned long private; unsigned long private;
/* /*
......
...@@ -1460,8 +1460,8 @@ struct cgroup *task_cgroup_from_root(struct task_struct *task, ...@@ -1460,8 +1460,8 @@ struct cgroup *task_cgroup_from_root(struct task_struct *task,
static struct kernfs_syscall_ops cgroup_kf_syscall_ops; static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft, static char *cgroup_fill_name(struct cgroup *cgrp, const struct cftype *cft,
char *buf) char *buf, bool write_link_name)
{ {
struct cgroup_subsys *ss = cft->ss; struct cgroup_subsys *ss = cft->ss;
...@@ -1471,13 +1471,26 @@ static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft, ...@@ -1471,13 +1471,26 @@ static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s", snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s",
dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name, dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
cft->name); write_link_name ? cft->link_name : cft->name);
} else { } else {
strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX); strscpy(buf, write_link_name ? cft->link_name : cft->name,
CGROUP_FILE_NAME_MAX);
} }
return buf; return buf;
} }
static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
char *buf)
{
return cgroup_fill_name(cgrp, cft, buf, false);
}
static char *cgroup_link_name(struct cgroup *cgrp, const struct cftype *cft,
char *buf)
{
return cgroup_fill_name(cgrp, cft, buf, true);
}
/** /**
* cgroup_file_mode - deduce file mode of a control file * cgroup_file_mode - deduce file mode of a control file
* @cft: the control file in question * @cft: the control file in question
...@@ -1636,6 +1649,9 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) ...@@ -1636,6 +1649,9 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
} }
kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name)); kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
if (cft->flags & CFTYPE_SYMLINKED)
kernfs_remove_by_name(cgrp->kn,
cgroup_link_name(cgrp, cft, name));
} }
/** /**
...@@ -3821,6 +3837,7 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, ...@@ -3821,6 +3837,7 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
{ {
char name[CGROUP_FILE_NAME_MAX]; char name[CGROUP_FILE_NAME_MAX];
struct kernfs_node *kn; struct kernfs_node *kn;
struct kernfs_node *kn_link;
struct lock_class_key *key = NULL; struct lock_class_key *key = NULL;
int ret; int ret;
...@@ -3851,6 +3868,14 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, ...@@ -3851,6 +3868,14 @@ static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
spin_unlock_irq(&cgroup_file_kn_lock); spin_unlock_irq(&cgroup_file_kn_lock);
} }
if (cft->flags & CFTYPE_SYMLINKED) {
kn_link = kernfs_create_link(cgrp->kn,
cgroup_link_name(cgrp, cft, name),
kn);
if (IS_ERR(kn_link))
return PTR_ERR(kn_link);
}
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment