Commit 7c989b1d authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-6.1/passthrough-2022-10-04' of git://git.kernel.dk/linux

Pull passthrough updates from Jens Axboe:
 "With these changes, passthrough NVMe support over io_uring now
  performs at the same level as block device O_DIRECT, and in many cases
  6-8% better.

  This contains:

   - Add support for fixed buffers for passthrough (Anuj, Kanchan)

   - Enable batched allocations and freeing on passthrough, similarly to
     what we support on the normal storage path (me)

   - Fix from Geert fixing an issue with !CONFIG_IO_URING"

* tag 'for-6.1/passthrough-2022-10-04' of git://git.kernel.dk/linux:
  io_uring: Add missing inline to io_uring_cmd_import_fixed() dummy
  nvme: wire up fixed buffer support for nvme passthrough
  nvme: pass ubuffer as an integer
  block: extend functionality to map bvec iterator
  block: factor out blk_rq_map_bio_alloc helper
  block: rename bio_map_put to blk_mq_map_bio_put
  nvme: refactor nvme_alloc_request
  nvme: refactor nvme_add_user_metadata
  nvme: Use blk_rq_map_user_io helper
  scsi: Use blk_rq_map_user_io helper
  block: add blk_rq_map_user_io
  io_uring: introduce fixed buffer support for io_uring_cmd
  io_uring: add io_uring_cmd_import_fixed
  nvme: enable batched completions of passthrough IO
  nvme: split out metadata vs non metadata end_io uring_cmd completions
  block: allow end_io based requests in the completion batch handling
  block: change request end_io handler to pass back a return value
  block: enable batched allocation for blk_mq_alloc_request()
  block: kill deprecated BUG_ON() in the flush handling
parents 51338980 0e0abad2
...@@ -205,7 +205,6 @@ static void blk_flush_complete_seq(struct request *rq, ...@@ -205,7 +205,6 @@ static void blk_flush_complete_seq(struct request *rq,
* flush data request completion path. Restore @rq for * flush data request completion path. Restore @rq for
* normal completion and end it. * normal completion and end it.
*/ */
BUG_ON(!list_empty(&rq->queuelist));
list_del_init(&rq->flush.list); list_del_init(&rq->flush.list);
blk_flush_restore_request(rq); blk_flush_restore_request(rq);
blk_mq_end_request(rq, error); blk_mq_end_request(rq, error);
...@@ -218,7 +217,8 @@ static void blk_flush_complete_seq(struct request *rq, ...@@ -218,7 +217,8 @@ static void blk_flush_complete_seq(struct request *rq,
blk_kick_flush(q, fq, cmd_flags); blk_kick_flush(q, fq, cmd_flags);
} }
static void flush_end_io(struct request *flush_rq, blk_status_t error) static enum rq_end_io_ret flush_end_io(struct request *flush_rq,
blk_status_t error)
{ {
struct request_queue *q = flush_rq->q; struct request_queue *q = flush_rq->q;
struct list_head *running; struct list_head *running;
...@@ -232,7 +232,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) ...@@ -232,7 +232,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
if (!req_ref_put_and_test(flush_rq)) { if (!req_ref_put_and_test(flush_rq)) {
fq->rq_status = error; fq->rq_status = error;
spin_unlock_irqrestore(&fq->mq_flush_lock, flags); spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
return; return RQ_END_IO_NONE;
} }
blk_account_io_flush(flush_rq); blk_account_io_flush(flush_rq);
...@@ -269,6 +269,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) ...@@ -269,6 +269,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
} }
spin_unlock_irqrestore(&fq->mq_flush_lock, flags); spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
return RQ_END_IO_NONE;
} }
bool is_flush_rq(struct request *rq) bool is_flush_rq(struct request *rq)
...@@ -354,7 +355,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, ...@@ -354,7 +355,8 @@ static void blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq,
blk_flush_queue_rq(flush_rq, false); blk_flush_queue_rq(flush_rq, false);
} }
static void mq_flush_data_end_io(struct request *rq, blk_status_t error) static enum rq_end_io_ret mq_flush_data_end_io(struct request *rq,
blk_status_t error)
{ {
struct request_queue *q = rq->q; struct request_queue *q = rq->q;
struct blk_mq_hw_ctx *hctx = rq->mq_hctx; struct blk_mq_hw_ctx *hctx = rq->mq_hctx;
...@@ -376,6 +378,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) ...@@ -376,6 +378,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
spin_unlock_irqrestore(&fq->mq_flush_lock, flags); spin_unlock_irqrestore(&fq->mq_flush_lock, flags);
blk_mq_sched_restart(hctx); blk_mq_sched_restart(hctx);
return RQ_END_IO_NONE;
} }
/** /**
......
...@@ -231,7 +231,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, ...@@ -231,7 +231,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
return ret; return ret;
} }
static void bio_map_put(struct bio *bio) static void blk_mq_map_bio_put(struct bio *bio)
{ {
if (bio->bi_opf & REQ_ALLOC_CACHE) { if (bio->bi_opf & REQ_ALLOC_CACHE) {
bio_put(bio); bio_put(bio);
...@@ -241,17 +241,10 @@ static void bio_map_put(struct bio *bio) ...@@ -241,17 +241,10 @@ static void bio_map_put(struct bio *bio)
} }
} }
static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, static struct bio *blk_rq_map_bio_alloc(struct request *rq,
gfp_t gfp_mask) unsigned int nr_vecs, gfp_t gfp_mask)
{ {
unsigned int max_sectors = queue_max_hw_sectors(rq->q);
unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
struct bio *bio; struct bio *bio;
int ret;
int j;
if (!iov_iter_count(iter))
return -EINVAL;
if (rq->cmd_flags & REQ_POLLED) { if (rq->cmd_flags & REQ_POLLED) {
blk_opf_t opf = rq->cmd_flags | REQ_ALLOC_CACHE; blk_opf_t opf = rq->cmd_flags | REQ_ALLOC_CACHE;
...@@ -259,13 +252,31 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, ...@@ -259,13 +252,31 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
bio = bio_alloc_bioset(NULL, nr_vecs, opf, gfp_mask, bio = bio_alloc_bioset(NULL, nr_vecs, opf, gfp_mask,
&fs_bio_set); &fs_bio_set);
if (!bio) if (!bio)
return -ENOMEM; return NULL;
} else { } else {
bio = bio_kmalloc(nr_vecs, gfp_mask); bio = bio_kmalloc(nr_vecs, gfp_mask);
if (!bio) if (!bio)
return -ENOMEM; return NULL;
bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq)); bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq));
} }
return bio;
}
static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
gfp_t gfp_mask)
{
unsigned int max_sectors = queue_max_hw_sectors(rq->q);
unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS);
struct bio *bio;
int ret;
int j;
if (!iov_iter_count(iter))
return -EINVAL;
bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask);
if (bio == NULL)
return -ENOMEM;
while (iov_iter_count(iter)) { while (iov_iter_count(iter)) {
struct page **pages, *stack_pages[UIO_FASTIOV]; struct page **pages, *stack_pages[UIO_FASTIOV];
...@@ -331,7 +342,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, ...@@ -331,7 +342,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
out_unmap: out_unmap:
bio_release_pages(bio, false); bio_release_pages(bio, false);
bio_map_put(bio); blk_mq_map_bio_put(bio);
return ret; return ret;
} }
...@@ -537,6 +548,62 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio) ...@@ -537,6 +548,62 @@ int blk_rq_append_bio(struct request *rq, struct bio *bio)
} }
EXPORT_SYMBOL(blk_rq_append_bio); EXPORT_SYMBOL(blk_rq_append_bio);
/* Prepare bio for passthrough IO given ITER_BVEC iter */
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
{
struct request_queue *q = rq->q;
size_t nr_iter = iov_iter_count(iter);
size_t nr_segs = iter->nr_segs;
struct bio_vec *bvecs, *bvprvp = NULL;
struct queue_limits *lim = &q->limits;
unsigned int nsegs = 0, bytes = 0;
struct bio *bio;
size_t i;
if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
return -EINVAL;
if (nr_segs > queue_max_segments(q))
return -EINVAL;
/* no iovecs to alloc, as we already have a BVEC iterator */
bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
if (bio == NULL)
return -ENOMEM;
bio_iov_bvec_set(bio, (struct iov_iter *)iter);
blk_rq_bio_prep(rq, bio, nr_segs);
/* loop to perform a bunch of sanity checks */
bvecs = (struct bio_vec *)iter->bvec;
for (i = 0; i < nr_segs; i++) {
struct bio_vec *bv = &bvecs[i];
/*
* If the queue doesn't support SG gaps and adding this
* offset would create a gap, fallback to copy.
*/
if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
blk_mq_map_bio_put(bio);
return -EREMOTEIO;
}
/* check full condition */
if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
goto put_bio;
if (bytes + bv->bv_len > nr_iter)
goto put_bio;
if (bv->bv_offset + bv->bv_len > PAGE_SIZE)
goto put_bio;
nsegs++;
bytes += bv->bv_len;
bvprvp = bv;
}
return 0;
put_bio:
blk_mq_map_bio_put(bio);
return -EINVAL;
}
/** /**
* blk_rq_map_user_iov - map user data to a request, for passthrough requests * blk_rq_map_user_iov - map user data to a request, for passthrough requests
* @q: request queue where request should be inserted * @q: request queue where request should be inserted
...@@ -556,24 +623,35 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, ...@@ -556,24 +623,35 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
struct rq_map_data *map_data, struct rq_map_data *map_data,
const struct iov_iter *iter, gfp_t gfp_mask) const struct iov_iter *iter, gfp_t gfp_mask)
{ {
bool copy = false; bool copy = false, map_bvec = false;
unsigned long align = q->dma_pad_mask | queue_dma_alignment(q); unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
struct bio *bio = NULL; struct bio *bio = NULL;
struct iov_iter i; struct iov_iter i;
int ret = -EINVAL; int ret = -EINVAL;
if (!iter_is_iovec(iter))
goto fail;
if (map_data) if (map_data)
copy = true; copy = true;
else if (blk_queue_may_bounce(q)) else if (blk_queue_may_bounce(q))
copy = true; copy = true;
else if (iov_iter_alignment(iter) & align) else if (iov_iter_alignment(iter) & align)
copy = true; copy = true;
else if (iov_iter_is_bvec(iter))
map_bvec = true;
else if (!iter_is_iovec(iter))
copy = true;
else if (queue_virt_boundary(q)) else if (queue_virt_boundary(q))
copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter); copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);
if (map_bvec) {
ret = blk_rq_map_user_bvec(rq, iter);
if (!ret)
return 0;
if (ret != -EREMOTEIO)
goto fail;
/* fall back to copying the data on limits mismatches */
copy = true;
}
i = *iter; i = *iter;
do { do {
if (copy) if (copy)
...@@ -611,6 +689,42 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, ...@@ -611,6 +689,42 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
} }
EXPORT_SYMBOL(blk_rq_map_user); EXPORT_SYMBOL(blk_rq_map_user);
int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data,
void __user *ubuf, unsigned long buf_len, gfp_t gfp_mask,
bool vec, int iov_count, bool check_iter_count, int rw)
{
int ret = 0;
if (vec) {
struct iovec fast_iov[UIO_FASTIOV];
struct iovec *iov = fast_iov;
struct iov_iter iter;
ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len,
UIO_FASTIOV, &iov, &iter);
if (ret < 0)
return ret;
if (iov_count) {
/* SG_IO howto says that the shorter of the two wins */
iov_iter_truncate(&iter, buf_len);
if (check_iter_count && !iov_iter_count(&iter)) {
kfree(iov);
return -EINVAL;
}
}
ret = blk_rq_map_user_iov(req->q, req, map_data, &iter,
gfp_mask);
kfree(iov);
} else if (buf_len) {
ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len,
gfp_mask);
}
return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_io);
/** /**
* blk_rq_unmap_user - unmap a request with user data * blk_rq_unmap_user - unmap a request with user data
* @bio: start of bio list * @bio: start of bio list
...@@ -636,7 +750,7 @@ int blk_rq_unmap_user(struct bio *bio) ...@@ -636,7 +750,7 @@ int blk_rq_unmap_user(struct bio *bio)
next_bio = bio; next_bio = bio;
bio = bio->bi_next; bio = bio->bi_next;
bio_map_put(next_bio); blk_mq_map_bio_put(next_bio);
} }
return ret; return ret;
......
...@@ -510,16 +510,77 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data) ...@@ -510,16 +510,77 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
alloc_time_ns); alloc_time_ns);
} }
static struct request *blk_mq_rq_cache_fill(struct request_queue *q,
struct blk_plug *plug,
blk_opf_t opf,
blk_mq_req_flags_t flags)
{
struct blk_mq_alloc_data data = {
.q = q,
.flags = flags,
.cmd_flags = opf,
.nr_tags = plug->nr_ios,
.cached_rq = &plug->cached_rq,
};
struct request *rq;
if (blk_queue_enter(q, flags))
return NULL;
plug->nr_ios = 1;
rq = __blk_mq_alloc_requests(&data);
if (unlikely(!rq))
blk_queue_exit(q);
return rq;
}
static struct request *blk_mq_alloc_cached_request(struct request_queue *q,
blk_opf_t opf,
blk_mq_req_flags_t flags)
{
struct blk_plug *plug = current->plug;
struct request *rq;
if (!plug)
return NULL;
if (rq_list_empty(plug->cached_rq)) {
if (plug->nr_ios == 1)
return NULL;
rq = blk_mq_rq_cache_fill(q, plug, opf, flags);
if (rq)
goto got_it;
return NULL;
}
rq = rq_list_peek(&plug->cached_rq);
if (!rq || rq->q != q)
return NULL;
if (blk_mq_get_hctx_type(opf) != rq->mq_hctx->type)
return NULL;
if (op_is_flush(rq->cmd_flags) != op_is_flush(opf))
return NULL;
plug->cached_rq = rq_list_next(rq);
got_it:
rq->cmd_flags = opf;
INIT_LIST_HEAD(&rq->queuelist);
return rq;
}
struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
blk_mq_req_flags_t flags) blk_mq_req_flags_t flags)
{ {
struct request *rq;
rq = blk_mq_alloc_cached_request(q, opf, flags);
if (!rq) {
struct blk_mq_alloc_data data = { struct blk_mq_alloc_data data = {
.q = q, .q = q,
.flags = flags, .flags = flags,
.cmd_flags = opf, .cmd_flags = opf,
.nr_tags = 1, .nr_tags = 1,
}; };
struct request *rq;
int ret; int ret;
ret = blk_queue_enter(q, flags); ret = blk_queue_enter(q, flags);
...@@ -529,6 +590,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf, ...@@ -529,6 +590,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, blk_opf_t opf,
rq = __blk_mq_alloc_requests(&data); rq = __blk_mq_alloc_requests(&data);
if (!rq) if (!rq)
goto out_queue_exit; goto out_queue_exit;
}
rq->__data_len = 0; rq->__data_len = 0;
rq->__sector = (sector_t) -1; rq->__sector = (sector_t) -1;
rq->bio = rq->biotail = NULL; rq->bio = rq->biotail = NULL;
...@@ -761,8 +823,10 @@ static void blk_complete_request(struct request *req) ...@@ -761,8 +823,10 @@ static void blk_complete_request(struct request *req)
* can find how many bytes remain in the request * can find how many bytes remain in the request
* later. * later.
*/ */
if (!req->end_io) {
req->bio = NULL; req->bio = NULL;
req->__data_len = 0; req->__data_len = 0;
}
} }
/** /**
...@@ -939,7 +1003,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error) ...@@ -939,7 +1003,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
if (rq->end_io) { if (rq->end_io) {
rq_qos_done(rq->q, rq); rq_qos_done(rq->q, rq);
rq->end_io(rq, error); if (rq->end_io(rq, error) == RQ_END_IO_FREE)
blk_mq_free_request(rq);
} else { } else {
blk_mq_free_request(rq); blk_mq_free_request(rq);
} }
...@@ -992,6 +1057,13 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob) ...@@ -992,6 +1057,13 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
rq_qos_done(rq->q, rq); rq_qos_done(rq->q, rq);
/*
* If end_io handler returns NONE, then it still has
* ownership of the request.
*/
if (rq->end_io && rq->end_io(rq, 0) == RQ_END_IO_NONE)
continue;
WRITE_ONCE(rq->state, MQ_RQ_IDLE); WRITE_ONCE(rq->state, MQ_RQ_IDLE);
if (!req_ref_put_and_test(rq)) if (!req_ref_put_and_test(rq))
continue; continue;
...@@ -1233,12 +1305,13 @@ struct blk_rq_wait { ...@@ -1233,12 +1305,13 @@ struct blk_rq_wait {
blk_status_t ret; blk_status_t ret;
}; };
static void blk_end_sync_rq(struct request *rq, blk_status_t ret) static enum rq_end_io_ret blk_end_sync_rq(struct request *rq, blk_status_t ret)
{ {
struct blk_rq_wait *wait = rq->end_io_data; struct blk_rq_wait *wait = rq->end_io_data;
wait->ret = ret; wait->ret = ret;
complete(&wait->done); complete(&wait->done);
return RQ_END_IO_NONE;
} }
bool blk_rq_is_poll(struct request *rq) bool blk_rq_is_poll(struct request *rq)
...@@ -1472,10 +1545,12 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next) ...@@ -1472,10 +1545,12 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
void blk_mq_put_rq_ref(struct request *rq) void blk_mq_put_rq_ref(struct request *rq)
{ {
if (is_flush_rq(rq)) if (is_flush_rq(rq)) {
rq->end_io(rq, 0); if (rq->end_io(rq, 0) == RQ_END_IO_FREE)
else if (req_ref_put_and_test(rq)) blk_mq_free_request(rq);
} else if (req_ref_put_and_test(rq)) {
__blk_mq_free_request(rq); __blk_mq_free_request(rq);
}
} }
static bool blk_mq_check_expired(struct request *rq, void *priv) static bool blk_mq_check_expired(struct request *rq, void *priv)
......
...@@ -292,11 +292,13 @@ static void dm_kill_unmapped_request(struct request *rq, blk_status_t error) ...@@ -292,11 +292,13 @@ static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
dm_complete_request(rq, error); dm_complete_request(rq, error);
} }
static void end_clone_request(struct request *clone, blk_status_t error) static enum rq_end_io_ret end_clone_request(struct request *clone,
blk_status_t error)
{ {
struct dm_rq_target_io *tio = clone->end_io_data; struct dm_rq_target_io *tio = clone->end_io_data;
dm_complete_request(tio->orig, error); dm_complete_request(tio->orig, error);
return RQ_END_IO_NONE;
} }
static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
......
...@@ -1172,7 +1172,8 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl) ...@@ -1172,7 +1172,8 @@ static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2); queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ / 2);
} }
static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
blk_status_t status)
{ {
struct nvme_ctrl *ctrl = rq->end_io_data; struct nvme_ctrl *ctrl = rq->end_io_data;
unsigned long flags; unsigned long flags;
...@@ -1184,7 +1185,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) ...@@ -1184,7 +1185,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
dev_err(ctrl->device, dev_err(ctrl->device,
"failed nvme_keep_alive_end_io error=%d\n", "failed nvme_keep_alive_end_io error=%d\n",
status); status);
return; return RQ_END_IO_NONE;
} }
ctrl->comp_seen = false; ctrl->comp_seen = false;
...@@ -1195,6 +1196,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) ...@@ -1195,6 +1196,7 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
spin_unlock_irqrestore(&ctrl->lock, flags); spin_unlock_irqrestore(&ctrl->lock, flags);
if (startka) if (startka)
nvme_queue_keep_alive_work(ctrl); nvme_queue_keep_alive_work(ctrl);
return RQ_END_IO_NONE;
} }
static void nvme_keep_alive_work(struct work_struct *work) static void nvme_keep_alive_work(struct work_struct *work)
......
This diff is collapsed.
...@@ -1268,7 +1268,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) ...@@ -1268,7 +1268,7 @@ static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
} }
static void abort_endio(struct request *req, blk_status_t error) static enum rq_end_io_ret abort_endio(struct request *req, blk_status_t error)
{ {
struct nvme_queue *nvmeq = req->mq_hctx->driver_data; struct nvme_queue *nvmeq = req->mq_hctx->driver_data;
...@@ -1276,6 +1276,7 @@ static void abort_endio(struct request *req, blk_status_t error) ...@@ -1276,6 +1276,7 @@ static void abort_endio(struct request *req, blk_status_t error)
"Abort status: 0x%x", nvme_req(req)->status); "Abort status: 0x%x", nvme_req(req)->status);
atomic_inc(&nvmeq->dev->ctrl.abort_limit); atomic_inc(&nvmeq->dev->ctrl.abort_limit);
blk_mq_free_request(req); blk_mq_free_request(req);
return RQ_END_IO_NONE;
} }
static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
...@@ -2447,22 +2448,25 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) ...@@ -2447,22 +2448,25 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
return result; return result;
} }
static void nvme_del_queue_end(struct request *req, blk_status_t error) static enum rq_end_io_ret nvme_del_queue_end(struct request *req,
blk_status_t error)
{ {
struct nvme_queue *nvmeq = req->end_io_data; struct nvme_queue *nvmeq = req->end_io_data;
blk_mq_free_request(req); blk_mq_free_request(req);
complete(&nvmeq->delete_done); complete(&nvmeq->delete_done);
return RQ_END_IO_NONE;
} }
static void nvme_del_cq_end(struct request *req, blk_status_t error) static enum rq_end_io_ret nvme_del_cq_end(struct request *req,
blk_status_t error)
{ {
struct nvme_queue *nvmeq = req->end_io_data; struct nvme_queue *nvmeq = req->end_io_data;
if (error) if (error)
set_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); set_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags);
nvme_del_queue_end(req, error); return nvme_del_queue_end(req, error);
} }
static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
......
...@@ -245,7 +245,7 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) ...@@ -245,7 +245,7 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w)
nvme_passthru_end(ctrl, effects, req->cmd, status); nvme_passthru_end(ctrl, effects, req->cmd, status);
} }
static void nvmet_passthru_req_done(struct request *rq, static enum rq_end_io_ret nvmet_passthru_req_done(struct request *rq,
blk_status_t blk_status) blk_status_t blk_status)
{ {
struct nvmet_req *req = rq->end_io_data; struct nvmet_req *req = rq->end_io_data;
...@@ -253,6 +253,7 @@ static void nvmet_passthru_req_done(struct request *rq, ...@@ -253,6 +253,7 @@ static void nvmet_passthru_req_done(struct request *rq,
req->cqe->result = nvme_req(rq)->result; req->cqe->result = nvme_req(rq)->result;
nvmet_req_complete(req, nvme_req(rq)->status); nvmet_req_complete(req, nvme_req(rq)->status);
blk_mq_free_request(rq); blk_mq_free_request(rq);
return RQ_END_IO_NONE;
} }
static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
......
...@@ -2004,9 +2004,11 @@ enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *scmd) ...@@ -2004,9 +2004,11 @@ enum scsi_disposition scsi_decide_disposition(struct scsi_cmnd *scmd)
} }
} }
static void eh_lock_door_done(struct request *req, blk_status_t status) static enum rq_end_io_ret eh_lock_door_done(struct request *req,
blk_status_t status)
{ {
blk_mq_free_request(req); blk_mq_free_request(req);
return RQ_END_IO_NONE;
} }
/** /**
......
...@@ -449,25 +449,9 @@ static int sg_io(struct scsi_device *sdev, struct sg_io_hdr *hdr, fmode_t mode) ...@@ -449,25 +449,9 @@ static int sg_io(struct scsi_device *sdev, struct sg_io_hdr *hdr, fmode_t mode)
if (ret < 0) if (ret < 0)
goto out_put_request; goto out_put_request;
ret = 0; ret = blk_rq_map_user_io(rq, NULL, hdr->dxferp, hdr->dxfer_len,
if (hdr->iovec_count && hdr->dxfer_len) { GFP_KERNEL, hdr->iovec_count && hdr->dxfer_len,
struct iov_iter i; hdr->iovec_count, 0, rq_data_dir(rq));
struct iovec *iov = NULL;
ret = import_iovec(rq_data_dir(rq), hdr->dxferp,
hdr->iovec_count, 0, &iov, &i);
if (ret < 0)
goto out_put_request;
/* SG_IO howto says that the shorter of the two wins */
iov_iter_truncate(&i, hdr->dxfer_len);
ret = blk_rq_map_user_iov(rq->q, rq, NULL, &i, GFP_KERNEL);
kfree(iov);
} else if (hdr->dxfer_len)
ret = blk_rq_map_user(rq->q, rq, NULL, hdr->dxferp,
hdr->dxfer_len, GFP_KERNEL);
if (ret) if (ret)
goto out_put_request; goto out_put_request;
......
...@@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */ ...@@ -177,7 +177,7 @@ typedef struct sg_device { /* holds the state of each scsi generic device */
} Sg_device; } Sg_device;
/* tasklet or soft irq callback */ /* tasklet or soft irq callback */
static void sg_rq_end_io(struct request *rq, blk_status_t status); static enum rq_end_io_ret sg_rq_end_io(struct request *rq, blk_status_t status);
static int sg_start_req(Sg_request *srp, unsigned char *cmd); static int sg_start_req(Sg_request *srp, unsigned char *cmd);
static int sg_finish_rem_req(Sg_request * srp); static int sg_finish_rem_req(Sg_request * srp);
static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size); static int sg_build_indirect(Sg_scatter_hold * schp, Sg_fd * sfp, int buff_size);
...@@ -1311,7 +1311,7 @@ sg_rq_end_io_usercontext(struct work_struct *work) ...@@ -1311,7 +1311,7 @@ sg_rq_end_io_usercontext(struct work_struct *work)
* This function is a "bottom half" handler that is called by the mid * This function is a "bottom half" handler that is called by the mid
* level when a command is completed (or has failed). * level when a command is completed (or has failed).
*/ */
static void static enum rq_end_io_ret
sg_rq_end_io(struct request *rq, blk_status_t status) sg_rq_end_io(struct request *rq, blk_status_t status)
{ {
struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq); struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(rq);
...@@ -1324,11 +1324,11 @@ sg_rq_end_io(struct request *rq, blk_status_t status) ...@@ -1324,11 +1324,11 @@ sg_rq_end_io(struct request *rq, blk_status_t status)
int result, resid, done = 1; int result, resid, done = 1;
if (WARN_ON(srp->done != 0)) if (WARN_ON(srp->done != 0))
return; return RQ_END_IO_NONE;
sfp = srp->parentfp; sfp = srp->parentfp;
if (WARN_ON(sfp == NULL)) if (WARN_ON(sfp == NULL))
return; return RQ_END_IO_NONE;
sdp = sfp->parentdp; sdp = sfp->parentdp;
if (unlikely(atomic_read(&sdp->detaching))) if (unlikely(atomic_read(&sdp->detaching)))
...@@ -1406,6 +1406,7 @@ sg_rq_end_io(struct request *rq, blk_status_t status) ...@@ -1406,6 +1406,7 @@ sg_rq_end_io(struct request *rq, blk_status_t status)
INIT_WORK(&srp->ew.work, sg_rq_end_io_usercontext); INIT_WORK(&srp->ew.work, sg_rq_end_io_usercontext);
schedule_work(&srp->ew.work); schedule_work(&srp->ew.work);
} }
return RQ_END_IO_NONE;
} }
static const struct file_operations sg_fops = { static const struct file_operations sg_fops = {
...@@ -1803,26 +1804,8 @@ sg_start_req(Sg_request *srp, unsigned char *cmd) ...@@ -1803,26 +1804,8 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
md->from_user = 0; md->from_user = 0;
} }
if (iov_count) { res = blk_rq_map_user_io(rq, md, hp->dxferp, hp->dxfer_len,
struct iovec *iov = NULL; GFP_ATOMIC, iov_count, iov_count, 1, rw);
struct iov_iter i;
res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i);
if (res < 0)
return res;
iov_iter_truncate(&i, hp->dxfer_len);
if (!iov_iter_count(&i)) {
kfree(iov);
return -EINVAL;
}
res = blk_rq_map_user_iov(q, rq, md, &i, GFP_ATOMIC);
kfree(iov);
} else
res = blk_rq_map_user(q, rq, md, hp->dxferp,
hp->dxfer_len, GFP_ATOMIC);
if (!res) { if (!res) {
srp->bio = rq->bio; srp->bio = rq->bio;
......
...@@ -512,7 +512,8 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req) ...@@ -512,7 +512,8 @@ static void st_do_stats(struct scsi_tape *STp, struct request *req)
atomic64_dec(&STp->stats->in_flight); atomic64_dec(&STp->stats->in_flight);
} }
static void st_scsi_execute_end(struct request *req, blk_status_t status) static enum rq_end_io_ret st_scsi_execute_end(struct request *req,
blk_status_t status)
{ {
struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
struct st_request *SRpnt = req->end_io_data; struct st_request *SRpnt = req->end_io_data;
...@@ -532,6 +533,7 @@ static void st_scsi_execute_end(struct request *req, blk_status_t status) ...@@ -532,6 +533,7 @@ static void st_scsi_execute_end(struct request *req, blk_status_t status)
blk_rq_unmap_user(tmp); blk_rq_unmap_user(tmp);
blk_mq_free_request(req); blk_mq_free_request(req);
return RQ_END_IO_NONE;
} }
static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd, static int st_scsi_execute(struct st_request *SRpnt, const unsigned char *cmd,
......
...@@ -39,7 +39,7 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev) ...@@ -39,7 +39,7 @@ static inline struct pscsi_dev_virt *PSCSI_DEV(struct se_device *dev)
} }
static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd); static sense_reason_t pscsi_execute_cmd(struct se_cmd *cmd);
static void pscsi_req_done(struct request *, blk_status_t); static enum rq_end_io_ret pscsi_req_done(struct request *, blk_status_t);
/* pscsi_attach_hba(): /* pscsi_attach_hba():
* *
...@@ -1002,7 +1002,8 @@ static sector_t pscsi_get_blocks(struct se_device *dev) ...@@ -1002,7 +1002,8 @@ static sector_t pscsi_get_blocks(struct se_device *dev)
return 0; return 0;
} }
static void pscsi_req_done(struct request *req, blk_status_t status) static enum rq_end_io_ret pscsi_req_done(struct request *req,
blk_status_t status)
{ {
struct se_cmd *cmd = req->end_io_data; struct se_cmd *cmd = req->end_io_data;
struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req); struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
...@@ -1029,6 +1030,7 @@ static void pscsi_req_done(struct request *req, blk_status_t status) ...@@ -1029,6 +1030,7 @@ static void pscsi_req_done(struct request *req, blk_status_t status)
} }
blk_mq_free_request(req); blk_mq_free_request(req);
return RQ_END_IO_NONE;
} }
static const struct target_backend_ops pscsi_ops = { static const struct target_backend_ops pscsi_ops = {
......
...@@ -613,14 +613,17 @@ static void ufshpb_activate_subregion(struct ufshpb_lu *hpb, ...@@ -613,14 +613,17 @@ static void ufshpb_activate_subregion(struct ufshpb_lu *hpb,
srgn->srgn_state = HPB_SRGN_VALID; srgn->srgn_state = HPB_SRGN_VALID;
} }
static void ufshpb_umap_req_compl_fn(struct request *req, blk_status_t error) static enum rq_end_io_ret ufshpb_umap_req_compl_fn(struct request *req,
blk_status_t error)
{ {
struct ufshpb_req *umap_req = (struct ufshpb_req *)req->end_io_data; struct ufshpb_req *umap_req = (struct ufshpb_req *)req->end_io_data;
ufshpb_put_req(umap_req->hpb, umap_req); ufshpb_put_req(umap_req->hpb, umap_req);
return RQ_END_IO_NONE;
} }
static void ufshpb_map_req_compl_fn(struct request *req, blk_status_t error) static enum rq_end_io_ret ufshpb_map_req_compl_fn(struct request *req,
blk_status_t error)
{ {
struct ufshpb_req *map_req = (struct ufshpb_req *) req->end_io_data; struct ufshpb_req *map_req = (struct ufshpb_req *) req->end_io_data;
struct ufshpb_lu *hpb = map_req->hpb; struct ufshpb_lu *hpb = map_req->hpb;
...@@ -636,6 +639,7 @@ static void ufshpb_map_req_compl_fn(struct request *req, blk_status_t error) ...@@ -636,6 +639,7 @@ static void ufshpb_map_req_compl_fn(struct request *req, blk_status_t error)
spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); spin_unlock_irqrestore(&hpb->rgn_state_lock, flags);
ufshpb_put_map_req(map_req->hpb, map_req); ufshpb_put_map_req(map_req->hpb, map_req);
return RQ_END_IO_NONE;
} }
static void ufshpb_set_unmap_cmd(unsigned char *cdb, struct ufshpb_region *rgn) static void ufshpb_set_unmap_cmd(unsigned char *cdb, struct ufshpb_region *rgn)
......
...@@ -14,7 +14,12 @@ struct blk_flush_queue; ...@@ -14,7 +14,12 @@ struct blk_flush_queue;
#define BLKDEV_MIN_RQ 4 #define BLKDEV_MIN_RQ 4
#define BLKDEV_DEFAULT_RQ 128 #define BLKDEV_DEFAULT_RQ 128
typedef void (rq_end_io_fn)(struct request *, blk_status_t); enum rq_end_io_ret {
RQ_END_IO_NONE,
RQ_END_IO_FREE,
};
typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t);
/* /*
* request flags */ * request flags */
...@@ -848,8 +853,9 @@ static inline bool blk_mq_add_to_batch(struct request *req, ...@@ -848,8 +853,9 @@ static inline bool blk_mq_add_to_batch(struct request *req,
struct io_comp_batch *iob, int ioerror, struct io_comp_batch *iob, int ioerror,
void (*complete)(struct io_comp_batch *)) void (*complete)(struct io_comp_batch *))
{ {
if (!iob || (req->rq_flags & RQF_ELV) || req->end_io || ioerror) if (!iob || (req->rq_flags & RQF_ELV) || ioerror)
return false; return false;
if (!iob->complete) if (!iob->complete)
iob->complete = complete; iob->complete = complete;
else if (iob->complete != complete) else if (iob->complete != complete)
...@@ -979,6 +985,8 @@ struct rq_map_data { ...@@ -979,6 +985,8 @@ struct rq_map_data {
int blk_rq_map_user(struct request_queue *, struct request *, int blk_rq_map_user(struct request_queue *, struct request *,
struct rq_map_data *, void __user *, unsigned long, gfp_t); struct rq_map_data *, void __user *, unsigned long, gfp_t);
int blk_rq_map_user_io(struct request *, struct rq_map_data *,
void __user *, unsigned long, gfp_t, bool, int, bool, int);
int blk_rq_map_user_iov(struct request_queue *, struct request *, int blk_rq_map_user_iov(struct request_queue *, struct request *,
struct rq_map_data *, const struct iov_iter *, gfp_t); struct rq_map_data *, const struct iov_iter *, gfp_t);
int blk_rq_unmap_user(struct bio *); int blk_rq_unmap_user(struct bio *);
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/xarray.h> #include <linux/xarray.h>
#include <uapi/linux/io_uring.h>
enum io_uring_cmd_flags { enum io_uring_cmd_flags {
IO_URING_F_COMPLETE_DEFER = 1, IO_URING_F_COMPLETE_DEFER = 1,
...@@ -27,11 +28,13 @@ struct io_uring_cmd { ...@@ -27,11 +28,13 @@ struct io_uring_cmd {
void *cookie; void *cookie;
}; };
u32 cmd_op; u32 cmd_op;
u32 pad; u32 flags;
u8 pdu[32]; /* available inline for free use */ u8 pdu[32]; /* available inline for free use */
}; };
#if defined(CONFIG_IO_URING) #if defined(CONFIG_IO_URING)
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd);
void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2); void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, ssize_t res2);
void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd,
void (*task_work_cb)(struct io_uring_cmd *)); void (*task_work_cb)(struct io_uring_cmd *));
...@@ -59,6 +62,11 @@ static inline void io_uring_free(struct task_struct *tsk) ...@@ -59,6 +62,11 @@ static inline void io_uring_free(struct task_struct *tsk)
__io_uring_free(tsk); __io_uring_free(tsk);
} }
#else #else
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd)
{
return -EOPNOTSUPP;
}
static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret, static inline void io_uring_cmd_done(struct io_uring_cmd *cmd, ssize_t ret,
ssize_t ret2) ssize_t ret2)
{ {
......
...@@ -56,6 +56,7 @@ struct io_uring_sqe { ...@@ -56,6 +56,7 @@ struct io_uring_sqe {
__u32 hardlink_flags; __u32 hardlink_flags;
__u32 xattr_flags; __u32 xattr_flags;
__u32 msg_ring_flags; __u32 msg_ring_flags;
__u32 uring_cmd_flags;
}; };
__u64 user_data; /* data to be passed back at completion time */ __u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */ /* pack this to avoid bogus arm OABI complaints */
...@@ -219,6 +220,14 @@ enum io_uring_op { ...@@ -219,6 +220,14 @@ enum io_uring_op {
IORING_OP_LAST, IORING_OP_LAST,
}; };
/*
* sqe->uring_cmd_flags
 * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
* along with setting sqe->buf_index.
*/
#define IORING_URING_CMD_FIXED (1U << 0)
/* /*
* sqe->fsync_flags * sqe->fsync_flags
*/ */
......
...@@ -4,10 +4,12 @@ ...@@ -4,10 +4,12 @@
#include <linux/file.h> #include <linux/file.h>
#include <linux/io_uring.h> #include <linux/io_uring.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/nospec.h>
#include <uapi/linux/io_uring.h> #include <uapi/linux/io_uring.h>
#include "io_uring.h" #include "io_uring.h"
#include "rsrc.h"
#include "uring_cmd.h" #include "uring_cmd.h"
static void io_uring_cmd_work(struct io_kiocb *req, bool *locked) static void io_uring_cmd_work(struct io_kiocb *req, bool *locked)
...@@ -76,8 +78,24 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) ...@@ -76,8 +78,24 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{ {
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd); struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
if (sqe->rw_flags || sqe->__pad1) if (sqe->__pad1)
return -EINVAL; return -EINVAL;
ioucmd->flags = READ_ONCE(sqe->uring_cmd_flags);
if (ioucmd->flags & ~IORING_URING_CMD_FIXED)
return -EINVAL;
if (ioucmd->flags & IORING_URING_CMD_FIXED) {
struct io_ring_ctx *ctx = req->ctx;
u16 index;
req->buf_index = READ_ONCE(sqe->buf_index);
if (unlikely(req->buf_index >= ctx->nr_user_bufs))
return -EFAULT;
index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
req->imu = ctx->user_bufs[index];
io_req_set_rsrc_node(req, ctx, 0);
}
ioucmd->cmd = sqe->cmd; ioucmd->cmd = sqe->cmd;
ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
return 0; return 0;
...@@ -129,3 +147,12 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) ...@@ -129,3 +147,12 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
return IOU_ISSUE_SKIP_COMPLETE; return IOU_ISSUE_SKIP_COMPLETE;
} }
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
struct iov_iter *iter, void *ioucmd)
{
struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
return io_import_fixed(rw, iter, req->imu, ubuf, len);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment