Commit ffb8e45c authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus-20190329' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "Small set of fixes that should go into this series. This contains:

   - compat signal mask fix for io_uring (Arnd)

   - EAGAIN corner case for direct vs buffered writes for io_uring
     (Roman)

   - NVMe pull request from Christoph with various little fixes

   - sbitmap ws_active fix, which caused a perf regression for shared
     tags (me)

   - sbitmap bit ordering fix (Ming)

   - libata on-stack DMA fix (Raymond)"

* tag 'for-linus-20190329' of git://git.kernel.dk/linux-block:
  nvmet: fix error flow during ns enable
  nvmet: fix building bvec from sg list
  nvme-multipath: relax ANA state check
  nvme-tcp: fix an endianess miss-annotation
  libata: fix using DMA buffers on stack
  io_uring: offload write to async worker in case of -EAGAIN
  sbitmap: order READ/WRITE freed instance and setting clear bit
  blk-mq: fix sbitmap ws_active for shared tags
  io_uring: fix big-endian compat signal mask handling
  blk-mq: update comment for blk_mq_hctx_has_pending()
  blk-mq: use blk_mq_put_driver_tag() to put tag
parents 7376e39a 7bca889e
...@@ -220,7 +220,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) ...@@ -220,7 +220,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error)
blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq);
flush_rq->tag = -1; flush_rq->tag = -1;
} else { } else {
blk_mq_put_driver_tag_hctx(hctx, flush_rq); blk_mq_put_driver_tag(flush_rq);
flush_rq->internal_tag = -1; flush_rq->internal_tag = -1;
} }
...@@ -324,7 +324,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) ...@@ -324,7 +324,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error)
if (q->elevator) { if (q->elevator) {
WARN_ON(rq->tag < 0); WARN_ON(rq->tag < 0);
blk_mq_put_driver_tag_hctx(hctx, rq); blk_mq_put_driver_tag(rq);
} }
/* /*
......
...@@ -59,7 +59,8 @@ static int blk_mq_poll_stats_bkt(const struct request *rq) ...@@ -59,7 +59,8 @@ static int blk_mq_poll_stats_bkt(const struct request *rq)
} }
/* /*
* Check if any of the ctx's have pending work in this hardware queue * Check if any of the ctx, dispatch list or elevator
* have pending work in this hardware queue.
*/ */
static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx)
{ {
...@@ -1071,7 +1072,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, ...@@ -1071,7 +1072,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait);
spin_lock(&hctx->dispatch_wait_lock); spin_lock(&hctx->dispatch_wait_lock);
if (!list_empty(&wait->entry)) {
struct sbitmap_queue *sbq;
list_del_init(&wait->entry); list_del_init(&wait->entry);
sbq = &hctx->tags->bitmap_tags;
atomic_dec(&sbq->ws_active);
}
spin_unlock(&hctx->dispatch_wait_lock); spin_unlock(&hctx->dispatch_wait_lock);
blk_mq_run_hw_queue(hctx, true); blk_mq_run_hw_queue(hctx, true);
...@@ -1087,6 +1094,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, ...@@ -1087,6 +1094,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
struct request *rq) struct request *rq)
{ {
struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags;
struct wait_queue_head *wq; struct wait_queue_head *wq;
wait_queue_entry_t *wait; wait_queue_entry_t *wait;
bool ret; bool ret;
...@@ -1109,7 +1117,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, ...@@ -1109,7 +1117,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
if (!list_empty_careful(&wait->entry)) if (!list_empty_careful(&wait->entry))
return false; return false;
wq = &bt_wait_ptr(&hctx->tags->bitmap_tags, hctx)->wait; wq = &bt_wait_ptr(sbq, hctx)->wait;
spin_lock_irq(&wq->lock); spin_lock_irq(&wq->lock);
spin_lock(&hctx->dispatch_wait_lock); spin_lock(&hctx->dispatch_wait_lock);
...@@ -1119,6 +1127,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, ...@@ -1119,6 +1127,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
return false; return false;
} }
atomic_inc(&sbq->ws_active);
wait->flags &= ~WQ_FLAG_EXCLUSIVE; wait->flags &= ~WQ_FLAG_EXCLUSIVE;
__add_wait_queue(wq, wait); __add_wait_queue(wq, wait);
...@@ -1139,6 +1148,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, ...@@ -1139,6 +1148,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx,
* someone else gets the wakeup. * someone else gets the wakeup.
*/ */
list_del_init(&wait->entry); list_del_init(&wait->entry);
atomic_dec(&sbq->ws_active);
spin_unlock(&hctx->dispatch_wait_lock); spin_unlock(&hctx->dispatch_wait_lock);
spin_unlock_irq(&wq->lock); spin_unlock_irq(&wq->lock);
......
...@@ -224,15 +224,6 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, ...@@ -224,15 +224,6 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
} }
} }
static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
if (rq->tag == -1 || rq->internal_tag == -1)
return;
__blk_mq_put_driver_tag(hctx, rq);
}
static inline void blk_mq_put_driver_tag(struct request *rq) static inline void blk_mq_put_driver_tag(struct request *rq)
{ {
if (rq->tag == -1 || rq->internal_tag == -1) if (rq->tag == -1 || rq->internal_tag == -1)
......
...@@ -52,38 +52,52 @@ static int eject_tray(struct ata_device *dev) ...@@ -52,38 +52,52 @@ static int eject_tray(struct ata_device *dev)
/* Per the spec, only slot type and drawer type ODD can be supported */ /* Per the spec, only slot type and drawer type ODD can be supported */
static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev)
{ {
char buf[16]; char *buf;
unsigned int ret; unsigned int ret;
struct rm_feature_desc *desc = (void *)(buf + 8); struct rm_feature_desc *desc;
struct ata_taskfile tf; struct ata_taskfile tf;
static const char cdb[] = { GPCMD_GET_CONFIGURATION, static const char cdb[] = { GPCMD_GET_CONFIGURATION,
2, /* only 1 feature descriptor requested */ 2, /* only 1 feature descriptor requested */
0, 3, /* 3, removable medium feature */ 0, 3, /* 3, removable medium feature */
0, 0, 0,/* reserved */ 0, 0, 0,/* reserved */
0, sizeof(buf), 0, 16,
0, 0, 0, 0, 0, 0,
}; };
buf = kzalloc(16, GFP_KERNEL);
if (!buf)
return ODD_MECH_TYPE_UNSUPPORTED;
desc = (void *)(buf + 8);
ata_tf_init(dev, &tf); ata_tf_init(dev, &tf);
tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
tf.command = ATA_CMD_PACKET; tf.command = ATA_CMD_PACKET;
tf.protocol = ATAPI_PROT_PIO; tf.protocol = ATAPI_PROT_PIO;
tf.lbam = sizeof(buf); tf.lbam = 16;
ret = ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, ret = ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
buf, sizeof(buf), 0); buf, 16, 0);
if (ret) if (ret) {
kfree(buf);
return ODD_MECH_TYPE_UNSUPPORTED; return ODD_MECH_TYPE_UNSUPPORTED;
}
if (be16_to_cpu(desc->feature_code) != 3) if (be16_to_cpu(desc->feature_code) != 3) {
kfree(buf);
return ODD_MECH_TYPE_UNSUPPORTED; return ODD_MECH_TYPE_UNSUPPORTED;
}
if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) {
kfree(buf);
return ODD_MECH_TYPE_SLOT; return ODD_MECH_TYPE_SLOT;
else if (desc->mech_type == 1 && desc->load == 0 && desc->eject == 1) } else if (desc->mech_type == 1 && desc->load == 0 &&
desc->eject == 1) {
kfree(buf);
return ODD_MECH_TYPE_DRAWER; return ODD_MECH_TYPE_DRAWER;
else } else {
kfree(buf);
return ODD_MECH_TYPE_UNSUPPORTED; return ODD_MECH_TYPE_UNSUPPORTED;
}
} }
/* Test if ODD is zero power ready by sense code */ /* Test if ODD is zero power ready by sense code */
......
...@@ -404,15 +404,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state) ...@@ -404,15 +404,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state)
static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
struct nvme_ns *ns) struct nvme_ns *ns)
{ {
enum nvme_ana_state old;
mutex_lock(&ns->head->lock); mutex_lock(&ns->head->lock);
old = ns->ana_state;
ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_grpid = le32_to_cpu(desc->grpid);
ns->ana_state = desc->state; ns->ana_state = desc->state;
clear_bit(NVME_NS_ANA_PENDING, &ns->flags); clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
if (nvme_state_is_live(ns->ana_state) && !nvme_state_is_live(old)) if (nvme_state_is_live(ns->ana_state))
nvme_mpath_set_live(ns); nvme_mpath_set_live(ns);
mutex_unlock(&ns->head->lock); mutex_unlock(&ns->head->lock);
} }
......
...@@ -627,7 +627,7 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, ...@@ -627,7 +627,7 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
return ret; return ret;
} }
static inline void nvme_tcp_end_request(struct request *rq, __le16 status) static inline void nvme_tcp_end_request(struct request *rq, u16 status)
{ {
union nvme_result res = {}; union nvme_result res = {};
......
...@@ -509,7 +509,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) ...@@ -509,7 +509,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
ret = nvmet_p2pmem_ns_enable(ns); ret = nvmet_p2pmem_ns_enable(ns);
if (ret) if (ret)
goto out_unlock; goto out_dev_disable;
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
nvmet_p2pmem_ns_add_p2p(ctrl, ns); nvmet_p2pmem_ns_add_p2p(ctrl, ns);
...@@ -550,7 +550,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) ...@@ -550,7 +550,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
out_dev_put: out_dev_put:
list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid)); pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
out_dev_disable:
nvmet_ns_dev_disable(ns); nvmet_ns_dev_disable(ns);
goto out_unlock; goto out_unlock;
} }
......
...@@ -75,11 +75,11 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) ...@@ -75,11 +75,11 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
return ret; return ret;
} }
static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter) static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg)
{ {
bv->bv_page = sg_page_iter_page(iter); bv->bv_page = sg_page(sg);
bv->bv_offset = iter->sg->offset; bv->bv_offset = sg->offset;
bv->bv_len = PAGE_SIZE - iter->sg->offset; bv->bv_len = sg->length;
} }
static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
...@@ -128,14 +128,14 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2) ...@@ -128,14 +128,14 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
{ {
ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); ssize_t nr_bvec = req->sg_cnt;
struct sg_page_iter sg_pg_iter;
unsigned long bv_cnt = 0; unsigned long bv_cnt = 0;
bool is_sync = false; bool is_sync = false;
size_t len = 0, total_len = 0; size_t len = 0, total_len = 0;
ssize_t ret = 0; ssize_t ret = 0;
loff_t pos; loff_t pos;
int i;
struct scatterlist *sg;
if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC) if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC)
is_sync = true; is_sync = true;
...@@ -147,8 +147,8 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) ...@@ -147,8 +147,8 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags)
} }
memset(&req->f.iocb, 0, sizeof(struct kiocb)); memset(&req->f.iocb, 0, sizeof(struct kiocb));
for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) { for_each_sg(req->sg, sg, req->sg_cnt, i) {
nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter); nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg);
len += req->f.bvec[bv_cnt].bv_len; len += req->f.bvec[bv_cnt].bv_len;
total_len += req->f.bvec[bv_cnt].bv_len; total_len += req->f.bvec[bv_cnt].bv_len;
bv_cnt++; bv_cnt++;
...@@ -225,7 +225,7 @@ static void nvmet_file_submit_buffered_io(struct nvmet_req *req) ...@@ -225,7 +225,7 @@ static void nvmet_file_submit_buffered_io(struct nvmet_req *req)
static void nvmet_file_execute_rw(struct nvmet_req *req) static void nvmet_file_execute_rw(struct nvmet_req *req)
{ {
ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); ssize_t nr_bvec = req->sg_cnt;
if (!req->sg_cnt || !nr_bvec) { if (!req->sg_cnt || !nr_bvec) {
nvmet_req_complete(req, 0); nvmet_req_complete(req, 0);
......
...@@ -1022,6 +1022,8 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, ...@@ -1022,6 +1022,8 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count); ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count);
if (!ret) { if (!ret) {
ssize_t ret2;
/* /*
* Open-code file_start_write here to grab freeze protection, * Open-code file_start_write here to grab freeze protection,
* which will be released by another thread in * which will be released by another thread in
...@@ -1036,7 +1038,19 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, ...@@ -1036,7 +1038,19 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s,
SB_FREEZE_WRITE); SB_FREEZE_WRITE);
} }
kiocb->ki_flags |= IOCB_WRITE; kiocb->ki_flags |= IOCB_WRITE;
io_rw_done(kiocb, call_write_iter(file, kiocb, &iter));
ret2 = call_write_iter(file, kiocb, &iter);
if (!force_nonblock || ret2 != -EAGAIN) {
io_rw_done(kiocb, ret2);
} else {
/*
* If ->needs_lock is true, we're already in async
* context.
*/
if (!s->needs_lock)
io_async_list_note(WRITE, req, iov_count);
ret = -EAGAIN;
}
} }
out_free: out_free:
kfree(iovec); kfree(iovec);
...@@ -1968,7 +1982,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, ...@@ -1968,7 +1982,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
return 0; return 0;
if (sig) { if (sig) {
ret = set_user_sigmask(sig, &ksigmask, &sigsaved, sigsz); #ifdef CONFIG_COMPAT
if (in_compat_syscall())
ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
&ksigmask, &sigsaved, sigsz);
else
#endif
ret = set_user_sigmask(sig, &ksigmask,
&sigsaved, sigsz);
if (ret) if (ret)
return ret; return ret;
} }
......
...@@ -591,6 +591,17 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); ...@@ -591,6 +591,17 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up);
void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
unsigned int cpu) unsigned int cpu)
{ {
/*
* Once the clear bit is set, the bit may be allocated out.
*
* Orders READ/WRITE on the associated instance (such as request
* of blk_mq) by this bit for avoiding race with re-allocation,
* and its pair is the memory barrier implied in __sbitmap_get_word.
*
* One invariant is that the clear bit has to be zero when the bit
* is in use.
*/
smp_mb__before_atomic();
sbitmap_deferred_clear_bit(&sbq->sb, nr); sbitmap_deferred_clear_bit(&sbq->sb, nr);
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment