Commit 38a2ca2c authored by Linus Torvalds

Merge tag 'for-linus-20190420' of git://git.kernel.dk/linux-block

Pull block fixes from Jens Axboe:
 "A set of small fixes that should go into this series. This contains:

   - Removal of unused queue member (Hou)

   - Overflow bvec fix (Ming)

   - Various little io_uring tweaks (me)
       - kthread parking
       - Only call cpu_possible() for verified CPU
       - Drop unused 'file' argument to io_file_put()
       - io_uring_enter vs io_uring_register deadlock fix
       - CQ overflow fix

   - BFQ internal depth update fix (me)"

* tag 'for-linus-20190420' of git://git.kernel.dk/linux-block:
  block: make sure that bvec length can't be overflow
  block: kill all_q_node in request_queue
  io_uring: fix CQ overflow condition
  io_uring: fix possible deadlock between io_uring_{enter,register}
  io_uring: drop io_file_put() 'file' argument
  bfq: update internal depth state when queue depth changes
  io_uring: only test SQPOLL cpu after we've verified it
  io_uring: park SQPOLL thread if it's percpu
parents 34396bdf 6bedf00e
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -5396,7 +5396,7 @@ static unsigned int bfq_update_depths(struct bfq_data *bfqd,
 	return min_shallow;
 }
 
-static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
+static void bfq_depth_updated(struct blk_mq_hw_ctx *hctx)
 {
 	struct bfq_data *bfqd = hctx->queue->elevator->elevator_data;
 	struct blk_mq_tags *tags = hctx->sched_tags;
@@ -5404,6 +5404,11 @@ static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
 
 	min_shallow = bfq_update_depths(bfqd, &tags->bitmap_tags);
 	sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, min_shallow);
+}
+
+static int bfq_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int index)
+{
+	bfq_depth_updated(hctx);
 	return 0;
 }
 
@@ -5826,6 +5831,7 @@ static struct elevator_type iosched_bfq_mq = {
 		.requests_merged = bfq_requests_merged,
 		.request_merged = bfq_request_merged,
 		.has_work = bfq_has_work,
+		.depth_updated = bfq_depth_updated,
 		.init_hctx = bfq_init_hctx,
 		.init_sched = bfq_init_queue,
 		.exit_sched = bfq_exit_queue,
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3135,6 +3135,8 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
 		}
 		if (ret)
 			break;
+		if (q->elevator && q->elevator->type->ops.depth_updated)
+			q->elevator->type->ops.depth_updated(hctx);
 	}
 
 	if (!ret)
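
Note on the two block changes above: the new depth_updated() elevator hook is optional, and blk_mq_update_nr_requests() only invokes it when the scheduler provides one, so BFQ can recompute its depth-derived limits whenever the queue depth changes. Below is a rough, self-contained userspace sketch of that optional-callback pattern; the toy_* names are illustrative only, not kernel API.

#include <stdio.h>

/* Toy model of an elevator ops table with an optional depth_updated hook. */
struct toy_elevator_ops {
	void (*depth_updated)(unsigned int new_depth);
};

static void toy_depth_updated(unsigned int new_depth)
{
	/* A real scheduler (like BFQ above) would recompute any state that
	 * was derived from the old queue depth here. */
	printf("recomputing limits for depth %u\n", new_depth);
}

static void toy_update_nr_requests(const struct toy_elevator_ops *ops,
				   unsigned int nr)
{
	/* Mirror the kernel pattern: the hook is optional, so check first. */
	if (ops && ops->depth_updated)
		ops->depth_updated(nr);
}

int main(void)
{
	struct toy_elevator_ops ops = { .depth_updated = toy_depth_updated };

	toy_update_nr_requests(&ops, 64u);
	return 0;
}

In the kernel, the usual trigger is a write to /sys/block/<dev>/queue/nr_requests, which ends up in blk_mq_update_nr_requests() and now reaches the scheduler through this hook.
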
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -338,7 +338,7 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx)
 	tail = ctx->cached_cq_tail;
 	/* See comment at the top of the file */
 	smp_rmb();
-	if (tail + 1 == READ_ONCE(ring->r.head))
+	if (tail - READ_ONCE(ring->r.head) == ring->ring_entries)
 		return NULL;
 
 	ctx->cached_cq_tail++;
@@ -682,11 +682,9 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
 		list_add_tail(&req->list, &ctx->poll_list);
 }
 
-static void io_file_put(struct io_submit_state *state, struct file *file)
+static void io_file_put(struct io_submit_state *state)
 {
-	if (!state) {
-		fput(file);
-	} else if (state->file) {
+	if (state->file) {
 		int diff = state->has_refs - state->used_refs;
 
 		if (diff)
@@ -711,7 +709,7 @@ static struct file *io_file_get(struct io_submit_state *state, int fd)
 			state->ios_left--;
 			return state->file;
 		}
-		io_file_put(state, NULL);
+		io_file_put(state);
 	}
 	state->file = fget_many(fd, state->ios_left);
 	if (!state->file)
@@ -1671,7 +1669,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s,
 static void io_submit_state_end(struct io_submit_state *state)
 {
 	blk_finish_plug(&state->plug);
-	io_file_put(state, NULL);
+	io_file_put(state);
 	if (state->free_reqs)
 		kmem_cache_free_bulk(req_cachep, state->free_reqs,
 					&state->reqs[state->cur_req]);
@@ -1920,6 +1918,10 @@ static int io_sq_thread(void *data)
 		unuse_mm(cur_mm);
 		mmput(cur_mm);
 	}
+
+	if (kthread_should_park())
+		kthread_parkme();
+
 	return 0;
 }
@@ -2054,6 +2056,7 @@ static void io_sq_thread_stop(struct io_ring_ctx *ctx)
 	if (ctx->sqo_thread) {
 		ctx->sqo_stop = 1;
 		mb();
+		kthread_park(ctx->sqo_thread);
 		kthread_stop(ctx->sqo_thread);
 		ctx->sqo_thread = NULL;
 	}
@@ -2236,10 +2239,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 	mmgrab(current->mm);
 	ctx->sqo_mm = current->mm;
 
-	ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
-	if (!ctx->sq_thread_idle)
-		ctx->sq_thread_idle = HZ;
-
 	ret = -EINVAL;
 	if (!cpu_possible(p->sq_thread_cpu))
 		goto err;
@@ -2249,10 +2248,18 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx,
 		if (!capable(CAP_SYS_ADMIN))
 			goto err;
 
+		ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
+		if (!ctx->sq_thread_idle)
+			ctx->sq_thread_idle = HZ;
+
 		if (p->flags & IORING_SETUP_SQ_AFF) {
 			int cpu;
 
 			cpu = array_index_nospec(p->sq_thread_cpu, NR_CPUS);
+			ret = -EINVAL;
+			if (!cpu_possible(p->sq_thread_cpu))
+				goto err;
+
 			ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread,
						ctx, cpu,
						"io_uring-sq");
@@ -2922,11 +2929,23 @@ SYSCALL_DEFINE2(io_uring_setup, u32, entries,
 
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			       void __user *arg, unsigned nr_args)
+	__releases(ctx->uring_lock)
+	__acquires(ctx->uring_lock)
 {
 	int ret;
 
 	percpu_ref_kill(&ctx->refs);
+
+	/*
+	 * Drop uring mutex before waiting for references to exit. If another
+	 * thread is currently inside io_uring_enter() it might need to grab
+	 * the uring_lock to make progress. If we hold it here across the drain
+	 * wait, then we can deadlock. It's safe to drop the mutex here, since
+	 * no new references will come in after we've killed the percpu ref.
+	 */
+	mutex_unlock(&ctx->uring_lock);
 	wait_for_completion(&ctx->ctx_done);
+	mutex_lock(&ctx->uring_lock);
+
 	switch (opcode) {
 	case IORING_REGISTER_BUFFERS:
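
Note on the CQ overflow change above: the CQ head and tail are free-running unsigned counters that are only masked when indexing the ring, so with a power-of-two ring size the usual "ring full" test is tail - head == ring_entries, and it keeps working across 32-bit wraparound. A small standalone check of that arithmetic (illustrative values only, not io_uring code):

#include <assert.h>
#include <stdio.h>

#define RING_ENTRIES 8u		/* power of two, as io_uring rings are */

/* Free-running 32-bit indices; only masked values index the CQE array. */
static int ring_full(unsigned int tail, unsigned int head)
{
	return tail - head == RING_ENTRIES;
}

int main(void)
{
	unsigned int head = 0xfffffffcu;	  /* close to the u32 wrap point */
	unsigned int tail = head + RING_ENTRIES;  /* wraps past zero */

	/* Exactly RING_ENTRIES unconsumed completions: the ring is full. */
	assert(ring_full(tail, head));
	/* Consuming one entry makes room again. */
	assert(!ring_full(tail, head + 1u));

	/* The previous "tail + 1 == head" test does not detect this state. */
	printf("old check fires: %d, new check fires: %d\n",
	       tail + 1u == head, ring_full(tail, head));
	return 0;
}
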
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -548,7 +548,6 @@ struct request_queue {
 	struct rcu_head rcu_head;
 	wait_queue_head_t mq_freeze_wq;
 	struct percpu_ref q_usage_counter;
-	struct list_head all_q_node;
 
 	struct blk_mq_tag_set *tag_set;
 	struct list_head tag_set_list;
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -160,8 +160,9 @@ static inline void bvec_advance(const struct bio_vec *bvec,
 		bv->bv_page = nth_page(bv->bv_page, 1);
 		bv->bv_offset = 0;
 	} else {
-		bv->bv_page = bvec->bv_page;
-		bv->bv_offset = bvec->bv_offset;
+		bv->bv_page = bvec_nth_page(bvec->bv_page, bvec->bv_offset /
+					    PAGE_SIZE);
+		bv->bv_offset = bvec->bv_offset & ~PAGE_MASK;
 	}
 	bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset,
 			   bvec->bv_len - iter_all->done);
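
Note on the bvec change above: a multi-page bvec can carry a bv_offset larger than PAGE_SIZE, and using it unmodified makes the per-page length PAGE_SIZE - bv_offset wrap around as an unsigned value. The fix advances whole pages first and keeps only the in-page remainder. A small userspace sketch of that arithmetic, assuming a 4 KiB page size (values are illustrative):

#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE 4096u			/* assumed page size for the demo */
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned int bv_offset = 5000u;	/* an offset spilling past one page */

	/* Old behaviour: the offset is used as-is, so the per-page length
	 * PAGE_SIZE - bv_offset wraps around to a huge unsigned value. */
	printf("unnormalized per-page length: %u\n", PAGE_SIZE - bv_offset);

	/* New behaviour: step forward whole pages, keep the in-page rest. */
	unsigned int page_step = bv_offset / PAGE_SIZE;	/* 1 page */
	unsigned int in_page = bv_offset & ~PAGE_MASK;	/* 904 bytes */

	assert(page_step == 1u);
	assert(in_page == bv_offset - PAGE_SIZE);
	assert(PAGE_SIZE - in_page <= PAGE_SIZE);	/* no wraparound now */

	printf("advance %u page(s), in-page offset %u\n", page_step, in_page);
	return 0;
}
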
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -31,6 +31,7 @@ struct elevator_mq_ops {
 	void (*exit_sched)(struct elevator_queue *);
 	int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int);
 	void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int);
+	void (*depth_updated)(struct blk_mq_hw_ctx *);
 	bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
 	bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *);