Commit a88c3869 authored by Linus Torvalds

Merge tag 'io_uring-6.6-2023-10-06' of git://git.kernel.dk/linux

Pull io_uring fixes from Jens Axboe:

 - Fix a syzbot report of a crash on 32-bit arm with highmem, plus one more
   similar issue found while digging for related problems (me)

 - Fix a syzbot report with PROVE_LOCKING=y and setting up the ring in a
   disabled state (me)

 - Fix for a race between CPU hotplug and io-wq init (Jeff)

* tag 'io_uring-6.6-2023-10-06' of git://git.kernel.dk/linux:
  io-wq: fully initialize wqe before calling cpuhp_state_add_instance_nocalls()
  io_uring: don't allow IORING_SETUP_NO_MMAP rings on highmem pages
  io_uring: ensure io_lockdep_assert_cq_locked() handles disabled rings
  io_uring/kbuf: don't allow registered buffer rings on highmem pages
parents af95dc6f 0f8baa3c
--- a/io_uring/io-wq.c
+++ b/io_uring/io-wq.c
@@ -1151,9 +1151,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
         wq = kzalloc(sizeof(struct io_wq), GFP_KERNEL);
         if (!wq)
                 return ERR_PTR(-ENOMEM);
-        ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
-        if (ret)
-                goto err_wq;
 
         refcount_inc(&data->hash->refs);
         wq->hash = data->hash;
@@ -1186,13 +1183,14 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 
         wq->task = get_task_struct(data->task);
         atomic_set(&wq->worker_refs, 1);
         init_completion(&wq->worker_done);
+        ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+        if (ret)
+                goto err;
         return wq;
 err:
         io_wq_put_hash(data->hash);
-        cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
         free_cpumask_var(wq->cpu_mask);
-err_wq:
         kfree(wq);
         return ERR_PTR(ret);
 }
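For context on the io-wq change: once cpuhp_state_add_instance_nocalls() has registered the instance, the hotplug online callback may run on another CPU and dereference the object, so registration has to be the last step of initialization. Below is a minimal sketch of that ordering for a hypothetical object; foo, foo_cpu_online, foo_hp_state and foo_init are illustrative names, not anything from the kernel tree.

#include <linux/cpuhotplug.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>

struct foo {
        struct hlist_node cpuhp_node;
        int nr_events;                  /* touched by the hotplug callback */
};

static enum cpuhp_state foo_hp_state;

static int foo_cpu_online(unsigned int cpu, struct hlist_node *node)
{
        struct foo *f = hlist_entry_safe(node, struct foo, cpuhp_node);

        /* Can run as soon as the instance is registered, so 'f' must be
         * fully set up by then. */
        f->nr_events++;
        return 0;
}

static int __init foo_init(void)
{
        int ret;

        /* Allocate a dynamic multi-instance hotplug state once at init. */
        ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "foo:online",
                                      foo_cpu_online, NULL);
        if (ret < 0)
                return ret;
        foo_hp_state = ret;
        return 0;
}

static struct foo *foo_create(void)
{
        struct foo *f;
        int ret;

        f = kzalloc(sizeof(*f), GFP_KERNEL);
        if (!f)
                return ERR_PTR(-ENOMEM);

        f->nr_events = 0;               /* fully initialize first ... */

        /* ... and only then expose the object to the hotplug machinery. */
        ret = cpuhp_state_add_instance_nocalls(foo_hp_state, &f->cpuhp_node);
        if (ret) {
                kfree(f);
                return ERR_PTR(ret);
        }
        return f;
}

A side effect of moving the add to the end, as the patch does, is that the error path no longer needs a matching cpuhp_state_remove_instance_nocalls(), which is why the err_wq label could be dropped.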
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -2686,7 +2686,7 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
 {
         struct page **page_array;
         unsigned int nr_pages;
-        int ret;
+        int ret, i;
 
         *npages = 0;
 
@@ -2716,6 +2716,20 @@ static void *__io_uaddr_map(struct page ***pages, unsigned short *npages,
          */
         if (page_array[0] != page_array[ret - 1])
                 goto err;
+
+        /*
+         * Can't support mapping user allocated ring memory on 32-bit archs
+         * where it could potentially reside in highmem. Just fail those with
+         * -EINVAL, just like we did on kernels that didn't support this
+         * feature.
+         */
+        for (i = 0; i < nr_pages; i++) {
+                if (PageHighMem(page_array[i])) {
+                        ret = -EINVAL;
+                        goto err;
+                }
+        }
+
         *pages = page_array;
         *npages = nr_pages;
         return page_to_virt(page_array[0]);
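The restriction exists because __io_uaddr_map() hands back a long-lived pointer via page_to_virt(), and on 32-bit only lowmem pages have such a permanent kernel mapping; a highmem page can only be reached through a temporary kmap. A small illustrative helper (peek_first_byte is a made-up name, not io_uring code) shows the difference:

#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/types.h>

/* Read the first byte of a page that may live in highmem. */
static u8 peek_first_byte(struct page *page)
{
        void *kaddr;
        u8 val;

        if (!PageHighMem(page))
                return *(u8 *)page_address(page);       /* direct map, always present */

        /* Highmem: only a temporary, CPU-local mapping is available. */
        kaddr = kmap_local_page(page);
        val = *(u8 *)kaddr;
        kunmap_local(kaddr);
        return val;
}

Keeping user-supplied ring memory accessible for the lifetime of the ring would mean managing persistent kmaps; returning -EINVAL instead sidesteps that complexity, matching kernels that never supported the feature.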
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -86,20 +86,33 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx);
 bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task,
                         bool cancel_all);
 
-#define io_lockdep_assert_cq_locked(ctx)                                \
-        do {                                                            \
-                lockdep_assert(in_task());                              \
-                                                                        \
-                if (ctx->flags & IORING_SETUP_IOPOLL) {                 \
-                        lockdep_assert_held(&ctx->uring_lock);          \
-                } else if (!ctx->task_complete) {                       \
-                        lockdep_assert_held(&ctx->completion_lock);     \
-                } else if (ctx->submitter_task->flags & PF_EXITING) {   \
-                        lockdep_assert(current_work());                 \
-                } else {                                                \
-                        lockdep_assert(current == ctx->submitter_task); \
-                }                                                       \
-        } while (0)
+#if defined(CONFIG_PROVE_LOCKING)
+static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
+{
+        lockdep_assert(in_task());
+
+        if (ctx->flags & IORING_SETUP_IOPOLL) {
+                lockdep_assert_held(&ctx->uring_lock);
+        } else if (!ctx->task_complete) {
+                lockdep_assert_held(&ctx->completion_lock);
+        } else if (ctx->submitter_task) {
+                /*
+                 * ->submitter_task may be NULL and we can still post a CQE,
+                 * if the ring has been setup with IORING_SETUP_R_DISABLED.
+                 * Not from an SQE, as those cannot be submitted, but via
+                 * updating tagged resources.
+                 */
+                if (ctx->submitter_task->flags & PF_EXITING)
+                        lockdep_assert(current_work());
+                else
+                        lockdep_assert(current == ctx->submitter_task);
+        }
+}
+#else
+static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
+{
+}
+#endif
 
 static inline void io_req_task_work_add(struct io_kiocb *req)
 {
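One way to reach the ctx->submitter_task == NULL case the new comment describes is to create a DEFER_TASKRUN ring in the disabled state and then update a tagged registered file; the dropped tag is posted as a CQE before the ring has ever been enabled. A rough userspace sketch of that sequence, assuming liburing >= 2.2 helpers (io_uring_register_files_tags(), io_uring_register_files_update_tag(), io_uring_enable_rings()); treat it as an illustration, not a regression test:

#include <liburing.h>
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
        struct io_uring ring;
        struct io_uring_cqe *cqe;
        __u64 tags[1] = { 0xdeadbeefULL };
        int files[1], update = -1;
        int ret;

        /* Single issuer, deferred task work, and created disabled:
         * ctx->submitter_task stays NULL until the ring is enabled. */
        ret = io_uring_queue_init(8, &ring, IORING_SETUP_SINGLE_ISSUER |
                                            IORING_SETUP_DEFER_TASKRUN |
                                            IORING_SETUP_R_DISABLED);
        if (ret)
                return 1;

        files[0] = open("/dev/null", O_RDONLY);

        /* Resource registration is allowed while the ring is still disabled. */
        if (io_uring_register_files_tags(&ring, files, tags, 1) < 0)
                return 1;

        /* Replacing the tagged slot drops the old file; the kernel posts a
         * CQE carrying the tag, and it may do so while no submitter task
         * exists yet, which is the case io_lockdep_assert_cq_locked() now
         * has to tolerate. */
        tags[0] = 0;
        if (io_uring_register_files_update_tag(&ring, 0, &update, tags, 1) < 0)
                return 1;

        /* Enable the ring and reap the tag CQE normally. */
        io_uring_enable_rings(&ring);
        if (!io_uring_wait_cqe(&ring, &cqe))
                printf("tag CQE: user_data=0x%llx\n",
                       (unsigned long long)cqe->user_data);

        io_uring_queue_exit(&ring);
        return 0;
}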
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -477,7 +477,7 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
 {
         struct io_uring_buf_ring *br;
         struct page **pages;
-        int nr_pages;
+        int i, nr_pages;
 
         pages = io_pin_pages(reg->ring_addr,
                              flex_array_size(br, bufs, reg->ring_entries),
@@ -485,6 +485,17 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
         if (IS_ERR(pages))
                 return PTR_ERR(pages);
 
+        /*
+         * Apparently some 32-bit boxes (ARM) will return highmem pages,
+         * which then need to be mapped. We could support that, but it'd
+         * complicate the code and slowdown the common cases quite a bit.
+         * So just error out, returning -EINVAL just like we did on kernels
+         * that didn't support mapped buffer rings.
+         */
+        for (i = 0; i < nr_pages; i++)
+                if (PageHighMem(pages[i]))
+                        goto error_unpin;
+
         br = page_address(pages[0]);
 #ifdef SHM_COLOUR
         /*
@@ -496,13 +507,8 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
          * should use IOU_PBUF_RING_MMAP instead, and liburing will handle
          * this transparently.
          */
-        if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1)) {
-                int i;
-
-                for (i = 0; i < nr_pages; i++)
-                        unpin_user_page(pages[i]);
-                return -EINVAL;
-        }
+        if ((reg->ring_addr | (unsigned long) br) & (SHM_COLOUR - 1))
+                goto error_unpin;
 #endif
         bl->buf_pages = pages;
         bl->buf_nr_pages = nr_pages;
@@ -510,6 +516,11 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
         bl->is_mapped = 1;
         bl->is_mmap = 0;
         return 0;
+error_unpin:
+        for (i = 0; i < nr_pages; i++)
+                unpin_user_page(pages[i]);
+        kvfree(pages);
+        return -EINVAL;
 }
 
 static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,
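As the comment notes, the portable alternative on such systems is IOU_PBUF_RING_MMAP: the kernel allocates the buffer ring and userspace maps it from the ring fd rather than handing in its own (possibly highmem) pages. A hedged sketch of that flavor, assuming liburing >= 2.2 for io_uring_register_buf_ring() and a kernel new enough (6.4+) to accept IOU_PBUF_RING_MMAP; BGID and RING_ENTRIES are arbitrary example values:

#include <liburing.h>
#include <string.h>
#include <sys/mman.h>

#define BGID            7
#define RING_ENTRIES    32

static struct io_uring_buf_ring *map_kernel_buf_ring(struct io_uring *ring)
{
        struct io_uring_buf_reg reg;
        size_t size = RING_ENTRIES * sizeof(struct io_uring_buf);
        void *addr;
        int ret;

        memset(&reg, 0, sizeof(reg));
        reg.ring_entries = RING_ENTRIES;
        reg.bgid = BGID;
        reg.flags = IOU_PBUF_RING_MMAP;         /* kernel allocates, we mmap */

        ret = io_uring_register_buf_ring(ring, &reg, 0);
        if (ret)
                return NULL;

        /* The mmap offset encodes the buffer group ID. */
        addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    ring->ring_fd,
                    IORING_OFF_PBUF_RING | ((__u64)BGID << IORING_OFF_PBUF_SHIFT));
        if (addr == MAP_FAILED)
                return NULL;

        return addr;
}

Entries are then published with io_uring_buf_ring_add() and io_uring_buf_ring_advance(), the same as for an application-provided ring.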