Commit ca60ad6a authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'io_uring-5.6-2020-02-14' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:
 "Here's a set of fixes for io_uring:

   - Various fixes with cleanups from Pavel, fixing corner cases where
     we're not correctly dealing with iovec cleanup.

   - Clarify that statx/openat/openat2 don't accept fixed files

   - Buffered raw device write EOPTNOTSUPP fix

   - Ensure async workers grab current->fs

   - A few task exit fixes with pending requests that grab the file
     table

   - send/recvmsg async load fix

   - io-wq offline node setup fix

   - CQ overflow flush in poll"

* tag 'io_uring-5.6-2020-02-14' of git://git.kernel.dk/linux-block: (21 commits)
  io_uring: prune request from overflow list on flush
  io-wq: don't call kXalloc_node() with non-online node
  io_uring: retain sockaddr_storage across send/recvmsg async punt
  io_uring: cancel pending async work if task exits
  io-wq: add io_wq_cancel_pid() to cancel based on a specific pid
  io-wq: make io_wqe_cancel_work() take a match handler
  io_uring: fix openat/statx's filename leak
  io_uring: fix double prep iovec leak
  io_uring: fix async close() with f_op->flush()
  io_uring: allow AT_FDCWD for non-file openat/openat2/statx
  io_uring: grab ->fs as part of async preparation
  io-wq: add support for inheriting ->fs
  io_uring: retry raw bdev writes if we hit -EOPNOTSUPP
  io_uring: add cleanup for openat()/statx()
  io_uring: fix iovec leaks
  io_uring: remove unused struct io_async_open
  io_uring: flush overflowed CQ events in the io_uring_poll()
  io_uring: statx/openat/openat2 don't support fixed files
  io_uring: fix deferred req iovec leak
  io_uring: fix 1-bit bitfields to be unsigned
  ...
parents 2019fc96 2ca10259
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/rculist_nulls.h> #include <linux/rculist_nulls.h>
#include <linux/fs_struct.h>
#include "io-wq.h" #include "io-wq.h"
...@@ -59,6 +60,7 @@ struct io_worker { ...@@ -59,6 +60,7 @@ struct io_worker {
const struct cred *cur_creds; const struct cred *cur_creds;
const struct cred *saved_creds; const struct cred *saved_creds;
struct files_struct *restore_files; struct files_struct *restore_files;
struct fs_struct *restore_fs;
}; };
#if BITS_PER_LONG == 64 #if BITS_PER_LONG == 64
...@@ -151,6 +153,9 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) ...@@ -151,6 +153,9 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
task_unlock(current); task_unlock(current);
} }
if (current->fs != worker->restore_fs)
current->fs = worker->restore_fs;
/* /*
* If we have an active mm, we need to drop the wq lock before unusing * If we have an active mm, we need to drop the wq lock before unusing
* it. If we do, return true and let the caller retry the idle loop. * it. If we do, return true and let the caller retry the idle loop.
...@@ -311,6 +316,7 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker) ...@@ -311,6 +316,7 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
worker->restore_files = current->files; worker->restore_files = current->files;
worker->restore_fs = current->fs;
io_wqe_inc_running(wqe, worker); io_wqe_inc_running(wqe, worker);
} }
...@@ -481,6 +487,8 @@ static void io_worker_handle_work(struct io_worker *worker) ...@@ -481,6 +487,8 @@ static void io_worker_handle_work(struct io_worker *worker)
current->files = work->files; current->files = work->files;
task_unlock(current); task_unlock(current);
} }
if (work->fs && current->fs != work->fs)
current->fs = work->fs;
if (work->mm != worker->mm) if (work->mm != worker->mm)
io_wq_switch_mm(worker, work); io_wq_switch_mm(worker, work);
if (worker->cur_creds != work->creds) if (worker->cur_creds != work->creds)
...@@ -691,11 +699,16 @@ static int io_wq_manager(void *data) ...@@ -691,11 +699,16 @@ static int io_wq_manager(void *data)
/* create fixed workers */ /* create fixed workers */
refcount_set(&wq->refs, workers_to_create); refcount_set(&wq->refs, workers_to_create);
for_each_node(node) { for_each_node(node) {
if (!node_online(node))
continue;
if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND)) if (!create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
goto err; goto err;
workers_to_create--; workers_to_create--;
} }
while (workers_to_create--)
refcount_dec(&wq->refs);
complete(&wq->done); complete(&wq->done);
while (!kthread_should_stop()) { while (!kthread_should_stop()) {
...@@ -703,6 +716,9 @@ static int io_wq_manager(void *data) ...@@ -703,6 +716,9 @@ static int io_wq_manager(void *data)
struct io_wqe *wqe = wq->wqes[node]; struct io_wqe *wqe = wq->wqes[node];
bool fork_worker[2] = { false, false }; bool fork_worker[2] = { false, false };
if (!node_online(node))
continue;
spin_lock_irq(&wqe->lock); spin_lock_irq(&wqe->lock);
if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND)) if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
fork_worker[IO_WQ_ACCT_BOUND] = true; fork_worker[IO_WQ_ACCT_BOUND] = true;
...@@ -821,7 +837,9 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe, ...@@ -821,7 +837,9 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
list_for_each_entry_rcu(worker, &wqe->all_list, all_list) { list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
if (io_worker_get(worker)) { if (io_worker_get(worker)) {
ret = func(worker, data); /* no task if node is/was offline */
if (worker->task)
ret = func(worker, data);
io_worker_release(worker); io_worker_release(worker);
if (ret) if (ret)
break; break;
...@@ -929,17 +947,19 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, ...@@ -929,17 +947,19 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
return ret; return ret;
} }
struct work_match {
bool (*fn)(struct io_wq_work *, void *data);
void *data;
};
static bool io_wq_worker_cancel(struct io_worker *worker, void *data) static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{ {
struct io_wq_work *work = data; struct work_match *match = data;
unsigned long flags; unsigned long flags;
bool ret = false; bool ret = false;
if (worker->cur_work != work)
return false;
spin_lock_irqsave(&worker->lock, flags); spin_lock_irqsave(&worker->lock, flags);
if (worker->cur_work == work && if (match->fn(worker->cur_work, match->data) &&
!(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL)) { !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL)) {
send_sig(SIGINT, worker->task, 1); send_sig(SIGINT, worker->task, 1);
ret = true; ret = true;
...@@ -950,15 +970,13 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data) ...@@ -950,15 +970,13 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
} }
static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe, static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
struct io_wq_work *cwork) struct work_match *match)
{ {
struct io_wq_work_node *node, *prev; struct io_wq_work_node *node, *prev;
struct io_wq_work *work; struct io_wq_work *work;
unsigned long flags; unsigned long flags;
bool found = false; bool found = false;
cwork->flags |= IO_WQ_WORK_CANCEL;
/* /*
* First check pending list, if we're lucky we can just remove it * First check pending list, if we're lucky we can just remove it
* from there. CANCEL_OK means that the work is returned as-new, * from there. CANCEL_OK means that the work is returned as-new,
...@@ -968,7 +986,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe, ...@@ -968,7 +986,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
wq_list_for_each(node, prev, &wqe->work_list) { wq_list_for_each(node, prev, &wqe->work_list) {
work = container_of(node, struct io_wq_work, list); work = container_of(node, struct io_wq_work, list);
if (work == cwork) { if (match->fn(work, match->data)) {
wq_node_del(&wqe->work_list, node, prev); wq_node_del(&wqe->work_list, node, prev);
found = true; found = true;
break; break;
...@@ -989,20 +1007,60 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe, ...@@ -989,20 +1007,60 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
* completion will run normally in this case. * completion will run normally in this case.
*/ */
rcu_read_lock(); rcu_read_lock();
found = io_wq_for_each_worker(wqe, io_wq_worker_cancel, cwork); found = io_wq_for_each_worker(wqe, io_wq_worker_cancel, match);
rcu_read_unlock(); rcu_read_unlock();
return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND; return found ? IO_WQ_CANCEL_RUNNING : IO_WQ_CANCEL_NOTFOUND;
} }
static bool io_wq_work_match(struct io_wq_work *work, void *data)
{
return work == data;
}
enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork) enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork)
{ {
struct work_match match = {
.fn = io_wq_work_match,
.data = cwork
};
enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND; enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
int node; int node;
cwork->flags |= IO_WQ_WORK_CANCEL;
for_each_node(node) { for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node]; struct io_wqe *wqe = wq->wqes[node];
ret = io_wqe_cancel_work(wqe, cwork); ret = io_wqe_cancel_work(wqe, &match);
if (ret != IO_WQ_CANCEL_NOTFOUND)
break;
}
return ret;
}
static bool io_wq_pid_match(struct io_wq_work *work, void *data)
{
pid_t pid = (pid_t) (unsigned long) data;
if (work)
return work->task_pid == pid;
return false;
}
enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid)
{
struct work_match match = {
.fn = io_wq_pid_match,
.data = (void *) (unsigned long) pid
};
enum io_wq_cancel ret = IO_WQ_CANCEL_NOTFOUND;
int node;
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
ret = io_wqe_cancel_work(wqe, &match);
if (ret != IO_WQ_CANCEL_NOTFOUND) if (ret != IO_WQ_CANCEL_NOTFOUND)
break; break;
} }
...@@ -1036,6 +1094,8 @@ void io_wq_flush(struct io_wq *wq) ...@@ -1036,6 +1094,8 @@ void io_wq_flush(struct io_wq *wq)
for_each_node(node) { for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node]; struct io_wqe *wqe = wq->wqes[node];
if (!node_online(node))
continue;
init_completion(&data.done); init_completion(&data.done);
INIT_IO_WORK(&data.work, io_wq_flush_func); INIT_IO_WORK(&data.work, io_wq_flush_func);
data.work.flags |= IO_WQ_WORK_INTERNAL; data.work.flags |= IO_WQ_WORK_INTERNAL;
...@@ -1067,12 +1127,15 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ...@@ -1067,12 +1127,15 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
for_each_node(node) { for_each_node(node) {
struct io_wqe *wqe; struct io_wqe *wqe;
int alloc_node = node;
wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, node); if (!node_online(alloc_node))
alloc_node = NUMA_NO_NODE;
wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
if (!wqe) if (!wqe)
goto err; goto err;
wq->wqes[node] = wqe; wq->wqes[node] = wqe;
wqe->node = node; wqe->node = alloc_node;
wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0); atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
if (wq->user) { if (wq->user) {
...@@ -1080,7 +1143,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ...@@ -1080,7 +1143,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
task_rlimit(current, RLIMIT_NPROC); task_rlimit(current, RLIMIT_NPROC);
} }
atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0); atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
wqe->node = node;
wqe->wq = wq; wqe->wq = wq;
spin_lock_init(&wqe->lock); spin_lock_init(&wqe->lock);
INIT_WQ_LIST(&wqe->work_list); INIT_WQ_LIST(&wqe->work_list);
......
...@@ -74,17 +74,20 @@ struct io_wq_work { ...@@ -74,17 +74,20 @@ struct io_wq_work {
struct files_struct *files; struct files_struct *files;
struct mm_struct *mm; struct mm_struct *mm;
const struct cred *creds; const struct cred *creds;
struct fs_struct *fs;
unsigned flags; unsigned flags;
pid_t task_pid;
}; };
#define INIT_IO_WORK(work, _func) \ #define INIT_IO_WORK(work, _func) \
do { \ do { \
(work)->list.next = NULL; \ (work)->list.next = NULL; \
(work)->func = _func; \ (work)->func = _func; \
(work)->flags = 0; \
(work)->files = NULL; \ (work)->files = NULL; \
(work)->mm = NULL; \ (work)->mm = NULL; \
(work)->creds = NULL; \ (work)->creds = NULL; \
(work)->fs = NULL; \
(work)->flags = 0; \
} while (0) \ } while (0) \
typedef void (get_work_fn)(struct io_wq_work *); typedef void (get_work_fn)(struct io_wq_work *);
...@@ -107,6 +110,7 @@ void io_wq_flush(struct io_wq *wq); ...@@ -107,6 +110,7 @@ void io_wq_flush(struct io_wq *wq);
void io_wq_cancel_all(struct io_wq *wq); void io_wq_cancel_all(struct io_wq *wq);
enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork); enum io_wq_cancel io_wq_cancel_work(struct io_wq *wq, struct io_wq_work *cwork);
enum io_wq_cancel io_wq_cancel_pid(struct io_wq *wq, pid_t pid);
typedef bool (work_cancel_fn)(struct io_wq_work *, void *); typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment