Commit af004187 authored by Linus Torvalds

Merge tag 'io_uring-5.10-2020-10-24' of git://git.kernel.dk/linux-block

Pull io_uring fixes from Jens Axboe:

 - fsize was missed in previous unification of work flags

 - Few fixes cleaning up the flags unification creds cases (Pavel)

 - Fix NUMA affinities for completely unplugged/replugged node for io-wq

 - Two fallout fixes from the set_fs changes. One local to io_uring, one
   for the splice entry point that io_uring uses.

 - Linked timeout fixes (Pavel)

 - Removal of ->flush() ->files work-around that we don't need anymore
   with referenced files (Pavel)

 - Various cleanups (Pavel)

* tag 'io_uring-5.10-2020-10-24' of git://git.kernel.dk/linux-block:
  splice: change exported internal do_splice() helper to take kernel offset
  io_uring: make loop_rw_iter() use original user supplied pointers
  io_uring: remove req cancel in ->flush()
  io-wq: re-set NUMA node affinities if CPUs come online
  io_uring: don't reuse linked_timeout
  io_uring: unify fsize with def->work_flags
  io_uring: fix racy REQ_F_LINK_TIMEOUT clearing
  io_uring: do poll's hash_node init in common code
  io_uring: inline io_poll_task_handler()
  io_uring: remove extra ->file check in poll prep
  io_uring: make cached_cq_overflow non atomic_t
  io_uring: inline io_fail_links()
  io_uring: kill ref get/drop in personality init
  io_uring: flags-based creds init in queue
parents cb6b2897 ee6e00c8
...@@ -19,7 +19,9 @@ ...@@ -19,7 +19,9 @@
#include <linux/task_work.h> #include <linux/task_work.h>
#include <linux/blk-cgroup.h> #include <linux/blk-cgroup.h>
#include <linux/audit.h> #include <linux/audit.h>
#include <linux/cpu.h>
#include "../kernel/sched/sched.h"
#include "io-wq.h" #include "io-wq.h"
#define WORKER_IDLE_TIMEOUT (5 * HZ) #define WORKER_IDLE_TIMEOUT (5 * HZ)
...@@ -123,9 +125,13 @@ struct io_wq { ...@@ -123,9 +125,13 @@ struct io_wq {
refcount_t refs; refcount_t refs;
struct completion done; struct completion done;
struct hlist_node cpuhp_node;
refcount_t use_refs; refcount_t use_refs;
}; };
static enum cpuhp_state io_wq_online;
static bool io_worker_get(struct io_worker *worker) static bool io_worker_get(struct io_worker *worker)
{ {
return refcount_inc_not_zero(&worker->ref); return refcount_inc_not_zero(&worker->ref);
...@@ -187,7 +193,8 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker) ...@@ -187,7 +193,8 @@ static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
worker->blkcg_css = NULL; worker->blkcg_css = NULL;
} }
#endif #endif
if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
return dropped_lock; return dropped_lock;
} }
...@@ -483,7 +490,10 @@ static void io_impersonate_work(struct io_worker *worker, ...@@ -483,7 +490,10 @@ static void io_impersonate_work(struct io_worker *worker,
if ((work->flags & IO_WQ_WORK_CREDS) && if ((work->flags & IO_WQ_WORK_CREDS) &&
worker->cur_creds != work->identity->creds) worker->cur_creds != work->identity->creds)
io_wq_switch_creds(worker, work); io_wq_switch_creds(worker, work);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize; if (work->flags & IO_WQ_WORK_FSIZE)
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
else if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
io_wq_switch_blkcg(worker, work); io_wq_switch_blkcg(worker, work);
#ifdef CONFIG_AUDIT #ifdef CONFIG_AUDIT
current->loginuid = work->identity->loginuid; current->loginuid = work->identity->loginuid;
...@@ -1087,10 +1097,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ...@@ -1087,10 +1097,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL); wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
if (!wq->wqes) { if (!wq->wqes)
kfree(wq); goto err_wq;
return ERR_PTR(-ENOMEM);
} ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
if (ret)
goto err_wqes;
wq->free_work = data->free_work; wq->free_work = data->free_work;
wq->do_work = data->do_work; wq->do_work = data->do_work;
...@@ -1098,6 +1110,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ...@@ -1098,6 +1110,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
/* caller must already hold a reference to this */ /* caller must already hold a reference to this */
wq->user = data->user; wq->user = data->user;
ret = -ENOMEM;
for_each_node(node) { for_each_node(node) {
struct io_wqe *wqe; struct io_wqe *wqe;
int alloc_node = node; int alloc_node = node;
...@@ -1141,9 +1154,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ...@@ -1141,9 +1154,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
ret = PTR_ERR(wq->manager); ret = PTR_ERR(wq->manager);
complete(&wq->done); complete(&wq->done);
err: err:
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
for_each_node(node) for_each_node(node)
kfree(wq->wqes[node]); kfree(wq->wqes[node]);
err_wqes:
kfree(wq->wqes); kfree(wq->wqes);
err_wq:
kfree(wq); kfree(wq);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
...@@ -1160,6 +1176,8 @@ static void __io_wq_destroy(struct io_wq *wq) ...@@ -1160,6 +1176,8 @@ static void __io_wq_destroy(struct io_wq *wq)
{ {
int node; int node;
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
set_bit(IO_WQ_BIT_EXIT, &wq->state); set_bit(IO_WQ_BIT_EXIT, &wq->state);
if (wq->manager) if (wq->manager)
kthread_stop(wq->manager); kthread_stop(wq->manager);
...@@ -1187,3 +1205,41 @@ struct task_struct *io_wq_get_task(struct io_wq *wq) ...@@ -1187,3 +1205,41 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
{ {
return wq->manager; return wq->manager;
} }
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
{
struct task_struct *task = worker->task;
struct rq_flags rf;
struct rq *rq;
rq = task_rq_lock(task, &rf);
do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
task->flags |= PF_NO_SETAFFINITY;
task_rq_unlock(rq, task, &rf);
return false;
}
static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
{
struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
int i;
rcu_read_lock();
for_each_node(i)
io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
rcu_read_unlock();
return 0;
}
/*
 * Boot-time setup: register the multi-instance "io-wq/online" CPU hotplug
 * state with io_wq_cpu_online() as its callback, and remember the state
 * returned in io_wq_online so io_wq instances can be added to and removed
 * from it.
 *
 * Returns 0 on success, or the negative value returned by
 * cpuhp_setup_state_multi() on failure.
 */
static __init int io_wq_init(void)
{
	int state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
					    io_wq_cpu_online, NULL);

	if (state >= 0) {
		/* A non-negative result is the state to use for instances. */
		io_wq_online = state;
		return 0;
	}

	return state;
}
subsys_initcall(io_wq_init);
...@@ -17,6 +17,7 @@ enum { ...@@ -17,6 +17,7 @@ enum {
IO_WQ_WORK_MM = 128, IO_WQ_WORK_MM = 128,
IO_WQ_WORK_CREDS = 256, IO_WQ_WORK_CREDS = 256,
IO_WQ_WORK_BLKCG = 512, IO_WQ_WORK_BLKCG = 512,
IO_WQ_WORK_FSIZE = 1024,
IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */ IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
}; };
......
This diff is collapsed.
...@@ -1005,9 +1005,8 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, ...@@ -1005,9 +1005,8 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
/* /*
* Determine where to splice to/from. * Determine where to splice to/from.
*/ */
long do_splice(struct file *in, loff_t __user *off_in, long do_splice(struct file *in, loff_t *off_in, struct file *out,
struct file *out, loff_t __user *off_out, loff_t *off_out, size_t len, unsigned int flags)
size_t len, unsigned int flags)
{ {
struct pipe_inode_info *ipipe; struct pipe_inode_info *ipipe;
struct pipe_inode_info *opipe; struct pipe_inode_info *opipe;
...@@ -1041,8 +1040,7 @@ long do_splice(struct file *in, loff_t __user *off_in, ...@@ -1041,8 +1040,7 @@ long do_splice(struct file *in, loff_t __user *off_in,
if (off_out) { if (off_out) {
if (!(out->f_mode & FMODE_PWRITE)) if (!(out->f_mode & FMODE_PWRITE))
return -EINVAL; return -EINVAL;
if (copy_from_user(&offset, off_out, sizeof(loff_t))) offset = *off_out;
return -EFAULT;
} else { } else {
offset = out->f_pos; offset = out->f_pos;
} }
...@@ -1063,8 +1061,8 @@ long do_splice(struct file *in, loff_t __user *off_in, ...@@ -1063,8 +1061,8 @@ long do_splice(struct file *in, loff_t __user *off_in,
if (!off_out) if (!off_out)
out->f_pos = offset; out->f_pos = offset;
else if (copy_to_user(off_out, &offset, sizeof(loff_t))) else
ret = -EFAULT; *off_out = offset;
return ret; return ret;
} }
...@@ -1075,8 +1073,7 @@ long do_splice(struct file *in, loff_t __user *off_in, ...@@ -1075,8 +1073,7 @@ long do_splice(struct file *in, loff_t __user *off_in,
if (off_in) { if (off_in) {
if (!(in->f_mode & FMODE_PREAD)) if (!(in->f_mode & FMODE_PREAD))
return -EINVAL; return -EINVAL;
if (copy_from_user(&offset, off_in, sizeof(loff_t))) offset = *off_in;
return -EFAULT;
} else { } else {
offset = in->f_pos; offset = in->f_pos;
} }
...@@ -1100,8 +1097,8 @@ long do_splice(struct file *in, loff_t __user *off_in, ...@@ -1100,8 +1097,8 @@ long do_splice(struct file *in, loff_t __user *off_in,
wakeup_pipe_readers(opipe); wakeup_pipe_readers(opipe);
if (!off_in) if (!off_in)
in->f_pos = offset; in->f_pos = offset;
else if (copy_to_user(off_in, &offset, sizeof(loff_t))) else
ret = -EFAULT; *off_in = offset;
return ret; return ret;
} }
...@@ -1109,6 +1106,46 @@ long do_splice(struct file *in, loff_t __user *off_in, ...@@ -1109,6 +1106,46 @@ long do_splice(struct file *in, loff_t __user *off_in,
return -EINVAL; return -EINVAL;
} }
/*
 * User-pointer front end for do_splice().
 *
 * do_splice() takes kernel offset pointers, so this helper copies any
 * user-supplied offsets into kernel storage, calls do_splice(), and on
 * success copies the (possibly updated) offsets back to user space.
 *
 * @in, @out:         source and destination files
 * @off_in, @off_out: optional user pointers to offsets; NULL if not supplied
 * @len, @flags:      passed through unchanged to do_splice()
 *
 * Returns the result of do_splice(), or:
 *   -ESPIPE if an offset is supplied for a side that is a pipe,
 *   -EFAULT if a user offset cannot be read, or cannot be written back
 *           (the latter is reported even though do_splice() succeeded).
 */
static long __do_splice(struct file *in, loff_t __user *off_in,
			struct file *out, loff_t __user *off_out,
			size_t len, unsigned int flags)
{
	struct pipe_inode_info *ipipe;
	struct pipe_inode_info *opipe;
	/*
	 * Each offset gets its own kernel copy so the two pointers handed to
	 * do_splice() never alias, even if both user offsets are supplied.
	 */
	loff_t offset_in, offset_out;
	loff_t *__off_in = NULL, *__off_out = NULL;
	long ret;

	ipipe = get_pipe_info(in, true);
	opipe = get_pipe_info(out, true);

	/* Reject an explicit offset on whichever side is a pipe. */
	if (ipipe && off_in)
		return -ESPIPE;
	if (opipe && off_out)
		return -ESPIPE;

	if (off_out) {
		if (copy_from_user(&offset_out, off_out, sizeof(loff_t)))
			return -EFAULT;
		__off_out = &offset_out;
	}
	if (off_in) {
		if (copy_from_user(&offset_in, off_in, sizeof(loff_t)))
			return -EFAULT;
		__off_in = &offset_in;
	}

	ret = do_splice(in, __off_in, out, __off_out, len, flags);
	if (ret < 0)
		return ret;

	/* Write back only the offsets the caller actually supplied. */
	if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t)))
		return -EFAULT;
	if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t)))
		return -EFAULT;

	return ret;
}
static int iter_to_pipe(struct iov_iter *from, static int iter_to_pipe(struct iov_iter *from,
struct pipe_inode_info *pipe, struct pipe_inode_info *pipe,
unsigned flags) unsigned flags)
...@@ -1303,8 +1340,8 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, ...@@ -1303,8 +1340,8 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
if (in.file) { if (in.file) {
out = fdget(fd_out); out = fdget(fd_out);
if (out.file) { if (out.file) {
error = do_splice(in.file, off_in, out.file, off_out, error = __do_splice(in.file, off_in, out.file, off_out,
len, flags); len, flags);
fdput(out); fdput(out);
} }
fdput(in); fdput(in);
......
...@@ -78,8 +78,8 @@ extern ssize_t add_to_pipe(struct pipe_inode_info *, ...@@ -78,8 +78,8 @@ extern ssize_t add_to_pipe(struct pipe_inode_info *,
struct pipe_buffer *); struct pipe_buffer *);
extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *,
splice_direct_actor *); splice_direct_actor *);
extern long do_splice(struct file *in, loff_t __user *off_in, extern long do_splice(struct file *in, loff_t *off_in,
struct file *out, loff_t __user *off_out, struct file *out, loff_t *off_out,
size_t len, unsigned int flags); size_t len, unsigned int flags);
extern long do_tee(struct file *in, struct file *out, size_t len, extern long do_tee(struct file *in, struct file *out, size_t len,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment