Commit fe76421d authored by Jens Axboe

io_uring: allow user configurable IO thread CPU affinity

io-wq defaults to per-node masks for IO workers. This works fine by
default, but isn't particularly handy for workloads that prefer more
specific affinities, for either performance or isolation reasons.

This adds IORING_REGISTER_IOWQ_AFF that allows the user to pass in a CPU
mask that is then applied to IO thread workers, and an
IORING_UNREGISTER_IOWQ_AFF that simply resets the masks back to the
default of per-node.

Note that no care is given to existing IO threads, they will need to go
through a reschedule before the affinity is correct if they are already
running or sleeping.
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent 0e03496d
...@@ -1087,6 +1087,23 @@ static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node) ...@@ -1087,6 +1087,23 @@ static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node)
return __io_wq_cpu_online(wq, cpu, false); return __io_wq_cpu_online(wq, cpu, false);
} }
/*
 * Set the CPU mask stored in every per-node io_wqe of @wq.
 *
 * @wq:   io-wq whose per-node wqes are updated.
 * @mask: mask to copy into each wqe->cpu_mask; when NULL, each wqe gets
 *        cpumask_of_node() for its own node instead.
 *
 * The walk over the nodes is done inside an RCU read-side section.
 * Only the stored masks are overwritten here; nothing in this function
 * touches workers that already exist.
 *
 * Always returns 0.
 */
int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask)
{
	int node;

	rcu_read_lock();
	for_each_node(node) {
		struct io_wqe *wqe = wq->wqes[node];
		/* Caller-supplied mask if given, otherwise this node's own CPUs. */
		const struct cpumask *src = mask ? mask : cpumask_of_node(node);

		cpumask_copy(wqe->cpu_mask, src);
	}
	rcu_read_unlock();

	return 0;
}
static __init int io_wq_init(void) static __init int io_wq_init(void)
{ {
int ret; int ret;
......
...@@ -128,6 +128,8 @@ void io_wq_put_and_exit(struct io_wq *wq); ...@@ -128,6 +128,8 @@ void io_wq_put_and_exit(struct io_wq *wq);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val); void io_wq_hash_work(struct io_wq_work *work, void *val);
/*
 * Apply @mask to the per-node CPU masks of @wq. A NULL @mask selects the
 * per-node mask for each node. Returns 0.
 */
int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
static inline bool io_wq_is_hashed(struct io_wq_work *work) static inline bool io_wq_is_hashed(struct io_wq_work *work)
{ {
return work->flags & IO_WQ_WORK_HASHED; return work->flags & IO_WQ_WORK_HASHED;
......
...@@ -9983,6 +9983,43 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, ...@@ -9983,6 +9983,43 @@ static int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg,
return -EINVAL; return -EINVAL;
} }
/*
 * IORING_REGISTER_IOWQ_AFF: copy a CPU mask from userspace and hand it to
 * io_wq_cpu_affinity() for the calling task's io-wq.
 *
 * @ctx: ring the request was issued on (not referenced in this function).
 * @arg: user pointer to the mask bytes.
 * @len: number of bytes supplied at @arg; clamped to cpumask_size().
 *
 * Returns -EINVAL if the current task has no io_uring task context or no
 * io-wq, -ENOMEM if the temporary mask cannot be allocated, -EFAULT if the
 * user copy fails, otherwise the result of io_wq_cpu_affinity().
 */
static int io_register_iowq_aff(struct io_ring_ctx *ctx, void __user *arg,
				unsigned len)
{
	struct io_uring_task *tctx = current->io_uring;
	cpumask_var_t new_mask;
	int ret;

	if (!tctx || !tctx->io_wq)
		return -EINVAL;

	if (!alloc_cpumask_var(&new_mask, GFP_KERNEL))
		return -ENOMEM;

	/*
	 * Clear the mask first: the user may supply fewer than cpumask_size()
	 * bytes, and the copy below only writes @len bytes.
	 */
	cpumask_clear(new_mask);
	len = len > cpumask_size() ? cpumask_size() : len;

	if (copy_from_user(new_mask, arg, len))
		ret = -EFAULT;
	else
		ret = io_wq_cpu_affinity(tctx->io_wq, new_mask);

	/* The temporary mask is released on both the success and fault paths. */
	free_cpumask_var(new_mask);
	return ret;
}
/*
 * IORING_UNREGISTER_IOWQ_AFF: call io_wq_cpu_affinity() with a NULL mask
 * for the calling task's io-wq, which makes it fall back to
 * cpumask_of_node() for each node.
 *
 * @ctx: ring the request was issued on (not referenced in this function).
 *
 * Returns -EINVAL if the current task has no io_uring task context or no
 * io-wq, otherwise the result of io_wq_cpu_affinity().
 */
static int io_unregister_iowq_aff(struct io_ring_ctx *ctx)
{
	struct io_uring_task *tctx = current->io_uring;

	if (tctx && tctx->io_wq)
		return io_wq_cpu_affinity(tctx->io_wq, NULL);

	return -EINVAL;
}
static bool io_register_op_must_quiesce(int op) static bool io_register_op_must_quiesce(int op)
{ {
switch (op) { switch (op) {
...@@ -9998,6 +10035,8 @@ static bool io_register_op_must_quiesce(int op) ...@@ -9998,6 +10035,8 @@ static bool io_register_op_must_quiesce(int op)
case IORING_REGISTER_FILES_UPDATE2: case IORING_REGISTER_FILES_UPDATE2:
case IORING_REGISTER_BUFFERS2: case IORING_REGISTER_BUFFERS2:
case IORING_REGISTER_BUFFERS_UPDATE: case IORING_REGISTER_BUFFERS_UPDATE:
case IORING_REGISTER_IOWQ_AFF:
case IORING_UNREGISTER_IOWQ_AFF:
return false; return false;
default: default:
return true; return true;
...@@ -10137,6 +10176,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, ...@@ -10137,6 +10176,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
ret = io_register_rsrc_update(ctx, arg, nr_args, ret = io_register_rsrc_update(ctx, arg, nr_args,
IORING_RSRC_BUFFER); IORING_RSRC_BUFFER);
break; break;
case IORING_REGISTER_IOWQ_AFF:
ret = -EINVAL;
if (!arg || !nr_args)
break;
ret = io_register_iowq_aff(ctx, arg, nr_args);
break;
case IORING_UNREGISTER_IOWQ_AFF:
ret = -EINVAL;
if (arg || nr_args)
break;
ret = io_unregister_iowq_aff(ctx);
break;
default: default:
ret = -EINVAL; ret = -EINVAL;
break; break;
......
...@@ -306,6 +306,10 @@ enum { ...@@ -306,6 +306,10 @@ enum {
IORING_REGISTER_BUFFERS2 = 15, IORING_REGISTER_BUFFERS2 = 15,
IORING_REGISTER_BUFFERS_UPDATE = 16, IORING_REGISTER_BUFFERS_UPDATE = 16,
/* set/clear io-wq thread affinities */
IORING_REGISTER_IOWQ_AFF = 17,
IORING_UNREGISTER_IOWQ_AFF = 18,
/* this goes last */ /* this goes last */
IORING_REGISTER_LAST IORING_REGISTER_LAST
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment