Commit 43c01fbe authored by Jens Axboe

io-wq: re-set NUMA node affinities if CPUs come online

We correctly set io-wq NUMA node affinities when the io-wq context is
set up, but if all the CPUs of a node are offlined and then brought back
online, the per-node affinities are broken. Ensure that we set them
again whenever a CPU comes online. This ensures that we always track
the right node affinity. The usual cpuhp notifiers are used to drive it.
Reported-by: Zhang Qiang <qiang.zhang@windriver.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
parent ff577161
...@@ -19,7 +19,9 @@ ...@@ -19,7 +19,9 @@
#include <linux/task_work.h> #include <linux/task_work.h>
#include <linux/blk-cgroup.h> #include <linux/blk-cgroup.h>
#include <linux/audit.h> #include <linux/audit.h>
#include <linux/cpu.h>
#include "../kernel/sched/sched.h"
#include "io-wq.h" #include "io-wq.h"
#define WORKER_IDLE_TIMEOUT (5 * HZ) #define WORKER_IDLE_TIMEOUT (5 * HZ)
...@@ -123,9 +125,13 @@ struct io_wq { ...@@ -123,9 +125,13 @@ struct io_wq {
refcount_t refs; refcount_t refs;
struct completion done; struct completion done;
struct hlist_node cpuhp_node;
refcount_t use_refs; refcount_t use_refs;
}; };
static enum cpuhp_state io_wq_online;
static bool io_worker_get(struct io_worker *worker) static bool io_worker_get(struct io_worker *worker)
{ {
return refcount_inc_not_zero(&worker->ref); return refcount_inc_not_zero(&worker->ref);
...@@ -1091,10 +1097,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ...@@ -1091,10 +1097,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL); wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
if (!wq->wqes) { if (!wq->wqes)
kfree(wq); goto err_wq;
return ERR_PTR(-ENOMEM);
} ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
if (ret)
goto err_wqes;
wq->free_work = data->free_work; wq->free_work = data->free_work;
wq->do_work = data->do_work; wq->do_work = data->do_work;
...@@ -1102,6 +1110,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ...@@ -1102,6 +1110,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
/* caller must already hold a reference to this */ /* caller must already hold a reference to this */
wq->user = data->user; wq->user = data->user;
ret = -ENOMEM;
for_each_node(node) { for_each_node(node) {
struct io_wqe *wqe; struct io_wqe *wqe;
int alloc_node = node; int alloc_node = node;
...@@ -1145,9 +1154,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ...@@ -1145,9 +1154,12 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
ret = PTR_ERR(wq->manager); ret = PTR_ERR(wq->manager);
complete(&wq->done); complete(&wq->done);
err: err:
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
for_each_node(node) for_each_node(node)
kfree(wq->wqes[node]); kfree(wq->wqes[node]);
err_wqes:
kfree(wq->wqes); kfree(wq->wqes);
err_wq:
kfree(wq); kfree(wq);
return ERR_PTR(ret); return ERR_PTR(ret);
} }
...@@ -1164,6 +1176,8 @@ static void __io_wq_destroy(struct io_wq *wq) ...@@ -1164,6 +1176,8 @@ static void __io_wq_destroy(struct io_wq *wq)
{ {
int node; int node;
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
set_bit(IO_WQ_BIT_EXIT, &wq->state); set_bit(IO_WQ_BIT_EXIT, &wq->state);
if (wq->manager) if (wq->manager)
kthread_stop(wq->manager); kthread_stop(wq->manager);
...@@ -1191,3 +1205,41 @@ struct task_struct *io_wq_get_task(struct io_wq *wq) ...@@ -1191,3 +1205,41 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
{ {
return wq->manager; return wq->manager;
} }
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
{
struct task_struct *task = worker->task;
struct rq_flags rf;
struct rq *rq;
rq = task_rq_lock(task, &rf);
do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
task->flags |= PF_NO_SETAFFINITY;
task_rq_unlock(rq, task, &rf);
return false;
}
static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
{
struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
int i;
rcu_read_lock();
for_each_node(i)
io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
rcu_read_unlock();
return 0;
}
/*
 * Boot-time setup: register a multi-instance CPU hotplug state named
 * "io-wq/online" with io_wq_cpu_online() as its startup callback and no
 * teardown callback, and remember the returned state in io_wq_online.
 *
 * Returns 0 on success, or the negative value returned by
 * cpuhp_setup_state_multi() on failure.
 */
static __init int io_wq_init(void)
{
	int state = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
					    io_wq_cpu_online, NULL);

	if (state < 0)
		return state;

	/* CPUHP_AP_ONLINE_DYN was requested; keep the state value handed back. */
	io_wq_online = state;
	return 0;
}
subsys_initcall(io_wq_init);
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment