Commit ea1abd61 authored by Lai Jiangshan, committed by Tejun Heo

workqueue: reimplement idle worker rebinding

Currently rebind_workers() rebinds idle workers synchronously before
proceeding to request that busy workers rebind.  This is
necessary because all workers on @worker_pool->idle_list must be bound
before concurrency management local wake-ups from the busy workers
take place.

Unfortunately, the synchronous idle rebinding is quite complicated.
This patch reimplements idle rebinding to simplify the code path.

Rather than trying to make all idle workers bound before rebinding
busy workers, we simply remove all to-be-bound idle workers from the
idle list and let them add themselves back after completing rebinding
(successful or not).

As only workers which have finished rebinding can be on the idle
worker list, the idle list is guaranteed to contain only bound workers
(unless the CPU went down again), making local wake-ups safe.
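
For reference, idle_worker_rebind() after this patch condenses to
roughly the following (taken from the diff below, comments trimmed):

  static void idle_worker_rebind(struct worker *worker)
  {
          struct global_cwq *gcwq = worker->pool->gcwq;

          /* CPU may have gone down again; reinstate UNBOUND on failure */
          if (!worker_maybe_bind_and_lock(worker))
                  worker->flags |= WORKER_UNBOUND;

          worker_clr_flags(worker, WORKER_REBIND);

          /* rebind complete, become available again */
          list_add(&worker->entry, &worker->pool->idle_list);
          spin_unlock_irq(&gcwq->lock);
  }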

After the change, @worker_pool->nr_idle may deviate from the actual
number of idle workers on @worker_pool->idle_list.  More specifically,
nr_idle may be non-zero while ->idle_list is empty.  All users of
->nr_idle and ->idle_list are audited.  The only affected one is
too_many_workers(), which is updated to return %false if ->idle_list is
empty regardless of ->nr_idle.
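
The updated check in too_many_workers(), as it appears in the diff
below, bails out before consulting nr_idle:

          /*
           * nr_idle and idle_list may disagree if idle rebinding is in
           * progress.  Never return %true if idle_list is empty.
           */
          if (list_empty(&pool->idle_list))
                  return false;

          return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;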

After this patch, rebind_workers() no longer performs the nasty
idle-rebind retries which require temporary release of gcwq->lock, and
both unbinding and rebinding are atomic w.r.t. global_cwq->lock.
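
With the retry loop gone, the idle-worker pass in rebind_workers() is a
single loop run entirely under gcwq->lock (condensed from the diff
below):

  /* set REBIND and kick idle ones */
  for_each_worker_pool(pool, gcwq) {
          list_for_each_entry_safe(worker, n, &pool->idle_list, entry) {
                  unsigned long worker_flags = worker->flags;

                  /* morph UNBOUND to REBIND atomically */
                  worker_flags &= ~WORKER_UNBOUND;
                  worker_flags |= WORKER_REBIND;
                  ACCESS_ONCE(worker->flags) = worker_flags;

                  /* stay off idle_list until rebind is complete */
                  list_del_init(&worker->entry);

                  /* worker_thread() will call idle_worker_rebind() */
                  wake_up_process(worker->task);
          }
  }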

worker->idle_rebind and global_cwq->rebind_hold are now unnecessary
and removed along with the definition of struct idle_rebind.

Changes from V1:
	1) Remove unlikely() from too_many_workers(); ->idle_list can be
	   empty at any time, even before this patch, so there is no
	   reason to use unlikely().
	2) Fix a small rebasing mistake (introduced when rebasing the
	   original fix onto for-next).
	3) Add a lot of comments.
	4) Clear WORKER_REBIND unconditionally in idle_worker_rebind().

tj: Updated comments and description.
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
parent 6c1423ba
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -126,7 +126,6 @@ enum {
 
 struct global_cwq;
 struct worker_pool;
-struct idle_rebind;
 
 /*
  * The poor guys doing the actual heavy lifting.  All on-duty workers
@@ -150,7 +149,6 @@ struct worker {
         int id;                         /* I: worker id */
 
         /* for rebinding worker to CPU */
-        struct idle_rebind *idle_rebind; /* L: for idle worker */
         struct work_struct rebind_work; /* L: for busy worker */
 };
 
@@ -160,6 +158,8 @@ struct worker_pool {
         struct list_head worklist;      /* L: list of pending works */
         int nr_workers;                 /* L: total number of workers */
+
+        /* nr_idle includes the ones off idle_list for rebinding */
         int nr_idle;                    /* L: currently idle ones */
 
         struct list_head idle_list;     /* X: list of idle workers */
@@ -186,8 +186,6 @@ struct global_cwq {
 
         struct worker_pool pools[NR_WORKER_POOLS];
                                         /* normal and highpri pools */
-
-        wait_queue_head_t rebind_hold;  /* rebind hold wait */
 } ____cacheline_aligned_in_smp;
 
 /*
@@ -687,6 +685,13 @@ static bool too_many_workers(struct worker_pool *pool)
         int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
         int nr_busy = pool->nr_workers - nr_idle;
 
+        /*
+         * nr_idle and idle_list may disagree if idle rebinding is in
+         * progress.  Never return %true if idle_list is empty.
+         */
+        if (list_empty(&pool->idle_list))
+                return false;
+
         return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
 }
 
@@ -1611,37 +1616,26 @@ __acquires(&gcwq->lock)
         }
 }
 
-struct idle_rebind {
-        int cnt;                        /* # workers to be rebound */
-        struct completion done;         /* all workers rebound */
-};
-
 /*
- * Rebind an idle @worker to its CPU.  During CPU onlining, this has to
- * happen synchronously for idle workers.  worker_thread() will test
+ * Rebind an idle @worker to its CPU.  worker_thread() will test
  * %WORKER_REBIND before leaving idle and call this function.
  */
 static void idle_worker_rebind(struct worker *worker)
 {
         struct global_cwq *gcwq = worker->pool->gcwq;
 
-        /* CPU must be online at this point */
-        WARN_ON(!worker_maybe_bind_and_lock(worker));
-        if (!--worker->idle_rebind->cnt)
-                complete(&worker->idle_rebind->done);
-        spin_unlock_irq(&worker->pool->gcwq->lock);
-
-        /* we did our part, wait for rebind_workers() to finish up */
-        wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
-
         /*
-         * rebind_workers() shouldn't finish until all workers passed the
-         * above WORKER_REBIND wait.  Tell it when done.
+         * CPU may go down again inbetween.  If rebinding fails, reinstate
+         * UNBOUND.  We're off idle_list and nobody else can do it for us.
          */
-        spin_lock_irq(&worker->pool->gcwq->lock);
-        if (!--worker->idle_rebind->cnt)
-                complete(&worker->idle_rebind->done);
-        spin_unlock_irq(&worker->pool->gcwq->lock);
+        if (!worker_maybe_bind_and_lock(worker))
+                worker->flags |= WORKER_UNBOUND;
+
+        worker_clr_flags(worker, WORKER_REBIND);
+
+        /* rebind complete, become available again */
+        list_add(&worker->entry, &worker->pool->idle_list);
+        spin_unlock_irq(&gcwq->lock);
 }
 
 /*
@@ -1676,29 +1670,25 @@ static void busy_worker_rebind_fn(struct work_struct *work)
  * @gcwq->cpu is coming online.  Rebind all workers to the CPU.  Rebinding
  * is different for idle and busy ones.
  *
- * The idle ones should be rebound synchronously and idle rebinding should
- * be complete before any worker starts executing work items with
- * concurrency management enabled; otherwise, scheduler may oops trying to
- * wake up non-local idle worker from wq_worker_sleeping().
- *
- * This is achieved by repeatedly requesting rebinding until all idle
- * workers are known to have been rebound under @gcwq->lock and holding all
- * idle workers from becoming busy until idle rebinding is complete.
+ * Idle ones will be removed from the idle_list and woken up.  They will
+ * add themselves back after completing rebind.  This ensures that the
+ * idle_list doesn't contain any unbound workers when re-bound busy workers
+ * try to perform local wake-ups for concurrency management.
  *
- * Once idle workers are rebound, busy workers can be rebound as they
- * finish executing their current work items.  Queueing the rebind work at
- * the head of their scheduled lists is enough.  Note that nr_running will
- * be properbly bumped as busy workers rebind.
+ * Busy workers can rebind after they finish their current work items.
+ * Queueing the rebind work item at the head of the scheduled list is
+ * enough.  Note that nr_running will be properly bumped as busy workers
+ * rebind.
  *
- * On return, all workers are guaranteed to either be bound or have rebind
- * work item scheduled.
+ * On return, all non-manager workers are scheduled for rebind - see
+ * manage_workers() for the manager special case.  Any idle worker
+ * including the manager will not appear on @idle_list until rebind is
+ * complete, making local wake-ups safe.
  */
 static void rebind_workers(struct global_cwq *gcwq)
         __releases(&gcwq->lock) __acquires(&gcwq->lock)
 {
-        struct idle_rebind idle_rebind;
         struct worker_pool *pool;
-        struct worker *worker;
+        struct worker *worker, *n;
         struct hlist_node *pos;
         int i;
 
@@ -1707,46 +1697,29 @@ static void rebind_workers(struct global_cwq *gcwq)
         for_each_worker_pool(pool, gcwq)
                 lockdep_assert_held(&pool->manager_mutex);
 
-        /*
-         * Rebind idle workers.  Interlocked both ways.  We wait for
-         * workers to rebind via @idle_rebind.done.  Workers will wait for
-         * us to finish up by watching %WORKER_REBIND.
-         */
-        init_completion(&idle_rebind.done);
-retry:
-        idle_rebind.cnt = 1;
-        INIT_COMPLETION(idle_rebind.done);
-
-        /* set REBIND and kick idle ones, we'll wait for these later */
+        /* set REBIND and kick idle ones */
         for_each_worker_pool(pool, gcwq) {
-                list_for_each_entry(worker, &pool->idle_list, entry) {
+                list_for_each_entry_safe(worker, n, &pool->idle_list, entry) {
                         unsigned long worker_flags = worker->flags;
 
-                        if (worker->flags & WORKER_REBIND)
-                                continue;
-
                         /* morph UNBOUND to REBIND atomically */
                         worker_flags &= ~WORKER_UNBOUND;
                         worker_flags |= WORKER_REBIND;
                         ACCESS_ONCE(worker->flags) = worker_flags;
 
-                        idle_rebind.cnt++;
-                        worker->idle_rebind = &idle_rebind;
+                        /*
+                         * idle workers should be off @pool->idle_list
+                         * until rebind is complete to avoid receiving
+                         * premature local wake-ups.
+                         */
+                        list_del_init(&worker->entry);
 
                         /* worker_thread() will call idle_worker_rebind() */
                         wake_up_process(worker->task);
                 }
         }
 
-        if (--idle_rebind.cnt) {
-                spin_unlock_irq(&gcwq->lock);
-                wait_for_completion(&idle_rebind.done);
-                spin_lock_irq(&gcwq->lock);
-                /* busy ones might have become idle while waiting, retry */
-                goto retry;
-        }
-
-        /* all idle workers are rebound, rebind busy workers */
+        /* rebind busy workers */
         for_each_busy_worker(worker, i, pos, gcwq) {
                 unsigned long worker_flags = worker->flags;
                 struct work_struct *rebind_work = &worker->rebind_work;
@@ -1776,34 +1749,6 @@ static void rebind_workers(struct global_cwq *gcwq)
                              worker->scheduled.next,
                              work_color_to_flags(WORK_NO_COLOR));
         }
-
-        /*
-         * All idle workers are rebound and waiting for %WORKER_REBIND to
-         * be cleared inside idle_worker_rebind().  Clear and release.
-         * Clearing %WORKER_REBIND from this foreign context is safe
-         * because these workers are still guaranteed to be idle.
-         *
-         * We need to make sure all idle workers passed WORKER_REBIND wait
-         * in idle_worker_rebind() before returning; otherwise, workers can
-         * get stuck at the wait if hotplug cycle repeats.
-         */
-        idle_rebind.cnt = 1;
-        INIT_COMPLETION(idle_rebind.done);
-
-        for_each_worker_pool(pool, gcwq) {
-                list_for_each_entry(worker, &pool->idle_list, entry) {
-                        worker->flags &= ~WORKER_REBIND;
-                        idle_rebind.cnt++;
-                }
-        }
-
-        wake_up_all(&gcwq->rebind_hold);
-
-        if (--idle_rebind.cnt) {
-                spin_unlock_irq(&gcwq->lock);
-                wait_for_completion(&idle_rebind.done);
-                spin_lock_irq(&gcwq->lock);
-        }
 }
 
 static struct worker *alloc_worker(void)
@@ -3916,8 +3861,6 @@ static int __init init_workqueues(void)
                         mutex_init(&pool->manager_mutex);
                         ida_init(&pool->worker_ida);
                 }
-
-                init_waitqueue_head(&gcwq->rebind_hold);
         }
 
         /* create the initial worker */