Commit 0bd1189e authored by Linus Torvalds

Merge branch 'for-3.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq

Pull workqueue fixes from Tejun Heo:
 "It's later than I'd like but well the timing just didn't work out this
  time.

  There are three bug fixes.  One from before 3.6-rc1 and two from the
  new CPU hotplug code.  Kudos to Lai for discovering all of them and
  providing fixes.

   * Atomicity bug when clearing a flag and setting another.  The two
     operations should have been atomic but weren't.  This bug has
     existed for a long time but is unlikely to have actually happened.
     The fix is safe and marked for -stable.  (A minimal sketch of the
     issue follows below.)

   * If CPU hotplug cycles happen back-to-back before workers finish the
     previous cycle, the states can get out of sync and things can get
     stuck.  Fixed by waiting for workers to complete before finishing
     the hotplug cycle (the handshake is sketched below as well).

   * While CPU hotplug is in progress, idle workers can be depleted,
     which can then lead to deadlock.  I think both happening together
     is highly unlikely, but it's still better to fix, and the fix isn't
     too scary.

  There's another workqueue-related regression which was reported a few
  days ago:

    https://bugzilla.kernel.org/show_bug.cgi?id=47301

  It's a bit of a head scratcher, but there is a semi-reliable
  reproduction case, so I'm hoping to resolve it soonish."
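
The first fix above concerns the UNBOUND -> REBIND morphing. Below is a
minimal userspace sketch (not the kernel code; the flag names, observer
loop, and thread setup are illustrative only) of why clearing one flag
and setting another in two separate steps can expose a bad intermediate
state, and how composing the new value locally and publishing it with a
single store avoids that:

/*
 * Minimal userspace sketch (NOT kernel code) of the UNBOUND -> REBIND
 * atomicity issue: an observer must never see the intermediate state
 * where UNBOUND has been cleared but REBIND is not yet set.
 */
#include <assert.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define F_UNBOUND (1 << 0)
#define F_REBIND  (1 << 1)

static _Atomic unsigned int flags = F_UNBOUND;

static void *morph(void *arg)
{
	/*
	 * Buggy pattern: two separate read-modify-write steps, e.g.
	 *
	 *   atomic_fetch_and(&flags, ~F_UNBOUND);
	 *   atomic_fetch_or(&flags, F_REBIND);
	 *
	 * Between them the observer can see flags with neither bit set.
	 * Fixed pattern: compose the new value locally and publish it
	 * with one store, so no intermediate state is ever visible.
	 */
	unsigned int new_flags = atomic_load(&flags);

	(void)arg;
	new_flags &= ~F_UNBOUND;
	new_flags |= F_REBIND;
	atomic_store(&flags, new_flags);
	return NULL;
}

static void *observe(void *arg)
{
	(void)arg;
	for (int i = 0; i < 1000000; i++) {
		unsigned int f = atomic_load(&flags);

		/* either still UNBOUND or already REBIND, never neither */
		assert(f & (F_UNBOUND | F_REBIND));
	}
	return NULL;
}

int main(void)
{
	pthread_t m, o;

	pthread_create(&o, NULL, observe, NULL);
	pthread_create(&m, NULL, morph, NULL);
	pthread_join(m, NULL);
	pthread_join(o, NULL);
	printf("no intermediate state observed\n");
	return 0;
}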
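
The second fix makes rebind_workers() wait until every idle worker has
passed its WORKER_REBIND wait before returning, using a counter plus a
completion (idle_rebind.cnt / idle_rebind.done in the diff below). Here
is a minimal userspace sketch of that counted handshake, assuming plain
pthreads and illustrative names; it is not the kernel implementation:

/*
 * Minimal userspace sketch (NOT kernel code) of a counted handshake:
 * the coordinator must not proceed until every worker has checked in.
 */
#include <pthread.h>
#include <stdio.h>

#define NR_WORKERS 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  done = PTHREAD_COND_INITIALIZER;
static int cnt;			/* outstanding workers + coordinator */

static void *worker(void *arg)
{
	(void)arg;
	/* ... the worker's own wait/rebind step would happen here ... */

	/* tell the coordinator we are past the wait */
	pthread_mutex_lock(&lock);
	if (!--cnt)
		pthread_cond_signal(&done);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t tid[NR_WORKERS];

	/* +1 is the coordinator's own reference, dropped below */
	cnt = NR_WORKERS + 1;

	for (int i = 0; i < NR_WORKERS; i++)
		pthread_create(&tid[i], NULL, worker, NULL);

	/* drop our reference and wait until every worker checked in */
	pthread_mutex_lock(&lock);
	--cnt;
	while (cnt)
		pthread_cond_wait(&done, &lock);
	pthread_mutex_unlock(&lock);

	printf("all workers passed the handshake\n");

	for (int i = 0; i < NR_WORKERS; i++)
		pthread_join(tid[i], NULL);
	return 0;
}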

* 'for-3.6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: fix possible idle worker depletion across CPU hotplug
  workqueue: restore POOL_MANAGING_WORKERS
  workqueue: fix possible deadlock in idle worker rebinding
  workqueue: move WORKER_REBIND clearing in rebind_workers() to the end of the function
  workqueue: UNBOUND -> REBIND morphing in rebind_workers() should be atomic
parents 274a2f5d ee378aa4
@@ -66,6 +66,7 @@ enum {
 
 	/* pool flags */
 	POOL_MANAGE_WORKERS	= 1 << 0,	/* need to manage workers */
+	POOL_MANAGING_WORKERS	= 1 << 1,	/* managing workers */
 
 	/* worker flags */
 	WORKER_STARTED		= 1 << 0,	/* started */
@@ -652,7 +653,7 @@ static bool need_to_manage_workers(struct worker_pool *pool)
 /* Do we have too many workers and should some go away? */
 static bool too_many_workers(struct worker_pool *pool)
 {
-	bool managing = mutex_is_locked(&pool->manager_mutex);
+	bool managing = pool->flags & POOL_MANAGING_WORKERS;
 	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 	int nr_busy = pool->nr_workers - nr_idle;
@@ -1326,6 +1327,15 @@ static void idle_worker_rebind(struct worker *worker)
 
 	/* we did our part, wait for rebind_workers() to finish up */
 	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
+
+	/*
+	 * rebind_workers() shouldn't finish until all workers passed the
+	 * above WORKER_REBIND wait.  Tell it when done.
+	 */
+	spin_lock_irq(&worker->pool->gcwq->lock);
+	if (!--worker->idle_rebind->cnt)
+		complete(&worker->idle_rebind->done);
+	spin_unlock_irq(&worker->pool->gcwq->lock);
 }
 
 /*
@@ -1396,12 +1406,15 @@ static void rebind_workers(struct global_cwq *gcwq)
 	/* set REBIND and kick idle ones, we'll wait for these later */
 	for_each_worker_pool(pool, gcwq) {
 		list_for_each_entry(worker, &pool->idle_list, entry) {
+			unsigned long worker_flags = worker->flags;
+
 			if (worker->flags & WORKER_REBIND)
 				continue;
 
-			/* morph UNBOUND to REBIND */
-			worker->flags &= ~WORKER_UNBOUND;
-			worker->flags |= WORKER_REBIND;
+			/* morph UNBOUND to REBIND atomically */
+			worker_flags &= ~WORKER_UNBOUND;
+			worker_flags |= WORKER_REBIND;
+			ACCESS_ONCE(worker->flags) = worker_flags;
 
 			idle_rebind.cnt++;
 			worker->idle_rebind = &idle_rebind;
@@ -1419,25 +1432,15 @@ static void rebind_workers(struct global_cwq *gcwq)
 		goto retry;
 	}
 
-	/*
-	 * All idle workers are rebound and waiting for %WORKER_REBIND to
-	 * be cleared inside idle_worker_rebind().  Clear and release.
-	 * Clearing %WORKER_REBIND from this foreign context is safe
-	 * because these workers are still guaranteed to be idle.
-	 */
-	for_each_worker_pool(pool, gcwq)
-		list_for_each_entry(worker, &pool->idle_list, entry)
-			worker->flags &= ~WORKER_REBIND;
-
-	wake_up_all(&gcwq->rebind_hold);
-
-	/* rebind busy workers */
+	/* all idle workers are rebound, rebind busy workers */
 	for_each_busy_worker(worker, i, pos, gcwq) {
 		struct work_struct *rebind_work = &worker->rebind_work;
+		unsigned long worker_flags = worker->flags;
 
-		/* morph UNBOUND to REBIND */
-		worker->flags &= ~WORKER_UNBOUND;
-		worker->flags |= WORKER_REBIND;
+		/* morph UNBOUND to REBIND atomically */
+		worker_flags &= ~WORKER_UNBOUND;
+		worker_flags |= WORKER_REBIND;
+		ACCESS_ONCE(worker->flags) = worker_flags;
 
 		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
 				     work_data_bits(rebind_work)))
@@ -1449,6 +1452,34 @@ static void rebind_workers(struct global_cwq *gcwq)
 				    worker->scheduled.next,
 				    work_color_to_flags(WORK_NO_COLOR));
 	}
+
+	/*
+	 * All idle workers are rebound and waiting for %WORKER_REBIND to
+	 * be cleared inside idle_worker_rebind().  Clear and release.
+	 * Clearing %WORKER_REBIND from this foreign context is safe
+	 * because these workers are still guaranteed to be idle.
+	 *
+	 * We need to make sure all idle workers passed WORKER_REBIND wait
+	 * in idle_worker_rebind() before returning; otherwise, workers can
+	 * get stuck at the wait if hotplug cycle repeats.
+	 */
+	idle_rebind.cnt = 1;
+	INIT_COMPLETION(idle_rebind.done);
+
+	for_each_worker_pool(pool, gcwq) {
+		list_for_each_entry(worker, &pool->idle_list, entry) {
+			worker->flags &= ~WORKER_REBIND;
+			idle_rebind.cnt++;
+		}
+	}
+
+	wake_up_all(&gcwq->rebind_hold);
+
+	if (--idle_rebind.cnt) {
+		spin_unlock_irq(&gcwq->lock);
+		wait_for_completion(&idle_rebind.done);
+		spin_lock_irq(&gcwq->lock);
+	}
 }
 
 static struct worker *alloc_worker(void)
@@ -1794,9 +1825,45 @@ static bool manage_workers(struct worker *worker)
 	struct worker_pool *pool = worker->pool;
 	bool ret = false;
 
-	if (!mutex_trylock(&pool->manager_mutex))
+	if (pool->flags & POOL_MANAGING_WORKERS)
 		return ret;
 
+	pool->flags |= POOL_MANAGING_WORKERS;
+
+	/*
+	 * To simplify both worker management and CPU hotplug, hold off
+	 * management while hotplug is in progress.  CPU hotplug path can't
+	 * grab %POOL_MANAGING_WORKERS to achieve this because that can
+	 * lead to idle worker depletion (all become busy thinking someone
+	 * else is managing) which in turn can result in deadlock under
+	 * extreme circumstances.  Use @pool->manager_mutex to synchronize
+	 * manager against CPU hotplug.
+	 *
+	 * manager_mutex would always be free unless CPU hotplug is in
+	 * progress.  trylock first without dropping @gcwq->lock.
+	 */
+	if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
+		spin_unlock_irq(&pool->gcwq->lock);
+		mutex_lock(&pool->manager_mutex);
+		/*
+		 * CPU hotplug could have happened while we were waiting
+		 * for manager_mutex.  Hotplug itself can't handle us
+		 * because manager isn't either on idle or busy list, and
+		 * @gcwq's state and ours could have deviated.
+		 *
+		 * As hotplug is now excluded via manager_mutex, we can
+		 * simply try to bind.  It will succeed or fail depending
+		 * on @gcwq's current state.  Try it and adjust
+		 * %WORKER_UNBOUND accordingly.
+		 */
+		if (worker_maybe_bind_and_lock(worker))
+			worker->flags &= ~WORKER_UNBOUND;
+		else
+			worker->flags |= WORKER_UNBOUND;
+
+		ret = true;
+	}
+
 	pool->flags &= ~POOL_MANAGE_WORKERS;
 
 	/*
@@ -1806,6 +1873,7 @@ static bool manage_workers(struct worker *worker)
 	ret |= maybe_destroy_workers(pool);
 	ret |= maybe_create_worker(pool);
 
+	pool->flags &= ~POOL_MANAGING_WORKERS;
 	mutex_unlock(&pool->manager_mutex);
 	return ret;
 }