Commit e274795e authored by Peter Zijlstra, committed by Ingo Molnar

locking/mutex: Fix mutex handoff

While reviewing the ww_mutex patches, I noticed that it was still
possible to (incorrectly) succeed for (incorrect) code like:

	mutex_lock(&a);
	mutex_lock(&a);

This was possible if the second mutex_lock() would block (as expected)
but then receive a spurious wakeup. At that point it would find itself
at the front of the queue, request a handoff and instantly claim
ownership and continue, since owner would point to itself.

Avoid this scenario and simplify the code by introducing a third low
bit to signal handoff pickup. So once we request handoff, unlock
clears the handoff bit and sets the pickup bit along with the new
owner.

This also removes the need for the .handoff argument to
__mutex_trylock(), since that becomes superfluous with PICKUP.
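
As a rough illustration of the new bit protocol (a stand-alone userspace
sketch using C11 atomics, not the kernel code: the toy_* names are made
up and the cmpxchg retry loop is elided):

	#include <stdatomic.h>
	#include <stdint.h>
	#include <stdio.h>

	#define MUTEX_FLAG_WAITERS	0x01UL
	#define MUTEX_FLAG_HANDOFF	0x02UL
	#define MUTEX_FLAG_PICKUP	0x04UL
	#define MUTEX_FLAGS		0x07UL

	struct toy_mutex { _Atomic uintptr_t owner; };

	/* Unlock handing the lock to @task: keep WAITERS, drop HANDOFF,
	 * set PICKUP so that only @task may claim ownership. */
	static void toy_handoff(struct toy_mutex *lock, uintptr_t task)
	{
		uintptr_t owner = atomic_load(&lock->owner);
		uintptr_t next = (owner & MUTEX_FLAG_WAITERS) | task;

		if (task)
			next |= MUTEX_FLAG_PICKUP;

		atomic_store_explicit(&lock->owner, next, memory_order_release);
	}

	/* A (possibly spuriously) woken waiter must now see PICKUP before
	 * treating the lock as handed off, even if owner already points
	 * at itself. */
	static int toy_can_pickup(struct toy_mutex *lock, uintptr_t task)
	{
		uintptr_t owner = atomic_load_explicit(&lock->owner,
						       memory_order_acquire);

		return (owner & ~MUTEX_FLAGS) == task &&
		       (owner & MUTEX_FLAG_PICKUP);
	}

	int main(void)
	{
		struct toy_mutex m = { 0 };
		uintptr_t waiter = 0x1000;	/* stands in for an aligned task pointer */

		printf("before handoff: %d\n", toy_can_pickup(&m, waiter));
		toy_handoff(&m, waiter);
		printf("after handoff:  %d\n", toy_can_pickup(&m, waiter));
		return 0;
	}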

In order to guarantee enough low bits, ensure task_struct alignment is
at least L1_CACHE_BYTES (which seems a good idea regardless).
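
(As a quick, illustrative aside: any alignment that is a multiple of 8
already leaves the three low pointer bits clear; the hypothetical
snippet below, which assumes a 64-byte cache line, just demonstrates
that an aligned allocation has those bits free.)

	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		/* 128 is a multiple of the requested 64-byte alignment,
		 * as aligned_alloc() requires. */
		void *p = aligned_alloc(64, 128);

		/* Prints 0: bits 0-2 are available for WAITERS/HANDOFF/PICKUP. */
		printf("low bits: %lx\n", (unsigned long)((uintptr_t)p & 0x07));
		free(p);
		return 0;
	}
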
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 9d659ae1 ("locking/mutex: Add lock handoff to avoid starvation")
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 52b94129
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -65,7 +65,7 @@ struct mutex {
 
 static inline struct task_struct *__mutex_owner(struct mutex *lock)
 {
-	return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x03);
+	return (struct task_struct *)(atomic_long_read(&lock->owner) & ~0x07);
 }
 
 /*
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -432,11 +432,13 @@ void __init fork_init(void)
 	int i;
 #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
-#define ARCH_MIN_TASKALIGN	L1_CACHE_BYTES
+#define ARCH_MIN_TASKALIGN	0
 #endif
+	int align = max_t(int, L1_CACHE_BYTES, ARCH_MIN_TASKALIGN);
+
 	/* create a slab on which task_structs can be allocated */
 	task_struct_cachep = kmem_cache_create("task_struct",
-			arch_task_struct_size, ARCH_MIN_TASKALIGN,
+			arch_task_struct_size, align,
 			SLAB_PANIC|SLAB_NOTRACK|SLAB_ACCOUNT, NULL);
 #endif
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -50,16 +50,17 @@ EXPORT_SYMBOL(__mutex_init);
 /*
  * @owner: contains: 'struct task_struct *' to the current lock owner,
  * NULL means not owned. Since task_struct pointers are aligned at
- * ARCH_MIN_TASKALIGN (which is at least sizeof(void *)), we have low
- * bits to store extra state.
+ * at least L1_CACHE_BYTES, we have low bits to store extra state.
  *
  * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup.
  * Bit1 indicates unlock needs to hand the lock to the top-waiter
+ * Bit2 indicates handoff has been done and we're waiting for pickup.
  */
 #define MUTEX_FLAG_WAITERS	0x01
 #define MUTEX_FLAG_HANDOFF	0x02
+#define MUTEX_FLAG_PICKUP	0x04
 
-#define MUTEX_FLAGS		0x03
+#define MUTEX_FLAGS		0x07
 
 static inline struct task_struct *__owner_task(unsigned long owner)
 {
@@ -72,38 +73,29 @@ static inline unsigned long __owner_flags(unsigned long owner)
 }
 
 /*
- * Actual trylock that will work on any unlocked state.
- *
- * When setting the owner field, we must preserve the low flag bits.
- *
- * Be careful with @handoff, only set that in a wait-loop (where you set
- * HANDOFF) to avoid recursive lock attempts.
+ * Trylock variant that returns the owning task on failure.
  */
-static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
+static inline struct task_struct *__mutex_trylock_or_owner(struct mutex *lock)
 {
 	unsigned long owner, curr = (unsigned long)current;
 
 	owner = atomic_long_read(&lock->owner);
 	for (;;) { /* must loop, can race against a flag */
 		unsigned long old, flags = __owner_flags(owner);
+		unsigned long task = owner & ~MUTEX_FLAGS;
 
-		if (__owner_task(owner)) {
-			if (handoff && unlikely(__owner_task(owner) == current)) {
-				/*
-				 * Provide ACQUIRE semantics for the lock-handoff.
-				 *
-				 * We cannot easily use load-acquire here, since
-				 * the actual load is a failed cmpxchg, which
-				 * doesn't imply any barriers.
-				 *
-				 * Also, this is a fairly unlikely scenario, and
-				 * this contains the cost.
-				 */
-				smp_mb(); /* ACQUIRE */
-				return true;
-			}
+		if (task) {
+			if (likely(task != curr))
+				break;
 
-			return false;
+			if (likely(!(flags & MUTEX_FLAG_PICKUP)))
+				break;
+
+			flags &= ~MUTEX_FLAG_PICKUP;
+		} else {
+#ifdef CONFIG_DEBUG_MUTEXES
+			DEBUG_LOCKS_WARN_ON(flags & MUTEX_FLAG_PICKUP);
+#endif
 		}
 
 		/*
@@ -111,15 +103,24 @@ static inline bool __mutex_trylock(struct mutex *lock, const bool handoff)
 		 * past the point where we acquire it. This would be possible
 		 * if we (accidentally) set the bit on an unlocked mutex.
 		 */
-		if (handoff)
-			flags &= ~MUTEX_FLAG_HANDOFF;
+		flags &= ~MUTEX_FLAG_HANDOFF;
 
 		old = atomic_long_cmpxchg_acquire(&lock->owner, owner, curr | flags);
 		if (old == owner)
-			return true;
+			return NULL;
 
 		owner = old;
 	}
+
+	return __owner_task(owner);
+}
+
+/*
+ * Actual trylock that will work on any unlocked state.
+ */
+static inline bool __mutex_trylock(struct mutex *lock)
+{
+	return !__mutex_trylock_or_owner(lock);
 }
 
 #ifndef CONFIG_DEBUG_LOCK_ALLOC
@@ -171,9 +172,9 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait
 
 /*
  * Give up ownership to a specific task, when @task = NULL, this is equivalent
- * to a regular unlock. Clears HANDOFF, preserves WAITERS. Provides RELEASE
- * semantics like a regular unlock, the __mutex_trylock() provides matching
- * ACQUIRE semantics for the handoff.
+ * to a regular unlock. Sets PICKUP on a handoff, clears HANDOFF, preserves
+ * WAITERS. Provides RELEASE semantics like a regular unlock, the
+ * __mutex_trylock() provides matching ACQUIRE semantics for the handoff.
  */
 static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
 {
@@ -184,10 +185,13 @@ static void __mutex_handoff(struct mutex *lock, struct task_struct *task)
 
 #ifdef CONFIG_DEBUG_MUTEXES
 		DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+		DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
 #endif
 
 		new = (owner & MUTEX_FLAG_WAITERS);
 		new |= (unsigned long)task;
+		if (task)
+			new |= MUTEX_FLAG_PICKUP;
 
 		old = atomic_long_cmpxchg_release(&lock->owner, owner, new);
 		if (old == owner)
@@ -435,8 +439,6 @@ static bool mutex_optimistic_spin(struct mutex *lock,
 				  struct ww_acquire_ctx *ww_ctx,
 				  const bool use_ww_ctx, const bool waiter)
 {
-	struct task_struct *task = current;
-
 	if (!waiter) {
 		/*
 		 * The purpose of the mutex_can_spin_on_owner() function is
@@ -476,24 +478,17 @@ static bool mutex_optimistic_spin(struct mutex *lock,
 			goto fail_unlock;
 		}
 
+		/* Try to acquire the mutex... */
+		owner = __mutex_trylock_or_owner(lock);
+		if (!owner)
+			break;
+
 		/*
-		 * If there's an owner, wait for it to either
+		 * There's an owner, wait for it to either
 		 * release the lock or go to sleep.
 		 */
-		owner = __mutex_owner(lock);
-		if (owner) {
-			if (waiter && owner == task) {
-				smp_mb(); /* ACQUIRE */
-				break;
-			}
-
-			if (!mutex_spin_on_owner(lock, owner))
-				goto fail_unlock;
-		}
-
-		/* Try to acquire the mutex if it is unlocked. */
-		if (__mutex_trylock(lock, waiter))
-			break;
+		if (!mutex_spin_on_owner(lock, owner))
+			goto fail_unlock;
 
 		/*
 		 * The cpu_relax() call is a compiler barrier which forces
@@ -637,7 +632,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	preempt_disable();
 	mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
-	if (__mutex_trylock(lock, false) ||
+	if (__mutex_trylock(lock) ||
 	    mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, false)) {
 		/* got the lock, yay! */
 		lock_acquired(&lock->dep_map, ip);
@@ -651,7 +646,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	/*
 	 * After waiting to acquire the wait_lock, try again.
 	 */
-	if (__mutex_trylock(lock, false))
+	if (__mutex_trylock(lock))
 		goto skip_wait;
 
 	debug_mutex_lock_common(lock, &waiter);
@@ -674,7 +669,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 * before testing the error conditions to make sure we pick up
 		 * the handoff.
 		 */
-		if (__mutex_trylock(lock, first))
+		if (__mutex_trylock(lock))
 			goto acquired;
 
 		/*
@@ -707,8 +702,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 * state back to RUNNING and fall through the next schedule(),
 		 * or we must see its unlock and acquire.
 		 */
-		if ((first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)) ||
-		     __mutex_trylock(lock, first))
+		if (__mutex_trylock(lock) ||
+		    (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)))
 			break;
 
 		spin_lock_mutex(&lock->wait_lock, flags);
@@ -865,6 +860,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
 
 #ifdef CONFIG_DEBUG_MUTEXES
 		DEBUG_LOCKS_WARN_ON(__owner_task(owner) != current);
+		DEBUG_LOCKS_WARN_ON(owner & MUTEX_FLAG_PICKUP);
 #endif
 
 		if (owner & MUTEX_FLAG_HANDOFF)
@@ -1003,7 +999,7 @@ __ww_mutex_lock_interruptible_slowpath(struct ww_mutex *lock,
  */
 int __sched mutex_trylock(struct mutex *lock)
 {
-	bool locked = __mutex_trylock(lock, false);
+	bool locked = __mutex_trylock(lock);
 
 	if (locked)
 		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);