Commit c516df97 authored by Nicolai Hähnle, committed by Ingo Molnar

locking/ww_mutex: Optimize ww-mutexes by yielding to other waiters from optimistic spin

Lock stealing is less beneficial for w/w mutexes since we may just end up
backing off if we stole from a thread with an earlier acquire stamp that
already holds another w/w mutex that we also need. So don't spin
optimistically unless we are sure that there is no other waiter that might
cause us to back off.
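
For context, a minimal sketch of the acquire/backoff pattern this refers to
(the helper lock_pair(), the class sketch_ww_class and the fixed two-lock
shape are illustrative only and not part of this patch; the ww_mutex/ww_acquire
calls are the regular API): a context that already holds one w/w mutex can get
-EDEADLK on the next one, at which point it must drop everything it holds,
sleep-wait for the contended lock and redo the whole sequence.

  #include <linux/ww_mutex.h>

  static DEFINE_WW_CLASS(sketch_ww_class); /* illustrative class, not in this patch */

  static void lock_pair(struct ww_mutex *a, struct ww_mutex *b)
  {
          struct ww_acquire_ctx ctx;
          struct ww_mutex *contended = NULL;      /* the lock we backed off on */

          ww_acquire_init(&ctx, &sketch_ww_class);
  retry:
          /* Skip whichever lock was already taken via the slow path below. */
          if (a != contended && ww_mutex_lock(a, &ctx) == -EDEADLK) {
                  if (contended)
                          ww_mutex_unlock(contended);
                  contended = a;
                  goto backoff;
          }
          if (b != contended && ww_mutex_lock(b, &ctx) == -EDEADLK) {
                  ww_mutex_unlock(a);
                  contended = b;
                  goto backoff;
          }
          ww_acquire_done(&ctx);

          /* ... critical section touching both objects ... */

          ww_mutex_unlock(a);
          ww_mutex_unlock(b);
          ww_acquire_fini(&ctx);
          return;

  backoff:
          /*
           * We lost to an earlier-stamped context: everything we held has
           * been dropped again, so sleep-wait for the contended lock and
           * redo the whole sequence. This is the wasted work a spinner
           * risks causing when it steals the lock from such a waiter.
           */
          ww_mutex_lock_slow(contended, &ctx);
          goto retry;
  }

With the check added below, a spinner that sees MUTEX_FLAG_WAITERS set simply
stops spinning instead of risking exactly this round trip for a waiter with an
earlier stamp.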

Median timings taken of a contention-heavy GPU workload:

Before:

  real    0m52.946s
  user    0m7.272s
  sys     1m55.964s

After:

  real    0m53.086s
  user    0m7.360s
  sys     1m46.204s

This particular workload still spends 20%-25% of CPU in mutex_spin_on_owner
according to perf, but my attempts to further reduce this spinning based on
various heuristics all led to an increase in measured wall time despite
the decrease in sys time.

Signed-off-by: Nicolai Hähnle <Nicolai.Haehnle@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Maarten Lankhorst <dev@mblankhorst.nl>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dri-devel@lists.freedesktop.org
Link: http://lkml.kernel.org/r/1482346000-9927-11-git-send-email-nhaehnle@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent 25f13b40
@@ -371,6 +371,49 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
 }
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+
+static inline
+bool ww_mutex_spin_on_owner(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
+                            struct mutex_waiter *waiter)
+{
+        struct ww_mutex *ww;
+
+        ww = container_of(lock, struct ww_mutex, base);
+
+        /*
+         * If ww->ctx is set the contents are undefined, only
+         * by acquiring wait_lock there is a guarantee that
+         * they are not invalid when reading.
+         *
+         * As such, when deadlock detection needs to be
+         * performed the optimistic spinning cannot be done.
+         *
+         * Check this in every inner iteration because we may
+         * be racing against another thread's ww_mutex_lock.
+         */
+        if (ww_ctx->acquired > 0 && READ_ONCE(ww->ctx))
+                return false;
+
+        /*
+         * If we aren't on the wait list yet, cancel the spin
+         * if there are waiters. We want to avoid stealing the
+         * lock from a waiter with an earlier stamp, since the
+         * other thread may already own a lock that we also
+         * need.
+         */
+        if (!waiter && (atomic_long_read(&lock->owner) & MUTEX_FLAG_WAITERS))
+                return false;
+
+        /*
+         * Similarly, stop spinning if we are no longer the
+         * first waiter.
+         */
+        if (waiter && !__mutex_waiter_is_first(lock, waiter))
+                return false;
+
+        return true;
+}
+
 /*
  * Look out! "owner" is an entirely speculative pointer access and not
  * reliable.
@@ -379,7 +422,7 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)
  */
 static noinline
 bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
-                         struct ww_acquire_ctx *ww_ctx)
+                         struct ww_acquire_ctx *ww_ctx, struct mutex_waiter *waiter)
 {
         bool ret = true;
 
@@ -402,26 +445,9 @@ bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner,
                         break;
                 }
 
-                if (ww_ctx && ww_ctx->acquired > 0) {
-                        struct ww_mutex *ww;
-
-                        ww = container_of(lock, struct ww_mutex, base);
-
-                        /*
-                         * If ww->ctx is set the contents are undefined, only
-                         * by acquiring wait_lock there is a guarantee that
-                         * they are not invalid when reading.
-                         *
-                         * As such, when deadlock detection needs to be
-                         * performed the optimistic spinning cannot be done.
-                         *
-                         * Check this in every inner iteration because we may
-                         * be racing against another thread's ww_mutex_lock.
-                         */
-                        if (READ_ONCE(ww->ctx)) {
-                                ret = false;
-                                break;
-                        }
+                if (ww_ctx && !ww_mutex_spin_on_owner(lock, ww_ctx, waiter)) {
+                        ret = false;
+                        break;
                 }
 
                 cpu_relax();
@@ -484,7 +510,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
  */
 static __always_inline bool
 mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
-                      const bool use_ww_ctx, const bool waiter)
+                      const bool use_ww_ctx, struct mutex_waiter *waiter)
 {
         if (!waiter) {
                 /*
@@ -518,7 +544,7 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
                  * There's an owner, wait for it to either
                  * release the lock or go to sleep.
                  */
-                if (!mutex_spin_on_owner(lock, owner, ww_ctx))
+                if (!mutex_spin_on_owner(lock, owner, ww_ctx, waiter))
                         goto fail_unlock;
 
                 /*
@@ -560,7 +586,7 @@ mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
 #else
 static __always_inline bool
 mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
-                      const bool use_ww_ctx, const bool waiter)
+                      const bool use_ww_ctx, struct mutex_waiter *waiter)
 {
         return false;
 }
@@ -731,7 +757,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
         mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
         if (__mutex_trylock(lock) ||
-            mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, false)) {
+            mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, NULL)) {
                 /* got the lock, yay! */
                 lock_acquired(&lock->dep_map, ip);
                 if (use_ww_ctx && ww_ctx)
@@ -820,7 +846,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
                  * or we must see its unlock and acquire.
                  */
                 if (__mutex_trylock(lock) ||
-                    (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, true)))
+                    (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, &waiter)))
                         break;
 
                 spin_lock_mutex(&lock->wait_lock, flags);