Commit 1c5aefb5 authored by Linus Torvalds

Merge branch 'futex-fixes' (futex fixes from Thomas Gleixner)

Merge futex fixes from Thomas Gleixner:
 "So with more awake and less futex wreckaged brain, I went through my
  list of points again and came up with the following 4 patches.

  1) Prevent pi requeueing on the same futex

     I kept Kees' check for uaddr1 == uaddr2 as an early check for private
     futexes and added a key comparison to both futex_requeue and
     futex_wait_requeue_pi.

     Sebastian, sorry for the confusion yesterday night.  I really
     misunderstood your question.

     You are right: the check is pointless for shared futexes where the
     same physical address is mapped to two different virtual addresses.

  2) Sanity check atomic acquisition in futex_lock_pi_atomic

     That's basically what Darren suggested.

     I just simplified it to use futex_top_waiter() to find kernel
     internal state.  If state is found return -EINVAL and do not bother
     to fix up the user space variable.  It's corrupted already.

  3) Ensure state consistency in futex_unlock_pi

     The code is silly versus the owner died bit.  There is no point to
     preserve it on unlock when the user space thread owns the futex.

     What's worse is that it does not update the user space value when
     the owner died bit is set.  So the kernel itself creates observable
     inconsistency.

     Another "optimization" is to retry an atomic unlock.  That's
     pointless as in a sane environment user space would not call into
     that code if it could have unlocked it atomically.  So we always
     check whether there is kernel state around and only if there is
     none, we do the unlock by setting the user space value to 0.

  4) Sanitize lookup_pi_state

     lookup_pi_state is ambiguous about TID == 0 in the user space value.

     This can be a valid state even if there is kernel state on this
     uaddr, but we miss a few corner case checks.

     I tried to come up with a smaller solution hacking the checks into
     the current cruft, but it turned out to be ugly as hell and I got
     more confused than I was before.  So I rewrote the sanity checks
     along the state documentation with an awful lot of commentary"

* emailed patches from Thomas Gleixner <tglx@linutronix.de>:
  futex: Make lookup_pi_state more robust
  futex: Always cleanup owner tid in unlock_pi
  futex: Validate atomic acquisition in futex_lock_pi_atomic()
  futex-prevent-requeue-pi-on-same-futex.patch futex: Forbid uaddr == uaddr2 in futex_requeue(..., requeue_pi=1)
parents 54539cd2 54a21788
...@@ -743,10 +743,58 @@ void exit_pi_state_list(struct task_struct *curr) ...@@ -743,10 +743,58 @@ void exit_pi_state_list(struct task_struct *curr)
raw_spin_unlock_irq(&curr->pi_lock); raw_spin_unlock_irq(&curr->pi_lock);
} }
/*
* We need to check the following states:
*
* Waiter | pi_state | pi->owner | uTID | uODIED | ?
*
* [1] NULL | --- | --- | 0 | 0/1 | Valid
* [2] NULL | --- | --- | >0 | 0/1 | Valid
*
* [3] Found | NULL | -- | Any | 0/1 | Invalid
*
* [4] Found | Found | NULL | 0 | 1 | Valid
* [5] Found | Found | NULL | >0 | 1 | Invalid
*
* [6] Found | Found | task | 0 | 1 | Valid
*
* [7] Found | Found | NULL | Any | 0 | Invalid
*
* [8] Found | Found | task | ==taskTID | 0/1 | Valid
* [9] Found | Found | task | 0 | 0 | Invalid
* [10] Found | Found | task | !=taskTID | 0/1 | Invalid
*
* [1] Indicates that the kernel can acquire the futex atomically. We
* came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
*
* [2] Valid, if TID does not belong to a kernel thread. If no matching
* thread is found then it indicates that the owner TID has died.
*
* [3] Invalid. The waiter is queued on a non PI futex
*
* [4] Valid state after exit_robust_list(), which sets the user space
* value to FUTEX_WAITERS | FUTEX_OWNER_DIED.
*
* [5] The user space value got manipulated between exit_robust_list()
* and exit_pi_state_list()
*
* [6] Valid state after exit_pi_state_list() which sets the new owner in
* the pi_state but cannot access the user space value.
*
* [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set.
*
* [8] Owner and user space value match
*
* [9] There is no transient state which sets the user space TID to 0
* except exit_robust_list(), but this is indicated by the
* FUTEX_OWNER_DIED bit. See [4]
*
* [10] There is no transient state which leaves owner and user space
* TID out of sync.
*/
static int static int
lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
union futex_key *key, struct futex_pi_state **ps, union futex_key *key, struct futex_pi_state **ps)
struct task_struct *task)
{ {
struct futex_pi_state *pi_state = NULL; struct futex_pi_state *pi_state = NULL;
struct futex_q *this, *next; struct futex_q *this, *next;
...@@ -756,12 +804,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, ...@@ -756,12 +804,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
plist_for_each_entry_safe(this, next, &hb->chain, list) { plist_for_each_entry_safe(this, next, &hb->chain, list) {
if (match_futex(&this->key, key)) { if (match_futex(&this->key, key)) {
/* /*
* Another waiter already exists - bump up * Sanity check the waiter before increasing
* the refcount and return its pi_state: * the refcount and attaching to it.
*/ */
pi_state = this->pi_state; pi_state = this->pi_state;
/* /*
* Userspace might have messed up non-PI and PI futexes * Userspace might have messed up non-PI and
* PI futexes [3]
*/ */
if (unlikely(!pi_state)) if (unlikely(!pi_state))
return -EINVAL; return -EINVAL;
...@@ -769,44 +818,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, ...@@ -769,44 +818,70 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
WARN_ON(!atomic_read(&pi_state->refcount)); WARN_ON(!atomic_read(&pi_state->refcount));
/* /*
* When pi_state->owner is NULL then the owner died * Handle the owner died case:
* and another waiter is on the fly. pi_state->owner
* is fixed up by the task which acquires
* pi_state->rt_mutex.
*
* We do not check for pid == 0 which can happen when
* the owner died and robust_list_exit() cleared the
* TID.
*/ */
if (pid && pi_state->owner) { if (uval & FUTEX_OWNER_DIED) {
/* /*
* Bail out if user space manipulated the * exit_pi_state_list sets owner to NULL and
* futex value. * wakes the topmost waiter. The task which
* acquires the pi_state->rt_mutex will fixup
* owner.
*/ */
if (pid != task_pid_vnr(pi_state->owner)) if (!pi_state->owner) {
/*
* No pi state owner, but the user
* space TID is not 0. Inconsistent
* state. [5]
*/
if (pid)
return -EINVAL;
/*
* Take a ref on the state and
* return. [4]
*/
goto out_state;
}
/*
* If TID is 0, then either the dying owner
* has not yet executed exit_pi_state_list()
* or some waiter acquired the rtmutex in the
* pi state, but did not yet fixup the TID in
* user space.
*
* Take a ref on the state and return. [6]
*/
if (!pid)
goto out_state;
} else {
/*
* If the owner died bit is not set,
* then the pi_state must have an
* owner. [7]
*/
if (!pi_state->owner)
return -EINVAL; return -EINVAL;
} }
/* /*
* Protect against a corrupted uval. If uval * Bail out if user space manipulated the
* is 0x80000000 then pid is 0 and the waiter * futex value. If pi state exists then the
* bit is set. So the deadlock check in the * owner TID must be the same as the user
* calling code has failed and we did not fall * space TID. [9/10]
* into the check above due to !pid.
*/ */
if (task && pi_state->owner == task) if (pid != task_pid_vnr(pi_state->owner))
return -EDEADLK; return -EINVAL;
out_state:
atomic_inc(&pi_state->refcount); atomic_inc(&pi_state->refcount);
*ps = pi_state; *ps = pi_state;
return 0; return 0;
} }
} }
/* /*
* We are the first waiter - try to look up the real owner and attach * We are the first waiter - try to look up the real owner and attach
* the new pi_state to it, but bail out when TID = 0 * the new pi_state to it, but bail out when TID = 0 [1]
*/ */
if (!pid) if (!pid)
return -ESRCH; return -ESRCH;
...@@ -839,6 +914,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, ...@@ -839,6 +914,9 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
return ret; return ret;
} }
/*
* No existing pi state. First waiter. [2]
*/
pi_state = alloc_pi_state(); pi_state = alloc_pi_state();
/* /*
...@@ -910,10 +988,18 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, ...@@ -910,10 +988,18 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
return -EDEADLK; return -EDEADLK;
/* /*
* Surprise - we got the lock. Just return to userspace: * Surprise - we got the lock, but we do not trust user space at all.
*/ */
if (unlikely(!curval)) if (unlikely(!curval)) {
return 1; /*
* We verify whether there is kernel state for this
* futex. If not, we can safely assume, that the 0 ->
* TID transition is correct. If state exists, we do
* not bother to fixup the user space state as it was
* corrupted already.
*/
return futex_top_waiter(hb, key) ? -EINVAL : 1;
}
uval = curval; uval = curval;
...@@ -951,7 +1037,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, ...@@ -951,7 +1037,7 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
* We dont have the lock. Look up the PI state (or create it if * We dont have the lock. Look up the PI state (or create it if
* we are the first waiter): * we are the first waiter):
*/ */
ret = lookup_pi_state(uval, hb, key, ps, task); ret = lookup_pi_state(uval, hb, key, ps);
if (unlikely(ret)) { if (unlikely(ret)) {
switch (ret) { switch (ret) {
...@@ -1044,6 +1130,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) ...@@ -1044,6 +1130,7 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
struct task_struct *new_owner; struct task_struct *new_owner;
struct futex_pi_state *pi_state = this->pi_state; struct futex_pi_state *pi_state = this->pi_state;
u32 uninitialized_var(curval), newval; u32 uninitialized_var(curval), newval;
int ret = 0;
if (!pi_state) if (!pi_state)
return -EINVAL; return -EINVAL;
...@@ -1067,23 +1154,19 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) ...@@ -1067,23 +1154,19 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
new_owner = this->task; new_owner = this->task;
/* /*
* We pass it to the next owner. (The WAITERS bit is always * We pass it to the next owner. The WAITERS bit is always
* kept enabled while there is PI state around. We must also * kept enabled while there is PI state around. We cleanup the
* preserve the owner died bit.) * owner died bit, because we are the owner.
*/ */
if (!(uval & FUTEX_OWNER_DIED)) { newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
int ret = 0;
newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))
ret = -EFAULT; ret = -EFAULT;
else if (curval != uval) else if (curval != uval)
ret = -EINVAL; ret = -EINVAL;
if (ret) { if (ret) {
raw_spin_unlock(&pi_state->pi_mutex.wait_lock); raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
return ret; return ret;
}
} }
raw_spin_lock_irq(&pi_state->owner->pi_lock); raw_spin_lock_irq(&pi_state->owner->pi_lock);
...@@ -1441,6 +1524,13 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, ...@@ -1441,6 +1524,13 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
struct futex_q *this, *next; struct futex_q *this, *next;
if (requeue_pi) { if (requeue_pi) {
/*
* Requeue PI only works on two distinct uaddrs. This
* check is only valid for private futexes. See below.
*/
if (uaddr1 == uaddr2)
return -EINVAL;
/* /*
* requeue_pi requires a pi_state, try to allocate it now * requeue_pi requires a pi_state, try to allocate it now
* without any locks in case it fails. * without any locks in case it fails.
...@@ -1479,6 +1569,15 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, ...@@ -1479,6 +1569,15 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
if (unlikely(ret != 0)) if (unlikely(ret != 0))
goto out_put_key1; goto out_put_key1;
/*
* The check above which compares uaddrs is not sufficient for
* shared futexes. We need to compare the keys:
*/
if (requeue_pi && match_futex(&key1, &key2)) {
ret = -EINVAL;
goto out_put_keys;
}
hb1 = hash_futex(&key1); hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2); hb2 = hash_futex(&key2);
...@@ -1544,7 +1643,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, ...@@ -1544,7 +1643,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
* rereading and handing potential crap to * rereading and handing potential crap to
* lookup_pi_state. * lookup_pi_state.
*/ */
ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL); ret = lookup_pi_state(ret, hb2, &key2, &pi_state);
} }
switch (ret) { switch (ret) {
...@@ -2327,9 +2426,10 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) ...@@ -2327,9 +2426,10 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
/* /*
* To avoid races, try to do the TID -> 0 atomic transition * To avoid races, try to do the TID -> 0 atomic transition
* again. If it succeeds then we can return without waking * again. If it succeeds then we can return without waking
* anyone else up: * anyone else up. We only try this if neither the waiters nor
* the owner died bit are set.
*/ */
if (!(uval & FUTEX_OWNER_DIED) && if (!(uval & ~FUTEX_TID_MASK) &&
cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0)) cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0))
goto pi_faulted; goto pi_faulted;
/* /*
...@@ -2359,11 +2459,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags) ...@@ -2359,11 +2459,9 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
/* /*
* No waiters - kernel unlocks the futex: * No waiters - kernel unlocks the futex:
*/ */
if (!(uval & FUTEX_OWNER_DIED)) { ret = unlock_futex_pi(uaddr, uval);
ret = unlock_futex_pi(uaddr, uval); if (ret == -EFAULT)
if (ret == -EFAULT) goto pi_faulted;
goto pi_faulted;
}
out_unlock: out_unlock:
spin_unlock(&hb->lock); spin_unlock(&hb->lock);
...@@ -2525,6 +2623,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, ...@@ -2525,6 +2623,15 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (ret) if (ret)
goto out_key2; goto out_key2;
/*
* The check above which compares uaddrs is not sufficient for
* shared futexes. We need to compare the keys:
*/
if (match_futex(&q.key, &key2)) {
ret = -EINVAL;
goto out_put_keys;
}
/* Queue the futex_q, drop the hb lock, wait for wakeup. */ /* Queue the futex_q, drop the hb lock, wait for wakeup. */
futex_wait_queue_me(hb, &q, to); futex_wait_queue_me(hb, &q, to);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment