Commit f7853c34 authored by Peter Zijlstra

locking/rtmutex: Fix task->pi_waiters integrity

Henry reported that rt_mutex_adjust_prio_chain() has an ordering
problem and puts the lie to the comment in [7]. Sharing the sort key
between lock->waiters and owner->pi_waiters *does* create problems,
since unlike what the comment claims, holding [L] is insufficient.

Notably, consider:

	A
      /   \
     M1   M2
     |     |
     B     C

That is, task A owns both M1 and M2, B and C block on them. In this
case a concurrent chain walk (B & C) will modify their resp. sort keys
in [7] while holding M1->wait_lock and M2->wait_lock. So holding [L]
is meaningless; they're different Ls.

This then gives rise to a race condition between [7] and [11], where
the requeue of pi_waiters will observe an inconsistent tree order.

	B				C

  (holds M1->wait_lock,		(holds M2->wait_lock,
   holds B->pi_lock)		 holds A->pi_lock)

  [7]
  waiter_update_prio();
  ...
  [8]
  raw_spin_unlock(B->pi_lock);
  ...
  [10]
  raw_spin_lock(A->pi_lock);

				[11]
				rt_mutex_enqueue_pi();
				// observes inconsistent A->pi_waiters
				// tree order

Fixing this means either extending the range of the owner lock from
[10-13] to [6-13], with the immediate problem that this means [6-8]
hold both blocked and owner locks, or duplicating the sort key.

Since the locking in chain walk is horrible enough without having to
consider pi_lock nesting rules, duplicate the sort key instead.

By giving each tree its own sort key, the above race becomes harmless:
if C sees B at the old location, then B will correct things (if they
need correcting) when it walks up the chain and reaches A.
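
Condensed from the patch below as a quick reference (all identifiers are
taken from this change, nothing new is introduced): each tree gets its
own copy of the sort key, and each copy is only written under the lock
that orders that tree, so a requeue never observes a key being mutated
under some other lock's wait_lock. Roughly:

	struct rt_waiter_node {
		struct rb_node	entry;
		int		prio;
		u64		deadline;
	};

	struct rt_mutex_waiter {
		struct rt_waiter_node	tree;		/* lock->waiters, ordered under [L] */
		struct rt_waiter_node	pi_tree;	/* owner->pi_waiters, ordered under owner [P] */
		/* ... */
	};

	/* [7] update the tree copy; only lock->wait_lock is held */
	waiter->tree.prio     = __waiter_prio(task);
	waiter->tree.deadline = task->dl.deadline;

	/* [11] refresh the pi_tree copy from the tree copy;
	 * lock->wait_lock and owner->pi_lock are both held */
	waiter->pi_tree.prio     = waiter->tree.prio;
	waiter->pi_tree.deadline = waiter->tree.deadline;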

Fixes: fb00aca4 ("rtmutex: Turn the plist into an rb-tree")
Reported-by: Henry Wu <triangletrap12@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Henry Wu <triangletrap12@gmail.com>
Link: https://lkml.kernel.org/r/20230707161052.GF2883469%40hirez.programming.kicks-ass.net
parent fdf0eaf1
@@ -333,21 +333,43 @@ static __always_inline int __waiter_prio(struct task_struct *task)
 	return prio;
 }
 
+/*
+ * Update the waiter->tree copy of the sort keys.
+ */
 static __always_inline void
 waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
 {
-	waiter->prio = __waiter_prio(task);
-	waiter->deadline = task->dl.deadline;
+	lockdep_assert_held(&waiter->lock->wait_lock);
+	lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));
+
+	waiter->tree.prio = __waiter_prio(task);
+	waiter->tree.deadline = task->dl.deadline;
+}
+
+/*
+ * Update the waiter->pi_tree copy of the sort keys (from the tree copy).
+ */
+static __always_inline void
+waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
+{
+	lockdep_assert_held(&waiter->lock->wait_lock);
+	lockdep_assert_held(&task->pi_lock);
+	lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));
+
+	waiter->pi_tree.prio = waiter->tree.prio;
+	waiter->pi_tree.deadline = waiter->tree.deadline;
 }
 
 /*
- * Only use with rt_mutex_waiter_{less,equal}()
+ * Only use with rt_waiter_node_{less,equal}()
  */
+#define task_to_waiter_node(p) \
+	&(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
 #define task_to_waiter(p) \
-	&(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
+	&(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) }
 
-static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
-						struct rt_mutex_waiter *right)
+static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
+					       struct rt_waiter_node *right)
 {
 	if (left->prio < right->prio)
 		return 1;
@@ -364,8 +386,8 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
 	return 0;
 }
 
-static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
-						 struct rt_mutex_waiter *right)
+static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
+						struct rt_waiter_node *right)
 {
 	if (left->prio != right->prio)
 		return 0;
@@ -385,7 +407,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
 static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
 				  struct rt_mutex_waiter *top_waiter)
 {
-	if (rt_mutex_waiter_less(waiter, top_waiter))
+	if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree))
 		return true;
 
 #ifdef RT_MUTEX_BUILD_SPINLOCKS
@@ -393,30 +415,30 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
 	 * Note that RT tasks are excluded from same priority (lateral)
 	 * steals to prevent the introduction of an unbounded latency.
 	 */
-	if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
+	if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
 		return false;
 
-	return rt_mutex_waiter_equal(waiter, top_waiter);
+	return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
 #else
 	return false;
 #endif
 }
 
 #define __node_2_waiter(node) \
-	rb_entry((node), struct rt_mutex_waiter, tree_entry)
+	rb_entry((node), struct rt_mutex_waiter, tree.entry)
 
 static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
 {
 	struct rt_mutex_waiter *aw = __node_2_waiter(a);
 	struct rt_mutex_waiter *bw = __node_2_waiter(b);
 
-	if (rt_mutex_waiter_less(aw, bw))
+	if (rt_waiter_node_less(&aw->tree, &bw->tree))
 		return 1;
 
 	if (!build_ww_mutex())
 		return 0;
 
-	if (rt_mutex_waiter_less(bw, aw))
+	if (rt_waiter_node_less(&bw->tree, &aw->tree))
 		return 0;
 
 	/* NOTE: relies on waiter->ww_ctx being set before insertion */
@@ -434,48 +456,58 @@ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_nod
 static __always_inline void
 rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
 {
-	rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
+	lockdep_assert_held(&lock->wait_lock);
+
+	rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less);
 }
 
 static __always_inline void
 rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
 {
-	if (RB_EMPTY_NODE(&waiter->tree_entry))
+	lockdep_assert_held(&lock->wait_lock);
+
+	if (RB_EMPTY_NODE(&waiter->tree.entry))
 		return;
 
-	rb_erase_cached(&waiter->tree_entry, &lock->waiters);
-	RB_CLEAR_NODE(&waiter->tree_entry);
+	rb_erase_cached(&waiter->tree.entry, &lock->waiters);
+	RB_CLEAR_NODE(&waiter->tree.entry);
 }
 
-#define __node_2_pi_waiter(node) \
-	rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)
+#define __node_2_rt_node(node) \
+	rb_entry((node), struct rt_waiter_node, entry)
 
-static __always_inline bool
-__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
+static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
 {
-	return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
+	return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
 }
 
 static __always_inline void
 rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 {
-	rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
+	lockdep_assert_held(&task->pi_lock);
+
+	rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less);
 }
 
 static __always_inline void
 rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
 {
-	if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
+	lockdep_assert_held(&task->pi_lock);
+
+	if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
 		return;
 
-	rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
-	RB_CLEAR_NODE(&waiter->pi_tree_entry);
+	rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters);
+	RB_CLEAR_NODE(&waiter->pi_tree.entry);
 }
 
-static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
+static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
+						 struct task_struct *p)
 {
 	struct task_struct *pi_task = NULL;
 
+	lockdep_assert_held(&lock->wait_lock);
+	lockdep_assert(rt_mutex_owner(lock) == p);
 	lockdep_assert_held(&p->pi_lock);
 
 	if (task_has_pi_waiters(p))
@@ -571,9 +603,14 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
  * Chain walk basics and protection scope
  *
  * [R] refcount on task
- * [P] task->pi_lock held
+ * [Pn] task->pi_lock held
 * [L] rtmutex->wait_lock held
 *
+ * Normal locking order:
+ *
+ *   rtmutex->wait_lock
+ *     task->pi_lock
+ *
 * Step  Description                              Protected by
 *       function arguments:
 *       @task                                    [R]
@@ -588,27 +625,32 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
 * again:
 *         loop_sanity_check();
 * retry:
- * [1]    lock(task->pi_lock);                    [R] acquire [P]
- * [2]    waiter = task->pi_blocked_on;           [P]
- * [3]    check_exit_conditions_1();              [P]
- * [4]    lock = waiter->lock;                    [P]
- * [5]    if (!try_lock(lock->wait_lock)) {       [P] try to acquire [L]
- *          unlock(task->pi_lock);                release [P]
+ * [1]    lock(task->pi_lock);                    [R] acquire [P1]
+ * [2]    waiter = task->pi_blocked_on;           [P1]
+ * [3]    check_exit_conditions_1();              [P1]
+ * [4]    lock = waiter->lock;                    [P1]
+ * [5]    if (!try_lock(lock->wait_lock)) {       [P1] try to acquire [L]
+ *          unlock(task->pi_lock);                release [P1]
 *          goto retry;
 *        }
- * [6]    check_exit_conditions_2();              [P] + [L]
- * [7]    requeue_lock_waiter(lock, waiter);      [P] + [L]
- * [8]    unlock(task->pi_lock);                  release [P]
+ * [6]    check_exit_conditions_2();              [P1] + [L]
+ * [7]    requeue_lock_waiter(lock, waiter);      [P1] + [L]
+ * [8]    unlock(task->pi_lock);                  release [P1]
 *         put_task_struct(task);                  release [R]
 * [9]     check_exit_conditions_3();              [L]
 * [10]    task = owner(lock);                     [L]
 *         get_task_struct(task);                  [L] acquire [R]
- *         lock(task->pi_lock);                   [L] acquire [P]
- * [11]    requeue_pi_waiter(tsk, waiters(lock)); [P] + [L]
- * [12]    check_exit_conditions_4();             [P] + [L]
- * [13]    unlock(task->pi_lock);                 release [P]
+ *         lock(task->pi_lock);                   [L] acquire [P2]
+ * [11]    requeue_pi_waiter(tsk, waiters(lock)); [P2] + [L]
+ * [12]    check_exit_conditions_4();             [P2] + [L]
+ * [13]    unlock(task->pi_lock);                 release [P2]
 *          unlock(lock->wait_lock);               release [L]
 *          goto again;
+ *
+ * Where P1 is the blocking task and P2 is the lock owner; going up one step
+ * the owner becomes the next blocked task etc..
+ *
+ *
 */
 static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 					      enum rtmutex_chainwalk chwalk,
@@ -756,7 +798,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 	 * enabled we continue, but stop the requeueing in the chain
 	 * walk.
 	 */
-	if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+	if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
 		if (!detect_deadlock)
 			goto out_unlock_pi;
 		else
@@ -764,13 +806,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 	}
 
 	/*
-	 * [4] Get the next lock
+	 * [4] Get the next lock; per holding task->pi_lock we can't unblock
+	 *     and guarantee @lock's existence.
 	 */
 	lock = waiter->lock;
 	/*
 	 * [5] We need to trylock here as we are holding task->pi_lock,
 	 * which is the reverse lock order versus the other rtmutex
 	 * operations.
+	 *
+	 * Per the above, holding task->pi_lock guarantees lock exists, so
+	 * inverting this lock order is infeasible from a life-time
+	 * perspective.
 	 */
 	if (!raw_spin_trylock(&lock->wait_lock)) {
 		raw_spin_unlock_irq(&task->pi_lock);
@@ -874,17 +921,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 	 * or
 	 *
 	 * DL CBS enforcement advancing the effective deadline.
-	 *
-	 * Even though pi_waiters also uses these fields, and that tree is only
-	 * updated in [11], we can do this here, since we hold [L], which
-	 * serializes all pi_waiters access and rb_erase() does not care about
-	 * the values of the node being removed.
 	 */
 	waiter_update_prio(waiter, task);
 
 	rt_mutex_enqueue(lock, waiter);
 
-	/* [8] Release the task */
+	/*
+	 * [8] Release the (blocking) task in preparation for
+	 * taking the owner task in [10].
+	 *
+	 * Since we hold lock->waiter_lock, task cannot unblock, even if we
+	 * release task->pi_lock.
+	 */
 	raw_spin_unlock(&task->pi_lock);
 	put_task_struct(task);
@@ -908,7 +956,12 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 		return 0;
 	}
 
-	/* [10] Grab the next task, i.e. the owner of @lock */
+	/*
+	 * [10] Grab the next task, i.e. the owner of @lock
+	 *
+	 * Per holding lock->wait_lock and checking for !owner above, there
+	 * must be an owner and it cannot go away.
+	 */
 	task = get_task_struct(rt_mutex_owner(lock));
 	raw_spin_lock(&task->pi_lock);
@@ -921,8 +974,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 		 * and adjust the priority of the owner.
 		 */
 		rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
+		waiter_clone_prio(waiter, task);
 		rt_mutex_enqueue_pi(task, waiter);
-		rt_mutex_adjust_prio(task);
+		rt_mutex_adjust_prio(lock, task);
 	} else if (prerequeue_top_waiter == waiter) {
 		/*
@@ -937,8 +991,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
 		 */
 		rt_mutex_dequeue_pi(task, waiter);
 		waiter = rt_mutex_top_waiter(lock);
+		waiter_clone_prio(waiter, task);
 		rt_mutex_enqueue_pi(task, waiter);
-		rt_mutex_adjust_prio(task);
+		rt_mutex_adjust_prio(lock, task);
 	} else {
 		/*
 		 * Nothing changed. No need to do any priority
@@ -1154,6 +1209,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
 	waiter->task = task;
 	waiter->lock = lock;
 	waiter_update_prio(waiter, task);
+	waiter_clone_prio(waiter, task);
 
 	/* Get the top priority waiter on the lock */
 	if (rt_mutex_has_waiters(lock))
@@ -1187,7 +1243,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
 		rt_mutex_dequeue_pi(owner, top_waiter);
 		rt_mutex_enqueue_pi(owner, waiter);
 
-		rt_mutex_adjust_prio(owner);
+		rt_mutex_adjust_prio(lock, owner);
 		if (owner->pi_blocked_on)
 			chain_walk = 1;
 	} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
@@ -1234,6 +1290,8 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
 {
 	struct rt_mutex_waiter *waiter;
 
+	lockdep_assert_held(&lock->wait_lock);
+
 	raw_spin_lock(&current->pi_lock);
 
 	waiter = rt_mutex_top_waiter(lock);
@@ -1246,7 +1304,7 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
 	 * task unblocks.
 	 */
 	rt_mutex_dequeue_pi(current, waiter);
-	rt_mutex_adjust_prio(current);
+	rt_mutex_adjust_prio(lock, current);
 
 	/*
 	 * As we are waking up the top waiter, and the waiter stays
@@ -1482,7 +1540,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock,
 	if (rt_mutex_has_waiters(lock))
 		rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
 
-	rt_mutex_adjust_prio(owner);
+	rt_mutex_adjust_prio(lock, owner);
 
 	/* Store the lock on which owner is blocked or NULL */
 	next_lock = task_blocked_on_lock(owner);
......
@@ -459,7 +459,7 @@ void __sched rt_mutex_adjust_pi(struct task_struct *task)
 	raw_spin_lock_irqsave(&task->pi_lock, flags);
 	waiter = task->pi_blocked_on;
-	if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+	if (!waiter || rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
 		raw_spin_unlock_irqrestore(&task->pi_lock, flags);
 		return;
 	}
......
@@ -17,27 +17,44 @@
 #include <linux/rtmutex.h>
 #include <linux/sched/wake_q.h>
 
+/*
+ * This is a helper for the struct rt_mutex_waiter below. A waiter goes in two
+ * separate trees and they need their own copy of the sort keys because of
+ * different locking requirements.
+ *
+ * @entry:		rbtree node to enqueue into the waiters tree
+ * @prio:		Priority of the waiter
+ * @deadline:		Deadline of the waiter if applicable
+ *
+ * See rt_waiter_node_less() and waiter_*_prio().
+ */
+struct rt_waiter_node {
+	struct rb_node	entry;
+	int		prio;
+	u64		deadline;
+};
+
 /*
  * This is the control structure for tasks blocked on a rt_mutex,
  * which is allocated on the kernel stack on of the blocked task.
  *
- * @tree_entry:		pi node to enqueue into the mutex waiters tree
- * @pi_tree_entry:	pi node to enqueue into the mutex owner waiters tree
+ * @tree:		node to enqueue into the mutex waiters tree
+ * @pi_tree:		node to enqueue into the mutex owner waiters tree
 * @task:		task reference to the blocked task
 * @lock:		Pointer to the rt_mutex on which the waiter blocks
 * @wake_state:		Wakeup state to use (TASK_NORMAL or TASK_RTLOCK_WAIT)
- * @prio:		Priority of the waiter
- * @deadline:		Deadline of the waiter if applicable
 * @ww_ctx:		WW context pointer
+ *
+ * @tree is ordered by @lock->wait_lock
+ * @pi_tree is ordered by rt_mutex_owner(@lock)->pi_lock
 */
 struct rt_mutex_waiter {
-	struct rb_node		tree_entry;
-	struct rb_node		pi_tree_entry;
+	struct rt_waiter_node	tree;
+	struct rt_waiter_node	pi_tree;
 	struct task_struct	*task;
 	struct rt_mutex_base	*lock;
 	unsigned int		wake_state;
-	int			prio;
-	u64			deadline;
 	struct ww_acquire_ctx	*ww_ctx;
 };
@@ -105,7 +122,7 @@ static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
 {
 	struct rb_node *leftmost = rb_first_cached(&lock->waiters);
 
-	return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter;
+	return rb_entry(leftmost, struct rt_mutex_waiter, tree.entry) == waiter;
 }
 
 static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
@@ -113,8 +130,10 @@ static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *
 	struct rb_node *leftmost = rb_first_cached(&lock->waiters);
 	struct rt_mutex_waiter *w = NULL;
 
+	lockdep_assert_held(&lock->wait_lock);
+
 	if (leftmost) {
-		w = rb_entry(leftmost, struct rt_mutex_waiter, tree_entry);
+		w = rb_entry(leftmost, struct rt_mutex_waiter, tree.entry);
 		BUG_ON(w->lock != lock);
 	}
 	return w;
@@ -127,8 +146,10 @@ static inline int task_has_pi_waiters(struct task_struct *p)
 static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p)
 {
+	lockdep_assert_held(&p->pi_lock);
+
 	return rb_entry(p->pi_waiters.rb_leftmost, struct rt_mutex_waiter,
-			pi_tree_entry);
+			pi_tree.entry);
 }
 
 #define RT_MUTEX_HAS_WAITERS	1UL
@@ -190,8 +211,8 @@ static inline void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
 static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
 {
 	debug_rt_mutex_init_waiter(waiter);
-	RB_CLEAR_NODE(&waiter->pi_tree_entry);
-	RB_CLEAR_NODE(&waiter->tree_entry);
+	RB_CLEAR_NODE(&waiter->pi_tree.entry);
+	RB_CLEAR_NODE(&waiter->tree.entry);
 	waiter->wake_state = TASK_NORMAL;
 	waiter->task = NULL;
 }
......
@@ -96,25 +96,25 @@ __ww_waiter_first(struct rt_mutex *lock)
 	struct rb_node *n = rb_first(&lock->rtmutex.waiters.rb_root);
 	if (!n)
 		return NULL;
-	return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+	return rb_entry(n, struct rt_mutex_waiter, tree.entry);
 }
 
 static inline struct rt_mutex_waiter *
 __ww_waiter_next(struct rt_mutex *lock, struct rt_mutex_waiter *w)
 {
-	struct rb_node *n = rb_next(&w->tree_entry);
+	struct rb_node *n = rb_next(&w->tree.entry);
 	if (!n)
 		return NULL;
-	return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+	return rb_entry(n, struct rt_mutex_waiter, tree.entry);
 }
 
 static inline struct rt_mutex_waiter *
 __ww_waiter_prev(struct rt_mutex *lock, struct rt_mutex_waiter *w)
 {
-	struct rb_node *n = rb_prev(&w->tree_entry);
+	struct rb_node *n = rb_prev(&w->tree.entry);
 	if (!n)
 		return NULL;
-	return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+	return rb_entry(n, struct rt_mutex_waiter, tree.entry);
 }
 
 static inline struct rt_mutex_waiter *
@@ -123,7 +123,7 @@ __ww_waiter_last(struct rt_mutex *lock)
 	struct rb_node *n = rb_last(&lock->rtmutex.waiters.rb_root);
 	if (!n)
 		return NULL;
-	return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+	return rb_entry(n, struct rt_mutex_waiter, tree.entry);
 }
 
 static inline void
......