Commit 59a7718e authored by Linus Torvalds

Allow BKL re-acquire to fail, causing us to re-schedule.

This allows for low-latency BKL contention even with
preemption. Previously, since preemption is disabled
over context switches, re-acquiring the kernel lock when
resuming a process would be non-preemptible.
parent 6f60f5cf
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
#define kernel_locked() (current->lock_depth >= 0) #define kernel_locked() (current->lock_depth >= 0)
extern void __lockfunc get_kernel_lock(void); extern int __lockfunc get_kernel_lock(void);
extern void __lockfunc put_kernel_lock(void); extern void __lockfunc put_kernel_lock(void);
/* /*
...@@ -20,10 +20,24 @@ extern void __lockfunc put_kernel_lock(void); ...@@ -20,10 +20,24 @@ extern void __lockfunc put_kernel_lock(void);
put_kernel_lock(); \ put_kernel_lock(); \
} while (0) } while (0)
#define reacquire_kernel_lock(tsk) do { \ /*
if (unlikely((tsk)->lock_depth >= 0)) \ * Non-SMP kernels will never block on the kernel lock,
get_kernel_lock(); \ * so we are better off returning a constant zero from
} while (0) * reacquire_kernel_lock() so that the compiler can see
* it at compile-time.
*/
#ifdef CONFIG_SMP
/* SMP: expands to `return`, so the result of get_kernel_lock() is propagated. */
#define return_value_on_smp return
#else
/* Non-SMP: expands to nothing, so the result of the call is discarded. */
#define return_value_on_smp
#endif
/*
 * Re-take the kernel lock on behalf of @task when its lock_depth is
 * non-negative (the same condition kernel_locked() tests for current).
 *
 * Returns 0 when no re-acquire is needed, and always on non-SMP builds.
 * On SMP builds it returns whatever get_kernel_lock() returns.
 */
static inline int reacquire_kernel_lock(struct task_struct *task)
{
if (unlikely(task->lock_depth >= 0))
/* With CONFIG_SMP this statement is `return get_kernel_lock();`. */
return_value_on_smp get_kernel_lock();
/* Reached when lock_depth < 0, or after the call on non-SMP builds. */
return 0;
}
extern void __lockfunc lock_kernel(void) __acquires(kernel_lock); extern void __lockfunc lock_kernel(void) __acquires(kernel_lock);
extern void __lockfunc unlock_kernel(void) __releases(kernel_lock); extern void __lockfunc unlock_kernel(void) __releases(kernel_lock);
......
...@@ -2502,6 +2502,8 @@ asmlinkage void __sched schedule(void) ...@@ -2502,6 +2502,8 @@ asmlinkage void __sched schedule(void)
need_resched: need_resched:
preempt_disable(); preempt_disable();
prev = current; prev = current;
release_kernel_lock(prev);
need_resched_nonpreemptible:
rq = this_rq(); rq = this_rq();
/* /*
...@@ -2513,7 +2515,6 @@ asmlinkage void __sched schedule(void) ...@@ -2513,7 +2515,6 @@ asmlinkage void __sched schedule(void)
dump_stack(); dump_stack();
} }
release_kernel_lock(prev);
schedstat_inc(rq, sched_cnt); schedstat_inc(rq, sched_cnt);
now = sched_clock(); now = sched_clock();
if (likely(now - prev->timestamp < NS_MAX_SLEEP_AVG)) if (likely(now - prev->timestamp < NS_MAX_SLEEP_AVG))
...@@ -2636,7 +2637,9 @@ asmlinkage void __sched schedule(void) ...@@ -2636,7 +2637,9 @@ asmlinkage void __sched schedule(void)
} else } else
spin_unlock_irq(&rq->lock); spin_unlock_irq(&rq->lock);
reacquire_kernel_lock(current); prev = current;
if (unlikely(reacquire_kernel_lock(prev) < 0))
goto need_resched_nonpreemptible;
preempt_enable_no_resched(); preempt_enable_no_resched();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched; goto need_resched;
......
...@@ -24,16 +24,40 @@ static spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; ...@@ -24,16 +24,40 @@ static spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
/* /*
* Acquire/release the underlying lock from the scheduler. * Acquire/release the underlying lock from the scheduler.
* *
* The scheduler release and re-acquire currently always happen * This is called with preemption disabled, and should
* with preemption disabled. Which is likely a bug in the acquire * return an error value if it cannot get the lock and
* case... * TIF_NEED_RESCHED gets set.
* *
* Regardless, we try to be polite about preemption. If SMP is * If it successfully gets the lock, it should increment
* not on (ie UP preemption), this all goes away because the * the preemption count like any spinlock does.
*
* (This works on UP too - _raw_spin_trylock will never
* return false in that case)
*/
int __lockfunc get_kernel_lock(void)
{
	/*
	 * Poll the lock with trylock instead of blocking on it, so that a
	 * pending reschedule request can abort the attempt.
	 */
	for (;;) {
		if (_raw_spin_trylock(&kernel_flag))
			break;
		/* Still contended: bail out if a reschedule has been requested. */
		if (test_thread_flag(TIF_NEED_RESCHED))
			return -EAGAIN;
		cpu_relax();
	}

	/* Lock obtained: account for it in the preemption count. */
	preempt_disable();
	return 0;
}
/*
 * Scheduler-side release of the kernel lock: drops kernel_flag, then
 * undoes the preempt_disable() that get_kernel_lock() performs after a
 * successful acquire.
 */
void __lockfunc put_kernel_lock(void)
{
_raw_spin_unlock(&kernel_flag);
/* NOTE(review): the _no_resched variant presumably skips the reschedule check - confirm. */
preempt_enable_no_resched();
}
/*
* These are the BKL spinlocks - we try to be polite about preemption.
* If SMP is not on (ie UP preemption), this all goes away because the
* _raw_spin_trylock() will always succeed. * _raw_spin_trylock() will always succeed.
*/ */
#ifdef CONFIG_PREEMPT #ifdef CONFIG_PREEMPT
inline void __lockfunc get_kernel_lock(void) static inline void __lock_kernel(void)
{ {
preempt_disable(); preempt_disable();
if (unlikely(!_raw_spin_trylock(&kernel_flag))) { if (unlikely(!_raw_spin_trylock(&kernel_flag))) {
...@@ -65,13 +89,13 @@ inline void __lockfunc get_kernel_lock(void) ...@@ -65,13 +89,13 @@ inline void __lockfunc get_kernel_lock(void)
/* /*
* Non-preemption case - just get the spinlock * Non-preemption case - just get the spinlock
*/ */
inline void __lockfunc get_kernel_lock(void) static inline void __lock_kernel(void)
{ {
_raw_spin_lock(&kernel_flag); _raw_spin_lock(&kernel_flag);
} }
#endif #endif
inline void __lockfunc put_kernel_lock(void) static inline void __unlock_kernel(void)
{ {
_raw_spin_unlock(&kernel_flag); _raw_spin_unlock(&kernel_flag);
preempt_enable(); preempt_enable();
...@@ -87,7 +111,7 @@ void __lockfunc lock_kernel(void) ...@@ -87,7 +111,7 @@ void __lockfunc lock_kernel(void)
{ {
int depth = current->lock_depth+1; int depth = current->lock_depth+1;
if (likely(!depth)) if (likely(!depth))
get_kernel_lock(); __lock_kernel();
current->lock_depth = depth; current->lock_depth = depth;
} }
...@@ -95,7 +119,7 @@ void __lockfunc unlock_kernel(void) ...@@ -95,7 +119,7 @@ void __lockfunc unlock_kernel(void)
{ {
BUG_ON(current->lock_depth < 0); BUG_ON(current->lock_depth < 0);
if (likely(--current->lock_depth < 0)) if (likely(--current->lock_depth < 0))
put_kernel_lock(); __unlock_kernel();
} }
EXPORT_SYMBOL(lock_kernel); EXPORT_SYMBOL(lock_kernel);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment