Commit 4866cde0 authored by Nick Piggin, committed by Linus Torvalds

[PATCH] sched: cleanup context switch locking

Instead of requiring architecture code to interact with the scheduler's
locking implementation, provide a couple of defines that the architecture
can use to request runqueue-unlocked context switches and to ask for
interrupts to be enabled over the context switch.

Also replace the "switch_lock" used by these architectures with an oncpu
flag (note: not a potentially slow bitflag).  This eliminates one bus-locked
memory operation when context switching and simplifies the task_running
function.
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 48c08d3f
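
For orientation, here is a sketch (an illustration, not part of the patch) of what the new
per-architecture contract looks like: an architecture states what kind of context switch it
wants in its asm/system.h and may keep its own switch hooks, which now take a single task
argument.  The architecture name below is hypothetical.

	/* include/asm-foo/system.h -- hypothetical architecture, sketch only */

	/* Ask the scheduler core to drop rq->lock across the context switch. */
	#define __ARCH_WANT_UNLOCKED_CTXSW

	/*
	 * Or, if e.g. switch_mm() is slow, additionally run the switch with
	 * interrupts enabled (this implies __ARCH_WANT_UNLOCKED_CTXSW, see the
	 * sched.h hunk below):
	 *
	 * #define __ARCH_WANT_INTERRUPTS_ON_CTXSW
	 */

	/* The arch hooks remain available, but now take only the task: */
	#define prepare_arch_switch(next)	do { /* e.g. flush register windows */ } while (0)
	#define finish_arch_switch(prev)	do { /* arch-specific cleanup */ } while (0)

Everything else -- the oncpu flag, prepare_lock_switch()/finish_lock_switch() and the
task_running() check -- now lives in generic code; see the kernel/sched.c hunks below.
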
@@ -145,34 +145,12 @@ extern unsigned int user_debug;
 #define set_wmb(var, value) do { var = value; wmb(); } while (0)
 #define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t");
-#ifdef CONFIG_SMP
 /*
- * Define our own context switch locking.  This allows us to enable
- * interrupts over the context switch, otherwise we end up with high
- * interrupt latency.  The real problem area is switch_mm() which may
- * do a full cache flush.
+ * switch_mm() may do a full cache flush over the context switch,
+ * so enable interrupts over the context switch to avoid high
+ * latency.
  */
-#define prepare_arch_switch(rq,next)	\
-do {					\
-	spin_lock(&(next)->switch_lock);	\
-	spin_unlock_irq(&(rq)->lock);	\
-} while (0)
-#define finish_arch_switch(rq,prev)	\
-	spin_unlock(&(prev)->switch_lock)
-#define task_running(rq,p)		\
-	((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
-#else
-/*
- * Our UP-case is more simple, but we assume knowledge of how
- * spin_unlock_irq() and friends are implemented.  This avoids
- * us needlessly decrementing and incrementing the preempt count.
- */
-#define prepare_arch_switch(rq,next)	local_irq_enable()
-#define finish_arch_switch(rq,prev)	spin_unlock(&(rq)->lock)
-#define task_running(rq,p)		((rq)->curr == (p))
-#endif
+#define __ARCH_WANT_INTERRUPTS_ON_CTXSW
 /*
  * switch_to(prev, next) should switch from task `prev' to `next'
......
@@ -183,8 +183,6 @@ do { \
 #ifdef __KERNEL__
-#define prepare_to_switch()	do { } while(0)
 #ifdef CONFIG_IA32_SUPPORT
 # define IS_IA32_PROCESS(regs)	(ia64_psr(regs)->is != 0)
 #else
@@ -274,13 +272,7 @@ extern void ia64_load_extra (struct task_struct *task);
  * of that CPU which will not be released, because there we wait for the
  * tasklist_lock to become available.
  */
-#define prepare_arch_switch(rq, next)		\
-do {						\
-	spin_lock(&(next)->switch_lock);	\
-	spin_unlock(&(rq)->lock);		\
-} while (0)
-#define finish_arch_switch(rq, prev)	spin_unlock_irq(&(prev)->switch_lock)
-#define task_running(rq, p)		((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
+#define __ARCH_WANT_UNLOCKED_CTXSW
 #define ia64_platform_is(x)	(strcmp(x, platform_name) == 0)
......
@@ -422,16 +422,10 @@ extern void __die_if_kernel(const char *, struct pt_regs *, const char *file,
 extern int stop_a_enabled;
 /*
- * Taken from include/asm-ia64/system.h; prevents deadlock on SMP
+ * See include/asm-ia64/system.h; prevents deadlock on SMP
  * systems.
  */
-#define prepare_arch_switch(rq, next)		\
-do {						\
-	spin_lock(&(next)->switch_lock);	\
-	spin_unlock(&(rq)->lock);		\
-} while (0)
-#define finish_arch_switch(rq, prev)	spin_unlock_irq(&(prev)->switch_lock)
-#define task_running(rq, p)		((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
+#define __ARCH_WANT_UNLOCKED_CTXSW
 #define arch_align_stack(x) (x)
......
@@ -104,29 +104,18 @@ static inline void restore_access_regs(unsigned int *acrs)
 	prev = __switch_to(prev,next);					     \
 } while (0)
-#define prepare_arch_switch(rq, next)	do { } while(0)
-#define task_running(rq, p)		((rq)->curr == (p))
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 extern void account_user_vtime(struct task_struct *);
 extern void account_system_vtime(struct task_struct *);
-#define finish_arch_switch(rq, prev) do {			     \
-	set_fs(current->thread.mm_segment);			     \
-	spin_unlock(&(rq)->lock);				     \
-	account_system_vtime(prev);				     \
-	local_irq_enable();					     \
-} while (0)
 #else
-#define finish_arch_switch(rq, prev) do {			     \
-	set_fs(current->thread.mm_segment);			     \
-	spin_unlock_irq(&(rq)->lock);				     \
-} while (0)
+#define account_system_vtime(prev) do { } while (0)
 #endif
+#define finish_arch_switch(prev) do {				     \
+	set_fs(current->thread.mm_segment);			     \
+	account_system_vtime(prev);				     \
+} while (0)
 #define nop() __asm__ __volatile__ ("nop")
 #define xchg(ptr,x) \
......
@@ -101,7 +101,7 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
  * SWITCH_ENTER and SWITH_DO_LAZY_FPU do not work yet (e.g. SMP does not work)
  * XXX WTF is the above comment? Found in late teen 2.4.x.
  */
-#define prepare_arch_switch(rq, next) do { \
+#define prepare_arch_switch(next) do { \
 	__asm__ __volatile__( \
 	".globl\tflush_patch_switch\nflush_patch_switch:\n\t" \
 	"save %sp, -0x40, %sp; save %sp, -0x40, %sp; save %sp, -0x40, %sp\n\t" \
@@ -109,8 +109,6 @@ extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
 	"save %sp, -0x40, %sp\n\t" \
 	"restore; restore; restore; restore; restore; restore; restore"); \
 } while(0)
-#define finish_arch_switch(rq, next)	spin_unlock_irq(&(rq)->lock)
-#define task_running(rq, p)		((rq)->curr == (p))
 /* Much care has gone into this code, do not touch it.
  *
......
@@ -139,19 +139,13 @@ extern void __flushw_user(void);
 #define flush_user_windows flushw_user
 #define flush_register_windows flushw_all
-#define prepare_arch_switch(rq, next)		\
-do {	spin_lock(&(next)->switch_lock);	\
-	spin_unlock(&(rq)->lock);		\
+/* Don't hold the runqueue lock over context switch */
+#define __ARCH_WANT_UNLOCKED_CTXSW
+#define prepare_arch_switch(next)		\
+do {						\
 	flushw_all();				\
 } while (0)
-#define finish_arch_switch(rq, prev)		\
-do {	spin_unlock_irq(&(prev)->switch_lock);	\
-} while (0)
-#define task_running(rq, p) \
-	((rq)->curr == (p) || spin_is_locked(&(p)->switch_lock))
 /* See what happens when you design the chip correctly?
  *
  * We tell gcc we clobber all non-fixed-usage registers except
......
@@ -108,7 +108,6 @@ extern struct group_info init_groups;
 	.blocked	= {{0}},					\
 	.alloc_lock	= SPIN_LOCK_UNLOCKED,				\
 	.proc_lock	= SPIN_LOCK_UNLOCKED,				\
-	.switch_lock	= SPIN_LOCK_UNLOCKED,				\
 	.journal_info	= NULL,						\
 	.cpu_timers	= INIT_CPU_TIMERS(tsk.cpu_timers),		\
 }
......
@@ -368,6 +368,11 @@ struct signal_struct {
 #endif
 };
+/* Context switch must be unlocked if interrupts are to be enabled */
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+# define __ARCH_WANT_UNLOCKED_CTXSW
+#endif
 /*
  * Bits in flags field of signal_struct.
  */
@@ -594,6 +599,9 @@ struct task_struct {
 	int lock_depth;		/* BKL lock depth */
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
+	int oncpu;
+#endif
 	int prio, static_prio;
 	struct list_head run_list;
 	prio_array_t *array;
@@ -716,8 +724,6 @@ struct task_struct {
 	spinlock_t alloc_lock;
 /* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
 	spinlock_t proc_lock;
-/* context-switch lock */
-	spinlock_t switch_lock;
 /* journalling filesystem info */
 	void *journal_info;
......
@@ -268,14 +268,71 @@ static DEFINE_PER_CPU(struct runqueue, runqueues);
 #define task_rq(p)		cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
-/*
- * Default context-switch locking:
- */
 #ifndef prepare_arch_switch
-# define prepare_arch_switch(rq, next)	do { } while (0)
-# define finish_arch_switch(rq, next)	spin_unlock_irq(&(rq)->lock)
-# define task_running(rq, p)		((rq)->curr == (p))
+# define prepare_arch_switch(next)	do { } while (0)
+#endif
+#ifndef finish_arch_switch
+# define finish_arch_switch(prev)	do { } while (0)
 #endif
+
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
+static inline int task_running(runqueue_t *rq, task_t *p)
+{
+	return rq->curr == p;
+}
+
+static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
+{
+}
+
+static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
+{
+	spin_unlock_irq(&rq->lock);
+}
+
+#else /* __ARCH_WANT_UNLOCKED_CTXSW */
+static inline int task_running(runqueue_t *rq, task_t *p)
+{
+#ifdef CONFIG_SMP
+	return p->oncpu;
+#else
+	return rq->curr == p;
+#endif
+}
+
+static inline void prepare_lock_switch(runqueue_t *rq, task_t *next)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * We can optimise this out completely for !SMP, because the
+	 * SMP rebalancing from interrupt is the only thing that cares
+	 * here.
+	 */
+	next->oncpu = 1;
+#endif
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	spin_unlock_irq(&rq->lock);
+#else
+	spin_unlock(&rq->lock);
+#endif
+}
+
+static inline void finish_lock_switch(runqueue_t *rq, task_t *prev)
+{
+#ifdef CONFIG_SMP
+	/*
+	 * After ->oncpu is cleared, the task can be moved to a different CPU.
+	 * We must ensure this doesn't happen until the switch is completely
+	 * finished.
+	 */
+	smp_wmb();
+	prev->oncpu = 0;
+#endif
+#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+	local_irq_enable();
+#endif
+}
+#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
 /*
  * task_rq_lock - lock the runqueue a given task resides on and disable
@@ -1196,17 +1253,14 @@ void fastcall sched_fork(task_t *p)
 	p->state = TASK_RUNNING;
 	INIT_LIST_HEAD(&p->run_list);
 	p->array = NULL;
-	spin_lock_init(&p->switch_lock);
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
+	p->oncpu = 0;
+#endif
 #ifdef CONFIG_PREEMPT
-	/*
-	 * During context-switch we hold precisely one spinlock, which
-	 * schedule_tail drops. (in the common case it's this_rq()->lock,
-	 * but it also can be p->switch_lock.) So we compensate with a count
-	 * of 1. Also, we want to start with kernel preemption disabled.
-	 */
+	/* Want to start with kernel preemption disabled. */
 	p->thread_info->preempt_count = 1;
 #endif
 	/*
@@ -1387,23 +1441,41 @@ void fastcall sched_exit(task_t * p)
 	task_rq_unlock(rq, &flags);
 }
+/**
+ * prepare_task_switch - prepare to switch tasks
+ * @rq: the runqueue preparing to switch
+ * @next: the task we are going to switch to.
+ *
+ * This is called with the rq lock held and interrupts off. It must
+ * be paired with a subsequent finish_task_switch after the context
+ * switch.
+ *
+ * prepare_task_switch sets up locking and calls architecture specific
+ * hooks.
+ */
+static inline void prepare_task_switch(runqueue_t *rq, task_t *next)
+{
+	prepare_lock_switch(rq, next);
+	prepare_arch_switch(next);
+}
+
 /**
  * finish_task_switch - clean up after a task-switch
  * @prev: the thread we just switched away from.
  *
- * We enter this with the runqueue still locked, and finish_arch_switch()
- * will unlock it along with doing any other architecture-specific cleanup
- * actions.
+ * finish_task_switch must be called after the context switch, paired
+ * with a prepare_task_switch call before the context switch.
+ * finish_task_switch will reconcile locking set up by prepare_task_switch,
+ * and do any other architecture-specific cleanup actions.
  *
  * Note that we may have delayed dropping an mm in context_switch(). If
  * so, we finish that here outside of the runqueue lock.  (Doing it
  * with the lock held can cause deadlocks; see schedule() for
  * details.)
  */
-static inline void finish_task_switch(task_t *prev)
+static inline void finish_task_switch(runqueue_t *rq, task_t *prev)
 	__releases(rq->lock)
 {
-	runqueue_t *rq = this_rq();
 	struct mm_struct *mm = rq->prev_mm;
 	unsigned long prev_task_flags;
@@ -1421,7 +1493,8 @@ static inline void finish_task_switch(task_t *prev)
 	 *		Manfred Spraul <manfred@colorfullife.com>
 	 */
 	prev_task_flags = prev->flags;
-	finish_arch_switch(rq, prev);
+	finish_arch_switch(prev);
+	finish_lock_switch(rq, prev);
 	if (mm)
 		mmdrop(mm);
 	if (unlikely(prev_task_flags & PF_DEAD))
@@ -1435,8 +1508,12 @@ static inline void finish_task_switch(task_t *prev)
 asmlinkage void schedule_tail(task_t *prev)
 	__releases(rq->lock)
 {
-	finish_task_switch(prev);
+	runqueue_t *rq = this_rq();
+	finish_task_switch(rq, prev);
+#ifdef __ARCH_WANT_UNLOCKED_CTXSW
+	/* In this case, finish_task_switch does not reenable preemption */
+	preempt_enable();
+#endif
 	if (current->set_child_tid)
 		put_user(current->pid, current->set_child_tid);
 }
@@ -2816,11 +2893,15 @@ asmlinkage void __sched schedule(void)
 		rq->curr = next;
 		++*switch_count;
-		prepare_arch_switch(rq, next);
+		prepare_task_switch(rq, next);
 		prev = context_switch(rq, prev, next);
 		barrier();
-		finish_task_switch(prev);
+		/*
+		 * this_rq must be evaluated again because prev may have moved
+		 * CPUs since it called schedule(), thus the 'rq' on its stack
+		 * frame will be invalid.
+		 */
+		finish_task_switch(this_rq(), prev);
 	} else
 		spin_unlock_irq(&rq->lock);
@@ -4085,6 +4166,9 @@ void __devinit init_idle(task_t *idle, int cpu)
 	spin_lock_irqsave(&rq->lock, flags);
 	rq->curr = rq->idle = idle;
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
+	idle->oncpu = 1;
+#endif
 	set_tsk_need_resched(idle);
 	spin_unlock_irqrestore(&rq->lock, flags);
......