Commit 6d22c10f authored by Ingo Molnar

cleanups, speedups and fixes. Added support for non-current set_cpus_allowed().

parent c96f3323
...@@ -79,7 +79,6 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) ...@@ -79,7 +79,6 @@ BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd)
* through the ICC by us (IPIs) * through the ICC by us (IPIs)
*/ */
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
BUILD_SMP_INTERRUPT(task_migration_interrupt,TASK_MIGRATION_VECTOR)
BUILD_SMP_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) BUILD_SMP_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR) BUILD_SMP_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
...@@ -474,9 +473,6 @@ void __init init_IRQ(void) ...@@ -474,9 +473,6 @@ void __init init_IRQ(void)
*/ */
set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
/* IPI for task migration */
set_intr_gate(TASK_MIGRATION_VECTOR, task_migration_interrupt);
/* IPI for invalidation */ /* IPI for invalidation */
set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
......
...@@ -485,35 +485,6 @@ void flush_tlb_all(void) ...@@ -485,35 +485,6 @@ void flush_tlb_all(void)
do_flush_tlb_all_local(); do_flush_tlb_all_local();
} }
static spinlock_t migration_lock = SPIN_LOCK_UNLOCKED;
static task_t *new_task;
/*
 * This function sends a 'task migration' IPI to another CPU.
 * Must be called from syscall contexts, with interrupts *enabled*.
 *
 * cpu: index of the target CPU; the IPI is sent to the mask (1 << cpu).
 * p:   task to hand over; it is published through the file-scope
 *      new_task pointer for the interrupt handler to pick up.
 *
 * Locking: migration_lock is acquired here and intentionally NOT
 * released by this function. smp_task_migration_interrupt() drops it
 * once it has read new_task, so only one hand-over can be in flight
 * at a time.
 */
void smp_migrate_task(int cpu, task_t *p)
{
/*
 * The target CPU will unlock the migration spinlock:
 */
_raw_spin_lock(&migration_lock);
/* new_task is only written while migration_lock is held. */
new_task = p;
send_IPI_mask(1 << cpu, TASK_MIGRATION_VECTOR);
}
/*
 * Task migration callback.
 *
 * Handler for the IPI sent by smp_migrate_task(). It calls
 * ack_APIC_irq(), takes a private copy of new_task, releases
 * migration_lock (which the sender left held), and finally passes the
 * task to sched_task_migrated().
 */
asmlinkage void smp_task_migration_interrupt(void)
{
task_t *p;
ack_APIC_irq();
/*
 * Copy new_task before unlocking: once migration_lock is released
 * another smp_migrate_task() caller may overwrite it.
 */
p = new_task;
_raw_spin_unlock(&migration_lock);
sched_task_migrated(p);
}
/* /*
* this function sends a 'reschedule' IPI to another CPU. * this function sends a 'reschedule' IPI to another CPU.
* it goes straight through and wastes no time serializing * it goes straight through and wastes no time serializing
......
...@@ -116,7 +116,7 @@ pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos) ...@@ -116,7 +116,7 @@ pipe_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
* writers synchronously that there is more * writers synchronously that there is more
* room. * room.
*/ */
wake_up_interruptible_sync(PIPE_WAIT(*inode)); wake_up_interruptible(PIPE_WAIT(*inode));
if (!PIPE_EMPTY(*inode)) if (!PIPE_EMPTY(*inode))
BUG(); BUG();
goto do_more_read; goto do_more_read;
...@@ -214,7 +214,7 @@ pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos) ...@@ -214,7 +214,7 @@ pipe_write(struct file *filp, const char *buf, size_t count, loff_t *ppos)
* is going to give up this CPU, so it doesnt have * is going to give up this CPU, so it doesnt have
* to do idle reschedules. * to do idle reschedules.
*/ */
wake_up_interruptible_sync(PIPE_WAIT(*inode)); wake_up_interruptible(PIPE_WAIT(*inode));
PIPE_WAITING_WRITERS(*inode)++; PIPE_WAITING_WRITERS(*inode)++;
pipe_wait(inode); pipe_wait(inode);
PIPE_WAITING_WRITERS(*inode)--; PIPE_WAITING_WRITERS(*inode)--;
......
...@@ -35,14 +35,13 @@ ...@@ -35,14 +35,13 @@
* into a single vector (CALL_FUNCTION_VECTOR) to save vector space. * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
* TLB, reschedule and local APIC vectors are performance-critical. * TLB, reschedule and local APIC vectors are performance-critical.
* *
* Vectors 0xf0-0xf9 are free (reserved for future Linux use). * Vectors 0xf0-0xfa are free (reserved for future Linux use).
*/ */
#define SPURIOUS_APIC_VECTOR 0xff #define SPURIOUS_APIC_VECTOR 0xff
#define ERROR_APIC_VECTOR 0xfe #define ERROR_APIC_VECTOR 0xfe
#define INVALIDATE_TLB_VECTOR 0xfd #define INVALIDATE_TLB_VECTOR 0xfd
#define RESCHEDULE_VECTOR 0xfc #define RESCHEDULE_VECTOR 0xfc
#define TASK_MIGRATION_VECTOR 0xfb #define CALL_FUNCTION_VECTOR 0xfb
#define CALL_FUNCTION_VECTOR 0xfa
/* /*
* Local APIC timer IRQ vector is on a different priority level, * Local APIC timer IRQ vector is on a different priority level,
......
...@@ -52,6 +52,8 @@ ...@@ -52,6 +52,8 @@
mm: NULL, \ mm: NULL, \
active_mm: &init_mm, \ active_mm: &init_mm, \
run_list: LIST_HEAD_INIT(tsk.run_list), \ run_list: LIST_HEAD_INIT(tsk.run_list), \
migration_list: LIST_HEAD_INIT(tsk.migration_list), \
migration_sem: __MUTEX_INITIALIZER(tsk.migration_sem), \
time_slice: HZ, \ time_slice: HZ, \
next_task: &tsk, \ next_task: &tsk, \
prev_task: &tsk, \ prev_task: &tsk, \
......
...@@ -150,8 +150,7 @@ extern void update_process_times(int user); ...@@ -150,8 +150,7 @@ extern void update_process_times(int user);
extern void update_one_process(struct task_struct *p, unsigned long user, extern void update_one_process(struct task_struct *p, unsigned long user,
unsigned long system, int cpu); unsigned long system, int cpu);
extern void scheduler_tick(int user_tick, int system); extern void scheduler_tick(int user_tick, int system);
extern void sched_task_migrated(struct task_struct *p); extern void migration_init(void);
extern void smp_migrate_task(int cpu, task_t *task);
extern unsigned long cache_decay_ticks; extern unsigned long cache_decay_ticks;
...@@ -286,6 +285,10 @@ struct task_struct { ...@@ -286,6 +285,10 @@ struct task_struct {
wait_queue_head_t wait_chldexit; /* for wait4() */ wait_queue_head_t wait_chldexit; /* for wait4() */
struct completion *vfork_done; /* for vfork() */ struct completion *vfork_done; /* for vfork() */
list_t migration_list;
struct semaphore migration_sem;
unsigned long rt_priority; unsigned long rt_priority;
unsigned long it_real_value, it_prof_value, it_virt_value; unsigned long it_real_value, it_prof_value, it_virt_value;
unsigned long it_real_incr, it_prof_incr, it_virt_incr; unsigned long it_real_incr, it_prof_incr, it_virt_incr;
...@@ -382,7 +385,12 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0) ...@@ -382,7 +385,12 @@ do { if (atomic_dec_and_test(&(tsk)->usage)) __put_task_struct(tsk); } while(0)
*/ */
#define _STK_LIM (8*1024*1024) #define _STK_LIM (8*1024*1024)
#if CONFIG_SMP
extern void set_cpus_allowed(task_t *p, unsigned long new_mask); extern void set_cpus_allowed(task_t *p, unsigned long new_mask);
#else
# define set_cpus_allowed(p, new_mask) do { } while (0)
#endif
extern void set_user_nice(task_t *p, long nice); extern void set_user_nice(task_t *p, long nice);
extern int task_prio(task_t *p); extern int task_prio(task_t *p);
extern int task_nice(task_t *p); extern int task_nice(task_t *p);
...@@ -460,7 +468,6 @@ extern unsigned long prof_len; ...@@ -460,7 +468,6 @@ extern unsigned long prof_len;
extern unsigned long prof_shift; extern unsigned long prof_shift;
extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr)); extern void FASTCALL(__wake_up(wait_queue_head_t *q, unsigned int mode, int nr));
extern void FASTCALL(__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr));
extern void FASTCALL(sleep_on(wait_queue_head_t *q)); extern void FASTCALL(sleep_on(wait_queue_head_t *q));
extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q, extern long FASTCALL(sleep_on_timeout(wait_queue_head_t *q,
signed long timeout)); signed long timeout));
...@@ -474,13 +481,9 @@ extern void FASTCALL(sched_exit(task_t * p)); ...@@ -474,13 +481,9 @@ extern void FASTCALL(sched_exit(task_t * p));
#define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1) #define wake_up(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
#define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr) #define wake_up_nr(x, nr) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
#define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0) #define wake_up_all(x) __wake_up((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 0)
#define wake_up_sync(x) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1)
#define wake_up_sync_nr(x, nr) __wake_up_sync((x),TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, nr)
#define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1) #define wake_up_interruptible(x) __wake_up((x),TASK_INTERRUPTIBLE, 1)
#define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr) #define wake_up_interruptible_nr(x, nr) __wake_up((x),TASK_INTERRUPTIBLE, nr)
#define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0) #define wake_up_interruptible_all(x) __wake_up((x),TASK_INTERRUPTIBLE, 0)
#define wake_up_interruptible_sync(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, 1)
#define wake_up_interruptible_sync_nr(x) __wake_up_sync((x),TASK_INTERRUPTIBLE, nr)
asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru); asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru);
extern int in_group_p(gid_t); extern int in_group_p(gid_t);
......
...@@ -413,7 +413,12 @@ static void __init do_initcalls(void) ...@@ -413,7 +413,12 @@ static void __init do_initcalls(void)
*/ */
static void __init do_basic_setup(void) static void __init do_basic_setup(void)
{ {
/*
* Let the per-CPU migration threads start up:
*/
#if CONFIG_SMP
migration_init();
#endif
/* /*
* Tell the world that we're going to be the grim * Tell the world that we're going to be the grim
* reaper of innocent orphaned children. * reaper of innocent orphaned children.
......
...@@ -443,7 +443,6 @@ EXPORT_SYMBOL(iomem_resource); ...@@ -443,7 +443,6 @@ EXPORT_SYMBOL(iomem_resource);
/* process management */ /* process management */
EXPORT_SYMBOL(complete_and_exit); EXPORT_SYMBOL(complete_and_exit);
EXPORT_SYMBOL(__wake_up); EXPORT_SYMBOL(__wake_up);
EXPORT_SYMBOL(__wake_up_sync);
EXPORT_SYMBOL(wake_up_process); EXPORT_SYMBOL(wake_up_process);
EXPORT_SYMBOL(sleep_on); EXPORT_SYMBOL(sleep_on);
EXPORT_SYMBOL(sleep_on_timeout); EXPORT_SYMBOL(sleep_on_timeout);
...@@ -458,6 +457,9 @@ EXPORT_SYMBOL(sys_sched_yield); ...@@ -458,6 +457,9 @@ EXPORT_SYMBOL(sys_sched_yield);
EXPORT_SYMBOL(set_user_nice); EXPORT_SYMBOL(set_user_nice);
EXPORT_SYMBOL(task_nice); EXPORT_SYMBOL(task_nice);
EXPORT_SYMBOL_GPL(idle_cpu); EXPORT_SYMBOL_GPL(idle_cpu);
#if CONFIG_SMP
EXPORT_SYMBOL_GPL(set_cpus_allowed);
#endif
EXPORT_SYMBOL(jiffies); EXPORT_SYMBOL(jiffies);
EXPORT_SYMBOL(xtime); EXPORT_SYMBOL(xtime);
EXPORT_SYMBOL(do_gettimeofday); EXPORT_SYMBOL(do_gettimeofday);
......
...@@ -16,12 +16,12 @@ ...@@ -16,12 +16,12 @@
#include <linux/nmi.h> #include <linux/nmi.h>
#include <linux/init.h> #include <linux/init.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <linux/highmem.h>
#include <linux/smp_lock.h> #include <linux/smp_lock.h>
#include <asm/mmu_context.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/completion.h> #include <linux/completion.h>
#include <asm/mmu_context.h>
#include <linux/kernel_stat.h> #include <linux/kernel_stat.h>
#include <linux/highmem.h>
/* /*
* Priority of a process goes from 0 to 139. The 0-99 * Priority of a process goes from 0 to 139. The 0-99
...@@ -127,8 +127,6 @@ typedef struct runqueue runqueue_t; ...@@ -127,8 +127,6 @@ typedef struct runqueue runqueue_t;
struct prio_array { struct prio_array {
int nr_active; int nr_active;
spinlock_t *lock;
runqueue_t *rq;
unsigned long bitmap[BITMAP_SIZE]; unsigned long bitmap[BITMAP_SIZE];
list_t queue[MAX_PRIO]; list_t queue[MAX_PRIO];
}; };
...@@ -146,6 +144,8 @@ struct runqueue { ...@@ -146,6 +144,8 @@ struct runqueue {
task_t *curr, *idle; task_t *curr, *idle;
prio_array_t *active, *expired, arrays[2]; prio_array_t *active, *expired, arrays[2];
int prev_nr_running[NR_CPUS]; int prev_nr_running[NR_CPUS];
task_t *migration_thread;
list_t migration_queue;
} ____cacheline_aligned; } ____cacheline_aligned;
static struct runqueue runqueues[NR_CPUS] __cacheline_aligned; static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
...@@ -156,23 +156,23 @@ static struct runqueue runqueues[NR_CPUS] __cacheline_aligned; ...@@ -156,23 +156,23 @@ static struct runqueue runqueues[NR_CPUS] __cacheline_aligned;
#define cpu_curr(cpu) (cpu_rq(cpu)->curr) #define cpu_curr(cpu) (cpu_rq(cpu)->curr)
#define rt_task(p) ((p)->prio < MAX_RT_PRIO) #define rt_task(p) ((p)->prio < MAX_RT_PRIO)
static inline runqueue_t *lock_task_rq(task_t *p, unsigned long *flags) static inline runqueue_t *task_rq_lock(task_t *p, unsigned long *flags)
{ {
struct runqueue *__rq; struct runqueue *rq;
repeat_lock_task: repeat_lock_task:
preempt_disable(); preempt_disable();
__rq = task_rq(p); rq = task_rq(p);
spin_lock_irqsave(&__rq->lock, *flags); spin_lock_irqsave(&rq->lock, *flags);
if (unlikely(__rq != task_rq(p))) { if (unlikely(rq != task_rq(p))) {
spin_unlock_irqrestore(&__rq->lock, *flags); spin_unlock_irqrestore(&rq->lock, *flags);
preempt_enable(); preempt_enable();
goto repeat_lock_task; goto repeat_lock_task;
} }
return __rq; return rq;
} }
static inline void unlock_task_rq(runqueue_t *rq, unsigned long *flags) static inline void task_rq_unlock(runqueue_t *rq, unsigned long *flags)
{ {
spin_unlock_irqrestore(&rq->lock, *flags); spin_unlock_irqrestore(&rq->lock, *flags);
preempt_enable(); preempt_enable();
...@@ -184,7 +184,7 @@ static inline void unlock_task_rq(runqueue_t *rq, unsigned long *flags) ...@@ -184,7 +184,7 @@ static inline void unlock_task_rq(runqueue_t *rq, unsigned long *flags)
static inline void dequeue_task(struct task_struct *p, prio_array_t *array) static inline void dequeue_task(struct task_struct *p, prio_array_t *array)
{ {
array->nr_active--; array->nr_active--;
list_del_init(&p->run_list); list_del(&p->run_list);
if (list_empty(array->queue + p->prio)) if (list_empty(array->queue + p->prio))
__clear_bit(p->prio, array->bitmap); __clear_bit(p->prio, array->bitmap);
} }
...@@ -289,30 +289,16 @@ void wait_task_inactive(task_t * p) ...@@ -289,30 +289,16 @@ void wait_task_inactive(task_t * p)
cpu_relax(); cpu_relax();
barrier(); barrier();
} }
rq = lock_task_rq(p, &flags); rq = task_rq_lock(p, &flags);
if (unlikely(rq->curr == p)) { if (unlikely(rq->curr == p)) {
unlock_task_rq(rq, &flags); task_rq_unlock(rq, &flags);
preempt_enable(); preempt_enable();
goto repeat; goto repeat;
} }
unlock_task_rq(rq, &flags); task_rq_unlock(rq, &flags);
preempt_enable(); preempt_enable();
} }
/*
 * The SMP message passing code calls this function whenever
 * the new task has arrived at the target CPU. We move the
 * new task into the local runqueue.
 *
 * This function must be called with interrupts disabled.
 *
 * Sequence: wait_task_inactive() is called on the task first, then the
 * task's thread_info->cpu is set to the CPU executing this function,
 * and only then is the task woken with wake_up_process().
 */
void sched_task_migrated(task_t *new_task)
{
wait_task_inactive(new_task);
/* Rebind the task to the CPU we are running on before waking it. */
new_task->thread_info->cpu = smp_processor_id();
wake_up_process(new_task);
}
/* /*
* Kick the remote CPU if the task is running currently, * Kick the remote CPU if the task is running currently,
* this code is used by the signal code to signal tasks * this code is used by the signal code to signal tasks
...@@ -337,27 +323,27 @@ void kick_if_running(task_t * p) ...@@ -337,27 +323,27 @@ void kick_if_running(task_t * p)
* "current->state = TASK_RUNNING" to mark yourself runnable * "current->state = TASK_RUNNING" to mark yourself runnable
* without the overhead of this. * without the overhead of this.
*/ */
static int try_to_wake_up(task_t * p, int synchronous) static int try_to_wake_up(task_t * p)
{ {
unsigned long flags; unsigned long flags;
int success = 0; int success = 0;
runqueue_t *rq; runqueue_t *rq;
rq = lock_task_rq(p, &flags); rq = task_rq_lock(p, &flags);
p->state = TASK_RUNNING; p->state = TASK_RUNNING;
if (!p->array) { if (!p->array) {
activate_task(p, rq); activate_task(p, rq);
if ((rq->curr == rq->idle) || (p->prio < rq->curr->prio)) if (p->prio < rq->curr->prio)
resched_task(rq->curr); resched_task(rq->curr);
success = 1; success = 1;
} }
unlock_task_rq(rq, &flags); task_rq_unlock(rq, &flags);
return success; return success;
} }
int wake_up_process(task_t * p) int wake_up_process(task_t * p)
{ {
return try_to_wake_up(p, 0); return try_to_wake_up(p);
} }
void wake_up_forked_process(task_t * p) void wake_up_forked_process(task_t * p)
...@@ -366,6 +352,7 @@ void wake_up_forked_process(task_t * p) ...@@ -366,6 +352,7 @@ void wake_up_forked_process(task_t * p)
preempt_disable(); preempt_disable();
rq = this_rq(); rq = this_rq();
spin_lock_irq(&rq->lock);
p->state = TASK_RUNNING; p->state = TASK_RUNNING;
if (!rt_task(p)) { if (!rt_task(p)) {
...@@ -378,10 +365,12 @@ void wake_up_forked_process(task_t * p) ...@@ -378,10 +365,12 @@ void wake_up_forked_process(task_t * p)
p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100; p->sleep_avg = p->sleep_avg * CHILD_PENALTY / 100;
p->prio = effective_prio(p); p->prio = effective_prio(p);
} }
spin_lock_irq(&rq->lock); INIT_LIST_HEAD(&p->migration_list);
p->thread_info->cpu = smp_processor_id(); p->thread_info->cpu = smp_processor_id();
activate_task(p, rq); activate_task(p, rq);
spin_unlock_irq(&rq->lock); spin_unlock_irq(&rq->lock);
init_MUTEX(&p->migration_sem);
preempt_enable(); preempt_enable();
} }
...@@ -861,44 +850,33 @@ asmlinkage void preempt_schedule(void) ...@@ -861,44 +850,33 @@ asmlinkage void preempt_schedule(void)
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue. * zero in this (rare) case, and we handle it by continuing to scan the queue.
*/ */
static inline void __wake_up_common (wait_queue_head_t *q, unsigned int mode, static inline void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
int nr_exclusive, const int sync)
{ {
struct list_head *tmp; struct list_head *tmp;
unsigned int state;
wait_queue_t *curr;
task_t *p; task_t *p;
list_for_each(tmp,&q->task_list) { list_for_each(tmp, &q->task_list) {
unsigned int state; curr = list_entry(tmp, wait_queue_t, task_list);
wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);
p = curr->task; p = curr->task;
state = p->state; state = p->state;
if ((state & mode) && if ((state & mode) && try_to_wake_up(p) &&
try_to_wake_up(p, sync) && ((curr->flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive))
((curr->flags & WQ_FLAG_EXCLUSIVE) && break;
!--nr_exclusive))
break;
} }
} }
void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr) void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
{ {
if (q) { unsigned long flags;
unsigned long flags;
wq_read_lock_irqsave(&q->lock, flags);
__wake_up_common(q, mode, nr, 0);
wq_read_unlock_irqrestore(&q->lock, flags);
}
}
void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr) if (unlikely(!q))
{ return;
if (q) {
unsigned long flags; wq_read_lock_irqsave(&q->lock, flags);
wq_read_lock_irqsave(&q->lock, flags); __wake_up_common(q, mode, nr_exclusive);
__wake_up_common(q, mode, nr, 1); wq_read_unlock_irqrestore(&q->lock, flags);
wq_read_unlock_irqrestore(&q->lock, flags);
}
} }
void complete(struct completion *x) void complete(struct completion *x)
...@@ -907,7 +885,7 @@ void complete(struct completion *x) ...@@ -907,7 +885,7 @@ void complete(struct completion *x)
spin_lock_irqsave(&x->wait.lock, flags); spin_lock_irqsave(&x->wait.lock, flags);
x->done++; x->done++;
__wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1, 0); __wake_up_common(&x->wait, TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, 1);
spin_unlock_irqrestore(&x->wait.lock, flags); spin_unlock_irqrestore(&x->wait.lock, flags);
} }
...@@ -994,35 +972,66 @@ long sleep_on_timeout(wait_queue_head_t *q, long timeout) ...@@ -994,35 +972,66 @@ long sleep_on_timeout(wait_queue_head_t *q, long timeout)
return timeout; return timeout;
} }
void scheduling_functions_end_here(void) { }
#if CONFIG_SMP
/* /*
* Change the current task's CPU affinity. Migrate the process to a * Change a given task's CPU affinity. Migrate the process to a
* proper CPU and schedule away if the current CPU is removed from * proper CPU and schedule it away if the current CPU is removed
* the allowed bitmask. * from the allowed bitmask.
*/ */
void set_cpus_allowed(task_t *p, unsigned long new_mask) void set_cpus_allowed(task_t *p, unsigned long new_mask)
{ {
unsigned long flags;
runqueue_t *rq;
int dest_cpu;
down(&p->migration_sem);
if (!list_empty(&p->migration_list))
BUG();
new_mask &= cpu_online_map; new_mask &= cpu_online_map;
if (!new_mask) if (!new_mask)
BUG(); BUG();
if (p != current)
BUG();
rq = task_rq_lock(p, &flags);
p->cpus_allowed = new_mask; p->cpus_allowed = new_mask;
/* /*
* Can the task run on the current CPU? If not then * Can the task run on the task's current CPU? If not then
* migrate the process off to a proper CPU. * migrate the process off to a proper CPU.
*/ */
if (new_mask & (1UL << smp_processor_id())) if (new_mask & (1UL << p->thread_info->cpu)) {
return; task_rq_unlock(rq, &flags);
#if CONFIG_SMP goto out;
current->state = TASK_UNINTERRUPTIBLE; }
smp_migrate_task(__ffs(new_mask), current); /*
* We mark the process as nonrunnable, and kick it to
* schedule away from its current CPU. We also add
* the task to the migration queue and wake up the
* target CPU's migration thread, so that it can pick
* up this task and insert it into the local runqueue.
*/
p->state = TASK_UNINTERRUPTIBLE;
kick_if_running(p);
task_rq_unlock(rq, &flags);
schedule(); dest_cpu = __ffs(new_mask);
#endif rq = cpu_rq(dest_cpu);
spin_lock_irq(&rq->lock);
list_add(&p->migration_list, &rq->migration_queue);
spin_unlock_irq(&rq->lock);
wake_up_process(rq->migration_thread);
while (!((1UL << p->thread_info->cpu) & p->cpus_allowed) &&
(p->state != TASK_ZOMBIE))
yield();
out:
up(&p->migration_sem);
} }
void scheduling_functions_end_here(void) { } #endif
void set_user_nice(task_t *p, long nice) void set_user_nice(task_t *p, long nice)
{ {
...@@ -1036,7 +1045,7 @@ void set_user_nice(task_t *p, long nice) ...@@ -1036,7 +1045,7 @@ void set_user_nice(task_t *p, long nice)
* We have to be careful, if called from sys_setpriority(), * We have to be careful, if called from sys_setpriority(),
* the task might be in the middle of scheduling on another CPU. * the task might be in the middle of scheduling on another CPU.
*/ */
rq = lock_task_rq(p, &flags); rq = task_rq_lock(p, &flags);
if (rt_task(p)) { if (rt_task(p)) {
p->static_prio = NICE_TO_PRIO(nice); p->static_prio = NICE_TO_PRIO(nice);
goto out_unlock; goto out_unlock;
...@@ -1056,7 +1065,7 @@ void set_user_nice(task_t *p, long nice) ...@@ -1056,7 +1065,7 @@ void set_user_nice(task_t *p, long nice)
resched_task(rq->curr); resched_task(rq->curr);
} }
out_unlock: out_unlock:
unlock_task_rq(rq, &flags); task_rq_unlock(rq, &flags);
} }
#ifndef __alpha__ #ifndef __alpha__
...@@ -1154,7 +1163,7 @@ static int setscheduler(pid_t pid, int policy, struct sched_param *param) ...@@ -1154,7 +1163,7 @@ static int setscheduler(pid_t pid, int policy, struct sched_param *param)
* To be able to change p->policy safely, the apropriate * To be able to change p->policy safely, the apropriate
* runqueue lock must be held. * runqueue lock must be held.
*/ */
rq = lock_task_rq(p, &flags); rq = task_rq_lock(p, &flags);
if (policy < 0) if (policy < 0)
policy = p->policy; policy = p->policy;
...@@ -1197,7 +1206,7 @@ static int setscheduler(pid_t pid, int policy, struct sched_param *param) ...@@ -1197,7 +1206,7 @@ static int setscheduler(pid_t pid, int policy, struct sched_param *param)
activate_task(p, task_rq(p)); activate_task(p, task_rq(p));
out_unlock: out_unlock:
unlock_task_rq(rq, &flags); task_rq_unlock(rq, &flags);
out_unlock_tasklist: out_unlock_tasklist:
read_unlock_irq(&tasklist_lock); read_unlock_irq(&tasklist_lock);
...@@ -1477,7 +1486,7 @@ static inline void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) ...@@ -1477,7 +1486,7 @@ static inline void double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2)
void __init init_idle(task_t *idle, int cpu) void __init init_idle(task_t *idle, int cpu)
{ {
runqueue_t *idle_rq = cpu_rq(cpu), *rq = idle->array->rq; runqueue_t *idle_rq = cpu_rq(cpu), *rq = cpu_rq(idle->thread_info->cpu);
unsigned long flags; unsigned long flags;
__save_flags(flags); __save_flags(flags);
...@@ -1509,14 +1518,13 @@ void __init sched_init(void) ...@@ -1509,14 +1518,13 @@ void __init sched_init(void)
runqueue_t *rq = cpu_rq(i); runqueue_t *rq = cpu_rq(i);
prio_array_t *array; prio_array_t *array;
rq->active = rq->arrays + 0; rq->active = rq->arrays;
rq->expired = rq->arrays + 1; rq->expired = rq->arrays + 1;
spin_lock_init(&rq->lock); spin_lock_init(&rq->lock);
INIT_LIST_HEAD(&rq->migration_queue);
for (j = 0; j < 2; j++) { for (j = 0; j < 2; j++) {
array = rq->arrays + j; array = rq->arrays + j;
array->rq = rq;
array->lock = &rq->lock;
for (k = 0; k < MAX_PRIO; k++) { for (k = 0; k < MAX_PRIO; k++) {
INIT_LIST_HEAD(array->queue + k); INIT_LIST_HEAD(array->queue + k);
__clear_bit(k, array->bitmap); __clear_bit(k, array->bitmap);
...@@ -1545,3 +1553,104 @@ void __init sched_init(void) ...@@ -1545,3 +1553,104 @@ void __init sched_init(void)
atomic_inc(&init_mm.mm_count); atomic_inc(&init_mm.mm_count);
enter_lazy_tlb(&init_mm, current, smp_processor_id()); enter_lazy_tlb(&init_mm, current, smp_processor_id());
} }
#if CONFIG_SMP
static volatile unsigned long migration_mask;
static int migration_thread(void * unused)
{
runqueue_t *rq;
daemonize();
sigfillset(&current->blocked);
set_user_nice(current, -20);
/*
* We have to migrate manually - there is no migration thread
* to do this for us yet :-)
*
* We use the following property of the Linux scheduler. At
* this point no other task is running, so by keeping all
* migration threads running, the load-balancer will distribute
* them between all CPUs equally. At that point every migration
* task binds itself to the current CPU.
*/
/* wait for all migration threads to start up. */
while (!migration_mask)
yield();
for (;;) {
preempt_disable();
if (test_and_clear_bit(smp_processor_id(), &migration_mask))
current->cpus_allowed = 1 << smp_processor_id();
if (test_thread_flag(TIF_NEED_RESCHED))
schedule();
if (!migration_mask)
break;
preempt_enable();
}
rq = this_rq();
rq->migration_thread = current;
preempt_enable();
sprintf(current->comm, "migration_CPU%d", smp_processor_id());
for (;;) {
struct list_head *head;
unsigned long flags;
task_t *p = NULL;
spin_lock_irqsave(&rq->lock, flags);
head = &rq->migration_queue;
if (list_empty(head)) {
current->state = TASK_UNINTERRUPTIBLE;
spin_unlock_irqrestore(&rq->lock, flags);
schedule();
continue;
}
p = list_entry(head->next, task_t, migration_list);
list_del_init(head->next);
spin_unlock_irqrestore(&rq->lock, flags);
for (;;) {
runqueue_t *rq2 = task_rq_lock(p, &flags);
if (!p->array) {
p->thread_info->cpu = smp_processor_id();
task_rq_unlock(rq2, &flags);
wake_up_process(p);
break;
}
if (p->state != TASK_UNINTERRUPTIBLE) {
p->state = TASK_UNINTERRUPTIBLE;
kick_if_running(p);
}
task_rq_unlock(rq2, &flags);
while ((p->state == TASK_UNINTERRUPTIBLE) && p->array) {
cpu_relax();
barrier();
}
}
}
}
/*
 * Start one migration thread per CPU and wait until each of them has
 * bound itself to a CPU.
 *
 * All threads are created first; only then is migration_mask published
 * with one bit per CPU, which releases the threads from their start-up
 * wait in migration_thread(). Each thread clears its CPU's bit and,
 * once the mask is empty, registers itself in cpu_rq(cpu)->migration_thread.
 * A failed kernel_thread() or a non-empty mask after all threads have
 * registered is treated as fatal (BUG()).
 */
void __init migration_init(void)
{
	unsigned long mask = 0;
	int cpu;

	for (cpu = 0; cpu < smp_num_cpus; cpu++) {
		if (kernel_thread(migration_thread, NULL,
				CLONE_FS | CLONE_FILES | CLONE_SIGNAL) < 0)
			BUG();
		/*
		 * Build the mask bit by bit in unsigned long arithmetic.
		 * "(1 << smp_num_cpus) - 1" is an int shift that is
		 * undefined once smp_num_cpus reaches the width of int and
		 * can evaluate to 0, leaving every thread waiting forever.
		 */
		mask |= 1UL << cpu;
	}

	/* Publish only after every thread exists (single store). */
	migration_mask = mask;

	for (cpu = 0; cpu < smp_num_cpus; cpu++)
		while (!cpu_rq(cpu)->migration_thread)
			yield();
	if (migration_mask)
		BUG();
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment