Commit 383f2835 authored by Chen, Kenneth W's avatar Chen, Kenneth W Committed by Linus Torvalds

[PATCH] Prefetch kernel stacks to speed up context switch

For an architecture like ia64, the switch stack structure is fairly large
(currently 528 bytes).  For context-switch-intensive applications, we found
that a significant number of cache misses occur in the switch_to() function.
The following patch adds a hook in the schedule() function to prefetch
switch stack structure as soon as 'next' task is determined.  This allows
maximum overlap in prefetch cache lines for that structure.
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent b0d62e6d
...@@ -470,6 +470,29 @@ ENTRY(load_switch_stack) ...@@ -470,6 +470,29 @@ ENTRY(load_switch_stack)
br.cond.sptk.many b7 br.cond.sptk.many b7
END(load_switch_stack) END(load_switch_stack)
GLOBAL_ENTRY(prefetch_stack)
	// void prefetch_stack(struct task_struct *next)
	// In:     in0 = 'next' task (the task we are about to switch to)
	// Warm the caches ahead of switch_to():
	//   r14 = area just below our current sp where the switch_stack
	//         (IA64_SWITCH_STACK_SIZE bytes) is about to be written --
	//         prefetched with .excl since we will store into it;
	//   r16 = next's saved kernel stack pointer (next->thread.ksp) --
	//         prefetched shared, we will read it during the switch.
	// Each lfetch post-increments its pointer by 128 (cache-line stride);
	// 5 lines x 128 = 640 bytes covers the 528-byte switch stack.
	// .fault form: take the TLB fault if needed so the prefetch is not dropped.
	add r14 = -IA64_SWITCH_STACK_SIZE, sp		// r14 = base of switch_stack save area
	add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0	// r15 = &next->thread.ksp
	;;
	ld8 r16 = [r15]		// load next's stack pointer
	lfetch.fault.excl [r14], 128
	;;
	lfetch.fault.excl [r14], 128
	lfetch.fault [r16], 128
	;;
	lfetch.fault.excl [r14], 128
	lfetch.fault [r16], 128
	;;
	lfetch.fault.excl [r14], 128
	lfetch.fault [r16], 128
	;;
	lfetch.fault.excl [r14], 128
	lfetch.fault [r16], 128
	;;
	lfetch.fault [r16], 128
	br.ret.sptk.many rp
END(prefetch_stack)		// fixed: END name must match GLOBAL_ENTRY name
				// (was END(prefetch_switch_stack))
GLOBAL_ENTRY(execve) GLOBAL_ENTRY(execve)
mov r15=__NR_execve // put syscall number in place mov r15=__NR_execve // put syscall number in place
break __BREAK_SYSCALL break __BREAK_SYSCALL
......
...@@ -275,6 +275,7 @@ extern void ia64_load_extra (struct task_struct *task); ...@@ -275,6 +275,7 @@ extern void ia64_load_extra (struct task_struct *task);
*/ */
#define __ARCH_WANT_UNLOCKED_CTXSW #define __ARCH_WANT_UNLOCKED_CTXSW
#define ARCH_HAS_PREFETCH_SWITCH_STACK
#define ia64_platform_is(x) (strcmp(x, platform_name) == 0) #define ia64_platform_is(x) (strcmp(x, platform_name) == 0)
void cpu_idle_wait(void); void cpu_idle_wait(void);
......
...@@ -604,6 +604,11 @@ extern int groups_search(struct group_info *group_info, gid_t grp); ...@@ -604,6 +604,11 @@ extern int groups_search(struct group_info *group_info, gid_t grp);
#define GROUP_AT(gi, i) \ #define GROUP_AT(gi, i) \
((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK])
#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
/*
 * Arch-provided hook called from schedule() once 'next' is chosen, so the
 * cache lines of next's switch-stack/kernel-stack area can be prefetched
 * before switch_to() touches them.  Architectures that do not define
 * ARCH_HAS_PREFETCH_SWITCH_STACK get the empty inline stub below.
 */
extern void prefetch_stack(struct task_struct*);
#else
static inline void prefetch_stack(struct task_struct *t) { }
#endif
struct audit_context; /* See audit.c */ struct audit_context; /* See audit.c */
struct mempolicy; struct mempolicy;
......
...@@ -2888,6 +2888,7 @@ asmlinkage void __sched schedule(void) ...@@ -2888,6 +2888,7 @@ asmlinkage void __sched schedule(void)
if (next == rq->idle) if (next == rq->idle)
schedstat_inc(rq, sched_goidle); schedstat_inc(rq, sched_goidle);
prefetch(next); prefetch(next);
prefetch_stack(next);
clear_tsk_need_resched(prev); clear_tsk_need_resched(prev);
rcu_qsctr_inc(task_cpu(prev)); rcu_qsctr_inc(task_cpu(prev));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment