Commit b82d9fdd authored by Peter Zijlstra, committed by Ingo Molnar

sched: avoid large irq-latencies in smp-balancing

SMP balancing is done with IRQs disabled and can iterate the full rq.
When rqs are large this can cause large irq-latencies. Limit the nr of
iterations on each run.

This fixes a scheduling latency regression reported by the -rt folks.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Tested-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 3c90e6e9
...@@ -1466,6 +1466,7 @@ extern unsigned int sysctl_sched_batch_wakeup_granularity; ...@@ -1466,6 +1466,7 @@ extern unsigned int sysctl_sched_batch_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first; extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features; extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost; extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
int sched_nr_latency_handler(struct ctl_table *table, int write, int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length, struct file *file, void __user *buffer, size_t *length,
......
...@@ -471,6 +471,12 @@ const_debug unsigned int sysctl_sched_features = ...@@ -471,6 +471,12 @@ const_debug unsigned int sysctl_sched_features =
#define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x) #define sched_feat(x) (sysctl_sched_features & SCHED_FEAT_##x)
/*
 * Number of tasks to iterate in a single balance run.
 * Limited because this is done with IRQs disabled.
 *
 * Defaults to 32. balance_tasks() compares its per-run loop counter
 * against this value and stops scanning once the counter exceeds it.
 * The value is also referenced by the "sched_nr_migrate" sysctl table
 * entry, so it can be tuned at run time where that entry is compiled in.
 */
const_debug unsigned int sysctl_sched_nr_migrate = 32;
/* /*
* For kernel-internal use: high-speed (but slightly incorrect) per-cpu * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
* clock constructed from sched_clock(): * clock constructed from sched_clock():
...@@ -2235,7 +2241,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, ...@@ -2235,7 +2241,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
enum cpu_idle_type idle, int *all_pinned, enum cpu_idle_type idle, int *all_pinned,
int *this_best_prio, struct rq_iterator *iterator) int *this_best_prio, struct rq_iterator *iterator)
{ {
int pulled = 0, pinned = 0, skip_for_load; int loops = 0, pulled = 0, pinned = 0, skip_for_load;
struct task_struct *p; struct task_struct *p;
long rem_load_move = max_load_move; long rem_load_move = max_load_move;
...@@ -2249,10 +2255,10 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, ...@@ -2249,10 +2255,10 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
*/ */
p = iterator->start(iterator->arg); p = iterator->start(iterator->arg);
next: next:
if (!p) if (!p || loops++ > sysctl_sched_nr_migrate)
goto out; goto out;
/* /*
* To help distribute high priority tasks accross CPUs we don't * To help distribute high priority tasks across CPUs we don't
* skip a task if it will be the highest priority task (i.e. smallest * skip a task if it will be the highest priority task (i.e. smallest
* prio value) on its new queue regardless of its load weight * prio value) on its new queue regardless of its load weight
*/ */
...@@ -2269,8 +2275,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, ...@@ -2269,8 +2275,7 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
rem_load_move -= p->se.load.weight; rem_load_move -= p->se.load.weight;
/* /*
* We only want to steal up to the prescribed number of tasks * We only want to steal up to the prescribed amount of weighted load.
* and the prescribed amount of weighted load.
*/ */
if (rem_load_move > 0) { if (rem_load_move > 0) {
if (p->prio < *this_best_prio) if (p->prio < *this_best_prio)
......
...@@ -301,6 +301,14 @@ static struct ctl_table kern_table[] = { ...@@ -301,6 +301,14 @@ static struct ctl_table kern_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = &proc_dointvec, .proc_handler = &proc_dointvec,
}, },
{
.ctl_name = CTL_UNNUMBERED,
.procname = "sched_nr_migrate",
.data = &sysctl_sched_nr_migrate,
.maxlen = sizeof(unsigned int),
.mode = 644,
.proc_handler = &proc_dointvec,
},
#endif #endif
{ {
.ctl_name = CTL_UNNUMBERED, .ctl_name = CTL_UNNUMBERED,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment