[PATCH] sched: fix SMT scheduling problems

SMT balancing has a couple of problems.

Firstly, active_load_balance is too complex - basically it should be a dumb helper for when the periodic balancer has determined there is an imbalance, but gets stuck because the task is running. So rip out all its "smarts", and just make it move one task to the target CPU.

Secondly, the busy CPU's sched-domain tree was being used for active balancing. This means that it may not see that nr_balance_failed has reached a critical level. So use the target CPU's sched-domain tree for this. We can do this because we hold its runqueue lock.

Lastly, reset nr_balance_failed to a point where we allow cache hot migration. This will help ensure active load balancing is successful.

Thanks to Suresh Siddha for pointing out these issues.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

[PATCH] sched: fix SMT scheduling problems
SMT balancing has a couple of problems.

Firstly, active_load_balance is too complex - basically it should be a dumb helper for when the periodic balancer has determined there is an imbalance, but gets stuck because the task is running. So rip out all its "smarts", and just make it move one task to the target CPU.

Secondly, the busy CPU's sched-domain tree was being used for active balancing. This means that it may not see that nr_balance_failed has reached a critical level. So use the target CPU's sched-domain tree for this. We can do this because we hold its runqueue lock.

Lastly, reset nr_balance_failed to a point where we allow cache hot migration. This will help ensure active load balancing is successful.

Thanks to Suresh Siddha for pointing out these issues.

Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
39507451 · Nick Piggin · Linus Torvalds · 16cfb1c0 · 39507451
Commit 39507451 authored Jun 25, 2005 by Nick Piggin Committed by Linus Torvalds Jun 25, 2005
Hide whitespace changes
Inline Side-by-side

Showing with 31 additions and 45 deletions

kernel/sched.c +31 -45

No files found.
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1995,7 +1995,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
 			 * We've kicked active balancing, reset the failure
 			 * counter.
 			 */
-			sd->nr_balance_failed = sd->cache_nice_tries;
+			sd->nr_balance_failed = sd->cache_nice_tries+1;
 		}
 	} else
 		sd->nr_balance_failed = 0;
@@ -2106,56 +2106,42 @@ static inline void idle_balance(int this_cpu, runqueue_t *this_rq)
 static void active_load_balance(runqueue_t *busiest_rq, int busiest_cpu)
 {
 	struct sched_domain *sd;
-	struct sched_group *cpu_group;
 	runqueue_t *target_rq;
-	cpumask_t visited_cpus;
+	int target_cpu = busiest_rq->push_cpu;
-	int cpu;
+	if (busiest_rq->nr_running <= 1)
+		/* no task to move */
+		return;
+	target_rq = cpu_rq(target_cpu);
 	/*
-	 * Search for suitable CPUs to push tasks to in successively higher
+	 * This condition is "impossible", if it occurs
-	 * domains with SD_LOAD_BALANCE set.
+	 * we need to fix it.  Originally reported by
+	 * Bjorn Helgaas on a 128-cpu setup.
 	 */
-	visited_cpus = CPU_MASK_NONE;
+	BUG_ON(busiest_rq == target_rq);
-	for_each_domain(busiest_cpu, sd) {
-		if (!(sd->flags & SD_LOAD_BALANCE))
-			/* no more domains to search */
-			break;
-		schedstat_inc(sd, alb_cnt);
+	/* move a task from busiest_rq to target_rq */
+	double_lock_balance(busiest_rq, target_rq);
-		cpu_group = sd->groups;
+	/* Search for an sd spanning us and the target CPU. */
-		do {
+	for_each_domain(target_cpu, sd)
-			for_each_cpu_mask(cpu, cpu_group->cpumask) {
+		if ((sd->flags & SD_LOAD_BALANCE) &&
-				if (busiest_rq->nr_running <= 1)
+			cpu_isset(busiest_cpu, sd->span))
-					/* no more tasks left to move */
+				break;
-					return;
-				if (cpu_isset(cpu, visited_cpus))
+	if (unlikely(sd == NULL))
-					continue;
+		goto out;
-				cpu_set(cpu, visited_cpus);
-				if (!cpu_and_siblings_are_idle(cpu) || cpu == busiest_cpu)
+	schedstat_inc(sd, alb_cnt);
-					continue;
+	if (move_tasks(target_rq, target_cpu, busiest_rq, 1, sd, SCHED_IDLE, NULL))
-				target_rq = cpu_rq(cpu);
+		schedstat_inc(sd, alb_pushed);
-				/*
+	else
-				 * This condition is "impossible", if it occurs
+		schedstat_inc(sd, alb_failed);
-				 * we need to fix it.  Originally reported by
+out:
-				 * Bjorn Helgaas on a 128-cpu setup.
+	spin_unlock(&target_rq->lock);
-				 */
-				BUG_ON(busiest_rq == target_rq);
-				/* move a task from busiest_rq to target_rq */
-				double_lock_balance(busiest_rq, target_rq);
-				if (move_tasks(target_rq, cpu, busiest_rq,
-						1, sd, SCHED_IDLE, NULL)) {
-					schedstat_inc(sd, alb_pushed);
-				} else {
-					schedstat_inc(sd, alb_failed);
-				}
-				spin_unlock(&target_rq->lock);
-			}
-			cpu_group = cpu_group->next;
-		} while (cpu_group != sd->groups);
-	}
 }
 /*