sched: rt-group: deal with PI

Steven mentioned the fun case where a lock holding task will be throttled. Simple fix: allow groups that have boosted tasks to run anyway. If a runnable task in a throttled group gets boosted the dequeue/enqueue done by rt_mutex_setprio() is enough to unthrottle the group. This is ofcourse not quite correct. Two possible ways forward are: - second prio array for boosted tasks - boost to a prio ceiling (this would also work for deadline scheduling) Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>

sched: rt-group: deal with PI
Steven mentioned the fun case where a lock holding task will be throttled. Simple fix: allow groups that have boosted tasks to run anyway. If a runnable task in a throttled group gets boosted the dequeue/enqueue done by rt_mutex_setprio() is enough to unthrottle the group. This is ofcourse not quite correct. Two possible ways forward are: - second prio array for boosted tasks - boost to a prio ceiling (this would also work for deadline scheduling) Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Ingo Molnar <mingo@elte.hu>
23b0fdfc · Peter Zijlstra · Ingo Molnar · 4cf5d77a · 23b0fdfc · 23b0fdfc
Commit 23b0fdfc authored Feb 13, 2008 by Peter Zijlstra Committed by Ingo Molnar Feb 13, 2008
Hide whitespace changes
Inline Side-by-side

Showing with 41 additions and 5 deletions

kernel/sched.c kernel/sched.c +3 -0

kernel/sched_rt.c kernel/sched_rt.c +38 -5

No files found.
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -362,6 +362,8 @@ struct rt_rq {
 	u64 rt_time;

 #ifdef CONFIG_FAIR_GROUP_SCHED
+	unsigned long rt_nr_boosted;
+
 	struct rq *rq;
 	struct list_head leaf_rt_rq_list;
 	struct task_group *tg;
@@ -7112,6 +7114,7 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 	rt_rq->rt_throttled = 0;

 #ifdef CONFIG_FAIR_GROUP_SCHED
+	rt_rq->rt_nr_boosted = 0;
 	rt_rq->rq = rq;
 #endif
 }

--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -110,6 +110,23 @@ static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
 		dequeue_rt_entity(rt_se);
 }

+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+	return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
+}
+
+static int rt_se_boosted(struct sched_rt_entity *rt_se)
+{
+	struct rt_rq *rt_rq = group_rt_rq(rt_se);
+	struct task_struct *p;
+
+	if (rt_rq)
+		return !!rt_rq->rt_nr_boosted;
+
+	p = rt_task_of(rt_se);
+	return p->prio != p->normal_prio;
+}
+
 #else

 static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
@@ -149,6 +166,10 @@ static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
 {
 }

+static inline int rt_rq_throttled(struct rt_rq *rt_rq)
+{
+	return rt_rq->rt_throttled;
+}
 #endif

 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
@@ -172,7 +193,7 @@ static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
 		return 0;

 	if (rt_rq->rt_throttled)
-		return 1;
+		return rt_rq_throttled(rt_rq);

 	period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
 	ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
@@ -183,8 +204,10 @@ static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
 		rq->rt_throttled = 1;
 		rt_rq->rt_throttled = 1;

-		sched_rt_ratio_dequeue(rt_rq);
-		return 1;
+		if (rt_rq_throttled(rt_rq)) {
+			sched_rt_ratio_dequeue(rt_rq);
+			return 1;
+		}
 	}

 	return 0;
@@ -265,6 +288,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)

 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	if (rt_se_boosted(rt_se))
+		rt_rq->rt_nr_boosted++;
+#endif
 }

 static inline
@@ -295,6 +322,12 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)

 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	if (rt_se_boosted(rt_se))
+		rt_rq->rt_nr_boosted--;
+
+	WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
+#endif
 }

 static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
@@ -303,7 +336,7 @@ static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
 	struct rt_prio_array *array = &rt_rq->active;
 	struct rt_rq *group_rq = group_rt_rq(rt_se);

-	if (group_rq && group_rq->rt_throttled)
+	if (group_rq && rt_rq_throttled(group_rq))
 		return;

 	list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
@@ -496,7 +529,7 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 	if (unlikely(!rt_rq->rt_nr_running))
 		return NULL;

-	if (sched_rt_ratio_exceeded(rt_rq))
+	if (rt_rq_throttled(rt_rq))
 		return NULL;

 	do {