Commit a790de99, authored by Paul Turner, committed by Ingo Molnar

sched: Validate CFS quota hierarchies

Add constraints validation for CFS bandwidth hierarchies.

Validate that:
   max(child bandwidth) <= parent_bandwidth

In a quota limited hierarchy, an unconstrained entity
(e.g. bandwidth==RUNTIME_INF) inherits the bandwidth of its parent.

This constraint is chosen over sum(child_bandwidth) as the notion of over-commit is
valuable within SCHED_OTHER.  Some basic code from the RT case is re-factored
for reuse.
Signed-off-by: Paul Turner <pjt@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184757.083774572@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent ab84d31e
...@@ -252,6 +252,7 @@ struct cfs_bandwidth { ...@@ -252,6 +252,7 @@ struct cfs_bandwidth {
raw_spinlock_t lock; raw_spinlock_t lock;
ktime_t period; ktime_t period;
u64 quota; u64 quota;
s64 hierarchal_quota;
#endif #endif
}; };
...@@ -1518,7 +1519,8 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load) ...@@ -1518,7 +1519,8 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
update_load_sub(&rq->load, load); update_load_sub(&rq->load, load);
} }
#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED) #if defined(CONFIG_RT_GROUP_SCHED) || (defined(CONFIG_FAIR_GROUP_SCHED) && \
(defined(CONFIG_SMP) || defined(CONFIG_CFS_BANDWIDTH)))
typedef int (*tg_visitor)(struct task_group *, void *); typedef int (*tg_visitor)(struct task_group *, void *);
/* /*
...@@ -8708,12 +8710,7 @@ unsigned long sched_group_shares(struct task_group *tg) ...@@ -8708,12 +8710,7 @@ unsigned long sched_group_shares(struct task_group *tg)
} }
#endif #endif
#ifdef CONFIG_RT_GROUP_SCHED #if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_CFS_BANDWIDTH)
/*
* Ensure that the real time constraints are schedulable.
*/
static DEFINE_MUTEX(rt_constraints_mutex);
static unsigned long to_ratio(u64 period, u64 runtime) static unsigned long to_ratio(u64 period, u64 runtime)
{ {
if (runtime == RUNTIME_INF) if (runtime == RUNTIME_INF)
...@@ -8721,6 +8718,13 @@ static unsigned long to_ratio(u64 period, u64 runtime) ...@@ -8721,6 +8718,13 @@ static unsigned long to_ratio(u64 period, u64 runtime)
return div64_u64(runtime << 20, period); return div64_u64(runtime << 20, period);
} }
#endif
#ifdef CONFIG_RT_GROUP_SCHED
/*
* Ensure that the real time constraints are schedulable.
*/
static DEFINE_MUTEX(rt_constraints_mutex);
/* Must be called with tasklist_lock held */ /* Must be called with tasklist_lock held */
static inline int tg_has_rt_tasks(struct task_group *tg) static inline int tg_has_rt_tasks(struct task_group *tg)
...@@ -8741,7 +8745,7 @@ struct rt_schedulable_data { ...@@ -8741,7 +8745,7 @@ struct rt_schedulable_data {
u64 rt_runtime; u64 rt_runtime;
}; };
static int tg_schedulable(struct task_group *tg, void *data) static int tg_rt_schedulable(struct task_group *tg, void *data)
{ {
struct rt_schedulable_data *d = data; struct rt_schedulable_data *d = data;
struct task_group *child; struct task_group *child;
...@@ -8805,7 +8809,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime) ...@@ -8805,7 +8809,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
.rt_runtime = runtime, .rt_runtime = runtime,
}; };
return walk_tg_tree(tg_schedulable, tg_nop, &data); return walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
} }
static int tg_set_rt_bandwidth(struct task_group *tg, static int tg_set_rt_bandwidth(struct task_group *tg,
...@@ -9064,14 +9068,17 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft) ...@@ -9064,14 +9068,17 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
} }
#ifdef CONFIG_CFS_BANDWIDTH #ifdef CONFIG_CFS_BANDWIDTH
static DEFINE_MUTEX(cfs_constraints_mutex);
const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime);
static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
{ {
int i; int i, ret = 0;
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg); struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
static DEFINE_MUTEX(mutex);
if (tg == &root_task_group) if (tg == &root_task_group)
return -EINVAL; return -EINVAL;
...@@ -9092,7 +9099,11 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) ...@@ -9092,7 +9099,11 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
if (period > max_cfs_quota_period) if (period > max_cfs_quota_period)
return -EINVAL; return -EINVAL;
mutex_lock(&mutex); mutex_lock(&cfs_constraints_mutex);
ret = __cfs_schedulable(tg, period, quota);
if (ret)
goto out_unlock;
raw_spin_lock_irq(&cfs_b->lock); raw_spin_lock_irq(&cfs_b->lock);
cfs_b->period = ns_to_ktime(period); cfs_b->period = ns_to_ktime(period);
cfs_b->quota = quota; cfs_b->quota = quota;
...@@ -9107,9 +9118,10 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota) ...@@ -9107,9 +9118,10 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
cfs_rq->runtime_remaining = 0; cfs_rq->runtime_remaining = 0;
raw_spin_unlock_irq(&rq->lock); raw_spin_unlock_irq(&rq->lock);
} }
mutex_unlock(&mutex); out_unlock:
mutex_unlock(&cfs_constraints_mutex);
return 0; return ret;
} }
int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us) int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
...@@ -9183,6 +9195,78 @@ static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype, ...@@ -9183,6 +9195,78 @@ static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype,
return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us); return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us);
} }
/*
 * State handed to the tree walk that validates a CFS bandwidth change.
 */
struct cfs_schedulable_data {
	/* group whose bandwidth is being changed */
	struct task_group *tg;
	/* proposed period for @tg, used in place of its stored value */
	u64 period;
	/* proposed quota for @tg, used in place of its stored value */
	u64 quota;
};
/*
 * Reduce a group's quota/period pair to a single comparable bandwidth value
 * by passing it through to_ratio().
 *
 * The group under modification (d->tg) is evaluated with the proposed
 * period/quota carried in @d; every other group is evaluated with the values
 * reported by tg_get_cfs_period()/tg_get_cfs_quota().
 *
 * Returns RUNTIME_INF when the group has no limit, otherwise the ratio.
 * note: units are usecs
 */
static u64 normalize_cfs_quota(struct task_group *tg,
			       struct cfs_schedulable_data *d)
{
	/* start from the proposed values, override for unrelated groups */
	u64 period = d->period;
	u64 quota = d->quota;

	if (tg != d->tg) {
		period = tg_get_cfs_period(tg);
		quota = tg_get_cfs_quota(tg);
	}

	/*
	 * An unlimited group may show up either as RUNTIME_INF or as -1;
	 * these should typically be equivalent.
	 */
	if (quota != RUNTIME_INF && quota != -1)
		return to_ratio(period, quota);

	return RUNTIME_INF;
}
/*
 * Tree-walk visitor: work out the effective (hierarchical) quota of @tg and
 * record it in the group's cfs_bandwidth.
 *
 * A group without a parent is unconstrained.  Any other group either
 * inherits its parent's hierarchical quota (when it has no limit of its own)
 * or must not exceed it.
 *
 * @data points to the struct cfs_schedulable_data describing the change.
 *
 * Returns 0 on success, or -EINVAL when the group's quota is larger than a
 * limited parent's hierarchical quota; nothing is stored for @tg in that
 * case.
 */
static int tg_cfs_schedulable_down(struct task_group *tg, void *data)
{
	struct cfs_schedulable_data *sched_data = data;
	struct cfs_bandwidth *bw = tg_cfs_bandwidth(tg);
	struct cfs_bandwidth *parent_bw;
	s64 own, inherited, effective;

	/* top of the hierarchy: nothing above it can restrict it */
	if (!tg->parent) {
		bw->hierarchal_quota = RUNTIME_INF;
		return 0;
	}

	parent_bw = tg_cfs_bandwidth(tg->parent);
	own = normalize_cfs_quota(tg, sched_data);
	inherited = parent_bw->hierarchal_quota;

	/*
	 * ensure max(child_quota) <= parent_quota, inherit when no
	 * limit is set
	 */
	if (own == RUNTIME_INF)
		effective = inherited;
	else if (inherited == RUNTIME_INF || own <= inherited)
		effective = own;
	else
		return -EINVAL;

	bw->hierarchal_quota = effective;

	return 0;
}
/*
 * Check whether giving @tg the bandwidth (@period, @quota) keeps the whole
 * task-group hierarchy valid.
 *
 * @period and @quota arrive in nanoseconds and are scaled down to
 * microseconds before the walk, except when @quota is RUNTIME_INF, in which
 * case both values are passed through untouched.
 *
 * Returns the result of walk_tg_tree() using tg_cfs_schedulable_down as the
 * "down" visitor.
 */
static int __cfs_schedulable(struct task_group *tg, u64 period, u64 quota)
{
	struct cfs_schedulable_data request;

	request.tg = tg;
	request.period = period;
	request.quota = quota;

	/* an unlimited quota is a sentinel, not a duration: do not scale it */
	if (quota != RUNTIME_INF) {
		do_div(request.period, NSEC_PER_USEC);
		do_div(request.quota, NSEC_PER_USEC);
	}

	return walk_tg_tree(tg_cfs_schedulable_down, tg_nop, &request);
}
#endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_CFS_BANDWIDTH */
#endif /* CONFIG_FAIR_GROUP_SCHED */ #endif /* CONFIG_FAIR_GROUP_SCHED */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment