Commit 531b5c9f authored by Quentin Perret, committed by Ingo Molnar

sched/topology: Make Energy Aware Scheduling depend on schedutil

Energy Aware Scheduling (EAS) is designed with the assumption that
frequencies of CPUs follow their utilization value. When using a CPUFreq
governor other than schedutil, the chances of this assumption being true
are small, if any. When schedutil is being used, EAS' predictions are at
least consistent with the frequency requests. Although those requests
have no guarantees to be honored by the hardware, they should at least
guide DVFS in the right direction and provide some hope in regards to the
EAS model being accurate.

To make sure EAS is only used in a sane configuration, create a strong
dependency on schedutil being used. Since having sugov compiled-in does
not provide that guarantee, make CPUFreq call a scheduler function on
governor changes hence letting it rebuild the scheduling domains, check
the governors of the online CPUs, and enable/disable EAS accordingly.

Signed-off-by: Quentin Perret <quentin.perret@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rafael J. Wysocki <rjw@rjwysocki.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: adharmap@codeaurora.org
Cc: chris.redpath@arm.com
Cc: currojerez@riseup.net
Cc: dietmar.eggemann@arm.com
Cc: edubezval@gmail.com
Cc: gregkh@linuxfoundation.org
Cc: javi.merino@kernel.org
Cc: joel@joelfernandes.org
Cc: juri.lelli@redhat.com
Cc: morten.rasmussen@arm.com
Cc: patrick.bellasi@arm.com
Cc: pkondeti@codeaurora.org
Cc: skannan@codeaurora.org
Cc: smuckle@google.com
Cc: srinivas.pandruvada@linux.intel.com
Cc: thara.gopinath@linaro.org
Cc: tkjos@google.com
Cc: valentin.schneider@arm.com
Cc: vincent.guittot@linaro.org
Cc: viresh.kumar@linaro.org
Link: https://lkml.kernel.org/r/20181203095628.11858-9-quentin.perret@arm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent b68a4c0d
...@@ -2277,6 +2277,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, ...@@ -2277,6 +2277,7 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
ret = cpufreq_start_governor(policy); ret = cpufreq_start_governor(policy);
if (!ret) { if (!ret) {
pr_debug("cpufreq: governor change\n"); pr_debug("cpufreq: governor change\n");
sched_cpufreq_governor_change(policy, old_gov);
return 0; return 0;
} }
cpufreq_exit_governor(policy); cpufreq_exit_governor(policy);
......
...@@ -950,6 +950,14 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) ...@@ -950,6 +950,14 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy)
} }
#endif #endif
/*
 * Scheduler hook called by cpufreq_set_policy() once the new governor of
 * @policy has been started successfully; @old_gov is the governor that was
 * passed in by that caller as the previous one.
 *
 * The real implementation is only declared when both CONFIG_ENERGY_MODEL and
 * CONFIG_CPU_FREQ_GOV_SCHEDUTIL are enabled; otherwise the call compiles to
 * an empty inline stub.
 */
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
struct cpufreq_governor *old_gov);
#else
static inline void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
struct cpufreq_governor *old_gov) { }
#endif
extern void arch_freq_prepare_all(void); extern void arch_freq_prepare_all(void);
extern unsigned int arch_freq_get_on_cpu(int cpu); extern unsigned int arch_freq_get_on_cpu(int cpu);
......
...@@ -624,7 +624,7 @@ static struct kobj_type sugov_tunables_ktype = { ...@@ -624,7 +624,7 @@ static struct kobj_type sugov_tunables_ktype = {
/********************** cpufreq governor interface *********************/ /********************** cpufreq governor interface *********************/
static struct cpufreq_governor schedutil_gov; struct cpufreq_governor schedutil_gov;
static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{ {
...@@ -883,7 +883,7 @@ static void sugov_limits(struct cpufreq_policy *policy) ...@@ -883,7 +883,7 @@ static void sugov_limits(struct cpufreq_policy *policy)
sg_policy->need_freq_update = true; sg_policy->need_freq_update = true;
} }
static struct cpufreq_governor schedutil_gov = { struct cpufreq_governor schedutil_gov = {
.name = "schedutil", .name = "schedutil",
.owner = THIS_MODULE, .owner = THIS_MODULE,
.dynamic_switching = true, .dynamic_switching = true,
...@@ -906,3 +906,36 @@ static int __init sugov_register(void) ...@@ -906,3 +906,36 @@ static int __init sugov_register(void)
return cpufreq_register_governor(&schedutil_gov); return cpufreq_register_governor(&schedutil_gov);
} }
fs_initcall(sugov_register); fs_initcall(sugov_register);
#ifdef CONFIG_ENERGY_MODEL
/*
 * Both objects are defined next to the perf-domain code (see the
 * DEFINE_MUTEX(sched_energy_mutex) / bool sched_energy_update definitions).
 */
extern bool sched_energy_update;
extern struct mutex sched_energy_mutex;
/*
 * Work handler queued by sched_cpufreq_governor_change(); @work is unused.
 *
 * Holds sched_energy_mutex across the whole sequence and raises
 * sched_energy_update only for the duration of rebuild_sched_domains().
 * While the flag is set, the perf-domain loop in partition_sched_domains()
 * ("j < n && !sched_energy_update") does not search for an unchanged domain,
 * presumably so that perf domains are re-evaluated even when the sched
 * domains themselves did not change.
 */
static void rebuild_sd_workfn(struct work_struct *work)
{
mutex_lock(&sched_energy_mutex);
sched_energy_update = true;
rebuild_sched_domains();
sched_energy_update = false;
mutex_unlock(&sched_energy_mutex);
}
/* Statically declared work item bound to rebuild_sd_workfn(). */
static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn);
/*
 * Energy Aware Scheduling must not be attempted without schedutil. Whenever
 * a governor switch moves @policy to or away from schedutil, have the
 * sched_domains rebuilt so the scheduler can re-check the governors in use.
 *
 * @policy:  cpufreq policy whose governor has just been changed.
 * @old_gov: governor that was driving @policy before the change.
 */
void sched_cpufreq_governor_change(struct cpufreq_policy *policy,
				   struct cpufreq_governor *old_gov)
{
	/* A switch that does not involve schedutil is of no interest here. */
	if (old_gov != &schedutil_gov && policy->governor != &schedutil_gov)
		return;

	/*
	 * The cpufreq_register_driver() path reaches this point with the
	 * cpu_hotplug_lock already held. Defer the rebuild to a work item so
	 * that rebuild_sched_domains() does not nest that lock.
	 */
	schedule_work(&rebuild_sd_work);
}
#endif
...@@ -2291,10 +2291,8 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned ...@@ -2291,10 +2291,8 @@ unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned
} }
#endif #endif
#ifdef CONFIG_SMP #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
#ifdef CONFIG_ENERGY_MODEL
#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus))) #define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
#else #else
#define perf_domain_span(pd) NULL #define perf_domain_span(pd) NULL
#endif #endif
#endif
...@@ -201,7 +201,10 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) ...@@ -201,7 +201,10 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
return 1; return 1;
} }
#ifdef CONFIG_ENERGY_MODEL #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
/* Held by rebuild_sd_workfn() around its call to rebuild_sched_domains(). */
DEFINE_MUTEX(sched_energy_mutex);
/*
 * Set by rebuild_sd_workfn() only while it rebuilds the sched domains;
 * tested by the perf-domain loop in partition_sched_domains().
 */
bool sched_energy_update;
static void free_pd(struct perf_domain *pd) static void free_pd(struct perf_domain *pd)
{ {
struct perf_domain *tmp; struct perf_domain *tmp;
...@@ -275,6 +278,7 @@ static void destroy_perf_domain_rcu(struct rcu_head *rp) ...@@ -275,6 +278,7 @@ static void destroy_perf_domain_rcu(struct rcu_head *rp)
* 1. an Energy Model (EM) is available; * 1. an Energy Model (EM) is available;
* 2. the SD_ASYM_CPUCAPACITY flag is set in the sched_domain hierarchy. * 2. the SD_ASYM_CPUCAPACITY flag is set in the sched_domain hierarchy.
* 3. the EM complexity is low enough to keep scheduling overheads low; * 3. the EM complexity is low enough to keep scheduling overheads low;
* 4. schedutil is driving the frequency of all CPUs of the rd;
* *
* The complexity of the Energy Model is defined as: * The complexity of the Energy Model is defined as:
* *
...@@ -294,12 +298,15 @@ static void destroy_perf_domain_rcu(struct rcu_head *rp) ...@@ -294,12 +298,15 @@ static void destroy_perf_domain_rcu(struct rcu_head *rp)
*/ */
#define EM_MAX_COMPLEXITY 2048 #define EM_MAX_COMPLEXITY 2048
extern struct cpufreq_governor schedutil_gov;
static void build_perf_domains(const struct cpumask *cpu_map) static void build_perf_domains(const struct cpumask *cpu_map)
{ {
int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map); int i, nr_pd = 0, nr_cs = 0, nr_cpus = cpumask_weight(cpu_map);
struct perf_domain *pd = NULL, *tmp; struct perf_domain *pd = NULL, *tmp;
int cpu = cpumask_first(cpu_map); int cpu = cpumask_first(cpu_map);
struct root_domain *rd = cpu_rq(cpu)->rd; struct root_domain *rd = cpu_rq(cpu)->rd;
struct cpufreq_policy *policy;
struct cpufreq_governor *gov;
/* EAS is enabled for asymmetric CPU capacity topologies. */ /* EAS is enabled for asymmetric CPU capacity topologies. */
if (!per_cpu(sd_asym_cpucapacity, cpu)) { if (!per_cpu(sd_asym_cpucapacity, cpu)) {
...@@ -315,6 +322,19 @@ static void build_perf_domains(const struct cpumask *cpu_map) ...@@ -315,6 +322,19 @@ static void build_perf_domains(const struct cpumask *cpu_map)
if (find_pd(pd, i)) if (find_pd(pd, i))
continue; continue;
/* Do not attempt EAS if schedutil is not being used. */
policy = cpufreq_cpu_get(i);
if (!policy)
goto free;
gov = policy->governor;
cpufreq_cpu_put(policy);
if (gov != &schedutil_gov) {
if (rd->pd)
pr_warn("rd %*pbl: Disabling EAS, schedutil is mandatory\n",
cpumask_pr_args(cpu_map));
goto free;
}
/* Create the new pd and add it to the local list. */ /* Create the new pd and add it to the local list. */
tmp = pd_init(i); tmp = pd_init(i);
if (!tmp) if (!tmp)
...@@ -356,7 +376,7 @@ static void build_perf_domains(const struct cpumask *cpu_map) ...@@ -356,7 +376,7 @@ static void build_perf_domains(const struct cpumask *cpu_map)
} }
#else #else
static void free_pd(struct perf_domain *pd) { } static void free_pd(struct perf_domain *pd) { }
#endif /* CONFIG_ENERGY_MODEL */ #endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL*/
static void free_rootdomain(struct rcu_head *rcu) static void free_rootdomain(struct rcu_head *rcu)
{ {
...@@ -2152,10 +2172,10 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], ...@@ -2152,10 +2172,10 @@ void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
; ;
} }
#ifdef CONFIG_ENERGY_MODEL #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
/* Build perf. domains: */ /* Build perf. domains: */
for (i = 0; i < ndoms_new; i++) { for (i = 0; i < ndoms_new; i++) {
for (j = 0; j < n; j++) { for (j = 0; j < n && !sched_energy_update; j++) {
if (cpumask_equal(doms_new[i], doms_cur[j]) && if (cpumask_equal(doms_new[i], doms_cur[j]) &&
cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) cpu_rq(cpumask_first(doms_cur[j]))->rd->pd)
goto match3; goto match3;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment