Commit bf56b907 authored by Rafael J. Wysocki's avatar Rafael J. Wysocki

Merge branches 'pm-em' and 'powercap'

Merge Energy Model and power capping updates for 5.16-rc1:

 - Add support for inefficient operating performance points to the
   Energy Model and modify cpufreq to use them properly (Vincent
   Donnefort).

 - Rearrange the DTPM framework code to simplify it and make it easier
   to follow (Daniel Lezcano).

 - Fix power initialization in DTPM (Daniel Lezcano).

 - Add CPU load consideration when estimating the instantaneous power
   consumption in DTPM (Daniel Lezcano).

* pm-em:
  cpufreq: mediatek-hw: Fix cpufreq_table_find_index_dl() call
  PM: EM: Mark inefficiencies in CPUFreq
  cpufreq: Use CPUFREQ_RELATION_E in DVFS governors
  cpufreq: Introducing CPUFREQ_RELATION_E
  cpufreq: Add an interface to mark inefficient frequencies
  cpufreq: Make policy min/max hard requirements
  PM: EM: Allow skipping inefficient states
  PM: EM: Extend em_perf_domain with a flag field
  PM: EM: Mark inefficient states
  PM: EM: Fix inefficient states detection

* powercap:
  powercap/drivers/dtpm: Fix power limit initialization
  powercap/drivers/dtpm: Scale the power with the load
  powercap/drivers/dtpm: Use container_of instead of a private data field
  powercap/drivers/dtpm: Simplify the dtpm table
  powercap/drivers/dtpm: Encapsulate even more the code
...@@ -470,7 +470,8 @@ static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy, ...@@ -470,7 +470,8 @@ static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
if (policy->cached_target_freq == target_freq) if (policy->cached_target_freq == target_freq)
index = policy->cached_resolved_idx; index = policy->cached_resolved_idx;
else else
index = cpufreq_table_find_index_dl(policy, target_freq); index = cpufreq_table_find_index_dl(policy, target_freq,
false);
entry = &policy->freq_table[index]; entry = &policy->freq_table[index];
next_freq = entry->frequency; next_freq = entry->frequency;
......
...@@ -91,7 +91,8 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy, ...@@ -91,7 +91,8 @@ static unsigned int amd_powersave_bias_target(struct cpufreq_policy *policy,
unsigned int index; unsigned int index;
index = cpufreq_table_find_index_h(policy, index = cpufreq_table_find_index_h(policy,
policy->cur - 1); policy->cur - 1,
relation & CPUFREQ_RELATION_E);
freq_next = policy->freq_table[index].frequency; freq_next = policy->freq_table[index].frequency;
} }
......
...@@ -554,7 +554,7 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy, ...@@ -554,7 +554,7 @@ static unsigned int __resolve_freq(struct cpufreq_policy *policy,
unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy,
unsigned int target_freq) unsigned int target_freq)
{ {
return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_L); return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE);
} }
EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq); EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq);
...@@ -2260,8 +2260,16 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, ...@@ -2260,8 +2260,16 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
!(cpufreq_driver->flags & CPUFREQ_NEED_UPDATE_LIMITS)) !(cpufreq_driver->flags & CPUFREQ_NEED_UPDATE_LIMITS))
return 0; return 0;
if (cpufreq_driver->target) if (cpufreq_driver->target) {
/*
* If the driver hasn't set up a single inefficient frequency,
* it's unlikely it knows how to decode CPUFREQ_RELATION_E.
*/
if (!policy->efficiencies_available)
relation &= ~CPUFREQ_RELATION_E;
return cpufreq_driver->target(policy, target_freq, relation); return cpufreq_driver->target(policy, target_freq, relation);
}
if (!cpufreq_driver->target_index) if (!cpufreq_driver->target_index)
return -EINVAL; return -EINVAL;
...@@ -2523,8 +2531,15 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, ...@@ -2523,8 +2531,15 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy,
if (ret) if (ret)
return ret; return ret;
/*
* Resolve policy min/max to available frequencies. It ensures
* that frequency resolution will neither overshoot the requested
* maximum nor undershoot the requested minimum.
*/
policy->min = new_data.min; policy->min = new_data.min;
policy->max = new_data.max; policy->max = new_data.max;
policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L);
policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H);
trace_cpu_frequency_limits(policy); trace_cpu_frequency_limits(policy);
policy->cached_target_freq = UINT_MAX; policy->cached_target_freq = UINT_MAX;
......
...@@ -111,7 +111,8 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy) ...@@ -111,7 +111,8 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy)
if (requested_freq > policy->max) if (requested_freq > policy->max)
requested_freq = policy->max; requested_freq = policy->max;
__cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_H); __cpufreq_driver_target(policy, requested_freq,
CPUFREQ_RELATION_HE);
dbs_info->requested_freq = requested_freq; dbs_info->requested_freq = requested_freq;
goto out; goto out;
} }
...@@ -134,7 +135,8 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy) ...@@ -134,7 +135,8 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy)
else else
requested_freq = policy->min; requested_freq = policy->min;
__cpufreq_driver_target(policy, requested_freq, CPUFREQ_RELATION_L); __cpufreq_driver_target(policy, requested_freq,
CPUFREQ_RELATION_LE);
dbs_info->requested_freq = requested_freq; dbs_info->requested_freq = requested_freq;
} }
......
...@@ -83,9 +83,11 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy, ...@@ -83,9 +83,11 @@ static unsigned int generic_powersave_bias_target(struct cpufreq_policy *policy,
freq_avg = freq_req - freq_reduc; freq_avg = freq_req - freq_reduc;
/* Find freq bounds for freq_avg in freq_table */ /* Find freq bounds for freq_avg in freq_table */
index = cpufreq_table_find_index_h(policy, freq_avg); index = cpufreq_table_find_index_h(policy, freq_avg,
relation & CPUFREQ_RELATION_E);
freq_lo = freq_table[index].frequency; freq_lo = freq_table[index].frequency;
index = cpufreq_table_find_index_l(policy, freq_avg); index = cpufreq_table_find_index_l(policy, freq_avg,
relation & CPUFREQ_RELATION_E);
freq_hi = freq_table[index].frequency; freq_hi = freq_table[index].frequency;
/* Find out how long we have to be in hi and lo freqs */ /* Find out how long we have to be in hi and lo freqs */
...@@ -118,12 +120,12 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) ...@@ -118,12 +120,12 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq)
if (od_tuners->powersave_bias) if (od_tuners->powersave_bias)
freq = od_ops.powersave_bias_target(policy, freq, freq = od_ops.powersave_bias_target(policy, freq,
CPUFREQ_RELATION_H); CPUFREQ_RELATION_HE);
else if (policy->cur == policy->max) else if (policy->cur == policy->max)
return; return;
__cpufreq_driver_target(policy, freq, od_tuners->powersave_bias ? __cpufreq_driver_target(policy, freq, od_tuners->powersave_bias ?
CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); CPUFREQ_RELATION_LE : CPUFREQ_RELATION_HE);
} }
/* /*
...@@ -161,9 +163,9 @@ static void od_update(struct cpufreq_policy *policy) ...@@ -161,9 +163,9 @@ static void od_update(struct cpufreq_policy *policy)
if (od_tuners->powersave_bias) if (od_tuners->powersave_bias)
freq_next = od_ops.powersave_bias_target(policy, freq_next = od_ops.powersave_bias_target(policy,
freq_next, freq_next,
CPUFREQ_RELATION_L); CPUFREQ_RELATION_LE);
__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_C); __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_CE);
} }
} }
...@@ -182,7 +184,7 @@ static unsigned int od_dbs_update(struct cpufreq_policy *policy) ...@@ -182,7 +184,7 @@ static unsigned int od_dbs_update(struct cpufreq_policy *policy)
*/ */
if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) { if (sample_type == OD_SUB_SAMPLE && policy_dbs->sample_delay_ns > 0) {
__cpufreq_driver_target(policy, dbs_info->freq_lo, __cpufreq_driver_target(policy, dbs_info->freq_lo,
CPUFREQ_RELATION_H); CPUFREQ_RELATION_HE);
return dbs_info->freq_lo_delay_us; return dbs_info->freq_lo_delay_us;
} }
......
...@@ -109,7 +109,7 @@ static unsigned int mtk_cpufreq_hw_fast_switch(struct cpufreq_policy *policy, ...@@ -109,7 +109,7 @@ static unsigned int mtk_cpufreq_hw_fast_switch(struct cpufreq_policy *policy,
struct mtk_cpufreq_data *data = policy->driver_data; struct mtk_cpufreq_data *data = policy->driver_data;
unsigned int index; unsigned int index;
index = cpufreq_table_find_index_dl(policy, target_freq); index = cpufreq_table_find_index_dl(policy, target_freq, false);
writel_relaxed(index, data->reg_bases[REG_FREQ_PERF_STATE]); writel_relaxed(index, data->reg_bases[REG_FREQ_PERF_STATE]);
......
...@@ -934,7 +934,7 @@ static void powernv_cpufreq_work_fn(struct work_struct *work) ...@@ -934,7 +934,7 @@ static void powernv_cpufreq_work_fn(struct work_struct *work)
policy = cpufreq_cpu_get(cpu); policy = cpufreq_cpu_get(cpu);
if (!policy) if (!policy)
continue; continue;
index = cpufreq_table_find_index_c(policy, policy->cur); index = cpufreq_table_find_index_c(policy, policy->cur, false);
powernv_cpufreq_target_index(policy, index); powernv_cpufreq_target_index(policy, index);
cpumask_andnot(&mask, &mask, policy->cpus); cpumask_andnot(&mask, &mask, policy->cpus);
cpufreq_cpu_put(policy); cpufreq_cpu_put(policy);
...@@ -1022,7 +1022,7 @@ static unsigned int powernv_fast_switch(struct cpufreq_policy *policy, ...@@ -1022,7 +1022,7 @@ static unsigned int powernv_fast_switch(struct cpufreq_policy *policy,
int index; int index;
struct powernv_smp_call_data freq_data; struct powernv_smp_call_data freq_data;
index = cpufreq_table_find_index_dl(policy, target_freq); index = cpufreq_table_find_index_dl(policy, target_freq, false);
freq_data.pstate_id = powernv_freqs[index].driver_data; freq_data.pstate_id = powernv_freqs[index].driver_data;
freq_data.gpstate_id = powernv_freqs[index].driver_data; freq_data.gpstate_id = powernv_freqs[index].driver_data;
set_pstate(&freq_data); set_pstate(&freq_data);
......
...@@ -243,7 +243,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index) ...@@ -243,7 +243,7 @@ static int s5pv210_target(struct cpufreq_policy *policy, unsigned int index)
new_freq = s5pv210_freq_table[index].frequency; new_freq = s5pv210_freq_table[index].frequency;
/* Finding current running level index */ /* Finding current running level index */
priv_index = cpufreq_table_find_index_h(policy, old_freq); priv_index = cpufreq_table_find_index_h(policy, old_freq, false);
arm_volt = dvs_conf[index].arm_volt; arm_volt = dvs_conf[index].arm_volt;
int_volt = dvs_conf[index].int_volt; int_volt = dvs_conf[index].int_volt;
......
...@@ -116,8 +116,6 @@ static void __dtpm_sub_power(struct dtpm *dtpm) ...@@ -116,8 +116,6 @@ static void __dtpm_sub_power(struct dtpm *dtpm)
parent->power_limit -= dtpm->power_limit; parent->power_limit -= dtpm->power_limit;
parent = parent->parent; parent = parent->parent;
} }
__dtpm_rebalance_weight(root);
} }
static void __dtpm_add_power(struct dtpm *dtpm) static void __dtpm_add_power(struct dtpm *dtpm)
...@@ -130,45 +128,45 @@ static void __dtpm_add_power(struct dtpm *dtpm) ...@@ -130,45 +128,45 @@ static void __dtpm_add_power(struct dtpm *dtpm)
parent->power_limit += dtpm->power_limit; parent->power_limit += dtpm->power_limit;
parent = parent->parent; parent = parent->parent;
} }
}
static int __dtpm_update_power(struct dtpm *dtpm)
{
int ret;
__dtpm_sub_power(dtpm);
ret = dtpm->ops->update_power_uw(dtpm);
if (ret)
pr_err("Failed to update power for '%s': %d\n",
dtpm->zone.name, ret);
__dtpm_rebalance_weight(root); if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
dtpm->power_limit = dtpm->power_max;
__dtpm_add_power(dtpm);
if (root)
__dtpm_rebalance_weight(root);
return ret;
} }
/** /**
* dtpm_update_power - Update the power on the dtpm * dtpm_update_power - Update the power on the dtpm
* @dtpm: a pointer to a dtpm structure to update * @dtpm: a pointer to a dtpm structure to update
* @power_min: a u64 representing the new power_min value
* @power_max: a u64 representing the new power_max value
* *
* Function to update the power values of the dtpm node specified in * Function to update the power values of the dtpm node specified in
* parameter. These new values will be propagated to the tree. * parameter. These new values will be propagated to the tree.
* *
* Return: zero on success, -EINVAL if the values are inconsistent * Return: zero on success, -EINVAL if the values are inconsistent
*/ */
int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max) int dtpm_update_power(struct dtpm *dtpm)
{ {
int ret = 0; int ret;
mutex_lock(&dtpm_lock); mutex_lock(&dtpm_lock);
ret = __dtpm_update_power(dtpm);
if (power_min == dtpm->power_min && power_max == dtpm->power_max)
goto unlock;
if (power_max < power_min) {
ret = -EINVAL;
goto unlock;
}
__dtpm_sub_power(dtpm);
dtpm->power_min = power_min;
dtpm->power_max = power_max;
if (!test_bit(DTPM_POWER_LIMIT_FLAG, &dtpm->flags))
dtpm->power_limit = power_max;
__dtpm_add_power(dtpm);
unlock:
mutex_unlock(&dtpm_lock); mutex_unlock(&dtpm_lock);
return ret; return ret;
...@@ -359,24 +357,18 @@ static struct powercap_zone_ops zone_ops = { ...@@ -359,24 +357,18 @@ static struct powercap_zone_ops zone_ops = {
}; };
/** /**
* dtpm_alloc - Allocate and initialize a dtpm struct * dtpm_init - Allocate and initialize a dtpm struct
* @name: a string specifying the name of the node * @dtpm: The dtpm struct pointer to be initialized
* * @ops: The dtpm device specific ops, NULL for a virtual node
* Return: a struct dtpm pointer, NULL in case of error
*/ */
struct dtpm *dtpm_alloc(struct dtpm_ops *ops) void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops)
{ {
struct dtpm *dtpm;
dtpm = kzalloc(sizeof(*dtpm), GFP_KERNEL);
if (dtpm) { if (dtpm) {
INIT_LIST_HEAD(&dtpm->children); INIT_LIST_HEAD(&dtpm->children);
INIT_LIST_HEAD(&dtpm->sibling); INIT_LIST_HEAD(&dtpm->sibling);
dtpm->weight = 1024; dtpm->weight = 1024;
dtpm->ops = ops; dtpm->ops = ops;
} }
return dtpm;
} }
/** /**
...@@ -436,6 +428,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) ...@@ -436,6 +428,7 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
if (dtpm->ops && !(dtpm->ops->set_power_uw && if (dtpm->ops && !(dtpm->ops->set_power_uw &&
dtpm->ops->get_power_uw && dtpm->ops->get_power_uw &&
dtpm->ops->update_power_uw &&
dtpm->ops->release)) dtpm->ops->release))
return -EINVAL; return -EINVAL;
...@@ -455,7 +448,10 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) ...@@ -455,7 +448,10 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
root = dtpm; root = dtpm;
} }
__dtpm_add_power(dtpm); if (dtpm->ops && !dtpm->ops->update_power_uw(dtpm)) {
__dtpm_add_power(dtpm);
dtpm->power_limit = dtpm->power_max;
}
pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n", pr_info("Registered dtpm node '%s' / %llu-%llu uW, \n",
dtpm->zone.name, dtpm->power_min, dtpm->power_max); dtpm->zone.name, dtpm->power_min, dtpm->power_max);
...@@ -465,9 +461,9 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent) ...@@ -465,9 +461,9 @@ int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent)
return 0; return 0;
} }
static int __init dtpm_init(void) static int __init init_dtpm(void)
{ {
struct dtpm_descr **dtpm_descr; struct dtpm_descr *dtpm_descr;
pct = powercap_register_control_type(NULL, "dtpm", NULL); pct = powercap_register_control_type(NULL, "dtpm", NULL);
if (IS_ERR(pct)) { if (IS_ERR(pct)) {
...@@ -476,8 +472,8 @@ static int __init dtpm_init(void) ...@@ -476,8 +472,8 @@ static int __init dtpm_init(void)
} }
for_each_dtpm_table(dtpm_descr) for_each_dtpm_table(dtpm_descr)
(*dtpm_descr)->init(*dtpm_descr); dtpm_descr->init();
return 0; return 0;
} }
late_initcall(dtpm_init); late_initcall(init_dtpm);
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
* The CPU hotplug is supported and the power numbers will be updated * The CPU hotplug is supported and the power numbers will be updated
* if a CPU is hot plugged / unplugged. * if a CPU is hot plugged / unplugged.
*/ */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/cpufreq.h> #include <linux/cpufreq.h>
#include <linux/cpuhotplug.h> #include <linux/cpuhotplug.h>
...@@ -23,66 +25,29 @@ ...@@ -23,66 +25,29 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/units.h> #include <linux/units.h>
static struct dtpm *__parent;
static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu);
struct dtpm_cpu { struct dtpm_cpu {
struct dtpm dtpm;
struct freq_qos_request qos_req; struct freq_qos_request qos_req;
int cpu; int cpu;
}; };
/* static DEFINE_PER_CPU(struct dtpm_cpu *, dtpm_per_cpu);
* When a new CPU is inserted at hotplug or boot time, add the power
* contribution and update the dtpm tree.
*/
static int power_add(struct dtpm *dtpm, struct em_perf_domain *em)
{
u64 power_min, power_max;
power_min = em->table[0].power;
power_min *= MICROWATT_PER_MILLIWATT;
power_min += dtpm->power_min;
power_max = em->table[em->nr_perf_states - 1].power; static struct dtpm_cpu *to_dtpm_cpu(struct dtpm *dtpm)
power_max *= MICROWATT_PER_MILLIWATT;
power_max += dtpm->power_max;
return dtpm_update_power(dtpm, power_min, power_max);
}
/*
* When a CPU is unplugged, remove its power contribution from the
* dtpm tree.
*/
static int power_sub(struct dtpm *dtpm, struct em_perf_domain *em)
{ {
u64 power_min, power_max; return container_of(dtpm, struct dtpm_cpu, dtpm);
power_min = em->table[0].power;
power_min *= MICROWATT_PER_MILLIWATT;
power_min = dtpm->power_min - power_min;
power_max = em->table[em->nr_perf_states - 1].power;
power_max *= MICROWATT_PER_MILLIWATT;
power_max = dtpm->power_max - power_max;
return dtpm_update_power(dtpm, power_min, power_max);
} }
static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
{ {
struct dtpm_cpu *dtpm_cpu = dtpm->private; struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
struct em_perf_domain *pd; struct em_perf_domain *pd = em_cpu_get(dtpm_cpu->cpu);
struct cpumask cpus; struct cpumask cpus;
unsigned long freq; unsigned long freq;
u64 power; u64 power;
int i, nr_cpus; int i, nr_cpus;
pd = em_cpu_get(dtpm_cpu->cpu);
cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus)); cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
nr_cpus = cpumask_weight(&cpus); nr_cpus = cpumask_weight(&cpus);
for (i = 0; i < pd->nr_perf_states; i++) { for (i = 0; i < pd->nr_perf_states; i++) {
...@@ -103,34 +68,88 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) ...@@ -103,34 +68,88 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
return power_limit; return power_limit;
} }
/*
 * Scale a performance-domain power figure by the utilization of the
 * domain's online CPUs.
 *
 * @pd_mask: CPUs spanned by the performance domain
 * @power: power value to scale
 *
 * Returns @power multiplied by (summed utilization / CPU capacity),
 * computed with a 10-bit fixed-point ratio, or 0 when no CPU of the
 * domain is online.
 */
static u64 scale_pd_power_uw(struct cpumask *pd_mask, u64 power)
{
	unsigned long capacity = 0;
	unsigned long total_util = 0;
	int cpu;

	for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
		/*
		 * Every CPU of a perf domain has the same capacity, so
		 * querying it once would be enough. The query still needs
		 * a valid CPU number, which only the loop provides, hence
		 * it is repeated here. Fetching it up front through
		 * cpumask_first() would cost a bitmap scan for no gain, as
		 * arch_scale_cpu_capacity() merely returns a value and
		 * compiles down to very little.
		 */
		capacity = arch_scale_cpu_capacity(cpu);
		total_util += sched_cpu_util(cpu, capacity);
	}

	/*
	 * If no CPU of the domain is online the loop never runs and
	 * 'capacity' stays at zero: bail out instead of dividing by it.
	 */
	if (!capacity)
		return 0;

	return (power * ((total_util << 10) / capacity)) >> 10;
}
static u64 get_pd_power_uw(struct dtpm *dtpm) static u64 get_pd_power_uw(struct dtpm *dtpm)
{ {
struct dtpm_cpu *dtpm_cpu = dtpm->private; struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
struct em_perf_domain *pd; struct em_perf_domain *pd;
struct cpumask cpus; struct cpumask *pd_mask;
unsigned long freq; unsigned long freq;
int i, nr_cpus; int i;
pd = em_cpu_get(dtpm_cpu->cpu); pd = em_cpu_get(dtpm_cpu->cpu);
pd_mask = em_span_cpus(pd);
freq = cpufreq_quick_get(dtpm_cpu->cpu); freq = cpufreq_quick_get(dtpm_cpu->cpu);
cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
nr_cpus = cpumask_weight(&cpus);
for (i = 0; i < pd->nr_perf_states; i++) { for (i = 0; i < pd->nr_perf_states; i++) {
if (pd->table[i].frequency < freq) if (pd->table[i].frequency < freq)
continue; continue;
return pd->table[i].power * return scale_pd_power_uw(pd_mask, pd->table[i].power *
MICROWATT_PER_MILLIWATT * nr_cpus; MICROWATT_PER_MILLIWATT);
} }
return 0; return 0;
} }
static int update_pd_power_uw(struct dtpm *dtpm)
{
struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
struct em_perf_domain *em = em_cpu_get(dtpm_cpu->cpu);
struct cpumask cpus;
int nr_cpus;
cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus));
nr_cpus = cpumask_weight(&cpus);
dtpm->power_min = em->table[0].power;
dtpm->power_min *= MICROWATT_PER_MILLIWATT;
dtpm->power_min *= nr_cpus;
dtpm->power_max = em->table[em->nr_perf_states - 1].power;
dtpm->power_max *= MICROWATT_PER_MILLIWATT;
dtpm->power_max *= nr_cpus;
return 0;
}
static void pd_release(struct dtpm *dtpm) static void pd_release(struct dtpm *dtpm)
{ {
struct dtpm_cpu *dtpm_cpu = dtpm->private; struct dtpm_cpu *dtpm_cpu = to_dtpm_cpu(dtpm);
if (freq_qos_request_active(&dtpm_cpu->qos_req)) if (freq_qos_request_active(&dtpm_cpu->qos_req))
freq_qos_remove_request(&dtpm_cpu->qos_req); freq_qos_remove_request(&dtpm_cpu->qos_req);
...@@ -139,44 +158,28 @@ static void pd_release(struct dtpm *dtpm) ...@@ -139,44 +158,28 @@ static void pd_release(struct dtpm *dtpm)
} }
static struct dtpm_ops dtpm_ops = { static struct dtpm_ops dtpm_ops = {
.set_power_uw = set_pd_power_limit, .set_power_uw = set_pd_power_limit,
.get_power_uw = get_pd_power_uw, .get_power_uw = get_pd_power_uw,
.release = pd_release, .update_power_uw = update_pd_power_uw,
.release = pd_release,
}; };
static int cpuhp_dtpm_cpu_offline(unsigned int cpu) static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
{ {
struct cpufreq_policy *policy;
struct em_perf_domain *pd; struct em_perf_domain *pd;
struct dtpm *dtpm; struct dtpm_cpu *dtpm_cpu;
policy = cpufreq_cpu_get(cpu);
if (!policy)
return 0;
pd = em_cpu_get(cpu); pd = em_cpu_get(cpu);
if (!pd) if (!pd)
return -EINVAL; return -EINVAL;
dtpm = per_cpu(dtpm_per_cpu, cpu); dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
power_sub(dtpm, pd); return dtpm_update_power(&dtpm_cpu->dtpm);
if (cpumask_weight(policy->cpus) != 1)
return 0;
for_each_cpu(cpu, policy->related_cpus)
per_cpu(dtpm_per_cpu, cpu) = NULL;
dtpm_unregister(dtpm);
return 0;
} }
static int cpuhp_dtpm_cpu_online(unsigned int cpu) static int cpuhp_dtpm_cpu_online(unsigned int cpu)
{ {
struct dtpm *dtpm;
struct dtpm_cpu *dtpm_cpu; struct dtpm_cpu *dtpm_cpu;
struct cpufreq_policy *policy; struct cpufreq_policy *policy;
struct em_perf_domain *pd; struct em_perf_domain *pd;
...@@ -184,7 +187,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) ...@@ -184,7 +187,6 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
int ret = -ENOMEM; int ret = -ENOMEM;
policy = cpufreq_cpu_get(cpu); policy = cpufreq_cpu_get(cpu);
if (!policy) if (!policy)
return 0; return 0;
...@@ -192,66 +194,82 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu) ...@@ -192,66 +194,82 @@ static int cpuhp_dtpm_cpu_online(unsigned int cpu)
if (!pd) if (!pd)
return -EINVAL; return -EINVAL;
dtpm = per_cpu(dtpm_per_cpu, cpu); dtpm_cpu = per_cpu(dtpm_per_cpu, cpu);
if (dtpm) if (dtpm_cpu)
return power_add(dtpm, pd); return dtpm_update_power(&dtpm_cpu->dtpm);
dtpm = dtpm_alloc(&dtpm_ops);
if (!dtpm)
return -EINVAL;
dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL); dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
if (!dtpm_cpu) if (!dtpm_cpu)
goto out_kfree_dtpm; return -ENOMEM;
dtpm->private = dtpm_cpu; dtpm_init(&dtpm_cpu->dtpm, &dtpm_ops);
dtpm_cpu->cpu = cpu; dtpm_cpu->cpu = cpu;
for_each_cpu(cpu, policy->related_cpus) for_each_cpu(cpu, policy->related_cpus)
per_cpu(dtpm_per_cpu, cpu) = dtpm; per_cpu(dtpm_per_cpu, cpu) = dtpm_cpu;
sprintf(name, "cpu%d", dtpm_cpu->cpu); snprintf(name, sizeof(name), "cpu%d-cpufreq", dtpm_cpu->cpu);
ret = dtpm_register(name, dtpm, __parent); ret = dtpm_register(name, &dtpm_cpu->dtpm, NULL);
if (ret) if (ret)
goto out_kfree_dtpm_cpu; goto out_kfree_dtpm_cpu;
ret = power_add(dtpm, pd);
if (ret)
goto out_dtpm_unregister;
ret = freq_qos_add_request(&policy->constraints, ret = freq_qos_add_request(&policy->constraints,
&dtpm_cpu->qos_req, FREQ_QOS_MAX, &dtpm_cpu->qos_req, FREQ_QOS_MAX,
pd->table[pd->nr_perf_states - 1].frequency); pd->table[pd->nr_perf_states - 1].frequency);
if (ret) if (ret)
goto out_power_sub; goto out_dtpm_unregister;
return 0; return 0;
out_power_sub:
power_sub(dtpm, pd);
out_dtpm_unregister: out_dtpm_unregister:
dtpm_unregister(dtpm); dtpm_unregister(&dtpm_cpu->dtpm);
dtpm_cpu = NULL; dtpm_cpu = NULL;
dtpm = NULL;
out_kfree_dtpm_cpu: out_kfree_dtpm_cpu:
for_each_cpu(cpu, policy->related_cpus) for_each_cpu(cpu, policy->related_cpus)
per_cpu(dtpm_per_cpu, cpu) = NULL; per_cpu(dtpm_per_cpu, cpu) = NULL;
kfree(dtpm_cpu); kfree(dtpm_cpu);
out_kfree_dtpm:
kfree(dtpm);
return ret; return ret;
} }
int dtpm_register_cpu(struct dtpm *parent) static int __init dtpm_cpu_init(void)
{ {
__parent = parent; int ret;
/*
* The callbacks at CPU hotplug time are calling
* dtpm_update_power() which in turn calls update_pd_power_uw().
*
* The function update_pd_power_uw() uses the online mask to
* figure out the power consumption limits.
*
* At CPUHP_AP_ONLINE_DYN, the CPU is present in the CPU
* online mask when the cpuhp_dtpm_cpu_online function is
* called, but the CPU is still in the online mask for the
* tear down callback. So the power can not be updated when
* the CPU is unplugged.
*
* At CPUHP_AP_DTPM_CPU_DEAD, the situation is the opposite as
* above. The CPU online mask is not up to date when the CPU
* is plugged in.
*
* For this reason, we need to call the online and offline
* callbacks at different moments when the CPU online mask is
* consistent with the power numbers we want to update.
*/
ret = cpuhp_setup_state(CPUHP_AP_DTPM_CPU_DEAD, "dtpm_cpu:offline",
NULL, cpuhp_dtpm_cpu_offline);
if (ret < 0)
return ret;
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "dtpm_cpu:online",
cpuhp_dtpm_cpu_online, NULL);
if (ret < 0)
return ret;
return cpuhp_setup_state(CPUHP_AP_DTPM_CPU_ONLINE, return 0;
"dtpm_cpu:online",
cpuhp_dtpm_cpu_online,
cpuhp_dtpm_cpu_offline);
} }
DTPM_DECLARE(dtpm_cpu, dtpm_cpu_init);
This diff is collapsed.
...@@ -99,6 +99,7 @@ enum cpuhp_state { ...@@ -99,6 +99,7 @@ enum cpuhp_state {
CPUHP_LUSTRE_CFS_DEAD, CPUHP_LUSTRE_CFS_DEAD,
CPUHP_AP_ARM_CACHE_B15_RAC_DEAD, CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
CPUHP_PADATA_DEAD, CPUHP_PADATA_DEAD,
CPUHP_AP_DTPM_CPU_DEAD,
CPUHP_WORKQUEUE_PREP, CPUHP_WORKQUEUE_PREP,
CPUHP_POWER_NUMA_PREPARE, CPUHP_POWER_NUMA_PREPARE,
CPUHP_HRTIMERS_PREPARE, CPUHP_HRTIMERS_PREPARE,
...@@ -246,7 +247,6 @@ enum cpuhp_state { ...@@ -246,7 +247,6 @@ enum cpuhp_state {
CPUHP_AP_MM_DEMOTION_ONLINE, CPUHP_AP_MM_DEMOTION_ONLINE,
CPUHP_AP_X86_HPET_ONLINE, CPUHP_AP_X86_HPET_ONLINE,
CPUHP_AP_X86_KVM_CLK_ONLINE, CPUHP_AP_X86_KVM_CLK_ONLINE,
CPUHP_AP_DTPM_CPU_ONLINE,
CPUHP_AP_ACTIVE, CPUHP_AP_ACTIVE,
CPUHP_ONLINE, CPUHP_ONLINE,
}; };
......
...@@ -23,34 +23,32 @@ struct dtpm { ...@@ -23,34 +23,32 @@ struct dtpm {
u64 power_max; u64 power_max;
u64 power_min; u64 power_min;
int weight; int weight;
void *private;
}; };
struct dtpm_ops { struct dtpm_ops {
u64 (*set_power_uw)(struct dtpm *, u64); u64 (*set_power_uw)(struct dtpm *, u64);
u64 (*get_power_uw)(struct dtpm *); u64 (*get_power_uw)(struct dtpm *);
int (*update_power_uw)(struct dtpm *);
void (*release)(struct dtpm *); void (*release)(struct dtpm *);
}; };
struct dtpm_descr; typedef int (*dtpm_init_t)(void);
typedef int (*dtpm_init_t)(struct dtpm_descr *);
struct dtpm_descr { struct dtpm_descr {
struct dtpm *parent;
const char *name;
dtpm_init_t init; dtpm_init_t init;
}; };
/* Init section thermal table */ /* Init section thermal table */
extern struct dtpm_descr *__dtpm_table[]; extern struct dtpm_descr __dtpm_table[];
extern struct dtpm_descr *__dtpm_table_end[]; extern struct dtpm_descr __dtpm_table_end[];
#define DTPM_TABLE_ENTRY(name) \ #define DTPM_TABLE_ENTRY(name, __init) \
static typeof(name) *__dtpm_table_entry_##name \ static struct dtpm_descr __dtpm_table_entry_##name \
__used __section("__dtpm_table") = &name __used __section("__dtpm_table") = { \
.init = __init, \
}
#define DTPM_DECLARE(name) DTPM_TABLE_ENTRY(name) #define DTPM_DECLARE(name, init) DTPM_TABLE_ENTRY(name, init)
#define for_each_dtpm_table(__dtpm) \ #define for_each_dtpm_table(__dtpm) \
for (__dtpm = __dtpm_table; \ for (__dtpm = __dtpm_table; \
...@@ -62,11 +60,11 @@ static inline struct dtpm *to_dtpm(struct powercap_zone *zone) ...@@ -62,11 +60,11 @@ static inline struct dtpm *to_dtpm(struct powercap_zone *zone)
return container_of(zone, struct dtpm, zone); return container_of(zone, struct dtpm, zone);
} }
int dtpm_update_power(struct dtpm *dtpm, u64 power_min, u64 power_max); int dtpm_update_power(struct dtpm *dtpm);
int dtpm_release_zone(struct powercap_zone *pcz); int dtpm_release_zone(struct powercap_zone *pcz);
struct dtpm *dtpm_alloc(struct dtpm_ops *ops); void dtpm_init(struct dtpm *dtpm, struct dtpm_ops *ops);
void dtpm_unregister(struct dtpm *dtpm); void dtpm_unregister(struct dtpm *dtpm);
......
...@@ -17,19 +17,30 @@ ...@@ -17,19 +17,30 @@
* device). It can be a total power: static and dynamic. * device). It can be a total power: static and dynamic.
* @cost: The cost coefficient associated with this level, used during * @cost: The cost coefficient associated with this level, used during
* energy calculation. Equal to: power * max_frequency / frequency * energy calculation. Equal to: power * max_frequency / frequency
* @flags: see "em_perf_state flags" description below.
*/ */
struct em_perf_state { struct em_perf_state {
unsigned long frequency; unsigned long frequency;
unsigned long power; unsigned long power;
unsigned long cost; unsigned long cost;
unsigned long flags;
}; };
/*
* em_perf_state flags:
*
* EM_PERF_STATE_INEFFICIENT: The performance state is inefficient. There is
* in this em_perf_domain, another performance state with a higher frequency
* but a lower or equal power cost. Such inefficient states are ignored when
* using em_pd_get_efficient_*() functions.
*/
#define EM_PERF_STATE_INEFFICIENT BIT(0)
/** /**
* struct em_perf_domain - Performance domain * struct em_perf_domain - Performance domain
* @table: List of performance states, in ascending order * @table: List of performance states, in ascending order
* @nr_perf_states: Number of performance states * @nr_perf_states: Number of performance states
* @milliwatts: Flag indicating the power values are in milli-Watts * @flags: See "em_perf_domain flags"
* or some other scale.
* @cpus: Cpumask covering the CPUs of the domain. It's here * @cpus: Cpumask covering the CPUs of the domain. It's here
* for performance reasons to avoid potential cache * for performance reasons to avoid potential cache
* misses during energy calculations in the scheduler * misses during energy calculations in the scheduler
...@@ -44,10 +55,22 @@ struct em_perf_state { ...@@ -44,10 +55,22 @@ struct em_perf_state {
struct em_perf_domain { struct em_perf_domain {
struct em_perf_state *table; struct em_perf_state *table;
int nr_perf_states; int nr_perf_states;
int milliwatts; unsigned long flags;
unsigned long cpus[]; unsigned long cpus[];
}; };
/*
* em_perf_domain flags:
*
* EM_PERF_DOMAIN_MILLIWATTS: The power values are in milli-Watts or some
* other scale.
*
* EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating
* energy consumption.
*/
#define EM_PERF_DOMAIN_MILLIWATTS BIT(0)
#define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1)
#define em_span_cpus(em) (to_cpumask((em)->cpus)) #define em_span_cpus(em) (to_cpumask((em)->cpus))
#ifdef CONFIG_ENERGY_MODEL #ifdef CONFIG_ENERGY_MODEL
...@@ -101,6 +124,37 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, ...@@ -101,6 +124,37 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
bool milliwatts); bool milliwatts);
void em_dev_unregister_perf_domain(struct device *dev); void em_dev_unregister_perf_domain(struct device *dev);
/**
 * em_pd_get_efficient_state() - Map a frequency to an efficient EM state
 * @pd   : Performance domain whose table is searched
 * @freq : Minimum frequency the returned state has to provide
 *
 * Walks @pd->table from the first entry upwards and stops at the first state
 * whose frequency is at least @freq. When the domain carries
 * EM_PERF_DOMAIN_SKIP_INEFFICIENCIES, states flagged
 * EM_PERF_STATE_INEFFICIENT are passed over and the search continues.
 *
 * This sits on a hot path (called frequently from the scheduler), so no
 * sanity checking is done: @pd must be valid and hold at least one state,
 * otherwise the returned pointer is uninitialized.
 *
 * Return: The selected performance state. If no state satisfies the request,
 * the last state visited (the final table entry) is returned.
 */
static inline
struct em_perf_state *em_pd_get_efficient_state(struct em_perf_domain *pd,
						unsigned long freq)
{
	unsigned long skip_inefficient =
		pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES;
	struct em_perf_state *state;
	int idx;

	for (idx = 0; idx < pd->nr_perf_states; idx++) {
		state = &pd->table[idx];

		/* Not fast enough for the request: keep climbing. */
		if (state->frequency < freq)
			continue;

		/* Fast enough, but inefficient and the domain skips those. */
		if (skip_inefficient &&
		    (state->flags & EM_PERF_STATE_INEFFICIENT))
			continue;

		break;
	}

	return state;
}
/** /**
* em_cpu_energy() - Estimates the energy consumed by the CPUs of a * em_cpu_energy() - Estimates the energy consumed by the CPUs of a
* performance domain * performance domain
...@@ -123,7 +177,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, ...@@ -123,7 +177,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
{ {
unsigned long freq, scale_cpu; unsigned long freq, scale_cpu;
struct em_perf_state *ps; struct em_perf_state *ps;
int i, cpu; int cpu;
if (!sum_util) if (!sum_util)
return 0; return 0;
...@@ -148,11 +202,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, ...@@ -148,11 +202,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
* Find the lowest performance state of the Energy Model above the * Find the lowest performance state of the Energy Model above the
* requested frequency. * requested frequency.
*/ */
for (i = 0; i < pd->nr_perf_states; i++) { ps = em_pd_get_efficient_state(pd, freq);
ps = &pd->table[i];
if (ps->frequency >= freq)
break;
}
/* /*
* The capacity of a CPU in the domain at the performance state (ps) * The capacity of a CPU in the domain at the performance state (ps)
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
/* /*
* Energy Model of devices * Energy Model of devices
* *
* Copyright (c) 2018-2020, Arm ltd. * Copyright (c) 2018-2021, Arm ltd.
* Written by: Quentin Perret, Arm ltd. * Written by: Quentin Perret, Arm ltd.
* Improvements provided by: Lukasz Luba, Arm ltd. * Improvements provided by: Lukasz Luba, Arm ltd.
*/ */
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#define pr_fmt(fmt) "energy_model: " fmt #define pr_fmt(fmt) "energy_model: " fmt
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/debugfs.h> #include <linux/debugfs.h>
#include <linux/energy_model.h> #include <linux/energy_model.h>
...@@ -42,6 +43,7 @@ static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd) ...@@ -42,6 +43,7 @@ static void em_debug_create_ps(struct em_perf_state *ps, struct dentry *pd)
debugfs_create_ulong("frequency", 0444, d, &ps->frequency); debugfs_create_ulong("frequency", 0444, d, &ps->frequency);
debugfs_create_ulong("power", 0444, d, &ps->power); debugfs_create_ulong("power", 0444, d, &ps->power);
debugfs_create_ulong("cost", 0444, d, &ps->cost); debugfs_create_ulong("cost", 0444, d, &ps->cost);
debugfs_create_ulong("inefficient", 0444, d, &ps->flags);
} }
static int em_debug_cpus_show(struct seq_file *s, void *unused) static int em_debug_cpus_show(struct seq_file *s, void *unused)
...@@ -55,7 +57,8 @@ DEFINE_SHOW_ATTRIBUTE(em_debug_cpus); ...@@ -55,7 +57,8 @@ DEFINE_SHOW_ATTRIBUTE(em_debug_cpus);
static int em_debug_units_show(struct seq_file *s, void *unused) static int em_debug_units_show(struct seq_file *s, void *unused)
{ {
struct em_perf_domain *pd = s->private; struct em_perf_domain *pd = s->private;
char *units = pd->milliwatts ? "milliWatts" : "bogoWatts"; char *units = (pd->flags & EM_PERF_DOMAIN_MILLIWATTS) ?
"milliWatts" : "bogoWatts";
seq_printf(s, "%s\n", units); seq_printf(s, "%s\n", units);
...@@ -63,6 +66,17 @@ static int em_debug_units_show(struct seq_file *s, void *unused) ...@@ -63,6 +66,17 @@ static int em_debug_units_show(struct seq_file *s, void *unused)
} }
DEFINE_SHOW_ATTRIBUTE(em_debug_units); DEFINE_SHOW_ATTRIBUTE(em_debug_units);
static int em_debug_skip_inefficiencies_show(struct seq_file *s, void *unused)
{
struct em_perf_domain *pd = s->private;
int enabled = (pd->flags & EM_PERF_DOMAIN_SKIP_INEFFICIENCIES) ? 1 : 0;
seq_printf(s, "%d\n", enabled);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(em_debug_skip_inefficiencies);
static void em_debug_create_pd(struct device *dev) static void em_debug_create_pd(struct device *dev)
{ {
struct dentry *d; struct dentry *d;
...@@ -76,6 +90,8 @@ static void em_debug_create_pd(struct device *dev) ...@@ -76,6 +90,8 @@ static void em_debug_create_pd(struct device *dev)
&em_debug_cpus_fops); &em_debug_cpus_fops);
debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops); debugfs_create_file("units", 0444, d, dev->em_pd, &em_debug_units_fops);
debugfs_create_file("skip-inefficiencies", 0444, d, dev->em_pd,
&em_debug_skip_inefficiencies_fops);
/* Create a sub-directory for each performance state */ /* Create a sub-directory for each performance state */
for (i = 0; i < dev->em_pd->nr_perf_states; i++) for (i = 0; i < dev->em_pd->nr_perf_states; i++)
...@@ -107,8 +123,7 @@ static void em_debug_remove_pd(struct device *dev) {} ...@@ -107,8 +123,7 @@ static void em_debug_remove_pd(struct device *dev) {}
static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
int nr_states, struct em_data_callback *cb) int nr_states, struct em_data_callback *cb)
{ {
unsigned long opp_eff, prev_opp_eff = ULONG_MAX; unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
unsigned long power, freq, prev_freq = 0;
struct em_perf_state *table; struct em_perf_state *table;
int i, ret; int i, ret;
u64 fmax; u64 fmax;
...@@ -153,27 +168,22 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, ...@@ -153,27 +168,22 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
table[i].power = power; table[i].power = power;
table[i].frequency = prev_freq = freq; table[i].frequency = prev_freq = freq;
/*
* The hertz/watts efficiency ratio should decrease as the
* frequency grows on sane platforms. But this isn't always
* true in practice so warn the user if a higher OPP is more
* power efficient than a lower one.
*/
opp_eff = freq / power;
if (opp_eff >= prev_opp_eff)
dev_dbg(dev, "EM: hertz/watts ratio non-monotonically decreasing: em_perf_state %d >= em_perf_state%d\n",
i, i - 1);
prev_opp_eff = opp_eff;
} }
/* Compute the cost of each performance state. */ /* Compute the cost of each performance state. */
fmax = (u64) table[nr_states - 1].frequency; fmax = (u64) table[nr_states - 1].frequency;
for (i = 0; i < nr_states; i++) { for (i = nr_states - 1; i >= 0; i--) {
unsigned long power_res = em_scale_power(table[i].power); unsigned long power_res = em_scale_power(table[i].power);
table[i].cost = div64_u64(fmax * power_res, table[i].cost = div64_u64(fmax * power_res,
table[i].frequency); table[i].frequency);
if (table[i].cost >= prev_cost) {
table[i].flags = EM_PERF_STATE_INEFFICIENT;
dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
table[i].frequency);
} else {
prev_cost = table[i].cost;
}
} }
pd->table = table; pd->table = table;
...@@ -222,6 +232,43 @@ static int em_create_pd(struct device *dev, int nr_states, ...@@ -222,6 +232,43 @@ static int em_create_pd(struct device *dev, int nr_states,
return 0; return 0;
} }
/**
 * em_cpufreq_update_efficiencies() - Report inefficient EM states to CPUFreq
 * @dev : Device whose performance domain (dev->em_pd) is inspected
 *
 * Does nothing unless @dev is a CPU device with a performance domain. For
 * each state flagged EM_PERF_STATE_INEFFICIENT, the matching frequency is
 * passed to cpufreq_table_set_inefficient() on the policy of the first CPU
 * spanned by the domain. If at least one call reports success (returns 0),
 * EM_PERF_DOMAIN_SKIP_INEFFICIENCIES is set on the domain so that the Energy
 * Model skips the same states CPUFreq does.
 *
 * The policy reference obtained from cpufreq_cpu_get() is released before
 * returning, whatever the outcome.
 */
static void em_cpufreq_update_efficiencies(struct device *dev)
{
	struct em_perf_domain *pd = dev->em_pd;
	struct em_perf_state *table;
	struct cpufreq_policy *policy;
	int found = 0;
	int i;

	if (!_is_cpu_device(dev) || !pd)
		return;

	policy = cpufreq_cpu_get(cpumask_first(em_span_cpus(pd)));
	if (!policy) {
		dev_warn(dev, "EM: Access to CPUFreq policy failed\n");
		return;
	}

	table = pd->table;

	for (i = 0; i < pd->nr_perf_states; i++) {
		if (!(table[i].flags & EM_PERF_STATE_INEFFICIENT))
			continue;

		/* Count only the frequencies CPUFreq accepted as inefficient. */
		if (!cpufreq_table_set_inefficient(policy, table[i].frequency))
			found++;
	}

	/* Balance cpufreq_cpu_get(): the policy is not used past this point. */
	cpufreq_cpu_put(policy);

	if (!found)
		return;

	/*
	 * Efficiencies have been installed in CPUFreq, inefficient frequencies
	 * will be skipped. The EM can do the same.
	 */
	pd->flags |= EM_PERF_DOMAIN_SKIP_INEFFICIENCIES;
}
/** /**
* em_pd_get() - Return the performance domain for a device * em_pd_get() - Return the performance domain for a device
* @dev : Device to find the performance domain for * @dev : Device to find the performance domain for
...@@ -335,7 +382,10 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, ...@@ -335,7 +382,10 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
if (ret) if (ret)
goto unlock; goto unlock;
dev->em_pd->milliwatts = milliwatts; if (milliwatts)
dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;
em_cpufreq_update_efficiencies(dev);
em_debug_create_pd(dev); em_debug_create_pd(dev);
dev_info(dev, "EM: created perf domain\n"); dev_info(dev, "EM: created perf domain\n");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment