Commit 9942cb22 authored by Vincent Guittot, committed by Ingo Molnar

sched/topology: Add a new arch_scale_freq_ref() method

Create a new method to get a unique and fixed max frequency. Currently,
cpuinfo.max_freq or the highest (or last) state of the performance domain
is used as the max frequency when computing the frequency for a given
level of utilization, but:

  - cpuinfo.max_freq can change at runtime; boost is one example of
    such a change.

  - cpuinfo.max_freq and the last item of the PD can differ, leading to
    different results between cpufreq and the energy model.

We need to save the reference frequency that was used when computing the
CPUs' capacity, and use this fixed and coherent value to convert between
frequency and CPU capacity.

In fact, we already save the frequency that was used when computing each
CPU's capacity. We extend the precision by storing kHz instead of MHz,
and we change the type to align with the other variables used when
converting between frequency and capacity.
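
To make the conversion concrete, here is a minimal sketch (illustrative
only, not kernel code; the helper names freq_to_capacity() and
capacity_to_freq() are hypothetical) of the frequency <-> capacity
conversion that a fixed per-CPU reference frequency in kHz enables:

    #define SCHED_CAPACITY_SHIFT  10
    #define SCHED_CAPACITY_SCALE  (1UL << SCHED_CAPACITY_SHIFT)

    /* capacity = freq / freq_ref * SCHED_CAPACITY_SCALE */
    static unsigned long freq_to_capacity(unsigned long freq_khz,
                                          unsigned long freq_ref_khz)
    {
            return (freq_khz << SCHED_CAPACITY_SHIFT) / freq_ref_khz;
    }

    /* freq = capacity * freq_ref / SCHED_CAPACITY_SCALE */
    static unsigned long capacity_to_freq(unsigned long capacity,
                                          unsigned long freq_ref_khz)
    {
            return (capacity * freq_ref_khz) >> SCHED_CAPACITY_SHIFT;
    }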

[ mingo: Minor edits. ]
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20231211104855.558096-2-vincent.guittot@linaro.org
parent d2e9f53a
--- a/arch/arm/include/asm/topology.h
+++ b/arch/arm/include/asm/topology.h
@@ -13,6 +13,7 @@
 #define arch_set_freq_scale topology_set_freq_scale
 #define arch_scale_freq_capacity topology_get_freq_scale
 #define arch_scale_freq_invariant topology_scale_freq_invariant
+#define arch_scale_freq_ref topology_get_freq_ref
 #endif
 
 /* Replace task scheduler's default cpu-invariant accounting */
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -23,6 +23,7 @@ void update_freq_counters_refs(void);
 #define arch_set_freq_scale topology_set_freq_scale
 #define arch_scale_freq_capacity topology_get_freq_scale
 #define arch_scale_freq_invariant topology_scale_freq_invariant
+#define arch_scale_freq_ref topology_get_freq_ref
 
 #ifdef CONFIG_ACPI_CPPC_LIB
 #define arch_init_invariance_cppc topology_init_cpu_capacity_cppc
--- a/arch/riscv/include/asm/topology.h
+++ b/arch/riscv/include/asm/topology.h
@@ -9,6 +9,7 @@
 #define arch_set_freq_scale topology_set_freq_scale
 #define arch_scale_freq_capacity topology_get_freq_scale
 #define arch_scale_freq_invariant topology_scale_freq_invariant
+#define arch_scale_freq_ref topology_get_freq_ref
 
 /* Replace task scheduler's default cpu-invariant accounting */
 #define arch_scale_cpu_capacity topology_get_cpu_scale
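
The three arch hunks above all use the same override pattern: each
architecture maps the generic arch_scale_freq_ref() hook to the topology
helper before the generic header tests for it, so the #ifndef fallback
added in include/linux/sched/topology.h at the end of this patch is
compiled out. A schematic illustration (not actual kernel code):

    /* In the arch header, seen first: */
    #define arch_scale_freq_ref topology_get_freq_ref

    /* Later, in the generic header: skipped, because the arch
     * already provided a definition above. */
    #ifndef arch_scale_freq_ref
    static __always_inline unsigned int arch_scale_freq_ref(int cpu)
    {
            return 0;
    }
    #endif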
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -19,6 +19,7 @@
 #include <linux/init.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
+#include <linux/units.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/thermal_pressure.h>
@@ -26,7 +27,8 @@
 static DEFINE_PER_CPU(struct scale_freq_data __rcu *, sft_data);
 static struct cpumask scale_freq_counters_mask;
 static bool scale_freq_invariant;
-static DEFINE_PER_CPU(u32, freq_factor) = 1;
+DEFINE_PER_CPU(unsigned long, capacity_freq_ref) = 1;
+EXPORT_PER_CPU_SYMBOL_GPL(capacity_freq_ref);
 
 static bool supports_scale_freq_counters(const struct cpumask *cpus)
 {
@@ -170,9 +172,9 @@ DEFINE_PER_CPU(unsigned long, thermal_pressure);
  * operating on stale data when hot-plug is used for some CPUs. The
  * @capped_freq reflects the currently allowed max CPUs frequency due to
  * thermal capping. It might be also a boost frequency value, which is bigger
- * than the internal 'freq_factor' max frequency. In such case the pressure
- * value should simply be removed, since this is an indication that there is
- * no thermal throttling. The @capped_freq must be provided in kHz.
+ * than the internal 'capacity_freq_ref' max frequency. In such case the
+ * pressure value should simply be removed, since this is an indication that
+ * there is no thermal throttling. The @capped_freq must be provided in kHz.
  */
 void topology_update_thermal_pressure(const struct cpumask *cpus,
                                       unsigned long capped_freq)
@@ -183,10 +185,7 @@ void topology_update_thermal_pressure(const struct cpumask *cpus,
 
         cpu = cpumask_first(cpus);
         max_capacity = arch_scale_cpu_capacity(cpu);
-        max_freq = per_cpu(freq_factor, cpu);
-
-        /* Convert to MHz scale which is used in 'freq_factor' */
-        capped_freq /= 1000;
+        max_freq = arch_scale_freq_ref(cpu);
 
         /*
          * Handle properly the boost frequencies, which should simply clean
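
With capacity_freq_ref kept in kHz, @capped_freq no longer needs the
kHz-to-MHz conversion. The boost handling that the truncated comment
above refers to reduces to something like this sketch (illustrative,
not the verbatim function body):

    /* A cap at or above the reference frequency means no throttling. */
    if (max_freq <= capped_freq)
            capacity = max_capacity;
    else
            capacity = mult_frac(max_capacity, capped_freq, max_freq);

    /* Thermal pressure is the capacity lost to the cap. */
    pressure = max_capacity - capacity;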
@@ -279,13 +278,13 @@ void topology_normalize_cpu_scale(void)
 
         capacity_scale = 1;
         for_each_possible_cpu(cpu) {
-                capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
+                capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu);
                 capacity_scale = max(capacity, capacity_scale);
         }
 
         pr_debug("cpu_capacity: capacity_scale=%llu\n", capacity_scale);
         for_each_possible_cpu(cpu) {
-                capacity = raw_capacity[cpu] * per_cpu(freq_factor, cpu);
+                capacity = raw_capacity[cpu] * per_cpu(capacity_freq_ref, cpu);
                 capacity = div64_u64(capacity << SCHED_CAPACITY_SHIFT,
                         capacity_scale);
                 topology_set_cpu_scale(cpu, capacity);
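
A worked example with hypothetical values: two CPUs, both with
raw_capacity = 1024 (from capacity-dmips-mhz), a big core with
capacity_freq_ref = 2000000 kHz and a little core with 1000000 kHz:

    capacity_scale = 1024 * 2000000;                     /* = 2048000000 */
    big:    (1024 * 2000000 << 10) / 2048000000 = 1024;  /* full scale   */
    little: (1024 * 1000000 << 10) / 2048000000 =  512;  /* half scale   */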
@@ -321,15 +320,15 @@ bool __init topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu)
                         cpu_node, raw_capacity[cpu]);
 
                 /*
-                 * Update freq_factor for calculating early boot cpu capacities.
+                 * Update capacity_freq_ref for calculating early boot CPU capacities.
                  * For non-clk CPU DVFS mechanism, there's no way to get the
                  * frequency value now, assuming they are running at the same
-                 * frequency (by keeping the initial freq_factor value).
+                 * frequency (by keeping the initial capacity_freq_ref value).
                  */
                 cpu_clk = of_clk_get(cpu_node, 0);
                 if (!PTR_ERR_OR_ZERO(cpu_clk)) {
-                        per_cpu(freq_factor, cpu) =
-                                clk_get_rate(cpu_clk) / 1000;
+                        per_cpu(capacity_freq_ref, cpu) =
+                                clk_get_rate(cpu_clk) / HZ_PER_KHZ;
                         clk_put(cpu_clk);
                 }
         } else {
@@ -411,7 +410,7 @@ init_cpu_capacity_callback(struct notifier_block *nb,
         cpumask_andnot(cpus_to_visit, cpus_to_visit, policy->related_cpus);
 
         for_each_cpu(cpu, policy->related_cpus)
-                per_cpu(freq_factor, cpu) = policy->cpuinfo.max_freq / 1000;
+                per_cpu(capacity_freq_ref, cpu) = policy->cpuinfo.max_freq;
 
         if (cpumask_empty(cpus_to_visit)) {
                 topology_normalize_cpu_scale();
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -27,6 +27,13 @@ static inline unsigned long topology_get_cpu_scale(int cpu)
 
 void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity);
 
+DECLARE_PER_CPU(unsigned long, capacity_freq_ref);
+
+static inline unsigned long topology_get_freq_ref(int cpu)
+{
+        return per_cpu(capacity_freq_ref, cpu);
+}
+
 DECLARE_PER_CPU(unsigned long, arch_freq_scale);
 
 static inline unsigned long topology_get_freq_scale(int cpu)
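
The new topology_get_freq_ref() helper gives every user of the frequency
scale the same fixed denominator. As an illustration of how such a
reference is typically consumed, a sketch modeled on
topology_set_freq_scale(cpus, cur_freq, max_freq), both frequencies in
kHz (not the verbatim function body):

    unsigned long scale;
    int i;

    /* scale lands in [0 .. SCHED_CAPACITY_SCALE] for cur_freq <= max_freq */
    scale = (cur_freq << SCHED_CAPACITY_SHIFT) / max_freq;

    for_each_cpu(i, cpus)
            per_cpu(arch_freq_scale, i) = scale;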
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -279,6 +279,14 @@ void arch_update_thermal_pressure(const struct cpumask *cpus,
 { }
 #endif
 
+#ifndef arch_scale_freq_ref
+static __always_inline
+unsigned int arch_scale_freq_ref(int cpu)
+{
+        return 0;
+}
+#endif
+
 static inline int task_node(const struct task_struct *p)
 {
         return cpu_to_node(task_cpu(p));
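
The generic fallback returns 0, signalling that no reference frequency
is available. A hypothetical caller-side guard (illustrative only, not
code from this patch):

    unsigned long freq_ref = arch_scale_freq_ref(cpu);

    /* A zero reference means the architecture provides none. */
    if (freq_ref)
            scale = (freq << SCHED_CAPACITY_SHIFT) / freq_ref;
    else
            scale = SCHED_CAPACITY_SCALE;   /* hypothetical fallback */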