Commit b6e25117 authored by Rafael J. Wysocki

Merge branch 'pm-cpufreq-sched' into pm-cpufreq

parents 9ad0a1b6 3ba7bcaa
......@@ -28,7 +28,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=m
CONFIG_CPU_FREQ_GOV_USERSPACE=m
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m
CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
CONFIG_CPUFREQ_DT=y
CONFIG_CPU_IDLE=y
CONFIG_ARM_EXYNOS_CPUIDLE=y
......
......@@ -135,7 +135,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=m
CONFIG_CPU_FREQ_GOV_USERSPACE=m
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m
CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
CONFIG_QORIQ_CPUFREQ=y
CONFIG_CPU_IDLE=y
CONFIG_ARM_CPUIDLE=y
......
......@@ -194,7 +194,7 @@ config CPU_FREQ_GOV_CONSERVATIVE
If in doubt, say N.
config CPU_FREQ_GOV_SCHEDUTIL
tristate "'schedutil' cpufreq policy governor"
bool "'schedutil' cpufreq policy governor"
depends on CPU_FREQ && SMP
select CPU_FREQ_GOV_ATTR_SET
select IRQ_WORK
......@@ -208,9 +208,6 @@ config CPU_FREQ_GOV_SCHEDUTIL
frequency tipping point is at utilization/capacity equal to 80% in
both cases.
To compile this driver as a module, choose M here: the module will
be called cpufreq_schedutil.
If in doubt, say N.
comment "CPU frequency scaling drivers"
......
......@@ -260,7 +260,7 @@ static void dbs_irq_work(struct irq_work *irq_work)
}
static void dbs_update_util_handler(struct update_util_data *data, u64 time,
unsigned long util, unsigned long max)
unsigned int flags)
{
struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
......
......@@ -181,6 +181,8 @@ struct _pid {
* @cpu: CPU number for this instance data
* @update_util: CPUFreq utility callback information
* @update_util_set: CPUFreq utility callback is set
* @iowait_boost: iowait-related boost fraction
* @last_update: Time of the last update.
* @pstate: Stores P state limits for this CPU
* @vid: Stores VID limits for this CPU
* @pid: Stores PID parameters for this CPU
......@@ -206,6 +208,7 @@ struct cpudata {
struct vid_data vid;
struct _pid pid;
u64 last_update;
u64 last_sample_time;
u64 prev_aperf;
u64 prev_mperf;
......@@ -216,6 +219,7 @@ struct cpudata {
struct acpi_processor_performance acpi_perf_data;
bool valid_pss_table;
#endif
unsigned int iowait_boost;
};
static struct cpudata **all_cpu_data;
......@@ -229,6 +233,7 @@ static struct cpudata **all_cpu_data;
* @p_gain_pct: PID proportional gain
* @i_gain_pct: PID integral gain
* @d_gain_pct: PID derivative gain
* @boost_iowait: Whether or not to use iowait boosting.
*
* Stores per CPU model static PID configuration data.
*/
......@@ -240,6 +245,7 @@ struct pstate_adjust_policy {
int p_gain_pct;
int d_gain_pct;
int i_gain_pct;
bool boost_iowait;
};
/**
......@@ -1037,6 +1043,7 @@ static const struct cpu_defaults silvermont_params = {
.p_gain_pct = 14,
.d_gain_pct = 0,
.i_gain_pct = 4,
.boost_iowait = true,
},
.funcs = {
.get_max = atom_get_max_pstate,
......@@ -1058,6 +1065,7 @@ static const struct cpu_defaults airmont_params = {
.p_gain_pct = 14,
.d_gain_pct = 0,
.i_gain_pct = 4,
.boost_iowait = true,
},
.funcs = {
.get_max = atom_get_max_pstate,
......@@ -1099,6 +1107,7 @@ static const struct cpu_defaults bxt_params = {
.p_gain_pct = 14,
.d_gain_pct = 0,
.i_gain_pct = 4,
.boost_iowait = true,
},
.funcs = {
.get_max = core_get_max_pstate,
......@@ -1222,36 +1231,18 @@ static inline int32_t get_avg_pstate(struct cpudata *cpu)
static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
{
struct sample *sample = &cpu->sample;
u64 cummulative_iowait, delta_iowait_us;
u64 delta_iowait_mperf;
u64 mperf, now;
int32_t cpu_load;
int32_t busy_frac, boost;
cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now);
busy_frac = div_fp(sample->mperf, sample->tsc);
/*
* Convert iowait time into number of IO cycles spent at max_freq.
* IO is considered as busy only for the cpu_load algorithm. For
* performance this is not needed since we always try to reach the
* maximum P-State, so we are already boosting the IOs.
*/
delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
cpu->pstate.max_pstate, MSEC_PER_SEC);
boost = cpu->iowait_boost;
cpu->iowait_boost >>= 1;
mperf = cpu->sample.mperf + delta_iowait_mperf;
cpu->prev_cummulative_iowait = cummulative_iowait;
if (busy_frac < boost)
busy_frac = boost;
/*
* The load can be estimated as the ratio of the mperf counter
* running at a constant frequency during active periods
* (C0) and the time stamp counter running at the same frequency
* also during C-states.
*/
cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
cpu->sample.busy_scaled = cpu_load;
return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
sample->busy_scaled = busy_frac * 100;
return get_avg_pstate(cpu) - pid_calc(&cpu->pid, sample->busy_scaled);
}
static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
......@@ -1325,15 +1316,29 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
sample->mperf,
sample->aperf,
sample->tsc,
get_avg_frequency(cpu));
get_avg_frequency(cpu),
fp_toint(cpu->iowait_boost * 100));
}
static void intel_pstate_update_util(struct update_util_data *data, u64 time,
unsigned long util, unsigned long max)
unsigned int flags)
{
struct cpudata *cpu = container_of(data, struct cpudata, update_util);
u64 delta_ns = time - cpu->sample.time;
u64 delta_ns;
if (pid_params.boost_iowait) {
if (flags & SCHED_CPUFREQ_IOWAIT) {
cpu->iowait_boost = int_tofp(1);
} else if (cpu->iowait_boost) {
/* Clear iowait_boost if the CPU may have been idle. */
delta_ns = time - cpu->last_update;
if (delta_ns > TICK_NSEC)
cpu->iowait_boost = 0;
}
cpu->last_update = time;
}
delta_ns = time - cpu->sample.time;
if ((s64)delta_ns >= pid_params.sample_rate_ns) {
bool sample_taken = intel_pstate_sample(cpu, time);
......
......@@ -3469,15 +3469,20 @@ static inline unsigned long rlimit_max(unsigned int limit)
return task_rlimit_max(current, limit);
}
/*
 * Reason flags passed as the "flags" argument of the cpufreq
 * utilization-update callbacks.
 */
/* Passed by update_curr_rt(). */
#define SCHED_CPUFREQ_RT (1U << 0)
/* Passed by update_curr_dl(). */
#define SCHED_CPUFREQ_DL (1U << 1)
/* Passed by enqueue_task_fair() when the enqueued task has in_iowait set. */
#define SCHED_CPUFREQ_IOWAIT (1U << 2)
/* Mask matching either RT or DL; schedutil picks the maximum frequency for it. */
#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
#ifdef CONFIG_CPU_FREQ
struct update_util_data {
void (*func)(struct update_util_data *data,
u64 time, unsigned long util, unsigned long max);
void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
};
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
void (*func)(struct update_util_data *data, u64 time,
unsigned long util, unsigned long max));
unsigned int flags));
void cpufreq_remove_update_util_hook(int cpu);
#endif /* CONFIG_CPU_FREQ */
......
......@@ -69,7 +69,8 @@ TRACE_EVENT(pstate_sample,
u64 mperf,
u64 aperf,
u64 tsc,
u32 freq
u32 freq,
u32 io_boost
),
TP_ARGS(core_busy,
......@@ -79,7 +80,8 @@ TRACE_EVENT(pstate_sample,
mperf,
aperf,
tsc,
freq
freq,
io_boost
),
TP_STRUCT__entry(
......@@ -91,6 +93,7 @@ TRACE_EVENT(pstate_sample,
__field(u64, aperf)
__field(u64, tsc)
__field(u32, freq)
__field(u32, io_boost)
),
TP_fast_assign(
......@@ -102,9 +105,10 @@ TRACE_EVENT(pstate_sample,
__entry->aperf = aperf;
__entry->tsc = tsc;
__entry->freq = freq;
__entry->io_boost = io_boost;
),
TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu ",
TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu io_boost=%lu",
(unsigned long)__entry->core_busy,
(unsigned long)__entry->scaled_busy,
(unsigned long)__entry->from,
......@@ -112,7 +116,8 @@ TRACE_EVENT(pstate_sample,
(unsigned long long)__entry->mperf,
(unsigned long long)__entry->aperf,
(unsigned long long)__entry->tsc,
(unsigned long)__entry->freq
(unsigned long)__entry->freq,
(unsigned long)__entry->io_boost
)
);
......
......@@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
*/
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
void (*func)(struct update_util_data *data, u64 time,
unsigned long util, unsigned long max))
unsigned int flags))
{
if (WARN_ON(!data || !func))
return;
......
......@@ -12,7 +12,6 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cpufreq.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/power.h>
......@@ -48,11 +47,14 @@ struct sugov_cpu {
struct sugov_policy *sg_policy;
unsigned int cached_raw_freq;
unsigned long iowait_boost;
unsigned long iowait_boost_max;
u64 last_update;
/* The fields below are only needed when sharing a policy. */
unsigned long util;
unsigned long max;
u64 last_update;
unsigned int flags;
};
static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
......@@ -144,24 +146,75 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
return cpufreq_driver_resolve_freq(policy, freq);
}
/*
 * Report the utilization of the CPU this code is running on.
 *
 * @util: receives the runqueue's CFS util_avg, clamped to the CPU capacity.
 * @max:  receives the CPU capacity as returned by arch_scale_cpu_capacity().
 */
static void sugov_get_util(unsigned long *util, unsigned long *max)
{
	unsigned long capacity = arch_scale_cpu_capacity(NULL, smp_processor_id());
	struct rq *this_runqueue = this_rq();

	/* util_avg is never reported above the capacity ceiling. */
	*util = min(this_runqueue->cfs.avg.util_avg, capacity);
	*max = capacity;
}
/*
 * Arm or expire the per-CPU iowait boost.
 *
 * @sg_cpu: per-CPU governor data whose iowait_boost is updated.
 * @time:   timestamp of the current update.
 * @flags:  update reason flags; SCHED_CPUFREQ_IOWAIT arms the boost.
 *
 * With SCHED_CPUFREQ_IOWAIT set the boost is raised to iowait_boost_max.
 * Otherwise a pending boost is dropped when more than TICK_NSEC has passed
 * since sg_cpu->last_update.
 */
static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
				   unsigned int flags)
{
	s64 since_last_update;

	if (flags & SCHED_CPUFREQ_IOWAIT) {
		sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
		return;
	}

	/* Nothing pending, nothing to expire. */
	if (!sg_cpu->iowait_boost)
		return;

	/* Clear iowait_boost if the CPU appears to have been idle. */
	since_last_update = time - sg_cpu->last_update;
	if (since_last_update > TICK_NSEC)
		sg_cpu->iowait_boost = 0;
}
/*
 * Apply a pending iowait boost to a utilization/capacity pair.
 *
 * @sg_cpu: per-CPU governor data holding iowait_boost and iowait_boost_max.
 * @util:   in/out utilization value.
 * @max:    in/out utilization ceiling.
 *
 * When a boost is pending and iowait_boost / iowait_boost_max is larger than
 * *util / *max, the pair is replaced by the boost pair. The pending boost is
 * halved on every call that finds it non-zero.
 */
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
			       unsigned long *max)
{
	unsigned long pending = sg_cpu->iowait_boost;
	unsigned long ceiling = sg_cpu->iowait_boost_max;

	if (pending) {
		/* Ratios are compared by cross-multiplication to avoid division. */
		if (*max * pending > *util * ceiling) {
			*util = pending;
			*max = ceiling;
		}

		/* Decay the boost for the next invocation. */
		sg_cpu->iowait_boost >>= 1;
	}
}
static void sugov_update_single(struct update_util_data *hook, u64 time,
unsigned long util, unsigned long max)
unsigned int flags)
{
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
unsigned long util, max;
unsigned int next_f;
sugov_set_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time;
if (!sugov_should_update_freq(sg_policy, time))
return;
next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq :
get_next_freq(sg_cpu, util, max);
if (flags & SCHED_CPUFREQ_RT_DL) {
next_f = policy->cpuinfo.max_freq;
} else {
sugov_get_util(&util, &max);
sugov_iowait_boost(sg_cpu, &util, &max);
next_f = get_next_freq(sg_cpu, util, max);
}
sugov_update_commit(sg_policy, time, next_f);
}
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
unsigned long util, unsigned long max)
unsigned long util, unsigned long max,
unsigned int flags)
{
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
......@@ -169,9 +222,11 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
u64 last_freq_update_time = sg_policy->last_freq_update_time;
unsigned int j;
if (util == ULONG_MAX)
if (flags & SCHED_CPUFREQ_RT_DL)
return max_f;
sugov_iowait_boost(sg_cpu, &util, &max);
for_each_cpu(j, policy->cpus) {
struct sugov_cpu *j_sg_cpu;
unsigned long j_util, j_max;
......@@ -186,41 +241,50 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
* frequency update and the time elapsed between the last update
* of the CPU utilization and the last frequency update is long
* enough, don't take the CPU into account as it probably is
* idle now.
* idle now (and clear iowait_boost for it).
*/
delta_ns = last_freq_update_time - j_sg_cpu->last_update;
if (delta_ns > TICK_NSEC)
if (delta_ns > TICK_NSEC) {
j_sg_cpu->iowait_boost = 0;
continue;
j_util = j_sg_cpu->util;
if (j_util == ULONG_MAX)
}
if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
return max_f;
j_util = j_sg_cpu->util;
j_max = j_sg_cpu->max;
if (j_util * max > j_max * util) {
util = j_util;
max = j_max;
}
sugov_iowait_boost(j_sg_cpu, &util, &max);
}
return get_next_freq(sg_cpu, util, max);
}
static void sugov_update_shared(struct update_util_data *hook, u64 time,
unsigned long util, unsigned long max)
unsigned int flags)
{
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned long util, max;
unsigned int next_f;
sugov_get_util(&util, &max);
raw_spin_lock(&sg_policy->update_lock);
sg_cpu->util = util;
sg_cpu->max = max;
sg_cpu->flags = flags;
sugov_set_iowait_boost(sg_cpu, time, flags);
sg_cpu->last_update = time;
if (sugov_should_update_freq(sg_policy, time)) {
next_f = sugov_next_freq_shared(sg_cpu, util, max);
next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
sugov_update_commit(sg_policy, time, next_f);
}
......@@ -444,10 +508,13 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_cpu->sg_policy = sg_policy;
if (policy_is_shared(policy)) {
sg_cpu->util = ULONG_MAX;
sg_cpu->util = 0;
sg_cpu->max = 0;
sg_cpu->flags = SCHED_CPUFREQ_RT;
sg_cpu->last_update = 0;
sg_cpu->cached_raw_freq = 0;
sg_cpu->iowait_boost = 0;
sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
sugov_update_shared);
} else {
......@@ -495,28 +562,15 @@ static struct cpufreq_governor schedutil_gov = {
.limits = sugov_limits,
};
static int __init sugov_module_init(void)
{
return cpufreq_register_governor(&schedutil_gov);
}
static void __exit sugov_module_exit(void)
{
cpufreq_unregister_governor(&schedutil_gov);
}
MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
MODULE_LICENSE("GPL");
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
/*
 * Return a pointer to the schedutil governor descriptor.
 * This definition sits under #ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL.
 */
struct cpufreq_governor *cpufreq_default_governor(void)
{
return &schedutil_gov;
}
fs_initcall(sugov_module_init);
#else
module_init(sugov_module_init);
#endif
module_exit(sugov_module_exit);
/*
 * Register the schedutil governor with the cpufreq core.
 * Returns whatever cpufreq_register_governor() returns.
 */
static int __init sugov_register(void)
{
return cpufreq_register_governor(&schedutil_gov);
}
/* Run the registration as an fs_initcall. */
fs_initcall(sugov_register);
......@@ -735,9 +735,8 @@ static void update_curr_dl(struct rq *rq)
return;
}
/* kick cpufreq (see the comment in linux/cpufreq.h). */
if (cpu_of(rq) == smp_processor_id())
cpufreq_trigger_update(rq_clock(rq));
/* kick cpufreq (see the comment in kernel/sched/sched.h). */
cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
......
......@@ -2875,12 +2875,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
{
struct rq *rq = rq_of(cfs_rq);
int cpu = cpu_of(rq);
if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
unsigned long max = rq->cpu_capacity_orig;
if (&this_rq()->cfs == cfs_rq) {
/*
* There are a few boundary cases this might miss but it should
* get called often enough that that should (hopefully) not be
......@@ -2897,8 +2892,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
*
* See cpu_util().
*/
cpufreq_update_util(rq_clock(rq),
min(cfs_rq->avg.util_avg, max), max);
cpufreq_update_util(rq_of(cfs_rq), 0);
}
}
......@@ -3159,10 +3153,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
static inline void update_load_avg(struct sched_entity *se, int not_used)
{
struct cfs_rq *cfs_rq = cfs_rq_of(se);
struct rq *rq = rq_of(cfs_rq);
cpufreq_trigger_update(rq_clock(rq));
cpufreq_update_util(rq_of(cfs_rq_of(se)), 0);
}
static inline void
......@@ -4509,6 +4500,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
/*
* If in_iowait is set, the code below may not trigger any cpufreq
* utilization updates, so do it here explicitly with the IOWAIT flag
* passed.
*/
if (p->in_iowait)
cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
for_each_sched_entity(se) {
if (se->on_rq)
break;
......
......@@ -957,9 +957,8 @@ static void update_curr_rt(struct rq *rq)
if (unlikely((s64)delta_exec <= 0))
return;
/* Kick cpufreq (see the comment in linux/cpufreq.h). */
if (cpu_of(rq) == smp_processor_id())
cpufreq_trigger_update(rq_clock(rq));
/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
schedstat_set(curr->se.statistics.exec_max,
max(curr->se.statistics.exec_max, delta_exec));
......
......@@ -1763,27 +1763,13 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
/**
* cpufreq_update_util - Take a note about CPU utilization changes.
* @time: Current time.
* @util: Current utilization.
* @max: Utilization ceiling.
* @rq: Runqueue to carry out the update for.
* @flags: Update reason flags.
*
* This function is called by the scheduler on every invocation of
* update_load_avg() on the CPU whose utilization is being updated.
* This function is called by the scheduler on the CPU whose utilization is
* being updated.
*
* It can only be called from RCU-sched read-side critical sections.
*/
static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
{
struct update_util_data *data;
data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
if (data)
data->func(data, time, util, max);
}
/**
* cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
* @time: Current time.
*
* The way cpufreq is currently arranged requires it to evaluate the CPU
* performance state (frequency/voltage) on a regular basis to prevent it from
......@@ -1797,13 +1783,23 @@ static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned lo
* but that really is a band-aid. Going forward it should be replaced with
* solutions targeted more specifically at RT and DL tasks.
*/
static inline void cpufreq_trigger_update(u64 time)
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
{
	/* Hook pointer registered for the CPU executing this code, if any. */
	struct update_util_data *hook =
		rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));

	/* No governor/driver callback installed on this CPU. */
	if (!hook)
		return;

	/* Hand the callback the runqueue clock and the update reason flags. */
	hook->func(hook, rq_clock(rq), flags);
}
static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
{
cpufreq_update_util(time, ULONG_MAX, 0);
if (cpu_of(rq) == smp_processor_id())
cpufreq_update_util(rq, flags);
}
#else
static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {}
static inline void cpufreq_trigger_update(u64 time) {}
static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
#endif /* CONFIG_CPU_FREQ */
#ifdef arch_scale_freq_capacity
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment