Commit 88817acb authored by Linus Torvalds

Merge tag 'pm-6.1-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull power management fixes from Rafael Wysocki:
 "These revert a recent change in the schedutil cpufreq governor that
  had not been expected to make any functional difference, but turned
  out to introduce a performance regression, fix an initialization issue
  in the amd-pstate driver and make it actually replace the venerable
  ACPI cpufreq driver on the supported systems by default.

  Specifics:

   - Revert a recent schedutil cpufreq governor change that introduced a
     performance regression on Pixel 6 (Sam Wu)

   - Fix amd-pstate driver initialization after running the kernel via
     kexec (Wyes Karny)

   - Turn amd-pstate into a built-in driver which allows it to take
     precedence over acpi-cpufreq by default on supported systems and
     amend it with a mechanism to disable this behavior (Perry Yuan)

   - Update amd-pstate documentation in accordance with the other
     changes made to it (Perry Yuan)"

* tag 'pm-6.1-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  Documentation: add amd-pstate kernel command line options
  Documentation: amd-pstate: add driver working mode introduction
  cpufreq: amd-pstate: add amd-pstate driver parameter for mode selection
  cpufreq: amd-pstate: change amd-pstate driver to be built-in type
  cpufreq: amd-pstate: cpufreq: amd-pstate: reset MSR_AMD_PERF_CTL register at init
  Revert "cpufreq: schedutil: Move max CPU capacity to sugov_policy"
parents e3ebac80 1056d314
...@@ -6959,3 +6959,14 @@ ...@@ -6959,3 +6959,14 @@
memory, and other data can't be written using memory, and other data can't be written using
xmon commands. xmon commands.
off xmon is disabled. off xmon is disabled.
amd_pstate= [X86]
disable
Do not enable amd_pstate as the default
scaling driver for the supported processors
passive
Use amd_pstate as a scaling driver, driver requests a
desired performance on this abstract scale and the power
management firmware translates the requests into actual
hardware states (core frequency, data fabric and memory
clocks etc.)
...@@ -283,23 +283,19 @@ efficiency frequency management method on AMD processors. ...@@ -283,23 +283,19 @@ efficiency frequency management method on AMD processors.
Kernel Module Options for ``amd-pstate`` Kernel Module Options for ``amd-pstate``
========================================= =========================================
.. _shared_mem: Passive Mode
------------
``shared_mem``
Use a module param (shared_mem) to enable related processors manually with ``amd_pstate=passive``
**amd_pstate.shared_mem=1**.
Due to the performance issue on the processors with `Shared Memory Support It will be enabled if the ``amd_pstate=passive`` is passed to the kernel in the command line.
<perf_cap_>`_, we disable it presently and will re-enable this by default In this mode, ``amd_pstate`` driver software specifies a desired QoS target in the CPPC
once we address performance issue with this solution. performance scale as a relative number. This can be expressed as percentage of nominal
performance (infrastructure max). Below the nominal sustained performance level,
To check whether the current processor is using `Full MSR Support <perf_cap_>`_ desired performance expresses the average performance level of the processor subject
or `Shared Memory Support <perf_cap_>`_ : :: to the Performance Reduction Tolerance register. Above the nominal performance level,
processor must provide at least nominal performance requested and go higher if current
ray@hr-test1:~$ lscpu | grep cppc operating conditions allow.
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
If the CPU flags have ``cppc``, then this processor supports `Full MSR Support
<perf_cap_>`_. Otherwise, it supports `Shared Memory Support <perf_cap_>`_.
``cpupower`` tool support for ``amd-pstate`` ``cpupower`` tool support for ``amd-pstate``
......
...@@ -35,7 +35,7 @@ config X86_PCC_CPUFREQ ...@@ -35,7 +35,7 @@ config X86_PCC_CPUFREQ
If in doubt, say N. If in doubt, say N.
config X86_AMD_PSTATE config X86_AMD_PSTATE
tristate "AMD Processor P-State driver" bool "AMD Processor P-State driver"
depends on X86 && ACPI depends on X86 && ACPI
select ACPI_PROCESSOR select ACPI_PROCESSOR
select ACPI_CPPC_LIB if X86_64 select ACPI_CPPC_LIB if X86_64
......
...@@ -59,12 +59,8 @@ ...@@ -59,12 +59,8 @@
* we disable it by default to go acpi-cpufreq on these processors and add a * we disable it by default to go acpi-cpufreq on these processors and add a
* module parameter to be able to enable it manually for debugging. * module parameter to be able to enable it manually for debugging.
*/ */
static bool shared_mem = false;
module_param(shared_mem, bool, 0444);
MODULE_PARM_DESC(shared_mem,
"enable amd-pstate on processors with shared memory solution (false = disabled (default), true = enabled)");
static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_driver;
static int cppc_load __initdata;
static inline int pstate_enable(bool enable) static inline int pstate_enable(bool enable)
{ {
...@@ -424,12 +420,22 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata) ...@@ -424,12 +420,22 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
amd_pstate_driver.boost_enabled = true; amd_pstate_driver.boost_enabled = true;
} }
static void amd_perf_ctl_reset(unsigned int cpu)
{
wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}
static int amd_pstate_cpu_init(struct cpufreq_policy *policy) static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{ {
int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
struct device *dev; struct device *dev;
struct amd_cpudata *cpudata; struct amd_cpudata *cpudata;
/*
* Resetting PERF_CTL_MSR will put the CPU in P0 frequency,
* which is ideal for initialization process.
*/
amd_perf_ctl_reset(policy->cpu);
dev = get_cpu_device(policy->cpu); dev = get_cpu_device(policy->cpu);
if (!dev) if (!dev)
return -ENODEV; return -ENODEV;
...@@ -616,6 +622,15 @@ static int __init amd_pstate_init(void) ...@@ -616,6 +622,15 @@ static int __init amd_pstate_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return -ENODEV; return -ENODEV;
/*
* by default the pstate driver is disabled to load
* enable the amd_pstate passive mode driver explicitly
* with amd_pstate=passive in kernel command line
*/
if (!cppc_load) {
pr_debug("driver load is disabled, boot with amd_pstate=passive to enable this\n");
return -ENODEV;
}
if (!acpi_cpc_valid()) { if (!acpi_cpc_valid()) {
pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n"); pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
...@@ -630,13 +645,11 @@ static int __init amd_pstate_init(void) ...@@ -630,13 +645,11 @@ static int __init amd_pstate_init(void)
if (boot_cpu_has(X86_FEATURE_CPPC)) { if (boot_cpu_has(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n"); pr_debug("AMD CPPC MSR based functionality is supported\n");
amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf; amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
} else if (shared_mem) { } else {
pr_debug("AMD CPPC shared memory based functionality is supported\n");
static_call_update(amd_pstate_enable, cppc_enable); static_call_update(amd_pstate_enable, cppc_enable);
static_call_update(amd_pstate_init_perf, cppc_init_perf); static_call_update(amd_pstate_init_perf, cppc_init_perf);
static_call_update(amd_pstate_update_perf, cppc_update_perf); static_call_update(amd_pstate_update_perf, cppc_update_perf);
} else {
pr_info("This processor supports shared memory solution, you can enable it with amd_pstate.shared_mem=1\n");
return -ENODEV;
} }
/* enable amd pstate feature */ /* enable amd pstate feature */
...@@ -653,16 +666,22 @@ static int __init amd_pstate_init(void) ...@@ -653,16 +666,22 @@ static int __init amd_pstate_init(void)
return ret; return ret;
} }
device_initcall(amd_pstate_init);
static void __exit amd_pstate_exit(void) static int __init amd_pstate_param(char *str)
{ {
cpufreq_unregister_driver(&amd_pstate_driver); if (!str)
return -EINVAL;
amd_pstate_enable(false); if (!strcmp(str, "disable")) {
} cppc_load = 0;
pr_info("driver is explicitly disabled\n");
} else if (!strcmp(str, "passive"))
cppc_load = 1;
module_init(amd_pstate_init); return 0;
module_exit(amd_pstate_exit); }
early_param("amd_pstate", amd_pstate_param);
MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>"); MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
......
...@@ -25,9 +25,6 @@ struct sugov_policy { ...@@ -25,9 +25,6 @@ struct sugov_policy {
unsigned int next_freq; unsigned int next_freq;
unsigned int cached_raw_freq; unsigned int cached_raw_freq;
/* max CPU capacity, which is equal for all CPUs in freq. domain */
unsigned long max;
/* The next fields are only needed if fast switch cannot be used: */ /* The next fields are only needed if fast switch cannot be used: */
struct irq_work irq_work; struct irq_work irq_work;
struct kthread_work work; struct kthread_work work;
...@@ -51,6 +48,7 @@ struct sugov_cpu { ...@@ -51,6 +48,7 @@ struct sugov_cpu {
unsigned long util; unsigned long util;
unsigned long bw_dl; unsigned long bw_dl;
unsigned long max;
/* The field below is for single-CPU policies only: */ /* The field below is for single-CPU policies only: */
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
...@@ -160,6 +158,7 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) ...@@ -160,6 +158,7 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
{ {
struct rq *rq = cpu_rq(sg_cpu->cpu); struct rq *rq = cpu_rq(sg_cpu->cpu);
sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu);
sg_cpu->bw_dl = cpu_bw_dl(rq); sg_cpu->bw_dl = cpu_bw_dl(rq);
sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu),
FREQUENCY_UTIL, NULL); FREQUENCY_UTIL, NULL);
...@@ -254,7 +253,6 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, ...@@ -254,7 +253,6 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
*/ */
static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
{ {
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned long boost; unsigned long boost;
/* No boost currently required */ /* No boost currently required */
...@@ -282,8 +280,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) ...@@ -282,8 +280,7 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
* sg_cpu->util is already in capacity scale; convert iowait_boost * sg_cpu->util is already in capacity scale; convert iowait_boost
* into the same scale so we can compare. * into the same scale so we can compare.
*/ */
boost = sg_cpu->iowait_boost * sg_policy->max; boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
boost >>= SCHED_CAPACITY_SHIFT;
boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
if (sg_cpu->util < boost) if (sg_cpu->util < boost)
sg_cpu->util = boost; sg_cpu->util = boost;
...@@ -340,7 +337,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, ...@@ -340,7 +337,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
if (!sugov_update_single_common(sg_cpu, time, flags)) if (!sugov_update_single_common(sg_cpu, time, flags))
return; return;
next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max); next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max);
/* /*
* Do not reduce the frequency if the CPU has not been idle * Do not reduce the frequency if the CPU has not been idle
* recently, as the reduction is likely to be premature then. * recently, as the reduction is likely to be premature then.
...@@ -376,7 +373,6 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, ...@@ -376,7 +373,6 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
unsigned int flags) unsigned int flags)
{ {
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned long prev_util = sg_cpu->util; unsigned long prev_util = sg_cpu->util;
/* /*
...@@ -403,8 +399,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, ...@@ -403,8 +399,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
sg_cpu->util = prev_util; sg_cpu->util = prev_util;
cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
map_util_perf(sg_cpu->util), map_util_perf(sg_cpu->util), sg_cpu->max);
sg_policy->max);
sg_cpu->sg_policy->last_freq_update_time = time; sg_cpu->sg_policy->last_freq_update_time = time;
} }
...@@ -413,19 +408,25 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) ...@@ -413,19 +408,25 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{ {
struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy; struct cpufreq_policy *policy = sg_policy->policy;
unsigned long util = 0; unsigned long util = 0, max = 1;
unsigned int j; unsigned int j;
for_each_cpu(j, policy->cpus) { for_each_cpu(j, policy->cpus) {
struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
unsigned long j_util, j_max;
sugov_get_util(j_sg_cpu); sugov_get_util(j_sg_cpu);
sugov_iowait_apply(j_sg_cpu, time); sugov_iowait_apply(j_sg_cpu, time);
j_util = j_sg_cpu->util;
j_max = j_sg_cpu->max;
util = max(j_sg_cpu->util, util); if (j_util * max > j_max * util) {
util = j_util;
max = j_max;
}
} }
return get_next_freq(sg_policy, util, sg_policy->max); return get_next_freq(sg_policy, util, max);
} }
static void static void
...@@ -751,7 +752,7 @@ static int sugov_start(struct cpufreq_policy *policy) ...@@ -751,7 +752,7 @@ static int sugov_start(struct cpufreq_policy *policy)
{ {
struct sugov_policy *sg_policy = policy->governor_data; struct sugov_policy *sg_policy = policy->governor_data;
void (*uu)(struct update_util_data *data, u64 time, unsigned int flags); void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
unsigned int cpu = cpumask_first(policy->cpus); unsigned int cpu;
sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
sg_policy->last_freq_update_time = 0; sg_policy->last_freq_update_time = 0;
...@@ -759,7 +760,6 @@ static int sugov_start(struct cpufreq_policy *policy) ...@@ -759,7 +760,6 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_policy->work_in_progress = false; sg_policy->work_in_progress = false;
sg_policy->limits_changed = false; sg_policy->limits_changed = false;
sg_policy->cached_raw_freq = 0; sg_policy->cached_raw_freq = 0;
sg_policy->max = arch_scale_cpu_capacity(cpu);
sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment