Commit 40e8e98f authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'pm-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm

Pull power management updates from Rafael Wysocki:
 "These add Intel TPMI (Topology Aware Register and PM Capsule
  Interface) support to the power capping subsystem, extend the
  intel_idle driver to work in VM guests where MWAIT is not available,
  extend the system-wide power management diagnostics, fix bugs and
  clean up code.

  Specifics:

   - Introduce power capping core support for Intel TPMI (Topology Aware
     Register and PM Capsule Interface) and a TPMI interface driver for
     Intel RAPL (Zhang Rui, Dan Carpenter)

   - Fix CONFIG_IOSF_MBI dependency in the Intel RAPL power capping
     driver (Zhang Rui)

   - Fix invalid initialization for pl4_supported field in the Intel
     RAPL power capping driver (Sumeet Pawnikar)

   - Clean up the intel_idle driver, make it work with VM guests that
     cannot use the MWAIT instruction and address the case in which the
     host may enter a deep idle state when the guest is idle (Arjan van
     de Ven)

   - Prevent cpufreq drivers that provide the ->adjust_perf() callback
     without a ->fast_switch() one, which is used as a fallback from the
     former in some cases, from being registered (Wyes Karny)

   - Fix some issues related to the AMD P-state cpufreq driver (Mario
     Limonciello, Wyes Karny)

   - Fix the energy_performance_preference attribute handling in the
     intel_pstate driver in passive mode (Tero Kristo)

   - Fix the handling of pm_suspend_target_state when CONFIG_PM is unset
     (Kai-Heng Feng)

   - Correct spelling mistake in a comment in the hibernation code (Wang
     Honghui)

   - Add arch_resume_nosmt() prototype to avoid a "missing prototypes"
     build warning (Arnd Bergmann)

   - Restrict pm_pr_dbg() to system-wide power transitions and use it in
     a few additional places (Mario Limonciello)

   - Drop verification of in-params from genpd_add_device() and ensure
     that all of its callers will do it (Ulf Hansson)

   - Prevent possible integer overflows from occurring in
     genpd_parse_state() (Nikita Zhandarovich)

   - Reorder fields in 'struct devfreq_dev_status' to reduce its size
     somewhat (Christophe JAILLET)

   - Ensure that the Exynos PPMU driver is already loaded before the
     Exynos Bus driver starts probing so as to avoid a possible freeze
     while loading the kernel modules (Marek Szyprowski)

   - Fix variable dereferencing before NULL check in the mtk-cci devfreq
     driver (Sukrut Bellary)"

* tag 'pm-6.5-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (42 commits)
  intel_idle: Add a "Long HLT" C1 state for the VM guest mode
  cpufreq: intel_pstate: Fix energy_performance_preference for passive
  cpufreq: amd-pstate: Add a kernel config option to set default mode
  cpufreq: amd-pstate: Set a fallback policy based on preferred_profile
  ACPI: CPPC: Add definition for undefined FADT preferred PM profile value
  cpufreq: amd-pstate: Set default governor to schedutil
  PM: domains: Move the verification of in-params from genpd_add_device()
  cpufreq: amd-pstate: Make amd-pstate EPP driver name hyphenated
  cpufreq: amd-pstate: Write CPPC enable bit per-socket
  intel_idle: Add support for using intel_idle in a VM guest using just hlt
  cpufreq: Fail driver register if it has adjust_perf without fast_switch
  intel_idle: clean up the (new) state_update_enter_method function
  intel_idle: refactor state->enter manipulation into its own function
  platform/x86/amd: pmc: Use pm_pr_dbg() for suspend related messages
  pinctrl: amd: Use pm_pr_dbg to show debugging messages
  ACPI: x86: Add pm_debug_messages for LPS0 _DSM state tracking
  include/linux/suspend.h: Only show pm_pr_dbg messages at suspend/resume
  powercap: RAPL: Fix a NULL vs IS_ERR() bug
  powercap: RAPL: Fix CONFIG_IOSF_MBI dependency
  powercap: RAPL: fix invalid initialization for pl4_supported field
  ...
parents bb695055 c89a27f4
......@@ -59,6 +59,7 @@ static int lps0_dsm_func_mask;
static guid_t lps0_dsm_guid_microsoft;
static int lps0_dsm_func_mask_microsoft;
static int lps0_dsm_state;
/* Device constraint entry structure */
struct lpi_device_info {
......@@ -320,6 +321,44 @@ static void lpi_check_constraints(void)
}
}
/*
 * acpi_s2idle_vendor_amd - Check whether the boot CPU is an AMD part.
 *
 * Return: true when boot_cpu_data.x86_vendor equals X86_VENDOR_AMD,
 * false otherwise.
 */
static bool acpi_s2idle_vendor_amd(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return false;

	return true;
}
/*
 * acpi_sleep_dsm_state_to_str - Translate an LPS0 _DSM state into a label.
 * @state: LPS0 _DSM function number to describe.
 *
 * The AMD-specific numbering (ACPI_LPS0_*_AMD) is used only when no
 * Microsoft function mask has been recorded and the boot CPU is an AMD one;
 * in every other case the generic ACPI_LPS0_* numbering applies.
 *
 * Return: a constant string naming the state, or "unknown" when @state does
 * not match any value of the numbering in use.
 */
static const char *acpi_sleep_dsm_state_to_str(unsigned int state)
{
	const char *label = "unknown";

	if (!lps0_dsm_func_mask_microsoft && acpi_s2idle_vendor_amd()) {
		/* AMD numbering */
		switch (state) {
		case ACPI_LPS0_SCREEN_ON_AMD:
			label = "screen on";
			break;
		case ACPI_LPS0_SCREEN_OFF_AMD:
			label = "screen off";
			break;
		case ACPI_LPS0_ENTRY_AMD:
			label = "lps0 entry";
			break;
		case ACPI_LPS0_EXIT_AMD:
			label = "lps0 exit";
			break;
		default:
			break;
		}

		return label;
	}

	/* Generic / Microsoft numbering */
	switch (state) {
	case ACPI_LPS0_SCREEN_OFF:
		label = "screen off";
		break;
	case ACPI_LPS0_SCREEN_ON:
		label = "screen on";
		break;
	case ACPI_LPS0_ENTRY:
		label = "lps0 entry";
		break;
	case ACPI_LPS0_EXIT:
		label = "lps0 exit";
		break;
	case ACPI_LPS0_MS_ENTRY:
		label = "lps0 ms entry";
		break;
	case ACPI_LPS0_MS_EXIT:
		label = "lps0 ms exit";
		break;
	default:
		break;
	}

	return label;
}
static void acpi_sleep_run_lps0_dsm(unsigned int func, unsigned int func_mask, guid_t dsm_guid)
{
union acpi_object *out_obj;
......@@ -331,14 +370,15 @@ static void acpi_sleep_run_lps0_dsm(unsigned int func, unsigned int func_mask, g
rev_id, func, NULL);
ACPI_FREE(out_obj);
acpi_handle_debug(lps0_device_handle, "_DSM function %u evaluation %s\n",
func, out_obj ? "successful" : "failed");
lps0_dsm_state = func;
if (pm_debug_messages_on) {
acpi_handle_info(lps0_device_handle,
"%s transitioned to state %s\n",
out_obj ? "Successfully" : "Failed to",
acpi_sleep_dsm_state_to_str(lps0_dsm_state));
}
}
/* Return true when the boot CPU's vendor ID is X86_VENDOR_AMD. */
static bool acpi_s2idle_vendor_amd(void)
{
return boot_cpu_data.x86_vendor == X86_VENDOR_AMD;
}
static int validate_dsm(acpi_handle handle, const char *uuid, int rev, guid_t *dsm_guid)
{
......
......@@ -1632,9 +1632,6 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev,
dev_dbg(dev, "%s()\n", __func__);
if (IS_ERR_OR_NULL(genpd) || IS_ERR_OR_NULL(dev))
return -EINVAL;
gpd_data = genpd_alloc_dev_data(dev, gd);
if (IS_ERR(gpd_data))
return PTR_ERR(gpd_data);
......@@ -1676,6 +1673,9 @@ int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev)
{
int ret;
if (!genpd || !dev)
return -EINVAL;
mutex_lock(&gpd_list_lock);
ret = genpd_add_device(genpd, dev, dev);
mutex_unlock(&gpd_list_lock);
......@@ -2523,6 +2523,9 @@ int of_genpd_add_device(struct of_phandle_args *genpdspec, struct device *dev)
struct generic_pm_domain *genpd;
int ret;
if (!dev)
return -EINVAL;
mutex_lock(&gpd_list_lock);
genpd = genpd_get_from_provider(genpdspec);
......@@ -2939,10 +2942,10 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state,
err = of_property_read_u32(state_node, "min-residency-us", &residency);
if (!err)
genpd_state->residency_ns = 1000 * residency;
genpd_state->residency_ns = 1000LL * residency;
genpd_state->power_on_latency_ns = 1000 * exit_latency;
genpd_state->power_off_latency_ns = 1000 * entry_latency;
genpd_state->power_on_latency_ns = 1000LL * exit_latency;
genpd_state->power_off_latency_ns = 1000LL * entry_latency;
genpd_state->fwnode = &state_node->fwnode;
return 0;
......
......@@ -19,11 +19,6 @@
#include "power.h"
#ifndef CONFIG_SUSPEND
suspend_state_t pm_suspend_target_state;
#define pm_suspend_target_state (PM_SUSPEND_ON)
#endif
#define list_for_each_entry_rcu_locked(pos, head, member) \
list_for_each_entry_rcu(pos, head, member, \
srcu_read_lock_held(&wakeup_srcu))
......
......@@ -38,7 +38,7 @@ choice
prompt "Default CPUFreq governor"
default CPU_FREQ_DEFAULT_GOV_USERSPACE if ARM_SA1110_CPUFREQ
default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if ARM64 || ARM
default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if X86_INTEL_PSTATE && SMP
default CPU_FREQ_DEFAULT_GOV_SCHEDUTIL if (X86_INTEL_PSTATE || X86_AMD_PSTATE) && SMP
default CPU_FREQ_DEFAULT_GOV_PERFORMANCE
help
This option sets which CPUFreq governor shall be loaded at
......
......@@ -51,6 +51,23 @@ config X86_AMD_PSTATE
If in doubt, say N.
config X86_AMD_PSTATE_DEFAULT_MODE
int "AMD Processor P-State default mode"
depends on X86_AMD_PSTATE
default 3 if X86_AMD_PSTATE
range 1 4
help
Select the default mode the amd-pstate driver will use on
supported hardware.
The value set has the following meanings:
1 -> Disabled
2 -> Passive
3 -> Active (EPP)
4 -> Guided
For details, take a look at:
<file:Documentation/admin-guide/pm/amd-pstate.rst>.
config X86_AMD_PSTATE_UT
tristate "selftest for AMD Processor P-State driver"
depends on X86 && ACPI_PROCESSOR
......
......@@ -62,7 +62,8 @@
static struct cpufreq_driver *current_pstate_driver;
static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_DISABLE;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool cppc_enabled;
/*
* AMD Energy Preference Performance (EPP)
......@@ -228,7 +229,28 @@ static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
static inline int pstate_enable(bool enable)
{
return wrmsrl_safe(MSR_AMD_CPPC_ENABLE, enable);
int ret, cpu;
unsigned long logical_proc_id_mask = 0;
if (enable == cppc_enabled)
return 0;
for_each_present_cpu(cpu) {
unsigned long logical_id = topology_logical_die_id(cpu);
if (test_bit(logical_id, &logical_proc_id_mask))
continue;
set_bit(logical_id, &logical_proc_id_mask);
ret = wrmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_ENABLE,
enable);
if (ret)
return ret;
}
cppc_enabled = enable;
return 0;
}
static int cppc_enable(bool enable)
......@@ -236,6 +258,9 @@ static int cppc_enable(bool enable)
int cpu, ret = 0;
struct cppc_perf_ctrls perf_ctrls;
if (enable == cppc_enabled)
return 0;
for_each_present_cpu(cpu) {
ret = cppc_set_enable(cpu, enable);
if (ret)
......@@ -251,6 +276,7 @@ static int cppc_enable(bool enable)
}
}
cppc_enabled = enable;
return ret;
}
......@@ -1045,6 +1071,26 @@ static const struct attribute_group amd_pstate_global_attr_group = {
.attrs = pstate_global_attributes,
};
/*
 * amd_pstate_acpi_pm_profile_server - Is the FADT preferred profile a server one?
 *
 * Return: true when acpi_gbl_FADT.preferred_profile is one of
 * PM_ENTERPRISE_SERVER, PM_SOHO_SERVER or PM_PERFORMANCE_SERVER,
 * false for any other value.
 */
static bool amd_pstate_acpi_pm_profile_server(void)
{
	return acpi_gbl_FADT.preferred_profile == PM_ENTERPRISE_SERVER ||
	       acpi_gbl_FADT.preferred_profile == PM_SOHO_SERVER ||
	       acpi_gbl_FADT.preferred_profile == PM_PERFORMANCE_SERVER;
}
/*
 * amd_pstate_acpi_pm_profile_undefined - Is the FADT preferred profile unusable?
 *
 * Return: true when acpi_gbl_FADT.preferred_profile is PM_UNSPECIFIED or is
 * not below NR_PM_PROFILES, false otherwise.
 */
static bool amd_pstate_acpi_pm_profile_undefined(void)
{
	return acpi_gbl_FADT.preferred_profile == PM_UNSPECIFIED ||
	       acpi_gbl_FADT.preferred_profile >= NR_PM_PROFILES;
}
static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
{
int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
......@@ -1102,10 +1148,14 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
policy->max = policy->cpuinfo.max_freq;
/*
* Set the policy to powersave to provide a valid fallback value in case
* Set the policy to provide a valid fallback value in case
* the default cpufreq governor is neither powersave nor performance.
*/
policy->policy = CPUFREQ_POLICY_POWERSAVE;
if (amd_pstate_acpi_pm_profile_server() ||
amd_pstate_acpi_pm_profile_undefined())
policy->policy = CPUFREQ_POLICY_PERFORMANCE;
else
policy->policy = CPUFREQ_POLICY_POWERSAVE;
if (boot_cpu_has(X86_FEATURE_CPPC)) {
ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
......@@ -1356,10 +1406,29 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
.online = amd_pstate_epp_cpu_online,
.suspend = amd_pstate_epp_suspend,
.resume = amd_pstate_epp_resume,
.name = "amd_pstate_epp",
.name = "amd-pstate-epp",
.attr = amd_pstate_epp_attr,
};
/*
 * amd_pstate_set_driver - Record the requested mode and select the driver.
 * @mode_idx: requested operating mode; accepted only when it lies in
 *            [AMD_PSTATE_DISABLE, AMD_PSTATE_MAX).
 *
 * On success @mode_idx is stored in cppc_state.  AMD_PSTATE_ACTIVE selects
 * amd_pstate_epp_driver, AMD_PSTATE_PASSIVE and AMD_PSTATE_GUIDED select
 * amd_pstate_driver, and AMD_PSTATE_DISABLE only logs that the driver has
 * been disabled explicitly (current_pstate_driver is left untouched).
 *
 * Return: 0 on success, -EINVAL when @mode_idx is out of range.
 */
static int __init amd_pstate_set_driver(int mode_idx)
{
	if (mode_idx < AMD_PSTATE_DISABLE || mode_idx >= AMD_PSTATE_MAX)
		return -EINVAL;

	cppc_state = mode_idx;

	switch (cppc_state) {
	case AMD_PSTATE_DISABLE:
		pr_info("driver is explicitly disabled\n");
		break;
	case AMD_PSTATE_ACTIVE:
		current_pstate_driver = &amd_pstate_epp_driver;
		break;
	case AMD_PSTATE_PASSIVE:
	case AMD_PSTATE_GUIDED:
		current_pstate_driver = &amd_pstate_driver;
		break;
	default:
		break;
	}

	return 0;
}
static int __init amd_pstate_init(void)
{
struct device *dev_root;
......@@ -1367,15 +1436,6 @@ static int __init amd_pstate_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
return -ENODEV;
/*
* by default the pstate driver is disabled to load
* enable the amd_pstate passive mode driver explicitly
* with amd_pstate=passive or other modes in kernel command line
*/
if (cppc_state == AMD_PSTATE_DISABLE) {
pr_info("driver load is disabled, boot with specific mode to enable this\n");
return -ENODEV;
}
if (!acpi_cpc_valid()) {
pr_warn_once("the _CPC object is not present in SBIOS or ACPI disabled\n");
......@@ -1386,6 +1446,33 @@ static int __init amd_pstate_init(void)
if (cpufreq_get_current_driver())
return -EEXIST;
switch (cppc_state) {
case AMD_PSTATE_UNDEFINED:
/* Disable on the following configs by default:
* 1. Undefined platforms
* 2. Server platforms
* 3. Shared memory designs
*/
if (amd_pstate_acpi_pm_profile_undefined() ||
amd_pstate_acpi_pm_profile_server() ||
!boot_cpu_has(X86_FEATURE_CPPC)) {
pr_info("driver load is disabled, boot with specific mode to enable this\n");
return -ENODEV;
}
ret = amd_pstate_set_driver(CONFIG_X86_AMD_PSTATE_DEFAULT_MODE);
if (ret)
return ret;
break;
case AMD_PSTATE_DISABLE:
return -ENODEV;
case AMD_PSTATE_PASSIVE:
case AMD_PSTATE_ACTIVE:
case AMD_PSTATE_GUIDED:
break;
default:
return -EINVAL;
}
/* capability check */
if (boot_cpu_has(X86_FEATURE_CPPC)) {
pr_debug("AMD CPPC MSR based functionality is supported\n");
......@@ -1438,21 +1525,7 @@ static int __init amd_pstate_param(char *str)
size = strlen(str);
mode_idx = get_mode_idx_from_str(str, size);
if (mode_idx >= AMD_PSTATE_DISABLE && mode_idx < AMD_PSTATE_MAX) {
cppc_state = mode_idx;
if (cppc_state == AMD_PSTATE_DISABLE)
pr_info("driver is explicitly disabled\n");
if (cppc_state == AMD_PSTATE_ACTIVE)
current_pstate_driver = &amd_pstate_epp_driver;
if (cppc_state == AMD_PSTATE_PASSIVE || cppc_state == AMD_PSTATE_GUIDED)
current_pstate_driver = &amd_pstate_driver;
return 0;
}
return -EINVAL;
return amd_pstate_set_driver(mode_idx);
}
early_param("amd_pstate", amd_pstate_param);
......
......@@ -2828,7 +2828,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
(driver_data->setpolicy && (driver_data->target_index ||
driver_data->target)) ||
(!driver_data->get_intermediate != !driver_data->target_intermediate) ||
(!driver_data->online != !driver_data->offline))
(!driver_data->online != !driver_data->offline) ||
(driver_data->adjust_perf && !driver_data->fast_switch))
return -EINVAL;
pr_debug("trying to register driver %s\n", driver_data->name);
......
......@@ -824,6 +824,8 @@ static ssize_t store_energy_performance_preference(
err = cpufreq_start_governor(policy);
if (!ret)
ret = err;
} else {
ret = 0;
}
}
......
......@@ -518,6 +518,7 @@ static struct platform_driver exynos_bus_platdrv = {
};
module_platform_driver(exynos_bus_platdrv);
MODULE_SOFTDEP("pre: exynos_ppmu");
MODULE_DESCRIPTION("Generic Exynos Bus frequency driver");
MODULE_AUTHOR("Chanwoo Choi <cw00.choi@samsung.com>");
MODULE_LICENSE("GPL v2");
......@@ -127,7 +127,7 @@ static int mtk_ccifreq_target(struct device *dev, unsigned long *freq,
u32 flags)
{
struct mtk_ccifreq_drv *drv = dev_get_drvdata(dev);
struct clk *cci_pll = clk_get_parent(drv->cci_clk);
struct clk *cci_pll;
struct dev_pm_opp *opp;
unsigned long opp_rate;
int voltage, pre_voltage, inter_voltage, target_voltage, ret;
......@@ -139,6 +139,7 @@ static int mtk_ccifreq_target(struct device *dev, unsigned long *freq,
return 0;
inter_voltage = drv->inter_voltage;
cci_pll = clk_get_parent(drv->cci_clk);
opp_rate = *freq;
opp = devfreq_recommended_opp(dev, &opp_rate, 1);
......
......@@ -199,6 +199,43 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
return __intel_idle(dev, drv, index);
}
/*
 * Common HLT-based idle entry shared by intel_idle_hlt() and
 * intel_idle_hlt_irq_on(): call raw_safe_halt(), then raw_local_irq_disable(),
 * and hand @index back to the caller.  @dev and @drv are not used in the body.
 *
 * NOTE(review): raw_safe_halt() presumably enables interrupts as part of
 * halting, which would explain why they are disabled again right after it -
 * confirm against the arch implementation.
 */
static __always_inline int __intel_idle_hlt(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
raw_safe_halt();
raw_local_irq_disable();
return index;
}
/**
 * intel_idle_hlt - Ask the processor to enter the given idle state using hlt.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver (assumed to point to intel_idle_driver).
 * @index: Target idle state index.
 *
 * Use the HLT instruction to notify the processor that the CPU represented by
 * @dev is idle and it can try to enter the idle state corresponding to @index.
 *
 * Must be called under local_irq_disable().
 *
 * Return: @index, exactly as passed in.
 */
static __cpuidle int intel_idle_hlt(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
return __intel_idle_hlt(dev, drv, index);
}
/*
 * intel_idle_hlt_irq_on - HLT-based idle entry that enables interrupts first.
 * @dev: cpuidle device of the target CPU.
 * @drv: cpuidle driver.
 * @index: Target idle state index.
 *
 * Variant of intel_idle_hlt() that calls raw_local_irq_enable() before
 * halting and raw_local_irq_disable() again before returning.
 *
 * Return: the value returned by __intel_idle_hlt(), i.e. @index.
 */
static __cpuidle int intel_idle_hlt_irq_on(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
int ret;
raw_local_irq_enable();
ret = __intel_idle_hlt(dev, drv, index);
/*
 * NOTE(review): __intel_idle_hlt() already ends with
 * raw_local_irq_disable(), so this second call looks redundant - confirm.
 */
raw_local_irq_disable();
return ret;
}
/**
* intel_idle_s2idle - Ask the processor to enter the given idle state.
* @dev: cpuidle device of the target CPU.
......@@ -1242,6 +1279,25 @@ static struct cpuidle_state snr_cstates[] __initdata = {
.enter = NULL }
};
/*
 * Idle state table used when running as a VM guest without MWAIT (selected by
 * intel_idle_vminit()).  Both states are entered through intel_idle_hlt();
 * the table is terminated by an entry whose .enter is NULL.
 *
 * "C1L" carries CPUIDLE_FLAG_TLB_FLUSHED, which is the marker
 * matchup_vm_state_with_baremetal() looks for when it overrides
 * .exit_latency/.target_residency with values from the bare-metal table.
 *
 * NOTE(review): "C1" sets CPUIDLE_FLAG_IRQ_ENABLE, but
 * state_update_enter_method() only switches an intel_idle_hlt state to
 * intel_idle_hlt_irq_on when force_irq_on is set, so the flag does not appear
 * to select the IRQ-enabled entry path on its own - confirm intent.
 */
static struct cpuidle_state vmguest_cstates[] __initdata = {
{
.name = "C1",
.desc = "HLT",
.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
.exit_latency = 5,
.target_residency = 10,
.enter = &intel_idle_hlt, },
{
.name = "C1L",
.desc = "Long HLT",
.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TLB_FLUSHED,
.exit_latency = 5,
.target_residency = 200,
.enter = &intel_idle_hlt, },
{
.enter = NULL }
};
static const struct idle_cpu idle_cpu_nehalem __initconst = {
.state_table = nehalem_cstates,
.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
......@@ -1839,6 +1895,66 @@ static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
return true;
}
/*
 * state_update_enter_method - Pick the final ->enter callback for a state.
 * @state: idle state whose ->enter callback may be replaced.
 * @cstate: index of the state, used only in the log message.
 *
 * The checks are ordered by priority and the first one that applies wins:
 * HLT-based states, then INIT_XSTATE, then IBRS (only when
 * X86_FEATURE_KERNEL_IBRS is enabled), then IRQ_ENABLE, and finally the
 * force_irq_on override.  If none applies, ->enter is left unchanged.
 */
static void state_update_enter_method(struct cpuidle_state *state, int cstate)
{
/* HLT-based states only ever switch to the IRQ-enabled HLT variant. */
if (state->enter == intel_idle_hlt) {
if (force_irq_on) {
pr_info("forced intel_idle_irq for state %d\n", cstate);
state->enter = intel_idle_hlt_irq_on;
}
return;
}
if (state->enter == intel_idle_hlt_irq_on)
return; /* no update scenarios */
if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
/*
 * Combining XSTATE with the IBRS or IRQ_ENABLE flags
 * is not currently supported by this driver.
 */
WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
state->enter = intel_idle_xstate;
return;
}
if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
state->flags & CPUIDLE_FLAG_IBRS) {
/*
 * IBRS mitigation requires that C-states are entered
 * with interrupts disabled.
 */
WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
state->enter = intel_idle_ibrs;
return;
}
if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
state->enter = intel_idle_irq;
return;
}
/* Last resort: honour the force_irq_on override for remaining states. */
if (force_irq_on) {
pr_info("forced intel_idle_irq for state %d\n", cstate);
state->enter = intel_idle_irq;
}
}
/*
 * MWAIT-based states must be checked against the CPUID data to confirm that
 * this particular CPU supports them.  States entered through one of the HLT
 * callbacks do not use MWAIT, so that verification is skipped for them.
 *
 * Returns true when @state needs the MWAIT verification, false when its
 * ->enter callback is intel_idle_hlt or intel_idle_hlt_irq_on.
 */
static bool should_verify_mwait(struct cpuidle_state *state)
{
	return state->enter != intel_idle_hlt &&
	       state->enter != intel_idle_hlt_irq_on;
}
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
int cstate;
......@@ -1887,35 +2003,15 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
}
mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
if (!intel_idle_verify_cstate(mwait_hint))
if (should_verify_mwait(&cpuidle_state_table[cstate]) && !intel_idle_verify_cstate(mwait_hint))
continue;
/* Structure copy. */
drv->states[drv->state_count] = cpuidle_state_table[cstate];
state = &drv->states[drv->state_count];
if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
/*
* Combining with XSTATE with IBRS or IRQ_ENABLE flags
* is not currently supported but this driver.
*/
WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IBRS);
WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
state->enter = intel_idle_xstate;
} else if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
state->flags & CPUIDLE_FLAG_IBRS) {
/*
* IBRS mitigation requires that C-states are entered
* with interrupts disabled.
*/
WARN_ON_ONCE(state->flags & CPUIDLE_FLAG_IRQ_ENABLE);
state->enter = intel_idle_ibrs;
} else if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE) {
state->enter = intel_idle_irq;
} else if (force_irq_on) {
pr_info("forced intel_idle_irq for state %d\n", cstate);
state->enter = intel_idle_irq;
}
state_update_enter_method(state, cstate);
if ((disabled_states_mask & BIT(drv->state_count)) ||
((icpu->use_acpi || force_use_acpi) &&
......@@ -2041,6 +2137,93 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}
/*
* Match up the latency and break even point of the bare metal (cpu based)
* states with the deepest VM available state.
*
* We only want to do this for the deepest state, the ones that has
* the TLB_FLUSHED flag set on the .
*
* All our short idle states are dominated by vmexit/vmenter latencies,
* not the underlying hardware latencies so we keep our values for these.
*/
static void matchup_vm_state_with_baremetal(void)
{
int cstate;
for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
int matching_cstate;
if (intel_idle_max_cstate_reached(cstate))
break;
if (!cpuidle_state_table[cstate].enter)
break;
if (!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_TLB_FLUSHED))
continue;
for (matching_cstate = 0; matching_cstate < CPUIDLE_STATE_MAX; ++matching_cstate) {
if (!icpu->state_table[matching_cstate].enter)
break;
if (icpu->state_table[matching_cstate].exit_latency > cpuidle_state_table[cstate].exit_latency) {
cpuidle_state_table[cstate].exit_latency = icpu->state_table[matching_cstate].exit_latency;
cpuidle_state_table[cstate].target_residency = icpu->state_table[matching_cstate].target_residency;
}
}
}
}
/*
 * intel_idle_vminit - Set up intel_idle for a VM guest using HLT-based states.
 * @id: matched x86_cpu_id entry; its driver_data is used as the idle_cpu
 *      description of the underlying CPU model.
 *
 * Selects vmguest_cstates as the state table, allocates the per-CPU cpuidle
 * devices, adjusts the deep guest state from the bare-metal table, then
 * registers the cpuidle driver and the CPU hotplug online callback.
 *
 * Return: 0 on success, -ENOMEM if the per-CPU allocation fails, or the error
 * returned by cpuidle_register_driver()/cpuhp_setup_state().
 */
static int __init intel_idle_vminit(const struct x86_cpu_id *id)
{
int retval;
cpuidle_state_table = vmguest_cstates;
icpu = (const struct idle_cpu *)id->driver_data;
pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
boot_cpu_data.x86_model);
intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
if (!intel_idle_cpuidle_devices)
return -ENOMEM;
/*
 * We don't know exactly what the host will do when we go idle, but as a worst estimate
 * we can assume that the exit latency of the deepest host state will be hit for our
 * deep (long duration) guest idle state.
 * The same logic applies to the break even point for the long duration guest idle state.
 * So lets copy these two properties from the table we found for the host CPU type.
 */
matchup_vm_state_with_baremetal();
intel_idle_cpuidle_driver_init(&intel_idle_driver);
retval = cpuidle_register_driver(&intel_idle_driver);
if (retval) {
/* Registration failed: report which driver, if any, is currently registered. */
struct cpuidle_driver *drv = cpuidle_get_driver();
printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
drv ? drv->name : "none");
goto init_driver_fail;
}
retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
intel_idle_cpu_online, NULL);
if (retval < 0)
goto hp_setup_fail;
return 0;
/* Error unwinding: undo the steps above in reverse order. */
hp_setup_fail:
intel_idle_cpuidle_devices_uninit();
cpuidle_unregister_driver(&intel_idle_driver);
init_driver_fail:
free_percpu(intel_idle_cpuidle_devices);
return retval;
}
static int __init intel_idle_init(void)
{
const struct x86_cpu_id *id;
......@@ -2059,6 +2242,8 @@ static int __init intel_idle_init(void)
id = x86_match_cpu(intel_idle_ids);
if (id) {
if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return intel_idle_vminit(id);
pr_debug("Please enable MWAIT in BIOS SETUP\n");
return -ENODEV;
}
......
......@@ -30,6 +30,7 @@
#include <linux/pinctrl/pinconf.h>
#include <linux/pinctrl/pinconf-generic.h>
#include <linux/pinctrl/pinmux.h>
#include <linux/suspend.h>
#include "core.h"
#include "pinctrl-utils.h"
......@@ -636,9 +637,8 @@ static bool do_amd_gpio_irq_handler(int irq, void *dev_id)
regval = readl(regs + i);
if (regval & PIN_IRQ_PENDING)
dev_dbg(&gpio_dev->pdev->dev,
"GPIO %d is active: 0x%x",
irqnr + i, regval);
pm_pr_dbg("GPIO %d is active: 0x%x",
irqnr + i, regval);
/* caused wake on resume context for shared IRQ */
if (irq < 0 && (regval & BIT(WAKE_STS_OFF)))
......
......@@ -543,7 +543,7 @@ static int amd_pmc_idlemask_read(struct amd_pmc_dev *pdev, struct device *dev,
}
if (dev)
dev_dbg(pdev->dev, "SMU idlemask s0i3: 0x%x\n", val);
pm_pr_dbg("SMU idlemask s0i3: 0x%x\n", val);
if (s)
seq_printf(s, "SMU idlemask : 0x%x\n", val);
......@@ -769,7 +769,7 @@ static int amd_pmc_verify_czn_rtc(struct amd_pmc_dev *pdev, u32 *arg)
*arg |= (duration << 16);
rc = rtc_alarm_irq_enable(rtc_device, 0);
dev_dbg(pdev->dev, "wakeup timer programmed for %lld seconds\n", duration);
pm_pr_dbg("wakeup timer programmed for %lld seconds\n", duration);
return rc;
}
......
......@@ -18,10 +18,12 @@ if POWERCAP
# Client driver configurations go here.
config INTEL_RAPL_CORE
tristate
depends on PCI
select IOSF_MBI
config INTEL_RAPL
tristate "Intel RAPL Support via MSR Interface"
depends on X86 && IOSF_MBI
depends on X86 && PCI
select INTEL_RAPL_CORE
help
This enables support for the Intel Running Average Power Limit (RAPL)
......@@ -33,6 +35,20 @@ config INTEL_RAPL
controller, CPU core (Power Plane 0), graphics uncore (Power Plane
1), etc.
config INTEL_RAPL_TPMI
tristate "Intel RAPL Support via TPMI Interface"
depends on X86
depends on INTEL_TPMI
select INTEL_RAPL_CORE
help
This enables support for the Intel Running Average Power Limit (RAPL)
technology via TPMI interface, which allows power limits to be enforced
and monitored.
In RAPL, the platform level settings are divided into domains for
fine grained control. These domains include processor package, DRAM
controller, platform, etc.
config IDLE_INJECT
bool "Idle injection framework"
depends on CPU_IDLE
......
......@@ -5,5 +5,6 @@ obj-$(CONFIG_DTPM_DEVFREQ) += dtpm_devfreq.o
obj-$(CONFIG_POWERCAP) += powercap_sys.o
obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o
obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o
obj-$(CONFIG_INTEL_RAPL_TPMI) += intel_rapl_tpmi.o
obj-$(CONFIG_IDLE_INJECT) += idle_inject.o
obj-$(CONFIG_ARM_SCMI_POWERCAP) += arm_scmi_powercap.o
......@@ -75,6 +75,15 @@
#define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19)
#define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51)
/* bitmasks for RAPL TPMI, used by primitive access functions */
#define TPMI_POWER_LIMIT_MASK 0x3FFFF
#define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62)
#define TPMI_TIME_WINDOW_MASK (0x7FULL<<18)
#define TPMI_INFO_SPEC_MASK 0x3FFFF
#define TPMI_INFO_MIN_MASK (0x3FFFFULL << 18)
#define TPMI_INFO_MAX_MASK (0x3FFFFULL << 36)
#define TPMI_INFO_MAX_TIME_WIN_MASK (0x7FULL << 54)
/* Non HW constants */
#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
#define RAPL_PRIMITIVE_DUMMY BIT(2)
......@@ -94,26 +103,120 @@ enum unit_type {
#define DOMAIN_STATE_INACTIVE BIT(0)
#define DOMAIN_STATE_POWER_LIMIT_SET BIT(1)
#define DOMAIN_STATE_BIOS_LOCKED BIT(2)
static const char pl1_name[] = "long_term";
static const char pl2_name[] = "short_term";
static const char pl4_name[] = "peak_power";
/* Constraint names, indexed by power limit id (POWER_LIMIT1/2/4). */
static const char *pl_names[NR_POWER_LIMITS] = {
[POWER_LIMIT1] = "long_term",
[POWER_LIMIT2] = "short_term",
[POWER_LIMIT4] = "peak_power",
};
/*
 * Per-power-limit properties.  get_pl_prim() translates a (power limit,
 * pl_prims) pair into the matching primitive id for that limit.
 */
enum pl_prims {
PL_ENABLE,
PL_CLAMP,
PL_LIMIT,
PL_TIME_WINDOW,
PL_MAX_POWER,
PL_LOCK,
};
/*
 * is_pl_valid - Check whether a power limit exists for a RAPL domain.
 * @rd: RAPL domain to inspect.
 * @pl: power limit id, expected in [POWER_LIMIT1, POWER_LIMIT4].
 *
 * Return: true when @pl is in range and rd->rpl[@pl].name is set,
 * false otherwise.
 */
static bool is_pl_valid(struct rapl_domain *rd, int pl)
{
	if (pl >= POWER_LIMIT1 && pl <= POWER_LIMIT4)
		return !!rd->rpl[pl].name;

	return false;
}
/*
 * get_pl_lock_prim - Find the lock primitive for a power limit.
 * @rd: RAPL domain the power limit belongs to.
 * @pl: power limit id.
 *
 * For the TPMI interface each of POWER_LIMIT1/2/4 has its own lock primitive.
 * For the other interfaces there is no lock for POWER_LIMIT4, and the lock
 * primitive for the remaining limits depends on whether the domain's limits
 * mask includes POWER_LIMIT2.
 *
 * Return: the lock primitive id, or -EINVAL when no lock primitive exists.
 */
static int get_pl_lock_prim(struct rapl_domain *rd, int pl)
{
	if (rd->rp->priv->type == RAPL_IF_TPMI) {
		switch (pl) {
		case POWER_LIMIT1:
			return PL1_LOCK;
		case POWER_LIMIT2:
			return PL2_LOCK;
		case POWER_LIMIT4:
			return PL4_LOCK;
		default:
			break;
		}
	}

	/* MSR/MMIO Interface doesn't have Lock bit for PL4 */
	if (pl == POWER_LIMIT4)
		return -EINVAL;

	/*
	 * Power Limit register that supports two power limits has a different
	 * bit position for the Lock bit.
	 */
	return (rd->rp->priv->limits[rd->id] & BIT(POWER_LIMIT2)) ?
		FW_HIGH_LOCK : FW_LOCK;
}
/*
 * get_pl_prim - Translate a (power limit, property) pair into a primitive id.
 * @rd: RAPL domain the power limit belongs to.
 * @pl: power limit id (POWER_LIMIT1, POWER_LIMIT2 or POWER_LIMIT4).
 * @prim: property of the power limit being requested.
 *
 * PL_CLAMP is only available for POWER_LIMIT1/2 and only when the interface
 * type is not RAPL_IF_TPMI.  POWER_LIMIT4 supports PL_LIMIT, PL_ENABLE,
 * PL_MAX_POWER and PL_LOCK only.
 *
 * Return: the primitive id, or -EINVAL when the combination is not supported.
 */
static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim)
{
	switch (pl) {
	case POWER_LIMIT1:
		switch (prim) {
		case PL_ENABLE:
			return PL1_ENABLE;
		case PL_CLAMP:
			if (rd->rp->priv->type != RAPL_IF_TPMI)
				return PL1_CLAMP;
			break;
		case PL_LIMIT:
			return POWER_LIMIT1;
		case PL_TIME_WINDOW:
			return TIME_WINDOW1;
		case PL_MAX_POWER:
			return THERMAL_SPEC_POWER;
		case PL_LOCK:
			return get_pl_lock_prim(rd, pl);
		default:
			break;
		}
		return -EINVAL;
	case POWER_LIMIT2:
		switch (prim) {
		case PL_ENABLE:
			return PL2_ENABLE;
		case PL_CLAMP:
			if (rd->rp->priv->type != RAPL_IF_TPMI)
				return PL2_CLAMP;
			break;
		case PL_LIMIT:
			return POWER_LIMIT2;
		case PL_TIME_WINDOW:
			return TIME_WINDOW2;
		case PL_MAX_POWER:
			return MAX_POWER;
		case PL_LOCK:
			return get_pl_lock_prim(rd, pl);
		default:
			break;
		}
		return -EINVAL;
	case POWER_LIMIT4:
		switch (prim) {
		case PL_LIMIT:
			return POWER_LIMIT4;
		case PL_ENABLE:
			return PL4_ENABLE;
		case PL_MAX_POWER:
			/* PL4 would be around two times PL2, use same prim as PL2. */
			return MAX_POWER;
		case PL_LOCK:
			return get_pl_lock_prim(rd, pl);
		default:
			break;
		}
		return -EINVAL;
	default:
		return -EINVAL;
	}
}
#define power_zone_to_rapl_domain(_zone) \
container_of(_zone, struct rapl_domain, power_zone)
struct rapl_defaults {
u8 floor_freq_reg_addr;
int (*check_unit)(struct rapl_package *rp, int cpu);
int (*check_unit)(struct rapl_domain *rd);
void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
u64 (*compute_time_window)(struct rapl_package *rp, u64 val,
u64 (*compute_time_window)(struct rapl_domain *rd, u64 val,
bool to_raw);
unsigned int dram_domain_energy_unit;
unsigned int psys_domain_energy_unit;
bool spr_psys_bits;
};
static struct rapl_defaults *rapl_defaults;
static struct rapl_defaults *defaults_msr;
static const struct rapl_defaults defaults_tpmi;
/* Return the rapl_defaults attached to the interface private data of @rp. */
static struct rapl_defaults *get_defaults(struct rapl_package *rp)
{
return rp->priv->defaults;
}
/* Sideband MBI registers */
#define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2)
......@@ -150,6 +253,12 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
static int rapl_write_data_raw(struct rapl_domain *rd,
enum rapl_primitives prim,
unsigned long long value);
static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
enum pl_prims pl_prim,
bool xlate, u64 *data);
static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
enum pl_prims pl_prim,
unsigned long long value);
static u64 rapl_unit_xlate(struct rapl_domain *rd,
enum unit_type type, u64 value, int to_raw);
static void package_power_limit_irq_save(struct rapl_package *rp);
......@@ -217,7 +326,7 @@ static int find_nr_power_limit(struct rapl_domain *rd)
int i, nr_pl = 0;
for (i = 0; i < NR_POWER_LIMITS; i++) {
if (rd->rpl[i].name)
if (is_pl_valid(rd, i))
nr_pl++;
}
......@@ -227,37 +336,35 @@ static int find_nr_power_limit(struct rapl_domain *rd)
static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
return -EACCES;
struct rapl_defaults *defaults = get_defaults(rd->rp);
int ret;
cpus_read_lock();
rapl_write_data_raw(rd, PL1_ENABLE, mode);
if (rapl_defaults->set_floor_freq)
rapl_defaults->set_floor_freq(rd, mode);
ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode);
if (!ret && defaults->set_floor_freq)
defaults->set_floor_freq(rd, mode);
cpus_read_unlock();
return 0;
return ret;
}
static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
{
struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
u64 val;
int ret;
if (rd->state & DOMAIN_STATE_BIOS_LOCKED) {
if (rd->rpl[POWER_LIMIT1].locked) {
*mode = false;
return 0;
}
cpus_read_lock();
if (rapl_read_data_raw(rd, PL1_ENABLE, true, &val)) {
cpus_read_unlock();
return -EIO;
}
*mode = val;
ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, true, &val);
if (!ret)
*mode = val;
cpus_read_unlock();
return 0;
return ret;
}
/* per RAPL domain ops, in the order of rapl_domain_type */
......@@ -313,8 +420,8 @@ static int contraint_to_pl(struct rapl_domain *rd, int cid)
{
int i, j;
for (i = 0, j = 0; i < NR_POWER_LIMITS; i++) {
if ((rd->rpl[i].name) && j++ == cid) {
for (i = POWER_LIMIT1, j = 0; i < NR_POWER_LIMITS; i++) {
if (is_pl_valid(rd, i) && j++ == cid) {
pr_debug("%s: index %d\n", __func__, i);
return i;
}
......@@ -335,36 +442,11 @@ static int set_power_limit(struct powercap_zone *power_zone, int cid,
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
ret = id;
goto set_exit;
}
rp = rd->rp;
if (rd->state & DOMAIN_STATE_BIOS_LOCKED) {
dev_warn(&power_zone->dev,
"%s locked by BIOS, monitoring only\n", rd->name);
ret = -EACCES;
goto set_exit;
}
switch (rd->rpl[id].prim_id) {
case PL1_ENABLE:
rapl_write_data_raw(rd, POWER_LIMIT1, power_limit);
break;
case PL2_ENABLE:
rapl_write_data_raw(rd, POWER_LIMIT2, power_limit);
break;
case PL4_ENABLE:
rapl_write_data_raw(rd, POWER_LIMIT4, power_limit);
break;
default:
ret = -EINVAL;
}
ret = rapl_write_pl_data(rd, id, PL_LIMIT, power_limit);
if (!ret)
package_power_limit_irq_save(rp);
set_exit:
cpus_read_unlock();
return ret;
}
......@@ -374,38 +456,17 @@ static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
{
struct rapl_domain *rd;
u64 val;
int prim;
int ret = 0;
int id;
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
ret = id;
goto get_exit;
}
switch (rd->rpl[id].prim_id) {
case PL1_ENABLE:
prim = POWER_LIMIT1;
break;
case PL2_ENABLE:
prim = POWER_LIMIT2;
break;
case PL4_ENABLE:
prim = POWER_LIMIT4;
break;
default:
cpus_read_unlock();
return -EINVAL;
}
if (rapl_read_data_raw(rd, prim, true, &val))
ret = -EIO;
else
ret = rapl_read_pl_data(rd, id, PL_LIMIT, true, &val);
if (!ret)
*data = val;
get_exit:
cpus_read_unlock();
return ret;
......@@ -421,23 +482,9 @@ static int set_time_window(struct powercap_zone *power_zone, int cid,
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
ret = id;
goto set_time_exit;
}
switch (rd->rpl[id].prim_id) {
case PL1_ENABLE:
rapl_write_data_raw(rd, TIME_WINDOW1, window);
break;
case PL2_ENABLE:
rapl_write_data_raw(rd, TIME_WINDOW2, window);
break;
default:
ret = -EINVAL;
}
ret = rapl_write_pl_data(rd, id, PL_TIME_WINDOW, window);
set_time_exit:
cpus_read_unlock();
return ret;
}
......@@ -453,33 +500,11 @@ static int get_time_window(struct powercap_zone *power_zone, int cid,
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
id = contraint_to_pl(rd, cid);
if (id < 0) {
ret = id;
goto get_time_exit;
}
switch (rd->rpl[id].prim_id) {
case PL1_ENABLE:
ret = rapl_read_data_raw(rd, TIME_WINDOW1, true, &val);
break;
case PL2_ENABLE:
ret = rapl_read_data_raw(rd, TIME_WINDOW2, true, &val);
break;
case PL4_ENABLE:
/*
* Time window parameter is not applicable for PL4 entry
* so assigining '0' as default value.
*/
val = 0;
break;
default:
cpus_read_unlock();
return -EINVAL;
}
ret = rapl_read_pl_data(rd, id, PL_TIME_WINDOW, true, &val);
if (!ret)
*data = val;
get_time_exit:
cpus_read_unlock();
return ret;
......@@ -499,36 +524,23 @@ static const char *get_constraint_name(struct powercap_zone *power_zone,
return NULL;
}
static int get_max_power(struct powercap_zone *power_zone, int id, u64 *data)
static int get_max_power(struct powercap_zone *power_zone, int cid, u64 *data)
{
struct rapl_domain *rd;
u64 val;
int prim;
int ret = 0;
int id;
cpus_read_lock();
rd = power_zone_to_rapl_domain(power_zone);
switch (rd->rpl[id].prim_id) {
case PL1_ENABLE:
prim = THERMAL_SPEC_POWER;
break;
case PL2_ENABLE:
prim = MAX_POWER;
break;
case PL4_ENABLE:
prim = MAX_POWER;
break;
default:
cpus_read_unlock();
return -EINVAL;
}
if (rapl_read_data_raw(rd, prim, true, &val))
ret = -EIO;
else
id = contraint_to_pl(rd, cid);
ret = rapl_read_pl_data(rd, id, PL_MAX_POWER, true, &val);
if (!ret)
*data = val;
/* As a generalization rule, PL4 would be around two times PL2. */
if (rd->rpl[id].prim_id == PL4_ENABLE)
if (id == POWER_LIMIT4)
*data = *data * 2;
cpus_read_unlock();
......@@ -545,6 +557,12 @@ static const struct powercap_zone_constraint_ops constraint_ops = {
.get_name = get_constraint_name,
};
/*
 * Pick the identifier that is handed to the read_raw()/write_raw()
 * callbacks: the lead CPU when the package has one (lead_cpu >= 0),
 * otherwise the package id.
 */
static int get_rid(struct rapl_package *rp)
{
	if (rp->lead_cpu < 0)
		return rp->id;

	return rp->lead_cpu;
}
/* called after domain detection and package level data are set */
static void rapl_init_domains(struct rapl_package *rp)
{
......@@ -554,6 +572,7 @@ static void rapl_init_domains(struct rapl_package *rp)
for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
unsigned int mask = rp->domain_map & (1 << i);
int t;
if (!mask)
continue;
......@@ -562,51 +581,26 @@ static void rapl_init_domains(struct rapl_package *rp)
if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) {
snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d",
topology_physical_package_id(rp->lead_cpu));
} else
rp->lead_cpu >= 0 ? topology_physical_package_id(rp->lead_cpu) :
rp->id);
} else {
snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s",
rapl_domain_names[i]);
}
rd->id = i;
rd->rpl[0].prim_id = PL1_ENABLE;
rd->rpl[0].name = pl1_name;
/*
* The PL2 power domain is applicable for limits two
* and limits three
*/
if (rp->priv->limits[i] >= 2) {
rd->rpl[1].prim_id = PL2_ENABLE;
rd->rpl[1].name = pl2_name;
}
/* PL1 is supported by default */
rp->priv->limits[i] |= BIT(POWER_LIMIT1);
/* Enable PL4 domain if the total power limits are three */
if (rp->priv->limits[i] == 3) {
rd->rpl[2].prim_id = PL4_ENABLE;
rd->rpl[2].name = pl4_name;
for (t = POWER_LIMIT1; t < NR_POWER_LIMITS; t++) {
if (rp->priv->limits[i] & BIT(t))
rd->rpl[t].name = pl_names[t];
}
for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++)
rd->regs[j] = rp->priv->regs[i][j];
switch (i) {
case RAPL_DOMAIN_DRAM:
rd->domain_energy_unit =
rapl_defaults->dram_domain_energy_unit;
if (rd->domain_energy_unit)
pr_info("DRAM domain energy unit %dpj\n",
rd->domain_energy_unit);
break;
case RAPL_DOMAIN_PLATFORM:
rd->domain_energy_unit =
rapl_defaults->psys_domain_energy_unit;
if (rd->domain_energy_unit)
pr_info("Platform domain energy unit %dpj\n",
rd->domain_energy_unit);
break;
default:
break;
}
rd++;
}
}
......@@ -615,23 +609,19 @@ static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
u64 value, int to_raw)
{
u64 units = 1;
struct rapl_package *rp = rd->rp;
struct rapl_defaults *defaults = get_defaults(rd->rp);
u64 scale = 1;
switch (type) {
case POWER_UNIT:
units = rp->power_unit;
units = rd->power_unit;
break;
case ENERGY_UNIT:
scale = ENERGY_UNIT_SCALE;
/* per domain unit takes precedence */
if (rd->domain_energy_unit)
units = rd->domain_energy_unit;
else
units = rp->energy_unit;
units = rd->energy_unit;
break;
case TIME_UNIT:
return rapl_defaults->compute_time_window(rp, value, to_raw);
return defaults->compute_time_window(rd, value, to_raw);
case ARBITRARY_UNIT:
default:
return value;
......@@ -645,67 +635,141 @@ static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
return div64_u64(value, scale);
}
/* in the order of enum rapl_primitives */
static struct rapl_primitive_info rpi[] = {
/* RAPL primitives for MSR and MMIO I/F */
static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
/* name, mask, shift, msr index, unit divisor */
PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
[POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
[POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
[POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
[ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
[FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
[FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
[PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
[PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
[PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0,
[PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
[PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, POWER_LIMIT4_MASK, 0,
RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
[TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
[TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK,
[THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK,
0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
[MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
[MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48,
[MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48,
RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
[THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
[PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
[PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
[PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
[PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
[PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
[PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
[PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
/* non-hardware */
PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
[AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
RAPL_PRIMITIVE_DERIVED),
{NULL, 0, 0, 0},
};
/*
 * RAPL primitives for TPMI I/F.
 *
 * Indexed by enum rapl_primitives. Unlike the MSR/MMIO table, every power
 * limit is described against its own register id here
 * (RAPL_DOMAIN_REG_LIMIT for PL1, RAPL_DOMAIN_REG_PL2 for PL2,
 * RAPL_DOMAIN_REG_PL4 for PL4), so the limit, lock, enable and time window
 * entries of the different power limits share the same mask and shift.
 * Primitives without an entry stay zero-initialized.
 */
static struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = {
/*
 * Arguments appear to be: name, mask, shift, register id, unit, flag
 * (NOTE(review): confirm against the PRIMITIVE_INFO_INIT definition).
 */
/* Power limit values: same field, one register per power limit */
[POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, TPMI_POWER_LIMIT_MASK, 0,
RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
[POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, TPMI_POWER_LIMIT_MASK, 0,
RAPL_DOMAIN_REG_PL2, POWER_UNIT, 0),
[POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, TPMI_POWER_LIMIT_MASK, 0,
RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
/* Energy counter, read from the status register */
[ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
/* Per power limit lock flag: shift 63 of each limit register */
[PL1_LOCK] = PRIMITIVE_INFO_INIT(PL1_LOCK, POWER_HIGH_LOCK, 63,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
[PL2_LOCK] = PRIMITIVE_INFO_INIT(PL2_LOCK, POWER_HIGH_LOCK, 63,
RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
[PL4_LOCK] = PRIMITIVE_INFO_INIT(PL4_LOCK, POWER_HIGH_LOCK, 63,
RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
/* Per power limit enable flag: shift 62 of each limit register */
[PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
[PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
[PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
/* Time windows: only PL1 and PL2 have an entry, PL4 has none */
[TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TPMI_TIME_WINDOW_MASK, 18,
RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
[TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TPMI_TIME_WINDOW_MASK, 18,
RAPL_DOMAIN_REG_PL2, TIME_UNIT, 0),
/* Fields of the info register */
[THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, TPMI_INFO_SPEC_MASK, 0,
RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
[MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, TPMI_INFO_MAX_MASK, 36,
RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
[MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, TPMI_INFO_MIN_MASK, 18,
RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
[MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, TPMI_INFO_MAX_TIME_WIN_MASK, 54,
RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
/* Throttled time, read from the perf status register */
[THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
/* non-hardware: flagged RAPL_PRIMITIVE_DERIVED, not backed by a register field */
[AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0,
POWER_UNIT, RAPL_PRIMITIVE_DERIVED),
};
/*
 * Look up the descriptor of primitive @prim in the primitive table of the
 * interface that package @rp uses.
 *
 * Returns a pointer to the table entry, or NULL when @prim is out of range
 * or the package has no primitive table assigned.
 */
static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim)
{
	struct rapl_primitive_info *rpi = rp->priv->rpi;

	/*
	 * The primitive tables hold NR_RAPL_PRIMITIVES entries, so the last
	 * valid index is NR_RAPL_PRIMITIVES - 1. Reject NR_RAPL_PRIMITIVES
	 * itself too, otherwise the caller gets a pointer one past the end.
	 */
	if (prim < 0 || prim >= NR_RAPL_PRIMITIVES || !rpi)
		return NULL;

	return &rpi[prim];
}
/*
 * Attach the defaults and the primitive table that match the interface
 * type of @rp to its private data.
 *
 * Returns 0 on success, -EINVAL for an unknown interface type.
 */
static int rapl_config(struct rapl_package *rp)
{
	if (rp->priv->type == RAPL_IF_TPMI) {
		rp->priv->defaults = (void *)&defaults_tpmi;
		rp->priv->rpi = (void *)rpi_tpmi;
		return 0;
	}

	/* The MMIO interface uses the same register layout as the MSR one */
	if (rp->priv->type == RAPL_IF_MSR || rp->priv->type == RAPL_IF_MMIO) {
		rp->priv->defaults = (void *)defaults_msr;
		rp->priv->rpi = (void *)rpi_msr;
		return 0;
	}

	return -EINVAL;
}
static enum rapl_primitives
prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
{
if (!rapl_defaults->spr_psys_bits)
struct rapl_defaults *defaults = get_defaults(rd->rp);
if (!defaults->spr_psys_bits)
return prim;
if (rd->id != RAPL_DOMAIN_PLATFORM)
......@@ -747,41 +811,33 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
{
u64 value;
enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
struct rapl_primitive_info *rp = &rpi[prim_fixed];
struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed);
struct reg_action ra;
int cpu;
if (!rp->name || rp->flag & RAPL_PRIMITIVE_DUMMY)
if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY)
return -EINVAL;
ra.reg = rd->regs[rp->id];
ra.reg = rd->regs[rpi->id];
if (!ra.reg)
return -EINVAL;
cpu = rd->rp->lead_cpu;
/* domain with 2 limits has different bit */
if (prim == FW_LOCK && rd->rp->priv->limits[rd->id] == 2) {
rp->mask = POWER_HIGH_LOCK;
rp->shift = 63;
}
/* non-hardware data are collected by the polling thread */
if (rp->flag & RAPL_PRIMITIVE_DERIVED) {
if (rpi->flag & RAPL_PRIMITIVE_DERIVED) {
*data = rd->rdd.primitives[prim];
return 0;
}
ra.mask = rp->mask;
ra.mask = rpi->mask;
if (rd->rp->priv->read_raw(cpu, &ra)) {
pr_debug("failed to read reg 0x%llx on cpu %d\n", ra.reg, cpu);
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg, rd->rp->name, rd->name);
return -EIO;
}
value = ra.value >> rp->shift;
value = ra.value >> rpi->shift;
if (xlate)
*data = rapl_unit_xlate(rd, rp->unit, value, 0);
*data = rapl_unit_xlate(rd, rpi->unit, value, 0);
else
*data = value;
......@@ -794,28 +850,56 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
unsigned long long value)
{
enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
struct rapl_primitive_info *rp = &rpi[prim_fixed];
int cpu;
struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed);
u64 bits;
struct reg_action ra;
int ret;
cpu = rd->rp->lead_cpu;
bits = rapl_unit_xlate(rd, rp->unit, value, 1);
bits <<= rp->shift;
bits &= rp->mask;
if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY)
return -EINVAL;
bits = rapl_unit_xlate(rd, rpi->unit, value, 1);
bits <<= rpi->shift;
bits &= rpi->mask;
memset(&ra, 0, sizeof(ra));
ra.reg = rd->regs[rp->id];
ra.mask = rp->mask;
ra.reg = rd->regs[rpi->id];
ra.mask = rpi->mask;
ra.value = bits;
ret = rd->rp->priv->write_raw(cpu, &ra);
ret = rd->rp->priv->write_raw(get_rid(rd->rp), &ra);
return ret;
}
/*
 * Read attribute @pl_prim of power limit @pl in domain @rd.
 *
 * The (power limit, attribute) pair is mapped to a RAPL primitive and read
 * through rapl_read_data_raw(); @xlate and @data are passed through to it.
 *
 * Returns -EINVAL when @pl is not a valid power limit for @rd, otherwise
 * the result of rapl_read_data_raw().
 */
static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
			     enum pl_prims pl_prim, bool xlate, u64 *data)
{
	enum rapl_primitives prim;

	prim = get_pl_prim(rd, pl, pl_prim);
	if (is_pl_valid(rd, pl))
		return rapl_read_data_raw(rd, prim, xlate, data);

	return -EINVAL;
}
/*
 * Write @value to attribute @pl_prim of power limit @pl in domain @rd.
 *
 * Returns -EINVAL when @pl is not a valid power limit for @rd, -EACCES
 * (with a warning) when that power limit is marked as locked, otherwise
 * the result of rapl_write_data_raw().
 */
static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
			      enum pl_prims pl_prim,
			      unsigned long long value)
{
	enum rapl_primitives prim;

	prim = get_pl_prim(rd, pl, pl_prim);
	if (!is_pl_valid(rd, pl))
		return -EINVAL;

	if (!rd->rpl[pl].locked)
		return rapl_write_data_raw(rd, prim, value);

	/* A locked power limit cannot be changed */
	pr_warn("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]);
	return -EACCES;
}
/*
* Raw RAPL data stored in MSRs are in certain scales. We need to
* convert them into standard units based on the units reported in
......@@ -827,58 +911,58 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
* power unit : microWatts : Represented in milliWatts by default
* time unit : microseconds: Represented in seconds by default
*/
static int rapl_check_unit_core(struct rapl_package *rp, int cpu)
static int rapl_check_unit_core(struct rapl_domain *rd)
{
struct reg_action ra;
u32 value;
ra.reg = rp->priv->reg_unit;
ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
ra.mask = ~0;
if (rp->priv->read_raw(cpu, &ra)) {
pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n",
rp->priv->reg_unit, cpu);
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
ra.reg, rd->rp->name, rd->name);
return -ENODEV;
}
value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
rp->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
rp->power_unit = 1000000 / (1 << value);
rd->power_unit = 1000000 / (1 << value);
value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
rp->time_unit = 1000000 / (1 << value);
rd->time_unit = 1000000 / (1 << value);
pr_debug("Core CPU %s energy=%dpJ, time=%dus, power=%duW\n",
rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
return 0;
}
static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
static int rapl_check_unit_atom(struct rapl_domain *rd)
{
struct reg_action ra;
u32 value;
ra.reg = rp->priv->reg_unit;
ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
ra.mask = ~0;
if (rp->priv->read_raw(cpu, &ra)) {
pr_err("Failed to read power unit REG 0x%llx on CPU %d, exit.\n",
rp->priv->reg_unit, cpu);
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
ra.reg, rd->rp->name, rd->name);
return -ENODEV;
}
value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
rp->energy_unit = ENERGY_UNIT_SCALE * 1 << value;
rd->energy_unit = ENERGY_UNIT_SCALE * 1 << value;
value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
rp->power_unit = (1 << value) * 1000;
rd->power_unit = (1 << value) * 1000;
value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
rp->time_unit = 1000000 / (1 << value);
rd->time_unit = 1000000 / (1 << value);
pr_debug("Atom %s energy=%dpJ, time=%dus, power=%duW\n",
rp->name, rp->energy_unit, rp->time_unit, rp->power_unit);
pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n",
rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
return 0;
}
......@@ -910,6 +994,9 @@ static void power_limit_irq_save_cpu(void *info)
static void package_power_limit_irq_save(struct rapl_package *rp)
{
if (rp->lead_cpu < 0)
return;
if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
return;
......@@ -924,6 +1011,9 @@ static void package_power_limit_irq_restore(struct rapl_package *rp)
{
u32 l, h;
if (rp->lead_cpu < 0)
return;
if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
return;
......@@ -943,33 +1033,33 @@ static void package_power_limit_irq_restore(struct rapl_package *rp)
static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
{
int nr_powerlimit = find_nr_power_limit(rd);
int i;
/* always enable clamp such that p-state can go below OS requested
* range. Power capping takes priority over guaranteed frequency.
*/
rapl_write_data_raw(rd, PL1_CLAMP, mode);
rapl_write_pl_data(rd, POWER_LIMIT1, PL_CLAMP, mode);
/* some domains have pl2 */
if (nr_powerlimit > 1) {
rapl_write_data_raw(rd, PL2_ENABLE, mode);
rapl_write_data_raw(rd, PL2_CLAMP, mode);
for (i = POWER_LIMIT2; i < NR_POWER_LIMITS; i++) {
rapl_write_pl_data(rd, i, PL_ENABLE, mode);
rapl_write_pl_data(rd, i, PL_CLAMP, mode);
}
}
static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
{
static u32 power_ctrl_orig_val;
struct rapl_defaults *defaults = get_defaults(rd->rp);
u32 mdata;
if (!rapl_defaults->floor_freq_reg_addr) {
if (!defaults->floor_freq_reg_addr) {
pr_err("Invalid floor frequency config register\n");
return;
}
if (!power_ctrl_orig_val)
iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ,
rapl_defaults->floor_freq_reg_addr,
defaults->floor_freq_reg_addr,
&power_ctrl_orig_val);
mdata = power_ctrl_orig_val;
if (enable) {
......@@ -977,10 +1067,10 @@ static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
mdata |= 1 << 8;
}
iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE,
rapl_defaults->floor_freq_reg_addr, mdata);
defaults->floor_freq_reg_addr, mdata);
}
static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value,
bool to_raw)
{
u64 f, y; /* fraction and exp. used for time unit */
......@@ -992,12 +1082,12 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
if (!to_raw) {
f = (value & 0x60) >> 5;
y = value & 0x1f;
value = (1 << y) * (4 + f) * rp->time_unit / 4;
value = (1 << y) * (4 + f) * rd->time_unit / 4;
} else {
if (value < rp->time_unit)
if (value < rd->time_unit)
return 0;
do_div(value, rp->time_unit);
do_div(value, rd->time_unit);
y = ilog2(value);
/*
......@@ -1013,7 +1103,7 @@ static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
return value;
}
static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value,
bool to_raw)
{
/*
......@@ -1021,13 +1111,56 @@ static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
* where time_unit is default to 1 sec. Never 0.
*/
if (!to_raw)
return (value) ? value * rp->time_unit : rp->time_unit;
return (value) ? value * rd->time_unit : rd->time_unit;
value = div64_u64(value, rp->time_unit);
value = div64_u64(value, rd->time_unit);
return value;
}
/*
 * The TPMI Unit register has a different field layout from the MSR one.
 * The power unit field reuses the MSR offset and mask; the energy and time
 * unit fields are defined below.
 */
#define TPMI_POWER_UNIT_OFFSET POWER_UNIT_OFFSET
#define TPMI_POWER_UNIT_MASK POWER_UNIT_MASK
/* Energy unit field: mask 0x7C0 shifted by 6, i.e. bits 10:6 */
#define TPMI_ENERGY_UNIT_OFFSET 0x06
#define TPMI_ENERGY_UNIT_MASK 0x7C0
/* Time unit field: mask 0xF000 shifted by 12, i.e. bits 15:12 */
#define TPMI_TIME_UNIT_OFFSET 0x0C
#define TPMI_TIME_UNIT_MASK 0xF000
/*
 * Read the Unit register of @rd and derive the per-domain energy, power
 * and time units from its TPMI-layout fields.
 *
 * Each field is used as a power-of-two exponent: the stored unit is a
 * fixed scale divided by (1 << field).
 *
 * Returns 0 on success, -ENODEV when the Unit register cannot be read.
 */
static int rapl_check_unit_tpmi(struct rapl_domain *rd)
{
	struct reg_action ra;
	u32 energy_exp;
	u32 power_exp;
	u32 time_exp;

	/* Fetch the whole register; the fields are picked apart below */
	ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
	ra.mask = ~0;
	if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
		pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
		       ra.reg, rd->rp->name, rd->name);
		return -ENODEV;
	}

	energy_exp = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET;
	power_exp = (ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET;
	time_exp = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET;

	rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << energy_exp);
	rd->power_unit = 1000000 / (1 << power_exp);
	rd->time_unit = 1000000 / (1 << time_exp);

	pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
		 rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);

	return 0;
}
/* Callbacks used for packages that are driven through the TPMI interface */
static const struct rapl_defaults defaults_tpmi = {
	.compute_time_window = rapl_compute_time_window_core,
	/*
	 * The default floor frequency handler is reused as is: PL_CLAMP
	 * failures are ignored and all Power Limits get enabled.
	 */
	.set_floor_freq = set_floor_freq_default,
	.check_unit = rapl_check_unit_tpmi,
};
static const struct rapl_defaults rapl_defaults_core = {
.floor_freq_reg_addr = 0,
.check_unit = rapl_check_unit_core,
......@@ -1159,8 +1292,10 @@ static void rapl_update_domain_data(struct rapl_package *rp)
rp->domains[dmn].name);
/* exclude non-raw primitives */
for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) {
struct rapl_primitive_info *rpi = get_rpi(rp, prim);
if (!rapl_read_data_raw(&rp->domains[dmn], prim,
rpi[prim].unit, &val))
rpi->unit, &val))
rp->domains[dmn].rdd.primitives[prim] = val;
}
}
......@@ -1239,7 +1374,7 @@ static int rapl_package_register_powercap(struct rapl_package *rp)
return ret;
}
static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp)
static int rapl_check_domain(int domain, struct rapl_package *rp)
{
struct reg_action ra;
......@@ -1260,9 +1395,43 @@ static int rapl_check_domain(int cpu, int domain, struct rapl_package *rp)
*/
ra.mask = ENERGY_STATUS_MASK;
if (rp->priv->read_raw(cpu, &ra) || !ra.value)
if (rp->priv->read_raw(get_rid(rp), &ra) || !ra.value)
return -ENODEV;
return 0;
}
/*
 * Set up the energy/power/time units of a single domain.
 *
 * A domain without its own Unit register falls back to the package scope
 * Unit register. The units are then computed by the interface's
 * .check_unit() callback, and the DRAM and Platform domains get their
 * energy unit replaced when the defaults provide a non-zero fixed value.
 *
 * Returns 0 on success, -ENODEV when no Unit register or no .check_unit()
 * callback is available, or the error returned by .check_unit().
 */
static int rapl_get_domain_unit(struct rapl_domain *rd)
{
	struct rapl_defaults *defaults = get_defaults(rd->rp);
	int err;

	if (!rd->regs[RAPL_DOMAIN_REG_UNIT]) {
		if (rd->rp->priv->reg_unit) {
			/* Inherit the package scope Unit register */
			rd->regs[RAPL_DOMAIN_REG_UNIT] = rd->rp->priv->reg_unit;
		} else {
			pr_err("No valid Unit register found\n");
			return -ENODEV;
		}
	}

	if (!defaults->check_unit) {
		pr_err("missing .check_unit() callback\n");
		return -ENODEV;
	}

	err = defaults->check_unit(rd);
	if (err)
		return err;

	/* A fixed per-domain energy unit overrides the one just computed */
	switch (rd->id) {
	case RAPL_DOMAIN_DRAM:
		if (defaults->dram_domain_energy_unit)
			rd->energy_unit = defaults->dram_domain_energy_unit;
		break;
	case RAPL_DOMAIN_PLATFORM:
		if (defaults->psys_domain_energy_unit)
			rd->energy_unit = defaults->psys_domain_energy_unit;
		break;
	default:
		break;
	}

	return 0;
}
......@@ -1280,19 +1449,16 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
u64 val64;
int i;
/* check if the domain is locked by BIOS, ignore if MSR doesn't exist */
if (!rapl_read_data_raw(rd, FW_LOCK, false, &val64)) {
if (val64) {
pr_info("RAPL %s domain %s locked by BIOS\n",
rd->rp->name, rd->name);
rd->state |= DOMAIN_STATE_BIOS_LOCKED;
for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
if (!rapl_read_pl_data(rd, i, PL_LOCK, false, &val64)) {
if (val64) {
rd->rpl[i].locked = true;
pr_info("%s:%s:%s locked by BIOS\n",
rd->rp->name, rd->name, pl_names[i]);
}
}
}
/* check if power limit MSR exists, otherwise domain is monitoring only */
for (i = 0; i < NR_POWER_LIMITS; i++) {
int prim = rd->rpl[i].prim_id;
if (rapl_read_data_raw(rd, prim, false, &val64))
if (rapl_read_pl_data(rd, i, PL_ENABLE, false, &val64))
rd->rpl[i].name = NULL;
}
}
......@@ -1300,14 +1466,14 @@ static void rapl_detect_powerlimit(struct rapl_domain *rd)
/* Detect active and valid domains for the given CPU, caller must
* ensure the CPU belongs to the targeted package and CPU hotplug is disabled.
*/
static int rapl_detect_domains(struct rapl_package *rp, int cpu)
static int rapl_detect_domains(struct rapl_package *rp)
{
struct rapl_domain *rd;
int i;
for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
/* use physical package id to read counters */
if (!rapl_check_domain(cpu, i, rp)) {
if (!rapl_check_domain(i, rp)) {
rp->domain_map |= 1 << i;
pr_info("Found RAPL domain %s\n", rapl_domain_names[i]);
}
......@@ -1326,8 +1492,10 @@ static int rapl_detect_domains(struct rapl_package *rp, int cpu)
rapl_init_domains(rp);
for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++)
for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
rapl_get_domain_unit(rd);
rapl_detect_powerlimit(rd);
}
return 0;
}
......@@ -1340,13 +1508,13 @@ void rapl_remove_package(struct rapl_package *rp)
package_power_limit_irq_restore(rp);
for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
rapl_write_data_raw(rd, PL1_ENABLE, 0);
rapl_write_data_raw(rd, PL1_CLAMP, 0);
if (find_nr_power_limit(rd) > 1) {
rapl_write_data_raw(rd, PL2_ENABLE, 0);
rapl_write_data_raw(rd, PL2_CLAMP, 0);
rapl_write_data_raw(rd, PL4_ENABLE, 0);
int i;
for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
rapl_write_pl_data(rd, i, PL_ENABLE, 0);
rapl_write_pl_data(rd, i, PL_CLAMP, 0);
}
if (rd->id == RAPL_DOMAIN_PACKAGE) {
rd_package = rd;
continue;
......@@ -1365,13 +1533,18 @@ void rapl_remove_package(struct rapl_package *rp)
EXPORT_SYMBOL_GPL(rapl_remove_package);
/* caller to ensure CPU hotplug lock is held */
struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv)
struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu)
{
int id = topology_logical_die_id(cpu);
struct rapl_package *rp;
int uid;
if (id_is_cpu)
uid = topology_logical_die_id(id);
else
uid = id;
list_for_each_entry(rp, &rapl_packages, plist) {
if (rp->id == id
if (rp->id == uid
&& rp->priv->control_type == priv->control_type)
return rp;
}
......@@ -1381,34 +1554,37 @@ struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv
EXPORT_SYMBOL_GPL(rapl_find_package_domain);
/* called from CPU hotplug notifier, hotplug lock held */
struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv)
struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu)
{
int id = topology_logical_die_id(cpu);
struct rapl_package *rp;
int ret;
if (!rapl_defaults)
return ERR_PTR(-ENODEV);
rp = kzalloc(sizeof(struct rapl_package), GFP_KERNEL);
if (!rp)
return ERR_PTR(-ENOMEM);
/* add the new package to the list */
rp->id = id;
rp->lead_cpu = cpu;
rp->priv = priv;
if (id_is_cpu) {
rp->id = topology_logical_die_id(id);
rp->lead_cpu = id;
if (topology_max_die_per_package() > 1)
snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d",
topology_physical_package_id(id), topology_die_id(id));
else
snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
topology_physical_package_id(id));
} else {
rp->id = id;
rp->lead_cpu = -1;
snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", id);
}
if (topology_max_die_per_package() > 1)
snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH,
"package-%d-die-%d",
topology_physical_package_id(cpu), topology_die_id(cpu));
else
snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
topology_physical_package_id(cpu));
rp->priv = priv;
ret = rapl_config(rp);
if (ret)
goto err_free_package;
/* check if the package contains valid domains */
if (rapl_detect_domains(rp, cpu) || rapl_defaults->check_unit(rp, cpu)) {
if (rapl_detect_domains(rp)) {
ret = -ENODEV;
goto err_free_package;
}
......@@ -1430,38 +1606,18 @@ static void power_limit_state_save(void)
{
struct rapl_package *rp;
struct rapl_domain *rd;
int nr_pl, ret, i;
int ret, i;
cpus_read_lock();
list_for_each_entry(rp, &rapl_packages, plist) {
if (!rp->power_zone)
continue;
rd = power_zone_to_rapl_domain(rp->power_zone);
nr_pl = find_nr_power_limit(rd);
for (i = 0; i < nr_pl; i++) {
switch (rd->rpl[i].prim_id) {
case PL1_ENABLE:
ret = rapl_read_data_raw(rd,
POWER_LIMIT1, true,
&rd->rpl[i].last_power_limit);
if (ret)
rd->rpl[i].last_power_limit = 0;
break;
case PL2_ENABLE:
ret = rapl_read_data_raw(rd,
POWER_LIMIT2, true,
for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
ret = rapl_read_pl_data(rd, i, PL_LIMIT, true,
&rd->rpl[i].last_power_limit);
if (ret)
rd->rpl[i].last_power_limit = 0;
break;
case PL4_ENABLE:
ret = rapl_read_data_raw(rd,
POWER_LIMIT4, true,
&rd->rpl[i].last_power_limit);
if (ret)
rd->rpl[i].last_power_limit = 0;
break;
}
if (ret)
rd->rpl[i].last_power_limit = 0;
}
}
cpus_read_unlock();
......@@ -1471,33 +1627,17 @@ static void power_limit_state_restore(void)
{
struct rapl_package *rp;
struct rapl_domain *rd;
int nr_pl, i;
int i;
cpus_read_lock();
list_for_each_entry(rp, &rapl_packages, plist) {
if (!rp->power_zone)
continue;
rd = power_zone_to_rapl_domain(rp->power_zone);
nr_pl = find_nr_power_limit(rd);
for (i = 0; i < nr_pl; i++) {
switch (rd->rpl[i].prim_id) {
case PL1_ENABLE:
if (rd->rpl[i].last_power_limit)
rapl_write_data_raw(rd, POWER_LIMIT1,
rd->rpl[i].last_power_limit);
break;
case PL2_ENABLE:
if (rd->rpl[i].last_power_limit)
rapl_write_data_raw(rd, POWER_LIMIT2,
rd->rpl[i].last_power_limit);
break;
case PL4_ENABLE:
if (rd->rpl[i].last_power_limit)
rapl_write_data_raw(rd, POWER_LIMIT4,
rd->rpl[i].last_power_limit);
break;
}
}
for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++)
if (rd->rpl[i].last_power_limit)
rapl_write_pl_data(rd, i, PL_LIMIT,
rd->rpl[i].last_power_limit);
}
cpus_read_unlock();
}
......@@ -1528,32 +1668,25 @@ static int __init rapl_init(void)
int ret;
id = x86_match_cpu(rapl_ids);
if (!id) {
pr_err("driver does not support CPU family %d model %d\n",
boot_cpu_data.x86, boot_cpu_data.x86_model);
return -ENODEV;
}
if (id) {
defaults_msr = (struct rapl_defaults *)id->driver_data;
rapl_defaults = (struct rapl_defaults *)id->driver_data;
ret = register_pm_notifier(&rapl_pm_notifier);
if (ret)
return ret;
rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0);
if (!rapl_msr_platdev)
return -ENOMEM;
rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0);
if (!rapl_msr_platdev) {
ret = -ENOMEM;
goto end;
ret = platform_device_add(rapl_msr_platdev);
if (ret) {
platform_device_put(rapl_msr_platdev);
return ret;
}
}
ret = platform_device_add(rapl_msr_platdev);
if (ret)
ret = register_pm_notifier(&rapl_pm_notifier);
if (ret && rapl_msr_platdev) {
platform_device_del(rapl_msr_platdev);
platform_device_put(rapl_msr_platdev);
end:
if (ret)
unregister_pm_notifier(&rapl_pm_notifier);
}
return ret;
}
......
......@@ -22,7 +22,6 @@
#include <linux/processor.h>
#include <linux/platform_device.h>
#include <asm/iosf_mbi.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
......@@ -34,6 +33,7 @@
static struct rapl_if_priv *rapl_msr_priv;
static struct rapl_if_priv rapl_msr_priv_intel = {
.type = RAPL_IF_MSR,
.reg_unit = MSR_RAPL_POWER_UNIT,
.regs[RAPL_DOMAIN_PACKAGE] = {
MSR_PKG_POWER_LIMIT, MSR_PKG_ENERGY_STATUS, MSR_PKG_PERF_STATUS, 0, MSR_PKG_POWER_INFO },
......@@ -45,11 +45,12 @@ static struct rapl_if_priv rapl_msr_priv_intel = {
MSR_DRAM_POWER_LIMIT, MSR_DRAM_ENERGY_STATUS, MSR_DRAM_PERF_STATUS, 0, MSR_DRAM_POWER_INFO },
.regs[RAPL_DOMAIN_PLATFORM] = {
MSR_PLATFORM_POWER_LIMIT, MSR_PLATFORM_ENERGY_STATUS, 0, 0, 0},
.limits[RAPL_DOMAIN_PACKAGE] = 2,
.limits[RAPL_DOMAIN_PLATFORM] = 2,
.limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2),
.limits[RAPL_DOMAIN_PLATFORM] = BIT(POWER_LIMIT2),
};
static struct rapl_if_priv rapl_msr_priv_amd = {
.type = RAPL_IF_MSR,
.reg_unit = MSR_AMD_RAPL_POWER_UNIT,
.regs[RAPL_DOMAIN_PACKAGE] = {
0, MSR_AMD_PKG_ENERGY_STATUS, 0, 0, 0 },
......@@ -68,9 +69,9 @@ static int rapl_cpu_online(unsigned int cpu)
{
struct rapl_package *rp;
rp = rapl_find_package_domain(cpu, rapl_msr_priv);
rp = rapl_find_package_domain(cpu, rapl_msr_priv, true);
if (!rp) {
rp = rapl_add_package(cpu, rapl_msr_priv);
rp = rapl_add_package(cpu, rapl_msr_priv, true);
if (IS_ERR(rp))
return PTR_ERR(rp);
}
......@@ -83,7 +84,7 @@ static int rapl_cpu_down_prep(unsigned int cpu)
struct rapl_package *rp;
int lead_cpu;
rp = rapl_find_package_domain(cpu, rapl_msr_priv);
rp = rapl_find_package_domain(cpu, rapl_msr_priv, true);
if (!rp)
return 0;
......@@ -137,14 +138,14 @@ static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
/* List of verified CPUs. */
static const struct x86_cpu_id pl4_support_ids[] = {
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_TIGERLAKE_L, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_L, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ALDERLAKE_N, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_RAPTORLAKE_P, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_METEORLAKE, X86_FEATURE_ANY },
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_METEORLAKE_L, X86_FEATURE_ANY },
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, NULL),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL),
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE, NULL),
X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, NULL),
{}
};
......@@ -169,7 +170,7 @@ static int rapl_msr_probe(struct platform_device *pdev)
rapl_msr_priv->write_raw = rapl_msr_write_raw;
if (id) {
rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] = 3;
rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] |= BIT(POWER_LIMIT4);
rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4] =
MSR_VR_CURRENT_CONFIG;
pr_info("PL4 support detected.\n");
......
// SPDX-License-Identifier: GPL-2.0-only
/*
* intel_rapl_tpmi: Intel RAPL driver via TPMI interface
*
* Copyright (c) 2023, Intel Corporation.
* All Rights Reserved.
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/auxiliary_bus.h>
#include <linux/io.h>
#include <linux/intel_tpmi.h>
#include <linux/intel_rapl.h>
#include <linux/module.h>
#include <linux/slab.h>
#define TPMI_RAPL_VERSION 1
/* 1 header + 10 registers + 5 reserved. 8 bytes for each. */
#define TPMI_RAPL_DOMAIN_SIZE 128
/*
 * Domain type codes carried in bits 15:8 of a TPMI RAPL domain header.
 * parse_one_domain() accepts SYSTEM, PACKAGE and MEMORY only; any other
 * value is reported as an unsupported domain type.
 */
enum tpmi_rapl_domain_type {
	TPMI_RAPL_DOMAIN_INVALID	= 0,
	TPMI_RAPL_DOMAIN_SYSTEM		= 1,	/* handled as RAPL_DOMAIN_PLATFORM */
	TPMI_RAPL_DOMAIN_PACKAGE	= 2,	/* handled as RAPL_DOMAIN_PACKAGE */
	TPMI_RAPL_DOMAIN_RESERVED	= 3,
	TPMI_RAPL_DOMAIN_MEMORY		= 4,	/* handled as RAPL_DOMAIN_DRAM */
	TPMI_RAPL_DOMAIN_MAX		= 5,
};
/*
 * Index of each 64-bit register within one TPMI RAPL domain. The same number
 * is also the bit position tested in the domain header's flags field to see
 * whether the register is present.
 */
enum tpmi_rapl_register {
	TPMI_RAPL_REG_HEADER		= 0,
	TPMI_RAPL_REG_UNIT		= 1,
	TPMI_RAPL_REG_PL1		= 2,
	TPMI_RAPL_REG_PL2		= 3,
	TPMI_RAPL_REG_PL3		= 4,	/* not mapped by parse_one_domain() */
	TPMI_RAPL_REG_PL4		= 5,
	TPMI_RAPL_REG_RESERVED		= 6,
	TPMI_RAPL_REG_ENERGY_STATUS	= 7,
	TPMI_RAPL_REG_PERF_STATUS	= 8,
	TPMI_RAPL_REG_POWER_INFO	= 9,
	TPMI_RAPL_REG_INTERRUPT		= 10,	/* not mapped by parse_one_domain() */
	/* exclusive upper bound of the register scan in parse_one_domain() */
	TPMI_RAPL_REG_MAX		= 15,
};
/*
 * Per-device state of one TPMI RAPL instance, allocated by trp_alloc() in
 * probe and freed by trp_release().
 */
struct tpmi_rapl_package {
/* RAPL interface description filled in by probe and handed to rapl_add_package() */
struct rapl_if_priv priv;
/* platform data returned by tpmi_get_platform_data() for this device */
struct intel_tpmi_plat_info *tpmi_info;
/* package returned by rapl_add_package(); removed again in the remove callback */
struct rapl_package *rp;
/* start of the device's TPMI resource as mapped by devm_ioremap_resource() */
void __iomem *base;
/* link in the tpmi_rapl_packages list */
struct list_head node;
};
/* Every package handed out by trp_alloc() and not yet passed to trp_release(). */
static LIST_HEAD(tpmi_rapl_packages);
/* Held while tpmi_rapl_packages is modified and while tpmi_control_type is (un)registered. */
static DEFINE_MUTEX(tpmi_rapl_lock);
/*
 * The "intel-rapl" powercap control type. It is registered when the first
 * package is allocated and unregistered when the list becomes empty again.
 */
static struct powercap_control_type *tpmi_control_type;
static int tpmi_rapl_read_raw(int id, struct reg_action *ra)
{
if (!ra->reg)
return -EINVAL;
ra->value = readq((void __iomem *)ra->reg);
ra->value &= ra->mask;
return 0;
}
static int tpmi_rapl_write_raw(int id, struct reg_action *ra)
{
u64 val;
if (!ra->reg)
return -EINVAL;
val = readq((void __iomem *)ra->reg);
val &= ~ra->mask;
val |= ra->value;
writeq(val, (void __iomem *)ra->reg);
return 0;
}
/*
 * Allocate a zeroed tpmi_rapl_package and link it into tpmi_rapl_packages.
 *
 * When the list is still empty the "intel-rapl" powercap control type is
 * registered first; if the allocation then fails, the control type is
 * unregistered again as long as the list is still empty. Everything runs
 * under tpmi_rapl_lock. @pkg_id is currently not used.
 *
 * Return: the new package, or an ERR_PTR() carrying the registration error
 * or -ENOMEM.
 */
static struct tpmi_rapl_package *trp_alloc(int pkg_id)
{
	struct tpmi_rapl_package *trp;

	mutex_lock(&tpmi_rapl_lock);

	if (list_empty(&tpmi_rapl_packages)) {
		tpmi_control_type = powercap_register_control_type(NULL, "intel-rapl", NULL);
		if (IS_ERR(tpmi_control_type)) {
			trp = ERR_PTR(PTR_ERR(tpmi_control_type));
			goto out_unlock;
		}
	}

	trp = kzalloc(sizeof(*trp), GFP_KERNEL);
	if (trp) {
		list_add(&trp->node, &tpmi_rapl_packages);
		goto out_unlock;
	}

	/* Allocation failed: drop the control type if no package is using it. */
	trp = ERR_PTR(-ENOMEM);
	if (list_empty(&tpmi_rapl_packages))
		powercap_unregister_control_type(tpmi_control_type);

out_unlock:
	mutex_unlock(&tpmi_rapl_lock);
	return trp;
}
static void trp_release(struct tpmi_rapl_package *trp)
{
mutex_lock(&tpmi_rapl_lock);
list_del(&trp->node);
if (list_empty(&tpmi_rapl_packages))
powercap_unregister_control_type(tpmi_control_type);
kfree(trp);
mutex_unlock(&tpmi_rapl_lock);
}
/*
 * Decode one RAPL domain of the mapped TPMI resource.
 *
 * @trp:    package whose mapping (trp->base) is being parsed
 * @offset: byte offset of the domain header from trp->base
 *
 * The 64-bit domain header is checked for version, size, the two mandatory
 * registers and a supported, not yet seen domain type. After that, the
 * address of every register that is flagged present and known to this driver
 * is stored in trp->priv.regs[domain][reg]; for PL1/PL2/PL4 the matching bit
 * is also set in trp->priv.limits[domain].
 *
 * Return: 0 on success, -ENODEV for an unsupported header version, -EINVAL
 * for any other rejected header.
 */
static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
{
	u64 *regs = trp->base + offset;
	u64 header = readq((void __iomem *)regs);
	/* Domain Parent bits are ignored for now */
	u8 version = header & 0xff;
	enum tpmi_rapl_domain_type tpmi_type = (header >> 8) & 0xff;
	int size = (header >> 16) & 0xff;
	int flags = (header >> 32) & 0xffff;
	/* Unit register and Energy Status register are mandatory for each domain */
	const unsigned long mandatory = BIT(TPMI_RAPL_REG_UNIT) |
					BIT(TPMI_RAPL_REG_ENERGY_STATUS);
	enum rapl_domain_type domain_type;
	enum rapl_domain_reg_id reg_id;
	enum tpmi_rapl_register idx;

	if (version != TPMI_RAPL_VERSION) {
		pr_warn(FW_BUG "Unsupported version:%d\n", version);
		return -ENODEV;
	}

	/* Domain size: in unit of 128 Bytes */
	if (size != 1) {
		pr_warn(FW_BUG "Invalid Domain size %d\n", size);
		return -EINVAL;
	}

	if ((flags & mandatory) != mandatory) {
		pr_warn(FW_BUG "Invalid Domain flag 0x%x\n", flags);
		return -EINVAL;
	}

	switch (tpmi_type) {
	case TPMI_RAPL_DOMAIN_SYSTEM:
		domain_type = RAPL_DOMAIN_PLATFORM;
		break;
	case TPMI_RAPL_DOMAIN_PACKAGE:
		domain_type = RAPL_DOMAIN_PACKAGE;
		break;
	case TPMI_RAPL_DOMAIN_MEMORY:
		domain_type = RAPL_DOMAIN_DRAM;
		break;
	default:
		pr_warn(FW_BUG "Unsupported Domain type %d\n", tpmi_type);
		return -EINVAL;
	}

	/* A non-zero unit register address means this domain was parsed before. */
	if (trp->priv.regs[domain_type][RAPL_DOMAIN_REG_UNIT]) {
		pr_warn(FW_BUG "Duplicate Domain type %d\n", tpmi_type);
		return -EINVAL;
	}

	/* Walk every register slot after the header. */
	for (idx = TPMI_RAPL_REG_UNIT; idx != TPMI_RAPL_REG_MAX; idx++) {
		if (!(flags & BIT(idx)))
			continue;

		switch (idx) {
		case TPMI_RAPL_REG_UNIT:
			reg_id = RAPL_DOMAIN_REG_UNIT;
			break;
		case TPMI_RAPL_REG_PL1:
			reg_id = RAPL_DOMAIN_REG_LIMIT;
			trp->priv.limits[domain_type] |= BIT(POWER_LIMIT1);
			break;
		case TPMI_RAPL_REG_PL2:
			reg_id = RAPL_DOMAIN_REG_PL2;
			trp->priv.limits[domain_type] |= BIT(POWER_LIMIT2);
			break;
		case TPMI_RAPL_REG_PL4:
			reg_id = RAPL_DOMAIN_REG_PL4;
			trp->priv.limits[domain_type] |= BIT(POWER_LIMIT4);
			break;
		case TPMI_RAPL_REG_ENERGY_STATUS:
			reg_id = RAPL_DOMAIN_REG_STATUS;
			break;
		case TPMI_RAPL_REG_PERF_STATUS:
			reg_id = RAPL_DOMAIN_REG_PERF;
			break;
		case TPMI_RAPL_REG_POWER_INFO:
			reg_id = RAPL_DOMAIN_REG_INFO;
			break;
		default:
			/* flagged present, but not a register this driver records */
			continue;
		}

		trp->priv.regs[domain_type][reg_id] = (u64)&regs[idx];
	}

	return 0;
}
/*
 * Auxiliary-bus probe callback.
 *
 * Allocates the per-device state, maps the single TPMI resource, parses it
 * in TPMI_RAPL_DOMAIN_SIZE steps and registers the result as a RAPL package
 * keyed by the platform's package id. The state is stored as driver data for
 * the remove callback.
 *
 * Return: 0 on success or a negative errno; on failure the per-device state
 * is released again.
 */
static int intel_rapl_tpmi_probe(struct auxiliary_device *auxdev,
				 const struct auxiliary_device_id *id)
{
	struct device *dev = &auxdev->dev;
	struct intel_tpmi_plat_info *info;
	struct tpmi_rapl_package *trp;
	struct resource *res;
	u32 offset;
	int ret;

	info = tpmi_get_platform_data(auxdev);
	if (!info)
		return -ENODEV;

	trp = trp_alloc(info->package_id);
	if (IS_ERR(trp))
		return PTR_ERR(trp);

	if (tpmi_get_resource_count(auxdev) > 1) {
		dev_err(dev, "does not support multiple resources\n");
		ret = -EINVAL;
		goto err_release;
	}

	res = tpmi_get_resource_at_index(auxdev, 0);
	if (!res) {
		dev_err(dev, "can't fetch device resource info\n");
		ret = -EIO;
		goto err_release;
	}

	trp->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(trp->base)) {
		ret = PTR_ERR(trp->base);
		goto err_release;
	}

	/* One domain per TPMI_RAPL_DOMAIN_SIZE bytes of the resource. */
	offset = 0;
	while (offset < resource_size(res)) {
		ret = parse_one_domain(trp, offset);
		if (ret)
			goto err_release;
		offset += TPMI_RAPL_DOMAIN_SIZE;
	}

	trp->tpmi_info = info;
	trp->priv.type = RAPL_IF_TPMI;
	trp->priv.read_raw = tpmi_rapl_read_raw;
	trp->priv.write_raw = tpmi_rapl_write_raw;
	trp->priv.control_type = tpmi_control_type;

	/* RAPL TPMI I/F is per physical package */
	trp->rp = rapl_find_package_domain(info->package_id, &trp->priv, false);
	if (trp->rp) {
		dev_err(dev, "Domain for Package%d already exists\n", info->package_id);
		ret = -EEXIST;
		goto err_release;
	}

	trp->rp = rapl_add_package(info->package_id, &trp->priv, false);
	if (IS_ERR(trp->rp)) {
		dev_err(dev, "Failed to add RAPL Domain for Package%d, %ld\n",
			info->package_id, PTR_ERR(trp->rp));
		ret = PTR_ERR(trp->rp);
		goto err_release;
	}

	auxiliary_set_drvdata(auxdev, trp);
	return 0;

err_release:
	trp_release(trp);
	return ret;
}
/*
 * Auxiliary-bus remove callback: remove the RAPL package registered by probe
 * and release the per-device state stored as driver data.
 */
static void intel_rapl_tpmi_remove(struct auxiliary_device *auxdev)
{
	struct tpmi_rapl_package *trp;

	trp = auxiliary_get_drvdata(auxdev);

	rapl_remove_package(trp->rp);
	trp_release(trp);
}
/*
 * Auxiliary device names handled by this driver; the empty entry ends the
 * table. Used as the driver's .id_table and exported for module autoloading
 * through MODULE_DEVICE_TABLE().
 */
static const struct auxiliary_device_id intel_rapl_tpmi_ids[] = {
{.name = "intel_vsec.tpmi-rapl" },
{ }
};
MODULE_DEVICE_TABLE(auxiliary, intel_rapl_tpmi_ids);
/* Auxiliary driver tying the probe/remove callbacks to the device id table. */
static struct auxiliary_driver intel_rapl_tpmi_driver = {
.probe = intel_rapl_tpmi_probe,
.remove = intel_rapl_tpmi_remove,
.id_table = intel_rapl_tpmi_ids,
};
/* Generates the module init/exit code that registers and unregisters the driver. */
module_auxiliary_driver(intel_rapl_tpmi_driver)
/* The tpmi_get_*() helpers used by probe are imported from the INTEL_TPMI symbol namespace. */
MODULE_IMPORT_NS(INTEL_TPMI);
MODULE_DESCRIPTION("Intel RAPL TPMI Driver");
MODULE_LICENSE("GPL");
......@@ -15,8 +15,8 @@ static const struct rapl_mmio_regs rapl_mmio_default = {
.reg_unit = 0x5938,
.regs[RAPL_DOMAIN_PACKAGE] = { 0x59a0, 0x593c, 0x58f0, 0, 0x5930},
.regs[RAPL_DOMAIN_DRAM] = { 0x58e0, 0x58e8, 0x58ec, 0, 0},
.limits[RAPL_DOMAIN_PACKAGE] = 2,
.limits[RAPL_DOMAIN_DRAM] = 2,
.limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2),
.limits[RAPL_DOMAIN_DRAM] = BIT(POWER_LIMIT2),
};
static int rapl_mmio_cpu_online(unsigned int cpu)
......@@ -27,9 +27,9 @@ static int rapl_mmio_cpu_online(unsigned int cpu)
if (topology_physical_package_id(cpu))
return 0;
rp = rapl_find_package_domain(cpu, &rapl_mmio_priv);
rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true);
if (!rp) {
rp = rapl_add_package(cpu, &rapl_mmio_priv);
rp = rapl_add_package(cpu, &rapl_mmio_priv, true);
if (IS_ERR(rp))
return PTR_ERR(rp);
}
......@@ -42,7 +42,7 @@ static int rapl_mmio_cpu_down_prep(unsigned int cpu)
struct rapl_package *rp;
int lead_cpu;
rp = rapl_find_package_domain(cpu, &rapl_mmio_priv);
rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true);
if (!rp)
return 0;
......@@ -97,6 +97,7 @@ int proc_thermal_rapl_add(struct pci_dev *pdev, struct proc_thermal_device *proc
rapl_regs->regs[domain][reg];
rapl_mmio_priv.limits[domain] = rapl_regs->limits[domain];
}
rapl_mmio_priv.type = RAPL_IF_MMIO;
rapl_mmio_priv.reg_unit = (u64)proc_priv->mmio_base + rapl_regs->reg_unit;
rapl_mmio_priv.read_raw = rapl_mmio_read_raw;
......
......@@ -307,7 +307,8 @@ enum acpi_preferred_pm_profiles {
PM_SOHO_SERVER = 5,
PM_APPLIANCE_PC = 6,
PM_PERFORMANCE_SERVER = 7,
PM_TABLET = 8
PM_TABLET = 8,
NR_PM_PROFILES = 9
};
/* Values for sleep_status and sleep_control registers (V5+ FADT) */
......
......@@ -94,7 +94,8 @@ struct amd_cpudata {
* enum amd_pstate_mode - driver working mode of amd pstate
*/
enum amd_pstate_mode {
AMD_PSTATE_DISABLE = 0,
AMD_PSTATE_UNDEFINED = 0,
AMD_PSTATE_DISABLE,
AMD_PSTATE_PASSIVE,
AMD_PSTATE_ACTIVE,
AMD_PSTATE_GUIDED,
......@@ -102,6 +103,7 @@ enum amd_pstate_mode {
};
static const char * const amd_pstate_mode_string[] = {
[AMD_PSTATE_UNDEFINED] = "undefined",
[AMD_PSTATE_DISABLE] = "disable",
[AMD_PSTATE_PASSIVE] = "passive",
[AMD_PSTATE_ACTIVE] = "active",
......
......@@ -340,7 +340,10 @@ struct cpufreq_driver {
/*
* ->fast_switch() replacement for drivers that use an internal
* representation of performance levels and can pass hints other than
* the target performance level to the hardware.
* the target performance level to the hardware. This can only be set
* if ->fast_switch is set too, because in those cases (under specific
* conditions) scale invariance can be disabled, which causes the
* schedutil governor to fall back to the latter.
*/
void (*adjust_perf)(unsigned int cpu,
unsigned long min_perf,
......
......@@ -108,7 +108,6 @@ struct devfreq_dev_profile {
unsigned long initial_freq;
unsigned int polling_ms;
enum devfreq_timer timer;
bool is_cooling_device;
int (*target)(struct device *dev, unsigned long *freq, u32 flags);
int (*get_dev_status)(struct device *dev,
......@@ -118,6 +117,8 @@ struct devfreq_dev_profile {
unsigned long *freq_table;
unsigned int max_state;
bool is_cooling_device;
};
/**
......
......@@ -14,6 +14,12 @@
#include <linux/powercap.h>
#include <linux/cpuhotplug.h>
/* Kind of register interface a RAPL interface driver sits on. */
enum rapl_if_type {
	RAPL_IF_MSR	= 0,	/* RAPL I/F using MSR registers */
	RAPL_IF_MMIO	= 1,	/* RAPL I/F using MMIO registers */
	RAPL_IF_TPMI	= 2,	/* RAPL I/F using TPMI registers */
};
enum rapl_domain_type {
RAPL_DOMAIN_PACKAGE, /* entire package/socket */
RAPL_DOMAIN_PP0, /* core power plane */
......@@ -30,17 +36,23 @@ enum rapl_domain_reg_id {
RAPL_DOMAIN_REG_POLICY,
RAPL_DOMAIN_REG_INFO,
RAPL_DOMAIN_REG_PL4,
RAPL_DOMAIN_REG_UNIT,
RAPL_DOMAIN_REG_PL2,
RAPL_DOMAIN_REG_MAX,
};
struct rapl_domain;
enum rapl_primitives {
ENERGY_COUNTER,
POWER_LIMIT1,
POWER_LIMIT2,
POWER_LIMIT4,
ENERGY_COUNTER,
FW_LOCK,
FW_HIGH_LOCK,
PL1_LOCK,
PL2_LOCK,
PL4_LOCK,
PL1_ENABLE, /* power limit 1, aka long term */
PL1_CLAMP, /* allow frequency to go below OS request */
......@@ -74,12 +86,13 @@ struct rapl_domain_data {
unsigned long timestamp;
};
#define NR_POWER_LIMITS (3)
#define NR_POWER_LIMITS (POWER_LIMIT4 + 1)
struct rapl_power_limit {
struct powercap_zone_constraint *constraint;
int prim_id; /* primitive ID used to enable */
struct rapl_domain *domain;
const char *name;
bool locked;
u64 last_power_limit;
};
......@@ -96,7 +109,9 @@ struct rapl_domain {
struct rapl_power_limit rpl[NR_POWER_LIMITS];
u64 attr_map; /* track capabilities */
unsigned int state;
unsigned int domain_energy_unit;
unsigned int power_unit;
unsigned int energy_unit;
unsigned int time_unit;
struct rapl_package *rp;
};
......@@ -121,16 +136,20 @@ struct reg_action {
* registers.
* @write_raw: Callback for writing RAPL interface specific
* registers.
* @defaults: internal pointer to interface default settings
* @rpi: internal pointer to interface primitive info
*/
struct rapl_if_priv {
enum rapl_if_type type;
struct powercap_control_type *control_type;
struct rapl_domain *platform_rapl_domain;
enum cpuhp_state pcap_rapl_online;
u64 reg_unit;
u64 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
int limits[RAPL_DOMAIN_MAX];
int (*read_raw)(int cpu, struct reg_action *ra);
int (*write_raw)(int cpu, struct reg_action *ra);
int (*read_raw)(int id, struct reg_action *ra);
int (*write_raw)(int id, struct reg_action *ra);
void *defaults;
void *rpi;
};
/* maximum rapl package domain name: package-%d-die-%d */
......@@ -140,9 +159,6 @@ struct rapl_package {
unsigned int id; /* logical die id, equals physical 1-die systems */
unsigned int nr_domains;
unsigned long domain_map; /* bit map of active domains */
unsigned int power_unit;
unsigned int energy_unit;
unsigned int time_unit;
struct rapl_domain *domains; /* array of domains, sized at runtime */
struct powercap_zone *power_zone; /* keep track of parent zone */
unsigned long power_limit_irq; /* keep track of package power limit
......@@ -156,8 +172,8 @@ struct rapl_package {
struct rapl_if_priv *priv;
};
struct rapl_package *rapl_find_package_domain(int cpu, struct rapl_if_priv *priv);
struct rapl_package *rapl_add_package(int cpu, struct rapl_if_priv *priv);
struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu);
struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu);
void rapl_remove_package(struct rapl_package *rp);
#endif /* __INTEL_RAPL_H__ */
......@@ -202,6 +202,7 @@ struct platform_s2idle_ops {
};
#ifdef CONFIG_SUSPEND
extern suspend_state_t pm_suspend_target_state;
extern suspend_state_t mem_sleep_current;
extern suspend_state_t mem_sleep_default;
......@@ -337,6 +338,8 @@ extern bool sync_on_suspend_enabled;
#else /* !CONFIG_SUSPEND */
#define suspend_valid_only_mem NULL
#define pm_suspend_target_state (PM_SUSPEND_ON)
static inline void pm_suspend_clear_flags(void) {}
static inline void pm_set_suspend_via_firmware(void) {}
static inline void pm_set_resume_via_firmware(void) {}
......@@ -472,6 +475,8 @@ static inline int hibernate_quiet_exec(int (*func)(void *data), void *data) {
}
#endif /* CONFIG_HIBERNATION */
int arch_resume_nosmt(void);
#ifdef CONFIG_HIBERNATION_SNAPSHOT_DEV
int is_hibernate_resume_dev(dev_t dev);
#else
......@@ -507,7 +512,6 @@ extern void pm_report_max_hw_sleep(u64 t);
/* drivers/base/power/wakeup.c */
extern bool events_check_enabled;
extern suspend_state_t pm_suspend_target_state;
extern bool pm_wakeup_pending(void);
extern void pm_system_wakeup(void);
......@@ -555,6 +559,7 @@ static inline void unlock_system_sleep(unsigned int flags) {}
#ifdef CONFIG_PM_SLEEP_DEBUG
extern bool pm_print_times_enabled;
extern bool pm_debug_messages_on;
extern bool pm_debug_messages_should_print(void);
static inline int pm_dyn_debug_messages_on(void)
{
#ifdef CONFIG_DYNAMIC_DEBUG
......@@ -568,14 +573,14 @@ static inline int pm_dyn_debug_messages_on(void)
#endif
#define __pm_pr_dbg(fmt, ...) \
do { \
if (pm_debug_messages_on) \
if (pm_debug_messages_should_print()) \
printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
else if (pm_dyn_debug_messages_on()) \
pr_debug(fmt, ##__VA_ARGS__); \
} while (0)
#define __pm_deferred_pr_dbg(fmt, ...) \
do { \
if (pm_debug_messages_on) \
if (pm_debug_messages_should_print()) \
printk_deferred(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \
} while (0)
#else
......@@ -593,7 +598,8 @@ static inline int pm_dyn_debug_messages_on(void)
/**
* pm_pr_dbg - print pm sleep debug messages
*
* If pm_debug_messages_on is enabled, print message.
* If pm_debug_messages_on is enabled and the system is entering/leaving
* suspend, print message.
* If pm_debug_messages_on is disabled and CONFIG_DYNAMIC_DEBUG is enabled,
* print message only from instances explicitly enabled on dynamic debug's
* control.
......
......@@ -556,6 +556,12 @@ power_attr_ro(pm_wakeup_irq);
bool pm_debug_messages_on __read_mostly;
bool pm_debug_messages_should_print(void)
{
return pm_debug_messages_on && pm_suspend_target_state != PM_SUSPEND_ON;
}
EXPORT_SYMBOL_GPL(pm_debug_messages_should_print);
static ssize_t pm_debug_messages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
......
......@@ -398,7 +398,7 @@ struct mem_zone_bm_rtree {
unsigned int blocks; /* Number of Bitmap Blocks */
};
/* strcut bm_position is used for browsing memory bitmaps */
/* struct bm_position is used for browsing memory bitmaps */
struct bm_position {
struct mem_zone_bm_rtree *zone;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment