Commit 5c45bf27 authored by Siddha, Suresh B, committed by Linus Torvalds

[PATCH] sched: mc/smt power savings sched policy

sysfs entries 'sched_mc_power_savings' and 'sched_smt_power_savings' in
/sys/devices/system/cpu/ control the MC/SMT power savings policy for the
scheduler.

Based on the values (1-enable, 0-disable) for these controls, the sched groups'
cpu power will be determined for different domains.  When the power savings
policy is enabled and under light load conditions, the scheduler will minimize
the physical packages/cpu cores carrying the load, thus conserving
power (with a perf impact based on the workload characteristics; see the OLS
2005 CMP kernel scheduler paper for more details).
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Con Kolivas <kernel@kolivas.org>
Cc: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Cc: "David S. Miller" <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 36938169
......@@ -448,10 +448,12 @@ cpumask_t cpu_coregroup_map(int cpu)
struct cpuinfo_x86 *c = cpu_data + cpu;
/*
* For perf, we return last level cache shared map.
* TBD: when power saving sched policy is added, we will return
* cpu_core_map when power saving policy is enabled
* And for power savings, we return cpu_core_map
*/
return c->llc_shared_map;
if (sched_mc_power_savings || sched_smt_power_savings)
return cpu_core_map[cpu];
else
return c->llc_shared_map;
}
/* representing cpus for which sibling maps can be computed */
......
......@@ -455,10 +455,12 @@ cpumask_t cpu_coregroup_map(int cpu)
struct cpuinfo_x86 *c = cpu_data + cpu;
/*
* For perf, we return last level cache shared map.
* TBD: when power saving sched policy is added, we will return
* cpu_core_map when power saving policy is enabled
* And for power savings, we return cpu_core_map
*/
return c->llc_shared_map;
if (sched_mc_power_savings || sched_smt_power_savings)
return cpu_core_map[cpu];
else
return c->llc_shared_map;
}
/* representing cpus for which sibling maps can be computed */
......
......@@ -143,5 +143,13 @@ EXPORT_SYMBOL_GPL(get_cpu_sysdev);
int __init cpu_dev_init(void)
{
return sysdev_class_register(&cpu_sysdev_class);
int err;
err = sysdev_class_register(&cpu_sysdev_class);
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
if (!err)
err = sched_create_sysfs_power_savings_entries(&cpu_sysdev_class);
#endif
return err;
}
......@@ -112,4 +112,9 @@ extern unsigned long node_remap_size[];
extern cpumask_t cpu_coregroup_map(int cpu);
#ifdef CONFIG_SMP
#define mc_capable() (boot_cpu_data.x86_max_cores > 1)
#define smt_capable() (smp_num_siblings > 1)
#endif
#endif /* _ASM_I386_TOPOLOGY_H */
......@@ -112,6 +112,7 @@ void build_cpu_to_node_map(void);
#define topology_core_id(cpu) (cpu_data(cpu)->core_id)
#define topology_core_siblings(cpu) (cpu_core_map[cpu])
#define topology_thread_siblings(cpu) (cpu_sibling_map[cpu])
#define smt_capable() (smp_num_siblings > 1)
#endif
#include <asm-generic/topology.h>
......
......@@ -93,5 +93,10 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
#endif /* CONFIG_NUMA */
#ifdef CONFIG_SMP
#include <asm/cputable.h>
#define smt_capable() (cpu_has_feature(CPU_FTR_SMT))
#endif
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_TOPOLOGY_H */
#ifndef _ASM_SPARC64_TOPOLOGY_H
#define _ASM_SPARC64_TOPOLOGY_H
#include <asm/spitfire.h>
#define smt_capable() (tlb_type == hypervisor)
#include <asm-generic/topology.h>
#endif /* _ASM_SPARC64_TOPOLOGY_H */
......@@ -59,6 +59,8 @@ extern int __node_distance(int, int);
#define topology_core_id(cpu) (cpu_data[cpu].cpu_core_id)
#define topology_core_siblings(cpu) (cpu_core_map[cpu])
#define topology_thread_siblings(cpu) (cpu_sibling_map[cpu])
#define mc_capable() (boot_cpu_data.x86_max_cores > 1)
#define smt_capable() (smp_num_siblings > 1)
#endif
#include <asm-generic/topology.h>
......
......@@ -570,6 +570,11 @@ enum idle_type
#define SD_WAKE_AFFINE 32 /* Wake task to waking CPU */
#define SD_WAKE_BALANCE 64 /* Perform balancing at task wakeup */
#define SD_SHARE_CPUPOWER 128 /* Domain members share cpu power */
#define SD_POWERSAVINGS_BALANCE 256 /* Balance for power savings */
#define BALANCE_FOR_POWER ((sched_mc_power_savings || sched_smt_power_savings) \
? SD_POWERSAVINGS_BALANCE : 0)
struct sched_group {
struct sched_group *next; /* Must be a circular list */
......@@ -1412,6 +1417,11 @@ static inline void arch_pick_mmap_layout(struct mm_struct *mm)
extern long sched_setaffinity(pid_t pid, cpumask_t new_mask);
extern long sched_getaffinity(pid_t pid, cpumask_t *mask);
#include <linux/sysdev.h>
extern int sched_mc_power_savings, sched_smt_power_savings;
extern struct sysdev_attribute attr_sched_mc_power_savings, attr_sched_smt_power_savings;
extern int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls);
extern void normalize_rt_tasks(void);
#ifdef CONFIG_PM
......
......@@ -134,7 +134,8 @@
.flags = SD_LOAD_BALANCE \
| SD_BALANCE_NEWIDLE \
| SD_BALANCE_EXEC \
| SD_WAKE_AFFINE, \
| SD_WAKE_AFFINE \
| BALANCE_FOR_POWER, \
.last_balance = jiffies, \
.balance_interval = 1, \
.nr_balance_failed = 0, \
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment