Commit 92c020d0 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:
 "The main scheduler changes in this cycle were:

   - support Intel Turbo Boost Max Technology 3.0 (TBM3) by introducig a
     notion of 'better cores', which the scheduler will prefer to
     schedule single threaded workloads on. (Tim Chen, Srinivas
     Pandruvada)

   - enhance the handling of asymmetric capacity CPUs further (Morten
     Rasmussen)

   - improve/fix load handling when moving tasks between task groups
     (Vincent Guittot)

   - simplify and clean up the cputime code (Stanislaw Gruszka)

   - improve mass fork()ed task spread a.k.a. hackbench speedup (Vincent
     Guittot)

   - make struct kthread kmalloc()ed and related fixes (Oleg Nesterov)

   - add uaccess atomicity debugging (when using access_ok() in the
     wrong context), under CONFIG_DEBUG_ATOMIC_SLEEP=y (Peter Zijlstra)

   - implement various fixes, cleanups and other enhancements (Daniel
     Bristot de Oliveira, Martin Schwidefsky, Rafael J. Wysocki)"

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (41 commits)
  sched/core: Use load_avg for selecting idlest group
  sched/core: Fix find_idlest_group() for fork
  kthread: Don't abuse kthread_create_on_cpu() in __kthread_create_worker()
  kthread: Don't use to_live_kthread() in kthread_[un]park()
  kthread: Don't use to_live_kthread() in kthread_stop()
  Revert "kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function"
  kthread: Make struct kthread kmalloc'ed
  x86/uaccess, sched/preempt: Verify access_ok() context
  sched/x86: Make CONFIG_SCHED_MC_PRIO=y easier to enable
  sched/x86: Change CONFIG_SCHED_ITMT to CONFIG_SCHED_MC_PRIO
  x86/sched: Use #include <linux/mutex.h> instead of #include <asm/mutex.h>
  cpufreq/intel_pstate: Use CPPC to get max performance
  acpi/bus: Set _OSC for diverse core support
  acpi/bus: Enable HWP CPPC objects
  x86/sched: Add SD_ASYM_PACKING flags to x86 ITMT CPU
  x86/sysctl: Add sysctl for ITMT scheduling feature
  x86: Enable Intel Turbo Boost Max Technology 3.0
  x86/topology: Define x86's arch_update_cpu_topology
  sched: Extend scheduler's asym packing
  sched/fair: Clean up the tunable parameter definitions
  ...
parents bca13ce4 6b94780e
......@@ -513,6 +513,9 @@ config HAVE_CONTEXT_TRACKING
config HAVE_VIRT_CPU_ACCOUNTING
bool
config ARCH_HAS_SCALED_CPUTIME
bool
config HAVE_VIRT_CPU_ACCOUNTING_GEN
bool
default y if 64BIT
......
......@@ -68,7 +68,7 @@ void vtime_account_user(struct task_struct *tsk)
if (ti->ac_utime) {
delta_utime = cycle_to_cputime(ti->ac_utime);
account_user_time(tsk, delta_utime, delta_utime);
account_user_time(tsk, delta_utime);
ti->ac_utime = 0;
}
}
......@@ -112,7 +112,7 @@ void vtime_account_system(struct task_struct *tsk)
{
cputime_t delta = vtime_delta(tsk);
account_system_time(tsk, 0, delta, delta);
account_system_time(tsk, 0, delta);
}
EXPORT_SYMBOL_GPL(vtime_account_system);
......
......@@ -160,6 +160,7 @@ config PPC
select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
select GENERIC_CPU_AUTOPROBE
select HAVE_VIRT_CPU_ACCOUNTING
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_KERNEL_GZIP
......
......@@ -46,26 +46,12 @@ extern cputime_t cputime_one_jiffy;
* Convert cputime <-> jiffies
*/
extern u64 __cputime_jiffies_factor;
DECLARE_PER_CPU(unsigned long, cputime_last_delta);
DECLARE_PER_CPU(unsigned long, cputime_scaled_last_delta);
static inline unsigned long cputime_to_jiffies(const cputime_t ct)
{
return mulhdu((__force u64) ct, __cputime_jiffies_factor);
}
/* Estimate the scaled cputime by scaling the real cputime based on
* the last scaled to real ratio */
static inline cputime_t cputime_to_scaled(const cputime_t ct)
{
if (cpu_has_feature(CPU_FTR_SPURR) &&
__this_cpu_read(cputime_last_delta))
return (__force u64) ct *
__this_cpu_read(cputime_scaled_last_delta) /
__this_cpu_read(cputime_last_delta);
return ct;
}
static inline cputime_t jiffies_to_cputime(const unsigned long jif)
{
u64 ct;
......
......@@ -164,8 +164,6 @@ u64 __cputime_sec_factor;
EXPORT_SYMBOL(__cputime_sec_factor);
u64 __cputime_clockt_factor;
EXPORT_SYMBOL(__cputime_clockt_factor);
DEFINE_PER_CPU(unsigned long, cputime_last_delta);
DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta);
cputime_t cputime_one_jiffy;
......@@ -360,7 +358,8 @@ void vtime_account_system(struct task_struct *tsk)
unsigned long delta, sys_scaled, stolen;
delta = vtime_delta(tsk, &sys_scaled, &stolen);
account_system_time(tsk, 0, delta, sys_scaled);
account_system_time(tsk, 0, delta);
tsk->stimescaled += sys_scaled;
if (stolen)
account_steal_time(stolen);
}
......@@ -393,7 +392,8 @@ void vtime_account_user(struct task_struct *tsk)
acct->user_time = 0;
acct->user_time_scaled = 0;
acct->utime_sspurr = 0;
account_user_time(tsk, utime, utimescaled);
account_user_time(tsk, utime);
tsk->utimescaled += utimescaled;
}
#ifdef CONFIG_PPC32
......
......@@ -171,6 +171,7 @@ config S390
select SYSCTL_EXCEPTION_TRACE
select TTY
select VIRT_CPU_ACCOUNTING
select ARCH_HAS_SCALED_CPUTIME
select VIRT_TO_BUS
select HAVE_NMI
......
......@@ -137,8 +137,10 @@ static int do_account_vtime(struct task_struct *tsk, int hardirq_offset)
user_scaled = (user_scaled * mult) / div;
system_scaled = (system_scaled * mult) / div;
}
account_user_time(tsk, user, user_scaled);
account_system_time(tsk, hardirq_offset, system, system_scaled);
account_user_time(tsk, user);
tsk->utimescaled += user_scaled;
account_system_time(tsk, hardirq_offset, system);
tsk->stimescaled += system_scaled;
steal = S390_lowcore.steal_timer;
if ((s64) steal > 0) {
......@@ -202,7 +204,8 @@ void vtime_account_irq_enter(struct task_struct *tsk)
system_scaled = (system_scaled * mult) / div;
}
account_system_time(tsk, 0, system, system_scaled);
account_system_time(tsk, 0, system);
tsk->stimescaled += system_scaled;
virt_timer_forward(system);
}
......
......@@ -939,6 +939,27 @@ config SCHED_MC
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.
config SCHED_MC_PRIO
bool "CPU core priorities scheduler support"
depends on SCHED_MC && CPU_SUP_INTEL
select X86_INTEL_PSTATE
select CPU_FREQ
default y
---help---
Intel Turbo Boost Max Technology 3.0 enabled CPUs have a
core ordering determined at manufacturing time, which allows
certain cores to reach higher turbo frequencies (when running
single threaded workloads) than others.
Enabling this kernel feature teaches the scheduler about
the TBM3 (aka ITMT) priority order of the CPU cores and adjusts the
scheduler's CPU selection logic accordingly, so that higher
overall system performance can be achieved.
This feature will have no effect on CPUs without this feature.
If unsure say Y here.
source "kernel/Kconfig.preempt"
config UP_LATE_INIT
......
......@@ -24,7 +24,13 @@ static __always_inline int preempt_count(void)
static __always_inline void preempt_count_set(int pc)
{
raw_cpu_write_4(__preempt_count, pc);
int old, new;
do {
old = raw_cpu_read_4(__preempt_count);
new = (old & PREEMPT_NEED_RESCHED) |
(pc & ~PREEMPT_NEED_RESCHED);
} while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old);
}
/*
......
......@@ -146,4 +146,36 @@ struct pci_bus;
int x86_pci_root_bus_node(int bus);
void x86_pci_root_bus_resources(int bus, struct list_head *resources);
extern bool x86_topology_update;
#ifdef CONFIG_SCHED_MC_PRIO
#include <asm/percpu.h>
DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
extern unsigned int __read_mostly sysctl_sched_itmt_enabled;
/* Interface to set priority of a cpu */
void sched_set_itmt_core_prio(int prio, int core_cpu);
/* Interface to notify scheduler that system supports ITMT */
int sched_set_itmt_support(void);
/* Interface to notify scheduler that system revokes ITMT support */
void sched_clear_itmt_support(void);
#else /* CONFIG_SCHED_MC_PRIO */
#define sysctl_sched_itmt_enabled 0
static inline void sched_set_itmt_core_prio(int prio, int core_cpu)
{
}
static inline int sched_set_itmt_support(void)
{
return 0;
}
static inline void sched_clear_itmt_support(void)
{
}
#endif /* CONFIG_SCHED_MC_PRIO */
#endif /* _ASM_X86_TOPOLOGY_H */
......@@ -68,6 +68,12 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
__chk_range_not_ok((unsigned long __force)(addr), size, limit); \
})
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task())
#else
# define WARN_ON_IN_IRQ()
#endif
/**
* access_ok: - Checks if a user space pointer is valid
* @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
......@@ -88,8 +94,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
* checks that the pointer is in the user space range - after calling
* this function, memory access functions may still return -EFAULT.
*/
#define access_ok(type, addr, size) \
likely(!__range_not_ok(addr, size, user_addr_max()))
#define access_ok(type, addr, size) \
({ \
WARN_ON_IN_IRQ(); \
likely(!__range_not_ok(addr, size, user_addr_max())); \
})
/*
* These are the main single-value transfer routines. They automatically
......
......@@ -123,6 +123,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
ifdef CONFIG_FRAME_POINTER
obj-y += unwind_frame.o
......
......@@ -906,14 +906,14 @@ static int apm_cpu_idle(struct cpuidle_device *dev,
static int use_apm_idle; /* = 0 */
static unsigned int last_jiffies; /* = 0 */
static unsigned int last_stime; /* = 0 */
cputime_t stime;
cputime_t stime, utime;
int apm_idle_done = 0;
unsigned int jiffies_since_last_check = jiffies - last_jiffies;
unsigned int bucket;
recalc:
task_cputime(current, NULL, &stime);
task_cputime(current, &utime, &stime);
if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
use_apm_idle = 0;
} else if (jiffies_since_last_check > idle_period) {
......
/*
* itmt.c: Support Intel Turbo Boost Max Technology 3.0
*
* (C) Copyright 2016 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*
* On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
* the maximum turbo frequencies of some cores in a CPU package may be
* higher than for the other cores in the same package. In that case,
* better performance can be achieved by making the scheduler prefer
* to run tasks on the CPUs with higher max turbo frequencies.
*
* This file provides functions and data structures for enabling the
* scheduler to favor scheduling on cores can be boosted to a higher
* frequency under ITMT.
*/
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/sysctl.h>
#include <linux/nodemask.h>
static DEFINE_MUTEX(itmt_update_mutex);
DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
/* Boolean to track if system has ITMT capabilities */
static bool __read_mostly sched_itmt_capable;
/*
* Boolean to control whether we want to move processes to cpu capable
* of higher turbo frequency for cpus supporting Intel Turbo Boost Max
* Technology 3.0.
*
* It can be set via /proc/sys/kernel/sched_itmt_enabled
*/
unsigned int __read_mostly sysctl_sched_itmt_enabled;
static int sched_itmt_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
loff_t *ppos)
{
unsigned int old_sysctl;
int ret;
mutex_lock(&itmt_update_mutex);
if (!sched_itmt_capable) {
mutex_unlock(&itmt_update_mutex);
return -EINVAL;
}
old_sysctl = sysctl_sched_itmt_enabled;
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
x86_topology_update = true;
rebuild_sched_domains();
}
mutex_unlock(&itmt_update_mutex);
return ret;
}
static unsigned int zero;
static unsigned int one = 1;
static struct ctl_table itmt_kern_table[] = {
{
.procname = "sched_itmt_enabled",
.data = &sysctl_sched_itmt_enabled,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_itmt_update_handler,
.extra1 = &zero,
.extra2 = &one,
},
{}
};
static struct ctl_table itmt_root_table[] = {
{
.procname = "kernel",
.mode = 0555,
.child = itmt_kern_table,
},
{}
};
static struct ctl_table_header *itmt_sysctl_header;
/**
* sched_set_itmt_support() - Indicate platform supports ITMT
*
* This function is used by the OS to indicate to scheduler that the platform
* is capable of supporting the ITMT feature.
*
* The current scheme has the pstate driver detects if the system
* is ITMT capable and call sched_set_itmt_support.
*
* This must be done only after sched_set_itmt_core_prio
* has been called to set the cpus' priorities.
* It must not be called with cpu hot plug lock
* held as we need to acquire the lock to rebuild sched domains
* later.
*
* Return: 0 on success
*/
int sched_set_itmt_support(void)
{
mutex_lock(&itmt_update_mutex);
if (sched_itmt_capable) {
mutex_unlock(&itmt_update_mutex);
return 0;
}
itmt_sysctl_header = register_sysctl_table(itmt_root_table);
if (!itmt_sysctl_header) {
mutex_unlock(&itmt_update_mutex);
return -ENOMEM;
}
sched_itmt_capable = true;
sysctl_sched_itmt_enabled = 1;
if (sysctl_sched_itmt_enabled) {
x86_topology_update = true;
rebuild_sched_domains();
}
mutex_unlock(&itmt_update_mutex);
return 0;
}
/**
* sched_clear_itmt_support() - Revoke platform's support of ITMT
*
* This function is used by the OS to indicate that it has
* revoked the platform's support of ITMT feature.
*
* It must not be called with cpu hot plug lock
* held as we need to acquire the lock to rebuild sched domains
* later.
*/
void sched_clear_itmt_support(void)
{
mutex_lock(&itmt_update_mutex);
if (!sched_itmt_capable) {
mutex_unlock(&itmt_update_mutex);
return;
}
sched_itmt_capable = false;
if (itmt_sysctl_header) {
unregister_sysctl_table(itmt_sysctl_header);
itmt_sysctl_header = NULL;
}
if (sysctl_sched_itmt_enabled) {
/* disable sched_itmt if we are no longer ITMT capable */
sysctl_sched_itmt_enabled = 0;
x86_topology_update = true;
rebuild_sched_domains();
}
mutex_unlock(&itmt_update_mutex);
}
int arch_asym_cpu_priority(int cpu)
{
return per_cpu(sched_core_priority, cpu);
}
/**
* sched_set_itmt_core_prio() - Set CPU priority based on ITMT
* @prio: Priority of cpu core
* @core_cpu: The cpu number associated with the core
*
* The pstate driver will find out the max boost frequency
* and call this function to set a priority proportional
* to the max boost frequency. CPU with higher boost
* frequency will receive higher priority.
*
* No need to rebuild sched domain after updating
* the CPU priorities. The sched domains have no
* dependency on CPU priorities.
*/
void sched_set_itmt_core_prio(int prio, int core_cpu)
{
int cpu, i = 1;
for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
int smt_prio;
/*
* Ensure that the siblings are moved to the end
* of the priority chain and only used when
* all other high priority cpus are out of capacity.
*/
smt_prio = prio * smp_num_siblings / i;
per_cpu(sched_core_priority, cpu) = smt_prio;
i++;
}
}
......@@ -109,6 +109,17 @@ static bool logical_packages_frozen __read_mostly;
/* Maximum number of SMT threads on any online core */
int __max_smt_threads __read_mostly;
/* Flag to indicate if a complete sched domain rebuild is required */
bool x86_topology_update;
int arch_update_cpu_topology(void)
{
int retval = x86_topology_update;
x86_topology_update = false;
return retval;
}
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
......@@ -471,22 +482,42 @@ static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
return false;
}
#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_MC)
static inline int x86_sched_itmt_flags(void)
{
return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
}
#ifdef CONFIG_SCHED_MC
static int x86_core_flags(void)
{
return cpu_core_flags() | x86_sched_itmt_flags();
}
#endif
#ifdef CONFIG_SCHED_SMT
static int x86_smt_flags(void)
{
return cpu_smt_flags() | x86_sched_itmt_flags();
}
#endif
#endif
static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
{ NULL, },
};
static struct sched_domain_topology_level x86_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
#endif
#ifdef CONFIG_SCHED_MC
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
#endif
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
......
......@@ -331,6 +331,16 @@ static void acpi_bus_osc_support(void)
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_HOTPLUG_OST_SUPPORT;
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_PCLPI_SUPPORT;
#ifdef CONFIG_X86
if (boot_cpu_has(X86_FEATURE_HWP)) {
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_SUPPORT;
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPCV2_SUPPORT;
}
#endif
if (IS_ENABLED(CONFIG_SCHED_MC_PRIO))
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_CPC_DIVERSE_HIGH_SUPPORT;
if (!ghes_disable)
capbuf[OSC_SUPPORT_DWORD] |= OSC_SB_APEI_SUPPORT;
if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)))
......
......@@ -6,6 +6,7 @@ config X86_INTEL_PSTATE
bool "Intel P state control"
depends on X86
select ACPI_PROCESSOR if ACPI
select ACPI_CPPC_LIB if X86_64 && ACPI && SCHED_MC_PRIO
help
This driver provides a P state for Intel core processors.
The driver implements an internal governor and will become
......
......@@ -44,6 +44,7 @@
#ifdef CONFIG_ACPI
#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>
#endif
#define FRAC_BITS 8
......@@ -379,14 +380,67 @@ static bool intel_pstate_get_ppc_enable_status(void)
return acpi_ppc;
}
#ifdef CONFIG_ACPI_CPPC_LIB
/* The work item is needed to avoid CPU hotplug locking issues */
static void intel_pstste_sched_itmt_work_fn(struct work_struct *work)
{
sched_set_itmt_support();
}
static DECLARE_WORK(sched_itmt_work, intel_pstste_sched_itmt_work_fn);
static void intel_pstate_set_itmt_prio(int cpu)
{
struct cppc_perf_caps cppc_perf;
static u32 max_highest_perf = 0, min_highest_perf = U32_MAX;
int ret;
ret = cppc_get_perf_caps(cpu, &cppc_perf);
if (ret)
return;
/*
* The priorities can be set regardless of whether or not
* sched_set_itmt_support(true) has been called and it is valid to
* update them at any time after it has been called.
*/
sched_set_itmt_core_prio(cppc_perf.highest_perf, cpu);
if (max_highest_perf <= min_highest_perf) {
if (cppc_perf.highest_perf > max_highest_perf)
max_highest_perf = cppc_perf.highest_perf;
if (cppc_perf.highest_perf < min_highest_perf)
min_highest_perf = cppc_perf.highest_perf;
if (max_highest_perf > min_highest_perf) {
/*
* This code can be run during CPU online under the
* CPU hotplug locks, so sched_set_itmt_support()
* cannot be called from here. Queue up a work item
* to invoke it.
*/
schedule_work(&sched_itmt_work);
}
}
}
#else
static void intel_pstate_set_itmt_prio(int cpu)
{
}
#endif
static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
{
struct cpudata *cpu;
int ret;
int i;
if (hwp_active)
if (hwp_active) {
intel_pstate_set_itmt_prio(policy->cpu);
return;
}
if (!intel_pstate_get_ppc_enable_status())
return;
......
......@@ -7,7 +7,6 @@ typedef unsigned long __nocast cputime_t;
#define cputime_one_jiffy jiffies_to_cputime(1)
#define cputime_to_jiffies(__ct) (__force unsigned long)(__ct)
#define cputime_to_scaled(__ct) (__ct)
#define jiffies_to_cputime(__hz) (__force cputime_t)(__hz)
typedef u64 __nocast cputime64_t;
......
......@@ -34,7 +34,6 @@ typedef u64 __nocast cputime64_t;
*/
#define cputime_to_jiffies(__ct) \
cputime_div(__ct, NSEC_PER_SEC / HZ)
#define cputime_to_scaled(__ct) (__ct)
#define jiffies_to_cputime(__jif) \
(__force cputime_t)((__jif) * (NSEC_PER_SEC / HZ))
#define cputime64_to_jiffies64(__ct) \
......
......@@ -469,6 +469,7 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context);
#define OSC_SB_CPCV2_SUPPORT 0x00000040
#define OSC_SB_PCLPI_SUPPORT 0x00000080
#define OSC_SB_OSLPI_SUPPORT 0x00000100
#define OSC_SB_CPC_DIVERSE_HIGH_SUPPORT 0x00001000
extern bool osc_sb_apei_support_acked;
extern bool osc_pc_lpi_support_confirmed;
......
......@@ -78,8 +78,8 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
return kstat_cpu(cpu).irqs_sum;
}
extern void account_user_time(struct task_struct *, cputime_t, cputime_t);
extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t);
extern void account_user_time(struct task_struct *, cputime_t);
extern void account_system_time(struct task_struct *, int, cputime_t);
extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t);
......
......@@ -48,6 +48,7 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
__k; \
})
void free_kthread_struct(struct task_struct *k);
void kthread_bind(struct task_struct *k, unsigned int cpu);
void kthread_bind_mask(struct task_struct *k, const struct cpumask *mask);
int kthread_stop(struct task_struct *k);
......
......@@ -65,19 +65,24 @@
/*
* Are we doing bottom half or hardware interrupt processing?
* Are we in a softirq context? Interrupt context?
* in_softirq - Are we currently processing softirq or have bh disabled?
* in_serving_softirq - Are we currently processing softirq?
*
* in_irq() - We're in (hard) IRQ context
* in_softirq() - We have BH disabled, or are processing softirqs
* in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled
* in_serving_softirq() - We're in softirq context
* in_nmi() - We're in NMI context
* in_task() - We're in task context
*
* Note: due to the BH disabled confusion: in_softirq(),in_interrupt() really
* should not be used in new code.
*/
#define in_irq() (hardirq_count())
#define in_softirq() (softirq_count())
#define in_interrupt() (irq_count())
#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
/*
* Are we in NMI context?
*/
#define in_nmi() (preempt_count() & NMI_MASK)
#define in_nmi() (preempt_count() & NMI_MASK)
#define in_task() (!(preempt_count() & \
(NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
/*
* The preempt_count offset after preempt_disable();
......
......@@ -262,20 +262,9 @@ extern char ___assert_task_state[1 - 2*!!(
#define set_task_state(tsk, state_value) \
do { \
(tsk)->task_state_change = _THIS_IP_; \
smp_store_mb((tsk)->state, (state_value)); \
smp_store_mb((tsk)->state, (state_value)); \
} while (0)
/*
* set_current_state() includes a barrier so that the write of current->state
* is correctly serialised wrt the caller's subsequent test of whether to
* actually sleep:
*
* set_current_state(TASK_UNINTERRUPTIBLE);
* if (do_i_need_to_sleep())
* schedule();
*
* If the caller does not need such serialisation then use __set_current_state()
*/
#define __set_current_state(state_value) \
do { \
current->task_state_change = _THIS_IP_; \
......@@ -284,11 +273,19 @@ extern char ___assert_task_state[1 - 2*!!(
#define set_current_state(state_value) \
do { \
current->task_state_change = _THIS_IP_; \
smp_store_mb(current->state, (state_value)); \
smp_store_mb(current->state, (state_value)); \
} while (0)
#else
/*
* @tsk had better be current, or you get to keep the pieces.
*
* The only reason is that computing current can be more expensive than
* using a pointer that's already available.
*
* Therefore, see set_current_state().
*/
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
#define set_task_state(tsk, state_value) \
......@@ -299,11 +296,34 @@ extern char ___assert_task_state[1 - 2*!!(
* is correctly serialised wrt the caller's subsequent test of whether to
* actually sleep:
*
* for (;;) {
* set_current_state(TASK_UNINTERRUPTIBLE);
* if (do_i_need_to_sleep())
* schedule();
* if (!need_sleep)
* break;
*
* schedule();
* }
* __set_current_state(TASK_RUNNING);
*
* If the caller does not need such serialisation (because, for instance, the
* condition test and condition change and wakeup are under the same lock) then
* use __set_current_state().
*
* The above is typically ordered against the wakeup, which does:
*
* need_sleep = false;
* wake_up_state(p, TASK_UNINTERRUPTIBLE);
*
* Where wake_up_state() (and all other wakeup primitives) imply enough
* barriers to order the store of the variable against wakeup.
*
* Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
* once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
* TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
*
* If the caller does not need such serialisation then use __set_current_state()
* This is obviously fine, since they both store the exact same value.
*
* Also see the comments of try_to_wake_up().
*/
#define __set_current_state(state_value) \
do { current->state = (state_value); } while (0)
......@@ -1057,6 +1077,8 @@ static inline int cpu_numa_flags(void)
}
#endif
extern int arch_asym_cpu_priority(int cpu);
struct sched_domain_attr {
int relax_domain_level;
};
......@@ -1627,7 +1649,10 @@ struct task_struct {
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t utime, stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
cputime_t utimescaled, stimescaled;
#endif
cputime_t gtime;
struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
......@@ -2220,34 +2245,38 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void task_cputime(struct task_struct *t,
cputime_t *utime, cputime_t *stime);
extern void task_cputime_scaled(struct task_struct *t,
cputime_t *utimescaled, cputime_t *stimescaled);
extern cputime_t task_gtime(struct task_struct *t);
#else
static inline void task_cputime(struct task_struct *t,
cputime_t *utime, cputime_t *stime)
{
if (utime)
*utime = t->utime;
if (stime)
*stime = t->stime;
*utime = t->utime;
*stime = t->stime;
}
static inline cputime_t task_gtime(struct task_struct *t)
{
return t->gtime;
}
#endif
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
static inline void task_cputime_scaled(struct task_struct *t,
cputime_t *utimescaled,
cputime_t *stimescaled)
{
if (utimescaled)
*utimescaled = t->utimescaled;
if (stimescaled)
*stimescaled = t->stimescaled;
*utimescaled = t->utimescaled;
*stimescaled = t->stimescaled;
}
static inline cputime_t task_gtime(struct task_struct *t)
#else
static inline void task_cputime_scaled(struct task_struct *t,
cputime_t *utimescaled,
cputime_t *stimescaled)
{
return t->gtime;
task_cputime(t, utimescaled, stimescaled);
}
#endif
extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
......
......@@ -36,7 +36,6 @@ extern unsigned int sysctl_numa_balancing_scan_size;
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
extern unsigned int sysctl_sched_time_avg;
extern unsigned int sysctl_sched_shares_window;
int sched_proc_update_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length,
......
......@@ -354,6 +354,8 @@ void free_task(struct task_struct *tsk)
ftrace_graph_exit_task(tsk);
put_seccomp_filter(tsk);
arch_release_task_struct(tsk);
if (tsk->flags & PF_KTHREAD)
free_kthread_struct(tsk);
free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
......@@ -1551,7 +1553,9 @@ static __latent_entropy struct task_struct *copy_process(
init_sigpending(&p->pending);
p->utime = p->stime = p->gtime = 0;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
p->utimescaled = p->stimescaled = 0;
#endif
prev_cputime_init(&p->prev_cputime);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
......
......@@ -53,20 +53,29 @@ enum KTHREAD_BITS {
KTHREAD_IS_PARKED,
};
#define __to_kthread(vfork) \
container_of(vfork, struct kthread, exited)
static inline void set_kthread_struct(void *kthread)
{
/*
* We abuse ->set_child_tid to avoid the new member and because it
* can't be wrongly copied by copy_process(). We also rely on fact
* that the caller can't exec, so PF_KTHREAD can't be cleared.
*/
current->set_child_tid = (__force void __user *)kthread;
}
static inline struct kthread *to_kthread(struct task_struct *k)
{
return __to_kthread(k->vfork_done);
WARN_ON(!(k->flags & PF_KTHREAD));
return (__force void *)k->set_child_tid;
}
static struct kthread *to_live_kthread(struct task_struct *k)
void free_kthread_struct(struct task_struct *k)
{
struct completion *vfork = ACCESS_ONCE(k->vfork_done);
if (likely(vfork) && try_get_task_stack(k))
return __to_kthread(vfork);
return NULL;
/*
* Can be NULL if this kthread was created by kernel_thread()
* or if kmalloc() in kthread() failed.
*/
kfree(to_kthread(k));
}
/**
......@@ -181,14 +190,11 @@ static int kthread(void *_create)
int (*threadfn)(void *data) = create->threadfn;
void *data = create->data;
struct completion *done;
struct kthread self;
struct kthread *self;
int ret;
self.flags = 0;
self.data = data;
init_completion(&self.exited);
init_completion(&self.parked);
current->vfork_done = &self.exited;
self = kmalloc(sizeof(*self), GFP_KERNEL);
set_kthread_struct(self);
/* If user was SIGKILLed, I release the structure. */
done = xchg(&create->done, NULL);
......@@ -196,6 +202,19 @@ static int kthread(void *_create)
kfree(create);
do_exit(-EINTR);
}
if (!self) {
create->result = ERR_PTR(-ENOMEM);
complete(done);
do_exit(-ENOMEM);
}
self->flags = 0;
self->data = data;
init_completion(&self->exited);
init_completion(&self->parked);
current->vfork_done = &self->exited;
/* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_UNINTERRUPTIBLE);
create->result = current;
......@@ -203,12 +222,10 @@ static int kthread(void *_create)
schedule();
ret = -EINTR;
if (!test_bit(KTHREAD_SHOULD_STOP, &self.flags)) {
__kthread_parkme(&self);
if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
__kthread_parkme(self);
ret = threadfn(data);
}
/* we can't just return, we must preserve "self" on stack */
do_exit(ret);
}
......@@ -409,8 +426,18 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
return p;
}
static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
/**
* kthread_unpark - unpark a thread created by kthread_create().
* @k: thread created by kthread_create().
*
* Sets kthread_should_park() for @k to return false, wakes it, and
* waits for it to return. If the thread is marked percpu then its
* bound to the cpu again.
*/
void kthread_unpark(struct task_struct *k)
{
struct kthread *kthread = to_kthread(k);
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
/*
* We clear the IS_PARKED bit here as we don't wait
......@@ -428,24 +455,6 @@ static void __kthread_unpark(struct task_struct *k, struct kthread *kthread)
wake_up_state(k, TASK_PARKED);
}
}
/**
* kthread_unpark - unpark a thread created by kthread_create().
* @k: thread created by kthread_create().
*
* Sets kthread_should_park() for @k to return false, wakes it, and
* waits for it to return. If the thread is marked percpu then its
* bound to the cpu again.
*/
void kthread_unpark(struct task_struct *k)
{
struct kthread *kthread = to_live_kthread(k);
if (kthread) {
__kthread_unpark(k, kthread);
put_task_stack(k);
}
}
EXPORT_SYMBOL_GPL(kthread_unpark);
/**
......@@ -462,21 +471,20 @@ EXPORT_SYMBOL_GPL(kthread_unpark);
*/
int kthread_park(struct task_struct *k)
{
struct kthread *kthread = to_live_kthread(k);
int ret = -ENOSYS;
if (kthread) {
if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
if (k != current) {
wake_up_process(k);
wait_for_completion(&kthread->parked);
}
struct kthread *kthread = to_kthread(k);
if (WARN_ON(k->flags & PF_EXITING))
return -ENOSYS;
if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) {
set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
if (k != current) {
wake_up_process(k);
wait_for_completion(&kthread->parked);
}
put_task_stack(k);
ret = 0;
}
return ret;
return 0;
}
EXPORT_SYMBOL_GPL(kthread_park);
......@@ -503,14 +511,11 @@ int kthread_stop(struct task_struct *k)
trace_sched_kthread_stop(k);
get_task_struct(k);
kthread = to_live_kthread(k);
if (kthread) {
set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
__kthread_unpark(k, kthread);
wake_up_process(k);
wait_for_completion(&kthread->exited);
put_task_stack(k);
}
kthread = to_kthread(k);
set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
kthread_unpark(k);
wake_up_process(k);
wait_for_completion(&kthread->exited);
ret = k->exit_code;
put_task_struct(k);
......@@ -636,6 +641,7 @@ __kthread_create_worker(int cpu, unsigned int flags,
{
struct kthread_worker *worker;
struct task_struct *task;
int node = -1;
worker = kzalloc(sizeof(*worker), GFP_KERNEL);
if (!worker)
......@@ -643,25 +649,17 @@ __kthread_create_worker(int cpu, unsigned int flags,
kthread_init_worker(worker);
if (cpu >= 0) {
char name[TASK_COMM_LEN];
/*
* kthread_create_worker_on_cpu() allows to pass a generic
* namefmt in compare with kthread_create_on_cpu. We need
* to format it here.
*/
vsnprintf(name, sizeof(name), namefmt, args);
task = kthread_create_on_cpu(kthread_worker_fn, worker,
cpu, name);
} else {
task = __kthread_create_on_node(kthread_worker_fn, worker,
-1, namefmt, args);
}
if (cpu >= 0)
node = cpu_to_node(cpu);
task = __kthread_create_on_node(kthread_worker_fn, worker,
node, namefmt, args);
if (IS_ERR(task))
goto fail_task;
if (cpu >= 0)
kthread_bind(task, cpu);
worker->flags = flags;
worker->task = task;
wake_up_process(task);
......
......@@ -1995,14 +1995,15 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
* @state: the mask of task states that can be woken
* @wake_flags: wake modifier flags (WF_*)
*
* Put it on the run-queue if it's not already there. The "current"
* thread is always on the run-queue (except when the actual
* re-schedule is in progress), and as such you're allowed to do
* the simpler "current->state = TASK_RUNNING" to mark yourself
* runnable without the overhead of this.
* If (@state & @p->state) @p->state = TASK_RUNNING.
*
* Return: %true if @p was woken up, %false if it was already running.
* or @state didn't match @p's state.
* If the task was not queued/runnable, also place it back on a runqueue.
*
* Atomic against schedule() which would dequeue a task, also see
* set_current_state().
*
* Return: %true if @p->state changes (an actual wakeup was done),
* %false otherwise.
*/
static int
try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
......@@ -5707,7 +5708,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
printk(KERN_CONT " %*pbl",
cpumask_pr_args(sched_group_cpus(group)));
if (group->sgc->capacity != SCHED_CAPACITY_SCALE) {
printk(KERN_CONT " (cpu_capacity = %d)",
printk(KERN_CONT " (cpu_capacity = %lu)",
group->sgc->capacity);
}
......@@ -6184,6 +6185,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
* die on a /0 trap.
*/
sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
/*
* Make sure the first group of this domain contains the
......@@ -6301,7 +6303,22 @@ static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
WARN_ON(!sg);
do {
int cpu, max_cpu = -1;
sg->group_weight = cpumask_weight(sched_group_cpus(sg));
if (!(sd->flags & SD_ASYM_PACKING))
goto next;
for_each_cpu(cpu, sched_group_cpus(sg)) {
if (max_cpu < 0)
max_cpu = cpu;
else if (sched_asym_prefer(cpu, max_cpu))
max_cpu = cpu;
}
sg->asym_prefer_cpu = max_cpu;
next:
sg = sg->next;
} while (sg != sd->groups);
......@@ -7602,6 +7619,7 @@ void __init sched_init(void)
#ifdef CONFIG_FAIR_GROUP_SCHED
root_task_group.shares = ROOT_TASK_GROUP_LOAD;
INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
/*
* How much cpu bandwidth does root_task_group get?
*
......
......@@ -297,7 +297,7 @@ static int cpuacct_stats_show(struct seq_file *sf, void *v)
for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) {
seq_printf(sf, "%s %lld\n",
cpuacct_stat_desc[stat],
cputime64_to_clock_t(val[stat]));
(long long)cputime64_to_clock_t(val[stat]));
}
return 0;
......
......@@ -128,16 +128,13 @@ static inline void task_group_account_field(struct task_struct *p, int index,
* Account user cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in user space since the last update
* @cputime_scaled: cputime scaled by cpu frequency
*/
void account_user_time(struct task_struct *p, cputime_t cputime,
cputime_t cputime_scaled)
void account_user_time(struct task_struct *p, cputime_t cputime)
{
int index;
/* Add user time to process. */
p->utime += cputime;
p->utimescaled += cputime_scaled;
account_group_user_time(p, cputime);
index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
......@@ -153,16 +150,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
* Account guest cpu time to a process.
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in virtual machine since the last update
* @cputime_scaled: cputime scaled by cpu frequency
*/
static void account_guest_time(struct task_struct *p, cputime_t cputime,
cputime_t cputime_scaled)
static void account_guest_time(struct task_struct *p, cputime_t cputime)
{
u64 *cpustat = kcpustat_this_cpu->cpustat;
/* Add guest time to process. */
p->utime += cputime;
p->utimescaled += cputime_scaled;
account_group_user_time(p, cputime);
p->gtime += cputime;
......@@ -180,16 +174,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime,
* Account system cpu time to a process and desired cpustat field
* @p: the process that the cpu time gets accounted to
* @cputime: the cpu time spent in kernel space since the last update
* @cputime_scaled: cputime scaled by cpu frequency
* @target_cputime64: pointer to cpustat field that has to be updated
* @index: pointer to cpustat field that has to be updated
*/
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
cputime_t cputime_scaled, int index)
void __account_system_time(struct task_struct *p, cputime_t cputime, int index)
{
/* Add system time to process. */
p->stime += cputime;
p->stimescaled += cputime_scaled;
account_group_system_time(p, cputime);
/* Add system time to cpustat. */
......@@ -204,15 +195,14 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
* @p: the process that the cpu time gets accounted to
* @hardirq_offset: the offset to subtract from hardirq_count()
* @cputime: the cpu time spent in kernel space since the last update
* @cputime_scaled: cputime scaled by cpu frequency
*/
void account_system_time(struct task_struct *p, int hardirq_offset,
cputime_t cputime, cputime_t cputime_scaled)
cputime_t cputime)
{
int index;
if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
account_guest_time(p, cputime, cputime_scaled);
account_guest_time(p, cputime);
return;
}
......@@ -223,7 +213,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
else
index = CPUTIME_SYSTEM;
__account_system_time(p, cputime, cputime_scaled, index);
__account_system_time(p, cputime, index);
}
/*
......@@ -390,7 +380,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
struct rq *rq, int ticks)
{
u64 cputime = (__force u64) cputime_one_jiffy * ticks;
cputime_t scaled, other;
cputime_t other;
/*
* When returning from idle, many ticks can get accounted at
......@@ -403,7 +393,6 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
if (other >= cputime)
return;
cputime -= other;
scaled = cputime_to_scaled(cputime);
if (this_cpu_ksoftirqd() == p) {
/*
......@@ -411,15 +400,15 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
* So, we have to handle it separately here.
* Also, p->stime needs to be updated for ksoftirqd.
*/
__account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
__account_system_time(p, cputime, CPUTIME_SOFTIRQ);
} else if (user_tick) {
account_user_time(p, cputime, scaled);
account_user_time(p, cputime);
} else if (p == rq->idle) {
account_idle_time(cputime);
} else if (p->flags & PF_VCPU) { /* System time or guest time */
account_guest_time(p, cputime, scaled);
account_guest_time(p, cputime);
} else {
__account_system_time(p, cputime, scaled, CPUTIME_SYSTEM);
__account_system_time(p, cputime, CPUTIME_SYSTEM);
}
}
......@@ -502,7 +491,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
*/
void account_process_tick(struct task_struct *p, int user_tick)
{
cputime_t cputime, scaled, steal;
cputime_t cputime, steal;
struct rq *rq = this_rq();
if (vtime_accounting_cpu_enabled())
......@@ -520,12 +509,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
return;
cputime -= steal;
scaled = cputime_to_scaled(cputime);
if (user_tick)
account_user_time(p, cputime, scaled);
account_user_time(p, cputime);
else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
account_system_time(p, HARDIRQ_OFFSET, cputime, scaled);
account_system_time(p, HARDIRQ_OFFSET, cputime);
else
account_idle_time(cputime);
}
......@@ -746,7 +734,7 @@ static void __vtime_account_system(struct task_struct *tsk)
{
cputime_t delta_cpu = get_vtime_delta(tsk);
account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
account_system_time(tsk, irq_count(), delta_cpu);
}
void vtime_account_system(struct task_struct *tsk)
......@@ -767,7 +755,7 @@ void vtime_account_user(struct task_struct *tsk)
tsk->vtime_snap_whence = VTIME_SYS;
if (vtime_delta(tsk)) {
delta_cpu = get_vtime_delta(tsk);
account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
account_user_time(tsk, delta_cpu);
}
write_seqcount_end(&tsk->vtime_seqcount);
}
......@@ -863,29 +851,25 @@ cputime_t task_gtime(struct task_struct *t)
* add up the pending nohz execution time since the last
* cputime snapshot.
*/
static void
fetch_task_cputime(struct task_struct *t,
cputime_t *u_dst, cputime_t *s_dst,
cputime_t *u_src, cputime_t *s_src,
cputime_t *udelta, cputime_t *sdelta)
void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
cputime_t delta;
unsigned int seq;
unsigned long long delta;
do {
*udelta = 0;
*sdelta = 0;
if (!vtime_accounting_enabled()) {
*utime = t->utime;
*stime = t->stime;
return;
}
do {
seq = read_seqcount_begin(&t->vtime_seqcount);
if (u_dst)
*u_dst = *u_src;
if (s_dst)
*s_dst = *s_src;
*utime = t->utime;
*stime = t->stime;
/* Task is sleeping, nothing to add */
if (t->vtime_snap_whence == VTIME_INACTIVE ||
is_idle_task(t))
if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t))
continue;
delta = vtime_delta(t);
......@@ -894,54 +878,10 @@ fetch_task_cputime(struct task_struct *t,
* Task runs either in user or kernel space, add pending nohz time to
* the right place.
*/
if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
*udelta = delta;
} else {
if (t->vtime_snap_whence == VTIME_SYS)
*sdelta = delta;
}
if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU)
*utime += delta;
else if (t->vtime_snap_whence == VTIME_SYS)
*stime += delta;
} while (read_seqcount_retry(&t->vtime_seqcount, seq));
}
void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
cputime_t udelta, sdelta;
if (!vtime_accounting_enabled()) {
if (utime)
*utime = t->utime;
if (stime)
*stime = t->stime;
return;
}
fetch_task_cputime(t, utime, stime, &t->utime,
&t->stime, &udelta, &sdelta);
if (utime)
*utime += udelta;
if (stime)
*stime += sdelta;
}
void task_cputime_scaled(struct task_struct *t,
cputime_t *utimescaled, cputime_t *stimescaled)
{
cputime_t udelta, sdelta;
if (!vtime_accounting_enabled()) {
if (utimescaled)
*utimescaled = t->utimescaled;
if (stimescaled)
*stimescaled = t->stimescaled;
return;
}
fetch_task_cputime(t, utimescaled, stimescaled,
&t->utimescaled, &t->stimescaled, &udelta, &sdelta);
if (utimescaled)
*utimescaled += cputime_to_scaled(udelta);
if (stimescaled)
*stimescaled += cputime_to_scaled(sdelta);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
......@@ -586,7 +586,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
/*
* The task might have changed its scheduling policy to something
* different than SCHED_DEADLINE (through switched_fromd_dl()).
* different than SCHED_DEADLINE (through switched_from_dl()).
*/
if (!dl_task(p)) {
__dl_clear_params(p);
......@@ -1137,7 +1137,7 @@ pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct pin_cookie coo
pull_dl_task(rq);
lockdep_repin_lock(&rq->lock, cookie);
/*
* pull_rt_task() can drop (and re-acquire) rq->lock; this
* pull_dl_task() can drop (and re-acquire) rq->lock; this
* means a stop task can slip in, in which case we need to
* re-start task selection.
*/
......
This diff is collapsed.
......@@ -404,6 +404,7 @@ struct cfs_rq {
unsigned long runnable_load_avg;
#ifdef CONFIG_FAIR_GROUP_SCHED
unsigned long tg_load_avg_contrib;
unsigned long propagate_avg;
#endif
atomic_long_t removed_load_avg, removed_util_avg;
#ifndef CONFIG_64BIT
......@@ -539,6 +540,11 @@ struct dl_rq {
#ifdef CONFIG_SMP
static inline bool sched_asym_prefer(int a, int b)
{
return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
}
/*
* We add the notion of a root-domain which will be used to define per-domain
* variables. Each exclusive cpuset essentially defines an island domain by
......@@ -623,6 +629,7 @@ struct rq {
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
struct list_head leaf_cfs_rq_list;
struct list_head *tmp_alone_branch;
#endif /* CONFIG_FAIR_GROUP_SCHED */
/*
......@@ -892,7 +899,8 @@ struct sched_group_capacity {
* CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
* for a single CPU.
*/
unsigned int capacity;
unsigned long capacity;
unsigned long min_capacity; /* Min per-CPU capacity in group */
unsigned long next_update;
int imbalance; /* XXX unrelated to capacity but shared group state */
......@@ -905,6 +913,7 @@ struct sched_group {
unsigned int group_weight;
struct sched_group_capacity *sgc;
int asym_prefer_cpu; /* cpu of highest priority in group */
/*
* The CPUs this group covers.
......
......@@ -347,13 +347,6 @@ static struct ctl_table kern_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sched_shares_window_ns",
.data = &sysctl_sched_shares_window,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#ifdef CONFIG_SCHEDSTATS
{
.procname = "sched_schedstats",
......
......@@ -133,9 +133,9 @@ static inline unsigned long long prof_ticks(struct task_struct *p)
}
static inline unsigned long long virt_ticks(struct task_struct *p)
{
cputime_t utime;
cputime_t utime, stime;
task_cputime(p, &utime, NULL);
task_cputime(p, &utime, &stime);
return cputime_to_expires(utime);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment