Commit c4efd6b5 authored by Linus Torvalds

Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (27 commits)
  sched: Use correct macro to display sched_child_runs_first in /proc/sched_debug
  sched: No need for bootmem special cases
  sched: Revert nohz_ratelimit() for now
  sched: Reduce update_group_power() calls
  sched: Update rq->clock for nohz balanced cpus
  sched: Fix spelling of sibling
  sched, cpuset: Drop __cpuexit from cpu hotplug callbacks
  sched: Fix the racy usage of thread_group_cputimer() in fastpath_timer_check()
  sched: run_posix_cpu_timers: Don't check ->exit_state, use lock_task_sighand()
  sched: thread_group_cputime: Simplify, document the "alive" check
  sched: Remove the obsolete exit_state/signal hacks
  sched: task_tick_rt: Remove the obsolete ->signal != NULL check
  sched: __sched_setscheduler: Read the RLIMIT_RTPRIO value lockless
  sched: Fix comments to make them DocBook happy
  sched: Fix fix_small_capacity
  powerpc: Exclude arch_sd_sibiling_asym_packing() on UP
  powerpc: Enable asymmetric SMT scheduling on POWER7
  sched: Add asymmetric group packing option for sibling domain
  sched: Fix capacity calculations for SMT4
  sched: Change nohz idle load balancing logic to push model
  ...
parents 4aed2fd8 0bcfe758
@@ -82,7 +82,7 @@ unsigned long ftrace_return_to_handler(unsigned long retval0,
         unsigned long ret;
         pop_return_trace(&trace, &ret);
-        trace.rettime = cpu_clock(raw_smp_processor_id());
+        trace.rettime = local_clock();
         ftrace_graph_return(&trace);
         if (unlikely(!ret)) {
@@ -126,7 +126,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
                 return;
         }
-        calltime = cpu_clock(raw_smp_processor_id());
+        calltime = local_clock();
         if (push_return_trace(old, calltime,
                               self_addr, &trace.depth) == -EBUSY) {
......
@@ -197,6 +197,7 @@ extern const char *powerpc_base_platform;
 #define CPU_FTR_SAO                  LONG_ASM_CONST(0x0020000000000000)
 #define CPU_FTR_CP_USE_DCBTZ         LONG_ASM_CONST(0x0040000000000000)
 #define CPU_FTR_UNALIGNED_LD_STD     LONG_ASM_CONST(0x0080000000000000)
+#define CPU_FTR_ASYM_SMT             LONG_ASM_CONST(0x0100000000000000)
 #ifndef __ASSEMBLY__
@@ -412,7 +413,7 @@ extern const char *powerpc_base_platform;
             CPU_FTR_MMCRA | CPU_FTR_SMT | \
             CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
             CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-            CPU_FTR_DSCR | CPU_FTR_SAO)
+            CPU_FTR_DSCR | CPU_FTR_SAO | CPU_FTR_ASYM_SMT)
 #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
             CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
             CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
......
@@ -1299,3 +1299,14 @@ unsigned long randomize_et_dyn(unsigned long base)
         return ret;
 }
+#ifdef CONFIG_SMP
+int arch_sd_sibling_asym_packing(void)
+{
+        if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+                printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+                return SD_ASYM_PACKING;
+        }
+        return 0;
+}
+#endif
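Note on the mechanism: the powerpc definition above only takes effect because the scheduler core declares this hook as a weak symbol and ORs its return value into the SMT sibling domain flags (see the sched.h and topology.h hunks below). As a rough sketch only — the generic default lives in the collapsed scheduler diff and is not shown here — a weak fallback would look something like:

/* Illustrative weak default; an architecture that wants asymmetric SMT
 * packing (POWER7 above) provides a strong definition returning
 * SD_ASYM_PACKING instead. */
int __weak arch_sd_sibling_asym_packing(void)
{
        return 0;       /* no asymmetric packing unless the arch asks for it */
}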
@@ -48,6 +48,31 @@ extern ssize_t arch_cpu_release(const char *, size_t);
 #endif
 struct notifier_block;
+/*
+ * CPU notifier priorities.
+ */
+enum {
+        /*
+         * SCHED_ACTIVE marks a cpu which is coming up active during
+         * CPU_ONLINE and CPU_DOWN_FAILED and must be the first
+         * notifier.  CPUSET_ACTIVE adjusts cpuset according to
+         * cpu_active mask right after SCHED_ACTIVE.  During
+         * CPU_DOWN_PREPARE, SCHED_INACTIVE and CPUSET_INACTIVE are
+         * ordered in the similar way.
+         *
+         * This ordering guarantees consistent cpu_active mask and
+         * migration behavior to all cpu notifiers.
+         */
+        CPU_PRI_SCHED_ACTIVE    = INT_MAX,
+        CPU_PRI_CPUSET_ACTIVE   = INT_MAX - 1,
+        CPU_PRI_SCHED_INACTIVE  = INT_MIN + 1,
+        CPU_PRI_CPUSET_INACTIVE = INT_MIN,
+        /* migration should happen before other stuff but after perf */
+        CPU_PRI_PERF            = 20,
+        CPU_PRI_MIGRATION       = 10,
+};
 #ifdef CONFIG_SMP
 /* Need to know about CPUs going up/down? */
 #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE)
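For context, the enum above works because hotplug notifiers with a higher priority are called earlier. A minimal sketch of how a subsystem would slot itself into this ordering (the callback and its registration are hypothetical, not part of the patch):

#include <linux/cpu.h>
#include <linux/notifier.h>

/* Hypothetical hotplug callback, shown only to illustrate priority use. */
static int my_cpu_callback(struct notifier_block *nb,
                           unsigned long action, void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;

        pr_debug("cpu %u: hotplug event %lu\n", cpu, action);
        return NOTIFY_OK;
}

static struct notifier_block my_cpu_nb = {
        .notifier_call  = my_cpu_callback,
        /* higher priority runs earlier: between perf (20) and migration (10) */
        .priority       = CPU_PRI_PERF - 1,
};

/* from init code: register_cpu_notifier(&my_cpu_nb); */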
......
@@ -20,6 +20,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
+extern void cpuset_update_active_cpus(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@@ -132,6 +133,11 @@ static inline void set_mems_allowed(nodemask_t nodemask)
 static inline int cpuset_init(void) { return 0; }
 static inline void cpuset_init_smp(void) {}
+static inline void cpuset_update_active_cpus(void)
+{
+        partition_sched_domains(1, NULL, NULL);
+}
 static inline void cpuset_cpus_allowed(struct task_struct *p,
                                        struct cpumask *mask)
 {
......
@@ -1067,7 +1067,7 @@ static inline void perf_event_disable(struct perf_event *event) { }
 #define perf_cpu_notifier(fn)                                        \
 do {                                                                 \
         static struct notifier_block fn##_nb __cpuinitdata =        \
-                { .notifier_call = fn, .priority = 20 };             \
+                { .notifier_call = fn, .priority = CPU_PRI_PERF };   \
         fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE,                  \
                 (void *)(unsigned long)smp_processor_id());          \
         fn(&fn##_nb, (unsigned long)CPU_STARTING,                    \
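The only functional change in this hunk is swapping the hard-coded priority 20 for the new CPU_PRI_PERF constant; callers keep using the macro as before. A hypothetical usage sketch (callback and init names are illustrative, not from the patch):

static int __cpuinit my_pmu_cpu_notify(struct notifier_block *nb,
                                       unsigned long action, void *hcpu)
{
        /* react to CPU_UP_PREPARE / CPU_STARTING / CPU_ONLINE as needed */
        return NOTIFY_OK;
}

static int __init my_pmu_init(void)
{
        /* the macro replays the visible bring-up events for the current cpu
         * with the notifier block declared at CPU_PRI_PERF */
        perf_cpu_notifier(my_pmu_cpu_notify);
        return 0;
}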
......
@@ -272,19 +272,10 @@ extern int runqueue_is_locked(int cpu);
 extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
-extern int select_nohz_load_balancer(int cpu);
-extern int get_nohz_load_balancer(void);
-extern int nohz_ratelimit(int cpu);
+extern void select_nohz_load_balancer(int stop_tick);
+extern int get_nohz_timer_target(void);
 #else
-static inline int select_nohz_load_balancer(int cpu)
-{
-        return 0;
-}
-static inline int nohz_ratelimit(int cpu)
-{
-        return 0;
-}
+static inline void select_nohz_load_balancer(int stop_tick) { }
 #endif
 /*
@@ -801,7 +792,7 @@ enum cpu_idle_type {
 #define SD_POWERSAVINGS_BALANCE 0x0100  /* Balance for power savings */
 #define SD_SHARE_PKG_RESOURCES  0x0200  /* Domain members share cpu pkg resources */
 #define SD_SERIALIZE            0x0400  /* Only a single load balancing instance */
+#define SD_ASYM_PACKING         0x0800  /* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING       0x1000  /* Prefer to place tasks in a sibling domain */
 enum powersavings_balance_level {
@@ -836,6 +827,8 @@ static inline int sd_balance_for_package_power(void)
         return SD_PREFER_SIBLING;
 }
+extern int __weak arch_sd_sibiling_asym_packing(void);
 /*
  * Optimise SD flags for power savings:
  * SD_BALANCE_NEWIDLE helps agressive task consolidation and power savings.
@@ -857,7 +850,7 @@ struct sched_group {
          * CPU power of this group, SCHED_LOAD_SCALE being max power for a
          * single CPU.
          */
-        unsigned int cpu_power;
+        unsigned int cpu_power, cpu_power_orig;
         /*
          * The CPUs this group covers.
@@ -1693,6 +1686,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
 #define PF_EXITING      0x00000004      /* getting shut down */
 #define PF_EXITPIDONE   0x00000008      /* pi exit done on shut down */
 #define PF_VCPU         0x00000010      /* I'm a virtual CPU */
+#define PF_WQ_WORKER    0x00000020      /* I'm a workqueue worker */
 #define PF_FORKNOEXEC   0x00000040      /* forked but didn't exec */
 #define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
 #define PF_SUPERPRIV    0x00000100      /* used super-user privileges */
@@ -1787,20 +1781,23 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
 #endif
 /*
- * Architectures can set this to 1 if they have specified
- * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
- * but then during bootup it turns out that sched_clock()
- * is reliable after all:
+ * Do not use outside of architecture code which knows its limitations.
+ *
+ * sched_clock() has no promise of monotonicity or bounded drift between
+ * CPUs, use (which you should not) requires disabling IRQs.
+ *
+ * Please use one of the three interfaces below.
  */
-#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
-extern int sched_clock_stable;
-#endif
-/* ftrace calls sched_clock() directly */
 extern unsigned long long notrace sched_clock(void);
+/*
+ * See the comment in kernel/sched_clock.c
+ */
+extern u64 cpu_clock(int cpu);
+extern u64 local_clock(void);
+extern u64 sched_clock_cpu(int cpu);
 extern void sched_clock_init(void);
-extern u64 sched_clock_cpu(int cpu);
 #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
 static inline void sched_clock_tick(void)
@@ -1815,17 +1812,19 @@ static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
 {
 }
 #else
+/*
+ * Architectures can set this to 1 if they have specified
+ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig,
+ * but then during bootup it turns out that sched_clock()
+ * is reliable after all:
+ */
+extern int sched_clock_stable;
 extern void sched_clock_tick(void);
 extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 #endif
-/*
- * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
- * clock constructed from sched_clock():
- */
-extern unsigned long long cpu_clock(int cpu);
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
 extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
......
@@ -103,6 +103,7 @@ int arch_update_cpu_topology(void);
                         | 1*SD_SHARE_PKG_RESOURCES              \
                         | 0*SD_SERIALIZE                        \
                         | 0*SD_PREFER_SIBLING                   \
+                        | arch_sd_sibling_asym_packing()        \
                         ,                                       \
         .last_balance           = jiffies,                      \
         .balance_interval       = 1,                            \
......
@@ -235,11 +235,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
                 return -EINVAL;
         cpu_hotplug_begin();
-        set_cpu_active(cpu, false);
         err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
         if (err) {
-                set_cpu_active(cpu, true);
                 nr_calls--;
                 __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
                 printk("%s: attempt to take down CPU %u failed\n",
@@ -249,7 +246,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
         err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
         if (err) {
-                set_cpu_active(cpu, true);
                 /* CPU didn't die: tell everyone.  Can't complain. */
                 cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);
@@ -321,8 +317,6 @@ static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
                 goto out_notify;
         BUG_ON(!cpu_online(cpu));
-        set_cpu_active(cpu, true);
         /* Now call notifier in preparation. */
         cpu_notify(CPU_ONLINE | mod, hcpu);
......
@@ -2113,31 +2113,17 @@ static void scan_for_empty_cpusets(struct cpuset *root)
  * but making no active use of cpusets.
  *
  * This routine ensures that top_cpuset.cpus_allowed tracks
- * cpu_online_map on each CPU hotplug (cpuhp) event.
+ * cpu_active_mask on each CPU hotplug (cpuhp) event.
  *
  * Called within get_online_cpus().  Needs to call cgroup_lock()
  * before calling generate_sched_domains().
  */
-static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
-                                unsigned long phase, void *unused_cpu)
+void cpuset_update_active_cpus(void)
 {
         struct sched_domain_attr *attr;
         cpumask_var_t *doms;
         int ndoms;
-        switch (phase) {
-        case CPU_ONLINE:
-        case CPU_ONLINE_FROZEN:
-        case CPU_DOWN_PREPARE:
-        case CPU_DOWN_PREPARE_FROZEN:
-        case CPU_DOWN_FAILED:
-        case CPU_DOWN_FAILED_FROZEN:
-                break;
-        default:
-                return NOTIFY_DONE;
-        }
         cgroup_lock();
         mutex_lock(&callback_mutex);
         cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
@@ -2148,8 +2134,6 @@ static int cpuset_track_online_cpus(struct notifier_block *unused_nb,
         /* Have scheduler rebuild the domains */
         partition_sched_domains(ndoms, doms, attr);
-        return NOTIFY_OK;
 }
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -2203,7 +2187,6 @@ void __init cpuset_init_smp(void)
         cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
         top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
-        hotcpu_notifier(cpuset_track_online_cpus, 0);
         hotplug_memory_notifier(cpuset_track_online_nodes, 10);
         cpuset_wq = create_singlethread_workqueue("cpuset");
......
@@ -907,7 +907,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
         unsigned long new_flags = p->flags;
-        new_flags &= ~PF_SUPERPRIV;
+        new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
         new_flags |= PF_FORKNOEXEC;
         new_flags |= PF_STARTING;
         p->flags = new_flags;
......
@@ -144,12 +144,8 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 static int hrtimer_get_target(int this_cpu, int pinned)
 {
 #ifdef CONFIG_NO_HZ
-        if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) {
-                int preferred_cpu = get_nohz_load_balancer();
-                if (preferred_cpu >= 0)
-                        return preferred_cpu;
-        }
+        if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu))
+                return get_nohz_timer_target();
 #endif
         return this_cpu;
 }
......
@@ -146,7 +146,7 @@ static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS],
 static inline u64 lockstat_clock(void)
 {
-        return cpu_clock(smp_processor_id());
+        return local_clock();
 }
 static int lock_point(unsigned long points[], unsigned long ip)
......
@@ -214,7 +214,7 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 static inline u64 perf_clock(void)
 {
-        return cpu_clock(raw_smp_processor_id());
+        return local_clock();
 }
 /*
......
@@ -232,31 +232,24 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
 {
-        struct sighand_struct *sighand;
-        struct signal_struct *sig;
+        struct signal_struct *sig = tsk->signal;
         struct task_struct *t;
-        *times = INIT_CPUTIME;
+        times->utime = sig->utime;
+        times->stime = sig->stime;
+        times->sum_exec_runtime = sig->sum_sched_runtime;
         rcu_read_lock();
-        sighand = rcu_dereference(tsk->sighand);
-        if (!sighand)
+        /* make sure we can trust tsk->thread_group list */
+        if (!likely(pid_alive(tsk)))
                 goto out;
-        sig = tsk->signal;
         t = tsk;
         do {
                 times->utime = cputime_add(times->utime, t->utime);
                 times->stime = cputime_add(times->stime, t->stime);
                 times->sum_exec_runtime += t->se.sum_exec_runtime;
-                t = next_thread(t);
-        } while (t != tsk);
-        times->utime = cputime_add(times->utime, sig->utime);
-        times->stime = cputime_add(times->stime, sig->stime);
-        times->sum_exec_runtime += sig->sum_sched_runtime;
+        } while_each_thread(tsk, t);
 out:
         rcu_read_unlock();
 }
@@ -1279,10 +1272,6 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 {
         struct signal_struct *sig;
-        /* tsk == current, ensure it is safe to use ->signal/sighand */
-        if (unlikely(tsk->exit_state))
-                return 0;
         if (!task_cputime_zero(&tsk->cputime_expires)) {
                 struct task_cputime task_sample = {
                         .utime = tsk->utime,
@@ -1298,7 +1287,10 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
         if (sig->cputimer.running) {
                 struct task_cputime group_sample;
-                thread_group_cputimer(tsk, &group_sample);
+                spin_lock(&sig->cputimer.lock);
+                group_sample = sig->cputimer.cputime;
+                spin_unlock(&sig->cputimer.lock);
                 if (task_cputime_expired(&group_sample, &sig->cputime_expires))
                         return 1;
         }
@@ -1315,6 +1307,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 {
         LIST_HEAD(firing);
         struct k_itimer *timer, *next;
+        unsigned long flags;
         BUG_ON(!irqs_disabled());
@@ -1325,7 +1318,8 @@ void run_posix_cpu_timers(struct task_struct *tsk)
         if (!fastpath_timer_check(tsk))
                 return;
-        spin_lock(&tsk->sighand->siglock);
+        if (!lock_task_sighand(tsk, &flags))
+                return;
         /*
          * Here we take off tsk->signal->cpu_timers[N] and
          * tsk->cpu_timers[N] all the timers that are firing, and
@@ -1347,7 +1341,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
          * that gets the timer lock before we do will give it up and
          * spin until we've taken care of that timer below.
          */
-        spin_unlock(&tsk->sighand->siglock);
+        unlock_task_sighand(tsk, &flags);
         /*
          * Now that all the timers on our list have the firing flag,
......
@@ -239,8 +239,7 @@ static unsigned long
 rcu_random(struct rcu_random_state *rrsp)
 {
         if (--rrsp->rrs_count < 0) {
-                rrsp->rrs_state +=
-                        (unsigned long)cpu_clock(raw_smp_processor_id());
+                rrsp->rrs_state += (unsigned long)local_clock();
                 rrsp->rrs_count = RCU_RANDOM_REFRESH;
         }
         rrsp->rrs_state = rrsp->rrs_state * RCU_RANDOM_MULT + RCU_RANDOM_ADD;
......
This diff is collapsed.
@@ -10,19 +10,55 @@
  *  Ingo Molnar <mingo@redhat.com>
  *  Guillaume Chazarain <guichaz@gmail.com>
  *
- * Create a semi stable clock from a mixture of other events, including:
- *  - gtod
+ *
+ * What:
+ *
+ * cpu_clock(i) provides a fast (execution time) high resolution
+ * clock with bounded drift between CPUs. The value of cpu_clock(i)
+ * is monotonic for constant i. The timestamp returned is in nanoseconds.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ *
+ * There is no strict promise about the base, although it tends to start
+ * at 0 on boot (but people really shouldn't rely on that).
+ *
+ * cpu_clock(i)       -- can be used from any context, including NMI.
+ * sched_clock_cpu(i) -- must be used with local IRQs disabled (implied by NMI)
+ * local_clock()      -- is cpu_clock() on the current cpu.
+ *
+ * How:
+ *
+ * The implementation either uses sched_clock() when
+ * !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK, which means in that case the
+ * sched_clock() is assumed to provide these properties (mostly it means
+ * the architecture provides a globally synchronized highres time source).
+ *
+ * Otherwise it tries to create a semi stable clock from a mixture of other
+ * clocks, including:
+ *
+ *  - GTOD (clock monotomic)
  *  - sched_clock()
  *  - explicit idle events
  *
- * We use gtod as base and the unstable clock deltas. The deltas are filtered,
- * making it monotonic and keeping it within an expected window.
+ * We use GTOD as base and use sched_clock() deltas to improve resolution. The
+ * deltas are filtered to provide monotonicity and keeping it within an
+ * expected window.
 *
 * Furthermore, explicit sleep and wakeup hooks allow us to account for time
 * that is otherwise invisible (TSC gets stopped).
 *
- * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
- * consistent between cpus (never more than 2 jiffies difference).
+ *
+ * Notes:
+ *
+ * The !IRQ-safetly of sched_clock() and sched_clock_cpu() comes from things
+ * like cpufreq interrupts that can change the base clock (TSC) multiplier
+ * and cause funny jumps in time -- although the filtering provided by
+ * sched_clock_cpu() should mitigate serious artifacts we cannot rely on it
+ * in general since for !CONFIG_HAVE_UNSTABLE_SCHED_CLOCK we fully rely on
+ * sched_clock().
 */
 #include <linux/spinlock.h>
 #include <linux/hardirq.h>
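As a quick illustration of the rules spelled out in the comment above (a sketch only, not part of the patch): timestamps from local_clock() are comparable only when both are taken on the same cpu, while cpu_clock(i) gives a per-cpu monotonic timeline with bounded cross-cpu drift. The helper name below is hypothetical.

#include <linux/sched.h>        /* cpu_clock(), local_clock(), sched_clock_cpu() */

/* Hypothetical helper: time a short section on the current cpu. */
static u64 measure_section_ns(void)
{
        u64 t0, t1;

        preempt_disable();      /* stay on one cpu so the two stamps compare */
        t0 = local_clock();
        /* ... the work being timed ... */
        t1 = local_clock();
        preempt_enable();

        return t1 - t0;         /* same cpu, so this never goes negative */
}

Comparing cpu_clock(i) against cpu_clock(j) for i != j, by contrast, can observe time going backwards, exactly as the BIG FAT WARNING says.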
@@ -170,6 +206,11 @@ static u64 sched_clock_remote(struct sched_clock_data *scd)
         return val;
 }
+/*
+ * Similar to cpu_clock(), but requires local IRQs to be disabled.
+ *
+ * See cpu_clock().
+ */
 u64 sched_clock_cpu(int cpu)
 {
         struct sched_clock_data *scd;
@@ -237,9 +278,19 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
-unsigned long long cpu_clock(int cpu)
+/*
+ * As outlined at the top, provides a fast, high resolution, nanosecond
+ * time source that is monotonic per cpu argument and has bounded drift
+ * between cpus.
+ *
+ * ######################### BIG FAT WARNING ##########################
+ * # when comparing cpu_clock(i) to cpu_clock(j) for i != j, time can #
+ * # go backwards !!                                                  #
+ * ####################################################################
+ */
+u64 cpu_clock(int cpu)
 {
-        unsigned long long clock;
+        u64 clock;
         unsigned long flags;
         local_irq_save(flags);
@@ -249,6 +300,25 @@ unsigned long long cpu_clock(int cpu)
         return clock;
 }
+/*
+ * Similar to cpu_clock() for the current cpu. Time will only be observed
+ * to be monotonic if care is taken to only compare timestampt taken on the
+ * same CPU.
+ *
+ * See cpu_clock().
+ */
+u64 local_clock(void)
+{
+        u64 clock;
+        unsigned long flags;
+        local_irq_save(flags);
+        clock = sched_clock_cpu(smp_processor_id());
+        local_irq_restore(flags);
+        return clock;
+}
 #else /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 void sched_clock_init(void)
@@ -264,12 +334,17 @@ u64 sched_clock_cpu(int cpu)
         return sched_clock();
 }
-unsigned long long cpu_clock(int cpu)
+u64 cpu_clock(int cpu)
 {
         return sched_clock_cpu(cpu);
 }
+u64 local_clock(void)
+{
+        return sched_clock_cpu(0);
+}
 #endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */
 EXPORT_SYMBOL_GPL(cpu_clock);
+EXPORT_SYMBOL_GPL(local_clock);
@@ -166,14 +166,10 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
  *
  * Returns: -ENOMEM if memory fails.
  */
-int cpupri_init(struct cpupri *cp, bool bootmem)
+int cpupri_init(struct cpupri *cp)
 {
-        gfp_t gfp = GFP_KERNEL;
         int i;
-        if (bootmem)
-                gfp = GFP_NOWAIT;
         memset(cp, 0, sizeof(*cp));
         for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
@@ -181,7 +177,7 @@ int cpupri_init(struct cpupri *cp, bool bootmem)
                 raw_spin_lock_init(&vec->lock);
                 vec->count = 0;
-                if (!zalloc_cpumask_var(&vec->mask, gfp))
+                if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
                         goto cleanup;
         }
......
@@ -27,7 +27,7 @@ struct cpupri {
 int  cpupri_find(struct cpupri *cp,
                  struct task_struct *p, struct cpumask *lowest_mask);
 void cpupri_set(struct cpupri *cp, int cpu, int pri);
-int cpupri_init(struct cpupri *cp, bool bootmem);
+int cpupri_init(struct cpupri *cp);
 void cpupri_cleanup(struct cpupri *cp);
 #else
 #define cpupri_set(cp, cpu, pri) do { } while (0)
......
@@ -332,7 +332,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
         PN(sysctl_sched_latency);
         PN(sysctl_sched_min_granularity);
         PN(sysctl_sched_wakeup_granularity);
-        PN(sysctl_sched_child_runs_first);
+        P(sysctl_sched_child_runs_first);
         P(sysctl_sched_features);
 #undef PN
 #undef P
......
This diff is collapsed.
@@ -1663,9 +1663,6 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 {
         unsigned long soft, hard;
-        if (!p->signal)
-                return;
         /* max may change after cur was read, this will be fixed next tick */
         soft = task_rlimit(p, RLIMIT_RTTIME);
         hard = task_rlimit_max(p, RLIMIT_RTTIME);
......
@@ -295,13 +295,7 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 static inline void account_group_user_time(struct task_struct *tsk,
                                            cputime_t cputime)
 {
-        struct thread_group_cputimer *cputimer;
-        /* tsk == current, ensure it is safe to use ->signal */
-        if (unlikely(tsk->exit_state))
-                return;
-        cputimer = &tsk->signal->cputimer;
+        struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
         if (!cputimer->running)
                 return;
@@ -325,13 +319,7 @@ static inline void account_group_user_time(struct task_struct *tsk,
 static inline void account_group_system_time(struct task_struct *tsk,
                                              cputime_t cputime)
 {
-        struct thread_group_cputimer *cputimer;
-        /* tsk == current, ensure it is safe to use ->signal */
-        if (unlikely(tsk->exit_state))
-                return;
-        cputimer = &tsk->signal->cputimer;
+        struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
         if (!cputimer->running)
                 return;
@@ -355,16 +343,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
 static inline void account_group_exec_runtime(struct task_struct *tsk,
                                               unsigned long long ns)
 {
-        struct thread_group_cputimer *cputimer;
-        struct signal_struct *sig;
-        sig = tsk->signal;
-        /* see __exit_signal()->task_rq_unlock_wait() */
-        barrier();
-        if (unlikely(!sig))
-                return;
-        cputimer = &sig->cputimer;
+        struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
         if (!cputimer->running)
                 return;
......
@@ -325,7 +325,7 @@ void tick_nohz_stop_sched_tick(int inidle)
         } while (read_seqretry(&xtime_lock, seq));
         if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
-            arch_needs_cpu(cpu) || nohz_ratelimit(cpu)) {
+            arch_needs_cpu(cpu)) {
                 next_jiffies = last_jiffies + 1;
                 delta_jiffies = 1;
         } else {
@@ -405,13 +405,7 @@ void tick_nohz_stop_sched_tick(int inidle)
                  * the scheduler tick in nohz_restart_sched_tick.
                  */
                 if (!ts->tick_stopped) {
-                        if (select_nohz_load_balancer(1)) {
-                                /*
-                                 * sched tick not stopped!
-                                 */
-                                cpumask_clear_cpu(cpu, nohz_cpu_mask);
-                                goto out;
-                        }
+                        select_nohz_load_balancer(1);
                         ts->idle_tick = hrtimer_get_expires(&ts->sched_timer);
                         ts->tick_stopped = 1;
......
@@ -692,12 +692,8 @@ __mod_timer(struct timer_list *timer, unsigned long expires,
         cpu = smp_processor_id();
 #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
-        if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
-                int preferred_cpu = get_nohz_load_balancer();
-                if (preferred_cpu >= 0)
-                        cpu = preferred_cpu;
-        }
+        if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu))
+                cpu = get_nohz_timer_target();
 #endif
         new_base = per_cpu(tvec_bases, cpu);
......
@@ -55,7 +55,7 @@ u64 notrace trace_clock_local(void)
  */
 u64 notrace trace_clock(void)
 {
-        return cpu_clock(raw_smp_processor_id());
+        return local_clock();
 }
......
/*
 * kernel/workqueue_sched.h
 *
 * Scheduler hooks for concurrency managed workqueue.  Only to be
 * included from sched.c and workqueue.c.
 */
static inline void wq_worker_waking_up(struct task_struct *task,
                                       unsigned int cpu)
{
}

static inline struct task_struct *wq_worker_sleeping(struct task_struct *task,
                                                     unsigned int cpu)
{
        return NULL;
}
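These are the stub hooks; the PF_WQ_WORKER flag added in the sched.h and fork.c hunks above is what tells the scheduler which tasks they apply to. The actual wiring lives in the collapsed sched.c diff, so the following is only a rough, hypothetical sketch of the idea, not the merged code:

/* Hypothetical sketch: when a workqueue worker blocks, ask the workqueue
 * code whether another worker should be woken so the cpu keeps making
 * progress on queued work items. */
static void notify_worker_sleeping(struct task_struct *prev, unsigned int cpu)
{
        struct task_struct *to_wakeup;

        if (!(prev->flags & PF_WQ_WORKER))
                return;

        to_wakeup = wq_worker_sleeping(prev, cpu);
        if (to_wakeup)
                wake_up_process(to_wakeup);     /* simplified wakeup path */
}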