Commit 16b3d0cf authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'sched-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler updates from Ingo Molnar:

 - Clean up SCHED_DEBUG: move the decades-old mess of sysctl, procfs and
   debugfs interfaces to a unified debugfs interface.

 - Signals: Allow caching one sigqueue object per task, to improve
   performance & latencies.

 - Improve newidle_balance() irq-off latencies on systems with a large
   number of CPU cgroups.

 - Improve energy-aware scheduling

 - Improve the PELT metrics for certain workloads

 - Reintroduce select_idle_smt() to improve load-balancing locality -
   but without the previous regressions

 - Add 'scheduler latency debugging': warn after long periods of pending
   need_resched. This is an opt-in feature that requires enabling the
   LATENCY_WARN scheduler feature or using the resched_latency_warn_ms=xx
   boot parameter.

 - CPU hotplug fixes for hotplug rollback and for the 'fail' interface. Fix
   remaining balance_push() vs. hotplug holes/races

 - PSI fixes, plus allow /proc/pressure/ files to be written by
   CAP_SYS_RESOURCE tasks as well

 - Fix/improve various load-balancing corner cases vs. capacity margins

 - Fix sched topology on systems with NUMA diameter of 3 or above

 - Fix PF_KTHREAD vs to_kthread() race

 - Minor rseq optimizations

 - Misc cleanups, optimizations, fixes and smaller updates

* tag 'sched-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (61 commits)
  cpumask/hotplug: Fix cpu_dying() state tracking
  kthread: Fix PF_KTHREAD vs to_kthread() race
  sched/debug: Fix cgroup_path[] serialization
  sched,psi: Handle potential task count underflow bugs more gracefully
  sched: Warn on long periods of pending need_resched
  sched/fair: Move update_nohz_stats() to the CONFIG_NO_HZ_COMMON block to simplify the code & fix an unused function warning
  sched/debug: Rename the sched_debug parameter to sched_verbose
  sched,fair: Alternative sched_slice()
  sched: Move /proc/sched_debug to debugfs
  sched,debug: Convert sysctl sched_domains to debugfs
  debugfs: Implement debugfs_create_str()
  sched,preempt: Move preempt_dynamic to debug.c
  sched: Move SCHED_DEBUG sysctl to debugfs
  sched: Don't make LATENCYTOP select SCHED_DEBUG
  sched: Remove sched_schedstats sysctl out from under SCHED_DEBUG
  sched/numa: Allow runtime enabling/disabling of NUMA balance without SCHED_DEBUG
  sched: Use cpu_dying() to fix balance_push vs hotplug-rollback
  cpumask: Introduce DYING mask
  cpumask: Make cpu_{online,possible,present,active}() inline
  rseq: Optimise rseq_get_rseq_cs() and clear_rseq_cs()
  ...
parents 42dec9a9 2ea46c6f
...@@ -4754,7 +4754,7 @@
 	sbni=		[NET]	Granch SBNI12 leased line adapter
-	sched_debug	[KNL] Enables verbose scheduler debug messages.
+	sched_verbose	[KNL] Enables verbose scheduler debug messages.
 	schedstats=	[KNL,X86] Enable or disable scheduled statistics.
 			Allowed values are enable and disable. This feature
......
...@@ -74,8 +74,8 @@ for a given topology level by creating a sched_domain_topology_level array and
 calling set_sched_topology() with this array as the parameter.
 The sched-domains debugging infrastructure can be enabled by enabling
-CONFIG_SCHED_DEBUG and adding 'sched_debug' to your cmdline. If you forgot to
-tweak your cmdline, you can also flip the /sys/kernel/debug/sched_debug
-knob. This enables an error checking parse of the sched domains which should
-catch most possible errors (described above). It also prints out the domain
-structure in a visual format.
+CONFIG_SCHED_DEBUG and adding 'sched_debug_verbose' to your cmdline. If you
+forgot to tweak your cmdline, you can also flip the
+/sys/kernel/debug/sched/verbose knob. This enables an error checking parse of
+the sched domains which should catch most possible errors (described above). It
+also prints out the domain structure in a visual format.
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/usb.h> #include <linux/usb.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/sched/task.h> #include <linux/sched/task.h>
#include <linux/kcov.h>
#include <uapi/linux/usbip.h> #include <uapi/linux/usbip.h>
#undef pr_fmt #undef pr_fmt
......
...@@ -864,6 +864,97 @@ struct dentry *debugfs_create_bool(const char *name, umode_t mode, ...@@ -864,6 +864,97 @@ struct dentry *debugfs_create_bool(const char *name, umode_t mode,
} }
EXPORT_SYMBOL_GPL(debugfs_create_bool); EXPORT_SYMBOL_GPL(debugfs_create_bool);
ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos)
{
struct dentry *dentry = F_DENTRY(file);
char *str, *copy = NULL;
int copy_len, len;
ssize_t ret;
ret = debugfs_file_get(dentry);
if (unlikely(ret))
return ret;
str = *(char **)file->private_data;
len = strlen(str) + 1;
copy = kmalloc(len, GFP_KERNEL);
if (!copy) {
debugfs_file_put(dentry);
return -ENOMEM;
}
copy_len = strscpy(copy, str, len);
debugfs_file_put(dentry);
if (copy_len < 0) {
kfree(copy);
return copy_len;
}
copy[copy_len] = '\n';
ret = simple_read_from_buffer(user_buf, count, ppos, copy, copy_len);
kfree(copy);
return ret;
}
static ssize_t debugfs_write_file_str(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
{
/* This is really only for read-only strings */
return -EINVAL;
}
static const struct file_operations fops_str = {
.read = debugfs_read_file_str,
.write = debugfs_write_file_str,
.open = simple_open,
.llseek = default_llseek,
};
static const struct file_operations fops_str_ro = {
.read = debugfs_read_file_str,
.open = simple_open,
.llseek = default_llseek,
};
static const struct file_operations fops_str_wo = {
.write = debugfs_write_file_str,
.open = simple_open,
.llseek = default_llseek,
};
/**
* debugfs_create_str - create a debugfs file that is used to read and write a string value
* @name: a pointer to a string containing the name of the file to create.
* @mode: the permission that the file should have
* @parent: a pointer to the parent dentry for this file. This should be a
* directory dentry if set. If this parameter is %NULL, then the
* file will be created in the root of the debugfs filesystem.
* @value: a pointer to the variable that the file should read to and write
* from.
*
* This function creates a file in debugfs with the given name that
* contains the value of the variable @value. If the @mode variable is so
* set, it can be read from, and written to.
*
* This function will return a pointer to a dentry if it succeeds. This
* pointer must be passed to the debugfs_remove() function when the file is
* to be removed (no automatic cleanup happens if your module is unloaded,
* you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be
* returned.
*
* If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will
* be returned.
*/
void debugfs_create_str(const char *name, umode_t mode,
struct dentry *parent, char **value)
{
debugfs_create_mode_unsafe(name, mode, parent, value, &fops_str,
&fops_str_ro, &fops_str_wo);
}
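For reference, a minimal sketch of how a module might use the new helper; the names below (example_dir, example_mode, example_init/exit) are made up for illustration and are not part of this series:

	/* Hypothetical caller of debugfs_create_str(). */
	#include <linux/debugfs.h>
	#include <linux/module.h>

	static char *example_mode = "default";	/* string shown on read */
	static struct dentry *example_dir;

	static int __init example_init(void)
	{
		example_dir = debugfs_create_dir("example", NULL);

		/* Read-only: reads return "default\n", writes get -EINVAL. */
		debugfs_create_str("mode", 0444, example_dir, &example_mode);
		return 0;
	}

	static void __exit example_exit(void)
	{
		debugfs_remove_recursive(example_dir);
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");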
static ssize_t read_file_blob(struct file *file, char __user *user_buf, static ssize_t read_file_blob(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos) size_t count, loff_t *ppos)
{ {
......
...@@ -91,44 +91,15 @@ extern struct cpumask __cpu_possible_mask; ...@@ -91,44 +91,15 @@ extern struct cpumask __cpu_possible_mask;
extern struct cpumask __cpu_online_mask; extern struct cpumask __cpu_online_mask;
extern struct cpumask __cpu_present_mask; extern struct cpumask __cpu_present_mask;
extern struct cpumask __cpu_active_mask; extern struct cpumask __cpu_active_mask;
extern struct cpumask __cpu_dying_mask;
#define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask) #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
#define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask) #define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask)
#define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) #define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask)
#define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) #define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask)
#define cpu_dying_mask ((const struct cpumask *)&__cpu_dying_mask)
extern atomic_t __num_online_cpus; extern atomic_t __num_online_cpus;
#if NR_CPUS > 1
/**
* num_online_cpus() - Read the number of online CPUs
*
* Despite the fact that __num_online_cpus is of type atomic_t, this
* interface gives only a momentary snapshot and is not protected against
* concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
* region.
*/
static inline unsigned int num_online_cpus(void)
{
return atomic_read(&__num_online_cpus);
}
#define num_possible_cpus() cpumask_weight(cpu_possible_mask)
#define num_present_cpus() cpumask_weight(cpu_present_mask)
#define num_active_cpus() cpumask_weight(cpu_active_mask)
#define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask)
#define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask)
#define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask)
#define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask)
#else
#define num_online_cpus() 1U
#define num_possible_cpus() 1U
#define num_present_cpus() 1U
#define num_active_cpus() 1U
#define cpu_online(cpu) ((cpu) == 0)
#define cpu_possible(cpu) ((cpu) == 0)
#define cpu_present(cpu) ((cpu) == 0)
#define cpu_active(cpu) ((cpu) == 0)
#endif
extern cpumask_t cpus_booted_once_mask; extern cpumask_t cpus_booted_once_mask;
static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
...@@ -857,6 +828,14 @@ set_cpu_active(unsigned int cpu, bool active) ...@@ -857,6 +828,14 @@ set_cpu_active(unsigned int cpu, bool active)
cpumask_clear_cpu(cpu, &__cpu_active_mask); cpumask_clear_cpu(cpu, &__cpu_active_mask);
} }
static inline void
set_cpu_dying(unsigned int cpu, bool dying)
{
if (dying)
cpumask_set_cpu(cpu, &__cpu_dying_mask);
else
cpumask_clear_cpu(cpu, &__cpu_dying_mask);
}
/** /**
* to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
...@@ -894,6 +873,82 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) ...@@ -894,6 +873,82 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
return to_cpumask(p); return to_cpumask(p);
} }
#if NR_CPUS > 1
/**
* num_online_cpus() - Read the number of online CPUs
*
* Despite the fact that __num_online_cpus is of type atomic_t, this
* interface gives only a momentary snapshot and is not protected against
* concurrent CPU hotplug operations unless invoked from a cpuhp_lock held
* region.
*/
static inline unsigned int num_online_cpus(void)
{
return atomic_read(&__num_online_cpus);
}
#define num_possible_cpus() cpumask_weight(cpu_possible_mask)
#define num_present_cpus() cpumask_weight(cpu_present_mask)
#define num_active_cpus() cpumask_weight(cpu_active_mask)
static inline bool cpu_online(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_online_mask);
}
static inline bool cpu_possible(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_possible_mask);
}
static inline bool cpu_present(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_present_mask);
}
static inline bool cpu_active(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_active_mask);
}
static inline bool cpu_dying(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_dying_mask);
}
#else
#define num_online_cpus() 1U
#define num_possible_cpus() 1U
#define num_present_cpus() 1U
#define num_active_cpus() 1U
static inline bool cpu_online(unsigned int cpu)
{
return cpu == 0;
}
static inline bool cpu_possible(unsigned int cpu)
{
return cpu == 0;
}
static inline bool cpu_present(unsigned int cpu)
{
return cpu == 0;
}
static inline bool cpu_active(unsigned int cpu)
{
return cpu == 0;
}
static inline bool cpu_dying(unsigned int cpu)
{
return false;
}
#endif /* NR_CPUS > 1 */
#define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu))
#if NR_CPUS <= BITS_PER_LONG #if NR_CPUS <= BITS_PER_LONG
......
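As a rough sketch of how the new cpu_dying() test is meant to be consumed — the helper below is hypothetical and not part of this series (the real consumer is the scheduler's balance_push()/hotplug-rollback logic):

	/* Hypothetical: pick a CPU for new work, skipping CPUs on their way down. */
	#include <linux/cpumask.h>
	#include <linux/errno.h>

	static int pick_target_cpu(const struct cpumask *allowed)
	{
		int cpu;

		for_each_cpu_and(cpu, allowed, cpu_online_mask) {
			/*
			 * cpu_dying() is set while the CPU is being torn down
			 * and cleared again if the hot-unplug rolls back.
			 */
			if (cpu_dying(cpu))
				continue;
			return cpu;
		}
		return -ENODEV;
	}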
...@@ -128,6 +128,8 @@ void debugfs_create_atomic_t(const char *name, umode_t mode, ...@@ -128,6 +128,8 @@ void debugfs_create_atomic_t(const char *name, umode_t mode,
struct dentry *parent, atomic_t *value); struct dentry *parent, atomic_t *value);
struct dentry *debugfs_create_bool(const char *name, umode_t mode, struct dentry *debugfs_create_bool(const char *name, umode_t mode,
struct dentry *parent, bool *value); struct dentry *parent, bool *value);
void debugfs_create_str(const char *name, umode_t mode,
struct dentry *parent, char **value);
struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *debugfs_create_blob(const char *name, umode_t mode,
struct dentry *parent, struct dentry *parent,
...@@ -156,6 +158,9 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf, ...@@ -156,6 +158,9 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf,
ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos); size_t count, loff_t *ppos);
ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf,
size_t count, loff_t *ppos);
#else #else
#include <linux/err.h> #include <linux/err.h>
...@@ -297,6 +302,11 @@ static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode, ...@@ -297,6 +302,11 @@ static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode,
return ERR_PTR(-ENODEV); return ERR_PTR(-ENODEV);
} }
static inline void debugfs_create_str(const char *name, umode_t mode,
struct dentry *parent,
char **value)
{ }
static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode, static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode,
struct dentry *parent, struct dentry *parent,
struct debugfs_blob_wrapper *blob) struct debugfs_blob_wrapper *blob)
...@@ -348,6 +358,13 @@ static inline ssize_t debugfs_write_file_bool(struct file *file, ...@@ -348,6 +358,13 @@ static inline ssize_t debugfs_write_file_bool(struct file *file,
return -ENODEV; return -ENODEV;
} }
static inline ssize_t debugfs_read_file_str(struct file *file,
char __user *user_buf,
size_t count, loff_t *ppos)
{
return -ENODEV;
}
#endif #endif
/** /**
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
#ifndef _LINUX_KCOV_H #ifndef _LINUX_KCOV_H
#define _LINUX_KCOV_H #define _LINUX_KCOV_H
#include <linux/sched.h>
#include <uapi/linux/kcov.h> #include <uapi/linux/kcov.h>
struct task_struct; struct task_struct;
......
...@@ -20,7 +20,6 @@ void psi_task_change(struct task_struct *task, int clear, int set); ...@@ -20,7 +20,6 @@ void psi_task_change(struct task_struct *task, int clear, int set);
void psi_task_switch(struct task_struct *prev, struct task_struct *next, void psi_task_switch(struct task_struct *prev, struct task_struct *next,
bool sleep); bool sleep);
void psi_memstall_tick(struct task_struct *task, int cpu);
void psi_memstall_enter(unsigned long *flags); void psi_memstall_enter(unsigned long *flags);
void psi_memstall_leave(unsigned long *flags); void psi_memstall_leave(unsigned long *flags);
......
...@@ -50,9 +50,10 @@ enum psi_states { ...@@ -50,9 +50,10 @@ enum psi_states {
PSI_MEM_SOME, PSI_MEM_SOME,
PSI_MEM_FULL, PSI_MEM_FULL,
PSI_CPU_SOME, PSI_CPU_SOME,
PSI_CPU_FULL,
/* Only per-CPU, to weigh the CPU in the global average: */ /* Only per-CPU, to weigh the CPU in the global average: */
PSI_NONIDLE, PSI_NONIDLE,
NR_PSI_STATES = 6, NR_PSI_STATES = 7,
}; };
enum psi_aggregators { enum psi_aggregators {
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
#include <linux/pid.h> #include <linux/pid.h>
#include <linux/sem.h> #include <linux/sem.h>
#include <linux/shm.h> #include <linux/shm.h>
#include <linux/kcov.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/plist.h> #include <linux/plist.h>
#include <linux/hrtimer.h> #include <linux/hrtimer.h>
...@@ -985,6 +984,7 @@ struct task_struct { ...@@ -985,6 +984,7 @@ struct task_struct {
/* Signal handlers: */ /* Signal handlers: */
struct signal_struct *signal; struct signal_struct *signal;
struct sighand_struct __rcu *sighand; struct sighand_struct __rcu *sighand;
struct sigqueue *sigqueue_cache;
sigset_t blocked; sigset_t blocked;
sigset_t real_blocked; sigset_t real_blocked;
/* Restored if set_restore_sigmask() was used: */ /* Restored if set_restore_sigmask() was used: */
...@@ -1101,7 +1101,7 @@ struct task_struct { ...@@ -1101,7 +1101,7 @@ struct task_struct {
#ifdef CONFIG_CPUSETS #ifdef CONFIG_CPUSETS
/* Protected by ->alloc_lock: */ /* Protected by ->alloc_lock: */
nodemask_t mems_allowed; nodemask_t mems_allowed;
/* Seqence number to catch updates: */ /* Sequence number to catch updates: */
seqcount_spinlock_t mems_allowed_seq; seqcount_spinlock_t mems_allowed_seq;
int cpuset_mem_spread_rotor; int cpuset_mem_spread_rotor;
int cpuset_slab_spread_rotor; int cpuset_slab_spread_rotor;
......
...@@ -26,10 +26,11 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, ...@@ -26,10 +26,11 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
enum { sysctl_hung_task_timeout_secs = 0 }; enum { sysctl_hung_task_timeout_secs = 0 };
#endif #endif
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_latency;
extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity; extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
enum sched_tunable_scaling { enum sched_tunable_scaling {
SCHED_TUNABLESCALING_NONE, SCHED_TUNABLESCALING_NONE,
...@@ -37,7 +38,7 @@ enum sched_tunable_scaling { ...@@ -37,7 +38,7 @@ enum sched_tunable_scaling {
SCHED_TUNABLESCALING_LINEAR, SCHED_TUNABLESCALING_LINEAR,
SCHED_TUNABLESCALING_END, SCHED_TUNABLESCALING_END,
}; };
extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; extern unsigned int sysctl_sched_tunable_scaling;
extern unsigned int sysctl_numa_balancing_scan_delay; extern unsigned int sysctl_numa_balancing_scan_delay;
extern unsigned int sysctl_numa_balancing_scan_period_min; extern unsigned int sysctl_numa_balancing_scan_period_min;
...@@ -48,8 +49,8 @@ extern unsigned int sysctl_numa_balancing_scan_size; ...@@ -48,8 +49,8 @@ extern unsigned int sysctl_numa_balancing_scan_size;
extern __read_mostly unsigned int sysctl_sched_migration_cost; extern __read_mostly unsigned int sysctl_sched_migration_cost;
extern __read_mostly unsigned int sysctl_sched_nr_migrate; extern __read_mostly unsigned int sysctl_sched_nr_migrate;
int sched_proc_update_handler(struct ctl_table *table, int write, extern int sysctl_resched_latency_warn_ms;
void *buffer, size_t *length, loff_t *ppos); extern int sysctl_resched_latency_warn_once;
#endif #endif
/* /*
......
...@@ -266,6 +266,7 @@ static inline void init_sigpending(struct sigpending *sig) ...@@ -266,6 +266,7 @@ static inline void init_sigpending(struct sigpending *sig)
} }
extern void flush_sigqueue(struct sigpending *queue); extern void flush_sigqueue(struct sigpending *queue);
extern void exit_task_sigqueue_cache(struct task_struct *tsk);
/* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
static inline int valid_signal(unsigned long sig) static inline int valid_signal(unsigned long sig)
......
...@@ -102,6 +102,16 @@ struct ptrace_syscall_info { ...@@ -102,6 +102,16 @@ struct ptrace_syscall_info {
}; };
}; };
#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f
struct ptrace_rseq_configuration {
__u64 rseq_abi_pointer;
__u32 rseq_abi_size;
__u32 signature;
__u32 flags;
__u32 pad;
};
/* /*
* These values are stored in task->ptrace_message * These values are stored in task->ptrace_message
* by tracehook_report_syscall_* to describe the current syscall-stop. * by tracehook_report_syscall_* to describe the current syscall-stop.
......
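A rough tracer-side sketch of how the new request could be used from user space; it assumes the tracee `pid` is already attached and in ptrace-stop, and falls back to a local request definition (plus a stdint mirror of the uapi layout) when the system headers predate this ABI:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ptrace.h>
	#include <sys/types.h>

	#ifndef PTRACE_GET_RSEQ_CONFIGURATION
	#define PTRACE_GET_RSEQ_CONFIGURATION	0x420f
	#endif

	/* Same layout as the uapi struct above, spelled with stdint types. */
	struct rseq_config {
		uint64_t rseq_abi_pointer;
		uint32_t rseq_abi_size;
		uint32_t signature;
		uint32_t flags;
		uint32_t pad;
	};

	static void dump_rseq_config(pid_t pid)
	{
		struct rseq_config conf;

		memset(&conf, 0, sizeof(conf));
		/* addr carries the buffer size, data the destination buffer. */
		if (ptrace(PTRACE_GET_RSEQ_CONFIGURATION, pid,
			   (void *)sizeof(conf), &conf) < 0) {
			perror("PTRACE_GET_RSEQ_CONFIGURATION");
			return;
		}
		printf("rseq abi @ 0x%llx, size %u, signature 0x%x\n",
		       (unsigned long long)conf.rseq_abi_pointer,
		       conf.rseq_abi_size, conf.signature);
	}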
...@@ -63,6 +63,7 @@ struct cpuhp_cpu_state { ...@@ -63,6 +63,7 @@ struct cpuhp_cpu_state {
bool rollback; bool rollback;
bool single; bool single;
bool bringup; bool bringup;
int cpu;
struct hlist_node *node; struct hlist_node *node;
struct hlist_node *last; struct hlist_node *last;
enum cpuhp_state cb_state; enum cpuhp_state cb_state;
...@@ -135,6 +136,11 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state) ...@@ -135,6 +136,11 @@ static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state)
return cpuhp_hp_states + state; return cpuhp_hp_states + state;
} }
static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step)
{
return bringup ? !step->startup.single : !step->teardown.single;
}
/** /**
* cpuhp_invoke_callback _ Invoke the callbacks for a given state * cpuhp_invoke_callback _ Invoke the callbacks for a given state
* @cpu: The cpu for which the callback should be invoked * @cpu: The cpu for which the callback should be invoked
...@@ -157,26 +163,24 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, ...@@ -157,26 +163,24 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
if (st->fail == state) { if (st->fail == state) {
st->fail = CPUHP_INVALID; st->fail = CPUHP_INVALID;
if (!(bringup ? step->startup.single : step->teardown.single))
return 0;
return -EAGAIN; return -EAGAIN;
} }
if (cpuhp_step_empty(bringup, step)) {
WARN_ON_ONCE(1);
return 0;
}
if (!step->multi_instance) { if (!step->multi_instance) {
WARN_ON_ONCE(lastp && *lastp); WARN_ON_ONCE(lastp && *lastp);
cb = bringup ? step->startup.single : step->teardown.single; cb = bringup ? step->startup.single : step->teardown.single;
if (!cb)
return 0;
trace_cpuhp_enter(cpu, st->target, state, cb); trace_cpuhp_enter(cpu, st->target, state, cb);
ret = cb(cpu); ret = cb(cpu);
trace_cpuhp_exit(cpu, st->state, state, ret); trace_cpuhp_exit(cpu, st->state, state, ret);
return ret; return ret;
} }
cbm = bringup ? step->startup.multi : step->teardown.multi; cbm = bringup ? step->startup.multi : step->teardown.multi;
if (!cbm)
return 0;
/* Single invocation for instance add/remove */ /* Single invocation for instance add/remove */
if (node) { if (node) {
...@@ -461,13 +465,16 @@ static inline enum cpuhp_state ...@@ -461,13 +465,16 @@ static inline enum cpuhp_state
cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
{ {
enum cpuhp_state prev_state = st->state; enum cpuhp_state prev_state = st->state;
bool bringup = st->state < target;
st->rollback = false; st->rollback = false;
st->last = NULL; st->last = NULL;
st->target = target; st->target = target;
st->single = false; st->single = false;
st->bringup = st->state < target; st->bringup = bringup;
if (cpu_dying(st->cpu) != !bringup)
set_cpu_dying(st->cpu, !bringup);
return prev_state; return prev_state;
} }
...@@ -475,6 +482,17 @@ cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target) ...@@ -475,6 +482,17 @@ cpuhp_set_state(struct cpuhp_cpu_state *st, enum cpuhp_state target)
static inline void static inline void
cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
{ {
bool bringup = !st->bringup;
st->target = prev_state;
/*
* Already rolling back. No need invert the bringup value or to change
* the current state.
*/
if (st->rollback)
return;
st->rollback = true; st->rollback = true;
/* /*
...@@ -488,8 +506,9 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) ...@@ -488,8 +506,9 @@ cpuhp_reset_state(struct cpuhp_cpu_state *st, enum cpuhp_state prev_state)
st->state++; st->state++;
} }
st->target = prev_state; st->bringup = bringup;
st->bringup = !st->bringup; if (cpu_dying(st->cpu) != !bringup)
set_cpu_dying(st->cpu, !bringup);
} }
/* Regular hotplug invocation of the AP hotplug thread */ /* Regular hotplug invocation of the AP hotplug thread */
...@@ -591,10 +610,53 @@ static int finish_cpu(unsigned int cpu) ...@@ -591,10 +610,53 @@ static int finish_cpu(unsigned int cpu)
* Hotplug state machine related functions * Hotplug state machine related functions
*/ */
static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st) /*
* Get the next state to run. Empty ones will be skipped. Returns true if a
* state must be run.
*
* st->state will be modified ahead of time, to match state_to_run, as if it
* has already ran.
*/
static bool cpuhp_next_state(bool bringup,
enum cpuhp_state *state_to_run,
struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{
do {
if (bringup) {
if (st->state >= target)
return false;
*state_to_run = ++st->state;
} else {
if (st->state <= target)
return false;
*state_to_run = st->state--;
}
if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run)))
break;
} while (true);
return true;
}
static int cpuhp_invoke_callback_range(bool bringup,
unsigned int cpu,
struct cpuhp_cpu_state *st,
enum cpuhp_state target)
{ {
for (st->state--; st->state > st->target; st->state--) enum cpuhp_state state;
cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); int err = 0;
while (cpuhp_next_state(bringup, &state, st, target)) {
err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL);
if (err)
break;
}
return err;
} }
static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st) static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st)
...@@ -617,16 +679,12 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, ...@@ -617,16 +679,12 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state prev_state = st->state; enum cpuhp_state prev_state = st->state;
int ret = 0; int ret = 0;
while (st->state < target) { ret = cpuhp_invoke_callback_range(true, cpu, st, target);
st->state++; if (ret) {
ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); cpuhp_reset_state(st, prev_state);
if (ret) { if (can_rollback_cpu(st))
if (can_rollback_cpu(st)) { WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
st->target = prev_state; prev_state));
undo_cpu_up(cpu, st);
}
break;
}
} }
return ret; return ret;
} }
...@@ -640,6 +698,7 @@ static void cpuhp_create(unsigned int cpu) ...@@ -640,6 +698,7 @@ static void cpuhp_create(unsigned int cpu)
init_completion(&st->done_up); init_completion(&st->done_up);
init_completion(&st->done_down); init_completion(&st->done_down);
st->cpu = cpu;
} }
static int cpuhp_should_run(unsigned int cpu) static int cpuhp_should_run(unsigned int cpu)
...@@ -690,17 +749,9 @@ static void cpuhp_thread_fun(unsigned int cpu) ...@@ -690,17 +749,9 @@ static void cpuhp_thread_fun(unsigned int cpu)
state = st->cb_state; state = st->cb_state;
st->should_run = false; st->should_run = false;
} else { } else {
if (bringup) { st->should_run = cpuhp_next_state(bringup, &state, st, st->target);
st->state++; if (!st->should_run)
state = st->state; goto end;
st->should_run = (st->state < st->target);
WARN_ON_ONCE(st->state > st->target);
} else {
state = st->state;
st->state--;
st->should_run = (st->state > st->target);
WARN_ON_ONCE(st->state < st->target);
}
} }
WARN_ON_ONCE(!cpuhp_is_ap_state(state)); WARN_ON_ONCE(!cpuhp_is_ap_state(state));
...@@ -728,6 +779,7 @@ static void cpuhp_thread_fun(unsigned int cpu) ...@@ -728,6 +779,7 @@ static void cpuhp_thread_fun(unsigned int cpu)
st->should_run = false; st->should_run = false;
} }
end:
cpuhp_lock_release(bringup); cpuhp_lock_release(bringup);
lockdep_release_cpus_lock(); lockdep_release_cpus_lock();
...@@ -881,19 +933,18 @@ static int take_cpu_down(void *_param) ...@@ -881,19 +933,18 @@ static int take_cpu_down(void *_param)
return err; return err;
/* /*
* We get here while we are in CPUHP_TEARDOWN_CPU state and we must not * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
* do this step again. * down, that the current state is CPUHP_TEARDOWN_CPU - 1.
*/ */
WARN_ON(st->state != CPUHP_TEARDOWN_CPU); WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));
st->state--;
/* Invoke the former CPU_DYING callbacks */ /* Invoke the former CPU_DYING callbacks */
for (; st->state > target; st->state--) { ret = cpuhp_invoke_callback_range(false, cpu, st, target);
ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
/* /*
* DYING must not fail! * DYING must not fail!
*/ */
WARN_ON_ONCE(ret); WARN_ON_ONCE(ret);
}
/* Give up timekeeping duties */ /* Give up timekeeping duties */
tick_handover_do_timer(); tick_handover_do_timer();
...@@ -975,27 +1026,22 @@ void cpuhp_report_idle_dead(void) ...@@ -975,27 +1026,22 @@ void cpuhp_report_idle_dead(void)
cpuhp_complete_idle_dead, st, 0); cpuhp_complete_idle_dead, st, 0);
} }
static void undo_cpu_down(unsigned int cpu, struct cpuhp_cpu_state *st)
{
for (st->state++; st->state < st->target; st->state++)
cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL);
}
static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
enum cpuhp_state target) enum cpuhp_state target)
{ {
enum cpuhp_state prev_state = st->state; enum cpuhp_state prev_state = st->state;
int ret = 0; int ret = 0;
for (; st->state > target; st->state--) { ret = cpuhp_invoke_callback_range(false, cpu, st, target);
ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); if (ret) {
if (ret) {
st->target = prev_state; cpuhp_reset_state(st, prev_state);
if (st->state < prev_state)
undo_cpu_down(cpu, st); if (st->state < prev_state)
break; WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
} prev_state));
} }
return ret; return ret;
} }
...@@ -1045,9 +1091,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, ...@@ -1045,9 +1091,13 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
* to do the further cleanups. * to do the further cleanups.
*/ */
ret = cpuhp_down_callbacks(cpu, st, target); ret = cpuhp_down_callbacks(cpu, st, target);
if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) { if (ret && st->state < prev_state) {
cpuhp_reset_state(st, prev_state); if (st->state == CPUHP_TEARDOWN_CPU) {
__cpuhp_kick_ap(st); cpuhp_reset_state(st, prev_state);
__cpuhp_kick_ap(st);
} else {
WARN(1, "DEAD callback error for CPU%d", cpu);
}
} }
out: out:
...@@ -1164,14 +1214,12 @@ void notify_cpu_starting(unsigned int cpu) ...@@ -1164,14 +1214,12 @@ void notify_cpu_starting(unsigned int cpu)
rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ rcu_cpu_starting(cpu); /* Enables RCU usage on this CPU. */
cpumask_set_cpu(cpu, &cpus_booted_once_mask); cpumask_set_cpu(cpu, &cpus_booted_once_mask);
while (st->state < target) { ret = cpuhp_invoke_callback_range(true, cpu, st, target);
st->state++;
ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); /*
/* * STARTING must not fail!
* STARTING must not fail! */
*/ WARN_ON_ONCE(ret);
WARN_ON_ONCE(ret);
}
} }
/* /*
...@@ -1777,8 +1825,7 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup, ...@@ -1777,8 +1825,7 @@ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup,
* If there's nothing to do, we done. * If there's nothing to do, we done.
* Relies on the union for multi_instance. * Relies on the union for multi_instance.
*/ */
if ((bringup && !sp->startup.single) || if (cpuhp_step_empty(bringup, sp))
(!bringup && !sp->teardown.single))
return 0; return 0;
/* /*
* The non AP bound callbacks can fail on bringup. On teardown * The non AP bound callbacks can fail on bringup. On teardown
...@@ -2207,6 +2254,11 @@ static ssize_t write_cpuhp_fail(struct device *dev, ...@@ -2207,6 +2254,11 @@ static ssize_t write_cpuhp_fail(struct device *dev,
if (ret) if (ret)
return ret; return ret;
if (fail == CPUHP_INVALID) {
st->fail = fail;
return count;
}
if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE) if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
return -EINVAL; return -EINVAL;
...@@ -2216,6 +2268,15 @@ static ssize_t write_cpuhp_fail(struct device *dev, ...@@ -2216,6 +2268,15 @@ static ssize_t write_cpuhp_fail(struct device *dev,
if (cpuhp_is_atomic_state(fail)) if (cpuhp_is_atomic_state(fail))
return -EINVAL; return -EINVAL;
/*
* DEAD callbacks cannot fail...
* ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter
* triggering STARTING callbacks, a failure in this state would
* hinder rollback.
*/
if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU)
return -EINVAL;
/* /*
* Cannot fail anything that doesn't have callbacks. * Cannot fail anything that doesn't have callbacks.
*/ */
...@@ -2460,6 +2521,9 @@ EXPORT_SYMBOL(__cpu_present_mask); ...@@ -2460,6 +2521,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
struct cpumask __cpu_active_mask __read_mostly; struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask); EXPORT_SYMBOL(__cpu_active_mask);
struct cpumask __cpu_dying_mask __read_mostly;
EXPORT_SYMBOL(__cpu_dying_mask);
atomic_t __num_online_cpus __read_mostly; atomic_t __num_online_cpus __read_mostly;
EXPORT_SYMBOL(__num_online_cpus); EXPORT_SYMBOL(__num_online_cpus);
......
...@@ -162,6 +162,7 @@ static void __exit_signal(struct task_struct *tsk) ...@@ -162,6 +162,7 @@ static void __exit_signal(struct task_struct *tsk)
flush_sigqueue(&sig->shared_pending); flush_sigqueue(&sig->shared_pending);
tty_kref_put(tty); tty_kref_put(tty);
} }
exit_task_sigqueue_cache(tsk);
} }
static void delayed_put_task_struct(struct rcu_head *rhp) static void delayed_put_task_struct(struct rcu_head *rhp)
......
...@@ -2009,6 +2009,7 @@ static __latent_entropy struct task_struct *copy_process( ...@@ -2009,6 +2009,7 @@ static __latent_entropy struct task_struct *copy_process(
spin_lock_init(&p->alloc_lock); spin_lock_init(&p->alloc_lock);
init_sigpending(&p->pending); init_sigpending(&p->pending);
p->sigqueue_cache = NULL;
p->utime = p->stime = p->gtime = 0; p->utime = p->stime = p->gtime = 0;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
......
...@@ -84,6 +84,25 @@ static inline struct kthread *to_kthread(struct task_struct *k) ...@@ -84,6 +84,25 @@ static inline struct kthread *to_kthread(struct task_struct *k)
return (__force void *)k->set_child_tid; return (__force void *)k->set_child_tid;
} }
/*
* Variant of to_kthread() that doesn't assume @p is a kthread.
*
* Per construction; when:
*
* (p->flags & PF_KTHREAD) && p->set_child_tid
*
* the task is both a kthread and struct kthread is persistent. However
* PF_KTHREAD on it's own is not, kernel_thread() can exec() (See umh.c and
* begin_new_exec()).
*/
static inline struct kthread *__to_kthread(struct task_struct *p)
{
void *kthread = (__force void *)p->set_child_tid;
if (kthread && !(p->flags & PF_KTHREAD))
kthread = NULL;
return kthread;
}
void free_kthread_struct(struct task_struct *k) void free_kthread_struct(struct task_struct *k)
{ {
struct kthread *kthread; struct kthread *kthread;
...@@ -168,8 +187,9 @@ EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); ...@@ -168,8 +187,9 @@ EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
*/ */
void *kthread_func(struct task_struct *task) void *kthread_func(struct task_struct *task)
{ {
if (task->flags & PF_KTHREAD) struct kthread *kthread = __to_kthread(task);
return to_kthread(task)->threadfn; if (kthread)
return kthread->threadfn;
return NULL; return NULL;
} }
EXPORT_SYMBOL_GPL(kthread_func); EXPORT_SYMBOL_GPL(kthread_func);
...@@ -199,10 +219,11 @@ EXPORT_SYMBOL_GPL(kthread_data); ...@@ -199,10 +219,11 @@ EXPORT_SYMBOL_GPL(kthread_data);
*/ */
void *kthread_probe_data(struct task_struct *task) void *kthread_probe_data(struct task_struct *task)
{ {
struct kthread *kthread = to_kthread(task); struct kthread *kthread = __to_kthread(task);
void *data = NULL; void *data = NULL;
copy_from_kernel_nofault(&data, &kthread->data, sizeof(data)); if (kthread)
copy_from_kernel_nofault(&data, &kthread->data, sizeof(data));
return data; return data;
} }
...@@ -514,9 +535,9 @@ void kthread_set_per_cpu(struct task_struct *k, int cpu) ...@@ -514,9 +535,9 @@ void kthread_set_per_cpu(struct task_struct *k, int cpu)
set_bit(KTHREAD_IS_PER_CPU, &kthread->flags); set_bit(KTHREAD_IS_PER_CPU, &kthread->flags);
} }
bool kthread_is_per_cpu(struct task_struct *k) bool kthread_is_per_cpu(struct task_struct *p)
{ {
struct kthread *kthread = to_kthread(k); struct kthread *kthread = __to_kthread(p);
if (!kthread) if (!kthread)
return false; return false;
......
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include <linux/cn_proc.h> #include <linux/cn_proc.h>
#include <linux/compat.h> #include <linux/compat.h>
#include <linux/sched/signal.h> #include <linux/sched/signal.h>
#include <linux/minmax.h>
#include <asm/syscall.h> /* for syscall_get_* */ #include <asm/syscall.h> /* for syscall_get_* */
...@@ -779,6 +780,24 @@ static int ptrace_peek_siginfo(struct task_struct *child, ...@@ -779,6 +780,24 @@ static int ptrace_peek_siginfo(struct task_struct *child,
return ret; return ret;
} }
#ifdef CONFIG_RSEQ
static long ptrace_get_rseq_configuration(struct task_struct *task,
unsigned long size, void __user *data)
{
struct ptrace_rseq_configuration conf = {
.rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
.rseq_abi_size = sizeof(*task->rseq),
.signature = task->rseq_sig,
.flags = 0,
};
size = min_t(unsigned long, size, sizeof(conf));
if (copy_to_user(data, &conf, size))
return -EFAULT;
return sizeof(conf);
}
#endif
#ifdef PTRACE_SINGLESTEP #ifdef PTRACE_SINGLESTEP
#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP) #define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
#else #else
...@@ -1222,6 +1241,12 @@ int ptrace_request(struct task_struct *child, long request, ...@@ -1222,6 +1241,12 @@ int ptrace_request(struct task_struct *child, long request,
ret = seccomp_get_metadata(child, addr, datavp); ret = seccomp_get_metadata(child, addr, datavp);
break; break;
#ifdef CONFIG_RSEQ
case PTRACE_GET_RSEQ_CONFIGURATION:
ret = ptrace_get_rseq_configuration(child, addr, datavp);
break;
#endif
default: default:
break; break;
} }
......
...@@ -84,13 +84,20 @@ ...@@ -84,13 +84,20 @@
static int rseq_update_cpu_id(struct task_struct *t) static int rseq_update_cpu_id(struct task_struct *t)
{ {
u32 cpu_id = raw_smp_processor_id(); u32 cpu_id = raw_smp_processor_id();
struct rseq __user *rseq = t->rseq;
if (put_user(cpu_id, &t->rseq->cpu_id_start)) if (!user_write_access_begin(rseq, sizeof(*rseq)))
return -EFAULT; goto efault;
if (put_user(cpu_id, &t->rseq->cpu_id)) unsafe_put_user(cpu_id, &rseq->cpu_id_start, efault_end);
return -EFAULT; unsafe_put_user(cpu_id, &rseq->cpu_id, efault_end);
user_write_access_end();
trace_rseq_update(t); trace_rseq_update(t);
return 0; return 0;
efault_end:
user_write_access_end();
efault:
return -EFAULT;
} }
static int rseq_reset_rseq_cpu_id(struct task_struct *t) static int rseq_reset_rseq_cpu_id(struct task_struct *t)
...@@ -120,8 +127,13 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs) ...@@ -120,8 +127,13 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
u32 sig; u32 sig;
int ret; int ret;
#ifdef CONFIG_64BIT
if (get_user(ptr, &t->rseq->rseq_cs.ptr64))
return -EFAULT;
#else
if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr))) if (copy_from_user(&ptr, &t->rseq->rseq_cs.ptr64, sizeof(ptr)))
return -EFAULT; return -EFAULT;
#endif
if (!ptr) { if (!ptr) {
memset(rseq_cs, 0, sizeof(*rseq_cs)); memset(rseq_cs, 0, sizeof(*rseq_cs));
return 0; return 0;
...@@ -204,9 +216,13 @@ static int clear_rseq_cs(struct task_struct *t) ...@@ -204,9 +216,13 @@ static int clear_rseq_cs(struct task_struct *t)
* *
* Set rseq_cs to NULL. * Set rseq_cs to NULL.
*/ */
#ifdef CONFIG_64BIT
return put_user(0UL, &t->rseq->rseq_cs.ptr64);
#else
if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64))) if (clear_user(&t->rseq->rseq_cs.ptr64, sizeof(t->rseq->rseq_cs.ptr64)))
return -EFAULT; return -EFAULT;
return 0; return 0;
#endif
} }
/* /*
...@@ -266,8 +282,6 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) ...@@ -266,8 +282,6 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
if (unlikely(t->flags & PF_EXITING)) if (unlikely(t->flags & PF_EXITING))
return; return;
if (unlikely(!access_ok(t->rseq, sizeof(*t->rseq))))
goto error;
ret = rseq_ip_fixup(regs); ret = rseq_ip_fixup(regs);
if (unlikely(ret < 0)) if (unlikely(ret < 0))
goto error; goto error;
...@@ -294,8 +308,7 @@ void rseq_syscall(struct pt_regs *regs) ...@@ -294,8 +308,7 @@ void rseq_syscall(struct pt_regs *regs)
if (!t->rseq) if (!t->rseq)
return; return;
if (!access_ok(t->rseq, sizeof(*t->rseq)) || if (rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
force_sig(SIGSEGV); force_sig(SIGSEGV);
} }
......
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
* Otherwise it tries to create a semi stable clock from a mixture of other * Otherwise it tries to create a semi stable clock from a mixture of other
* clocks, including: * clocks, including:
* *
* - GTOD (clock monotomic) * - GTOD (clock monotonic)
* - sched_clock() * - sched_clock()
* - explicit idle events * - explicit idle events
* *
......
...@@ -104,7 +104,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, ...@@ -104,7 +104,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu,
/* /*
* We allow index == CPUACCT_STAT_NSTATS here to read * We allow index == CPUACCT_STAT_NSTATS here to read
* the sum of suages. * the sum of usages.
*/ */
BUG_ON(index > CPUACCT_STAT_NSTATS); BUG_ON(index > CPUACCT_STAT_NSTATS);
......
...@@ -466,7 +466,7 @@ static void sugov_work(struct kthread_work *work) ...@@ -466,7 +466,7 @@ static void sugov_work(struct kthread_work *work)
/* /*
* Hold sg_policy->update_lock shortly to handle the case where: * Hold sg_policy->update_lock shortly to handle the case where:
* incase sg_policy->next_freq is read here, and then updated by * in case sg_policy->next_freq is read here, and then updated by
* sugov_deferred_update() just before work_in_progress is set to false * sugov_deferred_update() just before work_in_progress is set to false
* here, we may miss queueing the new update. * here, we may miss queueing the new update.
* *
......
...@@ -77,7 +77,7 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p, ...@@ -77,7 +77,7 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
* When looking at the vector, we need to read the counter, * When looking at the vector, we need to read the counter,
* do a memory barrier, then read the mask. * do a memory barrier, then read the mask.
* *
* Note: This is still all racey, but we can deal with it. * Note: This is still all racy, but we can deal with it.
* Ideally, we only want to look at masks that are set. * Ideally, we only want to look at masks that are set.
* *
* If a mask is not set, then the only thing wrong is that we * If a mask is not set, then the only thing wrong is that we
...@@ -186,7 +186,7 @@ int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p, ...@@ -186,7 +186,7 @@ int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p,
* The cost of this trade-off is not entirely clear and will probably * The cost of this trade-off is not entirely clear and will probably
* be good for some workloads and bad for others. * be good for some workloads and bad for others.
* *
* The main idea here is that if some CPUs were overcommitted, we try * The main idea here is that if some CPUs were over-committed, we try
* to spread which is what the scheduler traditionally did. Sys admins * to spread which is what the scheduler traditionally did. Sys admins
* must do proper RT planning to avoid overloading the system if they * must do proper RT planning to avoid overloading the system if they
* really care. * really care.
......
...@@ -563,7 +563,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, ...@@ -563,7 +563,7 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
/* /*
* If either stime or utime are 0, assume all runtime is userspace. * If either stime or utime are 0, assume all runtime is userspace.
* Once a task gets some ticks, the monotonicy code at 'update:' * Once a task gets some ticks, the monotonicity code at 'update:'
* will ensure things converge to the observed ratio. * will ensure things converge to the observed ratio.
*/ */
if (stime == 0) { if (stime == 0) {
......
...@@ -245,7 +245,7 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw) ...@@ -245,7 +245,7 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
p->dl.dl_non_contending = 0; p->dl.dl_non_contending = 0;
/* /*
* If the timer handler is currently running and the * If the timer handler is currently running and the
* timer cannot be cancelled, inactive_task_timer() * timer cannot be canceled, inactive_task_timer()
* will see that dl_not_contending is not set, and * will see that dl_not_contending is not set, and
* will not touch the rq's active utilization, * will not touch the rq's active utilization,
* so we are still safe. * so we are still safe.
...@@ -267,7 +267,7 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw) ...@@ -267,7 +267,7 @@ static void dl_change_utilization(struct task_struct *p, u64 new_bw)
* fires. * fires.
* *
* If the task wakes up again before the inactive timer fires, * If the task wakes up again before the inactive timer fires,
* the timer is cancelled, whereas if the task wakes up after the * the timer is canceled, whereas if the task wakes up after the
* inactive timer fired (and running_bw has been decreased) the * inactive timer fired (and running_bw has been decreased) the
* task's utilization has to be added to running_bw again. * task's utilization has to be added to running_bw again.
* A flag in the deadline scheduling entity (dl_non_contending) * A flag in the deadline scheduling entity (dl_non_contending)
...@@ -385,7 +385,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags) ...@@ -385,7 +385,7 @@ static void task_contending(struct sched_dl_entity *dl_se, int flags)
dl_se->dl_non_contending = 0; dl_se->dl_non_contending = 0;
/* /*
* If the timer handler is currently running and the * If the timer handler is currently running and the
* timer cannot be cancelled, inactive_task_timer() * timer cannot be canceled, inactive_task_timer()
* will see that dl_not_contending is not set, and * will see that dl_not_contending is not set, and
* will not touch the rq's active utilization, * will not touch the rq's active utilization,
* so we are still safe. * so we are still safe.
...@@ -1206,7 +1206,7 @@ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); ...@@ -1206,7 +1206,7 @@ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);
* Since rq->dl.running_bw and rq->dl.this_bw contain utilizations * Since rq->dl.running_bw and rq->dl.this_bw contain utilizations
* multiplied by 2^BW_SHIFT, the result has to be shifted right by * multiplied by 2^BW_SHIFT, the result has to be shifted right by
* BW_SHIFT. * BW_SHIFT.
* Since rq->dl.bw_ratio contains 1 / Umax multipled by 2^RATIO_SHIFT, * Since rq->dl.bw_ratio contains 1 / Umax multiplied by 2^RATIO_SHIFT,
* dl_bw is multiped by rq->dl.bw_ratio and shifted right by RATIO_SHIFT. * dl_bw is multiped by rq->dl.bw_ratio and shifted right by RATIO_SHIFT.
* Since delta is a 64 bit variable, to have an overflow its value * Since delta is a 64 bit variable, to have an overflow its value
* should be larger than 2^(64 - 20 - 8), which is more than 64 seconds. * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds.
...@@ -1737,7 +1737,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused ...@@ -1737,7 +1737,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
p->dl.dl_non_contending = 0; p->dl.dl_non_contending = 0;
/* /*
* If the timer handler is currently running and the * If the timer handler is currently running and the
* timer cannot be cancelled, inactive_task_timer() * timer cannot be canceled, inactive_task_timer()
* will see that dl_not_contending is not set, and * will see that dl_not_contending is not set, and
* will not touch the rq's active utilization, * will not touch the rq's active utilization,
* so we are still safe. * so we are still safe.
...@@ -2745,7 +2745,7 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr) ...@@ -2745,7 +2745,7 @@ void __getparam_dl(struct task_struct *p, struct sched_attr *attr)
/* /*
* Default limits for DL period; on the top end we guard against small util * Default limits for DL period; on the top end we guard against small util
* tasks still getting rediculous long effective runtimes, on the bottom end we * tasks still getting ridiculously long effective runtimes, on the bottom end we
* guard against timer DoS. * guard against timer DoS.
*/ */
unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */ unsigned int sysctl_sched_dl_period_max = 1 << 22; /* ~4 seconds */
......
...@@ -27,7 +27,7 @@ SCHED_FEAT(NEXT_BUDDY, false) ...@@ -27,7 +27,7 @@ SCHED_FEAT(NEXT_BUDDY, false)
SCHED_FEAT(LAST_BUDDY, true) SCHED_FEAT(LAST_BUDDY, true)
/* /*
* Consider buddies to be cache hot, decreases the likelyness of a * Consider buddies to be cache hot, decreases the likeliness of a
* cache buddy being migrated away, increases cache locality. * cache buddy being migrated away, increases cache locality.
*/ */
SCHED_FEAT(CACHE_HOT_BUDDY, true) SCHED_FEAT(CACHE_HOT_BUDDY, true)
...@@ -90,3 +90,8 @@ SCHED_FEAT(WA_BIAS, true) ...@@ -90,3 +90,8 @@ SCHED_FEAT(WA_BIAS, true)
*/ */
SCHED_FEAT(UTIL_EST, true) SCHED_FEAT(UTIL_EST, true)
SCHED_FEAT(UTIL_EST_FASTUP, true) SCHED_FEAT(UTIL_EST_FASTUP, true)
SCHED_FEAT(LATENCY_WARN, false)
SCHED_FEAT(ALT_PERIOD, true)
SCHED_FEAT(BASE_SLICE, true)
...@@ -163,7 +163,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev, ...@@ -163,7 +163,7 @@ static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* *
* NOTE: no locks or semaphores should be used here * NOTE: no locks or semaphores should be used here
* *
* On archs that support TIF_POLLING_NRFLAG, is called with polling * On architectures that support TIF_POLLING_NRFLAG, is called with polling
* set, and it returns with polling set. If it ever stops polling, it * set, and it returns with polling set. If it ever stops polling, it
* must clear the polling bit. * must clear the polling bit.
*/ */
...@@ -199,7 +199,7 @@ static void cpuidle_idle_call(void) ...@@ -199,7 +199,7 @@ static void cpuidle_idle_call(void)
* Suspend-to-idle ("s2idle") is a system state in which all user space * Suspend-to-idle ("s2idle") is a system state in which all user space
* has been frozen, all I/O devices have been suspended and the only * has been frozen, all I/O devices have been suspended and the only
* activity happens here and in interrupts (if any). In that case bypass * activity happens here and in interrupts (if any). In that case bypass
* the cpuidle governor and go stratight for the deepest idle state * the cpuidle governor and go straight for the deepest idle state
* available. Possibly also suspend the local tick and the entire * available. Possibly also suspend the local tick and the entire
* timekeeping to prevent timer interrupts from kicking us out of idle * timekeeping to prevent timer interrupts from kicking us out of idle
* until a proper wakeup interrupt happens. * until a proper wakeup interrupt happens.
...@@ -261,6 +261,12 @@ static void cpuidle_idle_call(void) ...@@ -261,6 +261,12 @@ static void cpuidle_idle_call(void)
static void do_idle(void) static void do_idle(void)
{ {
int cpu = smp_processor_id(); int cpu = smp_processor_id();
/*
* Check if we need to update blocked load
*/
nohz_run_idle_balance(cpu);
/* /*
* If the arch has a polling bit, we maintain an invariant: * If the arch has a polling bit, we maintain an invariant:
* *
......
...@@ -189,7 +189,7 @@ calc_load_n(unsigned long load, unsigned long exp, ...@@ -189,7 +189,7 @@ calc_load_n(unsigned long load, unsigned long exp,
* w:0 1 1 0 0 1 1 0 0 * w:0 1 1 0 0 1 1 0 0
* *
* This ensures we'll fold the old NO_HZ contribution in this window while * This ensures we'll fold the old NO_HZ contribution in this window while
* accumlating the new one. * accumulating the new one.
* *
* - When we wake up from NO_HZ during the window, we push up our * - When we wake up from NO_HZ during the window, we push up our
* contribution, since we effectively move our sample point to a known * contribution, since we effectively move our sample point to a known
......
...@@ -133,7 +133,7 @@ accumulate_sum(u64 delta, struct sched_avg *sa, ...@@ -133,7 +133,7 @@ accumulate_sum(u64 delta, struct sched_avg *sa,
* runnable = running = 0; * runnable = running = 0;
* *
* clause from ___update_load_sum(); this results in * clause from ___update_load_sum(); this results in
* the below usage of @contrib to dissapear entirely, * the below usage of @contrib to disappear entirely,
* so no point in calculating it. * so no point in calculating it.
*/ */
contrib = __accumulate_pelt_segments(periods, contrib = __accumulate_pelt_segments(periods,
......
...@@ -130,7 +130,7 @@ static inline void update_idle_rq_clock_pelt(struct rq *rq) ...@@ -130,7 +130,7 @@ static inline void update_idle_rq_clock_pelt(struct rq *rq)
* Reflecting stolen time makes sense only if the idle * Reflecting stolen time makes sense only if the idle
* phase would be present at max capacity. As soon as the * phase would be present at max capacity. As soon as the
* utilization of a rq has reached the maximum value, it is * utilization of a rq has reached the maximum value, it is
* considered as an always runnig rq without idle time to * considered as an always running rq without idle time to
* steal. This potential idle time is considered as lost in * steal. This potential idle time is considered as lost in
* this case. We keep track of this lost idle time compare to * this case. We keep track of this lost idle time compare to
* rq's clock_task. * rq's clock_task.
......
...@@ -700,7 +700,7 @@ static void do_balance_runtime(struct rt_rq *rt_rq) ...@@ -700,7 +700,7 @@ static void do_balance_runtime(struct rt_rq *rt_rq)
/* /*
* Either all rqs have inf runtime and there's nothing to steal * Either all rqs have inf runtime and there's nothing to steal
* or __disable_runtime() below sets a specific rq to inf to * or __disable_runtime() below sets a specific rq to inf to
* indicate its been disabled and disalow stealing. * indicate its been disabled and disallow stealing.
*/ */
if (iter->rt_runtime == RUNTIME_INF) if (iter->rt_runtime == RUNTIME_INF)
goto next; goto next;
...@@ -1998,7 +1998,7 @@ static void push_rt_tasks(struct rq *rq) ...@@ -1998,7 +1998,7 @@ static void push_rt_tasks(struct rq *rq)
* *
* Each root domain has its own irq work function that can iterate over * Each root domain has its own irq work function that can iterate over
* all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT * all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
* tassk must be checked if there's one or many CPUs that are lowering * task must be checked if there's one or many CPUs that are lowering
* their priority, there's a single irq work iterator that will try to * their priority, there's a single irq work iterator that will try to
* push off RT tasks that are waiting to run. * push off RT tasks that are waiting to run.
* *
...@@ -2216,7 +2216,7 @@ static void pull_rt_task(struct rq *this_rq) ...@@ -2216,7 +2216,7 @@ static void pull_rt_task(struct rq *this_rq)
/* /*
* There's a chance that p is higher in priority * There's a chance that p is higher in priority
* than what's currently running on its CPU. * than what's currently running on its CPU.
* This is just that p is wakeing up and hasn't * This is just that p is waking up and hasn't
* had a chance to schedule. We only pull * had a chance to schedule. We only pull
* p if it is lower in priority than the * p if it is lower in priority than the
* current task on the run queue * current task on the run queue
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include <uapi/linux/sched/types.h> #include <uapi/linux/sched/types.h>
#include <linux/binfmts.h> #include <linux/binfmts.h>
#include <linux/bitops.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/compat.h> #include <linux/compat.h>
#include <linux/context_tracking.h> #include <linux/context_tracking.h>
...@@ -57,6 +58,7 @@ ...@@ -57,6 +58,7 @@
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <linux/profile.h> #include <linux/profile.h>
#include <linux/psi.h> #include <linux/psi.h>
#include <linux/ratelimit.h>
#include <linux/rcupdate_wait.h> #include <linux/rcupdate_wait.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/stop_machine.h> #include <linux/stop_machine.h>
...@@ -204,6 +206,13 @@ static inline void update_avg(u64 *avg, u64 sample) ...@@ -204,6 +206,13 @@ static inline void update_avg(u64 *avg, u64 sample)
*avg += diff / 8; *avg += diff / 8;
} }
/*
* Shifting a value by an exponent greater *or equal* to the size of said value
* is UB; cap at size-1.
*/
#define shr_bound(val, shift) \
(val >> min_t(typeof(shift), shift, BITS_PER_TYPE(typeof(val)) - 1))
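As a quick illustration of why the new shr_bound() helper clamps the shift count: shifting a value by an amount greater than or equal to its width is undefined behaviour in C, so the macro caps the shift at width - 1. Below is a minimal userspace sketch of the same idea, not the kernel macro itself; BITS_PER_TYPE() and min_t() are re-created locally and the GNU typeof extension is assumed.

#include <stdio.h>
#include <stdint.h>
#include <limits.h>

/* Local stand-ins for the kernel's BITS_PER_TYPE() and min_t() helpers. */
#define BITS_PER_TYPE(type)	(sizeof(type) * CHAR_BIT)
#define min_t(type, a, b)	((type)(a) < (type)(b) ? (type)(a) : (type)(b))

/* Same idea as the kernel macro: never shift by >= the width of @val. */
#define shr_bound(val, shift) \
	((val) >> min_t(unsigned int, (shift), BITS_PER_TYPE(typeof(val)) - 1))

int main(void)
{
	uint32_t capacity = 1024;

	printf("%u\n", shr_bound(capacity, 2));		/* 256 */
	printf("%u\n", shr_bound(capacity, 40));	/* clamped to >> 31, prints 0 */
	return 0;
}

For over-large shifts the result simply degrades to 0 instead of being undefined, which is the behaviour the capacity-scaling callers want.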
/* /*
* !! For sched_setattr_nocheck() (kernel) only !! * !! For sched_setattr_nocheck() (kernel) only !!
* *
...@@ -963,6 +972,11 @@ struct rq { ...@@ -963,6 +972,11 @@ struct rq {
atomic_t nr_iowait; atomic_t nr_iowait;
#ifdef CONFIG_SCHED_DEBUG
u64 last_seen_need_resched_ns;
int ticks_without_resched;
#endif
#ifdef CONFIG_MEMBARRIER #ifdef CONFIG_MEMBARRIER
int membarrier_state; int membarrier_state;
#endif #endif
...@@ -975,7 +989,6 @@ struct rq { ...@@ -975,7 +989,6 @@ struct rq {
unsigned long cpu_capacity_orig; unsigned long cpu_capacity_orig;
struct callback_head *balance_callback; struct callback_head *balance_callback;
unsigned char balance_push;
unsigned char nohz_idle_balance; unsigned char nohz_idle_balance;
unsigned char idle_balance; unsigned char idle_balance;
...@@ -1147,7 +1160,7 @@ static inline u64 __rq_clock_broken(struct rq *rq) ...@@ -1147,7 +1160,7 @@ static inline u64 __rq_clock_broken(struct rq *rq)
* *
* if (rq-clock_update_flags >= RQCF_UPDATED) * if (rq-clock_update_flags >= RQCF_UPDATED)
* *
* to check if %RQCF_UPADTED is set. It'll never be shifted more than * to check if %RQCF_UPDATED is set. It'll never be shifted more than
* one position though, because the next rq_unpin_lock() will shift it * one position though, because the next rq_unpin_lock() will shift it
* back. * back.
*/ */
...@@ -1206,7 +1219,7 @@ static inline void rq_clock_skip_update(struct rq *rq) ...@@ -1206,7 +1219,7 @@ static inline void rq_clock_skip_update(struct rq *rq)
/* /*
* See rt task throttling, which is the only time a skip * See rt task throttling, which is the only time a skip
* request is cancelled. * request is canceled.
*/ */
static inline void rq_clock_cancel_skipupdate(struct rq *rq) static inline void rq_clock_cancel_skipupdate(struct rq *rq)
{ {
...@@ -1545,22 +1558,20 @@ static inline unsigned int group_first_cpu(struct sched_group *group) ...@@ -1545,22 +1558,20 @@ static inline unsigned int group_first_cpu(struct sched_group *group)
extern int group_balance_cpu(struct sched_group *sg); extern int group_balance_cpu(struct sched_group *sg);
#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) #ifdef CONFIG_SCHED_DEBUG
void register_sched_domain_sysctl(void); void update_sched_domain_debugfs(void);
void dirty_sched_domain_sysctl(int cpu); void dirty_sched_domain_sysctl(int cpu);
void unregister_sched_domain_sysctl(void);
#else #else
static inline void register_sched_domain_sysctl(void) static inline void update_sched_domain_debugfs(void)
{ {
} }
static inline void dirty_sched_domain_sysctl(int cpu) static inline void dirty_sched_domain_sysctl(int cpu)
{ {
} }
static inline void unregister_sched_domain_sysctl(void)
{
}
#endif #endif
extern int sched_update_scaling(void);
extern void flush_smp_call_function_from_idle(void); extern void flush_smp_call_function_from_idle(void);
#else /* !CONFIG_SMP: */ #else /* !CONFIG_SMP: */
...@@ -1853,7 +1864,7 @@ struct sched_class { ...@@ -1853,7 +1864,7 @@ struct sched_class {
/* /*
* The switched_from() call is allowed to drop rq->lock, therefore we * The switched_from() call is allowed to drop rq->lock, therefore we
* cannot assume the switched_from/switched_to pair is serliazed by * cannot assume the switched_from/switched_to pair is serialized by
* rq->lock. They are however serialized by p->pi_lock. * rq->lock. They are however serialized by p->pi_lock.
*/ */
void (*switched_from)(struct rq *this_rq, struct task_struct *task); void (*switched_from)(struct rq *this_rq, struct task_struct *task);
...@@ -2358,7 +2369,7 @@ extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq); ...@@ -2358,7 +2369,7 @@ extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq); extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
#ifdef CONFIG_SCHED_DEBUG #ifdef CONFIG_SCHED_DEBUG
extern bool sched_debug_enabled; extern bool sched_debug_verbose;
extern void print_cfs_stats(struct seq_file *m, int cpu); extern void print_cfs_stats(struct seq_file *m, int cpu);
extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_rt_stats(struct seq_file *m, int cpu);
...@@ -2366,6 +2377,8 @@ extern void print_dl_stats(struct seq_file *m, int cpu); ...@@ -2366,6 +2377,8 @@ extern void print_dl_stats(struct seq_file *m, int cpu);
extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq); extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
extern void resched_latency_warn(int cpu, u64 latency);
#ifdef CONFIG_NUMA_BALANCING #ifdef CONFIG_NUMA_BALANCING
extern void extern void
show_numa_stats(struct task_struct *p, struct seq_file *m); show_numa_stats(struct task_struct *p, struct seq_file *m);
...@@ -2373,6 +2386,8 @@ extern void ...@@ -2373,6 +2386,8 @@ extern void
print_numa_stats(struct seq_file *m, int node, unsigned long tsf, print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
unsigned long tpf, unsigned long gsf, unsigned long gpf); unsigned long tpf, unsigned long gsf, unsigned long gpf);
#endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_NUMA_BALANCING */
#else
static inline void resched_latency_warn(int cpu, u64 latency) {}
#endif /* CONFIG_SCHED_DEBUG */ #endif /* CONFIG_SCHED_DEBUG */
extern void init_cfs_rq(struct cfs_rq *cfs_rq); extern void init_cfs_rq(struct cfs_rq *cfs_rq);
...@@ -2385,9 +2400,11 @@ extern void cfs_bandwidth_usage_dec(void); ...@@ -2385,9 +2400,11 @@ extern void cfs_bandwidth_usage_dec(void);
#ifdef CONFIG_NO_HZ_COMMON #ifdef CONFIG_NO_HZ_COMMON
#define NOHZ_BALANCE_KICK_BIT 0 #define NOHZ_BALANCE_KICK_BIT 0
#define NOHZ_STATS_KICK_BIT 1 #define NOHZ_STATS_KICK_BIT 1
#define NOHZ_NEWILB_KICK_BIT 2
#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT) #define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT)
#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT) #define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT)
#define NOHZ_NEWILB_KICK BIT(NOHZ_NEWILB_KICK_BIT)
#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK) #define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
...@@ -2398,6 +2415,11 @@ extern void nohz_balance_exit_idle(struct rq *rq); ...@@ -2398,6 +2415,11 @@ extern void nohz_balance_exit_idle(struct rq *rq);
static inline void nohz_balance_exit_idle(struct rq *rq) { } static inline void nohz_balance_exit_idle(struct rq *rq) { }
#endif #endif
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
extern void nohz_run_idle_balance(int cpu);
#else
static inline void nohz_run_idle_balance(int cpu) { }
#endif
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
static inline static inline
...@@ -2437,7 +2459,7 @@ DECLARE_PER_CPU(struct irqtime, cpu_irqtime); ...@@ -2437,7 +2459,7 @@ DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
/* /*
* Returns the irqtime minus the softirq time computed by ksoftirqd. * Returns the irqtime minus the softirq time computed by ksoftirqd.
* Otherwise ksoftirqd's sum_exec_runtime is substracted its own runtime * Otherwise ksoftirqd's sum_exec_runtime is subtracted its own runtime
* and never move forward. * and never move forward.
*/ */
static inline u64 irq_time_read(int cpu) static inline u64 irq_time_read(int cpu)
...@@ -2718,5 +2740,12 @@ static inline bool is_per_cpu_kthread(struct task_struct *p) ...@@ -2718,5 +2740,12 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
} }
#endif #endif
void swake_up_all_locked(struct swait_queue_head *q); extern void swake_up_all_locked(struct swait_queue_head *q);
void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait); extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
#ifdef CONFIG_PREEMPT_DYNAMIC
extern int preempt_dynamic_mode;
extern int sched_dynamic_mode(const char *str);
extern void sched_dynamic_update(int mode);
#endif
...@@ -74,7 +74,7 @@ static int show_schedstat(struct seq_file *seq, void *v) ...@@ -74,7 +74,7 @@ static int show_schedstat(struct seq_file *seq, void *v)
} }
/* /*
* This itererator needs some explanation. * This iterator needs some explanation.
* It returns 1 for the header position. * It returns 1 for the header position.
* This means 2 is cpu 0. * This means 2 is cpu 0.
* In a hotplugged system some CPUs, including cpu 0, may be missing so we have * In a hotplugged system some CPUs, including cpu 0, may be missing so we have
......
...@@ -84,28 +84,24 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup) ...@@ -84,28 +84,24 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
static inline void psi_dequeue(struct task_struct *p, bool sleep) static inline void psi_dequeue(struct task_struct *p, bool sleep)
{ {
int clear = TSK_RUNNING, set = 0; int clear = TSK_RUNNING;
if (static_branch_likely(&psi_disabled)) if (static_branch_likely(&psi_disabled))
return; return;
if (!sleep) { /*
if (p->in_memstall) * A voluntary sleep is a dequeue followed by a task switch. To
clear |= TSK_MEMSTALL; * avoid walking all ancestors twice, psi_task_switch() handles
} else { * TSK_RUNNING and TSK_IOWAIT for us when it moves TSK_ONCPU.
/* * Do nothing here.
* When a task sleeps, schedule() dequeues it before */
* switching to the next one. Merge the clearing of if (sleep)
* TSK_RUNNING and TSK_ONCPU to save an unnecessary return;
* psi_task_change() call in psi_sched_switch().
*/
clear |= TSK_ONCPU;
if (p->in_iowait) if (p->in_memstall)
set |= TSK_IOWAIT; clear |= TSK_MEMSTALL;
}
psi_task_change(p, clear, set); psi_task_change(p, clear, 0);
} }
static inline void psi_ttwu_dequeue(struct task_struct *p) static inline void psi_ttwu_dequeue(struct task_struct *p)
...@@ -144,14 +140,6 @@ static inline void psi_sched_switch(struct task_struct *prev, ...@@ -144,14 +140,6 @@ static inline void psi_sched_switch(struct task_struct *prev,
psi_task_switch(prev, next, sleep); psi_task_switch(prev, next, sleep);
} }
static inline void psi_task_tick(struct rq *rq)
{
if (static_branch_likely(&psi_disabled))
return;
if (unlikely(rq->curr->in_memstall))
psi_memstall_tick(rq->curr, cpu_of(rq));
}
#else /* CONFIG_PSI */ #else /* CONFIG_PSI */
static inline void psi_enqueue(struct task_struct *p, bool wakeup) {} static inline void psi_enqueue(struct task_struct *p, bool wakeup) {}
static inline void psi_dequeue(struct task_struct *p, bool sleep) {} static inline void psi_dequeue(struct task_struct *p, bool sleep) {}
...@@ -159,7 +147,6 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) {} ...@@ -159,7 +147,6 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) {}
static inline void psi_sched_switch(struct task_struct *prev, static inline void psi_sched_switch(struct task_struct *prev,
struct task_struct *next, struct task_struct *next,
bool sleep) {} bool sleep) {}
static inline void psi_task_tick(struct rq *rq) {}
#endif /* CONFIG_PSI */ #endif /* CONFIG_PSI */
#ifdef CONFIG_SCHED_INFO #ifdef CONFIG_SCHED_INFO
......
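The psi_dequeue()/psi_task_switch() split above is easiest to read as a flag hand-off: on a voluntary sleep the dequeue path does nothing, and the context-switch path clears TSK_RUNNING together with TSK_ONCPU in a single pass over the ancestor cgroups. The following is a toy userspace model of that hand-off; the flag values, struct layout and function names are simplified stand-ins, not the kernel's.

#include <stdio.h>

#define TSK_RUNNING	0x01
#define TSK_IOWAIT	0x02
#define TSK_ONCPU	0x04
#define TSK_MEMSTALL	0x08

struct task { unsigned int psi_flags; int in_memstall; int in_iowait; };

/* Stand-in for the expensive walk over every ancestor cgroup. */
static void psi_task_change(struct task *t, unsigned int clear, unsigned int set)
{
	t->psi_flags = (t->psi_flags & ~clear) | set;
}

/* Mirrors the new psi_dequeue(): defer all work for voluntary sleeps. */
static void psi_dequeue(struct task *t, int sleep)
{
	unsigned int clear = TSK_RUNNING;

	if (sleep)
		return;
	if (t->in_memstall)
		clear |= TSK_MEMSTALL;
	psi_task_change(t, clear, 0);
}

/* Toy version of the switch-out half of psi_task_switch(). */
static void psi_switch_out(struct task *prev, int sleep)
{
	unsigned int clear = TSK_ONCPU, set = 0;

	if (sleep) {
		clear |= TSK_RUNNING;
		if (prev->in_iowait)
			set |= TSK_IOWAIT;
	}
	psi_task_change(prev, clear, set);
}

int main(void)
{
	struct task t = { .psi_flags = TSK_RUNNING | TSK_ONCPU };

	psi_dequeue(&t, 1);	/* voluntary sleep: no ancestor walk here */
	psi_switch_out(&t, 1);	/* one walk clears RUNNING and ONCPU together */
	printf("flags after sleep: %#x\n", t.psi_flags);	/* 0 */
	return 0;
}

The point is simply that the ancestors are walked once per voluntary sleep instead of twice.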
...@@ -14,15 +14,15 @@ static cpumask_var_t sched_domains_tmpmask2; ...@@ -14,15 +14,15 @@ static cpumask_var_t sched_domains_tmpmask2;
static int __init sched_debug_setup(char *str) static int __init sched_debug_setup(char *str)
{ {
sched_debug_enabled = true; sched_debug_verbose = true;
return 0; return 0;
} }
early_param("sched_debug", sched_debug_setup); early_param("sched_verbose", sched_debug_setup);
static inline bool sched_debug(void) static inline bool sched_debug(void)
{ {
return sched_debug_enabled; return sched_debug_verbose;
} }
#define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name }, #define SD_FLAG(_name, mflags) [__##_name] = { .meta_flags = mflags, .name = #_name },
...@@ -131,7 +131,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) ...@@ -131,7 +131,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
{ {
int level = 0; int level = 0;
if (!sched_debug_enabled) if (!sched_debug_verbose)
return; return;
if (!sd) { if (!sd) {
...@@ -152,7 +152,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) ...@@ -152,7 +152,7 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu)
} }
#else /* !CONFIG_SCHED_DEBUG */ #else /* !CONFIG_SCHED_DEBUG */
# define sched_debug_enabled 0 # define sched_debug_verbose 0
# define sched_domain_debug(sd, cpu) do { } while (0) # define sched_domain_debug(sd, cpu) do { } while (0)
static inline bool sched_debug(void) static inline bool sched_debug(void)
{ {
...@@ -723,35 +723,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) ...@@ -723,35 +723,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
for (tmp = sd; tmp; tmp = tmp->parent) for (tmp = sd; tmp; tmp = tmp->parent)
numa_distance += !!(tmp->flags & SD_NUMA); numa_distance += !!(tmp->flags & SD_NUMA);
/*
* FIXME: Diameter >=3 is misrepresented.
*
* Smallest diameter=3 topology is:
*
* node 0 1 2 3
* 0: 10 20 30 40
* 1: 20 10 20 30
* 2: 30 20 10 20
* 3: 40 30 20 10
*
* 0 --- 1 --- 2 --- 3
*
* NUMA-3 0-3 N/A N/A 0-3
* groups: {0-2},{1-3} {1-3},{0-2}
*
* NUMA-2 0-2 0-3 0-3 1-3
* groups: {0-1},{1-3} {0-2},{2-3} {1-3},{0-1} {2-3},{0-2}
*
* NUMA-1 0-1 0-2 1-3 2-3
* groups: {0},{1} {1},{2},{0} {2},{3},{1} {3},{2}
*
* NUMA-0 0 1 2 3
*
* The NUMA-2 groups for nodes 0 and 3 are obviously buggered, as the
* group span isn't a subset of the domain span.
*/
WARN_ONCE(numa_distance > 2, "Shortest NUMA path spans too many nodes\n");
sched_domain_debug(sd, cpu); sched_domain_debug(sd, cpu);
rq_attach_root(rq, rd); rq_attach_root(rq, rd);
...@@ -963,7 +934,7 @@ static void init_overlap_sched_group(struct sched_domain *sd, ...@@ -963,7 +934,7 @@ static void init_overlap_sched_group(struct sched_domain *sd,
int cpu; int cpu;
build_balance_mask(sd, sg, mask); build_balance_mask(sd, sg, mask);
cpu = cpumask_first_and(sched_group_span(sg), mask); cpu = cpumask_first(mask);
sg->sgc = *per_cpu_ptr(sdd->sgc, cpu); sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
if (atomic_inc_return(&sg->sgc->ref) == 1) if (atomic_inc_return(&sg->sgc->ref) == 1)
...@@ -982,6 +953,31 @@ static void init_overlap_sched_group(struct sched_domain *sd, ...@@ -982,6 +953,31 @@ static void init_overlap_sched_group(struct sched_domain *sd,
sg->sgc->max_capacity = SCHED_CAPACITY_SCALE; sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
} }
static struct sched_domain *
find_descended_sibling(struct sched_domain *sd, struct sched_domain *sibling)
{
/*
* The proper descendant would be the one whose child won't span out
* of sd
*/
while (sibling->child &&
!cpumask_subset(sched_domain_span(sibling->child),
sched_domain_span(sd)))
sibling = sibling->child;
/*
* As we are referencing sgc across different topology level, we need
* to go down to skip those sched_domains which don't contribute to
* scheduling because they will be degenerated in cpu_attach_domain
*/
while (sibling->child &&
cpumask_equal(sched_domain_span(sibling->child),
sched_domain_span(sibling)))
sibling = sibling->child;
return sibling;
}
static int static int
build_overlap_sched_groups(struct sched_domain *sd, int cpu) build_overlap_sched_groups(struct sched_domain *sd, int cpu)
{ {
...@@ -1015,6 +1011,41 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) ...@@ -1015,6 +1011,41 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
if (!cpumask_test_cpu(i, sched_domain_span(sibling))) if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
continue; continue;
/*
* Usually we build sched_group by sibling's child sched_domain
* But for machines whose NUMA diameter are 3 or above, we move
* to build sched_group by sibling's proper descendant's child
* domain because sibling's child sched_domain will span out of
* the sched_domain being built as below.
*
* Smallest diameter=3 topology is:
*
* node 0 1 2 3
* 0: 10 20 30 40
* 1: 20 10 20 30
* 2: 30 20 10 20
* 3: 40 30 20 10
*
* 0 --- 1 --- 2 --- 3
*
* NUMA-3 0-3 N/A N/A 0-3
* groups: {0-2},{1-3} {1-3},{0-2}
*
* NUMA-2 0-2 0-3 0-3 1-3
* groups: {0-1},{1-3} {0-2},{2-3} {1-3},{0-1} {2-3},{0-2}
*
* NUMA-1 0-1 0-2 1-3 2-3
* groups: {0},{1} {1},{2},{0} {2},{3},{1} {3},{2}
*
* NUMA-0 0 1 2 3
*
* The NUMA-2 groups for nodes 0 and 3 are obviously buggered, as the
* group span isn't a subset of the domain span.
*/
if (sibling->child &&
!cpumask_subset(sched_domain_span(sibling->child), span))
sibling = find_descended_sibling(sd, sibling);
sg = build_group_from_child_sched_domain(sibling, cpu); sg = build_group_from_child_sched_domain(sibling, cpu);
if (!sg) if (!sg)
goto fail; goto fail;
...@@ -1022,7 +1053,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) ...@@ -1022,7 +1053,7 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
sg_span = sched_group_span(sg); sg_span = sched_group_span(sg);
cpumask_or(covered, covered, sg_span); cpumask_or(covered, covered, sg_span);
init_overlap_sched_group(sd, sg); init_overlap_sched_group(sibling, sg);
if (!first) if (!first)
first = sg; first = sg;
...@@ -2110,7 +2141,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att ...@@ -2110,7 +2141,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
if (has_asym) if (has_asym)
static_branch_inc_cpuslocked(&sched_asym_cpucapacity); static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
if (rq && sched_debug_enabled) { if (rq && sched_debug_verbose) {
pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n", pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity); cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
} }
...@@ -2128,7 +2159,7 @@ static cpumask_var_t *doms_cur; ...@@ -2128,7 +2159,7 @@ static cpumask_var_t *doms_cur;
/* Number of sched domains in 'doms_cur': */ /* Number of sched domains in 'doms_cur': */
static int ndoms_cur; static int ndoms_cur;
/* Attribues of custom domains in 'doms_cur' */ /* Attributes of custom domains in 'doms_cur' */
static struct sched_domain_attr *dattr_cur; static struct sched_domain_attr *dattr_cur;
/* /*
...@@ -2192,7 +2223,6 @@ int sched_init_domains(const struct cpumask *cpu_map) ...@@ -2192,7 +2223,6 @@ int sched_init_domains(const struct cpumask *cpu_map)
doms_cur = &fallback_doms; doms_cur = &fallback_doms;
cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN)); cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
err = build_sched_domains(doms_cur[0], NULL); err = build_sched_domains(doms_cur[0], NULL);
register_sched_domain_sysctl();
return err; return err;
} }
...@@ -2267,9 +2297,6 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[], ...@@ -2267,9 +2297,6 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
lockdep_assert_held(&sched_domains_mutex); lockdep_assert_held(&sched_domains_mutex);
/* Always unregister in case we don't destroy any domains: */
unregister_sched_domain_sysctl();
/* Let the architecture update CPU core mappings: */ /* Let the architecture update CPU core mappings: */
new_topology = arch_update_cpu_topology(); new_topology = arch_update_cpu_topology();
...@@ -2358,7 +2385,7 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[], ...@@ -2358,7 +2385,7 @@ void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
dattr_cur = dattr_new; dattr_cur = dattr_new;
ndoms_cur = ndoms_new; ndoms_cur = ndoms_new;
register_sched_domain_sysctl(); update_sched_domain_debugfs();
} }
/* /*
......
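The diameter-3 fix in the topology code above boils down to a cpumask subset test: if the sibling's child domain would spill outside the domain being built, walk down to a descendant whose child stays inside it. A toy model with 8-bit node masks, using the example topology from the comment (nodes 0-1-2-3 in a line), shows the check; the masks and helper are purely illustrative.

#include <stdio.h>
#include <stdint.h>

/* Spans are bitmasks of node ids: bit n set means node n is in the span. */
static int is_subset(uint8_t sub, uint8_t super)
{
	return (sub & ~super) == 0;
}

int main(void)
{
	uint8_t domain_span   = 0x07;	/* node 0, NUMA-2 domain: {0,1,2} */
	uint8_t sibling_child = 0x0e;	/* node 2, NUMA-1 span:    {1,2,3} */
	uint8_t descended     = 0x04;	/* node 2, NUMA-0 span:    {2}     */

	printf("naive child span inside domain? %d\n",
	       is_subset(sibling_child, domain_span));	/* 0: would leak node 3 */
	printf("descended span inside domain?   %d\n",
	       is_subset(descended, domain_span));	/* 1: safe to use */
	return 0;
}

In the real code the same test is cpumask_subset(sched_domain_span(sibling->child), span), and init_overlap_sched_group() is then passed the possibly descended sibling so the group capacity data matches the span that is actually used.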
...@@ -408,7 +408,8 @@ void task_join_group_stop(struct task_struct *task) ...@@ -408,7 +408,8 @@ void task_join_group_stop(struct task_struct *task)
* appropriate lock must be held to stop the target task from exiting * appropriate lock must be held to stop the target task from exiting
*/ */
static struct sigqueue * static struct sigqueue *
__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) __sigqueue_alloc(int sig, struct task_struct *t, gfp_t gfp_flags,
int override_rlimit, const unsigned int sigqueue_flags)
{ {
struct sigqueue *q = NULL; struct sigqueue *q = NULL;
struct user_struct *user; struct user_struct *user;
...@@ -430,7 +431,16 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi ...@@ -430,7 +431,16 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
rcu_read_unlock(); rcu_read_unlock();
if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
q = kmem_cache_alloc(sigqueue_cachep, flags); /*
* Preallocation does not hold sighand::siglock so it can't
* use the cache. The lockless caching requires that only
* one consumer and only one producer run at a time.
*/
q = READ_ONCE(t->sigqueue_cache);
if (!q || sigqueue_flags)
q = kmem_cache_alloc(sigqueue_cachep, gfp_flags);
else
WRITE_ONCE(t->sigqueue_cache, NULL);
} else { } else {
print_dropped_signal(sig); print_dropped_signal(sig);
} }
...@@ -440,20 +450,51 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi ...@@ -440,20 +450,51 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi
free_uid(user); free_uid(user);
} else { } else {
INIT_LIST_HEAD(&q->list); INIT_LIST_HEAD(&q->list);
q->flags = 0; q->flags = sigqueue_flags;
q->user = user; q->user = user;
} }
return q; return q;
} }
void exit_task_sigqueue_cache(struct task_struct *tsk)
{
/* Race free because @tsk is mopped up */
struct sigqueue *q = tsk->sigqueue_cache;
if (q) {
tsk->sigqueue_cache = NULL;
/*
* Hand it back to the cache as the task might
* be self reaping which would leak the object.
*/
kmem_cache_free(sigqueue_cachep, q);
}
}
static void sigqueue_cache_or_free(struct sigqueue *q)
{
/*
* Cache one sigqueue per task. This pairs with the consumer side
* in __sigqueue_alloc() and needs READ/WRITE_ONCE() to prevent the
* compiler from store tearing and to tell KCSAN that the data race
* is intentional when run without holding current->sighand->siglock,
* which is fine as current obviously cannot run __sigqueue_free()
* concurrently.
*/
if (!READ_ONCE(current->sigqueue_cache))
WRITE_ONCE(current->sigqueue_cache, q);
else
kmem_cache_free(sigqueue_cachep, q);
}
static void __sigqueue_free(struct sigqueue *q) static void __sigqueue_free(struct sigqueue *q)
{ {
if (q->flags & SIGQUEUE_PREALLOC) if (q->flags & SIGQUEUE_PREALLOC)
return; return;
if (atomic_dec_and_test(&q->user->sigpending)) if (atomic_dec_and_test(&q->user->sigpending))
free_uid(q->user); free_uid(q->user);
kmem_cache_free(sigqueue_cachep, q); sigqueue_cache_or_free(q);
} }
void flush_sigqueue(struct sigpending *queue) void flush_sigqueue(struct sigpending *queue)
...@@ -1111,7 +1152,8 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc ...@@ -1111,7 +1152,8 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
else else
override_rlimit = 0; override_rlimit = 0;
q = __sigqueue_alloc(sig, t, GFP_ATOMIC, override_rlimit); q = __sigqueue_alloc(sig, t, GFP_ATOMIC, override_rlimit, 0);
if (q) { if (q) {
list_add_tail(&q->list, &pending->list); list_add_tail(&q->list, &pending->list);
switch ((unsigned long) info) { switch ((unsigned long) info) {
...@@ -1806,12 +1848,7 @@ EXPORT_SYMBOL(kill_pid); ...@@ -1806,12 +1848,7 @@ EXPORT_SYMBOL(kill_pid);
*/ */
struct sigqueue *sigqueue_alloc(void) struct sigqueue *sigqueue_alloc(void)
{ {
struct sigqueue *q = __sigqueue_alloc(-1, current, GFP_KERNEL, 0); return __sigqueue_alloc(-1, current, GFP_KERNEL, 0, SIGQUEUE_PREALLOC);
if (q)
q->flags |= SIGQUEUE_PREALLOC;
return q;
} }
void sigqueue_free(struct sigqueue *q) void sigqueue_free(struct sigqueue *q)
......
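The per-task sigqueue cache introduced above is a single-slot free list: the owning task is the only producer (in __sigqueue_free()) and the only consumer (in __sigqueue_alloc() under sighand::siglock), so a plain pointer accessed with READ_ONCE()/WRITE_ONCE() suffices. Below is a rough userspace sketch of the same pattern, with a thread-local slot standing in for task_struct::sigqueue_cache; the names and the malloc-based allocator are illustrative only.

#include <stdlib.h>
#include <stdio.h>

struct obj { int payload; };

/* One cached object per thread; plain loads/stores stand in for
 * READ_ONCE()/WRITE_ONCE(), assuming (as the kernel does) that at most
 * one alloc and one free for this slot are ever in flight at a time. */
static _Thread_local struct obj *slot_cache;

static struct obj *obj_alloc(void)
{
	struct obj *o = slot_cache;

	if (o) {
		slot_cache = NULL;	/* consume the cached object */
		return o;
	}
	return malloc(sizeof(*o));	/* slow path: fresh allocation */
}

static void obj_free(struct obj *o)
{
	if (!slot_cache)
		slot_cache = o;		/* park one object for reuse */
	else
		free(o);		/* cache already full: really free it */
}

int main(void)
{
	struct obj *a = obj_alloc();
	struct obj *b;

	obj_free(a);			/* goes into the cache */
	b = obj_alloc();
	printf("reused: %d\n", b == a);	/* prints 1 */
	free(b);
	return 0;
}

In the kernel the cache is drained by exit_task_sigqueue_cache() so a self-reaping task does not leak the parked object.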
...@@ -409,6 +409,7 @@ static bool queue_stop_cpus_work(const struct cpumask *cpumask, ...@@ -409,6 +409,7 @@ static bool queue_stop_cpus_work(const struct cpumask *cpumask,
work->fn = fn; work->fn = fn;
work->arg = arg; work->arg = arg;
work->done = done; work->done = done;
work->caller = _RET_IP_;
if (cpu_stop_queue_work(cpu, work)) if (cpu_stop_queue_work(cpu, work))
queued = true; queued = true;
} }
......
...@@ -184,17 +184,6 @@ static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT; ...@@ -184,17 +184,6 @@ static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
int sysctl_legacy_va_layout; int sysctl_legacy_va_layout;
#endif #endif
#ifdef CONFIG_SCHED_DEBUG
static int min_sched_granularity_ns = 100000; /* 100 usecs */
static int max_sched_granularity_ns = NSEC_PER_SEC; /* 1 second */
static int min_wakeup_granularity_ns; /* 0 usecs */
static int max_wakeup_granularity_ns = NSEC_PER_SEC; /* 1 second */
#ifdef CONFIG_SMP
static int min_sched_tunable_scaling = SCHED_TUNABLESCALING_NONE;
static int max_sched_tunable_scaling = SCHED_TUNABLESCALING_END-1;
#endif /* CONFIG_SMP */
#endif /* CONFIG_SCHED_DEBUG */
#ifdef CONFIG_COMPACTION #ifdef CONFIG_COMPACTION
static int min_extfrag_threshold; static int min_extfrag_threshold;
static int max_extfrag_threshold = 1000; static int max_extfrag_threshold = 1000;
...@@ -1659,58 +1648,6 @@ static struct ctl_table kern_table[] = { ...@@ -1659,58 +1648,6 @@ static struct ctl_table kern_table[] = {
.mode = 0644, .mode = 0644,
.proc_handler = proc_dointvec, .proc_handler = proc_dointvec,
}, },
#ifdef CONFIG_SCHED_DEBUG
{
.procname = "sched_min_granularity_ns",
.data = &sysctl_sched_min_granularity,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_proc_update_handler,
.extra1 = &min_sched_granularity_ns,
.extra2 = &max_sched_granularity_ns,
},
{
.procname = "sched_latency_ns",
.data = &sysctl_sched_latency,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_proc_update_handler,
.extra1 = &min_sched_granularity_ns,
.extra2 = &max_sched_granularity_ns,
},
{
.procname = "sched_wakeup_granularity_ns",
.data = &sysctl_sched_wakeup_granularity,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = sched_proc_update_handler,
.extra1 = &min_wakeup_granularity_ns,
.extra2 = &max_wakeup_granularity_ns,
},
#ifdef CONFIG_SMP
{
.procname = "sched_tunable_scaling",
.data = &sysctl_sched_tunable_scaling,
.maxlen = sizeof(enum sched_tunable_scaling),
.mode = 0644,
.proc_handler = sched_proc_update_handler,
.extra1 = &min_sched_tunable_scaling,
.extra2 = &max_sched_tunable_scaling,
},
{
.procname = "sched_migration_cost_ns",
.data = &sysctl_sched_migration_cost,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "sched_nr_migrate",
.data = &sysctl_sched_nr_migrate,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
#ifdef CONFIG_SCHEDSTATS #ifdef CONFIG_SCHEDSTATS
{ {
.procname = "sched_schedstats", .procname = "sched_schedstats",
...@@ -1722,37 +1659,7 @@ static struct ctl_table kern_table[] = { ...@@ -1722,37 +1659,7 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_ONE, .extra2 = SYSCTL_ONE,
}, },
#endif /* CONFIG_SCHEDSTATS */ #endif /* CONFIG_SCHEDSTATS */
#endif /* CONFIG_SMP */
#ifdef CONFIG_NUMA_BALANCING #ifdef CONFIG_NUMA_BALANCING
{
.procname = "numa_balancing_scan_delay_ms",
.data = &sysctl_numa_balancing_scan_delay,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "numa_balancing_scan_period_min_ms",
.data = &sysctl_numa_balancing_scan_period_min,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "numa_balancing_scan_period_max_ms",
.data = &sysctl_numa_balancing_scan_period_max,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "numa_balancing_scan_size_mb",
.data = &sysctl_numa_balancing_scan_size,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ONE,
},
{ {
.procname = "numa_balancing", .procname = "numa_balancing",
.data = NULL, /* filled in by handler */ .data = NULL, /* filled in by handler */
...@@ -1763,7 +1670,6 @@ static struct ctl_table kern_table[] = { ...@@ -1763,7 +1670,6 @@ static struct ctl_table kern_table[] = {
.extra2 = SYSCTL_ONE, .extra2 = SYSCTL_ONE,
}, },
#endif /* CONFIG_NUMA_BALANCING */ #endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
{ {
.procname = "sched_rt_period_us", .procname = "sched_rt_period_us",
.data = &sysctl_sched_rt_period, .data = &sysctl_sched_rt_period,
......
...@@ -1710,7 +1710,6 @@ config LATENCYTOP ...@@ -1710,7 +1710,6 @@ config LATENCYTOP
select KALLSYMS_ALL select KALLSYMS_ALL
select STACKTRACE select STACKTRACE
select SCHEDSTATS select SCHEDSTATS
select SCHED_DEBUG
help help
Enable this option if you want to use the LatencyTOP tool Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations. to find out which userspace is blocking on what kernel operations.
......
...@@ -60,6 +60,7 @@ ...@@ -60,6 +60,7 @@
#include <linux/prefetch.h> #include <linux/prefetch.h>
#include <linux/if_vlan.h> #include <linux/if_vlan.h>
#include <linux/mpls.h> #include <linux/mpls.h>
#include <linux/kcov.h>
#include <net/protocol.h> #include <net/protocol.h>
#include <net/dst.h> #include <net/dst.h>
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/if_arp.h> #include <linux/if_arp.h>
#include <linux/netdevice.h> #include <linux/netdevice.h>
#include <linux/rtnetlink.h> #include <linux/rtnetlink.h>
#include <linux/kcov.h>
#include <net/mac80211.h> #include <net/mac80211.h>
#include <net/ieee80211_radiotap.h> #include <net/ieee80211_radiotap.h>
#include "ieee80211_i.h" #include "ieee80211_i.h"
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include <linux/etherdevice.h> #include <linux/etherdevice.h>
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/kcov.h>
#include <linux/bitops.h> #include <linux/bitops.h>
#include <net/mac80211.h> #include <net/mac80211.h>
#include <net/ieee80211_radiotap.h> #include <net/ieee80211_radiotap.h>
......