Commit 5ac99857 authored by Tejun Heo

Merge branch 'tip/sched/core' into for-6.12

To receive 863ccdbb ("sched: Allow sched_class::dequeue_task() to fail"),
which makes sched_class.dequeue_task() return bool instead of void. The new
return type causes a compile breakage in this tree, which will be fixed by a
follow-up patch.
Signed-off-by: Tejun Heo <tj@kernel.org>
parents 89909296 aef6987d
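
The interface change this merge pulls in is easy to misread in the flattened diff below, so here is a small stand-alone C model of the contract the hunks implement. This is not kernel code and every name in it is illustrative: the point is only that dequeue_task() now returns bool, that a false return is allowed solely for a sleep-time dequeue and means the task stays queued as "delayed", and that the caller (block_task() in the hunks) performs the blocking bookkeeping only when the dequeue actually happened.

```c
/* Stand-alone model of the new dequeue contract; illustrative, not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define DEQUEUE_SLEEP 0x01

struct task {
	const char *name;
	bool on_rq;          /* still enqueued on the runqueue      */
	bool sched_delayed;  /* dequeue was deferred                */
	bool eligible;       /* stand-in for "has non-negative lag" */
};

/* Models sched_class::dequeue_task(): may "fail" only for DEQUEUE_SLEEP. */
static bool class_dequeue(struct task *p, int flags)
{
	if ((flags & DEQUEUE_SLEEP) && !p->eligible) {
		p->sched_delayed = true;   /* keep it in the competition */
		return false;
	}
	p->sched_delayed = false;
	return true;
}

/* Models block_task(): only do the blocking bookkeeping on success. */
static void block_task(struct task *p)
{
	if (class_dequeue(p, DEQUEUE_SLEEP)) {
		p->on_rq = false;          /* __block_task() equivalent */
		printf("%s: fully blocked\n", p->name);
	} else {
		printf("%s: dequeue deferred (sched_delayed)\n", p->name);
	}
}

int main(void)
{
	struct task a = { "eligible-task", true, false, true  };
	struct task b = { "negative-lag",  true, false, false };

	block_task(&a);   /* prints: eligible-task: fully blocked */
	block_task(&b);   /* prints: negative-lag: dequeue deferred (sched_delayed) */
	return 0;
}
```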
@@ -335,7 +335,7 @@ static inline bool six_owner_running(struct six_lock *lock)
     */
    rcu_read_lock();
    struct task_struct *owner = READ_ONCE(lock->owner);
-   bool ret = owner ? owner_on_cpu(owner) : !rt_task(current);
+   bool ret = owner ? owner_on_cpu(owner) : !rt_or_dl_task(current);
    rcu_read_unlock();
    return ret;
...
@@ -82,7 +82,7 @@ u64 select_estimate_accuracy(struct timespec64 *tv)
     * Realtime tasks get a slack of 0 for obvious reasons.
     */
-   if (rt_task(current))
+   if (rt_or_dl_task(current))
        return 0;
    ktime_get_ts64(&now);
...
@@ -40,7 +40,7 @@ static inline int task_nice_ioclass(struct task_struct *task)
 {
    if (task->policy == SCHED_IDLE)
        return IOPRIO_CLASS_IDLE;
-   else if (task_is_realtime(task))
+   else if (rt_or_dl_task_policy(task))
        return IOPRIO_CLASS_RT;
    else
        return IOPRIO_CLASS_BE;
...
@@ -152,7 +152,8 @@ struct user_event_mm;
  * the comment with set_special_state().
  */
 #define is_special_task_state(state)				\
-   ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | TASK_DEAD))
+   ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED |	\
+           TASK_DEAD | TASK_FROZEN))

 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 # define debug_normal_state_change(state_value)		\
@@ -543,9 +544,14 @@ struct sched_entity {
    struct rb_node      run_node;
    u64                 deadline;
    u64                 min_vruntime;
+   u64                 min_slice;

    struct list_head    group_node;
-   unsigned int        on_rq;
+   unsigned char       on_rq;
+   unsigned char       sched_delayed;
+   unsigned char       rel_deadline;
+   unsigned char       custom_slice;
+                       /* hole */

    u64                 exec_start;
    u64                 sum_exec_runtime;
...
@@ -10,16 +10,16 @@

 #include <linux/sched.h>

-#define MAX_DL_PRIO    0
-
-static inline int dl_prio(int prio)
+static inline bool dl_prio(int prio)
 {
-   if (unlikely(prio < MAX_DL_PRIO))
-       return 1;
-   return 0;
+   return unlikely(prio < MAX_DL_PRIO);
 }

-static inline int dl_task(struct task_struct *p)
+/*
+ * Returns true if a task has a priority that belongs to DL class. PI-boosted
+ * tasks will return true. Use dl_policy() to ignore PI-boosted tasks.
+ */
+static inline bool dl_task(struct task_struct *p)
 {
    return dl_prio(p->prio);
 }
...
@@ -14,6 +14,7 @@
  */

 #define MAX_RT_PRIO     100
+#define MAX_DL_PRIO     0

 #define MAX_PRIO        (MAX_RT_PRIO + NICE_WIDTH)
 #define DEFAULT_PRIO    (MAX_RT_PRIO + NICE_WIDTH / 2)
...
@@ -6,19 +6,40 @@

 struct task_struct;

-static inline int rt_prio(int prio)
+static inline bool rt_prio(int prio)
 {
-   if (unlikely(prio < MAX_RT_PRIO))
-       return 1;
-   return 0;
+   return unlikely(prio < MAX_RT_PRIO && prio >= MAX_DL_PRIO);
 }

-static inline int rt_task(struct task_struct *p)
+static inline bool rt_or_dl_prio(int prio)
+{
+   return unlikely(prio < MAX_RT_PRIO);
+}
+
+/*
+ * Returns true if a task has a priority that belongs to RT class. PI-boosted
+ * tasks will return true. Use rt_policy() to ignore PI-boosted tasks.
+ */
+static inline bool rt_task(struct task_struct *p)
 {
    return rt_prio(p->prio);
 }

-static inline bool task_is_realtime(struct task_struct *tsk)
+/*
+ * Returns true if a task has a priority that belongs to RT or DL classes.
+ * PI-boosted tasks will return true. Use rt_or_dl_task_policy() to ignore
+ * PI-boosted tasks.
+ */
+static inline bool rt_or_dl_task(struct task_struct *p)
+{
+   return rt_or_dl_prio(p->prio);
+}
+
+/*
+ * Returns true if a task has a policy that belongs to RT or DL classes.
+ * PI-boosted tasks will return false.
+ */
+static inline bool rt_or_dl_task_policy(struct task_struct *tsk)
 {
    int policy = tsk->policy;
...
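
The new helpers split "is this task currently at a realtime priority" from "was this task given a realtime policy". A minimal stand-alone harness, reusing the MAX_RT_PRIO/MAX_DL_PRIO values and uapi policy numbers but otherwise purely illustrative, shows why the distinction matters for PI-boosted tasks: a SCHED_NORMAL task temporarily boosted to an RT priority satisfies rt_or_dl_task() but not rt_or_dl_task_policy().

```c
/* Illustrative harness around the helpers added above; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define MAX_DL_PRIO    0
#define MAX_RT_PRIO    100

/* Policy numbers as in the uapi headers. */
#define SCHED_NORMAL   0
#define SCHED_FIFO     1
#define SCHED_RR       2
#define SCHED_DEADLINE 6

struct task { int prio; int policy; };

static bool rt_or_dl_prio(int prio)
{
	return prio < MAX_RT_PRIO;   /* DL (prio < 0) and RT (0..99) */
}

static bool rt_or_dl_task(const struct task *p)
{
	return rt_or_dl_prio(p->prio);   /* sees PI boosting */
}

static bool rt_or_dl_task_policy(const struct task *p)
{
	int policy = p->policy;

	return policy == SCHED_FIFO || policy == SCHED_RR ||
	       policy == SCHED_DEADLINE;   /* ignores PI boosting */
}

int main(void)
{
	/* SCHED_NORMAL task boosted to RT priority 50 by priority inheritance. */
	struct task boosted = { .prio = 50, .policy = SCHED_NORMAL };

	printf("rt_or_dl_task:        %d\n", rt_or_dl_task(&boosted));        /* 1 */
	printf("rt_or_dl_task_policy: %d\n", rt_or_dl_task_policy(&boosted)); /* 0 */
	return 0;
}
```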
@@ -72,7 +72,7 @@ bool __refrigerator(bool check_kthr_stop)
        bool freeze;

        raw_spin_lock_irq(&current->pi_lock);
-       set_current_state(TASK_FROZEN);
+       WRITE_ONCE(current->__state, TASK_FROZEN);
        /* unstale saved_state so that __thaw_task() will wake us up */
        current->saved_state = TASK_RUNNING;
        raw_spin_unlock_irq(&current->pi_lock);
...
@@ -347,7 +347,7 @@ static __always_inline int __waiter_prio(struct task_struct *task)
 {
    int prio = task->prio;

-   if (!rt_prio(prio))
+   if (!rt_or_dl_prio(prio))
        return DEFAULT_PRIO;

    return prio;
@@ -435,7 +435,7 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
     * Note that RT tasks are excluded from same priority (lateral)
     * steals to prevent the introduction of an unbounded latency.
     */
-   if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
+   if (rt_or_dl_prio(waiter->tree.prio))
        return false;

    return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
...
@@ -631,7 +631,7 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
             * if it is an RT task or wait in the wait queue
             * for too long.
             */
-           if (has_handoff || (!rt_task(waiter->task) &&
+           if (has_handoff || (!rt_or_dl_task(waiter->task) &&
                        !time_after(jiffies, waiter->timeout)))
                return false;
@@ -914,7 +914,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
        if (owner_state != OWNER_WRITER) {
            if (need_resched())
                break;
-           if (rt_task(current) &&
+           if (rt_or_dl_task(current) &&
                (prev_owner_state != OWNER_WRITER))
                break;
        }
...
@@ -237,7 +237,7 @@ __ww_ctx_less(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b)
    int a_prio = a->task->prio;
    int b_prio = b->task->prio;

-   if (rt_prio(a_prio) || rt_prio(b_prio)) {
+   if (rt_or_dl_prio(a_prio) || rt_or_dl_prio(b_prio)) {

        if (a_prio > b_prio)
            return true;
...
@@ -166,7 +166,7 @@ static inline int __task_prio(const struct task_struct *p)
    if (p->dl_server)
        return -1; /* deadline */

-   if (rt_prio(p->prio)) /* includes deadline */
+   if (rt_or_dl_prio(p->prio))
        return p->prio; /* [-1, 99] */

    if (p->sched_class == &idle_sched_class)
@@ -1702,6 +1702,9 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
    if (unlikely(!p->sched_class->uclamp_enabled))
        return;

+   if (p->se.sched_delayed)
+       return;
+
    for_each_clamp_id(clamp_id)
        uclamp_rq_inc_id(rq, p, clamp_id);
@@ -1726,6 +1729,9 @@ static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p)
    if (unlikely(!p->sched_class->uclamp_enabled))
        return;

+   if (p->se.sched_delayed)
+       return;
+
    for_each_clamp_id(clamp_id)
        uclamp_rq_dec_id(rq, p, clamp_id);
 }
@@ -2005,14 +2011,21 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
        psi_enqueue(p, (flags & ENQUEUE_WAKEUP) && !(flags & ENQUEUE_MIGRATED));
    }

-   uclamp_rq_inc(rq, p);
    p->sched_class->enqueue_task(rq, p, flags);
+   /*
+    * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
+    * ->sched_delayed.
+    */
+   uclamp_rq_inc(rq, p);

    if (sched_core_enabled(rq))
        sched_core_enqueue(rq, p);
 }

-void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
+/*
+ * Must only return false when DEQUEUE_SLEEP.
+ */
+inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 {
    if (sched_core_enabled(rq))
        sched_core_dequeue(rq, p, flags);
@@ -2025,8 +2038,12 @@ void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
        psi_dequeue(p, flags & DEQUEUE_SLEEP);
    }

+   /*
+    * Must be before ->dequeue_task() because ->dequeue_task() can 'fail'
+    * and mark the task ->sched_delayed.
+    */
    uclamp_rq_dec(rq, p);
-   p->sched_class->dequeue_task(rq, p, flags);
+   return p->sched_class->dequeue_task(rq, p, flags);
 }

 void activate_task(struct rq *rq, struct task_struct *p, int flags)
@@ -2044,12 +2061,25 @@ void activate_task(struct rq *rq, struct task_struct *p, int flags)

 void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 {
-   WRITE_ONCE(p->on_rq, (flags & DEQUEUE_SLEEP) ? 0 : TASK_ON_RQ_MIGRATING);
+   SCHED_WARN_ON(flags & DEQUEUE_SLEEP);
+
+   WRITE_ONCE(p->on_rq, TASK_ON_RQ_MIGRATING);
    ASSERT_EXCLUSIVE_WRITER(p->on_rq);
+
+   /*
+    * Code explicitly relies on TASK_ON_RQ_MIGRATING being set *before*
+    * dequeue_task() and cleared *after* enqueue_task().
+    */
    dequeue_task(rq, p, flags);
 }

+static void block_task(struct rq *rq, struct task_struct *p, int flags)
+{
+   if (dequeue_task(rq, p, DEQUEUE_SLEEP | flags))
+       __block_task(rq, p);
+}
+
 /**
  * task_curr - is this task currently executing on a CPU?
  * @p: the task in question.
@@ -3697,12 +3727,14 @@ static int ttwu_runnable(struct task_struct *p, int wake_flags)

    rq = __task_rq_lock(p, &rf);
    if (task_on_rq_queued(p)) {
+       update_rq_clock(rq);
+       if (p->se.sched_delayed)
+           enqueue_task(rq, p, ENQUEUE_NOCLOCK | ENQUEUE_DELAYED);
        if (!task_on_cpu(rq, p)) {
            /*
             * When on_rq && !on_cpu the task is preempted, see if
             * it should preempt the task that is current now.
             */
-           update_rq_clock(rq);
            wakeup_preempt(rq, p, wake_flags);
        }
        ttwu_do_wakeup(p);
@@ -4091,11 +4123,16 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         * case the whole 'p->on_rq && ttwu_runnable()' case below
         * without taking any locks.
         *
+        * Specifically, given current runs ttwu() we must be before
+        * schedule()'s block_task(), as such this must not observe
+        * sched_delayed.
+        *
         * In particular:
         *  - we rely on Program-Order guarantees for all the ordering,
         *  - we're serialized against set_special_state() by virtue of
         *    it disabling IRQs (this allows not taking ->pi_lock).
         */
+       SCHED_WARN_ON(p->se.sched_delayed);
        if (!ttwu_state_match(p, state, &success))
            goto out;
@@ -4384,9 +4421,11 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
    p->se.nr_migrations     = 0;
    p->se.vruntime          = 0;
    p->se.vlag              = 0;
-   p->se.slice             = sysctl_sched_base_slice;
    INIT_LIST_HEAD(&p->se.group_node);

+   /* A delayed task cannot be in clone(). */
+   SCHED_WARN_ON(p->se.sched_delayed);
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
    p->se.cfs_rq            = NULL;
 #endif
@@ -4638,6 +4677,8 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
        p->prio = p->normal_prio = p->static_prio;
        set_load_weight(p, false);
+       p->se.custom_slice = 0;
+       p->se.slice = sysctl_sched_base_slice;

        /*
         * We don't need the reset flag anymore after the fork. It has
@@ -6562,13 +6603,15 @@ static void __sched notrace __schedule(unsigned int sched_mode)
        if (signal_pending_state(prev_state, prev)) {
            WRITE_ONCE(prev->__state, TASK_RUNNING);
        } else {
+           int flags = DEQUEUE_NOCLOCK;
+
            prev->sched_contributes_to_load =
                (prev_state & TASK_UNINTERRUPTIBLE) &&
                !(prev_state & TASK_NOLOAD) &&
                !(prev_state & TASK_FROZEN);

-           if (prev->sched_contributes_to_load)
-               rq->nr_uninterruptible++;
+           if (unlikely(is_special_task_state(prev_state)))
+               flags |= DEQUEUE_SPECIAL;

            /*
             * __schedule()         ttwu()
@@ -6581,12 +6624,7 @@ static void __sched notrace __schedule(unsigned int sched_mode)
             *
             * After this, schedule() must not care about p->state any more.
             */
-           deactivate_task(rq, prev, DEQUEUE_SLEEP | DEQUEUE_NOCLOCK);
-           if (prev->in_iowait) {
-               atomic_inc(&rq->nr_iowait);
-               delayacct_blkio_start();
-           }
+           block_task(rq, prev, flags);
        }
        switch_count = &prev->nvcsw;
    }
@@ -8461,6 +8499,7 @@ void __init sched_init(void)
    }

    set_load_weight(&init_task, false);
+   init_task.se.slice = sysctl_sched_base_slice,

    /*
     * The boot idle thread does lazy MMU switching as well:
@@ -8677,7 +8716,7 @@ void normalize_rt_tasks(void)
        schedstat_set(p->stats.sleep_start, 0);
        schedstat_set(p->stats.block_start, 0);

-       if (!dl_task(p) && !rt_task(p)) {
+       if (!rt_or_dl_task(p)) {
            /*
             * Renice negative nice level userspace
             * tasks back to 0:
...
@@ -2162,7 +2162,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
        enqueue_pushable_dl_task(rq, p);
 }

-static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
+static bool dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
    update_curr_dl(rq);
@@ -2172,6 +2172,8 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
    dequeue_dl_entity(&p->dl, flags);
    if (!p->dl.dl_throttled && !dl_server(&p->dl))
        dequeue_pushable_dl_task(rq, p);
+
+   return true;
 }

 /*
@@ -2426,7 +2428,6 @@ static struct task_struct *__pick_next_task_dl(struct rq *rq, bool peek)
    else
        p = dl_se->server_pick_next(dl_se);

    if (!p) {
-       WARN_ON_ONCE(1);
        dl_se->dl_yielded = 1;
        update_curr_dl_se(rq, dl_se, 0);
        goto again;
...
@@ -338,7 +338,7 @@ enum dl_param {
    DL_PERIOD,
 };

-static unsigned long fair_server_period_max = (1 << 22) * NSEC_PER_USEC;   /* ~4 seconds */
+static unsigned long fair_server_period_max = (1UL << 22) * NSEC_PER_USEC; /* ~4 seconds */
 static unsigned long fair_server_period_min = (100) * NSEC_PER_USEC;       /* 100 us */

 static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubuf,
@@ -739,11 +739,12 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
    else
        SEQ_printf(m, " %c", task_state_to_char(p));

-   SEQ_printf(m, "%15s %5d %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld.%06ld %9Ld %5d ",
+   SEQ_printf(m, "%15s %5d %9Ld.%06ld %c %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld %5d ",
        p->comm, task_pid_nr(p),
        SPLIT_NS(p->se.vruntime),
        entity_eligible(cfs_rq_of(&p->se), &p->se) ? 'E' : 'N',
        SPLIT_NS(p->se.deadline),
+       p->se.custom_slice ? 'S' : ' ',
        SPLIT_NS(p->se.slice),
        SPLIT_NS(p->se.sum_exec_runtime),
        (long long)(p->nvcsw + p->nivcsw),
...
This diff is collapsed.
@@ -5,8 +5,24 @@
  * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
  */
 SCHED_FEAT(PLACE_LAG, true)
+/*
+ * Give new tasks half a slice to ease into the competition.
+ */
 SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
+/*
+ * Preserve relative virtual deadline on 'migration'.
+ */
+SCHED_FEAT(PLACE_REL_DEADLINE, true)
+/*
+ * Inhibit (wakeup) preemption until the current task has either matched the
+ * 0-lag point or until it has exhausted its slice.
+ */
 SCHED_FEAT(RUN_TO_PARITY, true)
+/*
+ * Allow wakeup of tasks with a shorter slice to cancel RESPECT_SLICE for
+ * current.
+ */
+SCHED_FEAT(PREEMPT_SHORT, true)

 /*
  * Prefer to schedule the task we woke last (assuming it failed
@@ -21,6 +37,18 @@ SCHED_FEAT(NEXT_BUDDY, false)
  */
 SCHED_FEAT(CACHE_HOT_BUDDY, true)

+/*
+ * Delay dequeueing tasks until they get selected or woken.
+ *
+ * By delaying the dequeue for non-eligible tasks, they remain in the
+ * competition and can burn off their negative lag. When they get selected
+ * they'll have positive lag by definition.
+ *
+ * DELAY_ZERO clips the lag on dequeue (or wakeup) to 0.
+ */
+SCHED_FEAT(DELAY_DEQUEUE, true)
+SCHED_FEAT(DELAY_ZERO, true)
+
 /*
  * Allow wakeup-time preemption of the current task:
  */
...
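
The DELAY_DEQUEUE comment above compresses a whole life cycle into a few lines. The sketch below walks through that cycle with made-up helpers standing in for the scheduler internals, so treat it as an illustration of the idea rather than the actual fair-class code: a sleeping entity with negative lag "fails" its dequeue and is parked as sched_delayed while staying on the runqueue; if it is woken again before being picked, the wakeup path re-enqueues it (the ENQUEUE_DELAYED case) and, with DELAY_ZERO, any remaining negative lag is clipped to zero.

```c
/* Illustrative model of the DELAY_DEQUEUE life cycle; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct entity {
	bool on_rq;
	bool sched_delayed;
	long lag;   /* negative: the entity has over-consumed its share */
};

static const bool delay_dequeue = true;   /* SCHED_FEAT(DELAY_DEQUEUE, true) */
static const bool delay_zero    = true;   /* SCHED_FEAT(DELAY_ZERO, true)    */

/* Entity goes to sleep: the dequeue is deferred for negative-lag entities. */
static bool dequeue_sleep(struct entity *se)
{
	if (delay_dequeue && se->lag < 0) {
		se->sched_delayed = true;   /* stays enqueued, keeps competing */
		return false;
	}
	se->on_rq = false;
	return true;
}

/* Entity is woken while still delayed: re-enqueue and clear the flag. */
static void wakeup_delayed(struct entity *se)
{
	if (!se->sched_delayed)
		return;
	se->sched_delayed = false;
	if (delay_zero && se->lag < 0)
		se->lag = 0;   /* clip the remaining negative lag */
}

int main(void)
{
	struct entity se = { .on_rq = true, .sched_delayed = false, .lag = -3 };

	dequeue_sleep(&se);    /* deferred: still on_rq, sched_delayed set */
	wakeup_delayed(&se);   /* woken before being picked: lag clipped   */
	printf("on_rq=%d delayed=%d lag=%ld\n", se.on_rq, se.sched_delayed, se.lag);
	return 0;
}
```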
@@ -484,13 +484,14 @@ struct task_struct *pick_next_task_idle(struct rq *rq)
  * It is not legal to sleep in the idle task - print a warning
  * message if some code attempts to do it:
  */
-static void
+static bool
 dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
 {
    raw_spin_rq_unlock_irq(rq);
    printk(KERN_ERR "bad: scheduling from the idle thread!\n");
    dump_stack();
    raw_spin_rq_lock_irq(rq);
+   return true;
 }

 /*
...
@@ -1483,7 +1483,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
        enqueue_pushable_task(rq, p);
 }

-static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
+static bool dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 {
    struct sched_rt_entity *rt_se = &p->rt;
@@ -1491,6 +1491,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
    dequeue_rt_entity(rt_se, flags);

    dequeue_pushable_task(rq, p);
+
+   return true;
 }

 /*
...
@@ -68,6 +68,7 @@
 #include <linux/wait_api.h>
 #include <linux/wait_bit.h>
 #include <linux/workqueue_api.h>
+#include <linux/delayacct.h>

 #include <trace/events/power.h>
 #include <trace/events/sched.h>
@@ -645,10 +646,6 @@ struct cfs_rq {
    u64         min_vruntime_fi;
 #endif

-#ifndef CONFIG_64BIT
-   u64         min_vruntime_copy;
-#endif
-
    struct rb_root_cached   tasks_timeline;

    /*
@@ -891,6 +888,9 @@ static inline void se_update_runnable(struct sched_entity *se)

 static inline long se_runnable(struct sched_entity *se)
 {
+   if (se->sched_delayed)
+       return false;
+
    if (entity_is_task(se))
        return !!se->on_rq;
    else
@@ -905,6 +905,9 @@ static inline void se_update_runnable(struct sched_entity *se) { }

 static inline long se_runnable(struct sched_entity *se)
 {
+   if (se->sched_delayed)
+       return false;
+
    return !!se->on_rq;
 }
@@ -2317,11 +2320,13 @@ extern const u32 sched_prio_to_wmult[40];
  *
  */

-#define DEQUEUE_SLEEP       0x01
+#define DEQUEUE_SLEEP       0x01  /* Matches ENQUEUE_WAKEUP */
 #define DEQUEUE_SAVE        0x02  /* Matches ENQUEUE_RESTORE */
 #define DEQUEUE_MOVE        0x04  /* Matches ENQUEUE_MOVE */
 #define DEQUEUE_NOCLOCK     0x08  /* Matches ENQUEUE_NOCLOCK */
+#define DEQUEUE_SPECIAL     0x10
 #define DEQUEUE_MIGRATING   0x100 /* Matches ENQUEUE_MIGRATING */
+#define DEQUEUE_DELAYED     0x200 /* Matches ENQUEUE_DELAYED */

 #define ENQUEUE_WAKEUP      0x01
 #define ENQUEUE_RESTORE     0x02
@@ -2337,6 +2342,7 @@ extern const u32 sched_prio_to_wmult[40];
 #endif
 #define ENQUEUE_INITIAL     0x80
 #define ENQUEUE_MIGRATING   0x100
+#define ENQUEUE_DELAYED     0x200

 #define RETRY_TASK          ((void *)-1UL)
@@ -2355,7 +2361,7 @@ struct sched_class {
 #endif

    void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
-   void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
+   bool (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
    void (*yield_task)   (struct rq *rq);
    bool (*yield_to_task)(struct rq *rq, struct task_struct *p);
@@ -2711,6 +2717,19 @@ static inline void sub_nr_running(struct rq *rq, unsigned count)
    sched_update_tick_dependency(rq);
 }

+static inline void __block_task(struct rq *rq, struct task_struct *p)
+{
+   WRITE_ONCE(p->on_rq, 0);
+   ASSERT_EXCLUSIVE_WRITER(p->on_rq);
+   if (p->sched_contributes_to_load)
+       rq->nr_uninterruptible++;
+
+   if (p->in_iowait) {
+       atomic_inc(&rq->nr_iowait);
+       delayacct_blkio_start();
+   }
+}
+
 extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
 extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
@@ -3736,7 +3755,7 @@ extern int __sched_setaffinity(struct task_struct *p, struct affinity_context *c
 extern void __setscheduler_prio(struct task_struct *p, int prio);
 extern void set_load_weight(struct task_struct *p, bool update_load);
 extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
-extern void dequeue_task(struct rq *rq, struct task_struct *p, int flags);
+extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);

 extern void check_class_changing(struct rq *rq, struct task_struct *p,
                  const struct sched_class *prev_class);
...
@@ -57,10 +57,11 @@ enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
    add_nr_running(rq, 1);
 }

-static void
+static bool
 dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
 {
    sub_nr_running(rq, 1);
+
+   return true;
 }

 static void yield_task_stop(struct rq *rq)
...
@@ -57,7 +57,7 @@ static int effective_prio(struct task_struct *p)
     * keep the priority unchanged. Otherwise, update priority
     * to the normal priority:
     */
-   if (!rt_prio(p->prio))
+   if (!rt_or_dl_prio(p->prio))
        return p->normal_prio;
    return p->prio;
 }
@@ -420,10 +420,20 @@ static void __setscheduler_params(struct task_struct *p,

    p->policy = policy;

-   if (dl_policy(policy))
+   if (dl_policy(policy)) {
        __setparam_dl(p, attr);
-   else if (fair_policy(policy))
+   } else if (fair_policy(policy)) {
        p->static_prio = NICE_TO_PRIO(attr->sched_nice);
+       if (attr->sched_runtime) {
+           p->se.custom_slice = 1;
+           p->se.slice = clamp_t(u64, attr->sched_runtime,
+                         NSEC_PER_MSEC/10,   /* HZ=1000 * 10 */
+                         NSEC_PER_MSEC*100); /* HZ=100  / 10 */
+       } else {
+           p->se.custom_slice = 0;
+           p->se.slice = sysctl_sched_base_slice;
+       }
+   }

    /*
     * __sched_setscheduler() ensures attr->sched_priority == 0 when
@@ -723,7 +733,9 @@ int __sched_setscheduler(struct task_struct *p,
     * but store a possible modification of reset_on_fork.
     */
    if (unlikely(policy == p->policy)) {
-       if (fair_policy(policy) && attr->sched_nice != task_nice(p))
+       if (fair_policy(policy) &&
+           (attr->sched_nice != task_nice(p) ||
+            (attr->sched_runtime != p->se.slice)))
            goto change;

        if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
            goto change;
@@ -870,6 +882,9 @@ static int _sched_setscheduler(struct task_struct *p, int policy,
        .sched_nice = PRIO_TO_NICE(p->static_prio),
    };

+   if (p->se.custom_slice)
+       attr.sched_runtime = p->se.slice;
+
    /* Fixup the legacy SCHED_RESET_ON_FORK hack. */
    if ((policy != SETPARAM_POLICY) && (policy & SCHED_RESET_ON_FORK)) {
        attr.sched_flags |= SCHED_FLAG_RESET_ON_FORK;
@@ -1036,12 +1051,14 @@ static int sched_copy_attr(struct sched_attr __user *uattr, struct sched_attr *a

 static void get_params(struct task_struct *p, struct sched_attr *attr)
 {
-   if (task_has_dl_policy(p))
+   if (task_has_dl_policy(p)) {
        __getparam_dl(p, attr);
-   else if (task_has_rt_policy(p))
+   } else if (task_has_rt_policy(p)) {
        attr->sched_priority = p->rt_priority;
-   else
+   } else {
        attr->sched_nice = task_nice(p);
+       attr->sched_runtime = p->se.slice;
+   }
 }

 /**
...
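
The hunks above route sched_attr::sched_runtime into p->se.slice for fair-class tasks, clamping the request to roughly 0.1 ms..100 ms, and report it back through get_params(). Assuming a kernel that carries these patches, a userspace request for a shorter slice could look like the sketch below; struct sched_attr is declared locally because glibc ships no sched_setattr() wrapper, and the whole snippet is illustrative rather than reference usage.

```c
/* Request a custom base slice for a SCHED_OTHER task (illustrative sketch). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Local copy of the uapi layout; there is no glibc wrapper for sched_setattr(). */
struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;    /* with these patches: requested slice, in ns */
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_policy = 0;                  /* SCHED_OTHER */
	attr.sched_runtime = 1 * 1000 * 1000;   /* ask for a ~1 ms slice */

	/* The kernel clamps the request to [NSEC_PER_MSEC/10, NSEC_PER_MSEC*100]. */
	if (syscall(SYS_sched_setattr, 0, &attr, 0))
		perror("sched_setattr");

	return 0;
}
```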
@@ -1975,7 +1975,7 @@ static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
     * expiry.
     */
    if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
-       if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
+       if (rt_or_dl_task_policy(current) && !(mode & HRTIMER_MODE_SOFT))
            mode |= HRTIMER_MODE_HARD;
    }
@@ -2075,7 +2075,7 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
    u64 slack;

    slack = current->timer_slack_ns;
-   if (rt_task(current))
+   if (rt_or_dl_task(current))
        slack = 0;

    hrtimer_init_sleeper_on_stack(&t, clockid, mode);
@@ -2280,7 +2280,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
     * Override any slack passed by the user if under
     * rt constraints.
     */
-   if (rt_task(current))
+   if (rt_or_dl_task(current))
        delta = 0;

    hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
...
@@ -547,7 +547,7 @@ probe_wakeup(void *ignore, struct task_struct *p)
     *  - wakeup_dl handles tasks belonging to sched_dl class only.
     */
    if (tracing_dl || (wakeup_dl && !dl_task(p)) ||
-       (wakeup_rt && !dl_task(p) && !rt_task(p)) ||
+       (wakeup_rt && !rt_or_dl_task(p)) ||
        (!dl_task(p) && (p->prio >= wakeup_prio || p->prio >= current->prio)))
        return;
...
@@ -418,7 +418,7 @@ static void domain_dirty_limits(struct dirty_throttle_control *dtc)
        bg_thresh = (bg_ratio * available_memory) / PAGE_SIZE;

    tsk = current;
-   if (rt_task(tsk)) {
+   if (rt_or_dl_task(tsk)) {
        bg_thresh += bg_thresh / 4 + global_wb_domain.dirty_limit / 32;
        thresh += thresh / 4 + global_wb_domain.dirty_limit / 32;
    }
@@ -477,7 +477,7 @@ static unsigned long node_dirty_limit(struct pglist_data *pgdat)
    else
        dirty = vm_dirty_ratio * node_memory / 100;

-   if (rt_task(tsk))
+   if (rt_or_dl_task(tsk))
        dirty += dirty / 4;

    /*
...
@@ -4002,7 +4002,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
         */
        if (alloc_flags & ALLOC_MIN_RESERVE)
            alloc_flags &= ~ALLOC_CPUSET;
-   } else if (unlikely(rt_task(current)) && in_task())
+   } else if (unlikely(rt_or_dl_task(current)) && in_task())
        alloc_flags |= ALLOC_MIN_RESERVE;

    alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
...