Commit 147f3efa authored by Peter Zijlstra, committed by Ingo Molnar

sched/fair: Implement an EEVDF-like scheduling policy

CFS is currently a WFQ-based scheduler with only a single knob: the
weight. The addition of a second, latency-oriented parameter makes
something like a WF2Q- or EEVDF-based scheduler a much better fit.

Specifically, EEVDF does EDF-like scheduling in the left half of the
tree -- those entities that are owed service. Except that, because this
is a virtual time scheduler, the deadlines are in virtual time as well,
which is what allows over-subscription.

EEVDF has two parameters:

 - weight, or time-slope: mapped to nice just as before

 - request size, or slice length: used to compute
   the virtual deadline as: vd_i = ve_i + r_i/w_i

Basically, by setting a smaller slice, the deadline will be earlier
and the task will be picked and run earlier.
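
To make the formula concrete, here is a toy userspace sketch
(hypothetical helper calc_vd(); the kernel itself derives the deadline
from the slice through its weight machinery, see calc_delta_fair()):

  #include <stdint.h>
  #include <stdio.h>

  /*
   * Toy model of vd_i = ve_i + r_i/w_i: times in virtual nanoseconds,
   * weight in CFS load units (nice 0 == 1024). Illustration only.
   */
  static uint64_t calc_vd(uint64_t ve, uint64_t request, uint64_t weight)
  {
      /* A smaller request or a larger weight => an earlier deadline. */
      return ve + request * 1024 / weight;
  }

  int main(void)
  {
      /* Equal weight and eligible time: the 1ms request gets
       * vd = 1000 + 1000000, the 3ms one vd = 1000 + 3000000,
       * so the smaller slice is served first. */
      printf("3ms slice: vd = %llu\n",
             (unsigned long long)calc_vd(1000, 3000000, 1024));
      printf("1ms slice: vd = %llu\n",
             (unsigned long long)calc_vd(1000, 1000000, 1024));
      return 0;
  }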

Tick driven preemption is triggered by request/slice completion, while
wakeup preemption is driven by the deadline.
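
Schematically, the two triggers might look as follows (hypothetical
helper names; the real checks live in kernel/sched/fair.c and use
wraparound-safe s64 comparisons, mimicked here with casts):

  #include <stdbool.h>
  #include <stdint.h>

  /* Tick path: resched once the request is complete, i.e. the entity
   * has run up to (or past) its current virtual deadline. */
  static bool tick_preempt(uint64_t vruntime, uint64_t deadline)
  {
      return (int64_t)(vruntime - deadline) >= 0;
  }

  /* Wakeup path: preempt if the waking entity is eligible and has an
   * earlier virtual deadline than the currently running one. */
  static bool wakeup_preempt(bool wakee_eligible,
                             uint64_t wakee_deadline, uint64_t curr_deadline)
  {
      return wakee_eligible && (int64_t)(wakee_deadline - curr_deadline) < 0;
  }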

Because the tree is now effectively an interval tree, and the
selection is no longer 'leftmost', over-scheduling is less of a
problem.
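
For intuition, a brute-force sketch of the pick (made-up types and an
O(n) scan; the patch implements the same rule in O(log n) by augmenting
the rbtree with the min_deadline field added below):

  #include <stdbool.h>
  #include <stddef.h>
  #include <stdint.h>

  struct toy_entity {
      uint64_t vruntime;   /* virtual time received so far */
      uint64_t deadline;   /* virtual deadline: vd = ve + r/w */
  };

  /* Simplified eligibility: an entity is owed service when its
   * vruntime is not ahead of the queue's average vruntime. */
  static bool toy_eligible(const struct toy_entity *se, uint64_t avg_vruntime)
  {
      return se->vruntime <= avg_vruntime;
  }

  /* Earliest-eligible-virtual-deadline-first: EDF restricted to the
   * eligible ("left") half of the queue. */
  static struct toy_entity *pick_eevdf(struct toy_entity *q, size_t n,
                                       uint64_t avg_vruntime)
  {
      struct toy_entity *best = NULL;

      for (size_t i = 0; i < n; i++) {
          if (!toy_eligible(&q[i], avg_vruntime))
              continue;
          if (!best || q[i].deadline < best->deadline)
              best = &q[i];
      }
      return best;
  }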
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20230531124603.931005524@infradead.org
parent 99d4d265
@@ -549,6 +549,9 @@ struct sched_entity {
 	/* For load-balancing: */
 	struct load_weight		load;
 	struct rb_node			run_node;
+	u64				deadline;
+	u64				min_deadline;
+
 	struct list_head		group_node;
 	unsigned int			on_rq;
@@ -557,6 +560,7 @@ struct sched_entity {
 	u64				prev_sum_exec_runtime;
 	u64				vruntime;
 	s64				vlag;
+	u64				slice;
 
 	u64				nr_migrations;
@@ -4502,6 +4502,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
 	p->se.vlag			= 0;
+	p->se.slice			= sysctl_sched_min_granularity;
 	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -582,9 +582,13 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 	else
 		SEQ_printf(m, " %c", task_state_to_char(p));
 
-	SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ",
+	SEQ_printf(m, "%15s %5d %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld.%06ld %9Ld %5d ",
 		p->comm, task_pid_nr(p),
 		SPLIT_NS(p->se.vruntime),
+		entity_eligible(cfs_rq_of(&p->se), &p->se) ? 'E' : 'N',
+		SPLIT_NS(p->se.deadline),
+		SPLIT_NS(p->se.slice),
+		SPLIT_NS(p->se.sum_exec_runtime),
 		(long long)(p->nvcsw + p->nivcsw),
 		p->prio);
[diff for kernel/sched/fair.c collapsed]
@@ -13,6 +13,7 @@ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, true)
  * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
  */
 SCHED_FEAT(PLACE_LAG, true)
+SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
 
 /*
  * Prefer to schedule the task we woke last (assuming it failed
@@ -103,3 +104,5 @@ SCHED_FEAT(LATENCY_WARN, false)
 
 SCHED_FEAT(ALT_PERIOD, true)
 SCHED_FEAT(BASE_SLICE, true)
+
+SCHED_FEAT(EEVDF, true)
@@ -2505,9 +2505,10 @@ extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
 extern const_debug unsigned int sysctl_sched_nr_migrate;
 extern const_debug unsigned int sysctl_sched_migration_cost;
 
+extern unsigned int sysctl_sched_min_granularity;
+
 #ifdef CONFIG_SCHED_DEBUG
 extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_idle_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 
 extern int sysctl_resched_latency_warn_ms;
@@ -3487,5 +3488,6 @@ static inline void init_sched_mm_cid(struct task_struct *t) { }
 #endif
 
 extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
+extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se);
 
 #endif /* _KERNEL_SCHED_SCHED_H */