Commit 63ba8422 authored by Peter Zijlstra

sched/deadline: Introduce deadline servers

Low priority tasks (e.g., SCHED_OTHER) can suffer starvation if tasks
with higher priority (e.g., SCHED_FIFO) monopolize CPU(s).

RT throttling was introduced a while ago as a (mostly debug)
countermeasure that reserves some CPU time for low priority tasks
(usually background work, e.g. workqueues, timers, etc.).
It has its own problems, however (see the documentation), and the
undesired effect of unconditionally throttling FIFO tasks even when no
lower priority activity needs to run (there are mechanisms to fix this
as well, but, again, they come with problems of their own).

Introduce deadline servers to service the needs of low priority tasks
under starvation conditions. Deadline servers are built by extending the
SCHED_DEADLINE implementation to allow 2-level scheduling (a sched_deadline
entity becomes a container for lower priority scheduling entities).
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/4968601859d920335cf85822eb573a5f179f04b8.1699095159.git.bristot@kernel.org
parent 2f7a0f58
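
For orientation before the diff: a minimal sketch of how a scheduling class
could plug into the new interface. Only the dl_server_* functions and the two
callback typedefs come from this commit; the fair_server naming, the rq field,
and the h_nr_running test are illustrative assumptions.

/* Hypothetical wiring of a server for the fair class (sketch only). */
static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
{
	/* Keep the server running while the served class has work. */
	return !!dl_se->rq->cfs.h_nr_running;	/* illustrative test */
}

static struct task_struct *fair_server_pick(struct sched_dl_entity *dl_se)
{
	/* Nested pick_next_task(); returning NULL yields the period. */
	return pick_next_task_fair(dl_se->rq, NULL, NULL);
}

void fair_server_init(struct rq *rq)
{
	dl_server_init(&rq->fair_server, rq,	/* rq->fair_server is assumed */
		       fair_server_has_tasks, fair_server_pick);
}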
@@ -63,11 +63,13 @@ struct robust_list_head;
struct root_domain;
struct rq;
struct sched_attr;
struct sched_dl_entity;
struct seq_file;
struct sighand_struct;
struct signal_struct;
struct task_delay_info;
struct task_group;
struct task_struct;
struct user_event_mm;
/*
@@ -607,6 +609,9 @@ struct sched_rt_entity {
#endif
} __randomize_layout;
typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *);
typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *);
struct sched_dl_entity {
struct rb_node rb_node;
@@ -654,6 +659,7 @@ struct sched_dl_entity {
unsigned int dl_yielded : 1;
unsigned int dl_non_contending : 1;
unsigned int dl_overrun : 1;
unsigned int dl_server : 1;
/*
* Bandwidth enforcement timer. Each -deadline task has its
@@ -668,7 +674,20 @@ struct sched_dl_entity {
* timer is needed to decrease the active utilization at the correct
* time.
*/
struct hrtimer inactive_timer;
/*
* Bits for DL-server functionality. Also see the comment near
* dl_server_update().
*
* @rq the runqueue this server is for
*
* @server_has_tasks() returns true if @server_pick() returns a
* runnable task.
*/
struct rq *rq;
dl_server_has_tasks_f server_has_tasks;
dl_server_pick_f server_pick;
#ifdef CONFIG_RT_MUTEXES
/*
@@ -795,6 +814,7 @@ struct task_struct {
struct sched_entity se;
struct sched_rt_entity rt;
struct sched_dl_entity dl;
struct sched_dl_entity *dl_server;
const struct sched_class *sched_class;
#ifdef CONFIG_SCHED_CORE
......
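
The new dl_server bit is what lets generic deadline code tell a server entity
apart from a plain -deadline task: a server has no task_struct of its own, so
task-oriented helpers must never be applied to it. A sketch of the kind of
predicate this enables (the helper name is an assumption, not shown in this
excerpt):

/* Sketch: gate between 'server' and 'task' handling of a dl entity. */
static inline bool dl_server(struct sched_dl_entity *dl_se)
{
	return dl_se->dl_server;
}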
@@ -3797,6 +3797,8 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags,
rq->idle_stamp = 0;
}
#endif
p->dl_server = NULL;
}
/*
@@ -6003,12 +6005,27 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
p = pick_next_task_idle(rq);
}
/*
* This is the fast path; it cannot be a DL server pick;
* therefore even if @p == @prev, ->dl_server must be NULL.
*/
if (p->dl_server)
p->dl_server = NULL;
return p;
}
restart:
put_prev_task_balance(rq, prev, rf);
/*
* We've updated @prev and no longer need the server link, clear it.
* Must be done before ->pick_next_task() because that can (re)set
* ->dl_server.
*/
if (prev->dl_server)
prev->dl_server = NULL;
for_each_class(class) {
p = class->pick_next_task(rq);
if (p)
......
This diff is collapsed.
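
The collapsed diff holds the core deadline.c changes; a simplified sketch of
what the server side of the pick path plausibly looks like there, based only
on the interface in this commit (pick_next_dl_entity() and dl_task_of() are
existing deadline.c helpers; the exact control flow is assumed):

/* Sketch of a server pick (simplified, not a verbatim copy). */
static struct task_struct *pick_task_dl_sketch(struct rq *rq)
{
	struct sched_dl_entity *dl_se = pick_next_dl_entity(&rq->dl);
	struct task_struct *p;

	if (!dl_se)
		return NULL;

	if (dl_se->dl_server) {
		/* Nested pick: ask the served class for a task... */
		p = dl_se->server_pick(dl_se);
		/* ...and tag it so runtime is charged to the server. */
		if (p)
			p->dl_server = dl_se;
	} else {
		p = dl_task_of(dl_se);
	}
	return p;
}

The p->dl_server tag is what the core.c hunks above clear again on the paths
where the pick did not come from a server.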
@@ -1131,6 +1131,8 @@ static inline void update_curr_task(struct task_struct *p, s64 delta_exec)
trace_sched_stat_runtime(p, delta_exec);
account_group_exec_runtime(p, delta_exec);
cgroup_account_cputime(p, delta_exec);
if (p->dl_server)
dl_server_update(p->dl_server, delta_exec);
}
/*
......
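
While a served task runs, every accounting step thus debits the server's CBS
budget in addition to the task's own statistics. A sketch of what
dl_server_update() amounts to, assuming the usual runtime/throttling
bookkeeping of SCHED_DEADLINE (the enforcement details are simplified):

/* Sketch: consumed runtime drains the server's budget; exhausting it
 * throttles the server until its next replenishment, exactly like
 * bandwidth enforcement for a normal -deadline task. */
void dl_server_update_sketch(struct sched_dl_entity *dl_se, s64 delta_exec)
{
	dl_se->runtime -= delta_exec;
	if (dl_se->runtime <= 0)
		dl_se->dl_throttled = 1;	/* replenishment timer takes over */
}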
@@ -313,6 +313,33 @@ extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *att
extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
extern int dl_bw_check_overflow(int cpu);
/*
* SCHED_DEADLINE supports servers (nested scheduling) with the following
* interface:
*
* dl_se::rq -- runqueue we belong to.
*
* dl_se::server_has_tasks() -- used during bandwidth enforcement; we 'stop' the
* server when it runs out of tasks to run.
*
* dl_se::server_pick() -- nested pick_next_task(); we yield the period if this
* returns NULL.
*
* dl_server_update() -- called from update_curr_common(), propagates runtime
* to the server.
*
* dl_server_start()
* dl_server_stop() -- start/stop the server when it has (no) tasks.
*
* dl_server_init() -- initializes the server.
*/
extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
extern void dl_server_start(struct sched_dl_entity *dl_se);
extern void dl_server_stop(struct sched_dl_entity *dl_se);
extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
dl_server_has_tasks_f has_tasks,
dl_server_pick_f pick);
#ifdef CONFIG_CGROUP_SCHED
struct cfs_rq;
......
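
Finally, a sketch of how the served class would drive the start/stop half of
the interface, so the server only competes for bandwidth while it has work to
protect (the hook names and the rq->fair_server field are assumptions):

/* Sketch: start the server with the first runnable task of the served
 * class, stop it when the last one leaves. */
static void example_enqueue(struct rq *rq, struct task_struct *p)
{
	if (rq->cfs.h_nr_running == 1)		/* first task arrived */
		dl_server_start(&rq->fair_server);
}

static void example_dequeue(struct rq *rq, struct task_struct *p)
{
	if (!rq->cfs.h_nr_running)		/* last task left */
		dl_server_stop(&rq->fair_server);
}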