Commit aab03e05 authored by Dario Faggioli, committed by Ingo Molnar

sched/deadline: Add SCHED_DEADLINE structures & implementation

Introduces the data structures, constants and symbols needed for
SCHED_DEADLINE implementation.

Core data structures of SCHED_DEADLINE are defined, along with their
initializers. Hooks for checking if a task belongs to the new policy
are also added where they are needed.

Adds a scheduling class, in sched/dl.c and a new policy called
SCHED_DEADLINE. It is an implementation of the Earliest Deadline
First (EDF) scheduling algorithm, augmented with a mechanism (called
Constant Bandwidth Server, CBS) that makes it possible to isolate
the behaviour of tasks between each other.

The typical -deadline task will be made up of a computation phase
(instance) which is activated in a periodic or sporadic fashion. The
expected (maximum) duration of such computation is called the task's
runtime; the time interval by which each instance needs to be completed
is called the task's relative deadline. The task's absolute deadline
is dynamically calculated as the time instant a task (better, an
instance) activates plus the relative deadline.

The EDF algorithm selects the task with the smallest absolute
deadline as the one to be executed first, while the CBS ensures that
each task runs for at most its runtime every (relative) deadline
length time interval, avoiding any interference between different
tasks (bandwidth isolation).
Thanks to this feature, even tasks that do not strictly comply with
the computational model sketched above can effectively use the new
policy.

To summarize, this patch:
 - introduces the data structures, constants and symbols needed;
 - implements the core logic of the scheduling algorithm in the new
   scheduling class file;
 - provides all the glue code between the new scheduling class and
   the core scheduler and refines the interactions between sched/dl
   and the other existing scheduling classes.
Signed-off-by: Dario Faggioli <raistlin@linux.it>
Signed-off-by: Michael Trimarchi <michael@amarulasolutions.com>
Signed-off-by: Fabio Checconi <fchecconi@gmail.com>
Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1383831828-15501-4-git-send-email-juri.lelli@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
parent d50dde5a
......@@ -97,6 +97,10 @@ struct sched_param {
* Given this task model, there are a multiplicity of scheduling algorithms
* and policies, that can be used to ensure all the tasks will make their
* timing constraints.
*
* As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
* only user of this new interface. More information about the algorithm
* is available in the scheduling class file or in Documentation/.
*/
struct sched_attr {
u32 size;
......@@ -1088,6 +1092,45 @@ struct sched_rt_entity {
#endif
};
/*
* Per-task scheduling state of a SCHED_DEADLINE (-deadline) task.
* It keeps both the static parameters requested by the user and the
* dynamic values (remaining runtime, absolute deadline) that evolve
* while the task executes.
*/
struct sched_dl_entity {
/*
* rbtree linkage.
* NOTE(review): presumably the node used to queue this entity in the
* deadline-ordered rbtree of a dl_rq -- confirm in the class file.
*/
struct rb_node rb_node;
/*
* Original scheduling parameters. Copied here from sched_attr
* during sched_setscheduler2(), they will remain the same until
* the next sched_setscheduler2().
*/
u64 dl_runtime; /* maximum runtime for each instance */
u64 dl_deadline; /* relative deadline of each instance */
/*
* Actual scheduling parameters. Initialized with the values above,
* they are continuously updated during task execution. Note that
* the remaining runtime could be < 0 in case we are in overrun
* (which is why @runtime is signed while @dl_runtime is not).
*/
s64 runtime; /* remaining runtime for this instance */
u64 deadline; /* absolute deadline for this instance */
unsigned int flags; /* specifying the scheduler behaviour */
/*
* Some bool flags:
*
* @dl_throttled tells if we exhausted the runtime. If so, the
* task has to wait for a replenishment to be performed at the
* next firing of dl_timer.
*
* @dl_new tells if a new instance arrived. If so we must
* start executing it with full runtime and reset its absolute
* deadline;
*/
int dl_throttled, dl_new;
/*
* Bandwidth enforcement timer. Each -deadline task has its
* own bandwidth to be enforced, thus we need one timer per task.
*/
struct hrtimer dl_timer;
};
struct rcu_node;
......@@ -1124,6 +1167,7 @@ struct task_struct {
#ifdef CONFIG_CGROUP_SCHED
struct task_group *sched_task_group;
#endif
struct sched_dl_entity dl;
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
......@@ -2099,7 +2143,7 @@ extern void wake_up_new_task(struct task_struct *tsk);
#else
static inline void kick_process(struct task_struct *tsk) { }
#endif
extern void sched_fork(unsigned long clone_flags, struct task_struct *p);
extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
extern void sched_dead(struct task_struct *p);
extern void proc_caches_init(void);
......
#ifndef _SCHED_DEADLINE_H
#define _SCHED_DEADLINE_H
/*
* SCHED_DEADLINE tasks have negative priorities, reflecting
* the fact that any of them has a higher prio than RT and
* NORMAL/BATCH tasks.
*/
#define MAX_DL_PRIO 0
/*
 * dl_prio - tell whether a priority value belongs to the -deadline range.
 * @prio: the priority value to classify.
 *
 * Returns 1 when @prio is below MAX_DL_PRIO, 0 otherwise. The -deadline
 * case is hinted as the unlikely one.
 */
static inline int dl_prio(int prio)
{
	return unlikely(prio < MAX_DL_PRIO) ? 1 : 0;
}
/*
 * dl_task - tell whether a task currently runs at a -deadline priority.
 * @p: the task to inspect.
 *
 * Returns the result of dl_prio() applied to the task's current ->prio.
 */
static inline int dl_task(struct task_struct *p)
{
	int prio = p->prio;

	return dl_prio(prio);
}
#endif /* _SCHED_DEADLINE_H */
......@@ -39,6 +39,7 @@
#define SCHED_BATCH 3
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE 5
#define SCHED_DEADLINE 6
/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
#define SCHED_RESET_ON_FORK 0x40000000
......
......@@ -1311,7 +1311,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
#endif
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(clone_flags, p);
retval = sched_fork(clone_flags, p);
if (retval)
goto bad_fork_cleanup_policy;
retval = perf_event_init_task(p);
if (retval)
......
......@@ -46,6 +46,7 @@
#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/timer.h>
#include <linux/freezer.h>
......@@ -1610,7 +1611,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
unsigned long slack;
slack = current->timer_slack_ns;
if (rt_task(current))
if (dl_task(current) || rt_task(current))
slack = 0;
hrtimer_init_on_stack(&t.timer, clockid, mode);
......
......@@ -11,7 +11,8 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
endif
obj-y += core.o proc.o clock.o cputime.o idle_task.o fair.o rt.o stop_task.o
obj-y += core.o proc.o clock.o cputime.o
obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
obj-y += wait.o completion.o
obj-$(CONFIG_SMP) += cpupri.o
obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
......
......@@ -899,7 +899,9 @@ static inline int normal_prio(struct task_struct *p)
{
int prio;
if (task_has_rt_policy(p))
if (task_has_dl_policy(p))
prio = MAX_DL_PRIO-1;
else if (task_has_rt_policy(p))
prio = MAX_RT_PRIO-1 - p->rt_priority;
else
prio = __normal_prio(p);
......@@ -1717,6 +1719,12 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
memset(&p->se.statistics, 0, sizeof(p->se.statistics));
#endif
RB_CLEAR_NODE(&p->dl.rb_node);
hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
p->dl.dl_runtime = p->dl.runtime = 0;
p->dl.dl_deadline = p->dl.deadline = 0;
p->dl.flags = 0;
INIT_LIST_HEAD(&p->rt.run_list);
#ifdef CONFIG_PREEMPT_NOTIFIERS
......@@ -1768,7 +1776,7 @@ void set_numabalancing_state(bool enabled)
/*
* fork()/clone()-time setup:
*/
void sched_fork(unsigned long clone_flags, struct task_struct *p)
int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
unsigned long flags;
int cpu = get_cpu();
......@@ -1790,7 +1798,7 @@ void sched_fork(unsigned long clone_flags, struct task_struct *p)
* Revert to default priority/policy on fork if requested.
*/
if (unlikely(p->sched_reset_on_fork)) {
if (task_has_rt_policy(p)) {
if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
p->policy = SCHED_NORMAL;
p->static_prio = NICE_TO_PRIO(0);
p->rt_priority = 0;
......@@ -1807,8 +1815,14 @@ void sched_fork(unsigned long clone_flags, struct task_struct *p)
p->sched_reset_on_fork = 0;
}
if (!rt_prio(p->prio))
if (dl_prio(p->prio)) {
put_cpu();
return -EAGAIN;
} else if (rt_prio(p->prio)) {
p->sched_class = &rt_sched_class;
} else {
p->sched_class = &fair_sched_class;
}
if (p->sched_class->task_fork)
p->sched_class->task_fork(p);
......@@ -1837,6 +1851,7 @@ void sched_fork(unsigned long clone_flags, struct task_struct *p)
#endif
put_cpu();
return 0;
}
/*
......@@ -2768,7 +2783,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
struct rq *rq;
const struct sched_class *prev_class;
BUG_ON(prio < 0 || prio > MAX_PRIO);
BUG_ON(prio > MAX_PRIO);
rq = __task_rq_lock(p);
......@@ -2800,7 +2815,9 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
if (running)
p->sched_class->put_prev_task(rq, p);
if (rt_prio(prio))
if (dl_prio(prio))
p->sched_class = &dl_sched_class;
else if (rt_prio(prio))
p->sched_class = &rt_sched_class;
else
p->sched_class = &fair_sched_class;
......@@ -2835,9 +2852,9 @@ void set_user_nice(struct task_struct *p, long nice)
* The RT priorities are set via sched_setscheduler(), but we still
* allow the 'normal' nice value to be set - but as expected
* it wont have any effect on scheduling until the task is
* SCHED_FIFO/SCHED_RR:
* SCHED_DEADLINE, SCHED_FIFO or SCHED_RR:
*/
if (task_has_rt_policy(p)) {
if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
p->static_prio = NICE_TO_PRIO(nice);
goto out_unlock;
}
......@@ -2992,6 +3009,27 @@ static struct task_struct *find_process_by_pid(pid_t pid)
return pid ? find_task_by_vpid(pid) : current;
}
/*
 * Set up the sched_dl_entity of a task that is switching to
 * SCHED_DEADLINE.
 *
 * @p:    the task whose -deadline entity is being set up.
 * @attr: the requested parameters (runtime, deadline and flags are read).
 *
 * Only the static parameters are stored here; the actual runtime and the
 * absolute deadline get computed when the task is enqueued for the first
 * time under its new policy.
 */
static void
__setparam_dl(struct task_struct *p, const struct sched_attr *attr)
{
	struct sched_dl_entity *entity = &p->dl;

	init_dl_task_timer(entity);

	/* Static parameters, taken verbatim from the request. */
	entity->dl_runtime = attr->sched_runtime;
	entity->dl_deadline = attr->sched_deadline;
	entity->flags = attr->sched_flags;

	/* Begin as a brand new, not throttled, instance. */
	entity->dl_new = 1;
	entity->dl_throttled = 0;
}
/* Actually do priority change: must hold pi & rq lock. */
static void __setscheduler(struct rq *rq, struct task_struct *p,
const struct sched_attr *attr)
......@@ -3000,7 +3038,9 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
p->policy = policy;
if (rt_policy(policy))
if (dl_policy(policy))
__setparam_dl(p, attr);
else if (rt_policy(policy))
p->rt_priority = attr->sched_priority;
else
p->static_prio = NICE_TO_PRIO(attr->sched_nice);
......@@ -3008,13 +3048,39 @@ static void __setscheduler(struct rq *rq, struct task_struct *p,
p->normal_prio = normal_prio(p);
p->prio = rt_mutex_getprio(p);
if (rt_prio(p->prio))
if (dl_prio(p->prio))
p->sched_class = &dl_sched_class;
else if (rt_prio(p->prio))
p->sched_class = &rt_sched_class;
else
p->sched_class = &fair_sched_class;
set_load_weight(p);
}
static void
__getparam_dl(struct task_struct *p, struct sched_attr *attr)
{
struct sched_dl_entity *dl_se = &p->dl;
attr->sched_priority = p->rt_priority;
attr->sched_runtime = dl_se->dl_runtime;
attr->sched_deadline = dl_se->dl_deadline;
attr->sched_flags = dl_se->flags;
}
/*
 * This function validates the new parameters of a -deadline task.
 *
 * @attr: the requested parameters; may be NULL, which is rejected.
 *
 * The parameters are accepted (true is returned) only if:
 *  - the relative deadline is not zero;
 *  - neither the deadline nor the runtime has its most significant bit
 *    set: the remaining runtime of an instance is kept in a signed 64bit
 *    field, so larger values cannot be represented;
 *  - the deadline is greater than or equal to the runtime.
 *
 * The values are compared directly as unsigned quantities. Casting their
 * difference to a signed type is not equivalent: the subtraction wraps,
 * e.g. deadline = 1 and runtime = 2^63 + 2 yield a positive difference
 * and would be accepted although the runtime exceeds the deadline.
 */
static bool
__checkparam_dl(const struct sched_attr *attr)
{
	if (!attr)
		return false;

	/* deadline != 0 */
	if (attr->sched_deadline == 0)
		return false;

	/* msb must be clear in both values */
	if ((attr->sched_deadline | attr->sched_runtime) & (1ULL << 63))
		return false;

	/* deadline >= runtime */
	return attr->sched_deadline >= attr->sched_runtime;
}
/*
* check the target process has a UID that matches the current process's
*/
......@@ -3053,7 +3119,8 @@ static int __sched_setscheduler(struct task_struct *p,
reset_on_fork = !!(policy & SCHED_RESET_ON_FORK);
policy &= ~SCHED_RESET_ON_FORK;
if (policy != SCHED_FIFO && policy != SCHED_RR &&
if (policy != SCHED_DEADLINE &&
policy != SCHED_FIFO && policy != SCHED_RR &&
policy != SCHED_NORMAL && policy != SCHED_BATCH &&
policy != SCHED_IDLE)
return -EINVAL;
......@@ -3068,7 +3135,8 @@ static int __sched_setscheduler(struct task_struct *p,
(p->mm && attr->sched_priority > MAX_USER_RT_PRIO-1) ||
(!p->mm && attr->sched_priority > MAX_RT_PRIO-1))
return -EINVAL;
if (rt_policy(policy) != (attr->sched_priority != 0))
if ((dl_policy(policy) && !__checkparam_dl(attr)) ||
(rt_policy(policy) != (attr->sched_priority != 0)))
return -EINVAL;
/*
......@@ -3143,6 +3211,8 @@ static int __sched_setscheduler(struct task_struct *p,
goto change;
if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
goto change;
if (dl_policy(policy))
goto change;
task_rq_unlock(rq, p, &flags);
return 0;
......@@ -3453,6 +3523,10 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
if (retval)
goto out_unlock;
if (task_has_dl_policy(p)) {
retval = -EINVAL;
goto out_unlock;
}
lp.sched_priority = p->rt_priority;
rcu_read_unlock();
......@@ -3510,7 +3584,7 @@ static int sched_read_attr(struct sched_attr __user *uattr,
}
/**
* sys_sched_getattr - same as above, but with extended "sched_param"
* sys_sched_getattr - similar to sched_getparam, but with sched_attr
* @pid: the pid in question.
* @attr: structure containing the extended parameters.
* @size: sizeof(attr) for fwd/bwd comp.
......@@ -3539,7 +3613,9 @@ SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,
goto out_unlock;
attr.sched_policy = p->policy;
if (task_has_rt_policy(p))
if (task_has_dl_policy(p))
__getparam_dl(p, &attr);
else if (task_has_rt_policy(p))
attr.sched_priority = p->rt_priority;
else
attr.sched_nice = TASK_NICE(p);
......@@ -3965,6 +4041,7 @@ SYSCALL_DEFINE1(sched_get_priority_max, int, policy)
case SCHED_RR:
ret = MAX_USER_RT_PRIO-1;
break;
case SCHED_DEADLINE:
case SCHED_NORMAL:
case SCHED_BATCH:
case SCHED_IDLE:
......@@ -3991,6 +4068,7 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy)
case SCHED_RR:
ret = 1;
break;
case SCHED_DEADLINE:
case SCHED_NORMAL:
case SCHED_BATCH:
case SCHED_IDLE:
......@@ -6472,6 +6550,7 @@ void __init sched_init(void)
rq->calc_load_update = jiffies + LOAD_FREQ;
init_cfs_rq(&rq->cfs);
init_rt_rq(&rq->rt, rq);
init_dl_rq(&rq->dl, rq);
#ifdef CONFIG_FAIR_GROUP_SCHED
root_task_group.shares = ROOT_TASK_GROUP_LOAD;
INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
......@@ -6659,7 +6738,7 @@ void normalize_rt_tasks(void)
p->se.statistics.block_start = 0;
#endif
if (!rt_task(p)) {
if (!dl_task(p) && !rt_task(p)) {
/*
* Renice negative nice level userspace
* tasks back to 0:
......
This diff is collapsed.
......@@ -2,6 +2,7 @@
#include <linux/sched.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/stop_machine.h>
......@@ -91,11 +92,21 @@ static inline int rt_policy(int policy)
return policy == SCHED_FIFO || policy == SCHED_RR;
}
/* Returns 1 if @policy is SCHED_DEADLINE, 0 for any other policy value. */
static inline int dl_policy(int policy)
{
	return (policy == SCHED_DEADLINE) ? 1 : 0;
}
/* Returns rt_policy() evaluated on the policy currently set for task @p. */
static inline int task_has_rt_policy(struct task_struct *p)
{
	return rt_policy((p)->policy);
}
/* Returns dl_policy() evaluated on the policy currently set for task @p. */
static inline int task_has_dl_policy(struct task_struct *p)
{
	return dl_policy((p)->policy);
}
/*
* This is the priority-queue data structure of the RT scheduling class:
*/
......@@ -367,6 +378,15 @@ struct rt_rq {
#endif
};
/*
* Deadline class' related fields in a runqueue.
* One instance is embedded in each struct rq (the ->dl member).
*/
struct dl_rq {
/* runqueue is an rbtree, ordered by deadline */
struct rb_root rb_root;
/*
* NOTE(review): presumably a cached pointer to the leftmost node of
* rb_root, i.e. the entity with the earliest deadline -- confirm in
* the class file.
*/
struct rb_node *rb_leftmost;

/*
* NOTE(review): presumably the number of -deadline tasks currently
* queued here -- confirm in the class file.
*/
unsigned long dl_nr_running;
};
#ifdef CONFIG_SMP
/*
......@@ -435,6 +455,7 @@ struct rq {
struct cfs_rq cfs;
struct rt_rq rt;
struct dl_rq dl;
#ifdef CONFIG_FAIR_GROUP_SCHED
/* list of leaf cfs_rq on this cpu: */
......@@ -991,6 +1012,7 @@ static const u32 prio_to_wmult[40] = {
#else
#define ENQUEUE_WAKING 0
#endif
#define ENQUEUE_REPLENISH 8
#define DEQUEUE_SLEEP 1
......@@ -1046,6 +1068,7 @@ struct sched_class {
for (class = sched_class_highest; class; class = class->next)
extern const struct sched_class stop_sched_class;
extern const struct sched_class dl_sched_class;
extern const struct sched_class rt_sched_class;
extern const struct sched_class fair_sched_class;
extern const struct sched_class idle_sched_class;
......@@ -1081,6 +1104,8 @@ extern void resched_cpu(int cpu);
extern struct rt_bandwidth def_rt_bandwidth;
extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
extern void update_idle_cpu_load(struct rq *this_rq);
extern void init_task_runnable_average(struct task_struct *p);
......@@ -1357,6 +1382,7 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
extern void init_cfs_rq(struct cfs_rq *cfs_rq);
extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
extern void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq);
extern void cfs_bandwidth_usage_inc(void);
extern void cfs_bandwidth_usage_dec(void);
......
......@@ -103,7 +103,7 @@ get_rr_interval_stop(struct rq *rq, struct task_struct *task)
* Simple, special scheduling class for the per-CPU stop tasks:
*/
const struct sched_class stop_sched_class = {
.next = &rt_sched_class,
.next = &dl_sched_class,
.enqueue_task = enqueue_task_stop,
.dequeue_task = dequeue_task_stop,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment