Commit 6e2332e0 authored by Linus Torvalds

Merge tag 'cgroup-for-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:

 - Whenever cpuset needed to rebuild sched_domain, it walked all tasks
   looking for DEADLINE tasks as they need to be accounted on the new
   domain. Walking all tasks can be expensive and there may not be any
   DEADLINE tasks at all. Task iteration is now omitted if there are no
   DEADLINE tasks.

 - Fixes DEADLINE bandwidth misaccounting after task migration failures

 - When no controller is enabled, a -Wstringop-overflow warning is
   triggered. The patch that fixed it added an early exit which was too
   eager, so it has been reverted for now. Will fix later

 - Everything else is minor cleanups

* tag 'cgroup-for-6.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  Revert "cgroup: Avoid -Wstringop-overflow warnings"
  cgroup/misc: Expose misc.current on cgroup v2 root
  cgroup: Avoid -Wstringop-overflow warnings
  cgroup: remove obsolete comment on cgroup_on_dfl()
  cgroup: remove unused task_cgroup_path()
  cgroup/cpuset: remove unneeded header files
  cgroup: make cgroup_is_threaded() and cgroup_is_thread_root() static
  rdmacg: fix kernel-doc warnings in rdmacg
  cgroup: Replace the css_set call with cgroup_get
  cgroup: remove unused macro for_each_e_css()
  cgroup: Update out-of-date comment in cgroup_migrate()
  cgroup: Replace all non-returning strlcpy with strscpy
  cgroup/cpuset: remove unneeded header files
  cgroup/cpuset: Free DL BW in case can_attach() fails
  sched/deadline: Create DL BW alloc, free & check overflow interface
  cgroup/cpuset: Iterate only if DEADLINE tasks are present
  sched/cpuset: Keep track of SCHED_DEADLINE task in cpusets
  sched/cpuset: Bring back cpuset_mutex
  cgroup/cpuset: Rename functions dealing with DEADLINE accounting
parents 72dc6db7 81621430
...@@ -2443,7 +2443,7 @@ Miscellaneous controller provides 3 interface files. If two misc resources (res_ ...@@ -2443,7 +2443,7 @@ Miscellaneous controller provides 3 interface files. If two misc resources (res_
res_b 10 res_b 10
misc.current misc.current
A read-only flat-keyed file shown in the non-root cgroups. It shows A read-only flat-keyed file shown in all cgroups. It shows
the current usage of the resources in the cgroup and its children.:: the current usage of the resources in the cgroup and its children.::
$ cat misc.current $ cat misc.current
......
...@@ -118,7 +118,6 @@ int cgroup_rm_cftypes(struct cftype *cfts); ...@@ -118,7 +118,6 @@ int cgroup_rm_cftypes(struct cftype *cfts);
void cgroup_file_notify(struct cgroup_file *cfile); void cgroup_file_notify(struct cgroup_file *cfile);
void cgroup_file_show(struct cgroup_file *cfile, bool show); void cgroup_file_show(struct cgroup_file *cfile, bool show);
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry); int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *tsk); struct pid *pid, struct task_struct *tsk);
......
...@@ -71,8 +71,10 @@ extern void cpuset_init_smp(void); ...@@ -71,8 +71,10 @@ extern void cpuset_init_smp(void);
extern void cpuset_force_rebuild(void); extern void cpuset_force_rebuild(void);
extern void cpuset_update_active_cpus(void); extern void cpuset_update_active_cpus(void);
extern void cpuset_wait_for_hotplug(void); extern void cpuset_wait_for_hotplug(void);
extern void cpuset_read_lock(void); extern void inc_dl_tasks_cs(struct task_struct *task);
extern void cpuset_read_unlock(void); extern void dec_dl_tasks_cs(struct task_struct *task);
extern void cpuset_lock(void);
extern void cpuset_unlock(void);
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
extern bool cpuset_cpus_allowed_fallback(struct task_struct *p); extern bool cpuset_cpus_allowed_fallback(struct task_struct *p);
extern nodemask_t cpuset_mems_allowed(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
...@@ -189,8 +191,10 @@ static inline void cpuset_update_active_cpus(void) ...@@ -189,8 +191,10 @@ static inline void cpuset_update_active_cpus(void)
static inline void cpuset_wait_for_hotplug(void) { } static inline void cpuset_wait_for_hotplug(void) { }
static inline void cpuset_read_lock(void) { } static inline void inc_dl_tasks_cs(struct task_struct *task) { }
static inline void cpuset_read_unlock(void) { } static inline void dec_dl_tasks_cs(struct task_struct *task) { }
static inline void cpuset_lock(void) { }
static inline void cpuset_unlock(void) { }
static inline void cpuset_cpus_allowed(struct task_struct *p, static inline void cpuset_cpus_allowed(struct task_struct *p,
struct cpumask *mask) struct cpumask *mask)
......
...@@ -1852,7 +1852,9 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags) ...@@ -1852,7 +1852,9 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags)
} }
extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus); extern int task_can_attach(struct task_struct *p);
extern int dl_bw_alloc(int cpu, u64 dl_bw);
extern void dl_bw_free(int cpu, u64 dl_bw);
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
......
...@@ -220,8 +220,6 @@ static inline void get_css_set(struct css_set *cset) ...@@ -220,8 +220,6 @@ static inline void get_css_set(struct css_set *cset)
bool cgroup_ssid_enabled(int ssid); bool cgroup_ssid_enabled(int ssid);
bool cgroup_on_dfl(const struct cgroup *cgrp); bool cgroup_on_dfl(const struct cgroup *cgrp);
bool cgroup_is_thread_root(struct cgroup *cgrp);
bool cgroup_is_threaded(struct cgroup *cgrp);
struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root); struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root);
struct cgroup *task_cgroup_from_root(struct task_struct *task, struct cgroup *task_cgroup_from_root(struct task_struct *task,
......
...@@ -563,7 +563,7 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, ...@@ -563,7 +563,7 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
if (!cgrp) if (!cgrp)
return -ENODEV; return -ENODEV;
spin_lock(&release_agent_path_lock); spin_lock(&release_agent_path_lock);
strlcpy(cgrp->root->release_agent_path, strstrip(buf), strscpy(cgrp->root->release_agent_path, strstrip(buf),
sizeof(cgrp->root->release_agent_path)); sizeof(cgrp->root->release_agent_path));
spin_unlock(&release_agent_path_lock); spin_unlock(&release_agent_path_lock);
cgroup_kn_unlock(of->kn); cgroup_kn_unlock(of->kn);
...@@ -797,7 +797,7 @@ void cgroup1_release_agent(struct work_struct *work) ...@@ -797,7 +797,7 @@ void cgroup1_release_agent(struct work_struct *work)
goto out_free; goto out_free;
spin_lock(&release_agent_path_lock); spin_lock(&release_agent_path_lock);
strlcpy(agentbuf, cgrp->root->release_agent_path, PATH_MAX); strscpy(agentbuf, cgrp->root->release_agent_path, PATH_MAX);
spin_unlock(&release_agent_path_lock); spin_unlock(&release_agent_path_lock);
if (!agentbuf[0]) if (!agentbuf[0])
goto out_free; goto out_free;
......
...@@ -57,6 +57,7 @@ ...@@ -57,6 +57,7 @@
#include <linux/file.h> #include <linux/file.h>
#include <linux/fs_parser.h> #include <linux/fs_parser.h>
#include <linux/sched/cputime.h> #include <linux/sched/cputime.h>
#include <linux/sched/deadline.h>
#include <linux/psi.h> #include <linux/psi.h>
#include <net/sock.h> #include <net/sock.h>
...@@ -312,8 +313,6 @@ bool cgroup_ssid_enabled(int ssid) ...@@ -312,8 +313,6 @@ bool cgroup_ssid_enabled(int ssid)
* masks of ancestors. * masks of ancestors.
* *
* - blkcg: blk-throttle becomes properly hierarchical. * - blkcg: blk-throttle becomes properly hierarchical.
*
* - debug: disallowed on the default hierarchy.
*/ */
bool cgroup_on_dfl(const struct cgroup *cgrp) bool cgroup_on_dfl(const struct cgroup *cgrp)
{ {
...@@ -356,7 +355,7 @@ static bool cgroup_has_tasks(struct cgroup *cgrp) ...@@ -356,7 +355,7 @@ static bool cgroup_has_tasks(struct cgroup *cgrp)
return cgrp->nr_populated_csets; return cgrp->nr_populated_csets;
} }
bool cgroup_is_threaded(struct cgroup *cgrp) static bool cgroup_is_threaded(struct cgroup *cgrp)
{ {
return cgrp->dom_cgrp != cgrp; return cgrp->dom_cgrp != cgrp;
} }
...@@ -395,7 +394,7 @@ static bool cgroup_can_be_thread_root(struct cgroup *cgrp) ...@@ -395,7 +394,7 @@ static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
} }
/* is @cgrp root of a threaded subtree? */ /* is @cgrp root of a threaded subtree? */
bool cgroup_is_thread_root(struct cgroup *cgrp) static bool cgroup_is_thread_root(struct cgroup *cgrp)
{ {
/* thread root should be a domain */ /* thread root should be a domain */
if (cgroup_is_threaded(cgrp)) if (cgroup_is_threaded(cgrp))
...@@ -618,7 +617,7 @@ EXPORT_SYMBOL_GPL(cgroup_get_e_css); ...@@ -618,7 +617,7 @@ EXPORT_SYMBOL_GPL(cgroup_get_e_css);
static void cgroup_get_live(struct cgroup *cgrp) static void cgroup_get_live(struct cgroup *cgrp)
{ {
WARN_ON_ONCE(cgroup_is_dead(cgrp)); WARN_ON_ONCE(cgroup_is_dead(cgrp));
css_get(&cgrp->self); cgroup_get(cgrp);
} }
/** /**
...@@ -689,21 +688,6 @@ EXPORT_SYMBOL_GPL(of_css); ...@@ -689,21 +688,6 @@ EXPORT_SYMBOL_GPL(of_css);
lockdep_is_held(&cgroup_mutex)))) { } \ lockdep_is_held(&cgroup_mutex)))) { } \
else else
/**
* for_each_e_css - iterate all effective css's of a cgroup
* @css: the iteration cursor
* @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
* @cgrp: the target cgroup to iterate css's of
*
* Should be called under cgroup_[tree_]mutex.
*/
#define for_each_e_css(css, ssid, cgrp) \
for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
if (!((css) = cgroup_e_css_by_mask(cgrp, \
cgroup_subsys[(ssid)]))) \
; \
else
/** /**
* do_each_subsys_mask - filter for_each_subsys with a bitmask * do_each_subsys_mask - filter for_each_subsys with a bitmask
* @ss: the iteration cursor * @ss: the iteration cursor
...@@ -2392,45 +2376,6 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, ...@@ -2392,45 +2376,6 @@ int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
} }
EXPORT_SYMBOL_GPL(cgroup_path_ns); EXPORT_SYMBOL_GPL(cgroup_path_ns);
/**
* task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
* @task: target task
* @buf: the buffer to write the path into
* @buflen: the length of the buffer
*
* Determine @task's cgroup on the first (the one with the lowest non-zero
* hierarchy_id) cgroup hierarchy and copy its path into @buf. This
* function grabs cgroup_mutex and shouldn't be used inside locks used by
* cgroup controller callbacks.
*
* Return value is the same as kernfs_path().
*/
int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
{
struct cgroup_root *root;
struct cgroup *cgrp;
int hierarchy_id = 1;
int ret;
cgroup_lock();
spin_lock_irq(&css_set_lock);
root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
if (root) {
cgrp = task_cgroup_from_root(task, root);
ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
} else {
/* if no hierarchy exists, everyone is in "/" */
ret = strscpy(buf, "/", buflen);
}
spin_unlock_irq(&css_set_lock);
cgroup_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(task_cgroup_path);
/** /**
* cgroup_attach_lock - Lock for ->attach() * cgroup_attach_lock - Lock for ->attach()
* @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem
...@@ -2885,9 +2830,9 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup, ...@@ -2885,9 +2830,9 @@ int cgroup_migrate(struct task_struct *leader, bool threadgroup,
struct task_struct *task; struct task_struct *task;
/* /*
* Prevent freeing of tasks while we take a snapshot. Tasks that are * The following thread iteration should be inside an RCU critical
* already PF_EXITING could be freed from underneath us unless we * section to prevent tasks from being freed while taking the snapshot.
* take an rcu_read_lock. * spin_lock_irq() implies RCU critical section here.
*/ */
spin_lock_irq(&css_set_lock); spin_lock_irq(&css_set_lock);
task = leader; task = leader;
...@@ -6708,6 +6653,9 @@ void cgroup_exit(struct task_struct *tsk) ...@@ -6708,6 +6653,9 @@ void cgroup_exit(struct task_struct *tsk)
list_add_tail(&tsk->cg_list, &cset->dying_tasks); list_add_tail(&tsk->cg_list, &cset->dying_tasks);
cset->nr_tasks--; cset->nr_tasks--;
if (dl_task(tsk))
dec_dl_tasks_cs(tsk);
WARN_ON_ONCE(cgroup_task_frozen(tsk)); WARN_ON_ONCE(cgroup_task_frozen(tsk));
if (unlikely(!(tsk->flags & PF_KTHREAD) && if (unlikely(!(tsk->flags & PF_KTHREAD) &&
test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags))) test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
......
...@@ -25,45 +25,22 @@ ...@@ -25,45 +25,22 @@
#include <linux/cpu.h> #include <linux/cpu.h>
#include <linux/cpumask.h> #include <linux/cpumask.h>
#include <linux/cpuset.h> #include <linux/cpuset.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/kernel.h> #include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/mempolicy.h> #include <linux/mempolicy.h>
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/memory.h> #include <linux/memory.h>
#include <linux/export.h> #include <linux/export.h>
#include <linux/mount.h>
#include <linux/fs_context.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h> #include <linux/rcupdate.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/sched/deadline.h> #include <linux/sched/deadline.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/sched/task.h> #include <linux/sched/task.h>
#include <linux/seq_file.h>
#include <linux/security.h> #include <linux/security.h>
#include <linux/slab.h>
#include <linux/spinlock.h> #include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/time.h>
#include <linux/time64.h>
#include <linux/backing-dev.h>
#include <linux/sort.h>
#include <linux/oom.h> #include <linux/oom.h>
#include <linux/sched/isolation.h> #include <linux/sched/isolation.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/cgroup.h> #include <linux/cgroup.h>
#include <linux/wait.h> #include <linux/wait.h>
...@@ -193,6 +170,14 @@ struct cpuset { ...@@ -193,6 +170,14 @@ struct cpuset {
int use_parent_ecpus; int use_parent_ecpus;
int child_ecpus_count; int child_ecpus_count;
/*
* number of SCHED_DEADLINE tasks attached to this cpuset, so that we
* know when to rebuild associated root domain bandwidth information.
*/
int nr_deadline_tasks;
int nr_migrate_dl_tasks;
u64 sum_migrate_dl_bw;
/* Invalid partition error code, not lock protected */ /* Invalid partition error code, not lock protected */
enum prs_errcode prs_err; enum prs_errcode prs_err;
...@@ -245,6 +230,20 @@ static inline struct cpuset *parent_cs(struct cpuset *cs) ...@@ -245,6 +230,20 @@ static inline struct cpuset *parent_cs(struct cpuset *cs)
return css_cs(cs->css.parent); return css_cs(cs->css.parent);
} }
void inc_dl_tasks_cs(struct task_struct *p)
{
struct cpuset *cs = task_cs(p);
cs->nr_deadline_tasks++;
}
void dec_dl_tasks_cs(struct task_struct *p)
{
struct cpuset *cs = task_cs(p);
cs->nr_deadline_tasks--;
}
/* bits in struct cpuset flags field */ /* bits in struct cpuset flags field */
typedef enum { typedef enum {
CS_ONLINE, CS_ONLINE,
...@@ -366,22 +365,23 @@ static struct cpuset top_cpuset = { ...@@ -366,22 +365,23 @@ static struct cpuset top_cpuset = {
if (is_cpuset_online(((des_cs) = css_cs((pos_css))))) if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
/* /*
* There are two global locks guarding cpuset structures - cpuset_rwsem and * There are two global locks guarding cpuset structures - cpuset_mutex and
* callback_lock. We also require taking task_lock() when dereferencing a * callback_lock. We also require taking task_lock() when dereferencing a
* task's cpuset pointer. See "The task_lock() exception", at the end of this * task's cpuset pointer. See "The task_lock() exception", at the end of this
* comment. The cpuset code uses only cpuset_rwsem write lock. Other * comment. The cpuset code uses only cpuset_mutex. Other kernel subsystems
* kernel subsystems can use cpuset_read_lock()/cpuset_read_unlock() to * can use cpuset_lock()/cpuset_unlock() to prevent change to cpuset
* prevent change to cpuset structures. * structures. Note that cpuset_mutex needs to be a mutex as it is used in
* paths that rely on priority inheritance (e.g. scheduler - on RT) for
* correctness.
* *
* A task must hold both locks to modify cpusets. If a task holds * A task must hold both locks to modify cpusets. If a task holds
* cpuset_rwsem, it blocks others wanting that rwsem, ensuring that it * cpuset_mutex, it blocks others, ensuring that it is the only task able to
* is the only task able to also acquire callback_lock and be able to * also acquire callback_lock and be able to modify cpusets. It can perform
* modify cpusets. It can perform various checks on the cpuset structure * various checks on the cpuset structure first, knowing nothing will change.
* first, knowing nothing will change. It can also allocate memory while * It can also allocate memory while just holding cpuset_mutex. While it is
* just holding cpuset_rwsem. While it is performing these checks, various * performing these checks, various callback routines can briefly acquire
* callback routines can briefly acquire callback_lock to query cpusets. * callback_lock to query cpusets. Once it is ready to make the changes, it
* Once it is ready to make the changes, it takes callback_lock, blocking * takes callback_lock, blocking everyone else.
* everyone else.
* *
* Calls to the kernel memory allocator can not be made while holding * Calls to the kernel memory allocator can not be made while holding
* callback_lock, as that would risk double tripping on callback_lock * callback_lock, as that would risk double tripping on callback_lock
...@@ -403,16 +403,16 @@ static struct cpuset top_cpuset = { ...@@ -403,16 +403,16 @@ static struct cpuset top_cpuset = {
* guidelines for accessing subsystem state in kernel/cgroup.c * guidelines for accessing subsystem state in kernel/cgroup.c
*/ */
DEFINE_STATIC_PERCPU_RWSEM(cpuset_rwsem); static DEFINE_MUTEX(cpuset_mutex);
void cpuset_read_lock(void) void cpuset_lock(void)
{ {
percpu_down_read(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
} }
void cpuset_read_unlock(void) void cpuset_unlock(void)
{ {
percpu_up_read(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
} }
static DEFINE_SPINLOCK(callback_lock); static DEFINE_SPINLOCK(callback_lock);
...@@ -496,7 +496,7 @@ static inline bool partition_is_populated(struct cpuset *cs, ...@@ -496,7 +496,7 @@ static inline bool partition_is_populated(struct cpuset *cs,
* One way or another, we guarantee to return some non-empty subset * One way or another, we guarantee to return some non-empty subset
* of cpu_online_mask. * of cpu_online_mask.
* *
* Call with callback_lock or cpuset_rwsem held. * Call with callback_lock or cpuset_mutex held.
*/ */
static void guarantee_online_cpus(struct task_struct *tsk, static void guarantee_online_cpus(struct task_struct *tsk,
struct cpumask *pmask) struct cpumask *pmask)
...@@ -538,7 +538,7 @@ static void guarantee_online_cpus(struct task_struct *tsk, ...@@ -538,7 +538,7 @@ static void guarantee_online_cpus(struct task_struct *tsk,
* One way or another, we guarantee to return some non-empty subset * One way or another, we guarantee to return some non-empty subset
* of node_states[N_MEMORY]. * of node_states[N_MEMORY].
* *
* Call with callback_lock or cpuset_rwsem held. * Call with callback_lock or cpuset_mutex held.
*/ */
static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
{ {
...@@ -550,7 +550,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) ...@@ -550,7 +550,7 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
/* /*
* update task's spread flag if cpuset's page/slab spread flag is set * update task's spread flag if cpuset's page/slab spread flag is set
* *
* Call with callback_lock or cpuset_rwsem held. The check can be skipped * Call with callback_lock or cpuset_mutex held. The check can be skipped
* if on default hierarchy. * if on default hierarchy.
*/ */
static void cpuset_update_task_spread_flags(struct cpuset *cs, static void cpuset_update_task_spread_flags(struct cpuset *cs,
...@@ -575,7 +575,7 @@ static void cpuset_update_task_spread_flags(struct cpuset *cs, ...@@ -575,7 +575,7 @@ static void cpuset_update_task_spread_flags(struct cpuset *cs,
* *
* One cpuset is a subset of another if all its allowed CPUs and * One cpuset is a subset of another if all its allowed CPUs and
* Memory Nodes are a subset of the other, and its exclusive flags * Memory Nodes are a subset of the other, and its exclusive flags
* are only set if the other's are set. Call holding cpuset_rwsem. * are only set if the other's are set. Call holding cpuset_mutex.
*/ */
static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q) static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
...@@ -713,7 +713,7 @@ static int validate_change_legacy(struct cpuset *cur, struct cpuset *trial) ...@@ -713,7 +713,7 @@ static int validate_change_legacy(struct cpuset *cur, struct cpuset *trial)
* If we replaced the flag and mask values of the current cpuset * If we replaced the flag and mask values of the current cpuset
* (cur) with those values in the trial cpuset (trial), would * (cur) with those values in the trial cpuset (trial), would
* our various subset and exclusive rules still be valid? Presumes * our various subset and exclusive rules still be valid? Presumes
* cpuset_rwsem held. * cpuset_mutex held.
* *
* 'cur' is the address of an actual, in-use cpuset. Operations * 'cur' is the address of an actual, in-use cpuset. Operations
* such as list traversal that depend on the actual address of the * such as list traversal that depend on the actual address of the
...@@ -829,7 +829,7 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr, ...@@ -829,7 +829,7 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
rcu_read_unlock(); rcu_read_unlock();
} }
/* Must be called with cpuset_rwsem held. */ /* Must be called with cpuset_mutex held. */
static inline int nr_cpusets(void) static inline int nr_cpusets(void)
{ {
/* jump label reference count + the top-level cpuset */ /* jump label reference count + the top-level cpuset */
...@@ -855,7 +855,7 @@ static inline int nr_cpusets(void) ...@@ -855,7 +855,7 @@ static inline int nr_cpusets(void)
* domains when operating in the severe memory shortage situations * domains when operating in the severe memory shortage situations
* that could cause allocation failures below. * that could cause allocation failures below.
* *
* Must be called with cpuset_rwsem held. * Must be called with cpuset_mutex held.
* *
* The three key local variables below are: * The three key local variables below are:
* cp - cpuset pointer, used (together with pos_css) to perform a * cp - cpuset pointer, used (together with pos_css) to perform a
...@@ -1066,11 +1066,14 @@ static int generate_sched_domains(cpumask_var_t **domains, ...@@ -1066,11 +1066,14 @@ static int generate_sched_domains(cpumask_var_t **domains,
return ndoms; return ndoms;
} }
static void update_tasks_root_domain(struct cpuset *cs) static void dl_update_tasks_root_domain(struct cpuset *cs)
{ {
struct css_task_iter it; struct css_task_iter it;
struct task_struct *task; struct task_struct *task;
if (cs->nr_deadline_tasks == 0)
return;
css_task_iter_start(&cs->css, 0, &it); css_task_iter_start(&cs->css, 0, &it);
while ((task = css_task_iter_next(&it))) while ((task = css_task_iter_next(&it)))
...@@ -1079,12 +1082,12 @@ static void update_tasks_root_domain(struct cpuset *cs) ...@@ -1079,12 +1082,12 @@ static void update_tasks_root_domain(struct cpuset *cs)
css_task_iter_end(&it); css_task_iter_end(&it);
} }
static void rebuild_root_domains(void) static void dl_rebuild_rd_accounting(void)
{ {
struct cpuset *cs = NULL; struct cpuset *cs = NULL;
struct cgroup_subsys_state *pos_css; struct cgroup_subsys_state *pos_css;
percpu_rwsem_assert_held(&cpuset_rwsem); lockdep_assert_held(&cpuset_mutex);
lockdep_assert_cpus_held(); lockdep_assert_cpus_held();
lockdep_assert_held(&sched_domains_mutex); lockdep_assert_held(&sched_domains_mutex);
...@@ -1107,7 +1110,7 @@ static void rebuild_root_domains(void) ...@@ -1107,7 +1110,7 @@ static void rebuild_root_domains(void)
rcu_read_unlock(); rcu_read_unlock();
update_tasks_root_domain(cs); dl_update_tasks_root_domain(cs);
rcu_read_lock(); rcu_read_lock();
css_put(&cs->css); css_put(&cs->css);
...@@ -1121,7 +1124,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[], ...@@ -1121,7 +1124,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
{ {
mutex_lock(&sched_domains_mutex); mutex_lock(&sched_domains_mutex);
partition_sched_domains_locked(ndoms_new, doms_new, dattr_new); partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
rebuild_root_domains(); dl_rebuild_rd_accounting();
mutex_unlock(&sched_domains_mutex); mutex_unlock(&sched_domains_mutex);
} }
...@@ -1134,7 +1137,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[], ...@@ -1134,7 +1137,7 @@ partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
* 'cpus' is removed, then call this routine to rebuild the * 'cpus' is removed, then call this routine to rebuild the
* scheduler's dynamic sched domains. * scheduler's dynamic sched domains.
* *
* Call with cpuset_rwsem held. Takes cpus_read_lock(). * Call with cpuset_mutex held. Takes cpus_read_lock().
*/ */
static void rebuild_sched_domains_locked(void) static void rebuild_sched_domains_locked(void)
{ {
...@@ -1145,7 +1148,7 @@ static void rebuild_sched_domains_locked(void) ...@@ -1145,7 +1148,7 @@ static void rebuild_sched_domains_locked(void)
int ndoms; int ndoms;
lockdep_assert_cpus_held(); lockdep_assert_cpus_held();
percpu_rwsem_assert_held(&cpuset_rwsem); lockdep_assert_held(&cpuset_mutex);
/* /*
* If we have raced with CPU hotplug, return early to avoid * If we have raced with CPU hotplug, return early to avoid
...@@ -1196,9 +1199,9 @@ static void rebuild_sched_domains_locked(void) ...@@ -1196,9 +1199,9 @@ static void rebuild_sched_domains_locked(void)
void rebuild_sched_domains(void) void rebuild_sched_domains(void)
{ {
cpus_read_lock(); cpus_read_lock();
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
rebuild_sched_domains_locked(); rebuild_sched_domains_locked();
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
cpus_read_unlock(); cpus_read_unlock();
} }
...@@ -1208,7 +1211,7 @@ void rebuild_sched_domains(void) ...@@ -1208,7 +1211,7 @@ void rebuild_sched_domains(void)
* @new_cpus: the temp variable for the new effective_cpus mask * @new_cpus: the temp variable for the new effective_cpus mask
* *
* Iterate through each task of @cs updating its cpus_allowed to the * Iterate through each task of @cs updating its cpus_allowed to the
* effective cpuset's. As this function is called with cpuset_rwsem held, * effective cpuset's. As this function is called with cpuset_mutex held,
* cpuset membership stays stable. For top_cpuset, task_cpu_possible_mask() * cpuset membership stays stable. For top_cpuset, task_cpu_possible_mask()
* is used instead of effective_cpus to make sure all offline CPUs are also * is used instead of effective_cpus to make sure all offline CPUs are also
* included as hotplug code won't update cpumasks for tasks in top_cpuset. * included as hotplug code won't update cpumasks for tasks in top_cpuset.
...@@ -1322,7 +1325,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, ...@@ -1322,7 +1325,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
int old_prs, new_prs; int old_prs, new_prs;
int part_error = PERR_NONE; /* Partition error? */ int part_error = PERR_NONE; /* Partition error? */
percpu_rwsem_assert_held(&cpuset_rwsem); lockdep_assert_held(&cpuset_mutex);
/* /*
* The parent must be a partition root. * The parent must be a partition root.
...@@ -1545,7 +1548,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd, ...@@ -1545,7 +1548,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cs, int cmd,
* *
* On legacy hierarchy, effective_cpus will be the same with cpu_allowed. * On legacy hierarchy, effective_cpus will be the same with cpu_allowed.
* *
* Called with cpuset_rwsem held * Called with cpuset_mutex held
*/ */
static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp, static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp,
bool force) bool force)
...@@ -1705,7 +1708,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, ...@@ -1705,7 +1708,7 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
struct cpuset *sibling; struct cpuset *sibling;
struct cgroup_subsys_state *pos_css; struct cgroup_subsys_state *pos_css;
percpu_rwsem_assert_held(&cpuset_rwsem); lockdep_assert_held(&cpuset_mutex);
/* /*
* Check all its siblings and call update_cpumasks_hier() * Check all its siblings and call update_cpumasks_hier()
...@@ -1955,12 +1958,12 @@ static void *cpuset_being_rebound; ...@@ -1955,12 +1958,12 @@ static void *cpuset_being_rebound;
* @cs: the cpuset in which each task's mems_allowed mask needs to be changed * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
* *
* Iterate through each task of @cs updating its mems_allowed to the * Iterate through each task of @cs updating its mems_allowed to the
* effective cpuset's. As this function is called with cpuset_rwsem held, * effective cpuset's. As this function is called with cpuset_mutex held,
* cpuset membership stays stable. * cpuset membership stays stable.
*/ */
static void update_tasks_nodemask(struct cpuset *cs) static void update_tasks_nodemask(struct cpuset *cs)
{ {
static nodemask_t newmems; /* protected by cpuset_rwsem */ static nodemask_t newmems; /* protected by cpuset_mutex */
struct css_task_iter it; struct css_task_iter it;
struct task_struct *task; struct task_struct *task;
...@@ -1973,7 +1976,7 @@ static void update_tasks_nodemask(struct cpuset *cs) ...@@ -1973,7 +1976,7 @@ static void update_tasks_nodemask(struct cpuset *cs)
* take while holding tasklist_lock. Forks can happen - the * take while holding tasklist_lock. Forks can happen - the
* mpol_dup() cpuset_being_rebound check will catch such forks, * mpol_dup() cpuset_being_rebound check will catch such forks,
* and rebind their vma mempolicies too. Because we still hold * and rebind their vma mempolicies too. Because we still hold
* the global cpuset_rwsem, we know that no other rebind effort * the global cpuset_mutex, we know that no other rebind effort
* will be contending for the global variable cpuset_being_rebound. * will be contending for the global variable cpuset_being_rebound.
* It's ok if we rebind the same mm twice; mpol_rebind_mm() * It's ok if we rebind the same mm twice; mpol_rebind_mm()
* is idempotent. Also migrate pages in each mm to new nodes. * is idempotent. Also migrate pages in each mm to new nodes.
...@@ -2019,7 +2022,7 @@ static void update_tasks_nodemask(struct cpuset *cs) ...@@ -2019,7 +2022,7 @@ static void update_tasks_nodemask(struct cpuset *cs)
* *
* On legacy hierarchy, effective_mems will be the same with mems_allowed. * On legacy hierarchy, effective_mems will be the same with mems_allowed.
* *
* Called with cpuset_rwsem held * Called with cpuset_mutex held
*/ */
static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
{ {
...@@ -2072,7 +2075,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) ...@@ -2072,7 +2075,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
* mempolicies and if the cpuset is marked 'memory_migrate', * mempolicies and if the cpuset is marked 'memory_migrate',
* migrate the tasks pages to the new memory. * migrate the tasks pages to the new memory.
* *
* Call with cpuset_rwsem held. May take callback_lock during call. * Call with cpuset_mutex held. May take callback_lock during call.
* Will take tasklist_lock, scan tasklist for tasks in cpuset cs, * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
* lock each such tasks mm->mmap_lock, scan its vma's and rebind * lock each such tasks mm->mmap_lock, scan its vma's and rebind
* their mempolicies to the cpusets new mems_allowed. * their mempolicies to the cpusets new mems_allowed.
...@@ -2164,7 +2167,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val) ...@@ -2164,7 +2167,7 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
* @cs: the cpuset in which each task's spread flags needs to be changed * @cs: the cpuset in which each task's spread flags needs to be changed
* *
* Iterate through each task of @cs updating its spread flags. As this * Iterate through each task of @cs updating its spread flags. As this
* function is called with cpuset_rwsem held, cpuset membership stays * function is called with cpuset_mutex held, cpuset membership stays
* stable. * stable.
*/ */
static void update_tasks_flags(struct cpuset *cs) static void update_tasks_flags(struct cpuset *cs)
...@@ -2184,7 +2187,7 @@ static void update_tasks_flags(struct cpuset *cs) ...@@ -2184,7 +2187,7 @@ static void update_tasks_flags(struct cpuset *cs)
* cs: the cpuset to update * cs: the cpuset to update
* turning_on: whether the flag is being set or cleared * turning_on: whether the flag is being set or cleared
* *
* Call with cpuset_rwsem held. * Call with cpuset_mutex held.
*/ */
static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
...@@ -2234,7 +2237,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, ...@@ -2234,7 +2237,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
* @new_prs: new partition root state * @new_prs: new partition root state
* Return: 0 if successful, != 0 if error * Return: 0 if successful, != 0 if error
* *
* Call with cpuset_rwsem held. * Call with cpuset_mutex held.
*/ */
static int update_prstate(struct cpuset *cs, int new_prs) static int update_prstate(struct cpuset *cs, int new_prs)
{ {
...@@ -2472,19 +2475,26 @@ static int cpuset_can_attach_check(struct cpuset *cs) ...@@ -2472,19 +2475,26 @@ static int cpuset_can_attach_check(struct cpuset *cs)
return 0; return 0;
} }
/* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */ static void reset_migrate_dl_data(struct cpuset *cs)
{
cs->nr_migrate_dl_tasks = 0;
cs->sum_migrate_dl_bw = 0;
}
/* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
static int cpuset_can_attach(struct cgroup_taskset *tset) static int cpuset_can_attach(struct cgroup_taskset *tset)
{ {
struct cgroup_subsys_state *css; struct cgroup_subsys_state *css;
struct cpuset *cs; struct cpuset *cs, *oldcs;
struct task_struct *task; struct task_struct *task;
int ret; int ret;
/* used later by cpuset_attach() */ /* used later by cpuset_attach() */
cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css)); cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
oldcs = cpuset_attach_old_cs;
cs = css_cs(css); cs = css_cs(css);
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
/* Check to see if task is allowed in the cpuset */ /* Check to see if task is allowed in the cpuset */
ret = cpuset_can_attach_check(cs); ret = cpuset_can_attach_check(cs);
...@@ -2492,21 +2502,46 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) ...@@ -2492,21 +2502,46 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
goto out_unlock; goto out_unlock;
cgroup_taskset_for_each(task, css, tset) { cgroup_taskset_for_each(task, css, tset) {
ret = task_can_attach(task, cs->effective_cpus); ret = task_can_attach(task);
if (ret) if (ret)
goto out_unlock; goto out_unlock;
ret = security_task_setscheduler(task); ret = security_task_setscheduler(task);
if (ret) if (ret)
goto out_unlock; goto out_unlock;
if (dl_task(task)) {
cs->nr_migrate_dl_tasks++;
cs->sum_migrate_dl_bw += task->dl.dl_bw;
}
} }
if (!cs->nr_migrate_dl_tasks)
goto out_success;
if (!cpumask_intersects(oldcs->effective_cpus, cs->effective_cpus)) {
int cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
if (unlikely(cpu >= nr_cpu_ids)) {
reset_migrate_dl_data(cs);
ret = -EINVAL;
goto out_unlock;
}
ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
if (ret) {
reset_migrate_dl_data(cs);
goto out_unlock;
}
}
out_success:
/* /*
* Mark attach is in progress. This makes validate_change() fail * Mark attach is in progress. This makes validate_change() fail
* changes which zero cpus/mems_allowed. * changes which zero cpus/mems_allowed.
*/ */
cs->attach_in_progress++; cs->attach_in_progress++;
out_unlock: out_unlock:
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
return ret; return ret;
} }
...@@ -2518,15 +2553,23 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset) ...@@ -2518,15 +2553,23 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
cgroup_taskset_first(tset, &css); cgroup_taskset_first(tset, &css);
cs = css_cs(css); cs = css_cs(css);
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
cs->attach_in_progress--; cs->attach_in_progress--;
if (!cs->attach_in_progress) if (!cs->attach_in_progress)
wake_up(&cpuset_attach_wq); wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem);
if (cs->nr_migrate_dl_tasks) {
int cpu = cpumask_any(cs->effective_cpus);
dl_bw_free(cpu, cs->sum_migrate_dl_bw);
reset_migrate_dl_data(cs);
}
mutex_unlock(&cpuset_mutex);
} }
/* /*
* Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task() * Protected by cpuset_mutex. cpus_attach is used only by cpuset_attach_task()
* but we can't allocate it dynamically there. Define it global and * but we can't allocate it dynamically there. Define it global and
* allocate from cpuset_init(). * allocate from cpuset_init().
*/ */
...@@ -2535,7 +2578,7 @@ static nodemask_t cpuset_attach_nodemask_to; ...@@ -2535,7 +2578,7 @@ static nodemask_t cpuset_attach_nodemask_to;
static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
{ {
percpu_rwsem_assert_held(&cpuset_rwsem); lockdep_assert_held(&cpuset_mutex);
if (cs != &top_cpuset) if (cs != &top_cpuset)
guarantee_online_cpus(task, cpus_attach); guarantee_online_cpus(task, cpus_attach);
...@@ -2565,7 +2608,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) ...@@ -2565,7 +2608,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
cs = css_cs(css); cs = css_cs(css);
lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */ lockdep_assert_cpus_held(); /* see cgroup_attach_lock() */
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
cpus_updated = !cpumask_equal(cs->effective_cpus, cpus_updated = !cpumask_equal(cs->effective_cpus,
oldcs->effective_cpus); oldcs->effective_cpus);
mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems); mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems);
...@@ -2622,11 +2665,17 @@ static void cpuset_attach(struct cgroup_taskset *tset) ...@@ -2622,11 +2665,17 @@ static void cpuset_attach(struct cgroup_taskset *tset)
out: out:
cs->old_mems_allowed = cpuset_attach_nodemask_to; cs->old_mems_allowed = cpuset_attach_nodemask_to;
if (cs->nr_migrate_dl_tasks) {
cs->nr_deadline_tasks += cs->nr_migrate_dl_tasks;
oldcs->nr_deadline_tasks -= cs->nr_migrate_dl_tasks;
reset_migrate_dl_data(cs);
}
cs->attach_in_progress--; cs->attach_in_progress--;
if (!cs->attach_in_progress) if (!cs->attach_in_progress)
wake_up(&cpuset_attach_wq); wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
} }
/* The various types of files and directories in a cpuset file system */ /* The various types of files and directories in a cpuset file system */
...@@ -2658,7 +2707,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, ...@@ -2658,7 +2707,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
int retval = 0; int retval = 0;
cpus_read_lock(); cpus_read_lock();
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
if (!is_cpuset_online(cs)) { if (!is_cpuset_online(cs)) {
retval = -ENODEV; retval = -ENODEV;
goto out_unlock; goto out_unlock;
...@@ -2694,7 +2743,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, ...@@ -2694,7 +2743,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
break; break;
} }
out_unlock: out_unlock:
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
cpus_read_unlock(); cpus_read_unlock();
return retval; return retval;
} }
...@@ -2707,7 +2756,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, ...@@ -2707,7 +2756,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
int retval = -ENODEV; int retval = -ENODEV;
cpus_read_lock(); cpus_read_lock();
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
if (!is_cpuset_online(cs)) if (!is_cpuset_online(cs))
goto out_unlock; goto out_unlock;
...@@ -2720,7 +2769,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, ...@@ -2720,7 +2769,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
break; break;
} }
out_unlock: out_unlock:
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
cpus_read_unlock(); cpus_read_unlock();
return retval; return retval;
} }
...@@ -2753,7 +2802,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, ...@@ -2753,7 +2802,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
* operation like this one can lead to a deadlock through kernfs * operation like this one can lead to a deadlock through kernfs
* active_ref protection. Let's break the protection. Losing the * active_ref protection. Let's break the protection. Losing the
* protection is okay as we check whether @cs is online after * protection is okay as we check whether @cs is online after
* grabbing cpuset_rwsem anyway. This only happens on the legacy * grabbing cpuset_mutex anyway. This only happens on the legacy
* hierarchies. * hierarchies.
*/ */
css_get(&cs->css); css_get(&cs->css);
...@@ -2761,7 +2810,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, ...@@ -2761,7 +2810,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
flush_work(&cpuset_hotplug_work); flush_work(&cpuset_hotplug_work);
cpus_read_lock(); cpus_read_lock();
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
if (!is_cpuset_online(cs)) if (!is_cpuset_online(cs))
goto out_unlock; goto out_unlock;
...@@ -2785,7 +2834,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of, ...@@ -2785,7 +2834,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
free_cpuset(trialcs); free_cpuset(trialcs);
out_unlock: out_unlock:
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
cpus_read_unlock(); cpus_read_unlock();
kernfs_unbreak_active_protection(of->kn); kernfs_unbreak_active_protection(of->kn);
css_put(&cs->css); css_put(&cs->css);
...@@ -2933,13 +2982,13 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf, ...@@ -2933,13 +2982,13 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
css_get(&cs->css); css_get(&cs->css);
cpus_read_lock(); cpus_read_lock();
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
if (!is_cpuset_online(cs)) if (!is_cpuset_online(cs))
goto out_unlock; goto out_unlock;
retval = update_prstate(cs, val); retval = update_prstate(cs, val);
out_unlock: out_unlock:
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
cpus_read_unlock(); cpus_read_unlock();
css_put(&cs->css); css_put(&cs->css);
return retval ?: nbytes; return retval ?: nbytes;
...@@ -3156,7 +3205,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) ...@@ -3156,7 +3205,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
return 0; return 0;
cpus_read_lock(); cpus_read_lock();
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
set_bit(CS_ONLINE, &cs->flags); set_bit(CS_ONLINE, &cs->flags);
if (is_spread_page(parent)) if (is_spread_page(parent))
...@@ -3207,7 +3256,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) ...@@ -3207,7 +3256,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
cpumask_copy(cs->effective_cpus, parent->cpus_allowed); cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
spin_unlock_irq(&callback_lock); spin_unlock_irq(&callback_lock);
out_unlock: out_unlock:
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
cpus_read_unlock(); cpus_read_unlock();
return 0; return 0;
} }
...@@ -3228,7 +3277,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) ...@@ -3228,7 +3277,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
struct cpuset *cs = css_cs(css); struct cpuset *cs = css_cs(css);
cpus_read_lock(); cpus_read_lock();
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
if (is_partition_valid(cs)) if (is_partition_valid(cs))
update_prstate(cs, 0); update_prstate(cs, 0);
...@@ -3247,7 +3296,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css) ...@@ -3247,7 +3296,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
cpuset_dec(); cpuset_dec();
clear_bit(CS_ONLINE, &cs->flags); clear_bit(CS_ONLINE, &cs->flags);
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
cpus_read_unlock(); cpus_read_unlock();
} }
...@@ -3260,7 +3309,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) ...@@ -3260,7 +3309,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
static void cpuset_bind(struct cgroup_subsys_state *root_css) static void cpuset_bind(struct cgroup_subsys_state *root_css)
{ {
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
spin_lock_irq(&callback_lock); spin_lock_irq(&callback_lock);
if (is_in_v2_mode()) { if (is_in_v2_mode()) {
...@@ -3273,7 +3322,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) ...@@ -3273,7 +3322,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
} }
spin_unlock_irq(&callback_lock); spin_unlock_irq(&callback_lock);
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
} }
/* /*
...@@ -3294,14 +3343,14 @@ static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) ...@@ -3294,14 +3343,14 @@ static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
return 0; return 0;
lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&cgroup_mutex);
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
/* Check to see if task is allowed in the cpuset */ /* Check to see if task is allowed in the cpuset */
ret = cpuset_can_attach_check(cs); ret = cpuset_can_attach_check(cs);
if (ret) if (ret)
goto out_unlock; goto out_unlock;
ret = task_can_attach(task, cs->effective_cpus); ret = task_can_attach(task);
if (ret) if (ret)
goto out_unlock; goto out_unlock;
...@@ -3315,7 +3364,7 @@ static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) ...@@ -3315,7 +3364,7 @@ static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
*/ */
cs->attach_in_progress++; cs->attach_in_progress++;
out_unlock: out_unlock:
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
return ret; return ret;
} }
...@@ -3331,11 +3380,11 @@ static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset) ...@@ -3331,11 +3380,11 @@ static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
if (same_cs) if (same_cs)
return; return;
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
cs->attach_in_progress--; cs->attach_in_progress--;
if (!cs->attach_in_progress) if (!cs->attach_in_progress)
wake_up(&cpuset_attach_wq); wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
} }
/* /*
...@@ -3363,7 +3412,7 @@ static void cpuset_fork(struct task_struct *task) ...@@ -3363,7 +3412,7 @@ static void cpuset_fork(struct task_struct *task)
} }
/* CLONE_INTO_CGROUP */ /* CLONE_INTO_CGROUP */
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
guarantee_online_mems(cs, &cpuset_attach_nodemask_to); guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
cpuset_attach_task(cs, task); cpuset_attach_task(cs, task);
...@@ -3371,7 +3420,7 @@ static void cpuset_fork(struct task_struct *task) ...@@ -3371,7 +3420,7 @@ static void cpuset_fork(struct task_struct *task)
if (!cs->attach_in_progress) if (!cs->attach_in_progress)
wake_up(&cpuset_attach_wq); wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
} }
struct cgroup_subsys cpuset_cgrp_subsys = { struct cgroup_subsys cpuset_cgrp_subsys = {
...@@ -3472,7 +3521,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, ...@@ -3472,7 +3521,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
is_empty = cpumask_empty(cs->cpus_allowed) || is_empty = cpumask_empty(cs->cpus_allowed) ||
nodes_empty(cs->mems_allowed); nodes_empty(cs->mems_allowed);
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
/* /*
* Move tasks to the nearest ancestor with execution resources, * Move tasks to the nearest ancestor with execution resources,
...@@ -3482,7 +3531,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs, ...@@ -3482,7 +3531,7 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
if (is_empty) if (is_empty)
remove_tasks_in_empty_cpuset(cs); remove_tasks_in_empty_cpuset(cs);
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
} }
static void static void
...@@ -3533,14 +3582,14 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) ...@@ -3533,14 +3582,14 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
retry: retry:
wait_event(cpuset_attach_wq, cs->attach_in_progress == 0); wait_event(cpuset_attach_wq, cs->attach_in_progress == 0);
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
/* /*
* We have raced with task attaching. We wait until attaching * We have raced with task attaching. We wait until attaching
* is finished, so we won't attach a task to an empty cpuset. * is finished, so we won't attach a task to an empty cpuset.
*/ */
if (cs->attach_in_progress) { if (cs->attach_in_progress) {
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
goto retry; goto retry;
} }
...@@ -3637,7 +3686,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) ...@@ -3637,7 +3686,7 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
cpus_updated, mems_updated); cpus_updated, mems_updated);
unlock: unlock:
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
} }
/** /**
...@@ -3667,7 +3716,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) ...@@ -3667,7 +3716,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
if (on_dfl && !alloc_cpumasks(NULL, &tmp)) if (on_dfl && !alloc_cpumasks(NULL, &tmp))
ptmp = &tmp; ptmp = &tmp;
percpu_down_write(&cpuset_rwsem); mutex_lock(&cpuset_mutex);
/* fetch the available cpus/mems and find out which changed how */ /* fetch the available cpus/mems and find out which changed how */
cpumask_copy(&new_cpus, cpu_active_mask); cpumask_copy(&new_cpus, cpu_active_mask);
...@@ -3724,7 +3773,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) ...@@ -3724,7 +3773,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
update_tasks_nodemask(&top_cpuset); update_tasks_nodemask(&top_cpuset);
} }
percpu_up_write(&cpuset_rwsem); mutex_unlock(&cpuset_mutex);
/* if cpus or mems changed, we need to propagate to descendants */ /* if cpus or mems changed, we need to propagate to descendants */
if (cpus_updated || mems_updated) { if (cpus_updated || mems_updated) {
...@@ -4155,7 +4204,7 @@ void __cpuset_memory_pressure_bump(void) ...@@ -4155,7 +4204,7 @@ void __cpuset_memory_pressure_bump(void)
* - Used for /proc/<pid>/cpuset. * - Used for /proc/<pid>/cpuset.
* - No need to task_lock(tsk) on this tsk->cpuset reference, as it * - No need to task_lock(tsk) on this tsk->cpuset reference, as it
* doesn't really matter if tsk->cpuset changes after we read it, * doesn't really matter if tsk->cpuset changes after we read it,
* and we take cpuset_rwsem, keeping cpuset_attach() from changing it * and we take cpuset_mutex, keeping cpuset_attach() from changing it
* anyway. * anyway.
*/ */
int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns, int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
......
...@@ -357,7 +357,6 @@ static struct cftype misc_cg_files[] = { ...@@ -357,7 +357,6 @@ static struct cftype misc_cg_files[] = {
{ {
.name = "current", .name = "current",
.seq_show = misc_cg_current_show, .seq_show = misc_cg_current_show,
.flags = CFTYPE_NOT_ON_ROOT,
}, },
{ {
.name = "capacity", .name = "capacity",
......
...@@ -197,6 +197,7 @@ uncharge_cg_locked(struct rdma_cgroup *cg, ...@@ -197,6 +197,7 @@ uncharge_cg_locked(struct rdma_cgroup *cg,
/** /**
* rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count
* @cg: pointer to cg to uncharge and all parents in hierarchy
* @device: pointer to rdmacg device * @device: pointer to rdmacg device
* @stop_cg: while traversing hierarchy, when meet with stop_cg cgroup * @stop_cg: while traversing hierarchy, when meet with stop_cg cgroup
* stop uncharging * stop uncharging
...@@ -221,6 +222,7 @@ static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg, ...@@ -221,6 +222,7 @@ static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg,
/** /**
* rdmacg_uncharge - hierarchically uncharge rdma resource count * rdmacg_uncharge - hierarchically uncharge rdma resource count
* @cg: pointer to cg to uncharge and all parents in hierarchy
* @device: pointer to rdmacg device * @device: pointer to rdmacg device
* @index: index of the resource to uncharge in cgroup in given resource pool * @index: index of the resource to uncharge in cgroup in given resource pool
*/ */
......
...@@ -7631,6 +7631,7 @@ static int __sched_setscheduler(struct task_struct *p, ...@@ -7631,6 +7631,7 @@ static int __sched_setscheduler(struct task_struct *p,
int reset_on_fork; int reset_on_fork;
int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
struct rq *rq; struct rq *rq;
bool cpuset_locked = false;
/* The pi code expects interrupts enabled */ /* The pi code expects interrupts enabled */
BUG_ON(pi && in_interrupt()); BUG_ON(pi && in_interrupt());
...@@ -7680,8 +7681,14 @@ static int __sched_setscheduler(struct task_struct *p, ...@@ -7680,8 +7681,14 @@ static int __sched_setscheduler(struct task_struct *p,
return retval; return retval;
} }
if (pi) /*
cpuset_read_lock(); * SCHED_DEADLINE bandwidth accounting relies on stable cpusets
* information.
*/
if (dl_policy(policy) || dl_policy(p->policy)) {
cpuset_locked = true;
cpuset_lock();
}
/* /*
* Make sure no PI-waiters arrive (or leave) while we are * Make sure no PI-waiters arrive (or leave) while we are
...@@ -7757,8 +7764,8 @@ static int __sched_setscheduler(struct task_struct *p, ...@@ -7757,8 +7764,8 @@ static int __sched_setscheduler(struct task_struct *p,
if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
policy = oldpolicy = -1; policy = oldpolicy = -1;
task_rq_unlock(rq, p, &rf); task_rq_unlock(rq, p, &rf);
if (pi) if (cpuset_locked)
cpuset_read_unlock(); cpuset_unlock();
goto recheck; goto recheck;
} }
...@@ -7825,7 +7832,8 @@ static int __sched_setscheduler(struct task_struct *p, ...@@ -7825,7 +7832,8 @@ static int __sched_setscheduler(struct task_struct *p,
task_rq_unlock(rq, p, &rf); task_rq_unlock(rq, p, &rf);
if (pi) { if (pi) {
cpuset_read_unlock(); if (cpuset_locked)
cpuset_unlock();
rt_mutex_adjust_pi(p); rt_mutex_adjust_pi(p);
} }
...@@ -7837,8 +7845,8 @@ static int __sched_setscheduler(struct task_struct *p, ...@@ -7837,8 +7845,8 @@ static int __sched_setscheduler(struct task_struct *p,
unlock: unlock:
task_rq_unlock(rq, p, &rf); task_rq_unlock(rq, p, &rf);
if (pi) if (cpuset_locked)
cpuset_read_unlock(); cpuset_unlock();
return retval; return retval;
} }
...@@ -9327,8 +9335,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur, ...@@ -9327,8 +9335,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur,
return ret; return ret;
} }
int task_can_attach(struct task_struct *p, int task_can_attach(struct task_struct *p)
const struct cpumask *cs_effective_cpus)
{ {
int ret = 0; int ret = 0;
...@@ -9341,21 +9348,9 @@ int task_can_attach(struct task_struct *p, ...@@ -9341,21 +9348,9 @@ int task_can_attach(struct task_struct *p,
* success of set_cpus_allowed_ptr() on all attached tasks * success of set_cpus_allowed_ptr() on all attached tasks
* before cpus_mask may be changed. * before cpus_mask may be changed.
*/ */
if (p->flags & PF_NO_SETAFFINITY) { if (p->flags & PF_NO_SETAFFINITY)
ret = -EINVAL; ret = -EINVAL;
goto out;
}
if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span,
cs_effective_cpus)) {
int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus);
if (unlikely(cpu >= nr_cpu_ids))
return -EINVAL;
ret = dl_cpu_busy(cpu, p);
}
out:
return ret; return ret;
} }
...@@ -9638,7 +9633,7 @@ static void cpuset_cpu_active(void) ...@@ -9638,7 +9633,7 @@ static void cpuset_cpu_active(void)
static int cpuset_cpu_inactive(unsigned int cpu) static int cpuset_cpu_inactive(unsigned int cpu)
{ {
if (!cpuhp_tasks_frozen) { if (!cpuhp_tasks_frozen) {
int ret = dl_cpu_busy(cpu, NULL); int ret = dl_bw_check_overflow(cpu);
if (ret) if (ret)
return ret; return ret;
......
...@@ -16,6 +16,8 @@ ...@@ -16,6 +16,8 @@
* Fabio Checconi <fchecconi@gmail.com> * Fabio Checconi <fchecconi@gmail.com>
*/ */
#include <linux/cpuset.h>
/* /*
* Default limits for DL period; on the top end we guard against small util * Default limits for DL period; on the top end we guard against small util
* tasks still getting ridiculously long effective runtimes, on the bottom end we * tasks still getting ridiculously long effective runtimes, on the bottom end we
...@@ -2585,6 +2587,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p) ...@@ -2585,6 +2587,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
if (task_on_rq_queued(p) && p->dl.dl_runtime) if (task_on_rq_queued(p) && p->dl.dl_runtime)
task_non_contending(p); task_non_contending(p);
/*
* In case a task is setscheduled out from SCHED_DEADLINE we need to
* keep track of that on its cpuset (for correct bandwidth tracking).
*/
dec_dl_tasks_cs(p);
if (!task_on_rq_queued(p)) { if (!task_on_rq_queued(p)) {
/* /*
* Inactive timer is armed. However, p is leaving DEADLINE and * Inactive timer is armed. However, p is leaving DEADLINE and
...@@ -2625,6 +2633,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p) ...@@ -2625,6 +2633,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1) if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
put_task_struct(p); put_task_struct(p);
/*
* In case a task is setscheduled to SCHED_DEADLINE we need to keep
* track of that on its cpuset (for correct bandwidth tracking).
*/
inc_dl_tasks_cs(p);
/* If p is not queued we will update its parameters at next wakeup. */ /* If p is not queued we will update its parameters at next wakeup. */
if (!task_on_rq_queued(p)) { if (!task_on_rq_queued(p)) {
add_rq_bw(&p->dl, &rq->dl); add_rq_bw(&p->dl, &rq->dl);
...@@ -3033,26 +3047,38 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, ...@@ -3033,26 +3047,38 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur,
return ret; return ret;
} }
int dl_cpu_busy(int cpu, struct task_struct *p) enum dl_bw_request {
dl_bw_req_check_overflow = 0,
dl_bw_req_alloc,
dl_bw_req_free
};
static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw)
{ {
unsigned long flags, cap; unsigned long flags;
struct dl_bw *dl_b; struct dl_bw *dl_b;
bool overflow; bool overflow = 0;
rcu_read_lock_sched(); rcu_read_lock_sched();
dl_b = dl_bw_of(cpu); dl_b = dl_bw_of(cpu);
raw_spin_lock_irqsave(&dl_b->lock, flags); raw_spin_lock_irqsave(&dl_b->lock, flags);
cap = dl_bw_capacity(cpu);
overflow = __dl_overflow(dl_b, cap, 0, p ? p->dl.dl_bw : 0);
if (!overflow && p) { if (req == dl_bw_req_free) {
/* __dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu));
* We reserve space for this task in the destination } else {
* root_domain, as we can't fail after this point. unsigned long cap = dl_bw_capacity(cpu);
* We will free resources in the source root_domain
* later on (see set_cpus_allowed_dl()). overflow = __dl_overflow(dl_b, cap, 0, dl_bw);
*/
__dl_add(dl_b, p->dl.dl_bw, dl_bw_cpus(cpu)); if (req == dl_bw_req_alloc && !overflow) {
/*
* We reserve space in the destination
* root_domain, as we can't fail after this point.
* We will free resources in the source root_domain
* later on (see set_cpus_allowed_dl()).
*/
__dl_add(dl_b, dl_bw, dl_bw_cpus(cpu));
}
} }
raw_spin_unlock_irqrestore(&dl_b->lock, flags); raw_spin_unlock_irqrestore(&dl_b->lock, flags);
...@@ -3060,6 +3086,21 @@ int dl_cpu_busy(int cpu, struct task_struct *p) ...@@ -3060,6 +3086,21 @@ int dl_cpu_busy(int cpu, struct task_struct *p)
return overflow ? -EBUSY : 0; return overflow ? -EBUSY : 0;
} }
/*
 * dl_bw_check_overflow - query-only wrapper around dl_bw_manage()
 * @cpu: CPU whose deadline bandwidth bookkeeping is examined
 *
 * Issues a dl_bw_req_check_overflow request with a bandwidth of 0, so
 * nothing is reserved or released; only the overflow test is performed.
 *
 * Return: the status produced by dl_bw_manage() for this request.
 */
int dl_bw_check_overflow(int cpu)
{
	int status;

	status = dl_bw_manage(dl_bw_req_check_overflow, cpu, 0);

	return status;
}
/*
 * dl_bw_alloc - try to reserve deadline bandwidth
 * @cpu: CPU used to look up the bandwidth bookkeeping to charge
 * @dl_bw: amount of bandwidth to reserve
 *
 * Issues a dl_bw_req_alloc request: dl_bw_manage() runs the overflow
 * test for @dl_bw and, only if it does not overflow, adds @dl_bw to the
 * accounting.
 *
 * Return: the status produced by dl_bw_manage() for this request.
 */
int dl_bw_alloc(int cpu, u64 dl_bw)
{
	int status = dl_bw_manage(dl_bw_req_alloc, cpu, dl_bw);

	return status;
}
/*
 * dl_bw_free - give back previously reserved deadline bandwidth
 * @cpu: CPU used to look up the bandwidth bookkeeping to credit
 * @dl_bw: amount of bandwidth to release
 *
 * Issues a dl_bw_req_free request, for which dl_bw_manage() subtracts
 * @dl_bw from the accounting without running the overflow test. The
 * status it returns is deliberately not propagated.
 */
void dl_bw_free(int cpu, u64 dl_bw)
{
	(void)dl_bw_manage(dl_bw_req_free, cpu, dl_bw);
}
#endif #endif
#ifdef CONFIG_SCHED_DEBUG #ifdef CONFIG_SCHED_DEBUG
......
...@@ -324,7 +324,7 @@ extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr); ...@@ -324,7 +324,7 @@ extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
extern bool __checkparam_dl(const struct sched_attr *attr); extern bool __checkparam_dl(const struct sched_attr *attr);
extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr); extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
extern int dl_cpu_busy(int cpu, struct task_struct *p); extern int dl_bw_check_overflow(int cpu);
#ifdef CONFIG_CGROUP_SCHED #ifdef CONFIG_CGROUP_SCHED
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment