Commit 5c1ee569 authored by Linus Torvalds

Merge branch 'for-5.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:

 - Fix for a subtle bug in the recent release_agent permission check
   update

 - Fix for a long-standing race condition between cpuset and cpu hotplug

 - Comment updates

* 'for-5.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cpuset: Fix kernel-doc
  cgroup-v1: Correct privileges check in release_agent writes
  cgroup: clarify cgroup_css_set_fork()
  cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug
parents 917bbdb1 c70cd039
...@@ -546,6 +546,7 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, ...@@ -546,6 +546,7 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off) char *buf, size_t nbytes, loff_t off)
{ {
struct cgroup *cgrp; struct cgroup *cgrp;
struct cgroup_file_ctx *ctx;
BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
...@@ -553,8 +554,9 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, ...@@ -553,8 +554,9 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
* Release agent gets called with all capabilities, * Release agent gets called with all capabilities,
* require capabilities to set release agent. * require capabilities to set release agent.
*/ */
if ((of->file->f_cred->user_ns != &init_user_ns) || ctx = of->priv;
!capable(CAP_SYS_ADMIN)) if ((ctx->ns->user_ns != &init_user_ns) ||
!file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
cgrp = cgroup_kn_lock_live(of->kn, false); cgrp = cgroup_kn_lock_live(of->kn, false);
......
...@@ -6166,6 +6166,20 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) ...@@ -6166,6 +6166,20 @@ static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
if (ret) if (ret)
goto err; goto err;
/*
* Spawning a task directly into a cgroup works by passing a file
* descriptor to the target cgroup directory. This can even be an O_PATH
* file descriptor. But it can never be a cgroup.procs file descriptor.
* This was done on purpose so spawning into a cgroup could be
* conceptualized as an atomic
*
* fd = openat(dfd_cgroup, "cgroup.procs", ...);
* write(fd, <child-pid>, ...);
*
* sequence, i.e. it's a shorthand for the caller opening and writing
* cgroup.procs of the cgroup indicated by @dfd_cgroup. This allows us
* to always use the caller's credentials.
*/
ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb,
!(kargs->flags & CLONE_THREAD), !(kargs->flags & CLONE_THREAD),
current->nsproxy->cgroup_ns); current->nsproxy->cgroup_ns);
......
...@@ -2289,6 +2289,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) ...@@ -2289,6 +2289,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
cgroup_taskset_first(tset, &css); cgroup_taskset_first(tset, &css);
cs = css_cs(css); cs = css_cs(css);
cpus_read_lock();
percpu_down_write(&cpuset_rwsem); percpu_down_write(&cpuset_rwsem);
guarantee_online_mems(cs, &cpuset_attach_nodemask_to); guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
...@@ -2342,6 +2343,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) ...@@ -2342,6 +2343,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
wake_up(&cpuset_attach_wq); wake_up(&cpuset_attach_wq);
percpu_up_write(&cpuset_rwsem); percpu_up_write(&cpuset_rwsem);
cpus_read_unlock();
} }
/* The various types of files and directories in a cpuset file system */ /* The various types of files and directories in a cpuset file system */
...@@ -3522,8 +3524,8 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs) ...@@ -3522,8 +3524,8 @@ static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
return cs; return cs;
} }
/** /*
* cpuset_node_allowed - Can we allocate on a memory node? * __cpuset_node_allowed - Can we allocate on a memory node?
* @node: is this an allowed node? * @node: is this an allowed node?
* @gfp_mask: memory allocation flags * @gfp_mask: memory allocation flags
* *
...@@ -3694,8 +3696,8 @@ void cpuset_print_current_mems_allowed(void) ...@@ -3694,8 +3696,8 @@ void cpuset_print_current_mems_allowed(void)
int cpuset_memory_pressure_enabled __read_mostly; int cpuset_memory_pressure_enabled __read_mostly;
/** /*
* cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims. * __cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
* *
* Keep a running average of the rate of synchronous (direct) * Keep a running average of the rate of synchronous (direct)
* page reclaim efforts initiated by tasks in each cpuset. * page reclaim efforts initiated by tasks in each cpuset.
...@@ -3710,7 +3712,7 @@ int cpuset_memory_pressure_enabled __read_mostly; ...@@ -3710,7 +3712,7 @@ int cpuset_memory_pressure_enabled __read_mostly;
* "memory_pressure". Value displayed is an integer * "memory_pressure". Value displayed is an integer
* representing the recent rate of entry into the synchronous * representing the recent rate of entry into the synchronous
* (direct) page reclaim by any task attached to the cpuset. * (direct) page reclaim by any task attached to the cpuset.
**/ */
void __cpuset_memory_pressure_bump(void) void __cpuset_memory_pressure_bump(void)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment