Commit ea15f8cc authored by Tejun Heo

cgroup: split cgroup destruction into two steps

Split cgroup_destroy_locked() into two steps and put the latter half
into cgroup_offline_fn(), which is executed from a work item.  The
latter half is responsible for offlining the css's, removing the
cgroup from internal lists, and propagating release notification to
the parent.  The separation is to allow using percpu refcnt for css.
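
In userspace terms the shape of the change is: step one runs with the
lock held, marks the object dead, and queues step two; step two later
takes the lock itself and performs the teardown.  Below is a minimal
pthread analogue of that pattern (illustrative only: obj,
destroy_locked(), and offline_fn() are made-up stand-ins for cgroup,
cgroup_destroy_locked(), and cgroup_offline_fn(), and a detached thread
stands in for the work item):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
	pthread_mutex_t *lock;		/* stands in for cgroup_mutex */
	bool dead;			/* stands in for CGRP_DEAD */
};

/* plays cgroup_offline_fn(): runs asynchronously, serializes itself by
 * taking the lock, and performs the second half of destruction */
static void *offline_fn(void *data)
{
	struct obj *o = data;

	pthread_mutex_lock(o->lock);
	printf("second half: offline, unlink, notify parent (dead=%d)\n",
	       o->dead);
	pthread_mutex_unlock(o->lock);
	return NULL;
}

/* plays cgroup_destroy_locked(): called with the lock held, marks the
 * object dead and defers the rest to the "work item" */
static int destroy_locked(struct obj *o)
{
	pthread_t t;

	o->dead = true;
	pthread_create(&t, NULL, offline_fn, o);
	pthread_detach(t);
	return 0;
}

int main(void)
{
	pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	struct obj o = { .lock = &lock, .dead = false };

	pthread_mutex_lock(&lock);	/* like rmdir: lock held across step one */
	destroy_locked(&o);
	pthread_mutex_unlock(&lock);
	pthread_exit(NULL);		/* let the detached "work item" finish */
}

The point mirrored here is that nothing in step two runs inside the
caller's critical section; it serializes itself by retaking the lock.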

Note that this allows for other cgroup operations to happen between
the first and second halves of destruction, including creating a new
cgroup with the same name.  As the target cgroup is marked DEAD in the
first half and cgroup internals don't care about the names of cgroups,
this should be fine.  A comment explaining this will be added by the
next patch which implements the actual percpu refcnting.
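
For reference, "marked DEAD" means a flag bit, not removal from any
list.  At this point in the series the test in kernel/cgroup.c reads
approximately as follows (not part of this diff):

/* CGRP_DEAD is set by the first half of destruction; internal
 * bookkeeping keys off this bit rather than the name, which is why a
 * same-named sibling can be created before the second half runs. */
static inline bool cgroup_is_dead(const struct cgroup *cgrp)
{
	return test_bit(CGRP_DEAD, &cgrp->flags);
}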

As RCU freeing is guaranteed to happen after the second step of
destruction, we can use the same work item for both.  This patch
renames cgroup->free_work to ->destroy_work and uses it for both
purposes.  INIT_WORK() is now performed right before queueing the work
item.
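
The reuse is safe because the two users are strictly ordered:
cgroup_offline_fn() has completed before the RCU grace period that
leads to cgroup_free_rcu(), so the work item is guaranteed idle when it
is re-initialized.  The two queueing sites, as they appear in the diff
below:

/* first use, queued by cgroup_destroy_locked(): */
INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn);
schedule_work(&cgrp->destroy_work);

/* second use, queued by cgroup_free_rcu() after the grace period: */
INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
schedule_work(&cgrp->destroy_work);
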
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
parent 455050d2
@@ -233,7 +233,7 @@ struct cgroup {
 
 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
-	struct work_struct free_work;
+	struct work_struct destroy_work;
 
 	/* List of events which userspace want to receive */
 	struct list_head event_list;

@@ -208,6 +208,7 @@ static struct cgroup_name root_cgroup_name = { .name = "/" };
  */
 static int need_forkexit_callback __read_mostly;
 
+static void cgroup_offline_fn(struct work_struct *work);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
 			      struct cftype cfts[], bool is_add);

@@ -830,7 +831,7 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
 
 static void cgroup_free_fn(struct work_struct *work)
 {
-	struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
+	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
 	struct cgroup_subsys *ss;
 
 	mutex_lock(&cgroup_mutex);

@@ -875,7 +876,8 @@ static void cgroup_free_rcu(struct rcu_head *head)
 {
 	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
 
-	schedule_work(&cgrp->free_work);
+	INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
+	schedule_work(&cgrp->destroy_work);
 }
 
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)

@@ -1407,7 +1409,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->allcg_node);
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
-	INIT_WORK(&cgrp->free_work, cgroup_free_fn);
 	mutex_init(&cgrp->pidlist_mutex);
 	INIT_LIST_HEAD(&cgrp->event_list);
 	spin_lock_init(&cgrp->event_list_lock);

@@ -2991,12 +2992,13 @@ struct cgroup *cgroup_next_sibling(struct cgroup *pos)
 	/*
 	 * @pos could already have been removed.  Once a cgroup is removed,
 	 * its ->sibling.next is no longer updated when its next sibling
-	 * changes.  As CGRP_DEAD is set on removal which is fully
-	 * serialized, if we see it unasserted, it's guaranteed that the
-	 * next sibling hasn't finished its grace period even if it's
-	 * already removed, and thus safe to dereference from this RCU
-	 * critical section.  If ->sibling.next is inaccessible,
-	 * cgroup_is_dead() is guaranteed to be visible as %true here.
+	 * changes.  As CGRP_DEAD assertion is serialized and happens
+	 * before the cgroup is taken off the ->sibling list, if we see it
+	 * unasserted, it's guaranteed that the next sibling hasn't
+	 * finished its grace period even if it's already removed, and thus
+	 * safe to dereference from this RCU critical section.  If
+	 * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed
+	 * to be visible as %true here.
 	 */
 	if (likely(!cgroup_is_dead(pos))) {
 		next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling);

@@ -4359,7 +4361,6 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	__releases(&cgroup_mutex) __acquires(&cgroup_mutex)
 {
 	struct dentry *d = cgrp->dentry;
-	struct cgroup *parent = cgrp->parent;
 	struct cgroup_event *event, *tmp;
 	struct cgroup_subsys *ss;
 	bool empty;

@@ -4423,6 +4424,21 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	}
 	spin_unlock(&cgrp->event_list_lock);
 
+	INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn);
+	schedule_work(&cgrp->destroy_work);
+
+	return 0;
+};
+
+static void cgroup_offline_fn(struct work_struct *work)
+{
+	struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
+	struct cgroup *parent = cgrp->parent;
+	struct dentry *d = cgrp->dentry;
+	struct cgroup_subsys *ss;
+
+	mutex_lock(&cgroup_mutex);
+
 	/* tell subsystems to initate destruction */
 	for_each_subsys(cgrp->root, ss)
 		offline_css(ss, cgrp);

@@ -4446,7 +4462,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
 	set_bit(CGRP_RELEASABLE, &parent->flags);
 	check_for_release(parent);
 
-	return 0;
+	mutex_unlock(&cgroup_mutex);
 }
 
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)