Commit abde77eb authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:
 "This includes Roman's cgroup2 freezer implementation.

  It's a separate machanism from cgroup1 freezer. Instead of blocking
  user tasks in arbitrary uninterruptible sleeps, the new implementation
  extends jobctl stop - frozen tasks are trapped in jobctl stop until
  thawed and can be killed and ptraced. Lots of thanks to Oleg for
  sheperding the effort.

  Other than that, there are a few trivial changes"

* 'for-5.2' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: never call do_group_exit() with task->frozen bit set
  kernel: cgroup: fix misuse of %x
  cgroup: get rid of cgroup_freezer_frozen_exit()
  cgroup: prevent spurious transition into non-frozen state
  cgroup: Remove unused cgrp variable
  cgroup: document cgroup v2 freezer interface
  cgroup: add tracing points for cgroup v2 freezer
  cgroup: make TRACE_CGROUP_PATH irq-safe
  kselftests: cgroup: add freezer controller self-tests
  kselftests: cgroup: don't fail on cg_kill_all() error in cg_destroy()
  cgroup: cgroup v2 freezer
  cgroup: protect cgroup->nr_(dying_)descendants by css_set_lock
  cgroup: implement __cgroup_task_count() helper
  cgroup: rename freezer.c into legacy_freezer.c
  cgroup: remove extra cgroup_migrate_finish() call
parents 23c97060 f2b31bb5
......@@ -864,6 +864,8 @@ All cgroup core files are prefixed with "cgroup."
populated
1 if the cgroup or its descendants contains any live
processes; otherwise, 0.
frozen
1 if the cgroup is frozen; otherwise, 0.
cgroup.max.descendants
A read-write single value files. The default is "max".
......@@ -897,6 +899,31 @@ All cgroup core files are prefixed with "cgroup."
A dying cgroup can consume system resources not exceeding
limits, which were active at the moment of cgroup deletion.
cgroup.freeze
A read-write single value file which exists on non-root cgroups.
Allowed values are "0" and "1". The default is "0".
Writing "1" to the file causes freezing of the cgroup and all
descendant cgroups. This means that all belonging processes will
be stopped and will not run until the cgroup will be explicitly
unfrozen. Freezing of the cgroup may take some time; when this action
is completed, the "frozen" value in the cgroup.events control file
will be updated to "1" and the corresponding notification will be
issued.
A cgroup can be frozen either by its own settings, or by settings
of any ancestor cgroups. If any of ancestor cgroups is frozen, the
cgroup will remain frozen.
Processes in the frozen cgroup can be killed by a fatal signal.
They also can enter and leave a frozen cgroup: either by an explicit
move by a user, or if freezing of the cgroup races with fork().
If a process is moved to a frozen cgroup, it stops. If a process is
moved out of a frozen cgroup, it becomes running.
Frozen status of a cgroup doesn't affect any cgroup tree operations:
it's possible to delete a frozen (and empty) cgroup, as well as
create new sub-cgroups.
Controllers
===========
......
......@@ -65,6 +65,12 @@ enum {
* specified at mount time and thus is implemented here.
*/
CGRP_CPUSET_CLONE_CHILDREN,
/* Control group has to be frozen. */
CGRP_FREEZE,
/* Cgroup is frozen. */
CGRP_FROZEN,
};
/* cgroup_root->flags */
......@@ -317,6 +323,25 @@ struct cgroup_rstat_cpu {
struct cgroup *updated_next; /* NULL iff not on the list */
};
struct cgroup_freezer_state {
/* Should the cgroup and its descendants be frozen. */
bool freeze;
/* Should the cgroup actually be frozen? */
int e_freeze;
/* Fields below are protected by css_set_lock */
/* Number of frozen descendant cgroups */
int nr_frozen_descendants;
/*
* Number of tasks, which are counted as frozen:
* frozen, SIGSTOPped, and PTRACEd.
*/
int nr_frozen_tasks;
};
struct cgroup {
/* self css with NULL ->ss, points back to this cgroup */
struct cgroup_subsys_state self;
......@@ -349,6 +374,11 @@ struct cgroup {
* Dying cgroups are cgroups which were deleted by a user,
* but are still existing because someone else is holding a reference.
* max_descendants is a maximum allowed number of descent cgroups.
*
* nr_descendants and nr_dying_descendants are protected
* by cgroup_mutex and css_set_lock. It's fine to read them holding
* any of cgroup_mutex and css_set_lock; for writing both locks
* should be held.
*/
int nr_descendants;
int nr_dying_descendants;
......@@ -448,6 +478,9 @@ struct cgroup {
/* If there is block congestion on this cgroup. */
atomic_t congestion_count;
/* Used to store internal freezer state */
struct cgroup_freezer_state freezer;
/* ids of the ancestors at each level including self */
int ancestor_ids[];
};
......
......@@ -881,4 +881,47 @@ static inline void put_cgroup_ns(struct cgroup_namespace *ns)
free_cgroup_ns(ns);
}
#ifdef CONFIG_CGROUPS
void cgroup_enter_frozen(void);
void cgroup_leave_frozen(bool always_leave);
void cgroup_update_frozen(struct cgroup *cgrp);
void cgroup_freeze(struct cgroup *cgrp, bool freeze);
void cgroup_freezer_migrate_task(struct task_struct *task, struct cgroup *src,
struct cgroup *dst);
static inline bool cgroup_task_freeze(struct task_struct *task)
{
bool ret;
if (task->flags & PF_KTHREAD)
return false;
rcu_read_lock();
ret = test_bit(CGRP_FREEZE, &task_dfl_cgroup(task)->flags);
rcu_read_unlock();
return ret;
}
static inline bool cgroup_task_frozen(struct task_struct *task)
{
return task->frozen;
}
#else /* !CONFIG_CGROUPS */
static inline void cgroup_enter_frozen(void) { }
static inline void cgroup_leave_frozen(bool always_leave) { }
static inline bool cgroup_task_freeze(struct task_struct *task)
{
return false;
}
static inline bool cgroup_task_frozen(struct task_struct *task)
{
return false;
}
#endif /* !CONFIG_CGROUPS */
#endif /* _LINUX_CGROUP_H */
......@@ -726,6 +726,8 @@ struct task_struct {
#ifdef CONFIG_CGROUPS
/* disallow userland-initiated cgroup migration */
unsigned no_cgroup_migration:1;
/* task is frozen/stopped (used by the cgroup freezer) */
unsigned frozen:1;
#endif
#ifdef CONFIG_BLK_CGROUP
/* to be used once the psi infrastructure lands upstream. */
......
......@@ -18,6 +18,7 @@ struct task_struct;
#define JOBCTL_TRAP_NOTIFY_BIT 20 /* trap for NOTIFY */
#define JOBCTL_TRAPPING_BIT 21 /* switching to TRACED */
#define JOBCTL_LISTENING_BIT 22 /* ptracer is listening for events */
#define JOBCTL_TRAP_FREEZE_BIT 23 /* trap for cgroup freezer */
#define JOBCTL_STOP_DEQUEUED (1UL << JOBCTL_STOP_DEQUEUED_BIT)
#define JOBCTL_STOP_PENDING (1UL << JOBCTL_STOP_PENDING_BIT)
......@@ -26,6 +27,7 @@ struct task_struct;
#define JOBCTL_TRAP_NOTIFY (1UL << JOBCTL_TRAP_NOTIFY_BIT)
#define JOBCTL_TRAPPING (1UL << JOBCTL_TRAPPING_BIT)
#define JOBCTL_LISTENING (1UL << JOBCTL_LISTENING_BIT)
#define JOBCTL_TRAP_FREEZE (1UL << JOBCTL_TRAP_FREEZE_BIT)
#define JOBCTL_TRAP_MASK (JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
#define JOBCTL_PENDING_MASK (JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
......
......@@ -103,6 +103,20 @@ DEFINE_EVENT(cgroup, cgroup_rename,
TP_ARGS(cgrp, path)
);
DEFINE_EVENT(cgroup, cgroup_freeze,
TP_PROTO(struct cgroup *cgrp, const char *path),
TP_ARGS(cgrp, path)
);
DEFINE_EVENT(cgroup, cgroup_unfreeze,
TP_PROTO(struct cgroup *cgrp, const char *path),
TP_ARGS(cgrp, path)
);
DECLARE_EVENT_CLASS(cgroup_migrate,
TP_PROTO(struct cgroup *dst_cgrp, const char *path,
......@@ -149,6 +163,47 @@ DEFINE_EVENT(cgroup_migrate, cgroup_transfer_tasks,
TP_ARGS(dst_cgrp, path, task, threadgroup)
);
DECLARE_EVENT_CLASS(cgroup_event,
TP_PROTO(struct cgroup *cgrp, const char *path, int val),
TP_ARGS(cgrp, path, val),
TP_STRUCT__entry(
__field( int, root )
__field( int, id )
__field( int, level )
__string( path, path )
__field( int, val )
),
TP_fast_assign(
__entry->root = cgrp->root->hierarchy_id;
__entry->id = cgrp->id;
__entry->level = cgrp->level;
__assign_str(path, path);
__entry->val = val;
),
TP_printk("root=%d id=%d level=%d path=%s val=%d",
__entry->root, __entry->id, __entry->level, __get_str(path),
__entry->val)
);
DEFINE_EVENT(cgroup_event, cgroup_notify_populated,
TP_PROTO(struct cgroup *cgrp, const char *path, int val),
TP_ARGS(cgrp, path, val)
);
DEFINE_EVENT(cgroup_event, cgroup_notify_frozen,
TP_PROTO(struct cgroup *cgrp, const char *path, int val),
TP_ARGS(cgrp, path, val)
);
#endif /* _TRACE_CGROUP_H */
/* This part must be outside protection */
......
# SPDX-License-Identifier: GPL-2.0
obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o
obj-y := cgroup.o rstat.o namespace.o cgroup-v1.o freezer.o
obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
obj-$(CONFIG_CGROUP_FREEZER) += legacy_freezer.o
obj-$(CONFIG_CGROUP_PIDS) += pids.o
obj-$(CONFIG_CGROUP_RDMA) += rdma.o
obj-$(CONFIG_CPUSETS) += cpuset.o
......
......@@ -28,12 +28,15 @@ extern void __init enable_debug_cgroup(void);
#define TRACE_CGROUP_PATH(type, cgrp, ...) \
do { \
if (trace_cgroup_##type##_enabled()) { \
spin_lock(&trace_cgroup_path_lock); \
unsigned long flags; \
spin_lock_irqsave(&trace_cgroup_path_lock, \
flags); \
cgroup_path(cgrp, trace_cgroup_path, \
TRACE_CGROUP_PATH_LEN); \
trace_cgroup_##type(cgrp, trace_cgroup_path, \
##__VA_ARGS__); \
spin_unlock(&trace_cgroup_path_lock); \
spin_unlock_irqrestore(&trace_cgroup_path_lock, \
flags); \
} \
} while (0)
......@@ -240,6 +243,7 @@ int cgroup_rmdir(struct kernfs_node *kn);
int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
struct kernfs_root *kf_root);
int __cgroup_task_count(const struct cgroup *cgrp);
int cgroup_task_count(const struct cgroup *cgrp);
/*
......
......@@ -342,22 +342,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
return l;
}
/**
* cgroup_task_count - count the number of tasks in a cgroup.
* @cgrp: the cgroup in question
*/
int cgroup_task_count(const struct cgroup *cgrp)
{
int count = 0;
struct cgrp_cset_link *link;
spin_lock_irq(&css_set_lock);
list_for_each_entry(link, &cgrp->cset_links, cset_link)
count += link->cset->nr_tasks;
spin_unlock_irq(&css_set_lock);
return count;
}
/*
* Load a cgroup's pidarray with either procs' tgids or tasks' pids
*/
......
......@@ -593,6 +593,39 @@ static void cgroup_get_live(struct cgroup *cgrp)
css_get(&cgrp->self);
}
/**
* __cgroup_task_count - count the number of tasks in a cgroup. The caller
* is responsible for taking the css_set_lock.
* @cgrp: the cgroup in question
*/
int __cgroup_task_count(const struct cgroup *cgrp)
{
int count = 0;
struct cgrp_cset_link *link;
lockdep_assert_held(&css_set_lock);
list_for_each_entry(link, &cgrp->cset_links, cset_link)
count += link->cset->nr_tasks;
return count;
}
/**
* cgroup_task_count - count the number of tasks in a cgroup.
* @cgrp: the cgroup in question
*/
int cgroup_task_count(const struct cgroup *cgrp)
{
int count;
spin_lock_irq(&css_set_lock);
count = __cgroup_task_count(cgrp);
spin_unlock_irq(&css_set_lock);
return count;
}
struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
{
struct cgroup *cgrp = of->kn->parent->priv;
......@@ -783,6 +816,8 @@ static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
break;
cgroup1_check_for_release(cgrp);
TRACE_CGROUP_PATH(notify_populated, cgrp,
cgroup_is_populated(cgrp));
cgroup_file_notify(&cgrp->events_file);
child = cgrp;
......@@ -2402,8 +2437,15 @@ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
get_css_set(to_cset);
to_cset->nr_tasks++;
css_set_move_task(task, from_cset, to_cset, true);
put_css_set_locked(from_cset);
from_cset->nr_tasks--;
/*
* If the source or destination cgroup is frozen,
* the task might require to change its state.
*/
cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
to_cset->dfl_cgrp);
put_css_set_locked(from_cset);
}
}
spin_unlock_irq(&css_set_lock);
......@@ -2602,7 +2644,7 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
if (!dst_cset)
goto err;
return -ENOMEM;
WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
......@@ -2634,9 +2676,6 @@ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
}
return 0;
err:
cgroup_migrate_finish(mgctx);
return -ENOMEM;
}
/**
......@@ -3447,8 +3486,11 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
static int cgroup_events_show(struct seq_file *seq, void *v)
{
seq_printf(seq, "populated %d\n",
cgroup_is_populated(seq_css(seq)->cgroup));
struct cgroup *cgrp = seq_css(seq)->cgroup;
seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
return 0;
}
......@@ -3510,6 +3552,40 @@ static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
}
#endif
static int cgroup_freeze_show(struct seq_file *seq, void *v)
{
struct cgroup *cgrp = seq_css(seq)->cgroup;
seq_printf(seq, "%d\n", cgrp->freezer.freeze);
return 0;
}
static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct cgroup *cgrp;
ssize_t ret;
int freeze;
ret = kstrtoint(strstrip(buf), 0, &freeze);
if (ret)
return ret;
if (freeze < 0 || freeze > 1)
return -ERANGE;
cgrp = cgroup_kn_lock_live(of->kn, false);
if (!cgrp)
return -ENOENT;
cgroup_freeze(cgrp, freeze);
cgroup_kn_unlock(of->kn);
return nbytes;
}
static int cgroup_file_open(struct kernfs_open_file *of)
{
struct cftype *cft = of->kn->priv;
......@@ -4653,6 +4729,12 @@ static struct cftype cgroup_base_files[] = {
.name = "cgroup.stat",
.seq_show = cgroup_stat_show,
},
{
.name = "cgroup.freeze",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = cgroup_freeze_show,
.write = cgroup_freeze_write,
},
{
.name = "cpu.stat",
.flags = CFTYPE_NOT_ON_ROOT,
......@@ -4781,9 +4863,11 @@ static void css_release_work_fn(struct work_struct *work)
if (cgroup_on_dfl(cgrp))
cgroup_rstat_flush(cgrp);
spin_lock_irq(&css_set_lock);
for (tcgrp = cgroup_parent(cgrp); tcgrp;
tcgrp = cgroup_parent(tcgrp))
tcgrp->nr_dying_descendants--;
spin_unlock_irq(&css_set_lock);
cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
cgrp->id = -1;
......@@ -5001,12 +5085,31 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
if (ret)
goto out_psi_free;
/*
* New cgroup inherits effective freeze counter, and
* if the parent has to be frozen, the child has too.
*/
cgrp->freezer.e_freeze = parent->freezer.e_freeze;
if (cgrp->freezer.e_freeze)
set_bit(CGRP_FROZEN, &cgrp->flags);
spin_lock_irq(&css_set_lock);
for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
cgrp->ancestor_ids[tcgrp->level] = tcgrp->id;
if (tcgrp != cgrp)
if (tcgrp != cgrp) {
tcgrp->nr_descendants++;
/*
* If the new cgroup is frozen, all ancestor cgroups
* get a new frozen descendant, but their state can't
* change because of this.
*/
if (cgrp->freezer.e_freeze)
tcgrp->freezer.nr_frozen_descendants++;
}
}
spin_unlock_irq(&css_set_lock);
if (notify_on_release(parent))
set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
......@@ -5291,10 +5394,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
if (parent && cgroup_is_threaded(cgrp))
parent->nr_threaded_children--;
spin_lock_irq(&css_set_lock);
for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
tcgrp->nr_descendants--;
tcgrp->nr_dying_descendants++;
/*
* If the dying cgroup is frozen, decrease frozen descendants
* counters of ancestor cgroups.
*/
if (test_bit(CGRP_FROZEN, &cgrp->flags))
tcgrp->freezer.nr_frozen_descendants--;
}
spin_unlock_irq(&css_set_lock);
cgroup1_check_for_release(parent);
......@@ -5746,6 +5857,26 @@ void cgroup_post_fork(struct task_struct *child)
cset->nr_tasks++;
css_set_move_task(child, NULL, cset, false);
}
/*
* If the cgroup has to be frozen, the new task has too.
* Let's set the JOBCTL_TRAP_FREEZE jobctl bit to get
* the task into the frozen state.
*/
if (unlikely(cgroup_task_freeze(child))) {
spin_lock(&child->sighand->siglock);
WARN_ON_ONCE(child->frozen);
child->jobctl |= JOBCTL_TRAP_FREEZE;
spin_unlock(&child->sighand->siglock);
/*
* Calling cgroup_update_frozen() isn't required here,
* because it will be called anyway a bit later
* from do_freezer_trap(). So we avoid cgroup's
* transient switch from the frozen state and back.
*/
}
spin_unlock_irq(&css_set_lock);
}
......@@ -5794,6 +5925,11 @@ void cgroup_exit(struct task_struct *tsk)
spin_lock_irq(&css_set_lock);
css_set_move_task(tsk, cset, NULL, false);
cset->nr_tasks--;
WARN_ON_ONCE(cgroup_task_frozen(tsk));
if (unlikely(cgroup_task_freeze(tsk)))
cgroup_update_frozen(task_dfl_cgroup(tsk));
spin_unlock_irq(&css_set_lock);
} else {
get_css_set(cset);
......
......@@ -64,8 +64,8 @@ static int current_css_set_read(struct seq_file *seq, void *v)
css = cset->subsys[ss->id];
if (!css)
continue;
seq_printf(seq, "%2d: %-4s\t- %lx[%d]\n", ss->id, ss->name,
(unsigned long)css, css->id);
seq_printf(seq, "%2d: %-4s\t- %p[%d]\n", ss->id, ss->name,
css, css->id);
}
rcu_read_unlock();
spin_unlock_irq(&css_set_lock);
......@@ -224,8 +224,8 @@ static int cgroup_subsys_states_read(struct seq_file *seq, void *v)
if (css->parent)
snprintf(pbuf, sizeof(pbuf) - 1, " P=%d",
css->parent->id);
seq_printf(seq, "%2d: %-4s\t- %lx[%d] %d%s\n", ss->id, ss->name,
(unsigned long)css, css->id,
seq_printf(seq, "%2d: %-4s\t- %p[%d] %d%s\n", ss->id, ss->name,
css, css->id,
atomic_read(&css->online_cnt), pbuf);
}
......
/*
* cgroup_freezer.c - control group freezer subsystem
*
* Copyright IBM Corporation, 2007
*
* Author : Cedric Le Goater <clg@fr.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/export.h>
#include <linux/slab.h>
//SPDX-License-Identifier: GPL-2.0
#include <linux/cgroup.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/freezer.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
/*
* A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
* set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
* for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING
* for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of
* its ancestors has FREEZING_SELF set.
*/
enum freezer_state_flags {
CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */
CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */
CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */
CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>
/* mask for all FREEZING flags */
CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
};
#include "cgroup-internal.h"
struct freezer {
struct cgroup_subsys_state css;
unsigned int state;
};
static DEFINE_MUTEX(freezer_mutex);
static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
{
return css ? container_of(css, struct freezer, css) : NULL;
}
static inline struct freezer *task_freezer(struct task_struct *task)
{
return css_freezer(task_css(task, freezer_cgrp_id));
}
static struct freezer *parent_freezer(struct freezer *freezer)
{
return css_freezer(freezer->css.parent);
}
#include <trace/events/cgroup.h>
bool cgroup_freezing(struct task_struct *task)
/*
* Propagate the cgroup frozen state upwards by the cgroup tree.
*/
static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
{
bool ret;
rcu_read_lock();
ret = task_freezer(task)->state & CGROUP_FREEZING;
rcu_read_unlock();
int desc = 1;
return ret;
/*
* If the new state is frozen, some freezing ancestor cgroups may change
* their state too, depending on if all their descendants are frozen.
*
* Otherwise, all ancestor cgroups are forced into the non-frozen state.
*/
while ((cgrp = cgroup_parent(cgrp))) {
if (frozen) {
cgrp->freezer.nr_frozen_descendants += desc;
if (!test_bit(CGRP_FROZEN, &cgrp->flags) &&
test_bit(CGRP_FREEZE, &cgrp->flags) &&
cgrp->freezer.nr_frozen_descendants ==
cgrp->nr_descendants) {
set_bit(CGRP_FROZEN, &cgrp->flags);
cgroup_file_notify(&cgrp->events_file);
TRACE_CGROUP_PATH(notify_frozen, cgrp, 1);
desc++;
}
} else {
cgrp->freezer.nr_frozen_descendants -= desc;
if (test_bit(CGRP_FROZEN, &cgrp->flags)) {
clear_bit(CGRP_FROZEN, &cgrp->flags);
cgroup_file_notify(&cgrp->events_file);
TRACE_CGROUP_PATH(notify_frozen, cgrp, 0);
desc++;
}
}
}
}
static const char *freezer_state_strs(unsigned int state)
{
if (state & CGROUP_FROZEN)
return "FROZEN";
if (state & CGROUP_FREEZING)
return "FREEZING";
return "THAWED";
};
static struct cgroup_subsys_state *
freezer_css_alloc(struct cgroup_subsys_state *parent_css)
/*
* Revisit the cgroup frozen state.
* Checks if the cgroup is really frozen and perform all state transitions.
*/
void cgroup_update_frozen(struct cgroup *cgrp)
{
struct freezer *freezer;
bool frozen;
freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
if (!freezer)
return ERR_PTR(-ENOMEM);
lockdep_assert_held(&css_set_lock);
return &freezer->css;
}
/**
* freezer_css_online - commit creation of a freezer css
* @css: css being created
*
* We're committing to creation of @css. Mark it online and inherit
* parent's freezing state while holding both parent's and our
* freezer->lock.
/*
* If the cgroup has to be frozen (CGRP_FREEZE bit set),
* and all tasks are frozen and/or stopped, let's consider
* the cgroup frozen. Otherwise it's not frozen.
*/
static int freezer_css_online(struct cgroup_subsys_state *css)
{
struct freezer *freezer = css_freezer(css);
struct freezer *parent = parent_freezer(freezer);
frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);
mutex_lock(&freezer_mutex);
if (frozen) {
/* Already there? */
if (test_bit(CGRP_FROZEN, &cgrp->flags))
return;
freezer->state |= CGROUP_FREEZER_ONLINE;
set_bit(CGRP_FROZEN, &cgrp->flags);
} else {
/* Already there? */
if (!test_bit(CGRP_FROZEN, &cgrp->flags))
return;
if (parent && (parent->state & CGROUP_FREEZING)) {
freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
atomic_inc(&system_freezing_cnt);
clear_bit(CGRP_FROZEN, &cgrp->flags);
}
cgroup_file_notify(&cgrp->events_file);
TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
mutex_unlock(&freezer_mutex);
return 0;
/* Update the state of ancestor cgroups. */
cgroup_propagate_frozen(cgrp, frozen);
}
/**
* freezer_css_offline - initiate destruction of a freezer css
* @css: css being destroyed
*
* @css is going away. Mark it dead and decrement system_freezing_count if
* it was holding one.
/*
* Increment cgroup's nr_frozen_tasks.
*/
static void freezer_css_offline(struct cgroup_subsys_state *css)
static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
{
struct freezer *freezer = css_freezer(css);
mutex_lock(&freezer_mutex);
if (freezer->state & CGROUP_FREEZING)
atomic_dec(&system_freezing_cnt);
freezer->state = 0;
mutex_unlock(&freezer_mutex);
cgrp->freezer.nr_frozen_tasks++;
}
static void freezer_css_free(struct cgroup_subsys_state *css)
/*
* Decrement cgroup's nr_frozen_tasks.
*/
static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
{
kfree(css_freezer(css));
cgrp->freezer.nr_frozen_tasks--;
WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
}
/*
* Tasks can be migrated into a different freezer anytime regardless of its
* current state. freezer_attach() is responsible for making new tasks
* conform to the current state.
*
* Freezer state changes and task migration are synchronized via
* @freezer->lock. freezer_attach() makes the new tasks conform to the
* current state and all following state changes can see the new tasks.
* Enter frozen/stopped state, if not yet there. Update cgroup's counters,
* and revisit the state of the cgroup, if necessary.
*/
static void freezer_attach(struct cgroup_taskset *tset)
void cgroup_enter_frozen(void)
{
struct task_struct *task;
struct cgroup_subsys_state *new_css;
struct cgroup *cgrp;
mutex_lock(&freezer_mutex);
if (current->frozen)
return;
/*
* Make the new tasks conform to the current state of @new_css.
* For simplicity, when migrating any task to a FROZEN cgroup, we
* revert it to FREEZING and let update_if_frozen() determine the
* correct state later.
spin_lock_irq(&css_set_lock);
current->frozen = true;
cgrp = task_dfl_cgroup(current);
cgroup_inc_frozen_cnt(cgrp);
cgroup_update_frozen(cgrp);
spin_unlock_irq(&css_set_lock);
}
/*
* Conditionally leave frozen/stopped state. Update cgroup's counters,
* and revisit the state of the cgroup, if necessary.
*
* Tasks in @tset are on @new_css but may not conform to its
* current state before executing the following - !frozen tasks may
* be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
* If always_leave is not set, and the cgroup is freezing,
* we're racing with the cgroup freezing. In this case, we don't
* drop the frozen counter to avoid a transient switch to
* the unfrozen state.
*/
cgroup_taskset_for_each(task, new_css, tset) {
struct freezer *freezer = css_freezer(new_css);
if (!(freezer->state & CGROUP_FREEZING)) {
__thaw_task(task);
} else {
freeze_task(task);
/* clear FROZEN and propagate upwards */
while (freezer && (freezer->state & CGROUP_FROZEN)) {
freezer->state &= ~CGROUP_FROZEN;
freezer = parent_freezer(freezer);
}
}
void cgroup_leave_frozen(bool always_leave)
{
struct cgroup *cgrp;
spin_lock_irq(&css_set_lock);
cgrp = task_dfl_cgroup(current);
if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
cgroup_dec_frozen_cnt(cgrp);
cgroup_update_frozen(cgrp);
WARN_ON_ONCE(!current->frozen);
current->frozen = false;
} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
spin_lock(&current->sighand->siglock);
current->jobctl |= JOBCTL_TRAP_FREEZE;
set_thread_flag(TIF_SIGPENDING);
spin_unlock(&current->sighand->siglock);
}
mutex_unlock(&freezer_mutex);
spin_unlock_irq(&css_set_lock);
}
/**
* freezer_fork - cgroup post fork callback
* @task: a task which has just been forked
*
* @task has just been created and should conform to the current state of
* the cgroup_freezer it belongs to. This function may race against
* freezer_attach(). Losing to freezer_attach() means that we don't have
* to do anything as freezer_attach() will put @task into the appropriate
* state.
/*
* Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
* jobctl bit.
*/
static void freezer_fork(struct task_struct *task)
static void cgroup_freeze_task(struct task_struct *task, bool freeze)
{
struct freezer *freezer;
unsigned long flags;
/*
* The root cgroup is non-freezable, so we can skip locking the
* freezer. This is safe regardless of race with task migration.
* If we didn't race or won, skipping is obviously the right thing
* to do. If we lost and root is the new cgroup, noop is still the
* right thing to do.
*/
if (task_css_is_root(task, freezer_cgrp_id))
/* If the task is about to die, don't bother with freezing it. */
if (!lock_task_sighand(task, &flags))
return;
mutex_lock(&freezer_mutex);
rcu_read_lock();
freezer = task_freezer(task);
if (freezer->state & CGROUP_FREEZING)
freeze_task(task);
if (freeze) {
task->jobctl |= JOBCTL_TRAP_FREEZE;
signal_wake_up(task, false);
} else {
task->jobctl &= ~JOBCTL_TRAP_FREEZE;
wake_up_process(task);
}
rcu_read_unlock();
mutex_unlock(&freezer_mutex);
unlock_task_sighand(task, &flags);
}
/**
* update_if_frozen - update whether a cgroup finished freezing
* @css: css of interest
*
* Once FREEZING is initiated, transition to FROZEN is lazily updated by
* calling this function. If the current state is FREEZING but not FROZEN,
* this function checks whether all tasks of this cgroup and the descendant
* cgroups finished freezing and, if so, sets FROZEN.
*
* The caller is responsible for grabbing RCU read lock and calling
* update_if_frozen() on all descendants prior to invoking this function.
*
* Task states and freezer state might disagree while tasks are being
* migrated into or out of @css, so we can't verify task states against
* @freezer state here. See freezer_attach() for details.
/*
* Freeze or unfreeze all tasks in the given cgroup.
*/
static void update_if_frozen(struct cgroup_subsys_state *css)
static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze)
{
struct freezer *freezer = css_freezer(css);
struct cgroup_subsys_state *pos;
struct css_task_iter it;
struct task_struct *task;
lockdep_assert_held(&freezer_mutex);
if (!(freezer->state & CGROUP_FREEZING) ||
(freezer->state & CGROUP_FROZEN))
return;
lockdep_assert_held(&cgroup_mutex);
/* are all (live) children frozen? */
rcu_read_lock();
css_for_each_child(pos, css) {
struct freezer *child = css_freezer(pos);
if ((child->state & CGROUP_FREEZER_ONLINE) &&
!(child->state & CGROUP_FROZEN)) {
rcu_read_unlock();
return;
}
}
rcu_read_unlock();
spin_lock_irq(&css_set_lock);
if (freeze)
set_bit(CGRP_FREEZE, &cgrp->flags);
else
clear_bit(CGRP_FREEZE, &cgrp->flags);
spin_unlock_irq(&css_set_lock);
/* are all tasks frozen? */
css_task_iter_start(css, 0, &it);
if (freeze)
TRACE_CGROUP_PATH(freeze, cgrp);
else
TRACE_CGROUP_PATH(unfreeze, cgrp);
css_task_iter_start(&cgrp->self, 0, &it);
while ((task = css_task_iter_next(&it))) {
if (freezing(task)) {
/*
* freezer_should_skip() indicates that the task
* should be skipped when determining freezing
* completion. Consider it frozen in addition to
* the usual frozen condition.
* Ignore kernel threads here. Freezing cgroups containing
* kthreads isn't supported.
*/
if (!frozen(task) && !freezer_should_skip(task))
goto out_iter_end;
}
if (task->flags & PF_KTHREAD)
continue;
cgroup_freeze_task(task, freeze);
}
freezer->state |= CGROUP_FROZEN;
out_iter_end:
css_task_iter_end(&it);
/*
* Cgroup state should be revisited here to cover empty leaf cgroups
* and cgroups which descendants are already in the desired state.
*/
spin_lock_irq(&css_set_lock);
if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
cgroup_update_frozen(cgrp);
spin_unlock_irq(&css_set_lock);
}
static int freezer_read(struct seq_file *m, void *v)
/*
* Adjust the task state (freeze or unfreeze) and revisit the state of
* source and destination cgroups.
*/
void cgroup_freezer_migrate_task(struct task_struct *task,
struct cgroup *src, struct cgroup *dst)
{
struct cgroup_subsys_state *css = seq_css(m), *pos;
mutex_lock(&freezer_mutex);
rcu_read_lock();
lockdep_assert_held(&css_set_lock);
/* update states bottom-up */
css_for_each_descendant_post(pos, css) {
if (!css_tryget_online(pos))
continue;
rcu_read_unlock();
update_if_frozen(pos);
/*
* Kernel threads are not supposed to be frozen at all.
*/
if (task->flags & PF_KTHREAD)
return;
rcu_read_lock();
css_put(pos);
/*
* Adjust counters of freezing and frozen tasks.
* Note, that if the task is frozen, but the destination cgroup is not
* frozen, we bump both counters to keep them balanced.
*/
if (task->frozen) {
cgroup_inc_frozen_cnt(dst);
cgroup_dec_frozen_cnt(src);
}
cgroup_update_frozen(dst);
cgroup_update_frozen(src);
rcu_read_unlock();
mutex_unlock(&freezer_mutex);
seq_puts(m, freezer_state_strs(css_freezer(css)->state));
seq_putc(m, '\n');
return 0;
/*
* Force the task to the desired state.
*/
cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
}
static void freeze_cgroup(struct freezer *freezer)
void cgroup_freeze(struct cgroup *cgrp, bool freeze)
{
struct css_task_iter it;
struct task_struct *task;
struct cgroup_subsys_state *css;
struct cgroup *dsct;
bool applied = false;
css_task_iter_start(&freezer->css, 0, &it);
while ((task = css_task_iter_next(&it)))
freeze_task(task);
css_task_iter_end(&it);
}
lockdep_assert_held(&cgroup_mutex);
static void unfreeze_cgroup(struct freezer *freezer)
{
struct css_task_iter it;
struct task_struct *task;
/*
* Nothing changed? Just exit.
*/
if (cgrp->freezer.freeze == freeze)
return;
css_task_iter_start(&freezer->css, 0, &it);
while ((task = css_task_iter_next(&it)))
__thaw_task(task);
css_task_iter_end(&it);
}
cgrp->freezer.freeze = freeze;
/**
* freezer_apply_state - apply state change to a single cgroup_freezer
* @freezer: freezer to apply state change to
* @freeze: whether to freeze or unfreeze
* @state: CGROUP_FREEZING_* flag to set or clear
*
* Set or clear @state on @cgroup according to @freeze, and perform
* freezing or thawing as necessary.
/*
* Propagate changes downwards the cgroup tree.
*/
static void freezer_apply_state(struct freezer *freezer, bool freeze,
unsigned int state)
{
/* also synchronizes against task migration, see freezer_attach() */
lockdep_assert_held(&freezer_mutex);
css_for_each_descendant_pre(css, &cgrp->self) {
dsct = css->cgroup;
if (!(freezer->state & CGROUP_FREEZER_ONLINE))
return;
if (cgroup_is_dead(dsct))
continue;
if (freeze) {
if (!(freezer->state & CGROUP_FREEZING))
atomic_inc(&system_freezing_cnt);
freezer->state |= state;
freeze_cgroup(freezer);
dsct->freezer.e_freeze++;
/*
* Already frozen because of ancestor's settings?
*/
if (dsct->freezer.e_freeze > 1)
continue;
} else {
bool was_freezing = freezer->state & CGROUP_FREEZING;
freezer->state &= ~state;
dsct->freezer.e_freeze--;
/*
* Still frozen because of ancestor's settings?
*/
if (dsct->freezer.e_freeze > 0)
continue;
if (!(freezer->state & CGROUP_FREEZING)) {
if (was_freezing)
atomic_dec(&system_freezing_cnt);
freezer->state &= ~CGROUP_FROZEN;
unfreeze_cgroup(freezer);
}
WARN_ON_ONCE(dsct->freezer.e_freeze < 0);
}
}
/**
* freezer_change_state - change the freezing state of a cgroup_freezer
* @freezer: freezer of interest
* @freeze: whether to freeze or thaw
*
* Freeze or thaw @freezer according to @freeze. The operations are
* recursive - all descendants of @freezer will be affected.
/*
* Do change actual state: freeze or unfreeze.
*/
static void freezer_change_state(struct freezer *freezer, bool freeze)
{
struct cgroup_subsys_state *pos;
cgroup_do_freeze(dsct, freeze);
applied = true;
}
/*
* Update all its descendants in pre-order traversal. Each
* descendant will try to inherit its parent's FREEZING state as
* CGROUP_FREEZING_PARENT.
* Even if the actual state hasn't changed, let's notify a user.
* The state can be enforced by an ancestor cgroup: the cgroup
* can already be in the desired state or it can be locked in the
* opposite state, so that the transition will never happen.
* In both cases it's better to notify a user, that there is
* nothing to wait for.
*/
mutex_lock(&freezer_mutex);
rcu_read_lock();
css_for_each_descendant_pre(pos, &freezer->css) {
struct freezer *pos_f = css_freezer(pos);
struct freezer *parent = parent_freezer(pos_f);
if (!css_tryget_online(pos))
continue;
rcu_read_unlock();
if (pos_f == freezer)
freezer_apply_state(pos_f, freeze,
CGROUP_FREEZING_SELF);
else
freezer_apply_state(pos_f,
parent->state & CGROUP_FREEZING,
CGROUP_FREEZING_PARENT);
rcu_read_lock();
css_put(pos);
if (!applied) {
TRACE_CGROUP_PATH(notify_frozen, cgrp,
test_bit(CGRP_FROZEN, &cgrp->flags));
cgroup_file_notify(&cgrp->events_file);
}
rcu_read_unlock();
mutex_unlock(&freezer_mutex);
}
static ssize_t freezer_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
bool freeze;
buf = strstrip(buf);
if (strcmp(buf, freezer_state_strs(0)) == 0)
freeze = false;
else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
freeze = true;
else
return -EINVAL;
freezer_change_state(css_freezer(of_css(of)), freeze);
return nbytes;
}
static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct freezer *freezer = css_freezer(css);
return (bool)(freezer->state & CGROUP_FREEZING_SELF);
}
static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct freezer *freezer = css_freezer(css);
return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
}
static struct cftype files[] = {
{
.name = "state",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = freezer_read,
.write = freezer_write,
},
{
.name = "self_freezing",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = freezer_self_freezing_read,
},
{
.name = "parent_freezing",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = freezer_parent_freezing_read,
},
{ } /* terminate */
};
struct cgroup_subsys freezer_cgrp_subsys = {
.css_alloc = freezer_css_alloc,
.css_online = freezer_css_online,
.css_offline = freezer_css_offline,
.css_free = freezer_css_free,
.attach = freezer_attach,
.fork = freezer_fork,
.legacy_cftypes = files,
};
/*
* cgroup_freezer.c - control group freezer subsystem
*
* Copyright IBM Corporation, 2007
*
* Author : Cedric Le Goater <clg@fr.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
* as published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/cgroup.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/freezer.h>
#include <linux/seq_file.h>
#include <linux/mutex.h>
/*
* A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is
* set if "FROZEN" is written to freezer.state cgroupfs file, and cleared
* for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING
* for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of
* its ancestors has FREEZING_SELF set.
*/
enum freezer_state_flags {
CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */
CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */
CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */
CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */
/* mask for all FREEZING flags */
CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,
};
struct freezer {
struct cgroup_subsys_state css;
unsigned int state;
};
static DEFINE_MUTEX(freezer_mutex);
static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)
{
return css ? container_of(css, struct freezer, css) : NULL;
}
static inline struct freezer *task_freezer(struct task_struct *task)
{
return css_freezer(task_css(task, freezer_cgrp_id));
}
static struct freezer *parent_freezer(struct freezer *freezer)
{
return css_freezer(freezer->css.parent);
}
bool cgroup_freezing(struct task_struct *task)
{
bool ret;
rcu_read_lock();
ret = task_freezer(task)->state & CGROUP_FREEZING;
rcu_read_unlock();
return ret;
}
static const char *freezer_state_strs(unsigned int state)
{
if (state & CGROUP_FROZEN)
return "FROZEN";
if (state & CGROUP_FREEZING)
return "FREEZING";
return "THAWED";
};
static struct cgroup_subsys_state *
freezer_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct freezer *freezer;
freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL);
if (!freezer)
return ERR_PTR(-ENOMEM);
return &freezer->css;
}
/**
* freezer_css_online - commit creation of a freezer css
* @css: css being created
*
* We're committing to creation of @css. Mark it online and inherit
* parent's freezing state while holding both parent's and our
* freezer->lock.
*/
static int freezer_css_online(struct cgroup_subsys_state *css)
{
struct freezer *freezer = css_freezer(css);
struct freezer *parent = parent_freezer(freezer);
mutex_lock(&freezer_mutex);
freezer->state |= CGROUP_FREEZER_ONLINE;
if (parent && (parent->state & CGROUP_FREEZING)) {
freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN;
atomic_inc(&system_freezing_cnt);
}
mutex_unlock(&freezer_mutex);
return 0;
}
/**
* freezer_css_offline - initiate destruction of a freezer css
* @css: css being destroyed
*
* @css is going away. Mark it dead and decrement system_freezing_count if
* it was holding one.
*/
static void freezer_css_offline(struct cgroup_subsys_state *css)
{
struct freezer *freezer = css_freezer(css);
mutex_lock(&freezer_mutex);
if (freezer->state & CGROUP_FREEZING)
atomic_dec(&system_freezing_cnt);
freezer->state = 0;
mutex_unlock(&freezer_mutex);
}
static void freezer_css_free(struct cgroup_subsys_state *css)
{
kfree(css_freezer(css));
}
/*
* Tasks can be migrated into a different freezer anytime regardless of its
* current state. freezer_attach() is responsible for making new tasks
* conform to the current state.
*
* Freezer state changes and task migration are synchronized via
* @freezer->lock. freezer_attach() makes the new tasks conform to the
* current state and all following state changes can see the new tasks.
*/
static void freezer_attach(struct cgroup_taskset *tset)
{
struct task_struct *task;
struct cgroup_subsys_state *new_css;
mutex_lock(&freezer_mutex);
/*
* Make the new tasks conform to the current state of @new_css.
* For simplicity, when migrating any task to a FROZEN cgroup, we
* revert it to FREEZING and let update_if_frozen() determine the
* correct state later.
*
* Tasks in @tset are on @new_css but may not conform to its
* current state before executing the following - !frozen tasks may
* be visible in a FROZEN cgroup and frozen tasks in a THAWED one.
*/
cgroup_taskset_for_each(task, new_css, tset) {
struct freezer *freezer = css_freezer(new_css);
if (!(freezer->state & CGROUP_FREEZING)) {
__thaw_task(task);
} else {
freeze_task(task);
/* clear FROZEN and propagate upwards */
while (freezer && (freezer->state & CGROUP_FROZEN)) {
freezer->state &= ~CGROUP_FROZEN;
freezer = parent_freezer(freezer);
}
}
}
mutex_unlock(&freezer_mutex);
}
/**
* freezer_fork - cgroup post fork callback
* @task: a task which has just been forked
*
* @task has just been created and should conform to the current state of
* the cgroup_freezer it belongs to. This function may race against
* freezer_attach(). Losing to freezer_attach() means that we don't have
* to do anything as freezer_attach() will put @task into the appropriate
* state.
*/
static void freezer_fork(struct task_struct *task)
{
struct freezer *freezer;
/*
* The root cgroup is non-freezable, so we can skip locking the
* freezer. This is safe regardless of race with task migration.
* If we didn't race or won, skipping is obviously the right thing
* to do. If we lost and root is the new cgroup, noop is still the
* right thing to do.
*/
if (task_css_is_root(task, freezer_cgrp_id))
return;
mutex_lock(&freezer_mutex);
rcu_read_lock();
freezer = task_freezer(task);
if (freezer->state & CGROUP_FREEZING)
freeze_task(task);
rcu_read_unlock();
mutex_unlock(&freezer_mutex);
}
/**
* update_if_frozen - update whether a cgroup finished freezing
* @css: css of interest
*
* Once FREEZING is initiated, transition to FROZEN is lazily updated by
* calling this function. If the current state is FREEZING but not FROZEN,
* this function checks whether all tasks of this cgroup and the descendant
* cgroups finished freezing and, if so, sets FROZEN.
*
* The caller is responsible for grabbing RCU read lock and calling
* update_if_frozen() on all descendants prior to invoking this function.
*
* Task states and freezer state might disagree while tasks are being
* migrated into or out of @css, so we can't verify task states against
* @freezer state here. See freezer_attach() for details.
*/
static void update_if_frozen(struct cgroup_subsys_state *css)
{
struct freezer *freezer = css_freezer(css);
struct cgroup_subsys_state *pos;
struct css_task_iter it;
struct task_struct *task;
lockdep_assert_held(&freezer_mutex);
if (!(freezer->state & CGROUP_FREEZING) ||
(freezer->state & CGROUP_FROZEN))
return;
/* are all (live) children frozen? */
rcu_read_lock();
css_for_each_child(pos, css) {
struct freezer *child = css_freezer(pos);
if ((child->state & CGROUP_FREEZER_ONLINE) &&
!(child->state & CGROUP_FROZEN)) {
rcu_read_unlock();
return;
}
}
rcu_read_unlock();
/* are all tasks frozen? */
css_task_iter_start(css, 0, &it);
while ((task = css_task_iter_next(&it))) {
if (freezing(task)) {
/*
* freezer_should_skip() indicates that the task
* should be skipped when determining freezing
* completion. Consider it frozen in addition to
* the usual frozen condition.
*/
if (!frozen(task) && !freezer_should_skip(task))
goto out_iter_end;
}
}
freezer->state |= CGROUP_FROZEN;
out_iter_end:
css_task_iter_end(&it);
}
static int freezer_read(struct seq_file *m, void *v)
{
struct cgroup_subsys_state *css = seq_css(m), *pos;
mutex_lock(&freezer_mutex);
rcu_read_lock();
/* update states bottom-up */
css_for_each_descendant_post(pos, css) {
if (!css_tryget_online(pos))
continue;
rcu_read_unlock();
update_if_frozen(pos);
rcu_read_lock();
css_put(pos);
}
rcu_read_unlock();
mutex_unlock(&freezer_mutex);
seq_puts(m, freezer_state_strs(css_freezer(css)->state));
seq_putc(m, '\n');
return 0;
}
static void freeze_cgroup(struct freezer *freezer)
{
struct css_task_iter it;
struct task_struct *task;
css_task_iter_start(&freezer->css, 0, &it);
while ((task = css_task_iter_next(&it)))
freeze_task(task);
css_task_iter_end(&it);
}
static void unfreeze_cgroup(struct freezer *freezer)
{
struct css_task_iter it;
struct task_struct *task;
css_task_iter_start(&freezer->css, 0, &it);
while ((task = css_task_iter_next(&it)))
__thaw_task(task);
css_task_iter_end(&it);
}
/**
* freezer_apply_state - apply state change to a single cgroup_freezer
* @freezer: freezer to apply state change to
* @freeze: whether to freeze or unfreeze
* @state: CGROUP_FREEZING_* flag to set or clear
*
* Set or clear @state on @cgroup according to @freeze, and perform
* freezing or thawing as necessary.
*/
static void freezer_apply_state(struct freezer *freezer, bool freeze,
unsigned int state)
{
/* also synchronizes against task migration, see freezer_attach() */
lockdep_assert_held(&freezer_mutex);
if (!(freezer->state & CGROUP_FREEZER_ONLINE))
return;
if (freeze) {
if (!(freezer->state & CGROUP_FREEZING))
atomic_inc(&system_freezing_cnt);
freezer->state |= state;
freeze_cgroup(freezer);
} else {
bool was_freezing = freezer->state & CGROUP_FREEZING;
freezer->state &= ~state;
if (!(freezer->state & CGROUP_FREEZING)) {
if (was_freezing)
atomic_dec(&system_freezing_cnt);
freezer->state &= ~CGROUP_FROZEN;
unfreeze_cgroup(freezer);
}
}
}
/**
* freezer_change_state - change the freezing state of a cgroup_freezer
* @freezer: freezer of interest
* @freeze: whether to freeze or thaw
*
* Freeze or thaw @freezer according to @freeze. The operations are
* recursive - all descendants of @freezer will be affected.
*/
static void freezer_change_state(struct freezer *freezer, bool freeze)
{
struct cgroup_subsys_state *pos;
/*
* Update all its descendants in pre-order traversal. Each
* descendant will try to inherit its parent's FREEZING state as
* CGROUP_FREEZING_PARENT.
*/
mutex_lock(&freezer_mutex);
rcu_read_lock();
css_for_each_descendant_pre(pos, &freezer->css) {
struct freezer *pos_f = css_freezer(pos);
struct freezer *parent = parent_freezer(pos_f);
if (!css_tryget_online(pos))
continue;
rcu_read_unlock();
if (pos_f == freezer)
freezer_apply_state(pos_f, freeze,
CGROUP_FREEZING_SELF);
else
freezer_apply_state(pos_f,
parent->state & CGROUP_FREEZING,
CGROUP_FREEZING_PARENT);
rcu_read_lock();
css_put(pos);
}
rcu_read_unlock();
mutex_unlock(&freezer_mutex);
}
static ssize_t freezer_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
bool freeze;
buf = strstrip(buf);
if (strcmp(buf, freezer_state_strs(0)) == 0)
freeze = false;
else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0)
freeze = true;
else
return -EINVAL;
freezer_change_state(css_freezer(of_css(of)), freeze);
return nbytes;
}
static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct freezer *freezer = css_freezer(css);
return (bool)(freezer->state & CGROUP_FREEZING_SELF);
}
static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css,
struct cftype *cft)
{
struct freezer *freezer = css_freezer(css);
return (bool)(freezer->state & CGROUP_FREEZING_PARENT);
}
static struct cftype files[] = {
{
.name = "state",
.flags = CFTYPE_NOT_ON_ROOT,
.seq_show = freezer_read,
.write = freezer_write,
},
{
.name = "self_freezing",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = freezer_self_freezing_read,
},
{
.name = "parent_freezing",
.flags = CFTYPE_NOT_ON_ROOT,
.read_u64 = freezer_parent_freezing_read,
},
{ } /* terminate */
};
struct cgroup_subsys freezer_cgrp_subsys = {
.css_alloc = freezer_css_alloc,
.css_online = freezer_css_online,
.css_offline = freezer_css_offline,
.css_free = freezer_css_free,
.attach = freezer_attach,
.fork = freezer_fork,
.legacy_cftypes = files,
};
......@@ -1225,7 +1225,9 @@ static int wait_for_vfork_done(struct task_struct *child,
int killed;
freezer_do_not_count();
cgroup_enter_frozen();
killed = wait_for_completion_killable(vfork);
cgroup_leave_frozen(false);
freezer_count();
if (killed) {
......
......@@ -43,6 +43,7 @@
#include <linux/compiler.h>
#include <linux/posix-timers.h>
#include <linux/livepatch.h>
#include <linux/cgroup.h>
#define CREATE_TRACE_POINTS
#include <trace/events/signal.h>
......@@ -146,9 +147,10 @@ static inline bool has_pending_signals(sigset_t *signal, sigset_t *blocked)
static bool recalc_sigpending_tsk(struct task_struct *t)
{
if ((t->jobctl & JOBCTL_PENDING_MASK) ||
if ((t->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) ||
PENDING(&t->pending, &t->blocked) ||
PENDING(&t->signal->shared_pending, &t->blocked)) {
PENDING(&t->signal->shared_pending, &t->blocked) ||
cgroup_task_frozen(t)) {
set_tsk_thread_flag(t, TIF_SIGPENDING);
return true;
}
......@@ -2108,6 +2110,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t
preempt_disable();
read_unlock(&tasklist_lock);
preempt_enable_no_resched();
cgroup_enter_frozen();
freezable_schedule();
} else {
/*
......@@ -2286,6 +2289,7 @@ static bool do_signal_stop(int signr)
}
/* Now we don't run again until woken by SIGCONT or SIGKILL */
cgroup_enter_frozen();
freezable_schedule();
return true;
} else {
......@@ -2332,6 +2336,43 @@ static void do_jobctl_trap(void)
}
}
/**
* do_freezer_trap - handle the freezer jobctl trap
*
* Puts the task into frozen state, if only the task is not about to quit.
* In this case it drops JOBCTL_TRAP_FREEZE.
*
* CONTEXT:
* Must be called with @current->sighand->siglock held,
* which is always released before returning.
*/
static void do_freezer_trap(void)
__releases(&current->sighand->siglock)
{
/*
* If there are other trap bits pending except JOBCTL_TRAP_FREEZE,
* let's make another loop to give it a chance to be handled.
* In any case, we'll return back.
*/
if ((current->jobctl & (JOBCTL_PENDING_MASK | JOBCTL_TRAP_FREEZE)) !=
JOBCTL_TRAP_FREEZE) {
spin_unlock_irq(&current->sighand->siglock);
return;
}
/*
* Now we're sure that there is no pending fatal signal and no
* pending traps. Clear TIF_SIGPENDING to not get out of schedule()
* immediately (if there is a non-fatal signal pending), and
* put the task into sleep.
*/
__set_current_state(TASK_INTERRUPTIBLE);
clear_thread_flag(TIF_SIGPENDING);
spin_unlock_irq(&current->sighand->siglock);
cgroup_enter_frozen();
freezable_schedule();
}
static int ptrace_signal(int signr, kernel_siginfo_t *info)
{
/*
......@@ -2452,9 +2493,24 @@ bool get_signal(struct ksignal *ksig)
do_signal_stop(0))
goto relock;
if (unlikely(current->jobctl & JOBCTL_TRAP_MASK)) {
if (unlikely(current->jobctl &
(JOBCTL_TRAP_MASK | JOBCTL_TRAP_FREEZE))) {
if (current->jobctl & JOBCTL_TRAP_MASK) {
do_jobctl_trap();
spin_unlock_irq(&sighand->siglock);
} else if (current->jobctl & JOBCTL_TRAP_FREEZE)
do_freezer_trap();
goto relock;
}
/*
* If the task is leaving the frozen state, let's update
* cgroup counters and reset the frozen bit.
*/
if (unlikely(cgroup_task_frozen(current))) {
spin_unlock_irq(&sighand->siglock);
cgroup_leave_frozen(false);
goto relock;
}
......@@ -2550,6 +2606,8 @@ bool get_signal(struct ksignal *ksig)
fatal:
spin_unlock_irq(&sighand->siglock);
if (unlikely(cgroup_task_frozen(current)))
cgroup_leave_frozen(true);
/*
* Anything else is fatal, maybe with a core dump.
......
......@@ -5,8 +5,10 @@ all:
TEST_GEN_PROGS = test_memcontrol
TEST_GEN_PROGS += test_core
TEST_GEN_PROGS += test_freezer
include ../lib.mk
$(OUTPUT)/test_memcontrol: cgroup_util.c
$(OUTPUT)/test_core: cgroup_util.c
$(OUTPUT)/test_freezer: cgroup_util.c
......@@ -74,6 +74,16 @@ char *cg_name_indexed(const char *root, const char *name, int index)
return ret;
}
char *cg_control(const char *cgroup, const char *control)
{
size_t len = strlen(cgroup) + strlen(control) + 2;
char *ret = malloc(len);
snprintf(ret, len, "%s/%s", cgroup, control);
return ret;
}
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
char path[PATH_MAX];
......@@ -196,7 +206,32 @@ int cg_create(const char *cgroup)
return mkdir(cgroup, 0644);
}
static int cg_killall(const char *cgroup)
int cg_wait_for_proc_count(const char *cgroup, int count)
{
char buf[10 * PAGE_SIZE] = {0};
int attempts;
char *ptr;
for (attempts = 10; attempts >= 0; attempts--) {
int nr = 0;
if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
break;
for (ptr = buf; *ptr; ptr++)
if (*ptr == '\n')
nr++;
if (nr >= count)
return 0;
usleep(100000);
}
return -1;
}
int cg_killall(const char *cgroup)
{
char buf[PAGE_SIZE];
char *ptr = buf;
......@@ -227,9 +262,7 @@ int cg_destroy(const char *cgroup)
retry:
ret = rmdir(cgroup);
if (ret && errno == EBUSY) {
ret = cg_killall(cgroup);
if (ret)
return ret;
cg_killall(cgroup);
usleep(100);
goto retry;
}
......@@ -240,6 +273,14 @@ int cg_destroy(const char *cgroup)
return ret;
}
int cg_enter(const char *cgroup, int pid)
{
char pidbuf[64];
snprintf(pidbuf, sizeof(pidbuf), "%d", pid);
return cg_write(cgroup, "cgroup.procs", pidbuf);
}
int cg_enter_current(const char *cgroup)
{
char pidbuf[64];
......@@ -369,3 +410,12 @@ int set_oom_adj_score(int pid, int score)
close(fd);
return 0;
}
char proc_read_text(int pid, const char *item, char *buf, size_t size)
{
char path[PATH_MAX];
snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
return read_text(path, buf, size);
}
......@@ -18,6 +18,7 @@ static inline int values_close(long a, long b, int err)
extern int cg_find_unified_root(char *root, size_t len);
extern char *cg_name(const char *root, const char *name);
extern char *cg_name_indexed(const char *root, const char *name, int index);
extern char *cg_control(const char *cgroup, const char *control);
extern int cg_create(const char *cgroup);
extern int cg_destroy(const char *cgroup);
extern int cg_read(const char *cgroup, const char *control,
......@@ -32,6 +33,7 @@ extern int cg_write(const char *cgroup, const char *control, char *buf);
extern int cg_run(const char *cgroup,
int (*fn)(const char *cgroup, void *arg),
void *arg);
extern int cg_enter(const char *cgroup, int pid);
extern int cg_enter_current(const char *cgroup);
extern int cg_run_nowait(const char *cgroup,
int (*fn)(const char *cgroup, void *arg),
......@@ -41,3 +43,6 @@ extern int alloc_pagecache(int fd, size_t size);
extern int alloc_anon(const char *cgroup, void *arg);
extern int is_swap_enabled(void);
extern int set_oom_adj_score(int pid, int score);
extern int cg_wait_for_proc_count(const char *cgroup, int count);
extern int cg_killall(const char *cgroup);
extern char proc_read_text(int pid, const char *item, char *buf, size_t size);
/* SPDX-License-Identifier: GPL-2.0 */
#include <stdbool.h>
#include <linux/limits.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <poll.h>
#include <stdlib.h>
#include <sys/inotify.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "../kselftest.h"
#include "cgroup_util.h"
#define DEBUG
#ifdef DEBUG
#define debug(args...) fprintf(stderr, args)
#else
#define debug(args...)
#endif
/*
* Check if the cgroup is frozen by looking at the cgroup.events::frozen value.
*/
static int cg_check_frozen(const char *cgroup, bool frozen)
{
if (frozen) {
if (cg_read_strstr(cgroup, "cgroup.events", "frozen 1") != 0) {
debug("Cgroup %s isn't frozen\n", cgroup);
return -1;
}
} else {
/*
* Check the cgroup.events::frozen value.
*/
if (cg_read_strstr(cgroup, "cgroup.events", "frozen 0") != 0) {
debug("Cgroup %s is frozen\n", cgroup);
return -1;
}
}
return 0;
}
/*
* Freeze the given cgroup.
*/
static int cg_freeze_nowait(const char *cgroup, bool freeze)
{
return cg_write(cgroup, "cgroup.freeze", freeze ? "1" : "0");
}
/*
* Prepare for waiting on cgroup.events file.
*/
static int cg_prepare_for_wait(const char *cgroup)
{
int fd, ret = -1;
fd = inotify_init1(0);
if (fd == -1) {
debug("Error: inotify_init1() failed\n");
return fd;
}
ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"),
IN_MODIFY);
if (ret == -1) {
debug("Error: inotify_add_watch() failed\n");
close(fd);
}
return fd;
}
/*
* Wait for an event. If there are no events for 10 seconds,
* treat this an error.
*/
static int cg_wait_for(int fd)
{
int ret = -1;
struct pollfd fds = {
.fd = fd,
.events = POLLIN,
};
while (true) {
ret = poll(&fds, 1, 10000);
if (ret == -1) {
if (errno == EINTR)
continue;
debug("Error: poll() failed\n");
break;
}
if (ret > 0 && fds.revents & POLLIN) {
ret = 0;
break;
}
}
return ret;
}
/*
* Attach a task to the given cgroup and wait for a cgroup frozen event.
* All transient events (e.g. populated) are ignored.
*/
static int cg_enter_and_wait_for_frozen(const char *cgroup, int pid,
bool frozen)
{
int fd, ret = -1;
int attempts;
fd = cg_prepare_for_wait(cgroup);
if (fd < 0)
return fd;
ret = cg_enter(cgroup, pid);
if (ret)
goto out;
for (attempts = 0; attempts < 10; attempts++) {
ret = cg_wait_for(fd);
if (ret)
break;
ret = cg_check_frozen(cgroup, frozen);
if (ret)
continue;
}
out:
close(fd);
return ret;
}
/*
* Freeze the given cgroup and wait for the inotify signal.
* If there are no events in 10 seconds, treat this as an error.
* Then check that the cgroup is in the desired state.
*/
static int cg_freeze_wait(const char *cgroup, bool freeze)
{
int fd, ret = -1;
fd = cg_prepare_for_wait(cgroup);
if (fd < 0)
return fd;
ret = cg_freeze_nowait(cgroup, freeze);
if (ret) {
debug("Error: cg_freeze_nowait() failed\n");
goto out;
}
ret = cg_wait_for(fd);
if (ret)
goto out;
ret = cg_check_frozen(cgroup, freeze);
out:
close(fd);
return ret;
}
/*
* A simple process running in a sleep loop until being
* re-parented.
*/
static int child_fn(const char *cgroup, void *arg)
{
int ppid = getppid();
while (getppid() == ppid)
usleep(1000);
return getppid() == ppid;
}
/*
* A simple test for the cgroup freezer: populated the cgroup with 100
* running processes and freeze it. Then unfreeze it. Then it kills all
* processes and destroys the cgroup.
*/
static int test_cgfreezer_simple(const char *root)
{
int ret = KSFT_FAIL;
char *cgroup = NULL;
int i;
cgroup = cg_name(root, "cg_test_simple");
if (!cgroup)
goto cleanup;
if (cg_create(cgroup))
goto cleanup;
for (i = 0; i < 100; i++)
cg_run_nowait(cgroup, child_fn, NULL);
if (cg_wait_for_proc_count(cgroup, 100))
goto cleanup;
if (cg_check_frozen(cgroup, false))
goto cleanup;
if (cg_freeze_wait(cgroup, true))
goto cleanup;
if (cg_freeze_wait(cgroup, false))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (cgroup)
cg_destroy(cgroup);
free(cgroup);
return ret;
}
/*
* The test creates the following hierarchy:
* A
* / / \ \
* B E I K
* /\ |
* C D F
* |
* G
* |
* H
*
* with a process in C, H and 3 processes in K.
* Then it tries to freeze and unfreeze the whole tree.
*/
static int test_cgfreezer_tree(const char *root)
{
char *cgroup[10] = {0};
int ret = KSFT_FAIL;
int i;
cgroup[0] = cg_name(root, "cg_test_tree_A");
if (!cgroup[0])
goto cleanup;
cgroup[1] = cg_name(cgroup[0], "B");
if (!cgroup[1])
goto cleanup;
cgroup[2] = cg_name(cgroup[1], "C");
if (!cgroup[2])
goto cleanup;
cgroup[3] = cg_name(cgroup[1], "D");
if (!cgroup[3])
goto cleanup;
cgroup[4] = cg_name(cgroup[0], "E");
if (!cgroup[4])
goto cleanup;
cgroup[5] = cg_name(cgroup[4], "F");
if (!cgroup[5])
goto cleanup;
cgroup[6] = cg_name(cgroup[5], "G");
if (!cgroup[6])
goto cleanup;
cgroup[7] = cg_name(cgroup[6], "H");
if (!cgroup[7])
goto cleanup;
cgroup[8] = cg_name(cgroup[0], "I");
if (!cgroup[8])
goto cleanup;
cgroup[9] = cg_name(cgroup[0], "K");
if (!cgroup[9])
goto cleanup;
for (i = 0; i < 10; i++)
if (cg_create(cgroup[i]))
goto cleanup;
cg_run_nowait(cgroup[2], child_fn, NULL);
cg_run_nowait(cgroup[7], child_fn, NULL);
cg_run_nowait(cgroup[9], child_fn, NULL);
cg_run_nowait(cgroup[9], child_fn, NULL);
cg_run_nowait(cgroup[9], child_fn, NULL);
/*
* Wait until all child processes will enter
* corresponding cgroups.
*/
if (cg_wait_for_proc_count(cgroup[2], 1) ||
cg_wait_for_proc_count(cgroup[7], 1) ||
cg_wait_for_proc_count(cgroup[9], 3))
goto cleanup;
/*
* Freeze B.
*/
if (cg_freeze_wait(cgroup[1], true))
goto cleanup;
/*
* Freeze F.
*/
if (cg_freeze_wait(cgroup[5], true))
goto cleanup;
/*
* Freeze G.
*/
if (cg_freeze_wait(cgroup[6], true))
goto cleanup;
/*
* Check that A and E are not frozen.
*/
if (cg_check_frozen(cgroup[0], false))
goto cleanup;
if (cg_check_frozen(cgroup[4], false))
goto cleanup;
/*
* Freeze A. Check that A, B and E are frozen.
*/
if (cg_freeze_wait(cgroup[0], true))
goto cleanup;
if (cg_check_frozen(cgroup[1], true))
goto cleanup;
if (cg_check_frozen(cgroup[4], true))
goto cleanup;
/*
* Unfreeze B, F and G
*/
if (cg_freeze_nowait(cgroup[1], false))
goto cleanup;
if (cg_freeze_nowait(cgroup[5], false))
goto cleanup;
if (cg_freeze_nowait(cgroup[6], false))
goto cleanup;
/*
* Check that C and H are still frozen.
*/
if (cg_check_frozen(cgroup[2], true))
goto cleanup;
if (cg_check_frozen(cgroup[7], true))
goto cleanup;
/*
* Unfreeze A. Check that A, C and K are not frozen.
*/
if (cg_freeze_wait(cgroup[0], false))
goto cleanup;
if (cg_check_frozen(cgroup[2], false))
goto cleanup;
if (cg_check_frozen(cgroup[9], false))
goto cleanup;
ret = KSFT_PASS;
cleanup:
for (i = 9; i >= 0 && cgroup[i]; i--) {
cg_destroy(cgroup[i]);
free(cgroup[i]);
}
return ret;
}
/*
* A fork bomb emulator.
*/
static int forkbomb_fn(const char *cgroup, void *arg)
{
int ppid;
fork();
fork();
ppid = getppid();
while (getppid() == ppid)
usleep(1000);
return getppid() == ppid;
}
/*
* The test runs a fork bomb in a cgroup and tries to freeze it.
* Then it kills all processes and checks that cgroup isn't populated
* anymore.
*/
static int test_cgfreezer_forkbomb(const char *root)
{
int ret = KSFT_FAIL;
char *cgroup = NULL;
cgroup = cg_name(root, "cg_forkbomb_test");
if (!cgroup)
goto cleanup;
if (cg_create(cgroup))
goto cleanup;
cg_run_nowait(cgroup, forkbomb_fn, NULL);
usleep(100000);
if (cg_freeze_wait(cgroup, true))
goto cleanup;
if (cg_killall(cgroup))
goto cleanup;
if (cg_wait_for_proc_count(cgroup, 0))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (cgroup)
cg_destroy(cgroup);
free(cgroup);
return ret;
}
/*
* The test creates two nested cgroups, freezes the parent
* and removes the child. Then it checks that the parent cgroup
* remains frozen and it's possible to create a new child
* without unfreezing. The new child is frozen too.
*/
static int test_cgfreezer_rmdir(const char *root)
{
int ret = KSFT_FAIL;
char *parent, *child = NULL;
parent = cg_name(root, "cg_test_rmdir_A");
if (!parent)
goto cleanup;
child = cg_name(parent, "cg_test_rmdir_B");
if (!child)
goto cleanup;
if (cg_create(parent))
goto cleanup;
if (cg_create(child))
goto cleanup;
if (cg_freeze_wait(parent, true))
goto cleanup;
if (cg_destroy(child))
goto cleanup;
if (cg_check_frozen(parent, true))
goto cleanup;
if (cg_create(child))
goto cleanup;
if (cg_check_frozen(child, true))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (child)
cg_destroy(child);
free(child);
if (parent)
cg_destroy(parent);
free(parent);
return ret;
}
/*
* The test creates two cgroups: A and B, runs a process in A
* and performs several migrations:
* 1) A (running) -> B (frozen)
* 2) B (frozen) -> A (running)
* 3) A (frozen) -> B (frozen)
*
* On each step it checks the actual state of both cgroups.
*/
static int test_cgfreezer_migrate(const char *root)
{
int ret = KSFT_FAIL;
char *cgroup[2] = {0};
int pid;
cgroup[0] = cg_name(root, "cg_test_migrate_A");
if (!cgroup[0])
goto cleanup;
cgroup[1] = cg_name(root, "cg_test_migrate_B");
if (!cgroup[1])
goto cleanup;
if (cg_create(cgroup[0]))
goto cleanup;
if (cg_create(cgroup[1]))
goto cleanup;
pid = cg_run_nowait(cgroup[0], child_fn, NULL);
if (pid < 0)
goto cleanup;
if (cg_wait_for_proc_count(cgroup[0], 1))
goto cleanup;
/*
* Migrate from A (running) to B (frozen)
*/
if (cg_freeze_wait(cgroup[1], true))
goto cleanup;
if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true))
goto cleanup;
if (cg_check_frozen(cgroup[0], false))
goto cleanup;
/*
* Migrate from B (frozen) to A (running)
*/
if (cg_enter_and_wait_for_frozen(cgroup[0], pid, false))
goto cleanup;
if (cg_check_frozen(cgroup[1], true))
goto cleanup;
/*
* Migrate from A (frozen) to B (frozen)
*/
if (cg_freeze_wait(cgroup[0], true))
goto cleanup;
if (cg_enter_and_wait_for_frozen(cgroup[1], pid, true))
goto cleanup;
if (cg_check_frozen(cgroup[0], true))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (cgroup[0])
cg_destroy(cgroup[0]);
free(cgroup[0]);
if (cgroup[1])
cg_destroy(cgroup[1]);
free(cgroup[1]);
return ret;
}
/*
* The test checks that ptrace works with a tracing process in a frozen cgroup.
*/
static int test_cgfreezer_ptrace(const char *root)
{
int ret = KSFT_FAIL;
char *cgroup = NULL;
siginfo_t siginfo;
int pid;
cgroup = cg_name(root, "cg_test_ptrace");
if (!cgroup)
goto cleanup;
if (cg_create(cgroup))
goto cleanup;
pid = cg_run_nowait(cgroup, child_fn, NULL);
if (pid < 0)
goto cleanup;
if (cg_wait_for_proc_count(cgroup, 1))
goto cleanup;
if (cg_freeze_wait(cgroup, true))
goto cleanup;
if (ptrace(PTRACE_SEIZE, pid, NULL, NULL))
goto cleanup;
if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL))
goto cleanup;
waitpid(pid, NULL, 0);
/*
* Cgroup has to remain frozen, however the test task
* is in traced state.
*/
if (cg_check_frozen(cgroup, true))
goto cleanup;
if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo))
goto cleanup;
if (ptrace(PTRACE_DETACH, pid, NULL, NULL))
goto cleanup;
if (cg_check_frozen(cgroup, true))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (cgroup)
cg_destroy(cgroup);
free(cgroup);
return ret;
}
/*
* Check if the process is stopped.
*/
static int proc_check_stopped(int pid)
{
char buf[PAGE_SIZE];
int len;
len = proc_read_text(pid, "stat", buf, sizeof(buf));
if (len == -1) {
debug("Can't get %d stat\n", pid);
return -1;
}
if (strstr(buf, "(test_freezer) T ") == NULL) {
debug("Process %d in the unexpected state: %s\n", pid, buf);
return -1;
}
return 0;
}
/*
* Test that it's possible to freeze a cgroup with a stopped process.
*/
static int test_cgfreezer_stopped(const char *root)
{
int pid, ret = KSFT_FAIL;
char *cgroup = NULL;
cgroup = cg_name(root, "cg_test_stopped");
if (!cgroup)
goto cleanup;
if (cg_create(cgroup))
goto cleanup;
pid = cg_run_nowait(cgroup, child_fn, NULL);
if (cg_wait_for_proc_count(cgroup, 1))
goto cleanup;
if (kill(pid, SIGSTOP))
goto cleanup;
if (cg_check_frozen(cgroup, false))
goto cleanup;
if (cg_freeze_wait(cgroup, true))
goto cleanup;
if (cg_freeze_wait(cgroup, false))
goto cleanup;
if (proc_check_stopped(pid))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (cgroup)
cg_destroy(cgroup);
free(cgroup);
return ret;
}
/*
* Test that it's possible to freeze a cgroup with a ptraced process.
*/
static int test_cgfreezer_ptraced(const char *root)
{
int pid, ret = KSFT_FAIL;
char *cgroup = NULL;
siginfo_t siginfo;
cgroup = cg_name(root, "cg_test_ptraced");
if (!cgroup)
goto cleanup;
if (cg_create(cgroup))
goto cleanup;
pid = cg_run_nowait(cgroup, child_fn, NULL);
if (cg_wait_for_proc_count(cgroup, 1))
goto cleanup;
if (ptrace(PTRACE_SEIZE, pid, NULL, NULL))
goto cleanup;
if (ptrace(PTRACE_INTERRUPT, pid, NULL, NULL))
goto cleanup;
waitpid(pid, NULL, 0);
if (cg_check_frozen(cgroup, false))
goto cleanup;
if (cg_freeze_wait(cgroup, true))
goto cleanup;
/*
* cg_check_frozen(cgroup, true) will fail here,
* because the task in in the TRACEd state.
*/
if (cg_freeze_wait(cgroup, false))
goto cleanup;
if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo))
goto cleanup;
if (ptrace(PTRACE_DETACH, pid, NULL, NULL))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (cgroup)
cg_destroy(cgroup);
free(cgroup);
return ret;
}
static int vfork_fn(const char *cgroup, void *arg)
{
int pid = vfork();
if (pid == 0)
while (true)
sleep(1);
return pid;
}
/*
* Test that it's possible to freeze a cgroup with a process,
* which called vfork() and is waiting for a child.
*/
static int test_cgfreezer_vfork(const char *root)
{
int ret = KSFT_FAIL;
char *cgroup = NULL;
cgroup = cg_name(root, "cg_test_vfork");
if (!cgroup)
goto cleanup;
if (cg_create(cgroup))
goto cleanup;
cg_run_nowait(cgroup, vfork_fn, NULL);
if (cg_wait_for_proc_count(cgroup, 2))
goto cleanup;
if (cg_freeze_wait(cgroup, true))
goto cleanup;
ret = KSFT_PASS;
cleanup:
if (cgroup)
cg_destroy(cgroup);
free(cgroup);
return ret;
}
#define T(x) { x, #x }
struct cgfreezer_test {
int (*fn)(const char *root);
const char *name;
} tests[] = {
T(test_cgfreezer_simple),
T(test_cgfreezer_tree),
T(test_cgfreezer_forkbomb),
T(test_cgfreezer_rmdir),
T(test_cgfreezer_migrate),
T(test_cgfreezer_ptrace),
T(test_cgfreezer_stopped),
T(test_cgfreezer_ptraced),
T(test_cgfreezer_vfork),
};
#undef T
int main(int argc, char *argv[])
{
char root[PATH_MAX];
int i, ret = EXIT_SUCCESS;
if (cg_find_unified_root(root, sizeof(root)))
ksft_exit_skip("cgroup v2 isn't mounted\n");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
case KSFT_PASS:
ksft_test_result_pass("%s\n", tests[i].name);
break;
case KSFT_SKIP:
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
ret = EXIT_FAILURE;
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
return ret;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment