Commit fba94807 authored by Tejun Heo

cgroup, memcg: move cgroup->event_list[_lock] and event callbacks into memcg

cgroup_event is being moved from cgroup core to memcg and the
implementation is already moved by the previous patch.  This patch
moves the data fields and callbacks.

* cgroup->event_list[_lock] are moved to mem_cgroup.

* cftype->[un]register_event() are moved to cgroup_event.  This makes
  it impossible for individual cftype definitions to specify their
  event callbacks.  This is worked around by simply hard-coding the
  filename to event callback mapping in cgroup_write_event_control().
  This is awkward and inflexible, which is actually desirable given
  that we don't want to grow more usages of this feature.

* The eventfd_ctx declaration is removed from cgroup.h, which leaves
  vmpressure.h without an eventfd_ctx declaration.  Include eventfd.h
  from vmpressure.h.

v2: Use file name from dentry instead of cftype.  This will allow
    removing all cftype handling in the function.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
parent b5557c4c
...@@ -29,7 +29,6 @@ struct cgroup_subsys; ...@@ -29,7 +29,6 @@ struct cgroup_subsys;
struct inode; struct inode;
struct cgroup; struct cgroup;
struct css_id; struct css_id;
struct eventfd_ctx;
extern int cgroup_init_early(void); extern int cgroup_init_early(void);
extern int cgroup_init(void); extern int cgroup_init(void);
...@@ -239,10 +238,6 @@ struct cgroup { ...@@ -239,10 +238,6 @@ struct cgroup {
struct rcu_head rcu_head; struct rcu_head rcu_head;
struct work_struct destroy_work; struct work_struct destroy_work;
/* List of events which userspace want to receive */
struct list_head event_list;
spinlock_t event_list_lock;
/* directory xattrs */ /* directory xattrs */
struct simple_xattrs xattrs; struct simple_xattrs xattrs;
}; };
...@@ -506,25 +501,6 @@ struct cftype { ...@@ -506,25 +501,6 @@ struct cftype {
int (*trigger)(struct cgroup_subsys_state *css, unsigned int event); int (*trigger)(struct cgroup_subsys_state *css, unsigned int event);
int (*release)(struct inode *inode, struct file *file); int (*release)(struct inode *inode, struct file *file);
/*
* register_event() callback will be used to add new userspace
* waiter for changes related to the cftype. Implement it if
* you want to provide this functionality. Use eventfd_signal()
* on eventfd to send notification to userspace.
*/
int (*register_event)(struct cgroup_subsys_state *css,
struct cftype *cft, struct eventfd_ctx *eventfd,
const char *args);
/*
* unregister_event() callback will be called when userspace
* closes the eventfd or on cgroup removing.
* This callback must be implemented, if you want provide
* notification functionality.
*/
void (*unregister_event)(struct cgroup_subsys_state *css,
struct cftype *cft,
struct eventfd_ctx *eventfd);
}; };
/* /*
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <linux/gfp.h> #include <linux/gfp.h>
#include <linux/types.h> #include <linux/types.h>
#include <linux/cgroup.h> #include <linux/cgroup.h>
#include <linux/eventfd.h>
struct vmpressure { struct vmpressure {
unsigned long scanned; unsigned long scanned;
......
...@@ -1352,8 +1352,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) ...@@ -1352,8 +1352,6 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
INIT_LIST_HEAD(&cgrp->pidlists); INIT_LIST_HEAD(&cgrp->pidlists);
mutex_init(&cgrp->pidlist_mutex); mutex_init(&cgrp->pidlist_mutex);
cgrp->dummy_css.cgroup = cgrp; cgrp->dummy_css.cgroup = cgrp;
INIT_LIST_HEAD(&cgrp->event_list);
spin_lock_init(&cgrp->event_list_lock);
simple_xattrs_init(&cgrp->xattrs); simple_xattrs_init(&cgrp->xattrs);
} }
......
...@@ -248,6 +248,22 @@ struct cgroup_event { ...@@ -248,6 +248,22 @@ struct cgroup_event {
* Each of these stored in a list by the cgroup. * Each of these stored in a list by the cgroup.
*/ */
struct list_head list; struct list_head list;
/*
* register_event() callback will be used to add new userspace
* waiter for changes related to this event. Use eventfd_signal()
* on eventfd to send notification to userspace.
*/
int (*register_event)(struct cgroup_subsys_state *css,
struct cftype *cft, struct eventfd_ctx *eventfd,
const char *args);
/*
* unregister_event() callback will be called when userspace closes
* the eventfd or on cgroup removing. This callback must be set,
* if you want provide notification functionality.
*/
void (*unregister_event)(struct cgroup_subsys_state *css,
struct cftype *cft,
struct eventfd_ctx *eventfd);
/* /*
* All fields below needed to unregister event when * All fields below needed to unregister event when
* userspace closes eventfd. * userspace closes eventfd.
...@@ -362,6 +378,10 @@ struct mem_cgroup { ...@@ -362,6 +378,10 @@ struct mem_cgroup {
atomic_t numainfo_updating; atomic_t numainfo_updating;
#endif #endif
/* List of events which userspace want to receive */
struct list_head event_list;
spinlock_t event_list_lock;
struct mem_cgroup_per_node *nodeinfo[0]; struct mem_cgroup_per_node *nodeinfo[0];
/* WARNING: nodeinfo must be the last member here */ /* WARNING: nodeinfo must be the last member here */
}; };
...@@ -5992,7 +6012,7 @@ static void cgroup_event_remove(struct work_struct *work) ...@@ -5992,7 +6012,7 @@ static void cgroup_event_remove(struct work_struct *work)
remove_wait_queue(event->wqh, &event->wait); remove_wait_queue(event->wqh, &event->wait);
event->cft->unregister_event(css, event->cft, event->eventfd); event->unregister_event(css, event->cft, event->eventfd);
/* Notify userspace the event is going away. */ /* Notify userspace the event is going away. */
eventfd_signal(event->eventfd, 1); eventfd_signal(event->eventfd, 1);
...@@ -6012,7 +6032,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, ...@@ -6012,7 +6032,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
{ {
struct cgroup_event *event = container_of(wait, struct cgroup_event *event = container_of(wait,
struct cgroup_event, wait); struct cgroup_event, wait);
struct cgroup *cgrp = event->css->cgroup; struct mem_cgroup *memcg = mem_cgroup_from_css(event->css);
unsigned long flags = (unsigned long)key; unsigned long flags = (unsigned long)key;
if (flags & POLLHUP) { if (flags & POLLHUP) {
...@@ -6025,7 +6045,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, ...@@ -6025,7 +6045,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
* side will require wqh->lock via remove_wait_queue(), * side will require wqh->lock via remove_wait_queue(),
* which we hold. * which we hold.
*/ */
spin_lock(&cgrp->event_list_lock); spin_lock(&memcg->event_list_lock);
if (!list_empty(&event->list)) { if (!list_empty(&event->list)) {
list_del_init(&event->list); list_del_init(&event->list);
/* /*
...@@ -6034,7 +6054,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode, ...@@ -6034,7 +6054,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
*/ */
schedule_work(&event->remove); schedule_work(&event->remove);
} }
spin_unlock(&cgrp->event_list_lock); spin_unlock(&memcg->event_list_lock);
} }
return 0; return 0;
...@@ -6059,12 +6079,13 @@ static void cgroup_event_ptable_queue_proc(struct file *file, ...@@ -6059,12 +6079,13 @@ static void cgroup_event_ptable_queue_proc(struct file *file,
static int cgroup_write_event_control(struct cgroup_subsys_state *css, static int cgroup_write_event_control(struct cgroup_subsys_state *css,
struct cftype *cft, const char *buffer) struct cftype *cft, const char *buffer)
{ {
struct cgroup *cgrp = css->cgroup; struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct cgroup_event *event; struct cgroup_event *event;
struct cgroup_subsys_state *cfile_css; struct cgroup_subsys_state *cfile_css;
unsigned int efd, cfd; unsigned int efd, cfd;
struct fd efile; struct fd efile;
struct fd cfile; struct fd cfile;
const char *name;
char *endp; char *endp;
int ret; int ret;
...@@ -6118,6 +6139,31 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, ...@@ -6118,6 +6139,31 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css,
goto out_put_cfile; goto out_put_cfile;
} }
/*
* Determine the event callbacks and set them in @event. This used
* to be done via struct cftype but cgroup core no longer knows
* about these events. The following is crude but the whole thing
* is for compatibility anyway.
*/
name = cfile.file->f_dentry->d_name.name;
if (!strcmp(name, "memory.usage_in_bytes")) {
event->register_event = mem_cgroup_usage_register_event;
event->unregister_event = mem_cgroup_usage_unregister_event;
} else if (!strcmp(name, "memory.oom_control")) {
event->register_event = mem_cgroup_oom_register_event;
event->unregister_event = mem_cgroup_oom_unregister_event;
} else if (!strcmp(name, "memory.pressure_level")) {
event->register_event = vmpressure_register_event;
event->unregister_event = vmpressure_unregister_event;
} else if (!strcmp(name, "memory.memsw.usage_in_bytes")) {
event->register_event = mem_cgroup_usage_register_event;
event->unregister_event = mem_cgroup_usage_unregister_event;
} else {
ret = -EINVAL;
goto out_put_cfile;
}
/* /*
* Verify @cfile should belong to @css. Also, remaining events are * Verify @cfile should belong to @css. Also, remaining events are
* automatically removed on cgroup destruction but the removal is * automatically removed on cgroup destruction but the removal is
...@@ -6135,21 +6181,15 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css, ...@@ -6135,21 +6181,15 @@ static int cgroup_write_event_control(struct cgroup_subsys_state *css,
if (ret) if (ret)
goto out_put_cfile; goto out_put_cfile;
if (!event->cft->register_event || !event->cft->unregister_event) { ret = event->register_event(css, event->cft, event->eventfd, buffer);
ret = -EINVAL;
goto out_put_css;
}
ret = event->cft->register_event(css, event->cft,
event->eventfd, buffer);
if (ret) if (ret)
goto out_put_css; goto out_put_css;
efile.file->f_op->poll(efile.file, &event->pt); efile.file->f_op->poll(efile.file, &event->pt);
spin_lock(&cgrp->event_list_lock); spin_lock(&memcg->event_list_lock);
list_add(&event->list, &cgrp->event_list); list_add(&event->list, &memcg->event_list);
spin_unlock(&cgrp->event_list_lock); spin_unlock(&memcg->event_list_lock);
fdput(cfile); fdput(cfile);
fdput(efile); fdput(efile);
...@@ -6175,8 +6215,6 @@ static struct cftype mem_cgroup_files[] = { ...@@ -6175,8 +6215,6 @@ static struct cftype mem_cgroup_files[] = {
.name = "usage_in_bytes", .name = "usage_in_bytes",
.private = MEMFILE_PRIVATE(_MEM, RES_USAGE), .private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
.read = mem_cgroup_read, .read = mem_cgroup_read,
.register_event = mem_cgroup_usage_register_event,
.unregister_event = mem_cgroup_usage_unregister_event,
}, },
{ {
.name = "max_usage_in_bytes", .name = "max_usage_in_bytes",
...@@ -6236,14 +6274,10 @@ static struct cftype mem_cgroup_files[] = { ...@@ -6236,14 +6274,10 @@ static struct cftype mem_cgroup_files[] = {
.name = "oom_control", .name = "oom_control",
.read_map = mem_cgroup_oom_control_read, .read_map = mem_cgroup_oom_control_read,
.write_u64 = mem_cgroup_oom_control_write, .write_u64 = mem_cgroup_oom_control_write,
.register_event = mem_cgroup_oom_register_event,
.unregister_event = mem_cgroup_oom_unregister_event,
.private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL), .private = MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL),
}, },
{ {
.name = "pressure_level", .name = "pressure_level",
.register_event = vmpressure_register_event,
.unregister_event = vmpressure_unregister_event,
}, },
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
{ {
...@@ -6291,8 +6325,6 @@ static struct cftype memsw_cgroup_files[] = { ...@@ -6291,8 +6325,6 @@ static struct cftype memsw_cgroup_files[] = {
.name = "memsw.usage_in_bytes", .name = "memsw.usage_in_bytes",
.private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
.read = mem_cgroup_read, .read = mem_cgroup_read,
.register_event = mem_cgroup_usage_register_event,
.unregister_event = mem_cgroup_usage_unregister_event,
}, },
{ {
.name = "memsw.max_usage_in_bytes", .name = "memsw.max_usage_in_bytes",
...@@ -6483,6 +6515,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) ...@@ -6483,6 +6515,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
mutex_init(&memcg->thresholds_lock); mutex_init(&memcg->thresholds_lock);
spin_lock_init(&memcg->move_lock); spin_lock_init(&memcg->move_lock);
vmpressure_init(&memcg->vmpressure); vmpressure_init(&memcg->vmpressure);
INIT_LIST_HEAD(&memcg->event_list);
spin_lock_init(&memcg->event_list_lock);
return &memcg->css; return &memcg->css;
...@@ -6555,7 +6589,6 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) ...@@ -6555,7 +6589,6 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
{ {
struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup *memcg = mem_cgroup_from_css(css);
struct cgroup *cgrp = css->cgroup;
struct cgroup_event *event, *tmp; struct cgroup_event *event, *tmp;
/* /*
...@@ -6563,12 +6596,12 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) ...@@ -6563,12 +6596,12 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
* Notify userspace about cgroup removing only after rmdir of cgroup * Notify userspace about cgroup removing only after rmdir of cgroup
* directory to avoid race between userspace and kernelspace. * directory to avoid race between userspace and kernelspace.
*/ */
spin_lock(&cgrp->event_list_lock); spin_lock(&memcg->event_list_lock);
list_for_each_entry_safe(event, tmp, &cgrp->event_list, list) { list_for_each_entry_safe(event, tmp, &memcg->event_list, list) {
list_del_init(&event->list); list_del_init(&event->list);
schedule_work(&event->remove); schedule_work(&event->remove);
} }
spin_unlock(&cgrp->event_list_lock); spin_unlock(&memcg->event_list_lock);
kmem_cgroup_css_offline(memcg); kmem_cgroup_css_offline(memcg);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment