Commit 312734c0 authored by KAMEZAWA Hiroyuki, committed by Linus Torvalds

memcg: remove PCG_MOVE_LOCK flag from page_cgroup

PCG_MOVE_LOCK is used as a bit spinlock to avoid a race between overwriting
pc->mem_cgroup and per-memcg page statistics accounting.  This lock helps
to avoid the race, but the race is very rare because moving tasks between
cgroups is not a common operation.  So using 1 bit per page seems too
costly.

This patch changes this lock to a per-memcg spinlock and removes
PCG_MOVE_LOCK.

If a finer-grained lock is required, we'll be able to add some hashes, but
I'd like to start from this.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 619d094b
...@@ -7,7 +7,6 @@ enum { ...@@ -7,7 +7,6 @@ enum {
PCG_USED, /* this object is in use. */ PCG_USED, /* this object is in use. */
PCG_MIGRATION, /* under page migration */ PCG_MIGRATION, /* under page migration */
/* flags for mem_cgroup and file and I/O status */ /* flags for mem_cgroup and file and I/O status */
PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
PCG_FILE_MAPPED, /* page is accounted as "mapped" */ PCG_FILE_MAPPED, /* page is accounted as "mapped" */
__NR_PCG_FLAGS, __NR_PCG_FLAGS,
}; };
...@@ -89,24 +88,6 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc) ...@@ -89,24 +88,6 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc)
bit_spin_unlock(PCG_LOCK, &pc->flags); bit_spin_unlock(PCG_LOCK, &pc->flags);
} }
/*
 * Take the per-page PCG_MOVE_LOCK bit spinlock in pc->flags with local
 * IRQs disabled.
 *
 * @pc:    page_cgroup whose flags word holds the lock bit.
 * @flags: receives the saved IRQ state; hand the same pointer to
 *         move_unlock_page_cgroup().
 */
static inline void move_lock_page_cgroup(struct page_cgroup *pc,
unsigned long *flags)
{
/*
 * Updates to pc->flags for page cache stats can come from both normal
 * context and IRQ context. Disable IRQs to avoid deadlock.
 */
local_irq_save(*flags);
bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
}
/*
 * Release the PCG_MOVE_LOCK bit spinlock in pc->flags, then restore the
 * IRQ state held in *flags.
 *
 * @pc:    page_cgroup whose flags word holds the lock bit.
 * @flags: IRQ state to restore.
 */
static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
unsigned long *flags)
{
/* Drop the lock bit before IRQs are restored. */
bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
local_irq_restore(*flags);
}
#else /* CONFIG_CGROUP_MEM_RES_CTLR */ #else /* CONFIG_CGROUP_MEM_RES_CTLR */
struct page_cgroup; struct page_cgroup;
......
...@@ -300,6 +300,8 @@ struct mem_cgroup { ...@@ -300,6 +300,8 @@ struct mem_cgroup {
* set > 0 if pages under this cgroup are moving to other cgroup. * set > 0 if pages under this cgroup are moving to other cgroup.
*/ */
atomic_t moving_account; atomic_t moving_account;
/* taken only while moving_account > 0 */
spinlock_t move_lock;
/* /*
* percpu counter. * percpu counter.
*/ */
...@@ -1376,6 +1378,24 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg) ...@@ -1376,6 +1378,24 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
return false; return false;
} }
/*
 * Acquire memcg->move_lock, saving the IRQ state into *flags.
 *
 * Take this lock when
 * - code tries to modify a page's memcg while the page is USED.
 * - code tries to modify page state accounting in a memcg.
 * See mem_cgroup_stealed(), too.
 *
 * @memcg: memory cgroup whose move_lock is taken.
 * @flags: receives the saved IRQ state; hand the same pointer to
 *         move_unlock_mem_cgroup().
 */
static void move_lock_mem_cgroup(struct mem_cgroup *memcg,
unsigned long *flags)
{
spin_lock_irqsave(&memcg->move_lock, *flags);
}
/*
 * Release memcg->move_lock and restore the IRQ state held in *flags.
 *
 * @memcg: memory cgroup whose move_lock is released.
 * @flags: IRQ state to restore.
 */
static void move_unlock_mem_cgroup(struct mem_cgroup *memcg,
unsigned long *flags)
{
spin_unlock_irqrestore(&memcg->move_lock, *flags);
}
/** /**
* mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode. * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
* @memcg: The memory cgroup that went over limit * @memcg: The memory cgroup that went over limit
...@@ -1900,7 +1920,7 @@ void mem_cgroup_update_page_stat(struct page *page, ...@@ -1900,7 +1920,7 @@ void mem_cgroup_update_page_stat(struct page *page,
if (mem_cgroup_disabled()) if (mem_cgroup_disabled())
return; return;
again:
rcu_read_lock(); rcu_read_lock();
memcg = pc->mem_cgroup; memcg = pc->mem_cgroup;
if (unlikely(!memcg || !PageCgroupUsed(pc))) if (unlikely(!memcg || !PageCgroupUsed(pc)))
...@@ -1908,11 +1928,13 @@ void mem_cgroup_update_page_stat(struct page *page, ...@@ -1908,11 +1928,13 @@ void mem_cgroup_update_page_stat(struct page *page,
/* pc->mem_cgroup is unstable ? */ /* pc->mem_cgroup is unstable ? */
if (unlikely(mem_cgroup_stealed(memcg))) { if (unlikely(mem_cgroup_stealed(memcg))) {
/* take a lock against to access pc->mem_cgroup */ /* take a lock against to access pc->mem_cgroup */
move_lock_page_cgroup(pc, &flags); move_lock_mem_cgroup(memcg, &flags);
if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
move_unlock_mem_cgroup(memcg, &flags);
rcu_read_unlock();
goto again;
}
need_unlock = true; need_unlock = true;
memcg = pc->mem_cgroup;
if (!memcg || !PageCgroupUsed(pc))
goto out;
} }
switch (idx) { switch (idx) {
...@@ -1931,7 +1953,7 @@ void mem_cgroup_update_page_stat(struct page *page, ...@@ -1931,7 +1953,7 @@ void mem_cgroup_update_page_stat(struct page *page,
out: out:
if (unlikely(need_unlock)) if (unlikely(need_unlock))
move_unlock_page_cgroup(pc, &flags); move_unlock_mem_cgroup(memcg, &flags);
rcu_read_unlock(); rcu_read_unlock();
} }
...@@ -2500,8 +2522,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, ...@@ -2500,8 +2522,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\ #define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION))
(1 << PCG_MIGRATION))
/* /*
* Because tail pages are not marked as "used", set it. We're under * Because tail pages are not marked as "used", set it. We're under
* zone->lru_lock, 'splitting on pmd' and compound_lock. * zone->lru_lock, 'splitting on pmd' and compound_lock.
...@@ -2572,7 +2593,7 @@ static int mem_cgroup_move_account(struct page *page, ...@@ -2572,7 +2593,7 @@ static int mem_cgroup_move_account(struct page *page,
if (!PageCgroupUsed(pc) || pc->mem_cgroup != from) if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
goto unlock; goto unlock;
move_lock_page_cgroup(pc, &flags); move_lock_mem_cgroup(from, &flags);
if (PageCgroupFileMapped(pc)) { if (PageCgroupFileMapped(pc)) {
/* Update mapped_file data for mem_cgroup */ /* Update mapped_file data for mem_cgroup */
...@@ -2596,7 +2617,7 @@ static int mem_cgroup_move_account(struct page *page, ...@@ -2596,7 +2617,7 @@ static int mem_cgroup_move_account(struct page *page,
* guaranteed that "to" is never removed. So, we don't check rmdir * guaranteed that "to" is never removed. So, we don't check rmdir
* status here. * status here.
*/ */
move_unlock_page_cgroup(pc, &flags); move_unlock_mem_cgroup(from, &flags);
ret = 0; ret = 0;
unlock: unlock:
unlock_page_cgroup(pc); unlock_page_cgroup(pc);
...@@ -4971,6 +4992,7 @@ mem_cgroup_create(struct cgroup *cont) ...@@ -4971,6 +4992,7 @@ mem_cgroup_create(struct cgroup *cont)
atomic_set(&memcg->refcnt, 1); atomic_set(&memcg->refcnt, 1);
memcg->move_charge_at_immigrate = 0; memcg->move_charge_at_immigrate = 0;
mutex_init(&memcg->thresholds_lock); mutex_init(&memcg->thresholds_lock);
spin_lock_init(&memcg->move_lock);
return &memcg->css; return &memcg->css;
free_out: free_out:
__mem_cgroup_free(memcg); __mem_cgroup_free(memcg);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment