Commit e8ea14cc authored by Johannes Weiner's avatar Johannes Weiner Committed by Linus Torvalds

mm: memcontrol: take a css reference for each charged page

Charges currently pin the css indirectly by playing tricks during
css_offline(): user pages stall the offlining process until all of them
have been reparented, whereas kmemcg acquires a keep-alive reference if
outstanding kernel pages are detected at that point.

In preparation for removing all this complexity, make the pinning explicit
and acquire a css reference for every charged page.
Signed-off-by: default avatarJohannes Weiner <hannes@cmpxchg.org>
Reviewed-by: default avatarVladimir Davydov <vdavydov@parallels.com>
Acked-by: default avatarMichal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 5ac8fb31
...@@ -112,6 +112,19 @@ static inline void css_get(struct cgroup_subsys_state *css) ...@@ -112,6 +112,19 @@ static inline void css_get(struct cgroup_subsys_state *css)
percpu_ref_get(&css->refcnt); percpu_ref_get(&css->refcnt);
} }
/**
 * css_get_many - acquire @n references on a css
 * @css: target css
 * @n: number of references to get
 *
 * The caller must already hold at least one reference on @css.
 */
static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
{
	/* root-level csses are permanent and carry no refcounting */
	if (css->flags & CSS_NO_REF)
		return;

	percpu_ref_get_many(&css->refcnt, n);
}
/** /**
* css_tryget - try to obtain a reference on the specified css * css_tryget - try to obtain a reference on the specified css
* @css: target css * @css: target css
...@@ -159,6 +172,19 @@ static inline void css_put(struct cgroup_subsys_state *css) ...@@ -159,6 +172,19 @@ static inline void css_put(struct cgroup_subsys_state *css)
percpu_ref_put(&css->refcnt); percpu_ref_put(&css->refcnt);
} }
/**
 * css_put_many - release @n references on a css
 * @css: target css
 * @n: number of references to put
 *
 * Put references obtained via css_get() and css_tryget_online().
 */
static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
{
	/* root-level csses are permanent and carry no refcounting */
	if (css->flags & CSS_NO_REF)
		return;

	percpu_ref_put_many(&css->refcnt, n);
}
/* bits in struct cgroup flags field */ /* bits in struct cgroup flags field */
enum { enum {
/* Control Group requires release notifications to userspace */ /* Control Group requires release notifications to userspace */
......
...@@ -147,27 +147,41 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref, ...@@ -147,27 +147,41 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
} }
/** /**
* percpu_ref_get - increment a percpu refcount * percpu_ref_get_many - increment a percpu refcount
* @ref: percpu_ref to get * @ref: percpu_ref to get
* @nr: number of references to get
* *
* Analagous to atomic_long_inc(). * Analogous to atomic_long_add().
* *
* This function is safe to call as long as @ref is between init and exit. * This function is safe to call as long as @ref is between init and exit.
*/ */
static inline void percpu_ref_get(struct percpu_ref *ref) static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
{ {
unsigned long __percpu *percpu_count; unsigned long __percpu *percpu_count;
rcu_read_lock_sched(); rcu_read_lock_sched();
if (__ref_is_percpu(ref, &percpu_count)) if (__ref_is_percpu(ref, &percpu_count))
this_cpu_inc(*percpu_count); this_cpu_add(*percpu_count, nr);
else else
atomic_long_inc(&ref->count); atomic_long_add(nr, &ref->count);
rcu_read_unlock_sched(); rcu_read_unlock_sched();
} }
/**
 * percpu_ref_get - increment a percpu refcount
 * @ref: percpu_ref to get
 *
 * Analogous to atomic_long_inc().
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
static inline void percpu_ref_get(struct percpu_ref *ref)
{
	/* single-reference convenience wrapper around the batched get */
	percpu_ref_get_many(ref, 1);
}
/** /**
* percpu_ref_tryget - try to increment a percpu refcount * percpu_ref_tryget - try to increment a percpu refcount
* @ref: percpu_ref to try-get * @ref: percpu_ref to try-get
...@@ -231,28 +245,43 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) ...@@ -231,28 +245,43 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
} }
/** /**
* percpu_ref_put - decrement a percpu refcount * percpu_ref_put_many - decrement a percpu refcount
* @ref: percpu_ref to put * @ref: percpu_ref to put
* @nr: number of references to put
* *
* Decrement the refcount, and if 0, call the release function (which was passed * Decrement the refcount, and if 0, call the release function (which was passed
* to percpu_ref_init()) * to percpu_ref_init())
* *
* This function is safe to call as long as @ref is between init and exit. * This function is safe to call as long as @ref is between init and exit.
*/ */
static inline void percpu_ref_put(struct percpu_ref *ref) static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
{ {
unsigned long __percpu *percpu_count; unsigned long __percpu *percpu_count;
rcu_read_lock_sched(); rcu_read_lock_sched();
if (__ref_is_percpu(ref, &percpu_count)) if (__ref_is_percpu(ref, &percpu_count))
this_cpu_dec(*percpu_count); this_cpu_sub(*percpu_count, nr);
else if (unlikely(atomic_long_dec_and_test(&ref->count))) else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
ref->release(ref); ref->release(ref);
rcu_read_unlock_sched(); rcu_read_unlock_sched();
} }
/**
 * percpu_ref_put - decrement a percpu refcount
 * @ref: percpu_ref to put
 *
 * Decrement the refcount, and if 0, call the release function (which was passed
 * to percpu_ref_init())
 *
 * This function is safe to call as long as @ref is between init and exit.
 */
static inline void percpu_ref_put(struct percpu_ref *ref)
{
	/* single-reference convenience wrapper around the batched put */
	percpu_ref_put_many(ref, 1);
}
/** /**
* percpu_ref_is_zero - test whether a percpu refcount reached zero * percpu_ref_is_zero - test whether a percpu refcount reached zero
* @ref: percpu_ref to test * @ref: percpu_ref to test
......
...@@ -2273,6 +2273,7 @@ static void drain_stock(struct memcg_stock_pcp *stock) ...@@ -2273,6 +2273,7 @@ static void drain_stock(struct memcg_stock_pcp *stock)
page_counter_uncharge(&old->memory, stock->nr_pages); page_counter_uncharge(&old->memory, stock->nr_pages);
if (do_swap_account) if (do_swap_account)
page_counter_uncharge(&old->memsw, stock->nr_pages); page_counter_uncharge(&old->memsw, stock->nr_pages);
css_put_many(&old->css, stock->nr_pages);
stock->nr_pages = 0; stock->nr_pages = 0;
} }
stock->cached = NULL; stock->cached = NULL;
...@@ -2530,6 +2531,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, ...@@ -2530,6 +2531,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
return -EINTR; return -EINTR;
done_restock: done_restock:
css_get_many(&memcg->css, batch);
if (batch > nr_pages) if (batch > nr_pages)
refill_stock(memcg, batch - nr_pages); refill_stock(memcg, batch - nr_pages);
done: done:
...@@ -2544,6 +2546,8 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) ...@@ -2544,6 +2546,8 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
page_counter_uncharge(&memcg->memory, nr_pages); page_counter_uncharge(&memcg->memory, nr_pages);
if (do_swap_account) if (do_swap_account)
page_counter_uncharge(&memcg->memsw, nr_pages); page_counter_uncharge(&memcg->memsw, nr_pages);
css_put_many(&memcg->css, nr_pages);
} }
/* /*
...@@ -2739,6 +2743,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, ...@@ -2739,6 +2743,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
page_counter_charge(&memcg->memory, nr_pages); page_counter_charge(&memcg->memory, nr_pages);
if (do_swap_account) if (do_swap_account)
page_counter_charge(&memcg->memsw, nr_pages); page_counter_charge(&memcg->memsw, nr_pages);
css_get_many(&memcg->css, nr_pages);
ret = 0; ret = 0;
} else if (ret) } else if (ret)
page_counter_uncharge(&memcg->kmem, nr_pages); page_counter_uncharge(&memcg->kmem, nr_pages);
...@@ -2754,8 +2759,10 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg, ...@@ -2754,8 +2759,10 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
page_counter_uncharge(&memcg->memsw, nr_pages); page_counter_uncharge(&memcg->memsw, nr_pages);
/* Not down to 0 */ /* Not down to 0 */
if (page_counter_uncharge(&memcg->kmem, nr_pages)) if (page_counter_uncharge(&memcg->kmem, nr_pages)) {
css_put_many(&memcg->css, nr_pages);
return; return;
}
/* /*
* Releases a reference taken in kmem_cgroup_css_offline in case * Releases a reference taken in kmem_cgroup_css_offline in case
...@@ -2767,6 +2774,8 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg, ...@@ -2767,6 +2774,8 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg,
*/ */
if (memcg_kmem_test_and_clear_dead(memcg)) if (memcg_kmem_test_and_clear_dead(memcg))
css_put(&memcg->css); css_put(&memcg->css);
css_put_many(&memcg->css, nr_pages);
} }
/* /*
...@@ -3394,10 +3403,13 @@ static int mem_cgroup_move_parent(struct page *page, ...@@ -3394,10 +3403,13 @@ static int mem_cgroup_move_parent(struct page *page,
ret = mem_cgroup_move_account(page, nr_pages, ret = mem_cgroup_move_account(page, nr_pages,
pc, child, parent); pc, child, parent);
if (!ret) { if (!ret) {
if (!mem_cgroup_is_root(parent))
css_get_many(&parent->css, nr_pages);
/* Take charge off the local counters */ /* Take charge off the local counters */
page_counter_cancel(&child->memory, nr_pages); page_counter_cancel(&child->memory, nr_pages);
if (do_swap_account) if (do_swap_account)
page_counter_cancel(&child->memsw, nr_pages); page_counter_cancel(&child->memsw, nr_pages);
css_put_many(&child->css, nr_pages);
} }
if (nr_pages > 1) if (nr_pages > 1)
...@@ -5767,7 +5779,6 @@ static void __mem_cgroup_clear_mc(void) ...@@ -5767,7 +5779,6 @@ static void __mem_cgroup_clear_mc(void)
{ {
struct mem_cgroup *from = mc.from; struct mem_cgroup *from = mc.from;
struct mem_cgroup *to = mc.to; struct mem_cgroup *to = mc.to;
int i;
/* we must uncharge all the leftover precharges from mc.to */ /* we must uncharge all the leftover precharges from mc.to */
if (mc.precharge) { if (mc.precharge) {
...@@ -5795,8 +5806,7 @@ static void __mem_cgroup_clear_mc(void) ...@@ -5795,8 +5806,7 @@ static void __mem_cgroup_clear_mc(void)
if (!mem_cgroup_is_root(mc.to)) if (!mem_cgroup_is_root(mc.to))
page_counter_uncharge(&mc.to->memory, mc.moved_swap); page_counter_uncharge(&mc.to->memory, mc.moved_swap);
for (i = 0; i < mc.moved_swap; i++) css_put_many(&mc.from->css, mc.moved_swap);
css_put(&mc.from->css);
/* we've already done css_get(mc.to) */ /* we've already done css_get(mc.to) */
mc.moved_swap = 0; mc.moved_swap = 0;
...@@ -6343,6 +6353,9 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, ...@@ -6343,6 +6353,9 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
__this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file); __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
memcg_check_events(memcg, dummy_page); memcg_check_events(memcg, dummy_page);
local_irq_restore(flags); local_irq_restore(flags);
if (!mem_cgroup_is_root(memcg))
css_put_many(&memcg->css, max(nr_mem, nr_memsw));
} }
static void uncharge_list(struct list_head *page_list) static void uncharge_list(struct list_head *page_list)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment