Commit 12d27107 authored by Hugh Dickins, committed by Linus Torvalds

memcg: fix split_huge_page_refcounts()

This patch started off as a cleanup: __split_huge_page_refcount() has to
cope with two scenarios, when the hugepage being split is already on LRU,
and when it is not; but why does it have to split that accounting across
three different sites?  Consolidate it in lru_add_page_tail(), handling
evictable and unevictable alike, and use standard add_page_to_lru_list()
when accounting is needed (when the head is not yet on LRU).
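
In rough outline (condensed from the mm/swap.c hunks below, with
declarations, the active/inactive choice and the reclaim-stat update
elided), the consolidated tail placement reads:

	if (likely(PageLRU(page))) {
		/* head is on LRU and already counted as an hpage:
		 * just chain the tail in behind it, no extra accounting */
		list_add_tail(&page_tail->lru, &page->lru);
	} else {
		/* head not yet counted: let the standard helper account
		 * this one subpage, then fix its position so the tails
		 * stay in order */
		add_page_to_lru_list(zone, page_tail, lru);
		list_move_tail(&page_tail->lru, page_tail->lru.prev);
	}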

But a recent regression in -next (I guess from the removal of the
PageCgroupAcctLRU test in mem_cgroup_split_huge_fixup()) makes this now
a necessary fix:
under load, the MEM_CGROUP_ZSTAT count was wrapping to a huge number,
messing up reclaim calculations and causing a freeze at rmdir of cgroup.
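
For illustration only, a minimal userspace sketch of that wrap (not
kernel code: MEM_CGROUP_ZSTAT is kept in an unsigned long, and
HPAGE_PMD_NR is 512 with 4k pages on x86):

	#include <stdio.h>

	int main(void)
	{
		unsigned long zstat = 511;	/* pages actually accounted */

		zstat -= 512;		/* subtract one subpage too many */
		printf("%lu\n", zstat);	/* 18446744073709551615 on 64-bit */
		return 0;
	}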

Add a VM_BUG_ON to mem_cgroup_lru_del_list() when we're about to wrap that
count - this has not been the only such incident.  Document that
lru_add_page_tail() is for Transparent HugePages by #ifdef around it.
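
For reference, the added check next to the subtraction it guards (from
the mm/memcontrol.c hunk below): 1 << compound_order(page) is the number
of base pages this entry accounts for, 1 for an order-0 page and
HPAGE_PMD_NR for a pmd-mapped hugepage, and VM_BUG_ON only fires when
CONFIG_DEBUG_VM is enabled.

	/* about to underflow? better to crash loudly than wrap silently */
	VM_BUG_ON(MEM_CGROUP_ZSTAT(mz, lru) < (1 << compound_order(page)));
	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
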
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 0cee34fd
mm/huge_memory.c
@@ -1229,7 +1229,6 @@ static void __split_huge_page_refcount(struct page *page)
 {
 	int i;
 	struct zone *zone = page_zone(page);
-	int zonestat;
 	int tail_count = 0;
 
 	/* prevent PageLRU to go away from under us, and freeze lru stats */
@@ -1317,15 +1316,6 @@ static void __split_huge_page_refcount(struct page *page)
 	__dec_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
 	__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
 
-	/*
-	 * A hugepage counts for HPAGE_PMD_NR pages on the LRU statistics,
-	 * so adjust those appropriately if this page is on the LRU.
-	 */
-	if (PageLRU(page)) {
-		zonestat = NR_LRU_BASE + page_lru(page);
-		__mod_zone_page_state(zone, zonestat, -(HPAGE_PMD_NR-1));
-	}
-
 	ClearPageCompound(page);
 	compound_unlock(page);
 	spin_unlock_irq(&zone->lru_lock);
...
mm/memcontrol.c
@@ -1071,6 +1071,7 @@ void mem_cgroup_lru_del_list(struct page *page, enum lru_list lru)
 	VM_BUG_ON(!memcg);
 	mz = page_cgroup_zoneinfo(memcg, page);
 	/* huge page split is done under lru_lock. so, we have no races. */
+	VM_BUG_ON(MEM_CGROUP_ZSTAT(mz, lru) < (1 << compound_order(page)));
 	MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page);
 }
 
@@ -2465,9 +2466,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 void mem_cgroup_split_huge_fixup(struct page *head)
 {
 	struct page_cgroup *head_pc = lookup_page_cgroup(head);
-	struct mem_cgroup_per_zone *mz;
 	struct page_cgroup *pc;
-	enum lru_list lru;
 	int i;
 
 	if (mem_cgroup_disabled())
@@ -2478,15 +2477,8 @@ void mem_cgroup_split_huge_fixup(struct page *head)
 		smp_wmb();/* see __commit_charge() */
 		pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
 	}
-	/*
-	 * Tail pages will be added to LRU.
-	 * We hold lru_lock,then,reduce counter directly.
-	 */
-	lru = page_lru(head);
-	mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head);
-	MEM_CGROUP_ZSTAT(mz, lru) -= HPAGE_PMD_NR - 1;
 }
-#endif
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 /**
  * mem_cgroup_move_account - move account of the page
...
mm/swap.c
@@ -650,6 +650,7 @@ void __pagevec_release(struct pagevec *pvec)
 
 EXPORT_SYMBOL(__pagevec_release);
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /* used by __split_huge_page_refcount() */
 void lru_add_page_tail(struct zone* zone,
 		       struct page *page, struct page *page_tail)
@@ -666,8 +667,6 @@ void lru_add_page_tail(struct zone* zone,
 	SetPageLRU(page_tail);
 
 	if (page_evictable(page_tail, NULL)) {
-		struct lruvec *lruvec;
-
 		if (PageActive(page)) {
 			SetPageActive(page_tail);
 			active = 1;
@@ -677,18 +676,28 @@ void lru_add_page_tail(struct zone* zone,
 			lru = LRU_INACTIVE_ANON;
 		}
 		update_page_reclaim_stat(zone, page_tail, file, active);
-		lruvec = mem_cgroup_lru_add_list(zone, page_tail, lru);
-		if (likely(PageLRU(page)))
-			list_add(&page_tail->lru, page->lru.prev);
-		else
-			list_add(&page_tail->lru, lruvec->lists[lru].prev);
-		__mod_zone_page_state(zone, NR_LRU_BASE + lru,
-				      hpage_nr_pages(page_tail));
 	} else {
 		SetPageUnevictable(page_tail);
-		add_page_to_lru_list(zone, page_tail, LRU_UNEVICTABLE);
+		lru = LRU_UNEVICTABLE;
+	}
+
+	if (likely(PageLRU(page)))
+		list_add_tail(&page_tail->lru, &page->lru);
+	else {
+		struct list_head *list_head;
+		/*
+		 * Head page has not yet been counted, as an hpage,
+		 * so we must account for each subpage individually.
+		 *
+		 * Use the standard add function to put page_tail on the list,
+		 * but then correct its position so they all end up in order.
+		 */
+		add_page_to_lru_list(zone, page_tail, lru);
+		list_head = page_tail->lru.prev;
+		list_move_tail(&page_tail->lru, list_head);
 	}
 }
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static void ____pagevec_lru_add_fn(struct page *page, void *arg)
 {
...