Commit 747db954 authored by Johannes Weiner, committed by Linus Torvalds

mm: memcontrol: use page lists for uncharge batching

Pages are now uncharged at release time, and all sources of batched
uncharges operate on lists of pages.  Directly use those lists, and
get rid of the per-task batching state.

This also batches statistics accounting, in addition to the res
counter charges, to reduce IRQ-disabling and re-enabling.
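
Purely as an illustration of the new calling convention (this sketch is not part of the patch): a caller that used to bracket per-page uncharges with mem_cgroup_uncharge_start()/mem_cgroup_uncharge_end() now collects its doomed pages on a list and uncharges them in one go, the same way release_pages() and shrink_page_list() are converted below. The helper drop_page_list() is hypothetical, and free_hot_cold_page_list() is only visible to code inside mm/ via "internal.h".

#include <linux/list.h>
#include <linux/memcontrol.h>
#include "internal.h"	/* free_hot_cold_page_list(); assumes this lives in mm/ */

/* Hypothetical caller: pages_to_free holds pages whose last reference
 * has been dropped, strung together on their page->lru links. */
static void drop_page_list(struct list_head *pages_to_free)
{
	/*
	 * Old scheme (removed by this patch), roughly:
	 *
	 *	mem_cgroup_uncharge_start();
	 *	list_for_each_entry(page, pages_to_free, lru)
	 *		mem_cgroup_uncharge(page);
	 *	mem_cgroup_uncharge_end();
	 *
	 * New scheme: one call walks the list, batching the res_counter
	 * uncharges per memcg and doing the statistics updates in a
	 * single IRQ-disabled section instead of one per page.
	 */
	mem_cgroup_uncharge_list(pages_to_free);

	/* then hand the pages back to the allocator */
	free_hot_cold_page_list(pages_to_free, true);
}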
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 0a31bc97
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -59,12 +59,8 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
 void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
 			      bool lrucare);
 void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg);
 
 void mem_cgroup_uncharge(struct page *page);
-
-/* Batched uncharging */
-void mem_cgroup_uncharge_start(void);
-void mem_cgroup_uncharge_end(void);
+void mem_cgroup_uncharge_list(struct list_head *page_list);
 
 void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
 			bool lrucare);
@@ -233,11 +229,7 @@ static inline void mem_cgroup_uncharge(struct page *page)
 {
 }
 
-static inline void mem_cgroup_uncharge_start(void)
-{
-}
-
-static inline void mem_cgroup_uncharge_end(void)
+static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
 {
 }
 
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1628,12 +1628,6 @@ struct task_struct {
 	unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
 #ifdef CONFIG_MEMCG /* memcg uses this to do batch job */
-	struct memcg_batch_info {
-		int do_batch;	/* incremented when batch uncharge started */
-		struct mem_cgroup *memcg; /* target memcg of uncharge */
-		unsigned long nr_pages;	/* uncharged usage */
-		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
-	} memcg_batch;
 	unsigned int memcg_kmem_skip_account;
 	struct memcg_oom_info {
 		struct mem_cgroup *memcg;
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1346,10 +1346,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-#ifdef CONFIG_MEMCG
-	p->memcg_batch.do_batch = 0;
-	p->memcg_batch.memcg = NULL;
-#endif
 #ifdef CONFIG_BCACHE
 	p->sequential_io	= 0;
 	p->sequential_io_avg	= 0;
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3581,53 +3581,6 @@ static int mem_cgroup_move_parent(struct page *page,
 	return ret;
 }
 
-/*
- * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate.
- * In that cases, pages are freed continuously and we can expect pages
- * are in the same memcg. All these calls itself limits the number of
- * pages freed at once, then uncharge_start/end() is called properly.
- * This may be called prural(2) times in a context,
- */
-void mem_cgroup_uncharge_start(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	current->memcg_batch.do_batch++;
-	/* We can do nest. */
-	if (current->memcg_batch.do_batch == 1) {
-		current->memcg_batch.memcg = NULL;
-		current->memcg_batch.nr_pages = 0;
-		current->memcg_batch.memsw_nr_pages = 0;
-	}
-	local_irq_restore(flags);
-}
-
-void mem_cgroup_uncharge_end(void)
-{
-	struct memcg_batch_info *batch = &current->memcg_batch;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	VM_BUG_ON(!batch->do_batch);
-	if (--batch->do_batch) /* If stacked, do nothing */
-		goto out;
-	/*
-	 * This "batch->memcg" is valid without any css_get/put etc...
-	 * bacause we hide charges behind us.
-	 */
-	if (batch->nr_pages)
-		res_counter_uncharge(&batch->memcg->res,
-				     batch->nr_pages * PAGE_SIZE);
-	if (batch->memsw_nr_pages)
-		res_counter_uncharge(&batch->memcg->memsw,
-				     batch->memsw_nr_pages * PAGE_SIZE);
-	memcg_oom_recover(batch->memcg);
-out:
-	local_irq_restore(flags);
-}
-
 #ifdef CONFIG_MEMCG_SWAP
 static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
 					bool charge)
@@ -6554,6 +6507,98 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
 	cancel_charge(memcg, nr_pages);
 }
 
+static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
+			   unsigned long nr_mem, unsigned long nr_memsw,
+			   unsigned long nr_anon, unsigned long nr_file,
+			   unsigned long nr_huge, struct page *dummy_page)
+{
+	unsigned long flags;
+
+	if (nr_mem)
+		res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
+	if (nr_memsw)
+		res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
+
+	memcg_oom_recover(memcg);
+
+	local_irq_save(flags);
+	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);
+	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
+	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
+	__this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
+	__this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
+	memcg_check_events(memcg, dummy_page);
+	local_irq_restore(flags);
+}
+
+static void uncharge_list(struct list_head *page_list)
+{
+	struct mem_cgroup *memcg = NULL;
+	unsigned long nr_memsw = 0;
+	unsigned long nr_anon = 0;
+	unsigned long nr_file = 0;
+	unsigned long nr_huge = 0;
+	unsigned long pgpgout = 0;
+	unsigned long nr_mem = 0;
+	struct list_head *next;
+	struct page *page;
+
+	next = page_list->next;
+	do {
+		unsigned int nr_pages = 1;
+		struct page_cgroup *pc;
+
+		page = list_entry(next, struct page, lru);
+		next = page->lru.next;
+
+		VM_BUG_ON_PAGE(PageLRU(page), page);
+		VM_BUG_ON_PAGE(page_count(page), page);
+
+		pc = lookup_page_cgroup(page);
+		if (!PageCgroupUsed(pc))
+			continue;
+
+		/*
+		 * Nobody should be changing or seriously looking at
+		 * pc->mem_cgroup and pc->flags at this point, we have
+		 * fully exclusive access to the page.
+		 */
+
+		if (memcg != pc->mem_cgroup) {
+			if (memcg) {
+				uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+					       nr_anon, nr_file, nr_huge, page);
+				pgpgout = nr_mem = nr_memsw = 0;
+				nr_anon = nr_file = nr_huge = 0;
+			}
+			memcg = pc->mem_cgroup;
+		}
+
+		if (PageTransHuge(page)) {
+			nr_pages <<= compound_order(page);
+			VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+			nr_huge += nr_pages;
+		}
+
+		if (PageAnon(page))
+			nr_anon += nr_pages;
+		else
+			nr_file += nr_pages;
+
+		if (pc->flags & PCG_MEM)
+			nr_mem += nr_pages;
+		if (pc->flags & PCG_MEMSW)
+			nr_memsw += nr_pages;
+		pc->flags = 0;
+
+		pgpgout++;
+	} while (next != page_list);
+
+	if (memcg)
+		uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
+			       nr_anon, nr_file, nr_huge, page);
+}
+
 /**
  * mem_cgroup_uncharge - uncharge a page
  * @page: page to uncharge
@@ -6563,67 +6608,34 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
  */
 void mem_cgroup_uncharge(struct page *page)
 {
-	struct memcg_batch_info *batch;
-	unsigned int nr_pages = 1;
-	struct mem_cgroup *memcg;
 	struct page_cgroup *pc;
-	unsigned long pc_flags;
-	unsigned long flags;
-
-	VM_BUG_ON_PAGE(PageLRU(page), page);
-	VM_BUG_ON_PAGE(page_count(page), page);
 
 	if (mem_cgroup_disabled())
 		return;
 
+	/* Don't touch page->lru of any random page, pre-check: */
 	pc = lookup_page_cgroup(page);
-
-	/* Every final put_page() ends up here */
 	if (!PageCgroupUsed(pc))
 		return;
 
-	if (PageTransHuge(page)) {
-		nr_pages <<= compound_order(page);
-		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-	}
-	/*
-	 * Nobody should be changing or seriously looking at
-	 * pc->mem_cgroup and pc->flags at this point, we have fully
-	 * exclusive access to the page.
-	 */
-	memcg = pc->mem_cgroup;
-	pc_flags = pc->flags;
-	pc->flags = 0;
-
-	local_irq_save(flags);
-
-	if (nr_pages > 1)
-		goto direct;
-	if (unlikely(test_thread_flag(TIF_MEMDIE)))
-		goto direct;
-	batch = &current->memcg_batch;
-	if (!batch->do_batch)
-		goto direct;
-	if (batch->memcg && batch->memcg != memcg)
-		goto direct;
-	if (!batch->memcg)
-		batch->memcg = memcg;
-	if (pc_flags & PCG_MEM)
-		batch->nr_pages++;
-	if (pc_flags & PCG_MEMSW)
-		batch->memsw_nr_pages++;
-	goto out;
-direct:
-	if (pc_flags & PCG_MEM)
-		res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
-	if (pc_flags & PCG_MEMSW)
-		res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
-	memcg_oom_recover(memcg);
-out:
-	mem_cgroup_charge_statistics(memcg, page, -nr_pages);
-	memcg_check_events(memcg, page);
-
-	local_irq_restore(flags);
+	INIT_LIST_HEAD(&page->lru);
+	uncharge_list(&page->lru);
+}
+
+/**
+ * mem_cgroup_uncharge_list - uncharge a list of page
+ * @page_list: list of pages to uncharge
+ *
+ * Uncharge a list of pages previously charged with
+ * mem_cgroup_try_charge() and mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge_list(struct list_head *page_list)
+{
+	if (mem_cgroup_disabled())
+		return;
+
+	if (!list_empty(page_list))
+		uncharge_list(page_list);
 }
 
 /**
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -908,8 +908,6 @@ void release_pages(struct page **pages, int nr, bool cold)
 	struct lruvec *lruvec;
 	unsigned long uninitialized_var(flags);
 
-	mem_cgroup_uncharge_start();
-
 	for (i = 0; i < nr; i++) {
 		struct page *page = pages[i];
 
@@ -941,7 +939,6 @@ void release_pages(struct page **pages, int nr, bool cold)
 			__ClearPageLRU(page);
 			del_page_from_lru_list(page, lruvec, page_off_lru(page));
 		}
-		mem_cgroup_uncharge(page);
 
 		/* Clear Active bit in case of parallel mark_page_accessed */
 		__ClearPageActive(page);
@@ -951,8 +948,7 @@ void release_pages(struct page **pages, int nr, bool cold)
 	if (zone)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
-	mem_cgroup_uncharge_end();
-
+	mem_cgroup_uncharge_list(&pages_to_free);
 	free_hot_cold_page_list(&pages_to_free, cold);
 }
 EXPORT_SYMBOL(release_pages);
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -822,7 +822,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 
 	cond_resched();
 
-	mem_cgroup_uncharge_start();
 	while (!list_empty(page_list)) {
 		struct address_space *mapping;
 		struct page *page;
@@ -1103,7 +1102,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		 */
 		__clear_page_locked(page);
 free_it:
-		mem_cgroup_uncharge(page);
 		nr_reclaimed++;
 
 		/*
@@ -1133,8 +1131,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		list_add(&page->lru, &ret_pages);
 		VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
 	}
-	mem_cgroup_uncharge_end();
 
+	mem_cgroup_uncharge_list(&free_pages);
 	free_hot_cold_page_list(&free_pages, true);
 
 	list_splice(&ret_pages, page_list);
@@ -1437,10 +1435,9 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
 			__ClearPageActive(page);
 			del_page_from_lru_list(page, lruvec, lru);
 
-			mem_cgroup_uncharge(page);
-
 			if (unlikely(PageCompound(page))) {
 				spin_unlock_irq(&zone->lru_lock);
+				mem_cgroup_uncharge(page);
 				(*get_compound_page_dtor(page))(page);
 				spin_lock_irq(&zone->lru_lock);
 			} else
@@ -1548,6 +1545,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 
 	spin_unlock_irq(&zone->lru_lock);
 
+	mem_cgroup_uncharge_list(&page_list);
 	free_hot_cold_page_list(&page_list, true);
 
 	/*
@@ -1660,10 +1658,9 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
 			__ClearPageActive(page);
 			del_page_from_lru_list(page, lruvec, lru);
 
-			mem_cgroup_uncharge(page);
-
 			if (unlikely(PageCompound(page))) {
 				spin_unlock_irq(&zone->lru_lock);
+				mem_cgroup_uncharge(page);
 				(*get_compound_page_dtor(page))(page);
 				spin_lock_irq(&zone->lru_lock);
 			} else
@@ -1771,6 +1768,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
 	__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&zone->lru_lock);
 
+	mem_cgroup_uncharge_list(&l_hold);
 	free_hot_cold_page_list(&l_hold, true);
 }