Commit ca707239 authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds

mm: update_lru_size warn and reset bad lru_size

Though debug kernels have a VM_BUG_ON to help protect from misaccounting
lru_size, non-debug kernels are liable to wrap it around: and then the
vast unsigned long size draws page reclaim into a loop of repeatedly
doing nothing on an empty list, without even a cond_resched().

That soft lockup looks confusingly like an over-busy reclaim scenario,
with lots of contention on the lru_lock in shrink_inactive_list(): yet
has a totally different origin.

Help differentiate with a custom warning in
mem_cgroup_update_lru_size(), even in non-debug kernels; and reset the
size to avoid the lockup.  But the particular bug which suggested this
change was mine alone, and since fixed.

Make it a WARN_ONCE: the first occurrence is the most informative, a
flurry may follow, yet even when rate-limited little more is learnt.
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Yang Shi <yang.shi@linaro.org>
Cc: Ning Qu <quning@gmail.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Andres Lagar-Cavilla <andreslc@google.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 1269019e
...@@ -35,8 +35,8 @@ static __always_inline void del_page_from_lru_list(struct page *page, ...@@ -35,8 +35,8 @@ static __always_inline void del_page_from_lru_list(struct page *page,
struct lruvec *lruvec, enum lru_list lru) struct lruvec *lruvec, enum lru_list lru)
{ {
int nr_pages = hpage_nr_pages(page); int nr_pages = hpage_nr_pages(page);
mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
list_del(&page->lru); list_del(&page->lru);
mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
__mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, -nr_pages); __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, -nr_pages);
} }
......
...@@ -1023,22 +1023,38 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone) ...@@ -1023,22 +1023,38 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
* @lru: index of lru list the page is sitting on * @lru: index of lru list the page is sitting on
* @nr_pages: positive when adding or negative when removing * @nr_pages: positive when adding or negative when removing
* *
* This function must be called when a page is added to or removed from an * This function must be called under lru_lock, just before a page is added
* lru list. * to or just after a page is removed from an lru list (that ordering being
* so as to allow it to check that lru_size 0 is consistent with list_empty).
*/ */
void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
int nr_pages) int nr_pages)
{ {
struct mem_cgroup_per_zone *mz; struct mem_cgroup_per_zone *mz;
unsigned long *lru_size; unsigned long *lru_size;
long size;
bool empty;
if (mem_cgroup_disabled()) if (mem_cgroup_disabled())
return; return;
mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
lru_size = mz->lru_size + lru; lru_size = mz->lru_size + lru;
empty = list_empty(lruvec->lists + lru);
if (nr_pages < 0)
*lru_size += nr_pages;
size = *lru_size;
if (WARN_ONCE(size < 0 || empty != !size,
"%s(%p, %d, %d): lru_size %ld but %sempty\n",
__func__, lruvec, lru, nr_pages, size, empty ? "" : "not ")) {
VM_BUG_ON(1);
*lru_size = 0;
}
if (nr_pages > 0)
*lru_size += nr_pages; *lru_size += nr_pages;
VM_BUG_ON((long)(*lru_size) < 0);
} }
bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg) bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment