Commit da7cbd0c authored by Naoya Horiguchi, committed by Sasha Levin

mm/memory-failure: split thp earlier in memory error handling

[ Upstream commit 415c64c1 ]

memory_failure() doesn't handle thp itself at this time and needs to split
it before doing isolation.  Currently thp is split in the middle of
hwpoison_user_mappings(), but there are corner cases where memory_failure()
wrongly tries to handle a thp without splitting it.

1) "non anonymous" thp, which is not a normal operating mode of thp,
   but a memory error could hit a thp before anon_vma is initialized.  In
   such case, split_huge_page() fails and me_huge_page() (intended for
   hugetlb) is called for thp, which triggers BUG_ON in page_hstate().

2) the !PageLRU case, where hwpoison_user_mappings() returns with
   SWAP_SUCCESS and the result is the same as in case 1.

memory_failure() can't avoid the split, so let's split the thp earlier,
which also reduces the amount of code that has to be prepared for both
normal pages and thp.
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Tony Luck <tony.luck@intel.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
parent aeb3435b
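For orientation before reading the diff, here is a small userspace sketch of the control flow that the new memory_failure() hunk introduces: a non-anonymous thp and a failed split now bail out with -EBUSY before any unmapping is attempted, instead of falling through to hwpoison_user_mappings(). The struct and helper names below are mocks standing in for the kernel's page flags and split_huge_page(); only the decision logic follows the patch, so treat it as an illustration rather than kernel code.

/*
 * Userspace mock of the early-split check this patch adds to
 * memory_failure().  The struct and helpers are stand-ins for the
 * kernel's page flags and split_huge_page(); only the control flow
 * mirrors the new hunk in the diff below.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct mock_page {
	bool is_hugetlb;	/* stands in for PageHuge(p) */
	bool is_thp_head;	/* stands in for PageTransHuge(hpage) */
	bool is_anon;		/* stands in for PageAnon(hpage) */
	bool split_fails;	/* simulated split_huge_page() failure */
};

/* Mirrors the block inserted before the PageLRU handling. */
static int early_split_check(struct mock_page *hpage, unsigned long pfn)
{
	if (!hpage->is_hugetlb && hpage->is_thp_head) {
		if (!hpage->is_anon) {
			fprintf(stderr, "MCE: %#lx: non anonymous thp\n", pfn);
			return -EBUSY;	/* bail out instead of calling me_huge_page() */
		}
		if (hpage->split_fails) {
			fprintf(stderr, "MCE: %#lx: thp split failed\n", pfn);
			return -EBUSY;	/* bail out instead of unmapping a whole thp */
		}
		/* On success the raw 4k page is handled from here on. */
	}
	return 0;
}

int main(void)
{
	struct mock_page non_anon_thp = { .is_thp_head = true };
	struct mock_page anon_thp = { .is_thp_head = true, .is_anon = true };

	printf("non-anon thp -> %d\n", early_split_check(&non_anon_thp, 0x1000));
	printf("anon thp     -> %d\n", early_split_check(&anon_thp, 0x2000));
	return 0;
}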
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -927,7 +927,6 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	int ret;
 	int kill = 1, forcekill;
 	struct page *hpage = *hpagep;
-	struct page *ppage;
 
 	/*
 	 * Here we are interested only in user-mapped pages, so skip any
@@ -976,59 +975,6 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 		}
 	}
 
-	/*
-	 * ppage: poisoned page
-	 *   if p is regular page(4k page)
-	 *        ppage == real poisoned page;
-	 *   else p is hugetlb or THP, ppage == head page.
-	 */
-	ppage = hpage;
-
-	if (PageTransHuge(hpage)) {
-		/*
-		 * Verify that this isn't a hugetlbfs head page, the check for
-		 * PageAnon is just for avoid tripping a split_huge_page
-		 * internal debug check, as split_huge_page refuses to deal with
-		 * anything that isn't an anon page. PageAnon can't go away fro
-		 * under us because we hold a refcount on the hpage, without a
-		 * refcount on the hpage. split_huge_page can't be safely called
-		 * in the first place, having a refcount on the tail isn't
-		 * enough * to be safe.
-		 */
-		if (!PageHuge(hpage) && PageAnon(hpage)) {
-			if (unlikely(split_huge_page(hpage))) {
-				/*
-				 * FIXME: if splitting THP is failed, it is
-				 * better to stop the following operation rather
-				 * than causing panic by unmapping. System might
-				 * survive if the page is freed later.
-				 */
-				printk(KERN_INFO
-					"MCE %#lx: failed to split THP\n", pfn);
-
-				BUG_ON(!PageHWPoison(p));
-				return SWAP_FAIL;
-			}
-			/*
-			 * We pinned the head page for hwpoison handling,
-			 * now we split the thp and we are interested in
-			 * the hwpoisoned raw page, so move the refcount
-			 * to it. Similarly, page lock is shifted.
-			 */
-			if (hpage != p) {
-				if (!(flags & MF_COUNT_INCREASED)) {
-					put_page(hpage);
-					get_page(p);
-				}
-				lock_page(p);
-				unlock_page(hpage);
-				*hpagep = p;
-			}
-
-			/* THP is split, so ppage should be the real poisoned page. */
-			ppage = p;
-		}
-	}
-
 	/*
 	 * First collect all the processes that have the page
 	 * mapped in dirty form. This has to be done before try_to_unmap,
@@ -1038,12 +984,12 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * there's nothing that can be done.
 	 */
 	if (kill)
-		collect_procs(ppage, &tokill, flags & MF_ACTION_REQUIRED);
+		collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
 
-	ret = try_to_unmap(ppage, ttu);
+	ret = try_to_unmap(hpage, ttu);
 	if (ret != SWAP_SUCCESS)
 		printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
-				pfn, page_mapcount(ppage));
+				pfn, page_mapcount(hpage));
 
 	/*
 	 * Now that the dirty bit has been propagated to the
@@ -1055,7 +1001,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * use a more force-full uncatchable kill to prevent
 	 * any accesses to the poisoned memory.
 	 */
-	forcekill = PageDirty(ppage) || (flags & MF_MUST_KILL);
+	forcekill = PageDirty(hpage) || (flags & MF_MUST_KILL);
 	kill_procs(&tokill, forcekill, trapno,
 		      ret != SWAP_SUCCESS, p, pfn, flags);
 
@@ -1101,6 +1047,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	struct page_state *ps;
 	struct page *p;
 	struct page *hpage;
+	struct page *orig_head;
 	int res;
 	unsigned int nr_pages;
 	unsigned long page_flags;
@@ -1116,7 +1063,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	}
 
 	p = pfn_to_page(pfn);
-	hpage = compound_head(p);
+	orig_head = hpage = compound_head(p);
 	if (TestSetPageHWPoison(p)) {
 		printk(KERN_ERR "MCE %#lx: already hardware poisoned\n", pfn);
 		return 0;
@@ -1179,6 +1126,21 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 		}
 	}
 
+	if (!PageHuge(p) && PageTransHuge(hpage)) {
+		if (!PageAnon(hpage)) {
+			pr_err("MCE: %#lx: non anonymous thp\n", pfn);
+			put_page(p);
+			return -EBUSY;
+		}
+		if (unlikely(split_huge_page(hpage))) {
+			pr_err("MCE: %#lx: thp split failed\n", pfn);
+			put_page(p);
+			return -EBUSY;
+		}
+		VM_BUG_ON_PAGE(!page_count(p), p);
+		hpage = compound_head(p);
+	}
+
 	/*
 	 * We ignore non-LRU pages for good reasons.
 	 * - PG_locked is only well defined for LRU pages and a few others
@@ -1188,9 +1150,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	 * walked by the page reclaim code, however that's not a big loss.
 	 */
 	if (!PageHuge(p)) {
-		if (!PageLRU(hpage))
-			shake_page(hpage, 0);
-		if (!PageLRU(hpage)) {
+		if (!PageLRU(p))
+			shake_page(p, 0);
+		if (!PageLRU(p)) {
 			/*
 			 * shake_page could have turned it free.
 			 */
@@ -1211,7 +1173,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
 	 * The page could have changed compound pages during the locking.
 	 * If this happens just bail out.
 	 */
-	if (compound_head(p) != hpage) {
+	if (PageCompound(p) && compound_head(p) != orig_head) {
 		action_result(pfn, MSG_DIFFERENT_COMPOUND, IGNORED);
 		res = -EBUSY;
 		goto out;