Commit dd0f230a authored by Yang Shi, committed by Linus Torvalds

mm: hwpoison: refactor refcount check handling

Memory failure reports failure if the page still has an extra pinned
refcount, beyond the one held by hwpoison itself, after the handler is
done.  This check is not actually necessary for all handlers, so move it
into the specific handlers.  This makes the follow-up patch that keeps
shmem pages in the page cache easier.

There may be an expected extra pin in some cases, for example, when the
page is dirty and sitting in the swap cache.
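The accounting behind that check is small enough to show in isolation. Below is a minimal, standalone C sketch (not the kernel code itself; page_count is passed in as a plain integer purely for illustration) mirroring the has_extra_refcount() helper this patch introduces: one reference is always attributed to hwpoison, one more is tolerated when the handler expects an extra pin, and anything beyond that counts as a leftover user.

#include <stdbool.h>
#include <stdio.h>

/*
 * Standalone sketch of the refcount accounting done by the per-handler
 * check: subtract the reference held by the hwpoison code, optionally
 * subtract one expected extra pin, and report failure if anything is
 * left over.
 */
static bool has_extra_refcount(int page_count, bool extra_pins)
{
        int count = page_count - 1;     /* the hwpoison reference */

        if (extra_pins)
                count -= 1;             /* e.g. dirty page kept in swap cache */

        if (count > 0) {
                fprintf(stderr, "still referenced by %d users\n", count);
                return true;
        }
        return false;
}

int main(void)
{
        /* 2 refs, one extra pin expected: recovery can proceed (prints 0) */
        printf("%d\n", has_extra_refcount(2, true));
        /* 3 refs, one extra pin expected: one real user left (prints 1) */
        printf("%d\n", has_extra_refcount(3, true));
        return 0;
}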

Link: https://lkml.kernel.org/r/20211020210755.23964-5-shy828301@gmail.com
Signed-off-by: Yang Shi <shy828301@gmail.com>
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Suggested-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent e0f43fa5
@@ -807,12 +807,44 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
 	return ret;
 }
 
+struct page_state {
+	unsigned long mask;
+	unsigned long res;
+	enum mf_action_page_type type;
+
+	/* Callback ->action() has to unlock the relevant page inside it. */
+	int (*action)(struct page_state *ps, struct page *p);
+};
+
+/*
+ * Return true if page is still referenced by others, otherwise return
+ * false.
+ *
+ * The extra_pins is true when one extra refcount is expected.
+ */
+static bool has_extra_refcount(struct page_state *ps, struct page *p,
+			       bool extra_pins)
+{
+	int count = page_count(p) - 1;
+
+	if (extra_pins)
+		count -= 1;
+
+	if (count > 0) {
+		pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
+		       page_to_pfn(p), action_page_types[ps->type], count);
+		return true;
+	}
+
+	return false;
+}
+
 /*
  * Error hit kernel page.
  * Do nothing, try to be lucky and not touch this instead. For a few cases we
  * could be more sophisticated.
  */
-static int me_kernel(struct page *p, unsigned long pfn)
+static int me_kernel(struct page_state *ps, struct page *p)
 {
 	unlock_page(p);
 	return MF_IGNORED;
@@ -821,9 +853,9 @@ static int me_kernel(struct page *p, unsigned long pfn)
 /*
  * Page in unknown state. Do nothing.
  */
-static int me_unknown(struct page *p, unsigned long pfn)
+static int me_unknown(struct page_state *ps, struct page *p)
 {
-	pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
+	pr_err("Memory failure: %#lx: Unknown page state\n", page_to_pfn(p));
 	unlock_page(p);
 	return MF_FAILED;
 }
@@ -831,7 +863,7 @@ static int me_unknown(struct page *p, unsigned long pfn)
 /*
  * Clean (or cleaned) page cache page.
  */
-static int me_pagecache_clean(struct page *p, unsigned long pfn)
+static int me_pagecache_clean(struct page_state *ps, struct page *p)
 {
 	int ret;
 	struct address_space *mapping;
@@ -868,9 +900,13 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 	 *
 	 * Open: to take i_rwsem or not for this? Right now we don't.
 	 */
-	ret = truncate_error_page(p, pfn, mapping);
+	ret = truncate_error_page(p, page_to_pfn(p), mapping);
 out:
 	unlock_page(p);
+
+	if (has_extra_refcount(ps, p, false))
+		ret = MF_FAILED;
+
 	return ret;
 }
@@ -879,7 +915,7 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
  * Issues: when the error hit a hole page the error is not properly
  * propagated.
  */
-static int me_pagecache_dirty(struct page *p, unsigned long pfn)
+static int me_pagecache_dirty(struct page_state *ps, struct page *p)
 {
 	struct address_space *mapping = page_mapping(p);
@@ -923,7 +959,7 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
 		mapping_set_error(mapping, -EIO);
 	}
 
-	return me_pagecache_clean(p, pfn);
+	return me_pagecache_clean(ps, p);
 }
 
 /*
@@ -945,9 +981,10 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
  * Clean swap cache pages can be directly isolated. A later page fault will
  * bring in the known good data from disk.
  */
-static int me_swapcache_dirty(struct page *p, unsigned long pfn)
+static int me_swapcache_dirty(struct page_state *ps, struct page *p)
 {
 	int ret;
+	bool extra_pins = false;
 
 	ClearPageDirty(p);
 	/* Trigger EIO in shmem: */
@@ -955,10 +992,17 @@ static int me_swapcache_dirty(struct page *p, unsigned long pfn)
 	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
 	unlock_page(p);
+
+	if (ret == MF_DELAYED)
+		extra_pins = true;
+
+	if (has_extra_refcount(ps, p, extra_pins))
+		ret = MF_FAILED;
+
 	return ret;
 }
 
-static int me_swapcache_clean(struct page *p, unsigned long pfn)
+static int me_swapcache_clean(struct page_state *ps, struct page *p)
 {
 	int ret;
@@ -966,6 +1010,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
 	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
 	unlock_page(p);
+
+	if (has_extra_refcount(ps, p, false))
+		ret = MF_FAILED;
+
 	return ret;
 }
@@ -975,7 +1023,7 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
  * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
  *   To narrow down kill region to one page, we need to break up pmd.
  */
-static int me_huge_page(struct page *p, unsigned long pfn)
+static int me_huge_page(struct page_state *ps, struct page *p)
 {
 	int res;
 	struct page *hpage = compound_head(p);
@@ -986,7 +1034,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 	mapping = page_mapping(hpage);
 	if (mapping) {
-		res = truncate_error_page(hpage, pfn, mapping);
+		res = truncate_error_page(hpage, page_to_pfn(p), mapping);
 		unlock_page(hpage);
 	} else {
 		res = MF_FAILED;
@@ -1004,6 +1052,9 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 		}
 	}
 
+	if (has_extra_refcount(ps, p, false))
+		res = MF_FAILED;
+
 	return res;
 }
@@ -1029,14 +1080,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 #define slab		(1UL << PG_slab)
 #define reserved	(1UL << PG_reserved)
 
-static struct page_state {
-	unsigned long mask;
-	unsigned long res;
-	enum mf_action_page_type type;
-
-	/* Callback ->action() has to unlock the relevant page inside it. */
-	int (*action)(struct page *p, unsigned long pfn);
-} error_states[] = {
+static struct page_state error_states[] = {
 	{ reserved,	reserved,	MF_MSG_KERNEL,	me_kernel },
 	/*
 	 * free pages are specially detected outside this table:
@@ -1096,19 +1140,10 @@ static int page_action(struct page_state *ps, struct page *p,
 		unsigned long pfn)
 {
 	int result;
-	int count;
 
 	/* page p should be unlocked after returning from ps->action(). */
-	result = ps->action(p, pfn);
-
-	count = page_count(p) - 1;
-	if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
-		count--;
-	if (count > 0) {
-		pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
-		       pfn, action_page_types[ps->type], count);
-		result = MF_FAILED;
-	}
+	result = ps->action(ps, p);
+
 	action_result(pfn, ps->type, result);
 
 	/* Could do more checks here if page looks ok */
...