Commit 4f775086 authored by Longlong Xia's avatar Longlong Xia Committed by Andrew Morton

mm: memory-failure: refactor add_to_kill()

Patch series "mm: ksm: support hwpoison for ksm page", v2.

Currently, ksm does not support hwpoison.  As ksm is being used more
widely for deduplication at the system level, container level, and process
level, supporting hwpoison for ksm has become increasingly important. 
However, ksm pages were not processed by hwpoison in 2009 [1].

The main method of implementation:

1. Refactor add_to_kill() and add new add_to_kill_*() to better
   accommodate the handling of different types of pages.

2. Add collect_procs_ksm() to collect processes when the error hits a
   ksm page.

3. Add task_in_to_kill_list() to avoid duplicate addition of tsk to
   the to_kill list.  

4. Try_to_unmap ksm page (already supported).

5. Handle related processes such as sending SIGBUS.

Tested with poisoning to ksm page from
1) different process
2) one process

and with/without memory_failure_early_kill set, the processes are killed
as expected with the patchset.  

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/
commit/?h=01e00f88


This patch (of 2):

The page_address_in_vma() is used to find the user virtual address of a page
in add_to_kill(), but it doesn't support ksm because the ksm page->index is
unusable.  Add a ksm_addr parameter to add_to_kill() and let the caller pass
it, and rename the function to __add_to_kill().  Also add
add_to_kill_anon_file() for handling anonymous pages and file pages, and
add_to_kill_fsdax() for handling fsdax pages.

Link: https://lkml.kernel.org/r/20230414021741.2597273-1-xialonglong1@huawei.com
Link: https://lkml.kernel.org/r/20230414021741.2597273-2-xialonglong1@huawei.com
Signed-off-by: default avatarLonglong Xia <xialonglong1@huawei.com>
Tested-by: default avatarNaoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: default avatarKefeng Wang <wangkefeng.wang@huawei.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Nanyong Sun <sunnanyong@huawei.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent 3cc0c373
...@@ -405,9 +405,9 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma, ...@@ -405,9 +405,9 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
* page->mapping are sufficient for mapping the page back to its * page->mapping are sufficient for mapping the page back to its
* corresponding user virtual address. * corresponding user virtual address.
*/ */
static void add_to_kill(struct task_struct *tsk, struct page *p, static void __add_to_kill(struct task_struct *tsk, struct page *p,
pgoff_t fsdax_pgoff, struct vm_area_struct *vma, struct vm_area_struct *vma, struct list_head *to_kill,
struct list_head *to_kill) unsigned long ksm_addr, pgoff_t fsdax_pgoff)
{ {
struct to_kill *tk; struct to_kill *tk;
...@@ -417,7 +417,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, ...@@ -417,7 +417,7 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
return; return;
} }
tk->addr = page_address_in_vma(p, vma); tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma);
if (is_zone_device_page(p)) { if (is_zone_device_page(p)) {
if (fsdax_pgoff != FSDAX_INVALID_PGOFF) if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma); tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
...@@ -448,6 +448,13 @@ static void add_to_kill(struct task_struct *tsk, struct page *p, ...@@ -448,6 +448,13 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
list_add_tail(&tk->nd, to_kill); list_add_tail(&tk->nd, to_kill);
} }
/*
 * Queue @tsk for a later SIGBUS for an error in an anonymous or
 * file-backed page. The kill address is resolved by __add_to_kill()
 * via page_address_in_vma() (ksm_addr passed as 0), and fsdax handling
 * is disabled (FSDAX_INVALID_PGOFF).
 */
static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
struct vm_area_struct *vma,
struct list_head *to_kill)
{
__add_to_kill(tsk, p, vma, to_kill, 0, FSDAX_INVALID_PGOFF);
}
/* /*
* Kill the processes that have been collected earlier. * Kill the processes that have been collected earlier.
* *
...@@ -573,7 +580,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, ...@@ -573,7 +580,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
continue; continue;
if (!page_mapped_in_vma(page, vma)) if (!page_mapped_in_vma(page, vma))
continue; continue;
add_to_kill(t, page, FSDAX_INVALID_PGOFF, vma, to_kill); add_to_kill_anon_file(t, page, vma, to_kill);
} }
} }
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
...@@ -609,8 +616,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, ...@@ -609,8 +616,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
* to be informed of all such data corruptions. * to be informed of all such data corruptions.
*/ */
if (vma->vm_mm == t->mm) if (vma->vm_mm == t->mm)
add_to_kill(t, page, FSDAX_INVALID_PGOFF, vma, add_to_kill_anon_file(t, page, vma, to_kill);
to_kill);
} }
} }
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
...@@ -618,6 +624,13 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill, ...@@ -618,6 +624,13 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
} }
#ifdef CONFIG_FS_DAX #ifdef CONFIG_FS_DAX
/*
 * Queue @tsk for a later SIGBUS for an error in an fsdax page.
 * Unlike the anon/file case, the mapping offset @pgoff is supplied by
 * the caller so __add_to_kill() can compute the user address with
 * vma_pgoff_address() (ksm_addr passed as 0).
 */
static void add_to_kill_fsdax(struct task_struct *tsk, struct page *p,
struct vm_area_struct *vma,
struct list_head *to_kill, pgoff_t pgoff)
{
__add_to_kill(tsk, p, vma, to_kill, 0, pgoff);
}
/* /*
* Collect processes when the error hit a fsdax page. * Collect processes when the error hit a fsdax page.
*/ */
...@@ -637,7 +650,7 @@ static void collect_procs_fsdax(struct page *page, ...@@ -637,7 +650,7 @@ static void collect_procs_fsdax(struct page *page,
continue; continue;
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
if (vma->vm_mm == t->mm) if (vma->vm_mm == t->mm)
add_to_kill(t, page, pgoff, vma, to_kill); add_to_kill_fsdax(t, page, vma, to_kill, pgoff);
} }
} }
read_unlock(&tasklist_lock); read_unlock(&tasklist_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment