Commit 0d665e7b authored by Kirill A. Shutemov's avatar Kirill A. Shutemov Committed by Linus Torvalds

mm, page_vma_mapped: Drop faulty pointer arithmetics in check_pte()

Tetsuo reported random crashes under memory pressure on a 32-bit x86
system and tracked them down to the change that introduced
page_vma_mapped_walk().

The root cause of the issue is the faulty pointer math in check_pte().
As ->pte may point to an arbitrary page, we have to check that the pages
belong to the same section before doing the math. Otherwise it may lead to weird
results.

It wasn't noticed until now as mem_map[] is virtually contiguous on
flatmem or vmemmap sparsemem. Pointer arithmetic just works against all
'struct page' pointers. But with classic sparsemem, it doesn't because
each section's memmap is allocated separately and so consecutive pfns
crossing two sections might have struct pages at completely unrelated
addresses.

Let's restructure code a bit and replace pointer arithmetic with
operations on pfns.
Signed-off-by: default avatarKirill A. Shutemov <kirill.shutemov@linux.intel.com>
Reported-and-tested-by: default avatarTetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Acked-by: default avatarMichal Hocko <mhocko@suse.com>
Fixes: ace71a19 ("mm: introduce page_vma_mapped_walk()")
Cc: stable@vger.kernel.org
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 0c5b9b5d
...@@ -124,6 +124,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry) ...@@ -124,6 +124,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry)
return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); return unlikely(swp_type(entry) == SWP_DEVICE_WRITE);
} }
/*
 * Return the pfn encoded in a device-private swap entry.
 *
 * The pfn is stored in the swap offset field, so this is a thin
 * wrapper around swp_offset().
 */
static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
{
	unsigned long pfn = swp_offset(entry);

	return pfn;
}
static inline struct page *device_private_entry_to_page(swp_entry_t entry) static inline struct page *device_private_entry_to_page(swp_entry_t entry)
{ {
return pfn_to_page(swp_offset(entry)); return pfn_to_page(swp_offset(entry));
...@@ -154,6 +159,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry) ...@@ -154,6 +159,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry)
return false; return false;
} }
/*
 * Stub for configurations where device-private entries cannot exist
 * (the sibling helpers in this branch return false/NULL as well);
 * there is no meaningful pfn, so return 0.
 * NOTE(review): presumably inside a !CONFIG_DEVICE_PRIVATE region —
 * confirm against the surrounding #ifdef, not visible in this hunk.
 */
static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
{
	return 0;
}
static inline struct page *device_private_entry_to_page(swp_entry_t entry) static inline struct page *device_private_entry_to_page(swp_entry_t entry)
{ {
return NULL; return NULL;
...@@ -189,6 +199,11 @@ static inline int is_write_migration_entry(swp_entry_t entry) ...@@ -189,6 +199,11 @@ static inline int is_write_migration_entry(swp_entry_t entry)
return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE); return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE);
} }
/*
 * Return the pfn of the page a migration entry refers to.
 *
 * Migration entries keep the target pfn in the swap offset field,
 * so simply decode it with swp_offset().
 */
static inline unsigned long migration_entry_to_pfn(swp_entry_t entry)
{
	unsigned long pfn = swp_offset(entry);

	return pfn;
}
static inline struct page *migration_entry_to_page(swp_entry_t entry) static inline struct page *migration_entry_to_page(swp_entry_t entry)
{ {
struct page *p = pfn_to_page(swp_offset(entry)); struct page *p = pfn_to_page(swp_offset(entry));
...@@ -218,6 +233,12 @@ static inline int is_migration_entry(swp_entry_t swp) ...@@ -218,6 +233,12 @@ static inline int is_migration_entry(swp_entry_t swp)
{ {
return 0; return 0;
} }
/*
 * Stub for configurations without migration entries (the sibling
 * is_migration_entry() in this branch always returns 0); there is
 * no pfn to decode, so return 0.
 * NOTE(review): presumably inside a !CONFIG_MIGRATION region —
 * confirm against the surrounding #ifdef, not visible in this hunk.
 */
static inline unsigned long migration_entry_to_pfn(swp_entry_t entry)
{
	return 0;
}
static inline struct page *migration_entry_to_page(swp_entry_t entry) static inline struct page *migration_entry_to_page(swp_entry_t entry)
{ {
return NULL; return NULL;
......
...@@ -30,10 +30,29 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw) ...@@ -30,10 +30,29 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw)
return true; return true;
} }
/**
* check_pte - check if @pvmw->page is mapped at the @pvmw->pte
*
* page_vma_mapped_walk() found a place where @pvmw->page is *potentially*
* mapped. check_pte() has to validate this.
*
* @pvmw->pte may point to empty PTE, swap PTE or PTE pointing to arbitrary
* page.
*
* If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration
* entry that points to @pvmw->page or any subpage in case of THP.
*
* If PVMW_MIGRATION flag is not set, returns true if @pvmw->pte points to
* @pvmw->page or any subpage in case of THP.
*
* Otherwise, return false.
*
*/
static bool check_pte(struct page_vma_mapped_walk *pvmw) static bool check_pte(struct page_vma_mapped_walk *pvmw)
{ {
unsigned long pfn;
if (pvmw->flags & PVMW_MIGRATION) { if (pvmw->flags & PVMW_MIGRATION) {
#ifdef CONFIG_MIGRATION
swp_entry_t entry; swp_entry_t entry;
if (!is_swap_pte(*pvmw->pte)) if (!is_swap_pte(*pvmw->pte))
return false; return false;
...@@ -41,36 +60,30 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw) ...@@ -41,36 +60,30 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
if (!is_migration_entry(entry)) if (!is_migration_entry(entry))
return false; return false;
if (migration_entry_to_page(entry) - pvmw->page >=
hpage_nr_pages(pvmw->page)) { pfn = migration_entry_to_pfn(entry);
return false; } else if (is_swap_pte(*pvmw->pte)) {
}
if (migration_entry_to_page(entry) < pvmw->page)
return false;
#else
WARN_ON_ONCE(1);
#endif
} else {
if (is_swap_pte(*pvmw->pte)) {
swp_entry_t entry; swp_entry_t entry;
/* Handle un-addressable ZONE_DEVICE memory */
entry = pte_to_swp_entry(*pvmw->pte); entry = pte_to_swp_entry(*pvmw->pte);
if (is_device_private_entry(entry) && if (!is_device_private_entry(entry))
device_private_entry_to_page(entry) == pvmw->page) return false;
return true;
}
pfn = device_private_entry_to_pfn(entry);
} else {
if (!pte_present(*pvmw->pte)) if (!pte_present(*pvmw->pte))
return false; return false;
/* THP can be referenced by any subpage */ pfn = pte_pfn(*pvmw->pte);
if (pte_page(*pvmw->pte) - pvmw->page >=
hpage_nr_pages(pvmw->page)) {
return false;
} }
if (pte_page(*pvmw->pte) < pvmw->page)
if (pfn < page_to_pfn(pvmw->page))
return false;
/* THP can be referenced by any subpage */
if (pfn - page_to_pfn(pvmw->page) >= hpage_nr_pages(pvmw->page))
return false; return false;
}
return true; return true;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment