Commit c08ed348, authored by Konstantin Khlebnikov, committed by Greg Kroah-Hartman

proc/pagemap: walk page tables under pte lock

commit 05fbf357 upstream.

Lockless access to pte in pagemap_pte_range() might race with page
migration and trigger BUG_ON(!PageLocked()) in migration_entry_to_page():

CPU A (pagemap)                           CPU B (migration)
                                          lock_page()
                                          try_to_unmap(page, TTU_MIGRATION...)
                                               make_migration_entry()
                                               set_pte_at()
<read *pte>
pte_to_pagemap_entry()
                                          remove_migration_ptes()
                                          unlock_page()
    if(is_migration_entry())
        migration_entry_to_page()
            BUG_ON(!PageLocked(page))

Also, a lockless read might be non-atomic if the pte is larger than the word size.
Other pte walkers (smaps, numa_maps, clear_refs) already lock the ptes.

Fixes: 052fb0d6 ("proc: report file/anon bit in /proc/pid/pagemap")
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reported-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent ac8623fa
...@@ -1034,7 +1034,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ...@@ -1034,7 +1034,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
struct vm_area_struct *vma; struct vm_area_struct *vma;
struct pagemapread *pm = walk->private; struct pagemapread *pm = walk->private;
spinlock_t *ptl; spinlock_t *ptl;
pte_t *pte; pte_t *pte, *orig_pte;
int err = 0; int err = 0;
/* find the first VMA at or above 'addr' */ /* find the first VMA at or above 'addr' */
...@@ -1095,15 +1095,19 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ...@@ -1095,15 +1095,19 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
BUG_ON(is_vm_hugetlb_page(vma)); BUG_ON(is_vm_hugetlb_page(vma));
/* Addresses in the VMA. */ /* Addresses in the VMA. */
for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) { orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
for (; addr < min(end, vma->vm_end); pte++, addr += PAGE_SIZE) {
pagemap_entry_t pme; pagemap_entry_t pme;
pte = pte_offset_map(pmd, addr);
pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
pte_unmap(pte);
err = add_to_pagemap(addr, &pme, pm); err = add_to_pagemap(addr, &pme, pm);
if (err) if (err)
return err; break;
} }
pte_unmap_unlock(orig_pte, ptl);
if (err)
return err;
if (addr == end) if (addr == end)
break; break;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment