Commit b20ce5e0 authored by Kirill A. Shutemov, committed by Linus Torvalds

mm: prepare page_referenced() and page_idle to new THP refcounting

Both page_referenced() and page_idle_clear_pte_refs_one() assume that
THP can only be mapped with a PMD, so there's no reason to look at PTEs
for PageTransHuge() pages.  That's not true anymore: THP can be mapped
with PTEs too.

The patch removes the PageTransHuge() test from the functions and
open-codes the page table check.
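
As background for the refcounting these functions are being prepared for, the sketch below is a small userspace model, not kernel code: the struct and helper names are made up, plain ints stand in for struct page fields, and it assumes (as the arithmetic in the hunks below implies) that with PageDoubleMap() set, PMD mappings are also accounted in each subpage's _mapcount. It mirrors the __page_mapcount() and total_mapcount() introduced by this patch.

/*
 * Userspace model only: hypothetical types and names, no kernel APIs.
 * It mirrors the arithmetic of __page_mapcount() and total_mapcount()
 * from the hunks below for one PMD-sized THP.
 */
#include <stdbool.h>
#include <stdio.h>

#define MODEL_HPAGE_PMD_NR 512	/* subpages per 2M THP on x86-64 */

struct model_thp {
	int compound_mapcount;				/* -1 based, like _mapcount */
	int subpage_mapcount[MODEL_HPAGE_PMD_NR];	/* -1 based, per subpage */
	bool double_map;				/* models PageDoubleMap() */
};

/* Mirrors __page_mapcount(): how often one subpage is mapped. */
static int model_page_mapcount(const struct model_thp *thp, int subpage)
{
	int ret = thp->subpage_mapcount[subpage] + 1;

	ret += thp->compound_mapcount + 1;
	if (thp->double_map)
		ret--;	/* PMD mapping is already counted in the subpage */
	return ret;
}

/* Mirrors total_mapcount(): how often any part of the THP is mapped. */
static int model_total_mapcount(const struct model_thp *thp)
{
	int i, ret = thp->compound_mapcount + 1;

	for (i = 0; i < MODEL_HPAGE_PMD_NR; i++)
		ret += thp->subpage_mapcount[i] + 1;
	if (thp->double_map)
		ret -= MODEL_HPAGE_PMD_NR;
	return ret;
}

int main(void)
{
	/* One PMD mapping; subpage 0 is also mapped once by a PTE. */
	struct model_thp thp = { .compound_mapcount = 0, .double_map = true };
	int i;

	for (i = 0; i < MODEL_HPAGE_PMD_NR; i++)
		thp.subpage_mapcount[i] = 0;	/* the PMD mapping, per subpage */
	thp.subpage_mapcount[0] = 1;		/* plus the extra PTE mapping */

	printf("page_mapcount(subpage 0) = %d\n", model_page_mapcount(&thp, 0));
	printf("page_mapcount(subpage 1) = %d\n", model_page_mapcount(&thp, 1));
	printf("total_mapcount           = %d\n", model_total_mapcount(&thp));
	return 0;
}

For a THP mapped once through a PMD, with subpage 0 additionally mapped by a PTE, the model reports a mapcount of 2 for subpage 0, 1 for the other subpages, and a total_mapcount of 2.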

[akpm@linux-foundation.org: coding-style fixes]
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Minchan Kim <minchan@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent e90309c9
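
In the hunks below, both open-coded walks filter rmap false positives with the check pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page), which accepts a PTE mapping any subpage of the THP. The following is a minimal standalone sketch of that filter; the function name and pfn values are hypothetical, and 512 stands in for HPAGE_PMD_NR on x86-64.

/*
 * Standalone sketch of the "THP can be referenced by any subpage" filter
 * used in the open-coded checks below; names and pfn values are made up.
 */
#include <stdbool.h>
#include <stdio.h>

/* Kernel form: pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page) */
static bool pte_maps_page(unsigned long pte_pfn, unsigned long page_pfn,
			  unsigned long nr_pages)
{
	return pte_pfn - page_pfn < nr_pages;
}

int main(void)
{
	unsigned long thp_pfn = 0x1000;	/* hypothetical head page pfn */

	printf("%d\n", pte_maps_page(0x1000, thp_pfn, 512));	/* 1: head page */
	printf("%d\n", pte_maps_page(0x11ff, thp_pfn, 512));	/* 1: last subpage */
	printf("%d\n", pte_maps_page(0x1200, thp_pfn, 512));	/* 0: past the THP */
	printf("%d\n", pte_maps_page(0x0fff, thp_pfn, 512));	/* 0: below, wraps */
	return 0;
}

Because the subtraction is unsigned, a single comparison rejects both a pfn below the head page (the difference wraps to a huge value) and a pfn past the last subpage.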
@@ -48,11 +48,6 @@ enum transparent_hugepage_flag {
 #endif
 };
 
-extern pmd_t *page_check_address_pmd(struct page *page,
-				     struct mm_struct *mm,
-				     unsigned long address,
-				     spinlock_t **ptl);
-
 #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
 #define HPAGE_PMD_NR (1<<HPAGE_PMD_ORDER)

@@ -433,20 +433,25 @@ static inline void page_mapcount_reset(struct page *page)
 	atomic_set(&(page)->_mapcount, -1);
 }
 
+int __page_mapcount(struct page *page);
+
 static inline int page_mapcount(struct page *page)
 {
-	int ret;
-
 	VM_BUG_ON_PAGE(PageSlab(page), page);
 
-	ret = atomic_read(&page->_mapcount) + 1;
-	if (PageCompound(page)) {
-		page = compound_head(page);
-		ret += atomic_read(compound_mapcount_ptr(page)) + 1;
-		if (PageDoubleMap(page))
-			ret--;
-	}
-	return ret;
+	if (unlikely(PageCompound(page)))
+		return __page_mapcount(page);
+	return atomic_read(&page->_mapcount) + 1;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+int total_mapcount(struct page *page);
+#else
+static inline int total_mapcount(struct page *page)
+{
+	return page_mapcount(page);
 }
+#endif
 
 static inline int page_count(struct page *page)
 {

@@ -1649,46 +1649,6 @@ bool __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma,
 	return false;
 }
 
-/*
- * This function returns whether a given @page is mapped onto the @address
- * in the virtual space of @mm.
- *
- * When it's true, this function returns *pmd with holding the page table lock
- * and passing it back to the caller via @ptl.
- * If it's false, returns NULL without holding the page table lock.
- */
-pmd_t *page_check_address_pmd(struct page *page,
-			      struct mm_struct *mm,
-			      unsigned long address,
-			      spinlock_t **ptl)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	if (address & ~HPAGE_PMD_MASK)
-		return NULL;
-
-	pgd = pgd_offset(mm, address);
-	if (!pgd_present(*pgd))
-		return NULL;
-	pud = pud_offset(pgd, address);
-	if (!pud_present(*pud))
-		return NULL;
-	pmd = pmd_offset(pud, address);
-
-	*ptl = pmd_lock(mm, pmd);
-	if (!pmd_present(*pmd))
-		goto unlock;
-	if (pmd_page(*pmd) != page)
-		goto unlock;
-	if (pmd_trans_huge(*pmd))
-		return pmd;
-unlock:
-	spin_unlock(*ptl);
-	return NULL;
-}
-
 #define VM_NO_THP (VM_SPECIAL | VM_HUGETLB | VM_SHARED | VM_MAYSHARE)
 
 int hugepage_madvise(struct vm_area_struct *vma,

@@ -3097,20 +3057,6 @@ static void unfreeze_page(struct anon_vma *anon_vma, struct page *page)
 	}
 }
 
-static int total_mapcount(struct page *page)
-{
-	int i, ret;
-
-	ret = compound_mapcount(page);
-	for (i = 0; i < HPAGE_PMD_NR; i++)
-		ret += atomic_read(&page[i]._mapcount) + 1;
-
-	if (PageDoubleMap(page))
-		ret -= HPAGE_PMD_NR;
-
-	return ret;
-}
-
 static int __split_huge_page_tail(struct page *head, int tail,
 		struct lruvec *lruvec, struct list_head *list)
 {

@@ -3211,6 +3157,25 @@ static void __split_huge_page(struct page *page, struct list_head *list)
 	}
 }
 
+int total_mapcount(struct page *page)
+{
+	int i, ret;
+
+	VM_BUG_ON_PAGE(PageTail(page), page);
+
+	if (likely(!PageCompound(page)))
+		return atomic_read(&page->_mapcount) + 1;
+
+	ret = compound_mapcount(page);
+	if (PageHuge(page))
+		return ret;
+	for (i = 0; i < HPAGE_PMD_NR; i++)
+		ret += atomic_read(&page[i]._mapcount) + 1;
+	if (PageDoubleMap(page))
+		ret -= HPAGE_PMD_NR;
+	return ret;
+}
+
 /*
  * This function splits huge page into normal pages. @page can point to any
  * subpage of huge page to split. Split doesn't change the position of @page.

@@ -56,23 +56,70 @@ static int page_idle_clear_pte_refs_one(struct page *page,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
+	pgd_t *pgd;
+	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 	bool referenced = false;
 
-	if (unlikely(PageTransHuge(page))) {
-		pmd = page_check_address_pmd(page, mm, addr, &ptl);
-		if (pmd) {
-			referenced = pmdp_clear_young_notify(vma, addr, pmd);
+	pgd = pgd_offset(mm, addr);
+	if (!pgd_present(*pgd))
+		return SWAP_AGAIN;
+	pud = pud_offset(pgd, addr);
+	if (!pud_present(*pud))
+		return SWAP_AGAIN;
+	pmd = pmd_offset(pud, addr);
+
+	if (pmd_trans_huge(*pmd)) {
+		ptl = pmd_lock(mm, pmd);
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd))) {
 			spin_unlock(ptl);
+			goto map_pte;
 		}
+
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		referenced = pmdp_clear_young_notify(vma, addr, pmd);
+		spin_unlock(ptl);
+		goto found;
+unlock_pmd:
+		spin_unlock(ptl);
+		return SWAP_AGAIN;
 	} else {
-		pte = page_check_address(page, mm, addr, &ptl, 0);
-		if (pte) {
-			referenced = ptep_clear_young_notify(vma, addr, pte);
-			pte_unmap_unlock(pte, ptl);
-		}
+		pmd_t pmde = *pmd;
+
+		barrier();
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
+			return SWAP_AGAIN;
+
 	}
+map_pte:
+	pte = pte_offset_map(pmd, addr);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return SWAP_AGAIN;
+	}
+
+	ptl = pte_lockptr(mm, pmd);
+	spin_lock(ptl);
+
+	if (!pte_present(*pte)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	/* THP can be referenced by any subpage */
+	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	referenced = ptep_clear_young_notify(vma, addr, pte);
+	pte_unmap_unlock(pte, ptl);
+found:
 	if (referenced) {
 		clear_page_idle(page);
 		/*

@@ -814,58 +814,105 @@ static int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 	spinlock_t *ptl;
 	int referenced = 0;
 	struct page_referenced_arg *pra = arg;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
 
-	if (unlikely(PageTransHuge(page))) {
-		pmd_t *pmd;
-
-		/*
-		 * rmap might return false positives; we must filter
-		 * these out using page_check_address_pmd().
-		 */
-		pmd = page_check_address_pmd(page, mm, address, &ptl);
-		if (!pmd)
+	if (unlikely(PageHuge(page))) {
+		/* when pud is not present, pte will be NULL */
+		pte = huge_pte_offset(mm, address);
+		if (!pte)
 			return SWAP_AGAIN;
 
-		if (vma->vm_flags & VM_LOCKED) {
+		ptl = huge_pte_lockptr(page_hstate(page), mm, pte);
+		goto check_pte;
+	}
+
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd))
+		return SWAP_AGAIN;
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return SWAP_AGAIN;
+	pmd = pmd_offset(pud, address);
+
+	if (pmd_trans_huge(*pmd)) {
+		int ret = SWAP_AGAIN;
+
+		ptl = pmd_lock(mm, pmd);
+		if (!pmd_present(*pmd))
+			goto unlock_pmd;
+		if (unlikely(!pmd_trans_huge(*pmd))) {
 			spin_unlock(ptl);
+			goto map_pte;
+		}
+
+		if (pmd_page(*pmd) != page)
+			goto unlock_pmd;
+
+		if (vma->vm_flags & VM_LOCKED) {
 			pra->vm_flags |= VM_LOCKED;
-			return SWAP_FAIL; /* To break the loop */
+			ret = SWAP_FAIL; /* To break the loop */
+			goto unlock_pmd;
 		}
 
 		if (pmdp_clear_flush_young_notify(vma, address, pmd))
 			referenced++;
 		spin_unlock(ptl);
+		goto found;
+unlock_pmd:
+		spin_unlock(ptl);
+		return ret;
 	} else {
-		pte_t *pte;
+		pmd_t pmde = *pmd;
 
-		/*
-		 * rmap might return false positives; we must filter
-		 * these out using page_check_address().
-		 */
-		pte = page_check_address(page, mm, address, &ptl, 0);
-		if (!pte)
+		barrier();
+		if (!pmd_present(pmde) || pmd_trans_huge(pmde))
 			return SWAP_AGAIN;
+	}
+map_pte:
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte)) {
+		pte_unmap(pte);
+		return SWAP_AGAIN;
+	}
 
-		if (vma->vm_flags & VM_LOCKED) {
-			pte_unmap_unlock(pte, ptl);
-			pra->vm_flags |= VM_LOCKED;
-			return SWAP_FAIL; /* To break the loop */
-		}
+	ptl = pte_lockptr(mm, pmd);
+check_pte:
+	spin_lock(ptl);
 
-		if (ptep_clear_flush_young_notify(vma, address, pte)) {
-			/*
-			 * Don't treat a reference through a sequentially read
-			 * mapping as such.  If the page has been used in
-			 * another mapping, we will catch it; if this other
-			 * mapping is already gone, the unmap path will have
-			 * set PG_referenced or activated the page.
-			 */
-			if (likely(!(vma->vm_flags & VM_SEQ_READ)))
-				referenced++;
-		}
+	if (!pte_present(*pte)) {
 		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
+	}
+
+	/* THP can be referenced by any subpage */
+	if (pte_pfn(*pte) - page_to_pfn(page) >= hpage_nr_pages(page)) {
+		pte_unmap_unlock(pte, ptl);
+		return SWAP_AGAIN;
 	}
 
+	if (vma->vm_flags & VM_LOCKED) {
+		pte_unmap_unlock(pte, ptl);
+		pra->vm_flags |= VM_LOCKED;
+		return SWAP_FAIL; /* To break the loop */
+	}
+
+	if (ptep_clear_flush_young_notify(vma, address, pte)) {
+		/*
+		 * Don't treat a reference through a sequentially read
+		 * mapping as such.  If the page has been used in
+		 * another mapping, we will catch it; if this other
+		 * mapping is already gone, the unmap path will have
+		 * set PG_referenced or activated the page.
+		 */
+		if (likely(!(vma->vm_flags & VM_SEQ_READ)))
+			referenced++;
+	}
+	pte_unmap_unlock(pte, ptl);
+
+found:
 	if (referenced)
 		clear_page_idle(page);
 	if (test_and_clear_page_young(page))

@@ -912,7 +959,7 @@ int page_referenced(struct page *page,
 	int ret;
 	int we_locked = 0;
 	struct page_referenced_arg pra = {
-		.mapcount = page_mapcount(page),
+		.mapcount = total_mapcount(page),
 		.memcg = memcg,
 	};
 	struct rmap_walk_control rwc = {

@@ -407,6 +407,20 @@ struct address_space *page_mapping(struct page *page)
 	return mapping;
 }
 
+/* Slow path of page_mapcount() for compound pages */
+int __page_mapcount(struct page *page)
+{
+	int ret;
+
+	ret = atomic_read(&page->_mapcount) + 1;
+	page = compound_head(page);
+	ret += atomic_read(compound_mapcount_ptr(page)) + 1;
+	if (PageDoubleMap(page))
+		ret--;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(__page_mapcount);
+
 int overcommit_ratio_handler(struct ctl_table *table, int write,
 			     void __user *buffer, size_t *lenp,
 			     loff_t *ppos)