Commit 7267ec00 authored by Kirill A. Shutemov, committed by Linus Torvalds

mm: postpone page table allocation until we have page to map

The idea (and most of the code) is again borrowed from Hugh's patchset on
huge tmpfs[1].

Instead of allocating the pte page table upfront, we postpone this until we
have a page to map in hand.  This approach opens the possibility of mapping
the page as huge if the filesystem supports it.

Compared to Hugh's patch, I've pushed the page table allocation a bit
further: into do_set_pte().  This way we can postpone the allocation even in
the fault-around case without moving do_fault_around() after __do_fault().

do_set_pte() has been renamed to alloc_set_pte(), as it can now allocate a
page table if required.

[1] http://lkml.kernel.org/r/alpine.LSU.2.11.1502202015090.14414@eggly.anvils
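To illustrate the idea, here is a rough sketch of what the postponed
allocation looks like in alloc_set_pte().  This is simplified, illustrative
pseudocode, not the code added by this patch: locking, memcg charging, the
write/anon paths and the huge page path are omitted, and map_pte_if_needed()
is a hypothetical helper standing in for "map the pte page table, allocating
it first if the pmd is still empty".

	/*
	 * Simplified sketch only: allocate the pte page table lazily,
	 * once we actually have a page to map.
	 */
	int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
			struct page *page)
	{
		pte_t entry;

		if (!fe->pte) {
			/*
			 * No pte page table mapped yet: allocate and map it
			 * now (possibly reusing fe->prealloc_pte, set up by
			 * do_fault_around()).  map_pte_if_needed() is a
			 * hypothetical helper for this sketch.
			 */
			if (map_pte_if_needed(fe))
				return VM_FAULT_OOM;
		}

		/* Somebody else mapped this pte already?  Back off. */
		if (!pte_none(*fe->pte))
			return VM_FAULT_NOPAGE;

		/* memcg charging and rmap accounting omitted in this sketch */
		entry = mk_pte(page, fe->vma->vm_page_prot);
		set_pte_at(fe->vma->vm_mm, fe->address, fe->pte, entry);
		update_mmu_cache(fe->vma, fe->address, fe->pte);

		return 0;
	}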

Link: http://lkml.kernel.org/r/1466021202-61880-10-git-send-email-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent bae473a4
@@ -330,6 +330,13 @@ struct fault_env {
 					 * Protects pte page table if 'pte'
 					 * is not NULL, otherwise pmd.
 					 */
+	pgtable_t prealloc_pte;		/* Pre-allocated pte page table.
+					 * vm_ops->map_pages() calls
+					 * alloc_set_pte() from atomic context.
+					 * do_fault_around() pre-allocates
+					 * page table to avoid allocation from
+					 * atomic context.
+					 */
 };
 
 /*
@@ -618,7 +625,8 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
 	return pte;
 }
 
-void do_set_pte(struct fault_env *fe, struct page *page);
+int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
+		struct page *page);
 #endif
 
 /*
@@ -2144,11 +2144,6 @@ void filemap_map_pages(struct fault_env *fe,
 			start_pgoff) {
 		if (iter.index > end_pgoff)
 			break;
-		fe->pte += iter.index - last_pgoff;
-		fe->address += (iter.index - last_pgoff) << PAGE_SHIFT;
-		last_pgoff = iter.index;
-		if (!pte_none(*fe->pte))
-			goto next;
 repeat:
 		page = radix_tree_deref_slot(slot);
 		if (unlikely(!page))
@@ -2186,7 +2181,13 @@ void filemap_map_pages(struct fault_env *fe,
 
 		if (file->f_ra.mmap_miss > 0)
 			file->f_ra.mmap_miss--;
-		do_set_pte(fe, page);
+
+		fe->address += (iter.index - last_pgoff) << PAGE_SHIFT;
+		if (fe->pte)
+			fe->pte += iter.index - last_pgoff;
+		last_pgoff = iter.index;
+		if (alloc_set_pte(fe, NULL, page))
+			goto unlock;
 		unlock_page(page);
 		goto next;
 unlock:
@@ -2194,6 +2195,9 @@ void filemap_map_pages(struct fault_env *fe,
 skip:
 		put_page(page);
 next:
+		/* Huge page is mapped? No need to proceed. */
+		if (pmd_trans_huge(*fe->pmd))
+			break;
 		if (iter.index == end_pgoff)
 			break;
 	}
[The remainder of the diff is collapsed.]
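For context on the prealloc_pte field added in the first hunk:
vm_ops->map_pages() ends up calling alloc_set_pte() from atomic context, so
do_fault_around() is expected to pre-allocate the page table.  The following
is a rough, illustrative sketch of that interaction, not the literal code of
this commit; the fault-around window computation and error handling are
simplified.

	static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
	{
		/* Simplified window: at most one pte page table's worth. */
		pgoff_t end_pgoff = start_pgoff + PTRS_PER_PTE - 1;

		if (pmd_none(*fe->pmd)) {
			/*
			 * alloc_set_pte() will be called from atomic context
			 * inside ->map_pages(), where it must not sleep, so
			 * pre-allocate the pte page table here; alloc_set_pte()
			 * can then consume fe->prealloc_pte instead of
			 * allocating.
			 */
			fe->prealloc_pte = pte_alloc_one(fe->vma->vm_mm,
							 fe->address);
			if (!fe->prealloc_pte)
				return VM_FAULT_OOM;
		}

		fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff);

		/* If the faulting address itself got mapped, we are done. */
		if (fe->pte && !pte_none(*fe->pte))
			return VM_FAULT_NOPAGE;

		return 0;
	}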