Commit be97a41b authored by Vlastimil Babka, committed by Linus Torvalds

mm/mempolicy.c: merge alloc_hugepage_vma to alloc_pages_vma

The previous commit ("mm/thp: Allocate transparent hugepages on local
node") introduced alloc_hugepage_vma() to mm/mempolicy.c to apply a
special allocation policy for THP.  The function has essentially the same
interface as alloc_pages_vma() and shares a lot of boilerplate code and a
long comment with it.

This patch merges the hugepage special case into alloc_pages_vma.  The
extra if condition should be a cheap enough price to pay.  We also prevent
a (however unlikely) race with a parallel mems_allowed update, which could
make hugepage allocation restart only within the fallback call to
alloc_pages_vma() and never reconsider the special rule in
alloc_hugepage_vma().
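
To make the race concrete, here is a condensed, illustration-only sketch of
the pre-patch structure, trimmed from the two functions in the diff below
(the special-rule path's own cpuset retry handling is elided):

	/* Pre-patch structure, condensed for illustration only. */
	struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma,
					unsigned long addr, int order)
	{
		struct mempolicy *pol = get_vma_policy(vma, addr);
		nodemask_t *nmask = policy_nodemask(gfp, pol);
		int node = numa_node_id();

		if (pol->mode != MPOL_INTERLEAVE &&
		    (!nmask || node_isset(node, *nmask))) {
			mpol_cond_put(pol);
			/* THP special rule: local node only, no remote fallback */
			return alloc_pages_exact_node(node, gfp, order);
		}

		mpol_cond_put(pol);
		/*
		 * Fallback: alloc_pages_vma() runs its own retry_cpuset loop,
		 * so a mems_allowed update noticed there restarts only this
		 * call; the special rule above is never re-evaluated.
		 */
		return alloc_pages_vma(gfp, order, vma, addr, node);
	}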

Also, by making sure mpol_cond_put(pol) is always called before the actual
allocation attempt, we can use a single exit path within the function.

Also update the comment to document the previously missing node parameter
and to drop the obsolete reference to mm_sem.

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 077fcf11
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -334,22 +334,22 @@ alloc_pages(gfp_t gfp_mask, unsigned int order)
 }
 extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
 			struct vm_area_struct *vma, unsigned long addr,
-			int node);
-extern struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma,
-			unsigned long addr, int order);
+			int node, bool hugepage);
+#define alloc_hugepage_vma(gfp_mask, vma, addr, order)	\
+	alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true)
 #else
 #define alloc_pages(gfp_mask, order) \
 		alloc_pages_node(numa_node_id(), gfp_mask, order)
-#define alloc_pages_vma(gfp_mask, order, vma, addr, node)	\
+#define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\
 	alloc_pages(gfp_mask, order)
 #define alloc_hugepage_vma(gfp_mask, vma, addr, order)	\
 	alloc_pages(gfp_mask, order)
 #endif
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 #define alloc_page_vma(gfp_mask, vma, addr)			\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
+	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false)
 #define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
-	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
+	alloc_pages_vma(gfp_mask, 0, vma, addr, node, false)
 
 extern struct page *alloc_kmem_pages(gfp_t gfp_mask, unsigned int order);
 extern struct page *alloc_kmem_pages_node(int nid, gfp_t gfp_mask,
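As a rough usage sketch (not part of the patch): callers now reach the new
bool parameter through the macros above.  The gfp flags, the order macro,
and the local variables (vma, haddr, address) are assumed from the THP code
of this era, not taken from this hunk:

	/* THP: expands to alloc_pages_vma(gfp, order, vma, addr, numa_node_id(), true) */
	struct page *thp = alloc_hugepage_vma(GFP_TRANSHUGE, vma, haddr, HPAGE_PMD_ORDER);

	/* Regular page: expands to alloc_pages_vma(gfp, 0, vma, addr, numa_node_id(), false) */
	struct page *page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
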
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1988,119 +1988,67 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
  *	@order:Order of the GFP allocation.
  *	@vma:  Pointer to VMA or NULL if not available.
  *	@addr: Virtual Address of the allocation. Must be inside the VMA.
+ *	@node: Which node to prefer for allocation (modulo policy).
+ *	@hugepage: for hugepages try only the preferred node if possible
  *
  *	This function allocates a page from the kernel page pool and applies
  *	a NUMA policy associated with the VMA or the current process.
  *	When VMA is not NULL caller must hold down_read on the mmap_sem of the
  *	mm_struct of the VMA to prevent it from going away. Should be used for
- *	all allocations for pages that will be mapped into
- *	user space. Returns NULL when no page can be allocated.
- *
- *	Should be called with the mm_sem of the vma hold.
+ *	all allocations for pages that will be mapped into user space. Returns
+ *	NULL when no page can be allocated.
  */
 struct page *
 alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
-		unsigned long addr, int node)
+		unsigned long addr, int node, bool hugepage)
 {
 	struct mempolicy *pol;
 	struct page *page;
 	unsigned int cpuset_mems_cookie;
+	struct zonelist *zl;
+	nodemask_t *nmask;
 
 retry_cpuset:
 	pol = get_vma_policy(vma, addr);
 	cpuset_mems_cookie = read_mems_allowed_begin();
 
-	if (unlikely(pol->mode == MPOL_INTERLEAVE)) {
+	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage &&
+					pol->mode != MPOL_INTERLEAVE)) {
+		/*
+		 * For hugepage allocation and non-interleave policy which
+		 * allows the current node, we only try to allocate from the
+		 * current node and don't fall back to other nodes, as the
+		 * cost of remote accesses would likely offset THP benefits.
+		 *
+		 * If the policy is interleave, or does not allow the current
+		 * node in its nodemask, we allocate the standard way.
+		 */
+		nmask = policy_nodemask(gfp, pol);
+		if (!nmask || node_isset(node, *nmask)) {
+			mpol_cond_put(pol);
+			page = alloc_pages_exact_node(node, gfp, order);
+			goto out;
+		}
+	}
+
+	if (pol->mode == MPOL_INTERLEAVE) {
 		unsigned nid;
 
 		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
 		mpol_cond_put(pol);
 		page = alloc_page_interleave(gfp, order, nid);
-		if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
-			goto retry_cpuset;
-
-		return page;
+		goto out;
 	}
-	page = __alloc_pages_nodemask(gfp, order,
-				      policy_zonelist(gfp, pol, node),
-				      policy_nodemask(gfp, pol));
-	mpol_cond_put(pol);
-	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
-		goto retry_cpuset;
-	return page;
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/**
- * alloc_hugepage_vma: Allocate a hugepage for a VMA
- * @gfp:
- *      %GFP_USER    user allocation.
- *      %GFP_KERNEL  kernel allocations,
- *      %GFP_HIGHMEM highmem/user allocations,
- *      %GFP_FS      allocation should not call back into a file system.
- *      %GFP_ATOMIC  don't sleep.
- *
- * @vma:   Pointer to VMA or NULL if not available.
- * @addr:  Virtual Address of the allocation. Must be inside the VMA.
- * @order: Order of the hugepage for gfp allocation.
- *
- * This functions allocate a huge page from the kernel page pool and applies
- * a NUMA policy associated with the VMA or the current process.
- * For policy other than %MPOL_INTERLEAVE, we make sure we allocate hugepage
- * only from the current node if the current node is part of the node mask.
- * If we can't allocate a hugepage we fail the allocation and don't try to fallback
- * to other nodes in the node mask. If the current node is not part of node mask
- * or if the NUMA policy is MPOL_INTERLEAVE we use the allocator that can
- * fallback to nodes in the policy node mask.
- *
- * When VMA is not NULL caller must hold down_read on the mmap_sem of the
- * mm_struct of the VMA to prevent it from going away. Should be used for
- * all allocations for pages that will be mapped into
- * user space. Returns NULL when no page can be allocated.
- *
- * Should be called with vma->vm_mm->mmap_sem held.
- *
- */
-struct page *alloc_hugepage_vma(gfp_t gfp, struct vm_area_struct *vma,
-				unsigned long addr, int order)
-{
-	struct page *page;
-	nodemask_t *nmask;
-	struct mempolicy *pol;
-	int node = numa_node_id();
-	unsigned int cpuset_mems_cookie;
-
-retry_cpuset:
-	pol = get_vma_policy(vma, addr);
-	cpuset_mems_cookie = read_mems_allowed_begin();
-	/*
-	 * For interleave policy, we don't worry about
-	 * current node. Otherwise if current node is
-	 * in nodemask, try to allocate hugepage from
-	 * the current node. Don't fall back to other nodes
-	 * for THP.
-	 */
-	if (unlikely(pol->mode == MPOL_INTERLEAVE))
-		goto alloc_with_fallback;
-	nmask = policy_nodemask(gfp, pol);
-	if (!nmask || node_isset(node, *nmask)) {
-		mpol_cond_put(pol);
-		page = alloc_pages_exact_node(node, gfp, order);
-		if (unlikely(!page &&
-			     read_mems_allowed_retry(cpuset_mems_cookie)))
-			goto retry_cpuset;
-		return page;
-	}
-alloc_with_fallback:
-	mpol_cond_put(pol);
-	/*
-	 * if current node is not part of node mask, try
-	 * the allocation from any node, and we can do retry
-	 * in that case.
-	 */
-	return alloc_pages_vma(gfp, order, vma, addr, node);
+
+	nmask = policy_nodemask(gfp, pol);
+	zl = policy_zonelist(gfp, pol, node);
+	mpol_cond_put(pol);
+	page = __alloc_pages_nodemask(gfp, order, zl, nmask);
+out:
+	if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie)))
+		goto retry_cpuset;
+	return page;
 }
-#endif
 
 /**
  *	alloc_pages_current - Allocate pages.