Commit 091a1eaa authored by Greg Kroah-Hartman

Merge branch 'akpm'

* akpm:
  mm: madvise(MADV_DODUMP): allow hugetlbfs pages
  ocfs2: fix locking for res->tracking and dlm->tracking_list
  mm/vmscan.c: fix int overflow in callers of do_shrink_slab()
  mm/vmstat.c: skip NR_TLB_REMOTE_FLUSH* properly
  mm/vmstat.c: fix outdated vmstat_text
  proc: restrict kernel stack dumps to root
  mm/hugetlb: add mmap() encodings for 32MB and 512MB page sizes
  mm/migrate.c: split only transparent huge pages when allocation fails
  ipc/shm.c: use ERR_CAST() for shm_lock() error return
  mm/gup_benchmark: fix unsigned comparison to zero in __gup_benchmark_ioctl
  mm, thp: fix mlocking THP page with migration enabled
  ocfs2: fix crash in ocfs2_duplicate_clusters_by_page()
  hugetlb: take PMD sharing into account when flushing tlb/caches
  mm: migration: fix migration of huge PMD shared pages
parents 5943a9bb d41aa525
...@@ -584,9 +584,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, ...@@ -584,9 +584,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
res->last_used = 0; res->last_used = 0;
spin_lock(&dlm->spinlock); spin_lock(&dlm->track_lock);
list_add_tail(&res->tracking, &dlm->tracking_list); list_add_tail(&res->tracking, &dlm->tracking_list);
spin_unlock(&dlm->spinlock); spin_unlock(&dlm->track_lock);
memset(res->lvb, 0, DLM_LVB_LEN); memset(res->lvb, 0, DLM_LVB_LEN);
memset(res->refmap, 0, sizeof(res->refmap)); memset(res->refmap, 0, sizeof(res->refmap));
......
...@@ -2946,6 +2946,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, ...@@ -2946,6 +2946,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
if (map_end & (PAGE_SIZE - 1)) if (map_end & (PAGE_SIZE - 1))
to = map_end & (PAGE_SIZE - 1); to = map_end & (PAGE_SIZE - 1);
retry:
page = find_or_create_page(mapping, page_index, GFP_NOFS); page = find_or_create_page(mapping, page_index, GFP_NOFS);
if (!page) { if (!page) {
ret = -ENOMEM; ret = -ENOMEM;
...@@ -2954,11 +2955,18 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, ...@@ -2954,11 +2955,18 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
} }
/* /*
* In case PAGE_SIZE <= CLUSTER_SIZE, This page * In case PAGE_SIZE <= CLUSTER_SIZE, we do not expect a dirty
* can't be dirtied before we CoW it out. * page, so write it back.
*/ */
if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) {
BUG_ON(PageDirty(page)); if (PageDirty(page)) {
/*
* write_one_page will unlock the page on return
*/
ret = write_one_page(page);
goto retry;
}
}
if (!PageUptodate(page)) { if (!PageUptodate(page)) {
ret = block_read_full_page(page, ocfs2_get_block); ret = block_read_full_page(page, ocfs2_get_block);
......
...@@ -407,6 +407,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, ...@@ -407,6 +407,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
unsigned long *entries; unsigned long *entries;
int err; int err;
/*
* The ability to racily run the kernel stack unwinder on a running task
* and then observe the unwinder output is scary; while it is useful for
* debugging kernel issues, it can also allow an attacker to leak kernel
* stack contents.
* Doing this in a manner that is at least safe from races would require
* some work to ensure that the remote task can not be scheduled; and
* even then, this would still expose the unwinder as local attack
* surface.
* Therefore, this interface is restricted to root.
*/
if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
return -EACCES;
entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries), entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries),
GFP_KERNEL); GFP_KERNEL);
if (!entries) if (!entries)
......
...@@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, ...@@ -140,6 +140,8 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
pte_t *huge_pte_offset(struct mm_struct *mm, pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz); unsigned long addr, unsigned long sz);
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep); int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end);
struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write); int write);
struct page *follow_huge_pd(struct vm_area_struct *vma, struct page *follow_huge_pd(struct vm_area_struct *vma,
...@@ -170,6 +172,18 @@ static inline unsigned long hugetlb_total_pages(void) ...@@ -170,6 +172,18 @@ static inline unsigned long hugetlb_total_pages(void)
return 0; return 0;
} }
/*
 * Stub variant of huge_pmd_unshare(): does no work and always returns 0.
 * Callers visible in this change test the return value for non-zero to
 * detect that a pmd page was unshared, so with this stub they always take
 * the ordinary (nothing unshared) path.
 * NOTE(review): this appears to sit beside the other no-hugetlb stubs in
 * the header -- confirm the enclosing #ifdef.
 */
static inline int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr,
pte_t *ptep)
{
return 0;
}
/*
 * Stub variant of adjust_range_if_pmd_sharing_possible(): the body is empty,
 * so *start and *end are left exactly as the caller passed them.
 * NOTE(review): presumably used when pmd sharing cannot occur in this
 * configuration -- confirm the enclosing #ifdef.
 */
static inline void adjust_range_if_pmd_sharing_possible(
struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
}
#define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; }) #define follow_hugetlb_page(m,v,p,vs,a,b,i,w,n) ({ BUG(); 0; })
#define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL) #define follow_huge_addr(mm, addr, write) ERR_PTR(-EINVAL)
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
......
...@@ -2455,6 +2455,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm, ...@@ -2455,6 +2455,12 @@ static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
return vma; return vma;
} }
/*
 * Report whether the span start..end is contained in @vma.
 *
 * Returns false when @vma is NULL, when @start lies below vma->vm_start,
 * or when @end lies above vma->vm_end; returns true otherwise.
 */
static inline bool range_in_vma(struct vm_area_struct *vma,
				unsigned long start, unsigned long end)
{
	if (!vma)
		return false;
	if (start < vma->vm_start)
		return false;
	return end <= vma->vm_end;
}
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
pgprot_t vm_get_page_prot(unsigned long vm_flags); pgprot_t vm_get_page_prot(unsigned long vm_flags);
void vma_set_page_prot(struct vm_area_struct *vma); void vma_set_page_prot(struct vm_area_struct *vma);
......
...@@ -26,7 +26,9 @@ ...@@ -26,7 +26,9 @@
#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT)
#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) #define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT)
......
...@@ -25,7 +25,9 @@ ...@@ -25,7 +25,9 @@
#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB #define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB #define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB #define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
#define MFD_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB #define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
#define MFD_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB #define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB #define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
......
...@@ -28,7 +28,9 @@ ...@@ -28,7 +28,9 @@
#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB #define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB #define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB #define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
#define MAP_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB #define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
#define MAP_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB #define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB #define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
......
...@@ -65,7 +65,9 @@ struct shmid_ds { ...@@ -65,7 +65,9 @@ struct shmid_ds {
#define SHM_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB #define SHM_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
#define SHM_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB #define SHM_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
#define SHM_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB #define SHM_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
#define SHM_HUGE_32MB HUGETLB_FLAG_ENCODE_32MB
#define SHM_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB #define SHM_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
#define SHM_HUGE_512MB HUGETLB_FLAG_ENCODE_512MB
#define SHM_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB #define SHM_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
#define SHM_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB #define SHM_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define SHM_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB #define SHM_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
......
...@@ -206,7 +206,7 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id) ...@@ -206,7 +206,7 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
* Callers of shm_lock() must validate the status of the returned ipc * Callers of shm_lock() must validate the status of the returned ipc
* object pointer and error out as appropriate. * object pointer and error out as appropriate.
*/ */
return (void *)ipcp; return ERR_CAST(ipcp);
} }
static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
......
...@@ -19,7 +19,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd, ...@@ -19,7 +19,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
struct gup_benchmark *gup) struct gup_benchmark *gup)
{ {
ktime_t start_time, end_time; ktime_t start_time, end_time;
unsigned long i, nr, nr_pages, addr, next; unsigned long i, nr_pages, addr, next;
int nr;
struct page **pages; struct page **pages;
nr_pages = gup->size / PAGE_SIZE; nr_pages = gup->size / PAGE_SIZE;
......
...@@ -2931,7 +2931,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) ...@@ -2931,7 +2931,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
else else
page_add_file_rmap(new, true); page_add_file_rmap(new, true);
set_pmd_at(mm, mmun_start, pvmw->pmd, pmde); set_pmd_at(mm, mmun_start, pvmw->pmd, pmde);
if (vma->vm_flags & VM_LOCKED) if ((vma->vm_flags & VM_LOCKED) && !PageDoubleMap(new))
mlock_vma_page(new); mlock_vma_page(new);
update_mmu_cache_pmd(vma, address, pvmw->pmd); update_mmu_cache_pmd(vma, address, pvmw->pmd);
} }
......
...@@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -3326,8 +3326,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
struct page *page; struct page *page;
struct hstate *h = hstate_vma(vma); struct hstate *h = hstate_vma(vma);
unsigned long sz = huge_page_size(h); unsigned long sz = huge_page_size(h);
const unsigned long mmun_start = start; /* For mmu_notifiers */ unsigned long mmun_start = start; /* For mmu_notifiers */
const unsigned long mmun_end = end; /* For mmu_notifiers */ unsigned long mmun_end = end; /* For mmu_notifiers */
WARN_ON(!is_vm_hugetlb_page(vma)); WARN_ON(!is_vm_hugetlb_page(vma));
BUG_ON(start & ~huge_page_mask(h)); BUG_ON(start & ~huge_page_mask(h));
...@@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -3339,6 +3339,11 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
*/ */
tlb_remove_check_page_size_change(tlb, sz); tlb_remove_check_page_size_change(tlb, sz);
tlb_start_vma(tlb, vma); tlb_start_vma(tlb, vma);
/*
* If sharing is possible, alert mmu notifiers of worst case.
*/
adjust_range_if_pmd_sharing_possible(vma, &mmun_start, &mmun_end);
mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
address = start; address = start;
for (; address < end; address += sz) { for (; address < end; address += sz) {
...@@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ...@@ -3349,6 +3354,10 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
ptl = huge_pte_lock(h, mm, ptep); ptl = huge_pte_lock(h, mm, ptep);
if (huge_pmd_unshare(mm, &address, ptep)) { if (huge_pmd_unshare(mm, &address, ptep)) {
spin_unlock(ptl); spin_unlock(ptl);
/*
* We just unmapped a page of PMDs by clearing a PUD.
* The caller's TLB flush range should cover this area.
*/
continue; continue;
} }
...@@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, ...@@ -3431,12 +3440,23 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
{ {
struct mm_struct *mm; struct mm_struct *mm;
struct mmu_gather tlb; struct mmu_gather tlb;
unsigned long tlb_start = start;
unsigned long tlb_end = end;
/*
* If shared PMDs were possibly used within this vma range, adjust
* start/end for worst case tlb flushing.
* Note that we can not be sure if PMDs are shared until we try to
* unmap pages. However, we want to make sure TLB flushing covers
* the largest possible range.
*/
adjust_range_if_pmd_sharing_possible(vma, &tlb_start, &tlb_end);
mm = vma->vm_mm; mm = vma->vm_mm;
tlb_gather_mmu(&tlb, mm, start, end); tlb_gather_mmu(&tlb, mm, tlb_start, tlb_end);
__unmap_hugepage_range(&tlb, vma, start, end, ref_page); __unmap_hugepage_range(&tlb, vma, start, end, ref_page);
tlb_finish_mmu(&tlb, start, end); tlb_finish_mmu(&tlb, tlb_start, tlb_end);
} }
/* /*
...@@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, ...@@ -4298,11 +4318,21 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
pte_t pte; pte_t pte;
struct hstate *h = hstate_vma(vma); struct hstate *h = hstate_vma(vma);
unsigned long pages = 0; unsigned long pages = 0;
unsigned long f_start = start;
unsigned long f_end = end;
bool shared_pmd = false;
/*
* In the case of shared PMDs, the area to flush could be beyond
* start/end. Set f_start/f_end to cover the maximum possible
* range if PMD sharing is possible.
*/
adjust_range_if_pmd_sharing_possible(vma, &f_start, &f_end);
BUG_ON(address >= end); BUG_ON(address >= end);
flush_cache_range(vma, address, end); flush_cache_range(vma, f_start, f_end);
mmu_notifier_invalidate_range_start(mm, start, end); mmu_notifier_invalidate_range_start(mm, f_start, f_end);
i_mmap_lock_write(vma->vm_file->f_mapping); i_mmap_lock_write(vma->vm_file->f_mapping);
for (; address < end; address += huge_page_size(h)) { for (; address < end; address += huge_page_size(h)) {
spinlock_t *ptl; spinlock_t *ptl;
...@@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, ...@@ -4313,6 +4343,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
if (huge_pmd_unshare(mm, &address, ptep)) { if (huge_pmd_unshare(mm, &address, ptep)) {
pages++; pages++;
spin_unlock(ptl); spin_unlock(ptl);
shared_pmd = true;
continue; continue;
} }
pte = huge_ptep_get(ptep); pte = huge_ptep_get(ptep);
...@@ -4348,8 +4379,12 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, ...@@ -4348,8 +4379,12 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare
* may have cleared our pud entry and done put_page on the page table: * may have cleared our pud entry and done put_page on the page table:
* once we release i_mmap_rwsem, another task can do the final put_page * once we release i_mmap_rwsem, another task can do the final put_page
* and that page table be reused and filled with junk. * and that page table be reused and filled with junk. If we actually
* did unshare a page of pmds, flush the range corresponding to the pud.
*/ */
if (shared_pmd)
flush_hugetlb_tlb_range(vma, f_start, f_end);
else
flush_hugetlb_tlb_range(vma, start, end); flush_hugetlb_tlb_range(vma, start, end);
/* /*
* No need to call mmu_notifier_invalidate_range() we are downgrading * No need to call mmu_notifier_invalidate_range() we are downgrading
...@@ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, ...@@ -4358,7 +4393,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
* See Documentation/vm/mmu_notifier.rst * See Documentation/vm/mmu_notifier.rst
*/ */
i_mmap_unlock_write(vma->vm_file->f_mapping); i_mmap_unlock_write(vma->vm_file->f_mapping);
mmu_notifier_invalidate_range_end(mm, start, end); mmu_notifier_invalidate_range_end(mm, f_start, f_end);
return pages << h->order; return pages << h->order;
} }
...@@ -4545,12 +4580,40 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr) ...@@ -4545,12 +4580,40 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
/* /*
* check on proper vm_flags and page table alignment * check on proper vm_flags and page table alignment
*/ */
if (vma->vm_flags & VM_MAYSHARE && if (vma->vm_flags & VM_MAYSHARE && range_in_vma(vma, base, end))
vma->vm_start <= base && end <= vma->vm_end)
return true; return true;
return false; return false;
} }
/*
 * Widen the range *start..*end so that it covers every PUD_SIZE-aligned
 * region it touches that might be mapped through a shared pmd.
 *
 * Nothing is changed unless @vma has VM_MAYSHARE set.  Otherwise the range
 * is walked in PUD_SIZE steps beginning at the original *start; for each
 * step the enclosing PUD_SIZE-aligned region is computed, and if that whole
 * region lies inside @vma the range is grown (never shrunk) to include it.
 */
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
				unsigned long *start, unsigned long *end)
{
	unsigned long addr;

	if (!(vma->vm_flags & VM_MAYSHARE))
		return;

	for (addr = *start; addr < *end; addr += PUD_SIZE) {
		unsigned long region_start = addr & PUD_MASK;
		unsigned long region_end = region_start + PUD_SIZE;

		/*
		 * Sharing is only considered possible when the entire
		 * aligned region fits inside the vma; skip it otherwise.
		 */
		if (!range_in_vma(vma, region_start, region_end))
			continue;

		if (*start > region_start)
			*start = region_start;
		if (*end < region_end)
			*end = region_end;
	}
}
/* /*
* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc() * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
* and returns the corresponding pte. While this is not necessary for the * and returns the corresponding pte. While this is not necessary for the
...@@ -4648,6 +4711,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) ...@@ -4648,6 +4711,11 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{ {
return 0; return 0;
} }
/*
 * Empty variant of adjust_range_if_pmd_sharing_possible(): *start and *end
 * are left untouched.
 * NOTE(review): the "#endif" comment that follows suggests this is the branch
 * built without CONFIG_ARCH_WANT_HUGE_PMD_SHARE -- confirm against the
 * matching #ifdef/#else, which is not visible here.
 */
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
unsigned long *start, unsigned long *end)
{
}
#define want_pmd_share() (0) #define want_pmd_share() (0)
#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ #endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
......
...@@ -96,7 +96,7 @@ static long madvise_behavior(struct vm_area_struct *vma, ...@@ -96,7 +96,7 @@ static long madvise_behavior(struct vm_area_struct *vma,
new_flags |= VM_DONTDUMP; new_flags |= VM_DONTDUMP;
break; break;
case MADV_DODUMP: case MADV_DODUMP:
if (new_flags & VM_SPECIAL) { if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) {
error = -EINVAL; error = -EINVAL;
goto out; goto out;
} }
......
...@@ -275,6 +275,9 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, ...@@ -275,6 +275,9 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new)) if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
mlock_vma_page(new); mlock_vma_page(new);
if (PageTransHuge(page) && PageMlocked(page))
clear_page_mlock(page);
/* No need to invalidate - it was non-present before */ /* No need to invalidate - it was non-present before */
update_mmu_cache(vma, pvmw.address, pvmw.pte); update_mmu_cache(vma, pvmw.address, pvmw.pte);
} }
...@@ -1411,7 +1414,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page, ...@@ -1411,7 +1414,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
* we encounter them after the rest of the list * we encounter them after the rest of the list
* is processed. * is processed.
*/ */
if (PageTransHuge(page)) { if (PageTransHuge(page) && !PageHuge(page)) {
lock_page(page); lock_page(page);
rc = split_huge_page_to_list(page, from); rc = split_huge_page_to_list(page, from);
unlock_page(page); unlock_page(page);
......
...@@ -1362,11 +1362,21 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -1362,11 +1362,21 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
} }
/* /*
* We have to assume the worse case ie pmd for invalidation. Note that * For THP, we have to assume the worse case ie pmd for invalidation.
* the page can not be free in this function as call of try_to_unmap() * For hugetlb, it could be much worse if we need to do pud
* must hold a reference on the page. * invalidation in the case of pmd sharing.
*
* Note that the page can not be free in this function as call of
* try_to_unmap() must hold a reference on the page.
*/ */
end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page))); end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page)));
if (PageHuge(page)) {
/*
* If sharing is possible, start and end will be adjusted
* accordingly.
*/
adjust_range_if_pmd_sharing_possible(vma, &start, &end);
}
mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
while (page_vma_mapped_walk(&pvmw)) { while (page_vma_mapped_walk(&pvmw)) {
...@@ -1409,6 +1419,32 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, ...@@ -1409,6 +1419,32 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte);
address = pvmw.address; address = pvmw.address;
if (PageHuge(page)) {
if (huge_pmd_unshare(mm, &address, pvmw.pte)) {
/*
* huge_pmd_unshare unmapped an entire PMD
* page. There is no way of knowing exactly
* which PMDs may be cached for this mm, so
* we must flush them all. start/end were
* already adjusted above to cover this range.
*/
flush_cache_range(vma, start, end);
flush_tlb_range(vma, start, end);
mmu_notifier_invalidate_range(mm, start, end);
/*
* The ref count of the PMD page was dropped
* which is part of the way map counting
* is done for shared PMDs. Return 'true'
* here. When there is no other sharing,
* huge_pmd_unshare returns false and we will
* unmap the actual page and drop map count
* to zero.
*/
page_vma_mapped_walk_done(&pvmw);
break;
}
}
if (IS_ENABLED(CONFIG_MIGRATION) && if (IS_ENABLED(CONFIG_MIGRATION) &&
(flags & TTU_MIGRATION) && (flags & TTU_MIGRATION) &&
......
...@@ -580,8 +580,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, ...@@ -580,8 +580,8 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
struct mem_cgroup *memcg, int priority) struct mem_cgroup *memcg, int priority)
{ {
struct memcg_shrinker_map *map; struct memcg_shrinker_map *map;
unsigned long freed = 0; unsigned long ret, freed = 0;
int ret, i; int i;
if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg)) if (!memcg_kmem_enabled() || !mem_cgroup_online(memcg))
return 0; return 0;
...@@ -677,9 +677,8 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, ...@@ -677,9 +677,8 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
struct mem_cgroup *memcg, struct mem_cgroup *memcg,
int priority) int priority)
{ {
unsigned long ret, freed = 0;
struct shrinker *shrinker; struct shrinker *shrinker;
unsigned long freed = 0;
int ret;
if (!mem_cgroup_is_root(memcg)) if (!mem_cgroup_is_root(memcg))
return shrink_slab_memcg(gfp_mask, nid, memcg, priority); return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
......
...@@ -1275,6 +1275,9 @@ const char * const vmstat_text[] = { ...@@ -1275,6 +1275,9 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_SMP #ifdef CONFIG_SMP
"nr_tlb_remote_flush", "nr_tlb_remote_flush",
"nr_tlb_remote_flush_received", "nr_tlb_remote_flush_received",
#else
"", /* nr_tlb_remote_flush */
"", /* nr_tlb_remote_flush_received */
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
"nr_tlb_local_flush_all", "nr_tlb_local_flush_all",
"nr_tlb_local_flush_one", "nr_tlb_local_flush_one",
...@@ -1283,7 +1286,6 @@ const char * const vmstat_text[] = { ...@@ -1283,7 +1286,6 @@ const char * const vmstat_text[] = {
#ifdef CONFIG_DEBUG_VM_VMACACHE #ifdef CONFIG_DEBUG_VM_VMACACHE
"vmacache_find_calls", "vmacache_find_calls",
"vmacache_find_hits", "vmacache_find_hits",
"vmacache_full_flushes",
#endif #endif
#ifdef CONFIG_SWAP #ifdef CONFIG_SWAP
"swap_ra", "swap_ra",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment