Commit a232591b authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge fixes from Andrew Morton:
 "11 fixes.

  The presence of 'thp: reduce indentation level in change_huge_pmd()'
  is unfortunate. But the patchset had been decently reviewed and tested
  before we decided it was needed in -stable and I felt it best not to
  churn things at the last minute"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mailmap: add Martin Kepplinger's email
  zsmalloc: expand class bit
  zram: do not use copy_page with non-page aligned address
  zram: fix operator precedence to get offset
  hugetlbfs: fix offset overflow in hugetlbfs mmap
  thp: fix MADV_DONTNEED vs clear soft dirty race
  thp: fix MADV_DONTNEED vs. MADV_FREE race
  mm: drop unused pmdp_huge_get_and_clear_notify()
  thp: fix MADV_DONTNEED vs. numa balancing race
  thp: reduce indentation level in change_huge_pmd()
  z3fold: fix page locking in z3fold_alloc()
parents d8a6e3ae 5714320d
@@ -99,6 +99,8 @@ Linas Vepstas <linas@austin.ibm.com>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
 Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
 Mark Brown <broonie@sirena.org.uk>
+Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
+Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
 Matthieu CASTET <castet.matthieu@free.fr>
 Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@brturbo.com.br>
 Mauro Carvalho Chehab <mchehab@kernel.org> <maurochehab@gmail.com>
...
@@ -523,7 +523,7 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
         cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
         if (size == PAGE_SIZE) {
-                copy_page(mem, cmem);
+                memcpy(mem, cmem, PAGE_SIZE);
         } else {
                 struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
@@ -717,7 +717,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
         if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
                 src = kmap_atomic(page);
-                copy_page(cmem, src);
+                memcpy(cmem, src, PAGE_SIZE);
                 kunmap_atomic(src);
         } else {
                 memcpy(cmem, src, clen);
@@ -928,7 +928,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
         }
         index = sector >> SECTORS_PER_PAGE_SHIFT;
-        offset = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;
+        offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
         bv.bv_page = page;
         bv.bv_len = PAGE_SIZE;
...
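Two separate zram issues are fixed above: copy_page() expects page-aligned, page-sized buffers, which the buffers used here are not guaranteed to be (hence the switch to memcpy()), and the byte offset in zram_rw_page() was computed with the wrong operator precedence, since << binds tighter than & in C. A minimal user-space sketch of the precedence bug; SECTOR_SHIFT = 9 and SECTORS_PER_PAGE = 8 are illustrative values assuming 512-byte sectors and 4 KiB pages, not taken from this diff:

    #include <stdio.h>

    /* Illustrative values: 512-byte sectors, 4 KiB pages. */
    #define SECTOR_SHIFT     9
    #define SECTORS_PER_PAGE 8

    int main(void)
    {
        unsigned long sector = 13;  /* page 1, sector 5 within the page */

        /* Old expression: '<<' binds tighter than '&', so this is really
         * sector & ((SECTORS_PER_PAGE - 1) << SECTOR_SHIFT). */
        unsigned long buggy = sector & (SECTORS_PER_PAGE - 1) << SECTOR_SHIFT;

        /* Fixed expression: mask off the sector within the page first,
         * then convert sectors to a byte offset. */
        unsigned long fixed = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

        printf("buggy offset = %lu, fixed offset = %lu\n", buggy, fixed);
        return 0;
    }

For sector 13 the old expression yields 0 instead of 2560, so a read or write through zram_rw_page() could land on the wrong part of the page.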
@@ -136,17 +136,26 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
         vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
         vma->vm_ops = &hugetlb_vm_ops;

+        /*
+         * Offset passed to mmap (before page shift) could have been
+         * negative when represented as a (l)off_t.
+         */
+        if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+                return -EINVAL;
+
         if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
                 return -EINVAL;

         vma_len = (loff_t)(vma->vm_end - vma->vm_start);
+        len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+        /* check for overflow */
+        if (len < vma_len)
+                return -EINVAL;

         inode_lock(inode);
         file_accessed(file);

         ret = -ENOMEM;
-        len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
         if (hugetlb_reserve_pages(inode,
                         vma->vm_pgoff >> huge_page_order(h),
                         len >> huge_page_shift(h), vma,
@@ -155,7 +164,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
         ret = 0;
         if (vma->vm_flags & VM_WRITE && inode->i_size < len)
-                inode->i_size = len;
+                i_size_write(inode, len);
 out:
         inode_unlock(inode);
...
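The hugetlbfs change rejects mappings whose file offset, once converted from pages to bytes, no longer fits in a signed loff_t, and also verifies that adding the mapping length does not wrap. A small sketch of the first failure mode, assuming 4 KiB pages and the usual two's-complement conversion; PAGE_SHIFT and the pgoff value below are illustrative:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12   /* assumed: 4 KiB pages */

    int main(void)
    {
        /* Page index passed via mmap()'s offset argument.  Anything at or
         * above 2^51 pages corresponds to a byte offset of 2^63 or more,
         * which no longer fits in a signed 64-bit loff_t. */
        uint64_t vm_pgoff = UINT64_C(1) << (63 - PAGE_SHIFT);

        /* Typical two's-complement result: the byte offset comes out negative. */
        int64_t byte_off = (int64_t)(vm_pgoff << PAGE_SHIFT);

        printf("pgoff %" PRIu64 " -> byte offset %" PRId64 "\n",
               vm_pgoff, byte_off);

        /* The first added check rejects exactly this case. */
        if (byte_off < 0)
            printf("hugetlbfs mmap would now return -EINVAL\n");
        return 0;
    }

The second added check, len < vma_len, catches the related case where the offset itself is still non-negative but adding the mapping length wraps the signed total.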
@@ -900,7 +900,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
                 unsigned long addr, pmd_t *pmdp)
 {
-        pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
+        pmd_t pmd = *pmdp;
+
+        /* See comment in change_huge_pmd() */
+        pmdp_invalidate(vma, addr, pmdp);
+        if (pmd_dirty(*pmdp))
+                pmd = pmd_mkdirty(pmd);
+        if (pmd_young(*pmdp))
+                pmd = pmd_mkyoung(pmd);
         pmd = pmd_wrprotect(pmd);
         pmd = pmd_clear_soft_dirty(pmd);
...
@@ -394,18 +394,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
         ___pud;                                                 \
 })

-#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd)           \
-({                                                                      \
-        unsigned long ___haddr = __haddr & HPAGE_PMD_MASK;             \
-        pmd_t ___pmd;                                                   \
-                                                                        \
-        ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd);        \
-        mmu_notifier_invalidate_range(__mm, ___haddr,                  \
-                                      ___haddr + HPAGE_PMD_SIZE);      \
-                                                                        \
-        ___pmd;                                                         \
-})
-
 /*
  * set_pte_at_notify() sets the pte _after_ running the notifier.
  * This is safe to start by updating the secondary MMUs, because the primary MMU
@@ -489,7 +477,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
 #define ptep_clear_flush_notify ptep_clear_flush
 #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush
 #define pudp_huge_clear_flush_notify pudp_huge_clear_flush
-#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear
 #define set_pte_at_notify set_pte_at
 #endif /* CONFIG_MMU_NOTIFIER */
...
@@ -1568,8 +1568,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
                 deactivate_page(page);
         if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
-                orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
-                        tlb->fullmm);
+                pmdp_invalidate(vma, addr, pmd);
                 orig_pmd = pmd_mkold(orig_pmd);
                 orig_pmd = pmd_mkclean(orig_pmd);
@@ -1724,12 +1723,15 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 {
         struct mm_struct *mm = vma->vm_mm;
         spinlock_t *ptl;
-        int ret = 0;
+        pmd_t entry;
+        bool preserve_write;
+        int ret;

         ptl = __pmd_trans_huge_lock(pmd, vma);
-        if (ptl) {
-                pmd_t entry;
-                bool preserve_write = prot_numa && pmd_write(*pmd);
+        if (!ptl)
+                return 0;
+
+        preserve_write = prot_numa && pmd_write(*pmd);
         ret = 1;

         /*
@@ -1737,24 +1739,53 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
          * data is likely to be read-cached on the local CPU and
          * local/remote hits to the zero page are not interesting.
          */
-        if (prot_numa && is_huge_zero_pmd(*pmd)) {
-                spin_unlock(ptl);
-                return ret;
-        }
+        if (prot_numa && is_huge_zero_pmd(*pmd))
+                goto unlock;
+
+        if (prot_numa && pmd_protnone(*pmd))
+                goto unlock;
+
+        /*
+         * In case prot_numa, we are under down_read(mmap_sem). It's critical
+         * to not clear pmd intermittently to avoid race with MADV_DONTNEED
+         * which is also under down_read(mmap_sem):
+         *
+         *      CPU0:                           CPU1:
+         *                              change_huge_pmd(prot_numa=1)
+         *                               pmdp_huge_get_and_clear_notify()
+         * madvise_dontneed()
+         *  zap_pmd_range()
+         *   pmd_trans_huge(*pmd) == 0 (without ptl)
+         *   // skip the pmd
+         *                               set_pmd_at();
+         *                               // pmd is re-established
+         *
+         * The race makes MADV_DONTNEED miss the huge pmd and don't clear it
+         * which may break userspace.
+         *
+         * pmdp_invalidate() is required to make sure we don't miss
+         * dirty/young flags set by hardware.
+         */
+        entry = *pmd;
+        pmdp_invalidate(vma, addr, pmd);
+
+        /*
+         * Recover dirty/young flags. It relies on pmdp_invalidate to not
+         * corrupt them.
+         */
+        if (pmd_dirty(*pmd))
+                entry = pmd_mkdirty(entry);
+        if (pmd_young(*pmd))
+                entry = pmd_mkyoung(entry);
-        if (!prot_numa || !pmd_protnone(*pmd)) {
-                entry = pmdp_huge_get_and_clear_notify(mm, addr, pmd);
         entry = pmd_modify(entry, newprot);
         if (preserve_write)
                 entry = pmd_mk_savedwrite(entry);
         ret = HPAGE_PMD_NR;
         set_pmd_at(mm, addr, pmd, entry);
-        BUG_ON(vma_is_anonymous(vma) && !preserve_write &&
-                        pmd_write(entry));
-        }
+        BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
+unlock:
         spin_unlock(ptl);
-        }
         return ret;
 }
...
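The heart of the THP fixes is the comment added to change_huge_pmd(): zap_pmd_range() tests pmd_trans_huge(*pmd) without taking the page table lock, so any code that transiently clears the pmd and then writes it back can make a concurrent MADV_DONTNEED skip the entry. The same reasoning is behind the clear_soft_dirty_pmd() and madvise_free_huge_pmd() changes, which switch to pmdp_invalidate() so the entry never looks empty. The sketch below is a user-space analogue of that race, not kernel code: the "writer" stands in for the old change_huge_pmd() behaviour, the "zapper" for zap_pmd_range(), and a shared atomic long for the pmd entry.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    /* Shared "entry": nonzero means present, zero means not present.
     * Purely illustrative; this is not kernel code. */
    static atomic_long entry = 42;

    /* Stand-in for the old change_huge_pmd() behaviour: clear the entry,
     * compute the new value, write it back.  The window where entry == 0
     * is the problem. */
    static void *writer(void *arg)
    {
        long v;

        (void)arg;
        v = atomic_exchange(&entry, 0);   /* transient clear */
        v |= 0x100;                       /* "change protection" */
        atomic_store(&entry, v);          /* re-establish */
        return NULL;
    }

    /* Stand-in for zap_pmd_range(): check the entry locklessly and skip it
     * if it looks empty, like the pmd_trans_huge(*pmd) == 0 case above. */
    static void *zapper(void *arg)
    {
        (void)arg;
        if (atomic_load(&entry) == 0) {
            printf("zapper: entry looked empty, skipped it\n");
            return NULL;
        }
        atomic_store(&entry, 0);
        printf("zapper: cleared the entry\n");
        return NULL;
    }

    int main(void)
    {
        pthread_t a, b;

        pthread_create(&a, NULL, writer, NULL);
        pthread_create(&b, NULL, zapper, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);

        /* If the zapper hit the transient-clear window, the entry survives
         * what was meant to remove it. */
        printf("final entry = %ld\n", atomic_load(&entry));
        return 0;
    }

Run it in a loop and the zapper occasionally reports the entry as empty even though the writer restores it immediately afterwards; keeping the entry non-empty across the update, which is what pmdp_invalidate() provides, closes that window while still letting the dirty/young bits be recovered.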
@@ -185,6 +185,12 @@ static inline void z3fold_page_lock(struct z3fold_header *zhdr)
         spin_lock(&zhdr->page_lock);
 }

+/* Try to lock a z3fold page */
+static inline int z3fold_page_trylock(struct z3fold_header *zhdr)
+{
+        return spin_trylock(&zhdr->page_lock);
+}
+
 /* Unlock a z3fold page */
 static inline void z3fold_page_unlock(struct z3fold_header *zhdr)
 {
@@ -385,7 +391,7 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
                         spin_lock(&pool->lock);
                         zhdr = list_first_entry_or_null(&pool->unbuddied[i],
                                                 struct z3fold_header, buddy);
-                        if (!zhdr) {
+                        if (!zhdr || !z3fold_page_trylock(zhdr)) {
                                 spin_unlock(&pool->lock);
                                 continue;
                         }
@@ -394,7 +400,6 @@ static int z3fold_alloc(struct z3fold_pool *pool, size_t size, gfp_t gfp,
                         spin_unlock(&pool->lock);
                         page = virt_to_page(zhdr);
-                        z3fold_page_lock(zhdr);
                         if (zhdr->first_chunks == 0) {
                                 if (zhdr->middle_chunks != 0 &&
                                     chunks >= zhdr->start_middle)
...
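z3fold_alloc() used to drop pool->lock and only then take the per-page lock, leaving a window in which the page it had just picked could change or go away under it. The fix takes the page lock with a trylock while still holding the pool lock and simply skips busy pages. Below is a user-space sketch of that pattern, with pthread mutexes standing in for pool->lock and zhdr->page_lock; struct item and pick_item() are made up for the illustration:

    #include <pthread.h>
    #include <stdio.h>

    /* Made-up item type: its own lock mirrors zhdr->page_lock, while the
     * separate list_lock mirrors pool->lock. */
    struct item {
        pthread_mutex_t lock;
        int value;
    };

    #define NITEMS 4

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct item items[NITEMS];

    /* Pick an item while holding the list lock, keeping it only if its own
     * lock can be taken without blocking -- the pattern z3fold_alloc() now
     * follows via z3fold_page_trylock(). */
    static struct item *pick_item(void)
    {
        struct item *it = NULL;
        int i;

        pthread_mutex_lock(&list_lock);
        for (i = 0; i < NITEMS; i++) {
            if (pthread_mutex_trylock(&items[i].lock) == 0) {
                it = &items[i];     /* returned with its lock held */
                break;
            }
            /* Busy item: skip it instead of sleeping on its lock while
             * the list lock is still held. */
        }
        pthread_mutex_unlock(&list_lock);
        return it;
    }

    int main(void)
    {
        struct item *it;
        int i;

        for (i = 0; i < NITEMS; i++)
            pthread_mutex_init(&items[i].lock, NULL);

        /* Pretend another code path currently owns the first item. */
        pthread_mutex_lock(&items[0].lock);

        it = pick_item();
        printf("picked item %ld\n", it ? (long)(it - items) : -1L);

        if (it)
            pthread_mutex_unlock(&it->lock);
        pthread_mutex_unlock(&items[0].lock);
        return 0;
    }

Because the per-item lock is only tried, the thread never sleeps on a contended lock while holding the list lock, and the item is already locked by the time the list lock is released.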
@@ -276,7 +276,7 @@ struct zs_pool {
 struct zspage {
         struct {
                 unsigned int fullness:FULLNESS_BITS;
-                unsigned int class:CLASS_BITS;
+                unsigned int class:CLASS_BITS + 1;
                 unsigned int isolated:ISOLATED_BITS;
                 unsigned int magic:MAGIC_VAL_BITS;
         };
...
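The zsmalloc fix widens the class bitfield by one bit so the largest size-class index still round-trips through struct zspage; if the index needs more bits than the field provides, the stored value silently truncates and the zspage is attributed to the wrong class. A tiny sketch of that truncation, using an 8-bit field purely for illustration (the real CLASS_BITS value is not visible in this hunk):

    #include <stdio.h>

    /* Illustrative only: a size-class index kept in a too-narrow bitfield. */
    #define DEMO_CLASS_BITS 8

    struct zspage_demo {
        unsigned int fullness:2;
        unsigned int class:DEMO_CLASS_BITS;  /* can hold 0..255 */
        unsigned int isolated:3;
        unsigned int magic:8;
    };

    int main(void)
    {
        struct zspage_demo z = { 0 };
        unsigned int class_idx = 256;   /* one class index too many */

        z.class = class_idx;            /* silently truncated modulo 256 */
        printf("stored class %u, read back %u\n",
               class_idx, (unsigned int)z.class);
        return 0;
    }

An index of 256 stored in an 8-bit field reads back as 0, which is exactly the kind of silent misfiling the extra bit prevents.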