Commit f0395d5b authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "7 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm, swap: fix race between swap count continuation operations
  mm/huge_memory.c: deposit page table when copying a PMD migration entry
  initramfs: fix initramfs rebuilds w/ compression after disabling
  fs/hugetlbfs/inode.c: fix hwpoison reserve accounting
  ocfs2: fstrim: Fix start offset of first cluster group during fstrim
  mm, /proc/pid/pagemap: fix soft dirty marking for PMD migration entry
  userfaultfd: hugetlbfs: prevent UFFDIO_COPY to fill beyond the end of i_size
parents fb615d61 2628bd6f
...@@ -842,9 +842,12 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping, ...@@ -842,9 +842,12 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping,
struct page *page) struct page *page)
{ {
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
pgoff_t index = page->index;
remove_huge_page(page); remove_huge_page(page);
if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
hugetlb_fix_reserve_counts(inode); hugetlb_fix_reserve_counts(inode);
return 0; return 0;
} }
......
...@@ -7304,13 +7304,24 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, ...@@ -7304,13 +7304,24 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
static int ocfs2_trim_extent(struct super_block *sb, static int ocfs2_trim_extent(struct super_block *sb,
struct ocfs2_group_desc *gd, struct ocfs2_group_desc *gd,
u32 start, u32 count) u64 group, u32 start, u32 count)
{ {
u64 discard, bcount; u64 discard, bcount;
struct ocfs2_super *osb = OCFS2_SB(sb);
bcount = ocfs2_clusters_to_blocks(sb, count); bcount = ocfs2_clusters_to_blocks(sb, count);
discard = le64_to_cpu(gd->bg_blkno) + discard = ocfs2_clusters_to_blocks(sb, start);
ocfs2_clusters_to_blocks(sb, start);
/*
* For the first cluster group, the gd->bg_blkno is not at the start
* of the group, but at an offset from the start. If we add it while
* calculating discard for first group, we will wrongly start fstrim a
* few blocks after the desired start block and the range can cross
* over into the next cluster group. So, add it only if this is not
* the first cluster group.
*/
if (group != osb->first_cluster_group_blkno)
discard += le64_to_cpu(gd->bg_blkno);
trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount); trace_ocfs2_trim_extent(sb, (unsigned long long)discard, bcount);
...@@ -7318,7 +7329,7 @@ static int ocfs2_trim_extent(struct super_block *sb, ...@@ -7318,7 +7329,7 @@ static int ocfs2_trim_extent(struct super_block *sb,
} }
static int ocfs2_trim_group(struct super_block *sb, static int ocfs2_trim_group(struct super_block *sb,
struct ocfs2_group_desc *gd, struct ocfs2_group_desc *gd, u64 group,
u32 start, u32 max, u32 minbits) u32 start, u32 max, u32 minbits)
{ {
int ret = 0, count = 0, next; int ret = 0, count = 0, next;
...@@ -7337,7 +7348,7 @@ static int ocfs2_trim_group(struct super_block *sb, ...@@ -7337,7 +7348,7 @@ static int ocfs2_trim_group(struct super_block *sb,
next = ocfs2_find_next_bit(bitmap, max, start); next = ocfs2_find_next_bit(bitmap, max, start);
if ((next - start) >= minbits) { if ((next - start) >= minbits) {
ret = ocfs2_trim_extent(sb, gd, ret = ocfs2_trim_extent(sb, gd, group,
start, next - start); start, next - start);
if (ret < 0) { if (ret < 0) {
mlog_errno(ret); mlog_errno(ret);
...@@ -7435,7 +7446,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range) ...@@ -7435,7 +7446,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
} }
gd = (struct ocfs2_group_desc *)gd_bh->b_data; gd = (struct ocfs2_group_desc *)gd_bh->b_data;
cnt = ocfs2_trim_group(sb, gd, first_bit, last_bit, minlen); cnt = ocfs2_trim_group(sb, gd, group,
first_bit, last_bit, minlen);
brelse(gd_bh); brelse(gd_bh);
gd_bh = NULL; gd_bh = NULL;
if (cnt < 0) { if (cnt < 0) {
......
...@@ -1311,13 +1311,15 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, ...@@ -1311,13 +1311,15 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
pmd_t pmd = *pmdp; pmd_t pmd = *pmdp;
struct page *page = NULL; struct page *page = NULL;
if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(pmd)) if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY; flags |= PM_SOFT_DIRTY;
if (pmd_present(pmd)) { if (pmd_present(pmd)) {
page = pmd_page(pmd); page = pmd_page(pmd);
flags |= PM_PRESENT; flags |= PM_PRESENT;
if (pmd_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
if (pm->show_pfn) if (pm->show_pfn)
frame = pmd_pfn(pmd) + frame = pmd_pfn(pmd) +
((addr & ~PMD_MASK) >> PAGE_SHIFT); ((addr & ~PMD_MASK) >> PAGE_SHIFT);
...@@ -1329,6 +1331,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, ...@@ -1329,6 +1331,8 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
frame = swp_type(entry) | frame = swp_type(entry) |
(swp_offset(entry) << MAX_SWAPFILES_SHIFT); (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
flags |= PM_SWAP; flags |= PM_SWAP;
if (pmd_swp_soft_dirty(pmd))
flags |= PM_SOFT_DIRTY;
VM_BUG_ON(!is_pmd_migration_entry(pmd)); VM_BUG_ON(!is_pmd_migration_entry(pmd));
page = migration_entry_to_page(entry); page = migration_entry_to_page(entry);
} }
......
...@@ -266,6 +266,10 @@ struct swap_info_struct { ...@@ -266,6 +266,10 @@ struct swap_info_struct {
* both locks need hold, hold swap_lock * both locks need hold, hold swap_lock
* first. * first.
*/ */
spinlock_t cont_lock; /*
* protect swap count continuation page
* list.
*/
struct work_struct discard_work; /* discard worker */ struct work_struct discard_work; /* discard worker */
struct swap_cluster_list discard_clusters; /* discard clusters list */ struct swap_cluster_list discard_clusters; /* discard clusters list */
}; };
......
...@@ -941,6 +941,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ...@@ -941,6 +941,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd = pmd_swp_mksoft_dirty(pmd); pmd = pmd_swp_mksoft_dirty(pmd);
set_pmd_at(src_mm, addr, src_pmd, pmd); set_pmd_at(src_mm, addr, src_pmd, pmd);
} }
add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
atomic_long_inc(&dst_mm->nr_ptes);
pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
set_pmd_at(dst_mm, addr, dst_pmd, pmd); set_pmd_at(dst_mm, addr, dst_pmd, pmd);
ret = 0; ret = 0;
goto out_unlock; goto out_unlock;
......
...@@ -3984,6 +3984,9 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -3984,6 +3984,9 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
unsigned long src_addr, unsigned long src_addr,
struct page **pagep) struct page **pagep)
{ {
struct address_space *mapping;
pgoff_t idx;
unsigned long size;
int vm_shared = dst_vma->vm_flags & VM_SHARED; int vm_shared = dst_vma->vm_flags & VM_SHARED;
struct hstate *h = hstate_vma(dst_vma); struct hstate *h = hstate_vma(dst_vma);
pte_t _dst_pte; pte_t _dst_pte;
...@@ -4021,13 +4024,24 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -4021,13 +4024,24 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
__SetPageUptodate(page); __SetPageUptodate(page);
set_page_huge_active(page); set_page_huge_active(page);
mapping = dst_vma->vm_file->f_mapping;
idx = vma_hugecache_offset(h, dst_vma, dst_addr);
/* /*
* If shared, add to page cache * If shared, add to page cache
*/ */
if (vm_shared) { if (vm_shared) {
struct address_space *mapping = dst_vma->vm_file->f_mapping; size = i_size_read(mapping->host) >> huge_page_shift(h);
pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr); ret = -EFAULT;
if (idx >= size)
goto out_release_nounlock;
/*
* Serialization between remove_inode_hugepages() and
* huge_add_to_page_cache() below happens through the
* hugetlb_fault_mutex_table that here must be held by
* the caller.
*/
ret = huge_add_to_page_cache(page, mapping, idx); ret = huge_add_to_page_cache(page, mapping, idx);
if (ret) if (ret)
goto out_release_nounlock; goto out_release_nounlock;
...@@ -4036,6 +4050,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -4036,6 +4050,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
ptl = huge_pte_lockptr(h, dst_mm, dst_pte); ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
spin_lock(ptl); spin_lock(ptl);
/*
* Recheck the i_size after holding PT lock to make sure not
* to leave any page mapped (as page_mapped()) beyond the end
* of the i_size (remove_inode_hugepages() is strict about
* enforcing that). If we bail out here, we'll also leave a
* page in the radix tree in the vm_shared case beyond the end
* of the i_size, but remove_inode_hugepages() will take care
* of it as soon as we drop the hugetlb_fault_mutex_table.
*/
size = i_size_read(mapping->host) >> huge_page_shift(h);
ret = -EFAULT;
if (idx >= size)
goto out_release_unlock;
ret = -EEXIST; ret = -EEXIST;
if (!huge_pte_none(huge_ptep_get(dst_pte))) if (!huge_pte_none(huge_ptep_get(dst_pte)))
goto out_release_unlock; goto out_release_unlock;
......
...@@ -2869,6 +2869,7 @@ static struct swap_info_struct *alloc_swap_info(void) ...@@ -2869,6 +2869,7 @@ static struct swap_info_struct *alloc_swap_info(void)
p->flags = SWP_USED; p->flags = SWP_USED;
spin_unlock(&swap_lock); spin_unlock(&swap_lock);
spin_lock_init(&p->lock); spin_lock_init(&p->lock);
spin_lock_init(&p->cont_lock);
return p; return p;
} }
...@@ -3545,6 +3546,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) ...@@ -3545,6 +3546,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
head = vmalloc_to_page(si->swap_map + offset); head = vmalloc_to_page(si->swap_map + offset);
offset &= ~PAGE_MASK; offset &= ~PAGE_MASK;
spin_lock(&si->cont_lock);
/* /*
* Page allocation does not initialize the page's lru field, * Page allocation does not initialize the page's lru field,
* but it does always reset its private field. * but it does always reset its private field.
...@@ -3564,7 +3566,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) ...@@ -3564,7 +3566,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
* a continuation page, free our allocation and use this one. * a continuation page, free our allocation and use this one.
*/ */
if (!(count & COUNT_CONTINUED)) if (!(count & COUNT_CONTINUED))
goto out; goto out_unlock_cont;
map = kmap_atomic(list_page) + offset; map = kmap_atomic(list_page) + offset;
count = *map; count = *map;
...@@ -3575,11 +3577,13 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask) ...@@ -3575,11 +3577,13 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
* free our allocation and use this one. * free our allocation and use this one.
*/ */
if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX) if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)
goto out; goto out_unlock_cont;
} }
list_add_tail(&page->lru, &head->lru); list_add_tail(&page->lru, &head->lru);
page = NULL; /* now it's attached, don't free it */ page = NULL; /* now it's attached, don't free it */
out_unlock_cont:
spin_unlock(&si->cont_lock);
out: out:
unlock_cluster(ci); unlock_cluster(ci);
spin_unlock(&si->lock); spin_unlock(&si->lock);
...@@ -3604,6 +3608,7 @@ static bool swap_count_continued(struct swap_info_struct *si, ...@@ -3604,6 +3608,7 @@ static bool swap_count_continued(struct swap_info_struct *si,
struct page *head; struct page *head;
struct page *page; struct page *page;
unsigned char *map; unsigned char *map;
bool ret;
head = vmalloc_to_page(si->swap_map + offset); head = vmalloc_to_page(si->swap_map + offset);
if (page_private(head) != SWP_CONTINUED) { if (page_private(head) != SWP_CONTINUED) {
...@@ -3611,6 +3616,7 @@ static bool swap_count_continued(struct swap_info_struct *si, ...@@ -3611,6 +3616,7 @@ static bool swap_count_continued(struct swap_info_struct *si,
return false; /* need to add count continuation */ return false; /* need to add count continuation */
} }
spin_lock(&si->cont_lock);
offset &= ~PAGE_MASK; offset &= ~PAGE_MASK;
page = list_entry(head->lru.next, struct page, lru); page = list_entry(head->lru.next, struct page, lru);
map = kmap_atomic(page) + offset; map = kmap_atomic(page) + offset;
...@@ -3631,8 +3637,10 @@ static bool swap_count_continued(struct swap_info_struct *si, ...@@ -3631,8 +3637,10 @@ static bool swap_count_continued(struct swap_info_struct *si,
if (*map == SWAP_CONT_MAX) { if (*map == SWAP_CONT_MAX) {
kunmap_atomic(map); kunmap_atomic(map);
page = list_entry(page->lru.next, struct page, lru); page = list_entry(page->lru.next, struct page, lru);
if (page == head) if (page == head) {
return false; /* add count continuation */ ret = false; /* add count continuation */
goto out;
}
map = kmap_atomic(page) + offset; map = kmap_atomic(page) + offset;
init_map: *map = 0; /* we didn't zero the page */ init_map: *map = 0; /* we didn't zero the page */
} }
...@@ -3645,7 +3653,7 @@ init_map: *map = 0; /* we didn't zero the page */ ...@@ -3645,7 +3653,7 @@ init_map: *map = 0; /* we didn't zero the page */
kunmap_atomic(map); kunmap_atomic(map);
page = list_entry(page->lru.prev, struct page, lru); page = list_entry(page->lru.prev, struct page, lru);
} }
return true; /* incremented */ ret = true; /* incremented */
} else { /* decrementing */ } else { /* decrementing */
/* /*
...@@ -3671,8 +3679,11 @@ init_map: *map = 0; /* we didn't zero the page */ ...@@ -3671,8 +3679,11 @@ init_map: *map = 0; /* we didn't zero the page */
kunmap_atomic(map); kunmap_atomic(map);
page = list_entry(page->lru.prev, struct page, lru); page = list_entry(page->lru.prev, struct page, lru);
} }
return count == COUNT_CONTINUED; ret = count == COUNT_CONTINUED;
} }
out:
spin_unlock(&si->cont_lock);
return ret;
} }
/* /*
......
...@@ -8,6 +8,7 @@ PHONY += klibcdirs ...@@ -8,6 +8,7 @@ PHONY += klibcdirs
suffix_y = $(subst $\",,$(CONFIG_INITRAMFS_COMPRESSION)) suffix_y = $(subst $\",,$(CONFIG_INITRAMFS_COMPRESSION))
datafile_y = initramfs_data.cpio$(suffix_y) datafile_y = initramfs_data.cpio$(suffix_y)
datafile_d_y = .$(datafile_y).d
AFLAGS_initramfs_data.o += -DINITRAMFS_IMAGE="usr/$(datafile_y)" AFLAGS_initramfs_data.o += -DINITRAMFS_IMAGE="usr/$(datafile_y)"
...@@ -30,12 +31,12 @@ ramfs-args := \ ...@@ -30,12 +31,12 @@ ramfs-args := \
$(if $(CONFIG_INITRAMFS_ROOT_UID), -u $(CONFIG_INITRAMFS_ROOT_UID)) \ $(if $(CONFIG_INITRAMFS_ROOT_UID), -u $(CONFIG_INITRAMFS_ROOT_UID)) \
$(if $(CONFIG_INITRAMFS_ROOT_GID), -g $(CONFIG_INITRAMFS_ROOT_GID)) $(if $(CONFIG_INITRAMFS_ROOT_GID), -g $(CONFIG_INITRAMFS_ROOT_GID))
# .initramfs_data.cpio.d is used to identify all files included # $(datafile_d_y) is used to identify all files included
# in initramfs and to detect if any files are added/removed. # in initramfs and to detect if any files are added/removed.
# Removed files are identified by directory timestamp being updated # Removed files are identified by directory timestamp being updated
# The dependency list is generated by gen_initramfs.sh -l # The dependency list is generated by gen_initramfs.sh -l
ifneq ($(wildcard $(obj)/.initramfs_data.cpio.d),) ifneq ($(wildcard $(obj)/$(datafile_d_y)),)
include $(obj)/.initramfs_data.cpio.d include $(obj)/$(datafile_d_y)
endif endif
quiet_cmd_initfs = GEN $@ quiet_cmd_initfs = GEN $@
...@@ -53,5 +54,5 @@ $(deps_initramfs): klibcdirs ...@@ -53,5 +54,5 @@ $(deps_initramfs): klibcdirs
# 3) If gen_init_cpio are newer than initramfs_data.cpio # 3) If gen_init_cpio are newer than initramfs_data.cpio
# 4) arguments to gen_initramfs.sh changes # 4) arguments to gen_initramfs.sh changes
$(obj)/$(datafile_y): $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs $(obj)/$(datafile_y): $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs
$(Q)$(initramfs) -l $(ramfs-input) > $(obj)/.initramfs_data.cpio.d $(Q)$(initramfs) -l $(ramfs-input) > $(obj)/$(datafile_d_y)
$(call if_changed,initfs) $(call if_changed,initfs)
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment