Commit f1d6e17f authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew Morton)

Merge a bunch of fixes from Andrew Morton.

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  fs/proc/task_mmu.c: fix buffer overflow in add_page_map()
  arch: *: Kconfig: add "kernel/Kconfig.freezer" to "arch/*/Kconfig"
  ocfs2: fix null pointer dereference in ocfs2_dir_foreach_blk_id()
  x86 get_unmapped_area(): use proper mmap base for bottom-up direction
  ocfs2: fix NULL pointer dereference in ocfs2_duplicate_clusters_by_page
  ocfs2: Revert 40bd62eb to avoid regression in extended allocation
  drivers/rtc/rtc-stmp3xxx.c: provide timeout for potentially endless loop polling a HW bit
  hugetlb: fix lockdep splat caused by pmd sharing
  aoe: adjust ref of head for compound page tails
  microblaze: fix clone syscall
  mm: save soft-dirty bits on file pages
  mm: save soft-dirty bits on swapped pages
  memcg: don't initialize kmem-cache destroying work for root caches
parents 28fbc8b6 8c829622
@@ -407,6 +407,12 @@ config CLONE_BACKWARDS2
 	help
 	  Architecture has the first two arguments of clone(2) swapped.
 
+config CLONE_BACKWARDS3
+	bool
+	help
+	  Architecture has tls passed as the 3rd argument of clone(2),
+	  not the 5th one.
+
 config ODD_RT_SIGACTION
 	bool
 	help
...
@@ -158,6 +158,7 @@ source "kernel/Kconfig.hz"
 endmenu
 
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 source "drivers/Kconfig"
 source "fs/Kconfig"
...
@@ -28,7 +28,7 @@ config MICROBLAZE
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_IDLE_POLL_SETUP
 	select MODULES_USE_ELF_RELA
-	select CLONE_BACKWARDS
+	select CLONE_BACKWARDS3
 
 config SWAP
 	def_bool n
...
@@ -55,6 +55,7 @@ config GENERIC_CSUM
 
 source "init/Kconfig"
+source "kernel/Kconfig.freezer"
 
 menu "Processor type and features"
...
@@ -87,6 +87,8 @@ config STACKTRACE_SUPPORT
 
 source "init/Kconfig"
+
+source "kernel/Kconfig.freezer"
 
 config MMU
 	def_bool y
...
@@ -55,9 +55,53 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
 #endif
 
+#ifdef CONFIG_MEM_SOFT_DIRTY
+
+/*
+ * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE, _PAGE_BIT_SOFT_DIRTY and
+ * _PAGE_BIT_PROTNONE are taken, split up the 28 bits of offset
+ * into this range.
+ */
+#define PTE_FILE_MAX_BITS	28
+#define PTE_FILE_SHIFT1		(_PAGE_BIT_PRESENT + 1)
+#define PTE_FILE_SHIFT2		(_PAGE_BIT_FILE + 1)
+#define PTE_FILE_SHIFT3		(_PAGE_BIT_PROTNONE + 1)
+#define PTE_FILE_SHIFT4		(_PAGE_BIT_SOFT_DIRTY + 1)
+#define PTE_FILE_BITS1		(PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1)
+#define PTE_FILE_BITS2		(PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1)
+#define PTE_FILE_BITS3		(PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1)
+
+#define pte_to_pgoff(pte)	\
+	((((pte).pte_low >> (PTE_FILE_SHIFT1))			\
+	  & ((1U << PTE_FILE_BITS1) - 1)))			\
+	+ ((((pte).pte_low >> (PTE_FILE_SHIFT2))		\
+	    & ((1U << PTE_FILE_BITS2) - 1))			\
+	   << (PTE_FILE_BITS1))					\
+	+ ((((pte).pte_low >> (PTE_FILE_SHIFT3))		\
+	    & ((1U << PTE_FILE_BITS3) - 1))			\
+	   << (PTE_FILE_BITS1 + PTE_FILE_BITS2))		\
+	+ ((((pte).pte_low >> (PTE_FILE_SHIFT4)))		\
+	   << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))
+
+#define pgoff_to_pte(off)	\
+	((pte_t) { .pte_low =					\
+	 ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1)	\
+	 + ((((off) >> PTE_FILE_BITS1)				\
+	     & ((1U << PTE_FILE_BITS2) - 1))			\
+	    << PTE_FILE_SHIFT2)					\
+	 + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2))	\
+	     & ((1U << PTE_FILE_BITS3) - 1))			\
+	    << PTE_FILE_SHIFT3)					\
+	 + ((((off) >>						\
+	      (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)))	\
+	    << PTE_FILE_SHIFT4)					\
+	 + _PAGE_FILE })
+
+#else /* CONFIG_MEM_SOFT_DIRTY */
+
 /*
  * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken,
- * split up the 29 bits of offset into this range:
+ * split up the 29 bits of offset into this range.
  */
 #define PTE_FILE_MAX_BITS	29
 #define PTE_FILE_SHIFT1		(_PAGE_BIT_PRESENT + 1)
@@ -88,6 +132,8 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 		<< PTE_FILE_SHIFT3)				\
 	 + _PAGE_FILE })
 
+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
 #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
...
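For the CONFIG_MEM_SOFT_DIRTY case above, pte_to_pgoff() and pgoff_to_pte() must be exact inverses over all 28-bit offsets while leaving the four reserved bits untouched. With the usual x86 bit positions (PRESENT at bit 0, FILE at 6, PROTNONE at 8, SOFT_DIRTY at 11) the field widths come out to 5 + 1 + 2 + 20 = 28 bits. A self-contained user-space sketch of the same packing, with those positions hard-coded here as assumptions rather than taken from the kernel headers:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed x86 positions: reserved pte bits 0, 6, 8 and 11. */
#define SHIFT1  1
#define SHIFT2  7
#define SHIFT3  9
#define SHIFT4 12
#define BITS1  (SHIFT2 - SHIFT1 - 1)    /* 5 bits */
#define BITS2  (SHIFT3 - SHIFT2 - 1)    /* 1 bit  */
#define BITS3  (SHIFT4 - SHIFT3 - 1)    /* 2 bits */

static uint32_t pack(uint32_t off)      /* pgoff_to_pte() analogue, sans _PAGE_FILE */
{
        return ((off & ((1U << BITS1) - 1)) << SHIFT1)
             + (((off >> BITS1) & ((1U << BITS2) - 1)) << SHIFT2)
             + (((off >> (BITS1 + BITS2)) & ((1U << BITS3) - 1)) << SHIFT3)
             + ((off >> (BITS1 + BITS2 + BITS3)) << SHIFT4);
}

static uint32_t unpack(uint32_t low)    /* pte_to_pgoff() analogue */
{
        return ((low >> SHIFT1) & ((1U << BITS1) - 1))
             + (((low >> SHIFT2) & ((1U << BITS2) - 1)) << BITS1)
             + (((low >> SHIFT3) & ((1U << BITS3) - 1)) << (BITS1 + BITS2))
             + ((low >> SHIFT4) << (BITS1 + BITS2 + BITS3));
}

int main(void)
{
        for (uint32_t off = 0; off < (1U << 28); off += 99991)
                assert(unpack(pack(off)) == off);
        puts("28-bit pgoff round-trip OK");
        return 0;
}

The real pgoff_to_pte() additionally ORs in _PAGE_FILE (bit 6), which the decode side never reads, so the round-trip is unaffected.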
@@ -179,6 +179,9 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
 /*
  * Bits 0, 6 and 7 are taken in the low part of the pte,
  * put the 32 bits of offset into the high part.
+ *
+ * For soft-dirty tracking 11 bit is taken from
+ * the low part of pte as well.
  */
 #define pte_to_pgoff(pte) ((pte).pte_high)
 #define pgoff_to_pte(off) \
...
@@ -314,6 +314,36 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
 	return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
 }
 
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+	return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline int pte_swp_soft_dirty(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
+}
+
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+	return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
+{
+	return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
+}
+
+static inline pte_t pte_file_mksoft_dirty(pte_t pte)
+{
+	return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
+}
+
+static inline int pte_file_soft_dirty(pte_t pte)
+{
+	return pte_flags(pte) & _PAGE_SOFT_DIRTY;
+}
+
 /*
  * Mask out unsupported bits in a present pgprot. Non-present pgprots
  * can use those bits for other purposes, so leave them be.
...
@@ -61,12 +61,27 @@
  * they do not conflict with each other.
  */
 
+#define _PAGE_BIT_SOFT_DIRTY	_PAGE_BIT_HIDDEN
+
 #ifdef CONFIG_MEM_SOFT_DIRTY
-#define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
+#define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 1) << _PAGE_BIT_SOFT_DIRTY)
 #else
 #define _PAGE_SOFT_DIRTY	(_AT(pteval_t, 0))
 #endif
 
+/*
+ * Tracking soft dirty bit when a page goes to a swap is tricky.
+ * We need a bit which can be stored in pte _and_ not conflict
+ * with swap entry format. On x86 bits 6 and 7 are *not* involved
+ * into swap entry computation, but bit 6 is used for nonlinear
+ * file mapping, so we borrow bit 7 for soft dirty tracking.
+ */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define _PAGE_SWP_SOFT_DIRTY	_PAGE_PSE
+#else
+#define _PAGE_SWP_SOFT_DIRTY	(_AT(pteval_t, 0))
+#endif
+
 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
 #define _PAGE_NX	(_AT(pteval_t, 1) << _PAGE_BIT_NX)
 #else
...
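A note on why bit 7 is free to borrow: in the 2-level swap encoding shown earlier, the swap type occupies the bits between _PAGE_BIT_PRESENT and _PAGE_BIT_FILE (bits 1..5) and the offset starts above _PAGE_BIT_PROTNONE (bit 9 and up), so bits 0, 6, 7 and 8 never carry swap-entry payload. A toy check, with those positions hard-coded as assumptions:

#include <assert.h>
#include <stdint.h>

#define SWP_TYPE_SHIFT    1            /* type in bits 1..5 (assumed)  */
#define SWP_OFFSET_SHIFT  9            /* offset in bits 9.. (assumed) */
#define SWP_SOFT_DIRTY    (1U << 7)    /* the borrowed _PAGE_PSE bit   */

int main(void)
{
        uint32_t pte = (3U << SWP_TYPE_SHIFT) | (0x1234U << SWP_OFFSET_SHIFT);

        uint32_t dirty = pte | SWP_SOFT_DIRTY;    /* pte_swp_mksoft_dirty()    */
        assert(dirty & SWP_SOFT_DIRTY);           /* pte_swp_soft_dirty()      */
        assert((dirty & ~SWP_SOFT_DIRTY) == pte); /* clearing restores the pte */
        return 0;
}

Bit 6 would have worked equally well for swap ptes, but it doubles as _PAGE_FILE for nonlinear mappings, which is why the comment settles on bit 7.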
@@ -101,7 +101,7 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
 				*begin = new_begin;
 		}
 	} else {
-		*begin = TASK_UNMAPPED_BASE;
+		*begin = mmap_legacy_base();
 		*end = TASK_SIZE;
 	}
 }
...
@@ -98,7 +98,7 @@ static unsigned long mmap_base(void)
  * Bottom-up (legacy) layout on X86_32 did not support randomization, X86_64
  * does, but not when emulating X86_32
  */
-static unsigned long mmap_legacy_base(void)
+unsigned long mmap_legacy_base(void)
 {
 	if (mmap_is_ia32())
 		return TASK_UNMAPPED_BASE;
...
@@ -906,16 +906,10 @@ bio_pageinc(struct bio *bio)
 	int i;
 
 	bio_for_each_segment(bv, bio, i) {
-		page = bv->bv_page;
 		/* Non-zero page count for non-head members of
-		 * compound pages is no longer allowed by the kernel,
-		 * but this has never been seen here.
+		 * compound pages is no longer allowed by the kernel.
 		 */
-		if (unlikely(PageCompound(page)))
-			if (compound_trans_head(page) != page) {
-				pr_crit("page tail used for block I/O\n");
-				BUG();
-			}
+		page = compound_trans_head(bv->bv_page);
 		atomic_inc(&page->_count);
 	}
 }
@@ -924,10 +918,13 @@ static void
 bio_pagedec(struct bio *bio)
 {
 	struct bio_vec *bv;
+	struct page *page;
 	int i;
 
-	bio_for_each_segment(bv, bio, i)
-		atomic_dec(&bv->bv_page->_count);
+	bio_for_each_segment(bv, bio, i) {
+		page = compound_trans_head(bv->bv_page);
+		atomic_dec(&page->_count);
+	}
 }
 
 static void
...
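The invariant behind the aoe change: the reference taken in bio_pageinc() and the one dropped in bio_pagedec() must land on the same struct page, and for a compound-page tail that common target is the head. Previously the inc side BUG()ed on tails while the dec side blindly decremented the tail's _count. A minimal user-space model of the pairing (the two-field struct here is illustrative, not the kernel's struct page):

#include <assert.h>
#include <stddef.h>

struct page { struct page *head; int count; };

static struct page *head_of(struct page *p)   /* compound_trans_head() analogue */
{
        return p->head ? p->head : p;
}

int main(void)
{
        /* Hypothetical two-page compound: pages[0] is the head, pages[1] a tail. */
        struct page pages[2] = { { NULL, 1 }, { &pages[0], 0 } };

        head_of(&pages[1])->count++;   /* bio_pageinc() on a tail page   */
        head_of(&pages[1])->count--;   /* bio_pagedec() on the same tail */
        assert(pages[0].count == 1 && pages[1].count == 0);
        return 0;
}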
@@ -23,6 +23,7 @@
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
+#include <linux/delay.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
 #include <linux/of_device.h>
@@ -119,24 +120,39 @@ static void stmp3xxx_wdt_register(struct platform_device *rtc_pdev)
 }
 #endif /* CONFIG_STMP3XXX_RTC_WATCHDOG */
 
-static void stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data)
+static int stmp3xxx_wait_time(struct stmp3xxx_rtc_data *rtc_data)
 {
+	int timeout = 5000; /* 3ms according to i.MX28 Ref Manual */
 	/*
-	 * The datasheet doesn't say which way round the
-	 * NEW_REGS/STALE_REGS bitfields go. In fact it's 0x1=P0,
-	 * 0x2=P1, .., 0x20=P5, 0x40=ALARM, 0x80=SECONDS
+	 * The i.MX28 Applications Processor Reference Manual, Rev. 1, 2010
+	 * states:
+	 * | The order in which registers are updated is
+	 * | Persistent 0, 1, 2, 3, 4, 5, Alarm, Seconds.
+	 * | (This list is in bitfield order, from LSB to MSB, as they would
+	 * | appear in the STALE_REGS and NEW_REGS bitfields of the HW_RTC_STAT
+	 * | register. For example, the Seconds register corresponds to
+	 * | STALE_REGS or NEW_REGS containing 0x80.)
 	 */
-	while (readl(rtc_data->io + STMP3XXX_RTC_STAT) &
-			(0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT))
-		cpu_relax();
+	do {
+		if (!(readl(rtc_data->io + STMP3XXX_RTC_STAT) &
+				(0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)))
+			return 0;
+		udelay(1);
+	} while (--timeout > 0);
+	return (readl(rtc_data->io + STMP3XXX_RTC_STAT) &
+			(0x80 << STMP3XXX_RTC_STAT_STALE_SHIFT)) ? -ETIME : 0;
 }
 
 /* Time read/write */
 static int stmp3xxx_rtc_gettime(struct device *dev, struct rtc_time *rtc_tm)
 {
+	int ret;
 	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
 
-	stmp3xxx_wait_time(rtc_data);
+	ret = stmp3xxx_wait_time(rtc_data);
+	if (ret)
+		return ret;
+
 	rtc_time_to_tm(readl(rtc_data->io + STMP3XXX_RTC_SECONDS), rtc_tm);
 	return 0;
 }
@@ -146,8 +162,7 @@ static int stmp3xxx_rtc_set_mmss(struct device *dev, unsigned long t)
 	struct stmp3xxx_rtc_data *rtc_data = dev_get_drvdata(dev);
 
 	writel(t, rtc_data->io + STMP3XXX_RTC_SECONDS);
-	stmp3xxx_wait_time(rtc_data);
-	return 0;
+	return stmp3xxx_wait_time(rtc_data);
 }
 
 /* interrupt(s) handler */
...
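The driver change above turns an unbounded busy-wait on the STALE_REGS bit into a bounded poll whose failure is propagated (gettime returns early, set_mmss returns the wait's result directly). The general shape of the pattern, as a user-space sketch where wait_ready() and the 1 us interval are illustrative rather than the driver's API:

#include <errno.h>
#include <unistd.h>

/* Poll a hypothetical ready() predicate, giving up after roughly max_us us. */
static int wait_ready(int (*ready)(void), int max_us)
{
        int timeout = max_us;

        do {
                if (ready())
                        return 0;
                usleep(1);               /* the driver uses udelay(1) */
        } while (--timeout > 0);

        /* Re-check once so a success racing with loop exit isn't lost. */
        return ready() ? 0 : -ETIME;
}

The final re-read mirrors the driver's closing readl(): without it, a bit that clears between the last in-loop poll and the loop's exit would be misreported as -ETIME.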
@@ -463,6 +463,14 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
 	return inode;
 }
 
+/*
+ * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never
+ * be taken from reclaim -- unlike regular filesystems. This needs an
+ * annotation because huge_pmd_share() does an allocation under
+ * i_mmap_mutex.
+ */
+struct lock_class_key hugetlbfs_i_mmap_mutex_key;
+
 static struct inode *hugetlbfs_get_inode(struct super_block *sb,
 					struct inode *dir,
 					umode_t mode, dev_t dev)
@@ -474,6 +482,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
 		struct hugetlbfs_inode_info *info;
 		inode->i_ino = get_next_ino();
 		inode_init_owner(inode, dir, mode);
+		lockdep_set_class(&inode->i_mapping->i_mmap_mutex,
+				&hugetlbfs_i_mmap_mutex_key);
 		inode->i_mapping->a_ops = &hugetlbfs_aops;
 		inode->i_mapping->backing_dev_info = &hugetlbfs_backing_dev_info;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
...
@@ -1757,7 +1757,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
 		goto out;
 	} else if (ret == 1) {
 		clusters_need = wc->w_clen;
-		ret = ocfs2_refcount_cow(inode, filp, di_bh,
+		ret = ocfs2_refcount_cow(inode, di_bh,
 					 wc->w_cpos, wc->w_clen, UINT_MAX);
 		if (ret) {
 			mlog_errno(ret);
...
@@ -2153,11 +2153,9 @@ int ocfs2_empty_dir(struct inode *inode)
 {
 	int ret;
 	struct ocfs2_empty_dir_priv priv = {
-		.ctx.actor = ocfs2_empty_dir_filldir
+		.ctx.actor = ocfs2_empty_dir_filldir,
 	};
 
-	memset(&priv, 0, sizeof(priv));
-
 	if (ocfs2_dir_indexed(inode)) {
 		ret = ocfs2_empty_dir_dx(inode, &priv);
 		if (ret)
...
@@ -370,7 +370,7 @@ static int ocfs2_cow_file_pos(struct inode *inode,
 	if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
 		goto out;
 
-	return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1);
+	return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
 
 out:
 	return status;
@@ -899,7 +899,7 @@ static int ocfs2_zero_extend_get_range(struct inode *inode,
 		zero_clusters = last_cpos - zero_cpos;
 
 	if (needs_cow) {
-		rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos,
+		rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos,
 					zero_clusters, UINT_MAX);
 		if (rc) {
 			mlog_errno(rc);
@@ -2078,7 +2078,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
 
 	*meta_level = 1;
 
-	ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX);
+	ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
 	if (ret)
 		mlog_errno(ret);
 out:
...
@@ -537,7 +537,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
 	extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
 
 	return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
-	       ocfs2_quota_trans_credits(sb) + bits_wanted;
+	       ocfs2_quota_trans_credits(sb);
 }
 
 static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
...
@@ -69,7 +69,7 @@ static int __ocfs2_move_extent(handle_t *handle,
 	u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci);
 	u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos);
 
-	ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos,
+	ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos,
 					       p_cpos, new_p_cpos, len);
 	if (ret) {
 		mlog_errno(ret);
...
@@ -49,7 +49,6 @@
 
 struct ocfs2_cow_context {
 	struct inode *inode;
-	struct file *file;
 	u32 cow_start;
 	u32 cow_len;
 	struct ocfs2_extent_tree data_et;
@@ -66,7 +65,7 @@ struct ocfs2_cow_context {
 			    u32 *num_clusters,
 			    unsigned int *extent_flags);
 	int (*cow_duplicate_clusters)(handle_t *handle,
-				      struct file *file,
+				      struct inode *inode,
 				      u32 cpos, u32 old_cluster,
 				      u32 new_cluster, u32 new_len);
 };
@@ -2922,14 +2921,12 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
 }
 
 int ocfs2_duplicate_clusters_by_page(handle_t *handle,
-				     struct file *file,
+				     struct inode *inode,
 				     u32 cpos, u32 old_cluster,
 				     u32 new_cluster, u32 new_len)
 {
 	int ret = 0, partial;
-	struct inode *inode = file_inode(file);
-	struct ocfs2_caching_info *ci = INODE_CACHE(inode);
-	struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+	struct super_block *sb = inode->i_sb;
 	u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
 	struct page *page;
 	pgoff_t page_index;
@@ -2978,13 +2975,6 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
 		if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
 			BUG_ON(PageDirty(page));
 
-		if (PageReadahead(page)) {
-			page_cache_async_readahead(mapping,
-						   &file->f_ra, file,
-						   page, page_index,
-						   readahead_pages);
-		}
-
 		if (!PageUptodate(page)) {
 			ret = block_read_full_page(page, ocfs2_get_block);
 			if (ret) {
@@ -3004,7 +2994,8 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
 			}
 		}
 
-		ocfs2_map_and_dirty_page(inode, handle, from, to,
+		ocfs2_map_and_dirty_page(inode,
+					 handle, from, to,
 					 page, 0, &new_block);
 		mark_page_accessed(page);
 unlock:
@@ -3020,12 +3011,11 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
 }
 
 int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
-				    struct file *file,
+				    struct inode *inode,
 				    u32 cpos, u32 old_cluster,
 				    u32 new_cluster, u32 new_len)
 {
 	int ret = 0;
-	struct inode *inode = file_inode(file);
 	struct super_block *sb = inode->i_sb;
 	struct ocfs2_caching_info *ci = INODE_CACHE(inode);
 	int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
@@ -3150,7 +3140,7 @@ static int ocfs2_replace_clusters(handle_t *handle,
 
 	/*If the old clusters is unwritten, no need to duplicate. */
 	if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
-		ret = context->cow_duplicate_clusters(handle, context->file,
+		ret = context->cow_duplicate_clusters(handle, context->inode,
 						      cpos, old, new, len);
 		if (ret) {
 			mlog_errno(ret);
@@ -3428,35 +3418,12 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
 	return ret;
 }
 
-static void ocfs2_readahead_for_cow(struct inode *inode,
-				    struct file *file,
-				    u32 start, u32 len)
-{
-	struct address_space *mapping;
-	pgoff_t index;
-	unsigned long num_pages;
-	int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
-
-	if (!file)
-		return;
-
-	mapping = file->f_mapping;
-	num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT;
-	if (!num_pages)
-		num_pages = 1;
-
-	index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT;
-	page_cache_sync_readahead(mapping, &file->f_ra, file,
-				  index, num_pages);
-}
-
 /*
  * Starting at cpos, try to CoW write_len clusters. Don't CoW
  * past max_cpos. This will stop when it runs into a hole or an
  * unrefcounted extent.
  */
 static int ocfs2_refcount_cow_hunk(struct inode *inode,
-				   struct file *file,
 				   struct buffer_head *di_bh,
 				   u32 cpos, u32 write_len, u32 max_cpos)
 {
@@ -3485,8 +3452,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
 
 	BUG_ON(cow_len == 0);
 
-	ocfs2_readahead_for_cow(inode, file, cow_start, cow_len);
-
 	context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
 	if (!context) {
 		ret = -ENOMEM;
@@ -3508,7 +3473,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
 	context->ref_root_bh = ref_root_bh;
 	context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
 	context->get_clusters = ocfs2_di_get_clusters;
-	context->file = file;
 
 	ocfs2_init_dinode_extent_tree(&context->data_et,
 				      INODE_CACHE(inode), di_bh);
@@ -3537,7 +3501,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
  * clusters between cpos and cpos+write_len are safe to modify.
  */
 int ocfs2_refcount_cow(struct inode *inode,
-		       struct file *file,
 		       struct buffer_head *di_bh,
 		       u32 cpos, u32 write_len, u32 max_cpos)
 {
@@ -3557,7 +3520,7 @@ int ocfs2_refcount_cow(struct inode *inode,
 		num_clusters = write_len;
 
 	if (ext_flags & OCFS2_EXT_REFCOUNTED) {
-		ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos,
+		ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
 					      num_clusters, max_cpos);
 		if (ret) {
 			mlog_errno(ret);
...
@@ -53,7 +53,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
 					  int *credits,
 					  int *ref_blocks);
 int ocfs2_refcount_cow(struct inode *inode,
-		       struct file *filep, struct buffer_head *di_bh,
+		       struct buffer_head *di_bh,
 		       u32 cpos, u32 write_len, u32 max_cpos);
 
 typedef int (ocfs2_post_refcount_func)(struct inode *inode,
@@ -85,11 +85,11 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
 			     u32 cpos, u32 write_len,
 			     struct ocfs2_post_refcount *post);
 int ocfs2_duplicate_clusters_by_page(handle_t *handle,
-				     struct file *file,
+				     struct inode *inode,
 				     u32 cpos, u32 old_cluster,
 				     u32 new_cluster, u32 new_len);
 int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
-				    struct file *file,
+				    struct inode *inode,
 				    u32 cpos, u32 old_cluster,
 				    u32 new_cluster, u32 new_len);
 int ocfs2_cow_sync_writeback(struct super_block *sb,
...
@@ -730,8 +730,16 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 	 * of how soft-dirty works.
 	 */
 	pte_t ptent = *pte;
-	ptent = pte_wrprotect(ptent);
-	ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+
+	if (pte_present(ptent)) {
+		ptent = pte_wrprotect(ptent);
+		ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+	} else if (is_swap_pte(ptent)) {
+		ptent = pte_swp_clear_soft_dirty(ptent);
+	} else if (pte_file(ptent)) {
+		ptent = pte_file_clear_soft_dirty(ptent);
+	}
+
 	set_pte_at(vma->vm_mm, addr, pte, ptent);
 #endif
 }
@@ -752,14 +760,15 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		ptent = *pte;
-		if (!pte_present(ptent))
-			continue;
 
 		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
 			clear_soft_dirty(vma, addr, pte);
 			continue;
 		}
 
+		if (!pte_present(ptent))
+			continue;
+
 		page = vm_normal_page(vma, addr, ptent);
 		if (!page)
 			continue;
@@ -859,7 +868,7 @@ typedef struct {
 } pagemap_entry_t;
 
 struct pagemapread {
-	int pos, len;
+	int pos, len;		/* units: PM_ENTRY_BYTES, not bytes */
 	pagemap_entry_t *buffer;
 	bool v2;
 };
@@ -867,7 +876,7 @@ struct pagemapread {
 #define PAGEMAP_WALK_SIZE	(PMD_SIZE)
 #define PAGEMAP_WALK_MASK	(PMD_MASK)
 
-#define PM_ENTRY_BYTES      sizeof(u64)
+#define PM_ENTRY_BYTES      sizeof(pagemap_entry_t)
 #define PM_STATUS_BITS      3
 #define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
 #define PM_STATUS_MASK      (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
@@ -930,8 +939,10 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 		flags = PM_PRESENT;
 		page = vm_normal_page(vma, addr, pte);
 	} else if (is_swap_pte(pte)) {
-		swp_entry_t entry = pte_to_swp_entry(pte);
-
+		swp_entry_t entry;
+		if (pte_swp_soft_dirty(pte))
+			flags2 |= __PM_SOFT_DIRTY;
+		entry = pte_to_swp_entry(pte);
 		frame = swp_type(entry) |
 			(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
 		flags = PM_SWAP;
@@ -1116,8 +1127,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 		goto out_task;
 
 	pm.v2 = soft_dirty_cleared;
-	pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
-	pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
+	pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
+	pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY);
 	ret = -ENOMEM;
 	if (!pm.buffer)
 		goto out_task;
...
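The pm.len change is the buffer overflow named in the series: pm.len used to hold a byte count, but add_page_map() compares the entry index pm->pos against it, so the walk could store pm.len entries of PM_ENTRY_BYTES each into a pm.len-byte buffer. The arithmetic, assuming 4 KiB pages and the 2 MiB PMD walk window:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PMD_SIZE        (1UL << 21)              /* PAGEMAP_WALK_SIZE */
#define PM_ENTRY_BYTES  sizeof(uint64_t)

int main(void)
{
        unsigned long entries = PMD_SIZE >> PAGE_SHIFT;            /* 512 */

        /* Before: len in bytes, but bounds-checked as an entry index. */
        unsigned long old_alloc    = PM_ENTRY_BYTES * entries;     /* 4096 B  */
        unsigned long old_writable = old_alloc * PM_ENTRY_BYTES;   /* 32768 B */

        /* After: len in entries, allocation derived from it. */
        unsigned long new_alloc    = entries * PM_ENTRY_BYTES;     /* 4096 B */
        unsigned long new_writable = entries * PM_ENTRY_BYTES;     /* 4096 B */

        printf("old: alloc %lu, writable %lu (8x overflow)\n", old_alloc, old_writable);
        printf("new: alloc %lu, writable %lu (exact fit)\n", new_alloc, new_writable);
        return 0;
}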
@@ -417,6 +417,36 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
 {
 	return pmd;
 }
+
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+	return pte;
+}
+
+static inline int pte_swp_soft_dirty(pte_t pte)
+{
+	return 0;
+}
+
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+	return pte;
+}
+
+static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
+{
+	return pte;
+}
+
+static inline pte_t pte_file_mksoft_dirty(pte_t pte)
+{
+	return pte;
+}
+
+static inline int pte_file_soft_dirty(pte_t pte)
+{
+	return 0;
+}
 #endif
 
 #ifndef __HAVE_PFNMAP_TRACKING
...
@@ -314,6 +314,7 @@ struct nsproxy;
 struct user_namespace;
 
 #ifdef CONFIG_MMU
+extern unsigned long mmap_legacy_base(void);
 extern void arch_pick_mmap_layout(struct mm_struct *mm);
 extern unsigned long
 arch_get_unmapped_area(struct file *, unsigned long, unsigned long,
...
@@ -67,6 +67,8 @@ static inline swp_entry_t pte_to_swp_entry(pte_t pte)
 	swp_entry_t arch_entry;
 
 	BUG_ON(pte_file(pte));
+	if (pte_swp_soft_dirty(pte))
+		pte = pte_swp_clear_soft_dirty(pte);
 	arch_entry = __pte_to_swp_entry(pte);
 	return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry));
 }
...
@@ -802,9 +802,14 @@ asmlinkage long sys_vfork(void);
 asmlinkage long sys_clone(unsigned long, unsigned long, int __user *, int,
 	       int __user *);
 #else
+#ifdef CONFIG_CLONE_BACKWARDS3
+asmlinkage long sys_clone(unsigned long, unsigned long, int, int __user *,
+			  int __user *, int);
+#else
 asmlinkage long sys_clone(unsigned long, unsigned long, int __user *,
 	       int __user *, int);
 #endif
+#endif
 
 asmlinkage long sys_execve(const char __user *filename,
 		const char __user *const __user *argv,
...
@@ -1679,6 +1679,12 @@ SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
 		 int __user *, parent_tidptr,
 		 int __user *, child_tidptr,
 		 int, tls_val)
+#elif defined(CONFIG_CLONE_BACKWARDS3)
+SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
+		int, stack_size,
+		int __user *, parent_tidptr,
+		int __user *, child_tidptr,
+		int, tls_val)
 #else
 SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 		 int __user *, parent_tidptr,
...
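For reference, these are the clone(2) argument orders the CLONE_BACKWARDS* family selects between, with CLONE_BACKWARDS3 (now used by microblaze) taking a sixth stack_size argument. A hedged user-space sketch of a raw invocation under that convention; glibc's clone() wrapper normally hides this, and the helper name below is made up:

#define _GNU_SOURCE
#include <sys/syscall.h>
#include <unistd.h>

/*
 * Raw argument orders, per the prototypes above:
 *   default:          clone(flags, newsp, parent_tidptr, child_tidptr, tls)
 *   CLONE_BACKWARDS:  clone(flags, newsp, parent_tidptr, tls, child_tidptr)
 *   CLONE_BACKWARDS2: clone(newsp, flags, parent_tidptr, child_tidptr, tls)
 *   CLONE_BACKWARDS3: clone(flags, newsp, stack_size, parent_tidptr,
 *                           child_tidptr, tls)
 */
static long raw_clone_backwards3(unsigned long flags, void *newsp,
                                 int stack_size, int *ptid, int *ctid, int tls)
{
        return syscall(SYS_clone, flags, newsp, stack_size, ptid, ctid, tls);
}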
@@ -57,17 +57,22 @@ static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long addr, unsigned long pgoff, pgprot_t prot)
 {
 	int err = -ENOMEM;
-	pte_t *pte;
+	pte_t *pte, ptfile;
 	spinlock_t *ptl;
 
 	pte = get_locked_pte(mm, addr, &ptl);
 	if (!pte)
 		goto out;
 
-	if (!pte_none(*pte))
+	ptfile = pgoff_to_pte(pgoff);
+
+	if (!pte_none(*pte)) {
+		if (pte_present(*pte) && pte_soft_dirty(*pte))
+			pte_file_mksoft_dirty(ptfile);
 		zap_pte(mm, vma, addr, pte);
+	}
 
-	set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
+	set_pte_at(mm, addr, pte, ptfile);
 	/*
 	 * We don't need to run update_mmu_cache() here because the "file pte"
 	 * being installed by install_file_pte() is not a real pte - it's a
...
@@ -3195,11 +3195,11 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
 	if (!s->memcg_params)
 		return -ENOMEM;
 
-	INIT_WORK(&s->memcg_params->destroy,
-			kmem_cache_destroy_work_func);
 	if (memcg) {
 		s->memcg_params->memcg = memcg;
 		s->memcg_params->root_cache = root_cache;
+		INIT_WORK(&s->memcg_params->destroy,
+				kmem_cache_destroy_work_func);
 	} else
 		s->memcg_params->is_root_cache = true;
...
@@ -1141,9 +1141,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				continue;
 			if (unlikely(details) && details->nonlinear_vma
 			    && linear_page_index(details->nonlinear_vma,
-						addr) != page->index)
-				set_pte_at(mm, addr, pte,
-					   pgoff_to_pte(page->index));
+						addr) != page->index) {
+				pte_t ptfile = pgoff_to_pte(page->index);
+				if (pte_soft_dirty(ptent))
+					pte_file_mksoft_dirty(ptfile);
+				set_pte_at(mm, addr, pte, ptfile);
+			}
 			if (PageAnon(page))
 				rss[MM_ANONPAGES]--;
 			else {
@@ -3115,6 +3118,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		exclusive = 1;
 	}
 	flush_icache_page(vma, page);
+	if (pte_swp_soft_dirty(orig_pte))
+		pte = pte_mksoft_dirty(pte);
 	set_pte_at(mm, address, page_table, pte);
 	if (page == swapcache)
 		do_page_add_anon_rmap(page, vma, address, exclusive);
@@ -3408,6 +3413,8 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		entry = mk_pte(page, vma->vm_page_prot);
 		if (flags & FAULT_FLAG_WRITE)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+		else if (pte_file(orig_pte) && pte_file_soft_dirty(orig_pte))
+			pte_mksoft_dirty(entry);
 		if (anon) {
 			inc_mm_counter_fast(mm, MM_ANONPAGES);
 			page_add_new_anon_rmap(page, vma, address);
...
@@ -1236,6 +1236,7 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			   swp_entry_to_pte(make_hwpoison_entry(page)));
 	} else if (PageAnon(page)) {
 		swp_entry_t entry = { .val = page_private(page) };
+		pte_t swp_pte;
 
 		if (PageSwapCache(page)) {
 			/*
@@ -1264,7 +1265,10 @@ int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 			BUG_ON(TTU_ACTION(flags) != TTU_MIGRATION);
 			entry = make_migration_entry(page, pte_write(pteval));
 		}
-		set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
+		swp_pte = swp_entry_to_pte(entry);
+		if (pte_soft_dirty(pteval))
+			swp_pte = pte_swp_mksoft_dirty(swp_pte);
+		set_pte_at(mm, address, pte, swp_pte);
 		BUG_ON(pte_file(*pte));
 	} else if (IS_ENABLED(CONFIG_MIGRATION) &&
 		   (TTU_ACTION(flags) == TTU_MIGRATION)) {
@@ -1401,8 +1405,12 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 		pteval = ptep_clear_flush(vma, address, pte);
 
 		/* If nonlinear, store the file page offset in the pte. */
-		if (page->index != linear_page_index(vma, address))
-			set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
+		if (page->index != linear_page_index(vma, address)) {
+			pte_t ptfile = pgoff_to_pte(page->index);
+			if (pte_soft_dirty(pteval))
+				pte_file_mksoft_dirty(ptfile);
+			set_pte_at(mm, address, pte, ptfile);
+		}
 
 		/* Move the dirty bit to the physical page now the pte is gone. */
 		if (pte_dirty(pteval))
...
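Taken together, the swapops, memory.c and rmap hunks keep the soft-dirty bit alive across a full swap cycle: try_to_unmap_one() carries it from the present pte into the swap pte, pte_to_swp_entry() strips it so the swap entry decodes cleanly, and do_swap_page() re-applies it to the freshly mapped pte. A compressed sketch of that life cycle, with the bit positions assumed from the x86 definitions earlier (bit 11 when present, bit 7 when swapped):

#include <assert.h>
#include <stdint.h>

#define PAGE_SOFT_DIRTY      (1U << 11)   /* _PAGE_SOFT_DIRTY (assumed)     */
#define PAGE_SWP_SOFT_DIRTY  (1U << 7)    /* _PAGE_SWP_SOFT_DIRTY (assumed) */

int main(void)
{
        uint32_t pte = 0x1000 | PAGE_SOFT_DIRTY;  /* present, soft-dirty */

        /* try_to_unmap_one(): propagate into the swap pte */
        uint32_t swp_pte = 0x2408;                /* swp_entry_to_pte() result */
        if (pte & PAGE_SOFT_DIRTY)
                swp_pte |= PAGE_SWP_SOFT_DIRTY;   /* pte_swp_mksoft_dirty() */

        /* pte_to_swp_entry(): strip before decoding the entry */
        assert((swp_pte & ~PAGE_SWP_SOFT_DIRTY) == 0x2408);

        /* do_swap_page(): restore on the new present pte */
        uint32_t new_pte = 0x3000;
        if (swp_pte & PAGE_SWP_SOFT_DIRTY)
                new_pte |= PAGE_SOFT_DIRTY;       /* pte_mksoft_dirty() */
        assert(new_pte & PAGE_SOFT_DIRTY);
        return 0;
}

maybe_same_pte() in the swapfile.c hunk below is the other half of the story: during swapoff, a pte that kept the soft-dirty bit must still compare equal to the freshly generated swap pte that lacks it.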
@@ -866,6 +866,21 @@ unsigned int count_swap_pages(int type, int free)
 }
 #endif /* CONFIG_HIBERNATION */
 
+static inline int maybe_same_pte(pte_t pte, pte_t swp_pte)
+{
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	/*
+	 * When pte keeps soft dirty bit the pte generated
+	 * from swap entry does not has it, still it's same
+	 * pte from logical point of view.
+	 */
+	pte_t swp_pte_dirty = pte_swp_mksoft_dirty(swp_pte);
+	return pte_same(pte, swp_pte) || pte_same(pte, swp_pte_dirty);
+#else
+	return pte_same(pte, swp_pte);
+#endif
+}
+
 /*
  * No need to decide whether this PTE shares the swap entry with others,
  * just let do_wp_page work it out if a write is requested later - to
@@ -892,7 +907,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 	}
 
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-	if (unlikely(!pte_same(*pte, swp_entry_to_pte(entry)))) {
+	if (unlikely(!maybe_same_pte(*pte, swp_entry_to_pte(entry)))) {
 		mem_cgroup_cancel_charge_swapin(memcg);
 		ret = 0;
 		goto out;
@@ -947,7 +962,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 		 * swapoff spends a _lot_ of time in this loop!
 		 * Test inline before going to call unuse_pte.
 		 */
-		if (unlikely(pte_same(*pte, swp_pte))) {
+		if (unlikely(maybe_same_pte(*pte, swp_pte))) {
 			pte_unmap(pte);
 			ret = unuse_pte(vma, pmd, addr, entry, page);
 			if (ret)
...