Commit 1251704a authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "15 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm, docs: update memory.stat description with workingset* entries
  mm: vmscan: scan until it finds eligible pages
  mm, thp: copying user pages must schedule on collapse
  dax: fix PMD data corruption when fault races with write
  dax: fix data corruption when fault races with write
  ext4: return to starting transaction in ext4_dax_huge_fault()
  mm: fix data corruption due to stale mmap reads
  dax: prevent invalidation of mapped DAX entries
  Tigran has moved
  mm, vmalloc: fix vmalloc users tracking properly
  mm/khugepaged: add missed tracepoint for collapse_huge_page_swapin
  gcov: support GCC 7.1
  mm, vmstat: Remove spurious WARN() during zoneinfo print
  time: delete current_fs_time()
  hwpoison, memcg: forcibly uncharge LRU pages
parents 0fcc3ab2 b340959e
...
@@ -918,6 +918,18 @@ PAGE_SIZE multiple when read back.
		Number of major page faults incurred
+	  workingset_refault
+		Number of refaults of previously evicted pages
+	  workingset_activate
+		Number of refaulted pages that were immediately activated
+	  workingset_nodereclaim
+		Number of times a shadow node has been reclaimed
  memory.swap.current
	A read-only single value file which exists on non-root
...
...
@@ -54,4 +54,4 @@ The first 4 bytes should be 0x1badface.
If you have any patches, questions or suggestions regarding this BFS
implementation please contact the author:
-Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+Tigran Aivazian <aivazian.tigran@gmail.com>
...
@@ -2483,7 +2483,7 @@ S:	Maintained
F:	drivers/net/ethernet/ec_bhf.c
BFS FILE SYSTEM
-M:	"Tigran A. Aivazian" <tigran@aivazian.fsnet.co.uk>
+M:	"Tigran A. Aivazian" <aivazian.tigran@gmail.com>
S:	Maintained
F:	Documentation/filesystems/bfs.txt
F:	fs/bfs/
...
...
@@ -10,7 +10,7 @@
 * Author: Peter Oruba <peter.oruba@amd.com>
 *
 * Based on work by:
- * Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Tigran Aivazian <aivazian.tigran@gmail.com>
 *
 * early loader:
 * Copyright (C) 2013 Advanced Micro Devices, Inc.
...
/*
 * CPU Microcode Update Driver for Linux
 *
- * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
 * 2006 Shaohua Li <shaohua.li@intel.com>
 * 2013-2016 Borislav Petkov <bp@alien8.de>
 *
...
/*
 * Intel CPU Microcode Update Driver for Linux
 *
- * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Copyright (C) 2000-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
 * 2006 Shaohua Li <shaohua.li@intel.com>
 *
 * Intel CPU microcode early update for Linux
...
/*
 * fs/bfs/inode.c
 * BFS superblock and inode operations.
- * Copyright (C) 1999-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ * Copyright (C) 1999-2006 Tigran Aivazian <aivazian.tigran@gmail.com>
 * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds.
 *
 * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005.
...
@@ -19,7 +19,7 @@
#include <linux/uaccess.h>
#include "bfs.h"
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
+MODULE_AUTHOR("Tigran Aivazian <aivazian.tigran@gmail.com>");
MODULE_DESCRIPTION("SCO UnixWare BFS filesystem for Linux");
MODULE_LICENSE("GPL");
...
...
@@ -460,35 +460,6 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
	return ret;
}
-/*
- * Invalidate exceptional DAX entry if easily possible. This handles DAX
- * entries for invalidate_inode_pages() so we evict the entry only if we can
- * do so without blocking.
- */
-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index)
-{
-	int ret = 0;
-	void *entry, **slot;
-	struct radix_tree_root *page_tree = &mapping->page_tree;
-	spin_lock_irq(&mapping->tree_lock);
-	entry = __radix_tree_lookup(page_tree, index, NULL, &slot);
-	if (!entry || !radix_tree_exceptional_entry(entry) ||
-	    slot_locked(mapping, slot))
-		goto out;
-	if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
-	    radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
-		goto out;
-	radix_tree_delete(page_tree, index);
-	mapping->nrexceptional--;
-	ret = 1;
-out:
-	spin_unlock_irq(&mapping->tree_lock);
-	if (ret)
-		dax_wake_mapping_entry_waiter(mapping, index, entry, true);
-	return ret;
-}
/*
 * Invalidate exceptional DAX entry if it is clean.
 */
...
@@ -1044,7 +1015,7 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
	 * into page tables. We have to tear down these mappings so that data
	 * written by write(2) is visible in mmap.
	 */
-	if ((iomap->flags & IOMAP_F_NEW) && inode->i_mapping->nrpages) {
+	if (iomap->flags & IOMAP_F_NEW) {
		invalidate_inode_pages2_range(inode->i_mapping,
					      pos >> PAGE_SHIFT,
					      (end - 1) >> PAGE_SHIFT);
...
@@ -1177,6 +1148,12 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
	if ((vmf->flags & FAULT_FLAG_WRITE) && !vmf->cow_page)
		flags |= IOMAP_WRITE;
+	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
+	if (IS_ERR(entry)) {
+		vmf_ret = dax_fault_return(PTR_ERR(entry));
+		goto out;
+	}
	/*
	 * Note that we don't bother to use iomap_apply here: DAX required
	 * the file system block size to be equal the page size, which means
...
@@ -1185,17 +1162,11 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
	error = ops->iomap_begin(inode, pos, PAGE_SIZE, flags, &iomap);
	if (error) {
		vmf_ret = dax_fault_return(error);
-		goto out;
+		goto unlock_entry;
	}
	if (WARN_ON_ONCE(iomap.offset + iomap.length < pos + PAGE_SIZE)) {
-		vmf_ret = dax_fault_return(-EIO);	/* fs corruption? */
-		goto finish_iomap;
-	}
-	entry = grab_mapping_entry(mapping, vmf->pgoff, 0);
-	if (IS_ERR(entry)) {
-		vmf_ret = dax_fault_return(PTR_ERR(entry));
-		goto finish_iomap;
+		error = -EIO;	/* fs corruption? */
+		goto error_finish_iomap;
	}
	sector = dax_iomap_sector(&iomap, pos);
...
@@ -1217,13 +1188,13 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
		}
		if (error)
-			goto error_unlock_entry;
+			goto error_finish_iomap;
		__SetPageUptodate(vmf->cow_page);
		vmf_ret = finish_fault(vmf);
		if (!vmf_ret)
			vmf_ret = VM_FAULT_DONE_COW;
-		goto unlock_entry;
+		goto finish_iomap;
	}
	switch (iomap.type) {
...
@@ -1243,7 +1214,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
	case IOMAP_HOLE:
		if (!(vmf->flags & FAULT_FLAG_WRITE)) {
			vmf_ret = dax_load_hole(mapping, &entry, vmf);
-			goto unlock_entry;
+			goto finish_iomap;
		}
		/*FALLTHRU*/
	default:
...
@@ -1252,10 +1223,8 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
		break;
	}
-error_unlock_entry:
+error_finish_iomap:
	vmf_ret = dax_fault_return(error) | major;
-unlock_entry:
-	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
finish_iomap:
	if (ops->iomap_end) {
		int copied = PAGE_SIZE;
...
@@ -1270,7 +1239,9 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf,
		 */
		ops->iomap_end(inode, pos, PAGE_SIZE, copied, flags, &iomap);
	}
-out:
+unlock_entry:
+	put_locked_mapping_entry(mapping, vmf->pgoff, entry);
+out:
	trace_dax_pte_fault_done(inode, vmf, vmf_ret);
	return vmf_ret;
}
...
@@ -1416,6 +1387,16 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
	if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
		goto fallback;
+	/*
+	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
+	 * PMD or a HZP entry. If it can't (because a 4k page is already in
+	 * the tree, for instance), it will return -EEXIST and we just fall
+	 * back to 4k entries.
+	 */
+	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+	if (IS_ERR(entry))
+		goto fallback;
	/*
	 * Note that we don't use iomap_apply here. We aren't doing I/O, only
	 * setting up a mapping, so really we're using iomap_begin() as a way
...
@@ -1424,21 +1405,11 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
	pos = (loff_t)pgoff << PAGE_SHIFT;
	error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
	if (error)
-		goto fallback;
+		goto unlock_entry;
	if (iomap.offset + iomap.length < pos + PMD_SIZE)
		goto finish_iomap;
-	/*
-	 * grab_mapping_entry() will make sure we get a 2M empty entry, a DAX
-	 * PMD or a HZP entry. If it can't (because a 4k page is already in
-	 * the tree, for instance), it will return -EEXIST and we just fall
-	 * back to 4k entries.
-	 */
-	entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
-	if (IS_ERR(entry))
-		goto finish_iomap;
	switch (iomap.type) {
	case IOMAP_MAPPED:
		result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry);
...
@@ -1446,7 +1417,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
	case IOMAP_UNWRITTEN:
	case IOMAP_HOLE:
		if (WARN_ON_ONCE(write))
-			goto unlock_entry;
+			break;
		result = dax_pmd_load_hole(vmf, &iomap, &entry);
		break;
	default:
...
@@ -1454,8 +1425,6 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
		break;
	}
-unlock_entry:
-	put_locked_mapping_entry(mapping, pgoff, entry);
finish_iomap:
	if (ops->iomap_end) {
		int copied = PMD_SIZE;
...
@@ -1471,6 +1440,8 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf,
		ops->iomap_end(inode, pos, PMD_SIZE, copied, iomap_flags,
				&iomap);
	}
+unlock_entry:
+	put_locked_mapping_entry(mapping, pgoff, entry);
fallback:
	if (result == VM_FAULT_FALLBACK) {
		split_huge_pmd(vma, vmf->pmd, vmf->address);
...
...
@@ -257,6 +257,7 @@ static int ext4_dax_huge_fault(struct vm_fault *vmf,
		enum page_entry_size pe_size)
{
	int result;
+	handle_t *handle = NULL;
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct super_block *sb = inode->i_sb;
	bool write = vmf->flags & FAULT_FLAG_WRITE;
...
@@ -264,12 +265,24 @@
	if (write) {
		sb_start_pagefault(sb);
		file_update_time(vmf->vma->vm_file);
-	}
		down_read(&EXT4_I(inode)->i_mmap_sem);
+		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
+					       EXT4_DATA_TRANS_BLOCKS(sb));
+	} else {
+		down_read(&EXT4_I(inode)->i_mmap_sem);
+	}
+	if (!IS_ERR(handle))
		result = dax_iomap_fault(vmf, pe_size, &ext4_iomap_ops);
+	else
+		result = VM_FAULT_SIGBUS;
+	if (write) {
+		if (!IS_ERR(handle))
+			ext4_journal_stop(handle);
		up_read(&EXT4_I(inode)->i_mmap_sem);
-	if (write)
		sb_end_pagefault(sb);
+	} else {
+		up_read(&EXT4_I(inode)->i_mmap_sem);
+	}
	return result;
}
...
...
@@ -89,7 +89,6 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size,
		    const struct iomap_ops *ops);
int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
-int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
				      pgoff_t index);
void dax_wake_mapping_entry_waiter(struct address_space *mapping,
...
...
@@ -1431,7 +1431,6 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
	inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
}
-extern struct timespec current_fs_time(struct super_block *sb);
extern struct timespec current_time(struct inode *inode);
/*
...
...
@@ -6,7 +6,6 @@
#include <linux/list.h>
#include <linux/llist.h>
#include <asm/page.h>		/* pgprot_t */
-#include <asm/pgtable.h>	/* PAGE_KERNEL */
#include <linux/rbtree.h>
struct vm_area_struct;		/* vma defining user mapping in mm_types.h */
...
@@ -83,22 +82,14 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
			const void *caller);
#ifndef CONFIG_MMU
extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
-#else
-extern void *__vmalloc_node(unsigned long size, unsigned long align,
-			gfp_t gfp_mask, pgprot_t prot,
-			int node, const void *caller);
-/*
- * We really want to have this inlined due to caller tracking. This
- * function is used by the highlevel vmalloc apis and so we want to track
- * their callers and inlining will achieve that.
- */
-static inline void *__vmalloc_node_flags(unsigned long size,
-			int node, gfp_t flags)
+static inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
+						gfp_t flags, void *caller)
{
-	return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
-			node, __builtin_return_address(0));
+	return __vmalloc_node_flags(size, node, flags);
}
+#else
+extern void *__vmalloc_node_flags_caller(unsigned long size,
+					 int node, gfp_t flags, void *caller);
#endif
extern void vfree(const void *addr);
...
...
@@ -98,6 +98,12 @@ void __gcov_merge_icall_topn(gcov_type *counters, unsigned int n_counters)
}
EXPORT_SYMBOL(__gcov_merge_icall_topn);
+void __gcov_exit(void)
+{
+	/* Unused. */
+}
+EXPORT_SYMBOL(__gcov_exit);
/**
 * gcov_enable_events - enable event reporting through gcov_event()
 *
...
...
@@ -18,7 +18,9 @@
#include <linux/vmalloc.h>
#include "gcov.h"
-#if (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
+#if (__GNUC__ >= 7)
+#define GCOV_COUNTERS 9
+#elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
#define GCOV_COUNTERS 10
#elif __GNUC__ == 4 && __GNUC_MINOR__ >= 9
#define GCOV_COUNTERS 9
...
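The ordering in the hunk above matters: GCC 7 also satisfies the pre-existing (__GNUC__ > 5) test, so the new >= 7 branch has to come first or GCC 7 builds would silently keep the 10-counter layout. A minimal stand-alone sketch (plain user-space C, not kernel code; the DEMO_GCOV_COUNTERS name is made up) that mirrors the chain and prints what the building compiler would select:

/*
 * Hypothetical demo: "#if (__GNUC__ >= 7)" must precede the
 * "(__GNUC__ > 5) || ..." test, because GCC 7 satisfies both and the
 * #if/#elif chain stops at the first match.
 */
#include <stdio.h>

#if (__GNUC__ >= 7)
#define DEMO_GCOV_COUNTERS	9	/* GCC 7.x layout */
#elif (__GNUC__ > 5) || (__GNUC__ == 5 && __GNUC_MINOR__ >= 1)
#define DEMO_GCOV_COUNTERS	10	/* GCC 5.1+ / 6.x layout */
#elif (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)
#define DEMO_GCOV_COUNTERS	9	/* GCC 4.9+ layout */
#else
#define DEMO_GCOV_COUNTERS	0	/* older compilers: value not shown in the hunk above */
#endif

int main(void)
{
	printf("compiler %d.%d -> %d gcov counters\n",
	       __GNUC__, __GNUC_MINOR__, DEMO_GCOV_COUNTERS);
	return 0;
}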
...
@@ -230,20 +230,6 @@ SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
	return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
}
-/**
- * current_fs_time - Return FS time
- * @sb: Superblock.
- *
- * Return the current time truncated to the time granularity supported by
- * the fs.
- */
-struct timespec current_fs_time(struct super_block *sb)
-{
-	struct timespec now = current_kernel_time();
-	return timespec_trunc(now, sb->s_time_gran);
-}
-EXPORT_SYMBOL(current_fs_time);
/*
 * Convert jiffies to milliseconds and back.
 *
...
...
@@ -612,7 +612,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
				      spinlock_t *ptl)
{
	pte_t *_pte;
-	for (_pte = pte; _pte < pte+HPAGE_PMD_NR; _pte++) {
+	for (_pte = pte; _pte < pte + HPAGE_PMD_NR;
+				_pte++, page++, address += PAGE_SIZE) {
		pte_t pteval = *_pte;
		struct page *src_page;
...
@@ -651,9 +652,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
			spin_unlock(ptl);
			free_page_and_swap_cache(src_page);
		}
-		address += PAGE_SIZE;
-		page++;
+		cond_resched();
	}
}
...
@@ -907,9 +906,11 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
			return false;
		}
		/* check if the pmd is still valid */
-		if (mm_find_pmd(mm, address) != pmd)
+		if (mm_find_pmd(mm, address) != pmd) {
+			trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
			return false;
+		}
	}
	if (ret & VM_FAULT_ERROR) {
		trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
		return false;
...
...
@@ -5528,7 +5528,7 @@ static void uncharge_list(struct list_head *page_list)
		next = page->lru.next;
		VM_BUG_ON_PAGE(PageLRU(page), page);
-		VM_BUG_ON_PAGE(page_count(page), page);
+		VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
		if (!page->mem_cgroup)
			continue;
...
...
@@ -539,6 +539,13 @@ static int delete_from_lru_cache(struct page *p)
		 */
		ClearPageActive(p);
		ClearPageUnevictable(p);
+		/*
+		 * Poisoned page might never drop its ref count to 0 so we have
+		 * to uncharge it manually from its memcg.
+		 */
+		mem_cgroup_uncharge(p);
		/*
		 * drop the page count elevated by isolate_lru_page()
		 */
...
...
@@ -67,17 +67,14 @@ static void truncate_exceptional_entry(struct address_space *mapping,
/*
 * Invalidate exceptional entry if easily possible. This handles exceptional
- * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
- * clean entries.
+ * entries for invalidate_inode_pages().
 */
static int invalidate_exceptional_entry(struct address_space *mapping,
					pgoff_t index, void *entry)
{
-	/* Handled by shmem itself */
-	if (shmem_mapping(mapping))
+	/* Handled by shmem itself, or for DAX we do nothing. */
+	if (shmem_mapping(mapping) || dax_mapping(mapping))
		return 1;
-	if (dax_mapping(mapping))
-		return dax_invalidate_mapping_entry(mapping, index);
	clear_shadow_entry(mapping, index, entry);
	return 1;
}
...
@@ -689,7 +686,17 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
		cond_resched();
		index++;
	}
+	/*
+	 * For DAX we invalidate page tables after invalidating radix tree. We
+	 * could invalidate page tables while invalidating each entry however
+	 * that would be expensive. And doing range unmapping before doesn't
+	 * work as we have no cheap way to find whether radix tree entry didn't
+	 * get remapped later.
+	 */
+	if (dax_mapping(mapping)) {
+		unmap_mapping_range(mapping, (loff_t)start << PAGE_SHIFT,
+				    (loff_t)(end - start + 1) << PAGE_SHIFT, 0);
+	}
out:
	cleancache_invalidate_inode(mapping);
	return ret;
...
...
@@ -382,7 +382,8 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
	if (ret || size <= PAGE_SIZE)
		return ret;
-	return __vmalloc_node_flags(size, node, flags);
+	return __vmalloc_node_flags_caller(size, node, flags,
+			__builtin_return_address(0));
}
EXPORT_SYMBOL(kvmalloc_node);
...
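A small stand-alone sketch (plain user-space C, not kernel code; wrapper_alloc() and alloc_tracked() are made-up names) of the caller-tracking pattern this hunk relies on: an out-of-line wrapper captures __builtin_return_address(0) itself and hands it down explicitly, so the allocation is attributed to the wrapper's caller rather than to the wrapper, which appears to be the point of the new caller argument.

#include <stdio.h>
#include <stdlib.h>

/* low-level allocator: just records whatever "caller" it was handed */
static void *alloc_tracked(size_t size, void *caller)
{
	printf("%zu bytes attributed to caller %p\n", size, caller);
	return malloc(size);
}

/* out-of-line wrapper, loosely analogous to kvmalloc_node(): it passes its
 * own return address down, otherwise every allocation would be attributed
 * to the wrapper itself */
__attribute__((noinline))
static void *wrapper_alloc(size_t size)
{
	return alloc_tracked(size, __builtin_return_address(0));
}

int main(void)
{
	void *p = wrapper_alloc(128);	/* reported address lies inside main() */

	free(p);
	return 0;
}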
...
@@ -1649,6 +1649,9 @@ void *vmap(struct page **pages, unsigned int count,
}
EXPORT_SYMBOL(vmap);
+static void *__vmalloc_node(unsigned long size, unsigned long align,
+			    gfp_t gfp_mask, pgprot_t prot,
+			    int node, const void *caller);
static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
				 pgprot_t prot, int node)
{
...
@@ -1791,7 +1794,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 *	with mm people.
 *
 */
-void *__vmalloc_node(unsigned long size, unsigned long align,
+static void *__vmalloc_node(unsigned long size, unsigned long align,
			    gfp_t gfp_mask, pgprot_t prot,
			    int node, const void *caller)
{
...
@@ -1806,6 +1809,20 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
}
EXPORT_SYMBOL(__vmalloc);
+static inline void *__vmalloc_node_flags(unsigned long size,
+					int node, gfp_t flags)
+{
+	return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
+					node, __builtin_return_address(0));
+}
+void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
+				  void *caller)
+{
+	return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);
+}
/**
 * vmalloc - allocate virtually contiguous memory
 * @size: allocation size
...
...
@@ -1449,7 +1449,7 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
 *
 * Appropriate locks must be held before calling this function.
 *
- * @nr_to_scan:	The number of pages to look through on the list.
+ * @nr_to_scan:	The number of eligible pages to look through on the list.
 * @lruvec:	The LRU vector to pull pages from.
 * @dst:	The temp list to put pages on to.
 * @nr_scanned:	The number of pages that were scanned.
...
@@ -1469,11 +1469,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
	unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
	unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
	unsigned long skipped = 0;
-	unsigned long scan, nr_pages;
+	unsigned long scan, total_scan, nr_pages;
	LIST_HEAD(pages_skipped);
-	for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
-	     !list_empty(src); scan++) {
+	scan = 0;
+	for (total_scan = 0;
+	     scan < nr_to_scan && nr_taken < nr_to_scan && !list_empty(src);
+	     total_scan++) {
		struct page *page;
		page = lru_to_page(src);
...
@@ -1487,6 +1489,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
			continue;
		}
+		/*
+		 * Do not count skipped pages because that makes the function
+		 * return with no isolated pages if the LRU mostly contains
+		 * ineligible pages. This causes the VM to not reclaim any
+		 * pages, triggering a premature OOM.
+		 */
+		scan++;
		switch (__isolate_lru_page(page, mode)) {
		case 0:
			nr_pages = hpage_nr_pages(page);
...
@@ -1524,9 +1533,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
			skipped += nr_skipped[zid];
		}
	}
-	*nr_scanned = scan;
+	*nr_scanned = total_scan;
	trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan,
-				    scan, skipped, nr_taken, mode, lru);
+				    total_scan, skipped, nr_taken, mode, lru);
	update_lru_sizes(lruvec, lru, nr_zone_taken);
	return nr_taken;
}
...
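A stand-alone analogue (plain user-space C over an array, not the kernel function; isolate_items() and struct item are made-up names) of the accounting change above: the scan budget is charged only for eligible entries, while the total number of entries looked at is reported separately, so a list dominated by ineligible entries can no longer exhaust the budget before anything is isolated.

#include <stdbool.h>
#include <stdio.h>

struct item {
	bool eligible;		/* stand-in for "page belongs to an eligible zone" */
};

/* charge the budget ("scan") only for eligible items; report total work */
static unsigned long isolate_items(const struct item *list, unsigned long nitems,
				   unsigned long nr_to_scan,
				   unsigned long *nr_scanned)
{
	unsigned long taken = 0, scan = 0, total_scan, i = 0;

	for (total_scan = 0;
	     scan < nr_to_scan && taken < nr_to_scan && i < nitems;
	     total_scan++, i++) {
		if (!list[i].eligible)
			continue;	/* skipped items do not eat the budget */
		scan++;
		taken++;		/* "isolate" the eligible item */
	}

	*nr_scanned = total_scan;
	return taken;
}

int main(void)
{
	struct item list[32] = { { false } };	/* 30 ineligible, then 2 eligible */
	unsigned long scanned, taken;

	list[30].eligible = true;
	list[31].eligible = true;

	/*
	 * With the old accounting (budget charged for every entry looked at),
	 * a budget of 8 would give taken=0 here and look like "nothing left
	 * to reclaim"; with the new accounting both eligible items are found.
	 */
	taken = isolate_items(list, 32, 8, &scanned);
	printf("taken=%lu scanned=%lu\n", taken, scanned);
	return 0;
}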
...
@@ -1359,8 +1359,6 @@ static bool is_zone_first_populated(pg_data_t *pgdat, struct zone *zone)
			return zone == compare;
	}
-	/* The zone must be somewhere! */
-	WARN_ON_ONCE(1);
	return false;
}
...