Commit 5ab0fc15 authored by Andrew Morton

Sync mm-stable with mm-hotfixes-stable to pick up dependent patches

Merge branch 'mm-hotfixes-stable' into mm-stable
parents 9a3f21fe ac86f547
......@@ -130,6 +130,7 @@ Domen Puncer <domen@coderock.org>
Douglas Gilbert <dougg@torque.net>
Ed L. Cashin <ecashin@coraid.com>
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com>
Evgeniy Polyakov <johnpol@2ka.mipt.ru>
Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com>
Felipe W Damasio <felipewd@terra.com.br>
......
......@@ -1245,13 +1245,17 @@ PAGE_SIZE multiple when read back.
This is a simple interface to trigger memory reclaim in the
target cgroup.
This file accepts a string which contains the number of bytes to
reclaim.
This file accepts a single key, the number of bytes to reclaim.
No nested keys are currently supported.
Example::
echo "1G" > memory.reclaim
The interface can be later extended with nested keys to
configure the reclaim behavior. For example, specify the
type of memory to reclaim from (anon, file, ..).
Please note that the kernel can over or under reclaim from
the target cgroup. If less bytes are reclaimed than the
specified amount, -EAGAIN is returned.
......@@ -1263,13 +1267,6 @@ PAGE_SIZE multiple when read back.
This means that the networking layer will not adapt based on
reclaim induced by memory.reclaim.
This file also allows the user to specify the nodes to reclaim from,
via the 'nodes=' key, for example::
echo "1G nodes=0,1" > memory.reclaim
The above instructs the kernel to reclaim memory from nodes 0,1.
memory.peak
A read-only single value file which exists on non-root
cgroups.
......
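The documentation hunk above narrows memory.reclaim back to a single size value and drops the nodes= key (the matching parser removal appears later in mm/memcontrol.c). Below is a minimal userspace sketch of driving the interface as described; the cgroup path and the "1G" amount are hypothetical, and EAGAIN is treated as the documented partial-reclaim case.

/* Minimal sketch, not from the patch: trigger proactive reclaim by writing a
 * size string to memory.reclaim. Path and amount are illustrative assumptions. */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/fs/cgroup/example/memory.reclaim"; /* hypothetical group */
	const char *amount = "1G";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, amount, strlen(amount)) < 0) {
		/* Per the documentation above, EAGAIN means less than the
		 * requested amount was reclaimed. */
		if (errno == EAGAIN)
			fprintf(stderr, "partial reclaim (EAGAIN)\n");
		else
			perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}

With the nodes= extension reverted, a write such as "1G nodes=0,1" should now be rejected rather than parsed, matching the documentation removed above.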
......@@ -170,6 +170,9 @@ ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, u
asmlinkage long
ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *tp)
{
struct timespec64 rtn_tp;
s64 tick_ns;
/*
* ia64's clock_gettime() syscall is implemented as a vdso call
* fsys_clock_gettime(). Currently it handles only
......@@ -185,8 +188,8 @@ ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *
switch (which_clock) {
case CLOCK_REALTIME:
case CLOCK_MONOTONIC:
s64 tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq);
struct timespec64 rtn_tp = ns_to_timespec64(tick_ns);
tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq);
rtn_tp = ns_to_timespec64(tick_ns);
return put_timespec64(&rtn_tp, tp);
}
......
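The ia64 hunk hoists the tick_ns and rtn_tp declarations out of the CLOCK_MONOTONIC case body: before C23, a case label must be attached to a statement, and a declaration is not one, so compilers reject declarations placed directly after the label. A reduced sketch of the same shape, with made-up names (the offending form is shown only in the comment so the sketch still builds):

/* Reduced illustration of the pattern fixed above; names are hypothetical.
 *
 * Invalid before C23 (declaration directly after a case label):
 *
 *	switch (which) {
 *	case 1:
 *		long tick_ns = 10;	// rejected: label not followed by a statement
 *		...
 *	}
 *
 * The fix declares the variables before the switch, as the patch does.
 */
#include <stdio.h>

static long getres_demo(int which)
{
	long tick_ns;	/* hoisted, as in ia64_clock_getres() */

	switch (which) {
	case 1:
	case 2:
		tick_ns = 10;
		return tick_ns;
	}
	return -1;
}

int main(void)
{
	printf("%ld\n", getres_demo(1));
	return 0;
}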
......@@ -4,6 +4,7 @@
* Written by Niibe Yutaka and Paul Mundt
*/
OUTPUT_ARCH(sh)
#define RUNTIME_DISCARD_EXIT
#include <asm/thread_info.h>
#include <asm/cache.h>
#include <asm/vmlinux.lds.h>
......
......@@ -26,7 +26,6 @@
#include <linux/serial_core.h>
#include <linux/sysfs.h>
#include <linux/random.h>
#include <linux/kmemleak.h>
#include <asm/setup.h> /* for COMMAND_LINE_SIZE */
#include <asm/page.h>
......@@ -525,12 +524,9 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
size = dt_mem_next_cell(dt_root_size_cells, &prop);
if (size &&
early_init_dt_reserve_memory(base, size, nomap) == 0) {
early_init_dt_reserve_memory(base, size, nomap) == 0)
pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
if (!nomap)
kmemleak_alloc_phys(base, size, 0);
}
else
pr_err("Reserved memory: failed to reserve memory for node '%s': base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
......
......@@ -8,7 +8,7 @@ config VXFS_FS
of SCO UnixWare (and possibly others) and optionally available
for Sunsoft Solaris, HP-UX and many other operating systems. However
these particular OS implementations of vxfs may differ in on-disk
data endianess and/or superblock offset. The vxfs module has been
data endianness and/or superblock offset. The vxfs module has been
tested with SCO UnixWare and HP-UX B.10.20 (pa-risc 1.1 arch.)
Currently only readonly access is supported and VxFX versions
2, 3 and 4. Tests were performed with HP-UX VxFS version 3.
......
......@@ -745,9 +745,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
page = pfn_swap_entry_to_page(swpent);
}
if (page) {
int mapcount = page_mapcount(page);
if (mapcount >= 2)
if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte))
mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
else
mss->private_hugetlb += huge_page_size(hstate_vma(vma));
......
......@@ -183,7 +183,7 @@ static inline int squashfs_block_size(__le32 raw)
#define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\
sizeof(u64))
/* xattr id lookup table defines */
#define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id))
#define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id))
#define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \
SQUASHFS_METADATA_SIZE)
......
......@@ -63,7 +63,7 @@ struct squashfs_sb_info {
long long bytes_used;
unsigned int inodes;
unsigned int fragments;
int xattr_ids;
unsigned int xattr_ids;
unsigned int ids;
bool panic_on_errors;
const struct squashfs_decompressor_thread_ops *thread_ops;
......
......@@ -10,12 +10,12 @@
#ifdef CONFIG_SQUASHFS_XATTR
extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
u64 *, int *);
u64 *, unsigned int *);
extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
unsigned int *, unsigned long long *);
#else
static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
u64 start, u64 *xattr_table_start, int *xattr_ids)
u64 start, u64 *xattr_table_start, unsigned int *xattr_ids)
{
struct squashfs_xattr_id_table *id_table;
......
......@@ -56,7 +56,7 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index,
* Read uncompressed xattr id lookup table indexes from disk into memory
*/
__le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
u64 *xattr_table_start, int *xattr_ids)
u64 *xattr_table_start, unsigned int *xattr_ids)
{
struct squashfs_sb_info *msblk = sb->s_fs_info;
unsigned int len, indexes;
......@@ -76,7 +76,7 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
/* Sanity check values */
/* there is always at least one xattr id */
if (*xattr_ids == 0)
if (*xattr_ids <= 0)
return ERR_PTR(-EINVAL);
len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids);
......
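Taken together, the squashfs hunks widen xattr_ids to unsigned int everywhere it is passed around and force the SQUASHFS_XATTR_BYTES() multiplication through u64, so a huge id count read from a corrupted image cannot wrap the byte-length calculation on 32-bit builds (where size_t is 32 bits wide) before it is sanity checked. A standalone sketch of the arithmetic difference; the 16-byte entry size stands in for sizeof(struct squashfs_xattr_id) and is an assumption here.

/* Sketch of why the u64 cast in SQUASHFS_XATTR_BYTES() matters. The entry
 * size is an assumed stand-in; uint32_t models a 32-bit size_t so the wrap
 * is visible on any host. */
#include <stdint.h>
#include <stdio.h>

#define ENTRY_SIZE	16u	/* assumed sizeof(struct squashfs_xattr_id) */
#define BYTES_NARROW(A)	((uint32_t)(A) * ENTRY_SIZE)	/* 32-bit math, pre-patch shape */
#define BYTES_WIDE(A)	(((uint64_t)(A)) * ENTRY_SIZE)	/* patched shape */

int main(void)
{
	unsigned int ids = 0x10000000u;	/* hostile id count from a corrupted image */

	printf("narrow: %llu bytes\n", (unsigned long long)BYTES_NARROW(ids));	/* wraps to 0 */
	printf("wide:   %llu bytes\n", (unsigned long long)BYTES_WIDE(ids));	/* 4294967296 */
	return 0;
}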
......@@ -200,7 +200,7 @@ static inline void *kmap_local_pfn(unsigned long pfn)
static inline void __kunmap_local(const void *addr)
{
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
kunmap_flush_on_unmap(addr);
kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE));
#endif
}
......@@ -227,7 +227,7 @@ static inline void *kmap_atomic_pfn(unsigned long pfn)
static inline void __kunmap_atomic(const void *addr)
{
#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
kunmap_flush_on_unmap(addr);
kunmap_flush_on_unmap(PTR_ALIGN_DOWN(addr, PAGE_SIZE));
#endif
pagefault_enable();
if (IS_ENABLED(CONFIG_PREEMPT_RT))
......
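Both __kunmap_local() and __kunmap_atomic() now pass the page-aligned base of the mapping to kunmap_flush_on_unmap() instead of the raw pointer, which may point somewhere inside the page; PTR_ALIGN_DOWN(addr, PAGE_SIZE) rounds it down to the start of the page so the flush covers the whole mapping. A tiny sketch of that rounding, with the 4096-byte page size and the address assumed for illustration:

/* Sketch of the PTR_ALIGN_DOWN(addr, PAGE_SIZE) rounding used above. */
#include <stdio.h>

#define PAGE_SIZE_DEMO	4096UL
#define ALIGN_DOWN_DEMO(x, a)	((x) & ~((a) - 1))

int main(void)
{
	unsigned long mapped = 0x12345UL;	/* pointer offset inside a page */
	unsigned long base = ALIGN_DOWN_DEMO(mapped, PAGE_SIZE_DEMO);

	/* The flush is issued on the page base, not the offset pointer. */
	printf("mapped=%#lx page base=%#lx\n", mapped, base);
	return 0;
}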
......@@ -8,6 +8,7 @@
#include <linux/fs.h>
#include <linux/hugetlb_inline.h>
#include <linux/cgroup.h>
#include <linux/page_ref.h>
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/pgtable.h>
......@@ -1225,6 +1226,18 @@ static inline __init void hugetlb_cma_reserve(int order)
}
#endif
#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
return page_count(virt_to_page(pte)) > 1;
}
#else
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
return false;
}
#endif
bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
......
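hugetlb_pmd_shared() reports whether the page-table page holding the PMD has more than one reference, i.e. whether the PMD is shared with another process. The smaps hunk earlier uses it so PMD-shared hugetlb memory is accounted as shared even when page_mapcount() is 1, and the mempolicy hunk later skips migrating such pages. A plain-C sketch of the accounting decision, with the kernel predicates reduced to ordinary values (names are illustrative):

/* Sketch of the shared/private hugetlb split from smaps_hugetlb_range() above;
 * the predicates are reduced to plain parameters for illustration. */
#include <stdbool.h>
#include <stdio.h>

static const char *classify(int mapcount, bool pmd_shared)
{
	/* Mirrors: page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte) */
	return (mapcount >= 2 || pmd_shared) ? "shared_hugetlb" : "private_hugetlb";
}

int main(void)
{
	printf("mapcount=1, pmd unshared -> %s\n", classify(1, false));
	printf("mapcount=1, pmd shared   -> %s\n", classify(1, true));
	printf("mapcount=2, pmd unshared -> %s\n", classify(2, false));
	return 0;
}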
......@@ -1688,10 +1688,13 @@ void mem_cgroup_track_foreign_dirty_slowpath(struct folio *folio,
static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
struct bdi_writeback *wb)
{
struct mem_cgroup *memcg;
if (mem_cgroup_disabled())
return;
if (unlikely(&folio_memcg(folio)->css != wb->memcg_css))
memcg = folio_memcg(folio);
if (unlikely(memcg && &memcg->css != wb->memcg_css))
mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
}
......
......@@ -418,8 +418,7 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
unsigned int reclaim_options,
nodemask_t *nodemask);
unsigned int reclaim_options);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
......
......@@ -754,6 +754,7 @@ config DEBUG_KMEMLEAK
select KALLSYMS
select CRC32
select STACKDEPOT
select STACKDEPOT_ALWAYS_INIT if !DEBUG_KMEMLEAK_DEFAULT_OFF
help
Say Y here if you want to enable the memory leak
detector. The memory allocation/freeing is traced in a way
......@@ -1207,7 +1208,7 @@ config SCHED_DEBUG
depends on DEBUG_KERNEL && PROC_FS
default y
help
If you say Y here, the /proc/sched_debug file will be provided
If you say Y here, the /sys/kernel/debug/sched file will be provided
that can help debug the scheduler. The runtime overhead of this
option is minimal.
......
......@@ -667,12 +667,13 @@ static inline unsigned long mte_pivot(const struct maple_enode *mn,
unsigned char piv)
{
struct maple_node *node = mte_to_node(mn);
enum maple_type type = mte_node_type(mn);
if (piv >= mt_pivots[piv]) {
if (piv >= mt_pivots[type]) {
WARN_ON(1);
return 0;
}
switch (mte_node_type(mn)) {
switch (type) {
case maple_arange_64:
return node->ma64.pivot[piv];
case maple_range_64:
......@@ -4876,7 +4877,7 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
unsigned long *pivots, *gaps;
void __rcu **slots;
unsigned long gap = 0;
unsigned long max, min, index;
unsigned long max, min;
unsigned char offset;
if (unlikely(mas_is_err(mas)))
......@@ -4898,8 +4899,7 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
min = mas_safe_min(mas, pivots, --offset);
max = mas_safe_pivot(mas, pivots, offset, type);
index = mas->index;
while (index <= max) {
while (mas->index <= max) {
gap = 0;
if (gaps)
gap = gaps[offset];
......@@ -4930,10 +4930,8 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
min = mas_safe_min(mas, pivots, offset);
}
if (unlikely(index > max)) {
mas_set_err(mas, -EBUSY);
return false;
}
if (unlikely((mas->index > max) || (size - 1 > max - mas->index)))
goto no_space;
if (unlikely(ma_is_leaf(type))) {
mas->offset = offset;
......@@ -4950,9 +4948,11 @@ static bool mas_rev_awalk(struct ma_state *mas, unsigned long size)
return false;
ascend:
if (mte_is_root(mas->node))
mas_set_err(mas, -EBUSY);
if (!mte_is_root(mas->node))
return false;
no_space:
mas_set_err(mas, -EBUSY);
return false;
}
......
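In mas_rev_awalk() the local index copy is dropped in favour of mas->index and the failure paths collapse into a single no_space label; the new window test rejects a gap when either mas->index has moved past max or the remaining span max - mas->index is smaller than size - 1, a form in which the arithmetic cannot wrap near the top of the range. A small sketch of that fit test with made-up values:

/* Sketch of the gap-fit test used above: "index <= max && size - 1 <= max - index"
 * is the wrap-safe form of "index + size - 1 <= max". Values are made up. */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

static bool fits(unsigned long index, unsigned long max, unsigned long size)
{
	return index <= max && size - 1 <= max - index;
}

int main(void)
{
	printf("gap 160-169, size 10 -> %d\n", fits(160, 169, 10));	/* fits */
	printf("gap 160-169, size 16 -> %d\n", fits(160, 169, 16));	/* does not fit */
	/* The naive "index + size - 1" would wrap here and wrongly report a fit. */
	printf("near ULONG_MAX, size 16 -> %d\n", fits(ULONG_MAX - 7, ULONG_MAX, 16));
	return 0;
}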
......@@ -2517,6 +2517,91 @@ static noinline void check_bnode_min_spanning(struct maple_tree *mt)
mt_set_non_kernel(0);
}
static noinline void check_empty_area_window(struct maple_tree *mt)
{
unsigned long i, nr_entries = 20;
MA_STATE(mas, mt, 0, 0);
for (i = 1; i <= nr_entries; i++)
mtree_store_range(mt, i*10, i*10 + 9,
xa_mk_value(i), GFP_KERNEL);
/* Create another hole besides the one at 0 */
mtree_store_range(mt, 160, 169, NULL, GFP_KERNEL);
/* Check lower bounds that don't fit */
rcu_read_lock();
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 90, 10) != -EBUSY);
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 6, 90, 5) != -EBUSY);
/* Check lower bound that does fit */
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 90, 5) != 0);
MT_BUG_ON(mt, mas.index != 5);
MT_BUG_ON(mt, mas.last != 9);
rcu_read_unlock();
/* Check one gap that doesn't fit and one that does */
rcu_read_lock();
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 5, 217, 9) != 0);
MT_BUG_ON(mt, mas.index != 161);
MT_BUG_ON(mt, mas.last != 169);
/* Check one gap that does fit above the min */
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 218, 3) != 0);
MT_BUG_ON(mt, mas.index != 216);
MT_BUG_ON(mt, mas.last != 218);
/* Check size that doesn't fit any gap */
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 218, 16) != -EBUSY);
/*
* Check size that doesn't fit the lower end of the window but
* does fit the gap
*/
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 167, 200, 4) != -EBUSY);
/*
* Check size that doesn't fit the upper end of the window but
* does fit the gap
*/
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area_rev(&mas, 100, 162, 4) != -EBUSY);
/* Check mas_empty_area forward */
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 9) != 0);
MT_BUG_ON(mt, mas.index != 0);
MT_BUG_ON(mt, mas.last != 8);
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 4) != 0);
MT_BUG_ON(mt, mas.index != 0);
MT_BUG_ON(mt, mas.last != 3);
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area(&mas, 0, 100, 11) != -EBUSY);
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area(&mas, 5, 100, 6) != -EBUSY);
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area(&mas, 0, 8, 10) != -EBUSY);
mas_reset(&mas);
mas_empty_area(&mas, 100, 165, 3);
mas_reset(&mas);
MT_BUG_ON(mt, mas_empty_area(&mas, 100, 163, 6) != -EBUSY);
rcu_read_unlock();
}
static DEFINE_MTREE(tree);
static int maple_tree_seed(void)
{
......@@ -2765,6 +2850,10 @@ static int maple_tree_seed(void)
check_bnode_min_spanning(&tree);
mtree_destroy(&tree);
mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE);
check_empty_area_window(&tree);
mtree_destroy(&tree);
#if defined(BENCH)
skip:
#endif
......
......@@ -847,6 +847,10 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
return SCAN_SUCCEED;
}
/*
* See pmd_trans_unstable() for how the result may change out from
* underneath us, even if we hold mmap_lock in read.
*/
static int find_pmd_or_thp_or_none(struct mm_struct *mm,
unsigned long address,
pmd_t **pmd)
......@@ -865,8 +869,12 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm,
#endif
if (pmd_none(pmde))
return SCAN_PMD_NONE;
if (!pmd_present(pmde))
return SCAN_PMD_NULL;
if (pmd_trans_huge(pmde))
return SCAN_PMD_MAPPED;
if (pmd_devmap(pmde))
return SCAN_PMD_NULL;
if (pmd_bad(pmde))
return SCAN_PMD_NULL;
return SCAN_SUCCEED;
......@@ -1642,7 +1650,7 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
* has higher cost too. It would also probably require locking
* the anon_vma.
*/
if (vma->anon_vma) {
if (READ_ONCE(vma->anon_vma)) {
result = SCAN_PAGE_ANON;
goto next;
}
......@@ -1670,6 +1678,18 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff,
result = SCAN_PTE_MAPPED_HUGEPAGE;
if ((cc->is_khugepaged || is_target) &&
mmap_write_trylock(mm)) {
/*
* Re-check whether we have an ->anon_vma, because
* collapse_and_free_pmd() requires that either no
* ->anon_vma exists or the anon_vma is locked.
* We already checked ->anon_vma above, but that check
* is racy because ->anon_vma can be populated under the
* mmap lock in read mode.
*/
if (vma->anon_vma) {
result = SCAN_PAGE_ANON;
goto unlock_next;
}
/*
* When a vma is registered with uffd-wp, we can't
* recycle the pmd pgtable because there can be pte
......
......@@ -2070,8 +2070,10 @@ static int __init kmemleak_boot_config(char *str)
return -EINVAL;
if (strcmp(str, "off") == 0)
kmemleak_disable();
else if (strcmp(str, "on") == 0)
else if (strcmp(str, "on") == 0) {
kmemleak_skip_disable = 1;
stack_depot_want_early_init();
}
else
return -EINVAL;
return 0;
......@@ -2093,7 +2095,6 @@ void __init kmemleak_init(void)
if (kmemleak_error)
return;
stack_depot_init();
jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
......
......@@ -63,7 +63,6 @@
#include <linux/resume_user_mode.h>
#include <linux/psi.h>
#include <linux/seq_buf.h>
#include <linux/parser.h>
#include "internal.h"
#include <net/sock.h>
#include <net/ip.h>
......@@ -2403,8 +2402,7 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
psi_memstall_enter(&pflags);
nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
gfp_mask,
MEMCG_RECLAIM_MAY_SWAP,
NULL);
MEMCG_RECLAIM_MAY_SWAP);
psi_memstall_leave(&pflags);
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
......@@ -2695,8 +2693,7 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask,
psi_memstall_enter(&pflags);
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
gfp_mask, reclaim_options,
NULL);
gfp_mask, reclaim_options);
psi_memstall_leave(&pflags);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
......@@ -3516,8 +3513,7 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
}
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP,
NULL)) {
memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) {
ret = -EBUSY;
break;
}
......@@ -3631,8 +3627,7 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
return -EINTR;
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
MEMCG_RECLAIM_MAY_SWAP,
NULL))
MEMCG_RECLAIM_MAY_SWAP))
nr_retries--;
}
......@@ -6473,8 +6468,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
}
reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
NULL);
GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
if (!reclaimed && !nr_retries--)
break;
......@@ -6523,8 +6517,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
if (nr_reclaims) {
if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
NULL))
GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP))
nr_reclaims--;
continue;
}
......@@ -6647,54 +6640,21 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
return nbytes;
}
enum {
MEMORY_RECLAIM_NODES = 0,
MEMORY_RECLAIM_NULL,
};
static const match_table_t if_tokens = {
{ MEMORY_RECLAIM_NODES, "nodes=%s" },
{ MEMORY_RECLAIM_NULL, NULL },
};
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
unsigned long nr_to_reclaim, nr_reclaimed = 0;
unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP |
MEMCG_RECLAIM_PROACTIVE;
char *old_buf, *start;
substring_t args[MAX_OPT_ARGS];
int token;
char value[256];
nodemask_t nodemask = NODE_MASK_ALL;
buf = strstrip(buf);
old_buf = buf;
nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
if (buf == old_buf)
return -EINVAL;
unsigned int reclaim_options;
int err;
buf = strstrip(buf);
err = page_counter_memparse(buf, "", &nr_to_reclaim);
if (err)
return err;
while ((start = strsep(&buf, " ")) != NULL) {
if (!strlen(start))
continue;
token = match_token(start, if_tokens, args);
match_strlcpy(value, args, sizeof(value));
switch (token) {
case MEMORY_RECLAIM_NODES:
if (nodelist_parse(value, nodemask) < 0)
return -EINVAL;
break;
default:
return -EINVAL;
}
}
reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
while (nr_reclaimed < nr_to_reclaim) {
unsigned long reclaimed;
......@@ -6711,8 +6671,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
reclaimed = try_to_free_mem_cgroup_pages(memcg,
nr_to_reclaim - nr_reclaimed,
GFP_KERNEL, reclaim_options,
&nodemask);
GFP_KERNEL, reclaim_options);
if (!reclaimed && !nr_retries--)
return -EAGAIN;
......
......@@ -600,7 +600,8 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask,
/* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
if (flags & (MPOL_MF_MOVE_ALL) ||
(flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) {
(flags & MPOL_MF_MOVE && page_mapcount(page) == 1 &&
!hugetlb_pmd_shared(pte))) {
if (isolate_hugetlb(page, qp->pagelist) &&
(flags & MPOL_MF_STRICT))
/*
......
......@@ -1027,16 +1027,29 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
}
/*
* Function vma_merge() is called on the extension we are adding to
* the already existing vma, vma_merge() will merge this extension with
* the already existing vma (expand operation itself) and possibly also
* with the next vma if it becomes adjacent to the expanded vma and
* otherwise compatible.
* Function vma_merge() is called on the extension we
* are adding to the already existing vma, vma_merge()
* will merge this extension with the already existing
* vma (expand operation itself) and possibly also with
* the next vma if it becomes adjacent to the expanded
* vma and otherwise compatible.
*
* However, vma_merge() can currently fail due to
* is_mergeable_vma() check for vm_ops->close (see the
* comment there). Yet this should not prevent vma
* expanding, so perform a simple expand for such vma.
* Ideally the check for close op should be only done
* when a vma would be actually removed due to a merge.
*/
vma = vma_merge(mm, vma, extension_start, extension_end,
if (!vma->vm_ops || !vma->vm_ops->close) {
vma = vma_merge(mm, vma, extension_start, extension_end,
vma->vm_flags, vma->anon_vma, vma->vm_file,
extension_pgoff, vma_policy(vma),
vma->vm_userfaultfd_ctx, anon_vma_name(vma));
} else if (vma_adjust(vma, vma->vm_start, addr + new_len,
vma->vm_pgoff, NULL)) {
vma = NULL;
}
if (!vma) {
vm_unacct_memory(pages);
ret = -ENOMEM;
......
......@@ -1100,6 +1100,7 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
goto check_out;
pr_debug("scan_swap_map of si %d failed to find offset\n",
si->type);
cond_resched();
spin_lock(&swap_avail_lock);
nextsi:
......
......@@ -3335,13 +3335,16 @@ void lru_gen_migrate_mm(struct mm_struct *mm)
if (mem_cgroup_disabled())
return;
/* migration can happen before addition */
if (!mm->lru_gen.memcg)
return;
rcu_read_lock();
memcg = mem_cgroup_from_task(task);
rcu_read_unlock();
if (memcg == mm->lru_gen.memcg)
return;
VM_WARN_ON_ONCE(!mm->lru_gen.memcg);
VM_WARN_ON_ONCE(list_empty(&mm->lru_gen.list));
lru_gen_del_mm(mm);
......@@ -7022,8 +7025,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
unsigned int reclaim_options,
nodemask_t *nodemask)
unsigned int reclaim_options)
{
unsigned long nr_reclaimed;
unsigned int noreclaim_flag;
......@@ -7038,7 +7040,6 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.may_unmap = 1,
.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
.nodemask = nodemask,
};
/*
* Traverse the ZONELIST_FALLBACK zonelist of the current node to put
......
File mode changed from 100644 to 100755
......@@ -17,7 +17,6 @@
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#define __USE_GNU
#include <fcntl.h>
#define MIN_FREE_PAGES 20
......