Commit d9abdee5 authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "19 patches.

  Subsystems affected by this patch series: mm (userfaultfd, migration,
  memblock, mempolicy, slub, secretmem, and thp), ocfs2, binfmt, vfs,
  and misc"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mailmap: add Andrej Shadura
  mm/thp: decrease nr_thps in file's mapping on THP split
  mm/secretmem: fix NULL page->mapping dereference in page_is_secretmem()
  vfs: check fd has read access in kernel_read_file_from_fd()
  elfcore: correct reference to CONFIG_UML
  mm, slub: fix incorrect memcg slab count for bulk free
  mm, slub: fix potential use-after-free in slab_debugfs_fops
  mm, slub: fix potential memoryleak in kmem_cache_open()
  mm, slub: fix mismatch between reconstructed freelist depth and cnt
  mm, slub: fix two bugs in slab_debug_trace_open()
  mm/mempolicy: do not allow illegal MPOL_F_NUMA_BALANCING | MPOL_LOCAL in mbind()
  memblock: check memory total_size
  ocfs2: mount fails with buffer overflow in strlen
  ocfs2: fix data corruption after conversion from inline format
  mm/migrate: fix CPUHP state to update node demotion order
  mm/migrate: add CPU hotplug to demotion #ifdef
  mm/migrate: optimize hotplug-time demotion order updates
  userfaultfd: fix a race between writeprotect and exit_mmap()
  mm/userfaultfd: selftests: fix memory corruption with thp enabled
parents 519d8195 362d5dfc
......@@ -33,6 +33,8 @@ Al Viro <viro@zenIV.linux.org.uk>
Andi Kleen <ak@linux.intel.com> <ak@suse.de>
Andi Shyti <andi@etezian.org> <andi.shyti@samsung.com>
Andreas Herrmann <aherrman@de.ibm.com>
Andrej Shadura <andrew.shadura@collabora.co.uk>
Andrej Shadura <andrew@shadura.me> <andrew@beldisplaytech.com>
Andrew Morton <akpm@linux-foundation.org>
Andrew Murray <amurray@thegoodpenguin.co.uk> <amurray@embedded-bits.co.uk>
Andrew Murray <amurray@thegoodpenguin.co.uk> <andrew.murray@arm.com>
......
......@@ -178,7 +178,7 @@ int kernel_read_file_from_fd(int fd, loff_t offset, void **buf,
struct fd f = fdget(fd);
int ret = -EBADF;
if (!f.file)
if (!f.file || !(f.file->f_mode & FMODE_READ))
goto out;
ret = kernel_read_file(f.file, offset, buf, buf_size, file_size, id);
......
......@@ -7045,7 +7045,7 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct buffer_head *di_bh)
{
int ret, i, has_data, num_pages = 0;
int ret, has_data, num_pages = 0;
int need_free = 0;
u32 bit_off, num;
handle_t *handle;
......@@ -7054,26 +7054,17 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
struct ocfs2_alloc_context *data_ac = NULL;
struct page **pages = NULL;
loff_t end = osb->s_clustersize;
struct page *page = NULL;
struct ocfs2_extent_tree et;
int did_quota = 0;
has_data = i_size_read(inode) ? 1 : 0;
if (has_data) {
pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
sizeof(struct page *), GFP_NOFS);
if (pages == NULL) {
ret = -ENOMEM;
mlog_errno(ret);
return ret;
}
ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
if (ret) {
mlog_errno(ret);
goto free_pages;
goto out;
}
}
......@@ -7093,7 +7084,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
if (has_data) {
unsigned int page_end;
unsigned int page_end = min_t(unsigned, PAGE_SIZE,
osb->s_clustersize);
u64 phys;
ret = dquot_alloc_space_nodirty(inode,
......@@ -7117,15 +7109,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
*/
block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
/*
* Non sparse file systems zero on extend, so no need
* to do that now.
*/
if (!ocfs2_sparse_alloc(osb) &&
PAGE_SIZE < osb->s_clustersize)
end = PAGE_SIZE;
ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page,
&num_pages);
if (ret) {
mlog_errno(ret);
need_free = 1;
......@@ -7136,20 +7121,15 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
* This should populate the 1st page for us and mark
* it up to date.
*/
ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
ret = ocfs2_read_inline_data(inode, page, di_bh);
if (ret) {
mlog_errno(ret);
need_free = 1;
goto out_unlock;
}
page_end = PAGE_SIZE;
if (PAGE_SIZE > osb->s_clustersize)
page_end = osb->s_clustersize;
for (i = 0; i < num_pages; i++)
ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
pages[i], i > 0, &phys);
ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0,
&phys);
}
spin_lock(&oi->ip_lock);
......@@ -7180,8 +7160,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
out_unlock:
if (pages)
ocfs2_unlock_and_free_pages(pages, num_pages);
if (page)
ocfs2_unlock_and_free_pages(&page, num_pages);
out_commit:
if (ret < 0 && did_quota)
......@@ -7205,8 +7185,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
out:
if (data_ac)
ocfs2_free_alloc_context(data_ac);
free_pages:
kfree(pages);
return ret;
}
......
......@@ -2167,11 +2167,17 @@ static int ocfs2_initialize_super(struct super_block *sb,
}
if (ocfs2_clusterinfo_valid(osb)) {
/*
* ci_stack and ci_cluster in ocfs2_cluster_info may not be null
* terminated, so make sure no overflow happens here by using
* memcpy. Destination strings will always be null terminated
* because osb is allocated using kzalloc.
*/
osb->osb_stackflags =
OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
strlcpy(osb->osb_cluster_stack,
memcpy(osb->osb_cluster_stack,
OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
OCFS2_STACK_LABEL_LEN + 1);
OCFS2_STACK_LABEL_LEN);
if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
mlog(ML_ERROR,
"couldn't mount because of an invalid "
......@@ -2180,9 +2186,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
status = -EINVAL;
goto bail;
}
strlcpy(osb->osb_cluster_name,
memcpy(osb->osb_cluster_name,
OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
OCFS2_CLUSTER_NAME_LEN + 1);
OCFS2_CLUSTER_NAME_LEN);
} else {
/* The empty string is identical with classic tools that
* don't know about s_cluster_info. */
......
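
The comment in the ocfs2 hunk above explains the fix: ci_stack and ci_cluster are fixed-width on-disk fields that need not be NUL terminated, and the kernel's strlcpy() begins by running strlen() on the source, so it can read past the field; memcpy() of exactly the label length into the already-zeroed (kzalloc'd) osb buffer avoids the overread. A minimal userspace sketch of the difference, using a local kernel-style strlcpy() stand-in (demo_strlcpy, the sample label, and the buffer sizes are illustrative, not part of the patch):

    #include <stdio.h>
    #include <string.h>

    /* Kernel-style strlcpy(): note the unconditional strlen() on the source. */
    static size_t demo_strlcpy(char *dest, const char *src, size_t size)
    {
        size_t ret = strlen(src);   /* overreads if src has no NUL */

        if (size) {
            size_t len = (ret >= size) ? size - 1 : ret;

            memcpy(dest, src, len);
            dest[len] = '\0';
        }
        return ret;
    }

    int main(void)
    {
        /* Fixed-width, on-disk style stack label with no terminating NUL. */
        char ci_stack[4] = { 'o', '2', 'c', 'b' };
        char dst[4 + 1] = { 0 };    /* zero-filled, like the kzalloc'd osb */

        /*
         * Unsafe: demo_strlcpy(dst, ci_stack, sizeof(dst)) would run
         * strlen(ci_stack) and read beyond the 4-byte field.
         *
         * Safe: copy exactly the label width; dst stays NUL terminated
         * because it was zero-initialized.
         */
        memcpy(dst, ci_stack, sizeof(ci_stack));
        printf("stack label: %s\n", dst);

        /* On properly terminated strings the two behave the same. */
        char ok[8];
        demo_strlcpy(ok, "o2cb", sizeof(ok));
        printf("copied:      %s\n", ok);
        return 0;
    }
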
......@@ -1827,9 +1827,15 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
if (mode_wp && mode_dontwake)
return -EINVAL;
if (mmget_not_zero(ctx->mm)) {
ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start,
uffdio_wp.range.len, mode_wp,
&ctx->mmap_changing);
mmput(ctx->mm);
} else {
return -ESRCH;
}
if (ret)
return ret;
......
......@@ -72,6 +72,8 @@ enum cpuhp_state {
CPUHP_SLUB_DEAD,
CPUHP_DEBUG_OBJ_DEAD,
CPUHP_MM_WRITEBACK_DEAD,
/* Must be after CPUHP_MM_VMSTAT_DEAD */
CPUHP_MM_DEMOTION_DEAD,
CPUHP_MM_VMSTAT_DEAD,
CPUHP_SOFTIRQ_DEAD,
CPUHP_NET_MVNETA_DEAD,
......@@ -240,6 +242,8 @@ enum cpuhp_state {
CPUHP_AP_BASE_CACHEINFO_ONLINE,
CPUHP_AP_ONLINE_DYN,
CPUHP_AP_ONLINE_DYN_END = CPUHP_AP_ONLINE_DYN + 30,
/* Must be after CPUHP_AP_ONLINE_DYN for node_states[N_CPU] update */
CPUHP_AP_MM_DEMOTION_ONLINE,
CPUHP_AP_X86_HPET_ONLINE,
CPUHP_AP_X86_KVM_CLK_ONLINE,
CPUHP_AP_DTPM_CPU_ONLINE,
......
......@@ -109,7 +109,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg
#endif
}
#if defined(CONFIG_UM) || defined(CONFIG_IA64)
#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64)
/*
* These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out
* extra segments containing the gate DSO contents. Dumping its
......
......@@ -160,7 +160,10 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
#define register_hotmemory_notifier(nb) register_memory_notifier(nb)
#define unregister_hotmemory_notifier(nb) unregister_memory_notifier(nb)
#else
#define hotplug_memory_notifier(fn, pri) ({ 0; })
static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
{
return 0;
}
/* These aren't inline functions due to a GCC bug. */
#define register_hotmemory_notifier(nb) ({ (void)(nb); 0; })
#define unregister_hotmemory_notifier(nb) ({ (void)(nb); })
......
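
A plausible reason the ({ 0; }) stub becomes a static inline here (the page_ext.c and slub.c hunks further down drop their CONFIG_MEMORY_HOTPLUG guards) is that the macro never references the callback passed to it, so a notifier callback that is only registered this way would trip "defined but not used" warnings on configs without memory hotplug, while a static inline consumes and type-checks the argument. A compile-time sketch of that difference; every name below is illustrative, not kernel API:

    /* Build with: cc -Wall -o notifier_demo notifier_demo.c
     * (statement expressions are a GNU C extension; default gcc/clang mode is fine)
     */
    #include <stdio.h>

    /* Simplified stand-in for the kernel's notifier_fn_t. */
    typedef int (*notifier_fn_t)(void *nb, unsigned long action, void *data);

    /* Old-style stub: a statement-expression macro that never touches its
     * arguments, so a callback only ever passed to it stays unreferenced.
     */
    #define hotplug_memory_notifier_macro(fn, pri)    ({ 0; })

    /* New-style stub: a static inline taking the callback as a typed
     * parameter, so passing a callback references it and type-checks it.
     */
    static inline int hotplug_memory_notifier_inline(notifier_fn_t fn, int pri)
    {
        return 0;
    }

    static int cb_macro(void *nb, unsigned long action, void *data)
    {
        return 0;   /* -Wall: "cb_macro defined but not used" */
    }

    static int cb_inline(void *nb, unsigned long action, void *data)
    {
        return 0;   /* referenced below, so no warning */
    }

    int main(void)
    {
        (void)hotplug_memory_notifier_macro(cb_macro, 100);
        (void)hotplug_memory_notifier_inline(cb_inline, 100);
        printf("both stubs return 0 when the subsystem is compiled out\n");
        return 0;
    }
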
......@@ -23,7 +23,7 @@ static inline bool page_is_secretmem(struct page *page)
mapping = (struct address_space *)
((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);
if (mapping != page->mapping)
if (!mapping || mapping != page->mapping)
return false;
return mapping->a_ops == &secretmem_aops;
......
......@@ -2700,12 +2700,14 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
if (mapping) {
int nr = thp_nr_pages(head);
if (PageSwapBacked(head))
if (PageSwapBacked(head)) {
__mod_lruvec_page_state(head, NR_SHMEM_THPS,
-nr);
else
} else {
__mod_lruvec_page_state(head, NR_FILE_THPS,
-nr);
filemap_nr_thps_dec(mapping);
}
}
__split_huge_page(page, list, end);
......
......@@ -1692,7 +1692,7 @@ void __init memblock_cap_memory_range(phys_addr_t base, phys_addr_t size)
if (!size)
return;
if (memblock.memory.cnt <= 1) {
if (!memblock_memory->total_size) {
pr_warn("%s: No memory registered yet\n", __func__);
return;
}
......
......@@ -856,16 +856,6 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
goto out;
}
if (flags & MPOL_F_NUMA_BALANCING) {
if (new && new->mode == MPOL_BIND) {
new->flags |= (MPOL_F_MOF | MPOL_F_MORON);
} else {
ret = -EINVAL;
mpol_put(new);
goto out;
}
}
ret = mpol_set_nodemask(new, nodes, scratch);
if (ret) {
mpol_put(new);
......@@ -1458,7 +1448,11 @@ static inline int sanitize_mpol_flags(int *mode, unsigned short *flags)
return -EINVAL;
if ((*flags & MPOL_F_STATIC_NODES) && (*flags & MPOL_F_RELATIVE_NODES))
return -EINVAL;
if (*flags & MPOL_F_NUMA_BALANCING) {
if (*mode != MPOL_BIND)
return -EINVAL;
*flags |= (MPOL_F_MOF | MPOL_F_MORON);
}
return 0;
}
......
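
Before this change, the MPOL_F_NUMA_BALANCING check lived only in do_set_mempolicy(), so set_mempolicy() rejected the flag for non-MPOL_BIND modes but mbind() did not; moving it into sanitize_mpol_flags() covers both entry points. A minimal userspace sketch of the now-rejected combination (constants mirror uapi linux/mempolicy.h; the mapping size is arbitrary, and EINVAL is only expected on kernels carrying this fix):

    #define _GNU_SOURCE
    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Values from uapi/linux/mempolicy.h */
    #define MPOL_BIND               2
    #define MPOL_LOCAL              4
    #define MPOL_F_NUMA_BALANCING   (1 << 13)

    int main(void)
    {
        size_t len = 2 * 1024 * 1024;
        void *addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (addr == MAP_FAILED)
            return 1;

        /* MPOL_F_NUMA_BALANCING is only meaningful with MPOL_BIND; with the
         * fix, combining it with MPOL_LOCAL is rejected with EINVAL before
         * any policy is created.
         */
        long ret = syscall(SYS_mbind, addr, len,
                           MPOL_LOCAL | MPOL_F_NUMA_BALANCING,
                           NULL, 0UL, 0U);
        printf("mbind(MPOL_LOCAL | MPOL_F_NUMA_BALANCING) = %ld (%s)\n",
               ret, ret ? strerror(errno) : "ok");  /* expect EINVAL */

        munmap(addr, len);
        return 0;
    }
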
......@@ -3066,7 +3066,7 @@ void migrate_vma_finalize(struct migrate_vma *migrate)
EXPORT_SYMBOL(migrate_vma_finalize);
#endif /* CONFIG_DEVICE_PRIVATE */
#if defined(CONFIG_MEMORY_HOTPLUG)
#if defined(CONFIG_HOTPLUG_CPU)
/* Disable reclaim-based migration. */
static void __disable_all_migrate_targets(void)
{
......@@ -3208,25 +3208,6 @@ static void set_migration_target_nodes(void)
put_online_mems();
}
/*
* React to hotplug events that might affect the migration targets
* like events that online or offline NUMA nodes.
*
* The ordering is also currently dependent on which nodes have
* CPUs. That means we need CPU on/offline notification too.
*/
static int migration_online_cpu(unsigned int cpu)
{
set_migration_target_nodes();
return 0;
}
static int migration_offline_cpu(unsigned int cpu)
{
set_migration_target_nodes();
return 0;
}
/*
* This leaves migrate-on-reclaim transiently disabled between
* the MEM_GOING_OFFLINE and MEM_OFFLINE events. This runs
......@@ -3239,8 +3220,18 @@ static int migration_offline_cpu(unsigned int cpu)
* set_migration_target_nodes().
*/
static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
unsigned long action, void *arg)
unsigned long action, void *_arg)
{
struct memory_notify *arg = _arg;
/*
* Only update the node migration order when a node is
* changing status, like online->offline. This avoids
* the overhead of synchronize_rcu() in most cases.
*/
if (arg->status_change_nid < 0)
return notifier_from_errno(0);
switch (action) {
case MEM_GOING_OFFLINE:
/*
......@@ -3274,13 +3265,31 @@ static int __meminit migrate_on_reclaim_callback(struct notifier_block *self,
return notifier_from_errno(0);
}
/*
* React to hotplug events that might affect the migration targets
* like events that online or offline NUMA nodes.
*
* The ordering is also currently dependent on which nodes have
* CPUs. That means we need CPU on/offline notification too.
*/
static int migration_online_cpu(unsigned int cpu)
{
set_migration_target_nodes();
return 0;
}
static int migration_offline_cpu(unsigned int cpu)
{
set_migration_target_nodes();
return 0;
}
static int __init migrate_on_reclaim_init(void)
{
int ret;
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "migrate on reclaim",
migration_online_cpu,
migration_offline_cpu);
ret = cpuhp_setup_state_nocalls(CPUHP_MM_DEMOTION_DEAD, "mm/demotion:offline",
NULL, migration_offline_cpu);
/*
* In the unlikely case that this fails, the automatic
* migration targets may become suboptimal for nodes
......@@ -3288,9 +3297,12 @@ static int __init migrate_on_reclaim_init(void)
* rare case, do not bother trying to do anything special.
*/
WARN_ON(ret < 0);
ret = cpuhp_setup_state(CPUHP_AP_MM_DEMOTION_ONLINE, "mm/demotion:online",
migration_online_cpu, NULL);
WARN_ON(ret < 0);
hotplug_memory_notifier(migrate_on_reclaim_callback, 100);
return 0;
}
late_initcall(migrate_on_reclaim_init);
#endif /* CONFIG_MEMORY_HOTPLUG */
#endif /* CONFIG_HOTPLUG_CPU */
......@@ -269,7 +269,7 @@ static int __meminit init_section_page_ext(unsigned long pfn, int nid)
total_usage += table_size;
return 0;
}
#ifdef CONFIG_MEMORY_HOTPLUG
static void free_page_ext(void *addr)
{
if (is_vmalloc_addr(addr)) {
......@@ -374,8 +374,6 @@ static int __meminit page_ext_callback(struct notifier_block *self,
return notifier_from_errno(ret);
}
#endif
void __init page_ext_init(void)
{
unsigned long pfn;
......
......@@ -1095,7 +1095,7 @@ static int slab_offline_cpu(unsigned int cpu)
return 0;
}
#if defined(CONFIG_NUMA) && defined(CONFIG_MEMORY_HOTPLUG)
#if defined(CONFIG_NUMA)
/*
* Drains freelist for a node on each slab cache, used for memory hot-remove.
* Returns -EBUSY if all objects cannot be drained so that the node is not
......@@ -1157,7 +1157,7 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
out:
return notifier_from_errno(ret);
}
#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
#endif /* CONFIG_NUMA */
/*
* swap the static kmem_cache_node with kmalloced memory
......
......@@ -1701,7 +1701,8 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
}
static inline bool slab_free_freelist_hook(struct kmem_cache *s,
void **head, void **tail)
void **head, void **tail,
int *cnt)
{
void *object;
......@@ -1728,6 +1729,12 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
*head = object;
if (!*tail)
*tail = object;
} else {
/*
* Adjust the reconstructed freelist depth
* accordingly if object's reuse is delayed.
*/
--(*cnt);
}
} while (object != old_tail);
......@@ -3413,6 +3420,8 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
struct kmem_cache_cpu *c;
unsigned long tid;
/* memcg_slab_free_hook() is already called for bulk free. */
if (!tail)
memcg_slab_free_hook(s, &head, 1);
redo:
/*
......@@ -3480,7 +3489,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page,
* With KASAN enabled slab_free_freelist_hook modifies the freelist
* to remove objects, whose reuse must be delayed.
*/
if (slab_free_freelist_hook(s, &head, &tail))
if (slab_free_freelist_hook(s, &head, &tail, &cnt))
do_slab_free(s, page, head, tail, cnt, addr);
}
......@@ -4203,8 +4212,8 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
if (alloc_kmem_cache_cpus(s))
return 0;
free_kmem_cache_nodes(s);
error:
__kmem_cache_release(s);
return -EINVAL;
}
......@@ -4880,13 +4889,15 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags)
return 0;
err = sysfs_slab_add(s);
if (err)
if (err) {
__kmem_cache_release(s);
return err;
}
if (s->flags & SLAB_STORE_USER)
debugfs_slab_add(s);
return err;
return 0;
}
void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
......@@ -6108,9 +6119,14 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
struct kmem_cache *s = file_inode(filep)->i_private;
unsigned long *obj_map;
if (!t)
return -ENOMEM;
obj_map = bitmap_alloc(oo_objects(s->oo), GFP_KERNEL);
if (!obj_map)
if (!obj_map) {
seq_release_private(inode, filep);
return -ENOMEM;
}
if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0)
alloc = TRACK_ALLOC;
......@@ -6119,6 +6135,7 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep)
if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) {
bitmap_free(obj_map);
seq_release_private(inode, filep);
return -ENOMEM;
}
......
......@@ -414,9 +414,6 @@ static void uffd_test_ctx_init_ext(uint64_t *features)
uffd_test_ops->allocate_area((void **)&area_src);
uffd_test_ops->allocate_area((void **)&area_dst);
uffd_test_ops->release_pages(area_src);
uffd_test_ops->release_pages(area_dst);
userfaultfd_open(features);
count_verify = malloc(nr_pages * sizeof(unsigned long long));
......@@ -437,6 +434,26 @@ static void uffd_test_ctx_init_ext(uint64_t *features)
*(area_count(area_src, nr) + 1) = 1;
}
/*
* After initialization of area_src, we must explicitly release pages
* for area_dst to make sure it's fully empty. Otherwise we could have
some area_dst pages be erroneously initialized with zero pages,
* hence we could hit memory corruption later in the test.
*
* One example is when THP is globally enabled, above allocate_area()
* calls could have the two areas merged into a single VMA (as they
* will have the same VMA flags so they're mergeable). When we
* initialize the area_src above, it's possible that some part of
* area_dst could have been faulted in via one huge THP that will be
* shared between area_src and area_dst. It could cause some of the
area_dst not to be trapped by missing userfaults.
*
* This release_pages() will guarantee even if that happened, we'll
* proactively split the thp and drop any accidentally initialized
* pages within area_dst.
*/
uffd_test_ops->release_pages(area_dst);
pipefd = malloc(sizeof(int) * nr_cpus * 2);
if (!pipefd)
err("pipefd");
......