Commit 0014404f authored by Linus Torvalds

Merge branch 'akpm' (patches from Andrew)

Merge misc fixes from Andrew Morton:
 "8 patches.

  Subsystems affected by this patch series: mm (hugetlb, pagemap, and
  userfaultfd), memfd, selftests, and kconfig"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  configs/debug: set CONFIG_DEBUG_INFO=y properly
  proc: fix documentation and description of pagemap
  kselftest/vm: fix tests build with old libc
  memfd: fix F_SEAL_WRITE after shmem huge page allocated
  mm: fix use-after-free when anon vma name is used after vma is freed
  mm: prevent vm_area_struct::anon_name refcount saturation
  mm: refactor vm_area_struct::anon_vma_name usage code
  selftests/vm: cleanup hugetlb file after mremap test
parents f9026e19 d1eff16d
@@ -23,7 +23,7 @@ There are four components to pagemap:
 * Bit 56 page exclusively mapped (since 4.2)
 * Bit 57 pte is uffd-wp write-protected (since 5.13) (see
   :ref:`Documentation/admin-guide/mm/userfaultfd.rst <userfaultfd>`)
-* Bits 57-60 zero
+* Bits 58-60 zero
 * Bit 61 page is file-page or shared-anon (since 3.5)
 * Bit 62 page swapped
 * Bit 63 page present
......
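For context on the bits this documentation hunk corrects, here is a minimal user-space sketch (not part of this series) that reads one `/proc/self/pagemap` entry and tests the bits named above; the chosen address and error handling are illustrative only.

```c
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    long psize = sysconf(_SC_PAGESIZE);
    unsigned long vaddr = (unsigned long)&psize;   /* any mapped address */
    uint64_t entry;
    int fd = open("/proc/self/pagemap", O_RDONLY);

    if (fd < 0)
        return 1;
    /* one 64-bit entry per virtual page, indexed by virtual page number */
    if (pread(fd, &entry, sizeof(entry),
              (off_t)(vaddr / psize) * sizeof(entry)) != (ssize_t)sizeof(entry)) {
        close(fd);
        return 1;
    }
    printf("present=%llu swapped=%llu file/shared-anon=%llu uffd-wp=%llu\n",
           (unsigned long long)(entry >> 63 & 1),
           (unsigned long long)(entry >> 62 & 1),
           (unsigned long long)(entry >> 61 & 1),
           (unsigned long long)(entry >> 57 & 1));
    close(fd);
    return 0;
}
```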
@@ -309,7 +309,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
     name = arch_vma_name(vma);
     if (!name) {
-        const char *anon_name;
+        struct anon_vma_name *anon_name;
         if (!mm) {
             name = "[vdso]";
@@ -327,10 +327,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
             goto done;
         }
-        anon_name = vma_anon_name(vma);
+        anon_name = anon_vma_name(vma);
         if (anon_name) {
             seq_pad(m, ' ');
-            seq_printf(m, "[anon:%s]", anon_name);
+            seq_printf(m, "[anon:%s]", anon_name->name);
         }
     }
@@ -1597,7 +1597,8 @@ static const struct mm_walk_ops pagemap_ops = {
  * Bits 5-54 swap offset if swapped
  * Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
  * Bit 56 page exclusively mapped
- * Bits 57-60 zero
+ * Bit 57 pte is uffd-wp write-protected
+ * Bits 58-60 zero
  * Bit 61 page is file-page or shared-anon
  * Bit 62 page swapped
  * Bit 63 page present
......
@@ -878,7 +878,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
                  new_flags, vma->anon_vma,
                  vma->vm_file, vma->vm_pgoff,
                  vma_policy(vma),
-                 NULL_VM_UFFD_CTX, vma_anon_name(vma));
+                 NULL_VM_UFFD_CTX, anon_vma_name(vma));
         if (prev)
             vma = prev;
         else
@@ -1438,7 +1438,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
                  vma->anon_vma, vma->vm_file, vma->vm_pgoff,
                  vma_policy(vma),
                  ((struct vm_userfaultfd_ctx){ ctx }),
-                 vma_anon_name(vma));
+                 anon_vma_name(vma));
         if (prev) {
             vma = prev;
             goto next;
@@ -1615,7 +1615,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
         prev = vma_merge(mm, prev, start, vma_end, new_flags,
                  vma->anon_vma, vma->vm_file, vma->vm_pgoff,
                  vma_policy(vma),
-                 NULL_VM_UFFD_CTX, vma_anon_name(vma));
+                 NULL_VM_UFFD_CTX, anon_vma_name(vma));
         if (prev) {
             vma = prev;
             goto next;
......
@@ -2626,7 +2626,7 @@ static inline int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
     struct vm_area_struct *prev, unsigned long addr, unsigned long end,
     unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
-    struct mempolicy *, struct vm_userfaultfd_ctx, const char *);
+    struct mempolicy *, struct vm_userfaultfd_ctx, struct anon_vma_name *);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
 extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
     unsigned long addr, int new_below);
@@ -3372,11 +3372,12 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
 #ifdef CONFIG_ANON_VMA_NAME
 int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
-        unsigned long len_in, const char *name);
+        unsigned long len_in,
+        struct anon_vma_name *anon_name);
 #else
 static inline int
 madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
-        unsigned long len_in, const char *name) {
+        unsigned long len_in, struct anon_vma_name *anon_name) {
     return 0;
 }
 #endif
......
@@ -140,50 +140,91 @@ static __always_inline void del_page_from_lru_list(struct page *page,
 #ifdef CONFIG_ANON_VMA_NAME
 /*
- * mmap_lock should be read-locked when calling vma_anon_name() and while using
- * the returned pointer.
+ * mmap_lock should be read-locked when calling anon_vma_name(). Caller should
+ * either keep holding the lock while using the returned pointer or it should
+ * raise anon_vma_name refcount before releasing the lock.
  */
-extern const char *vma_anon_name(struct vm_area_struct *vma);
+extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma);
+extern struct anon_vma_name *anon_vma_name_alloc(const char *name);
+extern void anon_vma_name_free(struct kref *kref);
 
-/*
- * mmap_lock should be read-locked for orig_vma->vm_mm.
- * mmap_lock should be write-locked for new_vma->vm_mm or new_vma should be
- * isolated.
- */
-extern void dup_vma_anon_name(struct vm_area_struct *orig_vma,
-        struct vm_area_struct *new_vma);
+/* mmap_lock should be read-locked */
+static inline void anon_vma_name_get(struct anon_vma_name *anon_name)
+{
+    if (anon_name)
+        kref_get(&anon_name->kref);
+}
 
-/*
- * mmap_lock should be write-locked or vma should have been isolated under
- * write-locked mmap_lock protection.
- */
-extern void free_vma_anon_name(struct vm_area_struct *vma);
+static inline void anon_vma_name_put(struct anon_vma_name *anon_name)
+{
+    if (anon_name)
+        kref_put(&anon_name->kref, anon_vma_name_free);
+}
 
-/* mmap_lock should be read-locked */
-static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
-        const char *name)
-{
-    const char *vma_name = vma_anon_name(vma);
+static inline
+struct anon_vma_name *anon_vma_name_reuse(struct anon_vma_name *anon_name)
+{
+    /* Prevent anon_name refcount saturation early on */
+    if (kref_read(&anon_name->kref) < REFCOUNT_MAX) {
+        anon_vma_name_get(anon_name);
+        return anon_name;
+    }
+    return anon_vma_name_alloc(anon_name->name);
+}
 
-    /* either both NULL, or pointers to same string */
-    if (vma_name == name)
-        return true;
+static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
+        struct vm_area_struct *new_vma)
+{
+    struct anon_vma_name *anon_name = anon_vma_name(orig_vma);
+
+    if (anon_name)
+        new_vma->anon_name = anon_vma_name_reuse(anon_name);
+}
 
-    return name && vma_name && !strcmp(name, vma_name);
+static inline void free_anon_vma_name(struct vm_area_struct *vma)
+{
+    /*
+     * Not using anon_vma_name because it generates a warning if mmap_lock
+     * is not held, which might be the case here.
+     */
+    if (!vma->vm_file)
+        anon_vma_name_put(vma->anon_name);
+}
+
+static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
+        struct anon_vma_name *anon_name2)
+{
+    if (anon_name1 == anon_name2)
+        return true;
+
+    return anon_name1 && anon_name2 &&
+        !strcmp(anon_name1->name, anon_name2->name);
 }
+
 #else /* CONFIG_ANON_VMA_NAME */
-static inline const char *vma_anon_name(struct vm_area_struct *vma)
+static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
 {
     return NULL;
 }
-static inline void dup_vma_anon_name(struct vm_area_struct *orig_vma,
-        struct vm_area_struct *new_vma) {}
-static inline void free_vma_anon_name(struct vm_area_struct *vma) {}
-static inline bool is_same_vma_anon_name(struct vm_area_struct *vma,
-        const char *name)
+
+static inline struct anon_vma_name *anon_vma_name_alloc(const char *name)
+{
+    return NULL;
+}
+
+static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {}
+static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {}
+static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma,
+        struct vm_area_struct *new_vma) {}
+static inline void free_anon_vma_name(struct vm_area_struct *vma) {}
+
+static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1,
+        struct anon_vma_name *anon_name2)
 {
     return true;
 }
+
 #endif /* CONFIG_ANON_VMA_NAME */
 
 static inline void init_tlb_flush_pending(struct mm_struct *mm)
......
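The anon_vma_name_reuse() helper above is the core of the "prevent refcount saturation" patch: a shared name object is reference-counted until the count nears REFCOUNT_MAX, after which a fresh copy is allocated instead of letting the kref saturate. A hypothetical user-space analogue of the same pattern (plain counter instead of kref; all names here are invented for illustration):

```c
#include <limits.h>
#include <stdlib.h>
#include <string.h>

/* Share one refcounted name object until the counter nears saturation,
 * then hand out an independent copy instead of pinning the count at its
 * ceiling. This mirrors the anon_vma_name_reuse() idea, not its code. */
struct name_ref {
    unsigned int refcount;
    char name[80];
};

static struct name_ref *name_ref_alloc(const char *s)
{
    struct name_ref *n = calloc(1, sizeof(*n));

    if (!n)
        return NULL;
    n->refcount = 1;
    strncpy(n->name, s, sizeof(n->name) - 1);
    return n;
}

static struct name_ref *name_ref_reuse(struct name_ref *n)
{
    if (n->refcount < UINT_MAX - 1) {   /* room left: share it */
        n->refcount++;
        return n;
    }
    return name_ref_alloc(n->name);     /* otherwise copy it */
}

int main(void)
{
    struct name_ref *a = name_ref_alloc("my_heap");
    struct name_ref *b = a ? name_ref_reuse(a) : NULL;

    /* a and b normally alias; near saturation b becomes a fresh copy */
    if (b != a)
        free(b);
    free(a);
    return 0;
}
```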
@@ -416,7 +416,10 @@ struct vm_area_struct {
             struct rb_node rb;
             unsigned long rb_subtree_last;
         } shared;
-        /* Serialized by mmap_sem. */
+        /*
+         * Serialized by mmap_sem. Never use directly because it is
+         * valid only when vm_file is NULL. Use anon_vma_name instead.
+         */
         struct anon_vma_name *anon_name;
     };
......
@@ -16,7 +16,7 @@ CONFIG_SYMBOLIC_ERRNAME=y
 #
 # Compile-time checks and compiler options
 #
-CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_SECTION_MISMATCH=y
 CONFIG_FRAME_WARN=2048
 CONFIG_SECTION_MISMATCH_WARN_ONLY=y
......
@@ -366,14 +366,14 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig)
         *new = data_race(*orig);
         INIT_LIST_HEAD(&new->anon_vma_chain);
         new->vm_next = new->vm_prev = NULL;
-        dup_vma_anon_name(orig, new);
+        dup_anon_vma_name(orig, new);
     }
     return new;
 }
 
 void vm_area_free(struct vm_area_struct *vma)
 {
-    free_vma_anon_name(vma);
+    free_anon_vma_name(vma);
     kmem_cache_free(vm_area_cachep, vma);
 }
......
@@ -7,6 +7,7 @@
 #include <linux/export.h>
 #include <linux/mm.h>
+#include <linux/mm_inline.h>
 #include <linux/utsname.h>
 #include <linux/mman.h>
 #include <linux/reboot.h>
@@ -2286,15 +2287,16 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr,
 {
     struct mm_struct *mm = current->mm;
     const char __user *uname;
-    char *name, *pch;
+    struct anon_vma_name *anon_name = NULL;
     int error;
 
     switch (opt) {
     case PR_SET_VMA_ANON_NAME:
         uname = (const char __user *)arg;
         if (uname) {
-            name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
+            char *name, *pch;
 
+            name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN);
             if (IS_ERR(name))
                 return PTR_ERR(name);
@@ -2304,15 +2306,18 @@ static int prctl_set_vma(unsigned long opt, unsigned long addr,
                     return -EINVAL;
                 }
             }
-        } else {
-            /* Reset the name */
-            name = NULL;
+            /* anon_vma has its own copy */
+            anon_name = anon_vma_name_alloc(name);
+            kfree(name);
+            if (!anon_name)
+                return -ENOMEM;
         }
 
         mmap_write_lock(mm);
-        error = madvise_set_anon_name(mm, addr, size, name);
+        error = madvise_set_anon_name(mm, addr, size, anon_name);
         mmap_write_unlock(mm);
-        kfree(name);
+        anon_vma_name_put(anon_name);
         break;
     default:
         error = -EINVAL;
......
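The prctl_set_vma() path changed above is reached from user space via prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ...), available when the kernel is built with CONFIG_ANON_VMA_NAME. A rough usage sketch, not part of this commit; the fallback #defines are only needed with older uapi headers and mirror the uapi values:

```c
#include <stdio.h>
#include <sys/mman.h>
#include <sys/prctl.h>

#ifndef PR_SET_VMA
#define PR_SET_VMA              0x53564d41  /* fallback for older headers */
#define PR_SET_VMA_ANON_NAME    0
#endif

int main(void)
{
    size_t len = 1UL << 20;
    void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (p == MAP_FAILED)
        return 1;
    /* lands in prctl_set_vma() -> madvise_set_anon_name() shown above;
     * the region then appears as "[anon:my_heap]" in /proc/self/maps */
    if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
              (unsigned long)p, len, (unsigned long)"my_heap"))
        perror("prctl(PR_SET_VMA_ANON_NAME)");
    munmap(p, len);
    return 0;
}
```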
@@ -65,7 +65,7 @@ static int madvise_need_mmap_write(int behavior)
 }
 
 #ifdef CONFIG_ANON_VMA_NAME
-static struct anon_vma_name *anon_vma_name_alloc(const char *name)
+struct anon_vma_name *anon_vma_name_alloc(const char *name)
 {
     struct anon_vma_name *anon_name;
     size_t count;
@@ -81,78 +81,48 @@ static struct anon_vma_name *anon_vma_name_alloc(const char *name)
     return anon_name;
 }
 
-static void vma_anon_name_free(struct kref *kref)
+void anon_vma_name_free(struct kref *kref)
 {
     struct anon_vma_name *anon_name =
             container_of(kref, struct anon_vma_name, kref);
     kfree(anon_name);
 }
 
-static inline bool has_vma_anon_name(struct vm_area_struct *vma)
-{
-    return !vma->vm_file && vma->anon_name;
-}
-
-const char *vma_anon_name(struct vm_area_struct *vma)
-{
-    if (!has_vma_anon_name(vma))
-        return NULL;
-
-    mmap_assert_locked(vma->vm_mm);
-
-    return vma->anon_name->name;
-}
-
-void dup_vma_anon_name(struct vm_area_struct *orig_vma,
-        struct vm_area_struct *new_vma)
-{
-    if (!has_vma_anon_name(orig_vma))
-        return;
-
-    kref_get(&orig_vma->anon_name->kref);
-    new_vma->anon_name = orig_vma->anon_name;
-}
-
-void free_vma_anon_name(struct vm_area_struct *vma)
-{
-    struct anon_vma_name *anon_name;
-
-    if (!has_vma_anon_name(vma))
-        return;
-
-    anon_name = vma->anon_name;
-    vma->anon_name = NULL;
-    kref_put(&anon_name->kref, vma_anon_name_free);
+struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
+{
+    mmap_assert_locked(vma->vm_mm);
+
+    if (vma->vm_file)
+        return NULL;
+
+    return vma->anon_name;
 }
 
 /* mmap_lock should be write-locked */
-static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
+static int replace_anon_vma_name(struct vm_area_struct *vma,
+        struct anon_vma_name *anon_name)
 {
-    const char *anon_name;
+    struct anon_vma_name *orig_name = anon_vma_name(vma);
 
-    if (!name) {
-        free_vma_anon_name(vma);
+    if (!anon_name) {
+        vma->anon_name = NULL;
+        anon_vma_name_put(orig_name);
         return 0;
     }
 
-    anon_name = vma_anon_name(vma);
-    if (anon_name) {
-        /* Same name, nothing to do here */
-        if (!strcmp(name, anon_name))
-            return 0;
+    if (anon_vma_name_eq(orig_name, anon_name))
+        return 0;
 
-        free_vma_anon_name(vma);
-    }
-    vma->anon_name = anon_vma_name_alloc(name);
-    if (!vma->anon_name)
-        return -ENOMEM;
+    vma->anon_name = anon_vma_name_reuse(anon_name);
+    anon_vma_name_put(orig_name);
 
     return 0;
 }
 #else /* CONFIG_ANON_VMA_NAME */
-static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
+static int replace_anon_vma_name(struct vm_area_struct *vma,
+        struct anon_vma_name *anon_name)
 {
-    if (name)
+    if (anon_name)
         return -EINVAL;
 
     return 0;
@@ -161,17 +131,19 @@ static int replace_vma_anon_name(struct vm_area_struct *vma, const char *name)
 /*
  * Update the vm_flags on region of a vma, splitting it or merging it as
  * necessary. Must be called with mmap_sem held for writing;
+ * Caller should ensure anon_name stability by raising its refcount even when
+ * anon_name belongs to a valid vma because this function might free that vma.
  */
 static int madvise_update_vma(struct vm_area_struct *vma,
         struct vm_area_struct **prev, unsigned long start,
         unsigned long end, unsigned long new_flags,
-        const char *name)
+        struct anon_vma_name *anon_name)
 {
     struct mm_struct *mm = vma->vm_mm;
     int error;
     pgoff_t pgoff;
 
-    if (new_flags == vma->vm_flags && is_same_vma_anon_name(vma, name)) {
+    if (new_flags == vma->vm_flags && anon_vma_name_eq(anon_vma_name(vma), anon_name)) {
         *prev = vma;
         return 0;
     }
@@ -179,7 +151,7 @@ static int madvise_update_vma(struct vm_area_struct *vma,
     pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
     *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
                       vma->vm_file, pgoff, vma_policy(vma),
-                      vma->vm_userfaultfd_ctx, name);
+                      vma->vm_userfaultfd_ctx, anon_name);
     if (*prev) {
         vma = *prev;
         goto success;
@@ -209,7 +181,7 @@ static int madvise_update_vma(struct vm_area_struct *vma,
      */
     vma->vm_flags = new_flags;
     if (!vma->vm_file) {
-        error = replace_vma_anon_name(vma, name);
+        error = replace_anon_vma_name(vma, anon_name);
         if (error)
             return error;
     }
@@ -975,6 +947,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
         unsigned long behavior)
 {
     int error;
+    struct anon_vma_name *anon_name;
     unsigned long new_flags = vma->vm_flags;
 
     switch (behavior) {
@@ -1040,8 +1013,11 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
         break;
     }
 
+    anon_name = anon_vma_name(vma);
+    anon_vma_name_get(anon_name);
     error = madvise_update_vma(vma, prev, start, end, new_flags,
-                               vma_anon_name(vma));
+                               anon_name);
+    anon_vma_name_put(anon_name);
 
 out:
     /*
@@ -1225,7 +1201,7 @@ int madvise_walk_vmas(struct mm_struct *mm, unsigned long start,
 static int madvise_vma_anon_name(struct vm_area_struct *vma,
         struct vm_area_struct **prev,
         unsigned long start, unsigned long end,
-        unsigned long name)
+        unsigned long anon_name)
 {
     int error;
@@ -1234,7 +1210,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
         return -EBADF;
 
     error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
-                               (const char *)name);
+                               (struct anon_vma_name *)anon_name);
 
     /*
      * madvise() returns EAGAIN if kernel resources, such as
@@ -1246,7 +1222,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
 }
 
 int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
-        unsigned long len_in, const char *name)
+        unsigned long len_in, struct anon_vma_name *anon_name)
 {
     unsigned long end;
     unsigned long len;
@@ -1266,7 +1242,7 @@ int madvise_set_anon_name(struct mm_struct *mm, unsigned long start,
     if (end == start)
         return 0;
 
-    return madvise_walk_vmas(mm, start, end, (unsigned long)name,
+    return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name,
                              madvise_vma_anon_name);
 }
 #endif /* CONFIG_ANON_VMA_NAME */
......
@@ -31,20 +31,28 @@
 static void memfd_tag_pins(struct xa_state *xas)
 {
     struct page *page;
-    unsigned int tagged = 0;
+    int latency = 0;
+    int cache_count;
 
     lru_add_drain();
 
     xas_lock_irq(xas);
     xas_for_each(xas, page, ULONG_MAX) {
-        if (xa_is_value(page))
-            continue;
-        page = find_subpage(page, xas->xa_index);
-        if (page_count(page) - page_mapcount(page) > 1)
+        cache_count = 1;
+        if (!xa_is_value(page) &&
+            PageTransHuge(page) && !PageHuge(page))
+            cache_count = HPAGE_PMD_NR;
+
+        if (!xa_is_value(page) &&
+            page_count(page) - total_mapcount(page) != cache_count)
             xas_set_mark(xas, MEMFD_TAG_PINNED);
+        if (cache_count != 1)
+            xas_set(xas, page->index + cache_count);
 
-        if (++tagged % XA_CHECK_SCHED)
+        latency += cache_count;
+        if (latency < XA_CHECK_SCHED)
             continue;
+        latency = 0;
 
         xas_pause(xas);
         xas_unlock_irq(xas);
@@ -73,7 +81,8 @@ static int memfd_wait_for_pins(struct address_space *mapping)
     error = 0;
     for (scan = 0; scan <= LAST_SCAN; scan++) {
-        unsigned int tagged = 0;
+        int latency = 0;
+        int cache_count;
 
         if (!xas_marked(&xas, MEMFD_TAG_PINNED))
             break;
@@ -87,10 +96,14 @@ static int memfd_wait_for_pins(struct address_space *mapping)
         xas_lock_irq(&xas);
         xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) {
             bool clear = true;
-            if (xa_is_value(page))
-                continue;
-            page = find_subpage(page, xas.xa_index);
-            if (page_count(page) - page_mapcount(page) != 1) {
+
+            cache_count = 1;
+            if (!xa_is_value(page) &&
+                PageTransHuge(page) && !PageHuge(page))
+                cache_count = HPAGE_PMD_NR;
+
+            if (!xa_is_value(page) && cache_count !=
+                page_count(page) - total_mapcount(page)) {
                 /*
                  * On the last scan, we clean up all those tags
                  * we inserted; but make a note that we still
@@ -103,8 +116,11 @@ static int memfd_wait_for_pins(struct address_space *mapping)
             }
             if (clear)
                 xas_clear_mark(&xas, MEMFD_TAG_PINNED);
-            if (++tagged % XA_CHECK_SCHED)
+
+            latency += cache_count;
+            if (latency < XA_CHECK_SCHED)
                 continue;
+            latency = 0;
 
             xas_pause(&xas);
             xas_unlock_irq(&xas);
......
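The memfd change above is the F_SEAL_WRITE fix from the series: with a shmem huge page in the page cache, the old per-subpage page_count()/page_mapcount() comparison made sealing fail even though nothing held the pages pinned. A minimal sketch (not from this commit) of the user-visible sequence it affects, using memfd_create() plus fcntl(F_ADD_SEALS); needs _GNU_SOURCE, and huge pages on shmem are configured separately:

```c
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
    char buf[4096];
    int fd = memfd_create("sealed-buf", MFD_CLOEXEC | MFD_ALLOW_SEALING);

    if (fd < 0)
        return 1;
    memset(buf, 'x', sizeof(buf));
    if (write(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf)) {  /* populate page cache */
        close(fd);
        return 1;
    }
    /* Before the fix, this could fail with EBUSY when the file was backed
     * by a shmem huge page, because the pin check compared page_count()
     * against page_mapcount() of individual subpages. */
    if (fcntl(fd, F_ADD_SEALS, F_SEAL_WRITE) < 0)
        perror("F_ADD_SEALS(F_SEAL_WRITE)");
    close(fd);
    return 0;
}
```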
@@ -814,7 +814,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
         prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags,
                  vma->anon_vma, vma->vm_file, pgoff,
                  new_pol, vma->vm_userfaultfd_ctx,
-                 vma_anon_name(vma));
+                 anon_vma_name(vma));
         if (prev) {
             vma = prev;
             next = vma->vm_next;
......
@@ -512,7 +512,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
     pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
     *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
              vma->vm_file, pgoff, vma_policy(vma),
-             vma->vm_userfaultfd_ctx, vma_anon_name(vma));
+             vma->vm_userfaultfd_ctx, anon_vma_name(vma));
     if (*prev) {
         vma = *prev;
         goto success;
......
@@ -1031,7 +1031,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
         struct file *file, unsigned long vm_flags,
         struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
-        const char *anon_name)
+        struct anon_vma_name *anon_name)
 {
     /*
      * VM_SOFTDIRTY should not prevent from VMA merging, if we
@@ -1049,7 +1049,7 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
         return 0;
     if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
         return 0;
-    if (!is_same_vma_anon_name(vma, anon_name))
+    if (!anon_vma_name_eq(anon_vma_name(vma), anon_name))
         return 0;
     return 1;
 }
@@ -1084,7 +1084,7 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
         struct anon_vma *anon_vma, struct file *file,
         pgoff_t vm_pgoff,
         struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
-        const char *anon_name)
+        struct anon_vma_name *anon_name)
 {
     if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
         is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
@@ -1106,7 +1106,7 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
         struct anon_vma *anon_vma, struct file *file,
         pgoff_t vm_pgoff,
         struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
-        const char *anon_name)
+        struct anon_vma_name *anon_name)
 {
     if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) &&
         is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
@@ -1167,7 +1167,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
         struct anon_vma *anon_vma, struct file *file,
         pgoff_t pgoff, struct mempolicy *policy,
         struct vm_userfaultfd_ctx vm_userfaultfd_ctx,
-        const char *anon_name)
+        struct anon_vma_name *anon_name)
 {
     pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
     struct vm_area_struct *area, *next;
@@ -3256,7 +3256,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
         return NULL;    /* should never get here */
     new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
             vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-            vma->vm_userfaultfd_ctx, vma_anon_name(vma));
+            vma->vm_userfaultfd_ctx, anon_vma_name(vma));
     if (new_vma) {
         /*
          * Source vma may have been merged into new_vma
......
@@ -464,7 +464,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
     pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
     *pprev = vma_merge(mm, *pprev, start, end, newflags,
                vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
-               vma->vm_userfaultfd_ctx, vma_anon_name(vma));
+               vma->vm_userfaultfd_ctx, anon_vma_name(vma));
     if (*pprev) {
         vma = *pprev;
         VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY);
......
@@ -3,9 +3,10 @@
  * hugepage-mremap:
  *
  * Example of remapping huge page memory in a user application using the
- * mremap system call. Code assumes a hugetlbfs filesystem is mounted
- * at './huge'. The amount of memory used by this test is decided by a command
- * line argument in MBs. If missing, the default amount is 10MB.
+ * mremap system call. The path to a file in a hugetlbfs filesystem must
+ * be passed as the last argument to this test. The amount of memory used
+ * by this test in MBs can optionally be passed as an argument. If no memory
+ * amount is passed, the default amount is 10MB.
  *
  * To make sure the test triggers pmd sharing and goes through the 'unshare'
  * path in the mremap code use 1GB (1024) or more.
@@ -25,7 +26,6 @@
 #define DEFAULT_LENGTH_MB 10UL
 #define MB_TO_BYTES(x) (x * 1024 * 1024)
-#define FILE_NAME "huge/hugepagefile"
 
 #define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
 #define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
@@ -107,17 +107,26 @@ static void register_region_with_uffd(char *addr, size_t len)
 int main(int argc, char *argv[])
 {
+    size_t length;
+
+    if (argc != 2 && argc != 3) {
+        printf("Usage: %s [length_in_MB] <hugetlb_file>\n", argv[0]);
+        exit(1);
+    }
+
     /* Read memory length as the first arg if valid, otherwise fallback to
-     * the default length. Any additional args are ignored.
+     * the default length.
      */
-    size_t length = argc > 1 ? (size_t)atoi(argv[1]) : 0UL;
+    if (argc == 3)
+        length = argc > 2 ? (size_t)atoi(argv[1]) : 0UL;
+
     length = length > 0 ? length : DEFAULT_LENGTH_MB;
     length = MB_TO_BYTES(length);
 
     int ret = 0;
-    int fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
+
+    /* last arg is the hugetlb file name */
+    int fd = open(argv[argc-1], O_CREAT | O_RDWR, 0755);
 
     if (fd < 0) {
         perror("Open failed");
@@ -169,5 +178,8 @@ int main(int argc, char *argv[])
     munmap(addr, length);
 
+    close(fd);
+    unlink(argv[argc-1]);
+
     return ret;
 }
@@ -111,13 +111,14 @@ fi
 echo "-----------------------"
 echo "running hugepage-mremap"
 echo "-----------------------"
-./hugepage-mremap 256
+./hugepage-mremap $mnt/huge_mremap
 if [ $? -ne 0 ]; then
     echo "[FAIL]"
     exitcode=1
 else
     echo "[PASS]"
 fi
+rm -f $mnt/huge_mremap
 
 echo "NOTE: The above hugetlb tests provide minimal coverage. Use"
 echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
......
@@ -46,6 +46,7 @@
 #include <signal.h>
 #include <poll.h>
 #include <string.h>
+#include <linux/mman.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
......