Commit d09e8ca6 authored by Pasha Tatashin, committed by Andrew Morton

mm: anonymous shared memory naming

Since commit 9a10064f ("mm: add a field to store names for private
anonymous memory"), names can be set for private anonymous memory, but
not for shared anonymous memory.  However, naming shared anonymous memory
is just as useful for tracking purposes.

Extend the functionality to be able to set names for shared anon.

There are two ways to create anonymous shared memory, using memfd or
directly via mmap():
1. fd = memfd_create(...)
   mem = mmap(..., MAP_SHARED, fd, ...)
2. mem = mmap(..., MAP_SHARED | MAP_ANONYMOUS, -1, ...)

In both cases the anonymous shared memory is created the same way by
mapping an unlinked file on tmpfs.

The memfd way allows giving a name to anonymous shared memory, but it is
not useful when parts of the shared memory need to have distinct names.

Example use case: The VMM maps VM memory as anonymous shared memory (not
private because VMM is sandboxed and drivers are running in their own
processes).  However, the VM reports back to the VMM how parts of the memory
are actually used by the guest, how each of the segments should be backed
(e.g. 4K pages, 2M pages), and some other information about the segments.
The naming allows us to monitor the effective memory footprint for each
of these segments from the host without looking inside the guest.

Sample usage and output:
  /* Create shared anonymous segment */
  anon_shmem = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
  /* Name the segment: "MY-NAME" */
  rv = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
             anon_shmem, SIZE, "MY-NAME");

cat /proc/<pid>/maps (and smaps):
7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 [anon_shmem:MY-NAME]

If the segment is not named, the output is:
7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 /dev/zero (deleted)

Link: https://lkml.kernel.org/r/20221115020602.804224-1-pasha.tatashin@soleen.com
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Colin Cross <ccross@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Vincent Whitchurch <vincent.whitchurch@axis.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: xu xin <cgel.zte@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent b7217a0b
...@@ -426,14 +426,16 @@ with the memory region, as the case would be with BSS (uninitialized data). ...@@ -426,14 +426,16 @@ with the memory region, as the case would be with BSS (uninitialized data).
The "pathname" shows the name associated file for this mapping. If the mapping The "pathname" shows the name associated file for this mapping. If the mapping
is not associated with a file: is not associated with a file:
============= ==================================== =================== ===========================================
[heap] the heap of the program [heap] the heap of the program
[stack] the stack of the main process [stack] the stack of the main process
[vdso] the "virtual dynamic shared object", [vdso] the "virtual dynamic shared object",
the kernel system call handler the kernel system call handler
[anon:<name>] an anonymous mapping that has been [anon:<name>] a private anonymous mapping that has been
named by userspace named by userspace
============= ==================================== [anon_shmem:<name>] an anonymous shared memory mapping that has
been named by userspace
=================== ===========================================
or if empty, the mapping is anonymous. or if empty, the mapping is anonymous.
......
...@@ -274,6 +274,7 @@ static void show_vma_header_prefix(struct seq_file *m, ...@@ -274,6 +274,7 @@ static void show_vma_header_prefix(struct seq_file *m,
static void static void
show_map_vma(struct seq_file *m, struct vm_area_struct *vma) show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
{ {
struct anon_vma_name *anon_name = NULL;
struct mm_struct *mm = vma->vm_mm; struct mm_struct *mm = vma->vm_mm;
struct file *file = vma->vm_file; struct file *file = vma->vm_file;
vm_flags_t flags = vma->vm_flags; vm_flags_t flags = vma->vm_flags;
...@@ -293,6 +294,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) ...@@ -293,6 +294,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
start = vma->vm_start; start = vma->vm_start;
end = vma->vm_end; end = vma->vm_end;
show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino); show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
if (mm)
anon_name = anon_vma_name(vma);
/* /*
* Print the dentry name for named mappings, and a * Print the dentry name for named mappings, and a
...@@ -300,6 +303,13 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) ...@@ -300,6 +303,13 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
*/ */
if (file) { if (file) {
seq_pad(m, ' '); seq_pad(m, ' ');
/*
* If user named this anon shared memory via
* prctl(PR_SET_VMA ..., use the provided name.
*/
if (anon_name)
seq_printf(m, "[anon_shmem:%s]", anon_name->name);
else
seq_file_path(m, file, "\n"); seq_file_path(m, file, "\n");
goto done; goto done;
} }
...@@ -312,8 +322,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) ...@@ -312,8 +322,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
name = arch_vma_name(vma); name = arch_vma_name(vma);
if (!name) { if (!name) {
struct anon_vma_name *anon_name;
if (!mm) { if (!mm) {
name = "[vdso]"; name = "[vdso]";
goto done; goto done;
...@@ -330,7 +338,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) ...@@ -330,7 +338,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
goto done; goto done;
} }
anon_name = anon_vma_name(vma);
if (anon_name) { if (anon_name) {
seq_pad(m, ' '); seq_pad(m, ' ');
seq_printf(m, "[anon:%s]", anon_name->name); seq_printf(m, "[anon:%s]", anon_name->name);
......
...@@ -700,8 +700,10 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi) ...@@ -700,8 +700,10 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
* paths in userfault. * paths in userfault.
*/ */
bool vma_is_shmem(struct vm_area_struct *vma); bool vma_is_shmem(struct vm_area_struct *vma);
bool vma_is_anon_shmem(struct vm_area_struct *vma);
#else #else
static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; } static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; }
#endif #endif
int vma_is_stack_for_current(struct vm_area_struct *vma); int vma_is_stack_for_current(struct vm_area_struct *vma);
......
...@@ -549,21 +549,11 @@ struct vm_area_struct { ...@@ -549,21 +549,11 @@ struct vm_area_struct {
* For areas with an address space and backing store, * For areas with an address space and backing store,
* linkage into the address_space->i_mmap interval tree. * linkage into the address_space->i_mmap interval tree.
* *
* For private anonymous mappings, a pointer to a null terminated string
* containing the name given to the vma, or NULL if unnamed.
*/ */
union {
struct { struct {
struct rb_node rb; struct rb_node rb;
unsigned long rb_subtree_last; unsigned long rb_subtree_last;
} shared; } shared;
/*
* Serialized by mmap_sem. Never use directly because it is
* valid only when vm_file is NULL. Use anon_vma_name instead.
*/
struct anon_vma_name *anon_name;
};
/* /*
* A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
...@@ -584,6 +574,14 @@ struct vm_area_struct { ...@@ -584,6 +574,14 @@ struct vm_area_struct {
struct file * vm_file; /* File we map to (can be NULL). */ struct file * vm_file; /* File we map to (can be NULL). */
void * vm_private_data; /* was vm_pte (shared mem) */ void * vm_private_data; /* was vm_pte (shared mem) */
#ifdef CONFIG_ANON_VMA_NAME
/*
* For private and shared anonymous mappings, a pointer to a null
* terminated string containing the name given to the vma, or NULL if
* unnamed. Serialized by mmap_sem. Use anon_vma_name to access.
*/
struct anon_vma_name *anon_name;
#endif
#ifdef CONFIG_SWAP #ifdef CONFIG_SWAP
atomic_long_t swap_readahead_info; atomic_long_t swap_readahead_info;
#endif #endif
......
...@@ -95,9 +95,6 @@ struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) ...@@ -95,9 +95,6 @@ struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
{ {
mmap_assert_locked(vma->vm_mm); mmap_assert_locked(vma->vm_mm);
if (vma->vm_file)
return NULL;
return vma->anon_name; return vma->anon_name;
} }
...@@ -183,7 +180,7 @@ static int madvise_update_vma(struct vm_area_struct *vma, ...@@ -183,7 +180,7 @@ static int madvise_update_vma(struct vm_area_struct *vma,
* vm_flags is protected by the mmap_lock held in write mode. * vm_flags is protected by the mmap_lock held in write mode.
*/ */
vma->vm_flags = new_flags; vma->vm_flags = new_flags;
if (!vma->vm_file) { if (!vma->vm_file || vma_is_anon_shmem(vma)) {
error = replace_anon_vma_name(vma, anon_name); error = replace_anon_vma_name(vma, anon_name);
if (error) if (error)
return error; return error;
...@@ -1273,7 +1270,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma, ...@@ -1273,7 +1270,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
int error; int error;
/* Only anonymous mappings can be named */ /* Only anonymous mappings can be named */
if (vma->vm_file) if (vma->vm_file && !vma_is_anon_shmem(vma))
return -EBADF; return -EBADF;
error = madvise_update_vma(vma, prev, start, end, vma->vm_flags, error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
......
...@@ -237,11 +237,17 @@ static const struct inode_operations shmem_inode_operations; ...@@ -237,11 +237,17 @@ static const struct inode_operations shmem_inode_operations;
static const struct inode_operations shmem_dir_inode_operations; static const struct inode_operations shmem_dir_inode_operations;
static const struct inode_operations shmem_special_inode_operations; static const struct inode_operations shmem_special_inode_operations;
static const struct vm_operations_struct shmem_vm_ops; static const struct vm_operations_struct shmem_vm_ops;
static const struct vm_operations_struct shmem_anon_vm_ops;
static struct file_system_type shmem_fs_type; static struct file_system_type shmem_fs_type;
bool vma_is_anon_shmem(struct vm_area_struct *vma)
{
return vma->vm_ops == &shmem_anon_vm_ops;
}
bool vma_is_shmem(struct vm_area_struct *vma) bool vma_is_shmem(struct vm_area_struct *vma)
{ {
return vma->vm_ops == &shmem_vm_ops; return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops;
} }
static LIST_HEAD(shmem_swaplist); static LIST_HEAD(shmem_swaplist);
...@@ -2263,7 +2269,8 @@ int shmem_lock(struct file *file, int lock, struct ucounts *ucounts) ...@@ -2263,7 +2269,8 @@ int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
static int shmem_mmap(struct file *file, struct vm_area_struct *vma) static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
{ {
struct shmem_inode_info *info = SHMEM_I(file_inode(file)); struct inode *inode = file_inode(file);
struct shmem_inode_info *info = SHMEM_I(inode);
int ret; int ret;
ret = seal_check_future_write(info->seals, vma); ret = seal_check_future_write(info->seals, vma);
...@@ -2274,7 +2281,11 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -2274,7 +2281,11 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_flags |= VM_MTE_ALLOWED; vma->vm_flags |= VM_MTE_ALLOWED;
file_accessed(file); file_accessed(file);
/* This is anonymous shared memory if it is unlinked at the time of mmap */
if (inode->i_nlink)
vma->vm_ops = &shmem_vm_ops; vma->vm_ops = &shmem_vm_ops;
else
vma->vm_ops = &shmem_anon_vm_ops;
return 0; return 0;
} }
...@@ -3988,6 +3999,15 @@ static const struct vm_operations_struct shmem_vm_ops = { ...@@ -3988,6 +3999,15 @@ static const struct vm_operations_struct shmem_vm_ops = {
#endif #endif
}; };
static const struct vm_operations_struct shmem_anon_vm_ops = {
.fault = shmem_fault,
.map_pages = filemap_map_pages,
#ifdef CONFIG_NUMA
.set_policy = shmem_set_policy,
.get_policy = shmem_get_policy,
#endif
};
int shmem_init_fs_context(struct fs_context *fc) int shmem_init_fs_context(struct fs_context *fc)
{ {
struct shmem_options *ctx; struct shmem_options *ctx;
...@@ -4163,6 +4183,7 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) ...@@ -4163,6 +4183,7 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
EXPORT_SYMBOL_GPL(shmem_truncate_range); EXPORT_SYMBOL_GPL(shmem_truncate_range);
#define shmem_vm_ops generic_file_vm_ops #define shmem_vm_ops generic_file_vm_ops
#define shmem_anon_vm_ops generic_file_vm_ops
#define shmem_file_operations ramfs_file_operations #define shmem_file_operations ramfs_file_operations
#define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev) #define shmem_get_inode(sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
#define shmem_acct_size(flags, size) 0 #define shmem_acct_size(flags, size) 0
...@@ -4268,7 +4289,7 @@ int shmem_zero_setup(struct vm_area_struct *vma) ...@@ -4268,7 +4289,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
if (vma->vm_file) if (vma->vm_file)
fput(vma->vm_file); fput(vma->vm_file);
vma->vm_file = file; vma->vm_file = file;
vma->vm_ops = &shmem_vm_ops; vma->vm_ops = &shmem_anon_vm_ops;
return 0; return 0;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment