Commit 22247efd authored by Peter Xu's avatar Peter Xu Committed by Linus Torvalds

mm/hugetlb: fix F_SEAL_FUTURE_WRITE

Patch series "mm/hugetlb: Fix issues on file sealing and fork", v2.

Hugh reported issue with F_SEAL_FUTURE_WRITE not applied correctly to
hugetlbfs, which I can easily verify using the memfd_test program, which
seems that the program is hardly run with hugetlbfs pages (as by default
shmem).

Meanwhile I found another probably even more severe issue on that hugetlb
fork won't wr-protect child cow pages, so child can potentially write to
parent private pages.  Patch 2 addresses that.

After this series applied, "memfd_test hugetlbfs" should start to pass.

This patch (of 2):

F_SEAL_FUTURE_WRITE is missing for hugetlb starting from the first day.
There is a test program for that and it fails constantly.

$ ./memfd_test hugetlbfs
memfd-hugetlb: CREATE
memfd-hugetlb: BASIC
memfd-hugetlb: SEAL-WRITE
memfd-hugetlb: SEAL-FUTURE-WRITE
mmap() didn't fail as expected
Aborted (core dumped)

I think it's probably because no one is really running the hugetlbfs test.

Fix it by checking FUTURE_WRITE also in hugetlbfs_file_mmap() as what we
do in shmem_mmap().  Generalize a helper for that.

Link: https://lkml.kernel.org/r/20210503234356.9097-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20210503234356.9097-2-peterx@redhat.com
Fixes: ab3948f5 ("mm/memfd: add an F_SEAL_FUTURE_WRITE seal to memfd")
Signed-off-by: default avatarPeter Xu <peterx@redhat.com>
Reported-by: default avatarHugh Dickins <hughd@google.com>
Reviewed-by: default avatarMike Kravetz <mike.kravetz@oracle.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent bd3c9cdb
...@@ -131,6 +131,7 @@ static void huge_pagevec_release(struct pagevec *pvec) ...@@ -131,6 +131,7 @@ static void huge_pagevec_release(struct pagevec *pvec)
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
loff_t len, vma_len; loff_t len, vma_len;
int ret; int ret;
struct hstate *h = hstate_file(file); struct hstate *h = hstate_file(file);
...@@ -146,6 +147,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) ...@@ -146,6 +147,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
vma->vm_ops = &hugetlb_vm_ops; vma->vm_ops = &hugetlb_vm_ops;
ret = seal_check_future_write(info->seals, vma);
if (ret)
return ret;
/* /*
* page based offset in vm_pgoff could be sufficiently large to * page based offset in vm_pgoff could be sufficiently large to
* overflow a loff_t when converted to byte offset. This can * overflow a loff_t when converted to byte offset. This can
......
...@@ -3216,5 +3216,37 @@ void mem_dump_obj(void *object); ...@@ -3216,5 +3216,37 @@ void mem_dump_obj(void *object);
static inline void mem_dump_obj(void *object) {} static inline void mem_dump_obj(void *object) {}
#endif #endif
/**
* seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it
* @seals: the seals to check
* @vma: the vma to operate on
*
* Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on
* the vma flags. Return 0 if check pass, or <0 for errors.
*/
static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
{
if (seals & F_SEAL_FUTURE_WRITE) {
/*
* New PROT_WRITE and MAP_SHARED mmaps are not allowed when
* "future write" seal active.
*/
if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
return -EPERM;
/*
* Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
* MAP_SHARED and read-only, take care to not allow mprotect to
* revert protections on such mappings. Do this only for shared
* mappings. For private mappings, don't need to mask
* VM_MAYWRITE as we still want them to be COW-writable.
*/
if (vma->vm_flags & VM_SHARED)
vma->vm_flags &= ~(VM_MAYWRITE);
}
return 0;
}
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */ #endif /* _LINUX_MM_H */
...@@ -2258,25 +2258,11 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) ...@@ -2258,25 +2258,11 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
static int shmem_mmap(struct file *file, struct vm_area_struct *vma) static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
{ {
struct shmem_inode_info *info = SHMEM_I(file_inode(file)); struct shmem_inode_info *info = SHMEM_I(file_inode(file));
int ret;
if (info->seals & F_SEAL_FUTURE_WRITE) { ret = seal_check_future_write(info->seals, vma);
/* if (ret)
* New PROT_WRITE and MAP_SHARED mmaps are not allowed when return ret;
* "future write" seal active.
*/
if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
return -EPERM;
/*
* Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
* MAP_SHARED and read-only, take care to not allow mprotect to
* revert protections on such mappings. Do this only for shared
* mappings. For private mappings, don't need to mask
* VM_MAYWRITE as we still want them to be COW-writable.
*/
if (vma->vm_flags & VM_SHARED)
vma->vm_flags &= ~(VM_MAYWRITE);
}
/* arm64 - allow memory tagging on RAM-based files */ /* arm64 - allow memory tagging on RAM-based files */
vma->vm_flags |= VM_MTE_ALLOWED; vma->vm_flags |= VM_MTE_ALLOWED;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment