Commit a759a909 authored by Nadav Amit, committed by Linus Torvalds

userfaultfd: change mmap_changing to atomic

Patch series "userfaultfd: minor bug fixes".

Three unrelated bug fixes. The first two address possible issues (not
overly theoretical ones), although I did not encounter them in practice.

The third patch addresses a test bug that causes the test to fail on my
system. It has been sent before as part of a bigger RFC.

This patch (of 3):

mmap_changing is currently a boolean variable, which is set and cleared
without any lock that protects against concurrent modifications.

mmap_changing is supposed to mark whether userfaultfd page-fault handling
should be retried because the mappings are undergoing a change.  However,
concurrent calls, for instance to madvise(MADV_DONTNEED), might cause
mmap_changing to be reset to false even though a remove event has not yet
been read (and hence acknowledged) by the user.
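
To make the race concrete, here is a minimal userspace model of the problem
(plain C, not kernel code; the variable name merely mirrors the kernel's
mmap_changing).  Two overlapping remove events share one boolean flag, and
acknowledging the first erases the hint that the second still needs:

  #include <stdbool.h>
  #include <stdio.h>

  /* A plain flag, as in the pre-patch "bool mmap_changing". */
  static bool mmap_changing;

  int main(void)
  {
  	mmap_changing = true;	/* event #1 (e.g. MADV_DONTNEED) begins */
  	mmap_changing = true;	/* event #2 begins before #1 is read    */
  	mmap_changing = false;	/* event #1 is read and acknowledged    */

  	/* Event #2 is still unread, yet the flag no longer says so; a
  	 * UFFDIO_COPY-style operation would wrongly proceed here. */
  	printf("flag says retry: %s\n", mmap_changing ? "yes" : "no");
  	return 0;
  }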

Change mmap_changing to atomic_t and increment/decrement it appropriately.
Add a debug assertion that checks whether mmap_changing ever becomes
negative.
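
For comparison, here is a sketch of the counter approach under the same
interleaving, using C11 stdatomic as a userspace stand-in for the kernel's
atomic_t API (the helper names are illustrative, not from the patch):

  #include <assert.h>
  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdio.h>

  static atomic_int mmap_changing;	/* a counter instead of a flag */

  static void event_start(void)		/* stands in for atomic_inc() */
  {
  	atomic_fetch_add(&mmap_changing, 1);
  }

  static void event_done(void)		/* stands in for atomic_dec() + VM_BUG_ON */
  {
  	int old = atomic_fetch_sub(&mmap_changing, 1);
  	assert(old > 0);
  }

  static bool must_retry(void)		/* stands in for atomic_read() != 0 */
  {
  	return atomic_load(&mmap_changing) != 0;
  }

  int main(void)
  {
  	event_start();	/* event #1 begins */
  	event_start();	/* event #2 begins */
  	event_done();	/* event #1 acknowledged */
  	printf("retry: %s\n", must_retry() ? "yes" : "no");	/* yes */
  	event_done();	/* event #2 acknowledged */
  	printf("retry: %s\n", must_retry() ? "yes" : "no");	/* no */
  	return 0;
  }

The counter still reads non-zero after the first event completes, so the
retry hint survives until every pending event has been acknowledged; the
assert plays the role of the VM_BUG_ON added below.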

Link: https://lkml.kernel.org/r/20210808020724.1022515-1-namit@vmware.com
Link: https://lkml.kernel.org/r/20210808020724.1022515-2-namit@vmware.com
Fixes: df2cc96e ("userfaultfd: prevent non-cooperative events vs mcopy_atomic races")
Signed-off-by: Nadav Amit <namit@vmware.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 09a26e83
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -74,7 +74,7 @@ struct userfaultfd_ctx {
 	/* released */
 	bool released;
 	/* memory mappings are changing because of non-cooperative event */
-	bool mmap_changing;
+	atomic_t mmap_changing;
 	/* mm with one ore more vmas attached to this userfaultfd_ctx */
 	struct mm_struct *mm;
 };
@@ -623,7 +623,8 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 	 * already released.
 	 */
 out:
-	WRITE_ONCE(ctx->mmap_changing, false);
+	atomic_dec(&ctx->mmap_changing);
+	VM_BUG_ON(atomic_read(&ctx->mmap_changing) < 0);
 	userfaultfd_ctx_put(ctx);
 }
 
@@ -669,12 +670,12 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs)
 		ctx->state = UFFD_STATE_RUNNING;
 		ctx->features = octx->features;
 		ctx->released = false;
-		ctx->mmap_changing = false;
+		atomic_set(&ctx->mmap_changing, 0);
 		ctx->mm = vma->vm_mm;
 		mmgrab(ctx->mm);
 
 		userfaultfd_ctx_get(octx);
-		WRITE_ONCE(octx->mmap_changing, true);
+		atomic_inc(&octx->mmap_changing);
 		fctx->orig = octx;
 		fctx->new = ctx;
 		list_add_tail(&fctx->list, fcs);
@@ -721,7 +722,7 @@ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
 	if (ctx->features & UFFD_FEATURE_EVENT_REMAP) {
 		vm_ctx->ctx = ctx;
 		userfaultfd_ctx_get(ctx);
-		WRITE_ONCE(ctx->mmap_changing, true);
+		atomic_inc(&ctx->mmap_changing);
 	} else {
 		/* Drop uffd context if remap feature not enabled */
 		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -766,7 +767,7 @@ bool userfaultfd_remove(struct vm_area_struct *vma,
 		return true;
 
 	userfaultfd_ctx_get(ctx);
-	WRITE_ONCE(ctx->mmap_changing, true);
+	atomic_inc(&ctx->mmap_changing);
 	mmap_read_unlock(mm);
 
 	msg_init(&ewq.msg);
@@ -810,7 +811,7 @@ int userfaultfd_unmap_prep(struct vm_area_struct *vma,
 			return -ENOMEM;
 
 		userfaultfd_ctx_get(ctx);
-		WRITE_ONCE(ctx->mmap_changing, true);
+		atomic_inc(&ctx->mmap_changing);
 		unmap_ctx->ctx = ctx;
 		unmap_ctx->start = start;
 		unmap_ctx->end = end;
@@ -1700,7 +1701,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	user_uffdio_copy = (struct uffdio_copy __user *) arg;
 
 	ret = -EAGAIN;
-	if (READ_ONCE(ctx->mmap_changing))
+	if (atomic_read(&ctx->mmap_changing))
 		goto out;
 
 	ret = -EFAULT;
@@ -1757,7 +1758,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 	user_uffdio_zeropage = (struct uffdio_zeropage __user *) arg;
 
 	ret = -EAGAIN;
-	if (READ_ONCE(ctx->mmap_changing))
+	if (atomic_read(&ctx->mmap_changing))
 		goto out;
 
 	ret = -EFAULT;
@@ -1807,7 +1808,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 	struct userfaultfd_wake_range range;
 	bool mode_wp, mode_dontwake;
 
-	if (READ_ONCE(ctx->mmap_changing))
+	if (atomic_read(&ctx->mmap_changing))
 		return -EAGAIN;
 
 	user_uffdio_wp = (struct uffdio_writeprotect __user *) arg;
@@ -1855,7 +1856,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
 	user_uffdio_continue = (struct uffdio_continue __user *)arg;
 
 	ret = -EAGAIN;
-	if (READ_ONCE(ctx->mmap_changing))
+	if (atomic_read(&ctx->mmap_changing))
 		goto out;
 
 	ret = -EFAULT;
@@ -2087,7 +2088,7 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
 	ctx->features = 0;
 	ctx->state = UFFD_STATE_WAIT_API;
 	ctx->released = false;
-	ctx->mmap_changing = false;
+	atomic_set(&ctx->mmap_changing, 0);
 	ctx->mm = current->mm;
 	/* prevent the mm struct to be freed */
 	mmgrab(ctx->mm);
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -60,16 +60,16 @@ extern int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 
 extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 			    unsigned long src_start, unsigned long len,
-			    bool *mmap_changing, __u64 mode);
+			    atomic_t *mmap_changing, __u64 mode);
 extern ssize_t mfill_zeropage(struct mm_struct *dst_mm,
 			      unsigned long dst_start,
 			      unsigned long len,
-			      bool *mmap_changing);
+			      atomic_t *mmap_changing);
 extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
-			      unsigned long len, bool *mmap_changing);
+			      unsigned long len, atomic_t *mmap_changing);
 extern int mwriteprotect_range(struct mm_struct *dst_mm,
 			       unsigned long start, unsigned long len,
-			       bool enable_wp, bool *mmap_changing);
+			       bool enable_wp, atomic_t *mmap_changing);
 
 /* mm helpers */
 static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -483,7 +483,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 					      unsigned long src_start,
 					      unsigned long len,
 					      enum mcopy_atomic_mode mcopy_mode,
-					      bool *mmap_changing,
+					      atomic_t *mmap_changing,
 					      __u64 mode)
 {
 	struct vm_area_struct *dst_vma;
@@ -517,7 +517,7 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 	 * request the user to retry later
 	 */
 	err = -EAGAIN;
-	if (mmap_changing && READ_ONCE(*mmap_changing))
+	if (mmap_changing && atomic_read(mmap_changing))
 		goto out_unlock;
 
 	/*
@@ -650,28 +650,29 @@ static __always_inline ssize_t __mcopy_atomic(struct mm_struct *dst_mm,
 ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
 		     unsigned long src_start, unsigned long len,
-		     bool *mmap_changing, __u64 mode)
+		     atomic_t *mmap_changing, __u64 mode)
 {
 	return __mcopy_atomic(dst_mm, dst_start, src_start, len,
 			      MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
 }
 
 ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
-		       unsigned long len, bool *mmap_changing)
+		       unsigned long len, atomic_t *mmap_changing)
 {
 	return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
 			      mmap_changing, 0);
 }
 
 ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
-		       unsigned long len, bool *mmap_changing)
+		       unsigned long len, atomic_t *mmap_changing)
 {
 	return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
 			      mmap_changing, 0);
 }
 
 int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
-			unsigned long len, bool enable_wp, bool *mmap_changing)
+			unsigned long len, bool enable_wp,
+			atomic_t *mmap_changing)
 {
 	struct vm_area_struct *dst_vma;
 	pgprot_t newprot;
@@ -694,7 +695,7 @@ int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
 	 * request the user to retry later
 	 */
 	err = -EAGAIN;
-	if (mmap_changing && READ_ONCE(*mmap_changing))
+	if (mmap_changing && atomic_read(mmap_changing))
 		goto out_unlock;
 
 	err = -ENOENT;
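
From userspace, the effect of the mmap_changing checks above is that the
UFFDIO ioctls fail with EAGAIN while a non-cooperative event is in flight.
The sketch below shows a monitor-side retry loop; uffd_copy_retry is a
hypothetical helper, not part of any library, and a real monitor must drain
the event queue where the comment indicates:

  #include <errno.h>
  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <linux/userfaultfd.h>

  /* Resolve a fault with UFFDIO_COPY, retrying while the kernel reports
   * that memory mappings are changing (mmap_changing != 0). */
  static int uffd_copy_retry(int uffd, uint64_t dst, uint64_t src,
  			     uint64_t len)
  {
  	struct uffdio_copy copy = {
  		.dst = dst, .src = src, .len = len, .mode = 0, .copy = 0,
  	};

  	while (ioctl(uffd, UFFDIO_COPY, &copy) == -1) {
  		if (errno != EAGAIN)
  			return -1;
  		/* A non-cooperative event (fork/mremap/remove/unmap) is
  		 * pending: read and handle the events queued on uffd here
  		 * before retrying, otherwise this loop may spin.  On EAGAIN,
  		 * copy.copy may report partial progress to account for. */
  		copy.copy = 0;
  	}
  	return 0;
  }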