Commit b8da2e46 authored by Peter Xu, committed by Andrew Morton

mm/hugetlb: make userfaultfd_huge_must_wait() safe to pmd unshare

We can take the hugetlb walker lock here, taking the vma lock directly. Note that the vma lock is sleepable, so it must be taken before set_current_state() publishes the task's blocking state.

Link: https://lkml.kernel.org/r/20221216155217.2043700-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent fcd48540
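Why the ordering matters: acquiring a sleepable lock can itself sleep, and waking up leaves the task in TASK_RUNNING, which would silently undo a blocking state set earlier. A minimal sketch of that constraint, assuming kernel context (wait_sketch() is a hypothetical helper, not code from this patch):

#include <linux/hugetlb.h>
#include <linux/sched.h>

/* Illustrative only: shows the lock-vs-task-state ordering that the
 * patch's new comment describes.  wait_sketch() is hypothetical.
 */
static void wait_sketch(struct vm_area_struct *vma)
{
	/*
	 * Acquiring a sleepable lock may schedule(), and waking up
	 * leaves the task in TASK_RUNNING.  Taking the lock after
	 * set_current_state() would therefore silently wipe out the
	 * blocking state, so the lock must come first.
	 */
	if (is_vm_hugetlb_page(vma))
		hugetlb_vma_lock_read(vma);		/* may sleep */

	set_current_state(TASK_INTERRUPTIBLE);		/* publish blocking state */

	/* ... enqueue on the fault wait queue, drop spinlocks ... */
	schedule();
	__set_current_state(TASK_RUNNING);

	if (is_vm_hugetlb_page(vma))
		hugetlb_vma_unlock_read(vma);
}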
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -391,7 +391,8 @@ static inline unsigned int userfaultfd_get_blocking_state(unsigned int flags)
  */
 vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 {
-	struct mm_struct *mm = vmf->vma->vm_mm;
+	struct vm_area_struct *vma = vmf->vma;
+	struct mm_struct *mm = vma->vm_mm;
 	struct userfaultfd_ctx *ctx;
 	struct userfaultfd_wait_queue uwq;
 	vm_fault_t ret = VM_FAULT_SIGBUS;
@@ -418,7 +419,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	 */
 	mmap_assert_locked(mm);
 
-	ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
+	ctx = vma->vm_userfaultfd_ctx.ctx;
 	if (!ctx)
 		goto out;
 
@@ -508,6 +509,15 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 
 	blocking_state = userfaultfd_get_blocking_state(vmf->flags);
 
+	/*
+	 * Take the vma lock now, in order to safely call
+	 * userfaultfd_huge_must_wait() later.  Since acquiring the
+	 * (sleepable) vma lock can modify the current task state, that
+	 * must be before explicitly calling set_current_state().
+	 */
+	if (is_vm_hugetlb_page(vma))
+		hugetlb_vma_lock_read(vma);
+
 	spin_lock_irq(&ctx->fault_pending_wqh.lock);
 	/*
 	 * After the __add_wait_queue the uwq is visible to userland
@@ -522,13 +532,15 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
 	set_current_state(blocking_state);
 	spin_unlock_irq(&ctx->fault_pending_wqh.lock);
 
-	if (!is_vm_hugetlb_page(vmf->vma))
+	if (!is_vm_hugetlb_page(vma))
 		must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
 						  reason);
 	else
-		must_wait = userfaultfd_huge_must_wait(ctx, vmf->vma,
+		must_wait = userfaultfd_huge_must_wait(ctx, vma,
 						       vmf->address,
 						       vmf->flags, reason);
+	if (is_vm_hugetlb_page(vma))
+		hugetlb_vma_unlock_read(vma);
 	mmap_read_unlock(mm);
 
 	if (likely(must_wait && !READ_ONCE(ctx->released))) {
...
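For context, the read lock taken above pairs with the pmd-unshare writer side. A hedged sketch of that side, simplified from the kernel's hugetlb teardown paths (unshare_sketch() and its surrounding flow are illustrative, not part of this patch): huge_pmd_unshare() runs with the vma lock held for write plus the i_mmap rwsem, so a holder of hugetlb_vma_lock_read() cannot have the shared pmd page table freed out from under userfaultfd_huge_must_wait().

#include <linux/fs.h>
#include <linux/hugetlb.h>
#include <linux/mm.h>

/* Illustrative writer side (simplified, not from this patch): pmd
 * unsharing is done with the hugetlb vma lock held for write, which
 * the read lock taken in handle_userfault() excludes.
 */
static void unshare_sketch(struct mm_struct *mm, struct vm_area_struct *vma,
			   unsigned long addr, pte_t *ptep)
{
	hugetlb_vma_lock_write(vma);
	i_mmap_lock_write(vma->vm_file->f_mapping);

	/* Returns nonzero if the shared pmd page table was dropped. */
	if (huge_pmd_unshare(mm, vma, addr, ptep)) {
		/* Real callers flush the TLB over the unshared range here. */
	}

	i_mmap_unlock_write(vma->vm_file->f_mapping);
	hugetlb_vma_unlock_write(vma);
}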