Commit b8da2e46 authored by Peter Xu's avatar Peter Xu Committed by Andrew Morton
Browse files

mm/hugetlb: make userfaultfd_huge_must_wait() safe to pmd unshare

We can take the hugetlb walker lock, here taking vma lock directly.

Link: https://lkml.kernel.org/r/20221216155217.2043700-1-peterx@redhat.com


Signed-off-by: default avatarPeter Xu <peterx@redhat.com>
Reviewed-by: default avatarDavid Hildenbrand <david@redhat.com>
Reviewed-by: default avatarMike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: default avatarJohn Hubbard <jhubbard@nvidia.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
parent fcd48540
Loading
Loading
Loading
Loading
+16 −4
Original line number Diff line number Diff line
@@ -391,7 +391,8 @@ static inline unsigned int userfaultfd_get_blocking_state(unsigned int flags)
 */
vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
{
	struct mm_struct *mm = vmf->vma->vm_mm;
	struct vm_area_struct *vma = vmf->vma;
	struct mm_struct *mm = vma->vm_mm;
	struct userfaultfd_ctx *ctx;
	struct userfaultfd_wait_queue uwq;
	vm_fault_t ret = VM_FAULT_SIGBUS;
@@ -418,7 +419,7 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
	 */
	mmap_assert_locked(mm);

	ctx = vmf->vma->vm_userfaultfd_ctx.ctx;
	ctx = vma->vm_userfaultfd_ctx.ctx;
	if (!ctx)
		goto out;

@@ -508,6 +509,15 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)

	blocking_state = userfaultfd_get_blocking_state(vmf->flags);

        /*
         * Take the vma lock now, in order to safely call
         * userfaultfd_huge_must_wait() later. Since acquiring the
         * (sleepable) vma lock can modify the current task state, that
         * must be before explicitly calling set_current_state().
         */
	if (is_vm_hugetlb_page(vma))
		hugetlb_vma_lock_read(vma);

	spin_lock_irq(&ctx->fault_pending_wqh.lock);
	/*
	 * After the __add_wait_queue the uwq is visible to userland
@@ -522,13 +532,15 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason)
	set_current_state(blocking_state);
	spin_unlock_irq(&ctx->fault_pending_wqh.lock);

	if (!is_vm_hugetlb_page(vmf->vma))
	if (!is_vm_hugetlb_page(vma))
		must_wait = userfaultfd_must_wait(ctx, vmf->address, vmf->flags,
						  reason);
	else
		must_wait = userfaultfd_huge_must_wait(ctx, vmf->vma,
		must_wait = userfaultfd_huge_must_wait(ctx, vma,
						       vmf->address,
						       vmf->flags, reason);
	if (is_vm_hugetlb_page(vma))
		hugetlb_vma_unlock_read(vma);
	mmap_read_unlock(mm);

	if (likely(must_wait && !READ_ONCE(ctx->released))) {