Commit c7b1850d authored by Mike Kravetz's avatar Mike Kravetz Committed by Linus Torvalds

hugetlb: don't pass page cache pages to restore_reserve_on_error

syzbot hit kernel BUG at fs/hugetlbfs/inode.c:532 as described in [1].
This BUG triggers if the HPageRestoreReserve flag is set on a page in
the page cache.  It should never be set, as the routine
huge_add_to_page_cache explicitly clears the flag after adding a page to
the cache.

The only code other than huge page allocation which sets the flag is
restore_reserve_on_error.  It will potentially set the flag in rare out
of memory conditions.  syzbot was injecting errors to cause memory
allocation errors which exercised this specific path.

The code in restore_reserve_on_error is doing the right thing.  However,
there are instances where pages in the page cache were being passed to
restore_reserve_on_error.  This is incorrect, as once a page goes into
the cache reservation information will not be modified for the page
until it is removed from the cache.  Error paths do not remove pages
from the cache, so even in the case of error, the page will remain in
the cache and no reservation adjustment is needed.

Modify routines that potentially call restore_reserve_on_error with a
page cache page to no longer do so.

Note on fixes tag: Prior to commit 846be085 ("mm/hugetlb: expand
restore_reserve_on_error functionality") the routine would not process
page cache pages because the HPageRestoreReserve flag is not set on such
pages.  Therefore, this issue could not be triggered.  The code added
by commit 846be085 ("mm/hugetlb: expand restore_reserve_on_error
functionality") is needed and correct.  It exposed incorrect calls to
restore_reserve_on_error which is the root cause addressed by this
commit.

[1] https://lore.kernel.org/linux-mm/00000000000050776d05c9b7c7f0@google.com/

Link: https://lkml.kernel.org/r/20210818213304.37038-1-mike.kravetz@oracle.com
Fixes: 846be085 ("mm/hugetlb: expand restore_reserve_on_error functionality")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reported-by: <syzbot+67654e51e54455f1c585@syzkaller.appspotmail.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent a7cb5d23
...@@ -2476,7 +2476,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, ...@@ -2476,7 +2476,7 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
if (!rc) { if (!rc) {
/* /*
* This indicates there is an entry in the reserve map * This indicates there is an entry in the reserve map
* added by alloc_huge_page. We know it was added * not added by alloc_huge_page. We know it was added
* before the alloc_huge_page call, otherwise * before the alloc_huge_page call, otherwise
* HPageRestoreReserve would be set on the page. * HPageRestoreReserve would be set on the page.
* Remove the entry so that a subsequent allocation * Remove the entry so that a subsequent allocation
...@@ -4660,7 +4660,9 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, ...@@ -4660,7 +4660,9 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
spin_unlock(ptl); spin_unlock(ptl);
mmu_notifier_invalidate_range_end(&range); mmu_notifier_invalidate_range_end(&range);
out_release_all: out_release_all:
restore_reserve_on_error(h, vma, haddr, new_page); /* No restore in case of successful pagetable update (Break COW) */
if (new_page != old_page)
restore_reserve_on_error(h, vma, haddr, new_page);
put_page(new_page); put_page(new_page);
out_release_old: out_release_old:
put_page(old_page); put_page(old_page);
...@@ -4776,7 +4778,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, ...@@ -4776,7 +4778,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
pte_t new_pte; pte_t new_pte;
spinlock_t *ptl; spinlock_t *ptl;
unsigned long haddr = address & huge_page_mask(h); unsigned long haddr = address & huge_page_mask(h);
bool new_page = false; bool new_page, new_pagecache_page = false;
/* /*
* Currently, we are forced to kill the process in the event the * Currently, we are forced to kill the process in the event the
...@@ -4799,6 +4801,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, ...@@ -4799,6 +4801,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
goto out; goto out;
retry: retry:
new_page = false;
page = find_lock_page(mapping, idx); page = find_lock_page(mapping, idx);
if (!page) { if (!page) {
/* Check for page in userfault range */ /* Check for page in userfault range */
...@@ -4842,6 +4845,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, ...@@ -4842,6 +4845,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
goto retry; goto retry;
goto out; goto out;
} }
new_pagecache_page = true;
} else { } else {
lock_page(page); lock_page(page);
if (unlikely(anon_vma_prepare(vma))) { if (unlikely(anon_vma_prepare(vma))) {
...@@ -4926,7 +4930,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm, ...@@ -4926,7 +4930,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
spin_unlock(ptl); spin_unlock(ptl);
backout_unlocked: backout_unlocked:
unlock_page(page); unlock_page(page);
restore_reserve_on_error(h, vma, haddr, page); /* restore reserve for newly allocated pages not in page cache */
if (new_page && !new_pagecache_page)
restore_reserve_on_error(h, vma, haddr, page);
put_page(page); put_page(page);
goto out; goto out;
} }
...@@ -5135,6 +5141,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -5135,6 +5141,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
int ret = -ENOMEM; int ret = -ENOMEM;
struct page *page; struct page *page;
int writable; int writable;
bool new_pagecache_page = false;
if (is_continue) { if (is_continue) {
ret = -EFAULT; ret = -EFAULT;
...@@ -5228,6 +5235,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -5228,6 +5235,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
ret = huge_add_to_page_cache(page, mapping, idx); ret = huge_add_to_page_cache(page, mapping, idx);
if (ret) if (ret)
goto out_release_nounlock; goto out_release_nounlock;
new_pagecache_page = true;
} }
ptl = huge_pte_lockptr(h, dst_mm, dst_pte); ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
...@@ -5291,7 +5299,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, ...@@ -5291,7 +5299,8 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
if (vm_shared || is_continue) if (vm_shared || is_continue)
unlock_page(page); unlock_page(page);
out_release_nounlock: out_release_nounlock:
restore_reserve_on_error(h, dst_vma, dst_addr, page); if (!new_pagecache_page)
restore_reserve_on_error(h, dst_vma, dst_addr, page);
put_page(page); put_page(page);
goto out; goto out;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment