Commit d28c2c9a authored by Ralph Campbell, committed by Jason Gunthorpe

mm/hmm: make full use of walk_page_range()

hmm_range_fault() calls find_vma() and walk_page_range() in a loop.  This
is unnecessary duplication since walk_page_range() calls find_vma() in a
loop already.
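
For context, the vma iteration that walk_page_range() already performs looks
roughly like this (a simplified sketch of mm/pagewalk.c from this kernel era;
hole handling details and locking asserts are trimmed):

	vma = find_vma(walk.mm, start);
	do {
		if (!vma || start < vma->vm_start) {
			/* Gap before the next vma (or past the last one). */
			walk.vma = NULL;
			next = vma ? min(end, vma->vm_start) : end;
		} else {
			walk.vma = vma;
			next = min(end, vma->vm_end);
			vma = vma->vm_next;
			err = walk_page_test(start, next, &walk);
			if (err > 0) {	/* ops->test_walk said "skip" */
				err = 0;
				continue;
			}
			if (err < 0)
				break;
		}
		if (walk.vma || walk.ops->pte_hole)
			err = __walk_page_range(start, next, &walk);
		if (err)
			break;
	} while (start = next, start < end);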

Simplify hmm_range_fault() by defining a test_walk callback function to
filter unhandled vmas.
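
The test_walk return convention in struct mm_walk_ops is: a negative errno
aborts the walk, 1 skips the current vma, and 0 walks it. A minimal filter
callback following that contract (illustrative only; the one this patch
actually adds is hmm_vma_walk_test() below) might look like:

	static int demo_test_walk(unsigned long start, unsigned long end,
				  struct mm_walk *walk)
	{
		struct vm_area_struct *vma = walk->vma;

		if (vma->vm_flags & (VM_IO | VM_PFNMAP))
			return -EFAULT;	/* abort the whole walk */
		if (!(vma->vm_flags & VM_READ))
			return 1;	/* skip this vma, keep walking */
		return 0;		/* descend into this vma */
	}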

This also fixes a bug where hmm_range_fault() was not checking start >=
vma->vm_start before checking vma->vm_flags, so hmm_range_fault() could
return an error based on the wrong vma for the requested range.
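
(find_vma(mm, start) returns the first vma with vm_end > start, which may
begin above start, so testing that vma's vm_flags judges a vma that does not
actually contain the requested address. The old loop was missing a guard
along these lines before the flags check:

	vma = find_vma(mm, start);
	if (!vma || start < vma->vm_start)
		return -EFAULT;	/* no vma contains 'start' */

With test_walk, walk_page_range() only invokes the callback for the portion
of the range a vma actually covers, so no such guard is needed.)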

It also fixes a bug where, if the vma has no read access and the caller did
not request a fault, an error was returned; there shouldn't be any error
return code in that case. The new hmm_vma_walk_test() instead fills the
pfns with HMM_PFN_NONE and skips the vma.

Link: https://lore.kernel.org/r/20191104222141.5173-2-rcampbell@nvidia.com
Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
parent d3eeb1d7
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -65,18 +65,15 @@ static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
 	return -EFAULT;
 }
 
-static int hmm_pfns_bad(unsigned long addr,
-			unsigned long end,
-			struct mm_walk *walk)
+static int hmm_pfns_fill(unsigned long addr, unsigned long end,
+		struct hmm_range *range, enum hmm_pfn_value_e value)
 {
-	struct hmm_vma_walk *hmm_vma_walk = walk->private;
-	struct hmm_range *range = hmm_vma_walk->range;
 	uint64_t *pfns = range->pfns;
 	unsigned long i;
 
 	i = (addr - range->start) >> PAGE_SHIFT;
 	for (; addr < end; addr += PAGE_SIZE, i++)
-		pfns[i] = range->values[HMM_PFN_ERROR];
+		pfns[i] = range->values[value];
 
 	return 0;
 }
@@ -403,7 +400,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 		}
 		return 0;
 	} else if (!pmd_present(pmd))
-		return hmm_pfns_bad(start, end, walk);
+		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
 
 	if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
 		/*
@@ -431,7 +428,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 	 * recover.
 	 */
 	if (pmd_bad(pmd))
-		return hmm_pfns_bad(start, end, walk);
+		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
 
 	ptep = pte_offset_map(pmdp, addr);
 	i = (addr - range->start) >> PAGE_SHIFT;
@@ -589,13 +586,47 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 #define hmm_vma_walk_hugetlb_entry NULL
 #endif /* CONFIG_HUGETLB_PAGE */
 
-static void hmm_pfns_clear(struct hmm_range *range,
-			   uint64_t *pfns,
-			   unsigned long addr,
-			   unsigned long end)
+static int hmm_vma_walk_test(unsigned long start, unsigned long end,
+			     struct mm_walk *walk)
 {
-	for (; addr < end; addr += PAGE_SIZE, pfns++)
-		*pfns = range->values[HMM_PFN_NONE];
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	struct vm_area_struct *vma = walk->vma;
+
+	/*
+	 * Skip vma ranges that don't have struct page backing them or
+	 * map I/O devices directly.
+	 */
+	if (vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP))
+		return -EFAULT;
+
+	/*
+	 * If the vma does not allow read access, then assume that it does not
+	 * allow write access either. HMM does not support architectures
+	 * that allow write without read.
+	 */
+	if (!(vma->vm_flags & VM_READ)) {
+		bool fault, write_fault;
+
+		/*
+		 * Check to see if a fault is requested for any page in the
+		 * range.
+		 */
+		hmm_range_need_fault(hmm_vma_walk, range->pfns +
+					((start - range->start) >> PAGE_SHIFT),
+					(end - start) >> PAGE_SHIFT,
+					0, &fault, &write_fault);
+		if (fault || write_fault)
+			return -EFAULT;
+
+		hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
+		hmm_vma_walk->last = end;
+
+		/* Skip this vma and continue processing the next vma. */
+		return 1;
+	}
+
+	return 0;
 }
 
 static const struct mm_walk_ops hmm_walk_ops = {
@@ -603,6 +634,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
 	.pmd_entry = hmm_vma_walk_pmd,
 	.pte_hole = hmm_vma_walk_hole,
 	.hugetlb_entry = hmm_vma_walk_hugetlb_entry,
+	.test_walk = hmm_vma_walk_test,
 };
 
 /**
@@ -635,11 +667,12 @@ static const struct mm_walk_ops hmm_walk_ops = {
  */
 long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 {
-	const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
-	unsigned long start = range->start, end;
-	struct hmm_vma_walk hmm_vma_walk;
+	struct hmm_vma_walk hmm_vma_walk = {
+		.range = range,
+		.last = range->start,
+		.flags = flags,
+	};
 	struct mm_struct *mm = range->notifier->mm;
-	struct vm_area_struct *vma;
 	int ret;
 
 	lockdep_assert_held(&mm->mmap_sem);
@@ -649,53 +682,12 @@ long hmm_range_fault(struct hmm_range *range, unsigned int flags)
 		if (mmu_interval_check_retry(range->notifier,
 					     range->notifier_seq))
 			return -EBUSY;
-
-		vma = find_vma(mm, start);
-		if (vma == NULL || (vma->vm_flags & device_vma))
-			return -EFAULT;
-
-		if (!(vma->vm_flags & VM_READ)) {
-			/*
-			 * If vma do not allow read access, then assume that it
-			 * does not allow write access, either. HMM does not
-			 * support architecture that allow write without read.
-			 */
-			hmm_pfns_clear(range, range->pfns,
-				       range->start, range->end);
-			return -EPERM;
-		}
-
-		hmm_vma_walk.pgmap = NULL;
-		hmm_vma_walk.last = start;
-		hmm_vma_walk.flags = flags;
-		hmm_vma_walk.range = range;
-		end = min(range->end, vma->vm_end);
-
-		walk_page_range(vma->vm_mm, start, end, &hmm_walk_ops,
-				&hmm_vma_walk);
-
-		do {
-			ret = walk_page_range(vma->vm_mm, start, end,
-					      &hmm_walk_ops, &hmm_vma_walk);
-			start = hmm_vma_walk.last;
-
-			/* Keep trying while the range is valid. */
-		} while (ret == -EBUSY &&
-			 !mmu_interval_check_retry(range->notifier,
-						   range->notifier_seq));
-
-		if (ret) {
-			unsigned long i;
-
-			i = (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
-			hmm_pfns_clear(range, &range->pfns[i],
-				       hmm_vma_walk.last, range->end);
-			return ret;
-		}
-
-		start = end;
-	} while (start < range->end);
+		ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
+				      &hmm_walk_ops, &hmm_vma_walk);
+	} while (ret == -EBUSY);
+
+	if (ret)
+		return ret;
 
 	return (hmm_vma_walk.last - range->start) >> PAGE_SHIFT;
 }
 EXPORT_SYMBOL(hmm_range_fault);
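
For reference, callers are expected to drive hmm_range_fault() from a
notifier retry loop while holding mmap_sem, roughly following the pattern
documented in Documentation/vm/hmm.rst of this era (condensed here;
take_lock()/release_lock() stand in for whatever lock the driver uses to
serialize against invalidation, and are not real kernel APIs):

again:
	range.notifier_seq = mmu_interval_read_begin(range.notifier);
	down_read(&mm->mmap_sem);
	ret = hmm_range_fault(&range, 0);
	up_read(&mm->mmap_sem);
	if (ret < 0)
		return ret;

	take_lock(driver->update);
	if (mmu_interval_read_retry(range.notifier, range.notifier_seq)) {
		release_lock(driver->update);
		goto again;	/* raced with an invalidation: retry */
	}
	/* ... program the device page table from range.pfns ... */
	release_lock(driver->update);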