Commit 38c1ddbd, authored by Jiaqi Yan, committed by Andrew Morton

hugetlbfs: improve read HWPOISON hugepage

When a hugepage contains HWPOISON pages, read() fails to read any byte of
the hugepage and returns -EIO, although many bytes in the HWPOISON
hugepage are readable.

Improve this by allowing hugetlbfs_read_iter to return as many bytes as
possible.  For a requested range [offset, offset + len) that contains a
HWPOISON page, return [offset, first HWPOISON page addr); the next read
attempt will fail and return -EIO.

Link: https://lkml.kernel.org/r/20230713001833.3778937-4-jiaqiyan@google.com
Signed-off-by: Jiaqi Yan <jiaqiyan@google.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
parent b79f8eb4
...@@ -282,6 +282,41 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, ...@@ -282,6 +282,41 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
} }
#endif #endif
/*
 * Clamp a read of @bytes bytes, starting @offset bytes into the HWPOISON
 * hugetlb @page, so that it stops before the first raw HWPOISON subpage.
 *
 * Walks the hugepage one PAGE_SIZE subpage at a time, beginning with the
 * subpage that contains @offset, and accumulates the readable length until
 * either the request is satisfied or a raw HWPOISON subpage is met.
 *
 * Returns the number of bytes (at most @bytes) that can be read from @offset
 * without touching a raw HWPOISON subpage; 0 if the subpage containing
 * @offset is itself poisoned.
 *
 * The stepping mirrors the iteration logic of copy_page_to_iter*.
 */
static size_t adjust_range_hwpoison(struct page *page, size_t offset, size_t bytes)
{
	size_t safe = 0;
	size_t chunk;

	/* Jump to the subpage holding @offset; keep only the in-subpage offset. */
	page += offset / PAGE_SIZE;
	offset %= PAGE_SIZE;

	for (;;) {
		/* Nothing in or beyond a raw HWPOISON subpage may be read. */
		if (is_raw_hwpoison_page_in_hugepage(page))
			return safe;

		/* Take what is left of this subpage, capped by the request. */
		chunk = min(bytes, (size_t)PAGE_SIZE - offset);
		safe += chunk;
		bytes -= chunk;

		/* Done once the request is exhausted or no progress was made. */
		if (chunk == 0 || bytes == 0)
			return safe;

		/* Advance; on reaching the subpage end, move to the next subpage. */
		offset += chunk;
		if (offset == PAGE_SIZE) {
			offset = 0;
			page++;
		}
	}
}
/* /*
* Support for read() - Find the page attached to f_mapping and copy out the * Support for read() - Find the page attached to f_mapping and copy out the
* data. This provides functionality similar to filemap_read(). * data. This provides functionality similar to filemap_read().
...@@ -300,7 +335,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) ...@@ -300,7 +335,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
while (iov_iter_count(to)) { while (iov_iter_count(to)) {
struct page *page; struct page *page;
size_t nr, copied; size_t nr, copied, want;
/* nr is the maximum number of bytes to copy from this page */ /* nr is the maximum number of bytes to copy from this page */
nr = huge_page_size(h); nr = huge_page_size(h);
...@@ -328,16 +363,26 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) ...@@ -328,16 +363,26 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
} else { } else {
unlock_page(page); unlock_page(page);
if (PageHWPoison(page)) { if (!PageHWPoison(page))
want = nr;
else {
/*
* Adjust how many bytes safe to read without
* touching the 1st raw HWPOISON subpage after
* offset.
*/
want = adjust_range_hwpoison(page, offset, nr);
if (want == 0) {
put_page(page); put_page(page);
retval = -EIO; retval = -EIO;
break; break;
} }
}
/* /*
* We have the page, copy it to user space buffer. * We have the page, copy it to user space buffer.
*/ */
copied = copy_page_to_iter(page, offset, nr, to); copied = copy_page_to_iter(page, offset, want, to);
put_page(page); put_page(page);
} }
offset += copied; offset += copied;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment