Commit b6316429 authored by Josef Bacik, committed by Chris Mason

Btrfs: force a page fault if we have a short copy on a page boundary

A user reported a problem where ceph was getting into 100% CPU usage while doing
some writing.  It turns out it's because we were doing a short write on a page
that was not uptodate, which means we'd fall back to one page at a time and
fault the page in.  The problem is that our position is on the page boundary, so
our fault-in logic wasn't actually reading the page, and we'd just spin forever
or until the page got read in by somebody else.  This patch forces a readpage if
we end up doing a short copy.  Alexandre could reproduce this easily with ceph
and reports it fixes his problem.  I also wrote a reproducer that no longer
hangs my box with this patch.  Thanks,
Reported-and-tested-by: Alexandre Oliva <aoliva@redhat.com>
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent b6f3409b
...@@ -1036,11 +1036,13 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, ...@@ -1036,11 +1036,13 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
* on error we return an unlocked page and the error value * on error we return an unlocked page and the error value
* on success we return a locked page and 0 * on success we return a locked page and 0
*/ */
static int prepare_uptodate_page(struct page *page, u64 pos) static int prepare_uptodate_page(struct page *page, u64 pos,
bool force_uptodate)
{ {
int ret = 0; int ret = 0;
if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) { if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
!PageUptodate(page)) {
ret = btrfs_readpage(NULL, page); ret = btrfs_readpage(NULL, page);
if (ret) if (ret)
return ret; return ret;
...@@ -1061,7 +1063,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos) ...@@ -1061,7 +1063,7 @@ static int prepare_uptodate_page(struct page *page, u64 pos)
static noinline int prepare_pages(struct btrfs_root *root, struct file *file, static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
struct page **pages, size_t num_pages, struct page **pages, size_t num_pages,
loff_t pos, unsigned long first_index, loff_t pos, unsigned long first_index,
size_t write_bytes) size_t write_bytes, bool force_uptodate)
{ {
struct extent_state *cached_state = NULL; struct extent_state *cached_state = NULL;
int i; int i;
...@@ -1086,10 +1088,11 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, ...@@ -1086,10 +1088,11 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
} }
if (i == 0) if (i == 0)
err = prepare_uptodate_page(pages[i], pos); err = prepare_uptodate_page(pages[i], pos,
force_uptodate);
if (i == num_pages - 1) if (i == num_pages - 1)
err = prepare_uptodate_page(pages[i], err = prepare_uptodate_page(pages[i],
pos + write_bytes); pos + write_bytes, false);
if (err) { if (err) {
page_cache_release(pages[i]); page_cache_release(pages[i]);
faili = i - 1; faili = i - 1;
...@@ -1158,6 +1161,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, ...@@ -1158,6 +1161,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
size_t num_written = 0; size_t num_written = 0;
int nrptrs; int nrptrs;
int ret = 0; int ret = 0;
bool force_page_uptodate = false;
nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) / nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
...@@ -1200,7 +1204,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, ...@@ -1200,7 +1204,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* contents of pages from loop to loop * contents of pages from loop to loop
*/ */
ret = prepare_pages(root, file, pages, num_pages, ret = prepare_pages(root, file, pages, num_pages,
pos, first_index, write_bytes); pos, first_index, write_bytes,
force_page_uptodate);
if (ret) { if (ret) {
btrfs_delalloc_release_space(inode, btrfs_delalloc_release_space(inode,
num_pages << PAGE_CACHE_SHIFT); num_pages << PAGE_CACHE_SHIFT);
...@@ -1217,12 +1222,15 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, ...@@ -1217,12 +1222,15 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (copied < write_bytes) if (copied < write_bytes)
nrptrs = 1; nrptrs = 1;
if (copied == 0) if (copied == 0) {
force_page_uptodate = true;
dirty_pages = 0; dirty_pages = 0;
else } else {
force_page_uptodate = false;
dirty_pages = (copied + offset + dirty_pages = (copied + offset +
PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT; PAGE_CACHE_SHIFT;
}
/* /*
* If we had a short copy we need to release the excess delaloc * If we had a short copy we need to release the excess delaloc
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment