Commit 9e736cf7 authored by Linus Torvalds

Merge tag 'netfs-fixes-20210621' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs

Pull netfs fixes from David Howells:
 "This contains patches to fix netfs_write_begin() and afs_write_end()
  in the following ways:

  (1) In netfs_write_begin(), extract the decision about whether to skip
      a page out to its own helper and have that clear around the region
      to be written, but not clear that region. This requires the
      filesystem to patch it up afterwards if the hole doesn't get
      completely filled.

  (2) Use offset_in_thp() in (1) rather than manually calculating the
      offset into the page.

  (3) Due to (1), afs_write_end() now needs to handle short data write
      into the page by generic_perform_write(). I've adopted an
      analogous approach to ceph of just returning 0 in this case and
      letting the caller go round again.

  It also adds a note that (in the future) the len parameter may extend
  beyond the page allocated. This is because the page allocation is
  deferred to write_begin() and that gets to decide what size of THP to
  allocate."

Jeff Layton points out:
 "The netfs fix in particular fixes a data corruption bug in cephfs"

* tag 'netfs-fixes-20210621' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs:
  netfs: fix test for whether we can skip read when writing beyond EOF
  afs: Fix afs_write_end() to handle short writes
parents c13e3021 827a746f
...@@ -118,6 +118,15 @@ int afs_write_end(struct file *file, struct address_space *mapping, ...@@ -118,6 +118,15 @@ int afs_write_end(struct file *file, struct address_space *mapping,
_enter("{%llx:%llu},{%lx}", _enter("{%llx:%llu},{%lx}",
vnode->fid.vid, vnode->fid.vnode, page->index); vnode->fid.vid, vnode->fid.vnode, page->index);
if (!PageUptodate(page)) {
if (copied < len) {
copied = 0;
goto out;
}
SetPageUptodate(page);
}
if (copied == 0) if (copied == 0)
goto out; goto out;
...@@ -132,8 +141,6 @@ int afs_write_end(struct file *file, struct address_space *mapping, ...@@ -132,8 +141,6 @@ int afs_write_end(struct file *file, struct address_space *mapping,
write_sequnlock(&vnode->cb_lock); write_sequnlock(&vnode->cb_lock);
} }
ASSERT(PageUptodate(page));
if (PagePrivate(page)) { if (PagePrivate(page)) {
priv = page_private(page); priv = page_private(page);
f = afs_page_dirty_from(page, priv); f = afs_page_dirty_from(page, priv);
......
...@@ -1011,12 +1011,42 @@ int netfs_readpage(struct file *file, ...@@ -1011,12 +1011,42 @@ int netfs_readpage(struct file *file,
} }
EXPORT_SYMBOL(netfs_readpage); EXPORT_SYMBOL(netfs_readpage);
static void netfs_clear_thp(struct page *page) /**
* netfs_skip_page_read - prep a page for writing without reading first
* @page: page being prepared
* @pos: starting position for the write
* @len: length of write
*
* In some cases, write_begin doesn't need to read at all:
* - full page write
* - write that lies in a page that is completely beyond EOF
* - write that covers the page from start to EOF or beyond it
*
* If any of these criteria are met, then zero out the unwritten parts
* of the page and return true. Otherwise, return false.
*/
static bool netfs_skip_page_read(struct page *page, loff_t pos, size_t len)
{ {
unsigned int i; struct inode *inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
size_t offset = offset_in_thp(page, pos);
/* Full page write */
if (offset == 0 && len >= thp_size(page))
return true;
/* pos beyond last page in the file */
if (pos - offset >= i_size)
goto zero_out;
/* Write that covers from the start of the page to EOF or beyond */
if (offset == 0 && (pos + len) >= i_size)
goto zero_out;
for (i = 0; i < thp_nr_pages(page); i++) return false;
clear_highpage(page + i); zero_out:
zero_user_segments(page, 0, offset, offset + len, thp_size(page));
return true;
} }
/** /**
...@@ -1024,7 +1054,7 @@ static void netfs_clear_thp(struct page *page) ...@@ -1024,7 +1054,7 @@ static void netfs_clear_thp(struct page *page)
* @file: The file to read from * @file: The file to read from
* @mapping: The mapping to read from * @mapping: The mapping to read from
* @pos: File position at which the write will begin * @pos: File position at which the write will begin
* @len: The length of the write in this page * @len: The length of the write (may extend beyond the end of the page chosen)
* @flags: AOP_* flags * @flags: AOP_* flags
* @_page: Where to put the resultant page * @_page: Where to put the resultant page
* @_fsdata: Place for the netfs to store a cookie * @_fsdata: Place for the netfs to store a cookie
...@@ -1061,8 +1091,6 @@ int netfs_write_begin(struct file *file, struct address_space *mapping, ...@@ -1061,8 +1091,6 @@ int netfs_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
unsigned int debug_index = 0; unsigned int debug_index = 0;
pgoff_t index = pos >> PAGE_SHIFT; pgoff_t index = pos >> PAGE_SHIFT;
int pos_in_page = pos & ~PAGE_MASK;
loff_t size;
int ret; int ret;
DEFINE_READAHEAD(ractl, file, NULL, mapping, index); DEFINE_READAHEAD(ractl, file, NULL, mapping, index);
...@@ -1090,13 +1118,8 @@ int netfs_write_begin(struct file *file, struct address_space *mapping, ...@@ -1090,13 +1118,8 @@ int netfs_write_begin(struct file *file, struct address_space *mapping,
* within the cache granule containing the EOF, in which case we need * within the cache granule containing the EOF, in which case we need
* to preload the granule. * to preload the granule.
*/ */
size = i_size_read(inode);
if (!ops->is_cache_enabled(inode) && if (!ops->is_cache_enabled(inode) &&
((pos_in_page == 0 && len == thp_size(page)) || netfs_skip_page_read(page, pos, len)) {
(pos >= size) ||
(pos_in_page == 0 && (pos + len) >= size))) {
netfs_clear_thp(page);
SetPageUptodate(page);
netfs_stat(&netfs_n_rh_write_zskip); netfs_stat(&netfs_n_rh_write_zskip);
goto have_page_no_wait; goto have_page_no_wait;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment