Commit 37187df4 authored by Linus Torvalds

Merge tag 'iomap-5.10-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull iomap updates from Darrick Wong:
 "There's not a lot of new stuff going on here -- a little bit of code
  refactoring to make iomap workable with btrfs' fsync locking model,
  cleanups in preparation for adding THP support for filesystems, and
  fixing a data corruption issue for blocksize < pagesize filesystems.

  Summary:

   - Don't WARN_ON weird states that unprivileged users can create.

   - Don't invalidate page cache when direct writes want to fall back to
     buffered.

   - Fix some problems when readahead I/Os fail.

   - Fix a problem where inline data pages weren't getting flushed
     during an unshare operation.

   - Rework iomap to support arbitrarily many blocks per page in
     preparation to support THP for the page cache (see the sketch after
     this summary).

   - Fix a bug in the blocksize < pagesize buffered io path where we
     could fail to initialize the many-blocks-per-page uptodate bitmap
     correctly when the backing page is actually up to date. This could
     cause us to forget to write out dirty pages.

   - Split out the generic_write_sync at the end of the directio write
     path so that btrfs can drop the inode lock before sync'ing the
     file.

   - Call inode_dio_end before trying to sync the file after an O_DSYNC
     direct write (instead of afterwards) to match the behavior of the
     old directio code"

* tag 'iomap-5.10-merge-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  iomap: Call inode_dio_end() before generic_write_sync()
  iomap: Allow filesystem to call iomap_dio_complete without i_rwsem
  iomap: Set all uptodate bits for an Uptodate page
  iomap: Change calling convention for zeroing
  iomap: Convert iomap_write_end types
  iomap: Convert write_count to write_bytes_pending
  iomap: Convert read_count to read_bytes_pending
  iomap: Support arbitrarily many blocks per page
  iomap: Use bitmap ops to set uptodate bits
  iomap: Use kzalloc to allocate iomap_page
  fs: Introduce i_blocks_per_page
  iomap: Fix misplaced page flushing
  iomap: Use round_down/round_up macros in __iomap_write_begin
  iomap: Mark read blocks uptodate in write_begin
  iomap: Clear page error before beginning a write
  iomap: Fix direct I/O write consistency check
  iomap: fix WARN_ON_ONCE() from unprivileged users
parents 531d29b0 1a31182e
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1037,18 +1037,18 @@ static vm_fault_t dax_load_hole(struct xa_state *xas,
 	return ret;
 }
 
-int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
-		   struct iomap *iomap)
+s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap)
 {
 	sector_t sector = iomap_sector(iomap, pos & PAGE_MASK);
 	pgoff_t pgoff;
 	long rc, id;
 	void *kaddr;
 	bool page_aligned = false;
+	unsigned offset = offset_in_page(pos);
+	unsigned size = min_t(u64, PAGE_SIZE - offset, length);
 
 	if (IS_ALIGNED(sector << SECTOR_SHIFT, PAGE_SIZE) &&
-	    IS_ALIGNED(size, PAGE_SIZE))
+	    (size == PAGE_SIZE))
 		page_aligned = true;
 
 	rc = bdev_dax_pgoff(iomap->bdev, sector, PAGE_SIZE, &pgoff);
@@ -1058,8 +1058,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
 	id = dax_read_lock();
 
 	if (page_aligned)
-		rc = dax_zero_page_range(iomap->dax_dev, pgoff,
-					 size >> PAGE_SHIFT);
+		rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1);
 	else
 		rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL);
 	if (rc < 0) {
@@ -1072,7 +1071,7 @@ int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
 		dax_flush(iomap->dax_dev, kaddr + offset, size);
 	}
 	dax_read_unlock(id);
-	return 0;
+	return size;
 }
 
 static loff_t
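
With dax_iomap_zero() now returning the number of bytes zeroed within one
page (or a negative errno) instead of taking an offset/size pair, the
caller is expected to loop page by page. A rough sketch of such a loop
follows; the function and variable names are hypothetical (the real
consumer is iomap's zero-range path):

	/* Sketch: zero 'length' bytes starting at 'pos', one page at a time. */
	static loff_t zero_range(loff_t pos, u64 length, struct iomap *iomap)
	{
		loff_t written = 0;

		while (length > 0) {
			s64 bytes = dax_iomap_zero(pos, length, iomap);

			if (bytes < 0)
				return bytes;	/* errno from the DAX layer */
			pos += bytes;
			length -= bytes;
			written += bytes;
		}
		return written;
	}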
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -76,7 +76,7 @@ static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
 	dio->submit.cookie = submit_bio(bio);
 }
 
-static ssize_t iomap_dio_complete(struct iomap_dio *dio)
+ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
 	const struct iomap_dio_ops *dops = dio->dops;
 	struct kiocb *iocb = dio->iocb;
@@ -108,7 +108,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	 * ->end_io() when necessary, otherwise a racing buffer read would cache
 	 * zeros from unwritten extents.
 	 */
-	if (!dio->error &&
+	if (!dio->error && dio->size &&
 	    (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
 		int err;
 		err = invalidate_inode_pages2_range(inode->i_mapping,
@@ -118,6 +118,7 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 			dio_warn_stale_pagecache(iocb->ki_filp);
 	}
 
+	inode_dio_end(file_inode(iocb->ki_filp));
 	/*
 	 * If this is a DSYNC write, make sure we push it to stable storage now
 	 * that we've written data.
@@ -125,11 +126,11 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	if (ret > 0 && (dio->flags & IOMAP_DIO_NEED_SYNC))
 		ret = generic_write_sync(iocb, ret);
 
-	inode_dio_end(file_inode(iocb->ki_filp));
 	kfree(dio);
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(iomap_dio_complete);
 
 static void iomap_dio_complete_work(struct work_struct *work)
 {
@@ -388,6 +389,16 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 		return iomap_dio_bio_actor(inode, pos, length, dio, iomap);
 	case IOMAP_INLINE:
 		return iomap_dio_inline_actor(inode, pos, length, dio, iomap);
+	case IOMAP_DELALLOC:
+		/*
+		 * DIO is not serialised against mmap() access at all, and so
+		 * if the page_mkwrite occurs between the writeback and the
+		 * iomap_apply() call in the DIO path, then it will see the
+		 * DELALLOC block that the page-mkwrite allocated.
+		 */
+		pr_warn_ratelimited("Direct I/O collision with buffered writes! File: %pD4 Comm: %.20s\n",
+				    dio->iocb->ki_filp, current->comm);
+		return -EIO;
 	default:
 		WARN_ON_ONCE(1);
 		return -EIO;
@@ -406,8 +417,8 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
  * Returns -ENOTBLK In case of a page invalidation invalidation failure for
  * writes.  The callers needs to fall back to buffered I/O in this case.
  */
-ssize_t
-iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+struct iomap_dio *
+__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		bool wait_for_completion)
 {
@@ -421,14 +432,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	struct iomap_dio *dio;
 
 	if (!count)
-		return 0;
+		return NULL;
 
 	if (WARN_ON(is_sync_kiocb(iocb) && !wait_for_completion))
-		return -EIO;
+		return ERR_PTR(-EIO);
 
 	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
 	if (!dio)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	dio->iocb = iocb;
 	atomic_set(&dio->ref, 1);
@@ -558,7 +569,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	dio->wait_for_completion = wait_for_completion;
 	if (!atomic_dec_and_test(&dio->ref)) {
 		if (!wait_for_completion)
-			return -EIOCBQUEUED;
+			return ERR_PTR(-EIOCBQUEUED);
 
 		for (;;) {
 			set_current_state(TASK_UNINTERRUPTIBLE);
@@ -574,10 +585,26 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		__set_current_state(TASK_RUNNING);
 	}
 
-	return iomap_dio_complete(dio);
+	return dio;
 
 out_free_dio:
 	kfree(dio);
-	return ret;
+	if (ret)
+		return ERR_PTR(ret);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(__iomap_dio_rw);
+
+ssize_t
+iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
+		bool wait_for_completion)
+{
+	struct iomap_dio *dio;
+
+	dio = __iomap_dio_rw(iocb, iter, ops, dops, wait_for_completion);
+	if (IS_ERR_OR_NULL(dio))
+		return PTR_ERR_OR_ZERO(dio);
+	return iomap_dio_complete(dio);
 }
 EXPORT_SYMBOL_GPL(iomap_dio_rw);
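
The point of exporting the two halves is that a filesystem can now drop
its own locks between issuing the I/O and completing it. A sketch of how
a caller might use this for the btrfs-style locking model described in
the pull request; the myfs_* names and ops tables are placeholders, not
real code:

	static ssize_t myfs_dio_write(struct kiocb *iocb, struct iov_iter *from)
	{
		struct inode *inode = file_inode(iocb->ki_filp);
		struct iomap_dio *dio;

		inode_lock(inode);
		dio = __iomap_dio_rw(iocb, from, &myfs_iomap_ops, &myfs_dio_ops,
				     is_sync_kiocb(iocb));
		inode_unlock(inode);

		if (IS_ERR_OR_NULL(dio))
			return PTR_ERR_OR_ZERO(dio);
		/* generic_write_sync() for O_DSYNC now runs without i_rwsem. */
		return iomap_dio_complete(dio);
	}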
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -473,7 +473,7 @@ static int metapage_readpage(struct file *fp, struct page *page)
 	struct inode *inode = page->mapping->host;
 	struct bio *bio = NULL;
 	int block_offset;
-	int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+	int blocks_per_page = i_blocks_per_page(inode, page);
 	sector_t page_start;	/* address of page in fs blocks */
 	sector_t pblock;
 	int xlen;
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -544,7 +544,7 @@ xfs_discard_page(
 			page, ip->i_ino, offset);
 
 	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-			PAGE_SIZE / i_blocksize(inode));
+			i_blocks_per_page(inode, page));
 	if (error && !XFS_FORCED_SHUTDOWN(mp))
 		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
 out_invalidate:
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -231,8 +231,7 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 				      pgoff_t index);
-int dax_iomap_zero(loff_t pos, unsigned offset, unsigned size,
-		   struct iomap *iomap);
+s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap);
 static inline bool dax_mapping(struct address_space *mapping)
 {
 	return mapping->host && IS_DAX(mapping->host);
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -13,6 +13,7 @@
 struct address_space;
 struct fiemap_extent_info;
 struct inode;
+struct iomap_dio;
 struct iomap_writepage_ctx;
 struct iov_iter;
 struct kiocb;
@@ -258,6 +259,10 @@ struct iomap_dio_ops {
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		bool wait_for_completion);
+struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
+		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
+		bool wait_for_completion);
+ssize_t iomap_dio_complete(struct iomap_dio *dio);
 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
 
 #ifdef CONFIG_SWAP
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -927,4 +927,20 @@ static inline int page_mkwrite_check_truncate(struct page *page,
 	return offset;
 }
 
+/**
+ * i_blocks_per_page - How many blocks fit in this page.
+ * @inode: The inode which contains the blocks.
+ * @page: The page (head page if the page is a THP).
+ *
+ * If the block size is larger than the size of this page, return zero.
+ *
+ * Context: The caller should hold a refcount on the page to prevent it
+ * from being split.
+ * Return: The number of filesystem blocks covered by this page.
+ */
+static inline
+unsigned int i_blocks_per_page(struct inode *inode, struct page *page)
+{
+	return thp_size(page) >> inode->i_blkbits;
+}
 #endif /* _LINUX_PAGEMAP_H */
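
i_blocks_per_page() is what lets per-block state scale with the (possibly
huge) page size: 4 blocks for a 4k page with 1k blocks, 512 for a 2M THP
with 4k blocks. A sketch of an allocation sized from it, approximating how
iomap sizes its per-page structure (subpage_state is the illustrative
struct from the sketch earlier, not the kernel's real type):

	unsigned int nr = i_blocks_per_page(inode, page);
	struct subpage_state *sp;

	/* kzalloc so every block starts !uptodate; the flexible bitmap is
	 * sized in longs to hold one bit per block. */
	sp = kzalloc(struct_size(sp, uptodate, BITS_TO_LONGS(nr)), GFP_NOFS);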