Commit 9d645db8 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.8-part2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "This reverts the direct io port to iomap infrastructure of btrfs
  merged in the first pull request. We found problems in invalidate page
  that don't seem to be fixable as regressions or without changing iomap
  code that would not affect other filesystems.

  There are four reverts in total, but three of them are followup
  cleanups needed to revert a43a67a2 cleanly. The result is the
  buffer head based implementation of direct io.

  Reverts are not great, but under current circumstances I don't see
  better options"

* tag 'for-5.8-part2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Revert "btrfs: switch to iomap_dio_rw() for dio"
  Revert "fs: remove dio_end_io()"
  Revert "btrfs: remove BTRFS_INODE_READDIO_NEED_LOCK"
  Revert "btrfs: split btrfs_direct_IO to read and write part"
parents 96144c58 55e20bd1
......@@ -14,7 +14,6 @@ config BTRFS_FS
select LZO_DECOMPRESS
select ZSTD_COMPRESS
select ZSTD_DECOMPRESS
select FS_IOMAP
select RAID6_PQ
select XOR_BLOCKS
select SRCU
......
......@@ -28,6 +28,7 @@ enum {
BTRFS_INODE_NEEDS_FULL_SYNC,
BTRFS_INODE_COPY_EVERYTHING,
BTRFS_INODE_IN_DELALLOC_LIST,
BTRFS_INODE_READDIO_NEED_LOCK,
BTRFS_INODE_HAS_PROPS,
BTRFS_INODE_SNAPSHOT_FLUSH,
};
......@@ -312,6 +313,23 @@ struct btrfs_dio_private {
u8 csums[];
};
/*
* Disable DIO read nolock optimization, so new dio readers will be forced
* to grab i_mutex. It is used to avoid the endless truncate due to
* nonlocked dio read.
*/
static inline void btrfs_inode_block_unlocked_dio(struct btrfs_inode *inode)
{
set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
smp_mb();
}
static inline void btrfs_inode_resume_unlocked_dio(struct btrfs_inode *inode)
{
smp_mb__before_atomic();
clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, &inode->runtime_flags);
}
/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
......
......@@ -28,7 +28,6 @@
#include <linux/dynamic_debug.h>
#include <linux/refcount.h>
#include <linux/crc32c.h>
#include <linux/iomap.h>
#include "extent-io-tree.h"
#include "extent_io.h"
#include "extent_map.h"
......@@ -2934,9 +2933,6 @@ int btrfs_writepage_cow_fixup(struct page *page, u64 start, u64 end);
void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
u64 end, int uptodate);
extern const struct dentry_operations btrfs_dentry_operations;
ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
extern const struct iomap_ops btrfs_dio_iomap_ops;
extern const struct iomap_dio_ops btrfs_dops;
/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
......
......@@ -1809,61 +1809,21 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
return num_written ? num_written : ret;
}
static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
const struct iov_iter *iter, loff_t offset)
{
const unsigned int blocksize_mask = fs_info->sectorsize - 1;
if (offset & blocksize_mask)
return -EINVAL;
if (iov_iter_alignment(iter) & blocksize_mask)
return -EINVAL;
return 0;
}
static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
static ssize_t __btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
loff_t pos = iocb->ki_pos;
ssize_t written = 0;
loff_t pos;
ssize_t written;
ssize_t written_buffered;
loff_t endbyte;
int err;
size_t count = 0;
bool relock = false;
if (check_direct_IO(fs_info, from, pos))
goto buffered;
count = iov_iter_count(from);
/*
* If the write DIO is beyond the EOF, we need update the isize, but it
* is protected by i_mutex. So we can not unlock the i_mutex at this
* case.
*/
if (pos + count <= inode->i_size) {
inode_unlock(inode);
relock = true;
} else if (iocb->ki_flags & IOCB_NOWAIT) {
return -EAGAIN;
}
down_read(&BTRFS_I(inode)->dio_sem);
written = iomap_dio_rw(iocb, from, &btrfs_dio_iomap_ops, &btrfs_dops,
is_sync_kiocb(iocb));
up_read(&BTRFS_I(inode)->dio_sem);
if (relock)
inode_lock(inode);
written = generic_file_direct_write(iocb, from);
if (written < 0 || !iov_iter_count(from))
return written;
buffered:
pos = iocb->ki_pos;
written_buffered = btrfs_buffered_write(iocb, from);
if (written_buffered < 0) {
......@@ -2002,7 +1962,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
atomic_inc(&BTRFS_I(inode)->sync_writers);
if (iocb->ki_flags & IOCB_DIRECT) {
num_written = btrfs_direct_write(iocb, from);
num_written = __btrfs_direct_write(iocb, from);
} else {
num_written = btrfs_buffered_write(iocb, from);
if (num_written > 0)
......@@ -3516,54 +3476,9 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
return generic_file_open(inode, filp);
}
static int check_direct_read(struct btrfs_fs_info *fs_info,
const struct iov_iter *iter, loff_t offset)
{
int ret;
int i, seg;
ret = check_direct_IO(fs_info, iter, offset);
if (ret < 0)
return ret;
for (seg = 0; seg < iter->nr_segs; seg++)
for (i = seg + 1; i < iter->nr_segs; i++)
if (iter->iov[seg].iov_base == iter->iov[i].iov_base)
return -EINVAL;
return 0;
}
static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos))
return 0;
inode_lock_shared(inode);
ret = iomap_dio_rw(iocb, to, &btrfs_dio_iomap_ops, &btrfs_dops,
is_sync_kiocb(iocb));
inode_unlock_shared(inode);
return ret;
}
static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
ssize_t ret = 0;
if (iocb->ki_flags & IOCB_DIRECT) {
ret = btrfs_direct_read(iocb, to);
if (ret < 0)
return ret;
}
return generic_file_buffered_read(iocb, to, ret);
}
const struct file_operations btrfs_file_operations = {
.llseek = btrfs_file_llseek,
.read_iter = btrfs_file_read_iter,
.read_iter = generic_file_read_iter,
.splice_read = generic_file_splice_read,
.write_iter = btrfs_file_write_iter,
.mmap = btrfs_file_mmap,
......
This diff is collapsed.
......@@ -386,6 +386,25 @@ static void dio_bio_end_io(struct bio *bio)
spin_unlock_irqrestore(&dio->bio_lock, flags);
}
/**
* dio_end_io - handle the end io action for the given bio
* @bio: The direct io bio thats being completed
*
* This is meant to be called by any filesystem that uses their own dio_submit_t
* so that the DIO specific endio actions are dealt with after the filesystem
* has done it's completion work.
*/
void dio_end_io(struct bio *bio)
{
struct dio *dio = bio->bi_private;
if (dio->is_async)
dio_bio_end_aio(bio);
else
dio_bio_end_io(bio);
}
EXPORT_SYMBOL_GPL(dio_end_io);
static inline void
dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
struct block_device *bdev,
......
......@@ -3204,6 +3204,8 @@ enum {
DIO_SKIP_HOLES = 0x02,
};
void dio_end_io(struct bio *bio);
ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, struct iov_iter *iter,
get_block_t get_block,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment