Commit b47ec80b authored by Dave Chinner's avatar Dave Chinner

Merge branch 'xfs-4.8-split-dax-dio' into for-next

parents bbfeb614 16d4d435
......@@ -1303,7 +1303,7 @@ xfs_get_blocks_dax_fault(
* whereas if we have flags set we will always be called in task context
* (i.e. from a workqueue).
*/
STATIC int
int
xfs_end_io_direct_write(
struct kiocb *iocb,
loff_t offset,
......@@ -1374,24 +1374,10 @@ xfs_vm_direct_IO(
struct kiocb *iocb,
struct iov_iter *iter)
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
dio_iodone_t *endio = NULL;
int flags = 0;
struct block_device *bdev;
if (iov_iter_rw(iter) == WRITE) {
endio = xfs_end_io_direct_write;
flags = DIO_ASYNC_EXTEND;
}
if (IS_DAX(inode)) {
return dax_do_io(iocb, inode, iter,
xfs_get_blocks_direct, endio, 0);
}
bdev = xfs_find_bdev_for_inode(inode);
return __blockdev_direct_IO(iocb, inode, bdev, iter,
xfs_get_blocks_direct, endio, NULL, flags);
/*
* We just need the method present so that open/fcntl allow direct I/O.
*/
return -EINVAL;
}
STATIC sector_t
......
......@@ -60,6 +60,9 @@ int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
struct buffer_head *map_bh, int create);
int xfs_end_io_direct_write(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private);
extern void xfs_count_page_state(struct page *, int *, int *);
extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
......
......@@ -239,48 +239,35 @@ xfs_file_fsync(
}
STATIC ssize_t
xfs_file_read_iter(
xfs_file_dio_aio_read(
struct kiocb *iocb,
struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
size_t size = iov_iter_count(to);
loff_t isize = i_size_read(inode);
size_t count = iov_iter_count(to);
struct iov_iter data;
struct xfs_buftarg *target;
ssize_t ret = 0;
int ioflags = 0;
xfs_fsize_t n;
loff_t pos = iocb->ki_pos;
XFS_STATS_INC(mp, xs_read_calls);
trace_xfs_file_direct_read(ip, count, iocb->ki_pos);
if (unlikely(iocb->ki_flags & IOCB_DIRECT))
ioflags |= XFS_IO_ISDIRECT;
if (file->f_mode & FMODE_NOCMTIME)
ioflags |= XFS_IO_INVIS;
if (!count)
return 0; /* skip atime */
if (XFS_IS_REALTIME_INODE(ip))
target = ip->i_mount->m_rtdev_targp;
else
target = ip->i_mount->m_ddev_targp;
if ((ioflags & XFS_IO_ISDIRECT) && !IS_DAX(inode)) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
/* DIO must be aligned to device logical sector size */
if ((pos | size) & target->bt_logical_sectormask) {
if (pos == i_size_read(inode))
if ((iocb->ki_pos | count) & target->bt_logical_sectormask) {
if (iocb->ki_pos == isize)
return 0;
return -EINVAL;
}
}
n = mp->m_super->s_maxbytes - pos;
if (n <= 0 || size == 0)
return 0;
if (n < size)
size = n;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
/*
* Locking is a bit tricky here. If we take an exclusive lock for direct
......@@ -293,7 +280,7 @@ xfs_file_read_iter(
* serialisation.
*/
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
if ((ioflags & XFS_IO_ISDIRECT) && inode->i_mapping->nrpages) {
if (mapping->nrpages) {
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
......@@ -308,8 +295,8 @@ xfs_file_read_iter(
* flush and reduce the chances of repeated iolock cycles going
* forward.
*/
if (inode->i_mapping->nrpages) {
ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
if (mapping->nrpages) {
ret = filemap_write_and_wait(mapping);
if (ret) {
xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
......@@ -320,20 +307,95 @@ xfs_file_read_iter(
* we fail to invalidate a page, but this should never
* happen on XFS. Warn if it does fail.
*/
ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
ret = invalidate_inode_pages2(mapping);
WARN_ON_ONCE(ret);
ret = 0;
}
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
}
trace_xfs_file_read(ip, size, pos, ioflags);
data = *to;
ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
xfs_get_blocks_direct, NULL, NULL, 0);
if (ret > 0) {
iocb->ki_pos += ret;
iov_iter_advance(to, ret);
}
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
file_accessed(iocb->ki_filp);
return ret;
}
STATIC ssize_t
xfs_file_dax_read(
struct kiocb *iocb,
struct iov_iter *to)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct iov_iter data = *to;
size_t count = iov_iter_count(to);
ssize_t ret = 0;
trace_xfs_file_dax_read(ip, count, iocb->ki_pos);
if (!count)
return 0; /* skip atime */
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct, NULL, 0);
if (ret > 0) {
iocb->ki_pos += ret;
iov_iter_advance(to, ret);
}
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
file_accessed(iocb->ki_filp);
return ret;
}
STATIC ssize_t
xfs_file_buffered_aio_read(
struct kiocb *iocb,
struct iov_iter *to)
{
struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
ssize_t ret;
trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
ret = generic_file_read_iter(iocb, to);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
STATIC ssize_t
xfs_file_read_iter(
struct kiocb *iocb,
struct iov_iter *to)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_mount *mp = XFS_I(inode)->i_mount;
ssize_t ret = 0;
XFS_STATS_INC(mp, xs_read_calls);
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
if (IS_DAX(inode))
ret = xfs_file_dax_read(iocb, to);
else if (iocb->ki_flags & IOCB_DIRECT)
ret = xfs_file_dio_aio_read(iocb, to);
else
ret = xfs_file_buffered_aio_read(iocb, to);
if (ret > 0)
XFS_STATS_ADD(mp, xs_read_bytes, ret);
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
......@@ -346,18 +408,14 @@ xfs_file_splice_read(
unsigned int flags)
{
struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
int ioflags = 0;
ssize_t ret;
XFS_STATS_INC(ip->i_mount, xs_read_calls);
if (infilp->f_mode & FMODE_NOCMTIME)
ioflags |= XFS_IO_INVIS;
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
trace_xfs_file_splice_read(ip, count, *ppos);
/*
* DAX inodes cannot ues the page cache for splice, so we have to push
......@@ -553,8 +611,7 @@ xfs_file_dio_aio_write(
mp->m_rtdev_targp : mp->m_ddev_targp;
/* DIO must be aligned to device logical sector size */
if (!IS_DAX(inode) &&
((iocb->ki_pos | count) & target->bt_logical_sectormask))
if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
return -EINVAL;
/* "unaligned" here means not aligned to a filesystem block */
......@@ -593,7 +650,7 @@ xfs_file_dio_aio_write(
end = iocb->ki_pos + count - 1;
/*
* See xfs_file_read_iter() for why we do a full-file flush here.
* See xfs_file_dio_aio_read() for why we do a full-file flush here.
*/
if (mapping->nrpages) {
ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
......@@ -620,10 +677,12 @@ xfs_file_dio_aio_write(
iolock = XFS_IOLOCK_SHARED;
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
data = *from;
ret = mapping->a_ops->direct_IO(iocb, &data);
ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
xfs_get_blocks_direct, xfs_end_io_direct_write,
NULL, DIO_ASYNC_EXTEND);
/* see generic_file_direct_write() for why this is necessary */
if (mapping->nrpages) {
......@@ -640,10 +699,70 @@ xfs_file_dio_aio_write(
xfs_rw_iunlock(ip, iolock);
/*
* No fallback to buffered IO on errors for XFS. DAX can result in
* partial writes, but direct IO will either complete fully or fail.
* No fallback to buffered IO on errors for XFS, direct IO will either
* complete fully or fail.
*/
ASSERT(ret < 0 || ret == count);
return ret;
}
STATIC ssize_t
xfs_file_dax_write(
struct kiocb *iocb,
struct iov_iter *from)
{
struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
ssize_t ret = 0;
int unaligned_io = 0;
int iolock;
struct iov_iter data;
/* "unaligned" here means not aligned to a filesystem block */
if ((iocb->ki_pos & mp->m_blockmask) ||
((iocb->ki_pos + iov_iter_count(from)) & mp->m_blockmask)) {
unaligned_io = 1;
iolock = XFS_IOLOCK_EXCL;
} else if (mapping->nrpages) {
iolock = XFS_IOLOCK_EXCL;
} else {
iolock = XFS_IOLOCK_SHARED;
}
xfs_rw_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
/*
* Yes, even DAX files can have page cache attached to them: A zeroed
* page is inserted into the pagecache when we have to serve a write
* fault on a hole. It should never be dirtied and can simply be
* dropped from the pagecache once we get real data for the page.
*/
ASSERT(ret < 0 || ret == count || IS_DAX(VFS_I(ip)));
if (mapping->nrpages) {
ret = invalidate_inode_pages2(mapping);
WARN_ON_ONCE(ret);
}
if (iolock == XFS_IOLOCK_EXCL && !unaligned_io) {
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
iolock = XFS_IOLOCK_SHARED;
}
trace_xfs_file_dax_write(ip, iov_iter_count(from), iocb->ki_pos);
data = *from;
ret = dax_do_io(iocb, inode, &data, xfs_get_blocks_direct,
xfs_end_io_direct_write, 0);
if (ret > 0) {
iocb->ki_pos += ret;
iov_iter_advance(from, ret);
}
out:
xfs_rw_iunlock(ip, iolock);
return ret;
}
......@@ -670,8 +789,7 @@ xfs_file_buffered_aio_write(
current->backing_dev_info = inode_to_bdi(inode);
write_retry:
trace_xfs_file_buffered_write(ip, iov_iter_count(from),
iocb->ki_pos, 0);
trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
if (likely(ret >= 0))
iocb->ki_pos += ret;
......@@ -726,7 +844,9 @@ xfs_file_write_iter(
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
if (IS_DAX(inode))
ret = xfs_file_dax_write(iocb, from);
else if (iocb->ki_flags & IOCB_DIRECT)
ret = xfs_file_dio_aio_write(iocb, from);
else
ret = xfs_file_buffered_aio_write(iocb, from);
......
......@@ -473,14 +473,4 @@ do { \
extern struct kmem_zone *xfs_inode_zone;
/*
* Flags for read/write calls
*/
#define XFS_IO_ISDIRECT 0x00001 /* bypass page cache */
#define XFS_IO_INVIS 0x00002 /* don't update inode timestamps */
#define XFS_IO_FLAGS \
{ XFS_IO_ISDIRECT, "DIRECT" }, \
{ XFS_IO_INVIS, "INVIS"}
#endif /* __XFS_INODE_H__ */
......@@ -595,13 +595,12 @@ xfs_attrmulti_by_handle(
int
xfs_ioc_space(
struct xfs_inode *ip,
struct inode *inode,
struct file *filp,
int ioflags,
unsigned int cmd,
xfs_flock64_t *bf)
{
struct inode *inode = file_inode(filp);
struct xfs_inode *ip = XFS_I(inode);
struct iattr iattr;
enum xfs_prealloc_flags flags = 0;
uint iolock = XFS_IOLOCK_EXCL;
......@@ -626,7 +625,7 @@ xfs_ioc_space(
if (filp->f_flags & O_DSYNC)
flags |= XFS_PREALLOC_SYNC;
if (ioflags & XFS_IO_INVIS)
if (filp->f_mode & FMODE_NOCMTIME)
flags |= XFS_PREALLOC_INVISIBLE;
error = mnt_want_write_file(filp);
......@@ -1464,8 +1463,7 @@ xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
STATIC int
xfs_ioc_getbmap(
struct xfs_inode *ip,
int ioflags,
struct file *file,
unsigned int cmd,
void __user *arg)
{
......@@ -1479,10 +1477,10 @@ xfs_ioc_getbmap(
return -EINVAL;
bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
if (ioflags & XFS_IO_INVIS)
if (file->f_mode & FMODE_NOCMTIME)
bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
error = xfs_getbmap(XFS_I(file_inode(file)), &bmx, xfs_getbmap_format,
(__force struct getbmap *)arg+1);
if (error)
return error;
......@@ -1630,12 +1628,8 @@ xfs_file_ioctl(
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
void __user *arg = (void __user *)p;
int ioflags = 0;
int error;
if (filp->f_mode & FMODE_NOCMTIME)
ioflags |= XFS_IO_INVIS;
trace_xfs_file_ioctl(ip);
switch (cmd) {
......@@ -1654,7 +1648,7 @@ xfs_file_ioctl(
if (copy_from_user(&bf, arg, sizeof(bf)))
return -EFAULT;
return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
return xfs_ioc_space(filp, cmd, &bf);
}
case XFS_IOC_DIOINFO: {
struct dioattr da;
......@@ -1713,7 +1707,7 @@ xfs_file_ioctl(
case XFS_IOC_GETBMAP:
case XFS_IOC_GETBMAPA:
return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
return xfs_ioc_getbmap(filp, cmd, arg);
case XFS_IOC_GETBMAPX:
return xfs_ioc_getbmapx(ip, arg);
......
......@@ -20,10 +20,7 @@
extern int
xfs_ioc_space(
struct xfs_inode *ip,
struct inode *inode,
struct file *filp,
int ioflags,
unsigned int cmd,
xfs_flock64_t *bf);
......
......@@ -532,12 +532,8 @@ xfs_file_compat_ioctl(
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
void __user *arg = (void __user *)p;
int ioflags = 0;
int error;
if (filp->f_mode & FMODE_NOCMTIME)
ioflags |= XFS_IO_INVIS;
trace_xfs_file_compat_ioctl(ip);
switch (cmd) {
......@@ -589,7 +585,7 @@ xfs_file_compat_ioctl(
if (xfs_compat_flock64_copyin(&bf, arg))
return -EFAULT;
cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
return xfs_ioc_space(filp, cmd, &bf);
}
case XFS_IOC_FSGEOMETRY_V1_32:
return xfs_compat_ioc_fsgeometry_v1(mp, arg);
......
......@@ -1135,15 +1135,14 @@ TRACE_EVENT(xfs_log_assign_tail_lsn,
)
DECLARE_EVENT_CLASS(xfs_file_class,
TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
TP_ARGS(ip, count, offset, flags),
TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset),
TP_ARGS(ip, count, offset),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_fsize_t, size)
__field(loff_t, offset)
__field(size_t, count)
__field(int, flags)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
......@@ -1151,25 +1150,25 @@ DECLARE_EVENT_CLASS(xfs_file_class,
__entry->size = ip->i_d.di_size;
__entry->offset = offset;
__entry->count = count;
__entry->flags = flags;
),
TP_printk("dev %d:%d ino 0x%llx size 0x%llx "
"offset 0x%llx count 0x%zx ioflags %s",
TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count 0x%zx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->size,
__entry->offset,
__entry->count,
__print_flags(__entry->flags, "|", XFS_IO_FLAGS))
__entry->count)
)
#define DEFINE_RW_EVENT(name) \
DEFINE_EVENT(xfs_file_class, name, \
TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
TP_ARGS(ip, count, offset, flags))
DEFINE_RW_EVENT(xfs_file_read);
TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset), \
TP_ARGS(ip, count, offset))
DEFINE_RW_EVENT(xfs_file_buffered_read);
DEFINE_RW_EVENT(xfs_file_direct_read);
DEFINE_RW_EVENT(xfs_file_dax_read);
DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_dax_write);
DEFINE_RW_EVENT(xfs_file_splice_read);
DECLARE_EVENT_CLASS(xfs_page_class,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment