Commit 50964d31 authored by Christoph Hellwig, committed by Nathan Scott

[XFS] Rework parts of the write path so that when a direct write
needs to fall back to buffered I/O in the generic code, we are able to
relock the XFS inode correctly.
Signed-off-by: Nathan Scott <nathans@sgi.com>
parent 31316be2
...@@ -116,17 +116,10 @@ __linvfs_write( ...@@ -116,17 +116,10 @@ __linvfs_write(
ssize_t rval; ssize_t rval;
BUG_ON(iocb->ki_pos != pos); BUG_ON(iocb->ki_pos != pos);
if (unlikely(file->f_flags & O_DIRECT)) { if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT; ioflags |= IO_ISDIRECT;
VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos,
ioflags, NULL, rval);
} else {
down(&inode->i_sem);
VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos,
ioflags, NULL, rval);
up(&inode->i_sem);
}
VOP_WRITE(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL, rval);
return rval; return rval;
} }
...@@ -214,17 +207,10 @@ __linvfs_writev( ...@@ -214,17 +207,10 @@ __linvfs_writev(
init_sync_kiocb(&kiocb, file); init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = *ppos; kiocb.ki_pos = *ppos;
if (unlikely(file->f_flags & O_DIRECT)) { if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT; ioflags |= IO_ISDIRECT;
VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos,
ioflags, NULL, rval);
} else {
down(&inode->i_sem);
VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos,
ioflags, NULL, rval);
up(&inode->i_sem);
}
VOP_WRITE(vp, &kiocb, iov, nr_segs, &kiocb.ki_pos, ioflags, NULL, rval);
if (rval == -EIOCBQUEUED) if (rval == -EIOCBQUEUED)
rval = wait_on_sync_kiocb(&kiocb); rval = wait_on_sync_kiocb(&kiocb);
......
...@@ -225,40 +225,11 @@ xfs_inval_cached_pages( ...@@ -225,40 +225,11 @@ xfs_inval_cached_pages(
int write, int write,
int relock) int relock)
{ {
xfs_mount_t *mp; if (VN_CACHED(vp)) {
xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
if (!VN_CACHED(vp)) { VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
return;
}
mp = io->io_mount;
/*
* We need to get the I/O lock exclusively in order
* to safely invalidate pages and mappings.
*/
if (relock) {
XFS_IUNLOCK(mp, io, XFS_IOLOCK_SHARED);
XFS_ILOCK(mp, io, XFS_IOLOCK_EXCL);
} }
/* Writing beyond EOF creates a hole that must be zeroed */
if (write && (offset > XFS_SIZE(mp, io))) {
xfs_fsize_t isize;
XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
isize = XFS_SIZE(mp, io);
if (offset > isize) {
xfs_zero_eof(vp, io, offset, isize, offset);
}
XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
}
xfs_inval_cached_trace(io, offset, -1, ctooff(offtoct(offset)), -1);
VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(offset)), -1, FI_REMAPF_LOCKED);
if (relock) {
XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
}
} }
ssize_t /* bytes read, or (-) error */ ssize_t /* bytes read, or (-) error */
...@@ -637,32 +608,34 @@ xfs_write( ...@@ -637,32 +608,34 @@ xfs_write(
bhv_desc_t *bdp, bhv_desc_t *bdp,
struct kiocb *iocb, struct kiocb *iocb,
const struct iovec *iovp, const struct iovec *iovp,
unsigned int segs, unsigned int nsegs,
loff_t *offset, loff_t *offset,
int ioflags, int ioflags,
cred_t *credp) cred_t *credp)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
size_t size = 0; struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
unsigned long segs = nsegs;
xfs_inode_t *xip; xfs_inode_t *xip;
xfs_mount_t *mp; xfs_mount_t *mp;
ssize_t ret; ssize_t ret = 0, error = 0;
int error = 0;
xfs_fsize_t isize, new_size; xfs_fsize_t isize, new_size;
xfs_fsize_t n, limit;
xfs_iocore_t *io; xfs_iocore_t *io;
vnode_t *vp; vnode_t *vp;
unsigned long seg; unsigned long seg;
int iolock; int iolock;
int eventsent = 0; int eventsent = 0;
vrwlock_t locktype; vrwlock_t locktype;
size_t ocount = 0, count;
loff_t pos;
int need_isem = 1, need_flush = 0;
XFS_STATS_INC(xs_write_calls); XFS_STATS_INC(xs_write_calls);
vp = BHV_TO_VNODE(bdp); vp = BHV_TO_VNODE(bdp);
xip = XFS_BHVTOI(bdp); xip = XFS_BHVTOI(bdp);
/* START copy & waste from filemap.c */
for (seg = 0; seg < segs; seg++) { for (seg = 0; seg < segs; seg++) {
const struct iovec *iv = &iovp[seg]; const struct iovec *iv = &iovp[seg];
...@@ -670,73 +643,90 @@ xfs_write( ...@@ -670,73 +643,90 @@ xfs_write(
* If any segment has a negative length, or the cumulative * If any segment has a negative length, or the cumulative
* length ever wraps negative then return -EINVAL. * length ever wraps negative then return -EINVAL.
*/ */
size += iv->iov_len; ocount += iv->iov_len;
if (unlikely((ssize_t)(size|iv->iov_len) < 0)) if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
return XFS_ERROR(-EINVAL); return -EINVAL;
if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
continue;
if (seg == 0)
return -EFAULT;
segs = seg;
ocount -= iv->iov_len; /* This segment is no good */
break;
} }
/* END copy & waste from filemap.c */
if (size == 0) count = ocount;
pos = *offset;
if (count == 0)
return 0; return 0;
io = &xip->i_iocore; io = &xip->i_iocore;
mp = io->io_mount; mp = io->io_mount;
if (XFS_FORCED_SHUTDOWN(mp)) { if (XFS_FORCED_SHUTDOWN(mp))
return -EIO; return -EIO;
}
if (ioflags & IO_ISDIRECT) { if (ioflags & IO_ISDIRECT) {
xfs_buftarg_t *target = xfs_buftarg_t *target =
(xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ? (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
mp->m_rtdev_targp : mp->m_ddev_targp; mp->m_rtdev_targp : mp->m_ddev_targp;
if ((*offset & target->pbr_smask) || if ((pos & target->pbr_smask) || (count & target->pbr_smask))
(size & target->pbr_smask)) {
return XFS_ERROR(-EINVAL); return XFS_ERROR(-EINVAL);
}
iolock = XFS_IOLOCK_SHARED; if (!VN_CACHED(vp) && pos < i_size_read(inode))
locktype = VRWLOCK_WRITE_DIRECT; need_isem = 0;
} else {
if (VN_CACHED(vp))
need_flush = 1;
}
relock:
if (need_isem) {
iolock = XFS_IOLOCK_EXCL; iolock = XFS_IOLOCK_EXCL;
locktype = VRWLOCK_WRITE; locktype = VRWLOCK_WRITE;
down(&inode->i_sem);
} else {
iolock = XFS_IOLOCK_SHARED;
locktype = VRWLOCK_WRITE_DIRECT;
} }
xfs_ilock(xip, XFS_ILOCK_EXCL|iolock); xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
isize = xip->i_d.di_size; isize = i_size_read(inode);
limit = XFS_MAXIOFFSET(mp);
if (file->f_flags & O_APPEND) if (file->f_flags & O_APPEND)
*offset = isize; *offset = isize;
start: start:
n = limit - *offset; error = -generic_write_checks(file, &pos, &count,
if (n <= 0) { S_ISBLK(inode->i_mode));
if (error) {
xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
return -EFBIG; goto out_unlock_isem;
} }
if (n < size) new_size = pos + count;
size = n; if (new_size > isize)
new_size = *offset + size;
if (new_size > isize) {
io->io_new_size = new_size; io->io_new_size = new_size;
}
if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) && if ((DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_WRITE) &&
!(ioflags & IO_INVIS) && !eventsent)) { !(ioflags & IO_INVIS) && !eventsent)) {
loff_t savedsize = *offset; loff_t savedsize = pos;
int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags); int dmflags = FILP_DELAY_FLAG(file);
if (need_isem)
dmflags |= DM_FLAGS_ISEM;
xfs_iunlock(xip, XFS_ILOCK_EXCL); xfs_iunlock(xip, XFS_ILOCK_EXCL);
error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp, error = XFS_SEND_DATA(xip->i_mount, DM_EVENT_WRITE, vp,
*offset, size, pos, count,
dmflags, &locktype); dmflags, &locktype);
if (error) { if (error) {
xfs_iunlock(xip, iolock); xfs_iunlock(xip, iolock);
return -error; goto out_unlock_isem;
} }
xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_ilock(xip, XFS_ILOCK_EXCL);
eventsent = 1; eventsent = 1;
...@@ -748,9 +738,8 @@ xfs_write( ...@@ -748,9 +738,8 @@ xfs_write(
* event prevents another call to XFS_SEND_DATA, which is * event prevents another call to XFS_SEND_DATA, which is
* what allows the size to change in the first place. * what allows the size to change in the first place.
*/ */
if ((file->f_flags & O_APPEND) && if ((file->f_flags & O_APPEND) && savedsize != isize) {
savedsize != xip->i_d.di_size) { pos = isize = xip->i_d.di_size;
*offset = isize = xip->i_d.di_size;
goto start; goto start;
} }
} }
...@@ -761,8 +750,10 @@ xfs_write( ...@@ -761,8 +750,10 @@ xfs_write(
* *
* We must update xfs' times since revalidate will overcopy xfs. * We must update xfs' times since revalidate will overcopy xfs.
*/ */
if (size && !(ioflags & IO_INVIS)) if (!(ioflags & IO_INVIS)) {
xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
inode_update_time(inode, 1);
}
/* /*
* If the offset is beyond the size of the file, we have a couple * If the offset is beyond the size of the file, we have a couple
...@@ -773,12 +764,12 @@ xfs_write( ...@@ -773,12 +764,12 @@ xfs_write(
* to zero it out up to the new size. * to zero it out up to the new size.
*/ */
if (!(ioflags & IO_ISDIRECT) && (*offset > isize && isize)) { if (pos > isize) {
error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, *offset, error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos,
isize, *offset + size); isize, pos + count);
if (error) { if (error) {
xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock); xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
return(-error); goto out_unlock_isem;
} }
} }
xfs_iunlock(xip, XFS_ILOCK_EXCL); xfs_iunlock(xip, XFS_ILOCK_EXCL);
...@@ -795,22 +786,64 @@ xfs_write( ...@@ -795,22 +786,64 @@ xfs_write(
(S_ISGID | S_IXGRP))) && (S_ISGID | S_IXGRP))) &&
!capable(CAP_FSETID)) { !capable(CAP_FSETID)) {
error = xfs_write_clear_setuid(xip); error = xfs_write_clear_setuid(xip);
if (error) { if (likely(!error))
error = -remove_suid(file->f_dentry);
if (unlikely(error)) {
xfs_iunlock(xip, iolock); xfs_iunlock(xip, iolock);
return -error; goto out_unlock_isem;
} }
} }
retry: retry:
if (ioflags & IO_ISDIRECT) { /* We can write back this queue in page reclaim */
xfs_inval_cached_pages(vp, io, *offset, 1, 1); current->backing_dev_info = mapping->backing_dev_info;
xfs_rw_enter_trace(XFS_DIOWR_ENTER,
io, (void *)iovp, segs, *offset, ioflags); if ((ioflags & IO_ISDIRECT)) {
if (need_flush) {
xfs_inval_cached_trace(io, pos, -1,
ctooff(offtoct(pos)), -1);
VOP_FLUSHINVAL_PAGES(vp, ctooff(offtoct(pos)),
-1, FI_REMAPF_LOCKED);
}
if (need_isem) {
/* demote the lock now the cached pages are gone */
XFS_ILOCK_DEMOTE(mp, io, XFS_IOLOCK_EXCL);
up(&inode->i_sem);
iolock = XFS_IOLOCK_SHARED;
locktype = VRWLOCK_WRITE_DIRECT;
need_isem = 0;
}
xfs_rw_enter_trace(XFS_DIOWR_ENTER, io, (void *)iovp, segs,
*offset, ioflags);
ret = generic_file_direct_write(iocb, iovp,
&segs, pos, offset, count, ocount);
/*
* direct-io write to a hole: fall through to buffered I/O
* for completing the rest of the request.
*/
if (ret >= 0 && ret != count) {
XFS_STATS_ADD(xs_write_bytes, ret);
pos += ret;
count -= ret;
need_isem = 1;
ioflags &= ~IO_ISDIRECT;
xfs_iunlock(xip, iolock);
goto relock;
}
} else { } else {
xfs_rw_enter_trace(XFS_WRITE_ENTER, xfs_rw_enter_trace(XFS_WRITE_ENTER, io, (void *)iovp, segs,
io, (void *)iovp, segs, *offset, ioflags); *offset, ioflags);
ret = generic_file_buffered_write(iocb, iovp, segs,
pos, offset, count, ret);
} }
ret = generic_file_aio_write_nolock(iocb, iovp, segs, offset);
current->backing_dev_info = NULL;
if ((ret == -ENOSPC) && if ((ret == -ENOSPC) &&
DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) && DM_EVENT_ENABLED(vp->v_vfsp, xip, DM_EVENT_NOSPACE) &&
...@@ -821,17 +854,15 @@ xfs_write( ...@@ -821,17 +854,15 @@ xfs_write(
DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL, DM_RIGHT_NULL, vp, DM_RIGHT_NULL, NULL, NULL,
0, 0, 0); /* Delay flag intentionally unused */ 0, 0, 0); /* Delay flag intentionally unused */
if (error) if (error)
return -error; goto out_unlock_isem;
xfs_rwlock(bdp, locktype); xfs_rwlock(bdp, locktype);
*offset = xip->i_d.di_size; pos = xip->i_d.di_size;
goto retry; goto retry;
} }
if (*offset > xip->i_d.di_size) { if (*offset > xip->i_d.di_size) {
xfs_ilock(xip, XFS_ILOCK_EXCL); xfs_ilock(xip, XFS_ILOCK_EXCL);
if (*offset > xip->i_d.di_size) { if (*offset > xip->i_d.di_size) {
struct inode *inode = LINVFS_GET_IP(vp);
xip->i_d.di_size = *offset; xip->i_d.di_size = *offset;
i_size_write(inode, *offset); i_size_write(inode, *offset);
xip->i_update_core = 1; xip->i_update_core = 1;
...@@ -842,7 +873,8 @@ xfs_write( ...@@ -842,7 +873,8 @@ xfs_write(
if (ret <= 0) { if (ret <= 0) {
xfs_rwunlock(bdp, locktype); xfs_rwunlock(bdp, locktype);
return ret; error = -ret;
goto out_unlock_isem;
} }
XFS_STATS_ADD(xs_write_bytes, ret); XFS_STATS_ADD(xs_write_bytes, ret);
...@@ -929,7 +961,12 @@ xfs_write( ...@@ -929,7 +961,12 @@ xfs_write(
} /* (ioflags & O_SYNC) */ } /* (ioflags & O_SYNC) */
xfs_rwunlock(bdp, locktype); xfs_rwunlock(bdp, locktype);
return(ret); error = -ret;
out_unlock_isem:
if (need_isem)
up(&inode->i_sem);
return -error;
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment