Commit a27a263b authored by Christoph Hellwig, committed by Alex Elder

xfs: make log devices with write back caches work

There's no reason not to support cache flushing on external log devices.
The only thing this really requires is flushing the data device first
both in fsync and log commits.  A side effect is that we also have to
remove the barrier write test during mount, which has been superfluous
since the new FLUSH+FUA code anyway.  Also use the chance to flush the
RT subvolume write cache before the fsync commit, which is required
for correct semantics.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
parent c46a131c
...@@ -131,19 +131,34 @@ xfs_file_fsync( ...@@ -131,19 +131,34 @@ xfs_file_fsync(
{ {
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode); struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp; struct xfs_trans *tp;
int error = 0; int error = 0;
int log_flushed = 0; int log_flushed = 0;
trace_xfs_file_fsync(ip); trace_xfs_file_fsync(ip);
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO); return -XFS_ERROR(EIO);
xfs_iflags_clear(ip, XFS_ITRUNCATED); xfs_iflags_clear(ip, XFS_ITRUNCATED);
xfs_ioend_wait(ip); xfs_ioend_wait(ip);
if (mp->m_flags & XFS_MOUNT_BARRIER) {
/*
* If we have an RT and/or log subvolume we need to make sure
* to flush the write cache of the device used for file data
* first. This is to ensure newly written file data make
* it to disk before logging the new inode size in case of
* an extending write.
*/
if (XFS_IS_REALTIME_INODE(ip))
xfs_blkdev_issue_flush(mp->m_rtdev_targp);
else if (mp->m_logdev_targp != mp->m_ddev_targp)
xfs_blkdev_issue_flush(mp->m_ddev_targp);
}
/* /*
* We always need to make sure that the required inode state is safe on * We always need to make sure that the required inode state is safe on
* disk. The inode might be clean but we still might need to force the * disk. The inode might be clean but we still might need to force the
...@@ -175,9 +190,9 @@ xfs_file_fsync( ...@@ -175,9 +190,9 @@ xfs_file_fsync(
* updates. The sync transaction will also force the log. * updates. The sync transaction will also force the log.
*/ */
xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_iunlock(ip, XFS_ILOCK_SHARED);
tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS); tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
error = xfs_trans_reserve(tp, 0, error = xfs_trans_reserve(tp, 0,
XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0); XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
if (error) { if (error) {
xfs_trans_cancel(tp, 0); xfs_trans_cancel(tp, 0);
return -error; return -error;
...@@ -209,28 +224,25 @@ xfs_file_fsync( ...@@ -209,28 +224,25 @@ xfs_file_fsync(
* force the log. * force the log.
*/ */
if (xfs_ipincount(ip)) { if (xfs_ipincount(ip)) {
error = _xfs_log_force_lsn(ip->i_mount, error = _xfs_log_force_lsn(mp,
ip->i_itemp->ili_last_lsn, ip->i_itemp->ili_last_lsn,
XFS_LOG_SYNC, &log_flushed); XFS_LOG_SYNC, &log_flushed);
} }
xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_iunlock(ip, XFS_ILOCK_SHARED);
} }
if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) { /*
/* * If we only have a single device, and the log force above was
* If the log write didn't issue an ordered tag we need * a no-op we might have to flush the data device cache here.
* to flush the disk cache for the data device now. * This can only happen for fdatasync/O_DSYNC if we were overwriting
*/ * an already allocated file and thus do not have any metadata to
if (!log_flushed) * commit.
xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp); */
if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
/* mp->m_logdev_targp == mp->m_ddev_targp &&
* If this inode is on the RT dev we need to flush that !XFS_IS_REALTIME_INODE(ip) &&
* cache as well. !log_flushed)
*/ xfs_blkdev_issue_flush(mp->m_ddev_targp);
if (XFS_IS_REALTIME_INODE(ip))
xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
}
return -error; return -error;
} }
......
...@@ -627,68 +627,6 @@ xfs_blkdev_put( ...@@ -627,68 +627,6 @@ xfs_blkdev_put(
blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
} }
/*
 * Try to write out the superblock using barriers.
 *
 * Issues a single ordered write of the superblock buffer, waits for it,
 * and returns the result of that wait (presumably 0 when the write
 * succeeded -- confirm against xfs_buf_iowait). The flags set for the
 * probe and any error recorded on the buffer are cleared again before
 * the buffer is released.
 */
STATIC int
xfs_barrier_test(
xfs_mount_t *mp)
{
xfs_buf_t *sbp = xfs_getsb(mp, 0);
int error;
/*
 * Prepare the buffer as a non-async, ordered write. The order of
 * these flag macros is kept exactly as-is; their definitions are
 * not visible here.
 */
XFS_BUF_UNDONE(sbp);
XFS_BUF_UNREAD(sbp);
XFS_BUF_UNDELAYWRITE(sbp);
XFS_BUF_WRITE(sbp);
XFS_BUF_UNASYNC(sbp);
XFS_BUF_ORDERED(sbp);
/* Submit the write and wait for its completion status. */
xfsbdstrat(mp, sbp);
error = xfs_buf_iowait(sbp);
/*
 * Clear all the flags we set and possible error state in the
 * buffer. We only did the write to try out whether barriers
 * worked and shouldn't leave any traces in the superblock
 * buffer.
 */
XFS_BUF_DONE(sbp);
XFS_BUF_ERROR(sbp, 0);
XFS_BUF_UNORDERED(sbp);
xfs_buf_relse(sbp);
return error;
}
/*
 * Decide whether barrier writes can stay enabled for this mount.
 *
 * Barriers are switched off (XFS_MOUNT_BARRIER cleared from m_flags, with
 * a notice logged) when any of the following holds, checked in this order:
 *   - the log lives on a different buftarg than the data device,
 *   - the data device is read-only,
 *   - the trial barrier write of the superblock fails.
 * If none applies, m_flags is left untouched.
 */
STATIC void
xfs_mountfs_check_barriers(xfs_mount_t *mp)
{
	if (mp->m_logdev_targp != mp->m_ddev_targp) {
		xfs_notice(mp,
		  "Disabling barriers, not supported with external log device");
	} else if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
		xfs_notice(mp,
		  "Disabling barriers, underlying device is readonly");
	} else if (xfs_barrier_test(mp)) {
		xfs_notice(mp,
		  "Disabling barriers, trial barrier write failed");
	} else {
		/* Every check passed: keep barriers enabled. */
		return;
	}

	/* One of the checks above failed and has already been reported. */
	mp->m_flags &= ~XFS_MOUNT_BARRIER;
}
void void
xfs_blkdev_issue_flush( xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg) xfs_buftarg_t *buftarg)
...@@ -1240,14 +1178,6 @@ xfs_fs_remount( ...@@ -1240,14 +1178,6 @@ xfs_fs_remount(
switch (token) { switch (token) {
case Opt_barrier: case Opt_barrier:
mp->m_flags |= XFS_MOUNT_BARRIER; mp->m_flags |= XFS_MOUNT_BARRIER;
/*
* Test if barriers are actually working if we can,
* else delay this check until the filesystem is
* marked writeable.
*/
if (!(mp->m_flags & XFS_MOUNT_RDONLY))
xfs_mountfs_check_barriers(mp);
break; break;
case Opt_nobarrier: case Opt_nobarrier:
mp->m_flags &= ~XFS_MOUNT_BARRIER; mp->m_flags &= ~XFS_MOUNT_BARRIER;
...@@ -1282,8 +1212,6 @@ xfs_fs_remount( ...@@ -1282,8 +1212,6 @@ xfs_fs_remount(
/* ro -> rw */ /* ro -> rw */
if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
mp->m_flags &= ~XFS_MOUNT_RDONLY; mp->m_flags &= ~XFS_MOUNT_RDONLY;
if (mp->m_flags & XFS_MOUNT_BARRIER)
xfs_mountfs_check_barriers(mp);
/* /*
* If this is the first remount to writeable state we * If this is the first remount to writeable state we
...@@ -1465,9 +1393,6 @@ xfs_fs_fill_super( ...@@ -1465,9 +1393,6 @@ xfs_fs_fill_super(
if (error) if (error)
goto out_free_sb; goto out_free_sb;
if (mp->m_flags & XFS_MOUNT_BARRIER)
xfs_mountfs_check_barriers(mp);
error = xfs_filestream_mount(mp); error = xfs_filestream_mount(mp);
if (error) if (error)
goto out_free_sb; goto out_free_sb;
......
...@@ -1372,8 +1372,17 @@ xlog_sync(xlog_t *log, ...@@ -1372,8 +1372,17 @@ xlog_sync(xlog_t *log,
XFS_BUF_ASYNC(bp); XFS_BUF_ASYNC(bp);
bp->b_flags |= XBF_LOG_BUFFER; bp->b_flags |= XBF_LOG_BUFFER;
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
/*
* If we have an external log device, flush the data device
* before flushing the log to make sure all meta data
* written back from the AIL actually made it to disk
* before writing out the new log tail LSN in the log buffer.
*/
if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
XFS_BUF_ORDERED(bp); XFS_BUF_ORDERED(bp);
}
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment