Commit 6c4155a9 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus-v3.11-rc3' of git://oss.sgi.com/xfs/xfs

Pull xfs fix from Ben Myers:
 "Fix for regression in commit cca9f93a ("xfs: don't do IO when
  creating an new inode"), recovery causing filesystem corruption after
  a crash"

* tag 'for-linus-v3.11-rc3' of git://oss.sgi.com/xfs/xfs:
  xfs: di_flushiter considered harmful
parents f315cf5e e1b4271a
...@@ -39,6 +39,9 @@ typedef struct xfs_timestamp { ...@@ -39,6 +39,9 @@ typedef struct xfs_timestamp {
* There is a very similar struct icdinode in xfs_inode which matches the * There is a very similar struct icdinode in xfs_inode which matches the
* layout of the first 96 bytes of this structure, but is kept in native * layout of the first 96 bytes of this structure, but is kept in native
* format instead of big endian. * format instead of big endian.
*
* Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed
* padding field for v3 inodes.
*/ */
typedef struct xfs_dinode { typedef struct xfs_dinode {
__be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */ __be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
......
...@@ -896,7 +896,6 @@ xfs_dinode_to_disk( ...@@ -896,7 +896,6 @@ xfs_dinode_to_disk(
to->di_projid_lo = cpu_to_be16(from->di_projid_lo); to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
to->di_projid_hi = cpu_to_be16(from->di_projid_hi); to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
to->di_flushiter = cpu_to_be16(from->di_flushiter);
to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec); to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec); to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
...@@ -924,6 +923,9 @@ xfs_dinode_to_disk( ...@@ -924,6 +923,9 @@ xfs_dinode_to_disk(
to->di_lsn = cpu_to_be64(from->di_lsn); to->di_lsn = cpu_to_be64(from->di_lsn);
memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
uuid_copy(&to->di_uuid, &from->di_uuid); uuid_copy(&to->di_uuid, &from->di_uuid);
to->di_flushiter = 0;
} else {
to->di_flushiter = cpu_to_be16(from->di_flushiter);
} }
} }
...@@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc( ...@@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc(
/* /*
* Read the disk inode attributes into the in-core inode structure. * Read the disk inode attributes into the in-core inode structure.
* *
* If we are initialising a new inode and we are not utilising the * For version 5 superblocks, if we are initialising a new inode and we are not
* XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new inode core * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new
* with a random generation number. If we are keeping inodes around, we need to * inode core with a random generation number. If we are keeping inodes around,
* read the inode cluster to get the existing generation number off disk. * we need to read the inode cluster to get the existing generation number off
* disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
* format) then log recovery is dependent on the di_flushiter field being
* initialised from the current on-disk value and hence we must also read the
* inode off disk.
*/ */
int int
xfs_iread( xfs_iread(
...@@ -1054,6 +1060,7 @@ xfs_iread( ...@@ -1054,6 +1060,7 @@ xfs_iread(
/* shortcut IO on inode allocation if possible */ /* shortcut IO on inode allocation if possible */
if ((iget_flags & XFS_IGET_CREATE) && if ((iget_flags & XFS_IGET_CREATE) &&
xfs_sb_version_hascrc(&mp->m_sb) &&
!(mp->m_flags & XFS_MOUNT_IKEEP)) { !(mp->m_flags & XFS_MOUNT_IKEEP)) {
/* initialise the on-disk inode core */ /* initialise the on-disk inode core */
memset(&ip->i_d, 0, sizeof(ip->i_d)); memset(&ip->i_d, 0, sizeof(ip->i_d));
...@@ -2882,12 +2889,18 @@ xfs_iflush_int( ...@@ -2882,12 +2889,18 @@ xfs_iflush_int(
__func__, ip->i_ino, ip->i_d.di_forkoff, ip); __func__, ip->i_ino, ip->i_d.di_forkoff, ip);
goto corrupt_out; goto corrupt_out;
} }
/* /*
* bump the flush iteration count, used to detect flushes which * Inode item log recovery for v1/v2 inodes is dependent on the
* postdate a log record during recovery. This is redundant as we now * di_flushiter count for correct sequencing. We bump the flush
* log every change and hence this can't happen. Still, it doesn't hurt. * iteration count so we can detect flushes which postdate a log record
* during recovery. This is redundant as we now log every change and
* hence this can't happen but we need to still do it to ensure
* backwards compatibility with old kernels that predate logging all
* inode changes.
*/ */
ip->i_d.di_flushiter++; if (ip->i_d.di_version < 3)
ip->i_d.di_flushiter++;
/* /*
* Copy the dirty parts of the inode into the on-disk * Copy the dirty parts of the inode into the on-disk
......
...@@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2( ...@@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2(
goto error; goto error;
} }
/* Skip replay when the on disk inode is newer than the log one */ /*
if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) { * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
* are transactional and if ordering is necessary we can determine that
* more accurately by the LSN field in the V3 inode core. Don't trust
* the inode versions, as we might be changing them here - use the
* superblock flag to determine whether we need to look at di_flushiter
* to skip replay when the on disk inode is newer than the log one
*/
if (!xfs_sb_version_hascrc(&mp->m_sb) &&
dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
/* /*
* Deal with the wrap case, DI_MAX_FLUSH is less * Deal with the wrap case, DI_MAX_FLUSH is less
* than smaller numbers * than smaller numbers
...@@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2( ...@@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2(
goto error; goto error;
} }
} }
/* Take the opportunity to reset the flush iteration count */ /* Take the opportunity to reset the flush iteration count */
dicp->di_flushiter = 0; dicp->di_flushiter = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment