Commit 1be44e23 authored by Linus Torvalds

Merge tag 'xfs-for-linus-4.1-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs fixes from Dave Chinner:
 "This is a little larger than I'd like late in the release cycle, but
  all the fixes are for regressions introduced in the 4.1-rc1 merge, or
  are needed back in -stable kernels fairly quickly as they are
  filesystem corruption or userspace visible correctness issues.

  Changes in this update:

   - regression fix for new rename whiteout code

   - regression fixes for new superblock generic per-cpu counter code

   - fix for incorrect error return sign introduced in 3.17

   - metadata corruption fixes that need to go back to -stable kernels"

* tag 'xfs-for-linus-4.1-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  xfs: fix broken i_nlink accounting for whiteout tmpfile inode
  xfs: xfs_iozero can return positive errno
  xfs: xfs_attr_inactive leaves inconsistent attr fork state behind
  xfs: extent size hints can round up extents past MAXEXTLEN
  xfs: inode and free block counters need to use __percpu_counter_compare
  percpu_counter: batch size aware __percpu_counter_compare()
  xfs: use percpu_counter_read_positive for mp->m_icount
parents 2a645171 22419ac9
fs/xfs/libxfs/xfs_attr_leaf.c
@@ -574,8 +574,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
  * After the last attribute is removed revert to original inode format,
  * making all literal area available to the data fork once more.
  */
-STATIC void
-xfs_attr_fork_reset(
+void
+xfs_attr_fork_remove(
 	struct xfs_inode	*ip,
 	struct xfs_trans	*tp)
 {
@@ -641,7 +641,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
 	    (mp->m_flags & XFS_MOUNT_ATTR2) &&
 	    (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
 	    !(args->op_flags & XFS_DA_OP_ADDNAME)) {
-		xfs_attr_fork_reset(dp, args->trans);
+		xfs_attr_fork_remove(dp, args->trans);
 	} else {
 		xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
 		dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
@@ -905,7 +905,7 @@ xfs_attr3_leaf_to_shortform(
 	if (forkoff == -1) {
 		ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
 		ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
-		xfs_attr_fork_reset(dp, args->trans);
+		xfs_attr_fork_remove(dp, args->trans);
 		goto out;
 	}
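Both call sites above now use the renamed, non-static helper. Its body lies outside the visible hunks; the sketch below is a self-contained toy inferred from the call sites and the xfs_attr_inactive() rework later in this commit, not code quoted from the patch. The point it illustrates: the helper must tear down the in-core fork and the on-disk fork metadata together, so no caller can observe one without the other.

```c
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy inode: just the fields a fork-removal helper cares about. */
struct toy_inode {
	int	di_forkoff;	/* on-disk: attr fork offset, 0 = none */
	int	di_aformat;	/* on-disk: attr fork format */
	int	di_anextents;	/* on-disk: attr extent count */
	void	*i_afp;		/* in-core attr fork, NULL = none */
};

#define FMT_EXTENTS	2	/* stand-in for XFS_DINODE_FMT_EXTENTS */

/* Model of what xfs_attr_fork_remove() has to accomplish. */
static void toy_fork_remove(struct toy_inode *ip)
{
	assert(ip->di_anextents == 0);	/* all attr extents already freed */
	free(ip->i_afp);		/* destroy the in-core fork */
	ip->i_afp = NULL;
	ip->di_forkoff = 0;		/* on-disk: no attr fork any more */
	ip->di_aformat = FMT_EXTENTS;
	/* the real helper would log the inode core in the transaction here */
}

int main(void)
{
	struct toy_inode ip = { 13, FMT_EXTENTS, 0, malloc(16) };

	toy_fork_remove(&ip);
	printf("forkoff=%d afp=%p\n", ip.di_forkoff, ip.i_afp);
	return 0;
}
```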
fs/xfs/libxfs/xfs_attr_leaf.h
@@ -53,7 +53,7 @@ int xfs_attr_shortform_remove(struct xfs_da_args *args);
 int xfs_attr_shortform_list(struct xfs_attr_list_context *context);
 int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
 int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
+void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp);
 
 /*
  * Internal routines when attribute fork size == XFS_LBSIZE(mp).
fs/xfs/libxfs/xfs_bmap.c
@@ -3224,12 +3224,24 @@ xfs_bmap_extsize_align(
 		align_alen += temp;
 		align_off -= temp;
 	}
-	/*
-	 * Same adjustment for the end of the requested area.
-	 */
-	if ((temp = (align_alen % extsz))) {
-		align_alen += extsz - temp;
-	}
+
+	/* Same adjustment for the end of the requested area. */
+	temp = (align_alen % extsz);
+	if (temp)
+		align_alen += extsz - temp;
+
+	/*
+	 * For large extent hint sizes, the aligned extent might be larger than
+	 * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls
+	 * the length back under MAXEXTLEN. The outer allocation loops handle
+	 * short allocation just fine, so it is safe to do this. We only want to
+	 * do it when we are forced to, though, because it means more allocation
+	 * operations are required.
+	 */
+	while (align_alen > MAXEXTLEN)
+		align_alen -= extsz;
+	ASSERT(align_alen <= MAXEXTLEN);
+
 	/*
 	 * If the previous block overlaps with this proposed allocation
 	 * then move the start forward without adjusting the length.
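The new logic rounds the request up to an extsz multiple and then walks it back one extsz at a time until it fits under MAXEXTLEN. A minimal userspace model of that arithmetic follows; MAXEXTLEN is hard-coded to its on-disk value of 2^21 - 1 blocks, and the function name is illustrative, not the kernel's.

```c
#include <assert.h>
#include <stdio.h>

#define MAXEXTLEN	((1u << 21) - 1)	/* max blocks in one BMBT extent */

/*
 * Model of the alignment above: round the length up to an extsz multiple,
 * then pull it back under MAXEXTLEN one extsz at a time. With a large hint
 * (e.g. a 1GB extsz on 4k blocks), rounding up can overshoot the maximum
 * extent length, which previously produced a corrupt on-disk extent record.
 */
static unsigned int align_extent(unsigned int align_alen, unsigned int extsz)
{
	unsigned int temp = align_alen % extsz;

	if (temp)
		align_alen += extsz - temp;	/* round end up to the hint */
	while (align_alen > MAXEXTLEN)
		align_alen -= extsz;		/* pull back under the limit */
	assert(align_alen <= MAXEXTLEN);
	return align_alen;
}

int main(void)
{
	/* 1GB extent size hint on 4k blocks = 262144 blocks per extent */
	printf("%u\n", align_extent(MAXEXTLEN - 1, 262144));
	return 0;
}
```

Here rounding MAXEXTLEN - 1 up overshoots to 2097152; one pull-back lands on 1835008, a legal, hint-aligned length.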
@@ -3318,7 +3330,9 @@ xfs_bmap_extsize_align(
 		return -EINVAL;
 	} else {
 		ASSERT(orig_off >= align_off);
-		ASSERT(orig_end <= align_off + align_alen);
+		/* see MAXEXTLEN handling above */
+		ASSERT(orig_end <= align_off + align_alen ||
+		       align_alen + extsz > MAXEXTLEN);
 	}
 
 #ifdef DEBUG
@@ -4099,13 +4113,6 @@ xfs_bmapi_reserve_delalloc(
 	/* Figure out the extent size, adjust alen */
 	extsz = xfs_get_extsz_hint(ip);
 	if (extsz) {
-		/*
-		 * Make sure we don't exceed a single extent length when we
-		 * align the extent by reducing length we are going to
-		 * allocate by the maximum amount extent size aligment may
-		 * require.
-		 */
-		alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
 		error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
 					       1, 0, &aoff, &alen);
 		ASSERT(!error);
fs/xfs/libxfs/xfs_ialloc.c
@@ -376,7 +376,7 @@ xfs_ialloc_ag_alloc(
 	 */
 	newlen = args.mp->m_ialloc_inos;
 	if (args.mp->m_maxicount &&
-	    percpu_counter_read(&args.mp->m_icount) + newlen >
+	    percpu_counter_read_positive(&args.mp->m_icount) + newlen >
 							args.mp->m_maxicount)
 		return -ENOSPC;
 	args.minlen = args.maxlen = args.mp->m_ialloc_blks;
@@ -1339,10 +1339,13 @@ xfs_dialloc(
 	 * If we have already hit the ceiling of inode blocks then clear
 	 * okalloc so we scan all available agi structures for a free
 	 * inode.
+	 *
+	 * Read rough value of mp->m_icount by percpu_counter_read_positive,
+	 * which will sacrifice the preciseness but improve the performance.
 	 */
 	if (mp->m_maxicount &&
-	    percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
-							mp->m_maxicount) {
+	    percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos
+							> mp->m_maxicount) {
 		noroom = 1;
 		okalloc = 0;
 	}
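percpu_counter_read() only looks at the global part of the counter, so it can run behind the unflushed per-CPU deltas and even go transiently negative; the _positive variant clamps that to zero so the unsigned ceiling arithmetic above never sees a huge bogus value. A toy model of the difference, with illustrative names:

```c
#include <stdio.h>

/*
 * Toy percpu counter: a global value plus per-CPU deltas that have not
 * been folded in yet. The cheap read returns only the global part, so it
 * is approximate and may be negative if frees were flushed to the global
 * count before the matching allocations were.
 */
struct toy_counter {
	long long count;	/* global, updated in batches */
	long long pcpu[4];	/* per-CPU deltas, not yet folded in */
};

static long long toy_read(struct toy_counter *c)
{
	return c->count;	/* cheap, approximate, may be negative */
}

static long long toy_read_positive(struct toy_counter *c)
{
	long long v = toy_read(c);
	return v < 0 ? 0 : v;	/* clamp, as percpu_counter_read_positive() does */
}

int main(void)
{
	struct toy_counter c = { .count = -3, .pcpu = { 2, 1, 0, 0 } };

	printf("raw=%lld clamped=%lld\n", toy_read(&c), toy_read_positive(&c));
	return 0;
}
```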
fs/xfs/xfs_attr_inactive.c
@@ -380,23 +380,31 @@ xfs_attr3_root_inactive(
 	return error;
 }
 
+/*
+ * xfs_attr_inactive kills all traces of an attribute fork on an inode. It
+ * removes both the on-disk and in-memory inode fork. Note that this also has to
+ * handle the condition of inodes without attributes but with an attribute fork
+ * configured, so we can't use xfs_inode_hasattr() here.
+ *
+ * The in-memory attribute fork is removed even on error.
+ */
 int
-xfs_attr_inactive(xfs_inode_t *dp)
+xfs_attr_inactive(
+	struct xfs_inode	*dp)
 {
-	xfs_trans_t *trans;
-	xfs_mount_t *mp;
-	int error;
+	struct xfs_trans	*trans;
+	struct xfs_mount	*mp;
+	int			cancel_flags = 0;
+	int			lock_mode = XFS_ILOCK_SHARED;
+	int			error = 0;
 
 	mp = dp->i_mount;
 	ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
 
-	xfs_ilock(dp, XFS_ILOCK_SHARED);
-	if (!xfs_inode_hasattr(dp) ||
-	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-		xfs_iunlock(dp, XFS_ILOCK_SHARED);
-		return 0;
-	}
-	xfs_iunlock(dp, XFS_ILOCK_SHARED);
+	xfs_ilock(dp, lock_mode);
+	if (!XFS_IFORK_Q(dp))
+		goto out_destroy_fork;
+	xfs_iunlock(dp, lock_mode);
 
 	/*
 	 * Start our first transaction of the day.
@@ -408,13 +416,18 @@ xfs_attr_inactive(xfs_inode_t *dp)
 	 * the inode in every transaction to let it float upward through
 	 * the log.
 	 */
+	lock_mode = 0;
 	trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
 	error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0);
-	if (error) {
-		xfs_trans_cancel(trans, 0);
-		return error;
-	}
-	xfs_ilock(dp, XFS_ILOCK_EXCL);
+	if (error)
+		goto out_cancel;
+
+	lock_mode = XFS_ILOCK_EXCL;
+	cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT;
+	xfs_ilock(dp, lock_mode);
+
+	if (!XFS_IFORK_Q(dp))
+		goto out_cancel;
 
 	/*
 	 * No need to make quota reservations here. We expect to release some
@@ -422,29 +435,31 @@ xfs_attr_inactive(xfs_inode_t *dp)
 	 */
 	xfs_trans_ijoin(trans, dp, 0);
 
-	/*
-	 * Decide on what work routines to call based on the inode size.
-	 */
-	if (!xfs_inode_hasattr(dp) ||
-	    dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
-		error = 0;
-		goto out;
-	}
-	error = xfs_attr3_root_inactive(&trans, dp);
-	if (error)
-		goto out;
-
-	error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
-	if (error)
-		goto out;
+	/* invalidate and truncate the attribute fork extents */
+	if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) {
+		error = xfs_attr3_root_inactive(&trans, dp);
+		if (error)
+			goto out_cancel;
+
+		error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
+		if (error)
+			goto out_cancel;
+	}
+
+	/* Reset the attribute fork - this also destroys the in-core fork */
+	xfs_attr_fork_remove(dp, trans);
 
 	error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
-	xfs_iunlock(dp, XFS_ILOCK_EXCL);
+	xfs_iunlock(dp, lock_mode);
 	return error;
 
-out:
-	xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
-	xfs_iunlock(dp, XFS_ILOCK_EXCL);
+out_cancel:
+	xfs_trans_cancel(trans, cancel_flags);
+out_destroy_fork:
+	/* kill the in-core attr fork before we drop the inode lock */
+	if (dp->i_afp)
+		xfs_idestroy_fork(dp, XFS_ATTR_FORK);
+	if (lock_mode)
+		xfs_iunlock(dp, lock_mode);
 	return error;
 }
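The rewritten function tracks teardown state in lock_mode and cancel_flags so every failure funnels through one tail that undoes exactly what has been set up so far. A self-contained sketch of that idiom with generic stand-in names (not XFS APIs):

```c
#include <stdio.h>

/* Hypothetical stand-ins for lock modes and cancel flags. */
enum { LOCK_NONE = 0, LOCK_SHARED = 1, LOCK_EXCL = 2 };
enum { CANCEL_NONE = 0, CANCEL_ABORT = 1 };

static void take_lock(int mode) { printf("lock(%d)\n", mode); }
static void drop_lock(int mode) { printf("unlock(%d)\n", mode); }
static int  reserve(void)       { return 0; }	/* 0 = success */
static void cancel(int flags)   { printf("cancel(%d)\n", flags); }
static int  body_fails(void)    { return -5; }	/* force the error path */

/*
 * Sketch of the unwind idiom: record progress in lock_mode/cancel_flags,
 * so one error tail undoes exactly that much no matter which goto reaches
 * it. Before the reservation succeeds, cancel_flags stays 0 and lock_mode
 * names whatever lock is still held.
 */
static int do_work(void)
{
	int cancel_flags = CANCEL_NONE;
	int lock_mode = LOCK_SHARED;
	int error;

	take_lock(lock_mode);
	/* early checks under the shared lock would go here */
	drop_lock(lock_mode);

	lock_mode = LOCK_NONE;		/* nothing held across the reserve */
	error = reserve();
	if (error)
		goto out_cancel;

	lock_mode = LOCK_EXCL;
	cancel_flags = CANCEL_ABORT;	/* reservation now needs an abort */
	take_lock(lock_mode);

	error = body_fails();
	if (error)
		goto out_cancel;
	return 0;

out_cancel:
	cancel(cancel_flags);
	if (lock_mode)
		drop_lock(lock_mode);
	return error;
}

int main(void) { return do_work() ? 1 : 0; }
```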
fs/xfs/xfs_file.c
@@ -124,7 +124,7 @@ xfs_iozero(
 		status = 0;
 	} while (count);
 
-	return (-status);
+	return status;
 }
 
 int
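Kernel-internal I/O helpers return 0 or a negative errno. xfs_iozero's final negation turned an already-negative status into a positive value, which callers checking `if (ret < 0)` read as success. A toy model of the sign convention (illustrative names, not the actual xfs_iozero loop):

```c
#include <errno.h>
#include <stdio.h>

/* A helper following the kernel's negative-errno convention. */
static int helper(void)
{
	return -ENOSPC;
}

static int buggy(void)
{
	int status = helper();
	return -status;		/* flips -ENOSPC to +ENOSPC: wrong sign */
}

static int fixed(void)
{
	int status = helper();
	return status;		/* propagate the negative errno as-is */
}

int main(void)
{
	printf("buggy=%d fixed=%d\n", buggy(), fixed());
	return 0;
}
```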
fs/xfs/xfs_inode.c
@@ -1946,21 +1946,17 @@ xfs_inactive(
 	/*
 	 * If there are attributes associated with the file then blow them away
 	 * now.  The code calls a routine that recursively deconstructs the
-	 * attribute fork.  We need to just commit the current transaction
-	 * because we can't use it for xfs_attr_inactive().
+	 * attribute fork. It also blows away the in-core attribute fork.
 	 */
-	if (ip->i_d.di_anextents > 0) {
-		ASSERT(ip->i_d.di_forkoff != 0);
-
+	if (XFS_IFORK_Q(ip)) {
 		error = xfs_attr_inactive(ip);
 		if (error)
 			return;
 	}
 
-	if (ip->i_afp)
-		xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
+	ASSERT(!ip->i_afp);
 	ASSERT(ip->i_d.di_anextents == 0);
+	ASSERT(ip->i_d.di_forkoff == 0);
 
 	/*
 	 * Free the inode.
@@ -2883,7 +2879,13 @@ xfs_rename_alloc_whiteout(
 	if (error)
 		return error;
 
-	/* Satisfy xfs_bumplink that this is a real tmpfile */
+	/*
+	 * Prepare the tmpfile inode as if it were created through the VFS.
+	 * Otherwise, the link increment paths will complain about nlink 0->1.
+	 * Drop the link count as done by d_tmpfile(), complete the inode setup
+	 * and flag it as linkable.
+	 */
+	drop_nlink(VFS_I(tmpfile));
 	xfs_finish_inode_setup(tmpfile);
 	VFS_I(tmpfile)->i_state |= I_LINKABLE;
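For context on the nlink dance: a freshly created inode has nlink 1, the normal VFS tmpfile path (d_tmpfile) drops it to 0, and only inodes flagged I_LINKABLE may legally go 0 -> 1 again. A toy model of that lifecycle, with illustrative names:

```c
#include <stdio.h>

/* Toy inode: only the link-count state this fix is about. */
struct toy_inode {
	int nlink;
	int linkable;
};

static void toy_create(struct toy_inode *ip)     { ip->nlink = 1; }
static void toy_drop_nlink(struct toy_inode *ip) { ip->nlink--; }

static int toy_bumplink(struct toy_inode *ip)
{
	/* 0 -> 1 is only sane for an inode flagged as linkable */
	if (ip->nlink == 0 && !ip->linkable)
		return -1;	/* would fire the corruption warning */
	ip->nlink++;
	return 0;
}

int main(void)
{
	struct toy_inode wip = { 0, 0 };

	toy_create(&wip);	/* nlink = 1, as inode creation leaves it */
	toy_drop_nlink(&wip);	/* mimic d_tmpfile(): nlink = 0 */
	wip.linkable = 1;	/* mimic I_LINKABLE */
	printf("bumplink=%d nlink=%d\n", toy_bumplink(&wip), wip.nlink);
	return 0;
}
```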
@@ -3151,7 +3153,7 @@ xfs_rename(
 	 * intermediate state on disk.
 	 */
 	if (wip) {
-		ASSERT(wip->i_d.di_nlink == 0);
+		ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
 		error = xfs_bumplink(tp, wip);
 		if (error)
 			goto out_trans_abort;
fs/xfs/xfs_mount.c
@@ -1084,14 +1084,18 @@ xfs_log_sbcount(xfs_mount_t *mp)
 	return xfs_sync_sb(mp, true);
 }
 
+/*
+ * Deltas for the inode count are +/-64, hence we use a large batch size
+ * of 128 so we don't need to take the counter lock on every update.
+ */
+#define XFS_ICOUNT_BATCH	128
+
 int
 xfs_mod_icount(
 	struct xfs_mount	*mp,
 	int64_t			delta)
 {
-	/* deltas are +/-64, hence the large batch size of 128. */
-	__percpu_counter_add(&mp->m_icount, delta, 128);
-	if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
+	__percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH);
+	if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) {
 		ASSERT(0);
 		percpu_counter_add(&mp->m_icount, -delta);
 		return -EINVAL;
@@ -1113,6 +1117,14 @@ xfs_mod_ifree(
 	return 0;
 }
 
+/*
+ * Deltas for the block count can vary from 1 to very large, but lock contention
+ * only occurs on frequent small block count updates such as in the delayed
+ * allocation path for buffered writes (page a time updates). Hence we set
+ * a large batch count (1024) to minimise global counter updates except when
+ * we get near to ENOSPC and we have to be very accurate with our updates.
+ */
+#define XFS_FDBLOCKS_BATCH	1024
+
 int
 xfs_mod_fdblocks(
 	struct xfs_mount	*mp,
@@ -1151,25 +1163,19 @@ xfs_mod_fdblocks(
 	 * Taking blocks away, need to be more accurate the closer we
 	 * are to zero.
 	 *
-	 * batch size is set to a maximum of 1024 blocks - if we are
-	 * allocating of freeing extents larger than this then we aren't
-	 * going to be hammering the counter lock so a lock per update
-	 * is not a problem.
-	 *
 	 * If the counter has a value of less than 2 * max batch size,
 	 * then make everything serialise as we are real close to
 	 * ENOSPC.
 	 */
-#define __BATCH	1024
-	if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
+	if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH,
+				     XFS_FDBLOCKS_BATCH) < 0)
 		batch = 1;
 	else
-		batch = __BATCH;
-#undef __BATCH
+		batch = XFS_FDBLOCKS_BATCH;
 
 	__percpu_counter_add(&mp->m_fdblocks, delta, batch);
-	if (percpu_counter_compare(&mp->m_fdblocks,
-				   XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
+	if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp),
+				     XFS_FDBLOCKS_BATCH) >= 0) {
 		/* we had space! */
 		return 0;
 	}
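Why the compare must be batch-aware: each CPU can sit on nearly a full batch of unflushed counts, so the global value may be off by almost batch * nr_cpus. The generic percpu_counter_compare() assumed the small default batch, so for a counter updated with a batch of 1024 it could trust the rough value when it was wildly stale. A self-contained model of the two-phase compare (illustrative names):

```c
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS	4

/*
 * Two-phase compare: the rough global value is conclusive only when it
 * differs from rhs by more than the worst-case per-CPU drift
 * (batch * nr_cpus); otherwise fall back to the precise sum.
 */
static int toy_compare(long long global, long long precise,
		       long long rhs, int batch)
{
	if (llabs(global - rhs) > (long long)batch * NR_CPUS)
		return global > rhs ? 1 : -1;	/* fast path */
	/* slow path: fold in per-CPU deltas for an exact answer */
	if (precise > rhs)
		return 1;
	return precise < rhs ? -1 : 0;
}

int main(void)
{
	/* global says 3000 free, but 4000 more sit unflushed per-CPU */
	long long global = 3000, precise = 7000, rhs = 5000;

	/* default-batch slop (32 * 4 = 128) wrongly trusts the rough value */
	printf("batch=32:   %d\n", toy_compare(global, precise, rhs, 32));
	/* batch-aware slop (1024 * 4 = 4096) forces the precise path */
	printf("batch=1024: %d\n", toy_compare(global, precise, rhs, 1024));
	return 0;
}
```

With the small slop the compare answers "below rhs" even though the precise count is above it, which is exactly the premature-ENOSPC symptom this series fixes.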
include/linux/percpu_counter.h
@@ -41,7 +41,12 @@ void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
 s64 __percpu_counter_sum(struct percpu_counter *fbc);
-int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs);
+int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
+
+static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+{
+	return __percpu_counter_compare(fbc, rhs, percpu_counter_batch);
+}
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -116,6 +121,12 @@ static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
 	return 0;
 }
 
+static inline int
+__percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
+{
+	return percpu_counter_compare(fbc, rhs);
+}
+
 static inline void
 percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
lib/percpu_counter.c
@@ -197,13 +197,13 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb,
  * Compare counter against given value.
  * Return 1 if greater, 0 if equal and -1 if less
  */
-int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
+int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
 {
 	s64	count;
 
 	count = percpu_counter_read(fbc);
 	/* Check to see if rough count will be sufficient for comparison */
-	if (abs(count - rhs) > (percpu_counter_batch*num_online_cpus())) {
+	if (abs(count - rhs) > (batch * num_online_cpus())) {
 		if (count > rhs)
 			return 1;
 		else
@@ -218,7 +218,7 @@ int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs)
 	else
 		return 0;
 }
-EXPORT_SYMBOL(percpu_counter_compare);
+EXPORT_SYMBOL(__percpu_counter_compare);
 
 static int __init percpu_counter_startup(void)
 {