Commit 8c3c0743 authored by Linus Torvalds

Merge tag 'xfs-5.7-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull more xfs updates from Darrick Wong:
 "As promised last week, this batch changes how xfs interacts with
  memory reclaim; how the log batches and throttles log items; how hard
  writes near ENOSPC will try to squeeze more space out of the
  filesystem; and hopefully fix the last of the umount hangs after a
  catastrophic failure.

  Summary:

   - Validate the realtime geometry in the superblock when mounting

   - Refactor a bunch of tricky flag handling in the log code

   - Flush the CIL more judiciously so that we don't wait until there
     are millions of log items consuming a lot of memory.

   - Throttle transaction commits to prevent the xfs frontend from
     flooding the CIL with too many log items.

   - Account metadata buffers correctly for memory reclaim.

   - Mark slabs properly for memory reclaim. These should help reclaim
     run more effectively when XFS is using a lot of memory.

   - Don't write a garbage log record at unmount time if we're trying to
     trigger summary counter recalculation at next mount.

   - Don't block the AIL on locked dquot/inode buffers; instead trigger
     its backoff mechanism to give the lock holder a chance to finish
     up.

   - Ratelimit writeback flushing when buffered writes encounter ENOSPC.

   - Other minor cleanups.

   - Make reflink a synchronous operation when the fs is mounted with
     wsync or sync, which means that now we force the log to disk to
     record the changes"

* tag 'xfs-5.7-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (26 commits)
  xfs: reflink should force the log out if mounted with wsync
  xfs: factor out a new xfs_log_force_inode helper
  xfs: fix inode number overflow in ifree cluster helper
  xfs: remove redundant variable assignment in xfs_symlink()
  xfs: ratelimit inode flush on buffered write ENOSPC
  xfs: return locked status of inode buffer on xfsaild push
  xfs: trylock underlying buffer on dquot flush
  xfs: remove unnecessary ternary from xfs_create
  xfs: don't write a corrupt unmount record to force summary counter recalc
  xfs: factor inode lookup from xfs_ifree_cluster
  xfs: tail updates only need to occur when LSN changes
  xfs: factor common AIL item deletion code
  xfs: correctly acount for reclaimable slabs
  xfs: Improve metadata buffer reclaim accountability
  xfs: don't allow log IO to be throttled
  xfs: Throttle commits on delayed background CIL push
  xfs: Lower CIL flush limit for large logs
  xfs: remove some stale comments from the log code
  xfs: refactor unmount record writing
  xfs: merge xlog_commit_record with xlog_write_done
  ...
parents d3e5e977 5833112d
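
One of the changes pulled in above is the ENOSPC writeback ratelimit, which is the kernel's standard ratelimit pattern applied to xfs_flush_inodes(). A minimal sketch of that pattern follows; ratelimit_state_init(), ratelimit_set_flags() and __ratelimit() are the helpers actually used in the diff further down, while the function and variable names here are illustrative only:

#include <linux/fs.h>
#include <linux/ratelimit.h>
#include <linux/writeback.h>

/* illustrative state; the real one lives in struct xfs_mount */
static struct ratelimit_state example_flush_rl;

static void example_flush_setup(void)
{
	/* allow at most 16 flushes per quarter second */
	ratelimit_state_init(&example_flush_rl, HZ / 4, 16);
	/* report suppressed calls only when the state is released */
	ratelimit_set_flags(&example_flush_rl, RATELIMIT_MSG_ON_RELEASE);
}

static void example_flush_inodes(struct super_block *sb)
{
	if (!__ratelimit(&example_flush_rl))
		return;			/* over the limit: skip this flush */
	sync_inodes_sb(sb);		/* otherwise push dirty data to disk */
}

The point of the rate limit is that near ENOSPC many writers can pile into the flush path at once; dropping all but a handful of flushes per interval keeps writeback moving without stalling every writer behind sync_inodes_sb().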
@@ -328,6 +328,38 @@ xfs_validate_sb_common(
 		return -EFSCORRUPTED;
 	}
 
+	/* Validate the realtime geometry; stolen from xfs_repair */
+	if (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE ||
+	    sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) {
+		xfs_notice(mp,
+			"realtime extent sanity check failed");
+		return -EFSCORRUPTED;
+	}
+
+	if (sbp->sb_rblocks == 0) {
+		if (sbp->sb_rextents != 0 || sbp->sb_rbmblocks != 0 ||
+		    sbp->sb_rextslog != 0 || sbp->sb_frextents != 0) {
+			xfs_notice(mp,
+				"realtime zeroed geometry check failed");
+			return -EFSCORRUPTED;
+		}
+	} else {
+		uint64_t rexts;
+		uint64_t rbmblocks;
+
+		rexts = div_u64(sbp->sb_rblocks, sbp->sb_rextsize);
+		rbmblocks = howmany_64(sbp->sb_rextents,
+				       NBBY * sbp->sb_blocksize);
+		if (sbp->sb_rextents != rexts ||
+		    sbp->sb_rextslog != xfs_highbit32(sbp->sb_rextents) ||
+		    sbp->sb_rbmblocks != rbmblocks) {
+			xfs_notice(mp,
+				"realtime geometry sanity check failed");
+			return -EFSCORRUPTED;
+		}
+	}
+
 	if (sbp->sb_unit) {
 		if (!xfs_sb_version_hasdalign(sbp) ||
 		    sbp->sb_unit > sbp->sb_width ||
...
@@ -327,6 +327,9 @@ xfs_buf_free(
 			__free_page(page);
 		}
+		if (current->reclaim_state)
+			current->reclaim_state->reclaimed_slab +=
+							bp->b_page_count;
 	} else if (bp->b_flags & _XBF_KMEM)
 		kmem_free(bp->b_addr);
 	_xfs_buf_free_pages(bp);
@@ -2114,9 +2117,11 @@ xfs_buf_delwri_pushbuf(
 int __init
 xfs_buf_init(void)
 {
-	xfs_buf_zone = kmem_cache_create("xfs_buf",
-					 sizeof(struct xfs_buf), 0,
-					 SLAB_HWCACHE_ALIGN, NULL);
+	xfs_buf_zone = kmem_cache_create("xfs_buf", sizeof(struct xfs_buf), 0,
+					 SLAB_HWCACHE_ALIGN |
+					 SLAB_RECLAIM_ACCOUNT |
+					 SLAB_MEM_SPREAD,
+					 NULL);
 	if (!xfs_buf_zone)
 		goto out;
...
@@ -1105,8 +1105,8 @@ xfs_qm_dqflush(
 	 * Get the buffer containing the on-disk dquot
 	 */
 	error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
-				   mp->m_quotainfo->qi_dqchunklen, 0, &bp,
-				   &xfs_dquot_buf_ops);
+				   mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK,
+				   &bp, &xfs_dquot_buf_ops);
 	if (error)
 		goto out_unlock;
@@ -1177,7 +1177,7 @@ xfs_qm_dqflush(
 
 out_unlock:
 	xfs_dqfunlock(dqp);
-	return -EIO;
+	return error;
 }
 
 /*
...
@@ -189,7 +189,8 @@ xfs_qm_dquot_logitem_push(
 		if (!xfs_buf_delwri_queue(bp, buffer_list))
 			rval = XFS_ITEM_FLUSHING;
 		xfs_buf_relse(bp);
-	}
+	} else if (error == -EAGAIN)
+		rval = XFS_ITEM_LOCKED;
 
 	spin_lock(&lip->li_ailp->ail_lock);
 out_unlock:
...
@@ -15,7 +15,6 @@
 #include "xfs_trans.h"
 #include "xfs_inode_item.h"
 #include "xfs_icache.h"
-#include "xfs_log.h"
 #include "xfs_pnfs.h"
 
 /*
@@ -221,18 +220,7 @@ STATIC int
 xfs_fs_nfs_commit_metadata(
 	struct inode		*inode)
 {
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_lsn_t		lsn = 0;
-
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	if (xfs_ipincount(ip))
-		lsn = ip->i_itemp->ili_last_lsn;
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-	if (!lsn)
-		return 0;
-	return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+	return xfs_log_force_inode(XFS_I(inode));
 }
 
 const struct export_operations xfs_export_operations = {
...
@@ -80,19 +80,9 @@ xfs_dir_fsync(
 	int			datasync)
 {
 	struct xfs_inode	*ip = XFS_I(file->f_mapping->host);
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_lsn_t		lsn = 0;
 
 	trace_xfs_dir_fsync(ip);
-
-	xfs_ilock(ip, XFS_ILOCK_SHARED);
-	if (xfs_ipincount(ip))
-		lsn = ip->i_itemp->ili_last_lsn;
-	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
-	if (!lsn)
-		return 0;
-	return xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+	return xfs_log_force_inode(ip);
 }
 
 STATIC int
@@ -1069,7 +1059,11 @@ xfs_file_remap_range(
 	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
 			remap_flags);
+	if (ret)
+		goto out_unlock;
+
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
+		xfs_log_force_inode(dest);
 
 out_unlock:
 	xfs_reflink_remap_unlock(file_in, file_out);
 	if (ret)
...
@@ -1200,8 +1200,7 @@ xfs_create(
 	unlock_dp_on_error = false;
 
 	error = xfs_dir_createname(tp, dp, name, ip->i_ino,
-					resblks ?
-					resblks - XFS_IALLOC_SPACE_RES(mp) : 0);
+					resblks - XFS_IALLOC_SPACE_RES(mp));
 	if (error) {
 		ASSERT(error != -ENOSPC);
 		goto out_trans_cancel;
@@ -2503,6 +2502,88 @@ xfs_iunlink_remove(
 	return error;
 }
 
+/*
+ * Look up the inode number specified and mark it stale if it is found. If it is
+ * dirty, return the inode so it can be attached to the cluster buffer so it can
+ * be processed appropriately when the cluster free transaction completes.
+ */
+static struct xfs_inode *
+xfs_ifree_get_one_inode(
+	struct xfs_perag	*pag,
+	struct xfs_inode	*free_ip,
+	xfs_ino_t		inum)
+{
+	struct xfs_mount	*mp = pag->pag_mount;
+	struct xfs_inode	*ip;
+
+retry:
+	rcu_read_lock();
+	ip = radix_tree_lookup(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, inum));
+
+	/* Inode not in memory, nothing to do */
+	if (!ip)
+		goto out_rcu_unlock;
+
+	/*
+	 * because this is an RCU protected lookup, we could find a recently
+	 * freed or even reallocated inode during the lookup. We need to check
+	 * under the i_flags_lock for a valid inode here. Skip it if it is not
+	 * valid, the wrong inode or stale.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	if (ip->i_ino != inum || __xfs_iflags_test(ip, XFS_ISTALE)) {
+		spin_unlock(&ip->i_flags_lock);
+		goto out_rcu_unlock;
+	}
+	spin_unlock(&ip->i_flags_lock);
+
+	/*
+	 * Don't try to lock/unlock the current inode, but we _cannot_ skip the
+	 * other inodes that we did not find in the list attached to the buffer
+	 * and are not already marked stale. If we can't lock it, back off and
+	 * retry.
+	 */
+	if (ip != free_ip) {
+		if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
+			rcu_read_unlock();
+			delay(1);
+			goto retry;
+		}
+
+		/*
+		 * Check the inode number again in case we're racing with
+		 * freeing in xfs_reclaim_inode(). See the comments in that
+		 * function for more information as to why the initial check is
+		 * not sufficient.
+		 */
+		if (ip->i_ino != inum) {
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+			goto out_rcu_unlock;
+		}
+	}
+	rcu_read_unlock();
+
+	xfs_iflock(ip);
+	xfs_iflags_set(ip, XFS_ISTALE);
+
+	/*
+	 * We don't need to attach clean inodes or those only with unlogged
+	 * changes (which we throw away, anyway).
+	 */
+	if (!ip->i_itemp || xfs_inode_clean(ip)) {
+		ASSERT(ip != free_ip);
+		xfs_ifunlock(ip);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		goto out_no_inode;
+	}
+	return ip;
+
+out_rcu_unlock:
+	rcu_read_unlock();
+out_no_inode:
+	return NULL;
+}
+
 /*
  * A big issue when freeing the inode cluster is that we _cannot_ skip any
  * inodes that are in memory - they all must be marked stale and attached to
@@ -2603,77 +2684,11 @@ xfs_ifree_cluster(
 		 * even trying to lock them.
 		 */
 		for (i = 0; i < igeo->inodes_per_cluster; i++) {
-retry:
-			rcu_read_lock();
-			ip = radix_tree_lookup(&pag->pag_ici_root,
-					XFS_INO_TO_AGINO(mp, (inum + i)));
-
-			/* Inode not in memory, nothing to do */
-			if (!ip) {
-				rcu_read_unlock();
+			ip = xfs_ifree_get_one_inode(pag, free_ip, inum + i);
+			if (!ip)
 				continue;
-			}
-
-			/*
-			 * because this is an RCU protected lookup, we could
-			 * find a recently freed or even reallocated inode
-			 * during the lookup. We need to check under the
-			 * i_flags_lock for a valid inode here. Skip it if it
-			 * is not valid, the wrong inode or stale.
-			 */
-			spin_lock(&ip->i_flags_lock);
-			if (ip->i_ino != inum + i ||
-			    __xfs_iflags_test(ip, XFS_ISTALE)) {
-				spin_unlock(&ip->i_flags_lock);
-				rcu_read_unlock();
-				continue;
-			}
-			spin_unlock(&ip->i_flags_lock);
-
-			/*
-			 * Don't try to lock/unlock the current inode, but we
-			 * _cannot_ skip the other inodes that we did not find
-			 * in the list attached to the buffer and are not
-			 * already marked stale. If we can't lock it, back off
-			 * and retry.
-			 */
-			if (ip != free_ip) {
-				if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
-					rcu_read_unlock();
-					delay(1);
-					goto retry;
-				}
-
-				/*
-				 * Check the inode number again in case we're
-				 * racing with freeing in xfs_reclaim_inode().
-				 * See the comments in that function for more
-				 * information as to why the initial check is
-				 * not sufficient.
-				 */
-				if (ip->i_ino != inum + i) {
-					xfs_iunlock(ip, XFS_ILOCK_EXCL);
-					rcu_read_unlock();
-					continue;
-				}
-			}
-			rcu_read_unlock();
-
-			xfs_iflock(ip);
-			xfs_iflags_set(ip, XFS_ISTALE);
 
-			/*
-			 * we don't need to attach clean inodes or those only
-			 * with unlogged changes (which we throw away, anyway).
-			 */
 			iip = ip->i_itemp;
-			if (!iip || xfs_inode_clean(ip)) {
-				ASSERT(ip != free_ip);
-				xfs_ifunlock(ip);
-				xfs_iunlock(ip, XFS_ILOCK_EXCL);
-				continue;
-			}
-
 			iip->ili_last_fields = iip->ili_fields;
 			iip->ili_fields = 0;
 			iip->ili_fsync_fields = 0;
@@ -3930,3 +3945,22 @@ xfs_irele(
 	trace_xfs_irele(ip, _RET_IP_);
 	iput(VFS_I(ip));
 }
+
+/*
+ * Ensure all committed transactions touching the inode are written to the log.
+ */
+int
+xfs_log_force_inode(
+	struct xfs_inode	*ip)
+{
+	xfs_lsn_t		lsn = 0;
+
+	xfs_ilock(ip, XFS_ILOCK_SHARED);
+	if (xfs_ipincount(ip))
+		lsn = ip->i_itemp->ili_last_lsn;
+	xfs_iunlock(ip, XFS_ILOCK_SHARED);
+
+	if (!lsn)
+		return 0;
+	return xfs_log_force_lsn(ip->i_mount, lsn, XFS_LOG_SYNC, NULL);
+}
@@ -426,6 +426,7 @@ int xfs_itruncate_extents_flags(struct xfs_trans **,
 				struct xfs_inode *, int, xfs_fsize_t, int);
 void		xfs_iext_realloc(xfs_inode_t *, int, int);
 
+int		xfs_log_force_inode(struct xfs_inode *ip);
 void		xfs_iunpin_wait(xfs_inode_t *);
 #define xfs_ipincount(ip)	((unsigned int) atomic_read(&ip->i_pincount))
...
@@ -552,7 +552,8 @@ xfs_inode_item_push(
 		if (!xfs_buf_delwri_queue(bp, buffer_list))
 			rval = XFS_ITEM_FLUSHING;
 		xfs_buf_relse(bp);
-	}
+	} else if (error == -EAGAIN)
+		rval = XFS_ITEM_LOCKED;
 
 	spin_lock(&lip->li_ailp->ail_lock);
 out_unlock:
@@ -730,29 +731,27 @@ xfs_iflush_done(
 	 * holding the lock before removing the inode from the AIL.
 	 */
 	if (need_ail) {
-		bool			mlip_changed = false;
+		xfs_lsn_t		tail_lsn = 0;
 
 		/* this is an opencoded batch version of xfs_trans_ail_delete */
 		spin_lock(&ailp->ail_lock);
 		list_for_each_entry(blip, &tmp, li_bio_list) {
 			if (INODE_ITEM(blip)->ili_logged &&
-			    blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
-				mlip_changed |= xfs_ail_delete_one(ailp, blip);
-			else {
+			    blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) {
+				/*
+				 * xfs_ail_update_finish() only cares about the
+				 * lsn of the first tail item removed, any
+				 * others will be at the same or higher lsn so
+				 * we just ignore them.
+				 */
+				xfs_lsn_t lsn = xfs_ail_delete_one(ailp, blip);
+				if (!tail_lsn && lsn)
+					tail_lsn = lsn;
+			} else {
 				xfs_clear_li_failed(blip);
 			}
 		}
-
-		if (mlip_changed) {
-			if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
-				xlog_assign_tail_lsn_locked(ailp->ail_mount);
-			if (list_empty(&ailp->ail_head))
-				wake_up_all(&ailp->ail_empty);
-		}
-
-		spin_unlock(&ailp->ail_lock);
-		if (mlip_changed)
-			xfs_log_space_wake(ailp->ail_mount);
+		xfs_ail_update_finish(ailp, tail_lsn);
 	}
 
 	/*
...
@@ -105,10 +105,6 @@ struct xfs_log_item;
 struct xfs_item_ops;
 struct xfs_trans;
 
-xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
-		       struct xlog_ticket *ticket,
-		       struct xlog_in_core **iclog,
-		       bool regrant);
 int	  xfs_log_force(struct xfs_mount *mp, uint flags);
 int	  xfs_log_force_lsn(struct xfs_mount *mp, xfs_lsn_t lsn, uint flags,
 		int *log_forced);
...
@@ -668,6 +668,11 @@ xlog_cil_push_work(
 	push_seq = cil->xc_push_seq;
 	ASSERT(push_seq <= ctx->sequence);
 
+	/*
+	 * Wake up any background push waiters now this context is being pushed.
+	 */
+	wake_up_all(&ctx->push_wait);
+
 	/*
 	 * Check if we've anything to push. If there is nothing, then we don't
 	 * move on to a new sequence number and so we have to be able to push
@@ -744,6 +749,7 @@ xlog_cil_push_work(
 	 */
 	INIT_LIST_HEAD(&new_ctx->committing);
 	INIT_LIST_HEAD(&new_ctx->busy_extents);
+	init_waitqueue_head(&new_ctx->push_wait);
 	new_ctx->sequence = ctx->sequence + 1;
 	new_ctx->cil = cil;
 	cil->xc_ctx = new_ctx;
@@ -801,7 +807,7 @@ xlog_cil_push_work(
 	lvhdr.lv_iovecp = &lhdr;
 	lvhdr.lv_next = ctx->lv_chain;
 
-	error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
+	error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0, true);
 	if (error)
 		goto out_abort_free_ticket;
@@ -839,10 +845,11 @@ xlog_cil_push_work(
 	}
 	spin_unlock(&cil->xc_push_lock);
 
-	/* xfs_log_done always frees the ticket on error. */
-	commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, false);
-	if (commit_lsn == -1)
-		goto out_abort;
+	error = xlog_commit_record(log, tic, &commit_iclog, &commit_lsn);
+	if (error)
+		goto out_abort_free_ticket;
+
+	xfs_log_ticket_ungrant(log, tic);
 
 	spin_lock(&commit_iclog->ic_callback_lock);
 	if (commit_iclog->ic_state == XLOG_STATE_IOERROR) {
@@ -875,7 +882,7 @@ xlog_cil_push_work(
 	return;
 
 out_abort_free_ticket:
-	xfs_log_ticket_put(tic);
+	xfs_log_ticket_ungrant(log, tic);
 out_abort:
 	ASSERT(XLOG_FORCED_SHUTDOWN(log));
 	xlog_cil_committed(ctx);
@@ -890,7 +897,7 @@ xlog_cil_push_work(
  */
 static void
 xlog_cil_push_background(
-	struct xlog	*log)
+	struct xlog	*log) __releases(cil->xc_ctx_lock)
 {
 	struct xfs_cil	*cil = log->l_cilp;
@@ -904,14 +911,36 @@ xlog_cil_push_background(
 	 * don't do a background push if we haven't used up all the
 	 * space available yet.
 	 */
-	if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+	if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) {
+		up_read(&cil->xc_ctx_lock);
 		return;
+	}
 
 	spin_lock(&cil->xc_push_lock);
 	if (cil->xc_push_seq < cil->xc_current_sequence) {
 		cil->xc_push_seq = cil->xc_current_sequence;
 		queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
 	}
+
+	/*
+	 * Drop the context lock now, we can't hold that if we need to sleep
+	 * because we are over the blocking threshold. The push_lock is still
+	 * held, so blocking threshold sleep/wakeup is still correctly
+	 * serialised here.
+	 */
+	up_read(&cil->xc_ctx_lock);
+
+	/*
+	 * If we are well over the space limit, throttle the work that is being
+	 * done until the push work on this context has begun.
+	 */
+	if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
+		trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
+		ASSERT(cil->xc_ctx->space_used < log->l_logsize);
+		xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock);
+		return;
+	}
+
 	spin_unlock(&cil->xc_push_lock);
 }
@@ -1007,7 +1036,10 @@ xfs_log_commit_cil(
 	if (commit_lsn)
 		*commit_lsn = xc_commit_lsn;
 
-	xfs_log_done(mp, tp->t_ticket, NULL, regrant);
+	if (regrant && !XLOG_FORCED_SHUTDOWN(log))
+		xfs_log_ticket_regrant(log, tp->t_ticket);
+	else
+		xfs_log_ticket_ungrant(log, tp->t_ticket);
 	tp->t_ticket = NULL;
 	xfs_trans_unreserve_and_mod_sb(tp);
@@ -1028,9 +1060,9 @@ xfs_log_commit_cil(
 		if (lip->li_ops->iop_committing)
 			lip->li_ops->iop_committing(lip, xc_commit_lsn);
 	}
-	xlog_cil_push_background(log);
 
-	up_read(&cil->xc_ctx_lock);
+	/* xlog_cil_push_background() releases cil->xc_ctx_lock */
+	xlog_cil_push_background(log);
 }
 
 /*
@@ -1189,6 +1221,7 @@ xlog_cil_init(
 	INIT_LIST_HEAD(&ctx->committing);
 	INIT_LIST_HEAD(&ctx->busy_extents);
+	init_waitqueue_head(&ctx->push_wait);
 	ctx->sequence = 1;
 	ctx->cil = cil;
 	cil->xc_ctx = ctx;
...
@@ -51,13 +51,11 @@ enum xlog_iclog_state {
 };
 
 /*
- * Flags to log ticket
+ * Log ticket flags
 */
-#define XLOG_TIC_INITED		0x1	/* has been initialized */
-#define XLOG_TIC_PERM_RESERV	0x2	/* permanent reservation */
+#define XLOG_TIC_PERM_RESERV	0x1	/* permanent reservation */
 
 #define XLOG_TIC_FLAGS \
-	{ XLOG_TIC_INITED,	"XLOG_TIC_INITED" }, \
 	{ XLOG_TIC_PERM_RESERV,	"XLOG_TIC_PERM_RESERV" }
 
 /*
@@ -242,6 +240,7 @@ struct xfs_cil_ctx {
 	struct xfs_log_vec	*lv_chain;	/* logvecs being pushed */
 	struct list_head	iclog_entry;
 	struct list_head	committing;	/* ctx committing list */
+	wait_queue_head_t	push_wait;	/* background push throttle */
 	struct work_struct	discard_endio_work;
 };
@@ -318,13 +317,53 @@ struct xfs_cil {
  * tries to keep 25% of the log free, so we need to keep below that limit or we
  * risk running out of free log space to start any new transactions.
  *
- * In order to keep background CIL push efficient, we will set a lower
- * threshold at which background pushing is attempted without blocking current
- * transaction commits. A separate, higher bound defines when CIL pushes are
- * enforced to ensure we stay within our maximum checkpoint size bounds.
- * threshold, yet give us plenty of space for aggregation on large logs.
+ * In order to keep background CIL push efficient, we only need to ensure the
+ * CIL is large enough to maintain sufficient in-memory relogging to avoid
+ * repeated physical writes of frequently modified metadata. If we allow the CIL
+ * to grow to a substantial fraction of the log, then we may be pinning hundreds
+ * of megabytes of metadata in memory until the CIL flushes. This can cause
+ * issues when we are running low on memory - pinned memory cannot be reclaimed,
+ * and the CIL consumes a lot of memory. Hence we need to set an upper physical
+ * size limit for the CIL that limits the maximum amount of memory pinned by the
+ * CIL but does not limit performance by reducing relogging efficiency
+ * significantly.
+ *
+ * As such, the CIL push threshold ends up being the smaller of two thresholds:
+ * - a threshold large enough that it allows CIL to be pushed and progress to be
+ *   made without excessive blocking of incoming transaction commits. This is
+ *   defined to be 12.5% of the log space - half the 25% push threshold of the
+ *   AIL.
+ * - small enough that it doesn't pin excessive amounts of memory but maintains
+ *   close to peak relogging efficiency. This is defined to be 16x the iclog
+ *   buffer window (32MB) as measurements have shown this to be roughly the
+ *   point of diminishing performance increases under highly concurrent
+ *   modification workloads.
+ *
+ * To prevent the CIL from overflowing upper commit size bounds, we introduce a
+ * new threshold at which we block committing transactions until the background
+ * CIL commit commences and switches to a new context. While this is not a hard
+ * limit, it forces the process committing a transaction to the CIL to block and
+ * yield the CPU, giving the CIL push work a chance to be scheduled and start
+ * work. This prevents a process running lots of transactions from overfilling
+ * the CIL because it is not yielding the CPU. We set the blocking limit at
+ * twice the background push space threshold so we keep in line with the AIL
+ * push thresholds.
+ *
+ * Note: this is not a -hard- limit as blocking is applied after the transaction
+ * is inserted into the CIL and the push has been triggered. It is largely a
+ * throttling mechanism that allows the CIL push to be scheduled and run. A hard
+ * limit will be difficult to implement without introducing global serialisation
+ * in the CIL commit fast path, and it's not at all clear that we actually need
+ * such hard limits given the ~7 years we've run without a hard limit before
+ * finding the first situation where a checkpoint size overflow actually
+ * occurred. Hence the simple throttle, and an ASSERT check to tell us that
+ * we've overrun the max size.
  */
-#define XLOG_CIL_SPACE_LIMIT(log)	(log->l_logsize >> 3)
+#define XLOG_CIL_SPACE_LIMIT(log)	\
+	min_t(int, (log)->l_logsize >> 3, BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4)
+
+#define XLOG_CIL_BLOCKING_SPACE_LIMIT(log)	\
+	(XLOG_CIL_SPACE_LIMIT(log) * 2)
 
 /*
  * ticket grant locks, queues and accounting have their own cachelines
@@ -439,14 +478,14 @@ xlog_write_adv_cnt(void **ptr, int *len, int *off, size_t bytes)
 void	xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket);
 void	xlog_print_trans(struct xfs_trans *);
-int
-xlog_write(
-	struct xlog		*log,
-	struct xfs_log_vec	*log_vector,
-	struct xlog_ticket	*tic,
-	xfs_lsn_t		*start_lsn,
-	struct xlog_in_core	**commit_iclog,
-	uint			flags);
+int	xlog_write(struct xlog *log, struct xfs_log_vec *log_vector,
+		struct xlog_ticket *tic, xfs_lsn_t *start_lsn,
+		struct xlog_in_core **commit_iclog, uint flags,
+		bool need_start_rec);
+int	xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket,
+		struct xlog_in_core **iclog, xfs_lsn_t *lsn);
+void	xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
+void	xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
 
 /*
  * When we crack an atomic LSN, we sample it first so that the value will not
...
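
To make the push/throttle thresholds described in the comment above concrete, here is a back-of-envelope sketch of the arithmetic as plain userspace C. The numbers are illustrative only; the kernel derives the ~32MB cap from BBTOB(XLOG_TOTAL_REC_SHIFT(log)) << 4 rather than hard-coding it:

#include <stdio.h>

int main(void)
{
	long long logsize = 2048LL << 20;	/* assume a 2GiB log */
	long long cap = 32LL << 20;		/* 16x iclog window, ~32MiB per the comment */

	/* background push threshold: 12.5% of the log, capped at ~32MiB */
	long long space_limit = logsize >> 3;
	if (space_limit > cap)
		space_limit = cap;

	/* transaction commits start throttling at twice the push threshold */
	long long blocking_limit = space_limit * 2;

	printf("push at %lld MiB, throttle at %lld MiB\n",
	       space_limit >> 20, blocking_limit >> 20);
	return 0;
}

For a 2GiB log this works out to a background push at 32MiB of CIL space and commit throttling at 64MiB; for a small 64MiB log the 12.5% term (8MiB) is the smaller of the two and wins.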
@@ -167,6 +167,7 @@ typedef struct xfs_mount {
 	struct xfs_kobj		m_error_meta_kobj;
 	struct xfs_error_cfg	m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
 	struct xstats		m_stats;	/* per-fs stats */
+	struct ratelimit_state	m_flush_inodes_ratelimit;
 
 	struct workqueue_struct *m_buf_workqueue;
 	struct workqueue_struct	*m_unwritten_workqueue;
...
@@ -121,12 +121,11 @@ xfs_qm_dqpurge(
 {
 	struct xfs_mount	*mp = dqp->q_mount;
 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
+	int			error = -EAGAIN;
 
 	xfs_dqlock(dqp);
-	if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
-		xfs_dqunlock(dqp);
-		return -EAGAIN;
-	}
+	if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0)
+		goto out_unlock;
 
 	dqp->dq_flags |= XFS_DQ_FREEING;
@@ -139,7 +138,6 @@ xfs_qm_dqpurge(
 	 */
 	if (XFS_DQ_IS_DIRTY(dqp)) {
 		struct xfs_buf	*bp = NULL;
-		int		error;
 
 		/*
 		 * We don't care about getting disk errors here. We need
@@ -149,6 +147,8 @@ xfs_qm_dqpurge(
 		if (!error) {
 			error = xfs_bwrite(bp);
 			xfs_buf_relse(bp);
+		} else if (error == -EAGAIN) {
+			goto out_unlock;
 		}
 		xfs_dqflock(dqp);
 	}
@@ -174,6 +174,10 @@ xfs_qm_dqpurge(
 	xfs_qm_dqdestroy(dqp);
 
 	return 0;
+
+out_unlock:
+	xfs_dqunlock(dqp);
+	return error;
 }
 
 /*
...
@@ -528,6 +528,9 @@ xfs_flush_inodes(
 {
 	struct super_block	*sb = mp->m_super;
 
+	if (!__ratelimit(&mp->m_flush_inodes_ratelimit))
+		return;
+
 	if (down_read_trylock(&sb->s_umount)) {
 		sync_inodes_sb(sb);
 		up_read(&sb->s_umount);
@@ -1366,6 +1369,17 @@ xfs_fc_fill_super(
 	if (error)
 		goto out_free_names;
 
+	/*
+	 * Cap the number of invocations of xfs_flush_inodes to 16 for every
+	 * quarter of a second. The magic numbers here were determined by
+	 * observation neither to cause stalls in writeback when there are a
+	 * lot of IO threads and the fs is near ENOSPC, nor cause any fstest
+	 * regressions. YMMV.
+	 */
+	ratelimit_state_init(&mp->m_flush_inodes_ratelimit, HZ / 4, 16);
+	ratelimit_set_flags(&mp->m_flush_inodes_ratelimit,
+			RATELIMIT_MSG_ON_RELEASE);
+
 	error = xfs_init_mount_workqueues(mp);
 	if (error)
 		goto out_close_devices;
@@ -1861,7 +1875,8 @@ xfs_init_zones(void)
 	xfs_ili_zone = kmem_cache_create("xfs_ili",
 					sizeof(struct xfs_inode_log_item), 0,
-					SLAB_MEM_SPREAD, NULL);
+					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+					NULL);
 	if (!xfs_ili_zone)
 		goto out_destroy_inode_zone;
...
@@ -176,7 +176,6 @@ xfs_symlink(
 		return -ENAMETOOLONG;
 	ASSERT(pathlen > 0);
 
-	udqp = gdqp = NULL;
 	prid = xfs_get_initial_prid(dp);
 
 	/*
...
@@ -1001,8 +1001,6 @@ DECLARE_EVENT_CLASS(xfs_loggrant_class,
 DEFINE_EVENT(xfs_loggrant_class, name, \
 	TP_PROTO(struct xlog *log, struct xlog_ticket *tic), \
 	TP_ARGS(log, tic))
-DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
-DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
 DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
 DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep);
 DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake);
@@ -1011,12 +1009,13 @@ DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
 DEFINE_LOGGRANT_EVENT(xfs_log_reserve_exit);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant);
 DEFINE_LOGGRANT_EVENT(xfs_log_regrant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
+DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant);
+DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_ticket_regrant_sub);
+DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant);
+DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_sub);
+DEFINE_LOGGRANT_EVENT(xfs_log_ticket_ungrant_exit);
+DEFINE_LOGGRANT_EVENT(xfs_log_cil_wait);
 
 DECLARE_EVENT_CLASS(xfs_log_item_class,
 	TP_PROTO(struct xfs_log_item *lip),
...
@@ -9,6 +9,7 @@
 #include "xfs_shared.h"
 #include "xfs_format.h"
 #include "xfs_log_format.h"
+#include "xfs_log_priv.h"
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_extent_busy.h"
@@ -150,6 +151,7 @@ xfs_trans_reserve(
 	uint			blocks,
 	uint			rtextents)
 {
+	struct xfs_mount	*mp = tp->t_mountp;
 	int			error = 0;
 	bool			rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
@@ -162,7 +164,7 @@ xfs_trans_reserve(
 	 * fail if the count would go below zero.
 	 */
 	if (blocks > 0) {
-		error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
+		error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
 		if (error != 0) {
 			current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
 			return -ENOSPC;
@@ -191,9 +193,9 @@ xfs_trans_reserve(
 		if (tp->t_ticket != NULL) {
 			ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
-			error = xfs_log_regrant(tp->t_mountp, tp->t_ticket);
+			error = xfs_log_regrant(mp, tp->t_ticket);
 		} else {
-			error = xfs_log_reserve(tp->t_mountp,
+			error = xfs_log_reserve(mp,
 						resp->tr_logres,
 						resp->tr_logcount,
 						&tp->t_ticket, XFS_TRANSACTION,
@@ -213,7 +215,7 @@ xfs_trans_reserve(
 	 * fail if the count would go below zero.
 	 */
 	if (rtextents > 0) {
-		error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents));
+		error = xfs_mod_frextents(mp, -((int64_t)rtextents));
 		if (error) {
 			error = -ENOSPC;
 			goto undo_log;
@@ -229,7 +231,7 @@ xfs_trans_reserve(
 	 */
 undo_log:
 	if (resp->tr_logres > 0) {
-		xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, false);
+		xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
 		tp->t_ticket = NULL;
 		tp->t_log_res = 0;
 		tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
@@ -237,7 +239,7 @@ xfs_trans_reserve(
 
 undo_blocks:
 	if (blocks > 0) {
-		xfs_mod_fdblocks(tp->t_mountp, (int64_t)blocks, rsvd);
+		xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
 		tp->t_blk_res = 0;
 	}
@@ -1004,9 +1006,10 @@ __xfs_trans_commit(
 	 */
 	xfs_trans_unreserve_and_mod_dquots(tp);
 	if (tp->t_ticket) {
-		commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, regrant);
-		if (commit_lsn == -1 && !error)
-			error = -EIO;
+		if (regrant && !XLOG_FORCED_SHUTDOWN(mp->m_log))
+			xfs_log_ticket_regrant(mp->m_log, tp->t_ticket);
+		else
+			xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
 		tp->t_ticket = NULL;
 	}
 	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
@@ -1065,7 +1068,7 @@ xfs_trans_cancel(
 	xfs_trans_unreserve_and_mod_dquots(tp);
 
 	if (tp->t_ticket) {
-		xfs_log_done(mp, tp->t_ticket, NULL, false);
+		xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
 		tp->t_ticket = NULL;
 	}
...
@@ -109,17 +109,25 @@ xfs_ail_next(
  * We need the AIL lock in order to get a coherent read of the lsn of the last
  * item in the AIL.
  */
+static xfs_lsn_t
+__xfs_ail_min_lsn(
+	struct xfs_ail		*ailp)
+{
+	struct xfs_log_item	*lip = xfs_ail_min(ailp);
+
+	if (lip)
+		return lip->li_lsn;
+	return 0;
+}
+
 xfs_lsn_t
 xfs_ail_min_lsn(
 	struct xfs_ail		*ailp)
 {
-	xfs_lsn_t		lsn = 0;
-	struct xfs_log_item	*lip;
+	xfs_lsn_t		lsn;
 
 	spin_lock(&ailp->ail_lock);
-	lip = xfs_ail_min(ailp);
-	if (lip)
-		lsn = lip->li_lsn;
+	lsn = __xfs_ail_min_lsn(ailp);
 	spin_unlock(&ailp->ail_lock);
 
 	return lsn;
@@ -681,6 +689,28 @@ xfs_ail_push_all_sync(
 	finish_wait(&ailp->ail_empty, &wait);
 }
 
+void
+xfs_ail_update_finish(
+	struct xfs_ail		*ailp,
+	xfs_lsn_t		old_lsn) __releases(ailp->ail_lock)
+{
+	struct xfs_mount	*mp = ailp->ail_mount;
+
+	/* if the tail lsn hasn't changed, don't do updates or wakeups. */
+	if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
+		spin_unlock(&ailp->ail_lock);
+		return;
+	}
+
+	if (!XFS_FORCED_SHUTDOWN(mp))
+		xlog_assign_tail_lsn_locked(mp);
+
+	if (list_empty(&ailp->ail_head))
+		wake_up_all(&ailp->ail_empty);
+	spin_unlock(&ailp->ail_lock);
+	xfs_log_space_wake(mp);
+}
+
 /*
  * xfs_trans_ail_update - bulk AIL insertion operation.
  *
@@ -712,7 +742,7 @@ xfs_trans_ail_update_bulk(
 	xfs_lsn_t		lsn) __releases(ailp->ail_lock)
 {
 	struct xfs_log_item	*mlip;
-	int			mlip_changed = 0;
+	xfs_lsn_t		tail_lsn = 0;
 	int			i;
 	LIST_HEAD(tmp);
@@ -727,9 +757,10 @@ xfs_trans_ail_update_bulk(
 				continue;
 
 			trace_xfs_ail_move(lip, lip->li_lsn, lsn);
+			if (mlip == lip && !tail_lsn)
+				tail_lsn = lip->li_lsn;
+
 			xfs_ail_delete(ailp, lip);
-			if (mlip == lip)
-				mlip_changed = 1;
 		} else {
 			trace_xfs_ail_insert(lip, 0, lsn);
 		}
@@ -740,23 +771,23 @@ xfs_trans_ail_update_bulk(
 	if (!list_empty(&tmp))
 		xfs_ail_splice(ailp, cur, &tmp, lsn);
 
-	if (mlip_changed) {
-		if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
-			xlog_assign_tail_lsn_locked(ailp->ail_mount);
-		spin_unlock(&ailp->ail_lock);
-
-		xfs_log_space_wake(ailp->ail_mount);
-	} else {
-		spin_unlock(&ailp->ail_lock);
-	}
+	xfs_ail_update_finish(ailp, tail_lsn);
 }
 
-bool
+/*
+ * Delete one log item from the AIL.
+ *
+ * If this item was at the tail of the AIL, return the LSN of the log item so
+ * that we can use it to check if the LSN of the tail of the log has moved
+ * when finishing up the AIL delete process in xfs_ail_update_finish().
+ */
+xfs_lsn_t
 xfs_ail_delete_one(
 	struct xfs_ail		*ailp,
 	struct xfs_log_item	*lip)
 {
 	struct xfs_log_item	*mlip = xfs_ail_min(ailp);
+	xfs_lsn_t		lsn = lip->li_lsn;
 
 	trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
 	xfs_ail_delete(ailp, lip);
@@ -764,7 +795,9 @@ xfs_ail_delete_one(
 	clear_bit(XFS_LI_IN_AIL, &lip->li_flags);
 	lip->li_lsn = 0;
 
-	return mlip == lip;
+	if (mlip == lip)
+		return lsn;
+	return 0;
 }
 
 /**
@@ -792,10 +825,10 @@ void
 xfs_trans_ail_delete(
 	struct xfs_ail		*ailp,
 	struct xfs_log_item	*lip,
-	int			shutdown_type) __releases(ailp->ail_lock)
+	int			shutdown_type)
 {
 	struct xfs_mount	*mp = ailp->ail_mount;
-	bool			mlip_changed;
+	xfs_lsn_t		tail_lsn;
 
 	if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
 		spin_unlock(&ailp->ail_lock);
@@ -808,17 +841,8 @@ xfs_trans_ail_delete(
 		return;
 	}
 
-	mlip_changed = xfs_ail_delete_one(ailp, lip);
-	if (mlip_changed) {
-		if (!XFS_FORCED_SHUTDOWN(mp))
-			xlog_assign_tail_lsn_locked(mp);
-		if (list_empty(&ailp->ail_head))
-			wake_up_all(&ailp->ail_empty);
-	}
-
-	spin_unlock(&ailp->ail_lock);
-	if (mlip_changed)
-		xfs_log_space_wake(ailp->ail_mount);
+	tail_lsn = xfs_ail_delete_one(ailp, lip);
+	xfs_ail_update_finish(ailp, tail_lsn);
 }
 
 int
...
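
The AIL rework above follows a simple pattern: the delete helper reports the removed item's LSN only when that item was the list minimum, and the finish helper skips the tail-LSN update and wakeups whenever the minimum did not actually move. A tiny standalone sketch of the same idea, using a plain sorted list rather than the kernel's structures:

#include <stdio.h>

struct item { long lsn; struct item *next; };

/* Unlink 'victim' from an LSN-sorted list; return its lsn if it was the
 * head (the AIL minimum), 0 otherwise. */
static long delete_one(struct item **head, struct item *victim)
{
	long old_min = (*head == victim) ? victim->lsn : 0;

	for (struct item **pp = head; *pp; pp = &(*pp)->next) {
		if (*pp == victim) {
			*pp = victim->next;
			break;
		}
	}
	return old_min;
}

/* Only recompute the tail and wake waiters when the minimum moved. */
static void update_finish(struct item *head, long old_min)
{
	if (!old_min || (head && head->lsn == old_min))
		return;		/* tail unchanged: nothing to do */
	printf("tail moved past %ld, wake waiters\n", old_min);
}

This mirrors why xfs_ail_delete_one() now returns an xfs_lsn_t instead of a bool: callers can batch several deletions and pay for the tail update and log-space wakeup once, and only when the tail really changed.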
@@ -91,9 +91,11 @@ xfs_trans_ail_update(
 	xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
 }
 
-bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
+xfs_lsn_t xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
+void xfs_ail_update_finish(struct xfs_ail *ailp, xfs_lsn_t old_lsn)
+			__releases(ailp->ail_lock);
 void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
-		int shutdown_type) __releases(ailp->ail_lock);
+		int shutdown_type);
 
 static inline void
 xfs_trans_ail_remove(
...