Commit a78feb7c authored by Linus Torvalds

Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6

* 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6:
  [XFS] Avoid replaying inode buffer initialisation log items if on-disk version is newer.
  [XFS] Ensure file size updates have been completed before writing inode to disk.
  [XFS] On-demand reaping of the MRU cache
parents 91fe7d7c b394e43e
@@ -181,6 +181,7 @@ xfs_setfilesize(
 		ip->i_d.di_size = isize;
 		ip->i_update_core = 1;
 		ip->i_update_size = 1;
+		mark_inode_dirty_sync(vn_to_inode(ioend->io_vnode));
 	}
 
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);

@@ -415,8 +415,10 @@ xfs_fs_write_inode(
 	if (vp) {
 		vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address);
-		if (sync)
+		if (sync) {
+			filemap_fdatawait(inode->i_mapping);
 			flags |= FLUSH_SYNC;
+		}
 		error = bhv_vop_iflush(vp, flags);
 		if (error == EAGAIN)
 			error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0;

@@ -52,6 +52,11 @@ typedef struct xfs_buf_log_format_t {
 #define	XFS_BLI_UDQUOT_BUF	0x4
 #define	XFS_BLI_PDQUOT_BUF	0x8
 #define	XFS_BLI_GDQUOT_BUF	0x10
+/*
+ * This flag indicates that the buffer contains newly allocated
+ * inodes.
+ */
+#define	XFS_BLI_INODE_NEW_BUF	0x20
 
 #define	XFS_BLI_CHUNK		128
 #define	XFS_BLI_SHIFT		7

@@ -467,8 +467,7 @@ void
 xfs_filestream_flush(
 	xfs_mount_t	*mp)
 {
-	/* point in time flush, so keep the reaper running */
-	xfs_mru_cache_flush(mp->m_filestream, 1);
+	xfs_mru_cache_flush(mp->m_filestream);
 }
 
 /*

@@ -1874,6 +1874,7 @@ xlog_recover_do_inode_buffer(
 /*ARGSUSED*/
 STATIC void
 xlog_recover_do_reg_buffer(
+	xfs_mount_t		*mp,
 	xlog_recover_item_t	*item,
 	xfs_buf_t		*bp,
 	xfs_buf_log_format_t	*buf_f)

@@ -1884,6 +1885,50 @@ xlog_recover_do_reg_buffer(
 	unsigned int		*data_map = NULL;
 	unsigned int		map_size = 0;
 	int			error;
+	int			stale_buf = 1;
+
+	/*
+	 * Scan through the on-disk inode buffer and attempt to
+	 * determine if it has been written to since it was logged.
+	 *
+	 * - If any of the magic numbers are incorrect then the buffer is stale
+	 * - If any of the modes are non-zero then the buffer is not stale
+	 * - If all of the modes are zero and at least one of the generation
+	 *   counts is non-zero then the buffer is stale
+	 *
+	 * If the end result is a stale buffer then the log buffer is replayed
+	 * otherwise it is skipped.
+	 *
+	 * This heuristic is not perfect.  It can be improved by scanning the
+	 * entire inode chunk for evidence that any of the inode clusters have
+	 * been updated.  To fix this problem completely we will need a major
+	 * architectural change to the logging system.
+	 */
+	if (buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF) {
+		xfs_dinode_t	*dip;
+		int		inodes_per_buf;
+		int		mode_count = 0;
+		int		gen_count = 0;
+
+		stale_buf = 0;
+		inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
+		for (i = 0; i < inodes_per_buf; i++) {
+			dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+				i * mp->m_sb.sb_inodesize);
+			if (be16_to_cpu(dip->di_core.di_magic) !=
+					XFS_DINODE_MAGIC) {
+				stale_buf = 1;
+				break;
+			}
+			if (be16_to_cpu(dip->di_core.di_mode))
+				mode_count++;
+			if (be16_to_cpu(dip->di_core.di_gen))
+				gen_count++;
+		}
+
+		if (!mode_count && gen_count)
+			stale_buf = 1;
+	}
 
 	switch (buf_f->blf_type) {
 	case XFS_LI_BUF:

@@ -1917,7 +1962,7 @@ xlog_recover_do_reg_buffer(
 					-1, 0, XFS_QMOPT_DOWARN,
 					"dquot_buf_recover");
 		}
-		if (!error)
+		if (!error && stale_buf)
 			memcpy(xfs_buf_offset(bp,
 				(uint)bit << XFS_BLI_SHIFT),	/* dest */
 				item->ri_buf[i].i_addr,		/* source */

@@ -2089,7 +2134,7 @@ xlog_recover_do_dquot_buffer(
 	if (log->l_quotaoffs_flag & type)
 		return;
 
-	xlog_recover_do_reg_buffer(item, bp, buf_f);
+	xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 }
 
 /*

@@ -2190,7 +2235,7 @@ xlog_recover_do_buffer_trans(
 	  (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
 		xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
 	} else {
-		xlog_recover_do_reg_buffer(item, bp, buf_f);
+		xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 	}
 	if (error)
 		return XFS_ERROR(error);

@@ -206,8 +206,11 @@ _xfs_mru_cache_list_insert(
 	 */
 	if (!_xfs_mru_cache_migrate(mru, now)) {
 		mru->time_zero = now;
-		if (!mru->next_reap)
-			mru->next_reap = mru->grp_count * mru->grp_time;
+		if (!mru->queued) {
+			mru->queued = 1;
+			queue_delayed_work(xfs_mru_reap_wq, &mru->work,
+			                   mru->grp_count * mru->grp_time);
+		}
 	} else {
 		grp = (now - mru->time_zero) / mru->grp_time;
 		grp = (mru->lru_grp + grp) % mru->grp_count;

@@ -271,29 +274,26 @@ _xfs_mru_cache_reap(
 	struct work_struct	*work)
 {
 	xfs_mru_cache_t		*mru = container_of(work, xfs_mru_cache_t, work.work);
-	unsigned long		now;
+	unsigned long		now, next;
 
 	ASSERT(mru && mru->lists);
 	if (!mru || !mru->lists)
 		return;
 
 	mutex_spinlock(&mru->lock);
-	now = jiffies;
-	if (mru->reap_all ||
-	    (mru->next_reap && time_after(now, mru->next_reap))) {
-		if (mru->reap_all)
-			now += mru->grp_count * mru->grp_time * 2;
-		mru->next_reap = _xfs_mru_cache_migrate(mru, now);
-		_xfs_mru_cache_clear_reap_list(mru);
-	}
+	next = _xfs_mru_cache_migrate(mru, jiffies);
+	_xfs_mru_cache_clear_reap_list(mru);
 
-	/*
-	 * the process that triggered the reap_all is responsible
-	 * for restating the periodic reap if it is required.
-	 */
-	if (!mru->reap_all)
-		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-	mru->reap_all = 0;
+	mru->queued = next;
+	if ((mru->queued > 0)) {
+		now = jiffies;
+		if (next <= now)
+			next = 0;
+		else
+			next -= now;
+		queue_delayed_work(xfs_mru_reap_wq, &mru->work, next);
+	}
 
 	mutex_spinunlock(&mru->lock, 0);
 }

@@ -352,7 +352,7 @@ xfs_mru_cache_create(
 	/* An extra list is needed to avoid reaping up to a grp_time early. */
 	mru->grp_count = grp_count + 1;
-	mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
+	mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
 
 	if (!mru->lists) {
 		err = ENOMEM;

@@ -374,11 +374,6 @@ xfs_mru_cache_create(
 	mru->grp_time  = grp_time;
 	mru->free_func = free_func;
 
-	/* start up the reaper event */
-	mru->next_reap = 0;
-	mru->reap_all = 0;
-	queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
-
 	*mrup = mru;
 
 exit:

@@ -394,35 +389,25 @@ xfs_mru_cache_create(
  * Call xfs_mru_cache_flush() to flush out all cached entries, calling their
  * free functions as they're deleted.  When this function returns, the caller is
  * guaranteed that all the free functions for all the elements have finished
- * executing.
- *
- * While we are flushing, we stop the periodic reaper event from triggering.
- * Normally, we want to restart this periodic event, but if we are shutting
- * down the cache we do not want it restarted. hence the restart parameter
- * where 0 = do not restart reaper and 1 = restart reaper.
+ * executing and the reaper is not running.
  */
 void
 xfs_mru_cache_flush(
-	xfs_mru_cache_t		*mru,
-	int			restart)
+	xfs_mru_cache_t		*mru)
 {
 	if (!mru || !mru->lists)
 		return;
 
-	cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
-
 	mutex_spinlock(&mru->lock);
-	mru->reap_all = 1;
-	mutex_spinunlock(&mru->lock, 0);
+	if (mru->queued) {
+		mutex_spinunlock(&mru->lock, 0);
+		cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work);
+		mutex_spinlock(&mru->lock);
+	}
 
-	queue_work(xfs_mru_reap_wq, &mru->work.work);
-	flush_workqueue(xfs_mru_reap_wq);
-
-	mutex_spinlock(&mru->lock);
-	WARN_ON_ONCE(mru->reap_all != 0);
-	mru->reap_all = 0;
-	if (restart)
-		queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time);
+	_xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time);
+	_xfs_mru_cache_clear_reap_list(mru);
+
 	mutex_spinunlock(&mru->lock, 0);
 }

@@ -433,8 +418,7 @@ xfs_mru_cache_destroy(
 	if (!mru || !mru->lists)
 		return;
 
-	/* we don't want the reaper to restart here */
-	xfs_mru_cache_flush(mru, 0);
+	xfs_mru_cache_flush(mru);
 
 	kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists));
 	kmem_free(mru, sizeof(*mru));

@@ -32,11 +32,9 @@ typedef struct xfs_mru_cache
 	unsigned int		grp_time;  /* Time period spanned by grps.  */
 	unsigned int		lru_grp;   /* Group containing time zero.   */
 	unsigned long		time_zero; /* Time first element was added. */
-	unsigned long		next_reap; /* Time that the reaper should
-					      next do something. */
-	unsigned int		reap_all;  /* if set, reap all lists */
 	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */
 	struct delayed_work	work;      /* Workqueue data for reaping.   */
+	unsigned int		queued;	   /* work has been queued */
 } xfs_mru_cache_t;
 
 int xfs_mru_cache_init(void);

@@ -44,7 +42,7 @@ void xfs_mru_cache_uninit(void);
 int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,
 			     unsigned int grp_count,
 			     xfs_mru_cache_free_func_t free_func);
-void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart);
+void xfs_mru_cache_flush(xfs_mru_cache_t *mru);
 void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);
 int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key,
 				void *value);

@@ -966,6 +966,7 @@ xfs_trans_inode_alloc_buf(
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
 	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
+	bip->bli_format.blf_flags |= XFS_BLI_INODE_NEW_BUF;
 }

@@ -1082,6 +1082,9 @@ xfs_fsync(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return XFS_ERROR(EIO);
 
+	if (flag & FSYNC_DATA)
+		filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping);
+
 	/*
 	 * We always need to make sure that the required inode state
 	 * is safe on disk.  The vnode might be clean but because

@@ -3769,12 +3772,16 @@ xfs_inode_flush(
 			sync_lsn = log->l_last_sync_lsn;
 			GRANT_UNLOCK(log, s);
 
-			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0))
-				return 0;
+			if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) {
+				if (flags & FLUSH_SYNC)
+					log_flags |= XFS_LOG_SYNC;
+				error = xfs_log_force(mp, iip->ili_last_lsn,
+							log_flags);
+				if (error)
+					return error;
+			}
 
-			if (flags & FLUSH_SYNC)
-				log_flags |= XFS_LOG_SYNC;
-
-			return xfs_log_force(mp, iip->ili_last_lsn, log_flags);
+			if (ip->i_update_core == 0)
+				return 0;
 		}
 	}

@@ -3788,9 +3795,6 @@ xfs_inode_flush(
 	if (flags & FLUSH_INODE) {
 		int	flush_flags;
 
-		if (xfs_ipincount(ip))
-			return EAGAIN;
-
 		if (flags & FLUSH_SYNC) {
 			xfs_ilock(ip, XFS_ILOCK_SHARED);
 			xfs_iflock(ip);