Commit 871eae48 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch '2.6.36-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/xfsdev

* '2.6.36-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/xfsdev:
  xfs: do not discard page cache data on EAGAIN
  xfs: don't do memory allocation under the CIL context lock
  xfs: Reduce log force overhead for delayed logging
  xfs: dummy transactions should not dirty VFS state
  xfs: ensure f_ffree returned by statfs() is non-negative
  xfs: handle negative wbc->nr_to_write during sync writeback
  writeback: write_cache_pages doesn't terminate at nr_to_write <= 0
  xfs: fix untrusted inode number lookup
  xfs: ensure we mark all inodes in a freed cluster XFS_ISTALE
  xfs: unlock items before allowing the CIL to commit
parents 502adf57 b5420f23
......@@ -852,8 +852,8 @@ xfs_convert_page(
SetPageUptodate(page);
if (count) {
wbc->nr_to_write--;
if (wbc->nr_to_write <= 0)
if (--wbc->nr_to_write <= 0 &&
wbc->sync_mode == WB_SYNC_NONE)
done = 1;
}
xfs_start_page_writeback(page, !page_dirty, count);
......@@ -1068,7 +1068,7 @@ xfs_vm_writepage(
* by themselves.
*/
if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
goto out_fail;
goto redirty;
/*
* We need a transaction if there are delalloc or unwritten buffers
......@@ -1080,7 +1080,7 @@ xfs_vm_writepage(
*/
xfs_count_page_state(page, &delalloc, &unwritten);
if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
goto out_fail;
goto redirty;
/* Is this page beyond the end of the file? */
offset = i_size_read(inode);
......@@ -1245,12 +1245,15 @@ xfs_vm_writepage(
if (iohead)
xfs_cancel_ioend(iohead);
if (err == -EAGAIN)
goto redirty;
xfs_aops_discard_page(page);
ClearPageUptodate(page);
unlock_page(page);
return err;
out_fail:
redirty:
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
......
......@@ -1226,6 +1226,7 @@ xfs_fs_statfs(
struct xfs_inode *ip = XFS_I(dentry->d_inode);
__uint64_t fakeinos, id;
xfs_extlen_t lsize;
__int64_t ffree;
statp->f_type = XFS_SB_MAGIC;
statp->f_namelen = MAXNAMELEN - 1;
......@@ -1249,7 +1250,11 @@ xfs_fs_statfs(
statp->f_files = min_t(typeof(statp->f_files),
statp->f_files,
mp->m_maxicount);
statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
/* make sure statp->f_ffree does not underflow */
ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
statp->f_ffree = max_t(__int64_t, ffree, 0);
spin_unlock(&mp->m_sb_lock);
if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
......@@ -1402,7 +1407,7 @@ xfs_fs_freeze(
xfs_save_resvblks(mp);
xfs_quiesce_attr(mp);
return -xfs_fs_log_dummy(mp);
return -xfs_fs_log_dummy(mp, SYNC_WAIT);
}
STATIC int
......
......@@ -34,6 +34,7 @@
#include "xfs_inode_item.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
#include "xfs_fsops.h"
#include <linux/kthread.h>
#include <linux/freezer.h>
......@@ -340,38 +341,6 @@ xfs_sync_attr(
XFS_ICI_NO_TAG, 0, NULL);
}
STATIC int
xfs_commit_dummy_trans(
struct xfs_mount *mp,
uint flags)
{
struct xfs_inode *ip = mp->m_rootip;
struct xfs_trans *tp;
int error;
/*
* Put a dummy transaction in the log to tell recovery
* that all others are OK.
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_trans_commit(tp, 0);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/* the log force ensures this transaction is pushed to disk */
xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
return error;
}
STATIC int
xfs_sync_fsdata(
struct xfs_mount *mp)
......@@ -432,7 +401,7 @@ xfs_quiesce_data(
/* mark the log as covered if needed */
if (xfs_log_need_covered(mp))
error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
error2 = xfs_fs_log_dummy(mp, SYNC_WAIT);
/* flush data-only devices */
if (mp->m_rtdev_targp)
......@@ -563,7 +532,7 @@ xfs_flush_inodes(
/*
* Every sync period we need to unpin all items, reclaim inodes and sync
* disk quotas. We might need to cover the log to indicate that the
* filesystem is idle.
* filesystem is idle and not frozen.
*/
STATIC void
xfs_sync_worker(
......@@ -577,8 +546,9 @@ xfs_sync_worker(
xfs_reclaim_inodes(mp, 0);
/* dgc: errors ignored here */
error = xfs_qm_sync(mp, SYNC_TRYLOCK);
if (xfs_log_need_covered(mp))
error = xfs_commit_dummy_trans(mp, 0);
if (mp->m_super->s_frozen == SB_UNFROZEN &&
xfs_log_need_covered(mp))
error = xfs_fs_log_dummy(mp, 0);
}
mp->m_sync_seq++;
wake_up(&mp->m_wait_single_sync_task);
......
......@@ -604,31 +604,36 @@ xfs_reserve_blocks(
return 0;
}
/*
* Dump a transaction into the log that contains no real change. This is needed
* to be able to make the log dirty or stamp the current tail LSN into the log
* during the covering operation.
*
* We cannot use an inode here for this - that will push dirty state back up
* into the VFS and then periodic inode flushing will prevent log covering from
* making progress. Hence we log a field in the superblock instead.
*/
int
xfs_fs_log_dummy(
xfs_mount_t *mp)
xfs_mount_t *mp,
int flags)
{
xfs_trans_t *tp;
xfs_inode_t *ip;
int error;
tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
XFS_DEFAULT_LOG_COUNT);
if (error) {
xfs_trans_cancel(tp, 0);
return error;
}
ip = mp->m_rootip;
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
/* log the UUID because it is an unchanging field */
xfs_mod_sb(tp, XFS_SB_UUID);
if (flags & SYNC_WAIT)
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
return xfs_trans_commit(tp, 0);
}
int
......
......@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
xfs_fsop_resblks_t *outval);
extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
extern int xfs_fs_log_dummy(xfs_mount_t *mp);
extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags);
#endif /* __XFS_FSOPS_H__ */
......@@ -1213,7 +1213,6 @@ xfs_imap_lookup(
struct xfs_inobt_rec_incore rec;
struct xfs_btree_cur *cur;
struct xfs_buf *agbp;
xfs_agino_t startino;
int error;
int i;
......@@ -1227,13 +1226,13 @@ xfs_imap_lookup(
}
/*
* derive and lookup the exact inode record for the given agino. If the
* record cannot be found, then it's an invalid inode number and we
* should abort.
* Lookup the inode record for the given agino. If the record cannot be
* found, then it's an invalid inode number and we should abort. Once
* we have a record, we need to ensure it contains the inode number
* we are looking up.
*/
cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
if (!error) {
if (i)
error = xfs_inobt_get_rec(cur, &rec, &i);
......@@ -1246,6 +1245,11 @@ xfs_imap_lookup(
if (error)
return error;
/* check that the returned record contains the required inode */
if (rec.ir_startino > agino ||
rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
return EINVAL;
/* for untrusted inodes check it is allocated first */
if ((flags & XFS_IGET_UNTRUSTED) &&
(rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
......
......@@ -1914,6 +1914,11 @@ xfs_iunlink_remove(
return 0;
}
/*
* A big issue when freeing the inode cluster is is that we _cannot_ skip any
* inodes that are in memory - they all must be marked stale and attached to
* the cluster buffer.
*/
STATIC void
xfs_ifree_cluster(
xfs_inode_t *free_ip,
......@@ -1945,8 +1950,6 @@ xfs_ifree_cluster(
}
for (j = 0; j < nbufs; j++, inum += ninodes) {
int found = 0;
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
......@@ -1965,7 +1968,9 @@ xfs_ifree_cluster(
/*
* Walk the inodes already attached to the buffer and mark them
* stale. These will all have the flush locks held, so an
* in-memory inode walk can't lock them.
* in-memory inode walk can't lock them. By marking them all
* stale first, we will not attempt to lock them in the loop
* below as the XFS_ISTALE flag will be set.
*/
lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
while (lip) {
......@@ -1977,11 +1982,11 @@ xfs_ifree_cluster(
&iip->ili_flush_lsn,
&iip->ili_item.li_lsn);
xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
found++;
}
lip = lip->li_bio_list;
}
/*
* For each inode in memory attempt to add it to the inode
* buffer and set it up for being staled on buffer IO
......@@ -1993,6 +1998,7 @@ xfs_ifree_cluster(
* even trying to lock them.
*/
for (i = 0; i < ninodes; i++) {
retry:
read_lock(&pag->pag_ici_lock);
ip = radix_tree_lookup(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, (inum + i)));
......@@ -2003,38 +2009,36 @@ xfs_ifree_cluster(
continue;
}
/* don't try to lock/unlock the current inode */
/*
* Don't try to lock/unlock the current inode, but we
* _cannot_ skip the other inodes that we did not find
* in the list attached to the buffer and are not
* already marked stale. If we can't lock it, back off
* and retry.
*/
if (ip != free_ip &&
!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
read_unlock(&pag->pag_ici_lock);
continue;
delay(1);
goto retry;
}
read_unlock(&pag->pag_ici_lock);
if (!xfs_iflock_nowait(ip)) {
if (ip != free_ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
xfs_iflock(ip);
xfs_iflags_set(ip, XFS_ISTALE);
if (xfs_inode_clean(ip)) {
ASSERT(ip != free_ip);
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
/*
* we don't need to attach clean inodes or those only
* with unlogged changes (which we throw away, anyway).
*/
iip = ip->i_itemp;
if (!iip) {
/* inode with unlogged changes only */
if (!iip || xfs_inode_clean(ip)) {
ASSERT(ip != free_ip);
ip->i_update_core = 0;
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
found++;
iip->ili_last_fields = iip->ili_format.ilf_fields;
iip->ili_format.ilf_fields = 0;
......@@ -2049,7 +2053,6 @@ xfs_ifree_cluster(
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
if (found)
xfs_trans_stale_inode_buf(tp, bp);
xfs_trans_binval(tp, bp);
}
......
......@@ -3015,7 +3015,8 @@ _xfs_log_force(
XFS_STATS_INC(xs_log_force);
xlog_cil_push(log, 1);
if (log->l_cilp)
xlog_cil_force(log);
spin_lock(&log->l_icloglock);
......@@ -3167,7 +3168,7 @@ _xfs_log_force_lsn(
XFS_STATS_INC(xs_log_force);
if (log->l_cilp) {
lsn = xlog_cil_push_lsn(log, lsn);
lsn = xlog_cil_force_lsn(log, lsn);
if (lsn == NULLCOMMITLSN)
return 0;
}
......@@ -3724,7 +3725,7 @@ xfs_log_force_umount(
* call below.
*/
if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
xlog_cil_push(log, 1);
xlog_cil_force(log);
/*
* We must hold both the GRANT lock and the LOG lock,
......
This diff is collapsed.
......@@ -422,6 +422,7 @@ struct xfs_cil {
struct rw_semaphore xc_ctx_lock;
struct list_head xc_committing;
sv_t xc_commit_wait;
xfs_lsn_t xc_current_sequence;
};
/*
......@@ -562,8 +563,16 @@ int xlog_cil_init(struct log *log);
void xlog_cil_init_post_recovery(struct log *log);
void xlog_cil_destroy(struct log *log);
int xlog_cil_push(struct log *log, int push_now);
xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence);
/*
* CIL force routines
*/
xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
static inline void
xlog_cil_force(struct log *log)
{
xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
}
/*
* Unmount record type is used as a pseudo transaction type for the ticket.
......
......@@ -1167,7 +1167,7 @@ xfs_trans_del_item(
* Unlock all of the items of a transaction and free all the descriptors
* of that transaction.
*/
STATIC void
void
xfs_trans_free_items(
struct xfs_trans *tp,
xfs_lsn_t commit_lsn,
......@@ -1653,9 +1653,6 @@ xfs_trans_commit_cil(
return error;
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
/* xfs_trans_free_items() unlocks them first */
xfs_trans_free_items(tp, *commit_lsn, 0);
xfs_trans_free(tp);
return 0;
}
......
......@@ -25,7 +25,8 @@ struct xfs_trans;
void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
void xfs_trans_del_item(struct xfs_log_item *);
void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
int flags);
void xfs_trans_item_committed(struct xfs_log_item *lip,
xfs_lsn_t commit_lsn, int aborted);
void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
......
......@@ -985,24 +985,18 @@ int write_cache_pages(struct address_space *mapping,
}
}
if (wbc->nr_to_write > 0) {
if (--wbc->nr_to_write == 0 &&
wbc->sync_mode == WB_SYNC_NONE) {
/*
* We stop writing back only if we are
* not doing integrity sync. In case of
* integrity sync we have to keep going
* because someone may be concurrently
* dirtying pages, and we might have
* synced a lot of newly appeared dirty
* pages, but have not synced all of the
* old dirty pages.
* We stop writing back only if we are not doing
* integrity sync. In case of integrity sync we have to
* keep going until we have written all the pages
* we tagged for writeback prior to entering this loop.
*/
if (--wbc->nr_to_write <= 0 &&
wbc->sync_mode == WB_SYNC_NONE) {
done = 1;
break;
}
}
}
pagevec_release(&pvec);
cond_resched();
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment