Commit f11c9c5c authored by Edward Shishkin, committed by Jens Axboe

vfs: improve writeback_inodes_wb()

Do not pin/unpin the superblock for every inode in writeback_inodes_wb(); pin
it once for the whole group of inodes that belong to the same superblock and
call the writeback_sb_inodes() handler for them. A simplified sketch of the
new control flow appears below, ahead of the diff.
Signed-off-by: Edward Shishkin <edward.shishkin@gmail.com>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
parent c12ec0a2
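
For orientation, here is a standalone sketch of the loop structure this patch introduces. It is plain userspace C, not kernel code: mock_sb, mock_inode, pin_sb(), unpin_sb() and the flat queue array are invented stand-ins for the kernel's super_block, inode, pin_sb_for_writeback(), unpin_sb_for_writeback() and the b_io list, and all locking is elided. It only illustrates how the superblock is now pinned once per run of same-superblock inodes and released after the per-superblock handler finishes.

	/* Userspace sketch of the "pin once per superblock group" flow. */
	#include <stdio.h>

	enum sb_pin_state { SB_PINNED, SB_NOT_PINNED, SB_PIN_FAILED };

	struct mock_sb    { const char *name; int pin_count; };
	struct mock_inode { struct mock_sb *sb; int id; };

	static enum sb_pin_state pin_sb(struct mock_sb *sb)
	{
		sb->pin_count++;	/* stand-in for the s_count/s_umount dance */
		return SB_PINNED;
	}

	static void unpin_sb(struct mock_sb *sb)
	{
		sb->pin_count--;
	}

	/* Write every queued inode that belongs to @sb; stop at a foreign inode. */
	static int writeback_sb_inodes(struct mock_sb *sb, struct mock_inode *queue,
				       int n, int *pos)
	{
		while (*pos < n) {
			struct mock_inode *inode = &queue[*pos];

			if (inode->sb != sb)
				return 0;	/* finished with this superblock */
			printf("writing inode %d of %s\n", inode->id, sb->name);
			(*pos)++;
		}
		return 1;			/* queue empty: caller can stop */
	}

	static void writeback_inodes(struct mock_inode *queue, int n)
	{
		int pos = 0;

		while (pos < n) {
			struct mock_sb *sb = queue[pos].sb;
			enum sb_pin_state state = pin_sb(sb);
			int done;

			if (state == SB_PIN_FAILED)
				continue;	/* the kernel requeues the inode here */
			done = writeback_sb_inodes(sb, queue, n, &pos);
			if (state == SB_PINNED)
				unpin_sb(sb);
			if (done)
				break;
		}
	}

	int main(void)
	{
		struct mock_sb a = { "sb-a", 0 }, b = { "sb-b", 0 };
		struct mock_inode queue[] = {
			{ &a, 1 }, { &a, 2 }, { &b, 3 }, { &a, 4 },
		};

		/* pins sb-a, then sb-b, then sb-a again: once per run, not per inode */
		writeback_inodes(queue, 4);
		return 0;
	}
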
fs/fs-writeback.c
@@ -553,108 +553,85 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block **psb)
+static void unpin_sb_for_writeback(struct super_block *sb)
 {
-	struct super_block *sb = *psb;
-
-	if (sb) {
-		up_read(&sb->s_umount);
-		put_super(sb);
-		*psb = NULL;
-	}
+	up_read(&sb->s_umount);
+	put_super(sb);
 }
 
+enum sb_pin_state {
+	SB_PINNED,
+	SB_NOT_PINNED,
+	SB_PIN_FAILED
+};
+
 /*
  * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
  * go away while we are writing inodes from it.
- *
- * Returns 0 if the super was successfully pinned (or pinning wasn't needed),
- * 1 if we failed.
  */
-static int pin_sb_for_writeback(struct writeback_control *wbc,
-				struct inode *inode, struct super_block **psb)
+static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
+					      struct super_block *sb)
 {
-	struct super_block *sb = inode->i_sb;
-
-	/*
-	 * If this sb is already pinned, nothing more to do. If not and
-	 * *psb is non-NULL, unpin the old one first
-	 */
-	if (sb == *psb)
-		return 0;
-	else if (*psb)
-		unpin_sb_for_writeback(psb);
-
 	/*
 	 * Caller must already hold the ref for this
 	 */
 	if (wbc->sync_mode == WB_SYNC_ALL) {
 		WARN_ON(!rwsem_is_locked(&sb->s_umount));
-		return 0;
+		return SB_NOT_PINNED;
 	}
-
 	spin_lock(&sb_lock);
 	sb->s_count++;
 	if (down_read_trylock(&sb->s_umount)) {
 		if (sb->s_root) {
 			spin_unlock(&sb_lock);
-			goto pinned;
+			return SB_PINNED;
 		}
 		/*
 		 * umounted, drop rwsem again and fall through to failure
 		 */
 		up_read(&sb->s_umount);
 	}
-
 	sb->s_count--;
 	spin_unlock(&sb_lock);
-	return 1;
-pinned:
-	*psb = sb;
-	return 0;
+	return SB_PIN_FAILED;
 }
 
-static void writeback_inodes_wb(struct bdi_writeback *wb,
-				struct writeback_control *wbc)
+/*
+ * Write a portion of b_io inodes which belong to @sb.
+ * If @wbc->sb != NULL, then find and write all such
+ * inodes. Otherwise write only ones which go sequentially
+ * in reverse order.
+ * Return 1, if the caller writeback routine should be
+ * interrupted. Otherwise return 0.
+ */
+static int writeback_sb_inodes(struct super_block *sb,
+			       struct bdi_writeback *wb,
+			       struct writeback_control *wbc)
 {
-	struct super_block *sb = wbc->sb, *pin_sb = NULL;
-	const unsigned long start = jiffies;	/* livelock avoidance */
-
-	spin_lock(&inode_lock);
-
-	if (!wbc->for_kupdate || list_empty(&wb->b_io))
-		queue_io(wb, wbc->older_than_this);
-
 	while (!list_empty(&wb->b_io)) {
-		struct inode *inode = list_entry(wb->b_io.prev,
-						struct inode, i_list);
 		long pages_skipped;
-
-		/*
-		 * super block given and doesn't match, skip this inode
-		 */
-		if (sb && sb != inode->i_sb) {
+		struct inode *inode = list_entry(wb->b_io.prev,
+						 struct inode, i_list);
+		if (wbc->sb && sb != inode->i_sb) {
+			/* super block given and doesn't
+			   match, skip this inode */
 			redirty_tail(inode);
 			continue;
 		}
-
+		if (sb != inode->i_sb)
+			/* finish with this superblock */
+			return 0;
 		if (inode->i_state & (I_NEW | I_WILL_FREE)) {
 			requeue_io(inode);
 			continue;
 		}
-
 		/*
 		 * Was this inode dirtied after sync_sb_inodes was called?
 		 * This keeps sync from extra jobs and livelock.
 		 */
-		if (inode_dirtied_after(inode, start))
-			break;
-
-		if (pin_sb_for_writeback(wbc, inode, &pin_sb)) {
-			requeue_io(inode);
-			continue;
-		}
+		if (inode_dirtied_after(inode, wbc->wb_start))
+			return 1;
 
 		BUG_ON(inode->i_state & (I_FREEING | I_CLEAR));
 		__iget(inode);
@@ -673,14 +650,50 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		spin_lock(&inode_lock);
 		if (wbc->nr_to_write <= 0) {
 			wbc->more_io = 1;
-			break;
+			return 1;
 		}
 		if (!list_empty(&wb->b_more_io))
 			wbc->more_io = 1;
 	}
+	/* b_io is empty */
+	return 1;
+}
 
-	unpin_sb_for_writeback(&pin_sb);
-
+static void writeback_inodes_wb(struct bdi_writeback *wb,
+				struct writeback_control *wbc)
+{
+	int ret = 0;
+
+	wbc->wb_start = jiffies; /* livelock avoidance */
+	spin_lock(&inode_lock);
+	if (!wbc->for_kupdate || list_empty(&wb->b_io))
+		queue_io(wb, wbc->older_than_this);
+
+	while (!list_empty(&wb->b_io)) {
+		struct inode *inode = list_entry(wb->b_io.prev,
+						 struct inode, i_list);
+		struct super_block *sb = inode->i_sb;
+		enum sb_pin_state state;
+
+		if (wbc->sb && sb != wbc->sb) {
+			/* super block given and doesn't
+			   match, skip this inode */
+			redirty_tail(inode);
+			continue;
+		}
+		state = pin_sb_for_writeback(wbc, sb);
+
+		if (state == SB_PIN_FAILED) {
+			requeue_io(inode);
+			continue;
+		}
+		ret = writeback_sb_inodes(sb, wb, wbc);
+
+		if (state == SB_PINNED)
+			unpin_sb_for_writeback(sb);
+
+		if (ret)
+			break;
+	}
 	spin_unlock(&inode_lock);
 	/* Leave any unwritten inodes on b_io */
 }
include/linux/writeback.h
@@ -34,6 +34,9 @@ struct writeback_control {
 	enum writeback_sync_modes sync_mode;
 	unsigned long *older_than_this;	/* If !NULL, only write back inodes
 					   older than this */
+	unsigned long wb_start;		/* Time writeback_inodes_wb was
+					   called. This is needed to avoid
+					   extra jobs and livelock */
 	long nr_to_write;		/* Write this many pages, and decrement
 					   this for each page written */
 	long pages_skipped;		/* Pages which were not written */
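
The new wb_start field records, once per writeback_inodes_wb() run, when writeback started, so that writeback_sb_inodes() can stop when it meets an inode dirtied after that point instead of chasing fresh dirtyings forever. Below is a minimal userspace illustration of that rule only; mock_wbc, mock_inode and dirtied_after() are simplified stand-ins, and unlike the kernel's inode_dirtied_after() this sketch ignores jiffies wraparound.

	#include <stdbool.h>
	#include <stdio.h>

	struct mock_wbc   { unsigned long wb_start; };	   /* like wbc->wb_start    */
	struct mock_inode { unsigned long dirtied_when; }; /* "time" inode dirtied  */

	/* Crude stand-in for inode_dirtied_after(): no timer wraparound handling. */
	static bool dirtied_after(const struct mock_inode *inode, unsigned long t)
	{
		return inode->dirtied_when > t;
	}

	int main(void)
	{
		struct mock_wbc wbc = { .wb_start = 1000 };	/* writeback began at t=1000 */
		struct mock_inode old_dirty = { .dirtied_when = 900 };
		struct mock_inode new_dirty = { .dirtied_when = 1100 };

		/* dirtied before wb_start: written in this pass */
		printf("write old inode: %d\n", !dirtied_after(&old_dirty, wbc.wb_start));
		/* dirtied during this pass: stop here to avoid livelock */
		printf("stop at new inode: %d\n", dirtied_after(&new_dirty, wbc.wb_start));
		return 0;
	}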