Commit 03891159 authored by Linus Torvalds

Merge branch 'lazytime' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull lazytime mount option support from Al Viro:
 "Lazytime stuff from tytso"

* 'lazytime' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  ext4: add optimization for the lazytime mount option
  vfs: add find_inode_nowait() function
  vfs: add support for a lazytime mount option
parents 66dc830d a26f4992
...@@ -4174,6 +4174,65 @@ static int ext4_inode_blocks_set(handle_t *handle, ...@@ -4174,6 +4174,65 @@ static int ext4_inode_blocks_set(handle_t *handle,
return 0; return 0;
} }
/*
 * Argument bundle handed to other_inode_match() through the opaque
 * data pointer of find_inode_nowait().
 */
struct other_inode {
/* Inode number of the inode whose update triggered the scan (used for tracing). */
unsigned long orig_ino;
/* Location of the candidate inode's on-disk image inside the block buffer. */
struct ext4_inode *raw_inode;
};
/*
 * find_inode_nowait() callback: if @inode is number @ino and its only
 * dirtiness is I_DIRTY_TIME, copy its in-memory timestamps into the raw
 * on-disk inode supplied in @data (a struct other_inode) and clear the
 * lazy-time dirty bits.
 *
 * Returns 0 when the inode is not the one wanted or is not eligible
 * according to the unlocked check, and -1 in every other case, whether
 * or not the timestamps were actually copied.  This function never
 * returns 1, so no inode reference is ever handed back to the caller.
 */
static int other_inode_match(struct inode * inode, unsigned long ino,
void *data)
{
struct other_inode *oi = (struct other_inode *) data;
/* Cheap unlocked pre-check; repeated below under i_lock. */
if ((inode->i_ino != ino) ||
(inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
((inode->i_state & I_DIRTY_TIME) == 0))
return 0;
spin_lock(&inode->i_lock);
/* Re-validate now that i_state cannot change underneath us. */
if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
(inode->i_state & I_DIRTY_TIME)) {
struct ext4_inode_info *ei = EXT4_I(inode);
/* Clear the lazy-time bits before dropping i_lock. */
inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
spin_unlock(&inode->i_lock);
/* i_lock is released before i_raw_lock is taken; the two are never nested here. */
spin_lock(&ei->i_raw_lock);
EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
/* The raw inode changed, so its checksum is set again. */
ext4_inode_csum_set(inode, oi->raw_inode, ei);
spin_unlock(&ei->i_raw_lock);
trace_ext4_other_inode_update_time(inode, oi->orig_ino);
return -1;
}
spin_unlock(&inode->i_lock);
/* State changed between the two checks: nothing copied, stop the search. */
return -1;
}
/*
 * Opportunistically update the other time fields for other inodes in
 * the same inode table block.
 *
 * @sb:       super block the inodes belong to
 * @orig_ino: number of the inode that is being written out anyway;
 *            it is skipped here
 * @buf:      data of the inode table block holding @orig_ino (the caller
 *            passes bh->b_data); slot i is read at buf + i * inode_size
 *
 * Each other inode number in the block is looked up in the inode cache
 * with find_inode_nowait(); other_inode_match() does the actual copy.
 */
static void ext4_update_other_inodes_time(struct super_block *sb,
					  unsigned long orig_ino, char *buf)
{
	struct other_inode oi;
	unsigned long ino;
	int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
	int inode_size = EXT4_INODE_SIZE(sb);

	oi.orig_ino = orig_ino;
	/*
	 * Calculate the first inode in the inode table block.  Inode
	 * numbers are one-based, so the first inode of a block is
	 * (n * inodes_per_block + 1), not (n * inodes_per_block).
	 * Masking orig_ino directly would pair every buffer slot with
	 * the wrong inode number and copy timestamps into the wrong
	 * on-disk inode.  The mask assumes inodes_per_block is a power
	 * of two.
	 */
	ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1;
	for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
		if (ino == orig_ino)
			continue;
		oi.raw_inode = (struct ext4_inode *) buf;
		/* The callback never returns 1, so there is no inode to use. */
		(void) find_inode_nowait(sb, ino, other_inode_match, &oi);
	}
}
/* /*
* Post the struct inode info into an on-disk inode location in the * Post the struct inode info into an on-disk inode location in the
* buffer-cache. This gobbles the caller's reference to the * buffer-cache. This gobbles the caller's reference to the
...@@ -4283,10 +4342,11 @@ static int ext4_do_update_inode(handle_t *handle, ...@@ -4283,10 +4342,11 @@ static int ext4_do_update_inode(handle_t *handle,
cpu_to_le16(ei->i_extra_isize); cpu_to_le16(ei->i_extra_isize);
} }
} }
ext4_inode_csum_set(inode, raw_inode, ei); ext4_inode_csum_set(inode, raw_inode, ei);
spin_unlock(&ei->i_raw_lock); spin_unlock(&ei->i_raw_lock);
if (inode->i_sb->s_flags & MS_LAZYTIME)
ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
bh->b_data);
BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
rc = ext4_handle_dirty_metadata(handle, NULL, bh); rc = ext4_handle_dirty_metadata(handle, NULL, bh);
...@@ -4875,11 +4935,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) ...@@ -4875,11 +4935,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
* If the inode is marked synchronous, we don't honour that here - doing * If the inode is marked synchronous, we don't honour that here - doing
* so would cause a commit on atime updates, which we don't bother doing. * so would cause a commit on atime updates, which we don't bother doing.
* We handle synchronous inodes at the highest possible level. * We handle synchronous inodes at the highest possible level.
*
* If only the I_DIRTY_TIME flag is set, we can skip everything. If
* I_DIRTY_TIME and I_DIRTY_SYNC are set, the only inode fields we need
* to copy into the on-disk inode structure are the timestamp fields.
*/ */
void ext4_dirty_inode(struct inode *inode, int flags) void ext4_dirty_inode(struct inode *inode, int flags)
{ {
handle_t *handle; handle_t *handle;
if (flags == I_DIRTY_TIME)
return;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle)) if (IS_ERR(handle))
goto out; goto out;
......
...@@ -1126,6 +1126,7 @@ enum { ...@@ -1126,6 +1126,7 @@ enum {
Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax, Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax,
Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
Opt_lazytime, Opt_nolazytime,
Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock, Opt_dioread_nolock, Opt_dioread_lock,
...@@ -1190,6 +1191,8 @@ static const match_table_t tokens = { ...@@ -1190,6 +1191,8 @@ static const match_table_t tokens = {
{Opt_dax, "dax"}, {Opt_dax, "dax"},
{Opt_stripe, "stripe=%u"}, {Opt_stripe, "stripe=%u"},
{Opt_delalloc, "delalloc"}, {Opt_delalloc, "delalloc"},
{Opt_lazytime, "lazytime"},
{Opt_nolazytime, "nolazytime"},
{Opt_nodelalloc, "nodelalloc"}, {Opt_nodelalloc, "nodelalloc"},
{Opt_removed, "mblk_io_submit"}, {Opt_removed, "mblk_io_submit"},
{Opt_removed, "nomblk_io_submit"}, {Opt_removed, "nomblk_io_submit"},
...@@ -1448,6 +1451,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, ...@@ -1448,6 +1451,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
case Opt_i_version: case Opt_i_version:
sb->s_flags |= MS_I_VERSION; sb->s_flags |= MS_I_VERSION;
return 1; return 1;
case Opt_lazytime:
sb->s_flags |= MS_LAZYTIME;
return 1;
case Opt_nolazytime:
sb->s_flags &= ~MS_LAZYTIME;
return 1;
} }
for (m = ext4_mount_opts; m->token != Opt_err; m++) for (m = ext4_mount_opts; m->token != Opt_err; m++)
...@@ -5044,6 +5053,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) ...@@ -5044,6 +5053,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
} }
#endif #endif
*flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
kfree(orig_data); kfree(orig_data);
return 0; return 0;
......
...@@ -253,14 +253,19 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) ...@@ -253,14 +253,19 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
return ret; return ret;
} }
#define EXPIRE_DIRTY_ATIME 0x0001
/* /*
* Move expired (dirtied before work->older_than_this) dirty inodes from * Move expired (dirtied before work->older_than_this) dirty inodes from
* @delaying_queue to @dispatch_queue. * @delaying_queue to @dispatch_queue.
*/ */
static int move_expired_inodes(struct list_head *delaying_queue, static int move_expired_inodes(struct list_head *delaying_queue,
struct list_head *dispatch_queue, struct list_head *dispatch_queue,
int flags,
struct wb_writeback_work *work) struct wb_writeback_work *work)
{ {
unsigned long *older_than_this = NULL;
unsigned long expire_time;
LIST_HEAD(tmp); LIST_HEAD(tmp);
struct list_head *pos, *node; struct list_head *pos, *node;
struct super_block *sb = NULL; struct super_block *sb = NULL;
...@@ -268,13 +273,21 @@ static int move_expired_inodes(struct list_head *delaying_queue, ...@@ -268,13 +273,21 @@ static int move_expired_inodes(struct list_head *delaying_queue,
int do_sb_sort = 0; int do_sb_sort = 0;
int moved = 0; int moved = 0;
if ((flags & EXPIRE_DIRTY_ATIME) == 0)
older_than_this = work->older_than_this;
else if ((work->reason == WB_REASON_SYNC) == 0) {
expire_time = jiffies - (HZ * 86400);
older_than_this = &expire_time;
}
while (!list_empty(delaying_queue)) { while (!list_empty(delaying_queue)) {
inode = wb_inode(delaying_queue->prev); inode = wb_inode(delaying_queue->prev);
if (work->older_than_this && if (older_than_this &&
inode_dirtied_after(inode, *work->older_than_this)) inode_dirtied_after(inode, *older_than_this))
break; break;
list_move(&inode->i_wb_list, &tmp); list_move(&inode->i_wb_list, &tmp);
moved++; moved++;
if (flags & EXPIRE_DIRTY_ATIME)
set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
if (sb_is_blkdev_sb(inode->i_sb)) if (sb_is_blkdev_sb(inode->i_sb))
continue; continue;
if (sb && sb != inode->i_sb) if (sb && sb != inode->i_sb)
...@@ -315,9 +328,12 @@ static int move_expired_inodes(struct list_head *delaying_queue, ...@@ -315,9 +328,12 @@ static int move_expired_inodes(struct list_head *delaying_queue,
static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work) static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
{ {
int moved; int moved;
assert_spin_locked(&wb->list_lock); assert_spin_locked(&wb->list_lock);
list_splice_init(&wb->b_more_io, &wb->b_io); list_splice_init(&wb->b_more_io, &wb->b_io);
moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work); moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
EXPIRE_DIRTY_ATIME, work);
trace_writeback_queue_io(wb, work, moved); trace_writeback_queue_io(wb, work, moved);
} }
...@@ -441,6 +457,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, ...@@ -441,6 +457,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
* updates after data IO completion. * updates after data IO completion.
*/ */
redirty_tail(inode, wb); redirty_tail(inode, wb);
} else if (inode->i_state & I_DIRTY_TIME) {
list_move(&inode->i_wb_list, &wb->b_dirty_time);
} else { } else {
/* The inode is clean. Remove from writeback lists. */ /* The inode is clean. Remove from writeback lists. */
list_del_init(&inode->i_wb_list); list_del_init(&inode->i_wb_list);
...@@ -487,7 +505,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ...@@ -487,7 +505,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
dirty = inode->i_state & I_DIRTY; dirty = inode->i_state & I_DIRTY;
inode->i_state &= ~I_DIRTY; if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
(inode->i_state & I_DIRTY_TIME)) ||
(inode->i_state & I_DIRTY_TIME_EXPIRED)) {
dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
trace_writeback_lazytime(inode);
}
inode->i_state &= ~dirty;
/* /*
* Paired with smp_mb() in __mark_inode_dirty(). This allows * Paired with smp_mb() in __mark_inode_dirty(). This allows
...@@ -507,8 +531,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ...@@ -507,8 +531,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
if (dirty & I_DIRTY_TIME)
mark_inode_dirty_sync(inode);
/* Don't write the inode if only I_DIRTY_PAGES was set */ /* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { if (dirty & ~I_DIRTY_PAGES) {
int err = write_inode(inode, wbc); int err = write_inode(inode, wbc);
if (ret == 0) if (ret == 0)
ret = err; ret = err;
...@@ -556,7 +582,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, ...@@ -556,7 +582,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
* make sure inode is on some writeback list and leave it there unless * make sure inode is on some writeback list and leave it there unless
* we have completely cleaned the inode. * we have completely cleaned the inode.
*/ */
if (!(inode->i_state & I_DIRTY) && if (!(inode->i_state & I_DIRTY_ALL) &&
(wbc->sync_mode != WB_SYNC_ALL || (wbc->sync_mode != WB_SYNC_ALL ||
!mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
goto out; goto out;
...@@ -571,7 +597,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, ...@@ -571,7 +597,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
* If inode is clean, remove it from writeback lists. Otherwise don't * If inode is clean, remove it from writeback lists. Otherwise don't
* touch it. See comment above for explanation. * touch it. See comment above for explanation.
*/ */
if (!(inode->i_state & I_DIRTY)) if (!(inode->i_state & I_DIRTY_ALL))
list_del_init(&inode->i_wb_list); list_del_init(&inode->i_wb_list);
spin_unlock(&wb->list_lock); spin_unlock(&wb->list_lock);
inode_sync_complete(inode); inode_sync_complete(inode);
...@@ -713,7 +739,7 @@ static long writeback_sb_inodes(struct super_block *sb, ...@@ -713,7 +739,7 @@ static long writeback_sb_inodes(struct super_block *sb,
wrote += write_chunk - wbc.nr_to_write; wrote += write_chunk - wbc.nr_to_write;
spin_lock(&wb->list_lock); spin_lock(&wb->list_lock);
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
if (!(inode->i_state & I_DIRTY)) if (!(inode->i_state & I_DIRTY_ALL))
wrote++; wrote++;
requeue_inode(inode, wb, &wbc); requeue_inode(inode, wb, &wbc);
inode_sync_complete(inode); inode_sync_complete(inode);
...@@ -1151,16 +1177,20 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) ...@@ -1151,16 +1177,20 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
* page->mapping->host, so the page-dirtying time is recorded in the internal * page->mapping->host, so the page-dirtying time is recorded in the internal
* blockdev inode. * blockdev inode.
*/ */
#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
void __mark_inode_dirty(struct inode *inode, int flags) void __mark_inode_dirty(struct inode *inode, int flags)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
struct backing_dev_info *bdi = NULL; struct backing_dev_info *bdi = NULL;
int dirtytime;
trace_writeback_mark_inode_dirty(inode, flags);
/* /*
* Don't do this for I_DIRTY_PAGES - that doesn't actually * Don't do this for I_DIRTY_PAGES - that doesn't actually
* dirty the inode itself * dirty the inode itself
*/ */
if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) {
trace_writeback_dirty_inode_start(inode, flags); trace_writeback_dirty_inode_start(inode, flags);
if (sb->s_op->dirty_inode) if (sb->s_op->dirty_inode)
...@@ -1168,6 +1198,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) ...@@ -1168,6 +1198,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
trace_writeback_dirty_inode(inode, flags); trace_writeback_dirty_inode(inode, flags);
} }
if (flags & I_DIRTY_INODE)
flags &= ~I_DIRTY_TIME;
dirtytime = flags & I_DIRTY_TIME;
/* /*
* Paired with smp_mb() in __writeback_single_inode() for the * Paired with smp_mb() in __writeback_single_inode() for the
...@@ -1175,16 +1208,21 @@ void __mark_inode_dirty(struct inode *inode, int flags) ...@@ -1175,16 +1208,21 @@ void __mark_inode_dirty(struct inode *inode, int flags)
*/ */
smp_mb(); smp_mb();
if ((inode->i_state & flags) == flags) if (((inode->i_state & flags) == flags) ||
(dirtytime && (inode->i_state & I_DIRTY_INODE)))
return; return;
if (unlikely(block_dump)) if (unlikely(block_dump))
block_dump___mark_inode_dirty(inode); block_dump___mark_inode_dirty(inode);
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
if (dirtytime && (inode->i_state & I_DIRTY_INODE))
goto out_unlock_inode;
if ((inode->i_state & flags) != flags) { if ((inode->i_state & flags) != flags) {
const int was_dirty = inode->i_state & I_DIRTY; const int was_dirty = inode->i_state & I_DIRTY;
if (flags & I_DIRTY_INODE)
inode->i_state &= ~I_DIRTY_TIME;
inode->i_state |= flags; inode->i_state |= flags;
/* /*
...@@ -1231,8 +1269,10 @@ void __mark_inode_dirty(struct inode *inode, int flags) ...@@ -1231,8 +1269,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
} }
inode->dirtied_when = jiffies; inode->dirtied_when = jiffies;
list_move(&inode->i_wb_list, &bdi->wb.b_dirty); list_move(&inode->i_wb_list, dirtytime ?
&bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
spin_unlock(&bdi->wb.list_lock); spin_unlock(&bdi->wb.list_lock);
trace_writeback_dirty_inode_enqueue(inode);
if (wakeup_bdi) if (wakeup_bdi)
bdi_wakeup_thread_delayed(bdi); bdi_wakeup_thread_delayed(bdi);
......
...@@ -654,7 +654,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, ...@@ -654,7 +654,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
{ {
struct address_space *mapping = file->f_mapping; struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
int sync_state = inode->i_state & I_DIRTY; int sync_state = inode->i_state & I_DIRTY_ALL;
struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_inode *ip = GFS2_I(inode);
int ret = 0, ret1 = 0; int ret = 0, ret1 = 0;
...@@ -667,7 +667,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, ...@@ -667,7 +667,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
if (!gfs2_is_jdata(ip)) if (!gfs2_is_jdata(ip))
sync_state &= ~I_DIRTY_PAGES; sync_state &= ~I_DIRTY_PAGES;
if (datasync) if (datasync)
sync_state &= ~I_DIRTY_SYNC; sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);
if (sync_state) { if (sync_state) {
ret = sync_inode_metadata(inode, 1); ret = sync_inode_metadata(inode, 1);
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/buffer_head.h> /* for inode_has_buffers */ #include <linux/buffer_head.h> /* for inode_has_buffers */
#include <linux/ratelimit.h> #include <linux/ratelimit.h>
#include <linux/list_lru.h> #include <linux/list_lru.h>
#include <trace/events/writeback.h>
#include "internal.h" #include "internal.h"
/* /*
...@@ -30,7 +31,7 @@ ...@@ -30,7 +31,7 @@
* inode_sb_list_lock protects: * inode_sb_list_lock protects:
* sb->s_inodes, inode->i_sb_list * sb->s_inodes, inode->i_sb_list
* bdi->wb.list_lock protects: * bdi->wb.list_lock protects:
* bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list
* inode_hash_lock protects: * inode_hash_lock protects:
* inode_hashtable, inode->i_hash * inode_hashtable, inode->i_hash
* *
...@@ -403,7 +404,8 @@ static void inode_lru_list_add(struct inode *inode) ...@@ -403,7 +404,8 @@ static void inode_lru_list_add(struct inode *inode)
*/ */
void inode_add_lru(struct inode *inode) void inode_add_lru(struct inode *inode)
{ {
if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) && if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |
I_FREEING | I_WILL_FREE)) &&
!atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE) !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
inode_lru_list_add(inode); inode_lru_list_add(inode);
} }
...@@ -634,7 +636,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) ...@@ -634,7 +636,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
continue; continue;
} }
if (inode->i_state & I_DIRTY && !kill_dirty) { if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
spin_unlock(&inode->i_lock); spin_unlock(&inode->i_lock);
busy = 1; busy = 1;
continue; continue;
...@@ -1268,6 +1270,56 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) ...@@ -1268,6 +1270,56 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
} }
EXPORT_SYMBOL(ilookup); EXPORT_SYMBOL(ilookup);
/**
 * find_inode_nowait - non-blocking lookup in the inode cache
 * @sb:		super block of file system to search
 * @hashval:	hash value (usually inode number) to search for
 * @match:	callback used for comparisons between inodes
 * @data:	opaque data pointer to pass to @match
 *
 * Walk the hash chain selected by @sb and @hashval and hand every inode
 * belonging to @sb to @match.  The callback returns 0 if the inode does
 * not match (the walk continues), 1 if it does match (the inode is
 * returned), and -1 if the search should be stopped without a result.
 *
 * The @match function is responsible for taking the i_lock spin_lock,
 * checking i_state for an inode being freed or being initialized, and
 * incrementing the reference count before returning 1.  It must not
 * sleep, since it is called with the inode_hash_lock spinlock held.
 *
 * This is an even more generalized version of ilookup5() for when the
 * function must never block --- find_inode() can block in
 * __wait_on_freeing_inode() --- or when the caller can not increment
 * the reference count because the resulting iput() might cause an
 * inode eviction.  The tradeoff is that the @match function must be
 * very carefully implemented.
 */
struct inode *find_inode_nowait(struct super_block *sb,
				unsigned long hashval,
				int (*match)(struct inode *, unsigned long,
					     void *),
				void *data)
{
	struct hlist_head *bucket = inode_hashtable + hash(sb, hashval);
	struct inode *candidate;
	struct inode *found = NULL;

	spin_lock(&inode_hash_lock);
	hlist_for_each_entry(candidate, bucket, i_hash) {
		int verdict;

		if (candidate->i_sb != sb)
			continue;

		verdict = match(candidate, hashval, data);
		if (verdict == 0)
			continue;

		/* Only an explicit 1 yields a result; anything else just stops. */
		if (verdict == 1)
			found = candidate;
		break;
	}
	spin_unlock(&inode_hash_lock);

	return found;
}
EXPORT_SYMBOL(find_inode_nowait);
int insert_inode_locked(struct inode *inode) int insert_inode_locked(struct inode *inode)
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
...@@ -1418,10 +1470,19 @@ static void iput_final(struct inode *inode) ...@@ -1418,10 +1470,19 @@ static void iput_final(struct inode *inode)
*/ */
void iput(struct inode *inode) void iput(struct inode *inode)
{ {
if (inode) { if (!inode)
return;
BUG_ON(inode->i_state & I_CLEAR); BUG_ON(inode->i_state & I_CLEAR);
retry:
if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
atomic_inc(&inode->i_count);
inode->i_state &= ~I_DIRTY_TIME;
spin_unlock(&inode->i_lock);
trace_writeback_lazytime_iput(inode);
mark_inode_dirty_sync(inode);
goto retry;
}
iput_final(inode); iput_final(inode);
} }
} }
...@@ -1481,14 +1542,9 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, ...@@ -1481,14 +1542,9 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
return 0; return 0;
} }
/* int generic_update_time(struct inode *inode, struct timespec *time, int flags)
* This does the actual work of updating an inodes time or version. Must have
* had called mnt_want_write() before calling this.
*/
static int update_time(struct inode *inode, struct timespec *time, int flags)
{ {
if (inode->i_op->update_time) int iflags = I_DIRTY_TIME;
return inode->i_op->update_time(inode, time, flags);
if (flags & S_ATIME) if (flags & S_ATIME)
inode->i_atime = *time; inode->i_atime = *time;
...@@ -1498,9 +1554,27 @@ static int update_time(struct inode *inode, struct timespec *time, int flags) ...@@ -1498,9 +1554,27 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
inode->i_ctime = *time; inode->i_ctime = *time;
if (flags & S_MTIME) if (flags & S_MTIME)
inode->i_mtime = *time; inode->i_mtime = *time;
mark_inode_dirty_sync(inode);
if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION))
iflags |= I_DIRTY_SYNC;
__mark_inode_dirty(inode, iflags);
return 0; return 0;
} }
EXPORT_SYMBOL(generic_update_time);
/*
 * Apply a timestamp/version update to @inode: use the inode's own
 * ->update_time operation when it provides one, otherwise fall back to
 * generic_update_time().  Must have had called mnt_want_write() before
 * calling this.
 */
static int update_time(struct inode *inode, struct timespec *time, int flags)
{
	if (inode->i_op->update_time)
		return inode->i_op->update_time(inode, time, flags);

	return generic_update_time(inode, time, flags);
}
/** /**
* touch_atime - update the access time * touch_atime - update the access time
......
...@@ -39,7 +39,7 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) ...@@ -39,7 +39,7 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
return rc; return rc;
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
if (!(inode->i_state & I_DIRTY) || if (!(inode->i_state & I_DIRTY_ALL) ||
(datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
/* Make sure committed changes hit the disk */ /* Make sure committed changes hit the disk */
jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
......
...@@ -948,7 +948,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end, ...@@ -948,7 +948,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
ret = sync_mapping_buffers(inode->i_mapping); ret = sync_mapping_buffers(inode->i_mapping);
if (!(inode->i_state & I_DIRTY)) if (!(inode->i_state & I_DIRTY_ALL))
goto out; goto out;
if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
goto out; goto out;
......
...@@ -44,6 +44,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb) ...@@ -44,6 +44,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb)
{ MS_SYNCHRONOUS, ",sync" }, { MS_SYNCHRONOUS, ",sync" },
{ MS_DIRSYNC, ",dirsync" }, { MS_DIRSYNC, ",dirsync" },
{ MS_MANDLOCK, ",mand" }, { MS_MANDLOCK, ",mand" },
{ MS_LAZYTIME, ",lazytime" },
{ 0, NULL } { 0, NULL }
}; };
const struct proc_fs_info *fs_infop; const struct proc_fs_info *fs_infop;
......
...@@ -177,8 +177,16 @@ SYSCALL_DEFINE1(syncfs, int, fd) ...@@ -177,8 +177,16 @@ SYSCALL_DEFINE1(syncfs, int, fd)
*/ */
int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
{ {
struct inode *inode = file->f_mapping->host;
if (!file->f_op->fsync) if (!file->f_op->fsync)
return -EINVAL; return -EINVAL;
if (!datasync && (inode->i_state & I_DIRTY_TIME)) {
spin_lock(&inode->i_lock);
inode->i_state &= ~I_DIRTY_TIME;
spin_unlock(&inode->i_lock);
mark_inode_dirty_sync(inode);
}
return file->f_op->fsync(file, start, end, datasync); return file->f_op->fsync(file, start, end, datasync);
} }
EXPORT_SYMBOL(vfs_fsync_range); EXPORT_SYMBOL(vfs_fsync_range);
......
...@@ -55,6 +55,7 @@ struct bdi_writeback { ...@@ -55,6 +55,7 @@ struct bdi_writeback {
struct list_head b_dirty; /* dirty inodes */ struct list_head b_dirty; /* dirty inodes */
struct list_head b_io; /* parked for writeback */ struct list_head b_io; /* parked for writeback */
struct list_head b_more_io; /* parked for more writeback */ struct list_head b_more_io; /* parked for more writeback */
struct list_head b_dirty_time; /* time stamps are dirty */
spinlock_t list_lock; /* protects the b_* lists */ spinlock_t list_lock; /* protects the b_* lists */
}; };
......
...@@ -1790,8 +1790,12 @@ struct super_operations { ...@@ -1790,8 +1790,12 @@ struct super_operations {
#define __I_DIO_WAKEUP 9 #define __I_DIO_WAKEUP 9
#define I_DIO_WAKEUP (1 << I_DIO_WAKEUP) #define I_DIO_WAKEUP (1 << I_DIO_WAKEUP)
#define I_LINKABLE (1 << 10) #define I_LINKABLE (1 << 10)
#define I_DIRTY_TIME (1 << 11)
#define __I_DIRTY_TIME_EXPIRED 12
#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED)
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
extern void __mark_inode_dirty(struct inode *, int); extern void __mark_inode_dirty(struct inode *, int);
static inline void mark_inode_dirty(struct inode *inode) static inline void mark_inode_dirty(struct inode *inode)
...@@ -1954,6 +1958,7 @@ extern int current_umask(void); ...@@ -1954,6 +1958,7 @@ extern int current_umask(void);
extern void ihold(struct inode * inode); extern void ihold(struct inode * inode);
extern void iput(struct inode *); extern void iput(struct inode *);
extern int generic_update_time(struct inode *, struct timespec *, int);
static inline struct inode *file_inode(const struct file *f) static inline struct inode *file_inode(const struct file *f)
{ {
...@@ -2492,6 +2497,11 @@ extern struct inode *ilookup(struct super_block *sb, unsigned long ino); ...@@ -2492,6 +2497,11 @@ extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
extern struct inode * iget_locked(struct super_block *, unsigned long); extern struct inode * iget_locked(struct super_block *, unsigned long);
extern struct inode *find_inode_nowait(struct super_block *,
unsigned long,
int (*match)(struct inode *,
unsigned long, void *),
void *data);
extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
extern int insert_inode_locked(struct inode *); extern int insert_inode_locked(struct inode *);
#ifdef CONFIG_DEBUG_LOCK_ALLOC #ifdef CONFIG_DEBUG_LOCK_ALLOC
......
...@@ -73,6 +73,36 @@ struct extent_status; ...@@ -73,6 +73,36 @@ struct extent_status;
{ FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
/*
 * Tracepoint emitted by other_inode_match() after it has copied an
 * inode's timestamps into the shared inode table block.  Records the
 * device, the updated inode (ino), the inode whose write triggered the
 * update (orig_ino), and the updated inode's uid, gid and mode.
 */
TRACE_EVENT(ext4_other_inode_update_time,
TP_PROTO(struct inode *inode, ino_t orig_ino),
TP_ARGS(inode, orig_ino),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( ino_t, orig_ino )
__field( uid_t, uid )
__field( gid_t, gid )
__field( __u16, mode )
),
TP_fast_assign(
__entry->orig_ino = orig_ino;
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->uid = i_uid_read(inode);
__entry->gid = i_gid_read(inode);
__entry->mode = inode->i_mode;
),
TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->orig_ino,
(unsigned long) __entry->ino, __entry->mode,
__entry->uid, __entry->gid)
);
TRACE_EVENT(ext4_free_inode, TRACE_EVENT(ext4_free_inode,
TP_PROTO(struct inode *inode), TP_PROTO(struct inode *inode),
......
...@@ -18,6 +18,8 @@ ...@@ -18,6 +18,8 @@
{I_FREEING, "I_FREEING"}, \ {I_FREEING, "I_FREEING"}, \
{I_CLEAR, "I_CLEAR"}, \ {I_CLEAR, "I_CLEAR"}, \
{I_SYNC, "I_SYNC"}, \ {I_SYNC, "I_SYNC"}, \
{I_DIRTY_TIME, "I_DIRTY_TIME"}, \
{I_DIRTY_TIME_EXPIRED, "I_DIRTY_TIME_EXPIRED"}, \
{I_REFERENCED, "I_REFERENCED"} \ {I_REFERENCED, "I_REFERENCED"} \
) )
...@@ -68,6 +70,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, ...@@ -68,6 +70,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
TP_STRUCT__entry ( TP_STRUCT__entry (
__array(char, name, 32) __array(char, name, 32)
__field(unsigned long, ino) __field(unsigned long, ino)
__field(unsigned long, state)
__field(unsigned long, flags) __field(unsigned long, flags)
), ),
...@@ -78,16 +81,25 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, ...@@ -78,16 +81,25 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
strncpy(__entry->name, strncpy(__entry->name,
bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
__entry->ino = inode->i_ino; __entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->flags = flags; __entry->flags = flags;
), ),
TP_printk("bdi %s: ino=%lu flags=%s", TP_printk("bdi %s: ino=%lu state=%s flags=%s",
__entry->name, __entry->name,
__entry->ino, __entry->ino,
show_inode_state(__entry->state),
show_inode_state(__entry->flags) show_inode_state(__entry->flags)
) )
); );
/*
 * Instance of writeback_dirty_inode_template; __mark_inode_dirty()
 * emits it with the inode and the requested dirty flags before acting
 * on them.
 */
DEFINE_EVENT(writeback_dirty_inode_template, writeback_mark_inode_dirty,
TP_PROTO(struct inode *inode, int flags),
TP_ARGS(inode, flags)
);
DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start, DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start,
TP_PROTO(struct inode *inode, int flags), TP_PROTO(struct inode *inode, int flags),
...@@ -596,6 +608,52 @@ DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, ...@@ -596,6 +608,52 @@ DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode,
TP_ARGS(inode, wbc, nr_to_write) TP_ARGS(inode, wbc, nr_to_write)
); );
/*
 * Event class shared by the lazytime tracepoints.  Each event takes a
 * single inode and snapshots its device, inode number, i_state, i_mode
 * and dirtied_when at the time the event fires.
 */
DECLARE_EVENT_CLASS(writeback_lazytime_template,
TP_PROTO(struct inode *inode),
TP_ARGS(inode),
TP_STRUCT__entry(
__field( dev_t, dev )
__field(unsigned long, ino )
__field(unsigned long, state )
__field( __u16, mode )
__field(unsigned long, dirtied_when )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->state = inode->i_state;
__entry->mode = inode->i_mode;
__entry->dirtied_when = inode->dirtied_when;
),
TP_printk("dev %d,%d ino %lu dirtied %lu state %s mode 0%o",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino, __entry->dirtied_when,
show_inode_state(__entry->state), __entry->mode)
);
/*
 * Emitted by __writeback_single_inode() when it decides the lazily
 * dirtied timestamps must be written out with this inode.
 */
DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime,
TP_PROTO(struct inode *inode),
TP_ARGS(inode)
);
/*
 * Emitted by iput() when dropping the last reference to a still-linked
 * inode that has I_DIRTY_TIME set, just before it is marked dirty.
 */
DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime_iput,
TP_PROTO(struct inode *inode),
TP_ARGS(inode)
);
/*
 * Emitted by __mark_inode_dirty() after the inode has been moved onto
 * the b_dirty or b_dirty_time writeback list.
 */
DEFINE_EVENT(writeback_lazytime_template, writeback_dirty_inode_enqueue,
TP_PROTO(struct inode *inode),
TP_ARGS(inode)
);
#endif /* _TRACE_WRITEBACK_H */ #endif /* _TRACE_WRITEBACK_H */
/* This part must be outside protection */ /* This part must be outside protection */
......
...@@ -90,6 +90,7 @@ struct inodes_stat_t { ...@@ -90,6 +90,7 @@ struct inodes_stat_t {
#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */
#define MS_I_VERSION (1<<23) /* Update inode I_version field */ #define MS_I_VERSION (1<<23) /* Update inode I_version field */
#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
/* These sb flags are internal to the kernel */ /* These sb flags are internal to the kernel */
#define MS_NOSEC (1<<28) #define MS_NOSEC (1<<28)
...@@ -100,7 +101,8 @@ struct inodes_stat_t { ...@@ -100,7 +101,8 @@ struct inodes_stat_t {
/* /*
* Superblock flags that can be altered by MS_REMOUNT * Superblock flags that can be altered by MS_REMOUNT
*/ */
#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) #define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
MS_LAZYTIME)
/* /*
* Old magic mount flag and mask * Old magic mount flag and mask
......
...@@ -49,10 +49,10 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) ...@@ -49,10 +49,10 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
unsigned long background_thresh; unsigned long background_thresh;
unsigned long dirty_thresh; unsigned long dirty_thresh;
unsigned long bdi_thresh; unsigned long bdi_thresh;
unsigned long nr_dirty, nr_io, nr_more_io; unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
struct inode *inode; struct inode *inode;
nr_dirty = nr_io = nr_more_io = 0; nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
spin_lock(&wb->list_lock); spin_lock(&wb->list_lock);
list_for_each_entry(inode, &wb->b_dirty, i_wb_list) list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
nr_dirty++; nr_dirty++;
...@@ -60,6 +60,9 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) ...@@ -60,6 +60,9 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
nr_io++; nr_io++;
list_for_each_entry(inode, &wb->b_more_io, i_wb_list) list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
nr_more_io++; nr_more_io++;
list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list)
if (inode->i_state & I_DIRTY_TIME)
nr_dirty_time++;
spin_unlock(&wb->list_lock); spin_unlock(&wb->list_lock);
global_dirty_limits(&background_thresh, &dirty_thresh); global_dirty_limits(&background_thresh, &dirty_thresh);
...@@ -78,6 +81,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) ...@@ -78,6 +81,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
"b_dirty: %10lu\n" "b_dirty: %10lu\n"
"b_io: %10lu\n" "b_io: %10lu\n"
"b_more_io: %10lu\n" "b_more_io: %10lu\n"
"b_dirty_time: %10lu\n"
"bdi_list: %10u\n" "bdi_list: %10u\n"
"state: %10lx\n", "state: %10lx\n",
(unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
...@@ -91,6 +95,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) ...@@ -91,6 +95,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
nr_dirty, nr_dirty,
nr_io, nr_io,
nr_more_io, nr_more_io,
nr_dirty_time,
!list_empty(&bdi->bdi_list), bdi->state); !list_empty(&bdi->bdi_list), bdi->state);
#undef K #undef K
...@@ -380,6 +385,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) ...@@ -380,6 +385,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
INIT_LIST_HEAD(&wb->b_dirty); INIT_LIST_HEAD(&wb->b_dirty);
INIT_LIST_HEAD(&wb->b_io); INIT_LIST_HEAD(&wb->b_io);
INIT_LIST_HEAD(&wb->b_more_io); INIT_LIST_HEAD(&wb->b_more_io);
INIT_LIST_HEAD(&wb->b_dirty_time);
spin_lock_init(&wb->list_lock); spin_lock_init(&wb->list_lock);
INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn); INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment