Commit d61c6a58 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'lazytime_for_v5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs

Pull lazytime updates from Jan Kara:
 "Cleanups of the lazytime handling in the writeback code making rules
  for calling ->dirty_inode() filesystem handlers saner"

* tag 'lazytime_for_v5.12-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  ext4: simplify i_state checks in __ext4_update_other_inode_time()
  gfs2: don't worry about I_DIRTY_TIME in gfs2_fsync()
  fs: improve comments for writeback_single_inode()
  fs: drop redundant check from __writeback_single_inode()
  fs: clean up __mark_inode_dirty() a bit
  fs: pass only I_DIRTY_INODE flags to ->dirty_inode
  fs: don't call ->dirty_inode for lazytime timestamp updates
  fat: only specify I_DIRTY_TIME when needed in fat_update_time()
  fs: only specify I_DIRTY_TIME when needed in generic_update_time()
  fs: correctly document the inode dirty flags
parents c63dca9e ed296c6c
......@@ -270,7 +270,10 @@ or bottom half).
->alloc_inode.
``dirty_inode``
this method is called by the VFS to mark an inode dirty.
this method is called by the VFS when an inode is marked dirty.
This is specifically for the inode itself being marked dirty,
not its data. If the update needs to be persisted by fdatasync(),
then I_DIRTY_DATASYNC will be set in the flags argument.
``write_inode``
this method is called when the VFS needs to write an inode to
......
......@@ -4961,15 +4961,11 @@ static void __ext4_update_other_inode_time(struct super_block *sb,
if (!inode)
return;
if ((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
I_DIRTY_INODE)) ||
((inode->i_state & I_DIRTY_TIME) == 0))
if (!inode_is_dirtytime_only(inode))
return;
spin_lock(&inode->i_lock);
if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
I_DIRTY_INODE)) == 0) &&
(inode->i_state & I_DIRTY_TIME)) {
if (inode_is_dirtytime_only(inode)) {
struct ext4_inode_info *ei = EXT4_I(inode);
inode->i_state &= ~I_DIRTY_TIME;
......@@ -5937,26 +5933,16 @@ int __ext4_mark_inode_dirty(handle_t *handle, struct inode *inode,
* If the inode is marked synchronous, we don't honour that here - doing
* so would cause a commit on atime updates, which we don't bother doing.
* We handle synchronous inodes at the highest possible level.
*
* If only the I_DIRTY_TIME flag is set, we can skip everything. If
* I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need
* to copy into the on-disk inode structure are the timestamp files.
*/
void ext4_dirty_inode(struct inode *inode, int flags)
{
handle_t *handle;
if (flags == I_DIRTY_TIME)
return;
handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
if (IS_ERR(handle))
goto out;
return;
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
out:
return;
}
int ext4_change_inode_journal_flag(struct inode *inode, int val)
......
......@@ -1300,9 +1300,6 @@ static void f2fs_dirty_inode(struct inode *inode, int flags)
inode->i_ino == F2FS_META_INO(sbi))
return;
if (flags == I_DIRTY_TIME)
return;
if (is_inode_flag_set(inode, FI_AUTO_RECOVER))
clear_inode_flag(inode, FI_AUTO_RECOVER);
......
......@@ -329,22 +329,23 @@ EXPORT_SYMBOL_GPL(fat_truncate_time);
int fat_update_time(struct inode *inode, struct timespec64 *now, int flags)
{
int iflags = I_DIRTY_TIME;
bool dirty = false;
int dirty_flags = 0;
if (inode->i_ino == MSDOS_ROOT_INO)
return 0;
fat_truncate_time(inode, now, flags);
if (flags & S_VERSION)
dirty = inode_maybe_inc_iversion(inode, false);
if ((flags & (S_ATIME | S_CTIME | S_MTIME)) &&
!(inode->i_sb->s_flags & SB_LAZYTIME))
dirty = true;
if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
fat_truncate_time(inode, now, flags);
if (inode->i_sb->s_flags & SB_LAZYTIME)
dirty_flags |= I_DIRTY_TIME;
else
dirty_flags |= I_DIRTY_SYNC;
}
if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
dirty_flags |= I_DIRTY_SYNC;
if (dirty)
iflags |= I_DIRTY_SYNC;
__mark_inode_dirty(inode, iflags);
__mark_inode_dirty(inode, dirty_flags);
return 0;
}
EXPORT_SYMBOL_GPL(fat_update_time);
......
......@@ -1442,9 +1442,15 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
}
/*
* Write out an inode and its dirty pages. Do not update the writeback list
* linkage. That is left to the caller. The caller is also responsible for
* setting I_SYNC flag and calling inode_sync_complete() to clear it.
* Write out an inode and its dirty pages (or some of its dirty pages, depending
* on @wbc->nr_to_write), and clear the relevant dirty flags from i_state.
*
* This doesn't remove the inode from the writeback list it is on, except
* potentially to move it from b_dirty_time to b_dirty due to timestamp
* expiration. The caller is otherwise responsible for writeback list handling.
*
* The caller is also responsible for setting the I_SYNC flag beforehand and
* calling inode_sync_complete() to clear it afterwards.
*/
static int
__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
......@@ -1479,7 +1485,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* change I_DIRTY_TIME into I_DIRTY_SYNC.
*/
if ((inode->i_state & I_DIRTY_TIME) &&
(wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
(wbc->sync_mode == WB_SYNC_ALL ||
time_after(jiffies, inode->dirtied_time_when +
dirtytime_expire_interval * HZ))) {
trace_writeback_lazytime(inode);
......@@ -1487,9 +1493,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
}
/*
* Some filesystems may redirty the inode during the writeback
* due to delalloc, clear dirty metadata flags right before
* write_inode()
* Get and clear the dirty flags from i_state. This needs to be done
* after calling writepages because some filesystems may redirty the
* inode during writepages due to delalloc. It also needs to be done
* after handling timestamp expiration, as that may dirty the inode too.
*/
spin_lock(&inode->i_lock);
dirty = inode->i_state & I_DIRTY;
......@@ -1524,12 +1531,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
}
/*
* Write out an inode's dirty pages. Either the caller has an active reference
* on the inode or the inode has I_WILL_FREE set.
* Write out an inode's dirty data and metadata on-demand, i.e. separately from
* the regular batched writeback done by the flusher threads in
* writeback_sb_inodes(). @wbc controls various aspects of the write, such as
* whether it is a data-integrity sync (%WB_SYNC_ALL) or not (%WB_SYNC_NONE).
*
* This function is designed to be called for writing back one inode which
* we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
* and does more profound writeback list handling in writeback_sb_inodes().
* To prevent the inode from going away, either the caller must have a reference
* to the inode, or the inode must have I_WILL_FREE or I_FREEING set.
*/
static int writeback_single_inode(struct inode *inode,
struct writeback_control *wbc)
......@@ -1544,23 +1552,23 @@ static int writeback_single_inode(struct inode *inode,
WARN_ON(inode->i_state & I_WILL_FREE);
if (inode->i_state & I_SYNC) {
if (wbc->sync_mode != WB_SYNC_ALL)
goto out;
/*
* It's a data-integrity sync. We must wait. Since callers hold
* inode reference or inode has I_WILL_FREE set, it cannot go
* away under us.
* Writeback is already running on the inode. For WB_SYNC_NONE,
* that's enough and we can just return. For WB_SYNC_ALL, we
* must wait for the existing writeback to complete, then do
* writeback again if there's anything left.
*/
if (wbc->sync_mode != WB_SYNC_ALL)
goto out;
__inode_wait_for_writeback(inode);
}
WARN_ON(inode->i_state & I_SYNC);
/*
* Skip inode if it is clean and we have no outstanding writeback in
* WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
* function since flusher thread may be doing for example sync in
* parallel and if we move the inode, it could get skipped. So here we
* make sure inode is on some writeback list and leave it there unless
* we have completely cleaned the inode.
* If the inode is already fully clean, then there's nothing to do.
*
* For data-integrity syncs we also need to check whether any pages are
* still under writeback, e.g. due to prior WB_SYNC_NONE writeback. If
* there are any such pages, we'll need to wait for them.
*/
if (!(inode->i_state & I_DIRTY_ALL) &&
(wbc->sync_mode != WB_SYNC_ALL ||
......@@ -1576,8 +1584,9 @@ static int writeback_single_inode(struct inode *inode,
wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
/*
* If inode is clean, remove it from writeback lists. Otherwise don't
* touch it. See comment above for explanation.
* If the inode is now fully clean, then it can be safely removed from
* its writeback list (if any). Otherwise the flusher threads are
* responsible for the writeback lists.
*/
if (!(inode->i_state & I_DIRTY_ALL))
inode_io_list_del_locked(inode, wb);
......@@ -2219,23 +2228,24 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
}
/**
* __mark_inode_dirty - internal function
* __mark_inode_dirty - internal function to mark an inode dirty
*
* @inode: inode to mark
* @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
* @flags: what kind of dirty, e.g. I_DIRTY_SYNC. This can be a combination of
* multiple I_DIRTY_* flags, except that I_DIRTY_TIME can't be combined
* with I_DIRTY_PAGES.
*
* Mark an inode as dirty. Callers should use mark_inode_dirty or
* mark_inode_dirty_sync.
* Mark an inode as dirty. We notify the filesystem, then update the inode's
* dirty flags. Then, if needed we add the inode to the appropriate dirty list.
*
* Put the inode on the super block's dirty list.
* Most callers should use mark_inode_dirty() or mark_inode_dirty_sync()
* instead of calling this directly.
*
* CAREFUL! We mark it dirty unconditionally, but move it onto the
* dirty list only if it is hashed or if it refers to a blockdev.
* If it was not hashed, it will never be added to the dirty list
* even if it is later hashed, as it will have been marked dirty already.
* CAREFUL! We only add the inode to the dirty list if it is hashed or if it
* refers to a blockdev. Unhashed inodes will never be added to the dirty list
* even if they are later hashed, as they will have been marked dirty already.
*
* In short, make sure you hash any inodes _before_ you start marking
* them dirty.
* In short, ensure you hash any inodes _before_ you start marking them dirty.
*
* Note that for blockdevs, inode->dirtied_when represents the dirtying time of
* the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
......@@ -2247,25 +2257,34 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
void __mark_inode_dirty(struct inode *inode, int flags)
{
struct super_block *sb = inode->i_sb;
int dirtytime;
int dirtytime = 0;
trace_writeback_mark_inode_dirty(inode, flags);
/*
* Don't do this for I_DIRTY_PAGES - that doesn't actually
* dirty the inode itself
*/
if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) {
if (flags & I_DIRTY_INODE) {
/*
* Notify the filesystem about the inode being dirtied, so that
* (if needed) it can update on-disk fields and journal the
* inode. This is only needed when the inode itself is being
* dirtied now. I.e. it's only needed for I_DIRTY_INODE, not
* for just I_DIRTY_PAGES or I_DIRTY_TIME.
*/
trace_writeback_dirty_inode_start(inode, flags);
if (sb->s_op->dirty_inode)
sb->s_op->dirty_inode(inode, flags);
sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
trace_writeback_dirty_inode(inode, flags);
}
if (flags & I_DIRTY_INODE)
/* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
flags &= ~I_DIRTY_TIME;
dirtytime = flags & I_DIRTY_TIME;
} else {
/*
* Else it's either I_DIRTY_PAGES, I_DIRTY_TIME, or nothing.
* (We don't support setting both I_DIRTY_PAGES and I_DIRTY_TIME
* in one call to __mark_inode_dirty().)
*/
dirtytime = flags & I_DIRTY_TIME;
WARN_ON_ONCE(dirtytime && flags != I_DIRTY_TIME);
}
/*
* Paired with smp_mb() in __writeback_single_inode() for the
......@@ -2288,6 +2307,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
inode_attach_wb(inode, NULL);
/* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
if (flags & I_DIRTY_INODE)
inode->i_state &= ~I_DIRTY_TIME;
inode->i_state |= flags;
......
......@@ -749,7 +749,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
{
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
int sync_state = inode->i_state & I_DIRTY_ALL;
int sync_state = inode->i_state & I_DIRTY;
struct gfs2_inode *ip = GFS2_I(inode);
int ret = 0, ret1 = 0;
......@@ -762,7 +762,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
if (!gfs2_is_jdata(ip))
sync_state &= ~I_DIRTY_PAGES;
if (datasync)
sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);
sync_state &= ~I_DIRTY_SYNC;
if (sync_state) {
ret = sync_inode_metadata(inode, 1);
......
......@@ -562,8 +562,6 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
int need_endtrans = 0;
int ret;
if (!(flags & I_DIRTY_INODE))
return;
if (unlikely(gfs2_withdrawn(sdp)))
return;
if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
......
......@@ -1743,24 +1743,26 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
int generic_update_time(struct inode *inode, struct timespec64 *time, int flags)
{
int iflags = I_DIRTY_TIME;
bool dirty = false;
if (flags & S_ATIME)
inode->i_atime = *time;
if (flags & S_VERSION)
dirty = inode_maybe_inc_iversion(inode, false);
if (flags & S_CTIME)
inode->i_ctime = *time;
if (flags & S_MTIME)
inode->i_mtime = *time;
if ((flags & (S_ATIME | S_CTIME | S_MTIME)) &&
!(inode->i_sb->s_flags & SB_LAZYTIME))
dirty = true;
if (dirty)
iflags |= I_DIRTY_SYNC;
__mark_inode_dirty(inode, iflags);
int dirty_flags = 0;
if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
if (flags & S_ATIME)
inode->i_atime = *time;
if (flags & S_CTIME)
inode->i_ctime = *time;
if (flags & S_MTIME)
inode->i_mtime = *time;
if (inode->i_sb->s_flags & SB_LAZYTIME)
dirty_flags |= I_DIRTY_TIME;
else
dirty_flags |= I_DIRTY_SYNC;
}
if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
dirty_flags |= I_DIRTY_SYNC;
__mark_inode_dirty(inode, dirty_flags);
return 0;
}
EXPORT_SYMBOL(generic_update_time);
......
......@@ -2084,8 +2084,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
/*
* Inode state bits. Protected by inode->i_lock
*
* Three bits determine the dirty state of the inode, I_DIRTY_SYNC,
* I_DIRTY_DATASYNC and I_DIRTY_PAGES.
* Four bits determine the dirty state of the inode: I_DIRTY_SYNC,
* I_DIRTY_DATASYNC, I_DIRTY_PAGES, and I_DIRTY_TIME.
*
* Four bits define the lifetime of an inode. Initially, inodes are I_NEW,
* until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at
......@@ -2094,12 +2094,20 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src,
* Two bits are used for locking and completion notification, I_NEW and I_SYNC.
*
* I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on
* fdatasync(). i_atime is the usual cause.
* I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
* fdatasync() (unless I_DIRTY_DATASYNC is also set).
* Timestamp updates are the usual cause.
* I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of
* these changes separately from I_DIRTY_SYNC so that we
* don't have to write inode on fdatasync() when only
* mtime has changed in it.
* e.g. the timestamps have changed.
* I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean.
* I_DIRTY_TIME The inode itself only has dirty timestamps, and the
* lazytime mount option is enabled. We keep track of this
* separately from I_DIRTY_SYNC in order to implement
* lazytime. This gets cleared if I_DIRTY_INODE
* (I_DIRTY_SYNC and/or I_DIRTY_DATASYNC) gets set. I.e.
* either I_DIRTY_TIME *or* I_DIRTY_INODE can be set in
* i_state, but not both. I_DIRTY_PAGES may still be set.
* I_NEW Serves as both a mutex and completion notification.
* New inodes set I_NEW. If two processes both create
* the same inode, one of them will release its inode and
......@@ -2186,6 +2194,21 @@ static inline void mark_inode_dirty_sync(struct inode *inode)
__mark_inode_dirty(inode, I_DIRTY_SYNC);
}
/*
* Returns true if the given inode itself only has dirty timestamps (its pages
* may still be dirty) and isn't currently being allocated or freed.
* Filesystems should call this if when writing an inode when lazytime is
* enabled, they want to opportunistically write the timestamps of other inodes
* located very nearby on-disk, e.g. in the same inode block. This returns true
* if the given inode is in need of such an opportunistic update. Requires
* i_lock, or at least later re-checking under i_lock.
*/
static inline bool inode_is_dirtytime_only(struct inode *inode)
{
return (inode->i_state & (I_DIRTY_TIME | I_NEW |
I_FREEING | I_WILL_FREE)) == I_DIRTY_TIME;
}
extern void inc_nlink(struct inode *inode);
extern void drop_nlink(struct inode *inode);
extern void clear_nlink(struct inode *inode);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment