Commit e85c81ba authored by Xin Yin's avatar Xin Yin Committed by Theodore Ts'o

ext4: fast commit may not fallback for ineligible commit

For the follow scenario:
1. jbd start commit transaction n
2. task A get new handle for transaction n+1
3. task A do some ineligible actions and mark FC_INELIGIBLE
4. jbd complete transaction n and clean FC_INELIGIBLE
5. task A call fsync

In this case fast commit will not fallback to full commit and
transaction n+1 also not handled by jbd.

Make ext4_fc_mark_ineligible() also record transaction tid for
latest ineligible case, when call ext4_fc_cleanup() check
current transaction tid, if small than latest ineligible tid
do not clear the EXT4_MF_FC_INELIGIBLE.
Reported-by: default avatarkernel test robot <lkp@intel.com>
Reported-by: default avatarDan Carpenter <dan.carpenter@oracle.com>
Reported-by: default avatarRitesh Harjani <riteshh@linux.ibm.com>
Suggested-by: default avatarHarshad Shirwadkar <harshadshirwadkar@gmail.com>
Signed-off-by: default avatarXin Yin <yinxin.x@bytedance.com>
Link: https://lore.kernel.org/r/20220117093655.35160-2-yinxin.x@bytedance.comSigned-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
parent 31a074a0
...@@ -1749,6 +1749,7 @@ struct ext4_sb_info { ...@@ -1749,6 +1749,7 @@ struct ext4_sb_info {
spinlock_t s_fc_lock; spinlock_t s_fc_lock;
struct buffer_head *s_fc_bh; struct buffer_head *s_fc_bh;
struct ext4_fc_stats s_fc_stats; struct ext4_fc_stats s_fc_stats;
tid_t s_fc_ineligible_tid;
#ifdef CONFIG_EXT4_DEBUG #ifdef CONFIG_EXT4_DEBUG
int s_fc_debug_max_replay; int s_fc_debug_max_replay;
#endif #endif
...@@ -2925,7 +2926,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode, ...@@ -2925,7 +2926,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
struct dentry *dentry); struct dentry *dentry);
void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
void ext4_fc_track_inode(handle_t *handle, struct inode *inode); void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
void ext4_fc_mark_ineligible(struct super_block *sb, int reason); void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle);
void ext4_fc_start_update(struct inode *inode); void ext4_fc_start_update(struct inode *inode);
void ext4_fc_stop_update(struct inode *inode); void ext4_fc_stop_update(struct inode *inode);
void ext4_fc_del(struct inode *inode); void ext4_fc_del(struct inode *inode);
......
...@@ -5336,7 +5336,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5336,7 +5336,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ret = PTR_ERR(handle); ret = PTR_ERR(handle);
goto out_mmap; goto out_mmap;
} }
ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
down_write(&EXT4_I(inode)->i_data_sem); down_write(&EXT4_I(inode)->i_data_sem);
ext4_discard_preallocations(inode, 0); ext4_discard_preallocations(inode, 0);
...@@ -5476,7 +5476,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -5476,7 +5476,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
ret = PTR_ERR(handle); ret = PTR_ERR(handle);
goto out_mmap; goto out_mmap;
} }
ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
/* Expand file to avoid data loss if there is error while shifting */ /* Expand file to avoid data loss if there is error while shifting */
inode->i_size += len; inode->i_size += len;
......
...@@ -300,18 +300,32 @@ void ext4_fc_del(struct inode *inode) ...@@ -300,18 +300,32 @@ void ext4_fc_del(struct inode *inode)
} }
/* /*
* Mark file system as fast commit ineligible. This means that next commit * Mark file system as fast commit ineligible, and record latest
* operation would result in a full jbd2 commit. * ineligible transaction tid. This means until the recorded
* transaction, commit operation would result in a full jbd2 commit.
*/ */
void ext4_fc_mark_ineligible(struct super_block *sb, int reason) void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
tid_t tid;
if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
return; return;
ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
if (handle && !IS_ERR(handle))
tid = handle->h_transaction->t_tid;
else {
read_lock(&sbi->s_journal->j_state_lock);
tid = sbi->s_journal->j_running_transaction ?
sbi->s_journal->j_running_transaction->t_tid : 0;
read_unlock(&sbi->s_journal->j_state_lock);
}
spin_lock(&sbi->s_fc_lock);
if (sbi->s_fc_ineligible_tid < tid)
sbi->s_fc_ineligible_tid = tid;
spin_unlock(&sbi->s_fc_lock);
WARN_ON(reason >= EXT4_FC_REASON_MAX); WARN_ON(reason >= EXT4_FC_REASON_MAX);
sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
} }
...@@ -387,7 +401,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) ...@@ -387,7 +401,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
mutex_unlock(&ei->i_fc_lock); mutex_unlock(&ei->i_fc_lock);
node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
if (!node) { if (!node) {
ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
mutex_lock(&ei->i_fc_lock); mutex_lock(&ei->i_fc_lock);
return -ENOMEM; return -ENOMEM;
} }
...@@ -400,7 +414,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) ...@@ -400,7 +414,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
if (!node->fcd_name.name) { if (!node->fcd_name.name) {
kmem_cache_free(ext4_fc_dentry_cachep, node); kmem_cache_free(ext4_fc_dentry_cachep, node);
ext4_fc_mark_ineligible(inode->i_sb, ext4_fc_mark_ineligible(inode->i_sb,
EXT4_FC_REASON_NOMEM); EXT4_FC_REASON_NOMEM, NULL);
mutex_lock(&ei->i_fc_lock); mutex_lock(&ei->i_fc_lock);
return -ENOMEM; return -ENOMEM;
} }
...@@ -502,7 +516,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode) ...@@ -502,7 +516,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
if (ext4_should_journal_data(inode)) { if (ext4_should_journal_data(inode)) {
ext4_fc_mark_ineligible(inode->i_sb, ext4_fc_mark_ineligible(inode->i_sb,
EXT4_FC_REASON_INODE_JOURNAL_DATA); EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
return; return;
} }
...@@ -1179,7 +1193,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid) ...@@ -1179,7 +1193,7 @@ int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
* Fast commit cleanup routine. This is called after every fast commit and * Fast commit cleanup routine. This is called after every fast commit and
* full commit. full is true if we are called after a full commit. * full commit. full is true if we are called after a full commit.
*/ */
static void ext4_fc_cleanup(journal_t *journal, int full) static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
{ {
struct super_block *sb = journal->j_private; struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
...@@ -1227,7 +1241,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full) ...@@ -1227,7 +1241,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
&sbi->s_fc_q[FC_Q_MAIN]); &sbi->s_fc_q[FC_Q_MAIN]);
ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
if (tid >= sbi->s_fc_ineligible_tid) {
sbi->s_fc_ineligible_tid = 0;
ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
}
if (full) if (full)
sbi->s_fc_bytes = 0; sbi->s_fc_bytes = 0;
......
...@@ -337,7 +337,7 @@ void ext4_evict_inode(struct inode *inode) ...@@ -337,7 +337,7 @@ void ext4_evict_inode(struct inode *inode)
return; return;
no_delete: no_delete:
if (!list_empty(&EXT4_I(inode)->i_fc_list)) if (!list_empty(&EXT4_I(inode)->i_fc_list))
ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
} }
...@@ -5976,7 +5976,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ...@@ -5976,7 +5976,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
return PTR_ERR(handle); return PTR_ERR(handle);
ext4_fc_mark_ineligible(inode->i_sb, ext4_fc_mark_ineligible(inode->i_sb,
EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
err = ext4_mark_inode_dirty(handle, inode); err = ext4_mark_inode_dirty(handle, inode);
ext4_handle_sync(handle); ext4_handle_sync(handle);
ext4_journal_stop(handle); ext4_journal_stop(handle);
......
...@@ -411,7 +411,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ...@@ -411,7 +411,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
err = -EINVAL; err = -EINVAL;
goto err_out; goto err_out;
} }
ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT); ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);
/* Protect extent tree against block allocations via delalloc */ /* Protect extent tree against block allocations via delalloc */
ext4_double_down_write_data_sem(inode, inode_bl); ext4_double_down_write_data_sem(inode, inode_bl);
...@@ -1373,7 +1373,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) ...@@ -1373,7 +1373,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
err = ext4_resize_fs(sb, n_blocks_count); err = ext4_resize_fs(sb, n_blocks_count);
if (EXT4_SB(sb)->s_journal) { if (EXT4_SB(sb)->s_journal) {
ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE); ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
......
...@@ -3889,7 +3889,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, ...@@ -3889,7 +3889,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
* dirents in directories. * dirents in directories.
*/ */
ext4_fc_mark_ineligible(old.inode->i_sb, ext4_fc_mark_ineligible(old.inode->i_sb,
EXT4_FC_REASON_RENAME_DIR); EXT4_FC_REASON_RENAME_DIR, handle);
} else { } else {
if (new.inode) if (new.inode)
ext4_fc_track_unlink(handle, new.dentry); ext4_fc_track_unlink(handle, new.dentry);
...@@ -4049,7 +4049,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -4049,7 +4049,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(retval)) if (unlikely(retval))
goto end_rename; goto end_rename;
ext4_fc_mark_ineligible(new.inode->i_sb, ext4_fc_mark_ineligible(new.inode->i_sb,
EXT4_FC_REASON_CROSS_RENAME); EXT4_FC_REASON_CROSS_RENAME, handle);
if (old.dir_bh) { if (old.dir_bh) {
retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
if (retval) if (retval)
......
...@@ -5084,6 +5084,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ...@@ -5084,6 +5084,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
sbi->s_fc_bytes = 0; sbi->s_fc_bytes = 0;
ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
sbi->s_fc_ineligible_tid = 0;
spin_lock_init(&sbi->s_fc_lock); spin_lock_init(&sbi->s_fc_lock);
memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
sbi->s_fc_replay_state.fc_regions = NULL; sbi->s_fc_replay_state.fc_regions = NULL;
......
...@@ -2408,7 +2408,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, ...@@ -2408,7 +2408,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
if (IS_SYNC(inode)) if (IS_SYNC(inode))
ext4_handle_sync(handle); ext4_handle_sync(handle);
} }
ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
cleanup: cleanup:
brelse(is.iloc.bh); brelse(is.iloc.bh);
...@@ -2486,7 +2486,7 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name, ...@@ -2486,7 +2486,7 @@ ext4_xattr_set(struct inode *inode, int name_index, const char *name,
if (error == 0) if (error == 0)
error = error2; error = error2;
} }
ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL);
return error; return error;
} }
...@@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, ...@@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
error); error);
goto cleanup; goto cleanup;
} }
ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
} }
error = 0; error = 0;
cleanup: cleanup:
......
...@@ -1170,7 +1170,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -1170,7 +1170,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
if (journal->j_commit_callback) if (journal->j_commit_callback)
journal->j_commit_callback(journal, commit_transaction); journal->j_commit_callback(journal, commit_transaction);
if (journal->j_fc_cleanup_callback) if (journal->j_fc_cleanup_callback)
journal->j_fc_cleanup_callback(journal, 1); journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
trace_jbd2_end_commit(journal, commit_transaction); trace_jbd2_end_commit(journal, commit_transaction);
jbd_debug(1, "JBD2: commit %d complete, head %d\n", jbd_debug(1, "JBD2: commit %d complete, head %d\n",
......
...@@ -771,7 +771,7 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) ...@@ -771,7 +771,7 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
{ {
jbd2_journal_unlock_updates(journal); jbd2_journal_unlock_updates(journal);
if (journal->j_fc_cleanup_callback) if (journal->j_fc_cleanup_callback)
journal->j_fc_cleanup_callback(journal, 0); journal->j_fc_cleanup_callback(journal, 0, tid);
write_lock(&journal->j_state_lock); write_lock(&journal->j_state_lock);
journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
if (fallback) if (fallback)
......
...@@ -1295,7 +1295,7 @@ struct journal_s ...@@ -1295,7 +1295,7 @@ struct journal_s
* Clean-up after fast commit or full commit. JBD2 calls this function * Clean-up after fast commit or full commit. JBD2 calls this function
* after every commit operation. * after every commit operation.
*/ */
void (*j_fc_cleanup_callback)(struct journal_s *journal, int); void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid);
/** /**
* @j_fc_replay_callback: * @j_fc_replay_callback:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment