Commit 24bbcf04 authored by Yan, Zheng's avatar Yan, Zheng Committed by Chris Mason

Btrfs: Add delayed iput

iput() can trigger new transactions if we are dropping the
final reference, so calling it in btrfs_commit_transaction
may end up deadlock. This patch adds delayed iput to avoid
the issue.
Signed-off-by: default avatarYan Zheng <zheng.yan@oracle.com>
Signed-off-by: default avatarChris Mason <chris.mason@oracle.com>
parent f34f57a3
...@@ -872,6 +872,9 @@ struct btrfs_fs_info { ...@@ -872,6 +872,9 @@ struct btrfs_fs_info {
struct list_head dead_roots; struct list_head dead_roots;
struct list_head caching_block_groups; struct list_head caching_block_groups;
spinlock_t delayed_iput_lock;
struct list_head delayed_iputs;
atomic_t nr_async_submits; atomic_t nr_async_submits;
atomic_t async_submit_draining; atomic_t async_submit_draining;
atomic_t nr_async_bios; atomic_t nr_async_bios;
...@@ -2301,7 +2304,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, ...@@ -2301,7 +2304,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct inode *inode, u64 new_size, struct inode *inode, u64 new_size,
u32 min_type); u32 min_type);
int btrfs_start_delalloc_inodes(struct btrfs_root *root); int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
int btrfs_writepages(struct address_space *mapping, int btrfs_writepages(struct address_space *mapping,
struct writeback_control *wbc); struct writeback_control *wbc);
...@@ -2341,6 +2344,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); ...@@ -2341,6 +2344,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
void btrfs_orphan_cleanup(struct btrfs_root *root); void btrfs_orphan_cleanup(struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t size); int btrfs_cont_expand(struct inode *inode, loff_t size);
int btrfs_invalidate_inodes(struct btrfs_root *root); int btrfs_invalidate_inodes(struct btrfs_root *root);
void btrfs_add_delayed_iput(struct inode *inode);
void btrfs_run_delayed_iputs(struct btrfs_root *root);
extern const struct dentry_operations btrfs_dentry_operations; extern const struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */ /* ioctl.c */
......
...@@ -1476,6 +1476,7 @@ static int cleaner_kthread(void *arg) ...@@ -1476,6 +1476,7 @@ static int cleaner_kthread(void *arg)
if (!(root->fs_info->sb->s_flags & MS_RDONLY) && if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
mutex_trylock(&root->fs_info->cleaner_mutex)) { mutex_trylock(&root->fs_info->cleaner_mutex)) {
btrfs_run_delayed_iputs(root);
btrfs_clean_old_snapshots(root); btrfs_clean_old_snapshots(root);
mutex_unlock(&root->fs_info->cleaner_mutex); mutex_unlock(&root->fs_info->cleaner_mutex);
} }
...@@ -1605,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, ...@@ -1605,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->delayed_iputs);
INIT_LIST_HEAD(&fs_info->hashers); INIT_LIST_HEAD(&fs_info->hashers);
INIT_LIST_HEAD(&fs_info->delalloc_inodes); INIT_LIST_HEAD(&fs_info->delalloc_inodes);
INIT_LIST_HEAD(&fs_info->ordered_operations); INIT_LIST_HEAD(&fs_info->ordered_operations);
...@@ -1613,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, ...@@ -1613,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->new_trans_lock); spin_lock_init(&fs_info->new_trans_lock);
spin_lock_init(&fs_info->ref_cache_lock); spin_lock_init(&fs_info->ref_cache_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
init_completion(&fs_info->kobj_unregister); init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root; fs_info->tree_root = tree_root;
...@@ -2386,6 +2389,7 @@ int btrfs_commit_super(struct btrfs_root *root) ...@@ -2386,6 +2389,7 @@ int btrfs_commit_super(struct btrfs_root *root)
int ret; int ret;
mutex_lock(&root->fs_info->cleaner_mutex); mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(root);
btrfs_clean_old_snapshots(root); btrfs_clean_old_snapshots(root);
mutex_unlock(&root->fs_info->cleaner_mutex); mutex_unlock(&root->fs_info->cleaner_mutex);
......
...@@ -2880,9 +2880,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work) ...@@ -2880,9 +2880,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
root = async->root; root = async->root;
info = async->info; info = async->info;
btrfs_start_delalloc_inodes(root); btrfs_start_delalloc_inodes(root, 0);
wake_up(&info->flush_wait); wake_up(&info->flush_wait);
btrfs_wait_ordered_extents(root, 0); btrfs_wait_ordered_extents(root, 0, 0);
spin_lock(&info->lock); spin_lock(&info->lock);
info->flushing = 0; info->flushing = 0;
...@@ -2956,8 +2956,8 @@ static void flush_delalloc(struct btrfs_root *root, ...@@ -2956,8 +2956,8 @@ static void flush_delalloc(struct btrfs_root *root,
return; return;
flush: flush:
btrfs_start_delalloc_inodes(root); btrfs_start_delalloc_inodes(root, 0);
btrfs_wait_ordered_extents(root, 0); btrfs_wait_ordered_extents(root, 0, 0);
spin_lock(&info->lock); spin_lock(&info->lock);
info->flushing = 0; info->flushing = 0;
......
...@@ -2022,6 +2022,54 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, ...@@ -2022,6 +2022,54 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
return -EIO; return -EIO;
} }
struct delayed_iput {
struct list_head list;
struct inode *inode;
};
void btrfs_add_delayed_iput(struct inode *inode)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct delayed_iput *delayed;
if (atomic_add_unless(&inode->i_count, -1, 1))
return;
delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
delayed->inode = inode;
spin_lock(&fs_info->delayed_iput_lock);
list_add_tail(&delayed->list, &fs_info->delayed_iputs);
spin_unlock(&fs_info->delayed_iput_lock);
}
void btrfs_run_delayed_iputs(struct btrfs_root *root)
{
LIST_HEAD(list);
struct btrfs_fs_info *fs_info = root->fs_info;
struct delayed_iput *delayed;
int empty;
spin_lock(&fs_info->delayed_iput_lock);
empty = list_empty(&fs_info->delayed_iputs);
spin_unlock(&fs_info->delayed_iput_lock);
if (empty)
return;
down_read(&root->fs_info->cleanup_work_sem);
spin_lock(&fs_info->delayed_iput_lock);
list_splice_init(&fs_info->delayed_iputs, &list);
spin_unlock(&fs_info->delayed_iput_lock);
while (!list_empty(&list)) {
delayed = list_entry(list.next, struct delayed_iput, list);
list_del(&delayed->list);
iput(delayed->inode);
kfree(delayed);
}
up_read(&root->fs_info->cleanup_work_sem);
}
/* /*
* This creates an orphan entry for the given inode in case something goes * This creates an orphan entry for the given inode in case something goes
* wrong in the middle of an unlink/truncate. * wrong in the middle of an unlink/truncate.
...@@ -5568,7 +5616,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, ...@@ -5568,7 +5616,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
* some fairly slow code that needs optimization. This walks the list * some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk. * of all the inodes with pending delalloc and forces them to disk.
*/ */
int btrfs_start_delalloc_inodes(struct btrfs_root *root) int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
{ {
struct list_head *head = &root->fs_info->delalloc_inodes; struct list_head *head = &root->fs_info->delalloc_inodes;
struct btrfs_inode *binode; struct btrfs_inode *binode;
...@@ -5587,7 +5635,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) ...@@ -5587,7 +5635,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
spin_unlock(&root->fs_info->delalloc_lock); spin_unlock(&root->fs_info->delalloc_lock);
if (inode) { if (inode) {
filemap_flush(inode->i_mapping); filemap_flush(inode->i_mapping);
iput(inode); if (delay_iput)
btrfs_add_delayed_iput(inode);
else
iput(inode);
} }
cond_resched(); cond_resched();
spin_lock(&root->fs_info->delalloc_lock); spin_lock(&root->fs_info->delalloc_lock);
......
...@@ -352,7 +352,8 @@ int btrfs_remove_ordered_extent(struct inode *inode, ...@@ -352,7 +352,8 @@ int btrfs_remove_ordered_extent(struct inode *inode,
* wait for all the ordered extents in a root. This is done when balancing * wait for all the ordered extents in a root. This is done when balancing
* space between drives. * space between drives.
*/ */
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) int btrfs_wait_ordered_extents(struct btrfs_root *root,
int nocow_only, int delay_iput)
{ {
struct list_head splice; struct list_head splice;
struct list_head *cur; struct list_head *cur;
...@@ -389,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) ...@@ -389,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
if (inode) { if (inode) {
btrfs_start_ordered_extent(inode, ordered, 1); btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered);
iput(inode); if (delay_iput)
btrfs_add_delayed_iput(inode);
else
iput(inode);
} else { } else {
btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered);
} }
...@@ -447,7 +451,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) ...@@ -447,7 +451,7 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
btrfs_wait_ordered_range(inode, 0, (u64)-1); btrfs_wait_ordered_range(inode, 0, (u64)-1);
else else
filemap_flush(inode->i_mapping); filemap_flush(inode->i_mapping);
iput(inode); btrfs_add_delayed_iput(inode);
} }
cond_resched(); cond_resched();
......
...@@ -153,9 +153,10 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); ...@@ -153,9 +153,10 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered); struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
struct inode *inode); struct inode *inode);
int btrfs_wait_ordered_extents(struct btrfs_root *root,
int nocow_only, int delay_iput);
#endif #endif
...@@ -3541,8 +3541,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) ...@@ -3541,8 +3541,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
(unsigned long long)rc->block_group->key.objectid, (unsigned long long)rc->block_group->key.objectid,
(unsigned long long)rc->block_group->flags); (unsigned long long)rc->block_group->flags);
btrfs_start_delalloc_inodes(fs_info->tree_root); btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
btrfs_wait_ordered_extents(fs_info->tree_root, 0); btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
while (1) { while (1) {
rc->extents_found = 0; rc->extents_found = 0;
......
...@@ -405,8 +405,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) ...@@ -405,8 +405,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0; return 0;
} }
btrfs_start_delalloc_inodes(root); btrfs_start_delalloc_inodes(root, 0);
btrfs_wait_ordered_extents(root, 0); btrfs_wait_ordered_extents(root, 0, 0);
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root); ret = btrfs_commit_transaction(trans, root);
......
...@@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, ...@@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
memset(trans, 0, sizeof(*trans)); memset(trans, 0, sizeof(*trans));
kmem_cache_free(btrfs_trans_handle_cachep, trans); kmem_cache_free(btrfs_trans_handle_cachep, trans);
if (throttle)
btrfs_run_delayed_iputs(root);
return 0; return 0;
} }
...@@ -991,11 +994,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ...@@ -991,11 +994,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->trans_mutex);
if (flush_on_commit) { if (flush_on_commit) {
btrfs_start_delalloc_inodes(root); btrfs_start_delalloc_inodes(root, 1);
ret = btrfs_wait_ordered_extents(root, 0); ret = btrfs_wait_ordered_extents(root, 0, 1);
BUG_ON(ret); BUG_ON(ret);
} else if (snap_pending) { } else if (snap_pending) {
ret = btrfs_wait_ordered_extents(root, 1); ret = btrfs_wait_ordered_extents(root, 0, 1);
BUG_ON(ret); BUG_ON(ret);
} }
...@@ -1113,6 +1116,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ...@@ -1113,6 +1116,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
current->journal_info = NULL; current->journal_info = NULL;
kmem_cache_free(btrfs_trans_handle_cachep, trans); kmem_cache_free(btrfs_trans_handle_cachep, trans);
if (current != root->fs_info->transaction_kthread)
btrfs_run_delayed_iputs(root);
return ret; return ret;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment