Commit 546888da authored by Chris Mason

Btrfs: fix btrfs fallocate oops and deadlock

Btrfs fallocate was incorrectly starting a transaction with a lock held
on the extent_io tree for the file, which could deadlock.  Strictly
speaking it was using join_transaction which would be safe, but it is better
to move the transaction outside of the lock.

When preallocated extents are overwritten, btrfs_mark_buffer_dirty was
being called on an unlocked buffer.  This was triggering an assertion and
oops because the lock is supposed to be held.

The bug was calling btrfs_mark_buffer_dirty on a leaf after btrfs_del_items
had been run.  btrfs_del_items takes care of dirtying things, so the solution
is to skip the btrfs_mark_buffer_dirty call in this case.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 8c594ea8
...@@ -830,7 +830,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, ...@@ -830,7 +830,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
ret = btrfs_del_items(trans, root, path, del_slot, del_nr); ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
BUG_ON(ret); BUG_ON(ret);
goto done; goto release;
} else if (split == start) { } else if (split == start) {
if (locked_end < extent_end) { if (locked_end < extent_end) {
ret = try_lock_extent(&BTRFS_I(inode)->io_tree, ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
...@@ -926,6 +926,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, ...@@ -926,6 +926,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
} }
done: done:
btrfs_mark_buffer_dirty(leaf); btrfs_mark_buffer_dirty(leaf);
release:
btrfs_release_path(root, path); btrfs_release_path(root, path);
if (split_end && split == start) { if (split_end && split == start) {
split = end; split = end;
......
...@@ -4970,10 +4970,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, ...@@ -4970,10 +4970,10 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
return err; return err;
} }
static int prealloc_file_range(struct inode *inode, u64 start, u64 end, static int prealloc_file_range(struct btrfs_trans_handle *trans,
struct inode *inode, u64 start, u64 end,
u64 alloc_hint, int mode) u64 alloc_hint, int mode)
{ {
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins; struct btrfs_key ins;
u64 alloc_size; u64 alloc_size;
...@@ -4981,10 +4981,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, ...@@ -4981,10 +4981,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
u64 num_bytes = end - start; u64 num_bytes = end - start;
int ret = 0; int ret = 0;
trans = btrfs_join_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
while (num_bytes > 0) { while (num_bytes > 0) {
alloc_size = min(num_bytes, root->fs_info->max_extent); alloc_size = min(num_bytes, root->fs_info->max_extent);
ret = btrfs_reserve_extent(trans, root, alloc_size, ret = btrfs_reserve_extent(trans, root, alloc_size,
...@@ -5015,7 +5011,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, ...@@ -5015,7 +5011,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
BUG_ON(ret); BUG_ON(ret);
} }
btrfs_end_transaction(trans, root);
return ret; return ret;
} }
...@@ -5029,11 +5024,18 @@ static long btrfs_fallocate(struct inode *inode, int mode, ...@@ -5029,11 +5024,18 @@ static long btrfs_fallocate(struct inode *inode, int mode,
u64 alloc_hint = 0; u64 alloc_hint = 0;
u64 mask = BTRFS_I(inode)->root->sectorsize - 1; u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
struct extent_map *em; struct extent_map *em;
struct btrfs_trans_handle *trans;
int ret; int ret;
alloc_start = offset & ~mask; alloc_start = offset & ~mask;
alloc_end = (offset + len + mask) & ~mask; alloc_end = (offset + len + mask) & ~mask;
/*
* wait for ordered IO before we have any locks. We'll loop again
* below with the locks held.
*/
btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
mutex_lock(&inode->i_mutex); mutex_lock(&inode->i_mutex);
if (alloc_start > inode->i_size) { if (alloc_start > inode->i_size) {
ret = btrfs_cont_expand(inode, alloc_start); ret = btrfs_cont_expand(inode, alloc_start);
...@@ -5043,6 +5045,16 @@ static long btrfs_fallocate(struct inode *inode, int mode, ...@@ -5043,6 +5045,16 @@ static long btrfs_fallocate(struct inode *inode, int mode,
while (1) { while (1) {
struct btrfs_ordered_extent *ordered; struct btrfs_ordered_extent *ordered;
trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
if (!trans) {
ret = -EIO;
goto out;
}
/* the extent lock is ordered inside the running
* transaction
*/
lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, lock_extent(&BTRFS_I(inode)->io_tree, alloc_start,
alloc_end - 1, GFP_NOFS); alloc_end - 1, GFP_NOFS);
ordered = btrfs_lookup_first_ordered_extent(inode, ordered = btrfs_lookup_first_ordered_extent(inode,
...@@ -5053,6 +5065,12 @@ static long btrfs_fallocate(struct inode *inode, int mode, ...@@ -5053,6 +5065,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered);
unlock_extent(&BTRFS_I(inode)->io_tree, unlock_extent(&BTRFS_I(inode)->io_tree,
alloc_start, alloc_end - 1, GFP_NOFS); alloc_start, alloc_end - 1, GFP_NOFS);
btrfs_end_transaction(trans, BTRFS_I(inode)->root);
/*
* we can't wait on the range with the transaction
* running or with the extent lock held
*/
btrfs_wait_ordered_range(inode, alloc_start, btrfs_wait_ordered_range(inode, alloc_start,
alloc_end - alloc_start); alloc_end - alloc_start);
} else { } else {
...@@ -5070,7 +5088,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, ...@@ -5070,7 +5088,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
last_byte = min(extent_map_end(em), alloc_end); last_byte = min(extent_map_end(em), alloc_end);
last_byte = (last_byte + mask) & ~mask; last_byte = (last_byte + mask) & ~mask;
if (em->block_start == EXTENT_MAP_HOLE) { if (em->block_start == EXTENT_MAP_HOLE) {
ret = prealloc_file_range(inode, cur_offset, ret = prealloc_file_range(trans, inode, cur_offset,
last_byte, alloc_hint, mode); last_byte, alloc_hint, mode);
if (ret < 0) { if (ret < 0) {
free_extent_map(em); free_extent_map(em);
...@@ -5089,6 +5107,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, ...@@ -5089,6 +5107,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
} }
unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1, unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1,
GFP_NOFS); GFP_NOFS);
btrfs_end_transaction(trans, BTRFS_I(inode)->root);
out: out:
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
return ret; return ret;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment