Commit 5d6f0e98 authored by Filipe Manana, committed by David Sterba

btrfs: stop locking the source extent range during reflink

Nowadays before starting a reflink operation we do this:

1) Take the VFS lock of the inodes in exclusive mode (a rw semaphore);

2) Take the mmap lock of the inodes (struct btrfs_inode::i_mmap_lock);

3) Flush all delalloc in the source and target ranges;

4) Wait for all ordered extents in the source and target ranges to
   complete;

5) Lock the source and destination ranges in the inodes' io trees.

In step 5 we lock the source range because:

1) We needed to serialize against mmap writes, but that is not needed
   anymore because nowadays we do that through the inode's i_mmap_lock
   (step 2). This has been the case since commit 8c99516a ("btrfs: exclude
   mmaps while doing remap");

2) To serialize against a concurrent relocation and avoid generating
   a delayed ref for an extent that was just dropped by relocation, see
   commit d8b55242 ("Btrfs: fix race between reflink/dedupe and
   relocation").

Locking the source range however blocks any concurrent reads for that
range and makes test case generic/733 fail.

So instead of locking the source range during reflinks, make relocation
read lock the inode's i_mmap_lock, so that it serializes with a concurrent
reflink while still being able to run concurrently with mmap writes and
allowing concurrent reads too.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 4a43d735
...@@ -616,35 +616,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, ...@@ -616,35 +616,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
return ret; return ret;
} }
static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
struct inode *inode2, u64 loff2, u64 len)
{
unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1, NULL);
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1, NULL);
}
static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
struct inode *inode2, u64 loff2, u64 len)
{
u64 range1_end = loff1 + len - 1;
u64 range2_end = loff2 + len - 1;
if (inode1 < inode2) {
swap(inode1, inode2);
swap(loff1, loff2);
swap(range1_end, range2_end);
} else if (inode1 == inode2 && loff2 < loff1) {
swap(loff1, loff2);
swap(range1_end, range2_end);
}
lock_extent(&BTRFS_I(inode1)->io_tree, loff1, range1_end, NULL);
lock_extent(&BTRFS_I(inode2)->io_tree, loff2, range2_end, NULL);
btrfs_assert_inode_range_clean(BTRFS_I(inode1), loff1, range1_end);
btrfs_assert_inode_range_clean(BTRFS_I(inode2), loff2, range2_end);
}
static void btrfs_double_mmap_lock(struct inode *inode1, struct inode *inode2) static void btrfs_double_mmap_lock(struct inode *inode1, struct inode *inode2)
{ {
if (inode1 < inode2) if (inode1 < inode2)
...@@ -662,17 +633,21 @@ static void btrfs_double_mmap_unlock(struct inode *inode1, struct inode *inode2) ...@@ -662,17 +633,21 @@ static void btrfs_double_mmap_unlock(struct inode *inode1, struct inode *inode2)
static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
struct inode *dst, u64 dst_loff) struct inode *dst, u64 dst_loff)
{ {
const u64 end = dst_loff + len - 1;
struct extent_state *cached_state = NULL;
struct btrfs_fs_info *fs_info = BTRFS_I(src)->root->fs_info; struct btrfs_fs_info *fs_info = BTRFS_I(src)->root->fs_info;
const u64 bs = fs_info->sectorsize; const u64 bs = fs_info->sectorsize;
int ret; int ret;
/* /*
* Lock destination range to serialize with concurrent readahead() and * Lock destination range to serialize with concurrent readahead(), and
* source range to serialize with relocation. * we are safe from concurrency with relocation of source extents
* because we have already locked the inode's i_mmap_lock in exclusive
* mode.
*/ */
btrfs_double_extent_lock(src, loff, dst, dst_loff, len); lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state);
ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1); ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1);
btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state);
btrfs_btree_balance_dirty(fs_info); btrfs_btree_balance_dirty(fs_info);
...@@ -724,6 +699,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, ...@@ -724,6 +699,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
static noinline int btrfs_clone_files(struct file *file, struct file *file_src, static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
u64 off, u64 olen, u64 destoff) u64 off, u64 olen, u64 destoff)
{ {
struct extent_state *cached_state = NULL;
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct inode *src = file_inode(file_src); struct inode *src = file_inode(file_src);
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
...@@ -731,6 +707,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, ...@@ -731,6 +707,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
int wb_ret; int wb_ret;
u64 len = olen; u64 len = olen;
u64 bs = fs_info->sectorsize; u64 bs = fs_info->sectorsize;
u64 end;
/* /*
* VFS's generic_remap_file_range_prep() protects us from cloning the * VFS's generic_remap_file_range_prep() protects us from cloning the
...@@ -763,12 +740,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src, ...@@ -763,12 +740,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
} }
/* /*
* Lock destination range to serialize with concurrent readahead() and * Lock destination range to serialize with concurrent readahead(), and
* source range to serialize with relocation. * we are safe from concurrency with relocation of source extents
* because we have already locked the inode's i_mmap_lock in exclusive
* mode.
*/ */
btrfs_double_extent_lock(src, off, inode, destoff, len); end = destoff + len - 1;
lock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state);
ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
btrfs_double_extent_unlock(src, off, inode, destoff, len); unlock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state);
/* /*
* We may have copied an inline extent into a page of the destination * We may have copied an inline extent into a page of the destination
......
...@@ -1127,16 +1127,22 @@ int replace_file_extents(struct btrfs_trans_handle *trans, ...@@ -1127,16 +1127,22 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
fs_info->sectorsize)); fs_info->sectorsize));
WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize)); WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
end--; end--;
/* Take mmap lock to serialize with reflinks. */
if (!down_read_trylock(&BTRFS_I(inode)->i_mmap_lock))
continue;
ret = try_lock_extent(&BTRFS_I(inode)->io_tree, ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
key.offset, end, key.offset, end,
&cached_state); &cached_state);
if (!ret) if (!ret) {
up_read(&BTRFS_I(inode)->i_mmap_lock);
continue; continue;
}
btrfs_drop_extent_map_range(BTRFS_I(inode), btrfs_drop_extent_map_range(BTRFS_I(inode),
key.offset, end, true); key.offset, end, true);
unlock_extent(&BTRFS_I(inode)->io_tree, unlock_extent(&BTRFS_I(inode)->io_tree,
key.offset, end, &cached_state); key.offset, end, &cached_state);
up_read(&BTRFS_I(inode)->i_mmap_lock);
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment