Commit d7a8ab4e authored by Filipe Manana, committed by David Sterba

btrfs: avoid double nocow check when doing nowait dio writes

When doing a NOWAIT direct IO write we are checking twice if we can NOCOW
into the target file range using can_nocow_extent() - once at the very
beginning of the write path, at btrfs_write_check() via
check_nocow_nolock(), and later again at btrfs_get_blocks_direct_write().

The can_nocow_extent() function does a lot of expensive things - searching
for the file extent item in the inode's subvolume tree, searching for the
extent item in the extent tree, checking delayed references, etc, so it
isn't a very cheap call.

We can remove the first check at btrfs_write_check(), and add there a
quick check to verify if the inode has the NODATACOW or PREALLOC flags,
and quickly bail out if it has neither of those flags, as that
means we have to COW and therefore can't comply with the NOWAIT semantics.

After this we do only one call to can_nocow_extent(), while we are at
btrfs_get_blocks_direct_write(), where we have already locked the file
range and we did a try lock on the range before, at
btrfs_dio_iomap_begin() (since the previous patch in the series).
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
parent 59094403
...@@ -1460,8 +1460,27 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages, ...@@ -1460,8 +1460,27 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
return ret; return ret;
} }
static int check_can_nocow(struct btrfs_inode *inode, loff_t pos, /*
size_t *write_bytes, bool nowait) * Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
*
* @pos: File offset.
* @write_bytes: The length to write, will be updated to the nocow writeable
* range.
*
* This function will flush ordered extents in the range to ensure proper
* nocow checks.
*
* Return:
* > 0 If we can nocow, and updates @write_bytes.
* 0 If we can't do a nocow write.
* -EAGAIN If we can't do a nocow write because snapshotting of the inode's
* root is in progress.
* < 0 If an error happened.
*
* NOTE: Callers need to call btrfs_check_nocow_unlock() if we return > 0.
*/
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes)
{ {
struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root; struct btrfs_root *root = inode->root;
...@@ -1472,7 +1491,7 @@ static int check_can_nocow(struct btrfs_inode *inode, loff_t pos, ...@@ -1472,7 +1491,7 @@ static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC))) if (!(inode->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
return 0; return 0;
if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock)) if (!btrfs_drew_try_write_lock(&root->snapshot_lock))
return -EAGAIN; return -EAGAIN;
lockstart = round_down(pos, fs_info->sectorsize); lockstart = round_down(pos, fs_info->sectorsize);
...@@ -1480,71 +1499,21 @@ static int check_can_nocow(struct btrfs_inode *inode, loff_t pos, ...@@ -1480,71 +1499,21 @@ static int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
fs_info->sectorsize) - 1; fs_info->sectorsize) - 1;
num_bytes = lockend - lockstart + 1; num_bytes = lockend - lockstart + 1;
if (nowait) { btrfs_lock_and_flush_ordered_range(inode, lockstart, lockend, NULL);
struct btrfs_ordered_extent *ordered;
if (!try_lock_extent(&inode->io_tree, lockstart, lockend))
return -EAGAIN;
ordered = btrfs_lookup_ordered_range(inode, lockstart,
num_bytes);
if (ordered) {
btrfs_put_ordered_extent(ordered);
ret = -EAGAIN;
goto out_unlock;
}
} else {
btrfs_lock_and_flush_ordered_range(inode, lockstart,
lockend, NULL);
}
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes, ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
NULL, NULL, NULL, false); NULL, NULL, NULL, false);
if (ret <= 0) { if (ret <= 0) {
ret = 0; ret = 0;
if (!nowait) btrfs_drew_write_unlock(&root->snapshot_lock);
btrfs_drew_write_unlock(&root->snapshot_lock);
} else { } else {
*write_bytes = min_t(size_t, *write_bytes , *write_bytes = min_t(size_t, *write_bytes ,
num_bytes - pos + lockstart); num_bytes - pos + lockstart);
} }
out_unlock:
unlock_extent(&inode->io_tree, lockstart, lockend); unlock_extent(&inode->io_tree, lockstart, lockend);
return ret; return ret;
} }
static int check_nocow_nolock(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes)
{
return check_can_nocow(inode, pos, write_bytes, true);
}
/*
* Check if we can do nocow write into the range [@pos, @pos + @write_bytes)
*
* @pos: File offset
* @write_bytes: The length to write, will be updated to the nocow writeable
* range
*
* This function will flush ordered extents in the range to ensure proper
* nocow checks.
*
* Return:
* >0 and update @write_bytes if we can do nocow write
* 0 if we can't do nocow write
* -EAGAIN if we can't get the needed lock or there are ordered extents
* for * (nowait == true) case
* <0 if other error happened
*
* NOTE: Callers need to release the lock by btrfs_check_nocow_unlock().
*/
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes)
{
return check_can_nocow(inode, pos, write_bytes, false);
}
void btrfs_check_nocow_unlock(struct btrfs_inode *inode) void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
{ {
btrfs_drew_write_unlock(&inode->root->snapshot_lock); btrfs_drew_write_unlock(&inode->root->snapshot_lock);
...@@ -1579,20 +1548,15 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, ...@@ -1579,20 +1548,15 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
loff_t oldsize; loff_t oldsize;
loff_t start_pos; loff_t start_pos;
if (iocb->ki_flags & IOCB_NOWAIT) { /*
size_t nocow_bytes = count; * Quickly bail out on NOWAIT writes if we don't have the nodatacow or
* prealloc flags, as without those flags we always have to COW. We will
/* We will allocate space in case nodatacow is not set, so bail */ * later check if we can really COW into the target range (using
if (check_nocow_nolock(BTRFS_I(inode), pos, &nocow_bytes) <= 0) * can_nocow_extent() at btrfs_get_blocks_direct_write()).
return -EAGAIN; */
/* if ((iocb->ki_flags & IOCB_NOWAIT) &&
* There are holes in the range or parts of the range that must !(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW | BTRFS_INODE_PREALLOC)))
* be COWed (shared extents, RO block groups, etc), so just bail return -EAGAIN;
* out.
*/
if (nocow_bytes < count)
return -EAGAIN;
}
current->backing_dev_info = inode_to_bdi(inode); current->backing_dev_info = inode_to_bdi(inode);
ret = file_remove_privs(file); ret = file_remove_privs(file);
......
...@@ -7403,7 +7403,8 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, ...@@ -7403,7 +7403,8 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
static int btrfs_get_blocks_direct_write(struct extent_map **map, static int btrfs_get_blocks_direct_write(struct extent_map **map,
struct inode *inode, struct inode *inode,
struct btrfs_dio_data *dio_data, struct btrfs_dio_data *dio_data,
u64 start, u64 len) u64 start, u64 len,
unsigned int iomap_flags)
{ {
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_map *em = *map; struct extent_map *em = *map;
...@@ -7473,6 +7474,9 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map, ...@@ -7473,6 +7474,9 @@ static int btrfs_get_blocks_direct_write(struct extent_map **map,
free_extent_map(em); free_extent_map(em);
*map = NULL; *map = NULL;
if (iomap_flags & IOMAP_NOWAIT)
return -EAGAIN;
/* We have to COW, so need to reserve metadata and data space. */ /* We have to COW, so need to reserve metadata and data space. */
ret = btrfs_delalloc_reserve_space(BTRFS_I(inode), ret = btrfs_delalloc_reserve_space(BTRFS_I(inode),
&dio_data->data_reserved, &dio_data->data_reserved,
...@@ -7649,7 +7653,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start, ...@@ -7649,7 +7653,7 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
if (write) { if (write) {
ret = btrfs_get_blocks_direct_write(&em, inode, dio_data, ret = btrfs_get_blocks_direct_write(&em, inode, dio_data,
start, len); start, len, flags);
if (ret < 0) if (ret < 0)
goto unlock_err; goto unlock_err;
unlock_extents = true; unlock_extents = true;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment