Commit 8257b2dc authored by Miao Xie's avatar Miao Xie Committed by Josef Bacik

Btrfs: introduce btrfs_{start, end}_nocow_write() for each subvolume

If the snapshot creation happened after the nocow write but before the dirty
data flush, we would fail to flush the dirty data because of no space.

So we must keep track of when those nocow write operations start and when they
end, if there are nocow writers, the snapshot creators must wait. In order
to implement this function, I introduce btrfs_{start, end}_nocow_write(),
which is similar to mnt_{want,drop}_write().

These two functions are only used for nocow file write operations.
Signed-off-by: default avatarMiao Xie <miaox@cn.fujitsu.com>
Signed-off-by: default avatarJosef Bacik <jbacik@fb.com>
parent 52483bc2
......@@ -1681,6 +1681,11 @@ struct btrfs_fs_info {
unsigned int update_uuid_tree_gen:1;
};
struct btrfs_subvolume_writers {
struct percpu_counter counter;
wait_queue_head_t wait;
};
/*
* in ram representation of the tree. extent_root is used for all allocations
* and for the extent tree extent_root root.
......@@ -1829,6 +1834,8 @@ struct btrfs_root {
* manipulation with the read-only status via SUBVOL_SETFLAGS
*/
int send_in_progress;
struct btrfs_subvolume_writers *subv_writers;
atomic_t will_be_snapshoted;
};
struct btrfs_ioctl_defrag_range_args {
......@@ -3353,6 +3360,9 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int __get_raid_index(u64 flags);
int btrfs_start_nocow_write(struct btrfs_root *root);
void btrfs_end_nocow_write(struct btrfs_root *root);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
int level, int *slot);
......
......@@ -1149,6 +1149,32 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
}
}
static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
{
struct btrfs_subvolume_writers *writers;
int ret;
writers = kmalloc(sizeof(*writers), GFP_NOFS);
if (!writers)
return ERR_PTR(-ENOMEM);
ret = percpu_counter_init(&writers->counter, 0);
if (ret < 0) {
kfree(writers);
return ERR_PTR(ret);
}
init_waitqueue_head(&writers->wait);
return writers;
}
static void
btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers)
{
percpu_counter_destroy(&writers->counter);
kfree(writers);
}
static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
u32 stripesize, struct btrfs_root *root,
struct btrfs_fs_info *fs_info,
......@@ -1205,6 +1231,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
atomic_set(&root->log_batch, 0);
atomic_set(&root->orphan_inodes, 0);
atomic_set(&root->refs, 1);
atomic_set(&root->will_be_snapshoted, 0);
root->log_transid = 0;
root->log_transid_committed = -1;
root->last_log_commit = 0;
......@@ -1502,6 +1529,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
int btrfs_init_fs_root(struct btrfs_root *root)
{
int ret;
struct btrfs_subvolume_writers *writers;
root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
......@@ -1511,6 +1539,13 @@ int btrfs_init_fs_root(struct btrfs_root *root)
goto fail;
}
writers = btrfs_alloc_subvolume_writers();
if (IS_ERR(writers)) {
ret = PTR_ERR(writers);
goto fail;
}
root->subv_writers = writers;
btrfs_init_free_ino_ctl(root);
mutex_init(&root->fs_commit_mutex);
spin_lock_init(&root->cache_lock);
......@@ -1518,8 +1553,11 @@ int btrfs_init_fs_root(struct btrfs_root *root)
ret = get_anon_bdev(&root->anon_dev);
if (ret)
goto fail;
goto free_writers;
return 0;
free_writers:
btrfs_free_subvolume_writers(root->subv_writers);
fail:
kfree(root->free_ino_ctl);
kfree(root->free_ino_pinned);
......@@ -3459,6 +3497,8 @@ static void free_fs_root(struct btrfs_root *root)
root->orphan_block_rsv = NULL;
if (root->anon_dev)
free_anon_bdev(root->anon_dev);
if (root->subv_writers)
btrfs_free_subvolume_writers(root->subv_writers);
free_extent_buffer(root->node);
free_extent_buffer(root->commit_root);
kfree(root->free_ino_ctl);
......
......@@ -8938,3 +8938,38 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
range->len = trimmed;
return ret;
}
/*
* btrfs_{start,end}_write() is similar to mnt_{want, drop}_write(),
* they are used to prevent the some tasks writing data into the page cache
* by nocow before the subvolume is snapshoted, but flush the data into
* the disk after the snapshot creation.
*/
void btrfs_end_nocow_write(struct btrfs_root *root)
{
percpu_counter_dec(&root->subv_writers->counter);
/*
* Make sure counter is updated before we wake up
* waiters.
*/
smp_mb();
if (waitqueue_active(&root->subv_writers->wait))
wake_up(&root->subv_writers->wait);
}
int btrfs_start_nocow_write(struct btrfs_root *root)
{
if (unlikely(atomic_read(&root->will_be_snapshoted)))
return 0;
percpu_counter_inc(&root->subv_writers->counter);
/*
* Make sure counter is updated before we check for snapshot creation.
*/
smp_mb();
if (unlikely(atomic_read(&root->will_be_snapshoted))) {
btrfs_end_nocow_write(root);
return 0;
}
return 1;
}
......@@ -1410,6 +1410,10 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
u64 num_bytes;
int ret;
ret = btrfs_start_nocow_write(root);
if (!ret)
return -ENOSPC;
lockstart = round_down(pos, root->sectorsize);
lockend = round_up(pos + *write_bytes, root->sectorsize) - 1;
......@@ -1427,11 +1431,13 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
num_bytes = lockend - lockstart + 1;
ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
if (ret <= 0)
if (ret <= 0) {
ret = 0;
else
btrfs_end_nocow_write(root);
} else {
*write_bytes = min_t(size_t, *write_bytes ,
num_bytes - pos + lockstart);
}
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
......@@ -1520,6 +1526,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (!only_release_metadata)
btrfs_free_reserved_data_space(inode,
reserve_bytes);
else
btrfs_end_nocow_write(root);
break;
}
......@@ -1608,6 +1616,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
}
release_bytes = 0;
if (only_release_metadata)
btrfs_end_nocow_write(root);
if (only_release_metadata && copied > 0) {
u64 lockstart = round_down(pos, root->sectorsize);
u64 lockend = lockstart +
......@@ -1634,11 +1645,13 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
kfree(pages);
if (release_bytes) {
if (only_release_metadata)
if (only_release_metadata) {
btrfs_end_nocow_write(root);
btrfs_delalloc_release_metadata(inode, release_bytes);
else
} else {
btrfs_delalloc_release_space(inode, release_bytes);
}
}
return num_written ? num_written : ret;
}
......
......@@ -611,6 +611,23 @@ static noinline int create_subvol(struct inode *dir,
return ret;
}
static void btrfs_wait_nocow_write(struct btrfs_root *root)
{
s64 writers;
DEFINE_WAIT(wait);
do {
prepare_to_wait(&root->subv_writers->wait, &wait,
TASK_UNINTERRUPTIBLE);
writers = percpu_counter_sum(&root->subv_writers->counter);
if (writers)
schedule();
finish_wait(&root->subv_writers->wait, &wait);
} while (writers);
}
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct dentry *dentry, char *name, int namelen,
u64 *async_transid, bool readonly,
......@@ -624,15 +641,21 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
if (!root->ref_cows)
return -EINVAL;
atomic_inc(&root->will_be_snapshoted);
smp_mb__after_atomic_inc();
btrfs_wait_nocow_write(root);
ret = btrfs_start_delalloc_inodes(root, 0);
if (ret)
return ret;
goto out;
btrfs_wait_ordered_extents(root, -1);
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot)
return -ENOMEM;
if (!pending_snapshot) {
ret = -ENOMEM;
goto out;
}
btrfs_init_block_rsv(&pending_snapshot->block_rsv,
BTRFS_BLOCK_RSV_TEMP);
......@@ -649,7 +672,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
&pending_snapshot->qgroup_reserved,
false);
if (ret)
goto out;
goto free;
pending_snapshot->dentry = dentry;
pending_snapshot->root = root;
......@@ -700,8 +723,10 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
btrfs_subvolume_release_metadata(BTRFS_I(dir)->root,
&pending_snapshot->block_rsv,
pending_snapshot->qgroup_reserved);
out:
free:
kfree(pending_snapshot);
out:
atomic_dec(&root->will_be_snapshoted);
return ret;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment