Commit effaf901 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-5.5-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "A few more fixes that have been in the works during last twp weeks.
  All have a user visible effect and are stable material:

   - scrub: properly update progress after calling cancel ioctl, calling
     'resume' would start from the beginning otherwise

   - fix subvolume reference removal, after moving out of the original
     path the reference is not recognized and will lead to transaction
     abort

   - fix reloc root lifetime checks, could lead to crashes when there's
     subvolume cleaning running in parallel

   - fix memory leak when quotas get disabled in the middle of extent
     accounting

   - fix transaction abort in case of balance being started on degraded
     mount on eg. RAID1"

* tag 'for-5.5-rc6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: check rw_devices, not num_devices for balance
  Btrfs: always copy scrub arguments back to user space
  btrfs: relocation: fix reloc_root lifespan and access
  btrfs: fix memory leak in qgroup accounting
  btrfs: do not delete mismatched root refs
  btrfs: fix invalid removal of root ref
  btrfs: rework arguments of btrfs_unlink_subvol
parents ab7541c3 b35cf1f0
......@@ -4238,18 +4238,30 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
}
static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
struct inode *dir, u64 objectid,
const char *name, int name_len)
struct inode *dir, struct dentry *dentry)
{
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_dir_item *di;
struct btrfs_key key;
const char *name = dentry->d_name.name;
int name_len = dentry->d_name.len;
u64 index;
int ret;
u64 objectid;
u64 dir_ino = btrfs_ino(BTRFS_I(dir));
if (btrfs_ino(inode) == BTRFS_FIRST_FREE_OBJECTID) {
objectid = inode->root->root_key.objectid;
} else if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
objectid = inode->location.objectid;
} else {
WARN_ON(1);
return -EINVAL;
}
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
......@@ -4271,13 +4283,16 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
}
btrfs_release_path(path);
ret = btrfs_del_root_ref(trans, objectid, root->root_key.objectid,
dir_ino, &index, name, name_len);
if (ret < 0) {
if (ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
goto out;
}
/*
* This is a placeholder inode for a subvolume we didn't have a
* reference to at the time of the snapshot creation. In the meantime
* we could have renamed the real subvol link into our snapshot, so
* depending on btrfs_del_root_ref to return -ENOENT here is incorret.
* Instead simply lookup the dir_index_item for this entry so we can
* remove it. Otherwise we know we have a ref to the root and we can
* call btrfs_del_root_ref, and it _shouldn't_ fail.
*/
if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) {
di = btrfs_search_dir_index_item(root, path, dir_ino,
name, name_len);
if (IS_ERR_OR_NULL(di)) {
......@@ -4292,8 +4307,16 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
index = key.offset;
}
btrfs_release_path(path);
} else {
ret = btrfs_del_root_ref(trans, objectid,
root->root_key.objectid, dir_ino,
&index, name, name_len);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
}
}
ret = btrfs_delete_delayed_dir_index(trans, BTRFS_I(dir), index);
if (ret) {
......@@ -4487,8 +4510,7 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
ret = btrfs_unlink_subvol(trans, dir, dest->root_key.objectid,
dentry->d_name.name, dentry->d_name.len);
ret = btrfs_unlink_subvol(trans, dir, dentry);
if (ret) {
err = ret;
btrfs_abort_transaction(trans, ret);
......@@ -4583,10 +4605,7 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
return PTR_ERR(trans);
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
err = btrfs_unlink_subvol(trans, dir,
BTRFS_I(inode)->location.objectid,
dentry->d_name.name,
dentry->d_name.len);
err = btrfs_unlink_subvol(trans, dir, dentry);
goto out;
}
......@@ -9536,7 +9555,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
u64 new_ino = btrfs_ino(BTRFS_I(new_inode));
u64 old_idx = 0;
u64 new_idx = 0;
u64 root_objectid;
int ret;
bool root_log_pinned = false;
bool dest_log_pinned = false;
......@@ -9642,10 +9660,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
old_dentry->d_name.name,
old_dentry->d_name.len);
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
} else { /* src is an inode */
ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
BTRFS_I(old_dentry->d_inode),
......@@ -9661,10 +9676,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* dest is a subvolume */
if (new_ino == BTRFS_FIRST_FREE_OBJECTID) {
root_objectid = BTRFS_I(new_inode)->root->root_key.objectid;
ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
new_dentry->d_name.name,
new_dentry->d_name.len);
ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
} else { /* dest is an inode */
ret = __btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
BTRFS_I(new_dentry->d_inode),
......@@ -9862,7 +9874,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_inode = d_inode(new_dentry);
struct inode *old_inode = d_inode(old_dentry);
u64 index = 0;
u64 root_objectid;
int ret;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
bool log_pinned = false;
......@@ -9970,10 +9981,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
BTRFS_I(old_inode), 1);
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
root_objectid = BTRFS_I(old_inode)->root->root_key.objectid;
ret = btrfs_unlink_subvol(trans, old_dir, root_objectid,
old_dentry->d_name.name,
old_dentry->d_name.len);
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
} else {
ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
BTRFS_I(d_inode(old_dentry)),
......@@ -9992,10 +10000,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_inode->i_ctime = current_time(new_inode);
if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
root_objectid = BTRFS_I(new_inode)->location.objectid;
ret = btrfs_unlink_subvol(trans, new_dir, root_objectid,
new_dentry->d_name.name,
new_dentry->d_name.len);
ret = btrfs_unlink_subvol(trans, new_dir, new_dentry);
BUG_ON(new_inode->i_nlink == 0);
} else {
ret = btrfs_unlink_inode(trans, dest, BTRFS_I(new_dir),
......
......@@ -4252,7 +4252,19 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
&sa->progress, sa->flags & BTRFS_SCRUB_READONLY,
0);
if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
/*
* Copy scrub args to user space even if btrfs_scrub_dev() returned an
* error. This is important as it allows user space to know how much
* progress scrub has done. For example, if scrub is canceled we get
* -ECANCELED from btrfs_scrub_dev() and return that error back to user
* space. Later user space can inspect the progress from the structure
* btrfs_ioctl_scrub_args and resume scrub from where it left off
* previously (btrfs-progs does this).
* If we fail to copy the btrfs_ioctl_scrub_args structure to user space
* then return -EFAULT to signal the structure was not copied or it may
* be corrupt and unreliable due to a partial copy.
*/
if (copy_to_user(arg, sa, sizeof(*sa)))
ret = -EFAULT;
if (!(sa->flags & BTRFS_SCRUB_READONLY))
......
......@@ -2423,8 +2423,12 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
u64 nr_old_roots = 0;
int ret = 0;
/*
* If quotas get disabled meanwhile, the resouces need to be freed and
* we can't just exit here.
*/
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
return 0;
goto out_free;
if (new_roots) {
if (!maybe_fs_roots(new_roots))
......
......@@ -517,6 +517,34 @@ static int update_backref_cache(struct btrfs_trans_handle *trans,
return 1;
}
static bool reloc_root_is_dead(struct btrfs_root *root)
{
/*
* Pair with set_bit/clear_bit in clean_dirty_subvols and
* btrfs_update_reloc_root. We need to see the updated bit before
* trying to access reloc_root
*/
smp_rmb();
if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
return true;
return false;
}
/*
* Check if this subvolume tree has valid reloc tree.
*
* Reloc tree after swap is considered dead, thus not considered as valid.
* This is enough for most callers, as they don't distinguish dead reloc root
* from no reloc root. But should_ignore_root() below is a special case.
*/
static bool have_reloc_root(struct btrfs_root *root)
{
if (reloc_root_is_dead(root))
return false;
if (!root->reloc_root)
return false;
return true;
}
static int should_ignore_root(struct btrfs_root *root)
{
......@@ -525,6 +553,10 @@ static int should_ignore_root(struct btrfs_root *root)
if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
return 0;
/* This root has been merged with its reloc tree, we can ignore it */
if (reloc_root_is_dead(root))
return 1;
reloc_root = root->reloc_root;
if (!reloc_root)
return 0;
......@@ -1439,7 +1471,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
* The subvolume has reloc tree but the swap is finished, no need to
* create/update the dead reloc tree
*/
if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state))
if (reloc_root_is_dead(root))
return 0;
if (root->reloc_root) {
......@@ -1478,8 +1510,7 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root_item *root_item;
int ret;
if (test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state) ||
!root->reloc_root)
if (!have_reloc_root(root))
goto out;
reloc_root = root->reloc_root;
......@@ -1489,6 +1520,11 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
if (fs_info->reloc_ctl->merge_reloc_tree &&
btrfs_root_refs(root_item) == 0) {
set_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
/*
* Mark the tree as dead before we change reloc_root so
* have_reloc_root will not touch it from now on.
*/
smp_wmb();
__del_reloc_root(reloc_root);
}
......@@ -2201,6 +2237,11 @@ static int clean_dirty_subvols(struct reloc_control *rc)
if (ret2 < 0 && !ret)
ret = ret2;
}
/*
* Need barrier to ensure clear_bit() only happens after
* root->reloc_root = NULL. Pairs with have_reloc_root.
*/
smp_wmb();
clear_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state);
btrfs_put_fs_root(root);
} else {
......@@ -4718,7 +4759,7 @@ void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
struct btrfs_root *root = pending->root;
struct reloc_control *rc = root->fs_info->reloc_ctl;
if (!root->reloc_root || !rc)
if (!rc || !have_reloc_root(root))
return;
if (!rc->merge_reloc_tree)
......@@ -4752,7 +4793,7 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
struct reloc_control *rc = root->fs_info->reloc_ctl;
int ret;
if (!root->reloc_root || !rc)
if (!rc || !have_reloc_root(root))
return 0;
rc = root->fs_info->reloc_ctl;
......
......@@ -376,11 +376,13 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
leaf = path->nodes[0];
ref = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_root_ref);
WARN_ON(btrfs_root_ref_dirid(leaf, ref) != dirid);
WARN_ON(btrfs_root_ref_name_len(leaf, ref) != name_len);
ptr = (unsigned long)(ref + 1);
WARN_ON(memcmp_extent_buffer(leaf, name, ptr, name_len));
if ((btrfs_root_ref_dirid(leaf, ref) != dirid) ||
(btrfs_root_ref_name_len(leaf, ref) != name_len) ||
memcmp_extent_buffer(leaf, name, ptr, name_len)) {
err = -ENOENT;
goto out;
}
*sequence = btrfs_root_ref_sequence(leaf, ref);
ret = btrfs_del_item(trans, tree_root, path);
......
......@@ -3881,7 +3881,11 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
}
}
num_devices = btrfs_num_devices(fs_info);
/*
* rw_devices will not change at the moment, device add/delete/replace
* are excluded by EXCL_OP
*/
num_devices = fs_info->fs_devices->rw_devices;
/*
* SINGLE profile on-disk has no profile bit, but in-memory we have a
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment