Commit e5e03ad9 authored by Linus Torvalds

Merge tag 'for-4.17-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "We've accumulated some fixes during the last week, some of them were
  in the works for a longer time but there are some newer ones too.

  Most of the fixes have a reproducer and fix user visible problems,
  also candidates for stable kernels. They IMHO qualify for a late rc,
  though I did not expect that many"

* tag 'for-4.17-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix crash when trying to resume balance without the resume flag
  btrfs: Fix delalloc inodes invalidation during transaction abort
  btrfs: Split btrfs_del_delalloc_inode into 2 functions
  btrfs: fix reading stale metadata blocks after degraded raid1 mounts
  btrfs: property: Set incompat flag if lzo/zstd compression is set
  Btrfs: fix duplicate extents after fsync of file with prealloc extents
  Btrfs: fix xattr loss after power failure
  Btrfs: send, fix invalid access to commit roots due to concurrent snapshotting
parents 132ce5d4 02ee654d
......@@ -2436,10 +2436,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
if (p->reada != READA_NONE)
reada_for_search(fs_info, p, level, slot, key->objectid);
btrfs_release_path(p);
ret = -EAGAIN;
tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1,
tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1,
&first_key);
if (!IS_ERR(tmp)) {
/*
......@@ -2454,6 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
} else {
ret = PTR_ERR(tmp);
}
btrfs_release_path(p);
return ret;
}
......@@ -5414,12 +5414,24 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
down_read(&fs_info->commit_root_sem);
left_level = btrfs_header_level(left_root->commit_root);
left_root_level = left_level;
left_path->nodes[left_level] = left_root->commit_root;
left_path->nodes[left_level] =
btrfs_clone_extent_buffer(left_root->commit_root);
if (!left_path->nodes[left_level]) {
up_read(&fs_info->commit_root_sem);
ret = -ENOMEM;
goto out;
}
extent_buffer_get(left_path->nodes[left_level]);
right_level = btrfs_header_level(right_root->commit_root);
right_root_level = right_level;
right_path->nodes[right_level] = right_root->commit_root;
right_path->nodes[right_level] =
btrfs_clone_extent_buffer(right_root->commit_root);
if (!right_path->nodes[right_level]) {
up_read(&fs_info->commit_root_sem);
ret = -ENOMEM;
goto out;
}
extent_buffer_get(right_path->nodes[right_level]);
up_read(&fs_info->commit_root_sem);
......
......@@ -3182,6 +3182,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes);
void __btrfs_del_delalloc_inode(struct btrfs_root *root,
struct btrfs_inode *inode);
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index);
int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
......
......@@ -3818,6 +3818,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
btrfs_free_qgroup_config(fs_info);
ASSERT(list_empty(&fs_info->delalloc_roots));
if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
btrfs_info(fs_info, "at unmount delalloc count %lld",
......@@ -4125,15 +4126,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info)
/*
 * As listed, this function runs btrfs_cleanup_transaction(), then runs the
 * delayed iputs while holding cleaner_mutex, then takes and immediately drops
 * cleanup_work_sem for writing.
 *
 * NOTE(review): this listing comes from a diff hunk (-4125,15 +4126,15) whose
 * +/- markers are not visible, which is presumably why the
 * "cleanup FS via transaction" call shows up both at the top and at the
 * bottom of the body. Presumably each version of the file has it in only one
 * of the two positions - confirm against the real tree before relying on this.
 */
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info)
{
/* cleanup FS via transaction */
btrfs_cleanup_transaction(fs_info);
/* Delayed iputs are run with cleaner_mutex held. */
mutex_lock(&fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(fs_info);
mutex_unlock(&fs_info->cleaner_mutex);
/*
 * Write-lock and unlock back to back: nothing is done while the semaphore is
 * held. Presumably this only waits for current holders - TODO confirm.
 */
down_write(&fs_info->cleanup_work_sem);
up_write(&fs_info->cleanup_work_sem);
/* cleanup FS via transaction */
btrfs_cleanup_transaction(fs_info);
}
static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
......@@ -4258,19 +4259,23 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
list_splice_init(&root->delalloc_inodes, &splice);
while (!list_empty(&splice)) {
struct inode *inode = NULL;
btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
delalloc_inodes);
list_del_init(&btrfs_inode->delalloc_inodes);
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
&btrfs_inode->runtime_flags);
__btrfs_del_delalloc_inode(root, btrfs_inode);
spin_unlock(&root->delalloc_lock);
btrfs_invalidate_inodes(btrfs_inode->root);
/*
* Make sure we get a live inode and that it'll not disappear
* meanwhile.
*/
inode = igrab(&btrfs_inode->vfs_inode);
if (inode) {
invalidate_inode_pages2(inode->i_mapping);
iput(inode);
}
spin_lock(&root->delalloc_lock);
}
spin_unlock(&root->delalloc_lock);
}
......@@ -4286,7 +4291,6 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
while (!list_empty(&splice)) {
root = list_first_entry(&splice, struct btrfs_root,
delalloc_root);
list_del_init(&root->delalloc_root);
root = btrfs_grab_fs_root(root);
BUG_ON(!root);
spin_unlock(&fs_info->delalloc_root_lock);
......
......@@ -1742,12 +1742,12 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root,
spin_unlock(&root->delalloc_lock);
}
static void btrfs_del_delalloc_inode(struct btrfs_root *root,
struct btrfs_inode *inode)
void __btrfs_del_delalloc_inode(struct btrfs_root *root,
struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb);
spin_lock(&root->delalloc_lock);
if (!list_empty(&inode->delalloc_inodes)) {
list_del_init(&inode->delalloc_inodes);
clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
......@@ -1760,6 +1760,13 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root,
spin_unlock(&fs_info->delalloc_root_lock);
}
}
}
/*
 * Locked variant of __btrfs_del_delalloc_inode(): acquires the root's
 * delalloc_lock, delegates the actual work to the double-underscore helper
 * and drops the lock again.
 */
static void btrfs_del_delalloc_inode(struct btrfs_root *root,
				     struct btrfs_inode *inode)
{
	spin_lock(&root->delalloc_lock);

	/* The helper is invoked with delalloc_lock held by us. */
	__btrfs_del_delalloc_inode(root, inode);

	spin_unlock(&root->delalloc_lock);
}
......
......@@ -380,6 +380,7 @@ static int prop_compression_apply(struct inode *inode,
const char *value,
size_t len)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
int type;
if (len == 0) {
......@@ -390,14 +391,17 @@ static int prop_compression_apply(struct inode *inode,
return 0;
}
if (!strncmp("lzo", value, 3))
if (!strncmp("lzo", value, 3)) {
type = BTRFS_COMPRESS_LZO;
else if (!strncmp("zlib", value, 4))
btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
} else if (!strncmp("zlib", value, 4)) {
type = BTRFS_COMPRESS_ZLIB;
else if (!strncmp("zstd", value, len))
} else if (!strncmp("zstd", value, len)) {
type = BTRFS_COMPRESS_ZSTD;
else
btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
} else {
return -EINVAL;
}
BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
......
......@@ -4320,6 +4320,110 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
return ret;
}
/*
 * Log all prealloc extents beyond the inode's i_size to make sure we do not
 * lose them after doing a fast fsync and replaying the log. We scan the
 * subvolume's root instead of iterating the inode's extent map tree because
 * otherwise we can log incorrect extent items based on extent map conversion.
 * That can happen due to the fact that extent maps are merged when they
 * are not in the extent map tree's list of modified extents.
 *
 * @trans: transaction handle, handed to copy_items() and
 *         btrfs_truncate_inode_items()
 * @inode: inode whose items at offsets >= i_size are copied
 * @path:  caller-provided path used to walk inode->root; it is released
 *         before this function returns
 *
 * Returns 0 immediately when the inode does not have BTRFS_INODE_PREALLOC
 * set. Otherwise returns 0 on success, -ENOMEM if the destination path cannot
 * be allocated, or the failing value returned by btrfs_search_slot(),
 * btrfs_next_leaf(), btrfs_truncate_inode_items() or copy_items().
 */
static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *path)
{
struct btrfs_root *root = inode->root;
struct btrfs_key key;
const u64 i_size = i_size_read(&inode->vfs_inode);
const u64 ino = btrfs_ino(inode);
/* Allocated lazily, only once there is at least one item to copy. */
struct btrfs_path *dst_path = NULL;
/* (u64)-1 doubles as the "no matching item seen yet" marker. */
u64 last_extent = (u64)-1;
/* Number of consecutive slots, starting at start_slot, waiting to be copied. */
int ins_nr = 0;
/* Only assigned when a batch starts (ins_nr == 0); read only when ins_nr > 0. */
int start_slot;
int ret;
/* Nothing to do for inodes without the prealloc flag. */
if (!(inode->flags & BTRFS_INODE_PREALLOC))
return 0;
/* Start the scan at the first key >= (ino, EXTENT_DATA, i_size). */
key.objectid = ino;
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = i_size;
/* NULL transaction and zero for the last two arguments: presumably a plain lookup. */
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto out;
while (true) {
struct extent_buffer *leaf = path->nodes[0];
int slot = path->slots[0];
/* Ran past the last item of this leaf. */
if (slot >= btrfs_header_nritems(leaf)) {
/* Flush the pending batch before leaving the leaf it refers to. */
if (ins_nr > 0) {
ret = copy_items(trans, inode, dst_path, path,
&last_extent, start_slot,
ins_nr, 1, 0);
if (ret < 0)
goto out;
ins_nr = 0;
}
ret = btrfs_next_leaf(root, path);
if (ret < 0)
goto out;
/* A positive return ends the scan and is reported as success. */
if (ret > 0) {
ret = 0;
break;
}
continue;
}
btrfs_item_key_to_cpu(leaf, &key, slot);
/* Items of a higher objectid: stop scanning. */
if (key.objectid > ino)
break;
/*
 * Skip items with a lower objectid (warned about once), a lower key type,
 * or an offset below i_size.
 */
if (WARN_ON_ONCE(key.objectid < ino) ||
key.type < BTRFS_EXTENT_DATA_KEY ||
key.offset < i_size) {
path->slots[0]++;
continue;
}
/* First matching item: do the one-time log tree cleanup below. */
if (last_extent == (u64)-1) {
last_extent = key.offset;
/*
 * Avoid logging extent items logged in past fsync calls
 * and leading to duplicate keys in the log tree.
 */
/* The truncation of the log root is simply retried while it returns -EAGAIN. */
do {
ret = btrfs_truncate_inode_items(trans,
root->log_root,
&inode->vfs_inode,
i_size,
BTRFS_EXTENT_DATA_KEY);
} while (ret == -EAGAIN);
if (ret)
goto out;
}
/* Open a new batch at this slot, or extend the current one. */
if (ins_nr == 0)
start_slot = slot;
ins_nr++;
path->slots[0]++;
/* Make sure the destination path exists before any copy_items() call. */
if (!dst_path) {
dst_path = btrfs_alloc_path();
if (!dst_path) {
ret = -ENOMEM;
goto out;
}
}
}
/* Copy whatever batch is still pending after the loop ended. */
if (ins_nr > 0) {
ret = copy_items(trans, inode, dst_path, path, &last_extent,
start_slot, ins_nr, 1, 0);
/* A positive return from copy_items() is reported to the caller as 0. */
if (ret > 0)
ret = 0;
}
out:
/* dst_path may still be NULL here (early exits before the lazy allocation). */
btrfs_release_path(path);
btrfs_free_path(dst_path);
return ret;
}
static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_inode *inode,
......@@ -4362,6 +4466,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
if (em->generation <= test_gen)
continue;
/* We log prealloc extents beyond eof later. */
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) &&
em->start >= i_size_read(&inode->vfs_inode))
continue;
if (em->start < logged_start)
logged_start = em->start;
if ((em->start + em->len - 1) > logged_end)
......@@ -4374,31 +4483,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
num++;
}
/*
* Add all prealloc extents beyond the inode's i_size to make sure we
* don't lose them after doing a fast fsync and replaying the log.
*/
if (inode->flags & BTRFS_INODE_PREALLOC) {
struct rb_node *node;
for (node = rb_last(&tree->map); node; node = rb_prev(node)) {
em = rb_entry(node, struct extent_map, rb_node);
if (em->start < i_size_read(&inode->vfs_inode))
break;
if (!list_empty(&em->list))
continue;
/* Same as above loop. */
if (++num > 32768) {
list_del_init(&tree->modified_extents);
ret = -EFBIG;
goto process;
}
refcount_inc(&em->refs);
set_bit(EXTENT_FLAG_LOGGING, &em->flags);
list_add_tail(&em->list, &extents);
}
}
list_sort(NULL, &extents, extent_cmp);
btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end);
/*
......@@ -4443,6 +4527,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
up_write(&inode->dio_sem);
btrfs_release_path(path);
if (!ret)
ret = btrfs_log_prealloc_extents(trans, inode, path);
return ret;
}
......@@ -4827,6 +4914,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
struct extent_map_tree *em_tree = &inode->extent_tree;
u64 logged_isize = 0;
bool need_log_inode_item = true;
bool xattrs_logged = false;
path = btrfs_alloc_path();
if (!path)
......@@ -5128,6 +5216,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
if (err)
goto out_unlock;
xattrs_logged = true;
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
btrfs_release_path(path);
btrfs_release_path(dst_path);
......@@ -5140,6 +5229,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
btrfs_release_path(dst_path);
if (need_log_inode_item) {
err = log_inode_item(trans, log, dst_path, inode);
if (!err && !xattrs_logged) {
err = btrfs_log_all_xattrs(trans, root, inode, path,
dst_path);
btrfs_release_path(path);
}
if (err)
goto out_unlock;
}
......
......@@ -4052,6 +4052,15 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
return 0;
}
/*
* A ro->rw remount sequence should continue with the paused balance
* regardless of who pauses it, system or the user as of now, so set
* the resume flag.
*/
spin_lock(&fs_info->balance_lock);
fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME;
spin_unlock(&fs_info->balance_lock);
tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
return PTR_ERR_OR_ZERO(tsk);
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment