Commit 85b5d4bc authored by Linus Torvalds

Merge tag 'for-4.20-part2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull more btrfs updates from David Sterba:
 "This contains a few minor updates and fixes that were under testing or
  arrived shortly after the merge window freeze, mostly stable material"

* tag 'for-4.20-part2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  Btrfs: fix use-after-free when dumping free space
  Btrfs: fix use-after-free during inode eviction
  btrfs: move the dio_sem higher up the callchain
  btrfs: don't run delayed_iputs in commit
  btrfs: fix insert_reserved error handling
  btrfs: only free reserved extent if we didn't insert it
  btrfs: don't use ctl->free_space for max_extent_size
  btrfs: set max_extent_size properly
  btrfs: reset max_extent_size properly
  MAINTAINERS: update my email address for btrfs
  btrfs: delayed-ref: extract find_first_ref_head from find_ref_head
  Btrfs: fix deadlock when writing out free space caches
  Btrfs: fix assertion on fsync of regular file when using no-holes feature
  Btrfs: fix null pointer dereference on compressed write path error
parents 11743c56 9084cb6a
......@@ -3160,7 +3160,7 @@ F: drivers/gpio/gpio-bt8xx.c
BTRFS FILE SYSTEM
M: Chris Mason <clm@fb.com>
M: Josef Bacik <jbacik@fb.com>
M: Josef Bacik <josef@toxicpanda.com>
M: David Sterba <dsterba@suse.com>
L: linux-btrfs@vger.kernel.org
W: http://btrfs.wiki.kernel.org/
......
......@@ -1014,9 +1014,26 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if ((root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && parent)
parent_start = parent->start;
/*
* If we are COWing a node/leaf from the extent, chunk or device trees,
* make sure that we do not finish block group creation of pending block
* groups. We do this to avoid a deadlock.
* COWing can result in allocation of a new chunk, and flushing pending
* block groups (btrfs_create_pending_block_groups()) can be triggered
* when finishing allocation of a new chunk. Creation of a pending block
* group modifies the extent, chunk and device trees, therefore we could
* deadlock with ourselves since we are holding a lock on an extent
* buffer that btrfs_create_pending_block_groups() may try to COW later.
*/
if (root == fs_info->extent_root ||
root == fs_info->chunk_root ||
root == fs_info->dev_root)
trans->can_flush_pending_bgs = false;
cow = btrfs_alloc_tree_block(trans, root, parent_start,
root->root_key.objectid, &disk_key, level,
search_start, empty_size);
trans->can_flush_pending_bgs = true;
if (IS_ERR(cow))
return PTR_ERR(cow);
......
......@@ -164,14 +164,27 @@ static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
return NULL;
}
/*
 * Return the delayed ref head that rb_first_cached() reports as the first
 * node of dr->href_root, or NULL when that tree has no first node.
 */
static struct btrfs_delayed_ref_head *find_first_ref_head(
		struct btrfs_delayed_ref_root *dr)
{
	struct rb_node *first = rb_first_cached(&dr->href_root);

	if (first)
		return rb_entry(first, struct btrfs_delayed_ref_head,
				href_node);
	return NULL;
}
/*
* find an head entry based on bytenr. This returns the delayed ref
* head if it was able to find one, or NULL if nothing was in that spot.
* If return_bigger is given, the next bigger entry is returned if no exact
* match is found. But if no bigger one is found then the first node of the
* ref head tree will be returned.
* Find a head entry based on bytenr. This returns the delayed ref head if it
* was able to find one, or NULL if nothing was in that spot. If return_bigger
* is given, the next bigger entry is returned if no exact match is found.
*/
static struct btrfs_delayed_ref_head* find_ref_head(
static struct btrfs_delayed_ref_head *find_ref_head(
struct btrfs_delayed_ref_root *dr, u64 bytenr,
bool return_bigger)
{
......@@ -195,10 +208,9 @@ static struct btrfs_delayed_ref_head* find_ref_head(
if (bytenr > entry->bytenr) {
n = rb_next(&entry->href_node);
if (!n)
n = rb_first_cached(&dr->href_root);
return NULL;
entry = rb_entry(n, struct btrfs_delayed_ref_head,
href_node);
return entry;
}
return entry;
}
......@@ -355,33 +367,25 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
struct btrfs_delayed_ref_root *delayed_refs)
{
struct btrfs_delayed_ref_head *head;
u64 start;
bool loop = false;
again:
start = delayed_refs->run_delayed_start;
head = find_ref_head(delayed_refs, start, true);
if (!head && !loop) {
head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
true);
if (!head && delayed_refs->run_delayed_start != 0) {
delayed_refs->run_delayed_start = 0;
start = 0;
loop = true;
head = find_ref_head(delayed_refs, start, true);
head = find_first_ref_head(delayed_refs);
}
if (!head)
return NULL;
} else if (!head && loop) {
return NULL;
}
while (head->processing) {
struct rb_node *node;
node = rb_next(&head->href_node);
if (!node) {
if (loop)
if (delayed_refs->run_delayed_start == 0)
return NULL;
delayed_refs->run_delayed_start = 0;
start = 0;
loop = true;
goto again;
}
head = rb_entry(node, struct btrfs_delayed_ref_head,
......
......@@ -2366,6 +2366,9 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
insert_reserved);
else
BUG();
if (ret && insert_reserved)
btrfs_pin_extent(trans->fs_info, node->bytenr,
node->num_bytes, 1);
return ret;
}
......@@ -2954,7 +2957,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head;
int ret;
int run_all = count == (unsigned long)-1;
bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
/* We'll clean this up in btrfs_cleanup_transaction */
if (trans->aborted)
......@@ -2971,7 +2973,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
#ifdef SCRAMBLE_DELAYED_REFS
delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif
trans->can_flush_pending_bgs = false;
ret = __btrfs_run_delayed_refs(trans, count);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
......@@ -3002,7 +3003,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
goto again;
}
out:
trans->can_flush_pending_bgs = can_flush_pending_bgs;
return 0;
}
......@@ -4568,6 +4568,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
goto out;
} else {
ret = 1;
space_info->max_extent_size = 0;
}
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
......@@ -4589,11 +4590,9 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
* the block groups that were made dirty during the lifetime of the
* transaction.
*/
if (trans->can_flush_pending_bgs &&
trans->chunk_bytes_reserved >= (u64)SZ_2M) {
if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
btrfs_create_pending_block_groups(trans);
btrfs_trans_release_chunk_metadata(trans);
}
return ret;
}
......@@ -6464,6 +6463,7 @@ static void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
space_info->bytes_readonly += num_bytes;
cache->reserved -= num_bytes;
space_info->bytes_reserved -= num_bytes;
space_info->max_extent_size = 0;
if (delalloc)
cache->delalloc_bytes -= num_bytes;
......@@ -7260,6 +7260,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
struct btrfs_block_group_cache *block_group = NULL;
u64 search_start = 0;
u64 max_extent_size = 0;
u64 max_free_space = 0;
u64 empty_cluster = 0;
struct btrfs_space_info *space_info;
int loop = 0;
......@@ -7555,8 +7556,8 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
spin_lock(&ctl->tree_lock);
if (ctl->free_space <
num_bytes + empty_cluster + empty_size) {
if (ctl->free_space > max_extent_size)
max_extent_size = ctl->free_space;
max_free_space = max(max_free_space,
ctl->free_space);
spin_unlock(&ctl->tree_lock);
goto loop;
}
......@@ -7723,6 +7724,8 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
}
out:
if (ret == -ENOSPC) {
if (!max_extent_size)
max_extent_size = max_free_space;
spin_lock(&space_info->lock);
space_info->max_extent_size = max_extent_size;
spin_unlock(&space_info->lock);
......@@ -8004,21 +8007,14 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
}
path = btrfs_alloc_path();
if (!path) {
btrfs_free_and_pin_reserved_extent(fs_info,
extent_key.objectid,
fs_info->nodesize);
if (!path)
return -ENOMEM;
}
path->leave_spinning = 1;
ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path,
&extent_key, size);
if (ret) {
btrfs_free_path(path);
btrfs_free_and_pin_reserved_extent(fs_info,
extent_key.objectid,
fs_info->nodesize);
return ret;
}
......@@ -10132,9 +10128,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
struct btrfs_block_group_item item;
struct btrfs_key key;
int ret = 0;
bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
trans->can_flush_pending_bgs = false;
if (!trans->can_flush_pending_bgs)
return;
while (!list_empty(&trans->new_bgs)) {
block_group = list_first_entry(&trans->new_bgs,
struct btrfs_block_group_cache,
......@@ -10159,7 +10156,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
next:
list_del_init(&block_group->bg_list);
}
trans->can_flush_pending_bgs = can_flush_pending_bgs;
btrfs_trans_release_chunk_metadata(trans);
}
int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
......
......@@ -2078,6 +2078,14 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out;
inode_lock(inode);
/*
* We take the dio_sem here because the tree log stuff can race with
* lockless dio writes and get an extent map logged for an extent we
* never waited on. We need it this high up for lockdep reasons.
*/
down_write(&BTRFS_I(inode)->dio_sem);
atomic_inc(&root->log_batch);
/*
......@@ -2086,6 +2094,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
ret = btrfs_wait_ordered_range(inode, start, len);
if (ret) {
up_write(&BTRFS_I(inode)->dio_sem);
inode_unlock(inode);
goto out;
}
......@@ -2109,6 +2118,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* checked called fsync.
*/
ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err);
up_write(&BTRFS_I(inode)->dio_sem);
inode_unlock(inode);
goto out;
}
......@@ -2127,6 +2137,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
up_write(&BTRFS_I(inode)->dio_sem);
inode_unlock(inode);
goto out;
}
......@@ -2148,6 +2159,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
* file again, but that will end up using the synchronization
* inside btrfs_sync_log to keep things safe.
*/
up_write(&BTRFS_I(inode)->dio_sem);
inode_unlock(inode);
/*
......
......@@ -1772,6 +1772,13 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
return -1;
}
/*
 * Size to report as the largest extent for @entry: the entry's
 * max_extent_size field when the entry has a bitmap, otherwise the
 * entry's bytes field.
 */
static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
{
	u64 size = entry->bytes;

	if (entry->bitmap)
		size = entry->max_extent_size;
	return size;
}
/* Cache the size of the max extent in bytes */
static struct btrfs_free_space *
find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
......@@ -1793,8 +1800,8 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
for (node = &entry->offset_index; node; node = rb_next(node)) {
entry = rb_entry(node, struct btrfs_free_space, offset_index);
if (entry->bytes < *bytes) {
if (entry->bytes > *max_extent_size)
*max_extent_size = entry->bytes;
*max_extent_size = max(get_max_extent_size(entry),
*max_extent_size);
continue;
}
......@@ -1812,8 +1819,8 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
}
if (entry->bytes < *bytes + align_off) {
if (entry->bytes > *max_extent_size)
*max_extent_size = entry->bytes;
*max_extent_size = max(get_max_extent_size(entry),
*max_extent_size);
continue;
}
......@@ -1825,8 +1832,10 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
*offset = tmp;
*bytes = size;
return entry;
} else if (size > *max_extent_size) {
*max_extent_size = size;
} else {
*max_extent_size =
max(get_max_extent_size(entry),
*max_extent_size);
}
continue;
}
......@@ -2449,6 +2458,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
struct rb_node *n;
int count = 0;
spin_lock(&ctl->tree_lock);
for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
info = rb_entry(n, struct btrfs_free_space, offset_index);
if (info->bytes >= bytes && !block_group->ro)
......@@ -2457,6 +2467,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
info->offset, info->bytes,
(info->bitmap) ? "yes" : "no");
}
spin_unlock(&ctl->tree_lock);
btrfs_info(fs_info, "block group has cluster?: %s",
list_empty(&block_group->cluster_list) ? "no" : "yes");
btrfs_info(fs_info,
......@@ -2685,8 +2696,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
if (err) {
if (search_bytes > *max_extent_size)
*max_extent_size = search_bytes;
*max_extent_size = max(get_max_extent_size(entry),
*max_extent_size);
return 0;
}
......@@ -2723,8 +2734,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
entry = rb_entry(node, struct btrfs_free_space, offset_index);
while (1) {
if (entry->bytes < bytes && entry->bytes > *max_extent_size)
*max_extent_size = entry->bytes;
if (entry->bytes < bytes)
*max_extent_size = max(get_max_extent_size(entry),
*max_extent_size);
if (entry->bytes < bytes ||
(!entry->bitmap && entry->offset < min_start)) {
......
......@@ -502,6 +502,7 @@ static noinline void compress_file_range(struct inode *inode,
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
if (!pages) {
/* just bail out to the uncompressed code */
nr_pages = 0;
goto cont;
}
......@@ -2940,6 +2941,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
bool truncated = false;
bool range_locked = false;
bool clear_new_delalloc_bytes = false;
bool clear_reserved_extent = true;
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags) &&
......@@ -3043,11 +3045,13 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
logical_len, logical_len,
compress_type, 0, 0,
BTRFS_FILE_EXTENT_REG);
if (!ret)
if (!ret) {
clear_reserved_extent = false;
btrfs_release_delalloc_bytes(fs_info,
ordered_extent->start,
ordered_extent->disk_len);
}
}
unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
ordered_extent->file_offset, ordered_extent->len,
trans->transid);
......@@ -3107,8 +3111,13 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
* wrong we need to return the space for this ordered extent
* back to the allocator. We only free the extent in the
* truncated case if we didn't write out the extent at all.
*
* If we made it past insert_reserved_file_extent before we
* errored out then we don't need to do this as the accounting
* has already been done.
*/
if ((ret || !logical_len) &&
clear_reserved_extent &&
!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
!test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
btrfs_free_reserved_extent(fs_info,
......@@ -5259,11 +5268,13 @@ static void evict_inode_truncate_pages(struct inode *inode)
struct extent_state *cached_state = NULL;
u64 start;
u64 end;
unsigned state_flags;
node = rb_first(&io_tree->state);
state = rb_entry(node, struct extent_state, rb_node);
start = state->start;
end = state->end;
state_flags = state->state;
spin_unlock(&io_tree->lock);
lock_extent_bits(io_tree, start, end, &cached_state);
......@@ -5276,7 +5287,7 @@ static void evict_inode_truncate_pages(struct inode *inode)
*
* Note, end is the bytenr of last byte, so we need + 1 here.
*/
if (state->state & EXTENT_DELALLOC)
if (state_flags & EXTENT_DELALLOC)
btrfs_qgroup_free_data(inode, NULL, start, end - start + 1);
clear_extent_bit(io_tree, start, end,
......
......@@ -2283,15 +2283,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
kmem_cache_free(btrfs_trans_handle_cachep, trans);
/*
* If fs has been frozen, we can not handle delayed iputs, otherwise
* it'll result in deadlock about SB_FREEZE_FS.
*/
if (current != fs_info->transaction_kthread &&
current != fs_info->cleaner_kthread &&
!test_bit(BTRFS_FS_FROZEN, &fs_info->flags))
btrfs_run_delayed_iputs(fs_info);
return ret;
scrub_continue:
......
......@@ -4390,7 +4390,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
INIT_LIST_HEAD(&extents);
down_write(&inode->dio_sem);
write_lock(&tree->lock);
test_gen = root->fs_info->last_trans_committed;
logged_start = start;
......@@ -4456,7 +4455,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
}
WARN_ON(!list_empty(&extents));
write_unlock(&tree->lock);
up_write(&inode->dio_sem);
btrfs_release_path(path);
if (!ret)
......@@ -4652,7 +4650,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
ASSERT(len == i_size ||
(len == fs_info->sectorsize &&
btrfs_file_extent_compression(leaf, extent) !=
BTRFS_COMPRESS_NONE));
BTRFS_COMPRESS_NONE) ||
(len < i_size && i_size < fs_info->sectorsize));
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment