Commit e75cdf98 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes and cleanups from Chris Mason:
 "Some of this got cherry-picked from a github repo this week, but I
  verified the patches.

  We have three small scrub cleanups and a collection of fixes"

* 'for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  btrfs: Use fs_info directly in btrfs_delete_unused_bgs
  btrfs: Fix lost-data-profile caused by balance bg
  btrfs: Fix lost-data-profile caused by auto removing bg
  btrfs: Remove len argument from scrub_find_csum
  btrfs: Reduce unnecessary arguments in scrub_recheck_block
  btrfs: Use scrub_checksum_data and scrub_checksum_tree_block for scrub_recheck_block_checksum
  btrfs: Reset sblock->xxx_error stats before calling scrub_recheck_block_checksum
  btrfs: scrub: setup all fields for sblock_to_check
  btrfs: scrub: set error stats when tree block spanning stripes
  Btrfs: fix race when listing an inode's xattrs
  Btrfs: fix race leading to BUG_ON when running delalloc for nodatacow
  Btrfs: fix race leading to incorrect item deletion when dropping extents
  Btrfs: fix sleeping inside atomic context in qgroup rescan worker
  Btrfs: fix race waiting for qgroup rescan worker
  btrfs: qgroup: exit the rescan worker during umount
  Btrfs: fix extent accounting for partial direct IO writes
parents ca4ba96e d5f2e33b
......@@ -3780,6 +3780,9 @@ void close_ctree(struct btrfs_root *root)
fs_info->closing = 1;
smp_mb();
/* wait for the qgroup rescan worker to stop */
btrfs_qgroup_wait_for_completion(fs_info);
/* wait for the uuid_scan task to finish */
down(&fs_info->uuid_tree_rescan_sem);
/* avoid complains from lockdep et al., set sem back to initial state */
......
......@@ -10279,22 +10279,25 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
block_group = list_first_entry(&fs_info->unused_bgs,
struct btrfs_block_group_cache,
bg_list);
space_info = block_group->space_info;
list_del_init(&block_group->bg_list);
space_info = block_group->space_info;
if (ret || btrfs_mixed_space_info(space_info)) {
btrfs_put_block_group(block_group);
continue;
}
spin_unlock(&fs_info->unused_bgs_lock);
mutex_lock(&root->fs_info->delete_unused_bgs_mutex);
mutex_lock(&fs_info->delete_unused_bgs_mutex);
/* Don't want to race with allocators so take the groups_sem */
down_write(&space_info->groups_sem);
spin_lock(&block_group->lock);
if (block_group->reserved ||
btrfs_block_group_used(&block_group->item) ||
block_group->ro) {
block_group->ro ||
list_is_singular(&block_group->list)) {
/*
* We want to bail if we made new allocations or have
* outstanding allocations in this block group. We do
......@@ -10410,7 +10413,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
end_trans:
btrfs_end_transaction(trans, root);
next:
mutex_unlock(&root->fs_info->delete_unused_bgs_mutex);
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
btrfs_put_block_group(block_group);
spin_lock(&fs_info->unused_bgs_lock);
}
......
......@@ -756,8 +756,16 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
}
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid > ino ||
key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
if (key.objectid > ino)
break;
if (WARN_ON_ONCE(key.objectid < ino) ||
key.type < BTRFS_EXTENT_DATA_KEY) {
ASSERT(del_nr == 0);
path->slots[0]++;
goto next_slot;
}
if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
break;
fi = btrfs_item_ptr(leaf, path->slots[0],
......@@ -776,8 +784,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
btrfs_file_extent_inline_len(leaf,
path->slots[0], fi);
} else {
WARN_ON(1);
extent_end = search_start;
/* can't happen */
BUG();
}
/*
......
......@@ -1304,8 +1304,14 @@ static noinline int run_delalloc_nocow(struct inode *inode,
num_bytes = 0;
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
if (found_key.objectid > ino ||
found_key.type > BTRFS_EXTENT_DATA_KEY ||
if (found_key.objectid > ino)
break;
if (WARN_ON_ONCE(found_key.objectid < ino) ||
found_key.type < BTRFS_EXTENT_DATA_KEY) {
path->slots[0]++;
goto next_slot;
}
if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
found_key.offset > end)
break;
......@@ -7503,6 +7509,28 @@ struct btrfs_dio_data {
u64 reserve;
};
static void adjust_dio_outstanding_extents(struct inode *inode,
struct btrfs_dio_data *dio_data,
const u64 len)
{
unsigned num_extents;
num_extents = (unsigned) div64_u64(len + BTRFS_MAX_EXTENT_SIZE - 1,
BTRFS_MAX_EXTENT_SIZE);
/*
* If we have an outstanding_extents count still set then we're
* within our reservation, otherwise we need to adjust our inode
* counter appropriately.
*/
if (dio_data->outstanding_extents) {
dio_data->outstanding_extents -= num_extents;
} else {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents += num_extents;
spin_unlock(&BTRFS_I(inode)->lock);
}
}
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
......@@ -7538,8 +7566,11 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
* If this errors out it's because we couldn't invalidate pagecache for
* this range and we need to fallback to buffered.
*/
if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
return -ENOTBLK;
if (lock_extent_direct(inode, lockstart, lockend, &cached_state,
create)) {
ret = -ENOTBLK;
goto err;
}
em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
if (IS_ERR(em)) {
......@@ -7657,19 +7688,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
if (start + len > i_size_read(inode))
i_size_write(inode, start + len);
/*
* If we have an outstanding_extents count still set then we're
* within our reservation, otherwise we need to adjust our inode
* counter appropriately.
*/
if (dio_data->outstanding_extents) {
(dio_data->outstanding_extents)--;
} else {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
}
adjust_dio_outstanding_extents(inode, dio_data, len);
btrfs_free_reserved_data_space(inode, start, len);
WARN_ON(dio_data->reserve < len);
dio_data->reserve -= len;
......@@ -7696,8 +7715,17 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
unlock_err:
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
unlock_bits, 1, 0, &cached_state, GFP_NOFS);
err:
if (dio_data)
current->journal_info = dio_data;
/*
* Compensate the delalloc release we do in btrfs_direct_IO() when we
* write less data then expected, so that we don't underflow our inode's
* outstanding extents counter.
*/
if (create && dio_data)
adjust_dio_outstanding_extents(inode, dio_data, len);
return ret;
}
......
......@@ -2198,7 +2198,6 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
int slot;
int ret;
path->leave_spinning = 1;
mutex_lock(&fs_info->qgroup_rescan_lock);
ret = btrfs_search_slot_for_read(fs_info->extent_root,
&fs_info->qgroup_rescan_progress,
......@@ -2286,7 +2285,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
goto out;
err = 0;
while (!err) {
while (!err && !btrfs_fs_closing(fs_info)) {
trans = btrfs_start_transaction(fs_info->fs_root, 0);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
......@@ -2307,6 +2306,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
btrfs_free_path(path);
mutex_lock(&fs_info->qgroup_rescan_lock);
if (!btrfs_fs_closing(fs_info))
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
if (err > 0 &&
......@@ -2336,7 +2336,9 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
}
btrfs_end_transaction(trans, fs_info->quota_root);
if (err >= 0) {
if (btrfs_fs_closing(fs_info)) {
btrfs_info(fs_info, "qgroup scan paused");
} else if (err >= 0) {
btrfs_info(fs_info, "qgroup scan completed%s",
err > 0 ? " (inconsistency flag cleared)" : "");
} else {
......@@ -2384,12 +2386,11 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
memset(&fs_info->qgroup_rescan_progress, 0,
sizeof(fs_info->qgroup_rescan_progress));
fs_info->qgroup_rescan_progress.objectid = progress_objectid;
init_completion(&fs_info->qgroup_rescan_completion);
spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock);
init_completion(&fs_info->qgroup_rescan_completion);
memset(&fs_info->qgroup_rescan_work, 0,
sizeof(fs_info->qgroup_rescan_work));
btrfs_init_work(&fs_info->qgroup_rescan_work,
......
This diff is collapsed.
......@@ -3400,6 +3400,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
u32 count_data = 0;
u32 count_meta = 0;
u32 count_sys = 0;
int chunk_reserved = 0;
/* step one make some room on all the devices */
devices = &fs_info->fs_devices->devices;
......@@ -3501,6 +3502,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
ret = should_balance_chunk(chunk_root, leaf, chunk,
found_key.offset);
btrfs_release_path(path);
if (!ret) {
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
......@@ -3537,6 +3539,25 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
goto loop;
}
if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) {
trans = btrfs_start_transaction(chunk_root, 0);
if (IS_ERR(trans)) {
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
ret = PTR_ERR(trans);
goto error;
}
ret = btrfs_force_chunk_alloc(trans, chunk_root,
BTRFS_BLOCK_GROUP_DATA);
if (ret < 0) {
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
goto error;
}
btrfs_end_transaction(trans, chunk_root);
chunk_reserved = 1;
}
ret = btrfs_relocate_chunk(chunk_root,
found_key.offset);
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
......
......@@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
/* check to make sure this item is what we want */
if (found_key.objectid != key.objectid)
break;
if (found_key.type != BTRFS_XATTR_ITEM_KEY)
if (found_key.type > BTRFS_XATTR_ITEM_KEY)
break;
if (found_key.type < BTRFS_XATTR_ITEM_KEY)
goto next;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
if (verify_dir_item(root, leaf, di))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment