Commit 8379c0b3 authored by Linus Torvalds

Merge tag 'for-6.0-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "Fixes:

   - check that subvolume is writable when changing xattrs from security
     namespace

   - fix memory leak in device lookup helper

   - update generation of hole file extent item when merging holes

   - fix space cache corruption and potential double allocations; this
     is a rare bug but can be serious once it happens; stable backports
     and an analysis tool will be provided

   - fix error handling when deleting root references

   - fix crash due to an assertion failure when attempting to cancel a
     suspended device replace, and add a message explaining what to do
     if mount fails due to a missing replace item

  Regressions:

   - don't merge pages into bio if their page offset is not contiguous

   - don't allow large NOWAIT direct reads, as these could lead to
     short reads, e.g. in io_uring"

* tag 'for-6.0-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: add info when mount fails due to stale replace target
  btrfs: replace: drop assert for suspended replace
  btrfs: fix silent failure when deleting root reference
  btrfs: fix space cache corruption and potential double allocations
  btrfs: don't allow large NOWAIT direct reads
  btrfs: don't merge pages into bio if their page offset is not contiguous
  btrfs: update generation of hole file extent item when merging holes
  btrfs: fix possible memory leak in btrfs_get_dev_args_from_path()
  btrfs: check if root is readonly while setting security xattr
parents c7bb3fbc f2c3bec2
@@ -440,39 +440,26 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
 	btrfs_put_caching_control(caching_ctl);
 }
 
-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
+static int btrfs_caching_ctl_wait_done(struct btrfs_block_group *cache,
+				       struct btrfs_caching_control *caching_ctl)
+{
+	wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
+	return cache->cached == BTRFS_CACHE_ERROR ? -EIO : 0;
+}
+
+static int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache)
 {
 	struct btrfs_caching_control *caching_ctl;
-	int ret = 0;
+	int ret;
 
 	caching_ctl = btrfs_get_caching_control(cache);
 	if (!caching_ctl)
 		return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
-
-	wait_event(caching_ctl->wait, btrfs_block_group_done(cache));
-	if (cache->cached == BTRFS_CACHE_ERROR)
-		ret = -EIO;
+	ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
 	btrfs_put_caching_control(caching_ctl);
 	return ret;
 }
 
-static bool space_cache_v1_done(struct btrfs_block_group *cache)
-{
-	bool ret;
-
-	spin_lock(&cache->lock);
-	ret = cache->cached != BTRFS_CACHE_FAST;
-	spin_unlock(&cache->lock);
-
-	return ret;
-}
-
-void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
-					struct btrfs_caching_control *caching_ctl)
-{
-	wait_event(caching_ctl->wait, space_cache_v1_done(cache));
-}
-
 #ifdef CONFIG_BTRFS_DEBUG
 static void fragment_free_space(struct btrfs_block_group *block_group)
 {
@@ -750,9 +737,8 @@ static noinline void caching_thread(struct btrfs_work *work)
 	btrfs_put_block_group(block_group);
 }
 
-int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only)
+int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
 {
-	DEFINE_WAIT(wait);
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_caching_control *caching_ctl = NULL;
 	int ret = 0;
@@ -785,9 +771,6 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
 	}
 	WARN_ON(cache->caching_ctl);
 	cache->caching_ctl = caching_ctl;
-	if (btrfs_test_opt(fs_info, SPACE_CACHE))
-		cache->cached = BTRFS_CACHE_FAST;
-	else
-		cache->cached = BTRFS_CACHE_STARTED;
+	cache->cached = BTRFS_CACHE_STARTED;
 	cache->has_caching_ctl = 1;
 	spin_unlock(&cache->lock);
@@ -801,8 +784,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, int load_cache_only
 	btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
 out:
-	if (load_cache_only && caching_ctl)
-		btrfs_wait_space_cache_v1_finished(cache, caching_ctl);
+	if (wait && caching_ctl)
+		ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
 	if (caching_ctl)
 		btrfs_put_caching_control(caching_ctl);
@@ -3312,7 +3295,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 	 * space back to the block group, otherwise we will leak space.
 	 */
 	if (!alloc && !btrfs_block_group_done(cache))
-		btrfs_cache_block_group(cache, 1);
+		btrfs_cache_block_group(cache, true);
 
 	byte_in_group = bytenr - cache->start;
 	WARN_ON(byte_in_group > cache->length);
......
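The caller-visible effect of the block-group changes above: the old two-step
pattern (start caching, then wait in a separate call) collapses into one call
whose second argument says whether to wait. A minimal before/after sketch using
the signatures from this diff (error handling elided; `cache` is any
struct btrfs_block_group):

	/* before: kick off caching, then wait for it in a second call */
	btrfs_cache_block_group(cache, 1);
	ret = btrfs_wait_block_group_cache_done(cache);

	/* after: one call; wait=true blocks until caching completes and
	 * returns -EIO if the cache ended up in BTRFS_CACHE_ERROR */
	ret = btrfs_cache_block_group(cache, true);

Folding the wait into the helper, together with dropping the transient
BTRFS_CACHE_FAST state, closes the window in which a block group could look
cached while its free-space cache was still loading, which per the merge
message is what could lead to space cache corruption and double allocations.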
@@ -263,9 +263,7 @@ void btrfs_dec_nocow_writers(struct btrfs_block_group *bg);
 void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
 void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
 					   u64 num_bytes);
-int btrfs_wait_block_group_cache_done(struct btrfs_block_group *cache);
-int btrfs_cache_block_group(struct btrfs_block_group *cache,
-			    int load_cache_only);
+int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
 void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
 struct btrfs_caching_control *btrfs_get_caching_control(
 		struct btrfs_block_group *cache);
......
@@ -505,7 +505,6 @@ struct btrfs_free_cluster {
 enum btrfs_caching_type {
 	BTRFS_CACHE_NO,
 	BTRFS_CACHE_STARTED,
-	BTRFS_CACHE_FAST,
 	BTRFS_CACHE_FINISHED,
 	BTRFS_CACHE_ERROR,
 };
......
@@ -165,7 +165,7 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
 	 */
 	if (btrfs_find_device(fs_info->fs_devices, &args)) {
 		btrfs_err(fs_info,
-			  "replace devid present without an active replace item");
+			  "replace without active item, run 'device scan --forget' on the target device");
 		ret = -EUCLEAN;
 	} else {
 		dev_replace->srcdev = NULL;
@@ -1129,8 +1129,7 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
 		up_write(&dev_replace->rwsem);
 
 		/* Scrub for replace must not be running in suspended state */
-		ret = btrfs_scrub_cancel(fs_info);
-		ASSERT(ret != -ENOTCONN);
+		btrfs_scrub_cancel(fs_info);
 
 		trans = btrfs_start_transaction(root, 0);
 		if (IS_ERR(trans)) {
......
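A usage note on the -EUCLEAN failure above: the new message is actionable.
Running the btrfs-progs command it names, 'btrfs device scan --forget', against
the stale replace target unregisters that device from the kernel, which should
let the mount proceed. (The diff itself only prints the hint; the command is
spelled out here for convenience and assumes a btrfs-progs version that
supports --forget.)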
@@ -2551,17 +2551,10 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
 		return -EINVAL;
 
 	/*
-	 * pull in the free space cache (if any) so that our pin
-	 * removes the free space from the cache.  We have load_only set
-	 * to one because the slow code to read in the free extents does check
-	 * the pinned extents.
+	 * Fully cache the free space first so that our pin removes the free space
+	 * from the cache.
 	 */
-	btrfs_cache_block_group(cache, 1);
-	/*
-	 * Make sure we wait until the cache is completely built in case it is
-	 * missing or is invalid and therefore needs to be rebuilt.
-	 */
-	ret = btrfs_wait_block_group_cache_done(cache);
+	ret = btrfs_cache_block_group(cache, true);
 	if (ret)
 		goto out;
@@ -2584,12 +2577,7 @@ static int __exclude_logged_extent(struct btrfs_fs_info *fs_info,
 	if (!block_group)
 		return -EINVAL;
 
-	btrfs_cache_block_group(block_group, 1);
-	/*
-	 * Make sure we wait until the cache is completely built in case it is
-	 * missing or is invalid and therefore needs to be rebuilt.
-	 */
-	ret = btrfs_wait_block_group_cache_done(block_group);
+	ret = btrfs_cache_block_group(block_group, true);
 	if (ret)
 		goto out;
@@ -4399,7 +4387,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
 		ffe_ctl->cached = btrfs_block_group_done(block_group);
 		if (unlikely(!ffe_ctl->cached)) {
 			ffe_ctl->have_caching_bg = true;
-			ret = btrfs_cache_block_group(block_group, 0);
+			ret = btrfs_cache_block_group(block_group, false);
 
 			/*
 			 * If we get ENOMEM here or something else we want to
@@ -6169,13 +6157,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
 		if (end - start >= range->minlen) {
 			if (!btrfs_block_group_done(cache)) {
-				ret = btrfs_cache_block_group(cache, 0);
-				if (ret) {
-					bg_failed++;
-					bg_ret = ret;
-					continue;
-				}
-				ret = btrfs_wait_block_group_cache_done(cache);
+				ret = btrfs_cache_block_group(cache, true);
 				if (ret) {
 					bg_failed++;
 					bg_ret = ret;
......
@@ -3233,7 +3233,7 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
 	u32 bio_size = bio->bi_iter.bi_size;
 	u32 real_size;
 	const sector_t sector = disk_bytenr >> SECTOR_SHIFT;
-	bool contig;
+	bool contig = false;
 	int ret;
 
 	ASSERT(bio);
@@ -3242,10 +3242,35 @@ static int btrfs_bio_add_page(struct btrfs_bio_ctrl *bio_ctrl,
 	if (bio_ctrl->compress_type != compress_type)
 		return 0;
 
-	if (bio_ctrl->compress_type != BTRFS_COMPRESS_NONE)
+	if (bio->bi_iter.bi_size == 0) {
+		/* We can always add a page into an empty bio. */
+		contig = true;
+	} else if (bio_ctrl->compress_type == BTRFS_COMPRESS_NONE) {
+		struct bio_vec *bvec = bio_last_bvec_all(bio);
+
+		/*
+		 * The contig check requires the following conditions to be met:
+		 * 1) The pages are belonging to the same inode
+		 *    This is implied by the call chain.
+		 *
+		 * 2) The range has adjacent logical bytenr
+		 *
+		 * 3) The range has adjacent file offset
+		 *    This is required for the usage of btrfs_bio->file_offset.
+		 */
+		if (bio_end_sector(bio) == sector &&
+		    page_offset(bvec->bv_page) + bvec->bv_offset +
+		    bvec->bv_len == page_offset(page) + pg_offset)
+			contig = true;
+	} else {
+		/*
+		 * For compression, all IO should have its logical bytenr
+		 * set to the starting bytenr of the compressed extent.
+		 */
 		contig = bio->bi_iter.bi_sector == sector;
-	else
-		contig = bio_end_sector(bio) == sector;
+	}
+
 	if (!contig)
 		return 0;
......
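To see why the extra file-offset condition matters, here is a hypothetical
worked example (all values invented for illustration, not taken from the
commit):

	/*
	 * last bvec in the bio: file range [4096, 8192), ending at disk sector S
	 * candidate page:       file offset 12288, starting at disk sector S
	 *
	 * Old test, disk adjacency only:
	 *     bio_end_sector(bio) == sector                   -> true, merged
	 * New test additionally requires file adjacency:
	 *     prev_file_end (8192) == new_file_start (12288)  -> false, not merged
	 *
	 * Merging here would leave btrfs_bio->file_offset wrong for the second
	 * range, which is the bug the stricter check prevents.
	 */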
@@ -2482,6 +2482,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
 		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
 		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
 		btrfs_set_file_extent_offset(leaf, fi, 0);
+		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
 		btrfs_mark_buffer_dirty(leaf);
 		goto out;
 	}
@@ -2498,6 +2499,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
 		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
 		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
 		btrfs_set_file_extent_offset(leaf, fi, 0);
+		btrfs_set_file_extent_generation(leaf, fi, trans->transid);
 		btrfs_mark_buffer_dirty(leaf);
 		goto out;
 	}
......
@@ -7693,6 +7693,20 @@ static int btrfs_dio_iomap_begin(struct inode *inode, loff_t start,
 	const u64 data_alloc_len = length;
 	bool unlock_extents = false;
 
+	/*
+	 * We could potentially fault if we have a buffer > PAGE_SIZE, and if
+	 * we're NOWAIT we may submit a bio for a partial range and return
+	 * EIOCBQUEUED, which would result in an errant short read.
+	 *
+	 * The best way to handle this would be to allow for partial completions
+	 * of iocb's, so we could submit the partial bio, return and fault in
+	 * the rest of the pages, and then submit the io for the rest of the
+	 * range.  However we don't have that currently, so simply return
+	 * -EAGAIN at this point so that the normal path is used.
+	 */
+	if (!write && (flags & IOMAP_NOWAIT) && length > PAGE_SIZE)
+		return -EAGAIN;
+
 	/*
 	 * Cap the size of reads to that usually seen in buffered I/O as we need
 	 * to allocate a contiguous array for the checksums.
......
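From userspace the new guard surfaces as EAGAIN on a large NOWAIT direct read.
A minimal sketch of the fallback a caller would perform (hypothetical fd opened
with O_DIRECT on btrfs, buf and len assumed set up with len > PAGE_SIZE; this
mirrors what io_uring does internally when it sees -EAGAIN):

	#define _GNU_SOURCE
	#include <errno.h>
	#include <sys/uio.h>

	/* Try a NOWAIT read first, falling back to a blocking read on EAGAIN. */
	static ssize_t read_try_nowait(int fd, void *buf, size_t len)
	{
		struct iovec iov = { .iov_base = buf, .iov_len = len };
		ssize_t n = preadv2(fd, &iov, 1, 0, RWF_NOWAIT);

		if (n < 0 && errno == EAGAIN)	/* NOWAIT path declined */
			n = preadv2(fd, &iov, 1, 0, 0);
		return n;
	}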
@@ -349,9 +349,10 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
 	key.offset = ref_id;
 again:
 	ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
-	if (ret < 0)
+	if (ret < 0) {
+		err = ret;
 		goto out;
-	if (ret == 0) {
+	} else if (ret == 0) {
 		leaf = path->nodes[0];
 		ref = btrfs_item_ptr(leaf, path->slots[0],
 				     struct btrfs_root_ref);
......
@@ -2345,8 +2345,11 @@ int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
 	ret = btrfs_get_bdev_and_sb(path, FMODE_READ, fs_info->bdev_holder, 0,
 				    &bdev, &disk_super);
-	if (ret)
+	if (ret) {
+		btrfs_put_dev_args_from_path(args);
 		return ret;
+	}
 
 	args->devid = btrfs_stack_device_id(&disk_super->dev_item);
 	memcpy(args->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE);
 	if (btrfs_fs_incompat(fs_info, METADATA_UUID))
......
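The leak fixed above has a common shape: buffers are allocated into args
earlier in the function (they are what btrfs_put_dev_args_from_path() frees),
so a failure in the superblock read must unwind them. A generic kernel-style
sketch of the pattern, with hypothetical names rather than the btrfs code:

	/* Allocations made before a fallible call must be unwound on its
	 * error path; 'put_args' frees whichever fields were allocated. */
	int get_args(struct dev_args *args, const char *path)
	{
		int ret;

		args->uuid = kzalloc(UUID_SIZE, GFP_KERNEL);
		args->fsid = kzalloc(FSID_SIZE, GFP_KERNEL);
		if (!args->uuid || !args->fsid) {
			put_args(args);
			return -ENOMEM;
		}

		ret = read_super(path, args);
		if (ret) {
			put_args(args);	/* the cleanup this patch adds */
			return ret;
		}
		return 0;
	}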
@@ -371,6 +371,9 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
 				   const char *name, const void *buffer,
 				   size_t size, int flags)
 {
+	if (btrfs_root_readonly(BTRFS_I(inode)->root))
+		return -EROFS;
+
 	name = xattr_full_name(handler, name);
 	return btrfs_setxattr_trans(inode, name, buffer, size, flags);
 }
......
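User-visible effect of the xattr fix: setting a security.* attribute on a file
inside a read-only subvolume or snapshot now fails with EROFS instead of
modifying the read-only tree. A small userspace sketch (the attribute name
"security.demo" is a placeholder for illustration):

	#include <errno.h>
	#include <stdio.h>
	#include <sys/xattr.h>

	static int set_security_xattr(const char *path, const void *val, size_t len)
	{
		if (setxattr(path, "security.demo", val, len, 0) < 0) {
			if (errno == EROFS)
				fprintf(stderr, "%s: subvolume is read-only\n", path);
			return -1;
		}
		return 0;
	}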