Commit 9b450949 authored by Linus Torvalds

Merge tag 'for-6.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "A few more fixes to zoned mode and one regression fix for chunk limit:

    - Zoned mode fixes:
        - fix how wait/wake up is done when finishing a zone
        - fix zone append limit in emulated mode
        - fix mount on devices with conventional zones

   - fix a regression where the user-settable data chunk limit was
     accidentally lowered, causing allocation problems on some profiles
     (raid0, raid1)"

* tag 'for-6.0-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix the max chunk size and stripe length calculation
  btrfs: zoned: fix mounting with conventional zones
  btrfs: zoned: set pseudo max append zone limit in zone emulation mode
  btrfs: zoned: fix API misuse of zone finish waiting
parents 725f3f3b 5da431b7
@@ -1088,8 +1088,6 @@ struct btrfs_fs_info {
         spinlock_t zone_active_bgs_lock;
         struct list_head zone_active_bgs;
-        /* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */
-        wait_queue_head_t zone_finish_wait;
 
         /* Updates are not protected by any lock */
         struct btrfs_commit_stats commit_stats;
...
@@ -3068,7 +3068,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
         init_waitqueue_head(&fs_info->transaction_blocked_wait);
         init_waitqueue_head(&fs_info->async_submit_wait);
         init_waitqueue_head(&fs_info->delayed_iputs_wait);
-        init_waitqueue_head(&fs_info->zone_finish_wait);
 
         /* Usable values until the real ones are cached from the superblock */
         fs_info->nodesize = 4096;
...
@@ -1644,10 +1644,9 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
                         done_offset = end;
 
                 if (done_offset == start) {
-                        struct btrfs_fs_info *info = inode->root->fs_info;
-
-                        wait_var_event(&info->zone_finish_wait,
-                                       !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags));
+                        wait_on_bit_io(&inode->root->fs_info->flags,
+                                       BTRFS_FS_NEED_ZONE_FINISH,
+                                       TASK_UNINTERRUPTIBLE);
                         continue;
                 }
...
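Note on the hunk above: the old code slept with wait_var_event() against a separate wait_queue_head_t while the waker used wake_up_all(), which is the API misuse the last commit in the tag fixes. The new code sleeps on a bit of fs_info->flags with wait_on_bit_io(), and the waker in do_zone_finish() (final hunk below) clears the same bit with clear_and_wake_up_bit(). A minimal sketch of that generic bit-wait pairing, using the hypothetical names demo_flags and DEMO_NEED_FINISH rather than the btrfs ones:

#include <linux/wait_bit.h>
#include <linux/sched.h>

static unsigned long demo_flags;        /* stand-in for fs_info->flags */
#define DEMO_NEED_FINISH        0       /* stand-in for BTRFS_FS_NEED_ZONE_FINISH */

static void demo_waiter(void)
{
        /* Sleep uninterruptibly (accounted as I/O wait) until the bit clears. */
        wait_on_bit_io(&demo_flags, DEMO_NEED_FINISH, TASK_UNINTERRUPTIBLE);
}

static void demo_waker(void)
{
        /* Clear the bit and wake every waiter sleeping on it in one call. */
        clear_and_wake_up_bit(DEMO_NEED_FINISH, &demo_flags);
}

Because waiter and waker now operate on the same flag word, there is no window where the flag is cleared but the sleepers miss the wakeup on an unrelated waitqueue.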
@@ -199,7 +199,7 @@ static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
         ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
 
         if (flags & BTRFS_BLOCK_GROUP_DATA)
-                return SZ_1G;
+                return BTRFS_MAX_DATA_CHUNK_SIZE;
         else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
                 return SZ_32M;
...
@@ -5267,6 +5267,9 @@ static int decide_stripe_size_regular(struct alloc_chunk_ctl *ctl,
                                      ctl->stripe_size);
         }
 
+        /* Stripe size should not go beyond 1G. */
+        ctl->stripe_size = min_t(u64, ctl->stripe_size, SZ_1G);
+
         /* Align to BTRFS_STRIPE_LEN */
         ctl->stripe_size = round_down(ctl->stripe_size, BTRFS_STRIPE_LEN);
         ctl->chunk_size = ctl->stripe_size * data_stripes;
...
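For the two volumes.c hunks above: the data chunk ceiling goes back to BTRFS_MAX_DATA_CHUNK_SIZE (10 GiB), and the 1 GiB cap is applied to each per-device stripe instead, so chunk_size = stripe_size * data_stripes can again exceed 1 GiB on striped profiles. A rough, userspace-only illustration of that arithmetic (the 4-device raid0 and the 2 GiB candidate stripe size are assumed values, not from the patch):

#include <stdint.h>
#include <stdio.h>

#define SZ_1G                   (1024ULL * 1024 * 1024)
#define BTRFS_STRIPE_LEN        (64ULL * 1024)  /* 64 KiB, as in btrfs */

int main(void)
{
        uint64_t stripe_size = 2 * SZ_1G;       /* assumed candidate per device */
        uint64_t data_stripes = 4;              /* assumed: raid0 over 4 devices */

        if (stripe_size > SZ_1G)                /* new per-stripe 1 GiB cap */
                stripe_size = SZ_1G;
        stripe_size -= stripe_size % BTRFS_STRIPE_LEN;  /* align down to 64 KiB */

        /* 4 * 1 GiB = 4 GiB chunk, instead of 1 GiB total as in the regression */
        printf("chunk size = %llu bytes\n",
               (unsigned long long)(stripe_size * data_stripes));
        return 0;
}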
@@ -421,10 +421,19 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
          * since btrfs adds the pages one by one to a bio, and btrfs cannot
          * increase the metadata reservation even if it increases the number of
          * extents, it is safe to stick with the limit.
+         *
+         * With the zoned emulation, we can have non-zoned device on the zoned
+         * mode. In this case, we don't have a valid max zone append size. So,
+         * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
          */
-        zone_info->max_zone_append_size =
-                min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
-                      (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+        if (bdev_is_zoned(bdev)) {
+                zone_info->max_zone_append_size = min_t(u64,
+                        (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
+                        (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+        } else {
+                zone_info->max_zone_append_size =
+                        (u64)bdev_max_segments(bdev) << PAGE_SHIFT;
+        }
 
         if (!IS_ALIGNED(nr_sectors, zone_sectors))
                 zone_info->nr_zones++;
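On the hunk above: an emulated (non-zoned) device reports no hardware zone-append limit, which previously made the min_t() collapse to a bogus value; the fix falls back to max_segments * PAGE_SIZE as a pseudo limit. A standalone paraphrase of that selection, as a hypothetical helper rather than the kernel function (the 128-segment, 4 KiB-page figures are assumed example values):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper mirroring the new selection logic. */
static uint64_t pick_max_zone_append(bool is_zoned, uint64_t hw_append_bytes,
                                     uint64_t max_segments, uint64_t page_size)
{
        uint64_t seg_bytes = max_segments * page_size;

        if (is_zoned)   /* real zoned device: honour the smaller hardware limit */
                return hw_append_bytes < seg_bytes ? hw_append_bytes : seg_bytes;

        /* emulation: no hardware limit, use the segment-derived pseudo limit */
        return seg_bytes;
}

int main(void)
{
        /* e.g. 128 segments * 4096 bytes = 524288 (512 KiB) pseudo limit */
        printf("%llu\n",
               (unsigned long long)pick_max_zone_append(false, 0, 128, 4096));
        return 0;
}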
@@ -1178,7 +1187,7 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
  * offset.
  */
 static int calculate_alloc_pointer(struct btrfs_block_group *cache,
-                                   u64 *offset_ret)
+                                   u64 *offset_ret, bool new)
 {
         struct btrfs_fs_info *fs_info = cache->fs_info;
         struct btrfs_root *root;
@@ -1188,6 +1197,21 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache,
         int ret;
         u64 length;
 
+        /*
+         * Avoid tree lookups for a new block group, there's no use for it.
+         * It must always be 0.
+         *
+         * Also, we have a lock chain of extent buffer lock -> chunk mutex.
+         * For a new block group, this function is called from
+         * btrfs_make_block_group() which is already taking the chunk mutex.
+         * Thus, we cannot call calculate_alloc_pointer() which takes extent
+         * buffer locks to avoid deadlock.
+         */
+        if (new) {
+                *offset_ret = 0;
+                return 0;
+        }
+
         path = btrfs_alloc_path();
         if (!path)
                 return -ENOMEM;
@@ -1323,6 +1347,13 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
                 else
                         num_conventional++;
 
+                /*
+                 * Consider a zone as active if we can allow any number of
+                 * active zones.
+                 */
+                if (!device->zone_info->max_active_zones)
+                        __set_bit(i, active);
+
                 if (!is_sequential) {
                         alloc_offsets[i] = WP_CONVENTIONAL;
                         continue;
@@ -1389,45 +1420,23 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
                         __set_bit(i, active);
                         break;
                 }
-
-                /*
-                 * Consider a zone as active if we can allow any number of
-                 * active zones.
-                 */
-                if (!device->zone_info->max_active_zones)
-                        __set_bit(i, active);
         }
 
         if (num_sequential > 0)
                 cache->seq_zone = true;
 
         if (num_conventional > 0) {
-                /*
-                 * Avoid calling calculate_alloc_pointer() for new BG. It
-                 * is no use for new BG. It must be always 0.
-                 *
-                 * Also, we have a lock chain of extent buffer lock ->
-                 * chunk mutex. For new BG, this function is called from
-                 * btrfs_make_block_group() which is already taking the
-                 * chunk mutex. Thus, we cannot call
-                 * calculate_alloc_pointer() which takes extent buffer
-                 * locks to avoid deadlock.
-                 */
-
                 /* Zone capacity is always zone size in emulation */
                 cache->zone_capacity = cache->length;
-                if (new) {
-                        cache->alloc_offset = 0;
-                        goto out;
-                }
-                ret = calculate_alloc_pointer(cache, &last_alloc);
-                if (ret || map->num_stripes == num_conventional) {
-                        if (!ret)
-                                cache->alloc_offset = last_alloc;
-                        else
-                                btrfs_err(fs_info,
+                ret = calculate_alloc_pointer(cache, &last_alloc, new);
+                if (ret) {
+                        btrfs_err(fs_info,
                 "zoned: failed to determine allocation offset of bg %llu",
                           cache->start);
+                        goto out;
+                } else if (map->num_stripes == num_conventional) {
+                        cache->alloc_offset = last_alloc;
+                        cache->zone_is_active = 1;
                         goto out;
                 }
         }
@@ -1495,13 +1504,6 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
                 goto out;
         }
 
-        if (cache->zone_is_active) {
-                btrfs_get_block_group(cache);
-                spin_lock(&fs_info->zone_active_bgs_lock);
-                list_add_tail(&cache->active_bg_list, &fs_info->zone_active_bgs);
-                spin_unlock(&fs_info->zone_active_bgs_lock);
-        }
-
 out:
         if (cache->alloc_offset > fs_info->zone_size) {
                 btrfs_err(fs_info,
@@ -1526,10 +1528,16 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
                 ret = -EIO;
         }
 
-        if (!ret)
+        if (!ret) {
                 cache->meta_write_pointer = cache->alloc_offset + cache->start;
-
-        if (ret) {
+                if (cache->zone_is_active) {
+                        btrfs_get_block_group(cache);
+                        spin_lock(&fs_info->zone_active_bgs_lock);
+                        list_add_tail(&cache->active_bg_list,
+                                      &fs_info->zone_active_bgs);
+                        spin_unlock(&fs_info->zone_active_bgs_lock);
+                }
+        } else {
                 kfree(cache->physical_map);
                 cache->physical_map = NULL;
         }
@@ -2007,8 +2015,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written)
         /* For active_bg_list */
         btrfs_put_block_group(block_group);
 
-        clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
-        wake_up_all(&fs_info->zone_finish_wait);
+        clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
 
         return 0;
 }
...