Commit 07730d87 authored by Josef Bacik's avatar Josef Bacik Committed by David Sterba

btrfs: migrate the chunk allocation code

This feels more at home in block-group.c than in extent-tree.c.
Signed-off-by: default avatarJosef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>i
[ refresh ]
Signed-off-by: default avatarDavid Sterba <dsterba@suse.com>
parent 606d1bf1
......@@ -13,6 +13,7 @@
#include "sysfs.h"
#include "tree-log.h"
#include "delalloc-space.h"
#include "math.h"
void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
......@@ -2694,3 +2695,248 @@ void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
}
static void force_metadata_allocation(struct btrfs_fs_info *info)
{
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
found->force_alloc = CHUNK_ALLOC_FORCE;
}
rcu_read_unlock();
}
static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *sinfo, int force)
{
u64 bytes_used = btrfs_space_info_used(sinfo, false);
u64 thresh;
if (force == CHUNK_ALLOC_FORCE)
return 1;
/*
* in limited mode, we want to have some free space up to
* about 1% of the FS size.
*/
if (force == CHUNK_ALLOC_LIMITED) {
thresh = btrfs_super_total_bytes(fs_info->super_copy);
thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
if (sinfo->total_bytes - bytes_used < thresh)
return 1;
}
if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
return 0;
return 1;
}
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
{
u64 alloc_flags = btrfs_get_alloc_profile(trans->fs_info, type);
return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
}
/*
* If force is CHUNK_ALLOC_FORCE:
* - return 1 if it successfully allocates a chunk,
* - return errors including -ENOSPC otherwise.
* If force is NOT CHUNK_ALLOC_FORCE:
* - return 0 if it doesn't need to allocate a new chunk,
* - return 1 if it successfully allocates a chunk,
* - return errors including -ENOSPC otherwise.
*/
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
enum btrfs_chunk_alloc_enum force)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *space_info;
bool wait_for_alloc = false;
bool should_alloc = false;
int ret = 0;
/* Don't re-enter if we're already allocating a chunk */
if (trans->allocating_chunk)
return -ENOSPC;
space_info = btrfs_find_space_info(fs_info, flags);
ASSERT(space_info);
do {
spin_lock(&space_info->lock);
if (force < space_info->force_alloc)
force = space_info->force_alloc;
should_alloc = should_alloc_chunk(fs_info, space_info, force);
if (space_info->full) {
/* No more free physical space */
if (should_alloc)
ret = -ENOSPC;
else
ret = 0;
spin_unlock(&space_info->lock);
return ret;
} else if (!should_alloc) {
spin_unlock(&space_info->lock);
return 0;
} else if (space_info->chunk_alloc) {
/*
* Someone is already allocating, so we need to block
* until this someone is finished and then loop to
* recheck if we should continue with our allocation
* attempt.
*/
wait_for_alloc = true;
spin_unlock(&space_info->lock);
mutex_lock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->chunk_mutex);
} else {
/* Proceed with allocation */
space_info->chunk_alloc = 1;
wait_for_alloc = false;
spin_unlock(&space_info->lock);
}
cond_resched();
} while (wait_for_alloc);
mutex_lock(&fs_info->chunk_mutex);
trans->allocating_chunk = true;
/*
* If we have mixed data/metadata chunks we want to make sure we keep
* allocating mixed chunks instead of individual chunks.
*/
if (btrfs_mixed_space_info(space_info))
flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
/*
* if we're doing a data chunk, go ahead and make sure that
* we keep a reasonable number of metadata chunks allocated in the
* FS as well.
*/
if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
fs_info->data_chunk_allocations++;
if (!(fs_info->data_chunk_allocations %
fs_info->metadata_ratio))
force_metadata_allocation(fs_info);
}
/*
* Check if we have enough space in SYSTEM chunk because we may need
* to update devices.
*/
check_system_chunk(trans, flags);
ret = btrfs_alloc_chunk(trans, flags);
trans->allocating_chunk = false;
spin_lock(&space_info->lock);
if (ret < 0) {
if (ret == -ENOSPC)
space_info->full = 1;
else
goto out;
} else {
ret = 1;
space_info->max_extent_size = 0;
}
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
out:
space_info->chunk_alloc = 0;
spin_unlock(&space_info->lock);
mutex_unlock(&fs_info->chunk_mutex);
/*
* When we allocate a new chunk we reserve space in the chunk block
* reserve to make sure we can COW nodes/leafs in the chunk tree or
* add new nodes/leafs to it if we end up needing to do it when
* inserting the chunk item and updating device items as part of the
* second phase of chunk allocation, performed by
* btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
* large number of new block groups to create in our transaction
* handle's new_bgs list to avoid exhausting the chunk block reserve
* in extreme cases - like having a single transaction create many new
* block groups when starting to write out the free space caches of all
* the block groups that were made dirty during the lifetime of the
* transaction.
*/
if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
btrfs_create_pending_block_groups(trans);
return ret;
}
static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
{
u64 num_dev;
num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
if (!num_dev)
num_dev = fs_info->fs_devices->rw_devices;
return num_dev;
}
/*
* If @is_allocation is true, reserve space in the system space info necessary
* for allocating a chunk, otherwise if it's false, reserve space necessary for
* removing a chunk.
*/
void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *info;
u64 left;
u64 thresh;
int ret = 0;
u64 num_devs;
/*
* Needed because we can end up allocating a system chunk and for an
* atomic and race free space reservation in the chunk block reserve.
*/
lockdep_assert_held(&fs_info->chunk_mutex);
info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
spin_lock(&info->lock);
left = info->total_bytes - btrfs_space_info_used(info, true);
spin_unlock(&info->lock);
num_devs = get_profile_num_devs(fs_info, type);
/* num_devs device items to update and 1 chunk item to add or remove */
thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) +
btrfs_calc_trans_metadata_size(fs_info, 1);
if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
left, thresh, type);
btrfs_dump_space_info(fs_info, info, 0, 0);
}
if (left < thresh) {
u64 flags = btrfs_system_alloc_profile(fs_info);
/*
* Ignore failure to create system chunk. We might end up not
* needing it, as we might not need to COW all nodes/leafs from
* the paths we visit in the chunk tree (they were already COWed
* or created in the current transaction for example).
*/
ret = btrfs_alloc_chunk(trans, flags);
}
if (!ret) {
ret = btrfs_block_rsv_add(fs_info->chunk_root,
&fs_info->chunk_block_rsv,
thresh, BTRFS_RESERVE_NO_FLUSH);
if (!ret)
trans->chunk_bytes_reserved += thresh;
}
}
......@@ -10,6 +10,23 @@ enum btrfs_disk_cache_state {
BTRFS_DC_SETUP,
};
/*
* Control flags for do_chunk_alloc's force field CHUNK_ALLOC_NO_FORCE means to
* only allocate a chunk if we really need one.
*
* CHUNK_ALLOC_LIMITED means to only try and allocate one if we have very few
* chunks already allocated. This is used as part of the clustering code to
* help make sure we have a good pool of storage to cluster in, without filling
* the FS with empty chunks
*
* CHUNK_ALLOC_FORCE means it must try to allocate one
*/
enum btrfs_chunk_alloc_enum {
CHUNK_ALLOC_NO_FORCE,
CHUNK_ALLOC_LIMITED,
CHUNK_ALLOC_FORCE,
};
struct btrfs_caching_control {
struct list_head list;
struct mutex mutex;
......@@ -198,6 +215,10 @@ int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 ram_bytes, u64 num_bytes, int delalloc);
void btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache,
u64 num_bytes, int delalloc);
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
enum btrfs_chunk_alloc_enum force);
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
static inline int btrfs_block_group_cache_done(
struct btrfs_block_group_cache *cache)
......
......@@ -2556,28 +2556,6 @@ enum btrfs_flush_state {
COMMIT_TRANS = 9,
};
/*
* control flags for do_chunk_alloc's force field
* CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
* if we really need one.
*
* CHUNK_ALLOC_LIMITED means to only try and allocate one
* if we have very few chunks already allocated. This is
* used as part of the clustering code to help make sure
* we have a good pool of storage to cluster in, without
* filling the FS with empty chunks
*
* CHUNK_ALLOC_FORCE means it must try to allocate one
*
*/
enum btrfs_chunk_alloc_enum {
CHUNK_ALLOC_NO_FORCE,
CHUNK_ALLOC_LIMITED,
CHUNK_ALLOC_FORCE,
};
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
enum btrfs_chunk_alloc_enum force);
int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
struct btrfs_block_rsv *rsv,
int nitems, bool use_global_rsv);
......@@ -2593,7 +2571,6 @@ int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
u64 start, u64 end);
int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
u64 num_bytes, u64 *actual_bytes);
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range);
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
......@@ -2602,7 +2579,6 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
int btrfs_start_write_no_snapshotting(struct btrfs_root *root);
void btrfs_end_write_no_snapshotting(struct btrfs_root *root);
void btrfs_wait_for_snapshot_creation(struct btrfs_root *root);
void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
/* ctree.c */
int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
......
......@@ -7,6 +7,7 @@
#include "space-info.h"
#include "transaction.h"
#include "qgroup.h"
#include "block-group.h"
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
{
......
......@@ -2661,243 +2661,6 @@ u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
return get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
}
static void force_metadata_allocation(struct btrfs_fs_info *info)
{
struct list_head *head = &info->space_info;
struct btrfs_space_info *found;
rcu_read_lock();
list_for_each_entry_rcu(found, head, list) {
if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
found->force_alloc = CHUNK_ALLOC_FORCE;
}
rcu_read_unlock();
}
static int should_alloc_chunk(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *sinfo, int force)
{
u64 bytes_used = btrfs_space_info_used(sinfo, false);
u64 thresh;
if (force == CHUNK_ALLOC_FORCE)
return 1;
/*
* in limited mode, we want to have some free space up to
* about 1% of the FS size.
*/
if (force == CHUNK_ALLOC_LIMITED) {
thresh = btrfs_super_total_bytes(fs_info->super_copy);
thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1));
if (sinfo->total_bytes - bytes_used < thresh)
return 1;
}
if (bytes_used + SZ_2M < div_factor(sinfo->total_bytes, 8))
return 0;
return 1;
}
static u64 get_profile_num_devs(struct btrfs_fs_info *fs_info, u64 type)
{
u64 num_dev;
num_dev = btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)].devs_max;
if (!num_dev)
num_dev = fs_info->fs_devices->rw_devices;
return num_dev;
}
/*
* If @is_allocation is true, reserve space in the system space info necessary
* for allocating a chunk, otherwise if it's false, reserve space necessary for
* removing a chunk.
*/
void check_system_chunk(struct btrfs_trans_handle *trans, u64 type)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *info;
u64 left;
u64 thresh;
int ret = 0;
u64 num_devs;
/*
* Needed because we can end up allocating a system chunk and for an
* atomic and race free space reservation in the chunk block reserve.
*/
lockdep_assert_held(&fs_info->chunk_mutex);
info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
spin_lock(&info->lock);
left = info->total_bytes - btrfs_space_info_used(info, true);
spin_unlock(&info->lock);
num_devs = get_profile_num_devs(fs_info, type);
/* num_devs device items to update and 1 chunk item to add or remove */
thresh = btrfs_calc_trunc_metadata_size(fs_info, num_devs) +
btrfs_calc_trans_metadata_size(fs_info, 1);
if (left < thresh && btrfs_test_opt(fs_info, ENOSPC_DEBUG)) {
btrfs_info(fs_info, "left=%llu, need=%llu, flags=%llu",
left, thresh, type);
btrfs_dump_space_info(fs_info, info, 0, 0);
}
if (left < thresh) {
u64 flags = btrfs_system_alloc_profile(fs_info);
/*
* Ignore failure to create system chunk. We might end up not
* needing it, as we might not need to COW all nodes/leafs from
* the paths we visit in the chunk tree (they were already COWed
* or created in the current transaction for example).
*/
ret = btrfs_alloc_chunk(trans, flags);
}
if (!ret) {
ret = btrfs_block_rsv_add(fs_info->chunk_root,
&fs_info->chunk_block_rsv,
thresh, BTRFS_RESERVE_NO_FLUSH);
if (!ret)
trans->chunk_bytes_reserved += thresh;
}
}
/*
* If force is CHUNK_ALLOC_FORCE:
* - return 1 if it successfully allocates a chunk,
* - return errors including -ENOSPC otherwise.
* If force is NOT CHUNK_ALLOC_FORCE:
* - return 0 if it doesn't need to allocate a new chunk,
* - return 1 if it successfully allocates a chunk,
* - return errors including -ENOSPC otherwise.
*/
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
enum btrfs_chunk_alloc_enum force)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_space_info *space_info;
bool wait_for_alloc = false;
bool should_alloc = false;
int ret = 0;
/* Don't re-enter if we're already allocating a chunk */
if (trans->allocating_chunk)
return -ENOSPC;
space_info = btrfs_find_space_info(fs_info, flags);
ASSERT(space_info);
do {
spin_lock(&space_info->lock);
if (force < space_info->force_alloc)
force = space_info->force_alloc;
should_alloc = should_alloc_chunk(fs_info, space_info, force);
if (space_info->full) {
/* No more free physical space */
if (should_alloc)
ret = -ENOSPC;
else
ret = 0;
spin_unlock(&space_info->lock);
return ret;
} else if (!should_alloc) {
spin_unlock(&space_info->lock);
return 0;
} else if (space_info->chunk_alloc) {
/*
* Someone is already allocating, so we need to block
* until this someone is finished and then loop to
* recheck if we should continue with our allocation
* attempt.
*/
wait_for_alloc = true;
spin_unlock(&space_info->lock);
mutex_lock(&fs_info->chunk_mutex);
mutex_unlock(&fs_info->chunk_mutex);
} else {
/* Proceed with allocation */
space_info->chunk_alloc = 1;
wait_for_alloc = false;
spin_unlock(&space_info->lock);
}
cond_resched();
} while (wait_for_alloc);
mutex_lock(&fs_info->chunk_mutex);
trans->allocating_chunk = true;
/*
* If we have mixed data/metadata chunks we want to make sure we keep
* allocating mixed chunks instead of individual chunks.
*/
if (btrfs_mixed_space_info(space_info))
flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
/*
* if we're doing a data chunk, go ahead and make sure that
* we keep a reasonable number of metadata chunks allocated in the
* FS as well.
*/
if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) {
fs_info->data_chunk_allocations++;
if (!(fs_info->data_chunk_allocations %
fs_info->metadata_ratio))
force_metadata_allocation(fs_info);
}
/*
* Check if we have enough space in SYSTEM chunk because we may need
* to update devices.
*/
check_system_chunk(trans, flags);
ret = btrfs_alloc_chunk(trans, flags);
trans->allocating_chunk = false;
spin_lock(&space_info->lock);
if (ret < 0) {
if (ret == -ENOSPC)
space_info->full = 1;
else
goto out;
} else {
ret = 1;
space_info->max_extent_size = 0;
}
space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
out:
space_info->chunk_alloc = 0;
spin_unlock(&space_info->lock);
mutex_unlock(&fs_info->chunk_mutex);
/*
* When we allocate a new chunk we reserve space in the chunk block
* reserve to make sure we can COW nodes/leafs in the chunk tree or
* add new nodes/leafs to it if we end up needing to do it when
* inserting the chunk item and updating device items as part of the
* second phase of chunk allocation, performed by
* btrfs_finish_chunk_alloc(). So make sure we don't accumulate a
* large number of new block groups to create in our transaction
* handle's new_bgs list to avoid exhausting the chunk block reserve
* in extreme cases - like having a single transaction create many new
* block groups when starting to write out the free space caches of all
* the block groups that were made dirty during the lifetime of the
* transaction.
*/
if (trans->chunk_bytes_reserved >= (u64)SZ_2M)
btrfs_create_pending_block_groups(trans);
return ret;
}
static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
{
struct btrfs_block_group_cache *cache;
......@@ -5837,13 +5600,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
return ret;
}
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type)
{
u64 alloc_flags = get_alloc_profile(trans->fs_info, type);
return btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
}
/*
* helper to account the unused space of all the readonly block group in the
* space_info. takes mirrors into account.
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment