Commit 1bb91902 authored by Alexandre Oliva, committed by Chris Mason

Btrfs: revamp clustered allocation logic

Parameterize clusters on minimum total size, minimum chunk size and
minimum contiguous size for at least one chunk, without limits on
cluster, window or gap sizes.  Don't tolerate any fragmentation for
SSD_SPREAD; accept it for metadata, but try to keep data dense.
Signed-off-by: Alexandre Oliva <oliva@lsd.ic.unicamp.br>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent fc7c1077
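
Before the diff, a brief illustration may help. The sketch below is a standalone user-space restatement, not part of the patch; pick_limits, cluster_limits and max_u64 are made-up names for illustration only. It mirrors the new sizing branch added to btrfs_find_space_cluster() further down, showing the three knobs the patch introduces: the minimum total cluster size, the size of the one extent that must be contiguous, and the smallest chunk worth considering.

/* Illustrative sketch only; these are hypothetical names, not btrfs APIs.
 * The branches mirror the new policy in btrfs_find_space_cluster() below. */
#include <stdint.h>

struct cluster_limits {
        uint64_t total;  /* minimum total free space gathered into the cluster */
        uint64_t cont1;  /* at least one extent must be this large */
        uint64_t min;    /* smallest extent/bitmap chunk worth considering */
};

static uint64_t max_u64(uint64_t a, uint64_t b)
{
        return a > b ? a : b;
}

static struct cluster_limits
pick_limits(uint64_t bytes, uint64_t empty_size, uint64_t sectorsize,
            int ssd_spread, int metadata)
{
        struct cluster_limits l = { .total = bytes + empty_size };

        if (ssd_spread) {
                /* SSD_SPREAD: no fragmentation at all */
                l.cont1 = l.min = bytes + empty_size;
        } else if (metadata) {
                /* metadata: accept small extents, but want one of "bytes" */
                l.cont1 = bytes;
                l.min = sectorsize;
        } else {
                /* data: keep it dense */
                l.cont1 = max_u64(bytes, (bytes + empty_size) >> 2);
                l.min = sectorsize;
        }
        return l;
}
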
@@ -2283,23 +2283,23 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
                                 struct btrfs_free_space *entry,
                                 struct btrfs_free_cluster *cluster,
-                                u64 offset, u64 bytes, u64 min_bytes)
+                                u64 offset, u64 bytes,
+                                u64 cont1_bytes, u64 min_bytes)
 {
         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
         unsigned long next_zero;
         unsigned long i;
-        unsigned long search_bits;
-        unsigned long total_bits;
+        unsigned long want_bits;
+        unsigned long min_bits;
         unsigned long found_bits;
         unsigned long start = 0;
         unsigned long total_found = 0;
         int ret;
-        bool found = false;
 
         i = offset_to_bit(entry->offset, block_group->sectorsize,
                           max_t(u64, offset, entry->offset));
-        search_bits = bytes_to_bits(bytes, block_group->sectorsize);
-        total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
+        want_bits = bytes_to_bits(bytes, block_group->sectorsize);
+        min_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
 
 again:
         found_bits = 0;
@@ -2308,7 +2308,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
              i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
                 next_zero = find_next_zero_bit(entry->bitmap,
                                                BITS_PER_BITMAP, i);
-                if (next_zero - i >= search_bits) {
+                if (next_zero - i >= min_bits) {
                         found_bits = next_zero - i;
                         break;
                 }
@@ -2318,10 +2318,9 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
         if (!found_bits)
                 return -ENOSPC;
 
-        if (!found) {
+        if (!total_found) {
                 start = i;
                 cluster->max_size = 0;
-                found = true;
         }
 
         total_found += found_bits;
@@ -2329,13 +2328,8 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
         if (cluster->max_size < found_bits * block_group->sectorsize)
                 cluster->max_size = found_bits * block_group->sectorsize;
 
-        if (total_found < total_bits) {
-                i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
-                if (i - start > total_bits * 2) {
-                        total_found = 0;
-                        cluster->max_size = 0;
-                        found = false;
-                }
+        if (total_found < want_bits || cluster->max_size < cont1_bytes) {
+                i = next_zero + 1;
                 goto again;
         }
@@ -2351,23 +2345,23 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
 
 /*
  * This searches the block group for just extents to fill the cluster with.
+ * Try to find a cluster with at least bytes total bytes, at least one
+ * extent of cont1_bytes, and other clusters of at least min_bytes.
  */
 static noinline int
 setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
                         struct btrfs_free_cluster *cluster,
                         struct list_head *bitmaps, u64 offset, u64 bytes,
-                        u64 min_bytes)
+                        u64 cont1_bytes, u64 min_bytes)
 {
         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
         struct btrfs_free_space *first = NULL;
         struct btrfs_free_space *entry = NULL;
-        struct btrfs_free_space *prev = NULL;
         struct btrfs_free_space *last;
         struct rb_node *node;
         u64 window_start;
         u64 window_free;
         u64 max_extent;
-        u64 max_gap = 128 * 1024;
 
         entry = tree_search_offset(ctl, offset, 0, 1);
         if (!entry)
@@ -2377,8 +2371,8 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
          * We don't want bitmaps, so just move along until we find a normal
          * extent entry.
          */
-        while (entry->bitmap) {
-                if (list_empty(&entry->list))
+        while (entry->bitmap || entry->bytes < min_bytes) {
+                if (entry->bitmap && list_empty(&entry->list))
                         list_add_tail(&entry->list, bitmaps);
                 node = rb_next(&entry->offset_index);
                 if (!node)
@@ -2391,12 +2385,9 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
         max_extent = entry->bytes;
         first = entry;
         last = entry;
-        prev = entry;
 
-        while (window_free <= min_bytes) {
-                node = rb_next(&entry->offset_index);
-                if (!node)
-                        return -ENOSPC;
+        for (node = rb_next(&entry->offset_index); node;
+             node = rb_next(&entry->offset_index)) {
                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
 
                 if (entry->bitmap) {
@@ -2405,26 +2396,18 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
                         continue;
                 }
 
-                /*
-                 * we haven't filled the empty size and the window is
-                 * very large. reset and try again
-                 */
-                if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
-                    entry->offset - window_start > (min_bytes * 2)) {
-                        first = entry;
-                        window_start = entry->offset;
-                        window_free = entry->bytes;
-                        last = entry;
-                        max_extent = entry->bytes;
-                } else {
-                        last = entry;
-                        window_free += entry->bytes;
-                        if (entry->bytes > max_extent)
-                                max_extent = entry->bytes;
-                }
-                prev = entry;
+                if (entry->bytes < min_bytes)
+                        continue;
+
+                last = entry;
+                window_free += entry->bytes;
+                if (entry->bytes > max_extent)
+                        max_extent = entry->bytes;
         }
 
+        if (window_free < bytes || max_extent < cont1_bytes)
+                return -ENOSPC;
+
         cluster->window_start = first->offset;
 
         node = &first->offset_index;
@@ -2438,7 +2421,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
                 entry = rb_entry(node, struct btrfs_free_space, offset_index);
                 node = rb_next(&entry->offset_index);
 
-                if (entry->bitmap)
+                if (entry->bitmap || entry->bytes < min_bytes)
                         continue;
 
                 rb_erase(&entry->offset_index, &ctl->free_space_offset);
@@ -2460,7 +2443,7 @@ static noinline int
 setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
                      struct btrfs_free_cluster *cluster,
                      struct list_head *bitmaps, u64 offset, u64 bytes,
-                     u64 min_bytes)
+                     u64 cont1_bytes, u64 min_bytes)
 {
         struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
         struct btrfs_free_space *entry;
@@ -2485,7 +2468,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
                 if (entry->bytes < min_bytes)
                         continue;
                 ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
-                                           bytes, min_bytes);
+                                           bytes, cont1_bytes, min_bytes);
                 if (!ret)
                         return 0;
         }
@@ -2499,7 +2482,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
 
 /*
  * here we try to find a cluster of blocks in a block group.  The goal
- * is to find at least bytes free and up to empty_size + bytes free.
+ * is to find at least bytes+empty_size.
  * We might not find them all in one contiguous area.
  *
  * returns zero and sets up cluster if things worked out, otherwise
@@ -2515,23 +2498,24 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
         struct btrfs_free_space *entry, *tmp;
         LIST_HEAD(bitmaps);
         u64 min_bytes;
+        u64 cont1_bytes;
         int ret;
 
-        /* for metadata, allow allocates with more holes */
+        /*
+         * Choose the minimum extent size we'll require for this
+         * cluster. For SSD_SPREAD, don't allow any fragmentation.
+         * For metadata, allow allocates with smaller extents. For
+         * data, keep it dense.
+         */
         if (btrfs_test_opt(root, SSD_SPREAD)) {
-                min_bytes = bytes + empty_size;
+                cont1_bytes = min_bytes = bytes + empty_size;
         } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
-                /*
-                 * we want to do larger allocations when we are
-                 * flushing out the delayed refs, it helps prevent
-                 * making more work as we go along.
-                 */
-                if (trans->transaction->delayed_refs.flushing)
-                        min_bytes = max(bytes, (bytes + empty_size) >> 1);
-                else
-                        min_bytes = max(bytes, (bytes + empty_size) >> 4);
-        } else
-                min_bytes = max(bytes, (bytes + empty_size) >> 2);
+                cont1_bytes = bytes;
+                min_bytes = block_group->sectorsize;
+        } else {
+                cont1_bytes = max(bytes, (bytes + empty_size) >> 2);
+                min_bytes = block_group->sectorsize;
+        }
 
         spin_lock(&ctl->tree_lock);
@@ -2539,7 +2523,7 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
          * If we know we don't have enough space to make a cluster don't even
          * bother doing all the work to try and find one.
          */
-        if (ctl->free_space < min_bytes) {
+        if (ctl->free_space < bytes) {
                 spin_unlock(&ctl->tree_lock);
                 return -ENOSPC;
         }
@@ -2553,10 +2537,12 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
         }
 
         ret = setup_cluster_no_bitmap(block_group, cluster, &bitmaps, offset,
-                                      bytes, min_bytes);
+                                      bytes + empty_size,
+                                      cont1_bytes, min_bytes);
         if (ret)
                 ret = setup_cluster_bitmap(block_group, cluster, &bitmaps,
-                                           offset, bytes, min_bytes);
+                                           offset, bytes + empty_size,
+                                           cont1_bytes, min_bytes);
 
         /* Clear our temporary list */
         list_for_each_entry_safe(entry, tmp, &bitmaps, list)