Commit a9e6d153 authored by Chris Mason

Merge branch 'allocator-fixes' into for-linus-4.4

Signed-off-by: Chris Mason <clm@fb.com>
parents 56fa9d07 0584f718
@@ -362,6 +362,12 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 		goto out;
 	}
 
+	if (btrfs_test_is_dummy_root(root)) {
+		srcu_read_unlock(&fs_info->subvol_srcu, index);
+		ret = -ENOENT;
+		goto out;
+	}
+
 	if (path->search_commit_root)
 		root_level = btrfs_header_level(root->commit_root);
 	else if (time_seq == (u64)-1)
...
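Note: this backref.c hunk makes __resolve_indirect_ref() refuse dummy roots, which exist only for the sanity-test code and have no real subvolume tree behind them to walk. Like the function's other early exits, the new path has to drop the subvol_srcu read lock before jumping to the shared out label.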
@@ -1154,6 +1154,10 @@ struct btrfs_space_info {
 				   delalloc/allocations */
 	u64 bytes_readonly;	/* total bytes that are read only */
+	u64 max_extent_size;	/* This will hold the maximum extent size of
+				   the space info if we had an ENOSPC in the
+				   allocator. */
 
 	unsigned int full:1;	/* indicates that we cannot allocate any more
 				   chunks for this space */
 	unsigned int chunk_alloc:1;	/* set if we are allocating a chunk */
@@ -1228,6 +1232,9 @@ struct btrfs_free_cluster {
 	/* first extent starting offset */
 	u64 window_start;
 
+	/* We did a full search and couldn't create a cluster */
+	bool fragmented;
+
 	struct btrfs_block_group_cache *block_group;
 	/*
 	 * when a cluster is allocated from a block group, we put the
@@ -2148,6 +2155,8 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
 #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR	(1 << 22)
 #define BTRFS_MOUNT_RESCAN_UUID_TREE	(1 << 23)
+#define BTRFS_MOUNT_FRAGMENT_DATA	(1 << 24)
+#define BTRFS_MOUNT_FRAGMENT_METADATA	(1 << 25)
 
 #define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)
 #define BTRFS_DEFAULT_MAX_INLINE	(8192)
@@ -2172,6 +2181,18 @@ struct btrfs_ioctl_defrag_range_args {
 	btrfs_clear_opt(root->fs_info->mount_opt, opt);			\
 }
 
+#ifdef CONFIG_BTRFS_DEBUG
+static inline int
+btrfs_should_fragment_free_space(struct btrfs_root *root,
+				 struct btrfs_block_group_cache *block_group)
+{
+	return (btrfs_test_opt(root, FRAGMENT_METADATA) &&
+		block_group->flags & BTRFS_BLOCK_GROUP_METADATA) ||
+	       (btrfs_test_opt(root, FRAGMENT_DATA) &&
+		block_group->flags & BTRFS_BLOCK_GROUP_DATA);
+}
+#endif
+
 /*
  * Requests for changes that need to be done during transaction commit.
  *
...
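Note: the new FRAGMENT_* options in these ctree.h hunks follow the existing convention of one bit per mount option in fs_info->mount_opt, and btrfs_should_fragment_free_space() above just ANDs those bits against the block group's type. A compilable userspace sketch of that flag scheme; set_opt()/test_opt() here are simplified stand-ins for the kernel's btrfs_set_opt()/btrfs_test_opt():

#include <stdio.h>

/* Flag values copied from the hunk above. */
#define BTRFS_MOUNT_FRAGMENT_DATA	(1UL << 24)
#define BTRFS_MOUNT_FRAGMENT_METADATA	(1UL << 25)

#define set_opt(opts, flag)	((opts) |= (flag))
#define test_opt(opts, flag)	(!!((opts) & (flag)))

int main(void)
{
	unsigned long mount_opt = 0;

	/* "fragment=all" sets both bits, as in the super.c hunks below. */
	set_opt(mount_opt, BTRFS_MOUNT_FRAGMENT_DATA);
	set_opt(mount_opt, BTRFS_MOUNT_FRAGMENT_METADATA);

	printf("fragment data: %d, metadata: %d\n",
	       test_opt(mount_opt, BTRFS_MOUNT_FRAGMENT_DATA),
	       test_opt(mount_opt, BTRFS_MOUNT_FRAGMENT_METADATA));
	return 0;
}

The super.c hunks further down wire the fragment=data, fragment=metadata and fragment=all mount strings to these bits.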
@@ -4327,25 +4327,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
 	return 0;
 }
 
-static void btrfs_free_pending_ordered(struct btrfs_transaction *cur_trans,
-				       struct btrfs_fs_info *fs_info)
-{
-	struct btrfs_ordered_extent *ordered;
-
-	spin_lock(&fs_info->trans_lock);
-	while (!list_empty(&cur_trans->pending_ordered)) {
-		ordered = list_first_entry(&cur_trans->pending_ordered,
-					   struct btrfs_ordered_extent,
-					   trans_list);
-		list_del_init(&ordered->trans_list);
-		spin_unlock(&fs_info->trans_lock);
-
-		btrfs_put_ordered_extent(ordered);
-		spin_lock(&fs_info->trans_lock);
-	}
-	spin_unlock(&fs_info->trans_lock);
-}
-
 void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
 				   struct btrfs_root *root)
 {
@@ -4357,7 +4338,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
 	cur_trans->state = TRANS_STATE_UNBLOCKED;
 	wake_up(&root->fs_info->transaction_wait);
 
-	btrfs_free_pending_ordered(cur_trans, root->fs_info);
 	btrfs_destroy_delayed_inodes(root);
 	btrfs_assert_delayed_root_empty(root);
 
...
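Note: btrfs_free_pending_ordered() in disk-io.c can go away because a transaction no longer carries a pending_ordered list for cleanup to drain; the ordered-data.c and transaction.c hunks below replace that list with a pending_ordered counter plus a wait queue on the transaction itself.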
This diff is collapsed.
@@ -1730,7 +1730,7 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
  */
 static int search_bitmap(struct btrfs_free_space_ctl *ctl,
 			 struct btrfs_free_space *bitmap_info, u64 *offset,
-			 u64 *bytes)
+			 u64 *bytes, bool for_alloc)
 {
 	unsigned long found_bits = 0;
 	unsigned long max_bits = 0;
@@ -1738,11 +1738,26 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
 	unsigned long next_zero;
 	unsigned long extent_bits;
 
+	/*
+	 * Skip searching the bitmap if we don't have a contiguous section that
+	 * is large enough for this allocation.
+	 */
+	if (for_alloc &&
+	    bitmap_info->max_extent_size &&
+	    bitmap_info->max_extent_size < *bytes) {
+		*bytes = bitmap_info->max_extent_size;
+		return -1;
+	}
+
 	i = offset_to_bit(bitmap_info->offset, ctl->unit,
 			  max_t(u64, *offset, bitmap_info->offset));
 	bits = bytes_to_bits(*bytes, ctl->unit);
 
 	for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
+		if (for_alloc && bits == 1) {
+			found_bits = 1;
+			break;
+		}
 		next_zero = find_next_zero_bit(bitmap_info->bitmap,
 					       BITS_PER_BITMAP, i);
 		extent_bits = next_zero - i;
@@ -1762,6 +1777,7 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
 	}
 
 	*bytes = (u64)(max_bits) * ctl->unit;
+	bitmap_info->max_extent_size = *bytes;
 	return -1;
 }
 
@@ -1813,7 +1829,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
 		if (entry->bitmap) {
 			u64 size = *bytes;
 
-			ret = search_bitmap(ctl, entry, &tmp, &size);
+			ret = search_bitmap(ctl, entry, &tmp, &size, true);
 			if (!ret) {
 				*offset = tmp;
 				*bytes = size;
@@ -1874,7 +1890,8 @@ static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
 	search_start = *offset;
 	search_bytes = ctl->unit;
 	search_bytes = min(search_bytes, end - search_start + 1);
-	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes);
+	ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes,
+			    false);
 	if (ret < 0 || search_start != *offset)
 		return -EINVAL;
 
@@ -1919,7 +1936,7 @@ static noinline int remove_from_bitmap(struct btrfs_free_space_ctl *ctl,
 		search_start = *offset;
 		search_bytes = ctl->unit;
 		ret = search_bitmap(ctl, bitmap_info, &search_start,
-				    &search_bytes);
+				    &search_bytes, false);
 		if (ret < 0 || search_start != *offset)
 			return -EAGAIN;
 
@@ -1943,6 +1960,12 @@ static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl,
 
 	bitmap_set_bits(ctl, info, offset, bytes_to_set);
 
+	/*
+	 * We set some bytes, we have no idea what the max extent size is
+	 * anymore.
+	 */
+	info->max_extent_size = 0;
+
 	return bytes_to_set;
 }
 
@@ -1951,12 +1974,19 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
 		       struct btrfs_free_space *info)
 {
 	struct btrfs_block_group_cache *block_group = ctl->private;
+	bool forced = false;
+
+#ifdef CONFIG_BTRFS_DEBUG
+	if (btrfs_should_fragment_free_space(block_group->fs_info->extent_root,
+					     block_group))
+		forced = true;
+#endif
 
 	/*
 	 * If we are below the extents threshold then we can add this as an
 	 * extent, and don't have to deal with the bitmap
 	 */
-	if (ctl->free_extents < ctl->extents_thresh) {
+	if (!forced && ctl->free_extents < ctl->extents_thresh) {
 		/*
 		 * If this block group has some small extents we don't want to
 		 * use up all of our free slots in the cache with them, we want
@@ -2661,7 +2691,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
 	search_start = min_start;
 	search_bytes = bytes;
 
-	err = search_bitmap(ctl, entry, &search_start, &search_bytes);
+	err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
 	if (err) {
 		if (search_bytes > *max_extent_size)
 			*max_extent_size = search_bytes;
@@ -2775,6 +2805,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
 	unsigned long want_bits;
 	unsigned long min_bits;
 	unsigned long found_bits;
+	unsigned long max_bits = 0;
 	unsigned long start = 0;
 	unsigned long total_found = 0;
 	int ret;
@@ -2784,6 +2815,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
 	want_bits = bytes_to_bits(bytes, ctl->unit);
 	min_bits = bytes_to_bits(min_bytes, ctl->unit);
 
+	/*
+	 * Don't bother looking for a cluster in this bitmap if it's heavily
+	 * fragmented.
+	 */
+	if (entry->max_extent_size &&
+	    entry->max_extent_size < cont1_bytes)
+		return -ENOSPC;
 again:
 	found_bits = 0;
 	for_each_set_bit_from(i, entry->bitmap, BITS_PER_BITMAP) {
@@ -2791,13 +2829,19 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
 					       BITS_PER_BITMAP, i);
 		if (next_zero - i >= min_bits) {
 			found_bits = next_zero - i;
+			if (found_bits > max_bits)
+				max_bits = found_bits;
 			break;
 		}
+		if (next_zero - i > max_bits)
+			max_bits = next_zero - i;
 		i = next_zero;
 	}
 
-	if (!found_bits)
+	if (!found_bits) {
+		entry->max_extent_size = (u64)max_bits * ctl->unit;
 		return -ENOSPC;
+	}
 
 	if (!total_found) {
 		start = i;
@@ -3056,6 +3100,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
 	spin_lock_init(&cluster->refill_lock);
 	cluster->root = RB_ROOT;
 	cluster->max_size = 0;
+	cluster->fragmented = false;
 	INIT_LIST_HEAD(&cluster->block_group_list);
 	cluster->block_group = NULL;
 }
@@ -3223,7 +3268,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
 		}
 
 		bytes = minlen;
-		ret2 = search_bitmap(ctl, entry, &start, &bytes);
+		ret2 = search_bitmap(ctl, entry, &start, &bytes, false);
 		if (ret2 || start >= end) {
 			spin_unlock(&ctl->tree_lock);
 			mutex_unlock(&ctl->cache_writeout_mutex);
@@ -3376,7 +3421,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
 		u64 count = 1;
 		int ret;
 
-		ret = search_bitmap(ctl, entry, &offset, &count);
+		ret = search_bitmap(ctl, entry, &offset, &count, true);
 		/* Logic error; Should be empty if it can't find anything */
 		ASSERT(!ret);
 
@@ -3532,6 +3577,7 @@ int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
 		spin_lock(&ctl->tree_lock);
 		info->offset = offset;
 		info->bytes = bytes;
+		info->max_extent_size = 0;
 		ret = link_free_space(ctl, info);
 		spin_unlock(&ctl->tree_lock);
 		if (ret)
@@ -3559,6 +3605,7 @@ int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
 		}
 
 		bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
 		bytes -= bytes_added;
+		offset += bytes_added;
 		spin_unlock(&ctl->tree_lock);
 
@@ -3602,7 +3649,7 @@ int test_check_exists(struct btrfs_block_group_cache *cache,
 		bit_off = offset;
 		bit_bytes = ctl->unit;
-		ret = search_bitmap(ctl, info, &bit_off, &bit_bytes);
+		ret = search_bitmap(ctl, info, &bit_off, &bit_bytes, false);
 		if (!ret) {
 			if (bit_off == offset) {
 				ret = 1;
...
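Note: the free-space-cache.c changes above do two related things. search_bitmap() gains a for_alloc flag so allocation-driven searches can take fast paths that removal and trim searches (which pass false) must not, and each bitmap caches the largest contiguous run it is known to contain in max_extent_size: a failed search records it, a later allocation that asks for more than the cached maximum fails immediately, and add_bytes_to_bitmap() zeroes the cache because newly set bits may have merged runs. A compilable userspace sketch of the caching idea, using a toy 64-bit bitmap with sizes in bits rather than bytes:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct bitmap_info {
	uint64_t bits;			/* toy bitmap: set bit == free */
	uint64_t max_extent_size;	/* 0 means "unknown" */
};

/* Find a run of at least *len free bits; returns -1 on failure and
 * reports the longest run found through *len. */
static int search(struct bitmap_info *b, unsigned *start, unsigned *len,
		  bool for_alloc)
{
	unsigned best = 0, run = 0;

	/* Fast path: a failed search already told us the max run. */
	if (for_alloc && b->max_extent_size && b->max_extent_size < *len) {
		*len = (unsigned)b->max_extent_size;
		return -1;
	}

	for (unsigned i = 0; i < 64; i++) {
		if (b->bits & (1ULL << i)) {
			if (++run >= *len) {
				*start = i - run + 1;
				return 0;
			}
		} else {
			run = 0;
		}
		if (run > best)
			best = run;
	}
	*len = best;
	b->max_extent_size = best;	/* cache for the next caller */
	return -1;
}

int main(void)
{
	struct bitmap_info b = { .bits = 0x0f0f0f0f, .max_extent_size = 0 };
	unsigned start, len = 16;

	if (search(&b, &start, &len, true))
		printf("no 16-bit run; max run is %u\n", len);
	len = 16;
	if (search(&b, &start, &len, true))
		printf("second search short-circuits at %u\n", len);
	return 0;
}

The kernel caches in bytes (max_bits * ctl->unit) rather than bits, and btrfs_bitmap_cluster() applies the same cache when building clusters by comparing against cont1_bytes.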
@@ -23,6 +23,7 @@ struct btrfs_free_space {
 	struct rb_node offset_index;
 	u64 offset;
 	u64 bytes;
+	u64 max_extent_size;
 	unsigned long *bitmap;
 	struct list_head list;
 };
...
@@ -9745,6 +9745,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 	u64 cur_offset = start;
 	u64 i_size;
 	u64 cur_bytes;
+	u64 last_alloc = (u64)-1;
 	int ret = 0;
 	bool own_trans = true;
 
@@ -9761,6 +9762,13 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 		cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
 		cur_bytes = max(cur_bytes, min_size);
+		/*
+		 * If we are severely fragmented we could end up with really
+		 * small allocations, so if the allocator is returning small
+		 * chunks lets make its job easier by only searching for those
+		 * sized chunks.
+		 */
+		cur_bytes = min(cur_bytes, last_alloc);
 		ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
 					   *alloc_hint, &ins, 1, 0);
 		if (ret) {
@@ -9769,6 +9777,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 			break;
 		}
 
+		last_alloc = ins.offset;
 		ret = insert_reserved_file_extent(trans, inode,
 						  cur_offset, ins.objectid,
 						  ins.offset, ins.offset,
...
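Note: in __btrfs_prealloc_file_range() (inode.c) the loop now remembers the size the allocator actually returned (last_alloc) and clamps the next request to it, so a fragmented allocator isn't repeatedly asked for extents it cannot provide. A compilable sketch of that feedback loop; fake_reserve_extent() is a stand-in for btrfs_reserve_extent() that always hands back at most 1 MiB, mimicking fragmented free space:

#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

static uint64_t fake_reserve_extent(uint64_t asked)
{
	/* pretend free space is fragmented into 1 MiB pieces */
	const uint64_t frag = 1024 * 1024;
	return MIN(asked, frag);
}

int main(void)
{
	uint64_t num_bytes = 10ULL * 1024 * 1024;
	uint64_t last_alloc = (uint64_t)-1;	/* no hint yet */

	while (num_bytes > 0) {
		uint64_t cur = MIN(num_bytes, 256ULL * 1024 * 1024);

		/* the new clamp: don't ask for more than last time */
		cur = MIN(cur, last_alloc);
		last_alloc = fake_reserve_extent(cur);
		num_bytes -= last_alloc;
		printf("allocated %llu bytes\n",
		       (unsigned long long)last_alloc);
	}
	return 0;
}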
@@ -490,15 +490,16 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
 
 	spin_lock_irq(&log->log_extents_lock[index]);
 	while (!list_empty(&log->logged_list[index])) {
+		struct inode *inode;
 		ordered = list_first_entry(&log->logged_list[index],
 					   struct btrfs_ordered_extent,
 					   log_list);
 		list_del_init(&ordered->log_list);
+		inode = ordered->inode;
 		spin_unlock_irq(&log->log_extents_lock[index]);
 
 		if (!test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags) &&
 		    !test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
-			struct inode *inode = ordered->inode;
 			u64 start = ordered->file_offset;
 			u64 end = ordered->file_offset + ordered->len - 1;
 
@@ -509,20 +510,25 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
 						   &ordered->flags));
 
 		/*
-		 * If our ordered extent completed it means it updated the
-		 * fs/subvol and csum trees already, so no need to make the
-		 * current transaction's commit wait for it, as we end up
-		 * holding memory unnecessarily and delaying the inode's iput
-		 * until the transaction commit (we schedule an iput for the
-		 * inode when the ordered extent's refcount drops to 0), which
-		 * prevents it from being evictable until the transaction
-		 * commits.
+		 * In order to keep us from losing our ordered extent
+		 * information when committing the transaction we have to make
+		 * sure that any logged extents are completed when we go to
+		 * commit the transaction.  To do this we simply increase the
+		 * current transactions pending_ordered counter and decrement it
+		 * when the ordered extent completes.
 		 */
-		if (test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags))
-			btrfs_put_ordered_extent(ordered);
-		else
-			list_add_tail(&ordered->trans_list, &trans->ordered);
+		if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
+			struct btrfs_ordered_inode_tree *tree;
+
+			tree = &BTRFS_I(inode)->ordered_tree;
+			spin_lock_irq(&tree->lock);
+			if (!test_bit(BTRFS_ORDERED_COMPLETE, &ordered->flags)) {
+				set_bit(BTRFS_ORDERED_PENDING, &ordered->flags);
+				atomic_inc(&trans->transaction->pending_ordered);
+			}
+			spin_unlock_irq(&tree->lock);
+		}
+		btrfs_put_ordered_extent(ordered);
 
 		spin_lock_irq(&log->log_extents_lock[index]);
 	}
 	spin_unlock_irq(&log->log_extents_lock[index]);
@@ -584,6 +590,7 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 	struct btrfs_ordered_inode_tree *tree;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct rb_node *node;
+	bool dec_pending_ordered = false;
 
 	tree = &BTRFS_I(inode)->ordered_tree;
 	spin_lock_irq(&tree->lock);
@@ -593,8 +600,37 @@ void btrfs_remove_ordered_extent(struct inode *inode,
 	if (tree->last == node)
 		tree->last = NULL;
 	set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
+	if (test_and_clear_bit(BTRFS_ORDERED_PENDING, &entry->flags))
+		dec_pending_ordered = true;
 	spin_unlock_irq(&tree->lock);
 
+	/*
+	 * The current running transaction is waiting on us, we need to let it
+	 * know that we're complete and wake it up.
+	 */
+	if (dec_pending_ordered) {
+		struct btrfs_transaction *trans;
+
+		/*
+		 * The checks for trans are just a formality, it should be set,
+		 * but if it isn't we don't want to deref/assert under the spin
+		 * lock, so be nice and check if trans is set, but ASSERT() so
+		 * if it isn't set a developer will notice.
+		 */
+		spin_lock(&root->fs_info->trans_lock);
+		trans = root->fs_info->running_transaction;
+		if (trans)
+			atomic_inc(&trans->use_count);
+		spin_unlock(&root->fs_info->trans_lock);
+
+		ASSERT(trans);
+		if (trans) {
+			if (atomic_dec_and_test(&trans->pending_ordered))
+				wake_up(&trans->pending_wait);
+			btrfs_put_transaction(trans);
+		}
+	}
+
 	spin_lock(&root->ordered_extent_lock);
 	list_del_init(&entry->root_extent_list);
 	root->nr_ordered_extents--;
...
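Note: the ordered-data.c hunks replace the old per-transaction list of pending ordered extents with a counter protocol: btrfs_wait_logged_extents() marks an incomplete extent BTRFS_ORDERED_PENDING and bumps the transaction's pending_ordered counter, btrfs_remove_ordered_extent() does the matching atomic_dec_and_test() and wakes pending_wait, and the commit path simply waits for the counter to drain (see the transaction.c hunks below). A compilable userspace model of that protocol, with a mutex/condvar pair standing in for the kernel's atomic_t plus wait_event()/wake_up():

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t pending_wait = PTHREAD_COND_INITIALIZER;
static int pending_ordered;

/* btrfs_wait_logged_extents() side: count an incomplete extent. */
static void ordered_extent_logged(void)
{
	pthread_mutex_lock(&lock);
	pending_ordered++;
	pthread_mutex_unlock(&lock);
}

/* btrfs_remove_ordered_extent() side: drop the count, wake committer. */
static void *ordered_extent_complete(void *arg)
{
	pthread_mutex_lock(&lock);
	if (--pending_ordered == 0)		/* atomic_dec_and_test() */
		pthread_cond_signal(&pending_wait);	/* wake_up() */
	pthread_mutex_unlock(&lock);
	return arg;
}

int main(void)
{
	pthread_t workers[4];

	for (int i = 0; i < 4; i++)
		ordered_extent_logged();
	for (int i = 0; i < 4; i++)
		pthread_create(&workers[i], NULL, ordered_extent_complete,
			       NULL);

	/* btrfs_wait_pending_ordered(): block until the count drains. */
	pthread_mutex_lock(&lock);
	while (pending_ordered != 0)
		pthread_cond_wait(&pending_wait, &lock);
	pthread_mutex_unlock(&lock);

	printf("all pending ordered extents completed; commit may proceed\n");
	for (int i = 0; i < 4; i++)
		pthread_join(workers[i], NULL);
	return 0;
}

The double-checked BTRFS_ORDERED_COMPLETE test under tree->lock in the hunk above closes the race where an extent completes between the first check and the counter increment.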
@@ -73,6 +73,8 @@ struct btrfs_ordered_sum {
 #define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent
 				  * in the logging code. */
+#define BTRFS_ORDERED_PENDING 11 /* We are waiting for this ordered extent to
+				  * complete in the current transaction. */
 
 struct btrfs_ordered_extent {
 	/* logical offset in the file */
 	u64 file_offset;
...
@@ -303,6 +303,9 @@ enum {
 	Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
 	Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
 	Opt_datasum, Opt_treelog, Opt_noinode_cache,
+#ifdef CONFIG_BTRFS_DEBUG
+	Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
+#endif
 	Opt_err,
 };
 
@@ -355,6 +358,11 @@ static match_table_t tokens = {
 	{Opt_rescan_uuid_tree, "rescan_uuid_tree"},
 	{Opt_fatal_errors, "fatal_errors=%s"},
 	{Opt_commit_interval, "commit=%d"},
+#ifdef CONFIG_BTRFS_DEBUG
+	{Opt_fragment_data, "fragment=data"},
+	{Opt_fragment_metadata, "fragment=metadata"},
+	{Opt_fragment_all, "fragment=all"},
+#endif
 	{Opt_err, NULL},
 };
 
@@ -721,6 +729,22 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 				info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
 			}
 			break;
+#ifdef CONFIG_BTRFS_DEBUG
+		case Opt_fragment_all:
+			btrfs_info(root->fs_info, "fragmenting all space");
+			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
+			btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA);
+			break;
+		case Opt_fragment_metadata:
+			btrfs_info(root->fs_info, "fragmenting metadata");
+			btrfs_set_opt(info->mount_opt,
+				      FRAGMENT_METADATA);
+			break;
+		case Opt_fragment_data:
+			btrfs_info(root->fs_info, "fragmenting data");
+			btrfs_set_opt(info->mount_opt, FRAGMENT_DATA);
+			break;
+#endif
 		case Opt_err:
 			btrfs_info(root->fs_info, "unrecognized mount option '%s'", p);
 			ret = -EINVAL;
@@ -1172,6 +1196,12 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 		seq_puts(seq, ",fatal_errors=panic");
 	if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
 		seq_printf(seq, ",commit=%d", info->commit_interval);
+#ifdef CONFIG_BTRFS_DEBUG
+	if (btrfs_test_opt(root, FRAGMENT_DATA))
+		seq_puts(seq, ",fragment=data");
+	if (btrfs_test_opt(root, FRAGMENT_METADATA))
+		seq_puts(seq, ",fragment=metadata");
+#endif
 	seq_printf(seq, ",subvolid=%llu",
 		   BTRFS_I(d_inode(dentry))->root->root_key.objectid);
 	seq_puts(seq, ",subvol=");
...
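Note: on a CONFIG_BTRFS_DEBUG build these parse as ordinary mount options (for example, mount -o fragment=all /dev/sdX /mnt), and the btrfs_show_options() hunk echoes them back so they show up in /proc/mounts and survive a remount.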
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include "btrfs-tests.h"
 #include "../ctree.h"
+#include "../disk-io.h"
 #include "../free-space-cache.h"
 
 #define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
@@ -35,6 +36,12 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
 		kfree(cache);
 		return NULL;
 	}
+	cache->fs_info = btrfs_alloc_dummy_fs_info();
+	if (!cache->fs_info) {
+		kfree(cache->free_space_ctl);
+		kfree(cache);
+		return NULL;
+	}
 
 	cache->key.objectid = 0;
 	cache->key.offset = 1024 * 1024 * 1024;
@@ -879,7 +886,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache)
 int btrfs_test_free_space_cache(void)
 {
 	struct btrfs_block_group_cache *cache;
-	int ret;
+	struct btrfs_root *root = NULL;
+	int ret = -ENOMEM;
 
 	test_msg("Running btrfs free space cache tests\n");
 
@@ -889,6 +897,17 @@ int btrfs_test_free_space_cache(void)
 		return 0;
 	}
 
+	root = btrfs_alloc_dummy_root();
+	if (!root)
+		goto out;
+
+	root->fs_info = btrfs_alloc_dummy_fs_info();
+	if (!root->fs_info)
+		goto out;
+
+	root->fs_info->extent_root = root;
+	cache->fs_info = root->fs_info;
+
 	ret = test_extents(cache);
 	if (ret)
 		goto out;
@@ -904,6 +923,7 @@ int btrfs_test_free_space_cache(void)
 	__btrfs_remove_free_space_cache(cache->free_space_ctl);
 	kfree(cache->free_space_ctl);
 	kfree(cache);
+	btrfs_free_dummy_root(root);
 	test_msg("Free space cache tests finished\n");
 	return ret;
 }
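Note: the selftest now allocates a dummy root and fs_info so that the new debug path through use_bitmap(), which calls btrfs_should_fragment_free_space() and dereferences block_group->fs_info->extent_root, is safe to exercise from the free-space tests; btrfs_free_dummy_root() releases it on the way out.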
@@ -232,15 +232,16 @@ static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
 	extwriter_counter_init(cur_trans, type);
 	init_waitqueue_head(&cur_trans->writer_wait);
 	init_waitqueue_head(&cur_trans->commit_wait);
+	init_waitqueue_head(&cur_trans->pending_wait);
 	cur_trans->state = TRANS_STATE_RUNNING;
 	/*
 	 * One for this trans handle, one so it will live on until we
 	 * commit the transaction.
 	 */
 	atomic_set(&cur_trans->use_count, 2);
-	cur_trans->have_free_bgs = 0;
+	atomic_set(&cur_trans->pending_ordered, 0);
+	cur_trans->flags = 0;
 	cur_trans->start_time = get_seconds();
-	cur_trans->dirty_bg_run = 0;
 
 	memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));
@@ -266,7 +267,6 @@ static noinline int join_transaction(struct btrfs_root *root, unsigned int type)
 	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
 	INIT_LIST_HEAD(&cur_trans->pending_chunks);
 	INIT_LIST_HEAD(&cur_trans->switch_commits);
-	INIT_LIST_HEAD(&cur_trans->pending_ordered);
 	INIT_LIST_HEAD(&cur_trans->dirty_bgs);
 	INIT_LIST_HEAD(&cur_trans->io_bgs);
 	INIT_LIST_HEAD(&cur_trans->dropped_roots);
@@ -549,7 +549,6 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
 	h->can_flush_pending_bgs = true;
 	INIT_LIST_HEAD(&h->qgroup_ref_list);
 	INIT_LIST_HEAD(&h->new_bgs);
-	INIT_LIST_HEAD(&h->ordered);
 
 	smp_mb();
 	if (cur_trans->state >= TRANS_STATE_BLOCKED &&
@@ -780,12 +779,6 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 	if (!list_empty(&trans->new_bgs))
 		btrfs_create_pending_block_groups(trans, root);
 
-	if (!list_empty(&trans->ordered)) {
-		spin_lock(&info->trans_lock);
-		list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
-		spin_unlock(&info->trans_lock);
-	}
-
 	trans->delayed_ref_updates = 0;
 	if (!trans->sync) {
 		must_run_delayed_refs =
@@ -1776,25 +1769,10 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
 }
 
 static inline void
-btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans,
-			   struct btrfs_fs_info *fs_info)
+btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans)
 {
-	struct btrfs_ordered_extent *ordered;
-
-	spin_lock(&fs_info->trans_lock);
-	while (!list_empty(&cur_trans->pending_ordered)) {
-		ordered = list_first_entry(&cur_trans->pending_ordered,
-					   struct btrfs_ordered_extent,
-					   trans_list);
-		list_del_init(&ordered->trans_list);
-		spin_unlock(&fs_info->trans_lock);
-
-		wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE,
-						   &ordered->flags));
-		btrfs_put_ordered_extent(ordered);
-		spin_lock(&fs_info->trans_lock);
-	}
-	spin_unlock(&fs_info->trans_lock);
+	wait_event(cur_trans->pending_wait,
+		   atomic_read(&cur_trans->pending_ordered) == 0);
 }
 
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
@@ -1842,7 +1820,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 		return ret;
 	}
 
-	if (!cur_trans->dirty_bg_run) {
+	if (!test_bit(BTRFS_TRANS_DIRTY_BG_RUN, &cur_trans->flags)) {
 		int run_it = 0;
 
 		/* this mutex is also taken before trying to set
@@ -1851,18 +1829,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 		 * after a extents from that block group have been
 		 * allocated for cache files.  btrfs_set_block_group_ro
 		 * will wait for the transaction to commit if it
-		 * finds dirty_bg_run = 1
+		 * finds BTRFS_TRANS_DIRTY_BG_RUN set.
 		 *
-		 * The dirty_bg_run flag is also used to make sure only
-		 * one process starts all the block group IO. It wouldn't
+		 * The BTRFS_TRANS_DIRTY_BG_RUN flag is also used to make sure
+		 * only one process starts all the block group IO.  It wouldn't
 		 * hurt to have more than one go through, but there's no
 		 * real advantage to it either.
 		 */
 		mutex_lock(&root->fs_info->ro_block_group_mutex);
-		if (!cur_trans->dirty_bg_run) {
+		if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN,
+				      &cur_trans->flags))
 			run_it = 1;
-			cur_trans->dirty_bg_run = 1;
-		}
 		mutex_unlock(&root->fs_info->ro_block_group_mutex);
 
 		if (run_it)
@@ -1874,7 +1851,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	}
 
 	spin_lock(&root->fs_info->trans_lock);
-	list_splice_init(&trans->ordered, &cur_trans->pending_ordered);
 	if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
 		spin_unlock(&root->fs_info->trans_lock);
 		atomic_inc(&cur_trans->use_count);
@@ -1933,7 +1909,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	btrfs_wait_delalloc_flush(root->fs_info);
 
-	btrfs_wait_pending_ordered(cur_trans, root->fs_info);
+	btrfs_wait_pending_ordered(cur_trans);
 
 	btrfs_scrub_pause(root);
 
 	/*
@@ -2133,7 +2109,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 	btrfs_finish_extent_commit(trans, root);
 
-	if (cur_trans->have_free_bgs)
+	if (test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &cur_trans->flags))
 		btrfs_clear_space_info_full(root->fs_info);
 
 	root->fs_info->last_trans_committed = cur_trans->transid;
...
@@ -32,6 +32,10 @@ enum btrfs_trans_state {
 	TRANS_STATE_MAX		= 6,
 };
 
+#define BTRFS_TRANS_HAVE_FREE_BGS	0
+#define BTRFS_TRANS_DIRTY_BG_RUN	1
+#define BTRFS_TRANS_CACHE_ENOSPC	2
+
 struct btrfs_transaction {
 	u64 transid;
 	/*
@@ -46,11 +50,9 @@ struct btrfs_transaction {
 	 */
 	atomic_t num_writers;
 	atomic_t use_count;
+	atomic_t pending_ordered;
 
-	/*
-	 * true if there is free bgs operations in this transaction
-	 */
-	int have_free_bgs;
+	unsigned long flags;
 
 	/* Be protected by fs_info->trans_lock when we want to change it. */
 	enum btrfs_trans_state state;
@@ -59,9 +61,9 @@ struct btrfs_transaction {
 	unsigned long start_time;
 	wait_queue_head_t writer_wait;
 	wait_queue_head_t commit_wait;
+	wait_queue_head_t pending_wait;
 	struct list_head pending_snapshots;
 	struct list_head pending_chunks;
-	struct list_head pending_ordered;
 	struct list_head switch_commits;
 	struct list_head dirty_bgs;
 	struct list_head io_bgs;
@@ -80,7 +82,6 @@ struct btrfs_transaction {
 	spinlock_t dropped_roots_lock;
 	struct btrfs_delayed_ref_root delayed_refs;
 	int aborted;
-	int dirty_bg_run;
 };
 
 #define __TRANS_FREEZABLE	(1U << 0)
@@ -128,7 +129,6 @@ struct btrfs_trans_handle {
 	 */
 	struct btrfs_root *root;
 	struct seq_list delayed_ref_elem;
-	struct list_head ordered;
 	struct list_head qgroup_ref_list;
 	struct list_head new_bgs;
 };
...
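Note: transaction.h folds the old have_free_bgs and dirty_bg_run ints (plus a new CACHE_ENOSPC state) into a single unsigned long flags word driven by the kernel's set_bit()/test_bit()/test_and_set_bit() bitops, which is what lets btrfs_commit_transaction() collapse its check-then-set of dirty_bg_run into one test_and_set_bit() call. A compilable userspace model of that conversion, with C11 atomics in place of the kernel bitops:

#include <stdatomic.h>
#include <stdio.h>

/* Bit numbers copied from the hunk above. */
#define BTRFS_TRANS_HAVE_FREE_BGS	0
#define BTRFS_TRANS_DIRTY_BG_RUN	1
#define BTRFS_TRANS_CACHE_ENOSPC	2

static _Atomic unsigned long trans_flags;

/* Returns the previous value of bit nr, setting it atomically. */
static int test_and_set_bit(int nr, _Atomic unsigned long *addr)
{
	unsigned long old = atomic_fetch_or(addr, 1UL << nr);
	return (old >> nr) & 1;
}

static int test_bit(int nr, _Atomic unsigned long *addr)
{
	return (atomic_load(addr) >> nr) & 1;
}

int main(void)
{
	/* only the first caller claims the block-group IO work */
	if (!test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans_flags))
		printf("this caller starts the dirty block group IO\n");
	if (test_and_set_bit(BTRFS_TRANS_DIRTY_BG_RUN, &trans_flags))
		printf("second caller sees the bit already set\n");

	printf("HAVE_FREE_BGS set? %d\n",
	       test_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans_flags));
	return 0;
}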
@@ -1462,7 +1462,7 @@ static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
 		btrfs_std_error(root->fs_info, ret,
 				"Failed to remove dev extent item");
 	} else {
-		trans->transaction->have_free_bgs = 1;
+		set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
 	}
 out:
 	btrfs_free_path(path);
...