Commit 655c5d8f authored by Linus Torvalds

Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (22 commits)
  Btrfs: Fix async caching interaction with unmount
  Btrfs: change how we unpin extents
  Btrfs: Correct redundant test in add_inode_ref
  Btrfs: find smallest available device extent during chunk allocation
  Btrfs: clear all space_info->full after removing a block group
  Btrfs: make flushoncommit mount option correctly wait on ordered_extents
  Btrfs: Avoid delayed reference update looping
  Btrfs: Fix ordering of key field checks in btrfs_previous_item
  Btrfs: find_free_dev_extent doesn't handle holes at the start of the device
  Btrfs: Remove code duplication in comp_keys
  Btrfs: async block group caching
  Btrfs: use hybrid extents+bitmap rb tree for free space
  Btrfs: Fix crash on read failures at mount
  Btrfs: remove of redundant btrfs_header_level
  Btrfs: adjust NULL test
  Btrfs: Remove broken sanity check from btrfs_rmap_block()
  Btrfs: convert nested spin_lock_irqsave to spin_lock
  Btrfs: make sure all dirty blocks are written at commit time
  Btrfs: fix locking issue in btrfs_find_next_key
  Btrfs: fix double increment of path->slots[0] in btrfs_next_leaf
  ...
parents ce4adcc6 f25784b3
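
The headline change in this pull is asynchronous block group caching: instead of populating a block group's free-space cache synchronously under a mutex at allocation time, a per-block-group kthread now scans the extent tree in the background while allocators skip the group, poll it, or wait on it. A minimal standalone sketch of that handshake, using the state enum, cached field and caching_q wait queue that the fs/btrfs/ctree.h hunks below introduce (an illustration of the shape, not the kernel code itself):

    #include <linux/spinlock.h>
    #include <linux/wait.h>

    enum caching_state { CACHE_NO, CACHE_STARTED, CACHE_FINISHED };

    struct bg {                       /* stand-in for btrfs_block_group_cache */
        spinlock_t lock;
        int cached;                   /* one of caching_state */
        wait_queue_head_t caching_q;  /* caching thread wakes waiters here */
    };

    static int bg_cache_done(struct bg *bg)
    {
        smp_mb();                     /* pair with the publishing store */
        return bg->cached == CACHE_FINISHED;
    }

    static void bg_start_caching(struct bg *bg)
    {
        spin_lock(&bg->lock);
        if (bg->cached != CACHE_NO) {      /* someone already started it */
            spin_unlock(&bg->lock);
            return;
        }
        bg->cached = CACHE_STARTED;        /* claim it, then spawn a kthread */
        spin_unlock(&bg->lock);
        /* kthread_run(worker, bg, "cache-%llu", ...) would go here */
    }

Allocators that cannot use an uncached group either move on or sleep with wait_event(bg->caching_q, bg_cache_done(bg)); that is exactly the shape of the extent-tree.c hunks further down.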
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -424,11 +424,11 @@ int btrfs_requeue_work(struct btrfs_work *work)
 	 * list
 	 */
 	if (worker->idle) {
-		spin_lock_irqsave(&worker->workers->lock, flags);
+		spin_lock(&worker->workers->lock);
 		worker->idle = 0;
 		list_move_tail(&worker->worker_list,
 			       &worker->workers->worker_list);
-		spin_unlock_irqrestore(&worker->workers->lock, flags);
+		spin_unlock(&worker->workers->lock);
 	}
 	if (!worker->working) {
 		wake = 1;
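
A note on the hunk above: spin_lock_irqsave() saves the interrupt state and disables interrupts, which is only needed when interrupts may still be enabled at the call site. When a nested lock is only ever taken while interrupts are already off, plain spin_lock()/spin_unlock() gives the same protection without redundantly saving and restoring flags, which is presumably why the inner workers->lock can drop the irqsave variant here. A generic illustration (not btrfs code):

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(outer_lock);
    static DEFINE_SPINLOCK(inner_lock);

    static void update_both(void)
    {
        unsigned long flags;

        spin_lock_irqsave(&outer_lock, flags);  /* irqs disabled from here */
        /*
         * Interrupts are already off, so a nested irqsave on inner_lock
         * would only re-save already-saved flags; the plain form suffices.
         */
        spin_lock(&inner_lock);
        /* ... touch data guarded by both locks ... */
        spin_unlock(&inner_lock);
        spin_unlock_irqrestore(&outer_lock, flags);
    }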
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -557,19 +557,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
 	btrfs_disk_key_to_cpu(&k1, disk);

-	if (k1.objectid > k2->objectid)
-		return 1;
-	if (k1.objectid < k2->objectid)
-		return -1;
-	if (k1.type > k2->type)
-		return 1;
-	if (k1.type < k2->type)
-		return -1;
-	if (k1.offset > k2->offset)
-		return 1;
-	if (k1.offset < k2->offset)
-		return -1;
-	return 0;
+	return btrfs_comp_cpu_keys(&k1, k2);
 }

 /*
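
The thirteen deleted lines live on in btrfs_comp_cpu_keys(), which comp_keys() now delegates to after converting the on-disk key: btrfs keys order by objectid, then type, then offset, with explicit >/< tests so the u64 fields never pass through a truncating or overflowing subtraction. Reconstructed from the removed body (a sketch):

    static int comp_cpu_keys(const struct btrfs_key *k1,
                             const struct btrfs_key *k2)
    {
        /* keys sort lexicographically by (objectid, type, offset) */
        if (k1->objectid > k2->objectid)
            return 1;
        if (k1->objectid < k2->objectid)
            return -1;
        if (k1->type > k2->type)
            return 1;
        if (k1->type < k2->type)
            return -1;
        if (k1->offset > k2->offset)
            return 1;
        if (k1->offset < k2->offset)
            return -1;
        return 0;
    }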
@@ -1052,9 +1040,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 		return 0;

-	if (btrfs_header_nritems(mid) > 2)
-		return 0;
-
 	if (btrfs_header_nritems(mid) < 2)
 		err_on_enospc = 1;
@@ -1701,6 +1686,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	struct extent_buffer *b;
 	int slot;
 	int ret;
+	int err;
 	int level;
 	int lowest_unlock = 1;
 	u8 lowest_level = 0;
@@ -1737,8 +1723,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 		p->locks[level] = 1;

 		if (cow) {
-			int wret;
-
 			/*
 			 * if we don't really need to cow this block
 			 * then we don't want to set the path blocking,
@@ -1749,12 +1733,12 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 			btrfs_set_path_blocking(p);

-			wret = btrfs_cow_block(trans, root, b,
-					       p->nodes[level + 1],
-					       p->slots[level + 1], &b);
-			if (wret) {
+			err = btrfs_cow_block(trans, root, b,
+					      p->nodes[level + 1],
+					      p->slots[level + 1], &b);
+			if (err) {
 				free_extent_buffer(b);
-				ret = wret;
+				ret = err;
 				goto done;
 			}
 		}
@@ -1793,41 +1777,45 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 		ret = bin_search(b, key, level, &slot);

 		if (level != 0) {
-			if (ret && slot > 0)
+			int dec = 0;
+			if (ret && slot > 0) {
+				dec = 1;
 				slot -= 1;
+			}
 			p->slots[level] = slot;
-			ret = setup_nodes_for_search(trans, root, p, b, level,
-						     ins_len);
-			if (ret == -EAGAIN)
+			err = setup_nodes_for_search(trans, root, p, b, level,
+						     ins_len);
+			if (err == -EAGAIN)
 				goto again;
-			else if (ret)
+			if (err) {
+				ret = err;
 				goto done;
+			}
 			b = p->nodes[level];
 			slot = p->slots[level];

 			unlock_up(p, level, lowest_unlock);

-			/* this is only true while dropping a snapshot */
 			if (level == lowest_level) {
-				ret = 0;
+				if (dec)
+					p->slots[level]++;
 				goto done;
 			}

-			ret = read_block_for_search(trans, root, p,
-						    &b, level, slot, key);
-			if (ret == -EAGAIN)
+			err = read_block_for_search(trans, root, p,
+						    &b, level, slot, key);
+			if (err == -EAGAIN)
 				goto again;
-
-			if (ret == -EIO)
+			if (err) {
+				ret = err;
 				goto done;
+			}

 			if (!p->skip_locking) {
-				int lret;
-
 				btrfs_clear_path_blocking(p, NULL);
-				lret = btrfs_try_spin_lock(b);
-
-				if (!lret) {
+				err = btrfs_try_spin_lock(b);
+
+				if (!err) {
 					btrfs_set_path_blocking(p);
 					btrfs_tree_lock(b);
 					btrfs_clear_path_blocking(p, b);
@@ -1837,16 +1825,14 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 			p->slots[level] = slot;
 			if (ins_len > 0 &&
 			    btrfs_leaf_free_space(root, b) < ins_len) {
-				int sret;
-
 				btrfs_set_path_blocking(p);
-				sret = split_leaf(trans, root, key,
-						  p, ins_len, ret == 0);
+				err = split_leaf(trans, root, key,
+						 p, ins_len, ret == 0);
 				btrfs_clear_path_blocking(p, NULL);

-				BUG_ON(sret > 0);
-				if (sret) {
-					ret = sret;
+				BUG_ON(err > 0);
+				if (err) {
+					ret = err;
 					goto done;
 				}
 			}
@@ -3807,7 +3793,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	}

 	/* delete the leaf if it is mostly empty */
-	if (used < BTRFS_LEAF_DATA_SIZE(root) / 2) {
+	if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
 		/* push_leaf_left fixes the path.
 		 * make sure the path still points to our leaf
 		 * for possible call to del_ptr below
@@ -4042,10 +4028,9 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
  * calling this function.
  */
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
-			struct btrfs_key *key, int lowest_level,
+			struct btrfs_key *key, int level,
 			int cache_only, u64 min_trans)
 {
-	int level = lowest_level;
 	int slot;
 	struct extent_buffer *c;

@@ -4058,11 +4043,40 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
 		c = path->nodes[level];
 next:
 		if (slot >= btrfs_header_nritems(c)) {
-			level++;
-			if (level == BTRFS_MAX_LEVEL)
+			int ret;
+			int orig_lowest;
+			struct btrfs_key cur_key;
+
+			if (level + 1 >= BTRFS_MAX_LEVEL ||
+			    !path->nodes[level + 1])
 				return 1;
-			continue;
+
+			if (path->locks[level + 1]) {
+				level++;
+				continue;
+			}
+
+			slot = btrfs_header_nritems(c) - 1;
+			if (level == 0)
+				btrfs_item_key_to_cpu(c, &cur_key, slot);
+			else
+				btrfs_node_key_to_cpu(c, &cur_key, slot);
+
+			orig_lowest = path->lowest_level;
+			btrfs_release_path(root, path);
+			path->lowest_level = level;
+			ret = btrfs_search_slot(NULL, root, &cur_key, path,
+						0, 0);
+			path->lowest_level = orig_lowest;
+			if (ret < 0)
+				return ret;
+
+			c = path->nodes[level];
+			slot = path->slots[level];
+			if (ret == 0)
+				slot++;
+			goto next;
 		}
+
 		if (level == 0)
 			btrfs_item_key_to_cpu(c, key, slot);
 		else {
@@ -4146,6 +4160,7 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 	 * advance the path if there are now more items available.
 	 */
 	if (nritems > 0 && path->slots[0] < nritems - 1) {
-		path->slots[0]++;
+		if (ret == 0)
+			path->slots[0]++;
 		ret = 0;
 		goto done;
@@ -4278,10 +4293,10 @@ int btrfs_previous_item(struct btrfs_root *root,
 		path->slots[0]--;

 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-		if (found_key.type == type)
-			return 0;
 		if (found_key.objectid < min_objectid)
 			break;
+		if (found_key.type == type)
+			return 0;
 		if (found_key.objectid == min_objectid &&
 		    found_key.type < type)
 			break;
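
The reorder in the last hunk fixes a subtle walk-termination bug: with the type test first, a key whose type matched was returned before the objectid lower bound had been checked, so btrfs_previous_item() could hand back an item belonging to a smaller objectid than the caller asked for. Checking the bound first makes the type test safe. The corrected decision order, condensed into a hypothetical helper for illustration:

    /* returns -1 = stop, 0 = found, 1 = keep walking backwards */
    static int previous_item_check(u64 objectid, u8 type,
                                   u64 min_objectid, u8 wanted_type)
    {
        if (objectid < min_objectid)                 /* bound check first */
            return -1;
        if (type == wanted_type)                     /* match within bound */
            return 0;
        if (objectid == min_objectid && type < wanted_type)
            return -1;                               /* walked past target */
        return 1;
    }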
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -481,7 +481,7 @@ struct btrfs_shared_data_ref {

 struct btrfs_extent_inline_ref {
 	u8 type;
-	u64 offset;
+	__le64 offset;
 } __attribute__ ((__packed__));

 /* old style backrefs item */
@@ -689,6 +689,7 @@ struct btrfs_space_info {
 	struct list_head block_groups;
 	spinlock_t lock;
 	struct rw_semaphore groups_sem;
+	atomic_t caching_threads;
 };

 /*
@@ -707,6 +708,9 @@ struct btrfs_free_cluster {
 	/* first extent starting offset */
 	u64 window_start;

+	/* if this cluster simply points at a bitmap in the block group */
+	bool points_to_bitmap;
+
 	struct btrfs_block_group_cache *block_group;
 	/*
 	 * when a cluster is allocated from a block group, we put the
@@ -716,24 +720,37 @@ struct btrfs_free_cluster {
 	struct list_head block_group_list;
 };

+enum btrfs_caching_type {
+	BTRFS_CACHE_NO		= 0,
+	BTRFS_CACHE_STARTED	= 1,
+	BTRFS_CACHE_FINISHED	= 2,
+};
+
 struct btrfs_block_group_cache {
 	struct btrfs_key key;
 	struct btrfs_block_group_item item;
+	struct btrfs_fs_info *fs_info;
 	spinlock_t lock;
-	struct mutex cache_mutex;
 	u64 pinned;
 	u64 reserved;
 	u64 flags;
-	int cached;
+	u64 sectorsize;
+	int extents_thresh;
+	int free_extents;
+	int total_bitmaps;
 	int ro;
 	int dirty;

+	/* cache tracking stuff */
+	wait_queue_head_t caching_q;
+	int cached;
+
 	struct btrfs_space_info *space_info;

 	/* free space cache stuff */
 	spinlock_t tree_lock;
-	struct rb_root free_space_bytes;
 	struct rb_root free_space_offset;
+	u64 free_space;

 	/* block group cache stuff */
 	struct rb_node cache_node;
@@ -942,6 +959,9 @@ struct btrfs_root {
 	/* the node lock is held while changing the node pointer */
 	spinlock_t node_lock;

+	/* taken when updating the commit root */
+	struct rw_semaphore commit_root_sem;
+
 	struct extent_buffer *commit_root;
 	struct btrfs_root *log_root;
 	struct btrfs_root *reloc_root;
@@ -1988,6 +2008,7 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
 				  u64 bytes);
 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
 			       u64 bytes);
+void btrfs_free_pinned_extents(struct btrfs_fs_info *info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 		     int level, int *slot);
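
The new commit_root_sem above exists so the caching kthread can walk the extent root's commit root (a frozen copy of the tree as of the last transaction commit) without it being swapped out underneath it: readers take the semaphore shared around a search_commit_root lookup, and the commit path takes it exclusive while installing a new commit root. The writer side lives in the transaction-commit code outside this excerpt, so the following is only a schematic of the pattern, not the kernel code:

    /* reader: async caching thread pinning the commit root */
    down_read(&root->commit_root_sem);
    /* ... btrfs_search_slot() with path->search_commit_root = 1 ... */
    up_read(&root->commit_root_sem);

    /* writer: transaction commit swapping in the new commit root */
    down_write(&root->commit_root_sem);
    free_extent_buffer(root->commit_root);
    root->commit_root = btrfs_root_node(root);
    up_write(&root->commit_root_sem);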
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -909,6 +909,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 	spin_lock_init(&root->inode_lock);
 	mutex_init(&root->objectid_mutex);
 	mutex_init(&root->log_mutex);
+	init_rwsem(&root->commit_root_sem);
 	init_waitqueue_head(&root->log_writer_wait);
 	init_waitqueue_head(&root->log_commit_wait[0]);
 	init_waitqueue_head(&root->log_commit_wait[1]);
@@ -1799,6 +1800,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 					   btrfs_super_chunk_root(disk_super),
 					   blocksize, generation);
 	BUG_ON(!chunk_root->node);
+	if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+		printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
+		       sb->s_id);
+		goto fail_chunk_root;
+	}
 	btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
 	chunk_root->commit_root = btrfs_root_node(chunk_root);
@@ -1826,6 +1832,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 					  blocksize, generation);
 	if (!tree_root->node)
 		goto fail_chunk_root;
+	if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
+		printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
+		       sb->s_id);
+		goto fail_tree_root;
+	}
 	btrfs_set_root_node(&tree_root->root_item, tree_root->node);
 	tree_root->commit_root = btrfs_root_node(tree_root);
@@ -2322,6 +2333,9 @@ int close_ctree(struct btrfs_root *root)
 		printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
 	}

+	fs_info->closing = 2;
+	smp_mb();
+
 	if (fs_info->delalloc_bytes) {
 		printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
 		       (unsigned long long)fs_info->delalloc_bytes);
@@ -2343,6 +2357,7 @@ int close_ctree(struct btrfs_root *root)
 	free_extent_buffer(root->fs_info->csum_root->commit_root);

 	btrfs_free_block_groups(root->fs_info);
+	btrfs_free_pinned_extents(root->fs_info);

 	del_fs_roots(fs_info);
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -21,6 +21,7 @@
 #include <linux/blkdev.h>
 #include <linux/sort.h>
 #include <linux/rcupdate.h>
+#include <linux/kthread.h>
 #include "compat.h"
 #include "hash.h"
 #include "ctree.h"
@@ -61,6 +62,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 alloc_bytes,
 			  u64 flags, int force);

+static noinline int
+block_group_cache_done(struct btrfs_block_group_cache *cache)
+{
+	smp_mb();
+	return cache->cached == BTRFS_CACHE_FINISHED;
+}
+
 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
 {
 	return (cache->flags & bits) == bits;
@@ -145,21 +153,71 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
 	return ret;
 }

+/*
+ * We always set EXTENT_LOCKED for the super mirror extents so we don't
+ * overwrite them, so those bits need to be unset.  Also, if we are unmounting
+ * with pinned extents still sitting there because we had a block group
+ * caching, we need to clear those now, since we are done.
+ */
+void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
+{
+	u64 start, end, last = 0;
+	int ret;
+
+	while (1) {
+		ret = find_first_extent_bit(&info->pinned_extents, last,
+					    &start, &end,
+					    EXTENT_LOCKED|EXTENT_DIRTY);
+		if (ret)
+			break;
+
+		clear_extent_bits(&info->pinned_extents, start, end,
+				  EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
+		last = end+1;
+	}
+}
+
+static int remove_sb_from_cache(struct btrfs_root *root,
+				struct btrfs_block_group_cache *cache)
+{
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	u64 bytenr;
+	u64 *logical;
+	int stripe_len;
+	int i, nr, ret;
+
+	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+		bytenr = btrfs_sb_offset(i);
+		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+				       cache->key.objectid, bytenr,
+				       0, &logical, &nr, &stripe_len);
+		BUG_ON(ret);
+		while (nr--) {
+			try_lock_extent(&fs_info->pinned_extents,
+					logical[nr],
+					logical[nr] + stripe_len - 1, GFP_NOFS);
+		}
+		kfree(logical);
+	}
+
+	return 0;
+}
+
 /*
  * this is only called by cache_block_group, since we could have freed extents
  * we need to check the pinned_extents for any extents that can't be used yet
  * since their free space will be released as soon as the transaction commits.
  */
-static int add_new_free_space(struct btrfs_block_group_cache *block_group,
+static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 			      struct btrfs_fs_info *info, u64 start, u64 end)
 {
-	u64 extent_start, extent_end, size;
+	u64 extent_start, extent_end, size, total_added = 0;
 	int ret;

 	while (start < end) {
 		ret = find_first_extent_bit(&info->pinned_extents, start,
 					    &extent_start, &extent_end,
-					    EXTENT_DIRTY);
+					    EXTENT_DIRTY|EXTENT_LOCKED);
 		if (ret)
 			break;
@@ -167,6 +225,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 			start = extent_end + 1;
 		} else if (extent_start > start && extent_start < end) {
 			size = extent_start - start;
+			total_added += size;
 			ret = btrfs_add_free_space(block_group, start,
 						   size);
 			BUG_ON(ret);
@@ -178,84 +237,79 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
 	if (start < end) {
 		size = end - start;
+		total_added += size;
 		ret = btrfs_add_free_space(block_group, start, size);
 		BUG_ON(ret);
 	}

-	return 0;
-}
-
-static int remove_sb_from_cache(struct btrfs_root *root,
-				struct btrfs_block_group_cache *cache)
-{
-	u64 bytenr;
-	u64 *logical;
-	int stripe_len;
-	int i, nr, ret;
-
-	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
-		bytenr = btrfs_sb_offset(i);
-		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
-				       cache->key.objectid, bytenr, 0,
-				       &logical, &nr, &stripe_len);
-		BUG_ON(ret);
-		while (nr--) {
-			btrfs_remove_free_space(cache, logical[nr],
-						stripe_len);
-		}
-		kfree(logical);
-	}
-	return 0;
+	return total_added;
 }

-static int cache_block_group(struct btrfs_root *root,
-			     struct btrfs_block_group_cache *block_group)
+static int caching_kthread(void *data)
 {
+	struct btrfs_block_group_cache *block_group = data;
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
+	u64 last = 0;
 	struct btrfs_path *path;
 	int ret = 0;
 	struct btrfs_key key;
 	struct extent_buffer *leaf;
 	int slot;
-	u64 last;
-
-	if (!block_group)
-		return 0;
-
-	root = root->fs_info->extent_root;
-
-	if (block_group->cached)
-		return 0;
+	u64 total_found = 0;
+
+	BUG_ON(!fs_info);

 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;

-	path->reada = 2;
+	atomic_inc(&block_group->space_info->caching_threads);
+	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+again:
+	/* need to make sure the commit_root doesn't disappear */
+	down_read(&fs_info->extent_root->commit_root_sem);
+
 	/*
-	 * we get into deadlocks with paths held by callers of this function.
-	 * since the alloc_mutex is protecting things right now, just
-	 * skip the locking here
+	 * We don't want to deadlock with somebody trying to allocate a new
+	 * extent for the extent root while also trying to search the extent
+	 * root to add free space.  So we skip locking and search the commit
+	 * root, since its read-only
 	 */
 	path->skip_locking = 1;
-	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
+	path->search_commit_root = 1;
+	path->reada = 2;
+
 	key.objectid = last;
 	key.offset = 0;
 	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
-	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
 	if (ret < 0)
 		goto err;

 	while (1) {
+		smp_mb();
+		if (block_group->fs_info->closing > 1) {
+			last = (u64)-1;
+			break;
+		}
+
 		leaf = path->nodes[0];
 		slot = path->slots[0];
 		if (slot >= btrfs_header_nritems(leaf)) {
-			ret = btrfs_next_leaf(root, path);
+			ret = btrfs_next_leaf(fs_info->extent_root, path);
 			if (ret < 0)
 				goto err;
-			if (ret == 0)
-				continue;
-			else
+			else if (ret)
 				break;
+
+			if (need_resched()) {
+				btrfs_release_path(fs_info->extent_root, path);
+				up_read(&fs_info->extent_root->commit_root_sem);
+				cond_resched();
+				goto again;
+			}
+
+			continue;
 		}
 		btrfs_item_key_to_cpu(leaf, &key, slot);
 		if (key.objectid < block_group->key.objectid)
@@ -266,24 +320,59 @@ static int cache_block_group(struct btrfs_root *root,
 			break;

 		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
-			add_new_free_space(block_group, root->fs_info, last,
-					   key.objectid);
-
+			total_found += add_new_free_space(block_group,
+							  fs_info, last,
+							  key.objectid);
 			last = key.objectid + key.offset;
 		}
+
+		if (total_found > (1024 * 1024 * 2)) {
+			total_found = 0;
+			wake_up(&block_group->caching_q);
+		}
 next:
 		path->slots[0]++;
 	}
+	ret = 0;

-	add_new_free_space(block_group, root->fs_info, last,
-			   block_group->key.objectid +
-			   block_group->key.offset);
+	total_found += add_new_free_space(block_group, fs_info, last,
+					  block_group->key.objectid +
+					  block_group->key.offset);

-	block_group->cached = 1;
-	remove_sb_from_cache(root, block_group);
-	ret = 0;
+	spin_lock(&block_group->lock);
+	block_group->cached = BTRFS_CACHE_FINISHED;
+	spin_unlock(&block_group->lock);
+
 err:
 	btrfs_free_path(path);
+	up_read(&fs_info->extent_root->commit_root_sem);
+	atomic_dec(&block_group->space_info->caching_threads);
+	wake_up(&block_group->caching_q);
+
+	return 0;
+}
+
+static int cache_block_group(struct btrfs_block_group_cache *cache)
+{
+	struct task_struct *tsk;
+	int ret = 0;
+
+	spin_lock(&cache->lock);
+	if (cache->cached != BTRFS_CACHE_NO) {
+		spin_unlock(&cache->lock);
+		return ret;
+	}
+	cache->cached = BTRFS_CACHE_STARTED;
+	spin_unlock(&cache->lock);
+
+	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
+			  cache->key.objectid);
+	if (IS_ERR(tsk)) {
+		ret = PTR_ERR(tsk);
+		printk(KERN_ERR "error running thread %d\n", ret);
+		BUG();
+	}
+
 	return ret;
 }
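
The spawn/complete handshake that caching_kthread() and cache_block_group() implement above, boiled down to a self-contained kernel-style sketch (illustrative names; the real worker scans the extent tree and publishes progress roughly every 2 MiB of found space):

    #include <linux/err.h>
    #include <linux/kthread.h>
    #include <linux/types.h>
    #include <linux/wait.h>

    struct cache_job {
        wait_queue_head_t waitq;
        int done;
        u64 found_bytes;
    };

    static int cache_worker(void *data)
    {
        struct cache_job *job = data;

        while (!job->done) {
            job->found_bytes += 2 * 1024 * 1024;   /* pretend batch of work */
            if (job->found_bytes >= 8 * 1024 * 1024)
                job->done = 1;
            wake_up(&job->waitq);                  /* let waiters re-check */
        }
        return 0;
    }

    static int cache_start_and_wait(struct cache_job *job, u64 need)
    {
        struct task_struct *tsk;

        init_waitqueue_head(&job->waitq);
        tsk = kthread_run(cache_worker, job, "cache-worker");
        if (IS_ERR(tsk))
            return PTR_ERR(tsk);

        /* sleep until fully cached, or enough space has shown up */
        wait_event(job->waitq, job->done || job->found_bytes >= need);
        return 0;
    }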
@@ -2387,13 +2476,29 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
 }

+static struct btrfs_block_group_cache *
+next_block_group(struct btrfs_root *root,
+		 struct btrfs_block_group_cache *cache)
+{
+	struct rb_node *node;
+	spin_lock(&root->fs_info->block_group_cache_lock);
+	node = rb_next(&cache->cache_node);
+	btrfs_put_block_group(cache);
+	if (node) {
+		cache = rb_entry(node, struct btrfs_block_group_cache,
+				 cache_node);
+		atomic_inc(&cache->count);
+	} else
+		cache = NULL;
+	spin_unlock(&root->fs_info->block_group_cache_lock);
+	return cache;
+}
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root)
 {
-	struct btrfs_block_group_cache *cache, *entry;
-	struct rb_node *n;
+	struct btrfs_block_group_cache *cache;
 	int err = 0;
-	int werr = 0;
 	struct btrfs_path *path;
 	u64 last = 0;
@@ -2402,39 +2507,35 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		return -ENOMEM;

 	while (1) {
-		cache = NULL;
-		spin_lock(&root->fs_info->block_group_cache_lock);
-		for (n = rb_first(&root->fs_info->block_group_cache_tree);
-		     n; n = rb_next(n)) {
-			entry = rb_entry(n, struct btrfs_block_group_cache,
-					 cache_node);
-			if (entry->dirty) {
-				cache = entry;
-				break;
-			}
-		}
-		spin_unlock(&root->fs_info->block_group_cache_lock);
-
-		if (!cache)
-			break;
+		if (last == 0) {
+			err = btrfs_run_delayed_refs(trans, root,
+						     (unsigned long)-1);
+			BUG_ON(err);
+		}
+
+		cache = btrfs_lookup_first_block_group(root->fs_info, last);
+		while (cache) {
+			if (cache->dirty)
+				break;
+			cache = next_block_group(root, cache);
+		}
+		if (!cache) {
+			if (last == 0)
+				break;
+			last = 0;
+			continue;
+		}

 		cache->dirty = 0;
-		last += cache->key.offset;
+		last = cache->key.objectid + cache->key.offset;

-		err = write_one_cache_group(trans, root,
-					    path, cache);
-		/*
-		 * if we fail to write the cache group, we want
-		 * to keep it marked dirty in hopes that a later
-		 * write will work
-		 */
-		if (err) {
-			werr = err;
-			continue;
-		}
+		err = write_one_cache_group(trans, root, path, cache);
+		BUG_ON(err);
+		btrfs_put_block_group(cache);
 	}

 	btrfs_free_path(path);
-	return werr;
+	return 0;
 }

 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
@@ -2484,6 +2585,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	found->force_alloc = 0;
 	*space_info = found;
 	list_add_rcu(&found->list, &info->space_info);
+	atomic_set(&found->caching_threads, 0);
 	return 0;
 }
@@ -2947,13 +3049,9 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
 	struct btrfs_block_group_cache *cache;
 	struct btrfs_fs_info *fs_info = root->fs_info;

-	if (pin) {
+	if (pin)
 		set_extent_dirty(&fs_info->pinned_extents,
 				 bytenr, bytenr + num - 1, GFP_NOFS);
-	} else {
-		clear_extent_dirty(&fs_info->pinned_extents,
-				   bytenr, bytenr + num - 1, GFP_NOFS);
-	}

 	while (num > 0) {
 		cache = btrfs_lookup_block_group(fs_info, bytenr);
@@ -2969,14 +3067,34 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
 			spin_unlock(&cache->space_info->lock);
 			fs_info->total_pinned += len;
 		} else {
+			int unpin = 0;
+
+			/*
+			 * in order to not race with the block group caching, we
+			 * only want to unpin the extent if we are cached.  If
+			 * we aren't cached, we want to start async caching this
+			 * block group so we can free the extent the next time
+			 * around.
+			 */
 			spin_lock(&cache->space_info->lock);
 			spin_lock(&cache->lock);
-			cache->pinned -= len;
-			cache->space_info->bytes_pinned -= len;
+			unpin = (cache->cached == BTRFS_CACHE_FINISHED);
+			if (likely(unpin)) {
+				cache->pinned -= len;
+				cache->space_info->bytes_pinned -= len;
+				fs_info->total_pinned -= len;
+			}
 			spin_unlock(&cache->lock);
 			spin_unlock(&cache->space_info->lock);
-			fs_info->total_pinned -= len;

-			if (cache->cached)
+			if (likely(unpin))
+				clear_extent_dirty(&fs_info->pinned_extents,
+						   bytenr, bytenr + len -1,
+						   GFP_NOFS);
+			else
+				cache_block_group(cache);
+
+			if (unpin)
 				btrfs_add_free_space(cache, bytenr, len);
 		}
 		btrfs_put_block_group(cache);
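
The rule the unpin branch above encodes, restated: pinned bytes may return to the free-space cache only once the owning block group is fully cached; otherwise the range stays pinned (its bits stay set in pinned_extents) and async caching is kicked so a later unpin pass can reclaim it. Condensed, using the same calls as the hunk:

    int unpin = block_group_cache_done(cache);   /* sampled under the locks */

    if (unpin) {
        /* counters drop and the range becomes allocatable immediately */
        clear_extent_dirty(&fs_info->pinned_extents, start, end, GFP_NOFS);
        btrfs_add_free_space(cache, start, len);
    } else {
        /* leave the range pinned; the caching pass will release it later */
        cache_block_group(cache);
    }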
@@ -3030,6 +3148,7 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy)
 					    &start, &end, EXTENT_DIRTY);
 		if (ret)
 			break;
+
 		set_extent_dirty(copy, start, end, GFP_NOFS);
 		last = end + 1;
 	}
@@ -3058,6 +3177,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 		cond_resched();
 	}
+
 	return ret;
 }
@@ -3435,6 +3555,45 @@ static u64 stripe_align(struct btrfs_root *root, u64 val)
 	return ret;
 }

+/*
+ * when we wait for progress in the block group caching, it's because
+ * our allocation attempt failed at least once.  So, we must sleep
+ * and let some progress happen before we try again.
+ *
+ * This function will sleep at least once waiting for new free space to
+ * show up, and then it will check the block group free space numbers
+ * for our min num_bytes.  Another option is to have it go ahead
+ * and look in the rbtree for a free extent of a given size, but this
+ * is a good start.
+ */
+static noinline int
+wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
+				u64 num_bytes)
+{
+	DEFINE_WAIT(wait);
+
+	prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE);
+
+	if (block_group_cache_done(cache)) {
+		finish_wait(&cache->caching_q, &wait);
+		return 0;
+	}
+	schedule();
+	finish_wait(&cache->caching_q, &wait);
+
+	wait_event(cache->caching_q, block_group_cache_done(cache) ||
+		   (cache->free_space >= num_bytes));
+	return 0;
+}
+
+enum btrfs_loop_type {
+	LOOP_CACHED_ONLY = 0,
+	LOOP_CACHING_NOWAIT = 1,
+	LOOP_CACHING_WAIT = 2,
+	LOOP_ALLOC_CHUNK = 3,
+	LOOP_NO_EMPTY_SIZE = 4,
+};
+
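Each failed full scan of the block groups escalates find_free_extent() one level down this ladder, so the cheap options are exhausted before the allocator agrees to sleep or to allocate new chunks. The progression as a skeleton (scan_block_groups() and force_chunk_alloc() are hypothetical stand-ins for the search loop in the hunks below):

    int loop = LOOP_CACHED_ONLY;

    while (!found) {
        found = scan_block_groups(loop);     /* one pass at this strictness */
        if (found || loop == LOOP_NO_EMPTY_SIZE)
            break;                           /* success, or nothing left */
        loop++;                              /* escalate: NOWAIT -> WAIT... */
        if (loop == LOOP_ALLOC_CHUNK)
            force_chunk_alloc();             /* grow the fs, then rescan */
        if (loop == LOOP_NO_EMPTY_SIZE)
            empty_size = empty_cluster = 0;  /* final, least picky pass */
    }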
 /*
  * walks the btree of allocated extents and find a hole of a given size.
  * The key ins is changed to record the hole:
@@ -3460,6 +3619,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_space_info *space_info;
 	int last_ptr_loop = 0;
 	int loop = 0;
+	bool found_uncached_bg = false;

 	WARN_ON(num_bytes < root->sectorsize);
 	btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
@@ -3491,15 +3651,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	search_start = max(search_start, first_logical_byte(root, 0));
 	search_start = max(search_start, hint_byte);

-	if (!last_ptr) {
+	if (!last_ptr)
 		empty_cluster = 0;
-		loop = 1;
-	}

 	if (search_start == hint_byte) {
 		block_group = btrfs_lookup_block_group(root->fs_info,
 						       search_start);
-		if (block_group && block_group_bits(block_group, data)) {
+		/*
+		 * we don't want to use the block group if it doesn't match our
+		 * allocation bits, or if its not cached.
+		 */
+		if (block_group && block_group_bits(block_group, data) &&
+		    block_group_cache_done(block_group)) {
 			down_read(&space_info->groups_sem);
 			if (list_empty(&block_group->list) ||
 			    block_group->ro) {
@@ -3522,21 +3685,35 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	down_read(&space_info->groups_sem);
 	list_for_each_entry(block_group, &space_info->block_groups, list) {
 		u64 offset;
+		int cached;

 		atomic_inc(&block_group->count);
 		search_start = block_group->key.objectid;

 have_block_group:
-		if (unlikely(!block_group->cached)) {
-			mutex_lock(&block_group->cache_mutex);
-			ret = cache_block_group(root, block_group);
-			mutex_unlock(&block_group->cache_mutex);
-			if (ret) {
-				btrfs_put_block_group(block_group);
-				break;
+		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
+			/*
+			 * we want to start caching kthreads, but not too many
+			 * right off the bat so we don't overwhelm the system,
+			 * so only start them if there are less than 2 and we're
+			 * in the initial allocation phase.
+			 */
+			if (loop > LOOP_CACHING_NOWAIT ||
+			    atomic_read(&space_info->caching_threads) < 2) {
+				ret = cache_block_group(block_group);
+				BUG_ON(ret);
 			}
 		}

+		cached = block_group_cache_done(block_group);
+		if (unlikely(!cached)) {
+			found_uncached_bg = true;
+
+			/* if we only want cached bgs, loop */
+			if (loop == LOOP_CACHED_ONLY)
+				goto loop;
+		}
+
 		if (unlikely(block_group->ro))
 			goto loop;
@@ -3615,14 +3792,21 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 				spin_unlock(&last_ptr->refill_lock);
 				goto checks;
 			}
+		} else if (!cached && loop > LOOP_CACHING_NOWAIT) {
+			spin_unlock(&last_ptr->refill_lock);
+
+			wait_block_group_cache_progress(block_group,
+			       num_bytes + empty_cluster + empty_size);
+			goto have_block_group;
 		}
+
 		/*
 		 * at this point we either didn't find a cluster
 		 * or we weren't able to allocate a block from our
 		 * cluster.  Free the cluster we've been trying
 		 * to use, and go to the next block group
 		 */
-		if (loop < 2) {
+		if (loop < LOOP_NO_EMPTY_SIZE) {
 			btrfs_return_cluster_to_free_space(NULL,
 							   last_ptr);
 			spin_unlock(&last_ptr->refill_lock);
@@ -3633,11 +3817,17 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 		offset = btrfs_find_space_for_alloc(block_group, search_start,
 						    num_bytes, empty_size);
-		if (!offset)
+		if (!offset && (cached || (!cached &&
+					   loop == LOOP_CACHING_NOWAIT))) {
 			goto loop;
+		} else if (!offset && (!cached &&
+				       loop > LOOP_CACHING_NOWAIT)) {
+			wait_block_group_cache_progress(block_group,
+					num_bytes + empty_size);
+			goto have_block_group;
+		}
 checks:
 		search_start = stripe_align(root, offset);
+
 		/* move on to the next group */
 		if (search_start + num_bytes >= search_end) {
 			btrfs_add_free_space(block_group, offset, num_bytes);
@@ -3683,13 +3873,26 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	}
 	up_read(&space_info->groups_sem);

-	/* loop == 0, try to find a clustered alloc in every block group
-	 * loop == 1, try again after forcing a chunk allocation
-	 * loop == 2, set empty_size and empty_cluster to 0 and try again
+	/* LOOP_CACHED_ONLY, only search fully cached block groups
+	 * LOOP_CACHING_NOWAIT, search partially cached block groups, but
+	 * don't wait for them to finish caching
+	 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
+	 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
+	 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
+	 * again
 	 */
-	if (!ins->objectid && loop < 3 &&
-	    (empty_size || empty_cluster || allowed_chunk_alloc)) {
-		if (loop >= 2) {
+	if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE &&
+	    (found_uncached_bg || empty_size || empty_cluster ||
+	     allowed_chunk_alloc)) {
+		if (found_uncached_bg) {
+			found_uncached_bg = false;
+			if (loop < LOOP_CACHING_WAIT) {
+				loop++;
+				goto search;
+			}
+		}
+
+		if (loop == LOOP_ALLOC_CHUNK) {
 			empty_size = 0;
 			empty_cluster = 0;
 		}
@@ -3702,7 +3905,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 			space_info->force_alloc = 1;
 		}

-		if (loop < 3) {
+		if (loop < LOOP_NO_EMPTY_SIZE) {
 			loop++;
 			goto search;
 		}
@@ -3798,7 +4001,7 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 					 num_bytes, data, 1);
 		goto again;
 	}
-	if (ret) {
+	if (ret == -ENOSPC) {
 		struct btrfs_space_info *sinfo;

 		sinfo = __find_space_info(root->fs_info, data);
@@ -3806,7 +4009,6 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 		       "wanted %llu\n", (unsigned long long)data,
 		       (unsigned long long)num_bytes);
 		dump_space_info(sinfo, num_bytes);
-		BUG();
 	}

 	return ret;
@@ -3844,7 +4046,9 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
 	ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
 				     empty_size, hint_byte, search_end, ins,
 				     data);
-	update_reserved_extents(root, ins->objectid, ins->offset, 1);
+	if (!ret)
+		update_reserved_extents(root, ins->objectid, ins->offset, 1);
+
 	return ret;
 }
@@ -4006,9 +4210,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_block_group_cache *block_group;

 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-	mutex_lock(&block_group->cache_mutex);
-	cache_block_group(root, block_group);
-	mutex_unlock(&block_group->cache_mutex);
+	cache_block_group(block_group);
+	wait_event(block_group->caching_q,
+		   block_group_cache_done(block_group));

 	ret = btrfs_remove_free_space(block_group, ins->objectid,
 				      ins->offset);
@@ -4039,7 +4243,8 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
 	ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
 				     empty_size, hint_byte, search_end,
 				     ins, 0);
-	BUG_ON(ret);
+	if (ret)
+		return ret;

 	if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
 		if (parent == 0)
@@ -6955,11 +7160,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
 			 &info->block_group_cache_tree);
 		spin_unlock(&info->block_group_cache_lock);

-		btrfs_remove_free_space_cache(block_group);
 		down_write(&block_group->space_info->groups_sem);
 		list_del(&block_group->list);
 		up_write(&block_group->space_info->groups_sem);

+		if (block_group->cached == BTRFS_CACHE_STARTED)
+			wait_event(block_group->caching_q,
+				   block_group_cache_done(block_group));
+
+		btrfs_remove_free_space_cache(block_group);
+
 		WARN_ON(atomic_read(&block_group->count) != 1);
 		kfree(block_group);
@@ -7025,9 +7235,19 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		atomic_set(&cache->count, 1);
 		spin_lock_init(&cache->lock);
 		spin_lock_init(&cache->tree_lock);
-		mutex_init(&cache->cache_mutex);
+		cache->fs_info = info;
+		init_waitqueue_head(&cache->caching_q);
 		INIT_LIST_HEAD(&cache->list);
 		INIT_LIST_HEAD(&cache->cluster_list);
+
+		/*
+		 * we only want to have 32k of ram per block group for keeping
+		 * track of free space, and if we pass 1/2 of that we want to
+		 * start converting things over to using bitmaps
+		 */
+		cache->extents_thresh = ((1024 * 32) / 2) /
+			sizeof(struct btrfs_free_space);
+
 		read_extent_buffer(leaf, &cache->item,
 				   btrfs_item_ptr_offset(leaf, path->slots[0]),
 				   sizeof(cache->item));
@@ -7036,6 +7256,26 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		key.objectid = found_key.objectid + found_key.offset;
 		btrfs_release_path(root, path);
 		cache->flags = btrfs_block_group_flags(&cache->item);
+		cache->sectorsize = root->sectorsize;
+
+		remove_sb_from_cache(root, cache);
+
+		/*
+		 * check for two cases, either we are full, and therefore
+		 * don't need to bother with the caching work since we won't
+		 * find any space, or we are empty, and we can just add all
+		 * the space in and be done with it.  This saves us a lot of
+		 * time, particularly in the full case.
+		 */
+		if (found_key.offset == btrfs_block_group_used(&cache->item)) {
+			cache->cached = BTRFS_CACHE_FINISHED;
+		} else if (btrfs_block_group_used(&cache->item) == 0) {
+			cache->cached = BTRFS_CACHE_FINISHED;
+			add_new_free_space(cache, root->fs_info,
+					   found_key.objectid,
+					   found_key.objectid +
+					   found_key.offset);
+		}

 		ret = update_space_info(info, cache->flags, found_key.offset,
 					btrfs_block_group_used(&cache->item),
@@ -7079,10 +7319,19 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	cache->key.objectid = chunk_offset;
 	cache->key.offset = size;
 	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+	cache->sectorsize = root->sectorsize;
+
+	/*
+	 * we only want to have 32k of ram per block group for keeping track
+	 * of free space, and if we pass 1/2 of that we want to start
+	 * converting things over to using bitmaps
+	 */
+	cache->extents_thresh = ((1024 * 32) / 2) /
+		sizeof(struct btrfs_free_space);
 	atomic_set(&cache->count, 1);
 	spin_lock_init(&cache->lock);
 	spin_lock_init(&cache->tree_lock);
-	mutex_init(&cache->cache_mutex);
+	init_waitqueue_head(&cache->caching_q);
 	INIT_LIST_HEAD(&cache->list);
 	INIT_LIST_HEAD(&cache->cluster_list);

@@ -7091,6 +7340,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	cache->flags = type;
 	btrfs_set_block_group_flags(&cache->item, type);

+	cache->cached = BTRFS_CACHE_FINISHED;
+	remove_sb_from_cache(root, cache);
+
+	add_new_free_space(cache, root->fs_info, chunk_offset,
+			   chunk_offset + size);
+
 	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
 				&cache->space_info);
 	BUG_ON(ret);
@@ -7149,7 +7404,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	rb_erase(&block_group->cache_node,
 		 &root->fs_info->block_group_cache_tree);
 	spin_unlock(&root->fs_info->block_group_cache_lock);
-	btrfs_remove_free_space_cache(block_group);
+
 	down_write(&block_group->space_info->groups_sem);
 	/*
 	 * we must use list_del_init so people can check to see if they
@@ -7158,11 +7413,18 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	list_del_init(&block_group->list);
 	up_write(&block_group->space_info->groups_sem);

+	if (block_group->cached == BTRFS_CACHE_STARTED)
+		wait_event(block_group->caching_q,
+			   block_group_cache_done(block_group));
+
+	btrfs_remove_free_space_cache(block_group);
+
 	spin_lock(&block_group->space_info->lock);
 	block_group->space_info->total_bytes -= block_group->key.offset;
 	block_group->space_info->bytes_readonly -= block_group->key.offset;
 	spin_unlock(&block_group->space_info->lock);
-	block_group->space_info->full = 0;
+
+	btrfs_clear_space_info_full(root->fs_info);

 	btrfs_put_block_group(block_group);
 	btrfs_put_block_group(block_group);
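
Quick numbers for the 32k-per-block-group comments in the two block-group-creation hunks above: the budget is 32 KiB of ram per block group, and bitmap conversion starts once plain extent entries would use half of it. Assuming the reworked struct btrfs_free_space weighs in around 48 bytes on 64-bit (an rb_node, two u64 fields, and the bitmap pointer; the exact size is an assumption, not stated in this diff), that allows roughly 341 extent entries before bitmaps kick in:

    #include <stdio.h>

    int main(void)
    {
        const long budget = 32 * 1024;   /* ram budget per block group */
        const long entry_size = 48;      /* assumed sizeof(btrfs_free_space) */

        printf("extents_thresh = %ld\n", (budget / 2) / entry_size); /* 341 */
        return 0;
    }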
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -16,45 +16,46 @@
  * Boston, MA 021110-1307, USA.
  */

+#include <linux/pagemap.h>
 #include <linux/sched.h>
+#include <linux/math64.h>
 #include "ctree.h"
 #include "free-space-cache.h"
 #include "transaction.h"

-struct btrfs_free_space {
-	struct rb_node bytes_index;
-	struct rb_node offset_index;
-	u64 offset;
-	u64 bytes;
-};
+#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
+#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

-static int tree_insert_offset(struct rb_root *root, u64 offset,
-			      struct rb_node *node)
+static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
+					  u64 offset)
 {
-	struct rb_node **p = &root->rb_node;
-	struct rb_node *parent = NULL;
-	struct btrfs_free_space *info;
-
-	while (*p) {
-		parent = *p;
-		info = rb_entry(parent, struct btrfs_free_space, offset_index);
-
-		if (offset < info->offset)
-			p = &(*p)->rb_left;
-		else if (offset > info->offset)
-			p = &(*p)->rb_right;
-		else
-			return -EEXIST;
-	}
-
-	rb_link_node(node, parent, p);
-	rb_insert_color(node, root);
-
-	return 0;
+	BUG_ON(offset < bitmap_start);
+	offset -= bitmap_start;
+	return (unsigned long)(div64_u64(offset, sectorsize));
+}
+
+static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
+{
+	return (unsigned long)(div64_u64(bytes, sectorsize));
+}
+
+static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
+				   u64 offset)
+{
+	u64 bitmap_start;
+	u64 bytes_per_bitmap;
+
+	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
+	bitmap_start = offset - block_group->key.objectid;
+	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
+	bitmap_start *= bytes_per_bitmap;
+	bitmap_start += block_group->key.objectid;
+
+	return bitmap_start;
 }

-static int tree_insert_bytes(struct rb_root *root, u64 bytes,
-			     struct rb_node *node)
+static int tree_insert_offset(struct rb_root *root, u64 offset,
+			      struct rb_node *node, int bitmap)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
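
Concrete geometry for the new helpers above, assuming 4 KiB pages and a 4 KiB sectorsize: BITS_PER_BITMAP is 4096 * 8 = 32768 bits, one bit covers one sector, so a single bitmap spans 32768 * 4096 bytes = 128 MiB, and offset_to_bitmap() rounds an offset down to the covering 128 MiB window relative to the block group start. A userspace restatement of the rounding:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long sectorsize = 4096;          /* assumed */
        unsigned long long bits = 4096 * 8;            /* PAGE_CACHE_SIZE * 8 */
        unsigned long long span = bits * sectorsize;   /* 128 MiB per bitmap */
        unsigned long long bg_start = 1ULL << 30;      /* example group start */
        unsigned long long offset = bg_start + 300ULL * 1024 * 1024;

        unsigned long long bitmap_start =
            (offset - bg_start) / span * span + bg_start;

        /* 300 MiB into the group rounds down to the 256 MiB window */
        printf("%llu MiB\n", (bitmap_start - bg_start) >> 20);  /* 256 */
        return 0;
    }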
@@ -62,12 +63,34 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
 	while (*p) {
 		parent = *p;
-		info = rb_entry(parent, struct btrfs_free_space, bytes_index);
+		info = rb_entry(parent, struct btrfs_free_space, offset_index);

-		if (bytes < info->bytes)
+		if (offset < info->offset) {
 			p = &(*p)->rb_left;
-		else
+		} else if (offset > info->offset) {
 			p = &(*p)->rb_right;
+		} else {
+			/*
+			 * we could have a bitmap entry and an extent entry
+			 * share the same offset.  If this is the case, we want
+			 * the extent entry to always be found first if we do a
+			 * linear search through the tree, since we want to have
+			 * the quickest allocation time, and allocating from an
+			 * extent is faster than allocating from a bitmap.  So
+			 * if we're inserting a bitmap and we find an entry at
+			 * this offset, we want to go right, or after this entry
+			 * logically.  If we are inserting an extent and we've
+			 * found a bitmap, we want to go left, or before
+			 * logically.
+			 */
+			if (bitmap) {
+				WARN_ON(info->bitmap);
+				p = &(*p)->rb_right;
+			} else {
+				WARN_ON(!info->bitmap);
+				p = &(*p)->rb_left;
+			}
+		}
 	}

 	rb_link_node(node, parent, p);
...@@ -79,110 +102,143 @@ static int tree_insert_bytes(struct rb_root *root, u64 bytes, ...@@ -79,110 +102,143 @@ static int tree_insert_bytes(struct rb_root *root, u64 bytes,
/*
* searches the tree for the given offset.
*
-* fuzzy == 1: this is used for allocations where we are given a hint of where
-* to look for free space. Because the hint may not be completely on an offset
-* mark, or the hint may no longer point to free space we need to fudge our
-* results a bit. So we look for free space starting at or after offset with at
-* least bytes size. We prefer to find as close to the given offset as we can.
-* Also if the offset is within a free space range, then we will return the free
-* space that contains the given offset, which means we can return a free space
-* chunk with an offset before the provided offset.
-*
-* fuzzy == 0: this is just a normal tree search. Give us the free space that
-* starts at the given offset which is at least bytes size, and if its not there
-* return NULL.
* fuzzy - If this is set, then we are trying to make an allocation, and we just
* want a section that has at least bytes size and comes at or after the given
* offset.
*/
-static struct btrfs_free_space *tree_search_offset(struct rb_root *root,
-u64 offset, u64 bytes,
-int fuzzy)
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
u64 offset, int bitmap_only, int fuzzy)
{
-struct rb_node *n = root->rb_node;
-struct btrfs_free_space *entry, *ret = NULL;
struct rb_node *n = block_group->free_space_offset.rb_node;
struct btrfs_free_space *entry, *prev = NULL;

/* find entry that is closest to the 'offset' */
while (1) {
if (!n) {
entry = NULL;
break;
}

-while (n) {
entry = rb_entry(n, struct btrfs_free_space, offset_index);
prev = entry;

-if (offset < entry->offset) {
-if (fuzzy &&
-(!ret || entry->offset < ret->offset) &&
-(bytes <= entry->bytes))
-ret = entry;
if (offset < entry->offset)
n = n->rb_left;
-} else if (offset > entry->offset) {
-if (fuzzy &&
-(entry->offset + entry->bytes - 1) >= offset &&
-bytes <= entry->bytes) {
-ret = entry;
-break;
-}
-n = n->rb_right;
-} else {
-if (bytes > entry->bytes) {
else if (offset > entry->offset)
n = n->rb_right;
-continue;
-}
-ret = entry;
else
break;
-}
}

-return ret;
-}
if (bitmap_only) {
if (!entry)
return NULL;
if (entry->bitmap)
return entry;

-/*
-* return a chunk at least bytes size, as close to offset that we can get.
-*/
/*
* bitmap entry and extent entry may share same offset,
* in that case, bitmap entry comes after extent entry.
*/
-static struct btrfs_free_space *tree_search_bytes(struct rb_root *root,
-u64 offset, u64 bytes)
-{
n = rb_next(n);
if (!n)
return NULL;
-struct rb_node *n = root->rb_node;
-struct btrfs_free_space *entry, *ret = NULL;
entry = rb_entry(n, struct btrfs_free_space, offset_index);
if (entry->offset != offset)
return NULL;

-while (n) {
-entry = rb_entry(n, struct btrfs_free_space, bytes_index);
-if (bytes < entry->bytes) {
WARN_ON(!entry->bitmap);
return entry;
} else if (entry) {
if (entry->bitmap) {
-/*
-* We prefer to get a hole size as close to the size we
-* are asking for so we don't take small slivers out of
-* huge holes, but we also want to get as close to the
-* offset as possible so we don't have a whole lot of
-* fragmentation.
-*/
/*
* if previous extent entry covers the offset,
* we should return it instead of the bitmap entry
*/
-if (offset <= entry->offset) {
-if (!ret)
-ret = entry;
-else if (entry->bytes < ret->bytes)
-ret = entry;
-else if (entry->offset < ret->offset)
-ret = entry;
-}
-n = n->rb_left;
-} else if (bytes > entry->bytes) {
-n = n->rb_right;
n = &entry->offset_index;
while (1) {
n = rb_prev(n);
if (!n)
break;
prev = rb_entry(n, struct btrfs_free_space,
offset_index);
if (!prev->bitmap) {
if (prev->offset + prev->bytes > offset)
entry = prev;
break;
}
}
}
return entry;
}

if (!prev)
return NULL;

/* find last entry before the 'offset' */
entry = prev;
if (entry->offset > offset) {
n = rb_prev(&entry->offset_index);
if (n) {
entry = rb_entry(n, struct btrfs_free_space,
offset_index);
BUG_ON(entry->offset > offset);
-} else {
-/*
-* Ok we may have multiple chunks of the wanted size,
-* so we don't want to take the first one we find, we
-* want to take the one closest to our given offset, so
-* keep searching just in case theres a better match.
-*/
-n = n->rb_right;
-if (offset > entry->offset)
-continue;
-else if (!ret || entry->offset < ret->offset)
-ret = entry;
} else {
if (fuzzy)
return entry;
else
return NULL;
}
}

-return ret;
if (entry->bitmap) {
n = &entry->offset_index;
while (1) {
n = rb_prev(n);
if (!n)
break;
prev = rb_entry(n, struct btrfs_free_space,
offset_index);
if (!prev->bitmap) {
if (prev->offset + prev->bytes > offset)
return prev;
break;
}
}
if (entry->offset + BITS_PER_BITMAP *
block_group->sectorsize > offset)
return entry;
} else if (entry->offset + entry->bytes > offset)
return entry;

if (!fuzzy)
return NULL;

while (1) {
if (entry->bitmap) {
if (entry->offset + BITS_PER_BITMAP *
block_group->sectorsize > offset)
break;
} else {
if (entry->offset + entry->bytes > offset)
break;
}

n = rb_next(&entry->offset_index);
if (!n)
return NULL;
entry = rb_entry(n, struct btrfs_free_space, offset_index);
}
return entry;
}
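The three lookup modes above (exact, bitmap_only, fuzzy) are easier to see over a flat sorted array than an rbtree. A hedged sketch of the exact-vs-fuzzy semantics, with plain structs standing in for the free space entries:

#include <stddef.h>

struct span { unsigned long long off, len; };

/*
 * exact mode (fuzzy == 0): return the span covering 'offset' or NULL;
 * fuzzy mode: fall forward to the first span ending past 'offset'.
 */
static struct span *lookup(struct span *v, size_t n,
                           unsigned long long offset, int fuzzy)
{
    for (size_t i = 0; i < n; i++) {       /* v sorted by off */
        if (v[i].off + v[i].len > offset) {
            if (v[i].off <= offset || fuzzy)
                return &v[i];
            return NULL;
        }
    }
    return NULL;
}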
static void unlink_free_space(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *info)
{
rb_erase(&info->offset_index, &block_group->free_space_offset);
-rb_erase(&info->bytes_index, &block_group->free_space_bytes);
block_group->free_extents--;
block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
@@ -190,17 +246,314 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
{
int ret = 0;

BUG_ON(!info->bitmap && !info->bytes);
-BUG_ON(!info->bytes);
ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
-&info->offset_index);
&info->offset_index, (info->bitmap != NULL));
if (ret)
return ret;

-ret = tree_insert_bytes(&block_group->free_space_bytes, info->bytes,
-&info->bytes_index);
-if (ret)
-return ret;

block_group->free_space += info->bytes;
block_group->free_extents++;
return ret;
}
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
u64 max_bytes, possible_bytes;
/*
* The goal is to keep the total amount of memory used per 1gb of space
* at or below 32k, so we need to adjust how much memory we allow to be
* used by extent based free space tracking
*/
max_bytes = MAX_CACHE_BYTES_PER_GIG *
(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));
possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
(sizeof(struct btrfs_free_space) *
block_group->extents_thresh);
if (possible_bytes > max_bytes) {
int extent_bytes = max_bytes -
(block_group->total_bitmaps * PAGE_CACHE_SIZE);
if (extent_bytes <= 0) {
block_group->extents_thresh = 0;
return;
}
block_group->extents_thresh = extent_bytes /
(sizeof(struct btrfs_free_space));
}
}
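Worked example of the threshold math above, assuming a 1 GiB block group and 4 KiB bitmap pages (PAGE_CACHE_SIZE on x86); the 32k-per-GiB budget comes from the comment in the code, while the 48-byte entry size is only an assumed sizeof(struct btrfs_free_space):

#include <stdio.h>

int main(void)
{
    unsigned long long bg_size = 1ULL << 30;                 /* 1 GiB group */
    unsigned long long max_bytes = 32768 * (bg_size >> 30);  /* 32k per GiB */
    int total_bitmaps = 4;                                   /* 4 bitmap pages */
    long long extent_bytes = (long long)max_bytes - total_bitmaps * 4096LL;

    /* 48 bytes per entry is the assumed illustrative sizeof() */
    printf("extent entries allowed: %lld\n", extent_bytes / 48); /* 341 */
    return 0;
}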
static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *info, u64 offset,
u64 bytes)
{
unsigned long start, end;
unsigned long i;
start = offset_to_bit(info->offset, block_group->sectorsize, offset);
end = start + bytes_to_bits(bytes, block_group->sectorsize);
BUG_ON(end > BITS_PER_BITMAP);
for (i = start; i < end; i++)
clear_bit(i, info->bitmap);
info->bytes -= bytes;
block_group->free_space -= bytes;
}
static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *info, u64 offset,
u64 bytes)
{
unsigned long start, end;
unsigned long i;
start = offset_to_bit(info->offset, block_group->sectorsize, offset);
end = start + bytes_to_bits(bytes, block_group->sectorsize);
BUG_ON(end > BITS_PER_BITMAP);
for (i = start; i < end; i++)
set_bit(i, info->bitmap);
info->bytes += bytes;
block_group->free_space += bytes;
}
static int search_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *bitmap_info, u64 *offset,
u64 *bytes)
{
unsigned long found_bits = 0;
unsigned long bits, i;
unsigned long next_zero;
i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
max_t(u64, *offset, bitmap_info->offset));
bits = bytes_to_bits(*bytes, block_group->sectorsize);
for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
i < BITS_PER_BITMAP;
i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
next_zero = find_next_zero_bit(bitmap_info->bitmap,
BITS_PER_BITMAP, i);
if ((next_zero - i) >= bits) {
found_bits = next_zero - i;
break;
}
i = next_zero;
}
if (found_bits) {
*offset = (u64)(i * block_group->sectorsize) +
bitmap_info->offset;
*bytes = (u64)(found_bits) * block_group->sectorsize;
return 0;
}
return -1;
}
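search_bitmap() is a first-fit scan for a run of set bits at least as long as the request. A self-contained sketch of the same scan, bit-at-a-time instead of the kernel's find_next_bit()/find_next_zero_bit():

/* first-fit: index of the first run of >= 'bits' set bits, else -1 */
static long find_run(const unsigned char *bm, unsigned long nbits,
                     unsigned long bits)
{
    unsigned long i = 0;

    while (i < nbits) {
        unsigned long start = i, run = 0;

        while (i < nbits && (bm[i / 8] >> (i % 8)) & 1) {
            run++;
            i++;
        }
        if (run >= bits)
            return (long)start;
        i++; /* skip the zero bit and keep scanning */
    }
    return -1;
}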
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
*block_group, u64 *offset,
u64 *bytes, int debug)
{
struct btrfs_free_space *entry;
struct rb_node *node;
int ret;
if (!block_group->free_space_offset.rb_node)
return NULL;
entry = tree_search_offset(block_group,
offset_to_bitmap(block_group, *offset),
0, 1);
if (!entry)
return NULL;
for (node = &entry->offset_index; node; node = rb_next(node)) {
entry = rb_entry(node, struct btrfs_free_space, offset_index);
if (entry->bytes < *bytes)
continue;
if (entry->bitmap) {
ret = search_bitmap(block_group, entry, offset, bytes);
if (!ret)
return entry;
continue;
}
*offset = entry->offset;
*bytes = entry->bytes;
return entry;
}
return NULL;
}
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *info, u64 offset)
{
u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
int max_bitmaps = (int)div64_u64(block_group->key.offset +
bytes_per_bg - 1, bytes_per_bg);
BUG_ON(block_group->total_bitmaps >= max_bitmaps);
info->offset = offset_to_bitmap(block_group, offset);
link_free_space(block_group, info);
block_group->total_bitmaps++;
recalculate_thresholds(block_group);
}
static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *bitmap_info,
u64 *offset, u64 *bytes)
{
u64 end;
again:
end = bitmap_info->offset +
(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;
if (*offset > bitmap_info->offset && *offset + *bytes > end) {
bitmap_clear_bits(block_group, bitmap_info, *offset,
end - *offset + 1);
*bytes -= end - *offset + 1;
*offset = end + 1;
} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
*bytes = 0;
}
if (*bytes) {
if (!bitmap_info->bytes) {
unlink_free_space(block_group, bitmap_info);
kfree(bitmap_info->bitmap);
kfree(bitmap_info);
block_group->total_bitmaps--;
recalculate_thresholds(block_group);
}
bitmap_info = tree_search_offset(block_group,
offset_to_bitmap(block_group,
*offset),
1, 0);
if (!bitmap_info)
return -EINVAL;
if (!bitmap_info->bitmap)
return -EAGAIN;
goto again;
} else if (!bitmap_info->bytes) {
unlink_free_space(block_group, bitmap_info);
kfree(bitmap_info->bitmap);
kfree(bitmap_info);
block_group->total_bitmaps--;
recalculate_thresholds(block_group);
}
return 0;
}
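The again: loop above clears a range that may straddle several bitmap windows. A sketch of one step of that loop; win_start/win_len describe the current bitmap's coverage, and the commented call marks where bitmap_clear_bits() runs in the kernel:

/* clear as much of [*offset, *offset + *bytes) as this window covers */
static void clear_one_window(unsigned long long *offset,
                             unsigned long long *bytes,
                             unsigned long long win_start,
                             unsigned long long win_len)
{
    unsigned long long end = win_start + win_len - 1;
    unsigned long long n = (*offset + *bytes - 1 <= end)
                         ? *bytes : end - *offset + 1;

    /* kernel: bitmap_clear_bits(block_group, bitmap_info, *offset, n) */
    *offset += n;
    *bytes -= n;   /* caller loops into the next window while *bytes > 0 */
}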
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *info)
{
struct btrfs_free_space *bitmap_info;
int added = 0;
u64 bytes, offset, end;
int ret;
/*
* If we are below the extents threshold then we can add this as an
* extent, and don't have to deal with the bitmap
*/
if (block_group->free_extents < block_group->extents_thresh &&
info->bytes > block_group->sectorsize * 4)
return 0;
/*
* some block groups are so tiny they can't be enveloped by a bitmap, so
* don't even bother to create a bitmap for this
*/
if (BITS_PER_BITMAP * block_group->sectorsize >
block_group->key.offset)
return 0;
bytes = info->bytes;
offset = info->offset;
again:
bitmap_info = tree_search_offset(block_group,
offset_to_bitmap(block_group, offset),
1, 0);
if (!bitmap_info) {
BUG_ON(added);
goto new_bitmap;
}
end = bitmap_info->offset +
(u64)(BITS_PER_BITMAP * block_group->sectorsize);
if (offset >= bitmap_info->offset && offset + bytes > end) {
bitmap_set_bits(block_group, bitmap_info, offset,
end - offset);
bytes -= end - offset;
offset = end;
added = 0;
} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
bitmap_set_bits(block_group, bitmap_info, offset, bytes);
bytes = 0;
} else {
BUG();
}
if (!bytes) {
ret = 1;
goto out;
} else
goto again;
new_bitmap:
if (info && info->bitmap) {
add_new_bitmap(block_group, info, offset);
added = 1;
info = NULL;
goto again;
} else {
spin_unlock(&block_group->tree_lock);
/* no pre-allocated info, allocate a new one */
if (!info) {
info = kzalloc(sizeof(struct btrfs_free_space),
GFP_NOFS);
if (!info) {
spin_lock(&block_group->tree_lock);
ret = -ENOMEM;
goto out;
}
}
/* allocate the bitmap */
info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
spin_lock(&block_group->tree_lock);
if (!info->bitmap) {
ret = -ENOMEM;
goto out;
}
goto again;
}
out:
if (info) {
if (info->bitmap)
kfree(info->bitmap);
kfree(info);
}
return ret;
}
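The entry test at the top of insert_into_bitmap() is the whole policy: keep large frees as extent entries while the extent budget lasts, and push everything else into bitmaps. A sketch of that decision in isolation (a 4096-byte sectorsize would be a typical value):

/* 1 = put this free range in a bitmap, 0 = keep it as an extent entry */
static int should_use_bitmap(unsigned long free_extents,
                             unsigned long extents_thresh,
                             unsigned long long bytes,
                             unsigned long sectorsize)
{
    /* under budget and bigger than 4 sectors: a plain extent is cheaper */
    if (free_extents < extents_thresh && bytes > sectorsize * 4)
        return 0;
    return 1;
}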
@@ -208,8 +561,8 @@ static int link_free_space(struct btrfs_block_group_cache *block_group,
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes)
{
-struct btrfs_free_space *right_info;
-struct btrfs_free_space *left_info;
struct btrfs_free_space *right_info = NULL;
struct btrfs_free_space *left_info = NULL;
struct btrfs_free_space *info = NULL;
int ret = 0;
@@ -227,18 +580,38 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
* are adding, if there is remove that struct and add a new one to
* cover the entire range
*/
-right_info = tree_search_offset(&block_group->free_space_offset,
-offset+bytes, 0, 0);
-left_info = tree_search_offset(&block_group->free_space_offset,
-offset-1, 0, 1);
right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
if (right_info && rb_prev(&right_info->offset_index))
left_info = rb_entry(rb_prev(&right_info->offset_index),
struct btrfs_free_space, offset_index);
else
left_info = tree_search_offset(block_group, offset - 1, 0, 0);

/*
* If there was no extent directly to the left or right of this new
* extent then we know we're going to have to allocate a new extent, so
* before we do that see if we need to drop this into a bitmap
*/
if ((!left_info || left_info->bitmap) &&
(!right_info || right_info->bitmap)) {
ret = insert_into_bitmap(block_group, info);

if (ret < 0) {
goto out;
} else if (ret) {
ret = 0;
goto out;
}
}

-if (right_info) {
if (right_info && !right_info->bitmap) {
unlink_free_space(block_group, right_info);
info->bytes += right_info->bytes;
kfree(right_info);
}

-if (left_info && left_info->offset + left_info->bytes == offset) {
if (left_info && !left_info->bitmap &&
left_info->offset + left_info->bytes == offset) {
unlink_free_space(block_group, left_info);
info->offset = left_info->offset;
info->bytes += left_info->bytes;
@@ -248,11 +621,11 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
ret = link_free_space(block_group, info);
if (ret)
kfree(info);
out:
spin_unlock(&block_group->tree_lock);

if (ret) {
-printk(KERN_ERR "btrfs: unable to add free space :%d\n", ret);
printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
BUG_ON(ret == -EEXIST);
}
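The merge step above coalesces the freed range with an extent neighbour on either side before linking it. A minimal sketch over plain structs (the kernel does the equivalent with unlink_free_space() and kfree()):

struct fspan { unsigned long long off, len; };

/* absorb neighbours that touch the new range exactly; len 0 = freed */
static void merge_neighbours(struct fspan *info,
                             struct fspan *left, struct fspan *right)
{
    if (right && right->off == info->off + info->len) {
        info->len += right->len;     /* unlink_free_space() + kfree() */
        right->len = 0;
    }
    if (left && left->off + left->len == info->off) {
        info->off = left->off;
        info->len += left->len;
        left->len = 0;
    }
}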
@@ -263,39 +636,64 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes)
{
struct btrfs_free_space *info;
struct btrfs_free_space *next_info = NULL;
int ret = 0;

spin_lock(&block_group->tree_lock);

-info = tree_search_offset(&block_group->free_space_offset, offset, 0,
-1);
-if (info && info->offset == offset) {
-if (info->bytes < bytes) {
-printk(KERN_ERR "Found free space at %llu, size %llu,"
-"trying to use %llu\n",
again:
info = tree_search_offset(block_group, offset, 0, 0);
if (!info) {
WARN_ON(1);
goto out_lock;
}

if (info->bytes < bytes && rb_next(&info->offset_index)) {
u64 end;
next_info = rb_entry(rb_next(&info->offset_index),
struct btrfs_free_space,
offset_index);

if (next_info->bitmap)
end = next_info->offset + BITS_PER_BITMAP *
block_group->sectorsize - 1;
else
end = next_info->offset + next_info->bytes;

if (next_info->bytes < bytes ||
next_info->offset > offset || offset > end) {
printk(KERN_CRIT "Found free space at %llu, size %llu,"
" trying to use %llu\n",
(unsigned long long)info->offset,
(unsigned long long)info->bytes,
(unsigned long long)bytes);
WARN_ON(1);
ret = -EINVAL;
-spin_unlock(&block_group->tree_lock);
-goto out;
goto out_lock;
}

info = next_info;
}

-unlink_free_space(block_group, info);
if (info->bytes == bytes) {
unlink_free_space(block_group, info);
if (info->bitmap) {
kfree(info->bitmap);
block_group->total_bitmaps--;
}
kfree(info);
-spin_unlock(&block_group->tree_lock);
-goto out;
goto out_lock;
}

if (!info->bitmap && info->offset == offset) {
unlink_free_space(block_group, info);
info->offset += bytes;
info->bytes -= bytes;
link_free_space(block_group, info);
goto out_lock;
}

-ret = link_free_space(block_group, info);
-spin_unlock(&block_group->tree_lock);
-BUG_ON(ret);
-} else if (info && info->offset < offset &&
if (!info->bitmap && info->offset <= offset &&
info->offset + info->bytes >= offset + bytes) {
u64 old_start = info->offset;
/*
@@ -312,7 +710,9 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
info->offset = offset + bytes;
info->bytes = old_end - info->offset;
ret = link_free_space(block_group, info);
-BUG_ON(ret);
WARN_ON(ret);
if (ret)
goto out_lock;
} else {
/* the hole we're creating ends at the end
* of the info struct, just free the info
@@ -320,32 +720,22 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
kfree(info);
}
spin_unlock(&block_group->tree_lock);

-/* step two, insert a new info struct to cover anything
-* before the hole
/* step two, insert a new info struct to cover
* anything before the hole
*/
ret = btrfs_add_free_space(block_group, old_start,
offset - old_start);
WARN_ON(ret);
goto out;
}

ret = remove_from_bitmap(block_group, info, &offset, &bytes);
if (ret == -EAGAIN)
goto again;
BUG_ON(ret);
-} else {
-spin_unlock(&block_group->tree_lock);
-if (!info) {
-printk(KERN_ERR "couldn't find space %llu to free\n",
-(unsigned long long)offset);
-printk(KERN_ERR "cached is %d, offset %llu bytes %llu\n",
-block_group->cached,
-(unsigned long long)block_group->key.objectid,
-(unsigned long long)block_group->key.offset);
-btrfs_dump_free_space(block_group, bytes);
-} else if (info) {
-printk(KERN_ERR "hmm, found offset=%llu bytes=%llu, "
-"but wanted offset=%llu bytes=%llu\n",
-(unsigned long long)info->offset,
-(unsigned long long)info->bytes,
-(unsigned long long)offset,
-(unsigned long long)bytes);
-}
-WARN_ON(1);
-}
out_lock:
spin_unlock(&block_group->tree_lock);
out:
return ret;
}
@@ -361,10 +751,13 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
info = rb_entry(n, struct btrfs_free_space, offset_index);
if (info->bytes >= bytes)
count++;
-printk(KERN_ERR "entry offset %llu, bytes %llu\n",
printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
(unsigned long long)info->offset,
-(unsigned long long)info->bytes);
(unsigned long long)info->bytes,
(info->bitmap) ? "yes" : "no");
}
printk(KERN_INFO "block group has cluster?: %s\n",
list_empty(&block_group->cluster_list) ? "no" : "yes");
printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
"\n", count);
}
@@ -397,26 +790,35 @@ __btrfs_return_cluster_to_free_space(
{
struct btrfs_free_space *entry;
struct rb_node *node;
bool bitmap;

spin_lock(&cluster->lock);
if (cluster->block_group != block_group)
goto out;

bitmap = cluster->points_to_bitmap;
cluster->block_group = NULL;
cluster->window_start = 0;
list_del_init(&cluster->block_group_list);
cluster->points_to_bitmap = false;

if (bitmap)
goto out;

node = rb_first(&cluster->root);
-while(node) {
while (node) {
entry = rb_entry(node, struct btrfs_free_space, offset_index);
node = rb_next(&entry->offset_index);
rb_erase(&entry->offset_index, &cluster->root);
-link_free_space(block_group, entry);
BUG_ON(entry->bitmap);
tree_insert_offset(&block_group->free_space_offset,
entry->offset, &entry->offset_index, 0);
}
-list_del_init(&cluster->block_group_list);
-btrfs_put_block_group(cluster->block_group);
-cluster->block_group = NULL;
cluster->root.rb_node = NULL;
out:
spin_unlock(&cluster->lock);
btrfs_put_block_group(block_group);
return 0;
}
@@ -425,20 +827,28 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
struct btrfs_free_space *info;
struct rb_node *node;
struct btrfs_free_cluster *cluster;
-struct btrfs_free_cluster *safe;
struct list_head *head;

spin_lock(&block_group->tree_lock);
-list_for_each_entry_safe(cluster, safe, &block_group->cluster_list,
-block_group_list) {
while ((head = block_group->cluster_list.next) !=
&block_group->cluster_list) {
cluster = list_entry(head, struct btrfs_free_cluster,
block_group_list);

WARN_ON(cluster->block_group != block_group);
__btrfs_return_cluster_to_free_space(block_group, cluster);
if (need_resched()) {
spin_unlock(&block_group->tree_lock);
cond_resched();
spin_lock(&block_group->tree_lock);
}
}

-while ((node = rb_last(&block_group->free_space_bytes)) != NULL) {
-info = rb_entry(node, struct btrfs_free_space, bytes_index);
while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
info = rb_entry(node, struct btrfs_free_space, offset_index);
unlink_free_space(block_group, info);
if (info->bitmap)
kfree(info->bitmap);
kfree(info);
if (need_resched()) {
spin_unlock(&block_group->tree_lock);
@@ -446,6 +856,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
spin_lock(&block_group->tree_lock);
}
}

spin_unlock(&block_group->tree_lock);
}
@@ -453,25 +864,35 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
u64 offset, u64 bytes, u64 empty_size)
{
struct btrfs_free_space *entry = NULL;
u64 bytes_search = bytes + empty_size;
u64 ret = 0;

spin_lock(&block_group->tree_lock);
-entry = tree_search_offset(&block_group->free_space_offset, offset,
-bytes + empty_size, 1);
entry = find_free_space(block_group, &offset, &bytes_search, 0);
if (!entry)
-entry = tree_search_bytes(&block_group->free_space_bytes,
-offset, bytes + empty_size);
-if (entry) {
goto out;

ret = offset;
if (entry->bitmap) {
bitmap_clear_bits(block_group, entry, offset, bytes);
if (!entry->bytes) {
unlink_free_space(block_group, entry);
kfree(entry->bitmap);
kfree(entry);
block_group->total_bitmaps--;
recalculate_thresholds(block_group);
}
} else {
unlink_free_space(block_group, entry);
-ret = entry->offset;
entry->offset += bytes;
entry->bytes -= bytes;
if (!entry->bytes)
kfree(entry);
else
link_free_space(block_group, entry);
}
out:
spin_unlock(&block_group->tree_lock);

return ret;
@@ -517,6 +938,47 @@ int btrfs_return_cluster_to_free_space(
return ret;
}
static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
u64 bytes, u64 min_start)
{
struct btrfs_free_space *entry;
int err;
u64 search_start = cluster->window_start;
u64 search_bytes = bytes;
u64 ret = 0;
spin_lock(&block_group->tree_lock);
spin_lock(&cluster->lock);
if (!cluster->points_to_bitmap)
goto out;
if (cluster->block_group != block_group)
goto out;
entry = tree_search_offset(block_group, search_start, 0, 0);
if (!entry || !entry->bitmap)
goto out;
search_start = min_start;
search_bytes = bytes;
err = search_bitmap(block_group, entry, &search_start,
&search_bytes);
if (err)
goto out;
ret = search_start;
bitmap_clear_bits(block_group, entry, ret, bytes);
out:
spin_unlock(&cluster->lock);
spin_unlock(&block_group->tree_lock);
return ret;
}
/*
* given a cluster, try to allocate 'bytes' from it, returns 0
* if it couldn't find anything suitably large, or a logical disk offset
@@ -530,6 +992,10 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
struct rb_node *node;
u64 ret = 0;

if (cluster->points_to_bitmap)
return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
min_start);

spin_lock(&cluster->lock);
if (bytes > cluster->max_size)
goto out;
@@ -567,9 +1033,73 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
}
out:
spin_unlock(&cluster->lock);

return ret;
}
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_space *entry,
struct btrfs_free_cluster *cluster,
u64 offset, u64 bytes, u64 min_bytes)
{
unsigned long next_zero;
unsigned long i;
unsigned long search_bits;
unsigned long total_bits;
unsigned long found_bits;
unsigned long start = 0;
unsigned long total_found = 0;
bool found = false;
i = offset_to_bit(entry->offset, block_group->sectorsize,
max_t(u64, offset, entry->offset));
search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
total_bits = bytes_to_bits(bytes, block_group->sectorsize);
again:
found_bits = 0;
for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
i < BITS_PER_BITMAP;
i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
next_zero = find_next_zero_bit(entry->bitmap,
BITS_PER_BITMAP, i);
if (next_zero - i >= search_bits) {
found_bits = next_zero - i;
break;
}
i = next_zero;
}
if (!found_bits)
return -1;
if (!found) {
start = i;
found = true;
}
total_found += found_bits;
if (cluster->max_size < found_bits * block_group->sectorsize)
cluster->max_size = found_bits * block_group->sectorsize;
if (total_found < total_bits) {
i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
if (i - start > total_bits * 2) {
total_found = 0;
cluster->max_size = 0;
found = false;
}
goto again;
}
cluster->window_start = start * block_group->sectorsize +
entry->offset;
cluster->points_to_bitmap = true;
return 0;
}
/*
* here we try to find a cluster of blocks in a block group. The goal
* is to find at least bytes free and up to empty_size + bytes free.
@@ -587,12 +1117,12 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
struct btrfs_free_space *entry = NULL;
struct rb_node *node;
struct btrfs_free_space *next;
-struct btrfs_free_space *last;
struct btrfs_free_space *last = NULL;
u64 min_bytes;
u64 window_start;
u64 window_free;
u64 max_extent = 0;
-int total_retries = 0;
bool found_bitmap = false;
int ret;

/* for metadata, allow allocates with more holes */
@@ -620,30 +1150,79 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
goto out;
}
again:
-min_bytes = min(min_bytes, bytes + empty_size);
-entry = tree_search_bytes(&block_group->free_space_bytes,
-offset, min_bytes);
entry = tree_search_offset(block_group, offset, found_bitmap, 1);
if (!entry) {
ret = -ENOSPC;
goto out;
}

/*
* If found_bitmap is true, we exhausted our search for extent entries,
* and we just want to search all of the bitmaps that we can find, and
* ignore any extent entries we find.
*/
while (entry->bitmap || found_bitmap ||
(!entry->bitmap && entry->bytes < min_bytes)) {
struct rb_node *node = rb_next(&entry->offset_index);

if (entry->bitmap && entry->bytes > bytes + empty_size) {
ret = btrfs_bitmap_cluster(block_group, entry, cluster,
offset, bytes + empty_size,
min_bytes);
if (!ret)
goto got_it;
}

if (!node) {
ret = -ENOSPC;
goto out;
}

entry = rb_entry(node, struct btrfs_free_space, offset_index);
}

/*
* We already searched all the extent entries from the passed in offset
* to the end and didn't find enough space for the cluster, and we also
* didn't find any bitmaps that met our criteria, just go ahead and exit
*/
if (found_bitmap) {
ret = -ENOSPC;
goto out;
}

cluster->points_to_bitmap = false;
window_start = entry->offset;
window_free = entry->bytes;
last = entry;
max_extent = entry->bytes;

-while(1) {
while (1) {
/* out window is just right, lets fill it */
if (window_free >= bytes + empty_size)
break;

node = rb_next(&last->offset_index);
if (!node) {
if (found_bitmap)
goto again;
ret = -ENOSPC;
goto out;
}
next = rb_entry(node, struct btrfs_free_space, offset_index);

/*
* we found a bitmap, so if this search doesn't result in a
* cluster, we know to go and search again for the bitmaps and
* start looking for space there
*/
if (next->bitmap) {
if (!found_bitmap)
offset = next->offset;
found_bitmap = true;
last = next;
continue;
}

/*
* we haven't filled the empty size and the window is
* very large. reset and try again
@@ -655,19 +1234,6 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
window_free = entry->bytes;
last = entry;
max_extent = 0;
-total_retries++;
-if (total_retries % 64 == 0) {
-if (min_bytes >= (bytes + empty_size)) {
-ret = -ENOSPC;
-goto out;
-}
-/*
-* grow our allocation a bit, we're not having
-* much luck
-*/
-min_bytes *= 2;
-goto again;
-}
} else {
last = next;
window_free += next->bytes;
@@ -685,11 +1251,19 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
* The cluster includes an rbtree, but only uses the offset index
* of each free space cache entry.
*/
-while(1) {
while (1) {
node = rb_next(&entry->offset_index);
-unlink_free_space(block_group, entry);
if (entry->bitmap && node) {
entry = rb_entry(node, struct btrfs_free_space,
offset_index);
continue;
} else if (entry->bitmap && !node) {
break;
}
rb_erase(&entry->offset_index, &block_group->free_space_offset);
ret = tree_insert_offset(&cluster->root, entry->offset,
-&entry->offset_index);
&entry->offset_index, 0);
BUG_ON(ret);

if (!node || entry == last)
@@ -697,8 +1271,10 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
entry = rb_entry(node, struct btrfs_free_space, offset_index);
}

-ret = 0;
cluster->max_size = max_extent;
got_it:
ret = 0;
atomic_inc(&block_group->count);
list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
cluster->block_group = block_group;
@@ -718,6 +1294,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
spin_lock_init(&cluster->refill_lock);
cluster->root.rb_node = NULL;
cluster->max_size = 0;
cluster->points_to_bitmap = false;
INIT_LIST_HEAD(&cluster->block_group_list);
cluster->block_group = NULL;
}
......
@@ -19,6 +19,14 @@
#ifndef __BTRFS_FREE_SPACE_CACHE
#define __BTRFS_FREE_SPACE_CACHE
struct btrfs_free_space {
struct rb_node offset_index;
u64 offset;
u64 bytes;
unsigned long *bitmap;
struct list_head list;
};
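The bitmap pointer is what distinguishes the two entry kinds: NULL means offset/bytes describe one contiguous extent, while a non-NULL pointer means the entry owns a page-sized bitmap whose bits each cover one sector starting at offset. An illustrative helper, modelled on how free-space-cache.c computes the covering window and not actually part of this header:

/* start of the bitmap window that would cover 'offset' */
static inline unsigned long long bitmap_window_start(
        unsigned long long bg_start, unsigned long long offset,
        unsigned long sectorsize, unsigned long bits_per_bitmap)
{
    unsigned long long window =
            (unsigned long long)bits_per_bitmap * sectorsize;

    return bg_start + (offset - bg_start) / window * window;
}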
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
u64 bytenr, u64 size);
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
......
@@ -2603,8 +2603,8 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
if (root->ref_cows)
btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
path = btrfs_alloc_path();
-path->reada = -1;
BUG_ON(!path);
path->reada = -1;

/* FIXME, add redo link to tree so we don't leak on crash */
key.objectid = inode->i_ino;
......
@@ -309,7 +309,7 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
}
printk(KERN_INFO "node %llu level %d total ptrs %d free spc %u\n",
(unsigned long long)btrfs_header_bytenr(c),
-btrfs_header_level(c), nr,
level, nr,
(u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
for (i = 0; i < nr; i++) {
btrfs_node_key_to_cpu(c, &key, i);
@@ -326,10 +326,10 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
btrfs_level_size(root, level - 1),
btrfs_node_ptr_generation(c, i));
if (btrfs_is_leaf(next) &&
-btrfs_header_level(c) != 1)
level != 1)
BUG();
if (btrfs_header_level(next) !=
-btrfs_header_level(c) - 1)
level - 1)
BUG();
btrfs_print_tree(root, next);
free_extent_buffer(next);
......
@@ -670,6 +670,8 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc,
err = ret;
goto out;
}
if (ret > 0 && path2->slots[level] > 0)
path2->slots[level]--;

eb = path2->nodes[level];
WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
@@ -1609,6 +1611,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
BUG_ON(level == 0);
path->lowest_level = level;
ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
path->lowest_level = 0;
if (ret < 0) {
btrfs_free_path(path);
return ret;
......
@@ -40,6 +40,14 @@ static noinline void put_transaction(struct btrfs_transaction *transaction)
}
}
static noinline void switch_commit_root(struct btrfs_root *root)
{
down_write(&root->commit_root_sem);
free_extent_buffer(root->commit_root);
root->commit_root = btrfs_root_node(root);
up_write(&root->commit_root_sem);
}
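switch_commit_root() swaps the cached commit root for the current root node under a writer lock, so readers holding the semaphore never see a half-switched pair. A hedged userspace sketch of the same pattern, using a pthread rwlock and illustrative ref-counting helpers in place of the extent-buffer API:

#include <pthread.h>

struct buf { int refs; };
struct root {
    pthread_rwlock_t commit_root_sem;
    struct buf *node;        /* current root, already referenced */
    struct buf *commit_root; /* last committed snapshot */
};

static void put_buf(struct buf *b)        { b->refs--; }
static struct buf *get_buf(struct buf *b) { b->refs++; return b; }

static void switch_commit_root_sketch(struct root *r)
{
    pthread_rwlock_wrlock(&r->commit_root_sem);
    put_buf(r->commit_root);            /* free_extent_buffer() */
    r->commit_root = get_buf(r->node);  /* btrfs_root_node() takes a ref */
    pthread_rwlock_unlock(&r->commit_root_sem);
}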
/*
* either allocate a new transaction or hop into the existing one
*/
@@ -444,9 +452,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
btrfs_write_dirty_block_groups(trans, root);

-ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-BUG_ON(ret);

while (1) {
old_root_bytenr = btrfs_root_bytenr(&root->root_item);
if (old_root_bytenr == root->node->start)
@@ -457,13 +462,11 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
&root->root_key,
&root->root_item);
BUG_ON(ret);
-btrfs_write_dirty_block_groups(trans, root);
-ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
ret = btrfs_write_dirty_block_groups(trans, root);
BUG_ON(ret);
}
-free_extent_buffer(root->commit_root);
-root->commit_root = btrfs_root_node(root);
switch_commit_root(root);
return 0;
}
@@ -495,9 +498,6 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
root = list_entry(next, struct btrfs_root, dirty_list);

update_cowonly_root(trans, root);
-ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-BUG_ON(ret);
}
return 0;
}
@@ -544,8 +544,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
btrfs_update_reloc_root(trans, root);

if (root->commit_root != root->node) {
-free_extent_buffer(root->commit_root);
-root->commit_root = btrfs_root_node(root);
switch_commit_root(root);
btrfs_set_root_node(&root->root_item,
root->node);
}
@@ -943,9 +942,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
mutex_unlock(&root->fs_info->trans_mutex);

-if (flush_on_commit || snap_pending) {
-if (flush_on_commit)
if (flush_on_commit) {
btrfs_start_delalloc_inodes(root);
ret = btrfs_wait_ordered_extents(root, 0);
BUG_ON(ret);
} else if (snap_pending) {
ret = btrfs_wait_ordered_extents(root, 1);
BUG_ON(ret);
}
@@ -1009,15 +1010,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_set_root_node(&root->fs_info->tree_root->root_item,
root->fs_info->tree_root->node);
-free_extent_buffer(root->fs_info->tree_root->commit_root);
-root->fs_info->tree_root->commit_root =
-btrfs_root_node(root->fs_info->tree_root);
switch_commit_root(root->fs_info->tree_root);

btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
root->fs_info->chunk_root->node);
-free_extent_buffer(root->fs_info->chunk_root->commit_root);
-root->fs_info->chunk_root->commit_root =
-btrfs_root_node(root->fs_info->chunk_root);
switch_commit_root(root->fs_info->chunk_root);

update_super_roots(root);
@@ -1057,6 +1054,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
cur_trans->commit_done = 1;
root->fs_info->last_trans_committed = cur_trans->transid;
wake_up(&cur_trans->commit_wait);

put_transaction(cur_trans);
......
@@ -797,7 +797,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
return -ENOENT;

inode = read_one_inode(root, key->objectid);
-BUG_ON(!dir);
BUG_ON(!inode);

ref_ptr = btrfs_item_ptr_offset(eb, slot);
ref_end = ref_ptr + btrfs_item_size_nr(eb, slot);
......
@@ -721,7 +721,8 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
*/
static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device,
-u64 num_bytes, u64 *start)
u64 num_bytes, u64 *start,
u64 *max_avail)
{
struct btrfs_key key;
struct btrfs_root *root = device->dev_root;
@@ -758,9 +759,13 @@ static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
if (ret < 0)
goto error;
-ret = btrfs_previous_item(root, path, 0, key.type);
if (ret > 0) {
ret = btrfs_previous_item(root, path, key.objectid, key.type);
if (ret < 0)
goto error;
if (ret > 0)
start_found = 1;
}

l = path->nodes[0];
btrfs_item_key_to_cpu(l, &key, path->slots[0]);
while (1) {
@@ -803,6 +808,10 @@ static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
if (last_byte < search_start)
last_byte = search_start;
hole_size = key.offset - last_byte;

if (hole_size > *max_avail)
*max_avail = hole_size;

if (key.offset > last_byte &&
hole_size >= num_bytes) {
*start = last_byte;
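The two added lines record every hole's size in *max_avail, so the caller learns the largest gap even when nothing fits num_bytes. A sketch of that bookkeeping over a sorted extent list (simplified; assumes non-overlapping extents and ignores the search_start clamp):

struct dev_ext { unsigned long long start, len; };

/* v sorted by start; returns the largest gap between extents */
static unsigned long long largest_hole(const struct dev_ext *v, int n,
                                       unsigned long long dev_size)
{
    unsigned long long last = 0, max_avail = 0;

    for (int i = 0; i < n; i++) {
        if (v[i].start > last && v[i].start - last > max_avail)
            max_avail = v[i].start - last;
        last = v[i].start + v[i].len;
    }
    if (dev_size > last && dev_size - last > max_avail)
        max_avail = dev_size - last;
    return max_avail;
}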
@@ -1621,6 +1630,7 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
device->fs_devices->total_rw_bytes += diff;
device->total_bytes = new_size;
device->disk_total_bytes = new_size;
btrfs_clear_space_info_full(device->dev_root->fs_info);

return btrfs_update_device(trans, device);
@@ -2007,7 +2017,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
goto done;
if (ret) {
ret = 0;
-goto done;
break;
}

l = path->nodes[0];
@@ -2015,7 +2025,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
btrfs_item_key_to_cpu(l, &key, path->slots[0]);

if (key.objectid != device->devid)
-goto done;
break;

dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
length = btrfs_dev_extent_length(l, dev_extent);
@@ -2171,6 +2181,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
max_chunk_size);

again:
max_avail = 0;
if (!map || map->num_stripes != num_stripes) {
kfree(map);
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
@@ -2219,7 +2230,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (device->in_fs_metadata && avail >= min_free) {
ret = find_free_dev_extent(trans, device,
-min_free, &dev_offset);
min_free, &dev_offset,
&max_avail);
if (ret == 0) {
list_move_tail(&device->dev_alloc_list,
&private_devs);
@@ -2795,26 +2807,6 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
}
}

-for (i = 0; i > nr; i++) {
-struct btrfs_multi_bio *multi;
-struct btrfs_bio_stripe *stripe;
-int ret;
-
-length = 1;
-ret = btrfs_map_block(map_tree, WRITE, buf[i],
-&length, &multi, 0);
-BUG_ON(ret);
-
-stripe = multi->stripes;
-for (j = 0; j < multi->num_stripes; j++) {
-if (stripe->physical >= physical &&
-physical < stripe->physical + length)
-break;
-}
-BUG_ON(j >= multi->num_stripes);
-kfree(multi);
-}

*logical = buf;
*naddrs = nr;
*stripe_len = map->stripe_len;
......