Commit 6702ed49 authored by Chris Mason, committed by David Woodhouse

Btrfs: Add run time btree defrag, and an ioctl to force btree defrag

This adds two types of btree defrag: a run-time form that tries to
defrag recently allocated blocks in the btree while they are still in RAM,
and an ioctl that forces defrag of all btree blocks.

File data blocks are not defragged yet, but this can make a huge difference
in sequential btree reads.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 3c69faec
...@@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),) ...@@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),)
obj-m := btrfs.o obj-m := btrfs.o
btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
hash.o file-item.o inode-item.o inode-map.o disk-io.o \ hash.o file-item.o inode-item.o inode-map.o disk-io.o \
transaction.o bit-radix.o inode.o file.o transaction.o bit-radix.o inode.o file.o tree-defrag.o
#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
# root-tree.o dir-item.o hash.o file-item.o inode-item.o \ # root-tree.o dir-item.o hash.o file-item.o inode-item.o \
......
This diff is collapsed.
...@@ -178,6 +178,7 @@ struct btrfs_path { ...@@ -178,6 +178,7 @@ struct btrfs_path {
struct buffer_head *nodes[BTRFS_MAX_LEVEL]; struct buffer_head *nodes[BTRFS_MAX_LEVEL];
int slots[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL];
int reada; int reada;
int lowest_level;
}; };
/* /*
...@@ -338,6 +339,9 @@ struct btrfs_root { ...@@ -338,6 +339,9 @@ struct btrfs_root {
u64 highest_inode; u64 highest_inode;
u64 last_inode_alloc; u64 last_inode_alloc;
int ref_cows; int ref_cows;
struct btrfs_key defrag_progress;
int defrag_running;
int defrag_level;
}; };
/* the lower bits in the key flags defines the item type */ /* the lower bits in the key flags defines the item type */
...@@ -1031,10 +1035,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root, ...@@ -1031,10 +1035,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans, int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root); struct btrfs_root *root);
struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 hint); struct btrfs_root *root, u64 hint,
u64 empty_size);
int btrfs_alloc_extent(struct btrfs_trans_handle *trans, int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 owner, struct btrfs_root *root, u64 owner,
u64 num_blocks, u64 search_start, u64 num_blocks, u64 empty_size, u64 search_start,
u64 search_end, struct btrfs_key *ins, int data); u64 search_end, struct btrfs_key *ins, int data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct buffer_head *buf); struct buffer_head *buf);
...@@ -1051,6 +1056,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, ...@@ -1051,6 +1056,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
int btrfs_free_block_groups(struct btrfs_fs_info *info); int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_root *root); int btrfs_read_block_groups(struct btrfs_root *root);
/* ctree.c */ /* ctree.c */
int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct buffer_head *buf, struct buffer_head
*parent, int parent_slot, struct buffer_head
**cow_ret);
int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path, u32 data_size); *root, struct btrfs_path *path, u32 data_size);
int btrfs_truncate_item(struct btrfs_trans_handle *trans, int btrfs_truncate_item(struct btrfs_trans_handle *trans,
...@@ -1060,6 +1069,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, ...@@ -1060,6 +1069,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_path *p, int *root, struct btrfs_key *key, struct btrfs_path *p, int
ins_len, int cow); ins_len, int cow);
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct buffer_head *parent,
int cache_only);
void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p); void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
struct btrfs_path *btrfs_alloc_path(void); struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p); void btrfs_free_path(struct btrfs_path *p);
...@@ -1171,4 +1183,7 @@ extern struct file_operations btrfs_file_operations; ...@@ -1171,4 +1183,7 @@ extern struct file_operations btrfs_file_operations;
int btrfs_drop_extents(struct btrfs_trans_handle *trans, int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode, struct btrfs_root *root, struct inode *inode,
u64 start, u64 end, u64 *hint_block); u64 start, u64 end, u64 *hint_block);
/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int cache_only);
#endif #endif
...@@ -273,7 +273,9 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, ...@@ -273,7 +273,9 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct buffer_head *buf) struct buffer_head *buf)
{ {
WARN_ON(atomic_read(&buf->b_count) == 0); WARN_ON(atomic_read(&buf->b_count) == 0);
lock_buffer(buf);
clear_buffer_dirty(buf); clear_buffer_dirty(buf);
unlock_buffer(buf);
return 0; return 0;
} }
...@@ -294,6 +296,9 @@ static int __setup_root(int blocksize, ...@@ -294,6 +296,9 @@ static int __setup_root(int blocksize,
root->last_inode_alloc = 0; root->last_inode_alloc = 0;
memset(&root->root_key, 0, sizeof(root->root_key)); memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item)); memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
root->defrag_running = 0;
root->defrag_level = 0;
root->root_key.objectid = objectid; root->root_key.objectid = objectid;
return 0; return 0;
} }
...@@ -585,6 +590,7 @@ int close_ctree(struct btrfs_root *root) ...@@ -585,6 +590,7 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 1; fs_info->closing = 1;
btrfs_transaction_flush_work(root); btrfs_transaction_flush_work(root);
mutex_lock(&fs_info->fs_mutex); mutex_lock(&fs_info->fs_mutex);
btrfs_defrag_dirty_roots(root->fs_info);
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root); ret = btrfs_commit_transaction(trans, root);
/* run commit again to drop the original snapshot */ /* run commit again to drop the original snapshot */
...@@ -616,7 +622,9 @@ void btrfs_mark_buffer_dirty(struct buffer_head *bh) ...@@ -616,7 +622,9 @@ void btrfs_mark_buffer_dirty(struct buffer_head *bh)
{ {
struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root; struct btrfs_root *root = BTRFS_I(bh->b_page->mapping->host)->root;
u64 transid = btrfs_header_generation(btrfs_buffer_header(bh)); u64 transid = btrfs_header_generation(btrfs_buffer_header(bh));
WARN_ON(!atomic_read(&bh->b_count)); WARN_ON(!atomic_read(&bh->b_count));
if (transid != root->fs_info->generation) { if (transid != root->fs_info->generation) {
printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n", printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
(unsigned long long)bh->b_blocknr, (unsigned long long)bh->b_blocknr,
......
...@@ -23,7 +23,8 @@ ...@@ -23,7 +23,8 @@
#include "transaction.h" #include "transaction.h"
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
*orig_root, u64 num_blocks, u64 search_start, *orig_root, u64 num_blocks, u64 empty_size,
u64 search_start,
u64 search_end, u64 hint_block, u64 search_end, u64 hint_block,
struct btrfs_key *ins, u64 exclude_start, struct btrfs_key *ins, u64 exclude_start,
u64 exclude_nr, int data); u64 exclude_nr, int data);
...@@ -379,7 +380,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ...@@ -379,7 +380,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path(); path = btrfs_alloc_path();
if (!path) if (!path)
return -ENOMEM; return -ENOMEM;
ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, ret = find_free_extent(trans, root->fs_info->extent_root, 0, 0, 0,
(u64)-1, 0, &ins, 0, 0, 0); (u64)-1, 0, &ins, 0, 0, 0);
if (ret) { if (ret) {
btrfs_free_path(path); btrfs_free_path(path);
...@@ -533,7 +534,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, ...@@ -533,7 +534,7 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
struct btrfs_block_group_item *bi; struct btrfs_block_group_item *bi;
struct btrfs_key ins; struct btrfs_key ins;
ret = find_free_extent(trans, extent_root, 0, 0, (u64)-1, 0, &ins, ret = find_free_extent(trans, extent_root, 0, 0, 0, (u64)-1, 0, &ins,
0, 0, 0); 0, 0, 0);
/* FIXME, set bit to recalc cache groups on next mount */ /* FIXME, set bit to recalc cache groups on next mount */
if (ret) if (ret)
...@@ -708,6 +709,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, ...@@ -708,6 +709,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
static int try_remove_page(struct address_space *mapping, unsigned long index) static int try_remove_page(struct address_space *mapping, unsigned long index)
{ {
int ret; int ret;
return 0;
ret = invalidate_mapping_pages(mapping, index, index); ret = invalidate_mapping_pages(mapping, index, index);
return ret; return ret;
} }
...@@ -866,7 +868,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ...@@ -866,7 +868,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
if (!path) if (!path)
return -ENOMEM; return -ENOMEM;
ret = find_free_extent(trans, root, 0, 0, (u64)-1, 0, &ins, 0, 0, 0); ret = find_free_extent(trans, root, 0, 0, 0, (u64)-1, 0, &ins, 0, 0, 0);
if (ret) { if (ret) {
btrfs_free_path(path); btrfs_free_path(path);
return ret; return ret;
...@@ -983,8 +985,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ...@@ -983,8 +985,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
* Any available blocks before search_start are skipped. * Any available blocks before search_start are skipped.
*/ */
static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
*orig_root, u64 num_blocks, u64 search_start, u64 *orig_root, u64 num_blocks, u64 empty_size,
search_end, u64 hint_block, u64 search_start, u64 search_end, u64 hint_block,
struct btrfs_key *ins, u64 exclude_start, struct btrfs_key *ins, u64 exclude_start,
u64 exclude_nr, int data) u64 exclude_nr, int data)
{ {
...@@ -1042,6 +1044,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ...@@ -1042,6 +1044,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
data, 1); data, 1);
} }
total_needed += empty_size;
path = btrfs_alloc_path(); path = btrfs_alloc_path();
check_failed: check_failed:
...@@ -1157,9 +1160,11 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ...@@ -1157,9 +1160,11 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
goto error; goto error;
} }
search_start = orig_search_start; search_start = orig_search_start;
if (wrapped) if (wrapped) {
if (!full_scan)
total_needed -= empty_size;
full_scan = 1; full_scan = 1;
else } else
wrapped = 1; wrapped = 1;
goto new_group; goto new_group;
} }
...@@ -1238,9 +1243,11 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ...@@ -1238,9 +1243,11 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
ret = -ENOSPC; ret = -ENOSPC;
goto error; goto error;
} }
if (wrapped) if (wrapped) {
if (!full_scan)
total_needed -= empty_size;
full_scan = 1; full_scan = 1;
else } else
wrapped = 1; wrapped = 1;
} }
block_group = btrfs_lookup_block_group(info, search_start); block_group = btrfs_lookup_block_group(info, search_start);
...@@ -1264,7 +1271,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root ...@@ -1264,7 +1271,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
*/ */
int btrfs_alloc_extent(struct btrfs_trans_handle *trans, int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 owner, struct btrfs_root *root, u64 owner,
u64 num_blocks, u64 hint_block, u64 num_blocks, u64 empty_size, u64 hint_block,
u64 search_end, struct btrfs_key *ins, int data) u64 search_end, struct btrfs_key *ins, int data)
{ {
int ret; int ret;
...@@ -1303,7 +1310,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ...@@ -1303,7 +1310,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
* in the correct block group. * in the correct block group.
*/ */
if (data) { if (data) {
ret = find_free_extent(trans, root, 0, 0, ret = find_free_extent(trans, root, 0, 0, 0,
search_end, 0, &prealloc_key, 0, 0, 0); search_end, 0, &prealloc_key, 0, 0, 0);
BUG_ON(ret); BUG_ON(ret);
if (ret) if (ret)
...@@ -1313,8 +1320,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ...@@ -1313,8 +1320,8 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
} }
/* do the real allocation */ /* do the real allocation */
ret = find_free_extent(trans, root, num_blocks, search_start, ret = find_free_extent(trans, root, num_blocks, empty_size,
search_end, hint_block, ins, search_start, search_end, hint_block, ins,
exclude_start, exclude_nr, data); exclude_start, exclude_nr, data);
BUG_ON(ret); BUG_ON(ret);
if (ret) if (ret)
...@@ -1333,7 +1340,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ...@@ -1333,7 +1340,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
exclude_start = ins->objectid; exclude_start = ins->objectid;
exclude_nr = ins->offset; exclude_nr = ins->offset;
hint_block = exclude_start + exclude_nr; hint_block = exclude_start + exclude_nr;
ret = find_free_extent(trans, root, 0, search_start, ret = find_free_extent(trans, root, 0, 0, search_start,
search_end, hint_block, search_end, hint_block,
&prealloc_key, exclude_start, &prealloc_key, exclude_start,
exclude_nr, 0); exclude_nr, 0);
...@@ -1368,14 +1375,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ...@@ -1368,14 +1375,16 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
* returns the tree buffer or NULL. * returns the tree buffer or NULL.
*/ */
struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 hint) struct btrfs_root *root, u64 hint,
u64 empty_size)
{ {
struct btrfs_key ins; struct btrfs_key ins;
int ret; int ret;
struct buffer_head *buf; struct buffer_head *buf;
ret = btrfs_alloc_extent(trans, root, root->root_key.objectid, ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1, hint, (unsigned long)-1, &ins, 0); 1, empty_size, hint,
(unsigned long)-1, &ins, 0);
if (ret) { if (ret) {
BUG_ON(ret > 0); BUG_ON(ret > 0);
return ERR_PTR(ret); return ERR_PTR(ret);
...@@ -1385,6 +1394,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, ...@@ -1385,6 +1394,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
btrfs_free_extent(trans, root, ins.objectid, 1, 0); btrfs_free_extent(trans, root, ins.objectid, 1, 0);
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
WARN_ON(buffer_dirty(buf));
set_buffer_uptodate(buf); set_buffer_uptodate(buf);
set_buffer_checked(buf); set_buffer_checked(buf);
set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index); set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
...@@ -1591,13 +1601,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ...@@ -1591,13 +1601,15 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
struct btrfs_key key; struct btrfs_key key;
struct btrfs_disk_key *found_key; struct btrfs_disk_key *found_key;
struct btrfs_node *node; struct btrfs_node *node;
btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
level = root_item->drop_level;
path->lowest_level = level;
wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) { if (wret < 0) {
ret = wret; ret = wret;
goto out; goto out;
} }
level = root_item->drop_level;
node = btrfs_buffer_node(path->nodes[level]); node = btrfs_buffer_node(path->nodes[level]);
found_key = &node->ptrs[path->slots[level]].key; found_key = &node->ptrs[path->slots[level]].key;
WARN_ON(memcmp(found_key, &root_item->drop_progress, WARN_ON(memcmp(found_key, &root_item->drop_progress,
...@@ -1617,8 +1629,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ...@@ -1617,8 +1629,6 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
ret = wret; ret = wret;
num_walks++; num_walks++;
if (num_walks > 10) { if (num_walks > 10) {
struct btrfs_key key;
btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
ret = -EAGAIN; ret = -EAGAIN;
get_bh(root->node); get_bh(root->node);
break; break;
...@@ -1627,6 +1637,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root ...@@ -1627,6 +1637,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
for (i = 0; i <= orig_level; i++) { for (i = 0; i <= orig_level; i++) {
if (path->nodes[i]) { if (path->nodes[i]) {
btrfs_block_release(root, path->nodes[i]); btrfs_block_release(root, path->nodes[i]);
path->nodes[i] = 0;
} }
} }
out: out:
......
...@@ -512,7 +512,7 @@ static int prepare_pages(struct btrfs_root *root, ...@@ -512,7 +512,7 @@ static int prepare_pages(struct btrfs_root *root,
if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size || if (isize >= PAGE_CACHE_SIZE || pos + write_bytes < inode->i_size ||
pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) { pos + write_bytes - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
err = btrfs_alloc_extent(trans, root, inode->i_ino, err = btrfs_alloc_extent(trans, root, inode->i_ino,
num_blocks, hint_block, (u64)-1, num_blocks, 0, hint_block, (u64)-1,
&ins, 1); &ins, 1);
if (err) if (err)
goto failed_truncate; goto failed_truncate;
......
...@@ -554,7 +554,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) ...@@ -554,7 +554,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
&alloc_hint); &alloc_hint);
if (ret) if (ret)
goto out; goto out;
ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0,
alloc_hint, (u64)-1, &ins, 1); alloc_hint, (u64)-1, &ins, 1);
if (ret) if (ret)
goto out; goto out;
...@@ -1360,7 +1360,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock, ...@@ -1360,7 +1360,7 @@ static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
if (create & BTRFS_GET_BLOCK_CREATE) { if (create & BTRFS_GET_BLOCK_CREATE) {
struct btrfs_key ins; struct btrfs_key ins;
ret = btrfs_alloc_extent(trans, root, inode->i_ino, ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1, alloc_hint, (u64)-1, 1, 0, alloc_hint, (u64)-1,
&ins, 1); &ins, 1);
if (ret) { if (ret) {
err = ret; err = ret;
...@@ -1998,7 +1998,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen) ...@@ -1998,7 +1998,7 @@ static int create_subvol(struct btrfs_root *root, char *name, int namelen)
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans); BUG_ON(!trans);
subvol = btrfs_alloc_free_block(trans, root, 0); subvol = btrfs_alloc_free_block(trans, root, 0, 0);
if (IS_ERR(subvol)) if (IS_ERR(subvol))
return PTR_ERR(subvol); return PTR_ERR(subvol);
leaf = btrfs_buffer_leaf(subvol); leaf = btrfs_buffer_leaf(subvol);
...@@ -2159,7 +2159,9 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int ...@@ -2159,7 +2159,9 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
{ {
struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_ioctl_vol_args vol_args; struct btrfs_ioctl_vol_args vol_args;
struct btrfs_trans_handle *trans;
int ret = 0; int ret = 0;
int err;
struct btrfs_dir_item *di; struct btrfs_dir_item *di;
int namelen; int namelen;
struct btrfs_path *path; struct btrfs_path *path;
...@@ -2196,6 +2198,31 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int ...@@ -2196,6 +2198,31 @@ int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
else else
ret = create_snapshot(root, vol_args.name, namelen); ret = create_snapshot(root, vol_args.name, namelen);
break; break;
case BTRFS_IOC_DEFRAG:
mutex_lock(&root->fs_info->fs_mutex);
trans = btrfs_start_transaction(root, 1);
memset(&root->defrag_progress, 0,
sizeof(root->defrag_progress));
while (1) {
root->defrag_running = 1;
err = btrfs_defrag_leaves(trans, root, 0);
btrfs_end_transaction(trans, root);
mutex_unlock(&root->fs_info->fs_mutex);
btrfs_btree_balance_dirty(root);
mutex_lock(&root->fs_info->fs_mutex);
trans = btrfs_start_transaction(root, 1);
if (err != -EAGAIN)
break;
}
root->defrag_running = 0;
btrfs_end_transaction(trans, root);
mutex_unlock(&root->fs_info->fs_mutex);
ret = 0;
break;
default: default:
return -ENOTTY; return -ENOTTY;
} }
......
...@@ -28,6 +28,6 @@ struct btrfs_ioctl_vol_args { ...@@ -28,6 +28,6 @@ struct btrfs_ioctl_vol_args {
#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
struct btrfs_ioctl_vol_args) struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_ADD_DISK _IOW(BTRFS_IOCTL_MAGIC, 2, \ #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
struct btrfs_ioctl_vol_args) struct btrfs_ioctl_vol_args)
#endif #endif
...@@ -29,6 +29,7 @@ extern struct kmem_cache *btrfs_transaction_cachep; ...@@ -29,6 +29,7 @@ extern struct kmem_cache *btrfs_transaction_cachep;
static struct workqueue_struct *trans_wq; static struct workqueue_struct *trans_wq;
#define BTRFS_ROOT_TRANS_TAG 0 #define BTRFS_ROOT_TRANS_TAG 0
#define BTRFS_ROOT_DEFRAG_TAG 1
static void put_transaction(struct btrfs_transaction *transaction) static void put_transaction(struct btrfs_transaction *transaction)
{ {
...@@ -69,35 +70,41 @@ static int join_transaction(struct btrfs_root *root) ...@@ -69,35 +70,41 @@ static int join_transaction(struct btrfs_root *root)
return 0; return 0;
} }
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, static int record_root_in_trans(struct btrfs_root *root)
int num_blocks)
{ {
struct btrfs_trans_handle *h = u64 running_trans_id = root->fs_info->running_transaction->transid;
kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); if (root->ref_cows && root->last_trans < running_trans_id) {
int ret;
u64 running_trans_id;
mutex_lock(&root->fs_info->trans_mutex);
ret = join_transaction(root);
BUG_ON(ret);
running_trans_id = root->fs_info->running_transaction->transid;
if (root != root->fs_info->tree_root && root->last_trans <
running_trans_id) {
WARN_ON(root == root->fs_info->extent_root); WARN_ON(root == root->fs_info->extent_root);
WARN_ON(root->ref_cows != 1);
if (root->root_item.refs != 0) { if (root->root_item.refs != 0) {
radix_tree_tag_set(&root->fs_info->fs_roots_radix, radix_tree_tag_set(&root->fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid, (unsigned long)root->root_key.objectid,
BTRFS_ROOT_TRANS_TAG); BTRFS_ROOT_TRANS_TAG);
radix_tree_tag_set(&root->fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_DEFRAG_TAG);
root->commit_root = root->node; root->commit_root = root->node;
get_bh(root->node); get_bh(root->node);
} else { } else {
WARN_ON(1); WARN_ON(1);
} }
}
root->last_trans = running_trans_id; root->last_trans = running_trans_id;
h->transid = running_trans_id; }
return 0;
}
struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
int num_blocks)
{
struct btrfs_trans_handle *h =
kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
int ret;
mutex_lock(&root->fs_info->trans_mutex);
ret = join_transaction(root);
BUG_ON(ret);
record_root_in_trans(root);
h->transid = root->fs_info->running_transaction->transid;
h->transaction = root->fs_info->running_transaction; h->transaction = root->fs_info->running_transaction;
h->blocks_reserved = num_blocks; h->blocks_reserved = num_blocks;
h->blocks_used = 0; h->blocks_used = 0;
...@@ -155,6 +162,15 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, ...@@ -155,6 +162,15 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
gang[i]); gang[i]);
if (!page) if (!page)
continue; continue;
if (PageWriteback(page)) {
if (PageDirty(page))
wait_on_page_writeback(page);
else {
unlock_page(page);
page_cache_release(page);
continue;
}
}
err = write_one_page(page, 0); err = write_one_page(page, 0);
if (err) if (err)
werr = err; werr = err;
...@@ -299,6 +315,58 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans, ...@@ -299,6 +315,58 @@ static int add_dirty_roots(struct btrfs_trans_handle *trans,
return err; return err;
} }
/*
 * Run a cache-only defrag over every root in fs_roots_radix that carries
 * BTRFS_ROOT_DEFRAG_TAG.
 *
 * Roots are fetched one at a time by tag lookup, in increasing objectid
 * order.  The tag is cleared as soon as a root is picked up, and roots that
 * already have defrag_running set are skipped.  For each remaining root,
 * btrfs_defrag_leaves() is called repeatedly for as long as it returns
 * -EAGAIN.  Between calls the transaction handle on the tree root is ended,
 * info->fs_mutex is dropped around btrfs_btree_balance_dirty(), and a new
 * handle is started.
 *
 * Because info->fs_mutex is unlocked and re-locked here, the caller is
 * expected to hold it on entry.
 *
 * Returns the result of the last btrfs_defrag_leaves() call, or 0 if none
 * was made.
 */
int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info)
{
	struct btrfs_root *tree_root = info->tree_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *batch[1];
	struct btrfs_root *root;
	u64 next_id = 0;
	int found;
	int idx;
	int err = 0;

	trans = btrfs_start_transaction(tree_root, 1);
	for (;;) {
		found = radix_tree_gang_lookup_tag(&info->fs_roots_radix,
						   (void **)batch, next_id,
						   ARRAY_SIZE(batch),
						   BTRFS_ROOT_DEFRAG_TAG);
		if (!found)
			break;

		for (idx = 0; idx < found; idx++) {
			root = batch[idx];
			/* the next lookup starts just past this root */
			next_id = root->root_key.objectid + 1;
			radix_tree_tag_clear(&info->fs_roots_radix,
				     (unsigned long)root->root_key.objectid,
				     BTRFS_ROOT_DEFRAG_TAG);

			/* a defrag is already in progress on this root */
			if (root->defrag_running)
				continue;

			do {
				mutex_lock(&root->fs_info->trans_mutex);
				record_root_in_trans(root);
				mutex_unlock(&root->fs_info->trans_mutex);

				root->defrag_running = 1;
				err = btrfs_defrag_leaves(trans, root, 1);

				/*
				 * End the handle and drop fs_mutex around the
				 * dirty-page balancing, then take both back
				 * before the next batch.
				 */
				btrfs_end_transaction(trans, tree_root);
				mutex_unlock(&info->fs_mutex);

				btrfs_btree_balance_dirty(root);

				mutex_lock(&info->fs_mutex);
				trans = btrfs_start_transaction(tree_root, 1);
			} while (err == -EAGAIN);

			root->defrag_running = 0;
		}
	}
	btrfs_end_transaction(trans, tree_root);
	return err;
}
static int drop_dirty_roots(struct btrfs_root *tree_root, static int drop_dirty_roots(struct btrfs_root *tree_root,
struct list_head *list) struct list_head *list)
{ {
...@@ -475,6 +543,7 @@ void btrfs_transaction_cleaner(struct work_struct *work) ...@@ -475,6 +543,7 @@ void btrfs_transaction_cleaner(struct work_struct *work)
goto out; goto out;
} }
mutex_unlock(&root->fs_info->trans_mutex); mutex_unlock(&root->fs_info->trans_mutex);
btrfs_defrag_dirty_roots(root->fs_info);
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root); ret = btrfs_commit_transaction(trans, root);
out: out:
......
...@@ -70,5 +70,6 @@ void btrfs_transaction_queue_work(struct btrfs_root *root, int delay); ...@@ -70,5 +70,6 @@ void btrfs_transaction_queue_work(struct btrfs_root *root, int delay);
void btrfs_init_transaction_sys(void); void btrfs_init_transaction_sys(void);
void btrfs_exit_transaction_sys(void); void btrfs_exit_transaction_sys(void);
int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list); int btrfs_add_dead_root(struct btrfs_root *root, struct list_head *dead_list);
int btrfs_defrag_dirty_roots(struct btrfs_fs_info *info);
#endif #endif
/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include <linux/sched.h>
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
/*
 * Start readahead on every block that 'node' points to.
 *
 * Pointers are visited in slot order; the walk stops at the first
 * readahead_tree_block() call that returns non-zero.
 */
static void reada_defrag(struct btrfs_root *root,
			 struct btrfs_node *node)
{
	u32 total = btrfs_header_nritems(&node->header);
	int slot = 0;

	while (slot < total) {
		u64 target = btrfs_node_blockptr(node, slot);
		int err = readahead_tree_block(root, target);

		if (err)
			break;
		slot++;
	}
}
/*
 * Walk down from path->nodes[*level], following path->slots[] at each
 * level, until level 1 has been handled or the current node has no slot
 * left to visit.
 *
 * For each level above 1, the child at the current slot is passed through
 * btrfs_cow_block() and btrfs_realloc_node(), stored in path->nodes[] one
 * level down, and becomes the new current node.  At level 1 only
 * btrfs_realloc_node() is called on the current node and the walk stops.
 *
 * With cache_only set, the child is looked up with btrfs_find_tree_block()
 * and skipped (slot advanced) when that returns NULL or a buffer that is
 * not uptodate or is locked.  Without it the child is fetched with
 * read_tree_block().
 *
 * On exit the node at the final *level is released and cleared from the
 * path, and *level is incremented so the caller continues in the parent.
 *
 * Always returns 0; a failure from the level-1 btrfs_realloc_node() call
 * is only reported through WARN_ON().
 */
static int defrag_walk_down(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int *level,
int cache_only)
{
struct buffer_head *next;
struct buffer_head *cur;
u64 blocknr;
int ret = 0;
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
while(*level > 0) {
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
cur = path->nodes[*level];
/*
 * First slot of a node above level 1 in a full (non cache-only) pass:
 * start readahead on all of its children before descending.
 */
if (!cache_only && *level > 1 && path->slots[*level] == 0)
reada_defrag(root, btrfs_buffer_node(cur));
/* the level recorded in the block header should match the path level */
if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
WARN_ON(1);
/* every slot of this node has been visited */
if (path->slots[*level] >=
btrfs_header_nritems(btrfs_buffer_header(cur)))
break;
/* level 1: hand the node to btrfs_realloc_node() and stop descending */
if (*level == 1) {
ret = btrfs_realloc_node(trans, root,
path->nodes[*level],
cache_only);
break;
}
blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
path->slots[*level]);
if (cache_only) {
next = btrfs_find_tree_block(root, blocknr);
if (!next || !buffer_uptodate(next) ||
buffer_locked(next)) {
/* NOTE(review): next may be NULL here; relies on brelse() accepting NULL */
brelse(next);
path->slots[*level]++;
continue;
}
} else {
/* NOTE(review): the result is used below without a NULL check */
next = read_tree_block(root, blocknr);
}
/* btrfs_cow_block() may hand back a different buffer through &next */
ret = btrfs_cow_block(trans, root, next, path->nodes[*level],
path->slots[*level], &next);
BUG_ON(ret);
ret = btrfs_realloc_node(trans, root, next, cache_only);
BUG_ON(ret);
WARN_ON(*level <= 0);
/* replace whatever the path previously held one level down */
if (path->nodes[*level-1])
btrfs_block_release(root, path->nodes[*level-1]);
path->nodes[*level-1] = next;
/* the new level is read from the child's own header */
*level = btrfs_header_level(btrfs_buffer_header(next));
path->slots[*level] = 0;
}
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
/* done with this node: drop it from the path and step up one level */
btrfs_block_release(root, path->nodes[*level]);
path->nodes[*level] = NULL;
*level += 1;
WARN_ON(ret);
return 0;
}
static int defrag_walk_up(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, int *level,
int cache_only)
{
int i;
int slot;
struct btrfs_node *node;
for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
slot = path->slots[i];
if (slot < btrfs_header_nritems(
btrfs_buffer_header(path->nodes[i])) - 1) {
path->slots[i]++;
*level = i;
node = btrfs_buffer_node(path->nodes[i]);
WARN_ON(i == 0);
btrfs_disk_key_to_cpu(&root->defrag_progress,
&node->ptrs[path->slots[i]].key);
root->defrag_level = i;
return 0;
} else {
btrfs_block_release(root, path->nodes[*level]);
path->nodes[*level] = NULL;
*level = i + 1;
}
}
return 1;
}
/*
 * Defragment part of the btree of 'root'.
 *
 * Nothing is done for roots with ref_cows == 0 or for trees whose root
 * block is at level 0.
 *
 * When root->defrag_progress.objectid is 0 the walk starts at the root
 * node, which is first passed through btrfs_cow_block() and
 * btrfs_realloc_node().  Otherwise the walk resumes with a
 * btrfs_search_slot() for root->defrag_progress, with path->lowest_level
 * set to root->defrag_level.
 *
 * The body alternates defrag_walk_down() and defrag_walk_up().  After ten
 * rounds it stops and returns -EAGAIN so the caller can call again;
 * defrag_walk_up() has recorded where to resume.
 *
 * cache_only is handed through to btrfs_realloc_node() and
 * defrag_walk_down().
 *
 * Returns 0 when the walk finished, -EAGAIN when more work remains,
 * -ENOMEM when no path could be allocated, or a negative value from
 * btrfs_search_slot().  root->defrag_progress is zeroed on every exit
 * through the common tail unless the result is -EAGAIN; the -ENOMEM return
 * leaves it untouched.
 */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
			struct btrfs_root *root, int cache_only)
{
	struct btrfs_path *path = NULL;
	struct buffer_head *cowed;
	int root_level;
	int level;
	int step;
	int rounds = 0;
	int ret = 0;
	int i;

	if (!root->ref_cows)
		goto out;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	root_level = btrfs_header_level(btrfs_buffer_header(root->node));
	level = root_level;
	/* a tree that is a single leaf has nothing to rearrange */
	if (!level)
		goto out;

	if (root->defrag_progress.objectid != 0) {
		/* resume at the key and level saved by defrag_walk_up() */
		level = root->defrag_level;
		path->lowest_level = level;
		step = btrfs_search_slot(trans, root, &root->defrag_progress,
					 path, 0, 1);
		if (step < 0) {
			ret = step;
			goto out;
		}

		/* drop to the highest level the search actually filled in */
		while (level > 0 && !path->nodes[level])
			level--;

		if (!path->nodes[level]) {
			ret = 0;
			goto out;
		}
	} else {
		/* fresh pass: begin at the root node itself */
		get_bh(root->node);
		ret = btrfs_cow_block(trans, root, root->node, NULL, 0,
				      &cowed);
		BUG_ON(ret);
		ret = btrfs_realloc_node(trans, root, root->node, cache_only);
		BUG_ON(ret);
		path->nodes[level] = root->node;
		path->slots[level] = 0;
	}

	for (;;) {
		step = defrag_walk_down(trans, root, path, &level, cache_only);
		if (step > 0)
			break;
		if (step < 0)
			ret = step;

		step = defrag_walk_up(trans, root, path, &level, cache_only);
		if (step > 0)
			break;
		if (step < 0)
			ret = step;

		/* bound the work per call; the caller is told to come back */
		if (rounds > 8) {
			ret = -EAGAIN;
			break;
		}
		rounds++;
	}

	/* release whatever the path still holds */
	for (i = 0; i <= root_level; i++) {
		if (!path->nodes[i])
			continue;
		btrfs_block_release(root, path->nodes[i]);
		path->nodes[i] = NULL;
	}
out:
	if (path)
		btrfs_free_path(path);
	/* anything other than "call again" ends the pass: forget progress */
	if (ret != -EAGAIN)
		memset(&root->defrag_progress, 0,
		       sizeof(root->defrag_progress));
	return ret;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment