Commit 4dc11904 authored by Chris Mason's avatar Chris Mason

Btrfs: Add an extent buffer LRU to reduce radix tree hits

Signed-off-by: default avatarChris Mason <chris.mason@oracle.com>
parent e19caa5f
......@@ -88,8 +88,6 @@ static int __btrfs_cow_block(struct btrfs_trans_handle *trans,
if (IS_ERR(cow))
return PTR_ERR(cow);
cow->alloc_addr = (unsigned long)__builtin_return_address(0);
copy_extent_buffer(cow, buf, 0, 0, cow->len);
btrfs_set_header_bytenr(cow, cow->start);
btrfs_set_header_generation(cow, trans->transid);
......@@ -151,7 +149,6 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
ret = __btrfs_cow_block(trans, root, buf, parent,
parent_slot, cow_ret, search_start, 0);
(*cow_ret)->alloc_addr = (unsigned long)__builtin_return_address(0);
return ret;
}
......
......@@ -50,8 +50,6 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
struct extent_buffer *eb;
eb = find_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
bytenr, blocksize, GFP_NOFS);
if (eb)
eb->alloc_addr = (unsigned long)__builtin_return_address(0);
return eb;
}
......@@ -63,7 +61,6 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root,
eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->extent_tree,
bytenr, blocksize, GFP_NOFS);
eb->alloc_addr = (unsigned long)__builtin_return_address(0);
return eb;
}
......@@ -234,7 +231,6 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
return NULL;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
buf, 1);
buf->alloc_addr = (unsigned long)__builtin_return_address(0);
return buf;
}
......@@ -638,6 +634,7 @@ int close_ctree(struct btrfs_root *root)
btrfs_free_block_groups(root->fs_info);
del_fs_roots(fs_info);
extent_map_tree_cleanup(&BTRFS_I(fs_info->btree_inode)->extent_tree);
truncate_inode_pages(fs_info->btree_inode->i_mapping, 0);
iput(fs_info->btree_inode);
kfree(fs_info->extent_root);
......@@ -647,20 +644,20 @@ int close_ctree(struct btrfs_root *root)
int btrfs_buffer_uptodate(struct extent_buffer *buf)
{
struct inode *btree_inode = buf->pages[0]->mapping->host;
struct inode *btree_inode = buf->last_page->mapping->host;
return extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree, buf);
}
int btrfs_set_buffer_uptodate(struct extent_buffer *buf)
{
struct inode *btree_inode = buf->pages[0]->mapping->host;
struct inode *btree_inode = buf->last_page->mapping->host;
return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->extent_tree,
buf);
}
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
u64 transid = btrfs_header_generation(buf);
struct inode *btree_inode = root->fs_info->btree_inode;
......@@ -681,7 +678,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
void btrfs_set_buffer_defrag(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
......@@ -689,7 +686,7 @@ void btrfs_set_buffer_defrag(struct extent_buffer *buf)
void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
set_extent_bits(&BTRFS_I(btree_inode)->extent_tree, buf->start,
buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
......@@ -698,7 +695,7 @@ void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
int btrfs_buffer_defrag(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
......@@ -706,7 +703,7 @@ int btrfs_buffer_defrag(struct extent_buffer *buf)
int btrfs_buffer_defrag_done(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
return test_range_bit(&BTRFS_I(btree_inode)->extent_tree,
buf->start, buf->start + buf->len - 1,
......@@ -715,7 +712,7 @@ int btrfs_buffer_defrag_done(struct extent_buffer *buf)
int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
buf->start, buf->start + buf->len - 1,
......@@ -724,7 +721,7 @@ int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
return clear_extent_bits(&BTRFS_I(btree_inode)->extent_tree,
buf->start, buf->start + buf->len - 1,
......@@ -733,7 +730,7 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
int btrfs_read_buffer(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
struct btrfs_root *root = BTRFS_I(buf->last_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
return read_extent_buffer_pages(&BTRFS_I(btree_inode)->extent_tree,
buf, 1);
......
......@@ -1176,7 +1176,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
return ERR_PTR(-ENOMEM);
}
btrfs_set_buffer_uptodate(buf);
buf->alloc_addr = (unsigned long)__builtin_return_address(0);
set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
btrfs_set_buffer_defrag(buf);
......
......@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/blkdev.h>
#include <linux/swap.h>
#include "extent_map.h"
/* temporary define until extent_map moves out of btrfs */
......@@ -20,14 +21,11 @@ static struct kmem_cache *extent_map_cache;
static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static LIST_HEAD(extent_buffers);
static LIST_HEAD(buffers);
static LIST_HEAD(states);
static spinlock_t extent_buffers_lock;
static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
static int nr_extent_buffers;
#define MAX_EXTENT_BUFFER_CACHE 128
#define BUFFER_LRU_MAX 64
struct tree_entry {
u64 start;
......@@ -47,20 +45,12 @@ void __init extent_map_init(void)
extent_buffer_cache = btrfs_cache_create("extent_buffers",
sizeof(struct extent_buffer), 0,
NULL);
spin_lock_init(&extent_buffers_lock);
}
void __exit extent_map_exit(void)
{
struct extent_buffer *eb;
struct extent_state *state;
while (!list_empty(&extent_buffers)) {
eb = list_entry(extent_buffers.next,
struct extent_buffer, list);
list_del(&eb->list);
kmem_cache_free(extent_buffer_cache, eb);
}
while (!list_empty(&states)) {
state = list_entry(states.next, struct extent_state, list);
printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs));
......@@ -68,14 +58,6 @@ void __exit extent_map_exit(void)
kmem_cache_free(extent_state_cache, state);
}
while (!list_empty(&buffers)) {
eb = list_entry(buffers.next,
struct extent_buffer, leak_list);
printk("buffer leak start %Lu len %lu return %lX\n", eb->start, eb->len, eb->alloc_addr);
list_del(&eb->leak_list);
kmem_cache_free(extent_buffer_cache, eb);
}
if (extent_map_cache)
kmem_cache_destroy(extent_map_cache);
......@@ -92,10 +74,25 @@ void extent_map_tree_init(struct extent_map_tree *tree,
tree->state.rb_node = NULL;
tree->ops = NULL;
rwlock_init(&tree->lock);
spin_lock_init(&tree->lru_lock);
tree->mapping = mapping;
INIT_LIST_HEAD(&tree->buffer_lru);
tree->lru_size = 0;
}
EXPORT_SYMBOL(extent_map_tree_init);
void extent_map_tree_cleanup(struct extent_map_tree *tree)
{
struct extent_buffer *eb;
while(!list_empty(&tree->buffer_lru)) {
eb = list_entry(tree->buffer_lru.next, struct extent_buffer,
lru);
list_del(&eb->lru);
free_extent_buffer(eb);
}
}
EXPORT_SYMBOL(extent_map_tree_cleanup);
struct extent_map *alloc_extent_map(gfp_t mask)
{
struct extent_map *em;
......@@ -1915,66 +1912,99 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
return (em->block_start + start - em->start) >> inode->i_blkbits;
}
static struct extent_buffer *__alloc_extent_buffer(gfp_t mask)
static int add_lru(struct extent_map_tree *tree, struct extent_buffer *eb)
{
struct extent_buffer *eb = NULL;
spin_lock(&extent_buffers_lock);
if (!list_empty(&extent_buffers)) {
eb = list_entry(extent_buffers.next, struct extent_buffer,
list);
list_del(&eb->list);
WARN_ON(nr_extent_buffers == 0);
nr_extent_buffers--;
}
spin_unlock(&extent_buffers_lock);
if (list_empty(&eb->lru)) {
extent_buffer_get(eb);
list_add(&eb->lru, &tree->buffer_lru);
tree->lru_size++;
if (tree->lru_size >= BUFFER_LRU_MAX) {
struct extent_buffer *rm;
rm = list_entry(tree->buffer_lru.prev,
struct extent_buffer, lru);
tree->lru_size--;
list_del(&rm->lru);
free_extent_buffer(rm);
}
} else
list_move(&eb->lru, &tree->buffer_lru);
return 0;
}
static struct extent_buffer *find_lru(struct extent_map_tree *tree,
u64 start, unsigned long len)
{
struct list_head *lru = &tree->buffer_lru;
struct list_head *cur = lru->next;
struct extent_buffer *eb;
if (eb) {
memset(eb, 0, sizeof(*eb));
} else {
eb = kmem_cache_zalloc(extent_buffer_cache, mask);
}
spin_lock(&extent_buffers_lock);
list_add(&eb->leak_list, &buffers);
spin_unlock(&extent_buffers_lock);
if (list_empty(lru))
return NULL;
return eb;
do {
eb = list_entry(cur, struct extent_buffer, lru);
if (eb->start == start && eb->len == len) {
extent_buffer_get(eb);
return eb;
}
cur = cur->next;
} while (cur != lru);
return NULL;
}
static void __free_extent_buffer(struct extent_buffer *eb)
static inline unsigned long num_extent_pages(u64 start, u64 len)
{
spin_lock(&extent_buffers_lock);
list_del_init(&eb->leak_list);
spin_unlock(&extent_buffers_lock);
if (nr_extent_buffers >= MAX_EXTENT_BUFFER_CACHE) {
kmem_cache_free(extent_buffer_cache, eb);
} else {
spin_lock(&extent_buffers_lock);
list_add(&eb->list, &extent_buffers);
nr_extent_buffers++;
spin_unlock(&extent_buffers_lock);
}
return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
(start >> PAGE_CACHE_SHIFT);
}
static inline struct page *extent_buffer_page(struct extent_buffer *eb, int i)
static inline struct page *extent_buffer_page(struct extent_buffer *eb,
unsigned long i)
{
struct page *p;
if (i < EXTENT_INLINE_PAGES)
return eb->pages[i];
if (i == 0)
return eb->last_page;
i += eb->start >> PAGE_CACHE_SHIFT;
p = find_get_page(eb->pages[0]->mapping, i);
p = find_get_page(eb->last_page->mapping, i);
page_cache_release(p);
return p;
}
static inline unsigned long num_extent_pages(u64 start, u64 len)
static struct extent_buffer *__alloc_extent_buffer(struct extent_map_tree *tree,
u64 start,
unsigned long len,
gfp_t mask)
{
return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
(start >> PAGE_CACHE_SHIFT);
struct extent_buffer *eb = NULL;
spin_lock(&tree->lru_lock);
eb = find_lru(tree, start, len);
if (eb)
goto lru_add;
spin_unlock(&tree->lru_lock);
if (eb) {
memset(eb, 0, sizeof(*eb));
} else {
eb = kmem_cache_zalloc(extent_buffer_cache, mask);
}
INIT_LIST_HEAD(&eb->lru);
eb->start = start;
eb->len = len;
atomic_set(&eb->refs, 1);
spin_lock(&tree->lru_lock);
lru_add:
add_lru(tree, eb);
spin_unlock(&tree->lru_lock);
return eb;
}
static void __free_extent_buffer(struct extent_buffer *eb)
{
kmem_cache_free(extent_buffer_cache, eb);
}
struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
u64 start, unsigned long len,
gfp_t mask)
......@@ -1987,14 +2017,12 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
struct address_space *mapping = tree->mapping;
int uptodate = 0;
eb = __alloc_extent_buffer(mask);
eb = __alloc_extent_buffer(tree, start, len, mask);
if (!eb || IS_ERR(eb))
return NULL;
eb->alloc_addr = (unsigned long)__builtin_return_address(0);
eb->start = start;
eb->len = len;
atomic_set(&eb->refs, 1);
if (eb->flags & EXTENT_BUFFER_FILLED)
return eb;
for (i = 0; i < num_pages; i++, index++) {
p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
......@@ -2008,14 +2036,15 @@ struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
goto fail;
}
set_page_extent_mapped(p);
if (i < EXTENT_INLINE_PAGES)
eb->pages[i] = p;
if (i == 0)
eb->last_page = p;
if (!PageUptodate(p))
uptodate = 0;
unlock_page(p);
}
if (uptodate)
eb->flags |= EXTENT_UPTODATE;
eb->flags |= EXTENT_BUFFER_FILLED;
return eb;
fail:
free_extent_buffer(eb);
......@@ -2035,14 +2064,12 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
struct address_space *mapping = tree->mapping;
int uptodate = 1;
eb = __alloc_extent_buffer(mask);
eb = __alloc_extent_buffer(tree, start, len, mask);
if (!eb || IS_ERR(eb))
return NULL;
eb->alloc_addr = (unsigned long)__builtin_return_address(0);
eb->start = start;
eb->len = len;
atomic_set(&eb->refs, 1);
if (eb->flags & EXTENT_BUFFER_FILLED)
return eb;
for (i = 0; i < num_pages; i++, index++) {
p = find_lock_page(mapping, index);
......@@ -2055,14 +2082,15 @@ struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
goto fail;
}
set_page_extent_mapped(p);
if (i < EXTENT_INLINE_PAGES)
eb->pages[i] = p;
if (i == 0)
eb->last_page = p;
if (!PageUptodate(p))
uptodate = 0;
unlock_page(p);
}
if (uptodate)
eb->flags |= EXTENT_UPTODATE;
eb->flags |= EXTENT_BUFFER_FILLED;
return eb;
fail:
free_extent_buffer(eb);
......@@ -2231,7 +2259,8 @@ int read_extent_buffer_pages(struct extent_map_tree *tree,
ret = -EIO;
}
}
eb->flags |= EXTENT_UPTODATE;
if (!ret)
eb->flags |= EXTENT_UPTODATE;
return ret;
}
EXPORT_SYMBOL(read_extent_buffer_pages);
......
......@@ -16,6 +16,7 @@
#define EXTENT_DELALLOC (1 << 5)
#define EXTENT_DEFRAG (1 << 6)
#define EXTENT_DEFRAG_DONE (1 << 7)
#define EXTENT_BUFFER_FILLED (1 << 8)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
......@@ -33,6 +34,9 @@ struct extent_map_tree {
struct address_space *mapping;
rwlock_t lock;
struct extent_map_ops *ops;
spinlock_t lru_lock;
struct list_head buffer_lru;
int lru_size;
};
/* note, this must start with the same fields as fs/extent_map.c:tree_entry */
......@@ -64,20 +68,17 @@ struct extent_state {
struct list_head list;
};
#define EXTENT_INLINE_PAGES 32
struct extent_buffer {
u64 start;
unsigned long len;
atomic_t refs;
int flags;
struct list_head list;
struct list_head leak_list;
unsigned long alloc_addr;
char *map_token;
char *kaddr;
unsigned long map_start;
unsigned long map_len;
struct page *pages[EXTENT_INLINE_PAGES];
struct page *last_page;
struct list_head lru;
atomic_t refs;
int flags;
};
typedef struct extent_map *(get_extent_t)(struct inode *inode,
......@@ -88,6 +89,7 @@ typedef struct extent_map *(get_extent_t)(struct inode *inode,
void extent_map_tree_init(struct extent_map_tree *tree,
struct address_space *mapping, gfp_t mask);
void extent_map_tree_cleanup(struct extent_map_tree *tree);
struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
u64 start, u64 end);
int add_extent_mapping(struct extent_map_tree *tree,
......
......@@ -443,8 +443,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
BUG_ON(ret);
mutex_unlock(&tree_root->fs_info->fs_mutex);
btrfs_btree_balance_dirty(tree_root, nr);
schedule();
cond_resched();
mutex_lock(&tree_root->fs_info->fs_mutex);
}
BUG_ON(ret);
......@@ -471,7 +470,7 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
mutex_unlock(&tree_root->fs_info->fs_mutex);
btrfs_btree_balance_dirty(tree_root, nr);
schedule();
cond_resched();
}
return ret;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment