Commit 70dec807 authored by Chris Mason

Btrfs: extent_io and extent_state optimizations

The end_bio routines are changed to take a pointer to the extent state
struct, and the state tree is walked in order to set/clear appropriate
bits as IO completes.  This greatly reduces the number of rbtree searches
done by the end_bio handlers, and reduces lock contention.

The extent_io releasepage function is changed to avoid expensive searches
for locked state.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent aadfeb6e
...@@ -274,7 +274,7 @@ int btree_readpage(struct file *file, struct page *page) ...@@ -274,7 +274,7 @@ int btree_readpage(struct file *file, struct page *page)
return extent_read_full_page(tree, page, btree_get_extent); return extent_read_full_page(tree, page, btree_get_extent);
} }
static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) static int btree_releasepage(struct page *page, gfp_t gfp_flags)
{ {
struct extent_io_tree *tree; struct extent_io_tree *tree;
struct extent_map_tree *map; struct extent_map_tree *map;
...@@ -282,7 +282,7 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags) ...@@ -282,7 +282,7 @@ static int btree_releasepage(struct page *page, gfp_t unused_gfp_flags)
tree = &BTRFS_I(page->mapping->host)->io_tree; tree = &BTRFS_I(page->mapping->host)->io_tree;
map = &BTRFS_I(page->mapping->host)->extent_tree; map = &BTRFS_I(page->mapping->host)->extent_tree;
ret = try_release_extent_mapping(map, tree, page); ret = try_release_extent_mapping(map, tree, page, gfp_flags);
if (ret == 1) { if (ret == 1) {
ClearPagePrivate(page); ClearPagePrivate(page);
set_page_private(page, 0); set_page_private(page, 0);
......
...@@ -27,13 +27,11 @@ static struct kmem_cache *extent_buffer_cache; ...@@ -27,13 +27,11 @@ static struct kmem_cache *extent_buffer_cache;
static LIST_HEAD(buffers); static LIST_HEAD(buffers);
static LIST_HEAD(states); static LIST_HEAD(states);
static spinlock_t state_lock = SPIN_LOCK_UNLOCKED;
#define BUFFER_LRU_MAX 64 #define BUFFER_LRU_MAX 64
struct tree_entry { struct tree_entry {
u64 start; u64 start;
u64 end; u64 end;
int in_tree;
struct rb_node rb_node; struct rb_node rb_node;
}; };
...@@ -69,7 +67,7 @@ void extent_io_exit(void) ...@@ -69,7 +67,7 @@ void extent_io_exit(void)
while (!list_empty(&states)) { while (!list_empty(&states)) {
state = list_entry(states.next, struct extent_state, list); state = list_entry(states.next, struct extent_state, list);
printk("state leak: start %Lu end %Lu state %lu in tree %d refs %d\n", state->start, state->end, state->state, state->in_tree, atomic_read(&state->refs)); printk("state leak: start %Lu end %Lu state %lu in tree %p refs %d\n", state->start, state->end, state->state, state->tree, atomic_read(&state->refs));
list_del(&state->list); list_del(&state->list);
kmem_cache_free(extent_state_cache, state); kmem_cache_free(extent_state_cache, state);
...@@ -87,7 +85,7 @@ void extent_io_tree_init(struct extent_io_tree *tree, ...@@ -87,7 +85,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
tree->state.rb_node = NULL; tree->state.rb_node = NULL;
tree->ops = NULL; tree->ops = NULL;
tree->dirty_bytes = 0; tree->dirty_bytes = 0;
rwlock_init(&tree->lock); spin_lock_init(&tree->lock);
spin_lock_init(&tree->lru_lock); spin_lock_init(&tree->lru_lock);
tree->mapping = mapping; tree->mapping = mapping;
INIT_LIST_HEAD(&tree->buffer_lru); INIT_LIST_HEAD(&tree->buffer_lru);
...@@ -110,18 +108,13 @@ EXPORT_SYMBOL(extent_io_tree_empty_lru); ...@@ -110,18 +108,13 @@ EXPORT_SYMBOL(extent_io_tree_empty_lru);
struct extent_state *alloc_extent_state(gfp_t mask) struct extent_state *alloc_extent_state(gfp_t mask)
{ {
struct extent_state *state; struct extent_state *state;
unsigned long flags;
state = kmem_cache_alloc(extent_state_cache, mask); state = kmem_cache_alloc(extent_state_cache, mask);
if (!state || IS_ERR(state)) if (!state || IS_ERR(state))
return state; return state;
state->state = 0; state->state = 0;
state->in_tree = 0;
state->private = 0; state->private = 0;
state->tree = NULL;
spin_lock_irqsave(&state_lock, flags);
list_add(&state->list, &states);
spin_unlock_irqrestore(&state_lock, flags);
atomic_set(&state->refs, 1); atomic_set(&state->refs, 1);
init_waitqueue_head(&state->wq); init_waitqueue_head(&state->wq);
...@@ -131,14 +124,10 @@ EXPORT_SYMBOL(alloc_extent_state); ...@@ -131,14 +124,10 @@ EXPORT_SYMBOL(alloc_extent_state);
void free_extent_state(struct extent_state *state) void free_extent_state(struct extent_state *state)
{ {
unsigned long flags;
if (!state) if (!state)
return; return;
if (atomic_dec_and_test(&state->refs)) { if (atomic_dec_and_test(&state->refs)) {
WARN_ON(state->in_tree); WARN_ON(state->tree);
spin_lock_irqsave(&state_lock, flags);
list_del(&state->list);
spin_unlock_irqrestore(&state_lock, flags);
kmem_cache_free(extent_state_cache, state); kmem_cache_free(extent_state_cache, state);
} }
} }
...@@ -164,7 +153,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, ...@@ -164,7 +153,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
} }
entry = rb_entry(node, struct tree_entry, rb_node); entry = rb_entry(node, struct tree_entry, rb_node);
entry->in_tree = 1;
rb_link_node(node, parent, p); rb_link_node(node, parent, p);
rb_insert_color(node, root); rb_insert_color(node, root);
return NULL; return NULL;
...@@ -216,8 +204,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, ...@@ -216,8 +204,9 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
static inline struct rb_node *tree_search(struct rb_root *root, u64 offset) static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
{ {
struct rb_node *prev; struct rb_node *prev = NULL;
struct rb_node *ret; struct rb_node *ret;
ret = __tree_search(root, offset, &prev, NULL); ret = __tree_search(root, offset, &prev, NULL);
if (!ret) if (!ret)
return prev; return prev;
...@@ -248,7 +237,7 @@ static int merge_state(struct extent_io_tree *tree, ...@@ -248,7 +237,7 @@ static int merge_state(struct extent_io_tree *tree,
if (other->end == state->start - 1 && if (other->end == state->start - 1 &&
other->state == state->state) { other->state == state->state) {
state->start = other->start; state->start = other->start;
other->in_tree = 0; other->tree = NULL;
rb_erase(&other->rb_node, &tree->state); rb_erase(&other->rb_node, &tree->state);
free_extent_state(other); free_extent_state(other);
} }
...@@ -259,7 +248,7 @@ static int merge_state(struct extent_io_tree *tree, ...@@ -259,7 +248,7 @@ static int merge_state(struct extent_io_tree *tree,
if (other->start == state->end + 1 && if (other->start == state->end + 1 &&
other->state == state->state) { other->state == state->state) {
other->start = state->start; other->start = state->start;
state->in_tree = 0; state->tree = NULL;
rb_erase(&state->rb_node, &tree->state); rb_erase(&state->rb_node, &tree->state);
free_extent_state(state); free_extent_state(state);
} }
...@@ -300,6 +289,7 @@ static int insert_state(struct extent_io_tree *tree, ...@@ -300,6 +289,7 @@ static int insert_state(struct extent_io_tree *tree,
free_extent_state(state); free_extent_state(state);
return -EEXIST; return -EEXIST;
} }
state->tree = tree;
merge_state(tree, state); merge_state(tree, state);
return 0; return 0;
} }
...@@ -335,6 +325,7 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, ...@@ -335,6 +325,7 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
free_extent_state(prealloc); free_extent_state(prealloc);
return -EEXIST; return -EEXIST;
} }
prealloc->tree = tree;
return 0; return 0;
} }
...@@ -361,9 +352,9 @@ static int clear_state_bit(struct extent_io_tree *tree, ...@@ -361,9 +352,9 @@ static int clear_state_bit(struct extent_io_tree *tree,
if (wake) if (wake)
wake_up(&state->wq); wake_up(&state->wq);
if (delete || state->state == 0) { if (delete || state->state == 0) {
if (state->in_tree) { if (state->tree) {
rb_erase(&state->rb_node, &tree->state); rb_erase(&state->rb_node, &tree->state);
state->in_tree = 0; state->tree = NULL;
free_extent_state(state); free_extent_state(state);
} else { } else {
WARN_ON(1); WARN_ON(1);
...@@ -404,7 +395,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -404,7 +395,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
return -ENOMEM; return -ENOMEM;
} }
write_lock_irqsave(&tree->lock, flags); spin_lock_irqsave(&tree->lock, flags);
/* /*
* this search will find the extents that end after * this search will find the extents that end after
* our range starts * our range starts
...@@ -434,6 +425,8 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -434,6 +425,8 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
*/ */
if (state->start < start) { if (state->start < start) {
if (!prealloc)
prealloc = alloc_extent_state(GFP_ATOMIC);
err = split_state(tree, state, prealloc, start); err = split_state(tree, state, prealloc, start);
BUG_ON(err == -EEXIST); BUG_ON(err == -EEXIST);
prealloc = NULL; prealloc = NULL;
...@@ -455,6 +448,8 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -455,6 +448,8 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
* on the first half * on the first half
*/ */
if (state->start <= end && state->end > end) { if (state->start <= end && state->end > end) {
if (!prealloc)
prealloc = alloc_extent_state(GFP_ATOMIC);
err = split_state(tree, state, prealloc, end + 1); err = split_state(tree, state, prealloc, end + 1);
BUG_ON(err == -EEXIST); BUG_ON(err == -EEXIST);
...@@ -471,7 +466,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -471,7 +466,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
goto search_again; goto search_again;
out: out:
write_unlock_irqrestore(&tree->lock, flags); spin_unlock_irqrestore(&tree->lock, flags);
if (prealloc) if (prealloc)
free_extent_state(prealloc); free_extent_state(prealloc);
...@@ -480,7 +475,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -480,7 +475,7 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
search_again: search_again:
if (start > end) if (start > end)
goto out; goto out;
write_unlock_irqrestore(&tree->lock, flags); spin_unlock_irqrestore(&tree->lock, flags);
if (mask & __GFP_WAIT) if (mask & __GFP_WAIT)
cond_resched(); cond_resched();
goto again; goto again;
...@@ -492,9 +487,9 @@ static int wait_on_state(struct extent_io_tree *tree, ...@@ -492,9 +487,9 @@ static int wait_on_state(struct extent_io_tree *tree,
{ {
DEFINE_WAIT(wait); DEFINE_WAIT(wait);
prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE); prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
read_unlock_irq(&tree->lock); spin_unlock_irq(&tree->lock);
schedule(); schedule();
read_lock_irq(&tree->lock); spin_lock_irq(&tree->lock);
finish_wait(&state->wq, &wait); finish_wait(&state->wq, &wait);
return 0; return 0;
} }
...@@ -509,7 +504,7 @@ int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) ...@@ -509,7 +504,7 @@ int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
struct extent_state *state; struct extent_state *state;
struct rb_node *node; struct rb_node *node;
read_lock_irq(&tree->lock); spin_lock_irq(&tree->lock);
again: again:
while (1) { while (1) {
/* /*
...@@ -538,13 +533,13 @@ int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits) ...@@ -538,13 +533,13 @@ int wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits)
break; break;
if (need_resched()) { if (need_resched()) {
read_unlock_irq(&tree->lock); spin_unlock_irq(&tree->lock);
cond_resched(); cond_resched();
read_lock_irq(&tree->lock); spin_lock_irq(&tree->lock);
} }
} }
out: out:
read_unlock_irq(&tree->lock); spin_unlock_irq(&tree->lock);
return 0; return 0;
} }
EXPORT_SYMBOL(wait_extent_bit); EXPORT_SYMBOL(wait_extent_bit);
...@@ -589,7 +584,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, ...@@ -589,7 +584,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
return -ENOMEM; return -ENOMEM;
} }
write_lock_irqsave(&tree->lock, flags); spin_lock_irqsave(&tree->lock, flags);
/* /*
* this search will find all the extents that end after * this search will find all the extents that end after
* our range starts. * our range starts.
...@@ -709,7 +704,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, ...@@ -709,7 +704,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
goto search_again; goto search_again;
out: out:
write_unlock_irqrestore(&tree->lock, flags); spin_unlock_irqrestore(&tree->lock, flags);
if (prealloc) if (prealloc)
free_extent_state(prealloc); free_extent_state(prealloc);
...@@ -718,7 +713,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits, ...@@ -718,7 +713,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
search_again: search_again:
if (start > end) if (start > end)
goto out; goto out;
write_unlock_irqrestore(&tree->lock, flags); spin_unlock_irqrestore(&tree->lock, flags);
if (mask & __GFP_WAIT) if (mask & __GFP_WAIT)
cond_resched(); cond_resched();
goto again; goto again;
...@@ -817,10 +812,6 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) ...@@ -817,10 +812,6 @@ int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
} }
EXPORT_SYMBOL(wait_on_extent_writeback); EXPORT_SYMBOL(wait_on_extent_writeback);
/*
* locks a range in ascending order, waiting for any locked regions
* it hits on the way. [start,end] are inclusive, and this will sleep.
*/
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask)
{ {
int err; int err;
...@@ -896,7 +887,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, ...@@ -896,7 +887,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
struct extent_state *state; struct extent_state *state;
int ret = 1; int ret = 1;
read_lock_irq(&tree->lock); spin_lock_irq(&tree->lock);
/* /*
* this search will find all the extents that end after * this search will find all the extents that end after
* our range starts. * our range starts.
...@@ -919,7 +910,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start, ...@@ -919,7 +910,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
break; break;
} }
out: out:
read_unlock_irq(&tree->lock); spin_unlock_irq(&tree->lock);
return ret; return ret;
} }
EXPORT_SYMBOL(find_first_extent_bit); EXPORT_SYMBOL(find_first_extent_bit);
...@@ -933,7 +924,7 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree, ...@@ -933,7 +924,7 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree,
u64 found = 0; u64 found = 0;
u64 total_bytes = 0; u64 total_bytes = 0;
write_lock_irq(&tree->lock); spin_lock_irq(&tree->lock);
/* /*
* this search will find all the extents that end after * this search will find all the extents that end after
* our range starts. * our range starts.
...@@ -976,9 +967,9 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree, ...@@ -976,9 +967,9 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree,
atomic_inc(&state->refs); atomic_inc(&state->refs);
prepare_to_wait(&state->wq, &wait, prepare_to_wait(&state->wq, &wait,
TASK_UNINTERRUPTIBLE); TASK_UNINTERRUPTIBLE);
write_unlock_irq(&tree->lock); spin_unlock_irq(&tree->lock);
schedule(); schedule();
write_lock_irq(&tree->lock); spin_lock_irq(&tree->lock);
finish_wait(&state->wq, &wait); finish_wait(&state->wq, &wait);
free_extent_state(state); free_extent_state(state);
goto search_again; goto search_again;
...@@ -997,7 +988,7 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree, ...@@ -997,7 +988,7 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree,
break; break;
} }
out: out:
write_unlock_irq(&tree->lock); spin_unlock_irq(&tree->lock);
return found; return found;
} }
...@@ -1017,7 +1008,7 @@ u64 count_range_bits(struct extent_io_tree *tree, ...@@ -1017,7 +1008,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
return 0; return 0;
} }
write_lock_irq(&tree->lock); spin_lock_irq(&tree->lock);
if (cur_start == 0 && bits == EXTENT_DIRTY) { if (cur_start == 0 && bits == EXTENT_DIRTY) {
total_bytes = tree->dirty_bytes; total_bytes = tree->dirty_bytes;
goto out; goto out;
...@@ -1050,7 +1041,7 @@ u64 count_range_bits(struct extent_io_tree *tree, ...@@ -1050,7 +1041,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
break; break;
} }
out: out:
write_unlock_irq(&tree->lock); spin_unlock_irq(&tree->lock);
return total_bytes; return total_bytes;
} }
/* /*
...@@ -1122,7 +1113,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) ...@@ -1122,7 +1113,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
struct extent_state *state; struct extent_state *state;
int ret = 0; int ret = 0;
write_lock_irq(&tree->lock); spin_lock_irq(&tree->lock);
/* /*
* this search will find all the extents that end after * this search will find all the extents that end after
* our range starts. * our range starts.
...@@ -1139,7 +1130,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private) ...@@ -1139,7 +1130,7 @@ int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
} }
state->private = private; state->private = private;
out: out:
write_unlock_irq(&tree->lock); spin_unlock_irq(&tree->lock);
return ret; return ret;
} }
...@@ -1149,7 +1140,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) ...@@ -1149,7 +1140,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
struct extent_state *state; struct extent_state *state;
int ret = 0; int ret = 0;
read_lock_irq(&tree->lock); spin_lock_irq(&tree->lock);
/* /*
* this search will find all the extents that end after * this search will find all the extents that end after
* our range starts. * our range starts.
...@@ -1166,13 +1157,13 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private) ...@@ -1166,13 +1157,13 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
} }
*private = state->private; *private = state->private;
out: out:
read_unlock_irq(&tree->lock); spin_unlock_irq(&tree->lock);
return ret; return ret;
} }
/* /*
* searches a range in the state tree for a given mask. * searches a range in the state tree for a given mask.
* If 'filled' == 1, this returns 1 only if ever extent in the tree * If 'filled' == 1, this returns 1 only if every extent in the tree
* has the bits set. Otherwise, 1 is returned if any bit in the * has the bits set. Otherwise, 1 is returned if any bit in the
* range is found set. * range is found set.
*/ */
...@@ -1184,7 +1175,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -1184,7 +1175,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
int bitset = 0; int bitset = 0;
unsigned long flags; unsigned long flags;
read_lock_irqsave(&tree->lock, flags); spin_lock_irqsave(&tree->lock, flags);
node = tree_search(&tree->state, start); node = tree_search(&tree->state, start);
while (node && start <= end) { while (node && start <= end) {
state = rb_entry(node, struct extent_state, rb_node); state = rb_entry(node, struct extent_state, rb_node);
...@@ -1215,7 +1206,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -1215,7 +1206,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
break; break;
} }
} }
read_unlock_irqrestore(&tree->lock, flags); spin_unlock_irqrestore(&tree->lock, flags);
return bitset; return bitset;
} }
EXPORT_SYMBOL(test_range_bit); EXPORT_SYMBOL(test_range_bit);
...@@ -1282,16 +1273,19 @@ static int end_bio_extent_writepage(struct bio *bio, ...@@ -1282,16 +1273,19 @@ static int end_bio_extent_writepage(struct bio *bio,
{ {
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct extent_io_tree *tree = bio->bi_private; struct extent_state *state = bio->bi_private;
struct extent_io_tree *tree = state->tree;
struct rb_node *node;
u64 start; u64 start;
u64 end; u64 end;
u64 cur;
int whole_page; int whole_page;
unsigned long flags;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
if (bio->bi_size) if (bio->bi_size)
return 1; return 1;
#endif #endif
do { do {
struct page *page = bvec->bv_page; struct page *page = bvec->bv_page;
start = ((u64)page->index << PAGE_CACHE_SHIFT) + start = ((u64)page->index << PAGE_CACHE_SHIFT) +
...@@ -1311,16 +1305,80 @@ static int end_bio_extent_writepage(struct bio *bio, ...@@ -1311,16 +1305,80 @@ static int end_bio_extent_writepage(struct bio *bio,
ClearPageUptodate(page); ClearPageUptodate(page);
SetPageError(page); SetPageError(page);
} }
clear_extent_writeback(tree, start, end, GFP_ATOMIC);
if (tree->ops && tree->ops->writepage_end_io_hook) {
tree->ops->writepage_end_io_hook(page, start, end,
state);
}
/*
* bios can get merged in funny ways, and so we need to
* be careful with the state variable. We know the
* state won't be merged with others because it has
* WRITEBACK set, but we can't be sure each biovec is
* sequential in the file. So, if our cached state
* doesn't match the expected end, search the tree
* for the correct one.
*/
spin_lock_irqsave(&tree->lock, flags);
if (!state || state->end != end) {
state = NULL;
node = __tree_search(&tree->state, start, NULL, NULL);
if (node) {
state = rb_entry(node, struct extent_state,
rb_node);
if (state->end != end ||
!(state->state & EXTENT_WRITEBACK))
state = NULL;
}
if (!state) {
spin_unlock_irqrestore(&tree->lock, flags);
clear_extent_writeback(tree, start,
end, GFP_ATOMIC);
goto next_io;
}
}
cur = end;
while(1) {
struct extent_state *clear = state;
cur = state->start;
node = rb_prev(&state->rb_node);
if (node) {
state = rb_entry(node,
struct extent_state,
rb_node);
} else {
state = NULL;
}
clear_state_bit(tree, clear, EXTENT_WRITEBACK,
1, 0);
if (cur == start)
break;
if (cur < start) {
WARN_ON(1);
break;
}
if (!node)
break;
}
/* before releasing the lock, make sure the next state
* variable has the expected bits set and corresponds
* to the correct offsets in the file
*/
if (state && (state->end + 1 != start ||
!state->state & EXTENT_WRITEBACK)) {
state = NULL;
}
spin_unlock_irqrestore(&tree->lock, flags);
next_io:
if (whole_page) if (whole_page)
end_page_writeback(page); end_page_writeback(page);
else else
check_page_writeback(tree, page); check_page_writeback(tree, page);
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, start, end);
} while (bvec >= bio->bi_io_vec); } while (bvec >= bio->bi_io_vec);
bio_put(bio); bio_put(bio);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23) #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
return 0; return 0;
...@@ -1347,9 +1405,13 @@ static int end_bio_extent_readpage(struct bio *bio, ...@@ -1347,9 +1405,13 @@ static int end_bio_extent_readpage(struct bio *bio,
{ {
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct extent_io_tree *tree = bio->bi_private; struct extent_state *state = bio->bi_private;
struct extent_io_tree *tree = state->tree;
struct rb_node *node;
u64 start; u64 start;
u64 end; u64 end;
u64 cur;
unsigned long flags;
int whole_page; int whole_page;
int ret; int ret;
...@@ -1373,27 +1435,83 @@ static int end_bio_extent_readpage(struct bio *bio, ...@@ -1373,27 +1435,83 @@ static int end_bio_extent_readpage(struct bio *bio,
prefetchw(&bvec->bv_page->flags); prefetchw(&bvec->bv_page->flags);
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end); ret = tree->ops->readpage_end_io_hook(page, start, end,
state);
if (ret) if (ret)
uptodate = 0; uptodate = 0;
} }
if (uptodate) {
set_extent_uptodate(tree, start, end, GFP_ATOMIC);
if (whole_page)
SetPageUptodate(page);
else
check_page_uptodate(tree, page);
} else {
ClearPageUptodate(page);
SetPageError(page);
}
unlock_extent(tree, start, end, GFP_ATOMIC); spin_lock_irqsave(&tree->lock, flags);
if (!state || state->end != end) {
state = NULL;
node = __tree_search(&tree->state, start, NULL, NULL);
if (node) {
state = rb_entry(node, struct extent_state,
rb_node);
if (state->end != end ||
!(state->state & EXTENT_LOCKED))
state = NULL;
}
if (!state) {
spin_unlock_irqrestore(&tree->lock, flags);
set_extent_uptodate(tree, start, end,
GFP_ATOMIC);
unlock_extent(tree, start, end, GFP_ATOMIC);
goto next_io;
}
}
if (whole_page) cur = end;
while(1) {
struct extent_state *clear = state;
cur = state->start;
node = rb_prev(&state->rb_node);
if (node) {
state = rb_entry(node,
struct extent_state,
rb_node);
} else {
state = NULL;
}
clear->state |= EXTENT_UPTODATE;
clear_state_bit(tree, clear, EXTENT_LOCKED,
1, 0);
if (cur == start)
break;
if (cur < start) {
WARN_ON(1);
break;
}
if (!node)
break;
}
/* before releasing the lock, make sure the next state
* variable has the expected bits set and corresponds
* to the correct offsets in the file
*/
if (state && (state->end + 1 != start ||
!state->state & EXTENT_WRITEBACK)) {
state = NULL;
}
spin_unlock_irqrestore(&tree->lock, flags);
next_io:
if (whole_page) {
if (uptodate) {
SetPageUptodate(page);
} else {
ClearPageUptodate(page);
SetPageError(page);
}
unlock_page(page); unlock_page(page);
else } else {
if (uptodate) {
check_page_uptodate(tree, page);
} else {
ClearPageUptodate(page);
SetPageError(page);
}
check_page_locked(tree, page); check_page_locked(tree, page);
}
} while (bvec >= bio->bi_io_vec); } while (bvec >= bio->bi_io_vec);
bio_put(bio); bio_put(bio);
...@@ -1416,7 +1534,8 @@ static int end_bio_extent_preparewrite(struct bio *bio, ...@@ -1416,7 +1534,8 @@ static int end_bio_extent_preparewrite(struct bio *bio,
{ {
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct extent_io_tree *tree = bio->bi_private; struct extent_state *state = bio->bi_private;
struct extent_io_tree *tree = state->tree;
u64 start; u64 start;
u64 end; u64 end;
...@@ -1475,6 +1594,29 @@ static int submit_one_bio(int rw, struct bio *bio) ...@@ -1475,6 +1594,29 @@ static int submit_one_bio(int rw, struct bio *bio)
{ {
u64 maxsector; u64 maxsector;
int ret = 0; int ret = 0;
struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
struct page *page = bvec->bv_page;
struct extent_io_tree *tree = bio->bi_private;
struct rb_node *node;
struct extent_state *state;
u64 start;
u64 end;
start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
end = start + bvec->bv_len - 1;
spin_lock_irq(&tree->lock);
node = __tree_search(&tree->state, start, NULL, NULL);
BUG_ON(!node);
state = rb_entry(node, struct extent_state, rb_node);
while(state->end < end) {
node = rb_next(node);
state = rb_entry(node, struct extent_state, rb_node);
}
BUG_ON(state->end != end);
spin_unlock_irq(&tree->lock);
bio->bi_private = state;
bio_get(bio); bio_get(bio);
...@@ -1519,9 +1661,12 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree, ...@@ -1519,9 +1661,12 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
if (!bio) { if (!bio) {
printk("failed to allocate bio nr %d\n", nr); printk("failed to allocate bio nr %d\n", nr);
} }
bio_add_page(bio, page, size, offset); bio_add_page(bio, page, size, offset);
bio->bi_end_io = end_io_func; bio->bi_end_io = end_io_func;
bio->bi_private = tree; bio->bi_private = tree;
if (bio_ret) { if (bio_ret) {
*bio_ret = bio; *bio_ret = bio;
} else { } else {
...@@ -1635,6 +1780,16 @@ static int __extent_read_full_page(struct extent_io_tree *tree, ...@@ -1635,6 +1780,16 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
page_offset += iosize; page_offset += iosize;
continue; continue;
} }
/* we have an inline extent but it didn't get marked up
* to date. Error out
*/
if (block_start == EXTENT_MAP_INLINE) {
SetPageError(page);
unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
cur = cur + iosize;
page_offset += iosize;
continue;
}
ret = 0; ret = 0;
if (tree->ops && tree->ops->readpage_io_hook) { if (tree->ops && tree->ops->readpage_io_hook) {
...@@ -2205,7 +2360,8 @@ EXPORT_SYMBOL(extent_prepare_write); ...@@ -2205,7 +2360,8 @@ EXPORT_SYMBOL(extent_prepare_write);
* map records are removed * map records are removed
*/ */
int try_release_extent_mapping(struct extent_map_tree *map, int try_release_extent_mapping(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page) struct extent_io_tree *tree, struct page *page,
gfp_t mask)
{ {
struct extent_map *em; struct extent_map *em;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT; u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
...@@ -2213,30 +2369,42 @@ int try_release_extent_mapping(struct extent_map_tree *map, ...@@ -2213,30 +2369,42 @@ int try_release_extent_mapping(struct extent_map_tree *map,
u64 orig_start = start; u64 orig_start = start;
int ret = 1; int ret = 1;
while (start <= end) { if ((mask & __GFP_WAIT) &&
spin_lock(&map->lock); page->mapping->host->i_size > 16 * 1024 * 1024) {
em = lookup_extent_mapping(map, start, end); while (start <= end) {
if (!em || IS_ERR(em)) { spin_lock(&map->lock);
em = lookup_extent_mapping(map, start, end);
if (!em || IS_ERR(em)) {
spin_unlock(&map->lock);
break;
}
if (em->start != start) {
spin_unlock(&map->lock);
free_extent_map(em);
break;
}
if (!test_range_bit(tree, em->start,
extent_map_end(em) - 1,
EXTENT_LOCKED, 0)) {
remove_extent_mapping(map, em);
/* once for the rb tree */
free_extent_map(em);
}
start = extent_map_end(em);
spin_unlock(&map->lock); spin_unlock(&map->lock);
break;
} /* once for us */
if (!test_range_bit(tree, em->start, extent_map_end(em) - 1,
EXTENT_LOCKED, 0)) {
remove_extent_mapping(map, em);
/* once for the rb tree */
free_extent_map(em); free_extent_map(em);
} }
start = extent_map_end(em);
spin_unlock(&map->lock);
/* once for us */
free_extent_map(em);
} }
if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0)) if (test_range_bit(tree, orig_start, end, EXTENT_IOBITS, 0))
ret = 0; ret = 0;
else else {
if ((mask & GFP_NOFS) == GFP_NOFS)
mask = GFP_NOFS;
clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE, clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
1, 1, GFP_NOFS); 1, 1, mask);
}
return ret; return ret;
} }
EXPORT_SYMBOL(try_release_extent_mapping); EXPORT_SYMBOL(try_release_extent_mapping);
...@@ -2553,13 +2721,13 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, ...@@ -2553,13 +2721,13 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
} }
} }
clear_page_dirty_for_io(page); clear_page_dirty_for_io(page);
write_lock_irq(&page->mapping->tree_lock); read_lock_irq(&page->mapping->tree_lock);
if (!PageDirty(page)) { if (!PageDirty(page)) {
radix_tree_tag_clear(&page->mapping->page_tree, radix_tree_tag_clear(&page->mapping->page_tree,
page_index(page), page_index(page),
PAGECACHE_TAG_DIRTY); PAGECACHE_TAG_DIRTY);
} }
write_unlock_irq(&page->mapping->tree_lock); read_unlock_irq(&page->mapping->tree_lock);
unlock_page(page); unlock_page(page);
} }
return 0; return 0;
......
...@@ -23,19 +23,23 @@ ...@@ -23,19 +23,23 @@
#define EXTENT_PAGE_PRIVATE 1 #define EXTENT_PAGE_PRIVATE 1
#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3 #define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3
struct extent_state;
struct extent_io_ops { struct extent_io_ops {
int (*fill_delalloc)(struct inode *inode, u64 start, u64 end); int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
int (*writepage_io_hook)(struct page *page, u64 start, u64 end); int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
int (*readpage_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end); struct extent_state *state);
void (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state);
}; };
struct extent_io_tree { struct extent_io_tree {
struct rb_root state; struct rb_root state;
struct address_space *mapping; struct address_space *mapping;
u64 dirty_bytes; u64 dirty_bytes;
rwlock_t lock; spinlock_t lock;
struct extent_io_ops *ops; struct extent_io_ops *ops;
spinlock_t lru_lock; spinlock_t lru_lock;
struct list_head buffer_lru; struct list_head buffer_lru;
...@@ -45,8 +49,8 @@ struct extent_io_tree { ...@@ -45,8 +49,8 @@ struct extent_io_tree {
struct extent_state { struct extent_state {
u64 start; u64 start;
u64 end; /* inclusive */ u64 end; /* inclusive */
int in_tree;
struct rb_node rb_node; struct rb_node rb_node;
struct extent_io_tree *tree;
wait_queue_head_t wq; wait_queue_head_t wq;
atomic_t refs; atomic_t refs;
unsigned long state; unsigned long state;
...@@ -82,7 +86,8 @@ void extent_io_tree_init(struct extent_io_tree *tree, ...@@ -82,7 +86,8 @@ void extent_io_tree_init(struct extent_io_tree *tree,
struct address_space *mapping, gfp_t mask); struct address_space *mapping, gfp_t mask);
void extent_io_tree_empty_lru(struct extent_io_tree *tree); void extent_io_tree_empty_lru(struct extent_io_tree *tree);
int try_release_extent_mapping(struct extent_map_tree *map, int try_release_extent_mapping(struct extent_map_tree *map,
struct extent_io_tree *tree, struct page *page); struct extent_io_tree *tree, struct page *page,
gfp_t mask);
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
int extent_read_full_page(struct extent_io_tree *tree, struct page *page, int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
......
...@@ -331,7 +331,8 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end) ...@@ -331,7 +331,8 @@ int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
return ret; return ret;
} }
int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
{ {
size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
...@@ -347,7 +348,12 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end) ...@@ -347,7 +348,12 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end)
btrfs_test_flag(inode, NODATASUM)) btrfs_test_flag(inode, NODATASUM))
return 0; return 0;
ret = get_state_private(io_tree, start, &private); if (state->start == start) {
private = state->private;
ret = 0;
} else {
ret = get_state_private(io_tree, start, &private);
}
local_irq_save(flags); local_irq_save(flags);
kaddr = kmap_atomic(page, KM_IRQ0); kaddr = kmap_atomic(page, KM_IRQ0);
if (ret) { if (ret) {
...@@ -1830,7 +1836,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) ...@@ -1830,7 +1836,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
} }
struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
size_t page_offset, u64 start, u64 len, size_t pg_offset, u64 start, u64 len,
int create) int create)
{ {
int ret; int ret;
...@@ -1865,7 +1871,10 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, ...@@ -1865,7 +1871,10 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
start, len, em->start, em->len); start, len, em->start, em->len);
WARN_ON(1); WARN_ON(1);
} }
goto out; if (em->block_start == EXTENT_MAP_INLINE && page)
free_extent_map(em);
else
goto out;
} }
em = alloc_extent_map(GFP_NOFS); em = alloc_extent_map(GFP_NOFS);
if (!em) { if (!em) {
...@@ -1930,6 +1939,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, ...@@ -1930,6 +1939,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
em->len = extent_end - extent_start; em->len = extent_end - extent_start;
goto insert; goto insert;
} else if (found_type == BTRFS_FILE_EXTENT_INLINE) { } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
u64 page_start;
unsigned long ptr; unsigned long ptr;
char *map; char *map;
size_t size; size_t size;
...@@ -1959,16 +1969,17 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, ...@@ -1959,16 +1969,17 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
goto out; goto out;
} }
extent_offset = ((u64)page->index << PAGE_CACHE_SHIFT) - page_start = page_offset(page) + pg_offset;
extent_start + page_offset; extent_offset = page_start - extent_start;
copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset, copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
size - extent_offset); size - extent_offset);
em->start = extent_start + extent_offset; em->start = extent_start + extent_offset;
em->len = copy_size; em->len = (copy_size + root->sectorsize - 1) &
~((u64)root->sectorsize - 1);
map = kmap(page); map = kmap(page);
ptr = btrfs_file_extent_inline_start(item) + extent_offset; ptr = btrfs_file_extent_inline_start(item) + extent_offset;
if (create == 0 && !PageUptodate(page)) { if (create == 0 && !PageUptodate(page)) {
read_extent_buffer(leaf, map + page_offset, ptr, read_extent_buffer(leaf, map + pg_offset, ptr,
copy_size); copy_size);
flush_dcache_page(page); flush_dcache_page(page);
} else if (create && PageUptodate(page)) { } else if (create && PageUptodate(page)) {
...@@ -1980,7 +1991,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, ...@@ -1980,7 +1991,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
trans = btrfs_start_transaction(root, 1); trans = btrfs_start_transaction(root, 1);
goto again; goto again;
} }
write_extent_buffer(leaf, map + page_offset, ptr, write_extent_buffer(leaf, map + pg_offset, ptr,
copy_size); copy_size);
btrfs_mark_buffer_dirty(leaf); btrfs_mark_buffer_dirty(leaf);
} }
...@@ -2077,7 +2088,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping, ...@@ -2077,7 +2088,7 @@ btrfs_readpages(struct file *file, struct address_space *mapping,
btrfs_get_extent); btrfs_get_extent);
} }
static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
{ {
struct extent_io_tree *tree; struct extent_io_tree *tree;
struct extent_map_tree *map; struct extent_map_tree *map;
...@@ -2085,7 +2096,7 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags) ...@@ -2085,7 +2096,7 @@ static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
tree = &BTRFS_I(page->mapping->host)->io_tree; tree = &BTRFS_I(page->mapping->host)->io_tree;
map = &BTRFS_I(page->mapping->host)->extent_tree; map = &BTRFS_I(page->mapping->host)->extent_tree;
ret = try_release_extent_mapping(map, tree, page); ret = try_release_extent_mapping(map, tree, page, gfp_flags);
if (ret == 1) { if (ret == 1) {
ClearPagePrivate(page); ClearPagePrivate(page);
set_page_private(page, 0); set_page_private(page, 0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment