Commit eb84ae03 authored by Chris Mason

Btrfs: Cleanup and comment ordered-data.c

Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 54641bd1
...@@ -1343,7 +1343,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) ...@@ -1343,7 +1343,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
btrfs_wait_ordered_extent(inode, ordered); btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered);
goto again; goto again;
} }
...@@ -2660,6 +2660,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) ...@@ -2660,6 +2660,10 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
ordered = btrfs_lookup_ordered_extent(page->mapping->host, ordered = btrfs_lookup_ordered_extent(page->mapping->host,
page_offset(page)); page_offset(page));
if (ordered) { if (ordered) {
/*
* IO on this page will never be started, so we need
* to account for any ordered extents now
*/
clear_extent_bit(tree, page_start, page_end, clear_extent_bit(tree, page_start, page_end,
EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_LOCKED, 1, 0, GFP_NOFS); EXTENT_LOCKED, 1, 0, GFP_NOFS);
...@@ -2732,11 +2736,15 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page) ...@@ -2732,11 +2736,15 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
lock_extent(io_tree, page_start, page_end, GFP_NOFS); lock_extent(io_tree, page_start, page_end, GFP_NOFS);
set_page_extent_mapped(page); set_page_extent_mapped(page);
/*
* we can't set the delalloc bits if there are pending ordered
* extents. Drop our locks and wait for them to finish
*/
ordered = btrfs_lookup_ordered_extent(inode, page_start); ordered = btrfs_lookup_ordered_extent(inode, page_start);
if (ordered) { if (ordered) {
unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
unlock_page(page); unlock_page(page);
btrfs_wait_ordered_extent(inode, ordered); btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered); btrfs_put_ordered_extent(ordered);
goto again; goto again;
} }
......
...@@ -136,6 +136,19 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, ...@@ -136,6 +136,19 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
return ret; return ret;
} }
/* allocate and add a new ordered_extent into the per-inode tree.
* file_offset is the logical offset in the file
*
* start is the disk block number of an extent already reserved in the
* extent allocation tree
*
* len is the length of the extent
*
* This also sets the EXTENT_ORDERED bit on the range in the inode.
*
* The tree is given a single reference on the ordered extent that was
* inserted.
*/
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
u64 start, u64 len) u64 start, u64 len)
{ {
...@@ -152,7 +165,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, ...@@ -152,7 +165,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
entry->file_offset = file_offset; entry->file_offset = file_offset;
entry->start = start; entry->start = start;
entry->len = len; entry->len = len;
entry->inode = inode;
/* one ref for the tree */ /* one ref for the tree */
atomic_set(&entry->refs, 1); atomic_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait); init_waitqueue_head(&entry->wait);
...@@ -167,12 +179,15 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, ...@@ -167,12 +179,15 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
entry_end(entry) - 1, GFP_NOFS); entry_end(entry) - 1, GFP_NOFS);
set_bit(BTRFS_ORDERED_START, &entry->flags);
mutex_unlock(&tree->mutex); mutex_unlock(&tree->mutex);
BUG_ON(node); BUG_ON(node);
return 0; return 0;
} }
/*
* Add a struct btrfs_ordered_sum into the list of checksums to be inserted
* when an ordered extent is finished.
*/
int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum) int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum)
{ {
struct btrfs_ordered_inode_tree *tree; struct btrfs_ordered_inode_tree *tree;
...@@ -182,29 +197,25 @@ int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum) ...@@ -182,29 +197,25 @@ int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum)
tree = &BTRFS_I(inode)->ordered_tree; tree = &BTRFS_I(inode)->ordered_tree;
mutex_lock(&tree->mutex); mutex_lock(&tree->mutex);
node = tree_search(tree, sum->file_offset); node = tree_search(tree, sum->file_offset);
if (!node) {
search_fail:
printk("add ordered sum failed to find a node for inode %lu offset %Lu\n", inode->i_ino, sum->file_offset);
node = rb_first(&tree->tree);
while(node) {
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
printk("entry %Lu %Lu %Lu\n", entry->file_offset, entry->file_offset + entry->len, entry->start);
node = rb_next(node);
}
BUG();
}
BUG_ON(!node); BUG_ON(!node);
entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
if (!offset_in_entry(entry, sum->file_offset)) { BUG_ON(!offset_in_entry(entry, sum->file_offset));
goto search_fail;
}
list_add_tail(&sum->list, &entry->list); list_add_tail(&sum->list, &entry->list);
mutex_unlock(&tree->mutex); mutex_unlock(&tree->mutex);
return 0; return 0;
} }
/*
* this is used to account for finished IO across a given range
* of the file. The IO should not span ordered extents. If
* a given ordered_extent is completely done, 1 is returned, otherwise
* 0.
*
* test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
* to make sure this function only returns 1 once for a given ordered extent.
*/
int btrfs_dec_test_ordered_pending(struct inode *inode, int btrfs_dec_test_ordered_pending(struct inode *inode,
u64 file_offset, u64 io_size) u64 file_offset, u64 io_size)
{ {
...@@ -233,9 +244,6 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, ...@@ -233,9 +244,6 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
ret = test_range_bit(io_tree, entry->file_offset, ret = test_range_bit(io_tree, entry->file_offset,
entry->file_offset + entry->len - 1, entry->file_offset + entry->len - 1,
EXTENT_ORDERED, 0); EXTENT_ORDERED, 0);
if (!test_bit(BTRFS_ORDERED_START, &entry->flags)) {
printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file_offset, entry_end(entry));
}
if (ret == 0) if (ret == 0)
ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
out: out:
...@@ -243,6 +251,10 @@ printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file ...@@ -243,6 +251,10 @@ printk("inode %lu not ready yet for extent %Lu %Lu\n", inode->i_ino, entry->file
return ret == 0; return ret == 0;
} }
/*
* used to drop a reference on an ordered extent. This will free
* the extent if the last reference is dropped
*/
int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
{ {
struct list_head *cur; struct list_head *cur;
...@@ -260,6 +272,10 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) ...@@ -260,6 +272,10 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
return 0; return 0;
} }
/*
* remove an ordered extent from the tree. No references are dropped
* but, anyone waiting on this extent is woken up.
*/
int btrfs_remove_ordered_extent(struct inode *inode, int btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry) struct btrfs_ordered_extent *entry)
{ {
...@@ -277,27 +293,25 @@ int btrfs_remove_ordered_extent(struct inode *inode, ...@@ -277,27 +293,25 @@ int btrfs_remove_ordered_extent(struct inode *inode,
return 0; return 0;
} }
void btrfs_wait_ordered_extent(struct inode *inode, /*
struct btrfs_ordered_extent *entry) * Used to start IO or wait for a given ordered extent to finish.
{ *
u64 start = entry->file_offset; * If wait is one, this effectively waits on page writeback for all the pages
u64 end = start + entry->len - 1; * in the extent, and it waits on the io completion code to insert
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) * metadata into the btree corresponding to the extent
do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); */
#else void btrfs_start_ordered_extent(struct inode *inode,
do_sync_mapping_range(inode->i_mapping, start, end, struct btrfs_ordered_extent *entry,
SYNC_FILE_RANGE_WRITE); int wait)
#endif
wait_event(entry->wait,
test_bit(BTRFS_ORDERED_COMPLETE, &entry->flags));
}
static void btrfs_start_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry, int wait)
{ {
u64 start = entry->file_offset; u64 start = entry->file_offset;
u64 end = start + entry->len - 1; u64 end = start + entry->len - 1;
/*
* pages in the range can be dirty, clean or writeback. We
* start IO on any dirty ones so the wait doesn't stall waiting
* for pdflush to find them
*/
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE); do_sync_file_range(file, start, end, SYNC_FILE_RANGE_WRITE);
#else #else
...@@ -309,6 +323,9 @@ static void btrfs_start_ordered_extent(struct inode *inode, ...@@ -309,6 +323,9 @@ static void btrfs_start_ordered_extent(struct inode *inode,
&entry->flags)); &entry->flags));
} }
/*
* Used to wait on ordered extents across a large range of bytes.
*/
void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
{ {
u64 end; u64 end;
...@@ -349,31 +366,11 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) ...@@ -349,31 +366,11 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
} }
} }
int btrfs_add_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent *ordered,
u64 start, u64 len)
{
WARN_ON(1);
return 0;
#if 0
int ret;
struct btrfs_ordered_inode_tree *tree;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
tree = &BTRFS_I(inode)->ordered_tree;
mutex_lock(&tree->mutex);
if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
ret = -EAGAIN;
goto out;
}
set_extent_ordered(io_tree, start, start + len - 1, GFP_NOFS);
ret = 0;
out:
mutex_unlock(&tree->mutex);
return ret;
#endif
}
/*
* find an ordered extent corresponding to file_offset. return NULL if
* nothing is found, otherwise take a reference on the extent and return it
*/
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
u64 file_offset) u64 file_offset)
{ {
...@@ -397,6 +394,10 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, ...@@ -397,6 +394,10 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
return entry; return entry;
} }
/*
* lookup and return any extent before 'file_offset'. NULL is returned
* if none is found
*/
struct btrfs_ordered_extent * struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset) btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset)
{ {
...@@ -417,6 +418,10 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset) ...@@ -417,6 +418,10 @@ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset)
return entry; return entry;
} }
/*
* After an extent is done, call this to conditionally update the on disk
* i_size. i_size is updated to cover any fully written part of the file.
*/
int btrfs_ordered_update_i_size(struct inode *inode, int btrfs_ordered_update_i_size(struct inode *inode,
struct btrfs_ordered_extent *ordered) struct btrfs_ordered_extent *ordered)
{ {
...@@ -507,6 +512,11 @@ int btrfs_ordered_update_i_size(struct inode *inode, ...@@ -507,6 +512,11 @@ int btrfs_ordered_update_i_size(struct inode *inode,
return 0; return 0;
} }
/*
* search the ordered extents for one corresponding to 'offset' and
* try to find a checksum. This is used because we allow pages to
* be reclaimed before their checksum is actually put into the btree
*/
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
{ {
struct btrfs_ordered_sum *ordered_sum; struct btrfs_ordered_sum *ordered_sum;
......
...@@ -19,12 +19,19 @@ ...@@ -19,12 +19,19 @@
#ifndef __BTRFS_ORDERED_DATA__ #ifndef __BTRFS_ORDERED_DATA__
#define __BTRFS_ORDERED_DATA__ #define __BTRFS_ORDERED_DATA__
/* one of these per inode */
struct btrfs_ordered_inode_tree { struct btrfs_ordered_inode_tree {
struct mutex mutex; struct mutex mutex;
struct rb_root tree; struct rb_root tree;
struct rb_node *last; struct rb_node *last;
}; };
/*
* these are used to collect checksums done just before bios submission.
* They are attached via a list into the ordered extent, and
* checksum items are inserted into the tree after all the blocks in
* the ordered extent are on disk
*/
struct btrfs_sector_sum { struct btrfs_sector_sum {
u64 offset; u64 offset;
u32 sum; u32 sum;
...@@ -34,27 +41,56 @@ struct btrfs_ordered_sum { ...@@ -34,27 +41,56 @@ struct btrfs_ordered_sum {
u64 file_offset; u64 file_offset;
u64 len; u64 len;
struct list_head list; struct list_head list;
/* last field is a variable length array of btrfs_sector_sums */
struct btrfs_sector_sum sums; struct btrfs_sector_sum sums;
}; };
/* bits for the flags field */ /*
* bits for the flags field:
*
* BTRFS_ORDERED_IO_DONE is set when all of the blocks are written.
* It is used to make sure metadata is inserted into the tree only once
* per extent.
*
* BTRFS_ORDERED_COMPLETE is set when the extent is removed from the
* rbtree, just before waking any waiters. It is used to indicate the
* IO is done and any metadata is inserted into the tree.
*/
#define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */ #define BTRFS_ORDERED_IO_DONE 0 /* set when all the pages are written */
#define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */ #define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
#define BTRFS_ORDERED_START 2 /* set when tree setup */
struct btrfs_ordered_extent { struct btrfs_ordered_extent {
/* logical offset in the file */
u64 file_offset; u64 file_offset;
/* disk byte number */
u64 start; u64 start;
/* length of the extent in bytes */
u64 len; u64 len;
/* flags (described above) */
unsigned long flags; unsigned long flags;
/* reference count */
atomic_t refs; atomic_t refs;
/* list of checksums for insertion when the extent io is done */
struct list_head list; struct list_head list;
struct inode *inode;
/* used to wait for the BTRFS_ORDERED_COMPLETE bit */
wait_queue_head_t wait; wait_queue_head_t wait;
/* our friendly rbtree entry */
struct rb_node rb_node; struct rb_node rb_node;
}; };
/*
* calculates the total size you need to allocate for an ordered sum
* structure spanning 'bytes' in the file
*/
static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes) static inline int btrfs_ordered_sum_size(struct btrfs_root *root, u64 bytes)
{ {
unsigned long num_sectors = (bytes + root->sectorsize - 1) / unsigned long num_sectors = (bytes + root->sectorsize - 1) /
...@@ -81,14 +117,11 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, ...@@ -81,14 +117,11 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum); int btrfs_add_ordered_sum(struct inode *inode, struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode, struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
u64 file_offset); u64 file_offset);
void btrfs_wait_ordered_extent(struct inode *inode, void btrfs_start_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry); struct btrfs_ordered_extent *entry, int wait);
void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent * struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
int btrfs_add_ordered_pending(struct inode *inode,
struct btrfs_ordered_extent *ordered,
u64 start, u64 len);
int btrfs_ordered_update_i_size(struct inode *inode, int btrfs_ordered_update_i_size(struct inode *inode,
struct btrfs_ordered_extent *ordered); struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum); int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment