Commit f188591e authored by Chris Mason

Btrfs: Retry metadata reads in the face of checksum failures

Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 22c59948
......@@ -569,7 +569,16 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
static int noinline check_block(struct btrfs_root *root,
struct btrfs_path *path, int level)
{
return 0;
u64 found_start;
if (btrfs_header_level(path->nodes[level]) != level)
printk("warning: bad level %Lu wanted %d found %d\n",
path->nodes[level]->start, level,
btrfs_header_level(path->nodes[level]));
found_start = btrfs_header_bytenr(path->nodes[level]);
if (found_start != path->nodes[level]->start) {
printk("warning: bad bytentr %Lu found %Lu\n",
path->nodes[level]->start, found_start);
}
#if 0
struct extent_buffer *buf = path->nodes[level];
......
......@@ -156,7 +156,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
memcpy(&found, result, BTRFS_CRC32_SIZE);
read_extent_buffer(buf, &val, 0, BTRFS_CRC32_SIZE);
WARN_ON(1);
printk("btrfs: %s checksum verify failed on %llu "
"wanted %X found %X from_this_trans %d "
"level %d\n",
......@@ -171,6 +170,40 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
return 0;
}
/*
 * Read the pages backing a btree extent buffer, retrying with explicit
 * mirror numbers when a read fails.
 *
 * The first attempt passes mirror_num 0 to read_extent_buffer_pages().
 * After every failed attempt the number of copies is looked up with
 * btrfs_num_copies() and the read is retried with mirror_num 1, 2, ...
 * up to and including that count.
 *
 * @root:  root whose fs_info supplies the btree inode and the mapping tree
 * @eb:    extent buffer to read into
 * @start: passed through unchanged to read_extent_buffer_pages()
 *
 * Returns 0 as soon as one read succeeds.  Otherwise returns the error of
 * the last failed read_extent_buffer_pages() call, either because only one
 * copy exists or because every mirror number has been tried.
 */
static int btree_read_extent_buffer_pages(struct btrfs_root *root,
					  struct extent_buffer *eb,
					  u64 start)
{
	struct extent_io_tree *io_tree;
	int ret;
	int num_copies = 0;
	int mirror_num = 0;

	io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;

	/* every exit from this loop is an explicit return */
	for (;;) {
		ret = read_extent_buffer_pages(io_tree, eb, start, 1,
					       btree_get_extent, mirror_num);
		if (!ret) {
			/* only report success when a retry was needed */
			if (mirror_num)
				printk("good read %llu mirror %d total %d\n",
				       (unsigned long long)eb->start,
				       mirror_num, num_copies);
			return 0;
		}

		num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
					      eb->start, eb->len);
		printk("failed to read %llu mirror %d total %d\n",
		       (unsigned long long)eb->start, mirror_num, num_copies);

		/* nothing else to fall back on */
		if (num_copies == 1) {
			printk("reading %llu failed only one copy\n",
			       (unsigned long long)eb->start);
			return ret;
		}

		/* mirror numbers handed down on retries run 1..num_copies */
		mirror_num++;
		if (mirror_num > num_copies) {
			printk("bailing at mirror %d of %d\n",
			       mirror_num, num_copies);
			return ret;
		}
	}
}
int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
{
......@@ -180,6 +213,8 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
int found_level;
unsigned long len;
struct extent_buffer *eb;
int ret;
tree = &BTRFS_I(page->mapping->host)->io_tree;
if (page->private == EXTENT_PAGE_PRIVATE)
......@@ -191,8 +226,8 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
WARN_ON(1);
}
eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1,
btree_get_extent);
ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE);
BUG_ON(ret);
btrfs_clear_buffer_defrag(eb);
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
......@@ -240,7 +275,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
unsigned long len;
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
int ret;
int ret = 0;
tree = &BTRFS_I(page->mapping->host)->io_tree;
if (page->private == EXTENT_PAGE_PRIVATE)
......@@ -252,25 +287,26 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
WARN_ON(1);
}
eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);
read_extent_buffer_pages(tree, eb, start + PAGE_CACHE_SIZE, 1,
btree_get_extent);
btrfs_clear_buffer_defrag(eb);
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
start, found_start, len);
WARN_ON(1);
printk("bad start on %Lu found %Lu\n", eb->start, found_start);
ret = -EIO;
goto err;
}
if (eb->first_page != page) {
printk("bad first page %lu %lu\n", eb->first_page->index,
page->index);
WARN_ON(1);
ret = -EIO;
goto err;
}
found_level = btrfs_header_level(eb);
ret = csum_tree_block(root, eb, 1);
if (ret)
ret = -EIO;
end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
end = eb->start + end - 1;
......@@ -278,7 +314,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
err:
free_extent_buffer(eb);
out:
return 0;
return ret;
}
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
......@@ -329,7 +365,8 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
return 0;
}
static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio)
static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 offset;
......@@ -338,7 +375,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio)
offset = bio->bi_sector << 9;
if (rw & (1 << BIO_RW)) {
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio);
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num);
}
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 1);
......@@ -349,7 +386,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio)
submit_bio(rw, bio);
return 0;
}
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio);
return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num);
}
static int btree_writepage(struct page *page, struct writeback_control *wbc)
......@@ -459,7 +496,7 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize)
if (!buf)
return 0;
read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
buf, 0, 0, btree_get_extent);
buf, 0, 0, btree_get_extent, 0);
free_extent_buffer(buf);
return ret;
}
......@@ -522,8 +559,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
if (!buf)
return NULL;
ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, buf, 0,
1, btree_get_extent);
ret = btree_read_extent_buffer_pages(root, buf, 0);
if (ret == 0) {
buf->flags |= EXTENT_UPTODATE;
......@@ -1366,10 +1402,8 @@ int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
int btrfs_read_buffer(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
int ret;
ret = read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
buf, 0, 1, btree_get_extent);
ret = btree_read_extent_buffer_pages(root, buf, 0);
if (ret == 0) {
buf->flags |= EXTENT_UPTODATE;
}
......
......@@ -1546,7 +1546,7 @@ static int end_bio_extent_readpage(struct bio *bio,
!(state->state & EXTENT_LOCKED))
state = NULL;
}
if (!state) {
if (!state && uptodate) {
spin_unlock_irqrestore(&tree->lock, flags);
set_extent_uptodate(tree, start, end,
GFP_ATOMIC);
......@@ -1567,8 +1567,10 @@ static int end_bio_extent_readpage(struct bio *bio,
} else {
state = NULL;
}
if (uptodate) {
set_state_cb(tree, clear, EXTENT_UPTODATE);
clear->state |= EXTENT_UPTODATE;
}
clear_state_bit(tree, clear, EXTENT_LOCKED,
1, 0);
if (cur == start)
......@@ -1685,7 +1687,7 @@ extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
return bio;
}
static int submit_one_bio(int rw, struct bio *bio)
static int submit_one_bio(int rw, struct bio *bio, int mirror_num)
{
u64 maxsector;
int ret = 0;
......@@ -1722,7 +1724,8 @@ static int submit_one_bio(int rw, struct bio *bio)
WARN_ON(1);
}
if (tree->ops && tree->ops->submit_bio_hook)
tree->ops->submit_bio_hook(page->mapping->host, rw, bio);
tree->ops->submit_bio_hook(page->mapping->host, rw, bio,
mirror_num);
else
submit_bio(rw, bio);
if (bio_flagged(bio, BIO_EOPNOTSUPP))
......@@ -1737,7 +1740,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
struct block_device *bdev,
struct bio **bio_ret,
unsigned long max_pages,
bio_end_io_t end_io_func)
bio_end_io_t end_io_func,
int mirror_num)
{
int ret = 0;
struct bio *bio;
......@@ -1749,7 +1753,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
(tree->ops && tree->ops->merge_bio_hook &&
tree->ops->merge_bio_hook(page, offset, size, bio)) ||
bio_add_page(bio, page, size, offset) < size) {
ret = submit_one_bio(rw, bio);
ret = submit_one_bio(rw, bio, mirror_num);
bio = NULL;
} else {
return 0;
......@@ -1769,7 +1773,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
if (bio_ret) {
*bio_ret = bio;
} else {
ret = submit_one_bio(rw, bio);
ret = submit_one_bio(rw, bio, mirror_num);
}
return ret;
......@@ -1798,7 +1802,7 @@ void set_page_extent_head(struct page *page, unsigned long len)
static int __extent_read_full_page(struct extent_io_tree *tree,
struct page *page,
get_extent_t *get_extent,
struct bio **bio)
struct bio **bio, int mirror_num)
{
struct inode *inode = page->mapping->host;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
......@@ -1901,7 +1905,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
ret = submit_extent_page(READ, tree, page,
sector, iosize, page_offset,
bdev, bio, nr,
end_bio_extent_readpage);
end_bio_extent_readpage, mirror_num);
}
if (ret)
SetPageError(page);
......@@ -1923,9 +1927,9 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
struct bio *bio = NULL;
int ret;
ret = __extent_read_full_page(tree, page, get_extent, &bio);
ret = __extent_read_full_page(tree, page, get_extent, &bio, 0);
if (bio)
submit_one_bio(READ, bio);
submit_one_bio(READ, bio, 0);
return ret;
}
EXPORT_SYMBOL(extent_read_full_page);
......@@ -2077,7 +2081,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
ret = submit_extent_page(WRITE, tree, page, sector,
iosize, page_offset, bdev,
&epd->bio, max_nr,
end_bio_extent_writepage);
end_bio_extent_writepage, 0);
if (ret)
SetPageError(page);
}
......@@ -2244,7 +2248,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
write_cache_pages(mapping, &wbc_writepages, __extent_writepage, &epd);
if (epd.bio) {
submit_one_bio(WRITE, epd.bio);
submit_one_bio(WRITE, epd.bio, 0);
}
return ret;
}
......@@ -2265,7 +2269,7 @@ int extent_writepages(struct extent_io_tree *tree,
ret = write_cache_pages(mapping, wbc, __extent_writepage, &epd);
if (epd.bio) {
submit_one_bio(WRITE, epd.bio);
submit_one_bio(WRITE, epd.bio, 0);
}
return ret;
}
......@@ -2297,7 +2301,8 @@ int extent_readpages(struct extent_io_tree *tree,
page_cache_get(page);
if (!pagevec_add(&pvec, page))
__pagevec_lru_add(&pvec);
__extent_read_full_page(tree, page, get_extent, &bio);
__extent_read_full_page(tree, page, get_extent,
&bio, 0);
}
page_cache_release(page);
}
......@@ -2305,7 +2310,7 @@ int extent_readpages(struct extent_io_tree *tree,
__pagevec_lru_add(&pvec);
BUG_ON(!list_empty(pages));
if (bio)
submit_one_bio(READ, bio);
submit_one_bio(READ, bio, 0);
return 0;
}
EXPORT_SYMBOL(extent_readpages);
......@@ -2430,7 +2435,7 @@ int extent_prepare_write(struct extent_io_tree *tree,
ret = submit_extent_page(READ, tree, page,
sector, iosize, page_offset, em->bdev,
NULL, 1,
end_bio_extent_preparewrite);
end_bio_extent_preparewrite, 0);
iocount++;
block_start = block_start + iosize;
} else {
......@@ -2696,6 +2701,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
mark_page_accessed(page0);
set_page_extent_mapped(page0);
set_page_extent_head(page0, len);
uptodate = PageUptodate(page0);
} else {
i = 0;
}
......@@ -3006,7 +3012,7 @@ EXPORT_SYMBOL(extent_buffer_uptodate);
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb,
u64 start, int wait,
get_extent_t *get_extent)
get_extent_t *get_extent, int mirror_num)
{
unsigned long i;
unsigned long start_i;
......@@ -3062,8 +3068,10 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
if (!PageUptodate(page)) {
if (start_i == 0)
inc_all_pages = 1;
ClearPageError(page);
err = __extent_read_full_page(tree, page,
get_extent, &bio);
get_extent, &bio,
mirror_num);
if (err) {
ret = err;
}
......@@ -3073,7 +3081,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
}
if (bio)
submit_one_bio(READ, bio);
submit_one_bio(READ, bio, mirror_num);
if (ret || !wait) {
return ret;
......
......@@ -27,7 +27,8 @@ struct extent_state;
struct extent_io_ops {
int (*fill_delalloc)(struct inode *inode, u64 start, u64 end);
int (*writepage_io_hook)(struct page *page, u64 start, u64 end);
int (*submit_bio_hook)(struct inode *inode, int rw, struct bio *bio);
int (*submit_bio_hook)(struct inode *inode, int rw, struct bio *bio,
int mirror_num);
int (*merge_bio_hook)(struct page *page, unsigned long offset,
size_t size, struct bio *bio);
int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
......@@ -172,7 +173,7 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
void free_extent_buffer(struct extent_buffer *eb);
int read_extent_buffer_pages(struct extent_io_tree *tree,
struct extent_buffer *eb, u64 start, int wait,
get_extent_t *get_extent);
get_extent_t *get_extent, int mirror_num);
static inline void extent_buffer_get(struct extent_buffer *eb)
{
......
......@@ -314,7 +314,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
map_tree = &root->fs_info->mapping_tree;
map_length = length;
ret = btrfs_map_block(map_tree, READ, logical,
&map_length, NULL);
&map_length, NULL, 0);
if (map_length < length + size) {
return 1;
......@@ -322,7 +322,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
return 0;
}
int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio)
int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int mirror_num)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
......@@ -347,7 +348,7 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio)
BUG_ON(ret);
mutex_unlock(&root->fs_info->fs_mutex);
mapit:
return btrfs_map_bio(root, rw, bio);
return btrfs_map_bio(root, rw, bio, mirror_num);
}
int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
......
......@@ -788,9 +788,31 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
}
}
/*
 * Report how many copies exist for the chunk mapping that covers @logical.
 *
 * The mapping is looked up in @map_tree for the range (@logical, @len)
 * while holding the extent map tree's spinlock; the lookup must succeed
 * and the returned mapping must not start after @logical (both enforced
 * with BUG_ON).
 *
 * Returns the mapping's num_stripes when its type has the DUP or RAID1
 * block group bit set, and 1 for every other type.
 */
int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
{
	struct extent_map_tree *tree = &map_tree->map_tree;
	struct extent_map *extent;
	struct map_lookup *lookup;
	int copies = 1;

	spin_lock(&tree->lock);

	extent = lookup_extent_mapping(tree, logical, len);
	BUG_ON(!extent);
	BUG_ON(extent->start > logical ||
	       extent->start + extent->len < logical);

	/* the bdev field of the extent map carries the map_lookup pointer */
	lookup = (struct map_lookup *)extent->bdev;
	if (lookup->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
		copies = lookup->num_stripes;

	/* drop the lookup reference before releasing the tree lock */
	free_extent_map(extent);
	spin_unlock(&tree->lock);

	return copies;
}
int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
u64 logical, u64 *length,
struct btrfs_multi_bio **multi_ret)
struct btrfs_multi_bio **multi_ret, int mirror_num)
{
struct extent_map *em;
struct map_lookup *map;
......@@ -822,6 +844,9 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
map = (struct map_lookup *)em->bdev;
offset = logical - em->start;
if (mirror_num > map->num_stripes)
mirror_num = 0;
/* if our multi bio struct is too small, back off and try again */
if (multi_ret && (rw & (1 << BIO_RW)) &&
stripes_allocated < map->num_stripes &&
......@@ -862,7 +887,9 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
if (rw & (1 << BIO_RW))
multi->num_stripes = map->num_stripes;
else {
else if (mirror_num) {
stripe_index = mirror_num - 1;
} else {
int i;
u64 least = (u64)-1;
struct btrfs_device *cur;
......@@ -880,6 +907,8 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
if (rw & (1 << BIO_RW))
multi->num_stripes = map->num_stripes;
else if (mirror_num)
stripe_index = mirror_num - 1;
} else {
/*
* after this do_div call, stripe_nr is the number of stripes
......@@ -938,7 +967,8 @@ static int end_bio_multi_stripe(struct bio *bio,
#endif
}
int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio)
int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
int mirror_num)
{
struct btrfs_mapping_tree *map_tree;
struct btrfs_device *dev;
......@@ -960,7 +990,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio)
map_tree = &root->fs_info->mapping_tree;
map_length = length;
ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi);
ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi,
mirror_num);
BUG_ON(ret);
total_devs = multi->num_stripes;
......
......@@ -93,7 +93,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
u64 owner, u64 num_bytes, u64 *start);
int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
u64 logical, u64 *length,
struct btrfs_multi_bio **multi_ret);
struct btrfs_multi_bio **multi_ret, int mirror_num);
int btrfs_read_sys_array(struct btrfs_root *root);
int btrfs_read_chunk_tree(struct btrfs_root *root);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
......@@ -101,7 +101,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
u64 *num_bytes, u64 type);
void btrfs_mapping_init(struct btrfs_mapping_tree *tree);
void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree);
int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio);
int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
int mirror_num);
int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf);
int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
int flags, void *holder);
......@@ -112,4 +113,5 @@ int btrfs_add_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_device *device);
int btrfs_cleanup_fs_uuids(void);
int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len);
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment