Commit a512bbf8 authored by Yan Zheng's avatar Yan Zheng Committed by Chris Mason

Btrfs: superblock duplication

This patch implements superblock duplication. Superblocks
are stored at offset 16K, 64M and 256G on every devices.
Spaces used by superblocks are preserved by the allocator,
which uses a reverse mapping function to find the logical
addresses that correspond to superblocks. Thank you,
Signed-off-by: default avatarYan Zheng <zheng.yan@oracle.com>
parent d20f7043
......@@ -1595,8 +1595,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
bh = __bread(fs_devices->latest_bdev,
BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
if (!bh)
goto fail_iput;
......@@ -1710,7 +1709,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
}
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_sys_array(tree_root);
ret = btrfs_read_sys_array(tree_root, btrfs_super_bytenr(disk_super));
mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
printk("btrfs: failed to read the system array on %s\n",
......@@ -1905,19 +1904,147 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
put_bh(bh);
}
static int write_all_supers(struct btrfs_root *root)
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
{
struct buffer_head *bh;
struct buffer_head *latest = NULL;
struct btrfs_super_block *super;
int i;
u64 transid = 0;
u64 bytenr;
/* we would like to check all the supers, but that would make
* a btrfs mount succeed after a mkfs from a different FS.
* So, we need to add a special mount option to scan for
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
*/
for (i = 0; i < 1; i++) {
bytenr = btrfs_sb_offset(i);
if (bytenr + 4096 >= i_size_read(bdev->bd_inode))
break;
bh = __bread(bdev, bytenr / 4096, 4096);
if (!bh)
continue;
super = (struct btrfs_super_block *)bh->b_data;
if (btrfs_super_bytenr(super) != bytenr ||
strncmp((char *)(&super->magic), BTRFS_MAGIC,
sizeof(super->magic))) {
brelse(bh);
continue;
}
if (!latest || btrfs_super_generation(super) > transid) {
brelse(latest);
latest = bh;
transid = btrfs_super_generation(super);
} else {
brelse(bh);
}
}
return latest;
}
static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb,
int do_barriers, int wait, int max_mirrors)
{
struct buffer_head *bh;
int i;
int ret;
int errors = 0;
u32 crc;
u64 bytenr;
int last_barrier = 0;
if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
/* make sure only the last submit_bh does a barrier */
if (do_barriers) {
for (i = 0; i < max_mirrors; i++) {
bytenr = btrfs_sb_offset(i);
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
device->total_bytes)
break;
last_barrier = i;
}
}
for (i = 0; i < max_mirrors; i++) {
bytenr = btrfs_sb_offset(i);
if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
break;
if (wait) {
bh = __find_get_block(device->bdev, bytenr / 4096,
BTRFS_SUPER_INFO_SIZE);
BUG_ON(!bh);
brelse(bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh)) {
brelse(bh);
continue;
}
} else {
btrfs_set_super_bytenr(sb, bytenr);
crc = ~(u32)0;
crc = btrfs_csum_data(NULL, (char *)sb +
BTRFS_CSUM_SIZE, crc,
BTRFS_SUPER_INFO_SIZE -
BTRFS_CSUM_SIZE);
btrfs_csum_final(crc, sb->csum);
bh = __getblk(device->bdev, bytenr / 4096,
BTRFS_SUPER_INFO_SIZE);
memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
set_buffer_uptodate(bh);
get_bh(bh);
lock_buffer(bh);
bh->b_end_io = btrfs_end_buffer_write_sync;
}
if (i == last_barrier && do_barriers && device->barriers) {
ret = submit_bh(WRITE_BARRIER, bh);
if (ret == -EOPNOTSUPP) {
printk("btrfs: disabling barriers on dev %s\n",
device->name);
set_buffer_uptodate(bh);
device->barriers = 0;
get_bh(bh);
lock_buffer(bh);
ret = submit_bh(WRITE, bh);
}
} else {
ret = submit_bh(WRITE, bh);
}
if (!ret && wait) {
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
errors++;
} else if (ret) {
errors++;
}
if (wait)
brelse(bh);
}
return errors < i ? 0 : -1;
}
int write_all_supers(struct btrfs_root *root, int max_mirrors)
{
struct list_head *cur;
struct list_head *head = &root->fs_info->fs_devices->devices;
struct btrfs_device *dev;
struct btrfs_super_block *sb;
struct btrfs_dev_item *dev_item;
struct buffer_head *bh;
int ret;
int do_barriers;
int max_errors;
int total_errors = 0;
u32 crc;
u64 flags;
max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1;
......@@ -1944,40 +2071,11 @@ static int write_all_supers(struct btrfs_root *root)
btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
flags = btrfs_super_flags(sb);
btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);
crc = ~(u32)0;
crc = btrfs_csum_data(root, (char *)sb + BTRFS_CSUM_SIZE, crc,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
btrfs_csum_final(crc, sb->csum);
bh = __getblk(dev->bdev, BTRFS_SUPER_INFO_OFFSET / 4096,
BTRFS_SUPER_INFO_SIZE);
memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
dev->pending_io = bh;
get_bh(bh);
set_buffer_uptodate(bh);
lock_buffer(bh);
bh->b_end_io = btrfs_end_buffer_write_sync;
if (do_barriers && dev->barriers) {
ret = submit_bh(WRITE_BARRIER, bh);
if (ret == -EOPNOTSUPP) {
printk("btrfs: disabling barriers on dev %s\n",
dev->name);
set_buffer_uptodate(bh);
dev->barriers = 0;
get_bh(bh);
lock_buffer(bh);
ret = submit_bh(WRITE, bh);
}
} else {
ret = submit_bh(WRITE, bh);
}
ret = write_dev_supers(dev, sb, do_barriers, 0, max_mirrors);
if (ret)
total_errors++;
}
......@@ -1985,8 +2083,8 @@ static int write_all_supers(struct btrfs_root *root)
printk("btrfs: %d errors while writing supers\n", total_errors);
BUG();
}
total_errors = 0;
total_errors = 0;
list_for_each(cur, head) {
dev = list_entry(cur, struct btrfs_device, dev_list);
if (!dev->bdev)
......@@ -1994,29 +2092,9 @@ static int write_all_supers(struct btrfs_root *root)
if (!dev->in_fs_metadata || !dev->writeable)
continue;
BUG_ON(!dev->pending_io);
bh = dev->pending_io;
wait_on_buffer(bh);
if (!buffer_uptodate(dev->pending_io)) {
if (do_barriers && dev->barriers) {
printk("btrfs: disabling barriers on dev %s\n",
dev->name);
set_buffer_uptodate(bh);
get_bh(bh);
lock_buffer(bh);
dev->barriers = 0;
ret = submit_bh(WRITE, bh);
BUG_ON(ret);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
total_errors++;
} else {
total_errors++;
}
}
dev->pending_io = NULL;
brelse(bh);
ret = write_dev_supers(dev, sb, do_barriers, 1, max_mirrors);
if (ret)
total_errors++;
}
if (total_errors > max_errors) {
printk("btrfs: %d errors while writing supers\n", total_errors);
......@@ -2025,12 +2103,12 @@ static int write_all_supers(struct btrfs_root *root)
return 0;
}
int write_ctree_super(struct btrfs_trans_handle *trans, struct btrfs_root
*root)
int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int max_mirrors)
{
int ret;
ret = write_all_supers(root);
ret = write_all_supers(root, max_mirrors);
return ret;
}
......@@ -2116,7 +2194,7 @@ int btrfs_commit_super(struct btrfs_root *root)
ret = btrfs_write_and_wait_transaction(NULL, root);
BUG_ON(ret);
ret = write_ctree_super(NULL, root);
ret = write_ctree_super(NULL, root, 0);
return ret;
}
......
......@@ -19,8 +19,20 @@
#ifndef __DISKIO__
#define __DISKIO__
#define BTRFS_SUPER_INFO_OFFSET (16 * 1024)
#define BTRFS_SUPER_INFO_OFFSET (64 * 1024)
#define BTRFS_SUPER_INFO_SIZE 4096
#define BTRFS_SUPER_MIRROR_MAX 3
#define BTRFS_SUPER_MIRROR_SHIFT 12
static inline u64 btrfs_sb_offset(int mirror)
{
u64 start = 16 * 1024;
if (mirror)
return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror);
return BTRFS_SUPER_INFO_OFFSET;
}
struct btrfs_device;
struct btrfs_fs_devices;
......@@ -37,7 +49,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
char *options);
int close_ctree(struct btrfs_root *root);
int write_ctree_super(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct btrfs_root *root, int max_mirrors);
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
int btrfs_commit_super(struct btrfs_root *root);
struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
u64 bytenr, u32 blocksize);
......
......@@ -189,6 +189,29 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group,
return 0;
}
static int remove_sb_from_cache(struct btrfs_root *root,
struct btrfs_block_group_cache *cache)
{
u64 bytenr;
u64 *logical;
int stripe_len;
int i, nr, ret;
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i);
ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
cache->key.objectid, bytenr, 0,
&logical, &nr, &stripe_len);
BUG_ON(ret);
while (nr--) {
btrfs_remove_free_space(cache, logical[nr],
stripe_len);
}
kfree(logical);
}
return 0;
}
static int cache_block_group(struct btrfs_root *root,
struct btrfs_block_group_cache *block_group)
{
......@@ -197,9 +220,7 @@ static int cache_block_group(struct btrfs_root *root,
struct btrfs_key key;
struct extent_buffer *leaf;
int slot;
u64 last = 0;
u64 first_free;
int found = 0;
u64 last = block_group->key.objectid;
if (!block_group)
return 0;
......@@ -220,23 +241,13 @@ static int cache_block_group(struct btrfs_root *root,
* skip the locking here
*/
path->skip_locking = 1;
first_free = max_t(u64, block_group->key.objectid,
BTRFS_SUPER_INFO_OFFSET + BTRFS_SUPER_INFO_SIZE);
key.objectid = block_group->key.objectid;
key.objectid = last;
key.offset = 0;
btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto err;
ret = btrfs_previous_item(root, path, 0, BTRFS_EXTENT_ITEM_KEY);
if (ret < 0)
goto err;
if (ret == 0) {
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
if (key.objectid + key.offset > first_free)
first_free = key.objectid + key.offset;
}
while(1) {
leaf = path->nodes[0];
slot = path->slots[0];
......@@ -258,11 +269,6 @@ static int cache_block_group(struct btrfs_root *root,
break;
if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
if (!found) {
last = first_free;
found = 1;
}
add_new_free_space(block_group, root->fs_info, last,
key.objectid);
......@@ -272,13 +278,11 @@ static int cache_block_group(struct btrfs_root *root,
path->slots[0]++;
}
if (!found)
last = first_free;
add_new_free_space(block_group, root->fs_info, last,
block_group->key.objectid +
block_group->key.offset);
remove_sb_from_cache(root, block_group);
block_group->cached = 1;
ret = 0;
err:
......@@ -1974,10 +1978,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
if (alloc) {
old_val += num_bytes;
cache->space_info->bytes_used += num_bytes;
if (cache->ro) {
if (cache->ro)
cache->space_info->bytes_readonly -= num_bytes;
WARN_ON(1);
}
btrfs_set_block_group_used(&cache->item, old_val);
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
......
......@@ -290,7 +290,6 @@ __btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
ret = -EINVAL;
goto out;
}
unlink_free_space(block_group, info);
if (info->bytes == bytes) {
......
......@@ -1038,7 +1038,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
mutex_unlock(&root->fs_info->trans_mutex);
ret = btrfs_write_and_wait_transaction(trans, root);
BUG_ON(ret);
write_ctree_super(trans, root);
write_ctree_super(trans, root, 0);
/*
* the super is written, we can safely allow the tree-loggers
......
......@@ -1996,7 +1996,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
btrfs_set_super_log_root_level(&root->fs_info->super_for_commit,
btrfs_header_level(log->fs_info->log_root_tree->node));
write_ctree_super(trans, log->fs_info->tree_root);
write_ctree_super(trans, log->fs_info->tree_root, 2);
log->fs_info->tree_log_transid++;
log->fs_info->tree_log_batch = 0;
atomic_set(&log->fs_info->tree_log_commit, 0);
......@@ -2006,7 +2006,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
out:
mutex_unlock(&log->fs_info->tree_log_mutex);
return 0;
}
/* * free all the extents used by the tree log. This should be called
......
......@@ -423,15 +423,11 @@ int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
}
set_blocksize(bdev, 4096);
bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
bh = btrfs_read_dev_super(bdev);
if (!bh)
goto error_close;
disk_super = (struct btrfs_super_block *)bh->b_data;
if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
sizeof(disk_super->magic)))
goto error_brelse;
devid = le64_to_cpu(disk_super->dev_item.devid);
if (devid != device->devid)
goto error_brelse;
......@@ -529,17 +525,12 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
ret = set_blocksize(bdev, 4096);
if (ret)
goto error_close;
bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
bh = btrfs_read_dev_super(bdev);
if (!bh) {
ret = -EIO;
goto error_close;
}
disk_super = (struct btrfs_super_block *)bh->b_data;
if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
sizeof(disk_super->magic))) {
ret = -EINVAL;
goto error_brelse;
}
devid = le64_to_cpu(disk_super->dev_item.devid);
transid = btrfs_super_generation(disk_super);
if (disk_super->label[0])
......@@ -553,7 +544,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
printk("devid %Lu transid %Lu %s\n", devid, transid, path);
ret = device_list_add(path, disk_super, devid, fs_devices_ret);
error_brelse:
brelse(bh);
error_close:
close_bdev_exclusive(bdev, flags);
......@@ -1016,17 +1006,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
}
set_blocksize(bdev, 4096);
bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
bh = btrfs_read_dev_super(bdev);
if (!bh) {
ret = -EIO;
goto error_close;
}
disk_super = (struct btrfs_super_block *)bh->b_data;
if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
sizeof(disk_super->magic))) {
ret = -ENOENT;
goto error_brelse;
}
devid = le64_to_cpu(disk_super->dev_item.devid);
dev_uuid = disk_super->dev_item.uuid;
device = btrfs_find_device(root, devid, dev_uuid,
......@@ -2563,6 +2548,88 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
mirror_num, NULL);
}
int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
u64 chunk_start, u64 physical, u64 devid,
u64 **logical, int *naddrs, int *stripe_len)
{
struct extent_map_tree *em_tree = &map_tree->map_tree;
struct extent_map *em;
struct map_lookup *map;
u64 *buf;
u64 bytenr;
u64 length;
u64 stripe_nr;
int i, j, nr = 0;
spin_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, chunk_start, 1);
spin_unlock(&em_tree->lock);
BUG_ON(!em || em->start != chunk_start);
map = (struct map_lookup *)em->bdev;
length = em->len;
if (map->type & BTRFS_BLOCK_GROUP_RAID10)
do_div(length, map->num_stripes / map->sub_stripes);
else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
do_div(length, map->num_stripes);
buf = kzalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
BUG_ON(!buf);
for (i = 0; i < map->num_stripes; i++) {
if (devid && map->stripes[i].dev->devid != devid)
continue;
if (map->stripes[i].physical > physical ||
map->stripes[i].physical + length <= physical)
continue;
stripe_nr = physical - map->stripes[i].physical;
do_div(stripe_nr, map->stripe_len);
if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
stripe_nr = stripe_nr * map->num_stripes + i;
do_div(stripe_nr, map->sub_stripes);
} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
stripe_nr = stripe_nr * map->num_stripes + i;
}
bytenr = chunk_start + stripe_nr * map->stripe_len;
for (j = 0; j < nr; j++) {
if (buf[j] == bytenr)
break;
}
if (j == nr)
buf[nr++] = bytenr;
}
for (i = 0; i > nr; i++) {
struct btrfs_multi_bio *multi;
struct btrfs_bio_stripe *stripe;
int ret;
length = 1;
ret = btrfs_map_block(map_tree, WRITE, buf[i],
&length, &multi, 0);
BUG_ON(ret);
stripe = multi->stripes;
for (j = 0; j < multi->num_stripes; j++) {
if (stripe->physical >= physical &&
physical < stripe->physical + length)
break;
}
BUG_ON(j >= multi->num_stripes);
kfree(multi);
}
*logical = buf;
*naddrs = nr;
*stripe_len = map->stripe_len;
free_extent_map(em);
return 0;
}
int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
u64 logical, struct page *page)
{
......@@ -3003,7 +3070,7 @@ int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
return read_one_dev(root, buf, dev_item);
}
int btrfs_read_sys_array(struct btrfs_root *root)
int btrfs_read_sys_array(struct btrfs_root *root, u64 sb_bytenr)
{
struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
struct extent_buffer *sb;
......@@ -3018,7 +3085,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
u32 cur;
struct btrfs_key key;
sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
sb = btrfs_find_create_tree_block(root, sb_bytenr,
BTRFS_SUPER_INFO_SIZE);
if (!sb)
return -ENOMEM;
......
......@@ -28,7 +28,6 @@ struct btrfs_device {
struct list_head dev_alloc_list;
struct btrfs_fs_devices *fs_devices;
struct btrfs_root *dev_root;
struct buffer_head *pending_io;
struct bio *pending_bios;
struct bio *pending_bio_tail;
int running_pending;
......@@ -125,7 +124,10 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
u64 logical, u64 *length,
struct btrfs_multi_bio **multi_ret, int mirror_num);
int btrfs_read_sys_array(struct btrfs_root *root);
int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree,
u64 chunk_start, u64 physical, u64 devid,
u64 **logical, int *naddrs, int *stripe_len);
int btrfs_read_sys_array(struct btrfs_root *root, u64 sb_bytenr);
int btrfs_read_chunk_tree(struct btrfs_root *root);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 type);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment