Commit 8dabb742, authored by Stefan Behrens, committed by Josef Bacik

Btrfs: change core code of btrfs to support the device replace operations

This commit contains all the essential changes to the core code
of Btrfs for support of the device replace procedure.
Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
parent e93c89c1
...@@ -45,6 +45,7 @@ ...@@ -45,6 +45,7 @@
#include "inode-map.h" #include "inode-map.h"
#include "check-integrity.h" #include "check-integrity.h"
#include "rcu-string.h" #include "rcu-string.h"
#include "dev-replace.h"
#ifdef CONFIG_X86 #ifdef CONFIG_X86
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
...@@ -2438,7 +2439,11 @@ int open_ctree(struct super_block *sb, ...@@ -2438,7 +2439,11 @@ int open_ctree(struct super_block *sb,
goto fail_tree_roots; goto fail_tree_roots;
} }
btrfs_close_extra_devices(fs_devices); /*
* keep the device that is marked to be the target device for the
* dev_replace procedure
*/
btrfs_close_extra_devices(fs_info, fs_devices, 0);
if (!fs_devices->latest_bdev) { if (!fs_devices->latest_bdev) {
printk(KERN_CRIT "btrfs: failed to read devices on %s\n", printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
...@@ -2510,6 +2515,14 @@ int open_ctree(struct super_block *sb, ...@@ -2510,6 +2515,14 @@ int open_ctree(struct super_block *sb,
goto fail_block_groups; goto fail_block_groups;
} }
ret = btrfs_init_dev_replace(fs_info);
if (ret) {
pr_err("btrfs: failed to init dev_replace: %d\n", ret);
goto fail_block_groups;
}
btrfs_close_extra_devices(fs_info, fs_devices, 1);
ret = btrfs_init_space_info(fs_info); ret = btrfs_init_space_info(fs_info);
if (ret) { if (ret) {
printk(KERN_ERR "Failed to initial space info: %d\n", ret); printk(KERN_ERR "Failed to initial space info: %d\n", ret);
...@@ -2658,6 +2671,13 @@ int open_ctree(struct super_block *sb, ...@@ -2658,6 +2671,13 @@ int open_ctree(struct super_block *sb,
return ret; return ret;
} }
ret = btrfs_resume_dev_replace_async(fs_info);
if (ret) {
pr_warn("btrfs: failed to resume dev_replace\n");
close_ctree(tree_root);
return ret;
}
return 0; return 0;
fail_qgroup: fail_qgroup:
...@@ -3300,6 +3320,8 @@ int close_ctree(struct btrfs_root *root) ...@@ -3300,6 +3320,8 @@ int close_ctree(struct btrfs_root *root)
/* pause restriper - we want to resume on mount */ /* pause restriper - we want to resume on mount */
btrfs_pause_balance(fs_info); btrfs_pause_balance(fs_info);
btrfs_dev_replace_suspend_for_unmount(fs_info);
btrfs_scrub_cancel(fs_info); btrfs_scrub_cancel(fs_info);
/* wait for any defraggers to finish */ /* wait for any defraggers to finish */
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "volumes.h" #include "volumes.h"
#include "disk-io.h" #include "disk-io.h"
#include "transaction.h" #include "transaction.h"
#include "dev-replace.h"
#undef DEBUG #undef DEBUG
...@@ -331,6 +332,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, ...@@ -331,6 +332,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
int nzones = 0; int nzones = 0;
int i; int i;
unsigned long index = logical >> PAGE_CACHE_SHIFT; unsigned long index = logical >> PAGE_CACHE_SHIFT;
int dev_replace_is_ongoing;
spin_lock(&fs_info->reada_lock); spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index); re = radix_tree_lookup(&fs_info->reada_tree, index);
...@@ -392,6 +394,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, ...@@ -392,6 +394,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
} }
/* insert extent in reada_tree + all per-device trees, all or nothing */ /* insert extent in reada_tree + all per-device trees, all or nothing */
btrfs_dev_replace_lock(&fs_info->dev_replace);
spin_lock(&fs_info->reada_lock); spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re); ret = radix_tree_insert(&fs_info->reada_tree, index, re);
if (ret == -EEXIST) { if (ret == -EEXIST) {
...@@ -399,13 +402,17 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, ...@@ -399,13 +402,17 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(!re_exist); BUG_ON(!re_exist);
re_exist->refcnt++; re_exist->refcnt++;
spin_unlock(&fs_info->reada_lock); spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error; goto error;
} }
if (ret) { if (ret) {
spin_unlock(&fs_info->reada_lock); spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error; goto error;
} }
prev_dev = NULL; prev_dev = NULL;
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
&fs_info->dev_replace);
for (i = 0; i < nzones; ++i) { for (i = 0; i < nzones; ++i) {
dev = bbio->stripes[i].dev; dev = bbio->stripes[i].dev;
if (dev == prev_dev) { if (dev == prev_dev) {
...@@ -422,6 +429,14 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, ...@@ -422,6 +429,14 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
/* cannot read ahead on missing device */ /* cannot read ahead on missing device */
continue; continue;
} }
if (dev_replace_is_ongoing &&
dev == fs_info->dev_replace.tgtdev) {
/*
* as this device is selected for reading only as
* a last resort, skip it for read ahead.
*/
continue;
}
prev_dev = dev; prev_dev = dev;
ret = radix_tree_insert(&dev->reada_extents, index, re); ret = radix_tree_insert(&dev->reada_extents, index, re);
if (ret) { if (ret) {
...@@ -434,10 +449,12 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, ...@@ -434,10 +449,12 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(fs_info == NULL); BUG_ON(fs_info == NULL);
radix_tree_delete(&fs_info->reada_tree, index); radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock); spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error; goto error;
} }
} }
spin_unlock(&fs_info->reada_lock); spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
kfree(bbio); kfree(bbio);
return re; return re;
......
...@@ -2843,12 +2843,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, ...@@ -2843,12 +2843,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -EIO; return -EIO;
} }
if (dev->scrub_device) { btrfs_dev_replace_lock(&fs_info->dev_replace);
if (dev->scrub_device ||
(!is_dev_replace &&
btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
btrfs_dev_replace_unlock(&fs_info->dev_replace);
mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex); mutex_unlock(&fs_info->fs_devices->device_list_mutex);
scrub_workers_put(fs_info); scrub_workers_put(fs_info);
return -EINPROGRESS; return -EINPROGRESS;
} }
btrfs_dev_replace_unlock(&fs_info->dev_replace);
sctx = scrub_setup_ctx(dev, is_dev_replace); sctx = scrub_setup_ctx(dev, is_dev_replace);
if (IS_ERR(sctx)) { if (IS_ERR(sctx)) {
mutex_unlock(&fs_info->scrub_lock); mutex_unlock(&fs_info->scrub_lock);
......
...@@ -55,6 +55,7 @@ ...@@ -55,6 +55,7 @@
#include "export.h" #include "export.h"
#include "compression.h" #include "compression.h"
#include "rcu-string.h" #include "rcu-string.h"
#include "dev-replace.h"
#define CREATE_TRACE_POINTS #define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h> #include <trace/events/btrfs.h>
...@@ -1225,8 +1226,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) ...@@ -1225,8 +1226,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
return 0; return 0;
if (*flags & MS_RDONLY) { if (*flags & MS_RDONLY) {
/*
* this also happens on 'umount -rf' or on shutdown, when
* the filesystem is busy.
*/
sb->s_flags |= MS_RDONLY; sb->s_flags |= MS_RDONLY;
btrfs_dev_replace_suspend_for_unmount(fs_info);
btrfs_scrub_cancel(fs_info);
ret = btrfs_commit_super(root); ret = btrfs_commit_super(root);
if (ret) if (ret)
goto restore; goto restore;
...@@ -1263,6 +1271,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) ...@@ -1263,6 +1271,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret) if (ret)
goto restore; goto restore;
ret = btrfs_resume_dev_replace_async(fs_info);
if (ret) {
pr_warn("btrfs: failed to resume dev_replace\n");
goto restore;
}
sb->s_flags &= ~MS_RDONLY; sb->s_flags &= ~MS_RDONLY;
} }
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "tree-log.h" #include "tree-log.h"
#include "inode-map.h" #include "inode-map.h"
#include "volumes.h" #include "volumes.h"
#include "dev-replace.h"
#define BTRFS_ROOT_TRANS_TAG 0 #define BTRFS_ROOT_TRANS_TAG 0
...@@ -845,7 +846,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, ...@@ -845,7 +846,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
return ret; return ret;
ret = btrfs_run_dev_stats(trans, root->fs_info); ret = btrfs_run_dev_stats(trans, root->fs_info);
BUG_ON(ret); WARN_ON(ret);
ret = btrfs_run_dev_replace(trans, root->fs_info);
WARN_ON(ret);
ret = btrfs_run_qgroups(trans, root->fs_info); ret = btrfs_run_qgroups(trans, root->fs_info);
BUG_ON(ret); BUG_ON(ret);
...@@ -868,6 +871,8 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, ...@@ -868,6 +871,8 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
switch_commit_root(fs_info->extent_root); switch_commit_root(fs_info->extent_root);
up_write(&fs_info->extent_commit_sem); up_write(&fs_info->extent_commit_sem);
btrfs_after_dev_replace_commit(fs_info);
return 0; return 0;
} }
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#include "check-integrity.h" #include "check-integrity.h"
#include "rcu-string.h" #include "rcu-string.h"
#include "math.h" #include "math.h"
#include "dev-replace.h"
static int init_first_rw_device(struct btrfs_trans_handle *trans, static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_root *root,
...@@ -505,7 +506,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) ...@@ -505,7 +506,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices, int step)
{ {
struct btrfs_device *device, *next; struct btrfs_device *device, *next;
...@@ -528,6 +530,21 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) ...@@ -528,6 +530,21 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
continue; continue;
} }
if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
/*
* In the first step, keep the device which has
* the correct fsid and the devid that is used
* for the dev_replace procedure.
* In the second step, the dev_replace state is
* read from the device tree and it is known
* whether the procedure is really active or
* not, which means whether this device is
* used or whether it should be removed.
*/
if (step == 0 || device->is_tgtdev_for_dev_replace) {
continue;
}
}
if (device->bdev) { if (device->bdev) {
blkdev_put(device->bdev, device->mode); blkdev_put(device->bdev, device->mode);
device->bdev = NULL; device->bdev = NULL;
...@@ -536,7 +553,8 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) ...@@ -536,7 +553,8 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
if (device->writeable) { if (device->writeable) {
list_del_init(&device->dev_alloc_list); list_del_init(&device->dev_alloc_list);
device->writeable = 0; device->writeable = 0;
fs_devices->rw_devices--; if (!device->is_tgtdev_for_dev_replace)
fs_devices->rw_devices--;
} }
list_del_init(&device->dev_list); list_del_init(&device->dev_list);
fs_devices->num_devices--; fs_devices->num_devices--;
...@@ -594,7 +612,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) ...@@ -594,7 +612,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
if (device->bdev) if (device->bdev)
fs_devices->open_devices--; fs_devices->open_devices--;
if (device->writeable) { if (device->writeable && !device->is_tgtdev_for_dev_replace) {
list_del_init(&device->dev_alloc_list); list_del_init(&device->dev_alloc_list);
fs_devices->rw_devices--; fs_devices->rw_devices--;
} }
...@@ -718,7 +736,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, ...@@ -718,7 +736,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fs_devices->rotating = 1; fs_devices->rotating = 1;
fs_devices->open_devices++; fs_devices->open_devices++;
if (device->writeable) { if (device->writeable && !device->is_tgtdev_for_dev_replace) {
fs_devices->rw_devices++; fs_devices->rw_devices++;
list_add(&device->dev_alloc_list, list_add(&device->dev_alloc_list,
&fs_devices->alloc_list); &fs_devices->alloc_list);
...@@ -1350,16 +1368,22 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) ...@@ -1350,16 +1368,22 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
root->fs_info->avail_system_alloc_bits | root->fs_info->avail_system_alloc_bits |
root->fs_info->avail_metadata_alloc_bits; root->fs_info->avail_metadata_alloc_bits;
if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices = root->fs_info->fs_devices->num_devices;
root->fs_info->fs_devices->num_devices <= 4) { btrfs_dev_replace_lock(&root->fs_info->dev_replace);
if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
WARN_ON(num_devices < 1);
num_devices--;
}
btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
printk(KERN_ERR "btrfs: unable to go below four devices " printk(KERN_ERR "btrfs: unable to go below four devices "
"on raid10\n"); "on raid10\n");
ret = -EINVAL; ret = -EINVAL;
goto out; goto out;
} }
if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
root->fs_info->fs_devices->num_devices <= 2) {
printk(KERN_ERR "btrfs: unable to go below two " printk(KERN_ERR "btrfs: unable to go below two "
"devices on raid1\n"); "devices on raid1\n");
ret = -EINVAL; ret = -EINVAL;
...@@ -2935,6 +2959,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, ...@@ -2935,6 +2959,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
u64 allowed; u64 allowed;
int mixed = 0; int mixed = 0;
int ret; int ret;
u64 num_devices;
if (btrfs_fs_closing(fs_info) || if (btrfs_fs_closing(fs_info) ||
atomic_read(&fs_info->balance_pause_req) || atomic_read(&fs_info->balance_pause_req) ||
...@@ -2963,10 +2988,17 @@ int btrfs_balance(struct btrfs_balance_control *bctl, ...@@ -2963,10 +2988,17 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
} }
} }
num_devices = fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&fs_info->dev_replace);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
BUG_ON(num_devices < 1);
num_devices--;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace);
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (fs_info->fs_devices->num_devices == 1) if (num_devices == 1)
allowed |= BTRFS_BLOCK_GROUP_DUP; allowed |= BTRFS_BLOCK_GROUP_DUP;
else if (fs_info->fs_devices->num_devices < 4) else if (num_devices < 4)
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
else else
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
...@@ -3591,6 +3623,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, ...@@ -3591,6 +3623,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
devices_info[ndevs].total_avail = total_avail; devices_info[ndevs].total_avail = total_avail;
devices_info[ndevs].dev = device; devices_info[ndevs].dev = device;
++ndevs; ++ndevs;
WARN_ON(ndevs > fs_devices->rw_devices);
} }
/* /*
...@@ -4773,6 +4806,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, ...@@ -4773,6 +4806,7 @@ static void fill_device_from_item(struct extent_buffer *leaf,
device->io_align = btrfs_device_io_align(leaf, dev_item); device->io_align = btrfs_device_io_align(leaf, dev_item);
device->io_width = btrfs_device_io_width(leaf, dev_item); device->io_width = btrfs_device_io_width(leaf, dev_item);
device->sector_size = btrfs_device_sector_size(leaf, dev_item); device->sector_size = btrfs_device_sector_size(leaf, dev_item);
WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
device->is_tgtdev_for_dev_replace = 0; device->is_tgtdev_for_dev_replace = 0;
ptr = (unsigned long)btrfs_device_uuid(dev_item); ptr = (unsigned long)btrfs_device_uuid(dev_item);
......
...@@ -268,7 +268,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, ...@@ -268,7 +268,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
struct btrfs_fs_devices **fs_devices_ret); struct btrfs_fs_devices **fs_devices_ret);
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices); void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices, int step);
int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
char *device_path, char *device_path,
struct btrfs_device **device); struct btrfs_device **device);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment