Commit 709ae487 authored by NeilBrown

md/raid1: add takeover support for raid5->raid1

A 2-device raid5 array can now be converted to raid1.
Signed-off-by: NeilBrown <neilb@suse.de>
parent 6eef4b21
...@@ -677,6 +677,7 @@ static void raise_barrier(conf_t *conf) ...@@ -677,6 +677,7 @@ static void raise_barrier(conf_t *conf)
static void lower_barrier(conf_t *conf) static void lower_barrier(conf_t *conf)
{ {
unsigned long flags; unsigned long flags;
BUG_ON(conf->barrier <= 0);
spin_lock_irqsave(&conf->resync_lock, flags); spin_lock_irqsave(&conf->resync_lock, flags);
conf->barrier--; conf->barrier--;
spin_unlock_irqrestore(&conf->resync_lock, flags); spin_unlock_irqrestore(&conf->resync_lock, flags);
...@@ -1960,74 +1961,48 @@ static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks) ...@@ -1960,74 +1961,48 @@ static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks)
return mddev->dev_sectors; return mddev->dev_sectors;
} }
static int run(mddev_t *mddev) static conf_t *setup_conf(mddev_t *mddev)
{ {
conf_t *conf; conf_t *conf;
int i, j, disk_idx; int i;
mirror_info_t *disk; mirror_info_t *disk;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
int err = -ENOMEM;
if (mddev->level != 1) {
printk("raid1: %s: raid level not set to mirroring (%d)\n",
mdname(mddev), mddev->level);
goto out;
}
if (mddev->reshape_position != MaxSector) {
printk("raid1: %s: reshape_position set but not supported\n",
mdname(mddev));
goto out;
}
/*
* copy the already verified devices into our private RAID1
* bookkeeping area. [whatever we allocate in run(),
* should be freed in stop()]
*/
conf = kzalloc(sizeof(conf_t), GFP_KERNEL); conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
mddev->private = conf;
if (!conf) if (!conf)
goto out_no_mem; goto abort;
conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks, conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
GFP_KERNEL); GFP_KERNEL);
if (!conf->mirrors) if (!conf->mirrors)
goto out_no_mem; goto abort;
conf->tmppage = alloc_page(GFP_KERNEL); conf->tmppage = alloc_page(GFP_KERNEL);
if (!conf->tmppage) if (!conf->tmppage)
goto out_no_mem; goto abort;
conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL); conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
if (!conf->poolinfo) if (!conf->poolinfo)
goto out_no_mem; goto abort;
conf->poolinfo->mddev = NULL;
conf->poolinfo->raid_disks = mddev->raid_disks; conf->poolinfo->raid_disks = mddev->raid_disks;
conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc, conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
r1bio_pool_free, r1bio_pool_free,
conf->poolinfo); conf->poolinfo);
if (!conf->r1bio_pool) if (!conf->r1bio_pool)
goto out_no_mem; goto abort;
conf->poolinfo->mddev = mddev; conf->poolinfo->mddev = mddev;
spin_lock_init(&conf->device_lock); spin_lock_init(&conf->device_lock);
mddev->queue->queue_lock = &conf->device_lock;
list_for_each_entry(rdev, &mddev->disks, same_set) { list_for_each_entry(rdev, &mddev->disks, same_set) {
disk_idx = rdev->raid_disk; int disk_idx = rdev->raid_disk;
if (disk_idx >= mddev->raid_disks if (disk_idx >= mddev->raid_disks
|| disk_idx < 0) || disk_idx < 0)
continue; continue;
disk = conf->mirrors + disk_idx; disk = conf->mirrors + disk_idx;
disk->rdev = rdev; disk->rdev = rdev;
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
/* as we don't honour merge_bvec_fn, we must never risk
* violating it, so limit ->max_sector to one PAGE, as
* a one page request is never in violation.
*/
if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
disk->head_position = 0; disk->head_position = 0;
} }
...@@ -2041,8 +2016,7 @@ static int run(mddev_t *mddev) ...@@ -2041,8 +2016,7 @@ static int run(mddev_t *mddev)
bio_list_init(&conf->pending_bio_list); bio_list_init(&conf->pending_bio_list);
bio_list_init(&conf->flushing_bio_list); bio_list_init(&conf->flushing_bio_list);
conf->last_used = -1;
mddev->degraded = 0;
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
disk = conf->mirrors + i; disk = conf->mirrors + i;
...@@ -2050,38 +2024,97 @@ static int run(mddev_t *mddev) ...@@ -2050,38 +2024,97 @@ static int run(mddev_t *mddev)
if (!disk->rdev || if (!disk->rdev ||
!test_bit(In_sync, &disk->rdev->flags)) { !test_bit(In_sync, &disk->rdev->flags)) {
disk->head_position = 0; disk->head_position = 0;
mddev->degraded++;
if (disk->rdev) if (disk->rdev)
conf->fullsync = 1; conf->fullsync = 1;
} else if (conf->last_used < 0)
/*
* The first working device is used as a
* starting point to read balancing.
*/
conf->last_used = i;
} }
}
if (mddev->degraded == conf->raid_disks) { err = -EIO;
if (conf->last_used < 0) {
printk(KERN_ERR "raid1: no operational mirrors for %s\n", printk(KERN_ERR "raid1: no operational mirrors for %s\n",
mdname(mddev)); mdname(mddev));
goto out_free_conf; goto abort;
}
err = -ENOMEM;
conf->thread = md_register_thread(raid1d, mddev, NULL);
if (!conf->thread) {
printk(KERN_ERR
"raid1: couldn't allocate thread for %s\n",
mdname(mddev));
goto abort;
} }
if (conf->raid_disks - mddev->degraded == 1)
mddev->recovery_cp = MaxSector;
return conf;
abort:
if (conf) {
if (conf->r1bio_pool)
mempool_destroy(conf->r1bio_pool);
kfree(conf->mirrors);
safe_put_page(conf->tmppage);
kfree(conf->poolinfo);
kfree(conf);
}
return ERR_PTR(err);
}
static int run(mddev_t *mddev)
{
conf_t *conf;
int i;
mdk_rdev_t *rdev;
if (mddev->level != 1) {
printk("raid1: %s: raid level not set to mirroring (%d)\n",
mdname(mddev), mddev->level);
return -EIO;
}
if (mddev->reshape_position != MaxSector) {
printk("raid1: %s: reshape_position set but not supported\n",
mdname(mddev));
return -EIO;
}
/* /*
* find the first working one and use it as a starting point * copy the already verified devices into our private RAID1
* to read balancing. * bookkeeping area. [whatever we allocate in run(),
* should be freed in stop()]
*/ */
for (j = 0; j < conf->raid_disks && if (mddev->private == NULL)
(!conf->mirrors[j].rdev || conf = setup_conf(mddev);
!test_bit(In_sync, &conf->mirrors[j].rdev->flags)) ; j++) else
/* nothing */; conf = mddev->private;
conf->last_used = j;
if (IS_ERR(conf))
return PTR_ERR(conf);
mddev->thread = md_register_thread(raid1d, mddev, NULL); mddev->queue->queue_lock = &conf->device_lock;
if (!mddev->thread) { list_for_each_entry(rdev, &mddev->disks, same_set) {
printk(KERN_ERR disk_stack_limits(mddev->gendisk, rdev->bdev,
"raid1: couldn't allocate thread for %s\n", rdev->data_offset << 9);
mdname(mddev)); /* as we don't honour merge_bvec_fn, we must never risk
goto out_free_conf; * violating it, so limit ->max_sector to one PAGE, as
* a one page request is never in violation.
*/
if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
queue_max_sectors(mddev->queue) > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
} }
mddev->degraded = 0;
for (i=0; i < conf->raid_disks; i++)
if (conf->mirrors[i].rdev == NULL ||
!test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
test_bit(Faulty, &conf->mirrors[i].rdev->flags))
mddev->degraded++;
if (conf->raid_disks - mddev->degraded == 1)
mddev->recovery_cp = MaxSector;
if (mddev->recovery_cp != MaxSector) if (mddev->recovery_cp != MaxSector)
printk(KERN_NOTICE "raid1: %s is not clean" printk(KERN_NOTICE "raid1: %s is not clean"
" -- starting background reconstruction\n", " -- starting background reconstruction\n",
...@@ -2090,9 +2123,14 @@ static int run(mddev_t *mddev) ...@@ -2090,9 +2123,14 @@ static int run(mddev_t *mddev)
"raid1: raid set %s active with %d out of %d mirrors\n", "raid1: raid set %s active with %d out of %d mirrors\n",
mdname(mddev), mddev->raid_disks - mddev->degraded, mdname(mddev), mddev->raid_disks - mddev->degraded,
mddev->raid_disks); mddev->raid_disks);
/* /*
* Ok, everything is just fine now * Ok, everything is just fine now
*/ */
mddev->thread = conf->thread;
conf->thread = NULL;
mddev->private = conf;
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0)); md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
mddev->queue->unplug_fn = raid1_unplug; mddev->queue->unplug_fn = raid1_unplug;
...@@ -2100,23 +2138,6 @@ static int run(mddev_t *mddev) ...@@ -2100,23 +2138,6 @@ static int run(mddev_t *mddev)
mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_data = mddev;
md_integrity_register(mddev); md_integrity_register(mddev);
return 0; return 0;
out_no_mem:
printk(KERN_ERR "raid1: couldn't allocate memory for %s\n",
mdname(mddev));
out_free_conf:
if (conf) {
if (conf->r1bio_pool)
mempool_destroy(conf->r1bio_pool);
kfree(conf->mirrors);
safe_put_page(conf->tmppage);
kfree(conf->poolinfo);
kfree(conf);
mddev->private = NULL;
}
out:
return -EIO;
} }
static int stop(mddev_t *mddev) static int stop(mddev_t *mddev)
...@@ -2302,6 +2323,23 @@ static void raid1_quiesce(mddev_t *mddev, int state) ...@@ -2302,6 +2323,23 @@ static void raid1_quiesce(mddev_t *mddev, int state)
} }
} }
/*
 * raid1_takeover - build a raid1 configuration for an array that is
 * currently described as another level.
 *
 * raid1 can take over:
 *  raid5 with 2 devices, any layout or chunk size
 *
 * For such an array the new_level, new_layout and new_chunk_sectors
 * fields of @mddev are set to 1, 0 and 0 respectively, and the result
 * of setup_conf() is handed back to the caller: either a conf_t
 * pointer or an ERR_PTR() value.  A conf that was built successfully
 * has ->barrier set to 1 before it is returned.
 * NOTE(review): presumably the raised barrier holds the array quiet
 * until it is fully activated -- confirm against run()/the md core.
 *
 * Any other level/device-count combination yields ERR_PTR(-EINVAL).
 */
static void *raid1_takeover(mddev_t *mddev)
{
	conf_t *new_conf;

	/* Reject everything except a two-device raid5. */
	if (mddev->level != 5 || mddev->raid_disks != 2)
		return ERR_PTR(-EINVAL);

	mddev->new_level = 1;
	mddev->new_layout = 0;
	mddev->new_chunk_sectors = 0;

	new_conf = setup_conf(mddev);
	if (IS_ERR(new_conf))
		return new_conf;	/* pass the encoded error straight through */

	new_conf->barrier = 1;
	return new_conf;
}
static struct mdk_personality raid1_personality = static struct mdk_personality raid1_personality =
{ {
...@@ -2321,6 +2359,7 @@ static struct mdk_personality raid1_personality = ...@@ -2321,6 +2359,7 @@ static struct mdk_personality raid1_personality =
.size = raid1_size, .size = raid1_size,
.check_reshape = raid1_reshape, .check_reshape = raid1_reshape,
.quiesce = raid1_quiesce, .quiesce = raid1_quiesce,
.takeover = raid1_takeover,
}; };
static int __init raid_init(void) static int __init raid_init(void)
......
...@@ -59,6 +59,11 @@ struct r1_private_data_s { ...@@ -59,6 +59,11 @@ struct r1_private_data_s {
mempool_t *r1bio_pool; mempool_t *r1bio_pool;
mempool_t *r1buf_pool; mempool_t *r1buf_pool;
/* When taking over an array from a different personality, we store
* the new thread here until we fully activate the array.
*/
struct mdk_thread_s *thread;
}; };
typedef struct r1_private_data_s conf_t; typedef struct r1_private_data_s conf_t;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment