Commit 91adb564 authored by NeilBrown

md/raid5: refactor raid5 "run"

.. so that the code to create the private data structures is separate.
This will help with future code to change the level of an active
array.
Signed-off-by: NeilBrown <neilb@suse.de>
parent 34817e8c
...@@ -4164,95 +4164,49 @@ static struct attribute_group raid5_attrs_group = { ...@@ -4164,95 +4164,49 @@ static struct attribute_group raid5_attrs_group = {
.attrs = raid5_attrs, .attrs = raid5_attrs,
}; };
static int run(mddev_t *mddev) static raid5_conf_t *setup_conf(mddev_t *mddev)
{ {
raid5_conf_t *conf; raid5_conf_t *conf;
int raid_disk, memory; int raid_disk, memory;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct disk_info *disk; struct disk_info *disk;
int working_disks = 0;
if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) { if (mddev->new_level != 5
&& mddev->new_level != 4
&& mddev->new_level != 6) {
printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n", printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n",
mdname(mddev), mddev->level); mdname(mddev), mddev->new_level);
return -EIO; return ERR_PTR(-EIO);
} }
if ((mddev->level == 5 && !algorithm_valid_raid5(mddev->layout)) || if ((mddev->new_level == 5
(mddev->level == 6 && !algorithm_valid_raid6(mddev->layout))) { && !algorithm_valid_raid5(mddev->new_layout)) ||
(mddev->new_level == 6
&& !algorithm_valid_raid6(mddev->new_layout))) {
printk(KERN_ERR "raid5: %s: layout %d not supported\n", printk(KERN_ERR "raid5: %s: layout %d not supported\n",
mdname(mddev), mddev->layout); mdname(mddev), mddev->new_layout);
return -EIO; return ERR_PTR(-EIO);
} }
if (mddev->new_level == 6 && mddev->raid_disks < 4) {
if (mddev->chunk_size < PAGE_SIZE) { printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
printk(KERN_ERR "md/raid5: chunk_size must be at least " mdname(mddev), mddev->raid_disks);
"PAGE_SIZE but %d < %ld\n", return ERR_PTR(-EINVAL);
mddev->chunk_size, PAGE_SIZE);
return -EINVAL;
} }
if (mddev->reshape_position != MaxSector) { if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) {
/* Check that we can continue the reshape. printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
* Currently only disks can change, it must mddev->new_chunk, mdname(mddev));
* increase, and we must be past the point where return ERR_PTR(-EINVAL);
* a stripe over-writes itself
*/
sector_t here_new, here_old;
int old_disks;
int max_degraded = (mddev->level == 5 ? 1 : 2);
if (mddev->new_level != mddev->level ||
mddev->new_layout != mddev->layout ||
mddev->new_chunk != mddev->chunk_size) {
printk(KERN_ERR "raid5: %s: unsupported reshape "
"required - aborting.\n",
mdname(mddev));
return -EINVAL;
}
if (mddev->delta_disks <= 0) {
printk(KERN_ERR "raid5: %s: unsupported reshape "
"(reduce disks) required - aborting.\n",
mdname(mddev));
return -EINVAL;
}
old_disks = mddev->raid_disks - mddev->delta_disks;
/* reshape_position must be on a new-stripe boundary, and one
* further up in new geometry must map after here in old
* geometry.
*/
here_new = mddev->reshape_position;
if (sector_div(here_new, (mddev->chunk_size>>9)*
(mddev->raid_disks - max_degraded))) {
printk(KERN_ERR "raid5: reshape_position not "
"on a stripe boundary\n");
return -EINVAL;
}
/* here_new is the stripe we will write to */
here_old = mddev->reshape_position;
sector_div(here_old, (mddev->chunk_size>>9)*
(old_disks-max_degraded));
/* here_old is the first stripe that we might need to read
* from */
if (here_new >= here_old) {
/* Reading from the same stripe as writing to - bad */
printk(KERN_ERR "raid5: reshape_position too early for "
"auto-recovery - aborting.\n");
return -EINVAL;
}
printk(KERN_INFO "raid5: reshape will continue\n");
/* OK, we should be able to continue; */
} }
conf = kzalloc(sizeof(raid5_conf_t), GFP_KERNEL);
mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL); if (conf == NULL)
if ((conf = mddev->private) == NULL)
goto abort; goto abort;
if (mddev->reshape_position == MaxSector) {
conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks; conf->raid_disks = mddev->raid_disks;
} else { if (mddev->reshape_position == MaxSector)
conf->raid_disks = mddev->raid_disks; conf->previous_raid_disks = mddev->raid_disks;
else
conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
}
conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info), conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info),
GFP_KERNEL); GFP_KERNEL);
...@@ -4264,13 +4218,12 @@ static int run(mddev_t *mddev) ...@@ -4264,13 +4218,12 @@ static int run(mddev_t *mddev)
if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
goto abort; goto abort;
if (mddev->level == 6) { if (mddev->new_level == 6) {
conf->spare_page = alloc_page(GFP_KERNEL); conf->spare_page = alloc_page(GFP_KERNEL);
if (!conf->spare_page) if (!conf->spare_page)
goto abort; goto abort;
} }
spin_lock_init(&conf->device_lock); spin_lock_init(&conf->device_lock);
mddev->queue->queue_lock = &conf->device_lock;
init_waitqueue_head(&conf->wait_for_stripe); init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_overlap); init_waitqueue_head(&conf->wait_for_overlap);
INIT_LIST_HEAD(&conf->handle_list); INIT_LIST_HEAD(&conf->handle_list);
...@@ -4299,41 +4252,136 @@ static int run(mddev_t *mddev) ...@@ -4299,41 +4252,136 @@ static int run(mddev_t *mddev)
printk(KERN_INFO "raid5: device %s operational as raid" printk(KERN_INFO "raid5: device %s operational as raid"
" disk %d\n", bdevname(rdev->bdev,b), " disk %d\n", bdevname(rdev->bdev,b),
raid_disk); raid_disk);
working_disks++;
} else } else
/* Cannot rely on bitmap to complete recovery */ /* Cannot rely on bitmap to complete recovery */
conf->fullsync = 1; conf->fullsync = 1;
} }
/* conf->chunk_size = mddev->new_chunk;
* 0 for a fully functional array, 1 or 2 for a degraded array. conf->level = mddev->new_level;
*/
mddev->degraded = conf->raid_disks - working_disks;
conf->mddev = mddev;
conf->chunk_size = mddev->chunk_size;
conf->level = mddev->level;
if (conf->level == 6) if (conf->level == 6)
conf->max_degraded = 2; conf->max_degraded = 2;
else else
conf->max_degraded = 1; conf->max_degraded = 1;
conf->algorithm = mddev->layout; conf->algorithm = mddev->new_layout;
conf->max_nr_stripes = NR_STRIPES; conf->max_nr_stripes = NR_STRIPES;
conf->expand_progress = mddev->reshape_position; conf->expand_progress = mddev->reshape_position;
/* device size must be a multiple of chunk size */ memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1); conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
mddev->resync_max_sectors = mddev->dev_sectors; if (grow_stripes(conf, conf->max_nr_stripes)) {
printk(KERN_ERR
"raid5: couldn't allocate %dkB for buffers\n", memory);
goto abort;
} else
printk(KERN_INFO "raid5: allocated %dkB for %s\n",
memory, mdname(mddev));
if (conf->level == 6 && conf->raid_disks < 4) { conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n", if (!conf->thread) {
mdname(mddev), conf->raid_disks); printk(KERN_ERR
"raid5: couldn't allocate thread for %s\n",
mdname(mddev));
goto abort; goto abort;
} }
if (!conf->chunk_size || conf->chunk_size % 4) {
printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", return conf;
conf->chunk_size, mdname(mddev));
goto abort; abort:
if (conf) {
shrink_stripes(conf);
safe_put_page(conf->spare_page);
kfree(conf->disks);
kfree(conf->stripe_hashtbl);
kfree(conf);
return ERR_PTR(-EIO);
} else
return ERR_PTR(-ENOMEM);
}
static int run(mddev_t *mddev)
{
raid5_conf_t *conf;
int working_disks = 0;
mdk_rdev_t *rdev;
if (mddev->reshape_position != MaxSector) {
/* Check that we can continue the reshape.
* Currently only disks can change, it must
* increase, and we must be past the point where
* a stripe over-writes itself
*/
sector_t here_new, here_old;
int old_disks;
int max_degraded = (mddev->level == 5 ? 1 : 2);
if (mddev->new_level != mddev->level ||
mddev->new_layout != mddev->layout ||
mddev->new_chunk != mddev->chunk_size) {
printk(KERN_ERR "raid5: %s: unsupported reshape "
"required - aborting.\n",
mdname(mddev));
return -EINVAL;
}
if (mddev->delta_disks <= 0) {
printk(KERN_ERR "raid5: %s: unsupported reshape "
"(reduce disks) required - aborting.\n",
mdname(mddev));
return -EINVAL;
}
old_disks = mddev->raid_disks - mddev->delta_disks;
/* reshape_position must be on a new-stripe boundary, and one
* further up in new geometry must map after here in old
* geometry.
*/
here_new = mddev->reshape_position;
if (sector_div(here_new, (mddev->chunk_size>>9)*
(mddev->raid_disks - max_degraded))) {
printk(KERN_ERR "raid5: reshape_position not "
"on a stripe boundary\n");
return -EINVAL;
}
/* here_new is the stripe we will write to */
here_old = mddev->reshape_position;
sector_div(here_old, (mddev->chunk_size>>9)*
(old_disks-max_degraded));
/* here_old is the first stripe that we might need to read
* from */
if (here_new >= here_old) {
/* Reading from the same stripe as writing to - bad */
printk(KERN_ERR "raid5: reshape_position too early for "
"auto-recovery - aborting.\n");
return -EINVAL;
}
printk(KERN_INFO "raid5: reshape will continue\n");
/* OK, we should be able to continue; */
} else {
BUG_ON(mddev->level != mddev->new_level);
BUG_ON(mddev->layout != mddev->new_layout);
BUG_ON(mddev->chunk_size != mddev->new_chunk);
BUG_ON(mddev->delta_disks != 0);
} }
conf = setup_conf(mddev);
if (conf == NULL)
return -EIO;
if (IS_ERR(conf))
return PTR_ERR(conf);
mddev->thread = conf->thread;
conf->thread = NULL;
mddev->private = conf;
/*
* 0 for a fully functional array, 1 or 2 for a degraded array.
*/
list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 &&
test_bit(In_sync, &rdev->flags))
working_disks++;
mddev->degraded = conf->raid_disks - working_disks;
if (mddev->degraded > conf->max_degraded) { if (mddev->degraded > conf->max_degraded) {
printk(KERN_ERR "raid5: not enough operational devices for %s" printk(KERN_ERR "raid5: not enough operational devices for %s"
" (%d/%d failed)\n", " (%d/%d failed)\n",
...@@ -4341,6 +4389,10 @@ static int run(mddev_t *mddev) ...@@ -4341,6 +4389,10 @@ static int run(mddev_t *mddev)
goto abort; goto abort;
} }
/* device size must be a multiple of chunk size */
mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1);
mddev->resync_max_sectors = mddev->dev_sectors;
if (mddev->degraded > 0 && if (mddev->degraded > 0 &&
mddev->recovery_cp != MaxSector) { mddev->recovery_cp != MaxSector) {
if (mddev->ok_start_degraded) if (mddev->ok_start_degraded)
...@@ -4356,27 +4408,6 @@ static int run(mddev_t *mddev) ...@@ -4356,27 +4408,6 @@ static int run(mddev_t *mddev)
} }
} }
{
mddev->thread = md_register_thread(raid5d, mddev, "%s_raid5");
if (!mddev->thread) {
printk(KERN_ERR
"raid5: couldn't allocate thread for %s\n",
mdname(mddev));
goto abort;
}
}
memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
if (grow_stripes(conf, conf->max_nr_stripes)) {
printk(KERN_ERR
"raid5: couldn't allocate %dkB for buffers\n", memory);
shrink_stripes(conf);
md_unregister_thread(mddev->thread);
goto abort;
} else
printk(KERN_INFO "raid5: allocated %dkB for %s\n",
memory, mdname(mddev));
if (mddev->degraded == 0) if (mddev->degraded == 0)
printk("raid5: raid level %d set %s active with %d out of %d" printk("raid5: raid level %d set %s active with %d out of %d"
" devices, algorithm %d\n", conf->level, mdname(mddev), " devices, algorithm %d\n", conf->level, mdname(mddev),
...@@ -4419,6 +4450,8 @@ static int run(mddev_t *mddev) ...@@ -4419,6 +4450,8 @@ static int run(mddev_t *mddev)
"raid5: failed to create sysfs attributes for %s\n", "raid5: failed to create sysfs attributes for %s\n",
mdname(mddev)); mdname(mddev));
mddev->queue->queue_lock = &conf->device_lock;
mddev->queue->unplug_fn = raid5_unplug_device; mddev->queue->unplug_fn = raid5_unplug_device;
mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_data = mddev;
mddev->queue->backing_dev_info.congested_fn = raid5_congested; mddev->queue->backing_dev_info.congested_fn = raid5_congested;
...@@ -4430,7 +4463,11 @@ static int run(mddev_t *mddev) ...@@ -4430,7 +4463,11 @@ static int run(mddev_t *mddev)
return 0; return 0;
abort: abort:
if (mddev->thread)
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
if (conf) { if (conf) {
shrink_stripes(conf);
print_raid5_conf(conf); print_raid5_conf(conf);
safe_put_page(conf->spare_page); safe_put_page(conf->spare_page);
kfree(conf->disks); kfree(conf->disks);
......
...@@ -386,6 +386,11 @@ struct raid5_private_data { ...@@ -386,6 +386,11 @@ struct raid5_private_data {
int pool_size; /* number of disks in stripeheads in pool */ int pool_size; /* number of disks in stripeheads in pool */
spinlock_t device_lock; spinlock_t device_lock;
struct disk_info *disks; struct disk_info *disks;
/* When taking over an array from a different personality, we store
* the new thread here until we fully activate the array.
*/
struct mdk_thread_s *thread;
}; };
typedef struct raid5_private_data raid5_conf_t; typedef struct raid5_private_data raid5_conf_t;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment