Commit d109d34c, authored by Neil Brown, committed by Trond Myklebust

[PATCH] MD - Don't maintain disc status in superblock.

Don't maintain disc status in superblock.

The state is now in rdev so we don't maintain it
in superblock any more.
We also no longer test the content of the superblock for
disk status.
mddev->spare is now an rdev and not a superblock fragment.
parent 1b114450
This diff is collapsed.
......@@ -214,15 +214,8 @@ static void mark_disk_bad (mddev_t *mddev, int failed)
{
multipath_conf_t *conf = mddev_to_conf(mddev);
struct multipath_info *multipath = conf->multipaths+failed;
mdp_super_t *sb = mddev->sb;
multipath->operational = 0;
mark_disk_faulty(sb->disks+multipath->number);
mark_disk_nonsync(sb->disks+multipath->number);
mark_disk_inactive(sb->disks+multipath->number);
sb->active_disks--;
sb->working_disks--;
sb->failed_disks++;
mddev->sb_dirty = 1;
conf->working_disks--;
printk (DISK_FAILED, bdev_partition_name (multipath->bdev),
......@@ -296,30 +289,23 @@ static void print_multipath_conf (multipath_conf_t *conf)
}
static int multipath_add_disk(mddev_t *mddev, mdp_disk_t *added_desc,
mdk_rdev_t *rdev)
static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
{
multipath_conf_t *conf = mddev->private;
int err = 1;
int i;
struct multipath_info *p = conf->multipaths + rdev->raid_disk;
print_multipath_conf(conf);
spin_lock_irq(&conf->device_lock);
for (i = 0; i < MD_SB_DISKS; i++) {
struct multipath_info *p = conf->multipaths + i;
if (!p->used_slot) {
if (added_desc->number != i)
break;
p->number = added_desc->number;
p->raid_disk = added_desc->raid_disk;
p->bdev = rdev->bdev;
p->operational = 1;
p->used_slot = 1;
conf->nr_disks++;
conf->working_disks++;
err = 0;
break;
}
if (!p->used_slot) {
p->number = rdev->desc_nr;
p->raid_disk = rdev->raid_disk;
p->bdev = rdev->bdev;
p->operational = 1;
p->used_slot = 1;
conf->nr_disks++;
conf->working_disks++;
err = 0;
}
if (err)
MD_BUG();
......@@ -451,10 +437,9 @@ static void multipathd (void *data)
static int multipath_run (mddev_t *mddev)
{
multipath_conf_t *conf;
int i, j, disk_idx;
int disk_idx;
struct multipath_info *disk;
mdp_super_t *sb = mddev->sb;
mdp_disk_t *desc;
mdk_rdev_t *rdev;
struct list_head *tmp;
int num_rdevs = 0;
......@@ -498,32 +483,24 @@ static int multipath_run (mddev_t *mddev)
continue;
}
desc = &sb->disks[rdev->desc_nr];
disk_idx = desc->raid_disk;
disk_idx = rdev->raid_disk;
disk = conf->multipaths + disk_idx;
if (!disk_sync(desc))
printk(NOT_IN_SYNC, bdev_partition_name(rdev->bdev));
/*
* Mark all disks as active to start with, there are no
* spares. multipath_read_balance deals with choose
* the "best" operational device.
*/
disk->number = desc->number;
disk->raid_disk = desc->raid_disk;
disk->number = rdev->desc_nr;
disk->raid_disk = disk_idx;
disk->bdev = rdev->bdev;
disk->operational = 1;
disk->used_slot = 1;
mark_disk_sync(desc);
mark_disk_active(desc);
num_rdevs++;
}
conf->raid_disks = sb->raid_disks = sb->active_disks = num_rdevs;
conf->nr_disks = sb->nr_disks = sb->working_disks = num_rdevs;
sb->failed_disks = 0;
sb->spare_disks = 0;
conf->raid_disks = sb->raid_disks = num_rdevs;
conf->nr_disks = num_rdevs;
mddev->sb_dirty = 1;
conf->mddev = mddev;
conf->device_lock = SPIN_LOCK_UNLOCKED;
......@@ -551,18 +528,6 @@ static int multipath_run (mddev_t *mddev)
}
}
/*
* Regenerate the "device is in sync with the raid set" bit for
* each device.
*/
for (i = 0; i < MD_SB_DISKS; i++) {
mark_disk_nonsync(sb->disks+i);
for (j = 0; j < sb->raid_disks; j++) {
if (sb->disks[i].number == conf->multipaths[j].number)
mark_disk_sync(sb->disks+i);
}
}
printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->active_disks,
sb->raid_disks, sb->spare_disks);
/*
......
......@@ -575,21 +575,13 @@ static void mark_disk_bad(mddev_t *mddev, int failed)
{
conf_t *conf = mddev_to_conf(mddev);
mirror_info_t *mirror = conf->mirrors+failed;
mdp_super_t *sb = mddev->sb;
mirror->operational = 0;
mark_disk_faulty(sb->disks+mirror->number);
mark_disk_nonsync(sb->disks+mirror->number);
mark_disk_inactive(sb->disks+mirror->number);
if (!mirror->write_only) {
sb->active_disks--;
mddev->degraded++;
conf->working_disks--;
}
sb->working_disks--;
sb->failed_disks++;
mddev->sb_dirty = 1;
if (!mirror->write_only)
conf->working_disks--;
printk(DISK_FAILED, bdev_partition_name(mirror->bdev), conf->working_disks);
}
......@@ -665,8 +657,6 @@ static int raid1_spare_active(mddev_t *mddev)
int i, failed_disk = -1, spare_disk = -1;
conf_t *conf = mddev->private;
mirror_info_t *tmp, *sdisk, *fdisk;
mdp_super_t *sb = mddev->sb;
mdp_disk_t *failed_desc, *spare_desc;
mdk_rdev_t *spare_rdev, *failed_rdev;
print_conf(conf);
......@@ -701,17 +691,6 @@ static int raid1_spare_active(mddev_t *mddev)
sdisk = conf->mirrors + spare_disk;
fdisk = conf->mirrors + failed_disk;
spare_desc = &sb->disks[sdisk->number];
failed_desc = &sb->disks[fdisk->number];
if (spare_desc->raid_disk != sdisk->raid_disk ||
sdisk->raid_disk != spare_disk || fdisk->raid_disk != failed_disk ||
failed_desc->raid_disk != fdisk->raid_disk) {
MD_BUG();
err = 1;
goto abort;
}
/*
* do the switch finally
*/
......@@ -722,15 +701,13 @@ static int raid1_spare_active(mddev_t *mddev)
* There must be a spare_rdev, but there may not be a
* failed_rdev. That slot might be empty...
*/
spare_rdev->desc_nr = failed_desc->number;
spare_rdev->desc_nr = failed_disk;
spare_rdev->raid_disk = failed_disk;
if (failed_rdev) {
failed_rdev->desc_nr = spare_desc->number;
failed_rdev->desc_nr = spare_disk;
failed_rdev->raid_disk = spare_disk;
}
spare_rdev->in_sync = 1;
xchg_values(*spare_desc, *failed_desc);
xchg_values(*fdisk, *sdisk);
/*
......@@ -740,9 +717,7 @@ static int raid1_spare_active(mddev_t *mddev)
* give the proper raid_disk number to the now activated
* disk. (this means we switch back these values)
*/
xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
xchg_values(sdisk->raid_disk, fdisk->raid_disk);
xchg_values(spare_desc->number, failed_desc->number);
xchg_values(sdisk->number, fdisk->number);
if (!sdisk->bdev)
......@@ -810,36 +785,26 @@ static int raid1_spare_write(mddev_t *mddev)
return err;
}
static int raid1_add_disk(mddev_t *mddev, mdp_disk_t *added_desc,
mdk_rdev_t *rdev)
static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
{
conf_t *conf = mddev->private;
int err = 1;
int i;
mirror_info_t *p = conf->mirrors + rdev->raid_disk;
print_conf(conf);
spin_lock_irq(&conf->device_lock);
/*
* find the disk ...
*/
for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
mirror_info_t *p = conf->mirrors + i;
if (!p->used_slot) {
if (added_desc->number != i)
break;
p->number = added_desc->number;
p->raid_disk = added_desc->raid_disk;
/* it will be held open by rdev */
p->bdev = rdev->bdev;
p->operational = 0;
p->write_only = 0;
p->spare = 1;
p->used_slot = 1;
p->head_position = 0;
conf->nr_disks++;
err = 0;
break;
}
if (!p->used_slot) {
p->number = rdev->desc_nr;
p->raid_disk = rdev->raid_disk;
/* it will be held open by rdev */
p->bdev = rdev->bdev;
p->operational = 0;
p->write_only = 0;
p->spare = 1;
p->used_slot = 1;
p->head_position = 0;
conf->nr_disks++;
err = 0;
}
if (err)
MD_BUG();
......@@ -1222,7 +1187,6 @@ static int run(mddev_t *mddev)
int i, j, disk_idx;
mirror_info_t *disk;
mdp_super_t *sb = mddev->sb;
mdp_disk_t *descriptor;
mdk_rdev_t *rdev;
struct list_head *tmp;
......@@ -1267,12 +1231,11 @@ static int run(mddev_t *mddev)
MD_BUG();
continue;
}
descriptor = &sb->disks[rdev->desc_nr];
disk_idx = descriptor->raid_disk;
disk_idx = rdev->raid_disk;
disk = conf->mirrors + disk_idx;
if (disk_faulty(descriptor)) {
disk->number = descriptor->number;
if (rdev->faulty) {
disk->number = rdev->desc_nr;
disk->raid_disk = disk_idx;
disk->bdev = rdev->bdev;
disk->operational = 0;
......@@ -1282,19 +1245,7 @@ static int run(mddev_t *mddev)
disk->head_position = 0;
continue;
}
if (disk_active(descriptor)) {
if (!disk_sync(descriptor)) {
printk(NOT_IN_SYNC,
bdev_partition_name(rdev->bdev));
continue;
}
if ((descriptor->number > MD_SB_DISKS) ||
(disk_idx > sb->raid_disks)) {
printk(INCONSISTENT,
bdev_partition_name(rdev->bdev));
continue;
}
if (rdev->in_sync) {
if (disk->operational) {
printk(ALREADY_RUNNING,
bdev_partition_name(rdev->bdev),
......@@ -1303,7 +1254,7 @@ static int run(mddev_t *mddev)
}
printk(OPERATIONAL, bdev_partition_name(rdev->bdev),
disk_idx);
disk->number = descriptor->number;
disk->number = rdev->desc_nr;
disk->raid_disk = disk_idx;
disk->bdev = rdev->bdev;
disk->operational = 1;
......@@ -1317,7 +1268,7 @@ static int run(mddev_t *mddev)
* Must be a spare disk ..
*/
printk(SPARE, bdev_partition_name(rdev->bdev));
disk->number = descriptor->number;
disk->number = rdev->desc_nr;
disk->raid_disk = disk_idx;
disk->bdev = rdev->bdev;
disk->operational = 0;
......@@ -1342,16 +1293,13 @@ static int run(mddev_t *mddev)
}
mddev->degraded = 0;
for (i = 0; i < MD_SB_DISKS; i++) {
for (i = 0; i < conf->raid_disks; i++) {
descriptor = sb->disks+i;
disk_idx = descriptor->raid_disk;
disk = conf->mirrors + disk_idx;
disk = conf->mirrors + i;
if (disk_faulty(descriptor) && (disk_idx < conf->raid_disks) &&
!disk->used_slot) {
disk->number = descriptor->number;
disk->raid_disk = disk_idx;
if (!disk->used_slot) {
disk->number = i;
disk->raid_disk = i;
disk->bdev = NULL;
disk->operational = 0;
disk->write_only = 0;
......@@ -1359,7 +1307,7 @@ static int run(mddev_t *mddev)
disk->used_slot = 1;
disk->head_position = 0;
}
if (!disk->used_slot && disk_idk < conf->raid_disks)
if (!disk->used_slot)
mddev->degraded++;
}
......@@ -1383,23 +1331,7 @@ static int run(mddev_t *mddev)
}
}
/*
* Regenerate the "device is in sync with the raid set" bit for
* each device.
*/
for (i = 0; i < MD_SB_DISKS; i++) {
mark_disk_nonsync(sb->disks+i);
for (j = 0; j < sb->raid_disks; j++) {
if (!conf->mirrors[j].operational)
continue;
if (sb->disks[i].number == conf->mirrors[j].number)
mark_disk_sync(sb->disks+i);
}
}
sb->active_disks = conf->working_disks;
printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->active_disks, sb->raid_disks);
printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->raid_disks - mddev->degraded, sb->raid_disks);
/*
* Ok, everything is just fine now
*/
......
......@@ -442,7 +442,6 @@ static void raid5_build_block (struct stripe_head *sh, int i)
static int error(mddev_t *mddev, struct block_device *bdev)
{
raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
mdp_super_t *sb = mddev->sb;
struct disk_info *disk;
int i;
......@@ -453,12 +452,6 @@ static int error(mddev_t *mddev, struct block_device *bdev)
continue;
if (disk->operational) {
disk->operational = 0;
mark_disk_faulty(sb->disks+disk->number);
mark_disk_nonsync(sb->disks+disk->number);
mark_disk_inactive(sb->disks+disk->number);
sb->active_disks--;
sb->working_disks--;
sb->failed_disks++;
mddev->sb_dirty = 1;
mddev->degraded++;
conf->working_disks--;
......@@ -486,12 +479,6 @@ static int error(mddev_t *mddev, struct block_device *bdev)
disk->operational = 0;
disk->write_only = 0;
conf->spare = NULL;
mark_disk_faulty(sb->disks+disk->number);
mark_disk_nonsync(sb->disks+disk->number);
mark_disk_inactive(sb->disks+disk->number);
sb->spare_disks--;
sb->working_disks--;
sb->failed_disks++;
mddev->sb_dirty = 1;
......@@ -1376,9 +1363,8 @@ static void raid5d (void *data)
static int run (mddev_t *mddev)
{
raid5_conf_t *conf;
int i, j, raid_disk, memory;
int i, raid_disk, memory;
mdp_super_t *sb = mddev->sb;
mdp_disk_t *desc;
mdk_rdev_t *rdev;
struct disk_info *disk;
struct list_head *tmp;
......@@ -1419,17 +1405,12 @@ static int run (mddev_t *mddev)
* the disk only to get a pointer to the descriptor on
* the main superblock, which might be more recent.
*/
desc = sb->disks + rdev->desc_nr;
raid_disk = desc->raid_disk;
raid_disk = rdev->raid_disk;
disk = conf->disks + raid_disk;
if (disk_faulty(desc)) {
if (rdev->faulty) {
printk(KERN_ERR "raid5: disabled device %s (errors detected)\n", bdev_partition_name(rdev->bdev));
if (!rdev->faulty) {
MD_BUG();
goto abort;
}
disk->number = desc->number;
disk->number = rdev->desc_nr;
disk->raid_disk = raid_disk;
disk->bdev = rdev->bdev;
......@@ -1439,23 +1420,14 @@ static int run (mddev_t *mddev)
disk->used_slot = 1;
continue;
}
if (disk_active(desc)) {
if (!disk_sync(desc)) {
printk(KERN_ERR "raid5: disabled device %s (not in sync)\n", bdev_partition_name(rdev->bdev));
MD_BUG();
goto abort;
}
if (raid_disk > sb->raid_disks) {
printk(KERN_ERR "raid5: disabled device %s (inconsistent descriptor)\n", bdev_partition_name(rdev->bdev));
continue;
}
if (rdev->in_sync) {
if (disk->operational) {
printk(KERN_ERR "raid5: disabled device %s (device %d already operational)\n", bdev_partition_name(rdev->bdev), raid_disk);
continue;
}
printk(KERN_INFO "raid5: device %s operational as raid disk %d\n", bdev_partition_name(rdev->bdev), raid_disk);
disk->number = desc->number;
disk->number = rdev->desc_nr;
disk->raid_disk = raid_disk;
disk->bdev = rdev->bdev;
disk->operational = 1;
......@@ -1467,7 +1439,7 @@ static int run (mddev_t *mddev)
* Must be a spare disk ..
*/
printk(KERN_INFO "raid5: spare disk %s\n", bdev_partition_name(rdev->bdev));
disk->number = desc->number;
disk->number = rdev->desc_nr;
disk->raid_disk = raid_disk;
disk->bdev = rdev->bdev;
......@@ -1478,16 +1450,13 @@ static int run (mddev_t *mddev)
}
}
for (i = 0; i < MD_SB_DISKS; i++) {
desc = sb->disks + i;
raid_disk = desc->raid_disk;
disk = conf->disks + raid_disk;
for (i = 0; i < sb->raid_disks; i++) {
disk = conf->disks + i;
if (disk_faulty(desc) && (raid_disk < sb->raid_disks) &&
!conf->disks[raid_disk].used_slot) {
if (!disk->used_slot) {
disk->number = desc->number;
disk->raid_disk = raid_disk;
disk->number = i;
disk->raid_disk = i;
disk->bdev = NULL;
disk->operational = 0;
......@@ -1555,22 +1524,7 @@ static int run (mddev_t *mddev)
} else
printk(KERN_INFO "raid5: allocated %dkB for md%d\n", memory, mdidx(mddev));
/*
* Regenerate the "device is in sync with the raid set" bit for
* each device.
*/
for (i = 0; i < MD_SB_DISKS ; i++) {
mark_disk_nonsync(sb->disks + i);
for (j = 0; j < sb->raid_disks; j++) {
if (!conf->disks[j].operational)
continue;
if (sb->disks[i].number == conf->disks[j].number)
mark_disk_sync(sb->disks + i);
}
}
sb->active_disks = conf->working_disks;
if (sb->active_disks == sb->raid_disks)
if (conf->working_disks == conf->raid_disks)
printk("raid5: raid level %d set md%d active with %d out of %d devices, algorithm %d\n", conf->level, mdidx(mddev), sb->active_disks, sb->raid_disks, conf->algorithm);
else
printk(KERN_ALERT "raid5: raid level %d set md%d active with %d out of %d devices, algorithm %d\n", conf->level, mdidx(mddev), sb->active_disks, sb->raid_disks, conf->algorithm);
......@@ -1693,8 +1647,6 @@ static int raid5_spare_active(mddev_t *mddev)
int i, failed_disk=-1, spare_disk=-1;
raid5_conf_t *conf = mddev->private;
struct disk_info *tmp, *sdisk, *fdisk;
mdp_super_t *sb = mddev->sb;
mdp_disk_t *failed_desc, *spare_desc;
mdk_rdev_t *spare_rdev, *failed_rdev;
print_raid5_conf(conf);
......@@ -1726,17 +1678,6 @@ static int raid5_spare_active(mddev_t *mddev)
sdisk = conf->disks + spare_disk;
fdisk = conf->disks + failed_disk;
spare_desc = &sb->disks[sdisk->number];
failed_desc = &sb->disks[fdisk->number];
if ( spare_desc->raid_disk != sdisk->raid_disk ||
sdisk->raid_disk != spare_disk || fdisk->raid_disk != failed_disk ||
failed_desc->raid_disk != fdisk->raid_disk) {
MD_BUG();
err = 1;
goto abort;
}
/*
* do the switch finally
*/
......@@ -1746,15 +1687,13 @@ static int raid5_spare_active(mddev_t *mddev)
/* There must be a spare_rdev, but there may not be a
* failed_rdev. That slot might be empty...
*/
spare_rdev->desc_nr = failed_desc->number;
spare_rdev->desc_nr = failed_disk;
spare_rdev->raid_disk = failed_disk;
if (failed_rdev) {
failed_rdev->desc_nr = spare_desc->number;
failed_rdev->desc_nr = spare_disk;
failed_rdev->raid_disk = spare_disk;
}
spare_rdev->in_sync = 1;
xchg_values(*spare_desc, *failed_desc);
xchg_values(*fdisk, *sdisk);
/*
......@@ -1765,9 +1704,7 @@ static int raid5_spare_active(mddev_t *mddev)
* disk. (this means we switch back these values)
*/
xchg_values(spare_desc->raid_disk, failed_desc->raid_disk);
xchg_values(sdisk->raid_disk, fdisk->raid_disk);
xchg_values(spare_desc->number, failed_desc->number);
xchg_values(sdisk->number, fdisk->number);
if (!sdisk->bdev)
......@@ -1865,12 +1802,11 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
return err;
}
static int raid5_add_disk(mddev_t *mddev, mdp_disk_t *added_desc,
mdk_rdev_t *rdev)
static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
{
raid5_conf_t *conf = mddev->private;
int err = 1;
int i;
struct disk_info *p = conf->disks + rdev->raid_disk;
print_raid5_conf(conf);
spin_lock_irq(&conf->device_lock);
......@@ -1878,22 +1814,16 @@ static int raid5_add_disk(mddev_t *mddev, mdp_disk_t *added_desc,
* find the disk ...
*/
for (i = conf->raid_disks; i < MD_SB_DISKS; i++) {
struct disk_info *p = conf->disks + i;
if (!p->used_slot) {
if (added_desc->number != i)
break;
p->number = added_desc->number;
p->raid_disk = added_desc->raid_disk;
/* it will be held open by rdev */
p->bdev = rdev->bdev;
p->operational = 0;
p->write_only = 0;
p->spare = 1;
p->used_slot = 1;
err = 0;
break;
}
if (!p->used_slot) {
p->number = rdev->desc_nr;
p->raid_disk = rdev->raid_disk;
/* it will be held open by rdev */
p->bdev = rdev->bdev;
p->operational = 0;
p->write_only = 0;
p->spare = 1;
p->used_slot = 1;
err = 0;
}
if (err)
MD_BUG();
......
......@@ -190,7 +190,7 @@ struct mddev_s
int in_sync; /* know to not need resync */
struct semaphore reconfig_sem;
atomic_t active;
mdp_disk_t *spare;
mdk_rdev_t *spare;
int degraded; /* whether md should consider
* adding a spare
......@@ -212,7 +212,7 @@ struct mdk_personality_s
int (*stop)(mddev_t *mddev);
int (*status)(char *page, mddev_t *mddev);
int (*error_handler)(mddev_t *mddev, struct block_device *bdev);
int (*hot_add_disk) (mddev_t *mddev, mdp_disk_t *descriptor, mdk_rdev_t *rdev);
int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
int (*hot_remove_disk) (mddev_t *mddev, int number);
int (*spare_write) (mddev_t *mddev);
int (*spare_inactive) (mddev_t *mddev);
......@@ -238,7 +238,7 @@ static inline kdev_t mddev_to_kdev(mddev_t * mddev)
extern mdk_rdev_t * find_rdev(mddev_t * mddev, kdev_t dev);
extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);
extern mdp_disk_t *get_spare(mddev_t *mddev);
extern mdk_rdev_t *get_spare(mddev_t *mddev);
/*
* iterates through some rdev ringlist. It's safe to remove the
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment