Commit 09b63e46, authored by Neil Brown, committed by Linus Torvalds

[PATCH] md: Remove per-personality 'operational' and 'write_only' flags

raid1, raid5 and multipath maintain their own
'operational' flag.  This is equivalent to
   !rdev->faulty
and so isn't needed.
Similarly raid1 and raid5 maintain a "write_only" flag
that is equivalent to
   !rdev->in_sync
so it isn't needed either.

As part of implementing this change, we introduce some extra
flag bits in raid5 that are meaningful only inside 'handle_stripe'.
Some of these replace the "action" array which recorded what
actions were required (and would be performed after the stripe
spinlock was released).  This has the advantage of reducing our
dependence on MD_SB_DISKS, which personalities shouldn't need
to know about.
parent 0ce3712f
...@@ -365,9 +365,6 @@ static void free_disk_sb(mdk_rdev_t * rdev) ...@@ -365,9 +365,6 @@ static void free_disk_sb(mdk_rdev_t * rdev)
rdev->sb_page = NULL; rdev->sb_page = NULL;
rdev->sb_offset = 0; rdev->sb_offset = 0;
rdev->size = 0; rdev->size = 0;
} else {
if (!rdev->faulty)
MD_BUG();
} }
} }
...@@ -586,7 +583,6 @@ static void export_rdev(mdk_rdev_t * rdev) ...@@ -586,7 +583,6 @@ static void export_rdev(mdk_rdev_t * rdev)
md_autodetect_dev(rdev->bdev->bd_dev); md_autodetect_dev(rdev->bdev->bd_dev);
#endif #endif
unlock_rdev(rdev); unlock_rdev(rdev);
rdev->faulty = 0;
kfree(rdev); kfree(rdev);
} }
...@@ -671,9 +667,9 @@ static void print_sb(mdp_super_t *sb) ...@@ -671,9 +667,9 @@ static void print_sb(mdp_super_t *sb)
static void print_rdev(mdk_rdev_t *rdev) static void print_rdev(mdk_rdev_t *rdev)
{ {
printk(KERN_INFO "md: rdev %s, SZ:%08ld F:%d DN:%d ", printk(KERN_INFO "md: rdev %s, SZ:%08ld F:%d S:%d DN:%d ",
bdev_partition_name(rdev->bdev), bdev_partition_name(rdev->bdev),
rdev->size, rdev->faulty, rdev->desc_nr); rdev->size, rdev->faulty, rdev->in_sync, rdev->desc_nr);
if (rdev->sb) { if (rdev->sb) {
printk(KERN_INFO "md: rdev superblock:\n"); printk(KERN_INFO "md: rdev superblock:\n");
print_sb(rdev->sb); print_sb(rdev->sb);
...@@ -1006,6 +1002,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int on_disk) ...@@ -1006,6 +1002,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int on_disk)
} }
rdev->desc_nr = -1; rdev->desc_nr = -1;
rdev->faulty = 0; rdev->faulty = 0;
rdev->in_sync = 0;
atomic_set(&rdev->nr_pending, 0); atomic_set(&rdev->nr_pending, 0);
size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
...@@ -2182,14 +2179,13 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) ...@@ -2182,14 +2179,13 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
static int set_disk_faulty(mddev_t *mddev, dev_t dev) static int set_disk_faulty(mddev_t *mddev, dev_t dev)
{ {
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
int ret;
rdev = find_rdev(mddev, dev); rdev = find_rdev(mddev, dev);
if (!rdev) if (!rdev)
return 0; return 0;
ret = md_error(mddev, rdev); md_error(mddev, rdev);
return ret; return 1;
} }
static int md_ioctl(struct inode *inode, struct file *file, static int md_ioctl(struct inode *inode, struct file *file,
...@@ -2604,9 +2600,8 @@ static void md_recover_arrays(void) ...@@ -2604,9 +2600,8 @@ static void md_recover_arrays(void)
} }
int md_error(mddev_t *mddev, mdk_rdev_t *rdev) void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
{ {
dprintk("md_error dev:(%d:%d), rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", dprintk("md_error dev:(%d:%d), rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
MD_MAJOR,mdidx(mddev),MAJOR(bdev->bd_dev),MINOR(bdev->bd_dev), MD_MAJOR,mdidx(mddev),MAJOR(bdev->bd_dev),MINOR(bdev->bd_dev),
__builtin_return_address(0),__builtin_return_address(1), __builtin_return_address(0),__builtin_return_address(1),
...@@ -2614,25 +2609,15 @@ int md_error(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -2614,25 +2609,15 @@ int md_error(mddev_t *mddev, mdk_rdev_t *rdev)
if (!mddev) { if (!mddev) {
MD_BUG(); MD_BUG();
return 0; return;
} }
if (!rdev || rdev->faulty) if (!rdev || rdev->faulty)
return 0; return;
if (!mddev->pers->error_handler if (!mddev->pers->error_handler)
|| mddev->pers->error_handler(mddev,rdev) <= 0) { return;
rdev->faulty = 1; mddev->pers->error_handler(mddev,rdev);
rdev->in_sync = 0;
} else
return 1;
/*
* if recovery was running, stop it now.
*/
if (mddev->recovery_running)
mddev->recovery_running = -EIO;
md_recover_arrays(); md_recover_arrays();
return 0;
} }
static int status_unused(char * page) static int status_unused(char * page)
...@@ -3510,7 +3495,7 @@ static int __init raid_setup(char *str) ...@@ -3510,7 +3495,7 @@ static int __init raid_setup(char *str)
return 1; return 1;
} }
int __init md_run_setup(void) static int __init md_run_setup(void)
{ {
if (raid_setup_args.noautodetect) if (raid_setup_args.noautodetect)
printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=noautodetect)\n"); printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=noautodetect)\n");
......
...@@ -70,7 +70,7 @@ static void mp_pool_free(void *mpb, void *data) ...@@ -70,7 +70,7 @@ static void mp_pool_free(void *mpb, void *data)
kfree(mpb); kfree(mpb);
} }
static int multipath_map (mddev_t *mddev, mdk_rdev_t **rdev) static int multipath_map (mddev_t *mddev, mdk_rdev_t **rdevp)
{ {
multipath_conf_t *conf = mddev_to_conf(mddev); multipath_conf_t *conf = mddev_to_conf(mddev);
int i, disks = MD_SB_DISKS; int i, disks = MD_SB_DISKS;
...@@ -82,10 +82,10 @@ static int multipath_map (mddev_t *mddev, mdk_rdev_t **rdev) ...@@ -82,10 +82,10 @@ static int multipath_map (mddev_t *mddev, mdk_rdev_t **rdev)
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
for (i = 0; i < disks; i++) { for (i = 0; i < disks; i++) {
if (conf->multipaths[i].operational && mdk_rdev_t *rdev = conf->multipaths[i].rdev;
conf->multipaths[i].rdev) { if (rdev && rdev->in_sync) {
*rdev = conf->multipaths[i].rdev; *rdevp = rdev;
atomic_inc(&(*rdev)->nr_pending); atomic_inc(&rdev->nr_pending);
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
return 0; return 0;
} }
...@@ -158,10 +158,11 @@ static int multipath_read_balance (multipath_conf_t *conf) ...@@ -158,10 +158,11 @@ static int multipath_read_balance (multipath_conf_t *conf)
{ {
int disk; int disk;
for (disk = 0; disk < MD_SB_DISKS; disk++) for (disk = 0; disk < MD_SB_DISKS; disk++) {
if (conf->multipaths[disk].operational && mdk_rdev_t *rdev = conf->multipaths[disk].rdev;
conf->multipaths[disk].rdev) if (rdev && rdev->in_sync)
return disk; return disk;
}
BUG(); BUG();
return 0; return 0;
} }
...@@ -204,7 +205,8 @@ static int multipath_status (char *page, mddev_t *mddev) ...@@ -204,7 +205,8 @@ static int multipath_status (char *page, mddev_t *mddev)
conf->working_disks); conf->working_disks);
for (i = 0; i < conf->raid_disks; i++) for (i = 0; i < conf->raid_disks; i++)
sz += sprintf (page+sz, "%s", sz += sprintf (page+sz, "%s",
conf->multipaths[i].operational ? "U" : "_"); conf->multipaths[i].rdev &&
conf->multipaths[i].rdev->in_sync ? "U" : "_");
sz += sprintf (page+sz, "]"); sz += sprintf (page+sz, "]");
return sz; return sz;
} }
...@@ -219,28 +221,13 @@ static int multipath_status (char *page, mddev_t *mddev) ...@@ -219,28 +221,13 @@ static int multipath_status (char *page, mddev_t *mddev)
"multipath: IO failure on %s, disabling IO path. \n" \ "multipath: IO failure on %s, disabling IO path. \n" \
" Operation continuing on %d IO paths.\n" " Operation continuing on %d IO paths.\n"
static void mark_disk_bad (mddev_t *mddev, int failed)
{
multipath_conf_t *conf = mddev_to_conf(mddev);
struct multipath_info *multipath = conf->multipaths+failed;
multipath->operational = 0;
mddev->sb_dirty = 1;
conf->working_disks--;
printk (DISK_FAILED, bdev_partition_name (multipath->rdev->bdev),
conf->working_disks);
}
/* /*
* Careful, this can execute in IRQ contexts as well! * Careful, this can execute in IRQ contexts as well!
*/ */
static int multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
{ {
multipath_conf_t *conf = mddev_to_conf(mddev); multipath_conf_t *conf = mddev_to_conf(mddev);
struct multipath_info * multipaths = conf->multipaths;
int disks = MD_SB_DISKS;
int i;
if (conf->working_disks <= 1) { if (conf->working_disks <= 1) {
/* /*
...@@ -248,24 +235,21 @@ static int multipath_error (mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -248,24 +235,21 @@ static int multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
* first check if this is a queued request for a device * first check if this is a queued request for a device
* which has just failed. * which has just failed.
*/ */
for (i = 0; i < disks; i++) {
if (multipaths[i].rdev == rdev && !multipaths[i].operational)
return 0;
}
printk (LAST_DISK); printk (LAST_DISK);
return 1; /* leave it active... it's all we have */ /* leave it active... it's all we have */
} else { } else {
/* /*
* Mark disk as unusable * Mark disk as unusable
*/ */
for (i = 0; i < disks; i++) { if (!rdev->faulty) {
if (multipaths[i].rdev == rdev && multipaths[i].operational) { rdev->in_sync = 0;
mark_disk_bad(mddev, i); rdev->faulty = 1;
break; mddev->sb_dirty = 1;
} conf->working_disks--;
printk (DISK_FAILED, bdev_partition_name (rdev->bdev),
conf->working_disks);
} }
} }
return 0;
} }
#undef LAST_DISK #undef LAST_DISK
...@@ -290,7 +274,7 @@ static void print_multipath_conf (multipath_conf_t *conf) ...@@ -290,7 +274,7 @@ static void print_multipath_conf (multipath_conf_t *conf)
tmp = conf->multipaths + i; tmp = conf->multipaths + i;
if (tmp->rdev) if (tmp->rdev)
printk(" disk%d, o:%d, dev:%s\n", printk(" disk%d, o:%d, dev:%s\n",
i,tmp->operational, i,!tmp->rdev->faulty,
bdev_partition_name(tmp->rdev->bdev)); bdev_partition_name(tmp->rdev->bdev));
} }
} }
...@@ -308,7 +292,6 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -308,7 +292,6 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
for (path=0; path<mddev->raid_disks; path++) for (path=0; path<mddev->raid_disks; path++)
if ((p=conf->multipaths+path)->rdev == NULL) { if ((p=conf->multipaths+path)->rdev == NULL) {
p->rdev = rdev; p->rdev = rdev;
p->operational = 1;
conf->working_disks++; conf->working_disks++;
rdev->raid_disk = path; rdev->raid_disk = path;
found = 1; found = 1;
...@@ -329,8 +312,8 @@ static int multipath_remove_disk(mddev_t *mddev, int number) ...@@ -329,8 +312,8 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
if (p->rdev) { if (p->rdev) {
if (p->operational || if (p->rdev->in_sync ||
(p->rdev && atomic_read(&p->rdev->nr_pending))) { atomic_read(&p->rdev->nr_pending)) {
printk(KERN_ERR "hot-remove-disk, slot %d is identified but is still operational!\n", number); printk(KERN_ERR "hot-remove-disk, slot %d is identified but is still operational!\n", number);
err = -EBUSY; err = -EBUSY;
goto abort; goto abort;
...@@ -474,18 +457,8 @@ static int multipath_run (mddev_t *mddev) ...@@ -474,18 +457,8 @@ static int multipath_run (mddev_t *mddev)
disk = conf->multipaths + disk_idx; disk = conf->multipaths + disk_idx;
disk->rdev = rdev; disk->rdev = rdev;
if (rdev->faulty) if (!rdev->faulty)
disk->operational = 0;
else {
/*
* Mark all disks as active to start with, there are no
* spares. multipath_read_balance deals with choose
* the "best" operational device.
*/
disk->operational = 1;
conf->working_disks++; conf->working_disks++;
}
} }
conf->raid_disks = mddev->raid_disks; conf->raid_disks = mddev->raid_disks;
......
...@@ -188,7 +188,7 @@ static inline void put_buf(r1bio_t *r1_bio) ...@@ -188,7 +188,7 @@ static inline void put_buf(r1bio_t *r1_bio)
mempool_free(r1_bio, conf->r1buf_pool); mempool_free(r1_bio, conf->r1buf_pool);
} }
static int map(mddev_t *mddev, mdk_rdev_t **rdev) static int map(mddev_t *mddev, mdk_rdev_t **rdevp)
{ {
conf_t *conf = mddev_to_conf(mddev); conf_t *conf = mddev_to_conf(mddev);
int i, disks = conf->raid_disks; int i, disks = conf->raid_disks;
...@@ -200,11 +200,10 @@ static int map(mddev_t *mddev, mdk_rdev_t **rdev) ...@@ -200,11 +200,10 @@ static int map(mddev_t *mddev, mdk_rdev_t **rdev)
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
for (i = 0; i < disks; i++) { for (i = 0; i < disks; i++) {
if (conf->mirrors[i].operational mdk_rdev_t *rdev = conf->mirrors[i].rdev;
&& !conf->mirrors[i].write_only if (rdev && rdev->in_sync) {
&& conf->mirrors[i].rdev) { *rdevp = rdev;
*rdev = conf->mirrors[i].rdev; atomic_inc(&rdev->nr_pending);
atomic_inc(&(*rdev)->nr_pending);
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
return 0; return 0;
} }
...@@ -346,7 +345,9 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio) ...@@ -346,7 +345,9 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
if (!conf->mddev->in_sync && (this_sector + sectors >= conf->next_resync)) { if (!conf->mddev->in_sync && (this_sector + sectors >= conf->next_resync)) {
/* make sure that disk is operational */ /* make sure that disk is operational */
new_disk = 0; new_disk = 0;
while (!conf->mirrors[new_disk].operational || conf->mirrors[new_disk].write_only) {
while (!conf->mirrors[new_disk].rdev ||
!conf->mirrors[new_disk].rdev->in_sync) {
new_disk++; new_disk++;
if (new_disk == conf->raid_disks) { if (new_disk == conf->raid_disks) {
new_disk = 0; new_disk = 0;
...@@ -358,7 +359,8 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio) ...@@ -358,7 +359,8 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
/* make sure the disk is operational */ /* make sure the disk is operational */
while (!conf->mirrors[new_disk].operational || conf->mirrors[new_disk].write_only) { while (!conf->mirrors[new_disk].rdev ||
!conf->mirrors[new_disk].rdev->in_sync) {
if (new_disk <= 0) if (new_disk <= 0)
new_disk = conf->raid_disks; new_disk = conf->raid_disks;
new_disk--; new_disk--;
...@@ -387,8 +389,8 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio) ...@@ -387,8 +389,8 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
disk = conf->raid_disks; disk = conf->raid_disks;
disk--; disk--;
if (conf->mirrors[disk].write_only || if (!conf->mirrors[disk].rdev ||
!conf->mirrors[disk].operational) !conf->mirrors[disk].rdev->in_sync)
continue; continue;
if (!atomic_read(&conf->mirrors[disk].rdev->nr_pending)) { if (!atomic_read(&conf->mirrors[disk].rdev->nr_pending)) {
...@@ -509,8 +511,8 @@ static int make_request(request_queue_t *q, struct bio * bio) ...@@ -509,8 +511,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
*/ */
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
for (i = 0; i < disks; i++) { for (i = 0; i < disks; i++) {
if (conf->mirrors[i].operational && if (conf->mirrors[i].rdev &&
conf->mirrors[i].rdev) { !conf->mirrors[i].rdev->faulty) {
atomic_inc(&conf->mirrors[i].rdev->nr_pending); atomic_inc(&conf->mirrors[i].rdev->nr_pending);
r1_bio->write_bios[i] = bio; r1_bio->write_bios[i] = bio;
} else } else
...@@ -573,7 +575,8 @@ static int status(char *page, mddev_t *mddev) ...@@ -573,7 +575,8 @@ static int status(char *page, mddev_t *mddev)
conf->working_disks); conf->working_disks);
for (i = 0; i < conf->raid_disks; i++) for (i = 0; i < conf->raid_disks; i++)
sz += sprintf(page+sz, "%s", sz += sprintf(page+sz, "%s",
conf->mirrors[i].operational ? "U" : "_"); conf->mirrors[i].rdev &&
conf->mirrors[i].rdev->in_sync ? "U" : "_");
sz += sprintf (page+sz, "]"); sz += sprintf (page+sz, "]");
return sz; return sz;
} }
...@@ -594,49 +597,37 @@ static int status(char *page, mddev_t *mddev) ...@@ -594,49 +597,37 @@ static int status(char *page, mddev_t *mddev)
#define ALREADY_SYNCING KERN_INFO \ #define ALREADY_SYNCING KERN_INFO \
"raid1: syncing already in progress.\n" "raid1: syncing already in progress.\n"
static void mark_disk_bad(mddev_t *mddev, int failed)
{
conf_t *conf = mddev_to_conf(mddev);
mirror_info_t *mirror = conf->mirrors+failed;
mirror->operational = 0;
if (!mirror->write_only) {
mddev->degraded++;
conf->working_disks--;
}
mddev->sb_dirty = 1;
printk(DISK_FAILED, bdev_partition_name(mirror->rdev->bdev), conf->working_disks);
}
static int error(mddev_t *mddev, mdk_rdev_t *rdev) static void error(mddev_t *mddev, mdk_rdev_t *rdev)
{ {
conf_t *conf = mddev_to_conf(mddev); conf_t *conf = mddev_to_conf(mddev);
mirror_info_t * mirrors = conf->mirrors;
int disks = conf->raid_disks;
int i;
/* /*
* Find the drive.
* If it is not operational, then we have already marked it as dead * If it is not operational, then we have already marked it as dead
* else if it is the last working disks, ignore the error, let the * else if it is the last working disks, ignore the error, let the
* next level up know. * next level up know.
* else mark the drive as failed * else mark the drive as failed
*/ */
for (i = 0; i < disks; i++) if (rdev->in_sync
if (mirrors[i].operational && mirrors[i].rdev == rdev)
break;
if (i == disks)
return 0;
if (mirrors[i].operational && !mirrors[i].write_only
&& conf->working_disks == 1) && conf->working_disks == 1)
/* /*
* Don't fail the drive, act as though we were just a * Don't fail the drive, act as though we were just a
* normal single drive * normal single drive
*/ */
return 1; return;
mark_disk_bad(mddev, i); if (rdev->in_sync) {
return 0; mddev->degraded++;
conf->working_disks--;
/*
* if recovery was running, stop it now.
*/
if (mddev->recovery_running)
mddev->recovery_running = -EIO;
}
rdev->in_sync = 0;
rdev->faulty = 1;
mddev->sb_dirty = 1;
printk(DISK_FAILED, bdev_partition_name(rdev->bdev), conf->working_disks);
} }
static void print_conf(conf_t *conf) static void print_conf(conf_t *conf)
...@@ -656,7 +647,7 @@ static void print_conf(conf_t *conf) ...@@ -656,7 +647,7 @@ static void print_conf(conf_t *conf)
tmp = conf->mirrors + i; tmp = conf->mirrors + i;
if (tmp->rdev) if (tmp->rdev)
printk(" disk %d, wo:%d, o:%d, dev:%s\n", printk(" disk %d, wo:%d, o:%d, dev:%s\n",
i, tmp->write_only, tmp->operational, i, !tmp->rdev->in_sync, !tmp->rdev->faulty,
bdev_partition_name(tmp->rdev->bdev)); bdev_partition_name(tmp->rdev->bdev));
} }
} }
...@@ -688,12 +679,11 @@ static int raid1_spare_active(mddev_t *mddev) ...@@ -688,12 +679,11 @@ static int raid1_spare_active(mddev_t *mddev)
*/ */
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks; i++) {
tmp = conf->mirrors + i; tmp = conf->mirrors + i;
if (tmp->operational && tmp->rdev if (tmp->rdev
&& !tmp->rdev->faulty && !tmp->rdev->faulty
&& tmp->write_only) { && !tmp->rdev->in_sync) {
conf->working_disks++; conf->working_disks++;
mddev->degraded--; mddev->degraded--;
tmp->write_only = 0;
tmp->rdev->in_sync = 1; tmp->rdev->in_sync = 1;
} }
} }
...@@ -715,8 +705,6 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -715,8 +705,6 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
for (mirror=0; mirror < mddev->raid_disks; mirror++) for (mirror=0; mirror < mddev->raid_disks; mirror++)
if ( !(p=conf->mirrors+mirror)->rdev) { if ( !(p=conf->mirrors+mirror)->rdev) {
p->rdev = rdev; p->rdev = rdev;
p->write_only = 1;
p->operational = 1;
p->head_position = 0; p->head_position = 0;
rdev->raid_disk = mirror; rdev->raid_disk = mirror;
found = 1; found = 1;
...@@ -737,8 +725,8 @@ static int raid1_remove_disk(mddev_t *mddev, int number) ...@@ -737,8 +725,8 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
print_conf(conf); print_conf(conf);
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
if (p->rdev) { if (p->rdev) {
if (p->operational || if (p->rdev->in_sync ||
(p->rdev && atomic_read(&p->rdev->nr_pending))) { atomic_read(&p->rdev->nr_pending)) {
err = -EBUSY; err = -EBUSY;
goto abort; goto abort;
} }
...@@ -837,20 +825,19 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) ...@@ -837,20 +825,19 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
for (i = 0; i < disks ; i++) { for (i = 0; i < disks ; i++) {
r1_bio->write_bios[i] = NULL; r1_bio->write_bios[i] = NULL;
if (!conf->mirrors[i].operational) if (!conf->mirrors[i].rdev ||
conf->mirrors[i].rdev->faulty)
continue; continue;
if (i == conf->last_used) if (i == conf->last_used)
/* /*
* we read from here, no need to write * we read from here, no need to write
*/ */
continue; continue;
if (!conf->mirrors[i].write_only && mddev->in_sync) if (conf->mirrors[i].rdev->in_sync && mddev->in_sync)
/* /*
* don't need to write this we are just rebuilding * don't need to write this we are just rebuilding
*/ */
continue; continue;
if (!conf->mirrors[i].rdev)
continue;
atomic_inc(&conf->mirrors[i].rdev->nr_pending); atomic_inc(&conf->mirrors[i].rdev->nr_pending);
r1_bio->write_bios[i] = bio; r1_bio->write_bios[i] = bio;
} }
...@@ -1009,9 +996,8 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) ...@@ -1009,9 +996,8 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
disk = conf->last_used; disk = conf->last_used;
/* make sure disk is operational */ /* make sure disk is operational */
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
while (!conf->mirrors[disk].operational || while (conf->mirrors[disk].rdev == NULL ||
conf->mirrors[disk].write_only || !conf->mirrors[disk].rdev->in_sync) {
!conf->mirrors[disk].rdev) {
if (disk <= 0) if (disk <= 0)
disk = conf->raid_disks; disk = conf->raid_disks;
disk--; disk--;
...@@ -1149,8 +1135,6 @@ static int run(mddev_t *mddev) ...@@ -1149,8 +1135,6 @@ static int run(mddev_t *mddev)
disk = conf->mirrors + disk_idx; disk = conf->mirrors + disk_idx;
disk->rdev = rdev; disk->rdev = rdev;
disk->operational = ! rdev->faulty;
disk->write_only = ! rdev->in_sync;
disk->head_position = 0; disk->head_position = 0;
if (!rdev->faulty && rdev->in_sync) if (!rdev->faulty && rdev->in_sync)
conf->working_disks++; conf->working_disks++;
...@@ -1174,8 +1158,6 @@ static int run(mddev_t *mddev) ...@@ -1174,8 +1158,6 @@ static int run(mddev_t *mddev)
disk = conf->mirrors + i; disk = conf->mirrors + i;
if (!disk->rdev) { if (!disk->rdev) {
disk->operational = 0;
disk->write_only = 0;
disk->head_position = 0; disk->head_position = 0;
mddev->degraded++; mddev->degraded++;
} }
...@@ -1186,8 +1168,8 @@ static int run(mddev_t *mddev) ...@@ -1186,8 +1168,8 @@ static int run(mddev_t *mddev)
* to read balancing. * to read balancing.
*/ */
for (j = 0; j < conf->raid_disks && for (j = 0; j < conf->raid_disks &&
(!conf->mirrors[j].operational || (!conf->mirrors[j].rdev ||
conf->mirrors[j].write_only) ; j++) !conf->mirrors[j].rdev->in_sync) ; j++)
/* nothing */; /* nothing */;
conf->last_used = j; conf->last_used = j;
......
This diff is collapsed.
...@@ -77,8 +77,7 @@ extern void md_wakeup_thread(mdk_thread_t *thread); ...@@ -77,8 +77,7 @@ extern void md_wakeup_thread(mdk_thread_t *thread);
extern void md_interrupt_thread (mdk_thread_t *thread); extern void md_interrupt_thread (mdk_thread_t *thread);
extern void md_done_sync(mddev_t *mddev, int blocks, int ok); extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
extern void md_sync_acct(mdk_rdev_t *rdev, unsigned long nr_sectors); extern void md_sync_acct(mdk_rdev_t *rdev, unsigned long nr_sectors);
extern int md_error (mddev_t *mddev, mdk_rdev_t *rdev); extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev);
extern int md_run_setup(void);
extern void md_print_devices (void); extern void md_print_devices (void);
......
...@@ -154,6 +154,16 @@ struct mdk_rdev_s ...@@ -154,6 +154,16 @@ struct mdk_rdev_s
mdp_super_t *sb; mdp_super_t *sb;
unsigned long sb_offset; unsigned long sb_offset;
/* A device can be in one of three states based on two flags:
* Not working: faulty==1 in_sync==0
* Fully working: faulty==0 in_sync==1
* Working, but not
* in sync with array
* faulty==0 in_sync==0
*
* It can never have faulty==1, in_sync==1
* This reduces the burden of testing multiple flags in many cases
*/
int faulty; /* if faulty do not issue IO requests */ int faulty; /* if faulty do not issue IO requests */
int in_sync; /* device is a full member of the array */ int in_sync; /* device is a full member of the array */
...@@ -227,7 +237,10 @@ struct mdk_personality_s ...@@ -227,7 +237,10 @@ struct mdk_personality_s
int (*run)(mddev_t *mddev); int (*run)(mddev_t *mddev);
int (*stop)(mddev_t *mddev); int (*stop)(mddev_t *mddev);
int (*status)(char *page, mddev_t *mddev); int (*status)(char *page, mddev_t *mddev);
int (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev); /* error_handler must set ->faulty and clear ->in_sync
* if appropriate, and should abort recovery if needed
*/
void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev);
int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev); int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
int (*hot_remove_disk) (mddev_t *mddev, int number); int (*hot_remove_disk) (mddev_t *mddev, int number);
int (*spare_active) (mddev_t *mddev); int (*spare_active) (mddev_t *mddev);
......
...@@ -6,11 +6,6 @@ ...@@ -6,11 +6,6 @@
struct multipath_info { struct multipath_info {
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
/*
* State bits:
*/
int operational;
}; };
struct multipath_private_data { struct multipath_private_data {
......
...@@ -8,12 +8,6 @@ typedef struct mirror_info mirror_info_t; ...@@ -8,12 +8,6 @@ typedef struct mirror_info mirror_info_t;
struct mirror_info { struct mirror_info {
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
sector_t head_position; sector_t head_position;
/*
* State bits:
*/
int operational;
int write_only;
}; };
typedef struct r1bio_s r1bio_t; typedef struct r1bio_s r1bio_t;
......
...@@ -148,6 +148,11 @@ struct stripe_head { ...@@ -148,6 +148,11 @@ struct stripe_head {
#define R5_UPTODATE 0 /* page contains current data */ #define R5_UPTODATE 0 /* page contains current data */
#define R5_LOCKED 1 /* IO has been submitted on "req" */ #define R5_LOCKED 1 /* IO has been submitted on "req" */
#define R5_OVERWRITE 2 /* towrite covers whole page */ #define R5_OVERWRITE 2 /* towrite covers whole page */
/* and some that are internal to handle_stripe */
#define R5_Insync 3 /* rdev && rdev->in_sync at start */
#define R5_Wantread 4 /* want to schedule a read */
#define R5_Wantwrite 5
#define R5_Syncio 6 /* this io need to be accounted as resync io */
/* /*
* Write method * Write method
...@@ -193,8 +198,6 @@ struct stripe_head { ...@@ -193,8 +198,6 @@ struct stripe_head {
struct disk_info { struct disk_info {
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
int operational;
int write_only;
}; };
struct raid5_private_data { struct raid5_private_data {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment