Commit 86711d5e authored by Neil Brown, committed by Linus Torvalds

[PATCH] md 22 of 22 - Generalise md sync threads

Previously each raid personality (well, 1 and 5) started its
own thread to do resync, but md.c had a single common thread to do
reconstruction.  Apart from being untidy, this means that you cannot
have two arrays reconstructing at the same time, though you can have
two arrays resyncing at the same time.

This patch changes the personalities so they don't start the resync,
but just leave a flag to say that it is needed.
The common thread (mdrecoveryd) now just monitors things and starts a
separate per-array thread whenever resync or recovery (or both) is
needed.
When the recovery finishes, mdrecoveryd will be woken up to re-lock
the device and activate the spares or whatever.
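
In outline, one pass of the monitor loop looks like this (a condensed
sketch of the md_do_recovery() code in the md.c hunks below, not the
literal code; the spare-disk bookkeeping is trimmed and the "is
resync/recovery needed" test is collapsed into one condition):

    static void md_do_recovery(void *data)
    {
            mddev_t *mddev;
            struct list_head *tmp;

            ITERATE_MDDEV(mddev,tmp) if (mddev_lock(mddev)==0) {
                    if (mddev->recovery_running > 0)
                            goto unlock;            /* worker still busy */
                    if (mddev->sync_thread) {
                            /* worker finished: reap it, then activate or
                             * retire mddev->spare and update the superblock */
                            md_unregister_thread(mddev->sync_thread);
                            mddev->sync_thread = NULL;
                            goto unlock;
                    }
                    /* condensed: degraded (wants a spare) or dirty (wants resync) */
                    if (mddev->pers->sync_request &&
                        (mddev->spare || !mddev->in_sync)) {
                            mddev->sync_thread = md_register_thread(md_do_sync,
                                                                    mddev, "md_resync");
                            mddev->recovery_running = 1;
                            md_wakeup_thread(mddev->sync_thread);
                    }
            unlock:
                    mddev_unlock(mddev);
            }
    }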

raid1 needs to know when resync/recovery starts and ends so it can
allocate and release resources.
It allocates them when a resync request for stripe 0 is received.
Previously it deallocated for resync in its own thread, and
deallocated for recovery when the spare was made active or inactive
(depending on success).

As raid1 doesn't own a thread anymore, this needed to change.  To
match the "alloc on 0" convention, md_do_sync now calls sync_request
one last time, asking to sync one block past the end.  This is the
signal to release any resources.
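
Concretely, the bracketing convention at the personality end looks like
this (a sketch based on the raid1 sync_request() hunks below; the real
per-block resync I/O is omitted):

    static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
    {
            conf_t *conf = mddev_to_conf(mddev);
            sector_t max_sector = mddev->sb->size << 1;  /* sb->size is in 1K blocks */

            if (sector_nr == 0)
                    if (init_resync(conf))          /* "alloc on 0" */
                            return -ENOMEM;
            if (sector_nr >= max_sector) {
                    close_sync(conf);               /* the one-past-the-end call lands here */
                    return 0;                       /* resources released, nothing to do */
            }
            /* ... issue the resync I/O for this block ... */
            return RESYNC_BLOCK_SIZE >> 9;          /* sectors scheduled this call */
    }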
parent 7444c718
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -106,6 +106,7 @@ static ctl_table raid_root_table[] = {
  * subsystems want to have a pre-defined structure
  */
 struct hd_struct md_hd_struct[MAX_MD_DEVS];
+static void md_recover_arrays(void);
 static mdk_thread_t *md_recovery_thread;
 
 int md_size[MAX_MD_DEVS];
@@ -1657,9 +1658,15 @@ static int do_md_run(mddev_t * mddev)
                 return -EINVAL;
         }
 
-        mddev->sb->state &= ~(1 << MD_SB_CLEAN);
+        mddev->in_sync = (mddev->sb->state & (1<<MD_SB_CLEAN));
+        /* if personality doesn't have "sync_request", then
+         * a dirty array doesn't mean anything
+         */
+        if (mddev->pers->sync_request)
+                mddev->sb->state &= ~(1 << MD_SB_CLEAN);
         __md_update_sb(mddev);
+        md_recover_arrays();
 
         /*
          * md_size has units of 1K blocks, which are
          * twice as large as sectors.
@@ -1699,8 +1706,6 @@ static int restart_array(mddev_t *mddev)
                  * Kick recovery or resync if necessary
                  */
                 md_recover_arrays();
-                if (mddev->pers->restart_resync)
-                        mddev->pers->restart_resync(mddev);
                 err = 0;
         } else {
                 printk(KERN_ERR "md: md%d has no personality assigned.\n",
@@ -1717,11 +1722,9 @@ static int restart_array(mddev_t *mddev)
 #define STILL_IN_USE \
 "md: md%d still in use.\n"
 
-DECLARE_WAIT_QUEUE_HEAD(resync_wait);
-
 static int do_md_stop(mddev_t * mddev, int ro)
 {
-        int err = 0, resync_interrupted = 0;
+        int err = 0;
         kdev_t dev = mddev_to_kdev(mddev);
 
         if (atomic_read(&mddev->active)>1) {
@@ -1731,26 +1734,17 @@ static int do_md_stop(mddev_t * mddev, int ro)
         }
 
         if (mddev->pers) {
-                /*
-                 * It is safe to call stop here, it only frees private
-                 * data. Also, it tells us if a device is unstoppable
-                 * (eg. resyncing is in progress)
-                 */
-                if (mddev->pers->stop_resync)
-                        if (mddev->pers->stop_resync(mddev))
-                                resync_interrupted = 1;
-
-                if (mddev->recovery_running==1)
-                        md_interrupt_thread(md_recovery_thread);
-
-                /*
-                 * This synchronizes with signal delivery to the
-                 * resync or reconstruction thread. It also nicely
-                 * hangs the process if some reconstruction has not
-                 * finished.
-                 */
-                wait_event(resync_wait, mddev->recovery_running <= 0);
+                if (mddev->sync_thread) {
+                        if (mddev->recovery_running > 0)
+                                mddev->recovery_running = -EINTR;
+                        md_unregister_thread(mddev->sync_thread);
+                        mddev->sync_thread = NULL;
+                        if (mddev->spare) {
+                                mddev->pers->diskop(mddev, &mddev->spare,
+                                                    DISKOP_SPARE_INACTIVE);
+                                mddev->spare = NULL;
+                        }
+                }
 
                 invalidate_device(dev, 1);
@@ -1776,7 +1770,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
                          * mark it clean only if there was no resync
                          * interrupted.
                          */
-                        if (!mddev->recovery_running && !resync_interrupted) {
+                        if (mddev->in_sync) {
                                 printk(KERN_INFO "md: marking sb clean...\n");
                                 mddev->sb->state |= 1 << MD_SB_CLEAN;
                         }
@@ -2795,6 +2789,7 @@ int md_thread(void * arg)
          */
         daemonize();
+        reparent_to_init();
 
         sprintf(current->comm, thread->name);
         current->exit_signal = SIGCHLD;
@@ -2896,7 +2891,7 @@ void md_unregister_thread(mdk_thread_t *thread)
         kfree(thread);
 }
 
-void md_recover_arrays(void)
+static void md_recover_arrays(void)
 {
         if (!md_recovery_thread) {
                 MD_BUG();
@@ -2931,10 +2926,8 @@ int md_error(mddev_t *mddev, struct block_device *bdev)
         /*
          * if recovery was running, stop it now.
          */
-        if (mddev->pers->stop_resync)
-                mddev->pers->stop_resync(mddev);
-        if (mddev->recovery_running==1)
-                md_interrupt_thread(md_recovery_thread);
+        if (mddev->recovery_running)
+                mddev->recovery_running = -EIO;
         md_recover_arrays();
         return 0;
@@ -2992,18 +2985,9 @@ static int status_resync(char * page, mddev_t * mddev)
                 sz += sprintf(page + sz, ".");
                 sz += sprintf(page + sz, "] ");
         }
-        if (mddev->recovery_running==2)
-                /*
-                 * true resync
-                 */
-                sz += sprintf(page + sz, " resync =%3lu.%lu%% (%lu/%lu)",
-                              res/10, res % 10, resync, max_blocks);
-        else
-                /*
-                 * recovery ...
-                 */
-                sz += sprintf(page + sz, " recovery =%3lu.%lu%% (%lu/%lu)",
-                              res/10, res % 10, resync, max_blocks);
+        sz += sprintf(page + sz, " %s =%3lu.%lu%% (%lu/%lu)",
+                      (mddev->spare ? "recovery" : "resync"),
+                      res/10, res % 10, resync, max_blocks);
 
         /*
          * We do not want to overflow, so the order of operands and
@@ -3078,12 +3062,11 @@ static int md_status_read_proc(char *page, char **start, off_t off,
                         sz += mddev->pers->status (page+sz, mddev);
                         sz += sprintf(page+sz, "\n      ");
-                        if (mddev->curr_resync) {
+                        if (mddev->curr_resync > 1)
                                 sz += status_resync (page+sz, mddev);
-                        } else {
-                                if (mddev->recovery_running < 0)
-                                        sz += sprintf(page + sz, " resync=DELAYED");
-                        }
+                        else if (mddev->curr_resync == 1)
+                                sz += sprintf(page + sz, " resync=DELAYED");
                         sz += sprintf(page + sz, "\n");
                         mddev_unlock(mddev);
                 }
@@ -3192,14 +3175,20 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
         atomic_sub(blocks, &mddev->recovery_active);
         wake_up(&mddev->recovery_wait);
         if (!ok) {
+                mddev->recovery_running = -EIO;
+                md_recover_arrays();
                 // stop recovery, signal do_sync ....
         }
 }
 
+DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+
 #define SYNC_MARKS      10
 #define SYNC_MARK_STEP  (3*HZ)
-int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
+static void md_do_sync(void *data)
 {
+        mddev_t *mddev = data;
         mddev_t *mddev2;
         unsigned int max_sectors, currspeed = 0,
                 j, window, err;
@@ -3209,6 +3198,9 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
         struct list_head *tmp;
         unsigned long last_check;
 
+        /* just incase thread restarts... */
+        if (mddev->recovery_running <= 0)
+                return;
 
         /* we overload curr_resync somewhat here.
          * 0 == not engaged in resync at all
@@ -3304,7 +3296,6 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
                         /*
                          * got a signal, exit.
                          */
-                        mddev->curr_resync = 0;
                         printk(KERN_INFO "md: md_do_sync() got signal ... exiting\n");
                         flush_curr_signals();
                         err = -EINTR;
@@ -3339,98 +3330,112 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
          */
 out:
         wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
+        /* tell personality that we are finished */
+        mddev->pers->sync_request(mddev, max_sectors, 1);
 
         mddev->curr_resync = 0;
-        mddev->recovery_running = err;
-        wake_up(&resync_wait);
-        return err;
+        if (err)
+                mddev->recovery_running = err;
+        if (mddev->recovery_running > 0)
+                mddev->recovery_running = 0;
+        if (mddev->recovery_running == 0)
+                mddev->in_sync = 1;
+        md_recover_arrays();
 }
 
 /*
- * This is a kernel thread which syncs a spare disk with the active array
- *
- * the amount of foolproofing might seem to be a tad excessive, but an
- * early (not so error-safe) version of raid1syncd synced the first 0.5 gigs
- * of my root partition with the first 0.5 gigs of my /home partition ... so
- * i'm a bit nervous ;)
+ * This is the kernel thread that watches all md arrays for re-sync action
+ * that might be needed.
+ * It does not do any resync itself, but rather "forks" off other threads
+ * to do that as needed.
+ * When it is determined that resync is needed, we set "->recovery_running" and
+ * create a thread at ->sync_thread.
+ * When the thread finishes is clears recovery_running (or set and error)
+ * and wakeup up this thread which will reap the thread and finish up.
  */
 void md_do_recovery(void *data)
 {
-        int err;
         mddev_t *mddev;
         mdp_super_t *sb;
-        mdp_disk_t *spare;
         struct list_head *tmp;
 
         dprintk(KERN_INFO "md: recovery thread got woken up ...\n");
 
         ITERATE_MDDEV(mddev,tmp) if (mddev_lock(mddev)==0) {
                 sb = mddev->sb;
-                if (!sb)
+                if (!sb || !mddev->pers || !mddev->pers->diskop || mddev->ro)
                         goto unlock;
-                if (mddev->recovery_running)
+                if (mddev->recovery_running > 0)
+                        /* resync/recovery still happening */
                         goto unlock;
-                if (sb->active_disks == sb->raid_disks)
-                        goto unlock;
-                if (!sb->spare_disks) {
-                        printk(KERN_ERR "md%d: no spare disk to reconstruct array! "
-                               "-- continuing in degraded mode\n", mdidx(mddev));
+                if (mddev->sync_thread) {
+                        /* resync has finished, collect result */
+                        md_unregister_thread(mddev->sync_thread);
+                        mddev->sync_thread = NULL;
+                        if (mddev->recovery_running < 0) {
+                                /* some sort of failure.
+                                 * If we were doing a reconstruction,
+                                 * we need to retrieve the spare
+                                 */
+                                if (mddev->spare) {
+                                        mddev->pers->diskop(mddev, &mddev->spare,
+                                                            DISKOP_SPARE_INACTIVE);
+                                        mddev->spare = NULL;
+                                }
+                        } else {
+                                /* success...*/
+                                if (mddev->spare) {
+                                        mddev->pers->diskop(mddev, &mddev->spare,
+                                                            DISKOP_SPARE_ACTIVE);
+                                        mark_disk_sync(mddev->spare);
+                                        mark_disk_active(mddev->spare);
+                                        sb->active_disks++;
+                                        sb->spare_disks--;
+                                        mddev->spare = NULL;
+                                }
+                        }
+                        __md_update_sb(mddev);
+                        mddev->recovery_running = 0;
+                        wake_up(&resync_wait);
                         goto unlock;
                 }
-                /*
-                 * now here we get the spare and resync it.
-                 */
-                spare = get_spare(mddev);
-                if (!spare)
-                        goto unlock;
-                printk(KERN_INFO "md%d: resyncing spare disk %s to replace failed disk\n",
-                       mdidx(mddev), partition_name(mk_kdev(spare->major,spare->minor)));
-                if (!mddev->pers->diskop)
-                        goto unlock;
-                if (mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_WRITE))
-                        goto unlock;
-
-                mddev->recovery_running = 1;
-                mddev_unlock(mddev);
-                err = md_do_sync(mddev, spare);
-                mddev_lock(mddev); /* FIXME this can fail or deadlock with do_md_close */
-                if (err == -EIO) {
-                        printk(KERN_INFO "md%d: spare disk %s failed, skipping to next spare.\n",
-                               mdidx(mddev), partition_name(mk_kdev(spare->major,spare->minor)));
-                        if (!disk_faulty(spare)) {
-                                mddev->pers->diskop(mddev,&spare,DISKOP_SPARE_INACTIVE);
-                                mark_disk_faulty(spare);
-                                mark_disk_nonsync(spare);
-                                mark_disk_inactive(spare);
-                                sb->spare_disks--;
-                                sb->working_disks--;
-                                sb->failed_disks++;
-                        }
-                } else
-                        if (disk_faulty(spare))
-                                mddev->pers->diskop(mddev, &spare,
-                                                    DISKOP_SPARE_INACTIVE);
-                if (err == -EINTR || err == -ENOMEM) {
-                        /*
-                         * Recovery got interrupted, or ran out of mem ...
-                         * signal back that we have finished using the array.
-                         */
-                        mddev->pers->diskop(mddev, &spare,
-                                            DISKOP_SPARE_INACTIVE);
+                if (mddev->recovery_running) {
+                        /* that's odd.. */
+                        mddev->recovery_running = 0;
+                        wake_up(&resync_wait);
+                }
+
+                if (sb->active_disks < sb->raid_disks) {
+                        mddev->spare = get_spare(mddev);
+                        if (!mddev->spare)
+                                printk(KERN_ERR "md%d: no spare disk to reconstruct array! "
+                                       "-- continuing in degraded mode\n", mdidx(mddev));
+                        else
+                                printk(KERN_INFO "md%d: resyncing spare disk %s to replace failed disk\n",
                                       mdidx(mddev), partition_name(mk_kdev(mddev->spare->major,mddev->spare->minor)));
+                }
+                if (!mddev->spare && mddev->in_sync) {
+                        /* nothing we can do ... */
                         goto unlock;
                 }
-                if (!disk_faulty(spare)) {
-                        /*
-                         * the SPARE_ACTIVE diskop possibly changes the
-                         * pointer too
-                         */
-                        mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_ACTIVE);
-                        mark_disk_sync(spare);
-                        mark_disk_active(spare);
-                        sb->active_disks++;
-                        sb->spare_disks--;
+                if (mddev->pers->sync_request) {
+                        mddev->sync_thread = md_register_thread(md_do_sync,
+                                                                mddev,
+                                                                "md_resync");
+                        if (!mddev->sync_thread) {
+                                printk(KERN_ERR "md%d: could not start resync thread...\n", mdidx(mddev));
+                                if (mddev->spare)
+                                        mddev->pers->diskop(mddev, &mddev->spare, DISKOP_SPARE_INACTIVE);
+                                mddev->spare = NULL;
+                                mddev->recovery_running = 0;
+                        } else {
+                                if (mddev->spare)
+                                        mddev->pers->diskop(mddev, &mddev->spare, DISKOP_SPARE_WRITE);
+                                mddev->recovery_running = 1;
+                                md_wakeup_thread(mddev->sync_thread);
+                        }
                 }
-                __md_update_sb(mddev);
-                mddev->recovery_running = 0;
         unlock:
                 mddev_unlock(mddev);
         }
@@ -3900,10 +3905,8 @@ EXPORT_SYMBOL(register_md_personality);
 EXPORT_SYMBOL(unregister_md_personality);
 EXPORT_SYMBOL(partition_name);
 EXPORT_SYMBOL(md_error);
-EXPORT_SYMBOL(md_do_sync);
 EXPORT_SYMBOL(md_sync_acct);
 EXPORT_SYMBOL(md_done_sync);
-EXPORT_SYMBOL(md_recover_arrays);
 EXPORT_SYMBOL(md_register_thread);
 EXPORT_SYMBOL(md_unregister_thread);
 EXPORT_SYMBOL(md_update_sb);

--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -333,7 +333,7 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
          * device if no resync is going on, or below the resync window.
          * We take the first readable disk when above the resync window.
          */
-        if (conf->resync_mirrors && (this_sector + sectors >= conf->next_resync)) {
+        if (!conf->mddev->in_sync && (this_sector + sectors >= conf->next_resync)) {
                 /* make sure that disk is operational */
                 new_disk = 0;
                 while (!conf->mirrors[new_disk].operational || conf->mirrors[new_disk].write_only) {
@@ -652,6 +652,9 @@ static void close_sync(conf_t *conf)
         if (conf->barrier) BUG();
         if (waitqueue_active(&conf->wait_idle)) BUG();
         if (waitqueue_active(&conf->wait_resume)) BUG();
+
+        mempool_destroy(conf->r1buf_pool);
+        conf->r1buf_pool = NULL;
 }
 
 static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
@@ -768,7 +771,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
          * Deactivate a spare disk:
          */
         case DISKOP_SPARE_INACTIVE:
-                close_sync(conf);
                 sdisk = conf->mirrors + spare_disk;
                 sdisk->operational = 0;
                 sdisk->write_only = 0;
@@ -781,7 +783,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
          * property)
          */
         case DISKOP_SPARE_ACTIVE:
-                close_sync(conf);
                 sdisk = conf->mirrors + spare_disk;
                 fdisk = conf->mirrors + failed_disk;
@@ -915,10 +916,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
         }
 abort:
         spin_unlock_irq(&conf->device_lock);
-        if (state == DISKOP_SPARE_ACTIVE || state == DISKOP_SPARE_INACTIVE) {
-                mempool_destroy(conf->r1buf_pool);
-                conf->r1buf_pool = NULL;
-        }
 
         print_conf(conf);
         return err;
@@ -1008,7 +1005,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
                          * we read from here, no need to write
                          */
                         continue;
-                if (i < conf->raid_disks && !conf->resync_mirrors)
+                if (i < conf->raid_disks && mddev->in_sync)
                         /*
                          * don't need to write this we are just rebuilding
                          */
@@ -1113,29 +1110,6 @@ static void raid1d(void *data)
         spin_unlock_irqrestore(&retry_list_lock, flags);
 }
 
-/*
- * Private kernel thread to reconstruct mirrors after an unclean
- * shutdown.
- */
-static void raid1syncd(void *data)
-{
-        conf_t *conf = data;
-        mddev_t *mddev = conf->mddev;
-
-        if (!conf->resync_mirrors)
-                return;
-        if (mddev->recovery_running != 2)
-                return;
-        if (!md_do_sync(mddev, NULL)) {
-                /*
-                 * Only if everything went Ok.
-                 */
-                conf->resync_mirrors = 0;
-        }
-
-        close_sync(conf);
-}
-
 static int init_resync(conf_t *conf)
 {
@@ -1170,9 +1144,16 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
         sector_t max_sector, nr_sectors;
         int disk, partial;
 
-        if (!sector_nr)
+        if (sector_nr == 0)
                 if (init_resync(conf))
                         return -ENOMEM;
 
+        max_sector = mddev->sb->size << 1;
+        if (sector_nr >= max_sector) {
+                close_sync(conf);
+                return 0;
+        }
+
         /*
          * If there is non-resync activity waiting for us then
          * put in a delay to throttle resync.
@@ -1209,10 +1190,6 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
         r1_bio->sector = sector_nr;
         r1_bio->cmd = SPECIAL;
 
-        max_sector = mddev->sb->size << 1;
-        if (sector_nr >= max_sector)
-                BUG();
-
         bio = r1_bio->master_bio;
         nr_sectors = RESYNC_BLOCK_SIZE >> 9;
         if (max_sector - sector_nr < nr_sectors)
@@ -1295,7 +1272,6 @@ static int run(mddev_t *mddev)
         mdp_disk_t *descriptor;
         mdk_rdev_t *rdev;
         struct list_head *tmp;
-        int start_recovery = 0;
 
         MOD_INC_USE_COUNT;
@@ -1447,10 +1423,6 @@ static int run(mddev_t *mddev)
         conf->last_used = j;
 
-        if (conf->working_disks != sb->raid_disks) {
-                printk(KERN_ALERT "raid1: md%d, not all disks are operational -- trying to recover array\n", mdidx(mddev));
-                start_recovery = 1;
-        }
-
         {
                 const char * name = "raid1d";
@@ -1462,21 +1434,6 @@ static int run(mddev_t *mddev)
                 }
         }
 
-        if (!start_recovery && !(sb->state & (1 << MD_SB_CLEAN)) &&
-            (conf->working_disks > 1)) {
-                const char * name = "raid1syncd";
-
-                conf->resync_thread = md_register_thread(raid1syncd, conf, name);
-                if (!conf->resync_thread) {
-                        printk(THREAD_ERROR, mdidx(mddev));
-                        goto out_free_conf;
-                }
-
-                printk(START_RESYNC, mdidx(mddev));
-                conf->resync_mirrors = 1;
-                mddev->recovery_running = 2;
-                md_wakeup_thread(conf->resync_thread);
-        }
-
         /*
          * Regenerate the "device is in sync with the raid set" bit for
@@ -1493,10 +1450,6 @@ static int run(mddev_t *mddev)
         }
         sb->active_disks = conf->working_disks;
 
-        if (start_recovery)
-                md_recover_arrays();
-
         printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->active_disks, sb->raid_disks);
         /*
          * Ok, everything is just fine now
@@ -1516,46 +1469,12 @@ static int run(mddev_t *mddev)
         return -EIO;
 }
 
-static int stop_resync(mddev_t *mddev)
-{
-        conf_t *conf = mddev_to_conf(mddev);
-
-        if (conf->resync_thread) {
-                if (conf->resync_mirrors) {
-                        md_interrupt_thread(conf->resync_thread);
-
-                        printk(KERN_INFO "raid1: mirror resync was not fully finished, restarting next time.\n");
-                        return 1;
-                }
-                return 0;
-        }
-        return 0;
-}
-
-static int restart_resync(mddev_t *mddev)
-{
-        conf_t *conf = mddev_to_conf(mddev);
-
-        if (conf->resync_mirrors) {
-                if (!conf->resync_thread) {
-                        MD_BUG();
-                        return 0;
-                }
-                mddev->recovery_running = 2;
-                md_wakeup_thread(conf->resync_thread);
-                return 1;
-        }
-        return 0;
-}
-
 static int stop(mddev_t *mddev)
 {
         conf_t *conf = mddev_to_conf(mddev);
         int i;
 
         md_unregister_thread(conf->thread);
-        if (conf->resync_thread)
-                md_unregister_thread(conf->resync_thread);
         if (conf->r1bio_pool)
                 mempool_destroy(conf->r1bio_pool);
         for (i = 0; i < MD_SB_DISKS; i++)
@@ -1576,8 +1495,6 @@ static mdk_personality_t raid1_personality =
         status:         status,
         error_handler:  error,
         diskop:         diskop,
-        stop_resync:    stop_resync,
-        restart_resync: restart_resync,
         sync_request:   sync_request
 };

--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1037,7 +1037,7 @@ static void handle_stripe(struct stripe_head *sh)
                      ) &&
                     !test_bit(R5_UPTODATE, &dev->flags)) {
                         if (conf->disks[i].operational
-/*                              && !(conf->resync_parity && i == sh->pd_idx) */
+/*                              && !(!mddev->insync && i == sh->pd_idx) */
                                 )
                                 rmw++;
                         else rmw += 2*disks; /* cannot read it */
@@ -1303,6 +1303,10 @@ static int sync_request (mddev_t *mddev, sector_t sector_nr, int go_faster)
         int raid_disks = conf->raid_disks;
         int data_disks = raid_disks-1;
 
+        if (sector_nr >= mddev->sb->size <<1)
+                /* just being told to finish up .. nothing to do */
+                return 0;
+
         first_sector = raid5_compute_sector(stripe*data_disks*sectors_per_chunk
                 + chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf);
         sh = get_active_stripe(conf, sector_nr, pd_idx, 0);
@@ -1372,28 +1376,6 @@ static void raid5d (void *data)
         PRINTK("--- raid5d inactive\n");
 }
 
-/*
- * Private kernel thread for parity reconstruction after an unclean
- * shutdown. Reconstruction on spare drives in case of a failed drive
- * is done by the generic mdsyncd.
- */
-static void raid5syncd (void *data)
-{
-        raid5_conf_t *conf = data;
-        mddev_t *mddev = conf->mddev;
-
-        if (!conf->resync_parity)
-                return;
-        if (mddev->recovery_running != 2)
-                return;
-        if (md_do_sync(mddev,NULL)) {
-                printk("raid5: resync aborted!\n");
-                return;
-        }
-        conf->resync_parity = 0;
-        printk("raid5: resync finished.\n");
-}
-
 static int run (mddev_t *mddev)
 {
         raid5_conf_t *conf;
@@ -1403,7 +1385,6 @@ static int run (mddev_t *mddev)
         mdk_rdev_t *rdev;
         struct disk_info *disk;
         struct list_head *tmp;
-        int start_recovery = 0;
 
         MOD_INC_USE_COUNT;
@@ -1555,9 +1536,10 @@ static int run (mddev_t *mddev)
                 goto abort;
         }
 
-        if (conf->working_disks != sb->raid_disks) {
-                printk(KERN_ALERT "raid5: md%d, not all disks are operational -- trying to recover array\n", mdidx(mddev));
-                start_recovery = 1;
+        if (conf->failed_disks == 1 &&
+            !(sb->state & (1<<MD_SB_CLEAN))) {
+                printk(KERN_ERR "raid5: cannot start dirty degraded array for md%d\n", mdidx(mddev));
+                goto abort;
         }
 
         {
@@ -1575,6 +1557,7 @@ static int run (mddev_t *mddev)
         if (grow_stripes(conf, conf->max_nr_stripes)) {
                 printk(KERN_ERR "raid5: couldn't allocate %dkB for buffers\n", memory);
                 shrink_stripes(conf);
+                md_unregister_thread(conf->thread);
                 goto abort;
         } else
                 printk(KERN_INFO "raid5: allocated %dkB for md%d\n", memory, mdidx(mddev));
@@ -1599,24 +1582,6 @@ static int run (mddev_t *mddev)
         else
                 printk(KERN_ALERT "raid5: raid level %d set md%d active with %d out of %d devices, algorithm %d\n", conf->level, mdidx(mddev), sb->active_disks, sb->raid_disks, conf->algorithm);
 
-        if (!start_recovery && !(sb->state & (1 << MD_SB_CLEAN))) {
-                const char * name = "raid5syncd";
-
-                conf->resync_thread = md_register_thread(raid5syncd, conf,name);
-                if (!conf->resync_thread) {
-                        printk(KERN_ERR "raid5: couldn't allocate thread for md%d\n", mdidx(mddev));
-                        goto abort;
-                }
-
-                printk("raid5: raid set md%d not clean; reconstructing parity\n", mdidx(mddev));
-                conf->resync_parity = 1;
-                mddev->recovery_running = 2;
-                md_wakeup_thread(conf->resync_thread);
-        }
-
-        print_raid5_conf(conf);
-        if (start_recovery)
-                md_recover_arrays();
         print_raid5_conf(conf);
 
         /* Ok, everything is just fine now */
@@ -1635,47 +1600,12 @@ static int run (mddev_t *mddev)
         return -EIO;
 }
 
-static int stop_resync (mddev_t *mddev)
-{
-        raid5_conf_t *conf = mddev_to_conf(mddev);
-        mdk_thread_t *thread = conf->resync_thread;
-
-        if (thread) {
-                if (conf->resync_parity) {
-                        md_interrupt_thread(thread);
-                        printk(KERN_INFO "raid5: parity resync was not fully finished, restarting next time.\n");
-                        return 1;
-                }
-                return 0;
-        }
-        return 0;
-}
-
-static int restart_resync (mddev_t *mddev)
-{
-        raid5_conf_t *conf = mddev_to_conf(mddev);
-
-        if (conf->resync_parity) {
-                if (!conf->resync_thread) {
-                        MD_BUG();
-                        return 0;
-                }
-                printk("raid5: waking up raid5resync.\n");
-                mddev->recovery_running = 2;
-                md_wakeup_thread(conf->resync_thread);
-                return 1;
-        } else
-                printk("raid5: no restart-resync needed.\n");
-        return 0;
-}
-
 static int stop (mddev_t *mddev)
 {
         raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
 
-        if (conf->resync_thread)
-                md_unregister_thread(conf->resync_thread);
         md_unregister_thread(conf->thread);
         shrink_stripes(conf);
         free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER);
@@ -2050,8 +1980,6 @@ static mdk_personality_t raid5_personality=
         status:         status,
         error_handler:  error,
         diskop:         diskop,
-        stop_resync:    stop_resync,
-        restart_resync: restart_resync,
         sync_request:   sync_request
 };

--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -76,12 +76,8 @@ extern void md_unregister_thread (mdk_thread_t *thread);
 extern void md_wakeup_thread(mdk_thread_t *thread);
 extern void md_interrupt_thread (mdk_thread_t *thread);
 extern void md_update_sb (mddev_t *mddev);
-extern int md_do_sync(mddev_t *mddev, mdp_disk_t *spare);
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
 extern void md_sync_acct(kdev_t dev, unsigned long nr_sectors);
-extern void md_recover_arrays (void);
-extern int md_notify_reboot(struct notifier_block *this,
-                            unsigned long code, void *x);
 extern int md_error (mddev_t *mddev, struct block_device *bdev);
 extern int md_run_setup(void);

--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -181,6 +181,8 @@ struct mddev_s
         struct list_head        disks;
         int                     sb_dirty;
         int                     ro;
+
+        struct mdk_thread_s     *sync_thread;   /* doing resync or reconstruct */
         unsigned long           curr_resync;    /* blocks scheduled */
         unsigned long           resync_mark;    /* a recent timestamp */
         unsigned long           resync_mark_cnt;/* blocks written at resync_mark */
@@ -191,8 +193,10 @@ struct mddev_s
                                          * it can only be set > 0 under reconfig_sem
                                          */
         int                     recovery_running;
+        int                     in_sync;        /* know to not need resync */
         struct semaphore        reconfig_sem;
         atomic_t                active;
+        mdp_disk_t              *spare;
 
         atomic_t                recovery_active; /* blocks scheduled, but not written */
         wait_queue_head_t       recovery_wait;
@@ -222,9 +226,6 @@ struct mdk_personality_s
          * SPARE_ACTIVE expects such a change)
          */
         int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state);
-
-        int (*stop_resync)(mddev_t *mddev);
-        int (*restart_resync)(mddev_t *mddev);
         int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster);
 };

--- a/include/linux/raid/raid1.h
+++ b/include/linux/raid/raid1.h
@@ -33,8 +33,7 @@ struct r1_private_data_s {
         int                     working_disks;
         int                     last_used;
         sector_t                next_seq_sect;
-        mdk_thread_t            *thread, *resync_thread;
-        int                     resync_mirrors;
+        mdk_thread_t            *thread;
         mirror_info_t           *spare;
         spinlock_t              device_lock;

--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -204,12 +204,11 @@ struct disk_info {
 struct raid5_private_data {
         struct stripe_head      **stripe_hashtbl;
         mddev_t                 *mddev;
-        mdk_thread_t            *thread, *resync_thread;
+        mdk_thread_t            *thread;
         struct disk_info        disks[MD_SB_DISKS];
         struct disk_info        *spare;
         int                     chunk_size, level, algorithm;
         int                     raid_disks, working_disks, failed_disks;
-        int                     resync_parity;
         int                     max_nr_stripes;
 
         struct list_head        handle_list; /* stripes needing handling */