Commit 32c31806 authored by Neil Brown's avatar Neil Brown Committed by Linus Torvalds

[PATCH] md: assorted fixes/improvements to generic md resync code.

1/ Introduce "mddev->resync_max_sectors" so that an md personality
can ask for resync to cover a different address range than that of a
single drive.  raid10 will use this.

2/ fix is_mddev_idle so that if there seems to be a negative number
 of events, it doesn't immediately assume activity.

3/ make "sync_io" (the count of IO sectors used for array resync)
 an atomic_t to avoid SMP races. 

4/ Pass md_sync_acct a "block_device" rather than the containing "rdev",
  as the whole rdev isn't needed. Also make this an inline function.

5/ Make sure recovery gets interrupted on any error.
Signed-off-by: default avatarNeil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent b60e5e71
......@@ -1648,6 +1648,8 @@ static int do_md_run(mddev_t * mddev)
mddev->pers = pers[pnum];
spin_unlock(&pers_lock);
mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
err = mddev->pers->run(mddev);
if (err) {
printk(KERN_ERR "md: pers->run() failed ...\n");
......@@ -2953,6 +2955,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
if (!mddev->pers->error_handler)
return;
mddev->pers->error_handler(mddev,rdev);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
......@@ -2985,7 +2988,11 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
unsigned long max_blocks, resync, res, dt, db, rt;
resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2;
max_blocks = mddev->size;
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
max_blocks = mddev->resync_max_sectors >> 1;
else
max_blocks = mddev->size;
/*
* Should not happen.
......@@ -3221,11 +3228,6 @@ int unregister_md_personality(int pnum)
return 0;
}
/* Charge nr_sectors of resync I/O against the whole disk backing @rdev. */
void md_sync_acct(mdk_rdev_t *rdev, unsigned long nr_sectors)
{
	struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;

	disk->sync_io += nr_sectors;
}
static int is_mddev_idle(mddev_t *mddev)
{
mdk_rdev_t * rdev;
......@@ -3238,8 +3240,12 @@ static int is_mddev_idle(mddev_t *mddev)
struct gendisk *disk = rdev->bdev->bd_contains->bd_disk;
curr_events = disk_stat_read(disk, read_sectors) +
disk_stat_read(disk, write_sectors) -
disk->sync_io;
if ((curr_events - rdev->last_events) > 32) {
atomic_read(&disk->sync_io);
/* Allow some slack between the value of curr_events and last_events,
* as there are some uninteresting races.
* Note: the following is an unsigned comparison.
*/
if ((curr_events - rdev->last_events + 32) > 64) {
rdev->last_events = curr_events;
idle = 0;
}
......@@ -3373,7 +3379,14 @@ static void md_do_sync(mddev_t *mddev)
}
} while (mddev->curr_resync < 2);
max_sectors = mddev->size << 1;
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
/* resync follows the size requested by the personality,
* which defaults to the physical size, but can be a virtual size
*/
max_sectors = mddev->resync_max_sectors;
else
/* recovery follows the physical size of devices */
max_sectors = mddev->size << 1;
printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
......@@ -3796,7 +3809,6 @@ module_exit(md_exit)
EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL(unregister_md_personality);
EXPORT_SYMBOL(md_error);
EXPORT_SYMBOL(md_sync_acct);
EXPORT_SYMBOL(md_done_sync);
EXPORT_SYMBOL(md_write_start);
EXPORT_SYMBOL(md_write_end);
......
......@@ -903,7 +903,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
atomic_inc(&conf->mirrors[i].rdev->nr_pending);
atomic_inc(&r1_bio->remaining);
md_sync_acct(conf->mirrors[i].rdev, wbio->bi_size >> 9);
md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
generic_make_request(wbio);
}
......@@ -1143,7 +1143,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
bio = r1_bio->bios[disk];
r1_bio->sectors = nr_sectors;
md_sync_acct(mirror->rdev, nr_sectors);
md_sync_acct(mirror->rdev->bdev, nr_sectors);
generic_make_request(bio);
......
......@@ -1071,7 +1071,8 @@ static void handle_stripe(struct stripe_head *sh)
PRINTK("Reading block %d (sync=%d)\n",
i, syncing);
if (syncing)
md_sync_acct(conf->disks[i].rdev, STRIPE_SECTORS);
md_sync_acct(conf->disks[i].rdev->bdev,
STRIPE_SECTORS);
}
}
}
......@@ -1256,7 +1257,7 @@ static void handle_stripe(struct stripe_head *sh)
if (rdev) {
if (test_bit(R5_Syncio, &sh->dev[i].flags))
md_sync_acct(rdev, STRIPE_SECTORS);
md_sync_acct(rdev->bdev, STRIPE_SECTORS);
bi->bi_bdev = rdev->bdev;
PRINTK("for %llu schedule op %ld on disc %d\n",
......
......@@ -1208,7 +1208,8 @@ static void handle_stripe(struct stripe_head *sh)
PRINTK("Reading block %d (sync=%d)\n",
i, syncing);
if (syncing)
md_sync_acct(conf->disks[i].rdev, STRIPE_SECTORS);
md_sync_acct(conf->disks[i].rdev->bdev,
STRIPE_SECTORS);
}
}
}
......@@ -1418,7 +1419,7 @@ static void handle_stripe(struct stripe_head *sh)
if (rdev) {
if (test_bit(R5_Syncio, &sh->dev[i].flags))
md_sync_acct(rdev, STRIPE_SECTORS);
md_sync_acct(rdev->bdev, STRIPE_SECTORS);
bi->bi_bdev = rdev->bdev;
PRINTK("for %llu schedule op %ld on disc %d\n",
......
......@@ -100,7 +100,7 @@ struct gendisk {
struct timer_rand_state *random;
int policy;
unsigned sync_io; /* RAID */
atomic_t sync_io; /* RAID */
unsigned long stamp, stamp_idle;
int in_flight;
#ifdef CONFIG_SMP
......
......@@ -74,7 +74,6 @@ extern void md_write_start(mddev_t *mddev);
extern void md_write_end(mddev_t *mddev);
extern void md_handle_safemode(mddev_t *mddev);
extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
extern void md_sync_acct(mdk_rdev_t *rdev, unsigned long nr_sectors);
extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev);
extern void md_unplug_mddev(mddev_t *mddev);
......
......@@ -216,6 +216,7 @@ struct mddev_s
unsigned long resync_mark; /* a recent timestamp */
sector_t resync_mark_cnt;/* blocks written at resync_mark */
sector_t resync_max_sectors; /* may be set by personality */
/* recovery/resync flags
* NEEDED: we might need to start a resync/recover
* RUNNING: a thread is running, or about to be started
......@@ -263,6 +264,11 @@ static inline void rdev_dec_pending(mdk_rdev_t *rdev, mddev_t *mddev)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sectors)
{
atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
}
struct mdk_personality_s
{
char *name;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment