Commit 5077fef0 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] md: Dynamically limit size of bio requests used for raid1 resync

From: NeilBrown <neilb@cse.unsw.edu.au>

Currently raid1 uses PAGE_SIZE read/write requests for resync, as it doesn't
know how to honour per-device restrictions.  This patch uses bio_add_page to
honour those restrictions and ups the limit on request size to 64K.  This
has a measurable impact on rebuild speed (25M/s -> 60M/s).
parent 89654f5b
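
The core change is the request-building pattern in sync_request below: instead of
hard-coding PAGE_SIZE requests, each resync bio is grown one page at a time with
bio_add_page(), which consults the underlying queue's per-device limits and returns
0 once the request cannot grow any further.  A minimal sketch of that pattern,
assuming the 2.6-era bio API; the helper name fill_resync_bio and its arguments are
illustrative, not part of the patch:

#include <linux/bio.h>
#include <linux/mm.h>

#define RESYNC_BLOCK_SIZE (64*1024)
#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)

/*
 * Illustrative helper (hypothetical, not from the patch): add up to
 * RESYNC_PAGES pre-allocated pages to one bio, stopping early when
 * bio_add_page() reports the device can't take more, or when we hit
 * the end of the device.  Returns the number of sectors queued.
 */
static int fill_resync_bio(struct bio *bio, struct page **pages,
			   sector_t sector_nr, sector_t max_sector)
{
	int i, nr_sectors = 0;

	for (i = 0; i < RESYNC_PAGES; i++) {
		int len = PAGE_SIZE;

		/* trim the final page at the end of the device */
		if (sector_nr + (len >> 9) > max_sector)
			len = (max_sector - sector_nr) << 9;
		if (len == 0)
			break;
		/* bio_add_page() honours the queue's size/segment limits */
		if (bio_add_page(bio, pages[i], len, 0) == 0)
			break;
		nr_sectors += len >> 9;
		sector_nr += len >> 9;
	}
	return nr_sectors;
}

In the patch proper, the same loop also has to keep the read bio and all the write
bios in step, which is why a refused page triggers the rollback under bio_full below.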
@@ -56,8 +56,8 @@ static void r1bio_pool_free(void *r1_bio, void *data)
 	kfree(r1_bio);
 }
 
-//#define RESYNC_BLOCK_SIZE (64*1024)
-#define RESYNC_BLOCK_SIZE PAGE_SIZE
+#define RESYNC_BLOCK_SIZE (64*1024)
+//#define RESYNC_BLOCK_SIZE PAGE_SIZE
 #define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
 #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
 #define RESYNC_WINDOW (2048*1024)
@@ -73,38 +73,39 @@ static void * r1buf_pool_alloc(int gfp_flags, void *data)
 	r1_bio = r1bio_pool_alloc(gfp_flags, conf->mddev);
 	if (!r1_bio)
 		return NULL;
 
-	bio = bio_alloc(gfp_flags, RESYNC_PAGES);
-	if (!bio)
-		goto out_free_r1_bio;
+	/*
+	 * Allocate bios : 1 for reading, n-1 for writing
+	 */
+	for (j = conf->raid_disks ; j-- ; ) {
+		bio = bio_alloc(gfp_flags, RESYNC_PAGES);
+		if (!bio)
+			goto out_free_bio;
+		r1_bio->bios[j] = bio;
+	}
 	/*
-	 * Allocate RESYNC_PAGES data pages for this iovec.
+	 * Allocate RESYNC_PAGES data pages and attach them to
+	 * the first bio;
 	 */
+	bio = r1_bio->bios[0];
 	for (i = 0; i < RESYNC_PAGES; i++) {
 		page = alloc_page(gfp_flags);
 		if (unlikely(!page))
 			goto out_free_pages;
 
 		bio->bi_io_vec[i].bv_page = page;
-		bio->bi_io_vec[i].bv_len = PAGE_SIZE;
-		bio->bi_io_vec[i].bv_offset = 0;
 	}
-
-	bio->bi_vcnt = RESYNC_PAGES;
-	bio->bi_idx = 0;
-	bio->bi_size = RESYNC_BLOCK_SIZE;
-	bio->bi_end_io = NULL;
-	atomic_set(&bio->bi_cnt, 1);
 
 	r1_bio->master_bio = bio;
 
 	return r1_bio;
 
 out_free_pages:
-	for (j = 0; j < i; j++)
-		__free_page(bio->bi_io_vec[j].bv_page);
-	bio_put(bio);
-out_free_r1_bio:
+	for ( ; i > 0 ; i--)
+		__free_page(bio->bi_io_vec[i-1].bv_page);
+out_free_bio:
+	while ( j < conf->raid_disks )
+		bio_put(r1_bio->bios[++j]);
 	r1bio_pool_free(r1_bio, conf->mddev);
 	return NULL;
 }
@@ -114,15 +115,15 @@ static void r1buf_pool_free(void *__r1_bio, void *data)
 	int i;
 	conf_t *conf = data;
 	r1bio_t *r1bio = __r1_bio;
-	struct bio *bio = r1bio->master_bio;
-
-	if (atomic_read(&bio->bi_cnt) != 1)
-		BUG();
+	struct bio *bio = r1bio->bios[0];
 
 	for (i = 0; i < RESYNC_PAGES; i++) {
 		__free_page(bio->bi_io_vec[i].bv_page);
 		bio->bi_io_vec[i].bv_page = NULL;
 	}
-	bio_put(bio);
+	for (i=0 ; i < conf->raid_disks; i++)
+		bio_put(r1bio->bios[i]);
+
 	r1bio_pool_free(r1bio, conf->mddev);
 }
@@ -162,15 +163,8 @@ static inline void free_r1bio(r1bio_t *r1_bio)
 static inline void put_buf(r1bio_t *r1_bio)
 {
 	conf_t *conf = mddev_to_conf(r1_bio->mddev);
-	struct bio *bio = r1_bio->master_bio;
 	unsigned long flags;
 
-	/*
-	 * undo any possible partial request fixup magic:
-	 */
-	if (bio->bi_size != RESYNC_BLOCK_SIZE)
-		bio->bi_io_vec[bio->bi_vcnt-1].bv_len = PAGE_SIZE;
-	put_all_bios(conf, r1_bio);
 	mempool_free(r1_bio, conf->r1buf_pool);
 
 	spin_lock_irqsave(&conf->resync_lock, flags);
@@ -810,12 +804,11 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 	conf_t *conf = mddev_to_conf(mddev);
 	int i;
 	int disks = conf->raid_disks;
-	struct bio *bio, *mbio;
+	struct bio *bio, *wbio;
 
-	bio = r1_bio->master_bio;
+	bio = r1_bio->bios[r1_bio->read_disk];
 
 	/*
-	 * have to allocate lots of bio structures and
 	 * schedule writes
 	 */
 	if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
@@ -833,43 +826,16 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 		return;
 	}
 
-	spin_lock_irq(&conf->device_lock);
-	for (i = 0; i < disks ; i++) {
-		r1_bio->bios[i] = NULL;
-		if (!conf->mirrors[i].rdev ||
-		    conf->mirrors[i].rdev->faulty)
-			continue;
-		if (i == r1_bio->read_disk)
-			/*
-			 * we read from here, no need to write
-			 */
-			continue;
-		if (conf->mirrors[i].rdev->in_sync &&
-		    r1_bio->sector + (bio->bi_size>>9) <= mddev->recovery_cp)
-			/*
-			 * don't need to write this we are just rebuilding
-			 */
-			continue;
-		atomic_inc(&conf->mirrors[i].rdev->nr_pending);
-		r1_bio->bios[i] = bio;
-	}
-	spin_unlock_irq(&conf->device_lock);
-
 	atomic_set(&r1_bio->remaining, 1);
-	for (i = disks; i-- ; ) {
-		if (!r1_bio->bios[i])
+	for (i = 0; i < disks ; i++) {
+		wbio = r1_bio->bios[i];
+		if (wbio->bi_end_io != end_sync_write)
 			continue;
 
-		mbio = bio_clone(bio, GFP_NOIO);
-		r1_bio->bios[i] = mbio;
-		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
-		mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
-		mbio->bi_end_io = end_sync_write;
-		mbio->bi_rw = WRITE;
-		mbio->bi_private = r1_bio;
-
+		atomic_inc(&conf->mirrors[i].rdev->nr_pending);
 		atomic_inc(&r1_bio->remaining);
-		md_sync_acct(conf->mirrors[i].rdev, mbio->bi_size >> 9);
-		generic_make_request(mbio);
+		md_sync_acct(conf->mirrors[i].rdev, wbio->bi_size >> 9);
+		generic_make_request(wbio);
 	}
 
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
@@ -967,7 +933,8 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
 	r1bio_t *r1_bio;
 	struct bio *bio;
 	sector_t max_sector, nr_sectors;
-	int disk, partial;
+	int disk;
+	int i;
 
 	if (!conf->r1buf_pool)
 		if (init_resync(conf))
@@ -1020,27 +987,69 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
 	set_bit(R1BIO_IsSync, &r1_bio->state);
 	r1_bio->read_disk = disk;
 
-	bio = r1_bio->master_bio;
-	nr_sectors = RESYNC_BLOCK_SIZE >> 9;
-	if (max_sector - sector_nr < nr_sectors)
-		nr_sectors = max_sector - sector_nr;
-	bio->bi_size = nr_sectors << 9;
-	bio->bi_vcnt = (bio->bi_size + PAGE_SIZE-1) / PAGE_SIZE;
-	/*
-	 * Is there a partial page at the end of the request?
-	 */
-	partial = bio->bi_size % PAGE_SIZE;
-	if (partial)
-		bio->bi_io_vec[bio->bi_vcnt-1].bv_len = partial;
-
-	bio->bi_sector = sector_nr + mirror->rdev->data_offset;
-	bio->bi_bdev = mirror->rdev->bdev;
-	bio->bi_end_io = end_sync_read;
-	bio->bi_rw = READ;
-	bio->bi_private = r1_bio;
-	bio_get(bio);
-	r1_bio->bios[r1_bio->read_disk] = bio;
+	for (i=0; i < conf->raid_disks; i++) {
+		bio = r1_bio->bios[i];
+
+		/* take from bio_init */
+		bio->bi_next = NULL;
+		bio->bi_flags |= 1 << BIO_UPTODATE;
+		bio->bi_rw = 0;
+		bio->bi_vcnt = 0;
+		bio->bi_idx = 0;
+		bio->bi_phys_segments = 0;
+		bio->bi_hw_segments = 0;
+		bio->bi_size = 0;
+		bio->bi_end_io = NULL;
+		bio->bi_private = NULL;
+
+		if (i == disk) {
+			bio->bi_rw = READ;
+			bio->bi_end_io = end_sync_read;
+		} else if (conf->mirrors[i].rdev &&
+			   !conf->mirrors[i].rdev->faulty &&
+			   (!conf->mirrors[i].rdev->in_sync ||
+			    sector_nr + RESYNC_SECTORS > mddev->recovery_cp)) {
+			bio->bi_rw = WRITE;
+			bio->bi_end_io = end_sync_write;
+		} else
+			continue;
+		bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset;
+		bio->bi_bdev = conf->mirrors[i].rdev->bdev;
+		bio->bi_private = r1_bio;
+	}
+	nr_sectors = 0;
+	do {
+		struct page *page;
+		int len = PAGE_SIZE;
+		if (sector_nr + (len>>9) > max_sector)
+			len = (max_sector - sector_nr) << 9;
+		if (len == 0)
+			break;
+		for (i=0 ; i < conf->raid_disks; i++) {
+			bio = r1_bio->bios[i];
+			if (bio->bi_end_io) {
+				page = r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page;
+				if (bio_add_page(bio, page, len, 0) == 0) {
+					/* stop here */
+					r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page = page;
+					while (i > 0) {
+						i--;
+						bio = r1_bio->bios[i];
+						if (bio->bi_end_io==NULL) continue;
+						/* remove last page from this bio */
+						bio->bi_vcnt--;
+						bio->bi_size -= len;
+						bio->bi_flags &= ~(1<< BIO_SEG_VALID);
+					}
+					goto bio_full;
+				}
+			}
+		}
+		nr_sectors += len>>9;
+		sector_nr += len>>9;
+	} while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
+ bio_full:
+	bio = r1_bio->bios[disk];
 
 	r1_bio->sectors = nr_sectors;
 	md_sync_acct(mirror->rdev, nr_sectors);