Commit c6563a8c authored by NeilBrown

md: add possibility to change data-offset for devices.

When reshaping we can avoid costly intermediate backup by
changing the 'start' address of the array on the device
(if there is enough room).

So as a first step, allow such a change to be requested
through sysfs, and recorded in v1.x metadata.

(As we didn't previously check that all 'pad' fields were zero,
 we need a new FEATURE flag for this.
 We also (belatedly) check that all remaining 'pad' fields are
 zero to avoid a repeat of this.)

The new data offset must be requested separately for each device.
This allows each to have a different change in the data offset.
This is not likely to be used often but as data_offset can be
set per-device, new_data_offset should be too.

This patch also removes the 'acknowledged' arg to rdev_set_badblocks as
it is never used and never will be.  At the same time we add a new
arg ('is_new') which is currently always zero but will be used more
soon.

When a reshape finishes we will need to update the data_offset
and rdev->sectors.  So provide an exported function to do that.
Signed-off-by: NeilBrown <neilb@suse.de>
parent 2c810cdd
This diff is collapsed.
...@@ -55,6 +55,7 @@ struct md_rdev { ...@@ -55,6 +55,7 @@ struct md_rdev {
int sb_loaded; int sb_loaded;
__u64 sb_events; __u64 sb_events;
sector_t data_offset; /* start of data in array */ sector_t data_offset; /* start of data in array */
sector_t new_data_offset;/* only relevant while reshaping */
sector_t sb_start; /* offset of the super block (in 512byte sectors) */ sector_t sb_start; /* offset of the super block (in 512byte sectors) */
int sb_size; /* bytes in the superblock */ int sb_size; /* bytes in the superblock */
int preferred_minor; /* autorun support */ int preferred_minor; /* autorun support */
...@@ -193,8 +194,9 @@ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors, ...@@ -193,8 +194,9 @@ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
return 0; return 0;
} }
extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int acknowledged); int is_new);
extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors); extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new);
extern void md_ack_all_badblocks(struct badblocks *bb); extern void md_ack_all_badblocks(struct badblocks *bb);
struct mddev { struct mddev {
...@@ -592,6 +594,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi); ...@@ -592,6 +594,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev); extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok); extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev); extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);
extern int mddev_congested(struct mddev *mddev, int bits); extern int mddev_congested(struct mddev *mddev, int bits);
extern void md_flush_request(struct mddev *mddev, struct bio *bio); extern void md_flush_request(struct mddev *mddev, struct bio *bio);
......
...@@ -2024,7 +2024,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio ...@@ -2024,7 +2024,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
continue; continue;
if (test_bit(BIO_UPTODATE, &bio->bi_flags) && if (test_bit(BIO_UPTODATE, &bio->bi_flags) &&
test_bit(R1BIO_MadeGood, &r1_bio->state)) { test_bit(R1BIO_MadeGood, &r1_bio->state)) {
rdev_clear_badblocks(rdev, r1_bio->sector, s); rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
} }
if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
test_bit(R1BIO_WriteError, &r1_bio->state)) { test_bit(R1BIO_WriteError, &r1_bio->state)) {
...@@ -2044,7 +2044,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) ...@@ -2044,7 +2044,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
struct md_rdev *rdev = conf->mirrors[m].rdev; struct md_rdev *rdev = conf->mirrors[m].rdev;
rdev_clear_badblocks(rdev, rdev_clear_badblocks(rdev,
r1_bio->sector, r1_bio->sector,
r1_bio->sectors); r1_bio->sectors, 0);
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
} else if (r1_bio->bios[m] != NULL) { } else if (r1_bio->bios[m] != NULL) {
/* This drive got a write error. We need to /* This drive got a write error. We need to
......
...@@ -2480,7 +2480,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) ...@@ -2480,7 +2480,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev_clear_badblocks( rdev_clear_badblocks(
rdev, rdev,
r10_bio->devs[m].addr, r10_bio->devs[m].addr,
r10_bio->sectors); r10_bio->sectors, 0);
} else { } else {
if (!rdev_set_badblocks( if (!rdev_set_badblocks(
rdev, rdev,
...@@ -2496,7 +2496,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) ...@@ -2496,7 +2496,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev_clear_badblocks( rdev_clear_badblocks(
rdev, rdev,
r10_bio->devs[m].addr, r10_bio->devs[m].addr,
r10_bio->sectors); r10_bio->sectors, 0);
} else { } else {
if (!rdev_set_badblocks( if (!rdev_set_badblocks(
rdev, rdev,
...@@ -2515,7 +2515,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) ...@@ -2515,7 +2515,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev_clear_badblocks( rdev_clear_badblocks(
rdev, rdev,
r10_bio->devs[m].addr, r10_bio->devs[m].addr,
r10_bio->sectors); r10_bio->sectors, 0);
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
} else if (bio != NULL && } else if (bio != NULL &&
!test_bit(BIO_UPTODATE, &bio->bi_flags)) { !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
...@@ -2532,7 +2532,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) ...@@ -2532,7 +2532,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev_clear_badblocks( rdev_clear_badblocks(
rdev, rdev,
r10_bio->devs[m].addr, r10_bio->devs[m].addr,
r10_bio->sectors); r10_bio->sectors, 0);
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
} }
} }
......
...@@ -3561,7 +3561,7 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -3561,7 +3561,7 @@ static void handle_stripe(struct stripe_head *sh)
if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
rdev = conf->disks[i].rdev; rdev = conf->disks[i].rdev;
rdev_clear_badblocks(rdev, sh->sector, rdev_clear_badblocks(rdev, sh->sector,
STRIPE_SECTORS); STRIPE_SECTORS, 0);
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
} }
if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) {
...@@ -3570,7 +3570,7 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -3570,7 +3570,7 @@ static void handle_stripe(struct stripe_head *sh)
/* rdev have been moved down */ /* rdev have been moved down */
rdev = conf->disks[i].rdev; rdev = conf->disks[i].rdev;
rdev_clear_badblocks(rdev, sh->sector, rdev_clear_badblocks(rdev, sh->sector,
STRIPE_SECTORS); STRIPE_SECTORS, 0);
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
} }
} }
...@@ -5505,10 +5505,14 @@ static int raid5_start_reshape(struct mddev *mddev) ...@@ -5505,10 +5505,14 @@ static int raid5_start_reshape(struct mddev *mddev)
if (!check_stripe_cache(mddev)) if (!check_stripe_cache(mddev))
return -ENOSPC; return -ENOSPC;
rdev_for_each(rdev, mddev) rdev_for_each(rdev, mddev) {
/* Don't support changing data_offset yet */
if (rdev->new_data_offset != rdev->data_offset)
return -EINVAL;
if (!test_bit(In_sync, &rdev->flags) if (!test_bit(In_sync, &rdev->flags)
&& !test_bit(Faulty, &rdev->flags)) && !test_bit(Faulty, &rdev->flags))
spares++; spares++;
}
if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
/* Not enough devices even to make a degraded array /* Not enough devices even to make a degraded array
......
...@@ -233,7 +233,10 @@ struct mdp_superblock_1 { ...@@ -233,7 +233,10 @@ struct mdp_superblock_1 {
__le32 delta_disks; /* change in number of raid_disks */ __le32 delta_disks; /* change in number of raid_disks */
__le32 new_layout; /* new layout */ __le32 new_layout; /* new layout */
__le32 new_chunk; /* new chunk size (512byte sectors) */ __le32 new_chunk; /* new chunk size (512byte sectors) */
__u8 pad1[128-124]; /* set to 0 when written */ __le32 new_offset; /* signed number to add to data_offset in new
* layout. 0 == no-change. This can be
* different on each device in the array.
*/
/* constant this-device information - 64 bytes */ /* constant this-device information - 64 bytes */
__le64 data_offset; /* sector start of data, often 0 */ __le64 data_offset; /* sector start of data, often 0 */
...@@ -285,11 +288,14 @@ struct mdp_superblock_1 { ...@@ -285,11 +288,14 @@ struct mdp_superblock_1 {
* of devices, but is going * of devices, but is going
* backwards anyway. * backwards anyway.
*/ */
#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|MD_FEATURE_RECOVERY_OFFSET \ |MD_FEATURE_RECOVERY_OFFSET \
|MD_FEATURE_RESHAPE_ACTIVE \ |MD_FEATURE_RESHAPE_ACTIVE \
|MD_FEATURE_BAD_BLOCKS \ |MD_FEATURE_BAD_BLOCKS \
|MD_FEATURE_REPLACEMENT \ |MD_FEATURE_REPLACEMENT \
|MD_FEATURE_RESHAPE_BACKWARDS) |MD_FEATURE_RESHAPE_BACKWARDS \
|MD_FEATURE_NEW_OFFSET \
)
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment