Commit 509e4aef authored by Linus Torvalds

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md:
  md: Fix removal of extra drives when converting RAID6 to RAID5
  md: range check slot number when manually adding a spare.
  md/raid5: handle manually-added spares in start_reshape.
  md: fix sync_completed reporting for very large drives (>2TB)
  md: allow suspend_lo and suspend_hi to decrease as well as increase.
  md: Don't let implementation detail of curr_resync leak out through sysfs.
  md: separate meta and data devs
  md-new-param-to_sync_page_io
  md-new-param-to-calc_dev_sboffset
  md: Be more careful about clearing flags bit in ->recovery
  md: md_stop_writes requires mddev_lock.
  md/raid5: use sysfs_notify_dirent_safe to avoid NULL pointer
  md: Ensure no IO request to get md device before it is properly initialised.
  md: Fix single printks with multiple KERN_<level>s
  md: fix regression resulting in delays in clearing bits in a bitmap
  md: fix regression with re-adding devices to arrays with no metadata
parents 375b6f5a bf2cb0da
...@@ -210,11 +210,11 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset, ...@@ -210,11 +210,11 @@ static struct page *read_sb_page(mddev_t *mddev, loff_t offset,
|| test_bit(Faulty, &rdev->flags)) || test_bit(Faulty, &rdev->flags))
continue; continue;
target = rdev->sb_start + offset + index * (PAGE_SIZE/512); target = offset + index * (PAGE_SIZE/512);
if (sync_page_io(rdev, target, if (sync_page_io(rdev, target,
roundup(size, bdev_logical_block_size(rdev->bdev)), roundup(size, bdev_logical_block_size(rdev->bdev)),
page, READ)) { page, READ, true)) {
page->index = index; page->index = index;
attach_page_buffers(page, NULL); /* so that free_buffer will attach_page_buffers(page, NULL); /* so that free_buffer will
* quietly no-op */ * quietly no-op */
...@@ -264,14 +264,18 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev) ...@@ -264,14 +264,18 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
{ {
mdk_rdev_t *rdev = NULL; mdk_rdev_t *rdev = NULL;
struct block_device *bdev;
mddev_t *mddev = bitmap->mddev; mddev_t *mddev = bitmap->mddev;
while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
int size = PAGE_SIZE; int size = PAGE_SIZE;
loff_t offset = mddev->bitmap_info.offset; loff_t offset = mddev->bitmap_info.offset;
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
if (page->index == bitmap->file_pages-1) if (page->index == bitmap->file_pages-1)
size = roundup(bitmap->last_page_size, size = roundup(bitmap->last_page_size,
bdev_logical_block_size(rdev->bdev)); bdev_logical_block_size(bdev));
/* Just make sure we aren't corrupting data or /* Just make sure we aren't corrupting data or
* metadata * metadata
*/ */
...@@ -1542,7 +1546,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) ...@@ -1542,7 +1546,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
wait_event(bitmap->mddev->recovery_wait, wait_event(bitmap->mddev->recovery_wait,
atomic_read(&bitmap->mddev->recovery_active) == 0); atomic_read(&bitmap->mddev->recovery_active) == 0);
bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync; bitmap->mddev->curr_resync_completed = sector;
set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags); set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1); sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
s = 0; s = 0;
......
This diff is collapsed.
...@@ -60,6 +60,12 @@ struct mdk_rdev_s ...@@ -60,6 +60,12 @@ struct mdk_rdev_s
mddev_t *mddev; /* RAID array if running */ mddev_t *mddev; /* RAID array if running */
int last_events; /* IO event timestamp */ int last_events; /* IO event timestamp */
/*
* If meta_bdev is non-NULL, it means that a separate device is
* being used to store the metadata (superblock/bitmap) which
* would otherwise be contained on the same device as the data (bdev).
*/
struct block_device *meta_bdev;
struct block_device *bdev; /* block device handle */ struct block_device *bdev; /* block device handle */
struct page *sb_page; struct page *sb_page;
...@@ -148,7 +154,8 @@ struct mddev_s ...@@ -148,7 +154,8 @@ struct mddev_s
* are happening, so run/ * are happening, so run/
* takeover/stop are not safe * takeover/stop are not safe
*/ */
int ready; /* See when safe to pass
* IO requests down */
struct gendisk *gendisk; struct gendisk *gendisk;
struct kobject kobj; struct kobject kobj;
...@@ -497,8 +504,8 @@ extern void md_flush_request(mddev_t *mddev, struct bio *bio); ...@@ -497,8 +504,8 @@ extern void md_flush_request(mddev_t *mddev, struct bio *bio);
extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
sector_t sector, int size, struct page *page); sector_t sector, int size, struct page *page);
extern void md_super_wait(mddev_t *mddev); extern void md_super_wait(mddev_t *mddev);
extern int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size, extern int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
struct page *page, int rw); struct page *page, int rw, bool metadata_op);
extern void md_do_sync(mddev_t *mddev); extern void md_do_sync(mddev_t *mddev);
extern void md_new_event(mddev_t *mddev); extern void md_new_event(mddev_t *mddev);
extern int md_allow_write(mddev_t *mddev); extern int md_allow_write(mddev_t *mddev);
......
...@@ -1027,8 +1027,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -1027,8 +1027,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
} else } else
set_bit(Faulty, &rdev->flags); set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "md/raid1:%s: Disk failure on %s, disabling device.\n" printk(KERN_ALERT
KERN_ALERT "md/raid1:%s: Operation continuing on %d devices.\n", "md/raid1:%s: Disk failure on %s, disabling device.\n"
"md/raid1:%s: Operation continuing on %d devices.\n",
mdname(mddev), bdevname(rdev->bdev, b), mdname(mddev), bdevname(rdev->bdev, b),
mdname(mddev), conf->raid_disks - mddev->degraded); mdname(mddev), conf->raid_disks - mddev->degraded);
} }
...@@ -1364,10 +1365,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) ...@@ -1364,10 +1365,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
*/ */
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (sync_page_io(rdev, if (sync_page_io(rdev,
sect + rdev->data_offset, sect,
s<<9, s<<9,
bio->bi_io_vec[idx].bv_page, bio->bi_io_vec[idx].bv_page,
READ)) { READ, false)) {
success = 1; success = 1;
break; break;
} }
...@@ -1390,10 +1391,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) ...@@ -1390,10 +1391,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
atomic_add(s, &rdev->corrected_errors); atomic_add(s, &rdev->corrected_errors);
if (sync_page_io(rdev, if (sync_page_io(rdev,
sect + rdev->data_offset, sect,
s<<9, s<<9,
bio->bi_io_vec[idx].bv_page, bio->bi_io_vec[idx].bv_page,
WRITE) == 0) WRITE, false) == 0)
md_error(mddev, rdev); md_error(mddev, rdev);
} }
d = start; d = start;
...@@ -1405,10 +1406,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) ...@@ -1405,10 +1406,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
continue; continue;
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (sync_page_io(rdev, if (sync_page_io(rdev,
sect + rdev->data_offset, sect,
s<<9, s<<9,
bio->bi_io_vec[idx].bv_page, bio->bi_io_vec[idx].bv_page,
READ) == 0) READ, false) == 0)
md_error(mddev, rdev); md_error(mddev, rdev);
} }
} else { } else {
...@@ -1488,10 +1489,8 @@ static void fix_read_error(conf_t *conf, int read_disk, ...@@ -1488,10 +1489,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (rdev && if (rdev &&
test_bit(In_sync, &rdev->flags) && test_bit(In_sync, &rdev->flags) &&
sync_page_io(rdev, sync_page_io(rdev, sect, s<<9,
sect + rdev->data_offset, conf->tmppage, READ, false))
s<<9,
conf->tmppage, READ))
success = 1; success = 1;
else { else {
d++; d++;
...@@ -1514,9 +1513,8 @@ static void fix_read_error(conf_t *conf, int read_disk, ...@@ -1514,9 +1513,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (rdev && if (rdev &&
test_bit(In_sync, &rdev->flags)) { test_bit(In_sync, &rdev->flags)) {
if (sync_page_io(rdev, if (sync_page_io(rdev, sect, s<<9,
sect + rdev->data_offset, conf->tmppage, WRITE, false)
s<<9, conf->tmppage, WRITE)
== 0) == 0)
/* Well, this device is dead */ /* Well, this device is dead */
md_error(mddev, rdev); md_error(mddev, rdev);
...@@ -1531,9 +1529,8 @@ static void fix_read_error(conf_t *conf, int read_disk, ...@@ -1531,9 +1529,8 @@ static void fix_read_error(conf_t *conf, int read_disk,
rdev = conf->mirrors[d].rdev; rdev = conf->mirrors[d].rdev;
if (rdev && if (rdev &&
test_bit(In_sync, &rdev->flags)) { test_bit(In_sync, &rdev->flags)) {
if (sync_page_io(rdev, if (sync_page_io(rdev, sect, s<<9,
sect + rdev->data_offset, conf->tmppage, READ, false)
s<<9, conf->tmppage, READ)
== 0) == 0)
/* Well, this device is dead */ /* Well, this device is dead */
md_error(mddev, rdev); md_error(mddev, rdev);
......
...@@ -1051,8 +1051,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -1051,8 +1051,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
} }
set_bit(Faulty, &rdev->flags); set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "md/raid10:%s: Disk failure on %s, disabling device.\n" printk(KERN_ALERT
KERN_ALERT "md/raid10:%s: Operation continuing on %d devices.\n", "md/raid10:%s: Disk failure on %s, disabling device.\n"
"md/raid10:%s: Operation continuing on %d devices.\n",
mdname(mddev), bdevname(rdev->bdev, b), mdname(mddev), bdevname(rdev->bdev, b),
mdname(mddev), conf->raid_disks - mddev->degraded); mdname(mddev), conf->raid_disks - mddev->degraded);
} }
...@@ -1559,9 +1560,9 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) ...@@ -1559,9 +1560,9 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
rcu_read_unlock(); rcu_read_unlock();
success = sync_page_io(rdev, success = sync_page_io(rdev,
r10_bio->devs[sl].addr + r10_bio->devs[sl].addr +
sect + rdev->data_offset, sect,
s<<9, s<<9,
conf->tmppage, READ); conf->tmppage, READ, false);
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
rcu_read_lock(); rcu_read_lock();
if (success) if (success)
...@@ -1598,8 +1599,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) ...@@ -1598,8 +1599,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
atomic_add(s, &rdev->corrected_errors); atomic_add(s, &rdev->corrected_errors);
if (sync_page_io(rdev, if (sync_page_io(rdev,
r10_bio->devs[sl].addr + r10_bio->devs[sl].addr +
sect + rdev->data_offset, sect,
s<<9, conf->tmppage, WRITE) s<<9, conf->tmppage, WRITE, false)
== 0) { == 0) {
/* Well, this device is dead */ /* Well, this device is dead */
printk(KERN_NOTICE printk(KERN_NOTICE
...@@ -1635,9 +1636,9 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) ...@@ -1635,9 +1636,9 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
rcu_read_unlock(); rcu_read_unlock();
if (sync_page_io(rdev, if (sync_page_io(rdev,
r10_bio->devs[sl].addr + r10_bio->devs[sl].addr +
sect + rdev->data_offset, sect,
s<<9, conf->tmppage, s<<9, conf->tmppage,
READ) == 0) { READ, false) == 0) {
/* Well, this device is dead */ /* Well, this device is dead */
printk(KERN_NOTICE printk(KERN_NOTICE
"md/raid10:%s: unable to read back " "md/raid10:%s: unable to read back "
......
...@@ -1721,7 +1721,6 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -1721,7 +1721,6 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
set_bit(Faulty, &rdev->flags); set_bit(Faulty, &rdev->flags);
printk(KERN_ALERT printk(KERN_ALERT
"md/raid:%s: Disk failure on %s, disabling device.\n" "md/raid:%s: Disk failure on %s, disabling device.\n"
KERN_ALERT
"md/raid:%s: Operation continuing on %d devices.\n", "md/raid:%s: Operation continuing on %d devices.\n",
mdname(mddev), mdname(mddev),
bdevname(rdev->bdev, b), bdevname(rdev->bdev, b),
...@@ -4237,7 +4236,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped ...@@ -4237,7 +4236,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
wait_event(conf->wait_for_overlap, wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes)==0); atomic_read(&conf->reshape_stripes)==0);
mddev->reshape_position = conf->reshape_progress; mddev->reshape_position = conf->reshape_progress;
mddev->curr_resync_completed = mddev->curr_resync; mddev->curr_resync_completed = sector_nr;
conf->reshape_checkpoint = jiffies; conf->reshape_checkpoint = jiffies;
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
...@@ -4338,7 +4337,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped ...@@ -4338,7 +4337,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
wait_event(conf->wait_for_overlap, wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes) == 0); atomic_read(&conf->reshape_stripes) == 0);
mddev->reshape_position = conf->reshape_progress; mddev->reshape_position = conf->reshape_progress;
mddev->curr_resync_completed = mddev->curr_resync + reshape_sectors; mddev->curr_resync_completed = sector_nr;
conf->reshape_checkpoint = jiffies; conf->reshape_checkpoint = jiffies;
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
...@@ -5339,7 +5338,7 @@ static int raid5_spare_active(mddev_t *mddev) ...@@ -5339,7 +5338,7 @@ static int raid5_spare_active(mddev_t *mddev)
&& !test_bit(Faulty, &tmp->rdev->flags) && !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) { && !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
count++; count++;
sysfs_notify_dirent(tmp->rdev->sysfs_state); sysfs_notify_dirent_safe(tmp->rdev->sysfs_state);
} }
} }
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
...@@ -5528,8 +5527,8 @@ static int raid5_start_reshape(mddev_t *mddev) ...@@ -5528,8 +5527,8 @@ static int raid5_start_reshape(mddev_t *mddev)
return -ENOSPC; return -ENOSPC;
list_for_each_entry(rdev, &mddev->disks, same_set) list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk < 0 && if ((rdev->raid_disk < 0 || rdev->raid_disk >= conf->raid_disks)
!test_bit(Faulty, &rdev->flags)) && !test_bit(Faulty, &rdev->flags))
spares++; spares++;
if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded)
...@@ -5589,6 +5588,11 @@ static int raid5_start_reshape(mddev_t *mddev) ...@@ -5589,6 +5588,11 @@ static int raid5_start_reshape(mddev_t *mddev)
/* Failure here is OK */; /* Failure here is OK */;
} else } else
break; break;
} else if (rdev->raid_disk >= conf->previous_raid_disks
&& !test_bit(Faulty, &rdev->flags)) {
/* This is a spare that was manually added */
set_bit(In_sync, &rdev->flags);
added_devices++;
} }
/* When a reshape changes the number of devices, ->degraded /* When a reshape changes the number of devices, ->degraded
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment