Commit f788893d authored by Jens Axboe

Merge tag 'md-next-20231208' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.8/block

Pull MD updates from Song:

"1. Fix/Cleanup RCU usage from conf->disks[i].rdev, by Yu Kuai;
 2. Fix raid5 hang issue, by Junxiao Bi;
 3. Add Yu Kuai as Reviewer of the md subsystem."

* tag 'md-next-20231208' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md: synchronize flush io with array reconfiguration
  MAINTAINERS: SOFTWARE RAID: Add Yu Kuai as Reviewer
  md/md-multipath: remove rcu protection to access rdev from conf
  md/raid5: remove rcu protection to access rdev from conf
  md/raid1: remove rcu protection to access rdev from conf
  md/raid10: remove rcu protection to access rdev from conf
  md: remove flag RemoveSynchronized
  Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d"
  md: bypass block throttle for superblock update
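
The unifying idea behind the RCU cleanup patches is that the rdev pointers in
the per-personality conf structures never needed RCU: I/O paths already pin a
device through rdev->nr_pending before using it, and the pointers are only
cleared while the array is suspended or the reconfig mutex is held. A minimal
sketch of the resulting access pattern (illustrative only; get_rdev_for_io()
is a hypothetical helper, not something this series adds):

	/* Sketch: plain pointer load plus nr_pending pinning, no RCU. */
	static struct md_rdev *get_rdev_for_io(struct raid1_info *mirror)
	{
		struct md_rdev *rdev = mirror->rdev;	/* plain load */

		if (!rdev || test_bit(Faulty, &rdev->flags))
			return NULL;
		atomic_inc(&rdev->nr_pending);	/* pinned until rdev_dec_pending() */
		return rdev;
	}

Device removal (see multipath_remove_disk() and raid1_remove_disk() below)
then reduces to checking nr_pending and doing WRITE_ONCE(p->rdev, NULL), with
no synchronize_rcu() grace period on the hot-unplug path.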
parents 1b151e24 fa2bbff7
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -20106,6 +20106,7 @@ F:	include/linux/property.h
 SOFTWARE RAID (Multiple Disks) SUPPORT
 M:	Song Liu <song@kernel.org>
+R:	Yu Kuai <yukuai3@huawei.com>
 L:	linux-raid@vger.kernel.org
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-raid/list/

--- a/drivers/md/md-multipath.c
+++ b/drivers/md/md-multipath.c
@@ -32,17 +32,15 @@ static int multipath_map (struct mpconf *conf)
 	 * now we use the first available disk.
 	 */
-	rcu_read_lock();
 	for (i = 0; i < disks; i++) {
-		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
+		struct md_rdev *rdev = conf->multipaths[i].rdev;
+
 		if (rdev && test_bit(In_sync, &rdev->flags) &&
 		    !test_bit(Faulty, &rdev->flags)) {
 			atomic_inc(&rdev->nr_pending);
-			rcu_read_unlock();
 			return i;
 		}
 	}
-	rcu_read_unlock();
 
 	pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n");
 	return (-1);
@@ -137,14 +135,16 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
 	struct mpconf *conf = mddev->private;
 	int i;
 
+	lockdep_assert_held(&mddev->lock);
 	seq_printf (seq, " [%d/%d] [", conf->raid_disks,
 		    conf->raid_disks - mddev->degraded);
-	rcu_read_lock();
 	for (i = 0; i < conf->raid_disks; i++) {
-		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
-		seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
+		struct md_rdev *rdev = READ_ONCE(conf->multipaths[i].rdev);
+
+		seq_printf(seq, "%s",
+			   rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
 	}
-	rcu_read_unlock();
 	seq_putc(seq, ']');
 }
@@ -182,7 +182,7 @@ static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
 		 conf->raid_disks - mddev->degraded);
 }
 
-static void print_multipath_conf (struct mpconf *conf)
+static void print_multipath_conf(struct mpconf *conf)
 {
 	int i;
 	struct multipath_info *tmp;
@@ -195,6 +195,7 @@ static void print_multipath_conf (struct mpconf *conf)
 	pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
 		 conf->raid_disks);
 
+	lockdep_assert_held(&conf->mddev->reconfig_mutex);
 	for (i = 0; i < conf->raid_disks; i++) {
 		tmp = conf->multipaths + i;
 		if (tmp->rdev)
@@ -231,7 +232,7 @@ static int multipath_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			rdev->raid_disk = path;
 			set_bit(In_sync, &rdev->flags);
 			spin_unlock_irq(&conf->device_lock);
-			rcu_assign_pointer(p->rdev, rdev);
+			WRITE_ONCE(p->rdev, rdev);
 			err = 0;
 			break;
 		}
@@ -257,16 +258,7 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 			err = -EBUSY;
 			goto abort;
 		}
-		p->rdev = NULL;
-		if (!test_bit(RemoveSynchronized, &rdev->flags)) {
-			synchronize_rcu();
-			if (atomic_read(&rdev->nr_pending)) {
-				/* lost the race, try later */
-				err = -EBUSY;
-				p->rdev = rdev;
-				goto abort;
-			}
-		}
+		WRITE_ONCE(p->rdev, NULL);
 		err = md_integrity_register(mddev);
 	}
 abort:

--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -529,6 +529,9 @@ static void md_end_flush(struct bio *bio)
 	rdev_dec_pending(rdev, mddev);
 
 	if (atomic_dec_and_test(&mddev->flush_pending)) {
+		/* The pair is percpu_ref_get() from md_flush_request() */
+		percpu_ref_put(&mddev->active_io);
+
 		/* The pre-request flush has finished */
 		queue_work(md_wq, &mddev->flush_work);
 	}
@@ -548,12 +551,8 @@ static void submit_flushes(struct work_struct *ws)
 	rdev_for_each_rcu(rdev, mddev)
 		if (rdev->raid_disk >= 0 &&
 		    !test_bit(Faulty, &rdev->flags)) {
-			/* Take two references, one is dropped
-			 * when request finishes, one after
-			 * we reclaim rcu_read_lock
-			 */
 			struct bio *bi;
+
 			atomic_inc(&rdev->nr_pending);
-			atomic_inc(&rdev->nr_pending);
 			rcu_read_unlock();
 			bi = bio_alloc_bioset(rdev->bdev, 0,
@@ -564,7 +563,6 @@ static void submit_flushes(struct work_struct *ws)
 			atomic_inc(&mddev->flush_pending);
 			submit_bio(bi);
 			rcu_read_lock();
-			rdev_dec_pending(rdev, mddev);
 		}
 	rcu_read_unlock();
 	if (atomic_dec_and_test(&mddev->flush_pending))
@@ -617,6 +615,18 @@ bool md_flush_request(struct mddev *mddev, struct bio *bio)
 	/* new request after previous flush is completed */
 	if (ktime_after(req_start, mddev->prev_flush_start)) {
 		WARN_ON(mddev->flush_bio);
+		/*
+		 * Grab a reference to make sure mddev_suspend() will wait for
+		 * this flush to be done.
+		 *
+		 * md_flush_request() is called under md_handle_request() and
+		 * 'active_io' is already grabbed, hence percpu_ref_is_zero()
+		 * won't pass. percpu_ref_tryget_live() can't be used because
+		 * percpu_ref_kill() can be called by mddev_suspend()
+		 * concurrently.
+		 */
+		WARN_ON(percpu_ref_is_zero(&mddev->active_io));
+		percpu_ref_get(&mddev->active_io);
 		mddev->flush_bio = bio;
 		bio = NULL;
 	}
@@ -1013,9 +1023,10 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
 		return;
 
 	bio = bio_alloc_bioset(rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev,
 			       1,
-			       REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | REQ_FUA,
+			       REQ_OP_WRITE | REQ_SYNC | REQ_IDLE | REQ_META
+				   | REQ_PREFLUSH | REQ_FUA,
 			       GFP_NOIO, &mddev->sync_set);
 
 	atomic_inc(&rdev->nr_pending);
@@ -9243,44 +9254,19 @@ static int remove_and_add_spares(struct mddev *mddev,
 	struct md_rdev *rdev;
 	int spares = 0;
 	int removed = 0;
-	bool remove_some = false;
 
 	if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		/* Mustn't remove devices when resync thread is running */
 		return 0;
 
 	rdev_for_each(rdev, mddev) {
-		if ((this == NULL || rdev == this) &&
-		    rdev->raid_disk >= 0 &&
-		    !test_bit(Blocked, &rdev->flags) &&
-		    test_bit(Faulty, &rdev->flags) &&
-		    atomic_read(&rdev->nr_pending)==0) {
-			/* Faulty non-Blocked devices with nr_pending == 0
-			 * never get nr_pending incremented,
-			 * never get Faulty cleared, and never get Blocked set.
-			 * So we can synchronize_rcu now rather than once per device
-			 */
-			remove_some = true;
-			set_bit(RemoveSynchronized, &rdev->flags);
-		}
-	}
-
-	if (remove_some)
-		synchronize_rcu();
-	rdev_for_each(rdev, mddev) {
-		if ((this == NULL || rdev == this) &&
-		    (test_bit(RemoveSynchronized, &rdev->flags) ||
-		     rdev_removeable(rdev))) {
-			if (mddev->pers->hot_remove_disk(
-				    mddev, rdev) == 0) {
-				sysfs_unlink_rdev(mddev, rdev);
-				rdev->saved_raid_disk = rdev->raid_disk;
-				rdev->raid_disk = -1;
-				removed++;
-			}
+		if ((this == NULL || rdev == this) && rdev_removeable(rdev) &&
+		    !mddev->pers->hot_remove_disk(mddev, rdev)) {
+			sysfs_unlink_rdev(mddev, rdev);
+			rdev->saved_raid_disk = rdev->raid_disk;
+			rdev->raid_disk = -1;
+			removed++;
 		}
-		if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
-			clear_bit(RemoveSynchronized, &rdev->flags);
 	}
 
 	if (removed && mddev->kobj.sd)
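
The flush change above hinges on one invariant: 'active_io' must not reach
zero while a flush is in flight, so mddev_suspend(), which kills and drains
active_io, also waits for the flush. A rough sketch of the reference pairing
(illustrative and heavily trimmed, not the full md.c code):

	/* Owner side, in md_flush_request(): the caller, md_handle_request(),
	 * already holds an active_io reference, so the ref cannot be zero.
	 * A plain get is used because percpu_ref_tryget_live() could race
	 * with percpu_ref_kill() from mddev_suspend().
	 */
	WARN_ON(percpu_ref_is_zero(&mddev->active_io));
	percpu_ref_get(&mddev->active_io);

	/* Completion side, in md_end_flush(): the last completing per-rdev
	 * flush bio drops the reference, letting a concurrent
	 * mddev_suspend() finish draining active_io.
	 */
	if (atomic_dec_and_test(&mddev->flush_pending)) {
		percpu_ref_put(&mddev->active_io);
		queue_work(md_wq, &mddev->flush_work);
	}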

--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -190,11 +190,6 @@ enum flag_bits {
 				 * than other devices in the array
 				 */
 	ClusterRemove,
-	RemoveSynchronized,	/* synchronize_rcu() was called after
-				 * this device was known to be faulty,
-				 * so it is safe to remove without
-				 * another synchronize_rcu() call.
-				 */
 	ExternalBbl,		/* External metadata provides bad
 				 * block management for a disk
 				 */

--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -609,7 +609,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	int choose_first;
 	int choose_next_idle;
 
-	rcu_read_lock();
 	/*
 	 * Check if we can balance. We can balance on the whole
 	 * device if no resync is going on, or below the resync window.
@@ -642,7 +641,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 		unsigned int pending;
 		bool nonrot;
 
-		rdev = rcu_dereference(conf->mirrors[disk].rdev);
+		rdev = conf->mirrors[disk].rdev;
 		if (r1_bio->bios[disk] == IO_BLOCKED
 		    || rdev == NULL
 		    || test_bit(Faulty, &rdev->flags))
@@ -773,7 +772,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	}
 
 	if (best_disk >= 0) {
-		rdev = rcu_dereference(conf->mirrors[best_disk].rdev);
+		rdev = conf->mirrors[best_disk].rdev;
 		if (!rdev)
 			goto retry;
 		atomic_inc(&rdev->nr_pending);
@@ -784,7 +783,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 			conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
 	}
-	rcu_read_unlock();
 	*max_sectors = sectors;
 
 	return best_disk;
@@ -1235,14 +1233,12 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
 	if (r1bio_existed) {
 		/* Need to get the block device name carefully */
-		struct md_rdev *rdev;
-		rcu_read_lock();
-		rdev = rcu_dereference(conf->mirrors[r1_bio->read_disk].rdev);
+		struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;
+
 		if (rdev)
 			snprintf(b, sizeof(b), "%pg", rdev->bdev);
 		else
 			strcpy(b, "???");
-		rcu_read_unlock();
 	}
 
 	/*
@@ -1396,10 +1392,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	disks = conf->raid_disks * 2;
 	blocked_rdev = NULL;
-	rcu_read_lock();
 	max_sectors = r1_bio->sectors;
 	for (i = 0; i < disks; i++) {
-		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+		struct md_rdev *rdev = conf->mirrors[i].rdev;
 
 		/*
 		 * The write-behind io is only attempted on drives marked as
@@ -1465,7 +1460,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 		}
 		r1_bio->bios[i] = bio;
 	}
-	rcu_read_unlock();
 
 	if (unlikely(blocked_rdev)) {
 		/* Wait for this device to become unblocked */
@@ -1617,15 +1611,16 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
 	struct r1conf *conf = mddev->private;
 	int i;
 
+	lockdep_assert_held(&mddev->lock);
+
 	seq_printf(seq, " [%d/%d] [", conf->raid_disks,
 		   conf->raid_disks - mddev->degraded);
-	rcu_read_lock();
 	for (i = 0; i < conf->raid_disks; i++) {
-		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+		struct md_rdev *rdev = READ_ONCE(conf->mirrors[i].rdev);
+
 		seq_printf(seq, "%s",
 			   rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
 	}
-	rcu_read_unlock();
 	seq_printf(seq, "]");
 }
@@ -1691,16 +1686,15 @@ static void print_conf(struct r1conf *conf)
 	pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
 		 conf->raid_disks);
 
-	rcu_read_lock();
+	lockdep_assert_held(&conf->mddev->reconfig_mutex);
 	for (i = 0; i < conf->raid_disks; i++) {
-		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+		struct md_rdev *rdev = conf->mirrors[i].rdev;
 		if (rdev)
 			pr_debug(" disk %d, wo:%d, o:%d, dev:%pg\n",
 				 i, !test_bit(In_sync, &rdev->flags),
 				 !test_bit(Faulty, &rdev->flags),
 				 rdev->bdev);
 	}
-	rcu_read_unlock();
 }
 
 static void close_sync(struct r1conf *conf)
@@ -1810,7 +1804,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			 */
 			if (rdev->saved_raid_disk < 0)
 				conf->fullsync = 1;
-			rcu_assign_pointer(p->rdev, rdev);
+			WRITE_ONCE(p->rdev, rdev);
 			break;
 		}
 		if (test_bit(WantReplacement, &p->rdev->flags) &&
@@ -1826,7 +1820,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		rdev->raid_disk = repl_slot;
 		err = 0;
 		conf->fullsync = 1;
-		rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
+		WRITE_ONCE(p[conf->raid_disks].rdev, rdev);
 	}
 
 	print_conf(conf);
@@ -1862,16 +1856,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 			err = -EBUSY;
 			goto abort;
 		}
-		p->rdev = NULL;
-		if (!test_bit(RemoveSynchronized, &rdev->flags)) {
-			synchronize_rcu();
-			if (atomic_read(&rdev->nr_pending)) {
-				/* lost the race, try later */
-				err = -EBUSY;
-				p->rdev = rdev;
-				goto abort;
-			}
-		}
+		WRITE_ONCE(p->rdev, NULL);
 		if (conf->mirrors[conf->raid_disks + number].rdev) {
 			/* We just removed a device that is being replaced.
 			 * Move down the replacement. We drain all IO before
@@ -1892,7 +1877,7 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 				goto abort;
 			}
 			clear_bit(Replacement, &repl->flags);
-			p->rdev = repl;
+			WRITE_ONCE(p->rdev, repl);
 			conf->mirrors[conf->raid_disks + number].rdev = NULL;
 			unfreeze_array(conf);
 		}
@@ -2290,8 +2275,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			sector_t first_bad;
 			int bad_sectors;
 
-			rcu_read_lock();
-			rdev = rcu_dereference(conf->mirrors[d].rdev);
+			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
 			    (test_bit(In_sync, &rdev->flags) ||
 			     (!test_bit(Faulty, &rdev->flags) &&
@@ -2299,15 +2283,14 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			      is_badblock(rdev, sect, s,
 					  &first_bad, &bad_sectors) == 0) {
 				atomic_inc(&rdev->nr_pending);
-				rcu_read_unlock();
 				if (sync_page_io(rdev, sect, s<<9,
 					 conf->tmppage, REQ_OP_READ, false))
 					success = 1;
 				rdev_dec_pending(rdev, mddev);
 				if (success)
 					break;
-			} else
-				rcu_read_unlock();
+			}
+
 			d++;
 			if (d == conf->raid_disks * 2)
 				d = 0;
@@ -2326,29 +2309,24 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			if (d==0)
 				d = conf->raid_disks * 2;
 			d--;
-			rcu_read_lock();
-			rdev = rcu_dereference(conf->mirrors[d].rdev);
+			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
 			    !test_bit(Faulty, &rdev->flags)) {
 				atomic_inc(&rdev->nr_pending);
-				rcu_read_unlock();
 				r1_sync_page_io(rdev, sect, s,
 						conf->tmppage, WRITE);
 				rdev_dec_pending(rdev, mddev);
-			} else
-				rcu_read_unlock();
+			}
 		}
 		d = start;
 		while (d != read_disk) {
 			if (d==0)
 				d = conf->raid_disks * 2;
 			d--;
-			rcu_read_lock();
-			rdev = rcu_dereference(conf->mirrors[d].rdev);
+			rdev = conf->mirrors[d].rdev;
 			if (rdev &&
 			    !test_bit(Faulty, &rdev->flags)) {
 				atomic_inc(&rdev->nr_pending);
-				rcu_read_unlock();
 				if (r1_sync_page_io(rdev, sect, s,
 						    conf->tmppage, READ)) {
 					atomic_add(s, &rdev->corrected_errors);
@@ -2359,8 +2337,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 					       rdev->bdev);
 				}
 				rdev_dec_pending(rdev, mddev);
-			} else
-				rcu_read_unlock();
+			}
 		}
 		sectors -= s;
 		sect += s;
@@ -2741,7 +2718,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 	r1_bio = raid1_alloc_init_r1buf(conf);
 
-	rcu_read_lock();
 	/*
 	 * If we get a correctably read error during resync or recovery,
 	 * we might want to read from a different device. So we
@@ -2762,7 +2738,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 		struct md_rdev *rdev;
 		bio = r1_bio->bios[i];
 
-		rdev = rcu_dereference(conf->mirrors[i].rdev);
+		rdev = conf->mirrors[i].rdev;
 		if (rdev == NULL ||
 		    test_bit(Faulty, &rdev->flags)) {
 			if (i < conf->raid_disks)
@@ -2820,7 +2796,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 				bio->bi_opf |= MD_FAILFAST;
 			}
 		}
-	rcu_read_unlock();
 	if (disk < 0)
 		disk = wonly;
 	r1_bio->read_disk = disk;

[collapsed diff not shown -- drivers/md/raid10.c]

--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1890,28 +1890,22 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
 			continue;
 
 		/* in case device is broken */
-		rcu_read_lock();
-		rdev = rcu_dereference(conf->disks[disk_index].rdev);
+		rdev = conf->disks[disk_index].rdev;
 		if (rdev) {
 			atomic_inc(&rdev->nr_pending);
-			rcu_read_unlock();
 			sync_page_io(rdev, sh->sector, PAGE_SIZE,
 				     sh->dev[disk_index].page, REQ_OP_WRITE,
 				     false);
 			rdev_dec_pending(rdev, rdev->mddev);
-			rcu_read_lock();
 		}
-		rrdev = rcu_dereference(conf->disks[disk_index].replacement);
+		rrdev = conf->disks[disk_index].replacement;
 		if (rrdev) {
 			atomic_inc(&rrdev->nr_pending);
-			rcu_read_unlock();
 			sync_page_io(rrdev, sh->sector, PAGE_SIZE,
 				     sh->dev[disk_index].page, REQ_OP_WRITE,
 				     false);
 			rdev_dec_pending(rrdev, rrdev->mddev);
-			rcu_read_lock();
 		}
-		rcu_read_unlock();
 	}
 	ctx->data_parity_stripes++;
 out:
@@ -2948,7 +2942,6 @@ bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect)
 	if (!log)
 		return false;
 
-	WARN_ON_ONCE(!rcu_read_lock_held());
 	tree_index = r5c_tree_index(conf, sect);
 	slot = radix_tree_lookup(&log->big_stripe_tree, tree_index);
 	return slot != NULL;

--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -620,11 +620,9 @@ static void ppl_do_flush(struct ppl_io_unit *io)
 		struct md_rdev *rdev;
 		struct block_device *bdev = NULL;
 
-		rcu_read_lock();
-		rdev = rcu_dereference(conf->disks[i].rdev);
+		rdev = conf->disks[i].rdev;
 		if (rdev && !test_bit(Faulty, &rdev->flags))
 			bdev = rdev->bdev;
-		rcu_read_unlock();
 
 		if (bdev) {
 			struct bio *bio;
@@ -882,9 +880,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
 			 (unsigned long long)r_sector, dd_idx,
 			 (unsigned long long)sector);
 
-		/* Array has not started so rcu dereference is safe */
-		rdev = rcu_dereference_protected(
-				conf->disks[dd_idx].rdev, 1);
+		rdev = conf->disks[dd_idx].rdev;
 		if (!rdev || (!test_bit(In_sync, &rdev->flags) &&
 			      sector >= rdev->recovery_offset)) {
 			pr_debug("%s:%*s data member disk %d missing\n",
@@ -936,9 +932,7 @@ static int ppl_recover_entry(struct ppl_log *log, struct ppl_header_entry *e,
 			      0, &disk, &sh);
 	BUG_ON(sh.pd_idx != le32_to_cpu(e->parity_disk));
 
-	/* Array has not started so rcu dereference is safe */
-	parity_rdev = rcu_dereference_protected(
-				conf->disks[sh.pd_idx].rdev, 1);
+	parity_rdev = conf->disks[sh.pd_idx].rdev;
 
 	BUG_ON(parity_rdev->bdev->bd_dev != log->rdev->bdev->bd_dev);
 	pr_debug("%s:%*s write parity at sector %llu, disk %pg\n",
@@ -1404,9 +1398,7 @@ int ppl_init_log(struct r5conf *conf)
 	for (i = 0; i < ppl_conf->count; i++) {
 		struct ppl_log *log = &ppl_conf->child_logs[i];
-		/* Array has not started so rcu dereference is safe */
-		struct md_rdev *rdev =
-			rcu_dereference_protected(conf->disks[i].rdev, 1);
+		struct md_rdev *rdev = conf->disks[i].rdev;
 
 		mutex_init(&log->io_mutex);
 		spin_lock_init(&log->io_list_lock);

[collapsed diff not shown -- drivers/md/raid5.c]

--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -473,8 +473,8 @@ enum {
  */
 struct disk_info {
-	struct md_rdev __rcu	*rdev;
-	struct md_rdev __rcu	*replacement;
+	struct md_rdev	*rdev;
+	struct md_rdev	*replacement;
 	struct page	*extra_page; /* extra page to use in prexor */
 };