Commit 106f2e59 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'md/4.8-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD fixes from Shaohua Li:
 "A few bug fixes for MD:

   - Guoqing fixed a bug compiling md-cluster in kernel

   - I fixed a potential deadlock in raid5-cache superblock write, a
     hang in raid5 reshape resume and a race condition introduced in
     rc4"

* tag 'md/4.8-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  raid5: fix a small race condition
  md-cluster: make md-cluster also can work when compiled into kernel
  raid5: guarantee enough stripes to avoid reshape hang
  raid5-cache: fix a deadlock in superblock write
parents 309a18ae c9445555
...@@ -7610,16 +7610,12 @@ EXPORT_SYMBOL(unregister_md_cluster_operations); ...@@ -7610,16 +7610,12 @@ EXPORT_SYMBOL(unregister_md_cluster_operations);
int md_setup_cluster(struct mddev *mddev, int nodes) int md_setup_cluster(struct mddev *mddev, int nodes)
{ {
int err; if (!md_cluster_ops)
request_module("md-cluster");
err = request_module("md-cluster");
if (err) {
pr_err("md-cluster module not found.\n");
return -ENOENT;
}
spin_lock(&pers_lock); spin_lock(&pers_lock);
/* ensure module won't be unloaded */
if (!md_cluster_ops || !try_module_get(md_cluster_mod)) { if (!md_cluster_ops || !try_module_get(md_cluster_mod)) {
pr_err("can't find md-cluster module or get it's reference.\n");
spin_unlock(&pers_lock); spin_unlock(&pers_lock);
return -ENOENT; return -ENOENT;
} }
......
...@@ -96,7 +96,6 @@ struct r5l_log { ...@@ -96,7 +96,6 @@ struct r5l_log {
spinlock_t no_space_stripes_lock; spinlock_t no_space_stripes_lock;
bool need_cache_flush; bool need_cache_flush;
bool in_teardown;
}; };
/* /*
...@@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log, ...@@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log,
mddev = log->rdev->mddev; mddev = log->rdev->mddev;
/* /*
* This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and * Discard could zero data, so before discard we must make sure
* wait for this thread to finish. This thread waits for * superblock is updated to new log tail. Updating superblock (either
* MD_CHANGE_PENDING clear, which is supposed to be done in * directly call md_update_sb() or depend on md thread) must hold
* md_check_recovery(). md_check_recovery() tries to get * reconfig mutex. On the other hand, raid5_quiesce is called with
 * reconfig_mutex. Since r5l_quiesce already holds the mutex, * reconfig_mutex held. The first step of raid5_quiesce() is waiting
 * md_check_recovery() fails, so the PENDING never gets cleared. The * for all IO to finish, hence waiting for the reclaim thread, while the reclaim
 * in_teardown check works around this issue. * thread is calling this function and waiting for the reconfig mutex. So
* there is a deadlock. We workaround this issue with a trylock.
* FIXME: we could miss discard if we can't take reconfig mutex
*/ */
if (!log->in_teardown) {
set_mask_bits(&mddev->flags, 0, set_mask_bits(&mddev->flags, 0,
BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING));
md_wakeup_thread(mddev->thread); if (!mddev_trylock(mddev))
wait_event(mddev->sb_wait, return;
!test_bit(MD_CHANGE_PENDING, &mddev->flags) ||
log->in_teardown);
/*
* r5l_quiesce could run after in_teardown check and hold
* mutex first. Superblock might get updated twice.
*/
if (log->in_teardown)
md_update_sb(mddev, 1);
} else {
WARN_ON(!mddev_is_locked(mddev));
md_update_sb(mddev, 1); md_update_sb(mddev, 1);
} mddev_unlock(mddev);
/* discard IO error really doesn't matter, ignore it */ /* discard IO error really doesn't matter, ignore it */
if (log->last_checkpoint < end) { if (log->last_checkpoint < end) {
...@@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state) ...@@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
if (!log || state == 2) if (!log || state == 2)
return; return;
if (state == 0) { if (state == 0) {
log->in_teardown = 0;
/* /*
* This is a special case for hotadd. In suspend, the array has * This is a special case for hotadd. In suspend, the array has
* no journal. In resume, journal is initialized as well as the * no journal. In resume, journal is initialized as well as the
...@@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state) ...@@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state)
log->reclaim_thread = md_register_thread(r5l_reclaim_thread, log->reclaim_thread = md_register_thread(r5l_reclaim_thread,
log->rdev->mddev, "reclaim"); log->rdev->mddev, "reclaim");
} else if (state == 1) { } else if (state == 1) {
/*
* at this point all stripes are finished, so io_unit is at
* least in STRIPE_END state
*/
log->in_teardown = 1;
/* make sure r5l_write_super_and_discard_space exits */ /* make sure r5l_write_super_and_discard_space exits */
mddev = log->rdev->mddev; mddev = log->rdev->mddev;
wake_up(&mddev->sb_wait); wake_up(&mddev->sb_wait);
......
...@@ -2423,10 +2423,10 @@ static void raid5_end_read_request(struct bio * bi) ...@@ -2423,10 +2423,10 @@ static void raid5_end_read_request(struct bio * bi)
} }
} }
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
bio_reset(bi);
clear_bit(R5_LOCKED, &sh->dev[i].flags); clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
raid5_release_stripe(sh); raid5_release_stripe(sh);
bio_reset(bi);
} }
static void raid5_end_write_request(struct bio *bi) static void raid5_end_write_request(struct bio *bi)
...@@ -2498,6 +2498,7 @@ static void raid5_end_write_request(struct bio *bi) ...@@ -2498,6 +2498,7 @@ static void raid5_end_write_request(struct bio *bi)
if (sh->batch_head && bi->bi_error && !replacement) if (sh->batch_head && bi->bi_error && !replacement)
set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state);
bio_reset(bi);
if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags))
clear_bit(R5_LOCKED, &sh->dev[i].flags); clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
...@@ -2505,7 +2506,6 @@ static void raid5_end_write_request(struct bio *bi) ...@@ -2505,7 +2506,6 @@ static void raid5_end_write_request(struct bio *bi)
if (sh->batch_head && sh != sh->batch_head) if (sh->batch_head && sh != sh->batch_head)
raid5_release_stripe(sh->batch_head); raid5_release_stripe(sh->batch_head);
bio_reset(bi);
} }
static void raid5_build_block(struct stripe_head *sh, int i, int previous) static void raid5_build_block(struct stripe_head *sh, int i, int previous)
...@@ -6639,6 +6639,16 @@ static struct r5conf *setup_conf(struct mddev *mddev) ...@@ -6639,6 +6639,16 @@ static struct r5conf *setup_conf(struct mddev *mddev)
} }
conf->min_nr_stripes = NR_STRIPES; conf->min_nr_stripes = NR_STRIPES;
if (mddev->reshape_position != MaxSector) {
int stripes = max_t(int,
((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4,
((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4);
conf->min_nr_stripes = max(NR_STRIPES, stripes);
if (conf->min_nr_stripes != NR_STRIPES)
printk(KERN_INFO
"md/raid:%s: force stripe size %d for reshape\n",
mdname(mddev), conf->min_nr_stripes);
}
memory = conf->min_nr_stripes * (sizeof(struct stripe_head) + memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS); atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment