Commit 4526b710 authored by Linus Torvalds

Merge tag 'md/4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD updates from Shaohua Li:
 "This update mainly fixes bugs.

   - a raid5 discard related fix from Jes
   - a MD multipath bio clone fix from Ming
   - raid1 error handling deadlock fix from Nate and corresponding
     raid10 fix from myself
   - a raid5 stripe batch fix from Neil
   - a patch from Sebastian to avoid unnecessary uevent
   - several cleanup/debug patches"

* tag 'md/4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md/raid5: Cleanup cpu hotplug notifier
  raid10: include bio_end_io_list in nr_queued to prevent freeze_array hang
  raid1: include bio_end_io_list in nr_queued to prevent freeze_array hang
  md: fix typos for stipe
  md/bitmap: remove redundant return in bitmap_checkpage
  md/raid1: remove unnecessary BUG_ON
  md: multipath: don't hardcopy bio in .make_request path
  md/raid5: output stripe state for debug
  md/raid5: preserve STRIPE_PREREAD_ACTIVE in break_stripe_batch_list
  Update MD git tree URL
  md/bitmap: remove redundant check
  MD: warn for potential deadlock
  md: Drop sending a change uevent when stopping
  RAID5: revert e9e4c377 to fix a livelock
  RAID5: check_reshape() shouldn't call mddev_suspend
  md/raid5: Compare apples to apples (or sectors to sectors)
parents 770c4c11 1d034e68
@@ -10291,7 +10291,7 @@ F:	drivers/media/pci/solo6x10/
 SOFTWARE RAID (Multiple Disks) SUPPORT
 M:	Shaohua Li <shli@kernel.org>
 L:	linux-raid@vger.kernel.org
-T:	git git://neil.brown.name/md
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
 S:	Supported
 F:	drivers/md/
 F:	include/linux/raid/
...
@@ -98,7 +98,6 @@ __acquires(bitmap->lock)
 	    bitmap->bp[page].hijacked) {
 		/* somebody beat us to getting the page */
 		kfree(mappage);
-		return 0;
 	} else {
 		/* no page was in place and we have one, so install it */
@@ -510,8 +509,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
 	sb->chunksize = cpu_to_le32(chunksize);
 	daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
-	if (!daemon_sleep ||
-	    (daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
+	if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
 		printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
 		daemon_sleep = 5 * HZ;
 	}
...
@@ -49,8 +49,8 @@
  * When we set a bit, or in the counter (to start a write), if the fields is
  * 0, we first set the disk bit and set the counter to 1.
  *
- * If the counter is 0, the on-disk bit is clear and the stipe is clean
- * Anything that dirties the stipe pushes the counter to 2 (at least)
+ * If the counter is 0, the on-disk bit is clear and the stripe is clean
+ * Anything that dirties the stripe pushes the counter to 2 (at least)
  * and sets the on-disk bit (lazily).
  * If a periodic sweep find the counter at 2, it is decremented to 1.
  * If the sweep find the counter at 1, the on-disk bit is cleared and the
...
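The comment corrected above describes the write-intent bitmap's lazy clearing: a write sets the on-disk bit and pushes the in-memory counter to at least 2, and the periodic sweep decrements 2 to 1 and then clears the bit when it finds 1. A minimal userspace sketch of that decay, with hypothetical names and none of the kernel's per-chunk flag bits:

/* Sketch only: the real bitmap packs a 14-bit counter plus flag bits per chunk. */
#include <stdbool.h>
#include <stdio.h>

struct chunk {
	unsigned int counter;	/* in-memory write counter for this chunk */
	bool on_disk_bit;	/* bit as recorded in the on-disk bitmap */
};

/* a write dirties the chunk: set the disk bit (lazily) and push the counter to 2+ */
static void chunk_dirty(struct chunk *c)
{
	if (c->counter == 0) {
		c->on_disk_bit = true;
		c->counter = 1;
	}
	if (c->counter < 2)
		c->counter = 2;
}

/* periodic sweep: 2 decays to 1; 1 clears the on-disk bit and drops to 0 */
static void chunk_sweep(struct chunk *c)
{
	if (c->counter == 2)
		c->counter = 1;
	else if (c->counter == 1) {
		c->on_disk_bit = false;
		c->counter = 0;
	}
}

int main(void)
{
	struct chunk c = { 0, false };

	chunk_dirty(&c);	/* counter -> 2, on-disk bit set */
	chunk_sweep(&c);	/* idle sweep: counter -> 1 */
	chunk_sweep(&c);	/* still idle: bit cleared, counter -> 0 */
	printf("counter=%u bit=%d\n", c.counter, (int)c.on_disk_bit);
	return 0;
}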
@@ -305,6 +305,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
  */
 void mddev_suspend(struct mddev *mddev)
 {
+	WARN_ON_ONCE(current == mddev->thread->tsk);
 	if (mddev->suspended++)
 		return;
 	synchronize_rcu();
@@ -5671,7 +5672,6 @@ static int do_md_stop(struct mddev *mddev, int mode,
 		export_array(mddev);
 		md_clean(mddev);
-		kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
 		if (mddev->hold_active == UNTIL_STOP)
 			mddev->hold_active = 0;
 	}
...
@@ -129,7 +129,9 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
 	}
 	multipath = conf->multipaths + mp_bh->path;
-	mp_bh->bio = *bio;
+	bio_init(&mp_bh->bio);
+	__bio_clone_fast(&mp_bh->bio, bio);
 	mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
 	mp_bh->bio.bi_bdev = multipath->rdev->bdev;
 	mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT;
...
@@ -2274,6 +2274,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
 		if (fail) {
 			spin_lock_irq(&conf->device_lock);
 			list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
+			conf->nr_queued++;
 			spin_unlock_irq(&conf->device_lock);
 			md_wakeup_thread(conf->mddev->thread);
 		} else {
@@ -2391,8 +2392,10 @@ static void raid1d(struct md_thread *thread)
 		LIST_HEAD(tmp);
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
-			list_add(&tmp, &conf->bio_end_io_list);
-			list_del_init(&conf->bio_end_io_list);
+			while (!list_empty(&conf->bio_end_io_list)) {
+				list_move(conf->bio_end_io_list.prev, &tmp);
+				conf->nr_queued--;
+			}
 		}
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 		while (!list_empty(&tmp)) {
@@ -2695,7 +2698,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 			    !conf->fullsync &&
 			    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
 				break;
-			BUG_ON(sync_blocks < (PAGE_SIZE>>9));
 			if ((len >> 9) > sync_blocks)
 				len = sync_blocks<<9;
 		}
...
@@ -2664,6 +2664,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
 		if (fail) {
 			spin_lock_irq(&conf->device_lock);
 			list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
+			conf->nr_queued++;
 			spin_unlock_irq(&conf->device_lock);
 			md_wakeup_thread(conf->mddev->thread);
 		} else {
@@ -2691,8 +2692,10 @@ static void raid10d(struct md_thread *thread)
 		LIST_HEAD(tmp);
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
-			list_add(&tmp, &conf->bio_end_io_list);
-			list_del_init(&conf->bio_end_io_list);
+			while (!list_empty(&conf->bio_end_io_list)) {
+				list_move(conf->bio_end_io_list.prev, &tmp);
+				conf->nr_queued--;
+			}
 		}
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 		while (!list_empty(&tmp)) {
...
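Both the raid1 and raid10 hunks above restore the accounting that freeze_array() depends on: it waits, roughly, until every in-flight r1bio/r10bio is parked on a retry list, i.e. until nr_pending equals nr_queued (plus a small allowance). Requests sitting on bio_end_io_list are still counted in nr_pending, so before this fix they were invisible to that test and the wait could hang. A toy userspace model of the invariant, with simplified names rather than the actual kernel code:

/* Toy model: a freeze completes only when every pending request is parked. */
#include <assert.h>
#include <stdio.h>

struct conf {
	int nr_pending;	/* requests handed to the personality */
	int nr_queued;	/* requests parked on retry/end_io lists */
};

/* with the fix, parking a failed bio on bio_end_io_list also bumps nr_queued */
static void park_on_end_io_list(struct conf *c)
{
	c->nr_queued++;
}

/* the condition the freeze waits for (ignoring the small "extra" allowance) */
static int array_quiet(const struct conf *c)
{
	return c->nr_pending == c->nr_queued;
}

int main(void)
{
	struct conf c = { .nr_pending = 1, .nr_queued = 0 };

	/* before the fix: the bio sits on bio_end_io_list but is not counted
	 * as queued, so a freeze would wait forever */
	assert(!array_quiet(&c));

	park_on_end_io_list(&c);	/* fixed accounting */
	assert(array_quiet(&c));
	printf("pending=%d queued=%d\n", c.nr_pending, c.nr_queued);
	return 0;
}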
@@ -340,8 +340,7 @@ static void release_inactive_stripe_list(struct r5conf *conf,
 					 int hash)
 {
 	int size;
-	unsigned long do_wakeup = 0;
-	int i = 0;
+	bool do_wakeup = false;
 	unsigned long flags;
 
 	if (hash == NR_STRIPE_HASH_LOCKS) {
@@ -362,19 +361,15 @@ static void release_inactive_stripe_list(struct r5conf *conf,
 			    !list_empty(list))
 				atomic_dec(&conf->empty_inactive_list_nr);
 			list_splice_tail_init(list, conf->inactive_list + hash);
-			do_wakeup |= 1 << hash;
+			do_wakeup = true;
 			spin_unlock_irqrestore(conf->hash_locks + hash, flags);
 		}
 		size--;
 		hash--;
 	}
 
-	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
-		if (do_wakeup & (1 << i))
-			wake_up(&conf->wait_for_stripe[i]);
-	}
-
 	if (do_wakeup) {
+		wake_up(&conf->wait_for_stripe);
 		if (atomic_read(&conf->active_stripes) == 0)
 			wake_up(&conf->wait_for_quiescent);
 		if (conf->retry_read_aligned)
@@ -687,15 +682,14 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
 			if (!sh) {
 				set_bit(R5_INACTIVE_BLOCKED,
 					&conf->cache_state);
-				wait_event_exclusive_cmd(
-					conf->wait_for_stripe[hash],
+				wait_event_lock_irq(
+					conf->wait_for_stripe,
 					!list_empty(conf->inactive_list + hash) &&
 					(atomic_read(&conf->active_stripes)
 					 < (conf->max_nr_stripes * 3 / 4)
 					 || !test_bit(R5_INACTIVE_BLOCKED,
 						      &conf->cache_state)),
-					spin_unlock_irq(conf->hash_locks + hash),
-					spin_lock_irq(conf->hash_locks + hash));
+					*(conf->hash_locks + hash));
 				clear_bit(R5_INACTIVE_BLOCKED,
 					  &conf->cache_state);
 			} else {
@@ -720,9 +714,6 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
 		}
 	} while (sh == NULL);
 
-	if (!list_empty(conf->inactive_list + hash))
-		wake_up(&conf->wait_for_stripe[hash]);
 	spin_unlock_irq(conf->hash_locks + hash);
 	return sh;
 }
@@ -2089,6 +2080,14 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
 	unsigned long cpu;
 	int err = 0;
 
+	/*
+	 * Never shrink. And mddev_suspend() could deadlock if this is called
+	 * from raid5d. In that case, scribble_disks and scribble_sectors
+	 * should equal to new_disks and new_sectors
+	 */
+	if (conf->scribble_disks >= new_disks &&
+	    conf->scribble_sectors >= new_sectors)
+		return 0;
 	mddev_suspend(conf->mddev);
 	get_online_cpus();
 	for_each_present_cpu(cpu) {
@@ -2110,6 +2109,10 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
 	}
 	put_online_cpus();
 	mddev_resume(conf->mddev);
+	if (!err) {
+		conf->scribble_disks = new_disks;
+		conf->scribble_sectors = new_sectors;
+	}
 	return err;
 }
@@ -2190,7 +2193,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	cnt = 0;
 	list_for_each_entry(nsh, &newstripes, lru) {
 		lock_device_hash_lock(conf, hash);
-		wait_event_exclusive_cmd(conf->wait_for_stripe[hash],
+		wait_event_cmd(conf->wait_for_stripe,
 				    !list_empty(conf->inactive_list + hash),
 				    unlock_device_hash_lock(conf, hash),
 				    lock_device_hash_lock(conf, hash));
@@ -4233,10 +4236,9 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
 		list_del_init(&sh->batch_list);
-		WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
+		WARN_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
 			  (1 << STRIPE_SYNCING) |
 			  (1 << STRIPE_REPLACED) |
-			  (1 << STRIPE_PREREAD_ACTIVE) |
 			  (1 << STRIPE_DELAYED) |
 			  (1 << STRIPE_BIT_DELAY) |
 			  (1 << STRIPE_FULL_WRITE) |
@@ -4246,11 +4248,14 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
 			  (1 << STRIPE_DISCARD) |
 			  (1 << STRIPE_BATCH_READY) |
 			  (1 << STRIPE_BATCH_ERR) |
-			  (1 << STRIPE_BITMAP_PENDING)));
-		WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
-			      (1 << STRIPE_REPLACED)));
+			  (1 << STRIPE_BITMAP_PENDING)),
+			"stripe state: %lx\n", sh->state);
+		WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
+			      (1 << STRIPE_REPLACED)),
+			"head stripe state: %lx\n", head_sh->state);
 		set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
+					    (1 << STRIPE_PREREAD_ACTIVE) |
 					    (1 << STRIPE_DEGRADED)),
 			      head_sh->state & (1 << STRIPE_INSYNC));
@@ -6376,6 +6381,8 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
 		free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
 		break;
 	default:
@@ -6413,6 +6420,12 @@ static int raid5_alloc_percpu(struct r5conf *conf)
 	}
 	put_online_cpus();
 
+	if (!err) {
+		conf->scribble_disks = max(conf->raid_disks,
+			conf->previous_raid_disks);
+		conf->scribble_sectors = max(conf->chunk_sectors,
+			conf->prev_chunk_sectors);
+	}
 	return err;
 }
@@ -6503,9 +6516,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	seqcount_init(&conf->gen_lock);
 	mutex_init(&conf->cache_size_mutex);
 	init_waitqueue_head(&conf->wait_for_quiescent);
-	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
-		init_waitqueue_head(&conf->wait_for_stripe[i]);
-	}
+	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
 	INIT_LIST_HEAD(&conf->handle_list);
 	INIT_LIST_HEAD(&conf->hold_list);
@@ -7014,8 +7025,8 @@ static int raid5_run(struct mddev *mddev)
 		}
 
 		if (discard_supported &&
-		    mddev->queue->limits.max_discard_sectors >= stripe &&
+		    mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
 		    mddev->queue->limits.discard_granularity >= stripe)
 			queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
 						mddev->queue);
 		else
...
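The last raid5.c hunk ("Compare apples to apples") fixes a units mismatch: limits.max_discard_sectors is expressed in 512-byte sectors, while stripe at this point is a byte count (discard_granularity, by contrast, is in bytes, so that comparison is unchanged). With hypothetical numbers, the old test could refuse to enable discard even though the queue's limit was large enough:

#include <stdio.h>

int main(void)
{
	/* hypothetical values: 512 KiB stripe, queue advertising a 1 MiB discard limit */
	unsigned long stripe = 512 * 1024;		/* bytes */
	unsigned long max_discard_sectors = 2048;	/* 512-byte sectors = 1 MiB */

	printf("old test:   %d\n", max_discard_sectors >= stripe);		/* 0: sectors vs bytes */
	printf("fixed test: %d\n", max_discard_sectors >= (stripe >> 9));	/* 1: sectors vs sectors */
	return 0;
}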
@@ -510,6 +510,8 @@ struct r5conf {
 					      * conversions
 					      */
 	} __percpu *percpu;
+	int scribble_disks;
+	int scribble_sectors;
 #ifdef CONFIG_HOTPLUG_CPU
 	struct notifier_block	cpu_notify;
 #endif
@@ -522,7 +524,7 @@ struct r5conf {
 	atomic_t		empty_inactive_list_nr;
 	struct llist_head	released_stripes;
 	wait_queue_head_t	wait_for_quiescent;
-	wait_queue_head_t	wait_for_stripe[NR_STRIPE_HASH_LOCKS];
+	wait_queue_head_t	wait_for_stripe;
 	wait_queue_head_t	wait_for_overlap;
 	unsigned long		cache_state;
 #define R5_INACTIVE_BLOCKED	1	/* release of inactive stripes blocked,
...