Commit 8fd2b980 authored by Jens Axboe

Merge branch 'md-next' of git://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-5.8/drivers

Pull MD changes from Song.

* 'md-next' of git://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md/raid1: Replace zero-length array with flexible-array
  md: add a newline when printing parameter 'start_ro' by sysfs
  md: stop using ->queuedata
  md/raid1: release pending accounting for an I/O only after write-behind is also finished
  md: remove redundant memalloc scope API usage
  raid5: update code comment of scribble_alloc()
  raid5: remove gfp flags from scribble_alloc()
  md: use memalloc scope APIs in mddev_suspend()/mddev_resume()
  md: remove the extra line for ->hot_add_disk
  md: flush md_rdev_misc_wq for HOT_ADD_DISK case
  md: don't flush workqueue unconditionally in md_open
  md: add new workqueue for delete rdev
  md: add checkings before flush md_misc_wq
parents 91bf5ec3 358369f0
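
For context: several patches below (the mddev_suspend()/mddev_resume() change, the serial pool allocation, and the scribble_alloc() cleanup) replace explicit GFP_NOIO/GFP_KERNEL juggling with the kernel's memalloc scope API, where every allocation between save and restore implicitly behaves as GFP_NOIO. A minimal sketch of that pattern follows; the example_* names are illustrative only, not md code.

	#include <linux/sched/mm.h>

	struct example_ctx {
		unsigned int noio_flag;		/* saved allocation-scope flags */
	};

	static void example_suspend(struct example_ctx *ctx)
	{
		/*
		 * From here until the matching restore, memory reclaim
		 * triggered by any allocation in this task will not issue
		 * I/O, even if the allocation itself uses GFP_KERNEL.
		 */
		ctx->noio_flag = memalloc_noio_save();
	}

	static void example_resume(struct example_ctx *ctx)
	{
		/* leave the noio scope entered in example_suspend() */
		memalloc_noio_restore(ctx->noio_flag);
	}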
@@ -12,6 +12,6 @@ struct linear_conf
 	struct rcu_head		rcu;
 	sector_t		array_sectors;
 	int			raid_disks; /* a copy of mddev->raid_disks */
-	struct dev_info		disks[0];
+	struct dev_info		disks[];
 };
 #endif
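
The disks[0] to disks[] conversions in this series follow the kernel-wide move to C99 flexible-array members, typically paired with struct_size() for the allocation. A rough sketch under assumed names (demo_conf and demo_alloc are illustrative, not the md code):

	#include <linux/overflow.h>
	#include <linux/slab.h>
	#include <linux/types.h>

	struct demo_dev_info {
		sector_t num_sectors;
	};

	struct demo_conf {
		int raid_disks;
		struct demo_dev_info disks[];	/* flexible array, must be last */
	};

	static struct demo_conf *demo_alloc(int raid_disks)
	{
		struct demo_conf *conf;

		/*
		 * struct_size() computes sizeof(*conf) plus raid_disks
		 * trailing elements, with overflow checking.
		 */
		conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
		if (conf)
			conf->raid_disks = raid_disks;
		return conf;
	}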
@@ -89,6 +89,7 @@ static struct module *md_cluster_mod;
 static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
 static struct workqueue_struct *md_wq;
 static struct workqueue_struct *md_misc_wq;
+static struct workqueue_struct *md_rdev_misc_wq;
 
 static int remove_and_add_spares(struct mddev *mddev,
 				 struct md_rdev *this);
@@ -227,13 +228,13 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
 			goto abort;
 
 	if (mddev->serial_info_pool == NULL) {
-		unsigned int noio_flag;
-
-		noio_flag = memalloc_noio_save();
+		/*
+		 * already in memalloc noio context by
+		 * mddev_suspend()
+		 */
 		mddev->serial_info_pool =
 			mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
 						    sizeof(struct serial_info));
-		memalloc_noio_restore(noio_flag);
 		if (!mddev->serial_info_pool) {
 			rdevs_uninit_serial(mddev);
 			pr_err("can't alloc memory pool for serialization\n");
@@ -466,7 +467,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int rw = bio_data_dir(bio);
 	const int sgrp = op_stat_group(bio_op(bio));
-	struct mddev *mddev = q->queuedata;
+	struct mddev *mddev = bio->bi_disk->private_data;
 	unsigned int sectors;
 
 	if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
@@ -527,11 +528,15 @@ void mddev_suspend(struct mddev *mddev)
 	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
 
 	del_timer_sync(&mddev->safemode_timer);
+	/* restrict memory reclaim I/O while the raid array is suspended */
+	mddev->noio_flag = memalloc_noio_save();
 }
 EXPORT_SYMBOL_GPL(mddev_suspend);
 
 void mddev_resume(struct mddev *mddev)
 {
+	/* leave the memalloc scope entered in mddev_suspend() */
+	memalloc_noio_restore(mddev->noio_flag);
 	lockdep_assert_held(&mddev->reconfig_mutex);
 	if (--mddev->suspended)
 		return;
@@ -2454,7 +2459,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
 	return err;
 }
 
-static void md_delayed_delete(struct work_struct *ws)
+static void rdev_delayed_delete(struct work_struct *ws)
 {
 	struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
 	kobject_del(&rdev->kobj);
@@ -2479,9 +2484,9 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
 	 * to delay it due to rcu usage.
 	 */
 	synchronize_rcu();
-	INIT_WORK(&rdev->del_work, md_delayed_delete);
+	INIT_WORK(&rdev->del_work, rdev_delayed_delete);
 	kobject_get(&rdev->kobj);
-	queue_work(md_misc_wq, &rdev->del_work);
+	queue_work(md_rdev_misc_wq, &rdev->del_work);
 }
 
 /*
@@ -3191,8 +3196,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
 		rdev->saved_raid_disk = -1;
 		clear_bit(In_sync, &rdev->flags);
 		clear_bit(Bitmap_sync, &rdev->flags);
-		err = rdev->mddev->pers->
-			hot_add_disk(rdev->mddev, rdev);
+		err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
 		if (err) {
 			rdev->raid_disk = -1;
 			return err;
@@ -4514,6 +4518,20 @@ null_show(struct mddev *mddev, char *page)
 	return -EINVAL;
 }
 
+/* need to ensure rdev_delayed_delete() has completed */
+static void flush_rdev_wq(struct mddev *mddev)
+{
+	struct md_rdev *rdev;
+
+	rcu_read_lock();
+	rdev_for_each_rcu(rdev, mddev)
+		if (work_pending(&rdev->del_work)) {
+			flush_workqueue(md_rdev_misc_wq);
+			break;
+		}
+	rcu_read_unlock();
+}
+
 static ssize_t
 new_dev_store(struct mddev *mddev, const char *buf, size_t len)
 {
@@ -4541,8 +4559,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
 	    minor != MINOR(dev))
 		return -EOVERFLOW;
 
-	flush_workqueue(md_misc_wq);
-
+	flush_rdev_wq(mddev);
 	err = mddev_lock(mddev);
 	if (err)
 		return err;
@@ -4780,7 +4797,8 @@ action_store(struct mddev *mddev, const char *page, size_t len)
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 		if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
 		    mddev_lock(mddev) == 0) {
-			flush_workqueue(md_misc_wq);
+			if (work_pending(&mddev->del_work))
+				flush_workqueue(md_misc_wq);
 			if (mddev->sync_thread) {
 				set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 				md_reap_sync_thread(mddev);
@@ -5626,7 +5644,6 @@ static int md_alloc(dev_t dev, char *name)
 	mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
 	if (!mddev->queue)
 		goto abort;
-	mddev->queue->queuedata = mddev;
 
 	blk_set_stacking_limits(&mddev->queue->limits);
@@ -6147,7 +6164,8 @@ static void md_clean(struct mddev *mddev)
 static void __md_stop_writes(struct mddev *mddev)
 {
 	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-	flush_workqueue(md_misc_wq);
+	if (work_pending(&mddev->del_work))
+		flush_workqueue(md_misc_wq);
 	if (mddev->sync_thread) {
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		md_reap_sync_thread(mddev);
@@ -6200,7 +6218,8 @@ static void __md_stop(struct mddev *mddev)
 	md_bitmap_destroy(mddev);
 	mddev_detach(mddev);
 	/* Ensure ->event_work is done */
-	flush_workqueue(md_misc_wq);
+	if (mddev->event_work.func)
+		flush_workqueue(md_misc_wq);
 	spin_lock(&mddev->lock);
 	mddev->pers = NULL;
 	spin_unlock(&mddev->lock);
@@ -7495,9 +7514,8 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 	}
 
-	if (cmd == ADD_NEW_DISK)
-		/* need to ensure md_delayed_delete() has completed */
-		flush_workqueue(md_misc_wq);
+	if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
+		flush_rdev_wq(mddev);
 
 	if (cmd == HOT_REMOVE_DISK)
 		/* need to ensure recovery thread has run */
@@ -7752,7 +7770,8 @@ static int md_open(struct block_device *bdev, fmode_t mode)
 		 */
 		mddev_put(mddev);
 		/* Wait until bdev->bd_disk is definitely gone */
-		flush_workqueue(md_misc_wq);
+		if (work_pending(&mddev->del_work))
+			flush_workqueue(md_misc_wq);
 		/* Then retry the open from the top */
 		return -ERESTARTSYS;
 	}
@@ -9040,8 +9059,7 @@ static int remove_and_add_spares(struct mddev *mddev,
 				rdev->recovery_offset = 0;
 			}
-			if (mddev->pers->
-			    hot_add_disk(mddev, rdev) == 0) {
+			if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
 				if (sysfs_link_rdev(mddev, rdev))
 					/* failure here is OK */;
 				if (!test_bit(Journal, &rdev->flags))
@@ -9469,6 +9487,10 @@ static int __init md_init(void)
 	if (!md_misc_wq)
 		goto err_misc_wq;
 
+	md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
+	if (!md_misc_wq)
+		goto err_rdev_misc_wq;
+
 	if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
 		goto err_md;
@@ -9490,6 +9512,8 @@ static int __init md_init(void)
 err_mdp:
 	unregister_blkdev(MD_MAJOR, "md");
 err_md:
+	destroy_workqueue(md_rdev_misc_wq);
+err_rdev_misc_wq:
 	destroy_workqueue(md_misc_wq);
 err_misc_wq:
 	destroy_workqueue(md_wq);
@@ -9776,6 +9800,7 @@ static __exit void md_exit(void)
 		 * destroy_workqueue() below will wait for that to complete.
 		 */
 	}
+	destroy_workqueue(md_rdev_misc_wq);
 	destroy_workqueue(md_misc_wq);
 	destroy_workqueue(md_wq);
 }
@@ -9785,7 +9810,7 @@ module_exit(md_exit)
 static int get_ro(char *buffer, const struct kernel_param *kp)
 {
-	return sprintf(buffer, "%d", start_readonly);
+	return sprintf(buffer, "%d\n", start_readonly);
 }
 
 static int set_ro(const char *val, const struct kernel_param *kp)
 {
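
The md.c hunks above boil down to two related patterns: delayed rdev deletion moves to its own workqueue (md_rdev_misc_wq), so callers that only need to wait for rdev teardown can flush that queue alone, and the remaining flushes of md_misc_wq are guarded by work_pending() so they are skipped when nothing is queued. A condensed sketch with made-up demo_* names, not the md code itself:

	#include <linux/slab.h>
	#include <linux/workqueue.h>

	static struct workqueue_struct *demo_rdev_wq;

	struct demo_rdev {
		struct work_struct del_work;
	};

	static void demo_delayed_delete(struct work_struct *ws)
	{
		struct demo_rdev *rdev = container_of(ws, struct demo_rdev, del_work);

		/* tear the device down outside the caller's locks */
		kfree(rdev);
	}

	static void demo_schedule_delete(struct demo_rdev *rdev)
	{
		INIT_WORK(&rdev->del_work, demo_delayed_delete);
		queue_work(demo_rdev_wq, &rdev->del_work);
	}

	static void demo_wait_for_deletes(struct demo_rdev *rdev)
	{
		/* only pay for a full flush when a deletion is actually queued */
		if (work_pending(&rdev->del_work))
			flush_workqueue(demo_rdev_wq);
	}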
...
@@ -497,6 +497,7 @@ struct mddev {
 	void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
 	struct md_cluster_info		*cluster_info;
 	unsigned int			good_device_nr;	/* good device num within cluster raid */
+	unsigned int			noio_flag; /* for memalloc scope API */
 
 	bool	has_superblocks:1;
 	bool	fail_last_dev:1;
...
@@ -296,22 +296,17 @@ static void reschedule_retry(struct r1bio *r1_bio)
 static void call_bio_endio(struct r1bio *r1_bio)
 {
 	struct bio *bio = r1_bio->master_bio;
-	struct r1conf *conf = r1_bio->mddev->private;
 
 	if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
 		bio->bi_status = BLK_STS_IOERR;
 
 	bio_endio(bio);
-	/*
-	 * Wake up any possible resync thread that waits for the device
-	 * to go idle.
-	 */
-	allow_barrier(conf, r1_bio->sector);
 }
 
 static void raid_end_bio_io(struct r1bio *r1_bio)
 {
 	struct bio *bio = r1_bio->master_bio;
+	struct r1conf *conf = r1_bio->mddev->private;
 
 	/* if nobody has done the final endio yet, do it now */
 	if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
@@ -322,6 +317,12 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
 
 		call_bio_endio(r1_bio);
 	}
+	/*
+	 * Wake up any possible resync thread that waits for the device
+	 * to go idle. All I/Os, even write-behind writes, are done.
+	 */
+	allow_barrier(conf, r1_bio->sector);
+
 	free_r1bio(r1_bio);
 }
...
@@ -180,7 +180,7 @@ struct r1bio {
 	 * if the IO is in WRITE direction, then multiple bios are used.
 	 * We choose the number when they are allocated.
 	 */
-	struct bio		*bios[0];
+	struct bio		*bios[];
 	/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/
 };
...
@@ -153,7 +153,7 @@ struct r10bio {
 		};
 		sector_t	addr;
 		int		devnum;
-	} devs[0];
+	} devs[];
 };
 
 /* bits for r10bio.state */
...
@@ -2215,10 +2215,13 @@ static int grow_stripes(struct r5conf *conf, int num)
 }
 
 /**
- * scribble_len - return the required size of the scribble region
+ * scribble_alloc - allocate percpu scribble buffer for required size
+ *		    of the scribble region
+ * @percpu - from for_each_present_cpu() of the caller
  * @num - total number of disks in the array
+ * @cnt - scribble objs count for required size of the scribble region
  *
- * The size must be enough to contain:
+ * The scribble buffer size must be enough to contain:
  * 1/ a struct page pointer for each device in the array +2
  * 2/ room to convert each entry in (1) to its corresponding dma
  *    (dma_map_page()) or page (page_address()) address.
@@ -2228,14 +2231,19 @@ static int grow_stripes(struct r5conf *conf, int num)
  *    of the P and Q blocks.
  */
 static int scribble_alloc(struct raid5_percpu *percpu,
-			  int num, int cnt, gfp_t flags)
+			  int num, int cnt)
 {
 	size_t obj_size =
 		sizeof(struct page *) * (num+2) +
 		sizeof(addr_conv_t) * (num+2);
 	void *scribble;
 
-	scribble = kvmalloc_array(cnt, obj_size, flags);
+	/*
+	 * If we are in the raid array suspend context, we are also in a
+	 * memalloc noio scope, so there is no risk of recursive memory
+	 * reclaim I/O even with the GFP_KERNEL flag.
+	 */
+	scribble = kvmalloc_array(cnt, obj_size, GFP_KERNEL);
 	if (!scribble)
 		return -ENOMEM;
@@ -2267,8 +2275,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
 		percpu = per_cpu_ptr(conf->percpu, cpu);
 		err = scribble_alloc(percpu, new_disks,
-				     new_sectors / STRIPE_SECTORS,
-				     GFP_NOIO);
+				     new_sectors / STRIPE_SECTORS);
 		if (err)
 			break;
 	}
@@ -6759,8 +6766,7 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu
 			       conf->previous_raid_disks),
 			   max(conf->chunk_sectors,
 			       conf->prev_chunk_sectors)
-			   / STRIPE_SECTORS,
-			   GFP_KERNEL)) {
+			   / STRIPE_SECTORS)) {
 		free_scratch_buffer(conf, percpu);
 		return -ENOMEM;
 	}