Commit 845b9e22 authored by Artur Paszkiewicz's avatar Artur Paszkiewicz Committed by Shaohua Li

raid5-ppl: use resize_stripes() when enabling or disabling ppl

Use resize_stripes() instead of raid5_reset_stripe_cache() to allocate
or free sh->ppl_page at runtime for all stripes in the stripe cache.
raid5_reset_stripe_cache() required suspending the mddev and could
deadlock because of GFP_KERNEL allocations.

Move the 'newsize' check to check_reshape() to allow reallocating the
stripes with the same number of disks. Allocate sh->ppl_page in
alloc_stripe() instead of grow_buffers(). Pass 'struct r5conf *conf' as
a parameter to alloc_stripe() because it is needed to check whether to
allocate ppl_page. Add free_stripe() and use it to free stripes rather
than directly call kmem_cache_free(). Also free sh->ppl_page in
free_stripe().

Set MD_HAS_PPL at the end of ppl_init_log() instead of explicitly
setting it in advance and add another parameter to log_init() to allow
calling ppl_init_log() without the bit set. Don't try to calculate
partial parity or add a stripe to log if it does not have ppl_page set.

Enabling ppl can now be performed without suspending the mddev, because
the log won't be used until new stripes are allocated with ppl_page.
Calling mddev_suspend/resume is still necessary when disabling ppl,
because we want all stripes to finish before stopping the log, but
resize_stripes() can be called after mddev_resume() when ppl is no
longer active.
Suggested-by: NeilBrown <neilb@suse.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
parent 94568f64
...@@ -93,11 +93,12 @@ static inline void log_exit(struct r5conf *conf) ...@@ -93,11 +93,12 @@ static inline void log_exit(struct r5conf *conf)
ppl_exit_log(conf); ppl_exit_log(conf);
} }
static inline int log_init(struct r5conf *conf, struct md_rdev *journal_dev) static inline int log_init(struct r5conf *conf, struct md_rdev *journal_dev,
bool ppl)
{ {
if (journal_dev) if (journal_dev)
return r5l_init_log(conf, journal_dev); return r5l_init_log(conf, journal_dev);
else if (raid5_has_ppl(conf)) else if (ppl)
return ppl_init_log(conf); return ppl_init_log(conf);
return 0; return 0;
......
...@@ -328,7 +328,7 @@ int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh) ...@@ -328,7 +328,7 @@ int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh)
struct ppl_io_unit *io = sh->ppl_io; struct ppl_io_unit *io = sh->ppl_io;
struct ppl_log *log; struct ppl_log *log;
if (io || test_bit(STRIPE_SYNCING, &sh->state) || if (io || test_bit(STRIPE_SYNCING, &sh->state) || !sh->ppl_page ||
!test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags) || !test_bit(R5_Wantwrite, &sh->dev[sh->pd_idx].flags) ||
!test_bit(R5_Insync, &sh->dev[sh->pd_idx].flags)) { !test_bit(R5_Insync, &sh->dev[sh->pd_idx].flags)) {
clear_bit(STRIPE_LOG_TRAPPED, &sh->state); clear_bit(STRIPE_LOG_TRAPPED, &sh->state);
...@@ -1204,6 +1204,7 @@ int ppl_init_log(struct r5conf *conf) ...@@ -1204,6 +1204,7 @@ int ppl_init_log(struct r5conf *conf)
} }
conf->log_private = ppl_conf; conf->log_private = ppl_conf;
set_bit(MD_HAS_PPL, &ppl_conf->mddev->flags);
return 0; return 0;
err: err:
......
...@@ -471,11 +471,6 @@ static void shrink_buffers(struct stripe_head *sh) ...@@ -471,11 +471,6 @@ static void shrink_buffers(struct stripe_head *sh)
sh->dev[i].page = NULL; sh->dev[i].page = NULL;
put_page(p); put_page(p);
} }
if (sh->ppl_page) {
put_page(sh->ppl_page);
sh->ppl_page = NULL;
}
} }
static int grow_buffers(struct stripe_head *sh, gfp_t gfp) static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
...@@ -493,12 +488,6 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp) ...@@ -493,12 +488,6 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
sh->dev[i].orig_page = page; sh->dev[i].orig_page = page;
} }
if (raid5_has_ppl(sh->raid_conf)) {
sh->ppl_page = alloc_page(gfp);
if (!sh->ppl_page)
return 1;
}
return 0; return 0;
} }
...@@ -2132,8 +2121,15 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) ...@@ -2132,8 +2121,15 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
put_cpu(); put_cpu();
} }
/*
 * Free a stripe_head back to its slab cache @sc, first releasing the
 * partial-parity-log page (sh->ppl_page) if one was attached when the
 * stripe was allocated. Counterpart of alloc_stripe(); callers use this
 * instead of calling kmem_cache_free() directly so ppl_page is never leaked.
 */
static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
{
if (sh->ppl_page)
__free_page(sh->ppl_page);
kmem_cache_free(sc, sh);
}
static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
int disks) int disks, struct r5conf *conf)
{ {
struct stripe_head *sh; struct stripe_head *sh;
int i; int i;
...@@ -2147,6 +2143,7 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, ...@@ -2147,6 +2143,7 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
INIT_LIST_HEAD(&sh->r5c); INIT_LIST_HEAD(&sh->r5c);
INIT_LIST_HEAD(&sh->log_list); INIT_LIST_HEAD(&sh->log_list);
atomic_set(&sh->count, 1); atomic_set(&sh->count, 1);
sh->raid_conf = conf;
sh->log_start = MaxSector; sh->log_start = MaxSector;
for (i = 0; i < disks; i++) { for (i = 0; i < disks; i++) {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
...@@ -2154,6 +2151,14 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, ...@@ -2154,6 +2151,14 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
bio_init(&dev->req, &dev->vec, 1); bio_init(&dev->req, &dev->vec, 1);
bio_init(&dev->rreq, &dev->rvec, 1); bio_init(&dev->rreq, &dev->rvec, 1);
} }
if (raid5_has_ppl(conf)) {
sh->ppl_page = alloc_page(gfp);
if (!sh->ppl_page) {
free_stripe(sc, sh);
sh = NULL;
}
}
} }
return sh; return sh;
} }
...@@ -2161,15 +2166,13 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) ...@@ -2161,15 +2166,13 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
{ {
struct stripe_head *sh; struct stripe_head *sh;
sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size); sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf);
if (!sh) if (!sh)
return 0; return 0;
sh->raid_conf = conf;
if (grow_buffers(sh, gfp)) { if (grow_buffers(sh, gfp)) {
shrink_buffers(sh); shrink_buffers(sh);
kmem_cache_free(conf->slab_cache, sh); free_stripe(conf->slab_cache, sh);
return 0; return 0;
} }
sh->hash_lock_index = sh->hash_lock_index =
...@@ -2314,9 +2317,6 @@ static int resize_stripes(struct r5conf *conf, int newsize) ...@@ -2314,9 +2317,6 @@ static int resize_stripes(struct r5conf *conf, int newsize)
int i; int i;
int hash, cnt; int hash, cnt;
if (newsize <= conf->pool_size)
return 0; /* never bother to shrink */
err = md_allow_write(conf->mddev); err = md_allow_write(conf->mddev);
if (err) if (err)
return err; return err;
...@@ -2332,11 +2332,10 @@ static int resize_stripes(struct r5conf *conf, int newsize) ...@@ -2332,11 +2332,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
mutex_lock(&conf->cache_size_mutex); mutex_lock(&conf->cache_size_mutex);
for (i = conf->max_nr_stripes; i; i--) { for (i = conf->max_nr_stripes; i; i--) {
nsh = alloc_stripe(sc, GFP_KERNEL, newsize); nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf);
if (!nsh) if (!nsh)
break; break;
nsh->raid_conf = conf;
list_add(&nsh->lru, &newstripes); list_add(&nsh->lru, &newstripes);
} }
if (i) { if (i) {
...@@ -2344,7 +2343,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) ...@@ -2344,7 +2343,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
while (!list_empty(&newstripes)) { while (!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru); nsh = list_entry(newstripes.next, struct stripe_head, lru);
list_del(&nsh->lru); list_del(&nsh->lru);
kmem_cache_free(sc, nsh); free_stripe(sc, nsh);
} }
kmem_cache_destroy(sc); kmem_cache_destroy(sc);
mutex_unlock(&conf->cache_size_mutex); mutex_unlock(&conf->cache_size_mutex);
...@@ -2370,7 +2369,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) ...@@ -2370,7 +2369,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
nsh->dev[i].orig_page = osh->dev[i].page; nsh->dev[i].orig_page = osh->dev[i].page;
} }
nsh->hash_lock_index = hash; nsh->hash_lock_index = hash;
kmem_cache_free(conf->slab_cache, osh); free_stripe(conf->slab_cache, osh);
cnt++; cnt++;
if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS + if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS +
!!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) { !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) {
...@@ -2447,7 +2446,7 @@ static int drop_one_stripe(struct r5conf *conf) ...@@ -2447,7 +2446,7 @@ static int drop_one_stripe(struct r5conf *conf)
return 0; return 0;
BUG_ON(atomic_read(&sh->count)); BUG_ON(atomic_read(&sh->count));
shrink_buffers(sh); shrink_buffers(sh);
kmem_cache_free(conf->slab_cache, sh); free_stripe(conf->slab_cache, sh);
atomic_dec(&conf->active_stripes); atomic_dec(&conf->active_stripes);
conf->max_nr_stripes--; conf->max_nr_stripes--;
return 1; return 1;
...@@ -3170,7 +3169,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s, ...@@ -3170,7 +3169,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
s->locked++; s->locked++;
} }
if (raid5_has_ppl(sh->raid_conf) && if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page &&
test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) && test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) &&
!test_bit(STRIPE_FULL_WRITE, &sh->state) && !test_bit(STRIPE_FULL_WRITE, &sh->state) &&
test_bit(R5_Insync, &sh->dev[pd_idx].flags)) test_bit(R5_Insync, &sh->dev[pd_idx].flags))
...@@ -7427,7 +7426,7 @@ static int raid5_run(struct mddev *mddev) ...@@ -7427,7 +7426,7 @@ static int raid5_run(struct mddev *mddev)
blk_queue_max_hw_sectors(mddev->queue, UINT_MAX); blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
} }
if (log_init(conf, journal_dev)) if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
goto abort; goto abort;
return 0; return 0;
...@@ -7636,7 +7635,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) ...@@ -7636,7 +7635,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
* The array is in readonly mode if journal is missing, so no * The array is in readonly mode if journal is missing, so no
* write requests running. We should be safe * write requests running. We should be safe
*/ */
log_init(conf, rdev); log_init(conf, rdev, false);
return 0; return 0;
} }
if (mddev->recovery_disabled == conf->recovery_disabled) if (mddev->recovery_disabled == conf->recovery_disabled)
...@@ -7786,6 +7785,9 @@ static int check_reshape(struct mddev *mddev) ...@@ -7786,6 +7785,9 @@ static int check_reshape(struct mddev *mddev)
mddev->chunk_sectors) mddev->chunk_sectors)
) < 0) ) < 0)
return -ENOMEM; return -ENOMEM;
if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size)
return 0; /* never bother to shrink */
return resize_stripes(conf, (conf->previous_raid_disks return resize_stripes(conf, (conf->previous_raid_disks
+ mddev->delta_disks)); + mddev->delta_disks));
} }
...@@ -8276,20 +8278,6 @@ static void *raid6_takeover(struct mddev *mddev) ...@@ -8276,20 +8278,6 @@ static void *raid6_takeover(struct mddev *mddev)
return setup_conf(mddev); return setup_conf(mddev);
} }
/*
 * Rebuild the stripe cache from scratch: drop every cached stripe, then
 * grow back up to min_nr_stripes, under cache_size_mutex.
 *
 * NOTE(review): this is the function the patch removes — it required the
 * mddev to be suspended and its GFP_KERNEL allocations could deadlock,
 * which is why the patch replaces its callers with resize_stripes().
 */
static void raid5_reset_stripe_cache(struct mddev *mddev)
{
struct r5conf *conf = mddev->private;
mutex_lock(&conf->cache_size_mutex);
/* discard all currently cached stripes */
while (conf->max_nr_stripes &&
drop_one_stripe(conf))
;
/* reallocate stripes until the configured minimum is reached */
while (conf->min_nr_stripes > conf->max_nr_stripes &&
grow_one_stripe(conf, GFP_KERNEL))
;
mutex_unlock(&conf->cache_size_mutex);
}
static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf) static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
{ {
struct r5conf *conf; struct r5conf *conf;
...@@ -8304,23 +8292,23 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf) ...@@ -8304,23 +8292,23 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
return -ENODEV; return -ENODEV;
} }
if (strncmp(buf, "ppl", 3) == 0 && !raid5_has_ppl(conf)) { if (strncmp(buf, "ppl", 3) == 0) {
/* ppl only works with RAID 5 */ /* ppl only works with RAID 5 */
if (conf->level == 5) { if (!raid5_has_ppl(conf) && conf->level == 5) {
mddev_suspend(mddev); err = log_init(conf, NULL, true);
set_bit(MD_HAS_PPL, &mddev->flags); if (!err) {
err = log_init(conf, NULL); err = resize_stripes(conf, conf->pool_size);
if (!err) if (err)
raid5_reset_stripe_cache(mddev); log_exit(conf);
mddev_resume(mddev); }
} else } else
err = -EINVAL; err = -EINVAL;
} else if (strncmp(buf, "resync", 6) == 0) { } else if (strncmp(buf, "resync", 6) == 0) {
if (raid5_has_ppl(conf)) { if (raid5_has_ppl(conf)) {
mddev_suspend(mddev); mddev_suspend(mddev);
log_exit(conf); log_exit(conf);
raid5_reset_stripe_cache(mddev);
mddev_resume(mddev); mddev_resume(mddev);
err = resize_stripes(conf, conf->pool_size);
} else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) && } else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) &&
r5l_log_disk_error(conf)) { r5l_log_disk_error(conf)) {
bool journal_dev_exists = false; bool journal_dev_exists = false;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment