Commit 483cbbed authored by Alexei Naberezhnov, committed by Song Liu

md/raid5: fix 'out of memory' during raid cache recovery

This fixes the case where md array assembly fails because raid cache recovery
is unable to allocate a stripe, despite attempts to replay stripes and increase
the cache size. This happens because stripes released by r5c_recovery_replay_stripes
and raid5_set_cache_size do not become available for allocation immediately.
Released stripes are first placed on the conf->released_stripes list and require
the md thread to merge them onto conf->inactive_list before they can be allocated.

This patch allows the final allocation attempt during cache recovery to wait for
new stripes to become available for allocation.

Cc: linux-raid@vger.kernel.org
Cc: Shaohua Li <shli@kernel.org>
Cc: linux-stable <stable@vger.kernel.org> # 4.10+
Fixes: b4c625c6 ("md/r5cache: r5cache recovery: part 1")
Signed-off-by: Alexei Naberezhnov <anaberezhnov@fb.com>
Signed-off-by: Song Liu <songliubraving@fb.com>
parent f17b5f06
...@@ -1935,12 +1935,14 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf, ...@@ -1935,12 +1935,14 @@ r5l_recovery_replay_one_stripe(struct r5conf *conf,
} }
static struct stripe_head * static struct stripe_head *
r5c_recovery_alloc_stripe(struct r5conf *conf, r5c_recovery_alloc_stripe(
sector_t stripe_sect) struct r5conf *conf,
sector_t stripe_sect,
int noblock)
{ {
struct stripe_head *sh; struct stripe_head *sh;
sh = raid5_get_active_stripe(conf, stripe_sect, 0, 1, 0); sh = raid5_get_active_stripe(conf, stripe_sect, 0, noblock, 0);
if (!sh) if (!sh)
return NULL; /* no more stripe available */ return NULL; /* no more stripe available */
...@@ -2150,7 +2152,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, ...@@ -2150,7 +2152,7 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
stripe_sect); stripe_sect);
if (!sh) { if (!sh) {
sh = r5c_recovery_alloc_stripe(conf, stripe_sect); sh = r5c_recovery_alloc_stripe(conf, stripe_sect, 1);
/* /*
* cannot get stripe from raid5_get_active_stripe * cannot get stripe from raid5_get_active_stripe
* try replay some stripes * try replay some stripes
...@@ -2159,20 +2161,29 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log, ...@@ -2159,20 +2161,29 @@ r5c_recovery_analyze_meta_block(struct r5l_log *log,
r5c_recovery_replay_stripes( r5c_recovery_replay_stripes(
cached_stripe_list, ctx); cached_stripe_list, ctx);
sh = r5c_recovery_alloc_stripe( sh = r5c_recovery_alloc_stripe(
conf, stripe_sect); conf, stripe_sect, 1);
} }
if (!sh) { if (!sh) {
int new_size = conf->min_nr_stripes * 2;
pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n", pr_debug("md/raid:%s: Increasing stripe cache size to %d to recovery data on journal.\n",
mdname(mddev), mdname(mddev),
conf->min_nr_stripes * 2); new_size);
raid5_set_cache_size(mddev, ret = raid5_set_cache_size(mddev, new_size);
conf->min_nr_stripes * 2); if (conf->min_nr_stripes <= new_size / 2) {
sh = r5c_recovery_alloc_stripe(conf, pr_err("md/raid:%s: Cannot increase cache size, ret=%d, new_size=%d, min_nr_stripes=%d, max_nr_stripes=%d\n",
stripe_sect); mdname(mddev),
ret,
new_size,
conf->min_nr_stripes,
conf->max_nr_stripes);
return -ENOMEM;
}
sh = r5c_recovery_alloc_stripe(
conf, stripe_sect, 0);
} }
if (!sh) { if (!sh) {
pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n", pr_err("md/raid:%s: Cannot get enough stripes due to memory pressure. Recovery failed.\n",
mdname(mddev)); mdname(mddev));
return -ENOMEM; return -ENOMEM;
} }
list_add_tail(&sh->lru, cached_stripe_list); list_add_tail(&sh->lru, cached_stripe_list);
......
...@@ -6369,6 +6369,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page) ...@@ -6369,6 +6369,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
int int
raid5_set_cache_size(struct mddev *mddev, int size) raid5_set_cache_size(struct mddev *mddev, int size)
{ {
int result = 0;
struct r5conf *conf = mddev->private; struct r5conf *conf = mddev->private;
if (size <= 16 || size > 32768) if (size <= 16 || size > 32768)
...@@ -6385,11 +6386,14 @@ raid5_set_cache_size(struct mddev *mddev, int size) ...@@ -6385,11 +6386,14 @@ raid5_set_cache_size(struct mddev *mddev, int size)
mutex_lock(&conf->cache_size_mutex); mutex_lock(&conf->cache_size_mutex);
while (size > conf->max_nr_stripes) while (size > conf->max_nr_stripes)
if (!grow_one_stripe(conf, GFP_KERNEL)) if (!grow_one_stripe(conf, GFP_KERNEL)) {
conf->min_nr_stripes = conf->max_nr_stripes;
result = -ENOMEM;
break; break;
}
mutex_unlock(&conf->cache_size_mutex); mutex_unlock(&conf->cache_size_mutex);
return 0; return result;
} }
EXPORT_SYMBOL(raid5_set_cache_size); EXPORT_SYMBOL(raid5_set_cache_size);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment