Commit 5ce10a38 authored by Yu Kuai's avatar Yu Kuai Committed by Song Liu

md: don't fail action_store() if sync_thread is not registered

MD_RECOVERY_RUNNING is always set when trying to register a new
sync_thread; however, if md_start_sync() turns out to do nothing,
MD_RECOVERY_RUNNING is cleared again. During that race window,
action_store() returns -EBUSY, which causes some mdadm tests to
fail. For example:

The test 07reshape5intr will add a new disk to array, then start
reshape:

mdadm /dev/md0 --add /dev/xxx
mdadm --grow /dev/md0 -n 3

And add_bound_rdev() from mdadm --add will set MD_RECOVERY_NEEDED,
then during the race window, mdadm --grow will fail.

Fix the problem by waiting in action_store() during the race window,
fail only if sync_thread is registered.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240611132251.1967786-8-yukuai1@huaweicloud.com
parent df79234b
...@@ -753,7 +753,6 @@ int mddev_init(struct mddev *mddev) ...@@ -753,7 +753,6 @@ int mddev_init(struct mddev *mddev)
mutex_init(&mddev->open_mutex); mutex_init(&mddev->open_mutex);
mutex_init(&mddev->reconfig_mutex); mutex_init(&mddev->reconfig_mutex);
mutex_init(&mddev->sync_mutex);
mutex_init(&mddev->suspend_mutex); mutex_init(&mddev->suspend_mutex);
mutex_init(&mddev->bitmap_info.mutex); mutex_init(&mddev->bitmap_info.mutex);
INIT_LIST_HEAD(&mddev->disks); INIT_LIST_HEAD(&mddev->disks);
...@@ -5021,34 +5020,6 @@ void md_unfrozen_sync_thread(struct mddev *mddev) ...@@ -5021,34 +5020,6 @@ void md_unfrozen_sync_thread(struct mddev *mddev)
} }
EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread); EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
/*
 * Make the array idle: clear MD_RECOVERY_FROZEN and stop any running
 * sync_thread.  sync_mutex serializes this against frozen_sync_thread()
 * (per the md.h comment, it synchronizes "idle" and "frozen" writers
 * for action_store()).
 */
static void idle_sync_thread(struct mddev *mddev)
{
	mutex_lock(&mddev->sync_mutex);
	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);

	/* mddev_lock() is interruptible; bail out (dropping sync_mutex)
	 * if it fails rather than proceeding without the reconfig lock. */
	if (mddev_lock(mddev)) {
		mutex_unlock(&mddev->sync_mutex);
		return;
	}

	/* second arg false: presumably "do not wait for full shutdown"
	 * vs. a synchronous stop — TODO confirm against stop_sync_thread() */
	stop_sync_thread(mddev, false);
	mutex_unlock(&mddev->sync_mutex);
}
/*
 * Freeze recovery: set MD_RECOVERY_FROZEN and stop any running
 * sync_thread.  Mirror image of idle_sync_thread(); the only difference
 * is set_bit vs clear_bit on MD_RECOVERY_FROZEN.  sync_mutex serializes
 * the two against each other for action_store().
 */
static void frozen_sync_thread(struct mddev *mddev)
{
	mutex_lock(&mddev->sync_mutex);
	set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);

	/* mddev_lock() is interruptible; on failure release sync_mutex
	 * and give up — the frozen bit stays set in that case. */
	if (mddev_lock(mddev)) {
		mutex_unlock(&mddev->sync_mutex);
		return;
	}

	/* second arg false: presumably a non-waiting stop request —
	 * TODO confirm against stop_sync_thread()'s contract */
	stop_sync_thread(mddev, false);
	mutex_unlock(&mddev->sync_mutex);
}
static int mddev_start_reshape(struct mddev *mddev) static int mddev_start_reshape(struct mddev *mddev)
{ {
int ret; int ret;
...@@ -5056,24 +5027,13 @@ static int mddev_start_reshape(struct mddev *mddev) ...@@ -5056,24 +5027,13 @@ static int mddev_start_reshape(struct mddev *mddev)
if (mddev->pers->start_reshape == NULL) if (mddev->pers->start_reshape == NULL)
return -EINVAL; return -EINVAL;
ret = mddev_lock(mddev);
if (ret)
return ret;
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
mddev_unlock(mddev);
return -EBUSY;
}
if (mddev->reshape_position == MaxSector || if (mddev->reshape_position == MaxSector ||
mddev->pers->check_reshape == NULL || mddev->pers->check_reshape == NULL ||
mddev->pers->check_reshape(mddev)) { mddev->pers->check_reshape(mddev)) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
ret = mddev->pers->start_reshape(mddev); ret = mddev->pers->start_reshape(mddev);
if (ret) { if (ret)
mddev_unlock(mddev);
return ret; return ret;
}
} else { } else {
/* /*
* If reshape is still in progress, and md_check_recovery() can * If reshape is still in progress, and md_check_recovery() can
...@@ -5083,7 +5043,6 @@ static int mddev_start_reshape(struct mddev *mddev) ...@@ -5083,7 +5043,6 @@ static int mddev_start_reshape(struct mddev *mddev)
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
} }
mddev_unlock(mddev);
sysfs_notify_dirent_safe(mddev->sysfs_degraded); sysfs_notify_dirent_safe(mddev->sysfs_degraded);
return 0; return 0;
} }
...@@ -5097,36 +5056,53 @@ action_store(struct mddev *mddev, const char *page, size_t len) ...@@ -5097,36 +5056,53 @@ action_store(struct mddev *mddev, const char *page, size_t len)
if (!mddev->pers || !mddev->pers->sync_request) if (!mddev->pers || !mddev->pers->sync_request)
return -EINVAL; return -EINVAL;
retry:
if (work_busy(&mddev->sync_work))
flush_work(&mddev->sync_work);
ret = mddev_lock(mddev);
if (ret)
return ret;
if (work_busy(&mddev->sync_work)) {
mddev_unlock(mddev);
goto retry;
}
action = md_sync_action_by_name(page); action = md_sync_action_by_name(page);
/* TODO: mdadm rely on "idle" to start sync_thread. */ /* TODO: mdadm rely on "idle" to start sync_thread. */
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
switch (action) { switch (action) {
case ACTION_FROZEN: case ACTION_FROZEN:
frozen_sync_thread(mddev); md_frozen_sync_thread(mddev);
return len; ret = len;
goto out;
case ACTION_IDLE: case ACTION_IDLE:
idle_sync_thread(mddev); md_idle_sync_thread(mddev);
break; break;
case ACTION_RESHAPE: case ACTION_RESHAPE:
case ACTION_RECOVER: case ACTION_RECOVER:
case ACTION_CHECK: case ACTION_CHECK:
case ACTION_REPAIR: case ACTION_REPAIR:
case ACTION_RESYNC: case ACTION_RESYNC:
return -EBUSY; ret = -EBUSY;
goto out;
default: default:
return -EINVAL; ret = -EINVAL;
goto out;
} }
} else { } else {
switch (action) { switch (action) {
case ACTION_FROZEN: case ACTION_FROZEN:
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
return len; ret = len;
goto out;
case ACTION_RESHAPE: case ACTION_RESHAPE:
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
ret = mddev_start_reshape(mddev); ret = mddev_start_reshape(mddev);
if (ret) if (ret)
return ret; goto out;
break; break;
case ACTION_RECOVER: case ACTION_RECOVER:
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
...@@ -5144,7 +5120,8 @@ action_store(struct mddev *mddev, const char *page, size_t len) ...@@ -5144,7 +5120,8 @@ action_store(struct mddev *mddev, const char *page, size_t len)
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
break; break;
default: default:
return -EINVAL; ret = -EINVAL;
goto out;
} }
} }
...@@ -5152,14 +5129,18 @@ action_store(struct mddev *mddev, const char *page, size_t len) ...@@ -5152,14 +5129,18 @@ action_store(struct mddev *mddev, const char *page, size_t len)
/* A write to sync_action is enough to justify /* A write to sync_action is enough to justify
* canceling read-auto mode * canceling read-auto mode
*/ */
flush_work(&mddev->sync_work);
mddev->ro = MD_RDWR; mddev->ro = MD_RDWR;
md_wakeup_thread(mddev->sync_thread); md_wakeup_thread(mddev->sync_thread);
} }
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
sysfs_notify_dirent_safe(mddev->sysfs_action); sysfs_notify_dirent_safe(mddev->sysfs_action);
return len; ret = len;
out:
mddev_unlock(mddev);
return ret;
} }
static struct md_sysfs_entry md_scan_mode = static struct md_sysfs_entry md_scan_mode =
......
...@@ -595,8 +595,6 @@ struct mddev { ...@@ -595,8 +595,6 @@ struct mddev {
*/ */
struct list_head deleting; struct list_head deleting;
/* Used to synchronize idle and frozen for action_store() */
struct mutex sync_mutex;
/* The sequence number for sync thread */ /* The sequence number for sync thread */
atomic_t sync_seq; atomic_t sync_seq;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment