Commit 41a9a0dc authored by Guoqing Jiang, committed by Shaohua Li

md-cluster: change resync lock from asynchronous to synchronous

If multiple nodes choose to attempt a resync at the same time
they need to be serialized so they don't duplicate effort. This
serialization is done by locking the 'resync' DLM lock.

Currently if a node cannot get the lock immediately it doesn't
request notification when the lock becomes available (i.e.
DLM_LKF_NOQUEUE is set), so it may not reliably find out when it
is safe to try again.

Rather than trying to arrange an async wake-up when the lock
becomes available, switch to using synchronous locking - this is
a lot easier to think about.  As it is not permitted to block in
the 'raid1d' thread, move the locking to the resync thread.  So
the resync thread is forked immediately, but it blocks until the
resync lock is available. Once the lock is held, it checks again
whether any resync action is needed.

A particular symptom of the current problem is that a node can
get stuck with "resync=pending" indefinitely.
Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
parent 4810d968
...@@ -937,7 +937,6 @@ static void metadata_update_cancel(struct mddev *mddev) ...@@ -937,7 +937,6 @@ static void metadata_update_cancel(struct mddev *mddev)
static int resync_start(struct mddev *mddev) static int resync_start(struct mddev *mddev)
{ {
struct md_cluster_info *cinfo = mddev->cluster_info; struct md_cluster_info *cinfo = mddev->cluster_info;
cinfo->resync_lockres->flags |= DLM_LKF_NOQUEUE;
return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX); return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX);
} }
...@@ -967,7 +966,6 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) ...@@ -967,7 +966,6 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
static int resync_finish(struct mddev *mddev) static int resync_finish(struct mddev *mddev)
{ {
struct md_cluster_info *cinfo = mddev->cluster_info; struct md_cluster_info *cinfo = mddev->cluster_info;
cinfo->resync_lockres->flags &= ~DLM_LKF_NOQUEUE;
dlm_unlock_sync(cinfo->resync_lockres); dlm_unlock_sync(cinfo->resync_lockres);
return resync_info_update(mddev, 0, 0); return resync_info_update(mddev, 0, 0);
} }
......
...@@ -7786,6 +7786,7 @@ void md_do_sync(struct md_thread *thread) ...@@ -7786,6 +7786,7 @@ void md_do_sync(struct md_thread *thread)
char *desc, *action = NULL; char *desc, *action = NULL;
struct blk_plug plug; struct blk_plug plug;
bool cluster_resync_finished = false; bool cluster_resync_finished = false;
int ret;
/* just incase thread restarts... */ /* just incase thread restarts... */
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
...@@ -7795,6 +7796,19 @@ void md_do_sync(struct md_thread *thread) ...@@ -7795,6 +7796,19 @@ void md_do_sync(struct md_thread *thread)
return; return;
} }
if (mddev_is_clustered(mddev)) {
ret = md_cluster_ops->resync_start(mddev);
if (ret)
goto skip;
if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
&& ((unsigned long long)mddev->curr_resync_completed
< (unsigned long long)mddev->resync_max_sectors))
goto skip;
}
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) { if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
desc = "data-check"; desc = "data-check";
...@@ -8226,18 +8240,9 @@ static void md_start_sync(struct work_struct *ws) ...@@ -8226,18 +8240,9 @@ static void md_start_sync(struct work_struct *ws)
struct mddev *mddev = container_of(ws, struct mddev, del_work); struct mddev *mddev = container_of(ws, struct mddev, del_work);
int ret = 0; int ret = 0;
if (mddev_is_clustered(mddev)) {
ret = md_cluster_ops->resync_start(mddev);
if (ret) {
mddev->sync_thread = NULL;
goto out;
}
}
mddev->sync_thread = md_register_thread(md_do_sync, mddev->sync_thread = md_register_thread(md_do_sync,
mddev, mddev,
"resync"); "resync");
out:
if (!mddev->sync_thread) { if (!mddev->sync_thread) {
if (!(mddev_is_clustered(mddev) && ret == -EAGAIN)) if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
printk(KERN_ERR "%s: could not start resync" printk(KERN_ERR "%s: could not start resync"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment