Commit 59661212 authored by tsutomu.owa@toshiba.co.jp's avatar tsutomu.owa@toshiba.co.jp Committed by David Teigland

DLM: retry rcom when dlm_wait_function is timed out.

If a node sends a DLM_RCOM_STATUS command and an error occurs on the
receiving side, the DLM_RCOM_STATUS_REPLY response may not be returned.
We retransmitted the DLM_RCOM_STATUS command so that we do not wait for
an infinite response.
Signed-off-by: default avatarTadashi Miyauchi <miyauchi@toshiba-tops.co.jp>
Signed-off-by: default avatarTsutomu Owa <tsutomu.owa@toshiba.co.jp>
Signed-off-by: default avatarDavid Teigland <teigland@redhat.com>
parent c7355827
...@@ -155,6 +155,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) ...@@ -155,6 +155,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
goto out; goto out;
} }
retry:
error = create_rcom(ls, nodeid, DLM_RCOM_STATUS, error = create_rcom(ls, nodeid, DLM_RCOM_STATUS,
sizeof(struct rcom_status), &rc, &mh); sizeof(struct rcom_status), &rc, &mh);
if (error) if (error)
...@@ -169,6 +170,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) ...@@ -169,6 +170,8 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags)
error = dlm_wait_function(ls, &rcom_response); error = dlm_wait_function(ls, &rcom_response);
disallow_sync_reply(ls); disallow_sync_reply(ls);
if (error == -ETIMEDOUT)
goto retry;
if (error) if (error)
goto out; goto out;
...@@ -276,6 +279,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) ...@@ -276,6 +279,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
ls->ls_recover_nodeid = nodeid; ls->ls_recover_nodeid = nodeid;
retry:
error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh); error = create_rcom(ls, nodeid, DLM_RCOM_NAMES, last_len, &rc, &mh);
if (error) if (error)
goto out; goto out;
...@@ -288,6 +292,8 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) ...@@ -288,6 +292,8 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
error = dlm_wait_function(ls, &rcom_response); error = dlm_wait_function(ls, &rcom_response);
disallow_sync_reply(ls); disallow_sync_reply(ls);
if (error == -ETIMEDOUT)
goto retry;
out: out:
return error; return error;
} }
......
...@@ -52,6 +52,10 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls)) ...@@ -52,6 +52,10 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
dlm_config.ci_recover_timer * HZ); dlm_config.ci_recover_timer * HZ);
if (rv) if (rv)
break; break;
if (test_bit(LSFL_RCOM_WAIT, &ls->ls_flags)) {
log_debug(ls, "dlm_wait_function timed out");
return -ETIMEDOUT;
}
} }
if (dlm_recovery_stopped(ls)) { if (dlm_recovery_stopped(ls)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment