Commit 5c689e68 authored by Alexander.Boyko, committed by Greg Kroah-Hartman

staging/lustre/ptlrpc: race at req processing

There is a race between ptlrpc_resend_req() and ptlrpc_check_set():
  thread 1 runs ptlrpc_check_set()->after_reply()
  thread 2 runs ptlrpc_resend_req()
The result is a request with rq_resend = 1 and the MSG_REPLAY flag set.
When such a request reaches the server, it causes client eviction.
This patch skips the ptlrpc_resend_req() logic if rq_replied is set,
and clears the rq_resend flag in reply_in_callback() when the client
receives a reply.
Signed-off-by: Alexander Boyko <alexander_boyko@xyratex.com>
Xyratex-bug-id: MRP-1888
Reviewed-on: http://review.whamcloud.com/10471
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-5116
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
Reviewed-by: Chris Horn <hornc@cray.com>
Signed-off-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent a2ff0f97
...@@ -2530,10 +2530,19 @@ EXPORT_SYMBOL(ptlrpc_cleanup_client); ...@@ -2530,10 +2530,19 @@ EXPORT_SYMBOL(ptlrpc_cleanup_client);
void ptlrpc_resend_req(struct ptlrpc_request *req) void ptlrpc_resend_req(struct ptlrpc_request *req)
{ {
DEBUG_REQ(D_HA, req, "going to resend"); DEBUG_REQ(D_HA, req, "going to resend");
spin_lock(&req->rq_lock);
/* Request got reply but linked to the import list still.
Let ptlrpc_check_set() to process it. */
if (ptlrpc_client_replied(req)) {
spin_unlock(&req->rq_lock);
DEBUG_REQ(D_HA, req, "it has reply, so skip it");
return;
}
lustre_msg_set_handle(req->rq_reqmsg, &(struct lustre_handle){ 0 }); lustre_msg_set_handle(req->rq_reqmsg, &(struct lustre_handle){ 0 });
req->rq_status = -EAGAIN; req->rq_status = -EAGAIN;
spin_lock(&req->rq_lock);
req->rq_resend = 1; req->rq_resend = 1;
req->rq_net_err = 0; req->rq_net_err = 0;
req->rq_timedout = 0; req->rq_timedout = 0;
......
...@@ -145,6 +145,8 @@ void reply_in_callback(lnet_event_t *ev) ...@@ -145,6 +145,8 @@ void reply_in_callback(lnet_event_t *ev)
/* Real reply */ /* Real reply */
req->rq_rep_swab_mask = 0; req->rq_rep_swab_mask = 0;
req->rq_replied = 1; req->rq_replied = 1;
/* Got reply, no resend required */
req->rq_resend = 0;
req->rq_reply_off = ev->offset; req->rq_reply_off = ev->offset;
req->rq_nob_received = ev->mlength; req->rq_nob_received = ev->mlength;
/* LNetMDUnlink can't be called under the LNET_LOCK, /* LNetMDUnlink can't be called under the LNET_LOCK,
......
...@@ -505,6 +505,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) ...@@ -505,6 +505,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
/* If this is a re-transmit, we're required to have disengaged /* If this is a re-transmit, we're required to have disengaged
* cleanly from the previous attempt */ * cleanly from the previous attempt */
LASSERT(!request->rq_receiving_reply); LASSERT(!request->rq_receiving_reply);
LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
(request->rq_import->imp_state == LUSTRE_IMP_FULL)));
if (unlikely(obd != NULL && obd->obd_fail)) { if (unlikely(obd != NULL && obd->obd_fail)) {
CDEBUG(D_HA, "muting rpc for failed imp obd %s\n", CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment