Commit 81ea39ec authored by Vitaly Fertman, committed by Greg Kroah-Hartman

staging/lustre/ptlrpc: lost bulk leads to a hang

The reverse order of request_out_callback() and reply_in_callback()
puts the RPC into the UNREGISTERING phase, which waits for both the
RPC and bulk MD unlinks, whereas only the RPC MD unlink has been
called so far. If the bulk is lost, the RPC hangs: even expired_set
does not check for the UNREGISTERING phase.

The same happens for a write if the server returns an error.

This phase is therefore ambiguous; split it into UNREG_RPC and UNREG_BULK.
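
As an illustrative aside (a minimal standalone sketch, not part of the
patch; the helper name is hypothetical): with the single phase the set
poller had to wait for the reply and bulk MD unlinks together, so a
lost bulk pinned the RPC forever, while after the split each wait
condition is checked on its own, as ptlrpc_check_set() does below.

static int sketch_unreg_busy(enum rq_phase phase,
			     int recv_or_unlink, int bulk_active)
{
	/* UNREG_RPC waits only for the request/reply MDs ... */
	if (phase == RQ_PHASE_UNREG_RPC)
		return recv_or_unlink;
	/* ... and UNREG_BULK waits only for the bulk MD */
	return bulk_active;
}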
Signed-off-by: Vitaly Fertman <vitaly.fertman@seagate.com>
Seagate-bug-id: MRP-2953, MRP-3206
Reviewed-by: Andriy Skulysh <andriy.skulysh@seagate.com>
Reviewed-by: Alexey Leonidovich Lyashkov <alexey.lyashkov@seagate.com>
Tested-by: Elena V. Gryaznova <elena.gryaznova@seagate.com>
Reviewed-on: http://review.whamcloud.com/19953
Reviewed-by: Chris Horn <hornc@cray.com>
Reviewed-by: Ann Koehler <amk@cray.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Signed-off-by: Oleg Drokin <green@linuxhacker.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 63a46519
@@ -480,8 +480,9 @@ enum rq_phase {
 	RQ_PHASE_BULK		= 0xebc0de02,
 	RQ_PHASE_INTERPRET	= 0xebc0de03,
 	RQ_PHASE_COMPLETE	= 0xebc0de04,
-	RQ_PHASE_UNREGISTERING	= 0xebc0de05,
-	RQ_PHASE_UNDEFINED	= 0xebc0de06
+	RQ_PHASE_UNREG_RPC	= 0xebc0de05,
+	RQ_PHASE_UNREG_BULK	= 0xebc0de06,
+	RQ_PHASE_UNDEFINED	= 0xebc0de07
 };
 
 /** Type of request interpreter call-back */
@@ -1263,6 +1264,8 @@ struct ptlrpc_cli_req {
 	time_t			 cr_reply_deadline;
 	/** when req bulk unlink must finish. */
 	time_t			 cr_bulk_deadline;
+	/** when req unlink must finish. */
+	time_t			 cr_req_deadline;
 	/** Portal to which this request would be sent */
 	short			 cr_req_ptl;
 	/** Portal where to wait for reply and where reply would be sent */
@@ -1318,6 +1321,7 @@ struct ptlrpc_cli_req {
 #define rq_real_sent		rq_cli.cr_sent_out
 #define rq_reply_deadline	rq_cli.cr_reply_deadline
 #define rq_bulk_deadline	rq_cli.cr_bulk_deadline
+#define rq_req_deadline		rq_cli.cr_req_deadline
 #define rq_nr_resend		rq_cli.cr_resend_nr
 #define rq_request_portal	rq_cli.cr_req_ptl
 #define rq_reply_portal		rq_cli.cr_rep_ptl
@@ -1692,8 +1696,10 @@ ptlrpc_phase2str(enum rq_phase phase)
 		return "Interpret";
 	case RQ_PHASE_COMPLETE:
 		return "Complete";
-	case RQ_PHASE_UNREGISTERING:
-		return "Unregistering";
+	case RQ_PHASE_UNREG_RPC:
+		return "UnregRPC";
+	case RQ_PHASE_UNREG_BULK:
+		return "UnregBULK";
 	default:
 		return "?Phase?";
 	}
@@ -1720,7 +1726,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req)
 #define DEBUG_REQ_FLAGS(req) \
 	ptlrpc_rqphase2str(req), \
 	FLAG(req->rq_intr, "I"), FLAG(req->rq_replied, "R"), \
-	FLAG(req->rq_err, "E"), \
+	FLAG(req->rq_err, "E"), FLAG(req->rq_net_err, "e"), \
 	FLAG(req->rq_timedout, "X") /* eXpired */, FLAG(req->rq_resend, "S"), \
 	FLAG(req->rq_restart, "T"), FLAG(req->rq_replay, "P"), \
 	FLAG(req->rq_no_resend, "N"), \
@@ -1728,7 +1734,7 @@ ptlrpc_rqphase2str(struct ptlrpc_request *req)
 	FLAG(req->rq_wait_ctx, "C"), FLAG(req->rq_hp, "H"), \
 	FLAG(req->rq_committed, "M")
 
-#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s"
+#define REQ_FLAGS_FMT "%s:%s%s%s%s%s%s%s%s%s%s%s%s%s"
 
 void _debug_req(struct ptlrpc_request *req,
 		struct libcfs_debug_msg_data *data, const char *fmt, ...)
@@ -2379,8 +2385,7 @@ static inline int ptlrpc_client_bulk_active(struct ptlrpc_request *req)
 	desc = req->rq_bulk;
 
-	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
-	    req->rq_bulk_deadline > ktime_get_real_seconds())
+	if (req->rq_bulk_deadline > ktime_get_real_seconds())
 		return 1;
 
 	if (!desc)
@@ -2727,13 +2732,20 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
 	if (req->rq_phase == new_phase)
 		return;
 
-	if (new_phase == RQ_PHASE_UNREGISTERING) {
+	if (new_phase == RQ_PHASE_UNREG_RPC ||
+	    new_phase == RQ_PHASE_UNREG_BULK) {
+		/* No embedded unregistering phases */
+		if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+		    req->rq_phase == RQ_PHASE_UNREG_BULK)
+			return;
+
 		req->rq_next_phase = req->rq_phase;
 		if (req->rq_import)
 			atomic_inc(&req->rq_import->imp_unregistering);
 	}
 
-	if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+	if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+	    req->rq_phase == RQ_PHASE_UNREG_BULK) {
 		if (req->rq_import)
 			atomic_dec(&req->rq_import->imp_unregistering);
 	}
@@ -2750,9 +2762,6 @@ ptlrpc_rqphase_move(struct ptlrpc_request *req, enum rq_phase new_phase)
 static inline int
 ptlrpc_client_early(struct ptlrpc_request *req)
 {
-	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-	    req->rq_reply_deadline > ktime_get_real_seconds())
-		return 0;
 	return req->rq_early;
 }
@@ -2762,8 +2771,7 @@ ptlrpc_client_early(struct ptlrpc_request *req)
 static inline int
 ptlrpc_client_replied(struct ptlrpc_request *req)
 {
-	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-	    req->rq_reply_deadline > ktime_get_real_seconds())
+	if (req->rq_reply_deadline > ktime_get_real_seconds())
 		return 0;
 	return req->rq_replied;
 }
@@ -2772,8 +2780,7 @@ ptlrpc_client_replied(struct ptlrpc_request *req)
 static inline int
 ptlrpc_client_recv(struct ptlrpc_request *req)
 {
-	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-	    req->rq_reply_deadline > ktime_get_real_seconds())
+	if (req->rq_reply_deadline > ktime_get_real_seconds())
 		return 1;
 	return req->rq_receiving_reply;
 }
@@ -2784,8 +2791,11 @@ ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
 	int rc;
 
 	spin_lock(&req->rq_lock);
-	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-	    req->rq_reply_deadline > ktime_get_real_seconds()) {
+	if (req->rq_reply_deadline > ktime_get_real_seconds()) {
+		spin_unlock(&req->rq_lock);
+		return 1;
+	}
+	if (req->rq_req_deadline > ktime_get_real_seconds()) {
 		spin_unlock(&req->rq_lock);
 		return 1;
 	}
...
@@ -364,6 +364,9 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_PTLRPC_CLIENT_BULK_CB2	 0x515
 #define OBD_FAIL_PTLRPC_DELAY_IMP_FULL	 0x516
 #define OBD_FAIL_PTLRPC_CANCEL_RESEND	 0x517
+#define OBD_FAIL_PTLRPC_DROP_BULK	 0x51a
+#define OBD_FAIL_PTLRPC_LONG_REQ_UNLINK	 0x51b
+#define OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK 0x51c
 
 #define OBD_FAIL_OBD_PING_NET		 0x600
 #define OBD_FAIL_OBD_LOG_CANCEL_NET	 0x601
...
@@ -621,6 +621,8 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
 	request->rq_reply_cbid.cbid_arg = request;
 
 	request->rq_reply_deadline = 0;
+	request->rq_bulk_deadline = 0;
+	request->rq_req_deadline = 0;
 	request->rq_phase = RQ_PHASE_NEW;
 	request->rq_next_phase = RQ_PHASE_UNDEFINED;
@@ -632,6 +634,37 @@ int ptlrpc_request_bufs_pack(struct ptlrpc_request *request,
 	request->rq_xid = ptlrpc_next_xid();
 	lustre_msg_set_opc(request->rq_reqmsg, opcode);
 
+	/* Let's setup deadline for req/reply/bulk unlink for opcode. */
+	if (cfs_fail_val == opcode) {
+		time_t *fail_t = NULL, *fail2_t = NULL;
+
+		if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK)) {
+			fail_t = &request->rq_bulk_deadline;
+		} else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK)) {
+			fail_t = &request->rq_reply_deadline;
+		} else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK)) {
+			fail_t = &request->rq_req_deadline;
+		} else if (CFS_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BOTH_UNLINK)) {
+			fail_t = &request->rq_reply_deadline;
+			fail2_t = &request->rq_bulk_deadline;
+		}
+
+		if (fail_t) {
+			*fail_t = ktime_get_real_seconds() + LONG_UNLINK;
+
+			if (fail2_t)
+				*fail2_t = ktime_get_real_seconds() +
+						LONG_UNLINK;
+
+			/* The RPC is infected, let the test change the
+			 * fail_loc
+			 */
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			schedule_timeout(cfs_time_seconds(2));
+			set_current_state(TASK_RUNNING);
+		}
+	}
+
 	return 0;
 
 out_ctx:
@@ -1481,16 +1514,28 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 		if (!(req->rq_phase == RQ_PHASE_RPC ||
 		      req->rq_phase == RQ_PHASE_BULK ||
 		      req->rq_phase == RQ_PHASE_INTERPRET ||
-		      req->rq_phase == RQ_PHASE_UNREGISTERING ||
+		      req->rq_phase == RQ_PHASE_UNREG_RPC ||
+		      req->rq_phase == RQ_PHASE_UNREG_BULK ||
 		      req->rq_phase == RQ_PHASE_COMPLETE)) {
 			DEBUG_REQ(D_ERROR, req, "bad phase %x", req->rq_phase);
 			LBUG();
 		}
 
-		if (req->rq_phase == RQ_PHASE_UNREGISTERING) {
+		if (req->rq_phase == RQ_PHASE_UNREG_RPC ||
+		    req->rq_phase == RQ_PHASE_UNREG_BULK) {
 			LASSERT(req->rq_next_phase != req->rq_phase);
 			LASSERT(req->rq_next_phase != RQ_PHASE_UNDEFINED);
 
+			if (req->rq_req_deadline &&
+			    !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REQ_UNLINK))
+				req->rq_req_deadline = 0;
+			if (req->rq_reply_deadline &&
+			    !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK))
+				req->rq_reply_deadline = 0;
+			if (req->rq_bulk_deadline &&
+			    !OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK))
+				req->rq_bulk_deadline = 0;
+
 			/*
 			 * Skip processing until reply is unlinked. We
 			 * can't return to pool before that and we can't
@@ -1498,7 +1543,10 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 			 * sure that all rdma transfers finished and will
 			 * not corrupt any data.
 			 */
-			if (ptlrpc_client_recv_or_unlink(req) ||
+			if (req->rq_phase == RQ_PHASE_UNREG_RPC &&
+			    ptlrpc_client_recv_or_unlink(req))
+				continue;
+			if (req->rq_phase == RQ_PHASE_UNREG_BULK &&
 			    ptlrpc_client_bulk_active(req))
 				continue;
@@ -1976,7 +2024,7 @@ void ptlrpc_interrupted_set(void *data)
 			list_entry(tmp, struct ptlrpc_request, rq_set_chain);
 
 		if (req->rq_phase != RQ_PHASE_RPC &&
-		    req->rq_phase != RQ_PHASE_UNREGISTERING)
+		    req->rq_phase != RQ_PHASE_UNREG_RPC)
 			continue;
 
 		ptlrpc_mark_interrupted(req);
@@ -2288,8 +2336,9 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
 	/* Let's setup deadline for reply unlink. */
 	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_REPL_UNLINK) &&
-	    async && request->rq_reply_deadline == 0)
-		request->rq_reply_deadline = ktime_get_real_seconds()+LONG_UNLINK;
+	    async && request->rq_reply_deadline == 0 && cfs_fail_val == 0)
+		request->rq_reply_deadline =
+			ktime_get_real_seconds() + LONG_UNLINK;
 
 	/* Nothing left to do. */
 	if (!ptlrpc_client_recv_or_unlink(request))
@@ -2302,7 +2351,7 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
 		return 1;
 
 	/* Move to "Unregistering" phase as reply was not unlinked yet. */
-	ptlrpc_rqphase_move(request, RQ_PHASE_UNREGISTERING);
+	ptlrpc_rqphase_move(request, RQ_PHASE_UNREG_RPC);
 
 	/* Do not wait for unlink to finish. */
 	if (async)
@@ -2932,7 +2981,6 @@ static void ptlrpcd_add_work_req(struct ptlrpc_request *req)
 	req->rq_timeout = obd_timeout;
 	req->rq_sent = ktime_get_real_seconds();
 	req->rq_deadline = req->rq_sent + req->rq_timeout;
-	req->rq_reply_deadline = req->rq_deadline;
 	req->rq_phase = RQ_PHASE_INTERPRET;
 	req->rq_next_phase = RQ_PHASE_COMPLETE;
 	req->rq_xid = ptlrpc_next_xid();
...
@@ -356,9 +356,8 @@ void ptlrpc_invalidate_import(struct obd_import *imp)
 				  "still on delayed list");
 		}
 
-		CERROR("%s: RPCs in \"%s\" phase found (%d). Network is sluggish? Waiting them to error out.\n",
+		CERROR("%s: Unregistering RPCs found (%d). Network is sluggish? Waiting them to error out.\n",
 		       cli_tgt,
-		       ptlrpc_phase2str(RQ_PHASE_UNREGISTERING),
 		       atomic_read(&imp->
				   imp_unregistering));
 	}
...
@@ -247,7 +247,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
 	/* Let's setup deadline for reply unlink. */
 	if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_LONG_BULK_UNLINK) &&
-	    async && req->rq_bulk_deadline == 0)
+	    async && req->rq_bulk_deadline == 0 && cfs_fail_val == 0)
 		req->rq_bulk_deadline = ktime_get_real_seconds() + LONG_UNLINK;
 
 	if (ptlrpc_client_bulk_active(req) == 0)	/* completed or */
@@ -266,7 +266,7 @@ int ptlrpc_unregister_bulk(struct ptlrpc_request *req, int async)
 		return 1;		/* never registered */
 
 	/* Move to "Unregistering" phase as bulk was not unlinked yet. */
-	ptlrpc_rqphase_move(req, RQ_PHASE_UNREGISTERING);
+	ptlrpc_rqphase_move(req, RQ_PHASE_UNREG_BULK);
 
 	/* Do not wait for unlink to finish. */
 	if (async)
...
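
A note on exercising the new paths (an assumption beyond the patch
text; the fail_loc/fail_val knobs themselves are standard Lustre):

/*
 * Hedged usage sketch: a test arms one of the new fail_loc values with
 * fail_val set to the opcode of the RPC it wants to "infect", e.g.
 *
 *     lctl set_param fail_loc=0x51b fail_val=<opcode>
 *
 * ptlrpc_request_bufs_pack() above then pushes the matching
 * rq_*_deadline out by LONG_UNLINK and sleeps for 2 seconds, giving
 * the test a window to change fail_loc before the request is sent.
 */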