Commit 82a373ae authored by Liang Zhen, committed by Greg Kroah-Hartman

lustre/ptlrpc: re-enqueue ptlrpcd worker

osc_extent_wait() can get stuck in a scenario like this:

1) thread-1 holds an active extent
2) thread-2 calls flush cache and marks this extent as "urgent"
   and "sync_wait"
3) thread-3 wants to write to the same extent; osc_extent_find()
   sees a "conflict" because the extent is "sync_wait", so it starts
   to wait...
4) cl_writeback_work has been scheduled by thread-4 to write some
   other extents; it has sent RPCs but has not returned yet
5) thread-1 finishes its work and calls osc_extent_release()->
   osc_io_unplug_async()->ptlrpcd_queue_work(), but finds
   cl_writeback_work still running, so the request is ignored (-EBUSY)
6) thread-3 is stuck because nobody will ever wake it up

This patch allows the ptlrpcd work request to be re-enqueued while a
previous run is still in flight, so queued requests are no longer missed.
Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-on: http://review.whamcloud.com/8922
Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-4509
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Bobi Jam <bobijam@gmail.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 15c50ccc
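The fix described above boils down to a reference-count handshake on the
work request. Below is a minimal sketch of that protocol in plain C11, not
Lustre/kernel code: the names work_item, queue_work(), work_done() and
submit() are invented for illustration, and the toy submit() runs the work
synchronously where the real patch hands the request back to a ptlrpcd
thread. The point is the counting scheme: only the 1 -> 2 transition
actually submits, and the completion path resubmits instead of returning
-EBUSY when more work arrived while the callback was running.

#include <stdatomic.h>
#include <stdio.h>

struct work_item {
	atomic_int refcount;	/* 1 = idle, 2 = queued/running, >2 = re-run wanted */
	void (*cb)(void *);
	void *cbdata;
};

static void submit(struct work_item *w);

static int queue_work(struct work_item *w)
{
	/* Only the caller that moves the count from 1 to 2 submits the
	 * item; later callers just record that another run is wanted,
	 * instead of being bounced with -EBUSY as before the patch. */
	if (atomic_fetch_add(&w->refcount, 1) + 1 == 2)
		submit(w);
	return 0;
}

static void work_done(struct work_item *w)
{
	w->cb(w->cbdata);

	/* If queue_work() was called while the callback was executing,
	 * the count is still above 1: collapse it back to 2 and
	 * resubmit, so no queued request is ever silently dropped. */
	if (atomic_fetch_sub(&w->refcount, 1) - 1 > 1) {
		atomic_store(&w->refcount, 2);
		submit(w);
	}
}

/* Toy synchronous "worker" so the sketch stays self-contained; the real
 * code queues the request to a ptlrpcd thread instead. */
static void submit(struct work_item *w)
{
	work_done(w);
}

static void flush_cb(void *data)
{
	printf("work ran: %s\n", (const char *)data);
}

int main(void)
{
	struct work_item w = { .cb = flush_cb, .cbdata = "flush extents" };

	atomic_init(&w.refcount, 1);	/* allocated but idle */
	queue_work(&w);			/* 1 -> 2: submits and runs */
	queue_work(&w);			/* idle again by now, submits again */
	return 0;
}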
@@ -48,6 +48,7 @@
 #include "ptlrpc_internal.h"
 
 static int ptlrpc_send_new_req(struct ptlrpc_request *req);
+static int ptlrpcd_check_work(struct ptlrpc_request *req);
 
 /**
  * Initialize passed in client structure \a cl.
@@ -1779,6 +1780,10 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set)
 
 		ptlrpc_req_interpret(env, req, req->rq_status);
 
+		if (ptlrpcd_check_work(req)) {
+			atomic_dec(&set->set_remaining);
+			continue;
+		}
 		ptlrpc_rqphase_move(req, RQ_PHASE_COMPLETE);
 
 		CDEBUG(req->rq_reqmsg != NULL ? D_RPCTRACE : 0,
@@ -2952,22 +2957,50 @@ EXPORT_SYMBOL(ptlrpc_sample_next_xid);
  * have delay before it really runs by ptlrpcd thread.
  */
 struct ptlrpc_work_async_args {
-	__u64	magic;
 	int   (*cb)(const struct lu_env *, void *);
 	void   *cbdata;
 };
 
-#define PTLRPC_WORK_MAGIC 0x6655436b676f4f44ULL	/* magic code */
+static void ptlrpcd_add_work_req(struct ptlrpc_request *req)
+{
+	/* re-initialize the req */
+	req->rq_timeout = obd_timeout;
+	req->rq_sent = cfs_time_current_sec();
+	req->rq_deadline = req->rq_sent + req->rq_timeout;
+	req->rq_reply_deadline = req->rq_deadline;
+	req->rq_phase = RQ_PHASE_INTERPRET;
+	req->rq_next_phase = RQ_PHASE_COMPLETE;
+	req->rq_xid = ptlrpc_next_xid();
+	req->rq_import_generation = req->rq_import->imp_generation;
+
+	ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+}
 
 static int work_interpreter(const struct lu_env *env,
 			    struct ptlrpc_request *req, void *data, int rc)
 {
 	struct ptlrpc_work_async_args *arg = data;
 
-	LASSERT(arg->magic == PTLRPC_WORK_MAGIC);
+	LASSERT(ptlrpcd_check_work(req));
 	LASSERT(arg->cb != NULL);
 
-	return arg->cb(env, arg->cbdata);
+	rc = arg->cb(env, arg->cbdata);
+
+	list_del_init(&req->rq_set_chain);
+	req->rq_set = NULL;
+
+	if (atomic_dec_return(&req->rq_refcount) > 1) {
+		atomic_set(&req->rq_refcount, 2);
+		ptlrpcd_add_work_req(req);
+	}
+	return rc;
+}
+
+static int worker_format;
+
+static int ptlrpcd_check_work(struct ptlrpc_request *req)
+{
+	return req->rq_pill.rc_fmt == (void *)&worker_format;
 }
 
 /**
@@ -3000,6 +3033,7 @@ void *ptlrpcd_alloc_work(struct obd_import *imp,
 	req->rq_receiving_reply = 0;
 	req->rq_must_unlink = 0;
 	req->rq_no_delay = req->rq_no_resend = 1;
+	req->rq_pill.rc_fmt = (void *)&worker_format;
 
 	spin_lock_init(&req->rq_lock);
 	INIT_LIST_HEAD(&req->rq_list);
@@ -3013,7 +3047,6 @@ void *ptlrpcd_alloc_work(struct obd_import *imp,
 	CLASSERT(sizeof(*args) <= sizeof(req->rq_async_args));
 	args = ptlrpc_req_async_args(req);
-	args->magic = PTLRPC_WORK_MAGIC;
 	args->cb = cb;
 	args->cbdata = cbdata;
@@ -3043,25 +3076,8 @@ int ptlrpcd_queue_work(void *handler)
 	 * req as opaque data. - Jinshan
 	 */
 	LASSERT(atomic_read(&req->rq_refcount) > 0);
-	if (atomic_read(&req->rq_refcount) > 1)
-		return -EBUSY;
-
-	if (atomic_inc_return(&req->rq_refcount) > 2) { /* race */
-		atomic_dec(&req->rq_refcount);
-		return -EBUSY;
-	}
-
-	/* re-initialize the req */
-	req->rq_timeout = obd_timeout;
-	req->rq_sent = cfs_time_current_sec();
-	req->rq_deadline = req->rq_sent + req->rq_timeout;
-	req->rq_reply_deadline = req->rq_deadline;
-	req->rq_phase = RQ_PHASE_INTERPRET;
-	req->rq_next_phase = RQ_PHASE_COMPLETE;
-	req->rq_xid = ptlrpc_next_xid();
-	req->rq_import_generation = req->rq_import->imp_generation;
-
-	ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
+	if (atomic_inc_return(&req->rq_refcount) == 2)
+		ptlrpcd_add_work_req(req);
 	return 0;
 }
 EXPORT_SYMBOL(ptlrpcd_queue_work);