Commit c297eb42 authored by Ilya Dryomov's avatar Ilya Dryomov

libceph: always signal completion when done

r_safe_completion is currently, and has always been, signaled only if
on-disk ack was requested.  It's there for fsync and syncfs, which wait
for in-flight writes to flush - all data write requests set ONDISK.

However, the pool perm check code introduced in 4.2 sends a write
request with only ACK set.  An unfortunately timed syncfs can then hang
forever: r_safe_completion won't be signaled because only an unsafe
reply was requested.

We could patch ceph_osdc_sync() to skip !ONDISK write requests, but
that is somewhat incomplete and yet another special case.  Instead,
rename this completion to r_done_completion and always signal it when
the OSD client is done with the request, whether unsafe, safe, or
error.  This is a bit cleaner and helps with the cancellation code.
Reported-by: default avatarYan, Zheng <zyan@redhat.com>
Signed-off-by: default avatarIlya Dryomov <idryomov@gmail.com>
parent 80e80fbb
...@@ -864,7 +864,7 @@ void ceph_sync_write_wait(struct inode *inode) ...@@ -864,7 +864,7 @@ void ceph_sync_write_wait(struct inode *inode)
dout("sync_write_wait on tid %llu (until %llu)\n", dout("sync_write_wait on tid %llu (until %llu)\n",
req->r_tid, last_tid); req->r_tid, last_tid);
wait_for_completion(&req->r_safe_completion); wait_for_completion(&req->r_done_completion);
ceph_osdc_put_request(req); ceph_osdc_put_request(req);
spin_lock(&ci->i_unsafe_lock); spin_lock(&ci->i_unsafe_lock);
......
...@@ -176,7 +176,7 @@ struct ceph_osd_request { ...@@ -176,7 +176,7 @@ struct ceph_osd_request {
struct kref r_kref; struct kref r_kref;
bool r_mempool; bool r_mempool;
struct completion r_completion; struct completion r_completion;
struct completion r_safe_completion; /* fsync waiter */ struct completion r_done_completion; /* fsync waiter */
ceph_osdc_callback_t r_callback; ceph_osdc_callback_t r_callback;
ceph_osdc_unsafe_callback_t r_unsafe_callback; ceph_osdc_unsafe_callback_t r_unsafe_callback;
struct list_head r_unsafe_item; struct list_head r_unsafe_item;
......
...@@ -460,7 +460,7 @@ static void request_init(struct ceph_osd_request *req) ...@@ -460,7 +460,7 @@ static void request_init(struct ceph_osd_request *req)
kref_init(&req->r_kref); kref_init(&req->r_kref);
init_completion(&req->r_completion); init_completion(&req->r_completion);
init_completion(&req->r_safe_completion); init_completion(&req->r_done_completion);
RB_CLEAR_NODE(&req->r_node); RB_CLEAR_NODE(&req->r_node);
RB_CLEAR_NODE(&req->r_mc_node); RB_CLEAR_NODE(&req->r_mc_node);
INIT_LIST_HEAD(&req->r_unsafe_item); INIT_LIST_HEAD(&req->r_unsafe_item);
...@@ -1772,7 +1772,7 @@ static void complete_request(struct ceph_osd_request *req, int err) ...@@ -1772,7 +1772,7 @@ static void complete_request(struct ceph_osd_request *req, int err)
req->r_result = err; req->r_result = err;
__finish_request(req); __finish_request(req);
__complete_request(req); __complete_request(req);
complete_all(&req->r_safe_completion); complete_all(&req->r_done_completion);
ceph_osdc_put_request(req); ceph_osdc_put_request(req);
} }
...@@ -1797,7 +1797,9 @@ static void cancel_request(struct ceph_osd_request *req) ...@@ -1797,7 +1797,9 @@ static void cancel_request(struct ceph_osd_request *req)
dout("%s req %p tid %llu\n", __func__, req, req->r_tid); dout("%s req %p tid %llu\n", __func__, req, req->r_tid);
cancel_map_check(req); cancel_map_check(req);
finish_request(req); __finish_request(req);
complete_all(&req->r_done_completion);
ceph_osdc_put_request(req);
} }
static void check_pool_dne(struct ceph_osd_request *req) static void check_pool_dne(struct ceph_osd_request *req)
...@@ -2808,12 +2810,12 @@ static bool done_request(const struct ceph_osd_request *req, ...@@ -2808,12 +2810,12 @@ static bool done_request(const struct ceph_osd_request *req,
* ->r_unsafe_callback is set? yes no * ->r_unsafe_callback is set? yes no
* *
* first reply is OK (needed r_cb/r_completion, r_cb/r_completion, * first reply is OK (needed r_cb/r_completion, r_cb/r_completion,
* any or needed/got safe) r_safe_completion r_safe_completion * any or needed/got safe) r_done_completion r_done_completion
* *
* first reply is unsafe r_unsafe_cb(true) (nothing) * first reply is unsafe r_unsafe_cb(true) (nothing)
* *
* when we get the safe reply r_unsafe_cb(false), r_cb/r_completion, * when we get the safe reply r_unsafe_cb(false), r_cb/r_completion,
* r_safe_completion r_safe_completion * r_done_completion r_done_completion
*/ */
static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
{ {
...@@ -2934,8 +2936,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg) ...@@ -2934,8 +2936,7 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
dout("req %p tid %llu cb\n", req, req->r_tid); dout("req %p tid %llu cb\n", req, req->r_tid);
__complete_request(req); __complete_request(req);
} }
if (m.flags & CEPH_OSD_FLAG_ONDISK) complete_all(&req->r_done_completion);
complete_all(&req->r_safe_completion);
ceph_osdc_put_request(req); ceph_osdc_put_request(req);
} else { } else {
if (req->r_unsafe_callback) { if (req->r_unsafe_callback) {
...@@ -3471,9 +3472,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, ...@@ -3471,9 +3472,8 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
EXPORT_SYMBOL(ceph_osdc_start_request); EXPORT_SYMBOL(ceph_osdc_start_request);
/* /*
* Unregister a registered request. The request is not completed (i.e. * Unregister a registered request. The request is not completed:
* no callbacks or wakeups) - higher layers are supposed to know what * ->r_result isn't set and __complete_request() isn't called.
* they are canceling.
*/ */
void ceph_osdc_cancel_request(struct ceph_osd_request *req) void ceph_osdc_cancel_request(struct ceph_osd_request *req)
{ {
...@@ -3500,9 +3500,6 @@ static int wait_request_timeout(struct ceph_osd_request *req, ...@@ -3500,9 +3500,6 @@ static int wait_request_timeout(struct ceph_osd_request *req,
if (left <= 0) { if (left <= 0) {
left = left ?: -ETIMEDOUT; left = left ?: -ETIMEDOUT;
ceph_osdc_cancel_request(req); ceph_osdc_cancel_request(req);
/* kludge - need to to wake ceph_osdc_sync() */
complete_all(&req->r_safe_completion);
} else { } else {
left = req->r_result; /* completed */ left = req->r_result; /* completed */
} }
...@@ -3549,7 +3546,7 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) ...@@ -3549,7 +3546,7 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc)
up_read(&osdc->lock); up_read(&osdc->lock);
dout("%s waiting on req %p tid %llu last_tid %llu\n", dout("%s waiting on req %p tid %llu last_tid %llu\n",
__func__, req, req->r_tid, last_tid); __func__, req, req->r_tid, last_tid);
wait_for_completion(&req->r_safe_completion); wait_for_completion(&req->r_done_completion);
ceph_osdc_put_request(req); ceph_osdc_put_request(req);
goto again; goto again;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment