Commit 9abf82b8 authored by Sage Weil

ceph: fix locking for waking session requests after reconnect

The session->s_waiting list is protected by mdsc->mutex, not s_mutex.  This
was causing (rare) s_waiting list corruption.

Fix error paths too, while we're here.  A more thorough cleanup of this
function is coming soon.
Signed-off-by: Sage Weil <sage@newdream.net>
parent d85b7056
...@@ -2136,7 +2136,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) ...@@ -2136,7 +2136,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
struct ceph_mds_session *session = NULL; struct ceph_mds_session *session = NULL;
struct ceph_msg *reply; struct ceph_msg *reply;
struct rb_node *p; struct rb_node *p;
int err; int err = -ENOMEM;
struct ceph_pagelist *pagelist; struct ceph_pagelist *pagelist;
pr_info("reconnect to recovering mds%d\n", mds); pr_info("reconnect to recovering mds%d\n", mds);
...@@ -2185,7 +2185,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) ...@@ -2185,7 +2185,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
goto fail; goto fail;
err = iterate_session_caps(session, encode_caps_cb, pagelist); err = iterate_session_caps(session, encode_caps_cb, pagelist);
if (err < 0) if (err < 0)
goto out; goto fail;
/* /*
* snaprealms. we provide mds with the ino, seq (version), and * snaprealms. we provide mds with the ino, seq (version), and
...@@ -2213,28 +2213,31 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) ...@@ -2213,28 +2213,31 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
reply->nr_pages = calc_pages_for(0, pagelist->length); reply->nr_pages = calc_pages_for(0, pagelist->length);
ceph_con_send(&session->s_con, reply); ceph_con_send(&session->s_con, reply);
if (session) { session->s_state = CEPH_MDS_SESSION_OPEN;
session->s_state = CEPH_MDS_SESSION_OPEN; mutex_unlock(&session->s_mutex);
__wake_requests(mdsc, &session->s_waiting);
} mutex_lock(&mdsc->mutex);
__wake_requests(mdsc, &session->s_waiting);
mutex_unlock(&mdsc->mutex);
ceph_put_mds_session(session);
out:
up_read(&mdsc->snap_rwsem); up_read(&mdsc->snap_rwsem);
if (session) {
mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session);
}
mutex_lock(&mdsc->mutex); mutex_lock(&mdsc->mutex);
return; return;
fail: fail:
ceph_msg_put(reply); ceph_msg_put(reply);
up_read(&mdsc->snap_rwsem);
mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session);
fail_nomsg: fail_nomsg:
ceph_pagelist_release(pagelist); ceph_pagelist_release(pagelist);
kfree(pagelist); kfree(pagelist);
fail_nopagelist: fail_nopagelist:
pr_err("ENOMEM preparing reconnect for mds%d\n", mds); pr_err("error %d preparing reconnect for mds%d\n", err, mds);
goto out; mutex_lock(&mdsc->mutex);
return;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment