Commit d517b398 authored by Xiubo Li, committed by Ilya Dryomov

ceph: reconnect to the export targets on new mdsmaps

In the case where the export MDS has crashed just after the EImportStart
journal is flushed, a standby MDS takes over for it and when replaying
the EImportStart journal the MDS will wait for the client to reconnect. That
may never happen because the client may not have registered or opened
the sessions yet.

When receiving a new map, ensure we reconnect to valid export targets as
well if their sessions don't exist yet.
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent 692e1715
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include <linux/ratelimit.h> #include <linux/ratelimit.h>
#include <linux/bits.h> #include <linux/bits.h>
#include <linux/ktime.h> #include <linux/ktime.h>
#include <linux/bitmap.h>
#include "super.h" #include "super.h"
#include "mds_client.h" #include "mds_client.h"
...@@ -4171,13 +4172,21 @@ static void check_new_map(struct ceph_mds_client *mdsc, ...@@ -4171,13 +4172,21 @@ static void check_new_map(struct ceph_mds_client *mdsc,
struct ceph_mdsmap *newmap, struct ceph_mdsmap *newmap,
struct ceph_mdsmap *oldmap) struct ceph_mdsmap *oldmap)
{ {
int i; int i, j, err;
int oldstate, newstate; int oldstate, newstate;
struct ceph_mds_session *s; struct ceph_mds_session *s;
unsigned long targets[DIV_ROUND_UP(CEPH_MAX_MDS, sizeof(unsigned long))] = {0};
dout("check_new_map new %u old %u\n", dout("check_new_map new %u old %u\n",
newmap->m_epoch, oldmap->m_epoch); newmap->m_epoch, oldmap->m_epoch);
if (newmap->m_info) {
for (i = 0; i < newmap->possible_max_rank; i++) {
for (j = 0; j < newmap->m_info[i].num_export_targets; j++)
set_bit(newmap->m_info[i].export_targets[j], targets);
}
}
for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) { for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) {
if (!mdsc->sessions[i]) if (!mdsc->sessions[i])
continue; continue;
...@@ -4231,6 +4240,7 @@ static void check_new_map(struct ceph_mds_client *mdsc, ...@@ -4231,6 +4240,7 @@ static void check_new_map(struct ceph_mds_client *mdsc,
if (s->s_state == CEPH_MDS_SESSION_RESTARTING && if (s->s_state == CEPH_MDS_SESSION_RESTARTING &&
newstate >= CEPH_MDS_STATE_RECONNECT) { newstate >= CEPH_MDS_STATE_RECONNECT) {
mutex_unlock(&mdsc->mutex); mutex_unlock(&mdsc->mutex);
clear_bit(i, targets);
send_mds_reconnect(mdsc, s); send_mds_reconnect(mdsc, s);
mutex_lock(&mdsc->mutex); mutex_lock(&mdsc->mutex);
} }
...@@ -4253,6 +4263,51 @@ static void check_new_map(struct ceph_mds_client *mdsc, ...@@ -4253,6 +4263,51 @@ static void check_new_map(struct ceph_mds_client *mdsc,
} }
} }
/*
* Only open and reconnect sessions that don't exist yet.
*/
for (i = 0; i < newmap->possible_max_rank; i++) {
/*
* In case the import MDS is crashed just after
* the EImportStart journal is flushed, so when
* a standby MDS takes over it and is replaying
* the EImportStart journal the new MDS daemon
* will wait the client to reconnect it, but the
* client may never register/open the session yet.
*
* Will try to reconnect that MDS daemon if the
* rank number is in the export targets array and
* is the up:reconnect state.
*/
newstate = ceph_mdsmap_get_state(newmap, i);
if (!test_bit(i, targets) || newstate != CEPH_MDS_STATE_RECONNECT)
continue;
/*
* The session maybe registered and opened by some
* requests which were choosing random MDSes during
* the mdsc->mutex's unlock/lock gap below in rare
* case. But the related MDS daemon will just queue
* that requests and be still waiting for the client's
* reconnection request in up:reconnect state.
*/
s = __ceph_lookup_mds_session(mdsc, i);
if (likely(!s)) {
s = __open_export_target_session(mdsc, i);
if (IS_ERR(s)) {
err = PTR_ERR(s);
pr_err("failed to open export target session, err %d\n",
err);
continue;
}
}
dout("send reconnect to export target mds.%d\n", i);
mutex_unlock(&mdsc->mutex);
send_mds_reconnect(mdsc, s);
ceph_put_mds_session(s);
mutex_lock(&mdsc->mutex);
}
for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) { for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) {
s = mdsc->sessions[i]; s = mdsc->sessions[i];
if (!s) if (!s)
......
...@@ -122,6 +122,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) ...@@ -122,6 +122,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
int err; int err;
u8 mdsmap_v; u8 mdsmap_v;
u16 mdsmap_ev; u16 mdsmap_ev;
u32 target;
m = kzalloc(sizeof(*m), GFP_NOFS); m = kzalloc(sizeof(*m), GFP_NOFS);
if (!m) if (!m)
...@@ -260,9 +261,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2) ...@@ -260,9 +261,14 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
sizeof(u32), GFP_NOFS); sizeof(u32), GFP_NOFS);
if (!info->export_targets) if (!info->export_targets)
goto nomem; goto nomem;
for (j = 0; j < num_export_targets; j++) for (j = 0; j < num_export_targets; j++) {
info->export_targets[j] = target = ceph_decode_32(&pexport_targets);
ceph_decode_32(&pexport_targets); if (target >= m->possible_max_rank) {
err = -EIO;
goto corrupt;
}
info->export_targets[j] = target;
}
} else { } else {
info->export_targets = NULL; info->export_targets = NULL;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment