Commit e9e427f0 authored by Yan, Zheng; committed by Ilya Dryomov

ceph: check availability of mds cluster on mount

Signed-off-by: Yan, Zheng <zyan@redhat.com>
parent 7ce469a5
...@@ -2100,17 +2100,26 @@ static int __do_request(struct ceph_mds_client *mdsc, ...@@ -2100,17 +2100,26 @@ static int __do_request(struct ceph_mds_client *mdsc,
err = -EIO; err = -EIO;
goto finish; goto finish;
} }
if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
if (mdsc->mdsmap_err) {
err = mdsc->mdsmap_err;
dout("do_request mdsmap err %d\n", err);
goto finish;
}
if (!(mdsc->fsc->mount_options->flags &
CEPH_MOUNT_OPT_MOUNTWAIT) &&
!ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {
err = -ENOENT;
pr_info("probably no mds server is up\n");
goto finish;
}
}
put_request_session(req); put_request_session(req);
mds = __choose_mds(mdsc, req); mds = __choose_mds(mdsc, req);
if (mds < 0 || if (mds < 0 ||
ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) { ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
if (mdsc->mdsmap_err) {
err = mdsc->mdsmap_err;
dout("do_request mdsmap err %d\n", err);
goto finish;
}
dout("do_request no mds or not active, waiting for map\n"); dout("do_request no mds or not active, waiting for map\n");
list_add(&req->r_wait, &mdsc->waiting_for_map); list_add(&req->r_wait, &mdsc->waiting_for_map);
goto out; goto out;
......
...@@ -42,6 +42,60 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m) ...@@ -42,6 +42,60 @@ int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m)
return i; return i;
} }
/*
 * Skip one fixed-size encoded value of @type without storing it.
 *
 * @p:    void ** cursor into the buffer; advanced by sizeof(type) on success
 * @end:  one past the last valid byte of the buffer
 * @type: type whose size gives the number of bytes to skip
 * @bad:  label jumped to when fewer than sizeof(type) bytes remain
 *
 * The room check goes through ceph_decode_need(), like the set/map
 * helpers, instead of an open-coded "*p + size > end" comparison whose
 * pointer addition could wrap.
 */
#define __decode_and_drop_type(p, end, type, bad)		\
	do {							\
		ceph_decode_need(p, end, sizeof(type), bad);	\
		*(p) += sizeof(type);				\
	} while (0)
/*
 * Skip an encoded set of fixed-size elements: a u32 element count
 * followed by that many values of @type.
 *
 * @p:    void ** cursor into the buffer; advanced past the whole set
 * @end:  one past the last valid byte of the buffer
 * @type: element type (only its size is used)
 * @bad:  label jumped to when the buffer is too short or the count is
 *        so large that the byte length cannot be represented
 */
#define __decode_and_drop_set(p, end, type, bad)		\
	do {							\
		u32 n;						\
		size_t need;					\
		ceph_decode_32_safe(p, end, n, bad);		\
		/* count comes off the wire: refuse to let	\
		 * the byte length wrap size_t (32-bit) */	\
		if (n > (size_t)-1 / sizeof(type))		\
			goto bad;				\
		need = sizeof(type) * n;			\
		ceph_decode_need(p, end, need, bad);		\
		*(p) += need;					\
	} while (0)
/*
 * Skip an encoded map with fixed-size keys and values: a u32 entry
 * count followed by that many (@ktype, @vtype) pairs.
 *
 * @p:     void ** cursor into the buffer; advanced past the whole map
 * @end:   one past the last valid byte of the buffer
 * @ktype: key type (only its size is used)
 * @vtype: value type (only its size is used)
 * @bad:   label jumped to when the buffer is too short or the count is
 *         so large that the byte length cannot be represented
 */
#define __decode_and_drop_map(p, end, ktype, vtype, bad)	\
	do {							\
		u32 n;						\
		size_t need;					\
		ceph_decode_32_safe(p, end, n, bad);		\
		/* count comes off the wire: refuse to let	\
		 * the byte length wrap size_t (32-bit) */	\
		if (n > (size_t)-1 /				\
			(sizeof(ktype) + sizeof(vtype)))	\
			goto bad;				\
		need = (sizeof(ktype) + sizeof(vtype)) * n;	\
		ceph_decode_need(p, end, need, bad);		\
		*(p) += need;					\
	} while (0)
static int __decode_and_drop_compat_set(void **p, void* end)
{
int i;
/* compat, ro_compat, incompat*/
for (i = 0; i < 3; i++) {
u32 n;
ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad);
/* mask */
*p += sizeof(u64);
/* names (map<u64, string>) */
n = ceph_decode_32(p);
while (n-- > 0) {
u32 len;
ceph_decode_need(p, end, sizeof(u64) + sizeof(u32),
bad);
*p += sizeof(u64);
len = ceph_decode_32(p);
ceph_decode_need(p, end, len, bad);
*p += len;
}
}
return 0;
bad:
return -1;
}
/* /*
* Decode an MDS map * Decode an MDS map
* *
...@@ -55,6 +109,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -55,6 +109,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
int i, j, n; int i, j, n;
int err = -EINVAL; int err = -EINVAL;
u8 mdsmap_v, mdsmap_cv; u8 mdsmap_v, mdsmap_cv;
u16 mdsmap_ev;
m = kzalloc(sizeof(*m), GFP_NOFS); m = kzalloc(sizeof(*m), GFP_NOFS);
if (m == NULL) if (m == NULL)
...@@ -83,7 +138,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -83,7 +138,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS); m->m_info = kcalloc(m->m_max_mds, sizeof(*m->m_info), GFP_NOFS);
if (m->m_info == NULL) if (m->m_info == NULL)
goto badmem; goto nomem;
/* pick out active nodes from mds_info (state > 0) */ /* pick out active nodes from mds_info (state > 0) */
n = ceph_decode_32(p); n = ceph_decode_32(p);
...@@ -166,7 +221,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -166,7 +221,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
info->export_targets = kcalloc(num_export_targets, info->export_targets = kcalloc(num_export_targets,
sizeof(u32), GFP_NOFS); sizeof(u32), GFP_NOFS);
if (info->export_targets == NULL) if (info->export_targets == NULL)
goto badmem; goto nomem;
for (j = 0; j < num_export_targets; j++) for (j = 0; j < num_export_targets; j++)
info->export_targets[j] = info->export_targets[j] =
ceph_decode_32(&pexport_targets); ceph_decode_32(&pexport_targets);
...@@ -180,24 +235,104 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) ...@@ -180,24 +235,104 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
m->m_num_data_pg_pools = n; m->m_num_data_pg_pools = n;
m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS);
if (!m->m_data_pg_pools) if (!m->m_data_pg_pools)
goto badmem; goto nomem;
ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); ceph_decode_need(p, end, sizeof(u64)*(n+1), bad);
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
m->m_data_pg_pools[i] = ceph_decode_64(p); m->m_data_pg_pools[i] = ceph_decode_64(p);
m->m_cas_pg_pool = ceph_decode_64(p); m->m_cas_pg_pool = ceph_decode_64(p);
m->m_enabled = m->m_epoch > 1;
mdsmap_ev = 1;
if (mdsmap_v >= 2) {
ceph_decode_16_safe(p, end, mdsmap_ev, bad_ext);
}
if (mdsmap_ev >= 3) {
if (__decode_and_drop_compat_set(p, end) < 0)
goto bad_ext;
}
/* metadata_pool */
if (mdsmap_ev < 5) {
__decode_and_drop_type(p, end, u32, bad_ext);
} else {
__decode_and_drop_type(p, end, u64, bad_ext);
}
/* created + modified + tableserver */
__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
__decode_and_drop_type(p, end, struct ceph_timespec, bad_ext);
__decode_and_drop_type(p, end, u32, bad_ext);
/* ok, we don't care about the rest. */ /* in */
{
int num_laggy = 0;
ceph_decode_32_safe(p, end, n, bad_ext);
ceph_decode_need(p, end, sizeof(u32) * n, bad_ext);
for (i = 0; i < n; i++) {
s32 mds = ceph_decode_32(p);
if (mds >= 0 && mds < m->m_max_mds) {
if (m->m_info[mds].laggy)
num_laggy++;
}
}
m->m_num_laggy = num_laggy;
}
/* inc */
__decode_and_drop_map(p, end, u32, u32, bad_ext);
/* up */
__decode_and_drop_map(p, end, u32, u64, bad_ext);
/* failed */
__decode_and_drop_set(p, end, u32, bad_ext);
/* stopped */
__decode_and_drop_set(p, end, u32, bad_ext);
if (mdsmap_ev >= 4) {
/* last_failure_osd_epoch */
__decode_and_drop_type(p, end, u32, bad_ext);
}
if (mdsmap_ev >= 6) {
/* ever_allowed_snaps */
__decode_and_drop_type(p, end, u8, bad_ext);
/* explicitly_allowed_snaps */
__decode_and_drop_type(p, end, u8, bad_ext);
}
if (mdsmap_ev >= 7) {
/* inline_data_enabled */
__decode_and_drop_type(p, end, u8, bad_ext);
}
if (mdsmap_ev >= 8) {
u32 name_len;
/* enabled */
ceph_decode_8_safe(p, end, m->m_enabled, bad_ext);
ceph_decode_32_safe(p, end, name_len, bad_ext);
ceph_decode_need(p, end, name_len, bad_ext);
*p += name_len;
}
/* damaged */
if (mdsmap_ev >= 9) {
size_t need;
ceph_decode_32_safe(p, end, n, bad_ext);
need = sizeof(u32) * n;
ceph_decode_need(p, end, need, bad_ext);
*p += need;
m->m_damaged = n > 0;
} else {
m->m_damaged = false;
}
bad_ext:
*p = end; *p = end;
dout("mdsmap_decode success epoch %u\n", m->m_epoch); dout("mdsmap_decode success epoch %u\n", m->m_epoch);
return m; return m;
nomem:
badmem:
err = -ENOMEM; err = -ENOMEM;
goto out_err;
bad: bad:
pr_err("corrupt mdsmap\n"); pr_err("corrupt mdsmap\n");
print_hex_dump(KERN_DEBUG, "mdsmap: ", print_hex_dump(KERN_DEBUG, "mdsmap: ",
DUMP_PREFIX_OFFSET, 16, 1, DUMP_PREFIX_OFFSET, 16, 1,
start, end - start, true); start, end - start, true);
out_err:
ceph_mdsmap_destroy(m); ceph_mdsmap_destroy(m);
return ERR_PTR(err); return ERR_PTR(err);
} }
...@@ -212,3 +347,19 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m) ...@@ -212,3 +347,19 @@ void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
kfree(m->m_data_pg_pools); kfree(m->m_data_pg_pools);
kfree(m); kfree(m);
} }
/*
 * Report whether the MDS cluster described by @m looks usable.
 *
 * The map must be marked enabled, must not be marked damaged, must have
 * no laggy entries counted in m_num_laggy, and at least one of its
 * m_max_mds info slots must be in CEPH_MDS_STATE_ACTIVE.
 *
 * Returns true when all of those hold, false otherwise.
 */
bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m)
{
	int rank;

	if (!m->m_enabled || m->m_damaged || m->m_num_laggy > 0)
		return false;

	/* a single active entry is enough */
	for (rank = 0; rank < m->m_max_mds; rank++) {
		if (m->m_info[rank].state == CEPH_MDS_STATE_ACTIVE)
			return true;
	}

	return false;
}
...@@ -137,6 +137,8 @@ enum { ...@@ -137,6 +137,8 @@ enum {
Opt_nofscache, Opt_nofscache,
Opt_poolperm, Opt_poolperm,
Opt_nopoolperm, Opt_nopoolperm,
Opt_require_active_mds,
Opt_norequire_active_mds,
#ifdef CONFIG_CEPH_FS_POSIX_ACL #ifdef CONFIG_CEPH_FS_POSIX_ACL
Opt_acl, Opt_acl,
#endif #endif
...@@ -171,6 +173,8 @@ static match_table_t fsopt_tokens = { ...@@ -171,6 +173,8 @@ static match_table_t fsopt_tokens = {
{Opt_nofscache, "nofsc"}, {Opt_nofscache, "nofsc"},
{Opt_poolperm, "poolperm"}, {Opt_poolperm, "poolperm"},
{Opt_nopoolperm, "nopoolperm"}, {Opt_nopoolperm, "nopoolperm"},
{Opt_require_active_mds, "require_active_mds"},
{Opt_norequire_active_mds, "norequire_active_mds"},
#ifdef CONFIG_CEPH_FS_POSIX_ACL #ifdef CONFIG_CEPH_FS_POSIX_ACL
{Opt_acl, "acl"}, {Opt_acl, "acl"},
#endif #endif
...@@ -287,6 +291,12 @@ static int parse_fsopt_token(char *c, void *private) ...@@ -287,6 +291,12 @@ static int parse_fsopt_token(char *c, void *private)
case Opt_nopoolperm: case Opt_nopoolperm:
fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM; fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
break; break;
case Opt_require_active_mds:
fsopt->flags &= ~CEPH_MOUNT_OPT_MOUNTWAIT;
break;
case Opt_norequire_active_mds:
fsopt->flags |= CEPH_MOUNT_OPT_MOUNTWAIT;
break;
#ifdef CONFIG_CEPH_FS_POSIX_ACL #ifdef CONFIG_CEPH_FS_POSIX_ACL
case Opt_acl: case Opt_acl:
fsopt->sb_flags |= MS_POSIXACL; fsopt->sb_flags |= MS_POSIXACL;
......
...@@ -36,6 +36,7 @@ ...@@ -36,6 +36,7 @@
#define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */ #define CEPH_MOUNT_OPT_DCACHE (1<<9) /* use dcache for readdir etc */
#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */ #define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
#define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */ #define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */
#define CEPH_MOUNT_OPT_MOUNTWAIT (1<<12) /* mount waits if no mds is up */
#define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE #define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE
......
...@@ -31,6 +31,10 @@ struct ceph_mdsmap { ...@@ -31,6 +31,10 @@ struct ceph_mdsmap {
int m_num_data_pg_pools; int m_num_data_pg_pools;
u64 *m_data_pg_pools; u64 *m_data_pg_pools;
u64 m_cas_pg_pool; u64 m_cas_pg_pool;
bool m_enabled;
bool m_damaged;
int m_num_laggy;
}; };
static inline struct ceph_entity_addr * static inline struct ceph_entity_addr *
...@@ -59,5 +63,6 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) ...@@ -59,5 +63,6 @@ static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w)
extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m);
extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end);
extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m);
extern bool ceph_mdsmap_is_cluster_available(struct ceph_mdsmap *m);
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment