Commit 685f9a5d authored by Sage Weil

ceph: do not confuse stale and dead (unreconnected) caps

We were using the cap_gen to track both stale caps (caps that timed out
due to temporarily losing touch with the mds) and dead caps that did not
reconnect after an MDS failure.  Introduce a recon_gen counter to track
reconnections to restarted MDSs and kill dead caps based on that instead.

Rename gen to cap_gen while we're at it to make it more clear which is
which.
Signed-off-by: Sage Weil <sage@newdream.net>
parent fb690390
...@@ -609,7 +609,8 @@ int ceph_add_cap(struct inode *inode, ...@@ -609,7 +609,8 @@ int ceph_add_cap(struct inode *inode,
cap->seq = seq; cap->seq = seq;
cap->issue_seq = seq; cap->issue_seq = seq;
cap->mseq = mseq; cap->mseq = mseq;
cap->gen = session->s_cap_gen; cap->cap_gen = session->s_cap_gen;
cap->recon_gen = session->s_recon_gen;
if (fmode >= 0) if (fmode >= 0)
__ceph_get_fmode(ci, fmode); __ceph_get_fmode(ci, fmode);
...@@ -626,17 +627,25 @@ int ceph_add_cap(struct inode *inode, ...@@ -626,17 +627,25 @@ int ceph_add_cap(struct inode *inode,
static int __cap_is_valid(struct ceph_cap *cap) static int __cap_is_valid(struct ceph_cap *cap)
{ {
unsigned long ttl; unsigned long ttl;
u32 gen; u32 gen, recon_gen;
spin_lock(&cap->session->s_cap_lock); spin_lock(&cap->session->s_cap_lock);
gen = cap->session->s_cap_gen; gen = cap->session->s_cap_gen;
recon_gen = cap->session->s_recon_gen;
ttl = cap->session->s_cap_ttl; ttl = cap->session->s_cap_ttl;
spin_unlock(&cap->session->s_cap_lock); spin_unlock(&cap->session->s_cap_lock);
if (cap->gen < gen || time_after_eq(jiffies, ttl)) { if (cap->recon_gen != recon_gen) {
dout("__cap_is_valid %p cap %p issued %s "
"but DEAD (recon_gen %u vs %u)\n", &cap->ci->vfs_inode,
cap, ceph_cap_string(cap->issued), cap->recon_gen,
recon_gen);
return 0;
}
if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) {
dout("__cap_is_valid %p cap %p issued %s " dout("__cap_is_valid %p cap %p issued %s "
"but STALE (gen %u vs %u)\n", &cap->ci->vfs_inode, "but STALE (gen %u vs %u)\n", &cap->ci->vfs_inode,
cap, ceph_cap_string(cap->issued), cap->gen, gen); cap, ceph_cap_string(cap->issued), cap->cap_gen, gen);
return 0; return 0;
} }
...@@ -2203,7 +2212,8 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, ...@@ -2203,7 +2212,8 @@ static int handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
issued = __ceph_caps_issued(ci, &implemented); issued = __ceph_caps_issued(ci, &implemented);
issued |= implemented | __ceph_caps_dirty(ci); issued |= implemented | __ceph_caps_dirty(ci);
cap->gen = session->s_cap_gen; cap->cap_gen = session->s_cap_gen;
cap->recon_gen = session->s_recon_gen;
__check_cap_issue(ci, cap, newcaps); __check_cap_issue(ci, cap, newcaps);
......
...@@ -329,6 +329,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, ...@@ -329,6 +329,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
spin_lock_init(&s->s_cap_lock); spin_lock_init(&s->s_cap_lock);
s->s_recon_gen = 0;
s->s_cap_gen = 0; s->s_cap_gen = 0;
s->s_cap_ttl = 0; s->s_cap_ttl = 0;
s->s_renew_requested = 0; s->s_renew_requested = 0;
...@@ -738,10 +739,11 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap, ...@@ -738,10 +739,11 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
struct ceph_mds_session *session = arg; struct ceph_mds_session *session = arg;
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
if (cap->gen != session->s_cap_gen) { if (cap->recon_gen != session->s_recon_gen) {
pr_err("failed reconnect %p %llx.%llx cap %p " pr_err("failed reconnect %p %llx.%llx cap %p "
"(gen %d < session %d)\n", inode, ceph_vinop(inode), "(recon_gen %d < session %d)\n", inode,
cap, cap->gen, session->s_cap_gen); ceph_vinop(inode), cap,
cap->recon_gen, session->s_recon_gen);
__ceph_remove_cap(cap, NULL); __ceph_remove_cap(cap, NULL);
} }
wake_up(&ceph_inode(inode)->i_cap_wq); wake_up(&ceph_inode(inode)->i_cap_wq);
...@@ -2050,6 +2052,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds) ...@@ -2050,6 +2052,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc, int mds)
session->s_state = CEPH_MDS_SESSION_RECONNECTING; session->s_state = CEPH_MDS_SESSION_RECONNECTING;
session->s_seq = 0; session->s_seq = 0;
session->s_recon_gen++;
ceph_con_open(&session->s_con, ceph_con_open(&session->s_con,
ceph_mdsmap_get_addr(mdsc->mdsmap, mds)); ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
......
...@@ -98,6 +98,8 @@ struct ceph_mds_session { ...@@ -98,6 +98,8 @@ struct ceph_mds_session {
u64 s_seq; /* incoming msg seq # */ u64 s_seq; /* incoming msg seq # */
struct mutex s_mutex; /* serialize session messages */ struct mutex s_mutex; /* serialize session messages */
int s_recon_gen; /* inc on reconnect to recovered mds */
struct ceph_connection s_con; struct ceph_connection s_con;
/* protected by s_cap_lock */ /* protected by s_cap_lock */
......
...@@ -169,7 +169,9 @@ struct ceph_cap { ...@@ -169,7 +169,9 @@ struct ceph_cap {
int issued; /* latest, from the mds */ int issued; /* latest, from the mds */
int implemented; /* implemented superset of issued (for revocation) */ int implemented; /* implemented superset of issued (for revocation) */
int mds_wanted; int mds_wanted;
u32 seq, issue_seq, mseq, gen; u32 seq, issue_seq, mseq;
u32 cap_gen; /* active/stale cycle */
u32 recon_gen; /* mds restart reconnect cycle */
unsigned long last_used; unsigned long last_used;
struct list_head caps_item; struct list_head caps_item;
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment