Commit e9964c10 authored by Sage Weil

ceph: fix flush_dirty_caps race with caps migration

flush_dirty_caps() used to loop over the first entry of the cap_dirty
list on the assumption that calling ceph_check_caps() would remove that
entry from the list.  This isn't true for caps that are being migrated
between MDSs, where we've received the EXPORT but not yet the IMPORT.

Instead, do a safe list iteration, and pin the next inode on the list via
the CEPH_I_NOFLUSH flag.
Signed-off-by: Sage Weil <sage@newdream.net>
parent 7af8f1e4
...@@ -1573,6 +1573,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, ...@@ -1573,6 +1573,11 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
} }
ack: ack:
if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
dout(" skipping %p I_NOFLUSH set\n", inode);
continue;
}
if (session && session != cap->session) { if (session && session != cap->session) {
dout("oops, wrong session %p mutex\n", session); dout("oops, wrong session %p mutex\n", session);
mutex_unlock(&session->s_mutex); mutex_unlock(&session->s_mutex);
...@@ -1652,6 +1657,10 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, ...@@ -1652,6 +1657,10 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
retry: retry:
spin_lock(&inode->i_lock); spin_lock(&inode->i_lock);
if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
goto out;
}
if (ci->i_dirty_caps && ci->i_auth_cap) { if (ci->i_dirty_caps && ci->i_auth_cap) {
struct ceph_cap *cap = ci->i_auth_cap; struct ceph_cap *cap = ci->i_auth_cap;
int used = __ceph_caps_used(ci); int used = __ceph_caps_used(ci);
...@@ -2747,16 +2756,38 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) ...@@ -2747,16 +2756,38 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
*/ */
void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
{ {
struct ceph_inode_info *ci; struct ceph_inode_info *ci, *nci = NULL;
struct inode *inode; struct inode *inode, *ninode = NULL;
struct list_head *p, *n;
dout("flush_dirty_caps\n"); dout("flush_dirty_caps\n");
spin_lock(&mdsc->cap_dirty_lock); spin_lock(&mdsc->cap_dirty_lock);
while (!list_empty(&mdsc->cap_dirty)) { list_for_each_safe(p, n, &mdsc->cap_dirty) {
ci = list_first_entry(&mdsc->cap_dirty, if (nci) {
struct ceph_inode_info, ci = nci;
inode = ninode;
ci->i_ceph_flags &= ~CEPH_I_NOFLUSH;
dout("flush_dirty_caps inode %p (was next inode)\n",
inode);
} else {
ci = list_entry(p, struct ceph_inode_info,
i_dirty_item); i_dirty_item);
inode = igrab(&ci->vfs_inode); inode = igrab(&ci->vfs_inode);
BUG_ON(!inode);
dout("flush_dirty_caps inode %p\n", inode);
}
if (n != &mdsc->cap_dirty) {
nci = list_entry(n, struct ceph_inode_info,
i_dirty_item);
ninode = igrab(&nci->vfs_inode);
BUG_ON(!ninode);
nci->i_ceph_flags |= CEPH_I_NOFLUSH;
dout("flush_dirty_caps next inode %p, noflush\n",
ninode);
} else {
nci = NULL;
ninode = NULL;
}
spin_unlock(&mdsc->cap_dirty_lock); spin_unlock(&mdsc->cap_dirty_lock);
if (inode) { if (inode) {
ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
......
...@@ -289,6 +289,7 @@ struct ceph_inode_xattrs_info { ...@@ -289,6 +289,7 @@ struct ceph_inode_xattrs_info {
#define CEPH_I_COMPLETE 1 /* we have complete directory cached */ #define CEPH_I_COMPLETE 1 /* we have complete directory cached */
#define CEPH_I_NODELAY 4 /* do not delay cap release */ #define CEPH_I_NODELAY 4 /* do not delay cap release */
#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
struct ceph_inode_info { struct ceph_inode_info {
struct ceph_vino i_vino; /* ceph ino + snap */ struct ceph_vino i_vino; /* ceph ino + snap */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment