Commit 0e294387 authored by Yan, Zheng, committed by Ilya Dryomov

ceph: unify cap flush and snapcap flush

This patch includes following changes
- Assign flush tid to snapcap flush
- Remove session's s_cap_snaps_flushing list. Add inode to session's
  s_cap_flushing list instead. Inode is removed from the list when
  there is no pending snapcap flush or cap flush.
- Make __kick_flushing_caps() re-send both snapcap flushes and cap
  flushes.
Signed-off-by: Yan, Zheng <zyan@redhat.com>
parent e4500b5e
This diff is collapsed.
...@@ -472,7 +472,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, ...@@ -472,7 +472,6 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
s->s_cap_iterator = NULL; s->s_cap_iterator = NULL;
INIT_LIST_HEAD(&s->s_cap_releases); INIT_LIST_HEAD(&s->s_cap_releases);
INIT_LIST_HEAD(&s->s_cap_flushing); INIT_LIST_HEAD(&s->s_cap_flushing);
INIT_LIST_HEAD(&s->s_cap_snaps_flushing);
dout("register_session mds%d\n", mds); dout("register_session mds%d\n", mds);
if (mds >= mdsc->max_sessions) { if (mds >= mdsc->max_sessions) {
...@@ -1479,21 +1478,6 @@ static int trim_caps(struct ceph_mds_client *mdsc, ...@@ -1479,21 +1478,6 @@ static int trim_caps(struct ceph_mds_client *mdsc,
return 0; return 0;
} }
/*
 * Return non-zero when inode 'ci' has no capsnap older than
 * 'want_snap_seq' still pending; only the head of i_cap_snaps is
 * inspected, under i_ceph_lock.  A want_snap_seq of 0 always
 * reports "flushed".
 */
static int check_capsnap_flush(struct ceph_inode_info *ci,
			       u64 want_snap_seq)
{
	struct ceph_cap_snap *capsnap;
	int flushed = 1;

	spin_lock(&ci->i_ceph_lock);
	if (want_snap_seq > 0 && !list_empty(&ci->i_cap_snaps)) {
		capsnap = list_first_entry(&ci->i_cap_snaps,
					   struct ceph_cap_snap, ci_item);
		if (capsnap->follows < want_snap_seq)
			flushed = 0;
	}
	spin_unlock(&ci->i_ceph_lock);

	return flushed;
}
static int check_caps_flush(struct ceph_mds_client *mdsc, static int check_caps_flush(struct ceph_mds_client *mdsc,
u64 want_flush_tid) u64 want_flush_tid)
{ {
...@@ -1520,54 +1504,9 @@ static int check_caps_flush(struct ceph_mds_client *mdsc, ...@@ -1520,54 +1504,9 @@ static int check_caps_flush(struct ceph_mds_client *mdsc,
* returns true if we've flushed through want_flush_tid * returns true if we've flushed through want_flush_tid
*/ */
static void wait_caps_flush(struct ceph_mds_client *mdsc, static void wait_caps_flush(struct ceph_mds_client *mdsc,
u64 want_flush_tid, u64 want_snap_seq) u64 want_flush_tid)
{ {
int mds; dout("check_caps_flush want %llu\n", want_flush_tid);
dout("check_caps_flush want %llu snap want %llu\n",
want_flush_tid, want_snap_seq);
mutex_lock(&mdsc->mutex);
for (mds = 0; mds < mdsc->max_sessions; ) {
struct ceph_mds_session *session = mdsc->sessions[mds];
struct inode *inode = NULL;
if (!session) {
mds++;
continue;
}
get_session(session);
mutex_unlock(&mdsc->mutex);
mutex_lock(&session->s_mutex);
if (!list_empty(&session->s_cap_snaps_flushing)) {
struct ceph_cap_snap *capsnap =
list_first_entry(&session->s_cap_snaps_flushing,
struct ceph_cap_snap,
flushing_item);
struct ceph_inode_info *ci = capsnap->ci;
if (!check_capsnap_flush(ci, want_snap_seq)) {
dout("check_cap_flush still flushing snap %p "
"follows %lld <= %lld to mds%d\n",
&ci->vfs_inode, capsnap->follows,
want_snap_seq, mds);
inode = igrab(&ci->vfs_inode);
}
}
mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session);
if (inode) {
wait_event(mdsc->cap_flushing_wq,
check_capsnap_flush(ceph_inode(inode),
want_snap_seq));
iput(inode);
} else {
mds++;
}
mutex_lock(&mdsc->mutex);
}
mutex_unlock(&mdsc->mutex);
wait_event(mdsc->cap_flushing_wq, wait_event(mdsc->cap_flushing_wq,
check_caps_flush(mdsc, want_flush_tid)); check_caps_flush(mdsc, want_flush_tid));
...@@ -3584,7 +3523,7 @@ static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid) ...@@ -3584,7 +3523,7 @@ static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
void ceph_mdsc_sync(struct ceph_mds_client *mdsc) void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{ {
u64 want_tid, want_flush, want_snap; u64 want_tid, want_flush;
if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) if (ACCESS_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
return; return;
...@@ -3599,15 +3538,11 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) ...@@ -3599,15 +3538,11 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
want_flush = mdsc->last_cap_flush_tid; want_flush = mdsc->last_cap_flush_tid;
spin_unlock(&mdsc->cap_dirty_lock); spin_unlock(&mdsc->cap_dirty_lock);
down_read(&mdsc->snap_rwsem); dout("sync want tid %lld flush_seq %lld\n",
want_snap = mdsc->last_snap_seq; want_tid, want_flush);
up_read(&mdsc->snap_rwsem);
dout("sync want tid %lld flush_seq %lld snap_seq %lld\n",
want_tid, want_flush, want_snap);
wait_unsafe_requests(mdsc, want_tid); wait_unsafe_requests(mdsc, want_tid);
wait_caps_flush(mdsc, want_flush, want_snap); wait_caps_flush(mdsc, want_flush);
} }
/* /*
......
...@@ -152,7 +152,6 @@ struct ceph_mds_session { ...@@ -152,7 +152,6 @@ struct ceph_mds_session {
/* protected by mutex */ /* protected by mutex */
struct list_head s_cap_flushing; /* inodes w/ flushing caps */ struct list_head s_cap_flushing; /* inodes w/ flushing caps */
struct list_head s_cap_snaps_flushing;
unsigned long s_renew_requested; /* last time we sent a renew req */ unsigned long s_renew_requested; /* last time we sent a renew req */
u64 s_renew_seq; u64 s_renew_seq;
......
...@@ -520,9 +520,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) ...@@ -520,9 +520,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
ihold(inode); ihold(inode);
atomic_set(&capsnap->nref, 1); atomic_set(&capsnap->nref, 1);
capsnap->ci = ci;
INIT_LIST_HEAD(&capsnap->ci_item); INIT_LIST_HEAD(&capsnap->ci_item);
INIT_LIST_HEAD(&capsnap->flushing_item);
capsnap->follows = old_snapc->seq; capsnap->follows = old_snapc->seq;
capsnap->issued = __ceph_caps_issued(ci, NULL); capsnap->issued = __ceph_caps_issued(ci, NULL);
...@@ -800,7 +798,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) ...@@ -800,7 +798,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
ihold(inode); ihold(inode);
spin_unlock(&mdsc->snap_flush_lock); spin_unlock(&mdsc->snap_flush_lock);
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
__ceph_flush_snaps(ci, &session, 0); __ceph_flush_snaps(ci, &session);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
iput(inode); iput(inode);
spin_lock(&mdsc->snap_flush_lock); spin_lock(&mdsc->snap_flush_lock);
......
...@@ -147,6 +147,13 @@ struct ceph_cap { ...@@ -147,6 +147,13 @@ struct ceph_cap {
#define CHECK_CAPS_AUTHONLY 2 /* only check auth cap */ #define CHECK_CAPS_AUTHONLY 2 /* only check auth cap */
#define CHECK_CAPS_FLUSH 4 /* flush any dirty caps */ #define CHECK_CAPS_FLUSH 4 /* flush any dirty caps */
/*
 * Tracks one in-flight cap (or capsnap) flush to the MDS, keyed by
 * its flush tid.
 */
struct ceph_cap_flush {
	u64 tid;		/* flush tid; presumably taken from mdsc->last_cap_flush_tid -- confirm */
	int caps;		/* cap bits being flushed; 0 means capsnap */
	struct list_head g_list; // global
	struct list_head i_list; // per inode
};
/* /*
* Snapped cap state that is pending flush to mds. When a snapshot occurs, * Snapped cap state that is pending flush to mds. When a snapshot occurs,
* we first complete any in-process sync writes and writeback any dirty * we first complete any in-process sync writes and writeback any dirty
...@@ -154,10 +161,11 @@ struct ceph_cap { ...@@ -154,10 +161,11 @@ struct ceph_cap {
*/ */
struct ceph_cap_snap { struct ceph_cap_snap {
atomic_t nref; atomic_t nref;
struct ceph_inode_info *ci; struct list_head ci_item;
struct list_head ci_item, flushing_item;
u64 follows, flush_tid; struct ceph_cap_flush cap_flush;
u64 follows;
int issued, dirty; int issued, dirty;
struct ceph_snap_context *context; struct ceph_snap_context *context;
...@@ -186,13 +194,6 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap) ...@@ -186,13 +194,6 @@ static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
} }
} }
/* One pending cap flush, tracked both globally and per inode. */
struct ceph_cap_flush {
	u64 tid;		/* flush tid */
	int caps;		/* cap bits being flushed */
	struct list_head g_list; // global
	struct list_head i_list; // per inode
};
/* /*
* The frag tree describes how a directory is fragmented, potentially across * The frag tree describes how a directory is fragmented, potentially across
* multiple metadata servers. It is also used to indicate points where * multiple metadata servers. It is also used to indicate points where
...@@ -888,8 +889,7 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); ...@@ -888,8 +889,7 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc); struct ceph_snap_context *snapc);
extern void __ceph_flush_snaps(struct ceph_inode_info *ci, extern void __ceph_flush_snaps(struct ceph_inode_info *ci,
struct ceph_mds_session **psession, struct ceph_mds_session **psession);
int again);
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session); struct ceph_mds_session *session);
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment