Commit 37505d57 authored by Yan, Zheng; committed by Sage Weil

ceph: take i_mutex before getting Fw cap

There is a deadlock, as illustrated below. The fix is to take i_mutex
before getting the Fw cap reference.

      write                    truncate                 MDS
---------------------     --------------------      --------------
get Fw cap
                          lock i_mutex
lock i_mutex (blocked)
                          request setattr.size  ->
                                                <-   revoke Fw cap
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Alex Elder <elder@inktank.com>
Reviewed-by: Sage Weil <sage@inktank.com>
parent 26be8808
...@@ -2052,6 +2052,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, ...@@ -2052,6 +2052,13 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
goto out; goto out;
} }
/* finish pending truncate */
while (ci->i_truncate_pending) {
spin_unlock(&ci->i_ceph_lock);
__ceph_do_pending_vmtruncate(inode, !(need & CEPH_CAP_FILE_WR));
spin_lock(&ci->i_ceph_lock);
}
if (need & CEPH_CAP_FILE_WR) { if (need & CEPH_CAP_FILE_WR) {
if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) { if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
dout("get_cap_refs %p endoff %llu > maxsize %llu\n", dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
...@@ -2073,12 +2080,6 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want, ...@@ -2073,12 +2080,6 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
} }
have = __ceph_caps_issued(ci, &implemented); have = __ceph_caps_issued(ci, &implemented);
/*
* disallow writes while a truncate is pending
*/
if (ci->i_truncate_pending)
have &= ~CEPH_CAP_FILE_WR;
if ((have & need) == need) { if ((have & need) == need) {
/* /*
* Look at (implemented & ~have & not) so that we keep waiting * Look at (implemented & ~have & not) so that we keep waiting
......
...@@ -651,7 +651,6 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, ...@@ -651,7 +651,6 @@ static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov,
dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n", dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
inode, ceph_vinop(inode), pos, (unsigned)len, inode); inode, ceph_vinop(inode), pos, (unsigned)len, inode);
again: again:
__ceph_do_pending_vmtruncate(inode, true);
if (fi->fmode & CEPH_FILE_MODE_LAZY) if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO; want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else else
...@@ -728,7 +727,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -728,7 +727,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
ret = -ENOSPC; ret = -ENOSPC;
goto out; goto out;
} }
__ceph_do_pending_vmtruncate(inode, true); mutex_lock(&inode->i_mutex);
dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n", dout("aio_write %p %llx.%llx %llu~%u getting caps. i_size %llu\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
inode->i_size); inode->i_size);
...@@ -737,8 +736,10 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -737,8 +736,10 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
else else
want = CEPH_CAP_FILE_BUFFER; want = CEPH_CAP_FILE_BUFFER;
ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
if (ret < 0) if (ret < 0) {
goto out_put; mutex_unlock(&inode->i_mutex);
goto out;
}
dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
...@@ -748,10 +749,10 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -748,10 +749,10 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
(iocb->ki_filp->f_flags & O_DIRECT) || (iocb->ki_filp->f_flags & O_DIRECT) ||
(inode->i_sb->s_flags & MS_SYNCHRONOUS) || (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
(fi->flags & CEPH_F_SYNC)) { (fi->flags & CEPH_F_SYNC)) {
mutex_unlock(&inode->i_mutex);
ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
&iocb->ki_pos); &iocb->ki_pos);
} else { } else {
mutex_lock(&inode->i_mutex);
ret = __generic_file_aio_write(iocb, iov, nr_segs, ret = __generic_file_aio_write(iocb, iov, nr_segs,
&iocb->ki_pos); &iocb->ki_pos);
mutex_unlock(&inode->i_mutex); mutex_unlock(&inode->i_mutex);
...@@ -766,7 +767,6 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, ...@@ -766,7 +767,6 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov,
__mark_inode_dirty(inode, dirty); __mark_inode_dirty(inode, dirty);
} }
out_put:
dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
ceph_cap_string(got)); ceph_cap_string(got));
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment