Commit 61f68816 authored by Yan, Zheng; committed by Ilya Dryomov

ceph: check caps in filemap_fault and page_mkwrite

Add a cap check to the page fault handler. The check prevents the page
fault handler from adding new pages to the page cache while Fcb caps
are being revoked. This fixes a hang during Fc revocation under a
multi-client mmap IO workload.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Reviewed-by: Sage Weil <sage@inktank.com>
parent 9b60e70b
...@@ -1210,6 +1210,41 @@ const struct address_space_operations ceph_aops = { ...@@ -1210,6 +1210,41 @@ const struct address_space_operations ceph_aops = {
/* /*
* vm ops * vm ops
*/ */
static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
int want, got, ret;
dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
inode, ceph_vinop(inode), off, PAGE_CACHE_SIZE);
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
while (1) {
got = 0;
ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
if (ret == 0)
break;
if (ret != -ERESTARTSYS) {
WARN_ON(1);
return VM_FAULT_SIGBUS;
}
}
dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
inode, off, PAGE_CACHE_SIZE, ceph_cap_string(got));
ret = filemap_fault(vma, vmf);
dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
inode, off, PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
ceph_put_cap_refs(ci, got);
return ret;
}
/* /*
* Reuse write_begin here for simplicity. * Reuse write_begin here for simplicity.
...@@ -1217,23 +1252,41 @@ const struct address_space_operations ceph_aops = { ...@@ -1217,23 +1252,41 @@ const struct address_space_operations ceph_aops = {
static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{ {
struct inode *inode = file_inode(vma->vm_file); struct inode *inode = file_inode(vma->vm_file);
struct page *page = vmf->page; struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct page *page = vmf->page;
loff_t off = page_offset(page); loff_t off = page_offset(page);
loff_t size, len; loff_t size = i_size_read(inode);
int ret; size_t len;
int want, got, ret;
/* Update time before taking page lock */
file_update_time(vma->vm_file);
size = i_size_read(inode);
if (off + PAGE_CACHE_SIZE <= size) if (off + PAGE_CACHE_SIZE <= size)
len = PAGE_CACHE_SIZE; len = PAGE_CACHE_SIZE;
else else
len = size & ~PAGE_CACHE_MASK; len = size & ~PAGE_CACHE_MASK;
dout("page_mkwrite %p %llu~%llu page %p idx %lu\n", inode, dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n",
off, len, page, page->index); inode, ceph_vinop(inode), off, len, size);
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_BUFFER;
while (1) {
got = 0;
ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len);
if (ret == 0)
break;
if (ret != -ERESTARTSYS) {
WARN_ON(1);
return VM_FAULT_SIGBUS;
}
}
dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
inode, off, len, ceph_cap_string(got));
/* Update time before taking page lock */
file_update_time(vma->vm_file);
lock_page(page); lock_page(page);
...@@ -1255,14 +1308,26 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -1255,14 +1308,26 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS; ret = VM_FAULT_SIGBUS;
} }
out: out:
dout("page_mkwrite %p %llu~%llu = %d\n", inode, off, len, ret); if (ret != VM_FAULT_LOCKED) {
if (ret != VM_FAULT_LOCKED)
unlock_page(page); unlock_page(page);
} else {
int dirty;
spin_lock(&ci->i_ceph_lock);
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
__mark_inode_dirty(inode, dirty);
}
dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %d\n",
inode, off, len, ceph_cap_string(got), ret);
ceph_put_cap_refs(ci, got);
return ret; return ret;
} }
static struct vm_operations_struct ceph_vmops = { static struct vm_operations_struct ceph_vmops = {
.fault = filemap_fault, .fault = ceph_filemap_fault,
.page_mkwrite = ceph_page_mkwrite, .page_mkwrite = ceph_page_mkwrite,
.remap_pages = generic_file_remap_pages, .remap_pages = generic_file_remap_pages,
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment