Commit 083db6fd authored by David Howells, committed by Ilya Dryomov

ceph: uninline the data on a file opened for writing

If a ceph file is made up of inline data, uninline it in ceph_open()
rather than in ceph_page_mkwrite(), ceph_write_iter(), ceph_fallocate() or
ceph_write_begin().

This makes it easier to convert to using the netfs library for VM write
hooks.

Should this also take the inode lock for the duration of uninlining to
prevent a race with truncation?

[ jlayton: fix up folio locking, update i_inline_version after write ]
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
parent 5b19f1eb
...@@ -1317,45 +1317,11 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, ...@@ -1317,45 +1317,11 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
struct page **pagep, void **fsdata) struct page **pagep, void **fsdata)
{ {
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct folio *folio = NULL; struct folio *folio = NULL;
pgoff_t index = pos >> PAGE_SHIFT;
int r; int r;
/*
* Uninlining should have already been done and everything updated, EXCEPT
* for inline_version sent to the MDS.
*/
if (ci->i_inline_version != CEPH_INLINE_NONE) {
unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
if (aop_flags & AOP_FLAG_NOFS)
fgp_flags |= FGP_NOFS;
folio = __filemap_get_folio(mapping, index, fgp_flags,
mapping_gfp_mask(mapping));
if (!folio)
return -ENOMEM;
/*
* The inline_version on a new inode is set to 1. If that's the
* case, then the folio is brand new and isn't yet Uptodate.
*/
r = 0;
if (index == 0 && ci->i_inline_version != 1) {
if (!folio_test_uptodate(folio)) {
WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
ci->i_inline_version);
r = -EINVAL;
}
goto out;
}
zero_user_segment(&folio->page, 0, folio_size(folio));
folio_mark_uptodate(folio);
goto out;
}
r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &folio, NULL, r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &folio, NULL,
&ceph_netfs_read_ops, NULL); &ceph_netfs_read_ops, NULL);
out:
if (r == 0) if (r == 0)
folio_wait_fscache(folio); folio_wait_fscache(folio);
if (r < 0) { if (r < 0) {
...@@ -1551,19 +1517,6 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) ...@@ -1551,19 +1517,6 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb); sb_start_pagefault(inode->i_sb);
ceph_block_sigs(&oldset); ceph_block_sigs(&oldset);
if (ci->i_inline_version != CEPH_INLINE_NONE) {
struct page *locked_page = NULL;
if (off == 0) {
lock_page(page);
locked_page = page;
}
err = ceph_uninline_data(vma->vm_file, locked_page);
if (locked_page)
unlock_page(locked_page);
if (err < 0)
goto out_free;
}
if (off + thp_size(page) <= size) if (off + thp_size(page) <= size)
len = thp_size(page); len = thp_size(page);
else else
...@@ -1620,11 +1573,9 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf) ...@@ -1620,11 +1573,9 @@ static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
ceph_put_snap_context(snapc); ceph_put_snap_context(snapc);
} while (err == 0); } while (err == 0);
if (ret == VM_FAULT_LOCKED || if (ret == VM_FAULT_LOCKED) {
ci->i_inline_version != CEPH_INLINE_NONE) {
int dirty; int dirty;
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
&prealloc_cf); &prealloc_cf);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
...@@ -1688,16 +1639,29 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page, ...@@ -1688,16 +1639,29 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
} }
} }
int ceph_uninline_data(struct file *filp, struct page *locked_page) int ceph_uninline_data(struct file *file)
{ {
struct inode *inode = file_inode(filp); struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_request *req; struct ceph_osd_request *req;
struct page *page = NULL; struct ceph_cap_flush *prealloc_cf;
struct folio *folio = NULL;
struct page *pages[1];
u64 len, inline_version; u64 len, inline_version;
int err = 0; int err = 0;
bool from_pagecache = false;
prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
return -ENOMEM;
folio = read_mapping_folio(inode->i_mapping, 0, file);
if (IS_ERR(folio)) {
err = PTR_ERR(folio);
goto out;
}
folio_lock(folio);
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
inline_version = ci->i_inline_version; inline_version = ci->i_inline_version;
...@@ -1708,45 +1672,11 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ...@@ -1708,45 +1672,11 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
if (inline_version == 1 || /* initial version, no data */ if (inline_version == 1 || /* initial version, no data */
inline_version == CEPH_INLINE_NONE) inline_version == CEPH_INLINE_NONE)
goto out; goto out_unlock;
if (locked_page) {
page = locked_page;
WARN_ON(!PageUptodate(page));
} else if (ceph_caps_issued(ci) &
(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
page = find_get_page(inode->i_mapping, 0);
if (page) {
if (PageUptodate(page)) {
from_pagecache = true;
lock_page(page);
} else {
put_page(page);
page = NULL;
}
}
}
if (page) { len = i_size_read(inode);
len = i_size_read(inode); if (len > folio_size(folio))
if (len > PAGE_SIZE) len = folio_size(folio);
len = PAGE_SIZE;
} else {
page = __page_cache_alloc(GFP_NOFS);
if (!page) {
err = -ENOMEM;
goto out;
}
err = __ceph_do_getattr(inode, page,
CEPH_STAT_CAP_INLINE_DATA, true);
if (err < 0) {
/* no inline data */
if (err == -ENODATA)
err = 0;
goto out;
}
len = err;
}
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), 0, &len, 0, 1, ceph_vino(inode), 0, &len, 0, 1,
...@@ -1754,7 +1684,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ...@@ -1754,7 +1684,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
NULL, 0, 0, false); NULL, 0, 0, false);
if (IS_ERR(req)) { if (IS_ERR(req)) {
err = PTR_ERR(req); err = PTR_ERR(req);
goto out; goto out_unlock;
} }
req->r_mtime = inode->i_mtime; req->r_mtime = inode->i_mtime;
...@@ -1763,7 +1693,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ...@@ -1763,7 +1693,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
err = ceph_osdc_wait_request(&fsc->client->osdc, req); err = ceph_osdc_wait_request(&fsc->client->osdc, req);
ceph_osdc_put_request(req); ceph_osdc_put_request(req);
if (err < 0) if (err < 0)
goto out; goto out_unlock;
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), 0, &len, 1, 3, ceph_vino(inode), 0, &len, 1, 3,
...@@ -1772,10 +1702,11 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ...@@ -1772,10 +1702,11 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
ci->i_truncate_size, false); ci->i_truncate_size, false);
if (IS_ERR(req)) { if (IS_ERR(req)) {
err = PTR_ERR(req); err = PTR_ERR(req);
goto out; goto out_unlock;
} }
osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false); pages[0] = folio_page(folio, 0);
osd_req_op_extent_osd_data_pages(req, 1, pages, len, 0, false, false);
{ {
__le64 xattr_buf = cpu_to_le64(inline_version); __le64 xattr_buf = cpu_to_le64(inline_version);
...@@ -1785,7 +1716,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ...@@ -1785,7 +1716,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
CEPH_OSD_CMPXATTR_OP_GT, CEPH_OSD_CMPXATTR_OP_GT,
CEPH_OSD_CMPXATTR_MODE_U64); CEPH_OSD_CMPXATTR_MODE_U64);
if (err) if (err)
goto out_put; goto out_put_req;
} }
{ {
...@@ -1796,7 +1727,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ...@@ -1796,7 +1727,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
"inline_version", "inline_version",
xattr_buf, xattr_len, 0, 0); xattr_buf, xattr_len, 0, 0);
if (err) if (err)
goto out_put; goto out_put_req;
} }
req->r_mtime = inode->i_mtime; req->r_mtime = inode->i_mtime;
...@@ -1807,19 +1738,28 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page) ...@@ -1807,19 +1738,28 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency, ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, len, err); req->r_end_latency, len, err);
out_put: if (!err) {
int dirty;
/* Set to CAP_INLINE_NONE and dirty the caps */
down_read(&fsc->mdsc->snap_rwsem);
spin_lock(&ci->i_ceph_lock);
ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, &prealloc_cf);
spin_unlock(&ci->i_ceph_lock);
up_read(&fsc->mdsc->snap_rwsem);
if (dirty)
__mark_inode_dirty(inode, dirty);
}
out_put_req:
ceph_osdc_put_request(req); ceph_osdc_put_request(req);
if (err == -ECANCELED) if (err == -ECANCELED)
err = 0; err = 0;
out_unlock:
folio_unlock(folio);
folio_put(folio);
out: out:
if (page && page != locked_page) { ceph_free_cap_flush(prealloc_cf);
if (from_pagecache) {
unlock_page(page);
put_page(page);
} else
__free_pages(page, 0);
}
dout("uninline_data %p %llx.%llx inline_version %llu = %d\n", dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
inode, ceph_vinop(inode), inline_version, err); inode, ceph_vinop(inode), inline_version, err);
return err; return err;
......
...@@ -207,6 +207,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, ...@@ -207,6 +207,7 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
struct ceph_mount_options *opt = struct ceph_mount_options *opt =
ceph_inode_to_client(&ci->vfs_inode)->mount_options; ceph_inode_to_client(&ci->vfs_inode)->mount_options;
struct ceph_file_info *fi; struct ceph_file_info *fi;
int ret;
dout("%s %p %p 0%o (%s)\n", __func__, inode, file, dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
inode->i_mode, isdir ? "dir" : "regular"); inode->i_mode, isdir ? "dir" : "regular");
...@@ -240,7 +241,22 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, ...@@ -240,7 +241,22 @@ static int ceph_init_file_info(struct inode *inode, struct file *file,
INIT_LIST_HEAD(&fi->rw_contexts); INIT_LIST_HEAD(&fi->rw_contexts);
fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen); fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen);
if ((file->f_mode & FMODE_WRITE) &&
ci->i_inline_version != CEPH_INLINE_NONE) {
ret = ceph_uninline_data(file);
if (ret < 0)
goto error;
}
return 0; return 0;
error:
ceph_fscache_unuse_cookie(inode, file->f_mode & FMODE_WRITE);
ceph_put_fmode(ci, fi->fmode, 1);
kmem_cache_free(ceph_file_cachep, fi);
/* wake up anyone waiting for caps on this inode */
wake_up_all(&ci->i_cap_wq);
return ret;
} }
/* /*
...@@ -1041,7 +1057,6 @@ static void ceph_aio_complete(struct inode *inode, ...@@ -1041,7 +1057,6 @@ static void ceph_aio_complete(struct inode *inode,
} }
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
&aio_req->prealloc_cf); &aio_req->prealloc_cf);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
...@@ -1778,12 +1793,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1778,12 +1793,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err) if (err)
goto out; goto out;
if (ci->i_inline_version != CEPH_INLINE_NONE) {
err = ceph_uninline_data(file, NULL);
if (err < 0)
goto out;
}
dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n", dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
inode, ceph_vinop(inode), pos, count, i_size_read(inode)); inode, ceph_vinop(inode), pos, count, i_size_read(inode));
if (!(fi->flags & CEPH_F_SYNC) && !direct_lock) if (!(fi->flags & CEPH_F_SYNC) && !direct_lock)
...@@ -1855,7 +1864,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -1855,7 +1864,6 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
int dirty; int dirty;
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
&prealloc_cf); &prealloc_cf);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
...@@ -2109,12 +2117,6 @@ static long ceph_fallocate(struct file *file, int mode, ...@@ -2109,12 +2117,6 @@ static long ceph_fallocate(struct file *file, int mode,
goto unlock; goto unlock;
} }
if (ci->i_inline_version != CEPH_INLINE_NONE) {
ret = ceph_uninline_data(file, NULL);
if (ret < 0)
goto unlock;
}
size = i_size_read(inode); size = i_size_read(inode);
/* Are we punching a hole beyond EOF? */ /* Are we punching a hole beyond EOF? */
...@@ -2139,7 +2141,6 @@ static long ceph_fallocate(struct file *file, int mode, ...@@ -2139,7 +2141,6 @@ static long ceph_fallocate(struct file *file, int mode,
if (!ret) { if (!ret) {
spin_lock(&ci->i_ceph_lock); spin_lock(&ci->i_ceph_lock);
ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR, dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
&prealloc_cf); &prealloc_cf);
spin_unlock(&ci->i_ceph_lock); spin_unlock(&ci->i_ceph_lock);
...@@ -2532,7 +2533,6 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, ...@@ -2532,7 +2533,6 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
} }
/* Mark Fw dirty */ /* Mark Fw dirty */
spin_lock(&dst_ci->i_ceph_lock); spin_lock(&dst_ci->i_ceph_lock);
dst_ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(dst_ci, CEPH_CAP_FILE_WR, &prealloc_cf); dirty = __ceph_mark_dirty_caps(dst_ci, CEPH_CAP_FILE_WR, &prealloc_cf);
spin_unlock(&dst_ci->i_ceph_lock); spin_unlock(&dst_ci->i_ceph_lock);
if (dirty) if (dirty)
......
...@@ -1213,7 +1213,7 @@ extern void __ceph_touch_fmode(struct ceph_inode_info *ci, ...@@ -1213,7 +1213,7 @@ extern void __ceph_touch_fmode(struct ceph_inode_info *ci,
/* addr.c */ /* addr.c */
extern const struct address_space_operations ceph_aops; extern const struct address_space_operations ceph_aops;
extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); extern int ceph_mmap(struct file *file, struct vm_area_struct *vma);
extern int ceph_uninline_data(struct file *filp, struct page *locked_page); extern int ceph_uninline_data(struct file *file);
extern int ceph_pool_perm_check(struct inode *inode, int need); extern int ceph_pool_perm_check(struct inode *inode, int need);
extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc); extern void ceph_pool_perm_destroy(struct ceph_mds_client* mdsc);
int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate); int ceph_purge_inode_cap(struct inode *inode, struct ceph_cap *cap, bool *invalidate);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment