Commit 5a3a2d83 authored by Qiuyang Sun, committed by Jaegeuk Kim

f2fs: dax: fix races between page faults and truncating pages

Currently in F2FS, page faults and operations that truncate the pagecache
or data blocks are completely unsynchronized. This can result in a page
fault faulting a page into a range that we are changing after
truncating, and thus we can end up with a page mapped to disk blocks that
will be shortly freed. Filesystem corruption will shortly follow.

This patch fixes the problem by creating a new rw semaphore, i_mmap_sem, in
f2fs_inode_info and grabbing it for write in functions that remove blocks from
the extent tree, and for read over page faults. The mechanism is similar to
that in ext4.
Signed-off-by: Qiuyang Sun <sunqiuyang@huawei.com>
Reviewed-by: Chao Yu <yuchao0@huawei.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
parent 72fdbe2e
...@@ -1801,8 +1801,10 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to) ...@@ -1801,8 +1801,10 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
loff_t i_size = i_size_read(inode); loff_t i_size = i_size_read(inode);
if (to > i_size) { if (to > i_size) {
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_pagecache(inode, i_size); truncate_pagecache(inode, i_size);
truncate_blocks(inode, i_size, true); truncate_blocks(inode, i_size, true);
up_write(&F2FS_I(inode)->i_mmap_sem);
} }
} }
......
...@@ -519,6 +519,7 @@ struct f2fs_inode_info { ...@@ -519,6 +519,7 @@ struct f2fs_inode_info {
struct mutex inmem_lock; /* lock for inmemory pages */ struct mutex inmem_lock; /* lock for inmemory pages */
struct extent_tree *extent_tree; /* cached extent_tree entry */ struct extent_tree *extent_tree; /* cached extent_tree entry */
struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */ struct rw_semaphore dio_rwsem[2];/* avoid racing between dio and gc */
struct rw_semaphore i_mmap_sem;
}; };
static inline void get_extent_info(struct extent_info *ext, static inline void get_extent_info(struct extent_info *ext,
......
...@@ -33,6 +33,18 @@ ...@@ -33,6 +33,18 @@
#include "trace.h" #include "trace.h"
#include <trace/events/f2fs.h> #include <trace/events/f2fs.h>
/*
 * Fault handler installed as .fault in f2fs_file_vm_ops.
 *
 * Holds the inode's i_mmap_sem for read across the call to the generic
 * filemap_fault() and hands back whatever filemap_fault() returned.
 */
static int f2fs_filemap_fault(struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct rw_semaphore *mmap_sem = &F2FS_I(inode)->i_mmap_sem;
	int ret;

	/* Shared hold: the truncating paths take this semaphore for write. */
	down_read(mmap_sem);
	ret = filemap_fault(vmf);
	up_read(mmap_sem);

	return ret;
}
static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
{ {
struct page *page = vmf->page; struct page *page = vmf->page;
...@@ -59,13 +71,14 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) ...@@ -59,13 +71,14 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
f2fs_balance_fs(sbi, dn.node_changed); f2fs_balance_fs(sbi, dn.node_changed);
file_update_time(vmf->vma->vm_file); file_update_time(vmf->vma->vm_file);
down_read(&F2FS_I(inode)->i_mmap_sem);
lock_page(page); lock_page(page);
if (unlikely(page->mapping != inode->i_mapping || if (unlikely(page->mapping != inode->i_mapping ||
page_offset(page) > i_size_read(inode) || page_offset(page) > i_size_read(inode) ||
!PageUptodate(page))) { !PageUptodate(page))) {
unlock_page(page); unlock_page(page);
err = -EFAULT; err = -EFAULT;
goto out; goto out_sem;
} }
/* /*
...@@ -94,6 +107,8 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) ...@@ -94,6 +107,8 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr); f2fs_wait_on_encrypted_page_writeback(sbi, dn.data_blkaddr);
out_sem:
up_read(&F2FS_I(inode)->i_mmap_sem);
out: out:
sb_end_pagefault(inode->i_sb); sb_end_pagefault(inode->i_sb);
f2fs_update_time(sbi, REQ_TIME); f2fs_update_time(sbi, REQ_TIME);
...@@ -101,7 +116,7 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf) ...@@ -101,7 +116,7 @@ static int f2fs_vm_page_mkwrite(struct vm_fault *vmf)
} }
static const struct vm_operations_struct f2fs_file_vm_ops = { static const struct vm_operations_struct f2fs_file_vm_ops = {
.fault = filemap_fault, .fault = f2fs_filemap_fault,
.map_pages = filemap_map_pages, .map_pages = filemap_map_pages,
.page_mkwrite = f2fs_vm_page_mkwrite, .page_mkwrite = f2fs_vm_page_mkwrite,
}; };
...@@ -700,8 +715,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) ...@@ -700,8 +715,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
return -EACCES; return -EACCES;
if (attr->ia_size <= i_size_read(inode)) { if (attr->ia_size <= i_size_read(inode)) {
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_setsize(inode, attr->ia_size); truncate_setsize(inode, attr->ia_size);
err = f2fs_truncate(inode); err = f2fs_truncate(inode);
up_write(&F2FS_I(inode)->i_mmap_sem);
if (err) if (err)
return err; return err;
} else { } else {
...@@ -709,7 +726,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) ...@@ -709,7 +726,9 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
* do not trim all blocks after i_size if target size is * do not trim all blocks after i_size if target size is
* larger than i_size. * larger than i_size.
*/ */
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_setsize(inode, attr->ia_size); truncate_setsize(inode, attr->ia_size);
up_write(&F2FS_I(inode)->i_mmap_sem);
/* should convert inline inode here */ /* should convert inline inode here */
if (!f2fs_may_inline_data(inode)) { if (!f2fs_may_inline_data(inode)) {
...@@ -852,12 +871,14 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) ...@@ -852,12 +871,14 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_start = (loff_t)pg_start << PAGE_SHIFT;
blk_end = (loff_t)pg_end << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT;
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_inode_pages_range(mapping, blk_start, truncate_inode_pages_range(mapping, blk_start,
blk_end - 1); blk_end - 1);
f2fs_lock_op(sbi); f2fs_lock_op(sbi);
ret = truncate_hole(inode, pg_start, pg_end); ret = truncate_hole(inode, pg_start, pg_end);
f2fs_unlock_op(sbi); f2fs_unlock_op(sbi);
up_write(&F2FS_I(inode)->i_mmap_sem);
} }
} }
...@@ -1096,16 +1117,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -1096,16 +1117,17 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
pg_start = offset >> PAGE_SHIFT; pg_start = offset >> PAGE_SHIFT;
pg_end = (offset + len) >> PAGE_SHIFT; pg_end = (offset + len) >> PAGE_SHIFT;
down_write(&F2FS_I(inode)->i_mmap_sem);
/* write out all dirty pages from offset */ /* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret) if (ret)
return ret; goto out;
truncate_pagecache(inode, offset); truncate_pagecache(inode, offset);
ret = f2fs_do_collapse(inode, pg_start, pg_end); ret = f2fs_do_collapse(inode, pg_start, pg_end);
if (ret) if (ret)
return ret; goto out;
/* write out all moved pages, if possible */ /* write out all moved pages, if possible */
filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
...@@ -1118,6 +1140,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -1118,6 +1140,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (!ret) if (!ret)
f2fs_i_size_write(inode, new_size); f2fs_i_size_write(inode, new_size);
out:
up_write(&F2FS_I(inode)->i_mmap_sem);
return ret; return ret;
} }
...@@ -1182,9 +1206,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ...@@ -1182,9 +1206,10 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (ret) if (ret)
return ret; return ret;
down_write(&F2FS_I(inode)->i_mmap_sem);
ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1); ret = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
if (ret) if (ret)
return ret; goto out_sem;
truncate_pagecache_range(inode, offset, offset + len - 1); truncate_pagecache_range(inode, offset, offset + len - 1);
...@@ -1198,7 +1223,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ...@@ -1198,7 +1223,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start, off_start, ret = fill_zero(inode, pg_start, off_start,
off_end - off_start); off_end - off_start);
if (ret) if (ret)
return ret; goto out_sem;
new_size = max_t(loff_t, new_size, offset + len); new_size = max_t(loff_t, new_size, offset + len);
} else { } else {
...@@ -1206,7 +1231,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ...@@ -1206,7 +1231,7 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
ret = fill_zero(inode, pg_start++, off_start, ret = fill_zero(inode, pg_start++, off_start,
PAGE_SIZE - off_start); PAGE_SIZE - off_start);
if (ret) if (ret)
return ret; goto out_sem;
new_size = max_t(loff_t, new_size, new_size = max_t(loff_t, new_size,
(loff_t)pg_start << PAGE_SHIFT); (loff_t)pg_start << PAGE_SHIFT);
...@@ -1255,6 +1280,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ...@@ -1255,6 +1280,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
out: out:
if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size) if (!(mode & FALLOC_FL_KEEP_SIZE) && i_size_read(inode) < new_size)
f2fs_i_size_write(inode, new_size); f2fs_i_size_write(inode, new_size);
out_sem:
up_write(&F2FS_I(inode)->i_mmap_sem);
return ret; return ret;
} }
...@@ -1284,14 +1311,15 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -1284,14 +1311,15 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true); f2fs_balance_fs(sbi, true);
down_write(&F2FS_I(inode)->i_mmap_sem);
ret = truncate_blocks(inode, i_size_read(inode), true); ret = truncate_blocks(inode, i_size_read(inode), true);
if (ret) if (ret)
return ret; goto out;
/* write out all dirty pages from offset */ /* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
if (ret) if (ret)
return ret; goto out;
truncate_pagecache(inode, offset); truncate_pagecache(inode, offset);
...@@ -1320,6 +1348,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) ...@@ -1320,6 +1348,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
if (!ret) if (!ret)
f2fs_i_size_write(inode, new_size); f2fs_i_size_write(inode, new_size);
out:
up_write(&F2FS_I(inode)->i_mmap_sem);
return ret; return ret;
} }
......
...@@ -624,6 +624,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) ...@@ -624,6 +624,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
mutex_init(&fi->inmem_lock); mutex_init(&fi->inmem_lock);
init_rwsem(&fi->dio_rwsem[READ]); init_rwsem(&fi->dio_rwsem[READ]);
init_rwsem(&fi->dio_rwsem[WRITE]); init_rwsem(&fi->dio_rwsem[WRITE]);
init_rwsem(&fi->i_mmap_sem);
/* Will be used by directory only */ /* Will be used by directory only */
fi->i_dir_level = F2FS_SB(sb)->dir_level; fi->i_dir_level = F2FS_SB(sb)->dir_level;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment