Commit 02d262df authored by Dmitry Monakhov, committed by Theodore Ts'o

ext4: punch_hole should wait for DIO writers

punch_hole is the place where we have to wait for all existing writers
(writeback, aio, dio), but currently we simply flush pending end_io requests,
which is not sufficient. Another issue is that punch_hole is performed without
i_mutex held, which can result in dangerous data corruption due to a
write-after-free race.

This patch makes the following changes:
- Guard punch_hole with i_mutex
- Recheck inode flags under i_mutex
- Block all new dio readers in order to prevent an information leak caused by
  a read-after-free pattern.
- punch_hole now waits for all writers in flight (see the ordering sketch
  after this list)
  NOTE: XXX a write-after-free race is still possible because new dirty pages
  may appear via mmap(), and there is currently no easy way to stop
  writeback while punch_hole is in progress.
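
For orientation, a minimal sketch of the ordering this patch establishes
(an illustration only, not the patch text; the helper names are the ones
used in the diff below):

static int punch_hole_order_sketch(struct inode *inode,
                                   struct address_space *mapping,
                                   loff_t offset, loff_t length)
{
        int err;

        /* Flush dirty pages in the affected range before taking i_mutex */
        err = filemap_write_and_wait_range(mapping, offset,
                                           offset + length - 1);
        if (err)
                return err;

        mutex_lock(&inode->i_mutex);

        /* Recheck inode flags now that i_mutex is held */
        if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
                err = -EPERM;
                goto out_mutex;
        }

        /* Block new unlocked dio and wait for dio already in flight;
         * new writers block on i_mutex */
        ext4_inode_block_unlocked_dio(inode);
        inode_dio_wait(inode);

        /* ... punch the hole here ... */

        ext4_inode_resume_unlocked_dio(inode);
out_mutex:
        mutex_unlock(&inode->i_mutex);
        return err;
}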

[ Fixed error return from ext4_ext_punch_hole() to make sure that we
  release i_mutex before returning EPERM or ETXTBSY -- Ted ]
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent 1f555cfa
@@ -4794,9 +4794,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	loff_t first_page_offset, last_page_offset;
 	int credits, err = 0;
 
+	/*
+	 * Write out all dirty pages to avoid race conditions
+	 * Then release them.
+	 */
+	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+		err = filemap_write_and_wait_range(mapping,
+			offset, offset + length - 1);
+		if (err)
+			return err;
+	}
+
+	mutex_lock(&inode->i_mutex);
+	/* It's not possible punch hole on append only file */
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+		err = -EPERM;
+		goto out_mutex;
+	}
+	if (IS_SWAPFILE(inode)) {
+		err = -ETXTBSY;
+		goto out_mutex;
+	}
+
 	/* No need to punch hole beyond i_size */
 	if (offset >= inode->i_size)
-		return 0;
+		goto out_mutex;
 
 	/*
 	 * If the hole extends beyond i_size, set the hole
@@ -4814,33 +4837,25 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	first_page_offset = first_page << PAGE_CACHE_SHIFT;
 	last_page_offset = last_page << PAGE_CACHE_SHIFT;
 
-	/*
-	 * Write out all dirty pages to avoid race conditions
-	 * Then release them.
-	 */
-	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-		err = filemap_write_and_wait_range(mapping,
-			offset, offset + length - 1);
-		if (err)
-			return err;
-	}
-
 	/* Now release the pages */
 	if (last_page_offset > first_page_offset) {
 		truncate_pagecache_range(inode, first_page_offset,
 					 last_page_offset - 1);
 	}
 
-	/* finish any pending end_io work */
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	inode_dio_wait(inode);
+
 	err = ext4_flush_completed_IO(inode);
 	if (err)
-		return err;
+		goto out_dio;
 
 	credits = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, credits);
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto out_dio;
+	}
 
 	/*
@@ -4930,6 +4945,10 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
 	ext4_journal_stop(handle);
+out_dio:
+	ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+	mutex_unlock(&inode->i_mutex);
 	return err;
 }
 
 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
...