Commit cc883236 authored by Zhang Yi's avatar Zhang Yi Committed by Theodore Ts'o

ext4: drop unnecessary journal handle in delalloc write

After we factor out the inline data write procedure from
ext4_da_write_end(), we don't need to start journal handle for the cases
of both buffer overwrite and append-write. If we need to update
i_disksize, mark_inode_dirty() do start handle and update inode buffer.
So we could just remove all the journal handle codes in the delalloc
write procedure.

After this patch, we could get a lot of performance improvement. Below
is the Unixbench comparison data test on my machine with 'Intel Xeon
Gold 5120' CPU and nvme SSD backend.

Test cmd:

  ./Run -c 56 -i 3 fstime fsbuffer fsdisk

Before this patch:

  System Benchmarks Partial Index           BASELINE       RESULT   INDEX
  File Copy 1024 bufsize 2000 maxblocks       3960.0     422965.0   1068.1
  File Copy 256 bufsize 500 maxblocks         1655.0     105077.0   634.9
  File Copy 4096 bufsize 8000 maxblocks       5800.0    1429092.0   2464.0
                                                                    ======
  System Benchmarks Index Score (Partial Only)                      1186.6

After this patch:

  System Benchmarks Partial Index           BASELINE       RESULT   INDEX
  File Copy 1024 bufsize 2000 maxblocks       3960.0     732716.0   1850.3
  File Copy 256 bufsize 500 maxblocks         1655.0     184940.0   1117.5
  File Copy 4096 bufsize 8000 maxblocks       5800.0    2427152.0   4184.7
                                                                    ======
  System Benchmarks Index Score (Partial Only)                      2053.0
Signed-off-by: default avatarZhang Yi <yi.zhang@huawei.com>
Reviewed-by: default avatarJan Kara <jack@suse.cz>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
Link: https://lore.kernel.org/r/20210716122024.1105856-5-yi.zhang@huawei.com
parent 6984aef5
...@@ -2910,19 +2910,6 @@ static int ext4_nonda_switch(struct super_block *sb) ...@@ -2910,19 +2910,6 @@ static int ext4_nonda_switch(struct super_block *sb)
return 0; return 0;
} }
/* We always reserve for an inode update; the superblock could be there too */
static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
{
if (likely(ext4_has_feature_large_file(inode->i_sb)))
return 1;
if (pos + len <= 0x7fffffffULL)
return 1;
/* We might need to update the superblock to set LARGE_FILE */
return 2;
}
static int ext4_da_write_begin(struct file *file, struct address_space *mapping, static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags, loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata) struct page **pagep, void **fsdata)
...@@ -2931,7 +2918,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, ...@@ -2931,7 +2918,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
struct page *page; struct page *page;
pgoff_t index; pgoff_t index;
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
handle_t *handle;
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
return -EIO; return -EIO;
...@@ -2957,41 +2943,11 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, ...@@ -2957,41 +2943,11 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
return 0; return 0;
} }
/* retry:
* grab_cache_page_write_begin() can take a long time if the
* system is thrashing due to memory pressure, or if the page
* is being written back. So grab it first before we start
* the transaction handle. This also allows us to allocate
* the page (if needed) without using GFP_NOFS.
*/
retry_grab:
page = grab_cache_page_write_begin(mapping, index, flags); page = grab_cache_page_write_begin(mapping, index, flags);
if (!page) if (!page)
return -ENOMEM; return -ENOMEM;
unlock_page(page);
/*
* With delayed allocation, we don't log the i_disksize update
* if there is delayed block allocation. But we still need
* to journalling the i_disksize update if writes to the end
* of file which has an already mapped buffer.
*/
retry_journal:
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
ext4_da_write_credits(inode, pos, len));
if (IS_ERR(handle)) {
put_page(page);
return PTR_ERR(handle);
}
lock_page(page);
if (page->mapping != mapping) {
/* The page got truncated from under us */
unlock_page(page);
put_page(page);
ext4_journal_stop(handle);
goto retry_grab;
}
/* In case writeback began while the page was unlocked */ /* In case writeback began while the page was unlocked */
wait_for_stable_page(page); wait_for_stable_page(page);
...@@ -3003,20 +2959,18 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, ...@@ -3003,20 +2959,18 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
#endif #endif
if (ret < 0) { if (ret < 0) {
unlock_page(page); unlock_page(page);
ext4_journal_stop(handle); put_page(page);
/* /*
* block_write_begin may have instantiated a few blocks * block_write_begin may have instantiated a few blocks
* outside i_size. Trim these off again. Don't need * outside i_size. Trim these off again. Don't need
* i_size_read because we hold i_mutex. * i_size_read because we hold inode lock.
*/ */
if (pos + len > inode->i_size) if (pos + len > inode->i_size)
ext4_truncate_failed_write(inode); ext4_truncate_failed_write(inode);
if (ret == -ENOSPC && if (ret == -ENOSPC &&
ext4_should_retry_alloc(inode->i_sb, &retries)) ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry_journal; goto retry;
put_page(page);
return ret; return ret;
} }
...@@ -3053,8 +3007,6 @@ static int ext4_da_write_end(struct file *file, ...@@ -3053,8 +3007,6 @@ static int ext4_da_write_end(struct file *file,
struct page *page, void *fsdata) struct page *page, void *fsdata)
{ {
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
int ret;
handle_t *handle = ext4_journal_current_handle();
loff_t new_i_size; loff_t new_i_size;
unsigned long start, end; unsigned long start, end;
int write_mode = (int)(unsigned long)fsdata; int write_mode = (int)(unsigned long)fsdata;
...@@ -3093,9 +3045,7 @@ static int ext4_da_write_end(struct file *file, ...@@ -3093,9 +3045,7 @@ static int ext4_da_write_end(struct file *file,
ext4_da_should_update_i_disksize(page, end)) ext4_da_should_update_i_disksize(page, end))
ext4_update_i_disksize(inode, new_i_size); ext4_update_i_disksize(inode, new_i_size);
copied = generic_write_end(file, mapping, pos, len, copied, page, fsdata); return generic_write_end(file, mapping, pos, len, copied, page, fsdata);
ret = ext4_journal_stop(handle);
return ret ? ret : copied;
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment