Commit 4e7ea81d, authored by Jan Kara, committed by Theodore Ts'o

ext4: restructure writeback path

There are two issues with the current writeback path in ext4.  First, we
don't necessarily map complete pages when blocksize < pagesize, and
thus we may not do any writeback in one iteration.  We always map some
blocks, though, so we will eventually finish mapping the page.  However,
if writeback races with other operations on the file, forward progress
is not really guaranteed.  The second problem is that the current code
structure makes it hard to associate all the bios for some range of
pages with one io_end structure so that unwritten extents can be
converted after all the bios are finished.  This will be especially
difficult later, when io_end will be associated with a reserved
transaction handle.

We restructure the writeback path into a relatively simple loop which
first prepares an extent of pages, then maps one or more extents so that
no page is partially mapped, and once a page is fully mapped it is
submitted for IO.  We keep all the mapping and IO submission
information in the mpage_da_data structure to somewhat reduce stack
usage.  The resulting code is somewhat shorter than the old one and
hopefully also easier to read.
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent fffb2739
...@@ -176,21 +176,6 @@ struct ext4_map_blocks { ...@@ -176,21 +176,6 @@ struct ext4_map_blocks {
unsigned int m_flags; unsigned int m_flags;
}; };
/*
* For delayed allocation tracking
*/
struct mpage_da_data {
struct inode *inode;
sector_t b_blocknr; /* start block number of extent */
size_t b_size; /* size of extent */
unsigned long b_state; /* state of the extent */
unsigned long first_page, next_page; /* extent of pages */
struct writeback_control *wbc;
int io_done;
int pages_written;
int retval;
};
/* /*
* Flags for ext4_io_end->flags * Flags for ext4_io_end->flags
*/ */
......
This diff is collapsed.
...@@ -360,9 +360,6 @@ static int io_submit_init_bio(struct ext4_io_submit *io, ...@@ -360,9 +360,6 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
bio->bi_bdev = bh->b_bdev; bio->bi_bdev = bh->b_bdev;
bio->bi_end_io = ext4_end_bio; bio->bi_end_io = ext4_end_bio;
bio->bi_private = ext4_get_io_end(io->io_end); bio->bi_private = ext4_get_io_end(io->io_end);
if (!io->io_end->size)
io->io_end->offset = (bh->b_page->index << PAGE_CACHE_SHIFT)
+ bh_offset(bh);
io->io_bio = bio; io->io_bio = bio;
io->io_next_block = bh->b_blocknr; io->io_next_block = bh->b_blocknr;
return 0; return 0;
...@@ -390,7 +387,6 @@ static int io_submit_add_bh(struct ext4_io_submit *io, ...@@ -390,7 +387,6 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
io_end = io->io_end; io_end = io->io_end;
if (test_clear_buffer_uninit(bh)) if (test_clear_buffer_uninit(bh))
ext4_set_io_unwritten_flag(inode, io_end); ext4_set_io_unwritten_flag(inode, io_end);
io_end->size += bh->b_size;
io->io_next_block++; io->io_next_block++;
return 0; return 0;
} }
......
...@@ -324,43 +324,59 @@ TRACE_EVENT(ext4_da_writepages, ...@@ -324,43 +324,59 @@ TRACE_EVENT(ext4_da_writepages,
); );
TRACE_EVENT(ext4_da_write_pages, TRACE_EVENT(ext4_da_write_pages,
TP_PROTO(struct inode *inode, struct mpage_da_data *mpd), TP_PROTO(struct inode *inode, pgoff_t first_page,
struct writeback_control *wbc),
TP_ARGS(inode, mpd), TP_ARGS(inode, first_page, wbc),
TP_STRUCT__entry( TP_STRUCT__entry(
__field( dev_t, dev ) __field( dev_t, dev )
__field( ino_t, ino ) __field( ino_t, ino )
__field( __u64, b_blocknr ) __field( pgoff_t, first_page )
__field( __u32, b_size ) __field( long, nr_to_write )
__field( __u32, b_state )
__field( unsigned long, first_page )
__field( int, io_done )
__field( int, pages_written )
__field( int, sync_mode ) __field( int, sync_mode )
), ),
TP_fast_assign( TP_fast_assign(
__entry->dev = inode->i_sb->s_dev; __entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino; __entry->ino = inode->i_ino;
__entry->b_blocknr = mpd->b_blocknr; __entry->first_page = first_page;
__entry->b_size = mpd->b_size; __entry->nr_to_write = wbc->nr_to_write;
__entry->b_state = mpd->b_state; __entry->sync_mode = wbc->sync_mode;
__entry->first_page = mpd->first_page;
__entry->io_done = mpd->io_done;
__entry->pages_written = mpd->pages_written;
__entry->sync_mode = mpd->wbc->sync_mode;
), ),
TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x " TP_printk("dev %d,%d ino %lu first_page %lu nr_to_write %ld "
"first_page %lu io_done %d pages_written %d sync_mode %d", "sync_mode %d",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, (unsigned long) __entry->ino, __entry->first_page,
__entry->b_blocknr, __entry->b_size, __entry->nr_to_write, __entry->sync_mode)
__entry->b_state, __entry->first_page, );
__entry->io_done, __entry->pages_written,
__entry->sync_mode TRACE_EVENT(ext4_da_write_pages_extent,
) TP_PROTO(struct inode *inode, struct ext4_map_blocks *map),
TP_ARGS(inode, map),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( ino_t, ino )
__field( __u64, lblk )
__field( __u32, len )
__field( __u32, flags )
),
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
__entry->lblk = map->m_lblk;
__entry->len = map->m_len;
__entry->flags = map->m_flags;
),
TP_printk("dev %d,%d ino %lu lblk %llu len %u flags 0x%04x",
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long) __entry->ino, __entry->lblk, __entry->len,
__entry->flags)
); );
TRACE_EVENT(ext4_da_writepages_result, TRACE_EVENT(ext4_da_writepages_result,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment