Commit c8cc8816, authored by Ritesh Harjani, committed by Theodore Ts'o

ext4: Add support for blocksize < pagesize in dioread_nolock

This patch adds support for blocksize < pagesize to the
dioread_nolock feature.

When blocksize < pagesize, a single page can contain multiple
small buffers that are mapped as unwritten extents, so we need to
maintain a vector of these unwritten extents, each of which needs
conversion after the IO is complete. Thus, we maintain
a list of <offset, size> pairs (io_end_vec) for this and
traverse the list to do the unwritten-to-written conversion.

Signed-off-by: Ritesh Harjani <riteshh@linux.ibm.com>
Link: https://lore.kernel.org/r/20191016073711.4141-5-riteshh@linux.ibm.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent 2943fdbc
...@@ -198,6 +198,12 @@ struct ext4_system_blocks { ...@@ -198,6 +198,12 @@ struct ext4_system_blocks {
*/ */
#define EXT4_IO_END_UNWRITTEN 0x0001 #define EXT4_IO_END_UNWRITTEN 0x0001
/*
 * One <offset, size> range tracked by an ext4_io_end. An io_end can carry
 * several of these, chained on its list_vec; after the IO completes each
 * range is passed to ext4_convert_unwritten_extents() so the unwritten
 * extent it describes is converted to written.
 */
struct ext4_io_end_vec {
struct list_head list; /* link in the owning io_end's list_vec */
loff_t offset; /* offset in the file (bytes) where the range starts */
ssize_t size; /* size of the extent; grown block by block during writeback */
};
/* /*
* For converting unwritten extents on a work queue. 'handle' is used for * For converting unwritten extents on a work queue. 'handle' is used for
* buffered writeback. * buffered writeback.
...@@ -211,8 +217,7 @@ typedef struct ext4_io_end { ...@@ -211,8 +217,7 @@ typedef struct ext4_io_end {
* bios covering the extent */ * bios covering the extent */
unsigned int flag; /* unwritten or not */ unsigned int flag; /* unwritten or not */
atomic_t count; /* reference counter */ atomic_t count; /* reference counter */
loff_t offset; /* offset in the file */ struct list_head list_vec; /* list of ext4_io_end_vec */
ssize_t size; /* size of the extent */
} ext4_io_end_t; } ext4_io_end_t;
struct ext4_io_submit { struct ext4_io_submit {
...@@ -3324,6 +3329,8 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, ...@@ -3324,6 +3329,8 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io,
int len, int len,
struct writeback_control *wbc, struct writeback_control *wbc,
bool keep_towrite); bool keep_towrite);
extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end);
extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end);
/* mmp.c */ /* mmp.c */
extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
......
...@@ -5005,6 +5005,7 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, ...@@ -5005,6 +5005,7 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end) int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
{ {
int ret, err = 0; int ret, err = 0;
struct ext4_io_end_vec *io_end_vec;
/* /*
* This is somewhat ugly but the idea is clear: When transaction is * This is somewhat ugly but the idea is clear: When transaction is
...@@ -5018,8 +5019,14 @@ int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end) ...@@ -5018,8 +5019,14 @@ int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end)
return PTR_ERR(handle); return PTR_ERR(handle);
} }
ret = ext4_convert_unwritten_extents(handle, io_end->inode, list_for_each_entry(io_end_vec, &io_end->list_vec, list) {
io_end->offset, io_end->size); ret = ext4_convert_unwritten_extents(handle, io_end->inode,
io_end_vec->offset,
io_end_vec->size);
if (ret)
break;
}
if (handle) if (handle)
err = ext4_journal_stop(handle); err = ext4_journal_stop(handle);
......
...@@ -2364,6 +2364,9 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page, ...@@ -2364,6 +2364,9 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page,
ext4_lblk_t lblk = *m_lblk; ext4_lblk_t lblk = *m_lblk;
ext4_fsblk_t pblock = *m_pblk; ext4_fsblk_t pblock = *m_pblk;
int err = 0; int err = 0;
int blkbits = mpd->inode->i_blkbits;
ssize_t io_end_size = 0;
struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end);
bh = head = page_buffers(page); bh = head = page_buffers(page);
do { do {
...@@ -2376,17 +2379,16 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page, ...@@ -2376,17 +2379,16 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page,
*/ */
mpd->map.m_len = 0; mpd->map.m_len = 0;
mpd->map.m_flags = 0; mpd->map.m_flags = 0;
io_end_vec->size += io_end_size;
io_end_size = 0;
/*
* FIXME: If dioread_nolock supports
* blocksize < pagesize, we need to make
* sure we add size mapped so far to
* io_end->size as the following call
* can submit the page for IO.
*/
err = mpage_process_page_bufs(mpd, head, bh, lblk); err = mpage_process_page_bufs(mpd, head, bh, lblk);
if (err > 0) if (err > 0)
err = 0; err = 0;
if (!err && mpd->map.m_len && mpd->map.m_lblk > lblk) {
io_end_vec = ext4_alloc_io_end_vec(io_end);
io_end_vec->offset = mpd->map.m_lblk << blkbits;
}
*map_bh = true; *map_bh = true;
goto out; goto out;
} }
...@@ -2395,13 +2397,11 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page, ...@@ -2395,13 +2397,11 @@ static int mpage_process_page(struct mpage_da_data *mpd, struct page *page,
bh->b_blocknr = pblock++; bh->b_blocknr = pblock++;
} }
clear_buffer_unwritten(bh); clear_buffer_unwritten(bh);
io_end_size += (1 << blkbits);
} while (lblk++, (bh = bh->b_this_page) != head); } while (lblk++, (bh = bh->b_this_page) != head);
/*
* FIXME: This is going to break if dioread_nolock io_end_vec->size += io_end_size;
* supports blocksize < pagesize as we will try to io_end_size = 0;
* convert potentially unmapped parts of inode.
*/
io_end->size += PAGE_SIZE;
*map_bh = false; *map_bh = false;
out: out:
*m_lblk = lblk; *m_lblk = lblk;
...@@ -2551,9 +2551,10 @@ static int mpage_map_and_submit_extent(handle_t *handle, ...@@ -2551,9 +2551,10 @@ static int mpage_map_and_submit_extent(handle_t *handle,
int err; int err;
loff_t disksize; loff_t disksize;
int progress = 0; int progress = 0;
ext4_io_end_t *io_end = mpd->io_submit.io_end;
struct ext4_io_end_vec *io_end_vec = ext4_alloc_io_end_vec(io_end);
mpd->io_submit.io_end->offset = io_end_vec->offset = ((loff_t)map->m_lblk) << inode->i_blkbits;
((loff_t)map->m_lblk) << inode->i_blkbits;
do { do {
err = mpage_map_one_extent(handle, mpd); err = mpage_map_one_extent(handle, mpd);
if (err < 0) { if (err < 0) {
...@@ -3654,6 +3655,7 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ...@@ -3654,6 +3655,7 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private) ssize_t size, void *private)
{ {
ext4_io_end_t *io_end = private; ext4_io_end_t *io_end = private;
struct ext4_io_end_vec *io_end_vec;
/* if not async direct IO just return */ /* if not async direct IO just return */
if (!io_end) if (!io_end)
...@@ -3671,8 +3673,9 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ...@@ -3671,8 +3673,9 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ext4_clear_io_unwritten_flag(io_end); ext4_clear_io_unwritten_flag(io_end);
size = 0; size = 0;
} }
io_end->offset = offset; io_end_vec = ext4_alloc_io_end_vec(io_end);
io_end->size = size; io_end_vec->offset = offset;
io_end_vec->size = size;
ext4_put_io_end(io_end); ext4_put_io_end(io_end);
return 0; return 0;
......
...@@ -31,18 +31,56 @@ ...@@ -31,18 +31,56 @@
#include "acl.h" #include "acl.h"
static struct kmem_cache *io_end_cachep; static struct kmem_cache *io_end_cachep;
static struct kmem_cache *io_end_vec_cachep;
int __init ext4_init_pageio(void) int __init ext4_init_pageio(void)
{ {
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT); io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
if (io_end_cachep == NULL) if (io_end_cachep == NULL)
return -ENOMEM; return -ENOMEM;
io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0);
if (io_end_vec_cachep == NULL) {
kmem_cache_destroy(io_end_cachep);
return -ENOMEM;
}
return 0; return 0;
} }
void ext4_exit_pageio(void) void ext4_exit_pageio(void)
{ {
kmem_cache_destroy(io_end_cachep); kmem_cache_destroy(io_end_cachep);
kmem_cache_destroy(io_end_vec_cachep);
}
/*
 * Allocate a zero-filled ext4_io_end_vec from io_end_vec_cachep (GFP_NOFS)
 * and append it to the tail of @io_end's list_vec.
 *
 * Returns the new entry on success. On allocation failure returns
 * ERR_PTR(-ENOMEM) -- never NULL -- so the result has to be tested with
 * IS_ERR() before it is dereferenced.
 */
struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end)
{
	struct ext4_io_end_vec *vec =
		kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS);

	if (vec) {
		INIT_LIST_HEAD(&vec->list);
		/* Tail insertion keeps the entries in allocation order. */
		list_add_tail(&vec->list, &io_end->list_vec);
		return vec;
	}

	return ERR_PTR(-ENOMEM);
}
/*
 * Unlink every ext4_io_end_vec chained on @io_end's list_vec and return it
 * to io_end_vec_cachep. Does nothing when the list is already empty.
 */
static void ext4_free_io_end_vec(ext4_io_end_t *io_end)
{
	struct list_head *head = &io_end->list_vec;

	/* Drain from the front until nothing is left on the list. */
	while (!list_empty(head)) {
		struct ext4_io_end_vec *vec =
			list_first_entry(head, struct ext4_io_end_vec, list);

		list_del(&vec->list);
		kmem_cache_free(io_end_vec_cachep, vec);
	}
}
/*
 * Return the entry at the tail of @io_end's list_vec. Because
 * ext4_alloc_io_end_vec() appends at the tail, this is the most recently
 * allocated ext4_io_end_vec.
 *
 * The list must not be empty: calling this on an io_end with no vec
 * entries trips the BUG_ON() below.
 */
struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end)
{
BUG_ON(list_empty(&io_end->list_vec));
return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list);
} }
/* /*
...@@ -125,6 +163,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end) ...@@ -125,6 +163,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
ext4_finish_bio(bio); ext4_finish_bio(bio);
bio_put(bio); bio_put(bio);
} }
ext4_free_io_end_vec(io_end);
kmem_cache_free(io_end_cachep, io_end); kmem_cache_free(io_end_cachep, io_end);
} }
...@@ -139,8 +178,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end) ...@@ -139,8 +178,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
static int ext4_end_io_end(ext4_io_end_t *io_end) static int ext4_end_io_end(ext4_io_end_t *io_end)
{ {
struct inode *inode = io_end->inode; struct inode *inode = io_end->inode;
loff_t offset = io_end->offset;
ssize_t size = io_end->size;
handle_t *handle = io_end->handle; handle_t *handle = io_end->handle;
int ret = 0; int ret = 0;
...@@ -154,8 +191,7 @@ static int ext4_end_io_end(ext4_io_end_t *io_end) ...@@ -154,8 +191,7 @@ static int ext4_end_io_end(ext4_io_end_t *io_end)
ext4_msg(inode->i_sb, KERN_EMERG, ext4_msg(inode->i_sb, KERN_EMERG,
"failed to convert unwritten extents to written " "failed to convert unwritten extents to written "
"extents -- potential data loss! " "extents -- potential data loss! "
"(inode %lu, offset %llu, size %zd, error %d)", "(inode %lu, error %d)", inode->i_ino, ret);
inode->i_ino, offset, size, ret);
} }
ext4_clear_io_unwritten_flag(io_end); ext4_clear_io_unwritten_flag(io_end);
ext4_release_io_end(io_end); ext4_release_io_end(io_end);
...@@ -247,6 +283,7 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) ...@@ -247,6 +283,7 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
if (io_end) { if (io_end) {
io_end->inode = inode; io_end->inode = inode;
INIT_LIST_HEAD(&io_end->list); INIT_LIST_HEAD(&io_end->list);
INIT_LIST_HEAD(&io_end->list_vec);
atomic_set(&io_end->count, 1); atomic_set(&io_end->count, 1);
} }
return io_end; return io_end;
...@@ -255,7 +292,8 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) ...@@ -255,7 +292,8 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
void ext4_put_io_end_defer(ext4_io_end_t *io_end) void ext4_put_io_end_defer(ext4_io_end_t *io_end)
{ {
if (atomic_dec_and_test(&io_end->count)) { if (atomic_dec_and_test(&io_end->count)) {
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) { if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) ||
list_empty(&io_end->list_vec)) {
ext4_release_io_end(io_end); ext4_release_io_end(io_end);
return; return;
} }
...@@ -307,10 +345,8 @@ static void ext4_end_bio(struct bio *bio) ...@@ -307,10 +345,8 @@ static void ext4_end_bio(struct bio *bio)
struct inode *inode = io_end->inode; struct inode *inode = io_end->inode;
ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu " ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
"(offset %llu size %ld starting block %llu)", "starting block %llu)",
bio->bi_status, inode->i_ino, bio->bi_status, inode->i_ino,
(unsigned long long) io_end->offset,
(long) io_end->size,
(unsigned long long) (unsigned long long)
bi_sector >> (inode->i_blkbits - 9)); bi_sector >> (inode->i_blkbits - 9));
mapping_set_error(inode->i_mapping, mapping_set_error(inode->i_mapping,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment