Commit 2d90c160 authored by Jan Kara, committed by Theodore Ts'o

ext4: more efficient SEEK_DATA implementation

Using SEEK_DATA in a huge sparse file can easily lead to softlockups, as
ext4_seek_data() iterates over a hole block by block. Fix the problem by
using the hole size returned from ext4_map_blocks() and thus skipping the
hole in one go.

Also update the SEEK_HOLE implementation to follow the same pattern as
SEEK_DATA to make future maintenance easier.

Furthermore, we add cond_resched() to both ext4_seek_data() and
ext4_seek_hole() to avoid softlockups in case a malicious user creates a
huge fragmented file and we have to go through lots of extents.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
parent e3fb8eb1
...@@ -2546,6 +2546,9 @@ extern void ext4_da_update_reserve_space(struct inode *inode, ...@@ -2546,6 +2546,9 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim); int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk, extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
ext4_fsblk_t pblk, ext4_lblk_t len); ext4_fsblk_t pblk, ext4_lblk_t len);
extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
unsigned int map_len,
struct extent_status *result);
/* indirect.c */ /* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
......
...@@ -426,7 +426,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp) ...@@ -426,7 +426,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
*/ */
static int ext4_find_unwritten_pgoff(struct inode *inode, static int ext4_find_unwritten_pgoff(struct inode *inode,
int whence, int whence,
struct ext4_map_blocks *map, ext4_lblk_t end_blk,
loff_t *offset) loff_t *offset)
{ {
struct pagevec pvec; struct pagevec pvec;
...@@ -441,7 +441,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, ...@@ -441,7 +441,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
blkbits = inode->i_sb->s_blocksize_bits; blkbits = inode->i_sb->s_blocksize_bits;
startoff = *offset; startoff = *offset;
lastoff = startoff; lastoff = startoff;
endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; endoff = (loff_t)end_blk << blkbits;
index = startoff >> PAGE_CACHE_SHIFT; index = startoff >> PAGE_CACHE_SHIFT;
end = endoff >> PAGE_CACHE_SHIFT; end = endoff >> PAGE_CACHE_SHIFT;
...@@ -559,12 +559,11 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, ...@@ -559,12 +559,11 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{ {
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
struct ext4_map_blocks map;
struct extent_status es; struct extent_status es;
ext4_lblk_t start, last, end; ext4_lblk_t start, last, end;
loff_t dataoff, isize; loff_t dataoff, isize;
int blkbits; int blkbits;
int ret = 0; int ret;
inode_lock(inode); inode_lock(inode);
...@@ -581,41 +580,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) ...@@ -581,41 +580,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
dataoff = offset; dataoff = offset;
do { do {
map.m_lblk = last; ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
map.m_len = end - last + 1; if (ret <= 0) {
ret = ext4_map_blocks(NULL, inode, &map, 0); /* No extent found -> no data */
if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { if (ret == 0)
if (last != start) ret = -ENXIO;
dataoff = (loff_t)last << blkbits; inode_unlock(inode);
break; return ret;
} }
/* last = es.es_lblk;
* If there is a delay extent at this offset, if (last != start)
* it will be as a data. dataoff = (loff_t)last << blkbits;
*/ if (!ext4_es_is_unwritten(&es))
ext4_es_find_delayed_extent_range(inode, last, last, &es);
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
if (last != start)
dataoff = (loff_t)last << blkbits;
break; break;
}
/* /*
* If there is a unwritten extent at this offset, * If there is a unwritten extent at this offset,
* it will be as a data or a hole according to page * it will be as a data or a hole according to page
* cache that has data or not. * cache that has data or not.
*/ */
if (map.m_flags & EXT4_MAP_UNWRITTEN) { if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
int unwritten; es.es_lblk + es.es_len, &dataoff))
unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, break;
&map, &dataoff); last += es.es_len;
if (unwritten)
break;
}
last++;
dataoff = (loff_t)last << blkbits; dataoff = (loff_t)last << blkbits;
cond_resched();
} while (last <= end); } while (last <= end);
inode_unlock(inode); inode_unlock(inode);
...@@ -632,12 +622,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) ...@@ -632,12 +622,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{ {
struct inode *inode = file->f_mapping->host; struct inode *inode = file->f_mapping->host;
struct ext4_map_blocks map;
struct extent_status es; struct extent_status es;
ext4_lblk_t start, last, end; ext4_lblk_t start, last, end;
loff_t holeoff, isize; loff_t holeoff, isize;
int blkbits; int blkbits;
int ret = 0; int ret;
inode_lock(inode); inode_lock(inode);
...@@ -654,44 +643,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) ...@@ -654,44 +643,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
holeoff = offset; holeoff = offset;
do { do {
map.m_lblk = last; ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
map.m_len = end - last + 1; if (ret < 0) {
ret = ext4_map_blocks(NULL, inode, &map, 0); inode_unlock(inode);
if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { return ret;
last += ret;
holeoff = (loff_t)last << blkbits;
continue;
} }
/* Found a hole? */
/* if (ret == 0 || es.es_lblk > last) {
* If there is a delay extent at this offset, if (last != start)
* we will skip this extent. holeoff = (loff_t)last << blkbits;
*/ break;
ext4_es_find_delayed_extent_range(inode, last, last, &es);
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
last = es.es_lblk + es.es_len;
holeoff = (loff_t)last << blkbits;
continue;
} }
/* /*
* If there is a unwritten extent at this offset, * If there is a unwritten extent at this offset,
* it will be as a data or a hole according to page * it will be as a data or a hole according to page
* cache that has data or not. * cache that has data or not.
*/ */
if (map.m_flags & EXT4_MAP_UNWRITTEN) { if (ext4_es_is_unwritten(&es) &&
int unwritten; ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, last + es.es_len, &holeoff))
&map, &holeoff); break;
if (!unwritten) {
last += ret;
holeoff = (loff_t)last << blkbits;
continue;
}
}
/* find a hole */ last += es.es_len;
break; holeoff = (loff_t)last << blkbits;
cond_resched();
} while (last <= end); } while (last <= end);
inode_unlock(inode); inode_unlock(inode);
......
...@@ -5596,3 +5596,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ...@@ -5596,3 +5596,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return err; return err;
} }
/*
 * Locate the first non-hole extent of @inode that lies at or after logical
 * block @lblk, looking at no more than @map_len blocks.
 *
 * Return value:
 *   1   - an extent was found and is described by @result,
 *   0   - the searched range holds no extent; @result->es_len is set to 0,
 *   < 0 - error code passed back from ext4_map_blocks().
 */
int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
			 unsigned int map_len, struct extent_status *result)
{
	struct ext4_map_blocks map;
	struct extent_status delayed = {};
	int mapped;

	map.m_lblk = lblk;
	map.m_len = map_len;

	/*
	 * Files that are not extent based do not report the full size of a
	 * hole, so crossing one may take several passes through this loop.
	 */
	while (map.m_len > 0) {
		ext4_lblk_t hole_end;

		mapped = ext4_map_blocks(NULL, inode, &map, 0);
		if (mapped < 0)
			return mapped;

		/* Blocks are mapped at m_lblk: report that extent as is. */
		if (mapped > 0) {
			ext4_es_store_pblock(result, map.m_pblk);
			result->es_lblk = map.m_lblk;
			result->es_len = map.m_len;
			ext4_es_store_status(result,
				(map.m_flags & EXT4_MAP_UNWRITTEN) ?
					EXTENT_STATUS_UNWRITTEN :
					EXTENT_STATUS_WRITTEN);
			return 1;
		}

		/* Nothing mapped: [m_lblk, hole_end) is the range to skip. */
		hole_end = map.m_lblk + map.m_len;

		ext4_es_find_delayed_extent_range(inode, map.m_lblk,
						  hole_end - 1, &delayed);

		/* Does a delayed extent start before the skipped range ends? */
		if (delayed.es_len && delayed.es_lblk < hole_end) {
			/* Trim away any part lying in front of @lblk. */
			ext4_lblk_t skip = delayed.es_lblk < lblk ?
						lblk - delayed.es_lblk : 0;

			result->es_lblk = delayed.es_lblk + skip;
			result->es_len = delayed.es_len - skip;
			ext4_es_store_pblock(result,
					     ext4_es_pblock(&delayed) + skip);
			ext4_es_store_status(result, ext4_es_status(&delayed));
			return 1;
		}

		/* Step over the hole and retry with the remaining budget. */
		map_len -= map.m_len;
		map.m_lblk = hole_end;
		map.m_len = map_len;
		cond_resched();
	}

	result->es_len = 0;
	return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment