Commit 7d1b1fbc authored by Zheng Liu, committed by Theodore Ts'o

ext4: reimplement ext4_find_delay_alloc_range on extent status tree

Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
Signed-off-by: Allison Henderson <achender@linux.vnet.ibm.com>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent 992e9fdd
@@ -2451,14 +2451,10 @@ enum ext4_state_bits {
 			 * never, ever appear in a buffer_head's state
 			 * flag. See EXT4_MAP_FROM_CLUSTER to see where
 			 * this is used. */
-	BH_Da_Mapped,	/* Delayed allocated block that now has a mapping. This
-			 * flag is set when ext4_map_blocks is called on a
-			 * delayed allocated block to get its real mapping. */
 };
 
 BUFFER_FNS(Uninit, uninit)
 TAS_BUFFER_FNS(Uninit, uninit)
-BUFFER_FNS(Da_Mapped, da_mapped)
 
 /*
  * Add new method to test wether block and inode bitmaps are properly
@@ -314,7 +314,6 @@ extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
 						  struct ext4_ext_path *);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
-extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
-				      int search_hint_reverse);
+extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
 
 #endif /* _EXT4_EXTENTS */
@@ -3461,115 +3461,34 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 /**
  * ext4_find_delalloc_range: find delayed allocated block in the given range.
  *
- * Goes through the buffer heads in the range [lblk_start, lblk_end] and returns
- * whether there are any buffers marked for delayed allocation. It returns '1'
- * on the first delalloc'ed buffer head found. If no buffer head in the given
- * range is marked for delalloc, it returns 0.
- * lblk_start should always be <= lblk_end.
- * search_hint_reverse is to indicate that searching in reverse from lblk_end to
- * lblk_start might be more efficient (i.e., we will likely hit the delalloc'ed
- * block sooner). This is useful when blocks are truncated sequentially from
- * lblk_start towards lblk_end.
+ * Return 1 if there is a delalloc block in the range, otherwise 0.
  */
 static int ext4_find_delalloc_range(struct inode *inode,
 				    ext4_lblk_t lblk_start,
-				    ext4_lblk_t lblk_end,
-				    int search_hint_reverse)
+				    ext4_lblk_t lblk_end)
 {
-	struct address_space *mapping = inode->i_mapping;
-	struct buffer_head *head, *bh = NULL;
-	struct page *page;
-	ext4_lblk_t i, pg_lblk;
-	pgoff_t index;
-
-	if (!test_opt(inode->i_sb, DELALLOC))
-		return 0;
-
-	/* reverse search wont work if fs block size is less than page size */
-	if (inode->i_blkbits < PAGE_CACHE_SHIFT)
-		search_hint_reverse = 0;
-
-	if (search_hint_reverse)
-		i = lblk_end;
-	else
-		i = lblk_start;
-
-	index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	while ((i >= lblk_start) && (i <= lblk_end)) {
-		page = find_get_page(mapping, index);
-		if (!page)
-			goto nextpage;
-
-		if (!page_has_buffers(page))
-			goto nextpage;
-
-		head = page_buffers(page);
-		if (!head)
-			goto nextpage;
-
-		bh = head;
-		pg_lblk = index << (PAGE_CACHE_SHIFT -
-						inode->i_blkbits);
-		do {
-			if (unlikely(pg_lblk < lblk_start)) {
-				/*
-				 * This is possible when fs block size is less
-				 * than page size and our cluster starts/ends in
-				 * middle of the page. So we need to skip the
-				 * initial few blocks till we reach the 'lblk'
-				 */
-				pg_lblk++;
-				continue;
-			}
-
-			/* Check if the buffer is delayed allocated and that it
-			 * is not yet mapped. (when da-buffers are mapped during
-			 * their writeout, their da_mapped bit is set.)
-			 */
-			if (buffer_delay(bh) && !buffer_da_mapped(bh)) {
-				page_cache_release(page);
-				trace_ext4_find_delalloc_range(inode,
-						lblk_start, lblk_end,
-						search_hint_reverse,
-						1, i);
-				return 1;
-			}
-			if (search_hint_reverse)
-				i--;
-			else
-				i++;
-		} while ((i >= lblk_start) && (i <= lblk_end) &&
-				((bh = bh->b_this_page) != head));
-nextpage:
-		if (page)
-			page_cache_release(page);
-		/*
-		 * Move to next page. 'i' will be the first lblk in the next
-		 * page.
-		 */
-		if (search_hint_reverse)
-			index--;
-		else
-			index++;
-		i = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	}
-
-	trace_ext4_find_delalloc_range(inode, lblk_start, lblk_end,
-					search_hint_reverse, 0, 0);
-	return 0;
+	struct extent_status es;
+
+	es.start = lblk_start;
+	ext4_es_find_extent(inode, &es);
+	if (es.len == 0)
+		return 0; /* there is no delay extent in this tree */
+	else if (es.start <= lblk_start && lblk_start < es.start + es.len)
+		return 1;
+	else if (lblk_start <= es.start && es.start <= lblk_end)
+		return 1;
+	else
+		return 0;
 }
 
-int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk,
-			       int search_hint_reverse)
+int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	ext4_lblk_t lblk_start, lblk_end;
 	lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
 	lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
 
-	return ext4_find_delalloc_range(inode, lblk_start, lblk_end,
-					search_hint_reverse);
+	return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
 }
 
 /**
@@ -3630,7 +3549,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
 		lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
 		lblk_to = lblk_from + c_offset - 1;
 
-		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
 			allocated_clusters--;
 	}
@@ -3640,7 +3559,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
 		lblk_from = lblk_start + num_blks;
 		lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
 
-		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to, 0))
+		if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
 			allocated_clusters--;
 	}
@@ -3927,7 +3846,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
 		if (!newex.ee_start_lo && !newex.ee_start_hi) {
 			if ((sbi->s_cluster_ratio > 1) &&
-			    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+			    ext4_find_delalloc_cluster(inode, map->m_lblk))
 				map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 
 			if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
@@ -4015,7 +3934,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	}
 
 	if ((sbi->s_cluster_ratio > 1) &&
-	    ext4_find_delalloc_cluster(inode, map->m_lblk, 0))
+	    ext4_find_delalloc_cluster(inode, map->m_lblk))
 		map->m_flags |= EXT4_MAP_FROM_CLUSTER;
 
 	/*
@@ -483,49 +483,6 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
 	return num;
 }
 
-/*
- * Sets the BH_Da_Mapped bit on the buffer heads corresponding to the given map.
- */
-static void set_buffers_da_mapped(struct inode *inode,
-				   struct ext4_map_blocks *map)
-{
-	struct address_space *mapping = inode->i_mapping;
-	struct pagevec pvec;
-	int i, nr_pages;
-	pgoff_t index, end;
-
-	index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	end = (map->m_lblk + map->m_len - 1) >>
-		(PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	pagevec_init(&pvec, 0);
-	while (index <= end) {
-		nr_pages = pagevec_lookup(&pvec, mapping, index,
-					  min(end - index + 1,
-					      (pgoff_t)PAGEVEC_SIZE));
-		if (nr_pages == 0)
-			break;
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
-			struct buffer_head *bh, *head;
-
-			if (unlikely(page->mapping != mapping) ||
-			    !PageDirty(page))
-				break;
-
-			if (page_has_buffers(page)) {
-				bh = head = page_buffers(page);
-				do {
-					set_buffer_da_mapped(bh);
-					bh = bh->b_this_page;
-				} while (bh != head);
-			}
-			index++;
-		}
-		pagevec_release(&pvec);
-	}
-}
-
 /*
  * The ext4_map_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
@@ -661,13 +618,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) {
 		ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
 
-		/* If we have successfully mapped the delayed allocated blocks,
-		 * set the BH_Da_Mapped bit on them. Its important to do this
-		 * under the protection of i_data_sem.
-		 */
 		if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
 			int ret;
-			set_buffers_da_mapped(inode, map);
 delayed_mapped:
 			/* delayed allocation blocks has been allocated */
 			ret = ext4_es_remove_extent(inode, map->m_lblk,
...@@ -1330,7 +1282,6 @@ static void ext4_da_page_release_reservation(struct page *page, ...@@ -1330,7 +1282,6 @@ static void ext4_da_page_release_reservation(struct page *page,
if ((offset <= curr_off) && (buffer_delay(bh))) { if ((offset <= curr_off) && (buffer_delay(bh))) {
to_release++; to_release++;
clear_buffer_delay(bh); clear_buffer_delay(bh);
clear_buffer_da_mapped(bh);
} }
curr_off = next_off; curr_off = next_off;
} while ((bh = bh->b_this_page) != head); } while ((bh = bh->b_this_page) != head);
@@ -1347,7 +1298,7 @@ static void ext4_da_page_release_reservation(struct page *page,
 		lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
 			((num_clusters - 1) << sbi->s_cluster_bits);
 		if (sbi->s_cluster_ratio == 1 ||
-		    !ext4_find_delalloc_cluster(inode, lblk, 1))
+		    !ext4_find_delalloc_cluster(inode, lblk))
 			ext4_da_release_space(inode, 1);
 
 		num_clusters--;
@@ -1453,8 +1404,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 					clear_buffer_delay(bh);
 					bh->b_blocknr = pblock;
 				}
-				if (buffer_da_mapped(bh))
-					clear_buffer_da_mapped(bh);
 				if (buffer_unwritten(bh) ||
 				    buffer_mapped(bh))
 					BUG_ON(bh->b_blocknr != pblock);
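
As an aside, the overlap test that the new ext4_find_delalloc_range() applies to the extent returned by ext4_es_find_extent() can be illustrated with a small, self-contained user-space sketch. The struct and helper names below are hypothetical stand-ins, not the kernel API; only the comparison logic mirrors the patch above.

/* Sketch of the range-overlap decision: given the first delayed extent whose
 * end sits at or after lblk_start (what the extent status tree lookup would
 * return), report whether it intersects [lblk_start, lblk_end]. */
#include <stdio.h>

typedef unsigned int lblk_t;

struct delayed_extent {		/* hypothetical stand-in for extent_status */
	lblk_t start;		/* first logical block of the delayed extent */
	lblk_t len;		/* number of blocks; 0 means "nothing found" */
};

static int range_has_delalloc(struct delayed_extent es,
			      lblk_t lblk_start, lblk_t lblk_end)
{
	if (es.len == 0)
		return 0;	/* no delayed extent at or after lblk_start */
	if (es.start <= lblk_start && lblk_start < es.start + es.len)
		return 1;	/* the extent covers the start of the range */
	if (lblk_start <= es.start && es.start <= lblk_end)
		return 1;	/* the extent begins somewhere inside the range */
	return 0;		/* the extent lies entirely beyond lblk_end */
}

int main(void)
{
	struct delayed_extent es = { 8, 4 };	/* delayed blocks 8..11 */

	printf("%d\n", range_has_delalloc(es, 10, 20));	/* 1: block 10 is delayed */
	printf("%d\n", range_has_delalloc(es, 0, 9));	/* 1: extent starts at 8 <= 9 */
	printf("%d\n", range_has_delalloc(es, 0, 5));	/* 0: extent starts past the range */
	return 0;
}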