Commit d3c92626 authored by Linus Torvalds

Merge tag 'ext4_for_linue' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "Fix a number of regression and other bugs in ext4, most of which were
  relatively obscure cornercases or races that were found using
  regression tests."

* tag 'ext4_for_linue' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (21 commits)
  ext4: fix data=journal fast mount/umount hang
  ext4: fix ext4_evict_inode() racing against workqueue processing code
  ext4: fix memory leakage in mext_check_coverage
  ext4: use s_extent_max_zeroout_kb value as number of kb
  ext4: use atomic64_t for the per-flexbg free_clusters count
  jbd2: fix use after free in jbd2_journal_dirty_metadata()
  ext4: reserve metadata block for every delayed write
  ext4: update reserved space after the 'correction'
  ext4: do not use yield()
  ext4: remove unused variable in ext4_free_blocks()
  ext4: fix WARN_ON from ext4_releasepage()
  ext4: fix the wrong number of the allocated blocks in ext4_split_extent()
  ext4: update extent status tree after an extent is zeroed out
  ext4: fix wrong m_len value after unwritten extent conversion
  ext4: add self-testing infrastructure to do a sanity check
  ext4: avoid a potential overflow in ext4_es_can_be_merged()
  ext4: invalidate extent status tree during extent migration
  ext4: remove unnecessary wait for extent conversion in ext4_fallocate()
  ext4: add warning to ext4_convert_unwritten_extents_endio
  ext4: disable merging of uninitialized extents
  ...
parents 0a7e4531 2b405bfa
@@ -335,9 +335,9 @@ struct ext4_group_desc
  */
 struct flex_groups {
-	atomic_t	free_inodes;
-	atomic_t	free_clusters;
+	atomic64_t	free_clusters;
+	atomic_t	free_inodes;
 	atomic_t	used_dirs;
 };

 #define EXT4_BG_INODE_UNINIT	0x0001 /* Inode table/bitmap not in use */
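The reordering above also widens free_clusters from atomic_t to atomic64_t: summed over a whole flex group, free cluster counts on a large filesystem can exceed what a 32-bit counter holds, and a wrapped counter feeds garbage into allocation decisions. A minimal userspace sketch of the wraparound the wider type avoids (plain C with illustrative values, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t free_clusters32 = UINT32_MAX;	/* 32-bit counter at its cap */
	uint64_t free_clusters64 = UINT32_MAX;	/* same value, 64-bit counter */

	free_clusters32 += 1;	/* unsigned arithmetic wraps around to 0 */
	free_clusters64 += 1;	/* keeps counting */

	printf("32-bit: %u, 64-bit: %llu\n",
	       free_clusters32, (unsigned long long)free_clusters64);
	return 0;
}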
@@ -2617,7 +2617,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 extern int __init ext4_init_pageio(void);
 extern void ext4_add_complete_io(ext4_io_end_t *io_end);
 extern void ext4_exit_pageio(void);
-extern void ext4_ioend_wait(struct inode *);
+extern void ext4_ioend_shutdown(struct inode *);
 extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
 extern void ext4_end_io_work(struct work_struct *work);
...
@@ -1584,10 +1584,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 	unsigned short ext1_ee_len, ext2_ee_len, max_len;

 	/*
-	 * Make sure that either both extents are uninitialized, or
-	 * both are _not_.
+	 * Make sure that both extents are initialized. We don't merge
+	 * uninitialized extents so that we can be sure that end_io code has
+	 * the extent that was written properly split out and conversion to
+	 * initialized is trivial.
 	 */
-	if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
+	if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
 		return 0;

 	if (ext4_ext_is_uninitialized(ex1))
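The changed guard above is easy to misread, so a truth table helps: the old XOR test refused to merge only a mixed pair (one extent uninitialized, the other not), while the new OR test refuses whenever either extent is uninitialized, including a pair that are both uninitialized. A standalone C sketch (illustrative only) printing both predicates over all four cases:

#include <stdio.h>

int main(void)
{
	for (int u1 = 0; u1 <= 1; u1++)
		for (int u2 = 0; u2 <= 1; u2++)
			printf("u1=%d u2=%d  old(xor)=%s  new(or)=%s\n",
			       u1, u2,
			       (u1 ^ u2) ? "reject" : "allow",
			       (u1 || u2) ? "reject" : "allow");
	return 0;
}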
@@ -2923,7 +2925,7 @@ static int ext4_split_extent_at(handle_t *handle,
 {
 	ext4_fsblk_t newblock;
 	ext4_lblk_t ee_block;
-	struct ext4_extent *ex, newex, orig_ex;
+	struct ext4_extent *ex, newex, orig_ex, zero_ex;
 	struct ext4_extent *ex2 = NULL;
 	unsigned int ee_len, depth;
 	int err = 0;
@@ -2943,6 +2945,10 @@ static int ext4_split_extent_at(handle_t *handle,
 	newblock = split - ee_block + ext4_ext_pblock(ex);

 	BUG_ON(split < ee_block || split >= (ee_block + ee_len));
+	BUG_ON(!ext4_ext_is_uninitialized(ex) &&
+	       split_flag & (EXT4_EXT_MAY_ZEROOUT |
+			     EXT4_EXT_MARK_UNINIT1 |
+			     EXT4_EXT_MARK_UNINIT2));

 	err = ext4_ext_get_access(handle, inode, path + depth);
 	if (err)
@@ -2990,12 +2996,26 @@ static int ext4_split_extent_at(handle_t *handle,
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
 	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
 		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
-			if (split_flag & EXT4_EXT_DATA_VALID1)
+			if (split_flag & EXT4_EXT_DATA_VALID1) {
 				err = ext4_ext_zeroout(inode, ex2);
-			else
+				zero_ex.ee_block = ex2->ee_block;
+				zero_ex.ee_len = ext4_ext_get_actual_len(ex2);
+				ext4_ext_store_pblock(&zero_ex,
+						      ext4_ext_pblock(ex2));
+			} else {
 				err = ext4_ext_zeroout(inode, ex);
-		} else
+				zero_ex.ee_block = ex->ee_block;
+				zero_ex.ee_len = ext4_ext_get_actual_len(ex);
+				ext4_ext_store_pblock(&zero_ex,
+						      ext4_ext_pblock(ex));
+			}
+		} else {
 			err = ext4_ext_zeroout(inode, &orig_ex);
+			zero_ex.ee_block = orig_ex.ee_block;
+			zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex);
+			ext4_ext_store_pblock(&zero_ex,
+					      ext4_ext_pblock(&orig_ex));
+		}

 		if (err)
 			goto fix_extent_len;
@@ -3003,6 +3023,12 @@ static int ext4_split_extent_at(handle_t *handle,
 		ex->ee_len = cpu_to_le16(ee_len);
 		ext4_ext_try_to_merge(handle, inode, path, ex);
 		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
+		if (err)
+			goto fix_extent_len;
+
+		/* update extent status tree */
+		err = ext4_es_zeroout(inode, &zero_ex);
+
 		goto out;
 	} else if (err)
 		goto fix_extent_len;
@@ -3041,6 +3067,7 @@ static int ext4_split_extent(handle_t *handle,
 	int err = 0;
 	int uninitialized;
 	int split_flag1, flags1;
+	int allocated = map->m_len;

 	depth = ext_depth(inode);
 	ex = path[depth].p_ext;
@@ -3060,20 +3087,29 @@ static int ext4_split_extent(handle_t *handle,
 				map->m_lblk + map->m_len, split_flag1, flags1);
 		if (err)
 			goto out;
+	} else {
+		allocated = ee_len - (map->m_lblk - ee_block);
 	}
+	/*
+	 * Update path is required because previous ext4_split_extent_at() may
+	 * result in split of original leaf or extent zeroout.
+	 */
 	ext4_ext_drop_refs(path);
 	path = ext4_ext_find_extent(inode, map->m_lblk, path);
 	if (IS_ERR(path))
 		return PTR_ERR(path);
+	depth = ext_depth(inode);
+	ex = path[depth].p_ext;
+	uninitialized = ext4_ext_is_uninitialized(ex);
+	split_flag1 = 0;

 	if (map->m_lblk >= ee_block) {
-		split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT |
-					    EXT4_EXT_DATA_VALID2);
-		if (uninitialized)
+		split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
+		if (uninitialized) {
 			split_flag1 |= EXT4_EXT_MARK_UNINIT1;
-		if (split_flag & EXT4_EXT_MARK_UNINIT2)
-			split_flag1 |= EXT4_EXT_MARK_UNINIT2;
+			split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
+						     EXT4_EXT_MARK_UNINIT2);
+		}
 		err = ext4_split_extent_at(handle, inode, path,
 				map->m_lblk, split_flag1, flags);
 		if (err)
@@ -3082,7 +3118,7 @@ static int ext4_split_extent(handle_t *handle,
 	ext4_ext_show_leaf(inode, path);
 out:
-	return err ? err : map->m_len;
+	return err ? err : allocated;
 }
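The allocated bookkeeping introduced above matters when the extent being split ends before the requested range does: the function must report how many blocks it actually covered rather than the full requested length. A hedged sketch with made-up numbers (not the kernel types):

#include <stdio.h>

int main(void)
{
	unsigned ee_block = 100, ee_len = 50;	/* extent covers blocks [100, 150) */
	unsigned m_lblk = 120, m_len = 60;	/* request covers blocks [120, 180) */
	unsigned allocated = m_len;

	/* No split of the tail happens when the request reaches past the
	 * extent's end, so only the remainder of the extent is usable. */
	if (m_lblk + m_len >= ee_block + ee_len)
		allocated = ee_len - (m_lblk - ee_block);

	printf("allocated = %u blocks (not %u)\n", allocated, m_len);
	return 0;
}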
@@ -3137,6 +3173,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
 	allocated = ee_len - (map->m_lblk - ee_block);
+	zero_ex.ee_len = 0;

 	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);

@@ -3227,13 +3264,16 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	if (EXT4_EXT_MAY_ZEROOUT & split_flag)
 		max_zeroout = sbi->s_extent_max_zeroout_kb >>
-			inode->i_sb->s_blocksize_bits;
+			(inode->i_sb->s_blocksize_bits - 10);

 	/* If extent is less than s_max_zeroout_kb, zeroout directly */
 	if (max_zeroout && (ee_len <= max_zeroout)) {
 		err = ext4_ext_zeroout(inode, ex);
 		if (err)
 			goto out;
+		zero_ex.ee_block = ex->ee_block;
+		zero_ex.ee_len = ext4_ext_get_actual_len(ex);
+		ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));

 		err = ext4_ext_get_access(handle, inode, path + depth);
 		if (err)
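The shift change above is a units fix: s_extent_max_zeroout_kb is a count of KiB, so converting it to filesystem blocks must shift by (blocksize_bits - 10), since 2^10 bytes is one KiB. Shifting by the full blocksize_bits, as before, rounds any reasonable limit down to zero and silently disables zeroout. A worked sketch (illustrative values, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned max_zeroout_kb = 32;	/* limit in KiB (32 is illustrative) */
	unsigned blocksize_bits = 12;	/* 4 KiB filesystem blocks */

	/* correct: KiB -> blocks, 32 >> 2 == 8 blocks (8 * 4 KiB == 32 KiB) */
	unsigned blocks_ok  = max_zeroout_kb >> (blocksize_bits - 10);
	/* buggy: 32 >> 12 == 0, so zeroout never triggers */
	unsigned blocks_bug = max_zeroout_kb >> blocksize_bits;

	printf("correct: %u blocks, buggy: %u blocks\n", blocks_ok, blocks_bug);
	return 0;
}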
@@ -3292,6 +3332,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		err = allocated;

 out:
+	/* If we have gotten a failure, don't zero out status tree */
+	if (!err)
+		err = ext4_es_zeroout(inode, &zero_ex);
 	return err ? err : allocated;
 }
...@@ -3374,8 +3417,19 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, ...@@ -3374,8 +3417,19 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
"block %llu, max_blocks %u\n", inode->i_ino, "block %llu, max_blocks %u\n", inode->i_ino,
(unsigned long long)ee_block, ee_len); (unsigned long long)ee_block, ee_len);
/* If extent is larger than requested then split is required */ /* If extent is larger than requested it is a clear sign that we still
* have some extent state machine issues left. So extent_split is still
* required.
* TODO: Once all related issues will be fixed this situation should be
* illegal.
*/
if (ee_block != map->m_lblk || ee_len > map->m_len) { if (ee_block != map->m_lblk || ee_len > map->m_len) {
#ifdef EXT4_DEBUG
ext4_warning("Inode (%ld) finished: extent logical block %llu,"
" len %u; IO logical block %llu, len %u\n",
inode->i_ino, (unsigned long long)ee_block, ee_len,
(unsigned long long)map->m_lblk, map->m_len);
#endif
err = ext4_split_unwritten_extents(handle, inode, map, path, err = ext4_split_unwritten_extents(handle, inode, map, path,
EXT4_GET_BLOCKS_CONVERT); EXT4_GET_BLOCKS_CONVERT);
if (err < 0) if (err < 0)
@@ -3626,6 +3680,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 						 path, map->m_len);
 		} else
 			err = ret;
+		map->m_flags |= EXT4_MAP_MAPPED;
+		if (allocated > map->m_len)
+			allocated = map->m_len;
+		map->m_len = allocated;
 		goto out2;
 	}
 	/* buffered IO case */
@@ -3675,6 +3733,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 			allocated - map->m_len);
 		allocated = map->m_len;
 	}
+	map->m_len = allocated;

 	/*
 	 * If we have done fallocate with the offset that is already
@@ -4106,9 +4165,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			}
 		} else {
 			BUG_ON(allocated_clusters < reserved_clusters);
-			/* We will claim quota for all newly allocated blocks.*/
-			ext4_da_update_reserve_space(inode, allocated_clusters,
-							1);
 			if (reserved_clusters < allocated_clusters) {
 				struct ext4_inode_info *ei = EXT4_I(inode);
 				int reservation = allocated_clusters -
@@ -4159,6 +4215,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 				ei->i_reserved_data_blocks += reservation;
 				spin_unlock(&ei->i_block_reservation_lock);
 			}
+			/*
+			 * We will claim quota for all newly allocated blocks.
+			 * We're updating the reserved space *after* the
+			 * correction above so we do not accidentally free
+			 * all the metadata reservation because we might
+			 * actually need it later on.
+			 */
+			ext4_da_update_reserve_space(inode, allocated_clusters,
+						     1);
 		}
 	}
@@ -4368,8 +4433,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (len <= EXT_UNINIT_MAX_LEN << blkbits)
 		flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;

-	/* Prevent race condition between unwritten */
-	ext4_flush_unwritten_io(inode);
 retry:
 	while (ret >= 0 && ret < max_blocks) {
 		map.m_lblk = map.m_lblk + ret;
...
@@ -333,17 +333,27 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
 static int ext4_es_can_be_merged(struct extent_status *es1,
 				 struct extent_status *es2)
 {
-	if (es1->es_lblk + es1->es_len != es2->es_lblk)
+	if (ext4_es_status(es1) != ext4_es_status(es2))
 		return 0;

-	if (ext4_es_status(es1) != ext4_es_status(es2))
+	if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL)
 		return 0;

-	if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
-	    (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2)))
+	if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
 		return 0;

-	return 1;
+	if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
+	    (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
+		return 1;
+
+	if (ext4_es_is_hole(es1))
+		return 1;
+
+	/* we need to check delayed extent is without unwritten status */
+	if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
+		return 1;
+
+	return 0;
 }
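The new length check above exists because extent lengths are 32-bit quantities: without the __u64 cast, the sum of two large lengths would wrap in 32-bit arithmetic and two extents far too big to merge could pass the test. A standalone C sketch mirroring just that check (hypothetical helper name):

#include <stdint.h>
#include <stdio.h>

static int lengths_can_merge(uint32_t len1, uint32_t len2)
{
	/* Promote to 64 bits first; (len1 + len2) in 32 bits could wrap. */
	return ((uint64_t)len1 + len2) <= 0xFFFFFFFFULL;
}

int main(void)
{
	printf("%d\n", lengths_can_merge(0x80000000u, 0x80000000u));	/* 0 */
	printf("%d\n", lengths_can_merge(100, 200));			/* 1 */
	return 0;
}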
 static struct extent_status *
@@ -389,6 +399,179 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
 	return es;
 }
+#ifdef ES_AGGRESSIVE_TEST
+static void ext4_es_insert_extent_ext_check(struct inode *inode,
+					    struct extent_status *es)
+{
+	struct ext4_ext_path *path = NULL;
+	struct ext4_extent *ex;
+	ext4_lblk_t ee_block;
+	ext4_fsblk_t ee_start;
+	unsigned short ee_len;
+	int depth, ee_status, es_status;
+
+	path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
+	if (IS_ERR(path))
+		return;
+
+	depth = ext_depth(inode);
+	ex = path[depth].p_ext;
+
+	if (ex) {
+		ee_block = le32_to_cpu(ex->ee_block);
+		ee_start = ext4_ext_pblock(ex);
+		ee_len = ext4_ext_get_actual_len(ex);
+
+		ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0;
+		es_status = ext4_es_is_unwritten(es) ? 1 : 0;
+
+		/*
+		 * Make sure ex and es are not overlap when we try to insert
+		 * a delayed/hole extent.
+		 */
+		if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
+			if (in_range(es->es_lblk, ee_block, ee_len)) {
+				pr_warn("ES insert assertation failed for "
+					"inode: %lu we can find an extent "
+					"at block [%d/%d/%llu/%c], but we "
+					"want to add an delayed/hole extent "
+					"[%d/%d/%llu/%llx]\n",
+					inode->i_ino, ee_block, ee_len,
+					ee_start, ee_status ? 'u' : 'w',
+					es->es_lblk, es->es_len,
+					ext4_es_pblock(es), ext4_es_status(es));
+			}
+			goto out;
+		}
+
+		/*
+		 * We don't check ee_block == es->es_lblk, etc. because es
+		 * might be a part of whole extent, vice versa.
+		 */
+		if (es->es_lblk < ee_block ||
+		    ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
+			pr_warn("ES insert assertation failed for inode: %lu "
+				"ex_status [%d/%d/%llu/%c] != "
+				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
+				ee_block, ee_len, ee_start,
+				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
+				ext4_es_pblock(es), es_status ? 'u' : 'w');
+			goto out;
+		}
+
+		if (ee_status ^ es_status) {
+			pr_warn("ES insert assertation failed for inode: %lu "
+				"ex_status [%d/%d/%llu/%c] != "
+				"es_status [%d/%d/%llu/%c]\n", inode->i_ino,
+				ee_block, ee_len, ee_start,
+				ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
+				ext4_es_pblock(es), es_status ? 'u' : 'w');
+		}
+	} else {
+		/*
+		 * We can't find an extent on disk.  So we need to make sure
+		 * that we don't want to add an written/unwritten extent.
+		 */
+		if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
+			pr_warn("ES insert assertation failed for inode: %lu "
+				"can't find an extent at block %d but we want "
+				"to add an written/unwritten extent "
+				"[%d/%d/%llu/%llx]\n", inode->i_ino,
+				es->es_lblk, es->es_lblk, es->es_len,
+				ext4_es_pblock(es), ext4_es_status(es));
+		}
+	}
+out:
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+}
+
+static void ext4_es_insert_extent_ind_check(struct inode *inode,
+					    struct extent_status *es)
+{
+	struct ext4_map_blocks map;
+	int retval;
+
+	/*
+	 * Here we call ext4_ind_map_blocks to lookup a block mapping because
+	 * 'Indirect' structure is defined in indirect.c.  So we couldn't
+	 * access direct/indirect tree from outside.  It is too dirty to define
+	 * this function in indirect.c file.
+	 */
+	map.m_lblk = es->es_lblk;
+	map.m_len = es->es_len;
+
+	retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
+	if (retval > 0) {
+		if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
+			/*
+			 * We want to add a delayed/hole extent but this
+			 * block has been allocated.
+			 */
+			pr_warn("ES insert assertation failed for inode: %lu "
+				"We can find blocks but we want to add a "
+				"delayed/hole extent [%d/%d/%llu/%llx]\n",
+				inode->i_ino, es->es_lblk, es->es_len,
+				ext4_es_pblock(es), ext4_es_status(es));
+			return;
+		} else if (ext4_es_is_written(es)) {
+			if (retval != es->es_len) {
+				pr_warn("ES insert assertation failed for "
+					"inode: %lu retval %d != es_len %d\n",
+					inode->i_ino, retval, es->es_len);
+				return;
+			}
+			if (map.m_pblk != ext4_es_pblock(es)) {
+				pr_warn("ES insert assertation failed for "
+					"inode: %lu m_pblk %llu != "
+					"es_pblk %llu\n",
+					inode->i_ino, map.m_pblk,
+					ext4_es_pblock(es));
+				return;
+			}
+		} else {
+			/*
+			 * We don't need to check unwritten extent because
+			 * indirect-based file doesn't have it.
+			 */
+			BUG_ON(1);
+		}
+	} else if (retval == 0) {
+		if (ext4_es_is_written(es)) {
+			pr_warn("ES insert assertation failed for inode: %lu "
+				"We can't find the block but we want to add "
+				"an written extent [%d/%d/%llu/%llx]\n",
+				inode->i_ino, es->es_lblk, es->es_len,
+				ext4_es_pblock(es), ext4_es_status(es));
+			return;
+		}
+	}
+}
+
+static inline void ext4_es_insert_extent_check(struct inode *inode,
+					       struct extent_status *es)
+{
+	/*
+	 * We don't need to worry about the race condition because
+	 * caller takes i_data_sem locking.
+	 */
+	BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+		ext4_es_insert_extent_ext_check(inode, es);
+	else
+		ext4_es_insert_extent_ind_check(inode, es);
+}
+#else
+static inline void ext4_es_insert_extent_check(struct inode *inode,
+					       struct extent_status *es)
+{
+}
+#endif
+
 static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
 {
 	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
@@ -471,6 +654,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
 	ext4_es_store_status(&newes, status);
 	trace_ext4_es_insert_extent(inode, &newes);

+	ext4_es_insert_extent_check(inode, &newes);
+
 	write_lock(&EXT4_I(inode)->i_es_lock);
 	err = __es_remove_extent(inode, lblk, end);
 	if (err != 0)
@@ -669,6 +854,23 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
 	return err;
 }

+int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
+{
+	ext4_lblk_t  ee_block;
+	ext4_fsblk_t ee_pblock;
+	unsigned int ee_len;
+
+	ee_block  = le32_to_cpu(ex->ee_block);
+	ee_len    = ext4_ext_get_actual_len(ex);
+	ee_pblock = ext4_ext_pblock(ex);
+
+	if (ee_len == 0)
+		return 0;
+
+	return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
+				     EXTENT_STATUS_WRITTEN);
+}
+
 static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
 {
 	struct ext4_sb_info *sbi = container_of(shrink,
...
@@ -20,6 +20,12 @@
 #define es_debug(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif

+/*
+ * With ES_AGGRESSIVE_TEST defined, the result of es caching will be
+ * checked with old map_block's result.
+ */
+#define ES_AGGRESSIVE_TEST__
+
 /*
  * These flags live in the high bits of extent_status.es_pblk
  */
@@ -33,6 +39,8 @@
 			 EXTENT_STATUS_DELAYED | \
 			 EXTENT_STATUS_HOLE)

+struct ext4_extent;
+
 struct extent_status {
 	struct rb_node rb_node;
 	ext4_lblk_t es_lblk;	/* first logical block extent covers */
@@ -58,6 +66,7 @@ extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
 					struct extent_status *es);
 extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
 				 struct extent_status *es);
+extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);

 static inline int ext4_es_is_written(struct extent_status *es)
 {
...
@@ -324,8 +324,8 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
 }

 struct orlov_stats {
+	__u64 free_clusters;
 	__u32 free_inodes;
-	__u32 free_clusters;
 	__u32 used_dirs;
 };

@@ -342,7 +342,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
 	if (flex_size > 1) {
 		stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
-		stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
+		stats->free_clusters = atomic64_read(&flex_group[g].free_clusters);
 		stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
 		return;
 	}
...
@@ -185,8 +185,6 @@ void ext4_evict_inode(struct inode *inode)
 	trace_ext4_evict_inode(inode);

-	ext4_ioend_wait(inode);
-
 	if (inode->i_nlink) {
 		/*
 		 * When journalling data dirty buffers are tracked only in the
@@ -207,7 +205,8 @@ void ext4_evict_inode(struct inode *inode)
 		 * don't use page cache.
 		 */
 		if (ext4_should_journal_data(inode) &&
-		    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+		    (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
+		    inode->i_ino != EXT4_JOURNAL_INO) {
 			journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
 			tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;

@@ -216,6 +215,7 @@ void ext4_evict_inode(struct inode *inode)
 			filemap_write_and_wait(&inode->i_data);
 		}
 		truncate_inode_pages(&inode->i_data, 0);
+		ext4_ioend_shutdown(inode);
 		goto no_delete;
 	}

@@ -225,6 +225,7 @@ void ext4_evict_inode(struct inode *inode)
 	if (ext4_should_order_data(inode))
 		ext4_begin_ordered_truncate(inode, 0);
 	truncate_inode_pages(&inode->i_data, 0);
+	ext4_ioend_shutdown(inode);

 	if (is_bad_inode(inode))
 		goto no_delete;
@@ -482,6 +483,58 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
 	return num;
 }
+#ifdef ES_AGGRESSIVE_TEST
+static void ext4_map_blocks_es_recheck(handle_t *handle,
+				       struct inode *inode,
+				       struct ext4_map_blocks *es_map,
+				       struct ext4_map_blocks *map,
+				       int flags)
+{
+	int retval;
+
+	map->m_flags = 0;
+	/*
+	 * There is a race window that the result is not the same.
+	 * e.g. xfstests #223 when dioread_nolock enables.  The reason
+	 * is that we lookup a block mapping in extent status tree with
+	 * out taking i_data_sem.  So at the time the unwritten extent
+	 * could be converted.
+	 */
+	if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+		down_read((&EXT4_I(inode)->i_data_sem));
+	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+		retval = ext4_ext_map_blocks(handle, inode, map, flags &
+					     EXT4_GET_BLOCKS_KEEP_SIZE);
+	} else {
+		retval = ext4_ind_map_blocks(handle, inode, map, flags &
+					     EXT4_GET_BLOCKS_KEEP_SIZE);
+	}
+	if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
+		up_read((&EXT4_I(inode)->i_data_sem));
+	/*
+	 * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
+	 * because it shouldn't be marked in es_map->m_flags.
+	 */
+	map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);

+	/*
+	 * We don't check m_len because extent will be collpased in status
+	 * tree.  So the m_len might not equal.
+	 */
+	if (es_map->m_lblk != map->m_lblk ||
+	    es_map->m_flags != map->m_flags ||
+	    es_map->m_pblk != map->m_pblk) {
+		printk("ES cache assertation failed for inode: %lu "
+		       "es_cached ex [%d/%d/%llu/%x] != "
+		       "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
+		       inode->i_ino, es_map->m_lblk, es_map->m_len,
+		       es_map->m_pblk, es_map->m_flags, map->m_lblk,
+		       map->m_len, map->m_pblk, map->m_flags,
+		       retval, flags);
+	}
+}
+#endif /* ES_AGGRESSIVE_TEST */
+
 /*
  * The ext4_map_blocks() function tries to look up the requested blocks,
  * and returns if the blocks are already mapped.
@@ -509,6 +562,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 {
 	struct extent_status es;
 	int retval;
+#ifdef ES_AGGRESSIVE_TEST
+	struct ext4_map_blocks orig_map;
+
+	memcpy(&orig_map, map, sizeof(*map));
+#endif

 	map->m_flags = 0;
 	ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
@@ -531,6 +589,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		} else {
 			BUG_ON(1);
 		}
+#ifdef ES_AGGRESSIVE_TEST
+		ext4_map_blocks_es_recheck(handle, inode, map,
+					   &orig_map, flags);
+#endif
 		goto found;
 	}
@@ -551,6 +613,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		int ret;
 		unsigned long long status;

+#ifdef ES_AGGRESSIVE_TEST
+		if (retval != map->m_len) {
+			printk("ES len assertation failed for inode: %lu "
+			       "retval %d != map->m_len %d "
+			       "in %s (lookup)\n", inode->i_ino, retval,
+			       map->m_len, __func__);
+		}
+#endif
+
 		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
 				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
 		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -643,6 +714,24 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 		int ret;
 		unsigned long long status;

+#ifdef ES_AGGRESSIVE_TEST
+		if (retval != map->m_len) {
+			printk("ES len assertation failed for inode: %lu "
+			       "retval %d != map->m_len %d "
+			       "in %s (allocation)\n", inode->i_ino, retval,
+			       map->m_len, __func__);
+		}
+#endif
+
+		/*
+		 * If the extent has been zeroed out, we don't need to update
+		 * extent status tree.
+		 */
+		if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
+		    ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+			if (ext4_es_is_written(&es))
+				goto has_zeroout;
+		}
+
 		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
 				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
 		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -655,6 +744,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 			retval = ret;
 	}

+has_zeroout:
 	up_write((&EXT4_I(inode)->i_data_sem));
 	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
 		int ret = check_block_validity(inode, map);
@@ -1215,6 +1305,55 @@ static int ext4_journalled_write_end(struct file *file,
 	return ret ? ret : copied;
 }

+/*
+ * Reserve a metadata for a single block located at lblock
+ */
+static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
+{
+	int retries = 0;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned int md_needed;
+	ext4_lblk_t save_last_lblock;
+	int save_len;
+
+	/*
+	 * recalculate the amount of metadata blocks to reserve
+	 * in order to allocate nrblocks
+	 * worse case is one extent per block
+	 */
+repeat:
+	spin_lock(&ei->i_block_reservation_lock);
+	/*
+	 * ext4_calc_metadata_amount() has side effects, which we have
+	 * to be prepared undo if we fail to claim space.
+	 */
+	save_len = ei->i_da_metadata_calc_len;
+	save_last_lblock = ei->i_da_metadata_calc_last_lblock;
+	md_needed = EXT4_NUM_B2C(sbi,
+				 ext4_calc_metadata_amount(inode, lblock));
+	trace_ext4_da_reserve_space(inode, md_needed);
+
+	/*
+	 * We do still charge estimated metadata to the sb though;
+	 * we cannot afford to run out of free blocks.
+	 */
+	if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
+		ei->i_da_metadata_calc_len = save_len;
+		ei->i_da_metadata_calc_last_lblock = save_last_lblock;
+		spin_unlock(&ei->i_block_reservation_lock);
+		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
+			cond_resched();
+			goto repeat;
+		}
+		return -ENOSPC;
+	}
+	ei->i_reserved_meta_blocks += md_needed;
+	spin_unlock(&ei->i_block_reservation_lock);
+
+	return 0;	/* success */
+}
+
 /*
  * Reserve a single cluster located at lblock
  */
@@ -1263,7 +1402,7 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
 		ei->i_da_metadata_calc_last_lblock = save_last_lblock;
 		spin_unlock(&ei->i_block_reservation_lock);
 		if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
-			yield();
+			cond_resched();
 			goto repeat;
 		}
 		dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
@@ -1768,6 +1907,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 	struct extent_status es;
 	int retval;
 	sector_t invalid_block = ~((sector_t) 0xffff);
+#ifdef ES_AGGRESSIVE_TEST
+	struct ext4_map_blocks orig_map;
+
+	memcpy(&orig_map, map, sizeof(*map));
+#endif

 	if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
 		invalid_block = ~0;
@@ -1809,6 +1953,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 		else
 			BUG_ON(1);

+#ifdef ES_AGGRESSIVE_TEST
+		ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
+#endif
 		return retval;
 	}
@@ -1843,8 +1990,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 		 * XXX: __block_prepare_write() unmaps passed block,
 		 * is it OK?
 		 */
-		/* If the block was allocated from previously allocated cluster,
-		 * then we dont need to reserve it again. */
+		/*
+		 * If the block was allocated from previously allocated cluster,
+		 * then we don't need to reserve it again. However we still need
+		 * to reserve metadata for every block we're going to write.
+		 */
 		if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
 			ret = ext4_da_reserve_space(inode, iblock);
 			if (ret) {
@@ -1852,6 +2002,13 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 				retval = ret;
 				goto out_unlock;
 			}
+		} else {
+			ret = ext4_da_reserve_metadata(inode, iblock);
+			if (ret) {
+				/* not enough space to reserve */
+				retval = ret;
+				goto out_unlock;
+			}
 		}

 		ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -1873,6 +2030,15 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
 		int ret;
 		unsigned long long status;

+#ifdef ES_AGGRESSIVE_TEST
+		if (retval != map->m_len) {
+			printk("ES len assertation failed for inode: %lu "
+			       "retval %d != map->m_len %d "
+			       "in %s (lookup)\n", inode->i_ino, retval,
+			       map->m_len, __func__);
+		}
+#endif
+
 		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
 				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
 		ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -2908,8 +3074,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 	trace_ext4_releasepage(page);

-	WARN_ON(PageChecked(page));
-	if (!page_has_buffers(page))
+	/* Page has dirty journalled data -> cannot release */
+	if (PageChecked(page))
 		return 0;
 	if (journal)
 		return jbd2_journal_try_to_free_buffers(journal, page, wait);
...
@@ -2804,8 +2804,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi,
 							  ac->ac_b_ex.fe_group);
-		atomic_sub(ac->ac_b_ex.fe_len,
-			   &sbi->s_flex_groups[flex_group].free_clusters);
+		atomic64_sub(ac->ac_b_ex.fe_len,
+			     &sbi->s_flex_groups[flex_group].free_clusters);
 	}

 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -3692,11 +3692,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 	if (free < needed && busy) {
 		busy = 0;
 		ext4_unlock_group(sb, group);
-		/*
-		 * Yield the CPU here so that we don't get soft lockup
-		 * in non preempt case.
-		 */
-		yield();
+		cond_resched();
 		goto repeat;
 	}
@@ -4246,7 +4242,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	       ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {

 		/* let others to free the space */
-		yield();
+		cond_resched();
 		ar->len = ar->len >> 1;
 	}
 	if (!ar->len) {
@@ -4464,7 +4460,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 	struct buffer_head *bitmap_bh = NULL;
 	struct super_block *sb = inode->i_sb;
 	struct ext4_group_desc *gdp;
-	unsigned long freed = 0;
 	unsigned int overflow;
 	ext4_grpblk_t bit;
 	struct buffer_head *gd_bh;
@@ -4666,14 +4661,12 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-		atomic_add(count_clusters,
-			   &sbi->s_flex_groups[flex_group].free_clusters);
+		atomic64_add(count_clusters,
+			     &sbi->s_flex_groups[flex_group].free_clusters);
 	}

 	ext4_mb_unload_buddy(&e4b);

-	freed += count;
-
 	if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
 		dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
@@ -4811,8 +4804,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
 	if (sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
-		atomic_add(EXT4_NUM_B2C(sbi, blocks_freed),
-			   &sbi->s_flex_groups[flex_group].free_clusters);
+		atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
+			     &sbi->s_flex_groups[flex_group].free_clusters);
 	}

 	ext4_mb_unload_buddy(&e4b);
...
@@ -32,16 +32,18 @@
  */
 static inline int
 get_ext_path(struct inode *inode, ext4_lblk_t lblock,
-		struct ext4_ext_path **path)
+		struct ext4_ext_path **orig_path)
 {
 	int ret = 0;
+	struct ext4_ext_path *path;

-	*path = ext4_ext_find_extent(inode, lblock, *path);
-	if (IS_ERR(*path)) {
-		ret = PTR_ERR(*path);
-		*path = NULL;
-	} else if ((*path)[ext_depth(inode)].p_ext == NULL)
+	path = ext4_ext_find_extent(inode, lblock, *orig_path);
+	if (IS_ERR(path))
+		ret = PTR_ERR(path);
+	else if (path[ext_depth(inode)].p_ext == NULL)
 		ret = -ENODATA;
+	else
+		*orig_path = path;

 	return ret;
 }
@@ -611,24 +613,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent *ext;
+	int ret = 0;
 	ext4_lblk_t last = from + count;
 	while (from < last) {
 		*err = get_ext_path(inode, from, &path);
 		if (*err)
-			return 0;
+			goto out;
 		ext = path[ext_depth(inode)].p_ext;
-		if (!ext) {
-			ext4_ext_drop_refs(path);
-			return 0;
-		}
-		if (uninit != ext4_ext_is_uninitialized(ext)) {
-			ext4_ext_drop_refs(path);
-			return 0;
-		}
+		if (uninit != ext4_ext_is_uninitialized(ext))
+			goto out;
 		from += ext4_ext_get_actual_len(ext);
 		ext4_ext_drop_refs(path);
 	}
-	return 1;
+	ret = 1;
+out:
+	if (path) {
+		ext4_ext_drop_refs(path);
+		kfree(path);
+	}
+	return ret;
 }

 /**
@@ -666,6 +669,14 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
 	int replaced_count = 0;
 	int dext_alen;

+	*err = ext4_es_remove_extent(orig_inode, from, count);
+	if (*err)
+		goto out;
+
+	*err = ext4_es_remove_extent(donor_inode, from, count);
+	if (*err)
+		goto out;
+
 	/* Get the original extent for the block "orig_off" */
 	*err = get_ext_path(orig_inode, orig_off, &orig_path);
 	if (*err)
...
@@ -50,11 +50,21 @@ void ext4_exit_pageio(void)
 	kmem_cache_destroy(io_page_cachep);
 }

-void ext4_ioend_wait(struct inode *inode)
+/*
+ * This function is called by ext4_evict_inode() to make sure there is
+ * no more pending I/O completion work left to do.
+ */
+void ext4_ioend_shutdown(struct inode *inode)
 {
 	wait_queue_head_t *wq = ext4_ioend_wq(inode);

 	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
+	/*
+	 * We need to make sure the work structure is finished being
+	 * used before we let the inode get destroyed.
+	 */
+	if (work_pending(&EXT4_I(inode)->i_unwritten_work))
+		cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
 }

 static void put_io_page(struct ext4_io_page *io_page)
...
@@ -1360,8 +1360,8 @@ static void ext4_update_super(struct super_block *sb,
 	    sbi->s_log_groups_per_flex) {
 		ext4_group_t flex_group;
 		flex_group = ext4_flex_group(sbi, group_data[0].group);
-		atomic_add(EXT4_NUM_B2C(sbi, free_blocks),
-			   &sbi->s_flex_groups[flex_group].free_clusters);
+		atomic64_add(EXT4_NUM_B2C(sbi, free_blocks),
+			     &sbi->s_flex_groups[flex_group].free_clusters);
 		atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
 			   &sbi->s_flex_groups[flex_group].free_inodes);
 	}
...
@@ -1927,8 +1927,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
 		flex_group = ext4_flex_group(sbi, i);
 		atomic_add(ext4_free_inodes_count(sb, gdp),
 			   &sbi->s_flex_groups[flex_group].free_inodes);
-		atomic_add(ext4_free_group_clusters(sb, gdp),
-			   &sbi->s_flex_groups[flex_group].free_clusters);
+		atomic64_add(ext4_free_group_clusters(sb, gdp),
+			     &sbi->s_flex_groups[flex_group].free_clusters);
 		atomic_add(ext4_used_dirs_count(sb, gdp),
 			   &sbi->s_flex_groups[flex_group].used_dirs);
 	}
...
@@ -1065,9 +1065,12 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
 void jbd2_journal_set_triggers(struct buffer_head *bh,
 			       struct jbd2_buffer_trigger_type *type)
 {
-	struct journal_head *jh = bh2jh(bh);
+	struct journal_head *jh = jbd2_journal_grab_journal_head(bh);

+	if (WARN_ON(!jh))
+		return;
 	jh->b_triggers = type;
+	jbd2_journal_put_journal_head(jh);
 }

 void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
@@ -1119,17 +1122,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal = transaction->t_journal;
-	struct journal_head *jh = bh2jh(bh);
+	struct journal_head *jh;
 	int ret = 0;

-	jbd_debug(5, "journal_head %p\n", jh);
-	JBUFFER_TRACE(jh, "entry");
 	if (is_handle_aborted(handle))
 		goto out;
-	if (!buffer_jbd(bh)) {
+	jh = jbd2_journal_grab_journal_head(bh);
+	if (!jh) {
 		ret = -EUCLEAN;
 		goto out;
 	}
+	jbd_debug(5, "journal_head %p\n", jh);
+	JBUFFER_TRACE(jh, "entry");

 	jbd_lock_bh_state(bh);
@@ -1220,6 +1224,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 	spin_unlock(&journal->j_list_lock);
 out_unlock_bh:
 	jbd_unlock_bh_state(bh);
+	jbd2_journal_put_journal_head(jh);
 out:
 	JBUFFER_TRACE(jh, "exit");
 	WARN_ON(ret);	/* All errors are bugs, so dump the stack */
...
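The jbd2 hunks above both follow the same discipline: never touch a journal head you have not pinned, because it can be freed between lookup and use; take a reference first (jbd2_journal_grab_journal_head) and drop it when done (jbd2_journal_put_journal_head). A userspace analogue of that grab/put pattern (C11 atomics; not the jbd2 API, just the shape of the fix):

#include <stdatomic.h>
#include <stdlib.h>

struct obj {
	atomic_int refcount;
};

/* Pin the object before using it, like jbd2_journal_grab_journal_head(). */
static struct obj *obj_grab(struct obj *o)
{
	if (o)
		atomic_fetch_add(&o->refcount, 1);
	return o;
}

/* Drop a reference; the last put frees, like jbd2_journal_put_journal_head(). */
static void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		free(o);
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));
	if (!o)
		return 1;
	atomic_init(&o->refcount, 1);

	struct obj *pinned = obj_grab(o);	/* use-side reference */
	obj_put(o);				/* owner drops its reference */
	/* pinned is still safe to use here because we hold a reference */
	obj_put(pinned);			/* last put frees the object */
	return 0;
}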