Commit 30a7eb97 authored by Tahsin Erdogan's avatar Tahsin Erdogan Committed by Theodore Ts'o

ext4: cleanup transaction restarts during inode deletion

During inode deletion, the number of journal credits that will be
needed is hard to determine.  For that reason we have journal
extend/restart calls in several places.  Whenever a transaction is
restarted, filesystem must be in a consistent state because there is
no atomicity guarantee beyond a restart call.

Add ext4_xattr_ensure_credits() helper function which takes care of
journal extend/restart logic.  It also handles getting jbd2 write
access and dirty metadata calls.  This function is called at every
iteration of handling an ea_inode reference.
Signed-off-by: default avatarTahsin Erdogan <tahsin@google.com>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
parent 02749a4c
...@@ -239,7 +239,11 @@ void ext4_evict_inode(struct inode *inode) ...@@ -239,7 +239,11 @@ void ext4_evict_inode(struct inode *inode)
*/ */
sb_start_intwrite(inode->i_sb); sb_start_intwrite(inode->i_sb);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, extra_credits); if (!IS_NOQUOTA(inode))
extra_credits += EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE,
ext4_blocks_for_truncate(inode)+extra_credits);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle)); ext4_std_error(inode->i_sb, PTR_ERR(handle));
/* /*
...@@ -251,36 +255,9 @@ void ext4_evict_inode(struct inode *inode) ...@@ -251,36 +255,9 @@ void ext4_evict_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb); sb_end_intwrite(inode->i_sb);
goto no_delete; goto no_delete;
} }
if (IS_SYNC(inode)) if (IS_SYNC(inode))
ext4_handle_sync(handle); ext4_handle_sync(handle);
/*
* Delete xattr inode before deleting the main inode.
*/
err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array);
if (err) {
ext4_warning(inode->i_sb,
"couldn't delete inode's xattr (err %d)", err);
goto stop_handle;
}
if (!IS_NOQUOTA(inode))
extra_credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
if (!ext4_handle_has_enough_credits(handle,
ext4_blocks_for_truncate(inode) + extra_credits)) {
err = ext4_journal_extend(handle,
ext4_blocks_for_truncate(inode) + extra_credits);
if (err > 0)
err = ext4_journal_restart(handle,
ext4_blocks_for_truncate(inode) + extra_credits);
if (err != 0) {
ext4_warning(inode->i_sb,
"couldn't extend journal (err %d)", err);
goto stop_handle;
}
}
inode->i_size = 0; inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode); err = ext4_mark_inode_dirty(handle, inode);
if (err) { if (err) {
...@@ -298,25 +275,17 @@ void ext4_evict_inode(struct inode *inode) ...@@ -298,25 +275,17 @@ void ext4_evict_inode(struct inode *inode)
} }
} }
/* /* Remove xattr references. */
* ext4_ext_truncate() doesn't reserve any slop when it err = ext4_xattr_delete_inode(handle, inode, &ea_inode_array,
* restarts journal transactions; therefore there may not be extra_credits);
* enough credits left in the handle to remove the inode from if (err) {
* the orphan list and set the dtime field. ext4_warning(inode->i_sb, "xattr delete (err %d)", err);
*/ stop_handle:
if (!ext4_handle_has_enough_credits(handle, extra_credits)) { ext4_journal_stop(handle);
err = ext4_journal_extend(handle, extra_credits); ext4_orphan_del(NULL, inode);
if (err > 0) sb_end_intwrite(inode->i_sb);
err = ext4_journal_restart(handle, extra_credits); ext4_xattr_inode_array_free(ea_inode_array);
if (err != 0) { goto no_delete;
ext4_warning(inode->i_sb,
"couldn't extend journal (err %d)", err);
stop_handle:
ext4_journal_stop(handle);
ext4_orphan_del(NULL, inode);
sb_end_intwrite(inode->i_sb);
goto no_delete;
}
} }
/* /*
...@@ -342,7 +311,6 @@ void ext4_evict_inode(struct inode *inode) ...@@ -342,7 +311,6 @@ void ext4_evict_inode(struct inode *inode)
ext4_clear_inode(inode); ext4_clear_inode(inode);
else else
ext4_free_inode(handle, inode); ext4_free_inode(handle, inode);
ext4_journal_stop(handle); ext4_journal_stop(handle);
sb_end_intwrite(inode->i_sb); sb_end_intwrite(inode->i_sb);
ext4_xattr_inode_array_free(ea_inode_array); ext4_xattr_inode_array_free(ea_inode_array);
......
...@@ -108,6 +108,10 @@ const struct xattr_handler *ext4_xattr_handlers[] = { ...@@ -108,6 +108,10 @@ const struct xattr_handler *ext4_xattr_handlers[] = {
#define EA_BLOCK_CACHE(inode) (((struct ext4_sb_info *) \ #define EA_BLOCK_CACHE(inode) (((struct ext4_sb_info *) \
inode->i_sb->s_fs_info)->s_ea_block_cache) inode->i_sb->s_fs_info)->s_ea_block_cache)
static int
ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
struct inode *inode);
#ifdef CONFIG_LOCKDEP #ifdef CONFIG_LOCKDEP
void ext4_xattr_inode_set_class(struct inode *ea_inode) void ext4_xattr_inode_set_class(struct inode *ea_inode)
{ {
...@@ -652,6 +656,128 @@ static void ext4_xattr_update_super_block(handle_t *handle, ...@@ -652,6 +656,128 @@ static void ext4_xattr_update_super_block(handle_t *handle,
} }
} }
static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
int credits, struct buffer_head *bh,
bool dirty, bool block_csum)
{
int error;
if (!ext4_handle_valid(handle))
return 0;
if (handle->h_buffer_credits >= credits)
return 0;
error = ext4_journal_extend(handle, credits - handle->h_buffer_credits);
if (!error)
return 0;
if (error < 0) {
ext4_warning(inode->i_sb, "Extend journal (error %d)", error);
return error;
}
if (bh && dirty) {
if (block_csum)
ext4_xattr_block_csum_set(inode, bh);
error = ext4_handle_dirty_metadata(handle, NULL, bh);
if (error) {
ext4_warning(inode->i_sb, "Handle metadata (error %d)",
error);
return error;
}
}
error = ext4_journal_restart(handle, credits);
if (error) {
ext4_warning(inode->i_sb, "Restart journal (error %d)", error);
return error;
}
if (bh) {
error = ext4_journal_get_write_access(handle, bh);
if (error) {
ext4_warning(inode->i_sb,
"Get write access failed (error %d)",
error);
return error;
}
}
return 0;
}
static void
ext4_xattr_inode_remove_all(handle_t *handle, struct inode *parent,
struct buffer_head *bh,
struct ext4_xattr_entry *first, bool block_csum,
struct ext4_xattr_inode_array **ea_inode_array,
int extra_credits)
{
struct inode *ea_inode;
struct ext4_xattr_entry *entry;
bool dirty = false;
unsigned int ea_ino;
int err;
int credits;
/* One credit for dec ref on ea_inode, one for orphan list addition, */
credits = 2 + extra_credits;
for (entry = first; !IS_LAST_ENTRY(entry);
entry = EXT4_XATTR_NEXT(entry)) {
if (!entry->e_value_inum)
continue;
ea_ino = le32_to_cpu(entry->e_value_inum);
err = ext4_xattr_inode_iget(parent, ea_ino, &ea_inode);
if (err)
continue;
err = ext4_expand_inode_array(ea_inode_array, ea_inode);
if (err) {
ext4_warning_inode(ea_inode,
"Expand inode array err=%d", err);
iput(ea_inode);
continue;
}
err = ext4_xattr_ensure_credits(handle, parent, credits, bh,
dirty, block_csum);
if (err) {
ext4_warning_inode(ea_inode, "Ensure credits err=%d",
err);
continue;
}
inode_lock(ea_inode);
clear_nlink(ea_inode);
ext4_orphan_add(handle, ea_inode);
inode_unlock(ea_inode);
/*
* Forget about ea_inode within the same transaction that
* decrements the ref count. This avoids duplicate decrements in
* case the rest of the work spills over to subsequent
* transactions.
*/
entry->e_value_inum = 0;
entry->e_value_size = 0;
dirty = true;
}
if (dirty) {
/*
* Note that we are deliberately skipping csum calculation for
* the final update because we do not expect any journal
* restarts until xattr block is freed.
*/
err = ext4_handle_dirty_metadata(handle, NULL, bh);
if (err)
ext4_warning_inode(parent,
"handle dirty metadata err=%d", err);
}
}
/* /*
* Release the xattr block BH: If the reference count is > 1, decrement it; * Release the xattr block BH: If the reference count is > 1, decrement it;
* otherwise free the block. * otherwise free the block.
...@@ -1982,42 +2108,6 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array, ...@@ -1982,42 +2108,6 @@ ext4_expand_inode_array(struct ext4_xattr_inode_array **ea_inode_array,
return 0; return 0;
} }
/**
* Add xattr inode to orphan list
*/
static int
ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode, int credits,
struct ext4_xattr_inode_array *ea_inode_array)
{
int idx = 0, error = 0;
struct inode *ea_inode;
if (ea_inode_array == NULL)
return 0;
for (; idx < ea_inode_array->count; ++idx) {
if (!ext4_handle_has_enough_credits(handle, credits)) {
error = ext4_journal_extend(handle, credits);
if (error > 0)
error = ext4_journal_restart(handle, credits);
if (error != 0) {
ext4_warning(inode->i_sb,
"couldn't extend journal "
"(err %d)", error);
return error;
}
}
ea_inode = ea_inode_array->inodes[idx];
inode_lock(ea_inode);
ext4_orphan_add(handle, ea_inode);
inode_unlock(ea_inode);
/* the inode's i_count will be released by caller */
}
return 0;
}
/* /*
* ext4_xattr_delete_inode() * ext4_xattr_delete_inode()
* *
...@@ -2030,16 +2120,23 @@ ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode, int credits, ...@@ -2030,16 +2120,23 @@ ext4_xattr_inode_orphan_add(handle_t *handle, struct inode *inode, int credits,
*/ */
int int
ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
struct ext4_xattr_inode_array **ea_inode_array) struct ext4_xattr_inode_array **ea_inode_array,
int extra_credits)
{ {
struct buffer_head *bh = NULL; struct buffer_head *bh = NULL;
struct ext4_xattr_ibody_header *header; struct ext4_xattr_ibody_header *header;
struct ext4_inode *raw_inode; struct ext4_inode *raw_inode;
struct ext4_iloc iloc; struct ext4_iloc iloc = { .bh = NULL };
struct ext4_xattr_entry *entry; int error;
struct inode *ea_inode;
unsigned int ea_ino; error = ext4_xattr_ensure_credits(handle, inode, extra_credits,
int credits = 3, error = 0; NULL /* bh */,
false /* dirty */,
false /* block_csum */);
if (error) {
EXT4_ERROR_INODE(inode, "ensure credits (error %d)", error);
goto cleanup;
}
if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
goto delete_external_ea; goto delete_external_ea;
...@@ -2047,31 +2144,20 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, ...@@ -2047,31 +2144,20 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
error = ext4_get_inode_loc(inode, &iloc); error = ext4_get_inode_loc(inode, &iloc);
if (error) if (error)
goto cleanup; goto cleanup;
error = ext4_journal_get_write_access(handle, iloc.bh);
if (error)
goto cleanup;
raw_inode = ext4_raw_inode(&iloc); raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode); header = IHDR(inode, raw_inode);
for (entry = IFIRST(header); !IS_LAST_ENTRY(entry); ext4_xattr_inode_remove_all(handle, inode, iloc.bh, IFIRST(header),
entry = EXT4_XATTR_NEXT(entry)) { false /* block_csum */, ea_inode_array,
if (!entry->e_value_inum) extra_credits);
continue;
ea_ino = le32_to_cpu(entry->e_value_inum);
error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
if (error)
continue;
error = ext4_expand_inode_array(ea_inode_array, ea_inode);
if (error) {
iput(ea_inode);
brelse(iloc.bh);
goto cleanup;
}
entry->e_value_inum = 0;
}
brelse(iloc.bh);
delete_external_ea: delete_external_ea:
if (!EXT4_I(inode)->i_file_acl) { if (!EXT4_I(inode)->i_file_acl) {
/* add xattr inode to orphan list */ error = 0;
error = ext4_xattr_inode_orphan_add(handle, inode, credits,
*ea_inode_array);
goto cleanup; goto cleanup;
} }
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
...@@ -2089,46 +2175,32 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, ...@@ -2089,46 +2175,32 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
goto cleanup; goto cleanup;
} }
for (entry = BFIRST(bh); !IS_LAST_ENTRY(entry); if (ext4_has_feature_ea_inode(inode->i_sb)) {
entry = EXT4_XATTR_NEXT(entry)) { error = ext4_journal_get_write_access(handle, bh);
if (!entry->e_value_inum)
continue;
ea_ino = le32_to_cpu(entry->e_value_inum);
error = ext4_xattr_inode_iget(inode, ea_ino, &ea_inode);
if (error)
continue;
error = ext4_expand_inode_array(ea_inode_array, ea_inode);
if (error)
goto cleanup;
entry->e_value_inum = 0;
}
/* add xattr inode to orphan list */
error = ext4_xattr_inode_orphan_add(handle, inode, credits,
*ea_inode_array);
if (error)
goto cleanup;
if (!IS_NOQUOTA(inode))
credits += 2 * EXT4_QUOTA_DEL_BLOCKS(inode->i_sb);
if (!ext4_handle_has_enough_credits(handle, credits)) {
error = ext4_journal_extend(handle, credits);
if (error > 0)
error = ext4_journal_restart(handle, credits);
if (error) { if (error) {
ext4_warning(inode->i_sb, EXT4_ERROR_INODE(inode, "write access %llu",
"couldn't extend journal (err %d)", error); EXT4_I(inode)->i_file_acl);
goto cleanup; goto cleanup;
} }
ext4_xattr_inode_remove_all(handle, inode, bh,
BFIRST(bh),
true /* block_csum */,
ea_inode_array,
extra_credits);
} }
ext4_xattr_release_block(handle, inode, bh); ext4_xattr_release_block(handle, inode, bh);
/* Update i_file_acl within the same transaction that releases block. */
EXT4_I(inode)->i_file_acl = 0; EXT4_I(inode)->i_file_acl = 0;
error = ext4_mark_inode_dirty(handle, inode);
if (error) {
EXT4_ERROR_INODE(inode, "mark inode dirty (error %d)",
error);
goto cleanup;
}
cleanup: cleanup:
brelse(iloc.bh);
brelse(bh); brelse(bh);
return error; return error;
} }
......
...@@ -169,7 +169,8 @@ extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len); ...@@ -169,7 +169,8 @@ extern int ext4_xattr_set_credits(struct inode *inode, size_t value_len);
extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino); extern int ext4_xattr_inode_unlink(struct inode *inode, unsigned long ea_ino);
extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, extern int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
struct ext4_xattr_inode_array **array); struct ext4_xattr_inode_array **array,
int extra_credits);
extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array); extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment