Commit 8f616cd5 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: remove write-only variables from ext4_ordered_write_end
  ext4: unexport jbd2_journal_update_superblock
  ext4: Cleanup whitespace and other miscellaneous style issues
  ext4: improve ext4_fill_flex_info() a bit
  ext4: Cleanup the block reservation code path
  ext4: don't assume extents can't cross block groups when truncating
  ext4: Fix lack of credits BUG() when deleting a badly fragmented inode
  ext4: Fix ext4_ext_journal_restart()
  ext4: fix ext4_da_write_begin error path
  jbd2: don't abort if flushing file data failed
  ext4: don't read inode block if the buffer has a write error
  ext4: Don't allow lg prealloc list to be grow large.
  ext4: Convert the usage of NR_CPUS to nr_cpu_ids.
  ext4: Improve error handling in mballoc
  ext4: lock block groups when initializing
  ext4: sync up block and inode bitmap reading functions
  ext4: Allow read/only mounts with corrupted block group checksums
  ext4: Fix data corruption when writing to prealloc area
parents 7e31aa11 7d55992d
...@@ -40,14 +40,15 @@ ext4_acl_from_disk(const void *value, size_t size) ...@@ -40,14 +40,15 @@ ext4_acl_from_disk(const void *value, size_t size)
acl = posix_acl_alloc(count, GFP_NOFS); acl = posix_acl_alloc(count, GFP_NOFS);
if (!acl) if (!acl)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
for (n=0; n < count; n++) { for (n = 0; n < count; n++) {
ext4_acl_entry *entry = ext4_acl_entry *entry =
(ext4_acl_entry *)value; (ext4_acl_entry *)value;
if ((char *)value + sizeof(ext4_acl_entry_short) > end) if ((char *)value + sizeof(ext4_acl_entry_short) > end)
goto fail; goto fail;
acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
switch(acl->a_entries[n].e_tag) {
switch (acl->a_entries[n].e_tag) {
case ACL_USER_OBJ: case ACL_USER_OBJ:
case ACL_GROUP_OBJ: case ACL_GROUP_OBJ:
case ACL_MASK: case ACL_MASK:
...@@ -96,15 +97,14 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size) ...@@ -96,15 +97,14 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
e = (char *)ext_acl + sizeof(ext4_acl_header); e = (char *)ext_acl + sizeof(ext4_acl_header);
for (n=0; n < acl->a_count; n++) { for (n = 0; n < acl->a_count; n++) {
ext4_acl_entry *entry = (ext4_acl_entry *)e; ext4_acl_entry *entry = (ext4_acl_entry *)e;
entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
switch(acl->a_entries[n].e_tag) { switch (acl->a_entries[n].e_tag) {
case ACL_USER: case ACL_USER:
case ACL_GROUP: case ACL_GROUP:
entry->e_id = entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
cpu_to_le32(acl->a_entries[n].e_id);
e += sizeof(ext4_acl_entry); e += sizeof(ext4_acl_entry);
break; break;
...@@ -167,7 +167,7 @@ ext4_get_acl(struct inode *inode, int type) ...@@ -167,7 +167,7 @@ ext4_get_acl(struct inode *inode, int type)
if (!test_opt(inode->i_sb, POSIX_ACL)) if (!test_opt(inode->i_sb, POSIX_ACL))
return NULL; return NULL;
switch(type) { switch (type) {
case ACL_TYPE_ACCESS: case ACL_TYPE_ACCESS:
acl = ext4_iget_acl(inode, &ei->i_acl); acl = ext4_iget_acl(inode, &ei->i_acl);
if (acl != EXT4_ACL_NOT_CACHED) if (acl != EXT4_ACL_NOT_CACHED)
...@@ -201,7 +201,7 @@ ext4_get_acl(struct inode *inode, int type) ...@@ -201,7 +201,7 @@ ext4_get_acl(struct inode *inode, int type)
kfree(value); kfree(value);
if (!IS_ERR(acl)) { if (!IS_ERR(acl)) {
switch(type) { switch (type) {
case ACL_TYPE_ACCESS: case ACL_TYPE_ACCESS:
ext4_iset_acl(inode, &ei->i_acl, acl); ext4_iset_acl(inode, &ei->i_acl, acl);
break; break;
...@@ -232,7 +232,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, ...@@ -232,7 +232,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
if (S_ISLNK(inode->i_mode)) if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP; return -EOPNOTSUPP;
switch(type) { switch (type) {
case ACL_TYPE_ACCESS: case ACL_TYPE_ACCESS:
name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) { if (acl) {
...@@ -269,7 +269,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, ...@@ -269,7 +269,7 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
kfree(value); kfree(value);
if (!error) { if (!error) {
switch(type) { switch (type) {
case ACL_TYPE_ACCESS: case ACL_TYPE_ACCESS:
ext4_iset_acl(inode, &ei->i_acl, acl); ext4_iset_acl(inode, &ei->i_acl, acl);
break; break;
......
...@@ -314,25 +314,28 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) ...@@ -314,25 +314,28 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
if (unlikely(!bh)) { if (unlikely(!bh)) {
ext4_error(sb, __func__, ext4_error(sb, __func__,
"Cannot read block bitmap - " "Cannot read block bitmap - "
"block_group = %d, block_bitmap = %llu", "block_group = %lu, block_bitmap = %llu",
(int)block_group, (unsigned long long)bitmap_blk); block_group, bitmap_blk);
return NULL; return NULL;
} }
if (bh_uptodate_or_lock(bh)) if (bh_uptodate_or_lock(bh))
return bh; return bh;
spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh, block_group, desc); ext4_init_block_bitmap(sb, bh, block_group, desc);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
unlock_buffer(bh); unlock_buffer(bh);
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
return bh; return bh;
} }
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (bh_submit_read(bh) < 0) { if (bh_submit_read(bh) < 0) {
put_bh(bh); put_bh(bh);
ext4_error(sb, __func__, ext4_error(sb, __func__,
"Cannot read block bitmap - " "Cannot read block bitmap - "
"block_group = %d, block_bitmap = %llu", "block_group = %lu, block_bitmap = %llu",
(int)block_group, (unsigned long long)bitmap_blk); block_group, bitmap_blk);
return NULL; return NULL;
} }
ext4_valid_block_bitmap(sb, desc, block_group, bh); ext4_valid_block_bitmap(sb, desc, block_group, bh);
......
...@@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp, ...@@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
/* inode.c */ /* inode.c */
void ext4_da_release_space(struct inode *inode, int used, int to_free);
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr); struct buffer_head *bh, ext4_fsblk_t blocknr);
struct buffer_head *ext4_getblk(handle_t *, struct inode *, struct buffer_head *ext4_getblk(handle_t *, struct inode *,
......
...@@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed) ...@@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
if (handle->h_buffer_credits > needed) if (handle->h_buffer_credits > needed)
return 0; return 0;
err = ext4_journal_extend(handle, needed); err = ext4_journal_extend(handle, needed);
if (err) if (err <= 0)
return err; return err;
return ext4_journal_restart(handle, needed); return ext4_journal_restart(handle, needed);
} }
...@@ -1910,9 +1910,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, ...@@ -1910,9 +1910,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
BUG_ON(b != ex_ee_block + ex_ee_len - 1); BUG_ON(b != ex_ee_block + ex_ee_len - 1);
} }
/* at present, extent can't cross block group: */ /*
/* leaf + bitmap + group desc + sb + inode */ * 3 for leaf, sb, and inode plus 2 (bmap and group
credits = 5; * descriptor) for each block group; assume two block
* groups plus ex_ee_len/blocks_per_block_group for
* the worst case
*/
credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
if (ex == EXT_FIRST_EXTENT(eh)) { if (ex == EXT_FIRST_EXTENT(eh)) {
correct_index = 1; correct_index = 1;
credits += (ext_depth(inode)) + 1; credits += (ext_depth(inode)) + 1;
...@@ -2323,7 +2327,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ...@@ -2323,7 +2327,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
unsigned int newdepth; unsigned int newdepth;
/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
if (allocated <= EXT4_EXT_ZERO_LEN) { if (allocated <= EXT4_EXT_ZERO_LEN) {
/* Mark first half uninitialized. /*
* iblock == ee_block is handled by the zerouout
* at the beginning.
* Mark first half uninitialized.
* Mark second half initialized and zero out the * Mark second half initialized and zero out the
* initialized extent * initialized extent
*/ */
...@@ -2346,7 +2353,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ...@@ -2346,7 +2353,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ex->ee_len = orig_ex.ee_len; ex->ee_len = orig_ex.ee_len;
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth); ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */ /* blocks available from iblock */
return allocated; return allocated;
} else if (err) } else if (err)
...@@ -2374,6 +2381,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ...@@ -2374,6 +2381,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
err = PTR_ERR(path); err = PTR_ERR(path);
return err; return err;
} }
/* get the second half extent details */
ex = path[depth].p_ext; ex = path[depth].p_ext;
err = ext4_ext_get_access(handle, inode, err = ext4_ext_get_access(handle, inode,
path + depth); path + depth);
...@@ -2403,6 +2411,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ...@@ -2403,6 +2411,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth); ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */ /* zeroed the full extent */
/* blocks available from iblock */
return allocated; return allocated;
} else if (err) } else if (err)
...@@ -2418,7 +2427,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ...@@ -2418,7 +2427,6 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
*/ */
orig_ex.ee_len = cpu_to_le16(ee_len - orig_ex.ee_len = cpu_to_le16(ee_len -
ext4_ext_get_actual_len(ex3)); ext4_ext_get_actual_len(ex3));
if (newdepth != depth) {
depth = newdepth; depth = newdepth;
ext4_ext_drop_refs(path); ext4_ext_drop_refs(path);
path = ext4_ext_find_extent(inode, iblock, path); path = ext4_ext_find_extent(inode, iblock, path);
...@@ -2434,7 +2442,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ...@@ -2434,7 +2442,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
err = ext4_ext_get_access(handle, inode, path + depth); err = ext4_ext_get_access(handle, inode, path + depth);
if (err) if (err)
goto out; goto out;
}
allocated = max_blocks; allocated = max_blocks;
/* If extent has less than EXT4_EXT_ZERO_LEN and we are trying /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
...@@ -2452,6 +2460,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ...@@ -2452,6 +2460,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth); ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */ /* zero out the first half */
/* blocks available from iblock */
return allocated; return allocated;
} }
} }
......
...@@ -97,34 +97,44 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, ...@@ -97,34 +97,44 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
* Return buffer_head of bitmap on success or NULL. * Return buffer_head of bitmap on success or NULL.
*/ */
static struct buffer_head * static struct buffer_head *
read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
{ {
struct ext4_group_desc *desc; struct ext4_group_desc *desc;
struct buffer_head *bh = NULL; struct buffer_head *bh = NULL;
ext4_fsblk_t bitmap_blk;
desc = ext4_get_group_desc(sb, block_group, NULL); desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc) if (!desc)
goto error_out; return NULL;
bitmap_blk = ext4_inode_bitmap(sb, desc);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
ext4_error(sb, __func__,
"Cannot read inode bitmap - "
"block_group = %lu, inode_bitmap = %llu",
block_group, bitmap_blk);
return NULL;
}
if (bh_uptodate_or_lock(bh))
return bh;
spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc)); ext4_init_inode_bitmap(sb, bh, block_group, desc);
if (!buffer_uptodate(bh)) {
lock_buffer(bh);
if (!buffer_uptodate(bh)) {
ext4_init_inode_bitmap(sb, bh, block_group,
desc);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
}
unlock_buffer(bh); unlock_buffer(bh);
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
return bh;
} }
} else { spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); if (bh_submit_read(bh) < 0) {
} put_bh(bh);
if (!bh) ext4_error(sb, __func__,
ext4_error(sb, "read_inode_bitmap",
"Cannot read inode bitmap - " "Cannot read inode bitmap - "
"block_group = %lu, inode_bitmap = %llu", "block_group = %lu, inode_bitmap = %llu",
block_group, ext4_inode_bitmap(sb, desc)); block_group, bitmap_blk);
error_out: return NULL;
}
return bh; return bh;
} }
...@@ -200,7 +210,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) ...@@ -200,7 +210,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
} }
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
bitmap_bh = read_inode_bitmap(sb, block_group); bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
if (!bitmap_bh) if (!bitmap_bh)
goto error_return; goto error_return;
...@@ -623,7 +633,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) ...@@ -623,7 +633,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
goto fail; goto fail;
brelse(bitmap_bh); brelse(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, group); bitmap_bh = ext4_read_inode_bitmap(sb, group);
if (!bitmap_bh) if (!bitmap_bh)
goto fail; goto fail;
...@@ -728,7 +738,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) ...@@ -728,7 +738,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
/* When marking the block group with /* When marking the block group with
* ~EXT4_BG_INODE_UNINIT we don't want to depend * ~EXT4_BG_INODE_UNINIT we don't want to depend
* on the value of bg_itable_unsed even though * on the value of bg_itable_unused even though
* mke2fs could have initialized the same for us. * mke2fs could have initialized the same for us.
* Instead we calculated the value below * Instead we calculated the value below
*/ */
...@@ -891,7 +901,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) ...@@ -891,7 +901,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
bitmap_bh = read_inode_bitmap(sb, block_group); bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
if (!bitmap_bh) { if (!bitmap_bh) {
ext4_warning(sb, __func__, ext4_warning(sb, __func__,
"inode bitmap error for orphan %lu", ino); "inode bitmap error for orphan %lu", ino);
...@@ -969,7 +979,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) ...@@ -969,7 +979,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
continue; continue;
desc_count += le16_to_cpu(gdp->bg_free_inodes_count); desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
brelse(bitmap_bh); brelse(bitmap_bh);
bitmap_bh = read_inode_bitmap(sb, i); bitmap_bh = ext4_read_inode_bitmap(sb, i);
if (!bitmap_bh) if (!bitmap_bh)
continue; continue;
......
...@@ -191,6 +191,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) ...@@ -191,6 +191,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
void ext4_delete_inode (struct inode * inode) void ext4_delete_inode (struct inode * inode)
{ {
handle_t *handle; handle_t *handle;
int err;
if (ext4_should_order_data(inode)) if (ext4_should_order_data(inode))
ext4_begin_ordered_truncate(inode, 0); ext4_begin_ordered_truncate(inode, 0);
...@@ -199,8 +200,9 @@ void ext4_delete_inode (struct inode * inode) ...@@ -199,8 +200,9 @@ void ext4_delete_inode (struct inode * inode)
if (is_bad_inode(inode)) if (is_bad_inode(inode))
goto no_delete; goto no_delete;
handle = start_transaction(inode); handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
if (IS_ERR(handle)) { if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle));
/* /*
* If we're going to skip the normal cleanup, we still need to * If we're going to skip the normal cleanup, we still need to
* make sure that the in-core orphan linked list is properly * make sure that the in-core orphan linked list is properly
...@@ -213,8 +215,34 @@ void ext4_delete_inode (struct inode * inode) ...@@ -213,8 +215,34 @@ void ext4_delete_inode (struct inode * inode)
if (IS_SYNC(inode)) if (IS_SYNC(inode))
handle->h_sync = 1; handle->h_sync = 1;
inode->i_size = 0; inode->i_size = 0;
err = ext4_mark_inode_dirty(handle, inode);
if (err) {
ext4_warning(inode->i_sb, __func__,
"couldn't mark inode dirty (err %d)", err);
goto stop_handle;
}
if (inode->i_blocks) if (inode->i_blocks)
ext4_truncate(inode); ext4_truncate(inode);
/*
* ext4_ext_truncate() doesn't reserve any slop when it
* restarts journal transactions; therefore there may not be
* enough credits left in the handle to remove the inode from
* the orphan list and set the dtime field.
*/
if (handle->h_buffer_credits < 3) {
err = ext4_journal_extend(handle, 3);
if (err > 0)
err = ext4_journal_restart(handle, 3);
if (err != 0) {
ext4_warning(inode->i_sb, __func__,
"couldn't extend journal (err %d)", err);
stop_handle:
ext4_journal_stop(handle);
goto no_delete;
}
}
/* /*
* Kill off the orphan record which ext4_truncate created. * Kill off the orphan record which ext4_truncate created.
* AKPM: I think this can be inside the above `if'. * AKPM: I think this can be inside the above `if'.
...@@ -952,6 +980,67 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, ...@@ -952,6 +980,67 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
return err; return err;
} }
/*
* Calculate the number of metadata blocks need to reserve
* to allocate @blocks for non extent file based file
*/
static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
{
int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
int ind_blks, dind_blks, tind_blks;
/* number of new indirect blocks needed */
ind_blks = (blocks + icap - 1) / icap;
dind_blks = (ind_blks + icap - 1) / icap;
tind_blks = 1;
return ind_blks + dind_blks + tind_blks;
}
/*
* Calculate the number of metadata blocks need to reserve
* to allocate given number of blocks
*/
static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
{
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
return ext4_ext_calc_metadata_amount(inode, blocks);
return ext4_indirect_calc_metadata_amount(inode, blocks);
}
static void ext4_da_update_reserve_space(struct inode *inode, int used)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int total, mdb, mdb_free;
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
/* recalculate the number of metablocks still need to be reserved */
total = EXT4_I(inode)->i_reserved_data_blocks - used;
mdb = ext4_calc_metadata_amount(inode, total);
/* figure out how many metablocks to release */
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
/* Account for allocated meta_blocks */
mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
/* update fs free blocks counter for truncate case */
percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
/* update per-inode reservations */
BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
EXT4_I(inode)->i_reserved_data_blocks -= used;
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
EXT4_I(inode)->i_reserved_meta_blocks = mdb;
EXT4_I(inode)->i_allocated_meta_blocks = 0;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
}
/* Maximum number of blocks we map for direct IO at once. */ /* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096 #define DIO_MAX_BLOCKS 4096
/* /*
...@@ -965,10 +1054,9 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, ...@@ -965,10 +1054,9 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
/* /*
* The ext4_get_blocks_wrap() function try to look up the requested blocks,
* and returns if the blocks are already mapped.
* *
*
* ext4_ext4 get_block() wrapper function
* It will do a look up first, and returns if the blocks already mapped.
* Otherwise it takes the write lock of the i_data_sem and allocate blocks * Otherwise it takes the write lock of the i_data_sem and allocate blocks
* and store the allocated blocks in the result buffer head and mark it * and store the allocated blocks in the result buffer head and mark it
* mapped. * mapped.
...@@ -1069,7 +1157,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, ...@@ -1069,7 +1157,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
* which were deferred till now * which were deferred till now
*/ */
if ((retval > 0) && buffer_delay(bh)) if ((retval > 0) && buffer_delay(bh))
ext4_da_release_space(inode, retval, 0); ext4_da_update_reserve_space(inode, retval);
} }
up_write((&EXT4_I(inode)->i_data_sem)); up_write((&EXT4_I(inode)->i_data_sem));
...@@ -1336,12 +1424,8 @@ static int ext4_ordered_write_end(struct file *file, ...@@ -1336,12 +1424,8 @@ static int ext4_ordered_write_end(struct file *file,
{ {
handle_t *handle = ext4_journal_current_handle(); handle_t *handle = ext4_journal_current_handle();
struct inode *inode = mapping->host; struct inode *inode = mapping->host;
unsigned from, to;
int ret = 0, ret2; int ret = 0, ret2;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
ret = ext4_jbd2_file_inode(handle, inode); ret = ext4_jbd2_file_inode(handle, inode);
if (ret == 0) { if (ret == 0) {
...@@ -1437,36 +1521,6 @@ static int ext4_journalled_write_end(struct file *file, ...@@ -1437,36 +1521,6 @@ static int ext4_journalled_write_end(struct file *file,
return ret ? ret : copied; return ret ? ret : copied;
} }
/*
* Calculate the number of metadata blocks need to reserve
* to allocate @blocks for non extent file based file
*/
static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
{
int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
int ind_blks, dind_blks, tind_blks;
/* number of new indirect blocks needed */
ind_blks = (blocks + icap - 1) / icap;
dind_blks = (ind_blks + icap - 1) / icap;
tind_blks = 1;
return ind_blks + dind_blks + tind_blks;
}
/*
* Calculate the number of metadata blocks need to reserve
* to allocate given number of blocks
*/
static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
{
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
return ext4_ext_calc_metadata_amount(inode, blocks);
return ext4_indirect_calc_metadata_amount(inode, blocks);
}
static int ext4_da_reserve_space(struct inode *inode, int nrblocks) static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
{ {
...@@ -1490,7 +1544,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) ...@@ -1490,7 +1544,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
return -ENOSPC; return -ENOSPC;
} }
/* reduce fs free blocks counter */ /* reduce fs free blocks counter */
percpu_counter_sub(&sbi->s_freeblocks_counter, total); percpu_counter_sub(&sbi->s_freeblocks_counter, total);
...@@ -1501,35 +1554,31 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) ...@@ -1501,35 +1554,31 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
return 0; /* success */ return 0; /* success */
} }
void ext4_da_release_space(struct inode *inode, int used, int to_free) static void ext4_da_release_space(struct inode *inode, int to_free)
{ {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
int total, mdb, mdb_free, release; int total, mdb, mdb_free, release;
spin_lock(&EXT4_I(inode)->i_block_reservation_lock); spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
/* recalculate the number of metablocks still need to be reserved */ /* recalculate the number of metablocks still need to be reserved */
total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free; total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
mdb = ext4_calc_metadata_amount(inode, total); mdb = ext4_calc_metadata_amount(inode, total);
/* figure out how many metablocks to release */ /* figure out how many metablocks to release */
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
/* Account for allocated meta_blocks */
mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
release = to_free + mdb_free; release = to_free + mdb_free;
/* update fs free blocks counter for truncate case */ /* update fs free blocks counter for truncate case */
percpu_counter_add(&sbi->s_freeblocks_counter, release); percpu_counter_add(&sbi->s_freeblocks_counter, release);
/* update per-inode reservations */ /* update per-inode reservations */
BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks); BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free); EXT4_I(inode)->i_reserved_data_blocks -= to_free;
BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
EXT4_I(inode)->i_reserved_meta_blocks = mdb; EXT4_I(inode)->i_reserved_meta_blocks = mdb;
EXT4_I(inode)->i_allocated_meta_blocks = 0;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
} }
...@@ -1551,7 +1600,7 @@ static void ext4_da_page_release_reservation(struct page *page, ...@@ -1551,7 +1600,7 @@ static void ext4_da_page_release_reservation(struct page *page,
} }
curr_off = next_off; curr_off = next_off;
} while ((bh = bh->b_this_page) != head); } while ((bh = bh->b_this_page) != head);
ext4_da_release_space(page->mapping->host, 0, to_release); ext4_da_release_space(page->mapping->host, to_release);
} }
/* /*
...@@ -2280,8 +2329,11 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, ...@@ -2280,8 +2329,11 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
} }
page = __grab_cache_page(mapping, index); page = __grab_cache_page(mapping, index);
if (!page) if (!page) {
return -ENOMEM; ext4_journal_stop(handle);
ret = -ENOMEM;
goto out;
}
*pagep = page; *pagep = page;
ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
...@@ -3590,6 +3642,16 @@ static int __ext4_get_inode_loc(struct inode *inode, ...@@ -3590,6 +3642,16 @@ static int __ext4_get_inode_loc(struct inode *inode,
} }
if (!buffer_uptodate(bh)) { if (!buffer_uptodate(bh)) {
lock_buffer(bh); lock_buffer(bh);
/*
* If the buffer has the write error flag, we have failed
* to write out another inode in the same block. In this
* case, we don't have to read the block because we may
* read the old inode data successfully.
*/
if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
set_buffer_uptodate(bh);
if (buffer_uptodate(bh)) { if (buffer_uptodate(bh)) {
/* someone brought it uptodate while we waited */ /* someone brought it uptodate while we waited */
unlock_buffer(bh); unlock_buffer(bh);
......
This diff is collapsed.
...@@ -164,11 +164,17 @@ struct ext4_free_extent { ...@@ -164,11 +164,17 @@ struct ext4_free_extent {
* Locality group: * Locality group:
* we try to group all related changes together * we try to group all related changes together
* so that writeback can flush/allocate them together as well * so that writeback can flush/allocate them together as well
* Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
* (512). We store prealloc space into the hash based on the pa_free blocks
* order value.ie, fls(pa_free)-1;
*/ */
#define PREALLOC_TB_SIZE 10
struct ext4_locality_group { struct ext4_locality_group {
/* for allocator */ /* for allocator */
struct mutex lg_mutex; /* to serialize allocates */ /* to serialize allocates */
struct list_head lg_prealloc_list;/* list of preallocations */ struct mutex lg_mutex;
/* list of preallocations */
struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
spinlock_t lg_prealloc_lock; spinlock_t lg_prealloc_lock;
}; };
......
...@@ -507,14 +507,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, ...@@ -507,14 +507,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
return 0; return 0;
exit_inode: exit_inode:
//ext4_journal_release_buffer(handle, iloc.bh); /* ext4_journal_release_buffer(handle, iloc.bh); */
brelse(iloc.bh); brelse(iloc.bh);
exit_dindj: exit_dindj:
//ext4_journal_release_buffer(handle, dind); /* ext4_journal_release_buffer(handle, dind); */
exit_primary: exit_primary:
//ext4_journal_release_buffer(handle, *primary); /* ext4_journal_release_buffer(handle, *primary); */
exit_sbh: exit_sbh:
//ext4_journal_release_buffer(handle, *primary); /* ext4_journal_release_buffer(handle, *primary); */
exit_dind: exit_dind:
brelse(dind); brelse(dind);
exit_bh: exit_bh:
...@@ -946,7 +946,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) ...@@ -946,7 +946,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
return err; return err;
} /* ext4_group_add */ } /* ext4_group_add */
/* Extend the filesystem to the new number of blocks specified. This entry /*
* Extend the filesystem to the new number of blocks specified. This entry
* point is only used to extend the current filesystem to the end of the last * point is only used to extend the current filesystem to the end of the last
* existing group. It can be accessed via ioctl, or by "remount,resize=<size>" * existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
* for emergencies (because it has no dependencies on reserved blocks). * for emergencies (because it has no dependencies on reserved blocks).
...@@ -1024,7 +1025,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ...@@ -1024,7 +1025,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
o_blocks_count + add, add); o_blocks_count + add, add);
/* See if the device is actually as big as what was requested */ /* See if the device is actually as big as what was requested */
bh = sb_bread(sb, o_blocks_count + add -1); bh = sb_bread(sb, o_blocks_count + add - 1);
if (!bh) { if (!bh) {
ext4_warning(sb, __func__, ext4_warning(sb, __func__,
"can't read last block, resize aborted"); "can't read last block, resize aborted");
......
This diff is collapsed.
...@@ -1512,7 +1512,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header, ...@@ -1512,7 +1512,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
char *name = entry->e_name; char *name = entry->e_name;
int n; int n;
for (n=0; n < entry->e_name_len; n++) { for (n = 0; n < entry->e_name_len; n++) {
hash = (hash << NAME_HASH_SHIFT) ^ hash = (hash << NAME_HASH_SHIFT) ^
(hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
*name++; *name++;
......
...@@ -262,8 +262,18 @@ static int journal_finish_inode_data_buffers(journal_t *journal, ...@@ -262,8 +262,18 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
jinode->i_flags |= JI_COMMIT_RUNNING; jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
if (err) {
/*
* Because AS_EIO is cleared by
* wait_on_page_writeback_range(), set it again so
* that user process can get -EIO from fsync().
*/
set_bit(AS_EIO,
&jinode->i_vfs_inode->i_mapping->flags);
if (!ret) if (!ret)
ret = err; ret = err;
}
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
jinode->i_flags &= ~JI_COMMIT_RUNNING; jinode->i_flags &= ~JI_COMMIT_RUNNING;
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
...@@ -670,8 +680,14 @@ void jbd2_journal_commit_transaction(journal_t *journal) ...@@ -670,8 +680,14 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* commit block, which happens below in such setting. * commit block, which happens below in such setting.
*/ */
err = journal_finish_inode_data_buffers(journal, commit_transaction); err = journal_finish_inode_data_buffers(journal, commit_transaction);
if (err) if (err) {
jbd2_journal_abort(journal, err); char b[BDEVNAME_SIZE];
printk(KERN_WARNING
"JBD2: Detected IO errors while flushing file data "
"on %s\n", bdevname(journal->j_fs_dev, b));
err = 0;
}
/* Lo and behold: we have just managed to send a transaction to /* Lo and behold: we have just managed to send a transaction to
the log. Before we can commit it, wait for the IO so far to the log. Before we can commit it, wait for the IO so far to
......
...@@ -68,7 +68,6 @@ EXPORT_SYMBOL(jbd2_journal_set_features); ...@@ -68,7 +68,6 @@ EXPORT_SYMBOL(jbd2_journal_set_features);
EXPORT_SYMBOL(jbd2_journal_create); EXPORT_SYMBOL(jbd2_journal_create);
EXPORT_SYMBOL(jbd2_journal_load); EXPORT_SYMBOL(jbd2_journal_load);
EXPORT_SYMBOL(jbd2_journal_destroy); EXPORT_SYMBOL(jbd2_journal_destroy);
EXPORT_SYMBOL(jbd2_journal_update_superblock);
EXPORT_SYMBOL(jbd2_journal_abort); EXPORT_SYMBOL(jbd2_journal_abort);
EXPORT_SYMBOL(jbd2_journal_errno); EXPORT_SYMBOL(jbd2_journal_errno);
EXPORT_SYMBOL(jbd2_journal_ack_err); EXPORT_SYMBOL(jbd2_journal_ack_err);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment