Commit b07ce43d authored by Linus Torvalds

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o:
 "Improve performance for ext4 by allowing multiple processes to perform
  direct I/O writes to preallocated blocks by using a shared inode lock
  instead of taking an exclusive lock.

  In addition, multiple bug fixes and cleanups"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: fix incorrect options show of original mount_opt and extend mount_opt2
  ext4: Fix possible corruption when moving a directory
  ext4: init error handle resource before init group descriptors
  ext4: fix task hung in ext4_xattr_delete_inode
  jbd2: fix data missing when reusing bh which is ready to be checkpointed
  ext4: update s_journal_inum if it changes after journal replay
  ext4: fail ext4_iget if special inode unallocated
  ext4: fix function prototype mismatch for ext4_feat_ktype
  ext4: remove unnecessary variable initialization
  ext4: fix inode tree inconsistency caused by ENOMEM
  ext4: refuse to create ea block when umounted
  ext4: optimize ea_inode block expansion
  ext4: remove dead code in updating backup sb
  ext4: dio take shared inode lock when overwriting preallocated blocks
  ext4: don't show commit interval if it is zero
  ext4: use ext4_fc_tl_mem in fast-commit replay path
  ext4: improve xattr consistency checking and error reporting
parents ae3419fb e3645d72
...@@ -1529,6 +1529,7 @@ struct ext4_sb_info { ...@@ -1529,6 +1529,7 @@ struct ext4_sb_info {
unsigned int s_mount_opt2; unsigned int s_mount_opt2;
unsigned long s_mount_flags; unsigned long s_mount_flags;
unsigned int s_def_mount_opt; unsigned int s_def_mount_opt;
unsigned int s_def_mount_opt2;
ext4_fsblk_t s_sb_block; ext4_fsblk_t s_sb_block;
atomic64_t s_resv_clusters; atomic64_t s_resv_clusters;
kuid_t s_resuid; kuid_t s_resuid;
......
...@@ -3251,7 +3251,7 @@ static int ext4_split_extent_at(handle_t *handle, ...@@ -3251,7 +3251,7 @@ static int ext4_split_extent_at(handle_t *handle,
ext4_ext_mark_unwritten(ex2); ext4_ext_mark_unwritten(ex2);
err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags); err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
if (err != -ENOSPC && err != -EDQUOT) if (err != -ENOSPC && err != -EDQUOT && err != -ENOMEM)
goto out; goto out;
if (EXT4_EXT_MAY_ZEROOUT & split_flag) { if (EXT4_EXT_MAY_ZEROOUT & split_flag) {
......
...@@ -1332,8 +1332,14 @@ struct dentry_info_args { ...@@ -1332,8 +1332,14 @@ struct dentry_info_args {
char *dname; char *dname;
}; };
/*
 * Same as struct ext4_fc_tl, but uses native endianness fields.
 *
 * ext4_fc_get_tl() fills this structure by copying the raw header bytes
 * into a struct ext4_fc_tl and converting each 16-bit little-endian field
 * with le16_to_cpu(), so the replay functions that receive it can use
 * fc_tag and fc_len directly without further byte swapping.
 */
struct ext4_fc_tl_mem {
u16 fc_tag;	/* tag field, already converted to CPU byte order */
u16 fc_len;	/* length field, already converted to CPU byte order */
};
static inline void tl_to_darg(struct dentry_info_args *darg, static inline void tl_to_darg(struct dentry_info_args *darg,
struct ext4_fc_tl *tl, u8 *val) struct ext4_fc_tl_mem *tl, u8 *val)
{ {
struct ext4_fc_dentry_info fcd; struct ext4_fc_dentry_info fcd;
...@@ -1345,16 +1351,18 @@ static inline void tl_to_darg(struct dentry_info_args *darg, ...@@ -1345,16 +1351,18 @@ static inline void tl_to_darg(struct dentry_info_args *darg,
darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info); darg->dname_len = tl->fc_len - sizeof(struct ext4_fc_dentry_info);
} }
static inline void ext4_fc_get_tl(struct ext4_fc_tl *tl, u8 *val) static inline void ext4_fc_get_tl(struct ext4_fc_tl_mem *tl, u8 *val)
{ {
memcpy(tl, val, EXT4_FC_TAG_BASE_LEN); struct ext4_fc_tl tl_disk;
tl->fc_len = le16_to_cpu(tl->fc_len);
tl->fc_tag = le16_to_cpu(tl->fc_tag); memcpy(&tl_disk, val, EXT4_FC_TAG_BASE_LEN);
tl->fc_len = le16_to_cpu(tl_disk.fc_len);
tl->fc_tag = le16_to_cpu(tl_disk.fc_tag);
} }
/* Unlink replay function */ /* Unlink replay function */
static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl, static int ext4_fc_replay_unlink(struct super_block *sb,
u8 *val) struct ext4_fc_tl_mem *tl, u8 *val)
{ {
struct inode *inode, *old_parent; struct inode *inode, *old_parent;
struct qstr entry; struct qstr entry;
...@@ -1451,8 +1459,8 @@ static int ext4_fc_replay_link_internal(struct super_block *sb, ...@@ -1451,8 +1459,8 @@ static int ext4_fc_replay_link_internal(struct super_block *sb,
} }
/* Link replay function */ /* Link replay function */
static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl, static int ext4_fc_replay_link(struct super_block *sb,
u8 *val) struct ext4_fc_tl_mem *tl, u8 *val)
{ {
struct inode *inode; struct inode *inode;
struct dentry_info_args darg; struct dentry_info_args darg;
...@@ -1506,8 +1514,8 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) ...@@ -1506,8 +1514,8 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
/* /*
* Inode replay function * Inode replay function
*/ */
static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, static int ext4_fc_replay_inode(struct super_block *sb,
u8 *val) struct ext4_fc_tl_mem *tl, u8 *val)
{ {
struct ext4_fc_inode fc_inode; struct ext4_fc_inode fc_inode;
struct ext4_inode *raw_inode; struct ext4_inode *raw_inode;
...@@ -1609,8 +1617,8 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, ...@@ -1609,8 +1617,8 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
* inode for which we are trying to create a dentry here, should already have * inode for which we are trying to create a dentry here, should already have
* been replayed before we start here. * been replayed before we start here.
*/ */
static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl, static int ext4_fc_replay_create(struct super_block *sb,
u8 *val) struct ext4_fc_tl_mem *tl, u8 *val)
{ {
int ret = 0; int ret = 0;
struct inode *inode = NULL; struct inode *inode = NULL;
...@@ -1708,7 +1716,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino, ...@@ -1708,7 +1716,7 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
/* Replay add range tag */ /* Replay add range tag */
static int ext4_fc_replay_add_range(struct super_block *sb, static int ext4_fc_replay_add_range(struct super_block *sb,
struct ext4_fc_tl *tl, u8 *val) struct ext4_fc_tl_mem *tl, u8 *val)
{ {
struct ext4_fc_add_range fc_add_ex; struct ext4_fc_add_range fc_add_ex;
struct ext4_extent newex, *ex; struct ext4_extent newex, *ex;
...@@ -1828,8 +1836,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, ...@@ -1828,8 +1836,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb,
/* Replay DEL_RANGE tag */ /* Replay DEL_RANGE tag */
static int static int
ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, ext4_fc_replay_del_range(struct super_block *sb,
u8 *val) struct ext4_fc_tl_mem *tl, u8 *val)
{ {
struct inode *inode; struct inode *inode;
struct ext4_fc_del_range lrange; struct ext4_fc_del_range lrange;
...@@ -2025,7 +2033,7 @@ static int ext4_fc_replay_scan(journal_t *journal, ...@@ -2025,7 +2033,7 @@ static int ext4_fc_replay_scan(journal_t *journal,
struct ext4_fc_replay_state *state; struct ext4_fc_replay_state *state;
int ret = JBD2_FC_REPLAY_CONTINUE; int ret = JBD2_FC_REPLAY_CONTINUE;
struct ext4_fc_add_range ext; struct ext4_fc_add_range ext;
struct ext4_fc_tl tl; struct ext4_fc_tl_mem tl;
struct ext4_fc_tail tail; struct ext4_fc_tail tail;
__u8 *start, *end, *cur, *val; __u8 *start, *end, *cur, *val;
struct ext4_fc_head head; struct ext4_fc_head head;
...@@ -2144,7 +2152,7 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh, ...@@ -2144,7 +2152,7 @@ static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
{ {
struct super_block *sb = journal->j_private; struct super_block *sb = journal->j_private;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_fc_tl tl; struct ext4_fc_tl_mem tl;
__u8 *start, *end, *cur, *val; __u8 *start, *end, *cur, *val;
int ret = JBD2_FC_REPLAY_CONTINUE; int ret = JBD2_FC_REPLAY_CONTINUE;
struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state; struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
......
...@@ -202,8 +202,9 @@ ext4_extending_io(struct inode *inode, loff_t offset, size_t len) ...@@ -202,8 +202,9 @@ ext4_extending_io(struct inode *inode, loff_t offset, size_t len)
return false; return false;
} }
/* Is IO overwriting allocated and initialized blocks? */ /* Is IO overwriting allocated or initialized blocks? */
static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len) static bool ext4_overwrite_io(struct inode *inode,
loff_t pos, loff_t len, bool *unwritten)
{ {
struct ext4_map_blocks map; struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits; unsigned int blkbits = inode->i_blkbits;
...@@ -217,12 +218,15 @@ static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len) ...@@ -217,12 +218,15 @@ static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
blklen = map.m_len; blklen = map.m_len;
err = ext4_map_blocks(NULL, inode, &map, 0); err = ext4_map_blocks(NULL, inode, &map, 0);
if (err != blklen)
return false;
/* /*
* 'err==len' means that all of the blocks have been preallocated, * 'err==len' means that all of the blocks have been preallocated,
* regardless of whether they have been initialized or not. To exclude * regardless of whether they have been initialized or not. We need to
* unwritten extents, we need to check m_flags. * check m_flags to distinguish the unwritten extents.
*/ */
return err == blklen && (map.m_flags & EXT4_MAP_MAPPED); *unwritten = !(map.m_flags & EXT4_MAP_MAPPED);
return true;
} }
static ssize_t ext4_generic_write_checks(struct kiocb *iocb, static ssize_t ext4_generic_write_checks(struct kiocb *iocb,
...@@ -431,11 +435,16 @@ static const struct iomap_dio_ops ext4_dio_write_ops = { ...@@ -431,11 +435,16 @@ static const struct iomap_dio_ops ext4_dio_write_ops = {
* - For extending writes case we don't take the shared lock, since it requires * - For extending writes case we don't take the shared lock, since it requires
* updating inode i_disksize and/or orphan handling with exclusive lock. * updating inode i_disksize and/or orphan handling with exclusive lock.
* *
* - shared locking will only be true mostly with overwrites. Otherwise we will * - shared locking will only be true mostly with overwrites, including
* switch to exclusive i_rwsem lock. * initialized blocks and unwritten blocks. For overwrite unwritten blocks
* we protect splitting extents by i_data_sem in ext4_inode_info, so we can
* also release exclusive i_rwsem lock.
*
* - Otherwise we will switch to exclusive i_rwsem lock.
*/ */
static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from, static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
bool *ilock_shared, bool *extend) bool *ilock_shared, bool *extend,
bool *unwritten)
{ {
struct file *file = iocb->ki_filp; struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file); struct inode *inode = file_inode(file);
...@@ -459,7 +468,7 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from, ...@@ -459,7 +468,7 @@ static ssize_t ext4_dio_write_checks(struct kiocb *iocb, struct iov_iter *from,
* in file_modified(). * in file_modified().
*/ */
if (*ilock_shared && (!IS_NOSEC(inode) || *extend || if (*ilock_shared && (!IS_NOSEC(inode) || *extend ||
!ext4_overwrite_io(inode, offset, count))) { !ext4_overwrite_io(inode, offset, count, unwritten))) {
if (iocb->ki_flags & IOCB_NOWAIT) { if (iocb->ki_flags & IOCB_NOWAIT) {
ret = -EAGAIN; ret = -EAGAIN;
goto out; goto out;
...@@ -491,7 +500,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -491,7 +500,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
loff_t offset = iocb->ki_pos; loff_t offset = iocb->ki_pos;
size_t count = iov_iter_count(from); size_t count = iov_iter_count(from);
const struct iomap_ops *iomap_ops = &ext4_iomap_ops; const struct iomap_ops *iomap_ops = &ext4_iomap_ops;
bool extend = false, unaligned_io = false; bool extend = false, unaligned_io = false, unwritten = false;
bool ilock_shared = true; bool ilock_shared = true;
/* /*
...@@ -534,7 +543,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -534,7 +543,8 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ext4_buffered_write_iter(iocb, from); return ext4_buffered_write_iter(iocb, from);
} }
ret = ext4_dio_write_checks(iocb, from, &ilock_shared, &extend); ret = ext4_dio_write_checks(iocb, from,
&ilock_shared, &extend, &unwritten);
if (ret <= 0) if (ret <= 0)
return ret; return ret;
...@@ -582,7 +592,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from) ...@@ -582,7 +592,7 @@ static ssize_t ext4_dio_write_iter(struct kiocb *iocb, struct iov_iter *from)
ext4_journal_stop(handle); ext4_journal_stop(handle);
} }
if (ilock_shared) if (ilock_shared && !unwritten)
iomap_ops = &ext4_iomap_overwrite_ops; iomap_ops = &ext4_iomap_overwrite_ops;
ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops, ret = iomap_dio_rw(iocb, from, iomap_ops, &ext4_dio_write_ops,
(unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0, (unaligned_io || extend) ? IOMAP_DIO_FORCE_WAIT : 0,
......
...@@ -4872,13 +4872,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ...@@ -4872,13 +4872,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
goto bad_inode; goto bad_inode;
raw_inode = ext4_raw_inode(&iloc); raw_inode = ext4_raw_inode(&iloc);
if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
ext4_error_inode(inode, function, line, 0,
"iget: root inode unallocated");
ret = -EFSCORRUPTED;
goto bad_inode;
}
if ((flags & EXT4_IGET_HANDLE) && if ((flags & EXT4_IGET_HANDLE) &&
(raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) { (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
ret = -ESTALE; ret = -ESTALE;
...@@ -4951,11 +4944,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, ...@@ -4951,11 +4944,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
* NeilBrown 1999oct15 * NeilBrown 1999oct15
*/ */
if (inode->i_nlink == 0) { if (inode->i_nlink == 0) {
if ((inode->i_mode == 0 || if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL ||
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) && !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
ino != EXT4_BOOT_LOADER_INO) { ino != EXT4_BOOT_LOADER_INO) {
/* this inode is deleted */ /* this inode is deleted or unallocated */
ret = -ESTALE; if (flags & EXT4_IGET_SPECIAL) {
ext4_error_inode(inode, function, line, 0,
"iget: special inode unallocated");
ret = -EFSCORRUPTED;
} else
ret = -ESTALE;
goto bad_inode; goto bad_inode;
} }
/* The only unlinked inodes we let through here have /* The only unlinked inodes we let through here have
...@@ -5788,7 +5786,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, ...@@ -5788,7 +5786,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
int gdpblocks; int gdpblocks;
int idxblocks; int idxblocks;
int ret = 0; int ret;
/* /*
* How many index blocks need to touch to map @lblocks logical blocks * How many index blocks need to touch to map @lblocks logical blocks
......
...@@ -155,9 +155,6 @@ static int ext4_update_backup_sb(struct super_block *sb, ...@@ -155,9 +155,6 @@ static int ext4_update_backup_sb(struct super_block *sb,
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
unlock_buffer(bh); unlock_buffer(bh);
if (err)
goto out_bh;
if (handle) { if (handle) {
err = ext4_handle_dirty_metadata(handle, NULL, bh); err = ext4_handle_dirty_metadata(handle, NULL, bh);
if (err) if (err)
......
...@@ -3872,9 +3872,16 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, ...@@ -3872,9 +3872,16 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
goto end_rename; goto end_rename;
} }
/*
* We need to protect against old.inode directory getting
* converted from inline directory format into a normal one.
*/
inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
retval = ext4_rename_dir_prepare(handle, &old); retval = ext4_rename_dir_prepare(handle, &old);
if (retval) if (retval) {
inode_unlock(old.inode);
goto end_rename; goto end_rename;
}
} }
/* /*
* If we're renaming a file within an inline_data dir and adding or * If we're renaming a file within an inline_data dir and adding or
...@@ -4006,6 +4013,8 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir, ...@@ -4006,6 +4013,8 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
} else { } else {
ext4_journal_stop(handle); ext4_journal_stop(handle);
} }
if (old.dir_bh)
inode_unlock(old.inode);
release_bh: release_bh:
brelse(old.dir_bh); brelse(old.dir_bh);
brelse(old.bh); brelse(old.bh);
......
...@@ -2146,7 +2146,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ...@@ -2146,7 +2146,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
return 0; return 0;
case Opt_commit: case Opt_commit:
if (result.uint_32 == 0) if (result.uint_32 == 0)
ctx->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE; result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE;
else if (result.uint_32 > INT_MAX / HZ) { else if (result.uint_32 > INT_MAX / HZ) {
ext4_msg(NULL, KERN_ERR, ext4_msg(NULL, KERN_ERR,
"Invalid commit interval %d, " "Invalid commit interval %d, "
...@@ -2883,7 +2883,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, ...@@ -2883,7 +2883,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es; struct ext4_super_block *es = sbi->s_es;
int def_errors, def_mount_opt = sbi->s_def_mount_opt; int def_errors;
const struct mount_opts *m; const struct mount_opts *m;
char sep = nodefs ? '\n' : ','; char sep = nodefs ? '\n' : ',';
...@@ -2895,15 +2895,28 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, ...@@ -2895,15 +2895,28 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
for (m = ext4_mount_opts; m->token != Opt_err; m++) { for (m = ext4_mount_opts; m->token != Opt_err; m++) {
int want_set = m->flags & MOPT_SET; int want_set = m->flags & MOPT_SET;
int opt_2 = m->flags & MOPT_2;
unsigned int mount_opt, def_mount_opt;
if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) || if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
m->flags & MOPT_SKIP) m->flags & MOPT_SKIP)
continue; continue;
if (!nodefs && !(m->mount_opt & (sbi->s_mount_opt ^ def_mount_opt)))
continue; /* skip if same as the default */ if (opt_2) {
mount_opt = sbi->s_mount_opt2;
def_mount_opt = sbi->s_def_mount_opt2;
} else {
mount_opt = sbi->s_mount_opt;
def_mount_opt = sbi->s_def_mount_opt;
}
/* skip if same as the default */
if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt)))
continue;
/* select Opt_noFoo vs Opt_Foo */
if ((want_set && if ((want_set &&
(sbi->s_mount_opt & m->mount_opt) != m->mount_opt) || (mount_opt & m->mount_opt) != m->mount_opt) ||
(!want_set && (sbi->s_mount_opt & m->mount_opt))) (!want_set && (mount_opt & m->mount_opt)))
continue; /* select Opt_noFoo vs Opt_Foo */ continue;
SEQ_OPTS_PRINT("%s", token2str(m->token)); SEQ_OPTS_PRINT("%s", token2str(m->token));
} }
...@@ -2931,7 +2944,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, ...@@ -2931,7 +2944,7 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
if (nodefs || sbi->s_stripe) if (nodefs || sbi->s_stripe)
SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
if (nodefs || EXT4_MOUNT_DATA_FLAGS & if (nodefs || EXT4_MOUNT_DATA_FLAGS &
(sbi->s_mount_opt ^ def_mount_opt)) { (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
SEQ_OPTS_PUTS("data=journal"); SEQ_OPTS_PUTS("data=journal");
else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
...@@ -4727,7 +4740,6 @@ static int ext4_group_desc_init(struct super_block *sb, ...@@ -4727,7 +4740,6 @@ static int ext4_group_desc_init(struct super_block *sb,
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned int db_count; unsigned int db_count;
ext4_fsblk_t block; ext4_fsblk_t block;
int ret;
int i; int i;
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
...@@ -4767,8 +4779,7 @@ static int ext4_group_desc_init(struct super_block *sb, ...@@ -4767,8 +4779,7 @@ static int ext4_group_desc_init(struct super_block *sb,
ext4_msg(sb, KERN_ERR, ext4_msg(sb, KERN_ERR,
"can't read group descriptor %d", i); "can't read group descriptor %d", i);
sbi->s_gdb_count = i; sbi->s_gdb_count = i;
ret = PTR_ERR(bh); return PTR_ERR(bh);
goto out;
} }
rcu_read_lock(); rcu_read_lock();
rcu_dereference(sbi->s_group_desc)[i] = bh; rcu_dereference(sbi->s_group_desc)[i] = bh;
...@@ -4777,13 +4788,10 @@ static int ext4_group_desc_init(struct super_block *sb, ...@@ -4777,13 +4788,10 @@ static int ext4_group_desc_init(struct super_block *sb,
sbi->s_gdb_count = db_count; sbi->s_gdb_count = db_count;
if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) { if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
ret = -EFSCORRUPTED; return -EFSCORRUPTED;
goto out;
} }
return 0; return 0;
out:
ext4_group_desc_free(sbi);
return ret;
} }
static int ext4_load_and_init_journal(struct super_block *sb, static int ext4_load_and_init_journal(struct super_block *sb,
...@@ -5075,6 +5083,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ...@@ -5075,6 +5083,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
goto failed_mount; goto failed_mount;
sbi->s_def_mount_opt = sbi->s_mount_opt; sbi->s_def_mount_opt = sbi->s_mount_opt;
sbi->s_def_mount_opt2 = sbi->s_mount_opt2;
err = ext4_check_opt_consistency(fc, sb); err = ext4_check_opt_consistency(fc, sb);
if (err < 0) if (err < 0)
...@@ -5209,14 +5218,14 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ...@@ -5209,14 +5218,14 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
if (ext4_geometry_check(sb, es)) if (ext4_geometry_check(sb, es))
goto failed_mount; goto failed_mount;
err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
if (err)
goto failed_mount;
timer_setup(&sbi->s_err_report, print_daily_error_info, 0); timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
spin_lock_init(&sbi->s_error_lock); spin_lock_init(&sbi->s_error_lock);
INIT_WORK(&sbi->s_error_work, flush_stashed_error_work); INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
if (err)
goto failed_mount3;
/* Register extent status tree shrinker */ /* Register extent status tree shrinker */
if (ext4_es_register_shrinker(sbi)) if (ext4_es_register_shrinker(sbi))
goto failed_mount3; goto failed_mount3;
...@@ -5937,8 +5946,11 @@ static int ext4_load_journal(struct super_block *sb, ...@@ -5937,8 +5946,11 @@ static int ext4_load_journal(struct super_block *sb,
if (!really_read_only && journal_devnum && if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) { journal_devnum != le32_to_cpu(es->s_journal_dev)) {
es->s_journal_dev = cpu_to_le32(journal_devnum); es->s_journal_dev = cpu_to_le32(journal_devnum);
ext4_commit_super(sb);
/* Make sure we flush the recovery flag to disk. */ }
if (!really_read_only && journal_inum &&
journal_inum != le32_to_cpu(es->s_journal_inum)) {
es->s_journal_inum = cpu_to_le32(journal_inum);
ext4_commit_super(sb); ext4_commit_super(sb);
} }
......
...@@ -184,27 +184,73 @@ ext4_xattr_handler(int name_index) ...@@ -184,27 +184,73 @@ ext4_xattr_handler(int name_index)
} }
static int static int
ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end, check_xattrs(struct inode *inode, struct buffer_head *bh,
void *value_start) struct ext4_xattr_entry *entry, void *end, void *value_start,
const char *function, unsigned int line)
{ {
struct ext4_xattr_entry *e = entry; struct ext4_xattr_entry *e = entry;
int err = -EFSCORRUPTED;
char *err_str;
if (bh) {
if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
BHDR(bh)->h_blocks != cpu_to_le32(1)) {
err_str = "invalid header";
goto errout;
}
if (buffer_verified(bh))
return 0;
if (!ext4_xattr_block_csum_verify(inode, bh)) {
err = -EFSBADCRC;
err_str = "invalid checksum";
goto errout;
}
} else {
struct ext4_xattr_ibody_header *header = value_start;
header -= 1;
if (end - (void *)header < sizeof(*header) + sizeof(u32)) {
err_str = "in-inode xattr block too small";
goto errout;
}
if (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
err_str = "bad magic number in in-inode xattr";
goto errout;
}
}
/* Find the end of the names list */ /* Find the end of the names list */
while (!IS_LAST_ENTRY(e)) { while (!IS_LAST_ENTRY(e)) {
struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e); struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
if ((void *)next >= end) if ((void *)next >= end) {
return -EFSCORRUPTED; err_str = "e_name out of bounds";
if (strnlen(e->e_name, e->e_name_len) != e->e_name_len) goto errout;
return -EFSCORRUPTED; }
if (strnlen(e->e_name, e->e_name_len) != e->e_name_len) {
err_str = "bad e_name length";
goto errout;
}
e = next; e = next;
} }
/* Check the values */ /* Check the values */
while (!IS_LAST_ENTRY(entry)) { while (!IS_LAST_ENTRY(entry)) {
u32 size = le32_to_cpu(entry->e_value_size); u32 size = le32_to_cpu(entry->e_value_size);
unsigned long ea_ino = le32_to_cpu(entry->e_value_inum);
if (size > EXT4_XATTR_SIZE_MAX) if (!ext4_has_feature_ea_inode(inode->i_sb) && ea_ino) {
return -EFSCORRUPTED; err_str = "ea_inode specified without ea_inode feature enabled";
goto errout;
}
if (ea_ino && ((ea_ino == EXT4_ROOT_INO) ||
!ext4_valid_inum(inode->i_sb, ea_ino))) {
err_str = "invalid ea_ino";
goto errout;
}
if (size > EXT4_XATTR_SIZE_MAX) {
err_str = "e_value size too large";
goto errout;
}
if (size != 0 && entry->e_value_inum == 0) { if (size != 0 && entry->e_value_inum == 0) {
u16 offs = le16_to_cpu(entry->e_value_offs); u16 offs = le16_to_cpu(entry->e_value_offs);
...@@ -216,66 +262,54 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end, ...@@ -216,66 +262,54 @@ ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
* the padded and unpadded sizes, since the size may * the padded and unpadded sizes, since the size may
* overflow to 0 when adding padding. * overflow to 0 when adding padding.
*/ */
if (offs > end - value_start) if (offs > end - value_start) {
return -EFSCORRUPTED; err_str = "e_value out of bounds";
goto errout;
}
value = value_start + offs; value = value_start + offs;
if (value < (void *)e + sizeof(u32) || if (value < (void *)e + sizeof(u32) ||
size > end - value || size > end - value ||
EXT4_XATTR_SIZE(size) > end - value) EXT4_XATTR_SIZE(size) > end - value) {
return -EFSCORRUPTED; err_str = "overlapping e_value ";
goto errout;
}
} }
entry = EXT4_XATTR_NEXT(entry); entry = EXT4_XATTR_NEXT(entry);
} }
if (bh)
set_buffer_verified(bh);
return 0; return 0;
errout:
if (bh)
__ext4_error_inode(inode, function, line, 0, -err,
"corrupted xattr block %llu: %s",
(unsigned long long) bh->b_blocknr,
err_str);
else
__ext4_error_inode(inode, function, line, 0, -err,
"corrupted in-inode xattr: %s", err_str);
return err;
} }
static inline int static inline int
__ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh, __ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh,
const char *function, unsigned int line) const char *function, unsigned int line)
{ {
int error = -EFSCORRUPTED; return check_xattrs(inode, bh, BFIRST(bh), bh->b_data + bh->b_size,
bh->b_data, function, line);
if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
BHDR(bh)->h_blocks != cpu_to_le32(1))
goto errout;
if (buffer_verified(bh))
return 0;
error = -EFSBADCRC;
if (!ext4_xattr_block_csum_verify(inode, bh))
goto errout;
error = ext4_xattr_check_entries(BFIRST(bh), bh->b_data + bh->b_size,
bh->b_data);
errout:
if (error)
__ext4_error_inode(inode, function, line, 0, -error,
"corrupted xattr block %llu",
(unsigned long long) bh->b_blocknr);
else
set_buffer_verified(bh);
return error;
} }
#define ext4_xattr_check_block(inode, bh) \ #define ext4_xattr_check_block(inode, bh) \
__ext4_xattr_check_block((inode), (bh), __func__, __LINE__) __ext4_xattr_check_block((inode), (bh), __func__, __LINE__)
static int static inline int
__xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header, __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
void *end, const char *function, unsigned int line) void *end, const char *function, unsigned int line)
{ {
int error = -EFSCORRUPTED; return check_xattrs(inode, NULL, IFIRST(header), end, IFIRST(header),
function, line);
if (end - (void *)header < sizeof(*header) + sizeof(u32) ||
(header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)))
goto errout;
error = ext4_xattr_check_entries(IFIRST(header), end, IFIRST(header));
errout:
if (error)
__ext4_error_inode(inode, function, line, 0, -error,
"corrupted in-inode xattr");
return error;
} }
#define xattr_check_inode(inode, header, end) \ #define xattr_check_inode(inode, header, end) \
...@@ -388,6 +422,17 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, ...@@ -388,6 +422,17 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
struct inode *inode; struct inode *inode;
int err; int err;
/*
* We have to check for this corruption early as otherwise
* iget_locked() could wait indefinitely for the state of our
* parent inode.
*/
if (parent->i_ino == ea_ino) {
ext4_error(parent->i_sb,
"Parent and EA inode have the same ino %lu", ea_ino);
return -EFSCORRUPTED;
}
inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL); inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) { if (IS_ERR(inode)) {
err = PTR_ERR(inode); err = PTR_ERR(inode);
...@@ -1438,6 +1483,13 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle, ...@@ -1438,6 +1483,13 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) }; uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) };
int err; int err;
if (inode->i_sb->s_root == NULL) {
ext4_warning(inode->i_sb,
"refuse to create EA inode when umounting");
WARN_ON(1);
return ERR_PTR(-EINVAL);
}
/* /*
* Let the next inode be the goal, so we try and allocate the EA inode * Let the next inode be the goal, so we try and allocate the EA inode
* in the same group, or nearby one. * in the same group, or nearby one.
...@@ -2567,9 +2619,8 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, ...@@ -2567,9 +2619,8 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
buffer = kvmalloc(value_size, GFP_NOFS);
b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
if (!is || !bs || !buffer || !b_entry_name) { if (!is || !bs || !b_entry_name) {
error = -ENOMEM; error = -ENOMEM;
goto out; goto out;
} }
...@@ -2581,12 +2632,18 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, ...@@ -2581,12 +2632,18 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
/* Save the entry name and the entry value */ /* Save the entry name and the entry value */
if (entry->e_value_inum) { if (entry->e_value_inum) {
buffer = kvmalloc(value_size, GFP_NOFS);
if (!buffer) {
error = -ENOMEM;
goto out;
}
error = ext4_xattr_inode_get(inode, entry, buffer, value_size); error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
if (error) if (error)
goto out; goto out;
} else { } else {
size_t value_offs = le16_to_cpu(entry->e_value_offs); size_t value_offs = le16_to_cpu(entry->e_value_offs);
memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size); buffer = (void *)IFIRST(header) + value_offs;
} }
memcpy(b_entry_name, entry->e_name, entry->e_name_len); memcpy(b_entry_name, entry->e_name, entry->e_name_len);
...@@ -2601,25 +2658,26 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, ...@@ -2601,25 +2658,26 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
if (error) if (error)
goto out; goto out;
/* Remove the chosen entry from the inode */
error = ext4_xattr_ibody_set(handle, inode, &i, is);
if (error)
goto out;
i.value = buffer; i.value = buffer;
i.value_len = value_size; i.value_len = value_size;
error = ext4_xattr_block_find(inode, &i, bs); error = ext4_xattr_block_find(inode, &i, bs);
if (error) if (error)
goto out; goto out;
/* Add entry which was removed from the inode into the block */ /* Move ea entry from the inode into the block */
error = ext4_xattr_block_set(handle, inode, &i, bs); error = ext4_xattr_block_set(handle, inode, &i, bs);
if (error) if (error)
goto out; goto out;
error = 0;
/* Remove the chosen entry from the inode */
i.value = NULL;
i.value_len = 0;
error = ext4_xattr_ibody_set(handle, inode, &i, is);
out: out:
kfree(b_entry_name); kfree(b_entry_name);
kvfree(buffer); if (entry->e_value_inum && buffer)
kvfree(buffer);
if (is) if (is)
brelse(is->iloc.bh); brelse(is->iloc.bh);
if (bs) if (bs)
......
...@@ -1010,36 +1010,28 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -1010,36 +1010,28 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
* ie. locked but not dirty) or tune2fs (which may actually have * ie. locked but not dirty) or tune2fs (which may actually have
* the buffer dirtied, ugh.) */ * the buffer dirtied, ugh.) */
if (buffer_dirty(bh)) { if (buffer_dirty(bh) && jh->b_transaction) {
warn_dirty_buffer(bh);
/* /*
* First question: is this buffer already part of the current * We need to clean the dirty flag and we must do it under the
* transaction or the existing committing transaction? * buffer lock to be sure we don't race with running write-out.
*/
if (jh->b_transaction) {
J_ASSERT_JH(jh,
jh->b_transaction == transaction ||
jh->b_transaction ==
journal->j_committing_transaction);
if (jh->b_next_transaction)
J_ASSERT_JH(jh, jh->b_next_transaction ==
transaction);
warn_dirty_buffer(bh);
}
/*
* In any case we need to clean the dirty flag and we must
* do it under the buffer lock to be sure we don't race
* with running write-out.
*/ */
JBUFFER_TRACE(jh, "Journalling dirty buffer"); JBUFFER_TRACE(jh, "Journalling dirty buffer");
clear_buffer_dirty(bh); clear_buffer_dirty(bh);
/*
* The buffer is going to be added to BJ_Reserved list now and
* nothing guarantees jbd2_journal_dirty_metadata() will be
* ever called for it. So we need to set jbddirty bit here to
* make sure the buffer is dirtied and written out when the
* journaling machinery is done with it.
*/
set_buffer_jbddirty(bh); set_buffer_jbddirty(bh);
} }
unlock_buffer(bh);
error = -EROFS; error = -EROFS;
if (is_handle_aborted(handle)) { if (is_handle_aborted(handle)) {
spin_unlock(&jh->b_state_lock); spin_unlock(&jh->b_state_lock);
unlock_buffer(bh);
goto out; goto out;
} }
error = 0; error = 0;
...@@ -1049,8 +1041,10 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -1049,8 +1041,10 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
* b_next_transaction points to it * b_next_transaction points to it
*/ */
if (jh->b_transaction == transaction || if (jh->b_transaction == transaction ||
jh->b_next_transaction == transaction) jh->b_next_transaction == transaction) {
unlock_buffer(bh);
goto done; goto done;
}
/* /*
* this is the first time this transaction is touching this buffer, * this is the first time this transaction is touching this buffer,
...@@ -1074,10 +1068,24 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, ...@@ -1074,10 +1068,24 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
*/ */
smp_wmb(); smp_wmb();
spin_lock(&journal->j_list_lock); spin_lock(&journal->j_list_lock);
if (test_clear_buffer_dirty(bh)) {
/*
* Execute buffer dirty clearing and jh->b_transaction
* assignment under journal->j_list_lock locked to
* prevent bh being removed from checkpoint list if
* the buffer is in an intermediate state (not dirty
* and jh->b_transaction is NULL).
*/
JBUFFER_TRACE(jh, "Journalling dirty buffer");
set_buffer_jbddirty(bh);
}
__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_list_lock);
unlock_buffer(bh);
goto done; goto done;
} }
unlock_buffer(bh);
/* /*
* If there is already a copy-out version of this buffer, then we don't * If there is already a copy-out version of this buffer, then we don't
* need to make another one * need to make another one
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment