Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 updates from Ted Ts'o: "Clean ups and miscellaneous bug fixes, in particular for the new collapse_range and zero_range fallocate functions. In addition, improve the scalability of adding and remove inodes from the orphan list" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits) ext4: handle symlink properly with inline_data ext4: fix wrong assert in ext4_mb_normalize_request() ext4: fix zeroing of page during writeback ext4: remove unused local variable "stored" from ext4_readdir(...) ext4: fix ZERO_RANGE test failure in data journalling ext4: reduce contention on s_orphan_lock ext4: use sbi in ext4_orphan_{add|del}() ext4: use EXT_MAX_BLOCKS in ext4_es_can_be_merged() ext4: add missing BUFFER_TRACE before ext4_journal_get_write_access ext4: remove unnecessary double parentheses ext4: do not destroy ext4_groupinfo_caches if ext4_mb_init() fails ext4: make local functions static ext4: fix block bitmap validation when bigalloc, ^flex_bg ext4: fix block bitmap initialization under sparse_super2 ext4: find the group descriptors on a 1k-block bigalloc,meta_bg filesystem ext4: avoid unneeded lookup when xattr name is invalid ext4: fix data integrity sync in ordered mode ext4: remove obsoleted check ext4: add a new spinlock i_raw_lock to protect the ext4's raw inode ext4: fix locking for O_APPEND writes ...

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: "Clean ups and miscellaneous bug fixes, in particular for the new collapse_range and zero_range fallocate functions. In addition, improve the scalability of adding and remove inodes from the orphan list" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits) ext4: handle symlink properly with inline_data ext4: fix wrong assert in ext4_mb_normalize_request() ext4: fix zeroing of page during writeback ext4: remove unused local variable "stored" from ext4_readdir(...) ext4: fix ZERO_RANGE test failure in data journalling ext4: reduce contention on s_orphan_lock ext4: use sbi in ext4_orphan_{add|del}() ext4: use EXT_MAX_BLOCKS in ext4_es_can_be_merged() ext4: add missing BUFFER_TRACE before ext4_journal_get_write_access ext4: remove unnecessary double parentheses ext4: do not destroy ext4_groupinfo_caches if ext4_mb_init() fails ext4: make local functions static ext4: fix block bitmap validation when bigalloc, ^flex_bg ext4: fix block bitmap initialization under sparse_super2 ext4: find the group descriptors on a 1k-block bigalloc,meta_bg filesystem ext4: avoid unneeded lookup when xattr name is invalid ext4: fix data integrity sync in ordered mode ext4: remove obsoleted check ext4: add a new spinlock i_raw_lock to protect the ext4's raw inode ext4: fix locking for O_APPEND writes ...
f8409abd · Linus Torvalds · b20dcab9 · bd9db175 · f8409abd · f8409abd
Commit f8409abd authored Jun 08, 2014 by Linus Torvalds
23 changed files
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -83,7 +83,7 @@ static inline int ext4_block_in_group(struct super_block *sb,
 /* Return the number of clusters used for file system metadata; this
 * represents the overhead needed by the file system.
 */
-unsigned ext4_num_overhead_clusters(struct super_block *sb,
+static unsigned ext4_num_overhead_clusters(struct super_block *sb,
 					   ext4_group_t block_group,
 					   struct ext4_group_desc *gdp)
 {
@@ -176,7 +176,8 @@ static unsigned int num_clusters_in_group(struct super_block *sb,
 }

 /* Initializes an uninitialized block bitmap */
-void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+static void ext4_init_block_bitmap(struct super_block *sb,
+				   struct buffer_head *bh,
 				   ext4_group_t block_group,
 				   struct ext4_group_desc *gdp)
 {
@@ -307,6 +308,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
 					    ext4_group_t block_group,
 					    struct buffer_head *bh)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_grpblk_t offset;
 	ext4_grpblk_t next_zero_bit;
 	ext4_fsblk_t blk;
@@ -326,14 +328,14 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
 	/* check whether block bitmap block number is set */
 	blk = ext4_block_bitmap(sb, desc);
 	offset = blk - group_first_block;
-	if (!ext4_test_bit(offset, bh->b_data))
+	if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
 		/* bad block bitmap */
 		return blk;

 	/* check whether the inode bitmap block number is set */
 	blk = ext4_inode_bitmap(sb, desc);
 	offset = blk - group_first_block;
-	if (!ext4_test_bit(offset, bh->b_data))
+	if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
 		/* bad block bitmap */
 		return blk;

@@ -341,15 +343,16 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
 	blk = ext4_inode_table(sb, desc);
 	offset = blk - group_first_block;
 	next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
-				offset + EXT4_SB(sb)->s_itb_per_group,
-				offset);
-	if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group)
+			EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group),
+			EXT4_B2C(sbi, offset));
+	if (next_zero_bit <
+	    EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group))
 		/* bad bitmap for inode tables */
 		return blk;
 	return 0;
 }

-void ext4_validate_block_bitmap(struct super_block *sb,
+static void ext4_validate_block_bitmap(struct super_block *sb,
 				       struct ext4_group_desc *desc,
 				       ext4_group_t block_group,
 				       struct buffer_head *bh)
@@ -708,16 +711,6 @@ static inline int test_root(ext4_group_t a, int b)
 	}
 }

-static int ext4_group_sparse(ext4_group_t group)
-{
-	if (group <= 1)
-		return 1;
-	if (!(group & 1))
-		return 0;
-	return (test_root(group, 7) || test_root(group, 5) ||
-		test_root(group, 3));
-}
-
 /**
 *	ext4_bg_has_super - number of blocks used by the superblock in group
 *	@sb: superblock for filesystem
@@ -728,11 +721,26 @@ static int ext4_group_sparse(ext4_group_t group)
 */
 int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)
 {
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-				EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
-			!ext4_group_sparse(group))
+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+	if (group == 0)
+		return 1;
+	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_SPARSE_SUPER2)) {
+		if (group == le32_to_cpu(es->s_backup_bgs[0]) ||
+		    group == le32_to_cpu(es->s_backup_bgs[1]))
+			return 1;
+		return 0;
+	}
+	if ((group <= 1) || !EXT4_HAS_RO_COMPAT_FEATURE(sb,
+					EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER))
+		return 1;
+	if (!(group & 1))
 		return 0;
+	if (test_root(group, 3) || (test_root(group, 5)) ||
+	    test_root(group, 7))
 		return 1;
+
+	return 0;
 }

 static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,

--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -105,7 +105,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
 static int ext4_readdir(struct file *file, struct dir_context *ctx)
 {
 	unsigned int offset;
-	int i, stored;
+	int i;
 	struct ext4_dir_entry_2 *de;
 	int err;
 	struct inode *inode = file_inode(file);
@@ -133,7 +133,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 			return ret;
 	}

-	stored = 0;
 	offset = ctx->pos & (sb->s_blocksize - 1);

 	while (ctx->pos < inode->i_size) {

--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -158,7 +158,6 @@ struct ext4_allocation_request {
 #define EXT4_MAP_MAPPED		(1 << BH_Mapped)
 #define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
-#define EXT4_MAP_UNINIT		(1 << BH_Uninit)
 /* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
 * ext4_map_blocks wants to know whether or not the underlying cluster has
 * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
@@ -169,7 +168,7 @@ struct ext4_allocation_request {
 #define EXT4_MAP_FROM_CLUSTER	(1 << BH_AllocFromCluster)
 #define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
 				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-				 EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
+				 EXT4_MAP_FROM_CLUSTER)

 struct ext4_map_blocks {
 	ext4_fsblk_t m_pblk;
@@ -184,7 +183,7 @@ struct ext4_map_blocks {
 #define	EXT4_IO_END_UNWRITTEN	0x0001

 /*
- * For converting uninitialized extents on a work queue. 'handle' is used for
+ * For converting unwritten extents on a work queue. 'handle' is used for
 * buffered writeback.
 */
 typedef struct ext4_io_end {
@@ -537,26 +536,26 @@ enum {
 /*
 * Flags used by ext4_map_blocks()
 */
-	/* Allocate any needed blocks and/or convert an unitialized
+	/* Allocate any needed blocks and/or convert an unwritten
 	   extent to be an initialized ext4 */
 #define EXT4_GET_BLOCKS_CREATE			0x0001
-	/* Request the creation of an unitialized extent */
-#define EXT4_GET_BLOCKS_UNINIT_EXT		0x0002
-#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT	(EXT4_GET_BLOCKS_UNINIT_EXT|\
+	/* Request the creation of an unwritten extent */
+#define EXT4_GET_BLOCKS_UNWRIT_EXT		0x0002
+#define EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT	(EXT4_GET_BLOCKS_UNWRIT_EXT|\
 						 EXT4_GET_BLOCKS_CREATE)
 	/* Caller is from the delayed allocation writeout path
 	 * finally doing the actual allocation of delayed blocks */
 #define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0004
 	/* caller is from the direct IO path, request to creation of an
-	unitialized extents if not allocated, split the uninitialized
+	unwritten extents if not allocated, split the unwritten
 	extent if blocks has been preallocated already*/
 #define EXT4_GET_BLOCKS_PRE_IO			0x0008
 #define EXT4_GET_BLOCKS_CONVERT			0x0010
 #define EXT4_GET_BLOCKS_IO_CREATE_EXT		(EXT4_GET_BLOCKS_PRE_IO|\
-					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
+					 EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT)
 	/* Convert extent to initialized after IO complete */
 #define EXT4_GET_BLOCKS_IO_CONVERT_EXT		(EXT4_GET_BLOCKS_CONVERT|\
-					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
+					 EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT)
 	/* Eventual metadata allocation (due to growing extent tree)
 	 * should not fail, so try to use reserved blocks for that.*/
 #define EXT4_GET_BLOCKS_METADATA_NOFAIL		0x0020
@@ -876,6 +875,8 @@ struct ext4_inode_info {
 	struct inode vfs_inode;
 	struct jbd2_inode *jinode;

+	spinlock_t i_raw_lock;	/* protects updates to the raw inode */
+
 	/*
 	 * File creation time. Its function is same as that of
 	 * struct timespec i_{a,c,m}time in the generic inode.
@@ -1159,7 +1160,8 @@ struct ext4_super_block {
 	__le32	s_usr_quota_inum;	/* inode for tracking user quota */
 	__le32	s_grp_quota_inum;	/* inode for tracking group quota */
 	__le32	s_overhead_clusters;	/* overhead blocks/clusters in fs */
-	__le32	s_reserved[108];	/* Padding to the end of the block */
+	__le32	s_backup_bgs[2];	/* groups with sparse_super2 SBs */
+	__le32	s_reserved[106];	/* Padding to the end of the block */
 	__le32	s_checksum;		/* crc32c(superblock) */
 };

@@ -1505,6 +1507,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 #define EXT4_FEATURE_COMPAT_EXT_ATTR		0x0008
 #define EXT4_FEATURE_COMPAT_RESIZE_INODE	0x0010
 #define EXT4_FEATURE_COMPAT_DIR_INDEX		0x0020
+#define EXT4_FEATURE_COMPAT_SPARSE_SUPER2	0x0200

 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER	0x0001
 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE	0x0002
@@ -1953,10 +1956,6 @@ extern void ext4_get_group_no_and_offset(struct super_block *sb,
 extern ext4_group_t ext4_get_group_number(struct super_block *sb,
 					  ext4_fsblk_t block);

-extern void ext4_validate_block_bitmap(struct super_block *sb,
-				       struct ext4_group_desc *desc,
-				       ext4_group_t block_group,
-				       struct buffer_head *bh);
 extern unsigned int ext4_block_group(struct super_block *sb,
 			ext4_fsblk_t blocknr);
 extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
@@ -1985,16 +1984,9 @@ extern int ext4_wait_block_bitmap(struct super_block *sb,
 				  struct buffer_head *bh);
 extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
 						  ext4_group_t block_group);
-extern void ext4_init_block_bitmap(struct super_block *sb,
-				   struct buffer_head *bh,
-				   ext4_group_t group,
-				   struct ext4_group_desc *desc);
 extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
 					      ext4_group_t block_group,
 					      struct ext4_group_desc *gdp);
-extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
-					   ext4_group_t block_group,
-					   struct ext4_group_desc *gdp);
 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);

 /* dir.c */
@@ -2137,8 +2129,6 @@ extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
-extern int ext4_block_truncate_page(handle_t *handle,
-		struct address_space *mapping, loff_t from);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
 			     loff_t lstart, loff_t lend);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
@@ -2198,8 +2188,6 @@ extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);

 /* super.c */
 extern int ext4_calculate_overhead(struct super_block *sb);
-extern int ext4_superblock_csum_verify(struct super_block *sb,
-				       struct ext4_super_block *es);
 extern void ext4_superblock_csum_set(struct super_block *sb);
 extern void *ext4_kvmalloc(size_t size, gfp_t flags);
 extern void *ext4_kvzalloc(size_t size, gfp_t flags);
@@ -2571,19 +2559,11 @@ extern const struct file_operations ext4_dir_operations;
 extern const struct inode_operations ext4_file_inode_operations;
 extern const struct file_operations ext4_file_operations;
 extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
-extern void ext4_unwritten_wait(struct inode *inode);

 /* inline.c */
 extern int ext4_has_inline_data(struct inode *inode);
-extern int ext4_get_inline_size(struct inode *inode);
 extern int ext4_get_max_inline_size(struct inode *inode);
 extern int ext4_find_inline_data_nolock(struct inode *inode);
-extern void ext4_write_inline_data(struct inode *inode,
-				   struct ext4_iloc *iloc,
-				   void *buffer, loff_t pos,
-				   unsigned int len);
-extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
-				    unsigned int len);
 extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
 				 unsigned int len);
 extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
@@ -2771,23 +2751,20 @@ extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       struct page *page,
 			       int len,
-			       struct writeback_control *wbc);
+			       struct writeback_control *wbc,
+			       bool keep_towrite);

 /* mmp.c */
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
-extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp);
-extern int ext4_mmp_csum_verify(struct super_block *sb,
-				struct mmp_struct *mmp);

 /*
 * Note that these flags will never ever appear in a buffer_head's state flag.
 * See EXT4_MAP_... to see where this is used.
 */
 enum ext4_state_bits {
-	BH_Uninit	/* blocks are allocated but uninitialized on disk */
-	 = BH_JBDPrivateStart,
-	BH_AllocFromCluster,	/* allocated blocks were part of already
+	BH_AllocFromCluster	/* allocated blocks were part of already
 				 * allocated cluster. */
+	= BH_JBDPrivateStart
 };

 /*

--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -137,21 +137,21 @@ struct ext4_ext_path {
 * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
 * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
 * MSB of ee_len field in the extent datastructure to signify if this
- * particular extent is an initialized extent or an uninitialized (i.e.
+ * particular extent is an initialized extent or an unwritten (i.e.
 * preallocated).
- * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
- * uninitialized extent.
+ * EXT_UNWRITTEN_MAX_LEN is the maximum number of blocks we can have in an
+ * unwritten extent.
 * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
- * uninitialized one. In other words, if MSB of ee_len is set, it is an
- * uninitialized extent with only one special scenario when ee_len = 0x8000.
- * In this case we can not have an uninitialized extent of zero length and
+ * unwritten one. In other words, if MSB of ee_len is set, it is an
+ * unwritten extent with only one special scenario when ee_len = 0x8000.
+ * In this case we can not have an unwritten extent of zero length and
 * thus we make it as a special case of initialized extent with 0x8000 length.
 * This way we get better extent-to-group alignment for initialized extents.
 * Hence, the maximum number of blocks we can have in an *initialized*
- * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
+ * extent is 2^15 (32768) and in an *unwritten* extent is 2^15-1 (32767).
 */
 #define EXT_INIT_MAX_LEN	(1UL << 15)
-#define EXT_UNINIT_MAX_LEN	(EXT_INIT_MAX_LEN - 1)
+#define EXT_UNWRITTEN_MAX_LEN	(EXT_INIT_MAX_LEN - 1)


 #define EXT_FIRST_EXTENT(__hdr__) \
@@ -187,14 +187,14 @@ static inline unsigned short ext_depth(struct inode *inode)
 	return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
 }

-static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
+static inline void ext4_ext_mark_unwritten(struct ext4_extent *ext)
 {
-	/* We can not have an uninitialized extent of zero length! */
+	/* We can not have an unwritten extent of zero length! */
 	BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
 	ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
 }

-static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
+static inline int ext4_ext_is_unwritten(struct ext4_extent *ext)
 {
 	/* Extent with ee_len of 0x8000 is treated as an initialized extent */
 	return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);

--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -122,8 +122,9 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
 	return handle;
 }

-void ext4_journal_abort_handle(const char *caller, unsigned int line,
-			       const char *err_fn, struct buffer_head *bh,
+static void ext4_journal_abort_handle(const char *caller, unsigned int line,
+				      const char *err_fn,
+				      struct buffer_head *bh,
 				      handle_t *handle, int err)
 {
 	char nbuf[16];

--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -231,10 +231,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
 /*
 * Wrapper functions with which ext4 calls into JBD.
 */
-void ext4_journal_abort_handle(const char *caller, unsigned int line,
-			       const char *err_fn,
-		struct buffer_head *bh, handle_t *handle, int err);
-
 int __ext4_journal_get_write_access(const char *where, unsigned int line,
 				    handle_t *handle, struct buffer_head *bh);


--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -344,8 +344,14 @@ static int ext4_es_can_be_merged(struct extent_status *es1,
 	if (ext4_es_status(es1) != ext4_es_status(es2))
 		return 0;

-	if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL)
+	if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
+		pr_warn("ES assertion failed when merging extents. "
+			"The sum of lengths of es1 (%d) and es2 (%d) "
+			"is bigger than allowed file size (%d)\n",
+			es1->es_len, es2->es_len, EXT_MAX_BLOCKS);
+		WARN_ON(1);
 		return 0;
+	}

 	if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
 		return 0;
@@ -433,7 +439,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
 		ee_start = ext4_ext_pblock(ex);
 		ee_len = ext4_ext_get_actual_len(ex);

-		ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0;
+		ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0;
 		es_status = ext4_es_is_unwritten(es) ? 1 : 0;

 		/*

--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -57,7 +57,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
 	return 0;
 }

-void ext4_unwritten_wait(struct inode *inode)
+static void ext4_unwritten_wait(struct inode *inode)
 {
 	wait_queue_head_t *wq = ext4_ioend_wq(inode);

@@ -92,36 +92,65 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
 }

 static ssize_t
-ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
+ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
 		unsigned long nr_segs, loff_t pos)
 {
 	struct file *file = iocb->ki_filp;
-	struct inode *inode = file->f_mapping->host;
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct mutex *aio_mutex = NULL;
 	struct blk_plug plug;
-	int unaligned_aio = 0;
-	ssize_t ret;
+	int o_direct = file->f_flags & O_DIRECT;
 	int overwrite = 0;
 	size_t length = iov_length(iov, nr_segs);
+	ssize_t ret;

-	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
-	    !is_sync_kiocb(iocb))
-		unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
+	BUG_ON(iocb->ki_pos != pos);

-	/* Unaligned direct AIO must be serialized; see comment above */
-	if (unaligned_aio) {
-		mutex_lock(ext4_aio_mutex(inode));
+	/*
+	 * Unaligned direct AIO must be serialized; see comment above
+	 * In the case of O_APPEND, assume that we must always serialize
+	 */
+	if (o_direct &&
+	    ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
+	    !is_sync_kiocb(iocb) &&
+	    (file->f_flags & O_APPEND ||
+	     ext4_unaligned_aio(inode, iov, nr_segs, pos))) {
+		aio_mutex = ext4_aio_mutex(inode);
+		mutex_lock(aio_mutex);
 		ext4_unwritten_wait(inode);
 	}

-	BUG_ON(iocb->ki_pos != pos);
-
 	mutex_lock(&inode->i_mutex);
+	if (file->f_flags & O_APPEND)
+		iocb->ki_pos = pos = i_size_read(inode);
+
+	/*
+	 * If we have encountered a bitmap-format file, the size limit
+	 * is smaller than s_maxbytes, which is for extent-mapped files.
+	 */
+	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+		if ((pos > sbi->s_bitmap_maxbytes) ||
+		    (pos == sbi->s_bitmap_maxbytes && length > 0)) {
+			mutex_unlock(&inode->i_mutex);
+			ret = -EFBIG;
+			goto errout;
+		}
+
+		if (pos + length > sbi->s_bitmap_maxbytes) {
+			nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
+					      sbi->s_bitmap_maxbytes - pos);
+		}
+	}
+
+	if (o_direct) {
 		blk_start_plug(&plug);

 		iocb->private = &overwrite;

 		/* check whether we do a DIO overwrite or not */
-	if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
+		if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
 		    !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
 			struct ext4_map_blocks map;
 			unsigned int blkbits = inode->i_blkbits;
@@ -134,17 +163,21 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,

 			err = ext4_map_blocks(NULL, inode, &map, 0);
 			/*
-		 * 'err==len' means that all of blocks has been preallocated no
-		 * matter they are initialized or not.  For excluding
-		 * uninitialized extents, we need to check m_flags.  There are
-		 * two conditions that indicate for initialized extents.
-		 * 1) If we hit extent cache, EXT4_MAP_MAPPED flag is returned;
-		 * 2) If we do a real lookup, non-flags are returned.
-		 * So we should check these two conditions.
+			 * 'err==len' means that all of blocks has
+			 * been preallocated no matter they are
+			 * initialized or not.  For excluding
+			 * unwritten extents, we need to check
+			 * m_flags.  There are two conditions that
+			 * indicate for initialized extents.  1) If we
+			 * hit extent cache, EXT4_MAP_MAPPED flag is
+			 * returned; 2) If we do a real lookup,
+			 * non-flags are returned.  So we should check
+			 * these two conditions.
 			 */
 			if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
 				overwrite = 1;
 		}
+	}

 	ret = __generic_file_aio_write(iocb, iov, nr_segs);
 	mutex_unlock(&inode->i_mutex);
@@ -156,45 +189,12 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
 		if (err < 0)
 			ret = err;
 	}
+	if (o_direct)
 		blk_finish_plug(&plug);

-	if (unaligned_aio)
-		mutex_unlock(ext4_aio_mutex(inode));
-
-	return ret;
-}
-
-static ssize_t
-ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
-		unsigned long nr_segs, loff_t pos)
-{
-	struct inode *inode = file_inode(iocb->ki_filp);
-	ssize_t ret;
-
-	/*
-	 * If we have encountered a bitmap-format file, the size limit
-	 * is smaller than s_maxbytes, which is for extent-mapped files.
-	 */
-
-	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
-		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-		size_t length = iov_length(iov, nr_segs);
-
-		if ((pos > sbi->s_bitmap_maxbytes ||
-		    (pos == sbi->s_bitmap_maxbytes && length > 0)))
-			return -EFBIG;
-
-		if (pos + length > sbi->s_bitmap_maxbytes) {
-			nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
-					      sbi->s_bitmap_maxbytes - pos);
-		}
-	}
-
-	if (unlikely(iocb->ki_filp->f_flags & O_DIRECT))
-		ret = ext4_file_dio_write(iocb, iov, nr_segs, pos);
-	else
-		ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
-
+errout:
+	if (aio_mutex)
+		mutex_unlock(aio_mutex);
 	return ret;
 }

@@ -244,6 +244,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 			handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
 			if (IS_ERR(handle))
 				return PTR_ERR(handle);
+			BUFFER_TRACE(sbi->s_sbh, "get_write_access");
 			err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 			if (err) {
 				ext4_journal_stop(handle);

--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -22,7 +22,7 @@
 #define EXT4_INLINE_DOTDOT_OFFSET	2
 #define EXT4_INLINE_DOTDOT_SIZE		4

-int ext4_get_inline_size(struct inode *inode)
+static int ext4_get_inline_size(struct inode *inode)
 {
 	if (EXT4_I(inode)->i_inline_off)
 		return EXT4_I(inode)->i_inline_size;
@@ -211,7 +211,7 @@ static int ext4_read_inline_data(struct inode *inode, void *buffer,
 * value since it is already handled by ext4_xattr_ibody_inline_set.
 * That saves us one memcpy.
 */
-void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
+static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
 				   void *buffer, loff_t pos, unsigned int len)
 {
 	struct ext4_xattr_entry *entry;
@@ -264,6 +264,7 @@ static int ext4_create_inline_data(handle_t *handle,
 	if (error)
 		return error;

+	BUFFER_TRACE(is.iloc.bh, "get_write_access");
 	error = ext4_journal_get_write_access(handle, is.iloc.bh);
 	if (error)
 		goto out;
@@ -347,6 +348,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
 	if (error == -ENODATA)
 		goto out;

+	BUFFER_TRACE(is.iloc.bh, "get_write_access");
 	error = ext4_journal_get_write_access(handle, is.iloc.bh);
 	if (error)
 		goto out;
@@ -373,7 +375,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
 	return error;
 }

-int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
+static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
 				    unsigned int len)
 {
 	int ret, size;
@@ -424,6 +426,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
 	if (error)
 		goto out;

+	BUFFER_TRACE(is.iloc.bh, "get_write_access");
 	error = ext4_journal_get_write_access(handle, is.iloc.bh);
 	if (error)
 		goto out;
@@ -1007,6 +1010,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
 	if (err)
 		return err;

+	BUFFER_TRACE(iloc->bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, iloc->bh);
 	if (err)
 		return err;
@@ -1669,6 +1673,7 @@ int ext4_delete_inline_entry(handle_t *handle,
 				EXT4_MIN_INLINE_DATA_SIZE;
 	}

+	BUFFER_TRACE(bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, bh);
 	if (err)
 		goto out;

--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2619,7 +2619,7 @@ int ext4_mb_init(struct super_block *sb)
 	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
 	if (sbi->s_locality_groups == NULL) {
 		ret = -ENOMEM;
-		goto out_free_groupinfo_slab;
+		goto out;
 	}
 	for_each_possible_cpu(i) {
 		struct ext4_locality_group *lg;
@@ -2644,8 +2644,6 @@ int ext4_mb_init(struct super_block *sb)
 out_free_locality_groups:
 	free_percpu(sbi->s_locality_groups);
 	sbi->s_locality_groups = NULL;
-out_free_groupinfo_slab:
-	ext4_groupinfo_destroy_slabs();
 out:
 	kfree(sbi->s_mb_offsets);
 	sbi->s_mb_offsets = NULL;
@@ -2878,6 +2876,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 	if (!bitmap_bh)
 		goto out_err;

+	BUFFER_TRACE(bitmap_bh, "getting write access");
 	err = ext4_journal_get_write_access(handle, bitmap_bh);
 	if (err)
 		goto out_err;
@@ -2890,6 +2889,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
 	ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
 			ext4_free_group_clusters(sb, gdp));

+	BUFFER_TRACE(gdp_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, gdp_bh);
 	if (err)
 		goto out_err;
@@ -3147,7 +3147,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
 	}
 	BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
 			start > ac->ac_o_ex.fe_logical);
-	BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
+	BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));

 	/* now prepare goal request */


--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -505,7 +505,7 @@ int ext4_ext_migrate(struct inode *inode)
 	 * with i_data_sem held to prevent racing with block
 	 * allocation.
 	 */
-	down_read((&EXT4_I(inode)->i_data_sem));
+	down_read(&EXT4_I(inode)->i_data_sem);
 	ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
 	up_read((&EXT4_I(inode)->i_data_sem));


--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -18,7 +18,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
 	return cpu_to_le32(csum);
 }

-int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
+static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
 {
 	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
@@ -27,7 +27,7 @@ int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
 	return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
 }

-void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
+static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
 {
 	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))

--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -57,8 +57,8 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
 static void
 copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
 {
-	if (ext4_ext_is_uninitialized(src))
-		ext4_ext_mark_uninitialized(dest);
+	if (ext4_ext_is_unwritten(src))
+		ext4_ext_mark_unwritten(dest);
 	else
 		dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
 }
@@ -391,6 +391,7 @@ mext_insert_extents(handle_t *handle, struct inode *orig_inode,

 	if (depth) {
 		/* Register to journal */
+		BUFFER_TRACE(orig_path->p_bh, "get_write_access");
 		ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
 		if (ret)
 			return ret;
@@ -593,14 +594,14 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
 * @inode:		inode in question
 * @from:		block offset of inode
 * @count:		block count to be checked
- * @uninit:		extents expected to be uninitialized
+ * @unwritten:		extents expected to be unwritten
 * @err:		pointer to save error value
 *
 * Return 1 if all extents in range has expected type, and zero otherwise.
 */
 static int
 mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
-			  int uninit, int *err)
+		    int unwritten, int *err)
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent *ext;
@@ -611,7 +612,7 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
 		if (*err)
 			goto out;
 		ext = path[ext_depth(inode)].p_ext;
-		if (uninit != ext4_ext_is_uninitialized(ext))
+		if (unwritten != ext4_ext_is_unwritten(ext))
 			goto out;
 		from += ext4_ext_get_actual_len(ext);
 		ext4_ext_drop_refs(path);
@@ -894,7 +895,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
 * @orig_page_offset:		page index on original file
 * @data_offset_in_page:	block index where data swapping starts
 * @block_len_in_page:		the number of blocks to be swapped
- * @uninit:			orig extent is uninitialized or not
+ * @unwritten:			orig extent is unwritten or not
 * @err:			pointer to save return value
 *
 * Save the data in original inode blocks and replace original inode extents
@@ -905,7 +906,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
 static int
 move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 		  pgoff_t orig_page_offset, int data_offset_in_page,
-		  int block_len_in_page, int uninit, int *err)
+		  int block_len_in_page, int unwritten, int *err)
 {
 	struct inode *orig_inode = file_inode(o_filp);
 	struct page *pagep[2] = {NULL, NULL};
@@ -962,27 +963,27 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 	if (unlikely(*err < 0))
 		goto stop_journal;
 	/*
-	 * If orig extent was uninitialized it can become initialized
+	 * If orig extent was unwritten it can become initialized
 	 * at any time after i_data_sem was dropped, in order to
 	 * serialize with delalloc we have recheck extent while we
 	 * hold page's lock, if it is still the case data copy is not
 	 * necessary, just swap data blocks between orig and donor.
 	 */
-	if (uninit) {
+	if (unwritten) {
 		ext4_double_down_write_data_sem(orig_inode, donor_inode);
 		/* If any of extents in range became initialized we have to
 		 * fallback to data copying */
-		uninit = mext_check_coverage(orig_inode, orig_blk_offset,
+		unwritten = mext_check_coverage(orig_inode, orig_blk_offset,
 						block_len_in_page, 1, err);
 		if (*err)
 			goto drop_data_sem;

-		uninit &= mext_check_coverage(donor_inode, orig_blk_offset,
+		unwritten &= mext_check_coverage(donor_inode, orig_blk_offset,
 						 block_len_in_page, 1, err);
 		if (*err)
 			goto drop_data_sem;

-		if (!uninit) {
+		if (!unwritten) {
 			ext4_double_up_write_data_sem(orig_inode, donor_inode);
 			goto data_copy;
 		}
@@ -1259,7 +1260,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
 	int data_offset_in_page;
 	int block_len_in_page;
-	int uninit;
+	int unwritten;

 	if (orig_inode->i_sb != donor_inode->i_sb) {
 		ext4_debug("ext4 move extent: The argument files "
@@ -1391,8 +1392,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 		    !last_extent)
 			continue;

-		/* Is original extent is uninitialized */
-		uninit = ext4_ext_is_uninitialized(ext_prev);
+		/* Is original extent is unwritten */
+		unwritten = ext4_ext_is_unwritten(ext_prev);

 		data_offset_in_page = seq_start % blocks_per_page;

@@ -1432,8 +1433,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 						o_filp, donor_inode,
 						orig_page_offset,
 						data_offset_in_page,
-						block_len_in_page, uninit,
-						&ret);
+						block_len_in_page,
+						unwritten, &ret);

 			/* Count how many blocks we have exchanged */
 			*moved_len += block_len_in_page;

--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -67,6 +67,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
 		return ERR_PTR(err);
 	inode->i_size += inode->i_sb->s_blocksize;
 	EXT4_I(inode)->i_disksize = inode->i_size;
+	BUFFER_TRACE(bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, bh);
 	if (err) {
 		brelse(bh);
@@ -1778,6 +1779,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,

 	blocksize =  dir->i_sb->s_blocksize;
 	dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
+	BUFFER_TRACE(bh, "get_write_access");
 	retval = ext4_journal_get_write_access(handle, bh);
 	if (retval) {
 		ext4_std_error(dir->i_sb, retval);
@@ -2510,8 +2512,7 @@ static int empty_dir(struct inode *inode)
 		 ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
 	de = ext4_next_entry(de1, sb->s_blocksize);
 	while (offset < inode->i_size) {
-		if (!bh ||
-		    (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
+		if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
 			unsigned int lblock;
 			err = 0;
 			brelse(bh);
@@ -2539,26 +2540,37 @@ static int empty_dir(struct inode *inode)
 	return 1;
 }

-/* ext4_orphan_add() links an unlinked or truncated inode into a list of
+/*
+ * ext4_orphan_add() links an unlinked or truncated inode into a list of
 * such inodes, starting at the superblock, in case we crash before the
 * file is closed/deleted, or in case the inode truncate spans multiple
 * transactions and the last transaction is not recovered after a crash.
 *
 * At filesystem recovery time, we walk this list deleting unlinked
 * inodes and truncating linked inodes in ext4_orphan_cleanup().
+ *
+ * Orphan list manipulation functions must be called under i_mutex unless
+ * we are just creating the inode or deleting it.
 */
 int ext4_orphan_add(handle_t *handle, struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_iloc iloc;
 	int err = 0, rc;
+	bool dirty = false;

-	if (!EXT4_SB(sb)->s_journal)
+	if (!sbi->s_journal)
 		return 0;

-	mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
+	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
+		     !mutex_is_locked(&inode->i_mutex));
+	/*
+	 * Exit early if inode already is on orphan list. This is a big speedup
+	 * since we don't have to contend on the global s_orphan_lock.
+	 */
 	if (!list_empty(&EXT4_I(inode)->i_orphan))
-		goto out_unlock;
+		return 0;

 	/*
 	 * Orphan handling is only valid for files with data blocks
@@ -2569,48 +2581,51 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 	J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
 		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);

-	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
-	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
+	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
+	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 	if (err)
-		goto out_unlock;
+		goto out;

 	err = ext4_reserve_inode_write(handle, inode, &iloc);
 	if (err)
-		goto out_unlock;
+		goto out;
+
+	mutex_lock(&sbi->s_orphan_lock);
 	/*
 	 * Due to previous errors inode may be already a part of on-disk
 	 * orphan list. If so skip on-disk list modification.
 	 */
-	if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <=
-		(le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)))
-			goto mem_insert;
+	if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
+	    (le32_to_cpu(sbi->s_es->s_inodes_count))) {
+		/* Insert this inode at the head of the on-disk orphan list */
+		NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
+		sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
+		dirty = true;
+	}
+	list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
+	mutex_unlock(&sbi->s_orphan_lock);

-	/* Insert this inode at the head of the on-disk orphan list... */
-	NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
-	EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
+	if (dirty) {
 		err = ext4_handle_dirty_super(handle, sb);
 		rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
 		if (!err)
 			err = rc;
-
-	/* Only add to the head of the in-memory list if all the
-	 * previous operations succeeded.  If the orphan_add is going to
-	 * fail (possibly taking the journal offline), we can't risk
-	 * leaving the inode on the orphan list: stray orphan-list
-	 * entries can cause panics at unmount time.
-	 *
-	 * This is safe: on error we're going to ignore the orphan list
-	 * anyway on the next recovery. */
-mem_insert:
-	if (!err)
-		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
-
+		if (err) {
+			/*
+			 * We have to remove inode from in-memory list if
+			 * addition to on disk orphan list failed. Stray orphan
+			 * list entries can cause panics at unmount time.
+			 */
+			mutex_lock(&sbi->s_orphan_lock);
+			list_del(&EXT4_I(inode)->i_orphan);
+			mutex_unlock(&sbi->s_orphan_lock);
+		}
+	}
 	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
 	jbd_debug(4, "orphan inode %lu will point to %d\n",
 			inode->i_ino, NEXT_ORPHAN(inode));
-out_unlock:
-	mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
-	ext4_std_error(inode->i_sb, err);
+out:
+	ext4_std_error(sb, err);
 	return err;
 }

@@ -2622,45 +2637,51 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 {
 	struct list_head *prev;
 	struct ext4_inode_info *ei = EXT4_I(inode);
-	struct ext4_sb_info *sbi;
+	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	__u32 ino_next;
 	struct ext4_iloc iloc;
 	int err = 0;

-	if ((!EXT4_SB(inode->i_sb)->s_journal) &&
-	    !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS))
+	if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
 		return 0;

-	mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
+	WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
+		     !mutex_is_locked(&inode->i_mutex));
+	/* Do this quick check before taking global s_orphan_lock. */
 	if (list_empty(&ei->i_orphan))
-		goto out;
+		return 0;

-	ino_next = NEXT_ORPHAN(inode);
-	prev = ei->i_orphan.prev;
-	sbi = EXT4_SB(inode->i_sb);
+	if (handle) {
+		/* Grab inode buffer early before taking global s_orphan_lock */
+		err = ext4_reserve_inode_write(handle, inode, &iloc);
+	}

+	mutex_lock(&sbi->s_orphan_lock);
 	jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);

+	prev = ei->i_orphan.prev;
 	list_del_init(&ei->i_orphan);

 	/* If we're on an error path, we may not have a valid
 	 * transaction handle with which to update the orphan list on
 	 * disk, but we still need to remove the inode from the linked
 	 * list in memory. */
-	if (!handle)
-		goto out;
-
-	err = ext4_reserve_inode_write(handle, inode, &iloc);
-	if (err)
+	if (!handle || err) {
+		mutex_unlock(&sbi->s_orphan_lock);
 		goto out_err;
+	}

+	ino_next = NEXT_ORPHAN(inode);
 	if (prev == &sbi->s_orphan) {
 		jbd_debug(4, "superblock will point to %u\n", ino_next);
 		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
 		err = ext4_journal_get_write_access(handle, sbi->s_sbh);
-		if (err)
+		if (err) {
+			mutex_unlock(&sbi->s_orphan_lock);
 			goto out_brelse;
+		}
 		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
+		mutex_unlock(&sbi->s_orphan_lock);
 		err = ext4_handle_dirty_super(handle, inode->i_sb);
 	} else {
 		struct ext4_iloc iloc2;
@@ -2670,20 +2691,20 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
 		jbd_debug(4, "orphan inode %lu will point to %u\n",
 			  i_prev->i_ino, ino_next);
 		err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
-		if (err)
+		if (err) {
+			mutex_unlock(&sbi->s_orphan_lock);
 			goto out_brelse;
+		}
 		NEXT_ORPHAN(i_prev) = ino_next;
 		err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
+		mutex_unlock(&sbi->s_orphan_lock);
 	}
 	if (err)
 		goto out_brelse;
 	NEXT_ORPHAN(inode) = 0;
 	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-
 out_err:
 	ext4_std_error(inode->i_sb, err);
-out:
-	mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
 	return err;

 out_brelse:

--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -401,7 +401,8 @@ static int io_submit_add_bh(struct ext4_io_submit *io,
 int ext4_bio_write_page(struct ext4_io_submit *io,
 			struct page *page,
 			int len,
-			struct writeback_control *wbc)
+			struct writeback_control *wbc,
+			bool keep_towrite)
 {
 	struct inode *inode = page->mapping->host;
 	unsigned block_start, blocksize;
@@ -414,9 +415,23 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageWriteback(page));

+	if (keep_towrite)
+		set_page_writeback_keepwrite(page);
+	else
 		set_page_writeback(page);
 	ClearPageError(page);

+	/*
+	 * Comments copied from block_write_full_page:
+	 *
+	 * The page straddles i_size.  It must be zeroed out on each and every
+	 * writepage invocation because it may be mmapped.  "A file is mapped
+	 * in multiples of the page size.  For a file that is not a multiple of
+	 * the page size, the remaining memory is zeroed when mapped, and
+	 * writes to that region are not written out to the file."
+	 */
+	if (len < PAGE_CACHE_SIZE)
+		zero_user_segment(page, len, PAGE_CACHE_SIZE);
 	/*
 	 * In the first loop we prepare and mark buffers to submit. We have to
 	 * mark all buffers in the page before submitting so that
@@ -428,19 +443,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 	do {
 		block_start = bh_offset(bh);
 		if (block_start >= len) {
-			/*
-			 * Comments copied from block_write_full_page:
-			 *
-			 * The page straddles i_size.  It must be zeroed out on
-			 * each and every writepage invocation because it may
-			 * be mmapped.  "A file is mapped in multiples of the
-			 * page size.  For a file that is not a multiple of
-			 * the  page size, the remaining memory is zeroed when
-			 * mapped, and writes to that region are not written
-			 * out to the file."
-			 */
-			zero_user_segment(page, block_start,
-					  block_start + blocksize);
 			clear_buffer_dirty(bh);
 			set_buffer_uptodate(bh);
 			continue;

--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -348,6 +348,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
 	bh = sb_getblk(sb, blk);
 	if (unlikely(!bh))
 		return ERR_PTR(-ENOMEM);
+	BUFFER_TRACE(bh, "get_write_access");
 	if ((err = ext4_journal_get_write_access(handle, bh))) {
 		brelse(bh);
 		bh = ERR_PTR(err);
@@ -426,6 +427,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
 		if (unlikely(!bh))
 			return -ENOMEM;

+		BUFFER_TRACE(bh, "get_write_access");
 		err = ext4_journal_get_write_access(handle, bh);
 		if (err)
 			return err;
@@ -518,6 +520,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
 				goto out;
 			}

+			BUFFER_TRACE(gdb, "get_write_access");
 			err = ext4_journal_get_write_access(handle, gdb);
 			if (err) {
 				brelse(gdb);
@@ -790,14 +793,17 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 		goto exit_dind;
 	}

+	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
 	if (unlikely(err))
 		goto exit_dind;

+	BUFFER_TRACE(gdb_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, gdb_bh);
 	if (unlikely(err))
 		goto exit_dind;

+	BUFFER_TRACE(dind, "get_write_access");
 	err = ext4_journal_get_write_access(handle, dind);
 	if (unlikely(err))
 		ext4_std_error(sb, err);
@@ -902,6 +908,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 	EXT4_SB(sb)->s_group_desc = n_group_desc;
 	EXT4_SB(sb)->s_gdb_count++;
 	ext4_kvfree(o_group_desc);
+	BUFFER_TRACE(gdb_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, gdb_bh);
 	if (unlikely(err))
 		brelse(gdb_bh);
@@ -977,6 +984,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
 	}

 	for (i = 0; i < reserved_gdb; i++) {
+		BUFFER_TRACE(primary[i], "get_write_access");
 		if ((err = ext4_journal_get_write_access(handle, primary[i])))
 			goto exit_bh;
 	}
@@ -1084,6 +1092,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
 		ext4_debug("update metadata backup %llu(+%llu)\n",
 			   backup_block, backup_block -
 			   ext4_group_first_block_no(sb, group));
+		BUFFER_TRACE(bh, "get_write_access");
 		if ((err = ext4_journal_get_write_access(handle, bh)))
 			break;
 		lock_buffer(bh);
@@ -1163,6 +1172,7 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
 		 */
 		if (gdb_off) {
 			gdb_bh = sbi->s_group_desc[gdb_num];
+			BUFFER_TRACE(gdb_bh, "get_write_access");
 			err = ext4_journal_get_write_access(handle, gdb_bh);

 			if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
@@ -1433,6 +1443,7 @@ static int ext4_flex_group_add(struct super_block *sb,
 		goto exit;
 	}

+	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 	if (err)
 		goto exit_journal;
@@ -1645,6 +1656,7 @@ static int ext4_group_extend_no_check(struct super_block *sb,
 		return err;
 	}

+	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
 	if (err) {
 		ext4_warning(sb, "error %d on journal write access", err);
@@ -1804,6 +1816,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);

+	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, sbi->s_sbh);
 	if (err)
 		goto errout;

--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -138,7 +138,7 @@ static __le32 ext4_superblock_csum(struct super_block *sb,
 	return cpu_to_le32(csum);
 }

-int ext4_superblock_csum_verify(struct super_block *sb,
+static int ext4_superblock_csum_verify(struct super_block *sb,
 				       struct ext4_super_block *es)
 {
 	if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
@@ -879,6 +879,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 		return NULL;

 	ei->vfs_inode.i_version = 1;
+	spin_lock_init(&ei->i_raw_lock);
 	INIT_LIST_HEAD(&ei->i_prealloc_list);
 	spin_lock_init(&ei->i_prealloc_lock);
 	ext4_es_init_tree(&ei->i_es_tree);
@@ -1903,7 +1904,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
 	if (!(sbi->s_mount_state & EXT4_VALID_FS))
 		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
 			 "running e2fsck is recommended");
-	else if ((sbi->s_mount_state & EXT4_ERROR_FS))
+	else if (sbi->s_mount_state & EXT4_ERROR_FS)
 		ext4_msg(sb, KERN_WARNING,
 			 "warning: mounting fs with errors, "
 			 "running e2fsck is recommended");
@@ -2404,6 +2405,16 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
 	if (ext4_bg_has_super(sb, bg))
 		has_super = 1;

+	/*
+	 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
+	 * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
+	 * on modern mke2fs or blksize > 1k on older mke2fs) then we must
+	 * compensate.
+	 */
+	if (sb->s_blocksize == 1024 && nr == 0 &&
+	    le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0)
+		has_super++;
+
 	return (has_super + ext4_group_first_block_no(sb, bg));
 }

@@ -3337,7 +3348,7 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
 	 * By default we reserve 2% or 4096 clusters, whichever is smaller.
 	 * This should cover the situations where we can not afford to run
 	 * out of space like for example punch hole, or converting
-	 * uninitialized extents in delalloc path. In most cases such
+	 * unwritten extents in delalloc path. In most cases such
 	 * allocation would require 1, or 2 blocks, higher numbers are
 	 * very rare.
 	 */
@@ -5370,6 +5381,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
 	bh = ext4_bread(handle, inode, blk, 1, &err);
 	if (!bh)
 		goto out;
+	BUFFER_TRACE(bh, "get write access");
 	err = ext4_journal_get_write_access(handle, bh);
 	if (err) {
 		brelse(bh);

--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -369,6 +369,9 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
 {
 	int error;

+	if (strlen(name) > 255)
+		return -ERANGE;
+
 	down_read(&EXT4_I(inode)->xattr_sem);
 	error = ext4_xattr_ibody_get(inode, name_index, name, buffer,
 				     buffer_size);
@@ -513,6 +516,7 @@ static void ext4_xattr_update_super_block(handle_t *handle,
 	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR))
 		return;

+	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
 	if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
 		EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
 		ext4_handle_dirty_super(handle, sb);
@@ -532,6 +536,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);

 	ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
+	BUFFER_TRACE(bh, "get_write_access");
 	error = ext4_journal_get_write_access(handle, bh);
 	if (error)
 		goto out;
@@ -774,6 +779,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 	if (s->base) {
 		ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
 					bs->bh->b_blocknr);
+		BUFFER_TRACE(bs->bh, "get_write_access");
 		error = ext4_journal_get_write_access(handle, bs->bh);
 		if (error)
 			goto cleanup;
@@ -859,6 +865,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 						EXT4_C2B(EXT4_SB(sb), 1));
 				if (error)
 					goto cleanup;
+				BUFFER_TRACE(new_bh, "get_write_access");
 				error = ext4_journal_get_write_access(handle,
 								      new_bh);
 				if (error)
@@ -896,7 +903,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 			 * take i_data_sem because we will test
 			 * i_delalloc_reserved_flag in ext4_mb_new_blocks
 			 */
-			down_read((&EXT4_I(inode)->i_data_sem));
+			down_read(&EXT4_I(inode)->i_data_sem);
 			block = ext4_new_meta_blocks(handle, inode, goal, 0,
 						     NULL, &error);
 			up_read((&EXT4_I(inode)->i_data_sem));

--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -319,13 +319,23 @@ CLEARPAGEFLAG(Uptodate, uptodate)
 extern void cancel_dirty_page(struct page *page, unsigned int account_size);

 int test_clear_page_writeback(struct page *page);
-int test_set_page_writeback(struct page *page);
+int __test_set_page_writeback(struct page *page, bool keep_write);
+
+#define test_set_page_writeback(page)			\
+	__test_set_page_writeback(page, false)
+#define test_set_page_writeback_keepwrite(page)	\
+	__test_set_page_writeback(page, true)

 static inline void set_page_writeback(struct page *page)
 {
 	test_set_page_writeback(page);
 }

+static inline void set_page_writeback_keepwrite(struct page *page)
+{
+	test_set_page_writeback_keepwrite(page);
+}
+
 #ifdef CONFIG_PAGEFLAGS_EXTENDED
 /*
 * System with lots of page flags available. This allows separate

--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -36,7 +36,7 @@ struct extent_status;

 #define show_map_flags(flags) __print_flags(flags, "|",			\
 	{ EXT4_GET_BLOCKS_CREATE,		"CREATE" },		\
-	{ EXT4_GET_BLOCKS_UNINIT_EXT,		"UNINIT" },		\
+	{ EXT4_GET_BLOCKS_UNWRIT_EXT,		"UNWRIT" },		\
 	{ EXT4_GET_BLOCKS_DELALLOC_RESERVE,	"DELALLOC" },		\
 	{ EXT4_GET_BLOCKS_PRE_IO,		"PRE_IO" },		\
 	{ EXT4_GET_BLOCKS_CONVERT,		"CONVERT" },		\
@@ -51,7 +51,6 @@ struct extent_status;
 	{ EXT4_MAP_MAPPED,	"M" },			\
 	{ EXT4_MAP_UNWRITTEN,	"U" },			\
 	{ EXT4_MAP_BOUNDARY,	"B" },			\
-	{ EXT4_MAP_UNINIT,	"u" },			\
 	{ EXT4_MAP_FROM_CLUSTER, "C" })

 #define show_free_flags(flags) __print_flags(flags, "|",	\
@@ -1497,7 +1496,7 @@ DEFINE_EVENT(ext4__truncate, ext4_truncate_exit,
 	TP_ARGS(inode)
 );

-/* 'ux' is the uninitialized extent. */
+/* 'ux' is the unwritten extent. */
 TRACE_EVENT(ext4_ext_convert_to_initialized_enter,
 	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map,
 		 struct ext4_extent *ux),
@@ -1533,7 +1532,7 @@ TRACE_EVENT(ext4_ext_convert_to_initialized_enter,
 );

 /*
- * 'ux' is the uninitialized extent.
+ * 'ux' is the unwritten extent.
 * 'ix' is the initialized extent to which blocks are transferred.
 */
 TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath,
@@ -1811,7 +1810,7 @@ DEFINE_EVENT(ext4__trim, ext4_trim_all_free,
 	TP_ARGS(sb, group, start, len)
 );

-TRACE_EVENT(ext4_ext_handle_uninitialized_extents,
+TRACE_EVENT(ext4_ext_handle_unwritten_extents,
 	TP_PROTO(struct inode *inode, struct ext4_map_blocks *map, int flags,
 		 unsigned int allocated, ext4_fsblk_t newblock),


--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2380,7 +2380,7 @@ int test_clear_page_writeback(struct page *page)
 	return ret;
 }

-int test_set_page_writeback(struct page *page)
+int __test_set_page_writeback(struct page *page, bool keep_write)
 {
 	struct address_space *mapping = page_mapping(page);
 	int ret;
@@ -2405,6 +2405,7 @@ int test_set_page_writeback(struct page *page)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_DIRTY);
+		if (!keep_write)
 			radix_tree_tag_clear(&mapping->page_tree,
 						page_index(page),
 						PAGECACHE_TAG_TOWRITE);
@@ -2418,7 +2419,7 @@ int test_set_page_writeback(struct page *page)
 	return ret;

 }
-EXPORT_SYMBOL(test_set_page_writeback);
+EXPORT_SYMBOL(__test_set_page_writeback);

 /*
 * Return true if any of the pages in the mapping are marked with the