Commit b5f10eed authored by Eric Sandeen's avatar Eric Sandeen Committed by Theodore Ts'o

ext4: lock block groups when initializing

I noticed when filling a 1T filesystem with 4 threads using the
fs_mark benchmark:

fs_mark -d /mnt/test -D 256 -n 100000 -t 4 -s 20480 -F -S 0

that I occasionally got checksum mismatch errors:

EXT4-fs error (device sdb): ext4_init_inode_bitmap: Checksum bad for group 6935

etc.  I'd reliably get 4-5 of them during the run.

It appears that the problem is likely a race to init the bg's
when the uninit_bg feature is enabled.

With the patch below, which adds sb_bgl_locking around initialization,
I was able to complete several runs with no errors or warnings.
Signed-off-by: default avatarEric Sandeen <sandeen@redhat.com>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
parent e29d1cde
...@@ -321,12 +321,15 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) ...@@ -321,12 +321,15 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
if (bh_uptodate_or_lock(bh)) if (bh_uptodate_or_lock(bh))
return bh; return bh;
spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh, block_group, desc); ext4_init_block_bitmap(sb, bh, block_group, desc);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
unlock_buffer(bh); unlock_buffer(bh);
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
return bh; return bh;
} }
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (bh_submit_read(bh) < 0) { if (bh_submit_read(bh) < 0) {
put_bh(bh); put_bh(bh);
ext4_error(sb, __func__, ext4_error(sb, __func__,
......
...@@ -118,12 +118,15 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ...@@ -118,12 +118,15 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
if (bh_uptodate_or_lock(bh)) if (bh_uptodate_or_lock(bh))
return bh; return bh;
spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
ext4_init_inode_bitmap(sb, bh, block_group, desc); ext4_init_inode_bitmap(sb, bh, block_group, desc);
set_buffer_uptodate(bh); set_buffer_uptodate(bh);
unlock_buffer(bh); unlock_buffer(bh);
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
return bh; return bh;
} }
spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
if (bh_submit_read(bh) < 0) { if (bh_submit_read(bh) < 0) {
put_bh(bh); put_bh(bh);
ext4_error(sb, __func__, ext4_error(sb, __func__,
...@@ -735,7 +738,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) ...@@ -735,7 +738,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode)
/* When marking the block group with /* When marking the block group with
* ~EXT4_BG_INODE_UNINIT we don't want to depend * ~EXT4_BG_INODE_UNINIT we don't want to depend
* on the value of bg_itable_unsed even though * on the value of bg_itable_unused even though
* mke2fs could have initialized the same for us. * mke2fs could have initialized the same for us.
* Instead we calculated the value below * Instead we calculated the value below
*/ */
......
...@@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore) ...@@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
if (bh_uptodate_or_lock(bh[i])) if (bh_uptodate_or_lock(bh[i]))
continue; continue;
spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
ext4_init_block_bitmap(sb, bh[i], ext4_init_block_bitmap(sb, bh[i],
first_group + i, desc); first_group + i, desc);
set_buffer_uptodate(bh[i]); set_buffer_uptodate(bh[i]);
unlock_buffer(bh[i]); unlock_buffer(bh[i]);
spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
continue; continue;
} }
spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
get_bh(bh[i]); get_bh(bh[i]);
bh[i]->b_end_io = end_buffer_read_sync; bh[i]->b_end_io = end_buffer_read_sync;
submit_bh(READ, bh[i]); submit_bh(READ, bh[i]);
......
...@@ -1621,6 +1621,7 @@ static int ext4_check_descriptors(struct super_block *sb) ...@@ -1621,6 +1621,7 @@ static int ext4_check_descriptors(struct super_block *sb)
"(block %llu)!", i, inode_table); "(block %llu)!", i, inode_table);
return 0; return 0;
} }
spin_lock(sb_bgl_lock(sbi, i));
if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
"Checksum for group %lu failed (%u!=%u)\n", "Checksum for group %lu failed (%u!=%u)\n",
...@@ -1629,6 +1630,7 @@ static int ext4_check_descriptors(struct super_block *sb) ...@@ -1629,6 +1630,7 @@ static int ext4_check_descriptors(struct super_block *sb)
if (!(sb->s_flags & MS_RDONLY)) if (!(sb->s_flags & MS_RDONLY))
return 0; return 0;
} }
spin_unlock(sb_bgl_lock(sbi, i));
if (!flexbg_flag) if (!flexbg_flag)
first_block += EXT4_BLOCKS_PER_GROUP(sb); first_block += EXT4_BLOCKS_PER_GROUP(sb);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment