Commit 1c2bf374 authored by Mingming Cao's avatar Mingming Cao Committed by Linus Torvalds

[PATCH] ext3_fsblk_t: filesystem, group blocks and bug fixes

Some of the in-kernel ext3 block variable type are treated as signed 4 bytes
int type, thus limited ext3 filesystem to 8TB (4kblock size based).  While
trying to fix them, it seems quite confusing in the ext3 code where some
blocks are filesystem-wide blocks, some are group relative offsets that need
to be signed value (as -1 has special meaning).  So it seem saner to define
two types of physical blocks: one is filesystem wide blocks, another is
group-relative blocks.  The following patches clarify these two types of
blocks in the ext3 code, and fix the type bugs which limit current 32 bit ext3
filesystem limit to 8TB.

With this series of patches and the percpu counter data type changes in the mm
tree, we are able to extend exts filesystem limit to 16TB.

This work is also a pre-request for the recent >32 bit ext3 work, and makes
the kernel to able to address 48 bit ext3 block a lot easier: Simply redefine
ext3_fsblk_t from unsigned long to sector_t and redefine the format string for
ext3 filesystem block corresponding.

Two RFC with a series patches have been posted to ext2-devel list and have
been reviewed and discussed:
http://marc.theaimsgroup.com/?l=ext2-devel&m=114722190816690&w=2

http://marc.theaimsgroup.com/?l=ext2-devel&m=114784919525942&w=2

Patches are tested on both 32 bit machine and 64 bit machine, <8TB ext3 and
>8TB ext3 filesystem(with the latest to be released e2fsprogs-1.39).  Tests
includes overnight fsx, tiobench, dbench and fsstress.

This patch:

Defines ext3_fsblk_t and ext3_grpblk_t, and the printk format string for
filesystem wide blocks.

This patch classifies all block group relative blocks, and ext3_fsblk_t blocks
occurs in the same function where used to be confusing before.  Also include
kernel bug fixes for filesystem wide in-kernel block variables.  There are
some fileystem wide blocks are treated as int/unsigned int type in the kernel
currently, especially in ext3 block allocation and reservation code.  This
patch fixed those bugs by converting those variables to ext3_fsblk_t(unsigned
long) type.
Signed-off-by: default avatarMingming Cao <cmm@us.ibm.com>
Signed-off-by: default avatarAndrew Morton <akpm@osdl.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@osdl.org>
parent cedfb178
This diff is collapsed.
......@@ -262,9 +262,11 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
int ngroups = sbi->s_groups_count;
int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
int freei, avefreei;
int freeb, avefreeb;
int blocks_per_dir, ndirs;
int max_debt, max_dirs, min_blocks, min_inodes;
ext3_fsblk_t freeb, avefreeb;
ext3_fsblk_t blocks_per_dir;
int ndirs;
int max_debt, max_dirs, min_inodes;
ext3_grpblk_t min_blocks;
int group = -1, i;
struct ext3_group_desc *desc;
struct buffer_head *bh;
......@@ -307,7 +309,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
min_inodes = avefreei - inodes_per_group / 4;
min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4;
max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, BLOCK_COST);
max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST);
if (max_debt * INODE_COST > inodes_per_group)
max_debt = inodes_per_group / INODE_COST;
if (max_debt > 255)
......
......@@ -62,7 +62,7 @@ static int ext3_inode_is_fast_symlink(struct inode *inode)
* still needs to be revoked.
*/
int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, int blocknr)
struct buffer_head *bh, ext3_fsblk_t blocknr)
{
int err;
......
......@@ -28,16 +28,16 @@ static int verify_group_input(struct super_block *sb,
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
struct ext3_super_block *es = sbi->s_es;
unsigned start = le32_to_cpu(es->s_blocks_count);
unsigned end = start + input->blocks_count;
ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count);
ext3_fsblk_t end = start + input->blocks_count;
unsigned group = input->group;
unsigned itend = input->inode_table + sbi->s_itb_per_group;
ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
unsigned overhead = ext3_bg_has_super(sb, group) ?
(1 + ext3_bg_num_gdb(sb, group) +
le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
unsigned metaend = start + overhead;
ext3_fsblk_t metaend = start + overhead;
struct buffer_head *bh = NULL;
int free_blocks_count;
ext3_grpblk_t free_blocks_count;
int err = -EINVAL;
input->free_blocks_count = free_blocks_count =
......@@ -64,7 +64,8 @@ static int verify_group_input(struct super_block *sb,
ext3_warning(sb, __FUNCTION__, "Bad blocks count %u",
input->blocks_count);
else if (!(bh = sb_bread(sb, end - 1)))
ext3_warning(sb, __FUNCTION__, "Cannot read last block (%u)",
ext3_warning(sb, __FUNCTION__,
"Cannot read last block ("E3FSBLK")",
end - 1);
else if (outside(input->block_bitmap, start, end))
ext3_warning(sb, __FUNCTION__,
......@@ -77,7 +78,7 @@ static int verify_group_input(struct super_block *sb,
else if (outside(input->inode_table, start, end) ||
outside(itend - 1, start, end))
ext3_warning(sb, __FUNCTION__,
"Inode table not in group (blocks %u-%u)",
"Inode table not in group (blocks %u-"E3FSBLK")",
input->inode_table, itend - 1);
else if (input->inode_bitmap == input->block_bitmap)
ext3_warning(sb, __FUNCTION__,
......@@ -85,24 +86,27 @@ static int verify_group_input(struct super_block *sb,
input->block_bitmap);
else if (inside(input->block_bitmap, input->inode_table, itend))
ext3_warning(sb, __FUNCTION__,
"Block bitmap (%u) in inode table (%u-%u)",
"Block bitmap (%u) in inode table (%u-"E3FSBLK")",
input->block_bitmap, input->inode_table, itend-1);
else if (inside(input->inode_bitmap, input->inode_table, itend))
ext3_warning(sb, __FUNCTION__,
"Inode bitmap (%u) in inode table (%u-%u)",
"Inode bitmap (%u) in inode table (%u-"E3FSBLK")",
input->inode_bitmap, input->inode_table, itend-1);
else if (inside(input->block_bitmap, start, metaend))
ext3_warning(sb, __FUNCTION__,
"Block bitmap (%u) in GDT table (%u-%u)",
"Block bitmap (%u) in GDT table"
" ("E3FSBLK"-"E3FSBLK")",
input->block_bitmap, start, metaend - 1);
else if (inside(input->inode_bitmap, start, metaend))
ext3_warning(sb, __FUNCTION__,
"Inode bitmap (%u) in GDT table (%u-%u)",
"Inode bitmap (%u) in GDT table"
" ("E3FSBLK"-"E3FSBLK")",
input->inode_bitmap, start, metaend - 1);
else if (inside(input->inode_table, start, metaend) ||
inside(itend - 1, start, metaend))
ext3_warning(sb, __FUNCTION__,
"Inode table (%u-%u) overlaps GDT table (%u-%u)",
"Inode table (%u-"E3FSBLK") overlaps"
"GDT table ("E3FSBLK"-"E3FSBLK")",
input->inode_table, itend - 1, start, metaend - 1);
else
err = 0;
......@@ -171,7 +175,7 @@ static int setup_new_group_blocks(struct super_block *sb,
struct buffer_head *bh;
handle_t *handle;
unsigned long block;
int bit;
ext3_grpblk_t bit;
int i;
int err = 0, err2;
......@@ -340,7 +344,7 @@ static int verify_reserved_gdb(struct super_block *sb,
while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) {
if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){
ext3_warning(sb, __FUNCTION__,
"reserved GDT %ld missing grp %d (%ld)",
"reserved GDT %lu missing grp %d (%lu)",
blk, grp,
grp * EXT3_BLOCKS_PER_GROUP(sb) + blk);
return -EINVAL;
......@@ -906,11 +910,12 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
{
unsigned long o_blocks_count;
unsigned long o_groups_count;
unsigned long last;
int add;
ext3_grpblk_t last;
ext3_grpblk_t add;
struct buffer_head * bh;
handle_t *handle;
int err, freed_blocks;
int err;
unsigned long freed_blocks;
/* We don't need to worry about locking wrt other resizers just
* yet: we're going to revalidate es->s_blocks_count after
......@@ -1001,10 +1006,10 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
sb->s_dirt = 1;
unlock_super(sb);
ext3_debug("freeing blocks %ld through %ld\n", o_blocks_count,
ext3_debug("freeing blocks %lu through %lu\n", o_blocks_count,
o_blocks_count + add);
ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
ext3_debug("freed blocks %ld through %ld\n", o_blocks_count,
ext3_debug("freed blocks %lu through %lu\n", o_blocks_count,
o_blocks_count + add);
if ((err = ext3_journal_stop(handle)))
goto exit_put;
......
......@@ -1841,7 +1841,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
struct buffer_head * bh;
journal_t *journal;
int start;
int len;
ext3_fsblk_t len;
int hblock, blocksize;
unsigned long sb_block;
unsigned long offset;
......
......@@ -225,7 +225,7 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
error = -ENODATA;
if (!EXT3_I(inode)->i_file_acl)
goto cleanup;
ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
if (!bh)
goto cleanup;
......@@ -233,7 +233,7 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name,
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
if (ext3_xattr_check_block(bh)) {
bad_block: ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
"inode %ld: bad block %u", inode->i_ino,
EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
......@@ -366,7 +366,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
error = 0;
if (!EXT3_I(inode)->i_file_acl)
goto cleanup;
ea_idebug(inode, "reading block %d", EXT3_I(inode)->i_file_acl);
ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl);
bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
error = -EIO;
if (!bh)
......@@ -375,7 +375,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
if (ext3_xattr_check_block(bh)) {
ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
"inode %ld: bad block %u", inode->i_ino,
EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
......@@ -647,7 +647,7 @@ ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i,
le32_to_cpu(BHDR(bs->bh)->h_refcount));
if (ext3_xattr_check_block(bs->bh)) {
ext3_error(sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
"inode %ld: bad block %u", inode->i_ino,
EXT3_I(inode)->i_file_acl);
error = -EIO;
goto cleanup;
......@@ -792,11 +792,12 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,
get_bh(new_bh);
} else {
/* We need to allocate a new block */
int goal = le32_to_cpu(
ext3_fsblk_t goal = le32_to_cpu(
EXT3_SB(sb)->s_es->s_first_data_block) +
EXT3_I(inode)->i_block_group *
(ext3_fsblk_t)EXT3_I(inode)->i_block_group *
EXT3_BLOCKS_PER_GROUP(sb);
int block = ext3_new_block(handle, inode, goal, &error);
ext3_fsblk_t block = ext3_new_block(handle, inode,
goal, &error);
if (error)
goto cleanup;
ea_idebug(inode, "creating block %d", block);
......@@ -847,7 +848,7 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode,
bad_block:
ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
"inode %ld: bad block %u", inode->i_ino,
EXT3_I(inode)->i_file_acl);
goto cleanup;
......@@ -1076,14 +1077,14 @@ ext3_xattr_delete_inode(handle_t *handle, struct inode *inode)
bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl);
if (!bh) {
ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: block %d read error", inode->i_ino,
"inode %ld: block %u read error", inode->i_ino,
EXT3_I(inode)->i_file_acl);
goto cleanup;
}
if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
BHDR(bh)->h_blocks != cpu_to_le32(1)) {
ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: bad block %d", inode->i_ino,
"inode %ld: bad block %u", inode->i_ino,
EXT3_I(inode)->i_file_acl);
goto cleanup;
}
......@@ -1210,11 +1211,11 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header,
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
ext3_error(inode->i_sb, __FUNCTION__,
"inode %ld: block %ld read error",
"inode %ld: block %lu read error",
inode->i_ino, (unsigned long) ce->e_block);
} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
EXT3_XATTR_REFCOUNT_MAX) {
ea_idebug(inode, "block %ld refcount %d>=%d",
ea_idebug(inode, "block %lu refcount %d>=%d",
(unsigned long) ce->e_block,
le32_to_cpu(BHDR(bh)->h_refcount),
EXT3_XATTR_REFCOUNT_MAX);
......
......@@ -730,13 +730,15 @@ struct dir_private_info {
/* balloc.c */
extern int ext3_bg_has_super(struct super_block *sb, int group);
extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
extern int ext3_new_blocks (handle_t *, struct inode *, unsigned long,
unsigned long *, int *);
extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
unsigned long);
extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
unsigned long, unsigned long, int *);
extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
ext3_fsblk_t goal, int *errp);
extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
ext3_fsblk_t goal, unsigned long *count, int *errp);
extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
ext3_fsblk_t block, unsigned long count);
extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb,
ext3_fsblk_t block, unsigned long count,
unsigned long *pdquot_freed_blocks);
extern unsigned long ext3_count_free_blocks (struct super_block *);
extern void ext3_check_blocks_bitmap (struct super_block *);
extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
......@@ -773,7 +775,8 @@ extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
/* inode.c */
int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext3_fsblk_t blocknr);
struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
......
......@@ -21,6 +21,14 @@
#include <linux/seqlock.h>
#include <linux/mutex.h>
/* data type for block offset of block group */
typedef int ext3_grpblk_t;
/* data type for filesystem-wide blocks number */
typedef unsigned long ext3_fsblk_t;
#define E3FSBLK "%lu"
struct ext3_reserve_window {
__u32 _rsv_start; /* First byte reserved */
__u32 _rsv_end; /* Last byte reserved or 0 */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment