Commit e417c49a authored by Stephen C. Tweedie, committed by Linus Torvalds

[PATCH] ext3: online resizing

The patch below adds online resize capability to ext3. It is based on
Andreas's patch for 2.4, fixed up by Stephen.

The patch also removes s_debts:

s_debts is currently not used by ext3 (it is created, destroyed and checked
but never set).  Remove it for now.

Resurrecting this will require adding it back in changed form.  In its existing
form it is already unsafe with respect to byte-tearing, as it performs unlocked
byte increment/decrement on words which may be accessed simultaneously on
other CPUs.  It is also the only in-memory dynamic table which needs to be
extended by online-resize, so locking it will require care.
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 7db96583
......@@ -5,7 +5,7 @@
obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o hash.o
ioctl.o namei.o super.o symlink.o hash.o resize.o
ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
......
......@@ -54,6 +54,7 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
return NULL;
}
smp_rmb();
group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
desc = block_group % EXT3_DESC_PER_BLOCK(sb);
......@@ -274,8 +275,9 @@ void ext3_discard_reservation(struct inode *inode)
}
/* Free given blocks, update quota and i_blocks field */
void ext3_free_blocks(handle_t *handle, struct inode *inode,
unsigned long block, unsigned long count)
void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
unsigned long block, unsigned long count,
int *pdquot_freed_blocks)
{
struct buffer_head *bitmap_bh = NULL;
struct buffer_head *gd_bh;
......@@ -283,18 +285,12 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
unsigned long bit;
unsigned long i;
unsigned long overflow;
struct super_block * sb;
struct ext3_group_desc * gdp;
struct ext3_super_block * es;
struct ext3_sb_info *sbi;
int err = 0, ret;
int dquot_freed_blocks = 0;
sb = inode->i_sb;
if (!sb) {
printk ("ext3_free_blocks: nonexistent device");
return;
}
*pdquot_freed_blocks = 0;
sbi = EXT3_SB(sb);
es = EXT3_SB(sb)->s_es;
if (block < le32_to_cpu(es->s_first_data_block) ||
......@@ -421,7 +417,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
jbd_lock_bh_state(bitmap_bh);
BUFFER_TRACE(bitmap_bh, "bit already cleared");
} else {
dquot_freed_blocks++;
(*pdquot_freed_blocks)++;
}
}
jbd_unlock_bh_state(bitmap_bh);
......@@ -429,7 +425,7 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
spin_lock(sb_bgl_lock(sbi, block_group));
gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
dquot_freed_blocks);
*pdquot_freed_blocks);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_mod(&sbi->s_freeblocks_counter, count);
......@@ -451,6 +447,22 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode,
error_return:
brelse(bitmap_bh);
ext3_std_error(sb, err);
return;
}
/* Free given blocks, update quota and i_blocks field */
void ext3_free_blocks(handle_t *handle, struct inode *inode,
unsigned long block, unsigned long count)
{
struct super_block * sb;
int dquot_freed_blocks;
sb = inode->i_sb;
if (!sb) {
printk ("ext3_free_blocks: nonexistent device");
return;
}
ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
if (dquot_freed_blocks)
DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
return;
......@@ -1141,6 +1153,8 @@ int ext3_new_block(handle_t *handle, struct inode *inode,
#ifdef EXT3FS_DEBUG
static int goal_hits, goal_attempts;
#endif
unsigned long ngroups;
*errp = -ENOSPC;
sb = inode->i_sb;
if (!sb) {
......@@ -1205,13 +1219,16 @@ int ext3_new_block(handle_t *handle, struct inode *inode,
goto allocated;
}
ngroups = EXT3_SB(sb)->s_groups_count;
smp_rmb();
/*
* Now search the rest of the groups. We assume that
* i and gdp correctly point to the last group visited.
*/
for (bgi = 0; bgi < EXT3_SB(sb)->s_groups_count; bgi++) {
for (bgi = 0; bgi < ngroups; bgi++) {
group_no++;
if (group_no >= EXT3_SB(sb)->s_groups_count)
if (group_no >= ngroups)
group_no = 0;
gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
if (!gdp) {
......@@ -1362,6 +1379,7 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
unsigned long desc_count;
struct ext3_group_desc *gdp;
int i;
unsigned long ngroups;
#ifdef EXT3FS_DEBUG
struct ext3_super_block *es;
unsigned long bitmap_count, x;
......@@ -1394,7 +1412,9 @@ unsigned long ext3_count_free_blocks(struct super_block *sb)
return bitmap_count;
#else
desc_count = 0;
for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
ngroups = EXT3_SB(sb)->s_groups_count;
smp_rmb();
for (i = 0; i < ngroups; i++) {
gdp = ext3_get_group_desc(sb, i, NULL);
if (!gdp)
continue;
......
......@@ -320,8 +320,6 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
desc = ext3_get_group_desc (sb, group, &bh);
if (!desc || !desc->bg_free_inodes_count)
continue;
if (sbi->s_debts[group] >= max_debt)
continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
continue;
if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
......
......@@ -2231,8 +2231,10 @@ static unsigned long ext3_get_inode_block(struct super_block *sb,
struct buffer_head *bh;
struct ext3_group_desc * gdp;
if ((ino != EXT3_ROOT_INO &&
ino != EXT3_JOURNAL_INO &&
ino != EXT3_RESIZE_INO &&
ino < EXT3_FIRST_INO(sb)) ||
ino > le32_to_cpu(
EXT3_SB(sb)->s_es->s_inodes_count)) {
......@@ -2246,6 +2248,7 @@ static unsigned long ext3_get_inode_block(struct super_block *sb,
"group >= groups count");
return 0;
}
smp_rmb();
group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb);
desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1);
bh = EXT3_SB(sb)->s_group_desc[group_desc];
......
......@@ -175,6 +175,51 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
rsv_window_size = EXT3_MAX_RESERVE_BLOCKS;
atomic_set(&ei->i_rsv_window.rsv_goal_size, rsv_window_size);
return 0;
case EXT3_IOC_GROUP_EXTEND: {
unsigned long n_blocks_count;
struct super_block *sb = inode->i_sb;
int err;
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
if (IS_RDONLY(inode))
return -EROFS;
if (get_user(n_blocks_count, (__u32 *)arg))
return -EFAULT;
err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
journal_lock_updates(EXT3_SB(sb)->s_journal);
journal_flush(EXT3_SB(sb)->s_journal);
journal_unlock_updates(EXT3_SB(sb)->s_journal);
return err;
}
case EXT3_IOC_GROUP_ADD: {
struct ext3_new_group_data input;
struct super_block *sb = inode->i_sb;
int err;
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
if (IS_RDONLY(inode))
return -EROFS;
if (copy_from_user(&input, (struct ext3_new_group_input *)arg,
sizeof(input)))
return -EFAULT;
err = ext3_group_add(sb, &input);
journal_lock_updates(EXT3_SB(sb)->s_journal);
journal_flush(EXT3_SB(sb)->s_journal);
journal_unlock_updates(EXT3_SB(sb)->s_journal);
return err;
}
default:
return -ENOTTY;
}
......
This diff is collapsed.
......@@ -59,19 +59,19 @@ static int ext3_sync_fs(struct super_block *sb, int wait);
* that sync() will call the filesystem's write_super callback if
* appropriate.
*/
handle_t *ext3_journal_start(struct inode *inode, int nblocks)
handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
{
journal_t *journal;
if (inode->i_sb->s_flags & MS_RDONLY)
if (sb->s_flags & MS_RDONLY)
return ERR_PTR(-EROFS);
/* Special case here: if the journal has aborted behind our
* backs (eg. EIO in the commit thread), then we still need to
* take the FS itself readonly cleanly. */
journal = EXT3_JOURNAL(inode);
journal = EXT3_SB(sb)->s_journal;
if (is_journal_aborted(journal)) {
ext3_abort(inode->i_sb, __FUNCTION__,
ext3_abort(sb, __FUNCTION__,
"Detected aborted journal");
return ERR_PTR(-EROFS);
}
......@@ -400,7 +400,6 @@ void ext3_put_super (struct super_block * sb)
for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
kfree(sbi->s_debts);
brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++) {
......@@ -582,7 +581,7 @@ enum {
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
Opt_ignore, Opt_barrier, Opt_err,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
};
static match_table_t tokens = {
......@@ -630,7 +629,8 @@ static match_table_t tokens = {
{Opt_ignore, "quota"},
{Opt_ignore, "usrquota"},
{Opt_barrier, "barrier=%u"},
{Opt_err, NULL}
{Opt_err, NULL},
{Opt_resize, "resize"},
};
static unsigned long get_sb_block(void **data)
......@@ -654,7 +654,7 @@ static unsigned long get_sb_block(void **data)
}
static int parse_options (char * options, struct super_block *sb,
unsigned long * inum, int is_remount)
unsigned long * inum, unsigned long *n_blocks_count, int is_remount)
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
char * p;
......@@ -911,6 +911,15 @@ static int parse_options (char * options, struct super_block *sb,
break;
case Opt_ignore:
break;
case Opt_resize:
if (!n_blocks_count) {
printk("EXT3-fs: resize option only available "
"for remount\n");
return 0;
}
match_int(&args[0], &option);
*n_blocks_count = option;
break;
default:
printk (KERN_ERR
"EXT3-fs: Unrecognized mount option \"%s\" "
......@@ -1004,6 +1013,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
return res;
}
/* Called at mount-time, super-block is locked */
static int ext3_check_descriptors (struct super_block * sb)
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
......@@ -1302,7 +1312,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, RESERVATION);
if (!parse_options ((char *) data, sb, &journal_inum, 0))
if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0))
goto failed_mount;
sb->s_flags |= MS_ONE_SECOND;
......@@ -1447,13 +1457,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
printk (KERN_ERR "EXT3-fs: not enough memory\n");
goto failed_mount;
}
sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(u8),
GFP_KERNEL);
if (!sbi->s_debts) {
printk("EXT3-fs: not enough memory to allocate s_bgi\n");
goto failed_mount2;
}
memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(u8));
percpu_counter_init(&sbi->s_freeblocks_counter);
percpu_counter_init(&sbi->s_freeinodes_counter);
......@@ -1604,7 +1607,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
failed_mount3:
journal_destroy(sbi->s_journal);
failed_mount2:
kfree(sbi->s_debts);
for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc);
......@@ -2049,11 +2051,12 @@ int ext3_remount (struct super_block * sb, int * flags, char * data)
struct ext3_super_block * es;
struct ext3_sb_info *sbi = EXT3_SB(sb);
unsigned long tmp;
unsigned long n_blocks_count = 0;
/*
* Allow the "check" option to be passed as a remount option.
*/
if (!parse_options(data, sb, &tmp, 1))
if (!parse_options(data, sb, &tmp, &n_blocks_count, 1))
return -EINVAL;
if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
......@@ -2066,7 +2069,8 @@ int ext3_remount (struct super_block * sb, int * flags, char * data)
ext3_init_journal_params(sb, sbi->s_journal);
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
return -EROFS;
......@@ -2105,6 +2109,8 @@ int ext3_remount (struct super_block * sb, int * flags, char * data)
*/
ext3_clear_journal_err(sb, es);
sbi->s_mount_state = le16_to_cpu(es->s_state);
if ((ret = ext3_group_extend(sb, es, n_blocks_count)))
return ret;
if (!ext3_setup_super (sb, es, 0))
sb->s_flags &= ~MS_RDONLY;
}
......@@ -2121,6 +2127,10 @@ int ext3_statfs (struct super_block * sb, struct kstatfs * buf)
if (test_opt (sb, MINIX_DF))
overhead = 0;
else {
unsigned long ngroups;
ngroups = EXT3_SB(sb)->s_groups_count;
smp_rmb();
/*
* Compute the overhead (FS structures)
*/
......@@ -2136,7 +2146,7 @@ int ext3_statfs (struct super_block * sb, struct kstatfs * buf)
* block group descriptors. If the sparse superblocks
* feature is turned on, then not all groups have this.
*/
for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
for (i = 0; i < ngroups; i++)
overhead += ext3_bg_has_super(sb, i) +
ext3_bg_num_gdb(sb, i);
......@@ -2144,8 +2154,7 @@ int ext3_statfs (struct super_block * sb, struct kstatfs * buf)
* Every block group has an inode bitmap, a block
* bitmap, and an inode table.
*/
overhead += (EXT3_SB(sb)->s_groups_count *
(2 + EXT3_SB(sb)->s_itb_per_group));
overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group));
}
buf->f_type = EXT3_SUPER_MAGIC;
......
......@@ -196,6 +196,31 @@ struct ext3_group_desc
#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
#define EXT3_STATE_NEW 0x00000002 /* inode is newly created */
/*
 * Used to pass group descriptor data when online resize is done.
 *
 * This is the user-space view of a new block group: it is the argument
 * type encoded in the EXT3_IOC_GROUP_ADD ioctl number. All members are
 * fixed-width, and the two 16-bit members at the end together fill one
 * 32-bit slot.
 */
struct ext3_new_group_input {
__u32 group; /* Group number for this data */
__u32 block_bitmap; /* Absolute block number of block bitmap */
__u32 inode_bitmap; /* Absolute block number of inode bitmap */
__u32 inode_table; /* Absolute block number of inode table start */
__u32 blocks_count; /* Total number of blocks in this group */
__u16 reserved_blocks; /* Number of reserved blocks in this group */
__u16 unused; /* Explicit padding after reserved_blocks */
};
/*
 * The struct ext3_new_group_input in kernel space, with free_blocks_count.
 *
 * The leading members repeat struct ext3_new_group_input member for
 * member, in the same order and with the same types; this is what lets
 * the EXT3_IOC_GROUP_ADD handler copy the user structure straight into
 * one of these. Keep the two definitions in step.
 *
 * NOTE(review): the EXT3_IOC_GROUP_ADD handler passes sizeof() of this
 * structure to copy_from_user(), which is 4 bytes more than
 * struct ext3_new_group_input; confirm that reading past the end of the
 * user structure is intended.
 */
struct ext3_new_group_data {
__u32 group;
__u32 block_bitmap;
__u32 inode_bitmap;
__u32 inode_table;
__u32 blocks_count;
__u16 reserved_blocks;
__u16 unused;
__u32 free_blocks_count; /* Kernel-only member; presumably computed by the resize code -- TODO confirm */
};
/*
* ioctl commands
*/
......@@ -203,6 +228,8 @@ struct ext3_group_desc
#define EXT3_IOC_SETFLAGS _IOW('f', 2, long)
#define EXT3_IOC_GETVERSION _IOR('f', 3, long)
#define EXT3_IOC_SETVERSION _IOW('f', 4, long)
#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
#define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input)
#define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long)
#define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long)
#ifdef CONFIG_JBD_DEBUG
......@@ -421,7 +448,7 @@ struct ext3_super_block {
*/
__u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
__u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
__u16 s_padding1;
__u16 s_reserved_gdt_blocks; /* Per group desc for online growth */
/*
* Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
*/
......@@ -687,6 +714,8 @@ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
unsigned long);
extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
unsigned long, unsigned long, int *);
extern unsigned long ext3_count_free_blocks (struct super_block *);
extern void ext3_check_blocks_bitmap (struct super_block *);
extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
......@@ -749,6 +778,13 @@ extern int ext3_orphan_del(handle_t *, struct inode *);
extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash,
__u32 start_minor_hash, __u32 *next_hash);
/* resize.c */
extern int ext3_group_add(struct super_block *sb,
struct ext3_new_group_data *input);
extern int ext3_group_extend(struct super_block *sb,
struct ext3_super_block *es,
unsigned long n_blocks_count);
/* super.c */
extern void ext3_error (struct super_block *, const char *, const char *, ...)
__attribute__ ((format (printf, 3, 4)));
......
......@@ -54,7 +54,6 @@ struct ext3_sb_info {
u32 s_next_generation;
u32 s_hash_seed[4];
int s_def_hash_version;
u8 *s_debts;
struct percpu_counter s_freeblocks_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
......
......@@ -188,9 +188,14 @@ __ext3_journal_dirty_metadata(const char *where,
#define ext3_journal_dirty_metadata(handle, bh) \
__ext3_journal_dirty_metadata(__FUNCTION__, (handle), (bh))
handle_t *ext3_journal_start(struct inode *inode, int nblocks);
handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks);
int __ext3_journal_stop(const char *where, handle_t *handle);
/*
 * Inode-based convenience wrapper: looks up the superblock the inode
 * belongs to and forwards to ext3_journal_start_sb() with the same
 * nblocks value. The return value is whatever ext3_journal_start_sb()
 * returns.
 */
static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks)
{
	struct super_block *owner_sb = inode->i_sb;

	return ext3_journal_start_sb(owner_sb, nblocks);
}
#define ext3_journal_stop(handle) \
__ext3_journal_stop(__FUNCTION__, (handle))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment