Commit 17aff938 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] ext3: scalable counters and locks

From: Alex Tomas <bzzz@tmi.comex.ru>

This is a port from ext2 of the fuzzy counters (for Orlov allocator
heuristics) and the hashed spinlocking (for the inode and block allocators).
parent c12b9866
...@@ -110,6 +110,7 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode, ...@@ -110,6 +110,7 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
struct super_block * sb; struct super_block * sb;
struct ext3_group_desc * gdp; struct ext3_group_desc * gdp;
struct ext3_super_block * es; struct ext3_super_block * es;
struct ext3_sb_info *sbi;
int err = 0, ret; int err = 0, ret;
int dquot_freed_blocks = 0; int dquot_freed_blocks = 0;
...@@ -118,6 +119,7 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode, ...@@ -118,6 +119,7 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
printk ("ext3_free_blocks: nonexistent device"); printk ("ext3_free_blocks: nonexistent device");
return; return;
} }
sbi = EXT3_SB(sb);
es = EXT3_SB(sb)->s_es; es = EXT3_SB(sb)->s_es;
if (block < le32_to_cpu(es->s_first_data_block) || if (block < le32_to_cpu(es->s_first_data_block) ||
block + count < block || block + count < block ||
...@@ -242,11 +244,12 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode, ...@@ -242,11 +244,12 @@ void ext3_free_blocks (handle_t *handle, struct inode * inode,
} }
} }
spin_lock(bg_lock(sb, block_group)); spin_lock(sb_bgl_lock(sbi, block_group));
gdp->bg_free_blocks_count = gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
dquot_freed_blocks); dquot_freed_blocks);
spin_unlock(bg_lock(sb, block_group)); spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_mod(&sbi->s_freeblocks_counter, count);
/* We dirtied the bitmap block */ /* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
...@@ -429,7 +432,7 @@ ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, ...@@ -429,7 +432,7 @@ ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
have_access = 1; have_access = 1;
} }
if (!claim_block(bg_lock(sb, group), goal, bitmap_bh)) { if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), goal, bitmap_bh)) {
/* /*
* The block was allocated by another thread, or it was * The block was allocated by another thread, or it was
* allocated and then freed by another thread * allocated and then freed by another thread
...@@ -477,11 +480,11 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, ...@@ -477,11 +480,11 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
int target_block; /* tmp */ int target_block; /* tmp */
int fatal = 0, err; int fatal = 0, err;
int performed_allocation = 0; int performed_allocation = 0;
int free; int free_blocks, root_blocks;
int use_reserve = 0;
struct super_block *sb; struct super_block *sb;
struct ext3_group_desc *gdp; struct ext3_group_desc *gdp;
struct ext3_super_block *es; struct ext3_super_block *es;
struct ext3_sb_info *sbi;
#ifdef EXT3FS_DEBUG #ifdef EXT3FS_DEBUG
static int goal_hits = 0, goal_attempts = 0; static int goal_hits = 0, goal_attempts = 0;
#endif #endif
...@@ -500,9 +503,19 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, ...@@ -500,9 +503,19 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
return 0; return 0;
} }
sbi = EXT3_SB(sb);
es = EXT3_SB(sb)->s_es; es = EXT3_SB(sb)->s_es;
ext3_debug("goal=%lu.\n", goal); ext3_debug("goal=%lu.\n", goal);
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
root_blocks = le32_to_cpu(es->s_r_blocks_count);
if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
sbi->s_resuid != current->fsuid &&
(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
*errp = -ENOSPC;
return 0;
}
/* /*
* First, test whether the goal block is free. * First, test whether the goal block is free.
*/ */
...@@ -515,9 +528,8 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, ...@@ -515,9 +528,8 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
if (!gdp) if (!gdp)
goto io_error; goto io_error;
free = le16_to_cpu(gdp->bg_free_blocks_count); free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
free -= EXT3_SB(sb)->s_bgi[group_no].bg_reserved; if (free_blocks > 0) {
if (free > 0) {
ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
EXT3_BLOCKS_PER_GROUP(sb)); EXT3_BLOCKS_PER_GROUP(sb));
bitmap_bh = read_block_bitmap(sb, group_no); bitmap_bh = read_block_bitmap(sb, group_no);
...@@ -535,7 +547,6 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, ...@@ -535,7 +547,6 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
* Now search the rest of the groups. We assume that * Now search the rest of the groups. We assume that
* i and gdp correctly point to the last group visited. * i and gdp correctly point to the last group visited.
*/ */
repeat:
for (bgi = 0; bgi < EXT3_SB(sb)->s_groups_count; bgi++) { for (bgi = 0; bgi < EXT3_SB(sb)->s_groups_count; bgi++) {
group_no++; group_no++;
if (group_no >= EXT3_SB(sb)->s_groups_count) if (group_no >= EXT3_SB(sb)->s_groups_count)
...@@ -545,10 +556,8 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, ...@@ -545,10 +556,8 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
*errp = -EIO; *errp = -EIO;
goto out; goto out;
} }
free = le16_to_cpu(gdp->bg_free_blocks_count); free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
if (!use_reserve) if (free_blocks <= 0)
free -= EXT3_SB(sb)->s_bgi[group_no].bg_reserved;
if (free <= 0)
continue; continue;
brelse(bitmap_bh); brelse(bitmap_bh);
...@@ -563,15 +572,6 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, ...@@ -563,15 +572,6 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
goto allocated; goto allocated;
} }
if (!use_reserve &&
(EXT3_SB(sb)->s_resuid == current->fsuid ||
(EXT3_SB(sb)->s_resgid != 0 && in_group_p(EXT3_SB(sb)->s_resgid)) ||
capable(CAP_SYS_RESOURCE))) {
use_reserve = 1;
group_no = 0;
goto repeat;
}
/* No space left on the device */ /* No space left on the device */
*errp = -ENOSPC; *errp = -ENOSPC;
goto out; goto out;
...@@ -612,13 +612,13 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, ...@@ -612,13 +612,13 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
} }
} }
#endif #endif
spin_lock(bg_lock(sb, group_no)); spin_lock(sb_bgl_lock(sbi, group_no));
if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data)
J_ASSERT_BH(bitmap_bh, J_ASSERT_BH(bitmap_bh,
!ext3_test_bit(ret_block, !ext3_test_bit(ret_block,
bh2jh(bitmap_bh)->b_committed_data)); bh2jh(bitmap_bh)->b_committed_data));
ext3_debug("found bit %d\n", ret_block); ext3_debug("found bit %d\n", ret_block);
spin_unlock(bg_lock(sb, group_no)); spin_unlock(sb_bgl_lock(sbi, group_no));
/* ret_block was blockgroup-relative. Now it becomes fs-relative */ /* ret_block was blockgroup-relative. Now it becomes fs-relative */
ret_block = target_block; ret_block = target_block;
...@@ -639,10 +639,11 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, ...@@ -639,10 +639,11 @@ ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
ext3_debug("allocating block %d. Goal hits %d of %d.\n", ext3_debug("allocating block %d. Goal hits %d of %d.\n",
ret_block, goal_hits, goal_attempts); ret_block, goal_hits, goal_attempts);
spin_lock(bg_lock(sb, group_no)); spin_lock(sb_bgl_lock(sbi, group_no));
gdp->bg_free_blocks_count = gdp->bg_free_blocks_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1); cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
spin_unlock(bg_lock(sb, group_no)); spin_unlock(sb_bgl_lock(sbi, group_no));
percpu_counter_mod(&sbi->s_freeblocks_counter, -1);
BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
err = ext3_journal_dirty_metadata(handle, gdp_bh); err = ext3_journal_dirty_metadata(handle, gdp_bh);
......
...@@ -97,6 +97,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) ...@@ -97,6 +97,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
unsigned long bit; unsigned long bit;
struct ext3_group_desc * gdp; struct ext3_group_desc * gdp;
struct ext3_super_block * es; struct ext3_super_block * es;
struct ext3_sb_info *sbi = EXT3_SB(sb);
int fatal = 0, err; int fatal = 0, err;
if (atomic_read(&inode->i_count) > 1) { if (atomic_read(&inode->i_count) > 1) {
...@@ -161,13 +162,17 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) ...@@ -161,13 +162,17 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
if (fatal) goto error_return; if (fatal) goto error_return;
if (gdp) { if (gdp) {
spin_lock(&EXT3_SB(sb)->s_bgi[block_group].bg_ialloc_lock); spin_lock(sb_bgl_lock(sbi, block_group));
gdp->bg_free_inodes_count = cpu_to_le16( gdp->bg_free_inodes_count = cpu_to_le16(
le16_to_cpu(gdp->bg_free_inodes_count) + 1); le16_to_cpu(gdp->bg_free_inodes_count) + 1);
if (is_directory) if (is_directory)
gdp->bg_used_dirs_count = cpu_to_le16( gdp->bg_used_dirs_count = cpu_to_le16(
le16_to_cpu(gdp->bg_used_dirs_count) - 1); le16_to_cpu(gdp->bg_used_dirs_count) - 1);
spin_unlock(&EXT3_SB(sb)->s_bgi[block_group].bg_ialloc_lock); spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_inc(&sbi->s_freeinodes_counter);
if (is_directory)
percpu_counter_dec(&sbi->s_dirs_counter);
} }
BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bh2); err = ext3_journal_dirty_metadata(handle, bh2);
...@@ -196,11 +201,14 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) ...@@ -196,11 +201,14 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
static int find_group_dir(struct super_block *sb, struct inode *parent) static int find_group_dir(struct super_block *sb, struct inode *parent)
{ {
int ngroups = EXT3_SB(sb)->s_groups_count; int ngroups = EXT3_SB(sb)->s_groups_count;
int avefreei = ext3_count_free_inodes(sb) / ngroups; int freei, avefreei;
struct ext3_group_desc *desc, *best_desc = NULL; struct ext3_group_desc *desc, *best_desc = NULL;
struct buffer_head *bh; struct buffer_head *bh;
int group, best_group = -1; int group, best_group = -1;
freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
avefreei = freei / ngroups;
for (group = 0; group < ngroups; group++) { for (group = 0; group < ngroups; group++) {
desc = ext3_get_group_desc (sb, group, &bh); desc = ext3_get_group_desc (sb, group, &bh);
if (!desc || !desc->bg_free_inodes_count) if (!desc || !desc->bg_free_inodes_count)
...@@ -252,17 +260,20 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) ...@@ -252,17 +260,20 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
struct ext3_super_block *es = sbi->s_es; struct ext3_super_block *es = sbi->s_es;
int ngroups = sbi->s_groups_count; int ngroups = sbi->s_groups_count;
int inodes_per_group = EXT3_INODES_PER_GROUP(sb); int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
int freei = ext3_count_free_inodes(sb); int freei, avefreei;
int avefreei = freei / ngroups; int freeb, avefreeb;
int freeb = ext3_count_free_blocks(sb); int blocks_per_dir, ndirs;
int avefreeb = freeb / ngroups;
int blocks_per_dir;
int ndirs = ext3_count_dirs(sb);
int max_debt, max_dirs, min_blocks, min_inodes; int max_debt, max_dirs, min_blocks, min_inodes;
int group = -1, i; int group = -1, i;
struct ext3_group_desc *desc; struct ext3_group_desc *desc;
struct buffer_head *bh; struct buffer_head *bh;
freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
avefreei = freei / ngroups;
freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
avefreeb = freeb / ngroups;
ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);
if ((parent == sb->s_root->d_inode) || if ((parent == sb->s_root->d_inode) ||
(parent->i_flags & EXT3_TOPDIR_FL)) { (parent->i_flags & EXT3_TOPDIR_FL)) {
int best_ndir = inodes_per_group; int best_ndir = inodes_per_group;
...@@ -289,8 +300,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) ...@@ -289,8 +300,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
goto fallback; goto fallback;
} }
blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs;
le32_to_cpu(es->s_free_blocks_count)) / ndirs;
max_dirs = ndirs / ngroups + inodes_per_group / 16; max_dirs = ndirs / ngroups + inodes_per_group / 16;
min_inodes = avefreei - inodes_per_group / 4; min_inodes = avefreei - inodes_per_group / 4;
...@@ -309,7 +319,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) ...@@ -309,7 +319,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent)
desc = ext3_get_group_desc (sb, group, &bh); desc = ext3_get_group_desc (sb, group, &bh);
if (!desc || !desc->bg_free_inodes_count) if (!desc || !desc->bg_free_inodes_count)
continue; continue;
if (sbi->s_bgi[group].bg_debts >= max_debt) if (sbi->s_debts[group] >= max_debt)
continue; continue;
if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
continue; continue;
...@@ -416,13 +426,15 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) ...@@ -416,13 +426,15 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
struct buffer_head *bitmap_bh = NULL; struct buffer_head *bitmap_bh = NULL;
struct buffer_head *bh2; struct buffer_head *bh2;
int group; int group;
unsigned long ino; unsigned long ino = 0;
struct inode * inode; struct inode * inode;
struct ext3_group_desc * gdp; struct ext3_group_desc * gdp = NULL;
struct ext3_super_block * es; struct ext3_super_block * es;
struct ext3_inode_info *ei; struct ext3_inode_info *ei;
struct ext3_sb_info *sbi;
int err = 0; int err = 0;
struct inode *ret; struct inode *ret;
int i;
/* Cannot create files in a deleted directory */ /* Cannot create files in a deleted directory */
if (!dir || !dir->i_nlink) if (!dir || !dir->i_nlink)
...@@ -435,7 +447,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) ...@@ -435,7 +447,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
ei = EXT3_I(inode); ei = EXT3_I(inode);
es = EXT3_SB(sb)->s_es; es = EXT3_SB(sb)->s_es;
repeat: sbi = EXT3_SB(sb);
if (S_ISDIR(mode)) { if (S_ISDIR(mode)) {
if (test_opt (sb, OLDALLOC)) if (test_opt (sb, OLDALLOC))
group = find_group_dir(sb, dir); group = find_group_dir(sb, dir);
...@@ -448,46 +460,52 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) ...@@ -448,46 +460,52 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
if (group == -1) if (group == -1)
goto out; goto out;
err = -EIO; for (i = 0; i < sbi->s_groups_count; i++) {
brelse(bitmap_bh); gdp = ext3_get_group_desc(sb, group, &bh2);
bitmap_bh = read_inode_bitmap(sb, group);
if (!bitmap_bh) err = -EIO;
goto fail; brelse(bitmap_bh);
gdp = ext3_get_group_desc (sb, group, &bh2); bitmap_bh = read_inode_bitmap(sb, group);
if (!bitmap_bh)
if ((ino = ext3_find_first_zero_bit((unsigned long *)bitmap_bh->b_data, goto fail;
EXT3_INODES_PER_GROUP(sb))) <
EXT3_INODES_PER_GROUP(sb)) { ino = ext3_find_first_zero_bit((unsigned long *)
BUFFER_TRACE(bitmap_bh, "get_write_access"); bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb));
err = ext3_journal_get_write_access(handle, bitmap_bh); if (ino < EXT3_INODES_PER_GROUP(sb)) {
if (err) goto fail; BUFFER_TRACE(bitmap_bh, "get_write_access");
err = ext3_journal_get_write_access(handle, bitmap_bh);
if (ext3_set_bit_atomic(sb_bgl_lock(sbi, group), if (err)
ino, bitmap_bh->b_data))
goto repeat;
BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bitmap_bh);
if (err) goto fail;
} else {
if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) {
ext3_error (sb, "ext3_new_inode",
"Free inodes count corrupted in group %d",
group);
/* Is it really ENOSPC? */
err = -ENOSPC;
if (sb->s_flags & MS_RDONLY)
goto fail; goto fail;
BUFFER_TRACE(bh2, "get_write_access"); if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group),
err = ext3_journal_get_write_access(handle, bh2); ino, bitmap_bh->b_data)) {
if (err) goto fail; /* we won it */
gdp->bg_free_inodes_count = 0; BUFFER_TRACE(bitmap_bh,
BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bh2); err = ext3_journal_dirty_metadata(handle,
if (err) goto fail; bitmap_bh);
if (err)
goto fail;
goto got;
}
/* we lost it */
journal_release_buffer(handle, bitmap_bh);
} }
goto repeat;
/*
* This case is possible in concurrent environment. It is very
* rare. We cannot repeat the find_group_xxx() call because
* that will simply return the same blockgroup, because the
* group descriptor metadata has not yet been updated.
* So we just go onto the next blockgroup.
*/
if (++group == sbi->s_groups_count)
group = 0;
} }
err = -ENOSPC;
goto out;
got:
ino += group * EXT3_INODES_PER_GROUP(sb) + 1; ino += group * EXT3_INODES_PER_GROUP(sb) + 1;
if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
ext3_error (sb, "ext3_new_inode", ext3_error (sb, "ext3_new_inode",
...@@ -500,18 +518,21 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) ...@@ -500,18 +518,21 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode)
BUFFER_TRACE(bh2, "get_write_access"); BUFFER_TRACE(bh2, "get_write_access");
err = ext3_journal_get_write_access(handle, bh2); err = ext3_journal_get_write_access(handle, bh2);
if (err) goto fail; if (err) goto fail;
spin_lock(&EXT3_SB(sb)->s_bgi[group].bg_ialloc_lock); spin_lock(sb_bgl_lock(sbi, group));
gdp->bg_free_inodes_count = gdp->bg_free_inodes_count =
cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
if (S_ISDIR(mode)) { if (S_ISDIR(mode)) {
gdp->bg_used_dirs_count = gdp->bg_used_dirs_count =
cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
} }
spin_unlock(&EXT3_SB(sb)->s_bgi[group].bg_ialloc_lock); spin_unlock(sb_bgl_lock(sbi, group));
BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, bh2); err = ext3_journal_dirty_metadata(handle, bh2);
if (err) goto fail; if (err) goto fail;
percpu_counter_dec(&sbi->s_freeinodes_counter);
if (S_ISDIR(mode))
percpu_counter_inc(&sbi->s_dirs_counter);
sb->s_dirt = 1; sb->s_dirt = 1;
inode->i_uid = current->fsuid; inode->i_uid = current->fsuid;
......
...@@ -460,7 +460,7 @@ void ext3_put_super (struct super_block * sb) ...@@ -460,7 +460,7 @@ void ext3_put_super (struct super_block * sb)
for (i = 0; i < sbi->s_gdb_count; i++) for (i = 0; i < sbi->s_gdb_count; i++)
brelse(sbi->s_group_desc[i]); brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc); kfree(sbi->s_group_desc);
kfree(sbi->s_bgi); kfree(sbi->s_debts);
brelse(sbi->s_sbh); brelse(sbi->s_sbh);
/* Debugging code just in case the in-memory inode orphan list /* Debugging code just in case the in-memory inode orphan list
...@@ -902,7 +902,6 @@ static int ext3_check_descriptors (struct super_block * sb) ...@@ -902,7 +902,6 @@ static int ext3_check_descriptors (struct super_block * sb)
unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block); unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block);
struct ext3_group_desc * gdp = NULL; struct ext3_group_desc * gdp = NULL;
unsigned long total_free; unsigned long total_free;
unsigned int reserved = le32_to_cpu(sbi->s_es->s_r_blocks_count);
int desc_block = 0; int desc_block = 0;
int i; int i;
...@@ -958,25 +957,6 @@ static int ext3_check_descriptors (struct super_block * sb) ...@@ -958,25 +957,6 @@ static int ext3_check_descriptors (struct super_block * sb)
EXT3_SB(sb)->s_es->s_free_blocks_count = cpu_to_le32(total_free); EXT3_SB(sb)->s_es->s_free_blocks_count = cpu_to_le32(total_free);
} }
/* distribute reserved blocks over groups -bzzz */
for(i = sbi->s_groups_count - 1; reserved && total_free && i >= 0; i--) {
int free;
gdp = ext3_get_group_desc (sb, i, NULL);
if (!gdp) {
ext3_error (sb, "ext3_check_descriptors",
"cant get descriptor for group %d", i);
return 0;
}
free = le16_to_cpu(gdp->bg_free_blocks_count);
if (free > reserved)
free = reserved;
sbi->s_bgi[i].bg_reserved = free;
reserved -= free;
total_free -= free;
}
total_free = ext3_count_free_inodes(sb); total_free = ext3_count_free_inodes(sb);
if (total_free != le32_to_cpu(EXT3_SB(sb)->s_es->s_free_inodes_count)) { if (total_free != le32_to_cpu(EXT3_SB(sb)->s_es->s_free_inodes_count)) {
printk("EXT3-fs: invalid s_free_inodes_count %u (real %lu)\n", printk("EXT3-fs: invalid s_free_inodes_count %u (real %lu)\n",
...@@ -1346,17 +1326,19 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) ...@@ -1346,17 +1326,19 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
printk (KERN_ERR "EXT3-fs: not enough memory\n"); printk (KERN_ERR "EXT3-fs: not enough memory\n");
goto failed_mount; goto failed_mount;
} }
sbi->s_bgi = kmalloc(sbi->s_groups_count * sizeof(struct ext3_bg_info), sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(u8),
GFP_KERNEL); GFP_KERNEL);
if (!sbi->s_bgi) { if (!sbi->s_debts) {
printk("EXT3-fs: not enough memory to allocate s_bgi\n"); printk("EXT3-fs: not enough memory to allocate s_bgi\n");
goto failed_mount2; goto failed_mount2;
} }
memset(sbi->s_bgi, 0, sbi->s_groups_count * sizeof(struct ext3_bg_info)); memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(u8));
for (i = 0; i < sbi->s_groups_count; i++) {
spin_lock_init(&sbi->s_bgi[i].bg_balloc_lock); percpu_counter_init(&sbi->s_freeblocks_counter);
spin_lock_init(&sbi->s_bgi[i].bg_ialloc_lock); percpu_counter_init(&sbi->s_freeinodes_counter);
} percpu_counter_init(&sbi->s_dirs_counter);
bgl_lock_init(&sbi->s_blockgroup_lock);
for (i = 0; i < db_count; i++) { for (i = 0; i < db_count; i++) {
block = descriptor_loc(sb, logic_sb_block, i); block = descriptor_loc(sb, logic_sb_block, i);
sbi->s_group_desc[i] = sb_bread(sb, block); sbi->s_group_desc[i] = sb_bread(sb, block);
...@@ -1469,12 +1451,19 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) ...@@ -1469,12 +1451,19 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
"writeback"); "writeback");
percpu_counter_mod(&sbi->s_freeblocks_counter,
ext3_count_free_blocks(sb));
percpu_counter_mod(&sbi->s_freeinodes_counter,
ext3_count_free_inodes(sb));
percpu_counter_mod(&sbi->s_dirs_counter,
ext3_count_dirs(sb));
return 0; return 0;
failed_mount3: failed_mount3:
journal_destroy(sbi->s_journal); journal_destroy(sbi->s_journal);
failed_mount2: failed_mount2:
kfree(sbi->s_bgi); kfree(sbi->s_debts);
for (i = 0; i < db_count; i++) for (i = 0; i < db_count; i++)
brelse(sbi->s_group_desc[i]); brelse(sbi->s_group_desc[i]);
kfree(sbi->s_group_desc); kfree(sbi->s_group_desc);
......
...@@ -19,15 +19,10 @@ ...@@ -19,15 +19,10 @@
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/timer.h> #include <linux/timer.h>
#include <linux/wait.h> #include <linux/wait.h>
#include <linux/blockgroup_lock.h>
#include <linux/percpu_counter.h>
#endif #endif
struct ext3_bg_info {
u8 bg_debts;
spinlock_t bg_balloc_lock;
spinlock_t bg_ialloc_lock;
unsigned long bg_reserved;
} ____cacheline_aligned_in_smp;
/* /*
* third extended-fs super-block data in memory * third extended-fs super-block data in memory
*/ */
...@@ -57,7 +52,11 @@ struct ext3_sb_info { ...@@ -57,7 +52,11 @@ struct ext3_sb_info {
u32 s_next_generation; u32 s_next_generation;
u32 s_hash_seed[4]; u32 s_hash_seed[4];
int s_def_hash_version; int s_def_hash_version;
struct ext3_bg_info *s_bgi; u8 *s_debts;
struct percpu_counter s_freeblocks_counter;
struct percpu_counter s_freeinodes_counter;
struct percpu_counter s_dirs_counter;
struct blockgroup_lock s_blockgroup_lock;
/* Journaling */ /* Journaling */
struct inode * s_journal_inode; struct inode * s_journal_inode;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment