Commit eabe0444 authored by Andrey Sidorov's avatar Andrey Sidorov Committed by Theodore Ts'o

ext4: speed-up releasing blocks on commit

Improve mb_free_blocks speed by clearing entire range at once instead
of iterating over each bit. Freeing block-by-block also makes buddy
bitmap subtree flip twice making most of the work a no-op. Very few
bits in buddy bitmap require change, e.g. freeing entire group is a 1
bit flip only.  As a result, releasing blocks of 60G file now takes
5ms instead of 2.7s.  This is especially good for non-preemptive
kernels as there is no rescheduling during release.
Signed-off-by: default avatarAndrey Sidorov <qrxd43@motorola.com>
Signed-off-by: default avatar"Theodore Ts'o" <tytso@mit.edu>
parent 5c1ff336
...@@ -405,6 +405,12 @@ static inline void mb_clear_bit(int bit, void *addr) ...@@ -405,6 +405,12 @@ static inline void mb_clear_bit(int bit, void *addr)
ext4_clear_bit(bit, addr); ext4_clear_bit(bit, addr);
} }
static inline int mb_test_and_clear_bit(int bit, void *addr)
{
addr = mb_correct_addr_and_bit(&bit, addr);
return ext4_test_and_clear_bit(bit, addr);
}
static inline int mb_find_next_zero_bit(void *addr, int max, int start) static inline int mb_find_next_zero_bit(void *addr, int max, int start)
{ {
int fix = 0, ret, tmpmax; int fix = 0, ret, tmpmax;
...@@ -764,6 +770,24 @@ void ext4_mb_generate_buddy(struct super_block *sb, ...@@ -764,6 +770,24 @@ void ext4_mb_generate_buddy(struct super_block *sb,
spin_unlock(&EXT4_SB(sb)->s_bal_lock); spin_unlock(&EXT4_SB(sb)->s_bal_lock);
} }
static void mb_regenerate_buddy(struct ext4_buddy *e4b)
{
int count;
int order = 1;
void *buddy;
while ((buddy = mb_find_buddy(e4b, order++, &count))) {
ext4_set_bits(buddy, 0, count);
}
e4b->bd_info->bb_fragments = 0;
memset(e4b->bd_info->bb_counters, 0,
sizeof(*e4b->bd_info->bb_counters) *
(e4b->bd_sb->s_blocksize_bits + 2));
ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
e4b->bd_bitmap, e4b->bd_group);
}
/* The buddy information is attached the buddy cache inode /* The buddy information is attached the buddy cache inode
* for convenience. The information regarding each group * for convenience. The information regarding each group
* is loaded via ext4_mb_load_buddy. The information involve * is loaded via ext4_mb_load_buddy. The information involve
...@@ -1246,6 +1270,33 @@ static void mb_clear_bits(void *bm, int cur, int len) ...@@ -1246,6 +1270,33 @@ static void mb_clear_bits(void *bm, int cur, int len)
} }
} }
/* clear bits in given range
* will return first found zero bit if any, -1 otherwise
*/
static int mb_test_and_clear_bits(void *bm, int cur, int len)
{
__u32 *addr;
int zero_bit = -1;
len = cur + len;
while (cur < len) {
if ((cur & 31) == 0 && (len - cur) >= 32) {
/* fast path: clear whole word at once */
addr = bm + (cur >> 3);
if (*addr != (__u32)(-1) && zero_bit == -1)
zero_bit = cur + mb_find_next_zero_bit(addr, 32, 0);
*addr = 0;
cur += 32;
continue;
}
if (!mb_test_and_clear_bit(cur, bm) && zero_bit == -1)
zero_bit = cur;
cur++;
}
return zero_bit;
}
void ext4_set_bits(void *bm, int cur, int len) void ext4_set_bits(void *bm, int cur, int len)
{ {
__u32 *addr; __u32 *addr;
...@@ -1264,17 +1315,90 @@ void ext4_set_bits(void *bm, int cur, int len) ...@@ -1264,17 +1315,90 @@ void ext4_set_bits(void *bm, int cur, int len)
} }
} }
/*
* _________________________________________________________________ */
static inline int mb_buddy_adjust_border(int* bit, void* bitmap, int side)
{
if (mb_test_bit(*bit + side, bitmap)) {
mb_clear_bit(*bit, bitmap);
(*bit) -= side;
return 1;
}
else {
(*bit) += side;
mb_set_bit(*bit, bitmap);
return -1;
}
}
static void mb_buddy_mark_free(struct ext4_buddy *e4b, int first, int last)
{
int max;
int order = 1;
void *buddy = mb_find_buddy(e4b, order, &max);
while (buddy) {
void *buddy2;
/* Bits in range [first; last] are known to be set since
* corresponding blocks were allocated. Bits in range
* (first; last) will stay set because they form buddies on
* upper layer. We just deal with borders if they don't
* align with upper layer and then go up.
* Releasing entire group is all about clearing
* single bit of highest order buddy.
*/
/* Example:
* ---------------------------------
* | 1 | 1 | 1 | 1 |
* ---------------------------------
* | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
* ---------------------------------
* 0 1 2 3 4 5 6 7
* \_____________________/
*
* Neither [1] nor [6] is aligned to above layer.
* Left neighbour [0] is free, so mark it busy,
* decrease bb_counters and extend range to
* [0; 6]
* Right neighbour [7] is busy. It can't be coaleasced with [6], so
* mark [6] free, increase bb_counters and shrink range to
* [0; 5].
* Then shift range to [0; 2], go up and do the same.
*/
if (first & 1)
e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&first, buddy, -1);
if (!(last & 1))
e4b->bd_info->bb_counters[order] += mb_buddy_adjust_border(&last, buddy, 1);
if (first > last)
break;
order++;
if (first == last || !(buddy2 = mb_find_buddy(e4b, order, &max))) {
mb_clear_bits(buddy, first, last - first + 1);
e4b->bd_info->bb_counters[order - 1] += last - first + 1;
break;
}
first >>= 1;
last >>= 1;
buddy = buddy2;
}
}
static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
int first, int count) int first, int count)
{ {
int block = 0; int left_is_free = 0;
int max = 0; int right_is_free = 0;
int order; int block;
void *buddy; int last = first + count - 1;
void *buddy2;
struct super_block *sb = e4b->bd_sb; struct super_block *sb = e4b->bd_sb;
BUG_ON(first + count > (sb->s_blocksize << 3)); BUG_ON(last >= (sb->s_blocksize << 3));
assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
mb_check_buddy(e4b); mb_check_buddy(e4b);
mb_free_blocks_double(inode, e4b, first, count); mb_free_blocks_double(inode, e4b, first, count);
...@@ -1283,67 +1407,54 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, ...@@ -1283,67 +1407,54 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
if (first < e4b->bd_info->bb_first_free) if (first < e4b->bd_info->bb_first_free)
e4b->bd_info->bb_first_free = first; e4b->bd_info->bb_first_free = first;
/* let's maintain fragments counter */ /* access memory sequentially: check left neighbour,
* clear range and then check right neighbour
*/
if (first != 0) if (first != 0)
block = !mb_test_bit(first - 1, e4b->bd_bitmap); left_is_free = !mb_test_bit(first - 1, e4b->bd_bitmap);
if (first + count < EXT4_SB(sb)->s_mb_maxs[0]) block = mb_test_and_clear_bits(e4b->bd_bitmap, first, count);
max = !mb_test_bit(first + count, e4b->bd_bitmap); if (last + 1 < EXT4_SB(sb)->s_mb_maxs[0])
if (block && max) right_is_free = !mb_test_bit(last + 1, e4b->bd_bitmap);
e4b->bd_info->bb_fragments--;
else if (!block && !max)
e4b->bd_info->bb_fragments++;
/* let's maintain buddy itself */
while (count-- > 0) {
block = first++;
order = 0;
if (!mb_test_bit(block, e4b->bd_bitmap)) {
ext4_fsblk_t blocknr;
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += EXT4_C2B(EXT4_SB(sb), block);
ext4_grp_locked_error(sb, e4b->bd_group,
inode ? inode->i_ino : 0,
blocknr,
"freeing already freed block "
"(bit %u)", block);
}
mb_clear_bit(block, e4b->bd_bitmap);
e4b->bd_info->bb_counters[order]++;
/* start of the buddy */
buddy = mb_find_buddy(e4b, order, &max);
do { if (unlikely(block != -1)) {
block &= ~1UL; ext4_fsblk_t blocknr;
if (mb_test_bit(block, buddy) ||
mb_test_bit(block + 1, buddy))
break;
/* both the buddies are free, try to coalesce them */ blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
buddy2 = mb_find_buddy(e4b, order + 1, &max); blocknr += EXT4_C2B(EXT4_SB(sb), block);
ext4_grp_locked_error(sb, e4b->bd_group,
inode ? inode->i_ino : 0,
blocknr,
"freeing already freed block "
"(bit %u)", block);
mb_regenerate_buddy(e4b);
goto done;
}
if (!buddy2) /* let's maintain fragments counter */
break; if (left_is_free && right_is_free)
e4b->bd_info->bb_fragments--;
else if (!left_is_free && !right_is_free)
e4b->bd_info->bb_fragments++;
if (order > 0) { /* buddy[0] == bd_bitmap is a special case, so handle
/* for special purposes, we don't set * it right away and let mb_buddy_mark_free stay free of
* free bits in bitmap */ * zero order checks.
mb_set_bit(block, buddy); * Check if neighbours are to be coaleasced,
mb_set_bit(block + 1, buddy); * adjust bitmap bb_counters and borders appropriately.
} */
e4b->bd_info->bb_counters[order]--; if (first & 1) {
e4b->bd_info->bb_counters[order]--; first += !left_is_free;
e4b->bd_info->bb_counters[0] += left_is_free ? -1 : 1;
}
if (!(last & 1)) {
last -= !right_is_free;
e4b->bd_info->bb_counters[0] += right_is_free ? -1 : 1;
}
block = block >> 1; if (first <= last)
order++; mb_buddy_mark_free(e4b, first >> 1, last >> 1);
e4b->bd_info->bb_counters[order]++;
mb_clear_bit(block, buddy2); done:
buddy = buddy2;
} while (1);
}
mb_set_largest_free_order(sb, e4b->bd_info); mb_set_largest_free_order(sb, e4b->bd_info);
mb_check_buddy(e4b); mb_check_buddy(e4b);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment