Commit 2de8807b authored by Amir Goldstein, committed by Theodore Ts'o

ext4: synchronize ext4_mb_init_group() with buddy page lock

The old routines ext4_mb_[get|put]_buddy_cache_lock(), which used
to take grp->alloc_sem for all groups on the buddy page, have been
replaced with the routines ext4_mb_[get|put]_buddy_page_lock().

The new routines take both buddy and bitmap page locks to protect
against concurrent init of groups on the same buddy page.

The GROUP_NEED_INIT flag is tested again under page lock to check
if the group was initialized by another caller.
Signed-off-by: Amir Goldstein <amir73il@users.sf.net>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
parent e73a347b
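
As a reading aid before the diff itself, here is a small stand-alone user-space sketch of the synchronization pattern the commit message describes: the need-init state is checked once without the lock, then re-tested after taking the lock that covers the shared page, so only one of several concurrent callers performs the initialization. The sketch is illustrative only and is not ext4 code: the names group_desc, page_lock, need_init and init_group_buddy are invented here, a pthread mutex stands in for the buddy page lock, and an atomic flag stands in for the EXT4_MB_GRP_NEED_INIT group state bit.

/*
 * Illustrative user-space model of the pattern in this commit, NOT ext4
 * code: the "need init" flag is checked cheaply first, then re-tested
 * after taking the lock that covers the shared page, so concurrent
 * callers cannot both initialize the same group.  All names here are
 * stand-ins invented for the sketch.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct group_desc {
        pthread_mutex_t page_lock;      /* stand-in for the buddy page lock */
        atomic_bool need_init;          /* stand-in for EXT4_MB_GRP_NEED_INIT */
        int init_count;                 /* counts how often init really ran */
};

static void init_group_buddy(struct group_desc *grp)
{
        grp->init_count++;              /* the expensive init work goes here */
        atomic_store(&grp->need_init, false);
}

static void *init_group(void *arg)
{
        struct group_desc *grp = arg;

        if (!atomic_load(&grp->need_init))      /* cheap unlocked fast path */
                return NULL;

        pthread_mutex_lock(&grp->page_lock);
        if (atomic_load(&grp->need_init))       /* re-test under the lock */
                init_group_buddy(grp);
        pthread_mutex_unlock(&grp->page_lock);
        return NULL;
}

int main(void)
{
        struct group_desc grp = {
                .page_lock = PTHREAD_MUTEX_INITIALIZER,
                .need_init = true,
        };
        pthread_t t[4];

        for (int i = 0; i < 4; i++)
                pthread_create(&t[i], NULL, init_group, &grp);
        for (int i = 0; i < 4; i++)
                pthread_join(t[i], NULL);

        /* prints 1 no matter how the four threads interleave */
        printf("init ran %d time(s)\n", grp.init_count);
        return 0;
}

Built with something like cc -pthread, this prints "init ran 1 time(s)" for any interleaving; the patch below obtains the same guarantee by re-testing EXT4_MB_GRP_NEED_INIT only after ext4_mb_get_buddy_page_lock() has locked the bitmap page (and the buddy page, when it lives on a separate page).
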
@@ -957,22 +957,21 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
 }
 
 /*
- * lock the group_info alloc_sem of all the groups
- * belonging to the same buddy cache page. This
- * make sure other parallel operation on the buddy
- * cache doesn't happen whild holding the buddy cache
- * lock
+ * Lock the buddy and bitmap pages. This make sure other parallel init_group
+ * on the same buddy page doesn't happen whild holding the buddy page lock.
+ * Return locked buddy and bitmap pages on e4b struct. If buddy and bitmap
+ * are on the same page e4b->bd_buddy_page is NULL and return value is 0.
  */
-static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
-                                        ext4_group_t group)
+static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
+                ext4_group_t group, struct ext4_buddy *e4b)
 {
-        int i;
-        int block, pnum;
+        struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
+        int block, pnum, poff;
         int blocks_per_page;
-        int groups_per_page;
-        ext4_group_t ngroups = ext4_get_groups_count(sb);
-        ext4_group_t first_group;
-        struct ext4_group_info *grp;
+        struct page *page;
+
+        e4b->bd_buddy_page = NULL;
+        e4b->bd_bitmap_page = NULL;
 
         blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
         /*
@@ -982,57 +981,40 @@ static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
          */
         block = group * 2;
         pnum = block / blocks_per_page;
-        first_group = pnum * blocks_per_page / 2;
-
-        groups_per_page = blocks_per_page >> 1;
-        if (groups_per_page == 0)
-                groups_per_page = 1;
-        /* read all groups the page covers into the cache */
-        for (i = 0; i < groups_per_page; i++) {
-
-                if ((first_group + i) >= ngroups)
-                        break;
-                grp = ext4_get_group_info(sb, first_group + i);
-                /* take all groups write allocation
-                 * semaphore. This make sure there is
-                 * no block allocation going on in any
-                 * of that groups
-                 */
-                down_write_nested(&grp->alloc_sem, i);
+        poff = block % blocks_per_page;
+        page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+        if (!page)
+                return -EIO;
+        BUG_ON(page->mapping != inode->i_mapping);
+        e4b->bd_bitmap_page = page;
+        e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
+
+        if (blocks_per_page >= 2) {
+                /* buddy and bitmap are on the same page */
+                return 0;
         }
-        return i;
+
+        block++;
+        pnum = block / blocks_per_page;
+        poff = block % blocks_per_page;
+        page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+        if (!page)
+                return -EIO;
+        BUG_ON(page->mapping != inode->i_mapping);
+        e4b->bd_buddy_page = page;
+        return 0;
 }
 
-static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
-                                         ext4_group_t group, int locked_group)
+static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
 {
-        int i;
-        int block, pnum;
-        int blocks_per_page;
-        ext4_group_t first_group;
-        struct ext4_group_info *grp;
-
-        blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
-        /*
-         * the buddy cache inode stores the block bitmap
-         * and buddy information in consecutive blocks.
-         * So for each group we need two blocks.
-         */
-        block = group * 2;
-        pnum = block / blocks_per_page;
-        first_group = pnum * blocks_per_page / 2;
-        /* release locks on all the groups */
-        for (i = 0; i < locked_group; i++) {
-
-                grp = ext4_get_group_info(sb, first_group + i);
-                /* take all groups write allocation
-                 * semaphore. This make sure there is
-                 * no block allocation going on in any
-                 * of that groups
-                 */
-                up_write(&grp->alloc_sem);
+        if (e4b->bd_bitmap_page) {
+                unlock_page(e4b->bd_bitmap_page);
+                page_cache_release(e4b->bd_bitmap_page);
+        }
+        if (e4b->bd_buddy_page) {
+                unlock_page(e4b->bd_buddy_page);
+                page_cache_release(e4b->bd_buddy_page);
         }
-
 }
 
 /*
@@ -1044,93 +1026,60 @@ static noinline_for_stack
 int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 {
-        int ret = 0;
-        void *bitmap;
-        int blocks_per_page;
-        int block, pnum, poff;
-        int num_grp_locked = 0;
         struct ext4_group_info *this_grp;
-        struct ext4_sb_info *sbi = EXT4_SB(sb);
-        struct inode *inode = sbi->s_buddy_cache;
-        struct page *page = NULL, *bitmap_page = NULL;
+        struct ext4_buddy e4b;
+        struct page *page;
+        int ret = 0;
 
         mb_debug(1, "init group %u\n", group);
-        blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
         this_grp = ext4_get_group_info(sb, group);
         /*
          * This ensures that we don't reinit the buddy cache
          * page which map to the group from which we are already
          * allocating. If we are looking at the buddy cache we would
          * have taken a reference using ext4_mb_load_buddy and that
-         * would have taken the alloc_sem lock.
+         * would have pinned buddy page to page cache.
          */
-        num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
-        if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
+        ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
+        if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
                 /*
                  * somebody initialized the group
                  * return without doing anything
                  */
-                ret = 0;
                 goto err;
         }
-        /*
-         * the buddy cache inode stores the block bitmap
-         * and buddy information in consecutive blocks.
-         * So for each group we need two blocks.
-         */
-        block = group * 2;
-        pnum = block / blocks_per_page;
-        poff = block % blocks_per_page;
-        page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
-        if (page) {
-                BUG_ON(page->mapping != inode->i_mapping);
-                ret = ext4_mb_init_cache(page, NULL);
-                if (ret) {
-                        unlock_page(page);
-                        goto err;
-                }
-                unlock_page(page);
-        }
-        if (page == NULL || !PageUptodate(page)) {
+
+        page = e4b.bd_bitmap_page;
+        ret = ext4_mb_init_cache(page, NULL);
+        if (ret)
+                goto err;
+        if (!PageUptodate(page)) {
                 ret = -EIO;
                 goto err;
         }
         mark_page_accessed(page);
-        bitmap_page = page;
-        bitmap = page_address(page) + (poff * sb->s_blocksize);
 
-        /* init buddy cache */
-        block++;
-        pnum = block / blocks_per_page;
-        poff = block % blocks_per_page;
-        page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
-        if (page == bitmap_page) {
+        if (e4b.bd_buddy_page == NULL) {
                 /*
                  * If both the bitmap and buddy are in
                  * the same page we don't need to force
                  * init the buddy
                  */
-                unlock_page(page);
-        } else if (page) {
-                BUG_ON(page->mapping != inode->i_mapping);
-                ret = ext4_mb_init_cache(page, bitmap);
-                if (ret) {
-                        unlock_page(page);
-                        goto err;
-                }
-                unlock_page(page);
+                ret = 0;
+                goto err;
         }
-        if (page == NULL || !PageUptodate(page)) {
+        /* init buddy cache */
+        page = e4b.bd_buddy_page;
+        ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
+        if (ret)
+                goto err;
+        if (!PageUptodate(page)) {
                 ret = -EIO;
                 goto err;
         }
         mark_page_accessed(page);
 err:
-        ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
-        if (bitmap_page)
-                page_cache_release(bitmap_page);
-        if (page)
-                page_cache_release(page);
+        ext4_mb_put_buddy_page_lock(&e4b);
         return ret;
 }