Commit 44ad37d6 authored by Bob Peterson, committed by Steven Whitehouse

GFS2: filesystem hang caused by incorrect lock order

This patch fixes a deadlock in GFS2 where two processes are trying
to reclaim an unlinked dinode:
One holds the inode glock and calls gfs2_lookup_by_inum trying to look
up the inode, which it can't, due to I_FREEING.  The other has set
I_FREEING from vfs and is at the beginning of gfs2_delete_inode
waiting for the glock, which is held by the first.  The solution is to
add a new non_block parameter to the gfs2_iget function that causes it
to return -ENOENT if the inode is being freed.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
parent 001e8e8d
...@@ -1506,7 +1506,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name) ...@@ -1506,7 +1506,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name)
inode = gfs2_inode_lookup(dir->i_sb, inode = gfs2_inode_lookup(dir->i_sb,
be16_to_cpu(dent->de_type), be16_to_cpu(dent->de_type),
be64_to_cpu(dent->de_inum.no_addr), be64_to_cpu(dent->de_inum.no_addr),
be64_to_cpu(dent->de_inum.no_formal_ino)); be64_to_cpu(dent->de_inum.no_formal_ino), 0);
brelse(bh); brelse(bh);
return inode; return inode;
} }
......
...@@ -40,37 +40,61 @@ struct gfs2_inum_range_host { ...@@ -40,37 +40,61 @@ struct gfs2_inum_range_host {
u64 ir_length; u64 ir_length;
}; };
struct gfs2_skip_data {
u64 no_addr;
int skipped;
int non_block;
};
static int iget_test(struct inode *inode, void *opaque) static int iget_test(struct inode *inode, void *opaque)
{ {
struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_inode *ip = GFS2_I(inode);
u64 *no_addr = opaque; struct gfs2_skip_data *data = opaque;
if (ip->i_no_addr == *no_addr) if (ip->i_no_addr == data->no_addr) {
if (data->non_block &&
inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
data->skipped = 1;
return 0;
}
return 1; return 1;
}
return 0; return 0;
} }
static int iget_set(struct inode *inode, void *opaque) static int iget_set(struct inode *inode, void *opaque)
{ {
struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_inode *ip = GFS2_I(inode);
u64 *no_addr = opaque; struct gfs2_skip_data *data = opaque;
inode->i_ino = (unsigned long)*no_addr; if (data->skipped)
ip->i_no_addr = *no_addr; return -ENOENT;
inode->i_ino = (unsigned long)(data->no_addr);
ip->i_no_addr = data->no_addr;
return 0; return 0;
} }
struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr) struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
{ {
unsigned long hash = (unsigned long)no_addr; unsigned long hash = (unsigned long)no_addr;
return ilookup5(sb, hash, iget_test, &no_addr); struct gfs2_skip_data data;
data.no_addr = no_addr;
data.skipped = 0;
data.non_block = 0;
return ilookup5(sb, hash, iget_test, &data);
} }
static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr) static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
int non_block)
{ {
struct gfs2_skip_data data;
unsigned long hash = (unsigned long)no_addr; unsigned long hash = (unsigned long)no_addr;
return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
data.no_addr = no_addr;
data.skipped = 0;
data.non_block = non_block;
return iget5_locked(sb, hash, iget_test, iget_set, &data);
} }
/** /**
...@@ -111,19 +135,20 @@ static void gfs2_set_iop(struct inode *inode) ...@@ -111,19 +135,20 @@ static void gfs2_set_iop(struct inode *inode)
* @sb: The super block * @sb: The super block
* @no_addr: The inode number * @no_addr: The inode number
* @type: The type of the inode * @type: The type of the inode
* non_block: Can we block on inodes that are being freed?
* *
* Returns: A VFS inode, or an error * Returns: A VFS inode, or an error
*/ */
struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type, struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
u64 no_addr, u64 no_formal_ino) u64 no_addr, u64 no_formal_ino, int non_block)
{ {
struct inode *inode; struct inode *inode;
struct gfs2_inode *ip; struct gfs2_inode *ip;
struct gfs2_glock *io_gl = NULL; struct gfs2_glock *io_gl = NULL;
int error; int error;
inode = gfs2_iget(sb, no_addr); inode = gfs2_iget(sb, no_addr, non_block);
ip = GFS2_I(inode); ip = GFS2_I(inode);
if (!inode) if (!inode)
...@@ -185,11 +210,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, ...@@ -185,11 +210,12 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
{ {
struct super_block *sb = sdp->sd_vfs; struct super_block *sb = sdp->sd_vfs;
struct gfs2_holder i_gh; struct gfs2_holder i_gh;
struct inode *inode; struct inode *inode = NULL;
int error; int error;
/* Must not read in block until block type is verified */
error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops, error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
LM_ST_SHARED, LM_FLAG_ANY, &i_gh); LM_ST_EXCLUSIVE, GL_SKIP, &i_gh);
if (error) if (error)
return ERR_PTR(error); return ERR_PTR(error);
...@@ -197,7 +223,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, ...@@ -197,7 +223,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
if (error) if (error)
goto fail; goto fail;
inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0); inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1);
if (IS_ERR(inode)) if (IS_ERR(inode))
goto fail; goto fail;
...@@ -843,7 +869,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, ...@@ -843,7 +869,7 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
goto fail_gunlock2; goto fail_gunlock2;
inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr, inode = gfs2_inode_lookup(dir->i_sb, IF2DT(mode), inum.no_addr,
inum.no_formal_ino); inum.no_formal_ino, 0);
if (IS_ERR(inode)) if (IS_ERR(inode))
goto fail_gunlock2; goto fail_gunlock2;
......
...@@ -97,7 +97,8 @@ static inline int gfs2_check_internal_file_size(struct inode *inode, ...@@ -97,7 +97,8 @@ static inline int gfs2_check_internal_file_size(struct inode *inode,
} }
extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
u64 no_addr, u64 no_formal_ino); u64 no_addr, u64 no_formal_ino,
int non_block);
extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr, extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
u64 *no_formal_ino, u64 *no_formal_ino,
unsigned int blktype); unsigned int blktype);
......
...@@ -430,7 +430,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, ...@@ -430,7 +430,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
struct dentry *dentry; struct dentry *dentry;
struct inode *inode; struct inode *inode;
inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0); inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
if (IS_ERR(inode)) { if (IS_ERR(inode)) {
fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
return PTR_ERR(inode); return PTR_ERR(inode);
......
...@@ -945,7 +945,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip ...@@ -945,7 +945,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
/* rgblk_search can return a block < goal, so we need to /* rgblk_search can return a block < goal, so we need to
keep it marching forward. */ keep it marching forward. */
no_addr = block + rgd->rd_data0; no_addr = block + rgd->rd_data0;
goal++; goal = max(block + 1, goal + 1);
if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked)
continue; continue;
if (no_addr == skip) if (no_addr == skip)
...@@ -971,7 +971,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip ...@@ -971,7 +971,7 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
found++; found++;
/* Limit reclaim to sensible number of tasks */ /* Limit reclaim to sensible number of tasks */
if (found > 2*NR_CPUS) if (found > NR_CPUS)
return; return;
} }
......
...@@ -1327,7 +1327,8 @@ static void gfs2_evict_inode(struct inode *inode) ...@@ -1327,7 +1327,8 @@ static void gfs2_evict_inode(struct inode *inode)
if (inode->i_nlink || (sb->s_flags & MS_RDONLY)) if (inode->i_nlink || (sb->s_flags & MS_RDONLY))
goto out; goto out;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); /* Must not read inode block until block type has been verified */
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
if (unlikely(error)) { if (unlikely(error)) {
gfs2_glock_dq_uninit(&ip->i_iopen_gh); gfs2_glock_dq_uninit(&ip->i_iopen_gh);
goto out; goto out;
...@@ -1337,6 +1338,12 @@ static void gfs2_evict_inode(struct inode *inode) ...@@ -1337,6 +1338,12 @@ static void gfs2_evict_inode(struct inode *inode)
if (error) if (error)
goto out_truncate; goto out_truncate;
if (test_bit(GIF_INVALID, &ip->i_flags)) {
error = gfs2_inode_refresh(ip);
if (error)
goto out_truncate;
}
ip->i_iopen_gh.gh_flags |= GL_NOCACHE; ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
gfs2_glock_dq_wait(&ip->i_iopen_gh); gfs2_glock_dq_wait(&ip->i_iopen_gh);
gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh); gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment