Commit 2fd26cc0 authored by Dave Chinner's avatar Dave Chinner Committed by Dave Chinner

xfs: double link the unlinked inode list

Now we have forwards traversal via the incore inode in place, we now
need to add back pointers to the incore inode to entirely replace
the back reference cache. We use the same lookup semantics and
constraints as for the forwards pointer lookups during unlinks, and
so we can look up any inode in the unlinked list directly and update
the list pointers, forwards or backwards, at any time.

The only wrinkle in converting the unlinked list manipulations to
use in-core previous pointers is that log recovery doesn't have the
incore inode state built up so it can't just read in an inode and
release it to finish off the unlink. Hence we need to modify the
traversal in recovery to read one inode ahead before we
release the inode at the head of the list. This populates the
next->prev relationship sufficient to be able to replay the unlinked
list and hence greatly simplify the runtime code.

This recovery algorithm also requires that we actually remove inodes
from the unlinked list one at a time as background inode
inactivation will result in unlinked list removal racing with the
building of the in-memory unlinked list state. We could serialise
this by holding the AGI buffer lock when constructing the in memory
state, but all that does is lockstep background processing with list
building. It is much simpler to flush the inodegc immediately after
releasing the inode so that it is unlinked immediately and there is
no races present at all.
Signed-off-by: default avatarDave Chinner <dchinner@redhat.com>
Reviewed-by: default avatarDarrick J. Wong <djwong@kernel.org>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
parent a83d5a8b
......@@ -194,7 +194,6 @@ xfs_free_perag(
XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
cancel_delayed_work_sync(&pag->pag_blockgc_work);
xfs_iunlink_destroy(pag);
xfs_buf_hash_destroy(pag);
call_rcu(&pag->rcu_head, __xfs_free_perag);
......@@ -323,10 +322,6 @@ xfs_initialize_perag(
if (error)
goto out_remove_pag;
error = xfs_iunlink_init(pag);
if (error)
goto out_hash_destroy;
/* first new pag is fully initialized */
if (first_initialised == NULLAGNUMBER)
first_initialised = index;
......@@ -349,8 +344,6 @@ xfs_initialize_perag(
mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
return 0;
out_hash_destroy:
xfs_buf_hash_destroy(pag);
out_remove_pag:
radix_tree_delete(&mp->m_perag_tree, index);
out_free_pag:
......@@ -362,7 +355,6 @@ xfs_initialize_perag(
if (!pag)
break;
xfs_buf_hash_destroy(pag);
xfs_iunlink_destroy(pag);
kmem_free(pag);
}
return error;
......
......@@ -103,12 +103,6 @@ struct xfs_perag {
/* background prealloc block trimming */
struct delayed_work pag_blockgc_work;
/*
* Unlinked inode information. This incore information reflects
* data stored in the AGI, so callers must hold the AGI buffer lock
* or have some other means to control concurrency.
*/
struct rhashtable pagi_unlinked_hash;
#endif /* __KERNEL__ */
};
......
......@@ -111,6 +111,8 @@ xfs_inode_alloc(
INIT_WORK(&ip->i_ioend_work, xfs_end_io);
INIT_LIST_HEAD(&ip->i_ioend_list);
spin_lock_init(&ip->i_ioend_lock);
ip->i_next_unlinked = NULLAGINO;
ip->i_prev_unlinked = NULLAGINO;
return ip;
}
......
This diff is collapsed.
......@@ -70,6 +70,7 @@ typedef struct xfs_inode {
/* unlinked list pointers */
xfs_agino_t i_next_unlinked;
xfs_agino_t i_prev_unlinked;
/* VFS inode */
struct inode i_vnode; /* embedded VFS inode */
......@@ -508,9 +509,6 @@ extern struct kmem_cache *xfs_inode_cache;
bool xfs_inode_needs_inactive(struct xfs_inode *ip);
int xfs_iunlink_init(struct xfs_perag *pag);
void xfs_iunlink_destroy(struct xfs_perag *pag);
void xfs_end_io(struct work_struct *work);
int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
......
......@@ -2674,28 +2674,50 @@ xlog_recover_iunlink_bucket(
int bucket)
{
struct xfs_mount *mp = pag->pag_mount;
struct xfs_inode *prev_ip = NULL;
struct xfs_inode *ip;
xfs_agino_t agino;
xfs_agino_t prev_agino, agino;
int error = 0;
agino = be32_to_cpu(agi->agi_unlinked[bucket]);
while (agino != NULLAGINO) {
int error;
error = xfs_iget(mp, NULL,
XFS_AGINO_TO_INO(mp, pag->pag_agno, agino),
0, 0, &ip);
if (error)
return error;;
break;
ASSERT(VFS_I(ip)->i_nlink == 0);
ASSERT(VFS_I(ip)->i_mode != 0);
xfs_iflags_clear(ip, XFS_IRECOVERY);
agino = ip->i_next_unlinked;
xfs_irele(ip);
cond_resched();
if (prev_ip) {
ip->i_prev_unlinked = prev_agino;
xfs_irele(prev_ip);
/*
* Ensure the inode is removed from the unlinked list
* before we continue so that it won't race with
* building the in-memory list here. This could be
* serialised with the agibp lock, but that just
* serialises via lockstepping and it's much simpler
* just to flush the inodegc queue and wait for it to
* complete.
*/
xfs_inodegc_flush(mp);
}
prev_agino = agino;
prev_ip = ip;
}
return 0;
if (prev_ip) {
ip->i_prev_unlinked = prev_agino;
xfs_irele(prev_ip);
}
xfs_inodegc_flush(mp);
return error;
}
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment