Commit 8237fbf5, authored and committed by Dave Chinner

xfs: clean up and simplify xfs_dialloc()

Because it's a mess.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
parent 309161f6
...@@ -604,9 +604,10 @@ xfs_inobt_insert_sprec( ...@@ -604,9 +604,10 @@ xfs_inobt_insert_sprec(
} }
/* /*
* Allocate new inodes in the allocation group specified by agbp. * Allocate new inodes in the allocation group specified by agbp. Returns 0 if
* Returns 0 if inodes were allocated in this AG; 1 if there was no space * inodes were allocated in this AG; -EAGAIN if there was no space in this AG so
* in this AG; or the usual negative error code. * the caller knows it can try another AG, a hard -ENOSPC when over the maximum
* inode count threshold, or the usual negative error code for other errors.
*/ */
STATIC int STATIC int
xfs_ialloc_ag_alloc( xfs_ialloc_ag_alloc(
...@@ -792,7 +793,7 @@ xfs_ialloc_ag_alloc( ...@@ -792,7 +793,7 @@ xfs_ialloc_ag_alloc(
} }
if (args.fsbno == NULLFSBLOCK) if (args.fsbno == NULLFSBLOCK)
return 1; return -EAGAIN;
ASSERT(args.len == args.minlen); ASSERT(args.len == args.minlen);
...@@ -1568,14 +1569,17 @@ xfs_dialloc_roll( ...@@ -1568,14 +1569,17 @@ xfs_dialloc_roll(
/* Re-attach the quota info that we detached from prev trx. */ /* Re-attach the quota info that we detached from prev trx. */
tp->t_dqinfo = dqinfo; tp->t_dqinfo = dqinfo;
/*
* Join the buffer even on commit error so that the buffer is released
* when the caller cancels the transaction and doesn't have to handle
* this error case specially.
*/
xfs_trans_bjoin(tp, agibp);
*tpp = tp; *tpp = tp;
if (error)
return error; return error;
xfs_trans_bjoin(tp, agibp);
return 0;
} }
STATIC xfs_agnumber_t static xfs_agnumber_t
xfs_ialloc_next_ag( xfs_ialloc_next_ag(
xfs_mount_t *mp) xfs_mount_t *mp)
{ {
...@@ -1590,16 +1594,136 @@ xfs_ialloc_next_ag( ...@@ -1590,16 +1594,136 @@ xfs_ialloc_next_ag(
return agno; return agno;
} }
/*
 * Cheap, AGI-unlocked pre-check of whether @pag is worth attempting an inode
 * allocation in.
 *
 * Returns true when the AG already advertises free inodes, or when @ok_alloc
 * permits allocating a new inode chunk and the AG's free space counters look
 * large enough to hold one.  Returns false when the AG is not usable for
 * inodes, when the per-AG inode or free space state cannot be initialised,
 * or when there is not enough room.  Initialisation errors are not reported
 * to the caller; they simply make the AG look unsuitable.
 *
 * @mode is only used to decide whether to budget one extra block for the new
 * file itself; @flags is handed to xfs_alloc_pagf_init() and, when non-zero,
 * also causes alignment slack to be added to the space requirement.
 */
static bool
xfs_dialloc_good_ag(
	struct xfs_trans	*tp,
	struct xfs_perag	*pag,
	umode_t			mode,
	int			flags,
	bool			ok_alloc)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_extlen_t		chunk_blks;
	xfs_extlen_t		best_extent = 0;
	int			file_blks;

	if (!pag->pagi_inodeok)
		return false;

	if (!pag->pagi_init &&
	    xfs_ialloc_pagi_init(mp, tp, pag->pag_agno))
		return false;

	/* Free inodes already exist here, so no space check is required. */
	if (pag->pagi_freecount)
		return true;

	/* No free inodes and the caller has forbidden allocating more. */
	if (!ok_alloc)
		return false;

	if (!pag->pagf_init &&
	    xfs_alloc_pagf_init(mp, tp, pag->pag_agno, flags))
		return false;

	/*
	 * A new inode chunk is needed, so make sure the AG can hold both the
	 * chunk and the file being created.
	 *
	 * While @flags is non-zero (the caller's first sweep over the AGs),
	 * pad the requirement with the inode cluster alignment.  Otherwise an
	 * AG whose longest free extent is nominally big enough could still
	 * fail the allocation purely because of alignment, producing a
	 * spurious ENOSPC.
	 *
	 * Once @flags has been cleared (the second sweep), every AG must be
	 * close to full, so the padding is dropped; an alignment failure at
	 * that point is most likely a genuine ENOSPC.
	 *
	 * XXX(dgc): this estimate is bogus now that xfs_alloc_fix_freelist()
	 * enforces per-AG reservations through xfs_alloc_space_available().
	 * As an AG fills, pagf_freeblks can easily satisfy the test below
	 * while xfs_alloc_space_available() still refuses the allocation
	 * because of the non-zero metadata reservation, so no inodes can
	 * actually be allocated here.  A great deal of pointless work is done
	 * near ENOSPC as a result.
	 */
	chunk_blks = igeo->ialloc_min_blks;
	if (flags && chunk_blks > 1)
		chunk_blks += igeo->cluster_align;

	/*
	 * With no recorded longest extent, count a single block if the AG
	 * free list is non-empty.
	 */
	best_extent = pag->pagf_longest;
	if (!best_extent)
		best_extent = pag->pagf_flcount > 0;

	/* Directories, regular files and symlinks budget one extra block. */
	file_blks = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);

	return pag->pagf_freeblks >= file_blks + chunk_blks &&
	       best_extent >= chunk_blks;
}
/*
 * Attempt to allocate one on-disk inode from @pag.
 *
 * The AGI buffer is read first and the free inode count is then re-examined.
 * If the AG has no free inodes and @ok_alloc is set, a new inode chunk is
 * allocated and the transaction in @tpp is rolled before an inode is taken
 * from it.
 *
 * Returns 0 with the allocated inode number stored in @new_ino, -EAGAIN when
 * the AG has no free inodes and @ok_alloc is false (xfs_ialloc_ag_alloc() may
 * also return -EAGAIN), or another negative errno from the helpers called
 * here.  @new_ino is left untouched on failure.
 */
static int
xfs_dialloc_try_ag(
	struct xfs_trans	**tpp,
	struct xfs_perag	*pag,
	xfs_ino_t		parent,
	xfs_ino_t		*new_ino,
	bool			ok_alloc)
{
	struct xfs_buf		*agibp;
	xfs_ino_t		allocated;
	int			ret;

	/*
	 * Read the AGI buffer; the free inode count is re-examined below once
	 * it has been obtained.
	 */
	ret = xfs_ialloc_read_agi(pag->pag_mount, *tpp, pag->pag_agno, &agibp);
	if (ret)
		return ret;

	if (pag->pagi_freecount)
		goto alloc_inode;

	/* Nothing free here and we may not grow the inode space. */
	if (!ok_alloc) {
		ret = -EAGAIN;
		goto release_agi;
	}

	ret = xfs_ialloc_ag_alloc(*tpp, agibp, pag);
	if (ret < 0)
		goto release_agi;

	/*
	 * Space for a new inode cluster now exists in this AG.  Roll the
	 * transaction so that one of the fresh inodes can be allocated.
	 */
	ASSERT(pag->pagi_freecount > 0);
	ret = xfs_dialloc_roll(tpp, agibp);
	if (ret)
		goto release_agi;

alloc_inode:
	/* Take an inode from this AG. */
	ret = xfs_dialloc_ag(*tpp, agibp, pag, parent, &allocated);
	if (ret)
		return ret;

	*new_ino = allocated;
	return 0;

release_agi:
	xfs_trans_brelse(*tpp, agibp);
	return ret;
}
/* /*
* Select and prepare an AG for inode allocation. * Allocate an on-disk inode.
* *
* Mode is used to tell whether the new inode is a directory and hence where to * Mode is used to tell whether the new inode is a directory and hence where to
* locate it. * locate it. The on-disk inode that is allocated will be returned in @new_ino
* * on success, otherwise an error will be set to indicate the failure (e.g.
* This function will ensure that the selected AG has free inodes available to * -ENOSPC).
* allocate from. The selected AGI will be returned locked to the caller, and it
* will allocate more free inodes if required. If no free inodes are found or
* can be allocated, -ENOSPC be returned.
*/ */
int int
xfs_dialloc( xfs_dialloc(
...@@ -1609,14 +1733,12 @@ xfs_dialloc( ...@@ -1609,14 +1733,12 @@ xfs_dialloc(
xfs_ino_t *new_ino) xfs_ino_t *new_ino)
{ {
struct xfs_mount *mp = (*tpp)->t_mountp; struct xfs_mount *mp = (*tpp)->t_mountp;
struct xfs_buf *agbp;
xfs_agnumber_t agno; xfs_agnumber_t agno;
int error = 0; int error = 0;
xfs_agnumber_t start_agno; xfs_agnumber_t start_agno;
struct xfs_perag *pag; struct xfs_perag *pag;
struct xfs_ino_geometry *igeo = M_IGEO(mp); struct xfs_ino_geometry *igeo = M_IGEO(mp);
bool okalloc = true; bool ok_alloc = true;
int needspace;
int flags; int flags;
xfs_ino_t ino; xfs_ino_t ino;
...@@ -1625,7 +1747,6 @@ xfs_dialloc( ...@@ -1625,7 +1747,6 @@ xfs_dialloc(
* one block, so factor that potential expansion when we examine whether * one block, so factor that potential expansion when we examine whether
* an AG has enough space for file creation. * an AG has enough space for file creation.
*/ */
needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
if (S_ISDIR(mode)) if (S_ISDIR(mode))
start_agno = xfs_ialloc_next_ag(mp); start_agno = xfs_ialloc_next_ag(mp);
else { else {
...@@ -1636,7 +1757,7 @@ xfs_dialloc( ...@@ -1636,7 +1757,7 @@ xfs_dialloc(
/* /*
* If we have already hit the ceiling of inode blocks then clear * If we have already hit the ceiling of inode blocks then clear
* okalloc so we scan all available agi structures for a free * ok_alloc so we scan all available agi structures for a free
* inode. * inode.
* *
* Read rough value of mp->m_icount by percpu_counter_read_positive, * Read rough value of mp->m_icount by percpu_counter_read_positive,
...@@ -1645,7 +1766,7 @@ xfs_dialloc( ...@@ -1645,7 +1766,7 @@ xfs_dialloc(
if (igeo->maxicount && if (igeo->maxicount &&
percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos
> igeo->maxicount) { > igeo->maxicount) {
okalloc = false; ok_alloc = false;
} }
/* /*
...@@ -1656,96 +1777,14 @@ xfs_dialloc( ...@@ -1656,96 +1777,14 @@ xfs_dialloc(
agno = start_agno; agno = start_agno;
flags = XFS_ALLOC_FLAG_TRYLOCK; flags = XFS_ALLOC_FLAG_TRYLOCK;
for (;;) { for (;;) {
xfs_extlen_t ineed;
xfs_extlen_t longest = 0;
pag = xfs_perag_get(mp, agno); pag = xfs_perag_get(mp, agno);
if (!pag->pagi_inodeok) if (xfs_dialloc_good_ag(*tpp, pag, mode, flags, ok_alloc)) {
goto nextag; error = xfs_dialloc_try_ag(tpp, pag, parent,
&ino, ok_alloc);
if (!pag->pagi_init) { if (error != -EAGAIN)
error = xfs_ialloc_pagi_init(mp, *tpp, agno);
if (error)
break;
}
if (!pag->pagi_freecount && !okalloc)
goto nextag;
if (!pag->pagf_init) {
error = xfs_alloc_pagf_init(mp, *tpp, agno, flags);
if (error)
goto nextag;
}
/*
* Check that there is enough free space for the file plus a
* chunk of inodes if we need to allocate some. If this is the
* first pass across the AGs, take into account the potential
* space needed for alignment of inode chunks when checking the
* longest contiguous free space in the AG - this prevents us
* from getting ENOSPC because we have free space larger than
* ialloc_blks but alignment constraints prevent us from using
* it.
*
* If we can't find an AG with space for full alignment slack to
* be taken into account, we must be near ENOSPC in all AGs.
* Hence we don't include alignment for the second pass and so
* if we fail allocation due to alignment issues then it is most
* likely a real ENOSPC condition.
*/
if (!pag->pagi_freecount) {
ineed = M_IGEO(mp)->ialloc_min_blks;
if (flags && ineed > 1)
ineed += M_IGEO(mp)->cluster_align;
longest = pag->pagf_longest;
if (!longest)
longest = pag->pagf_flcount > 0;
if (pag->pagf_freeblks < needspace + ineed ||
longest < ineed)
goto nextag;
}
/*
* Then read in the AGI buffer and recheck with the AGI buffer
* lock held.
*/
error = xfs_ialloc_read_agi(mp, *tpp, agno, &agbp);
if (error)
break;
if (pag->pagi_freecount)
goto found_ag;
if (!okalloc)
goto nextag_relse_buffer;
error = xfs_ialloc_ag_alloc(*tpp, agbp, pag);
if (error < 0) {
xfs_trans_brelse(*tpp, agbp);
break; break;
} }
if (error == 0) {
/*
* We successfully allocated space for an inode cluster
* in this AG. Roll the transaction so that we can
* allocate one of the new inodes.
*/
ASSERT(pag->pagi_freecount > 0);
error = xfs_dialloc_roll(tpp, agbp);
if (error) {
xfs_buf_relse(agbp);
break;
}
goto found_ag;
}
nextag_relse_buffer:
xfs_trans_brelse(*tpp, agbp);
nextag:
if (XFS_FORCED_SHUTDOWN(mp)) { if (XFS_FORCED_SHUTDOWN(mp)) {
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
break; break;
...@@ -1753,23 +1792,19 @@ xfs_dialloc( ...@@ -1753,23 +1792,19 @@ xfs_dialloc(
if (++agno == mp->m_maxagi) if (++agno == mp->m_maxagi)
agno = 0; agno = 0;
if (agno == start_agno) { if (agno == start_agno) {
if (!flags) if (!flags) {
error = -ENOSPC;
break; break;
}
flags = 0; flags = 0;
} }
xfs_perag_put(pag); xfs_perag_put(pag);
} }
if (!error)
*new_ino = ino;
xfs_perag_put(pag); xfs_perag_put(pag);
return error ? error : -ENOSPC;
found_ag:
/* Allocate an inode in the found AG */
error = xfs_dialloc_ag(*tpp, agbp, pag, parent, &ino);
xfs_perag_put(pag);
if (error)
return error; return error;
*new_ino = ino;
return 0;
} }
/* /*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment