Commit 408fd484 authored by Brian Foster's avatar Brian Foster Committed by Dave Chinner

xfs: refactor xfs_reserve_blocks() to handle ENOSPC correctly

xfs_reserve_blocks() is responsible to update the XFS reserved block
pool count at mount time or based on user request. When the caller
requests to increase the reserve pool, blocks must be allocated from
the global counters such that they are no longer available for
general purpose use. If the requested reserve pool size is too
large, XFS reserves what blocks are available. The implementation
requires looking at the percpu counters and making an educated guess
as to how many blocks to try and allocate from xfs_mod_fdblocks(),
which can return -ENOSPC if the guess was not accurate due to
counters being modified in parallel.

xfs_reserve_blocks() retries the guess in this scenario until the
allocation succeeds or it is determined that there is no space
available in the fs. While not easily reproducible in the current
form, the retry code doesn't actually work correctly if
xfs_mod_fdblocks() actually fails. The problem is that the percpu
calculations use the m_resblks counter to determine how many blocks
to allocate, but unconditionally update m_resblks before the block
allocation has actually succeeded.  Therefore, if xfs_mod_fdblocks()
fails, the code jumps to the retry label and uses the already
updated m_resblks value to determine how many blocks to try and
allocate. If the percpu counters previously suggested that the
entire request was available, fdblocks_delta could end up set to 0.
In that case, m_resblks is updated to the requested value, yet no
blocks have been reserved at all.

Refactor xfs_reserve_blocks() to use an explicit loop and make the
code easier to follow. Since we have to drop the spinlock across the
xfs_mod_fdblocks() call, use a delta value for m_resblks as well and
only apply the delta once allocation succeeds.

[dchinner: convert to do {} while() loop]
Signed-off-by: default avatarBrian Foster <bfoster@redhat.com>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
Signed-off-by: default avatarDave Chinner <david@fromorbit.com>
parent fa5a4f57
...@@ -667,8 +667,11 @@ xfs_reserve_blocks( ...@@ -667,8 +667,11 @@ xfs_reserve_blocks(
__uint64_t *inval, __uint64_t *inval,
xfs_fsop_resblks_t *outval) xfs_fsop_resblks_t *outval)
{ {
__int64_t lcounter, delta, fdblks_delta; __int64_t lcounter, delta;
__int64_t fdblks_delta = 0;
__uint64_t request; __uint64_t request;
__int64_t free;
int error = 0;
/* If inval is null, report current values and return */ /* If inval is null, report current values and return */
if (inval == (__uint64_t *)NULL) { if (inval == (__uint64_t *)NULL) {
...@@ -682,24 +685,23 @@ xfs_reserve_blocks( ...@@ -682,24 +685,23 @@ xfs_reserve_blocks(
request = *inval; request = *inval;
/* /*
* With per-cpu counters, this becomes an interesting * With per-cpu counters, this becomes an interesting problem. we need
* problem. we needto work out if we are freeing or allocation * to work out if we are freeing or allocation blocks first, then we can
* blocks first, then we can do the modification as necessary. * do the modification as necessary.
* *
* We do this under the m_sb_lock so that if we are near * We do this under the m_sb_lock so that if we are near ENOSPC, we will
* ENOSPC, we will hold out any changes while we work out * hold out any changes while we work out what to do. This means that
* what to do. This means that the amount of free space can * the amount of free space can change while we do this, so we need to
* change while we do this, so we need to retry if we end up * retry if we end up trying to reserve more space than is available.
* trying to reserve more space than is available.
*/ */
retry:
spin_lock(&mp->m_sb_lock); spin_lock(&mp->m_sb_lock);
/* /*
* If our previous reservation was larger than the current value, * If our previous reservation was larger than the current value,
* then move any unused blocks back to the free pool. * then move any unused blocks back to the free pool. Modify the resblks
* counters directly since we shouldn't have any problems unreserving
* space.
*/ */
fdblks_delta = 0;
if (mp->m_resblks > request) { if (mp->m_resblks > request) {
lcounter = mp->m_resblks_avail - request; lcounter = mp->m_resblks_avail - request;
if (lcounter > 0) { /* release unused blocks */ if (lcounter > 0) { /* release unused blocks */
...@@ -707,54 +709,67 @@ xfs_reserve_blocks( ...@@ -707,54 +709,67 @@ xfs_reserve_blocks(
mp->m_resblks_avail -= lcounter; mp->m_resblks_avail -= lcounter;
} }
mp->m_resblks = request; mp->m_resblks = request;
} else { if (fdblks_delta) {
__int64_t free; spin_unlock(&mp->m_sb_lock);
error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
spin_lock(&mp->m_sb_lock);
}
goto out;
}
/*
* If the request is larger than the current reservation, reserve the
* blocks before we update the reserve counters. Sample m_fdblocks and
* perform a partial reservation if the request exceeds free space.
*/
error = -ENOSPC;
do {
free = percpu_counter_sum(&mp->m_fdblocks) - free = percpu_counter_sum(&mp->m_fdblocks) -
XFS_ALLOC_SET_ASIDE(mp); XFS_ALLOC_SET_ASIDE(mp);
if (!free) if (!free)
goto out; /* ENOSPC and fdblks_delta = 0 */ break;
delta = request - mp->m_resblks; delta = request - mp->m_resblks;
lcounter = free - delta; lcounter = free - delta;
if (lcounter < 0) { if (lcounter < 0)
/* We can't satisfy the request, just get what we can */ /* We can't satisfy the request, just get what we can */
mp->m_resblks += free; fdblks_delta = free;
mp->m_resblks_avail += free; else
fdblks_delta = -free; fdblks_delta = delta;
} else {
fdblks_delta = -delta;
mp->m_resblks = request;
mp->m_resblks_avail += delta;
}
}
out:
if (outval) {
outval->resblks = mp->m_resblks;
outval->resblks_avail = mp->m_resblks_avail;
}
spin_unlock(&mp->m_sb_lock);
if (fdblks_delta) {
/* /*
* If we are putting blocks back here, m_resblks_avail is * We'll either succeed in getting space from the free block
* already at its max so this will put it in the free pool. * count or we'll get an ENOSPC. If we get a ENOSPC, it means
* * things changed while we were calculating fdblks_delta and so
* If we need space, we'll either succeed in getting it * we should try again to see if there is anything left to
* from the free block count or we'll get an enospc. If * reserve.
* we get a ENOSPC, it means things changed while we were
* calculating fdblks_delta and so we should try again to
* see if there is anything left to reserve.
* *
* Don't set the reserved flag here - we don't want to reserve * Don't set the reserved flag here - we don't want to reserve
* the extra reserve blocks from the reserve..... * the extra reserve blocks from the reserve.....
*/ */
int error; spin_unlock(&mp->m_sb_lock);
error = xfs_mod_fdblocks(mp, fdblks_delta, 0); error = xfs_mod_fdblocks(mp, -fdblks_delta, 0);
if (error == -ENOSPC) spin_lock(&mp->m_sb_lock);
goto retry; } while (error == -ENOSPC);
/*
* Update the reserve counters if blocks have been successfully
* allocated.
*/
if (!error && fdblks_delta) {
mp->m_resblks += fdblks_delta;
mp->m_resblks_avail += fdblks_delta;
} }
return 0;
out:
if (outval) {
outval->resblks = mp->m_resblks;
outval->resblks_avail = mp->m_resblks_avail;
}
spin_unlock(&mp->m_sb_lock);
return error;
} }
int int
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment