Commit 79ad5761 authored by Dave Chinner

Merge branch 'xfs-4.9-reflink-prep' into for-next

parents b036b970 3fd129b6
...@@ -52,6 +52,7 @@ xfs-y += $(addprefix libxfs/, \ ...@@ -52,6 +52,7 @@ xfs-y += $(addprefix libxfs/, \
xfs_inode_fork.o \ xfs_inode_fork.o \
xfs_inode_buf.o \ xfs_inode_buf.o \
xfs_log_rlimit.o \ xfs_log_rlimit.o \
xfs_ag_resv.o \
xfs_rmap.o \ xfs_rmap.o \
xfs_rmap_btree.o \ xfs_rmap_btree.o \
xfs_sb.o \ xfs_sb.o \
......
/*
* Copyright (C) 2016 Oracle. All Rights Reserved.
*
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_cksum.h"
#include "xfs_trans.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_ag_resv.h"
#include "xfs_trans_space.h"
#include "xfs_rmap_btree.h"
#include "xfs_btree.h"
/*
* Per-AG Block Reservations
*
* For some kinds of allocation group metadata structures, it is advantageous
* to reserve a small number of blocks in each AG so that future expansions of
* that data structure do not encounter ENOSPC, because an allocation failure
* during a btree split would take the filesystem offline.
*
* Prior to the introduction of reflink, this wasn't an issue because the free
* space btrees maintain a reserve of space (the AGFL) to handle any expansion
* that may be necessary; and allocations of other metadata (inodes, BMBT,
* dir/attr) aren't restricted to a single AG. However, with reflink it is
* possible to allocate all the space in an AG, have subsequent reflink/CoW
* activity expand the refcount btree, and discover that there's no space left
* to handle that expansion. Since we can calculate the maximum size of the
* refcount btree, we can reserve space for it and avoid ENOSPC.
*
* Handling per-AG reservations consists of four changes to the allocator's
* behavior: First, because these reservations are always needed, we decrease
* the ag_max_usable counter to reflect the size of the AG after the reserved
* blocks are taken. Second, the reservations must be reflected in the
* fdblocks count to maintain proper accounting. Third, each AG must maintain
* its own reserved block counter so that we can calculate the amount of space
* that must remain free to maintain the reservations. Fourth, the "remaining
* reserved blocks" count must be used when calculating the length of the
* longest free extent in an AG and to clamp maxlen in the per-AG allocation
* functions. In other words, we maintain a virtual allocation via in-core
* accounting tricks so that we don't have to clean up after a crash. :)
*
* Reserved blocks can be managed by passing one of the enum xfs_ag_resv_type
* values via struct xfs_alloc_arg or directly to the xfs_free_extent
* function. It might seem a little funny to maintain a reservoir of blocks
* to feed another reservoir, but the AGFL only holds enough blocks to get
* through the next transaction. The per-AG reservation is to ensure (we
* hope) that each AG never runs out of blocks. Each data structure wanting
* to use the reservation system should update ask/used in xfs_ag_resv_init.
*/
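To make the ask/used/reserved arithmetic above concrete, here is a toy, standalone C sketch (not kernel code; the numbers are made up) showing what gets hidden from the counters when a structure asks for 100 blocks and already owns 10 of them:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t ask = 100;		/* most blocks the structure could ever need */
	uint32_t used = 10;		/* blocks it already owns on disk */
	uint32_t reserved = ask - used;	/* blocks to hold back from allocations */

	uint64_t fdblocks = 100000;	/* global free-space counter (made up) */
	uint32_t ag_max_usable = 50000;	/* advertised usable AG size (made up) */

	/* Mirrors __xfs_ag_resv_init below: hide the reservation from both. */
	fdblocks -= reserved;		/* in-core free space shrinks by 90 */
	ag_max_usable -= ask;		/* usable AG size shrinks by the full 100 */

	printf("reserved=%u fdblocks=%llu ag_max_usable=%u\n",
	       reserved, (unsigned long long)fdblocks, ag_max_usable);
	return 0;
}

Because the reserved blocks were already removed from the in-core free-space count here, later allocations from the reservation only have to adjust the on-disk counter, as xfs_ag_resv_alloc_extent and xfs_ag_resv_free_extent do below.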
/*
* Are we critically low on blocks? For now we'll define that as the number
* of blocks we can get our hands on being less than 10% of what we reserved
* or less than some arbitrary number (maximum btree height).
*/
bool
xfs_ag_resv_critical(
struct xfs_perag *pag,
enum xfs_ag_resv_type type)
{
xfs_extlen_t avail;
xfs_extlen_t orig;
switch (type) {
case XFS_AG_RESV_METADATA:
avail = pag->pagf_freeblks - pag->pag_agfl_resv.ar_reserved;
orig = pag->pag_meta_resv.ar_asked;
break;
case XFS_AG_RESV_AGFL:
avail = pag->pagf_freeblks + pag->pagf_flcount -
pag->pag_meta_resv.ar_reserved;
orig = pag->pag_agfl_resv.ar_asked;
break;
default:
ASSERT(0);
return false;
}
trace_xfs_ag_resv_critical(pag, type, avail);
/* Critically low if less than 10% or max btree height remains. */
return avail < orig / 10 || avail < XFS_BTREE_MAXLEVELS;
}
/*
* How many blocks are reserved but not used, and therefore must not be
* allocated away?
*/
xfs_extlen_t
xfs_ag_resv_needed(
struct xfs_perag *pag,
enum xfs_ag_resv_type type)
{
xfs_extlen_t len;
len = pag->pag_meta_resv.ar_reserved + pag->pag_agfl_resv.ar_reserved;
switch (type) {
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_AGFL:
len -= xfs_perag_resv(pag, type)->ar_reserved;
break;
case XFS_AG_RESV_NONE:
/* empty */
break;
default:
ASSERT(0);
}
trace_xfs_ag_resv_needed(pag, type, len);
return len;
}
/* Clean out a reservation */
static int
__xfs_ag_resv_free(
struct xfs_perag *pag,
enum xfs_ag_resv_type type)
{
struct xfs_ag_resv *resv;
xfs_extlen_t oldresv;
int error;
trace_xfs_ag_resv_free(pag, type, 0);
resv = xfs_perag_resv(pag, type);
pag->pag_mount->m_ag_max_usable += resv->ar_asked;
/*
* AGFL blocks are always considered "free", so whatever
* was reserved at mount time must be given back at umount.
*/
if (type == XFS_AG_RESV_AGFL)
oldresv = resv->ar_orig_reserved;
else
oldresv = resv->ar_reserved;
error = xfs_mod_fdblocks(pag->pag_mount, oldresv, true);
resv->ar_reserved = 0;
resv->ar_asked = 0;
if (error)
trace_xfs_ag_resv_free_error(pag->pag_mount, pag->pag_agno,
error, _RET_IP_);
return error;
}
/* Free a per-AG reservation. */
int
xfs_ag_resv_free(
struct xfs_perag *pag)
{
int error;
int err2;
error = __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL);
err2 = __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA);
if (err2 && !error)
error = err2;
return error;
}
static int
__xfs_ag_resv_init(
struct xfs_perag *pag,
enum xfs_ag_resv_type type,
xfs_extlen_t ask,
xfs_extlen_t used)
{
struct xfs_mount *mp = pag->pag_mount;
struct xfs_ag_resv *resv;
int error;
resv = xfs_perag_resv(pag, type);
if (used > ask)
ask = used;
resv->ar_asked = ask;
resv->ar_reserved = resv->ar_orig_reserved = ask - used;
mp->m_ag_max_usable -= ask;
trace_xfs_ag_resv_init(pag, type, ask);
error = xfs_mod_fdblocks(mp, -(int64_t)resv->ar_reserved, true);
if (error)
trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
error, _RET_IP_);
return error;
}
/* Create a per-AG block reservation. */
int
xfs_ag_resv_init(
struct xfs_perag *pag)
{
xfs_extlen_t ask;
xfs_extlen_t used;
int error = 0;
/* Create the metadata reservation. */
if (pag->pag_meta_resv.ar_asked == 0) {
ask = used = 0;
error = __xfs_ag_resv_init(pag, XFS_AG_RESV_METADATA,
ask, used);
if (error)
goto out;
}
/* Create the AGFL metadata reservation */
if (pag->pag_agfl_resv.ar_asked == 0) {
ask = used = 0;
error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used);
if (error)
goto out;
}
out:
return error;
}
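A rough sketch of how a mount-time caller might drive this (hedged: it assumes a struct xfs_mount *mp in scope and the usual xfs_perag_get/xfs_perag_put iteration; it is not part of this patch, and the matching xfs_ag_resv_free calls would run at unmount):

	struct xfs_perag	*pag;
	xfs_agnumber_t		agno;
	int			error;

	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
		pag = xfs_perag_get(mp, agno);
		error = xfs_ag_resv_init(pag);
		xfs_perag_put(pag);
		if (error)
			return error;
	}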
/* Allocate a block from the reservation. */
void
xfs_ag_resv_alloc_extent(
struct xfs_perag *pag,
enum xfs_ag_resv_type type,
struct xfs_alloc_arg *args)
{
struct xfs_ag_resv *resv;
xfs_extlen_t len;
uint field;
trace_xfs_ag_resv_alloc_extent(pag, type, args->len);
switch (type) {
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_AGFL:
resv = xfs_perag_resv(pag, type);
break;
default:
ASSERT(0);
/* fall through */
case XFS_AG_RESV_NONE:
field = args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
XFS_TRANS_SB_FDBLOCKS;
xfs_trans_mod_sb(args->tp, field, -(int64_t)args->len);
return;
}
len = min_t(xfs_extlen_t, args->len, resv->ar_reserved);
resv->ar_reserved -= len;
if (type == XFS_AG_RESV_AGFL)
return;
/* Allocations of reserved blocks only need on-disk sb updates... */
xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len);
/* ...but non-reserved blocks need in-core and on-disk updates. */
if (args->len > len)
xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_FDBLOCKS,
-((int64_t)args->len - len));
}
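The split accounting in xfs_ag_resv_alloc_extent is easy to misread; a standalone toy example (not kernel code) of an 8-block allocation against a reservation with only 5 blocks left:

#include <stdio.h>

int main(void)
{
	unsigned int len = 8;		/* extent being allocated */
	unsigned int ar_reserved = 5;	/* blocks left in this reservation */

	/* The reserved portion was already hidden from the in-core free-space
	 * count at init time, so it only needs the on-disk counter adjusted. */
	unsigned int from_resv = len < ar_reserved ? len : ar_reserved;
	ar_reserved -= from_resv;

	/* The remainder comes out of the general pool and must be charged to
	 * both the in-core and on-disk counters. */
	unsigned int overflow = len - from_resv;

	printf("charged to reservation: %u, charged to free space: %u\n",
	       from_resv, overflow);	/* prints 5 and 3 */
	return 0;
}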
/* Free a block to the reservation. */
void
xfs_ag_resv_free_extent(
struct xfs_perag *pag,
enum xfs_ag_resv_type type,
struct xfs_trans *tp,
xfs_extlen_t len)
{
xfs_extlen_t leftover;
struct xfs_ag_resv *resv;
trace_xfs_ag_resv_free_extent(pag, type, len);
switch (type) {
case XFS_AG_RESV_METADATA:
case XFS_AG_RESV_AGFL:
resv = xfs_perag_resv(pag, type);
break;
default:
ASSERT(0);
/* fall through */
case XFS_AG_RESV_NONE:
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
return;
}
leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved);
resv->ar_reserved += leftover;
if (type == XFS_AG_RESV_AGFL)
return;
/* Freeing into the reserved pool only requires on-disk update... */
xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len);
/* ...but freeing beyond that requires in-core and on-disk update. */
if (len > leftover)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len - leftover);
}
/*
* Copyright (C) 2016 Oracle. All Rights Reserved.
*
* Author: Darrick J. Wong <darrick.wong@oracle.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __XFS_AG_RESV_H__
#define __XFS_AG_RESV_H__
int xfs_ag_resv_free(struct xfs_perag *pag);
int xfs_ag_resv_init(struct xfs_perag *pag);
bool xfs_ag_resv_critical(struct xfs_perag *pag, enum xfs_ag_resv_type type);
xfs_extlen_t xfs_ag_resv_needed(struct xfs_perag *pag,
enum xfs_ag_resv_type type);
void xfs_ag_resv_alloc_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
struct xfs_alloc_arg *args);
void xfs_ag_resv_free_extent(struct xfs_perag *pag, enum xfs_ag_resv_type type,
struct xfs_trans *tp, xfs_extlen_t len);
#endif /* __XFS_AG_RESV_H__ */
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include "xfs_trans.h" #include "xfs_trans.h"
#include "xfs_buf_item.h" #include "xfs_buf_item.h"
#include "xfs_log.h" #include "xfs_log.h"
#include "xfs_ag_resv.h"
struct workqueue_struct *xfs_alloc_wq; struct workqueue_struct *xfs_alloc_wq;
...@@ -74,14 +75,8 @@ xfs_prealloc_blocks( ...@@ -74,14 +75,8 @@ xfs_prealloc_blocks(
* extents need to be actually allocated. To get around this, we explicitly set * extents need to be actually allocated. To get around this, we explicitly set
* aside a few blocks which will not be reserved in delayed allocation. * aside a few blocks which will not be reserved in delayed allocation.
* *
* When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist * We need to reserve 4 fsbs _per AG_ for the freelist and 4 more to handle a
* and 4 more to handle a potential split of the file's bmap btree. * potential split of the file's bmap btree.
*
* When rmap is enabled, we must also be able to handle two rmap btree inserts
* to record both the file data extent and a new bmbt block. The bmbt block
* might not be in the same AG as the file data extent. In the worst case
* the bmap btree splits multiple levels and all the new blocks come from
* different AGs, so set aside enough to handle rmap btree splits in all AGs.
*/ */
unsigned int unsigned int
xfs_alloc_set_aside( xfs_alloc_set_aside(
...@@ -90,8 +85,6 @@ xfs_alloc_set_aside( ...@@ -90,8 +85,6 @@ xfs_alloc_set_aside(
unsigned int blocks; unsigned int blocks;
blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE); blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE);
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
blocks += mp->m_sb.sb_agcount * mp->m_rmap_maxlevels;
return blocks; return blocks;
} }
...@@ -680,12 +673,29 @@ xfs_alloc_ag_vextent( ...@@ -680,12 +673,29 @@ xfs_alloc_ag_vextent(
xfs_alloc_arg_t *args) /* argument structure for allocation */ xfs_alloc_arg_t *args) /* argument structure for allocation */
{ {
int error=0; int error=0;
xfs_extlen_t reservation;
xfs_extlen_t oldmax;
ASSERT(args->minlen > 0); ASSERT(args->minlen > 0);
ASSERT(args->maxlen > 0); ASSERT(args->maxlen > 0);
ASSERT(args->minlen <= args->maxlen); ASSERT(args->minlen <= args->maxlen);
ASSERT(args->mod < args->prod); ASSERT(args->mod < args->prod);
ASSERT(args->alignment > 0); ASSERT(args->alignment > 0);
/*
* Clamp maxlen to the amount of free space minus any reservations
* that have been made.
*/
oldmax = args->maxlen;
reservation = xfs_ag_resv_needed(args->pag, args->resv);
if (args->maxlen > args->pag->pagf_freeblks - reservation)
args->maxlen = args->pag->pagf_freeblks - reservation;
if (args->maxlen == 0) {
args->agbno = NULLAGBLOCK;
args->maxlen = oldmax;
return 0;
}
/* /*
* Branch to correct routine based on the type. * Branch to correct routine based on the type.
*/ */
...@@ -705,12 +715,14 @@ xfs_alloc_ag_vextent( ...@@ -705,12 +715,14 @@ xfs_alloc_ag_vextent(
/* NOTREACHED */ /* NOTREACHED */
} }
args->maxlen = oldmax;
if (error || args->agbno == NULLAGBLOCK) if (error || args->agbno == NULLAGBLOCK)
return error; return error;
ASSERT(args->len >= args->minlen); ASSERT(args->len >= args->minlen);
ASSERT(args->len <= args->maxlen); ASSERT(args->len <= args->maxlen);
ASSERT(!args->wasfromfl || !args->isfl); ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL);
ASSERT(args->agbno % args->alignment == 0); ASSERT(args->agbno % args->alignment == 0);
/* if not file data, insert new block into the reverse map btree */ /* if not file data, insert new block into the reverse map btree */
...@@ -732,12 +744,7 @@ xfs_alloc_ag_vextent( ...@@ -732,12 +744,7 @@ xfs_alloc_ag_vextent(
args->agbno, args->len)); args->agbno, args->len));
} }
if (!args->isfl) { xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
xfs_trans_mod_sb(args->tp, args->wasdel ?
XFS_TRANS_SB_RES_FDBLOCKS :
XFS_TRANS_SB_FDBLOCKS,
-((long)(args->len)));
}
XFS_STATS_INC(args->mp, xs_allocx); XFS_STATS_INC(args->mp, xs_allocx);
XFS_STATS_ADD(args->mp, xs_allocb, args->len); XFS_STATS_ADD(args->mp, xs_allocb, args->len);
...@@ -1583,6 +1590,7 @@ xfs_alloc_ag_vextent_small( ...@@ -1583,6 +1590,7 @@ xfs_alloc_ag_vextent_small(
int *stat) /* status: 0-freelist, 1-normal/none */ int *stat) /* status: 0-freelist, 1-normal/none */
{ {
struct xfs_owner_info oinfo; struct xfs_owner_info oinfo;
struct xfs_perag *pag;
int error; int error;
xfs_agblock_t fbno; xfs_agblock_t fbno;
xfs_extlen_t flen; xfs_extlen_t flen;
...@@ -1600,7 +1608,8 @@ xfs_alloc_ag_vextent_small( ...@@ -1600,7 +1608,8 @@ xfs_alloc_ag_vextent_small(
* to respect minleft even when pulling from the * to respect minleft even when pulling from the
* freelist. * freelist.
*/ */
else if (args->minlen == 1 && args->alignment == 1 && !args->isfl && else if (args->minlen == 1 && args->alignment == 1 &&
args->resv != XFS_AG_RESV_AGFL &&
(be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
> args->minleft)) { > args->minleft)) {
error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
...@@ -1629,13 +1638,18 @@ xfs_alloc_ag_vextent_small( ...@@ -1629,13 +1638,18 @@ xfs_alloc_ag_vextent_small(
/* /*
* If we're feeding an AGFL block to something that * If we're feeding an AGFL block to something that
* doesn't live in the free space, we need to clear * doesn't live in the free space, we need to clear
* out the OWN_AG rmap. * out the OWN_AG rmap and add the block back to
* the AGFL per-AG reservation.
*/ */
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG);
error = xfs_rmap_free(args->tp, args->agbp, args->agno, error = xfs_rmap_free(args->tp, args->agbp, args->agno,
fbno, 1, &oinfo); fbno, 1, &oinfo);
if (error) if (error)
goto error0; goto error0;
pag = xfs_perag_get(args->mp, args->agno);
xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL,
args->tp, 1);
xfs_perag_put(pag);
*stat = 0; *stat = 0;
return 0; return 0;
...@@ -1683,7 +1697,7 @@ xfs_free_ag_extent( ...@@ -1683,7 +1697,7 @@ xfs_free_ag_extent(
xfs_agblock_t bno, xfs_agblock_t bno,
xfs_extlen_t len, xfs_extlen_t len,
struct xfs_owner_info *oinfo, struct xfs_owner_info *oinfo,
int isfl) enum xfs_ag_resv_type type)
{ {
xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
...@@ -1911,21 +1925,22 @@ xfs_free_ag_extent( ...@@ -1911,21 +1925,22 @@ xfs_free_ag_extent(
*/ */
pag = xfs_perag_get(mp, agno); pag = xfs_perag_get(mp, agno);
error = xfs_alloc_update_counters(tp, pag, agbp, len); error = xfs_alloc_update_counters(tp, pag, agbp, len);
xfs_ag_resv_free_extent(pag, type, tp, len);
xfs_perag_put(pag); xfs_perag_put(pag);
if (error) if (error)
goto error0; goto error0;
if (!isfl)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
XFS_STATS_INC(mp, xs_freex); XFS_STATS_INC(mp, xs_freex);
XFS_STATS_ADD(mp, xs_freeb, len); XFS_STATS_ADD(mp, xs_freeb, len);
trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
haveleft, haveright);
return 0; return 0;
error0: error0:
trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1); trace_xfs_free_extent(mp, agno, bno, len, type == XFS_AG_RESV_AGFL,
-1, -1);
if (bno_cur) if (bno_cur)
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
if (cnt_cur) if (cnt_cur)
...@@ -1950,21 +1965,43 @@ xfs_alloc_compute_maxlevels( ...@@ -1950,21 +1965,43 @@ xfs_alloc_compute_maxlevels(
} }
/* /*
* Find the length of the longest extent in an AG. * Find the length of the longest extent in an AG. The 'need' parameter
* specifies how much space we're going to need for the AGFL and the
* 'reserved' parameter tells us how many blocks in this AG are reserved for
* other callers.
*/ */
xfs_extlen_t xfs_extlen_t
xfs_alloc_longest_free_extent( xfs_alloc_longest_free_extent(
struct xfs_mount *mp, struct xfs_mount *mp,
struct xfs_perag *pag, struct xfs_perag *pag,
xfs_extlen_t need) xfs_extlen_t need,
xfs_extlen_t reserved)
{ {
xfs_extlen_t delta = 0; xfs_extlen_t delta = 0;
/*
* If the AGFL needs a recharge, we'll have to subtract that from the
* longest extent.
*/
if (need > pag->pagf_flcount) if (need > pag->pagf_flcount)
delta = need - pag->pagf_flcount; delta = need - pag->pagf_flcount;
/*
* If we cannot maintain others' reservations with space from the
* not-longest freesp extents, we'll have to subtract /that/ from
* the longest extent too.
*/
if (pag->pagf_freeblks - pag->pagf_longest < reserved)
delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
/*
* If the longest extent is long enough to satisfy all the
* reservations and AGFL rules in place, we can return this extent.
*/
if (pag->pagf_longest > delta) if (pag->pagf_longest > delta)
return pag->pagf_longest - delta; return pag->pagf_longest - delta;
/* Otherwise, let the caller try for 1 block if there's space. */
return pag->pagf_flcount > 0 || pag->pagf_longest > 0; return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
} }
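A toy walk-through of the new delta computation (standalone C, made-up numbers): with a 120-block longest extent, 200 free blocks, 2 AGFL blocks on hand, 6 needed for the AGFL, and 50 blocks reserved by others, only the AGFL refill eats into the longest extent:

#include <stdio.h>

int main(void)
{
	unsigned int longest = 120, freeblks = 200, flcount = 2;
	unsigned int need = 6, reserved = 50;
	unsigned int delta = 0;

	if (need > flcount)			/* AGFL refill: 6 - 2 = 4 */
		delta = need - flcount;
	if (freeblks - longest < reserved)	/* 80 >= 50, so no extra delta */
		delta += reserved - (freeblks - longest);

	printf("usable longest extent: %u\n",	/* prints 116 */
	       longest > delta ? longest - delta : 0);
	return 0;
}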
...@@ -2004,20 +2041,24 @@ xfs_alloc_space_available( ...@@ -2004,20 +2041,24 @@ xfs_alloc_space_available(
{ {
struct xfs_perag *pag = args->pag; struct xfs_perag *pag = args->pag;
xfs_extlen_t longest; xfs_extlen_t longest;
xfs_extlen_t reservation; /* blocks that are still reserved */
int available; int available;
if (flags & XFS_ALLOC_FLAG_FREEING) if (flags & XFS_ALLOC_FLAG_FREEING)
return true; return true;
reservation = xfs_ag_resv_needed(pag, args->resv);
/* do we have enough contiguous free space for the allocation? */ /* do we have enough contiguous free space for the allocation? */
longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free); longest = xfs_alloc_longest_free_extent(args->mp, pag, min_free,
reservation);
if ((args->minlen + args->alignment + args->minalignslop - 1) > longest) if ((args->minlen + args->alignment + args->minalignslop - 1) > longest)
return false; return false;
/* do have enough free space remaining for the allocation? */ /* do we have enough free space remaining for the allocation? */
available = (int)(pag->pagf_freeblks + pag->pagf_flcount - available = (int)(pag->pagf_freeblks + pag->pagf_flcount -
min_free - args->total); reservation - min_free - args->total);
if (available < (int)args->minleft) if (available < (int)args->minleft || available <= 0)
return false; return false;
return true; return true;
...@@ -2124,7 +2165,7 @@ xfs_alloc_fix_freelist( ...@@ -2124,7 +2165,7 @@ xfs_alloc_fix_freelist(
if (error) if (error)
goto out_agbp_relse; goto out_agbp_relse;
error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1,
&targs.oinfo, 1); &targs.oinfo, XFS_AG_RESV_AGFL);
if (error) if (error)
goto out_agbp_relse; goto out_agbp_relse;
bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
...@@ -2135,7 +2176,7 @@ xfs_alloc_fix_freelist( ...@@ -2135,7 +2176,7 @@ xfs_alloc_fix_freelist(
targs.mp = mp; targs.mp = mp;
targs.agbp = agbp; targs.agbp = agbp;
targs.agno = args->agno; targs.agno = args->agno;
targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; targs.alignment = targs.minlen = targs.prod = 1;
targs.type = XFS_ALLOCTYPE_THIS_AG; targs.type = XFS_ALLOCTYPE_THIS_AG;
targs.pag = pag; targs.pag = pag;
error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp); error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp);
...@@ -2146,6 +2187,7 @@ xfs_alloc_fix_freelist( ...@@ -2146,6 +2187,7 @@ xfs_alloc_fix_freelist(
while (pag->pagf_flcount < need) { while (pag->pagf_flcount < need) {
targs.agbno = 0; targs.agbno = 0;
targs.maxlen = need - pag->pagf_flcount; targs.maxlen = need - pag->pagf_flcount;
targs.resv = XFS_AG_RESV_AGFL;
/* Allocate as many blocks as possible at once. */ /* Allocate as many blocks as possible at once. */
error = xfs_alloc_ag_vextent(&targs); error = xfs_alloc_ag_vextent(&targs);
...@@ -2825,7 +2867,8 @@ xfs_free_extent( ...@@ -2825,7 +2867,8 @@ xfs_free_extent(
struct xfs_trans *tp, /* transaction pointer */ struct xfs_trans *tp, /* transaction pointer */
xfs_fsblock_t bno, /* starting block number of extent */ xfs_fsblock_t bno, /* starting block number of extent */
xfs_extlen_t len, /* length of extent */ xfs_extlen_t len, /* length of extent */
struct xfs_owner_info *oinfo) /* extent owner */ struct xfs_owner_info *oinfo, /* extent owner */
enum xfs_ag_resv_type type) /* block reservation type */
{ {
struct xfs_mount *mp = tp->t_mountp; struct xfs_mount *mp = tp->t_mountp;
struct xfs_buf *agbp; struct xfs_buf *agbp;
...@@ -2834,6 +2877,7 @@ xfs_free_extent( ...@@ -2834,6 +2877,7 @@ xfs_free_extent(
int error; int error;
ASSERT(len != 0); ASSERT(len != 0);
ASSERT(type != XFS_AG_RESV_AGFL);
if (XFS_TEST_ERROR(false, mp, if (XFS_TEST_ERROR(false, mp,
XFS_ERRTAG_FREE_EXTENT, XFS_ERRTAG_FREE_EXTENT,
...@@ -2851,7 +2895,7 @@ xfs_free_extent( ...@@ -2851,7 +2895,7 @@ xfs_free_extent(
agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length), agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
err); err);
error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, 0); error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
if (error) if (error)
goto err; goto err;
......
...@@ -87,10 +87,10 @@ typedef struct xfs_alloc_arg { ...@@ -87,10 +87,10 @@ typedef struct xfs_alloc_arg {
xfs_alloctype_t otype; /* original allocation type */ xfs_alloctype_t otype; /* original allocation type */
char wasdel; /* set if allocation was prev delayed */ char wasdel; /* set if allocation was prev delayed */
char wasfromfl; /* set if allocation is from freelist */ char wasfromfl; /* set if allocation is from freelist */
char isfl; /* set if is freelist blocks - !acctg */
char userdata; /* mask defining userdata treatment */ char userdata; /* mask defining userdata treatment */
xfs_fsblock_t firstblock; /* io first block allocated */ xfs_fsblock_t firstblock; /* io first block allocated */
struct xfs_owner_info oinfo; /* owner of blocks being allocated */ struct xfs_owner_info oinfo; /* owner of blocks being allocated */
enum xfs_ag_resv_type resv; /* block reservation to use */
} xfs_alloc_arg_t; } xfs_alloc_arg_t;
/* /*
...@@ -106,7 +106,8 @@ unsigned int xfs_alloc_set_aside(struct xfs_mount *mp); ...@@ -106,7 +106,8 @@ unsigned int xfs_alloc_set_aside(struct xfs_mount *mp);
unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp); unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp);
xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
struct xfs_perag *pag, xfs_extlen_t need); struct xfs_perag *pag, xfs_extlen_t need,
xfs_extlen_t reserved);
unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp, unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp,
struct xfs_perag *pag); struct xfs_perag *pag);
...@@ -184,7 +185,8 @@ xfs_free_extent( ...@@ -184,7 +185,8 @@ xfs_free_extent(
struct xfs_trans *tp, /* transaction pointer */ struct xfs_trans *tp, /* transaction pointer */
xfs_fsblock_t bno, /* starting block number of extent */ xfs_fsblock_t bno, /* starting block number of extent */
xfs_extlen_t len, /* length of extent */ xfs_extlen_t len, /* length of extent */
struct xfs_owner_info *oinfo);/* extent owner */ struct xfs_owner_info *oinfo, /* extent owner */
enum xfs_ag_resv_type type); /* block reservation type */
int /* error */ int /* error */
xfs_alloc_lookup_ge( xfs_alloc_lookup_ge(
......
...@@ -47,6 +47,7 @@ ...@@ -47,6 +47,7 @@
#include "xfs_attr_leaf.h" #include "xfs_attr_leaf.h"
#include "xfs_filestream.h" #include "xfs_filestream.h"
#include "xfs_rmap.h" #include "xfs_rmap.h"
#include "xfs_ag_resv.h"
kmem_zone_t *xfs_bmap_free_item_zone; kmem_zone_t *xfs_bmap_free_item_zone;
...@@ -3501,7 +3502,8 @@ xfs_bmap_longest_free_extent( ...@@ -3501,7 +3502,8 @@ xfs_bmap_longest_free_extent(
} }
longest = xfs_alloc_longest_free_extent(mp, pag, longest = xfs_alloc_longest_free_extent(mp, pag,
xfs_alloc_min_freelist(mp, pag)); xfs_alloc_min_freelist(mp, pag),
xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
if (*blen < longest) if (*blen < longest)
*blen = longest; *blen = longest;
...@@ -3781,7 +3783,7 @@ xfs_bmap_btalloc( ...@@ -3781,7 +3783,7 @@ xfs_bmap_btalloc(
} }
args.minleft = ap->minleft; args.minleft = ap->minleft;
args.wasdel = ap->wasdel; args.wasdel = ap->wasdel;
args.isfl = 0; args.resv = XFS_AG_RESV_NONE;
args.userdata = ap->userdata; args.userdata = ap->userdata;
if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) if (ap->userdata & XFS_ALLOC_USERDATA_ZERO)
args.ip = ap->ip; args.ip = ap->ip;
......
...@@ -2070,7 +2070,7 @@ __xfs_btree_updkeys( ...@@ -2070,7 +2070,7 @@ __xfs_btree_updkeys(
struct xfs_buf *bp0, struct xfs_buf *bp0,
bool force_all) bool force_all)
{ {
union xfs_btree_bigkey key; /* keys from current level */ union xfs_btree_key key; /* keys from current level */
union xfs_btree_key *lkey; /* keys from the next level up */ union xfs_btree_key *lkey; /* keys from the next level up */
union xfs_btree_key *hkey; union xfs_btree_key *hkey;
union xfs_btree_key *nlkey; /* keys from the next level up */ union xfs_btree_key *nlkey; /* keys from the next level up */
...@@ -2086,7 +2086,7 @@ __xfs_btree_updkeys( ...@@ -2086,7 +2086,7 @@ __xfs_btree_updkeys(
trace_xfs_btree_updkeys(cur, level, bp0); trace_xfs_btree_updkeys(cur, level, bp0);
lkey = (union xfs_btree_key *)&key; lkey = &key;
hkey = xfs_btree_high_key_from_key(cur, lkey); hkey = xfs_btree_high_key_from_key(cur, lkey);
xfs_btree_get_keys(cur, block, lkey); xfs_btree_get_keys(cur, block, lkey);
for (level++; level < cur->bc_nlevels; level++) { for (level++; level < cur->bc_nlevels; level++) {
...@@ -3226,7 +3226,7 @@ xfs_btree_insrec( ...@@ -3226,7 +3226,7 @@ xfs_btree_insrec(
struct xfs_buf *bp; /* buffer for block */ struct xfs_buf *bp; /* buffer for block */
union xfs_btree_ptr nptr; /* new block ptr */ union xfs_btree_ptr nptr; /* new block ptr */
struct xfs_btree_cur *ncur; /* new btree cursor */ struct xfs_btree_cur *ncur; /* new btree cursor */
union xfs_btree_bigkey nkey; /* new block key */ union xfs_btree_key nkey; /* new block key */
union xfs_btree_key *lkey; union xfs_btree_key *lkey;
int optr; /* old key/record index */ int optr; /* old key/record index */
int ptr; /* key/record index */ int ptr; /* key/record index */
...@@ -3241,7 +3241,7 @@ xfs_btree_insrec( ...@@ -3241,7 +3241,7 @@ xfs_btree_insrec(
XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec); XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, &rec);
ncur = NULL; ncur = NULL;
lkey = (union xfs_btree_key *)&nkey; lkey = &nkey;
/* /*
* If we have an external root pointer, and we've made it to the * If we have an external root pointer, and we've made it to the
...@@ -3444,14 +3444,14 @@ xfs_btree_insert( ...@@ -3444,14 +3444,14 @@ xfs_btree_insert(
union xfs_btree_ptr nptr; /* new block number (split result) */ union xfs_btree_ptr nptr; /* new block number (split result) */
struct xfs_btree_cur *ncur; /* new cursor (split result) */ struct xfs_btree_cur *ncur; /* new cursor (split result) */
struct xfs_btree_cur *pcur; /* previous level's cursor */ struct xfs_btree_cur *pcur; /* previous level's cursor */
union xfs_btree_bigkey bkey; /* key of block to insert */ union xfs_btree_key bkey; /* key of block to insert */
union xfs_btree_key *key; union xfs_btree_key *key;
union xfs_btree_rec rec; /* record to insert */ union xfs_btree_rec rec; /* record to insert */
level = 0; level = 0;
ncur = NULL; ncur = NULL;
pcur = cur; pcur = cur;
key = (union xfs_btree_key *)&bkey; key = &bkey;
xfs_btree_set_ptr_null(cur, &nptr); xfs_btree_set_ptr_null(cur, &nptr);
...@@ -4797,3 +4797,50 @@ xfs_btree_query_range( ...@@ -4797,3 +4797,50 @@ xfs_btree_query_range(
return xfs_btree_overlapped_query_range(cur, &low_key, &high_key, return xfs_btree_overlapped_query_range(cur, &low_key, &high_key,
fn, priv); fn, priv);
} }
/*
* Calculate the number of blocks needed to store a given number of records
* in a short-format (per-AG metadata) btree.
*/
xfs_extlen_t
xfs_btree_calc_size(
struct xfs_mount *mp,
uint *limits,
unsigned long long len)
{
int level;
int maxrecs;
xfs_extlen_t rval;
maxrecs = limits[0];
for (level = 0, rval = 0; len > 1; level++) {
len += maxrecs - 1;
do_div(len, maxrecs);
maxrecs = limits[1];
rval += len;
}
return rval;
}
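A userspace restatement of the recurrence (not kernel code), using an unrealistically small fan-out of 4 records per block so the arithmetic is easy to check: 100 records need 25 leaf blocks, plus 7 + 2 + 1 node blocks above them, 35 blocks total.

#include <stdio.h>

static unsigned long long calc_size(unsigned int leaf_maxrecs,
				    unsigned int node_maxrecs,
				    unsigned long long len)
{
	unsigned int maxrecs = leaf_maxrecs;
	unsigned long long blocks = 0;

	while (len > 1) {
		len = (len + maxrecs - 1) / maxrecs;	/* blocks at this level */
		maxrecs = node_maxrecs;
		blocks += len;
	}
	return blocks;
}

int main(void)
{
	printf("%llu\n", calc_size(4, 4, 100));		/* prints 35 */
	return 0;
}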
int
xfs_btree_count_blocks_helper(
struct xfs_btree_cur *cur,
int level,
void *data)
{
xfs_extlen_t *blocks = data;
(*blocks)++;
return 0;
}
/* Count the blocks in a btree and return the result in *blocks. */
int
xfs_btree_count_blocks(
struct xfs_btree_cur *cur,
xfs_extlen_t *blocks)
{
*blocks = 0;
return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
blocks);
}
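Presumably these two helpers exist so that a reservation consumer can derive the ask/used pair described in xfs_ag_resv.c; a hedged fragment, where mp, limits, nr_records and cur are hypothetical stand-ins for the btree in question:

	xfs_extlen_t	ask, used;
	int		error;

	ask = xfs_btree_calc_size(mp, limits, nr_records);	/* worst-case size */
	error = xfs_btree_count_blocks(cur, &used);		/* blocks in use now */
	if (error)
		return error;
	/* ask/used would then seed the per-AG reservation. */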
...@@ -37,30 +37,18 @@ union xfs_btree_ptr { ...@@ -37,30 +37,18 @@ union xfs_btree_ptr {
__be64 l; /* long form ptr */ __be64 l; /* long form ptr */
}; };
union xfs_btree_key {
struct xfs_bmbt_key bmbt;
xfs_bmdr_key_t bmbr; /* bmbt root block */
xfs_alloc_key_t alloc;
struct xfs_inobt_key inobt;
struct xfs_rmap_key rmap;
};
/* /*
* In-core key that holds both low and high keys for overlapped btrees. * The in-core btree key. Overlapping btrees actually store two keys
* The two keys are packed next to each other on disk, so do the same * per pointer, so we reserve enough memory to hold both. The __*bigkey
* in memory. Preserve the existing xfs_btree_key as a single key to * items should never be accessed directly.
* avoid the mental model breakage that would happen if we passed a
* bigkey into a function that operates on a single key.
*/ */
union xfs_btree_bigkey { union xfs_btree_key {
struct xfs_bmbt_key bmbt; struct xfs_bmbt_key bmbt;
xfs_bmdr_key_t bmbr; /* bmbt root block */ xfs_bmdr_key_t bmbr; /* bmbt root block */
xfs_alloc_key_t alloc; xfs_alloc_key_t alloc;
struct xfs_inobt_key inobt; struct xfs_inobt_key inobt;
struct {
struct xfs_rmap_key rmap; struct xfs_rmap_key rmap;
struct xfs_rmap_key rmap_hi; struct xfs_rmap_key __rmap_bigkey[2];
};
}; };
union xfs_btree_rec { union xfs_btree_rec {
...@@ -513,6 +501,8 @@ bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); ...@@ -513,6 +501,8 @@ bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs); bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits, uint xfs_btree_compute_maxlevels(struct xfs_mount *mp, uint *limits,
unsigned long len); unsigned long len);
xfs_extlen_t xfs_btree_calc_size(struct xfs_mount *mp, uint *limits,
unsigned long long len);
/* return codes */ /* return codes */
#define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */ #define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */
...@@ -529,4 +519,6 @@ typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level, ...@@ -529,4 +519,6 @@ typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
int xfs_btree_visit_blocks(struct xfs_btree_cur *cur, int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
xfs_btree_visit_blocks_fn fn, void *data); xfs_btree_visit_blocks_fn fn, void *data);
int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks);
#endif /* __XFS_BTREE_H__ */ #endif /* __XFS_BTREE_H__ */
...@@ -81,6 +81,10 @@ ...@@ -81,6 +81,10 @@
* - For each work item attached to the log intent item, * - For each work item attached to the log intent item,
* * Perform the described action. * * Perform the described action.
* * Attach the work item to the log done item. * * Attach the work item to the log done item.
* * If the result of doing the work was -EAGAIN, ->finish work
* wants a new transaction. See the "Requesting a Fresh
* Transaction while Finishing Deferred Work" section below for
* details.
* *
* The key here is that we must log an intent item for all pending * The key here is that we must log an intent item for all pending
* work items every time we roll the transaction, and that we must log * work items every time we roll the transaction, and that we must log
...@@ -88,6 +92,34 @@ ...@@ -88,6 +92,34 @@
* we can perform complex remapping operations, chaining intent items * we can perform complex remapping operations, chaining intent items
* as needed. * as needed.
* *
* Requesting a Fresh Transaction while Finishing Deferred Work
*
* If ->finish_item decides that it needs a fresh transaction to
* finish the work, it must ask its caller (xfs_defer_finish) for a
* continuation. The most likely trigger is a refcount adjustment function
* deciding that it has logged enough items to be at risk of exceeding the
* transaction reservation.
*
* To get a fresh transaction, we want to log the existing log done
* item to prevent the log intent item from replaying, immediately log
* a new log intent item with the unfinished work items, roll the
* transaction, and re-call ->finish_item wherever it left off. The
* log done item and the new log intent item must be in the same
* transaction or atomicity cannot be guaranteed; defer_finish ensures
* that this happens.
*
* This requires some coordination between ->finish_item and
* defer_finish. Upon deciding to request a new transaction,
* ->finish_item should update the current work item to reflect the
* unfinished work. Next, it should reset the log done item's list
* count to the number of items finished, and return -EAGAIN.
* defer_finish sees the -EAGAIN, logs the new log intent item
* with the remaining work items, and leaves the xfs_defer_pending
* item at the head of the dop_work queue. Then it rolls the
* transaction and picks up processing where it left off. ->finish_item
* must be careful to leave enough transaction reservation to fit the
* new log intent item.
*
* This is an example of remapping the extent (E, E+B) into file X at * This is an example of remapping the extent (E, E+B) into file X at
* offset A and dealing with the extent (C, C+B) already being mapped * offset A and dealing with the extent (C, C+B) already being mapped
* there: * there:
...@@ -104,21 +136,26 @@ ...@@ -104,21 +136,26 @@
* | Intent to add rmap (X, E, A, B) | * | Intent to add rmap (X, E, A, B) |
* +-------------------------------------------------+ * +-------------------------------------------------+
* | Reduce refcount for extent (C, B) | t2 * | Reduce refcount for extent (C, B) | t2
* | Done reducing refcount for extent (C, B) | * | Done reducing refcount for extent (C, 9) |
* | Intent to reduce refcount for extent (C+9, B-9) |
* | (ran out of space after 9 refcount updates) |
* +-------------------------------------------------+
* | Reduce refcount for extent (C+9, B-9) | t3
* | Done reducing refcount for extent (C+9, B-9) |
* | Increase refcount for extent (E, B) | * | Increase refcount for extent (E, B) |
* | Done increasing refcount for extent (E, B) | * | Done increasing refcount for extent (E, B) |
* | Intent to free extent (C, B) | * | Intent to free extent (C, B) |
* | Intent to free extent (F, 1) (refcountbt block) | * | Intent to free extent (F, 1) (refcountbt block) |
* | Intent to remove rmap (F, 1, REFC) | * | Intent to remove rmap (F, 1, REFC) |
* +-------------------------------------------------+ * +-------------------------------------------------+
* | Remove rmap (X, C, A, B) | t3 * | Remove rmap (X, C, A, B) | t4
* | Done removing rmap (X, C, A, B) | * | Done removing rmap (X, C, A, B) |
* | Add rmap (X, E, A, B) | * | Add rmap (X, E, A, B) |
* | Done adding rmap (X, E, A, B) | * | Done adding rmap (X, E, A, B) |
* | Remove rmap (F, 1, REFC) | * | Remove rmap (F, 1, REFC) |
* | Done removing rmap (F, 1, REFC) | * | Done removing rmap (F, 1, REFC) |
* +-------------------------------------------------+ * +-------------------------------------------------+
* | Free extent (C, B) | t4 * | Free extent (C, B) | t5
* | Done freeing extent (C, B) | * | Done freeing extent (C, B) |
* | Free extent (D, 1) | * | Free extent (D, 1) |
* | Done freeing extent (D, 1) | * | Done freeing extent (D, 1) |
...@@ -141,6 +178,9 @@ ...@@ -141,6 +178,9 @@
* - Intent to free extent (C, B) * - Intent to free extent (C, B)
* - Intent to free extent (F, 1) (refcountbt block) * - Intent to free extent (F, 1) (refcountbt block)
* - Intent to remove rmap (F, 1, REFC) * - Intent to remove rmap (F, 1, REFC)
*
* Note that the continuation requested between t2 and t3 is likely to
* reoccur.
*/ */
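The continuation contract is easier to see in a tiny standalone simulation (userspace C, not kernel code): finish_item below processes at most three items per "transaction", reports how many it finished, and returns an EAGAIN-style error so the driver can log a fresh intent and roll, analogous to the t2/t3 split in the example above.

#include <stdio.h>

#define SIM_EAGAIN	11
#define PER_TRANS	3	/* pretend the log reservation fits 3 updates */

/* Finish at most PER_TRANS of the remaining items; -SIM_EAGAIN means more left. */
static int finish_item(int *remaining, int *done)
{
	*done = 0;
	while (*remaining > 0 && *done < PER_TRANS) {
		(*remaining)--;
		(*done)++;
	}
	return *remaining ? -SIM_EAGAIN : 0;
}

int main(void)
{
	int remaining = 8;	/* e.g. 8 refcount adjustments queued */
	int trans = 2;		/* matches t2 in the example above */
	int error, done;

	do {
		error = finish_item(&remaining, &done);
		printf("t%d: finished %d, %d left%s\n", trans++, done, remaining,
		       error ? " (log new intent, roll transaction)" : "");
	} while (error == -SIM_EAGAIN);
	return 0;
}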
static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX]; static const struct xfs_defer_op_type *defer_op_types[XFS_DEFER_OPS_TYPE_MAX];
...@@ -323,7 +363,16 @@ xfs_defer_finish( ...@@ -323,7 +363,16 @@ xfs_defer_finish(
dfp->dfp_count--; dfp->dfp_count--;
error = dfp->dfp_type->finish_item(*tp, dop, li, error = dfp->dfp_type->finish_item(*tp, dop, li,
dfp->dfp_done, &state); dfp->dfp_done, &state);
if (error) { if (error == -EAGAIN) {
/*
* Caller wants a fresh transaction;
* put the work item back on the list
* and jump out.
*/
list_add(li, &dfp->dfp_work);
dfp->dfp_count++;
break;
} else if (error) {
/* /*
* Clean up after ourselves and jump out. * Clean up after ourselves and jump out.
* xfs_defer_cancel will take care of freeing * xfs_defer_cancel will take care of freeing
...@@ -335,9 +384,25 @@ xfs_defer_finish( ...@@ -335,9 +384,25 @@ xfs_defer_finish(
goto out; goto out;
} }
} }
if (error == -EAGAIN) {
/*
* Caller wants a fresh transaction, so log a
* new log intent item to replace the old one
* and roll the transaction. See "Requesting
* a Fresh Transaction while Finishing
* Deferred Work" above.
*/
dfp->dfp_intent = dfp->dfp_type->create_intent(*tp,
dfp->dfp_count);
dfp->dfp_done = NULL;
list_for_each(li, &dfp->dfp_work)
dfp->dfp_type->log_item(*tp, dfp->dfp_intent,
li);
} else {
/* Done with the dfp, free it. */ /* Done with the dfp, free it. */
list_del(&dfp->dfp_list); list_del(&dfp->dfp_list);
kmem_free(dfp); kmem_free(dfp);
}
if (cleanup_fn) if (cleanup_fn)
cleanup_fn(*tp, state, error); cleanup_fn(*tp, state, error);
......
...@@ -132,7 +132,7 @@ xfs_inobt_free_block( ...@@ -132,7 +132,7 @@ xfs_inobt_free_block(
xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT);
return xfs_free_extent(cur->bc_tp, return xfs_free_extent(cur->bc_tp,
XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1,
&oinfo); &oinfo, XFS_AG_RESV_NONE);
} }
STATIC int STATIC int
......
...@@ -647,9 +647,17 @@ struct xfs_rui_log_format { ...@@ -647,9 +647,17 @@ struct xfs_rui_log_format {
__uint16_t rui_size; /* size of this item */ __uint16_t rui_size; /* size of this item */
__uint32_t rui_nextents; /* # extents to free */ __uint32_t rui_nextents; /* # extents to free */
__uint64_t rui_id; /* rui identifier */ __uint64_t rui_id; /* rui identifier */
struct xfs_map_extent rui_extents[1]; /* array of extents to rmap */ struct xfs_map_extent rui_extents[]; /* array of extents to rmap */
}; };
static inline size_t
xfs_rui_log_format_sizeof(
unsigned int nr)
{
return sizeof(struct xfs_rui_log_format) +
nr * sizeof(struct xfs_map_extent);
}
/* /*
* This is the structure used to lay out an rud log item in the * This is the structure used to lay out an rud log item in the
* log. The rud_extents array is a variable size array whose * log. The rud_extents array is a variable size array whose
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "xfs_mru_cache.h" #include "xfs_mru_cache.h"
#include "xfs_filestream.h" #include "xfs_filestream.h"
#include "xfs_trace.h" #include "xfs_trace.h"
#include "xfs_ag_resv.h"
struct xfs_fstrm_item { struct xfs_fstrm_item {
struct xfs_mru_cache_elem mru; struct xfs_mru_cache_elem mru;
...@@ -198,7 +199,8 @@ xfs_filestream_pick_ag( ...@@ -198,7 +199,8 @@ xfs_filestream_pick_ag(
} }
longest = xfs_alloc_longest_free_extent(mp, pag, longest = xfs_alloc_longest_free_extent(mp, pag,
xfs_alloc_min_freelist(mp, pag)); xfs_alloc_min_freelist(mp, pag),
xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
if (((minlen && longest >= minlen) || if (((minlen && longest >= minlen) ||
(!minlen && pag->pagf_freeblks >= minfree)) && (!minlen && pag->pagf_freeblks >= minfree)) &&
(!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) ||
......
...@@ -553,7 +553,7 @@ xfs_growfs_data_private( ...@@ -553,7 +553,7 @@ xfs_growfs_data_private(
error = xfs_free_extent(tp, error = xfs_free_extent(tp,
XFS_AGB_TO_FSB(mp, agno, XFS_AGB_TO_FSB(mp, agno,
be32_to_cpu(agf->agf_length) - new), be32_to_cpu(agf->agf_length) - new),
new, &oinfo); new, &oinfo, XFS_AG_RESV_NONE);
if (error) if (error)
goto error0; goto error0;
} }
......
...@@ -331,6 +331,22 @@ xfs_mp_fail_writes(struct xfs_mount *mp) ...@@ -331,6 +331,22 @@ xfs_mp_fail_writes(struct xfs_mount *mp)
} }
#endif #endif
/* per-AG block reservation data structures*/
enum xfs_ag_resv_type {
XFS_AG_RESV_NONE = 0,
XFS_AG_RESV_METADATA,
XFS_AG_RESV_AGFL,
};
struct xfs_ag_resv {
/* number of blocks originally reserved here */
xfs_extlen_t ar_orig_reserved;
/* number of blocks reserved here */
xfs_extlen_t ar_reserved;
/* number of blocks originally asked for */
xfs_extlen_t ar_asked;
};
/* /*
* Per-ag incore structure, copies of information in agf and agi, to improve the * Per-ag incore structure, copies of information in agf and agi, to improve the
* performance of allocation group selection. * performance of allocation group selection.
...@@ -378,8 +394,28 @@ typedef struct xfs_perag { ...@@ -378,8 +394,28 @@ typedef struct xfs_perag {
/* for rcu-safe freeing */ /* for rcu-safe freeing */
struct rcu_head rcu_head; struct rcu_head rcu_head;
int pagb_count; /* pagb slots in use */ int pagb_count; /* pagb slots in use */
/* Blocks reserved for all kinds of metadata. */
struct xfs_ag_resv pag_meta_resv;
/* Blocks reserved for just AGFL-based metadata. */
struct xfs_ag_resv pag_agfl_resv;
} xfs_perag_t; } xfs_perag_t;
static inline struct xfs_ag_resv *
xfs_perag_resv(
struct xfs_perag *pag,
enum xfs_ag_resv_type type)
{
switch (type) {
case XFS_AG_RESV_METADATA:
return &pag->pag_meta_resv;
case XFS_AG_RESV_AGFL:
return &pag->pag_agfl_resv;
default:
return NULL;
}
}
extern void xfs_uuid_table_free(void); extern void xfs_uuid_table_free(void);
extern int xfs_log_sbcount(xfs_mount_t *); extern int xfs_log_sbcount(xfs_mount_t *);
extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
......
...@@ -51,28 +51,16 @@ xfs_rui_item_free( ...@@ -51,28 +51,16 @@ xfs_rui_item_free(
kmem_zone_free(xfs_rui_zone, ruip); kmem_zone_free(xfs_rui_zone, ruip);
} }
/*
* This returns the number of iovecs needed to log the given rui item.
* We only need 1 iovec for an rui item. It just logs the rui_log_format
* structure.
*/
static inline int
xfs_rui_item_sizeof(
struct xfs_rui_log_item *ruip)
{
return sizeof(struct xfs_rui_log_format) +
(ruip->rui_format.rui_nextents - 1) *
sizeof(struct xfs_map_extent);
}
STATIC void STATIC void
xfs_rui_item_size( xfs_rui_item_size(
struct xfs_log_item *lip, struct xfs_log_item *lip,
int *nvecs, int *nvecs,
int *nbytes) int *nbytes)
{ {
struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
*nvecs += 1; *nvecs += 1;
*nbytes += xfs_rui_item_sizeof(RUI_ITEM(lip)); *nbytes += xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents);
} }
/* /*
...@@ -97,7 +85,7 @@ xfs_rui_item_format( ...@@ -97,7 +85,7 @@ xfs_rui_item_format(
ruip->rui_format.rui_size = 1; ruip->rui_format.rui_size = 1;
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format, xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
xfs_rui_item_sizeof(ruip)); xfs_rui_log_format_sizeof(ruip->rui_format.rui_nextents));
} }
/* /*
...@@ -205,16 +193,12 @@ xfs_rui_init( ...@@ -205,16 +193,12 @@ xfs_rui_init(
{ {
struct xfs_rui_log_item *ruip; struct xfs_rui_log_item *ruip;
uint size;
ASSERT(nextents > 0); ASSERT(nextents > 0);
if (nextents > XFS_RUI_MAX_FAST_EXTENTS) { if (nextents > XFS_RUI_MAX_FAST_EXTENTS)
size = (uint)(sizeof(struct xfs_rui_log_item) + ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), KM_SLEEP);
((nextents - 1) * sizeof(struct xfs_map_extent))); else
ruip = kmem_zalloc(size, KM_SLEEP);
} else {
ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP); ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP);
}
xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops); xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops);
ruip->rui_format.rui_nextents = nextents; ruip->rui_format.rui_nextents = nextents;
...@@ -239,14 +223,12 @@ xfs_rui_copy_format( ...@@ -239,14 +223,12 @@ xfs_rui_copy_format(
uint len; uint len;
src_rui_fmt = buf->i_addr; src_rui_fmt = buf->i_addr;
len = sizeof(struct xfs_rui_log_format) + len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents);
(src_rui_fmt->rui_nextents - 1) *
sizeof(struct xfs_map_extent);
if (buf->i_len != len) if (buf->i_len != len)
return -EFSCORRUPTED; return -EFSCORRUPTED;
memcpy((char *)dst_rui_fmt, (char *)src_rui_fmt, len); memcpy(dst_rui_fmt, src_rui_fmt, len);
return 0; return 0;
} }
......
...@@ -70,6 +70,14 @@ struct xfs_rui_log_item { ...@@ -70,6 +70,14 @@ struct xfs_rui_log_item {
struct xfs_rui_log_format rui_format; struct xfs_rui_log_format rui_format;
}; };
static inline size_t
xfs_rui_log_item_sizeof(
unsigned int nr)
{
return offsetof(struct xfs_rui_log_item, rui_format) +
xfs_rui_log_format_sizeof(nr);
}
/* /*
* This is the "rmap update done" log item. It is used to log the fact that * This is the "rmap update done" log item. It is used to log the fact that
* some rmapbt updates mentioned in an earlier rui item have been performed. * some rmapbt updates mentioned in an earlier rui item have been performed.
......
...@@ -1782,9 +1782,8 @@ xfs_init_zones(void) ...@@ -1782,9 +1782,8 @@ xfs_init_zones(void)
if (!xfs_rud_zone) if (!xfs_rud_zone)
goto out_destroy_icreate_zone; goto out_destroy_icreate_zone;
xfs_rui_zone = kmem_zone_init((sizeof(struct xfs_rui_log_item) + xfs_rui_zone = kmem_zone_init(
((XFS_RUI_MAX_FAST_EXTENTS - 1) * xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
sizeof(struct xfs_map_extent))),
"xfs_rui_item"); "xfs_rui_item");
if (!xfs_rui_zone) if (!xfs_rui_zone)
goto out_destroy_rud_zone; goto out_destroy_rud_zone;
......
...@@ -1570,14 +1570,15 @@ TRACE_EVENT(xfs_agf, ...@@ -1570,14 +1570,15 @@ TRACE_EVENT(xfs_agf,
TRACE_EVENT(xfs_free_extent, TRACE_EVENT(xfs_free_extent,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
xfs_extlen_t len, bool isfl, int haveleft, int haveright), xfs_extlen_t len, enum xfs_ag_resv_type resv, int haveleft,
TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright), int haveright),
TP_ARGS(mp, agno, agbno, len, resv, haveleft, haveright),
TP_STRUCT__entry( TP_STRUCT__entry(
__field(dev_t, dev) __field(dev_t, dev)
__field(xfs_agnumber_t, agno) __field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno) __field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len) __field(xfs_extlen_t, len)
__field(int, isfl) __field(int, resv)
__field(int, haveleft) __field(int, haveleft)
__field(int, haveright) __field(int, haveright)
), ),
...@@ -1586,16 +1587,16 @@ TRACE_EVENT(xfs_free_extent, ...@@ -1586,16 +1587,16 @@ TRACE_EVENT(xfs_free_extent,
__entry->agno = agno; __entry->agno = agno;
__entry->agbno = agbno; __entry->agbno = agbno;
__entry->len = len; __entry->len = len;
__entry->isfl = isfl; __entry->resv = resv;
__entry->haveleft = haveleft; __entry->haveleft = haveleft;
__entry->haveright = haveright; __entry->haveright = haveright;
), ),
TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s", TP_printk("dev %d:%d agno %u agbno %u len %u resv %d %s",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno, __entry->agno,
__entry->agbno, __entry->agbno,
__entry->len, __entry->len,
__entry->isfl, __entry->resv,
__entry->haveleft ? __entry->haveleft ?
(__entry->haveright ? "both" : "left") : (__entry->haveright ? "both" : "left") :
(__entry->haveright ? "right" : "none")) (__entry->haveright ? "right" : "none"))
...@@ -1622,7 +1623,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, ...@@ -1622,7 +1623,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
__field(short, otype) __field(short, otype)
__field(char, wasdel) __field(char, wasdel)
__field(char, wasfromfl) __field(char, wasfromfl)
__field(char, isfl) __field(int, resv)
__field(char, userdata) __field(char, userdata)
__field(xfs_fsblock_t, firstblock) __field(xfs_fsblock_t, firstblock)
), ),
...@@ -1643,13 +1644,13 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, ...@@ -1643,13 +1644,13 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
__entry->otype = args->otype; __entry->otype = args->otype;
__entry->wasdel = args->wasdel; __entry->wasdel = args->wasdel;
__entry->wasfromfl = args->wasfromfl; __entry->wasfromfl = args->wasfromfl;
__entry->isfl = args->isfl; __entry->resv = args->resv;
__entry->userdata = args->userdata; __entry->userdata = args->userdata;
__entry->firstblock = args->firstblock; __entry->firstblock = args->firstblock;
), ),
TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
"prod %u minleft %u total %u alignment %u minalignslop %u " "prod %u minleft %u total %u alignment %u minalignslop %u "
"len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " "len %u type %s otype %s wasdel %d wasfromfl %d resv %d "
"userdata %d firstblock 0x%llx", "userdata %d firstblock 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev), MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno, __entry->agno,
...@@ -1667,7 +1668,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, ...@@ -1667,7 +1668,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class,
__print_symbolic(__entry->otype, XFS_ALLOC_TYPES), __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
__entry->wasdel, __entry->wasdel,
__entry->wasfromfl, __entry->wasfromfl,
__entry->isfl, __entry->resv,
__entry->userdata, __entry->userdata,
(unsigned long long)__entry->firstblock) (unsigned long long)__entry->firstblock)
) )
...@@ -2558,6 +2559,60 @@ DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_result); ...@@ -2558,6 +2559,60 @@ DEFINE_RMAPBT_EVENT(xfs_rmap_lookup_le_range_result);
DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result); DEFINE_RMAPBT_EVENT(xfs_rmap_find_right_neighbor_result);
DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result); DEFINE_RMAPBT_EVENT(xfs_rmap_find_left_neighbor_result);
/* per-AG reservation */
DECLARE_EVENT_CLASS(xfs_ag_resv_class,
TP_PROTO(struct xfs_perag *pag, enum xfs_ag_resv_type resv,
xfs_extlen_t len),
TP_ARGS(pag, resv, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(int, resv)
__field(xfs_extlen_t, freeblks)
__field(xfs_extlen_t, flcount)
__field(xfs_extlen_t, reserved)
__field(xfs_extlen_t, asked)
__field(xfs_extlen_t, len)
),
TP_fast_assign(
struct xfs_ag_resv *r = xfs_perag_resv(pag, resv);
__entry->dev = pag->pag_mount->m_super->s_dev;
__entry->agno = pag->pag_agno;
__entry->resv = resv;
__entry->freeblks = pag->pagf_freeblks;
__entry->flcount = pag->pagf_flcount;
__entry->reserved = r ? r->ar_reserved : 0;
__entry->asked = r ? r->ar_asked : 0;
__entry->len = len;
),
TP_printk("dev %d:%d agno %u resv %d freeblks %u flcount %u resv %u ask %u len %u\n",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->resv,
__entry->freeblks,
__entry->flcount,
__entry->reserved,
__entry->asked,
__entry->len)
)
#define DEFINE_AG_RESV_EVENT(name) \
DEFINE_EVENT(xfs_ag_resv_class, name, \
TP_PROTO(struct xfs_perag *pag, enum xfs_ag_resv_type type, \
xfs_extlen_t len), \
TP_ARGS(pag, type, len))
/* per-AG reservation tracepoints */
DEFINE_AG_RESV_EVENT(xfs_ag_resv_init);
DEFINE_AG_RESV_EVENT(xfs_ag_resv_free);
DEFINE_AG_RESV_EVENT(xfs_ag_resv_alloc_extent);
DEFINE_AG_RESV_EVENT(xfs_ag_resv_free_extent);
DEFINE_AG_RESV_EVENT(xfs_ag_resv_critical);
DEFINE_AG_RESV_EVENT(xfs_ag_resv_needed);
DEFINE_AG_ERROR_EVENT(xfs_ag_resv_free_error);
DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error);
#endif /* _TRACE_XFS_H */ #endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_PATH
......
...@@ -318,7 +318,6 @@ xfs_trans_mod_sb( ...@@ -318,7 +318,6 @@ xfs_trans_mod_sb(
* in-core superblock's counter. This should only * in-core superblock's counter. This should only
* be applied to the on-disk superblock. * be applied to the on-disk superblock.
*/ */
ASSERT(delta < 0);
tp->t_res_fdblocks_delta += delta; tp->t_res_fdblocks_delta += delta;
if (xfs_sb_version_haslazysbcount(&mp->m_sb)) if (xfs_sb_version_haslazysbcount(&mp->m_sb))
flags &= ~XFS_TRANS_SB_DIRTY; flags &= ~XFS_TRANS_SB_DIRTY;
......
...@@ -79,7 +79,8 @@ xfs_trans_free_extent( ...@@ -79,7 +79,8 @@ xfs_trans_free_extent(
trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
error = xfs_free_extent(tp, start_block, ext_len, oinfo); error = xfs_free_extent(tp, start_block, ext_len, oinfo,
XFS_AG_RESV_NONE);
/* /*
* Mark the transaction dirty, even on error. This ensures the * Mark the transaction dirty, even on error. This ensures the
......