Commit 49391d13 authored by Chandan Babu R

Merge tag 'repair-auto-reap-space-reservations-6.8_2023-12-06' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.8-mergeA

xfs: reserve disk space for online repairs

Online repair fixes metadata structures by writing a new copy out to
disk and atomically committing the new structure into the filesystem.
For this to work, we need to reserve all the space we're going to need
ahead of time so that the atomic commit transaction is as small as
possible.  We also require the reserved space to be freed if the system
goes down, or if we decide not to commit the repair, or if we reserve
too much space.

To keep the atomic commit transaction as small as possible, we would
like to allocate some space and simultaneously schedule automatic
reaping of the reserved space, even on log recovery.  EFIs are the
mechanism to get us there, but we need to use them in a novel manner.
Once we allocate the space, we want to hold on to the EFI (relogging as
necessary) until we can commit or cancel the repair.  EFIs for written
committed blocks need to go away, but unwritten or uncommitted blocks
can be freed like normal.
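
To make the intended usage concrete, here is a rough sketch of the
calling sequence this series enables (the allocation step and the
repair_succeeded flag are illustrative placeholders, not code from the
patches; only the xfs_alloc_*_autoreap helpers below are real):

	struct xfs_alloc_autoreap	autoreap;
	struct xfs_alloc_arg		args = {
		.tp	= tp,
		/* fsbno, len, oinfo, and resv set by the space allocator */
	};
	int				error;

	/* Log a paused EFI covering the space we just allocated. */
	error = xfs_alloc_schedule_autoreap(&args, true, &autoreap);
	if (error)
		return error;

	/* ...write the new btree into the reserved blocks... */

	if (repair_succeeded)
		/* Keep the space: mark the EFI stale and log an EFD. */
		xfs_alloc_cancel_autoreap(tp, &autoreap);
	else
		/* Unpause the EFI so the space is freed as usual. */
		xfs_alloc_commit_autoreap(tp, &autoreap);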

Earlier versions of this patchset directly manipulated the log items,
but Dave considered that a layering violation.  For v27, I've
modified the defer ops handling code to be capable of pausing a deferred
work item.  Log intent items are created as they always have been, but
paused items are pushed onto a side list when finishing deferred work
items, and pushed back onto the transaction after that.  Log intent done
items are not created for paused work.
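
Compressed into a few lines, the new defer ops interface looks like
this (error handling and transaction rolls elided; the EFI work item is
just an example caller):

	struct xfs_defer_pending	*dfp;

	/* xfs_defer_add now returns the pending work item... */
	dfp = xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);

	/* ...so that the caller can pause it. */
	xfs_defer_item_pause(tp, dfp);

	/*
	 * xfs_defer_finish logs an EFI for the paused item and requeues
	 * it across transaction rolls, but won't finish it (or log an
	 * EFD) until the caller unpauses it:
	 */
	xfs_defer_item_unpause(tp, dfp);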

The second part adds a "stale" flag to the EFI so that the repair
reservation code can dispose of an EFI the normal way, but without the
space actually being freed.
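
The net effect on the EFI finish path boils down to this (paraphrasing
the xfs_extent_free_finish_item change further down):

	/* A cancelled EFI logs an EFD without freeing the extent. */
	if (!(xefi->xefi_flags & XFS_EFI_CANCELLED))
		error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
				xefi->xefi_blockcount, &oinfo,
				xefi->xefi_agresv,
				xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);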

This has been lightly tested with fstests.  Enjoy!
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'repair-auto-reap-space-reservations-6.8_2023-12-06' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: force small EFIs for reaping btree extents
  xfs: log EFIs for all btree blocks being used to stage a btree
  xfs: implement block reservation accounting for btrees we're staging
  xfs: remove unused fields from struct xbtree_ifakeroot
  xfs: automatic freeing of freshly allocated unwritten space
  xfs: remove __xfs_free_extent_later
  xfs: allow pausing of pending deferred work items
  xfs: don't append work items to logged xfs_defer_pending objects
parents dec0224b 3f3cec03
@@ -181,6 +181,7 @@ xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o
ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
xfs-y += $(addprefix scrub/, \
agheader_repair.o \
newbt.o \
reap.o \
repair.o \
)
@@ -984,7 +984,7 @@ xfs_ag_shrink_space(
if (err2 != -ENOSPC)
goto resv_err;
err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
err2 = xfs_free_extent_later(*tpp, args.fsbno, delta, NULL,
XFS_AG_RESV_NONE, true);
if (err2)
goto resv_err;
@@ -2522,14 +2522,15 @@ xfs_defer_agfl_block(
* Add the extent to the list of extents to be freed at transaction end.
* The list is maintained sorted (by block number).
*/
int
__xfs_free_extent_later(
static int
xfs_defer_extent_free(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
bool skip_discard)
bool skip_discard,
struct xfs_defer_pending **dfpp)
{
struct xfs_extent_free_item *xefi;
struct xfs_mount *mp = tp->t_mountp;
@@ -2577,10 +2578,105 @@ __xfs_free_extent_later(
XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
xfs_extent_free_get_group(mp, xefi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
*dfpp = xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
return 0;
}
int
xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
bool skip_discard)
{
struct xfs_defer_pending *dontcare = NULL;
return xfs_defer_extent_free(tp, bno, len, oinfo, type, skip_discard,
&dontcare);
}
/*
* Set up automatic freeing of unwritten space in the filesystem.
*
* This function attaches a paused deferred extent free item to the
* transaction. Pausing means that the EFI will be logged in the next
* transaction commit, but the pending EFI will not be finished until the
* pending item is unpaused.
*
* If the system goes down after the EFI has been persisted to the log but
* before the pending item is unpaused, log recovery will find the EFI, fail to
* find the EFD, and free the space.
*
* If the pending item is unpaused, the next transaction commit will log an EFD
* without freeing the space.
*
* Caller must ensure that the tp, fsbno, len, oinfo, and resv fields of the
* @args structure are set to the relevant values.
*/
int
xfs_alloc_schedule_autoreap(
const struct xfs_alloc_arg *args,
bool skip_discard,
struct xfs_alloc_autoreap *aarp)
{
int error;
error = xfs_defer_extent_free(args->tp, args->fsbno, args->len,
&args->oinfo, args->resv, skip_discard, &aarp->dfp);
if (error)
return error;
xfs_defer_item_pause(args->tp, aarp->dfp);
return 0;
}
/*
* Cancel automatic freeing of unwritten space in the filesystem.
*
* Earlier, we created a paused deferred extent free item and attached it to
* this transaction so that we could automatically roll back a new space
* allocation if the system went down. Now we want to cancel the paused work
* item by marking the EFI stale so we don't actually free the space, unpausing
* the pending item and logging an EFD.
*
* The caller generally should have already mapped the space into the ondisk
* filesystem. If the reserved space was partially used, the caller must call
* xfs_free_extent_later to create a new EFI to free the unused space.
*/
void
xfs_alloc_cancel_autoreap(
struct xfs_trans *tp,
struct xfs_alloc_autoreap *aarp)
{
struct xfs_defer_pending *dfp = aarp->dfp;
struct xfs_extent_free_item *xefi;
if (!dfp)
return;
list_for_each_entry(xefi, &dfp->dfp_work, xefi_list)
xefi->xefi_flags |= XFS_EFI_CANCELLED;
xfs_defer_item_unpause(tp, dfp);
}
/*
* Commit automatic freeing of unwritten space in the filesystem.
*
* This unpauses an earlier _schedule_autoreap and commits to freeing the
* allocated space. Call this if none of the reserved space was used.
*/
void
xfs_alloc_commit_autoreap(
struct xfs_trans *tp,
struct xfs_alloc_autoreap *aarp)
{
if (aarp->dfp)
xfs_defer_item_unpause(tp, aarp->dfp);
}
#ifdef DEBUG
/*
* Check if an AGF has a free extent record whose length is equal to
@@ -231,7 +231,7 @@ xfs_buf_to_agfl_bno(
return bp->b_addr;
}
int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
int xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type, bool skip_discard);
@@ -255,18 +255,18 @@ void xfs_extent_free_get_group(struct xfs_mount *mp,
#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
#define XFS_EFI_CANCELLED (1U << 3) /* don't actually free the space */
static inline int
xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type)
{
return __xfs_free_extent_later(tp, bno, len, oinfo, type, false);
}
struct xfs_alloc_autoreap {
struct xfs_defer_pending *dfp;
};
int xfs_alloc_schedule_autoreap(const struct xfs_alloc_arg *args,
bool skip_discard, struct xfs_alloc_autoreap *aarp);
void xfs_alloc_cancel_autoreap(struct xfs_trans *tp,
struct xfs_alloc_autoreap *aarp);
void xfs_alloc_commit_autoreap(struct xfs_trans *tp,
struct xfs_alloc_autoreap *aarp);
extern struct kmem_cache *xfs_extfree_item_cache;
@@ -575,7 +575,7 @@ xfs_bmap_btree_to_extents(
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo,
XFS_AG_RESV_NONE);
XFS_AG_RESV_NONE, false);
if (error)
return error;
@@ -5218,7 +5218,7 @@ xfs_bmap_del_extent_real(
if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
xfs_refcount_decrease_extent(tp, del);
} else {
error = __xfs_free_extent_later(tp, del->br_startblock,
error = xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
XFS_AG_RESV_NONE,
((bflags & XFS_BMAPI_NODISCARD) ||
@@ -272,7 +272,7 @@ xfs_bmbt_free_block(
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo,
XFS_AG_RESV_NONE);
XFS_AG_RESV_NONE, false);
if (error)
return error;
@@ -37,12 +37,6 @@ struct xbtree_ifakeroot {
/* Number of bytes available for this fork in the inode. */
unsigned int if_fork_size;
/* Fork format. */
unsigned int if_format;
/* Number of records. */
unsigned int if_extents;
};
/* Cursor interactions with fake roots for inode-rooted btrees. */
@@ -182,6 +182,58 @@ static struct kmem_cache *xfs_defer_pending_cache;
* Note that the continuation requested between t2 and t3 is likely to
* reoccur.
*/
STATIC struct xfs_log_item *
xfs_defer_barrier_create_intent(
struct xfs_trans *tp,
struct list_head *items,
unsigned int count,
bool sort)
{
return NULL;
}
STATIC void
xfs_defer_barrier_abort_intent(
struct xfs_log_item *intent)
{
/* empty */
}
STATIC struct xfs_log_item *
xfs_defer_barrier_create_done(
struct xfs_trans *tp,
struct xfs_log_item *intent,
unsigned int count)
{
return NULL;
}
STATIC int
xfs_defer_barrier_finish_item(
struct xfs_trans *tp,
struct xfs_log_item *done,
struct list_head *item,
struct xfs_btree_cur **state)
{
ASSERT(0);
return -EFSCORRUPTED;
}
STATIC void
xfs_defer_barrier_cancel_item(
struct list_head *item)
{
ASSERT(0);
}
static const struct xfs_defer_op_type xfs_barrier_defer_type = {
.max_items = 1,
.create_intent = xfs_defer_barrier_create_intent,
.abort_intent = xfs_defer_barrier_abort_intent,
.create_done = xfs_defer_barrier_create_done,
.finish_item = xfs_defer_barrier_finish_item,
.cancel_item = xfs_defer_barrier_cancel_item,
};
static const struct xfs_defer_op_type *defer_op_types[] = {
[XFS_DEFER_OPS_TYPE_BMAP] = &xfs_bmap_update_defer_type,
@@ -190,6 +242,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = {
[XFS_DEFER_OPS_TYPE_FREE] = &xfs_extent_free_defer_type,
[XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type,
[XFS_DEFER_OPS_TYPE_ATTR] = &xfs_attr_defer_type,
[XFS_DEFER_OPS_TYPE_BARRIER] = &xfs_barrier_defer_type,
};
/* Create a log intent done item for a log intent item. */
@@ -487,7 +540,7 @@ xfs_defer_relog_intent(
* done item to release the intent item; and then log a new intent item.
* The caller should provide a fresh transaction and roll it after we're done.
*/
static int
static void
xfs_defer_relog(
struct xfs_trans **tpp,
struct list_head *dfops)
@@ -529,10 +582,6 @@ xfs_defer_relog(
xfs_defer_relog_intent(*tpp, dfp);
}
if ((*tpp)->t_flags & XFS_TRANS_DIRTY)
return xfs_defer_trans_roll(tpp);
return 0;
}
/*
@@ -588,6 +637,24 @@ xfs_defer_finish_one(
return error;
}
/* Move all paused deferred work from @tp to @paused_list. */
static void
xfs_defer_isolate_paused(
struct xfs_trans *tp,
struct list_head *paused_list)
{
struct xfs_defer_pending *dfp;
struct xfs_defer_pending *pli;
list_for_each_entry_safe(dfp, pli, &tp->t_dfops, dfp_list) {
if (!(dfp->dfp_flags & XFS_DEFER_PAUSED))
continue;
list_move_tail(&dfp->dfp_list, paused_list);
trace_xfs_defer_isolate_paused(tp->t_mountp, dfp);
}
}
/*
* Finish all the pending work. This involves logging intent items for
* any work items that wandered in since the last transaction roll (if
@@ -603,6 +670,7 @@ xfs_defer_finish_noroll(
struct xfs_defer_pending *dfp = NULL;
int error = 0;
LIST_HEAD(dop_pending);
LIST_HEAD(dop_paused);
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -621,6 +689,8 @@
*/
int has_intents = xfs_defer_create_intents(*tp);
xfs_defer_isolate_paused(*tp, &dop_paused);
list_splice_init(&(*tp)->t_dfops, &dop_pending);
if (has_intents < 0) {
@@ -633,22 +703,33 @@
goto out_shutdown;
/* Relog intent items to keep the log moving. */
error = xfs_defer_relog(tp, &dop_pending);
if (error)
goto out_shutdown;
xfs_defer_relog(tp, &dop_pending);
xfs_defer_relog(tp, &dop_paused);
if ((*tp)->t_flags & XFS_TRANS_DIRTY) {
error = xfs_defer_trans_roll(tp);
if (error)
goto out_shutdown;
}
}
dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
dfp_list);
dfp = list_first_entry_or_null(&dop_pending,
struct xfs_defer_pending, dfp_list);
if (!dfp)
break;
error = xfs_defer_finish_one(*tp, dfp);
if (error && error != -EAGAIN)
goto out_shutdown;
}
/* Requeue the paused items in the outgoing transaction. */
list_splice_tail_init(&dop_paused, &(*tp)->t_dfops);
trace_xfs_defer_finish_done(*tp, _RET_IP_);
return 0;
out_shutdown:
list_splice_tail_init(&dop_paused, &dop_pending);
xfs_defer_trans_abort(*tp, &dop_pending);
xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE);
trace_xfs_defer_finish_error(*tp, error);
@@ -661,6 +742,9 @@ int
xfs_defer_finish(
struct xfs_trans **tp)
{
#ifdef DEBUG
struct xfs_defer_pending *dfp;
#endif
int error;
/*
@@ -680,7 +764,10 @@
}
/* Reset LOWMODE now that we've finished all the dfops. */
ASSERT(list_empty(&(*tp)->t_dfops));
#ifdef DEBUG
list_for_each_entry(dfp, &(*tp)->t_dfops, dfp_list)
ASSERT(dfp->dfp_flags & XFS_DEFER_PAUSED);
#endif
(*tp)->t_flags &= ~XFS_TRANS_LOWMODE;
return 0;
}
@@ -692,11 +779,78 @@ xfs_defer_cancel(
struct xfs_mount *mp = tp->t_mountp;
trace_xfs_defer_cancel(tp, _RET_IP_);
xfs_defer_trans_abort(tp, &tp->t_dfops);
xfs_defer_cancel_list(mp, &tp->t_dfops);
}
/*
* Return the last pending work item attached to this transaction if it matches
* the deferred op type.
*/
static inline struct xfs_defer_pending *
xfs_defer_find_last(
struct xfs_trans *tp,
enum xfs_defer_ops_type type,
const struct xfs_defer_op_type *ops)
{
struct xfs_defer_pending *dfp = NULL;
/* No dfops at all? */
if (list_empty(&tp->t_dfops))
return NULL;
dfp = list_last_entry(&tp->t_dfops, struct xfs_defer_pending,
dfp_list);
/* Wrong type? */
if (dfp->dfp_type != type)
return NULL;
return dfp;
}
/*
* Decide if we can add a deferred work item to the last dfops item attached
* to the transaction.
*/
static inline bool
xfs_defer_can_append(
struct xfs_defer_pending *dfp,
const struct xfs_defer_op_type *ops)
{
/* Already logged? */
if (dfp->dfp_intent)
return false;
/* Paused items cannot absorb more work */
if (dfp->dfp_flags & XFS_DEFER_PAUSED)
return false;
/* Already full? */
if (ops->max_items && dfp->dfp_count >= ops->max_items)
return false;
return true;
}
/* Create a new pending item at the end of the transaction list. */
static inline struct xfs_defer_pending *
xfs_defer_alloc(
struct xfs_trans *tp,
enum xfs_defer_ops_type type)
{
struct xfs_defer_pending *dfp;
dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
GFP_NOFS | __GFP_NOFAIL);
dfp->dfp_type = type;
INIT_LIST_HEAD(&dfp->dfp_work);
list_add_tail(&dfp->dfp_list, &tp->t_dfops);
return dfp;
}
/* Add an item for later deferred processing. */
void
struct xfs_defer_pending *
xfs_defer_add(
struct xfs_trans *tp,
enum xfs_defer_ops_type type,
@@ -708,31 +862,37 @@ xfs_defer_add(
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX);
/*
* Add the item to a pending item at the end of the intake list.
* If the last pending item has the same type, reuse it. Else,
* create a new pending item at the end of the intake list.
*/
if (!list_empty(&tp->t_dfops)) {
dfp = list_last_entry(&tp->t_dfops,
struct xfs_defer_pending, dfp_list);
if (dfp->dfp_type != type ||
(ops->max_items && dfp->dfp_count >= ops->max_items))
dfp = NULL;
}
if (!dfp) {
dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
GFP_NOFS | __GFP_NOFAIL);
dfp->dfp_type = type;
dfp->dfp_intent = NULL;
dfp->dfp_done = NULL;
dfp->dfp_count = 0;
INIT_LIST_HEAD(&dfp->dfp_work);
list_add_tail(&dfp->dfp_list, &tp->t_dfops);
}
dfp = xfs_defer_find_last(tp, type, ops);
if (!dfp || !xfs_defer_can_append(dfp, ops))
dfp = xfs_defer_alloc(tp, type);
xfs_defer_add_item(dfp, li);
trace_xfs_defer_add_item(tp->t_mountp, dfp, li);
return dfp;
}
/*
* Add a defer ops barrier to force two otherwise adjacent deferred work items
* to be tracked separately and have separate log items.
*/
void
xfs_defer_add_barrier(
struct xfs_trans *tp)
{
struct xfs_defer_pending *dfp;
const enum xfs_defer_ops_type type = XFS_DEFER_OPS_TYPE_BARRIER;
const struct xfs_defer_op_type *ops = defer_op_types[type];
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
/* If the last defer op added was a barrier, we're done. */
dfp = xfs_defer_find_last(tp, type, ops);
if (dfp)
return;
dfp = xfs_defer_alloc(tp, type);
trace_xfs_defer_add_item(tp->t_mountp, dfp, NULL);
}
/*
@@ -1058,3 +1218,36 @@ xfs_defer_destroy_item_caches(void)
xfs_rmap_intent_destroy_cache();
xfs_defer_destroy_cache();
}
/*
* Mark a deferred work item so that it will be requeued indefinitely without
* being finished. Caller must ensure there are no data dependencies on this
* work item in the meantime.
*/
void
xfs_defer_item_pause(
struct xfs_trans *tp,
struct xfs_defer_pending *dfp)
{
ASSERT(!(dfp->dfp_flags & XFS_DEFER_PAUSED));
dfp->dfp_flags |= XFS_DEFER_PAUSED;
trace_xfs_defer_item_pause(tp->t_mountp, dfp);
}
/*
* Release a paused deferred work item so that it will be finished during the
* next transaction roll.
*/
void
xfs_defer_item_unpause(
struct xfs_trans *tp,
struct xfs_defer_pending *dfp)
{
ASSERT(dfp->dfp_flags & XFS_DEFER_PAUSED);
dfp->dfp_flags &= ~XFS_DEFER_PAUSED;
trace_xfs_defer_item_unpause(tp->t_mountp, dfp);
}
@@ -20,6 +20,7 @@ enum xfs_defer_ops_type {
XFS_DEFER_OPS_TYPE_FREE,
XFS_DEFER_OPS_TYPE_AGFL_FREE,
XFS_DEFER_OPS_TYPE_ATTR,
XFS_DEFER_OPS_TYPE_BARRIER,
XFS_DEFER_OPS_TYPE_MAX,
};
@@ -34,11 +35,24 @@ struct xfs_defer_pending {
struct xfs_log_item *dfp_intent; /* log intent item */
struct xfs_log_item *dfp_done; /* log done item */
unsigned int dfp_count; /* # extent items */
unsigned int dfp_flags;
enum xfs_defer_ops_type dfp_type;
};
void xfs_defer_add(struct xfs_trans *tp, enum xfs_defer_ops_type type,
struct list_head *h);
/*
* Create a log intent item for this deferred item, but don't actually finish
* the work. Caller must clear this before the final transaction commit.
*/
#define XFS_DEFER_PAUSED (1U << 0)
#define XFS_DEFER_PENDING_STRINGS \
{ XFS_DEFER_PAUSED, "paused" }
void xfs_defer_item_pause(struct xfs_trans *tp, struct xfs_defer_pending *dfp);
void xfs_defer_item_unpause(struct xfs_trans *tp, struct xfs_defer_pending *dfp);
struct xfs_defer_pending *xfs_defer_add(struct xfs_trans *tp,
enum xfs_defer_ops_type type, struct list_head *h);
int xfs_defer_finish_noroll(struct xfs_trans **tp);
int xfs_defer_finish(struct xfs_trans **tp);
int xfs_defer_finish_one(struct xfs_trans *tp, struct xfs_defer_pending *dfp);
@@ -150,4 +164,6 @@ xfs_defer_add_item(
int __init xfs_defer_init_item_caches(void);
void xfs_defer_destroy_item_caches(void);
void xfs_defer_add_barrier(struct xfs_trans *tp);
#endif /* __XFS_DEFER_H__ */
@@ -1854,7 +1854,7 @@ xfs_difree_inode_chunk(
return xfs_free_extent_later(tp,
XFS_AGB_TO_FSB(mp, agno, sagbno),
M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
XFS_AG_RESV_NONE);
XFS_AG_RESV_NONE, false);
}
/* holemask is only 16-bits (fits in an unsigned long) */
@@ -1900,7 +1900,8 @@ xfs_difree_inode_chunk(
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
error = xfs_free_extent_later(tp,
XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
&XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
&XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE,
false);
if (error)
return error;
@@ -161,7 +161,7 @@ __xfs_inobt_free_block(
xfs_inobt_mod_blockcount(cur, -1);
fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
&XFS_RMAP_OINFO_INOBT, resv);
&XFS_RMAP_OINFO_INOBT, resv, false);
}
STATIC int
@@ -1153,7 +1153,7 @@ xfs_refcount_adjust_extents(
tmp.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
tmp.rc_blockcount, NULL,
XFS_AG_RESV_NONE);
XFS_AG_RESV_NONE, false);
if (error)
goto out_error;
}
@@ -1215,7 +1215,7 @@ xfs_refcount_adjust_extents(
ext.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
ext.rc_blockcount, NULL,
XFS_AG_RESV_NONE);
XFS_AG_RESV_NONE, false);
if (error)
goto out_error;
}
@@ -1985,7 +1985,7 @@ xfs_refcount_recover_cow_leftovers(
/* Free the block. */
error = xfs_free_extent_later(tp, fsb,
rr->rr_rrec.rc_blockcount, NULL,
XFS_AG_RESV_NONE);
XFS_AG_RESV_NONE, false);
if (error)
goto out_trans;
@@ -112,7 +112,7 @@ xfs_refcountbt_free_block(
be32_add_cpu(&agf->agf_refcount_blocks, -1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS);
return xfs_free_extent_later(cur->bc_tp, fsbno, 1,
&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA);
&XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, false);
}
STATIC int
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2022-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_NEWBT_H__
#define __XFS_SCRUB_NEWBT_H__
struct xrep_newbt_resv {
/* Link to list of extents that we've reserved. */
struct list_head list;
struct xfs_perag *pag;
/* Auto-freeing this reservation if we don't commit. */
struct xfs_alloc_autoreap autoreap;
/* AG block of the extent we reserved. */
xfs_agblock_t agbno;
/* Length of the reservation. */
xfs_extlen_t len;
/* How much of this reservation has been used. */
xfs_extlen_t used;
};
struct xrep_newbt {
struct xfs_scrub *sc;
/* List of extents that we've reserved. */
struct list_head resv_list;
/* Fake root for new btree. */
union {
struct xbtree_afakeroot afake;
struct xbtree_ifakeroot ifake;
};
/* rmap owner of these blocks */
struct xfs_owner_info oinfo;
/* btree geometry for the bulk loader */
struct xfs_btree_bload bload;
/* Allocation hint */
xfs_fsblock_t alloc_hint;
/* per-ag reservation type */
enum xfs_ag_resv_type resv;
};
void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct xfs_scrub *sc);
void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct xfs_scrub *sc,
const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
enum xfs_ag_resv_type resv);
int xrep_newbt_init_inode(struct xrep_newbt *xnr, struct xfs_scrub *sc,
int whichfork, const struct xfs_owner_info *oinfo);
int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
void xrep_newbt_cancel(struct xrep_newbt *xnr);
int xrep_newbt_commit(struct xrep_newbt *xnr);
int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
union xfs_btree_ptr *ptr);
#endif /* __XFS_SCRUB_NEWBT_H__ */
@@ -31,6 +31,7 @@
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_attr_remote.h"
#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -409,13 +410,17 @@ xreap_agextent_iter(
/*
* Use deferred frees to get rid of the old btree blocks to try to
* minimize the window in which we could crash and lose the old blocks.
* Add a defer ops barrier every other extent to avoid stressing the
* system with large EFIs.
*/
error = __xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
error = xfs_free_extent_later(sc->tp, fsbno, *aglenp, rs->oinfo,
rs->resv, true);
if (error)
return error;
rs->deferred++;
if (rs->deferred % 2 == 0)
xfs_defer_add_barrier(sc->tp);
return 0;
}
@@ -1332,6 +1332,43 @@ TRACE_EVENT(xrep_ialloc_insert,
__entry->freemask)
)
DECLARE_EVENT_CLASS(xrep_newbt_extent_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_extlen_t len,
int64_t owner),
TP_ARGS(mp, agno, agbno, len, owner),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
__field(int64_t, owner)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
__entry->agbno = agbno;
__entry->len = len;
__entry->owner = owner;
),
TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
__entry->len,
__entry->owner)
);
#define DEFINE_NEWBT_EXTENT_EVENT(name) \
DEFINE_EVENT(xrep_newbt_extent_class, name, \
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
xfs_agblock_t agbno, xfs_extlen_t len, \
int64_t owner), \
TP_ARGS(mp, agno, agbno, len, owner))
DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_alloc_ag_blocks);
DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_alloc_file_blocks);
DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_free_blocks);
DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_claim_block);
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
@@ -453,7 +453,7 @@ xfs_extent_free_finish_item(
struct xfs_extent *extp;
uint next_extent;
xfs_agblock_t agbno;
int error;
int error = 0;
xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock);
@@ -473,9 +473,10 @@
* the existing EFI, and so we need to copy all the unprocessed extents
* in this EFI to the EFD so this works correctly.
*/
error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv,
xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
if (!(xefi->xefi_flags & XFS_EFI_CANCELLED))
error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv,
xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
if (error == -EAGAIN) {
xfs_efd_from_efi(efdp);
return error;
@@ -618,7 +618,7 @@ xfs_reflink_cancel_cow_blocks(
error = xfs_free_extent_later(*tpp, del.br_startblock,
del.br_blockcount, NULL,
XFS_AG_RESV_NONE);
XFS_AG_RESV_NONE, false);
if (error)
break;
@@ -2551,6 +2551,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class,
__field(dev_t, dev)
__field(int, type)
__field(void *, intent)
__field(unsigned int, flags)
__field(char, committed)
__field(int, nr)
),
@@ -2558,13 +2559,15 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_class,
__entry->dev = mp ? mp->m_super->s_dev : 0;
__entry->type = dfp->dfp_type;
__entry->intent = dfp->dfp_intent;
__entry->flags = dfp->dfp_flags;
__entry->committed = dfp->dfp_done != NULL;
__entry->nr = dfp->dfp_count;
),
TP_printk("dev %d:%d optype %d intent %p committed %d nr %d",
TP_printk("dev %d:%d optype %d intent %p flags %s committed %d nr %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->intent,
__print_flags(__entry->flags, "|", XFS_DEFER_PENDING_STRINGS),
__entry->committed,
__entry->nr)
)
@@ -2675,6 +2678,9 @@ DEFINE_DEFER_PENDING_EVENT(xfs_defer_cancel_list);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_relog_intent);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_isolate_paused);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_item_pause);
DEFINE_DEFER_PENDING_EVENT(xfs_defer_item_unpause);
#define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT
DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer);
@@ -2692,6 +2698,7 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_item_class,
__field(void *, intent)
__field(void *, item)
__field(char, committed)
__field(unsigned int, flags)
__field(int, nr)
),
TP_fast_assign(
@@ -2700,13 +2707,15 @@ DECLARE_EVENT_CLASS(xfs_defer_pending_item_class,
__entry->intent = dfp->dfp_intent;
__entry->item = item;
__entry->committed = dfp->dfp_done != NULL;
__entry->flags = dfp->dfp_flags;
__entry->nr = dfp->dfp_count;
),
TP_printk("dev %d:%d optype %d intent %p item %p committed %d nr %d",
TP_printk("dev %d:%d optype %d intent %p item %p flags %s committed %d nr %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->type,
__entry->intent,
__entry->item,
__print_flags(__entry->flags, "|", XFS_DEFER_PENDING_STRINGS),
__entry->committed,
__entry->nr)
)