Commit e3042be3 authored by Darrick J. Wong's avatar Darrick J. Wong

xfs: automatic freeing of freshly allocated unwritten space

As mentioned in the previous commit, online repair wants to allocate
space to write out a new metadata structure, and it also wants to hedge
against system crashes during repairs by logging (and later cancelling)
EFIs to free the space if we crash before committing the new data
structure.

Therefore, create a trio of functions to schedule automatic reaping of
freshly allocated unwritten space.  xfs_alloc_schedule_autoreap creates
a paused EFI representing the space we just allocated.  Once the
allocations are made and the autoreaps scheduled, we can start writing
to disk.

If the writes succeed, xfs_alloc_cancel_autoreap marks the EFI work
items as stale and unpauses the pending deferred work item.  Assuming
that's done in the same transaction that commits the new structure into
the filesystem, we guarantee that either the new object is fully
visible, or that all the space gets reclaimed.

If the writes succeed but only part of an extent was used, repair must
call the same _cancel_autoreap function to kill the first EFI and then
log a new EFI to free the unused space.  The first EFI is already
committed, so it cannot be changed.

For full extents that aren't used, xfs_alloc_commit_autoreap will
unpause the EFI, which results in the space being freed during the next
_defer_finish cycle.
Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
parent 4c88fef3
...@@ -2522,14 +2522,15 @@ xfs_defer_agfl_block( ...@@ -2522,14 +2522,15 @@ xfs_defer_agfl_block(
* Add the extent to the list of extents to be free at transaction end. * Add the extent to the list of extents to be free at transaction end.
* The list is maintained sorted (by block number). * The list is maintained sorted (by block number).
*/ */
int static int
xfs_free_extent_later( xfs_defer_extent_free(
struct xfs_trans *tp, struct xfs_trans *tp,
xfs_fsblock_t bno, xfs_fsblock_t bno,
xfs_filblks_t len, xfs_filblks_t len,
const struct xfs_owner_info *oinfo, const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type, enum xfs_ag_resv_type type,
bool skip_discard) bool skip_discard,
struct xfs_defer_pending **dfpp)
{ {
struct xfs_extent_free_item *xefi; struct xfs_extent_free_item *xefi;
struct xfs_mount *mp = tp->t_mountp; struct xfs_mount *mp = tp->t_mountp;
...@@ -2577,10 +2578,105 @@ xfs_free_extent_later( ...@@ -2577,10 +2578,105 @@ xfs_free_extent_later(
XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);
xfs_extent_free_get_group(mp, xefi); xfs_extent_free_get_group(mp, xefi);
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list); *dfpp = xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
return 0;
}
int
xfs_free_extent_later(
struct xfs_trans *tp,
xfs_fsblock_t bno,
xfs_filblks_t len,
const struct xfs_owner_info *oinfo,
enum xfs_ag_resv_type type,
bool skip_discard)
{
struct xfs_defer_pending *dontcare = NULL;
return xfs_defer_extent_free(tp, bno, len, oinfo, type, skip_discard,
&dontcare);
}
/*
* Set up automatic freeing of unwritten space in the filesystem.
*
* This function attached a paused deferred extent free item to the
* transaction. Pausing means that the EFI will be logged in the next
* transaction commit, but the pending EFI will not be finished until the
* pending item is unpaused.
*
* If the system goes down after the EFI has been persisted to the log but
* before the pending item is unpaused, log recovery will find the EFI, fail to
* find the EFD, and free the space.
*
* If the pending item is unpaused, the next transaction commit will log an EFD
* without freeing the space.
*
* Caller must ensure that the tp, fsbno, len, oinfo, and resv flags of the
* @args structure are set to the relevant values.
*/
int
xfs_alloc_schedule_autoreap(
const struct xfs_alloc_arg *args,
bool skip_discard,
struct xfs_alloc_autoreap *aarp)
{
int error;
error = xfs_defer_extent_free(args->tp, args->fsbno, args->len,
&args->oinfo, args->resv, skip_discard, &aarp->dfp);
if (error)
return error;
xfs_defer_item_pause(args->tp, aarp->dfp);
return 0; return 0;
} }
/*
* Cancel automatic freeing of unwritten space in the filesystem.
*
* Earlier, we created a paused deferred extent free item and attached it to
* this transaction so that we could automatically roll back a new space
* allocation if the system went down. Now we want to cancel the paused work
* item by marking the EFI stale so we don't actually free the space, unpausing
* the pending item and logging an EFD.
*
* The caller generally should have already mapped the space into the ondisk
* filesystem. If the reserved space was partially used, the caller must call
* xfs_free_extent_later to create a new EFI to free the unused space.
*/
void
xfs_alloc_cancel_autoreap(
struct xfs_trans *tp,
struct xfs_alloc_autoreap *aarp)
{
struct xfs_defer_pending *dfp = aarp->dfp;
struct xfs_extent_free_item *xefi;
if (!dfp)
return;
list_for_each_entry(xefi, &dfp->dfp_work, xefi_list)
xefi->xefi_flags |= XFS_EFI_CANCELLED;
xfs_defer_item_unpause(tp, dfp);
}
/*
* Commit automatic freeing of unwritten space in the filesystem.
*
* This unpauses an earlier _schedule_autoreap and commits to freeing the
* allocated space. Call this if none of the reserved space was used.
*/
void
xfs_alloc_commit_autoreap(
struct xfs_trans *tp,
struct xfs_alloc_autoreap *aarp)
{
if (aarp->dfp)
xfs_defer_item_unpause(tp, aarp->dfp);
}
#ifdef DEBUG #ifdef DEBUG
/* /*
* Check if an AGF has a free extent record whose length is equal to * Check if an AGF has a free extent record whose length is equal to
......
...@@ -255,6 +255,18 @@ void xfs_extent_free_get_group(struct xfs_mount *mp, ...@@ -255,6 +255,18 @@ void xfs_extent_free_get_group(struct xfs_mount *mp,
#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ #define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ #define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ #define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
#define XFS_EFI_CANCELLED (1U << 3) /* dont actually free the space */
struct xfs_alloc_autoreap {
struct xfs_defer_pending *dfp;
};
int xfs_alloc_schedule_autoreap(const struct xfs_alloc_arg *args,
bool skip_discard, struct xfs_alloc_autoreap *aarp);
void xfs_alloc_cancel_autoreap(struct xfs_trans *tp,
struct xfs_alloc_autoreap *aarp);
void xfs_alloc_commit_autoreap(struct xfs_trans *tp,
struct xfs_alloc_autoreap *aarp);
extern struct kmem_cache *xfs_extfree_item_cache; extern struct kmem_cache *xfs_extfree_item_cache;
......
...@@ -453,7 +453,7 @@ xfs_extent_free_finish_item( ...@@ -453,7 +453,7 @@ xfs_extent_free_finish_item(
struct xfs_extent *extp; struct xfs_extent *extp;
uint next_extent; uint next_extent;
xfs_agblock_t agbno; xfs_agblock_t agbno;
int error; int error = 0;
xefi = container_of(item, struct xfs_extent_free_item, xefi_list); xefi = container_of(item, struct xfs_extent_free_item, xefi_list);
agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock); agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock);
...@@ -473,9 +473,10 @@ xfs_extent_free_finish_item( ...@@ -473,9 +473,10 @@ xfs_extent_free_finish_item(
* the existing EFI, and so we need to copy all the unprocessed extents * the existing EFI, and so we need to copy all the unprocessed extents
* in this EFI to the EFD so this works correctly. * in this EFI to the EFD so this works correctly.
*/ */
error = __xfs_free_extent(tp, xefi->xefi_pag, agbno, if (!(xefi->xefi_flags & XFS_EFI_CANCELLED))
xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv, error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
xefi->xefi_flags & XFS_EFI_SKIP_DISCARD); xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv,
xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
if (error == -EAGAIN) { if (error == -EAGAIN) {
xfs_efd_from_efi(efdp); xfs_efd_from_efi(efdp);
return error; return error;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment