Commit 174edb0e authored by Darrick J. Wong

xfs: store in-progress CoW allocations in the refcount btree

Due to the way the CoW algorithm in XFS works, there's an interval
during which blocks allocated to handle a CoW can be lost -- if the FS
goes down after the blocks are allocated but before the block
remapping takes place.  This is exacerbated by the cowextsz hint --
allocated reservations can sit around for a while, waiting to get
used.

Since the refcount btree doesn't normally store records with refcount
of 1, we can use it to record these in-progress extents.  In-progress
blocks cannot be shared because they're not user-visible, so there
shouldn't be any conflicts with other programs.  This is a better
solution than holding EFIs during writeback because (a) EFIs can't be
relogged currently, (b) even if they could, EFIs are bound by
available log space, which puts an unnecessary upper bound on how much
CoW we can have in flight, and (c) we already have a mechanism to
track blocks.

At mount time, read the refcount records and free anything we find
with a refcount of 1 because those were in-progress when the FS went
down.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 5e7e605c
...@@ -4631,6 +4631,17 @@ xfs_bmapi_write( ...@@ -4631,6 +4631,17 @@ xfs_bmapi_write(
goto error0; goto error0;
if (bma.blkno == NULLFSBLOCK) if (bma.blkno == NULLFSBLOCK)
break; break;
/*
* If this is a CoW allocation, record the data in
* the refcount btree for orphan recovery.
*/
if (whichfork == XFS_COW_FORK) {
error = xfs_refcount_alloc_cow_extent(mp, dfops,
bma.blkno, bma.length);
if (error)
goto error0;
}
} }
/* Deal with the allocated space we found. */ /* Deal with the allocated space we found. */
......
...@@ -1375,7 +1375,8 @@ struct xfs_owner_info { ...@@ -1375,7 +1375,8 @@ struct xfs_owner_info {
#define XFS_RMAP_OWN_INOBT (-6ULL) /* Inode btree blocks */ #define XFS_RMAP_OWN_INOBT (-6ULL) /* Inode btree blocks */
#define XFS_RMAP_OWN_INODES (-7ULL) /* Inode chunk */ #define XFS_RMAP_OWN_INODES (-7ULL) /* Inode chunk */
#define XFS_RMAP_OWN_REFC (-8ULL) /* refcount tree */ #define XFS_RMAP_OWN_REFC (-8ULL) /* refcount tree */
#define XFS_RMAP_OWN_MIN (-9ULL) /* guard */ #define XFS_RMAP_OWN_COW (-9ULL) /* cow allocations */
#define XFS_RMAP_OWN_MIN (-10ULL) /* guard */
#define XFS_RMAP_NON_INODE_OWNER(owner) (!!((owner) & (1ULL << 63))) #define XFS_RMAP_NON_INODE_OWNER(owner) (!!((owner) & (1ULL << 63)))
...@@ -1477,6 +1478,17 @@ unsigned int xfs_refc_block(struct xfs_mount *mp); ...@@ -1477,6 +1478,17 @@ unsigned int xfs_refc_block(struct xfs_mount *mp);
* data) are not tracked here. Free space is also not tracked here. * data) are not tracked here. Free space is also not tracked here.
* This is consistent with pre-reflink XFS. * This is consistent with pre-reflink XFS.
*/ */
/*
 * Extents that are being used to stage a copy on write are stored
 * in the refcount btree with a refcount of 1 and the upper bit set
 * on the startblock. This speeds up mount time deletion of stale
 * staging extents because they're all at the right side of the tree.
 */
/* Bit 31 of a 32-bit AG block number: marks a CoW staging record. */
#define XFS_REFC_COW_START ((xfs_agblock_t)(1U << 31))
/*
 * Widths of the two fields in the start block: 1 flag bit plus 31
 * block-number bits, 32 bits in total.
 */
#define REFCNTBT_COWFLAG_BITLEN 1
#define REFCNTBT_AGBLOCK_BITLEN 31
struct xfs_refcount_rec { struct xfs_refcount_rec {
__be32 rc_startblock; /* starting block number */ __be32 rc_startblock; /* starting block number */
__be32 rc_blockcount; /* count of blocks */ __be32 rc_blockcount; /* count of blocks */
......
This diff is collapsed.
...@@ -58,4 +58,13 @@ extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur, ...@@ -58,4 +58,13 @@ extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur,
xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
xfs_extlen_t *flen, bool find_end_of_shared); xfs_extlen_t *flen, bool find_end_of_shared);
/*
 * Record a newly allocated CoW staging extent (fsb, len) so it can be
 * found by orphan recovery. A nonzero return is treated as an error by
 * the callers.
 */
extern int xfs_refcount_alloc_cow_extent(struct xfs_mount *mp,
struct xfs_defer_ops *dfops, xfs_fsblock_t fsb,
xfs_extlen_t len);
/*
 * Drop the CoW orphan record for the extent (fsb, len); used when CoW
 * blocks are cancelled or remapped. A nonzero return is treated as an
 * error by the callers.
 */
extern int xfs_refcount_free_cow_extent(struct xfs_mount *mp,
struct xfs_defer_ops *dfops, xfs_fsblock_t fsb,
xfs_extlen_t len);
/*
 * Clean up leftover CoW staging extents in allocation group agno.
 * A nonzero return is treated as an error by the caller.
 */
extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
xfs_agnumber_t agno);
#endif /* __XFS_REFCOUNT_H__ */ #endif /* __XFS_REFCOUNT_H__ */
...@@ -44,6 +44,7 @@ ...@@ -44,6 +44,7 @@
#include "xfs_sysfs.h" #include "xfs_sysfs.h"
#include "xfs_rmap_btree.h" #include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h" #include "xfs_refcount_btree.h"
#include "xfs_reflink.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex); static DEFINE_MUTEX(xfs_uuid_table_mutex);
...@@ -985,10 +986,21 @@ xfs_mountfs( ...@@ -985,10 +986,21 @@ xfs_mountfs(
if (error) if (error)
xfs_warn(mp, xfs_warn(mp,
"Unable to allocate reserve blocks. Continuing without reserve pool."); "Unable to allocate reserve blocks. Continuing without reserve pool.");
/* Recover any CoW blocks that never got remapped. */
error = xfs_reflink_recover_cow(mp);
if (error) {
xfs_err(mp,
"Error %d recovering leftover CoW allocations.", error);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
goto out_quota;
}
} }
return 0; return 0;
out_quota:
xfs_qm_unmount_quotas(mp);
out_rtunmount: out_rtunmount:
xfs_rtunmount_inodes(mp); xfs_rtunmount_inodes(mp);
out_rele_rip: out_rele_rip:
......
...@@ -502,6 +502,18 @@ xfs_cui_recover( ...@@ -502,6 +502,18 @@ xfs_cui_recover(
error = xfs_refcount_decrease_extent( error = xfs_refcount_decrease_extent(
tp->t_mountp, &dfops, &irec); tp->t_mountp, &dfops, &irec);
break; break;
case XFS_REFCOUNT_ALLOC_COW:
error = xfs_refcount_alloc_cow_extent(
tp->t_mountp, &dfops,
irec.br_startblock,
irec.br_blockcount);
break;
case XFS_REFCOUNT_FREE_COW:
error = xfs_refcount_free_cow_extent(
tp->t_mountp, &dfops,
irec.br_startblock,
irec.br_blockcount);
break;
default: default:
ASSERT(0); ASSERT(0);
} }
......
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include "xfs_log.h" #include "xfs_log.h"
#include "xfs_icache.h" #include "xfs_icache.h"
#include "xfs_pnfs.h" #include "xfs_pnfs.h"
#include "xfs_btree.h"
#include "xfs_refcount_btree.h" #include "xfs_refcount_btree.h"
#include "xfs_refcount.h" #include "xfs_refcount.h"
#include "xfs_bmap_btree.h" #include "xfs_bmap_btree.h"
...@@ -563,6 +564,13 @@ xfs_reflink_cancel_cow_blocks( ...@@ -563,6 +564,13 @@ xfs_reflink_cancel_cow_blocks(
xfs_trans_ijoin(*tpp, ip, 0); xfs_trans_ijoin(*tpp, ip, 0);
xfs_defer_init(&dfops, &firstfsb); xfs_defer_init(&dfops, &firstfsb);
/* Free the CoW orphan record. */
error = xfs_refcount_free_cow_extent(ip->i_mount,
&dfops, irec.br_startblock,
irec.br_blockcount);
if (error)
break;
xfs_bmap_add_free(ip->i_mount, &dfops, xfs_bmap_add_free(ip->i_mount, &dfops,
irec.br_startblock, irec.br_blockcount, irec.br_startblock, irec.br_blockcount,
NULL); NULL);
...@@ -719,6 +727,13 @@ xfs_reflink_end_cow( ...@@ -719,6 +727,13 @@ xfs_reflink_end_cow(
irec.br_blockcount = rlen; irec.br_blockcount = rlen;
trace_xfs_reflink_cow_remap_piece(ip, &uirec); trace_xfs_reflink_cow_remap_piece(ip, &uirec);
/* Free the CoW orphan record. */
error = xfs_refcount_free_cow_extent(tp->t_mountp,
&dfops, uirec.br_startblock,
uirec.br_blockcount);
if (error)
goto out_defer;
/* Map the new blocks into the data fork. */ /* Map the new blocks into the data fork. */
error = xfs_bmap_map_extent(tp->t_mountp, &dfops, error = xfs_bmap_map_extent(tp->t_mountp, &dfops,
ip, &uirec); ip, &uirec);
...@@ -755,3 +770,25 @@ xfs_reflink_end_cow( ...@@ -755,3 +770,25 @@ xfs_reflink_end_cow(
trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_); trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
return error; return error;
} }
/*
 * Release CoW staging reservations that were left behind.
 *
 * Visits each allocation group in order and asks the refcount code to
 * recover its leftovers.  Filesystems without the reflink feature have
 * nothing to recover and return immediately.
 *
 * Returns 0 on success, or the first error reported for any AG (later
 * AGs are not visited once an error occurs).
 */
int
xfs_reflink_recover_cow(
	struct xfs_mount	*mp)
{
	xfs_agnumber_t		ag = 0;
	int			rc;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return 0;

	while (ag < mp->m_sb.sb_agcount) {
		rc = xfs_refcount_recover_cow_leftovers(mp, ag);
		if (rc)
			return rc;
		ag++;
	}

	return 0;
}
...@@ -42,5 +42,6 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, ...@@ -42,5 +42,6 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count); xfs_off_t count);
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count); xfs_off_t count);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
#endif /* __XFS_REFLINK_H */ #endif /* __XFS_REFLINK_H */
...@@ -1316,6 +1316,15 @@ xfs_fs_remount( ...@@ -1316,6 +1316,15 @@ xfs_fs_remount(
xfs_restore_resvblks(mp); xfs_restore_resvblks(mp);
xfs_log_work_queue(mp); xfs_log_work_queue(mp);
xfs_queue_eofblocks(mp); xfs_queue_eofblocks(mp);
/* Recover any CoW blocks that never got remapped. */
error = xfs_reflink_recover_cow(mp);
if (error) {
xfs_err(mp,
"Error %d recovering leftover CoW allocations.", error);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
return error;
}
} }
/* rw -> ro */ /* rw -> ro */
......
...@@ -2916,14 +2916,18 @@ DEFINE_AG_ERROR_EVENT(xfs_refcount_update_error); ...@@ -2916,14 +2916,18 @@ DEFINE_AG_ERROR_EVENT(xfs_refcount_update_error);
/* refcount adjustment tracepoints */ /* refcount adjustment tracepoints */
DEFINE_AG_EXTENT_EVENT(xfs_refcount_increase); DEFINE_AG_EXTENT_EVENT(xfs_refcount_increase);
DEFINE_AG_EXTENT_EVENT(xfs_refcount_decrease); DEFINE_AG_EXTENT_EVENT(xfs_refcount_decrease);
DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_increase);
DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_decrease);
DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(xfs_refcount_merge_center_extents); DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(xfs_refcount_merge_center_extents);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_modify_extent); DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_modify_extent);
DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_recover_extent);
DEFINE_REFCOUNT_EXTENT_AT_EVENT(xfs_refcount_split_extent); DEFINE_REFCOUNT_EXTENT_AT_EVENT(xfs_refcount_split_extent);
DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_left_extent); DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_left_extent);
DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_right_extent); DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_right_extent);
DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_left_extent); DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_left_extent);
DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_right_extent); DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_right_extent);
DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_error); DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_cow_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_center_extents_error); DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_center_extents_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_modify_extent_error); DEFINE_AG_ERROR_EVENT(xfs_refcount_modify_extent_error);
DEFINE_AG_ERROR_EVENT(xfs_refcount_split_extent_error); DEFINE_AG_ERROR_EVENT(xfs_refcount_split_extent_error);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment