Commit 00fd1d56 authored by Darrick J. Wong

xfs: redesign the reflink remap loop to fix blkres depletion crash

The existing reflink remapping loop has some structural problems that
need addressing:

The biggest problem is that we create one transaction for each extent in
the source file without accounting for the number of mappings there are
for the same range in the destination file.  In other words, we don't
know the number of remap operations that will be necessary and we
therefore cannot guess the block reservation required.  On highly
fragmented filesystems (e.g. ones with active dedupe) we guess wrong,
run out of block reservation, and fail.

The second problem is that we don't actually use the bmap intents to
their full potential -- instead of calling bunmapi directly and having
to deal with its backwards operation, we could call the deferred ops
xfs_bmap_unmap_extent and xfs_refcount_decrease_extent instead.  This
makes the frontend loop much simpler.

Solve all of these problems by refactoring the remapping loops so that
we only perform one remapping operation per transaction, and each
operation only tries to remap a single extent from source to dest.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Reported-by: Edwin Török <edwin@etorok.net>
Tested-by: Edwin Török <edwin@etorok.net>
parent 877f58f5
......@@ -158,6 +158,13 @@ static inline int xfs_bmapi_whichfork(int bmapi_flags)
{ BMAP_ATTRFORK, "ATTR" }, \
{ BMAP_COWFORK, "COW" }
/*
 * Report whether the mapping describes allocated space.  The written or
 * unwritten state is deliberately not consulted; only the start block is
 * compared against the sentinel values.
 */
static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec)
{
	/* Reject the hole sentinel. */
	if (irec->br_startblock == HOLESTARTBLOCK)
		return false;

	/* Reject the delayed-allocation sentinel. */
	if (irec->br_startblock == DELAYSTARTBLOCK)
		return false;

	/* Anything left is real unless it is a null start block. */
	return !isnullstartblock(irec->br_startblock);
}
/*
* Return true if the extent is a real, allocated extent, or false if it is a
......@@ -165,10 +172,8 @@ static inline int xfs_bmapi_whichfork(int bmapi_flags)
*/
static inline bool xfs_bmap_is_written_extent(struct xfs_bmbt_irec *irec)
{
	/*
	 * A written extent is a real (allocated) extent whose state is not
	 * XFS_EXT_UNWRITTEN.  The start block checks are delegated to
	 * xfs_bmap_is_real_extent() so they are spelled out in one place only,
	 * and there is a single return instead of a dead second one.
	 */
	return xfs_bmap_is_real_extent(irec) &&
	       irec->br_state != XFS_EXT_UNWRITTEN;
}
/*
......
This diff is collapsed.
......@@ -3052,8 +3052,7 @@ DEFINE_EVENT(xfs_inode_irec_class, name, \
/*
 * Reflink tracepoints declared through the DEFINE_*_EVENT helper macros.
 * NOTE(review): the helper macros are defined outside this view; presumably
 * each one instantiates a tracepoint of a shared event class -- confirm.
 */
DEFINE_INODE_EVENT(xfs_reflink_set_inode_flag);
DEFINE_INODE_EVENT(xfs_reflink_unset_inode_flag);
DEFINE_ITRUNC_EVENT(xfs_reflink_update_inode_size);
DEFINE_IMAP_EVENT(xfs_reflink_remap_imap);
TRACE_EVENT(xfs_reflink_remap_blocks_loop,
TRACE_EVENT(xfs_reflink_remap_blocks,
TP_PROTO(struct xfs_inode *src, xfs_fileoff_t soffset,
xfs_filblks_t len, struct xfs_inode *dest,
xfs_fileoff_t doffset),
......@@ -3084,59 +3083,14 @@ TRACE_EVENT(xfs_reflink_remap_blocks_loop,
__entry->dest_ino,
__entry->dest_lblk)
);
/*
 * Tracepoint xfs_reflink_punch_range.
 *
 * Takes an inode (ip), a file offset (lblk), and a length (len).  The record
 * stores the device number read from the inode's superblock, the inode
 * number, and the lblk/len arguments as passed in.  The output prints the
 * device as decimal major:minor and the remaining fields in hexadecimal.
 */
TRACE_EVENT(xfs_reflink_punch_range,
TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk,
xfs_extlen_t len),
TP_ARGS(ip, lblk, len),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_fileoff_t, lblk)
__field(xfs_extlen_t, len)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->lblk = lblk;
__entry->len = len;
),
TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->lblk,
__entry->len)
);
/*
 * Tracepoint xfs_reflink_remap.
 *
 * Takes an inode (ip), a file offset (lblk), a length (len), and a block
 * number (new_pblk).  The record stores the device number read from the
 * inode's superblock, the inode number, and the three remaining arguments as
 * passed in.  The output prints the device as decimal major:minor, ino/lblk/
 * len in hexadecimal, and new_pblk in decimal.
 */
TRACE_EVENT(xfs_reflink_remap,
TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t lblk,
xfs_extlen_t len, xfs_fsblock_t new_pblk),
TP_ARGS(ip, lblk, len, new_pblk),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_fileoff_t, lblk)
__field(xfs_extlen_t, len)
__field(xfs_fsblock_t, new_pblk)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->lblk = lblk;
__entry->len = len;
__entry->new_pblk = new_pblk;
),
TP_printk("dev %d:%d ino 0x%llx lblk 0x%llx len 0x%x new_pblk %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->lblk,
__entry->len,
__entry->new_pblk)
);
/*
 * Reflink remap tracepoints: one range event, several *_error events, and
 * the source/destination mapping events for a single extent remap.
 * NOTE(review): the DEFINE_*_EVENT helpers are defined outside this view;
 * the recorded fields depend on those definitions -- confirm there.
 */
DEFINE_DOUBLE_IO_EVENT(xfs_reflink_remap_range);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_set_inode_flag_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_update_inode_size_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_blocks_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_remap_extent_error);
DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_src);
DEFINE_INODE_IREC_EVENT(xfs_reflink_remap_extent_dest);
/* dedupe tracepoints */
DEFINE_DOUBLE_IO_EVENT(xfs_reflink_compare_extents);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment