Commit f65306ea authored by Darrick J. Wong

xfs: map an inode's offset to an exact physical block

Teach the bmap routine to know how to map a range of file blocks to a
specific range of physical blocks, instead of simply allocating fresh
blocks.  This enables reflink to map a file to blocks that are already
in use.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 77d61fe4
...@@ -3876,6 +3876,63 @@ xfs_bmap_btalloc( ...@@ -3876,6 +3876,63 @@ xfs_bmap_btalloc(
return 0; return 0;
} }
/*
* For a remap operation, just "allocate" an extent at the address that the
* caller passed in, and ensure that the AGFL is the right size. The caller
* will then map the "allocated" extent into the file somewhere.
*/
STATIC int
xfs_bmap_remap_alloc(
struct xfs_bmalloca *ap)
{
struct xfs_trans *tp = ap->tp;
struct xfs_mount *mp = tp->t_mountp;
xfs_agblock_t bno;
struct xfs_alloc_arg args;
int error;
/*
* validate that the block number is legal - the enables us to detect
* and handle a silent filesystem corruption rather than crashing.
*/
memset(&args, 0, sizeof(struct xfs_alloc_arg));
args.tp = ap->tp;
args.mp = ap->tp->t_mountp;
bno = *ap->firstblock;
args.agno = XFS_FSB_TO_AGNO(mp, bno);
args.agbno = XFS_FSB_TO_AGBNO(mp, bno);
if (args.agno >= mp->m_sb.sb_agcount ||
args.agbno >= mp->m_sb.sb_agblocks)
return -EFSCORRUPTED;
/* "Allocate" the extent from the range we passed in. */
trace_xfs_bmap_remap_alloc(ap->ip, *ap->firstblock, ap->length);
ap->blkno = bno;
ap->ip->i_d.di_nblocks += ap->length;
xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
/* Fix the freelist, like a real allocator does. */
args.datatype = ap->datatype;
args.pag = xfs_perag_get(args.mp, args.agno);
ASSERT(args.pag);
/*
* The freelist fixing code will decline the allocation if
* the size and shape of the free space doesn't allow for
* allocating the extent and updating all the metadata that
* happens during an allocation. We're remapping, not
* allocating, so skip that check by pretending to be freeing.
*/
error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
if (error)
goto error0;
error0:
xfs_perag_put(args.pag);
if (error)
trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
return error;
}
/* /*
* xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file. * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
* It figures out where to ask the underlying allocator to put the new extent. * It figures out where to ask the underlying allocator to put the new extent.
...@@ -3884,6 +3941,8 @@ STATIC int ...@@ -3884,6 +3941,8 @@ STATIC int
xfs_bmap_alloc( xfs_bmap_alloc(
struct xfs_bmalloca *ap) /* bmap alloc argument struct */ struct xfs_bmalloca *ap) /* bmap alloc argument struct */
{ {
if (ap->flags & XFS_BMAPI_REMAP)
return xfs_bmap_remap_alloc(ap);
if (XFS_IS_REALTIME_INODE(ap->ip) && if (XFS_IS_REALTIME_INODE(ap->ip) &&
xfs_alloc_is_userdata(ap->datatype)) xfs_alloc_is_userdata(ap->datatype))
return xfs_bmap_rtalloc(ap); return xfs_bmap_rtalloc(ap);
...@@ -4442,6 +4501,9 @@ xfs_bmapi_write( ...@@ -4442,6 +4501,9 @@ xfs_bmapi_write(
ASSERT(len > 0); ASSERT(len > 0);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
/* zeroing is for currently only for data extents, not metadata */ /* zeroing is for currently only for data extents, not metadata */
ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
...@@ -4502,6 +4564,12 @@ xfs_bmapi_write( ...@@ -4502,6 +4564,12 @@ xfs_bmapi_write(
inhole = eof || bma.got.br_startoff > bno; inhole = eof || bma.got.br_startoff > bno;
wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
/*
* Make sure we only reflink into a hole.
*/
if (flags & XFS_BMAPI_REMAP)
ASSERT(inhole);
/* /*
* First, deal with the hole before the allocated space * First, deal with the hole before the allocated space
* that we found, if any. * that we found, if any.
......
...@@ -97,6 +97,13 @@ struct xfs_extent_free_item ...@@ -97,6 +97,13 @@ struct xfs_extent_free_item
*/ */
#define XFS_BMAPI_ZERO 0x080 #define XFS_BMAPI_ZERO 0x080
/*
 * Map the inode offset to the block given in ap->firstblock instead of
 * allocating a fresh block.  Primarily used for reflink.
 *
 * Restrictions (asserted in xfs_bmapi_write): the range must be in a hole,
 * this flag cannot be combined with PREALLOC or CONVERT, and it cannot be
 * used on the attr fork.
 */
#define XFS_BMAPI_REMAP 0x100
#define XFS_BMAPI_FLAGS \ #define XFS_BMAPI_FLAGS \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \ { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \ { XFS_BMAPI_METADATA, "METADATA" }, \
...@@ -105,7 +112,8 @@ struct xfs_extent_free_item ...@@ -105,7 +112,8 @@ struct xfs_extent_free_item
{ XFS_BMAPI_IGSTATE, "IGSTATE" }, \ { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
{ XFS_BMAPI_CONTIG, "CONTIG" }, \ { XFS_BMAPI_CONTIG, "CONTIG" }, \
{ XFS_BMAPI_CONVERT, "CONVERT" }, \ { XFS_BMAPI_CONVERT, "CONVERT" }, \
{ XFS_BMAPI_ZERO, "ZERO" } { XFS_BMAPI_ZERO, "ZERO" }, \
{ XFS_BMAPI_REMAP, "REMAP" }
static inline int xfs_bmapi_aflag(int w) static inline int xfs_bmapi_aflag(int w)
......
...@@ -2968,6 +2968,60 @@ TRACE_EVENT(xfs_refcount_finish_one_leftover, ...@@ -2968,6 +2968,60 @@ TRACE_EVENT(xfs_refcount_finish_one_leftover,
__entry->new_len) __entry->new_len)
); );
/*
 * Simple inode-based error/%ip tracepoint class.
 *
 * Records the device and inode number of @ip, the @error code being
 * reported, and @caller_ip, the instruction pointer of the reporting site
 * (printed symbolically via %ps).  The inode number is printed as
 * 0x-prefixed hex so it reads unambiguously and matches the other
 * inode tracepoints in this file.
 */
DECLARE_EVENT_CLASS(xfs_inode_error_class,
	TP_PROTO(struct xfs_inode *ip, int error, unsigned long caller_ip),
	TP_ARGS(ip, error, caller_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(int, error)
		__field(unsigned long, caller_ip)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->error = error;
		__entry->caller_ip = caller_ip;
	),
	TP_printk("dev %d:%d ino 0x%llx error %d caller %ps",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->error,
		  (char *)__entry->caller_ip)
);
/*
 * Instantiate a tracepoint called @name from xfs_inode_error_class.  The
 * resulting trace_<name>() takes (struct xfs_inode *ip, int error,
 * unsigned long caller_ip).
 */
#define DEFINE_INODE_ERROR_EVENT(name) \
DEFINE_EVENT(xfs_inode_error_class, name, \
TP_PROTO(struct xfs_inode *ip, int error, \
unsigned long caller_ip), \
TP_ARGS(ip, error, caller_ip))
/*
 * Reflink allocator tracepoint.
 *
 * Fired by xfs_bmap_remap_alloc() with the inode being remapped, the
 * filesystem block number the caller asked for (@fsbno) and the extent
 * length (@len).  All three numeric fields are printed in hex with a 0x
 * prefix so that none of them can be mistaken for decimal.
 */
TRACE_EVENT(xfs_bmap_remap_alloc,
	TP_PROTO(struct xfs_inode *ip, xfs_fsblock_t fsbno,
		 xfs_extlen_t len),
	TP_ARGS(ip, fsbno, len),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_fsblock_t, fsbno)
		__field(xfs_extlen_t, len)
	),
	TP_fast_assign(
		__entry->dev = VFS_I(ip)->i_sb->s_dev;
		__entry->ino = ip->i_ino;
		__entry->fsbno = fsbno;
		__entry->len = len;
	),
	TP_printk("dev %d:%d ino 0x%llx fsbno 0x%llx len 0x%x",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->fsbno,
		  __entry->len)
);
/* Error tracepoint emitted by xfs_bmap_remap_alloc() when it fails. */
DEFINE_INODE_ERROR_EVENT(xfs_bmap_remap_alloc_error);
#endif /* _TRACE_XFS_H */ #endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_PATH
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment