Commit 3ba020be authored by Christoph Hellwig's avatar Christoph Hellwig Committed by Dave Chinner

xfs: optimize writes to reflink files

Instead of reserving space as the first thing in write_begin move it past
reading the extent in the data fork.  That way we only have to read from
the data fork once and can reuse that information for trimming the extent
to the shared/unshared boundary.  Additionally this allows us to easily
limit the actual write size to said boundary, and avoid a roundtrip on the
ilock.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
parent 5f9268ca
...@@ -566,6 +566,17 @@ xfs_file_iomap_begin_delay( ...@@ -566,6 +566,17 @@ xfs_file_iomap_begin_delay(
xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx, xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
&got, &prev); &got, &prev);
if (!eof && got.br_startoff <= offset_fsb) { if (!eof && got.br_startoff <= offset_fsb) {
if (xfs_is_reflink_inode(ip)) {
bool shared;
end_fsb = min(XFS_B_TO_FSB(mp, offset + count),
maxbytes_fsb);
xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
error = xfs_reflink_reserve_cow(ip, &got, &shared);
if (error)
goto out_unlock;
}
trace_xfs_iomap_found(ip, offset, count, 0, &got); trace_xfs_iomap_found(ip, offset, count, 0, &got);
goto done; goto done;
} }
...@@ -961,19 +972,13 @@ xfs_file_iomap_begin( ...@@ -961,19 +972,13 @@ xfs_file_iomap_begin(
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
struct xfs_bmbt_irec imap; struct xfs_bmbt_irec imap;
xfs_fileoff_t offset_fsb, end_fsb; xfs_fileoff_t offset_fsb, end_fsb;
bool shared, trimmed;
int nimaps = 1, error = 0; int nimaps = 1, error = 0;
bool shared = false, trimmed = false;
unsigned lockmode; unsigned lockmode;
if (XFS_FORCED_SHUTDOWN(mp)) if (XFS_FORCED_SHUTDOWN(mp))
return -EIO; return -EIO;
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
error = xfs_reflink_reserve_cow_range(ip, offset, length);
if (error < 0)
return error;
}
if ((flags & IOMAP_WRITE) && !IS_DAX(inode) && if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
!xfs_get_extsz_hint(ip)) { !xfs_get_extsz_hint(ip)) {
/* Reserve delalloc blocks for regular writeback. */ /* Reserve delalloc blocks for regular writeback. */
...@@ -981,7 +986,16 @@ xfs_file_iomap_begin( ...@@ -981,7 +986,16 @@ xfs_file_iomap_begin(
iomap); iomap);
} }
lockmode = xfs_ilock_data_map_shared(ip); /*
* COW writes will allocate delalloc space, so we need to make sure
* to take the lock exclusively here.
*/
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
lockmode = XFS_ILOCK_EXCL;
xfs_ilock(ip, XFS_ILOCK_EXCL);
} else {
lockmode = xfs_ilock_data_map_shared(ip);
}
ASSERT(offset <= mp->m_super->s_maxbytes); ASSERT(offset <= mp->m_super->s_maxbytes);
if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
...@@ -991,19 +1005,24 @@ xfs_file_iomap_begin( ...@@ -991,19 +1005,24 @@ xfs_file_iomap_begin(
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, 0); &nimaps, 0);
if (error) { if (error)
xfs_iunlock(ip, lockmode); goto out_unlock;
return error;
}
if (flags & (IOMAP_WRITE | IOMAP_ZERO | IOMAP_REPORT)) { if (flags & IOMAP_REPORT) {
/* Trim the mapping to the nearest shared extent boundary. */ /* Trim the mapping to the nearest shared extent boundary. */
error = xfs_reflink_trim_around_shared(ip, &imap, &shared, error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
&trimmed); &trimmed);
if (error) { if (error)
xfs_iunlock(ip, lockmode); goto out_unlock;
return error; }
}
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
error = xfs_reflink_reserve_cow(ip, &imap, &shared);
if (error)
goto out_unlock;
end_fsb = imap.br_startoff + imap.br_blockcount;
length = XFS_FSB_TO_B(mp, end_fsb) - offset;
} }
if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) { if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
...@@ -1042,6 +1061,9 @@ xfs_file_iomap_begin( ...@@ -1042,6 +1061,9 @@ xfs_file_iomap_begin(
if (shared) if (shared)
iomap->flags |= IOMAP_F_SHARED; iomap->flags |= IOMAP_F_SHARED;
return 0; return 0;
out_unlock:
xfs_iunlock(ip, lockmode);
return error;
} }
static int static int
......
...@@ -228,50 +228,54 @@ xfs_reflink_trim_around_shared( ...@@ -228,50 +228,54 @@ xfs_reflink_trim_around_shared(
} }
} }
/* Create a CoW reservation for a range of blocks within a file. */ /*
static int * Trim the passed in imap to the next shared/unshared extent boundary, and
__xfs_reflink_reserve_cow( * if imap->br_startoff points to a shared extent reserve space for it in the
* COW fork. In this case *shared is set to true, else to false.
*
* Note that imap will always contain the block numbers for the existing blocks
* in the data fork, as the upper layers need them for read-modify-write
* operations.
*/
int
xfs_reflink_reserve_cow(
struct xfs_inode *ip, struct xfs_inode *ip,
xfs_fileoff_t *offset_fsb, struct xfs_bmbt_irec *imap,
xfs_fileoff_t end_fsb, bool *shared)
bool *skipped)
{ {
struct xfs_bmbt_irec got, prev, imap; struct xfs_bmbt_irec got, prev;
xfs_fileoff_t orig_end_fsb; xfs_fileoff_t end_fsb, orig_end_fsb;
int nimaps, eof = 0, error = 0; int eof = 0, error = 0;
bool shared = false, trimmed = false; bool trimmed;
xfs_extnum_t idx; xfs_extnum_t idx;
xfs_extlen_t align; xfs_extlen_t align;
/* Already reserved? Skip the refcount btree access. */ /*
xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx, * Search the COW fork extent list first. This serves two purposes:
* first this implements the speculative preallocation using cowextsize,
* so that we also unshare blocks adjacent to shared blocks instead
* of just the shared blocks themselves. Second the lookup in the
* extent list is generally faster than going out to the shared extent
* tree.
*/
xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
&got, &prev); &got, &prev);
if (!eof && got.br_startoff <= *offset_fsb) { if (!eof && got.br_startoff <= imap->br_startoff) {
end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount; trace_xfs_reflink_cow_found(ip, imap);
trace_xfs_reflink_cow_found(ip, &got); xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
goto done;
}
/* Read extent from the source file. */ *shared = true;
nimaps = 1; return 0;
error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb, }
&imap, &nimaps, 0);
if (error)
goto out_unlock;
ASSERT(nimaps == 1);
/* Trim the mapping to the nearest shared extent boundary. */ /* Trim the mapping to the nearest shared extent boundary. */
error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed); error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
if (error) if (error)
goto out_unlock; return error;
end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;
/* Not shared? Just report the (potentially capped) extent. */ /* Not shared? Just report the (potentially capped) extent. */
if (!shared) { if (!*shared)
*skipped = true; return 0;
goto done;
}
/* /*
* Fork all the shared blocks from our write offset until the end of * Fork all the shared blocks from our write offset until the end of
...@@ -279,72 +283,38 @@ __xfs_reflink_reserve_cow( ...@@ -279,72 +283,38 @@ __xfs_reflink_reserve_cow(
*/ */
error = xfs_qm_dqattach_locked(ip, 0); error = xfs_qm_dqattach_locked(ip, 0);
if (error) if (error)
goto out_unlock; return error;
end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip)); align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
if (align) if (align)
end_fsb = roundup_64(end_fsb, align); end_fsb = roundup_64(end_fsb, align);
retry: retry:
error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb, error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
end_fsb - *offset_fsb, &got, end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
&prev, &idx, eof);
switch (error) { switch (error) {
case 0: case 0:
break; break;
case -ENOSPC: case -ENOSPC:
case -EDQUOT: case -EDQUOT:
/* retry without any preallocation */ /* retry without any preallocation */
trace_xfs_reflink_cow_enospc(ip, &imap); trace_xfs_reflink_cow_enospc(ip, imap);
if (end_fsb != orig_end_fsb) { if (end_fsb != orig_end_fsb) {
end_fsb = orig_end_fsb; end_fsb = orig_end_fsb;
goto retry; goto retry;
} }
/*FALLTHRU*/ /*FALLTHRU*/
default: default:
goto out_unlock; return error;
} }
if (end_fsb != orig_end_fsb) if (end_fsb != orig_end_fsb)
xfs_inode_set_cowblocks_tag(ip); xfs_inode_set_cowblocks_tag(ip);
trace_xfs_reflink_cow_alloc(ip, &got); trace_xfs_reflink_cow_alloc(ip, &got);
done: return 0;
*offset_fsb = end_fsb;
out_unlock:
return error;
}
/* Create a CoW reservation for part of a file. */
int
xfs_reflink_reserve_cow_range(
struct xfs_inode *ip,
xfs_off_t offset,
xfs_off_t count)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb, end_fsb;
bool skipped = false;
int error = 0;
trace_xfs_reflink_reserve_cow_range(ip, offset, count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
end_fsb = XFS_B_TO_FSB(mp, offset + count);
xfs_ilock(ip, XFS_ILOCK_EXCL);
while (offset_fsb < end_fsb) {
error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
&skipped);
if (error) {
trace_xfs_reflink_reserve_cow_range_error(ip, error,
_RET_IP_);
break;
}
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
} }
/* Allocate all CoW reservations covering a range of blocks in a file. */ /* Allocate all CoW reservations covering a range of blocks in a file. */
...@@ -359,9 +329,8 @@ __xfs_reflink_allocate_cow( ...@@ -359,9 +329,8 @@ __xfs_reflink_allocate_cow(
struct xfs_defer_ops dfops; struct xfs_defer_ops dfops;
struct xfs_trans *tp; struct xfs_trans *tp;
xfs_fsblock_t first_block; xfs_fsblock_t first_block;
xfs_fileoff_t next_fsb;
int nimaps = 1, error; int nimaps = 1, error;
bool skipped = false; bool shared;
xfs_defer_init(&dfops, &first_block); xfs_defer_init(&dfops, &first_block);
...@@ -372,33 +341,38 @@ __xfs_reflink_allocate_cow( ...@@ -372,33 +341,38 @@ __xfs_reflink_allocate_cow(
xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_ilock(ip, XFS_ILOCK_EXCL);
next_fsb = *offset_fsb; /* Read extent from the source file. */
error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped); nimaps = 1;
error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
&imap, &nimaps, 0);
if (error)
goto out_unlock;
ASSERT(nimaps == 1);
error = xfs_reflink_reserve_cow(ip, &imap, &shared);
if (error) if (error)
goto out_trans_cancel; goto out_trans_cancel;
if (skipped) { if (!shared) {
*offset_fsb = next_fsb; *offset_fsb = imap.br_startoff + imap.br_blockcount;
goto out_trans_cancel; goto out_trans_cancel;
} }
xfs_trans_ijoin(tp, ip, 0); xfs_trans_ijoin(tp, ip, 0);
error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb, error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
XFS_BMAPI_COWFORK, &first_block, XFS_BMAPI_COWFORK, &first_block,
XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK), XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
&imap, &nimaps, &dfops); &imap, &nimaps, &dfops);
if (error) if (error)
goto out_trans_cancel; goto out_trans_cancel;
/* We might not have been able to map the whole delalloc extent */
*offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);
error = xfs_defer_finish(&tp, &dfops, NULL); error = xfs_defer_finish(&tp, &dfops, NULL);
if (error) if (error)
goto out_trans_cancel; goto out_trans_cancel;
error = xfs_trans_commit(tp); error = xfs_trans_commit(tp);
*offset_fsb = imap.br_startoff + imap.br_blockcount;
out_unlock: out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error; return error;
......
...@@ -26,8 +26,8 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno, ...@@ -26,8 +26,8 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed); struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip, extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
xfs_off_t offset, xfs_off_t count); struct xfs_bmbt_irec *imap, bool *shared);
extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
xfs_off_t offset, xfs_off_t count); xfs_off_t offset, xfs_off_t count);
extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset, extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
......
...@@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc); ...@@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range); DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range); DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write); DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
...@@ -3358,7 +3358,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow); ...@@ -3358,7 +3358,6 @@ DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece); DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error); DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment