Commit 05a630d7 authored by Darrick J. Wong

xfs: allow unwritten extents in the CoW fork

In the data fork, we only allow extents to perform the following state
transitions:

delay -> real <-> unwritten

There's no way to move directly from a delalloc reservation to an
/unwritten/ allocated extent.  However, for the CoW fork we want to be
able to do the following to each extent:

delalloc -> unwritten -> written -> remapped to data fork

This will help us to avoid a race in the speculative CoW preallocation
code between a first thread that is allocating a CoW extent and a second
thread that is remapping part of a file after a write.  In order to do
this, however, we need two things: first, we have to be able to
transition from delalloc to unwritten, and second, the function that
converts between real and unwritten has to be made aware of the CoW
fork.  Do both of those things.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent de14c5f5
...@@ -1850,6 +1850,7 @@ xfs_bmap_add_extent_delay_real( ...@@ -1850,6 +1850,7 @@ xfs_bmap_add_extent_delay_real(
*/ */
trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
xfs_bmbt_set_startblock(ep, new->br_startblock); xfs_bmbt_set_startblock(ep, new->br_startblock);
xfs_bmbt_set_state(ep, new->br_state);
trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
(*nextents)++; (*nextents)++;
...@@ -2188,6 +2189,7 @@ STATIC int /* error */ ...@@ -2188,6 +2189,7 @@ STATIC int /* error */
xfs_bmap_add_extent_unwritten_real( xfs_bmap_add_extent_unwritten_real(
struct xfs_trans *tp, struct xfs_trans *tp,
xfs_inode_t *ip, /* incore inode pointer */ xfs_inode_t *ip, /* incore inode pointer */
int whichfork,
xfs_extnum_t *idx, /* extent number to update/insert */ xfs_extnum_t *idx, /* extent number to update/insert */
xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
xfs_bmbt_irec_t *new, /* new data to add to file extents */ xfs_bmbt_irec_t *new, /* new data to add to file extents */
...@@ -2207,12 +2209,14 @@ xfs_bmap_add_extent_unwritten_real( ...@@ -2207,12 +2209,14 @@ xfs_bmap_add_extent_unwritten_real(
/* left is 0, right is 1, prev is 2 */ /* left is 0, right is 1, prev is 2 */
int rval=0; /* return value (logging flags) */ int rval=0; /* return value (logging flags) */
int state = 0;/* state bits, accessed thru macros */ int state = 0;/* state bits, accessed thru macros */
struct xfs_mount *mp = tp->t_mountp; struct xfs_mount *mp = ip->i_mount;
*logflagsp = 0; *logflagsp = 0;
cur = *curp; cur = *curp;
ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); ifp = XFS_IFORK_PTR(ip, whichfork);
if (whichfork == XFS_COW_FORK)
state |= BMAP_COWFORK;
ASSERT(*idx >= 0); ASSERT(*idx >= 0);
ASSERT(*idx <= xfs_iext_count(ifp)); ASSERT(*idx <= xfs_iext_count(ifp));
...@@ -2271,7 +2275,7 @@ xfs_bmap_add_extent_unwritten_real( ...@@ -2271,7 +2275,7 @@ xfs_bmap_add_extent_unwritten_real(
* Don't set contiguous if the combined extent would be too large. * Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large. * Also check for all-three-contiguous being too large.
*/ */
if (*idx < xfs_iext_count(&ip->i_df) - 1) { if (*idx < xfs_iext_count(ifp) - 1) {
state |= BMAP_RIGHT_VALID; state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
if (isnullstartblock(RIGHT.br_startblock)) if (isnullstartblock(RIGHT.br_startblock))
...@@ -2311,7 +2315,8 @@ xfs_bmap_add_extent_unwritten_real( ...@@ -2311,7 +2315,8 @@ xfs_bmap_add_extent_unwritten_real(
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_remove(ip, *idx + 1, 2, state); xfs_iext_remove(ip, *idx + 1, 2, state);
ip->i_d.di_nextents -= 2; XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
if (cur == NULL) if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else { else {
...@@ -2354,7 +2359,8 @@ xfs_bmap_add_extent_unwritten_real( ...@@ -2354,7 +2359,8 @@ xfs_bmap_add_extent_unwritten_real(
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_remove(ip, *idx + 1, 1, state); xfs_iext_remove(ip, *idx + 1, 1, state);
ip->i_d.di_nextents--; XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
if (cur == NULL) if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else { else {
...@@ -2389,7 +2395,8 @@ xfs_bmap_add_extent_unwritten_real( ...@@ -2389,7 +2395,8 @@ xfs_bmap_add_extent_unwritten_real(
xfs_bmbt_set_state(ep, newext); xfs_bmbt_set_state(ep, newext);
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_remove(ip, *idx + 1, 1, state); xfs_iext_remove(ip, *idx + 1, 1, state);
ip->i_d.di_nextents--; XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
if (cur == NULL) if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else { else {
...@@ -2501,7 +2508,8 @@ xfs_bmap_add_extent_unwritten_real( ...@@ -2501,7 +2508,8 @@ xfs_bmap_add_extent_unwritten_real(
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_insert(ip, *idx, 1, new, state); xfs_iext_insert(ip, *idx, 1, new, state);
ip->i_d.di_nextents++; XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
if (cur == NULL) if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else { else {
...@@ -2579,7 +2587,8 @@ xfs_bmap_add_extent_unwritten_real( ...@@ -2579,7 +2587,8 @@ xfs_bmap_add_extent_unwritten_real(
++*idx; ++*idx;
xfs_iext_insert(ip, *idx, 1, new, state); xfs_iext_insert(ip, *idx, 1, new, state);
ip->i_d.di_nextents++; XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
if (cur == NULL) if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else { else {
...@@ -2627,7 +2636,8 @@ xfs_bmap_add_extent_unwritten_real( ...@@ -2627,7 +2636,8 @@ xfs_bmap_add_extent_unwritten_real(
++*idx; ++*idx;
xfs_iext_insert(ip, *idx, 2, &r[0], state); xfs_iext_insert(ip, *idx, 2, &r[0], state);
ip->i_d.di_nextents += 2; XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
if (cur == NULL) if (cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else { else {
...@@ -2681,17 +2691,17 @@ xfs_bmap_add_extent_unwritten_real( ...@@ -2681,17 +2691,17 @@ xfs_bmap_add_extent_unwritten_real(
} }
/* update reverse mappings */ /* update reverse mappings */
error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new); error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
if (error) if (error)
goto done; goto done;
/* convert to a btree if necessary */ /* convert to a btree if necessary */
if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { if (xfs_bmap_needs_btree(ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */ int tmp_logflags; /* partial log flag return val */
ASSERT(cur == NULL); ASSERT(cur == NULL);
error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur, error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
0, &tmp_logflags, XFS_DATA_FORK); 0, &tmp_logflags, whichfork);
*logflagsp |= tmp_logflags; *logflagsp |= tmp_logflags;
if (error) if (error)
goto done; goto done;
...@@ -2703,7 +2713,7 @@ xfs_bmap_add_extent_unwritten_real( ...@@ -2703,7 +2713,7 @@ xfs_bmap_add_extent_unwritten_real(
*curp = cur; *curp = cur;
} }
xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
done: done:
*logflagsp |= rval; *logflagsp |= rval;
return error; return error;
...@@ -4354,10 +4364,16 @@ xfs_bmapi_allocate( ...@@ -4354,10 +4364,16 @@ xfs_bmapi_allocate(
bma->got.br_state = XFS_EXT_NORM; bma->got.br_state = XFS_EXT_NORM;
/* /*
* A wasdelay extent has been initialized, so shouldn't be flagged * In the data fork, a wasdelay extent has been initialized, so
* as unwritten. * shouldn't be flagged as unwritten.
*
* For the cow fork, however, we convert delalloc reservations
* (extents allocated for speculative preallocation) to
* allocated unwritten extents, and only convert the unwritten
* extents to real extents when we're about to write the data.
*/ */
if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
(bma->flags & XFS_BMAPI_PREALLOC) &&
xfs_sb_version_hasextflgbit(&mp->m_sb)) xfs_sb_version_hasextflgbit(&mp->m_sb))
bma->got.br_state = XFS_EXT_UNWRITTEN; bma->got.br_state = XFS_EXT_UNWRITTEN;
...@@ -4408,8 +4424,6 @@ xfs_bmapi_convert_unwritten( ...@@ -4408,8 +4424,6 @@ xfs_bmapi_convert_unwritten(
(XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
return 0; return 0;
ASSERT(whichfork != XFS_COW_FORK);
/* /*
* Modify (by adding) the state flag, if writing. * Modify (by adding) the state flag, if writing.
*/ */
...@@ -4434,8 +4448,8 @@ xfs_bmapi_convert_unwritten( ...@@ -4434,8 +4448,8 @@ xfs_bmapi_convert_unwritten(
return error; return error;
} }
error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
&bma->cur, mval, bma->firstblock, bma->dfops, &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
&tmp_logflags); &tmp_logflags);
/* /*
* Log the inode core unconditionally in the unwritten extent conversion * Log the inode core unconditionally in the unwritten extent conversion
...@@ -4444,7 +4458,11 @@ xfs_bmapi_convert_unwritten( ...@@ -4444,7 +4458,11 @@ xfs_bmapi_convert_unwritten(
* in the transaction for the sake of fsync(), even if nothing has * in the transaction for the sake of fsync(), even if nothing has
* changed, because fsync() will not force the log for this transaction * changed, because fsync() will not force the log for this transaction
* unless it sees the inode pinned. * unless it sees the inode pinned.
*
* Note: If we're only converting cow fork extents, there aren't
* any on-disk updates to make, so we don't need to log anything.
*/ */
if (whichfork != XFS_COW_FORK)
bma->logflags |= tmp_logflags | XFS_ILOG_CORE; bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
if (error) if (error)
return error; return error;
...@@ -4519,15 +4537,15 @@ xfs_bmapi_write( ...@@ -4519,15 +4537,15 @@ xfs_bmapi_write(
ASSERT(*nmap >= 1); ASSERT(*nmap >= 1);
ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
ASSERT(!(flags & XFS_BMAPI_IGSTATE)); ASSERT(!(flags & XFS_BMAPI_IGSTATE));
ASSERT(tp != NULL); ASSERT(tp != NULL ||
(flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
(XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
ASSERT(len > 0); ASSERT(len > 0);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK); ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP)); ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP)); ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);
/* zeroing is for currently only for data extents, not metadata */ /* zeroing is for currently only for data extents, not metadata */
ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
...@@ -5542,8 +5560,8 @@ __xfs_bunmapi( ...@@ -5542,8 +5560,8 @@ __xfs_bunmapi(
} }
del.br_state = XFS_EXT_UNWRITTEN; del.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp, ip, error = xfs_bmap_add_extent_unwritten_real(tp, ip,
&lastx, &cur, &del, firstblock, dfops, whichfork, &lastx, &cur, &del,
&logflags); firstblock, dfops, &logflags);
if (error) if (error)
goto error0; goto error0;
goto nodelete; goto nodelete;
...@@ -5596,8 +5614,9 @@ __xfs_bunmapi( ...@@ -5596,8 +5614,9 @@ __xfs_bunmapi(
prev.br_state = XFS_EXT_UNWRITTEN; prev.br_state = XFS_EXT_UNWRITTEN;
lastx--; lastx--;
error = xfs_bmap_add_extent_unwritten_real(tp, error = xfs_bmap_add_extent_unwritten_real(tp,
ip, &lastx, &cur, &prev, ip, whichfork, &lastx, &cur,
firstblock, dfops, &logflags); &prev, firstblock, dfops,
&logflags);
if (error) if (error)
goto error0; goto error0;
goto nodelete; goto nodelete;
...@@ -5605,8 +5624,9 @@ __xfs_bunmapi( ...@@ -5605,8 +5624,9 @@ __xfs_bunmapi(
ASSERT(del.br_state == XFS_EXT_NORM); ASSERT(del.br_state == XFS_EXT_NORM);
del.br_state = XFS_EXT_UNWRITTEN; del.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp, error = xfs_bmap_add_extent_unwritten_real(tp,
ip, &lastx, &cur, &del, ip, whichfork, &lastx, &cur,
firstblock, dfops, &logflags); &del, firstblock, dfops,
&logflags);
if (error) if (error)
goto error0; goto error0;
goto nodelete; goto nodelete;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment