Commit e890038e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'xfs-fixes-for-linus-4.9-rc3' of...

Merge tag 'xfs-fixes-for-linus-4.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs

Pull xfs fixes from Dave Chinner:
 "This update contains fixes for most of the outstanding regressions
  introduced with the 4.9-rc1 XFS merge. There is also a fix for an
  iomap bug, too.

  This is quite a bit larger than I'd prefer for a -rc3, but most of
  the change comes from cleaning up the new reflink copy on write code;
  it's much simpler and easier to understand now. These changes fixed
  several bugs in the new code, and it wasn't clear that there was an
  easier/simpler way to fix them. The rest of the fixes are the usual
  size you'd expect at this stage.

  I've left the commits to soak in linux-next for some extra time
  because of the size before asking you to pull; no new problems with
  them have been reported, so I think it's all OK.

  Summary:
   - iomap page offset masking fix for page faults
   - add IOMAP_REPORT to distinguish between read and fiemap map
     requests
   - cleanups to new shared data extent code
   - fix mount active status on failed log recovery
   - fix broken dquots in a buffer calculation
   - fix locking order issues and merge xfs_reflink_remap_range and
     xfs_file_share_range
   - rework unmapping of CoW extents and remove now unused functions
   - clean state when CoW is done"

* tag 'xfs-fixes-for-linus-4.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (25 commits)
  xfs: clear cowblocks tag when cow fork is emptied
  xfs: fix up inode cowblocks tracking tracepoints
  fs: Do to trim high file position bits in iomap_page_mkwrite_actor
  xfs: remove xfs_bunmapi_cow
  xfs: optimize xfs_reflink_end_cow
  xfs: optimize xfs_reflink_cancel_cow_blocks
  xfs: refactor xfs_bunmapi_cow
  xfs: optimize writes to reflink files
  xfs: don't bother looking at the refcount tree for reads
  xfs: handle "raw" delayed extents xfs_reflink_trim_around_shared
  xfs: add xfs_trim_extent
  iomap: add IOMAP_REPORT
  xfs: merge xfs_reflink_remap_range and xfs_file_share_range
  xfs: remove xfs_file_wait_for_io
  xfs: move inode locking from xfs_reflink_remap_range to xfs_file_share_range
  xfs: fix the same_inode check in xfs_file_share_range
  xfs: remove the same fs check from xfs_file_share_range
  libxfs: v3 inodes are only valid on crc-enabled filesystems
  libxfs: clean up _calc_dquots_per_chunk
  xfs: unset MS_ACTIVE if mount fails
  ...
parents 18c2152d c17a8ef4
......@@ -433,8 +433,7 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
struct page *page = data;
int ret;
ret = __block_write_begin_int(page, pos & ~PAGE_MASK, length,
NULL, iomap);
ret = __block_write_begin_int(page, pos, length, NULL, iomap);
if (ret)
return ret;
......@@ -561,7 +560,7 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi,
}
while (len > 0) {
ret = iomap_apply(inode, start, len, 0, ops, &ctx,
ret = iomap_apply(inode, start, len, IOMAP_REPORT, ops, &ctx,
iomap_fiemap_actor);
/* inode with no (attribute) mapping will give ENOENT */
if (ret == -ENOENT)
......
......@@ -3974,9 +3974,6 @@ xfs_bmap_remap_alloc(
* allocating, so skip that check by pretending to be freeing.
*/
error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
if (error)
goto error0;
error0:
xfs_perag_put(args.pag);
if (error)
trace_xfs_bmap_remap_alloc_error(ap->ip, error, _RET_IP_);
......@@ -3999,6 +3996,39 @@ xfs_bmap_alloc(
return xfs_bmap_btalloc(ap);
}
/*
 * Clip an extent record to the logical block window [bno, bno + len).
 *
 * A record that does not intersect the window at all is handed back with a
 * zero block count.  Otherwise the leading and/or trailing blocks that fall
 * outside the window are shaved off in place.
 */
void
xfs_trim_extent(
	struct xfs_bmbt_irec	*irec,
	xfs_fileoff_t		bno,
	xfs_filblks_t		len)
{
	xfs_fileoff_t		window_end = bno + len;
	xfs_fileoff_t		excess;

	/* Entirely before or entirely after the window: nothing remains. */
	if (irec->br_startoff >= window_end ||
	    irec->br_startoff + irec->br_blockcount <= bno) {
		irec->br_blockcount = 0;
		return;
	}

	/* Shave off the blocks that precede the window. */
	if (irec->br_startoff < bno) {
		excess = bno - irec->br_startoff;

		if (isnullstartblock(irec->br_startblock)) {
			/* Collapse any null start block to the plain marker. */
			irec->br_startblock = DELAYSTARTBLOCK;
		} else if (irec->br_startblock != DELAYSTARTBLOCK &&
			   irec->br_startblock != HOLESTARTBLOCK) {
			/*
			 * Neither a delalloc nor a hole marker: move the
			 * start block forward together with the offset.
			 */
			irec->br_startblock += excess;
		}

		irec->br_startoff += excess;
		irec->br_blockcount -= excess;
	}

	/* Shave off the blocks that follow the window. */
	if (irec->br_startoff + irec->br_blockcount > window_end) {
		excess = irec->br_startoff + irec->br_blockcount - window_end;
		irec->br_blockcount -= excess;
	}
}
/*
* Trim the returned map to the required bounds
*/
......@@ -4829,6 +4859,219 @@ xfs_bmap_split_indlen(
return stolen;
}
/*
 * Remove the range @del from the delayed allocation extent @got, which sits
 * at position *@idx in the extent list of the @whichfork fork of @ip.
 *
 * @del must lie entirely inside @got (asserted).  Depending on how the two
 * line up, the record is removed, shortened at either end, or split in two.
 * *@idx is decremented when the whole record is removed and incremented when
 * a split inserts a new record.  The indirect block reservation encoded in
 * the start block of @got is recomputed; any surplus, plus the deleted
 * blocks themselves for non-realtime forks, is passed to xfs_mod_fdblocks().
 *
 * Returns 0 on success, or the error reported by the quota update, in which
 * case the extent list has not been modified.
 */
int
xfs_bmap_del_extent_delay(
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_extnum_t		*idx,
	struct xfs_bmbt_irec	*got,
	struct xfs_bmbt_irec	*del)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_bmbt_irec	new;
	int64_t			da_old, da_new, da_diff = 0;
	xfs_fileoff_t		del_endoff, got_endoff;
	xfs_filblks_t		got_indlen, new_indlen, stolen;
	int			error, state = 0;
	bool			isrt;

	XFS_STATS_INC(mp, xs_del_exlist);

	isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
	del_endoff = del->br_startoff + del->br_blockcount;
	got_endoff = got->br_startoff + got->br_blockcount;
	da_old = startblockval(got->br_startblock);
	da_new = 0;

	ASSERT(*idx >= 0);
	ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
	ASSERT(del->br_blockcount > 0);
	ASSERT(got->br_startoff <= del->br_startoff);
	ASSERT(got_endoff >= del_endoff);

	if (isrt) {
		/* do_div() requires an unsigned 64-bit dividend. */
		uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);

		do_div(rtexts, mp->m_sb.sb_rextsize);
		xfs_mod_frextents(mp, rtexts);
	}

	/*
	 * Update the inode delalloc counter now and wait to update the
	 * sb counters as we might have to borrow some blocks for the
	 * indirect block accounting.
	 */
	error = xfs_trans_reserve_quota_nblks(NULL, ip,
			-((long)del->br_blockcount), 0,
			isrt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
	if (error)
		return error;
	ip->i_delayed_blks -= del->br_blockcount;

	if (whichfork == XFS_COW_FORK)
		state |= BMAP_COWFORK;

	if (got->br_startoff == del->br_startoff)
		state |= BMAP_LEFT_CONTIG;
	if (got_endoff == del_endoff)
		state |= BMAP_RIGHT_CONTIG;

	switch (state & (BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
		/*
		 * Matches the whole extent.  Delete the entry.
		 */
		xfs_iext_remove(ip, *idx, 1, state);
		--*idx;
		break;
	case BMAP_LEFT_CONTIG:
		/*
		 * Deleting the first part of the extent.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		got->br_startoff = del_endoff;
		got->br_blockcount -= del->br_blockcount;
		/* Never grow the reservation beyond what was held before. */
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
				got->br_blockcount), da_old);
		got->br_startblock = nullstartblock((int)da_new);
		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		break;
	case BMAP_RIGHT_CONTIG:
		/*
		 * Deleting the last part of the extent.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
		got->br_blockcount = got->br_blockcount - del->br_blockcount;
		/* Never grow the reservation beyond what was held before. */
		da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip,
				got->br_blockcount), da_old);
		got->br_startblock = nullstartblock((int)da_new);
		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
		break;
	case 0:
		/*
		 * Deleting the middle of the extent.
		 *
		 * Distribute the original indlen reservation across the two new
		 * extents.  Steal blocks from the deleted extent if necessary.
		 * Stealing blocks simply fudges the fdblocks accounting below.
		 * Warn if either of the new indlen reservations is zero as this
		 * can lead to delalloc problems.
		 */
		trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);

		got->br_blockcount = del->br_startoff - got->br_startoff;
		got_indlen = xfs_bmap_worst_indlen(ip, got->br_blockcount);

		new.br_blockcount = got_endoff - del_endoff;
		new_indlen = xfs_bmap_worst_indlen(ip, new.br_blockcount);

		WARN_ON_ONCE(!got_indlen || !new_indlen);
		stolen = xfs_bmap_split_indlen(da_old, &got_indlen, &new_indlen,
						       del->br_blockcount);

		got->br_startblock = nullstartblock((int)got_indlen);
		xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
		/* Pass the same state as the pre_update tracepoint above. */
		trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

		new.br_startoff = del_endoff;
		new.br_state = got->br_state;
		new.br_startblock = nullstartblock((int)new_indlen);

		++*idx;
		xfs_iext_insert(ip, *idx, 1, &new, state);

		da_new = got_indlen + new_indlen - stolen;
		/* Stolen blocks stay reserved, so do not release them below. */
		del->br_blockcount -= stolen;
		break;
	}

	ASSERT(da_old >= da_new);
	da_diff = da_old - da_new;
	if (!isrt)
		da_diff += del->br_blockcount;
	if (da_diff)
		xfs_mod_fdblocks(mp, da_diff, false);
	return 0;
}
/*
 * Remove the range @del from the CoW fork extent @got, which sits at
 * position *@idx in the CoW fork extent list of @ip.
 *
 * @got must not carry a null start block and @del must lie entirely inside
 * it (both asserted).  The record is deleted, trimmed at the front, trimmed
 * at the back, or split in two depending on which ends of the two extents
 * coincide.  *@idx is decremented after a full delete and incremented after
 * a split.
 */
void
xfs_bmap_del_extent_cow(
	struct xfs_inode	*ip,
	xfs_extnum_t		*idx,
	struct xfs_bmbt_irec	*got,
	struct xfs_bmbt_irec	*del)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
	struct xfs_bmbt_irec	tail;
	xfs_fileoff_t		del_end, got_end;
	bool			same_start, same_end;
	int			state = BMAP_COWFORK;

	XFS_STATS_INC(mp, xs_del_exlist);

	del_end = del->br_startoff + del->br_blockcount;
	got_end = got->br_startoff + got->br_blockcount;

	ASSERT(*idx >= 0);
	ASSERT(*idx < ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
	ASSERT(del->br_blockcount > 0);
	ASSERT(got->br_startoff <= del->br_startoff);
	ASSERT(got_end >= del_end);
	ASSERT(!isnullstartblock(got->br_startblock));

	same_start = (got->br_startoff == del->br_startoff);
	same_end = (got_end == del_end);
	if (same_start)
		state |= BMAP_LEFT_CONTIG;
	if (same_end)
		state |= BMAP_RIGHT_CONTIG;

	if (same_start && same_end) {
		/* The deletion covers the whole record: drop it. */
		xfs_iext_remove(ip, *idx, 1, state);
		--*idx;
		return;
	}

	trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);

	if (same_start) {
		/* Front of the record goes away; the rest starts after @del. */
		got->br_startoff = del_end;
		got->br_blockcount -= del->br_blockcount;
		got->br_startblock = del->br_startblock + del->br_blockcount;
	} else if (same_end) {
		/* Back of the record goes away. */
		got->br_blockcount -= del->br_blockcount;
	} else {
		/* Hole punched in the middle: keep the part before @del. */
		got->br_blockcount = del->br_startoff - got->br_startoff;
	}

	xfs_bmbt_set_all(xfs_iext_get_ext(ifp, *idx), got);
	trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);

	if (!same_start && !same_end) {
		/* Insert a new record for the part that follows @del. */
		tail.br_startoff = del_end;
		tail.br_blockcount = got_end - del_end;
		tail.br_state = got->br_state;
		tail.br_startblock = del->br_startblock + del->br_blockcount;

		++*idx;
		xfs_iext_insert(ip, *idx, 1, &tail, state);
	}
}
/*
* Called by xfs_bmapi to update file extent records and the btree
* after removing space (or undoing a delayed allocation).
......@@ -5171,175 +5414,6 @@ xfs_bmap_del_extent(
return error;
}
/*
 * Remove an extent from the CoW fork. Similar to xfs_bmap_del_extent.
 *
 * Looks up the CoW fork record containing del->br_startoff and removes the
 * range described by @del from it.  The record is deleted outright, trimmed
 * at the front, trimmed at the back, or split in two, depending on which
 * ends of @del coincide with the ends of the record found.  For delayed
 * allocation records the indirect block reservation stored in the start
 * block is recomputed and any reduction is passed to xfs_mod_fdblocks().
 *
 * Always returns 0: "error" is set to zero and never changed afterwards.
 */
int
xfs_bunmapi_cow(
struct xfs_inode *ip,
struct xfs_bmbt_irec *del)
{
xfs_filblks_t da_new;
xfs_filblks_t da_old;
xfs_fsblock_t del_endblock = 0;
xfs_fileoff_t del_endoff;
int delay;
struct xfs_bmbt_rec_host *ep;
int error;
struct xfs_bmbt_irec got;
xfs_fileoff_t got_endoff;
struct xfs_ifork *ifp;
struct xfs_mount *mp;
xfs_filblks_t nblks;
struct xfs_bmbt_irec new;
/* REFERENCED */
uint qfield;
xfs_filblks_t temp;
xfs_filblks_t temp2;
int state = BMAP_COWFORK;
int eof;
xfs_extnum_t eidx;
mp = ip->i_mount;
XFS_STATS_INC(mp, xs_del_exlist);
/*
 * Find the CoW fork record for the start of the range.  "new" is only
 * passed here as the output slot for the previous record; it is
 * overwritten later when a split needs a second record.
 */
ep = xfs_bmap_search_extents(ip, del->br_startoff, XFS_COW_FORK, &eof,
&eidx, &got, &new);
/*
 * NOTE(review): the self-assignment presumably silences an unused
 * variable warning when ASSERT() compiles away - confirm.
 */
ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); ifp = ifp;
ASSERT((eidx >= 0) && (eidx < ifp->if_bytes /
(uint)sizeof(xfs_bmbt_rec_t)));
ASSERT(del->br_blockcount > 0);
ASSERT(got.br_startoff <= del->br_startoff);
del_endoff = del->br_startoff + del->br_blockcount;
got_endoff = got.br_startoff + got.br_blockcount;
ASSERT(got_endoff >= del_endoff);
/* The record found and the range to delete must agree on delalloc-ness. */
delay = isnullstartblock(got.br_startblock);
ASSERT(isnullstartblock(del->br_startblock) == delay);
qfield = 0;
error = 0;
/*
* If deleting a real allocation, must free up the disk space.
*/
if (!delay) {
nblks = del->br_blockcount;
qfield = XFS_TRANS_DQ_BCOUNT;
/*
* Set up del_endblock for later.
*/
del_endblock = del->br_startblock + del->br_blockcount;
da_old = da_new = 0;
} else {
da_old = startblockval(got.br_startblock);
da_new = 0;
nblks = 0;
}
/*
 * qfield and nblks are assigned above but never consumed in this
 * function.  NOTE(review): the self-assignments presumably exist only
 * to keep the compiler quiet about that - confirm.
 */
qfield = qfield;
nblks = nblks;
/*
* Set flag value to use in switch statement.
* Left-contig is 2, right-contig is 1.
*/
switch (((got.br_startoff == del->br_startoff) << 1) |
(got_endoff == del_endoff)) {
case 3:
/*
* Matches the whole extent. Delete the entry.
*/
xfs_iext_remove(ip, eidx, 1, BMAP_COWFORK);
--eidx;
break;
case 2:
/*
* Deleting the first part of the extent.
*/
trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
xfs_bmbt_set_startoff(ep, del_endoff);
temp = got.br_blockcount - del->br_blockcount;
xfs_bmbt_set_blockcount(ep, temp);
if (delay) {
/* New reservation is capped at the one previously held. */
temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
da_old);
xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
da_new = temp;
break;
}
xfs_bmbt_set_startblock(ep, del_endblock);
trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
break;
case 1:
/*
* Deleting the last part of the extent.
*/
temp = got.br_blockcount - del->br_blockcount;
trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
xfs_bmbt_set_blockcount(ep, temp);
if (delay) {
/* New reservation is capped at the one previously held. */
temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
da_old);
xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
da_new = temp;
break;
}
trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
break;
case 0:
/*
* Deleting the middle of the extent.
*/
temp = del->br_startoff - got.br_startoff;
trace_xfs_bmap_pre_update(ip, eidx, state, _THIS_IP_);
xfs_bmbt_set_blockcount(ep, temp);
new.br_startoff = del_endoff;
temp2 = got_endoff - del_endoff;
new.br_blockcount = temp2;
new.br_state = got.br_state;
if (!delay) {
new.br_startblock = del_endblock;
} else {
/*
 * temp and temp2 switch meaning here: from block counts of the
 * left and right pieces to their indirect block reservations.
 */
temp = xfs_bmap_worst_indlen(ip, temp);
xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
temp2 = xfs_bmap_worst_indlen(ip, temp2);
new.br_startblock = nullstartblock((int)temp2);
da_new = temp + temp2;
/*
 * Shrink the two reservations alternately, one block at a time,
 * until together they no longer exceed the original reservation.
 */
while (da_new > da_old) {
if (temp) {
temp--;
da_new--;
xfs_bmbt_set_startblock(ep,
nullstartblock((int)temp));
}
if (da_new == da_old)
break;
if (temp2) {
temp2--;
da_new--;
new.br_startblock =
nullstartblock((int)temp2);
}
}
}
trace_xfs_bmap_post_update(ip, eidx, state, _THIS_IP_);
xfs_iext_insert(ip, eidx + 1, 1, &new, state);
++eidx;
break;
}
/*
* Account for change in delayed indirect blocks.
* Nothing to do for disk quota accounting here.
*/
ASSERT(da_old >= da_new);
if (da_old > da_new)
xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
return error;
}
/*
* Unmap (remove) blocks from a file.
* If nexts is nonzero then the number of extents to remove is limited to
......
......@@ -190,6 +190,8 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
#endif
void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
xfs_filblks_t len);
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
......@@ -221,7 +223,11 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, int flags,
xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
struct xfs_defer_ops *dfops, int *done);
int xfs_bunmapi_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *del);
int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork,
xfs_extnum_t *idx, struct xfs_bmbt_irec *got,
struct xfs_bmbt_irec *del);
void xfs_bmap_del_extent_cow(struct xfs_inode *ip, xfs_extnum_t *idx,
struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del);
int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
xfs_extnum_t num);
uint xfs_default_attroffset(struct xfs_inode *ip);
......
......@@ -4826,7 +4826,7 @@ xfs_btree_calc_size(
return rval;
}
int
static int
xfs_btree_count_blocks_helper(
struct xfs_btree_cur *cur,
int level,
......
......@@ -191,8 +191,7 @@ xfs_dquot_buf_verify_crc(
if (mp->m_quotainfo)
ndquots = mp->m_quotainfo->qi_dqperchunk;
else
ndquots = xfs_calc_dquots_per_chunk(
XFS_BB_TO_FSB(mp, bp->b_length));
ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
for (i = 0; i < ndquots; i++, d++) {
if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
......
......@@ -865,7 +865,6 @@ typedef struct xfs_timestamp {
* padding field for v3 inodes.
*/
#define XFS_DINODE_MAGIC 0x494e /* 'IN' */
#define XFS_DINODE_GOOD_VERSION(v) ((v) >= 1 && (v) <= 3)
typedef struct xfs_dinode {
__be16 di_magic; /* inode magic # = XFS_DINODE_MAGIC */
__be16 di_mode; /* mode and type of file */
......
......@@ -57,6 +57,17 @@ xfs_inobp_check(
}
#endif
/*
 * Check an on-disk inode version number against the superblock features:
 * only version 3 is accepted when xfs_sb_version_hascrc() is true for the
 * mount's superblock, and only versions 1 and 2 otherwise.
 */
bool
xfs_dinode_good_version(
	struct xfs_mount	*mp,
	__u8			version)
{
	if (!xfs_sb_version_hascrc(&mp->m_sb))
		return version == 1 || version == 2;

	return version == 3;
}
/*
* If we are doing readahead on an inode buffer, we might be in log recovery
* reading an inode allocation buffer that hasn't yet been replayed, and hence
......@@ -91,7 +102,7 @@ xfs_inode_buf_verify(
dip = xfs_buf_offset(bp, (i << mp->m_sb.sb_inodelog));
di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
XFS_DINODE_GOOD_VERSION(dip->di_version);
xfs_dinode_good_version(mp, dip->di_version);
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
XFS_ERRTAG_ITOBP_INOTOBP,
XFS_RANDOM_ITOBP_INOTOBP))) {
......
......@@ -74,6 +74,8 @@ void xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
void xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
struct xfs_dinode *to);
bool xfs_dinode_good_version(struct xfs_mount *mp, __u8 version);
#if defined(DEBUG)
void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
#else
......
......@@ -249,6 +249,7 @@ xfs_file_dio_aio_read(
struct xfs_inode *ip = XFS_I(inode);
loff_t isize = i_size_read(inode);
size_t count = iov_iter_count(to);
loff_t end = iocb->ki_pos + count - 1;
struct iov_iter data;
struct xfs_buftarg *target;
ssize_t ret = 0;
......@@ -272,50 +273,22 @@ xfs_file_dio_aio_read(
file_accessed(iocb->ki_filp);
/*
* Locking is a bit tricky here. If we take an exclusive lock for direct
* IO, we effectively serialise all new concurrent read IO to this file
* and block it behind IO that is currently in progress because IO in
* progress holds the IO lock shared. We only need to hold the lock
* exclusive to blow away the page cache, so only take lock exclusively
* if the page cache needs invalidation. This allows the normal direct
* IO case of no page cache pages to proceeed concurrently without
* serialisation.
*/
xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
if (mapping->nrpages) {
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
/*
* The generic dio code only flushes the range of the particular
* I/O. Because we take an exclusive lock here, this whole
* sequence is considerably more expensive for us. This has a
* noticeable performance impact for any file with cached pages,
* even when outside of the range of the particular I/O.
*
* Hence, amortize the cost of the lock against a full file
* flush and reduce the chances of repeated iolock cycles going
* forward.
*/
if (mapping->nrpages) {
ret = filemap_write_and_wait(mapping);
if (ret) {
xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
if (ret)
goto out_unlock;
/*
* Invalidate whole pages. This can return an error if
* we fail to invalidate a page, but this should never
* happen on XFS. Warn if it does fail.
* Invalidate whole pages. This can return an error if we fail
* to invalidate a page, but this should never happen on XFS.
* Warn if it does fail.
*/
ret = invalidate_inode_pages2(mapping);
ret = invalidate_inode_pages2_range(mapping,
iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
WARN_ON_ONCE(ret);
ret = 0;
}
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
}
data = *to;
ret = __blockdev_direct_IO(iocb, inode, target->bt_bdev, &data,
......@@ -324,8 +297,9 @@ xfs_file_dio_aio_read(
iocb->ki_pos += ret;
iov_iter_advance(to, ret);
}
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
out_unlock:
xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
}
......@@ -570,34 +544,22 @@ xfs_file_dio_aio_write(
if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
return -EINVAL;
/* "unaligned" here means not aligned to a filesystem block */
if ((iocb->ki_pos & mp->m_blockmask) ||
((iocb->ki_pos + count) & mp->m_blockmask))
unaligned_io = 1;
/*
* We don't need to take an exclusive lock unless there page cache needs
* to be invalidated or unaligned IO is being executed. We don't need to
* consider the EOF extension case here because
* xfs_file_aio_write_checks() will relock the inode as necessary for
* EOF zeroing cases and fill out the new inode size as appropriate.
* Don't take the exclusive iolock here unless the I/O is unaligned to
* the file system block size. We don't need to consider the EOF
* extension case here because xfs_file_aio_write_checks() will relock
* the inode as necessary for EOF zeroing cases and fill out the new
* inode size as appropriate.
*/
if (unaligned_io || mapping->nrpages)
if ((iocb->ki_pos & mp->m_blockmask) ||
((iocb->ki_pos + count) & mp->m_blockmask)) {
unaligned_io = 1;
iolock = XFS_IOLOCK_EXCL;
else
} else {
iolock = XFS_IOLOCK_SHARED;
xfs_rw_ilock(ip, iolock);
}
/*
* Recheck if there are cached pages that need invalidate after we got
* the iolock to protect against other threads adding new pages while
* we were waiting for the iolock.
*/
if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, iolock);
iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, iolock);
}
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
......@@ -605,26 +567,26 @@ xfs_file_dio_aio_write(
count = iov_iter_count(from);
end = iocb->ki_pos + count - 1;
/*
* See xfs_file_dio_aio_read() for why we do a full-file flush here.
*/
if (mapping->nrpages) {
ret = filemap_write_and_wait(VFS_I(ip)->i_mapping);
ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end);
if (ret)
goto out;
/*
* Invalidate whole pages. This can return an error if we fail
* to invalidate a page, but this should never happen on XFS.
* Warn if it does fail.
*/
ret = invalidate_inode_pages2(VFS_I(ip)->i_mapping);
ret = invalidate_inode_pages2_range(mapping,
iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
WARN_ON_ONCE(ret);
ret = 0;
}
/*
* If we are doing unaligned IO, wait for all other IO to drain,
* otherwise demote the lock if we had to flush cached pages
* otherwise demote the lock if we had to take the exclusive lock
* for other reasons in xfs_file_aio_write_checks.
*/
if (unaligned_io)
inode_dio_wait(inode);
......@@ -947,134 +909,6 @@ xfs_file_fallocate(
return error;
}
/*
 * Calls inode_dio_wait() on @inode, then writes back and waits on the page
 * cache range covering [offset, offset + len).  The range is widened to a
 * multiple of the larger of the superblock block size and PAGE_SIZE.
 *
 * Returns the result of filemap_write_and_wait_range().
 */
static int
xfs_file_wait_for_io(
	struct inode	*inode,
	loff_t		offset,
	size_t		len)
{
	loff_t		blksize = inode->i_sb->s_blocksize;
	loff_t		granule;
	loff_t		first;
	loff_t		last;

	inode_dio_wait(inode);

	granule = max_t(xfs_off_t, blksize, PAGE_SIZE);
	first = round_down(offset, granule);
	last = round_up(offset + len, granule) - 1;

	return filemap_write_and_wait_range(inode->i_mapping, first, last);
}
/*
 * Hook up to the VFS reflink function.
 *
 * Validates a request to share @len bytes at @pos_in of @file_in into
 * @pos_out of @file_out, flushes both ranges with xfs_file_wait_for_io(),
 * and passes the request to xfs_reflink_remap_range().  A @len of zero
 * means "from @pos_in to the end of the source file".  @is_dedupe adds the
 * destination-EOF check and sets XFS_REFLINK_DEDUPE.
 *
 * Returns 0 if the source file is empty, a negative errno if validation or
 * the flush fails, or otherwise the result of xfs_reflink_remap_range().
 */
STATIC int
xfs_file_share_range(
	struct file	*file_in,
	loff_t		pos_in,
	struct file	*file_out,
	loff_t		pos_out,
	u64		len,
	bool		is_dedupe)
{
	struct inode	*inode_in;
	struct inode	*inode_out;
	int		ret;
	loff_t		bs;
	loff_t		isize;
	bool		same_inode;
	loff_t		blen;
	unsigned int	flags = 0;

	inode_in = file_inode(file_in);
	inode_out = file_inode(file_out);
	bs = inode_out->i_sb->s_blocksize;

	/* Don't touch certain kinds of inodes */
	if (IS_IMMUTABLE(inode_out))
		return -EPERM;
	if (IS_SWAPFILE(inode_in) ||
	    IS_SWAPFILE(inode_out))
		return -ETXTBSY;

	/* Reflink only works within this filesystem. */
	if (inode_in->i_sb != inode_out->i_sb)
		return -EXDEV;

	/*
	 * Compare the inode objects themselves rather than their inode
	 * numbers, so the result does not depend on the same-filesystem
	 * check above.
	 */
	same_inode = (inode_in == inode_out);

	/* Don't reflink dirs, pipes, sockets... */
	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
		return -EISDIR;
	if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
		return -EINVAL;
	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
		return -EINVAL;

	/* Don't share DAX file data for now. */
	if (IS_DAX(inode_in) || IS_DAX(inode_out))
		return -EINVAL;

	/* Are we going all the way to the end? */
	isize = i_size_read(inode_in);
	if (isize == 0)
		return 0;
	if (len == 0)
		len = isize - pos_in;

	/* Ensure offsets don't wrap and the input is inside i_size */
	if (pos_in + len < pos_in || pos_out + len < pos_out ||
	    pos_in + len > isize)
		return -EINVAL;

	/* Don't allow dedupe past EOF in the dest file */
	if (is_dedupe) {
		loff_t	disize;

		disize = i_size_read(inode_out);
		if (pos_out >= disize || pos_out + len > disize)
			return -EINVAL;
	}

	/* If we're linking to EOF, continue to the block boundary. */
	if (pos_in + len == isize)
		blen = ALIGN(isize, bs) - pos_in;
	else
		blen = len;

	/* Only reflink if we're aligned to block boundaries */
	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
	    !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
		return -EINVAL;

	/* Don't allow overlapped reflink within the same file */
	if (same_inode && pos_out + blen > pos_in && pos_out < pos_in + blen)
		return -EINVAL;

	/* Wait for the completion of any pending IOs on both files */
	ret = xfs_file_wait_for_io(inode_in, pos_in, len);
	if (ret)
		return ret;
	ret = xfs_file_wait_for_io(inode_out, pos_out, len);
	if (ret)
		return ret;

	if (is_dedupe)
		flags |= XFS_REFLINK_DEDUPE;

	return xfs_reflink_remap_range(XFS_I(inode_in), pos_in,
			XFS_I(inode_out), pos_out, len, flags);
}
STATIC ssize_t
xfs_file_copy_range(
struct file *file_in,
......@@ -1086,7 +920,7 @@ xfs_file_copy_range(
{
int error;
error = xfs_file_share_range(file_in, pos_in, file_out, pos_out,
error = xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
len, false);
if (error)
return error;
......@@ -1101,7 +935,7 @@ xfs_file_clone_range(
loff_t pos_out,
u64 len)
{
return xfs_file_share_range(file_in, pos_in, file_out, pos_out,
return xfs_reflink_remap_range(file_in, pos_in, file_out, pos_out,
len, false);
}
......@@ -1124,7 +958,7 @@ xfs_file_dedupe_range(
if (len > XFS_MAX_DEDUPE_LEN)
len = XFS_MAX_DEDUPE_LEN;
error = xfs_file_share_range(src_file, loff, dst_file, dst_loff,
error = xfs_reflink_remap_range(src_file, loff, dst_file, dst_loff,
len, true);
if (error)
return error;
......
......@@ -1656,9 +1656,9 @@ void
xfs_inode_set_cowblocks_tag(
xfs_inode_t *ip)
{
trace_xfs_inode_set_eofblocks_tag(ip);
trace_xfs_inode_set_cowblocks_tag(ip);
return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks,
trace_xfs_perag_set_eofblocks,
trace_xfs_perag_set_cowblocks,
XFS_ICI_COWBLOCKS_TAG);
}
......@@ -1666,7 +1666,7 @@ void
xfs_inode_clear_cowblocks_tag(
xfs_inode_t *ip)
{
trace_xfs_inode_clear_eofblocks_tag(ip);
trace_xfs_inode_clear_cowblocks_tag(ip);
return __xfs_inode_clear_eofblocks_tag(ip,
trace_xfs_perag_clear_eofblocks, XFS_ICI_COWBLOCKS_TAG);
trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
}
......@@ -566,6 +566,17 @@ xfs_file_iomap_begin_delay(
xfs_bmap_search_extents(ip, offset_fsb, XFS_DATA_FORK, &eof, &idx,
&got, &prev);
if (!eof && got.br_startoff <= offset_fsb) {
if (xfs_is_reflink_inode(ip)) {
bool shared;
end_fsb = min(XFS_B_TO_FSB(mp, offset + count),
maxbytes_fsb);
xfs_trim_extent(&got, offset_fsb, end_fsb - offset_fsb);
error = xfs_reflink_reserve_cow(ip, &got, &shared);
if (error)
goto out_unlock;
}
trace_xfs_iomap_found(ip, offset, count, 0, &got);
goto done;
}
......@@ -961,19 +972,13 @@ xfs_file_iomap_begin(
struct xfs_mount *mp = ip->i_mount;
struct xfs_bmbt_irec imap;
xfs_fileoff_t offset_fsb, end_fsb;
bool shared, trimmed;
int nimaps = 1, error = 0;
bool shared = false, trimmed = false;
unsigned lockmode;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
error = xfs_reflink_reserve_cow_range(ip, offset, length);
if (error < 0)
return error;
}
if ((flags & IOMAP_WRITE) && !IS_DAX(inode) &&
!xfs_get_extsz_hint(ip)) {
/* Reserve delalloc blocks for regular writeback. */
......@@ -981,7 +986,16 @@ xfs_file_iomap_begin(
iomap);
}
/*
* COW writes will allocate delalloc space, so we need to make sure
* to take the lock exclusively here.
*/
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
lockmode = XFS_ILOCK_EXCL;
xfs_ilock(ip, XFS_ILOCK_EXCL);
} else {
lockmode = xfs_ilock_data_map_shared(ip);
}
ASSERT(offset <= mp->m_super->s_maxbytes);
if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
......@@ -991,16 +1005,24 @@ xfs_file_iomap_begin(
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, 0);
if (error) {
xfs_iunlock(ip, lockmode);
return error;
}
if (error)
goto out_unlock;
if (flags & IOMAP_REPORT) {
/* Trim the mapping to the nearest shared extent boundary. */
error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
if (error) {
xfs_iunlock(ip, lockmode);
return error;
error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
&trimmed);
if (error)
goto out_unlock;
}
if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && xfs_is_reflink_inode(ip)) {
error = xfs_reflink_reserve_cow(ip, &imap, &shared);
if (error)
goto out_unlock;
end_fsb = imap.br_startoff + imap.br_blockcount;
length = XFS_FSB_TO_B(mp, end_fsb) - offset;
}
if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
......@@ -1039,6 +1061,9 @@ xfs_file_iomap_begin(
if (shared)
iomap->flags |= IOMAP_F_SHARED;
return 0;
out_unlock:
xfs_iunlock(ip, lockmode);
return error;
}
static int
......
......@@ -1009,6 +1009,7 @@ xfs_mountfs(
out_quota:
xfs_qm_unmount_quotas(mp);
out_rtunmount:
mp->m_super->s_flags &= ~MS_ACTIVE;
xfs_rtunmount_inodes(mp);
out_rele_rip:
IRELE(rip);
......
......@@ -182,7 +182,8 @@ xfs_reflink_trim_around_shared(
if (!xfs_is_reflink_inode(ip) ||
ISUNWRITTEN(irec) ||
irec->br_startblock == HOLESTARTBLOCK ||
irec->br_startblock == DELAYSTARTBLOCK) {
irec->br_startblock == DELAYSTARTBLOCK ||
isnullstartblock(irec->br_startblock)) {
*shared = false;
return 0;
}
......@@ -227,50 +228,54 @@ xfs_reflink_trim_around_shared(
}
}
/* Create a CoW reservation for a range of blocks within a file. */
static int
__xfs_reflink_reserve_cow(
/*
* Trim the passed in imap to the next shared/unshared extent boundary, and
* if imap->br_startoff points to a shared extent reserve space for it in the
* COW fork. In this case *shared is set to true, else to false.
*
* Note that imap will always contain the block numbers for the existing blocks
* in the data fork, as the upper layers need them for read-modify-write
* operations.
*/
int
xfs_reflink_reserve_cow(
struct xfs_inode *ip,
xfs_fileoff_t *offset_fsb,
xfs_fileoff_t end_fsb,
bool *skipped)
struct xfs_bmbt_irec *imap,
bool *shared)
{
struct xfs_bmbt_irec got, prev, imap;
xfs_fileoff_t orig_end_fsb;
int nimaps, eof = 0, error = 0;
bool shared = false, trimmed = false;
struct xfs_bmbt_irec got, prev;
xfs_fileoff_t end_fsb, orig_end_fsb;
int eof = 0, error = 0;
bool trimmed;
xfs_extnum_t idx;
xfs_extlen_t align;
/* Already reserved? Skip the refcount btree access. */
xfs_bmap_search_extents(ip, *offset_fsb, XFS_COW_FORK, &eof, &idx,
/*
* Search the COW fork extent list first. This serves two purposes:
* first this implements the speculative preallocation using cowextsize,
* so that we also unshare blocks adjacent to shared blocks instead
* of just the shared blocks themselves. Second the lookup in the
* extent list is generally faster than going out to the shared extent
* tree.
*/
xfs_bmap_search_extents(ip, imap->br_startoff, XFS_COW_FORK, &eof, &idx,
&got, &prev);
if (!eof && got.br_startoff <= *offset_fsb) {
end_fsb = orig_end_fsb = got.br_startoff + got.br_blockcount;
trace_xfs_reflink_cow_found(ip, &got);
goto done;
}
if (!eof && got.br_startoff <= imap->br_startoff) {
trace_xfs_reflink_cow_found(ip, imap);
xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
/* Read extent from the source file. */
nimaps = 1;
error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
&imap, &nimaps, 0);
if (error)
goto out_unlock;
ASSERT(nimaps == 1);
*shared = true;
return 0;
}
/* Trim the mapping to the nearest shared extent boundary. */
error = xfs_reflink_trim_around_shared(ip, &imap, &shared, &trimmed);
error = xfs_reflink_trim_around_shared(ip, imap, shared, &trimmed);
if (error)
goto out_unlock;
end_fsb = orig_end_fsb = imap.br_startoff + imap.br_blockcount;
return error;
/* Not shared? Just report the (potentially capped) extent. */
if (!shared) {
*skipped = true;
goto done;
}
if (!*shared)
return 0;
/*
* Fork all the shared blocks from our write offset until the end of
......@@ -278,72 +283,38 @@ __xfs_reflink_reserve_cow(
*/
error = xfs_qm_dqattach_locked(ip, 0);
if (error)
goto out_unlock;
return error;
end_fsb = orig_end_fsb = imap->br_startoff + imap->br_blockcount;
align = xfs_eof_alignment(ip, xfs_get_cowextsz_hint(ip));
if (align)
end_fsb = roundup_64(end_fsb, align);
retry:
error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, *offset_fsb,
end_fsb - *offset_fsb, &got,
&prev, &idx, eof);
error = xfs_bmapi_reserve_delalloc(ip, XFS_COW_FORK, imap->br_startoff,
end_fsb - imap->br_startoff, &got, &prev, &idx, eof);
switch (error) {
case 0:
break;
case -ENOSPC:
case -EDQUOT:
/* retry without any preallocation */
trace_xfs_reflink_cow_enospc(ip, &imap);
trace_xfs_reflink_cow_enospc(ip, imap);
if (end_fsb != orig_end_fsb) {
end_fsb = orig_end_fsb;
goto retry;
}
/*FALLTHRU*/
default:
goto out_unlock;
return error;
}
if (end_fsb != orig_end_fsb)
xfs_inode_set_cowblocks_tag(ip);
trace_xfs_reflink_cow_alloc(ip, &got);
done:
*offset_fsb = end_fsb;
out_unlock:
return error;
}
/* Create a CoW reservation for part of a file. */
int
xfs_reflink_reserve_cow_range(
struct xfs_inode *ip,
xfs_off_t offset,
xfs_off_t count)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb, end_fsb;
bool skipped = false;
int error;
trace_xfs_reflink_reserve_cow_range(ip, offset, count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
end_fsb = XFS_B_TO_FSB(mp, offset + count);
xfs_ilock(ip, XFS_ILOCK_EXCL);
while (offset_fsb < end_fsb) {
error = __xfs_reflink_reserve_cow(ip, &offset_fsb, end_fsb,
&skipped);
if (error) {
trace_xfs_reflink_reserve_cow_range_error(ip, error,
_RET_IP_);
break;
}
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
return 0;
}
/* Allocate all CoW reservations covering a range of blocks in a file. */
......@@ -358,9 +329,8 @@ __xfs_reflink_allocate_cow(
struct xfs_defer_ops dfops;
struct xfs_trans *tp;
xfs_fsblock_t first_block;
xfs_fileoff_t next_fsb;
int nimaps = 1, error;
bool skipped = false;
bool shared;
xfs_defer_init(&dfops, &first_block);
......@@ -371,33 +341,38 @@ __xfs_reflink_allocate_cow(
xfs_ilock(ip, XFS_ILOCK_EXCL);
next_fsb = *offset_fsb;
error = __xfs_reflink_reserve_cow(ip, &next_fsb, end_fsb, &skipped);
/* Read extent from the source file. */
nimaps = 1;
error = xfs_bmapi_read(ip, *offset_fsb, end_fsb - *offset_fsb,
&imap, &nimaps, 0);
if (error)
goto out_unlock;
ASSERT(nimaps == 1);
error = xfs_reflink_reserve_cow(ip, &imap, &shared);
if (error)
goto out_trans_cancel;
if (skipped) {
*offset_fsb = next_fsb;
if (!shared) {
*offset_fsb = imap.br_startoff + imap.br_blockcount;
goto out_trans_cancel;
}
xfs_trans_ijoin(tp, ip, 0);
error = xfs_bmapi_write(tp, ip, *offset_fsb, next_fsb - *offset_fsb,
error = xfs_bmapi_write(tp, ip, imap.br_startoff, imap.br_blockcount,
XFS_BMAPI_COWFORK, &first_block,
XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK),
&imap, &nimaps, &dfops);
if (error)
goto out_trans_cancel;
/* We might not have been able to map the whole delalloc extent */
*offset_fsb = min(*offset_fsb + imap.br_blockcount, next_fsb);
error = xfs_defer_finish(&tp, &dfops, NULL);
if (error)
goto out_trans_cancel;
error = xfs_trans_commit(tp);
*offset_fsb = imap.br_startoff + imap.br_blockcount;
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
......@@ -536,58 +511,49 @@ xfs_reflink_cancel_cow_blocks(
xfs_fileoff_t offset_fsb,
xfs_fileoff_t end_fsb)
{
struct xfs_bmbt_irec irec;
xfs_filblks_t count_fsb;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got, prev, del;
xfs_extnum_t idx;
xfs_fsblock_t firstfsb;
struct xfs_defer_ops dfops;
int error = 0;
int nimaps;
int error = 0, eof = 0;
if (!xfs_is_reflink_inode(ip))
return 0;
/* Go find the old extent in the CoW fork. */
while (offset_fsb < end_fsb) {
nimaps = 1;
count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
&nimaps, XFS_BMAPI_COWFORK);
if (error)
break;
ASSERT(nimaps == 1);
trace_xfs_reflink_cancel_cow(ip, &irec);
xfs_bmap_search_extents(ip, offset_fsb, XFS_COW_FORK, &eof, &idx,
&got, &prev);
if (eof)
return 0;
if (irec.br_startblock == DELAYSTARTBLOCK) {
/* Free a delayed allocation. */
xfs_mod_fdblocks(ip->i_mount, irec.br_blockcount,
false);
ip->i_delayed_blks -= irec.br_blockcount;
while (got.br_startoff < end_fsb) {
del = got;
xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
trace_xfs_reflink_cancel_cow(ip, &del);
/* Remove the mapping from the CoW fork. */
error = xfs_bunmapi_cow(ip, &irec);
if (isnullstartblock(del.br_startblock)) {
error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK,
&idx, &got, &del);
if (error)
break;
} else if (irec.br_startblock == HOLESTARTBLOCK) {
/* empty */
} else {
xfs_trans_ijoin(*tpp, ip, 0);
xfs_defer_init(&dfops, &firstfsb);
/* Free the CoW orphan record. */
error = xfs_refcount_free_cow_extent(ip->i_mount,
&dfops, irec.br_startblock,
irec.br_blockcount);
&dfops, del.br_startblock,
del.br_blockcount);
if (error)
break;
xfs_bmap_add_free(ip->i_mount, &dfops,
irec.br_startblock, irec.br_blockcount,
del.br_startblock, del.br_blockcount,
NULL);
/* Update quota accounting */
xfs_trans_mod_dquot_byino(*tpp, ip, XFS_TRANS_DQ_BCOUNT,
-(long)irec.br_blockcount);
-(long)del.br_blockcount);
/* Roll the transaction */
error = xfs_defer_finish(tpp, &dfops, ip);
......@@ -597,15 +563,18 @@ xfs_reflink_cancel_cow_blocks(
}
/* Remove the mapping from the CoW fork. */
error = xfs_bunmapi_cow(ip, &irec);
if (error)
break;
xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
}
/* Roll on... */
offset_fsb = irec.br_startoff + irec.br_blockcount;
if (++idx >= ifp->if_bytes / sizeof(struct xfs_bmbt_rec))
break;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
}
/* clear tag if cow fork is emptied */
if (!ifp->if_bytes)
xfs_inode_clear_cowblocks_tag(ip);
return error;
}
......@@ -668,25 +637,26 @@ xfs_reflink_end_cow(
xfs_off_t offset,
xfs_off_t count)
{
struct xfs_bmbt_irec irec;
struct xfs_bmbt_irec uirec;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
struct xfs_bmbt_irec got, prev, del;
struct xfs_trans *tp;
xfs_fileoff_t offset_fsb;
xfs_fileoff_t end_fsb;
xfs_filblks_t count_fsb;
xfs_fsblock_t firstfsb;
struct xfs_defer_ops dfops;
int error;
int error, eof = 0;
unsigned int resblks;
xfs_filblks_t ilen;
xfs_filblks_t rlen;
int nimaps;
xfs_extnum_t idx;
trace_xfs_reflink_end_cow(ip, offset, count);
/* No COW extents? That's easy! */
if (ifp->if_bytes == 0)
return 0;
offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
/* Start a rolling transaction to switch the mappings */
resblks = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
......@@ -698,72 +668,65 @@ xfs_reflink_end_cow(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
/* Go find the old extent in the CoW fork. */
while (offset_fsb < end_fsb) {
/* Read extent from the source file */
nimaps = 1;
count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
error = xfs_bmapi_read(ip, offset_fsb, count_fsb, &irec,
&nimaps, XFS_BMAPI_COWFORK);
if (error)
goto out_cancel;
ASSERT(nimaps == 1);
xfs_bmap_search_extents(ip, end_fsb - 1, XFS_COW_FORK, &eof, &idx,
&got, &prev);
ASSERT(irec.br_startblock != DELAYSTARTBLOCK);
trace_xfs_reflink_cow_remap(ip, &irec);
/* If there is a hole at end_fsb - 1 go to the previous extent */
if (eof || got.br_startoff > end_fsb) {
ASSERT(idx > 0);
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, --idx), &got);
}
/*
* We can have a hole in the CoW fork if part of a directio
* write is CoW but part of it isn't.
*/
rlen = ilen = irec.br_blockcount;
if (irec.br_startblock == HOLESTARTBLOCK)
/* Walk backwards until we're out of the I/O range... */
while (got.br_startoff + got.br_blockcount > offset_fsb) {
del = got;
xfs_trim_extent(&del, offset_fsb, end_fsb - offset_fsb);
/* Extent delete may have bumped idx forward */
if (!del.br_blockcount) {
idx--;
goto next_extent;
}
ASSERT(!isnullstartblock(got.br_startblock));
/* Unmap the old blocks in the data fork. */
while (rlen) {
xfs_defer_init(&dfops, &firstfsb);
error = __xfs_bunmapi(tp, ip, irec.br_startoff,
&rlen, 0, 1, &firstfsb, &dfops);
rlen = del.br_blockcount;
error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1,
&firstfsb, &dfops);
if (error)
goto out_defer;
/*
* Trim the extent to whatever got unmapped.
* Remember, bunmapi works backwards.
*/
uirec.br_startblock = irec.br_startblock + rlen;
uirec.br_startoff = irec.br_startoff + rlen;
uirec.br_blockcount = irec.br_blockcount - rlen;
irec.br_blockcount = rlen;
trace_xfs_reflink_cow_remap_piece(ip, &uirec);
/* Trim the extent to whatever got unmapped. */
if (rlen) {
xfs_trim_extent(&del, del.br_startoff + rlen,
del.br_blockcount - rlen);
}
trace_xfs_reflink_cow_remap(ip, &del);
/* Free the CoW orphan record. */
error = xfs_refcount_free_cow_extent(tp->t_mountp,
&dfops, uirec.br_startblock,
uirec.br_blockcount);
error = xfs_refcount_free_cow_extent(tp->t_mountp, &dfops,
del.br_startblock, del.br_blockcount);
if (error)
goto out_defer;
/* Map the new blocks into the data fork. */
error = xfs_bmap_map_extent(tp->t_mountp, &dfops,
ip, &uirec);
error = xfs_bmap_map_extent(tp->t_mountp, &dfops, ip, &del);
if (error)
goto out_defer;
/* Remove the mapping from the CoW fork. */
error = xfs_bunmapi_cow(ip, &uirec);
if (error)
goto out_defer;
xfs_bmap_del_extent_cow(ip, &idx, &got, &del);
error = xfs_defer_finish(&tp, &dfops, ip);
if (error)
goto out_defer;
}
next_extent:
/* Roll on... */
offset_fsb = irec.br_startoff + ilen;
if (idx < 0)
break;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &got);
}
error = xfs_trans_commit(tp);
......@@ -774,7 +737,6 @@ xfs_reflink_end_cow(
out_defer:
xfs_defer_cancel(&dfops);
out_cancel:
xfs_trans_cancel(tp);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
......@@ -1312,19 +1274,26 @@ xfs_compare_extents(
*/
int
xfs_reflink_remap_range(
struct xfs_inode *src,
xfs_off_t srcoff,
struct xfs_inode *dest,
xfs_off_t destoff,
xfs_off_t len,
unsigned int flags)
struct file *file_in,
loff_t pos_in,
struct file *file_out,
loff_t pos_out,
u64 len,
bool is_dedupe)
{
struct inode *inode_in = file_inode(file_in);
struct xfs_inode *src = XFS_I(inode_in);
struct inode *inode_out = file_inode(file_out);
struct xfs_inode *dest = XFS_I(inode_out);
struct xfs_mount *mp = src->i_mount;
loff_t bs = inode_out->i_sb->s_blocksize;
bool same_inode = (inode_in == inode_out);
xfs_fileoff_t sfsbno, dfsbno;
xfs_filblks_t fsblen;
int error;
xfs_extlen_t cowextsize;
bool is_same;
loff_t isize;
ssize_t ret;
loff_t blen;
if (!xfs_sb_version_hasreflink(&mp->m_sb))
return -EOPNOTSUPP;
......@@ -1332,17 +1301,8 @@ xfs_reflink_remap_range(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
/* Don't reflink realtime inodes */
if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
return -EINVAL;
if (flags & ~XFS_REFLINK_ALL)
return -EINVAL;
trace_xfs_reflink_remap_range(src, srcoff, len, dest, destoff);
/* Lock both files against IO */
if (src->i_ino == dest->i_ino) {
if (same_inode) {
xfs_ilock(src, XFS_IOLOCK_EXCL);
xfs_ilock(src, XFS_MMAPLOCK_EXCL);
} else {
......@@ -1350,39 +1310,126 @@ xfs_reflink_remap_range(
xfs_lock_two_inodes(src, dest, XFS_MMAPLOCK_EXCL);
}
/* Don't touch certain kinds of inodes */
ret = -EPERM;
if (IS_IMMUTABLE(inode_out))
goto out_unlock;
ret = -ETXTBSY;
if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
goto out_unlock;
/* Don't reflink dirs, pipes, sockets... */
ret = -EISDIR;
if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
goto out_unlock;
ret = -EINVAL;
if (S_ISFIFO(inode_in->i_mode) || S_ISFIFO(inode_out->i_mode))
goto out_unlock;
if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
goto out_unlock;
/* Don't reflink realtime inodes */
if (XFS_IS_REALTIME_INODE(src) || XFS_IS_REALTIME_INODE(dest))
goto out_unlock;
/* Don't share DAX file data for now. */
if (IS_DAX(inode_in) || IS_DAX(inode_out))
goto out_unlock;
/* Are we going all the way to the end? */
isize = i_size_read(inode_in);
if (isize == 0) {
ret = 0;
goto out_unlock;
}
if (len == 0)
len = isize - pos_in;
/* Ensure offsets don't wrap and the input is inside i_size */
if (pos_in + len < pos_in || pos_out + len < pos_out ||
pos_in + len > isize)
goto out_unlock;
/* Don't allow dedupe past EOF in the dest file */
if (is_dedupe) {
loff_t disize;
disize = i_size_read(inode_out);
if (pos_out >= disize || pos_out + len > disize)
goto out_unlock;
}
/* If we're linking to EOF, continue to the block boundary. */
if (pos_in + len == isize)
blen = ALIGN(isize, bs) - pos_in;
else
blen = len;
/* Only reflink if we're aligned to block boundaries */
if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
!IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
goto out_unlock;
/* Don't allow overlapped reflink within the same file */
if (same_inode) {
if (pos_out + blen > pos_in && pos_out < pos_in + blen)
goto out_unlock;
}
/* Wait for the completion of any pending IOs on both files */
inode_dio_wait(inode_in);
if (!same_inode)
inode_dio_wait(inode_out);
ret = filemap_write_and_wait_range(inode_in->i_mapping,
pos_in, pos_in + len - 1);
if (ret)
goto out_unlock;
ret = filemap_write_and_wait_range(inode_out->i_mapping,
pos_out, pos_out + len - 1);
if (ret)
goto out_unlock;
trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
/*
* Check that the extents are the same.
*/
if (flags & XFS_REFLINK_DEDUPE) {
is_same = false;
error = xfs_compare_extents(VFS_I(src), srcoff, VFS_I(dest),
destoff, len, &is_same);
if (error)
goto out_error;
if (is_dedupe) {
bool is_same = false;
ret = xfs_compare_extents(inode_in, pos_in, inode_out, pos_out,
len, &is_same);
if (ret)
goto out_unlock;
if (!is_same) {
error = -EBADE;
goto out_error;
ret = -EBADE;
goto out_unlock;
}
}
error = xfs_reflink_set_inode_flag(src, dest);
if (error)
goto out_error;
ret = xfs_reflink_set_inode_flag(src, dest);
if (ret)
goto out_unlock;
/*
* Invalidate the page cache so that we can clear any CoW mappings
* in the destination file.
*/
truncate_inode_pages_range(&VFS_I(dest)->i_data, destoff,
PAGE_ALIGN(destoff + len) - 1);
truncate_inode_pages_range(&inode_out->i_data, pos_out,
PAGE_ALIGN(pos_out + len) - 1);
dfsbno = XFS_B_TO_FSBT(mp, destoff);
sfsbno = XFS_B_TO_FSBT(mp, srcoff);
dfsbno = XFS_B_TO_FSBT(mp, pos_out);
sfsbno = XFS_B_TO_FSBT(mp, pos_in);
fsblen = XFS_B_TO_FSB(mp, len);
error = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
destoff + len);
if (error)
goto out_error;
ret = xfs_reflink_remap_blocks(src, sfsbno, dest, dfsbno, fsblen,
pos_out + len);
if (ret)
goto out_unlock;
/*
* Carry the cowextsize hint from src to dest if we're sharing the
......@@ -1390,26 +1437,24 @@ xfs_reflink_remap_range(
* has a cowextsize hint, and the destination file does not.
*/
cowextsize = 0;
if (srcoff == 0 && len == i_size_read(VFS_I(src)) &&
if (pos_in == 0 && len == i_size_read(inode_in) &&
(src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
destoff == 0 && len >= i_size_read(VFS_I(dest)) &&
pos_out == 0 && len >= i_size_read(inode_out) &&
!(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
cowextsize = src->i_d.di_cowextsize;
error = xfs_reflink_update_dest(dest, destoff + len, cowextsize);
if (error)
goto out_error;
ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize);
out_error:
out_unlock:
xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
xfs_iunlock(src, XFS_IOLOCK_EXCL);
if (src->i_ino != dest->i_ino) {
xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
xfs_iunlock(dest, XFS_IOLOCK_EXCL);
}
if (error)
trace_xfs_reflink_remap_range_error(dest, error, _RET_IP_);
return error;
if (ret)
trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
return ret;
}
/*
......
......@@ -26,8 +26,8 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, xfs_agnumber_t agno,
extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, bool *shared, bool *trimmed);
extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip,
xfs_off_t offset, xfs_off_t count);
extern int xfs_reflink_reserve_cow(struct xfs_inode *ip,
struct xfs_bmbt_irec *imap, bool *shared);
extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip,
xfs_off_t offset, xfs_off_t count);
extern bool xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
......@@ -43,11 +43,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count);
extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
#define XFS_REFLINK_DEDUPE 1 /* only reflink if contents match */
#define XFS_REFLINK_ALL (XFS_REFLINK_DEDUPE)
extern int xfs_reflink_remap_range(struct xfs_inode *src, xfs_off_t srcoff,
struct xfs_inode *dest, xfs_off_t destoff, xfs_off_t len,
unsigned int flags);
extern int xfs_reflink_remap_range(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out, u64 len, bool is_dedupe);
extern int xfs_reflink_clear_inode_flag(struct xfs_inode *ip,
struct xfs_trans **tpp);
extern int xfs_reflink_unshare(struct xfs_inode *ip, xfs_off_t offset,
......
......@@ -512,13 +512,13 @@ static struct attribute *xfs_error_attrs[] = {
};
struct kobj_type xfs_error_cfg_ktype = {
static struct kobj_type xfs_error_cfg_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_error_attrs,
};
struct kobj_type xfs_error_ktype = {
static struct kobj_type xfs_error_ktype = {
.release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops,
};
......
......@@ -3346,7 +3346,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_alloc);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_found);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_enospc);
DEFINE_RW_EVENT(xfs_reflink_reserve_cow_range);
DEFINE_RW_EVENT(xfs_reflink_reserve_cow);
DEFINE_RW_EVENT(xfs_reflink_allocate_cow_range);
DEFINE_INODE_IREC_EVENT(xfs_reflink_bounce_dio_write);
......@@ -3356,9 +3356,7 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_trim_irec);
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_piece);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_reserve_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_allocate_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
......
......@@ -19,11 +19,15 @@ struct vm_fault;
#define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */
/*
* Flags for iomap mappings:
* Flags for all iomap mappings:
*/
#define IOMAP_F_MERGED 0x01 /* contains multiple blocks/extents */
#define IOMAP_F_SHARED 0x02 /* block shared with another file */
#define IOMAP_F_NEW 0x04 /* blocks have been newly allocated */
#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */
/*
* Flags that only need to be reported for IOMAP_REPORT requests:
*/
#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */
#define IOMAP_F_SHARED 0x20 /* block shared with another file */
/*
* Magic value for blkno:
......@@ -42,8 +46,9 @@ struct iomap {
/*
* Flags for iomap_begin / iomap_end. No flag implies a read.
*/
#define IOMAP_WRITE (1 << 0)
#define IOMAP_ZERO (1 << 1)
#define IOMAP_WRITE (1 << 0) /* writing, must allocate blocks */
#define IOMAP_ZERO (1 << 1) /* zeroing operation, may skip holes */
#define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */
struct iomap_ops {
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment