Commit 9872e4a8 authored by Linus Torvalds

Merge tag 'xfs-5.20-merge-8' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull more xfs updates from Darrick Wong:
 "There's not a lot this time around, just the usual bug fixes and
  corrections for missing error returns.

   - Return error codes from block device flushes to userspace

   - Fix a deadlock between reclaim and mount time quotacheck

   - Fix an unnecessary ENOSPC return when doing COW on a filesystem
     with severe free space fragmentation

   - Fix a miscalculation in the transaction reservation computations
     for file removal operations"

* tag 'xfs-5.20-merge-8' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: fix inode reservation space for removing transaction
  xfs: Fix false ENOSPC when performing direct write on a delalloc extent in cow fork
  xfs: fix intermittent hang during quotacheck
  xfs: check return codes when flushing block devices
parents e140f731 031d166f
...@@ -515,7 +515,7 @@ xfs_calc_remove_reservation( ...@@ -515,7 +515,7 @@ xfs_calc_remove_reservation(
{ {
return XFS_DQUOT_LOGRES(mp) + return XFS_DQUOT_LOGRES(mp) +
xfs_calc_iunlink_add_reservation(mp) + xfs_calc_iunlink_add_reservation(mp) +
max((xfs_calc_inode_res(mp, 1) + max((xfs_calc_inode_res(mp, 2) +
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))), XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
......
...@@ -143,7 +143,7 @@ xfs_file_fsync( ...@@ -143,7 +143,7 @@ xfs_file_fsync(
{ {
struct xfs_inode *ip = XFS_I(file->f_mapping->host); struct xfs_inode *ip = XFS_I(file->f_mapping->host);
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
int error = 0; int error, err2;
int log_flushed = 0; int log_flushed = 0;
trace_xfs_file_fsync(ip); trace_xfs_file_fsync(ip);
...@@ -164,18 +164,21 @@ xfs_file_fsync( ...@@ -164,18 +164,21 @@ xfs_file_fsync(
* inode size in case of an extending write. * inode size in case of an extending write.
*/ */
if (XFS_IS_REALTIME_INODE(ip)) if (XFS_IS_REALTIME_INODE(ip))
blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev); error = blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
else if (mp->m_logdev_targp != mp->m_ddev_targp) else if (mp->m_logdev_targp != mp->m_ddev_targp)
blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); error = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
/* /*
* Any inode that has dirty modifications in the log is pinned. The * Any inode that has dirty modifications in the log is pinned. The
* racy check here for a pinned inode while not catch modifications * racy check here for a pinned inode will not catch modifications
* that happen concurrently to the fsync call, but fsync semantics * that happen concurrently to the fsync call, but fsync semantics
* only require to sync previously completed I/O. * only require to sync previously completed I/O.
*/ */
if (xfs_ipincount(ip)) if (xfs_ipincount(ip)) {
error = xfs_fsync_flush_log(ip, datasync, &log_flushed); err2 = xfs_fsync_flush_log(ip, datasync, &log_flushed);
if (err2 && !error)
error = err2;
}
/* /*
* If we only have a single device, and the log force about was * If we only have a single device, and the log force about was
...@@ -185,8 +188,11 @@ xfs_file_fsync( ...@@ -185,8 +188,11 @@ xfs_file_fsync(
* commit. * commit.
*/ */
if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) && if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
mp->m_logdev_targp == mp->m_ddev_targp) mp->m_logdev_targp == mp->m_ddev_targp) {
blkdev_issue_flush(mp->m_ddev_targp->bt_bdev); err2 = blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
if (err2 && !error)
error = err2;
}
return error; return error;
} }
......
...@@ -1925,9 +1925,17 @@ xlog_write_iclog( ...@@ -1925,9 +1925,17 @@ xlog_write_iclog(
* device cache first to ensure all metadata writeback covered * device cache first to ensure all metadata writeback covered
* by the LSN in this iclog is on stable storage. This is slow, * by the LSN in this iclog is on stable storage. This is slow,
* but it *must* complete before we issue the external log IO. * but it *must* complete before we issue the external log IO.
*
* If the flush fails, we cannot conclude that past metadata
* writeback from the log succeeded. Repeating the flush is
* not possible, hence we must shut down with log IO error to
* avoid shutdown re-entering this path and erroring out again.
*/ */
if (log->l_targ != log->l_mp->m_ddev_targp) if (log->l_targ != log->l_mp->m_ddev_targp &&
blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev); blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev)) {
xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR);
return;
}
} }
if (iclog->ic_flags & XLOG_ICL_NEED_FUA) if (iclog->ic_flags & XLOG_ICL_NEED_FUA)
iclog->ic_bio.bi_opf |= REQ_FUA; iclog->ic_bio.bi_opf |= REQ_FUA;
......
...@@ -1235,6 +1235,11 @@ xfs_qm_flush_one( ...@@ -1235,6 +1235,11 @@ xfs_qm_flush_one(
if (error) if (error)
goto out_unlock; goto out_unlock;
if (!(bp->b_flags & _XBF_DELWRI_Q)) {
error = -EAGAIN;
xfs_buf_relse(bp);
goto out_unlock;
}
xfs_buf_unlock(bp); xfs_buf_unlock(bp);
xfs_buf_delwri_pushbuf(bp, buffer_list); xfs_buf_delwri_pushbuf(bp, buffer_list);
......
...@@ -341,36 +341,55 @@ xfs_find_trim_cow_extent( ...@@ -341,36 +341,55 @@ xfs_find_trim_cow_extent(
return 0; return 0;
} }
/* Allocate all CoW reservations covering a range of blocks in a file. */ static int
int xfs_reflink_convert_unwritten(
xfs_reflink_allocate_cow(
struct xfs_inode *ip, struct xfs_inode *ip,
struct xfs_bmbt_irec *imap, struct xfs_bmbt_irec *imap,
struct xfs_bmbt_irec *cmap, struct xfs_bmbt_irec *cmap,
bool *shared,
uint *lockmode,
bool convert_now) bool convert_now)
{ {
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb = imap->br_startoff; xfs_fileoff_t offset_fsb = imap->br_startoff;
xfs_filblks_t count_fsb = imap->br_blockcount; xfs_filblks_t count_fsb = imap->br_blockcount;
struct xfs_trans *tp; int error;
int nimaps, error = 0;
bool found;
xfs_filblks_t resaligned;
xfs_extlen_t resblks = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); /*
if (!ip->i_cowfp) { * cmap might larger than imap due to cowextsize hint.
ASSERT(!xfs_is_reflink_inode(ip)); */
xfs_ifork_init_cow(ip); xfs_trim_extent(cmap, offset_fsb, count_fsb);
}
/*
* COW fork extents are supposed to remain unwritten until we're ready
* to initiate a disk write. For direct I/O we are going to write the
* data and need the conversion, but for buffered writes we're done.
*/
if (!convert_now || cmap->br_state == XFS_EXT_NORM)
return 0;
trace_xfs_reflink_convert_cow(ip, cmap);
error = xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb);
if (!error)
cmap->br_state = XFS_EXT_NORM;
error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
if (error || !*shared)
return error; return error;
if (found) }
goto convert;
static int
xfs_reflink_fill_cow_hole(
struct xfs_inode *ip,
struct xfs_bmbt_irec *imap,
struct xfs_bmbt_irec *cmap,
bool *shared,
uint *lockmode,
bool convert_now)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
xfs_filblks_t resaligned;
xfs_extlen_t resblks;
int nimaps;
int error;
bool found;
resaligned = xfs_aligned_fsb_count(imap->br_startoff, resaligned = xfs_aligned_fsb_count(imap->br_startoff,
imap->br_blockcount, xfs_get_cowextsz_hint(ip)); imap->br_blockcount, xfs_get_cowextsz_hint(ip));
...@@ -386,17 +405,17 @@ xfs_reflink_allocate_cow( ...@@ -386,17 +405,17 @@ xfs_reflink_allocate_cow(
*lockmode = XFS_ILOCK_EXCL; *lockmode = XFS_ILOCK_EXCL;
/*
* Check for an overlapping extent again now that we dropped the ilock.
*/
error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found); error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
if (error || !*shared) if (error || !*shared)
goto out_trans_cancel; goto out_trans_cancel;
if (found) { if (found) {
xfs_trans_cancel(tp); xfs_trans_cancel(tp);
goto convert; goto convert;
} }
ASSERT(cmap->br_startoff > imap->br_startoff);
/* Allocate the entire reservation as unwritten blocks. */ /* Allocate the entire reservation as unwritten blocks. */
nimaps = 1; nimaps = 1;
error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
...@@ -416,26 +435,135 @@ xfs_reflink_allocate_cow( ...@@ -416,26 +435,135 @@ xfs_reflink_allocate_cow(
*/ */
if (nimaps == 0) if (nimaps == 0)
return -ENOSPC; return -ENOSPC;
convert: convert:
xfs_trim_extent(cmap, offset_fsb, count_fsb); return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
out_trans_cancel:
xfs_trans_cancel(tp);
return error;
}
static int
xfs_reflink_fill_delalloc(
struct xfs_inode *ip,
struct xfs_bmbt_irec *imap,
struct xfs_bmbt_irec *cmap,
bool *shared,
uint *lockmode,
bool convert_now)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
int nimaps;
int error;
bool found;
do {
xfs_iunlock(ip, *lockmode);
*lockmode = 0;
error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, 0, 0,
false, &tp);
if (error)
return error;
*lockmode = XFS_ILOCK_EXCL;
error = xfs_find_trim_cow_extent(ip, imap, cmap, shared,
&found);
if (error || !*shared)
goto out_trans_cancel;
if (found) {
xfs_trans_cancel(tp);
break;
}
ASSERT(isnullstartblock(cmap->br_startblock) ||
cmap->br_startblock == DELAYSTARTBLOCK);
/* /*
* COW fork extents are supposed to remain unwritten until we're ready * Replace delalloc reservation with an unwritten extent.
* to initiate a disk write. For direct I/O we are going to write the
* data and need the conversion, but for buffered writes we're done.
*/ */
if (!convert_now || cmap->br_state == XFS_EXT_NORM) nimaps = 1;
return 0; error = xfs_bmapi_write(tp, ip, cmap->br_startoff,
trace_xfs_reflink_convert_cow(ip, cmap); cmap->br_blockcount,
error = xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb); XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0,
if (!error) cmap, &nimaps);
cmap->br_state = XFS_EXT_NORM; if (error)
goto out_trans_cancel;
xfs_inode_set_cowblocks_tag(ip);
error = xfs_trans_commit(tp);
if (error)
return error; return error;
/*
* Allocation succeeded but the requested range was not even
* partially satisfied? Bail out!
*/
if (nimaps == 0)
return -ENOSPC;
} while (cmap->br_startoff + cmap->br_blockcount <= imap->br_startoff);
return xfs_reflink_convert_unwritten(ip, imap, cmap, convert_now);
out_trans_cancel: out_trans_cancel:
xfs_trans_cancel(tp); xfs_trans_cancel(tp);
return error; return error;
} }
/* Allocate all CoW reservations covering a range of blocks in a file. */
int
xfs_reflink_allocate_cow(
struct xfs_inode *ip,
struct xfs_bmbt_irec *imap,
struct xfs_bmbt_irec *cmap,
bool *shared,
uint *lockmode,
bool convert_now)
{
int error;
bool found;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (!ip->i_cowfp) {
ASSERT(!xfs_is_reflink_inode(ip));
xfs_ifork_init_cow(ip);
}
error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
if (error || !*shared)
return error;
/* CoW fork has a real extent */
if (found)
return xfs_reflink_convert_unwritten(ip, imap, cmap,
convert_now);
/*
* CoW fork does not have an extent and data extent is shared.
* Allocate a real extent in the CoW fork.
*/
if (cmap->br_startoff > imap->br_startoff)
return xfs_reflink_fill_cow_hole(ip, imap, cmap, shared,
lockmode, convert_now);
/*
* CoW fork has a delalloc reservation. Replace it with a real extent.
* There may or may not be a data fork mapping.
*/
if (isnullstartblock(cmap->br_startblock) ||
cmap->br_startblock == DELAYSTARTBLOCK)
return xfs_reflink_fill_delalloc(ip, imap, cmap, shared,
lockmode, convert_now);
/* Shouldn't get here. */
ASSERT(0);
return -EFSCORRUPTED;
}
/* /*
* Cancel CoW reservations for some block range of an inode. * Cancel CoW reservations for some block range of an inode.
* *
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment