Commit 166afc45 authored by Dave Chinner

Merge tag 'reflink-speedups-5.19_2022-04-28' of...

Merge tag 'reflink-speedups-5.19_2022-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-5.19-for-next

xfs: fix reflink inefficiencies

As Dave Chinner has complained about on IRC, there are a couple of
things about reflink that are very inefficient.  First of all, we
limited the size of all bunmapi operations to avoid flooding the log
with defer ops in the worst case, but recent changes to the defer
ops code have solved that problem, so get rid of the bunmapi length
clamp.

Second, the log reservations for reflink operations are far far
larger than they need to be.  Shrink them to exactly what we need to
handle each deferred RUI and CUI log item, and no more.  Also reduce
logcount because we don't need 8 rolls per operation.  Introduce a
transaction reservation compatibility layer to avoid changing the
minimum log size calculations.
Signed-off-by: Dave Chinner <david@fromorbit.com>
parents 956f1b8f 6ed7e509
......@@ -5280,7 +5280,6 @@ __xfs_bunmapi(
int whichfork; /* data or attribute fork */
xfs_fsblock_t sum;
xfs_filblks_t len = *rlen; /* length to unmap in file */
xfs_fileoff_t max_len;
xfs_fileoff_t end;
struct xfs_iext_cursor icur;
bool done = false;
......@@ -5299,16 +5298,6 @@ __xfs_bunmapi(
ASSERT(len > 0);
ASSERT(nexts >= 0);
/*
* Guesstimate how many blocks we can unmap without running the risk of
* blowing out the transaction with a mix of EFIs and reflink
* adjustments.
*/
if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
else
max_len = len;
error = xfs_iread_extents(tp, ip, whichfork);
if (error)
return error;
......@@ -5347,7 +5336,7 @@ __xfs_bunmapi(
extno = 0;
while (end != (xfs_fileoff_t)-1 && end >= start &&
(nexts == 0 || extno < nexts) && max_len > 0) {
(nexts == 0 || extno < nexts)) {
/*
* Is the found extent after a hole in which end lives?
* Just back up to the previous extent, if so.
......@@ -5381,14 +5370,6 @@ __xfs_bunmapi(
if (del.br_startoff + del.br_blockcount > end + 1)
del.br_blockcount = end + 1 - del.br_startoff;
/* How much can we safely unmap? */
if (max_len < del.br_blockcount) {
del.br_startoff += del.br_blockcount - max_len;
if (!wasdel)
del.br_startblock += del.br_blockcount - max_len;
del.br_blockcount = max_len;
}
if (!isrt)
goto delete;
......@@ -5524,7 +5505,6 @@ __xfs_bunmapi(
if (error)
goto error0;
max_len -= del.br_blockcount;
end = del.br_startoff - 1;
nodelete:
/*
......
......@@ -14,6 +14,7 @@
#include "xfs_trans_space.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_trace.h"
/*
* Calculate the maximum length in bytes that would be required for a local
......@@ -36,6 +37,65 @@ xfs_log_calc_max_attrsetm_res(
M_RES(mp)->tr_attrsetrt.tr_logres * nblks;
}
/*
* Compute an alternate set of log reservation sizes for use exclusively with
* minimum log size calculations.
*
* @mp: mount whose feature bits and btree geometry drive the calculation.
* mp->m_rmap_maxlevels may be overridden while this function runs and
* is restored to its saved value before returning.
* @resv: caller-provided reservation table that receives the result.
*
* The table is first filled in by xfs_trans_resv_calc(); afterwards the
* tr_write, tr_itruncate and tr_qm_dqalloc entries are overwritten with the
* historical log counts and with the reservation sizes returned by the
* *_minlogsize() helpers.
*/
static void
xfs_log_calc_trans_resv_for_minlogblocks(
struct xfs_mount *mp,
struct xfs_trans_resv *resv)
{
/* Remember the real rmap btree height so it can be restored on exit. */
unsigned int rmap_maxlevels = mp->m_rmap_maxlevels;
/*
* In the early days of rmap+reflink, we always set the rmap maxlevels
* to 9 even if the AG was small enough that it would never grow to
* that height. Transaction reservation sizes influence the minimum
* log size calculation, which influences the size of the log that mkfs
* creates. Use the old value here to ensure that newly formatted
* small filesystems will mount on older kernels.
*/
if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp))
mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS;
/*
* Start from the regular reservation table, computed while the rmap
* maxlevels override above (if any) is in effect.
*/
xfs_trans_resv_calc(mp, resv);
if (xfs_has_reflink(mp)) {
/*
* In the early days of reflink, typical log operation counts
* were greatly overestimated.
*/
resv->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
resv->tr_itruncate.tr_logcount =
XFS_ITRUNCATE_LOG_COUNT_REFLINK;
resv->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
} else if (xfs_has_rmapbt(mp)) {
/*
* In the early days of non-reflink rmap, the impact of rmapbt
* updates on log counts was not taken into account at all.
*/
resv->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
resv->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
resv->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
}
/*
* In the early days of reflink, we did not use deferred refcount
* update log items, so log reservations must be recomputed using the
* old calculations. These calls run before m_rmap_maxlevels is
* restored below, so they still see the overridden value.
*/
resv->tr_write.tr_logres =
xfs_calc_write_reservation_minlogsize(mp);
resv->tr_itruncate.tr_logres =
xfs_calc_itruncate_reservation_minlogsize(mp);
resv->tr_qm_dqalloc.tr_logres =
xfs_calc_qm_dqalloc_reservation_minlogsize(mp);
/* Put everything back the way it was. This goes at the end. */
mp->m_rmap_maxlevels = rmap_maxlevels;
}
/*
* Iterate over the log space reservation table to figure out and return
* the maximum one in terms of the pre-calculated values which were done
......@@ -46,19 +106,25 @@ xfs_log_get_max_trans_res(
struct xfs_mount *mp,
struct xfs_trans_res *max_resp)
{
struct xfs_trans_resv resv = {};
struct xfs_trans_res *resp;
struct xfs_trans_res *end_resp;
unsigned int i;
int log_space = 0;
int attr_space;
attr_space = xfs_log_calc_max_attrsetm_res(mp);
resp = (struct xfs_trans_res *)M_RES(mp);
end_resp = (struct xfs_trans_res *)(M_RES(mp) + 1);
for (; resp < end_resp; resp++) {
xfs_log_calc_trans_resv_for_minlogblocks(mp, &resv);
resp = (struct xfs_trans_res *)&resv;
end_resp = (struct xfs_trans_res *)(&resv + 1);
for (i = 0; resp < end_resp; i++, resp++) {
int tmp = resp->tr_logcount > 1 ?
resp->tr_logres * resp->tr_logcount :
resp->tr_logres;
trace_xfs_trans_resv_calc_minlogsize(mp, i, resp);
if (log_space < tmp) {
log_space = tmp;
*max_resp = *resp; /* struct copy */
......@@ -66,9 +132,10 @@ xfs_log_get_max_trans_res(
}
if (attr_space > log_space) {
*max_resp = M_RES(mp)->tr_attrsetm; /* struct copy */
*max_resp = resv.tr_attrsetm; /* struct copy */
max_resp->tr_logres = attr_space;
}
trace_xfs_log_get_max_trans_res(mp, max_resp);
}
/*
......
......@@ -886,8 +886,13 @@ xfs_refcount_still_have_space(
{
unsigned long overhead;
overhead = cur->bc_ag.refc.shape_changes *
xfs_allocfree_log_count(cur->bc_mp, 1);
/*
* Worst case estimate: full splits of the free space and rmap btrees
* to handle each of the shape changes to the refcount btree.
*/
overhead = xfs_allocfree_block_count(cur->bc_mp,
cur->bc_ag.refc.shape_changes);
overhead += cur->bc_mp->m_refc_maxlevels;
overhead *= cur->bc_mp->m_sb.sb_blocksize;
/*
......@@ -960,6 +965,7 @@ xfs_refcount_adjust_extents(
* Either cover the hole (increment) or
* delete the range (decrement).
*/
cur->bc_ag.refc.nr_ops++;
if (tmp.rc_refcount) {
error = xfs_refcount_insert(cur, &tmp,
&found_tmp);
......@@ -970,7 +976,6 @@ xfs_refcount_adjust_extents(
error = -EFSCORRUPTED;
goto out_error;
}
cur->bc_ag.refc.nr_ops++;
} else {
fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
cur->bc_ag.pag->pag_agno,
......@@ -1001,11 +1006,11 @@ xfs_refcount_adjust_extents(
ext.rc_refcount += adj;
trace_xfs_refcount_modify_extent(cur->bc_mp,
cur->bc_ag.pag->pag_agno, &ext);
cur->bc_ag.refc.nr_ops++;
if (ext.rc_refcount > 1) {
error = xfs_refcount_update(cur, &ext);
if (error)
goto out_error;
cur->bc_ag.refc.nr_ops++;
} else if (ext.rc_refcount == 1) {
error = xfs_refcount_delete(cur, &found_rec);
if (error)
......@@ -1014,7 +1019,6 @@ xfs_refcount_adjust_extents(
error = -EFSCORRUPTED;
goto out_error;
}
cur->bc_ag.refc.nr_ops++;
goto advloop;
} else {
fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
......
......@@ -67,14 +67,17 @@ extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
* log (plus any key updates) so we'll conservatively assume 32 bytes
* per record. We must also leave space for btree splits on both ends
* of the range and space for the CUD and a new CUI.
*
* Each EFI that we attach to the transaction is assumed to consume ~32 bytes.
* This is a low estimate for an EFI tracking a single extent (16 bytes for the
* EFI header, 16 for the extent, and 12 for the xlog op header), but the
* estimate is acceptable if there's more than one extent being freed.
* In the worst case of freeing every other block during a refcount decrease
* operation, we amortize the space used for one EFI log item across 16
* extents.
*/
#define XFS_REFCOUNT_ITEM_OVERHEAD 32
/*
* Convert a transaction log reservation of @log_res bytes into a length limit
* for unmapping: three quarters of the reservation divided by the per-item
* overhead estimate XFS_REFCOUNT_ITEM_OVERHEAD. The arithmetic is done in
* int and uses integer division, so the result is rounded down.
*/
static inline xfs_fileoff_t xfs_refcount_max_unmap(int log_res)
{
return (log_res * 3 / 4) / XFS_REFCOUNT_ITEM_OVERHEAD;
}
extern int xfs_refcount_has_record(struct xfs_btree_cur *cur,
xfs_agblock_t bno, xfs_extlen_t len, bool *exists);
union xfs_btree_rec;
......
......@@ -56,15 +56,14 @@ xfs_calc_buf_res(
* Per-extent log reservation for the btree changes involved in freeing or
* allocating an extent. In classic XFS there were two trees that will be
* modified (bnobt + cntbt). With rmap enabled, there are three trees
* (rmapbt). With reflink, there are four trees (refcountbt). The number of
* blocks reserved is based on the formula:
* (rmapbt). The number of blocks reserved is based on the formula:
*
* num trees * ((2 blocks/level * max depth) - 1)
*
* Keep in mind that max depth is calculated separately for each type of tree.
*/
uint
xfs_allocfree_log_count(
xfs_allocfree_block_count(
struct xfs_mount *mp,
uint num_ops)
{
......@@ -73,12 +72,23 @@ xfs_allocfree_log_count(
blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1);
if (xfs_has_rmapbt(mp))
blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
if (xfs_has_reflink(mp))
blocks += num_ops * (2 * mp->m_refc_maxlevels - 1);
return blocks;
}
/*
 * Number of refcount btree blocks to reserve for @num_ops refcount btree
 * changes.  Refcount btree updates never share a transaction with an extent
 * allocation or free, which is why they are counted on their own here.
 *
 * Each operation is charged (2 * max refcountbt depth - 1) blocks.
 */
static unsigned int
xfs_refcountbt_block_count(
	struct xfs_mount	*mp,
	unsigned int		num_ops)
{
	unsigned int		blocks_per_op = 2 * mp->m_refc_maxlevels - 1;

	return num_ops * blocks_per_op;
}
/*
* Logging inodes is really tricksy. They are logged in memory format,
* which means that what we write into the log doesn't directly translate into
......@@ -136,7 +146,7 @@ xfs_calc_inobt_res(
{
return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
XFS_FSB_TO_B(mp, 1));
}
......@@ -183,7 +193,7 @@ xfs_calc_inode_chunk_res(
{
uint res, size = 0;
res = xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
res = xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
XFS_FSB_TO_B(mp, 1));
if (alloc) {
/* icreate tx uses ordered buffers */
......@@ -203,7 +213,7 @@ xfs_calc_inode_chunk_res(
* extents, as well as the realtime summary block.
*/
static unsigned int
xfs_rtalloc_log_count(
xfs_rtalloc_block_count(
struct xfs_mount *mp,
unsigned int num_ops)
{
......@@ -233,6 +243,28 @@ xfs_rtalloc_log_count(
* register overflow from temporaries in the calculations.
*/
/*
 * Log reservation for a transaction that performs @nr_ops refcount updates.
 * Refcount updates are always done via deferred log items.
 *
 * For the data device the reservation is the sum of:
 *    the AGFs of the AGs containing the blocks:
 *        nr_ops * sector size
 *    the refcount btrees:
 *        nr_ops * 1 tree * (2 * max depth - 1) * block size
 *
 * Returns 0 when the filesystem does not have reflink enabled.
 */
static unsigned int
xfs_calc_refcountbt_reservation(
	struct xfs_mount	*mp,
	unsigned int		nr_ops)
{
	unsigned int		fsb_bytes = XFS_FSB_TO_B(mp, 1);
	unsigned int		agf_res;
	unsigned int		btree_res;

	if (!xfs_has_reflink(mp))
		return 0;

	agf_res = xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize);
	btree_res = xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops),
				     fsb_bytes);

	return agf_res + btree_res;
}
/*
* In a write transaction we can allocate a maximum of 2
......@@ -255,34 +287,65 @@ xfs_rtalloc_log_count(
* the agfls of the ags containing the blocks: 2 * sector size
* the super block free block counter: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
* And any refcount updates that happen in a separate transaction (t4).
*/
STATIC uint
xfs_calc_write_reservation(
struct xfs_mount *mp)
struct xfs_mount *mp,
bool for_minlogsize)
{
unsigned int t1, t2, t3;
unsigned int t1, t2, t3, t4;
unsigned int blksz = XFS_FSB_TO_B(mp, 1);
t1 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
if (xfs_has_realtime(mp)) {
t2 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
blksz) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz);
xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 1), blksz) +
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1), blksz);
} else {
t2 = 0;
}
t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
/*
* In the early days of reflink, we included enough reservation to log
* two refcountbt splits for each transaction. The codebase runs
* refcountbt updates in separate transactions now, so to compute the
* minimum log size, add the refcountbtree splits back to t1 and t3 and
* do not account them separately as t4. Reflink did not support
* realtime when the reservations were established, so no adjustment to
* t2 is needed.
*/
if (for_minlogsize) {
unsigned int adj = 0;
if (xfs_has_reflink(mp))
adj = xfs_calc_buf_res(
xfs_refcountbt_block_count(mp, 2),
blksz);
t1 += adj;
t3 += adj;
return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
}
t4 = xfs_calc_refcountbt_reservation(mp, 1);
return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
}
/*
* Return the write reservation as it must be computed for minimum log size
* purposes: xfs_calc_write_reservation() with for_minlogsize set to true.
*/
unsigned int
xfs_calc_write_reservation_minlogsize(
struct xfs_mount *mp)
{
return xfs_calc_write_reservation(mp, true);
}
/*
......@@ -304,29 +367,57 @@ xfs_calc_write_reservation(
* the realtime summary: 2 exts * 1 block
* worst case split in allocation btrees per extent assuming 2 extents:
* 2 exts * 2 trees * (2 * max depth - 1) * block size
* And any refcount updates that happen in a separate transaction (t4).
*/
STATIC uint
xfs_calc_itruncate_reservation(
struct xfs_mount *mp)
struct xfs_mount *mp,
bool for_minlogsize)
{
unsigned int t1, t2, t3;
unsigned int t1, t2, t3, t4;
unsigned int blksz = XFS_FSB_TO_B(mp, 1);
t1 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4), blksz);
if (xfs_has_realtime(mp)) {
t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
xfs_calc_buf_res(xfs_rtalloc_block_count(mp, 2), blksz) +
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2), blksz);
} else {
t3 = 0;
}
/*
* In the early days of reflink, we included enough reservation to log
* four refcountbt splits in the same transaction as bnobt/cntbt
* updates. The codebase runs refcountbt updates in separate
* transactions now, so to compute the minimum log size, add the
* refcount btree splits back here and do not compute them separately
* as t4. Reflink did not support realtime when the reservations were
* established, so do not adjust t3.
*/
if (for_minlogsize) {
if (xfs_has_reflink(mp))
t2 += xfs_calc_buf_res(
xfs_refcountbt_block_count(mp, 4),
blksz);
return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
}
t4 = xfs_calc_refcountbt_reservation(mp, 2);
return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
}
/*
* Return the truncate reservation as it must be computed for minimum log size
* purposes: xfs_calc_itruncate_reservation() with for_minlogsize set to true.
*/
unsigned int
xfs_calc_itruncate_reservation_minlogsize(
struct xfs_mount *mp)
{
return xfs_calc_itruncate_reservation(mp, true);
}
/*
......@@ -350,7 +441,7 @@ xfs_calc_rename_reservation(
xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 3),
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 3),
XFS_FSB_TO_B(mp, 1))));
}
......@@ -390,7 +481,7 @@ xfs_calc_link_reservation(
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
XFS_FSB_TO_B(mp, 1))));
}
......@@ -428,7 +519,7 @@ xfs_calc_remove_reservation(
xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
XFS_FSB_TO_B(mp, 1))));
}
......@@ -573,7 +664,7 @@ xfs_calc_growdata_reservation(
struct xfs_mount *mp)
{
return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
XFS_FSB_TO_B(mp, 1));
}
......@@ -595,7 +686,7 @@ xfs_calc_growrtalloc_reservation(
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
XFS_FSB_TO_B(mp, 1));
}
......@@ -671,7 +762,7 @@ xfs_calc_addafork_reservation(
xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
XFS_FSB_TO_B(mp, 1)) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 1),
XFS_FSB_TO_B(mp, 1));
}
......@@ -694,7 +785,7 @@ xfs_calc_attrinval_reservation(
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK),
XFS_FSB_TO_B(mp, 1))),
(xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 4),
XFS_FSB_TO_B(mp, 1))));
}
......@@ -761,7 +852,7 @@ xfs_calc_attrrm_reservation(
XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)),
(xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
xfs_calc_buf_res(xfs_allocfree_block_count(mp, 2),
XFS_FSB_TO_B(mp, 1))));
}
......@@ -792,13 +883,21 @@ xfs_calc_qm_setqlim_reservation(void)
*/
STATIC uint
xfs_calc_qm_dqalloc_reservation(
struct xfs_mount *mp)
struct xfs_mount *mp,
bool for_minlogsize)
{
return xfs_calc_write_reservation(mp) +
return xfs_calc_write_reservation(mp, for_minlogsize) +
xfs_calc_buf_res(1,
XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
}
/*
* Return the dquot allocation reservation as it must be computed for minimum
* log size purposes: xfs_calc_qm_dqalloc_reservation() with for_minlogsize
* set to true.
*/
unsigned int
xfs_calc_qm_dqalloc_reservation_minlogsize(
struct xfs_mount *mp)
{
return xfs_calc_qm_dqalloc_reservation(mp, true);
}
/*
* Syncing the incore super block changes to disk.
* the super block to reflect the changes: sector size
......@@ -815,35 +914,17 @@ xfs_trans_resv_calc(
struct xfs_mount *mp,
struct xfs_trans_resv *resp)
{
unsigned int rmap_maxlevels = mp->m_rmap_maxlevels;
/*
* In the early days of rmap+reflink, we always set the rmap maxlevels
* to 9 even if the AG was small enough that it would never grow to
* that height. Transaction reservation sizes influence the minimum
* log size calculation, which influences the size of the log that mkfs
* creates. Use the old value here to ensure that newly formatted
* small filesystems will mount on older kernels.
*/
if (xfs_has_rmapbt(mp) && xfs_has_reflink(mp))
mp->m_rmap_maxlevels = XFS_OLD_REFLINK_RMAP_MAXLEVELS;
int logcount_adj = 0;
/*
* The following transactions are logged in physical format and
* require a permanent reservation on space.
*/
resp->tr_write.tr_logres = xfs_calc_write_reservation(mp);
if (xfs_has_reflink(mp))
resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
else
resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp);
if (xfs_has_reflink(mp))
resp->tr_itruncate.tr_logcount =
XFS_ITRUNCATE_LOG_COUNT_REFLINK;
else
resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
......@@ -900,10 +981,8 @@ xfs_trans_resv_calc(
resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp);
if (xfs_has_reflink(mp))
resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT_REFLINK;
else
resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp,
false);
resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
......@@ -931,6 +1010,19 @@ xfs_trans_resv_calc(
resp->tr_growrtzero.tr_logres = xfs_calc_growrtzero_reservation(mp);
resp->tr_growrtfree.tr_logres = xfs_calc_growrtfree_reservation(mp);
/* Put everything back the way it was. This goes at the end. */
mp->m_rmap_maxlevels = rmap_maxlevels;
/*
* Add one logcount for BUI items that appear with rmap or reflink,
* one logcount for refcount intent items, and one logcount for rmap
* intent items.
*/
if (xfs_has_reflink(mp) || xfs_has_rmapbt(mp))
logcount_adj++;
if (xfs_has_reflink(mp))
logcount_adj++;
if (xfs_has_rmapbt(mp))
logcount_adj++;
resp->tr_itruncate.tr_logcount += logcount_adj;
resp->tr_write.tr_logcount += logcount_adj;
resp->tr_qm_dqalloc.tr_logcount += logcount_adj;
}
......@@ -73,7 +73,6 @@ struct xfs_trans_resv {
#define XFS_DEFAULT_LOG_COUNT 1
#define XFS_DEFAULT_PERM_LOG_COUNT 2
#define XFS_ITRUNCATE_LOG_COUNT 2
#define XFS_ITRUNCATE_LOG_COUNT_REFLINK 8
#define XFS_INACTIVE_LOG_COUNT 2
#define XFS_CREATE_LOG_COUNT 2
#define XFS_CREATE_TMPFILE_LOG_COUNT 2
......@@ -83,13 +82,24 @@ struct xfs_trans_resv {
#define XFS_LINK_LOG_COUNT 2
#define XFS_RENAME_LOG_COUNT 2
#define XFS_WRITE_LOG_COUNT 2
#define XFS_WRITE_LOG_COUNT_REFLINK 8
#define XFS_ADDAFORK_LOG_COUNT 2
#define XFS_ATTRINVAL_LOG_COUNT 1
#define XFS_ATTRSET_LOG_COUNT 3
#define XFS_ATTRRM_LOG_COUNT 3
/*
* Original log operation counts were overestimated in the early days of
* reflink. These are retained here purely for minimum log size calculations
* and must not be used for runtime reservations.
*/
#define XFS_ITRUNCATE_LOG_COUNT_REFLINK 8
#define XFS_WRITE_LOG_COUNT_REFLINK 8
void xfs_trans_resv_calc(struct xfs_mount *mp, struct xfs_trans_resv *resp);
uint xfs_allocfree_log_count(struct xfs_mount *mp, uint num_ops);
uint xfs_allocfree_block_count(struct xfs_mount *mp, uint num_ops);
unsigned int xfs_calc_itruncate_reservation_minlogsize(struct xfs_mount *mp);
unsigned int xfs_calc_write_reservation_minlogsize(struct xfs_mount *mp);
unsigned int xfs_calc_qm_dqalloc_reservation_minlogsize(struct xfs_mount *mp);
#endif /* __XFS_TRANS_RESV_H__ */
......@@ -586,21 +586,21 @@ xfs_reflink_cancel_cow_range(
STATIC int
xfs_reflink_end_cow_extent(
struct xfs_inode *ip,
xfs_fileoff_t offset_fsb,
xfs_fileoff_t *end_fsb)
xfs_fileoff_t *offset_fsb,
xfs_fileoff_t end_fsb)
{
struct xfs_bmbt_irec got, del;
struct xfs_iext_cursor icur;
struct xfs_bmbt_irec got, del, data;
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
xfs_filblks_t rlen;
unsigned int resblks;
int nmaps;
int error;
/* No COW extents? That's easy! */
if (ifp->if_bytes == 0) {
*end_fsb = offset_fsb;
*offset_fsb = end_fsb;
return 0;
}
......@@ -631,42 +631,66 @@ xfs_reflink_end_cow_extent(
* left by the time I/O completes for the loser of the race. In that
* case we are done.
*/
if (!xfs_iext_lookup_extent_before(ip, ifp, end_fsb, &icur, &got) ||
got.br_startoff + got.br_blockcount <= offset_fsb) {
*end_fsb = offset_fsb;
if (!xfs_iext_lookup_extent(ip, ifp, *offset_fsb, &icur, &got) ||
got.br_startoff >= end_fsb) {
*offset_fsb = end_fsb;
goto out_cancel;
}
/*
* Structure copy @got into @del, then trim @del to the range that we
* were asked to remap. We preserve @got for the eventual CoW fork
* Only remap real extents that contain data. With AIO, speculative
* preallocations can leak into the range we are called upon, and we
* need to skip them. Preserve @got for the eventual CoW fork
* deletion; from now on @del represents the mapping that we're
* actually remapping.
*/
while (!xfs_bmap_is_written_extent(&got)) {
if (!xfs_iext_next_extent(ifp, &icur, &got) ||
got.br_startoff >= end_fsb) {
*offset_fsb = end_fsb;
goto out_cancel;
}
}
del = got;
xfs_trim_extent(&del, offset_fsb, *end_fsb - offset_fsb);
ASSERT(del.br_blockcount > 0);
/* Grab the corresponding mapping in the data fork. */
nmaps = 1;
error = xfs_bmapi_read(ip, del.br_startoff, del.br_blockcount, &data,
&nmaps, 0);
if (error)
goto out_cancel;
/* We can only remap the smaller of the two extent sizes. */
data.br_blockcount = min(data.br_blockcount, del.br_blockcount);
del.br_blockcount = data.br_blockcount;
trace_xfs_reflink_cow_remap_from(ip, &del);
trace_xfs_reflink_cow_remap_to(ip, &data);
if (xfs_bmap_is_real_extent(&data)) {
/*
* Only remap real extents that contain data. With AIO, speculative
* preallocations can leak into the range we are called upon, and we
* need to skip them.
* If the extent we're remapping is backed by storage (written
* or not), unmap the extent and drop its refcount.
*/
if (!xfs_bmap_is_written_extent(&got)) {
*end_fsb = del.br_startoff;
goto out_cancel;
}
xfs_bmap_unmap_extent(tp, ip, &data);
xfs_refcount_decrease_extent(tp, &data);
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT,
-data.br_blockcount);
} else if (data.br_startblock == DELAYSTARTBLOCK) {
int done;
/* Unmap the old blocks in the data fork. */
rlen = del.br_blockcount;
error = __xfs_bunmapi(tp, ip, del.br_startoff, &rlen, 0, 1);
/*
* If the extent we're remapping is a delalloc reservation,
* we can use the regular bunmapi function to release the
* incore state. Dropping the delalloc reservation takes care
* of the quota reservation for us.
*/
error = xfs_bunmapi(NULL, ip, data.br_startoff,
data.br_blockcount, 0, 1, &done);
if (error)
goto out_cancel;
/* Trim the extent to whatever got unmapped. */
xfs_trim_extent(&del, del.br_startoff + rlen, del.br_blockcount - rlen);
trace_xfs_reflink_cow_remap(ip, &del);
ASSERT(done);
}
/* Free the CoW orphan record. */
xfs_refcount_free_cow_extent(tp, del.br_startblock, del.br_blockcount);
......@@ -687,7 +711,7 @@ xfs_reflink_end_cow_extent(
return error;
/* Update the caller about how much progress we made. */
*end_fsb = del.br_startoff;
*offset_fsb = del.br_startoff + del.br_blockcount;
return 0;
out_cancel:
......@@ -715,7 +739,7 @@ xfs_reflink_end_cow(
end_fsb = XFS_B_TO_FSB(ip->i_mount, offset + count);
/*
* Walk backwards until we're out of the I/O range. The loop function
* Walk forwards until we've remapped the I/O range. The loop function
* repeatedly cycles the ILOCK to allocate one transaction per remapped
* extent.
*
......@@ -747,7 +771,7 @@ xfs_reflink_end_cow(
* blocks will be remapped.
*/
while (end_fsb > offset_fsb && !error)
error = xfs_reflink_end_cow_extent(ip, offset_fsb, &end_fsb);
error = xfs_reflink_end_cow_extent(ip, &offset_fsb, end_fsb);
if (error)
trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
......@@ -1138,7 +1162,7 @@ xfs_reflink_remap_extent(
xfs_refcount_decrease_extent(tp, &smap);
qdelta -= smap.br_blockcount;
} else if (smap.br_startblock == DELAYSTARTBLOCK) {
xfs_filblks_t len = smap.br_blockcount;
int done;
/*
* If the extent we're unmapping is a delalloc reservation,
......@@ -1146,10 +1170,11 @@ xfs_reflink_remap_extent(
* incore state. Dropping the delalloc reservation takes care
* of the quota reservation for us.
*/
error = __xfs_bunmapi(NULL, ip, smap.br_startoff, &len, 0, 1);
error = xfs_bunmapi(NULL, ip, smap.br_startoff,
smap.br_blockcount, 0, 1, &done);
if (error)
goto out_cancel;
ASSERT(len == 0);
ASSERT(done);
}
/*
......
......@@ -3408,7 +3408,8 @@ DEFINE_INODE_IREC_EVENT(xfs_reflink_convert_cow);
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_cancel_cow_range);
DEFINE_SIMPLE_IO_EVENT(xfs_reflink_end_cow);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_from);
DEFINE_INODE_IREC_EVENT(xfs_reflink_cow_remap_to);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_cancel_cow_range_error);
DEFINE_INODE_ERROR_EVENT(xfs_reflink_end_cow_error);
......@@ -3503,7 +3504,7 @@ DEFINE_GETFSMAP_EVENT(xfs_getfsmap_low_key);
DEFINE_GETFSMAP_EVENT(xfs_getfsmap_high_key);
DEFINE_GETFSMAP_EVENT(xfs_getfsmap_mapping);
TRACE_EVENT(xfs_trans_resv_calc,
DECLARE_EVENT_CLASS(xfs_trans_resv_class,
TP_PROTO(struct xfs_mount *mp, unsigned int type,
struct xfs_trans_res *res),
TP_ARGS(mp, type, res),
......@@ -3527,6 +3528,33 @@ TRACE_EVENT(xfs_trans_resv_calc,
__entry->logres,
__entry->logcount,
__entry->logflags)
)
/*
* Define a tracepoint called @name in the xfs_trans_resv_class event class.
* Every such event takes the mount, an unsigned reservation type index and a
* pointer to the transaction reservation being reported.
*/
#define DEFINE_TRANS_RESV_EVENT(name) \
DEFINE_EVENT(xfs_trans_resv_class, name, \
TP_PROTO(struct xfs_mount *mp, unsigned int type, \
struct xfs_trans_res *res), \
TP_ARGS(mp, type, res))
DEFINE_TRANS_RESV_EVENT(xfs_trans_resv_calc);
DEFINE_TRANS_RESV_EVENT(xfs_trans_resv_calc_minlogsize);
/*
* Tracepoint reporting a single transaction reservation: the device of the
* mount plus the tr_logres and tr_logcount fields of @res. tr_logflags is
* not recorded by this event.
*/
TRACE_EVENT(xfs_log_get_max_trans_res,
TP_PROTO(struct xfs_mount *mp, const struct xfs_trans_res *res),
TP_ARGS(mp, res),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(uint, logres)
__field(int, logcount)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->logres = res->tr_logres;
__entry->logcount = res->tr_logcount;
),
TP_printk("dev %d:%d logres %u logcount %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->logres,
__entry->logcount)
);
DECLARE_EVENT_CLASS(xfs_trans_class,
......
......@@ -32,7 +32,6 @@ static void
xfs_trans_trace_reservations(
struct xfs_mount *mp)
{
struct xfs_trans_res resv;
struct xfs_trans_res *res;
struct xfs_trans_res *end_res;
int i;
......@@ -41,8 +40,6 @@ xfs_trans_trace_reservations(
end_res = (struct xfs_trans_res *)(M_RES(mp) + 1);
for (i = 0; res < end_res; i++, res++)
trace_xfs_trans_resv_calc(mp, i, res);
xfs_log_get_max_trans_res(mp, &resv);
trace_xfs_trans_resv_calc(mp, -1, &resv);
}
#else
# define xfs_trans_trace_reservations(mp)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment