Commit be408417 authored by Darrick J. Wong's avatar Darrick J. Wong

xfs: implement block reservation accounting for btrees we're staging

Create a new xrep_newbt structure to encapsulate a fake root for
creating a staged btree cursor as well as to track all the blocks that
we need to reserve in order to build that btree.

As for the particular choice of lowspace thresholds and btree block
slack factors -- at this point one could say that the thresholds in
online repair come from bulkload_estimate_ag_slack in xfs_repair[1].
But that's not the entire story, since the offline btree rebuilding
code in xfs_repair was merged as a retroport of the online btree code
in this patchset!

Before xfs_btree_staging.[ch] came along, xfs_repair determined the
slack factor (aka the number of slots to leave unfilled in each new
btree block) via open-coded logic in repair/phase5.c[2].  At that point
the slack factors were arbitrary quantities per btree.  The rmapbt
automatically left 10 slots free; everything else left zero.

That had a noticeable effect on performance straight after mounting
because adding records to /any/ btree would result in splits.  A few
years ago when this patch was first written, Dave and I decided that
repair should generate btree blocks that were 75% full unless space was
tight, in which case it should try to fill the blocks to nearly full.
We defined tight as ~10% free to avoid repair failures but settled on
3/32 (~9%) to avoid div64.

IOWs, we mostly pulled the thresholds out of thin air.  We've been
QAing with those geometry numbers ever since. ;)

Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/bulkload.c?h=v6.5.0#n114
Link: https://git.kernel.org/pub/scm/fs/xfs/xfsprogs-dev.git/tree/repair/phase5.c?h=v4.19.0#n1349Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
Reviewed-by: default avatarDave Chinner <dchinner@redhat.com>
parent 4c8ecd1c
......@@ -181,6 +181,7 @@ xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o
ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
xfs-y += $(addprefix scrub/, \
agheader_repair.o \
newbt.o \
reap.o \
repair.o \
)
......
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2022-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_NEWBT_H__
#define __XFS_SCRUB_NEWBT_H__
struct xrep_newbt_resv {
/* Link to list of extents that we've reserved. */
struct list_head list;
struct xfs_perag *pag;
/* AG block of the extent we reserved. */
xfs_agblock_t agbno;
/* Length of the reservation. */
xfs_extlen_t len;
/* How much of this reservation has been used. */
xfs_extlen_t used;
};
struct xrep_newbt {
struct xfs_scrub *sc;
/* List of extents that we've reserved. */
struct list_head resv_list;
/* Fake root for new btree. */
union {
struct xbtree_afakeroot afake;
struct xbtree_ifakeroot ifake;
};
/* rmap owner of these blocks */
struct xfs_owner_info oinfo;
/* btree geometry for the bulk loader */
struct xfs_btree_bload bload;
/* Allocation hint */
xfs_fsblock_t alloc_hint;
/* per-ag reservation type */
enum xfs_ag_resv_type resv;
};
void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct xfs_scrub *sc);
void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct xfs_scrub *sc,
const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
enum xfs_ag_resv_type resv);
int xrep_newbt_init_inode(struct xrep_newbt *xnr, struct xfs_scrub *sc,
int whichfork, const struct xfs_owner_info *oinfo);
int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
void xrep_newbt_cancel(struct xrep_newbt *xnr);
int xrep_newbt_commit(struct xrep_newbt *xnr);
int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
union xfs_btree_ptr *ptr);
#endif /* __XFS_SCRUB_NEWBT_H__ */
......@@ -1332,6 +1332,43 @@ TRACE_EVENT(xrep_ialloc_insert,
__entry->freemask)
)
DECLARE_EVENT_CLASS(xrep_newbt_extent_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
xfs_agblock_t agbno, xfs_extlen_t len,
int64_t owner),
TP_ARGS(mp, agno, agbno, len, owner),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
__field(int64_t, owner)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
__entry->agbno = agbno;
__entry->len = len;
__entry->owner = owner;
),
TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
__entry->len,
__entry->owner)
);
#define DEFINE_NEWBT_EXTENT_EVENT(name) \
DEFINE_EVENT(xrep_newbt_extent_class, name, \
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
xfs_agblock_t agbno, xfs_extlen_t len, \
int64_t owner), \
TP_ARGS(mp, agno, agbno, len, owner))
DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_alloc_ag_blocks);
DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_alloc_file_blocks);
DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_free_blocks);
DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_claim_block);
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment