Commit 60e3d707 authored by Darrick J. Wong

xfs: support bulk loading of staged btrees

Add a new btree function that enables us to bulk load a btree cursor.
This will be used by the upcoming online repair patches to generate new
btrees.  This avoids the programmatic inefficiency of calling
xfs_btree_insert in a loop (which generates a lot of log traffic) in
favor of stamping out new btree blocks with ordered buffers, and then
committing both the new root and scheduling the removal of the old btree
blocks in a single transaction commit.

The design of this new generic code is based off the btree rebuilding
code in xfs_repair's phase 5 code, with the explicit goal of enabling us
to share that code between scrub and repair.  It has the additional
feature of being able to control btree block loading factors.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
parent 349e1c03
......@@ -1027,7 +1027,7 @@ xfs_btree_ptr_is_null(
return ptr->s == cpu_to_be32(NULLAGBLOCK);
}
STATIC void
void
xfs_btree_set_ptr_null(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr)
......@@ -1063,7 +1063,7 @@ xfs_btree_get_sibling(
}
}
STATIC void
void
xfs_btree_set_sibling(
struct xfs_btree_cur *cur,
struct xfs_btree_block *block,
......@@ -1141,7 +1141,7 @@ xfs_btree_init_block(
btnum, level, numrecs, owner, 0);
}
STATIC void
void
xfs_btree_init_block_cur(
struct xfs_btree_cur *cur,
struct xfs_buf *bp,
......@@ -1233,7 +1233,7 @@ xfs_btree_set_refs(
}
}
STATIC int
int
xfs_btree_get_buf_block(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr,
......@@ -1293,7 +1293,7 @@ xfs_btree_read_buf_block(
/*
* Copy keys from one btree block to another.
*/
STATIC void
void
xfs_btree_copy_keys(
struct xfs_btree_cur *cur,
union xfs_btree_key *dst_key,
......@@ -1321,11 +1321,11 @@ xfs_btree_copy_recs(
/*
* Copy block pointers from one btree block to another.
*/
STATIC void
void
xfs_btree_copy_ptrs(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *dst_ptr,
union xfs_btree_ptr *src_ptr,
const union xfs_btree_ptr *src_ptr,
int numptrs)
{
ASSERT(numptrs >= 0);
......
......@@ -530,4 +530,20 @@ xfs_btree_islastblock(
return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
}
/*
 * Btree block helper functions declared here so that code outside the
 * file that defines them can call them.
 *
 * NOTE(review): xfs_btree_copy_ptrs takes a const source pointer but
 * xfs_btree_copy_keys does not const-qualify src_key -- consider
 * constifying it together with its definition.
 */
void xfs_btree_set_ptr_null(struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr);
int xfs_btree_get_buf_block(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr,
struct xfs_btree_block **block, struct xfs_buf **bpp);
void xfs_btree_set_sibling(struct xfs_btree_cur *cur,
struct xfs_btree_block *block, union xfs_btree_ptr *ptr,
int lr);
void xfs_btree_init_block_cur(struct xfs_btree_cur *cur,
struct xfs_buf *bp, int level, int numrecs);
void xfs_btree_copy_ptrs(struct xfs_btree_cur *cur,
union xfs_btree_ptr *dst_ptr,
const union xfs_btree_ptr *src_ptr, int numptrs);
void xfs_btree_copy_keys(struct xfs_btree_cur *cur,
union xfs_btree_key *dst_key, union xfs_btree_key *src_key,
int numkeys);
#endif /* __XFS_BTREE_H__ */
This diff is collapsed.
......@@ -52,4 +52,72 @@ void xfs_btree_stage_ifakeroot(struct xfs_btree_cur *cur,
void xfs_btree_commit_ifakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp,
int whichfork, const struct xfs_btree_ops *ops);
/* Bulk loading of staged btrees. */
/*
 * Callback that supplies the next record to load by setting the cursor's
 * bc_rec field in in-core format.  Records must be supplied in sort order.
 */
typedef int (*xfs_btree_bload_get_record_fn)(struct xfs_btree_cur *cur, void *priv);
/*
 * Callback that obtains a pointer to a new btree block on disk, claiming it
 * from space that the caller preallocated.  The pointer is presumably
 * returned through @ptr -- confirm against the implementation.
 */
typedef int (*xfs_btree_bload_claim_block_fn)(struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr, void *priv);
/*
 * Callback that returns the size of the in-core btree root block.  Only
 * needed for XFS_BTREE_ROOT_IN_INODE btree types.
 */
typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur,
unsigned int nr_this_level, void *priv);
/*
 * Parameters and results for bulk loading a staged btree.  The caller
 * supplies the callbacks, the record count, and the slack values; the
 * geometry fields (nr_blocks and btree_height) are filled in by
 * xfs_btree_bload_compute_geometry.
 */
struct xfs_btree_bload {
/*
* This function will be called nr_records times to load records into
* the btree. The function does this by setting the cursor's bc_rec
* field in in-core format. Records must be returned in sort order.
*/
xfs_btree_bload_get_record_fn get_record;
/*
* This function will be called nr_blocks times to obtain a pointer
* to a new btree block on disk. Callers must preallocate all space
* for the new btree before calling xfs_btree_bload, and this function
* is what claims that reservation.
*/
xfs_btree_bload_claim_block_fn claim_block;
/*
* This function should return the size of the in-core btree root
* block. It is only necessary for XFS_BTREE_ROOT_IN_INODE btree
* types.
*/
xfs_btree_bload_iroot_size_fn iroot_size;
/*
* The caller should set this to the number of records that will be
* stored in the new btree.
*/
uint64_t nr_records;
/*
* Number of free records to leave in each leaf block. If the caller
* sets this to -1, the slack value will be calculated to be halfway
* between maxrecs and minrecs. This typically leaves the block 75%
* full. Note that slack values are not enforced on inode root blocks.
*/
int leaf_slack;
/*
* Number of free key/ptrs pairs to leave in each node block. This
* field has the same semantics as leaf_slack.
*/
int node_slack;
/*
* The xfs_btree_bload_compute_geometry function will set this to the
* number of btree blocks needed to store nr_records records.
*/
uint64_t nr_blocks;
/*
* The xfs_btree_bload_compute_geometry function will set this to the
* height of the new btree.
*/
unsigned int btree_height;
};
/*
 * Compute the shape of the new btree.  Per the field comments in
 * struct xfs_btree_bload, this sets bbl->nr_blocks and bbl->btree_height.
 */
int xfs_btree_bload_compute_geometry(struct xfs_btree_cur *cur,
struct xfs_btree_bload *bbl, uint64_t nr_records);
/*
 * Bulk load the staged btree using the callbacks in @bbl.  Callers must
 * preallocate all space for the new btree before calling this function.
 * @priv is presumably handed through to the callbacks unchanged -- confirm
 * against the implementation.
 */
int xfs_btree_bload(struct xfs_btree_cur *cur, struct xfs_btree_bload *bbl,
void *priv);
#endif /* __XFS_BTREE_STAGING_H__ */
......@@ -6,6 +6,7 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_bit.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
......
......@@ -35,6 +35,7 @@ struct xfs_icreate_log;
struct xfs_owner_info;
struct xfs_trans_res;
struct xfs_inobt_rec_incore;
union xfs_btree_ptr;
#define XFS_ATTR_FILTER_FLAGS \
{ XFS_ATTR_ROOT, "ROOT" }, \
......@@ -3666,6 +3667,90 @@ TRACE_EVENT(xfs_btree_commit_ifakeroot,
__entry->blocks)
)
/*
 * Tracepoint recording the geometry values supplied for one level of a
 * bulk-loaded btree: the level (printed alongside the cursor's bc_nlevels),
 * the item count at this level, the per-block and desired per-block counts,
 * and the two block counts passed in by the caller.
 */
TRACE_EVENT(xfs_btree_bload_level_geometry,
TP_PROTO(struct xfs_btree_cur *cur, unsigned int level,
uint64_t nr_this_level, unsigned int nr_per_block,
unsigned int desired_npb, uint64_t blocks,
uint64_t blocks_with_extra),
TP_ARGS(cur, level, nr_this_level, nr_per_block, desired_npb, blocks,
blocks_with_extra),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_btnum_t, btnum)
__field(unsigned int, level)
__field(unsigned int, nlevels)
__field(uint64_t, nr_this_level)
__field(unsigned int, nr_per_block)
__field(unsigned int, desired_npb)
__field(unsigned long long, blocks)
__field(unsigned long long, blocks_with_extra)
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
__entry->btnum = cur->bc_btnum;
__entry->level = level;
__entry->nlevels = cur->bc_nlevels;
__entry->nr_this_level = nr_this_level;
__entry->nr_per_block = nr_per_block;
__entry->desired_npb = desired_npb;
__entry->blocks = blocks;
__entry->blocks_with_extra = blocks_with_extra;
),
TP_printk("dev %d:%d btree %s level %u/%u nr_this_level %llu nr_per_block %u desired_npb %u blocks %llu blocks_with_extra %llu",
MAJOR(__entry->dev), MINOR(__entry->dev),
__print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->level,
__entry->nlevels,
__entry->nr_this_level,
__entry->nr_per_block,
__entry->desired_npb,
__entry->blocks,
__entry->blocks_with_extra)
)
/*
 * Tracepoint recording one block of a bulk-loaded btree: its level, its
 * index out of nr_blocks, its location as an (agno, agbno) pair, and the
 * number of records passed in by the caller.
 */
TRACE_EVENT(xfs_btree_bload_block,
TP_PROTO(struct xfs_btree_cur *cur, unsigned int level,
uint64_t block_idx, uint64_t nr_blocks,
union xfs_btree_ptr *ptr, unsigned int nr_records),
TP_ARGS(cur, level, block_idx, nr_blocks, ptr, nr_records),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_btnum_t, btnum)
__field(unsigned int, level)
__field(unsigned long long, block_idx)
__field(unsigned long long, nr_blocks)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(unsigned int, nr_records)
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
__entry->btnum = cur->bc_btnum;
__entry->level = level;
__entry->block_idx = block_idx;
__entry->nr_blocks = nr_blocks;
/*
 * Long-pointer btrees store a 64-bit fsblock that is split into AG
 * number and AG block; otherwise the pointer is a 32-bit AG block
 * and the AG number comes from the cursor.
 */
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
xfs_fsblock_t fsb = be64_to_cpu(ptr->l);
__entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsb);
__entry->agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsb);
} else {
__entry->agno = cur->bc_ag.agno;
__entry->agbno = be32_to_cpu(ptr->s);
}
__entry->nr_records = nr_records;
),
TP_printk("dev %d:%d btree %s level %u block %llu/%llu fsb (%u/%u) recs %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
__entry->level,
__entry->block_idx,
__entry->nr_blocks,
__entry->agno,
__entry->agbno,
__entry->nr_records)
)
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment