Commit 4bdfd7d1 authored by Darrick J. Wong

xfs: repair free space btrees

Rebuild the free space btrees from the gaps in the rmap btree.  Refer to
the case study in Documentation/filesystems/xfs-online-fsck-design.rst
for more details.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
parent 8bd0bf57
......@@ -182,6 +182,7 @@ xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o
ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
xfs-y += $(addprefix scrub/, \
agheader_repair.o \
alloc_repair.o \
newbt.o \
reap.o \
repair.o \
......
......@@ -80,6 +80,15 @@ struct xfs_perag {
*/
uint16_t pag_checked;
uint16_t pag_sick;
#ifdef CONFIG_XFS_ONLINE_REPAIR
/*
* Alternate btree heights so that online repair won't trip the write
* verifiers while rebuilding the AG btrees.
*/
uint8_t pagf_repair_levels[XFS_BTNUM_AGF];
#endif
spinlock_t pag_state_lock;
spinlock_t pagb_lock; /* lock for pagb_tree */
......
......@@ -411,6 +411,8 @@ xfs_ag_resv_free_extent(
fallthrough;
case XFS_AG_RESV_NONE:
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len);
fallthrough;
case XFS_AG_RESV_IGNORE:
return;
}
......
......@@ -246,11 +246,9 @@ xfs_alloc_btrec_to_irec(
/* Simple checks for free space records. */
xfs_failaddr_t
xfs_alloc_check_irec(
struct xfs_btree_cur *cur,
struct xfs_perag *pag,
const struct xfs_alloc_rec_incore *irec)
{
struct xfs_perag *pag = cur->bc_ag.pag;
if (irec->ar_blockcount == 0)
return __this_address;
......@@ -299,7 +297,7 @@ xfs_alloc_get_rec(
return error;
xfs_alloc_btrec_to_irec(rec, &irec);
fa = xfs_alloc_check_irec(cur, &irec);
fa = xfs_alloc_check_irec(cur->bc_ag.pag, &irec);
if (fa)
return xfs_alloc_complain_bad_rec(cur, fa, &irec);
......@@ -3944,7 +3942,7 @@ xfs_alloc_query_range_helper(
xfs_failaddr_t fa;
xfs_alloc_btrec_to_irec(rec, &irec);
fa = xfs_alloc_check_irec(cur, &irec);
fa = xfs_alloc_check_irec(cur->bc_ag.pag, &irec);
if (fa)
return xfs_alloc_complain_bad_rec(cur, fa, &irec);
......
......@@ -185,7 +185,7 @@ xfs_alloc_get_rec(
union xfs_btree_rec;
void xfs_alloc_btrec_to_irec(const union xfs_btree_rec *rec,
struct xfs_alloc_rec_incore *irec);
xfs_failaddr_t xfs_alloc_check_irec(struct xfs_btree_cur *cur,
xfs_failaddr_t xfs_alloc_check_irec(struct xfs_perag *pag,
const struct xfs_alloc_rec_incore *irec);
int xfs_read_agf(struct xfs_perag *pag, struct xfs_trans *tp, int flags,
......
......@@ -323,7 +323,18 @@ xfs_allocbt_verify(
if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC))
btnum = XFS_BTNUM_CNTi;
if (pag && xfs_perag_initialised_agf(pag)) {
if (level >= pag->pagf_levels[btnum])
unsigned int maxlevel = pag->pagf_levels[btnum];
#ifdef CONFIG_XFS_ONLINE_REPAIR
/*
* Online repair could be rewriting the free space btrees, so
* we'll validate against the larger of either tree while this
* is going on.
*/
maxlevel = max_t(unsigned int, maxlevel,
pag->pagf_repair_levels[btnum]);
#endif
if (level >= maxlevel)
return __this_address;
} else if (level >= mp->m_alloc_maxlevels)
return __this_address;
......
......@@ -208,6 +208,13 @@ enum xfs_ag_resv_type {
XFS_AG_RESV_AGFL,
XFS_AG_RESV_METADATA,
XFS_AG_RESV_RMAPBT,
/*
* Don't increase fdblocks when freeing extent. This is a pony for
* the bnobt repair functions to re-free the free space without
* altering fdblocks. If you think you need this you're wrong.
*/
XFS_AG_RESV_IGNORE,
};
/* Results of scanning a btree keyspace to check occupancy. */
......
......@@ -9,13 +9,16 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_btree.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "xfs_ag.h"
#include "scrub/repair.h"
/*
* Set us up to scrub free space btrees.
......@@ -24,10 +27,19 @@ int
xchk_setup_ag_allocbt(
struct xfs_scrub *sc)
{
int error;
if (xchk_need_intent_drain(sc))
xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
return xchk_setup_ag_btree(sc, false);
error = xchk_setup_ag_btree(sc, false);
if (error)
return error;
if (xchk_could_repair(sc))
return xrep_setup_ag_allocbt(sc);
return 0;
}
/* Free space btree scrubber. */
......@@ -127,7 +139,7 @@ xchk_allocbt_rec(
struct xchk_alloc *ca = bs->private;
xfs_alloc_btrec_to_irec(rec, &irec);
if (xfs_alloc_check_irec(bs->cur, &irec) != NULL) {
if (xfs_alloc_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return 0;
}
......
This diff is collapsed.
......@@ -200,8 +200,21 @@ static inline bool xchk_needs_repair(const struct xfs_scrub_metadata *sm)
XFS_SCRUB_OFLAG_XCORRUPT |
XFS_SCRUB_OFLAG_PREEN);
}
/*
 * Decide whether this scrub invocation should get ready to repair metadata.
 *
 * The answer is yes only when the caller set XFS_SCRUB_IFLAG_REPAIR in the
 * request and this pass is not the evaluation that runs after a repair has
 * already been made (XREP_ALREADY_FIXED).
 */
static inline bool xchk_could_repair(const struct xfs_scrub *sc)
{
	/* The caller never asked for repairs, so there is nothing to set up. */
	if (!(sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
		return false;

	/* Repairs are allowed unless this is the post-repair evaluation. */
	return !(sc->flags & XREP_ALREADY_FIXED);
}
#else
# define xchk_needs_repair(sc) (false)
# define xchk_could_repair(sc) (false)
#endif /* CONFIG_XFS_ONLINE_REPAIR */
int xchk_metadata_inode_forks(struct xfs_scrub *sc);
......@@ -213,6 +226,12 @@ int xchk_metadata_inode_forks(struct xfs_scrub *sc);
#define xchk_xfile_descr(sc, fmt, ...) \
kasprintf(XCHK_GFP_FLAGS, "XFS (%s): " fmt, \
(sc)->mp->m_super->s_id, ##__VA_ARGS__)
/*
 * Build a description string of the form "XFS (<s_id>): AG 0x<agno> <fmt...>"
 * with kasprintf, using XCHK_GFP_FLAGS for the allocation.  The AG number
 * comes from the perag attached to the scrub context when there is one, and
 * from the scrub request (sm_agno) otherwise.  @sc is evaluated more than
 * once, so it must not have side effects.  Evaluates to whatever kasprintf
 * returns; NOTE(review): presumably the caller owns and frees the string --
 * confirm against the callers.
 */
#define xchk_xfile_ag_descr(sc, fmt, ...) \
kasprintf(XCHK_GFP_FLAGS, "XFS (%s): AG 0x%x " fmt, \
(sc)->mp->m_super->s_id, \
(sc)->sa.pag ? (sc)->sa.pag->pag_agno : (sc)->sm->sm_agno, \
##__VA_ARGS__)
/*
* Setting up a hook to wait for intents to drain is costly -- we have to take
......
......@@ -157,11 +157,13 @@ xrep_newbt_add_blocks(
resv->used = 0;
resv->pag = xfs_perag_hold(pag);
if (args->tp) {
ASSERT(xnr->oinfo.oi_offset == 0);
error = xfs_alloc_schedule_autoreap(args, true, &resv->autoreap);
if (error)
goto out_pag;
}
list_add_tail(&resv->list, &xnr->resv_list);
return 0;
......@@ -171,6 +173,30 @@ xrep_newbt_add_blocks(
return error;
}
/*
 * Record @len blocks starting at @agbno in @pag as space reserved for the new
 * btree.  The reservation is added without a transaction, so no automatic
 * reaping is scheduled; callers must reap it themselves if the repair is
 * cancelled.  @pag must be a passive reference.
 *
 * Returns the result of xrep_newbt_add_blocks().
 */
int
xrep_newbt_add_extent(
	struct xrep_newbt	*xnr,
	struct xfs_perag	*pag,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	struct xfs_alloc_arg	args = {
		.resv		= xnr->resv,
		.len		= len,
		.fsbno		= XFS_AGB_TO_FSB(xnr->sc->mp, pag->pag_agno,
						 agbno),
		.oinfo		= xnr->oinfo,
		.tp		= NULL,	/* no autoreap */
	};

	return xrep_newbt_add_blocks(xnr, pag, &args);
}
/* Don't let our allocation hint take us beyond this AG */
static inline void
xrep_newbt_validate_ag_alloc_hint(
......@@ -372,6 +398,7 @@ xrep_newbt_free_extent(
free_aglen, xnr->oinfo.oi_owner);
ASSERT(xnr->resv != XFS_AG_RESV_AGFL);
ASSERT(xnr->resv != XFS_AG_RESV_IGNORE);
/*
* Use EFIs to free the reservations. This reduces the chance
......@@ -517,3 +544,16 @@ xrep_newbt_claim_block(
/* Relog all the EFIs. */
return xrep_defer_finish(xnr->sc);
}
/* How many reserved blocks are unused? */
unsigned int
xrep_newbt_unused_blocks(
struct xrep_newbt *xnr)
{
struct xrep_newbt_resv *resv;
unsigned int unused = 0;
list_for_each_entry(resv, &xnr->resv_list, list)
unused += resv->len - resv->used;
return unused;
}
......@@ -57,9 +57,12 @@ void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct xfs_scrub *sc,
int xrep_newbt_init_inode(struct xrep_newbt *xnr, struct xfs_scrub *sc,
int whichfork, const struct xfs_owner_info *oinfo);
int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
int xrep_newbt_add_extent(struct xrep_newbt *xnr, struct xfs_perag *pag,
xfs_agblock_t agbno, xfs_extlen_t len);
void xrep_newbt_cancel(struct xrep_newbt *xnr);
int xrep_newbt_commit(struct xrep_newbt *xnr);
int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
union xfs_btree_ptr *ptr);
unsigned int xrep_newbt_unused_blocks(struct xrep_newbt *xnr);
#endif /* __XFS_SCRUB_NEWBT_H__ */
......@@ -734,3 +734,75 @@ xrep_ino_dqattach(
return error;
}
/*
 * Set up the cross-referencing btree cursors in @sa for an AG repair, leaving
 * out the cursor(s) for whichever btree the scrub request (sm_type) says is
 * being rebuilt.
 */
void
xrep_ag_btcur_init(
	struct xfs_scrub	*sc,
	struct xchk_ag		*sa)
{
	struct xfs_mount	*mp = sc->mp;
	const bool		skip_alloc =
			sc->sm->sm_type == XFS_SCRUB_TYPE_BNOBT ||
			sc->sm->sm_type == XFS_SCRUB_TYPE_CNTBT;
	const bool		skip_ino =
			sc->sm->sm_type == XFS_SCRUB_TYPE_INOBT ||
			sc->sm->sm_type == XFS_SCRUB_TYPE_FINOBT;

	/* Free space btrees: both cursors, unless either one is the target. */
	if (!skip_alloc) {
		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sc->sa.pag, XFS_BTNUM_BNO);
		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
				sc->sa.pag, XFS_BTNUM_CNT);
	}

	/* Inode btrees: the finobt cursor only if the feature is present. */
	if (!skip_ino) {
		sa->ino_cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp,
				sa->agi_bp, XFS_BTNUM_INO);
		if (xfs_has_finobt(mp))
			sa->fino_cur = xfs_inobt_init_cursor(sc->sa.pag,
					sc->tp, sa->agi_bp, XFS_BTNUM_FINO);
	}

	/* Reverse mapping btree, if present and not the repair target. */
	if (sc->sm->sm_type != XFS_SCRUB_TYPE_RMAPBT) {
		if (xfs_has_rmapbt(mp))
			sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp,
					sa->agf_bp, sc->sa.pag);
	}

	/* Reference count btree, if present and not the repair target. */
	if (sc->sm->sm_type != XFS_SCRUB_TYPE_REFCNTBT) {
		if (xfs_has_reflink(mp))
			sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
					sa->agf_bp, sc->sa.pag);
	}
}
/*
* Reinitialize the in-core AG state after a repair by rereading the AGF
* buffer. We had better get the same AGF buffer as the one that's attached
* to the scrub context.
*/
int
xrep_reinit_pagf(
struct xfs_scrub *sc)
{
struct xfs_perag *pag = sc->sa.pag;
struct xfs_buf *bp;
int error;
ASSERT(pag);
ASSERT(xfs_perag_initialised_agf(pag));
clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
error = xfs_alloc_read_agf(pag, sc->tp, 0, &bp);
if (error)
return error;
if (bp != sc->sa.agf_bp) {
ASSERT(bp == sc->sa.agf_bp);
return -EFSCORRUPTED;
}
return 0;
}
......@@ -60,6 +60,15 @@ int xrep_find_ag_btree_roots(struct xfs_scrub *sc, struct xfs_buf *agf_bp,
void xrep_force_quotacheck(struct xfs_scrub *sc, xfs_dqtype_t type);
int xrep_ino_dqattach(struct xfs_scrub *sc);
/* Repair setup functions */
int xrep_setup_ag_allocbt(struct xfs_scrub *sc);
void xrep_ag_btcur_init(struct xfs_scrub *sc, struct xchk_ag *sa);
/* Metadata revalidators */
int xrep_revalidate_allocbt(struct xfs_scrub *sc);
/* Metadata repairers */
int xrep_probe(struct xfs_scrub *sc);
......@@ -67,6 +76,9 @@ int xrep_superblock(struct xfs_scrub *sc);
int xrep_agf(struct xfs_scrub *sc);
int xrep_agfl(struct xfs_scrub *sc);
int xrep_agi(struct xfs_scrub *sc);
int xrep_allocbt(struct xfs_scrub *sc);
int xrep_reinit_pagf(struct xfs_scrub *sc);
#else
......@@ -87,11 +99,23 @@ xrep_calc_ag_resblks(
return 0;
}
/*
 * Stand-in repair setup hook for kernels built without
 * CONFIG_XFS_ONLINE_REPAIR: there is nothing to prepare, so always report
 * success.
 */
static inline int xrep_setup_nothing(struct xfs_scrub *sc)
{
	return 0;
}
#define xrep_setup_ag_allocbt xrep_setup_nothing
#define xrep_revalidate_allocbt (NULL)
#define xrep_probe xrep_notsupported
#define xrep_superblock xrep_notsupported
#define xrep_agf xrep_notsupported
#define xrep_agfl xrep_notsupported
#define xrep_agi xrep_notsupported
#define xrep_allocbt xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
......
......@@ -239,13 +239,15 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.type = ST_PERAG,
.setup = xchk_setup_ag_allocbt,
.scrub = xchk_allocbt,
.repair = xrep_notsupported,
.repair = xrep_allocbt,
.repair_eval = xrep_revalidate_allocbt,
},
[XFS_SCRUB_TYPE_CNTBT] = { /* cntbt */
.type = ST_PERAG,
.setup = xchk_setup_ag_allocbt,
.scrub = xchk_allocbt,
.repair = xrep_notsupported,
.repair = xrep_allocbt,
.repair_eval = xrep_revalidate_allocbt,
},
[XFS_SCRUB_TYPE_INOBT] = { /* inobt */
.type = ST_PERAG,
......@@ -531,6 +533,9 @@ xfs_scrub_metadata(
/* Scrub for errors. */
check_start = xchk_stats_now();
if ((sc->flags & XREP_ALREADY_FIXED) && sc->ops->repair_eval != NULL)
error = sc->ops->repair_eval(sc);
else
error = sc->ops->scrub(sc);
run.scrub_ns += xchk_stats_elapsed_ns(check_start);
if (error == -EDEADLOCK && !(sc->flags & XCHK_TRY_HARDER))
......@@ -542,8 +547,7 @@ xfs_scrub_metadata(
xchk_update_health(sc);
if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
!(sc->flags & XREP_ALREADY_FIXED)) {
if (xchk_could_repair(sc)) {
bool needs_fix = xchk_needs_repair(sc->sm);
/* Userspace asked us to rebuild the structure regardless. */
......
......@@ -35,6 +35,14 @@ struct xchk_meta_ops {
/* Repair or optimize the metadata. */
int (*repair)(struct xfs_scrub *);
/*
* Re-scrub the metadata we repaired, in case there's extra work that
* we need to do to check our repair work. If this is NULL, we'll use
* the ->scrub function pointer, assuming that the regular scrub is
* sufficient.
*/
int (*repair_eval)(struct xfs_scrub *sc);
/* Decide if we even have this piece of metadata. */
bool (*has)(struct xfs_mount *);
......
......@@ -1172,11 +1172,33 @@ DEFINE_EVENT(xrep_rmap_class, name, \
xfs_agblock_t agbno, xfs_extlen_t len, \
uint64_t owner, uint64_t offset, unsigned int flags), \
TP_ARGS(mp, agno, agbno, len, owner, offset, flags))
DEFINE_REPAIR_RMAP_EVENT(xrep_alloc_extent_fn);
DEFINE_REPAIR_RMAP_EVENT(xrep_ialloc_extent_fn);
DEFINE_REPAIR_RMAP_EVENT(xrep_rmap_extent_fn);
DEFINE_REPAIR_RMAP_EVENT(xrep_bmap_extent_fn);
/*
 * Tracepoint that records the device, the AG number, and the start block and
 * block count of the free space record passed in @rec.  All four values are
 * printed in the trace output, the AG number and block fields in hex.
 */
TRACE_EVENT(xrep_abt_found,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
const struct xfs_alloc_rec_incore *rec),
TP_ARGS(mp, agno, rec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, startblock)
__field(xfs_extlen_t, blockcount)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
__entry->startblock = rec->ar_startblock;
__entry->blockcount = rec->ar_blockcount;
),
TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->startblock,
__entry->blockcount)
)
TRACE_EVENT(xrep_refcount_extent_fn,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
struct xfs_refcount_irec *irec),
......
......@@ -54,6 +54,28 @@ static inline int xfarray_append(struct xfarray *array, const void *ptr)
uint64_t xfarray_length(struct xfarray *array);
int xfarray_load_next(struct xfarray *array, xfarray_idx_t *idx, void *rec);
/*
 * Step through the non-null elements of a sparse xfarray.  Set *idx to
 * XFARRAY_CURSOR_INIT before the first call; after each call it holds one
 * more than the index of the record that was just retrieved.
 *
 * Returns 1 when a record was copied into @rec, 0 when there are no more
 * records, or a negative errno on failure.
 */
static inline int
xfarray_iter(
	struct xfarray	*array,
	xfarray_idx_t	*idx,
	void		*rec)
{
	int		ret = xfarray_load_next(array, idx, rec);

	switch (ret) {
	case 0:
		/* Loaded a record. */
		return 1;
	case -ENODATA:
		/* Ran off the end of the array; not an error for iteration. */
		return 0;
	default:
		return ret;
	}
}
/* Declarations for xfile array sort functionality. */
typedef cmp_func_t xfarray_cmp_fn;
......
......@@ -678,3 +678,16 @@ xfs_extent_busy_ag_cmp(
diff = b1->bno - b2->bno;
return diff;
}
/*
 * Report whether @pag's busy extent tree is empty.  The tree is sampled
 * while holding pagb_lock.
 */
bool
xfs_extent_busy_list_empty(
	struct xfs_perag	*pag)
{
	bool			empty;

	spin_lock(&pag->pagb_lock);
	empty = RB_EMPTY_ROOT(&pag->pagb_tree);
	spin_unlock(&pag->pagb_lock);

	return empty;
}
......@@ -85,4 +85,6 @@ static inline void xfs_extent_busy_sort(struct list_head *list)
list_sort(NULL, list, xfs_extent_busy_ag_cmp);
}
bool xfs_extent_busy_list_empty(struct xfs_perag *pag);
#endif /* __XFS_EXTENT_BUSY_H__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment