Commit 8f71bede authored by Darrick J. Wong's avatar Darrick J. Wong

xfs: repair inode fork block mapping data structures

Use the reverse-mapping btree information to rebuild an inode block map.
Update the btree bulk loading code as necessary to support inode rooted
btrees and fix some bitrot problems.
Signed-off-by: default avatarDarrick J. Wong <djwong@kernel.org>
Reviewed-by: default avatarChristoph Hellwig <hch@lst.de>
parent 66da1128
......@@ -183,6 +183,7 @@ ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
xfs-y += $(addprefix scrub/, \
agheader_repair.o \
alloc_repair.o \
bmap_repair.o \
ialloc_repair.o \
inode_repair.o \
newbt.o \
......
......@@ -15,6 +15,7 @@
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bmap_btree.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
......@@ -288,10 +289,7 @@ xfs_bmbt_get_minrecs(
int level)
{
if (level == cur->bc_nlevels - 1) {
struct xfs_ifork *ifp;
ifp = xfs_ifork_ptr(cur->bc_ino.ip,
cur->bc_ino.whichfork);
struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
return xfs_bmbt_maxrecs(cur->bc_mp,
ifp->if_broot_bytes, level == 0) / 2;
......@@ -306,10 +304,7 @@ xfs_bmbt_get_maxrecs(
int level)
{
if (level == cur->bc_nlevels - 1) {
struct xfs_ifork *ifp;
ifp = xfs_ifork_ptr(cur->bc_ino.ip,
cur->bc_ino.whichfork);
struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
return xfs_bmbt_maxrecs(cur->bc_mp,
ifp->if_broot_bytes, level == 0);
......@@ -543,23 +538,19 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
.keys_contiguous = xfs_bmbt_keys_contiguous,
};
/*
* Allocate a new bmap btree cursor.
*/
struct xfs_btree_cur * /* new bmap btree cursor */
xfs_bmbt_init_cursor(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* inode owning the btree */
int whichfork) /* data or attr fork */
static struct xfs_btree_cur *
xfs_bmbt_init_common(
struct xfs_mount *mp,
struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork)
{
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
struct xfs_btree_cur *cur;
ASSERT(whichfork != XFS_COW_FORK);
cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_BMAP,
mp->m_bm_maxlevels[whichfork], xfs_bmbt_cur_cache);
cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2);
cur->bc_ops = &xfs_bmbt_ops;
......@@ -567,10 +558,30 @@ xfs_bmbt_init_cursor(
if (xfs_has_crc(mp))
cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
cur->bc_ino.forksize = xfs_inode_fork_size(ip, whichfork);
cur->bc_ino.ip = ip;
cur->bc_ino.allocated = 0;
cur->bc_ino.flags = 0;
return cur;
}
/*
* Allocate a new bmap btree cursor.
*/
struct xfs_btree_cur *
xfs_bmbt_init_cursor(
struct xfs_mount *mp,
struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork)
{
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
struct xfs_btree_cur *cur;
cur = xfs_bmbt_init_common(mp, tp, ip, whichfork);
cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
cur->bc_ino.forksize = xfs_inode_fork_size(ip, whichfork);
cur->bc_ino.whichfork = whichfork;
return cur;
......@@ -587,6 +598,76 @@ xfs_bmbt_block_maxrecs(
return blocklen / (sizeof(xfs_bmbt_key_t) + sizeof(xfs_bmbt_ptr_t));
}
/*
* Allocate a new bmap btree cursor for reloading an inode block mapping data
* structure. Note that callers can use the staged cursor to reload extents
* format inode forks if they rebuild the iext tree and commit the staged
* cursor immediately.
*/
struct xfs_btree_cur *
xfs_bmbt_stage_cursor(
struct xfs_mount *mp,
struct xfs_inode *ip,
struct xbtree_ifakeroot *ifake)
{
struct xfs_btree_cur *cur;
struct xfs_btree_ops *ops;
/* data fork always has larger maxheight */
cur = xfs_bmbt_init_common(mp, NULL, ip, XFS_DATA_FORK);
cur->bc_nlevels = ifake->if_levels;
cur->bc_ino.forksize = ifake->if_fork_size;
/* Don't let anyone think we're attached to the real fork yet. */
cur->bc_ino.whichfork = -1;
xfs_btree_stage_ifakeroot(cur, ifake, &ops);
ops->update_cursor = NULL;
return cur;
}
/*
* Swap in the new inode fork root. Once we pass this point the newly rebuilt
* mappings are in place and we have to kill off any old btree blocks.
*/
void
xfs_bmbt_commit_staged_btree(
struct xfs_btree_cur *cur,
struct xfs_trans *tp,
int whichfork)
{
struct xbtree_ifakeroot *ifake = cur->bc_ino.ifake;
struct xfs_ifork *ifp;
static const short brootflag[2] = {XFS_ILOG_DBROOT, XFS_ILOG_ABROOT};
static const short extflag[2] = {XFS_ILOG_DEXT, XFS_ILOG_AEXT};
int flags = XFS_ILOG_CORE;
ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
ASSERT(whichfork != XFS_COW_FORK);
/*
* Free any resources hanging off the real fork, then shallow-copy the
* staging fork's contents into the real fork to transfer everything
* we just built.
*/
ifp = xfs_ifork_ptr(cur->bc_ino.ip, whichfork);
xfs_idestroy_fork(ifp);
memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork));
switch (ifp->if_format) {
case XFS_DINODE_FMT_EXTENTS:
flags |= extflag[whichfork];
break;
case XFS_DINODE_FMT_BTREE:
flags |= brootflag[whichfork];
break;
default:
ASSERT(0);
break;
}
xfs_trans_log_inode(tp, cur->bc_ino.ip, flags);
xfs_btree_commit_ifakeroot(cur, tp, whichfork, &xfs_bmbt_ops);
}
/*
* Calculate number of records in a bmap btree block.
*/
......
......@@ -11,6 +11,7 @@ struct xfs_btree_block;
struct xfs_mount;
struct xfs_inode;
struct xfs_trans;
struct xbtree_ifakeroot;
/*
* Btree block header size depends on a superblock flag.
......@@ -106,6 +107,10 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
struct xfs_trans *, struct xfs_inode *, int);
struct xfs_btree_cur *xfs_bmbt_stage_cursor(struct xfs_mount *mp,
struct xfs_inode *ip, struct xbtree_ifakeroot *ifake);
void xfs_bmbt_commit_staged_btree(struct xfs_btree_cur *cur,
struct xfs_trans *tp, int whichfork);
extern unsigned long long xfs_bmbt_calc_size(struct xfs_mount *mp,
unsigned long long len);
......
......@@ -405,7 +405,7 @@ xfs_btree_bload_prep_block(
ASSERT(*bpp == NULL);
/* Allocate a new incore btree root block. */
new_size = bbl->iroot_size(cur, nr_this_block, priv);
new_size = bbl->iroot_size(cur, level, nr_this_block, priv);
ifp->if_broot = kmem_zalloc(new_size, 0);
ifp->if_broot_bytes = (int)new_size;
......@@ -596,7 +596,14 @@ xfs_btree_bload_level_geometry(
unsigned int desired_npb;
unsigned int maxnr;
maxnr = cur->bc_ops->get_maxrecs(cur, level);
/*
* Compute the absolute maximum number of records that we can store in
* the ondisk block or inode root.
*/
if (cur->bc_ops->get_dmaxrecs)
maxnr = cur->bc_ops->get_dmaxrecs(cur, level);
else
maxnr = cur->bc_ops->get_maxrecs(cur, level);
/*
* Compute the number of blocks we need to fill each block with the
......
......@@ -53,7 +53,7 @@ typedef int (*xfs_btree_bload_get_records_fn)(struct xfs_btree_cur *cur,
typedef int (*xfs_btree_bload_claim_block_fn)(struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr, void *priv);
typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur,
unsigned int nr_this_level, void *priv);
unsigned int level, unsigned int nr_this_level, void *priv);
struct xfs_btree_bload {
/*
......
......@@ -622,13 +622,11 @@ static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp)
}
void
xfs_iext_insert(
struct xfs_inode *ip,
xfs_iext_insert_raw(
struct xfs_ifork *ifp,
struct xfs_iext_cursor *cur,
struct xfs_bmbt_irec *irec,
int state)
struct xfs_bmbt_irec *irec)
{
struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state);
xfs_fileoff_t offset = irec->br_startoff;
struct xfs_iext_leaf *new = NULL;
int nr_entries, i;
......@@ -662,12 +660,23 @@ xfs_iext_insert(
xfs_iext_set(cur_rec(cur), irec);
ifp->if_bytes += sizeof(struct xfs_iext_rec);
trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
if (new)
xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
}
void
xfs_iext_insert(
struct xfs_inode *ip,
struct xfs_iext_cursor *cur,
struct xfs_bmbt_irec *irec,
int state)
{
struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state);
xfs_iext_insert_raw(ifp, cur, irec);
trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
}
static struct xfs_iext_node *
xfs_iext_rebalance_node(
struct xfs_iext_node *parent,
......
......@@ -520,6 +520,7 @@ xfs_idata_realloc(
ifp->if_bytes = new_size;
}
/* Free all memory and reset a fork back to its initial state. */
void
xfs_idestroy_fork(
struct xfs_ifork *ifp)
......
......@@ -180,6 +180,9 @@ void xfs_init_local_fork(struct xfs_inode *ip, int whichfork,
const void *data, int64_t size);
xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp);
void xfs_iext_insert_raw(struct xfs_ifork *ifp,
struct xfs_iext_cursor *cur,
struct xfs_bmbt_irec *irec);
void xfs_iext_insert(struct xfs_inode *, struct xfs_iext_cursor *cur,
struct xfs_bmbt_irec *, int);
void xfs_iext_remove(struct xfs_inode *, struct xfs_iext_cursor *,
......
......@@ -50,9 +50,18 @@ xchk_setup_inode_bmap(
if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
struct address_space *mapping = VFS_I(sc->ip)->i_mapping;
bool is_repair = xchk_could_repair(sc);
xchk_ilock(sc, XFS_MMAPLOCK_EXCL);
/* Break all our leases, we're going to mess with things. */
if (is_repair) {
error = xfs_break_layouts(VFS_I(sc->ip),
&sc->ilock_flags, BREAK_WRITE);
if (error)
goto out;
}
inode_dio_wait(VFS_I(sc->ip));
/*
......@@ -73,6 +82,15 @@ xchk_setup_inode_bmap(
error = filemap_fdatawait_keep_errors(mapping);
if (error && (error != -ENOSPC && error != -EIO))
goto out;
/* Drop the page cache if we're repairing block mappings. */
if (is_repair) {
error = invalidate_inode_pages2(
VFS_I(sc->ip)->i_mapping);
if (error)
goto out;
}
}
/* Got the inode, lock it and we're ready to go. */
......
This diff is collapsed.
......@@ -239,7 +239,11 @@ int xchk_metadata_inode_forks(struct xfs_scrub *sc);
(sc)->mp->m_super->s_id, \
(sc)->sa.pag ? (sc)->sa.pag->pag_agno : (sc)->sm->sm_agno, \
##__VA_ARGS__)
#define xchk_xfile_ino_descr(sc, fmt, ...) \
kasprintf(XCHK_GFP_FLAGS, "XFS (%s): inode 0x%llx " fmt, \
(sc)->mp->m_super->s_id, \
(sc)->ip ? (sc)->ip->i_ino : (sc)->sm->sm_ino, \
##__VA_ARGS__)
/*
* Setting up a hook to wait for intents to drain is costly -- we have to take
......
......@@ -883,6 +883,34 @@ xrep_reinit_pagi(
return 0;
}
/*
* Given an active reference to a perag structure, load AG headers and cursors.
* This should only be called to scan an AG while repairing file-based metadata.
*/
int
xrep_ag_init(
struct xfs_scrub *sc,
struct xfs_perag *pag,
struct xchk_ag *sa)
{
int error;
ASSERT(!sa->pag);
error = xfs_ialloc_read_agi(pag, sc->tp, &sa->agi_bp);
if (error)
return error;
error = xfs_alloc_read_agf(pag, sc->tp, 0, &sa->agf_bp);
if (error)
return error;
/* Grab our own passive reference from the caller's ref. */
sa->pag = xfs_perag_hold(pag);
xrep_ag_btcur_init(sc, sa);
return 0;
}
/* Reinitialize the per-AG block reservation for the AG we just fixed. */
int
xrep_reset_perag_resv(
......
......@@ -89,6 +89,8 @@ struct xfs_imap;
int xrep_setup_inode(struct xfs_scrub *sc, const struct xfs_imap *imap);
void xrep_ag_btcur_init(struct xfs_scrub *sc, struct xchk_ag *sa);
int xrep_ag_init(struct xfs_scrub *sc, struct xfs_perag *pag,
struct xchk_ag *sa);
/* Metadata revalidators */
......@@ -106,6 +108,8 @@ int xrep_allocbt(struct xfs_scrub *sc);
int xrep_iallocbt(struct xfs_scrub *sc);
int xrep_refcountbt(struct xfs_scrub *sc);
int xrep_inode(struct xfs_scrub *sc);
int xrep_bmap_data(struct xfs_scrub *sc);
int xrep_bmap_attr(struct xfs_scrub *sc);
int xrep_reinit_pagf(struct xfs_scrub *sc);
int xrep_reinit_pagi(struct xfs_scrub *sc);
......@@ -165,6 +169,8 @@ xrep_setup_nothing(
#define xrep_iallocbt xrep_notsupported
#define xrep_refcountbt xrep_notsupported
#define xrep_inode xrep_notsupported
#define xrep_bmap_data xrep_notsupported
#define xrep_bmap_attr xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
......
......@@ -288,13 +288,13 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.type = ST_INODE,
.setup = xchk_setup_inode_bmap,
.scrub = xchk_bmap_data,
.repair = xrep_notsupported,
.repair = xrep_bmap_data,
},
[XFS_SCRUB_TYPE_BMBTA] = { /* inode attr fork */
.type = ST_INODE,
.setup = xchk_setup_inode_bmap,
.scrub = xchk_bmap_attr,
.repair = xrep_notsupported,
.repair = xrep_bmap_attr,
},
[XFS_SCRUB_TYPE_BMBTC] = { /* inode CoW fork */
.type = ST_INODE,
......
......@@ -1175,7 +1175,7 @@ DEFINE_EVENT(xrep_rmap_class, name, \
TP_ARGS(mp, agno, agbno, len, owner, offset, flags))
DEFINE_REPAIR_RMAP_EVENT(xrep_ibt_walk_rmap);
DEFINE_REPAIR_RMAP_EVENT(xrep_rmap_extent_fn);
DEFINE_REPAIR_RMAP_EVENT(xrep_bmap_extent_fn);
DEFINE_REPAIR_RMAP_EVENT(xrep_bmap_walk_rmap);
TRACE_EVENT(xrep_abt_found,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
......@@ -1260,6 +1260,38 @@ TRACE_EVENT(xrep_refc_found,
__entry->refcount)
)
TRACE_EVENT(xrep_bmap_found,
TP_PROTO(struct xfs_inode *ip, int whichfork,
struct xfs_bmbt_irec *irec),
TP_ARGS(ip, whichfork, irec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(int, whichfork)
__field(xfs_fileoff_t, lblk)
__field(xfs_filblks_t, len)
__field(xfs_fsblock_t, pblk)
__field(int, state)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->whichfork = whichfork;
__entry->lblk = irec->br_startoff;
__entry->len = irec->br_blockcount;
__entry->pblk = irec->br_startblock;
__entry->state = irec->br_state;
),
TP_printk("dev %d:%d ino 0x%llx whichfork %s fileoff 0x%llx fsbcount 0x%llx startblock 0x%llx state %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
__entry->lblk,
__entry->len,
__entry->pblk,
__entry->state)
);
TRACE_EVENT(xrep_findroot_block,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
uint32_t magic, uint16_t level),
......
......@@ -1236,6 +1236,68 @@ xfs_trans_alloc_inode(
return error;
}
/*
* Try to reserve more blocks for a transaction.
*
* This is for callers that need to attach resources to a transaction, scan
* those resources to determine the space reservation requirements, and then
* modify the attached resources. In other words, online repair. This can
* fail due to ENOSPC, so the caller must be able to cancel the transaction
* without shutting down the fs.
*/
int
xfs_trans_reserve_more(
struct xfs_trans *tp,
unsigned int blocks,
unsigned int rtextents)
{
struct xfs_trans_res resv = { };
return xfs_trans_reserve(tp, &resv, blocks, rtextents);
}
/*
* Try to reserve more blocks and file quota for a transaction. Same
* conditions of usage as xfs_trans_reserve_more.
*/
int
xfs_trans_reserve_more_inode(
struct xfs_trans *tp,
struct xfs_inode *ip,
unsigned int dblocks,
unsigned int rblocks,
bool force_quota)
{
struct xfs_trans_res resv = { };
struct xfs_mount *mp = ip->i_mount;
unsigned int rtx = xfs_extlen_to_rtxlen(mp, rblocks);
int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
error = xfs_trans_reserve(tp, &resv, dblocks, rtx);
if (error)
return error;
if (!XFS_IS_QUOTA_ON(mp) || xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
return 0;
if (tp->t_flags & XFS_TRANS_RESERVE)
force_quota = true;
error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks,
force_quota);
if (!error)
return 0;
/* Quota failed, give back the new reservation. */
xfs_mod_fdblocks(mp, dblocks, tp->t_flags & XFS_TRANS_RESERVE);
tp->t_blk_res -= dblocks;
xfs_mod_frextents(mp, rtx);
tp->t_rtx_res -= rtx;
return error;
}
/*
* Allocate an transaction in preparation for inode creation by reserving quota
* against the given dquots. Callers are not required to hold any inode locks.
......
......@@ -164,6 +164,8 @@ typedef struct xfs_trans {
int xfs_trans_alloc(struct xfs_mount *mp, struct xfs_trans_res *resp,
uint blocks, uint rtextents, uint flags,
struct xfs_trans **tpp);
int xfs_trans_reserve_more(struct xfs_trans *tp,
unsigned int blocks, unsigned int rtextents);
int xfs_trans_alloc_empty(struct xfs_mount *mp,
struct xfs_trans **tpp);
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
......@@ -248,6 +250,8 @@ struct xfs_dquot;
int xfs_trans_alloc_inode(struct xfs_inode *ip, struct xfs_trans_res *resv,
unsigned int dblocks, unsigned int rblocks, bool force,
struct xfs_trans **tpp);
int xfs_trans_reserve_more_inode(struct xfs_trans *tp, struct xfs_inode *ip,
unsigned int dblocks, unsigned int rblocks, bool force_quota);
int xfs_trans_alloc_icreate(struct xfs_mount *mp, struct xfs_trans_res *resv,
struct xfs_dquot *udqp, struct xfs_dquot *gdqp,
struct xfs_dquot *pdqp, unsigned int dblocks,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment