Commit 5bb4ad95 authored by Chandan Babu R

Merge tag 'repair-rtbitmap-6.8_2023-12-15' of...

Merge tag 'repair-rtbitmap-6.8_2023-12-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.8-mergeB

xfs: online repair of rt bitmap file

Add in the necessary infrastructure to check the inode and data forks of
metadata files, then apply that to the realtime bitmap file.  We won't
be able to reconstruct the contents of the rtbitmap file until rmapbt is
added for realtime volumes, but we can at least get the basics started.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'repair-rtbitmap-6.8_2023-12-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: online repair of realtime bitmaps
  xfs: create a new inode fork block unmap helper
  xfs: repair the inode core and forks of a metadata inode
  xfs: always check the rtbitmap and rtsummary files
  xfs: check rt summary file geometry more thoroughly
  xfs: check rt bitmap file geometry more thoroughly
parents 98e63b91 ffd37b22
...@@ -192,5 +192,9 @@ xfs-y += $(addprefix scrub/, \ ...@@ -192,5 +192,9 @@ xfs-y += $(addprefix scrub/, \
refcount_repair.o \ refcount_repair.o \
repair.o \ repair.o \
) )
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
rtbitmap_repair.o \
)
endif endif
endif endif
...@@ -5239,7 +5239,7 @@ xfs_bmap_del_extent_real( ...@@ -5239,7 +5239,7 @@ xfs_bmap_del_extent_real(
* that value. If not all extents in the block range can be removed then * that value. If not all extents in the block range can be removed then
* *done is set. * *done is set.
*/ */
int /* error */ static int
__xfs_bunmapi( __xfs_bunmapi(
struct xfs_trans *tp, /* transaction pointer */ struct xfs_trans *tp, /* transaction pointer */
struct xfs_inode *ip, /* incore inode */ struct xfs_inode *ip, /* incore inode */
...@@ -6220,3 +6220,42 @@ xfs_bmap_validate_extent( ...@@ -6220,3 +6220,42 @@ xfs_bmap_validate_extent(
return xfs_bmap_validate_extent_raw(ip->i_mount, return xfs_bmap_validate_extent_raw(ip->i_mount,
XFS_IS_REALTIME_INODE(ip), whichfork, irec); XFS_IS_REALTIME_INODE(ip), whichfork, irec);
} }
/*
 * Used by xfs_bunmapi_range(), which xfs_itruncate_extents() calls.  This is
 * the maximum number of extents freed from a file in a single transaction.
 */
#define XFS_ITRUNC_MAX_EXTENTS 2
/*
 * Unmap every extent between @startoff and @endoff (inclusive) in part of an
 * inode's fork.  Extents are removed in batches of at most
 * XFS_ITRUNC_MAX_EXTENTS, and the deferred work for each batch is finished
 * before the next batch starts.  No higher level invalidation work is done
 * here.
 *
 * The caller must hold the ILOCK exclusively.  @tpp is handed to
 * xfs_defer_finish(), so callers should re-read *tpp after this returns.
 *
 * Returns 0 on success or a negative errno from the first step that fails.
 */
int
xfs_bunmapi_range(
	struct xfs_trans	**tpp,
	struct xfs_inode	*ip,
	uint32_t		flags,
	xfs_fileoff_t		startoff,
	xfs_fileoff_t		endoff)
{
	xfs_filblks_t		remaining = endoff - startoff + 1;
	int			error;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	while (remaining > 0) {
		ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER);

		/* Unmap the next batch; @remaining is updated for us. */
		error = __xfs_bunmapi(*tpp, ip, startoff, &remaining, flags,
				XFS_ITRUNC_MAX_EXTENTS);
		if (error)
			return error;

		/* Free the extents that were just unmapped. */
		error = xfs_defer_finish(tpp);
		if (error)
			return error;
	}

	return 0;
}
...@@ -190,9 +190,6 @@ int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno, ...@@ -190,9 +190,6 @@ int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip, int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags, xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags,
xfs_extlen_t total, struct xfs_bmbt_irec *mval, int *nmap); xfs_extlen_t total, struct xfs_bmbt_irec *mval, int *nmap);
int __xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t *rlen, uint32_t flags,
xfs_extnum_t nexts);
int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags, xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags,
xfs_extnum_t nexts, int *done); xfs_extnum_t nexts, int *done);
...@@ -273,6 +270,8 @@ int xfs_bmap_complain_bad_rec(struct xfs_inode *ip, int whichfork, ...@@ -273,6 +270,8 @@ int xfs_bmap_complain_bad_rec(struct xfs_inode *ip, int whichfork,
int xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip, int xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip,
xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock, xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock,
uint32_t flags); uint32_t flags);
int xfs_bunmapi_range(struct xfs_trans **tpp, struct xfs_inode *ip,
uint32_t flags, xfs_fileoff_t startoff, xfs_fileoff_t endoff);
extern struct kmem_cache *xfs_bmap_intent_cache; extern struct kmem_cache *xfs_bmap_intent_cache;
......
...@@ -86,6 +86,9 @@ struct xrep_bmap { ...@@ -86,6 +86,9 @@ struct xrep_bmap {
/* What should the REFLINK flag be set to when the repair is over? */ /* What should the REFLINK flag be set to when the repair is over? */
enum reflink_scan_state reflink_scan; enum reflink_scan_state reflink_scan;
/* Do we allow unwritten extents? */
bool allow_unwritten;
}; };
/* Is this space extent shared? Flag the inode if it is. */ /* Is this space extent shared? Flag the inode if it is. */
...@@ -262,6 +265,10 @@ xrep_bmap_walk_rmap( ...@@ -262,6 +265,10 @@ xrep_bmap_walk_rmap(
!(rec->rm_flags & XFS_RMAP_ATTR_FORK)) !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
return 0; return 0;
/* Reject unwritten extents if we don't allow those. */
if ((rec->rm_flags & XFS_RMAP_UNWRITTEN) && !rb->allow_unwritten)
return -EFSCORRUPTED;
fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno,
rec->rm_startblock); rec->rm_startblock);
...@@ -780,10 +787,11 @@ xrep_bmap_init_reflink_scan( ...@@ -780,10 +787,11 @@ xrep_bmap_init_reflink_scan(
} }
/* Repair an inode fork. */ /* Repair an inode fork. */
STATIC int int
xrep_bmap( xrep_bmap(
struct xfs_scrub *sc, struct xfs_scrub *sc,
int whichfork) int whichfork,
bool allow_unwritten)
{ {
struct xrep_bmap *rb; struct xrep_bmap *rb;
char *descr; char *descr;
...@@ -803,6 +811,7 @@ xrep_bmap( ...@@ -803,6 +811,7 @@ xrep_bmap(
rb->sc = sc; rb->sc = sc;
rb->whichfork = whichfork; rb->whichfork = whichfork;
rb->reflink_scan = xrep_bmap_init_reflink_scan(sc, whichfork); rb->reflink_scan = xrep_bmap_init_reflink_scan(sc, whichfork);
rb->allow_unwritten = allow_unwritten;
/* Set up enough storage to handle the max records for this fork. */ /* Set up enough storage to handle the max records for this fork. */
large_extcount = xfs_has_large_extent_counts(sc->mp); large_extcount = xfs_has_large_extent_counts(sc->mp);
...@@ -846,7 +855,7 @@ int ...@@ -846,7 +855,7 @@ int
xrep_bmap_data( xrep_bmap_data(
struct xfs_scrub *sc) struct xfs_scrub *sc)
{ {
return xrep_bmap(sc, XFS_DATA_FORK); return xrep_bmap(sc, XFS_DATA_FORK, true);
} }
/* Repair an inode's attr fork. */ /* Repair an inode's attr fork. */
...@@ -854,5 +863,5 @@ int ...@@ -854,5 +863,5 @@ int
xrep_bmap_attr( xrep_bmap_attr(
struct xfs_scrub *sc) struct xfs_scrub *sc)
{ {
return xrep_bmap(sc, XFS_ATTR_FORK); return xrep_bmap(sc, XFS_ATTR_FORK, false);
} }
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "xfs_defer.h" #include "xfs_defer.h"
#include "xfs_errortag.h" #include "xfs_errortag.h"
#include "xfs_error.h" #include "xfs_error.h"
#include "xfs_reflink.h"
#include "scrub/scrub.h" #include "scrub/scrub.h"
#include "scrub/common.h" #include "scrub/common.h"
#include "scrub/trace.h" #include "scrub/trace.h"
...@@ -962,3 +963,155 @@ xrep_will_attempt( ...@@ -962,3 +963,155 @@ xrep_will_attempt(
return false; return false;
} }
/* Try to fix some part of a metadata inode by calling another scrubber. */
STATIC int
xrep_metadata_inode_subtype(
struct xfs_scrub *sc,
unsigned int scrub_type)
{
__u32 smtype = sc->sm->sm_type;
__u32 smflags = sc->sm->sm_flags;
unsigned int sick_mask = sc->sick_mask;
int error;
/*
* Let's see if the inode needs repair. We're going to open-code calls
* to the scrub and repair functions so that we can hang on to the
* resources that we already acquired instead of using the standard
* setup/teardown routines.
*/
sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
sc->sm->sm_type = scrub_type;
switch (scrub_type) {
case XFS_SCRUB_TYPE_INODE:
error = xchk_inode(sc);
break;
case XFS_SCRUB_TYPE_BMBTD:
error = xchk_bmap_data(sc);
break;
case XFS_SCRUB_TYPE_BMBTA:
error = xchk_bmap_attr(sc);
break;
default:
ASSERT(0);
error = -EFSCORRUPTED;
}
if (error)
goto out;
if (!xrep_will_attempt(sc))
goto out;
/*
* Repair some part of the inode. This will potentially join the inode
* to the transaction.
*/
switch (scrub_type) {
case XFS_SCRUB_TYPE_INODE:
error = xrep_inode(sc);
break;
case XFS_SCRUB_TYPE_BMBTD:
error = xrep_bmap(sc, XFS_DATA_FORK, false);
break;
case XFS_SCRUB_TYPE_BMBTA:
error = xrep_bmap(sc, XFS_ATTR_FORK, false);
break;
}
if (error)
goto out;
/*
* Finish all deferred intent items and then roll the transaction so
* that the inode will not be joined to the transaction when we exit
* the function.
*/
error = xfs_defer_finish(&sc->tp);
if (error)
goto out;
error = xfs_trans_roll(&sc->tp);
if (error)
goto out;
/*
* Clear the corruption flags and re-check the metadata that we just
* repaired.
*/
sc->sm->sm_flags &= ~XFS_SCRUB_FLAGS_OUT;
switch (scrub_type) {
case XFS_SCRUB_TYPE_INODE:
error = xchk_inode(sc);
break;
case XFS_SCRUB_TYPE_BMBTD:
error = xchk_bmap_data(sc);
break;
case XFS_SCRUB_TYPE_BMBTA:
error = xchk_bmap_attr(sc);
break;
}
if (error)
goto out;
/* If corruption persists, the repair has failed. */
if (xchk_needs_repair(sc->sm)) {
error = -EFSCORRUPTED;
goto out;
}
out:
sc->sick_mask = sick_mask;
sc->sm->sm_type = smtype;
sc->sm->sm_flags = smflags;
return error;
}
/*
 * Repair the ondisk forks of a metadata inode.
 *
 * The caller must ensure that sc->ip points to the metadata inode and that
 * the ILOCK is held on that inode.  The inode must not be joined to the
 * transaction before the call, and will not be afterwards.
 *
 * Returns 0 on success or a negative errno from the first step that fails.
 */
int
xrep_metadata_inode_forks(
	struct xfs_scrub	*sc)
{
	int			error;

	/*
	 * Repair the inode record first, then the data fork, and finally make
	 * sure the attr fork looks ok before we delete it.
	 */
	error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
	if (!error)
		error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
	if (!error)
		error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA);
	if (error)
		return error;

	/* Nothing else was modified unless the reflink flag needs clearing. */
	if (!xfs_is_reflink_inode(sc->ip))
		return 0;

	/* Clear the reflink flag since metadata never shares. */
	xfs_trans_ijoin(sc->tp, sc->ip, 0);
	error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp);
	if (error)
		return error;

	/*
	 * We modified the inode, so roll the transaction but don't rejoin the
	 * inode to the new transaction because xrep_bmap_data can do that.
	 */
	return xfs_trans_roll(&sc->tp);
}
...@@ -82,6 +82,8 @@ int xrep_ino_dqattach(struct xfs_scrub *sc); ...@@ -82,6 +82,8 @@ int xrep_ino_dqattach(struct xfs_scrub *sc);
int xrep_ino_ensure_extent_count(struct xfs_scrub *sc, int whichfork, int xrep_ino_ensure_extent_count(struct xfs_scrub *sc, int whichfork,
xfs_extnum_t nextents); xfs_extnum_t nextents);
int xrep_reset_perag_resv(struct xfs_scrub *sc); int xrep_reset_perag_resv(struct xfs_scrub *sc);
int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten);
int xrep_metadata_inode_forks(struct xfs_scrub *sc);
/* Repair setup functions */ /* Repair setup functions */
int xrep_setup_ag_allocbt(struct xfs_scrub *sc); int xrep_setup_ag_allocbt(struct xfs_scrub *sc);
...@@ -113,6 +115,12 @@ int xrep_bmap_data(struct xfs_scrub *sc); ...@@ -113,6 +115,12 @@ int xrep_bmap_data(struct xfs_scrub *sc);
int xrep_bmap_attr(struct xfs_scrub *sc); int xrep_bmap_attr(struct xfs_scrub *sc);
int xrep_bmap_cow(struct xfs_scrub *sc); int xrep_bmap_cow(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xrep_rtbitmap(struct xfs_scrub *sc);
#else
# define xrep_rtbitmap xrep_notsupported
#endif /* CONFIG_XFS_RT */
int xrep_reinit_pagf(struct xfs_scrub *sc); int xrep_reinit_pagf(struct xfs_scrub *sc);
int xrep_reinit_pagi(struct xfs_scrub *sc); int xrep_reinit_pagi(struct xfs_scrub *sc);
...@@ -175,6 +183,7 @@ xrep_setup_nothing( ...@@ -175,6 +183,7 @@ xrep_setup_nothing(
#define xrep_bmap_data xrep_notsupported #define xrep_bmap_data xrep_notsupported
#define xrep_bmap_attr xrep_notsupported #define xrep_bmap_attr xrep_notsupported
#define xrep_bmap_cow xrep_notsupported #define xrep_bmap_cow xrep_notsupported
#define xrep_rtbitmap xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */ #endif /* CONFIG_XFS_ONLINE_REPAIR */
......
...@@ -14,17 +14,33 @@ ...@@ -14,17 +14,33 @@
#include "xfs_rtbitmap.h" #include "xfs_rtbitmap.h"
#include "xfs_inode.h" #include "xfs_inode.h"
#include "xfs_bmap.h" #include "xfs_bmap.h"
#include "xfs_bit.h"
#include "scrub/scrub.h" #include "scrub/scrub.h"
#include "scrub/common.h" #include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/rtbitmap.h"
/* Set us up with the realtime metadata locked. */ /* Set us up with the realtime metadata locked. */
int int
xchk_setup_rtbitmap( xchk_setup_rtbitmap(
struct xfs_scrub *sc) struct xfs_scrub *sc)
{ {
struct xfs_mount *mp = sc->mp;
struct xchk_rtbitmap *rtb;
int error; int error;
error = xchk_trans_alloc(sc, 0); rtb = kzalloc(sizeof(struct xchk_rtbitmap), XCHK_GFP_FLAGS);
if (!rtb)
return -ENOMEM;
sc->buf = rtb;
if (xchk_could_repair(sc)) {
error = xrep_setup_rtbitmap(sc, rtb);
if (error)
return error;
}
error = xchk_trans_alloc(sc, rtb->resblks);
if (error) if (error)
return error; return error;
...@@ -37,6 +53,17 @@ xchk_setup_rtbitmap( ...@@ -37,6 +53,17 @@ xchk_setup_rtbitmap(
return error; return error;
xchk_ilock(sc, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP); xchk_ilock(sc, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);
/*
* Now that we've locked the rtbitmap, we can't race with growfsrt
* trying to expand the bitmap or change the size of the rt volume.
* Hence it is safe to compute and check the geometry values.
*/
if (mp->m_sb.sb_rblocks) {
rtb->rextents = xfs_rtb_to_rtx(mp, mp->m_sb.sb_rblocks);
rtb->rextslog = xfs_compute_rextslog(rtb->rextents);
rtb->rbmblocks = xfs_rtbitmap_blockcount(mp, rtb->rextents);
}
return 0; return 0;
} }
...@@ -67,21 +94,30 @@ STATIC int ...@@ -67,21 +94,30 @@ STATIC int
xchk_rtbitmap_check_extents( xchk_rtbitmap_check_extents(
struct xfs_scrub *sc) struct xfs_scrub *sc)
{ {
struct xfs_mount *mp = sc->mp;
struct xfs_bmbt_irec map; struct xfs_bmbt_irec map;
xfs_rtblock_t off; struct xfs_iext_cursor icur;
int nmap; struct xfs_mount *mp = sc->mp;
struct xfs_inode *ip = sc->ip;
xfs_fileoff_t off = 0;
xfs_fileoff_t endoff;
int error = 0; int error = 0;
for (off = 0; off < mp->m_sb.sb_rbmblocks;) { /* Mappings may not cross or lie beyond EOF. */
endoff = XFS_B_TO_FSB(mp, ip->i_disk_size);
if (xfs_iext_lookup_extent(ip, &ip->i_df, endoff, &icur, &map)) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, endoff);
return 0;
}
while (off < endoff) {
int nmap = 1;
if (xchk_should_terminate(sc, &error) || if (xchk_should_terminate(sc, &error) ||
(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
break; break;
/* Make sure we have a written extent. */ /* Make sure we have a written extent. */
nmap = 1; error = xfs_bmapi_read(ip, off, endoff - off, &map, &nmap,
error = xfs_bmapi_read(mp->m_rbmip, off,
mp->m_sb.sb_rbmblocks - off, &map, &nmap,
XFS_DATA_FORK); XFS_DATA_FORK);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error)) if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error))
break; break;
...@@ -102,12 +138,48 @@ int ...@@ -102,12 +138,48 @@ int
xchk_rtbitmap( xchk_rtbitmap(
struct xfs_scrub *sc) struct xfs_scrub *sc)
{ {
struct xfs_mount *mp = sc->mp;
struct xchk_rtbitmap *rtb = sc->buf;
int error; int error;
/* Is the size of the rtbitmap correct? */ /* Is sb_rextents correct? */
if (sc->mp->m_rbmip->i_disk_size != if (mp->m_sb.sb_rextents != rtb->rextents) {
XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks)) { xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
xchk_ino_set_corrupt(sc, sc->mp->m_rbmip->i_ino); return 0;
}
/* Is sb_rextslog correct? */
if (mp->m_sb.sb_rextslog != rtb->rextslog) {
xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
return 0;
}
/*
* Is sb_rbmblocks large enough to handle the current rt volume? In no
* case can we exceed 4bn bitmap blocks since the super field is a u32.
*/
if (rtb->rbmblocks > U32_MAX) {
xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
return 0;
}
if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks) {
xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
return 0;
}
/* The bitmap file length must be aligned to an fsblock. */
if (mp->m_rbmip->i_disk_size & mp->m_blockmask) {
xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
return 0;
}
/*
* Is the bitmap file itself large enough to handle the rt volume?
* growfsrt expands the bitmap file before updating sb_rextents, so the
* file can be larger than sb_rbmblocks.
*/
if (mp->m_rbmip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks)) {
xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
return 0; return 0;
} }
...@@ -120,12 +192,11 @@ xchk_rtbitmap( ...@@ -120,12 +192,11 @@ xchk_rtbitmap(
if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
return error; return error;
error = xfs_rtalloc_query_all(sc->mp, sc->tp, xchk_rtbitmap_rec, sc); error = xfs_rtalloc_query_all(mp, sc->tp, xchk_rtbitmap_rec, sc);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error)) if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
goto out; return error;
out: return 0;
return error;
} }
/* xref check that the extent is not free in the rtbitmap */ /* xref check that the extent is not free in the rtbitmap */
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_RTBITMAP_H__
#define __XFS_SCRUB_RTBITMAP_H__
/*
 * Scratch state for checking and repairing the realtime bitmap file.  The
 * geometry fields are computed by xchk_setup_rtbitmap() from sb_rblocks once
 * the rtbitmap is locked; they stay zero when sb_rblocks is zero.
 */
struct xchk_rtbitmap {
/* Expected rt extent count: xfs_rtb_to_rtx() of sb_rblocks. */
uint64_t rextents;
/* Expected bitmap block count: xfs_rtbitmap_blockcount() of rextents. */
uint64_t rbmblocks;
/* Expected sb_rextslog: xfs_compute_rextslog() of rextents. */
unsigned int rextslog;
/* Blocks to reserve in the scrub transaction; raised by the repair code. */
unsigned int resblks;
};
#ifdef CONFIG_XFS_ONLINE_REPAIR
int xrep_setup_rtbitmap(struct xfs_scrub *sc, struct xchk_rtbitmap *rtb);
#else
# define xrep_setup_rtbitmap(sc, rtb) (0)
#endif /* CONFIG_XFS_ONLINE_REPAIR */
#endif /* __XFS_SCRUB_RTBITMAP_H__ */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2020-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/xfile.h"
#include "scrub/rtbitmap.h"
/*
 * Set up to repair the realtime bitmap file metadata.
 *
 * Adds to rtb->resblks enough blocks to write out a completely new bmbt for
 * a maximally fragmented bitmap file.  The rtbitmap ILOCK is not held yet, so
 * the estimate is entirely speculative.  Returns -EOPNOTSUPP if the estimate
 * does not fit in an unsigned int, otherwise 0.
 */
int
xrep_setup_rtbitmap(
	struct xfs_scrub	*sc,
	struct xchk_rtbitmap	*rtb)
{
	unsigned long long	bmbt_blocks;

	bmbt_blocks = xfs_bmbt_calc_size(sc->mp, sc->mp->m_sb.sb_rbmblocks);
	if (bmbt_blocks > UINT_MAX)
		return -EOPNOTSUPP;

	rtb->resblks += bmbt_blocks;
	return 0;
}
/*
 * Walk the first @len blocks of the data fork of the realtime file and make
 * sure that every mapping found there is written.  Unwritten extents are
 * converted to zeroed, written extents; holes are left alone.  The caller
 * must ensure that the inode is joined to the transaction.
 *
 * Returns 0 on success, -EFSCORRUPTED if a delalloc reservation or another
 * unexpected mapping is found, or a negative errno from the bmap/defer
 * helpers.
 */
STATIC int
xrep_rtbitmap_data_mappings(
	struct xfs_scrub	*sc,
	xfs_filblks_t		len)
{
	struct xfs_bmbt_irec	map;
	xfs_fileoff_t		fileoff;
	int			nmaps;
	int			error;

	ASSERT(sc->ip != NULL);

	/* Each pass advances to the end of the mapping it just examined. */
	for (fileoff = 0; fileoff < len;
	     fileoff = map.br_startoff + map.br_blockcount) {
		/* Look up whatever maps this file offset. */
		nmaps = 1;
		error = xfs_bmapi_read(sc->ip, fileoff, len - fileoff, &map,
				&nmaps, XFS_DATA_FORK);
		if (error)
			return error;
		if (nmaps == 0) {
			ASSERT(nmaps != 0);
			return -EFSCORRUPTED;
		}

		/*
		 * Written extents are ok.  Holes are not filled because we
		 * do not know the freespace information.
		 */
		if (xfs_bmap_is_written_extent(&map) ||
		    map.br_startblock == HOLESTARTBLOCK)
			continue;

		/*
		 * If we find a delalloc reservation then something is very
		 * very wrong.  Bail out.
		 */
		if (map.br_startblock == DELAYSTARTBLOCK)
			return -EFSCORRUPTED;

		/* Make sure we're really converting an unwritten extent. */
		if (map.br_state != XFS_EXT_UNWRITTEN) {
			ASSERT(map.br_state == XFS_EXT_UNWRITTEN);
			return -EFSCORRUPTED;
		}

		/* Make sure this block has a real zeroed extent mapped. */
		nmaps = 1;
		error = xfs_bmapi_write(sc->tp, sc->ip, map.br_startoff,
				map.br_blockcount,
				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO,
				0, &map, &nmaps);
		if (error)
			return error;
		if (nmaps != 1)
			return -EFSCORRUPTED;

		/* Commit new extent and all deferred work. */
		error = xrep_defer_finish(sc);
		if (error)
			return error;
	}

	return 0;
}
/* Fix broken rt volume geometry. */
STATIC int
xrep_rtbitmap_geometry(
struct xfs_scrub *sc,
struct xchk_rtbitmap *rtb)
{
struct xfs_mount *mp = sc->mp;
struct xfs_trans *tp = sc->tp;
/* Superblock fields */
if (mp->m_sb.sb_rextents != rtb->rextents)
xfs_trans_mod_sb(sc->tp, XFS_TRANS_SB_REXTENTS,
rtb->rextents - mp->m_sb.sb_rextents);
if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
rtb->rbmblocks - mp->m_sb.sb_rbmblocks);
if (mp->m_sb.sb_rextslog != rtb->rextslog)
xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
rtb->rextslog - mp->m_sb.sb_rextslog);
/* Fix broken isize */
sc->ip->i_disk_size = roundup_64(sc->ip->i_disk_size,
mp->m_sb.sb_blocksize);
if (sc->ip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks))
sc->ip->i_disk_size = XFS_FSB_TO_B(mp, rtb->rbmblocks);
xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
return xrep_roll_trans(sc);
}
/* Repair the realtime bitmap file metadata. */
int
xrep_rtbitmap(
struct xfs_scrub *sc)
{
struct xchk_rtbitmap *rtb = sc->buf;
struct xfs_mount *mp = sc->mp;
unsigned long long blocks = 0;
int error;
/* Impossibly large rtbitmap means we can't touch the filesystem. */
if (rtb->rbmblocks > U32_MAX)
return 0;
/*
* If the size of the rt bitmap file is larger than what we reserved,
* figure out if we need to adjust the block reservation in the
* transaction.
*/
blocks = xfs_bmbt_calc_size(mp, rtb->rbmblocks);
if (blocks > UINT_MAX)
return -EOPNOTSUPP;
if (blocks > rtb->resblks) {
error = xfs_trans_reserve_more(sc->tp, blocks, 0);
if (error)
return error;
rtb->resblks += blocks;
}
/* Fix inode core and forks. */
error = xrep_metadata_inode_forks(sc);
if (error)
return error;
xfs_trans_ijoin(sc->tp, sc->ip, 0);
/* Ensure no unwritten extents. */
error = xrep_rtbitmap_data_mappings(sc, rtb->rbmblocks);
if (error)
return error;
/* Fix inconsistent bitmap geometry */
return xrep_rtbitmap_geometry(sc, rtb);
}
...@@ -31,6 +31,18 @@ ...@@ -31,6 +31,18 @@
* (potentially large) amount of data in pageable memory. * (potentially large) amount of data in pageable memory.
*/ */
struct xchk_rtsummary {
struct xfs_rtalloc_args args;
uint64_t rextents;
uint64_t rbmblocks;
uint64_t rsumsize;
unsigned int rsumlevels;
/* Memory buffer for the summary comparison. */
union xfs_suminfo_raw words[];
};
/* Set us up to check the rtsummary file. */ /* Set us up to check the rtsummary file. */
int int
xchk_setup_rtsummary( xchk_setup_rtsummary(
...@@ -38,8 +50,15 @@ xchk_setup_rtsummary( ...@@ -38,8 +50,15 @@ xchk_setup_rtsummary(
{ {
struct xfs_mount *mp = sc->mp; struct xfs_mount *mp = sc->mp;
char *descr; char *descr;
struct xchk_rtsummary *rts;
int error; int error;
rts = kvzalloc(struct_size(rts, words, mp->m_blockwsize),
XCHK_GFP_FLAGS);
if (!rts)
return -ENOMEM;
sc->buf = rts;
/* /*
* Create an xfile to construct a new rtsummary file. The xfile allows * Create an xfile to construct a new rtsummary file. The xfile allows
* us to avoid pinning kernel memory for this purpose. * us to avoid pinning kernel memory for this purpose.
...@@ -54,11 +73,6 @@ xchk_setup_rtsummary( ...@@ -54,11 +73,6 @@ xchk_setup_rtsummary(
if (error) if (error)
return error; return error;
/* Allocate a memory buffer for the summary comparison. */
sc->buf = kvmalloc(mp->m_sb.sb_blocksize, XCHK_GFP_FLAGS);
if (!sc->buf)
return -ENOMEM;
error = xchk_install_live_inode(sc, mp->m_rsumip); error = xchk_install_live_inode(sc, mp->m_rsumip);
if (error) if (error)
return error; return error;
...@@ -75,13 +89,29 @@ xchk_setup_rtsummary( ...@@ -75,13 +89,29 @@ xchk_setup_rtsummary(
*/ */
xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP); xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
xchk_ilock(sc, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM); xchk_ilock(sc, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);
/*
* Now that we've locked the rtbitmap and rtsummary, we can't race with
* growfsrt trying to expand the summary or change the size of the rt
* volume. Hence it is safe to compute and check the geometry values.
*/
if (mp->m_sb.sb_rblocks) {
xfs_filblks_t rsumblocks;
int rextslog;
rts->rextents = xfs_rtb_to_rtx(mp, mp->m_sb.sb_rblocks);
rextslog = xfs_compute_rextslog(rts->rextents);
rts->rsumlevels = rextslog + 1;
rts->rbmblocks = xfs_rtbitmap_blockcount(mp, rts->rextents);
rsumblocks = xfs_rtsummary_blockcount(mp, rts->rsumlevels,
rts->rbmblocks);
rts->rsumsize = XFS_FSB_TO_B(mp, rsumblocks);
}
return 0; return 0;
} }
/* Helper functions to record suminfo words in an xfile. */ /* Helper functions to record suminfo words in an xfile. */
typedef unsigned int xchk_rtsumoff_t;
static inline int static inline int
xfsum_load( xfsum_load(
struct xfs_scrub *sc, struct xfs_scrub *sc,
...@@ -192,19 +222,29 @@ STATIC int ...@@ -192,19 +222,29 @@ STATIC int
xchk_rtsum_compare( xchk_rtsum_compare(
struct xfs_scrub *sc) struct xfs_scrub *sc)
{ {
struct xfs_rtalloc_args args = {
.mp = sc->mp,
.tp = sc->tp,
};
struct xfs_mount *mp = sc->mp;
struct xfs_bmbt_irec map; struct xfs_bmbt_irec map;
xfs_fileoff_t off; struct xfs_iext_cursor icur;
xchk_rtsumoff_t sumoff = 0;
int nmap; struct xfs_mount *mp = sc->mp;
struct xfs_inode *ip = sc->ip;
struct xchk_rtsummary *rts = sc->buf;
xfs_fileoff_t off = 0;
xfs_fileoff_t endoff;
xfs_rtsumoff_t sumoff = 0;
int error = 0;
for (off = 0; off < XFS_B_TO_FSB(mp, mp->m_rsumsize); off++) { rts->args.mp = sc->mp;
union xfs_suminfo_raw *ondisk_info; rts->args.tp = sc->tp;
int error = 0;
/* Mappings may not cross or lie beyond EOF. */
endoff = XFS_B_TO_FSB(mp, ip->i_disk_size);
if (xfs_iext_lookup_extent(ip, &ip->i_df, endoff, &icur, &map)) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, endoff);
return 0;
}
while (off < endoff) {
int nmap = 1;
if (xchk_should_terminate(sc, &error)) if (xchk_should_terminate(sc, &error))
return error; return error;
...@@ -212,8 +252,7 @@ xchk_rtsum_compare( ...@@ -212,8 +252,7 @@ xchk_rtsum_compare(
return 0; return 0;
/* Make sure we have a written extent. */ /* Make sure we have a written extent. */
nmap = 1; error = xfs_bmapi_read(ip, off, endoff - off, &map, &nmap,
error = xfs_bmapi_read(mp->m_rsumip, off, 1, &map, &nmap,
XFS_DATA_FORK); XFS_DATA_FORK);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error)) if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error))
return error; return error;
...@@ -223,24 +262,33 @@ xchk_rtsum_compare( ...@@ -223,24 +262,33 @@ xchk_rtsum_compare(
return 0; return 0;
} }
off += map.br_blockcount;
}
for (off = 0; off < endoff; off++) {
union xfs_suminfo_raw *ondisk_info;
/* Read a block's worth of ondisk rtsummary file. */ /* Read a block's worth of ondisk rtsummary file. */
error = xfs_rtsummary_read_buf(&args, off); error = xfs_rtsummary_read_buf(&rts->args, off);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error)) if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error))
return error; return error;
/* Read a block's worth of computed rtsummary file. */ /* Read a block's worth of computed rtsummary file. */
error = xfsum_copyout(sc, sumoff, sc->buf, mp->m_blockwsize); error = xfsum_copyout(sc, sumoff, rts->words, mp->m_blockwsize);
if (error) { if (error) {
xfs_rtbuf_cache_relse(&args); xfs_rtbuf_cache_relse(&rts->args);
return error; return error;
} }
ondisk_info = xfs_rsumblock_infoptr(&args, 0); ondisk_info = xfs_rsumblock_infoptr(&rts->args, 0);
if (memcmp(ondisk_info, sc->buf, if (memcmp(ondisk_info, rts->words,
mp->m_blockwsize << XFS_WORDLOG) != 0) mp->m_blockwsize << XFS_WORDLOG) != 0) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off); xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off);
xfs_rtbuf_cache_relse(&rts->args);
return error;
}
xfs_rtbuf_cache_relse(&args); xfs_rtbuf_cache_relse(&rts->args);
sumoff += mp->m_blockwsize; sumoff += mp->m_blockwsize;
} }
...@@ -253,8 +301,43 @@ xchk_rtsummary( ...@@ -253,8 +301,43 @@ xchk_rtsummary(
struct xfs_scrub *sc) struct xfs_scrub *sc)
{ {
struct xfs_mount *mp = sc->mp; struct xfs_mount *mp = sc->mp;
struct xchk_rtsummary *rts = sc->buf;
int error = 0; int error = 0;
/* Is sb_rextents correct? */
if (mp->m_sb.sb_rextents != rts->rextents) {
xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
goto out_rbm;
}
/* Is m_rsumlevels correct? */
if (mp->m_rsumlevels != rts->rsumlevels) {
xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino);
goto out_rbm;
}
/* Is m_rsumsize correct? */
if (mp->m_rsumsize != rts->rsumsize) {
xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino);
goto out_rbm;
}
/* The summary file length must be aligned to an fsblock. */
if (mp->m_rsumip->i_disk_size & mp->m_blockmask) {
xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino);
goto out_rbm;
}
/*
* Is the summary file itself large enough to handle the rt volume?
* growfsrt expands the summary file before updating sb_rextents, so
* the file can be larger than rsumsize.
*/
if (mp->m_rsumip->i_disk_size < rts->rsumsize) {
xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino);
goto out_rbm;
}
/* Invoke the fork scrubber. */ /* Invoke the fork scrubber. */
error = xchk_metadata_inode_forks(sc); error = xchk_metadata_inode_forks(sc);
if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
......
...@@ -328,14 +328,12 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { ...@@ -328,14 +328,12 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.type = ST_FS, .type = ST_FS,
.setup = xchk_setup_rtbitmap, .setup = xchk_setup_rtbitmap,
.scrub = xchk_rtbitmap, .scrub = xchk_rtbitmap,
.has = xfs_has_realtime, .repair = xrep_rtbitmap,
.repair = xrep_notsupported,
}, },
[XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */ [XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */
.type = ST_FS, .type = ST_FS,
.setup = xchk_setup_rtsummary, .setup = xchk_setup_rtsummary,
.scrub = xchk_rtsummary, .scrub = xchk_rtsummary,
.has = xfs_has_realtime,
.repair = xrep_notsupported, .repair = xrep_notsupported,
}, },
[XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */ [XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */
......
...@@ -41,12 +41,6 @@ ...@@ -41,12 +41,6 @@
struct kmem_cache *xfs_inode_cache; struct kmem_cache *xfs_inode_cache;
/*
* Used in xfs_itruncate_extents(). This is the maximum number of extents
* freed from a file in a single transaction.
*/
#define XFS_ITRUNC_MAX_EXTENTS 2
STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *); STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag, STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
struct xfs_inode *); struct xfs_inode *);
...@@ -1346,7 +1340,6 @@ xfs_itruncate_extents_flags( ...@@ -1346,7 +1340,6 @@ xfs_itruncate_extents_flags(
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp = *tpp; struct xfs_trans *tp = *tpp;
xfs_fileoff_t first_unmap_block; xfs_fileoff_t first_unmap_block;
xfs_filblks_t unmap_len;
int error = 0; int error = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
...@@ -1378,19 +1371,10 @@ xfs_itruncate_extents_flags( ...@@ -1378,19 +1371,10 @@ xfs_itruncate_extents_flags(
return 0; return 0;
} }
unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1; error = xfs_bunmapi_range(&tp, ip, flags, first_unmap_block,
while (unmap_len > 0) { XFS_MAX_FILEOFF);
ASSERT(tp->t_highest_agno == NULLAGNUMBER); if (error)
error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len, goto out;
flags, XFS_ITRUNC_MAX_EXTENTS);
if (error)
goto out;
/* free the just unmapped extents */
error = xfs_defer_finish(&tp);
if (error)
goto out;
}
if (whichfork == XFS_DATA_FORK) { if (whichfork == XFS_DATA_FORK) {
/* Remove all pending CoW reservations. */ /* Remove all pending CoW reservations. */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment