Commit 5f3e9511 authored by Chandan Babu R

Merge tag 'repair-xattrs-6.10_2024-04-15' of...

Merge tag 'repair-xattrs-6.10_2024-04-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.10-mergeA

xfs: online repair of extended attributes

This series employs atomic extent swapping to enable safe reconstruction
of extended attribute data attached to a file.  Because xattrs do not
have any redundant information to draw off of, we can at best salvage
as much data as we can and build a new structure.

Rebuilding an extended attribute structure consists of these three
steps:

First, we walk the existing attributes to salvage as many of them as we
can, by adding them as new attributes attached to the repair tempfile.
We need to add a new xfile-based data structure to hold blobs of
arbitrary length to stage the xattr names and values.

Second, we write the salvaged attributes to a temporary file, and use
atomic extent swaps to exchange the entire attribute fork between the
two files.

Finally, we reap the old xattr blocks (which are now in the temporary
file) as carefully as we can.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'repair-xattrs-6.10_2024-04-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: create an xattr iteration function for scrub
  xfs: flag empty xattr leaf blocks for optimization
  xfs: scrub should set preen if attr leaf has holes
  xfs: repair extended attributes
  xfs: use atomic extent swapping to fix user file fork data
  xfs: create a blob array data structure
  xfs: enable discarding of folios backing an xfile
parents fb1f7c66 6c631e79
...@@ -165,6 +165,7 @@ xfs-y += $(addprefix scrub/, \ ...@@ -165,6 +165,7 @@ xfs-y += $(addprefix scrub/, \
ialloc.o \ ialloc.o \
inode.o \ inode.o \
iscan.o \ iscan.o \
listxattr.o \
nlinks.o \ nlinks.o \
parent.o \ parent.o \
readdir.o \ readdir.o \
...@@ -194,6 +195,7 @@ ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y) ...@@ -194,6 +195,7 @@ ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
xfs-y += $(addprefix scrub/, \ xfs-y += $(addprefix scrub/, \
agheader_repair.o \ agheader_repair.o \
alloc_repair.o \ alloc_repair.o \
attr_repair.o \
bmap_repair.o \ bmap_repair.o \
cow_repair.o \ cow_repair.o \
fscounters_repair.o \ fscounters_repair.o \
...@@ -208,6 +210,7 @@ xfs-y += $(addprefix scrub/, \ ...@@ -208,6 +210,7 @@ xfs-y += $(addprefix scrub/, \
repair.o \ repair.o \
rmap_repair.o \ rmap_repair.o \
tempfile.o \ tempfile.o \
xfblob.o \
) )
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \ xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
......
...@@ -1055,7 +1055,7 @@ xfs_attr_set( ...@@ -1055,7 +1055,7 @@ xfs_attr_set(
* External routines when attribute list is inside the inode * External routines when attribute list is inside the inode
*========================================================================*/ *========================================================================*/
static inline int xfs_attr_sf_totsize(struct xfs_inode *dp) int xfs_attr_sf_totsize(struct xfs_inode *dp)
{ {
struct xfs_attr_sf_hdr *sf = dp->i_af.if_data; struct xfs_attr_sf_hdr *sf = dp->i_af.if_data;
......
...@@ -618,4 +618,6 @@ extern struct kmem_cache *xfs_attr_intent_cache; ...@@ -618,4 +618,6 @@ extern struct kmem_cache *xfs_attr_intent_cache;
int __init xfs_attr_intent_init_cache(void); int __init xfs_attr_intent_init_cache(void);
void xfs_attr_intent_destroy_cache(void); void xfs_attr_intent_destroy_cache(void);
int xfs_attr_sf_totsize(struct xfs_inode *dp);
#endif /* __XFS_ATTR_H__ */ #endif /* __XFS_ATTR_H__ */
...@@ -721,6 +721,11 @@ struct xfs_attr3_leafblock { ...@@ -721,6 +721,11 @@ struct xfs_attr3_leafblock {
#define XFS_ATTR_INCOMPLETE (1u << XFS_ATTR_INCOMPLETE_BIT) #define XFS_ATTR_INCOMPLETE (1u << XFS_ATTR_INCOMPLETE_BIT)
#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) #define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE)
#define XFS_ATTR_NAMESPACE_STR \
{ XFS_ATTR_LOCAL, "local" }, \
{ XFS_ATTR_ROOT, "root" }, \
{ XFS_ATTR_SECURE, "secure" }
/* /*
* Alignment for namelist and valuelist entries (since they are mixed * Alignment for namelist and valuelist entries (since they are mixed
* there can be only one alignment value) * there can be only one alignment value)
......
...@@ -675,7 +675,7 @@ xfs_exchmaps_rmapbt_blocks( ...@@ -675,7 +675,7 @@ xfs_exchmaps_rmapbt_blocks(
} }
/* Estimate the bmbt and rmapbt overhead required to exchange mappings. */ /* Estimate the bmbt and rmapbt overhead required to exchange mappings. */
static int int
xfs_exchmaps_estimate_overhead( xfs_exchmaps_estimate_overhead(
struct xfs_exchmaps_req *req) struct xfs_exchmaps_req *req)
{ {
......
...@@ -97,6 +97,7 @@ xfs_exchmaps_reqfork(const struct xfs_exchmaps_req *req) ...@@ -97,6 +97,7 @@ xfs_exchmaps_reqfork(const struct xfs_exchmaps_req *req)
return XFS_DATA_FORK; return XFS_DATA_FORK;
} }
int xfs_exchmaps_estimate_overhead(struct xfs_exchmaps_req *req);
int xfs_exchmaps_estimate(struct xfs_exchmaps_req *req); int xfs_exchmaps_estimate(struct xfs_exchmaps_req *req);
extern struct kmem_cache *xfs_exchmaps_intent_cache; extern struct kmem_cache *xfs_exchmaps_intent_cache;
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "xfs_trans_resv.h" #include "xfs_trans_resv.h"
#include "xfs_mount.h" #include "xfs_mount.h"
#include "xfs_log_format.h" #include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h" #include "xfs_inode.h"
#include "xfs_da_format.h" #include "xfs_da_format.h"
#include "xfs_da_btree.h" #include "xfs_da_btree.h"
...@@ -20,6 +21,8 @@ ...@@ -20,6 +21,8 @@
#include "scrub/common.h" #include "scrub/common.h"
#include "scrub/dabtree.h" #include "scrub/dabtree.h"
#include "scrub/attr.h" #include "scrub/attr.h"
#include "scrub/listxattr.h"
#include "scrub/repair.h"
/* Free the buffers linked from the xattr buffer. */ /* Free the buffers linked from the xattr buffer. */
static void static void
...@@ -35,6 +38,8 @@ xchk_xattr_buf_cleanup( ...@@ -35,6 +38,8 @@ xchk_xattr_buf_cleanup(
kvfree(ab->value); kvfree(ab->value);
ab->value = NULL; ab->value = NULL;
ab->value_sz = 0; ab->value_sz = 0;
kvfree(ab->name);
ab->name = NULL;
} }
/* /*
...@@ -65,7 +70,7 @@ xchk_xattr_want_freemap( ...@@ -65,7 +70,7 @@ xchk_xattr_want_freemap(
* reallocating the buffer if necessary. Buffer contents are not preserved * reallocating the buffer if necessary. Buffer contents are not preserved
* across a reallocation. * across a reallocation.
*/ */
static int int
xchk_setup_xattr_buf( xchk_setup_xattr_buf(
struct xfs_scrub *sc, struct xfs_scrub *sc,
size_t value_size) size_t value_size)
...@@ -95,6 +100,12 @@ xchk_setup_xattr_buf( ...@@ -95,6 +100,12 @@ xchk_setup_xattr_buf(
return -ENOMEM; return -ENOMEM;
} }
if (xchk_could_repair(sc)) {
ab->name = kvmalloc(XATTR_NAME_MAX + 1, XCHK_GFP_FLAGS);
if (!ab->name)
return -ENOMEM;
}
resize_value: resize_value:
if (ab->value_sz >= value_size) if (ab->value_sz >= value_size)
return 0; return 0;
...@@ -121,6 +132,12 @@ xchk_setup_xattr( ...@@ -121,6 +132,12 @@ xchk_setup_xattr(
{ {
int error; int error;
if (xchk_could_repair(sc)) {
error = xrep_setup_xattr(sc);
if (error)
return error;
}
/* /*
* We failed to get memory while checking attrs, so this time try to * We failed to get memory while checking attrs, so this time try to
* get all the memory we're ever going to need. Allocate the buffer * get all the memory we're ever going to need. Allocate the buffer
...@@ -137,90 +154,81 @@ xchk_setup_xattr( ...@@ -137,90 +154,81 @@ xchk_setup_xattr(
/* Extended Attributes */ /* Extended Attributes */
struct xchk_xattr {
struct xfs_attr_list_context context;
struct xfs_scrub *sc;
};
/* /*
* Check that an extended attribute key can be looked up by hash. * Check that an extended attribute key can be looked up by hash.
* *
* We use the XFS attribute list iterator (i.e. xfs_attr_list_ilocked) * We use the extended attribute walk helper to call this function for every
* to call this function for every attribute key in an inode. Once * attribute key in an inode. Once we're here, we load the attribute value to
* we're here, we load the attribute value to see if any errors happen, * see if any errors happen, or if we get more or less data than we expected.
* or if we get more or less data than we expected.
*/ */
static void static int
xchk_xattr_listent( xchk_xattr_actor(
struct xfs_attr_list_context *context, struct xfs_scrub *sc,
int flags, struct xfs_inode *ip,
unsigned char *name, unsigned int attr_flags,
int namelen, const unsigned char *name,
int valuelen) unsigned int namelen,
const void *value,
unsigned int valuelen,
void *priv)
{ {
struct xfs_da_args args = { struct xfs_da_args args = {
.op_flags = XFS_DA_OP_NOTIME, .op_flags = XFS_DA_OP_NOTIME,
.attr_filter = flags & XFS_ATTR_NSP_ONDISK_MASK, .attr_filter = attr_flags & XFS_ATTR_NSP_ONDISK_MASK,
.geo = context->dp->i_mount->m_attr_geo, .geo = sc->mp->m_attr_geo,
.whichfork = XFS_ATTR_FORK, .whichfork = XFS_ATTR_FORK,
.dp = context->dp, .dp = ip,
.name = name, .name = name,
.namelen = namelen, .namelen = namelen,
.hashval = xfs_da_hashname(name, namelen), .hashval = xfs_da_hashname(name, namelen),
.trans = context->tp, .trans = sc->tp,
.valuelen = valuelen, .valuelen = valuelen,
.owner = context->dp->i_ino, .owner = ip->i_ino,
}; };
struct xchk_xattr_buf *ab; struct xchk_xattr_buf *ab;
struct xchk_xattr *sx;
int error = 0; int error = 0;
sx = container_of(context, struct xchk_xattr, context); ab = sc->buf;
ab = sx->sc->buf;
if (xchk_should_terminate(sx->sc, &error)) { if (xchk_should_terminate(sc, &error))
context->seen_enough = error; return error;
return;
}
if (flags & XFS_ATTR_INCOMPLETE) { if (attr_flags & XFS_ATTR_INCOMPLETE) {
/* Incomplete attr key, just mark the inode for preening. */ /* Incomplete attr key, just mark the inode for preening. */
xchk_ino_set_preen(sx->sc, context->dp->i_ino); xchk_ino_set_preen(sc, ip->i_ino);
return; return 0;
} }
/* Only one namespace bit allowed. */ /* Only one namespace bit allowed. */
if (hweight32(flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) { if (hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) {
xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno); xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno);
goto fail_xref; return -ECANCELED;
} }
/* Does this name make sense? */ /* Does this name make sense? */
if (!xfs_attr_namecheck(name, namelen)) { if (!xfs_attr_namecheck(name, namelen)) {
xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno); xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno);
goto fail_xref; return -ECANCELED;
} }
/* /*
* Local xattr values are stored in the attr leaf block, so we don't * Local and shortform xattr values are stored in the attr leaf block,
* need to retrieve the value from a remote block to detect corruption * so we don't need to retrieve the value from a remote block to detect
* problems. * corruption problems.
*/ */
if (flags & XFS_ATTR_LOCAL) if (value)
goto fail_xref; return 0;
/* /*
* Try to allocate enough memory to extrat the attr value. If that * Try to allocate enough memory to extract the attr value. If that
* doesn't work, we overload the seen_enough variable to convey * doesn't work, return -EDEADLOCK as a signal to try again with a
* the error message back to the main scrub function. * maximally sized buffer.
*/ */
error = xchk_setup_xattr_buf(sx->sc, valuelen); error = xchk_setup_xattr_buf(sc, valuelen);
if (error == -ENOMEM) if (error == -ENOMEM)
error = -EDEADLOCK; error = -EDEADLOCK;
if (error) { if (error)
context->seen_enough = error; return error;
return;
}
args.value = ab->value; args.value = ab->value;
...@@ -228,16 +236,13 @@ xchk_xattr_listent( ...@@ -228,16 +236,13 @@ xchk_xattr_listent(
/* ENODATA means the hash lookup failed and the attr is bad */ /* ENODATA means the hash lookup failed and the attr is bad */
if (error == -ENODATA) if (error == -ENODATA)
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
if (!xchk_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno, if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, args.blkno,
&error)) &error))
goto fail_xref; return error;
if (args.valuelen != valuelen) if (args.valuelen != valuelen)
xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno);
args.blkno);
fail_xref: return 0;
if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
context->seen_enough = 1;
return;
} }
/* /*
...@@ -247,7 +252,7 @@ xchk_xattr_listent( ...@@ -247,7 +252,7 @@ xchk_xattr_listent(
* Within a char, the lowest bit of the char represents the byte with * Within a char, the lowest bit of the char represents the byte with
* the smallest address * the smallest address
*/ */
STATIC bool bool
xchk_xattr_set_map( xchk_xattr_set_map(
struct xfs_scrub *sc, struct xfs_scrub *sc,
unsigned long *map, unsigned long *map,
...@@ -404,6 +409,17 @@ xchk_xattr_block( ...@@ -404,6 +409,17 @@ xchk_xattr_block(
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
hdrsize = xfs_attr3_leaf_hdr_size(leaf); hdrsize = xfs_attr3_leaf_hdr_size(leaf);
/*
* Empty xattr leaf blocks mapped at block 0 are probably a byproduct
* of a race between setxattr and a log shutdown. Anywhere else in the
* attr fork is a corruption.
*/
if (leafhdr.count == 0) {
if (blk->blkno == 0)
xchk_da_set_preen(ds, level);
else
xchk_da_set_corrupt(ds, level);
}
if (leafhdr.usedbytes > mp->m_attr_geo->blksize) if (leafhdr.usedbytes > mp->m_attr_geo->blksize)
xchk_da_set_corrupt(ds, level); xchk_da_set_corrupt(ds, level);
if (leafhdr.firstused > mp->m_attr_geo->blksize) if (leafhdr.firstused > mp->m_attr_geo->blksize)
...@@ -412,6 +428,8 @@ xchk_xattr_block( ...@@ -412,6 +428,8 @@ xchk_xattr_block(
xchk_da_set_corrupt(ds, level); xchk_da_set_corrupt(ds, level);
if (!xchk_xattr_set_map(ds->sc, ab->usedmap, 0, hdrsize)) if (!xchk_xattr_set_map(ds->sc, ab->usedmap, 0, hdrsize))
xchk_da_set_corrupt(ds, level); xchk_da_set_corrupt(ds, level);
if (leafhdr.holes)
xchk_da_set_preen(ds, level);
if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
goto out; goto out;
...@@ -589,16 +607,6 @@ int ...@@ -589,16 +607,6 @@ int
xchk_xattr( xchk_xattr(
struct xfs_scrub *sc) struct xfs_scrub *sc)
{ {
struct xchk_xattr sx = {
.sc = sc,
.context = {
.dp = sc->ip,
.tp = sc->tp,
.resynch = 1,
.put_listent = xchk_xattr_listent,
.allow_incomplete = true,
},
};
xfs_dablk_t last_checked = -1U; xfs_dablk_t last_checked = -1U;
int error = 0; int error = 0;
...@@ -627,12 +635,6 @@ xchk_xattr( ...@@ -627,12 +635,6 @@ xchk_xattr(
/* /*
* Look up every xattr in this file by name and hash. * Look up every xattr in this file by name and hash.
* *
* Use the backend implementation of xfs_attr_list to call
* xchk_xattr_listent on every attribute key in this inode.
* In other words, we use the same iterator/callback mechanism
* that listattr uses to scrub extended attributes, though in our
* _listent function, we check the value of the attribute.
*
* The VFS only locks i_rwsem when modifying attrs, so keep all * The VFS only locks i_rwsem when modifying attrs, so keep all
* three locks held because that's the only way to ensure we're * three locks held because that's the only way to ensure we're
* the only thread poking into the da btree. We traverse the da * the only thread poking into the da btree. We traverse the da
...@@ -640,13 +642,9 @@ xchk_xattr( ...@@ -640,13 +642,9 @@ xchk_xattr(
* iteration, which doesn't really follow the usual buffer * iteration, which doesn't really follow the usual buffer
* locking order. * locking order.
*/ */
error = xfs_attr_list_ilocked(&sx.context); error = xchk_xattr_walk(sc, sc->ip, xchk_xattr_actor, NULL);
if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
return error; return error;
/* Did our listent function try to return any errors? */
if (sx.context.seen_enough < 0)
return sx.context.seen_enough;
return 0; return 0;
} }
...@@ -16,9 +16,16 @@ struct xchk_xattr_buf { ...@@ -16,9 +16,16 @@ struct xchk_xattr_buf {
/* Bitmap of free space in xattr leaf blocks. */ /* Bitmap of free space in xattr leaf blocks. */
unsigned long *freemap; unsigned long *freemap;
/* Memory buffer used to hold salvaged xattr names. */
unsigned char *name;
/* Memory buffer used to extract xattr values. */ /* Memory buffer used to extract xattr values. */
void *value; void *value;
size_t value_sz; size_t value_sz;
}; };
bool xchk_xattr_set_map(struct xfs_scrub *sc, unsigned long *map,
unsigned int start, unsigned int len);
int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size);
#endif /* __XFS_SCRUB_ATTR_H__ */ #endif /* __XFS_SCRUB_ATTR_H__ */
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_ATTR_REPAIR_H__
#define __XFS_SCRUB_ATTR_REPAIR_H__
/*
 * xrep_metadata_inode_forks() calls this to clear the attr fork of a metadata
 * inode, since metadata inodes should not have one.  A nonzero return is
 * treated as an error by that caller.
 */
int xrep_xattr_reset_fork(struct xfs_scrub *sc);
#endif /* __XFS_SCRUB_ATTR_REPAIR_H__ */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_DAB_BITMAP_H__
#define __XFS_SCRUB_DAB_BITMAP_H__
/*
 * Bitmap wrapper that is type-checked for xfs_dablk_t.  The xdab_bitmap_*
 * helpers take xfs_dablk_t block numbers and forward to the xbitmap32
 * functions on the embedded bitmap.
 */
struct xdab_bitmap {
/* Underlying xbitmap32 that the xdab_bitmap_* helpers operate on. */
struct xbitmap32 dabitmap;
};
/* Initialize the xbitmap32 embedded in @bitmap. */
static inline void xdab_bitmap_init(struct xdab_bitmap *bitmap)
{
	struct xbitmap32	*inner = &bitmap->dabitmap;

	xbitmap32_init(inner);
}
/* Tear down the xbitmap32 embedded in @bitmap. */
static inline void xdab_bitmap_destroy(struct xdab_bitmap *bitmap)
{
	struct xbitmap32	*inner = &bitmap->dabitmap;

	xbitmap32_destroy(inner);
}
/*
 * Set @len bits starting at @dabno in the embedded bitmap.  Returns whatever
 * xbitmap32_set() returns.
 */
static inline int xdab_bitmap_set(struct xdab_bitmap *bitmap,
		xfs_dablk_t dabno, xfs_extlen_t len)
{
	int			error;

	error = xbitmap32_set(&bitmap->dabitmap, dabno, len);
	return error;
}
/*
 * Query the embedded bitmap at @dabno; @len is passed through to
 * xbitmap32_test() and the result of that call is returned unchanged.
 */
static inline bool xdab_bitmap_test(struct xdab_bitmap *bitmap,
		xfs_dablk_t dabno, xfs_extlen_t *len)
{
	bool			is_set;

	is_set = xbitmap32_test(&bitmap->dabitmap, dabno, len);
	return is_set;
}
#endif /* __XFS_SCRUB_DAB_BITMAP_H__ */
...@@ -78,6 +78,22 @@ xchk_da_set_corrupt( ...@@ -78,6 +78,22 @@ xchk_da_set_corrupt(
__return_address); __return_address);
} }
/*
 * Mark the scrub as having found something that could be optimized (PREEN)
 * and emit a trace event naming the fork and the block at @level of the
 * current da btree path.
 */
void
xchk_da_set_preen(
	struct xchk_da_btree	*ds,
	int			level)
{
	ds->sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;

	trace_xchk_fblock_preen(ds->sc, ds->dargs.whichfork,
			xfs_dir2_da_to_db(ds->dargs.geo,
				ds->state->path.blk[level].blkno),
			__return_address);
}
/* Find an entry at a certain level in a da btree. */
static struct xfs_da_node_entry * static struct xfs_da_node_entry *
xchk_da_btree_node_entry( xchk_da_btree_node_entry(
struct xchk_da_btree *ds, struct xchk_da_btree *ds,
......
...@@ -35,6 +35,9 @@ bool xchk_da_process_error(struct xchk_da_btree *ds, int level, int *error); ...@@ -35,6 +35,9 @@ bool xchk_da_process_error(struct xchk_da_btree *ds, int level, int *error);
/* Check for da btree corruption. */ /* Check for da btree corruption. */
void xchk_da_set_corrupt(struct xchk_da_btree *ds, int level); void xchk_da_set_corrupt(struct xchk_da_btree *ds, int level);
void xchk_da_set_preen(struct xchk_da_btree *ds, int level);
void xchk_da_set_preen(struct xchk_da_btree *ds, int level);
int xchk_da_btree_hash(struct xchk_da_btree *ds, int level, __be32 *hashp); int xchk_da_btree_hash(struct xchk_da_btree *ds, int level, __be32 *hashp);
int xchk_da_btree(struct xfs_scrub *sc, int whichfork, int xchk_da_btree(struct xfs_scrub *sc, int whichfork,
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_attr_sf.h"
#include "xfs_trans.h"
#include "scrub/scrub.h"
#include "scrub/bitmap.h"
#include "scrub/dab_bitmap.h"
#include "scrub/listxattr.h"
/*
 * Invoke @attr_fn once for each entry of the shortform xattr structure held
 * in the inode's attr fork.  The value pointer handed to the callback is the
 * byte immediately following the name.  The first nonzero return from
 * @attr_fn stops the walk and is passed back; otherwise returns 0.
 */
STATIC int
xchk_xattr_walk_sf(
	struct xfs_scrub		*sc,
	struct xfs_inode		*ip,
	xchk_xattr_fn			attr_fn,
	void				*priv)
{
	struct xfs_attr_sf_hdr		*sfhdr = ip->i_af.if_data;
	struct xfs_attr_sf_entry	*ent = xfs_attr_sf_firstentry(sfhdr);
	unsigned int			idx = 0;

	while (idx < sfhdr->count) {
		int			ret;

		ret = attr_fn(sc, ip, ent->flags, ent->nameval, ent->namelen,
				&ent->nameval[ent->namelen], ent->valuelen,
				priv);
		if (ret)
			return ret;

		ent = xfs_attr_sf_nextentry(ent);
		idx++;
	}

	return 0;
}
/*
 * Invoke @attr_fn on each xattr entry stored in the leaf block held by @bp.
 *
 * Local entries pass a pointer to the value bytes that follow the name in the
 * block; remote entries pass a NULL value along with the recorded value
 * length.  The first nonzero return from @attr_fn ends the walk and is
 * returned to the caller; otherwise returns 0.
 */
STATIC int
xchk_xattr_walk_leaf_entries(
	struct xfs_scrub		*sc,
	struct xfs_inode		*ip,
	xchk_xattr_fn			attr_fn,
	struct xfs_buf			*bp,
	void				*priv)
{
	struct xfs_attr3_icleaf_hdr	hdr;
	struct xfs_attr_leafblock	*leaf = bp->b_addr;
	struct xfs_attr_leaf_entry	*entries;
	unsigned int			idx;

	xfs_attr3_leaf_hdr_from_disk(sc->mp->m_attr_geo, &hdr, leaf);
	entries = xfs_attr3_leaf_entryp(leaf);

	for (idx = 0; idx < hdr.count; idx++) {
		struct xfs_attr_leaf_entry	*ent = &entries[idx];
		unsigned char			*name;
		void				*value;
		unsigned int			namelen;
		unsigned int			valuelen;
		int				ret;

		if (!(ent->flags & XFS_ATTR_LOCAL)) {
			struct xfs_attr_leaf_name_remote	*rmt;

			/* Remote value: nothing in this block to point at. */
			rmt = xfs_attr3_leaf_name_remote(leaf, idx);
			name = rmt->name;
			namelen = rmt->namelen;
			value = NULL;
			valuelen = be32_to_cpu(rmt->valuelen);
		} else {
			struct xfs_attr_leaf_name_local		*loc;

			/* Local value: stored right after the name. */
			loc = xfs_attr3_leaf_name_local(leaf, idx);
			name = loc->nameval;
			namelen = loc->namelen;
			value = &loc->nameval[loc->namelen];
			valuelen = be16_to_cpu(loc->valuelen);
		}

		ret = attr_fn(sc, ip, ent->flags, name, namelen, value,
				valuelen, priv);
		if (ret)
			return ret;
	}

	return 0;
}
/*
 * Walk a leaf-format xattr structure: read the single leaf at dablk 0, call
 * @attr_fn for each of its entries, then release the buffer.  No loop
 * detector is allocated because only one block is visited.  Returns the read
 * error, or otherwise the result of walking the entries.
 */
STATIC int
xchk_xattr_walk_leaf(
	struct xfs_scrub		*sc,
	struct xfs_inode		*ip,
	xchk_xattr_fn			attr_fn,
	void				*priv)
{
	struct xfs_buf			*bp;
	int				ret;

	ret = xfs_attr3_leaf_read(sc->tp, ip, ip->i_ino, 0, &bp);
	if (!ret) {
		ret = xchk_xattr_walk_leaf_entries(sc, ip, attr_fn, bp, priv);
		xfs_trans_brelse(sc->tp, bp);
	}

	return ret;
}
/*
 * Find the leftmost leaf in the xattr dabtree.
 *
 * Starting at dablk 0, read each block and, while it carries a da node magic,
 * follow the ->before pointer of the first entry in the node's btree array
 * down to the next level.  The descent stops at the first block carrying an
 * attr leaf magic.
 *
 * Every block visited is recorded in @seen_dablks; arriving at a block that is
 * already recorded returns -EFSCORRUPTED.  Node levels are also checked: the
 * root sets the expected level of its child, each lower node must match and
 * decrement it, and the leaf must be reached exactly when it hits zero.
 *
 * On success returns 0 with the leaf buffer stored in *@leaf_bpp; the buffer
 * is not released here.  On failure returns a negative errno and no buffer is
 * left held by this function.
 */
STATIC int
xchk_xattr_find_leftmost_leaf(
struct xfs_scrub *sc,
struct xfs_inode *ip,
struct xdab_bitmap *seen_dablks,
struct xfs_buf **leaf_bpp)
{
struct xfs_da3_icnode_hdr nodehdr;
struct xfs_mount *mp = sc->mp;
struct xfs_trans *tp = sc->tp;
struct xfs_da_intnode *node;
struct xfs_da_node_entry *btree;
struct xfs_buf *bp;
xfs_failaddr_t fa;
xfs_dablk_t blkno = 0;
unsigned int expected_level = 0;
int error;
for (;;) {
xfs_extlen_t len = 1;
uint16_t magic;
/*
 * Make sure we haven't seen this new block already.  No buffer is
 * held at this point (the previous one was released at the bottom of
 * the loop), so a plain return is sufficient.
 */
if (xdab_bitmap_test(seen_dablks, blkno, &len))
return -EFSCORRUPTED;
error = xfs_da3_node_read(tp, ip, blkno, &bp, XFS_ATTR_FORK);
if (error)
return error;
node = bp->b_addr;
magic = be16_to_cpu(node->hdr.info.magic);
/* An attr leaf magic ends the descent; bp stays held for the checks below. */
if (magic == XFS_ATTR_LEAF_MAGIC ||
magic == XFS_ATTR3_LEAF_MAGIC)
break;
/* Preset the error so each "goto out_buf" below reports corruption. */
error = -EFSCORRUPTED;
if (magic != XFS_DA_NODE_MAGIC &&
magic != XFS_DA3_NODE_MAGIC)
goto out_buf;
fa = xfs_da3_node_header_check(bp, ip->i_ino);
if (fa)
goto out_buf;
xfs_da3_node_hdr_from_disk(mp, &nodehdr, node);
if (nodehdr.count == 0 || nodehdr.level >= XFS_DA_NODE_MAXDEPTH)
goto out_buf;
/*
 * Check the level from the root node.  The root (dablk 0) establishes
 * the level expected of its child; every other node must be at the
 * expected level, which then drops by one for the next block.
 */
if (blkno == 0)
expected_level = nodehdr.level - 1;
else if (expected_level != nodehdr.level)
goto out_buf;
else
expected_level--;
/* Remember that we've seen this node. */
error = xdab_bitmap_set(seen_dablks, blkno, 1);
if (error)
goto out_buf;
/* Find the next level towards the leaves of the dabtree. */
btree = nodehdr.btree;
blkno = be32_to_cpu(btree->before);
xfs_trans_brelse(tp, bp);
}
/* bp is the candidate leaf; any failed check below is corruption. */
error = -EFSCORRUPTED;
fa = xfs_attr3_leaf_header_check(bp, ip->i_ino);
if (fa)
goto out_buf;
/* The leaf must sit exactly one level below the last node visited. */
if (expected_level != 0)
goto out_buf;
/* Remember that we've seen this leaf. */
error = xdab_bitmap_set(seen_dablks, blkno, 1);
if (error)
goto out_buf;
/* Success: hand the still-held leaf buffer to the caller. */
*leaf_bpp = bp;
return 0;
out_buf:
xfs_trans_brelse(tp, bp);
return error;
}
/*
 * Call a function for every entry in a node-format xattr structure.
 *
 * Locates the leftmost leaf, then visits each leaf in turn by following the
 * forward sibling pointer (forw) from the leaf header until it is zero.  A
 * bitmap of visited dablks is kept for the duration of the walk; a forward
 * pointer that leads to an already-visited block returns -EFSCORRUPTED.
 *
 * Returns 0 once the last leaf has been walked, the first nonzero value
 * returned by @attr_fn, or a negative errno from reading/recording blocks.
 */
STATIC int
xchk_xattr_walk_node(
struct xfs_scrub *sc,
struct xfs_inode *ip,
xchk_xattr_fn attr_fn,
void *priv)
{
struct xfs_attr3_icleaf_hdr leafhdr;
struct xdab_bitmap seen_dablks;
struct xfs_mount *mp = sc->mp;
struct xfs_attr_leafblock *leaf;
struct xfs_buf *leaf_bp;
int error;
xdab_bitmap_init(&seen_dablks);
error = xchk_xattr_find_leftmost_leaf(sc, ip, &seen_dablks, &leaf_bp);
if (error)
goto out_bitmap;
for (;;) {
xfs_extlen_t len;
error = xchk_xattr_walk_leaf_entries(sc, ip, attr_fn, leaf_bp,
priv);
if (error)
goto out_leaf;
/* Find the right sibling of this leaf block. */
leaf = leaf_bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
/* No forward sibling: this was the last leaf; error is 0 here. */
if (leafhdr.forw == 0)
goto out_leaf;
/*
 * leafhdr is a decoded copy of the header, so forw remains usable
 * after the buffer is released.  From here until the next read
 * succeeds no buffer is held, hence the jumps to out_bitmap.
 */
xfs_trans_brelse(sc->tp, leaf_bp);
/* Make sure we haven't seen this new leaf already. */
len = 1;
if (xdab_bitmap_test(&seen_dablks, leafhdr.forw, &len)) {
error = -EFSCORRUPTED;
goto out_bitmap;
}
error = xfs_attr3_leaf_read(sc->tp, ip, ip->i_ino,
leafhdr.forw, &leaf_bp);
if (error)
goto out_bitmap;
/* Remember that we've seen this new leaf. */
error = xdab_bitmap_set(&seen_dablks, leafhdr.forw, 1);
if (error)
goto out_leaf;
}
/* Reached with a leaf buffer held: release it, then fall through. */
out_leaf:
xfs_trans_brelse(sc->tp, leaf_bp);
/* Reached with no buffer held: only the bitmap needs tearing down. */
out_bitmap:
xdab_bitmap_destroy(&seen_dablks);
return error;
}
/*
 * Call a function for every extended attribute in a file.
 *
 * Dispatches on the attr fork layout: shortform entries are walked directly
 * from the inode; otherwise the attr fork extents are loaded and either the
 * single-leaf or the node-format walker is used.  Returns 0 immediately if
 * the inode has no attrs.
 *
 * Callers must hold the ILOCK.  No validation or cursor restarts allowed.
 * Returns -EFSCORRUPTED on any problem, including loops in the dabtree.
 */
int
xchk_xattr_walk(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip,
	xchk_xattr_fn		attr_fn,
	void			*priv)
{
	int			ret;

	xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	if (!xfs_inode_hasattr(ip))
		return 0;

	if (ip->i_af.if_format != XFS_DINODE_FMT_LOCAL) {
		/* attr functions require that the attr fork is loaded */
		ret = xfs_iread_extents(sc->tp, ip, XFS_ATTR_FORK);
		if (ret)
			return ret;

		if (!xfs_attr_is_leaf(ip))
			return xchk_xattr_walk_node(sc, ip, attr_fn, priv);

		return xchk_xattr_walk_leaf(sc, ip, attr_fn, priv);
	}

	return xchk_xattr_walk_sf(sc, ip, attr_fn, priv);
}
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_LISTXATTR_H__
#define __XFS_SCRUB_LISTXATTR_H__
/*
 * Callback invoked by xchk_xattr_walk() for each extended attribute.
 *
 * @attr_flags is the flags field of the on-disk entry; @name/@namelen give
 * the attribute name.  @value points at the value bytes when they are stored
 * alongside the name (shortform and local leaf entries) and is NULL for
 * remote values; @valuelen is the recorded value length in either case.
 * @priv is the opaque pointer given to xchk_xattr_walk().
 *
 * Returning nonzero stops the walk and that value is returned to the caller
 * of xchk_xattr_walk().
 */
typedef int (*xchk_xattr_fn)(struct xfs_scrub *sc, struct xfs_inode *ip,
unsigned int attr_flags, const unsigned char *name,
unsigned int namelen, const void *value, unsigned int valuelen,
void *priv);
/*
 * Call @attr_fn for every extended attribute of @ip, passing @priv through
 * unchanged.  Returns 0 or the first nonzero value produced by the walk.
 */
int xchk_xattr_walk(struct xfs_scrub *sc, struct xfs_inode *ip,
xchk_xattr_fn attr_fn, void *priv);
#endif /* __XFS_SCRUB_LISTXATTR_H__ */
...@@ -32,6 +32,9 @@ ...@@ -32,6 +32,9 @@
#include "xfs_reflink.h" #include "xfs_reflink.h"
#include "xfs_health.h" #include "xfs_health.h"
#include "xfs_buf_mem.h" #include "xfs_buf_mem.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "scrub/scrub.h" #include "scrub/scrub.h"
#include "scrub/common.h" #include "scrub/common.h"
#include "scrub/trace.h" #include "scrub/trace.h"
...@@ -39,6 +42,7 @@ ...@@ -39,6 +42,7 @@
#include "scrub/bitmap.h" #include "scrub/bitmap.h"
#include "scrub/stats.h" #include "scrub/stats.h"
#include "scrub/xfile.h" #include "scrub/xfile.h"
#include "scrub/attr_repair.h"
/* /*
* Attempt to repair some metadata, if the metadata is corrupt and userspace * Attempt to repair some metadata, if the metadata is corrupt and userspace
...@@ -1136,6 +1140,17 @@ xrep_metadata_inode_forks( ...@@ -1136,6 +1140,17 @@ xrep_metadata_inode_forks(
return error; return error;
} }
/* Clear the attr forks since metadata shouldn't have that. */
if (xfs_inode_hasattr(sc->ip)) {
if (!dirty) {
dirty = true;
xfs_trans_ijoin(sc->tp, sc->ip, 0);
}
error = xrep_xattr_reset_fork(sc);
if (error)
return error;
}
/* /*
* If we modified the inode, roll the transaction but don't rejoin the * If we modified the inode, roll the transaction but don't rejoin the
* inode to the new transaction because xrep_bmap_data can do that. * inode to the new transaction because xrep_bmap_data can do that.
...@@ -1201,3 +1216,34 @@ xrep_trans_cancel_hook_dummy( ...@@ -1201,3 +1216,34 @@ xrep_trans_cancel_hook_dummy(
current->journal_info = *cookiep; current->journal_info = *cookiep;
*cookiep = NULL; *cookiep = NULL;
} }
/*
* See if this buffer can pass the given ->verify_struct() function.
*
* If the buffer already has ops attached and they're not the ones that were
* passed in, we reject the buffer. Otherwise, we perform the structure test
* (note that we do not check CRCs) and return the outcome of the test. The
* buffer ops and error state are left unchanged.
*/
bool
xrep_buf_verify_struct(
struct xfs_buf *bp,
const struct xfs_buf_ops *ops)
{
const struct xfs_buf_ops *old_ops = bp->b_ops;
xfs_failaddr_t fa;
int old_error;
if (old_ops) {
if (old_ops != ops)
return false;
}
old_error = bp->b_error;
bp->b_ops = ops;
fa = bp->b_ops->verify_struct(bp);
bp->b_ops = old_ops;
bp->b_error = old_error;
return fa == NULL;
}
...@@ -90,6 +90,7 @@ int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten); ...@@ -90,6 +90,7 @@ int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten);
int xrep_metadata_inode_forks(struct xfs_scrub *sc); int xrep_metadata_inode_forks(struct xfs_scrub *sc);
int xrep_setup_ag_rmapbt(struct xfs_scrub *sc); int xrep_setup_ag_rmapbt(struct xfs_scrub *sc);
int xrep_setup_ag_refcountbt(struct xfs_scrub *sc); int xrep_setup_ag_refcountbt(struct xfs_scrub *sc);
int xrep_setup_xattr(struct xfs_scrub *sc);
/* Repair setup functions */ /* Repair setup functions */
int xrep_setup_ag_allocbt(struct xfs_scrub *sc); int xrep_setup_ag_allocbt(struct xfs_scrub *sc);
...@@ -123,6 +124,7 @@ int xrep_bmap_attr(struct xfs_scrub *sc); ...@@ -123,6 +124,7 @@ int xrep_bmap_attr(struct xfs_scrub *sc);
int xrep_bmap_cow(struct xfs_scrub *sc); int xrep_bmap_cow(struct xfs_scrub *sc);
int xrep_nlinks(struct xfs_scrub *sc); int xrep_nlinks(struct xfs_scrub *sc);
int xrep_fscounters(struct xfs_scrub *sc); int xrep_fscounters(struct xfs_scrub *sc);
int xrep_xattr(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT #ifdef CONFIG_XFS_RT
int xrep_rtbitmap(struct xfs_scrub *sc); int xrep_rtbitmap(struct xfs_scrub *sc);
...@@ -147,6 +149,8 @@ int xrep_trans_alloc_hook_dummy(struct xfs_mount *mp, void **cookiep, ...@@ -147,6 +149,8 @@ int xrep_trans_alloc_hook_dummy(struct xfs_mount *mp, void **cookiep,
struct xfs_trans **tpp); struct xfs_trans **tpp);
void xrep_trans_cancel_hook_dummy(void **cookiep, struct xfs_trans *tp); void xrep_trans_cancel_hook_dummy(void **cookiep, struct xfs_trans *tp);
bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
#else #else
#define xrep_ino_dqattach(sc) (0) #define xrep_ino_dqattach(sc) (0)
...@@ -190,6 +194,7 @@ xrep_setup_nothing( ...@@ -190,6 +194,7 @@ xrep_setup_nothing(
#define xrep_setup_ag_allocbt xrep_setup_nothing #define xrep_setup_ag_allocbt xrep_setup_nothing
#define xrep_setup_ag_rmapbt xrep_setup_nothing #define xrep_setup_ag_rmapbt xrep_setup_nothing
#define xrep_setup_ag_refcountbt xrep_setup_nothing #define xrep_setup_ag_refcountbt xrep_setup_nothing
#define xrep_setup_xattr xrep_setup_nothing
#define xrep_setup_inode(sc, imap) ((void)0) #define xrep_setup_inode(sc, imap) ((void)0)
...@@ -215,6 +220,7 @@ xrep_setup_nothing( ...@@ -215,6 +220,7 @@ xrep_setup_nothing(
#define xrep_nlinks xrep_notsupported #define xrep_nlinks xrep_notsupported
#define xrep_fscounters xrep_notsupported #define xrep_fscounters xrep_notsupported
#define xrep_rtsummary xrep_notsupported #define xrep_rtsummary xrep_notsupported
#define xrep_xattr xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */ #endif /* CONFIG_XFS_ONLINE_REPAIR */
......
...@@ -331,7 +331,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { ...@@ -331,7 +331,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.type = ST_INODE, .type = ST_INODE,
.setup = xchk_setup_xattr, .setup = xchk_setup_xattr,
.scrub = xchk_xattr, .scrub = xchk_xattr,
.repair = xrep_notsupported, .repair = xrep_xattr,
}, },
[XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */
.type = ST_INODE, .type = ST_INODE,
......
...@@ -14,6 +14,8 @@ struct xrep_tempexch { ...@@ -14,6 +14,8 @@ struct xrep_tempexch {
int xrep_tempexch_enable(struct xfs_scrub *sc); int xrep_tempexch_enable(struct xfs_scrub *sc);
int xrep_tempexch_trans_reserve(struct xfs_scrub *sc, int whichfork, int xrep_tempexch_trans_reserve(struct xfs_scrub *sc, int whichfork,
struct xrep_tempexch *ti); struct xrep_tempexch *ti);
int xrep_tempexch_trans_alloc(struct xfs_scrub *sc, int whichfork,
struct xrep_tempexch *ti);
int xrep_tempexch_contents(struct xfs_scrub *sc, struct xrep_tempexch *ti); int xrep_tempexch_contents(struct xfs_scrub *sc, struct xrep_tempexch *ti);
#endif /* CONFIG_XFS_ONLINE_REPAIR */ #endif /* CONFIG_XFS_ONLINE_REPAIR */
......
...@@ -239,6 +239,28 @@ xrep_tempfile_iunlock( ...@@ -239,6 +239,28 @@ xrep_tempfile_iunlock(
sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL; sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
} }
/*
 * Take ILOCK_EXCL on the file being scrubbed and on the temporary file so
 * that changes can be made to both, then record in the scrub context that
 * both locks are held.
 */
void
xrep_tempfile_ilock_both(
	struct xfs_scrub	*sc)
{
	const unsigned int	lock_mode = XFS_ILOCK_EXCL;

	xfs_lock_two_inodes(sc->ip, lock_mode, sc->tempip, lock_mode);

	/* Remember what we hold so the unlock helpers know what to drop. */
	sc->temp_ilock_flags |= lock_mode;
	sc->ilock_flags |= lock_mode;
}
/*
 * Drop ILOCK_EXCL on both files: the temporary file first (through
 * xrep_tempfile_iunlock), then the file being scrubbed (through
 * xchk_iunlock).
 */
void
xrep_tempfile_iunlock_both(
	struct xfs_scrub	*sc)
{
	const unsigned int	lock_mode = XFS_ILOCK_EXCL;

	xrep_tempfile_iunlock(sc);
	xchk_iunlock(sc, lock_mode);
}
/* Release the temporary file. */ /* Release the temporary file. */
void void
xrep_tempfile_rele( xrep_tempfile_rele(
...@@ -514,6 +536,89 @@ xrep_tempexch_prep_request( ...@@ -514,6 +536,89 @@ xrep_tempexch_prep_request(
return 0; return 0;
} }
/*
* Fill out the mapping exchange resource estimation structures in preparation
* for exchanging the contents of a metadata file that we've rebuilt in the
* temp file. Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files.
*/
STATIC int
xrep_tempexch_estimate(
struct xfs_scrub *sc,
struct xrep_tempexch *tx)
{
struct xfs_exchmaps_req *req = &tx->req;
struct xfs_ifork *ifp;
struct xfs_ifork *tifp;
int whichfork = xfs_exchmaps_reqfork(req);
int state = 0;
/*
* The exchmaps code only knows how to exchange file fork space
* mappings. Any fork data in local format must be promoted to a
* single block before the exchange can take place.
*/
ifp = xfs_ifork_ptr(sc->ip, whichfork);
if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
state |= 1;
tifp = xfs_ifork_ptr(sc->tempip, whichfork);
if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
state |= 2;
switch (state) {
case 0:
/* Both files have mapped extents; use the regular estimate. */
return xfs_exchrange_estimate(req);
case 1:
/*
* The file being repaired is in local format, but the temp
* file has mapped extents. To perform the exchange, the file
* being repaired must have its shorform data converted to an
* ondisk block so that the forks will be in extents format.
* We need one resblk for the conversion; the number of
* exchanges is (worst case) the temporary file's extent count
* plus the block we converted.
*/
req->ip1_bcount = sc->tempip->i_nblocks;
req->ip2_bcount = 1;
req->nr_exchanges = 1 + tifp->if_nextents;
req->resblks = 1;
break;
case 2:
/*
* The temporary file is in local format, but the file being
* repaired has mapped extents. To perform the exchange, the
* temp file must have its shortform data converted to an
* ondisk block, and the fork changed to extents format. We
* need one resblk for the conversion; the number of exchanges
* is (worst case) the extent count of the file being repaired
* plus the block we converted.
*/
req->ip1_bcount = 1;
req->ip2_bcount = sc->ip->i_nblocks;
req->nr_exchanges = 1 + ifp->if_nextents;
req->resblks = 1;
break;
case 3:
/*
* Both forks are in local format. To perform the exchange,
* both files must have their shortform data converted to
* fsblocks, and both forks must be converted to extents
* format. We need two resblks for the two conversions, and
* the number of exchanges is 1 since there's only one block at
* fileoff 0. Presumably, the caller could not exchange the
* two inode fork areas directly.
*/
req->ip1_bcount = 1;
req->ip2_bcount = 1;
req->nr_exchanges = 1;
req->resblks = 2;
break;
}
return xfs_exchmaps_estimate_overhead(req);
}
/* /*
* Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip
* this if quota enforcement is disabled or if both inodes' dquots are the * this if quota enforcement is disabled or if both inodes' dquots are the
...@@ -604,6 +709,55 @@ xrep_tempexch_trans_reserve( ...@@ -604,6 +709,55 @@ xrep_tempexch_trans_reserve(
return xrep_tempexch_reserve_quota(sc, tx); return xrep_tempexch_reserve_quota(sc, tx);
} }
/*
 * Allocate a new transaction for a file contents exchange.
 *
 * The steps, in order: fill out the mapping exchange request, estimate the
 * resources needed to exchange the contents of a metadata file that has been
 * rebuilt in the temp file, allocate a transaction with that block
 * reservation, take ILOCK_EXCL on both inodes and join them to the
 * transaction, and finally reserve quota.
 *
 * The caller is responsible for dropping both ILOCKs when appropriate.
 * Returns zero or the first error reported by any of the steps.
 */
int
xrep_tempexch_trans_alloc(
	struct xfs_scrub	*sc,
	int			whichfork,
	struct xrep_tempexch	*tx)
{
	unsigned int		tflags;
	int			ret;

	ASSERT(sc->tp == NULL);

	ret = xrep_tempexch_prep_request(sc, whichfork, tx);
	if (ret)
		return ret;

	ret = xrep_tempexch_estimate(sc, tx);
	if (ret)
		return ret;

	tflags = xfs_has_lazysbcount(sc->mp) ? XFS_TRANS_RES_FDBLKS : 0;

	ret = xrep_tempexch_enable(sc);
	if (ret)
		return ret;

	ret = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
			tx->req.resblks, 0, tflags, &sc->tp);
	if (ret)
		return ret;

	/* Note the locks in the scrub context, then actually take them. */
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
	xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip);

	return xrep_tempexch_reserve_quota(sc, tx);
}
/* /*
* Exchange file mappings (and hence file contents) between the file being * Exchange file mappings (and hence file contents) between the file being
* repaired and the temporary file. Returns with both inodes locked and joined * repaired and the temporary file. Returns with both inodes locked and joined
...@@ -637,3 +791,53 @@ xrep_tempexch_contents( ...@@ -637,3 +791,53 @@ xrep_tempexch_contents(
return 0; return 0;
} }
/*
 * Copy local format data from one of the temporary file's forks into the same
 * fork of the file being repaired.  For the data fork, the incore and ondisk
 * file sizes of the temporary file are copied over as well.  The inode core
 * and the fork data are then logged in the scrub transaction.
 *
 * Caller must ensure that the file being repaired has enough fork space to
 * hold all the bytes.
 */
void
xrep_tempfile_copyout_local(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_ifork	*src = xfs_ifork_ptr(sc->tempip, whichfork);
	struct xfs_ifork	*dst = xfs_ifork_ptr(sc->ip, whichfork);

	ASSERT(src != NULL);
	ASSERT(dst != NULL);
	ASSERT(src->if_format == XFS_DINODE_FMT_LOCAL);
	ASSERT(dst->if_format == XFS_DINODE_FMT_LOCAL);

	/* Sanity-check the fork geometry; bail on forks we don't handle. */
	if (whichfork == XFS_DATA_FORK) {
		ASSERT(sc->tempip->i_disk_size <=
					xfs_inode_data_fork_size(sc->ip));
	} else if (whichfork == XFS_ATTR_FORK) {
		ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
	} else {
		ASSERT(0);
		return;
	}

	/* Rebuild @sc->ip's incore fork from the temp file's fork data. */
	xfs_idestroy_fork(dst);
	xfs_init_local_fork(sc->ip, whichfork, src->if_data, src->if_bytes);

	if (whichfork == XFS_DATA_FORK) {
		i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
		sc->ip->i_disk_size = sc->tempip->i_disk_size;
	}

	xfs_trans_log_inode(sc->tp, sc->ip,
			XFS_ILOG_CORE | xfs_ilog_fdata(whichfork));
}
...@@ -17,6 +17,8 @@ void xrep_tempfile_iounlock(struct xfs_scrub *sc); ...@@ -17,6 +17,8 @@ void xrep_tempfile_iounlock(struct xfs_scrub *sc);
void xrep_tempfile_ilock(struct xfs_scrub *sc); void xrep_tempfile_ilock(struct xfs_scrub *sc);
bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc); bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc);
void xrep_tempfile_iunlock(struct xfs_scrub *sc); void xrep_tempfile_iunlock(struct xfs_scrub *sc);
void xrep_tempfile_iunlock_both(struct xfs_scrub *sc);
void xrep_tempfile_ilock_both(struct xfs_scrub *sc);
int xrep_tempfile_prealloc(struct xfs_scrub *sc, xfs_fileoff_t off, int xrep_tempfile_prealloc(struct xfs_scrub *sc, xfs_fileoff_t off,
xfs_filblks_t len); xfs_filblks_t len);
...@@ -32,6 +34,7 @@ int xrep_tempfile_copyin(struct xfs_scrub *sc, xfs_fileoff_t off, ...@@ -32,6 +34,7 @@ int xrep_tempfile_copyin(struct xfs_scrub *sc, xfs_fileoff_t off,
int xrep_tempfile_set_isize(struct xfs_scrub *sc, unsigned long long isize); int xrep_tempfile_set_isize(struct xfs_scrub *sc, unsigned long long isize);
int xrep_tempfile_roll_trans(struct xfs_scrub *sc); int xrep_tempfile_roll_trans(struct xfs_scrub *sc);
void xrep_tempfile_copyout_local(struct xfs_scrub *sc, int whichfork);
#else #else
static inline void xrep_tempfile_iolock_both(struct xfs_scrub *sc) static inline void xrep_tempfile_iolock_both(struct xfs_scrub *sc)
{ {
......
...@@ -365,6 +365,7 @@ DEFINE_EVENT(xchk_fblock_error_class, name, \ ...@@ -365,6 +365,7 @@ DEFINE_EVENT(xchk_fblock_error_class, name, \
DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_error); DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_error);
DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_warning); DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_warning);
DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_preen);
#ifdef CONFIG_XFS_QUOTA #ifdef CONFIG_XFS_QUOTA
DECLARE_EVENT_CLASS(xchk_dqiter_class, DECLARE_EVENT_CLASS(xchk_dqiter_class,
...@@ -948,6 +949,7 @@ DEFINE_XFILE_EVENT(xfile_store); ...@@ -948,6 +949,7 @@ DEFINE_XFILE_EVENT(xfile_store);
DEFINE_XFILE_EVENT(xfile_seek_data); DEFINE_XFILE_EVENT(xfile_seek_data);
DEFINE_XFILE_EVENT(xfile_get_folio); DEFINE_XFILE_EVENT(xfile_get_folio);
DEFINE_XFILE_EVENT(xfile_put_folio); DEFINE_XFILE_EVENT(xfile_put_folio);
DEFINE_XFILE_EVENT(xfile_discard);
TRACE_EVENT(xfarray_create, TRACE_EVENT(xfarray_create,
TP_PROTO(struct xfarray *xfa, unsigned long long required_capacity), TP_PROTO(struct xfarray *xfa, unsigned long long required_capacity),
...@@ -2415,6 +2417,89 @@ TRACE_EVENT(xreap_bmapi_binval_scan, ...@@ -2415,6 +2417,89 @@ TRACE_EVENT(xreap_bmapi_binval_scan,
__entry->scan_blocks) __entry->scan_blocks)
); );
/*
 * Tracepoint recording an xattr leaf block of inode @ip: the block's offset
 * (@dabno) and the magic number (@magic) supplied by the caller.
 * NOTE(review): presumably fired when repair tries to salvage a leaf block;
 * confirm against the call site.
 */
TRACE_EVENT(xrep_xattr_recover_leafblock,
TP_PROTO(struct xfs_inode *ip, xfs_dablk_t dabno, uint16_t magic),
TP_ARGS(ip, dabno, magic),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_dablk_t, dabno)
__field(uint16_t, magic)
),
TP_fast_assign(
__entry->dev = ip->i_mount->m_super->s_dev;
__entry->ino = ip->i_ino;
__entry->dabno = dabno;
__entry->magic = magic;
),
TP_printk("dev %d:%d ino 0x%llx dablk 0x%x magic 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->dabno,
__entry->magic)
);
/*
 * Event class for tracepoints that describe a single extended attribute of
 * inode @ip: its namespace @flags, its @name and the length of its value.
 * Exactly @namelen bytes of @name are copied into the trace record, and the
 * name is printed with an explicit length, so it need not be NUL-terminated.
 */
DECLARE_EVENT_CLASS(xrep_xattr_salvage_class,
TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name,
unsigned int namelen, unsigned int valuelen),
TP_ARGS(ip, flags, name, namelen, valuelen),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(unsigned int, flags)
__field(unsigned int, namelen)
__dynamic_array(char, name, namelen)
__field(unsigned int, valuelen)
),
TP_fast_assign(
__entry->dev = ip->i_mount->m_super->s_dev;
__entry->ino = ip->i_ino;
__entry->flags = flags;
__entry->namelen = namelen;
memcpy(__get_str(name), name, namelen);
__entry->valuelen = valuelen;
),
TP_printk("dev %d:%d ino 0x%llx flags %s name '%.*s' valuelen 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_flags(__entry->flags, "|", XFS_ATTR_NAMESPACE_STR),
__entry->namelen,
__get_str(name),
__entry->valuelen)
);
/*
 * Define a tracepoint called @name that uses the prototype, arguments and
 * output format of xrep_xattr_salvage_class.
 */
#define DEFINE_XREP_XATTR_SALVAGE_EVENT(name) \
DEFINE_EVENT(xrep_xattr_salvage_class, name, \
TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name, \
unsigned int namelen, unsigned int valuelen), \
TP_ARGS(ip, flags, name, namelen, valuelen))
/* Tracepoints built on xrep_xattr_salvage_class. */
DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_salvage_rec);
DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_insert_rec);
/*
 * Event class for xattr repair tracepoints that report two inodes: the inode
 * being worked on (@ip) and a second inode (@arg_ip) that is printed as the
 * "src" inode number.
 *
 * This must be an event class rather than a TRACE_EVENT: it is only ever
 * instantiated through DEFINE_EVENT, and TRACE_EVENT would additionally
 * create a tracepoint literally named "xrep_xattr_class".
 */
DECLARE_EVENT_CLASS(xrep_xattr_class,
	TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip),
	TP_ARGS(ip, arg_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_ino_t, src_ino)
	),
	TP_fast_assign(
		__entry->dev = ip->i_mount->m_super->s_dev;
		__entry->ino = ip->i_ino;
		__entry->src_ino = arg_ip->i_ino;
	),
	TP_printk("dev %d:%d ino 0x%llx src 0x%llx",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->src_ino)
);
/*
 * Define a tracepoint called @name that takes the two inode arguments of
 * xrep_xattr_class.
 */
#define DEFINE_XREP_XATTR_EVENT(name) \
DEFINE_EVENT(xrep_xattr_class, name, \
TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip), \
TP_ARGS(ip, arg_ip))
/* Tracepoints built on xrep_xattr_class. */
DEFINE_XREP_XATTR_EVENT(xrep_xattr_rebuild_tree);
DEFINE_XREP_XATTR_EVENT(xrep_xattr_reset_fork);
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */ #endif /* _TRACE_XFS_SCRUB_TRACE_H */
......
...@@ -1051,3 +1051,20 @@ xfarray_sort( ...@@ -1051,3 +1051,20 @@ xfarray_sort(
kvfree(si); kvfree(si);
return error; return error;
} }
/*
 * Report how many bytes this array is consuming, as measured by the xfile
 * that backs it.
 */
unsigned long long
xfarray_bytes(
	struct xfarray		*array)
{
	struct xfile		*backing = array->xfile;

	return xfile_bytes(backing);
}
/*
 * Empty the entire array: discard everything in the backing xfile from
 * offset zero onwards and reset the element count to zero.
 */
void
xfarray_truncate(
	struct xfarray		*array)
{
	struct xfile		*backing = array->xfile;

	xfile_discard(backing, 0, MAX_LFS_FILESIZE);
	array->nr = 0;
}
...@@ -44,6 +44,8 @@ int xfarray_unset(struct xfarray *array, xfarray_idx_t idx); ...@@ -44,6 +44,8 @@ int xfarray_unset(struct xfarray *array, xfarray_idx_t idx);
int xfarray_store(struct xfarray *array, xfarray_idx_t idx, const void *ptr); int xfarray_store(struct xfarray *array, xfarray_idx_t idx, const void *ptr);
int xfarray_store_anywhere(struct xfarray *array, const void *ptr); int xfarray_store_anywhere(struct xfarray *array, const void *ptr);
bool xfarray_element_is_null(struct xfarray *array, const void *ptr); bool xfarray_element_is_null(struct xfarray *array, const void *ptr);
void xfarray_truncate(struct xfarray *array);
unsigned long long xfarray_bytes(struct xfarray *array);
/* /*
* Load an array element, but zero the buffer if there's no data because we * Load an array element, but zero the buffer if there's no data because we
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/xfblob.h"
/*
* XFS Blob Storage
* ================
* Stores and retrieves blobs using an xfile. Objects are appended to the file
* and the offset is returned as a magic cookie for retrieval.
*/
/* Magic number stamped into the header of every stored blob. */
#define XB_KEY_MAGIC 0xABAADDAD
/*
 * Header written to the xfile immediately ahead of each blob's bytes.  When a
 * blob is loaded or freed, xb_magic and xb_offset are checked against
 * XB_KEY_MAGIC and the caller's cookie before the blob is trusted.
 */
struct xb_key {
uint32_t xb_magic; /* XB_KEY_MAGIC */
uint32_t xb_size; /* size of the blob, in bytes */
loff_t xb_offset; /* byte offset of this key */
/* blob comes after here */
} __packed;
/*
 * Set up a blob storage object backed by a new xfile labelled @description.
 *
 * On success the new object is returned through @blobp and zero is returned.
 * On failure @blobp is left untouched and a negative errno is returned.
 */
int
xfblob_create(
	const char	*description,
	struct xfblob	**blobp)
{
	struct xfile	*backing;
	struct xfblob	*xfb;
	int		ret;

	ret = xfile_create(description, 0, &backing);
	if (ret)
		return ret;

	xfb = kmalloc(sizeof(*xfb), XCHK_GFP_FLAGS);
	if (!xfb) {
		/* Don't leak the xfile we just created. */
		xfile_destroy(backing);
		return -ENOMEM;
	}

	/* The first blob is stored at offset PAGE_SIZE, never at zero. */
	xfb->last_offset = PAGE_SIZE;
	xfb->xfile = backing;

	*blobp = xfb;
	return 0;
}
/*
 * Tear down a blob storage object: destroy the backing xfile, then free the
 * object itself.
 */
void
xfblob_destroy(
	struct xfblob	*blob)
{
	struct xfile	*backing = blob->xfile;

	xfile_destroy(backing);
	kfree(blob);
}
/*
 * Copy a previously stored blob into @ptr.
 *
 * @cookie identifies the blob and @size is the capacity of @ptr in bytes.
 * Returns -ENODATA if the header found at @cookie lacks the expected magic
 * or does not record @cookie as its own offset, -EFBIG if the blob is larger
 * than @size, and otherwise whatever xfile_load returns.
 */
int
xfblob_load(
	struct xfblob	*blob,
	xfblob_cookie	cookie,
	void		*ptr,
	uint32_t	size)
{
	struct xb_key	hdr;
	int		ret;

	ret = xfile_load(blob->xfile, &hdr, sizeof(hdr), cookie);
	if (ret)
		return ret;

	/* The header must be one of ours... */
	if (hdr.xb_magic != XB_KEY_MAGIC) {
		ASSERT(0);
		return -ENODATA;
	}
	/* ...and must describe exactly this offset. */
	if (hdr.xb_offset != cookie) {
		ASSERT(0);
		return -ENODATA;
	}

	/* Refuse to overflow the caller's buffer. */
	if (hdr.xb_size > size) {
		ASSERT(0);
		return -EFBIG;
	}

	/* The blob's bytes sit directly after the header. */
	return xfile_load(blob->xfile, ptr, hdr.xb_size,
			cookie + sizeof(hdr));
}
/*
 * Append a blob of @size bytes from @ptr to the store.
 *
 * A header is written at the current end of the store, followed by the blob
 * itself.  On success, the offset of the header is returned through @cookie
 * and the end of the store advances past the new blob.  If writing the blob
 * fails, the header that was just written is discarded and the end of the
 * store does not move.
 */
int
xfblob_store(
	struct xfblob	*blob,
	xfblob_cookie	*cookie,
	const void	*ptr,
	uint32_t	size)
{
	const loff_t	start = blob->last_offset;
	struct xb_key	hdr = {
		.xb_magic	= XB_KEY_MAGIC,
		.xb_size	= size,
		.xb_offset	= start,
	};
	int		ret;

	ret = xfile_store(blob->xfile, &hdr, sizeof(hdr), start);
	if (ret)
		return ret;

	ret = xfile_store(blob->xfile, ptr, size, start + sizeof(hdr));
	if (ret) {
		/* Don't leave an orphaned header behind. */
		xfile_discard(blob->xfile, start, sizeof(hdr));
		return ret;
	}

	*cookie = start;
	blob->last_offset = start + (sizeof(hdr) + size);
	return 0;
}
/*
 * Release the storage behind one blob.
 *
 * The header at @cookie is validated first; -ENODATA is returned if its magic
 * or recorded offset is wrong.  Otherwise the header and the blob's bytes are
 * discarded from the xfile and zero is returned.
 */
int
xfblob_free(
	struct xfblob	*blob,
	xfblob_cookie	cookie)
{
	struct xb_key	hdr;
	int		ret;

	ret = xfile_load(blob->xfile, &hdr, sizeof(hdr), cookie);
	if (ret)
		return ret;

	if (hdr.xb_magic != XB_KEY_MAGIC) {
		ASSERT(0);
		return -ENODATA;
	}
	if (hdr.xb_offset != cookie) {
		ASSERT(0);
		return -ENODATA;
	}

	/* Drop the header together with the payload that follows it. */
	xfile_discard(blob->xfile, cookie, sizeof(hdr) + hdr.xb_size);
	return 0;
}
/*
 * Report how many bytes this blob storage object is consuming, as measured
 * by the xfile that backs it.
 */
unsigned long long
xfblob_bytes(
	struct xfblob	*blob)
{
	struct xfile	*backing = blob->xfile;

	return xfile_bytes(backing);
}
/* Drop all the blobs. */
void
xfblob_truncate(
struct xfblob *blob)
{
xfile_discard(blob->xfile, PAGE_SIZE, MAX_LFS_FILESIZE - PAGE_SIZE);
blob->last_offset = PAGE_SIZE;
}
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_XFBLOB_H__
#define __XFS_SCRUB_XFBLOB_H__
/* Blob storage object: blobs are appended to an xfile. */
struct xfblob {
/* xfile holding the blob headers and blob contents */
struct xfile *xfile;
/* byte offset in the xfile at which the next blob will be stored */
loff_t last_offset;
};
/* Opaque handle for a stored blob; pass it to xfblob_load or xfblob_free. */
typedef loff_t xfblob_cookie;
/* Create and destroy a blob storage object. */
int xfblob_create(const char *descr, struct xfblob **blobp);
void xfblob_destroy(struct xfblob *blob);
/* Copy the blob identified by @cookie into @ptr, which holds @size bytes. */
int xfblob_load(struct xfblob *blob, xfblob_cookie cookie, void *ptr,
uint32_t size);
/* Append @size bytes from @ptr; the new blob's handle comes back in @cookie. */
int xfblob_store(struct xfblob *blob, xfblob_cookie *cookie, const void *ptr,
uint32_t size);
/* Discard the storage behind a single blob. */
int xfblob_free(struct xfblob *blob, xfblob_cookie cookie);
/* Number of bytes consumed by the backing xfile. */
unsigned long long xfblob_bytes(struct xfblob *blob);
/* Discard every blob and start over. */
void xfblob_truncate(struct xfblob *blob);
#endif /* __XFS_SCRUB_XFBLOB_H__ */
...@@ -310,3 +310,15 @@ xfile_put_folio( ...@@ -310,3 +310,15 @@ xfile_put_folio(
folio_unlock(folio); folio_unlock(folio);
folio_put(folio); folio_put(folio);
} }
/*
 * Discard the page cache backing @count bytes of the xfile, starting at byte
 * offset @pos.  The range handed to shmem_truncate_range is inclusive, hence
 * the "- 1" when computing its last byte.
 */
void
xfile_discard(
	struct xfile	*xf,
	loff_t		pos,
	u64		count)
{
	loff_t		last_byte;

	trace_xfile_discard(xf, pos, count);

	last_byte = pos + count - 1;
	shmem_truncate_range(file_inode(xf->file), pos, last_byte);
}
...@@ -17,6 +17,7 @@ int xfile_load(struct xfile *xf, void *buf, size_t count, loff_t pos); ...@@ -17,6 +17,7 @@ int xfile_load(struct xfile *xf, void *buf, size_t count, loff_t pos);
int xfile_store(struct xfile *xf, const void *buf, size_t count, int xfile_store(struct xfile *xf, const void *buf, size_t count,
loff_t pos); loff_t pos);
void xfile_discard(struct xfile *xf, loff_t pos, u64 count);
loff_t xfile_seek_data(struct xfile *xf, loff_t pos); loff_t xfile_seek_data(struct xfile *xf, loff_t pos);
#define XFILE_MAX_FOLIO_SIZE (PAGE_SIZE << MAX_PAGECACHE_ORDER) #define XFILE_MAX_FOLIO_SIZE (PAGE_SIZE << MAX_PAGECACHE_ORDER)
...@@ -26,4 +27,9 @@ struct folio *xfile_get_folio(struct xfile *xf, loff_t offset, size_t len, ...@@ -26,4 +27,9 @@ struct folio *xfile_get_folio(struct xfile *xf, loff_t offset, size_t len,
unsigned int flags); unsigned int flags);
void xfile_put_folio(struct xfile *xf, struct folio *folio); void xfile_put_folio(struct xfile *xf, struct folio *folio);
static inline unsigned long long xfile_bytes(struct xfile *xf)
{
return file_inode(xf->file)->i_blocks << SECTOR_SHIFT;
}
#endif /* __XFS_SCRUB_XFILE_H__ */ #endif /* __XFS_SCRUB_XFILE_H__ */
...@@ -494,6 +494,9 @@ _xfs_buf_obj_cmp( ...@@ -494,6 +494,9 @@ _xfs_buf_obj_cmp(
* it stale has not yet committed. i.e. we are * it stale has not yet committed. i.e. we are
* reallocating a busy extent. Skip this buffer and * reallocating a busy extent. Skip this buffer and
* continue searching for an exact match. * continue searching for an exact match.
*
* Note: If we're scanning for incore buffers to stale, don't
* complain if we find non-stale buffers.
*/ */
if (!(map->bm_flags & XBM_LIVESCAN)) if (!(map->bm_flags & XBM_LIVESCAN))
ASSERT(bp->b_flags & XBF_STALE); ASSERT(bp->b_flags & XBF_STALE);
......
...@@ -31,6 +31,8 @@ ...@@ -31,6 +31,8 @@
* pos: file offset, in bytes * pos: file offset, in bytes
* bytecount: number of bytes * bytecount: number of bytes
* *
* dablk: directory or xattr block offset, in filesystem blocks
*
* disize: ondisk file size, in bytes * disize: ondisk file size, in bytes
* isize: incore file size, in bytes * isize: incore file size, in bytes
* *
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment