Commit 5f3e9511 authored by Chandan Babu R

Merge tag 'repair-xattrs-6.10_2024-04-15' of...

Merge tag 'repair-xattrs-6.10_2024-04-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.10-mergeA

xfs: online repair of extended attributes

This series employs atomic extent swapping to enable safe reconstruction
of extended attribute data attached to a file.  Because xattrs do not
have any redundant information to draw upon, the best we can do is salvage
as much data as possible and build a new structure.

Rebuilding an extended attribute structure consists of these three
steps:

First, we walk the existing attributes to salvage as many of them as we
can, by adding them as new attributes attached to the repair tempfile.
We need to add a new xfile-based data structure to hold blobs of
arbitrary length to stage the xattr names and values.

Second, we write the salvaged attributes to a temporary file, and use
atomic extent swaps to exchange the entire attribute fork between the
two files.

Finally, we reap the old xattr blocks (which are now in the temporary
file) as carefully as we can.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'repair-xattrs-6.10_2024-04-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: create an xattr iteration function for scrub
  xfs: flag empty xattr leaf blocks for optimization
  xfs: scrub should set preen if attr leaf has holes
  xfs: repair extended attributes
  xfs: use atomic extent swapping to fix user file fork data
  xfs: create a blob array data structure
  xfs: enable discarding of folios backing an xfile
parents fb1f7c66 6c631e79
......@@ -165,6 +165,7 @@ xfs-y += $(addprefix scrub/, \
ialloc.o \
inode.o \
iscan.o \
listxattr.o \
nlinks.o \
parent.o \
readdir.o \
......@@ -194,6 +195,7 @@ ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
xfs-y += $(addprefix scrub/, \
agheader_repair.o \
alloc_repair.o \
attr_repair.o \
bmap_repair.o \
cow_repair.o \
fscounters_repair.o \
......@@ -208,6 +210,7 @@ xfs-y += $(addprefix scrub/, \
repair.o \
rmap_repair.o \
tempfile.o \
xfblob.o \
)
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
......
......@@ -1055,7 +1055,7 @@ xfs_attr_set(
* External routines when attribute list is inside the inode
*========================================================================*/
static inline int xfs_attr_sf_totsize(struct xfs_inode *dp)
int xfs_attr_sf_totsize(struct xfs_inode *dp)
{
struct xfs_attr_sf_hdr *sf = dp->i_af.if_data;
......
......@@ -618,4 +618,6 @@ extern struct kmem_cache *xfs_attr_intent_cache;
int __init xfs_attr_intent_init_cache(void);
void xfs_attr_intent_destroy_cache(void);
int xfs_attr_sf_totsize(struct xfs_inode *dp);
#endif /* __XFS_ATTR_H__ */
......@@ -721,6 +721,11 @@ struct xfs_attr3_leafblock {
#define XFS_ATTR_INCOMPLETE (1u << XFS_ATTR_INCOMPLETE_BIT)
#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE)
#define XFS_ATTR_NAMESPACE_STR \
{ XFS_ATTR_LOCAL, "local" }, \
{ XFS_ATTR_ROOT, "root" }, \
{ XFS_ATTR_SECURE, "secure" }
/*
* Alignment for namelist and valuelist entries (since they are mixed
* there can be only one alignment value)
......
......@@ -675,7 +675,7 @@ xfs_exchmaps_rmapbt_blocks(
}
/* Estimate the bmbt and rmapbt overhead required to exchange mappings. */
static int
int
xfs_exchmaps_estimate_overhead(
struct xfs_exchmaps_req *req)
{
......
......@@ -97,6 +97,7 @@ xfs_exchmaps_reqfork(const struct xfs_exchmaps_req *req)
return XFS_DATA_FORK;
}
int xfs_exchmaps_estimate_overhead(struct xfs_exchmaps_req *req);
int xfs_exchmaps_estimate(struct xfs_exchmaps_req *req);
extern struct kmem_cache *xfs_exchmaps_intent_cache;
......
......@@ -10,6 +10,7 @@
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
......@@ -20,6 +21,8 @@
#include "scrub/common.h"
#include "scrub/dabtree.h"
#include "scrub/attr.h"
#include "scrub/listxattr.h"
#include "scrub/repair.h"
/* Free the buffers linked from the xattr buffer. */
static void
......@@ -35,6 +38,8 @@ xchk_xattr_buf_cleanup(
kvfree(ab->value);
ab->value = NULL;
ab->value_sz = 0;
kvfree(ab->name);
ab->name = NULL;
}
/*
......@@ -65,7 +70,7 @@ xchk_xattr_want_freemap(
* reallocating the buffer if necessary. Buffer contents are not preserved
* across a reallocation.
*/
static int
int
xchk_setup_xattr_buf(
struct xfs_scrub *sc,
size_t value_size)
......@@ -95,6 +100,12 @@ xchk_setup_xattr_buf(
return -ENOMEM;
}
if (xchk_could_repair(sc)) {
ab->name = kvmalloc(XATTR_NAME_MAX + 1, XCHK_GFP_FLAGS);
if (!ab->name)
return -ENOMEM;
}
resize_value:
if (ab->value_sz >= value_size)
return 0;
......@@ -121,6 +132,12 @@ xchk_setup_xattr(
{
int error;
if (xchk_could_repair(sc)) {
error = xrep_setup_xattr(sc);
if (error)
return error;
}
/*
* We failed to get memory while checking attrs, so this time try to
* get all the memory we're ever going to need. Allocate the buffer
......@@ -137,90 +154,81 @@ xchk_setup_xattr(
/* Extended Attributes */
struct xchk_xattr {
struct xfs_attr_list_context context;
struct xfs_scrub *sc;
};
/*
* Check that an extended attribute key can be looked up by hash.
*
* We use the XFS attribute list iterator (i.e. xfs_attr_list_ilocked)
* to call this function for every attribute key in an inode. Once
* we're here, we load the attribute value to see if any errors happen,
* or if we get more or less data than we expected.
* We use the extended attribute walk helper to call this function for every
* attribute key in an inode. Once we're here, we load the attribute value to
* see if any errors happen, or if we get more or less data than we expected.
*/
static void
xchk_xattr_listent(
struct xfs_attr_list_context *context,
int flags,
unsigned char *name,
int namelen,
int valuelen)
static int
xchk_xattr_actor(
struct xfs_scrub *sc,
struct xfs_inode *ip,
unsigned int attr_flags,
const unsigned char *name,
unsigned int namelen,
const void *value,
unsigned int valuelen,
void *priv)
{
struct xfs_da_args args = {
.op_flags = XFS_DA_OP_NOTIME,
.attr_filter = flags & XFS_ATTR_NSP_ONDISK_MASK,
.geo = context->dp->i_mount->m_attr_geo,
.attr_filter = attr_flags & XFS_ATTR_NSP_ONDISK_MASK,
.geo = sc->mp->m_attr_geo,
.whichfork = XFS_ATTR_FORK,
.dp = context->dp,
.dp = ip,
.name = name,
.namelen = namelen,
.hashval = xfs_da_hashname(name, namelen),
.trans = context->tp,
.trans = sc->tp,
.valuelen = valuelen,
.owner = context->dp->i_ino,
.owner = ip->i_ino,
};
struct xchk_xattr_buf *ab;
struct xchk_xattr *sx;
int error = 0;
sx = container_of(context, struct xchk_xattr, context);
ab = sx->sc->buf;
ab = sc->buf;
if (xchk_should_terminate(sx->sc, &error)) {
context->seen_enough = error;
return;
}
if (xchk_should_terminate(sc, &error))
return error;
if (flags & XFS_ATTR_INCOMPLETE) {
if (attr_flags & XFS_ATTR_INCOMPLETE) {
/* Incomplete attr key, just mark the inode for preening. */
xchk_ino_set_preen(sx->sc, context->dp->i_ino);
return;
xchk_ino_set_preen(sc, ip->i_ino);
return 0;
}
/* Only one namespace bit allowed. */
if (hweight32(flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) {
xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno);
goto fail_xref;
if (hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) {
xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno);
return -ECANCELED;
}
/* Does this name make sense? */
if (!xfs_attr_namecheck(name, namelen)) {
xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK, args.blkno);
goto fail_xref;
xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno);
return -ECANCELED;
}
/*
* Local xattr values are stored in the attr leaf block, so we don't
* need to retrieve the value from a remote block to detect corruption
* problems.
* Local and shortform xattr values are stored in the attr leaf block,
* so we don't need to retrieve the value from a remote block to detect
* corruption problems.
*/
if (flags & XFS_ATTR_LOCAL)
goto fail_xref;
if (value)
return 0;
/*
* Try to allocate enough memory to extrat the attr value. If that
* doesn't work, we overload the seen_enough variable to convey
* the error message back to the main scrub function.
* Try to allocate enough memory to extract the attr value. If that
* doesn't work, return -EDEADLOCK as a signal to try again with a
* maximally sized buffer.
*/
error = xchk_setup_xattr_buf(sx->sc, valuelen);
error = xchk_setup_xattr_buf(sc, valuelen);
if (error == -ENOMEM)
error = -EDEADLOCK;
if (error) {
context->seen_enough = error;
return;
}
if (error)
return error;
args.value = ab->value;
......@@ -228,16 +236,13 @@ xchk_xattr_listent(
/* ENODATA means the hash lookup failed and the attr is bad */
if (error == -ENODATA)
error = -EFSCORRUPTED;
if (!xchk_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno,
if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, args.blkno,
&error))
goto fail_xref;
return error;
if (args.valuelen != valuelen)
xchk_fblock_set_corrupt(sx->sc, XFS_ATTR_FORK,
args.blkno);
fail_xref:
if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
context->seen_enough = 1;
return;
xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno);
return 0;
}
/*
......@@ -247,7 +252,7 @@ xchk_xattr_listent(
* Within a char, the lowest bit of the char represents the byte with
* the smallest address
*/
STATIC bool
bool
xchk_xattr_set_map(
struct xfs_scrub *sc,
unsigned long *map,
......@@ -404,6 +409,17 @@ xchk_xattr_block(
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
hdrsize = xfs_attr3_leaf_hdr_size(leaf);
/*
* Empty xattr leaf blocks mapped at block 0 are probably a byproduct
* of a race between setxattr and a log shutdown. Anywhere else in the
* attr fork is a corruption.
*/
if (leafhdr.count == 0) {
if (blk->blkno == 0)
xchk_da_set_preen(ds, level);
else
xchk_da_set_corrupt(ds, level);
}
if (leafhdr.usedbytes > mp->m_attr_geo->blksize)
xchk_da_set_corrupt(ds, level);
if (leafhdr.firstused > mp->m_attr_geo->blksize)
......@@ -412,6 +428,8 @@ xchk_xattr_block(
xchk_da_set_corrupt(ds, level);
if (!xchk_xattr_set_map(ds->sc, ab->usedmap, 0, hdrsize))
xchk_da_set_corrupt(ds, level);
if (leafhdr.holes)
xchk_da_set_preen(ds, level);
if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
goto out;
......@@ -589,16 +607,6 @@ int
xchk_xattr(
struct xfs_scrub *sc)
{
struct xchk_xattr sx = {
.sc = sc,
.context = {
.dp = sc->ip,
.tp = sc->tp,
.resynch = 1,
.put_listent = xchk_xattr_listent,
.allow_incomplete = true,
},
};
xfs_dablk_t last_checked = -1U;
int error = 0;
......@@ -627,12 +635,6 @@ xchk_xattr(
/*
* Look up every xattr in this file by name and hash.
*
* Use the backend implementation of xfs_attr_list to call
* xchk_xattr_listent on every attribute key in this inode.
* In other words, we use the same iterator/callback mechanism
* that listattr uses to scrub extended attributes, though in our
* _listent function, we check the value of the attribute.
*
* The VFS only locks i_rwsem when modifying attrs, so keep all
* three locks held because that's the only way to ensure we're
* the only thread poking into the da btree. We traverse the da
......@@ -640,13 +642,9 @@ xchk_xattr(
* iteration, which doesn't really follow the usual buffer
* locking order.
*/
error = xfs_attr_list_ilocked(&sx.context);
error = xchk_xattr_walk(sc, sc->ip, xchk_xattr_actor, NULL);
if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
return error;
/* Did our listent function try to return any errors? */
if (sx.context.seen_enough < 0)
return sx.context.seen_enough;
return 0;
}
......@@ -16,9 +16,16 @@ struct xchk_xattr_buf {
/* Bitmap of free space in xattr leaf blocks. */
unsigned long *freemap;
/* Memory buffer used to hold salvaged xattr names. */
unsigned char *name;
/* Memory buffer used to extract xattr values. */
void *value;
size_t value_sz;
};
bool xchk_xattr_set_map(struct xfs_scrub *sc, unsigned long *map,
unsigned int start, unsigned int len);
int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size);
#endif /* __XFS_SCRUB_ATTR_H__ */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_dir2.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_attr_sf.h"
#include "xfs_attr_remote.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_exchmaps.h"
#include "xfs_exchrange.h"
#include "xfs_acl.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/xfblob.h"
#include "scrub/attr.h"
#include "scrub/reap.h"
#include "scrub/attr_repair.h"
/*
* Extended Attribute Repair
* =========================
*
* We repair extended attributes by reading the attr leaf blocks looking for
* attribute entries that look salvageable (name passes verifiers, value can
* be retrieved, etc). Each extended attribute worth salvaging is stashed in
* memory, and the stashed entries are periodically replayed into a temporary
* file to constrain memory use. Batching the construction of the temporary
* extended attribute structure in this fashion reduces lock cycling of the
* file being repaired and the temporary file.
*
* When salvaging completes, the remaining stashed attributes are replayed to
* the temporary file. An atomic file contents exchange is used to commit the
* new xattr blocks to the file being repaired. This will disrupt attrmulti
* cursors.
*/
/*
 * In-memory record describing one salvaged extended attribute.  The name and
 * value bytes themselves are kept in the blob store; this record holds the
 * cookies needed to load them back, plus the lengths and namespace flags.
 */
struct xrep_xattr_key {
/* Cookie for retrieval of the xattr name. */
xfblob_cookie name_cookie;
/* Cookie for retrieval of the xattr value. */
xfblob_cookie value_cookie;
/* XFS_ATTR_* flags; only the on-disk namespace bits are recorded. */
int flags;
/* Length of the value, in bytes. */
uint32_t valuelen;
/* Length of the stashed name; can be shorter than the on-disk name if salvage truncated it. */
uint16_t namelen;
};
/*
 * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write
 * them to the temp file.
 */
#define XREP_XATTR_MAX_STASH_BYTES (PAGE_SIZE * 8)
/* Working state for one extended attribute repair. */
struct xrep_xattr {
/* Scrub context that this repair runs under. */
struct xfs_scrub *sc;
/* Information for exchanging attr fork mappings at the end. */
struct xrep_tempexch tx;
/* xattr keys: an array of struct xrep_xattr_key records. */
struct xfarray *xattr_records;
/* xattr names and values, stored as blobs that the key cookies refer to. */
struct xfblob *xattr_blobs;
/* Number of attributes that we are salvaging. */
unsigned long long attrs_found;
};
/* Set up to recreate the extended attributes. */
int
xrep_setup_xattr(
struct xfs_scrub *sc)
{
return xrep_tempfile_create(sc, S_IFREG);
}
/*
 * Decide whether an attribute is worth salvaging.  Returns nonzero if so and
 * zero if the attribute should be skipped.
 *
 * An attribute is rejected if it is marked incomplete, if its name is empty,
 * longer than XATTR_NAME_MAX or fails the name check, if its value length is
 * negative or larger than XATTR_SIZE_MAX, or if more than one on-disk
 * namespace bit is set.  @rx and @value are not examined; @value can be null
 * for remote attrs.
 */
STATIC int
xrep_xattr_want_salvage(
	struct xrep_xattr	*rx,
	unsigned int		attr_flags,
	const void		*name,
	int			namelen,
	const void		*value,
	int			valuelen)
{
	if (attr_flags & XFS_ATTR_INCOMPLETE)
		return false;

	/* Range-check the length before handing the name to namecheck. */
	if (namelen <= 0 || namelen > XATTR_NAME_MAX ||
	    !xfs_attr_namecheck(name, namelen))
		return false;

	if (valuelen < 0 || valuelen > XATTR_SIZE_MAX)
		return false;

	/* At most one namespace bit may be set. */
	return hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) <= 1;
}
/*
 * Stash one attribute in memory until it can be replayed into the temporary
 * file.  The name and the value are stored as blobs, and a key record holding
 * the two blob cookies, the lengths and the namespace flags is appended to
 * the record array.
 *
 * Returns 0 on success, which includes the case where the attribute is
 * dropped because nothing is left of its name; otherwise returns the error
 * from the termination check or from the storage helpers.
 */
STATIC int
xrep_xattr_salvage_key(
	struct xrep_xattr	*rx,
	int			flags,
	unsigned char		*name,
	int			namelen,
	unsigned char		*value,
	int			valuelen)
{
	struct xrep_xattr_key	key = {
		.flags		= flags & XFS_ATTR_NSP_ONDISK_MASK,
		.valuelen	= valuelen,
	};
	unsigned int		len;
	int			error = 0;

	if (xchk_should_terminate(rx->sc, &error))
		return error;

	/*
	 * Keep only the part of the name that precedes the first null byte,
	 * because that byte would trip namecheck.  If nothing is left, skip
	 * this attribute.
	 */
	for (len = 0; len < namelen; len++) {
		if (name[len] == 0)
			break;
	}
	if (!len)
		return 0;
	key.namelen = len;

	trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name, key.namelen,
			valuelen);

	/* Name first, then value, then the record that points at both. */
	error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name,
			key.namelen);
	if (error)
		return error;

	error = xfblob_store(rx->xattr_blobs, &key.value_cookie, value,
			key.valuelen);
	if (error)
		return error;

	error = xfarray_append(rx->xattr_records, &key);
	if (error)
		return error;

	rx->attrs_found++;
	return 0;
}
/*
 * Stash the name and value of one shortform attribute entry so that it can be
 * reinserted later.
 *
 * The name bytes and then the value bytes are claimed in the usage bitmap;
 * if either claim fails, or the attribute is not worth salvaging, the entry
 * is skipped and 0 is returned.
 */
STATIC int
xrep_xattr_salvage_sf_attr(
	struct xrep_xattr		*rx,
	struct xfs_attr_sf_hdr		*hdr,
	struct xfs_attr_sf_entry	*sfe)
{
	struct xfs_scrub		*sc = rx->sc;
	struct xchk_xattr_buf		*ab = sc->buf;
	unsigned char			*name = sfe->nameval;
	unsigned char			*value = name + sfe->namelen;

	/* The value immediately follows the name inside the entry. */
	if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)name - (char *)hdr,
				sfe->namelen) ||
	    !xchk_xattr_set_map(sc, ab->usedmap, (char *)value - (char *)hdr,
				sfe->valuelen) ||
	    !xrep_xattr_want_salvage(rx, sfe->flags, name, sfe->namelen,
				value, sfe->valuelen))
		return 0;

	return xrep_xattr_salvage_key(rx, sfe->flags, name, sfe->namelen,
			value, sfe->valuelen);
}
/*
* Record a local format extended attribute key & value for later reinsertion
* into the inode.
*/
STATIC int
xrep_xattr_salvage_local_attr(
struct xrep_xattr *rx,
struct xfs_attr_leaf_entry *ent,
unsigned int nameidx,
const char *buf_end,
struct xfs_attr_leaf_name_local *lentry)
{
struct xchk_xattr_buf *ab = rx->sc->buf;
unsigned char *value;
unsigned int valuelen;
unsigned int namesize;
/*
* Decode the leaf local entry format. If something seems wrong, we
* junk the attribute.
*/
value = &lentry->nameval[lentry->namelen];
valuelen = be16_to_cpu(lentry->valuelen);
namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen);
if ((char *)lentry + namesize > buf_end)
return 0;
if (!xrep_xattr_want_salvage(rx, ent->flags, lentry->nameval,
lentry->namelen, value, valuelen))
return 0;
if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
return 0;
/* Try to save this attribute. */
return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval,
lentry->namelen, value, valuelen);
}
/*
 * Record a remote format extended attribute key & value for later reinsertion
 * into the inode.
 *
 * The name is taken from the leaf block; the value is read from the remote
 * value blocks into the scrub value buffer and stashed from there.
 *
 * Returns 0 if the attribute was stashed or junked (out-of-bounds entry,
 * zero-length or unsalvageable value, bitmap conflict, no remote block, or a
 * value that reads back as -EFSBADCRC/-EFSCORRUPTED).  A failure to get memory
 * for the value buffer is reported as -EDEADLOCK; other errors are passed
 * through.
 */
STATIC int
xrep_xattr_salvage_remote_attr(
	struct xrep_xattr			*rx,
	struct xfs_attr_leaf_entry		*ent,
	unsigned int				nameidx,
	const char				*buf_end,
	struct xfs_attr_leaf_name_remote	*rentry,
	unsigned int				ent_idx,
	struct xfs_buf				*leaf_bp)
{
	struct xchk_xattr_buf	*ab = rx->sc->buf;
	struct xfs_da_args	args = {
		.trans		= rx->sc->tp,
		.dp		= rx->sc->ip,
		.index		= ent_idx,
		.geo		= rx->sc->mp->m_attr_geo,
		.owner		= rx->sc->ip->i_ino,
		.attr_filter	= ent->flags & XFS_ATTR_NSP_ONDISK_MASK,
		.namelen	= rentry->namelen,
		.name		= rentry->name,
		.valuelen	= be32_to_cpu(rentry->valuelen),
	};
	unsigned int		namesize;
	int			error;

	/*
	 * Decode the leaf remote entry format.  If something seems wrong, we
	 * junk the attribute.  Note that we should never find a zero-length
	 * remote attribute value.
	 */
	namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
	if ((char *)rentry + namesize > buf_end)
		return 0;
	if (args.valuelen == 0 ||
	    !xrep_xattr_want_salvage(rx, ent->flags, rentry->name,
				rentry->namelen, NULL, args.valuelen))
		return 0;
	if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize))
		return 0;

	/*
	 * Enlarge the buffer (if needed) to hold the value that we're trying
	 * to salvage from the old extended attribute data.
	 */
	error = xchk_setup_xattr_buf(rx->sc, args.valuelen);
	if (error == -ENOMEM)
		error = -EDEADLOCK;
	if (error)
		return error;

	/*
	 * Resizing can free and reallocate the value buffer, so only now is it
	 * safe to point the lookup at it.  Sampling ab->value any earlier
	 * could leave args.value stale (or NULL) while the stash below reads
	 * from the new buffer.
	 */
	args.value = ab->value;

	/* Look up the remote value and stash it for reconstruction. */
	error = xfs_attr3_leaf_getvalue(leaf_bp, &args);
	if (error || args.rmtblkno == 0)
		goto out;

	error = xfs_attr_rmtval_get(&args);
	if (error)
		goto out;

	/* Try to save this attribute. */
	error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name,
			rentry->namelen, ab->value, args.valuelen);
out:
	/* remote value was garbage, junk it */
	if (error == -EFSBADCRC || error == -EFSCORRUPTED)
		error = 0;
	return error;
}
/*
 * Salvage every attribute that we can from one attr leaf block.
 *
 * The usage bitmap is reset and the leaf header is claimed first.  Each entry
 * is then visited in index order: entries whose own bytes collide with
 * something already claimed, or whose name index lies outside the
 * [firstused, blksize) range, are skipped; the rest are handed to the local
 * or remote salvage helper depending on XFS_ATTR_LOCAL.  The first error from
 * a helper or from the termination check stops the walk.
 */
STATIC int
xrep_xattr_recover_leaf(
	struct xrep_xattr		*rx,
	struct xfs_buf			*bp)
{
	struct xfs_attr3_icleaf_hdr	leafhdr;
	struct xfs_scrub		*sc = rx->sc;
	struct xfs_mount		*mp = sc->mp;
	struct xchk_xattr_buf		*ab = sc->buf;
	struct xfs_attr_leafblock	*leaf;
	struct xfs_attr_leaf_entry	*entries;
	char				*buf_end;
	unsigned int			hdrsize;
	int				i;
	int				error = 0;

	bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize);

	/* Decode the leaf header and mark its bytes as used. */
	leaf = bp->b_addr;
	xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
	hdrsize = xfs_attr3_leaf_hdr_size(leaf);
	xchk_xattr_set_map(sc, ab->usedmap, 0, hdrsize);

	entries = xfs_attr3_leaf_entryp(leaf);
	buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;

	for (i = 0; i < leafhdr.count; i++) {
		struct xfs_attr_leaf_entry	*ent = &entries[i];
		unsigned int			nameidx;
		size_t				off;

		if (xchk_should_terminate(sc, &error))
			return error;

		/* Skip the entry if its bytes overlap something else. */
		off = (char *)ent - (char *)leaf;
		if (!xchk_xattr_set_map(sc, ab->usedmap, off,
				sizeof(xfs_attr_leaf_entry_t)))
			continue;

		/* The name must sit in the name area of the block. */
		nameidx = be16_to_cpu(ent->nameidx);
		if (nameidx < leafhdr.firstused ||
		    nameidx >= mp->m_attr_geo->blksize)
			continue;

		if (ent->flags & XFS_ATTR_LOCAL)
			error = xrep_xattr_salvage_local_attr(rx, ent, nameidx,
					buf_end,
					xfs_attr3_leaf_name_local(leaf, i));
		else
			error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx,
					buf_end,
					xfs_attr3_leaf_name_remote(leaf, i),
					i, bp);
		if (error)
			return error;
	}

	return 0;
}
/*
 * Salvage whatever we can from a shortform attr fork.
 *
 * The usage bitmap is reset and the shortform header is claimed, then up to
 * hdr->count entries are walked.  The walk stops quietly as soon as an entry
 * would run past the end of the fork data.  An entry whose fixed-size part
 * collides with bytes already claimed is stepped over without being saved.
 */
STATIC int
xrep_xattr_recover_sf(
	struct xrep_xattr		*rx)
{
	struct xfs_scrub		*sc = rx->sc;
	struct xchk_xattr_buf		*ab = sc->buf;
	struct xfs_ifork		*ifp = xfs_ifork_ptr(rx->sc->ip,
							     XFS_ATTR_FORK);
	struct xfs_attr_sf_hdr		*hdr = ifp->if_data;
	struct xfs_attr_sf_entry	*sfe;
	struct xfs_attr_sf_entry	*next;
	unsigned char			*end;
	int				i;
	int				error = 0;

	bitmap_zero(ab->usedmap, ifp->if_bytes);
	end = (unsigned char *)ifp->if_data + ifp->if_bytes;
	xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(*hdr));

	sfe = xfs_attr_sf_firstentry(hdr);
	if ((unsigned char *)sfe > end)
		return 0;

	for (i = 0; i < hdr->count; i++, sfe = next) {
		if (xchk_should_terminate(sc, &error))
			return error;

		/* Stop once the entry no longer fits in the fork data. */
		next = xfs_attr_sf_nextentry(sfe);
		if ((unsigned char *)next > end)
			break;

		/* A conflict with the sf entry itself means we move on. */
		if (!xchk_xattr_set_map(sc, ab->usedmap,
				(char *)sfe - (char *)hdr,
				sizeof(struct xfs_attr_sf_entry)))
			continue;

		error = xrep_xattr_salvage_sf_attr(rx, hdr, sfe);
		if (error)
			return error;
	}

	return 0;
}
/*
 * Try to return a buffer of xattr data for a given physical extent.
 *
 * The buffer cache get function complains if it finds a buffer matching the
 * block number but not the length, so we first scan for an incore buffer of
 * any length (up to the maximum length of a remote value) starting at @fsbno.
 * If the scan finds one, it is handed to the caller through @bpp.  Otherwise,
 * if @can_read is set, a single block is read with XBF_TRYLOCK and no buffer
 * ops; if it is not set, *@bpp is set to NULL.
 *
 * Note the subtlety that remote attr value blocks for which there is no
 * incore buffer are therefore returned one block at a time.  Those buffers
 * have no ops attached and must be staled to prevent aliasing with multiblock
 * buffers once we drop the ILOCK.
 */
STATIC int
xrep_xattr_find_buf(
	struct xfs_mount	*mp,
	xfs_fsblock_t		fsbno,
	xfs_extlen_t		max_len,
	bool			can_read,
	struct xfs_buf		**bpp)
{
	struct xrep_bufscan	scan = {
		.daddr		= XFS_FSB_TO_DADDR(mp, fsbno),
		.max_sectors	= xrep_bufscan_max_sectors(mp, max_len),
		.daddr_step	= XFS_FSB_TO_BB(mp, 1),
	};
	struct xfs_buf		*incore;

	/* The first incore buffer that the scan turns up is good enough. */
	incore = xrep_bufscan_advance(mp, &scan);
	if (incore != NULL) {
		*bpp = incore;
		return 0;
	}

	if (can_read)
		return xfs_buf_read(mp->m_ddev_targp, scan.daddr,
				XFS_FSB_TO_BB(mp, 1), XBF_TRYLOCK, bpp, NULL);

	*bpp = NULL;
	return 0;
}
/*
 * Deal with a buffer that we found during our walk of the attr fork.
 *
 * Attribute leaf and node blocks are simple -- they're a single block, so we
 * can walk them one at a time and we never have to worry about discontiguous
 * multiblock buffers like we do for directories.
 *
 * Remote attr blocks are the complication.  Each disk block is totally self
 * contained, in the sense that the v5 header provides no indication that
 * there could be more data in the next block.  The incore buffers can span
 * multiple blocks, though they never cross extent records; however, they
 * don't necessarily start or end on an extent record boundary.  That is why
 * the special buffer find function is used to walk the buffer cache.
 *
 * The caller must hold the ILOCK on the file being repaired.  XBF_TRYLOCK is
 * used to skip any locked buffer on the assumption that we don't own the
 * block and don't want to hang the system on a potentially garbage buffer.
 *
 * On success, *@actual_len is set to the length of the buffer that was
 * examined, in fs blocks.
 */
STATIC int
xrep_xattr_recover_block(
	struct xrep_xattr	*rx,
	xfs_dablk_t		dabno,
	xfs_fsblock_t		fsbno,
	xfs_extlen_t		max_len,
	xfs_extlen_t		*actual_len)
{
	struct xfs_scrub	*sc = rx->sc;
	struct xfs_da_blkinfo	*info;
	struct xfs_buf		*bp;
	bool			is_leaf;
	int			error;

	error = xrep_xattr_find_buf(sc->mp, fsbno, max_len, true, &bp);
	if (error)
		return error;

	info = bp->b_addr;
	*actual_len = XFS_BB_TO_FSB(sc->mp, bp->b_length);

	trace_xrep_xattr_recover_leafblock(sc->ip, dabno,
			be16_to_cpu(info->magic));

	/*
	 * Only salvage from the block if it has the right magic number for
	 * an attr leaf block and passes a structure check (we don't care
	 * about checksums).
	 */
	is_leaf = info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) &&
		  xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops) &&
		  xfs_attr3_leaf_header_check(bp, sc->ip->i_ino) == NULL;
	if (is_leaf)
		error = xrep_xattr_recover_leaf(rx, bp);

	/*
	 * A buffer without buffer ops was read in by the _find_buf function
	 * and could very well be /part/ of a multiblock remote block.  Mark
	 * it stale so that it doesn't hang around in memory to cause
	 * problems.
	 */
	if (!bp->b_ops)
		xfs_buf_stale(bp);

	xfs_buf_relse(bp);
	return error;
}
/* Insert one xattr key/value. */
STATIC int
xrep_xattr_insert_rec(
struct xrep_xattr *rx,
const struct xrep_xattr_key *key)
{
struct xfs_da_args args = {
.dp = rx->sc->tempip,
.attr_filter = key->flags,
.attr_flags = XATTR_CREATE,
.namelen = key->namelen,
.valuelen = key->valuelen,
.owner = rx->sc->ip->i_ino,
};
struct xchk_xattr_buf *ab = rx->sc->buf;
int error;
/*
* Grab pointers to the scrub buffer so that we can use them to insert
* attrs into the temp file.
*/
args.name = ab->name;
args.value = ab->value;
/*
* The attribute name is stored near the end of the in-core buffer,
* though we reserve one more byte to ensure null termination.
*/
ab->name[XATTR_NAME_MAX] = 0;
error = xfblob_load(rx->xattr_blobs, key->name_cookie, ab->name,
key->namelen);
if (error)
return error;
error = xfblob_free(rx->xattr_blobs, key->name_cookie);
if (error)
return error;
error = xfblob_load(rx->xattr_blobs, key->value_cookie, args.value,
key->valuelen);
if (error)
return error;
error = xfblob_free(rx->xattr_blobs, key->value_cookie);
if (error)
return error;
ab->name[key->namelen] = 0;
trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags, ab->name,
key->namelen, key->valuelen);
/*
* xfs_attr_set creates and commits its own transaction. If the attr
* already exists, we'll just drop it during the rebuild.
*/
error = xfs_attr_set(&args);
if (error == -EEXIST)
error = 0;
return error;
}
/*
 * Periodically flush salvaged attributes to the temporary file.  This is done
 * to reduce the memory requirements of the xattr rebuild because files can
 * contain millions of attributes.
 *
 * On success the stash is empty and the function returns with a fresh scrub
 * transaction and ILOCK_EXCL retaken on the inode being repaired.  On failure
 * the function returns immediately from wherever the error occurred.
 * NOTE(review): an error inside the replay loop returns with the tempfile
 * IOLOCK still held and no scrub transaction; presumably scrub teardown
 * cleans that up -- confirm.
 */
STATIC int
xrep_xattr_flush_stashed(
struct xrep_xattr *rx)
{
xfarray_idx_t array_cur;
int error;
/*
 * Entering this function, the scrub context has a reference to the
 * inode being repaired, the temporary file, and a scrub transaction
 * that we use during xattr salvaging to avoid livelocking if there
 * are cycles in the xattr structures.  We hold ILOCK_EXCL on the
 * inode being repaired, though it is not ijoined to the scrub
 * transaction.
 *
 * To constrain kernel memory use, we occasionally flush salvaged
 * xattrs from the xfarray and xfblob structures into the temporary
 * file in preparation for exchanging the xattr structures at the end.
 * Updating the temporary file requires a transaction, so we commit the
 * scrub transaction and drop the ILOCK so that xfs_attr_set can
 * allocate whatever transaction it wants.
 *
 * We still hold IOLOCK_EXCL on the inode being repaired, which
 * prevents anyone from modifying the damaged xattr data while we
 * repair it.
 */
error = xrep_trans_commit(rx->sc);
if (error)
return error;
xchk_iunlock(rx->sc, XFS_ILOCK_EXCL);
/*
 * Take the IOLOCK of the temporary file while we modify xattrs.  This
 * isn't strictly required because the temporary file is never revealed
 * to userspace, but we follow the same locking rules.  We still hold
 * sc->ip's IOLOCK.
 */
error = xrep_tempfile_iolock_polled(rx->sc);
if (error)
return error;
/* Add all the salvaged attrs to the temporary file, in array order. */
foreach_xfarray_idx(rx->xattr_records, array_cur) {
struct xrep_xattr_key key;
error = xfarray_load(rx->xattr_records, array_cur, &key);
if (error)
return error;
error = xrep_xattr_insert_rec(rx, &key);
if (error)
return error;
}
/* Empty out both arrays now that we've added the entries. */
xfarray_truncate(rx->xattr_records);
xfblob_truncate(rx->xattr_blobs);
xrep_tempfile_iounlock(rx->sc);
/* Recreate the salvage transaction and relock the inode. */
error = xchk_trans_alloc(rx->sc, 0);
if (error)
return error;
xchk_ilock(rx->sc, XFS_ILOCK_EXCL);
return 0;
}
/*
 * Report whether the stashed attribute records and blobs together take up
 * more than XREP_XATTR_MAX_STASH_BYTES, meaning it is time to flush them to
 * the temporary file.
 */
static inline bool
xrep_xattr_want_flush_stashed(
	struct xrep_xattr	*rx)
{
	unsigned long long	stashed;

	stashed = xfarray_bytes(rx->xattr_records);
	stashed += xfblob_bytes(rx->xattr_blobs);

	return stashed > XREP_XATTR_MAX_STASH_BYTES;
}
/*
 * Extract as many attribute keys and values as we can.
 *
 * Walks every mapping of the attr fork of the file being repaired and hands
 * each block of each written extent to xrep_xattr_recover_block.  After each
 * block, the stash is flushed to the temporary file if it has grown past the
 * limit.  Returns 0 on success, -EFSCORRUPTED if a mapping lookup does not
 * return exactly one mapping, or the first error from a helper.
 */
STATIC int
xrep_xattr_recover(
struct xrep_xattr *rx)
{
struct xfs_bmbt_irec got;
struct xfs_scrub *sc = rx->sc;
struct xfs_da_geometry *geo = sc->mp->m_attr_geo;
xfs_fileoff_t offset;
xfs_extlen_t len;
xfs_dablk_t dabno;
int nmap;
int error;
/*
 * Iterate each xattr leaf block in the attr fork to scan them for any
 * attributes that we might salvage.  Each pass of the outer loop looks
 * up one mapping starting at @offset and then moves @offset to the end
 * of that mapping.
 */
for (offset = 0;
offset < XFS_MAX_FILEOFF;
offset = got.br_startoff + got.br_blockcount) {
nmap = 1;
error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset,
&got, &nmap, XFS_BMAPI_ATTRFORK);
if (error)
return error;
if (nmap != 1)
return -EFSCORRUPTED;
/* Mappings that are not written extents have nothing to salvage. */
if (!xfs_bmap_is_written_extent(&got))
continue;
/*
 * Step through the extent starting from the first offset that is a
 * multiple of geo->fsbcount.  recover_block reports through @len how
 * many fs blocks the buffer it examined covered, and that is how far
 * we advance.
 */
for (dabno = round_up(got.br_startoff, geo->fsbcount);
dabno < got.br_startoff + got.br_blockcount;
dabno += len) {
xfs_fileoff_t curr_offset = dabno - got.br_startoff;
xfs_extlen_t maxlen;
if (xchk_should_terminate(rx->sc, &error))
return error;
/* Never look past the end of this extent. */
maxlen = min_t(xfs_filblks_t, INT_MAX,
got.br_blockcount - curr_offset);
error = xrep_xattr_recover_block(rx, dabno,
curr_offset + got.br_startblock,
maxlen, &len);
if (error)
return error;
/* Flushing commits the scrub transaction and cycles the ILOCK. */
if (xrep_xattr_want_flush_stashed(rx)) {
error = xrep_xattr_flush_stashed(rx);
if (error)
return error;
}
}
}
return 0;
}
/*
 * Reset the extended attribute fork to a state where we can start re-adding
 * the salvaged attributes.
 *
 * If the data fork is in btree format, the attr fork is reinitialized as an
 * empty shortform structure and logged.  Otherwise the attr fork is removed
 * outright, unless it still has extents, in which case the leftover mappings
 * are logged and -EFSCORRUPTED is returned.
 */
STATIC int
xrep_xattr_fork_remove(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK);
	struct xfs_attr_sf_hdr	*sfhdr;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	irec;
	unsigned int		nr = 0;

	/*
	 * If the data fork is in btree format, we can't change di_forkoff
	 * because we could run afoul of the rule that the data fork isn't
	 * supposed to be in btree format if there's enough space in the fork
	 * that it could have used extents format.  Instead, reinitialize the
	 * attr fork to have a shortform structure with zero attributes.
	 */
	if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
		ifp->if_format = XFS_DINODE_FMT_LOCAL;
		sfhdr = xfs_idata_realloc(ip,
				(int)sizeof(*sfhdr) - ifp->if_bytes,
				XFS_ATTR_FORK);
		sfhdr->count = 0;
		sfhdr->totsize = cpu_to_be16(sizeof(*sfhdr));
		xfs_trans_log_inode(sc->tp, ip,
				XFS_ILOG_CORE | XFS_ILOG_ADATA);
		return 0;
	}

	/* The normal case: no extents left, so the fork can simply go. */
	if (ifp->if_nextents == 0) {
		xfs_attr_fork_remove(ip, sc->tp);
		return 0;
	}

	/* If we still have attr fork extents, something's wrong. */
	xfs_emerg(sc->mp,
"inode 0x%llx attr fork still has %llu attr extents, format %d?!",
			ip->i_ino, ifp->if_nextents, ifp->if_format);
	for_each_xfs_iext(ifp, &icur, &irec) {
		xfs_err(sc->mp,
"[%u]: startoff %llu startblock %llu blockcount %llu state %u",
				nr++, irec.br_startoff,
				irec.br_startblock, irec.br_blockcount,
				irec.br_state);
	}
	ASSERT(0);
	return -EFSCORRUPTED;
}
/*
 * Release every attr fork block of the file under repair and then delete (or
 * reinitialize) the fork itself.  The caller must hold the ILOCK on the scrub
 * file and must already have joined it to the transaction.  On success the
 * transaction has been rolled with the inode still joined.
 */
int
xrep_xattr_reset_fork(
	struct xfs_scrub	*sc)
{
	int			error = 0;

	trace_xrep_xattr_reset_fork(sc->ip, sc->ip);

	/* Only forks that can map extents have blocks to reap. */
	if (xfs_ifork_has_extents(&sc->ip->i_af))
		error = xrep_reap_ifork(sc, sc->ip, XFS_ATTR_FORK);
	if (!error)
		error = xrep_xattr_fork_remove(sc, sc->ip);
	if (error)
		return error;

	return xfs_trans_roll_inode(&sc->tp, sc->ip);
}
/*
* Free all the attribute fork blocks of the temporary file and delete the attr
* fork. The caller must ILOCK the tempfile and join it to the transaction.
* This function returns with the inode joined to a clean scrub transaction.
*/
STATIC int
xrep_xattr_reset_tempfile_fork(
struct xfs_scrub *sc)
{
int error;
trace_xrep_xattr_reset_fork(sc->ip, sc->tempip);
/*
* Wipe out the attr fork of the temp file so that regular inode
* inactivation won't trip over the corrupt attr fork.
*/
if (xfs_ifork_has_extents(&sc->tempip->i_af)) {
error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK);
if (error)
return error;
}
return xrep_xattr_fork_remove(sc, sc->tempip);
}
/*
* Find all the extended attributes for this inode by scraping them out of the
* attribute key blocks by hand, and flushing them into the temp file.
* When we're done, free the staging memory before exchanging the xattr
* structures to reduce memory usage.
*/
STATIC int
xrep_xattr_salvage_attributes(
struct xrep_xattr *rx)
{
struct xfs_inode *ip = rx->sc->ip;
int error;
/* Short format xattrs are easy! */
if (rx->sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) {
error = xrep_xattr_recover_sf(rx);
if (error)
return error;
return xrep_xattr_flush_stashed(rx);
}
/*
* For non-inline xattr structures, the salvage function scans the
* buffer cache looking for potential attr leaf blocks. The scan
* requires the ability to lock any buffer found and runs independently
* of any transaction <-> buffer item <-> buffer linkage. Therefore,
* roll the transaction to ensure there are no buffers joined. We hold
* the ILOCK independently of the transaction.
*/
error = xfs_trans_roll(&rx->sc->tp);
if (error)
return error;
error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK);
if (error)
return error;
error = xrep_xattr_recover(rx);
if (error)
return error;
return xrep_xattr_flush_stashed(rx);
}
/*
 * Get both attr forks ready for a mapping exchange: a shortform tempfile fork
 * is promoted to leaf format, and a shortform fork in the file being repaired
 * is replaced by an empty extent list.
 */
STATIC int
xrep_xattr_swap_prep(
	struct xfs_scrub	*sc,
	bool			temp_local,
	bool			ip_local)
{
	struct xfs_ifork	*afork;
	int			error;

	if (temp_local) {
		/*
		 * The atomic mapping exchange works on extents, so turn the
		 * tempfile's shortform attrs into a single leaf block.
		 */
		struct xfs_da_args	to_leaf = {
			.trans		= sc->tp,
			.dp		= sc->tempip,
			.owner		= sc->ip->i_ino,
			.geo		= sc->mp->m_attr_geo,
			.whichfork	= XFS_ATTR_FORK,
			.total		= 1,
		};

		error = xfs_attr_shortform_to_leaf(&to_leaf);
		if (error)
			return error;

		/* Finish the deferred work so the transaction is clean. */
		error = xfs_defer_finish(&sc->tp);
		if (error)
			return error;
	}

	if (!ip_local)
		return 0;

	/*
	 * The file being repaired had a shortform attr fork; throw away the
	 * incore contents and present an empty extents-format fork instead.
	 */
	afork = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
	xfs_idestroy_fork(afork);
	afork->if_format = XFS_DINODE_FMT_EXTENTS;
	afork->if_nextents = 0;
	afork->if_bytes = 0;
	afork->if_data = NULL;
	afork->if_height = 0;

	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE | XFS_ILOG_ADATA);
	return 0;
}
/*
 * Replace the attr fork of the file being repaired with the one that was
 * rebuilt in the temporary file, either by copying shortform data directly
 * or by exchanging block mappings.
 */
STATIC int
xrep_xattr_swap(
	struct xfs_scrub	*sc,
	struct xrep_tempexch	*tx)
{
	const bool		ip_local =
			sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
	const bool		temp_local =
			sc->tempip->i_af.if_format == XFS_DINODE_FMT_LOCAL;
	int			error;

	/*
	 * If both files have a local format attr fork and the rebuilt xattr
	 * data fits in the repaired file's attr fork, copying the tempfile's
	 * contents over is all that is needed.
	 */
	if (ip_local && temp_local) {
		int		sf_bytes = xfs_attr_sf_totsize(sc->tempip);
		int		new_forkoff;

		new_forkoff = xfs_attr_shortform_bytesfit(sc->ip, sf_bytes);
		if (new_forkoff > 0) {
			sc->ip->i_forkoff = new_forkoff;
			xrep_tempfile_copyout_local(sc, XFS_ATTR_FORK);
			return 0;
		}
	}

	/* Fall back to exchanging mappings between two extent-based forks. */
	error = xrep_xattr_swap_prep(sc, temp_local, ip_local);
	if (error)
		return error;

	return xrep_tempexch_contents(sc, tx);
}
/*
 * Exchange the new extended attribute data (which we created in the tempfile)
 * with the file being repaired.
 *
 * If nothing was salvaged (rx->attrs_found == 0) the attr fork of sc->ip is
 * simply reset.  Otherwise the repair transaction is committed, the tempfile
 * is locked, an exchange transaction is allocated, the forks are swapped and
 * the old blocks (now owned by the tempfile) are reaped.  In both cases the
 * cached ACLs of sc->ip are dropped before returning 0.
 *
 * Returns 0 on success or the first error reported by a callee.
 * NOTE(review): the error returns after the tempfile IOLOCK/ILOCK have been
 * taken do not unlock here; presumably scrub teardown releases them based on
 * the lock flags tracked in @sc -- confirm.
 */
STATIC int
xrep_xattr_rebuild_tree(
struct xrep_xattr *rx)
{
struct xfs_scrub *sc = rx->sc;
int error;
/*
* If we didn't find any attributes to salvage, repair the file by
* zapping its attr fork.
*/
if (rx->attrs_found == 0) {
/* xrep_xattr_reset_fork requires the inode to be joined already. */
xfs_trans_ijoin(sc->tp, sc->ip, 0);
error = xrep_xattr_reset_fork(sc);
if (error)
return error;
goto forget_acls;
}
trace_xrep_xattr_rebuild_tree(sc->ip, sc->tempip);
/*
* Commit the repair transaction and drop the ILOCKs so that we can use
* the atomic file content exchange helper functions to compute the
* correct resource reservations.
*
* We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent xattr
* modifications, but there's nothing to prevent userspace from reading
* the attributes until we're ready for the exchange operation. Reads
* will return -EIO without shutting down the fs, so we're ok with
* that.
*/
error = xrep_trans_commit(sc);
if (error)
return error;
xchk_iunlock(sc, XFS_ILOCK_EXCL);
/*
* Take the IOLOCK on the temporary file so that we can run xattr
* operations with the same locks held as we would for a normal file.
* We still hold sc->ip's IOLOCK.
*/
error = xrep_tempfile_iolock_polled(rx->sc);
if (error)
return error;
/* Allocate exchange transaction and lock both inodes. */
error = xrep_tempexch_trans_alloc(rx->sc, XFS_ATTR_FORK, &rx->tx);
if (error)
return error;
/*
* Exchange the blocks mapped by the tempfile's attr fork with the file
* being repaired. The old attr blocks will then be attached to the
* tempfile, so reap its attr fork.
*/
error = xrep_xattr_swap(sc, &rx->tx);
if (error)
return error;
error = xrep_xattr_reset_tempfile_fork(sc);
if (error)
return error;
/*
* Roll to get a transaction without any inodes joined to it. Then we
* can drop the tempfile's ILOCK and IOLOCK before doing more work on
* the scrub target file.
*/
error = xfs_trans_roll(&sc->tp);
if (error)
return error;
xrep_tempfile_iunlock(sc);
xrep_tempfile_iounlock(sc);
forget_acls:
/* Invalidate cached ACLs now that we've reloaded all the xattrs. */
xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_FILE);
xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_DEFAULT);
return 0;
}
/*
 * Release the staging blob store, the staging record array, and finally the
 * repair context itself.
 */
STATIC void
xrep_xattr_teardown(
	struct xrep_xattr	*rx)
{
	struct xfblob		*blobs = rx->xattr_blobs;
	struct xfarray		*records = rx->xattr_records;

	xfblob_destroy(blobs);
	xfarray_destroy(records);
	kfree(rx);
}
/* Set up the filesystem scan so we can regenerate extended attributes. */
STATIC int
xrep_xattr_setup_scan(
struct xfs_scrub *sc,
struct xrep_xattr **rxp)
{
struct xrep_xattr *rx;
char *descr;
int max_len;
int error;
rx = kzalloc(sizeof(struct xrep_xattr), XCHK_GFP_FLAGS);
if (!rx)
return -ENOMEM;
rx->sc = sc;
/*
* Allocate enough memory to handle loading local attr values from the
* xfblob data while flushing stashed attrs to the temporary file.
* We only realloc the buffer when salvaging remote attr values.
*/
max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize);
error = xchk_setup_xattr_buf(rx->sc, max_len);
if (error == -ENOMEM)
error = -EDEADLOCK;
if (error)
goto out_rx;
/* Set up some staging for salvaged attribute keys and values */
descr = xchk_xfile_ino_descr(sc, "xattr keys");
error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key),
&rx->xattr_records);
kfree(descr);
if (error)
goto out_rx;
descr = xchk_xfile_ino_descr(sc, "xattr names");
error = xfblob_create(descr, &rx->xattr_blobs);
kfree(descr);
if (error)
goto out_keys;
*rxp = rx;
return 0;
out_keys:
xfarray_destroy(rx->xattr_records);
out_rx:
kfree(rx);
return error;
}
/*
* Repair the extended attribute metadata.
*
* XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer.
* The buffer cache in XFS can't handle aliased multiblock buffers, so this
* might misbehave if the attr fork is crosslinked with other filesystem
* metadata.
*/
int
xrep_xattr(
struct xfs_scrub *sc)
{
struct xrep_xattr *rx = NULL;
int error;
if (!xfs_inode_hasattr(sc->ip))
return -ENOENT;
/* The rmapbt is required to reap the old attr fork. */
if (!xfs_has_rmapbt(sc->mp))
return -EOPNOTSUPP;
error = xrep_xattr_setup_scan(sc, &rx);
if (error)
return error;
ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
error = xrep_xattr_salvage_attributes(rx);
if (error)
goto out_scan;
/* Last chance to abort before we start committing fixes. */
if (xchk_should_terminate(sc, &error))
goto out_scan;
error = xrep_xattr_rebuild_tree(rx);
if (error)
goto out_scan;
out_scan:
xrep_xattr_teardown(rx);
return error;
}
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2018-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_ATTR_REPAIR_H__
#define __XFS_SCRUB_ATTR_REPAIR_H__
/*
 * Free all the attribute fork blocks of the file being repaired (sc->ip) and
 * delete the fork.  The caller must ILOCK the scrub file and join it to the
 * transaction; returns 0 or a negative errno.
 */
int xrep_xattr_reset_fork(struct xfs_scrub *sc);
#endif /* __XFS_SCRUB_ATTR_REPAIR_H__ */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_DAB_BITMAP_H__
#define __XFS_SCRUB_DAB_BITMAP_H__
/*
 * Thin wrapper around struct xbitmap32 so that bitmaps keyed by xfs_dablk_t
 * get their own distinct, type-checked structure.
 */
struct xdab_bitmap {
struct xbitmap32 dabitmap;
};
/* Initialize the xbitmap32 embedded in @bitmap. */
static inline void
xdab_bitmap_init(
	struct xdab_bitmap	*bitmap)
{
	struct xbitmap32	*raw = &bitmap->dabitmap;

	xbitmap32_init(raw);
}
/* Tear down the xbitmap32 embedded in @bitmap. */
static inline void
xdab_bitmap_destroy(
	struct xdab_bitmap	*bitmap)
{
	struct xbitmap32	*raw = &bitmap->dabitmap;

	xbitmap32_destroy(raw);
}
/*
 * Record @len dablocks starting at @dabno in @bitmap.  Returns whatever
 * xbitmap32_set() returns; callers in this file treat nonzero as an error.
 */
static inline int
xdab_bitmap_set(
	struct xdab_bitmap	*bitmap,
	xfs_dablk_t		dabno,
	xfs_extlen_t		len)
{
	struct xbitmap32	*raw = &bitmap->dabitmap;

	return xbitmap32_set(raw, dabno, len);
}
/*
 * Query @bitmap for the range starting at @dabno; @len is passed by pointer
 * straight through to xbitmap32_test().  Callers in this file treat a true
 * return as "this dablock was already recorded".
 */
static inline bool
xdab_bitmap_test(
	struct xdab_bitmap	*bitmap,
	xfs_dablk_t		dabno,
	xfs_extlen_t		*len)
{
	struct xbitmap32	*raw = &bitmap->dabitmap;

	return xbitmap32_test(raw, dabno, len);
}
#endif /* __XFS_SCRUB_DAB_BITMAP_H__ */
......@@ -78,6 +78,22 @@ xchk_da_set_corrupt(
__return_address);
}
/* Flag a da btree node in need of optimization. */
void
xchk_da_set_preen(
struct xchk_da_btree *ds,
int level)
{
struct xfs_scrub *sc = ds->sc;
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN;
trace_xchk_fblock_preen(sc, ds->dargs.whichfork,
xfs_dir2_da_to_db(ds->dargs.geo,
ds->state->path.blk[level].blkno),
__return_address);
}
/* Find an entry at a certain level in a da btree. */
static struct xfs_da_node_entry *
xchk_da_btree_node_entry(
struct xchk_da_btree *ds,
......
......@@ -35,6 +35,9 @@ bool xchk_da_process_error(struct xchk_da_btree *ds, int level, int *error);
/* Check for da btree corruption. */
void xchk_da_set_corrupt(struct xchk_da_btree *ds, int level);
/* Flag a da btree node in need of optimization. */
void xchk_da_set_preen(struct xchk_da_btree *ds, int level);
int xchk_da_btree_hash(struct xchk_da_btree *ds, int level, __be32 *hashp);
int xchk_da_btree(struct xfs_scrub *sc, int whichfork,
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_attr_sf.h"
#include "xfs_trans.h"
#include "scrub/scrub.h"
#include "scrub/bitmap.h"
#include "scrub/dab_bitmap.h"
#include "scrub/listxattr.h"
/*
 * Invoke @attr_fn once for each entry of the shortform attr structure held in
 * @ip's attr fork.  Stops at, and returns, the first nonzero callback result.
 */
STATIC int
xchk_xattr_walk_sf(
	struct xfs_scrub		*sc,
	struct xfs_inode		*ip,
	xchk_xattr_fn			attr_fn,
	void				*priv)
{
	struct xfs_attr_sf_hdr		*sfhdr = ip->i_af.if_data;
	struct xfs_attr_sf_entry	*ent = xfs_attr_sf_firstentry(sfhdr);
	unsigned int			nr;

	for (nr = 0;
	     nr < sfhdr->count;
	     nr++, ent = xfs_attr_sf_nextentry(ent)) {
		int			error;

		/* The value bytes follow the name directly in nameval[]. */
		error = attr_fn(sc, ip, ent->flags, ent->nameval,
				ent->namelen, &ent->nameval[ent->namelen],
				ent->valuelen, priv);
		if (error)
			return error;
	}

	return 0;
}
/*
 * Invoke @attr_fn once for each entry of the attr leaf block in @bp.  Local
 * entries pass a pointer to the inline value; remote entries pass a NULL
 * value together with the recorded value length.  Stops at, and returns, the
 * first nonzero callback result.
 */
STATIC int
xchk_xattr_walk_leaf_entries(
	struct xfs_scrub		*sc,
	struct xfs_inode		*ip,
	xchk_xattr_fn			attr_fn,
	struct xfs_buf			*bp,
	void				*priv)
{
	struct xfs_attr3_icleaf_hdr	hdr;
	struct xfs_attr_leafblock	*leaf = bp->b_addr;
	struct xfs_attr_leaf_entry	*entries;
	unsigned int			idx;

	xfs_attr3_leaf_hdr_from_disk(sc->mp->m_attr_geo, &hdr, leaf);
	entries = xfs_attr3_leaf_entryp(leaf);

	for (idx = 0; idx < hdr.count; idx++) {
		struct xfs_attr_leaf_name_remote	*rmt;
		struct xfs_attr_leaf_name_local		*loc;
		struct xfs_attr_leaf_entry		*ent = &entries[idx];
		unsigned char				*name;
		void					*value;
		unsigned int				namelen;
		unsigned int				valuelen;
		int					error;

		if (!(ent->flags & XFS_ATTR_LOCAL)) {
			/* Remote value: only the name lives in this block. */
			rmt = xfs_attr3_leaf_name_remote(leaf, idx);
			name = rmt->name;
			namelen = rmt->namelen;
			value = NULL;
			valuelen = be32_to_cpu(rmt->valuelen);
		} else {
			/* Local value: stored right behind the name. */
			loc = xfs_attr3_leaf_name_local(leaf, idx);
			name = loc->nameval;
			namelen = loc->namelen;
			value = &loc->nameval[loc->namelen];
			valuelen = be16_to_cpu(loc->valuelen);
		}

		error = attr_fn(sc, ip, ent->flags, name, namelen, value,
				valuelen, priv);
		if (error)
			return error;
	}

	return 0;
}
/*
 * Walk a leaf-format xattr structure, which consists of the single leaf block
 * at dablock 0.  With only one block there is nothing to loop over, so no
 * loop-detection bitmap is allocated.
 */
STATIC int
xchk_xattr_walk_leaf(
	struct xfs_scrub		*sc,
	struct xfs_inode		*ip,
	xchk_xattr_fn			attr_fn,
	void				*priv)
{
	struct xfs_buf			*bp;
	int				error;

	error = xfs_attr3_leaf_read(sc->tp, ip, ip->i_ino, 0, &bp);
	if (!error) {
		error = xchk_xattr_walk_leaf_entries(sc, ip, attr_fn, bp,
				priv);
		xfs_trans_brelse(sc->tp, bp);
	}

	return error;
}
/*
 * Find the leftmost leaf in the xattr dabtree.
 *
 * Starting at dablock 0, follow the first ("before") pointer of each node
 * block until a block with an attr leaf magic is found.  Every block visited
 * is recorded in @seen_dablks so that revisiting a block is detected as a
 * loop.  On success the leaf buffer is returned through @leaf_bpp, still
 * held; the caller must release it.
 *
 * Returns 0 on success, -EFSCORRUPTED for loops, unexpected magics, failed
 * header checks or level mismatches, or an error from reading a block or
 * updating the bitmap.
 */
STATIC int
xchk_xattr_find_leftmost_leaf(
struct xfs_scrub *sc,
struct xfs_inode *ip,
struct xdab_bitmap *seen_dablks,
struct xfs_buf **leaf_bpp)
{
struct xfs_da3_icnode_hdr nodehdr;
struct xfs_mount *mp = sc->mp;
struct xfs_trans *tp = sc->tp;
struct xfs_da_intnode *node;
struct xfs_da_node_entry *btree;
struct xfs_buf *bp;
xfs_failaddr_t fa;
xfs_dablk_t blkno = 0;
unsigned int expected_level = 0;
int error;
for (;;) {
xfs_extlen_t len = 1;
uint16_t magic;
/* Make sure we haven't seen this new block already. */
if (xdab_bitmap_test(seen_dablks, blkno, &len))
return -EFSCORRUPTED;
error = xfs_da3_node_read(tp, ip, blkno, &bp, XFS_ATTR_FORK);
if (error)
return error;
node = bp->b_addr;
magic = be16_to_cpu(node->hdr.info.magic);
/* Reaching a leaf ends the descent; bp stays held for the checks below. */
if (magic == XFS_ATTR_LEAF_MAGIC ||
magic == XFS_ATTR3_LEAF_MAGIC)
break;
/* From here on, every early exit in this iteration means corruption. */
error = -EFSCORRUPTED;
if (magic != XFS_DA_NODE_MAGIC &&
magic != XFS_DA3_NODE_MAGIC)
goto out_buf;
fa = xfs_da3_node_header_check(bp, ip->i_ino);
if (fa)
goto out_buf;
xfs_da3_node_hdr_from_disk(mp, &nodehdr, node);
if (nodehdr.count == 0 || nodehdr.level >= XFS_DA_NODE_MAXDEPTH)
goto out_buf;
/* Check the level from the root node. */
/* The root sets the expectation; each lower node must be one level down. */
if (blkno == 0)
expected_level = nodehdr.level - 1;
else if (expected_level != nodehdr.level)
goto out_buf;
else
expected_level--;
/* Remember that we've seen this node. */
error = xdab_bitmap_set(seen_dablks, blkno, 1);
if (error)
goto out_buf;
/* Find the next level towards the leaves of the dabtree. */
btree = nodehdr.btree;
blkno = be32_to_cpu(btree->before);
xfs_trans_brelse(tp, bp);
}
error = -EFSCORRUPTED;
fa = xfs_attr3_leaf_header_check(bp, ip->i_ino);
if (fa)
goto out_buf;
/* The leaf must sit exactly where the node levels said it would. */
if (expected_level != 0)
goto out_buf;
/* Remember that we've seen this leaf. */
error = xdab_bitmap_set(seen_dablks, blkno, 1);
if (error)
goto out_buf;
*leaf_bpp = bp;
return 0;
out_buf:
xfs_trans_brelse(tp, bp);
return error;
}
/*
 * Call a function for every entry in a node-format xattr structure.
 *
 * Locates the leftmost leaf and then follows each leaf's forward sibling
 * pointer until it is zero, calling @attr_fn for every entry on the way.  A
 * bitmap of visited dablocks is used to detect sibling loops.
 *
 * Returns 0 on success, -EFSCORRUPTED if a leaf would be visited twice, or
 * the first error reported by a callee or by @attr_fn.
 */
STATIC int
xchk_xattr_walk_node(
struct xfs_scrub *sc,
struct xfs_inode *ip,
xchk_xattr_fn attr_fn,
void *priv)
{
struct xfs_attr3_icleaf_hdr leafhdr;
struct xdab_bitmap seen_dablks;
struct xfs_mount *mp = sc->mp;
struct xfs_attr_leafblock *leaf;
struct xfs_buf *leaf_bp;
int error;
xdab_bitmap_init(&seen_dablks);
error = xchk_xattr_find_leftmost_leaf(sc, ip, &seen_dablks, &leaf_bp);
if (error)
goto out_bitmap;
for (;;) {
xfs_extlen_t len;
error = xchk_xattr_walk_leaf_entries(sc, ip, attr_fn, leaf_bp,
priv);
if (error)
goto out_leaf;
/* Find the right sibling of this leaf block. */
leaf = leaf_bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
/* No forward sibling: the walk is complete (error is 0 here). */
if (leafhdr.forw == 0)
goto out_leaf;
/*
 * The current leaf is released before the next one is read, so the
 * failure exits below must skip the release at out_leaf.
 */
xfs_trans_brelse(sc->tp, leaf_bp);
/* Make sure we haven't seen this new leaf already. */
len = 1;
if (xdab_bitmap_test(&seen_dablks, leafhdr.forw, &len)) {
error = -EFSCORRUPTED;
goto out_bitmap;
}
error = xfs_attr3_leaf_read(sc->tp, ip, ip->i_ino,
leafhdr.forw, &leaf_bp);
if (error)
goto out_bitmap;
/* Remember that we've seen this new leaf. */
error = xdab_bitmap_set(&seen_dablks, leafhdr.forw, 1);
if (error)
goto out_leaf;
}
out_leaf:
xfs_trans_brelse(sc->tp, leaf_bp);
out_bitmap:
xdab_bitmap_destroy(&seen_dablks);
return error;
}
/*
 * Call @attr_fn for every extended attribute of @ip, dispatching on the
 * format of the attr fork (shortform, single leaf, or node/dabtree).
 *
 * The ILOCK must be held by the caller (shared or exclusive).  There is no
 * validation and no cursor restart.  An inode without an attr fork yields 0;
 * problems found while walking, including loops in the dabtree, are reported
 * as -EFSCORRUPTED.
 */
int
xchk_xattr_walk(
	struct xfs_scrub	*sc,
	struct xfs_inode	*ip,
	xchk_xattr_fn		attr_fn,
	void			*priv)
{
	int			error;

	xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);

	if (!xfs_inode_hasattr(ip))
		return 0;

	if (ip->i_af.if_format != XFS_DINODE_FMT_LOCAL) {
		/* The block-based walkers need the attr fork extents loaded. */
		error = xfs_iread_extents(sc->tp, ip, XFS_ATTR_FORK);
		if (error)
			return error;

		if (!xfs_attr_is_leaf(ip))
			return xchk_xattr_walk_node(sc, ip, attr_fn, priv);

		return xchk_xattr_walk_leaf(sc, ip, attr_fn, priv);
	}

	return xchk_xattr_walk_sf(sc, ip, attr_fn, priv);
}
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (c) 2022-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_LISTXATTR_H__
#define __XFS_SCRUB_LISTXATTR_H__
/*
 * Callback invoked by xchk_xattr_walk() for each extended attribute found.
 * @attr_flags carries the entry's flags, @name/@namelen the attribute name
 * and @valuelen the recorded value length.  @value points at the value for
 * shortform and local leaf entries and is NULL for remote leaf entries.
 * @priv is the pointer handed to xchk_xattr_walk().  A nonzero return stops
 * the walk and is passed back to the caller of xchk_xattr_walk().
 */
typedef int (*xchk_xattr_fn)(struct xfs_scrub *sc, struct xfs_inode *ip,
unsigned int attr_flags, const unsigned char *name,
unsigned int namelen, const void *value, unsigned int valuelen,
void *priv);
/* Call @attr_fn for every extended attribute of @ip; ILOCK must be held. */
int xchk_xattr_walk(struct xfs_scrub *sc, struct xfs_inode *ip,
xchk_xattr_fn attr_fn, void *priv);
#endif /* __XFS_SCRUB_LISTXATTR_H__ */
......@@ -32,6 +32,9 @@
#include "xfs_reflink.h"
#include "xfs_health.h"
#include "xfs_buf_mem.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
......@@ -39,6 +42,7 @@
#include "scrub/bitmap.h"
#include "scrub/stats.h"
#include "scrub/xfile.h"
#include "scrub/attr_repair.h"
/*
* Attempt to repair some metadata, if the metadata is corrupt and userspace
......@@ -1136,6 +1140,17 @@ xrep_metadata_inode_forks(
return error;
}
/* Clear the attr forks since metadata shouldn't have that. */
if (xfs_inode_hasattr(sc->ip)) {
if (!dirty) {
dirty = true;
xfs_trans_ijoin(sc->tp, sc->ip, 0);
}
error = xrep_xattr_reset_fork(sc);
if (error)
return error;
}
/*
* If we modified the inode, roll the transaction but don't rejoin the
* inode to the new transaction because xrep_bmap_data can do that.
......@@ -1201,3 +1216,34 @@ xrep_trans_cancel_hook_dummy(
current->journal_info = *cookiep;
*cookiep = NULL;
}
/*
 * Test whether @bp passes the ->verify_struct() check of @ops.
 *
 * A buffer that already has different ops attached is rejected outright.
 * Otherwise @ops is installed temporarily, the structure check is run (CRCs
 * are not checked), and the buffer's previous ops and error state are put
 * back before returning.
 *
 * Returns true if the structure check found no problem.
 */
bool
xrep_buf_verify_struct(
	struct xfs_buf			*bp,
	const struct xfs_buf_ops	*ops)
{
	const struct xfs_buf_ops	*saved_ops = bp->b_ops;
	xfs_failaddr_t			failed_at;
	int				saved_error;

	if (saved_ops && saved_ops != ops)
		return false;

	saved_error = bp->b_error;

	/* Borrow the buffer for one structure check, then restore it. */
	bp->b_ops = ops;
	failed_at = bp->b_ops->verify_struct(bp);
	bp->b_ops = saved_ops;
	bp->b_error = saved_error;

	return !failed_at;
}
......@@ -90,6 +90,7 @@ int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten);
int xrep_metadata_inode_forks(struct xfs_scrub *sc);
int xrep_setup_ag_rmapbt(struct xfs_scrub *sc);
int xrep_setup_ag_refcountbt(struct xfs_scrub *sc);
int xrep_setup_xattr(struct xfs_scrub *sc);
/* Repair setup functions */
int xrep_setup_ag_allocbt(struct xfs_scrub *sc);
......@@ -123,6 +124,7 @@ int xrep_bmap_attr(struct xfs_scrub *sc);
int xrep_bmap_cow(struct xfs_scrub *sc);
int xrep_nlinks(struct xfs_scrub *sc);
int xrep_fscounters(struct xfs_scrub *sc);
int xrep_xattr(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xrep_rtbitmap(struct xfs_scrub *sc);
......@@ -147,6 +149,8 @@ int xrep_trans_alloc_hook_dummy(struct xfs_mount *mp, void **cookiep,
struct xfs_trans **tpp);
void xrep_trans_cancel_hook_dummy(void **cookiep, struct xfs_trans *tp);
bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
#else
#define xrep_ino_dqattach(sc) (0)
......@@ -190,6 +194,7 @@ xrep_setup_nothing(
#define xrep_setup_ag_allocbt xrep_setup_nothing
#define xrep_setup_ag_rmapbt xrep_setup_nothing
#define xrep_setup_ag_refcountbt xrep_setup_nothing
#define xrep_setup_xattr xrep_setup_nothing
#define xrep_setup_inode(sc, imap) ((void)0)
......@@ -215,6 +220,7 @@ xrep_setup_nothing(
#define xrep_nlinks xrep_notsupported
#define xrep_fscounters xrep_notsupported
#define xrep_rtsummary xrep_notsupported
#define xrep_xattr xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
......
......@@ -331,7 +331,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.type = ST_INODE,
.setup = xchk_setup_xattr,
.scrub = xchk_xattr,
.repair = xrep_notsupported,
.repair = xrep_xattr,
},
[XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */
.type = ST_INODE,
......
......@@ -14,6 +14,8 @@ struct xrep_tempexch {
int xrep_tempexch_enable(struct xfs_scrub *sc);
int xrep_tempexch_trans_reserve(struct xfs_scrub *sc, int whichfork,
struct xrep_tempexch *ti);
int xrep_tempexch_trans_alloc(struct xfs_scrub *sc, int whichfork,
struct xrep_tempexch *ti);
int xrep_tempexch_contents(struct xfs_scrub *sc, struct xrep_tempexch *ti);
#endif /* CONFIG_XFS_ONLINE_REPAIR */
......
......@@ -239,6 +239,28 @@ xrep_tempfile_iunlock(
sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
}
/*
 * Take ILOCK_EXCL on both the file being scrubbed and the temporary file, and
 * record in @sc that both locks are now held.
 */
void
xrep_tempfile_ilock_both(
	struct xfs_scrub	*sc)
{
	const unsigned int	mode = XFS_ILOCK_EXCL;

	xfs_lock_two_inodes(sc->ip, mode, sc->tempip, mode);

	sc->temp_ilock_flags |= mode;
	sc->ilock_flags |= mode;
}
/*
 * Drop ILOCK_EXCL on both files: the temporary file first, then the file
 * being scrubbed.
 */
void
xrep_tempfile_iunlock_both(
	struct xfs_scrub	*sc)
{
	const unsigned int	mode = XFS_ILOCK_EXCL;

	xrep_tempfile_iunlock(sc);
	xchk_iunlock(sc, mode);
}
/* Release the temporary file. */
void
xrep_tempfile_rele(
......@@ -514,6 +536,89 @@ xrep_tempexch_prep_request(
return 0;
}
/*
* Fill out the mapping exchange resource estimation structures in preparation
* for exchanging the contents of a metadata file that we've rebuilt in the
* temp file. Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files.
*/
STATIC int
xrep_tempexch_estimate(
struct xfs_scrub *sc,
struct xrep_tempexch *tx)
{
struct xfs_exchmaps_req *req = &tx->req;
struct xfs_ifork *ifp;
struct xfs_ifork *tifp;
int whichfork = xfs_exchmaps_reqfork(req);
int state = 0;
/*
* The exchmaps code only knows how to exchange file fork space
* mappings. Any fork data in local format must be promoted to a
* single block before the exchange can take place.
*/
ifp = xfs_ifork_ptr(sc->ip, whichfork);
if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
state |= 1;
tifp = xfs_ifork_ptr(sc->tempip, whichfork);
if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
state |= 2;
switch (state) {
case 0:
/* Both files have mapped extents; use the regular estimate. */
return xfs_exchrange_estimate(req);
case 1:
/*
* The file being repaired is in local format, but the temp
* file has mapped extents. To perform the exchange, the file
* being repaired must have its shorform data converted to an
* ondisk block so that the forks will be in extents format.
* We need one resblk for the conversion; the number of
* exchanges is (worst case) the temporary file's extent count
* plus the block we converted.
*/
req->ip1_bcount = sc->tempip->i_nblocks;
req->ip2_bcount = 1;
req->nr_exchanges = 1 + tifp->if_nextents;
req->resblks = 1;
break;
case 2:
/*
* The temporary file is in local format, but the file being
* repaired has mapped extents. To perform the exchange, the
* temp file must have its shortform data converted to an
* ondisk block, and the fork changed to extents format. We
* need one resblk for the conversion; the number of exchanges
* is (worst case) the extent count of the file being repaired
* plus the block we converted.
*/
req->ip1_bcount = 1;
req->ip2_bcount = sc->ip->i_nblocks;
req->nr_exchanges = 1 + ifp->if_nextents;
req->resblks = 1;
break;
case 3:
/*
* Both forks are in local format. To perform the exchange,
* both files must have their shortform data converted to
* fsblocks, and both forks must be converted to extents
* format. We need two resblks for the two conversions, and
* the number of exchanges is 1 since there's only one block at
* fileoff 0. Presumably, the caller could not exchange the
* two inode fork areas directly.
*/
req->ip1_bcount = 1;
req->ip2_bcount = 1;
req->nr_exchanges = 1;
req->resblks = 2;
break;
}
return xfs_exchmaps_estimate_overhead(req);
}
/*
* Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip
* this if quota enforcement is disabled or if both inodes' dquots are the
......@@ -604,6 +709,55 @@ xrep_tempexch_trans_reserve(
return xrep_tempexch_reserve_quota(sc, tx);
}
/*
* Create a new transaction for a file contents exchange.
*
* This function fills out the mapping excahange request and resource
* estimation structures in preparation for exchanging the contents of a
* metadata file that has been rebuilt in the temp file. Next, it reserves
* space, takes ILOCK_EXCL of both inodes, joins them to the transaction and
* reserves quota for the transaction.
*
* The caller is responsible for dropping both ILOCKs when appropriate.
*/
int
xrep_tempexch_trans_alloc(
struct xfs_scrub *sc,
int whichfork,
struct xrep_tempexch *tx)
{
unsigned int flags = 0;
int error;
ASSERT(sc->tp == NULL);
error = xrep_tempexch_prep_request(sc, whichfork, tx);
if (error)
return error;
error = xrep_tempexch_estimate(sc, tx);
if (error)
return error;
if (xfs_has_lazysbcount(sc->mp))
flags |= XFS_TRANS_RES_FDBLKS;
error = xrep_tempexch_enable(sc);
if (error)
return error;
error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
tx->req.resblks, 0, flags, &sc->tp);
if (error)
return error;
sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
sc->ilock_flags |= XFS_ILOCK_EXCL;
xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip);
return xrep_tempexch_reserve_quota(sc, tx);
}
/*
* Exchange file mappings (and hence file contents) between the file being
* repaired and the temporary file. Returns with both inodes locked and joined
......@@ -637,3 +791,53 @@ xrep_tempexch_contents(
return 0;
}
/*
 * Copy local format data from one fork of the temporary file into the same
 * fork of the file being repaired, and for the data fork also carry over the
 * file sizes.  The caller must make sure that the file being repaired has
 * enough fork space to hold all the bytes.
 */
void
xrep_tempfile_copyout_local(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_ifork	*src = xfs_ifork_ptr(sc->tempip, whichfork);
	struct xfs_ifork	*dst = xfs_ifork_ptr(sc->ip, whichfork);

	ASSERT(src != NULL);
	ASSERT(dst != NULL);
	ASSERT(src->if_format == XFS_DINODE_FMT_LOCAL);
	ASSERT(dst->if_format == XFS_DINODE_FMT_LOCAL);

	/* Sanity-check that the destination fork area is large enough. */
	if (whichfork == XFS_DATA_FORK) {
		ASSERT(sc->tempip->i_disk_size <=
					xfs_inode_data_fork_size(sc->ip));
	} else if (whichfork == XFS_ATTR_FORK) {
		ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
	} else {
		ASSERT(0);
		return;
	}

	/* Rebuild sc->ip's incore fork from the tempfile's fork contents. */
	xfs_idestroy_fork(dst);
	xfs_init_local_fork(sc->ip, whichfork, src->if_data, src->if_bytes);

	if (whichfork == XFS_DATA_FORK) {
		i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
		sc->ip->i_disk_size = sc->tempip->i_disk_size;
	}

	xfs_trans_log_inode(sc->tp, sc->ip,
			XFS_ILOG_CORE | xfs_ilog_fdata(whichfork));
}
......@@ -17,6 +17,8 @@ void xrep_tempfile_iounlock(struct xfs_scrub *sc);
void xrep_tempfile_ilock(struct xfs_scrub *sc);
bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc);
void xrep_tempfile_iunlock(struct xfs_scrub *sc);
void xrep_tempfile_iunlock_both(struct xfs_scrub *sc);
void xrep_tempfile_ilock_both(struct xfs_scrub *sc);
int xrep_tempfile_prealloc(struct xfs_scrub *sc, xfs_fileoff_t off,
xfs_filblks_t len);
......@@ -32,6 +34,7 @@ int xrep_tempfile_copyin(struct xfs_scrub *sc, xfs_fileoff_t off,
int xrep_tempfile_set_isize(struct xfs_scrub *sc, unsigned long long isize);
int xrep_tempfile_roll_trans(struct xfs_scrub *sc);
void xrep_tempfile_copyout_local(struct xfs_scrub *sc, int whichfork);
#else
static inline void xrep_tempfile_iolock_both(struct xfs_scrub *sc)
{
......
......@@ -365,6 +365,7 @@ DEFINE_EVENT(xchk_fblock_error_class, name, \
DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_error);
DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_warning);
DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_preen);
#ifdef CONFIG_XFS_QUOTA
DECLARE_EVENT_CLASS(xchk_dqiter_class,
......@@ -948,6 +949,7 @@ DEFINE_XFILE_EVENT(xfile_store);
DEFINE_XFILE_EVENT(xfile_seek_data);
DEFINE_XFILE_EVENT(xfile_get_folio);
DEFINE_XFILE_EVENT(xfile_put_folio);
DEFINE_XFILE_EVENT(xfile_discard);
TRACE_EVENT(xfarray_create,
TP_PROTO(struct xfarray *xfa, unsigned long long required_capacity),
......@@ -2415,6 +2417,89 @@ TRACE_EVENT(xreap_bmapi_binval_scan,
__entry->scan_blocks)
);
/*
 * Tracepoint xrep_xattr_recover_leafblock: records the device and inode
 * number of @ip together with the caller-supplied da block offset (@dabno)
 * and the @magic value passed in by the caller.
 */
TRACE_EVENT(xrep_xattr_recover_leafblock,
TP_PROTO(struct xfs_inode *ip, xfs_dablk_t dabno, uint16_t magic),
TP_ARGS(ip, dabno, magic),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_dablk_t, dabno)
__field(uint16_t, magic)
),
TP_fast_assign(
/* The device number comes from the superblock of the inode's mount. */
__entry->dev = ip->i_mount->m_super->s_dev;
__entry->ino = ip->i_ino;
__entry->dabno = dabno;
__entry->magic = magic;
),
TP_printk("dev %d:%d ino 0x%llx dablk 0x%x magic 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->dabno,
__entry->magic)
);
/*
 * Event class for tracepoints that describe a single extended attribute
 * record: the owning inode, the attribute flags, the attribute name (copied
 * by length into the trace record) and the length of the value.
 */
DECLARE_EVENT_CLASS(xrep_xattr_salvage_class,
TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name,
unsigned int namelen, unsigned int valuelen),
TP_ARGS(ip, flags, name, namelen, valuelen),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(unsigned int, flags)
__field(unsigned int, namelen)
__dynamic_array(char, name, namelen)
__field(unsigned int, valuelen)
),
TP_fast_assign(
__entry->dev = ip->i_mount->m_super->s_dev;
__entry->ino = ip->i_ino;
__entry->flags = flags;
__entry->namelen = namelen;
/* Exactly namelen bytes are copied; no NUL terminator is added here. */
memcpy(__get_str(name), name, namelen);
__entry->valuelen = valuelen;
),
/* The name is printed with a '%.*s' precision of namelen bytes. */
TP_printk("dev %d:%d ino 0x%llx flags %s name '%.*s' valuelen 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_flags(__entry->flags, "|", XFS_ATTR_NAMESPACE_STR),
__entry->namelen,
__get_str(name),
__entry->valuelen)
);
/*
 * Define a tracepoint called @name that uses the xrep_xattr_salvage_class
 * record layout and argument list.
 */
#define DEFINE_XREP_XATTR_SALVAGE_EVENT(name) \
DEFINE_EVENT(xrep_xattr_salvage_class, name, \
TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name, \
unsigned int namelen, unsigned int valuelen), \
TP_ARGS(ip, flags, name, namelen, valuelen))
/* Tracepoints sharing the salvage record layout. */
DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_salvage_rec);
DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_insert_rec);
/*
 * Event class for xattr repair tracepoints that relate two inodes: @ip, whose
 * device and inode number are recorded, and @arg_ip, whose inode number is
 * recorded as "src".
 *
 * This is a template only, so it is declared with DECLARE_EVENT_CLASS rather
 * than TRACE_EVENT.  TRACE_EVENT would additionally instantiate a tracepoint
 * literally named xrep_xattr_class that nothing ever fires; the real
 * tracepoints are instantiated from this class with DEFINE_EVENT.
 */
DECLARE_EVENT_CLASS(xrep_xattr_class,
	TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip),
	TP_ARGS(ip, arg_ip),
	TP_STRUCT__entry(
		__field(dev_t, dev)
		__field(xfs_ino_t, ino)
		__field(xfs_ino_t, src_ino)
	),
	TP_fast_assign(
		__entry->dev = ip->i_mount->m_super->s_dev;
		__entry->ino = ip->i_ino;
		__entry->src_ino = arg_ip->i_ino;
	),
	TP_printk("dev %d:%d ino 0x%llx src 0x%llx",
		  MAJOR(__entry->dev), MINOR(__entry->dev),
		  __entry->ino,
		  __entry->src_ino)
);
/*
 * Define a tracepoint called @name that uses the xrep_xattr_class record
 * layout: the traced inode plus a second inode reported as "src".
 */
#define DEFINE_XREP_XATTR_EVENT(name) \
DEFINE_EVENT(xrep_xattr_class, name, \
TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip), \
TP_ARGS(ip, arg_ip))
/* Tracepoints sharing the two-inode record layout. */
DEFINE_XREP_XATTR_EVENT(xrep_xattr_rebuild_tree);
DEFINE_XREP_XATTR_EVENT(xrep_xattr_reset_fork);
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
......
......@@ -1051,3 +1051,20 @@ xfarray_sort(
kvfree(si);
return error;
}
/*
 * Report how many bytes this array is consuming, as reported by xfile_bytes()
 * for the xfile attached to the array.
 */
unsigned long long
xfarray_bytes(
	struct xfarray		*array)
{
	unsigned long long	nr_bytes;

	nr_bytes = xfile_bytes(array->xfile);
	return nr_bytes;
}
/* Empty the entire array. */
void
xfarray_truncate(
struct xfarray *array)
{
xfile_discard(array->xfile, 0, MAX_LFS_FILESIZE);
array->nr = 0;
}
......@@ -44,6 +44,8 @@ int xfarray_unset(struct xfarray *array, xfarray_idx_t idx);
int xfarray_store(struct xfarray *array, xfarray_idx_t idx, const void *ptr);
int xfarray_store_anywhere(struct xfarray *array, const void *ptr);
bool xfarray_element_is_null(struct xfarray *array, const void *ptr);
void xfarray_truncate(struct xfarray *array);
unsigned long long xfarray_bytes(struct xfarray *array);
/*
* Load an array element, but zero the buffer if there's no data because we
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/xfblob.h"
/*
* XFS Blob Storage
* ================
* Stores and retrieves blobs using an xfile. Objects are appended to the file
* and the offset is returned as a magic cookie for retrieval.
*/
/* Magic number stamped into every blob key header; checked on load and free. */
#define XB_KEY_MAGIC 0xABAADDAD
/*
 * Header written to the xfile immediately before each blob's bytes.  The
 * loader and the free routine reject a header whose magic or recorded offset
 * does not match the cookie they were given.
 */
struct xb_key {
uint32_t xb_magic; /* XB_KEY_MAGIC */
uint32_t xb_size; /* size of the blob, in bytes */
loff_t xb_offset; /* byte offset of this key */
/* blob comes after here */
} __packed;
/* Initialize a blob storage object. */
int
xfblob_create(
const char *description,
struct xfblob **blobp)
{
struct xfblob *blob;
struct xfile *xfile;
int error;
error = xfile_create(description, 0, &xfile);
if (error)
return error;
blob = kmalloc(sizeof(struct xfblob), XCHK_GFP_FLAGS);
if (!blob) {
error = -ENOMEM;
goto out_xfile;
}
blob->xfile = xfile;
blob->last_offset = PAGE_SIZE;
*blobp = blob;
return 0;
out_xfile:
xfile_destroy(xfile);
return error;
}
/*
 * Tear down a blob storage object: destroy the backing xfile first, then
 * free the xfblob itself.
 */
void
xfblob_destroy(
	struct xfblob		*blob)
{
	struct xfile		*backing = blob->xfile;

	xfile_destroy(backing);
	kfree(blob);
}
/*
 * Retrieve a blob.
 *
 * Reads the key header stored at @cookie and validates it before copying the
 * blob into @ptr, which can hold @size bytes.  Only the number of bytes
 * recorded in the header is copied.
 *
 * Returns 0 on success, the xfile_load() error if a read fails, -ENODATA if
 * the header's magic or recorded offset does not match @cookie, or -EFBIG if
 * the stored blob is larger than @size.
 */
int
xfblob_load(
	struct xfblob		*blob,
	xfblob_cookie		cookie,
	void			*ptr,
	uint32_t		size)
{
	struct xb_key		hdr;
	bool			hdr_ok;
	int			ret;

	ret = xfile_load(blob->xfile, &hdr, sizeof(hdr), cookie);
	if (ret)
		return ret;

	/* The header must carry our magic and point back at itself. */
	hdr_ok = hdr.xb_magic == XB_KEY_MAGIC && hdr.xb_offset == cookie;
	if (!hdr_ok) {
		ASSERT(0);
		return -ENODATA;
	}

	/* Refuse to overrun the caller's buffer. */
	if (hdr.xb_size > size) {
		ASSERT(0);
		return -EFBIG;
	}

	/* The blob bytes start right after the header. */
	return xfile_load(blob->xfile, ptr, hdr.xb_size, cookie + sizeof(hdr));
}
/*
 * Store a blob.
 *
 * Writes a key header followed by @size bytes from @ptr at the current end of
 * the blob store.  On success, the offset of the header is returned through
 * @cookie and the end of the store advances past the header and the blob.
 *
 * Returns 0 on success or the xfile_store() error.  If writing the blob bytes
 * fails, the header region that was just written is discarded and the end of
 * the store is left where it was.
 */
int
xfblob_store(
	struct xfblob		*blob,
	xfblob_cookie		*cookie,
	const void		*ptr,
	uint32_t		size)
{
	const loff_t		start = blob->last_offset;
	struct xb_key		hdr = {
		.xb_magic	= XB_KEY_MAGIC,
		.xb_size	= size,
		.xb_offset	= start,
	};
	int			ret;

	ret = xfile_store(blob->xfile, &hdr, sizeof(hdr), start);
	if (ret)
		return ret;

	ret = xfile_store(blob->xfile, ptr, size, start + sizeof(hdr));
	if (ret) {
		/* Throw away the orphaned header. */
		xfile_discard(blob->xfile, start, sizeof(hdr));
		return ret;
	}

	*cookie = start;
	blob->last_offset = start + (sizeof(hdr) + size);
	return 0;
}
/*
 * Free a blob.
 *
 * Validates the key header stored at @cookie, then discards the xfile range
 * covering the header and the blob bytes.  The end-of-store offset is not
 * changed.
 *
 * Returns 0 on success, the xfile_load() error if the header cannot be read,
 * or -ENODATA if the header's magic or recorded offset does not match
 * @cookie.
 */
int
xfblob_free(
	struct xfblob		*blob,
	xfblob_cookie		cookie)
{
	struct xb_key		hdr;
	int			ret;

	ret = xfile_load(blob->xfile, &hdr, sizeof(hdr), cookie);
	if (ret)
		return ret;

	if (hdr.xb_magic == XB_KEY_MAGIC && hdr.xb_offset == cookie) {
		xfile_discard(blob->xfile, cookie, sizeof(hdr) + hdr.xb_size);
		return 0;
	}

	/* Not a key we wrote, or not the key the caller asked for. */
	ASSERT(0);
	return -ENODATA;
}
/*
 * Report how many bytes this blob storage object is consuming, as reported by
 * xfile_bytes() for the backing xfile.
 */
unsigned long long
xfblob_bytes(
	struct xfblob		*blob)
{
	unsigned long long	nr_bytes;

	nr_bytes = xfile_bytes(blob->xfile);
	return nr_bytes;
}
/*
 * Drop all the blobs: discard the backing xfile from offset PAGE_SIZE onward
 * and move the end-of-store offset back to PAGE_SIZE, which is where a newly
 * created blob store starts.
 */
void
xfblob_truncate(
	struct xfblob		*blob)
{
	struct xfile		*backing = blob->xfile;

	xfile_discard(backing, PAGE_SIZE, MAX_LFS_FILESIZE - PAGE_SIZE);
	blob->last_offset = PAGE_SIZE;
}
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_XFBLOB_H__
#define __XFS_SCRUB_XFBLOB_H__
/* State for one blob storage object. */
struct xfblob {
/* xfile that holds the key headers and blob bytes */
struct xfile *xfile;
/* byte offset in the xfile at which the next blob's key will be written */
loff_t last_offset;
};
/*
 * Cookie identifying a stored blob: the byte offset in the xfile of the
 * blob's key header.
 */
typedef loff_t xfblob_cookie;
/* Create a blob store backed by a new xfile; returned through @blobp. */
int xfblob_create(const char *descr, struct xfblob **blobp);
/* Destroy the backing xfile and free the blob store. */
void xfblob_destroy(struct xfblob *blob);
/* Copy the blob named by @cookie into @ptr, which can hold @size bytes. */
int xfblob_load(struct xfblob *blob, xfblob_cookie cookie, void *ptr,
uint32_t size);
/* Append @size bytes from @ptr; the new blob's cookie is returned in @cookie. */
int xfblob_store(struct xfblob *blob, xfblob_cookie *cookie, const void *ptr,
uint32_t size);
/* Discard the storage backing the blob named by @cookie. */
int xfblob_free(struct xfblob *blob, xfblob_cookie cookie);
/* Report the number of bytes consumed by the backing xfile. */
unsigned long long xfblob_bytes(struct xfblob *blob);
/* Discard every stored blob and reset the store to its initial offset. */
void xfblob_truncate(struct xfblob *blob);
#endif /* __XFS_SCRUB_XFBLOB_H__ */
......@@ -310,3 +310,15 @@ xfile_put_folio(
folio_unlock(folio);
folio_put(folio);
}
/*
 * Discard the page cache that's backing a range of the xfile.
 *
 * The range starts at byte @pos and is @count bytes long.  The last byte of
 * the range is handed to shmem_truncate_range() as pos + count - 1.
 */
void
xfile_discard(
	struct xfile		*xf,
	loff_t			pos,
	u64			count)
{
	const loff_t		last_byte = pos + count - 1;

	trace_xfile_discard(xf, pos, count);
	shmem_truncate_range(file_inode(xf->file), pos, last_byte);
}
......@@ -17,6 +17,7 @@ int xfile_load(struct xfile *xf, void *buf, size_t count, loff_t pos);
int xfile_store(struct xfile *xf, const void *buf, size_t count,
loff_t pos);
void xfile_discard(struct xfile *xf, loff_t pos, u64 count);
loff_t xfile_seek_data(struct xfile *xf, loff_t pos);
#define XFILE_MAX_FOLIO_SIZE (PAGE_SIZE << MAX_PAGECACHE_ORDER)
......@@ -26,4 +27,9 @@ struct folio *xfile_get_folio(struct xfile *xf, loff_t offset, size_t len,
unsigned int flags);
void xfile_put_folio(struct xfile *xf, struct folio *folio);
/*
 * Report the space used by the xfile, computed as the i_blocks count of the
 * inode behind xf->file shifted left by SECTOR_SHIFT.
 */
static inline unsigned long long xfile_bytes(struct xfile *xf)
{
return file_inode(xf->file)->i_blocks << SECTOR_SHIFT;
}
#endif /* __XFS_SCRUB_XFILE_H__ */
......@@ -494,6 +494,9 @@ _xfs_buf_obj_cmp(
* it stale has not yet committed. i.e. we are
* reallocating a busy extent. Skip this buffer and
* continue searching for an exact match.
*
* Note: If we're scanning for incore buffers to stale, don't
* complain if we find non-stale buffers.
*/
if (!(map->bm_flags & XBM_LIVESCAN))
ASSERT(bp->b_flags & XBF_STALE);
......
......@@ -31,6 +31,8 @@
* pos: file offset, in bytes
* bytecount: number of bytes
*
* dablk: directory or xattr block offset, in filesystem blocks
*
* disize: ondisk file size, in bytes
* isize: incore file size, in bytes
*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment