Commit f697c2cc authored by Dave Chinner, committed by Dave Chinner

Merge tag 'scrub-dir-iget-fixes-6.4_2023-04-12' of...

Merge tag 'scrub-dir-iget-fixes-6.4_2023-04-12' of git://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into guilt/xfs-for-next

xfs: fix iget usage in directory scrub [v24.5]

In this series, we fix some problems with how the directory scrubber
grabs child inodes.  First, we want to reduce EDEADLOCK returns by
replacing fixed-iteration loops with interruptible trylock loops.
Second, we add UNTRUSTED to the child iget call so that we can detect a
dirent that points to an unallocated inode.  Third, we fix a bug where
we weren't checking the inode pointed to by dotdot entries at all.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
parents b1bdab25 6bb9209c
...@@ -158,6 +158,7 @@ xfs-y += $(addprefix scrub/, \ ...@@ -158,6 +158,7 @@ xfs-y += $(addprefix scrub/, \
ialloc.o \ ialloc.o \
inode.o \ inode.o \
parent.o \ parent.o \
readdir.o \
refcount.o \ refcount.o \
rmap.o \ rmap.o \
scrub.o \ scrub.o \
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "scrub/scrub.h" #include "scrub/scrub.h"
#include "scrub/common.h" #include "scrub/common.h"
#include "scrub/dabtree.h" #include "scrub/dabtree.h"
#include "scrub/readdir.h"
/* Set us up to scrub directories. */ /* Set us up to scrub directories. */
int int
...@@ -31,168 +32,120 @@ xchk_setup_directory( ...@@ -31,168 +32,120 @@ xchk_setup_directory(
/* Scrub a directory entry. */ /* Scrub a directory entry. */
struct xchk_dir_ctx { /* Check that an inode's mode matches a given XFS_DIR3_FT_* type. */
/* VFS fill-directory iterator */ STATIC void
struct dir_context dir_iter;
struct xfs_scrub *sc;
};
/* Check that an inode's mode matches a given DT_ type. */
STATIC int
xchk_dir_check_ftype( xchk_dir_check_ftype(
struct xchk_dir_ctx *sdc, struct xfs_scrub *sc,
xfs_fileoff_t offset, xfs_fileoff_t offset,
xfs_ino_t inum, struct xfs_inode *ip,
int dtype) int ftype)
{ {
struct xfs_mount *mp = sdc->sc->mp; struct xfs_mount *mp = sc->mp;
struct xfs_inode *ip;
int ino_dtype;
int error = 0;
if (!xfs_has_ftype(mp)) { if (!xfs_has_ftype(mp)) {
if (dtype != DT_UNKNOWN && dtype != DT_DIR) if (ftype != XFS_DIR3_FT_UNKNOWN && ftype != XFS_DIR3_FT_DIR)
xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
offset); return;
goto out;
}
/*
* Grab the inode pointed to by the dirent. We release the
* inode before we cancel the scrub transaction. Since we're
* don't know a priori that releasing the inode won't trigger
* eofblocks cleanup (which allocates what would be a nested
* transaction), we can't use DONTCACHE here because DONTCACHE
* inodes can trigger immediate inactive cleanup of the inode.
*
* If _iget returns -EINVAL or -ENOENT then the child inode number is
* garbage and the directory is corrupt. If the _iget returns
* -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a
* cross referencing error. Any other error is an operational error.
*/
error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip);
if (error == -EINVAL || error == -ENOENT) {
error = -EFSCORRUPTED;
xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, 0, &error);
goto out;
} }
if (!xchk_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset,
&error))
goto out;
/* Convert mode to the DT_* values that dir_emit uses. */ if (xfs_mode_to_ftype(VFS_I(ip)->i_mode) != ftype)
ino_dtype = xfs_dir3_get_dtype(mp, xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
xfs_mode_to_ftype(VFS_I(ip)->i_mode));
if (ino_dtype != dtype)
xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
xfs_irele(ip);
out:
return error;
} }
/* /*
* Scrub a single directory entry. * Scrub a single directory entry.
* *
* We use the VFS directory iterator (i.e. readdir) to call this * Check the inode number to make sure it's sane, then we check that we can
* function for every directory entry in a directory. Once we're here, * look up this filename. Finally, we check the ftype.
* we check the inode number to make sure it's sane, then we check that
* we can look up this filename. Finally, we check the ftype.
*/ */
STATIC bool STATIC int
xchk_dir_actor( xchk_dir_actor(
struct dir_context *dir_iter, struct xfs_scrub *sc,
const char *name, struct xfs_inode *dp,
int namelen, xfs_dir2_dataptr_t dapos,
loff_t pos, const struct xfs_name *name,
u64 ino, xfs_ino_t ino,
unsigned type) void *priv)
{ {
struct xfs_mount *mp; struct xfs_mount *mp = dp->i_mount;
struct xfs_inode *ip; struct xfs_inode *ip;
struct xchk_dir_ctx *sdc;
struct xfs_name xname;
xfs_ino_t lookup_ino; xfs_ino_t lookup_ino;
xfs_dablk_t offset; xfs_dablk_t offset;
bool checked_ftype = false;
int error = 0; int error = 0;
sdc = container_of(dir_iter, struct xchk_dir_ctx, dir_iter);
ip = sdc->sc->ip;
mp = ip->i_mount;
offset = xfs_dir2_db_to_da(mp->m_dir_geo, offset = xfs_dir2_db_to_da(mp->m_dir_geo,
xfs_dir2_dataptr_to_db(mp->m_dir_geo, pos)); xfs_dir2_dataptr_to_db(mp->m_dir_geo, dapos));
if (xchk_should_terminate(sdc->sc, &error)) if (xchk_should_terminate(sc, &error))
return !error; return error;
/* Does this inode number make sense? */ /* Does this inode number make sense? */
if (!xfs_verify_dir_ino(mp, ino)) { if (!xfs_verify_dir_ino(mp, ino)) {
xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
goto out; return -ECANCELED;
} }
/* Does this name make sense? */ /* Does this name make sense? */
if (!xfs_dir2_namecheck(name, namelen)) { if (!xfs_dir2_namecheck(name->name, name->len)) {
xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
goto out; return -ECANCELED;
} }
if (!strncmp(".", name, namelen)) { if (!strncmp(".", name->name, name->len)) {
/* If this is "." then check that the inum matches the dir. */ /* If this is "." then check that the inum matches the dir. */
if (xfs_has_ftype(mp) && type != DT_DIR) if (ino != dp->i_ino)
xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
offset); } else if (!strncmp("..", name->name, name->len)) {
checked_ftype = true;
if (ino != ip->i_ino)
xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
offset);
} else if (!strncmp("..", name, namelen)) {
/* /*
* If this is ".." in the root inode, check that the inum * If this is ".." in the root inode, check that the inum
* matches this dir. * matches this dir.
*/ */
if (xfs_has_ftype(mp) && type != DT_DIR) if (dp->i_ino == mp->m_sb.sb_rootino && ino != dp->i_ino)
xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
offset);
checked_ftype = true;
if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino)
xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
offset);
} }
/* Verify that we can look up this name by hash. */ /* Verify that we can look up this name by hash. */
xname.name = name; error = xchk_dir_lookup(sc, dp, name, &lookup_ino);
xname.len = namelen;
xname.type = XFS_DIR3_FT_UNKNOWN;
error = xfs_dir_lookup(sdc->sc->tp, ip, &xname, &lookup_ino, NULL);
/* ENOENT means the hash lookup failed and the dir is corrupt */ /* ENOENT means the hash lookup failed and the dir is corrupt */
if (error == -ENOENT) if (error == -ENOENT)
error = -EFSCORRUPTED; error = -EFSCORRUPTED;
if (!xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, offset, if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, offset, &error))
&error))
goto out; goto out;
if (lookup_ino != ino) { if (lookup_ino != ino) {
xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
goto out; return -ECANCELED;
} }
/* Verify the file type. This function absorbs error codes. */ /*
if (!checked_ftype) { * Grab the inode pointed to by the dirent. We release the
error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type); * inode before we cancel the scrub transaction. Since we're
if (error) * don't know a priori that releasing the inode won't trigger
* eofblocks cleanup (which allocates what would be a nested
* transaction), we can't use DONTCACHE here because DONTCACHE
* inodes can trigger immediate inactive cleanup of the inode.
* Use UNTRUSTED here to check the allocation status of the inode in
* the inode btrees.
*
* If _iget returns -EINVAL or -ENOENT then the child inode number is
* garbage and the directory is corrupt. If the _iget returns
* -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a
* cross referencing error. Any other error is an operational error.
*/
error = xfs_iget(mp, sc->tp, ino, XFS_IGET_UNTRUSTED, 0, &ip);
if (error == -EINVAL || error == -ENOENT) {
error = -EFSCORRUPTED;
xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
goto out; goto out;
} }
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, offset, &error))
goto out;
xchk_dir_check_ftype(sc, offset, ip, name->type);
xfs_irele(ip);
out: out:
/* if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
* A negative error code returned here is supposed to cause the return -ECANCELED;
* dir_emit caller (xfs_readdir) to abort the directory iteration return error;
* and return zero to xchk_directory.
*/
if (error == 0 && sdc->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return false;
return !error;
} }
/* Scrub a directory btree record. */ /* Scrub a directory btree record. */
...@@ -201,6 +154,7 @@ xchk_dir_rec( ...@@ -201,6 +154,7 @@ xchk_dir_rec(
struct xchk_da_btree *ds, struct xchk_da_btree *ds,
int level) int level)
{ {
struct xfs_name dname = { };
struct xfs_da_state_blk *blk = &ds->state->path.blk[level]; struct xfs_da_state_blk *blk = &ds->state->path.blk[level];
struct xfs_mount *mp = ds->state->mp; struct xfs_mount *mp = ds->state->mp;
struct xfs_inode *dp = ds->dargs.dp; struct xfs_inode *dp = ds->dargs.dp;
...@@ -297,7 +251,11 @@ xchk_dir_rec( ...@@ -297,7 +251,11 @@ xchk_dir_rec(
xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
goto out_relse; goto out_relse;
} }
calc_hash = xfs_da_hashname(dent->name, dent->namelen);
/* Does the directory hash match? */
dname.name = dent->name;
dname.len = dent->namelen;
calc_hash = xfs_dir2_hashname(mp, &dname);
if (calc_hash != hash) if (calc_hash != hash)
xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno); xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
...@@ -803,14 +761,7 @@ int ...@@ -803,14 +761,7 @@ int
xchk_directory( xchk_directory(
struct xfs_scrub *sc) struct xfs_scrub *sc)
{ {
struct xchk_dir_ctx sdc = { int error;
.dir_iter.actor = xchk_dir_actor,
.dir_iter.pos = 0,
.sc = sc,
};
size_t bufsize;
loff_t oldpos;
int error = 0;
if (!S_ISDIR(VFS_I(sc->ip)->i_mode)) if (!S_ISDIR(VFS_I(sc->ip)->i_mode))
return -ENOENT; return -ENOENT;
...@@ -818,7 +769,7 @@ xchk_directory( ...@@ -818,7 +769,7 @@ xchk_directory(
/* Plausible size? */ /* Plausible size? */
if (sc->ip->i_disk_size < xfs_dir2_sf_hdr_size(0)) { if (sc->ip->i_disk_size < xfs_dir2_sf_hdr_size(0)) {
xchk_ino_set_corrupt(sc, sc->ip->i_ino); xchk_ino_set_corrupt(sc, sc->ip->i_ino);
goto out; return 0;
} }
/* Check directory tree structure */ /* Check directory tree structure */
...@@ -827,7 +778,7 @@ xchk_directory( ...@@ -827,7 +778,7 @@ xchk_directory(
return error; return error;
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return error; return 0;
/* Check the freespace. */ /* Check the freespace. */
error = xchk_directory_blocks(sc); error = xchk_directory_blocks(sc);
...@@ -835,44 +786,11 @@ xchk_directory( ...@@ -835,44 +786,11 @@ xchk_directory(
return error; return error;
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
return error; return 0;
/*
* Check that every dirent we see can also be looked up by hash.
* Userspace usually asks for a 32k buffer, so we will too.
*/
bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE,
sc->ip->i_disk_size);
/*
* Look up every name in this directory by hash.
*
* Use the xfs_readdir function to call xchk_dir_actor on
* every directory entry in this directory. In _actor, we check
* the name, inode number, and ftype (if applicable) of the
* entry. xfs_readdir uses the VFS filldir functions to provide
* iteration context.
*
* The VFS grabs a read or write lock via i_rwsem before it reads
* or writes to a directory. If we've gotten this far we've
* already obtained IOLOCK_EXCL, which (since 4.10) is the same as
* getting a write lock on i_rwsem. Therefore, it is safe for us
* to drop the ILOCK here in order to reuse the _readdir and
* _dir_lookup routines, which do their own ILOCK locking.
*/
oldpos = 0;
sc->ilock_flags &= ~XFS_ILOCK_EXCL;
xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
while (true) {
error = xfs_readdir(sc->tp, sc->ip, &sdc.dir_iter, bufsize);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0,
&error))
goto out;
if (oldpos == sdc.dir_iter.pos)
break;
oldpos = sdc.dir_iter.pos;
}
out: /* Look up every name in this directory by hash. */
error = xchk_dir_walk(sc, sc->ip, xchk_dir_actor, NULL);
if (error == -ECANCELED)
error = 0;
return error; return error;
} }
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include "xfs_dir2_priv.h" #include "xfs_dir2_priv.h"
#include "scrub/scrub.h" #include "scrub/scrub.h"
#include "scrub/common.h" #include "scrub/common.h"
#include "scrub/readdir.h"
/* Set us up to scrub parents. */ /* Set us up to scrub parents. */
int int
...@@ -30,39 +31,36 @@ xchk_setup_parent( ...@@ -30,39 +31,36 @@ xchk_setup_parent(
/* Look for an entry in a parent pointing to this inode. */ /* Look for an entry in a parent pointing to this inode. */
struct xchk_parent_ctx { struct xchk_parent_ctx {
struct dir_context dc;
struct xfs_scrub *sc; struct xfs_scrub *sc;
xfs_ino_t ino;
xfs_nlink_t nlink; xfs_nlink_t nlink;
bool cancelled;
}; };
/* Look for a single entry in a directory pointing to an inode. */ /* Look for a single entry in a directory pointing to an inode. */
STATIC bool STATIC int
xchk_parent_actor( xchk_parent_actor(
struct dir_context *dc, struct xfs_scrub *sc,
const char *name, struct xfs_inode *dp,
int namelen, xfs_dir2_dataptr_t dapos,
loff_t pos, const struct xfs_name *name,
u64 ino, xfs_ino_t ino,
unsigned type) void *priv)
{ {
struct xchk_parent_ctx *spc; struct xchk_parent_ctx *spc = priv;
int error = 0; int error = 0;
spc = container_of(dc, struct xchk_parent_ctx, dc); /* Does this name make sense? */
if (spc->ino == ino) if (!xfs_dir2_namecheck(name->name, name->len))
error = -EFSCORRUPTED;
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
return error;
if (sc->ip->i_ino == ino)
spc->nlink++; spc->nlink++;
/*
* If we're facing a fatal signal, bail out. Store the cancellation
* status separately because the VFS readdir code squashes error codes
* into short directory reads.
*/
if (xchk_should_terminate(spc->sc, &error)) if (xchk_should_terminate(spc->sc, &error))
spc->cancelled = true; return error;
return !error; return 0;
} }
/* Count the number of dentries in the parent dir that point to this inode. */ /* Count the number of dentries in the parent dir that point to this inode. */
...@@ -73,50 +71,19 @@ xchk_parent_count_parent_dentries( ...@@ -73,50 +71,19 @@ xchk_parent_count_parent_dentries(
xfs_nlink_t *nlink) xfs_nlink_t *nlink)
{ {
struct xchk_parent_ctx spc = { struct xchk_parent_ctx spc = {
.dc.actor = xchk_parent_actor,
.ino = sc->ip->i_ino,
.sc = sc, .sc = sc,
.nlink = 0,
}; };
size_t bufsize;
loff_t oldpos;
uint lock_mode; uint lock_mode;
int error = 0; int error = 0;
/*
* If there are any blocks, read-ahead block 0 as we're almost
* certain to have the next operation be a read there. This is
* how we guarantee that the parent's extent map has been loaded,
* if there is one.
*/
lock_mode = xfs_ilock_data_map_shared(parent); lock_mode = xfs_ilock_data_map_shared(parent);
if (parent->i_df.if_nextents > 0) error = xchk_dir_walk(sc, parent, xchk_parent_actor, &spc);
error = xfs_dir3_data_readahead(parent, 0, 0);
xfs_iunlock(parent, lock_mode); xfs_iunlock(parent, lock_mode);
if (error) if (error)
return error; return error;
/*
* Iterate the parent dir to confirm that there is
* exactly one entry pointing back to the inode being
* scanned.
*/
bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE,
parent->i_disk_size);
oldpos = 0;
while (true) {
error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize);
if (error)
goto out;
if (spc.cancelled) {
error = -EAGAIN;
goto out;
}
if (oldpos == spc.dc.pos)
break;
oldpos = spc.dc.pos;
}
*nlink = spc.nlink; *nlink = spc.nlink;
out:
return error; return error;
} }
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2022-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
#include "xfs_error.h"
#include "scrub/scrub.h"
#include "scrub/readdir.h"
/*
 * Walk a shortform directory, invoking @dirent_fn once per entry.
 *
 * Synthesized "." and ".." names are reported first: "." carries the
 * directory's own inode number and ".." carries the inode number returned by
 * xfs_dir2_sf_get_parent_ino().  Both are reported as XFS_DIR3_FT_DIR.  The
 * entries recorded after the shortform header are then reported in storage
 * order, each with the file type returned by xfs_dir2_sf_get_ftype().
 *
 * Returns 0 once every entry has been visited.  The walk stops at the first
 * nonzero value returned by @dirent_fn and hands that value back.
 */
STATIC int
xchk_dir_walk_sf(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xchk_dirent_fn		dirent_fn,
	void			*priv)
{
	struct xfs_name		xname = {
		.name		= ".",
		.len		= 1,
		.type		= XFS_DIR3_FT_DIR,
	};
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_da_geometry	*geo = mp->m_dir_geo;
	struct xfs_dir2_sf_hdr	*hdr;
	struct xfs_dir2_sf_entry *ent;
	xfs_dir2_dataptr_t	pos;
	unsigned int		idx;
	int			ret;

	ASSERT(dp->i_df.if_bytes == dp->i_disk_size);
	ASSERT(dp->i_df.if_u1.if_data != NULL);

	hdr = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data;

	/* "." is reported at the first entry offset of the data block. */
	pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
			geo->data_entry_offset);
	ret = dirent_fn(sc, dp, pos, &xname, dp->i_ino, priv);
	if (ret)
		return ret;

	/* ".." is reported one "."-sized data entry after that. */
	xname.name = "..";
	xname.len = 2;
	pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
			geo->data_entry_offset +
			xfs_dir2_data_entsize(mp, sizeof(".") - 1));
	ret = dirent_fn(sc, dp, pos, &xname, xfs_dir2_sf_get_parent_ino(hdr),
			priv);
	if (ret)
		return ret;

	/* Now every entry that follows the shortform header. */
	for (idx = 0, ent = xfs_dir2_sf_firstentry(hdr);
	     idx < hdr->count;
	     idx++, ent = xfs_dir2_sf_nextentry(mp, hdr, ent)) {
		xname.name = ent->name;
		xname.len = ent->namelen;
		xname.type = xfs_dir2_sf_get_ftype(mp, ent);

		pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
				xfs_dir2_sf_get_offset(ent));
		ret = dirent_fn(sc, dp, pos, &xname,
				xfs_dir2_sf_get_ino(mp, hdr, ent), priv);
		if (ret)
			return ret;
	}

	return 0;
}
/*
 * Walk a block-format directory, invoking @dirent_fn once per live entry.
 *
 * The single directory block is read with xfs_dir3_block_read() and scanned
 * from the first entry offset up to the end offset reported by
 * xfs_dir3_data_end_offset().  Free regions are stepped over using their
 * recorded length.  The scan stops quietly if a live entry would extend past
 * the end offset.
 *
 * Returns the read error if the block cannot be read, the first nonzero value
 * returned by @dirent_fn, or 0.  The buffer is released before returning.
 */
STATIC int
xchk_dir_walk_block(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xchk_dirent_fn		dirent_fn,
	void			*priv)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_da_geometry	*geo = mp->m_dir_geo;
	struct xfs_buf		*bp;
	unsigned int		pos;
	unsigned int		limit;
	int			error;

	error = xfs_dir3_block_read(sc->tp, dp, &bp);
	if (error)
		return error;

	limit = xfs_dir3_data_end_offset(geo, bp->b_addr);
	pos = geo->data_entry_offset;
	while (pos < limit) {
		struct xfs_name			name = { };
		struct xfs_dir2_data_unused	*dup = bp->b_addr + pos;
		struct xfs_dir2_data_entry	*dep = bp->b_addr + pos;
		unsigned int			next_pos;

		/* Free space: hop over it without reporting anything. */
		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			pos += be16_to_cpu(dup->length);
			continue;
		}

		/* A live entry must fit entirely below the end offset. */
		next_pos = pos + xfs_dir2_data_entsize(mp, dep->namelen);
		if (next_pos > limit)
			break;

		name.name = dep->name;
		name.len = dep->namelen;
		name.type = xfs_dir2_data_get_ftype(mp, dep);

		error = dirent_fn(sc, dp,
				xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
						pos),
				&name, be64_to_cpu(dep->inumber), priv);
		if (error)
			break;

		pos = next_pos;
	}

	xfs_trans_brelse(sc->tp, bp);
	return error;
}
/*
 * Read the next mapped directory data block at or after *@curoff.
 *
 * The byte offset in *@curoff is rounded down to a directory block and the
 * data fork extent list is searched from there.  If no extent is found, or
 * the extent found starts at or beyond the block corresponding to
 * XFS_DIR2_LEAF_OFFSET, *@bpp is left NULL and 0 is returned.  Otherwise
 * *@curoff is moved forward to the start of the mapped block when that lies
 * beyond it, and the result of xfs_dir3_data_read() is returned.
 */
STATIC int
xchk_read_leaf_dir_buf(
	struct xfs_trans	*tp,
	struct xfs_inode	*dp,
	struct xfs_da_geometry	*geo,
	xfs_dir2_off_t		*curoff,
	struct xfs_buf		**bpp)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(dp, XFS_DATA_FORK);
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	irec;
	xfs_dablk_t		end_da;
	xfs_dablk_t		start_da;
	xfs_dir2_off_t		blk_byteoff;

	*bpp = NULL;

	/* Scan window: [block containing *curoff, first leaf block). */
	end_da = xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET);
	start_da = xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, *curoff));

	if (!xfs_iext_lookup_extent(dp, ifp, start_da, &icur, &irec) ||
	    irec.br_startoff >= end_da)
		return 0;

	xfs_trim_extent(&irec, start_da, end_da - start_da);

	/* Never move the caller's cursor backwards. */
	blk_byteoff = xfs_dir2_da_to_byte(geo, irec.br_startoff);
	if (*curoff < blk_byteoff)
		*curoff = blk_byteoff;

	return xfs_dir3_data_read(tp, dp, irec.br_startoff, 0, bpp);
}
/*
 * Walk a leaf- or node-format directory, invoking @dirent_fn once per live
 * entry found in the data blocks below XFS_DIR2_LEAF_OFFSET.
 *
 * Two cursors move in lockstep: @dir_byte is the byte offset within the
 * directory and @blk_off is the byte offset within the buffer currently
 * held.  A new buffer is fetched whenever none is held or @blk_off has
 * reached the directory block size.  The walk ends when no further data
 * block can be found.
 *
 * Returns a buffer read error, the first nonzero value returned by
 * @dirent_fn, or 0.  Any buffer still held is released before returning.
 */
STATIC int
xchk_dir_walk_leaf(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xchk_dirent_fn		dirent_fn,
	void			*priv)
{
	struct xfs_mount	*mp = dp->i_mount;
	struct xfs_da_geometry	*geo = mp->m_dir_geo;
	struct xfs_buf		*bp = NULL;
	xfs_dir2_off_t		dir_byte = 0;
	unsigned int		blk_off = 0;
	int			error;

	while (dir_byte < XFS_DIR2_LEAF_OFFSET) {
		struct xfs_name			name = { };
		struct xfs_dir2_data_unused	*dup;
		struct xfs_dir2_data_entry	*dep;
		xfs_dir2_dataptr_t		dapos;
		unsigned int			step;

		/* Need a (new) buffer? */
		if (!bp || blk_off >= geo->blksize) {
			if (bp) {
				xfs_trans_brelse(sc->tp, bp);
				bp = NULL;
			}

			error = xchk_read_leaf_dir_buf(sc->tp, dp, geo,
					&dir_byte, &bp);
			if (error || !bp)
				break;

			/* Start both cursors at the block's first entry. */
			blk_off = geo->data_entry_offset;
			dir_byte += geo->data_entry_offset;
		}

		dup = bp->b_addr + blk_off;
		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
			/* Free space: nothing to report, just step over it. */
			step = be16_to_cpu(dup->length);
		} else {
			/* Live entry: report it to the callback. */
			dep = bp->b_addr + blk_off;
			step = xfs_dir2_data_entsize(mp, dep->namelen);

			/* The mask keeps only the low 31 bits of the cookie. */
			dapos = xfs_dir2_byte_to_dataptr(dir_byte) & 0x7fffffff;
			name.name = dep->name;
			name.len = dep->namelen;
			name.type = xfs_dir2_data_get_ftype(mp, dep);

			error = dirent_fn(sc, dp, dapos, &name,
					be64_to_cpu(dep->inumber), priv);
			if (error)
				break;
		}

		blk_off += step;
		dir_byte += step;
	}

	if (bp)
		xfs_trans_brelse(sc->tp, bp);
	return error;
}
/*
 * Invoke @dirent_fn for every entry in directory @dp.
 *
 * The caller must hold the ILOCK on @dp.  File types passed to the callback
 * are XFS_DIR3_FT_* values.  The walker is chosen from the data fork format:
 * shortform directories are walked in place; otherwise the extent list is
 * loaded and either the block or the leaf walker is used.
 *
 * Returns -EIO if the filesystem is shut down, otherwise any error from
 * loading the fork or from the selected walker, or 0.
 */
int
xchk_dir_walk(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xchk_dirent_fn		dirent_fn,
	void			*priv)
{
	struct xfs_da_args	args = {
		.dp		= dp,
		.geo		= dp->i_mount->m_dir_geo,
		.trans		= sc->tp,
	};
	bool			single_block;
	int			ret;

	if (xfs_is_shutdown(dp->i_mount))
		return -EIO;

	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
	ASSERT(xfs_isilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));

	if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL)
		return xchk_dir_walk_sf(sc, dp, dirent_fn, priv);

	/* The dir2 helpers below need the data fork extents in memory. */
	ret = xfs_iread_extents(sc->tp, dp, XFS_DATA_FORK);
	if (ret)
		return ret;

	ret = xfs_dir2_isblock(&args, &single_block);
	if (ret)
		return ret;

	return single_block ?
			xchk_dir_walk_block(sc, dp, dirent_fn, priv) :
			xchk_dir_walk_leaf(sc, dp, dirent_fn, priv);
}
/*
 * Look up an exact name in directory @dp and return its inode number.
 *
 * The caller must hold the ILOCK on @dp.  File types are XFS_DIR3_FT_*
 * values.  The name itself is not validated here.  The lookup routine is
 * picked from the directory format: shortform, block, leaf, or node.
 *
 * A lookup result of -EEXIST is treated as success.  On success *@ino is set
 * from the lookup arguments and 0 is returned; on any other result *@ino is
 * left untouched and the error is returned.  Returns -EIO if the filesystem
 * is shut down.
 */
int
xchk_dir_lookup(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	const struct xfs_name	*name,
	xfs_ino_t		*ino)
{
	struct xfs_da_args	args = {
		.dp		= dp,
		.geo		= dp->i_mount->m_dir_geo,
		.trans		= sc->tp,
		.name		= name->name,
		.namelen	= name->len,
		.filetype	= name->type,
		.hashval	= xfs_dir2_hashname(dp->i_mount, name),
		.whichfork	= XFS_DATA_FORK,
		.op_flags	= XFS_DA_OP_OKNOENT,
	};
	bool			isblock, isleaf;
	int			ret;

	if (xfs_is_shutdown(dp->i_mount))
		return -EIO;

	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
	ASSERT(xfs_isilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));

	if (dp->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
		ret = xfs_dir2_sf_lookup(&args);
	} else {
		/* The dir2 helpers below need the data fork extents loaded. */
		ret = xfs_iread_extents(sc->tp, dp, XFS_DATA_FORK);
		if (ret)
			return ret;

		ret = xfs_dir2_isblock(&args, &isblock);
		if (ret)
			return ret;

		if (isblock) {
			ret = xfs_dir2_block_lookup(&args);
		} else {
			ret = xfs_dir2_isleaf(&args, &isleaf);
			if (ret)
				return ret;

			if (isleaf)
				ret = xfs_dir2_leaf_lookup(&args);
			else
				ret = xfs_dir2_node_lookup(&args);
		}
	}

	/* The lookup routines signal "name found" with -EEXIST. */
	if (ret == -EEXIST)
		ret = 0;
	if (ret)
		return ret;

	*ino = args.inumber;
	return 0;
}
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Copyright (C) 2022-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_READDIR_H__
#define __XFS_SCRUB_READDIR_H__
/*
 * Callback invoked by xchk_dir_walk() for each directory entry.
 *
 * @sc:    scrub context handed to the walker
 * @dp:    directory being walked
 * @dapos: directory data pointer computed by the walker for this entry
 * @name:  entry name, length and XFS_DIR3_FT_* file type
 * @ino:   inode number recorded in the entry
 * @priv:  opaque pointer passed through from the xchk_dir_walk() caller
 *
 * Return 0 to continue the walk.  Any nonzero value stops the walk and is
 * returned to the caller of xchk_dir_walk().
 */
typedef int (*xchk_dirent_fn)(struct xfs_scrub *sc, struct xfs_inode *dp,
xfs_dir2_dataptr_t dapos, const struct xfs_name *name,
xfs_ino_t ino, void *priv);
/* Call @dirent_fn for every entry in directory @dp; caller holds the ILOCK. */
int xchk_dir_walk(struct xfs_scrub *sc, struct xfs_inode *dp,
xchk_dirent_fn dirent_fn, void *priv);
/*
 * Look up @name in directory @dp and store the inode number found in *@ino;
 * caller holds the ILOCK.
 */
int xchk_dir_lookup(struct xfs_scrub *sc, struct xfs_inode *dp,
const struct xfs_name *name, xfs_ino_t *ino);
#endif /* __XFS_SCRUB_READDIR_H__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment