Commit 783c5170 authored by Chandan Babu R

Merge tag 'repair-tempfiles-6.10_2024-04-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.10-mergeA

xfs: create temporary files for online repair

As mentioned earlier, the repair strategy for file-based metadata is to
build a new copy in a temporary file and swap the file fork mappings
with the metadata inode.  We've built the atomic extent swap facility,
so now we need to build a facility for handling private temporary files.

The first step is to teach the filesystem to ignore the temporary files.
We'll mark them as PRIVATE in the VFS so that the kernel security
modules will leave them alone.  The second step is to give the online
repair code the ability to create a temporary file and to reap extents
from the temporary file after the extent swap.
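
As a rough sketch (using only helpers added in this series;
rebuild_fork() and exchange_mappings() are hypothetical placeholders
for later patches), a file-based repair is expected to flow like this:

	/* Sketch only; the rebuild and exchange steps are placeholders. */
	error = xrep_tempfile_create(sc, S_IFREG);
	if (error)
		return error;

	/* Stage the new metadata in the temporary file. */
	error = rebuild_fork(sc);
	if (error)
		return error;

	/* Atomically exchange fork mappings with the broken file. */
	error = exchange_mappings(sc);
	if (error)
		return error;

	/* Reap the old blocks, which now sit in the temporary file. */
	error = xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
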
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'repair-tempfiles-6.10_2024-04-15' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: add the ability to reap entire inode forks
  xfs: refactor live buffer invalidation for repairs
  xfs: create temporary files and directories for online repair
  xfs: hide private inodes from bulkstat and handle functions
parents 22d5a8e5 5befb047
@@ -207,6 +207,7 @@ xfs-y += $(addprefix scrub/, \
refcount_repair.o \
repair.o \
rmap_repair.o \
tempfile.o \
)
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
......
@@ -143,7 +143,7 @@ xchk_parent_validate(
}
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
return error;
if (dp == sc->ip || !S_ISDIR(VFS_I(dp)->i_mode)) {
if (dp == sc->ip || dp == sc->tempip || !S_ISDIR(VFS_I(dp)->i_mode)) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
goto out_rele;
}
......
@@ -211,6 +211,48 @@ static inline void xreap_defer_finish_reset(struct xreap_state *rs)
rs->force_roll = false;
}
/*
* Compute the maximum length of a buffer cache scan (in units of sectors),
* given a quantity of fs blocks.
*/
xfs_daddr_t
xrep_bufscan_max_sectors(
struct xfs_mount *mp,
xfs_extlen_t fsblocks)
{
int max_fsbs;
/* Remote xattr values are the largest buffers that we support. */
max_fsbs = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks, max_fsbs));
}
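/*
 * Worked example, assuming 4k fs blocks on a v5 filesystem: the largest
 * remote xattr value (XFS_XATTR_SIZE_MAX = 64k) takes 17 blocks once the
 * per-block remote headers are counted, so max_fsbs is 17 and a scan
 * over, say, a 100-block extent is capped at XFS_FSB_TO_BB(mp, 17) = 136
 * sectors.
 */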
/*
* Return an incore buffer from a sector scan, or NULL if there are no buffers
* left to return.
*/
struct xfs_buf *
xrep_bufscan_advance(
struct xfs_mount *mp,
struct xrep_bufscan *scan)
{
scan->__sector_count += scan->daddr_step;
while (scan->__sector_count <= scan->max_sectors) {
struct xfs_buf *bp = NULL;
int error;
error = xfs_buf_incore(mp->m_ddev_targp, scan->daddr,
scan->__sector_count, XBF_LIVESCAN, &bp);
if (!error)
return bp;
scan->__sector_count += scan->daddr_step;
}
return NULL;
}
/* Try to invalidate the incore buffers for an extent that we're freeing. */
STATIC void
xreap_agextent_binval(
@@ -241,28 +283,15 @@ xreap_agextent_binval(
* of any plausible size.
*/
while (bno < agbno_next) {
xfs_agblock_t fsbcount;
xfs_agblock_t max_fsbs;
/*
* Max buffer size is the max remote xattr buffer size, which
* is one fs block larger than 64k.
*/
max_fsbs = min_t(xfs_agblock_t, agbno_next - bno,
xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX));
for (fsbcount = 1; fsbcount <= max_fsbs; fsbcount++) {
struct xfs_buf *bp = NULL;
xfs_daddr_t daddr;
int error;
daddr = XFS_AGB_TO_DADDR(mp, agno, bno);
error = xfs_buf_incore(mp->m_ddev_targp, daddr,
XFS_FSB_TO_BB(mp, fsbcount),
XBF_LIVESCAN, &bp);
if (error)
continue;
struct xrep_bufscan scan = {
.daddr = XFS_AGB_TO_DADDR(mp, agno, bno),
.max_sectors = xrep_bufscan_max_sectors(mp,
agbno_next - bno),
.daddr_step = XFS_FSB_TO_BB(mp, 1),
};
struct xfs_buf *bp;
while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
xfs_trans_bjoin(sc->tp, bp);
xfs_trans_binval(sc->tp, bp);
rs->invalidated++;
@@ -646,3 +675,375 @@ xrep_reap_fsblocks(
return 0;
}
/*
* Metadata files are not supposed to share blocks with anything else.
* If blocks are shared, we remove the reverse mapping (thus reducing the
* crosslink factor); if blocks are not shared, we also need to free them.
*
* This first step determines the longest subset of the passed-in imap
* (starting at its beginning) that is either crosslinked or not crosslinked.
* The blockcount will be adjusted down as needed.
*/
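/*
 * For example (illustrative numbers only): if imap starts at agblock 10
 * with a blockcount of 10, and blocks 10-13 have rmap records belonging
 * to other owners while block 14 does not, imap is trimmed to blocks
 * 10-13 and *crosslinked is set to true; a subsequent call resumes the
 * decision at block 14.
 */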
STATIC int
xreap_bmapi_select(
struct xfs_scrub *sc,
struct xfs_inode *ip,
int whichfork,
struct xfs_bmbt_irec *imap,
bool *crosslinked)
{
struct xfs_owner_info oinfo;
struct xfs_btree_cur *cur;
xfs_filblks_t len = 1;
xfs_agblock_t bno;
xfs_agblock_t agbno;
xfs_agblock_t agbno_next;
int error;
agbno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
agbno_next = agbno + imap->br_blockcount;
cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, sc->sa.agf_bp,
sc->sa.pag);
xfs_rmap_ino_owner(&oinfo, ip->i_ino, whichfork, imap->br_startoff);
error = xfs_rmap_has_other_keys(cur, agbno, 1, &oinfo, crosslinked);
if (error)
goto out_cur;
bno = agbno + 1;
while (bno < agbno_next) {
bool also_crosslinked;
oinfo.oi_offset++;
error = xfs_rmap_has_other_keys(cur, bno, 1, &oinfo,
&also_crosslinked);
if (error)
goto out_cur;
if (also_crosslinked != *crosslinked)
break;
len++;
bno++;
}
imap->br_blockcount = len;
trace_xreap_bmapi_select(sc->sa.pag, agbno, len, *crosslinked);
out_cur:
xfs_btree_del_cursor(cur, error);
return error;
}
/*
* Decide if this buffer can be joined to a transaction. This is true for most
* buffers, but there are two cases that we want to catch: large remote xattr
* value buffers are not logged and can overflow the buffer log item dirty
* bitmap size; and oversized cached buffers if things have really gone
* haywire.
*/
static inline bool
xreap_buf_loggable(
const struct xfs_buf *bp)
{
int i;
for (i = 0; i < bp->b_map_count; i++) {
int chunks;
int map_size;
chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
XFS_BLF_CHUNK);
map_size = DIV_ROUND_UP(chunks, NBWORD);
if (map_size > XFS_BLF_DATAMAP_SIZE)
return false;
}
return true;
}
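/*
 * Worked example, assuming the usual constants XFS_BLF_CHUNK = 128 bytes,
 * NBWORD = 32 bits, and a 16-word dirty bitmap (enough for a 64k buffer):
 * a remote xattr value buffer one fs block larger than 64k, say 68k with
 * 4k blocks, needs 68k / 128 = 544 chunks and DIV_ROUND_UP(544, 32) = 17
 * bitmap words, so it overflows the bitmap and must be staled rather than
 * logged.
 */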
/*
* Invalidate any buffers for this file mapping. The @imap blockcount may be
* adjusted downward if we need to roll the transaction.
*/
STATIC int
xreap_bmapi_binval(
struct xfs_scrub *sc,
struct xfs_inode *ip,
int whichfork,
struct xfs_bmbt_irec *imap)
{
struct xfs_mount *mp = sc->mp;
struct xfs_perag *pag = sc->sa.pag;
int bmap_flags = xfs_bmapi_aflag(whichfork);
xfs_fileoff_t off;
xfs_fileoff_t max_off;
xfs_extlen_t scan_blocks;
xfs_agnumber_t agno = sc->sa.pag->pag_agno;
xfs_agblock_t bno;
xfs_agblock_t agbno;
xfs_agblock_t agbno_next;
unsigned int invalidated = 0;
int error;
/*
* Avoid invalidating AG headers and post-EOFS blocks because we never
* own those.
*/
agbno = bno = XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock);
agbno_next = agbno + imap->br_blockcount;
if (!xfs_verify_agbno(pag, agbno) ||
!xfs_verify_agbno(pag, agbno_next - 1))
return 0;
/*
* Buffers for file blocks can span multiple contiguous mappings. This
* means that for each block in the mapping, there could exist an
* xfs_buf indexed by that block with any length up to the maximum
* buffer size (remote xattr values) or to the next hole in the fork.
* To set up our binval scan, first we need to figure out the location
* of the next hole.
*/
off = imap->br_startoff + imap->br_blockcount;
max_off = off + xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
while (off < max_off) {
struct xfs_bmbt_irec hmap;
int nhmaps = 1;
error = xfs_bmapi_read(ip, off, max_off - off, &hmap,
&nhmaps, bmap_flags);
if (error)
return error;
if (nhmaps != 1 || hmap.br_startblock == DELAYSTARTBLOCK) {
ASSERT(0);
return -EFSCORRUPTED;
}
if (!xfs_bmap_is_real_extent(&hmap))
break;
off = hmap.br_startoff + hmap.br_blockcount;
}
scan_blocks = off - imap->br_startoff;
trace_xreap_bmapi_binval_scan(sc, imap, scan_blocks);
/*
* If there are incore buffers for these blocks, invalidate them. If
* we can't (try)lock the buffer we assume it's owned by someone else
* and leave it alone. The buffer cache cannot detect aliasing, so
* employ nested loops to detect incore buffers of any plausible size.
*/
while (bno < agbno_next) {
struct xrep_bufscan scan = {
.daddr = XFS_AGB_TO_DADDR(mp, agno, bno),
.max_sectors = xrep_bufscan_max_sectors(mp,
scan_blocks),
.daddr_step = XFS_FSB_TO_BB(mp, 1),
};
struct xfs_buf *bp;
while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
if (xreap_buf_loggable(bp)) {
xfs_trans_bjoin(sc->tp, bp);
xfs_trans_binval(sc->tp, bp);
} else {
xfs_buf_stale(bp);
xfs_buf_relse(bp);
}
invalidated++;
/*
* Stop invalidating if we've hit the limit; we should
* still have enough reservation left to free however
* much of the mapping we've seen so far.
*/
if (invalidated > XREAP_MAX_BINVAL) {
imap->br_blockcount = agbno_next - bno;
goto out;
}
}
bno++;
scan_blocks--;
}
out:
trace_xreap_bmapi_binval(sc->sa.pag, agbno, imap->br_blockcount);
return 0;
}
/*
* Dispose of as much of the beginning of this file fork mapping as possible.
* The number of blocks disposed of is returned in @imap->br_blockcount.
*/
STATIC int
xrep_reap_bmapi_iter(
struct xfs_scrub *sc,
struct xfs_inode *ip,
int whichfork,
struct xfs_bmbt_irec *imap,
bool crosslinked)
{
int error;
if (crosslinked) {
/*
* If there are other rmappings, this block is cross linked and
* must not be freed. Remove the reverse mapping, leave the
* buffer cache in its possibly confused state, and move on.
* We don't want to risk discarding valid data buffers from
* anybody else who thinks they own the block, even though that
* runs the risk of stale buffer warnings in the future.
*/
trace_xreap_dispose_unmap_extent(sc->sa.pag,
XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
imap->br_blockcount);
/*
* Schedule removal of the mapping from the fork. We use
* deferred log intents in this function to control the exact
* sequence of metadata updates.
*/
xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
-(int64_t)imap->br_blockcount);
xfs_rmap_unmap_extent(sc->tp, ip, whichfork, imap);
return 0;
}
/*
* If the block is not crosslinked, we can invalidate all the incore
* buffers for the extent, and then free the extent. This is a bit of
* a mess since we don't detect discontiguous buffers that are indexed
* by a block starting before the first block of the extent but overlap
* anyway.
*/
trace_xreap_dispose_free_extent(sc->sa.pag,
XFS_FSB_TO_AGBNO(sc->mp, imap->br_startblock),
imap->br_blockcount);
/*
* Invalidate as many buffers as we can, starting at the beginning of
* this mapping. If this function sets blockcount to zero, the
* transaction is full of logged buffer invalidations, so we need to
* return early so that we can roll and retry.
*/
error = xreap_bmapi_binval(sc, ip, whichfork, imap);
if (error || imap->br_blockcount == 0)
return error;
/*
* Schedule removal of the mapping from the fork. We use deferred log
* intents in this function to control the exact sequence of metadata
* updates.
*/
xfs_bmap_unmap_extent(sc->tp, ip, whichfork, imap);
xfs_trans_mod_dquot_byino(sc->tp, ip, XFS_TRANS_DQ_BCOUNT,
-(int64_t)imap->br_blockcount);
return xfs_free_extent_later(sc->tp, imap->br_startblock,
imap->br_blockcount, NULL, XFS_AG_RESV_NONE, true);
}
/*
* Dispose of as much of this file extent as we can. Upon successful return,
* the imap will reflect the mapping that was removed from the fork.
*/
STATIC int
xreap_ifork_extent(
struct xfs_scrub *sc,
struct xfs_inode *ip,
int whichfork,
struct xfs_bmbt_irec *imap)
{
xfs_agnumber_t agno;
bool crosslinked;
int error;
ASSERT(sc->sa.pag == NULL);
trace_xreap_ifork_extent(sc, ip, whichfork, imap);
agno = XFS_FSB_TO_AGNO(sc->mp, imap->br_startblock);
sc->sa.pag = xfs_perag_get(sc->mp, agno);
if (!sc->sa.pag)
return -EFSCORRUPTED;
error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &sc->sa.agf_bp);
if (error)
goto out_pag;
/*
* Decide the fate of the blocks at the beginning of the mapping, then
* update the mapping to use it with the unmap calls.
*/
error = xreap_bmapi_select(sc, ip, whichfork, imap, &crosslinked);
if (error)
goto out_agf;
error = xrep_reap_bmapi_iter(sc, ip, whichfork, imap, crosslinked);
if (error)
goto out_agf;
out_agf:
xfs_trans_brelse(sc->tp, sc->sa.agf_bp);
sc->sa.agf_bp = NULL;
out_pag:
xfs_perag_put(sc->sa.pag);
sc->sa.pag = NULL;
return error;
}
/*
* Dispose of each block mapped to the given fork of the given file. Callers
* must hold ILOCK_EXCL, and ip can only be sc->ip or sc->tempip. The fork
* must not have any delalloc reservations.
*/
int
xrep_reap_ifork(
struct xfs_scrub *sc,
struct xfs_inode *ip,
int whichfork)
{
xfs_fileoff_t off = 0;
int bmap_flags = xfs_bmapi_aflag(whichfork);
int error;
ASSERT(xfs_has_rmapbt(sc->mp));
ASSERT(ip == sc->ip || ip == sc->tempip);
ASSERT(whichfork == XFS_ATTR_FORK || !XFS_IS_REALTIME_INODE(ip));
while (off < XFS_MAX_FILEOFF) {
struct xfs_bmbt_irec imap;
int nimaps = 1;
/* Read the next extent, skip past holes and delalloc. */
error = xfs_bmapi_read(ip, off, XFS_MAX_FILEOFF - off, &imap,
&nimaps, bmap_flags);
if (error)
return error;
if (nimaps != 1 || imap.br_startblock == DELAYSTARTBLOCK) {
ASSERT(0);
return -EFSCORRUPTED;
}
/*
* If this is a real space mapping, reap as much of it as we
* can in a single transaction.
*/
if (xfs_bmap_is_real_extent(&imap)) {
error = xreap_ifork_extent(sc, ip, whichfork, &imap);
if (error)
return error;
error = xfs_defer_finish(&sc->tp);
if (error)
return error;
}
off = imap.br_startoff + imap.br_blockcount;
}
return 0;
}
@@ -13,5 +13,26 @@ int xrep_reap_agblocks(struct xfs_scrub *sc, struct xagb_bitmap *bitmap,
const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
int xrep_reap_fsblocks(struct xfs_scrub *sc, struct xfsb_bitmap *bitmap,
const struct xfs_owner_info *oinfo);
int xrep_reap_ifork(struct xfs_scrub *sc, struct xfs_inode *ip, int whichfork);
/* Buffer cache scan context. */
struct xrep_bufscan {
/* Disk address for the buffers we want to scan. */
xfs_daddr_t daddr;
/* Maximum number of sectors to scan. */
xfs_daddr_t max_sectors;
/* Each round, increment the search length by this number of sectors. */
xfs_daddr_t daddr_step;
/* Internal scan state; initialize to zero. */
xfs_daddr_t __sector_count;
};
xfs_daddr_t xrep_bufscan_max_sectors(struct xfs_mount *mp,
xfs_extlen_t fsblocks);
struct xfs_buf *xrep_bufscan_advance(struct xfs_mount *mp,
struct xrep_bufscan *scan);
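/*
 * Example usage, a sketch mirroring the invalidation loops in scrub/reap.c
 * (__sector_count is zeroed by the struct initializer):
 *
 *	struct xrep_bufscan	scan = {
 *		.daddr		= XFS_AGB_TO_DADDR(mp, agno, bno),
 *		.max_sectors	= xrep_bufscan_max_sectors(mp, fsb_count),
 *		.daddr_step	= XFS_FSB_TO_BB(mp, 1),
 *	};
 *	struct xfs_buf	*bp;
 *
 *	while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) {
 *		(invalidate or release bp here)
 *	}
 */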
#endif /* __XFS_SCRUB_REAP_H__ */
@@ -17,6 +17,7 @@
#include "xfs_scrub.h"
#include "xfs_buf_mem.h"
#include "xfs_rmap.h"
#include "xfs_exchrange.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -24,6 +25,7 @@
#include "scrub/health.h"
#include "scrub/stats.h"
#include "scrub/xfile.h"
#include "scrub/tempfile.h"
/*
* Online Scrub and Repair
@@ -211,6 +213,7 @@ xchk_teardown(
sc->buf = NULL;
}
xrep_tempfile_rele(sc);
xchk_fsgates_disable(sc);
return error;
}
......
@@ -105,6 +105,10 @@ struct xfs_scrub {
/* Lock flags for @ip. */
uint ilock_flags;
/* A temporary file on this filesystem, for staging new metadata. */
struct xfs_inode *tempip;
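/* Lock flags for @tempip. */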
uint temp_ilock_flags;
/* See the XCHK/XREP state flags below. */
unsigned int flags;
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_exchrange.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/tempfile.h"
/*
* Create a temporary file for reconstructing metadata, with the intention of
* atomically exchanging the temporary file's contents with the file that's
* being repaired.
*/
int
xrep_tempfile_create(
struct xfs_scrub *sc,
uint16_t mode)
{
struct xfs_mount *mp = sc->mp;
struct xfs_trans *tp = NULL;
struct xfs_dquot *udqp = NULL;
struct xfs_dquot *gdqp = NULL;
struct xfs_dquot *pdqp = NULL;
struct xfs_trans_res *tres;
struct xfs_inode *dp = mp->m_rootip;
xfs_ino_t ino;
unsigned int resblks;
bool is_dir = S_ISDIR(mode);
int error;
if (xfs_is_shutdown(mp))
return -EIO;
if (xfs_is_readonly(mp))
return -EROFS;
ASSERT(sc->tp == NULL);
ASSERT(sc->tempip == NULL);
/*
* Make sure that we have allocated dquot(s) on disk. The temporary
* inode should be completely root owned so that we don't fail due to
* quota limits.
*/
error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
if (error)
return error;
if (is_dir) {
resblks = XFS_MKDIR_SPACE_RES(mp, 0);
tres = &M_RES(mp)->tr_mkdir;
} else {
resblks = XFS_IALLOC_SPACE_RES(mp);
tres = &M_RES(mp)->tr_create_tmpfile;
}
error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
&tp);
if (error)
goto out_release_dquots;
/* Allocate inode, set up directory. */
error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
if (error)
goto out_trans_cancel;
error = xfs_init_new_inode(&nop_mnt_idmap, tp, dp, ino, mode, 0, 0,
0, false, &sc->tempip);
if (error)
goto out_trans_cancel;
/* Change the ownership of the inode to root. */
VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
/*
* Mark our temporary file as private so that LSMs and the ACL code
* don't try to add their own metadata or reason about these files.
* The file should never be exposed to userspace.
*/
VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;
if (is_dir) {
error = xfs_dir_init(tp, sc->tempip, dp);
if (error)
goto out_trans_cancel;
}
/*
* Attach the dquot(s) to the inode and modify them incore. The ids of
* the inode cannot have changed since the new inode has been locked
* ever since it was created.
*/
xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);
/*
* Put our temp file on the unlinked list so it's purged automatically.
* All file-based metadata being reconstructed using this file must be
* atomically exchanged with the original file because the contents
* here will be purged when the inode is dropped or log recovery cleans
* out the unlinked list.
*/
error = xfs_iunlink(tp, sc->tempip);
if (error)
goto out_trans_cancel;
error = xfs_trans_commit(tp);
if (error)
goto out_release_inode;
trace_xrep_tempfile_create(sc);
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
xfs_qm_dqrele(pdqp);
/* Finish setting up the incore / vfs context. */
xfs_setup_iops(sc->tempip);
xfs_finish_inode_setup(sc->tempip);
sc->temp_ilock_flags = 0;
return error;
out_trans_cancel:
xfs_trans_cancel(tp);
out_release_inode:
/*
* Wait until after the current transaction is aborted to finish the
* setup of the inode and release the inode. This prevents recursive
* transactions and deadlocks from xfs_inactive.
*/
if (sc->tempip) {
xfs_finish_inode_setup(sc->tempip);
xchk_irele(sc, sc->tempip);
}
out_release_dquots:
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
xfs_qm_dqrele(pdqp);
return error;
}
/* Take IOLOCK_EXCL on the temporary file, maybe. */
bool
xrep_tempfile_iolock_nowait(
struct xfs_scrub *sc)
{
if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) {
sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
return true;
}
return false;
}
/*
* Take the temporary file's IOLOCK while holding a different inode's IOLOCK.
* In theory nobody else should hold the tempfile's IOLOCK, but we use trylock
* to avoid deadlocks and lockdep complaints.
*/
int
xrep_tempfile_iolock_polled(
struct xfs_scrub *sc)
{
int error = 0;
while (!xrep_tempfile_iolock_nowait(sc)) {
if (xchk_should_terminate(sc, &error))
return error;
delay(1);
}
return 0;
}
/* Release IOLOCK_EXCL on the temporary file. */
void
xrep_tempfile_iounlock(
struct xfs_scrub *sc)
{
xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
}
/* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */
void
xrep_tempfile_ilock(
struct xfs_scrub *sc)
{
sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
}
/* Try to grab ILOCK_EXCL on the temporary file. */
bool
xrep_tempfile_ilock_nowait(
struct xfs_scrub *sc)
{
if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) {
sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
return true;
}
return false;
}
/* Unlock ILOCK_EXCL on the temporary file after an update. */
void
xrep_tempfile_iunlock(
struct xfs_scrub *sc)
{
xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
}
/* Release the temporary file. */
void
xrep_tempfile_rele(
struct xfs_scrub *sc)
{
if (!sc->tempip)
return;
if (sc->temp_ilock_flags) {
xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
sc->temp_ilock_flags = 0;
}
xchk_irele(sc, sc->tempip);
sc->tempip = NULL;
}
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_SCRUB_TEMPFILE_H__
#define __XFS_SCRUB_TEMPFILE_H__
#ifdef CONFIG_XFS_ONLINE_REPAIR
int xrep_tempfile_create(struct xfs_scrub *sc, uint16_t mode);
void xrep_tempfile_rele(struct xfs_scrub *sc);
bool xrep_tempfile_iolock_nowait(struct xfs_scrub *sc);
int xrep_tempfile_iolock_polled(struct xfs_scrub *sc);
void xrep_tempfile_iounlock(struct xfs_scrub *sc);
void xrep_tempfile_ilock(struct xfs_scrub *sc);
bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc);
void xrep_tempfile_iunlock(struct xfs_scrub *sc);
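/*
 * Sketch of the expected call sequence from repair code; exact ordering
 * depends on the repair in question, and the tempfile itself is released
 * by xrep_tempfile_rele() from xchk_teardown():
 *
 *	error = xrep_tempfile_create(sc, S_IFREG);
 *	if (error)
 *		return error;
 *
 *	error = xrep_tempfile_iolock_polled(sc);
 *	if (error)
 *		return error;
 *	xrep_tempfile_ilock(sc);
 *
 *	(stage new contents in sc->tempip, then exchange and reap)
 *
 *	xrep_tempfile_iunlock(sc);
 *	xrep_tempfile_iounlock(sc);
 */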
#else
static inline void xrep_tempfile_iolock_both(struct xfs_scrub *sc)
{
xchk_ilock(sc, XFS_IOLOCK_EXCL);
}
# define xrep_tempfile_rele(sc)
#endif /* CONFIG_XFS_ONLINE_REPAIR */
#endif /* __XFS_SCRUB_TEMPFILE_H__ */
@@ -1539,6 +1539,7 @@ DEFINE_EVENT(xrep_extent_class, name, \
DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_unmap_extent);
DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_free_extent);
DEFINE_REPAIR_EXTENT_EVENT(xreap_agextent_binval);
DEFINE_REPAIR_EXTENT_EVENT(xreap_bmapi_binval);
DEFINE_REPAIR_EXTENT_EVENT(xrep_agfl_insert);
DECLARE_EVENT_CLASS(xrep_reap_find_class,
@@ -1572,6 +1573,7 @@ DEFINE_EVENT(xrep_reap_find_class, name, \
bool crosslinked), \
TP_ARGS(pag, agbno, len, crosslinked))
DEFINE_REPAIR_REAP_FIND_EVENT(xreap_agextent_select);
DEFINE_REPAIR_REAP_FIND_EVENT(xreap_bmapi_select);
DECLARE_EVENT_CLASS(xrep_rmap_class,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -2279,6 +2281,100 @@ TRACE_EVENT(xrep_rmap_live_update,
__entry->flags)
);
TRACE_EVENT(xrep_tempfile_create,
TP_PROTO(struct xfs_scrub *sc),
TP_ARGS(sc),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(unsigned int, type)
__field(xfs_agnumber_t, agno)
__field(xfs_ino_t, inum)
__field(unsigned int, gen)
__field(unsigned int, flags)
__field(xfs_ino_t, temp_inum)
),
TP_fast_assign(
__entry->dev = sc->mp->m_super->s_dev;
__entry->ino = sc->file ? XFS_I(file_inode(sc->file))->i_ino : 0;
__entry->type = sc->sm->sm_type;
__entry->agno = sc->sm->sm_agno;
__entry->inum = sc->sm->sm_ino;
__entry->gen = sc->sm->sm_gen;
__entry->flags = sc->sm->sm_flags;
__entry->temp_inum = sc->tempip->i_ino;
),
TP_printk("dev %d:%d ino 0x%llx type %s inum 0x%llx gen 0x%x flags 0x%x temp_inum 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
__entry->inum,
__entry->gen,
__entry->flags,
__entry->temp_inum)
);
TRACE_EVENT(xreap_ifork_extent,
TP_PROTO(struct xfs_scrub *sc, struct xfs_inode *ip, int whichfork,
const struct xfs_bmbt_irec *irec),
TP_ARGS(sc, ip, whichfork, irec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(int, whichfork)
__field(xfs_fileoff_t, fileoff)
__field(xfs_filblks_t, len)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(int, state)
),
TP_fast_assign(
__entry->dev = sc->mp->m_super->s_dev;
__entry->ino = ip->i_ino;
__entry->whichfork = whichfork;
__entry->fileoff = irec->br_startoff;
__entry->len = irec->br_blockcount;
__entry->agno = XFS_FSB_TO_AGNO(sc->mp, irec->br_startblock);
__entry->agbno = XFS_FSB_TO_AGBNO(sc->mp, irec->br_startblock);
__entry->state = irec->br_state;
),
TP_printk("dev %d:%d ip 0x%llx whichfork %s agno 0x%x agbno 0x%x fileoff 0x%llx fsbcount 0x%llx state 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
__entry->agno,
__entry->agbno,
__entry->fileoff,
__entry->len,
__entry->state)
);
TRACE_EVENT(xreap_bmapi_binval_scan,
TP_PROTO(struct xfs_scrub *sc, const struct xfs_bmbt_irec *irec,
xfs_extlen_t scan_blocks),
TP_ARGS(sc, irec, scan_blocks),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_filblks_t, len)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, scan_blocks)
),
TP_fast_assign(
__entry->dev = sc->mp->m_super->s_dev;
__entry->len = irec->br_blockcount;
__entry->agno = XFS_FSB_TO_AGNO(sc->mp, irec->br_startblock);
__entry->agbno = XFS_FSB_TO_AGBNO(sc->mp, irec->br_startblock);
__entry->scan_blocks = scan_blocks;
),
TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%llx scan_blocks 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
__entry->len,
__entry->scan_blocks)
);
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
......
@@ -160,7 +160,7 @@ xfs_nfs_get_inode(
}
}
if (VFS_I(ip)->i_generation != generation) {
if (VFS_I(ip)->i_generation != generation || IS_PRIVATE(VFS_I(ip))) {
xfs_irele(ip);
return ERR_PTR(-ESTALE);
}
......
@@ -42,7 +42,6 @@
struct kmem_cache *xfs_inode_cache;
STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
struct xfs_inode *);
@@ -2151,7 +2150,7 @@ xfs_iunlink_insert_inode(
* We place the on-disk inode on a list in the AGI. It will be pulled from this
* list when the inode is freed.
*/
STATIC int
int
xfs_iunlink(
struct xfs_trans *tp,
struct xfs_inode *ip)
......
@@ -616,6 +616,8 @@ extern struct kmem_cache *xfs_inode_cache;
bool xfs_inode_needs_inactive(struct xfs_inode *ip);
int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip);
void xfs_end_io(struct work_struct *work);
int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
......
@@ -365,6 +365,9 @@ xfs_vn_link(
if (unlikely(error))
return error;
if (IS_PRIVATE(inode))
return -EPERM;
error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
if (unlikely(error))
return error;
......
@@ -97,6 +97,14 @@ xfs_bulkstat_one_int(
vfsuid = i_uid_into_vfsuid(idmap, inode);
vfsgid = i_gid_into_vfsgid(idmap, inode);
/* If this is a private inode, don't leak its details to userspace. */
if (IS_PRIVATE(inode)) {
xfs_iunlock(ip, XFS_ILOCK_SHARED);
xfs_irele(ip);
error = -EINVAL;
goto out_advance;
}
/*
 * xfs_iget returns the following without needing further change.
 */
......