Commit 8394a97c authored by Chandan Babu R

Merge tag 'in-memory-btrees-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into xfs-6.9-mergeC

xfs: support in-memory btrees

Online repair of the reverse-mapping btrees presens some unique
challenges.  To construct a new reverse mapping btree, we must scan the
entire filesystem, but we cannot afford to quiesce the entire filesystem
for the potentially lengthy scan.

For rmap btrees, therefore, we relax our requirements of totally atomic
repairs.  Instead, repairs will scan all inodes, construct a new reverse
mapping dataset, format a new btree, and commit it before anyone trips
over the corruption.  This is exactly the same strategy as was used in
the quotacheck and nlink scanners.

Unfortunately, the xfarray cannot perform key-based lookups and is
therefore unsuitable for supporting live updates.  Luckily, we already have a
data structure that maintains an indexed rmap recordset -- the existing
rmap btree code!  Hence we port the existing btree and buffer target
code to be able to create a btree using the xfile we developed earlier.
Live hooks keep the in-memory btree up to date for any resources that
have already been scanned.

This approach is not maximally memory efficient, but we can use the same
rmap code that we do everywhere else, which provides improved stability
without growing the code base even more.  Note that in-memory btree
blocks are always page sized.

This patchset modifies the kernel xfs buffer cache to be capable of
using a xfile (aka a shmem file) as a backing device.  It then augments
the btree code to support creating btree cursors with buffers that come
from a buftarg other than the data device (namely an xfile-backed
buftarg).  For the userspace xfs buffer cache, we instead use a memfd or
an O_TMPFILE file as a backing device.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>

* tag 'in-memory-btrees-6.9_2024-02-23' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux:
  xfs: launder in-memory btree buffers before transaction commit
  xfs: support in-memory btrees
  xfs: add a xfs_btree_ptrs_equal helper
  xfs: support in-memory buffer cache targets
  xfs: teach buftargs to maintain their own buffer hashtable
parents aa8fb4bb 0dc63c8a
@@ -2270,13 +2270,12 @@ follows:
    pointing to the xfile.
 3. Pass the buffer cache target, buffer ops, and other information to
-   ``xfbtree_create`` to write an initial tree header and root block to the
-   xfile.
+   ``xfbtree_init`` to initialize the passed in ``struct xfbtree`` and write an
+   initial root block to the xfile.
    Each btree type should define a wrapper that passes necessary arguments to
    the creation function.
    For example, rmap btrees define ``xfs_rmapbt_mem_create`` to take care of
    all the necessary details for callers.
-   A ``struct xfbtree`` object will be returned.
 4. Pass the xfbtree object to the btree cursor creation function for the
    btree type.
...
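As a companion to the documented steps, here is a minimal sketch of the whole sequence. xmbuf_alloc(), xmbuf_free(), xfbtree_destroy() and xfs_btree_del_cursor() are real names from this series, and xfs_rmapbt_mem_create() is the wrapper named by the documentation above; xfs_rmapbt_mem_cursor() is an illustrative placeholder for the per-btree cursor constructor, and error handling is abbreviated:

	/* Sketch only; the cursor constructor name is hypothetical. */
	STATIC int
	xrep_rmapbt_mem_example(
		struct xfs_mount	*mp)
	{
		struct xfbtree		xfbt;
		struct xfs_buftarg	*btp;
		struct xfs_btree_cur	*cur;
		int			error;

		/* Steps 1-2: create an xfile-backed buffer cache target. */
		error = xmbuf_alloc(mp, "rmap repair", &btp);
		if (error)
			return error;

		/* Step 3: the per-btree wrapper supplies the ops to xfbtree_init(). */
		error = xfs_rmapbt_mem_create(mp, &xfbt, btp);
		if (error)
			goto out_target;

		/* Step 4: create a cursor and use the in-memory btree as usual. */
		cur = xfs_rmapbt_mem_cursor(mp, NULL, &xfbt);	/* hypothetical */
		/* ... stage records, apply live updates ... */
		xfs_btree_del_cursor(cur, 0);

		xfbtree_destroy(&xfbt);
	out_target:
		xmbuf_free(btp);
		return error;
	}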
@@ -128,6 +128,12 @@ config XFS_LIVE_HOOKS
 	bool
 	select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL
 
+config XFS_MEMORY_BUFS
+	bool
+
+config XFS_BTREE_IN_MEM
+	bool
+
 config XFS_ONLINE_SCRUB
 	bool "XFS online metadata check support"
 	default n
@@ -135,6 +141,7 @@ config XFS_ONLINE_SCRUB
 	depends on TMPFS && SHMEM
 	select XFS_LIVE_HOOKS
 	select XFS_DRAIN_INTENTS
+	select XFS_MEMORY_BUFS
 	help
 	  If you say Y here you will be able to check metadata on a
 	  mounted XFS filesystem.  This feature is intended to reduce
@@ -169,6 +176,7 @@ config XFS_ONLINE_REPAIR
 	bool "XFS online metadata repair support"
 	default n
 	depends on XFS_FS && XFS_ONLINE_SCRUB
+	select XFS_BTREE_IN_MEM
 	help
 	  If you say Y here you will be able to repair metadata on a
 	  mounted XFS filesystem.  This feature is intended to reduce
...
@@ -137,6 +137,8 @@ endif
 
 xfs-$(CONFIG_XFS_DRAIN_INTENTS)	+= xfs_drain.o
 xfs-$(CONFIG_XFS_LIVE_HOOKS)	+= xfs_hooks.o
+xfs-$(CONFIG_XFS_MEMORY_BUFS)	+= xfs_buf_mem.o
+xfs-$(CONFIG_XFS_BTREE_IN_MEM)	+= libxfs/xfs_btree_mem.o
 
 # online scrub/repair
 ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
...
@@ -264,7 +264,7 @@ xfs_free_perag(
 		xfs_defer_drain_free(&pag->pag_intents_drain);
 
 		cancel_delayed_work_sync(&pag->pag_blockgc_work);
-		xfs_buf_hash_destroy(pag);
+		xfs_buf_cache_destroy(&pag->pag_bcache);
 
 		/* drop the mount's active reference */
 		xfs_perag_rele(pag);
@@ -352,7 +352,7 @@ xfs_free_unused_perag_range(
 		spin_unlock(&mp->m_perag_lock);
 		if (!pag)
 			break;
-		xfs_buf_hash_destroy(pag);
+		xfs_buf_cache_destroy(&pag->pag_bcache);
 		xfs_defer_drain_free(&pag->pag_intents_drain);
 		kfree(pag);
 	}
@@ -419,7 +419,7 @@ xfs_initialize_perag(
 		pag->pagb_tree = RB_ROOT;
 #endif /* __KERNEL__ */
 
-		error = xfs_buf_hash_init(pag);
+		error = xfs_buf_cache_init(&pag->pag_bcache);
 		if (error)
 			goto out_remove_pag;
...
@@ -106,9 +106,7 @@ struct xfs_perag {
 	int		pag_ici_reclaimable;	/* reclaimable inodes */
 	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
 
-	/* buffer cache index */
-	spinlock_t	pag_buf_lock;	/* lock for pag_buf_hash */
-	struct rhashtable pag_buf_hash;
+	struct xfs_buf_cache	pag_bcache;
 
 	/* background prealloc block trimming */
 	struct delayed_work	pag_blockgc_work;
...
This diff is collapsed.
@@ -112,6 +112,7 @@ static inline enum xbtree_key_contig xbtree_key_contig(uint64_t x, uint64_t y)
 enum xfs_btree_type {
 	XFS_BTREE_TYPE_AG,
 	XFS_BTREE_TYPE_INODE,
+	XFS_BTREE_TYPE_MEM,
 };
 
 struct xfs_btree_ops {
@@ -281,6 +282,10 @@ struct xfs_btree_cur
 			struct xfs_buf		*agbp;
 			struct xbtree_afakeroot	*afake;	/* for staging cursor */
 		} bc_ag;
+		struct {
+			struct xfbtree		*xfbtree;
+			struct xfs_perag	*pag;
+		} bc_mem;
 	};
 
 	/* per-format private data */
@@ -455,6 +460,8 @@ xfs_failaddr_t xfs_btree_fsblock_v5hdr_verify(struct xfs_buf *bp,
 		uint64_t owner);
 xfs_failaddr_t xfs_btree_fsblock_verify(struct xfs_buf *bp,
 		unsigned int max_recs);
+xfs_failaddr_t xfs_btree_memblock_verify(struct xfs_buf *bp,
+		unsigned int max_recs);
 
 unsigned int xfs_btree_compute_maxlevels(const unsigned int *limits,
 		unsigned long long records);
...
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_trans.h"
#include "xfs_btree.h"
#include "xfs_error.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"
#include "xfs_ag.h"
#include "xfs_buf_item.h"
#include "xfs_trace.h"
/* Set the root of an in-memory btree. */
void
xfbtree_set_root(
struct xfs_btree_cur *cur,
const union xfs_btree_ptr *ptr,
int inc)
{
ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
cur->bc_mem.xfbtree->root = *ptr;
cur->bc_mem.xfbtree->nlevels += inc;
}
/* Initialize a pointer from the in-memory btree header. */
void
xfbtree_init_ptr_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr)
{
ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
*ptr = cur->bc_mem.xfbtree->root;
}
/* Duplicate an in-memory btree cursor. */
struct xfs_btree_cur *
xfbtree_dup_cursor(
struct xfs_btree_cur *cur)
{
struct xfs_btree_cur *ncur;
ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
ncur = xfs_btree_alloc_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ops,
cur->bc_maxlevels, cur->bc_cache);
ncur->bc_flags = cur->bc_flags;
ncur->bc_nlevels = cur->bc_nlevels;
ncur->bc_mem.xfbtree = cur->bc_mem.xfbtree;
if (cur->bc_mem.pag)
ncur->bc_mem.pag = xfs_perag_hold(cur->bc_mem.pag);
return ncur;
}
/* Close the btree xfile and release all resources. */
void
xfbtree_destroy(
struct xfbtree *xfbt)
{
xfs_buftarg_drain(xfbt->target);
}
/* Compute the number of bytes available for records. */
static inline unsigned int
xfbtree_rec_bytes(
struct xfs_mount *mp,
const struct xfs_btree_ops *ops)
{
return XMBUF_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
}
/* Initialize an empty leaf block as the btree root. */
STATIC int
xfbtree_init_leaf_block(
struct xfs_mount *mp,
struct xfbtree *xfbt,
const struct xfs_btree_ops *ops)
{
struct xfs_buf *bp;
xfbno_t bno = xfbt->highest_bno++;
int error;
error = xfs_buf_get(xfbt->target, xfbno_to_daddr(bno), XFBNO_BBSIZE,
&bp);
if (error)
return error;
trace_xfbtree_create_root_buf(xfbt, bp);
bp->b_ops = ops->buf_ops;
xfs_btree_init_buf(mp, bp, ops, 0, 0, xfbt->owner);
xfs_buf_relse(bp);
xfbt->root.l = cpu_to_be64(bno);
return 0;
}
/*
* Create an in-memory btree root that can be used with the given xmbuf.
* Callers must set xfbt->owner.
*/
int
xfbtree_init(
struct xfs_mount *mp,
struct xfbtree *xfbt,
struct xfs_buftarg *btp,
const struct xfs_btree_ops *ops)
{
unsigned int blocklen = xfbtree_rec_bytes(mp, ops);
unsigned int keyptr_len;
int error;
/* Requires a long-format CRC-format btree */
if (!xfs_has_crc(mp)) {
ASSERT(xfs_has_crc(mp));
return -EINVAL;
}
if (ops->ptr_len != XFS_BTREE_LONG_PTR_LEN) {
ASSERT(ops->ptr_len == XFS_BTREE_LONG_PTR_LEN);
return -EINVAL;
}
memset(xfbt, 0, sizeof(*xfbt));
xfbt->target = btp;
/* Set up min/maxrecs for this btree. */
keyptr_len = ops->key_len + sizeof(__be64);
xfbt->maxrecs[0] = blocklen / ops->rec_len;
xfbt->maxrecs[1] = blocklen / keyptr_len;
xfbt->minrecs[0] = xfbt->maxrecs[0] / 2;
xfbt->minrecs[1] = xfbt->maxrecs[1] / 2;
xfbt->highest_bno = 0;
xfbt->nlevels = 1;
/* Initialize the empty btree. */
error = xfbtree_init_leaf_block(mp, xfbt, ops);
if (error)
goto err_freesp;
trace_xfbtree_init(mp, xfbt, ops);
return 0;
err_freesp:
xfs_buftarg_drain(xfbt->target);
return error;
}
/* Allocate a block to our in-memory btree. */
int
xfbtree_alloc_block(
struct xfs_btree_cur *cur,
const union xfs_btree_ptr *start,
union xfs_btree_ptr *new,
int *stat)
{
struct xfbtree *xfbt = cur->bc_mem.xfbtree;
xfbno_t bno = xfbt->highest_bno++;
ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
trace_xfbtree_alloc_block(xfbt, cur, bno);
/* Fail if the block address exceeds the maximum for the buftarg. */
if (!xfbtree_verify_bno(xfbt, bno)) {
ASSERT(xfbtree_verify_bno(xfbt, bno));
*stat = 0;
return 0;
}
new->l = cpu_to_be64(bno);
*stat = 1;
return 0;
}
/* Free a block from our in-memory btree. */
int
xfbtree_free_block(
struct xfs_btree_cur *cur,
struct xfs_buf *bp)
{
struct xfbtree *xfbt = cur->bc_mem.xfbtree;
xfs_daddr_t daddr = xfs_buf_daddr(bp);
xfbno_t bno = xfs_daddr_to_xfbno(daddr);
ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM);
trace_xfbtree_free_block(xfbt, cur, bno);
if (bno + 1 == xfbt->highest_bno)
xfbt->highest_bno--;
return 0;
}
/* Return the minimum number of records for a btree block. */
int
xfbtree_get_minrecs(
struct xfs_btree_cur *cur,
int level)
{
struct xfbtree *xfbt = cur->bc_mem.xfbtree;
return xfbt->minrecs[level != 0];
}
/* Return the maximum number of records for a btree block. */
int
xfbtree_get_maxrecs(
struct xfs_btree_cur *cur,
int level)
{
struct xfbtree *xfbt = cur->bc_mem.xfbtree;
return xfbt->maxrecs[level != 0];
}
/* If this log item is a buffer item that came from the xfbtree, return it. */
static inline struct xfs_buf *
xfbtree_buf_match(
struct xfbtree *xfbt,
const struct xfs_log_item *lip)
{
const struct xfs_buf_log_item *bli;
struct xfs_buf *bp;
if (lip->li_type != XFS_LI_BUF)
return NULL;
bli = container_of(lip, struct xfs_buf_log_item, bli_item);
bp = bli->bli_buf;
if (bp->b_target != xfbt->target)
return NULL;
return bp;
}
/*
* Commit changes to the incore btree immediately by writing all dirty xfbtree
* buffers to the backing xfile. This detaches all xfbtree buffers from the
* transaction, even on failure. The buffer locks are dropped between the
* delwri queue and submit, so the caller must synchronize btree access.
*
* Normally we'd let the buffers commit with the transaction and get written to
* the xfile via the log, but online repair stages ephemeral btrees in memory
* and uses the btree_staging functions to write new btrees to disk atomically.
* The in-memory btree (and its backing store) are discarded at the end of the
* repair phase, which means that xfbtree buffers cannot commit with the rest
* of a transaction.
*
* In other words, online repair only needs the transaction to collect buffer
* pointers and to avoid buffer deadlocks, not to guarantee consistency of
* updates.
*/
int
xfbtree_trans_commit(
struct xfbtree *xfbt,
struct xfs_trans *tp)
{
struct xfs_log_item *lip, *n;
bool tp_dirty = false;
int error = 0;
/*
* For each xfbtree buffer attached to the transaction, write the dirty
* buffers to the xfile and release them.
*/
list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip);
if (!bp) {
if (test_bit(XFS_LI_DIRTY, &lip->li_flags))
tp_dirty |= true;
continue;
}
trace_xfbtree_trans_commit_buf(xfbt, bp);
xmbuf_trans_bdetach(tp, bp);
/*
* If the buffer fails verification, note the failure but
* continue walking the transaction items so that we remove all
* ephemeral btree buffers.
*/
if (!error)
error = xmbuf_finalize(bp);
xfs_buf_relse(bp);
}
/*
* Reset the transaction's dirty flag to reflect the dirty state of the
* log items that are still attached.
*/
tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) |
(tp_dirty ? XFS_TRANS_DIRTY : 0);
return error;
}
/*
* Cancel changes to the incore btree by detaching all the xfbtree buffers.
* Changes are not undone, so callers must not access the btree ever again.
*/
void
xfbtree_trans_cancel(
struct xfbtree *xfbt,
struct xfs_trans *tp)
{
struct xfs_log_item *lip, *n;
bool tp_dirty = false;
list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip);
if (!bp) {
if (test_bit(XFS_LI_DIRTY, &lip->li_flags))
tp_dirty |= true;
continue;
}
trace_xfbtree_trans_cancel_buf(xfbt, bp);
xmbuf_trans_bdetach(tp, bp);
xfs_buf_relse(bp);
}
/*
* Reset the transaction's dirty flag to reflect the dirty state of the
* log items that are still attached.
*/
tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) |
(tp_dirty ? XFS_TRANS_DIRTY : 0);
}
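A minimal sketch of the calling pattern that xfbtree_trans_commit and xfbtree_trans_cancel expect, assuming a scratch transaction tp whose only dirty buffers came from the in-memory btree; the update helper name is illustrative:

	STATIC int
	xfbtree_update_example(
		struct xfs_trans	*tp,
		struct xfbtree		*xfbt,
		struct xfs_btree_cur	*mcur)
	{
		int			error;

		error = example_insert_record(mcur);	/* hypothetical update */
		if (error) {
			/* Detach the dirty buffers; the btree is unusable afterwards. */
			xfbtree_trans_cancel(xfbt, tp);
			return error;
		}

		/* Launder dirty xfbtree buffers to the xfile before tp commits. */
		return xfbtree_trans_commit(xfbt, tp);
	}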
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_BTREE_MEM_H__
#define __XFS_BTREE_MEM_H__
typedef uint64_t xfbno_t;
#define XFBNO_BLOCKSIZE (XMBUF_BLOCKSIZE)
#define XFBNO_BBSHIFT (XMBUF_BLOCKSHIFT - BBSHIFT)
#define XFBNO_BBSIZE (XFBNO_BLOCKSIZE >> BBSHIFT)
static inline xfs_daddr_t xfbno_to_daddr(xfbno_t blkno)
{
return blkno << XFBNO_BBSHIFT;
}
static inline xfbno_t xfs_daddr_to_xfbno(xfs_daddr_t daddr)
{
return daddr >> XFBNO_BBSHIFT;
}
struct xfbtree {
/* buffer cache target for this in-memory btree */
struct xfs_buftarg *target;
/* Highest block number that has been written to. */
xfbno_t highest_bno;
/* Owner of this btree. */
unsigned long long owner;
/* Btree header */
union xfs_btree_ptr root;
unsigned int nlevels;
/* Minimum and maximum records per block. */
unsigned int maxrecs[2];
unsigned int minrecs[2];
};
#ifdef CONFIG_XFS_BTREE_IN_MEM
static inline bool xfbtree_verify_bno(struct xfbtree *xfbt, xfbno_t bno)
{
return xmbuf_verify_daddr(xfbt->target, xfbno_to_daddr(bno));
}
void xfbtree_set_root(struct xfs_btree_cur *cur,
const union xfs_btree_ptr *ptr, int inc);
void xfbtree_init_ptr_from_cur(struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr);
struct xfs_btree_cur *xfbtree_dup_cursor(struct xfs_btree_cur *cur);
int xfbtree_get_minrecs(struct xfs_btree_cur *cur, int level);
int xfbtree_get_maxrecs(struct xfs_btree_cur *cur, int level);
int xfbtree_alloc_block(struct xfs_btree_cur *cur,
const union xfs_btree_ptr *start, union xfs_btree_ptr *ptr,
int *stat);
int xfbtree_free_block(struct xfs_btree_cur *cur, struct xfs_buf *bp);
/* Callers must set xfbt->target and xfbt->owner before calling this */
int xfbtree_init(struct xfs_mount *mp, struct xfbtree *xfbt,
struct xfs_buftarg *btp, const struct xfs_btree_ops *ops);
void xfbtree_destroy(struct xfbtree *xfbt);
int xfbtree_trans_commit(struct xfbtree *xfbt, struct xfs_trans *tp);
void xfbtree_trans_cancel(struct xfbtree *xfbt, struct xfs_trans *tp);
#else
# define xfbtree_verify_bno(...) (false)
#endif /* CONFIG_XFS_BTREE_IN_MEM */
#endif /* __XFS_BTREE_MEM_H__ */
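As a worked example of the conversion helpers above: on a system with 4096-byte pages, XMBUF_BLOCKSHIFT is 12 and BBSHIFT is 9, so XFBNO_BBSHIFT is 3 and every in-memory btree block covers eight 512-byte basic blocks:

	XFBNO_BBSIZE           == 4096 >> 9 == 8	/* sectors per block */
	xfbno_to_daddr(5)      == 5 << 3    == 40
	xfs_daddr_to_xfbno(40) == 40 >> 3   == 5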
@@ -15,6 +15,7 @@
 #include "xfs_quota.h"
 #include "xfs_qm.h"
 #include "xfs_scrub.h"
+#include "xfs_buf_mem.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
@@ -190,6 +191,10 @@ xchk_teardown(
 			sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
 			mnt_drop_write_file(sc->file);
 		}
+		if (sc->xmbtp) {
+			xmbuf_free(sc->xmbtp);
+			sc->xmbtp = NULL;
+		}
 		if (sc->xfile) {
 			xfile_destroy(sc->xfile);
 			sc->xfile = NULL;
...
@@ -99,6 +99,9 @@ struct xfs_scrub {
 	/* xfile used by the scrubbers; freed at teardown. */
 	struct xfile			*xfile;
 
+	/* buffer target for in-memory btrees; also freed at teardown. */
+	struct xfs_buftarg		*xmbtp;
+
 	/* Lock flags for @ip. */
 	uint				ilock_flags;
...
This diff is collapsed.
@@ -83,6 +83,14 @@ typedef unsigned int xfs_buf_flags_t;
 #define XFS_BSTATE_DISPOSE	 (1 << 0)	/* buffer being discarded */
 #define XFS_BSTATE_IN_FLIGHT	 (1 << 1)	/* I/O in flight */
 
+struct xfs_buf_cache {
+	spinlock_t		bc_lock;
+	struct rhashtable	bc_hash;
+};
+
+int xfs_buf_cache_init(struct xfs_buf_cache *bch);
+void xfs_buf_cache_destroy(struct xfs_buf_cache *bch);
+
 /*
  * The xfs_buftarg contains 2 notions of "sector size" -
  *
@@ -101,6 +109,7 @@ struct xfs_buftarg {
 	struct bdev_handle	*bt_bdev_handle;
 	struct block_device	*bt_bdev;
 	struct dax_device	*bt_daxdev;
+	struct file		*bt_file;
 	u64			bt_dax_part_off;
 	struct xfs_mount	*bt_mount;
 	unsigned int		bt_meta_sectorsize;
@@ -114,6 +123,9 @@ struct xfs_buftarg {
 	struct percpu_counter	bt_io_count;
 	struct ratelimit_state	bt_ioerror_rl;
+
+	/* built-in cache, if we're not using the perag one */
+	struct xfs_buf_cache	bt_cache[];
 };
 
 #define XB_PAGES	2
@@ -379,4 +391,9 @@ int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
 bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic);
 bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic);
 
+/* for xfs_buf_mem.c only: */
+int xfs_init_buftarg(struct xfs_buftarg *btp, size_t logical_sectorsize,
+		const char *descr);
+void xfs_destroy_buftarg(struct xfs_buftarg *btp);
+
 #endif /* __XFS_BUF_H__ */
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2023-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_buf.h"
#include "xfs_buf_mem.h"
#include "xfs_trace.h"
#include <linux/shmem_fs.h>
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_error.h"
/*
* Buffer Cache for In-Memory Files
* ================================
*
* Online fsck wants to create ephemeral ordered recordsets. The existing
* btree infrastructure can do this, but we need the buffer cache to target
* memory instead of block devices.
*
* When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
* requirements. Therefore, the xmbuf mechanism uses an unlinked shmem file to
* store our staging data. This file is not installed in the file descriptor
* table so that user programs cannot access the data, which means that the
* xmbuf must be freed with xmbuf_destroy.
*
* xmbufs assume that the caller will handle all required concurrency
* management; standard vfs locks (freezer and inode) are not taken. Reads
* and writes are satisfied directly from the page cache.
*
* The only supported block size is PAGE_SIZE, and we cannot use highmem.
*/
/*
* shmem files used to back an in-memory buffer cache must not be exposed to
* userspace. Upper layers must coordinate access to the one handle returned
* by the constructor, so establish a separate lock class for xmbufs to avoid
* confusing lockdep.
*/
static struct lock_class_key xmbuf_i_mutex_key;
/*
* Allocate a buffer cache target for a memory-backed file and set up the
* buffer target.
*/
int
xmbuf_alloc(
struct xfs_mount *mp,
const char *descr,
struct xfs_buftarg **btpp)
{
struct file *file;
struct inode *inode;
struct xfs_buftarg *btp;
int error;
btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
if (!btp)
return -ENOMEM;
file = shmem_kernel_file_setup(descr, 0, 0);
if (IS_ERR(file)) {
error = PTR_ERR(file);
goto out_free_btp;
}
inode = file_inode(file);
/* private file, private locking */
lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);
/*
* We don't want to bother with kmapping data during repair, so don't
* allow highmem pages to back this mapping.
*/
mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
/* ensure all writes are below EOF to avoid pagecache zeroing */
i_size_write(inode, inode->i_sb->s_maxbytes);
trace_xmbuf_create(btp);
error = xfs_buf_cache_init(btp->bt_cache);
if (error)
goto out_file;
/* Initialize buffer target */
btp->bt_mount = mp;
btp->bt_dev = (dev_t)-1U;
btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
btp->bt_file = file;
btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;
error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
if (error)
goto out_bcache;
*btpp = btp;
return 0;
out_bcache:
xfs_buf_cache_destroy(btp->bt_cache);
out_file:
fput(file);
out_free_btp:
kfree(btp);
return error;
}
/* Free a buffer cache target for a memory-backed buffer cache. */
void
xmbuf_free(
struct xfs_buftarg *btp)
{
ASSERT(xfs_buftarg_is_mem(btp));
ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
trace_xmbuf_free(btp);
xfs_destroy_buftarg(btp);
xfs_buf_cache_destroy(btp->bt_cache);
fput(btp->bt_file);
kfree(btp);
}
/* Directly map a shmem page into the buffer cache. */
int
xmbuf_map_page(
struct xfs_buf *bp)
{
struct inode *inode = file_inode(bp->b_target->bt_file);
struct folio *folio = NULL;
struct page *page;
loff_t pos = BBTOB(xfs_buf_daddr(bp));
int error;
ASSERT(xfs_buftarg_is_mem(bp->b_target));
if (bp->b_map_count != 1)
return -ENOMEM;
if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
return -ENOMEM;
if (offset_in_page(pos) != 0) {
ASSERT(offset_in_page(pos) == 0);
return -ENOMEM;
}
error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_CACHE);
if (error)
return error;
if (filemap_check_wb_err(inode->i_mapping, 0)) {
folio_unlock(folio);
folio_put(folio);
return -EIO;
}
page = folio_file_page(folio, pos >> PAGE_SHIFT);
/*
* Mark the page dirty so that it won't be reclaimed once we drop the
* (potentially last) reference in xmbuf_unmap_page.
*/
set_page_dirty(page);
unlock_page(page);
bp->b_addr = page_address(page);
bp->b_pages = bp->b_page_array;
bp->b_pages[0] = page;
bp->b_page_count = 1;
return 0;
}
/* Unmap a shmem page that was mapped into the buffer cache. */
void
xmbuf_unmap_page(
struct xfs_buf *bp)
{
struct page *page = bp->b_pages[0];
ASSERT(xfs_buftarg_is_mem(bp->b_target));
put_page(page);
bp->b_addr = NULL;
bp->b_pages[0] = NULL;
bp->b_pages = NULL;
bp->b_page_count = 0;
}
/* Is this a valid daddr within the buftarg? */
bool
xmbuf_verify_daddr(
struct xfs_buftarg *btp,
xfs_daddr_t daddr)
{
struct inode *inode = file_inode(btp->bt_file);
ASSERT(xfs_buftarg_is_mem(btp));
return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT);
}
/* Discard the page backing this buffer. */
static void
xmbuf_stale(
struct xfs_buf *bp)
{
struct inode *inode = file_inode(bp->b_target->bt_file);
loff_t pos;
ASSERT(xfs_buftarg_is_mem(bp->b_target));
pos = BBTOB(xfs_buf_daddr(bp));
shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1);
}
/*
* Finalize a buffer -- discard the backing page if it's stale, or run the
* write verifier to detect problems.
*/
int
xmbuf_finalize(
struct xfs_buf *bp)
{
xfs_failaddr_t fa;
int error = 0;
if (bp->b_flags & XBF_STALE) {
xmbuf_stale(bp);
return 0;
}
/*
* Although this btree is ephemeral, validate the buffer structure so
* that we can detect memory corruption errors and software bugs.
*/
fa = bp->b_ops->verify_struct(bp);
if (fa) {
error = -EFSCORRUPTED;
xfs_verifier_error(bp, error, fa);
}
return error;
}
/*
* Detach this xmbuf buffer from the transaction by any means necessary.
* All buffers are direct-mapped, so they do not need bwrite.
*/
void
xmbuf_trans_bdetach(
struct xfs_trans *tp,
struct xfs_buf *bp)
{
struct xfs_buf_log_item *bli = bp->b_log_item;
ASSERT(bli != NULL);
bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED |
XFS_BLI_LOGGED | XFS_BLI_STALE);
clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags);
while (bp->b_log_item != NULL)
xfs_trans_bdetach(tp, bp);
}
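The map/unmap helpers above are paired by the buffer cache itself; the actual call sites live in the collapsed xfs_buf.c diff. A sketch of the intended shape, with both example_* function names being illustrative:

	/* Sketch: in-memory buffers direct-map a shmem page instead of doing I/O. */
	static int
	example_buf_backing_alloc(
		struct xfs_buf	*bp)
	{
		if (xfs_buftarg_is_mem(bp->b_target))
			return xmbuf_map_page(bp);	/* b_addr aliases the page cache */
		return example_alloc_pages(bp);		/* hypothetical bdev-backed path */
	}

	static void
	example_buf_backing_free(
		struct xfs_buf	*bp)
	{
		if (xfs_buftarg_is_mem(bp->b_target))
			xmbuf_unmap_page(bp);		/* drops the page reference */
	}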
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (c) 2023-2024 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
*/
#ifndef __XFS_BUF_MEM_H__
#define __XFS_BUF_MEM_H__
#define XMBUF_BLOCKSIZE (PAGE_SIZE)
#define XMBUF_BLOCKSHIFT (PAGE_SHIFT)
#ifdef CONFIG_XFS_MEMORY_BUFS
static inline bool xfs_buftarg_is_mem(const struct xfs_buftarg *btp)
{
return btp->bt_bdev == NULL;
}
int xmbuf_alloc(struct xfs_mount *mp, const char *descr,
struct xfs_buftarg **btpp);
void xmbuf_free(struct xfs_buftarg *btp);
int xmbuf_map_page(struct xfs_buf *bp);
void xmbuf_unmap_page(struct xfs_buf *bp);
bool xmbuf_verify_daddr(struct xfs_buftarg *btp, xfs_daddr_t daddr);
void xmbuf_trans_bdetach(struct xfs_trans *tp, struct xfs_buf *bp);
int xmbuf_finalize(struct xfs_buf *bp);
#else
# define xfs_buftarg_is_mem(...) (false)
# define xmbuf_map_page(...) (-ENOMEM)
# define xmbuf_unmap_page(...) ((void)0)
# define xmbuf_verify_daddr(...) (false)
#endif /* CONFIG_XFS_MEMORY_BUFS */
#endif /* __XFS_BUF_MEM_H__ */
@@ -527,6 +527,9 @@ xfs_btree_mark_sick(
 	struct xfs_btree_cur		*cur)
 {
 	switch (cur->bc_ops->type) {
+	case XFS_BTREE_TYPE_MEM:
+		/* no health state tracking for ephemeral btrees */
+		return;
 	case XFS_BTREE_TYPE_AG:
 		ASSERT(cur->bc_ops->sick_mask);
 		xfs_ag_mark_sick(cur->bc_ag.pag, cur->bc_ops->sick_mask);
...
@@ -505,9 +505,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 	return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
 }
 
-int xfs_buf_hash_init(struct xfs_perag *pag);
-void xfs_buf_hash_destroy(struct xfs_perag *pag);
-
 extern void	xfs_uuid_table_free(void);
 extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
 extern int	xfs_mountfs(xfs_mount_t *mp);
...
@@ -36,6 +36,8 @@
 #include "xfs_error.h"
 #include <linux/iomap.h>
 #include "xfs_iomap.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
 
 /*
  * We include this last to have the helpers above available for the trace
...
@@ -79,6 +79,8 @@ union xfs_btree_ptr;
 struct xfs_dqtrx;
 struct xfs_icwalk;
 struct xfs_perag;
+struct xfbtree;
+struct xfs_btree_ops;
 
 #define XFS_ATTR_FILTER_FLAGS \
 	{ XFS_ATTR_ROOT,	"ROOT" }, \
@@ -640,6 +642,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
+DEFINE_BUF_ITEM_EVENT(xfs_trans_bdetach);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
 DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
@@ -2499,12 +2502,19 @@ TRACE_EVENT(xfs_btree_alloc_block,
 	),
 	TP_fast_assign(
 		__entry->dev = cur->bc_mp->m_super->s_dev;
-		if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) {
+		switch (cur->bc_ops->type) {
+		case XFS_BTREE_TYPE_INODE:
 			__entry->agno = 0;
 			__entry->ino = cur->bc_ino.ip->i_ino;
-		} else {
+			break;
+		case XFS_BTREE_TYPE_AG:
 			__entry->agno = cur->bc_ag.pag->pag_agno;
 			__entry->ino = 0;
+			break;
+		case XFS_BTREE_TYPE_MEM:
+			__entry->agno = 0;
+			__entry->ino = 0;
+			break;
 		}
 		__assign_str(name, cur->bc_ops->name);
 		__entry->error = error;
@@ -4514,6 +4524,159 @@ DEFINE_PERAG_INTENTS_EVENT(xfs_perag_wait_intents);

#endif /* CONFIG_XFS_DRAIN_INTENTS */

#ifdef CONFIG_XFS_MEMORY_BUFS
TRACE_EVENT(xmbuf_create,
TP_PROTO(struct xfs_buftarg *btp),
TP_ARGS(btp),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned long, ino)
__array(char, pathname, 256)
),
TP_fast_assign(
char pathname[257];
char *path;
struct file *file = btp->bt_file;
__entry->ino = file_inode(file)->i_ino;
memset(pathname, 0, sizeof(pathname));
path = file_path(file, pathname, sizeof(pathname) - 1);
if (IS_ERR(path))
path = "(unknown)";
strncpy(__entry->pathname, path, sizeof(__entry->pathname));
),
TP_printk("xmino 0x%lx path '%s'",
__entry->ino,
__entry->pathname)
);
TRACE_EVENT(xmbuf_free,
TP_PROTO(struct xfs_buftarg *btp),
TP_ARGS(btp),
TP_STRUCT__entry(
__field(unsigned long, ino)
__field(unsigned long long, bytes)
__field(loff_t, size)
),
TP_fast_assign(
struct file *file = btp->bt_file;
struct inode *inode = file_inode(file);
__entry->size = i_size_read(inode);
__entry->bytes = (inode->i_blocks << SECTOR_SHIFT) + inode->i_bytes;
__entry->ino = inode->i_ino;
),
TP_printk("xmino 0x%lx mem_bytes 0x%llx isize 0x%llx",
__entry->ino,
__entry->bytes,
__entry->size)
);
#endif /* CONFIG_XFS_MEMORY_BUFS */
#ifdef CONFIG_XFS_BTREE_IN_MEM
TRACE_EVENT(xfbtree_init,
TP_PROTO(struct xfs_mount *mp, struct xfbtree *xfbt,
const struct xfs_btree_ops *ops),
TP_ARGS(mp, xfbt, ops),
TP_STRUCT__entry(
__field(const void *, btree_ops)
__field(unsigned long, xfino)
__field(unsigned int, leaf_mxr)
__field(unsigned int, leaf_mnr)
__field(unsigned int, node_mxr)
__field(unsigned int, node_mnr)
__field(unsigned long long, owner)
),
TP_fast_assign(
__entry->btree_ops = ops;
__entry->xfino = file_inode(xfbt->target->bt_file)->i_ino;
__entry->leaf_mxr = xfbt->maxrecs[0];
__entry->node_mxr = xfbt->maxrecs[1];
__entry->leaf_mnr = xfbt->minrecs[0];
__entry->node_mnr = xfbt->minrecs[1];
__entry->owner = xfbt->owner;
),
TP_printk("xfino 0x%lx btree_ops %pS owner 0x%llx leaf_mxr %u leaf_mnr %u node_mxr %u node_mnr %u",
__entry->xfino,
__entry->btree_ops,
__entry->owner,
__entry->leaf_mxr,
__entry->leaf_mnr,
__entry->node_mxr,
__entry->node_mnr)
);
DECLARE_EVENT_CLASS(xfbtree_buf_class,
TP_PROTO(struct xfbtree *xfbt, struct xfs_buf *bp),
TP_ARGS(xfbt, bp),
TP_STRUCT__entry(
__field(unsigned long, xfino)
__field(xfs_daddr_t, bno)
__field(int, nblks)
__field(int, hold)
__field(int, pincount)
__field(unsigned int, lockval)
__field(unsigned int, flags)
),
TP_fast_assign(
__entry->xfino = file_inode(xfbt->target->bt_file)->i_ino;
__entry->bno = xfs_buf_daddr(bp);
__entry->nblks = bp->b_length;
__entry->hold = atomic_read(&bp->b_hold);
__entry->pincount = atomic_read(&bp->b_pin_count);
__entry->lockval = bp->b_sema.count;
__entry->flags = bp->b_flags;
),
TP_printk("xfino 0x%lx daddr 0x%llx bbcount 0x%x hold %d pincount %d lock %d flags %s",
__entry->xfino,
(unsigned long long)__entry->bno,
__entry->nblks,
__entry->hold,
__entry->pincount,
__entry->lockval,
__print_flags(__entry->flags, "|", XFS_BUF_FLAGS))
)
#define DEFINE_XFBTREE_BUF_EVENT(name) \
DEFINE_EVENT(xfbtree_buf_class, name, \
TP_PROTO(struct xfbtree *xfbt, struct xfs_buf *bp), \
TP_ARGS(xfbt, bp))
DEFINE_XFBTREE_BUF_EVENT(xfbtree_create_root_buf);
DEFINE_XFBTREE_BUF_EVENT(xfbtree_trans_commit_buf);
DEFINE_XFBTREE_BUF_EVENT(xfbtree_trans_cancel_buf);
DECLARE_EVENT_CLASS(xfbtree_freesp_class,
TP_PROTO(struct xfbtree *xfbt, struct xfs_btree_cur *cur,
xfs_fileoff_t fileoff),
TP_ARGS(xfbt, cur, fileoff),
TP_STRUCT__entry(
__field(unsigned long, xfino)
__string(btname, cur->bc_ops->name)
__field(int, nlevels)
__field(xfs_fileoff_t, fileoff)
),
TP_fast_assign(
__entry->xfino = file_inode(xfbt->target->bt_file)->i_ino;
__assign_str(btname, cur->bc_ops->name);
__entry->nlevels = cur->bc_nlevels;
__entry->fileoff = fileoff;
),
TP_printk("xfino 0x%lx %sbt nlevels %d fileoff 0x%llx",
__entry->xfino,
__get_str(btname),
__entry->nlevels,
(unsigned long long)__entry->fileoff)
)
#define DEFINE_XFBTREE_FREESP_EVENT(name) \
DEFINE_EVENT(xfbtree_freesp_class, name, \
TP_PROTO(struct xfbtree *xfbt, struct xfs_btree_cur *cur, \
xfs_fileoff_t fileoff), \
TP_ARGS(xfbt, cur, fileoff))
DEFINE_XFBTREE_FREESP_EVENT(xfbtree_alloc_block);
DEFINE_XFBTREE_FREESP_EVENT(xfbtree_free_block);
#endif /* CONFIG_XFS_BTREE_IN_MEM */
#endif /* _TRACE_XFS_H */

#undef TRACE_INCLUDE_PATH
...
@@ -215,6 +215,7 @@ struct xfs_buf	*xfs_trans_getsb(struct xfs_trans *);
 
 void		xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
+void		xfs_trans_bdetach(struct xfs_trans *tp, struct xfs_buf *bp);
 void		xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_bhold_release(xfs_trans_t *, struct xfs_buf *);
 void		xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
...
@@ -392,6 +392,48 @@ xfs_trans_brelse(
	xfs_buf_relse(bp);
}

/*
* Forcibly detach a buffer previously joined to the transaction. The caller
* will retain its locked reference to the buffer after this function returns.
* The buffer must be completely clean and must not be held to the transaction.
*/
void
xfs_trans_bdetach(
struct xfs_trans *tp,
struct xfs_buf *bp)
{
struct xfs_buf_log_item *bip = bp->b_log_item;
ASSERT(tp != NULL);
ASSERT(bp->b_transp == tp);
ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
trace_xfs_trans_bdetach(bip);
/*
* Erase all recursion count, since we're removing this buffer from the
* transaction.
*/
bip->bli_recur = 0;
/*
* The buffer must be completely clean. Specifically, it had better
* not be dirty, stale, logged, ordered, or held to the transaction.
*/
ASSERT(!test_bit(XFS_LI_DIRTY, &bip->bli_item.li_flags));
ASSERT(!(bip->bli_flags & XFS_BLI_DIRTY));
ASSERT(!(bip->bli_flags & XFS_BLI_HOLD));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
ASSERT(!(bip->bli_flags & XFS_BLI_ORDERED));
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
/* Unlink the log item from the transaction and drop the log item. */
xfs_trans_del_item(&bip->bli_item);
xfs_buf_item_put(bip);
bp->b_transp = NULL;
}
/*
 * Mark the buffer as not needing to be unlocked when the buf item's
 * iop_committing() routine is called.  The buffer must already be locked
......