Commit aafc3c24, authored by Brian Foster, committed by Dave Chinner

xfs: support the XFS_BTNUM_FINOBT free inode btree type

Define the AGI fields for the finobt root/level and add magic
numbers. Update the btree code to add support for the new
XFS_BTNUM_FINOBT inode btree.

The finobt root block is reserved immediately following the inobt
root block in the AG. Update XFS_PREALLOC_BLOCKS() to determine the
starting AG data block based on whether finobt support is enabled.
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
parent 8e2c84df
...@@ -160,30 +160,38 @@ typedef struct xfs_agi { ...@@ -160,30 +160,38 @@ typedef struct xfs_agi {
* still being referenced. * still being referenced.
*/ */
__be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; __be32 agi_unlinked[XFS_AGI_UNLINKED_BUCKETS];
/*
* This marks the end of logging region 1 and start of logging region 2.
*/
uuid_t agi_uuid; /* uuid of filesystem */ uuid_t agi_uuid; /* uuid of filesystem */
__be32 agi_crc; /* crc of agi sector */ __be32 agi_crc; /* crc of agi sector */
__be32 agi_pad32; __be32 agi_pad32;
__be64 agi_lsn; /* last write sequence */ __be64 agi_lsn; /* last write sequence */
__be32 agi_free_root; /* root of the free inode btree */
__be32 agi_free_level;/* levels in free inode btree */
/* structure must be padded to 64 bit alignment */ /* structure must be padded to 64 bit alignment */
} xfs_agi_t; } xfs_agi_t;
#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) #define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc)
#define XFS_AGI_MAGICNUM 0x00000001 #define XFS_AGI_MAGICNUM (1 << 0)
#define XFS_AGI_VERSIONNUM 0x00000002 #define XFS_AGI_VERSIONNUM (1 << 1)
#define XFS_AGI_SEQNO 0x00000004 #define XFS_AGI_SEQNO (1 << 2)
#define XFS_AGI_LENGTH 0x00000008 #define XFS_AGI_LENGTH (1 << 3)
#define XFS_AGI_COUNT 0x00000010 #define XFS_AGI_COUNT (1 << 4)
#define XFS_AGI_ROOT 0x00000020 #define XFS_AGI_ROOT (1 << 5)
#define XFS_AGI_LEVEL 0x00000040 #define XFS_AGI_LEVEL (1 << 6)
#define XFS_AGI_FREECOUNT 0x00000080 #define XFS_AGI_FREECOUNT (1 << 7)
#define XFS_AGI_NEWINO 0x00000100 #define XFS_AGI_NEWINO (1 << 8)
#define XFS_AGI_DIRINO 0x00000200 #define XFS_AGI_DIRINO (1 << 9)
#define XFS_AGI_UNLINKED 0x00000400 #define XFS_AGI_UNLINKED (1 << 10)
#define XFS_AGI_NUM_BITS 11 #define XFS_AGI_NUM_BITS_R1 11 /* end of the 1st agi logging region */
#define XFS_AGI_ALL_BITS ((1 << XFS_AGI_NUM_BITS) - 1) #define XFS_AGI_ALL_BITS_R1 ((1 << XFS_AGI_NUM_BITS_R1) - 1)
#define XFS_AGI_FREE_ROOT (1 << 11)
#define XFS_AGI_FREE_LEVEL (1 << 12)
#define XFS_AGI_NUM_BITS_R2 13
/* disk block (xfs_daddr_t) in the AG */ /* disk block (xfs_daddr_t) in the AG */
#define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) #define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
......
...@@ -43,9 +43,10 @@ kmem_zone_t *xfs_btree_cur_zone; ...@@ -43,9 +43,10 @@ kmem_zone_t *xfs_btree_cur_zone;
* Btree magic numbers. * Btree magic numbers.
*/ */
static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }, { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
XFS_FIBT_MAGIC },
{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC } XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
}; };
#define xfs_btree_magic(cur) \ #define xfs_btree_magic(cur) \
xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
...@@ -1115,6 +1116,7 @@ xfs_btree_set_refs( ...@@ -1115,6 +1116,7 @@ xfs_btree_set_refs(
xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
break; break;
case XFS_BTNUM_INO: case XFS_BTNUM_INO:
case XFS_BTNUM_FINO:
xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
break; break;
case XFS_BTNUM_BMAP: case XFS_BTNUM_BMAP:
......
...@@ -62,6 +62,7 @@ union xfs_btree_rec { ...@@ -62,6 +62,7 @@ union xfs_btree_rec {
#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) #define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi)
#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) #define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi)
#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) #define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi)
#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi)
/* /*
* For logging record fields. * For logging record fields.
...@@ -92,6 +93,7 @@ do { \ ...@@ -92,6 +93,7 @@ do { \
case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \ } \
} while (0) } while (0)
...@@ -105,6 +107,7 @@ do { \ ...@@ -105,6 +107,7 @@ do { \
case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \
case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \
case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \
case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \
case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
} \ } \
} while (0) } while (0)
......
...@@ -202,6 +202,8 @@ typedef __be32 xfs_alloc_ptr_t; ...@@ -202,6 +202,8 @@ typedef __be32 xfs_alloc_ptr_t;
*/ */
#define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */ #define XFS_IBT_MAGIC 0x49414254 /* 'IABT' */
#define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */ #define XFS_IBT_CRC_MAGIC 0x49414233 /* 'IAB3' */
#define XFS_FIBT_MAGIC 0x46494254 /* 'FIBT' */
#define XFS_FIBT_CRC_MAGIC 0x46494233 /* 'FIB3' */
typedef __uint64_t xfs_inofree_t; typedef __uint64_t xfs_inofree_t;
#define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t)) #define XFS_INODES_PER_CHUNK (NBBY * sizeof(xfs_inofree_t))
...@@ -244,7 +246,17 @@ typedef __be32 xfs_inobt_ptr_t; ...@@ -244,7 +246,17 @@ typedef __be32 xfs_inobt_ptr_t;
* block numbers in the AG. * block numbers in the AG.
*/ */
#define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) #define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1))
#define XFS_PREALLOC_BLOCKS(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) #define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1))
/*
 * The first data block of an AG depends on whether the filesystem was formatted
 * with the finobt feature. If so, account for the finobt reserved root btree
 * block.
 *
 * Evaluates to XFS_FIBT_BLOCK(mp) + 1 when xfs_sb_version_hasfinobt() is true
 * for the mount's superblock (mp->m_sb), and to XFS_IBT_BLOCK(mp) + 1
 * otherwise.
 */
#define XFS_PREALLOC_BLOCKS(mp) \
(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \
XFS_FIBT_BLOCK(mp) + 1 : \
XFS_IBT_BLOCK(mp) + 1)
......
...@@ -1488,7 +1488,16 @@ xfs_ialloc_compute_maxlevels( ...@@ -1488,7 +1488,16 @@ xfs_ialloc_compute_maxlevels(
} }
/* /*
* Log specified fields for the ag hdr (inode section) * Log specified fields for the ag hdr (inode section). The growth of the agi
* structure over time requires that we interpret the buffer as two logical
* regions delineated by the end of the unlinked list. This is due to the size
* of the hash table and its location in the middle of the agi.
*
* For example, a request to log a field before agi_unlinked and a field after
* agi_unlinked could cause us to log the entire hash table and use an excessive
* amount of log space. To avoid this behavior, log the region up through
* agi_unlinked in one call and the region after agi_unlinked through the end of
* the structure in another.
*/ */
void void
xfs_ialloc_log_agi( xfs_ialloc_log_agi(
...@@ -1511,6 +1520,8 @@ xfs_ialloc_log_agi( ...@@ -1511,6 +1520,8 @@ xfs_ialloc_log_agi(
offsetof(xfs_agi_t, agi_newino), offsetof(xfs_agi_t, agi_newino),
offsetof(xfs_agi_t, agi_dirino), offsetof(xfs_agi_t, agi_dirino),
offsetof(xfs_agi_t, agi_unlinked), offsetof(xfs_agi_t, agi_unlinked),
offsetof(xfs_agi_t, agi_free_root),
offsetof(xfs_agi_t, agi_free_level),
sizeof(xfs_agi_t) sizeof(xfs_agi_t)
}; };
#ifdef DEBUG #ifdef DEBUG
...@@ -1519,15 +1530,30 @@ xfs_ialloc_log_agi( ...@@ -1519,15 +1530,30 @@ xfs_ialloc_log_agi(
agi = XFS_BUF_TO_AGI(bp); agi = XFS_BUF_TO_AGI(bp);
ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
#endif #endif
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF);
/* /*
* Compute byte offsets for the first and last fields. * Compute byte offsets for the first and last fields in the first
* region and log the agi buffer. This only logs up through
* agi_unlinked.
*/ */
xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); if (fields & XFS_AGI_ALL_BITS_R1) {
xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
&first, &last);
xfs_trans_log_buf(tp, bp, first, last);
}
/* /*
* Log the allocation group inode header buffer. * Mask off the bits in the first region and calculate the first and
* last field offsets for any bits in the second region.
*/ */
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); fields &= ~XFS_AGI_ALL_BITS_R1;
xfs_trans_log_buf(tp, bp, first, last); if (fields) {
xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
&first, &last);
xfs_trans_log_buf(tp, bp, first, last);
}
} }
#ifdef DEBUG #ifdef DEBUG
......
...@@ -67,6 +67,21 @@ xfs_inobt_set_root( ...@@ -67,6 +67,21 @@ xfs_inobt_set_root(
xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);
} }
/*
 * Record a new root block for the free inode btree in the AGI header.
 *
 * The AGI buffer is taken from the cursor (cur->bc_private.a.agbp). The new
 * root pointer (nptr->s) is stored in agi_free_root, agi_free_level is
 * adjusted by inc, and both fields are then logged through
 * xfs_ialloc_log_agi() in the cursor's transaction (cur->bc_tp).
 */
STATIC void
xfs_finobt_set_root(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *nptr,
int inc) /* level change */
{
struct xfs_buf *agbp = cur->bc_private.a.agbp;
struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
/* nptr->s is copied as-is; no byte-order conversion is applied here. */
agi->agi_free_root = nptr->s;
be32_add_cpu(&agi->agi_free_level, inc);
/* Log only the two finobt fields that were just modified. */
xfs_ialloc_log_agi(cur->bc_tp, agbp,
XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL);
}
STATIC int STATIC int
xfs_inobt_alloc_block( xfs_inobt_alloc_block(
struct xfs_btree_cur *cur, struct xfs_btree_cur *cur,
...@@ -174,6 +189,17 @@ xfs_inobt_init_ptr_from_cur( ...@@ -174,6 +189,17 @@ xfs_inobt_init_ptr_from_cur(
ptr->s = agi->agi_root; ptr->s = agi->agi_root;
} }
/*
 * Initialise a btree pointer to the root of the free inode btree.
 *
 * Reads agi_free_root from the AGI buffer attached to the cursor
 * (cur->bc_private.a.agbp) and stores it in ptr->s.
 */
STATIC void
xfs_finobt_init_ptr_from_cur(
struct xfs_btree_cur *cur,
union xfs_btree_ptr *ptr)
{
struct xfs_agi *agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
/* The cursor's AG number must match the sequence number in the AGI. */
ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno));
ptr->s = agi->agi_free_root;
}
STATIC __int64_t STATIC __int64_t
xfs_inobt_key_diff( xfs_inobt_key_diff(
struct xfs_btree_cur *cur, struct xfs_btree_cur *cur,
...@@ -204,6 +230,7 @@ xfs_inobt_verify( ...@@ -204,6 +230,7 @@ xfs_inobt_verify(
*/ */
switch (block->bb_magic) { switch (block->bb_magic) {
case cpu_to_be32(XFS_IBT_CRC_MAGIC): case cpu_to_be32(XFS_IBT_CRC_MAGIC):
case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
if (!xfs_sb_version_hascrc(&mp->m_sb)) if (!xfs_sb_version_hascrc(&mp->m_sb))
return false; return false;
if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid))
...@@ -215,6 +242,7 @@ xfs_inobt_verify( ...@@ -215,6 +242,7 @@ xfs_inobt_verify(
return false; return false;
/* fall through */ /* fall through */
case cpu_to_be32(XFS_IBT_MAGIC): case cpu_to_be32(XFS_IBT_MAGIC):
case cpu_to_be32(XFS_FIBT_MAGIC):
break; break;
default: default:
return 0; return 0;
...@@ -318,6 +346,28 @@ static const struct xfs_btree_ops xfs_inobt_ops = { ...@@ -318,6 +346,28 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
#endif #endif
}; };
/*
 * Btree operations for the free inode btree.
 *
 * Every callback is shared with the inode btree (xfs_inobt_*) except
 * set_root and init_ptr_from_cur, which use the finobt variants that operate
 * on agi_free_root/agi_free_level. The buffer ops (xfs_inobt_buf_ops) are
 * shared as well.
 */
static const struct xfs_btree_ops xfs_finobt_ops = {
.rec_len = sizeof(xfs_inobt_rec_t),
.key_len = sizeof(xfs_inobt_key_t),
.dup_cursor = xfs_inobt_dup_cursor,
/* finobt-specific: updates agi_free_root and agi_free_level */
.set_root = xfs_finobt_set_root,
.alloc_block = xfs_inobt_alloc_block,
.free_block = xfs_inobt_free_block,
.get_minrecs = xfs_inobt_get_minrecs,
.get_maxrecs = xfs_inobt_get_maxrecs,
.init_key_from_rec = xfs_inobt_init_key_from_rec,
.init_rec_from_key = xfs_inobt_init_rec_from_key,
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
/* finobt-specific: reads the root pointer from agi_free_root */
.init_ptr_from_cur = xfs_finobt_init_ptr_from_cur,
.key_diff = xfs_inobt_key_diff,
.buf_ops = &xfs_inobt_buf_ops,
/* Ordering checks are only built in for DEBUG or XFS_WARN configurations. */
#if defined(DEBUG) || defined(XFS_WARN)
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
#endif
};
/* /*
* Allocate a new inode btree cursor. * Allocate a new inode btree cursor.
*/ */
...@@ -336,11 +386,17 @@ xfs_inobt_init_cursor( ...@@ -336,11 +386,17 @@ xfs_inobt_init_cursor(
cur->bc_tp = tp; cur->bc_tp = tp;
cur->bc_mp = mp; cur->bc_mp = mp;
cur->bc_nlevels = be32_to_cpu(agi->agi_level);
cur->bc_btnum = btnum; cur->bc_btnum = btnum;
if (btnum == XFS_BTNUM_INO) {
cur->bc_nlevels = be32_to_cpu(agi->agi_level);
cur->bc_ops = &xfs_inobt_ops;
} else {
cur->bc_nlevels = be32_to_cpu(agi->agi_free_level);
cur->bc_ops = &xfs_finobt_ops;
}
cur->bc_blocklog = mp->m_sb.sb_blocklog; cur->bc_blocklog = mp->m_sb.sb_blocklog;
cur->bc_ops = &xfs_inobt_ops;
if (xfs_sb_version_hascrc(&mp->m_sb)) if (xfs_sb_version_hascrc(&mp->m_sb))
cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
......
...@@ -2138,7 +2138,9 @@ xlog_recover_validate_buf_type( ...@@ -2138,7 +2138,9 @@ xlog_recover_validate_buf_type(
bp->b_ops = &xfs_allocbt_buf_ops; bp->b_ops = &xfs_allocbt_buf_ops;
break; break;
case XFS_IBT_CRC_MAGIC: case XFS_IBT_CRC_MAGIC:
case XFS_FIBT_CRC_MAGIC:
case XFS_IBT_MAGIC: case XFS_IBT_MAGIC:
case XFS_FIBT_MAGIC:
bp->b_ops = &xfs_inobt_buf_ops; bp->b_ops = &xfs_inobt_buf_ops;
break; break;
case XFS_BMAP_CRC_MAGIC: case XFS_BMAP_CRC_MAGIC:
......
...@@ -59,6 +59,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v) ...@@ -59,6 +59,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
{ "abtc2", XFSSTAT_END_ABTC_V2 }, { "abtc2", XFSSTAT_END_ABTC_V2 },
{ "bmbt2", XFSSTAT_END_BMBT_V2 }, { "bmbt2", XFSSTAT_END_BMBT_V2 },
{ "ibt2", XFSSTAT_END_IBT_V2 }, { "ibt2", XFSSTAT_END_IBT_V2 },
{ "fibt2", XFSSTAT_END_FIBT_V2 },
/* we print both series of quota information together */ /* we print both series of quota information together */
{ "qm", XFSSTAT_END_QM }, { "qm", XFSSTAT_END_QM },
}; };
......
...@@ -183,7 +183,23 @@ struct xfsstats { ...@@ -183,7 +183,23 @@ struct xfsstats {
__uint32_t xs_ibt_2_alloc; __uint32_t xs_ibt_2_alloc;
__uint32_t xs_ibt_2_free; __uint32_t xs_ibt_2_free;
__uint32_t xs_ibt_2_moves; __uint32_t xs_ibt_2_moves;
#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_IBT_V2+6) #define XFSSTAT_END_FIBT_V2 (XFSSTAT_END_IBT_V2+15)
__uint32_t xs_fibt_2_lookup;
__uint32_t xs_fibt_2_compare;
__uint32_t xs_fibt_2_insrec;
__uint32_t xs_fibt_2_delrec;
__uint32_t xs_fibt_2_newroot;
__uint32_t xs_fibt_2_killroot;
__uint32_t xs_fibt_2_increment;
__uint32_t xs_fibt_2_decrement;
__uint32_t xs_fibt_2_lshift;
__uint32_t xs_fibt_2_rshift;
__uint32_t xs_fibt_2_split;
__uint32_t xs_fibt_2_join;
__uint32_t xs_fibt_2_alloc;
__uint32_t xs_fibt_2_free;
__uint32_t xs_fibt_2_moves;
#define XFSSTAT_END_XQMSTAT (XFSSTAT_END_FIBT_V2+6)
__uint32_t xs_qm_dqreclaims; __uint32_t xs_qm_dqreclaims;
__uint32_t xs_qm_dqreclaim_misses; __uint32_t xs_qm_dqreclaim_misses;
__uint32_t xs_qm_dquot_dups; __uint32_t xs_qm_dquot_dups;
......
...@@ -134,7 +134,7 @@ typedef enum { ...@@ -134,7 +134,7 @@ typedef enum {
typedef enum { typedef enum {
XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
XFS_BTNUM_MAX XFS_BTNUM_FINOi, XFS_BTNUM_MAX
} xfs_btnum_t; } xfs_btnum_t;
struct xfs_name { struct xfs_name {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment