Commit c6017471 authored by Linus Torvalds

Merge tag 'xfs-5.5-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
 "Fix a few bugs that could lead to corrupt files, fsck complaints, and
  filesystem crashes:

   - Minor documentation fixes

   - Fix a file corruption due to read racing with an insert range
     operation.

   - Fix log reservation overflows when allocating large rt extents

   - Fix a buffer log item flags check

   - Don't allow administrators to mount with sunit= options that will
     cause later xfs_repair complaints about the root directory being
     suspicious because the fs geometry appeared inconsistent

   - Fix a non-static helper that should have been static"

* tag 'xfs-5.5-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: Make the symbol 'xfs_rtalloc_log_count' static
  xfs: don't commit sunit/swidth updates to disk if that would cause repair failures
  xfs: split the sunit parameter update into two parts
  xfs: refactor agfl length computation function
  libxfs: resync with the userspace libxfs
  xfs: use bitops interface for buf log item AIL flag check
  xfs: fix log reservation overflows when allocating large rt extents
  xfs: stabilize insert range start boundary to avoid COW writeback race
  xfs: fix Sphinx documentation warning
parents a3965607 5084bf6b
@@ -253,7 +253,7 @@ The following sysctls are available for the XFS filesystem:
 	pool.
 fs.xfs.speculative_prealloc_lifetime
 	(Units: seconds Min: 1 Default: 300 Max: 86400)
 	The interval at which the background scanning for inodes
 	with unused speculative preallocation runs. The scan
 	removes unused preallocation from clean inodes and releases
@@ -2248,24 +2248,32 @@ xfs_alloc_longest_free_extent(
 	return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
 }
 
+/*
+ * Compute the minimum length of the AGFL in the given AG.  If @pag is NULL,
+ * return the largest possible minimum length.
+ */
 unsigned int
 xfs_alloc_min_freelist(
 	struct xfs_mount	*mp,
 	struct xfs_perag	*pag)
 {
+	/* AG btrees have at least 1 level. */
+	static const uint8_t	fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
+	const uint8_t		*levels = pag ? pag->pagf_levels : fake_levels;
 	unsigned int		min_free;
 
+	ASSERT(mp->m_ag_maxlevels > 0);
+
 	/* space needed by-bno freespace btree */
-	min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
+	min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
				       mp->m_ag_maxlevels);
 	/* space needed by-size freespace btree */
-	min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
+	min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
					mp->m_ag_maxlevels);
 	/* space needed reverse mapping used space btree */
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-		min_free += min_t(unsigned int,
-				  pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
-				  mp->m_rmap_maxlevels);
+		min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
+				  mp->m_rmap_maxlevels);
 
 	return min_free;
 }
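For context, the new @pag == NULL case is what lets xfs_ialloc_calc_rootino() (added later in this series) ask for the AGFL length of a freshly formatted AG, where every AG btree has exactly one level. A standalone sketch of that arithmetic (all values here are assumed, not read from a real superblock):

#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))

int main(void)
{
	/* Assumed geometry, standing in for mp->m_ag_maxlevels etc. */
	unsigned int ag_maxlevels = 5;
	unsigned int rmap_maxlevels = 6;
	/* pag == NULL: every AG btree is treated as having 1 level. */
	unsigned int bno_levels = 1, cnt_levels = 1, rmap_levels = 1;
	unsigned int min_free;

	min_free  = MIN(bno_levels + 1, ag_maxlevels);		/* by-bno: 2 */
	min_free += MIN(cnt_levels + 1, ag_maxlevels);		/* by-size: 2 */
	min_free += MIN(rmap_levels + 1, rmap_maxlevels);	/* rmap: 2 */

	printf("min AGFL length: %u blocks\n", min_free);	/* prints 6 */
	return 0;
}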
@@ -4561,7 +4561,7 @@ xfs_bmapi_convert_delalloc(
 	struct xfs_mount	*mp = ip->i_mount;
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	struct xfs_bmalloca	bma = { NULL };
-	u16			flags = 0;
+	uint16_t		flags = 0;
 	struct xfs_trans	*tp;
 	int			error;
@@ -5972,8 +5972,7 @@ xfs_bmap_insert_extents(
 		goto del_cursor;
 	}
 
-	if (XFS_IS_CORRUPT(mp,
-			   stop_fsb >= got.br_startoff + got.br_blockcount)) {
+	if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
 		error = -EFSCORRUPTED;
 		goto del_cursor;
 	}
@@ -724,3 +724,24 @@ xfs_dir2_namecheck(
 	/* There shouldn't be any slashes or nulls here */
 	return !memchr(name, '/', length) && !memchr(name, 0, length);
 }
+
+xfs_dahash_t
+xfs_dir2_hashname(
+	struct xfs_mount	*mp,
+	struct xfs_name		*name)
+{
+	if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
+		return xfs_ascii_ci_hashname(name);
+	return xfs_da_hashname(name->name, name->len);
+}
+
+enum xfs_dacmp
+xfs_dir2_compname(
+	struct xfs_da_args	*args,
+	const unsigned char	*name,
+	int			len)
+{
+	if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
+		return xfs_ascii_ci_compname(args, name, len);
+	return xfs_da_compname(args, name, len);
+}
@@ -175,6 +175,12 @@ extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
 extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
 extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip);
+int xfs_dir2_sf_entsize(struct xfs_mount *mp,
+		struct xfs_dir2_sf_hdr *hdr, int len);
+void xfs_dir2_sf_put_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr,
+		struct xfs_dir2_sf_entry *sfep, xfs_ino_t ino);
+void xfs_dir2_sf_put_ftype(struct xfs_mount *mp,
+		struct xfs_dir2_sf_entry *sfep, uint8_t ftype);
 
 /* xfs_dir2_readdir.c */
 extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
@@ -194,25 +200,8 @@ xfs_dir2_data_entsize(
 	return round_up(len, XFS_DIR2_DATA_ALIGN);
 }
 
-static inline xfs_dahash_t
-xfs_dir2_hashname(
-	struct xfs_mount	*mp,
-	struct xfs_name		*name)
-{
-	if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
-		return xfs_ascii_ci_hashname(name);
-	return xfs_da_hashname(name->name, name->len);
-}
-
-static inline enum xfs_dacmp
-xfs_dir2_compname(
-	struct xfs_da_args	*args,
-	const unsigned char	*name,
-	int			len)
-{
-	if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
-		return xfs_ascii_ci_compname(args, name, len);
-	return xfs_da_compname(args, name, len);
-}
+xfs_dahash_t xfs_dir2_hashname(struct xfs_mount *mp, struct xfs_name *name);
+enum xfs_dacmp xfs_dir2_compname(struct xfs_da_args *args,
+		const unsigned char *name, int len);
 
 #endif /* __XFS_DIR2_PRIV_H__ */
@@ -37,7 +37,7 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args);
 static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
 static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
 
-static int
+int
 xfs_dir2_sf_entsize(
 	struct xfs_mount	*mp,
 	struct xfs_dir2_sf_hdr	*hdr,
@@ -84,7 +84,7 @@ xfs_dir2_sf_get_ino(
 	return get_unaligned_be64(from) & XFS_MAXINUMBER;
 }
 
-static void
+void
 xfs_dir2_sf_put_ino(
 	struct xfs_mount	*mp,
 	struct xfs_dir2_sf_hdr	*hdr,
@@ -145,7 +145,7 @@ xfs_dir2_sf_get_ftype(
 	return XFS_DIR3_FT_UNKNOWN;
 }
 
-static void
+void
 xfs_dir2_sf_put_ftype(
 	struct xfs_mount	*mp,
 	struct xfs_dir2_sf_entry *sfep,
@@ -2909,3 +2909,67 @@ xfs_ialloc_setup_geometry(
 	else
 		igeo->ialloc_align = 0;
 }
+
+/* Compute the location of the root directory inode that is laid out by mkfs. */
+xfs_ino_t
+xfs_ialloc_calc_rootino(
+	struct xfs_mount	*mp,
+	int			sunit)
+{
+	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
+	xfs_agblock_t		first_bno;
+
+	/*
+	 * Pre-calculate the geometry of AG 0.  We know what it looks like
+	 * because libxfs knows how to create allocation groups now.
+	 *
+	 * first_bno is the first block in which mkfs could possibly have
+	 * allocated the root directory inode, once we factor in the metadata
+	 * that mkfs formats before it.  Namely, the four AG headers...
+	 */
+	first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
+
+	/* ...the two free space btree roots... */
+	first_bno += 2;
+
+	/* ...the inode btree root... */
+	first_bno += 1;
+
+	/* ...the initial AGFL... */
+	first_bno += xfs_alloc_min_freelist(mp, NULL);
+
+	/* ...the free inode btree root... */
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		first_bno++;
+
+	/* ...the reverse mapping btree root... */
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		first_bno++;
+
+	/* ...the reference count btree... */
+	if (xfs_sb_version_hasreflink(&mp->m_sb))
+		first_bno++;
+
+	/*
+	 * ...and the log, if it is allocated in the first allocation group.
+	 *
+	 * This can happen with filesystems that only have a single
+	 * allocation group, or very odd geometries created by old mkfs
+	 * versions on very small filesystems.
+	 */
+	if (mp->m_sb.sb_logstart &&
+	    XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0)
+		first_bno += mp->m_sb.sb_logblocks;
+
+	/*
+	 * Now round first_bno up to whatever allocation alignment is given
+	 * by the filesystem or was passed in.
+	 */
+	if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0)
+		first_bno = roundup(first_bno, sunit);
+	else if (xfs_sb_version_hasalign(&mp->m_sb) &&
+			mp->m_sb.sb_inoalignmt > 1)
+		first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt);
+
+	return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
+}
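Run by hand for a common legacy geometry, the accumulation above lands on the classic XFS root inode number. A standalone sketch of the arithmetic (all values assumed, not read from a real superblock: 512-byte sectors, 4096-byte blocks, 256-byte inodes, finobt/rmapbt/reflink disabled, log outside AG 0, no alignment rounding):

#include <stdio.h>

#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

int main(void)
{
	/* Assumed geometry, standing in for the sb_* fields. */
	unsigned int sectsize = 512;	/* sb_sectsize */
	unsigned int blocksize = 4096;	/* sb_blocksize */
	unsigned int inopblock = 16;	/* 256-byte inodes per 4k block */
	unsigned int first_bno;

	first_bno  = howmany(4 * sectsize, blocksize);	/* 4 AG headers -> 1 */
	first_bno += 2;		/* by-bno and by-size btree roots -> 3 */
	first_bno += 1;		/* inobt root -> 4 */
	first_bno += 4;		/* initial AGFL without rmapbt -> 8 */
	/* finobt/rmapbt/reflink off and log not in AG 0: nothing further. */

	/* In AG 0 the inode number is simply agbno * inodes-per-block. */
	printf("calculated root inode: %u\n", first_bno * inopblock); /* 128 */
	return 0;
}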
@@ -152,5 +152,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
 void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
+xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
 
 #endif	/* __XFS_IALLOC_H__ */
@@ -196,6 +196,24 @@ xfs_calc_inode_chunk_res(
 	return res;
 }
 
+/*
+ * Per-extent log reservation for the btree changes involved in freeing or
+ * allocating a realtime extent.  We have to be able to log as many rtbitmap
+ * blocks as needed to mark inuse MAXEXTLEN blocks' worth of realtime extents,
+ * as well as the realtime summary block.
+ */
+static unsigned int
+xfs_rtalloc_log_count(
+	struct xfs_mount	*mp,
+	unsigned int		num_ops)
+{
+	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
+	unsigned int		rtbmp_bytes;
+
+	rtbmp_bytes = (MAXEXTLEN / mp->m_sb.sb_rextsize) / NBBY;
+	return (howmany(rtbmp_bytes, blksz) + 1) * num_ops;
+}
+
 /*
  * Various log reservation values.
 *
@@ -218,13 +236,21 @@ xfs_calc_inode_chunk_res(
 /*
  * In a write transaction we can allocate a maximum of 2
- * extents.  This gives:
+ * extents.  This gives (t1):
  *    the inode getting the new extents: inode size
  *    the inode's bmap btree: max depth * block size
  *    the agfs of the ags from which the extents are allocated: 2 * sector
  *    the superblock free block counter: sector size
  *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
+ * Or, if we're writing to a realtime file (t2):
+ *    the inode getting the new extents: inode size
+ *    the inode's bmap btree: max depth * block size
+ *    the agfs of the ags from which the extents are allocated: 2 * sector
+ *    the superblock free block counter: sector size
+ *    the realtime bitmap: ((MAXEXTLEN / rtextsize) / NBBY) bytes
+ *    the realtime summary: 1 block
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join (t3):
  *    the agfs of the ags containing the blocks: 2 * sector size
  *    the agfls of the ags containing the blocks: 2 * sector size
  *    the super block free block counter: sector size
@@ -234,40 +260,72 @@ STATIC uint
 xfs_calc_write_reservation(
 	struct xfs_mount	*mp)
 {
-	return XFS_DQUOT_LOGRES(mp) +
-		max((xfs_calc_inode_res(mp, 1) +
+	unsigned int		t1, t2, t3;
+	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
+
+	t1 = xfs_calc_inode_res(mp, 1) +
+	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
+	     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+	     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+
+	if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+		t2 = xfs_calc_inode_res(mp, 1) +
 		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
-				      XFS_FSB_TO_B(mp, 1)) +
+				      blksz) +
 		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
-				      XFS_FSB_TO_B(mp, 1))),
-		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
-				      XFS_FSB_TO_B(mp, 1))));
+		     xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) +
+		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz);
+	} else {
+		t2 = 0;
+	}
+
+	t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+	     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+
+	return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
 }
 /*
- * In truncating a file we free up to two extents at once.  We can modify:
+ * In truncating a file we free up to two extents at once.  We can modify (t1):
  *    the inode being truncated: inode size
  *    the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
  *    the agf for each of the ags: 4 * sector size
  *    the agfl for each of the ags: 4 * sector size
  *    the super block to reflect the freed blocks: sector size
  *    worst case split in allocation btrees per extent assuming 4 extents:
  *		4 exts * 2 trees * (2 * max depth - 1) * block size
+ * Or, if it's a realtime file (t3):
+ *    the agf for each of the ags: 2 * sector size
+ *    the agfl for each of the ags: 2 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    the realtime bitmap: 2 exts * ((MAXEXTLEN / rtextsize) / NBBY) bytes
+ *    the realtime summary: 2 exts * 1 block
+ *    worst case split in allocation btrees per extent assuming 2 extents:
+ *		2 exts * 2 trees * (2 * max depth - 1) * block size
 */
 STATIC uint
 xfs_calc_itruncate_reservation(
 	struct xfs_mount	*mp)
 {
-	return XFS_DQUOT_LOGRES(mp) +
-		max((xfs_calc_inode_res(mp, 1) +
-		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
-				      XFS_FSB_TO_B(mp, 1))),
-		    (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
-				      XFS_FSB_TO_B(mp, 1))));
+	unsigned int		t1, t2, t3;
+	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
+
+	t1 = xfs_calc_inode_res(mp, 1) +
+	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
+
+	t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+	     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
+
+	if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+		t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+		     xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
+		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+	} else {
+		t3 = 0;
+	}
+
+	return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
 }
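To make the new reservation terms concrete, the xfs_rtalloc_log_count() arithmetic from this hunk can be run by hand. A standalone sketch with assumed geometry (4096-byte blocks, 16-block realtime extents, num_ops = 2 as in the truncate case; nothing here comes from a real mount):

#include <stdio.h>

#define NBBY		8
#define MAXEXTLEN	((1U << 21) - 1)	/* 0x1FFFFF, max blocks per bmbt extent */
#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

int main(void)
{
	unsigned int blksz = 4096;	/* assumed filesystem block size */
	unsigned int rextsize = 16;	/* assumed sb_rextsize */
	unsigned int num_ops = 2;	/* 2 extents freed by truncate */

	/* (2097151 / 16) / 8 = 16383 bytes of rtbitmap to cover MAXEXTLEN */
	unsigned int rtbmp_bytes = (MAXEXTLEN / rextsize) / NBBY;

	/* 4 rtbitmap blocks + 1 rtsummary block, per operation -> 10 */
	printf("%u log blocks\n", (howmany(rtbmp_bytes, blksz) + 1) * num_ops);
	return 0;
}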
@@ -992,6 +992,7 @@ xfs_prepare_shift(
 	struct xfs_inode	*ip,
 	loff_t			offset)
 {
+	struct xfs_mount	*mp = ip->i_mount;
 	int			error;
 
 	/*
@@ -1004,6 +1005,17 @@ xfs_prepare_shift(
 			return error;
 	}
 
+	/*
+	 * Shift operations must stabilize the start block offset boundary along
+	 * with the full range of the operation.  If we don't, a COW writeback
+	 * completion could race with an insert, front merge with the start
+	 * extent (after split) during the shift and corrupt the file.  Start
+	 * with the block just prior to the start to stabilize the boundary.
+	 */
+	offset = round_down(offset, 1 << mp->m_sb.sb_blocklog);
+	if (offset)
+		offset -= (1 << mp->m_sb.sb_blocklog);
+
 	/*
 	 * Writeback and invalidate cache for the remainder of the file as we're
 	 * about to shift down every extent from offset to EOF.
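The boundary math above simply widens the writeback/invalidate range by one block, so the extent straddling the start boundary cannot be front-merged behind the shift's back. A standalone sketch of the offset arithmetic (assuming 4096-byte blocks, i.e. sb_blocklog == 12):

#include <stdio.h>

/* Works here because the alignment is a power of two. */
#define round_down(x, y)	((x) / (y) * (y))

int main(void)
{
	unsigned int blocklog = 12;	/* assumed sb_blocklog */
	long long offset = 10000;	/* insert range start, in bytes */

	offset = round_down(offset, 1LL << blocklog);	/* -> 8192 */
	if (offset)
		offset -= (1LL << blocklog);		/* -> 4096 */

	/* Writeback now starts one full block before the shift point. */
	printf("flush from byte %lld\n", offset);
	return 0;
}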
@@ -956,7 +956,7 @@ xfs_buf_item_relse(
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
 	trace_xfs_buf_item_relse(bp, _RET_IP_);
-	ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
+	ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
 
 	bp->b_log_item = NULL;
 	if (list_empty(&bp->b_li_list))
@@ -31,7 +31,7 @@
 #include "xfs_reflink.h"
 #include "xfs_extent_busy.h"
 #include "xfs_health.h"
+#include "xfs_trace.h"
 
 static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
@@ -360,66 +360,119 @@ xfs_readsb(
 }
 
-/*
- * Update alignment values based on mount options and sb values
- */
-STATIC int
-xfs_update_alignment(xfs_mount_t *mp)
-{
-	xfs_sb_t	*sbp = &(mp->m_sb);
-
-	if (mp->m_dalign) {
-		/*
-		 * If stripe unit and stripe width are not multiples
-		 * of the fs blocksize turn off alignment.
-		 */
-		if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
-		    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
-			xfs_warn(mp,
-		"alignment check failed: sunit/swidth vs. blocksize(%d)",
-				sbp->sb_blocksize);
-			return -EINVAL;
-		} else {
-			/*
-			 * Convert the stripe unit and width to FSBs.
-			 */
-			mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
-			if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
-				xfs_warn(mp,
-			"alignment check failed: sunit/swidth vs. agsize(%d)",
-					sbp->sb_agblocks);
-				return -EINVAL;
-			} else if (mp->m_dalign) {
-				mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
-			} else {
-				xfs_warn(mp,
-			"alignment check failed: sunit(%d) less than bsize(%d)",
-					mp->m_dalign, sbp->sb_blocksize);
-				return -EINVAL;
-			}
-		}
-
-		/*
-		 * Update superblock with new values
-		 * and log changes
-		 */
-		if (xfs_sb_version_hasdalign(sbp)) {
-			if (sbp->sb_unit != mp->m_dalign) {
-				sbp->sb_unit = mp->m_dalign;
-				mp->m_update_sb = true;
-			}
-			if (sbp->sb_width != mp->m_swidth) {
-				sbp->sb_width = mp->m_swidth;
-				mp->m_update_sb = true;
-			}
-		} else {
-			xfs_warn(mp,
-	"cannot change alignment: superblock does not support data alignment");
-			return -EINVAL;
-		}
+/*
+ * If the sunit/swidth change would move the precomputed root inode value, we
+ * must reject the ondisk change because repair will stumble over that.
+ * However, we allow the mount to proceed because we never rejected this
+ * combination before.  Returns true to update the sb, false otherwise.
+ */
+static inline int
+xfs_check_new_dalign(
+	struct xfs_mount	*mp,
+	int			new_dalign,
+	bool			*update_sb)
+{
+	struct xfs_sb		*sbp = &mp->m_sb;
+	xfs_ino_t		calc_ino;
+
+	calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
+	trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);
+
+	if (sbp->sb_rootino == calc_ino) {
+		*update_sb = true;
+		return 0;
+	}
+
+	xfs_warn(mp,
+"Cannot change stripe alignment; would require moving root inode.");
+
+	/*
+	 * XXX: Next time we add a new incompat feature, this should start
+	 * returning -EINVAL to fail the mount.  Until then, spit out a warning
+	 * that we're ignoring the administrator's instructions.
+	 */
+	xfs_warn(mp, "Skipping superblock stripe alignment update.");
+	*update_sb = false;
+	return 0;
+}
+
+/*
+ * If we were provided with new sunit/swidth values as mount options, make sure
+ * that they pass basic alignment and superblock feature checks, and convert
+ * them into the same units (FSB) that everything else expects.  This step
+ * /must/ be done before computing the inode geometry.
+ */
+STATIC int
+xfs_validate_new_dalign(
+	struct xfs_mount	*mp)
+{
+	if (mp->m_dalign == 0)
+		return 0;
+
+	/*
+	 * If stripe unit and stripe width are not multiples
+	 * of the fs blocksize turn off alignment.
+	 */
+	if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
+	    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+		xfs_warn(mp,
+	"alignment check failed: sunit/swidth vs. blocksize(%d)",
+			mp->m_sb.sb_blocksize);
+		return -EINVAL;
+	} else {
+		/*
+		 * Convert the stripe unit and width to FSBs.
+		 */
+		mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
+		if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
+			xfs_warn(mp,
+		"alignment check failed: sunit/swidth vs. agsize(%d)",
+				mp->m_sb.sb_agblocks);
+			return -EINVAL;
+		} else if (mp->m_dalign) {
+			mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
+		} else {
+			xfs_warn(mp,
+		"alignment check failed: sunit(%d) less than bsize(%d)",
+				mp->m_dalign, mp->m_sb.sb_blocksize);
+			return -EINVAL;
+		}
+	}
+
+	if (!xfs_sb_version_hasdalign(&mp->m_sb)) {
+		xfs_warn(mp,
+"cannot change alignment: superblock does not support data alignment");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Update alignment values based on mount options and sb values. */
+STATIC int
+xfs_update_alignment(
+	struct xfs_mount	*mp)
+{
+	struct xfs_sb		*sbp = &mp->m_sb;
+
+	if (mp->m_dalign) {
+		bool		update_sb;
+		int		error;
+
+		if (sbp->sb_unit == mp->m_dalign &&
+		    sbp->sb_width == mp->m_swidth)
+			return 0;
+
+		error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
+		if (error || !update_sb)
+			return error;
+
+		sbp->sb_unit = mp->m_dalign;
+		sbp->sb_width = mp->m_swidth;
+		mp->m_update_sb = true;
 	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
 		    xfs_sb_version_hasdalign(&mp->m_sb)) {
 		mp->m_dalign = sbp->sb_unit;
 		mp->m_swidth = sbp->sb_width;
 	}
 
 	return 0;
@@ -648,12 +701,12 @@ xfs_mountfs(
 	}
 
 	/*
-	 * Check if sb_agblocks is aligned at stripe boundary
-	 * If sb_agblocks is NOT aligned turn off m_dalign since
-	 * allocator alignment is within an ag, therefore ag has
-	 * to be aligned at stripe boundary.
+	 * If we were given new sunit/swidth options, do some basic validation
+	 * checks and convert the incore dalign and swidth values to the
+	 * same units (FSB) that everything else uses.  This /must/ happen
+	 * before computing the inode geometry.
 	 */
-	error = xfs_update_alignment(mp);
+	error = xfs_validate_new_dalign(mp);
 	if (error)
 		goto out;
xfs_rmapbt_compute_maxlevels(mp); xfs_rmapbt_compute_maxlevels(mp);
xfs_refcountbt_compute_maxlevels(mp); xfs_refcountbt_compute_maxlevels(mp);
/*
* Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks
* is NOT aligned turn off m_dalign since allocator alignment is within
* an ag, therefore ag has to be aligned at stripe boundary. Note that
* we must compute the free space and rmap btree geometry before doing
* this.
*/
error = xfs_update_alignment(mp);
if (error)
goto out;
/* enable fail_at_unmount as default */ /* enable fail_at_unmount as default */
mp->m_fail_unmount = true; mp->m_fail_unmount = true;
......
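For reference, the sunit=/swidth= mount options arrive in 512-byte units and xfs_validate_new_dalign() converts them to filesystem blocks before the root inode check can use them. A standalone sketch of that conversion (assumed values: 4096-byte blocks, sunit=64, swidth=256):

#include <stdio.h>

#define BBSHIFT		9			/* 512-byte basic blocks */
#define BBTOB(bbs)	((bbs) << BBSHIFT)

int main(void)
{
	unsigned int blocklog = 12;		/* assumed: 4096-byte blocks */
	unsigned int blockmask = (1 << blocklog) - 1;
	unsigned int dalign = 64, swidth = 256;	/* sunit/swidth in 512B units */

	/* Reject stripe geometry that isn't a multiple of the block size. */
	if ((BBTOB(dalign) & blockmask) || (BBTOB(swidth) & blockmask))
		return 1;

	/* XFS_BB_TO_FSBT: shift away the block/sector size difference. */
	dalign >>= (blocklog - BBSHIFT);	/* 64 >> 3 = 8 FSB */
	swidth >>= (blocklog - BBSHIFT);	/* 256 >> 3 = 32 FSB */

	printf("dalign %u FSB, swidth %u FSB\n", dalign, swidth);
	return 0;
}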
@@ -3573,6 +3573,27 @@ DEFINE_KMEM_EVENT(kmem_alloc_large);
 DEFINE_KMEM_EVENT(kmem_realloc);
 DEFINE_KMEM_EVENT(kmem_zone_alloc);
 
+TRACE_EVENT(xfs_check_new_dalign,
+	TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino),
+	TP_ARGS(mp, new_dalign, calc_rootino),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(int, new_dalign)
+		__field(xfs_ino_t, sb_rootino)
+		__field(xfs_ino_t, calc_rootino)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->new_dalign = new_dalign;
+		__entry->sb_rootino = mp->m_sb.sb_rootino;
+		__entry->calc_rootino = calc_rootino;
+	),
+	TP_printk("dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->new_dalign, __entry->sb_rootino,
+		  __entry->calc_rootino)
+)
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH