Commit c6017471 authored by Linus Torvalds

Merge tag 'xfs-5.5-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs fixes from Darrick Wong:
 "Fix a few bugs that could lead to corrupt files, fsck complaints, and
  filesystem crashes:

   - Minor documentation fixes

   - Fix a file corruption due to read racing with an insert range
     operation.

   - Fix log reservation overflows when allocating large rt extents

   - Fix a buffer log item flags check

   - Don't allow administrators to mount with sunit= options that will
     cause later xfs_repair complaints about the root directory being
     suspicious because the fs geometry appeared inconsistent

   - Fix a non-static helper that should have been static"

* tag 'xfs-5.5-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: Make the symbol 'xfs_rtalloc_log_count' static
  xfs: don't commit sunit/swidth updates to disk if that would cause repair failures
  xfs: split the sunit parameter update into two parts
  xfs: refactor agfl length computation function
  libxfs: resync with the userspace libxfs
  xfs: use bitops interface for buf log item AIL flag check
  xfs: fix log reservation overflows when allocating large rt extents
  xfs: stabilize insert range start boundary to avoid COW writeback race
  xfs: fix Sphinx documentation warning
parents a3965607 5084bf6b
@@ -2248,23 +2248,31 @@ xfs_alloc_longest_free_extent(
 	return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
 }
 
+/*
+ * Compute the minimum length of the AGFL in the given AG.  If @pag is NULL,
+ * return the largest possible minimum length.
+ */
 unsigned int
 xfs_alloc_min_freelist(
 	struct xfs_mount	*mp,
 	struct xfs_perag	*pag)
 {
+	/* AG btrees have at least 1 level. */
+	static const uint8_t	fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
+	const uint8_t		*levels = pag ? pag->pagf_levels : fake_levels;
 	unsigned int		min_free;
 
+	ASSERT(mp->m_ag_maxlevels > 0);
+
 	/* space needed by-bno freespace btree */
-	min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
+	min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
 				       mp->m_ag_maxlevels);
 	/* space needed by-size freespace btree */
-	min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
+	min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
 				       mp->m_ag_maxlevels);
 	/* space needed reverse mapping used space btree */
 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
-		min_free += min_t(unsigned int,
-				  pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
+		min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
 				  mp->m_rmap_maxlevels);
 
 	return min_free;
...
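As a quick illustration of the @pag == NULL fallback above: with the fake one-level btrees, each free space btree contributes min(1 + 1, m_ag_maxlevels) blocks. A minimal userspace sketch, assuming a filesystem without the reverse-mapping btree and an m_ag_maxlevels of 5 (both assumptions, not values from this diff; MIN() stands in for the kernel's min_t()):

#include <assert.h>
#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))

int main(void)
{
	unsigned int ag_maxlevels = 5;	/* assumed geometry */
	unsigned int fake_level = 1;	/* @pag == NULL: every AG btree has >= 1 level */
	unsigned int min_free;

	min_free  = MIN(fake_level + 1, ag_maxlevels);	/* by-bno btree */
	min_free += MIN(fake_level + 1, ag_maxlevels);	/* by-size btree */

	/* the largest possible minimum AGFL length without an rmap btree */
	printf("min AGFL length: %u blocks\n", min_free);
	assert(min_free == 4);
	return 0;
}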
@@ -4561,7 +4561,7 @@ xfs_bmapi_convert_delalloc(
 	struct xfs_mount	*mp = ip->i_mount;
 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 	struct xfs_bmalloca	bma = { NULL };
-	u16			flags = 0;
+	uint16_t		flags = 0;
 	struct xfs_trans	*tp;
 	int			error;
@@ -5972,8 +5972,7 @@ xfs_bmap_insert_extents(
 		goto del_cursor;
 	}
 
-	if (XFS_IS_CORRUPT(mp,
-			   stop_fsb >= got.br_startoff + got.br_blockcount)) {
+	if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
 		error = -EFSCORRUPTED;
 		goto del_cursor;
 	}
...
@@ -724,3 +724,24 @@ xfs_dir2_namecheck(
 	/* There shouldn't be any slashes or nulls here */
 	return !memchr(name, '/', length) && !memchr(name, 0, length);
 }
+
+xfs_dahash_t
+xfs_dir2_hashname(
+	struct xfs_mount	*mp,
+	struct xfs_name		*name)
+{
+	if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
+		return xfs_ascii_ci_hashname(name);
+	return xfs_da_hashname(name->name, name->len);
+}
+
+enum xfs_dacmp
+xfs_dir2_compname(
+	struct xfs_da_args	*args,
+	const unsigned char	*name,
+	int			len)
+{
+	if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
+		return xfs_ascii_ci_compname(args, name, len);
+	return xfs_da_compname(args, name, len);
+}
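For context on the two branches in those helpers: on an ASCII case-insensitive (ascii-ci) filesystem, names that differ only in case must hash to the same value so lookups land in the same bucket. A toy stand-in below shows the idea; the rolling hash is a simplification for illustration, not the kernel's xfs_da_hashname() algorithm:

#include <ctype.h>
#include <stdint.h>
#include <stdio.h>

/* rotate left; n must be 1..31 */
static uint32_t rol32(uint32_t x, unsigned int n)
{
	return (x << n) | (x >> (32 - n));
}

/* ci != 0 folds each byte to lower case before mixing it in */
static uint32_t toy_hashname(const unsigned char *name, int len, int ci)
{
	uint32_t hash = 0;

	while (len-- > 0) {
		unsigned char c = ci ? tolower(*name) : *name;
		hash = c ^ rol32(hash, 7);
		name++;
	}
	return hash;
}

int main(void)
{
	/* case-insensitive hashing puts both spellings in the same bucket */
	printf("%08x %08x\n",
	       toy_hashname((const unsigned char *)"README", 6, 1),
	       toy_hashname((const unsigned char *)"readme", 6, 1));
	return 0;
}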
@@ -175,6 +175,12 @@ extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
 extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
 extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
 extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip);
+int xfs_dir2_sf_entsize(struct xfs_mount *mp,
+		struct xfs_dir2_sf_hdr *hdr, int len);
+void xfs_dir2_sf_put_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr,
+		struct xfs_dir2_sf_entry *sfep, xfs_ino_t ino);
+void xfs_dir2_sf_put_ftype(struct xfs_mount *mp,
+		struct xfs_dir2_sf_entry *sfep, uint8_t ftype);
 
 /* xfs_dir2_readdir.c */
 extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
@@ -194,25 +200,8 @@ xfs_dir2_data_entsize(
 	return round_up(len, XFS_DIR2_DATA_ALIGN);
 }
 
-static inline xfs_dahash_t
-xfs_dir2_hashname(
-	struct xfs_mount	*mp,
-	struct xfs_name		*name)
-{
-	if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
-		return xfs_ascii_ci_hashname(name);
-	return xfs_da_hashname(name->name, name->len);
-}
-
-static inline enum xfs_dacmp
-xfs_dir2_compname(
-	struct xfs_da_args	*args,
-	const unsigned char	*name,
-	int			len)
-{
-	if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
-		return xfs_ascii_ci_compname(args, name, len);
-	return xfs_da_compname(args, name, len);
-}
+xfs_dahash_t xfs_dir2_hashname(struct xfs_mount *mp, struct xfs_name *name);
+enum xfs_dacmp xfs_dir2_compname(struct xfs_da_args *args,
+		const unsigned char *name, int len);
 
 #endif /* __XFS_DIR2_PRIV_H__ */
@@ -37,7 +37,7 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args);
 static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
 static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
 
-static int
+int
 xfs_dir2_sf_entsize(
 	struct xfs_mount	*mp,
 	struct xfs_dir2_sf_hdr	*hdr,
@@ -84,7 +84,7 @@ xfs_dir2_sf_get_ino(
 	return get_unaligned_be64(from) & XFS_MAXINUMBER;
 }
 
-static void
+void
 xfs_dir2_sf_put_ino(
 	struct xfs_mount	*mp,
 	struct xfs_dir2_sf_hdr	*hdr,
@@ -145,7 +145,7 @@ xfs_dir2_sf_get_ftype(
 	return XFS_DIR3_FT_UNKNOWN;
 }
 
-static void
+void
 xfs_dir2_sf_put_ftype(
 	struct xfs_mount	*mp,
 	struct xfs_dir2_sf_entry *sfep,
...
@@ -2909,3 +2909,67 @@ xfs_ialloc_setup_geometry(
 	else
 		igeo->ialloc_align = 0;
 }
+
+/* Compute the location of the root directory inode that is laid out by mkfs. */
+xfs_ino_t
+xfs_ialloc_calc_rootino(
+	struct xfs_mount	*mp,
+	int			sunit)
+{
+	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
+	xfs_agblock_t		first_bno;
+
+	/*
+	 * Pre-calculate the geometry of AG 0.  We know what it looks like
+	 * because libxfs knows how to create allocation groups now.
+	 *
+	 * first_bno is the first block in which mkfs could possibly have
+	 * allocated the root directory inode, once we factor in the metadata
+	 * that mkfs formats before it.  Namely, the four AG headers...
+	 */
+	first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
+
+	/* ...the two free space btree roots... */
+	first_bno += 2;
+
+	/* ...the inode btree root... */
+	first_bno += 1;
+
+	/* ...the initial AGFL... */
+	first_bno += xfs_alloc_min_freelist(mp, NULL);
+
+	/* ...the free inode btree root... */
+	if (xfs_sb_version_hasfinobt(&mp->m_sb))
+		first_bno++;
+
+	/* ...the reverse mapping btree root... */
+	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+		first_bno++;
+
+	/* ...the reference count btree... */
+	if (xfs_sb_version_hasreflink(&mp->m_sb))
+		first_bno++;
+
+	/*
+	 * ...and the log, if it is allocated in the first allocation group.
+	 *
+	 * This can happen with filesystems that only have a single
+	 * allocation group, or very odd geometries created by old mkfs
+	 * versions on very small filesystems.
+	 */
+	if (mp->m_sb.sb_logstart &&
+	    XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0)
+		 first_bno += mp->m_sb.sb_logblocks;
+
+	/*
+	 * Now round first_bno up to whatever allocation alignment is given
+	 * by the filesystem or was passed in.
+	 */
+	if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0)
+		first_bno = roundup(first_bno, sunit);
+	else if (xfs_sb_version_hasalign(&mp->m_sb) &&
+		 mp->m_sb.sb_inoalignmt > 1)
+		first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt);
+
+	return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
+}
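A worked example of the first_bno arithmetic, under assumed geometry (none of these numbers come from the diff): 512-byte sectors, 4096-byte blocks, 512-byte inodes, finobt and reflink enabled, no rmapbt, log outside AG 0, no stripe or inode alignment:

#include <stdio.h>

#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

int main(void)
{
	unsigned int sectsize = 512, blocksize = 4096, inodesize = 512;
	unsigned int first_bno;

	first_bno  = howmany(4 * sectsize, blocksize);	/* 4 AG headers -> 1 block */
	first_bno += 2;		/* by-bno and by-size btree roots */
	first_bno += 1;		/* inode btree root */
	first_bno += 4;		/* minimum AGFL, no rmapbt (see earlier sketch) */
	first_bno += 1;		/* free inode btree root */
	first_bno += 1;		/* refcount btree root */

	/* AG 0, no rounding: inode number = block * inodes per block */
	printf("first_bno %u -> root inode %u\n",
	       first_bno, first_bno * (blocksize / inodesize));	/* 10 -> 80 */
	return 0;
}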
@@ -152,5 +152,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
 int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
 void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
+xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
 
 #endif	/* __XFS_IALLOC_H__ */
@@ -196,6 +196,24 @@ xfs_calc_inode_chunk_res(
 	return res;
 }
 
+/*
+ * Per-extent log reservation for the btree changes involved in freeing or
+ * allocating a realtime extent.  We have to be able to log as many rtbitmap
+ * blocks as needed to mark inuse MAXEXTLEN blocks' worth of realtime extents,
+ * as well as the realtime summary block.
+ */
+static unsigned int
+xfs_rtalloc_log_count(
+	struct xfs_mount	*mp,
+	unsigned int		num_ops)
+{
+	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
+	unsigned int		rtbmp_bytes;
+
+	rtbmp_bytes = (MAXEXTLEN / mp->m_sb.sb_rextsize) / NBBY;
+	return (howmany(rtbmp_bytes, blksz) + 1) * num_ops;
+}
+
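To see the size of the reservation this helper demands, it helps to plug numbers in. With MAXEXTLEN = 0x1fffff blocks, an assumed realtime extent size of 1 fsb, and an assumed 4096-byte block size, one allocation or free must be able to log 64 rtbitmap blocks plus the summary block:

#include <stdio.h>

#define MAXEXTLEN	0x1fffffu	/* 2^21 - 1 blocks */
#define NBBY		8		/* bits per byte */
#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

int main(void)
{
	unsigned int rextsize = 1;	/* assumed rt extent size, in fsb */
	unsigned int blksz = 4096;	/* assumed block size, in bytes */
	unsigned int rtbmp_bytes = (MAXEXTLEN / rextsize) / NBBY;

	/* rtbitmap blocks for one worst-case extent, plus the rtsummary block */
	printf("%u blocks per op\n", howmany(rtbmp_bytes, blksz) + 1);	/* 65 */
	return 0;
}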
 /*
  * Various log reservation values.
  *
@@ -218,13 +236,21 @@ xfs_calc_inode_chunk_res(
 /*
  * In a write transaction we can allocate a maximum of 2
- * extents.  This gives:
+ * extents.  This gives (t1):
  *    the inode getting the new extents: inode size
  *    the inode's bmap btree: max depth * block size
  *    the agfs of the ags from which the extents are allocated: 2 * sector
  *    the superblock free block counter: sector size
  *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
+ * Or, if we're writing to a realtime file (t2):
+ *    the inode getting the new extents: inode size
+ *    the inode's bmap btree: max depth * block size
+ *    the agfs of the ags from which the extents are allocated: 2 * sector
+ *    the superblock free block counter: sector size
+ *    the realtime bitmap: ((MAXEXTLEN / rtextsize) / NBBY) bytes
+ *    the realtime summary: 1 block
+ *    the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join (t3):
  *    the agfs of the ags containing the blocks: 2 * sector size
  *    the agfls of the ags containing the blocks: 2 * sector size
  *    the super block free block counter: sector size
@@ -234,40 +260,72 @@ STATIC uint
 xfs_calc_write_reservation(
 	struct xfs_mount	*mp)
 {
-	return XFS_DQUOT_LOGRES(mp) +
-		max((xfs_calc_inode_res(mp, 1) +
-		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
-				      XFS_FSB_TO_B(mp, 1)) +
-		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
-				      XFS_FSB_TO_B(mp, 1))),
-		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
-				      XFS_FSB_TO_B(mp, 1))));
+	unsigned int		t1, t2, t3;
+	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
+
+	t1 = xfs_calc_inode_res(mp, 1) +
+	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
+	     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+	     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+
+	if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+		t2 = xfs_calc_inode_res(mp, 1) +
+		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
+				     blksz) +
+		     xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+		     xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) +
+		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz);
+	} else {
+		t2 = 0;
+	}
+
+	t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+	     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+
+	return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
 }
 /*
- * In truncating a file we free up to two extents at once.  We can modify:
+ * In truncating a file we free up to two extents at once.  We can modify (t1):
  *    the inode being truncated: inode size
  *    the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
  *    the agf for each of the ags: 4 * sector size
  *    the agfl for each of the ags: 4 * sector size
  *    the super block to reflect the freed blocks: sector size
  *    worst case split in allocation btrees per extent assuming 4 extents:
  *		4 exts * 2 trees * (2 * max depth - 1) * block size
+ * Or, if it's a realtime file (t3):
+ *    the agf for each of the ags: 2 * sector size
+ *    the agfl for each of the ags: 2 * sector size
+ *    the super block to reflect the freed blocks: sector size
+ *    the realtime bitmap: 2 exts * ((MAXEXTLEN / rtextsize) / NBBY) bytes
+ *    the realtime summary: 2 exts * 1 block
+ *    worst case split in allocation btrees per extent assuming 2 extents:
+ *		2 exts * 2 trees * (2 * max depth - 1) * block size
  */
 STATIC uint
 xfs_calc_itruncate_reservation(
 	struct xfs_mount	*mp)
 {
-	return XFS_DQUOT_LOGRES(mp) +
-		max((xfs_calc_inode_res(mp, 1) +
-		     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
-				      XFS_FSB_TO_B(mp, 1))),
-		    (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
-		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
-				      XFS_FSB_TO_B(mp, 1))));
+	unsigned int		t1, t2, t3;
+	unsigned int		blksz = XFS_FSB_TO_B(mp, 1);
+
+	t1 = xfs_calc_inode_res(mp, 1) +
+	     xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
+
+	t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+	     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
+
+	if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+		t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+		     xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
+		     xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+	} else {
+		t3 = 0;
+	}
+
+	return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
 }
 
 /*
...
@@ -992,6 +992,7 @@ xfs_prepare_shift(
 	struct xfs_inode	*ip,
 	loff_t			offset)
 {
+	struct xfs_mount	*mp = ip->i_mount;
 	int			error;
 
 	/*
@@ -1004,6 +1005,17 @@ xfs_prepare_shift(
 		return error;
 	}
 
+	/*
+	 * Shift operations must stabilize the start block offset boundary along
+	 * with the full range of the operation.  If we don't, a COW writeback
+	 * completion could race with an insert, front merge with the start
+	 * extent (after split) during the shift and corrupt the file.  Start
+	 * with the block just prior to the start to stabilize the boundary.
+	 */
+	offset = round_down(offset, 1 << mp->m_sb.sb_blocklog);
+	if (offset)
+		offset -= (1 << mp->m_sb.sb_blocklog);
+
 	/*
 	 * Writeback and invalidate cache for the remainder of the file as we're
 	 * about to shift down every extent from offset to EOF.
...
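The boundary arithmetic is easiest to see with numbers. Assuming 4096-byte blocks (sb_blocklog == 12), an insert at byte offset 10000 first rounds down to 8192 and then steps back one more block, so writeback is stabilized from byte 4096 onward:

#include <stdio.h>

int main(void)
{
	unsigned int blocklog = 12;		/* assumed: 4096-byte blocks */
	long long offset = 10000;		/* assumed insert start */

	offset &= ~((1LL << blocklog) - 1);	/* round_down() -> 8192 */
	if (offset)
		offset -= 1LL << blocklog;	/* previous block -> 4096 */

	printf("stabilize writeback from offset %lld\n", offset);
	return 0;
}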
@@ -956,7 +956,7 @@ xfs_buf_item_relse(
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
 	trace_xfs_buf_item_relse(bp, _RET_IP_);
-	ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
+	ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
 
 	bp->b_log_item = NULL;
 	if (list_empty(&bp->b_li_list))
...
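The one-liner above fixes a silent no-op: since li_flags is managed with the bitops API, XFS_LI_IN_AIL is a bit number (0 in this era of the code), not a mask, so masking li_flags with it is an AND with zero and the ASSERT could never fire. A plain-C sketch of the pitfall, with stand-ins for the kernel's set_bit()/test_bit():

#include <assert.h>

#define MY_LI_IN_AIL	0	/* a bit number, like XFS_LI_IN_AIL -- not a mask */

int main(void)
{
	unsigned long flags = 0;

	flags |= 1UL << MY_LI_IN_AIL;		/* set_bit(MY_LI_IN_AIL, &flags) */

	assert((flags >> MY_LI_IN_AIL) & 1);	/* test_bit(): correctly true */
	assert((flags & MY_LI_IN_AIL) == 0);	/* masking with the bit number is
						 * '& 0': always false, so the
						 * old ASSERT was a no-op */
	return 0;
}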
@@ -31,7 +31,7 @@
 #include "xfs_reflink.h"
 #include "xfs_extent_busy.h"
 #include "xfs_health.h"
+#include "xfs_trace.h"
 
 static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
@@ -360,14 +360,54 @@ xfs_readsb(
 }
 
 /*
- * Update alignment values based on mount options and sb values
+ * If the sunit/swidth change would move the precomputed root inode value, we
+ * must reject the ondisk change because repair will stumble over that.
+ * However, we allow the mount to proceed because we never rejected this
+ * combination before.  Returns true to update the sb, false otherwise.
+ */
+static inline int
+xfs_check_new_dalign(
+	struct xfs_mount	*mp,
+	int			new_dalign,
+	bool			*update_sb)
+{
+	struct xfs_sb		*sbp = &mp->m_sb;
+	xfs_ino_t		calc_ino;
+
+	calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
+	trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);
+
+	if (sbp->sb_rootino == calc_ino) {
+		*update_sb = true;
+		return 0;
+	}
+
+	xfs_warn(mp,
+"Cannot change stripe alignment; would require moving root inode.");
+
+	/*
+	 * XXX: Next time we add a new incompat feature, this should start
+	 * returning -EINVAL to fail the mount.  Until then, spit out a warning
+	 * that we're ignoring the administrator's instructions.
+	 */
+	xfs_warn(mp, "Skipping superblock stripe alignment update.");
+	*update_sb = false;
+	return 0;
+}
+
+/*
+ * If we were provided with new sunit/swidth values as mount options, make sure
+ * that they pass basic alignment and superblock feature checks, and convert
+ * them into the same units (FSB) that everything else expects.  This step
+ * /must/ be done before computing the inode geometry.
  */
 STATIC int
-xfs_update_alignment(xfs_mount_t *mp)
+xfs_validate_new_dalign(
+	struct xfs_mount	*mp)
 {
-	xfs_sb_t	*sbp = &(mp->m_sb);
+	if (mp->m_dalign == 0)
+		return 0;
 
-	if (mp->m_dalign) {
-		/*
-		 * If stripe unit and stripe width are not multiples
-		 * of the fs blocksize turn off alignment.
-		 */
+	/*
+	 * If stripe unit and stripe width are not multiples
+	 * of the fs blocksize turn off alignment.
+	 */
@@ -376,46 +416,59 @@ xfs_update_alignment(xfs_mount_t *mp)
 	    (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
 		xfs_warn(mp,
 	"alignment check failed: sunit/swidth vs. blocksize(%d)",
-			sbp->sb_blocksize);
+			mp->m_sb.sb_blocksize);
 		return -EINVAL;
 	} else {
 		/*
 		 * Convert the stripe unit and width to FSBs.
 		 */
 		mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
-		if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
+		if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
 			xfs_warn(mp,
 	"alignment check failed: sunit/swidth vs. agsize(%d)",
-				 sbp->sb_agblocks);
+				 mp->m_sb.sb_agblocks);
 			return -EINVAL;
 		} else if (mp->m_dalign) {
 			mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
 		} else {
 			xfs_warn(mp,
 	"alignment check failed: sunit(%d) less than bsize(%d)",
-				 mp->m_dalign, sbp->sb_blocksize);
+				 mp->m_dalign, mp->m_sb.sb_blocksize);
 			return -EINVAL;
 		}
 	}
 
-	/*
-	 * Update superblock with new values
-	 * and log changes
-	 */
-	if (xfs_sb_version_hasdalign(sbp)) {
-		if (sbp->sb_unit != mp->m_dalign) {
-			sbp->sb_unit = mp->m_dalign;
-			mp->m_update_sb = true;
-		}
-		if (sbp->sb_width != mp->m_swidth) {
-			sbp->sb_width = mp->m_swidth;
-			mp->m_update_sb = true;
-		}
-	} else {
+	if (!xfs_sb_version_hasdalign(&mp->m_sb)) {
 		xfs_warn(mp,
 	"cannot change alignment: superblock does not support data alignment");
 		return -EINVAL;
 	}
+
+	return 0;
+}
+
+/* Update alignment values based on mount options and sb values. */
+STATIC int
+xfs_update_alignment(
+	struct xfs_mount	*mp)
+{
+	struct xfs_sb		*sbp = &mp->m_sb;
+
+	if (mp->m_dalign) {
+		bool		update_sb;
+		int		error;
+
+		if (sbp->sb_unit == mp->m_dalign &&
+		    sbp->sb_width == mp->m_swidth)
+			return 0;
+
+		error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
+		if (error || !update_sb)
+			return error;
+
+		sbp->sb_unit = mp->m_dalign;
+		sbp->sb_width = mp->m_swidth;
+		mp->m_update_sb = true;
 	} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
 		    xfs_sb_version_hasdalign(&mp->m_sb)) {
 		mp->m_dalign = sbp->sb_unit;
@@ -648,12 +701,12 @@ xfs_mountfs(
 	}
 
 	/*
-	 * Check if sb_agblocks is aligned at stripe boundary
-	 * If sb_agblocks is NOT aligned turn off m_dalign since
-	 * allocator alignment is within an ag, therefore ag has
-	 * to be aligned at stripe boundary.
+	 * If we were given new sunit/swidth options, do some basic validation
+	 * checks and convert the incore dalign and swidth values to the
+	 * same units (FSB) that everything else uses.  This /must/ happen
+	 * before computing the inode geometry.
 	 */
-	error = xfs_update_alignment(mp);
+	error = xfs_validate_new_dalign(mp);
 	if (error)
 		goto out;
@@ -664,6 +717,17 @@ xfs_mountfs(
 	xfs_rmapbt_compute_maxlevels(mp);
 	xfs_refcountbt_compute_maxlevels(mp);
 
+	/*
+	 * Check if sb_agblocks is aligned at stripe boundary.  If sb_agblocks
+	 * is NOT aligned turn off m_dalign since allocator alignment is within
+	 * an ag, therefore ag has to be aligned at stripe boundary.  Note that
+	 * we must compute the free space and rmap btree geometry before doing
+	 * this.
+	 */
+	error = xfs_update_alignment(mp);
+	if (error)
+		goto out;
+
 	/* enable fail_at_unmount as default */
 	mp->m_fail_unmount = true;
...
@@ -3573,6 +3573,27 @@ DEFINE_KMEM_EVENT(kmem_alloc_large);
 DEFINE_KMEM_EVENT(kmem_realloc);
 DEFINE_KMEM_EVENT(kmem_zone_alloc);
 
+TRACE_EVENT(xfs_check_new_dalign,
+	TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino),
+	TP_ARGS(mp, new_dalign, calc_rootino),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(int, new_dalign)
+		__field(xfs_ino_t, sb_rootino)
+		__field(xfs_ino_t, calc_rootino)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->new_dalign = new_dalign;
+		__entry->sb_rootino = mp->m_sb.sb_rootino;
+		__entry->calc_rootino = calc_rootino;
+	),
+	TP_printk("dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->new_dalign, __entry->sb_rootino,
+		  __entry->calc_rootino)
+)
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH
...