Commit 11fc88c2 authored by Linus Torvalds

Merge tag 'xfs-5.17-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull xfs updates from Darrick Wong:
 "The big new feature here is that the mount code now only bothers to
  try to free stale COW staging extents if the fs unmounted uncleanly.
  This should reduce mount times, particularly on filesystems supporting
  reflink and containing a large number of allocation groups.

  Everything else this cycle is bugfixes, as the iomap folios
  conversion should be plenty enough excitement for anyone. That and I
  ran out of brain bandwidth after Thanksgiving last year.

  Summary:

   - Fix log recovery with da btree buffers when metauuid is in use.

   - Fix type coercion problems in xattr buffer size validation.

   - Fix a bug in online scrub dir leaf bestcount checking.

   - Only run COW recovery when recovering the log.

   - Fix symlink target buffer UAF problems and symlink locking problems
     by not exposing xfs innards to the VFS.

   - Fix incorrect quotaoff lock usage.

   - Don't let transactions cancel cleanly if they have deferred work
     items attached.

   - Fix a UAF when we're deciding if we need to relog an intent item.

   - Reduce kvmalloc overhead for log shadow buffers.

   - Clean up sysfs attr group usage.

   - Fix a bug where scrub's bmap/rmap checking could race with a quota
     file block allocation due to insufficient locking.

   - Teach scrub to complain about invalid project ids"

* tag 'xfs-5.17-merge-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  xfs: warn about inodes with project id of -1
  xfs: hold quota inode ILOCK_EXCL until the end of dqalloc
  xfs: Remove redundant assignment of mp
  xfs: reduce kvmalloc overhead for CIL shadow buffers
  xfs: sysfs: use default_groups in kobj_type
  xfs: prevent UAF in xfs_log_item_in_current_chkpt
  xfs: prevent a WARN_ONCE() in xfs_ioc_attr_list()
  xfs: Fix comments mentioning xfs_ialloc
  xfs: check sb_meta_uuid for dabuf buffer recovery
  xfs: fix a bug in the online fsck directory leaf1 bestcount check
  xfs: only run COW extent recovery when there are no live extents
  xfs: don't expose internal symlink metadata buffers to the vfs
  xfs: fix quotaoff mutex usage now that we don't support disabling it
  xfs: shut down filesystem if we xfs_trans_cancel with deferred work items
parents d601e58c 7e937bb3
...@@ -497,6 +497,7 @@ STATIC int ...@@ -497,6 +497,7 @@ STATIC int
xchk_directory_leaf1_bestfree( xchk_directory_leaf1_bestfree(
struct xfs_scrub *sc, struct xfs_scrub *sc,
struct xfs_da_args *args, struct xfs_da_args *args,
xfs_dir2_db_t last_data_db,
xfs_dablk_t lblk) xfs_dablk_t lblk)
{ {
struct xfs_dir3_icleaf_hdr leafhdr; struct xfs_dir3_icleaf_hdr leafhdr;
...@@ -534,10 +535,14 @@ xchk_directory_leaf1_bestfree( ...@@ -534,10 +535,14 @@ xchk_directory_leaf1_bestfree(
} }
/* /*
* There should be as many bestfree slots as there are dir data * There must be enough bestfree slots to cover all the directory data
* blocks that can fit under i_size. * blocks that we scanned. It is possible for there to be a hole
* between the last data block and i_disk_size. This seems like an
* oversight to the scrub author, but as we have been writing out
* directories like this (and xfs_repair doesn't mind them) for years,
* that's what we have to check.
*/ */
if (bestcount != xfs_dir2_byte_to_db(geo, sc->ip->i_disk_size)) { if (bestcount != last_data_db + 1) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
goto out; goto out;
} }
...@@ -669,6 +674,7 @@ xchk_directory_blocks( ...@@ -669,6 +674,7 @@ xchk_directory_blocks(
xfs_fileoff_t lblk; xfs_fileoff_t lblk;
struct xfs_iext_cursor icur; struct xfs_iext_cursor icur;
xfs_dablk_t dabno; xfs_dablk_t dabno;
xfs_dir2_db_t last_data_db = 0;
bool found; bool found;
int is_block = 0; int is_block = 0;
int error; int error;
...@@ -712,6 +718,7 @@ xchk_directory_blocks( ...@@ -712,6 +718,7 @@ xchk_directory_blocks(
args.geo->fsbcount); args.geo->fsbcount);
lblk < got.br_startoff + got.br_blockcount; lblk < got.br_startoff + got.br_blockcount;
lblk += args.geo->fsbcount) { lblk += args.geo->fsbcount) {
last_data_db = xfs_dir2_da_to_db(args.geo, lblk);
error = xchk_directory_data_bestfree(sc, lblk, error = xchk_directory_data_bestfree(sc, lblk,
is_block); is_block);
if (error) if (error)
...@@ -734,7 +741,7 @@ xchk_directory_blocks( ...@@ -734,7 +741,7 @@ xchk_directory_blocks(
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk); xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
goto out; goto out;
} }
error = xchk_directory_leaf1_bestfree(sc, &args, error = xchk_directory_leaf1_bestfree(sc, &args, last_data_db,
leaf_lblk); leaf_lblk);
if (error) if (error)
goto out; goto out;
......
...@@ -233,6 +233,7 @@ xchk_dinode( ...@@ -233,6 +233,7 @@ xchk_dinode(
unsigned long long isize; unsigned long long isize;
uint64_t flags2; uint64_t flags2;
uint32_t nextents; uint32_t nextents;
prid_t prid;
uint16_t flags; uint16_t flags;
uint16_t mode; uint16_t mode;
...@@ -267,6 +268,7 @@ xchk_dinode( ...@@ -267,6 +268,7 @@ xchk_dinode(
* so just mark this inode for preening. * so just mark this inode for preening.
*/ */
xchk_ino_set_preen(sc, ino); xchk_ino_set_preen(sc, ino);
prid = 0;
break; break;
case 2: case 2:
case 3: case 3:
...@@ -279,12 +281,17 @@ xchk_dinode( ...@@ -279,12 +281,17 @@ xchk_dinode(
if (dip->di_projid_hi != 0 && if (dip->di_projid_hi != 0 &&
!xfs_has_projid32(mp)) !xfs_has_projid32(mp))
xchk_ino_set_corrupt(sc, ino); xchk_ino_set_corrupt(sc, ino);
prid = be16_to_cpu(dip->di_projid_lo);
break; break;
default: default:
xchk_ino_set_corrupt(sc, ino); xchk_ino_set_corrupt(sc, ino);
return; return;
} }
if (xfs_has_projid32(mp))
prid |= (prid_t)be16_to_cpu(dip->di_projid_hi) << 16;
/* /*
* di_uid/di_gid -- -1 isn't invalid, but there's no way that * di_uid/di_gid -- -1 isn't invalid, but there's no way that
* userspace could have created that. * userspace could have created that.
...@@ -293,6 +300,13 @@ xchk_dinode( ...@@ -293,6 +300,13 @@ xchk_dinode(
dip->di_gid == cpu_to_be32(-1U)) dip->di_gid == cpu_to_be32(-1U))
xchk_ino_set_warning(sc, ino); xchk_ino_set_warning(sc, ino);
/*
* project id of -1 isn't supposed to be valid, but the kernel didn't
* always validate that.
*/
if (prid == -1U)
xchk_ino_set_warning(sc, ino);
/* di_format */ /* di_format */
switch (dip->di_format) { switch (dip->di_format) {
case XFS_DINODE_FMT_DEV: case XFS_DINODE_FMT_DEV:
......
...@@ -48,10 +48,10 @@ xchk_setup_quota( ...@@ -48,10 +48,10 @@ xchk_setup_quota(
dqtype = xchk_quota_to_dqtype(sc); dqtype = xchk_quota_to_dqtype(sc);
if (dqtype == 0) if (dqtype == 0)
return -EINVAL; return -EINVAL;
sc->flags |= XCHK_HAS_QUOTAOFFLOCK;
mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock);
if (!xfs_this_quota_on(sc->mp, dqtype)) if (!xfs_this_quota_on(sc->mp, dqtype))
return -ENOENT; return -ENOENT;
error = xchk_setup_fs(sc); error = xchk_setup_fs(sc);
if (error) if (error)
return error; return error;
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "xfs_ag.h" #include "xfs_ag.h"
#include "xfs_ag_resv.h" #include "xfs_ag_resv.h"
#include "xfs_quota.h" #include "xfs_quota.h"
#include "xfs_qm.h"
#include "scrub/scrub.h" #include "scrub/scrub.h"
#include "scrub/common.h" #include "scrub/common.h"
#include "scrub/trace.h" #include "scrub/trace.h"
...@@ -912,11 +913,13 @@ xrep_force_quotacheck( ...@@ -912,11 +913,13 @@ xrep_force_quotacheck(
if (!(flag & sc->mp->m_qflags)) if (!(flag & sc->mp->m_qflags))
return; return;
mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock);
sc->mp->m_qflags &= ~flag; sc->mp->m_qflags &= ~flag;
spin_lock(&sc->mp->m_sb_lock); spin_lock(&sc->mp->m_sb_lock);
sc->mp->m_sb.sb_qflags &= ~flag; sc->mp->m_sb.sb_qflags &= ~flag;
spin_unlock(&sc->mp->m_sb_lock); spin_unlock(&sc->mp->m_sb_lock);
xfs_log_sb(sc->tp); xfs_log_sb(sc->tp);
mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
} }
/* /*
......
...@@ -173,10 +173,6 @@ xchk_teardown( ...@@ -173,10 +173,6 @@ xchk_teardown(
mnt_drop_write_file(sc->file); mnt_drop_write_file(sc->file);
if (sc->flags & XCHK_REAPING_DISABLED) if (sc->flags & XCHK_REAPING_DISABLED)
xchk_start_reaping(sc); xchk_start_reaping(sc);
if (sc->flags & XCHK_HAS_QUOTAOFFLOCK) {
mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
sc->flags &= ~XCHK_HAS_QUOTAOFFLOCK;
}
if (sc->buf) { if (sc->buf) {
kmem_free(sc->buf); kmem_free(sc->buf);
sc->buf = NULL; sc->buf = NULL;
......
...@@ -88,7 +88,6 @@ struct xfs_scrub { ...@@ -88,7 +88,6 @@ struct xfs_scrub {
/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */ /* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */ #define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */
#define XCHK_HAS_QUOTAOFFLOCK (1 << 1) /* we hold the quotaoff lock */
#define XCHK_REAPING_DISABLED (1 << 2) /* background block reaping paused */ #define XCHK_REAPING_DISABLED (1 << 2) /* background block reaping paused */
#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */ #define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
......
...@@ -816,7 +816,7 @@ xlog_recover_get_buf_lsn( ...@@ -816,7 +816,7 @@ xlog_recover_get_buf_lsn(
} }
if (lsn != (xfs_lsn_t)-1) { if (lsn != (xfs_lsn_t)-1) {
if (!uuid_equal(&mp->m_sb.sb_uuid, uuid)) if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
goto recover_immediately; goto recover_immediately;
return lsn; return lsn;
} }
......
...@@ -289,13 +289,12 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp) ...@@ -289,13 +289,12 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
*/ */
STATIC int STATIC int
xfs_dquot_disk_alloc( xfs_dquot_disk_alloc(
struct xfs_trans **tpp,
struct xfs_dquot *dqp, struct xfs_dquot *dqp,
struct xfs_buf **bpp) struct xfs_buf **bpp)
{ {
struct xfs_bmbt_irec map; struct xfs_bmbt_irec map;
struct xfs_trans *tp = *tpp; struct xfs_trans *tp;
struct xfs_mount *mp = tp->t_mountp; struct xfs_mount *mp = dqp->q_mount;
struct xfs_buf *bp; struct xfs_buf *bp;
xfs_dqtype_t qtype = xfs_dquot_type(dqp); xfs_dqtype_t qtype = xfs_dquot_type(dqp);
struct xfs_inode *quotip = xfs_quota_inode(mp, qtype); struct xfs_inode *quotip = xfs_quota_inode(mp, qtype);
...@@ -304,29 +303,35 @@ xfs_dquot_disk_alloc( ...@@ -304,29 +303,35 @@ xfs_dquot_disk_alloc(
trace_xfs_dqalloc(dqp); trace_xfs_dqalloc(dqp);
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
if (error)
return error;
xfs_ilock(quotip, XFS_ILOCK_EXCL); xfs_ilock(quotip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, quotip, 0);
if (!xfs_this_quota_on(dqp->q_mount, qtype)) { if (!xfs_this_quota_on(dqp->q_mount, qtype)) {
/* /*
* Return if this type of quotas is turned off while we didn't * Return if this type of quotas is turned off while we didn't
* have an inode lock * have an inode lock
*/ */
xfs_iunlock(quotip, XFS_ILOCK_EXCL); error = -ESRCH;
return -ESRCH; goto err_cancel;
} }
xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
error = xfs_iext_count_may_overflow(quotip, XFS_DATA_FORK, error = xfs_iext_count_may_overflow(quotip, XFS_DATA_FORK,
XFS_IEXT_ADD_NOSPLIT_CNT); XFS_IEXT_ADD_NOSPLIT_CNT);
if (error) if (error)
return error; goto err_cancel;
/* Create the block mapping. */ /* Create the block mapping. */
error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset, error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset,
XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 0, &map, XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 0, &map,
&nmaps); &nmaps);
if (error) if (error)
return error; goto err_cancel;
ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
ASSERT(nmaps == 1); ASSERT(nmaps == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) && ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
...@@ -341,7 +346,7 @@ xfs_dquot_disk_alloc( ...@@ -341,7 +346,7 @@ xfs_dquot_disk_alloc(
error = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno, error = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno,
mp->m_quotainfo->qi_dqchunklen, 0, &bp); mp->m_quotainfo->qi_dqchunklen, 0, &bp);
if (error) if (error)
return error; goto err_cancel;
bp->b_ops = &xfs_dquot_buf_ops; bp->b_ops = &xfs_dquot_buf_ops;
/* /*
...@@ -371,16 +376,25 @@ xfs_dquot_disk_alloc( ...@@ -371,16 +376,25 @@ xfs_dquot_disk_alloc(
* is responsible for unlocking any buffer passed back, either * is responsible for unlocking any buffer passed back, either
* manually or by committing the transaction. On error, the buffer is * manually or by committing the transaction. On error, the buffer is
* released and not passed back. * released and not passed back.
*
* Keep the quota inode ILOCKed until after the transaction commit to
* maintain the atomicity of bmap/rmap updates.
*/ */
xfs_trans_bhold(tp, bp); xfs_trans_bhold(tp, bp);
error = xfs_defer_finish(tpp); error = xfs_trans_commit(tp);
xfs_iunlock(quotip, XFS_ILOCK_EXCL);
if (error) { if (error) {
xfs_trans_bhold_release(*tpp, bp); xfs_buf_relse(bp);
xfs_trans_brelse(*tpp, bp);
return error; return error;
} }
*bpp = bp; *bpp = bp;
return 0; return 0;
err_cancel:
xfs_trans_cancel(tp);
xfs_iunlock(quotip, XFS_ILOCK_EXCL);
return error;
} }
/* /*
...@@ -629,43 +643,6 @@ xfs_dquot_to_disk( ...@@ -629,43 +643,6 @@ xfs_dquot_to_disk(
ddqp->d_rtbtimer = xfs_dquot_to_disk_ts(dqp, dqp->q_rtb.timer); ddqp->d_rtbtimer = xfs_dquot_to_disk_ts(dqp, dqp->q_rtb.timer);
} }
/* Allocate and initialize the dquot buffer for this in-core dquot. */
static int
xfs_qm_dqread_alloc(
struct xfs_mount *mp,
struct xfs_dquot *dqp,
struct xfs_buf **bpp)
{
struct xfs_trans *tp;
int error;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc,
XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp);
if (error)
goto err;
error = xfs_dquot_disk_alloc(&tp, dqp, bpp);
if (error)
goto err_cancel;
error = xfs_trans_commit(tp);
if (error) {
/*
* Buffer was held to the transaction, so we have to unlock it
* manually here because we're not passing it back.
*/
xfs_buf_relse(*bpp);
*bpp = NULL;
goto err;
}
return 0;
err_cancel:
xfs_trans_cancel(tp);
err:
return error;
}
/* /*
* Read in the ondisk dquot using dqtobp() then copy it to an incore version, * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
* and release the buffer immediately. If @can_alloc is true, fill any * and release the buffer immediately. If @can_alloc is true, fill any
...@@ -689,7 +666,7 @@ xfs_qm_dqread( ...@@ -689,7 +666,7 @@ xfs_qm_dqread(
/* Try to read the buffer, allocating if necessary. */ /* Try to read the buffer, allocating if necessary. */
error = xfs_dquot_disk_read(mp, dqp, &bp); error = xfs_dquot_disk_read(mp, dqp, &bp);
if (error == -ENOENT && can_alloc) if (error == -ENOENT && can_alloc)
error = xfs_qm_dqread_alloc(mp, dqp, &bp); error = xfs_dquot_disk_alloc(dqp, &bp);
if (error) if (error)
goto err; goto err;
......
...@@ -213,11 +213,12 @@ static struct attribute *xfs_errortag_attrs[] = { ...@@ -213,11 +213,12 @@ static struct attribute *xfs_errortag_attrs[] = {
XFS_ERRORTAG_ATTR_LIST(ag_resv_fail), XFS_ERRORTAG_ATTR_LIST(ag_resv_fail),
NULL, NULL,
}; };
ATTRIBUTE_GROUPS(xfs_errortag);
static struct kobj_type xfs_errortag_ktype = { static struct kobj_type xfs_errortag_ktype = {
.release = xfs_sysfs_release, .release = xfs_sysfs_release,
.sysfs_ops = &xfs_errortag_sysfs_ops, .sysfs_ops = &xfs_errortag_sysfs_ops,
.default_attrs = xfs_errortag_attrs, .default_groups = xfs_errortag_groups,
}; };
int int
......
...@@ -749,7 +749,8 @@ xfs_iget( ...@@ -749,7 +749,8 @@ xfs_iget(
/* /*
* If we have a real type for an on-disk inode, we can setup the inode * If we have a real type for an on-disk inode, we can setup the inode
* now. If it's a new inode being created, xfs_ialloc will handle it. * now. If it's a new inode being created, xfs_init_new_inode will
* handle it.
*/ */
if (xfs_iflags_test(ip, XFS_INEW) && VFS_I(ip)->i_mode != 0) if (xfs_iflags_test(ip, XFS_INEW) && VFS_I(ip)->i_mode != 0)
xfs_setup_existing_inode(ip); xfs_setup_existing_inode(ip);
......
...@@ -372,7 +372,7 @@ int ...@@ -372,7 +372,7 @@ int
xfs_ioc_attr_list( xfs_ioc_attr_list(
struct xfs_inode *dp, struct xfs_inode *dp,
void __user *ubuf, void __user *ubuf,
int bufsize, size_t bufsize,
int flags, int flags,
struct xfs_attrlist_cursor __user *ucursor) struct xfs_attrlist_cursor __user *ucursor)
{ {
......
...@@ -38,8 +38,9 @@ xfs_readlink_by_handle( ...@@ -38,8 +38,9 @@ xfs_readlink_by_handle(
int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode, int xfs_ioc_attrmulti_one(struct file *parfilp, struct inode *inode,
uint32_t opcode, void __user *uname, void __user *value, uint32_t opcode, void __user *uname, void __user *value,
uint32_t *len, uint32_t flags); uint32_t *len, uint32_t flags);
int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf, int bufsize, int xfs_ioc_attr_list(struct xfs_inode *dp, void __user *ubuf,
int flags, struct xfs_attrlist_cursor __user *ucursor); size_t bufsize, int flags,
struct xfs_attrlist_cursor __user *ucursor);
extern struct dentry * extern struct dentry *
xfs_handle_to_dentry( xfs_handle_to_dentry(
......
...@@ -511,27 +511,6 @@ xfs_vn_get_link( ...@@ -511,27 +511,6 @@ xfs_vn_get_link(
return ERR_PTR(error); return ERR_PTR(error);
} }
STATIC const char *
xfs_vn_get_link_inline(
struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
struct xfs_inode *ip = XFS_I(inode);
char *link;
ASSERT(ip->i_df.if_format == XFS_DINODE_FMT_LOCAL);
/*
* The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if
* if_data is junk.
*/
link = ip->i_df.if_u1.if_data;
if (XFS_IS_CORRUPT(ip->i_mount, !link))
return ERR_PTR(-EFSCORRUPTED);
return link;
}
static uint32_t static uint32_t
xfs_stat_blksize( xfs_stat_blksize(
struct xfs_inode *ip) struct xfs_inode *ip)
...@@ -1250,14 +1229,6 @@ static const struct inode_operations xfs_symlink_inode_operations = { ...@@ -1250,14 +1229,6 @@ static const struct inode_operations xfs_symlink_inode_operations = {
.update_time = xfs_vn_update_time, .update_time = xfs_vn_update_time,
}; };
static const struct inode_operations xfs_inline_symlink_inode_operations = {
.get_link = xfs_vn_get_link_inline,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.listxattr = xfs_vn_listxattr,
.update_time = xfs_vn_update_time,
};
/* Figure out if this file actually supports DAX. */ /* Figure out if this file actually supports DAX. */
static bool static bool
xfs_inode_supports_dax( xfs_inode_supports_dax(
...@@ -1332,9 +1303,9 @@ xfs_diflags_to_iflags( ...@@ -1332,9 +1303,9 @@ xfs_diflags_to_iflags(
* Initialize the Linux inode. * Initialize the Linux inode.
* *
* When reading existing inodes from disk this is called directly from xfs_iget, * When reading existing inodes from disk this is called directly from xfs_iget,
* when creating a new inode it is called from xfs_ialloc after setting up the * when creating a new inode it is called from xfs_init_new_inode after setting
* inode. These callers have different criteria for clearing XFS_INEW, so leave * up the inode. These callers have different criteria for clearing XFS_INEW, so
* it up to the caller to deal with unlocking the inode appropriately. * leave it up to the caller to deal with unlocking the inode appropriately.
*/ */
void void
xfs_setup_inode( xfs_setup_inode(
...@@ -1408,10 +1379,7 @@ xfs_setup_iops( ...@@ -1408,10 +1379,7 @@ xfs_setup_iops(
inode->i_fop = &xfs_dir_file_operations; inode->i_fop = &xfs_dir_file_operations;
break; break;
case S_IFLNK: case S_IFLNK:
if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) inode->i_op = &xfs_symlink_inode_operations;
inode->i_op = &xfs_inline_symlink_inode_operations;
else
inode->i_op = &xfs_symlink_inode_operations;
break; break;
default: default:
inode->i_op = &xfs_inode_operations; inode->i_op = &xfs_inode_operations;
......
...@@ -102,6 +102,39 @@ xlog_cil_iovec_space( ...@@ -102,6 +102,39 @@ xlog_cil_iovec_space(
sizeof(uint64_t)); sizeof(uint64_t));
} }
/*
* shadow buffers can be large, so we need to use kvmalloc() here to ensure
* success. Unfortunately, kvmalloc() only allows GFP_KERNEL contexts to fall
* back to vmalloc, so we can't actually do anything useful with gfp flags to
* control the kmalloc() behaviour within kvmalloc(). Hence kmalloc() will do
* direct reclaim and compaction in the slow path, both of which are
* horrendously expensive. We just want kmalloc to fail fast and fall back to
* vmalloc if it can't get something straight away from the free lists or buddy
* allocator. Hence we have to open code kvmalloc ourselves here.
*
* Also, we are in memalloc_nofs_save task context here, so despite the use of
* GFP_KERNEL here, we are actually going to be doing GFP_NOFS allocations. This
* is actually the only way to make vmalloc() do GFP_NOFS allocations, so let's
* just all pretend this is a GFP_KERNEL context operation....
*/
static inline void *
xlog_cil_kvmalloc(
size_t buf_size)
{
gfp_t flags = GFP_KERNEL;
void *p;
flags &= ~__GFP_DIRECT_RECLAIM;
flags |= __GFP_NOWARN | __GFP_NORETRY;
do {
p = kmalloc(buf_size, flags);
if (!p)
p = vmalloc(buf_size);
} while (!p);
return p;
}
/* /*
* Allocate or pin log vector buffers for CIL insertion. * Allocate or pin log vector buffers for CIL insertion.
* *
...@@ -203,25 +236,16 @@ xlog_cil_alloc_shadow_bufs( ...@@ -203,25 +236,16 @@ xlog_cil_alloc_shadow_bufs(
*/ */
if (!lip->li_lv_shadow || if (!lip->li_lv_shadow ||
buf_size > lip->li_lv_shadow->lv_size) { buf_size > lip->li_lv_shadow->lv_size) {
/* /*
* We free and allocate here as a realloc would copy * We free and allocate here as a realloc would copy
* unnecessary data. We don't use kmem_zalloc() for the * unnecessary data. We don't use kvzalloc() for the
* same reason - we don't need to zero the data area in * same reason - we don't need to zero the data area in
* the buffer, only the log vector header and the iovec * the buffer, only the log vector header and the iovec
* storage. * storage.
*/ */
kmem_free(lip->li_lv_shadow); kmem_free(lip->li_lv_shadow);
lv = xlog_cil_kvmalloc(buf_size);
/*
* We are in transaction context, which means this
* allocation will pick up GFP_NOFS from the
* memalloc_nofs_save/restore context the transaction
* holds. This means we can use GFP_KERNEL here so the
* generic kvmalloc() code will run vmalloc on
* contiguous page allocation failure as we require.
*/
lv = kvmalloc(buf_size, GFP_KERNEL);
memset(lv, 0, xlog_cil_iovec_space(niovecs)); memset(lv, 0, xlog_cil_iovec_space(niovecs));
lv->lv_item = lip; lv->lv_item = lip;
...@@ -1442,9 +1466,9 @@ xlog_cil_force_seq( ...@@ -1442,9 +1466,9 @@ xlog_cil_force_seq(
*/ */
bool bool
xfs_log_item_in_current_chkpt( xfs_log_item_in_current_chkpt(
struct xfs_log_item *lip) struct xfs_log_item *lip)
{ {
struct xfs_cil_ctx *ctx = lip->li_mountp->m_log->l_cilp->xc_ctx; struct xfs_cil *cil = lip->li_mountp->m_log->l_cilp;
if (list_empty(&lip->li_cil)) if (list_empty(&lip->li_cil))
return false; return false;
...@@ -1454,7 +1478,7 @@ xfs_log_item_in_current_chkpt( ...@@ -1454,7 +1478,7 @@ xfs_log_item_in_current_chkpt(
* first checkpoint it is written to. Hence if it is different to the * first checkpoint it is written to. Hence if it is different to the
* current sequence, we're in a new checkpoint. * current sequence, we're in a new checkpoint.
*/ */
return lip->li_seq == ctx->sequence; return lip->li_seq == READ_ONCE(cil->xc_current_sequence);
} }
/* /*
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
#include "xfs_buf_item.h" #include "xfs_buf_item.h"
#include "xfs_ag.h" #include "xfs_ag.h"
#include "xfs_quota.h" #include "xfs_quota.h"
#include "xfs_reflink.h"
#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1)
...@@ -3498,6 +3498,28 @@ xlog_recover_finish( ...@@ -3498,6 +3498,28 @@ xlog_recover_finish(
xlog_recover_process_iunlinks(log); xlog_recover_process_iunlinks(log);
xlog_recover_check_summary(log); xlog_recover_check_summary(log);
/*
* Recover any CoW staging blocks that are still referenced by the
* ondisk refcount metadata. During mount there cannot be any live
* staging extents as we have not permitted any user modifications.
* Therefore, it is safe to free them all right now, even on a
* read-only mount.
*/
error = xfs_reflink_recover_cow(log->l_mp);
if (error) {
xfs_alert(log->l_mp,
"Failed to recover leftover CoW staging extents, err %d.",
error);
/*
* If we get an error here, make sure the log is shut down
* but return zero so that any log items committed since the
* end of intents processing can be pushed through the CIL
* and AIL.
*/
xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
}
return 0; return 0;
} }
...@@ -3528,8 +3550,6 @@ xlog_recover_check_summary( ...@@ -3528,8 +3550,6 @@ xlog_recover_check_summary(
uint64_t ifree; uint64_t ifree;
int error; int error;
mp = log->l_mp;
freeblks = 0LL; freeblks = 0LL;
itotal = 0LL; itotal = 0LL;
ifree = 0LL; ifree = 0LL;
......
...@@ -936,15 +936,6 @@ xfs_mountfs( ...@@ -936,15 +936,6 @@ xfs_mountfs(
xfs_warn(mp, xfs_warn(mp,
"Unable to allocate reserve blocks. Continuing without reserve pool."); "Unable to allocate reserve blocks. Continuing without reserve pool.");
/* Recover any CoW blocks that never got remapped. */
error = xfs_reflink_recover_cow(mp);
if (error) {
xfs_err(mp,
"Error %d recovering leftover CoW allocations.", error);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
goto out_quota;
}
/* Reserve AG blocks for future btree expansion. */ /* Reserve AG blocks for future btree expansion. */
error = xfs_fs_reserve_ag_blocks(mp); error = xfs_fs_reserve_ag_blocks(mp);
if (error && error != -ENOSPC) if (error && error != -ENOSPC)
...@@ -955,7 +946,6 @@ xfs_mountfs( ...@@ -955,7 +946,6 @@ xfs_mountfs(
out_agresv: out_agresv:
xfs_fs_unreserve_ag_blocks(mp); xfs_fs_unreserve_ag_blocks(mp);
out_quota:
xfs_qm_unmount_quotas(mp); xfs_qm_unmount_quotas(mp);
out_rtunmount: out_rtunmount:
xfs_rtunmount_inodes(mp); xfs_rtunmount_inodes(mp);
......
...@@ -302,13 +302,6 @@ xfs_qm_scall_setqlim( ...@@ -302,13 +302,6 @@ xfs_qm_scall_setqlim(
if ((newlim->d_fieldmask & XFS_QC_MASK) == 0) if ((newlim->d_fieldmask & XFS_QC_MASK) == 0)
return 0; return 0;
/*
* We don't want to race with a quotaoff so take the quotaoff lock.
* We don't hold an inode lock, so there's nothing else to stop
* a quotaoff from happening.
*/
mutex_lock(&q->qi_quotaofflock);
/* /*
* Get the dquot (locked) before we start, as we need to do a * Get the dquot (locked) before we start, as we need to do a
* transaction to allocate it if it doesn't exist. Once we have the * transaction to allocate it if it doesn't exist. Once we have the
...@@ -319,7 +312,7 @@ xfs_qm_scall_setqlim( ...@@ -319,7 +312,7 @@ xfs_qm_scall_setqlim(
error = xfs_qm_dqget(mp, id, type, true, &dqp); error = xfs_qm_dqget(mp, id, type, true, &dqp);
if (error) { if (error) {
ASSERT(error != -ENOENT); ASSERT(error != -ENOENT);
goto out_unlock; return error;
} }
defq = xfs_get_defquota(q, xfs_dquot_type(dqp)); defq = xfs_get_defquota(q, xfs_dquot_type(dqp));
...@@ -415,8 +408,6 @@ xfs_qm_scall_setqlim( ...@@ -415,8 +408,6 @@ xfs_qm_scall_setqlim(
out_rele: out_rele:
xfs_qm_dqrele(dqp); xfs_qm_dqrele(dqp);
out_unlock:
mutex_unlock(&q->qi_quotaofflock);
return error; return error;
} }
......
...@@ -749,7 +749,10 @@ xfs_reflink_end_cow( ...@@ -749,7 +749,10 @@ xfs_reflink_end_cow(
} }
/* /*
* Free leftover CoW reservations that didn't get cleaned out. * Free all CoW staging blocks that are still referenced by the ondisk refcount
* metadata. The ondisk metadata does not track which inode created the
* staging extent, so callers must ensure that there are no cached inodes with
* live CoW staging extents.
*/ */
int int
xfs_reflink_recover_cow( xfs_reflink_recover_cow(
......
...@@ -1739,15 +1739,6 @@ xfs_remount_rw( ...@@ -1739,15 +1739,6 @@ xfs_remount_rw(
*/ */
xfs_restore_resvblks(mp); xfs_restore_resvblks(mp);
xfs_log_work_queue(mp); xfs_log_work_queue(mp);
/* Recover any CoW blocks that never got remapped. */
error = xfs_reflink_recover_cow(mp);
if (error) {
xfs_err(mp,
"Error %d recovering leftover CoW allocations.", error);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
return error;
}
xfs_blockgc_start(mp); xfs_blockgc_start(mp);
/* Create the per-AG metadata reservation pool .*/ /* Create the per-AG metadata reservation pool .*/
......
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "xfs_trace.h" #include "xfs_trace.h"
#include "xfs_trans.h" #include "xfs_trans.h"
#include "xfs_ialloc.h" #include "xfs_ialloc.h"
#include "xfs_error.h"
/* ----- Kernel only functions below ----- */ /* ----- Kernel only functions below ----- */
int int
...@@ -96,17 +97,15 @@ xfs_readlink_bmap_ilocked( ...@@ -96,17 +97,15 @@ xfs_readlink_bmap_ilocked(
int int
xfs_readlink( xfs_readlink(
struct xfs_inode *ip, struct xfs_inode *ip,
char *link) char *link)
{ {
struct xfs_mount *mp = ip->i_mount; struct xfs_mount *mp = ip->i_mount;
xfs_fsize_t pathlen; xfs_fsize_t pathlen;
int error = 0; int error = -EFSCORRUPTED;
trace_xfs_readlink(ip); trace_xfs_readlink(ip);
ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_LOCAL);
if (xfs_is_shutdown(mp)) if (xfs_is_shutdown(mp))
return -EIO; return -EIO;
...@@ -121,12 +120,22 @@ xfs_readlink( ...@@ -121,12 +120,22 @@ xfs_readlink(
__func__, (unsigned long long) ip->i_ino, __func__, (unsigned long long) ip->i_ino,
(long long) pathlen); (long long) pathlen);
ASSERT(0); ASSERT(0);
error = -EFSCORRUPTED;
goto out; goto out;
} }
if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
error = xfs_readlink_bmap_ilocked(ip, link); /*
* The VFS crashes on a NULL pointer, so return -EFSCORRUPTED
* if if_data is junk.
*/
if (XFS_IS_CORRUPT(ip->i_mount, !ip->i_df.if_u1.if_data))
goto out;
memcpy(link, ip->i_df.if_u1.if_data, pathlen + 1);
error = 0;
} else {
error = xfs_readlink_bmap_ilocked(ip, link);
}
out: out:
xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_iunlock(ip, XFS_ILOCK_SHARED);
......
...@@ -67,11 +67,12 @@ static const struct sysfs_ops xfs_sysfs_ops = { ...@@ -67,11 +67,12 @@ static const struct sysfs_ops xfs_sysfs_ops = {
static struct attribute *xfs_mp_attrs[] = { static struct attribute *xfs_mp_attrs[] = {
NULL, NULL,
}; };
ATTRIBUTE_GROUPS(xfs_mp);
struct kobj_type xfs_mp_ktype = { struct kobj_type xfs_mp_ktype = {
.release = xfs_sysfs_release, .release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops, .sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_mp_attrs, .default_groups = xfs_mp_groups,
}; };
#ifdef DEBUG #ifdef DEBUG
...@@ -239,11 +240,12 @@ static struct attribute *xfs_dbg_attrs[] = { ...@@ -239,11 +240,12 @@ static struct attribute *xfs_dbg_attrs[] = {
#endif #endif
NULL, NULL,
}; };
ATTRIBUTE_GROUPS(xfs_dbg);
struct kobj_type xfs_dbg_ktype = { struct kobj_type xfs_dbg_ktype = {
.release = xfs_sysfs_release, .release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops, .sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_dbg_attrs, .default_groups = xfs_dbg_groups,
}; };
#endif /* DEBUG */ #endif /* DEBUG */
...@@ -296,11 +298,12 @@ static struct attribute *xfs_stats_attrs[] = { ...@@ -296,11 +298,12 @@ static struct attribute *xfs_stats_attrs[] = {
ATTR_LIST(stats_clear), ATTR_LIST(stats_clear),
NULL, NULL,
}; };
ATTRIBUTE_GROUPS(xfs_stats);
struct kobj_type xfs_stats_ktype = { struct kobj_type xfs_stats_ktype = {
.release = xfs_sysfs_release, .release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops, .sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_stats_attrs, .default_groups = xfs_stats_groups,
}; };
/* xlog */ /* xlog */
...@@ -381,11 +384,12 @@ static struct attribute *xfs_log_attrs[] = { ...@@ -381,11 +384,12 @@ static struct attribute *xfs_log_attrs[] = {
ATTR_LIST(write_grant_head), ATTR_LIST(write_grant_head),
NULL, NULL,
}; };
ATTRIBUTE_GROUPS(xfs_log);
struct kobj_type xfs_log_ktype = { struct kobj_type xfs_log_ktype = {
.release = xfs_sysfs_release, .release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops, .sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_log_attrs, .default_groups = xfs_log_groups,
}; };
/* /*
...@@ -534,12 +538,12 @@ static struct attribute *xfs_error_attrs[] = { ...@@ -534,12 +538,12 @@ static struct attribute *xfs_error_attrs[] = {
ATTR_LIST(retry_timeout_seconds), ATTR_LIST(retry_timeout_seconds),
NULL, NULL,
}; };
ATTRIBUTE_GROUPS(xfs_error);
static struct kobj_type xfs_error_cfg_ktype = { static struct kobj_type xfs_error_cfg_ktype = {
.release = xfs_sysfs_release, .release = xfs_sysfs_release,
.sysfs_ops = &xfs_sysfs_ops, .sysfs_ops = &xfs_sysfs_ops,
.default_attrs = xfs_error_attrs, .default_groups = xfs_error_groups,
}; };
static struct kobj_type xfs_error_ktype = { static struct kobj_type xfs_error_ktype = {
......
...@@ -942,8 +942,17 @@ xfs_trans_cancel( ...@@ -942,8 +942,17 @@ xfs_trans_cancel(
trace_xfs_trans_cancel(tp, _RET_IP_); trace_xfs_trans_cancel(tp, _RET_IP_);
if (tp->t_flags & XFS_TRANS_PERM_LOG_RES) /*
* It's never valid to cancel a transaction with deferred ops attached,
* because the transaction is effectively dirty. Complain about this
* loudly before freeing the in-memory defer items.
*/
if (!list_empty(&tp->t_dfops)) {
ASSERT(xfs_is_shutdown(mp) || list_empty(&tp->t_dfops));
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
dirty = true;
xfs_defer_cancel(tp); xfs_defer_cancel(tp);
}
/* /*
* See if the caller is relying on us to shut down the * See if the caller is relying on us to shut down the
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment