Commit 22b4eb5e authored by Linus Torvalds

Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs:
  xfs: cleanup xfs_file_aio_write
  xfs: always return with the iolock held from xfs_file_aio_write_checks
  xfs: remove the i_new_size field in struct xfs_inode
  xfs: remove the i_size field in struct xfs_inode
  xfs: replace i_pin_wait with a bit waitqueue
  xfs: replace i_flock with a sleeping bitlock
  xfs: make i_flags an unsigned long
  xfs: remove the if_ext_max field in struct xfs_ifork
  xfs: remove the unused dm_attrs structure
  xfs: cleanup xfs_iomap_eof_align_last_fsb
  xfs: remove xfs_itruncate_data
parents d65773b2 d0606464
......@@ -111,8 +111,7 @@ xfs_ioend_new_eof(
xfs_fsize_t bsize;
bsize = ioend->io_offset + ioend->io_size;
isize = MAX(ip->i_size, ip->i_new_size);
isize = MIN(isize, bsize);
isize = MIN(i_size_read(VFS_I(ip)), bsize);
return isize > ip->i_d.di_size ? isize : 0;
}
......@@ -126,11 +125,7 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
}
/*
* Update on-disk file size now that data has been written to disk. The
* current in-memory file size is i_size. If a write is beyond eof i_new_size
* will be the intended file size until i_size is updated. If this write does
* not extend all the way to the valid file size then restrict this update to
* the end of the write.
* Update on-disk file size now that data has been written to disk.
*
* This function does not block as blocking on the inode lock in IO completion
* can lead to IO completion order dependency deadlocks. If it can't get the
......@@ -1278,6 +1273,15 @@ xfs_end_io_direct_write(
{
struct xfs_ioend *ioend = iocb->private;
/*
* While the generic direct I/O code updates the inode size, it does
* so only after the end_io handler is called, which means our
* end_io handler thinks the on-disk size is outside the in-core
* size. To prevent this just update it a little bit earlier here.
*/
if (offset + size > i_size_read(ioend->io_inode))
i_size_write(ioend->io_inode, offset + size);
/*
* blockdev_direct_IO can return an error even after the I/O
* completion handler was called. Thus we need to protect
......@@ -1340,12 +1344,11 @@ xfs_vm_write_failed(
if (to > inode->i_size) {
/*
* punch out the delalloc blocks we have already allocated. We
* don't call xfs_setattr() to do this as we may be in the
* middle of a multi-iovec write and so the vfs inode->i_size
* will not match the xfs ip->i_size and so it will zero too
* much. Hence we jus truncate the page cache to zero what is
* necessary and punch the delalloc blocks directly.
* Punch out the delalloc blocks we have already allocated.
*
* Don't bother with xfs_setattr given that nothing can have
* made it to disk yet as the page is still locked at this
* point.
*/
struct xfs_inode *ip = XFS_I(inode);
xfs_fileoff_t start_fsb;
......
......@@ -827,10 +827,6 @@ xfs_attr_inactive(xfs_inode_t *dp)
if (error)
goto out;
/*
* Commit the last in the sequence of transactions.
*/
xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
xfs_iunlock(dp, XFS_ILOCK_EXCL);
......
......@@ -271,10 +271,6 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff)
dp = args->dp;
mp = dp->i_mount;
dp->i_d.di_forkoff = forkoff;
dp->i_df.if_ext_max =
XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
dp->i_afp->if_ext_max =
XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
ifp = dp->i_afp;
ASSERT(ifp->if_flags & XFS_IFINLINE);
......@@ -326,7 +322,6 @@ xfs_attr_fork_reset(
ASSERT(ip->i_d.di_anextents == 0);
ASSERT(ip->i_afp == NULL);
ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
......@@ -389,10 +384,6 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
(args->op_flags & XFS_DA_OP_ADDNAME) ||
!(mp->m_flags & XFS_MOUNT_ATTR2) ||
dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
dp->i_afp->if_ext_max =
XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
dp->i_df.if_ext_max =
XFS_IFORK_DSIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
xfs_trans_log_inode(args->trans, dp,
XFS_ILOG_CORE | XFS_ILOG_ADATA);
}
......
This diff is collapsed.
......@@ -163,12 +163,14 @@ xfs_swap_extents_check_format(
/* Check temp in extent form to max in target */
if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max)
XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
return EINVAL;
/* Check target in extent form to max in temp */
if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
return EINVAL;
/*
......@@ -180,18 +182,25 @@ xfs_swap_extents_check_format(
* (a common defrag case) which will occur when the temp inode is in
* extent format...
*/
if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
((XFS_IFORK_BOFF(ip) &&
tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) ||
XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <= ip->i_df.if_ext_max))
return EINVAL;
if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
if (XFS_IFORK_BOFF(ip) &&
tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
return EINVAL;
if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
return EINVAL;
}
/* Reciprocal target->temp btree format checks */
if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
((XFS_IFORK_BOFF(tip) &&
ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) ||
XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <= tip->i_df.if_ext_max))
return EINVAL;
if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
if (XFS_IFORK_BOFF(tip) &&
ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
return EINVAL;
if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
return EINVAL;
}
return 0;
}
......@@ -348,16 +357,6 @@ xfs_swap_extents(
*ifp = *tifp; /* struct copy */
*tifp = *tempifp; /* struct copy */
/*
* Fix the in-memory data fork values that are dependent on the fork
* offset in the inode. We can't assume they remain the same as attr2
* has dynamic fork offsets.
*/
ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
(uint)sizeof(xfs_bmbt_rec_t);
tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
(uint)sizeof(xfs_bmbt_rec_t);
/*
* Fix the on-disk inode values
*/
......
......@@ -327,7 +327,7 @@ xfs_file_aio_read(
mp->m_rtdev_targp : mp->m_ddev_targp;
if ((iocb->ki_pos & target->bt_smask) ||
(size & target->bt_smask)) {
if (iocb->ki_pos == ip->i_size)
if (iocb->ki_pos == i_size_read(inode))
return 0;
return -XFS_ERROR(EINVAL);
}
......@@ -412,51 +412,6 @@ xfs_file_splice_read(
return ret;
}
STATIC void
xfs_aio_write_isize_update(
struct inode *inode,
loff_t *ppos,
ssize_t bytes_written)
{
struct xfs_inode *ip = XFS_I(inode);
xfs_fsize_t isize = i_size_read(inode);
if (bytes_written > 0)
XFS_STATS_ADD(xs_write_bytes, bytes_written);
if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
*ppos > isize))
*ppos = isize;
if (*ppos > ip->i_size) {
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
if (*ppos > ip->i_size)
ip->i_size = *ppos;
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
}
}
/*
* If this was a direct or synchronous I/O that failed (such as ENOSPC) then
* part of the I/O may have been written to disk before the error occurred. In
* this case the on-disk file size may have been adjusted beyond the in-memory
* file size and now needs to be truncated back.
*/
STATIC void
xfs_aio_write_newsize_update(
struct xfs_inode *ip,
xfs_fsize_t new_size)
{
if (new_size == ip->i_new_size) {
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
if (new_size == ip->i_new_size)
ip->i_new_size = 0;
if (ip->i_d.di_size > ip->i_size)
ip->i_d.di_size = ip->i_size;
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
}
}
/*
* xfs_file_splice_write() does not use xfs_rw_ilock() because
* generic_file_splice_write() takes the i_mutex itself. This, in theory,
......@@ -475,7 +430,6 @@ xfs_file_splice_write(
{
struct inode *inode = outfilp->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
xfs_fsize_t new_size;
int ioflags = 0;
ssize_t ret;
......@@ -489,19 +443,12 @@ xfs_file_splice_write(
xfs_ilock(ip, XFS_IOLOCK_EXCL);
new_size = *ppos + count;
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (new_size > ip->i_size)
ip->i_new_size = new_size;
xfs_iunlock(ip, XFS_ILOCK_EXCL);
trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
if (ret > 0)
XFS_STATS_ADD(xs_write_bytes, ret);
xfs_aio_write_isize_update(inode, ppos, ret);
xfs_aio_write_newsize_update(ip, new_size);
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
......@@ -689,28 +636,26 @@ xfs_zero_eof(
/*
* Common pre-write limit and setup checks.
*
* Returns with iolock held according to @iolock.
* Called with the iolock held either shared or exclusive according to
* @iolock, and returns with it held. Might upgrade the iolock to exclusive
* if called for a direct write beyond i_size.
*/
STATIC ssize_t
xfs_file_aio_write_checks(
struct file *file,
loff_t *pos,
size_t *count,
xfs_fsize_t *new_sizep,
int *iolock)
{
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
xfs_fsize_t new_size;
int error = 0;
xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
*new_sizep = 0;
restart:
error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
if (error) {
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
*iolock = 0;
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
......@@ -720,36 +665,21 @@ xfs_file_aio_write_checks(
/*
* If the offset is beyond the size of the file, we need to zero any
* blocks that fall between the existing EOF and the start of this
* write. There is no need to issue zeroing if another in-flght IO ends
* at or before this one If zeronig is needed and we are currently
* holding the iolock shared, we need to update it to exclusive which
* involves dropping all locks and relocking to maintain correct locking
* order. If we do this, restart the function to ensure all checks and
* values are still valid.
* write. If zeroing is needed and we are currently holding the
* iolock shared, we need to update it to exclusive which involves
* dropping all locks and relocking to maintain correct locking order.
* If we do this, restart the function to ensure all checks and values
* are still valid.
*/
if ((ip->i_new_size && *pos > ip->i_new_size) ||
(!ip->i_new_size && *pos > ip->i_size)) {
if (*pos > i_size_read(inode)) {
if (*iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock);
goto restart;
}
error = -xfs_zero_eof(ip, *pos, ip->i_size);
error = -xfs_zero_eof(ip, *pos, i_size_read(inode));
}
/*
* If this IO extends beyond EOF, we may need to update ip->i_new_size.
* We have already zeroed space beyond EOF (if necessary). Only update
* ip->i_new_size if this IO ends beyond any other in-flight writes.
*/
new_size = *pos + *count;
if (new_size > ip->i_size) {
if (new_size > ip->i_new_size)
ip->i_new_size = new_size;
*new_sizep = new_size;
}
xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
if (error)
return error;
......@@ -794,9 +724,7 @@ xfs_file_dio_aio_write(
const struct iovec *iovp,
unsigned long nr_segs,
loff_t pos,
size_t ocount,
xfs_fsize_t *new_size,
int *iolock)
size_t ocount)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
......@@ -806,10 +734,10 @@ xfs_file_dio_aio_write(
ssize_t ret = 0;
size_t count = ocount;
int unaligned_io = 0;
int iolock;
struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
*iolock = 0;
if ((pos & target->bt_smask) || (count & target->bt_smask))
return -XFS_ERROR(EINVAL);
......@@ -824,31 +752,31 @@ xfs_file_dio_aio_write(
* EOF zeroing cases and fill out the new inode size as appropriate.
*/
if (unaligned_io || mapping->nrpages)
*iolock = XFS_IOLOCK_EXCL;
iolock = XFS_IOLOCK_EXCL;
else
*iolock = XFS_IOLOCK_SHARED;
xfs_rw_ilock(ip, *iolock);
iolock = XFS_IOLOCK_SHARED;
xfs_rw_ilock(ip, iolock);
/*
* Recheck if there are cached pages that need invalidate after we got
* the iolock to protect against other threads adding new pages while
* we were waiting for the iolock.
*/
if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, *iolock);
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock);
if (mapping->nrpages && iolock == XFS_IOLOCK_SHARED) {
xfs_rw_iunlock(ip, iolock);
iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, iolock);
}
ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
if (ret)
return ret;
goto out;
if (mapping->nrpages) {
ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
FI_REMAPF_LOCKED);
if (ret)
return ret;
goto out;
}
/*
......@@ -857,15 +785,18 @@ xfs_file_dio_aio_write(
*/
if (unaligned_io)
inode_dio_wait(inode);
else if (*iolock == XFS_IOLOCK_EXCL) {
else if (iolock == XFS_IOLOCK_EXCL) {
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
*iolock = XFS_IOLOCK_SHARED;
iolock = XFS_IOLOCK_SHARED;
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
ret = generic_file_direct_write(iocb, iovp,
&nr_segs, pos, &iocb->ki_pos, count, ocount);
out:
xfs_rw_iunlock(ip, iolock);
/* No fallback to buffered IO on errors for XFS. */
ASSERT(ret < 0 || ret == count);
return ret;
......@@ -877,9 +808,7 @@ xfs_file_buffered_aio_write(
const struct iovec *iovp,
unsigned long nr_segs,
loff_t pos,
size_t ocount,
xfs_fsize_t *new_size,
int *iolock)
size_t ocount)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
......@@ -887,14 +816,14 @@ xfs_file_buffered_aio_write(
struct xfs_inode *ip = XFS_I(inode);
ssize_t ret;
int enospc = 0;
int iolock = XFS_IOLOCK_EXCL;
size_t count = ocount;
*iolock = XFS_IOLOCK_EXCL;
xfs_rw_ilock(ip, *iolock);
xfs_rw_ilock(ip, iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, new_size, iolock);
ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
if (ret)
return ret;
goto out;
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
......@@ -908,13 +837,15 @@ xfs_file_buffered_aio_write(
* page locks and retry *once*
*/
if (ret == -ENOSPC && !enospc) {
ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
if (ret)
return ret;
enospc = 1;
goto write_retry;
ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
if (!ret)
goto write_retry;
}
current->backing_dev_info = NULL;
out:
xfs_rw_iunlock(ip, iolock);
return ret;
}
......@@ -930,9 +861,7 @@ xfs_file_aio_write(
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
ssize_t ret;
int iolock;
size_t ocount = 0;
xfs_fsize_t new_size = 0;
XFS_STATS_INC(xs_write_calls);
......@@ -951,33 +880,22 @@ xfs_file_aio_write(
return -EIO;
if (unlikely(file->f_flags & O_DIRECT))
ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
ocount, &new_size, &iolock);
ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
else
ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
ocount, &new_size, &iolock);
xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
ocount);
if (ret <= 0)
goto out_unlock;
if (ret > 0) {
ssize_t err;
/* Handle various SYNC-type writes */
if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
loff_t end = pos + ret - 1;
int error;
XFS_STATS_ADD(xs_write_bytes, ret);
xfs_rw_iunlock(ip, iolock);
error = xfs_file_fsync(file, pos, end,
(file->f_flags & __O_SYNC) ? 0 : 1);
xfs_rw_ilock(ip, iolock);
if (error)
ret = error;
/* Handle various SYNC-type writes */
err = generic_write_sync(file, pos, ret);
if (err < 0)
ret = err;
}
out_unlock:
xfs_aio_write_newsize_update(ip, new_size);
xfs_rw_iunlock(ip, iolock);
return ret;
}
......
......@@ -90,7 +90,7 @@ xfs_wait_on_pages(
if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
return -filemap_fdatawait_range(mapping, first,
last == -1 ? ip->i_size - 1 : last);
last == -1 ? XFS_ISIZE(ip) - 1 : last);
}
return 0;
}
......@@ -77,7 +77,7 @@ xfs_inode_alloc(
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
ASSERT(!xfs_isiflocked(ip));
ASSERT(ip->i_ino == 0);
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
......@@ -94,8 +94,6 @@ xfs_inode_alloc(
ip->i_update_core = 0;
ip->i_delayed_blks = 0;
memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
ip->i_size = 0;
ip->i_new_size = 0;
return ip;
}
......@@ -150,7 +148,7 @@ xfs_inode_free(
/* asserts to verify all state is correct here */
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
ASSERT(!xfs_isiflocked(ip));
/*
* Because we use RCU freeing we need to ensure the inode always
......@@ -450,8 +448,6 @@ xfs_iget(
*ipp = ip;
ASSERT(ip->i_df.if_ext_max ==
XFS_IFORK_DSIZE(ip) / sizeof(xfs_bmbt_rec_t));
/*
* If we have a real type for an on-disk inode, we can set ops(&unlock)
* now. If it's a new inode being created, xfs_ialloc will handle it.
......@@ -715,3 +711,19 @@ xfs_isilocked(
return 0;
}
#endif
/*
* Slow path of xfs_iflock(): sleep until this task manages to set the
* XFS_IFLOCK bit in ip->i_flags.
*
* The task waits uninterruptibly, as an exclusive waiter, on the bit
* waitqueue keyed by (&ip->i_flags, __XFS_IFLOCK_BIT) and retries
* xfs_iflock_nowait() after every pass until it succeeds.
*/
void
__xfs_iflock(
struct xfs_inode *ip)
{
wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IFLOCK_BIT);
DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IFLOCK_BIT);
do {
/* Register on the waitqueue before re-checking the flush lock bit. */
prepare_to_wait_exclusive(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
/* Only go to sleep while the flush lock is still held by someone. */
if (xfs_isiflocked(ip))
io_schedule();
} while (!xfs_iflock_nowait(ip));
finish_wait(wq, &wait.wait);
}
......@@ -299,11 +299,8 @@ xfs_iformat(
{
xfs_attr_shortform_t *atp;
int size;
int error;
int error = 0;
xfs_fsize_t di_size;
ip->i_df.if_ext_max =
XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
error = 0;
if (unlikely(be32_to_cpu(dip->di_nextents) +
be16_to_cpu(dip->di_anextents) >
......@@ -350,7 +347,6 @@ xfs_iformat(
return XFS_ERROR(EFSCORRUPTED);
}
ip->i_d.di_size = 0;
ip->i_size = 0;
ip->i_df.if_u2.if_rdev = xfs_dinode_get_rdev(dip);
break;
......@@ -409,10 +405,10 @@ xfs_iformat(
}
if (!XFS_DFORK_Q(dip))
return 0;
ASSERT(ip->i_afp == NULL);
ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
ip->i_afp->if_ext_max =
XFS_IFORK_ASIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
switch (dip->di_aformat) {
case XFS_DINODE_FMT_LOCAL:
atp = (xfs_attr_shortform_t *)XFS_DFORK_APTR(dip);
......@@ -604,10 +600,11 @@ xfs_iformat_btree(
* or the number of extents is greater than the number of
* blocks.
*/
if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max
|| XFS_BMDR_SPACE_CALC(nrecs) >
XFS_DFORK_SIZE(dip, ip->i_mount, whichfork)
|| XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
if (unlikely(XFS_IFORK_NEXTENTS(ip, whichfork) <=
XFS_IFORK_MAXEXT(ip, whichfork) ||
XFS_BMDR_SPACE_CALC(nrecs) >
XFS_DFORK_SIZE(dip, ip->i_mount, whichfork) ||
XFS_IFORK_NEXTENTS(ip, whichfork) > ip->i_d.di_nblocks)) {
xfs_warn(ip->i_mount, "corrupt inode %Lu (btree).",
(unsigned long long) ip->i_ino);
XFS_CORRUPTION_ERROR("xfs_iformat_btree", XFS_ERRLEVEL_LOW,
......@@ -835,12 +832,6 @@ xfs_iread(
* with the uninitialized part of it.
*/
ip->i_d.di_mode = 0;
/*
* Initialize the per-fork minima and maxima for a new
* inode here. xfs_iformat will do it for old inodes.
*/
ip->i_df.if_ext_max =
XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
}
/*
......@@ -861,7 +852,6 @@ xfs_iread(
}
ip->i_delayed_blks = 0;
ip->i_size = ip->i_d.di_size;
/*
* Mark the buffer containing the inode as something to keep
......@@ -1051,7 +1041,6 @@ xfs_ialloc(
}
ip->i_d.di_size = 0;
ip->i_size = 0;
ip->i_d.di_nextents = 0;
ASSERT(ip->i_d.di_nblocks == 0);
......@@ -1165,52 +1154,6 @@ xfs_ialloc(
return 0;
}
/*
* Check to make sure that there are no blocks allocated to the
* file beyond the size of the file. We don't check this for
* files with fixed size extents or real time extents, but we
* at least do it for regular files.
*/
#ifdef DEBUG
STATIC void
xfs_isize_check(
struct xfs_inode *ip,
xfs_fsize_t isize)
{
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t map_first;
int nimaps;
xfs_bmbt_irec_t imaps[2];
int error;
if (!S_ISREG(ip->i_d.di_mode))
return;
if (XFS_IS_REALTIME_INODE(ip))
return;
if (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
return;
nimaps = 2;
map_first = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
/*
* The filesystem could be shutting down, so bmapi may return
* an error.
*/
error = xfs_bmapi_read(ip, map_first,
(XFS_B_TO_FSB(mp,
(xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first),
imaps, &nimaps, XFS_BMAPI_ENTIRE);
if (error)
return;
ASSERT(nimaps == 1);
ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
}
#else /* DEBUG */
#define xfs_isize_check(ip, isize)
#endif /* DEBUG */
/*
* Free up the underlying blocks past new_size. The new size must be smaller
* than the current size. This routine can be used both for the attribute and
......@@ -1252,12 +1195,14 @@ xfs_itruncate_extents(
int done = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
ASSERT(new_size <= ip->i_size);
ASSERT(new_size <= XFS_ISIZE(ip));
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(ip->i_itemp != NULL);
ASSERT(ip->i_itemp->ili_lock_flags == 0);
ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
trace_xfs_itruncate_extents_start(ip, new_size);
/*
* Since it is possible for space to become allocated beyond
* the end of the file (in a crash where the space is allocated
......@@ -1325,6 +1270,14 @@ xfs_itruncate_extents(
goto out;
}
/*
* Always re-log the inode so that our permanent transaction can keep
* on rolling it forward in the log.
*/
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
trace_xfs_itruncate_extents_end(ip, new_size);
out:
*tpp = tp;
return error;
......@@ -1338,74 +1291,6 @@ xfs_itruncate_extents(
goto out;
}
int
xfs_itruncate_data(
struct xfs_trans **tpp,
struct xfs_inode *ip,
xfs_fsize_t new_size)
{
int error;
trace_xfs_itruncate_data_start(ip, new_size);
/*
* The first thing we do is set the size to new_size permanently on
* disk. This way we don't have to worry about anyone ever being able
* to look at the data being freed even in the face of a crash.
* What we're getting around here is the case where we free a block, it
* is allocated to another file, it is written to, and then we crash.
* If the new data gets written to the file but the log buffers
* containing the free and reallocation don't, then we'd end up with
* garbage in the blocks being freed. As long as we make the new_size
* permanent before actually freeing any blocks it doesn't matter if
* they get written to.
*/
if (ip->i_d.di_nextents > 0) {
/*
* If we are not changing the file size then do not update
* the on-disk file size - we may be called from
* xfs_inactive_free_eofblocks(). If we update the on-disk
* file size and then the system crashes before the contents
* of the file are flushed to disk then the files may be
* full of holes (ie NULL files bug).
*/
if (ip->i_size != new_size) {
ip->i_d.di_size = new_size;
ip->i_size = new_size;
xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
}
}
error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
if (error)
return error;
/*
* If we are not changing the file size then do not update the on-disk
* file size - we may be called from xfs_inactive_free_eofblocks().
* If we update the on-disk file size and then the system crashes
* before the contents of the file are flushed to disk then the files
* may be full of holes (ie NULL files bug).
*/
xfs_isize_check(ip, new_size);
if (ip->i_size != new_size) {
ip->i_d.di_size = new_size;
ip->i_size = new_size;
}
ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
/*
* Always re-log the inode so that our permanent transaction can keep
* on rolling it forward in the log.
*/
xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
trace_xfs_itruncate_data_end(ip, new_size);
return 0;
}
/*
* This is called when the inode's link count goes to 0.
* We place the on-disk inode on a list in the AGI. It
......@@ -1824,8 +1709,7 @@ xfs_ifree(
ASSERT(ip->i_d.di_nlink == 0);
ASSERT(ip->i_d.di_nextents == 0);
ASSERT(ip->i_d.di_anextents == 0);
ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
(!S_ISREG(ip->i_d.di_mode)));
ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode));
ASSERT(ip->i_d.di_nblocks == 0);
/*
......@@ -1844,8 +1728,6 @@ xfs_ifree(
ip->i_d.di_flags = 0;
ip->i_d.di_dmevmask = 0;
ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
ip->i_df.if_ext_max =
XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
/*
......@@ -2151,7 +2033,7 @@ xfs_idestroy_fork(
* once someone is waiting for it to be unpinned.
*/
static void
xfs_iunpin_nowait(
xfs_iunpin(
struct xfs_inode *ip)
{
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
......@@ -2163,14 +2045,29 @@ xfs_iunpin_nowait(
}
/*
* Slow path of xfs_iunpin_wait(): call xfs_iunpin() and then sleep
* uninterruptibly until xfs_ipincount() reports zero.
*
* The task sleeps on the bit waitqueue keyed by
* (&ip->i_flags, __XFS_IPINNED_BIT); the bit is used only as a wakeup key.
*/
static void
__xfs_iunpin_wait(
struct xfs_inode *ip)
{
wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_IPINNED_BIT);
DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_IPINNED_BIT);
xfs_iunpin(ip);
do {
/* Register on the waitqueue before re-checking the pin count. */
prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
/* Sleep only while the inode is still pinned. */
if (xfs_ipincount(ip))
io_schedule();
} while (xfs_ipincount(ip));
finish_wait(wq, &wait.wait);
}
void
xfs_iunpin_wait(
struct xfs_inode *ip)
{
if (xfs_ipincount(ip)) {
xfs_iunpin_nowait(ip);
wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0));
}
if (xfs_ipincount(ip))
__xfs_iunpin_wait(ip);
}
/*
......@@ -2510,9 +2407,9 @@ xfs_iflush(
XFS_STATS_INC(xs_iflush_count);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(!completion_done(&ip->i_flush));
ASSERT(xfs_isiflocked(ip));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max);
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
iip = ip->i_itemp;
mp = ip->i_mount;
......@@ -2529,7 +2426,7 @@ xfs_iflush(
* out for us if they occur after the log force completes.
*/
if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
xfs_iunpin_nowait(ip);
xfs_iunpin(ip);
xfs_ifunlock(ip);
return EAGAIN;
}
......@@ -2626,9 +2523,9 @@ xfs_iflush_int(
#endif
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(!completion_done(&ip->i_flush));
ASSERT(xfs_isiflocked(ip));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max);
ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));
iip = ip->i_itemp;
mp = ip->i_mount;
......
......@@ -66,7 +66,6 @@ typedef struct xfs_ifork {
struct xfs_btree_block *if_broot; /* file's incore btree root */
short if_broot_bytes; /* bytes allocated for root */
unsigned char if_flags; /* per-fork flags */
unsigned char if_ext_max; /* max # of extent records */
union {
xfs_bmbt_rec_host_t *if_extents;/* linear map file exts */
xfs_ext_irec_t *if_ext_irec; /* irec map file exts */
......@@ -206,12 +205,12 @@ typedef struct xfs_icdinode {
((w) == XFS_DATA_FORK ? \
((ip)->i_d.di_nextents = (n)) : \
((ip)->i_d.di_anextents = (n)))
/*
* Number of xfs_bmbt_rec_t extent records that fit in fork "w" of inode "ip",
* computed from the fork's current size.
*/
#define XFS_IFORK_MAXEXT(ip, w) \
(XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
#ifdef __KERNEL__
struct bhv_desc;
struct xfs_buf;
struct xfs_bmap_free;
struct xfs_bmbt_irec;
......@@ -220,12 +219,6 @@ struct xfs_mount;
struct xfs_trans;
struct xfs_dquot;
typedef struct dm_attrs_s {
__uint32_t da_dmevmask; /* DMIG event mask */
__uint16_t da_dmstate; /* DMIG state info */
__uint16_t da_pad; /* DMIG extra padding */
} dm_attrs_t;
typedef struct xfs_inode {
/* Inode linking and identification information. */
struct xfs_mount *i_mount; /* fs mount struct ptr */
......@@ -244,27 +237,19 @@ typedef struct xfs_inode {
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
mrlock_t i_iolock; /* inode IO lock */
struct completion i_flush; /* inode flush completion q */
atomic_t i_pincount; /* inode pin count */
wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */
spinlock_t i_flags_lock; /* inode i_flags lock */
/* Miscellaneous state. */
unsigned short i_flags; /* see defined flags below */
unsigned long i_flags; /* see defined flags below */
unsigned char i_update_core; /* timestamps/size is dirty */
unsigned int i_delayed_blks; /* count of delay alloc blks */
xfs_icdinode_t i_d; /* most of ondisk inode */
xfs_fsize_t i_size; /* in-memory size */
xfs_fsize_t i_new_size; /* size when write completes */
/* VFS inode */
struct inode i_vnode; /* embedded VFS inode */
} xfs_inode_t;
#define XFS_ISIZE(ip) S_ISREG((ip)->i_d.di_mode) ? \
(ip)->i_size : (ip)->i_d.di_size;
/* Convert from vfs inode to xfs inode */
static inline struct xfs_inode *XFS_I(struct inode *inode)
{
......@@ -277,6 +262,18 @@ static inline struct inode *VFS_I(struct xfs_inode *ip)
return &ip->i_vnode;
}
/*
* Return the current size of an inode.
*
* Regular files report the size held in the VFS inode: their on-disk size
* (di_size) is only updated when data is actually written back to disk, so
* until then the VFS copy is the only one that is up to date. Every other
* inode type reports di_size directly.
*/
static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
{
	return S_ISREG(ip->i_d.di_mode) ?
		i_size_read(VFS_I(ip)) : ip->i_d.di_size;
}
/*
* i_flags helper functions
*/
......@@ -331,6 +328,19 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
return ret;
}
/*
* Test-and-set helper for ip->i_flags, performed under i_flags_lock.
*
* If none of the bits in @flags is currently set, all of them are set and
* zero is returned. Otherwise i_flags is left untouched and the already-set
* subset of @flags is returned (non-zero).
*/
static inline int
xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
{
	int	already_set;

	spin_lock(&ip->i_flags_lock);
	already_set = ip->i_flags & flags;
	if (already_set == 0)
		ip->i_flags |= flags;
	spin_unlock(&ip->i_flags_lock);

	return already_set;
}
/*
* Project quota id helpers (previously projid was 16bit only
* and using two 16bit values to hold new 32bit projid was chosen
......@@ -350,36 +360,20 @@ xfs_set_projid(struct xfs_inode *ip,
ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
}
/*
* Manage the i_flush queue embedded in the inode. This completion
* queue synchronizes processes attempting to flush the in-core
* inode back to disk.
*/
static inline void xfs_iflock(xfs_inode_t *ip)
{
wait_for_completion(&ip->i_flush);
}
static inline int xfs_iflock_nowait(xfs_inode_t *ip)
{
return try_wait_for_completion(&ip->i_flush);
}
static inline void xfs_ifunlock(xfs_inode_t *ip)
{
complete(&ip->i_flush);
}
/*
* In-core inode flags.
*/
#define XFS_IRECLAIM 0x0001 /* started reclaiming this inode */
#define XFS_ISTALE 0x0002 /* inode has been staled */
#define XFS_IRECLAIMABLE 0x0004 /* inode can be reclaimed */
#define XFS_INEW 0x0008 /* inode has just been allocated */
#define XFS_IFILESTREAM 0x0010 /* inode is in a filestream directory */
#define XFS_ITRUNCATED 0x0020 /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE 0x0040 /* dirty release already seen */
#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
#define XFS_ISTALE (1 << 1) /* inode has been staled */
#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
#define XFS_INEW (1 << 3) /* inode has just been allocated */
#define XFS_IFILESTREAM (1 << 4) /* inode is in a filestream dir. */
#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
#define XFS_IFLOCK (1 << __XFS_IFLOCK_BIT)
#define __XFS_IPINNED_BIT 8 /* wakeup key for zero pin count */
#define XFS_IPINNED (1 << __XFS_IPINNED_BIT)
/*
* Per-lifetime flags need to be reset when re-using a reclaimable inode during
......@@ -391,6 +385,34 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \
XFS_IFILESTREAM);
/*
* Synchronize processes attempting to flush the in-core inode back to disk.
*/
extern void __xfs_iflock(struct xfs_inode *ip);
/*
* Try to take the inode flush lock without sleeping.
*
* Returns non-zero if XFS_IFLOCK was clear and has been set by this call,
* zero if it was already set.
*/
static inline int xfs_iflock_nowait(struct xfs_inode *ip)
{
return !xfs_iflags_test_and_set(ip, XFS_IFLOCK);
}
/*
* Take the inode flush lock. Tries the non-blocking path first and falls
* back to __xfs_iflock() when the lock is already held.
*/
static inline void xfs_iflock(struct xfs_inode *ip)
{
if (!xfs_iflock_nowait(ip))
__xfs_iflock(ip);
}
/*
* Release the inode flush lock: clear XFS_IFLOCK and then wake any task
* waiting on the (&ip->i_flags, __XFS_IFLOCK_BIT) bit waitqueue.
*/
static inline void xfs_ifunlock(struct xfs_inode *ip)
{
xfs_iflags_clear(ip, XFS_IFLOCK);
wake_up_bit(&ip->i_flags, __XFS_IFLOCK_BIT);
}
/*
* Query the inode flush lock: returns the result of xfs_iflags_test() for
* XFS_IFLOCK, i.e. whether the flush lock bit is currently set.
*/
static inline int xfs_isiflocked(struct xfs_inode *ip)
{
return xfs_iflags_test(ip, XFS_IFLOCK);
}
/*
* Flags for inode locking.
* Bit ranges: 1<<1 - 1<<16-1 -- iolock/ilock modes (bitfield)
......@@ -491,8 +513,6 @@ int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
struct xfs_bmap_free *);
int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
int, xfs_fsize_t);
int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *,
xfs_fsize_t);
int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
void xfs_iext_realloc(xfs_inode_t *, int, int);
......
......@@ -79,8 +79,6 @@ xfs_inode_item_size(
break;
case XFS_DINODE_FMT_BTREE:
ASSERT(ip->i_df.if_ext_max ==
XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t));
iip->ili_format.ilf_fields &=
~(XFS_ILOG_DDATA | XFS_ILOG_DEXT |
XFS_ILOG_DEV | XFS_ILOG_UUID);
......@@ -557,7 +555,7 @@ xfs_inode_item_unpin(
trace_xfs_inode_unpin(ip, _RET_IP_);
ASSERT(atomic_read(&ip->i_pincount) > 0);
if (atomic_dec_and_test(&ip->i_pincount))
wake_up(&ip->i_ipin_wait);
wake_up_bit(&ip->i_flags, __XFS_IPINNED_BIT);
}
/*
......@@ -719,7 +717,7 @@ xfs_inode_item_pushbuf(
* If a flush is not in progress anymore, chances are that the
* inode was taken off the AIL. So, just get out.
*/
if (completion_done(&ip->i_flush) ||
if (!xfs_isiflocked(ip) ||
!(lip->li_flags & XFS_LI_IN_AIL)) {
xfs_iunlock(ip, XFS_ILOCK_SHARED);
return true;
......@@ -752,7 +750,7 @@ xfs_inode_item_push(
struct xfs_inode *ip = iip->ili_inode;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
ASSERT(!completion_done(&ip->i_flush));
ASSERT(xfs_isiflocked(ip));
/*
* Since we were able to lock the inode's flush lock and
......
......@@ -57,26 +57,26 @@ xfs_iomap_eof_align_last_fsb(
xfs_fileoff_t *last_fsb)
{
xfs_fileoff_t new_last_fsb = 0;
xfs_extlen_t align;
xfs_extlen_t align = 0;
int eof, error;
if (XFS_IS_REALTIME_INODE(ip))
;
/*
* If mounted with the "-o swalloc" option, roundup the allocation
* request to a stripe width boundary if the file size is >=
* stripe width and we are allocating past the allocation eof.
*/
else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
(ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth)))
new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
/*
* Roundup the allocation request to a stripe unit (m_dalign) boundary
* if the file size is >= stripe unit size, and we are allocating past
* the allocation eof.
*/
else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign)))
new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
if (!XFS_IS_REALTIME_INODE(ip)) {
/*
* Round up the allocation request to a stripe unit
* (m_dalign) boundary if the file size is >= stripe unit
* size, and we are allocating past the allocation eof.
*
* If mounted with the "-o swalloc" option the alignment is
* increased from the stripe unit size to the stripe width.
*/
if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC))
align = mp->m_swidth;
else if (mp->m_dalign)
align = mp->m_dalign;
if (align && XFS_ISIZE(ip) >= XFS_FSB_TO_B(mp, align))
new_last_fsb = roundup_64(*last_fsb, align);
}
/*
* Always round up the allocation request to an extent boundary
......@@ -154,7 +154,7 @@ xfs_iomap_write_direct(
offset_fsb = XFS_B_TO_FSBT(mp, offset);
last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
if ((offset + count) > ip->i_size) {
if ((offset + count) > XFS_ISIZE(ip)) {
error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
if (error)
goto error_out;
......@@ -211,7 +211,7 @@ xfs_iomap_write_direct(
xfs_trans_ijoin(tp, ip, 0);
bmapi_flag = 0;
if (offset < ip->i_size || extsz)
if (offset < XFS_ISIZE(ip) || extsz)
bmapi_flag |= XFS_BMAPI_PREALLOC;
/*
......@@ -286,7 +286,7 @@ xfs_iomap_eof_want_preallocate(
int found_delalloc = 0;
*prealloc = 0;
if ((offset + count) <= ip->i_size)
if (offset + count <= XFS_ISIZE(ip))
return 0;
/*
......@@ -340,7 +340,7 @@ xfs_iomap_prealloc_size(
* if we pass in alloc_blocks = 0. Hence the "+ 1" to
* ensure we always pass in a non-zero value.
*/
alloc_blocks = XFS_B_TO_FSB(mp, ip->i_size) + 1;
alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
rounddown_pow_of_two(alloc_blocks));
......@@ -564,7 +564,7 @@ xfs_iomap_write_allocate(
* back....
*/
nimaps = 1;
end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
error = xfs_bmap_last_offset(NULL, ip, &last_block,
XFS_DATA_FORK);
if (error)
......
......@@ -750,6 +750,7 @@ xfs_setattr_size(
struct xfs_mount *mp = ip->i_mount;
struct inode *inode = VFS_I(ip);
int mask = iattr->ia_valid;
xfs_off_t oldsize, newsize;
struct xfs_trans *tp;
int error;
uint lock_flags;
......@@ -777,11 +778,13 @@ xfs_setattr_size(
lock_flags |= XFS_IOLOCK_EXCL;
xfs_ilock(ip, lock_flags);
oldsize = inode->i_size;
newsize = iattr->ia_size;
/*
* Short circuit the truncate case for zero length files.
*/
if (iattr->ia_size == 0 &&
ip->i_size == 0 && ip->i_d.di_nextents == 0) {
if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) {
if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
goto out_unlock;
......@@ -807,14 +810,14 @@ xfs_setattr_size(
* the inode to the transaction, because the inode cannot be unlocked
* once it is a part of the transaction.
*/
if (iattr->ia_size > ip->i_size) {
if (newsize > oldsize) {
/*
* Do the first part of growing a file: zero any data in the
* last block that is beyond the old EOF. We need to do this
* before the inode is joined to the transaction to modify
* i_size.
*/
error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
error = xfs_zero_eof(ip, newsize, oldsize);
if (error)
goto out_unlock;
}
......@@ -833,8 +836,8 @@ xfs_setattr_size(
* here and prevents waiting for other data not within the range we
* care about here.
*/
if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, 0,
if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) {
error = xfs_flush_pages(ip, ip->i_d.di_size, newsize, 0,
FI_NONE);
if (error)
goto out_unlock;
......@@ -845,8 +848,7 @@ xfs_setattr_size(
*/
inode_dio_wait(inode);
error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
xfs_get_blocks);
error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
if (error)
goto out_unlock;
......@@ -857,7 +859,7 @@ xfs_setattr_size(
if (error)
goto out_trans_cancel;
truncate_setsize(inode, iattr->ia_size);
truncate_setsize(inode, newsize);
commit_flags = XFS_TRANS_RELEASE_LOG_RES;
lock_flags |= XFS_ILOCK_EXCL;
......@@ -876,19 +878,29 @@ xfs_setattr_size(
* these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update.
*/
if (iattr->ia_size != ip->i_size &&
(!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
iattr->ia_ctime = iattr->ia_mtime =
current_fs_time(inode->i_sb);
mask |= ATTR_CTIME | ATTR_MTIME;
}
if (iattr->ia_size > ip->i_size) {
ip->i_d.di_size = iattr->ia_size;
ip->i_size = iattr->ia_size;
} else if (iattr->ia_size <= ip->i_size ||
(iattr->ia_size == 0 && ip->i_d.di_nextents)) {
error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
/*
* The first thing we do is set the size to new_size permanently on
* disk. This way we don't have to worry about anyone ever being able
* to look at the data being freed even in the face of a crash.
* What we're getting around here is the case where we free a block, it
* is allocated to another file, it is written to, and then we crash.
* If the new data gets written to the file but the log buffers
* containing the free and reallocation don't, then we'd end up with
* garbage in the blocks being freed. As long as we make the new size
* permanent before actually freeing any blocks it doesn't matter if
* they get written to.
*/
ip->i_d.di_size = newsize;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (newsize <= oldsize) {
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, newsize);
if (error)
goto out_trans_abort;
......
......@@ -31,6 +31,7 @@
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_itable.h"
#include "xfs_bmap.h"
#include "xfs_rtalloc.h"
......@@ -263,13 +264,18 @@ xfs_qm_scall_trunc_qfile(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
error = xfs_itruncate_data(&tp, ip, 0);
ip->i_d.di_size = 0;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
if (error) {
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
XFS_TRANS_ABORT);
goto out_unlock;
}
ASSERT(ip->i_d.di_nextents == 0);
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
......
......@@ -828,14 +828,6 @@ xfs_fs_inode_init_once(
/* xfs inode */
atomic_set(&ip->i_pincount, 0);
spin_lock_init(&ip->i_flags_lock);
init_waitqueue_head(&ip->i_ipin_wait);
/*
* Because we want to use a counting completion, complete
* the flush completion once to allow a single access to
* the flush completion without blocking.
*/
init_completion(&ip->i_flush);
complete(&ip->i_flush);
mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
"xfsino", ip->i_ino);
......
......@@ -707,14 +707,13 @@ xfs_reclaim_inode_grab(
return 1;
/*
* do some unlocked checks first to avoid unnecessary lock traffic.
* The first is a flush lock check, the second is a already in reclaim
* check. Only do these checks if we are not going to block on locks.
* If we are asked for non-blocking operation, do unlocked checks to
* see if the inode already is being flushed or in reclaim to avoid
* lock traffic.
*/
if ((flags & SYNC_TRYLOCK) &&
(!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
__xfs_iflags_test(ip, XFS_IFLOCK | XFS_IRECLAIM))
return 1;
}
/*
* The radix tree lock here protects a thread in xfs_iget from racing
......
......@@ -891,7 +891,6 @@ DECLARE_EVENT_CLASS(xfs_file_class,
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(xfs_fsize_t, size)
__field(xfs_fsize_t, new_size)
__field(loff_t, offset)
__field(size_t, count)
__field(int, flags)
......@@ -900,17 +899,15 @@ DECLARE_EVENT_CLASS(xfs_file_class,
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->size = ip->i_d.di_size;
__entry->new_size = ip->i_new_size;
__entry->offset = offset;
__entry->count = count;
__entry->flags = flags;
),
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
TP_printk("dev %d:%d ino 0x%llx size 0x%llx "
"offset 0x%llx count 0x%zx ioflags %s",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->size,
__entry->new_size,
__entry->offset,
__entry->count,
__print_flags(__entry->flags, "|", XFS_IO_FLAGS))
......@@ -978,7 +975,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(loff_t, size)
__field(loff_t, new_size)
__field(loff_t, offset)
__field(size_t, count)
__field(int, type)
......@@ -990,7 +986,6 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->size = ip->i_d.di_size;
__entry->new_size = ip->i_new_size;
__entry->offset = offset;
__entry->count = count;
__entry->type = type;
......@@ -998,13 +993,11 @@ DECLARE_EVENT_CLASS(xfs_imap_class,
__entry->startblock = irec ? irec->br_startblock : 0;
__entry->blockcount = irec ? irec->br_blockcount : 0;
),
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
"offset 0x%llx count %zd type %s "
"startoff 0x%llx startblock %lld blockcount 0x%llx",
TP_printk("dev %d:%d ino 0x%llx size 0x%llx offset 0x%llx count %zd "
"type %s startoff 0x%llx startblock %lld blockcount 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->size,
__entry->new_size,
__entry->offset,
__entry->count,
__print_symbolic(__entry->type, XFS_IO_TYPES),
......@@ -1031,26 +1024,23 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
__field(xfs_ino_t, ino)
__field(loff_t, isize)
__field(loff_t, disize)
__field(loff_t, new_size)
__field(loff_t, offset)
__field(size_t, count)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->isize = ip->i_size;
__entry->isize = VFS_I(ip)->i_size;
__entry->disize = ip->i_d.di_size;
__entry->new_size = ip->i_new_size;
__entry->offset = offset;
__entry->count = count;
),
TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx "
"offset 0x%llx count %zd",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->isize,
__entry->disize,
__entry->new_size,
__entry->offset,
__entry->count)
);
......@@ -1090,8 +1080,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class,
DEFINE_EVENT(xfs_itrunc_class, name, \
TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
TP_ARGS(ip, new_size))
DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_start);
DEFINE_ITRUNC_EVENT(xfs_itruncate_extents_end);
TRACE_EVENT(xfs_pagecache_inval,
TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
......@@ -1568,7 +1558,6 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class,
__field(xfs_ino_t, ino)
__field(int, format)
__field(int, nex)
__field(int, max_nex)
__field(int, broot_size)
__field(int, fork_off)
),
......@@ -1578,18 +1567,16 @@ DECLARE_EVENT_CLASS(xfs_swap_extent_class,
__entry->ino = ip->i_ino;
__entry->format = ip->i_d.di_format;
__entry->nex = ip->i_d.di_nextents;
__entry->max_nex = ip->i_df.if_ext_max;
__entry->broot_size = ip->i_df.if_broot_bytes;
__entry->fork_off = XFS_IFORK_BOFF(ip);
),
TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
"Max in-fork extents %d, broot size %d, fork offset %d",
"broot size %d, fork offset %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
__print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
__entry->nex,
__entry->max_nex,
__entry->broot_size,
__entry->fork_off)
)
......
......@@ -175,7 +175,7 @@ xfs_free_eofblocks(
* Figure out if there are any blocks beyond the end
* of the file. If not, then there is nothing to do.
*/
end_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)ip->i_size));
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
if (last_fsb <= end_fsb)
return 0;
......@@ -226,7 +226,14 @@ xfs_free_eofblocks(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
error = xfs_itruncate_data(&tp, ip, ip->i_size);
/*
* Do not update the on-disk file size. If we update the
* on-disk file size and then the system crashes before the
* contents of the file are flushed to disk then the files
* may be full of holes (i.e. the NULL files bug).
*/
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
XFS_ISIZE(ip));
if (error) {
/*
* If we get an error at this point we simply don't
......@@ -540,8 +547,8 @@ xfs_release(
return 0;
if ((S_ISREG(ip->i_d.di_mode) &&
((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
ip->i_delayed_blks > 0)) &&
(VFS_I(ip)->i_size > 0 ||
(VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS)) &&
(!(ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))) {
......@@ -618,7 +625,7 @@ xfs_inactive(
* only one with a reference to the inode.
*/
truncate = ((ip->i_d.di_nlink == 0) &&
((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
((ip->i_d.di_size != 0) || XFS_ISIZE(ip) != 0 ||
(ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
S_ISREG(ip->i_d.di_mode));
......@@ -632,12 +639,12 @@ xfs_inactive(
if (ip->i_d.di_nlink != 0) {
if ((S_ISREG(ip->i_d.di_mode) &&
((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS) &&
(!(ip->i_d.di_flags &
(VFS_I(ip)->i_size > 0 ||
(VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS) &&
(!(ip->i_d.di_flags &
(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
(ip->i_delayed_blks != 0)))) {
ip->i_delayed_blks != 0))) {
error = xfs_free_eofblocks(mp, ip, 0);
if (error)
return VN_INACTIVE_CACHE;
......@@ -670,13 +677,18 @@ xfs_inactive(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
error = xfs_itruncate_data(&tp, ip, 0);
ip->i_d.di_size = 0;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
if (error) {
xfs_trans_cancel(tp,
XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
return VN_INACTIVE_CACHE;
}
ASSERT(ip->i_d.di_nextents == 0);
} else if (S_ISLNK(ip->i_d.di_mode)) {
/*
......@@ -1961,11 +1973,11 @@ xfs_zero_remaining_bytes(
* since nothing can read beyond eof. The space will
* be zeroed when the file is extended anyway.
*/
if (startoff >= ip->i_size)
if (startoff >= XFS_ISIZE(ip))
return 0;
if (endoff > ip->i_size)
endoff = ip->i_size;
if (endoff > XFS_ISIZE(ip))
endoff = XFS_ISIZE(ip);
bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp,
......@@ -2260,7 +2272,7 @@ xfs_change_file_space(
bf->l_start += offset;
break;
case 2: /*SEEK_END*/
bf->l_start += ip->i_size;
bf->l_start += XFS_ISIZE(ip);
break;
default:
return XFS_ERROR(EINVAL);
......@@ -2277,7 +2289,7 @@ xfs_change_file_space(
bf->l_whence = 0;
startoffset = bf->l_start;
fsize = ip->i_size;
fsize = XFS_ISIZE(ip);
/*
* XFS_IOC_RESVSP and XFS_IOC_UNRESVSP will reserve or unreserve
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment