Commit 66ce3cf8 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs

* 'for-linus' of git://oss.sgi.com/xfs/xfs: (21 commits)
  xfs: return inode fork offset in bulkstat for fsr
  xfs: Increase the default size of the reserved blocks pool
  xfs: truncate delalloc extents when IO fails in writeback
  xfs: check for more work before sleeping in xfssyncd
  xfs: Fix a build warning in xfs_aops.c
  xfs: fix locking for inode cache radix tree tag updates
  xfs: remove xfs_ipin/xfs_iunpin
  xfs: cleanup xfs_iunpin_wait/xfs_iunpin_nowait
  xfs: kill xfs_lrw.h
  xfs: factor common xfs_trans_bjoin code
  xfs: stop passing opaque handles to xfs_log.c routines
  xfs: split xfs_bmap_btalloc
  xfs: fix xfs_fsblock_t tracing
  xfs: fix inode pincount check in fsync
  xfs: Non-blocking inode locking in IO completion
  xfs: implement optimized fdatasync
  xfs: remove wrapper for the fsync file operation
  xfs: remove wrappers for read/write file operations
  xfs: merge xfs_lrw.c into xfs_file.c
  xfs: fix dquota trace format
  ...
parents 05c5cb31 9b1f56d6
......@@ -105,7 +105,6 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
xfs_globals.o \
xfs_ioctl.o \
xfs_iops.o \
xfs_lrw.o \
xfs_super.o \
xfs_sync.o \
xfs_xattr.o)
......
......@@ -39,6 +39,7 @@
#include "xfs_iomap.h"
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include <linux/mpage.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
......@@ -163,14 +164,17 @@ xfs_ioend_new_eof(
}
/*
* Update on-disk file size now that data has been written to disk.
* The current in-memory file size is i_size. If a write is beyond
* eof i_new_size will be the intended file size until i_size is
* updated. If this write does not extend all the way to the valid
* file size then restrict this update to the end of the write.
* Update on-disk file size now that data has been written to disk. The
* current in-memory file size is i_size. If a write is beyond eof i_new_size
* will be the intended file size until i_size is updated. If this write does
* not extend all the way to the valid file size then restrict this update to
* the end of the write.
*
* This function does not block as blocking on the inode lock in IO completion
* can lead to IO completion order dependency deadlocks.. If it can't get the
* inode ilock it will return EAGAIN. Callers must handle this.
*/
STATIC void
STATIC int
xfs_setfilesize(
xfs_ioend_t *ioend)
{
......@@ -181,16 +185,40 @@ xfs_setfilesize(
ASSERT(ioend->io_type != IOMAP_READ);
if (unlikely(ioend->io_error))
return;
return 0;
if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
return EAGAIN;
xfs_ilock(ip, XFS_ILOCK_EXCL);
isize = xfs_ioend_new_eof(ioend);
if (isize) {
ip->i_d.di_size = isize;
xfs_mark_inode_dirty_sync(ip);
xfs_mark_inode_dirty(ip);
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
return 0;
}
/*
* Schedule IO completion handling on a xfsdatad if this was
* the final hold on this ioend. If we are asked to wait,
* flush the workqueue.
*/
STATIC void
xfs_finish_ioend(
xfs_ioend_t *ioend,
int wait)
{
if (atomic_dec_and_test(&ioend->io_remaining)) {
struct workqueue_struct *wq;
wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
xfsconvertd_workqueue : xfsdatad_workqueue;
queue_work(wq, &ioend->io_work);
if (wait)
flush_workqueue(wq);
}
}
/*
......@@ -200,9 +228,9 @@ STATIC void
xfs_end_io(
struct work_struct *work)
{
xfs_ioend_t *ioend =
container_of(work, xfs_ioend_t, io_work);
xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work);
struct xfs_inode *ip = XFS_I(ioend->io_inode);
int error = 0;
/*
* For unwritten extents we need to issue transactions to convert a
......@@ -210,7 +238,6 @@ xfs_end_io(
*/
if (ioend->io_type == IOMAP_UNWRITTEN &&
likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
int error;
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
ioend->io_size);
......@@ -222,30 +249,23 @@ xfs_end_io(
* We might have to update the on-disk file size after extending
* writes.
*/
if (ioend->io_type != IOMAP_READ)
xfs_setfilesize(ioend);
xfs_destroy_ioend(ioend);
}
if (ioend->io_type != IOMAP_READ) {
error = xfs_setfilesize(ioend);
ASSERT(!error || error == EAGAIN);
}
/*
* Schedule IO completion handling on a xfsdatad if this was
* the final hold on this ioend. If we are asked to wait,
* flush the workqueue.
/*
* If we didn't complete processing of the ioend, requeue it to the
* tail of the workqueue for another attempt later. Otherwise destroy
* it.
*/
STATIC void
xfs_finish_ioend(
xfs_ioend_t *ioend,
int wait)
{
if (atomic_dec_and_test(&ioend->io_remaining)) {
struct workqueue_struct *wq;
wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
xfsconvertd_workqueue : xfsdatad_workqueue;
queue_work(wq, &ioend->io_work);
if (wait)
flush_workqueue(wq);
}
if (error == EAGAIN) {
atomic_inc(&ioend->io_remaining);
xfs_finish_ioend(ioend, 0);
/* ensure we don't spin on blocked ioends */
delay(1);
} else
xfs_destroy_ioend(ioend);
}
/*
......@@ -341,7 +361,7 @@ xfs_submit_ioend_bio(
* but don't update the inode size until I/O completion.
*/
if (xfs_ioend_new_eof(ioend))
xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode));
xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
submit_bio(wbc->sync_mode == WB_SYNC_ALL ?
WRITE_SYNC_PLUG : WRITE, bio);
......@@ -874,6 +894,118 @@ xfs_cluster_write(
}
}
STATIC void
xfs_vm_invalidatepage(
struct page *page,
unsigned long offset)
{
trace_xfs_invalidatepage(page->mapping->host, page, offset);
block_invalidatepage(page, offset);
}
/*
* If the page has delalloc buffers on it, we need to punch them out before we
* invalidate the page. If we don't, we leave a stale delalloc mapping on the
* inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
* is done on that same region - the delalloc extent is returned when none is
* supposed to be there.
*
* We prevent this by truncating away the delalloc regions on the page before
* invalidating it. Because they are delalloc, we can do this without needing a
* transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
* truncation without a transaction as there is no space left for block
* reservation (typically why we see a ENOSPC in writeback).
*
* This is not a performance critical path, so for now just do the punching a
* buffer head at a time.
*/
STATIC void
xfs_aops_discard_page(
struct page *page)
{
struct inode *inode = page->mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct buffer_head *bh, *head;
loff_t offset = page_offset(page);
ssize_t len = 1 << inode->i_blkbits;
if (!xfs_is_delayed_page(page, IOMAP_DELAY))
goto out_invalidate;
xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
"page discard on page %p, inode 0x%llx, offset %llu.",
page, ip->i_ino, offset);
xfs_ilock(ip, XFS_ILOCK_EXCL);
bh = head = page_buffers(page);
do {
int done;
xfs_fileoff_t offset_fsb;
xfs_bmbt_irec_t imap;
int nimaps = 1;
int error;
xfs_fsblock_t firstblock;
xfs_bmap_free_t flist;
if (!buffer_delay(bh))
goto next_buffer;
offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
/*
* Map the range first and check that it is a delalloc extent
* before trying to unmap the range. Otherwise we will be
* trying to remove a real extent (which requires a
* transaction) or a hole, which is probably a bad idea...
*/
error = xfs_bmapi(NULL, ip, offset_fsb, 1,
XFS_BMAPI_ENTIRE, NULL, 0, &imap,
&nimaps, NULL, NULL);
if (error) {
/* something screwed, just bail */
xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
"page discard failed delalloc mapping lookup.");
break;
}
if (!nimaps) {
/* nothing there */
goto next_buffer;
}
if (imap.br_startblock != DELAYSTARTBLOCK) {
/* been converted, ignore */
goto next_buffer;
}
WARN_ON(imap.br_blockcount == 0);
/*
* Note: while we initialise the firstblock/flist pair, they
* should never be used because blocks should never be
* allocated or freed for a delalloc extent and hence we need
* don't cancel or finish them after the xfs_bunmapi() call.
*/
xfs_bmap_init(&flist, &firstblock);
error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
&flist, NULL, &done);
ASSERT(!flist.xbf_count && !flist.xbf_first);
if (error) {
/* something screwed, just bail */
xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
"page discard unable to remove delalloc mapping.");
break;
}
next_buffer:
offset += len;
} while ((bh = bh->b_this_page) != head);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out_invalidate:
xfs_vm_invalidatepage(page, 0);
return;
}
/*
* Calling this without startio set means we are being asked to make a dirty
* page ready for freeing it's buffers. When called with startio set then
......@@ -1125,7 +1257,7 @@ xfs_page_state_convert(
*/
if (err != -EAGAIN) {
if (!unmapped)
block_invalidatepage(page, 0);
xfs_aops_discard_page(page);
ClearPageUptodate(page);
}
return err;
......@@ -1535,15 +1667,6 @@ xfs_vm_readpages(
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
STATIC void
xfs_vm_invalidatepage(
struct page *page,
unsigned long offset)
{
trace_xfs_invalidatepage(page->mapping->host, page, offset);
block_invalidatepage(page, offset);
}
const struct address_space_operations xfs_address_space_operations = {
.readpage = xfs_vm_readpage,
.readpages = xfs_vm_readpages,
......
This diff is collapsed.
......@@ -91,6 +91,16 @@ xfs_mark_inode_dirty_sync(
mark_inode_dirty_sync(inode);
}
void
xfs_mark_inode_dirty(
xfs_inode_t *ip)
{
struct inode *inode = VFS_I(ip);
if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
mark_inode_dirty(inode);
}
/*
* Change the requested timestamp in the given inode.
* We don't lock across timestamp updates, and we don't log them but
......
......@@ -88,7 +88,6 @@
#include <xfs_super.h>
#include <xfs_globals.h>
#include <xfs_fs_subr.h>
#include <xfs_lrw.h>
#include <xfs_buf.h>
/*
......
This diff is collapsed.
/*
* Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_LRW_H__
#define __XFS_LRW_H__
struct xfs_mount;
struct xfs_inode;
struct xfs_buf;
extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
extern int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
#endif /* __XFS_LRW_H__ */
......@@ -607,6 +607,7 @@ xfssyncd(
set_freezable();
timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10);
for (;;) {
if (list_empty(&mp->m_sync_list))
timeleft = schedule_timeout_interruptible(timeleft);
/* swsusp */
try_to_freeze();
......@@ -627,8 +628,7 @@ xfssyncd(
list_add_tail(&mp->m_sync_work.w_list,
&mp->m_sync_list);
}
list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list)
list_move(&work->w_list, &tmp);
list_splice_init(&mp->m_sync_list, &tmp);
spin_unlock(&mp->m_sync_lock);
list_for_each_entry_safe(work, n, &tmp, w_list) {
......@@ -688,12 +688,12 @@ xfs_inode_set_reclaim_tag(
struct xfs_perag *pag;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
read_lock(&pag->pag_ici_lock);
write_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
__xfs_inode_set_reclaim_tag(pag, ip);
__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
spin_unlock(&ip->i_flags_lock);
read_unlock(&pag->pag_ici_lock);
write_unlock(&pag->pag_ici_lock);
xfs_perag_put(pag);
}
......
......@@ -51,22 +51,6 @@
#include "quota/xfs_dquot_item.h"
#include "quota/xfs_dquot.h"
/*
* Format fsblock number into a static buffer & return it.
*/
STATIC char *xfs_fmtfsblock(xfs_fsblock_t bno)
{
static char rval[50];
if (bno == NULLFSBLOCK)
sprintf(rval, "NULLFSBLOCK");
else if (isnullstartblock(bno))
sprintf(rval, "NULLSTARTBLOCK(%lld)", startblockval(bno));
else
sprintf(rval, "%lld", (xfs_dfsbno_t)bno);
return rval;
}
/*
* We include this last to have the helpers above available for the trace
* event implementations.
......
......@@ -197,13 +197,13 @@ TRACE_EVENT(xfs_iext_insert,
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
"offset %lld block %s count %lld flag %d caller %pf",
"offset %lld block %lld count %lld flag %d caller %pf",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
(long)__entry->idx,
__entry->startoff,
xfs_fmtfsblock(__entry->startblock),
(__int64_t)__entry->startblock,
__entry->blockcount,
__entry->state,
(char *)__entry->caller_ip)
......@@ -241,13 +241,13 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
"offset %lld block %s count %lld flag %d caller %pf",
"offset %lld block %lld count %lld flag %d caller %pf",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
(long)__entry->idx,
__entry->startoff,
xfs_fmtfsblock(__entry->startblock),
(__int64_t)__entry->startblock,
__entry->blockcount,
__entry->state,
(char *)__entry->caller_ip)
......@@ -593,7 +593,7 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
TP_ARGS(dqp),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(__be32, id)
__field(u32, id)
__field(unsigned, flags)
__field(unsigned, nrefs)
__field(unsigned long long, res_bcount)
......@@ -606,7 +606,7 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
), \
TP_fast_assign(
__entry->dev = dqp->q_mount->m_super->s_dev;
__entry->id = dqp->q_core.d_id;
__entry->id = be32_to_cpu(dqp->q_core.d_id);
__entry->flags = dqp->dq_flags;
__entry->nrefs = dqp->q_nrefs;
__entry->res_bcount = dqp->q_res_bcount;
......@@ -622,10 +622,10 @@ DECLARE_EVENT_CLASS(xfs_dquot_class,
be64_to_cpu(dqp->q_core.d_ino_softlimit);
),
TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
"bcnt 0x%llx [hard 0x%llx | soft 0x%llx] "
"icnt 0x%llx [hard 0x%llx | soft 0x%llx]",
"bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
"icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
MAJOR(__entry->dev), MINOR(__entry->dev),
be32_to_cpu(__entry->id),
__entry->id,
__print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
__entry->nrefs,
__entry->res_bcount,
......@@ -881,7 +881,7 @@ TRACE_EVENT(name, \
), \
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
"offset 0x%llx count %zd flags %s " \
"startoff 0x%llx startblock %s blockcount 0x%llx", \
"startoff 0x%llx startblock %lld blockcount 0x%llx", \
MAJOR(__entry->dev), MINOR(__entry->dev), \
__entry->ino, \
__entry->size, \
......@@ -890,7 +890,7 @@ TRACE_EVENT(name, \
__entry->count, \
__print_flags(__entry->flags, "|", BMAPI_FLAGS), \
__entry->startoff, \
xfs_fmtfsblock(__entry->startblock), \
(__int64_t)__entry->startblock, \
__entry->blockcount) \
)
DEFINE_IOMAP_EVENT(xfs_iomap_enter);
......
......@@ -2550,106 +2550,56 @@ xfs_bmap_rtalloc(
}
STATIC int
xfs_bmap_btalloc(
xfs_bmalloca_t *ap) /* bmap alloc argument struct */
xfs_bmap_btalloc_nullfb(
struct xfs_bmalloca *ap,
struct xfs_alloc_arg *args,
xfs_extlen_t *blen)
{
xfs_mount_t *mp; /* mount point structure */
xfs_alloctype_t atype = 0; /* type for allocation routines */
xfs_extlen_t align; /* minimum allocation alignment */
xfs_agnumber_t ag;
xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
xfs_agnumber_t startag;
xfs_alloc_arg_t args;
xfs_extlen_t blen;
xfs_extlen_t nextminlen = 0;
xfs_perag_t *pag;
int nullfb; /* true if ap->firstblock isn't set */
int isaligned;
int notinit;
int tryagain;
struct xfs_mount *mp = ap->ip->i_mount;
struct xfs_perag *pag;
xfs_agnumber_t ag, startag;
int notinit = 0;
int error;
mp = ap->ip->i_mount;
align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
if (unlikely(align)) {
error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
align, 0, ap->eof, 0, ap->conv,
&ap->off, &ap->alen);
ASSERT(!error);
ASSERT(ap->alen);
}
nullfb = ap->firstblock == NULLFSBLOCK;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
if (nullfb) {
if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
ag = xfs_filestream_lookup_ag(ap->ip);
ag = (ag != NULLAGNUMBER) ? ag : 0;
ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
} else {
ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
}
} else
ap->rval = ap->firstblock;
xfs_bmap_adjacent(ap);
/*
* If allowed, use ap->rval; otherwise must use firstblock since
* it's in the right allocation group.
*/
if (nullfb || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno)
;
else
ap->rval = ap->firstblock;
/*
* Normal allocation, done through xfs_alloc_vextent.
*/
tryagain = isaligned = 0;
args.tp = ap->tp;
args.mp = mp;
args.fsbno = ap->rval;
args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
args.firstblock = ap->firstblock;
blen = 0;
if (nullfb) {
if (ap->userdata && xfs_inode_is_filestream(ap->ip))
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args->type = XFS_ALLOCTYPE_NEAR_BNO;
else
args.type = XFS_ALLOCTYPE_START_BNO;
args.total = ap->total;
args->type = XFS_ALLOCTYPE_START_BNO;
args->total = ap->total;
/*
* Search for an allocation group with a single extent
* large enough for the request.
*
* If one isn't found, then adjust the minimum allocation
* size to the largest space found.
* Search for an allocation group with a single extent large enough
* for the request. If one isn't found, then adjust the minimum
* allocation size to the largest space found.
*/
startag = ag = XFS_FSB_TO_AGNO(mp, args.fsbno);
startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);
if (startag == NULLAGNUMBER)
startag = ag = 0;
notinit = 0;
pag = xfs_perag_get(mp, ag);
while (blen < ap->alen) {
if (!pag->pagf_init &&
(error = xfs_alloc_pagf_init(mp, args.tp,
ag, XFS_ALLOC_FLAG_TRYLOCK))) {
while (*blen < ap->alen) {
if (!pag->pagf_init) {
error = xfs_alloc_pagf_init(mp, args->tp, ag,
XFS_ALLOC_FLAG_TRYLOCK);
if (error) {
xfs_perag_put(pag);
return error;
}
}
/*
* See xfs_alloc_fix_freelist...
*/
if (pag->pagf_init) {
xfs_extlen_t longest;
longest = xfs_alloc_longest_free_extent(mp, pag);
if (blen < longest)
blen = longest;
if (*blen < longest)
*blen = longest;
} else
notinit = 1;
if (xfs_inode_is_filestream(ap->ip)) {
if (blen >= ap->alen)
if (*blen >= ap->alen)
break;
if (ap->userdata) {
......@@ -2684,31 +2634,99 @@ xfs_bmap_btalloc(
pag = xfs_perag_get(mp, ag);
}
xfs_perag_put(pag);
/*
* Since the above loop did a BUF_TRYLOCK, it is
* possible that there is space for this request.
*/
if (notinit || blen < ap->minlen)
args.minlen = ap->minlen;
if (notinit || *blen < ap->minlen)
args->minlen = ap->minlen;
/*
* If the best seen length is less than the request
* length, use the best as the minimum.
*/
else if (blen < ap->alen)
args.minlen = blen;
else if (*blen < ap->alen)
args->minlen = *blen;
/*
* Otherwise we've seen an extent as big as alen,
* use that as the minimum.
*/
else
args.minlen = ap->alen;
args->minlen = ap->alen;
/*
* set the failure fallback case to look in the selected
* AG as the stream may have moved.
*/
if (xfs_inode_is_filestream(ap->ip))
ap->rval = args.fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
ap->rval = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
return 0;
}
STATIC int
xfs_bmap_btalloc(
xfs_bmalloca_t *ap) /* bmap alloc argument struct */
{
xfs_mount_t *mp; /* mount point structure */
xfs_alloctype_t atype = 0; /* type for allocation routines */
xfs_extlen_t align; /* minimum allocation alignment */
xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */
xfs_agnumber_t ag;
xfs_alloc_arg_t args;
xfs_extlen_t blen;
xfs_extlen_t nextminlen = 0;
int nullfb; /* true if ap->firstblock isn't set */
int isaligned;
int tryagain;
int error;
mp = ap->ip->i_mount;
align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
if (unlikely(align)) {
error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
align, 0, ap->eof, 0, ap->conv,
&ap->off, &ap->alen);
ASSERT(!error);
ASSERT(ap->alen);
}
nullfb = ap->firstblock == NULLFSBLOCK;
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
if (nullfb) {
if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
ag = xfs_filestream_lookup_ag(ap->ip);
ag = (ag != NULLAGNUMBER) ? ag : 0;
ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
} else {
ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
}
} else
ap->rval = ap->firstblock;
xfs_bmap_adjacent(ap);
/*
* If allowed, use ap->rval; otherwise must use firstblock since
* it's in the right allocation group.
*/
if (nullfb || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno)
;
else
ap->rval = ap->firstblock;
/*
* Normal allocation, done through xfs_alloc_vextent.
*/
tryagain = isaligned = 0;
args.tp = ap->tp;
args.mp = mp;
args.fsbno = ap->rval;
args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
args.firstblock = ap->firstblock;
blen = 0;
if (nullfb) {
error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
if (error)
return error;
} else if (ap->low) {
if (xfs_inode_is_filestream(ap->ip))
args.type = XFS_ALLOCTYPE_FIRST_AG;
......
......@@ -292,7 +292,8 @@ typedef struct xfs_bstat {
__s32 bs_extents; /* number of extents */
__u32 bs_gen; /* generation count */
__u16 bs_projid; /* project id */
unsigned char bs_pad[14]; /* pad space, unused */
__u16 bs_forkoff; /* inode fork offset in bytes */
unsigned char bs_pad[12]; /* pad space, unused */
__u32 bs_dmevmask; /* DMIG event mask */
__u16 bs_dmstate; /* DMIG state info */
__u16 bs_aextents; /* attribute number of extents */
......
......@@ -190,13 +190,12 @@ xfs_iget_cache_hit(
trace_xfs_iget_reclaim(ip);
/*
* We need to set XFS_INEW atomically with clearing the
* reclaimable tag so that we do have an indicator of the
* inode still being initialized.
* We need to set XFS_IRECLAIM to prevent xfs_reclaim_inode
* from stomping over us while we recycle the inode. We can't
* clear the radix tree reclaimable tag yet as it requires
* pag_ici_lock to be held exclusive.
*/
ip->i_flags |= XFS_INEW;
ip->i_flags &= ~XFS_IRECLAIMABLE;
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
ip->i_flags |= XFS_IRECLAIM;
spin_unlock(&ip->i_flags_lock);
read_unlock(&pag->pag_ici_lock);
......@@ -216,7 +215,15 @@ xfs_iget_cache_hit(
trace_xfs_iget_reclaim(ip);
goto out_error;
}
write_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
ip->i_flags &= ~(XFS_IRECLAIMABLE | XFS_IRECLAIM);
ip->i_flags |= XFS_INEW;
__xfs_inode_clear_reclaim_tag(mp, pag, ip);
inode->i_state = I_NEW;
spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock);
} else {
/* If the VFS inode is being torn down, pause and try again. */
if (!igrab(inode)) {
......
......@@ -2439,75 +2439,31 @@ xfs_idestroy_fork(
}
/*
* Increment the pin count of the given buffer.
* This value is protected by ipinlock spinlock in the mount structure.
* This is called to unpin an inode. The caller must have the inode locked
* in at least shared mode so that the buffer cannot be subsequently pinned
* once someone is waiting for it to be unpinned.
*/
void
xfs_ipin(
xfs_inode_t *ip)
{
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
atomic_inc(&ip->i_pincount);
}
/*
* Decrement the pin count of the given inode, and wake up
* anyone in xfs_iwait_unpin() if the count goes to 0. The
* inode must have been previously pinned with a call to xfs_ipin().
*/
void
xfs_iunpin(
xfs_inode_t *ip)
{
ASSERT(atomic_read(&ip->i_pincount) > 0);
if (atomic_dec_and_test(&ip->i_pincount))
wake_up(&ip->i_ipin_wait);
}
/*
* This is called to unpin an inode. It can be directed to wait or to return
* immediately without waiting for the inode to be unpinned. The caller must
* have the inode locked in at least shared mode so that the buffer cannot be
* subsequently pinned once someone is waiting for it to be unpinned.
*/
STATIC void
__xfs_iunpin_wait(
xfs_inode_t *ip,
int wait)
static void
xfs_iunpin_nowait(
struct xfs_inode *ip)
{
xfs_inode_log_item_t *iip = ip->i_itemp;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
if (atomic_read(&ip->i_pincount) == 0)
return;
/* Give the log a push to start the unpinning I/O */
if (iip && iip->ili_last_lsn)
xfs_log_force_lsn(ip->i_mount, iip->ili_last_lsn, 0);
else
xfs_log_force(ip->i_mount, 0);
xfs_log_force_lsn(ip->i_mount, ip->i_itemp->ili_last_lsn, 0);
if (wait)
wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0));
}
void
xfs_iunpin_wait(
xfs_inode_t *ip)
struct xfs_inode *ip)
{
__xfs_iunpin_wait(ip, 1);
}
static inline void
xfs_iunpin_nowait(
xfs_inode_t *ip)
{
__xfs_iunpin_wait(ip, 0);
if (xfs_ipincount(ip)) {
xfs_iunpin_nowait(ip);
wait_event(ip->i_ipin_wait, (xfs_ipincount(ip) == 0));
}
}
/*
* xfs_iextents_copy()
*
......
......@@ -471,8 +471,6 @@ int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
void xfs_iext_realloc(xfs_inode_t *, int, int);
void xfs_ipin(xfs_inode_t *);
void xfs_iunpin(xfs_inode_t *);
void xfs_iunpin_wait(xfs_inode_t *);
int xfs_iflush(xfs_inode_t *, uint);
void xfs_ichgtime(xfs_inode_t *, int);
......@@ -480,6 +478,7 @@ void xfs_lock_inodes(xfs_inode_t **, int, uint);
void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
void xfs_synchronize_times(xfs_inode_t *);
void xfs_mark_inode_dirty(xfs_inode_t *);
void xfs_mark_inode_dirty_sync(xfs_inode_t *);
#define IHOLD(ip) \
......
......@@ -535,23 +535,23 @@ xfs_inode_item_format(
/*
* This is called to pin the inode associated with the inode log
* item in memory so it cannot be written out. Do this by calling
* xfs_ipin() to bump the pin count in the inode while holding the
* inode pin lock.
* item in memory so it cannot be written out.
*/
STATIC void
xfs_inode_item_pin(
xfs_inode_log_item_t *iip)
{
ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
xfs_ipin(iip->ili_inode);
atomic_inc(&iip->ili_inode->i_pincount);
}
/*
* This is called to unpin the inode associated with the inode log
* item which was previously pinned with a call to xfs_inode_item_pin().
* Just call xfs_iunpin() on the inode to do this.
*
* Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
*/
/* ARGSUSED */
STATIC void
......@@ -559,7 +559,11 @@ xfs_inode_item_unpin(
xfs_inode_log_item_t *iip,
int stale)
{
xfs_iunpin(iip->ili_inode);
struct xfs_inode *ip = iip->ili_inode;
ASSERT(atomic_read(&ip->i_pincount) > 0);
if (atomic_dec_and_test(&ip->i_pincount))
wake_up(&ip->i_ipin_wait);
}
/* ARGSUSED */
......@@ -568,7 +572,7 @@ xfs_inode_item_unpin_remove(
xfs_inode_log_item_t *iip,
xfs_trans_t *tp)
{
xfs_iunpin(iip->ili_inode);
xfs_inode_item_unpin(iip, 0);
}
/*
......
......@@ -106,6 +106,7 @@ xfs_bulkstat_one_iget(
buf->bs_dmevmask = dic->di_dmevmask;
buf->bs_dmstate = dic->di_dmstate;
buf->bs_aextents = dic->di_anextents;
buf->bs_forkoff = XFS_IFORK_BOFF(ip);
switch (dic->di_format) {
case XFS_DINODE_FMT_DEV:
......@@ -176,6 +177,7 @@ xfs_bulkstat_one_dinode(
buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
buf->bs_aextents = be16_to_cpu(dic->di_anextents);
buf->bs_forkoff = XFS_DFORK_BOFF(dic);
switch (dic->di_format) {
case XFS_DINODE_FMT_DEV:
......
......@@ -60,7 +60,7 @@ STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
STATIC void xlog_dealloc_log(xlog_t *log);
STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
int nentries, xfs_log_ticket_t tic,
int nentries, struct xlog_ticket *tic,
xfs_lsn_t *start_lsn,
xlog_in_core_t **commit_iclog,
uint flags);
......@@ -243,13 +243,13 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
* out when the next write occurs.
*/
xfs_lsn_t
xfs_log_done(xfs_mount_t *mp,
xfs_log_ticket_t xtic,
void **iclog,
xfs_log_done(
struct xfs_mount *mp,
struct xlog_ticket *ticket,
struct xlog_in_core **iclog,
uint flags)
{
xlog_t *log = mp->m_log;
xlog_ticket_t *ticket = (xfs_log_ticket_t) xtic;
struct log *log = mp->m_log;
xfs_lsn_t lsn = 0;
if (XLOG_FORCED_SHUTDOWN(log) ||
......@@ -258,8 +258,7 @@ xfs_log_done(xfs_mount_t *mp,
* If we get an error, just continue and give back the log ticket.
*/
(((ticket->t_flags & XLOG_TIC_INITED) == 0) &&
(xlog_commit_record(mp, ticket,
(xlog_in_core_t **)iclog, &lsn)))) {
(xlog_commit_record(mp, ticket, iclog, &lsn)))) {
lsn = (xfs_lsn_t) -1;
if (ticket->t_flags & XLOG_TIC_PERM_RESERV) {
flags |= XFS_LOG_REL_PERM_RESERV;
......@@ -289,7 +288,7 @@ xfs_log_done(xfs_mount_t *mp,
}
return lsn;
} /* xfs_log_done */
}
/*
* Attaches a new iclog I/O completion callback routine during
......@@ -298,11 +297,11 @@ xfs_log_done(xfs_mount_t *mp,
* executing the callback at an appropriate time.
*/
int
xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
void *iclog_hndl, /* iclog to hang callback off */
xfs_log_notify(
struct xfs_mount *mp,
struct xlog_in_core *iclog,
xfs_log_callback_t *cb)
{
xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
int abortflg;
spin_lock(&iclog->ic_callback_lock);
......@@ -316,16 +315,14 @@ xfs_log_notify(xfs_mount_t *mp, /* mount of partition */
}
spin_unlock(&iclog->ic_callback_lock);
return abortflg;
} /* xfs_log_notify */
}
int
xfs_log_release_iclog(xfs_mount_t *mp,
void *iclog_hndl)
xfs_log_release_iclog(
struct xfs_mount *mp,
struct xlog_in_core *iclog)
{
xlog_t *log = mp->m_log;
xlog_in_core_t *iclog = (xlog_in_core_t *)iclog_hndl;
if (xlog_state_release_iclog(log, iclog)) {
if (xlog_state_release_iclog(mp->m_log, iclog)) {
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
return EIO;
}
......@@ -344,16 +341,17 @@ xfs_log_release_iclog(xfs_mount_t *mp,
* reservation, we prevent over allocation problems.
*/
int
xfs_log_reserve(xfs_mount_t *mp,
xfs_log_reserve(
struct xfs_mount *mp,
int unit_bytes,
int cnt,
xfs_log_ticket_t *ticket,
struct xlog_ticket **ticket,
__uint8_t client,
uint flags,
uint t_type)
{
xlog_t *log = mp->m_log;
xlog_ticket_t *internal_ticket;
struct log *log = mp->m_log;
struct xlog_ticket *internal_ticket;
int retval = 0;
ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
......@@ -367,7 +365,7 @@ xfs_log_reserve(xfs_mount_t *mp,
if (*ticket != NULL) {
ASSERT(flags & XFS_LOG_PERM_RESERV);
internal_ticket = (xlog_ticket_t *)*ticket;
internal_ticket = *ticket;
trace_xfs_log_reserve(log, internal_ticket);
......@@ -519,7 +517,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
xlog_in_core_t *first_iclog;
#endif
xfs_log_iovec_t reg[1];
xfs_log_ticket_t tic = NULL;
xlog_ticket_t *tic = NULL;
xfs_lsn_t lsn;
int error;
......@@ -656,24 +654,24 @@ xfs_log_unmount(xfs_mount_t *mp)
* transaction occur with one call to xfs_log_write().
*/
int
xfs_log_write(xfs_mount_t * mp,
xfs_log_iovec_t reg[],
xfs_log_write(
struct xfs_mount *mp,
struct xfs_log_iovec reg[],
int nentries,
xfs_log_ticket_t tic,
struct xlog_ticket *tic,
xfs_lsn_t *start_lsn)
{
struct log *log = mp->m_log;
int error;
xlog_t *log = mp->m_log;
if (XLOG_FORCED_SHUTDOWN(log))
return XFS_ERROR(EIO);
if ((error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0))) {
error = xlog_write(mp, reg, nentries, tic, start_lsn, NULL, 0);
if (error)
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
}
return error;
} /* xfs_log_write */
}
void
xfs_log_move_tail(xfs_mount_t *mp,
......@@ -1642,16 +1640,16 @@ xlog_print_tic_res(xfs_mount_t *mp, xlog_ticket_t *ticket)
* bytes have been written out.
*/
STATIC int
xlog_write(xfs_mount_t * mp,
xfs_log_iovec_t reg[],
xlog_write(
struct xfs_mount *mp,
struct xfs_log_iovec reg[],
int nentries,
xfs_log_ticket_t tic,
struct xlog_ticket *ticket,
xfs_lsn_t *start_lsn,
xlog_in_core_t **commit_iclog,
struct xlog_in_core **commit_iclog,
uint flags)
{
xlog_t *log = mp->m_log;
xlog_ticket_t *ticket = (xlog_ticket_t *)tic;
xlog_in_core_t *iclog = NULL; /* ptr to current in-core log */
xlog_op_header_t *logop_head; /* ptr to log operation header */
__psint_t ptr; /* copy address into data region */
......@@ -1765,7 +1763,7 @@ xlog_write(xfs_mount_t * mp,
default:
xfs_fs_cmn_err(CE_WARN, mp,
"Bad XFS transaction clientid 0x%x in ticket 0x%p",
logop_head->oh_clientid, tic);
logop_head->oh_clientid, ticket);
return XFS_ERROR(EIO);
}
......
......@@ -110,8 +110,6 @@ typedef struct xfs_log_iovec {
uint i_type; /* type of region */
} xfs_log_iovec_t;
typedef void* xfs_log_ticket_t;
/*
* Structure used to pass callback function and the function's argument
* to the log manager.
......@@ -126,10 +124,12 @@ typedef struct xfs_log_callback {
#ifdef __KERNEL__
/* Log manager interfaces */
struct xfs_mount;
struct xlog_in_core;
struct xlog_ticket;
xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
xfs_log_ticket_t ticket,
void **iclog,
struct xlog_ticket *ticket,
struct xlog_in_core **iclog,
uint flags);
int _xfs_log_force(struct xfs_mount *mp,
uint flags,
......@@ -151,21 +151,21 @@ int xfs_log_mount_finish(struct xfs_mount *mp);
void xfs_log_move_tail(struct xfs_mount *mp,
xfs_lsn_t tail_lsn);
int xfs_log_notify(struct xfs_mount *mp,
void *iclog,
struct xlog_in_core *iclog,
xfs_log_callback_t *callback_entry);
int xfs_log_release_iclog(struct xfs_mount *mp,
void *iclog_hndl);
struct xlog_in_core *iclog);
int xfs_log_reserve(struct xfs_mount *mp,
int length,
int count,
xfs_log_ticket_t *ticket,
struct xlog_ticket **ticket,
__uint8_t clientid,
uint flags,
uint t_type);
int xfs_log_write(struct xfs_mount *mp,
xfs_log_iovec_t region[],
int nentries,
xfs_log_ticket_t ticket,
struct xlog_ticket *ticket,
xfs_lsn_t *start_lsn);
int xfs_log_unmount_write(struct xfs_mount *mp);
void xfs_log_unmount(struct xfs_mount *mp);
......
......@@ -1097,13 +1097,15 @@ xfs_default_resblks(xfs_mount_t *mp)
__uint64_t resblks;
/*
* We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
* This may drive us straight to ENOSPC on mount, but that implies
* we were already there on the last unmount. Warn if this occurs.
* We default to 5% or 8192 fsbs of space reserved, whichever is
* smaller. This is intended to cover concurrent allocation
* transactions when we initially hit enospc. These each require a 4
* block reservation. Hence by default we cover roughly 2000 concurrent
* allocation reservations.
*/
resblks = mp->m_sb.sb_dblocks;
do_div(resblks, 20);
resblks = min_t(__uint64_t, resblks, 1024);
resblks = min_t(__uint64_t, resblks, 8192);
return resblks;
}
......@@ -1417,6 +1419,9 @@ xfs_mountfs(
* when at ENOSPC. This is needed for operations like create with
* attr, unwritten extent conversion at ENOSPC, etc. Data allocations
* are not allowed to use this reserved space.
*
* This may drive us straight to ENOSPC on mount, but that implies
* we were already there on the last unmount. Warn if this occurs.
*/
if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
resblks = xfs_default_resblks(mp);
......@@ -1725,26 +1730,30 @@ xfs_mod_incore_sb_unlocked(
lcounter += rem;
}
} else { /* Taking blocks away */
lcounter += delta;
if (lcounter >= 0) {
mp->m_sb.sb_fdblocks = lcounter +
XFS_ALLOC_SET_ASIDE(mp);
return 0;
}
/*
* If were out of blocks, use any available reserved blocks if
* were allowed to.
* We are out of blocks, use any available reserved
* blocks if were allowed to.
*/
if (!rsvd)
return XFS_ERROR(ENOSPC);
if (lcounter < 0) {
if (rsvd) {
lcounter = (long long)mp->m_resblks_avail + delta;
if (lcounter < 0) {
return XFS_ERROR(ENOSPC);
}
if (lcounter >= 0) {
mp->m_resblks_avail = lcounter;
return 0;
} else { /* not reserved */
return XFS_ERROR(ENOSPC);
}
}
printk_once(KERN_WARNING
"Filesystem \"%s\": reserve blocks depleted! "
"Consider increasing reserve pool size.",
mp->m_fsname);
return XFS_ERROR(ENOSPC);
}
mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
......@@ -2052,6 +2061,26 @@ xfs_mount_log_sb(
return error;
}
/*
* If the underlying (data/log/rt) device is readonly, there are some
* operations that cannot proceed.
*/
int
xfs_dev_is_read_only(
struct xfs_mount *mp,
char *message)
{
if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
xfs_readonly_buftarg(mp->m_logdev_targp) ||
(mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
cmn_err(CE_NOTE,
"XFS: %s required on read-only device.", message);
cmn_err(CE_NOTE,
"XFS: write access unavailable, cannot proceed.");
return EROFS;
}
return 0;
}
#ifdef HAVE_PERCPU_SB
/*
......
......@@ -436,6 +436,8 @@ extern void xfs_freesb(xfs_mount_t *);
extern int xfs_fs_writable(xfs_mount_t *);
extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
extern int xfs_dmops_get(struct xfs_mount *);
extern void xfs_dmops_put(struct xfs_mount *);
......
......@@ -796,7 +796,7 @@ _xfs_trans_commit(
int sync;
#define XFS_TRANS_LOGVEC_COUNT 16
xfs_log_iovec_t log_vector_fast[XFS_TRANS_LOGVEC_COUNT];
void *commit_iclog;
struct xlog_in_core *commit_iclog;
int shutdown;
commit_lsn = -1;
......
......@@ -910,7 +910,7 @@ typedef struct xfs_trans {
unsigned int t_blk_res_used; /* # of resvd blocks used */
unsigned int t_rtx_res; /* # of rt extents resvd */
unsigned int t_rtx_res_used; /* # of resvd rt extents used */
xfs_log_ticket_t t_ticket; /* log mgr ticket */
struct xlog_ticket *t_ticket; /* log mgr ticket */
xfs_lsn_t t_lsn; /* log seq num of start of
* transaction. */
xfs_lsn_t t_commit_lsn; /* log seq num of end of
......
......@@ -46,6 +46,65 @@ STATIC xfs_buf_t *xfs_trans_buf_item_match(xfs_trans_t *, xfs_buftarg_t *,
STATIC xfs_buf_t *xfs_trans_buf_item_match_all(xfs_trans_t *, xfs_buftarg_t *,
xfs_daddr_t, int);
/*
* Add the locked buffer to the transaction.
*
* The buffer must be locked, and it cannot be associated with any
* transaction.
*
* If the buffer does not yet have a buf log item associated with it,
* then allocate one for it. Then add the buf item to the transaction.
*/
STATIC void
_xfs_trans_bjoin(
struct xfs_trans *tp,
struct xfs_buf *bp,
int reset_recur)
{
struct xfs_buf_log_item *bip;
ASSERT(XFS_BUF_ISBUSY(bp));
ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
* it doesn't have one yet, then allocate one and initialize it.
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, tp->t_mountp);
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
if (reset_recur)
bip->bli_recur = 0;
/*
* Take a reference for this transaction on the buf item.
*/
atomic_inc(&bip->bli_refcount);
/*
* Get a log_item_desc to point at the new item.
*/
(void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
/*
* Initialize b_fsprivate2 so we can find it with incore_match()
* in xfs_trans_get_buf() and friends above.
*/
XFS_BUF_SET_FSPRIVATE2(bp, tp);
}
void
xfs_trans_bjoin(
struct xfs_trans *tp,
struct xfs_buf *bp)
{
_xfs_trans_bjoin(tp, bp, 0);
trace_xfs_trans_bjoin(bp->b_fspriv);
}
/*
* Get and lock the buffer for the caller if it is not already
......@@ -132,40 +191,8 @@ xfs_trans_get_buf(xfs_trans_t *tp,
ASSERT(!XFS_BUF_GETERROR(bp));
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
* it doesn't have one yet, then allocate one and initialize it.
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, tp->t_mountp);
/*
* Set the recursion count for the buffer within this transaction
* to 0.
*/
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
bip->bli_recur = 0;
/*
* Take a reference for this transaction on the buf item.
*/
atomic_inc(&bip->bli_refcount);
/*
* Get a log_item_desc to point at the new item.
*/
(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
/*
* Initialize b_fsprivate2 so we can find it with incore_match()
* above.
*/
XFS_BUF_SET_FSPRIVATE2(bp, tp);
trace_xfs_trans_get_buf(bip);
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_get_buf(bp->b_fspriv);
return (bp);
}
......@@ -210,44 +237,11 @@ xfs_trans_getsb(xfs_trans_t *tp,
}
bp = xfs_getsb(mp, flags);
if (bp == NULL) {
if (bp == NULL)
return NULL;
}
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
* it doesn't have one yet, then allocate one and initialize it.
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, mp);
/*
* Set the recursion count for the buffer within this transaction
* to 0.
*/
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
bip->bli_recur = 0;
/*
* Take a reference for this transaction on the buf item.
*/
atomic_inc(&bip->bli_refcount);
/*
* Get a log_item_desc to point at the new item.
*/
(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
/*
* Initialize b_fsprivate2 so we can find it with incore_match()
* above.
*/
XFS_BUF_SET_FSPRIVATE2(bp, tp);
trace_xfs_trans_getsb(bip);
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_getsb(bp->b_fspriv);
return (bp);
}
......@@ -425,40 +419,9 @@ xfs_trans_read_buf(
if (XFS_FORCED_SHUTDOWN(mp))
goto shutdown_abort;
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
* it doesn't have one yet, then allocate one and initialize it.
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, tp->t_mountp);
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_read_buf(bp->b_fspriv);
/*
* Set the recursion count for the buffer within this transaction
* to 0.
*/
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
bip->bli_recur = 0;
/*
* Take a reference for this transaction on the buf item.
*/
atomic_inc(&bip->bli_refcount);
/*
* Get a log_item_desc to point at the new item.
*/
(void) xfs_trans_add_item(tp, (xfs_log_item_t*)bip);
/*
* Initialize b_fsprivate2 so we can find it with incore_match()
* above.
*/
XFS_BUF_SET_FSPRIVATE2(bp, tp);
trace_xfs_trans_read_buf(bip);
*bpp = bp;
return 0;
......@@ -622,53 +585,6 @@ xfs_trans_brelse(xfs_trans_t *tp,
return;
}
/*
* Add the locked buffer to the transaction.
* The buffer must be locked, and it cannot be associated with any
* transaction.
*
* If the buffer does not yet have a buf log item associated with it,
* then allocate one for it. Then add the buf item to the transaction.
*/
void
xfs_trans_bjoin(xfs_trans_t *tp,
xfs_buf_t *bp)
{
xfs_buf_log_item_t *bip;
ASSERT(XFS_BUF_ISBUSY(bp));
ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
* it doesn't have one yet, then allocate one and initialize it.
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, tp->t_mountp);
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLI_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
/*
* Take a reference for this transaction on the buf item.
*/
atomic_inc(&bip->bli_refcount);
/*
* Get a log_item_desc to point at the new item.
*/
(void) xfs_trans_add_item(tp, (xfs_log_item_t *)bip);
/*
* Initialize b_fsprivate2 so we can find it with incore_match()
* in xfs_trans_get_buf() and friends above.
*/
XFS_BUF_SET_FSPRIVATE2(bp, tp);
trace_xfs_trans_bjoin(bip);
}
/*
* Mark the buffer as not needing to be unlocked when the buf item's
* IOP_UNLOCK() routine is called. The buffer must already be locked
......
......@@ -583,113 +583,6 @@ xfs_readlink(
return error;
}
/*
* xfs_fsync
*
* This is called to sync the inode and its data out to disk. We need to hold
* the I/O lock while flushing the data, and the inode lock while flushing the
* inode. The inode lock CANNOT be held while flushing the data, so acquire
* after we're done with that.
*/
int
xfs_fsync(
xfs_inode_t *ip)
{
xfs_trans_t *tp;
int error = 0;
int log_flushed = 0;
xfs_itrace_entry(ip);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return XFS_ERROR(EIO);
/*
* We always need to make sure that the required inode state is safe on
* disk. The inode might be clean but we still might need to force the
* log because of committed transactions that haven't hit the disk yet.
* Likewise, there could be unflushed non-transactional changes to the
* inode core that have to go to disk and this requires us to issue
* a synchronous transaction to capture these changes correctly.
*
* This code relies on the assumption that if the update_* fields
* of the inode are clear and the inode is unpinned then it is clean
* and no action is required.
*/
xfs_ilock(ip, XFS_ILOCK_SHARED);
if (!ip->i_update_core) {
/*
* Timestamps/size haven't changed since last inode flush or
* inode transaction commit. That means either nothing got
* written or a transaction committed which caught the updates.
* If the latter happened and the transaction hasn't hit the
* disk yet, the inode will be still be pinned. If it is,
* force the log.
*/
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (xfs_ipincount(ip)) {
if (ip->i_itemp->ili_last_lsn) {
error = _xfs_log_force_lsn(ip->i_mount,
ip->i_itemp->ili_last_lsn,
XFS_LOG_SYNC, &log_flushed);
} else {
error = _xfs_log_force(ip->i_mount,
XFS_LOG_SYNC, &log_flushed);
}
}
} else {
/*
* Kick off a transaction to log the inode core to get the
* updates. The sync transaction will also force the log.
*/
xfs_iunlock(ip, XFS_ILOCK_SHARED);
tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_FSYNC_TS);
error = xfs_trans_reserve(tp, 0,
XFS_FSYNC_TS_LOG_RES(ip->i_mount), 0, 0, 0);
if (error) {
xfs_trans_cancel(tp, 0);
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
/*
* Note - it's possible that we might have pushed ourselves out
* of the way during trans_reserve which would flush the inode.
* But there's no guarantee that the inode buffer has actually
* gone out yet (it's delwri). Plus the buffer could be pinned
* anyway if it's part of an inode in another recent
* transaction. So we play it safe and fire off the
* transaction anyway.
*/
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_set_sync(tp);
error = _xfs_trans_commit(tp, 0, &log_flushed);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
if (ip->i_mount->m_flags & XFS_MOUNT_BARRIER) {
/*
* If the log write didn't issue an ordered tag we need
* to flush the disk cache for the data device now.
*/
if (!log_flushed)
xfs_blkdev_issue_flush(ip->i_mount->m_ddev_targp);
/*
* If this inode is on the RT dev we need to flush that
* cache as well.
*/
if (XFS_IS_REALTIME_INODE(ip))
xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
}
return error;
}
/*
* Flags for xfs_free_eofblocks
*/
......
......@@ -21,7 +21,6 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
int xfs_readlink(struct xfs_inode *ip, char *link);
int xfs_fsync(struct xfs_inode *ip);
int xfs_release(struct xfs_inode *ip);
int xfs_inactive(struct xfs_inode *ip);
int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
......@@ -50,18 +49,6 @@ int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
int flags, struct attrlist_cursor_kern *cursor);
ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
const struct iovec *iovp, unsigned int segs,
loff_t *offset, int ioflags);
ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp,
loff_t *ppos, struct pipe_inode_info *pipe, size_t count,
int flags, int ioflags);
ssize_t xfs_splice_write(struct xfs_inode *ip,
struct pipe_inode_info *pipe, struct file *outfilp,
loff_t *ppos, size_t count, int flags, int ioflags);
ssize_t xfs_write(struct xfs_inode *xip, struct kiocb *iocb,
const struct iovec *iovp, unsigned int nsegs,
loff_t *offset, int ioflags);
int xfs_bmap(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
int flags, struct xfs_iomap *iomapp, int *niomaps);
void xfs_tosspages(struct xfs_inode *inode, xfs_off_t first,
......@@ -72,4 +59,6 @@ int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first,
xfs_off_t last, uint64_t flags, int fiopt);
int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last);
int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
#endif /* _XFS_VNODEOPS_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment