Commit 27ea95cc authored by Stephen Lord, committed by Stephen Lord

[XFS] Implement deletion of inode clusters in XFS.

SGI Modid: 2.5.x-xfs:slinx:159536a
parent 611e7dfb
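This change makes XFS free the on-disk space of an inode cluster once every inode in the corresponding allocation chunk has been freed, instead of leaving empty clusters allocated forever. xfs_difree() gains out parameters that report when the chunk it just freed became completely unused; xfs_ifree() then calls the new xfs_ifree_cluster() to mark any in-core inodes and buffers for that cluster stale, and the cluster's blocks are queued on an xfs_bmap_free_t list to be released when the transaction chain commits. The behaviour is on by default (XFSMNT_IDELETE) and can be disabled with the new "ikeep" mount option. A condensed sketch of the caller-side flow, pieced together from the hunks below rather than quoted verbatim:

    /* xfs_inactive(): collect the blocks to free on a bmap free list */
    XFS_BMAP_INIT(&free_list, &first_block);
    error = xfs_ifree(tp, ip, &free_list);

    /* xfs_ifree(): xfs_difree() reports whether the whole chunk is now free */
    error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
    if (delete)
        xfs_ifree_cluster(ip, tp, first_ino);

    /* xfs_inactive(): release the queued blocks and commit */
    (void) xfs_bmap_finish(&tp, &free_list, first_block, &committed);
    (void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);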
......@@ -162,6 +162,7 @@ xfs_buf_item_log_check(
#endif
STATIC void xfs_buf_error_relse(xfs_buf_t *bp);
STATIC void xfs_buf_do_callbacks(xfs_buf_t *bp, xfs_log_item_t *lip);
/*
* This returns the number of log iovecs needed to log the
......@@ -417,22 +418,25 @@ xfs_buf_item_unpin(
ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
ASSERT(XFS_BUF_ISSTALE(bp));
/**
ASSERT(bp->b_pincount == 0);
**/
ASSERT(bip->bli_format.blf_flags & XFS_BLI_CANCEL);
xfs_buf_item_trace("UNPIN STALE", bip);
xfs_buftrace("XFS_UNPIN STALE", bp);
AIL_LOCK(mp,s);
/*
* If we get called here because of an IO error, we may
* or may not have the item on the AIL. xfs_trans_delete_ail()
* will take care of that situation.
* xfs_trans_delete_ail() drops the AIL lock.
*/
xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip, s);
xfs_buf_item_relse(bp);
ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
xfs_buf_do_callbacks(bp, (xfs_log_item_t *)bip);
XFS_BUF_FSPRIVATE(bp, void *) = NULL;
XFS_BUF_CLR_IODONE_FUNC(bp);
} else {
AIL_LOCK(mp,s);
xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip, s);
xfs_buf_item_relse(bp);
ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
}
xfs_buf_relse(bp);
}
}
......
......@@ -96,6 +96,7 @@ typedef struct xfs_buf_log_format_t {
#define XFS_BLI_STALE 0x04
#define XFS_BLI_LOGGED 0x08
#define XFS_BLI_INODE_ALLOC_BUF 0x10
#define XFS_BLI_STALE_INODE 0x20
#ifdef __KERNEL__
......@@ -130,7 +131,7 @@ typedef struct xfs_buf_log_item {
* items which have been canceled and should not be replayed.
*/
typedef struct xfs_buf_cancel {
xfs_daddr_t bc_blkno;
xfs_daddr_t bc_blkno;
uint bc_len;
int bc_refcount;
struct xfs_buf_cancel *bc_next;
......
......@@ -99,5 +99,6 @@ struct xfs_mount_args {
#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */
#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */
#define XFSMNT_NOLOGFLUSH 0x04000000 /* Don't flush for log blocks */
#define XFSMNT_IDELETE 0x08000000 /* inode cluster delete */
#endif /* __XFS_CLNT_H__ */
......@@ -57,6 +57,7 @@
#include "xfs_bit.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_bmap.h"
/*
* Log specified fields for the inode given by bp and off.
......@@ -921,7 +922,10 @@ xfs_dialloc(
int
xfs_difree(
xfs_trans_t *tp, /* transaction pointer */
xfs_ino_t inode) /* inode to be freed */
xfs_ino_t inode, /* inode to be freed */
xfs_bmap_free_t *flist, /* extents to free */
int *delete, /* set if inode cluster was deleted */
xfs_ino_t *first_ino) /* first inode in deleted cluster */
{
/* REFERENCED */
xfs_agblock_t agbno; /* block number containing inode */
......@@ -932,6 +936,7 @@ xfs_difree(
xfs_btree_cur_t *cur; /* inode btree cursor */
int error; /* error return value */
int i; /* result code */
int ilen; /* inodes in an inode cluster */
xfs_mount_t *mp; /* mount structure for filesystem */
int off; /* offset of inode in inode chunk */
xfs_inobt_rec_t rec; /* btree record */
......@@ -995,10 +1000,11 @@ xfs_difree(
if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
&rec.ir_freecount, &rec.ir_free, &i, ARCH_NOCONVERT)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
freecount += rec.ir_freecount;
if ((error = xfs_inobt_increment(cur, 0, &i)))
goto error0;
if (i) {
freecount += rec.ir_freecount;
if ((error = xfs_inobt_increment(cur, 0, &i)))
goto error0;
}
} while (i == 1);
ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
XFS_FORCED_SHUTDOWN(mp));
......@@ -1033,20 +1039,60 @@ xfs_difree(
*/
XFS_INOBT_SET_FREE(&rec, off, ARCH_NOCONVERT);
rec.ir_freecount++;
if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) {
cmn_err(CE_WARN,
"xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.",
error, mp->m_fsname);
goto error0;
}
/*
* Change the inode free counts and log the ag/sb changes.
* When an inode cluster is free, it becomes eligible for removal
*/
INT_MOD(agi->agi_freecount, ARCH_CONVERT, 1);
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
down_read(&mp->m_peraglock);
mp->m_perag[agno].pagi_freecount++;
up_read(&mp->m_peraglock);
if ((mp->m_flags & XFS_MOUNT_IDELETE) &&
(rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
*delete = 1;
*first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
/*
* Remove the inode cluster from the AGI B+Tree, adjust the
* AGI and Superblock inode counts, and mark the disk space
* to be freed when the transaction is committed.
*/
ilen = XFS_IALLOC_INODES(mp);
INT_MOD(agi->agi_count, ARCH_CONVERT, -ilen);
INT_MOD(agi->agi_freecount, ARCH_CONVERT, -(ilen - 1));
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
down_read(&mp->m_peraglock);
mp->m_perag[agno].pagi_freecount -= ilen - 1;
up_read(&mp->m_peraglock);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));
if ((error = xfs_inobt_delete(cur, &i))) {
cmn_err(CE_WARN, "xfs_difree: xfs_inobt_delete returned an error %d on %s.\n",
error, mp->m_fsname);
goto error0;
}
xfs_bmap_add_free(XFS_AGB_TO_FSB(mp,
agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)),
XFS_IALLOC_BLOCKS(mp), flist, mp);
} else {
*delete = 0;
if ((error = xfs_inobt_update(cur, rec.ir_startino, rec.ir_freecount, rec.ir_free))) {
cmn_err(CE_WARN,
"xfs_difree: xfs_inobt_update() returned an error %d on %s. Returning error.",
error, mp->m_fsname);
goto error0;
}
/*
* Change the inode free counts and log the ag/sb changes.
*/
INT_MOD(agi->agi_freecount, ARCH_CONVERT, 1);
xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
down_read(&mp->m_peraglock);
mp->m_perag[agno].pagi_freecount++;
up_read(&mp->m_peraglock);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
}
#ifdef DEBUG
if (cur->bc_nlevels == 1) {
int freecount = 0;
......@@ -1054,20 +1100,23 @@ xfs_difree(
if ((error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &i)))
goto error0;
do {
if ((error = xfs_inobt_get_rec(cur, &rec.ir_startino,
&rec.ir_freecount, &rec.ir_free, &i, ARCH_NOCONVERT)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
freecount += rec.ir_freecount;
if ((error = xfs_inobt_increment(cur, 0, &i)))
if ((error = xfs_inobt_get_rec(cur,
&rec.ir_startino,
&rec.ir_freecount,
&rec.ir_free, &i,
ARCH_NOCONVERT)))
goto error0;
if (i) {
freecount += rec.ir_freecount;
if ((error = xfs_inobt_increment(cur, 0, &i)))
goto error0;
}
} while (i == 1);
ASSERT(freecount == INT_GET(agi->agi_freecount, ARCH_CONVERT) ||
XFS_FORCED_SHUTDOWN(mp));
}
#endif
xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
return 0;
error0:
......
......@@ -134,7 +134,10 @@ xfs_dialloc(
int /* error */
xfs_difree(
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t inode); /* inode to be freed */
xfs_ino_t inode, /* inode to be freed */
struct xfs_bmap_free *flist, /* extents to free */
int *delete, /* set if inode cluster was deleted */
xfs_ino_t *first_ino); /* first inode in deleted cluster */
/*
* Return the location of the inode in bno/len/off,
......
......@@ -49,6 +49,7 @@
#include "xfs_btree.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
/*
* Inode allocation management for XFS.
......@@ -73,7 +74,6 @@ STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
* Internal functions.
*/
#ifdef _NOTYET_
/*
* Single level of the xfs_inobt_delete record deletion routine.
* Delete record pointed to by cur/level.
......@@ -87,8 +87,7 @@ xfs_inobt_delrec(
int *stat) /* fail/done/go-on */
{
xfs_buf_t *agbp; /* buffer for a.g. inode header */
xfs_agnumber_t agfbno; /* agf block of freed btree block */
xfs_buf_t *agfbp; /* bp of agf block of freed block */
xfs_mount_t *mp; /* mount structure */
xfs_agi_t *agi; /* allocation group inode header */
xfs_inobt_block_t *block; /* btree block record/key lives in */
xfs_agblock_t bno; /* btree block number */
......@@ -96,15 +95,15 @@ xfs_inobt_delrec(
int error; /* error return value */
int i; /* loop index */
xfs_inobt_key_t key; /* kp points here if block is level 0 */
xfs_inobt_key_t *kp; /* pointer to btree keys */
xfs_inobt_key_t *kp = NULL; /* pointer to btree keys */
xfs_agblock_t lbno; /* left block's block number */
xfs_buf_t *lbp; /* left block's buffer pointer */
xfs_inobt_block_t *left; /* left btree block */
xfs_inobt_key_t *lkp; /* left block key pointer */
xfs_inobt_ptr_t *lpp; /* left block address pointer */
int lrecs; /* number of records in left block */
int lrecs = 0; /* number of records in left block */
xfs_inobt_rec_t *lrp; /* left block record pointer */
xfs_inobt_ptr_t *pp; /* pointer to btree addresses */
xfs_inobt_ptr_t *pp = NULL; /* pointer to btree addresses */
int ptr; /* index in btree block for this rec */
xfs_agblock_t rbno; /* right block's block number */
xfs_buf_t *rbp; /* right block's buffer pointer */
......@@ -112,10 +111,12 @@ xfs_inobt_delrec(
xfs_inobt_key_t *rkp; /* right block key pointer */
xfs_inobt_rec_t *rp; /* pointer to btree records */
xfs_inobt_ptr_t *rpp; /* right block address pointer */
int rrecs; /* number of records in right block */
int rrecs = 0; /* number of records in right block */
int numrecs;
xfs_inobt_rec_t *rrp; /* right block record pointer */
xfs_btree_cur_t *tcur; /* temporary btree cursor */
mp = cur->bc_mp;
/*
* Get the index of the entry being deleted, check for nothing there.
......@@ -125,19 +126,22 @@ xfs_inobt_delrec(
*stat = 0;
return 0;
}
/*
* Get the buffer & block containing the record or key/ptr.
*/
bp = cur->bc_bufs[level];
block = XFS_BUF_TO_INOBT_BLOCK(bp);
#ifdef DEBUG
if (error = xfs_btree_check_sblock(cur, block, level, bp))
if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
return error;
#endif
/*
* Fail if we're off the end of the block.
*/
if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
if (ptr > numrecs) {
*stat = 0;
return 0;
}
......@@ -150,18 +154,18 @@ xfs_inobt_delrec(
kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
#ifdef DEBUG
for (i = ptr; i < INT_GET(block->bb_numrecs, ARCH_CONVERT); i++) {
if (error = xfs_btree_check_sptr(cur, INT_GET(pp[i], ARCH_CONVERT), level))
for (i = ptr; i < numrecs; i++) {
if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i], ARCH_CONVERT), level)))
return error;
}
#endif
if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
if (ptr < numrecs) {
memmove(&kp[ptr - 1], &kp[ptr],
(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*kp));
(numrecs - ptr) * sizeof(*kp));
memmove(&pp[ptr - 1], &pp[ptr],
(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*pp));
xfs_inobt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
xfs_inobt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
(numrecs - ptr) * sizeof(*pp));
xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
}
}
/*
......@@ -170,24 +174,25 @@ xfs_inobt_delrec(
*/
else {
rp = XFS_INOBT_REC_ADDR(block, 1, cur);
if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
if (ptr < numrecs) {
memmove(&rp[ptr - 1], &rp[ptr],
(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr) * sizeof(*rp));
xfs_inobt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT) - 1);
(numrecs - ptr) * sizeof(*rp));
xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
}
/*
* If it's the first record in the block, we'll need a key
* structure to pass up to the next level (updkey).
*/
if (ptr == 1) {
INT_COPY(key.ir_startino, rp->ir_startino, ARCH_CONVERT);
key.ir_startino = rp->ir_startino;
kp = &key;
}
}
/*
* Decrement and log the number of entries in the block.
*/
INT_MOD(block->bb_numrecs, ARCH_CONVERT, -1);
numrecs--;
INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
/*
* Is this the root level? If so, we're almost done.
......@@ -199,7 +204,7 @@ xfs_inobt_delrec(
* and it's NOT the leaf level,
* then we can get rid of this level.
*/
if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == 1 && level > 0) {
if (numrecs == 1 && level > 0) {
agbp = cur->bc_private.i.agbp;
agi = XFS_BUF_TO_AGI(agbp);
/*
......@@ -207,12 +212,13 @@ xfs_inobt_delrec(
* Make it the new root of the btree.
*/
bno = INT_GET(agi->agi_root, ARCH_CONVERT);
INT_COPY(agi->agi_root, *pp, ARCH_CONVERT);
agi->agi_root = *pp;
INT_MOD(agi->agi_level, ARCH_CONVERT, -1);
/*
* Free the block.
*/
if (error = xfs_free_extent(cur->bc_tp, bno, 1))
if ((error = xfs_free_extent(cur->bc_tp,
XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1)))
return error;
xfs_trans_binval(cur->bc_tp, bp);
xfs_ialloc_log_agi(cur->bc_tp, agbp,
......@@ -222,21 +228,6 @@ xfs_inobt_delrec(
*/
cur->bc_bufs[level] = NULL;
cur->bc_nlevels--;
/*
* To ensure that the freed block is not used for
* user data until this transaction is permanent,
* we lock the agf buffer for this ag until the
* transaction record makes it to the on-disk log.
*/
agfbno = XFS_AG_DADDR(cur->bc_mp,
cur->bc_private.i.agno,
XFS_AGF_DADDR(mp));
if (error = xfs_trans_read_buf(cur->bc_mp, cur->bc_tp,
cur->bc_mp->m_ddev_targp, agfbno,
XFS_FSS_TO_BB(mp, 1), 0, &agfbp))
return error;
ASSERT(!XFS_BUF_GETERROR(agfbp));
xfs_trans_bhold_until_committed(cur->bc_tp, agfbp);
} else if (level > 0 &&
(error = xfs_inobt_decrement(cur, level, &i)))
return error;
......@@ -253,7 +244,7 @@ xfs_inobt_delrec(
* If the number of records remaining in the block is at least
* the minimum, we're done.
*/
if (INT_GET(block->bb_numrecs, ARCH_CONVERT) >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
if (level > 0 &&
(error = xfs_inobt_decrement(cur, level, &i)))
return error;
......@@ -273,7 +264,7 @@ xfs_inobt_delrec(
* Duplicate the cursor so our btree manipulations here won't
* disrupt the next level up.
*/
if (error = xfs_btree_dup_cursor(cur, &tcur))
if ((error = xfs_btree_dup_cursor(cur, &tcur)))
return error;
/*
* If there's a right sibling, see if it's ok to shift an entry
......@@ -286,7 +277,7 @@ xfs_inobt_delrec(
*/
i = xfs_btree_lastrec(tcur, level);
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
if (error = xfs_inobt_increment(tcur, level, &i))
if ((error = xfs_inobt_increment(tcur, level, &i)))
goto error0;
XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
i = xfs_btree_lastrec(tcur, level);
......@@ -297,7 +288,7 @@ xfs_inobt_delrec(
rbp = tcur->bc_bufs[level];
right = XFS_BUF_TO_INOBT_BLOCK(rbp);
#ifdef DEBUG
if (error = xfs_btree_check_sblock(cur, right, level, rbp))
if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
goto error0;
#endif
/*
......@@ -311,7 +302,7 @@ xfs_inobt_delrec(
*/
if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >=
XFS_INOBT_BLOCK_MINRECS(level, cur)) {
if (error = xfs_inobt_lshift(tcur, level, &i))
if ((error = xfs_inobt_lshift(tcur, level, &i)))
goto error0;
if (i) {
ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
......@@ -334,7 +325,7 @@ xfs_inobt_delrec(
rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
if (lbno != NULLAGBLOCK) {
xfs_btree_firstrec(tcur, level);
if (error = xfs_inobt_decrement(tcur, level, &i))
if ((error = xfs_inobt_decrement(tcur, level, &i)))
goto error0;
}
}
......@@ -348,7 +339,7 @@ xfs_inobt_delrec(
* previous block.
*/
xfs_btree_firstrec(tcur, level);
if (error = xfs_inobt_decrement(tcur, level, &i))
if ((error = xfs_inobt_decrement(tcur, level, &i)))
goto error0;
xfs_btree_firstrec(tcur, level);
/*
......@@ -357,7 +348,7 @@ xfs_inobt_delrec(
lbp = tcur->bc_bufs[level];
left = XFS_BUF_TO_INOBT_BLOCK(lbp);
#ifdef DEBUG
if (error = xfs_btree_check_sblock(cur, left, level, lbp))
if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
goto error0;
#endif
/*
......@@ -371,7 +362,7 @@ xfs_inobt_delrec(
*/
if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >=
XFS_INOBT_BLOCK_MINRECS(level, cur)) {
if (error = xfs_inobt_rshift(tcur, level, &i))
if ((error = xfs_inobt_rshift(tcur, level, &i)))
goto error0;
if (i) {
ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
......@@ -402,41 +393,44 @@ xfs_inobt_delrec(
* See if we can join with the left neighbor block.
*/
if (lbno != NULLAGBLOCK &&
lrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
/*
* Set "right" to be the starting block,
* "left" to be the left neighbor.
*/
rbno = bno;
right = block;
rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
rbp = bp;
if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
cur->bc_private.i.agno, lbno, 0, &lbp,
XFS_INO_BTREE_REF))
XFS_INO_BTREE_REF)))
return error;
left = XFS_BUF_TO_INOBT_BLOCK(lbp);
if (error = xfs_btree_check_sblock(cur, left, level, lbp))
lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
return error;
}
/*
* If that won't work, see if we can join with the right neighbor block.
*/
else if (rbno != NULLAGBLOCK &&
rrecs + INT_GET(block->bb_numrecs, ARCH_CONVERT) <=
XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
/*
* Set "left" to be the starting block,
* "right" to be the right neighbor.
*/
lbno = bno;
left = block;
lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
lbp = bp;
if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
cur->bc_private.i.agno, rbno, 0, &rbp,
XFS_INO_BTREE_REF))
XFS_INO_BTREE_REF)))
return error;
right = XFS_BUF_TO_INOBT_BLOCK(rbp);
if (error = xfs_btree_check_sblock(cur, right, level, rbp))
rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
return error;
}
/*
......@@ -457,40 +451,53 @@ xfs_inobt_delrec(
/*
* It's a non-leaf. Move keys and pointers.
*/
lkp = XFS_INOBT_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
lpp = XFS_INOBT_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
#ifdef DEBUG
for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
if (error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level))
for (i = 0; i < rrecs; i++) {
if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)))
return error;
}
#endif
memcpy(lkp, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lkp));
memcpy(lpp, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lpp));
xfs_inobt_log_keys(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
xfs_inobt_log_ptrs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
memcpy(lkp, rkp, rrecs * sizeof(*lkp));
memcpy(lpp, rpp, rrecs * sizeof(*lpp));
xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
} else {
/*
* It's a leaf. Move records.
*/
lrp = XFS_INOBT_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1, cur);
lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
memcpy(lrp, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*lrp));
xfs_inobt_log_recs(cur, lbp, INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1,
INT_GET(left->bb_numrecs, ARCH_CONVERT) + INT_GET(right->bb_numrecs, ARCH_CONVERT));
memcpy(lrp, rrp, rrecs * sizeof(*lrp));
xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
}
/*
* If we joined with the left neighbor, set the buffer in the
* cursor to the left block, and fix up the index.
*/
if (bp != lbp) {
xfs_btree_setbuf(cur, level, lbp);
cur->bc_ptrs[level] += lrecs;
}
/*
* If we joined with the right neighbor and there's a level above
* us, increment the cursor at that level.
*/
else if (level + 1 < cur->bc_nlevels &&
(error = xfs_inobt_increment(cur, level + 1, &i)))
return error;
/*
* Fix up the number of records in the surviving block.
*/
INT_MOD(left->bb_numrecs, ARCH_CONVERT, INT_GET(right->bb_numrecs, ARCH_CONVERT));
lrecs += rrecs;
INT_SET(left->bb_numrecs, ARCH_CONVERT, lrecs);
/*
* Fix up the right block pointer in the surviving block, and log it.
*/
INT_COPY(left->bb_rightsib, right->bb_rightsib, ARCH_CONVERT);
left->bb_rightsib = right->bb_rightsib;
xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
/*
* If there is a right sibling now, make it point to the
......@@ -500,12 +507,12 @@ xfs_inobt_delrec(
xfs_inobt_block_t *rrblock;
xfs_buf_t *rrbp;
if (error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
cur->bc_private.i.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
&rrbp, XFS_INO_BTREE_REF))
&rrbp, XFS_INO_BTREE_REF)))
return error;
rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
if (error = xfs_btree_check_sblock(cur, rrblock, level, rrbp))
if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
return error;
INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno);
xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
......@@ -513,40 +520,10 @@ xfs_inobt_delrec(
/*
* Free the deleting block.
*/
if (error = xfs_free_extent(cur->bc_tp, rbno, 1))
if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
cur->bc_private.i.agno, rbno), 1)))
return error;
xfs_trans_binval(cur->bc_tp, rbp);
/*
* To ensure that the freed block is not used for
* user data until this transaction is permanent,
* we lock the agf buffer for this ag until the
* transaction record makes it to the on-disk log.
*/
agfbno = XFS_AG_DADDR(cur->bc_mp, cur->bc_private.i.agno,
XFS_AGF_DADDR(mp));
if (error = xfs_trans_read_buf(cur->bc_mp, cur->bc_tp,
cur->bc_mp->m_ddev_targp, agfbno,
XFS_FSS_TO_BB(mp, 1), 0, &agfbp))
return error;
ASSERT(!XFS_BUF_GETERROR(agfbp));
xfs_trans_bhold_until_committed(cur->bc_tp, agfbp);
/*
* If we joined with the left neighbor, set the buffer in the
* cursor to the left block, and fix up the index.
*/
if (bp != lbp) {
cur->bc_bufs[level] = lbp;
cur->bc_ptrs[level] += INT_GET(left->bb_numrecs, ARCH_CONVERT);
cur->bc_ra[level] = 0;
}
/*
* If we joined with the right neighbor and there's a level above
* us, increment the cursor at that level.
*/
else if (level + 1 < cur->bc_nlevels &&
(error = xfs_inobt_increment(cur, level + 1, &i))) {
return error;
}
/*
* Readjust the ptr at this level if it's not a leaf, since it's
* still pointing at the deletion point, which makes the cursor
......@@ -565,7 +542,6 @@ xfs_inobt_delrec(
xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
return error;
}
#endif /* _NOTYET_ */
/*
* Insert one record/level. Return information to the caller
......@@ -590,6 +566,7 @@ xfs_inobt_insrec(
xfs_btree_cur_t *ncur; /* new cursor to be used at next lvl */
xfs_inobt_key_t nkey; /* new key value, from split */
xfs_inobt_rec_t nrec; /* new record value, for caller */
int numrecs;
int optr; /* old ptr value */
xfs_inobt_ptr_t *pp; /* pointer to btree addresses */
int ptr; /* index in btree block for this rec */
......@@ -622,13 +599,14 @@ xfs_inobt_insrec(
*/
bp = cur->bc_bufs[level];
block = XFS_BUF_TO_INOBT_BLOCK(bp);
numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
#ifdef DEBUG
if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
return error;
/*
* Check that the new entry is being inserted in the right place.
*/
if (ptr <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
if (ptr <= numrecs) {
if (level == 0) {
rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
xfs_btree_check_rec(cur->bc_btnum, recp, rp);
......@@ -644,7 +622,7 @@ xfs_inobt_insrec(
* If the block is full, we can't insert the new entry until we
* make the block un-full.
*/
if (INT_GET(block->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
/*
* First, try shifting an entry to the right neighbor.
*/
......@@ -695,6 +673,7 @@ xfs_inobt_insrec(
* At this point we know there's room for our new entry in the block
* we're pointing at.
*/
numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
if (level > 0) {
/*
* It's a non-leaf entry. Make a hole for the new data
......@@ -703,15 +682,15 @@ xfs_inobt_insrec(
kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
#ifdef DEBUG
for (i = INT_GET(block->bb_numrecs, ARCH_CONVERT); i >= ptr; i--) {
for (i = numrecs; i >= ptr; i--) {
if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level)))
return error;
}
#endif
memmove(&kp[ptr], &kp[ptr - 1],
(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*kp));
(numrecs - ptr + 1) * sizeof(*kp));
memmove(&pp[ptr], &pp[ptr - 1],
(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*pp));
(numrecs - ptr + 1) * sizeof(*pp));
/*
* Now stuff the new data in, bump numrecs and log the new data.
*/
......@@ -721,23 +700,25 @@ xfs_inobt_insrec(
#endif
kp[ptr - 1] = key; /* INT_: struct copy */
INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
xfs_inobt_log_keys(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
xfs_inobt_log_ptrs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
numrecs++;
INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
xfs_inobt_log_keys(cur, bp, ptr, numrecs);
xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
} else {
/*
* It's a leaf entry. Make a hole for the new record.
*/
rp = XFS_INOBT_REC_ADDR(block, 1, cur);
memmove(&rp[ptr], &rp[ptr - 1],
(INT_GET(block->bb_numrecs, ARCH_CONVERT) - ptr + 1) * sizeof(*rp));
(numrecs - ptr + 1) * sizeof(*rp));
/*
* Now stuff the new record in, bump numrecs
* and log the new data.
*/
rp[ptr - 1] = *recp; /* INT_: struct copy */
INT_MOD(block->bb_numrecs, ARCH_CONVERT, +1);
xfs_inobt_log_recs(cur, bp, ptr, INT_GET(block->bb_numrecs, ARCH_CONVERT));
numrecs++;
INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
xfs_inobt_log_recs(cur, bp, ptr, numrecs);
}
/*
* Log the new number of records in the btree header.
......@@ -747,7 +728,7 @@ xfs_inobt_insrec(
/*
* Check that the key/record is in the right place, now.
*/
if (ptr < INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
if (ptr < numrecs) {
if (level == 0)
xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
rp + ptr);
......@@ -1774,7 +1755,6 @@ xfs_inobt_decrement(
return 0;
}
#ifdef _NOTYET_
/*
* Delete the record pointed to by cur.
* The cursor refers to the place where the record was (could be inserted)
......@@ -1795,13 +1775,13 @@ xfs_inobt_delete(
* Otherwise we are done.
*/
for (level = 0, i = 2; i == 2; level++) {
if (error = xfs_inobt_delrec(cur, level, &i))
if ((error = xfs_inobt_delrec(cur, level, &i)))
return error;
}
if (i == 0) {
for (level = 1; level < cur->bc_nlevels; level++) {
if (cur->bc_ptrs[level] == 0) {
if (error = xfs_inobt_decrement(cur, level, &i))
if ((error = xfs_inobt_decrement(cur, level, &i)))
return error;
break;
}
......@@ -1810,7 +1790,7 @@ xfs_inobt_delete(
*stat = i;
return 0;
}
#endif /* _NOTYET_ */
/*
* Get the data from the pointed-to record.
......
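One detail worth calling out in the xfs_inobt_delrec() changes above: xfs_free_extent() expects a filesystem-relative block number, but the old code (previously compiled out under _NOTYET_) handed it the AG-relative block of the btree block being freed. The reworked code wraps the block in XFS_AGB_TO_FSB() before the call. As a rough illustration of what that conversion does, assuming the usual definition of the macro (which is not part of this diff):

    /* assumed macro semantics, for illustration only */
    xfs_fsblock_t fsbno;
    fsbno = ((xfs_fsblock_t)cur->bc_private.i.agno << mp->m_sb.sb_agblklog) |
            rbno;
    /* i.e. XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, rbno) */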
......@@ -225,7 +225,6 @@ xfs_inobt_decrement(
int level, /* level in btree, 0 is leaf */
int *stat); /* success/failure */
#ifdef _NOTYET_
/*
* Delete the record pointed to by cur.
* The cursor refers to the place where the record was (could be inserted)
......@@ -235,7 +234,6 @@ int /* error */
xfs_inobt_delete(
struct xfs_btree_cur *cur, /* btree cursor */
int *stat); /* success/failure */
#endif /* _NOTYET_ */
/*
* Get the data from the pointed-to record.
......
......@@ -258,6 +258,7 @@ xfs_iget_core(
if (newnode) {
xfs_iocore_inode_reinit(ip);
}
ip->i_flags &= ~XFS_ISTALE;
vn_trace_exit(vp, "xfs_iget.found",
(inst_t *)__return_address);
......
......@@ -36,6 +36,7 @@
#include "xfs_inum.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_trans_priv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir.h"
......@@ -2103,6 +2104,180 @@ xfs_iunlink_remove(
return 0;
}
static __inline__ int xfs_inode_clean(xfs_inode_t *ip)
{
return (((ip->i_itemp == NULL) ||
!(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
(ip->i_update_core == 0));
}
void
xfs_ifree_cluster(
xfs_inode_t *free_ip,
xfs_trans_t *tp,
xfs_ino_t inum)
{
xfs_mount_t *mp = free_ip->i_mount;
int blks_per_cluster;
int nbufs;
int ninodes;
int i, j, found, pre_flushed;
xfs_daddr_t blkno;
xfs_buf_t *bp;
xfs_ihash_t *ih;
xfs_inode_t *ip, **ip_found;
xfs_inode_log_item_t *iip;
xfs_log_item_t *lip;
SPLDECL(s);
if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) {
blks_per_cluster = 1;
ninodes = mp->m_sb.sb_inopblock;
nbufs = XFS_IALLOC_BLOCKS(mp);
} else {
blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) /
mp->m_sb.sb_blocksize;
ninodes = blks_per_cluster * mp->m_sb.sb_inopblock;
nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster;
}
ip_found = kmem_alloc(ninodes * sizeof(xfs_inode_t *), KM_NOFS);
for (j = 0; j < nbufs; j++, inum += ninodes) {
blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
XFS_INO_TO_AGBNO(mp, inum));
/*
* Look for each inode in memory and attempt to lock it;
* we can be racing with flush and tail pushing here.
* Any inode we get the locks on, add to an array of
* inode items to process later.
*
* To get the buffer lock, we could beat a flush
* or tail pushing thread to the lock here, in which
* case they will go looking for the inode buffer
* and fail; we need some other form of interlock
* here.
*/
found = 0;
for (i = 0; i < ninodes; i++) {
ih = XFS_IHASH(mp, inum + i);
read_lock(&ih->ih_lock);
for (ip = ih->ih_next; ip != NULL; ip = ip->i_next) {
if (ip->i_ino == inum + i)
break;
}
/* Inode not in memory or we found it already,
* nothing to do
*/
if (!ip || (ip->i_flags & XFS_ISTALE)) {
read_unlock(&ih->ih_lock);
continue;
}
if (xfs_inode_clean(ip)) {
read_unlock(&ih->ih_lock);
continue;
}
/* If we can get the locks then add it to the
* list, otherwise by the time we get the bp lock
* below it will already be attached to the
* inode buffer.
*/
/* This inode will already be locked - by us, let's
* keep it that way.
*/
if (ip == free_ip) {
if (xfs_iflock_nowait(ip)) {
ip->i_flags |= XFS_ISTALE;
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
} else {
ip_found[found++] = ip;
}
}
read_unlock(&ih->ih_lock);
continue;
}
if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
if (xfs_iflock_nowait(ip)) {
ip->i_flags |= XFS_ISTALE;
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
} else {
ip_found[found++] = ip;
}
} else {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
}
read_unlock(&ih->ih_lock);
}
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
mp->m_bsize * blks_per_cluster,
XFS_BUF_LOCK);
pre_flushed = 0;
lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
while (lip) {
if (lip->li_type == XFS_LI_INODE) {
iip = (xfs_inode_log_item_t *)lip;
ASSERT(iip->ili_logged == 1);
lip->li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*)) xfs_istale_done;
AIL_LOCK(mp,s);
iip->ili_flush_lsn = iip->ili_item.li_lsn;
AIL_UNLOCK(mp, s);
iip->ili_inode->i_flags |= XFS_ISTALE;
pre_flushed++;
}
lip = lip->li_bio_list;
}
for (i = 0; i < found; i++) {
ip = ip_found[i];
iip = ip->i_itemp;
if (!iip) {
ip->i_update_core = 0;
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
continue;
}
iip->ili_last_fields = iip->ili_format.ilf_fields;
iip->ili_format.ilf_fields = 0;
iip->ili_logged = 1;
AIL_LOCK(mp,s);
iip->ili_flush_lsn = iip->ili_item.li_lsn;
AIL_UNLOCK(mp, s);
xfs_buf_attach_iodone(bp,
(void(*)(xfs_buf_t*,xfs_log_item_t*))
xfs_istale_done, (xfs_log_item_t *)iip);
if (ip != free_ip) {
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
}
if (found || pre_flushed)
xfs_trans_stale_inode_buf(tp, bp);
xfs_trans_binval(tp, bp);
}
kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *));
}
/*
* This is called to return an inode to the inode free list.
* The inode should already be truncated to 0 length and have
......@@ -2116,9 +2291,12 @@ xfs_iunlink_remove(
int
xfs_ifree(
xfs_trans_t *tp,
xfs_inode_t *ip)
xfs_inode_t *ip,
xfs_bmap_free_t *flist)
{
int error;
int error;
int delete;
xfs_ino_t first_ino;
ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
ASSERT(ip->i_transp == tp);
......@@ -2137,7 +2315,7 @@ xfs_ifree(
return error;
}
error = xfs_difree(tp, ip->i_ino);
error = xfs_difree(tp, ip->i_ino, flist, &delete, &first_ino);
if (error != 0) {
return error;
}
......@@ -2149,13 +2327,17 @@ xfs_ifree(
XFS_IFORK_DSIZE(ip) / (uint)sizeof(xfs_bmbt_rec_t);
ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS;
ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS;
/*
* Bump the generation count so no one will be confused
* by reincarnations of this inode.
*/
ip->i_d.di_gen++;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (delete) {
xfs_ifree_cluster(ip, tp, first_ino);
}
return 0;
}
......
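For a feel of the geometry xfs_ifree_cluster() works with, here is a small standalone calculation mirroring the blks_per_cluster/ninodes/nbufs logic at the top of that function. Every concrete number below is an assumption chosen for illustration (4 KiB blocks, an 8 KiB inode cluster, 16 inodes per block, a 4-block inode allocation chunk); none of them come from the patch itself.

    #include <stdio.h>

    int main(void)
    {
        int blocksize = 4096;      /* sb_blocksize, assumed */
        int inopblock = 16;        /* sb_inopblock, assumed */
        int cluster = 8192;        /* XFS_INODE_CLUSTER_SIZE, assumed */
        int chunk_blocks = 4;      /* XFS_IALLOC_BLOCKS, assumed */
        int blks_per_cluster, ninodes, nbufs;

        if (blocksize >= cluster) {
            blks_per_cluster = 1;
            ninodes = inopblock;
            nbufs = chunk_blocks;
        } else {
            blks_per_cluster = cluster / blocksize;
            ninodes = blks_per_cluster * inopblock;
            nbufs = chunk_blocks / blks_per_cluster;
        }
        /* with the assumed values: 2 blocks per cluster buffer,
         * 32 inodes per buffer, 2 buffers covering the 64-inode chunk */
        printf("blks_per_cluster=%d ninodes=%d nbufs=%d\n",
               blks_per_cluster, ninodes, nbufs);
        return 0;
    }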
......@@ -179,7 +179,7 @@ typedef struct xfs_ihash {
* Inode hashing and hash bucket locking.
*/
#define XFS_BUCKETS(mp) (37*(mp)->m_sb.sb_agcount-1)
#define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)ino) % (mp)->m_ihsize))
#define XFS_IHASH(mp,ino) ((mp)->m_ihash + (((uint)(ino)) % (mp)->m_ihsize))
/*
* This is the xfs inode cluster hash. This hash is used by xfs_iflush to
......@@ -362,7 +362,8 @@ void xfs_ifork_next_set(xfs_inode_t *ip, int w, int n);
#define XFS_IUIOSZ 0x0002 /* inode i/o sizes have been explicitly set */
#define XFS_IQUIESCE 0x0004 /* we have started quiescing for this inode */
#define XFS_IRECLAIM 0x0008 /* we have started reclaiming this inode */
#define XFS_IRECLAIMABLE 0x0010 /* inode can be reclaimed */
#define XFS_ISTALE 0x0010 /* inode has been staled */
#define XFS_IRECLAIMABLE 0x0020 /* inode can be reclaimed */
/*
* Flags for inode locking.
......@@ -487,7 +488,8 @@ int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, nlink_t,
struct xfs_buf **, boolean_t *, xfs_inode_t **);
void xfs_xlate_dinode_core(xfs_caddr_t, struct xfs_dinode_core *, int,
xfs_arch_t);
int xfs_ifree(struct xfs_trans *, xfs_inode_t *);
int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
struct xfs_bmap_free *);
int xfs_atruncate_start(xfs_inode_t *);
void xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
......
......@@ -631,6 +631,14 @@ xfs_inode_item_trylock(
}
/* NOTREACHED */
}
/* Stale items should force out the iclog */
if (ip->i_flags & XFS_ISTALE) {
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_SHARED|XFS_IUNLOCK_NONOTIFY);
return XFS_ITEM_PINNED;
}
#ifdef DEBUG
if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
ASSERT(iip->ili_format.ilf_fields != 0);
......@@ -1074,3 +1082,11 @@ xfs_iflush_abort(
*/
xfs_ifunlock(ip);
}
void
xfs_istale_done(
xfs_buf_t *bp,
xfs_inode_log_item_t *iip)
{
xfs_iflush_abort(iip->ili_inode);
}
......@@ -189,6 +189,7 @@ int xfs_ilog_fext(int w);
void xfs_inode_item_init(struct xfs_inode *, struct xfs_mount *);
void xfs_inode_item_destroy(struct xfs_inode *);
void xfs_iflush_done(struct xfs_buf *, xfs_inode_log_item_t *);
void xfs_istale_done(struct xfs_buf *, xfs_inode_log_item_t *);
void xfs_iflush_abort(struct xfs_inode *);
#endif /* __KERNEL__ */
......
......@@ -1529,17 +1529,35 @@ xlog_recover_reorder_trans(
xlog_recover_t *trans)
{
xlog_recover_item_t *first_item, *itemq, *itemq_next;
xfs_buf_log_format_t *buf_f;
xfs_buf_log_format_v1_t *obuf_f;
ushort flags;
first_item = itemq = trans->r_itemq;
trans->r_itemq = NULL;
do {
itemq_next = itemq->ri_next;
buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;
switch (ITEM_TYPE(itemq)) {
case XFS_LI_BUF:
flags = buf_f->blf_flags;
break;
case XFS_LI_6_1_BUF:
case XFS_LI_5_3_BUF:
xlog_recover_insert_item_frontq(&trans->r_itemq, itemq);
obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
flags = obuf_f->blf_flags;
break;
}
switch (ITEM_TYPE(itemq)) {
case XFS_LI_BUF:
case XFS_LI_6_1_BUF:
case XFS_LI_5_3_BUF:
if (!(flags & XFS_BLI_CANCEL)) {
xlog_recover_insert_item_frontq(&trans->r_itemq,
itemq);
break;
}
case XFS_LI_INODE:
case XFS_LI_6_1_INODE:
case XFS_LI_5_3_INODE:
......@@ -1668,32 +1686,16 @@ xlog_recover_do_buffer_pass1(
* made at that point.
*/
STATIC int
xlog_recover_do_buffer_pass2(
xlog_check_buffer_cancelled(
xlog_t *log,
xfs_buf_log_format_t *buf_f)
xfs_daddr_t blkno,
uint len,
ushort flags)
{
xfs_buf_cancel_t *bcp;
xfs_buf_cancel_t *prevp;
xfs_buf_cancel_t **bucket;
xfs_buf_log_format_v1_t *obuf_f;
xfs_daddr_t blkno = 0;
ushort flags = 0;
uint len = 0;
switch (buf_f->blf_type) {
case XFS_LI_BUF:
blkno = buf_f->blf_blkno;
flags = buf_f->blf_flags;
len = buf_f->blf_len;
break;
case XFS_LI_6_1_BUF:
case XFS_LI_5_3_BUF:
obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
blkno = (xfs_daddr_t) obuf_f->blf_blkno;
flags = obuf_f->blf_flags;
len = (xfs_daddr_t) obuf_f->blf_len;
break;
}
if (log->l_buf_cancel_table == NULL) {
/*
* There is nothing in the table built in pass one,
......@@ -1755,6 +1757,34 @@ xlog_recover_do_buffer_pass2(
return 0;
}
STATIC int
xlog_recover_do_buffer_pass2(
xlog_t *log,
xfs_buf_log_format_t *buf_f)
{
xfs_buf_log_format_v1_t *obuf_f;
xfs_daddr_t blkno = 0;
ushort flags = 0;
uint len = 0;
switch (buf_f->blf_type) {
case XFS_LI_BUF:
blkno = buf_f->blf_blkno;
flags = buf_f->blf_flags;
len = buf_f->blf_len;
break;
case XFS_LI_6_1_BUF:
case XFS_LI_5_3_BUF:
obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
blkno = (xfs_daddr_t) obuf_f->blf_blkno;
flags = obuf_f->blf_flags;
len = (xfs_daddr_t) obuf_f->blf_len;
break;
}
return xlog_check_buffer_cancelled(log, blkno, len, flags);
}
/*
* Perform recovery for a buffer full of inodes. In these buffers,
* the only data which should be recovered is that which corresponds
......@@ -2289,6 +2319,14 @@ xlog_recover_do_inode_trans(
imap.im_blkno = 0;
xfs_imap(log->l_mp, 0, ino, &imap, 0);
}
/*
* Inode buffers can be freed; look out for that,
* and do not replay the inode.
*/
if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0))
return 0;
bp = xfs_buf_read_flags(mp->m_ddev_targp, imap.im_blkno, imap.im_len,
XFS_BUF_LOCK);
if (XFS_BUF_ISERROR(bp)) {
......
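The guard added to xlog_recover_do_inode_trans() is the recovery-side counterpart of the buffer staling: if the buffer that held this inode was logged as cancelled (because its cluster was freed), the inode must not be replayed, since that space may since have been reused for something else. Condensed from the hunk above, the new check is simply:

    if (xlog_check_buffer_cancelled(log, imap.im_blkno, imap.im_len, 0))
        return 0;   /* the inode buffer was freed; skip replay */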
......@@ -416,6 +416,7 @@ typedef struct xfs_mount {
#define XFS_MOUNT_32BITINOOPT 0x00008000 /* saved mount option state */
#define XFS_MOUNT_NOUUID 0x00010000 /* ignore uuid during mount */
#define XFS_MOUNT_NOLOGFLUSH 0x00020000
#define XFS_MOUNT_IDELETE 0x00040000 /* delete empty inode clusters*/
/*
* Default minimum read and write sizes.
......
......@@ -365,7 +365,6 @@ xfs_trans_mod_sb(
switch (field) {
case XFS_TRANS_SB_ICOUNT:
ASSERT(delta > 0);
tp->t_icount_delta += delta;
break;
case XFS_TRANS_SB_IFREE:
......
......@@ -703,6 +703,8 @@ typedef struct xfs_trans {
* the agi hash list and counters: sector size
* the inode btree entry: block size
* the on disk inode before ours in the agi hash list: inode cluster size
* the inode btree: max depth * blocksize
* the allocation btrees: 2 trees * (max depth - 1) * block size
*/
#define XFS_CALC_IFREE_LOG_RES(mp) \
((mp)->m_sb.sb_inodesize + \
......@@ -710,7 +712,10 @@ typedef struct xfs_trans {
(mp)->m_sb.sb_sectsize + \
XFS_FSB_TO_B((mp), 1) + \
MAX((__uint16_t)XFS_FSB_TO_B((mp), 1), XFS_INODE_CLUSTER_SIZE(mp)) + \
(128 * 5))
(128 * 5) + \
(128 * (2 + XFS_IALLOC_BLOCKS(mp) + XFS_IN_MAXLEVELS(mp) + \
XFS_ALLOCFREE_LOG_COUNT(mp, 1))))
#define XFS_IFREE_LOG_RES(mp) ((mp)->m_reservations.tr_ifree)
......@@ -918,6 +923,7 @@ typedef struct xfs_trans {
#define XFS_DEFAULT_LOG_COUNT 1
#define XFS_DEFAULT_PERM_LOG_COUNT 2
#define XFS_ITRUNCATE_LOG_COUNT 2
#define XFS_INACTIVE_LOG_COUNT 2
#define XFS_CREATE_LOG_COUNT 2
#define XFS_MKDIR_LOG_COUNT 3
#define XFS_SYMLINK_LOG_COUNT 3
......@@ -991,6 +997,8 @@ void xfs_trans_bhold(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_bhold_until_committed(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_binval(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_inode_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
......
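The extra terms added to XFS_CALC_IFREE_LOG_RES account for what freeing a cluster can now touch: the cluster's own blocks, the inode btree down its full depth, and the allocation btrees used to free the extent. As a rough worked example of the added per-item overhead only, with purely illustrative values (XFS_IALLOC_BLOCKS = 4, XFS_IN_MAXLEVELS = 5, XFS_ALLOCFREE_LOG_COUNT(mp, 1) = 8, none taken from the patch):

    /* illustrative numbers only */
    int extra = 128 * (2 + 4 + 5 + 8);  /* = 2432 bytes of extra reservation */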
......@@ -931,6 +931,35 @@ xfs_trans_inode_buf(
bip->bli_format.blf_flags |= XFS_BLI_INODE_BUF;
}
/*
* This call is used to indicate that the buffer is going to
* be staled and was an inode buffer. This means it gets
* special processing during unpin - where any inodes
* associated with the buffer should be removed from the AIL.
* There is also special processing during recovery: any
* replay of the inodes in the buffer needs to be prevented,
* as the buffer may have been reused.
*/
void
xfs_trans_stale_inode_buf(
xfs_trans_t *tp,
xfs_buf_t *bp)
{
xfs_buf_log_item_t *bip;
ASSERT(XFS_BUF_ISBUSY(bp));
ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_STALE_INODE;
bip->bli_item.li_cb = (void(*)(xfs_buf_t*,xfs_log_item_t*))
xfs_buf_iodone;
}
/*
* Mark the buffer as being one which contains newly allocated
......@@ -954,7 +983,6 @@ xfs_trans_inode_alloc_buf(
bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
}
......
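Within this patch the only caller of xfs_trans_stale_inode_buf() is xfs_ifree_cluster(); the pairing, condensed from the xfs_inode.c hunk above, is:

    if (found || pre_flushed)
        xfs_trans_stale_inode_buf(tp, bp);
    xfs_trans_binval(tp, bp);

Setting XFS_BLI_STALE_INODE is what makes xfs_buf_item_unpin() run the attached xfs_istale_done() callbacks when the stale buffer is unpinned, instead of just removing the buffer item from the AIL.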
......@@ -298,6 +298,8 @@ xfs_start_flags(
mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
}
if (ap->flags & XFSMNT_IDELETE)
mp->m_flags |= XFS_MOUNT_IDELETE;
/*
* no recovery flag requires a read-only mount
......@@ -1597,6 +1599,7 @@ xfs_vget(
#define MNTOPT_NOLOGFLUSH "nologflush" /* don't hard flush on log writes */
#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
int
......@@ -1611,6 +1614,8 @@ xfs_parseargs(
int dsunit, dswidth, vol_dsunit, vol_dswidth;
int iosize;
args->flags |= XFSMNT_IDELETE; /* default to on */
if (!options)
return 0;
......@@ -1715,6 +1720,8 @@ xfs_parseargs(
args->flags |= XFSMNT_NOUUID;
} else if (!strcmp(this_char, MNTOPT_NOLOGFLUSH)) {
args->flags |= XFSMNT_NOLOGFLUSH;
} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
args->flags &= ~XFSMNT_IDELETE;
} else if (!strcmp(this_char, "osyncisdsync")) {
/* no-op, this is now the default */
printk("XFS: osyncisdsync is now the default, option is deprecated.\n");
......
......@@ -1595,8 +1595,7 @@ xfs_inactive_symlink_local(
STATIC int
xfs_inactive_attrs(
xfs_inode_t *ip,
xfs_trans_t **tpp,
int *commitflags)
xfs_trans_t **tpp)
{
xfs_trans_t *tp;
int error;
......@@ -1606,9 +1605,8 @@ xfs_inactive_attrs(
tp = *tpp;
mp = ip->i_mount;
ASSERT(ip->i_d.di_forkoff != 0);
xfs_trans_commit(tp, *commitflags, NULL);
xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
*commitflags = 0;
error = xfs_attr_inactive(ip);
if (error) {
......@@ -1620,8 +1618,8 @@ xfs_inactive_attrs(
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
error = xfs_trans_reserve(tp, 0,
XFS_IFREE_LOG_RES(mp),
0, 0,
XFS_DEFAULT_LOG_COUNT);
0, XFS_TRANS_PERM_LOG_RES,
XFS_INACTIVE_LOG_COUNT);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0);
......@@ -1694,10 +1692,12 @@ xfs_inactive(
{
xfs_inode_t *ip;
vnode_t *vp;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
int committed;
xfs_trans_t *tp;
xfs_mount_t *mp;
int error;
int commit_flags;
int truncate;
vp = BHV_TO_VNODE(bdp);
......@@ -1795,10 +1795,10 @@ xfs_inactive(
*/
error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
(!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));
commit_flags = XFS_TRANS_RELEASE_LOG_RES;
if (error) {
xfs_trans_cancel(tp, commit_flags | XFS_TRANS_ABORT);
xfs_trans_cancel(tp,
XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
return (VN_INACTIVE_CACHE);
}
......@@ -1819,13 +1819,11 @@ xfs_inactive(
xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip);
commit_flags = XFS_TRANS_RELEASE_LOG_RES;
} else {
error = xfs_trans_reserve(tp, 0,
XFS_IFREE_LOG_RES(mp),
0, 0,
XFS_DEFAULT_LOG_COUNT);
0, XFS_TRANS_PERM_LOG_RES,
XFS_INACTIVE_LOG_COUNT);
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0);
......@@ -1835,7 +1833,6 @@ xfs_inactive(
xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
xfs_trans_ihold(tp, ip);
commit_flags = 0;
}
/*
......@@ -1846,7 +1843,7 @@ xfs_inactive(
* because we can't use it for xfs_attr_inactive().
*/
if (ip->i_d.di_anextents > 0) {
error = xfs_inactive_attrs(ip, &tp, &commit_flags);
error = xfs_inactive_attrs(ip, &tp);
/*
* If we got an error, the transaction is already
* cancelled, and the inode is unlocked. Just get out.
......@@ -1860,7 +1857,8 @@ xfs_inactive(
/*
* Free the inode.
*/
error = xfs_ifree(tp, ip);
XFS_BMAP_INIT(&free_list, &first_block);
error = xfs_ifree(tp, ip, &free_list);
if (error) {
/*
* If we fail to free the inode, shut down. The cancel
......@@ -1873,7 +1871,7 @@ xfs_inactive(
error, mp->m_fsname);
xfs_force_shutdown(mp, XFS_METADATA_IO_ERROR);
}
xfs_trans_cancel(tp, commit_flags | XFS_TRANS_ABORT);
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
} else {
/*
* Credit the quota account(s). The inode is gone.
......@@ -1884,7 +1882,9 @@ xfs_inactive(
* Just ignore errors at this point. There is
* nothing we can do except to try to keep going.
*/
(void) xfs_trans_commit(tp, commit_flags, NULL);
(void) xfs_bmap_finish(&tp, &free_list, first_block,
&committed);
(void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
}
/*
* Release the dquots held by inode, if any.
......
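The log reservation changes in xfs_inactive()/xfs_inactive_attrs() follow from the cluster free: xfs_bmap_finish() may commit the current transaction and continue in a duplicate one, which requires a permanent log reservation, hence the switch to XFS_TRANS_PERM_LOG_RES with the new XFS_INACTIVE_LOG_COUNT and the matching XFS_TRANS_RELEASE_LOG_RES on every cancel and commit path. The reserve call, as it now reads in the hunk above:

    error = xfs_trans_reserve(tp, 0, XFS_IFREE_LOG_RES(mp),
                              0, XFS_TRANS_PERM_LOG_RES,
                              XFS_INACTIVE_LOG_COUNT);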
......@@ -2643,6 +2643,7 @@ xfs_buf_item_print(xfs_buf_log_item_t *blip, int summary)
"stale", /* 0x4 */
"logged", /* 0x8 */
"ialloc", /* 0x10 */
"inode_stale", /* 0x20 */
0
};
static char *blf_flags[] = {
......@@ -4811,6 +4812,7 @@ xfsidbg_xnode(xfs_inode_t *ip)
"uiosize", /* XFS_IUIOSZ */
"quiesce", /* XFS_IQUIESCE */
"reclaim", /* XFS_IRECLAIM */
"stale", /* XFS_ISTALE */
NULL
};
......