Commit f2bdfda9 authored by Dave Chinner's avatar Dave Chinner

Merge branch 'xfs-4.8-misc-fixes-4' into for-next

parents dc4113d2 72ccbbe1
...@@ -356,7 +356,6 @@ xfs_da3_split( ...@@ -356,7 +356,6 @@ xfs_da3_split(
struct xfs_da_state_blk *newblk; struct xfs_da_state_blk *newblk;
struct xfs_da_state_blk *addblk; struct xfs_da_state_blk *addblk;
struct xfs_da_intnode *node; struct xfs_da_intnode *node;
struct xfs_buf *bp;
int max; int max;
int action = 0; int action = 0;
int error; int error;
...@@ -397,7 +396,9 @@ xfs_da3_split( ...@@ -397,7 +396,9 @@ xfs_da3_split(
break; break;
} }
/* /*
* Entry wouldn't fit, split the leaf again. * Entry wouldn't fit, split the leaf again. The new
* extrablk will be consumed by xfs_da3_node_split if
* the node is split.
*/ */
state->extravalid = 1; state->extravalid = 1;
if (state->inleaf) { if (state->inleaf) {
...@@ -445,6 +446,14 @@ xfs_da3_split( ...@@ -445,6 +446,14 @@ xfs_da3_split(
if (!addblk) if (!addblk)
return 0; return 0;
/*
* xfs_da3_node_split() should have consumed any extra blocks we added
* during a double leaf split in the attr fork. This is guaranteed as
* we can't be here if the attr fork only has a single leaf block.
*/
ASSERT(state->extravalid == 0 ||
state->path.blk[max].magic == XFS_DIR2_LEAFN_MAGIC);
/* /*
* Split the root node. * Split the root node.
*/ */
...@@ -457,43 +466,33 @@ xfs_da3_split( ...@@ -457,43 +466,33 @@ xfs_da3_split(
} }
/* /*
* Update pointers to the node which used to be block 0 and * Update pointers to the node which used to be block 0 and just got
* just got bumped because of the addition of a new root node. * bumped because of the addition of a new root node. Note that the
* There might be three blocks involved if a double split occurred, * original block 0 could be at any position in the list of blocks in
* and the original block 0 could be at any position in the list. * the tree.
* *
* Note: the magic numbers and sibling pointers are in the same * Note: the magic numbers and sibling pointers are in the same physical
* physical place for both v2 and v3 headers (by design). Hence it * place for both v2 and v3 headers (by design). Hence it doesn't matter
* doesn't matter which version of the xfs_da_intnode structure we use * which version of the xfs_da_intnode structure we use here as the
* here as the result will be the same using either structure. * result will be the same using either structure.
*/ */
node = oldblk->bp->b_addr; node = oldblk->bp->b_addr;
if (node->hdr.info.forw) { if (node->hdr.info.forw) {
if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) { ASSERT(be32_to_cpu(node->hdr.info.forw) == addblk->blkno);
bp = addblk->bp; node = addblk->bp->b_addr;
} else {
ASSERT(state->extravalid);
bp = state->extrablk.bp;
}
node = bp->b_addr;
node->hdr.info.back = cpu_to_be32(oldblk->blkno); node->hdr.info.back = cpu_to_be32(oldblk->blkno);
xfs_trans_log_buf(state->args->trans, bp, xfs_trans_log_buf(state->args->trans, addblk->bp,
XFS_DA_LOGRANGE(node, &node->hdr.info, XFS_DA_LOGRANGE(node, &node->hdr.info,
sizeof(node->hdr.info))); sizeof(node->hdr.info)));
} }
node = oldblk->bp->b_addr; node = oldblk->bp->b_addr;
if (node->hdr.info.back) { if (node->hdr.info.back) {
if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) { ASSERT(be32_to_cpu(node->hdr.info.back) == addblk->blkno);
bp = addblk->bp; node = addblk->bp->b_addr;
} else {
ASSERT(state->extravalid);
bp = state->extrablk.bp;
}
node = bp->b_addr;
node->hdr.info.forw = cpu_to_be32(oldblk->blkno); node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
xfs_trans_log_buf(state->args->trans, bp, xfs_trans_log_buf(state->args->trans, addblk->bp,
XFS_DA_LOGRANGE(node, &node->hdr.info, XFS_DA_LOGRANGE(node, &node->hdr.info,
sizeof(node->hdr.info))); sizeof(node->hdr.info)));
} }
addblk->bp = NULL; addblk->bp = NULL;
return 0; return 0;
......
...@@ -87,6 +87,12 @@ xfs_find_bdev_for_inode( ...@@ -87,6 +87,12 @@ xfs_find_bdev_for_inode(
* We're now finished for good with this page. Update the page state via the * We're now finished for good with this page. Update the page state via the
* associated buffer_heads, paying attention to the start and end offsets that * associated buffer_heads, paying attention to the start and end offsets that
* we need to process on the page. * we need to process on the page.
*
* Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last
* buffer in the IO. Once it does this, it is unsafe to access the bufferhead or
* the page at all, as we may be racing with memory reclaim and it can free both
* the bufferhead chain and the page as it will see the page as clean and
* unused.
*/ */
static void static void
xfs_finish_page_writeback( xfs_finish_page_writeback(
...@@ -95,8 +101,9 @@ xfs_finish_page_writeback( ...@@ -95,8 +101,9 @@ xfs_finish_page_writeback(
int error) int error)
{ {
unsigned int end = bvec->bv_offset + bvec->bv_len - 1; unsigned int end = bvec->bv_offset + bvec->bv_len - 1;
struct buffer_head *head, *bh; struct buffer_head *head, *bh, *next;
unsigned int off = 0; unsigned int off = 0;
unsigned int bsize;
ASSERT(bvec->bv_offset < PAGE_SIZE); ASSERT(bvec->bv_offset < PAGE_SIZE);
ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0); ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0);
...@@ -105,15 +112,17 @@ xfs_finish_page_writeback( ...@@ -105,15 +112,17 @@ xfs_finish_page_writeback(
bh = head = page_buffers(bvec->bv_page); bh = head = page_buffers(bvec->bv_page);
bsize = bh->b_size;
do { do {
next = bh->b_this_page;
if (off < bvec->bv_offset) if (off < bvec->bv_offset)
goto next_bh; goto next_bh;
if (off > end) if (off > end)
break; break;
bh->b_end_io(bh, !error); bh->b_end_io(bh, !error);
next_bh: next_bh:
off += bh->b_size; off += bsize;
} while ((bh = bh->b_this_page) != head); } while ((bh = next) != head);
} }
/* /*
...@@ -1040,6 +1049,20 @@ xfs_vm_releasepage( ...@@ -1040,6 +1049,20 @@ xfs_vm_releasepage(
trace_xfs_releasepage(page->mapping->host, page, 0, 0); trace_xfs_releasepage(page->mapping->host, page, 0, 0);
/*
* mm accommodates an old ext3 case where clean pages might not have had
* the dirty bit cleared. Thus, it can send actual dirty pages to
* ->releasepage() via shrink_active_list(). Conversely,
* block_invalidatepage() can send pages that are still marked dirty
* but otherwise have invalidated buffers.
*
* We've historically freed buffers on the latter. Instead, quietly
* filter out all dirty pages to avoid spurious buffer state warnings.
* This can likely be removed once shrink_active_list() is fixed.
*/
if (PageDirty(page))
return 0;
xfs_count_page_state(page, &delalloc, &unwritten); xfs_count_page_state(page, &delalloc, &unwritten);
if (WARN_ON_ONCE(delalloc)) if (WARN_ON_ONCE(delalloc))
......
...@@ -957,6 +957,7 @@ xfs_buf_item_free( ...@@ -957,6 +957,7 @@ xfs_buf_item_free(
xfs_buf_log_item_t *bip) xfs_buf_log_item_t *bip)
{ {
xfs_buf_item_free_format(bip); xfs_buf_item_free_format(bip);
kmem_free(bip->bli_item.li_lv_shadow);
kmem_zone_free(xfs_buf_item_zone, bip); kmem_zone_free(xfs_buf_item_zone, bip);
} }
......
...@@ -74,6 +74,7 @@ xfs_qm_dqdestroy( ...@@ -74,6 +74,7 @@ xfs_qm_dqdestroy(
{ {
ASSERT(list_empty(&dqp->q_lru)); ASSERT(list_empty(&dqp->q_lru));
kmem_free(dqp->q_logitem.qli_item.li_lv_shadow);
mutex_destroy(&dqp->q_qlock); mutex_destroy(&dqp->q_qlock);
XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot); XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
......
...@@ -370,6 +370,8 @@ xfs_qm_qoffend_logitem_committed( ...@@ -370,6 +370,8 @@ xfs_qm_qoffend_logitem_committed(
spin_lock(&ailp->xa_lock); spin_lock(&ailp->xa_lock);
xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR); xfs_trans_ail_delete(ailp, &qfs->qql_item, SHUTDOWN_LOG_IO_ERROR);
kmem_free(qfs->qql_item.li_lv_shadow);
kmem_free(lip->li_lv_shadow);
kmem_free(qfs); kmem_free(qfs);
kmem_free(qfe); kmem_free(qfe);
return (xfs_lsn_t)-1; return (xfs_lsn_t)-1;
......
...@@ -40,6 +40,7 @@ void ...@@ -40,6 +40,7 @@ void
xfs_efi_item_free( xfs_efi_item_free(
struct xfs_efi_log_item *efip) struct xfs_efi_log_item *efip)
{ {
kmem_free(efip->efi_item.li_lv_shadow);
if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS) if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS)
kmem_free(efip); kmem_free(efip);
else else
...@@ -300,6 +301,7 @@ static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip) ...@@ -300,6 +301,7 @@ static inline struct xfs_efd_log_item *EFD_ITEM(struct xfs_log_item *lip)
STATIC void STATIC void
xfs_efd_item_free(struct xfs_efd_log_item *efdp) xfs_efd_item_free(struct xfs_efd_log_item *efdp)
{ {
kmem_free(efdp->efd_item.li_lv_shadow);
if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS) if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS)
kmem_free(efdp); kmem_free(efdp);
else else
......
...@@ -327,7 +327,7 @@ xfs_file_dio_aio_read( ...@@ -327,7 +327,7 @@ xfs_file_dio_aio_read(
return ret; return ret;
} }
STATIC ssize_t static noinline ssize_t
xfs_file_dax_read( xfs_file_dax_read(
struct kiocb *iocb, struct kiocb *iocb,
struct iov_iter *to) struct iov_iter *to)
...@@ -706,7 +706,7 @@ xfs_file_dio_aio_write( ...@@ -706,7 +706,7 @@ xfs_file_dio_aio_write(
return ret; return ret;
} }
STATIC ssize_t static noinline ssize_t
xfs_file_dax_write( xfs_file_dax_write(
struct kiocb *iocb, struct kiocb *iocb,
struct iov_iter *from) struct iov_iter *from)
......
...@@ -651,6 +651,7 @@ void ...@@ -651,6 +651,7 @@ void
xfs_inode_item_destroy( xfs_inode_item_destroy(
xfs_inode_t *ip) xfs_inode_t *ip)
{ {
kmem_free(ip->i_itemp->ili_item.li_lv_shadow);
kmem_zone_free(xfs_ili_zone, ip->i_itemp); kmem_zone_free(xfs_ili_zone, ip->i_itemp);
} }
......
...@@ -78,6 +78,157 @@ xlog_cil_init_post_recovery( ...@@ -78,6 +78,157 @@ xlog_cil_init_post_recovery(
log->l_cilp->xc_ctx->sequence = 1; log->l_cilp->xc_ctx->sequence = 1;
} }
/*
 * Size in bytes of the region at the front of a log vector buffer: the
 * struct xfs_log_vec header plus @niovecs iovec descriptors, rounded up to a
 * 64-bit boundary.
 */
static inline int
xlog_cil_iovec_space(
	uint	niovecs)
{
	size_t	hdr_bytes = sizeof(struct xfs_log_vec) +
			    niovecs * sizeof(struct xfs_log_iovec);

	return round_up(hdr_bytes, sizeof(uint64_t));
}
/*
* Allocate or pin log vector buffers for CIL insertion.
*
* The CIL currently uses disposable buffers for copying a snapshot of the
* modified items into the log during a push. The biggest problem with this is
* the requirement to allocate the disposable buffer during the commit if:
* a) does not exist; or
* b) it is too small
*
* If we do this allocation within xlog_cil_insert_format_items(), it is done
* under the xc_ctx_lock, which means that a CIL push cannot occur during
* the memory allocation. This means that we have a potential deadlock situation
* under low memory conditions when we have lots of dirty metadata pinned in
* the CIL and we need a CIL commit to occur to free memory.
*
* To avoid this, we need to move the memory allocation outside the
* xc_ctx_lock, but because the log vector buffers are disposable, that opens
* up a TOCTOU race condition w.r.t. the CIL committing and removing the log
* vector buffers between the check and the formatting of the item into the
* log vector buffer within the xc_ctx_lock.
*
* Because the log vector buffer needs to be unchanged during the CIL push
* process, we cannot share the buffer between the transaction commit (which
* modifies the buffer) and the CIL push context that is writing the changes
* into the log. This means skipping preallocation of buffer space is
* unreliable, but we most definitely do not want to be allocating and freeing
* buffers unnecessarily during commits when overwrites can be done safely.
*
* The simplest solution to this problem is to allocate a shadow buffer when a
* log item is committed for the second time, and then to only use this buffer
* if necessary. The buffer can remain attached to the log item until such time
* it is needed, and this is the buffer that is reallocated to match the size of
* the incoming modification. Then during the formatting of the item we can swap
* the active buffer with the new one if we can't reuse the existing buffer. We
* don't free the old buffer as it may be reused on the next modification if
* its size is right, otherwise we'll free and reallocate it at that point.
*
* This function builds a vector for the changes in each log item in the
* transaction. It then works out the length of the buffer needed for each log
* item, allocates them and attaches the vector to the log item in preparation
* for the formatting step which occurs under the xc_ctx_lock.
*
* While this means the memory footprint goes up, it avoids the repeated
* alloc/free pattern that repeated modifications of an item would otherwise
* cause, and hence minimises the CPU overhead of such behaviour.
*/
static void
xlog_cil_alloc_shadow_bufs(
	struct xlog		*log,
	struct xfs_trans	*tp)
{
	struct xfs_log_item_desc *lidp;

	list_for_each_entry(lidp, &tp->t_items, lid_trans) {
		struct xfs_log_item	*lip = lidp->lid_item;
		struct xfs_log_vec	*shadow;
		int			vec_count = 0;
		int			data_bytes = 0;
		int			hdr_bytes;
		int			total_bytes;
		bool			ordered;

		/* Items not dirtied by this transaction are left untouched. */
		if (!(lidp->lid_flags & XFS_LID_DIRTY))
			continue;

		/* Ask the item for its iovec count and payload size. */
		lip->li_ops->iop_size(lip, &vec_count, &data_bytes);

		/*
		 * An ordered item is tracked with a log vector but nothing is
		 * copied for it, so it needs neither iovecs nor data space.
		 */
		ordered = (vec_count == XFS_LOG_VEC_ORDERED);
		if (ordered) {
			vec_count = 0;
			data_bytes = 0;
		}

		/*
		 * Every iovec length is padded to 64 bits so the following
		 * one starts naturally aligned; reserve that slack, then pad
		 * the total so the start of the data area is easy to compute
		 * and verify.
		 */
		data_bytes += vec_count * sizeof(uint64_t);
		data_bytes = round_up(data_bytes, sizeof(uint64_t));

		/*
		 * The header and iovec array occupy the front of the buffer,
		 * already rounded so that the data area behind it is 64-bit
		 * aligned and cannot be overrun.
		 */
		hdr_bytes = xlog_cil_iovec_space(vec_count);
		total_bytes = data_bytes + hdr_bytes;

		shadow = lip->li_lv_shadow;
		if (shadow && total_bytes <= shadow->lv_size) {
			/*
			 * Existing shadow buffer is big enough: just reset its
			 * formatting state for the coming overwrite.
			 */
			shadow->lv_buf_len = ordered ? XFS_LOG_VEC_ORDERED : 0;
			shadow->lv_bytes = 0;
			shadow->lv_next = NULL;
		} else {
			/*
			 * No shadow buffer yet, or it is too small. Free and
			 * allocate rather than realloc so no stale data is
			 * copied, and zero only the header and iovec array -
			 * the data area does not need to be cleared.
			 */
			kmem_free(shadow);
			shadow = kmem_alloc(total_bytes, KM_SLEEP|KM_NOFS);
			memset(shadow, 0, hdr_bytes);

			shadow->lv_item = lip;
			shadow->lv_size = total_bytes;
			if (ordered)
				shadow->lv_buf_len = XFS_LOG_VEC_ORDERED;
			else
				shadow->lv_iovecp =
					(struct xfs_log_iovec *)&shadow[1];
			lip->li_lv_shadow = shadow;
		}

		/* Record the iovec count reported by ->iop_size. */
		shadow->lv_niovecs = vec_count;

		/* The data area begins right after the iovec array. */
		shadow->lv_buf = (char *)shadow + hdr_bytes;
	}
}
/* /*
* Prepare the log item for insertion into the CIL. Calculate the difference in * Prepare the log item for insertion into the CIL. Calculate the difference in
* log space and vectors it will consume, and if it is a new item pin it as * log space and vectors it will consume, and if it is a new item pin it as
...@@ -100,16 +251,19 @@ xfs_cil_prepare_item( ...@@ -100,16 +251,19 @@ xfs_cil_prepare_item(
/* /*
* If there is no old LV, this is the first time we've seen the item in * If there is no old LV, this is the first time we've seen the item in
* this CIL context and so we need to pin it. If we are replacing the * this CIL context and so we need to pin it. If we are replacing the
* old_lv, then remove the space it accounts for and free it. * old_lv, then remove the space it accounts for and make it the shadow
* buffer for later freeing. In both cases we are now switching to the
* shadow buffer, so update the pointer to it appropriately.
*/ */
if (!old_lv) if (!old_lv) {
lv->lv_item->li_ops->iop_pin(lv->lv_item); lv->lv_item->li_ops->iop_pin(lv->lv_item);
else if (old_lv != lv) { lv->lv_item->li_lv_shadow = NULL;
} else if (old_lv != lv) {
ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
*diff_len -= old_lv->lv_bytes; *diff_len -= old_lv->lv_bytes;
*diff_iovecs -= old_lv->lv_niovecs; *diff_iovecs -= old_lv->lv_niovecs;
kmem_free(old_lv); lv->lv_item->li_lv_shadow = old_lv;
} }
/* attach new log vector to log item */ /* attach new log vector to log item */
...@@ -133,11 +287,13 @@ xfs_cil_prepare_item( ...@@ -133,11 +287,13 @@ xfs_cil_prepare_item(
* write it out asynchronously without needing to relock the object that was * write it out asynchronously without needing to relock the object that was
* modified at the time it gets written into the iclog. * modified at the time it gets written into the iclog.
* *
* This function builds a vector for the changes in each log item in the * This function takes the prepared log vectors attached to each log item, and
* transaction. It then works out the length of the buffer needed for each log * formats the changes into the log vector buffer. The buffer it uses is
* item, allocates them and formats the vector for the item into the buffer. * dependent on the current state of the vector in the CIL - the shadow lv is
* The buffer is then attached to the log item are then inserted into the * guaranteed to be large enough for the current modification, but we will only
* Committed Item List for tracking until the next checkpoint is written out. * use that if we can't reuse the existing lv. If we can't reuse the existing
* lv, then simply swap it out for the shadow lv. We don't free it - that is
* done lazily either by the next modification or the freeing of the log item.
* *
* We don't set up region headers during this process; we simply copy the * We don't set up region headers during this process; we simply copy the
* regions into the flat buffer. We can do this because we still have to do a * regions into the flat buffer. We can do this because we still have to do a
...@@ -170,59 +326,29 @@ xlog_cil_insert_format_items( ...@@ -170,59 +326,29 @@ xlog_cil_insert_format_items(
list_for_each_entry(lidp, &tp->t_items, lid_trans) { list_for_each_entry(lidp, &tp->t_items, lid_trans) {
struct xfs_log_item *lip = lidp->lid_item; struct xfs_log_item *lip = lidp->lid_item;
struct xfs_log_vec *lv; struct xfs_log_vec *lv;
struct xfs_log_vec *old_lv; struct xfs_log_vec *old_lv = NULL;
int niovecs = 0; struct xfs_log_vec *shadow;
int nbytes = 0;
int buf_size;
bool ordered = false; bool ordered = false;
/* Skip items which aren't dirty in this transaction. */ /* Skip items which aren't dirty in this transaction. */
if (!(lidp->lid_flags & XFS_LID_DIRTY)) if (!(lidp->lid_flags & XFS_LID_DIRTY))
continue; continue;
/* get number of vecs and size of data to be stored */
lip->li_ops->iop_size(lip, &niovecs, &nbytes);
/* Skip items that do not have any vectors for writing */
if (!niovecs)
continue;
/* /*
* Ordered items need to be tracked but we do not wish to write * The formatting size information is already attached to
* them. We need a logvec to track the object, but we do not * the shadow lv on the log item.
* need an iovec or buffer to be allocated for copying data.
*/ */
if (niovecs == XFS_LOG_VEC_ORDERED) { shadow = lip->li_lv_shadow;
if (shadow->lv_buf_len == XFS_LOG_VEC_ORDERED)
ordered = true; ordered = true;
niovecs = 0;
nbytes = 0;
}
/* /* Skip items that do not have any vectors for writing */
* We 64-bit align the length of each iovec so that the start if (!shadow->lv_niovecs && !ordered)
* of the next one is naturally aligned. We'll need to continue;
* account for that slack space here. Then round nbytes up
* to 64-bit alignment so that the initial buffer alignment is
* easy to calculate and verify.
*/
nbytes += niovecs * sizeof(uint64_t);
nbytes = round_up(nbytes, sizeof(uint64_t));
/* grab the old item if it exists for reservation accounting */
old_lv = lip->li_lv;
/*
* The data buffer needs to start 64-bit aligned, so round up
* that space to ensure we can align it appropriately and not
* overrun the buffer.
*/
buf_size = nbytes +
round_up((sizeof(struct xfs_log_vec) +
niovecs * sizeof(struct xfs_log_iovec)),
sizeof(uint64_t));
/* compare to existing item size */ /* compare to existing item size */
if (lip->li_lv && buf_size <= lip->li_lv->lv_size) { old_lv = lip->li_lv;
if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) {
/* same or smaller, optimise common overwrite case */ /* same or smaller, optimise common overwrite case */
lv = lip->li_lv; lv = lip->li_lv;
lv->lv_next = NULL; lv->lv_next = NULL;
...@@ -236,32 +362,29 @@ xlog_cil_insert_format_items( ...@@ -236,32 +362,29 @@ xlog_cil_insert_format_items(
*/ */
*diff_iovecs -= lv->lv_niovecs; *diff_iovecs -= lv->lv_niovecs;
*diff_len -= lv->lv_bytes; *diff_len -= lv->lv_bytes;
/* Ensure the lv is set up according to ->iop_size */
lv->lv_niovecs = shadow->lv_niovecs;
/* reset the lv buffer information for new formatting */
lv->lv_buf_len = 0;
lv->lv_bytes = 0;
lv->lv_buf = (char *)lv +
xlog_cil_iovec_space(lv->lv_niovecs);
} else { } else {
/* allocate new data chunk */ /* switch to shadow buffer! */
lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS); lv = shadow;
lv->lv_item = lip; lv->lv_item = lip;
lv->lv_size = buf_size;
if (ordered) { if (ordered) {
/* track as an ordered logvec */ /* track as an ordered logvec */
ASSERT(lip->li_lv == NULL); ASSERT(lip->li_lv == NULL);
lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
goto insert; goto insert;
} }
lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
} }
/* Ensure the lv is set up according to ->iop_size */
lv->lv_niovecs = niovecs;
/* The allocated data region lies beyond the iovec region */
lv->lv_buf_len = 0;
lv->lv_bytes = 0;
lv->lv_buf = (char *)lv + buf_size - nbytes;
ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
lip->li_ops->iop_format(lip, lv); lip->li_ops->iop_format(lip, lv);
insert: insert:
ASSERT(lv->lv_buf_len <= nbytes);
xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs); xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
} }
} }
...@@ -783,6 +906,13 @@ xfs_log_commit_cil( ...@@ -783,6 +906,13 @@ xfs_log_commit_cil(
struct xlog *log = mp->m_log; struct xlog *log = mp->m_log;
struct xfs_cil *cil = log->l_cilp; struct xfs_cil *cil = log->l_cilp;
/*
* Do all necessary memory allocation before we lock the CIL.
* This ensures the allocation does not deadlock with a CIL
* push in memory reclaim (e.g. from kswapd).
*/
xlog_cil_alloc_shadow_bufs(log, tp);
/* lock out background commit */ /* lock out background commit */
down_read(&cil->xc_ctx_lock); down_read(&cil->xc_ctx_lock);
......
...@@ -1573,10 +1573,6 @@ xfs_fs_fill_super( ...@@ -1573,10 +1573,6 @@ xfs_fs_fill_super(
} }
} }
if (xfs_sb_version_hassparseinodes(&mp->m_sb))
xfs_alert(mp,
"EXPERIMENTAL sparse inode feature enabled. Use at your own risk!");
error = xfs_mountfs(mp); error = xfs_mountfs(mp);
if (error) if (error)
goto out_filestream_unmount; goto out_filestream_unmount;
......
...@@ -52,6 +52,7 @@ typedef struct xfs_log_item { ...@@ -52,6 +52,7 @@ typedef struct xfs_log_item {
/* delayed logging */ /* delayed logging */
struct list_head li_cil; /* CIL pointers */ struct list_head li_cil; /* CIL pointers */
struct xfs_log_vec *li_lv; /* active log vector */ struct xfs_log_vec *li_lv; /* active log vector */
struct xfs_log_vec *li_lv_shadow; /* standby vector */
xfs_lsn_t li_seq; /* CIL commit seq */ xfs_lsn_t li_seq; /* CIL commit seq */
} xfs_log_item_t; } xfs_log_item_t;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment