Commit 5e116e99 authored by Dave Chinner

Merge branch 'guilt/xlog-intent-whiteouts' into xfs-5.19-for-next

parents 9cf4f616 0d227466
...@@ -186,7 +186,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = { ...@@ -186,7 +186,7 @@ static const struct xfs_defer_op_type *defer_op_types[] = {
[XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type, [XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type,
}; };
static void static bool
xfs_defer_create_intent( xfs_defer_create_intent(
struct xfs_trans *tp, struct xfs_trans *tp,
struct xfs_defer_pending *dfp, struct xfs_defer_pending *dfp,
...@@ -197,6 +197,7 @@ xfs_defer_create_intent( ...@@ -197,6 +197,7 @@ xfs_defer_create_intent(
if (!dfp->dfp_intent) if (!dfp->dfp_intent)
dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_intent = ops->create_intent(tp, &dfp->dfp_work,
dfp->dfp_count, sort); dfp->dfp_count, sort);
return dfp->dfp_intent != NULL;
} }
/* /*
...@@ -204,16 +205,18 @@ xfs_defer_create_intent( ...@@ -204,16 +205,18 @@ xfs_defer_create_intent(
* associated extents, then add the entire intake list to the end of * associated extents, then add the entire intake list to the end of
* the pending list. * the pending list.
*/ */
STATIC void static bool
xfs_defer_create_intents( xfs_defer_create_intents(
struct xfs_trans *tp) struct xfs_trans *tp)
{ {
struct xfs_defer_pending *dfp; struct xfs_defer_pending *dfp;
bool ret = false;
list_for_each_entry(dfp, &tp->t_dfops, dfp_list) { list_for_each_entry(dfp, &tp->t_dfops, dfp_list) {
trace_xfs_defer_create_intent(tp->t_mountp, dfp); trace_xfs_defer_create_intent(tp->t_mountp, dfp);
xfs_defer_create_intent(tp, dfp, true); ret |= xfs_defer_create_intent(tp, dfp, true);
} }
return ret;
} }
/* Abort all the intents that were committed. */ /* Abort all the intents that were committed. */
...@@ -487,7 +490,7 @@ int ...@@ -487,7 +490,7 @@ int
xfs_defer_finish_noroll( xfs_defer_finish_noroll(
struct xfs_trans **tp) struct xfs_trans **tp)
{ {
struct xfs_defer_pending *dfp; struct xfs_defer_pending *dfp = NULL;
int error = 0; int error = 0;
LIST_HEAD(dop_pending); LIST_HEAD(dop_pending);
...@@ -506,17 +509,20 @@ xfs_defer_finish_noroll( ...@@ -506,17 +509,20 @@ xfs_defer_finish_noroll(
* of time that any one intent item can stick around in memory, * of time that any one intent item can stick around in memory,
* pinning the log tail. * pinning the log tail.
*/ */
xfs_defer_create_intents(*tp); bool has_intents = xfs_defer_create_intents(*tp);
list_splice_init(&(*tp)->t_dfops, &dop_pending); list_splice_init(&(*tp)->t_dfops, &dop_pending);
if (has_intents || dfp) {
error = xfs_defer_trans_roll(tp); error = xfs_defer_trans_roll(tp);
if (error) if (error)
goto out_shutdown; goto out_shutdown;
/* Possibly relog intent items to keep the log moving. */ /* Relog intent items to keep the log moving. */
error = xfs_defer_relog(tp, &dop_pending); error = xfs_defer_relog(tp, &dop_pending);
if (error) if (error)
goto out_shutdown; goto out_shutdown;
}
dfp = list_first_entry(&dop_pending, struct xfs_defer_pending, dfp = list_first_entry(&dop_pending, struct xfs_defer_pending,
dfp_list); dfp_list);
......
...@@ -36,7 +36,7 @@ xfs_init_local_fork( ...@@ -36,7 +36,7 @@ xfs_init_local_fork(
int64_t size) int64_t size)
{ {
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
int mem_size = size, real_size = 0; int mem_size = size;
bool zero_terminate; bool zero_terminate;
/* /*
...@@ -50,8 +50,7 @@ xfs_init_local_fork( ...@@ -50,8 +50,7 @@ xfs_init_local_fork(
mem_size++; mem_size++;
if (size) { if (size) {
real_size = roundup(mem_size, 4); ifp->if_u1.if_data = kmem_alloc(mem_size, KM_NOFS);
ifp->if_u1.if_data = kmem_alloc(real_size, KM_NOFS);
memcpy(ifp->if_u1.if_data, data, size); memcpy(ifp->if_u1.if_data, data, size);
if (zero_terminate) if (zero_terminate)
ifp->if_u1.if_data[size] = '\0'; ifp->if_u1.if_data[size] = '\0';
...@@ -497,12 +496,7 @@ xfs_idata_realloc( ...@@ -497,12 +496,7 @@ xfs_idata_realloc(
return; return;
} }
/* ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, new_size,
* For inline data, the underlying buffer must be a multiple of 4 bytes
* in size so that it can be logged and stay on word boundaries.
* We enforce that here.
*/
ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, roundup(new_size, 4),
GFP_NOFS | __GFP_NOFAIL); GFP_NOFS | __GFP_NOFAIL);
ifp->if_bytes = new_size; ifp->if_bytes = new_size;
} }
......
...@@ -54,13 +54,23 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, ...@@ -54,13 +54,23 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
/* /*
* Values for t_flags. * Values for t_flags.
*/ */
#define XFS_TRANS_DIRTY 0x01 /* something needs to be logged */ /* Transaction needs to be logged */
#define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */ #define XFS_TRANS_DIRTY (1u << 0)
#define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */ /* Superblock is dirty and needs to be logged */
#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ #define XFS_TRANS_SB_DIRTY (1u << 1)
#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ /* Transaction took a permanent log reservation */
#define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ #define XFS_TRANS_PERM_LOG_RES (1u << 2)
#define XFS_TRANS_RES_FDBLKS 0x80 /* reserve newly freed blocks */ /* Synchronous transaction commit needed */
#define XFS_TRANS_SYNC (1u << 3)
/* Transaction can use reserve block pool */
#define XFS_TRANS_RESERVE (1u << 4)
/* Transaction should avoid VFS level superblock write accounting */
#define XFS_TRANS_NO_WRITECOUNT (1u << 5)
/* Transaction has freed blocks returned to its reservation */
#define XFS_TRANS_RES_FDBLKS (1u << 6)
/* Transaction contains an intent done log item */
#define XFS_TRANS_HAS_INTENT_DONE (1u << 7)
/* /*
* LOWMODE is used by the allocator to activate the lowspace algorithm - when * LOWMODE is used by the allocator to activate the lowspace algorithm - when
* free space is running low the extent allocator may choose to allocate an * free space is running low the extent allocator may choose to allocate an
......
...@@ -39,6 +39,7 @@ STATIC void ...@@ -39,6 +39,7 @@ STATIC void
xfs_bui_item_free( xfs_bui_item_free(
struct xfs_bui_log_item *buip) struct xfs_bui_log_item *buip)
{ {
kmem_free(buip->bui_item.li_lv_shadow);
kmem_cache_free(xfs_bui_cache, buip); kmem_cache_free(xfs_bui_cache, buip);
} }
...@@ -54,10 +55,11 @@ xfs_bui_release( ...@@ -54,10 +55,11 @@ xfs_bui_release(
struct xfs_bui_log_item *buip) struct xfs_bui_log_item *buip)
{ {
ASSERT(atomic_read(&buip->bui_refcount) > 0); ASSERT(atomic_read(&buip->bui_refcount) > 0);
if (atomic_dec_and_test(&buip->bui_refcount)) { if (!atomic_dec_and_test(&buip->bui_refcount))
xfs_trans_ail_delete(&buip->bui_item, SHUTDOWN_LOG_IO_ERROR); return;
xfs_trans_ail_delete(&buip->bui_item, 0);
xfs_bui_item_free(buip); xfs_bui_item_free(buip);
}
} }
...@@ -198,14 +200,24 @@ xfs_bud_item_release( ...@@ -198,14 +200,24 @@ xfs_bud_item_release(
struct xfs_bud_log_item *budp = BUD_ITEM(lip); struct xfs_bud_log_item *budp = BUD_ITEM(lip);
xfs_bui_release(budp->bud_buip); xfs_bui_release(budp->bud_buip);
kmem_free(budp->bud_item.li_lv_shadow);
kmem_cache_free(xfs_bud_cache, budp); kmem_cache_free(xfs_bud_cache, budp);
} }
static struct xfs_log_item *
xfs_bud_item_intent(
struct xfs_log_item *lip)
{
return &BUD_ITEM(lip)->bud_buip->bui_item;
}
static const struct xfs_item_ops xfs_bud_item_ops = { static const struct xfs_item_ops xfs_bud_item_ops = {
.flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED |
XFS_ITEM_INTENT_DONE,
.iop_size = xfs_bud_item_size, .iop_size = xfs_bud_item_size,
.iop_format = xfs_bud_item_format, .iop_format = xfs_bud_item_format,
.iop_release = xfs_bud_item_release, .iop_release = xfs_bud_item_release,
.iop_intent = xfs_bud_item_intent,
}; };
static struct xfs_bud_log_item * static struct xfs_bud_log_item *
...@@ -254,7 +266,7 @@ xfs_trans_log_finish_bmap_update( ...@@ -254,7 +266,7 @@ xfs_trans_log_finish_bmap_update(
* 1.) releases the BUI and frees the BUD * 1.) releases the BUI and frees the BUD
* 2.) shuts down the filesystem * 2.) shuts down the filesystem
*/ */
tp->t_flags |= XFS_TRANS_DIRTY; tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE;
set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags); set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
return error; return error;
...@@ -586,6 +598,7 @@ xfs_bui_item_relog( ...@@ -586,6 +598,7 @@ xfs_bui_item_relog(
} }
static const struct xfs_item_ops xfs_bui_item_ops = { static const struct xfs_item_ops xfs_bui_item_ops = {
.flags = XFS_ITEM_INTENT,
.iop_size = xfs_bui_item_size, .iop_size = xfs_bui_item_size,
.iop_format = xfs_bui_item_format, .iop_format = xfs_bui_item_format,
.iop_unpin = xfs_bui_item_unpin, .iop_unpin = xfs_bui_item_unpin,
......
...@@ -58,10 +58,11 @@ xfs_efi_release( ...@@ -58,10 +58,11 @@ xfs_efi_release(
struct xfs_efi_log_item *efip) struct xfs_efi_log_item *efip)
{ {
ASSERT(atomic_read(&efip->efi_refcount) > 0); ASSERT(atomic_read(&efip->efi_refcount) > 0);
if (atomic_dec_and_test(&efip->efi_refcount)) { if (!atomic_dec_and_test(&efip->efi_refcount))
xfs_trans_ail_delete(&efip->efi_item, SHUTDOWN_LOG_IO_ERROR); return;
xfs_trans_ail_delete(&efip->efi_item, 0);
xfs_efi_item_free(efip); xfs_efi_item_free(efip);
}
} }
/* /*
...@@ -306,11 +307,20 @@ xfs_efd_item_release( ...@@ -306,11 +307,20 @@ xfs_efd_item_release(
xfs_efd_item_free(efdp); xfs_efd_item_free(efdp);
} }
static struct xfs_log_item *
xfs_efd_item_intent(
struct xfs_log_item *lip)
{
return &EFD_ITEM(lip)->efd_efip->efi_item;
}
static const struct xfs_item_ops xfs_efd_item_ops = { static const struct xfs_item_ops xfs_efd_item_ops = {
.flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED |
XFS_ITEM_INTENT_DONE,
.iop_size = xfs_efd_item_size, .iop_size = xfs_efd_item_size,
.iop_format = xfs_efd_item_format, .iop_format = xfs_efd_item_format,
.iop_release = xfs_efd_item_release, .iop_release = xfs_efd_item_release,
.iop_intent = xfs_efd_item_intent,
}; };
/* /*
...@@ -380,7 +390,7 @@ xfs_trans_free_extent( ...@@ -380,7 +390,7 @@ xfs_trans_free_extent(
* 1.) releases the EFI and frees the EFD * 1.) releases the EFI and frees the EFD
* 2.) shuts down the filesystem * 2.) shuts down the filesystem
*/ */
tp->t_flags |= XFS_TRANS_DIRTY; tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE;
set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags); set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
next_extent = efdp->efd_next_extent; next_extent = efdp->efd_next_extent;
...@@ -688,6 +698,7 @@ xfs_efi_item_relog( ...@@ -688,6 +698,7 @@ xfs_efi_item_relog(
} }
static const struct xfs_item_ops xfs_efi_item_ops = { static const struct xfs_item_ops xfs_efi_item_ops = {
.flags = XFS_ITEM_INTENT,
.iop_size = xfs_efi_item_size, .iop_size = xfs_efi_item_size,
.iop_format = xfs_efi_item_format, .iop_format = xfs_efi_item_format,
.iop_unpin = xfs_efi_item_unpin, .iop_unpin = xfs_efi_item_unpin,
......
...@@ -63,6 +63,7 @@ STATIC void ...@@ -63,6 +63,7 @@ STATIC void
xfs_icreate_item_release( xfs_icreate_item_release(
struct xfs_log_item *lip) struct xfs_log_item *lip)
{ {
kmem_free(ICR_ITEM(lip)->ic_item.li_lv_shadow);
kmem_cache_free(xfs_icreate_cache, ICR_ITEM(lip)); kmem_cache_free(xfs_icreate_cache, ICR_ITEM(lip));
} }
......
...@@ -71,7 +71,7 @@ xfs_inode_item_data_fork_size( ...@@ -71,7 +71,7 @@ xfs_inode_item_data_fork_size(
case XFS_DINODE_FMT_LOCAL: case XFS_DINODE_FMT_LOCAL:
if ((iip->ili_fields & XFS_ILOG_DDATA) && if ((iip->ili_fields & XFS_ILOG_DDATA) &&
ip->i_df.if_bytes > 0) { ip->i_df.if_bytes > 0) {
*nbytes += roundup(ip->i_df.if_bytes, 4); *nbytes += xlog_calc_iovec_len(ip->i_df.if_bytes);
*nvecs += 1; *nvecs += 1;
} }
break; break;
...@@ -112,7 +112,7 @@ xfs_inode_item_attr_fork_size( ...@@ -112,7 +112,7 @@ xfs_inode_item_attr_fork_size(
case XFS_DINODE_FMT_LOCAL: case XFS_DINODE_FMT_LOCAL:
if ((iip->ili_fields & XFS_ILOG_ADATA) && if ((iip->ili_fields & XFS_ILOG_ADATA) &&
ip->i_afp->if_bytes > 0) { ip->i_afp->if_bytes > 0) {
*nbytes += roundup(ip->i_afp->if_bytes, 4); *nbytes += xlog_calc_iovec_len(ip->i_afp->if_bytes);
*nvecs += 1; *nvecs += 1;
} }
break; break;
...@@ -204,17 +204,12 @@ xfs_inode_item_format_data_fork( ...@@ -204,17 +204,12 @@ xfs_inode_item_format_data_fork(
~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV); ~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT | XFS_ILOG_DEV);
if ((iip->ili_fields & XFS_ILOG_DDATA) && if ((iip->ili_fields & XFS_ILOG_DDATA) &&
ip->i_df.if_bytes > 0) { ip->i_df.if_bytes > 0) {
/*
* Round i_bytes up to a word boundary.
* The underlying memory is guaranteed
* to be there by xfs_idata_realloc().
*/
data_bytes = roundup(ip->i_df.if_bytes, 4);
ASSERT(ip->i_df.if_u1.if_data != NULL); ASSERT(ip->i_df.if_u1.if_data != NULL);
ASSERT(ip->i_disk_size > 0); ASSERT(ip->i_disk_size > 0);
xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL, xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL,
ip->i_df.if_u1.if_data, data_bytes); ip->i_df.if_u1.if_data,
ilf->ilf_dsize = (unsigned)data_bytes; ip->i_df.if_bytes);
ilf->ilf_dsize = (unsigned)ip->i_df.if_bytes;
ilf->ilf_size++; ilf->ilf_size++;
} else { } else {
iip->ili_fields &= ~XFS_ILOG_DDATA; iip->ili_fields &= ~XFS_ILOG_DDATA;
...@@ -288,17 +283,11 @@ xfs_inode_item_format_attr_fork( ...@@ -288,17 +283,11 @@ xfs_inode_item_format_attr_fork(
if ((iip->ili_fields & XFS_ILOG_ADATA) && if ((iip->ili_fields & XFS_ILOG_ADATA) &&
ip->i_afp->if_bytes > 0) { ip->i_afp->if_bytes > 0) {
/*
* Round i_bytes up to a word boundary.
* The underlying memory is guaranteed
* to be there by xfs_idata_realloc().
*/
data_bytes = roundup(ip->i_afp->if_bytes, 4);
ASSERT(ip->i_afp->if_u1.if_data != NULL); ASSERT(ip->i_afp->if_u1.if_data != NULL);
xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL, xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL,
ip->i_afp->if_u1.if_data, ip->i_afp->if_u1.if_data,
data_bytes); ip->i_afp->if_bytes);
ilf->ilf_asize = (unsigned)data_bytes; ilf->ilf_asize = (unsigned)ip->i_afp->if_bytes;
ilf->ilf_size++; ilf->ilf_size++;
} else { } else {
iip->ili_fields &= ~XFS_ILOG_ADATA; iip->ili_fields &= ~XFS_ILOG_ADATA;
......
...@@ -462,7 +462,7 @@ xlog_recover_inode_commit_pass2( ...@@ -462,7 +462,7 @@ xlog_recover_inode_commit_pass2(
ASSERT(in_f->ilf_size <= 4); ASSERT(in_f->ilf_size <= 4);
ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK)); ASSERT((in_f->ilf_size == 3) || (fields & XFS_ILOG_AFORK));
ASSERT(!(fields & XFS_ILOG_DFORK) || ASSERT(!(fields & XFS_ILOG_DFORK) ||
(len == in_f->ilf_dsize)); (len == xlog_calc_iovec_len(in_f->ilf_dsize)));
switch (fields & XFS_ILOG_DFORK) { switch (fields & XFS_ILOG_DFORK) {
case XFS_ILOG_DDATA: case XFS_ILOG_DDATA:
...@@ -497,7 +497,7 @@ xlog_recover_inode_commit_pass2( ...@@ -497,7 +497,7 @@ xlog_recover_inode_commit_pass2(
} }
len = item->ri_buf[attr_index].i_len; len = item->ri_buf[attr_index].i_len;
src = item->ri_buf[attr_index].i_addr; src = item->ri_buf[attr_index].i_addr;
ASSERT(len == in_f->ilf_asize); ASSERT(len == xlog_calc_iovec_len(in_f->ilf_asize));
switch (in_f->ilf_fields & XFS_ILOG_AFORK) { switch (in_f->ilf_fields & XFS_ILOG_AFORK) {
case XFS_ILOG_ADATA: case XFS_ILOG_ADATA:
......
...@@ -21,23 +21,59 @@ struct xfs_log_vec { ...@@ -21,23 +21,59 @@ struct xfs_log_vec {
#define XFS_LOG_VEC_ORDERED (-1) #define XFS_LOG_VEC_ORDERED (-1)
/*
 * Return the log iovec length needed to hold @len bytes of caller data, i.e.
 * @len rounded up to the next multiple of sizeof(uint32_t). Intended for
 * ->iop_size implementations that size buffers of arbitrary alignment.
 */
static inline int
xlog_calc_iovec_len(int len)
{
	int	aligned_len = roundup(len, sizeof(uint32_t));

	return aligned_len;
}
void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, void *xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
uint type); uint type);
static inline void static inline void
xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len) xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec,
int data_len)
{ {
struct xlog_op_header *oph = vec->i_addr; struct xlog_op_header *oph = vec->i_addr;
int len;
/*
* Always round up the length to the correct alignment so callers don't
* need to know anything about this log vec layout requirement. This
* means we have to zero the area the data to be written does not cover.
* This is complicated by the fact that the payload region is offset into the
* logvec region by the opheader that tracks the payload.
*/
len = xlog_calc_iovec_len(data_len);
if (len - data_len != 0) {
char *buf = vec->i_addr + sizeof(struct xlog_op_header);
/* opheader tracks payload length, logvec tracks region length */ memset(buf + data_len, 0, len - data_len);
}
/*
* The opheader tracks aligned payload length, whilst the logvec tracks
* the overall region length.
*/
oph->oh_len = cpu_to_be32(len); oph->oh_len = cpu_to_be32(len);
len += sizeof(struct xlog_op_header); len += sizeof(struct xlog_op_header);
lv->lv_buf_len += len; lv->lv_buf_len += len;
lv->lv_bytes += len; lv->lv_bytes += len;
vec->i_len = len; vec->i_len = len;
/* Catch buffer overruns */
ASSERT((void *)lv->lv_buf + lv->lv_bytes <= (void *)lv + lv->lv_size);
} }
/*
* Copy the amount of data requested by the caller into a new log iovec.
*/
static inline void * static inline void *
xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
uint type, void *data, int len) uint type, void *data, int len)
......
...@@ -47,6 +47,38 @@ xlog_cil_ticket_alloc( ...@@ -47,6 +47,38 @@ xlog_cil_ticket_alloc(
return tic; return tic;
} }
/*
 * Report whether @lip was first committed in the checkpoint sequence that
 * @cil is currently on. Membership of the CIL list alone is not sufficient;
 * the commit sequence recorded in the item has to match as well.
 *
 * Note: for this to be used in a non-racy manner, it has to be called with
 * CIL flushing locked out. As a result, it should only be used during the
 * transaction commit process when deciding what to format into the item.
 */
static bool
xlog_item_in_current_chkpt(
	struct xfs_cil		*cil,
	struct xfs_log_item	*lip)
{
	bool			on_cil_list = !list_empty(&lip->li_cil);

	/*
	 * li_seq is written on the first commit of a log item to record the
	 * first checkpoint it is written to. Hence if it is different to the
	 * current sequence, we're in a new checkpoint. The sequence is only
	 * sampled when the item is actually on the CIL list.
	 */
	return on_cil_list &&
	       lip->li_seq == READ_ONCE(cil->xc_current_sequence);
}
/*
 * Wrapper around xlog_item_in_current_chkpt() that looks up the CIL through
 * the log the item points at (lip->li_log->l_cilp).
 */
bool
xfs_log_item_in_current_chkpt(
	struct xfs_log_item	*lip)
{
	struct xfs_cil		*cil = lip->li_log->l_cilp;

	return xlog_item_in_current_chkpt(cil, lip);
}
/* /*
* Unavoidable forward declaration - xlog_cil_push_work() calls * Unavoidable forward declaration - xlog_cil_push_work() calls
* xlog_cil_ctx_alloc() itself. * xlog_cil_ctx_alloc() itself.
...@@ -444,7 +476,8 @@ xlog_cil_insert_format_items( ...@@ -444,7 +476,8 @@ xlog_cil_insert_format_items(
static void static void
xlog_cil_insert_items( xlog_cil_insert_items(
struct xlog *log, struct xlog *log,
struct xfs_trans *tp) struct xfs_trans *tp,
uint32_t released_space)
{ {
struct xfs_cil *cil = log->l_cilp; struct xfs_cil *cil = log->l_cilp;
struct xfs_cil_ctx *ctx = cil->xc_ctx; struct xfs_cil_ctx *ctx = cil->xc_ctx;
...@@ -493,7 +526,9 @@ xlog_cil_insert_items( ...@@ -493,7 +526,9 @@ xlog_cil_insert_items(
ASSERT(tp->t_ticket->t_curr_res >= len); ASSERT(tp->t_ticket->t_curr_res >= len);
} }
tp->t_ticket->t_curr_res -= len; tp->t_ticket->t_curr_res -= len;
tp->t_ticket->t_curr_res += released_space;
ctx->space_used += len; ctx->space_used += len;
ctx->space_used -= released_space;
/* /*
* If we've overrun the reservation, dump the tx details before we move * If we've overrun the reservation, dump the tx details before we move
...@@ -934,6 +969,65 @@ xlog_cil_build_trans_hdr( ...@@ -934,6 +969,65 @@ xlog_cil_build_trans_hdr(
tic->t_curr_res -= lvhdr->lv_bytes; tic->t_curr_res -= lvhdr->lv_bytes;
} }
/*
 * Empty the CIL item list, chaining each item's log vector onto
 * ctx->lv_chain and accumulating the iovec and byte counts into @num_iovecs
 * and @num_bytes. We don't need the CIL lock here because it's only needed on
 * the transaction commit side which is currently locked out by the flush lock.
 *
 * Items flagged XFS_LI_WHITEOUT do not need to be written to the journal, so
 * they are moved onto @whiteouts for the caller to dispose of appropriately
 * and contribute nothing to the chain or the counters.
 */
static void
xlog_cil_build_lv_chain(
	struct xfs_cil		*cil,
	struct xfs_cil_ctx	*ctx,
	struct list_head	*whiteouts,
	uint32_t		*num_iovecs,
	uint32_t		*num_bytes)
{
	struct xfs_log_vec	*tail = NULL;
	struct xfs_log_item	*lip;

	while (!list_empty(&cil->xc_cil)) {
		lip = list_first_entry(&cil->xc_cil, struct xfs_log_item,
				       li_cil);

		/* Whiteouts are parked for the caller, never chained. */
		if (test_bit(XFS_LI_WHITEOUT, &lip->li_flags)) {
			list_move(&lip->li_cil, whiteouts);
			trace_xfs_cil_whiteout_skip(lip);
			continue;
		}

		list_del_init(&lip->li_cil);

		/* Link the item's log vector onto the end of the chain. */
		if (ctx->lv_chain)
			tail->lv_next = lip->li_lv;
		else
			ctx->lv_chain = lip->li_lv;
		tail = lip->li_lv;
		lip->li_lv = NULL;

		*num_iovecs += tail->lv_niovecs;

		/* we don't write ordered log vectors */
		if (tail->lv_buf_len == XFS_LOG_VEC_ORDERED)
			continue;
		*num_bytes += tail->lv_bytes;
	}
}
/*
 * Drain @whiteouts: unlink each item from the list and call its ->iop_unpin
 * method with a second argument of 1.
 * NOTE(review): the 1 is presumably the "remove" flag of ->iop_unpin - confirm
 * against the xfs_item_ops definition.
 */
static void
xlog_cil_cleanup_whiteouts(
	struct list_head	*whiteouts)
{
	struct xfs_log_item	*lip;

	while (!list_empty(whiteouts)) {
		lip = list_first_entry(whiteouts, struct xfs_log_item, li_cil);

		list_del_init(&lip->li_cil);
		trace_xfs_cil_whiteout_unpin(lip);
		lip->li_ops->iop_unpin(lip, 1);
	}
}
/* /*
* Push the Committed Item List to the log. * Push the Committed Item List to the log.
* *
...@@ -956,7 +1050,6 @@ xlog_cil_push_work( ...@@ -956,7 +1050,6 @@ xlog_cil_push_work(
container_of(work, struct xfs_cil_ctx, push_work); container_of(work, struct xfs_cil_ctx, push_work);
struct xfs_cil *cil = ctx->cil; struct xfs_cil *cil = ctx->cil;
struct xlog *log = cil->xc_log; struct xlog *log = cil->xc_log;
struct xfs_log_vec *lv;
struct xfs_cil_ctx *new_ctx; struct xfs_cil_ctx *new_ctx;
int num_iovecs = 0; int num_iovecs = 0;
int num_bytes = 0; int num_bytes = 0;
...@@ -965,6 +1058,7 @@ xlog_cil_push_work( ...@@ -965,6 +1058,7 @@ xlog_cil_push_work(
struct xfs_log_vec lvhdr = { NULL }; struct xfs_log_vec lvhdr = { NULL };
xfs_csn_t push_seq; xfs_csn_t push_seq;
bool push_commit_stable; bool push_commit_stable;
LIST_HEAD (whiteouts);
new_ctx = xlog_cil_ctx_alloc(); new_ctx = xlog_cil_ctx_alloc();
new_ctx->ticket = xlog_cil_ticket_alloc(log); new_ctx->ticket = xlog_cil_ticket_alloc(log);
...@@ -1033,31 +1127,7 @@ xlog_cil_push_work( ...@@ -1033,31 +1127,7 @@ xlog_cil_push_work(
list_add(&ctx->committing, &cil->xc_committing); list_add(&ctx->committing, &cil->xc_committing);
spin_unlock(&cil->xc_push_lock); spin_unlock(&cil->xc_push_lock);
/* xlog_cil_build_lv_chain(cil, ctx, &whiteouts, &num_iovecs, &num_bytes);
* Pull all the log vectors off the items in the CIL, and remove the
* items from the CIL. We don't need the CIL lock here because it's only
* needed on the transaction commit side which is currently locked out
* by the flush lock.
*/
lv = NULL;
while (!list_empty(&cil->xc_cil)) {
struct xfs_log_item *item;
item = list_first_entry(&cil->xc_cil,
struct xfs_log_item, li_cil);
list_del_init(&item->li_cil);
if (!ctx->lv_chain)
ctx->lv_chain = item->li_lv;
else
lv->lv_next = item->li_lv;
lv = item->li_lv;
item->li_lv = NULL;
num_iovecs += lv->lv_niovecs;
/* we don't write ordered log vectors */
if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED)
num_bytes += lv->lv_bytes;
}
/* /*
* Switch the contexts so we can drop the context lock and move out * Switch the contexts so we can drop the context lock and move out
...@@ -1160,6 +1230,7 @@ xlog_cil_push_work( ...@@ -1160,6 +1230,7 @@ xlog_cil_push_work(
/* Not safe to reference ctx now! */ /* Not safe to reference ctx now! */
spin_unlock(&log->l_icloglock); spin_unlock(&log->l_icloglock);
xlog_cil_cleanup_whiteouts(&whiteouts);
return; return;
out_skip: out_skip:
...@@ -1171,6 +1242,7 @@ xlog_cil_push_work( ...@@ -1171,6 +1242,7 @@ xlog_cil_push_work(
out_abort_free_ticket: out_abort_free_ticket:
xfs_log_ticket_ungrant(log, ctx->ticket); xfs_log_ticket_ungrant(log, ctx->ticket);
ASSERT(xlog_is_shutdown(log)); ASSERT(xlog_is_shutdown(log));
xlog_cil_cleanup_whiteouts(&whiteouts);
if (!ctx->commit_iclog) { if (!ctx->commit_iclog) {
xlog_cil_committed(ctx); xlog_cil_committed(ctx);
return; return;
...@@ -1319,6 +1391,43 @@ xlog_cil_empty( ...@@ -1319,6 +1391,43 @@ xlog_cil_empty(
return empty; return empty;
} }
/*
 * Cancel intent/intent-done pairs that fall within the same CIL checkpoint.
 *
 * If an intent done item in this transaction refers to an intent that was
 * committed in the current checkpoint, neither item needs to be written to
 * the journal as the change will be journalled atomically within this
 * checkpoint. Items cannot be removed from the CIL here, so the intent is
 * marked with a whiteout for the CIL push to remove, and its log vector is
 * freed. The intent done item is then removed from the transaction and
 * released so it never gets put into the CIL at all.
 *
 * Returns the sum of lv_bytes of the intent log vectors that were freed.
 */
static uint32_t
xlog_cil_process_intents(
	struct xfs_cil		*cil,
	struct xfs_trans	*tp)
{
	struct xfs_log_item	*done_lip, *intent_lip, *tmp;
	uint32_t		released = 0;

	list_for_each_entry_safe(done_lip, tmp, &tp->t_items, li_trans) {
		/* Only intent done items are of interest. */
		if (!(done_lip->li_ops->flags & XFS_ITEM_INTENT_DONE))
			continue;

		intent_lip = done_lip->li_ops->iop_intent(done_lip);
		if (!intent_lip)
			continue;
		if (!xlog_item_in_current_chkpt(cil, intent_lip))
			continue;

		/* Whiteout the intent and give back its log vector space. */
		set_bit(XFS_LI_WHITEOUT, &intent_lip->li_flags);
		trace_xfs_cil_whiteout_mark(intent_lip);
		released += intent_lip->li_lv->lv_bytes;
		kmem_free(intent_lip->li_lv);
		intent_lip->li_lv = NULL;

		/* The done item is dropped before it can reach the CIL. */
		xfs_trans_del_item(done_lip);
		done_lip->li_ops->iop_release(done_lip);
	}

	return released;
}
/* /*
* Commit a transaction with the given vector to the Committed Item List. * Commit a transaction with the given vector to the Committed Item List.
* *
...@@ -1341,6 +1450,7 @@ xlog_cil_commit( ...@@ -1341,6 +1450,7 @@ xlog_cil_commit(
{ {
struct xfs_cil *cil = log->l_cilp; struct xfs_cil *cil = log->l_cilp;
struct xfs_log_item *lip, *next; struct xfs_log_item *lip, *next;
uint32_t released_space = 0;
/* /*
* Do all necessary memory allocation before we lock the CIL. * Do all necessary memory allocation before we lock the CIL.
...@@ -1352,7 +1462,10 @@ xlog_cil_commit( ...@@ -1352,7 +1462,10 @@ xlog_cil_commit(
/* lock out background commit */ /* lock out background commit */
down_read(&cil->xc_ctx_lock); down_read(&cil->xc_ctx_lock);
xlog_cil_insert_items(log, tp); if (tp->t_flags & XFS_TRANS_HAS_INTENT_DONE)
released_space = xlog_cil_process_intents(cil, tp);
xlog_cil_insert_items(log, tp, released_space);
if (regrant && !xlog_is_shutdown(log)) if (regrant && !xlog_is_shutdown(log))
xfs_log_ticket_regrant(log, tp->t_ticket); xfs_log_ticket_regrant(log, tp->t_ticket);
...@@ -1508,32 +1621,6 @@ xlog_cil_force_seq( ...@@ -1508,32 +1621,6 @@ xlog_cil_force_seq(
return 0; return 0;
} }
/*
* Check if the current log item was first committed in this sequence.
* We can't rely on just the log item being in the CIL, we have to check
* the recorded commit sequence number.
*
* Note: for this to be used in a non-racy manner, it has to be called with
* CIL flushing locked out. As a result, it should only be used during the
* transaction commit process when deciding what to format into the item.
*/
bool
xfs_log_item_in_current_chkpt(
struct xfs_log_item *lip)
{
struct xfs_cil *cil = lip->li_log->l_cilp;
if (list_empty(&lip->li_cil))
return false;
/*
* li_seq is written on the first commit of a log item to record the
* first checkpoint it is written to. Hence if it is different to the
* current sequence, we're in a new checkpoint.
*/
return lip->li_seq == READ_ONCE(cil->xc_current_sequence);
}
/* /*
* Perform initial CIL structure initialisation. * Perform initial CIL structure initialisation.
*/ */
......
...@@ -35,6 +35,7 @@ STATIC void ...@@ -35,6 +35,7 @@ STATIC void
xfs_cui_item_free( xfs_cui_item_free(
struct xfs_cui_log_item *cuip) struct xfs_cui_log_item *cuip)
{ {
kmem_free(cuip->cui_item.li_lv_shadow);
if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS) if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
kmem_free(cuip); kmem_free(cuip);
else else
...@@ -53,10 +54,11 @@ xfs_cui_release( ...@@ -53,10 +54,11 @@ xfs_cui_release(
struct xfs_cui_log_item *cuip) struct xfs_cui_log_item *cuip)
{ {
ASSERT(atomic_read(&cuip->cui_refcount) > 0); ASSERT(atomic_read(&cuip->cui_refcount) > 0);
if (atomic_dec_and_test(&cuip->cui_refcount)) { if (!atomic_dec_and_test(&cuip->cui_refcount))
xfs_trans_ail_delete(&cuip->cui_item, SHUTDOWN_LOG_IO_ERROR); return;
xfs_trans_ail_delete(&cuip->cui_item, 0);
xfs_cui_item_free(cuip); xfs_cui_item_free(cuip);
}
} }
...@@ -204,14 +206,24 @@ xfs_cud_item_release( ...@@ -204,14 +206,24 @@ xfs_cud_item_release(
struct xfs_cud_log_item *cudp = CUD_ITEM(lip); struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
xfs_cui_release(cudp->cud_cuip); xfs_cui_release(cudp->cud_cuip);
kmem_free(cudp->cud_item.li_lv_shadow);
kmem_cache_free(xfs_cud_cache, cudp); kmem_cache_free(xfs_cud_cache, cudp);
} }
static struct xfs_log_item *
xfs_cud_item_intent(
struct xfs_log_item *lip)
{
return &CUD_ITEM(lip)->cud_cuip->cui_item;
}
static const struct xfs_item_ops xfs_cud_item_ops = { static const struct xfs_item_ops xfs_cud_item_ops = {
.flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED |
XFS_ITEM_INTENT_DONE,
.iop_size = xfs_cud_item_size, .iop_size = xfs_cud_item_size,
.iop_format = xfs_cud_item_format, .iop_format = xfs_cud_item_format,
.iop_release = xfs_cud_item_release, .iop_release = xfs_cud_item_release,
.iop_intent = xfs_cud_item_intent,
}; };
static struct xfs_cud_log_item * static struct xfs_cud_log_item *
...@@ -259,7 +271,7 @@ xfs_trans_log_finish_refcount_update( ...@@ -259,7 +271,7 @@ xfs_trans_log_finish_refcount_update(
* 1.) releases the CUI and frees the CUD * 1.) releases the CUI and frees the CUD
* 2.) shuts down the filesystem * 2.) shuts down the filesystem
*/ */
tp->t_flags |= XFS_TRANS_DIRTY; tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE;
set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
return error; return error;
...@@ -600,6 +612,7 @@ xfs_cui_item_relog( ...@@ -600,6 +612,7 @@ xfs_cui_item_relog(
} }
static const struct xfs_item_ops xfs_cui_item_ops = { static const struct xfs_item_ops xfs_cui_item_ops = {
.flags = XFS_ITEM_INTENT,
.iop_size = xfs_cui_item_size, .iop_size = xfs_cui_item_size,
.iop_format = xfs_cui_item_format, .iop_format = xfs_cui_item_format,
.iop_unpin = xfs_cui_item_unpin, .iop_unpin = xfs_cui_item_unpin,
......
...@@ -35,6 +35,7 @@ STATIC void ...@@ -35,6 +35,7 @@ STATIC void
xfs_rui_item_free( xfs_rui_item_free(
struct xfs_rui_log_item *ruip) struct xfs_rui_log_item *ruip)
{ {
kmem_free(ruip->rui_item.li_lv_shadow);
if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS) if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
kmem_free(ruip); kmem_free(ruip);
else else
...@@ -53,10 +54,11 @@ xfs_rui_release( ...@@ -53,10 +54,11 @@ xfs_rui_release(
struct xfs_rui_log_item *ruip) struct xfs_rui_log_item *ruip)
{ {
ASSERT(atomic_read(&ruip->rui_refcount) > 0); ASSERT(atomic_read(&ruip->rui_refcount) > 0);
if (atomic_dec_and_test(&ruip->rui_refcount)) { if (!atomic_dec_and_test(&ruip->rui_refcount))
xfs_trans_ail_delete(&ruip->rui_item, SHUTDOWN_LOG_IO_ERROR); return;
xfs_trans_ail_delete(&ruip->rui_item, 0);
xfs_rui_item_free(ruip); xfs_rui_item_free(ruip);
}
} }
STATIC void STATIC void
...@@ -227,14 +229,24 @@ xfs_rud_item_release( ...@@ -227,14 +229,24 @@ xfs_rud_item_release(
struct xfs_rud_log_item *rudp = RUD_ITEM(lip); struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
xfs_rui_release(rudp->rud_ruip); xfs_rui_release(rudp->rud_ruip);
kmem_free(rudp->rud_item.li_lv_shadow);
kmem_cache_free(xfs_rud_cache, rudp); kmem_cache_free(xfs_rud_cache, rudp);
} }
static struct xfs_log_item *
xfs_rud_item_intent(
struct xfs_log_item *lip)
{
return &RUD_ITEM(lip)->rud_ruip->rui_item;
}
static const struct xfs_item_ops xfs_rud_item_ops = { static const struct xfs_item_ops xfs_rud_item_ops = {
.flags = XFS_ITEM_RELEASE_WHEN_COMMITTED, .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED |
XFS_ITEM_INTENT_DONE,
.iop_size = xfs_rud_item_size, .iop_size = xfs_rud_item_size,
.iop_format = xfs_rud_item_format, .iop_format = xfs_rud_item_format,
.iop_release = xfs_rud_item_release, .iop_release = xfs_rud_item_release,
.iop_intent = xfs_rud_item_intent,
}; };
static struct xfs_rud_log_item * static struct xfs_rud_log_item *
...@@ -327,7 +339,7 @@ xfs_trans_log_finish_rmap_update( ...@@ -327,7 +339,7 @@ xfs_trans_log_finish_rmap_update(
* 1.) releases the RUI and frees the RUD * 1.) releases the RUI and frees the RUD
* 2.) shuts down the filesystem * 2.) shuts down the filesystem
*/ */
tp->t_flags |= XFS_TRANS_DIRTY; tp->t_flags |= XFS_TRANS_DIRTY | XFS_TRANS_HAS_INTENT_DONE;
set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags); set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
return error; return error;
...@@ -630,6 +642,7 @@ xfs_rui_item_relog( ...@@ -630,6 +642,7 @@ xfs_rui_item_relog(
} }
static const struct xfs_item_ops xfs_rui_item_ops = { static const struct xfs_item_ops xfs_rui_item_ops = {
.flags = XFS_ITEM_INTENT,
.iop_size = xfs_rui_item_size, .iop_size = xfs_rui_item_size,
.iop_format = xfs_rui_item_format, .iop_format = xfs_rui_item_format,
.iop_unpin = xfs_rui_item_unpin, .iop_unpin = xfs_rui_item_unpin,
......
...@@ -1332,6 +1332,9 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_push); ...@@ -1332,6 +1332,9 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_push);
DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned); DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned);
DEFINE_LOG_ITEM_EVENT(xfs_ail_locked); DEFINE_LOG_ITEM_EVENT(xfs_ail_locked);
DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_mark);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_skip);
DEFINE_LOG_ITEM_EVENT(xfs_cil_whiteout_unpin);
DECLARE_EVENT_CLASS(xfs_ail_class, DECLARE_EVENT_CLASS(xfs_ail_class,
TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn),
......
...@@ -55,13 +55,15 @@ struct xfs_log_item { ...@@ -55,13 +55,15 @@ struct xfs_log_item {
#define XFS_LI_IN_AIL 0 #define XFS_LI_IN_AIL 0
#define XFS_LI_ABORTED 1 #define XFS_LI_ABORTED 1
#define XFS_LI_FAILED 2 #define XFS_LI_FAILED 2
#define XFS_LI_DIRTY 3 /* log item dirty in transaction */ #define XFS_LI_DIRTY 3
#define XFS_LI_WHITEOUT 4
#define XFS_LI_FLAGS \ #define XFS_LI_FLAGS \
{ (1u << XFS_LI_IN_AIL), "IN_AIL" }, \ { (1u << XFS_LI_IN_AIL), "IN_AIL" }, \
{ (1u << XFS_LI_ABORTED), "ABORTED" }, \ { (1u << XFS_LI_ABORTED), "ABORTED" }, \
{ (1u << XFS_LI_FAILED), "FAILED" }, \ { (1u << XFS_LI_FAILED), "FAILED" }, \
{ (1u << XFS_LI_DIRTY), "DIRTY" } { (1u << XFS_LI_DIRTY), "DIRTY" }, \
{ (1u << XFS_LI_WHITEOUT), "WHITEOUT" }
struct xfs_item_ops { struct xfs_item_ops {
unsigned flags; unsigned flags;
...@@ -78,30 +80,32 @@ struct xfs_item_ops { ...@@ -78,30 +80,32 @@ struct xfs_item_ops {
bool (*iop_match)(struct xfs_log_item *item, uint64_t id); bool (*iop_match)(struct xfs_log_item *item, uint64_t id);
struct xfs_log_item *(*iop_relog)(struct xfs_log_item *intent, struct xfs_log_item *(*iop_relog)(struct xfs_log_item *intent,
struct xfs_trans *tp); struct xfs_trans *tp);
struct xfs_log_item *(*iop_intent)(struct xfs_log_item *intent_done);
}; };
/* Is this log item a deferred action intent? */ /*
* Log item ops flags
*/
/*
* Release the log item when the journal commits instead of inserting into the
* AIL for writeback tracking and/or log tail pinning.
*/
#define XFS_ITEM_RELEASE_WHEN_COMMITTED (1 << 0)
#define XFS_ITEM_INTENT (1 << 1)
#define XFS_ITEM_INTENT_DONE (1 << 2)
static inline bool static inline bool
xlog_item_is_intent(struct xfs_log_item *lip) xlog_item_is_intent(struct xfs_log_item *lip)
{ {
return lip->li_ops->iop_recover != NULL && return lip->li_ops->flags & XFS_ITEM_INTENT;
lip->li_ops->iop_match != NULL;
} }
/* Is this a log intent-done item? */
static inline bool static inline bool
xlog_item_is_intent_done(struct xfs_log_item *lip) xlog_item_is_intent_done(struct xfs_log_item *lip)
{ {
return lip->li_ops->iop_unpin == NULL && return lip->li_ops->flags & XFS_ITEM_INTENT_DONE;
lip->li_ops->iop_push == NULL;
} }
/*
* Release the log item as soon as committed. This is for items just logging
* intents that never need to be written back in place.
*/
#define XFS_ITEM_RELEASE_WHEN_COMMITTED (1 << 0)
void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
int type, const struct xfs_item_ops *ops); int type, const struct xfs_item_ops *ops);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment