Commit c0fb4765 authored by Dave Chinner, committed by Dave Chinner

xfs: convert CIL to unordered per cpu lists

So that we can remove the cil_lock, which is a global serialisation
point. We've already got ordering sorted, so all we need to do is
treat the CIL list like the busy extent list and reconstruct it
before the push starts.
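
As a rough illustration of the pattern (not the kernel code: the singly
linked list, the helper names and the fixed CPU count below are
simplified stand-ins for the list_head/per-cpu machinery), commit-time
insertion stays lock-free apart from one shared atomic order id, and
global ordering is recovered with a single sort when the push
aggregates the per-cpu lists:

/* Minimal userspace sketch of per-cpu unordered lists with commit-time
 * ordering. Items record a global order id when committed and are added
 * to the local CPU's list without any shared lock; the push path splices
 * all per-cpu lists together and sorts once to restore global order.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS		4
#define MAX_ITEMS	64

struct item {
	unsigned long	order_id;	/* assigned at commit time */
	struct item	*next;
};

static struct item *pcp_list[NR_CPUS];	/* one unordered list per cpu */
static atomic_ulong next_order;

/* Commit path: no global lock, only one shared atomic counter. */
static void commit_insert(struct item *ip, int cpu)
{
	ip->order_id = atomic_fetch_add(&next_order, 1) + 1;
	ip->next = pcp_list[cpu];
	pcp_list[cpu] = ip;
}

static int order_cmp(const void *a, const void *b)
{
	const struct item *l = *(const struct item **)a;
	const struct item *r = *(const struct item **)b;

	return (l->order_id > r->order_id) - (l->order_id < r->order_id);
}

/* Push path: aggregate every per-cpu list, then sort by order id. */
static void push_aggregate(void)
{
	struct item *all[MAX_ITEMS];
	int n = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		for (struct item *ip = pcp_list[cpu]; ip; ip = ip->next)
			all[n++] = ip;
		pcp_list[cpu] = NULL;	/* list is consumed by the push */
	}
	qsort(all, n, sizeof(all[0]), order_cmp);
	for (int i = 0; i < n; i++)
		printf("push item %lu\n", all[i]->order_id);
}

int main(void)
{
	struct item items[6];

	for (int i = 0; i < 6; i++)
		commit_insert(&items[i], i % NR_CPUS);	/* scatter across cpus */
	push_aggregate();
	return 0;
}

The patch below does the equivalent with list_add_tail() into
cilpcp->log_items at commit, list_splice_init() into ctx->log_items at
aggregation time, and list_sort() on li_order_id in
xlog_cil_build_lv_chain().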

This is what we're trying to avoid:

 -   75.35%     1.83%  [kernel]            [k] xfs_log_commit_cil
    - 46.35% xfs_log_commit_cil
       - 41.54% _raw_spin_lock
          - 67.30% do_raw_spin_lock
               66.96% __pv_queued_spin_lock_slowpath

Which happens on a 32p system when running a 32-way 'rm -rf'
workload. After this patch:

-   20.90%     3.23%  [kernel]               [k] xfs_log_commit_cil
   - 17.67% xfs_log_commit_cil
      - 6.51% xfs_log_ticket_ungrant
           1.40% xfs_log_space_wake
        2.32% memcpy_erms
      - 2.18% xfs_buf_item_committing
         - 2.12% xfs_buf_item_release
            - 1.03% xfs_buf_unlock
                 0.96% up
              0.72% xfs_buf_rele
        1.33% xfs_inode_item_format
        1.19% down_read
        0.91% up_read
        0.76% xfs_buf_item_format
      - 0.68% kmem_alloc_large
         - 0.67% kmem_alloc
              0.64% __kmalloc
        0.50% xfs_buf_item_size

It kinda looks like the workload is running out of log space all
the time. But all the spinlock contention is gone and the
transaction commit rate has gone from 800k/s to 1.3M/s so the amount
of real work being done has gone up a *lot*.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
parent 016a2338
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -104,6 +104,7 @@ xlog_cil_ctx_alloc(void)
 	ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS);
 	INIT_LIST_HEAD(&ctx->committing);
 	INIT_LIST_HEAD(&ctx->busy_extents);
+	INIT_LIST_HEAD(&ctx->log_items);
 	INIT_WORK(&ctx->push_work, xlog_cil_push_work);
 	return ctx;
 }
@@ -132,6 +133,8 @@ xlog_cil_push_pcp_aggregate(
 		list_splice_init(&cilpcp->busy_extents,
 					&ctx->busy_extents);
 	}
+	if (!list_empty(&cilpcp->log_items))
+		list_splice_init(&cilpcp->log_items, &ctx->log_items);

 	/*
 	 * We're in the middle of switching cil contexts. Reset the
@@ -579,10 +582,9 @@ xlog_cil_insert_items(
 	/*
 	 * We need to take the CIL checkpoint unit reservation on the first
 	 * commit into the CIL. Test the XLOG_CIL_EMPTY bit first so we don't
-	 * unnecessarily do an atomic op in the fast path here. We don't need to
-	 * hold the xc_cil_lock here to clear the XLOG_CIL_EMPTY bit as we are
-	 * under the xc_ctx_lock here and that needs to be held exclusively to
-	 * reset the XLOG_CIL_EMPTY bit.
+	 * unnecessarily do an atomic op in the fast path here. We can clear the
+	 * XLOG_CIL_EMPTY bit as we are under the xc_ctx_lock here and that
+	 * needs to be held exclusively to reset the XLOG_CIL_EMPTY bit.
 	 */
 	if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags) &&
 	    test_and_clear_bit(XLOG_CIL_EMPTY, &cil->xc_flags))
@@ -643,7 +645,6 @@ xlog_cil_insert_items(
 	/* attach the transaction to the CIL if it has any busy extents */
 	if (!list_empty(&tp->t_busy))
 		list_splice_init(&tp->t_busy, &cilpcp->busy_extents);
-	put_cpu_ptr(cilpcp);

 	/*
 	 * Now update the order of everything modified in the transaction
@@ -652,7 +653,6 @@ xlog_cil_insert_items(
 	 * the transaction commit.
 	 */
 	order = atomic_inc_return(&ctx->order_id);
-	spin_lock(&cil->xc_cil_lock);
 	list_for_each_entry(lip, &tp->t_items, li_trans) {
 		/* Skip items which aren't dirty in this transaction. */
 		if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
@@ -661,10 +661,9 @@ xlog_cil_insert_items(
 		lip->li_order_id = order;
 		if (!list_empty(&lip->li_cil))
 			continue;
-		list_add_tail(&lip->li_cil, &cil->xc_cil);
+		list_add_tail(&lip->li_cil, &cilpcp->log_items);
 	}
-	spin_unlock(&cil->xc_cil_lock);
+	put_cpu_ptr(cilpcp);

 	/*
 	 * If we've overrun the reservation, dump the tx details before we move
@@ -1113,7 +1112,6 @@ xlog_cil_order_cmp(
  */
 static void
 xlog_cil_build_lv_chain(
-	struct xfs_cil		*cil,
 	struct xfs_cil_ctx	*ctx,
 	struct list_head	*whiteouts,
 	uint32_t		*num_iovecs,
@@ -1121,12 +1119,12 @@ xlog_cil_build_lv_chain(
 {
 	struct xfs_log_vec	*lv = NULL;

-	list_sort(NULL, &cil->xc_cil, xlog_cil_order_cmp);
-	while (!list_empty(&cil->xc_cil)) {
+	list_sort(NULL, &ctx->log_items, xlog_cil_order_cmp);
+	while (!list_empty(&ctx->log_items)) {
 		struct xfs_log_item	*item;

-		item = list_first_entry(&cil->xc_cil,
+		item = list_first_entry(&ctx->log_items,
 					struct xfs_log_item, li_cil);

 		if (test_bit(XFS_LI_WHITEOUT, &item->li_flags)) {
@@ -1265,7 +1263,7 @@ xlog_cil_push_work(
 	list_add(&ctx->committing, &cil->xc_committing);
 	spin_unlock(&cil->xc_push_lock);

-	xlog_cil_build_lv_chain(cil, ctx, &whiteouts, &num_iovecs, &num_bytes);
+	xlog_cil_build_lv_chain(ctx, &whiteouts, &num_iovecs, &num_bytes);

 	/*
 	 * Switch the contexts so we can drop the context lock and move out
@@ -1409,7 +1407,6 @@ xlog_cil_push_background(
 	 * The cil won't be empty because we are called while holding the
 	 * context lock so whatever we added to the CIL will still be there.
 	 */
-	ASSERT(!list_empty(&cil->xc_cil));
 	ASSERT(!test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));

 	/*
@@ -1656,7 +1653,7 @@ xlog_cil_flush(
 	 * If the CIL is empty, make sure that any previous checkpoint that may
 	 * still be in an active iclog is pushed to stable storage.
 	 */
-	if (list_empty(&log->l_cilp->xc_cil))
+	if (test_bit(XLOG_CIL_EMPTY, &log->l_cilp->xc_flags))
 		xfs_log_force(log->l_mp, 0);
 }
@@ -1784,6 +1781,8 @@ xlog_cil_pcp_dead(
 		ctx->ticket->t_curr_res += cilpcp->space_reserved;
 	cilpcp->space_reserved = 0;

+	if (!list_empty(&cilpcp->log_items))
+		list_splice_init(&cilpcp->log_items, &ctx->log_items);
 	if (!list_empty(&cilpcp->busy_extents))
 		list_splice_init(&cilpcp->busy_extents, &ctx->busy_extents);
 	atomic_add(cilpcp->space_used, &ctx->space_used);
@@ -1824,11 +1823,10 @@ xlog_cil_init(
 	for_each_possible_cpu(cpu) {
 		cilpcp = per_cpu_ptr(cil->xc_pcp, cpu);
 		INIT_LIST_HEAD(&cilpcp->busy_extents);
+		INIT_LIST_HEAD(&cilpcp->log_items);
 	}

-	INIT_LIST_HEAD(&cil->xc_cil);
 	INIT_LIST_HEAD(&cil->xc_committing);
-	spin_lock_init(&cil->xc_cil_lock);
 	spin_lock_init(&cil->xc_push_lock);
 	init_waitqueue_head(&cil->xc_push_wait);
 	init_rwsem(&cil->xc_ctx_lock);
@@ -1859,7 +1857,6 @@ xlog_cil_destroy(
 		kmem_free(cil->xc_ctx);
 	}

-	ASSERT(list_empty(&cil->xc_cil));
 	ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
 	free_percpu(cil->xc_pcp);
 	destroy_workqueue(cil->xc_push_wq);
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -224,6 +224,7 @@ struct xfs_cil_ctx {
 	struct xlog_ticket	*ticket;	/* chkpt ticket */
 	atomic_t		space_used;	/* aggregate size of regions */
 	struct list_head	busy_extents;	/* busy extents in chkpt */
+	struct list_head	log_items;	/* log items in chkpt */
 	struct xfs_log_vec	*lv_chain;	/* logvecs being pushed */
 	struct list_head	iclog_entry;
 	struct list_head	committing;	/* ctx committing list */
@@ -262,8 +263,6 @@ struct xfs_cil {
 	struct xlog		*xc_log;
 	unsigned long		xc_flags;
 	atomic_t		xc_iclog_hdrs;
-	struct list_head	xc_cil;
-	spinlock_t		xc_cil_lock;
 	struct workqueue_struct	*xc_push_wq;

 	struct rw_semaphore	xc_ctx_lock ____cacheline_aligned_in_smp;