Commit f5b90b6b authored by Lars Ellenberg, committed by Philipp Reisner

drbd: resync should only lock out specific ranges

During resync, when we had to block a specific incoming write because of
active resync requests to that same range, we potentially caused *all*
new application writes (to "cold" activity log extents) to block until
that one request had been processed.

Improve the do_submit() logic to
 * grab all incoming requests to some "incoming" list
 * process this list
   - move aside requests that are blocked by resync
   - prepare activity log transactions,
   - commit transactions and submit corresponding requests
   - if there are remaining requests that only wait for
     activity log extents to become free, stop the fast path
     (mark activity log as "starving")
   - iterate until no more requests are waiting for the activity log,
     but all potentially remaining requests are only blocked by resync
 * only then grab new incoming requests (see the sketch below)
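
A simplified C sketch of that flow (not the verbatim kernel code: the
actual patch below replaces the wait_event() with an explicit
prepare_to_wait()/schedule() loop, and adds a second, strictly
non-blocking pass that stuffs newly queued requests into the same
transaction):

	LIST_HEAD(incoming);	/* new writes from drbd_make_request() */
	LIST_HEAD(pending);	/* got an AL update slot; submit after commit */
	LIST_HEAD(busy);	/* blocked by resync; re-checked next round */

	/* grab new incoming requests */
	spin_lock_irq(&device->resource->req_lock);
	list_splice_tail_init(&device->submit.writes, &incoming);
	spin_unlock_irq(&device->resource->req_lock);

	for (;;) {
		/* re-check requests that resync had blocked last round */
		list_splice_init(&busy, &incoming);

		/* fast path: requests to currently "hot" AL extents */
		submit_fast_path(device, &incoming);
		if (list_empty(&incoming))
			break;

		/* sort the rest: update slot available -> "pending",
		 * blocked by resync -> "busy"; if not even one request
		 * fits, mark the AL "starving" and sleep on al_wait */
		wait_event(device->al_wait,
			   prepare_al_transaction_nonblock(device, &incoming,
							   &pending, &busy));

		/* one AL transaction covers everything on "pending" */
		drbd_al_begin_io_commit(device);
		send_and_submit_pending(device, &pending);
	}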

That way, very busy IO on currently "hot" activity log extents cannot
starve scattered IO to "cold" extents. And blocked-by-resync requests
are processed once resync traffic on the affected region has ceased,
without blocking anything else.

The only blocking mode left is when we cannot start requests to "cold"
extents because all currently "hot" extents are actually used.
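
The sorting hinges on the return value of drbd_al_begin_io_nonblock().
Condensed and annotated from the hunks below (the comments here are
added for illustration, they are not part of the patch):

	err = drbd_al_begin_io_nonblock(device, &req->i);
	if (err == -ENOBUFS)	/* no free AL update slots: the transaction
				 * is full, commit what we have so far */
		break;
	if (err == -EBUSY)	/* resync is active in this range */
		wake = 1;	/* wake al_wait waiters after the scan */
	if (err)
		list_move_tail(&req->tl_requests, later);	/* "busy" */
	else
		list_move_tail(&req->tl_requests, pending);	/* this transaction */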
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent cc356f85
...
@@ -357,8 +357,19 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *
 	/* We want all necessary updates for a given request within the same transaction
 	 * We could first check how many updates are *actually* needed,
 	 * and use that instead of the worst-case nr_al_extents */
-	if (available_update_slots < nr_al_extents)
-		return -EWOULDBLOCK;
+	if (available_update_slots < nr_al_extents) {
+		/* Too many activity log extents are currently "hot".
+		 *
+		 * If we have accumulated pending changes already,
+		 * we made progress.
+		 *
+		 * If we cannot get even a single pending change through,
+		 * stop the fast path until we made some progress,
+		 * or requests to "cold" extents could be starved. */
+		if (!al->pending_changes)
+			__set_bit(__LC_STARVING, &device->act_log->flags);
+		return -ENOBUFS;
+	}
 
 	/* Is resync active in this area? */
 	for (enr = first; enr <= last; enr++) {
...
...
@@ -1182,6 +1182,8 @@ static void drbd_queue_write(struct drbd_device *device, struct drbd_request *re
 		      &device->pending_master_completion[1 /* WRITE */]);
 	spin_unlock_irq(&device->resource->req_lock);
 	queue_work(device->submit.wq, &device->submit.worker);
+	/* do_submit() may sleep internally on al_wait, too */
+	wake_up(&device->al_wait);
 }
 
 /* returns the new drbd_request pointer, if the caller is expected to
...
@@ -1365,7 +1367,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
 
 static bool prepare_al_transaction_nonblock(struct drbd_device *device,
 					    struct list_head *incoming,
-					    struct list_head *pending)
+					    struct list_head *pending,
+					    struct list_head *later)
 {
 	struct drbd_request *req, *tmp;
 	int wake = 0;
...
@@ -1374,44 +1377,105 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device,
 	spin_lock_irq(&device->al_lock);
 	list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
 		err = drbd_al_begin_io_nonblock(device, &req->i);
+		if (err == -ENOBUFS)
+			break;
 		if (err == -EBUSY)
 			wake = 1;
 		if (err)
-			continue;
-		list_move_tail(&req->tl_requests, pending);
+			list_move_tail(&req->tl_requests, later);
+		else
+			list_move_tail(&req->tl_requests, pending);
 	}
 	spin_unlock_irq(&device->al_lock);
 	if (wake)
 		wake_up(&device->al_wait);
 	return !list_empty(pending);
 }
 
+void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
+{
+	struct drbd_request *req, *tmp;
+
+	list_for_each_entry_safe(req, tmp, pending, tl_requests) {
+		req->rq_state |= RQ_IN_ACT_LOG;
+		req->in_actlog_jif = jiffies;
+		atomic_dec(&device->ap_actlog_cnt);
+		list_del_init(&req->tl_requests);
+		drbd_send_and_submit(device, req);
+	}
+}
+
 void do_submit(struct work_struct *ws)
 {
 	struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker);
-	LIST_HEAD(incoming);
-	LIST_HEAD(pending);
-	struct drbd_request *req, *tmp;
+	LIST_HEAD(incoming);	/* from drbd_make_request() */
+	LIST_HEAD(pending);	/* to be submitted after next AL-transaction commit */
+	LIST_HEAD(busy);	/* blocked by resync requests */
+
+	/* grab new incoming requests */
+	spin_lock_irq(&device->resource->req_lock);
+	list_splice_tail_init(&device->submit.writes, &incoming);
+	spin_unlock_irq(&device->resource->req_lock);
 
 	for (;;) {
-		spin_lock_irq(&device->resource->req_lock);
-		list_splice_tail_init(&device->submit.writes, &incoming);
-		spin_unlock_irq(&device->resource->req_lock);
+		DEFINE_WAIT(wait);
 
+		/* move used-to-be-busy back to front of incoming */
+		list_splice_init(&busy, &incoming);
 		submit_fast_path(device, &incoming);
 		if (list_empty(&incoming))
 			break;
 
-skip_fast_path:
-		wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending));
-		/* Maybe more was queued, while we prepared the transaction?
-		 * Try to stuff them into this transaction as well.
-		 * Be strictly non-blocking here, no wait_event, we already
-		 * have something to commit.
-		 * Stop if we don't make any more progres.
-		 */
 		for (;;) {
+			prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE);
+
+			list_splice_init(&busy, &incoming);
+			prepare_al_transaction_nonblock(device, &incoming, &pending, &busy);
+			if (!list_empty(&pending))
+				break;
+
+			schedule();
+
+			/* If all currently "hot" activity log extents are kept busy by
+			 * incoming requests, we still must not totally starve new
+			 * requests to "cold" extents.
+			 * Something left on &incoming means there had not been
+			 * enough update slots available, and the activity log
+			 * has been marked as "starving".
+			 *
+			 * Try again now, without looking for new requests,
+			 * effectively blocking all new requests until we made
			 * at least _some_ progress with what we currently have.
+			 */
+			if (!list_empty(&incoming))
+				continue;
+
+			/* Nothing moved to pending, but nothing left
+			 * on incoming: all moved to busy!
+			 * Grab new and iterate. */
+			spin_lock_irq(&device->resource->req_lock);
+			list_splice_tail_init(&device->submit.writes, &incoming);
+			spin_unlock_irq(&device->resource->req_lock);
+		}
+		finish_wait(&device->al_wait, &wait);
+
+		/* If the transaction was full, before all incoming requests
+		 * had been processed, skip ahead to commit, and iterate
+		 * without splicing in more incoming requests from upper layers.
+		 *
+		 * Else, if all incoming have been processed,
+		 * they have become either "pending" (to be submitted after
+		 * next transaction commit) or "busy" (blocked by resync).
+		 *
+		 * Maybe more was queued, while we prepared the transaction?
+		 * Try to stuff those into this transaction as well.
+		 * Be strictly non-blocking here,
+		 * we already have something to commit.
+		 *
+		 * Commit if we don't make any more progress.
+		 */
+		while (list_empty(&incoming)) {
 			LIST_HEAD(more_pending);
 			LIST_HEAD(more_incoming);
 			bool made_progress;
...
@@ -1428,46 +1492,16 @@ void do_submit(struct work_struct *ws)
 			if (list_empty(&more_incoming))
 				break;
 
-			made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending);
+			made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy);
 
 			list_splice_tail_init(&more_pending, &pending);
 			list_splice_tail_init(&more_incoming, &incoming);
-
 			if (!made_progress)
 				break;
 		}
-		drbd_al_begin_io_commit(device);
 
-		list_for_each_entry_safe(req, tmp, &pending, tl_requests) {
-			req->rq_state |= RQ_IN_ACT_LOG;
-			req->in_actlog_jif = jiffies;
-			atomic_dec(&device->ap_actlog_cnt);
-			list_del_init(&req->tl_requests);
-			drbd_send_and_submit(device, req);
-		}
-
-		/* If all currently hot activity log extents are kept busy by
-		 * incoming requests, we still must not totally starve new
-		 * requests to cold extents. In that case, prepare one request
-		 * in blocking mode. */
-		list_for_each_entry_safe(req, tmp, &incoming, tl_requests) {
-			bool was_cold;
-			list_del_init(&req->tl_requests);
-			was_cold = drbd_al_begin_io_prepare(device, &req->i);
-			if (!was_cold) {
-				req->rq_state |= RQ_IN_ACT_LOG;
-				req->in_actlog_jif = jiffies;
-				atomic_dec(&device->ap_actlog_cnt);
-				/* Corresponding extent was hot after all? */
-				drbd_send_and_submit(device, req);
-			} else {
-				/* Found a request to a cold extent.
-				 * Put on "pending" list,
-				 * and try to cumulate with more. */
-				list_add(&req->tl_requests, &pending);
-				goto skip_fast_path;
-			}
-		}
+		drbd_al_begin_io_commit(device);
+		send_and_submit_pending(device, &pending);
 	}
 }
...