Commit e4d7d6f4 authored by Lars Ellenberg's avatar Lars Ellenberg Committed by Jens Axboe

drbd: add back some fairness to AL transactions

When batching more updates to the activity log into single transactions,
we lost the ability for new requests to force themselves into the active
set: all preparation steps became non-blocking, and if all currently
hot extents keep busy, they could starve out new incoming requests
to cold extents for quite a while.

This can only happen if your IO backend accepts more IO operations per
average DRBD replication round trip time than you have al-extents
configured.

If we have incoming requests to cold extents,
at least do one blocking update per transaction.

In an artificial worst-case workload on SSD with an asynchronous 600 ms
replication link, with al-extents = 7 (the minimum we allow), and
concurrent full resynch, without this patch, some write requests have
been observed to be starved for 40 seconds.
With this patch, application observed a worst case latency of twice the
replication round trip time.
Signed-off-by: default avatarPhilipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: default avatarLars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: default avatarJens Axboe <axboe@fb.com>
parent fa090e70
...@@ -203,7 +203,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd ...@@ -203,7 +203,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
BUG_ON(!bdev->md_bdev); BUG_ON(!bdev->md_bdev);
drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n", dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n",
current->comm, current->pid, __func__, current->comm, current->pid, __func__,
(unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ",
(void*)_RET_IP_ ); (void*)_RET_IP_ );
...@@ -275,7 +275,6 @@ bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval ...@@ -275,7 +275,6 @@ bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval
return _al_get(device, first, true); return _al_get(device, first, true);
} }
static
bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i) bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i)
{ {
/* for bios crossing activity log extent boundaries, /* for bios crossing activity log extent boundaries,
......
...@@ -1487,6 +1487,7 @@ extern const char *drbd_conn_str(enum drbd_conns s); ...@@ -1487,6 +1487,7 @@ extern const char *drbd_conn_str(enum drbd_conns s);
extern const char *drbd_role_str(enum drbd_role s); extern const char *drbd_role_str(enum drbd_role s);
/* drbd_actlog.c */ /* drbd_actlog.c */
extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i); extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i);
extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate); extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate);
extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i); extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i);
......
...@@ -1242,6 +1242,7 @@ void do_submit(struct work_struct *ws) ...@@ -1242,6 +1242,7 @@ void do_submit(struct work_struct *ws)
if (list_empty(&incoming)) if (list_empty(&incoming))
break; break;
skip_fast_path:
wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending)); wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending));
/* Maybe more was queued, while we prepared the transaction? /* Maybe more was queued, while we prepared the transaction?
* Try to stuff them into this transaction as well. * Try to stuff them into this transaction as well.
...@@ -1280,6 +1281,25 @@ void do_submit(struct work_struct *ws) ...@@ -1280,6 +1281,25 @@ void do_submit(struct work_struct *ws)
list_del_init(&req->tl_requests); list_del_init(&req->tl_requests);
drbd_send_and_submit(device, req); drbd_send_and_submit(device, req);
} }
/* If all currently hot activity log extents are kept busy by
* incoming requests, we still must not totally starve new
* requests to cold extents. In that case, prepare one request
* in blocking mode. */
list_for_each_entry_safe(req, tmp, &incoming, tl_requests) {
list_del_init(&req->tl_requests);
req->rq_state |= RQ_IN_ACT_LOG;
if (!drbd_al_begin_io_prepare(device, &req->i)) {
/* Corresponding extent was hot after all? */
drbd_send_and_submit(device, req);
} else {
/* Found a request to a cold extent.
* Put on "pending" list,
* and try to cumulate with more. */
list_add(&req->tl_requests, &pending);
goto skip_fast_path;
}
}
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment