Commit 8b946255 authored by Andreas Gruenbacher, committed by Philipp Reisner

drbd: Use interval tree for overlapping epoch entry detection

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent 010f6e67
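
This change replaces linear scans over the ee_hash buckets with a sector-ordered interval tree rooted at mdev->epoch_entries, so overlapping peer writes can be found in O(log n). Only the interval's sector and size fields and the tree helper calls appear in the diff below. The following sketch of the interval type and of the predicate behind the removed OVERLAPS macros is an illustration, not the diffed source: drbd_overlaps is a hypothetical stand-in for the overlaps() helper those macros wrap, and the rb and end members are assumed plumbing for the tree helpers sketched further down.

#include <linux/types.h>
#include <linux/rbtree.h>

/* Sketch only: sector and size are the fields this diff relies on;
 * rb and end are assumed plumbing for the tree helpers sketched below. */
struct drbd_interval {
	struct rb_node rb;
	sector_t sector;	/* start sector of the request */
	unsigned int size;	/* size of the request, in bytes */
	sector_t end;		/* highest interval end in this subtree */
};

/* Two byte ranges intersect iff each one starts before the other ends
 * (sizes are in bytes, sectors are 512 bytes, hence the >> 9). */
static inline bool drbd_overlaps(sector_t s1, unsigned int l1,
				 sector_t s2, unsigned int l2)
{
	return s1 + (l1 >> 9) > s2 && s2 + (l2 >> 9) > s1;
}
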
drbd_int.h
@@ -1080,6 +1080,9 @@ struct drbd_conf {
 	struct hlist_head *ee_hash; /* is proteced by req_lock! */
 	unsigned int ee_hash_s;
 
+	/* Interval tree of pending remote write requests (struct drbd_epoch_entry) */
+	struct rb_root epoch_entries;
+
 	/* this one is protected by ee_lock, single thread */
 	struct drbd_epoch_entry *last_write_w_barrier;
...
drbd_main.c
@@ -3475,6 +3475,7 @@ struct drbd_conf *drbd_new_device(unsigned int minor)
 		goto out_no_tl;
 	mdev->read_requests = RB_ROOT;
 	mdev->write_requests = RB_ROOT;
+	mdev->epoch_entries = RB_ROOT;
 	mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL);
 	if (!mdev->app_reads_hash)
...
drbd_receiver.c
@@ -334,6 +334,7 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev,
 		goto fail;
 
 	INIT_HLIST_NODE(&e->collision);
+	drbd_clear_interval(&e->i);
 	e->epoch = NULL;
 	e->mdev = mdev;
 	e->pages = page;
@@ -361,6 +362,7 @@ void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int is_net)
 	drbd_pp_free(mdev, e->pages, is_net);
 	D_ASSERT(atomic_read(&e->pending_bios) == 0);
 	D_ASSERT(hlist_unhashed(&e->collision));
+	D_ASSERT(drbd_interval_empty(&e->i));
 	mempool_free(e, drbd_ee_mempool);
 }
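
drbd_alloc_ee now starts every epoch entry with an empty interval, and drbd_free_some_ee asserts that the entry has been taken back out of the tree. The two helpers are not part of this diff; a minimal sketch, assuming they wrap the kernel's RB_CLEAR_NODE/RB_EMPTY_NODE idiom for "this rb_node is currently unlinked":

/* Sketch, not the diffed source: mark an interval as not being part
 * of any tree, and test for that state. */
static inline void drbd_clear_interval(struct drbd_interval *i)
{
	RB_CLEAR_NODE(&i->rb);
}

static inline bool drbd_interval_empty(struct drbd_interval *i)
{
	return RB_EMPTY_NODE(&i->rb);
}
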
@@ -1418,6 +1420,7 @@ static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 	int ok;
 
 	D_ASSERT(hlist_unhashed(&e->collision));
+	D_ASSERT(drbd_interval_empty(&e->i));
 
 	if (likely((e->flags & EE_WAS_ERROR) == 0)) {
 		drbd_set_in_sync(mdev, sector, e->i.size);
@@ -1574,9 +1577,13 @@ static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
 		spin_lock_irq(&mdev->req_lock);
 		D_ASSERT(!hlist_unhashed(&e->collision));
 		hlist_del_init(&e->collision);
+		D_ASSERT(!drbd_interval_empty(&e->i));
+		drbd_remove_interval(&mdev->epoch_entries, &e->i);
+		drbd_clear_interval(&e->i);
 		spin_unlock_irq(&mdev->req_lock);
 	} else {
 		D_ASSERT(hlist_unhashed(&e->collision));
+		D_ASSERT(drbd_interval_empty(&e->i));
 	}
 
 	drbd_may_finish_epoch(mdev, e->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
@@ -1595,6 +1602,9 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused)
 	spin_lock_irq(&mdev->req_lock);
 	D_ASSERT(!hlist_unhashed(&e->collision));
 	hlist_del_init(&e->collision);
+	D_ASSERT(!drbd_interval_empty(&e->i));
+	drbd_remove_interval(&mdev->epoch_entries, &e->i);
+	drbd_clear_interval(&e->i);
 	spin_unlock_irq(&mdev->req_lock);
 
 	dec_unacked(mdev);
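
Both completion paths (e_end_block above and e_send_discard_ack here) unlink the entry while holding req_lock, mirroring the hash removal next to it. The removal helper is outside this diff; ignoring maintenance of any cached subtree end values, it would reduce to rb_erase. A sketch under that assumption:

/* Sketch only: plain rb_erase-based removal. Callers hold
 * mdev->req_lock, as in the hunks above. An augmented tree would
 * also have to refresh its cached subtree end values here. */
static void drbd_remove_interval(struct rb_root *root,
				 struct drbd_interval *this)
{
	rb_erase(&this->rb, root);
}
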
@@ -1767,6 +1777,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 		spin_lock_irq(&mdev->req_lock);
 
 		hlist_add_head(&e->collision, ee_hash_slot(mdev, sector));
+		drbd_insert_interval(&mdev->epoch_entries, &e->i);
 
 		first = 1;
 		for (;;) {
@@ -1817,6 +1828,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 			if (signal_pending(current)) {
 				hlist_del_init(&e->collision);
+				drbd_remove_interval(&mdev->epoch_entries, &e->i);
+				drbd_clear_interval(&e->i);
 				spin_unlock_irq(&mdev->req_lock);
@@ -1875,6 +1888,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size)
 	spin_lock_irq(&mdev->req_lock);
 	list_del(&e->w.list);
 	hlist_del_init(&e->collision);
+	drbd_remove_interval(&mdev->epoch_entries, &e->i);
+	drbd_clear_interval(&e->i);
 	spin_unlock_irq(&mdev->req_lock);
 	if (e->flags & EE_CALL_AL_COMPLETE_IO)
 		drbd_al_complete_io(mdev, e->i.sector);
...
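
receive_Data registers every incoming peer write in the tree under req_lock, right next to the existing hash insertion, and takes it back out on the signal and failure paths above. The insertion helper is outside this diff; a sketch of a standard rbtree insert keyed on the start sector, with the node address as an assumed tie-breaker so two intervals starting at the same sector stay distinct:

/* Sketch only: sector-keyed rbtree insertion. Returns false if this
 * exact interval is already in the tree. An augmented tree would also
 * update its cached subtree end values along the insertion path. */
static bool drbd_insert_interval(struct rb_root *root,
				 struct drbd_interval *this)
{
	struct rb_node **new = &root->rb_node, *parent = NULL;

	while (*new) {
		struct drbd_interval *here =
			rb_entry(*new, struct drbd_interval, rb);

		parent = *new;
		if (this->sector < here->sector)
			new = &(*new)->rb_left;
		else if (this->sector > here->sector)
			new = &(*new)->rb_right;
		else if (this < here)
			new = &(*new)->rb_left;
		else if (this > here)
			new = &(*new)->rb_right;
		else
			return false;	/* already in the tree */
	}

	rb_link_node(&this->rb, parent, new);
	rb_insert_color(&this->rb, root);
	return true;
}
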
drbd_req.c
@@ -135,9 +135,6 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
 	struct drbd_request *req)
 {
 	const unsigned long s = req->rq_state;
-	struct drbd_epoch_entry *e;
-	struct hlist_node *n;
-	struct hlist_head *slot;
 
 	/* Before we can signal completion to the upper layers,
 	 * we may need to close the current epoch.
@@ -185,16 +182,10 @@ static void _about_to_complete_local_write(struct drbd_conf *mdev,
 	 *
 	 * anyways, if we found one,
 	 * we just have to do a wake_up. */
-#define OVERLAPS overlaps(sector, size, e->i.sector, e->i.size)
-		slot = ee_hash_slot(mdev, req->i.sector);
-		hlist_for_each_entry(e, n, slot, collision) {
-			if (OVERLAPS) {
-				wake_up(&mdev->misc_wait);
-				break;
-			}
-		}
+		i = drbd_find_overlap(&mdev->epoch_entries, sector, size);
+		if (i)
+			wake_up(&mdev->misc_wait);
 	}
-#undef OVERLAPS
 }
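
The lookup itself, drbd_find_overlap, also lives outside this diff. The usual way to make such a search O(log n) (an assumption here) is to augment each node with the highest interval end in its subtree, so whole subtrees can be skipped; with a plain sector-keyed tree the same walk would degenerate into scanning a key range. A sketch of the augmented search, returning the leftmost overlap:

/* Sketch: the end field is assumed to cache the highest interval end
 * in each node's subtree, maintained by the insert/remove paths. */
static inline sector_t interval_end(struct rb_node *node)
{
	struct drbd_interval *this =
		rb_entry(node, struct drbd_interval, rb);
	return this->end;
}

static struct drbd_interval *drbd_find_overlap(struct rb_root *root,
					       sector_t sector,
					       unsigned int size)
{
	struct rb_node *node = root->rb_node;
	struct drbd_interval *overlap = NULL;
	sector_t end = sector + (size >> 9);

	while (node) {
		struct drbd_interval *here =
			rb_entry(node, struct drbd_interval, rb);

		if (node->rb_left && sector < interval_end(node->rb_left)) {
			/* An overlap, if any, must be in the left subtree. */
			node = node->rb_left;
		} else if (here->sector < end &&
			   sector < here->sector + (here->size >> 9)) {
			/* [sector, end) intersects *here. */
			overlap = here;
			break;
		} else if (sector >= here->sector) {
			/* An overlap, if any, must be in the right subtree. */
			node = node->rb_right;
		} else
			break;
	}
	return overlap;
}
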
 void complete_master_bio(struct drbd_conf *mdev,
@@ -332,9 +323,6 @@ static int _req_conflicts(struct drbd_request *req)
 	const sector_t sector = req->i.sector;
 	const int size = req->i.size;
 	struct drbd_interval *i;
-	struct drbd_epoch_entry *e;
-	struct hlist_node *n;
-	struct hlist_head *slot;
 
 	D_ASSERT(hlist_unhashed(&req->collision));
 	D_ASSERT(drbd_interval_empty(&req->i));
@@ -364,21 +352,21 @@ static int _req_conflicts(struct drbd_request *req)
 	if (mdev->ee_hash_s) {
 		/* now, check for overlapping requests with remote origin */
 		BUG_ON(mdev->ee_hash == NULL);
-#define OVERLAPS overlaps(e->i.sector, e->i.size, sector, size)
-		slot = ee_hash_slot(mdev, sector);
-		hlist_for_each_entry(e, n, slot, collision) {
-			if (OVERLAPS) {
-				dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
-				      " [DISCARD L] new: %llus +%u; "
-				      "pending: %llus +%u\n",
-				      current->comm, current->pid,
-				      (unsigned long long)sector, size,
-				      (unsigned long long)e->i.sector, e->i.size);
-				goto out_conflict;
-			}
+		i = drbd_find_overlap(&mdev->epoch_entries, sector, size);
+		if (i) {
+			struct drbd_epoch_entry *e =
+				container_of(i, struct drbd_epoch_entry, i);
+
+			dev_alert(DEV, "%s[%u] Concurrent remote write detected!"
+			      " [DISCARD L] new: %llus +%u; "
+			      "pending: %llus +%u\n",
+			      current->comm, current->pid,
+			      (unsigned long long)sector, size,
+			      (unsigned long long)e->i.sector, e->i.size);
+			goto out_conflict;
 		}
 	}
-#undef OVERLAPS
 out_no_conflict:
 	/* this is like it should be, and what we expected.
...
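
A design note on the container_of() step in _req_conflicts: the tree stores bare struct drbd_interval nodes, which lets local requests (struct drbd_request in read_requests/write_requests) and peer writes (struct drbd_epoch_entry in epoch_entries) share one interval implementation; each caller converts a hit back to its own type. A minimal illustration of the pattern (epoch_entry_of is a hypothetical helper name, not part of this diff):

/* Hypothetical helper illustrating the embedded-interval pattern:
 * both request types embed a struct drbd_interval member named i,
 * and container_of() recovers the enclosing object from a tree hit. */
static inline struct drbd_epoch_entry *
epoch_entry_of(struct drbd_interval *i)
{
	return container_of(i, struct drbd_epoch_entry, i);
}
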