Commit 9a25a04c authored by Philipp Reisner

drbd: If we detect late that IO got frozen, retry after we thawed.

If we detect late (= after grabbing mdev->req_lock) that IO got frozen, we
return 1 to generic_make_request(), which will simply retry submitting
that bio.

In the subsequent call of generic_make_request() into drbd_make_request_26()
we sleep in inc_ap_bio().
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent a1c88d0d
...@@ -2223,7 +2223,7 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) ...@@ -2223,7 +2223,7 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev)
/* I'd like to use wait_event_lock_irq, /* I'd like to use wait_event_lock_irq,
* but I'm not sure when it got introduced, * but I'm not sure when it got introduced,
* and not sure when it has 3 or 4 arguments */ * and not sure when it has 3 or 4 arguments */
static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two) static inline void inc_ap_bio(struct drbd_conf *mdev, int count)
{ {
/* compare with after_state_ch, /* compare with after_state_ch,
* os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */ * os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S */
...@@ -2245,7 +2245,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two) ...@@ -2245,7 +2245,7 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int one_or_two)
finish_wait(&mdev->misc_wait, &wait); finish_wait(&mdev->misc_wait, &wait);
spin_lock_irq(&mdev->req_lock); spin_lock_irq(&mdev->req_lock);
} }
atomic_add(one_or_two, &mdev->ap_bio_cnt); atomic_add(count, &mdev->ap_bio_cnt);
spin_unlock_irq(&mdev->req_lock); spin_unlock_irq(&mdev->req_lock);
} }
......
...@@ -722,6 +722,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) ...@@ -722,6 +722,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
struct drbd_request *req; struct drbd_request *req;
int local, remote; int local, remote;
int err = -EIO; int err = -EIO;
int ret = 0;
/* allocate outside of all locks; */ /* allocate outside of all locks; */
req = drbd_req_new(mdev, bio); req = drbd_req_new(mdev, bio);
...@@ -784,7 +785,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) ...@@ -784,7 +785,7 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
(mdev->state.pdsk == D_INCONSISTENT && (mdev->state.pdsk == D_INCONSISTENT &&
mdev->state.conn >= C_CONNECTED)); mdev->state.conn >= C_CONNECTED));
if (!(local || remote)) { if (!(local || remote) && !mdev->state.susp) {
dev_err(DEV, "IO ERROR: neither local nor remote disk\n"); dev_err(DEV, "IO ERROR: neither local nor remote disk\n");
goto fail_free_complete; goto fail_free_complete;
} }
...@@ -810,6 +811,16 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) ...@@ -810,6 +811,16 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
/* GOOD, everything prepared, grab the spin_lock */ /* GOOD, everything prepared, grab the spin_lock */
spin_lock_irq(&mdev->req_lock); spin_lock_irq(&mdev->req_lock);
if (mdev->state.susp) {
/* If we got suspended, use the retry mechanism of
generic_make_request() to restart processing of this
bio. In the next call to drbd_make_request_26
we sleep in inc_ap_bio() */
ret = 1;
spin_unlock_irq(&mdev->req_lock);
goto fail_free_complete;
}
if (remote) { if (remote) {
remote = (mdev->state.pdsk == D_UP_TO_DATE || remote = (mdev->state.pdsk == D_UP_TO_DATE ||
(mdev->state.pdsk == D_INCONSISTENT && (mdev->state.pdsk == D_INCONSISTENT &&
...@@ -947,12 +958,14 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio) ...@@ -947,12 +958,14 @@ static int drbd_make_request_common(struct drbd_conf *mdev, struct bio *bio)
req->private_bio = NULL; req->private_bio = NULL;
put_ldev(mdev); put_ldev(mdev);
} }
bio_endio(bio, err); if (!ret)
bio_endio(bio, err);
drbd_req_free(req); drbd_req_free(req);
dec_ap_bio(mdev); dec_ap_bio(mdev);
kfree(b); kfree(b);
return 0; return ret;
} }
/* helper function for drbd_make_request /* helper function for drbd_make_request
...@@ -1065,15 +1078,21 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio) ...@@ -1065,15 +1078,21 @@ int drbd_make_request_26(struct request_queue *q, struct bio *bio)
/* we need to get a "reference count" (ap_bio_cnt) /* we need to get a "reference count" (ap_bio_cnt)
* to avoid races with the disconnect/reconnect/suspend code. * to avoid races with the disconnect/reconnect/suspend code.
* In case we need to split the bio here, we need to get two references * In case we need to split the bio here, we need to get three references
* atomically, otherwise we might deadlock when trying to submit the * atomically, otherwise we might deadlock when trying to submit the
* second one! */ * second one! */
inc_ap_bio(mdev, 2); inc_ap_bio(mdev, 3);
D_ASSERT(e_enr == s_enr + 1); D_ASSERT(e_enr == s_enr + 1);
drbd_make_request_common(mdev, &bp->bio1); while (drbd_make_request_common(mdev, &bp->bio1))
drbd_make_request_common(mdev, &bp->bio2); inc_ap_bio(mdev, 1);
while (drbd_make_request_common(mdev, &bp->bio2))
inc_ap_bio(mdev, 1);
dec_ap_bio(mdev);
bio_pair_release(bp); bio_pair_release(bp);
} }
return 0; return 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment