Commit 08d0dabf authored by Lars Ellenberg, committed by Philipp Reisner

drbd: application writes may set-in-sync in protocol != C

If "dirty" blocks are written to during resync,
that brings them in-sync.

By explicitly requesting write-acks during resync even in protocol != C,
we now can actually respect this.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
parent 5d0b17f1
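
For orientation, here is a condensed, standalone C sketch of the sender-side decision the diff below changes (flag values are invented; only the names match the kernel code). Protocol B requests a receive ack and protocol C a write ack; with this commit, a write flagged DP_MAY_SET_IN_SYNC also requests a write ack, since only a write ack proves the block reached the peer's disk and may therefore be marked in sync:

#include <stdint.h>

/* Invented bit values -- only the names match the DRBD diff below. */
#define DP_SEND_RECEIVE_ACK	(1u << 0)	/* protocol B style ack */
#define DP_SEND_WRITE_ACK	(1u << 1)	/* protocol C style ack */
#define DP_MAY_SET_IN_SYNC	(1u << 2)	/* write may bring blocks in sync */
#define RQ_EXP_RECEIVE_ACK	(1u << 0)	/* request runs protocol B */
#define RQ_EXP_WRITE_ACK	(1u << 1)	/* request runs protocol C */

static uint32_t choose_ack_flags(uint32_t rq_state, uint32_t dp_flags)
{
	if (rq_state & RQ_EXP_RECEIVE_ACK)
		dp_flags |= DP_SEND_RECEIVE_ACK;
	/* New with this commit: during resync, request an explicit
	 * write ack even when the protocol is not C. */
	if ((rq_state & RQ_EXP_WRITE_ACK) || (dp_flags & DP_MAY_SET_IN_SYNC))
		dp_flags |= DP_SEND_WRITE_ACK;
	return dp_flags;
}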
drbd_interval.h
@@ -10,7 +10,9 @@ struct drbd_interval {
 	unsigned int size;	/* size in bytes */
 	sector_t end;		/* highest interval end in subtree */
 	int local:1		/* local or remote request? */;
-	int waiting:1;
+	int waiting:1;		/* someone is waiting for this to complete */
+	int completed:1;	/* this has been completed already;
+				 * ignore for conflict detection */
 };

 static inline void drbd_clear_interval(struct drbd_interval *i)
drbd_main.c
@@ -1639,7 +1639,10 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
 	if (peer_device->connection->agreed_pro_version >= 100) {
 		if (req->rq_state & RQ_EXP_RECEIVE_ACK)
 			dp_flags |= DP_SEND_RECEIVE_ACK;
-		if (req->rq_state & RQ_EXP_WRITE_ACK)
+		/* During resync, request an explicit write ack,
+		 * even in protocol != C */
+		if (req->rq_state & RQ_EXP_WRITE_ACK
+		|| (dp_flags & DP_MAY_SET_IN_SYNC))
 			dp_flags |= DP_SEND_WRITE_ACK;
 	}
 	p->dp_flags = cpu_to_be32(dp_flags);
drbd_receiver.c
@@ -1930,6 +1930,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
 		}
 		dec_unacked(device);
 	}
+
 	/* we delete from the conflict detection hash _after_ we sent out the
 	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
 	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
@@ -2156,6 +2157,8 @@ static int handle_write_conflicts(struct drbd_device *device,
 	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
 		if (i == &peer_req->i)
 			continue;
+		if (i->completed)
+			continue;
 		if (!i->local) {
 			/*
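
The new completed bit is consulted here so that conflict detection ignores requests that have finished but are deliberately left in the interval tree (see the drbd_req.c hunks below). A minimal standalone model of that scan, with invented types in place of DRBD's rb-tree walker:

#include <stdbool.h>
#include <stddef.h>

/* Toy interval record, loosely modeled after struct drbd_interval. */
struct interval {
	unsigned long sector;	/* start, in 512-byte sectors */
	unsigned int size;	/* length in bytes */
	bool local;		/* local or remote request? */
	bool completed;		/* finished; ignore for conflict detection */
	struct interval *next;
};

static bool overlaps(const struct interval *i,
		     unsigned long sector, unsigned int size)
{
	unsigned long end = sector + (size >> 9);
	return i->sector < end && sector < i->sector + (i->size >> 9);
}

/* Return the first conflicting interval, skipping completed ones. */
static struct interval *find_conflict(struct interval *head,
				      struct interval *self,
				      unsigned long sector, unsigned int size)
{
	for (struct interval *i = head; i; i = i->next) {
		if (i == self)
			continue;
		if (i->completed)	/* the early-out this commit adds */
			continue;
		if (overlaps(i, sector, size))
			return i;
	}
	return NULL;
}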
drbd_req.c
@@ -92,6 +92,19 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
 	return req;
 }

+static void drbd_remove_request_interval(struct rb_root *root,
+					 struct drbd_request *req)
+{
+	struct drbd_device *device = req->device;
+	struct drbd_interval *i = &req->i;
+
+	drbd_remove_interval(root, i);
+
+	/* Wake up any processes waiting for this request to complete. */
+	if (i->waiting)
+		wake_up(&device->misc_wait);
+}
+
 void drbd_req_destroy(struct kref *kref)
 {
 	struct drbd_request *req = container_of(kref, struct drbd_request, kref);
@@ -115,6 +128,20 @@ void drbd_req_destroy(struct kref *kref)
 	 * here unconditionally */
 	list_del_init(&req->tl_requests);

+	/* finally remove the request from the conflict detection
+	 * respective block_id verification interval tree. */
+	if (!drbd_interval_empty(&req->i)) {
+		struct rb_root *root;
+
+		if (s & RQ_WRITE)
+			root = &device->write_requests;
+		else
+			root = &device->read_requests;
+		drbd_remove_request_interval(root, req);
+	} else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0)
+		drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n",
+			s, (unsigned long long)req->i.sector, req->i.size);
+
 	/* if it was a write, we may have to set the corresponding
 	 * bit(s) out-of-sync first. If it had a local part, we need to
 	 * release the reference to the activity log. */
@@ -188,19 +215,6 @@ void complete_master_bio(struct drbd_device *device,
 }

-static void drbd_remove_request_interval(struct rb_root *root,
-					 struct drbd_request *req)
-{
-	struct drbd_device *device = req->device;
-	struct drbd_interval *i = &req->i;
-
-	drbd_remove_interval(root, i);
-
-	/* Wake up any processes waiting for this request to complete. */
-	if (i->waiting)
-		wake_up(&device->misc_wait);
-}
-
 /* Helper for __req_mod().
  * Set m->bio to the master bio, if it is fit to be completed,
  * or leave it alone (it is initialized to NULL in __req_mod),
@@ -254,18 +268,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
 	ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
 	error = PTR_ERR(req->private_bio);

-	/* remove the request from the conflict detection
-	 * respective block_id verification hash */
-	if (!drbd_interval_empty(&req->i)) {
-		struct rb_root *root;
-
-		if (rw == WRITE)
-			root = &device->write_requests;
-		else
-			root = &device->read_requests;
-		drbd_remove_request_interval(root, req);
-	}
-
 	/* Before we can signal completion to the upper layers,
 	 * we may need to close the current transfer log epoch.
 	 * We are within the request lock, so we can simply compare
@@ -301,7 +303,15 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
 		m->error = ok ? 0 : (error ?: -EIO);
 		m->bio = req->master_bio;
 		req->master_bio = NULL;
+		/* We leave it in the tree, to be able to verify later
+		 * write-acks in protocol != C during resync.
+		 * But we mark it as "complete", so it won't be counted as
+		 * conflict in a multi-primary setup. */
+		req->i.completed = true;
 	}
+
+	if (req->i.waiting)
+		wake_up(&device->misc_wait);
 }

 static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
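
Taken together, the drbd_req.c hunks move interval-tree removal from drbd_req_complete() to drbd_req_destroy(): a finished request stays in the tree, flagged completed, until its last reference is dropped, so a late write-ack in protocol != C can still be matched against it, while waiters are woken at completion time. A toy user-space model of that ordering, using pthreads in place of kref and misc_wait (all names invented):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t misc_wait = PTHREAD_COND_INITIALIZER;

struct request {
	int refs;		/* stands in for the kernel's kref */
	bool in_tree;		/* still in the conflict-detection tree */
	bool completed;		/* finished, but possibly still in the tree */
	bool waiting;		/* someone is waiting on misc_wait */
};

/* Completion: mark the request done and wake waiters, but keep it in
 * the tree so a late ack can still find and verify it. */
static void req_complete(struct request *req)
{
	pthread_mutex_lock(&lock);
	req->completed = true;
	if (req->waiting)
		pthread_cond_broadcast(&misc_wait);
	pthread_mutex_unlock(&lock);
}

/* Reference drop: only when the last reference goes away does the
 * request actually leave the tree. */
static void req_put(struct request *req)
{
	pthread_mutex_lock(&lock);
	if (--req->refs == 0) {
		req->in_tree = false;
		if (req->waiting)
			pthread_cond_broadcast(&misc_wait);
	}
	pthread_mutex_unlock(&lock);
}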
@@ -660,12 +670,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 	case WRITE_ACKED_BY_PEER_AND_SIS:
 		req->rq_state |= RQ_NET_SIS;
 	case WRITE_ACKED_BY_PEER:
-		D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK);
-		/* protocol C; successfully written on peer.
+		/* Normal operation protocol C: successfully written on peer.
+		 * During resync, even in protocol != C,
+		 * we requested an explicit write ack anyways.
+		 * Which means we cannot even assert anything here.
 		 * Nothing more to do here.
 		 * We want to keep the tl in place for all protocols, to cater
 		 * for volatile write-back caches on lower level devices. */
 		goto ack_common;

 	case RECV_ACKED_BY_PEER:
 		D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK);
@@ -673,7 +684,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 	 * see also notes above in HANDED_OVER_TO_NETWORK about
 	 * protocol != C */
 	ack_common:
-		D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
 		mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
 		break;