Commit 4fd1ffaa authored by Jens Axboe's avatar Jens Axboe

Merge branch 'for-jens' of git://git.drbd.org/linux-drbd into for-3.5/drivers

Philipp writes:

These are the updates we have in the drbd-8.3 tree. They are intended
for your "for-3.5/drivers" drivers branch.

These changes include one new feature:
 * Allow detach from frozen backing devices with the new --force option;
   configurable timeout for backing devices by the new disk-timeout option

And a huge number of bug fixes:
 * Fixed a write ordering problem on SyncTarget nodes for a write
   to a block that gets resynced at the same time. The bug can
   only be triggered with a device that has a firmware that
   actually reorders writes to the same block
 * Fixed a race between disconnect and receive_state that could cause
   an IO lockup
 * Fixed resend/resubmit for requests with disk or network timeout
 * Make sure that hard state changes do not disturb the connection
   establishing process (i.e. detach due to an IO error). When the
   bug was triggered it caused a retry in the connect process
 * Postpone soft state changes to not disturb the connection
   establishing process (i.e. becoming primary). When the bug
   was triggered it could cause both nodes going into SyncSource state
 * Fixed a refcount leak that could cause failures when trying to
   unload a protocol family module that was used by DRBD
 * Dedicated page pool for meta data IOs
 * Deny normal detach (as opposed to --forced) if the user tries
   to detach from the last UpToDate disk in the resource
 * Fixed a possible protocol error that could be caused by
   "unusual" BIOs.
 * Enforce the disk-timeout option also on meta-data IO operations
 * Implemented stable bitmap pages when we do a full write out of
   the bitmap
 * Fixed a rare compatibility issue with DRBD versions older than 8.3.7
   when negotiating the bio_size
 * Fixed a rare race condition where an empty resync could stall
   if pause/unpause events happen in parallel
 * Made the re-establishing of connections quicker, if it got a broken pipe
   once. Previously, a bug in the code caused it to waste the first
   successfully established connection after a broken pipe event.

PS: I am postponing drbd-8.4 for mainline for one or two more kernel
    development cycles (the ~400-patch set).
parents 13828dec 92b4ca29
...@@ -65,39 +65,80 @@ struct drbd_atodb_wait { ...@@ -65,39 +65,80 @@ struct drbd_atodb_wait {
int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int); int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int);
/*
 * drbd_md_get_buffer() - claim the meta-data IO buffer of @mdev.
 *
 * Sleeps on mdev->misc_wait until either md_io_in_use could be switched
 * from 0 to 1, or the disk state is D_FAILED or lower.
 *
 * Returns the address of mdev->md_io_page if the buffer was claimed, or
 * NULL if the wait ended without claiming it.  A successful call is
 * released again with drbd_md_put_buffer().
 */
void *drbd_md_get_buffer(struct drbd_conf *mdev)
{
	int in_use;

	/* The condition is re-evaluated on every wake-up, so the cmpxchg
	 * is retried each time; in_use holds the value seen by the last try. */
	wait_event(mdev->misc_wait,
		   (in_use = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 ||
		   mdev->state.disk <= D_FAILED);

	if (in_use)
		return NULL;

	return page_address(mdev->md_io_page);
}
/*
 * drbd_md_put_buffer() - drop one user of the meta-data IO buffer.
 *
 * Decrements mdev->md_io_in_use.  When the counter reaches zero, anyone
 * sleeping on mdev->misc_wait is woken up.
 */
void drbd_md_put_buffer(struct drbd_conf *mdev)
{
	/* Still in use by someone else: nothing to announce. */
	if (!atomic_dec_and_test(&mdev->md_io_in_use))
		return;

	wake_up(&mdev->misc_wait);
}
static bool md_io_allowed(struct drbd_conf *mdev)
{
enum drbd_disk_state ds = mdev->state.disk;
return ds >= D_NEGOTIATING || ds == D_ATTACHING;
}
/*
 * wait_until_done_or_disk_failure() - wait for a meta-data IO to finish.
 * @mdev: DRBD device whose misc_wait queue is slept on.
 * @bdev: backing device; its dc.disk_timeout bounds the wait.
 * @done: flag that is polled on wake-up; a non-zero value ends the wait.
 *
 * The wait also ends as soon as md_io_allowed() reports false for @mdev.
 * A disk_timeout of 0 yields MAX_SCHEDULE_TIMEOUT as the wait bound.
 * If the wait runs into the timeout, an error is logged; nothing is
 * returned to the caller, who has to inspect *@done itself.
 */
void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
				     unsigned int *done)
{
	/* NOTE(review): HZ / 10 scaling suggests disk_timeout is given in
	 * tenths of a second -- confirm against the option's definition. */
	long timeout = bdev->dc.disk_timeout * HZ / 10;
	long remaining;

	if (!timeout)
		timeout = MAX_SCHEDULE_TIMEOUT;

	remaining = wait_event_timeout(mdev->misc_wait,
				       *done || !md_io_allowed(mdev), timeout);
	if (!remaining)
		dev_err(DEV, "meta-data IO operation timed out\n");
}
static int _drbd_md_sync_page_io(struct drbd_conf *mdev, static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
struct drbd_backing_dev *bdev, struct drbd_backing_dev *bdev,
struct page *page, sector_t sector, struct page *page, sector_t sector,
int rw, int size) int rw, int size)
{ {
struct bio *bio; struct bio *bio;
struct drbd_md_io md_io;
int ok; int ok;
md_io.mdev = mdev; mdev->md_io.done = 0;
init_completion(&md_io.event); mdev->md_io.error = -ENODEV;
md_io.error = 0;
if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags)) if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
rw |= REQ_FUA | REQ_FLUSH; rw |= REQ_FUA | REQ_FLUSH;
rw |= REQ_SYNC; rw |= REQ_SYNC;
bio = bio_alloc(GFP_NOIO, 1); bio = bio_alloc_drbd(GFP_NOIO);
bio->bi_bdev = bdev->md_bdev; bio->bi_bdev = bdev->md_bdev;
bio->bi_sector = sector; bio->bi_sector = sector;
ok = (bio_add_page(bio, page, size, 0) == size); ok = (bio_add_page(bio, page, size, 0) == size);
if (!ok) if (!ok)
goto out; goto out;
bio->bi_private = &md_io; bio->bi_private = &mdev->md_io;
bio->bi_end_io = drbd_md_io_complete; bio->bi_end_io = drbd_md_io_complete;
bio->bi_rw = rw; bio->bi_rw = rw;
if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */
dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
ok = 0;
goto out;
}
bio_get(bio); /* one bio_put() is in the completion handler */
atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
bio_endio(bio, -EIO); bio_endio(bio, -EIO);
else else
submit_bio(rw, bio); submit_bio(rw, bio);
wait_for_completion(&md_io.event); wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done);
ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0; ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0;
out: out:
bio_put(bio); bio_put(bio);
...@@ -111,7 +152,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, ...@@ -111,7 +152,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
int offset = 0; int offset = 0;
struct page *iop = mdev->md_io_page; struct page *iop = mdev->md_io_page;
D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1);
BUG_ON(!bdev->md_bdev); BUG_ON(!bdev->md_bdev);
...@@ -328,8 +369,13 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) ...@@ -328,8 +369,13 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
return 1; return 1;
} }
mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */
buffer = (struct al_transaction *)page_address(mdev->md_io_page); if (!buffer) {
dev_err(DEV, "disk failed while waiting for md_io buffer\n");
complete(&((struct update_al_work *)w)->event);
put_ldev(mdev);
return 1;
}
buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
buffer->tr_number = cpu_to_be32(mdev->al_tr_number); buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
...@@ -374,7 +420,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused) ...@@ -374,7 +420,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
mdev->al_tr_number++; mdev->al_tr_number++;
mutex_unlock(&mdev->md_io_mutex); drbd_md_put_buffer(mdev);
complete(&((struct update_al_work *)w)->event); complete(&((struct update_al_work *)w)->event);
put_ldev(mdev); put_ldev(mdev);
...@@ -443,8 +489,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) ...@@ -443,8 +489,9 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
/* lock out all other meta data io for now, /* lock out all other meta data io for now,
* and make sure the page is mapped. * and make sure the page is mapped.
*/ */
mutex_lock(&mdev->md_io_mutex); buffer = drbd_md_get_buffer(mdev);
buffer = page_address(mdev->md_io_page); if (!buffer)
return 0;
/* Find the valid transaction in the log */ /* Find the valid transaction in the log */
for (i = 0; i <= mx; i++) { for (i = 0; i <= mx; i++) {
...@@ -452,7 +499,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) ...@@ -452,7 +499,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
if (rv == 0) if (rv == 0)
continue; continue;
if (rv == -1) { if (rv == -1) {
mutex_unlock(&mdev->md_io_mutex); drbd_md_put_buffer(mdev);
return 0; return 0;
} }
cnr = be32_to_cpu(buffer->tr_number); cnr = be32_to_cpu(buffer->tr_number);
...@@ -478,7 +525,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) ...@@ -478,7 +525,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
if (!found_valid) { if (!found_valid) {
dev_warn(DEV, "No usable activity log found.\n"); dev_warn(DEV, "No usable activity log found.\n");
mutex_unlock(&mdev->md_io_mutex); drbd_md_put_buffer(mdev);
return 1; return 1;
} }
...@@ -493,7 +540,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) ...@@ -493,7 +540,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
rv = drbd_al_read_tr(mdev, bdev, buffer, i); rv = drbd_al_read_tr(mdev, bdev, buffer, i);
ERR_IF(rv == 0) goto cancel; ERR_IF(rv == 0) goto cancel;
if (rv == -1) { if (rv == -1) {
mutex_unlock(&mdev->md_io_mutex); drbd_md_put_buffer(mdev);
return 0; return 0;
} }
...@@ -534,7 +581,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) ...@@ -534,7 +581,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
mdev->al_tr_pos = 0; mdev->al_tr_pos = 0;
/* ok, we are done with it */ /* ok, we are done with it */
mutex_unlock(&mdev->md_io_mutex); drbd_md_put_buffer(mdev);
dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n", dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
transactions, active_extents); transactions, active_extents);
...@@ -671,16 +718,20 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, ...@@ -671,16 +718,20 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
else else
ext->rs_failed += count; ext->rs_failed += count;
if (ext->rs_left < ext->rs_failed) { if (ext->rs_left < ext->rs_failed) {
dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d " dev_warn(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
"rs_failed=%d count=%d\n", "rs_failed=%d count=%d cstate=%s\n",
(unsigned long long)sector, (unsigned long long)sector,
ext->lce.lc_number, ext->rs_left, ext->lce.lc_number, ext->rs_left,
ext->rs_failed, count); ext->rs_failed, count,
dump_stack(); drbd_conn_str(mdev->state.conn));
lc_put(mdev->resync, &ext->lce); /* We don't expect to be able to clear more bits
drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); * than have been set when we originally counted
return; * the set bits to cache that value in ext->rs_left.
* Whatever the reason (disconnect during resync,
* delayed local completion of an application write),
* try to fix it up by recounting here. */
ext->rs_left = drbd_bm_e_weight(mdev, enr);
} }
} else { } else {
/* Normally this element should be in the cache, /* Normally this element should be in the cache,
...@@ -1192,6 +1243,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev) ...@@ -1192,6 +1243,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev)
put_ldev(mdev); put_ldev(mdev);
} }
spin_unlock_irq(&mdev->al_lock); spin_unlock_irq(&mdev->al_lock);
wake_up(&mdev->al_wait);
return 0; return 0;
} }
......
...@@ -205,7 +205,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev) ...@@ -205,7 +205,7 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
static void bm_store_page_idx(struct page *page, unsigned long idx) static void bm_store_page_idx(struct page *page, unsigned long idx)
{ {
BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK)); BUG_ON(0 != (idx & ~BM_PAGE_IDX_MASK));
page_private(page) |= idx; set_page_private(page, idx);
} }
static unsigned long bm_page_to_idx(struct page *page) static unsigned long bm_page_to_idx(struct page *page)
...@@ -886,12 +886,21 @@ void drbd_bm_clear_all(struct drbd_conf *mdev) ...@@ -886,12 +886,21 @@ void drbd_bm_clear_all(struct drbd_conf *mdev)
struct bm_aio_ctx { struct bm_aio_ctx {
struct drbd_conf *mdev; struct drbd_conf *mdev;
atomic_t in_flight; atomic_t in_flight;
struct completion done; unsigned int done;
unsigned flags; unsigned flags;
#define BM_AIO_COPY_PAGES 1 #define BM_AIO_COPY_PAGES 1
int error; int error;
struct kref kref;
}; };
/*
 * bm_aio_ctx_destroy() - kref release function for a struct bm_aio_ctx.
 * @kref: the kref member embedded in the context to be destroyed.
 *
 * Drops the local-disk reference of the context's device via put_ldev()
 * and then frees the context itself.
 */
static void bm_aio_ctx_destroy(struct kref *kref)
{
	struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref);
	struct drbd_conf *mdev = ctx->mdev;

	put_ldev(mdev);
	kfree(ctx);
}
/* bv_page may be a copy, or may be the original */ /* bv_page may be a copy, or may be the original */
static void bm_async_io_complete(struct bio *bio, int error) static void bm_async_io_complete(struct bio *bio, int error)
{ {
...@@ -930,20 +939,21 @@ static void bm_async_io_complete(struct bio *bio, int error) ...@@ -930,20 +939,21 @@ static void bm_async_io_complete(struct bio *bio, int error)
bm_page_unlock_io(mdev, idx); bm_page_unlock_io(mdev, idx);
/* FIXME give back to page pool */
if (ctx->flags & BM_AIO_COPY_PAGES) if (ctx->flags & BM_AIO_COPY_PAGES)
put_page(bio->bi_io_vec[0].bv_page); mempool_free(bio->bi_io_vec[0].bv_page, drbd_md_io_page_pool);
bio_put(bio); bio_put(bio);
if (atomic_dec_and_test(&ctx->in_flight)) if (atomic_dec_and_test(&ctx->in_flight)) {
complete(&ctx->done); ctx->done = 1;
wake_up(&mdev->misc_wait);
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
}
} }
static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local)
{ {
/* we are process context. we always get a bio */ struct bio *bio = bio_alloc_drbd(GFP_NOIO);
struct bio *bio = bio_alloc(GFP_KERNEL, 1);
struct drbd_conf *mdev = ctx->mdev; struct drbd_conf *mdev = ctx->mdev;
struct drbd_bitmap *b = mdev->bitmap; struct drbd_bitmap *b = mdev->bitmap;
struct page *page; struct page *page;
...@@ -966,10 +976,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must ...@@ -966,10 +976,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
bm_set_page_unchanged(b->bm_pages[page_nr]); bm_set_page_unchanged(b->bm_pages[page_nr]);
if (ctx->flags & BM_AIO_COPY_PAGES) { if (ctx->flags & BM_AIO_COPY_PAGES) {
/* FIXME alloc_page is good enough for now, but actually needs
* to use pre-allocated page pool */
void *src, *dest; void *src, *dest;
page = alloc_page(__GFP_HIGHMEM|__GFP_WAIT); page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT);
dest = kmap_atomic(page); dest = kmap_atomic(page);
src = kmap_atomic(b->bm_pages[page_nr]); src = kmap_atomic(b->bm_pages[page_nr]);
memcpy(dest, src, PAGE_SIZE); memcpy(dest, src, PAGE_SIZE);
...@@ -981,6 +989,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must ...@@ -981,6 +989,8 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
bio->bi_bdev = mdev->ldev->md_bdev; bio->bi_bdev = mdev->ldev->md_bdev;
bio->bi_sector = on_disk_sector; bio->bi_sector = on_disk_sector;
/* bio_add_page of a single page to an empty bio will always succeed,
* according to api. Do we want to assert that? */
bio_add_page(bio, page, len, 0); bio_add_page(bio, page, len, 0);
bio->bi_private = ctx; bio->bi_private = ctx;
bio->bi_end_io = bm_async_io_complete; bio->bi_end_io = bm_async_io_complete;
...@@ -999,14 +1009,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must ...@@ -999,14 +1009,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
/* /*
* bm_rw: read/write the whole bitmap from/to its on disk location. * bm_rw: read/write the whole bitmap from/to its on disk location.
*/ */
static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_idx) __must_hold(local) static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
{ {
struct bm_aio_ctx ctx = { struct bm_aio_ctx *ctx;
.mdev = mdev,
.in_flight = ATOMIC_INIT(1),
.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done),
.flags = lazy_writeout_upper_idx ? BM_AIO_COPY_PAGES : 0,
};
struct drbd_bitmap *b = mdev->bitmap; struct drbd_bitmap *b = mdev->bitmap;
int num_pages, i, count = 0; int num_pages, i, count = 0;
unsigned long now; unsigned long now;
...@@ -1021,7 +1026,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id ...@@ -1021,7 +1026,27 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id
* For lazy writeout, we don't care for ongoing changes to the bitmap, * For lazy writeout, we don't care for ongoing changes to the bitmap,
* as we submit copies of pages anyways. * as we submit copies of pages anyways.
*/ */
if (!ctx.flags)
ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
if (!ctx)
return -ENOMEM;
*ctx = (struct bm_aio_ctx) {
.mdev = mdev,
.in_flight = ATOMIC_INIT(1),
.done = 0,
.flags = flags,
.error = 0,
.kref = { ATOMIC_INIT(2) },
};
if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
kfree(ctx);
return -ENODEV;
}
if (!ctx->flags)
WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
num_pages = b->bm_number_of_pages; num_pages = b->bm_number_of_pages;
...@@ -1046,29 +1071,38 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id ...@@ -1046,29 +1071,38 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id
continue; continue;
} }
} }
atomic_inc(&ctx.in_flight); atomic_inc(&ctx->in_flight);
bm_page_io_async(&ctx, i, rw); bm_page_io_async(ctx, i, rw);
++count; ++count;
cond_resched(); cond_resched();
} }
/* /*
* We initialize ctx.in_flight to one to make sure bm_async_io_complete * We initialize ctx->in_flight to one to make sure bm_async_io_complete
* will not complete() early, and decrement / test it here. If there * will not set ctx->done early, and decrement / test it here. If there
* are still some bios in flight, we need to wait for them here. * are still some bios in flight, we need to wait for them here.
* If all IO is done already (or nothing had been submitted), there is
* no need to wait. Still, we need to put the kref associated with the
* "in_flight reached zero, all done" event.
*/ */
if (!atomic_dec_and_test(&ctx.in_flight)) if (!atomic_dec_and_test(&ctx->in_flight))
wait_for_completion(&ctx.done); wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
else
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n", dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
rw == WRITE ? "WRITE" : "READ", rw == WRITE ? "WRITE" : "READ",
count, jiffies - now); count, jiffies - now);
if (ctx.error) { if (ctx->error) {
dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n"); dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
drbd_chk_io_error(mdev, 1, true); drbd_chk_io_error(mdev, 1, true);
err = -EIO; /* ctx.error ? */ err = -EIO; /* ctx->error ? */
} }
if (atomic_read(&ctx->in_flight))
err = -EIO; /* Disk failed during IO... */
now = jiffies; now = jiffies;
if (rw == WRITE) { if (rw == WRITE) {
drbd_md_flush(mdev); drbd_md_flush(mdev);
...@@ -1082,6 +1116,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id ...@@ -1082,6 +1116,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id
dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
return err; return err;
} }
...@@ -1091,7 +1126,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id ...@@ -1091,7 +1126,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id
*/ */
int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
{ {
return bm_rw(mdev, READ, 0); return bm_rw(mdev, READ, 0, 0);
} }
/** /**
...@@ -1102,7 +1137,7 @@ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local) ...@@ -1102,7 +1137,7 @@ int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local)
*/ */
int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
{ {
return bm_rw(mdev, WRITE, 0); return bm_rw(mdev, WRITE, 0, 0);
} }
/** /**
...@@ -1112,7 +1147,23 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local) ...@@ -1112,7 +1147,23 @@ int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local)
*/ */
int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local) int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(local)
{ {
return bm_rw(mdev, WRITE, upper_idx); return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, upper_idx);
}
/**
* drbd_bm_write_copy_pages() - Write the whole bitmap to its on disk location.
* @mdev: DRBD device.
*
* Will only write pages that have changed since last IO.
* In contrast to drbd_bm_write(), this will copy the bitmap pages
* to temporary writeout pages. It is intended to trigger a full write-out
* while still allowing the bitmap to change, for example if a resync or online
* verify is aborted due to a failed peer disk, while local IO continues, or
* pending resync acks are still being processed.
*/
int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local)
{
return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
} }
...@@ -1130,28 +1181,45 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l ...@@ -1130,28 +1181,45 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l
*/ */
int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
{ {
struct bm_aio_ctx ctx = { struct bm_aio_ctx *ctx;
int err;
if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) {
dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx);
return 0;
}
ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
if (!ctx)
return -ENOMEM;
*ctx = (struct bm_aio_ctx) {
.mdev = mdev, .mdev = mdev,
.in_flight = ATOMIC_INIT(1), .in_flight = ATOMIC_INIT(1),
.done = COMPLETION_INITIALIZER_ONSTACK(ctx.done), .done = 0,
.flags = BM_AIO_COPY_PAGES, .flags = BM_AIO_COPY_PAGES,
.error = 0,
.kref = { ATOMIC_INIT(2) },
}; };
if (bm_test_page_unchanged(mdev->bitmap->bm_pages[idx])) { if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
dynamic_dev_dbg(DEV, "skipped bm page write for idx %u\n", idx); dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
return 0; kfree(ctx);
return -ENODEV;
} }
bm_page_io_async(&ctx, idx, WRITE_SYNC); bm_page_io_async(ctx, idx, WRITE_SYNC);
wait_for_completion(&ctx.done); wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
if (ctx.error) if (ctx->error)
drbd_chk_io_error(mdev, 1, true); drbd_chk_io_error(mdev, 1, true);
/* that should force detach, so the in memory bitmap will be /* that should force detach, so the in memory bitmap will be
* gone in a moment as well. */ * gone in a moment as well. */
mdev->bm_writ_cnt++; mdev->bm_writ_cnt++;
return ctx.error; err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
kref_put(&ctx->kref, &bm_aio_ctx_destroy);
return err;
} }
/* NOTE /* NOTE
......
...@@ -712,7 +712,6 @@ struct drbd_request { ...@@ -712,7 +712,6 @@ struct drbd_request {
struct list_head tl_requests; /* ring list in the transfer log */ struct list_head tl_requests; /* ring list in the transfer log */
struct bio *master_bio; /* master bio pointer */ struct bio *master_bio; /* master bio pointer */
unsigned long rq_state; /* see comments above _req_mod() */ unsigned long rq_state; /* see comments above _req_mod() */
int seq_num;
unsigned long start_time; unsigned long start_time;
}; };
...@@ -851,6 +850,7 @@ enum { ...@@ -851,6 +850,7 @@ enum {
NEW_CUR_UUID, /* Create new current UUID when thawing IO */ NEW_CUR_UUID, /* Create new current UUID when thawing IO */
AL_SUSPENDED, /* Activity logging is currently suspended. */ AL_SUSPENDED, /* Activity logging is currently suspended. */
AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */ AHEAD_TO_SYNC_SOURCE, /* Ahead -> SyncSource queued */
STATE_SENT, /* Do not change state/UUIDs while this is set */
}; };
struct drbd_bitmap; /* opaque for drbd_conf */ struct drbd_bitmap; /* opaque for drbd_conf */
...@@ -862,31 +862,30 @@ enum bm_flag { ...@@ -862,31 +862,30 @@ enum bm_flag {
BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */ BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
/* currently locked for bulk operation */ /* currently locked for bulk operation */
BM_LOCKED_MASK = 0x7, BM_LOCKED_MASK = 0xf,
/* in detail, that is: */ /* in detail, that is: */
BM_DONT_CLEAR = 0x1, BM_DONT_CLEAR = 0x1,
BM_DONT_SET = 0x2, BM_DONT_SET = 0x2,
BM_DONT_TEST = 0x4, BM_DONT_TEST = 0x4,
/* so we can mark it locked for bulk operation,
* and still allow all non-bulk operations */
BM_IS_LOCKED = 0x8,
/* (test bit, count bit) allowed (common case) */ /* (test bit, count bit) allowed (common case) */
BM_LOCKED_TEST_ALLOWED = 0x3, BM_LOCKED_TEST_ALLOWED = BM_DONT_CLEAR | BM_DONT_SET | BM_IS_LOCKED,
/* testing bits, as well as setting new bits allowed, but clearing bits /* testing bits, as well as setting new bits allowed, but clearing bits
* would be unexpected. Used during bitmap receive. Setting new bits * would be unexpected. Used during bitmap receive. Setting new bits
* requires sending of "out-of-sync" information, though. */ * requires sending of "out-of-sync" information, though. */
BM_LOCKED_SET_ALLOWED = 0x1, BM_LOCKED_SET_ALLOWED = BM_DONT_CLEAR | BM_IS_LOCKED,
/* clear is not expected while bitmap is locked for bulk operation */ /* for drbd_bm_write_copy_pages, everything is allowed,
* only concurrent bulk operations are locked out. */
BM_LOCKED_CHANGE_ALLOWED = BM_IS_LOCKED,
}; };
/* TODO sort members for performance
* MAYBE group them further */
/* THINK maybe we actually want to use the default "event/%s" worker threads
* or similar in linux 2.6, which uses per cpu data and threads.
*/
struct drbd_work_queue { struct drbd_work_queue {
struct list_head q; struct list_head q;
struct semaphore s; /* producers up it, worker down()s it */ struct semaphore s; /* producers up it, worker down()s it */
...@@ -938,8 +937,7 @@ struct drbd_backing_dev { ...@@ -938,8 +937,7 @@ struct drbd_backing_dev {
}; };
struct drbd_md_io { struct drbd_md_io {
struct drbd_conf *mdev; unsigned int done;
struct completion event;
int error; int error;
}; };
...@@ -1022,6 +1020,7 @@ struct drbd_conf { ...@@ -1022,6 +1020,7 @@ struct drbd_conf {
struct drbd_tl_epoch *newest_tle; struct drbd_tl_epoch *newest_tle;
struct drbd_tl_epoch *oldest_tle; struct drbd_tl_epoch *oldest_tle;
struct list_head out_of_sequence_requests; struct list_head out_of_sequence_requests;
struct list_head barrier_acked_requests;
struct hlist_head *tl_hash; struct hlist_head *tl_hash;
unsigned int tl_hash_s; unsigned int tl_hash_s;
...@@ -1056,6 +1055,8 @@ struct drbd_conf { ...@@ -1056,6 +1055,8 @@ struct drbd_conf {
struct crypto_hash *csums_tfm; struct crypto_hash *csums_tfm;
struct crypto_hash *verify_tfm; struct crypto_hash *verify_tfm;
unsigned long last_reattach_jif;
unsigned long last_reconnect_jif;
struct drbd_thread receiver; struct drbd_thread receiver;
struct drbd_thread worker; struct drbd_thread worker;
struct drbd_thread asender; struct drbd_thread asender;
...@@ -1094,7 +1095,8 @@ struct drbd_conf { ...@@ -1094,7 +1095,8 @@ struct drbd_conf {
wait_queue_head_t ee_wait; wait_queue_head_t ee_wait;
struct page *md_io_page; /* one page buffer for md_io */ struct page *md_io_page; /* one page buffer for md_io */
struct page *md_io_tmpp; /* for logical_block_size != 512 */ struct page *md_io_tmpp; /* for logical_block_size != 512 */
struct mutex md_io_mutex; /* protects the md_io_buffer */ struct drbd_md_io md_io;
atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */
spinlock_t al_lock; spinlock_t al_lock;
wait_queue_head_t al_wait; wait_queue_head_t al_wait;
struct lru_cache *act_log; /* activity log */ struct lru_cache *act_log; /* activity log */
...@@ -1228,8 +1230,8 @@ extern int drbd_send_uuids(struct drbd_conf *mdev); ...@@ -1228,8 +1230,8 @@ extern int drbd_send_uuids(struct drbd_conf *mdev);
extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev); extern int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev);
extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev); extern int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev);
extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags); extern int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags);
extern int _drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_state(struct drbd_conf *mdev, union drbd_state s);
extern int drbd_send_state(struct drbd_conf *mdev); extern int drbd_send_current_state(struct drbd_conf *mdev);
extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, extern int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
enum drbd_packets cmd, struct p_header80 *h, enum drbd_packets cmd, struct p_header80 *h,
size_t size, unsigned msg_flags); size_t size, unsigned msg_flags);
...@@ -1461,6 +1463,7 @@ extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr); ...@@ -1461,6 +1463,7 @@ extern int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr);
extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local); extern int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local);
extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_read(struct drbd_conf *mdev) __must_hold(local);
extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local); extern int drbd_bm_write(struct drbd_conf *mdev) __must_hold(local);
extern int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local);
extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
unsigned long al_enr); unsigned long al_enr);
extern size_t drbd_bm_words(struct drbd_conf *mdev); extern size_t drbd_bm_words(struct drbd_conf *mdev);
...@@ -1493,11 +1496,38 @@ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */ ...@@ -1493,11 +1496,38 @@ extern struct kmem_cache *drbd_al_ext_cache; /* activity log extents */
extern mempool_t *drbd_request_mempool; extern mempool_t *drbd_request_mempool;
extern mempool_t *drbd_ee_mempool; extern mempool_t *drbd_ee_mempool;
extern struct page *drbd_pp_pool; /* drbd's page pool */ /* drbd's page pool, used to buffer data received from the peer,
* or data requested by the peer.
*
* This does not have an emergency reserve.
*
* When allocating from this pool, it first takes pages from the pool.
* Only if the pool is depleted will it try to allocate from the system.
*
* The assumption is that pages taken from this pool will be processed,
* and given back, "quickly", and then can be recycled, so we can avoid
* frequent calls to alloc_page(), and still will be able to make progress even
* under memory pressure.
*/
extern struct page *drbd_pp_pool;
extern spinlock_t drbd_pp_lock; extern spinlock_t drbd_pp_lock;
extern int drbd_pp_vacant; extern int drbd_pp_vacant;
extern wait_queue_head_t drbd_pp_wait; extern wait_queue_head_t drbd_pp_wait;
/* We also need a standard (emergency-reserve backed) page pool
* for meta data IO (activity log, bitmap).
* We can keep it global, as long as it is used as "N pages at a time".
* 128 should be plenty, currently we probably can get away with as few as 1.
*/
#define DRBD_MIN_POOL_PAGES 128
extern mempool_t *drbd_md_io_page_pool;
/* We also need to make sure we get a bio
* when we need it for housekeeping purposes */
extern struct bio_set *drbd_md_io_bio_set;
/* to allocate from that set */
extern struct bio *bio_alloc_drbd(gfp_t gfp_mask);
extern rwlock_t global_state_lock; extern rwlock_t global_state_lock;
extern struct drbd_conf *drbd_new_device(unsigned int minor); extern struct drbd_conf *drbd_new_device(unsigned int minor);
...@@ -1536,8 +1566,12 @@ extern void resume_next_sg(struct drbd_conf *mdev); ...@@ -1536,8 +1566,12 @@ extern void resume_next_sg(struct drbd_conf *mdev);
extern void suspend_other_sg(struct drbd_conf *mdev); extern void suspend_other_sg(struct drbd_conf *mdev);
extern int drbd_resync_finished(struct drbd_conf *mdev); extern int drbd_resync_finished(struct drbd_conf *mdev);
/* maybe rather drbd_main.c ? */ /* maybe rather drbd_main.c ? */
extern void *drbd_md_get_buffer(struct drbd_conf *mdev);
extern void drbd_md_put_buffer(struct drbd_conf *mdev);
extern int drbd_md_sync_page_io(struct drbd_conf *mdev, extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
struct drbd_backing_dev *bdev, sector_t sector, int rw); struct drbd_backing_dev *bdev, sector_t sector, int rw);
extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
unsigned int *done);
extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int); extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);
extern void drbd_rs_controller_reset(struct drbd_conf *mdev); extern void drbd_rs_controller_reset(struct drbd_conf *mdev);
...@@ -1754,19 +1788,6 @@ static inline struct page *page_chain_next(struct page *page) ...@@ -1754,19 +1788,6 @@ static inline struct page *page_chain_next(struct page *page)
#define page_chain_for_each_safe(page, n) \ #define page_chain_for_each_safe(page, n) \
for (; page && ({ n = page_chain_next(page); 1; }); page = n) for (; page && ({ n = page_chain_next(page); 1; }); page = n)
/* Report whether any page attached to @bio is referenced more than once.
 * Returns 1 as soon as a segment is found whose page has a page_count()
 * greater than one, and 0 if no such segment exists. */
static inline int drbd_bio_has_active_page(struct bio *bio)
{
	struct bio_vec *seg;
	int idx;

	__bio_for_each_segment(seg, bio, idx, 0) {
		/* a count above one means someone besides us holds the page */
		if (1 < page_count(seg->bv_page)) {
			return 1;
		}
	}

	return 0;
}
static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e)
{ {
struct page *page = e->pages; struct page *page = e->pages;
...@@ -1777,7 +1798,6 @@ static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e) ...@@ -1777,7 +1798,6 @@ static inline int drbd_ee_has_active_page(struct drbd_epoch_entry *e)
return 0; return 0;
} }
static inline void drbd_state_lock(struct drbd_conf *mdev) static inline void drbd_state_lock(struct drbd_conf *mdev)
{ {
wait_event(mdev->misc_wait, wait_event(mdev->misc_wait,
...@@ -2230,7 +2250,7 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, ...@@ -2230,7 +2250,7 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,
* Note: currently we don't support such large bitmaps on 32bit * Note: currently we don't support such large bitmaps on 32bit
* arch anyways, but no harm done to be prepared for it here. * arch anyways, but no harm done to be prepared for it here.
*/ */
unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10; unsigned int shift = mdev->rs_total > UINT_MAX ? 16 : 10;
unsigned long left = *bits_left >> shift; unsigned long left = *bits_left >> shift;
unsigned long total = 1UL + (mdev->rs_total >> shift); unsigned long total = 1UL + (mdev->rs_total >> shift);
unsigned long tmp = 1000UL - left * 1000UL/total; unsigned long tmp = 1000UL - left * 1000UL/total;
...@@ -2306,12 +2326,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev) ...@@ -2306,12 +2326,12 @@ static inline int drbd_state_is_stable(struct drbd_conf *mdev)
case D_OUTDATED: case D_OUTDATED:
case D_CONSISTENT: case D_CONSISTENT:
case D_UP_TO_DATE: case D_UP_TO_DATE:
case D_FAILED:
/* disk state is stable as well. */ /* disk state is stable as well. */
break; break;
/* no new io accepted during transitional states */ /* no new io accepted during transitional states */
case D_ATTACHING: case D_ATTACHING:
case D_FAILED:
case D_NEGOTIATING: case D_NEGOTIATING:
case D_UNKNOWN: case D_UNKNOWN:
case D_MASK: case D_MASK:
......
This diff is collapsed.
...@@ -289,7 +289,7 @@ static int _try_outdate_peer_async(void *data) ...@@ -289,7 +289,7 @@ static int _try_outdate_peer_async(void *data)
*/ */
spin_lock_irq(&mdev->req_lock); spin_lock_irq(&mdev->req_lock);
ns = mdev->state; ns = mdev->state;
if (ns.conn < C_WF_REPORT_PARAMS) { if (ns.conn < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &mdev->flags)) {
ns.pdsk = nps; ns.pdsk = nps;
_drbd_set_state(mdev, ns, CS_VERBOSE, NULL); _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
} }
...@@ -432,7 +432,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) ...@@ -432,7 +432,7 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
/* if this was forced, we should consider sync */ /* if this was forced, we should consider sync */
if (forced) if (forced)
drbd_send_uuids(mdev); drbd_send_uuids(mdev);
drbd_send_state(mdev); drbd_send_current_state(mdev);
} }
drbd_md_sync(mdev); drbd_md_sync(mdev);
...@@ -845,9 +845,10 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev) ...@@ -845,9 +845,10 @@ void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
Because new from 8.3.8 onwards the peer can use multiple Because new from 8.3.8 onwards the peer can use multiple
BIOs for a single peer_request */ BIOs for a single peer_request */
if (mdev->state.conn >= C_CONNECTED) { if (mdev->state.conn >= C_CONNECTED) {
if (mdev->agreed_pro_version < 94) if (mdev->agreed_pro_version < 94) {
peer = mdev->peer_max_bio_size; peer = min_t(int, mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
else if (mdev->agreed_pro_version == 94) /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
} else if (mdev->agreed_pro_version == 94)
peer = DRBD_MAX_SIZE_H80_PACKET; peer = DRBD_MAX_SIZE_H80_PACKET;
else /* drbd 8.3.8 onwards */ else /* drbd 8.3.8 onwards */
peer = DRBD_MAX_BIO_SIZE; peer = DRBD_MAX_BIO_SIZE;
...@@ -1032,7 +1033,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp ...@@ -1032,7 +1033,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
dev_err(DEV, "max capacity %llu smaller than disk size %llu\n", dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
(unsigned long long) drbd_get_max_capacity(nbc), (unsigned long long) drbd_get_max_capacity(nbc),
(unsigned long long) nbc->dc.disk_size); (unsigned long long) nbc->dc.disk_size);
retcode = ERR_DISK_TO_SMALL; retcode = ERR_DISK_TOO_SMALL;
goto fail; goto fail;
} }
...@@ -1046,7 +1047,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp ...@@ -1046,7 +1047,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
} }
if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) { if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
retcode = ERR_MD_DISK_TO_SMALL; retcode = ERR_MD_DISK_TOO_SMALL;
dev_warn(DEV, "refusing attach: md-device too small, " dev_warn(DEV, "refusing attach: md-device too small, "
"at least %llu sectors needed for this meta-disk type\n", "at least %llu sectors needed for this meta-disk type\n",
(unsigned long long) min_md_device_sectors); (unsigned long long) min_md_device_sectors);
...@@ -1057,7 +1058,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp ...@@ -1057,7 +1058,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
* (we may currently be R_PRIMARY with no local disk...) */ * (we may currently be R_PRIMARY with no local disk...) */
if (drbd_get_max_capacity(nbc) < if (drbd_get_max_capacity(nbc) <
drbd_get_capacity(mdev->this_bdev)) { drbd_get_capacity(mdev->this_bdev)) {
retcode = ERR_DISK_TO_SMALL; retcode = ERR_DISK_TOO_SMALL;
goto fail; goto fail;
} }
...@@ -1138,7 +1139,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp ...@@ -1138,7 +1139,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) { drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
dev_warn(DEV, "refusing to truncate a consistent device\n"); dev_warn(DEV, "refusing to truncate a consistent device\n");
retcode = ERR_DISK_TO_SMALL; retcode = ERR_DISK_TOO_SMALL;
goto force_diskless_dec; goto force_diskless_dec;
} }
...@@ -1336,17 +1337,34 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, ...@@ -1336,17 +1337,34 @@ static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
{ {
enum drbd_ret_code retcode; enum drbd_ret_code retcode;
int ret; int ret;
struct detach dt = {};
if (!detach_from_tags(mdev, nlp->tag_list, &dt)) {
reply->ret_code = ERR_MANDATORY_TAG;
goto out;
}
if (dt.detach_force) {
drbd_force_state(mdev, NS(disk, D_FAILED));
reply->ret_code = SS_SUCCESS;
goto out;
}
drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */ drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
drbd_md_get_buffer(mdev); /* make sure there is no in-flight meta-data IO */
retcode = drbd_request_state(mdev, NS(disk, D_FAILED)); retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
drbd_md_put_buffer(mdev);
/* D_FAILED will transition to DISKLESS. */ /* D_FAILED will transition to DISKLESS. */
ret = wait_event_interruptible(mdev->misc_wait, ret = wait_event_interruptible(mdev->misc_wait,
mdev->state.disk != D_FAILED); mdev->state.disk != D_FAILED);
drbd_resume_io(mdev); drbd_resume_io(mdev);
if ((int)retcode == (int)SS_IS_DISKLESS) if ((int)retcode == (int)SS_IS_DISKLESS)
retcode = SS_NOTHING_TO_DO; retcode = SS_NOTHING_TO_DO;
if (ret) if (ret)
retcode = ERR_INTR; retcode = ERR_INTR;
reply->ret_code = retcode; reply->ret_code = retcode;
out:
return 0; return 0;
} }
...@@ -1711,7 +1729,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, ...@@ -1711,7 +1729,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
if (rs.no_resync && mdev->agreed_pro_version < 93) { if (rs.no_resync && mdev->agreed_pro_version < 93) {
retcode = ERR_NEED_APV_93; retcode = ERR_NEED_APV_93;
goto fail; goto fail_ldev;
} }
if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
...@@ -1738,6 +1756,10 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, ...@@ -1738,6 +1756,10 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
fail: fail:
reply->ret_code = retcode; reply->ret_code = retcode;
return 0; return 0;
fail_ldev:
put_ldev(mdev);
goto fail;
} }
static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
...@@ -1941,6 +1963,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl ...@@ -1941,6 +1963,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
/* If there is still bitmap IO pending, probably because of a previous /* If there is still bitmap IO pending, probably because of a previous
* resync just being finished, wait for it before requesting a new resync. */ * resync just being finished, wait for it before requesting a new resync. */
drbd_suspend_io(mdev);
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
...@@ -1959,6 +1982,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl ...@@ -1959,6 +1982,7 @@ static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T)); retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
} }
drbd_resume_io(mdev);
reply->ret_code = retcode; reply->ret_code = retcode;
return 0; return 0;
...@@ -1980,6 +2004,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re ...@@ -1980,6 +2004,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
/* If there is still bitmap IO pending, probably because of a previous /* If there is still bitmap IO pending, probably because of a previous
* resync just being finished, wait for it before requesting a new resync. */ * resync just being finished, wait for it before requesting a new resync. */
drbd_suspend_io(mdev);
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED); retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
...@@ -1998,6 +2023,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re ...@@ -1998,6 +2023,7 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
} else } else
retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S)); retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
} }
drbd_resume_io(mdev);
reply->ret_code = retcode; reply->ret_code = retcode;
return 0; return 0;
...@@ -2170,11 +2196,13 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, ...@@ -2170,11 +2196,13 @@ static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
/* If there is still bitmap IO pending, e.g. previous resync or verify /* If there is still bitmap IO pending, e.g. previous resync or verify
* just being finished, wait for it before requesting a new resync. */ * just being finished, wait for it before requesting a new resync. */
drbd_suspend_io(mdev);
wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags)); wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
/* w_make_ov_request expects position to be aligned */ /* w_make_ov_request expects position to be aligned */
mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT; mdev->ov_start_sector = args.start_sector & ~BM_SECT_PER_BIT;
reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S)); reply->ret_code = drbd_request_state(mdev,NS(conn,C_VERIFY_S));
drbd_resume_io(mdev);
return 0; return 0;
} }
......
...@@ -52,7 +52,7 @@ void seq_printf_with_thousands_grouping(struct seq_file *seq, long v) ...@@ -52,7 +52,7 @@ void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
if (unlikely(v >= 1000000)) { if (unlikely(v >= 1000000)) {
/* cool: > GiByte/s */ /* cool: > GiByte/s */
seq_printf(seq, "%ld,", v / 1000000); seq_printf(seq, "%ld,", v / 1000000);
v /= 1000000; v %= 1000000;
seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000); seq_printf(seq, "%03ld,%03ld", v/1000, v % 1000);
} else if (likely(v >= 1000)) } else if (likely(v >= 1000))
seq_printf(seq, "%ld,%03ld", v/1000, v % 1000); seq_printf(seq, "%ld,%03ld", v/1000, v % 1000);
......
...@@ -466,6 +466,7 @@ static int drbd_accept(struct drbd_conf *mdev, const char **what, ...@@ -466,6 +466,7 @@ static int drbd_accept(struct drbd_conf *mdev, const char **what,
goto out; goto out;
} }
(*newsock)->ops = sock->ops; (*newsock)->ops = sock->ops;
__module_get((*newsock)->ops->owner);
out: out:
return err; return err;
...@@ -750,6 +751,7 @@ static int drbd_connect(struct drbd_conf *mdev) ...@@ -750,6 +751,7 @@ static int drbd_connect(struct drbd_conf *mdev)
{ {
struct socket *s, *sock, *msock; struct socket *s, *sock, *msock;
int try, h, ok; int try, h, ok;
enum drbd_state_rv rv;
D_ASSERT(!mdev->data.socket); D_ASSERT(!mdev->data.socket);
...@@ -888,25 +890,32 @@ static int drbd_connect(struct drbd_conf *mdev) ...@@ -888,25 +890,32 @@ static int drbd_connect(struct drbd_conf *mdev)
} }
} }
if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
return 0;
sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
atomic_set(&mdev->packet_seq, 0); atomic_set(&mdev->packet_seq, 0);
mdev->peer_seq = 0; mdev->peer_seq = 0;
drbd_thread_start(&mdev->asender);
if (drbd_send_protocol(mdev) == -1) if (drbd_send_protocol(mdev) == -1)
return -1; return -1;
set_bit(STATE_SENT, &mdev->flags);
drbd_send_sync_param(mdev, &mdev->sync_conf); drbd_send_sync_param(mdev, &mdev->sync_conf);
drbd_send_sizes(mdev, 0, 0); drbd_send_sizes(mdev, 0, 0);
drbd_send_uuids(mdev); drbd_send_uuids(mdev);
drbd_send_state(mdev); drbd_send_current_state(mdev);
clear_bit(USE_DEGR_WFC_T, &mdev->flags); clear_bit(USE_DEGR_WFC_T, &mdev->flags);
clear_bit(RESIZE_PENDING, &mdev->flags); clear_bit(RESIZE_PENDING, &mdev->flags);
spin_lock_irq(&mdev->req_lock);
rv = _drbd_set_state(_NS(mdev, conn, C_WF_REPORT_PARAMS), CS_VERBOSE, NULL);
if (mdev->state.conn != C_WF_REPORT_PARAMS)
clear_bit(STATE_SENT, &mdev->flags);
spin_unlock_irq(&mdev->req_lock);
if (rv < SS_SUCCESS)
return 0;
drbd_thread_start(&mdev->asender);
mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */ mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
return 1; return 1;
...@@ -957,7 +966,7 @@ static void drbd_flush(struct drbd_conf *mdev) ...@@ -957,7 +966,7 @@ static void drbd_flush(struct drbd_conf *mdev)
rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL, rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
NULL); NULL);
if (rv) { if (rv) {
dev_err(DEV, "local disk flush failed with status %d\n", rv); dev_info(DEV, "local disk flush failed with status %d\n", rv);
/* would rather check on EOPNOTSUPP, but that is not reliable. /* would rather check on EOPNOTSUPP, but that is not reliable.
* don't try again for ANY return value != 0 * don't try again for ANY return value != 0
* if (rv == -EOPNOTSUPP) */ * if (rv == -EOPNOTSUPP) */
...@@ -1001,12 +1010,13 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev, ...@@ -1001,12 +1010,13 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
if (epoch_size != 0 && if (epoch_size != 0 &&
atomic_read(&epoch->active) == 0 && atomic_read(&epoch->active) == 0 &&
test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) { (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
if (!(ev & EV_CLEANUP)) { if (!(ev & EV_CLEANUP)) {
spin_unlock(&mdev->epoch_lock); spin_unlock(&mdev->epoch_lock);
drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size); drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
spin_lock(&mdev->epoch_lock); spin_lock(&mdev->epoch_lock);
} }
if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
dec_unacked(mdev); dec_unacked(mdev);
if (mdev->current_epoch != epoch) { if (mdev->current_epoch != epoch) {
...@@ -1096,7 +1106,11 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, ...@@ -1096,7 +1106,11 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e,
/* In most cases, we will only need one bio. But in case the lower /* In most cases, we will only need one bio. But in case the lower
* level restrictions happen to be different at this offset on this * level restrictions happen to be different at this offset on this
* side than those of the sending peer, we may need to submit the * side than those of the sending peer, we may need to submit the
* request in more than one bio. */ * request in more than one bio.
*
* Plain bio_alloc is good enough here; this is not a DRBD internally
* generated bio, but a bio allocated on behalf of the peer.
*/
next_bio: next_bio:
bio = bio_alloc(GFP_NOIO, nr_pages); bio = bio_alloc(GFP_NOIO, nr_pages);
if (!bio) { if (!bio) {
...@@ -1583,6 +1597,24 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u ...@@ -1583,6 +1597,24 @@ static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int u
return ok; return ok;
} }
/* Test whether @data_e collides with an entry on the sync_ee list.
 *
 * Walks mdev->sync_ee with req_lock held (interrupts disabled) and
 * compares the sector/size range of every entry against that of @data_e.
 *
 * Returns true if at least one entry overlaps, false otherwise.
 */
static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_epoch_entry *data_e)
{
	struct drbd_epoch_entry *pending;
	bool found = false;

	spin_lock_irq(&mdev->req_lock);
	list_for_each_entry(pending, &mdev->sync_ee, w.list) {
		if (!overlaps(data_e->sector, data_e->size, pending->sector, pending->size))
			continue;
		/* first hit is enough, no need to scan the rest of the list */
		found = true;
		break;
	}
	spin_unlock_irq(&mdev->req_lock);

	return found;
}
/* Called from receive_Data. /* Called from receive_Data.
* Synchronize packets on sock with packets on msock. * Synchronize packets on sock with packets on msock.
* *
...@@ -1826,6 +1858,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned ...@@ -1826,6 +1858,9 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
list_add(&e->w.list, &mdev->active_ee); list_add(&e->w.list, &mdev->active_ee);
spin_unlock_irq(&mdev->req_lock); spin_unlock_irq(&mdev->req_lock);
if (mdev->state.conn == C_SYNC_TARGET)
wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, e));
switch (mdev->net_conf->wire_protocol) { switch (mdev->net_conf->wire_protocol) {
case DRBD_PROT_C: case DRBD_PROT_C:
inc_unacked(mdev); inc_unacked(mdev);
...@@ -2420,7 +2455,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l ...@@ -2420,7 +2455,7 @@ static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(l
mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START]; mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1]; mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
dev_info(DEV, "Did not got last syncUUID packet, corrected:\n"); dev_info(DEV, "Lost last syncUUID packet, corrected:\n");
drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]); drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
return -1; return -1;
...@@ -2806,9 +2841,9 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi ...@@ -2806,9 +2841,9 @@ static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsi
if (apv >= 88) { if (apv >= 88) {
if (apv == 88) { if (apv == 88) {
if (data_size > SHARED_SECRET_MAX) { if (data_size > SHARED_SECRET_MAX || data_size == 0) {
dev_err(DEV, "verify-alg too long, " dev_err(DEV, "verify-alg of wrong size, "
"peer wants %u, accepting only %u byte\n", "peer wants %u, accepting only up to %u byte\n",
data_size, SHARED_SECRET_MAX); data_size, SHARED_SECRET_MAX);
return false; return false;
} }
...@@ -3168,9 +3203,20 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned ...@@ -3168,9 +3203,20 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
os = ns = mdev->state; os = ns = mdev->state;
spin_unlock_irq(&mdev->req_lock); spin_unlock_irq(&mdev->req_lock);
/* peer says his disk is uptodate, while we think it is inconsistent, /* If some other part of the code (asender thread, timeout)
* and this happens while we think we have a sync going on. */ * already decided to close the connection again,
if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE && * we must not "re-establish" it here. */
if (os.conn <= C_TEAR_DOWN)
return false;
/* If this is the "end of sync" confirmation, usually the peer disk
* transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
* set) resync started in PausedSyncT, or if the timing of pause-/
* unpause-sync events has been "just right", the peer disk may
* transition from D_CONSISTENT to D_UP_TO_DATE as well.
*/
if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
real_peer_disk == D_UP_TO_DATE &&
os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) { os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
/* If we are (becoming) SyncSource, but peer is still in sync /* If we are (becoming) SyncSource, but peer is still in sync
* preparation, ignore its uptodate-ness to avoid flapping, it * preparation, ignore its uptodate-ness to avoid flapping, it
...@@ -3288,7 +3334,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned ...@@ -3288,7 +3334,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
/* Nowadays only used when forcing a node into primary role and /* Nowadays only used when forcing a node into primary role and
setting its disk to UpToDate with that */ setting its disk to UpToDate with that */
drbd_send_uuids(mdev); drbd_send_uuids(mdev);
drbd_send_state(mdev); drbd_send_current_state(mdev);
} }
} }
...@@ -3776,6 +3822,13 @@ static void drbd_disconnect(struct drbd_conf *mdev) ...@@ -3776,6 +3822,13 @@ static void drbd_disconnect(struct drbd_conf *mdev)
if (mdev->state.conn == C_STANDALONE) if (mdev->state.conn == C_STANDALONE)
return; return;
/* We are about to start the cleanup after connection loss.
* Make sure drbd_make_request knows about that.
* Usually we should be in some network failure state already,
* but just in case we are not, we fix it up here.
*/
drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE));
/* asender does not clean up anything. it must not interfere, either */ /* asender does not clean up anything. it must not interfere, either */
drbd_thread_stop(&mdev->asender); drbd_thread_stop(&mdev->asender);
drbd_free_sock(mdev); drbd_free_sock(mdev);
...@@ -3803,8 +3856,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) ...@@ -3803,8 +3856,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)
atomic_set(&mdev->rs_pending_cnt, 0); atomic_set(&mdev->rs_pending_cnt, 0);
wake_up(&mdev->misc_wait); wake_up(&mdev->misc_wait);
del_timer(&mdev->request_timer);
/* make sure syncer is stopped and w_resume_next_sg queued */ /* make sure syncer is stopped and w_resume_next_sg queued */
del_timer_sync(&mdev->resync_timer); del_timer_sync(&mdev->resync_timer);
resync_timer_fn((unsigned long)mdev); resync_timer_fn((unsigned long)mdev);
...@@ -4433,7 +4484,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) ...@@ -4433,7 +4484,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h)
if (mdev->state.conn == C_AHEAD && if (mdev->state.conn == C_AHEAD &&
atomic_read(&mdev->ap_in_flight) == 0 && atomic_read(&mdev->ap_in_flight) == 0 &&
!test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) { !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
mdev->start_resync_timer.expires = jiffies + HZ; mdev->start_resync_timer.expires = jiffies + HZ;
add_timer(&mdev->start_resync_timer); add_timer(&mdev->start_resync_timer);
} }
......
This diff is collapsed.
...@@ -105,6 +105,7 @@ enum drbd_req_event { ...@@ -105,6 +105,7 @@ enum drbd_req_event {
read_completed_with_error, read_completed_with_error,
read_ahead_completed_with_error, read_ahead_completed_with_error,
write_completed_with_error, write_completed_with_error,
abort_disk_io,
completed_ok, completed_ok,
resend, resend,
fail_frozen_disk_io, fail_frozen_disk_io,
...@@ -118,18 +119,21 @@ enum drbd_req_event { ...@@ -118,18 +119,21 @@ enum drbd_req_event {
* same time, so we should hold the request lock anyways. * same time, so we should hold the request lock anyways.
*/ */
enum drbd_req_state_bits { enum drbd_req_state_bits {
/* 210 /* 3210
* 000: no local possible * 0000: no local possible
* 001: to be submitted * 0001: to be submitted
* UNUSED, we could map: 011: submitted, completion still pending * UNUSED, we could map: 011: submitted, completion still pending
* 110: completed ok * 0110: completed ok
* 010: completed with error * 0010: completed with error
* 1001: Aborted (before completion)
* 1x10: Aborted and completed -> free
*/ */
__RQ_LOCAL_PENDING, __RQ_LOCAL_PENDING,
__RQ_LOCAL_COMPLETED, __RQ_LOCAL_COMPLETED,
__RQ_LOCAL_OK, __RQ_LOCAL_OK,
__RQ_LOCAL_ABORTED,
/* 76543 /* 87654
* 00000: no network possible * 00000: no network possible
* 00001: to be sent * 00001: to be sent
* 00011: to be sent, on worker queue * 00011: to be sent, on worker queue
...@@ -199,8 +203,9 @@ enum drbd_req_state_bits { ...@@ -199,8 +203,9 @@ enum drbd_req_state_bits {
#define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING) #define RQ_LOCAL_PENDING (1UL << __RQ_LOCAL_PENDING)
#define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED) #define RQ_LOCAL_COMPLETED (1UL << __RQ_LOCAL_COMPLETED)
#define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK) #define RQ_LOCAL_OK (1UL << __RQ_LOCAL_OK)
#define RQ_LOCAL_ABORTED (1UL << __RQ_LOCAL_ABORTED)
#define RQ_LOCAL_MASK ((RQ_LOCAL_OK << 1)-1) /* 0x07 */ #define RQ_LOCAL_MASK ((RQ_LOCAL_ABORTED << 1)-1)
#define RQ_NET_PENDING (1UL << __RQ_NET_PENDING) #define RQ_NET_PENDING (1UL << __RQ_NET_PENDING)
#define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED) #define RQ_NET_QUEUED (1UL << __RQ_NET_QUEUED)
......
...@@ -70,11 +70,29 @@ rwlock_t global_state_lock; ...@@ -70,11 +70,29 @@ rwlock_t global_state_lock;
void drbd_md_io_complete(struct bio *bio, int error) void drbd_md_io_complete(struct bio *bio, int error)
{ {
struct drbd_md_io *md_io; struct drbd_md_io *md_io;
struct drbd_conf *mdev;
md_io = (struct drbd_md_io *)bio->bi_private; md_io = (struct drbd_md_io *)bio->bi_private;
mdev = container_of(md_io, struct drbd_conf, md_io);
md_io->error = error; md_io->error = error;
complete(&md_io->event); /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
* to timeout on the lower level device, and eventually detach from it.
* If this io completion runs after that timeout expired, this
* drbd_md_put_buffer() may allow us to finally try and re-attach.
* During normal operation, this only puts that extra reference
* down to 1 again.
* Make sure we first drop the reference, and only then signal
* completion, or we may (in drbd_al_read_log()) cycle so fast into the
* next drbd_md_sync_page_io(), that we trigger the
* ASSERT(atomic_read(&mdev->md_io_in_use) == 1) there.
*/
drbd_md_put_buffer(mdev);
md_io->done = 1;
wake_up(&mdev->misc_wait);
bio_put(bio);
put_ldev(mdev);
} }
/* reads on behalf of the partner, /* reads on behalf of the partner,
...@@ -226,6 +244,7 @@ void drbd_endio_pri(struct bio *bio, int error) ...@@ -226,6 +244,7 @@ void drbd_endio_pri(struct bio *bio, int error)
spin_lock_irqsave(&mdev->req_lock, flags); spin_lock_irqsave(&mdev->req_lock, flags);
__req_mod(req, what, &m); __req_mod(req, what, &m);
spin_unlock_irqrestore(&mdev->req_lock, flags); spin_unlock_irqrestore(&mdev->req_lock, flags);
put_ldev(mdev);
if (m.bio) if (m.bio)
complete_master_bio(mdev, &m); complete_master_bio(mdev, &m);
...@@ -290,7 +309,7 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio * ...@@ -290,7 +309,7 @@ void drbd_csum_bio(struct drbd_conf *mdev, struct crypto_hash *tfm, struct bio *
sg_init_table(&sg, 1); sg_init_table(&sg, 1);
crypto_hash_init(&desc); crypto_hash_init(&desc);
__bio_for_each_segment(bvec, bio, i, 0) { bio_for_each_segment(bvec, bio, i) {
sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset); sg_set_page(&sg, bvec->bv_page, bvec->bv_len, bvec->bv_offset);
crypto_hash_update(&desc, &sg, sg.length); crypto_hash_update(&desc, &sg, sg.length);
} }
...@@ -728,7 +747,7 @@ int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel) ...@@ -728,7 +747,7 @@ int w_start_resync(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
} }
drbd_start_resync(mdev, C_SYNC_SOURCE); drbd_start_resync(mdev, C_SYNC_SOURCE);
clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags); clear_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags);
return 1; return 1;
} }
...@@ -1519,14 +1538,14 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side) ...@@ -1519,14 +1538,14 @@ void drbd_start_resync(struct drbd_conf *mdev, enum drbd_conns side)
} }
drbd_state_lock(mdev); drbd_state_lock(mdev);
write_lock_irq(&global_state_lock);
if (!get_ldev_if_state(mdev, D_NEGOTIATING)) { if (!get_ldev_if_state(mdev, D_NEGOTIATING)) {
write_unlock_irq(&global_state_lock);
drbd_state_unlock(mdev); drbd_state_unlock(mdev);
return; return;
} }
write_lock_irq(&global_state_lock); ns.i = mdev->state.i;
ns = mdev->state;
ns.aftr_isp = !_drbd_may_sync_now(mdev); ns.aftr_isp = !_drbd_may_sync_now(mdev);
......
...@@ -53,7 +53,7 @@ ...@@ -53,7 +53,7 @@
extern const char *drbd_buildtag(void); extern const char *drbd_buildtag(void);
#define REL_VERSION "8.3.11" #define REL_VERSION "8.3.13"
#define API_VERSION 88 #define API_VERSION 88
#define PRO_VERSION_MIN 86 #define PRO_VERSION_MIN 86
#define PRO_VERSION_MAX 96 #define PRO_VERSION_MAX 96
...@@ -112,8 +112,8 @@ enum drbd_ret_code { ...@@ -112,8 +112,8 @@ enum drbd_ret_code {
ERR_OPEN_MD_DISK = 105, ERR_OPEN_MD_DISK = 105,
ERR_DISK_NOT_BDEV = 107, ERR_DISK_NOT_BDEV = 107,
ERR_MD_NOT_BDEV = 108, ERR_MD_NOT_BDEV = 108,
ERR_DISK_TO_SMALL = 111, ERR_DISK_TOO_SMALL = 111,
ERR_MD_DISK_TO_SMALL = 112, ERR_MD_DISK_TOO_SMALL = 112,
ERR_BDCLAIM_DISK = 114, ERR_BDCLAIM_DISK = 114,
ERR_BDCLAIM_MD_DISK = 115, ERR_BDCLAIM_MD_DISK = 115,
ERR_MD_IDX_INVALID = 116, ERR_MD_IDX_INVALID = 116,
......
...@@ -48,6 +48,11 @@ ...@@ -48,6 +48,11 @@
#define DRBD_TIMEOUT_MAX 600 #define DRBD_TIMEOUT_MAX 600
#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */ #define DRBD_TIMEOUT_DEF 60 /* 6 seconds */
/* If backing disk takes longer than disk_timeout, mark the disk as failed */
#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */
#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */
/* active connection retries when C_WF_CONNECTION */ /* active connection retries when C_WF_CONNECTION */
#define DRBD_CONNECT_INT_MIN 1 #define DRBD_CONNECT_INT_MIN 1
#define DRBD_CONNECT_INT_MAX 120 #define DRBD_CONNECT_INT_MAX 120
...@@ -60,7 +65,7 @@ ...@@ -60,7 +65,7 @@
/* timeout for the ping packets.*/ /* timeout for the ping packets.*/
#define DRBD_PING_TIMEO_MIN 1 #define DRBD_PING_TIMEO_MIN 1
#define DRBD_PING_TIMEO_MAX 100 #define DRBD_PING_TIMEO_MAX 300
#define DRBD_PING_TIMEO_DEF 5 #define DRBD_PING_TIMEO_DEF 5
/* max number of write requests between write barriers */ /* max number of write requests between write barriers */
......
...@@ -31,9 +31,12 @@ NL_PACKET(disk_conf, 3, ...@@ -31,9 +31,12 @@ NL_PACKET(disk_conf, 3,
NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs) NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs)
NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier) NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier)
NL_BIT( 58, T_MAY_IGNORE, no_disk_drain) NL_BIT( 58, T_MAY_IGNORE, no_disk_drain)
NL_INTEGER( 89, T_MAY_IGNORE, disk_timeout)
) )
NL_PACKET(detach, 4, ) NL_PACKET(detach, 4,
NL_BIT( 88, T_MANDATORY, detach_force)
)
NL_PACKET(net_conf, 5, NL_PACKET(net_conf, 5,
NL_STRING( 8, T_MANDATORY, my_addr, 128) NL_STRING( 8, T_MANDATORY, my_addr, 128)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment