Commit 25aa6a7a authored by Linus Torvalds

Merge tag 'md-3.6' of git://neil.brown.name/md

Pull additional md update from NeilBrown:
 "This contains a few patches that depend on plugging changes in the
  block layer so needed to wait for those.

  It also contains a Kconfig fix for the new RAID10 support in dm-raid."

* tag 'md-3.6' of git://neil.brown.name/md:
  md/dm-raid: DM_RAID should select MD_RAID10
  md/raid1: submit IO from originating thread instead of md thread.
  raid5: raid5d handle stripe in batch way
  raid5: make_request use batch stripe release
parents c8924234 d9f691c3
......@@ -268,13 +268,14 @@ config DM_MIRROR
needed for live data migration tools such as 'pvmove'.
config DM_RAID
tristate "RAID 1/4/5/6 target"
tristate "RAID 1/4/5/6/10 target"
depends on BLK_DEV_DM
select MD_RAID1
select MD_RAID10
select MD_RAID456
select BLK_DEV_MD
---help---
A dm target that supports RAID1, RAID4, RAID5 and RAID6 mappings
A dm target that supports RAID1, RAID10, RAID4, RAID5 and RAID6 mappings
A RAID-5 set of N drives with a capacity of C MB per drive provides
the capacity of C * (N - 1) MB, and protects against a failure
......
......@@ -1305,7 +1305,7 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
prepare_to_wait(&bitmap->overflow_wait, &__wait,
TASK_UNINTERRUPTIBLE);
spin_unlock_irq(&bitmap->counts.lock);
io_schedule();
schedule();
finish_wait(&bitmap->overflow_wait, &__wait);
continue;
}
......
......@@ -944,6 +944,44 @@ static void alloc_behind_pages(struct bio *bio, struct r1bio *r1_bio)
pr_debug("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
}
/*
 * Per-plug bookkeeping for RAID1 writes.  An instance is obtained in
 * make_request() through blk_check_plugged() and is handed back to
 * raid1_unplug(), which also frees it.
 */
struct raid1_plug_cb {
/*
 * Embedded block-layer callback; raid1_unplug() recovers the enclosing
 * structure from it with container_of().
 * NOTE(review): presumably has to stay the first member because
 * blk_check_plugged() allocates the whole object - confirm.
 */
struct blk_plug_cb cb;
/* Write bios that make_request() queued on this plug. */
struct bio_list pending;
/* Number of bios on 'pending'; folded into conf->pending_count on unplug-from-schedule. */
int pending_cnt;
};
/*
 * raid1_unplug - flush the writes that were collected on a plug.
 * @cb:            callback embedded in a struct raid1_plug_cb
 * @from_schedule: true when the unplug is triggered from the scheduler
 *
 * When called from the scheduler the queued bios are moved onto
 * conf->pending_bio_list (under conf->device_lock) and the md thread is
 * woken to submit them.  Otherwise the bios are submitted right here,
 * after the bitmap has been unplugged and waiters on conf->wait_barrier
 * have been woken.
 *
 * The raid1_plug_cb is freed on both paths.
 */
static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
{
	struct raid1_plug_cb *plug_cb =
		container_of(cb, struct raid1_plug_cb, cb);
	struct mddev *mddev = plug_cb->cb.data;
	struct r1conf *conf = mddev->private;

	if (!from_schedule) {
		/* Not scheduling: the write-out can be done directly. */
		struct bio *cur = bio_list_get(&plug_cb->pending);

		bitmap_unplug(mddev->bitmap);
		wake_up(&conf->wait_barrier);

		/* Detach each bio from the chain before submitting it. */
		while (cur) {
			struct bio *following = cur->bi_next;

			cur->bi_next = NULL;
			generic_make_request(cur);
			cur = following;
		}
	} else {
		/* Hand everything over to the md thread. */
		spin_lock_irq(&conf->device_lock);
		bio_list_merge(&conf->pending_bio_list, &plug_cb->pending);
		conf->pending_count += plug_cb->pending_cnt;
		spin_unlock_irq(&conf->device_lock);
		md_wakeup_thread(mddev->thread);
	}

	kfree(plug_cb);
}
static void make_request(struct mddev *mddev, struct bio * bio)
{
struct r1conf *conf = mddev->private;
......@@ -957,6 +995,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
struct md_rdev *blocked_rdev;
struct blk_plug_cb *cb;
struct raid1_plug_cb *plug = NULL;
int first_clone;
int sectors_handled;
int max_sectors;
......@@ -1259,11 +1299,22 @@ static void make_request(struct mddev *mddev, struct bio * bio)
mbio->bi_private = r1_bio;
atomic_inc(&r1_bio->remaining);
cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
if (cb)
plug = container_of(cb, struct raid1_plug_cb, cb);
else
plug = NULL;
spin_lock_irqsave(&conf->device_lock, flags);
if (plug) {
bio_list_add(&plug->pending, mbio);
plug->pending_cnt++;
} else {
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
}
spin_unlock_irqrestore(&conf->device_lock, flags);
if (!mddev_check_plugged(mddev))
if (!plug)
md_wakeup_thread(mddev->thread);
}
/* Mustn't call r1_bio_write_done before this next test,
......
......@@ -484,7 +484,8 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
} else {
if (atomic_read(&sh->count)) {
BUG_ON(!list_empty(&sh->lru)
&& !test_bit(STRIPE_EXPANDING, &sh->state));
&& !test_bit(STRIPE_EXPANDING, &sh->state)
&& !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state));
} else {
if (!test_bit(STRIPE_HANDLE, &sh->state))
atomic_inc(&conf->active_stripes);
......@@ -4010,6 +4011,62 @@ static struct stripe_head *__get_priority_stripe(struct r5conf *conf)
return sh;
}
/*
 * Per-plug bookkeeping for RAID5: stripes whose release is deferred
 * until the plug is flushed.  Obtained in release_stripe_plug() through
 * blk_check_plugged() and freed in raid5_unplug().
 */
struct raid5_plug_cb {
/*
 * Embedded block-layer callback; container_of() is used on it to get
 * back to this structure.
 * NOTE(review): presumably has to stay the first member because
 * blk_check_plugged() allocates the whole object - confirm.
 */
struct blk_plug_cb cb;
/*
 * Deferred stripes, linked through stripe_head.lru.  A NULL list.next
 * is treated as "not initialised yet" by release_stripe_plug() and
 * raid5_unplug().
 */
struct list_head list;
};
/*
 * raid5_unplug - release every stripe that was parked on a plug.
 * @blk_cb:        callback embedded in a struct raid5_plug_cb
 * @from_schedule: not consulted by this function
 *
 * Drains the plug's stripe list under conf->device_lock, clearing
 * STRIPE_ON_UNPLUG_LIST on each stripe and passing it to
 * __release_stripe().  The raid5_plug_cb is always freed, whether or
 * not any stripes were queued.
 */
static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
{
	struct raid5_plug_cb *plug_cb =
		container_of(blk_cb, struct raid5_plug_cb, cb);
	struct mddev *mddev = plug_cb->cb.data;
	struct r5conf *conf = mddev->private;
	struct list_head *deferred = &plug_cb->list;

	/* Nothing to do if the list was never set up or holds no stripes. */
	if (!deferred->next || list_empty(deferred))
		goto out_free;

	spin_lock_irq(&conf->device_lock);
	while (!list_empty(deferred)) {
		struct stripe_head *stripe =
			list_first_entry(deferred, struct stripe_head, lru);

		list_del_init(&stripe->lru);
		/*
		 * Order the list removal before the flag is cleared, so
		 * that release_stripe_plug() cannot see
		 * STRIPE_ON_UNPLUG_LIST clear while the stripe is still
		 * linked on this list.
		 */
		smp_mb__before_clear_bit();
		clear_bit(STRIPE_ON_UNPLUG_LIST, &stripe->state);
		__release_stripe(conf, stripe);
	}
	spin_unlock_irq(&conf->device_lock);

out_free:
	kfree(plug_cb);
}
/*
 * release_stripe_plug - release a stripe, deferring to the plug if possible.
 * @mddev: array the stripe belongs to; passed to blk_check_plugged() as
 *         the callback data
 * @sh:    stripe to release
 *
 * If blk_check_plugged() supplies a plug callback and the stripe is not
 * already marked STRIPE_ON_UNPLUG_LIST, the stripe is appended to the
 * plug's list and released later by raid5_unplug().  In every other case
 * the stripe is released immediately with release_stripe().
 */
static void release_stripe_plug(struct mddev *mddev,
				struct stripe_head *sh)
{
	struct raid5_plug_cb *plug_cb;
	struct blk_plug_cb *blk_cb;

	blk_cb = blk_check_plugged(raid5_unplug, mddev, sizeof(*plug_cb));
	if (blk_cb) {
		plug_cb = container_of(blk_cb, struct raid5_plug_cb, cb);

		/*
		 * A NULL next pointer marks a list head that has not been
		 * initialised yet (presumably a freshly allocated, zeroed
		 * callback - confirm against blk_check_plugged()).
		 */
		if (!plug_cb->list.next)
			INIT_LIST_HEAD(&plug_cb->list);

		/* Only queue the stripe if it is not on an unplug list yet. */
		if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)) {
			list_add_tail(&sh->lru, &plug_cb->list);
			return;
		}
	}

	/* No plug callback, or the stripe is already queued. */
	release_stripe(sh);
}
static void make_request(struct mddev *mddev, struct bio * bi)
{
struct r5conf *conf = mddev->private;
......@@ -4138,8 +4195,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
if ((bi->bi_rw & REQ_NOIDLE) &&
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
atomic_inc(&conf->preread_active_stripes);
mddev_check_plugged(mddev);
release_stripe(sh);
release_stripe_plug(mddev, sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
clear_bit(BIO_UPTODATE, &bi->bi_flags);
......@@ -4537,6 +4593,30 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
return handled;
}
/* Upper bound on the number of stripes handled per batch. */
#define MAX_STRIPE_BATCH 8

/*
 * handle_active_stripes - handle up to MAX_STRIPE_BATCH stripes in one go.
 * @conf: RAID5 configuration whose priority stripes are processed
 *
 * Entered with conf->device_lock held.  Stripes are collected with
 * __get_priority_stripe() while the lock is held; the lock is then
 * dropped for handle_stripe() and cond_resched(), and taken again before
 * the stripes are passed to __release_stripe().  The lock is held on
 * return.  If no stripe is available the lock is never dropped.
 *
 * Returns the number of stripes handled (0 if none were available).
 */
static int handle_active_stripes(struct r5conf *conf)
{
	struct stripe_head *picked[MAX_STRIPE_BATCH];
	int count;
	int idx;

	/* Gather a batch while still holding the lock. */
	for (count = 0; count < MAX_STRIPE_BATCH; count++) {
		struct stripe_head *candidate = __get_priority_stripe(conf);

		if (!candidate)
			break;
		picked[count] = candidate;
	}

	if (!count)
		return 0;

	spin_unlock_irq(&conf->device_lock);

	for (idx = 0; idx < count; idx++)
		handle_stripe(picked[idx]);

	cond_resched();

	spin_lock_irq(&conf->device_lock);

	for (idx = 0; idx < count; idx++)
		__release_stripe(conf, picked[idx]);

	return count;
}
/*
* This is our raid5 kernel thread.
......@@ -4547,7 +4627,6 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
*/
static void raid5d(struct mddev *mddev)
{
struct stripe_head *sh;
struct r5conf *conf = mddev->private;
int handled;
struct blk_plug plug;
......@@ -4561,6 +4640,7 @@ static void raid5d(struct mddev *mddev)
spin_lock_irq(&conf->device_lock);
while (1) {
struct bio *bio;
int batch_size;
if (
!list_empty(&conf->bitmap_list)) {
......@@ -4584,22 +4664,17 @@ static void raid5d(struct mddev *mddev)
handled++;
}
sh = __get_priority_stripe(conf);
if (!sh)
batch_size = handle_active_stripes(conf);
if (!batch_size)
break;
spin_unlock_irq(&conf->device_lock);
handled += batch_size;
handled++;
handle_stripe(sh);
release_stripe(sh);
cond_resched();
if (mddev->flags & ~(1<<MD_CHANGE_PENDING))
if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) {
spin_unlock_irq(&conf->device_lock);
md_check_recovery(mddev);
spin_lock_irq(&conf->device_lock);
}
}
pr_debug("%d stripes handled\n", handled);
spin_unlock_irq(&conf->device_lock);
......
......@@ -321,6 +321,7 @@ enum {
STRIPE_BIOFILL_RUN,
STRIPE_COMPUTE_RUN,
STRIPE_OPS_REQ_PENDING,
STRIPE_ON_UNPLUG_LIST,
};
/*
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment