Commit 6e451397 authored by Linus Torvalds

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md: (45 commits)
  md: don't insist on valid event count for spare devices.
  md: simplify updating of event count to sometimes avoid updating spares.
  md/raid6: Fix raid-6 read-error correction in degraded state
  md: restore ability of spare drives to spin down.
  md: Fix read balancing in RAID1 and RAID10 on drives > 2TB
  md/linear: standardise all printk messages
  md/raid0: tidy up printk messages.
  md/raid10: tidy up printk messages.
  md/raid1: improve printk messages
  md/raid5: improve consistency of error messages.
  md: remove EXPERIMENTAL designation from RAID10
  md: allow integers to be passed to md/level
  md: notify mdstat waiters of level change
  md/raid4: permit raid0 takeover
  md/raid1: delay reads that could overtake behind-writes.
  md/raid1: fix confusing 'redirect sector' message.
  md: don't unregister the thread in mddev_suspend
  md: factor out init code for an mddev
  md: pass mddev to make_request functions rather than request_queue
  md: call md_stop_writes from md_stop
  ...
parents d79df0b1 19fdb9ee
......@@ -100,8 +100,8 @@ config MD_RAID1
If unsure, say Y.
config MD_RAID10
tristate "RAID-10 (mirrored striping) mode (EXPERIMENTAL)"
depends on BLK_DEV_MD && EXPERIMENTAL
tristate "RAID-10 (mirrored striping) mode"
depends on BLK_DEV_MD
---help---
RAID-10 provides a combination of striping (RAID-0) and
mirroring (RAID-1) with easier configuration and more flexible
......
......@@ -505,7 +505,7 @@ void bitmap_update_sb(struct bitmap *bitmap)
return;
}
spin_unlock_irqrestore(&bitmap->lock, flags);
sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
sb->events = cpu_to_le64(bitmap->mddev->events);
if (bitmap->mddev->events < bitmap->events_cleared) {
/* rocking back to read-only */
......@@ -526,7 +526,7 @@ void bitmap_print_sb(struct bitmap *bitmap)
if (!bitmap || !bitmap->sb_page)
return;
sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
printk(KERN_DEBUG "%s: bitmap file superblock:\n", bmname(bitmap));
printk(KERN_DEBUG " magic: %08x\n", le32_to_cpu(sb->magic));
printk(KERN_DEBUG " version: %d\n", le32_to_cpu(sb->version));
......@@ -575,7 +575,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
return err;
}
sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
chunksize = le32_to_cpu(sb->chunksize);
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
......@@ -661,7 +661,7 @@ static int bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits,
return 0;
}
spin_unlock_irqrestore(&bitmap->lock, flags);
sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
old = le32_to_cpu(sb->state) & bits;
switch (op) {
case MASK_SET: sb->state |= cpu_to_le32(bits);
......@@ -1292,9 +1292,14 @@ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sect
if (!bitmap) return 0;
if (behind) {
int bw;
atomic_inc(&bitmap->behind_writes);
bw = atomic_read(&bitmap->behind_writes);
if (bw > bitmap->behind_writes_used)
bitmap->behind_writes_used = bw;
PRINTK(KERN_DEBUG "inc write-behind count %d/%d\n",
atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
bw, bitmap->max_write_behind);
}
while (sectors) {
......@@ -1351,7 +1356,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
{
if (!bitmap) return;
if (behind) {
atomic_dec(&bitmap->behind_writes);
if (atomic_dec_and_test(&bitmap->behind_writes))
wake_up(&bitmap->behind_wait);
PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
}
......@@ -1675,6 +1681,7 @@ int bitmap_create(mddev_t *mddev)
atomic_set(&bitmap->pending_writes, 0);
init_waitqueue_head(&bitmap->write_wait);
init_waitqueue_head(&bitmap->overflow_wait);
init_waitqueue_head(&bitmap->behind_wait);
bitmap->mddev = mddev;
......@@ -2006,6 +2013,27 @@ static ssize_t can_clear_store(mddev_t *mddev, const char *buf, size_t len)
static struct md_sysfs_entry bitmap_can_clear =
__ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store);
/*
 * sysfs read handler: write the recorded behind_writes_used value of the
 * array's bitmap into @page as a decimal number followed by a newline.
 * When the array has no bitmap, "0" is reported instead.
 *
 * Returns the number of characters written, as reported by sprintf().
 */
static ssize_t
behind_writes_used_show(mddev_t *mddev, char *page)
{
	unsigned long used = 0;

	/* Without a bitmap there is nothing recorded; report zero. */
	if (mddev->bitmap)
		used = mddev->bitmap->behind_writes_used;

	return sprintf(page, "%lu\n", used);
}
/*
 * sysfs write handler: clear the bitmap's behind_writes_used value back
 * to zero.  The written data in @buf is not examined; any write triggers
 * the reset.  With no bitmap attached the write is accepted and ignored.
 *
 * Always returns @len, i.e. the whole input is reported as consumed.
 */
static ssize_t
behind_writes_used_reset(mddev_t *mddev, const char *buf, size_t len)
{
	/* Nothing to reset when the array has no bitmap. */
	if (!mddev->bitmap)
		return len;

	mddev->bitmap->behind_writes_used = 0;
	return len;
}
/*
 * "max_backlog_used" attribute: readable by everyone (S_IRUGO), writable
 * by the owner (S_IWUSR).  Reads go through behind_writes_used_show();
 * writes go through behind_writes_used_reset().
 */
static struct md_sysfs_entry max_backlog_used =
__ATTR(max_backlog_used, S_IRUGO | S_IWUSR,
behind_writes_used_show, behind_writes_used_reset);
static struct attribute *md_bitmap_attrs[] = {
&bitmap_location.attr,
&bitmap_timeout.attr,
......@@ -2013,6 +2041,7 @@ static struct attribute *md_bitmap_attrs[] = {
&bitmap_chunksize.attr,
&bitmap_metadata.attr,
&bitmap_can_clear.attr,
&max_backlog_used.attr,
NULL
};
struct attribute_group md_bitmap_group = {
......
......@@ -227,6 +227,7 @@ struct bitmap {
int allclean;
atomic_t behind_writes;
unsigned long behind_writes_used; /* highest actual value at runtime */
/*
* the bitmap daemon - periodically wakes up and sweeps the bitmap
......@@ -239,6 +240,7 @@ struct bitmap {
atomic_t pending_writes; /* pending writes to the bitmap file */
wait_queue_head_t write_wait;
wait_queue_head_t overflow_wait;
wait_queue_head_t behind_wait;
struct sysfs_dirent *sysfs_can_clear;
};
......
......@@ -169,10 +169,9 @@ static void add_sector(conf_t *conf, sector_t start, int mode)
conf->nfaults = n+1;
}
static int make_request(struct request_queue *q, struct bio *bio)
static int make_request(mddev_t *mddev, struct bio *bio)
{
mddev_t *mddev = q->queuedata;
conf_t *conf = (conf_t*)mddev->private;
conf_t *conf = mddev->private;
int failit = 0;
if (bio_data_dir(bio) == WRITE) {
......@@ -225,7 +224,7 @@ static int make_request(struct request_queue *q, struct bio *bio)
static void status(struct seq_file *seq, mddev_t *mddev)
{
conf_t *conf = (conf_t*)mddev->private;
conf_t *conf = mddev->private;
int n;
if ((n=atomic_read(&conf->counters[WriteTransient])) != 0)
......@@ -328,7 +327,7 @@ static int run(mddev_t *mddev)
static int stop(mddev_t *mddev)
{
conf_t *conf = (conf_t *)mddev->private;
conf_t *conf = mddev->private;
kfree(conf);
mddev->private = NULL;
......
......@@ -159,7 +159,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
sector_t sectors;
if (j < 0 || j >= raid_disks || disk->rdev) {
printk("linear: disk numbering problem. Aborting!\n");
printk(KERN_ERR "md/linear:%s: disk numbering problem. Aborting!\n",
mdname(mddev));
goto out;
}
......@@ -187,7 +188,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
}
if (cnt != raid_disks) {
printk("linear: not enough drives present. Aborting!\n");
printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n",
mdname(mddev));
goto out;
}
......@@ -282,29 +284,21 @@ static int linear_stop (mddev_t *mddev)
rcu_barrier();
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
kfree(conf);
mddev->private = NULL;
return 0;
}
static int linear_make_request (struct request_queue *q, struct bio *bio)
static int linear_make_request (mddev_t *mddev, struct bio *bio)
{
const int rw = bio_data_dir(bio);
mddev_t *mddev = q->queuedata;
dev_info_t *tmp_dev;
sector_t start_sector;
int cpu;
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
md_barrier_request(mddev, bio);
return 0;
}
cpu = part_stat_lock();
part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
bio_sectors(bio));
part_stat_unlock();
rcu_read_lock();
tmp_dev = which_dev(mddev, bio->bi_sector);
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
......@@ -314,12 +308,14 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
|| (bio->bi_sector < start_sector))) {
char b[BDEVNAME_SIZE];
printk("linear_make_request: Sector %llu out of bounds on "
"dev %s: %llu sectors, offset %llu\n",
(unsigned long long)bio->bi_sector,
bdevname(tmp_dev->rdev->bdev, b),
(unsigned long long)tmp_dev->rdev->sectors,
(unsigned long long)start_sector);
printk(KERN_ERR
"md/linear:%s: make_request: Sector %llu out of bounds on "
"dev %s: %llu sectors, offset %llu\n",
mdname(mddev),
(unsigned long long)bio->bi_sector,
bdevname(tmp_dev->rdev->bdev, b),
(unsigned long long)tmp_dev->rdev->sectors,
(unsigned long long)start_sector);
rcu_read_unlock();
bio_io_error(bio);
return 0;
......@@ -336,9 +332,9 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
bp = bio_split(bio, end_sector - bio->bi_sector);
if (linear_make_request(q, &bp->bio1))
if (linear_make_request(mddev, &bp->bio1))
generic_make_request(&bp->bio1);
if (linear_make_request(q, &bp->bio2))
if (linear_make_request(mddev, &bp->bio2))
generic_make_request(&bp->bio2);
bio_pair_release(bp);
return 0;
......
This diff is collapsed.
......@@ -74,9 +74,6 @@ struct mdk_rdev_s
#define Blocked 8 /* An error occurred on an externally
* managed array, don't allow writes
* until it is cleared */
#define StateChanged 9 /* Faulty or Blocked has changed during
* interrupt, so it needs to be
* notified by the thread */
wait_queue_head_t blocked_wait;
int desc_nr; /* descriptor index in the superblock */
......@@ -153,6 +150,12 @@ struct mddev_s
int external_size; /* size managed
* externally */
__u64 events;
/* If the last 'event' was simply a clean->dirty transition, and
* we didn't write it to the spares, then it is safe and simple
* to just decrement the event count on a dirty->clean transition.
* So we record that possibility here.
*/
int can_decrease_events;
char uuid[16];
......@@ -240,7 +243,6 @@ struct mddev_s
atomic_t active; /* general refcount */
atomic_t openers; /* number of active opens */
int changed; /* true if we might need to reread partition info */
int degraded; /* whether md should consider
* adding a spare
*/
......@@ -279,9 +281,6 @@ struct mddev_s
atomic_t writes_pending;
struct request_queue *queue; /* for plugging ... */
atomic_t write_behind; /* outstanding async IO */
unsigned int max_write_behind; /* 0 = sync */
struct bitmap *bitmap; /* the bitmap for the device */
struct {
struct file *file; /* the bitmap file */
......@@ -305,6 +304,7 @@ struct mddev_s
atomic_t max_corr_read_errors; /* max read retries */
struct list_head all_mddevs;
struct attribute_group *to_remove;
/* Generic barrier handling.
* If there is a pending barrier request, all other
* writes are blocked while the devices are flushed.
......@@ -336,7 +336,7 @@ struct mdk_personality
int level;
struct list_head list;
struct module *owner;
int (*make_request)(struct request_queue *q, struct bio *bio);
int (*make_request)(mddev_t *mddev, struct bio *bio);
int (*run)(mddev_t *mddev);
int (*stop)(mddev_t *mddev);
void (*status)(struct seq_file *seq, mddev_t *mddev);
......
......@@ -85,7 +85,7 @@ static void multipath_end_bh_io (struct multipath_bh *mp_bh, int err)
static void multipath_end_request(struct bio *bio, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct multipath_bh * mp_bh = (struct multipath_bh *)(bio->bi_private);
struct multipath_bh *mp_bh = bio->bi_private;
multipath_conf_t *conf = mp_bh->mddev->private;
mdk_rdev_t *rdev = conf->multipaths[mp_bh->path].rdev;
......@@ -136,14 +136,11 @@ static void multipath_unplug(struct request_queue *q)
}
static int multipath_make_request (struct request_queue *q, struct bio * bio)
static int multipath_make_request(mddev_t *mddev, struct bio * bio)
{
mddev_t *mddev = q->queuedata;
multipath_conf_t *conf = mddev->private;
struct multipath_bh * mp_bh;
struct multipath_info *multipath;
const int rw = bio_data_dir(bio);
int cpu;
if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
md_barrier_request(mddev, bio);
......@@ -155,12 +152,6 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
mp_bh->master_bio = bio;
mp_bh->mddev = mddev;
cpu = part_stat_lock();
part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw],
bio_sectors(bio));
part_stat_unlock();
mp_bh->path = multipath_map(conf);
if (mp_bh->path < 0) {
bio_endio(bio, -EIO);
......
This diff is collapsed.
......@@ -13,6 +13,9 @@ struct raid0_private_data
struct strip_zone *strip_zone;
mdk_rdev_t **devlist; /* lists of rdevs, pointed to by strip_zone->dev */
int nr_strip_zones;
int scale_raid_disks; /* divide rdev->raid_disks by this in run()
* to handle conversion from raid10
*/
};
typedef struct raid0_private_data raid0_conf_t;
......
This diff is collapsed.
This diff is collapsed.
......@@ -33,9 +33,16 @@ struct r10_private_data_s {
* 1 stripe.
*/
sector_t dev_sectors; /* temp copy of mddev->dev_sectors */
int chunk_shift; /* shift from chunks to sectors */
sector_t chunk_mask;
int scale_disks; /* When starting array, multiply
* each ->raid_disk by this.
* Needed for raid0->raid10 migration
*/
struct list_head retry_list;
/* queue pending writes and submit them on unplug */
struct bio_list pending_bio_list;
......@@ -57,6 +64,11 @@ struct r10_private_data_s {
mempool_t *r10bio_pool;
mempool_t *r10buf_pool;
struct page *tmppage;
/* When taking over an array from a different personality, we store
* the new thread here until we fully activate the array.
*/
struct mdk_thread_s *thread;
};
typedef struct r10_private_data_s conf_t;
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment