Commit 2a4c32ed authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD updates from Shaohua Li:

 - a raid5 writeback cache feature.

   The goal is to aggregate writes to make full stripe write and reduce
   read-modify-write. It's helpful for workload which does sequential
   write and follows fsync for example. This feature is experimental and
   off by default right now.

 - FAILFAST support.

   This fails IOs to broken raid disks quickly, so can improve latency.
   It's mainly for DASD storage, but some patches help normal raid array
   too.

 - support bad block for raid array with external metadata

 - AVX2 instruction support for raid6 parity calculation

 - normalize MD info output

 - add missing blktrace

 - other bug fixes

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md: (66 commits)
  md: separate flags for superblock changes
  md: MD_RECOVERY_NEEDED is set for mddev->recovery
  md: takeover should clear unrelated bits
  md/r5cache: after recovery, increase journal seq by 10000
  md/raid5-cache: fix crc in rewrite_data_only_stripes()
  md/raid5-cache: no recovery is required when create super-block
  md: fix refcount problem on mddev when stopping array.
  md/r5cache: do r5c_update_log_state after log recovery
  md/raid5-cache: adjust the write position of the empty block if no data blocks
  md/r5cache: run_no_space_stripes() when R5C_LOG_CRITICAL == 0
  md/raid5: limit request size according to implementation limits
  md/raid5-cache: do not need to set STRIPE_PREREAD_ACTIVE repeatedly
  md/raid5-cache: remove the unnecessary next_cp_seq field from the r5l_log
  md/raid5-cache: release the stripe_head at the appropriate location
  md/raid5-cache: use ring add to prevent overflow
  md/raid5-cache: remove unnecessary function parameters
  raid5-cache: don't set STRIPE_R5C_PARTIAL_STRIPE flag while load stripe into cache
  raid5-cache: add another check conditon before replaying one stripe
  md/r5cache: enable IRQs on error path
  md/r5cache: handle alloc_page failure
  ...
parents b9f98bd4 20737738
This diff is collapsed.
......@@ -2011,7 +2011,7 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
sb->compat_features = cpu_to_le32(FEATURE_FLAG_SUPPORTS_V190);
/* Force writing of superblocks to disk */
set_bit(MD_CHANGE_DEVS, &rdev->mddev->flags);
set_bit(MD_SB_CHANGE_DEVS, &rdev->mddev->sb_flags);
/* Any superblock is better than none, choose that if given */
return refdev ? 0 : 1;
......@@ -3497,7 +3497,7 @@ static void rs_update_sbs(struct raid_set *rs)
struct mddev *mddev = &rs->md;
int ro = mddev->ro;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
mddev->ro = 0;
md_update_sb(mddev, 1);
mddev->ro = ro;
......
......@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/block.h>
#include "md.h"
#include "linear.h"
......@@ -101,7 +102,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
sector_t sectors;
if (j < 0 || j >= raid_disks || disk->rdev) {
printk(KERN_ERR "md/linear:%s: disk numbering problem. Aborting!\n",
pr_warn("md/linear:%s: disk numbering problem. Aborting!\n",
mdname(mddev));
goto out;
}
......@@ -123,7 +124,7 @@ static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
discard_supported = true;
}
if (cnt != raid_disks) {
printk(KERN_ERR "md/linear:%s: not enough drives present. Aborting!\n",
pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
mdname(mddev));
goto out;
}
......@@ -227,22 +228,22 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
}
do {
tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
sector_t bio_sector = bio->bi_iter.bi_sector;
tmp_dev = which_dev(mddev, bio_sector);
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
end_sector = tmp_dev->end_sector;
data_offset = tmp_dev->rdev->data_offset;
bio->bi_bdev = tmp_dev->rdev->bdev;
if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
bio->bi_iter.bi_sector < start_sector))
if (unlikely(bio_sector >= end_sector ||
bio_sector < start_sector))
goto out_of_bounds;
if (unlikely(bio_end_sector(bio) > end_sector)) {
/* This bio crosses a device boundary, so we have to
* split it.
*/
split = bio_split(bio, end_sector -
bio->bi_iter.bi_sector,
split = bio_split(bio, end_sector - bio_sector,
GFP_NOIO, fs_bio_set);
bio_chain(split, bio);
} else {
......@@ -256,15 +257,18 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
!blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
/* Just ignore it */
bio_endio(split);
} else
} else {
if (mddev->gendisk)
trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
split, disk_devt(mddev->gendisk),
bio_sector);
generic_make_request(split);
}
} while (split != bio);
return;
out_of_bounds:
printk(KERN_ERR
"md/linear:%s: make_request: Sector %llu out of bounds on "
"dev %s: %llu sectors, offset %llu\n",
pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %s: %llu sectors, offset %llu\n",
mdname(mddev),
(unsigned long long)bio->bi_iter.bi_sector,
bdevname(tmp_dev->rdev->bdev, b),
......@@ -275,7 +279,6 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
static void linear_status (struct seq_file *seq, struct mddev *mddev)
{
seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
}
......
This diff is collapsed.
......@@ -29,6 +29,16 @@
#define MaxSector (~(sector_t)0)
/*
* These flags should really be called "NO_RETRY" rather than
* "FAILFAST" because they don't make any promise about time lapse,
* only about the number of retries, which will be zero.
* REQ_FAILFAST_DRIVER is not included because
* Commit: 4a27446f3e39 ("[SCSI] modify scsi to handle new fail fast flags.")
* seems to suggest that the errors it avoids retrying should usually
* be retried.
*/
#define MD_FAILFAST (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT)
/*
* MD's 'extended' device
*/
......@@ -168,6 +178,19 @@ enum flag_bits {
* so it is safe to remove without
* another synchronize_rcu() call.
*/
ExternalBbl, /* External metadata provides bad
* block management for a disk
*/
FailFast, /* Minimal retries should be attempted on
* this device, so use REQ_FAILFAST_DEV.
* Also don't try to repair failed reads.
* It is expects that no bad block log
* is present.
*/
LastDev, /* Seems to be the last working dev as
* it didn't fail, so don't use FailFast
* any more for metadata
*/
};
static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
......@@ -189,6 +212,31 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new);
struct md_cluster_info;
enum mddev_flags {
MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */
MD_CLOSING, /* If set, we are closing the array, do not open
* it then */
MD_JOURNAL_CLEAN, /* A raid with journal is already clean */
MD_HAS_JOURNAL, /* The raid array has journal feature set */
MD_RELOAD_SB, /* Reload the superblock because another node
* updated it.
*/
MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
* already took resync lock, need to
* release the lock */
MD_FAILFAST_SUPPORTED, /* Using MD_FAILFAST on metadata writes is
* supported as calls to md_error() will
* never cause the array to become failed.
*/
};
enum mddev_sb_flags {
MD_SB_CHANGE_DEVS, /* Some device status has changed */
MD_SB_CHANGE_CLEAN, /* transition to or from 'clean' */
MD_SB_CHANGE_PENDING, /* switch from 'clean' to 'active' in progress */
MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */
};
struct mddev {
void *private;
struct md_personality *pers;
......@@ -196,21 +244,7 @@ struct mddev {
int md_minor;
struct list_head disks;
unsigned long flags;
#define MD_CHANGE_DEVS 0 /* Some device status has changed */
#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */
#define MD_CHANGE_PENDING 2 /* switch from 'clean' to 'active' in progress */
#define MD_UPDATE_SB_FLAGS (1 | 2 | 4) /* If these are set, md_update_sb needed */
#define MD_ARRAY_FIRST_USE 3 /* First use of array, needs initialization */
#define MD_CLOSING 4 /* If set, we are closing the array, do not open
* it then */
#define MD_JOURNAL_CLEAN 5 /* A raid with journal is already clean */
#define MD_HAS_JOURNAL 6 /* The raid array has journal feature set */
#define MD_RELOAD_SB 7 /* Reload the superblock because another node
* updated it.
*/
#define MD_CLUSTER_RESYNC_LOCKED 8 /* cluster raid only, which means node
* already took resync lock, need to
* release the lock */
unsigned long sb_flags;
int suspended;
atomic_t active_io;
......@@ -304,31 +338,6 @@ struct mddev {
int parallel_resync;
int ok_start_degraded;
/* recovery/resync flags
* NEEDED: we might need to start a resync/recover
* RUNNING: a thread is running, or about to be started
* SYNC: actually doing a resync, not a recovery
* RECOVER: doing recovery, or need to try it.
* INTR: resync needs to be aborted for some reason
* DONE: thread is done and is waiting to be reaped
* REQUEST: user-space has requested a sync (used with SYNC)
* CHECK: user-space request for check-only, no repair
* RESHAPE: A reshape is happening
* ERROR: sync-action interrupted because io-error
*
* If neither SYNC or RESHAPE are set, then it is a recovery.
*/
#define MD_RECOVERY_RUNNING 0
#define MD_RECOVERY_SYNC 1
#define MD_RECOVERY_RECOVER 2
#define MD_RECOVERY_INTR 3
#define MD_RECOVERY_DONE 4
#define MD_RECOVERY_NEEDED 5
#define MD_RECOVERY_REQUESTED 6
#define MD_RECOVERY_CHECK 7
#define MD_RECOVERY_RESHAPE 8
#define MD_RECOVERY_FROZEN 9
#define MD_RECOVERY_ERROR 10
unsigned long recovery;
/* If a RAID personality determines that recovery (of a particular
......@@ -442,6 +451,23 @@ struct mddev {
unsigned int good_device_nr; /* good device num within cluster raid */
};
enum recovery_flags {
/*
* If neither SYNC or RESHAPE are set, then it is a recovery.
*/
MD_RECOVERY_RUNNING, /* a thread is running, or about to be started */
MD_RECOVERY_SYNC, /* actually doing a resync, not a recovery */
MD_RECOVERY_RECOVER, /* doing recovery, or need to try it. */
MD_RECOVERY_INTR, /* resync needs to be aborted for some reason */
MD_RECOVERY_DONE, /* thread is done and is waiting to be reaped */
MD_RECOVERY_NEEDED, /* we might need to start a resync/recover */
MD_RECOVERY_REQUESTED, /* user-space has requested a sync (used with SYNC) */
MD_RECOVERY_CHECK, /* user-space request for check-only, no repair */
MD_RECOVERY_RESHAPE, /* A reshape is happening */
MD_RECOVERY_FROZEN, /* User request to abort, and not restart, any action */
MD_RECOVERY_ERROR, /* sync-action interrupted because io-error */
};
static inline int __must_check mddev_lock(struct mddev *mddev)
{
return mutex_lock_interruptible(&mddev->reconfig_mutex);
......@@ -623,7 +649,7 @@ extern int mddev_congested(struct mddev *mddev, int bits);
extern void md_flush_request(struct mddev *mddev, struct bio *bio);
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
sector_t sector, int size, struct page *page);
extern void md_super_wait(struct mddev *mddev);
extern int md_super_wait(struct mddev *mddev);
extern int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
struct page *page, int op, int op_flags,
bool metadata_op);
......
......@@ -52,7 +52,7 @@ static int multipath_map (struct mpconf *conf)
}
rcu_read_unlock();
printk(KERN_ERR "multipath_map(): no more operational IO paths?\n");
pr_crit_ratelimited("multipath_map(): no more operational IO paths?\n");
return (-1);
}
......@@ -97,7 +97,7 @@ static void multipath_end_request(struct bio *bio)
*/
char b[BDEVNAME_SIZE];
md_error (mp_bh->mddev, rdev);
printk(KERN_ERR "multipath: %s: rescheduling sector %llu\n",
pr_info("multipath: %s: rescheduling sector %llu\n",
bdevname(rdev->bdev,b),
(unsigned long long)bio->bi_iter.bi_sector);
multipath_reschedule_retry(mp_bh);
......@@ -194,8 +194,7 @@ static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
* first check if this is a queued request for a device
* which has just failed.
*/
printk(KERN_ALERT
"multipath: only one IO path left and IO error.\n");
pr_warn("multipath: only one IO path left and IO error.\n");
/* leave it active... it's all we have */
return;
}
......@@ -209,11 +208,9 @@ static void multipath_error (struct mddev *mddev, struct md_rdev *rdev)
spin_unlock_irqrestore(&conf->device_lock, flags);
}
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "multipath: IO failure on %s,"
" disabling IO path.\n"
"multipath: Operation continuing"
" on %d IO paths.\n",
set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
pr_err("multipath: IO failure on %s, disabling IO path.\n"
"multipath: Operation continuing on %d IO paths.\n",
bdevname(rdev->bdev, b),
conf->raid_disks - mddev->degraded);
}
......@@ -223,19 +220,19 @@ static void print_multipath_conf (struct mpconf *conf)
int i;
struct multipath_info *tmp;
printk("MULTIPATH conf printout:\n");
pr_debug("MULTIPATH conf printout:\n");
if (!conf) {
printk("(conf==NULL)\n");
pr_debug("(conf==NULL)\n");
return;
}
printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
pr_debug(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
conf->raid_disks);
for (i = 0; i < conf->raid_disks; i++) {
char b[BDEVNAME_SIZE];
tmp = conf->multipaths + i;
if (tmp->rdev)
printk(" disk%d, o:%d, dev:%s\n",
pr_debug(" disk%d, o:%d, dev:%s\n",
i,!test_bit(Faulty, &tmp->rdev->flags),
bdevname(tmp->rdev->bdev,b));
}
......@@ -292,8 +289,7 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev == p->rdev) {
if (test_bit(In_sync, &rdev->flags) ||
atomic_read(&rdev->nr_pending)) {
printk(KERN_ERR "hot-remove-disk, slot %d is identified"
" but is still operational!\n", number);
pr_warn("hot-remove-disk, slot %d is identified but is still operational!\n", number);
err = -EBUSY;
goto abort;
}
......@@ -346,14 +342,12 @@ static void multipathd(struct md_thread *thread)
bio->bi_iter.bi_sector = mp_bh->master_bio->bi_iter.bi_sector;
if ((mp_bh->path = multipath_map (conf))<0) {
printk(KERN_ALERT "multipath: %s: unrecoverable IO read"
" error for block %llu\n",
pr_err("multipath: %s: unrecoverable IO read error for block %llu\n",
bdevname(bio->bi_bdev,b),
(unsigned long long)bio->bi_iter.bi_sector);
multipath_end_bh_io(mp_bh, -EIO);
} else {
printk(KERN_ERR "multipath: %s: redirecting sector %llu"
" to another IO path\n",
pr_err("multipath: %s: redirecting sector %llu to another IO path\n",
bdevname(bio->bi_bdev,b),
(unsigned long long)bio->bi_iter.bi_sector);
*bio = *(mp_bh->master_bio);
......@@ -389,7 +383,7 @@ static int multipath_run (struct mddev *mddev)
return -EINVAL;
if (mddev->level != LEVEL_MULTIPATH) {
printk("multipath: %s: raid level not set to multipath IO (%d)\n",
pr_warn("multipath: %s: raid level not set to multipath IO (%d)\n",
mdname(mddev), mddev->level);
goto out;
}
......@@ -401,21 +395,13 @@ static int multipath_run (struct mddev *mddev)
conf = kzalloc(sizeof(struct mpconf), GFP_KERNEL);
mddev->private = conf;
if (!conf) {
printk(KERN_ERR
"multipath: couldn't allocate memory for %s\n",
mdname(mddev));
if (!conf)
goto out;
}
conf->multipaths = kzalloc(sizeof(struct multipath_info)*mddev->raid_disks,
GFP_KERNEL);
if (!conf->multipaths) {
printk(KERN_ERR
"multipath: couldn't allocate memory for %s\n",
mdname(mddev));
if (!conf->multipaths)
goto out_free_conf;
}
working_disks = 0;
rdev_for_each(rdev, mddev) {
......@@ -439,7 +425,7 @@ static int multipath_run (struct mddev *mddev)
INIT_LIST_HEAD(&conf->retry_list);
if (!working_disks) {
printk(KERN_ERR "multipath: no operational IO paths for %s\n",
pr_warn("multipath: no operational IO paths for %s\n",
mdname(mddev));
goto out_free_conf;
}
......@@ -447,25 +433,15 @@ static int multipath_run (struct mddev *mddev)
conf->pool = mempool_create_kmalloc_pool(NR_RESERVED_BUFS,
sizeof(struct multipath_bh));
if (conf->pool == NULL) {
printk(KERN_ERR
"multipath: couldn't allocate memory for %s\n",
mdname(mddev));
if (conf->pool == NULL)
goto out_free_conf;
}
{
mddev->thread = md_register_thread(multipathd, mddev,
"multipath");
if (!mddev->thread) {
printk(KERN_ERR "multipath: couldn't allocate thread"
" for %s\n", mdname(mddev));
if (!mddev->thread)
goto out_free_conf;
}
}
printk(KERN_INFO
"multipath: array %s active with %d out of %d IO paths\n",
pr_info("multipath: array %s active with %d out of %d IO paths\n",
mdname(mddev), conf->raid_disks - mddev->degraded,
mddev->raid_disks);
/*
......
......@@ -21,6 +21,7 @@
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/block.h>
#include "md.h"
#include "raid0.h"
#include "raid5.h"
......@@ -51,20 +52,21 @@ static void dump_zones(struct mddev *mddev)
char b[BDEVNAME_SIZE];
struct r0conf *conf = mddev->private;
int raid_disks = conf->strip_zone[0].nb_dev;
printk(KERN_INFO "md: RAID0 configuration for %s - %d zone%s\n",
pr_debug("md: RAID0 configuration for %s - %d zone%s\n",
mdname(mddev),
conf->nr_strip_zones, conf->nr_strip_zones==1?"":"s");
for (j = 0; j < conf->nr_strip_zones; j++) {
printk(KERN_INFO "md: zone%d=[", j);
char line[200];
int len = 0;
for (k = 0; k < conf->strip_zone[j].nb_dev; k++)
printk(KERN_CONT "%s%s", k?"/":"",
len += snprintf(line+len, 200-len, "%s%s", k?"/":"",
bdevname(conf->devlist[j*raid_disks
+ k]->bdev, b));
printk(KERN_CONT "]\n");
pr_debug("md: zone%d=[%s]\n", j, line);
zone_size = conf->strip_zone[j].zone_end - zone_start;
printk(KERN_INFO " zone-offset=%10lluKB, "
"device-offset=%10lluKB, size=%10lluKB\n",
pr_debug(" zone-offset=%10lluKB, device-offset=%10lluKB, size=%10lluKB\n",
(unsigned long long)zone_start>>1,
(unsigned long long)conf->strip_zone[j].dev_start>>1,
(unsigned long long)zone_size>>1);
......@@ -142,7 +144,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
* chunk size is a multiple of that sector size
*/
if ((mddev->chunk_sectors << 9) % blksize) {
printk(KERN_ERR "md/raid0:%s: chunk_size of %d not multiple of block size %d\n",
pr_warn("md/raid0:%s: chunk_size of %d not multiple of block size %d\n",
mdname(mddev),
mddev->chunk_sectors << 9, blksize);
err = -EINVAL;
......@@ -186,19 +188,18 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
}
if (j < 0) {
printk(KERN_ERR
"md/raid0:%s: remove inactive devices before converting to RAID0\n",
pr_warn("md/raid0:%s: remove inactive devices before converting to RAID0\n",
mdname(mddev));
goto abort;
}
if (j >= mddev->raid_disks) {
printk(KERN_ERR "md/raid0:%s: bad disk number %d - "
"aborting!\n", mdname(mddev), j);
pr_warn("md/raid0:%s: bad disk number %d - aborting!\n",
mdname(mddev), j);
goto abort;
}
if (dev[j]) {
printk(KERN_ERR "md/raid0:%s: multiple devices for %d - "
"aborting!\n", mdname(mddev), j);
pr_warn("md/raid0:%s: multiple devices for %d - aborting!\n",
mdname(mddev), j);
goto abort;
}
dev[j] = rdev1;
......@@ -208,8 +209,8 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
cnt++;
}
if (cnt != mddev->raid_disks) {
printk(KERN_ERR "md/raid0:%s: too few disks (%d of %d) - "
"aborting!\n", mdname(mddev), cnt, mddev->raid_disks);
pr_warn("md/raid0:%s: too few disks (%d of %d) - aborting!\n",
mdname(mddev), cnt, mddev->raid_disks);
goto abort;
}
zone->nb_dev = cnt;
......@@ -357,8 +358,7 @@ static int raid0_run(struct mddev *mddev)
int ret;
if (mddev->chunk_sectors == 0) {
printk(KERN_ERR "md/raid0:%s: chunk size must be set.\n",
mdname(mddev));
pr_warn("md/raid0:%s: chunk size must be set.\n", mdname(mddev));
return -EINVAL;
}
if (md_check_no_bitmap(mddev))
......@@ -399,7 +399,7 @@ static int raid0_run(struct mddev *mddev)
/* calculate array device size */
md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
printk(KERN_INFO "md/raid0:%s: md_size is %llu sectors.\n",
pr_debug("md/raid0:%s: md_size is %llu sectors.\n",
mdname(mddev),
(unsigned long long)mddev->array_sectors);
......@@ -464,7 +464,8 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
}
do {
sector_t sector = bio->bi_iter.bi_sector;
sector_t bio_sector = bio->bi_iter.bi_sector;
sector_t sector = bio_sector;
unsigned chunk_sects = mddev->chunk_sectors;
unsigned sectors = chunk_sects -
......@@ -473,7 +474,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
: sector_div(sector, chunk_sects));
/* Restore due to sector_div */
sector = bio->bi_iter.bi_sector;
sector = bio_sector;
if (sectors < bio_sectors(bio)) {
split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
......@@ -492,8 +493,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
!blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
/* Just ignore it */
bio_endio(split);
} else
} else {
if (mddev->gendisk)
trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
split, disk_devt(mddev->gendisk),
bio_sector);
generic_make_request(split);
}
} while (split != bio);
}
......@@ -509,7 +515,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
struct r0conf *priv_conf;
if (mddev->degraded != 1) {
printk(KERN_ERR "md/raid0:%s: raid5 must be degraded! Degraded disks: %d\n",
pr_warn("md/raid0:%s: raid5 must be degraded! Degraded disks: %d\n",
mdname(mddev),
mddev->degraded);
return ERR_PTR(-EINVAL);
......@@ -518,7 +524,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
rdev_for_each(rdev, mddev) {
/* check slot number for a disk */
if (rdev->raid_disk == mddev->raid_disks-1) {
printk(KERN_ERR "md/raid0:%s: raid5 must have missing parity disk!\n",
pr_warn("md/raid0:%s: raid5 must have missing parity disk!\n",
mdname(mddev));
return ERR_PTR(-EINVAL);
}
......@@ -533,8 +539,11 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
mddev->delta_disks = -1;
/* make sure it will be not marked as dirty */
mddev->recovery_cp = MaxSector;
clear_bit(MD_HAS_JOURNAL, &mddev->flags);
clear_bit(MD_JOURNAL_CLEAN, &mddev->flags);
create_strip_zones(mddev, &priv_conf);
return priv_conf;
}
......@@ -549,18 +558,18 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
* - all mirrors must be already degraded
*/
if (mddev->layout != ((1 << 8) + 2)) {
printk(KERN_ERR "md/raid0:%s:: Raid0 cannot takeover layout: 0x%x\n",
pr_warn("md/raid0:%s:: Raid0 cannot takeover layout: 0x%x\n",
mdname(mddev),
mddev->layout);
return ERR_PTR(-EINVAL);
}
if (mddev->raid_disks & 1) {
printk(KERN_ERR "md/raid0:%s: Raid0 cannot takeover Raid10 with odd disk number.\n",
pr_warn("md/raid0:%s: Raid0 cannot takeover Raid10 with odd disk number.\n",
mdname(mddev));
return ERR_PTR(-EINVAL);
}
if (mddev->degraded != (mddev->raid_disks>>1)) {
printk(KERN_ERR "md/raid0:%s: All mirrors must be already degraded!\n",
pr_warn("md/raid0:%s: All mirrors must be already degraded!\n",
mdname(mddev));
return ERR_PTR(-EINVAL);
}
......@@ -574,6 +583,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
mddev->degraded = 0;
/* make sure it will be not marked as dirty */
mddev->recovery_cp = MaxSector;
clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
create_strip_zones(mddev, &priv_conf);
return priv_conf;
......@@ -588,7 +598,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
* - (N - 1) mirror drives must be already faulty
*/
if ((mddev->raid_disks - 1) != mddev->degraded) {
printk(KERN_ERR "md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n",
pr_err("md/raid0:%s: (N - 1) mirrors drives must be already faulty!\n",
mdname(mddev));
return ERR_PTR(-EINVAL);
}
......@@ -616,6 +626,7 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
mddev->raid_disks = 1;
/* make sure it will be not marked as dirty */
mddev->recovery_cp = MaxSector;
clear_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
create_strip_zones(mddev, &priv_conf);
return priv_conf;
......@@ -631,7 +642,7 @@ static void *raid0_takeover(struct mddev *mddev)
*/
if (mddev->bitmap) {
printk(KERN_ERR "md/raid0: %s: cannot takeover array with bitmap\n",
pr_warn("md/raid0: %s: cannot takeover array with bitmap\n",
mdname(mddev));
return ERR_PTR(-EBUSY);
}
......@@ -642,7 +653,7 @@ static void *raid0_takeover(struct mddev *mddev)
if (mddev->layout == ALGORITHM_PARITY_N)
return raid0_takeover_raid45(mddev);
printk(KERN_ERR "md/raid0:%s: Raid can only takeover Raid5 with layout: %d\n",
pr_warn("md/raid0:%s: Raid can only takeover Raid5 with layout: %d\n",
mdname(mddev), ALGORITHM_PARITY_N);
}
......@@ -652,7 +663,7 @@ static void *raid0_takeover(struct mddev *mddev)
if (mddev->level == 1)
return raid0_takeover_raid1(mddev);
printk(KERN_ERR "Takeover from raid%i to raid0 not supported\n",
pr_warn("Takeover from raid%i to raid0 not supported\n",
mddev->level);
return ERR_PTR(-EINVAL);
......
This diff is collapsed.
......@@ -161,14 +161,15 @@ struct r1bio {
};
/* bits for r1bio.state */
#define R1BIO_Uptodate 0
#define R1BIO_IsSync 1
#define R1BIO_Degraded 2
#define R1BIO_BehindIO 3
enum r1bio_state {
R1BIO_Uptodate,
R1BIO_IsSync,
R1BIO_Degraded,
R1BIO_BehindIO,
/* Set ReadError on bios that experience a readerror so that
* raid1d knows what to do with them.
*/
#define R1BIO_ReadError 4
R1BIO_ReadError,
/* For write-behind requests, we call bi_end_io when
* the last non-write-behind device completes, providing
* any write was successful. Otherwise we call when
......@@ -176,10 +177,12 @@ struct r1bio {
* with failure when last write completes (and all failed).
* Record that bi_end_io was called with this flag...
*/
#define R1BIO_Returned 6
R1BIO_Returned,
/* If a write for this request means we can clear some
* known-bad-block records, we set this flag
*/
#define R1BIO_MadeGood 7
#define R1BIO_WriteError 8
R1BIO_MadeGood,
R1BIO_WriteError,
R1BIO_FailFast,
};
#endif
This diff is collapsed.
......@@ -156,5 +156,7 @@ enum r10bio_state {
* flag is set
*/
R10BIO_Previous,
/* failfast devices did receive failfast requests. */
R10BIO_FailFast,
};
#endif
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -84,6 +84,10 @@
#define MD_DISK_CANDIDATE 5 /* disk is added as spare (local) until confirmed
* For clustered enviroments only.
*/
#define MD_DISK_FAILFAST 10 /* Send REQ_FAILFAST if there are multiple
* devices available - and don't try to
* correct read errors.
*/
#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config.
* read requests will only be sent here in
......@@ -265,8 +269,9 @@ struct mdp_superblock_1 {
__le32 dev_number; /* permanent identifier of this device - not role in raid */
__le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */
__u8 device_uuid[16]; /* user-space setable, ignored by kernel */
__u8 devflags; /* per-device flags. Only one defined...*/
__u8 devflags; /* per-device flags. Only two defined...*/
#define WriteMostly1 1 /* mask for writemostly flag in above */
#define FailFast1 2 /* Should avoid retries and fixups and just fail */
/* Bad block log. If there are any bad blocks the feature flag is set.
* If offset and size are non-zero, that space is reserved and available
*/
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment