Commit 0bd7c5d8 authored by Jens Axboe's avatar Jens Axboe

Merge tag 'md-next-20231219' of...

Merge tag 'md-next-20231219' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md into for-6.8/block

Pull MD updates from Song:

"1. Remove deprecated flavors, by Song Liu;
 2. raid1 read error check support, by Li Nan;
 3. Better handle events off-by-1 case, by Alex Lyakas."

* tag 'md-next-20231219' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md: Remove deprecated CONFIG_MD_FAULTY
  md: Remove deprecated CONFIG_MD_MULTIPATH
  md: Remove deprecated CONFIG_MD_LINEAR
  md/raid1: support read error check
  md: factor out a helper exceed_read_errors() to check read_errors
  md: Whenassemble the array, consult the superblock of the freshest device
  md/raid1: remove unnecessary null checking
parents 4c434392 415c7451
...@@ -61,19 +61,6 @@ config MD_BITMAP_FILE ...@@ -61,19 +61,6 @@ config MD_BITMAP_FILE
various kernel APIs and can only work with files on a file system not various kernel APIs and can only work with files on a file system not
actually sitting on the MD device. actually sitting on the MD device.
config MD_LINEAR
tristate "Linear (append) mode (deprecated)"
depends on BLK_DEV_MD
help
If you say Y here, then your multiple devices driver will be able to
use the so-called linear mode, i.e. it will combine the hard disk
partitions by simply appending one to the other.
To compile this as a module, choose M here: the module
will be called linear.
If unsure, say Y.
config MD_RAID0 config MD_RAID0
tristate "RAID-0 (striping) mode" tristate "RAID-0 (striping) mode"
depends on BLK_DEV_MD depends on BLK_DEV_MD
...@@ -172,27 +159,6 @@ config MD_RAID456 ...@@ -172,27 +159,6 @@ config MD_RAID456
If unsure, say Y. If unsure, say Y.
config MD_MULTIPATH
tristate "Multipath I/O support (deprecated)"
depends on BLK_DEV_MD
help
MD_MULTIPATH provides a simple multi-path personality for use with
the MD framework. It is not under active development. New
projects should consider using DM_MULTIPATH which has more
features and more testing.
If unsure, say N.
config MD_FAULTY
tristate "Faulty test module for MD (deprecated)"
depends on BLK_DEV_MD
help
The "faulty" module allows for a block device that occasionally returns
read or write errors. It is useful for testing.
If unsure, say N.
config MD_CLUSTER config MD_CLUSTER
tristate "Cluster Support for MD" tristate "Cluster Support for MD"
depends on BLK_DEV_MD depends on BLK_DEV_MD
......
...@@ -29,22 +29,16 @@ dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o ...@@ -29,22 +29,16 @@ dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
md-mod-y += md.o md-bitmap.o md-mod-y += md.o md-bitmap.o
raid456-y += raid5.o raid5-cache.o raid5-ppl.o raid456-y += raid5.o raid5-cache.o raid5-ppl.o
linear-y += md-linear.o
multipath-y += md-multipath.o
faulty-y += md-faulty.o
# Note: link order is important. All raid personalities # Note: link order is important. All raid personalities
# and must come before md.o, as they each initialise # and must come before md.o, as they each initialise
# themselves, and md.o may use the personalities when it # themselves, and md.o may use the personalities when it
# auto-initialised. # auto-initialised.
obj-$(CONFIG_MD_LINEAR) += linear.o
obj-$(CONFIG_MD_RAID0) += raid0.o obj-$(CONFIG_MD_RAID0) += raid0.o
obj-$(CONFIG_MD_RAID1) += raid1.o obj-$(CONFIG_MD_RAID1) += raid1.o
obj-$(CONFIG_MD_RAID10) += raid10.o obj-$(CONFIG_MD_RAID10) += raid10.o
obj-$(CONFIG_MD_RAID456) += raid456.o obj-$(CONFIG_MD_RAID456) += raid456.o
obj-$(CONFIG_MD_MULTIPATH) += multipath.o
obj-$(CONFIG_MD_FAULTY) += faulty.o
obj-$(CONFIG_MD_CLUSTER) += md-cluster.o obj-$(CONFIG_MD_CLUSTER) += md-cluster.o
obj-$(CONFIG_BCACHE) += bcache/ obj-$(CONFIG_BCACHE) += bcache/
obj-$(CONFIG_BLK_DEV_MD) += md-mod.o obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
......
...@@ -49,7 +49,6 @@ static int md_setup_ents __initdata; ...@@ -49,7 +49,6 @@ static int md_setup_ents __initdata;
* instead of just one. -- KTK * instead of just one. -- KTK
* 18May2000: Added support for persistent-superblock arrays: * 18May2000: Added support for persistent-superblock arrays:
* md=n,0,factor,fault,device-list uses RAID0 for device n * md=n,0,factor,fault,device-list uses RAID0 for device n
* md=n,-1,factor,fault,device-list uses LINEAR for device n
* md=n,device-list reads a RAID superblock from the devices * md=n,device-list reads a RAID superblock from the devices
* elements in device-list are read by name_to_kdev_t so can be * elements in device-list are read by name_to_kdev_t so can be
* a hex number or something like /dev/hda1 /dev/sdb * a hex number or something like /dev/hda1 /dev/sdb
...@@ -88,7 +87,7 @@ static int __init md_setup(char *str) ...@@ -88,7 +87,7 @@ static int __init md_setup(char *str)
md_setup_ents++; md_setup_ents++;
switch (get_option(&str, &level)) { /* RAID level */ switch (get_option(&str, &level)) { /* RAID level */
case 2: /* could be 0 or -1.. */ case 2: /* could be 0 or -1.. */
if (level == 0 || level == LEVEL_LINEAR) { if (level == 0) {
if (get_option(&str, &factor) != 2 || /* Chunk Size */ if (get_option(&str, &factor) != 2 || /* Chunk Size */
get_option(&str, &fault) != 2) { get_option(&str, &fault) != 2) {
printk(KERN_WARNING "md: Too few arguments supplied to md=.\n"); printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
...@@ -96,10 +95,7 @@ static int __init md_setup(char *str) ...@@ -96,10 +95,7 @@ static int __init md_setup(char *str)
} }
md_setup_args[ent].level = level; md_setup_args[ent].level = level;
md_setup_args[ent].chunk = 1 << (factor+12); md_setup_args[ent].chunk = 1 << (factor+12);
if (level == LEVEL_LINEAR) pername = "raid0";
pername = "linear";
else
pername = "raid0";
break; break;
} }
fallthrough; fallthrough;
......
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* faulty.c : Multiple Devices driver for Linux
*
* Copyright (C) 2004 Neil Brown
*
* faulty-device-simulator personality for md
*/
/*
* The "faulty" personality causes some requests to fail.
*
* Possible failure modes are:
* reads fail "randomly" but succeed on retry
* writes fail "randomly" but succeed on retry
* reads for some address fail and then persist until a write
* reads for some address fail and then persist irrespective of write
* writes for some address fail and persist
* all writes fail
*
* Different modes can be active at a time, but only
* one can be set at array creation. Others can be added later.
* A mode can be one-shot or recurrent with the recurrence being
* once in every N requests.
* The bottom 5 bits of the "layout" indicate the mode. The
* remainder indicate a period, or 0 for one-shot.
*
* There is an implementation limit on the number of concurrently
* persisting-faulty blocks. When a new fault is requested that would
* exceed the limit, it is ignored.
* All current faults can be cleared using a layout of "0".
*
* Requests are always sent to the device. If they are to fail,
* we clone the bio and insert a new b_end_io into the chain.
*/
/*
 * Fault modes. Values below Modes index the period[] and counters[]
 * arrays of struct faulty_conf.
 */
#define WriteTransient 0
#define ReadTransient 1
#define WritePersistent 2
#define ReadPersistent 3
#define WriteAll 4 /* doesn't go to device */
#define ReadFixable 5
/* Number of real fault modes, i.e. the size of the per-mode arrays. */
#define Modes 6
/* Pseudo-modes understood by faulty_reshape(): reset counters / forget faults. */
#define ClearErrors 31
#define ClearFaults 30
/* Values stored only in faulty_conf.modes[], never taken from the layout. */
#define AllPersist 100 /* internal use only */
#define NoPersist 101
/* The low 5 bits of the layout carry the mode, the remaining bits the period. */
#define ModeMask 0x1f
#define ModeShift 5
/* Capacity of the faults[]/modes[] tables in struct faulty_conf. */
#define MaxFault 50
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/raid/md_u.h>
#include <linux/slab.h>
#include "md.h"
#include <linux/seq_file.h>
/*
 * Completion handler for the clone built by faulty_make_request().
 *
 * The clone's bi_private holds the bio it was cloned from.  The clone's
 * final iterator position is copied back to that bio, the clone is
 * released, and the originating bio is completed with an I/O error.
 */
static void faulty_fail(struct bio *bio)
{
	struct bio *orig = bio->bi_private;

	orig->bi_iter.bi_sector = bio->bi_iter.bi_sector;
	orig->bi_iter.bi_size = bio->bi_iter.bi_size;

	/* The clone must not be touched after this point. */
	bio_put(bio);
	bio_io_error(orig);
}
/*
 * Per-array state of the "faulty" personality, stored in mddev->private.
 */
struct faulty_conf {
/* recurrence period per mode as set by faulty_reshape(); 0 means one-shot */
int period[Modes];
/* per-mode countdown; check_mode() reports a fault when it hits zero */
atomic_t counters[Modes];
/* sectors of the recorded persistent faults; the first nfaults slots are in use */
sector_t faults[MaxFault];
/* persistence mode of the matching faults[] slot */
int modes[MaxFault];
/* number of slots of faults[]/modes[] currently in use */
int nfaults;
/* member device that requests are forwarded to, set in faulty_run() */
struct md_rdev *rdev;
};
static int check_mode(struct faulty_conf *conf, int mode)
{
if (conf->period[mode] == 0 &&
atomic_read(&conf->counters[mode]) <= 0)
return 0; /* no failure, no decrement */
if (atomic_dec_and_test(&conf->counters[mode])) {
if (conf->period[mode])
atomic_set(&conf->counters[mode], conf->period[mode]);
return 1;
}
return 0;
}
/*
 * Look for a recorded persistent fault inside the sector range
 * [@start, @end) and decide whether a request in direction @dir
 * (READ or WRITE) must fail because of it.
 *
 * Only the first recorded fault that lies in the range is considered.
 * A write that hits a ReadFixable fault "fixes" it: the slot is turned
 * into NoPersist and the write is allowed.
 *
 * Returns 1 if the request should fail, 0 otherwise.
 */
static int check_sector(struct faulty_conf *conf, sector_t start, sector_t end, int dir)
{
	int slot;

	for (slot = 0; slot < conf->nfaults; slot++) {
		sector_t bad = conf->faults[slot];

		if (bad < start || bad >= end)
			continue;

		/* First fault inside the range: mode and direction decide. */
		switch (conf->modes[slot] * 2 + dir) {
		case WritePersistent*2+WRITE:
		case ReadPersistent*2+READ:
		case ReadFixable*2+READ:
		case AllPersist*2+READ:
		case AllPersist*2+WRITE:
			return 1;
		case ReadFixable*2+WRITE:
			conf->modes[slot] = NoPersist;
			return 0;
		default:
			return 0;
		}
	}
	return 0;
}
/*
 * Record a persistent fault of kind @mode at sector @start.
 *
 * If @start is already recorded, the existing slot is updated so that it
 * reflects both the old and the new kind (becoming AllPersist when read and
 * write persistence are combined).  Otherwise the fault is stored in a slot
 * marked NoPersist if one was seen, or appended at the end of the table.
 * When the table is full the request is silently ignored.
 */
static void add_sector(struct faulty_conf *conf, sector_t start, int mode)
{
int i;
/* slot to use for a new entry; defaults to appending after the last one */
int n = conf->nfaults;
for (i=0; i<conf->nfaults; i++)
if (conf->faults[i] == start) {
/* sector already recorded: merge the new mode into this slot */
switch(mode) {
case NoPersist: conf->modes[i] = mode; return;
case WritePersistent:
if (conf->modes[i] == ReadPersistent ||
conf->modes[i] == ReadFixable)
conf->modes[i] = AllPersist;
else
conf->modes[i] = WritePersistent;
return;
case ReadPersistent:
if (conf->modes[i] == WritePersistent)
conf->modes[i] = AllPersist;
else
conf->modes[i] = ReadPersistent;
return;
case ReadFixable:
if (conf->modes[i] == WritePersistent ||
conf->modes[i] == ReadPersistent)
conf->modes[i] = AllPersist;
else
conf->modes[i] = ReadFixable;
return;
}
/* any other mode falls out of the switch and the scan continues */
} else if (conf->modes[i] == NoPersist)
/* remember a slot that no longer persists so it can be reused */
n = i;
/* table full and nothing to reuse: ignore the new fault */
if (n >= MaxFault)
return;
conf->faults[n] = start;
conf->modes[n] = mode;
/* only grow the table when the entry was appended, not when a slot was reused */
if (conf->nfaults == n)
conf->nfaults = n+1;
}
/*
 * Request entry point of the "faulty" personality.
 *
 * Decides whether @bio should fail, based on the recorded persistent faults
 * (check_sector()) and on the per-mode countdowns (check_mode()), then
 * forwards the request to the member device.  A request chosen to fail is
 * still submitted, but as a clone whose completion handler (faulty_fail())
 * fails the original bio.  The one exception is WriteAll, where the write
 * is failed immediately without being submitted.
 *
 * Always returns true.
 */
static bool faulty_make_request(struct mddev *mddev, struct bio *bio)
{
struct faulty_conf *conf = mddev->private;
int failit = 0;
if (bio_data_dir(bio) == WRITE) {
/* write request */
if (atomic_read(&conf->counters[WriteAll])) {
/* special case - don't decrement, don't submit_bio_noacct,
* just fail immediately
*/
bio_io_error(bio);
return true;
}
/* a previously recorded persistent fault covers this range */
if (check_sector(conf, bio->bi_iter.bi_sector,
bio_end_sector(bio), WRITE))
failit = 1;
/* each check_mode() call below may decrement that mode's counter */
if (check_mode(conf, WritePersistent)) {
add_sector(conf, bio->bi_iter.bi_sector,
WritePersistent);
failit = 1;
}
if (check_mode(conf, WriteTransient))
failit = 1;
} else {
/* read request */
if (check_sector(conf, bio->bi_iter.bi_sector,
bio_end_sector(bio), READ))
failit = 1;
if (check_mode(conf, ReadTransient))
failit = 1;
/* persistent read faults are recorded at the first sector of the bio */
if (check_mode(conf, ReadPersistent)) {
add_sector(conf, bio->bi_iter.bi_sector,
ReadPersistent);
failit = 1;
}
if (check_mode(conf, ReadFixable)) {
add_sector(conf, bio->bi_iter.bi_sector,
ReadFixable);
failit = 1;
}
}
md_account_bio(mddev, &bio);
if (failit) {
/*
 * Send a clone to the device; faulty_fail() runs when it completes
 * and fails the bio stored in bi_private.
 */
struct bio *b = bio_alloc_clone(conf->rdev->bdev, bio, GFP_NOIO,
&mddev->bio_set);
b->bi_private = bio;
b->bi_end_io = faulty_fail;
bio = b;
} else
/* no fault: simply redirect the bio to the member device */
bio_set_dev(bio, conf->rdev->bdev);
submit_bio_noacct(bio);
return true;
}
/*
 * Append the personality status to @seq.
 *
 * For every mode whose countdown is non-zero the remaining count and the
 * configured period are printed as "Name=count(period)"; WriteAll is
 * printed as a bare flag.  The number of recorded persistent faults is
 * always printed last.
 */
static void faulty_status(struct seq_file *seq, struct mddev *mddev)
{
	struct faulty_conf *conf = mddev->private;
	int remaining;

	remaining = atomic_read(&conf->counters[WriteTransient]);
	if (remaining)
		seq_printf(seq, " WriteTransient=%d(%d)",
			   remaining, conf->period[WriteTransient]);

	remaining = atomic_read(&conf->counters[ReadTransient]);
	if (remaining)
		seq_printf(seq, " ReadTransient=%d(%d)",
			   remaining, conf->period[ReadTransient]);

	remaining = atomic_read(&conf->counters[WritePersistent]);
	if (remaining)
		seq_printf(seq, " WritePersistent=%d(%d)",
			   remaining, conf->period[WritePersistent]);

	remaining = atomic_read(&conf->counters[ReadPersistent]);
	if (remaining)
		seq_printf(seq, " ReadPersistent=%d(%d)",
			   remaining, conf->period[ReadPersistent]);

	remaining = atomic_read(&conf->counters[ReadFixable]);
	if (remaining)
		seq_printf(seq, " ReadFixable=%d(%d)",
			   remaining, conf->period[ReadFixable]);

	if (atomic_read(&conf->counters[WriteAll]))
		seq_printf(seq, " WriteAll");

	seq_printf(seq, " nfaults=%d", conf->nfaults);
}
/*
 * Apply a new fault configuration encoded in mddev->new_layout.
 *
 * The low ModeMask bits select the mode, the remaining bits give the
 * period.  ClearFaults forgets all recorded faults, ClearErrors resets
 * every period and counter, and a real mode (< Modes) is armed with the
 * given period (a period of 0 arms it for a single shot).
 *
 * A negative new_layout means "nothing to do".  After a successful change
 * both layout fields are set to -1 so that further changes come through.
 *
 * Returns 0 on success, -EINVAL for an unknown mode.
 */
static int faulty_reshape(struct mddev *mddev)
{
	struct faulty_conf *conf = mddev->private;
	int mode, count;

	if (mddev->new_layout < 0)
		return 0;

	mode = mddev->new_layout & ModeMask;
	count = mddev->new_layout >> ModeShift;

	if (mode == ClearFaults) {
		conf->nfaults = 0;
	} else if (mode == ClearErrors) {
		int m;

		for (m = 0; m < Modes; m++) {
			conf->period[m] = 0;
			atomic_set(&conf->counters[m], 0);
		}
	} else if (mode < Modes) {
		conf->period[mode] = count;
		/* one-shot: let the very next matching request fail */
		atomic_set(&conf->counters[mode], count ? count : 1);
	} else {
		return -EINVAL;
	}

	mddev->new_layout = -1;
	mddev->layout = -1; /* makes sure further changes come through */
	return 0;
}
/*
 * Report the array size in sectors: @sectors when it is non-zero,
 * otherwise mddev->dev_sectors.  A non-zero @raid_disks is not supported
 * and triggers a one-time warning.
 */
static sector_t faulty_size(struct mddev *mddev, sector_t sectors, int raid_disks)
{
	WARN_ONCE(raid_disks,
		  "%s does not support generic reshape\n", __func__);

	return sectors ? sectors : mddev->dev_sectors;
}
/*
 * Start the "faulty" personality on @mddev.
 *
 * Allocates the per-array state with all modes disarmed and no recorded
 * faults, remembers the member device (the last one visited when the
 * array has several), sets the array size and finally applies whatever
 * fault configuration is present in the layout.
 *
 * Returns 0 on success, -EINVAL if md_check_no_bitmap() rejects the array,
 * or -ENOMEM if the state cannot be allocated.
 */
static int faulty_run(struct mddev *mddev)
{
	struct faulty_conf *conf;
	struct md_rdev *rdev;
	int mode;

	if (md_check_no_bitmap(mddev))
		return -EINVAL;

	conf = kmalloc(sizeof(*conf), GFP_KERNEL);
	if (!conf)
		return -ENOMEM;

	for (mode = 0; mode < Modes; mode++) {
		atomic_set(&conf->counters[mode], 0);
		conf->period[mode] = 0;
	}
	conf->nfaults = 0;

	rdev_for_each(rdev, mddev) {
		conf->rdev = rdev;
		disk_stack_limits(mddev->gendisk, rdev->bdev,
				  rdev->data_offset << 9);
	}

	md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
	mddev->private = conf;

	/* The result is deliberately not checked here, as in the original. */
	faulty_reshape(mddev);

	return 0;
}
/*
 * Release the per-array state passed in @priv (the struct faulty_conf
 * allocated by faulty_run()).  @mddev is not used.
 */
static void faulty_free(struct mddev *mddev, void *priv)
{
	kfree(priv);
}
/*
 * Personality descriptor handed to register_md_personality(): it names the
 * personality, ties it to LEVEL_FAULTY and lists the callbacks implemented
 * in this file.
 */
static struct md_personality faulty_personality =
{
.name = "faulty",
.level = LEVEL_FAULTY,
.owner = THIS_MODULE,
.make_request = faulty_make_request,
.run = faulty_run,
.free = faulty_free,
.status = faulty_status,
/* fault modes are configured through layout changes */
.check_reshape = faulty_reshape,
.size = faulty_size,
};
/* Module load: register the "faulty" personality with the md core. */
static int __init raid_init(void)
{
return register_md_personality(&faulty_personality);
}
/* Module unload: remove the "faulty" personality again. */
static void raid_exit(void)
{
unregister_md_personality(&faulty_personality);
}
module_init(raid_init);
module_exit(raid_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Fault injection personality for MD (deprecated)");
/* Aliases under which the module can be requested. */
MODULE_ALIAS("md-personality-10"); /* faulty */
MODULE_ALIAS("md-faulty");
MODULE_ALIAS("md-level--5");
// SPDX-License-Identifier: GPL-2.0-or-later
/*
linear.c : Multiple Devices driver for Linux
Copyright (C) 1994-96 Marc ZYNGIER
<zyngier@ufr-info-p7.ibp.fr> or
<maz@gloups.fdn.fr>
Linear mode management functions.
*/
#include <linux/blkdev.h>
#include <linux/raid/md_u.h>
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <trace/events/block.h>
#include "md.h"
#include "md-linear.h"
/*
* find which device holds a particular offset
*/
/*
 * Find the member device that holds array sector @sector.
 *
 * conf->disks[] is ordered by increasing end_sector, so a binary search
 * yields the first slot whose end_sector lies beyond @sector.  When
 * @sector is past the end of the array the last slot is returned; callers
 * must range-check the result themselves.
 */
static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
{
	struct linear_conf *conf = mddev->private;
	int lo = 0;
	int hi = mddev->raid_disks - 1;

	while (lo < hi) {
		int mid = (hi + lo) / 2;

		if (conf->disks[mid].end_sector > sector)
			hi = mid;
		else
			lo = mid + 1;
	}

	return &conf->disks[lo];
}
/*
 * Report the array size in sectors, as accumulated in the current
 * linear_conf.  Generic reshape is not supported: a non-zero @sectors or
 * @raid_disks triggers a one-time warning and is otherwise ignored.
 */
static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks)
{
	struct linear_conf *conf = mddev->private;

	WARN_ONCE(sectors || raid_disks,
		  "%s does not support generic reshape\n", __func__);

	return conf->array_sectors;
}
/*
 * Build a linear_conf describing @raid_disks member devices of @mddev.
 *
 * Every rdev of the array is placed in conf->disks[] at the index given by
 * its raid_disk, its size is rounded down to a multiple of the chunk size
 * (when one is set), and the cumulative end_sector of each slot is
 * computed.
 *
 * Returns the new configuration, or NULL when the allocation fails, a
 * slot number is out of range or used twice, or the number of devices
 * found differs from @raid_disks.
 */
static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
{
	struct linear_conf *conf;
	struct md_rdev *rdev;
	int i, cnt;

	conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
	if (!conf)
		return NULL;

	/*
	 * conf->raid_disks is copy of mddev->raid_disks. The reason to
	 * keep a copy of mddev->raid_disks in struct linear_conf is,
	 * mddev->raid_disks may not be consistent with pointers number of
	 * conf->disks[] when it is updated in linear_add() and used to
	 * iterate old conf->disks[] array in linear_congested().
	 * Here conf->raid_disks is always consistent with number of
	 * pointers in conf->disks[] array, and mddev->private is updated
	 * with rcu_assign_pointer() in linear_add(), such race can be
	 * avoided.
	 */
	conf->raid_disks = raid_disks;

	cnt = 0;
	conf->array_sectors = 0;

	rdev_for_each(rdev, mddev) {
		int j = rdev->raid_disk;
		struct dev_info *disk;
		sector_t sectors;

		/*
		 * Validate the slot number before forming a pointer into
		 * disks[]; a slot that already has an rdev is a duplicate.
		 */
		if (j < 0 || j >= raid_disks || conf->disks[j].rdev) {
			pr_warn("md/linear:%s: disk numbering problem. Aborting!\n",
				mdname(mddev));
			goto out;
		}

		disk = &conf->disks[j];
		disk->rdev = rdev;
		if (mddev->chunk_sectors) {
			/* round the device size down to whole chunks */
			sectors = rdev->sectors;
			sector_div(sectors, mddev->chunk_sectors);
			rdev->sectors = sectors * mddev->chunk_sectors;
		}

		disk_stack_limits(mddev->gendisk, rdev->bdev,
				  rdev->data_offset << 9);

		conf->array_sectors += rdev->sectors;
		cnt++;
	}
	if (cnt != raid_disks) {
		pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
			mdname(mddev));
		goto out;
	}

	/*
	 * Here we calculate the device offsets.
	 */
	conf->disks[0].end_sector = conf->disks[0].rdev->sectors;

	for (i = 1; i < raid_disks; i++)
		conf->disks[i].end_sector =
			conf->disks[i-1].end_sector +
			conf->disks[i].rdev->sectors;

	return conf;

out:
	kfree(conf);
	return NULL;
}
/*
 * Start the linear personality on @mddev.
 *
 * Builds the device table with linear_conf(), publishes it in
 * mddev->private, sets the array size and registers integrity support.
 * If integrity registration fails the configuration is freed again and
 * mddev->private is reset to NULL.
 *
 * Returns 0 on success or a negative errno: -EINVAL when
 * md_check_no_bitmap() rejects the array or the device table cannot be
 * built, otherwise the error from md_integrity_register().
 */
static int linear_run(struct mddev *mddev)
{
	struct linear_conf *conf;
	int ret;

	if (md_check_no_bitmap(mddev))
		return -EINVAL;

	conf = linear_conf(mddev, mddev->raid_disks);
	if (!conf) {
		/*
		 * Report failure as a negative errno rather than a bare
		 * positive 1.  linear_conf() does not say why it failed
		 * (allocation or an inconsistent set of devices), so a
		 * single code has to cover both.
		 */
		return -EINVAL;
	}

	mddev->private = conf;
	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));

	ret = md_integrity_register(mddev);
	if (ret) {
		kfree(conf);
		mddev->private = NULL;
	}
	return ret;
}
/*
 * Hot-add @rdev to a running linear array, growing it by one device.
 *
 * Returns 0 on success, -EINVAL if the device's saved slot is not the next
 * free one, or -ENOMEM if a new configuration cannot be built.
 */
static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
{
/* Adding a drive to a linear array allows the array to grow.
 * It is permitted if the new drive has a matching superblock
 * already on it, with raid_disk equal to raid_disks.
 * It is achieved by creating a new struct linear_conf
 * and swapping it in in-place of the current one.
 * The old one is handed to kfree_rcu() after the new one has been
 * published.
 */
struct linear_conf *newconf, *oldconf;
if (rdev->saved_raid_disk != mddev->raid_disks)
return -EINVAL;
rdev->raid_disk = rdev->saved_raid_disk;
rdev->saved_raid_disk = -1;
newconf = linear_conf(mddev,mddev->raid_disks+1);
if (!newconf)
return -ENOMEM;
/* newconf->raid_disks already keeps a copy of the increased
 * value of mddev->raid_disks, WARN_ONCE() is just used to make
 * sure of this. It is possible that oldconf is still referenced
 * in linear_congested(), therefore kfree_rcu() is used to free
 * oldconf until no one uses it anymore.
 */
oldconf = rcu_dereference_protected(mddev->private,
lockdep_is_held(&mddev->reconfig_mutex));
mddev->raid_disks++;
WARN_ONCE(mddev->raid_disks != newconf->raid_disks,
"copied raid_disks doesn't match mddev->raid_disks");
rcu_assign_pointer(mddev->private, newconf);
/* propagate the larger size to the array and to the gendisk */
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
kfree_rcu(oldconf, rcu);
return 0;
}
/*
 * Release the configuration passed in @priv (a struct linear_conf).
 * @mddev is not used.
 */
static void linear_free(struct mddev *mddev, void *priv)
{
	kfree(priv);
}
/*
 * Request entry point of the linear personality.
 *
 * Looks up the member device that holds the first sector of @bio, splits
 * the bio if it crosses into the next device, translates the sector to the
 * member device's address space and submits it.  Requests outside the
 * located device's range are logged and failed, as are requests to a
 * device reported broken by is_rdev_broken().
 *
 * Always returns true.
 */
static bool linear_make_request(struct mddev *mddev, struct bio *bio)
{
struct dev_info *tmp_dev;
sector_t start_sector, end_sector, data_offset;
/* array-relative start sector, kept for range checks and tracing */
sector_t bio_sector = bio->bi_iter.bi_sector;
/* a true return from md_flush_request() means there is nothing left to do here */
if (unlikely(bio->bi_opf & REQ_PREFLUSH)
&& md_flush_request(mddev, bio))
return true;
tmp_dev = which_dev(mddev, bio_sector);
/* [start_sector, end_sector) is the array range covered by this device */
start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
end_sector = tmp_dev->end_sector;
data_offset = tmp_dev->rdev->data_offset;
if (unlikely(bio_sector >= end_sector ||
bio_sector < start_sector))
goto out_of_bounds;
if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
md_error(mddev, tmp_dev->rdev);
bio_io_error(bio);
return true;
}
if (unlikely(bio_end_sector(bio) > end_sector)) {
/* This bio crosses a device boundary, so we have to split it */
struct bio *split = bio_split(bio, end_sector - bio_sector,
GFP_NOIO, &mddev->bio_set);
bio_chain(split, bio);
/* resubmit the remainder; continue below with the front part only */
submit_bio_noacct(bio);
bio = split;
}
md_account_bio(mddev, &bio);
bio_set_dev(bio, tmp_dev->rdev->bdev);
/* translate from array sector to sector on the member device */
bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
start_sector + data_offset;
if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!bdev_max_discard_sectors(bio->bi_bdev))) {
/* Just ignore it */
bio_endio(bio);
} else {
if (mddev->gendisk)
trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
bio_sector);
mddev_check_write_zeroes(mddev, bio);
submit_bio_noacct(bio);
}
return true;
out_of_bounds:
pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %pg: %llu sectors, offset %llu\n",
mdname(mddev),
(unsigned long long)bio->bi_iter.bi_sector,
tmp_dev->rdev->bdev,
(unsigned long long)tmp_dev->rdev->sectors,
(unsigned long long)start_sector);
bio_io_error(bio);
return true;
}
/*
 * Append the personality status to @seq: half of chunk_sectors, labelled
 * "k rounding" (i.e. the rounding chunk in KiB, assuming 512-byte sectors).
 */
static void linear_status (struct seq_file *seq, struct mddev *mddev)
{
seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
}
/*
 * Error handler: a failure of any member device fails the whole array.
 *
 * Sets MD_BROKEN in mddev->flags and logs the failing device.  The message
 * is only printed on the transition, i.e. when the flag was not already
 * set.
 */
static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
{
	if (test_and_set_bit(MD_BROKEN, &mddev->flags))
		return;

	/* Use the same "md/linear:<name>:" prefix as the other messages. */
	pr_crit("md/linear:%s: Disk failure on %pg detected, failing array.\n",
		mdname(mddev), rdev->bdev);
}
/*
 * Quiesce callback: intentionally empty, nothing is done for any @state.
 */
static void linear_quiesce(struct mddev *mddev, int state)
{
}
/*
 * Personality descriptor handed to register_md_personality(): it names the
 * personality, ties it to LEVEL_LINEAR and lists the callbacks implemented
 * in this file.
 */
static struct md_personality linear_personality =
{
.name = "linear",
.level = LEVEL_LINEAR,
.owner = THIS_MODULE,
.make_request = linear_make_request,
.run = linear_run,
.free = linear_free,
.status = linear_status,
/* growing the array by one device is the only supported change */
.hot_add_disk = linear_add,
.size = linear_size,
.quiesce = linear_quiesce,
.error_handler = linear_error,
};
/* Module load: register the "linear" personality with the md core. */
static int __init linear_init (void)
{
return register_md_personality (&linear_personality);
}
/* Module unload: remove the "linear" personality again. */
static void linear_exit (void)
{
unregister_md_personality (&linear_personality);
}
module_init(linear_init);
module_exit(linear_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)");
/* Aliases under which the module can be requested. */
MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
MODULE_ALIAS("md-linear");
MODULE_ALIAS("md-level--1");
This diff is collapsed.
This diff is collapsed.
...@@ -173,3 +173,57 @@ static inline void raid1_prepare_flush_writes(struct bitmap *bitmap) ...@@ -173,3 +173,57 @@ static inline void raid1_prepare_flush_writes(struct bitmap *bitmap)
else else
md_bitmap_unplug(bitmap); md_bitmap_unplug(bitmap);
} }
/*
 * Decay the per-rdev read error count before a new error is accounted.
 *
 * The count is halved for every full hour that has passed since the last
 * recorded read error, and the time of the last error is updated to now.
 * On the very first error only the timestamp is recorded.  @mddev is not
 * used.
 */
static inline void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
{
	unsigned int errors = atomic_read(&rdev->read_errors);
	long now = ktime_get_seconds();
	unsigned long elapsed_hours;

	if (rdev->last_read_error == 0) {
		/* first time we've seen a read error */
		rdev->last_read_error = now;
		return;
	}

	elapsed_hours = (long)(now - rdev->last_read_error) / 3600;
	rdev->last_read_error = now;

	/*
	 * Shifting by the full width of the counter or more would
	 * overflow the shift, so such a long gap simply clears the count.
	 */
	if (elapsed_hours < 8 * sizeof(errors))
		errors >>= elapsed_hours;
	else
		errors = 0;

	atomic_set(&rdev->read_errors, errors);
}
/*
 * Account one more read error on @rdev and check it against the array's
 * max_corr_read_errors limit.
 *
 * The stored count is first decayed by check_decay_read_errors() and then
 * incremented.  If it now exceeds the limit, the condition is logged and
 * the device is failed through md_error().
 *
 * Returns true if the device was failed, false otherwise.
 */
static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
{
	int limit = atomic_read(&mddev->max_corr_read_errors);
	int seen;

	check_decay_read_errors(mddev, rdev);

	seen = atomic_inc_return(&rdev->read_errors);
	if (seen <= limit)
		return false;

	pr_notice("md/"RAID_1_10_NAME":%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
		  mdname(mddev), rdev->bdev, seen, limit);
	pr_notice("md/"RAID_1_10_NAME":%s: %pg: Failing raid device\n",
		  mdname(mddev), rdev->bdev);
	md_error(mddev, rdev);
	return true;
}
...@@ -49,6 +49,7 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr); ...@@ -49,6 +49,7 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
#define raid1_log(md, fmt, args...) \ #define raid1_log(md, fmt, args...) \
do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0) do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
#define RAID_1_10_NAME "raid1"
#include "raid1-10.c" #include "raid1-10.c"
#define START(node) ((node)->start) #define START(node) ((node)->start)
...@@ -1124,8 +1125,6 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio, ...@@ -1124,8 +1125,6 @@ static void alloc_behind_master_bio(struct r1bio *r1_bio,
behind_bio = bio_alloc_bioset(NULL, vcnt, 0, GFP_NOIO, behind_bio = bio_alloc_bioset(NULL, vcnt, 0, GFP_NOIO,
&r1_bio->mddev->bio_set); &r1_bio->mddev->bio_set);
if (!behind_bio)
return;
/* discard op, we don't support writezero/writesame yet */ /* discard op, we don't support writezero/writesame yet */
if (!bio_has_data(bio)) { if (!bio_has_data(bio)) {
...@@ -2257,16 +2256,24 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio) ...@@ -2257,16 +2256,24 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
* 3. Performs writes following reads for array synchronising. * 3. Performs writes following reads for array synchronising.
*/ */
static void fix_read_error(struct r1conf *conf, int read_disk, static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
sector_t sect, int sectors)
{ {
sector_t sect = r1_bio->sector;
int sectors = r1_bio->sectors;
int read_disk = r1_bio->read_disk;
struct mddev *mddev = conf->mddev; struct mddev *mddev = conf->mddev;
struct md_rdev *rdev = rcu_dereference(conf->mirrors[read_disk].rdev);
if (exceed_read_errors(mddev, rdev)) {
r1_bio->bios[r1_bio->read_disk] = IO_BLOCKED;
return;
}
while(sectors) { while(sectors) {
int s = sectors; int s = sectors;
int d = read_disk; int d = read_disk;
int success = 0; int success = 0;
int start; int start;
struct md_rdev *rdev;
if (s > (PAGE_SIZE>>9)) if (s > (PAGE_SIZE>>9))
s = PAGE_SIZE >> 9; s = PAGE_SIZE >> 9;
...@@ -2507,8 +2514,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio) ...@@ -2507,8 +2514,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
if (mddev->ro == 0 if (mddev->ro == 0
&& !test_bit(FailFast, &rdev->flags)) { && !test_bit(FailFast, &rdev->flags)) {
freeze_array(conf, 1); freeze_array(conf, 1);
fix_read_error(conf, r1_bio->read_disk, fix_read_error(conf, r1_bio);
r1_bio->sector, r1_bio->sectors);
unfreeze_array(conf); unfreeze_array(conf);
} else if (mddev->ro == 0 && test_bit(FailFast, &rdev->flags)) { } else if (mddev->ro == 0 && test_bit(FailFast, &rdev->flags)) {
md_error(mddev, rdev); md_error(mddev, rdev);
......
...@@ -19,6 +19,8 @@ ...@@ -19,6 +19,8 @@
#include <linux/raid/md_p.h> #include <linux/raid/md_p.h>
#include <trace/events/block.h> #include <trace/events/block.h>
#include "md.h" #include "md.h"
#define RAID_1_10_NAME "raid10"
#include "raid10.h" #include "raid10.h"
#include "raid0.h" #include "raid0.h"
#include "md-bitmap.h" #include "md-bitmap.h"
...@@ -2592,42 +2594,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio) ...@@ -2592,42 +2594,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
} }
} }
/*
* Used by fix_read_error() to decay the per rdev read_errors.
* We halve the read error count for every hour that has elapsed
* since the last recorded read error.
*
*/
static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
{
long cur_time_mon;
unsigned long hours_since_last;
unsigned int read_errors = atomic_read(&rdev->read_errors);
cur_time_mon = ktime_get_seconds();
if (rdev->last_read_error == 0) {
/* first time we've seen a read error */
rdev->last_read_error = cur_time_mon;
return;
}
hours_since_last = (long)(cur_time_mon -
rdev->last_read_error) / 3600;
rdev->last_read_error = cur_time_mon;
/*
* if hours_since_last is > the number of bits in read_errors
* just set read errors to 0. We do this to avoid
* overflowing the shift of read_errors by hours_since_last.
*/
if (hours_since_last >= 8 * sizeof(read_errors))
atomic_set(&rdev->read_errors, 0);
else
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
}
static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector, static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
int sectors, struct page *page, enum req_op op) int sectors, struct page *page, enum req_op op)
{ {
...@@ -2665,7 +2631,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2665,7 +2631,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
int sect = 0; /* Offset from r10_bio->sector */ int sect = 0; /* Offset from r10_bio->sector */
int sectors = r10_bio->sectors, slot = r10_bio->read_slot; int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
struct md_rdev *rdev; struct md_rdev *rdev;
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[slot].devnum; int d = r10_bio->devs[slot].devnum;
/* still own a reference to this rdev, so it cannot /* still own a reference to this rdev, so it cannot
...@@ -2678,15 +2643,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10 ...@@ -2678,15 +2643,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
more fix_read_error() attempts */ more fix_read_error() attempts */
return; return;
check_decay_read_errors(mddev, rdev); if (exceed_read_errors(mddev, rdev)) {
atomic_inc(&rdev->read_errors);
if (atomic_read(&rdev->read_errors) > max_read_errors) {
pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
mdname(mddev), rdev->bdev,
atomic_read(&rdev->read_errors), max_read_errors);
pr_notice("md/raid10:%s: %pg: Failing raid device\n",
mdname(mddev), rdev->bdev);
md_error(mddev, rdev);
r10_bio->devs[slot].bio = IO_BLOCKED; r10_bio->devs[slot].bio = IO_BLOCKED;
return; return;
} }
......
...@@ -2,15 +2,11 @@ ...@@ -2,15 +2,11 @@
/* /*
md_p.h : physical layout of Linux RAID devices md_p.h : physical layout of Linux RAID devices
Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option) the Free Software Foundation; either version 2, or (at your option)
any later version. any later version.
You should have received a copy of the GNU General Public License
(for example /usr/src/linux/COPYING); if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/ */
#ifndef _MD_P_H #ifndef _MD_P_H
...@@ -237,7 +233,7 @@ struct mdp_superblock_1 { ...@@ -237,7 +233,7 @@ struct mdp_superblock_1 {
char set_name[32]; /* set and interpreted by user-space */ char set_name[32]; /* set and interpreted by user-space */
__le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/
__le32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ __le32 level; /* 0,1,4,5 */
__le32 layout; /* only for raid5 and raid10 currently */ __le32 layout; /* only for raid5 and raid10 currently */
__le64 size; /* used size of component devices, in 512byte sectors */ __le64 size; /* used size of component devices, in 512byte sectors */
......
...@@ -2,15 +2,11 @@ ...@@ -2,15 +2,11 @@
/* /*
md_u.h : user <=> kernel API between Linux raidtools and RAID drivers md_u.h : user <=> kernel API between Linux raidtools and RAID drivers
Copyright (C) 1998 Ingo Molnar Copyright (C) 1998 Ingo Molnar
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option) the Free Software Foundation; either version 2, or (at your option)
any later version. any later version.
You should have received a copy of the GNU General Public License
(for example /usr/src/linux/COPYING); if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/ */
#ifndef _UAPI_MD_U_H #ifndef _UAPI_MD_U_H
...@@ -107,11 +103,6 @@ typedef struct mdu_array_info_s { ...@@ -107,11 +103,6 @@ typedef struct mdu_array_info_s {
} mdu_array_info_t; } mdu_array_info_t;
/* non-obvious values for 'level' */
#define LEVEL_MULTIPATH (-4)
#define LEVEL_LINEAR (-1)
#define LEVEL_FAULTY (-5)
/* we need a value for 'no level specified' and 0 /* we need a value for 'no level specified' and 0
* means 'raid0', so we need something else. This is * means 'raid0', so we need something else. This is
* for internal use only * for internal use only
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment