Commit 223cdea4 authored by Linus Torvalds's avatar Linus Torvalds

Merge branch 'for-linus' of git://neil.brown.name/md

* 'for-linus' of git://neil.brown.name/md: (53 commits)
  md/raid5 revise rules for when to update metadata during reshape
  md/raid5: minor code cleanups in make_request.
  md: remove CONFIG_MD_RAID_RESHAPE config option.
  md/raid5: be more careful about write ordering when reshaping.
  md: don't display meaningless values in sysfs files resync_start and sync_speed
  md/raid5: allow layout and chunksize to be changed on active array.
  md/raid5: reshape using largest of old and new chunk size
  md/raid5: prepare for allowing reshape to change layout
  md/raid5: prepare for allowing reshape to change chunksize.
  md/raid5: clearly differentiate 'before' and 'after' stripes during reshape.
  Documentation/md.txt update
  md: allow number of drives in raid5 to be reduced
  md/raid5: change reshape-progress measurement to cope with reshaping backwards.
  md: add explicit method to signal the end of a reshape.
  md/raid5: enhance raid5_size to work correctly with negative delta_disks
  md/raid5: drop qd_idx from r6_state
  md/raid6: move raid6 data processing to raid6_pq.ko
  md: raid5 run(): Fix max_degraded for raid level 4.
  md: 'array_size' sysfs attribute
  md: centralize ->array_sectors modifications
  ...
parents 31e6e2da c8f517c4
......@@ -164,15 +164,19 @@ All md devices contain:
raid_disks
a text file with a simple number indicating the number of devices
in a fully functional array. If this is not yet known, the file
will be empty. If an array is being resized (not currently
possible) this will contain the larger of the old and new sizes.
Some raid level (RAID1) allow this value to be set while the
array is active. This will reconfigure the array. Otherwise
it can only be set while assembling an array.
will be empty. If an array is being resized this will contain
the new number of devices.
Some raid levels allow this value to be set while the array is
active. This will reconfigure the array. Otherwise it can only
be set while assembling an array.
A change to this attribute will not be permitted if it would
reduce the size of the array. To reduce the number of drives
in an e.g. raid5, the array size must first be reduced by
setting the 'array_size' attribute.
chunk_size
This is the size if bytes for 'chunks' and is only relevant to
raid levels that involve striping (1,4,5,6,10). The address space
This is the size in bytes for 'chunks' and is only relevant to
raid levels that involve striping (0,4,5,6,10). The address space
of the array is conceptually divided into chunks and consecutive
chunks are striped onto neighbouring devices.
The size should be at least PAGE_SIZE (4k) and should be a power
......@@ -183,6 +187,20 @@ All md devices contain:
simply a number that is interpretted differently by different
levels. It can be written while assembling an array.
array_size
This can be used to artificially constrain the available space in
the array to be less than is actually available on the combined
devices. Writing a number (in Kilobytes) which is less than
the available size will set the size. Any reconfiguration of the
array (e.g. adding devices) will not cause the size to change.
Writing the word 'default' will cause the effective size of the
array to be whatever size is actually available based on
'level', 'chunk_size' and 'component_size'.
This can be used to reduce the size of the array before reducing
the number of devices in a raid4/5/6, or to support external
metadata formats which mandate such clipping.
reshape_position
This is either "none" or a sector number within the devices of
the array where "reshape" is up to. If this is set, the three
......@@ -207,6 +225,11 @@ All md devices contain:
about the array. It can be 0.90 (traditional format), 1.0, 1.1,
1.2 (newer format in varying locations) or "none" indicating that
the kernel isn't managing metadata at all.
Alternately it can be "external:" followed by a string which
is set by user-space. This indicates that metadata is managed
by a user-space program. Any device failure or other event that
requires a metadata update will cause array activity to be
suspended until the event is acknowledged.
resync_start
The point at which resync should start. If no resync is needed,
......
......@@ -18,8 +18,8 @@
#define BH_TRACE 0
#include <linux/module.h>
#include <linux/raid/md.h>
#include <linux/raid/xor.h>
#include <linux/jiffies.h>
#include <asm/xor.h>
/* The xor routines to use. */
......
......@@ -121,6 +121,7 @@ config MD_RAID10
config MD_RAID456
tristate "RAID-4/RAID-5/RAID-6 mode"
depends on BLK_DEV_MD
select MD_RAID6_PQ
select ASYNC_MEMCPY
select ASYNC_XOR
---help---
......@@ -151,34 +152,8 @@ config MD_RAID456
If unsure, say Y.
config MD_RAID5_RESHAPE
bool "Support adding drives to a raid-5 array"
depends on MD_RAID456
default y
---help---
A RAID-5 set can be expanded by adding extra drives. This
requires "restriping" the array which means (almost) every
block must be written to a different place.
This option allows such restriping to be done while the array
is online.
You will need mdadm version 2.4.1 or later to use this
feature safely. During the early stage of reshape there is
a critical section where live data is being over-written. A
crash during this time needs extra care for recovery. The
newer mdadm takes a copy of the data in the critical section
and will restore it, if necessary, after a crash.
The mdadm usage is e.g.
mdadm --grow /dev/md1 --raid-disks=6
to grow '/dev/md1' to having 6 disks.
Note: The array can only be expanded, not contracted.
There should be enough spares already present to make the new
array workable.
If unsure, say Y.
config MD_RAID6_PQ
tristate
config MD_MULTIPATH
tristate "Multipath I/O support"
......
......@@ -2,20 +2,21 @@
# Makefile for the kernel software RAID and LVM drivers.
#
dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-mod-y += dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \
dm-ioctl.o dm-io.o dm-kcopyd.o dm-sysfs.o
dm-multipath-objs := dm-path-selector.o dm-mpath.o
dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-snap-transient.o \
dm-multipath-y += dm-path-selector.o dm-mpath.o
dm-snapshot-y += dm-snap.o dm-exception-store.o dm-snap-transient.o \
dm-snap-persistent.o
dm-mirror-objs := dm-raid1.o
md-mod-objs := md.o bitmap.o
raid456-objs := raid5.o raid6algos.o raid6recov.o raid6tables.o \
dm-mirror-y += dm-raid1.o
md-mod-y += md.o bitmap.o
raid456-y += raid5.o
raid6_pq-y += raid6algos.o raid6recov.o raid6tables.o \
raid6int1.o raid6int2.o raid6int4.o \
raid6int8.o raid6int16.o raid6int32.o \
raid6altivec1.o raid6altivec2.o raid6altivec4.o \
raid6altivec8.o \
raid6mmx.o raid6sse1.o raid6sse2.o
hostprogs-y := mktables
hostprogs-y += mktables
# Note: link order is important. All raid personalities
# and must come before md.o, as they each initialise
......@@ -26,6 +27,7 @@ obj-$(CONFIG_MD_LINEAR) += linear.o
obj-$(CONFIG_MD_RAID0) += raid0.o
obj-$(CONFIG_MD_RAID1) += raid1.o
obj-$(CONFIG_MD_RAID10) += raid10.o
obj-$(CONFIG_MD_RAID6_PQ) += raid6_pq.o
obj-$(CONFIG_MD_RAID456) += raid456.o
obj-$(CONFIG_MD_MULTIPATH) += multipath.o
obj-$(CONFIG_MD_FAULTY) += faulty.o
......
......@@ -16,6 +16,7 @@
* wait if count gets too high, wake when it drops to half.
*/
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/slab.h>
......@@ -26,8 +27,8 @@
#include <linux/file.h>
#include <linux/mount.h>
#include <linux/buffer_head.h>
#include <linux/raid/md.h>
#include <linux/raid/bitmap.h>
#include "md.h"
#include "bitmap.h"
/* debug macros */
......@@ -111,9 +112,10 @@ static int bitmap_checkpage(struct bitmap *bitmap, unsigned long page, int creat
unsigned char *mappage;
if (page >= bitmap->pages) {
printk(KERN_ALERT
"%s: invalid bitmap page request: %lu (> %lu)\n",
bmname(bitmap), page, bitmap->pages-1);
/* This can happen if bitmap_start_sync goes beyond
* End-of-device while looking for a whole page.
* It is harmless.
*/
return -EINVAL;
}
......@@ -265,7 +267,6 @@ static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
list_for_each_continue_rcu(pos, &mddev->disks) {
rdev = list_entry(pos, mdk_rdev_t, same_set);
if (rdev->raid_disk >= 0 &&
test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags)) {
/* this is a usable devices */
atomic_inc(&rdev->nr_pending);
......@@ -297,7 +298,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
+ size/512 > 0)
/* bitmap runs in to metadata */
goto bad_alignment;
if (rdev->data_offset + mddev->size*2
if (rdev->data_offset + mddev->dev_sectors
> rdev->sb_start + bitmap->offset)
/* data runs in to bitmap */
goto bad_alignment;
......@@ -570,7 +571,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO ||
le32_to_cpu(sb->version) > BITMAP_MAJOR_HI)
reason = "unrecognized superblock version";
else if (chunksize < PAGE_SIZE)
else if (chunksize < 512)
reason = "bitmap chunksize too small";
else if ((1 << ffz(~chunksize)) != chunksize)
reason = "bitmap chunksize not a power of 2";
......@@ -1306,6 +1307,9 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
}
if (bitmap->mddev->degraded)
/* Never clear bits or update events_cleared when degraded */
success = 0;
while (sectors) {
int blocks;
......@@ -1345,7 +1349,7 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
}
}
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
static int __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
int degraded)
{
bitmap_counter_t *bmc;
......@@ -1374,6 +1378,29 @@ int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
return rv;
}
int bitmap_start_sync(struct bitmap *bitmap, sector_t offset, int *blocks,
int degraded)
{
/* bitmap_start_sync must always report on multiples of whole
* pages, otherwise resync (which is very PAGE_SIZE based) will
* get confused.
* So call __bitmap_start_sync repeatedly (if needed) until
* At least PAGE_SIZE>>9 blocks are covered.
* Return the 'or' of the result.
*/
int rv = 0;
int blocks1;
*blocks = 0;
while (*blocks < (PAGE_SIZE>>9)) {
rv |= __bitmap_start_sync(bitmap, offset,
&blocks1, degraded);
offset += blocks1;
*blocks += blocks1;
}
return rv;
}
void bitmap_end_sync(struct bitmap *bitmap, sector_t offset, int *blocks, int aborted)
{
bitmap_counter_t *bmc;
......@@ -1443,6 +1470,8 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector)
wait_event(bitmap->mddev->recovery_wait,
atomic_read(&bitmap->mddev->recovery_active) == 0);
bitmap->mddev->curr_resync_completed = bitmap->mddev->curr_resync;
set_bit(MD_CHANGE_CLEAN, &bitmap->mddev->flags);
sector &= ~((1ULL << CHUNK_BLOCK_SHIFT(bitmap)) - 1);
s = 0;
while (s < sector && s < bitmap->mddev->resync_max_sectors) {
......
......@@ -62,7 +62,10 @@
#define ModeShift 5
#define MaxFault 50
#include <linux/raid/md.h>
#include <linux/blkdev.h>
#include <linux/raid/md_u.h>
#include "md.h"
#include <linux/seq_file.h>
static void faulty_fail(struct bio *bio, int error)
......@@ -280,6 +283,17 @@ static int reconfig(mddev_t *mddev, int layout, int chunk_size)
return 0;
}
static sector_t faulty_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
WARN_ONCE(raid_disks,
"%s does not support generic reshape\n", __func__);
if (sectors == 0)
return mddev->dev_sectors;
return sectors;
}
static int run(mddev_t *mddev)
{
mdk_rdev_t *rdev;
......@@ -298,7 +312,7 @@ static int run(mddev_t *mddev)
list_for_each_entry(rdev, &mddev->disks, same_set)
conf->rdev = rdev;
mddev->array_sectors = mddev->size * 2;
md_set_array_sectors(mddev, faulty_size(mddev, 0, 0));
mddev->private = conf;
reconfig(mddev, mddev->layout, -1);
......@@ -325,6 +339,7 @@ static struct mdk_personality faulty_personality =
.stop = stop,
.status = status,
.reconfig = reconfig,
.size = faulty_size,
};
static int __init raid_init(void)
......
......@@ -16,7 +16,11 @@
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/raid/linear.h>
#include <linux/blkdev.h>
#include <linux/raid/md_u.h>
#include <linux/seq_file.h>
#include "md.h"
#include "linear.h"
/*
* find which device holds a particular offset
......@@ -97,6 +101,16 @@ static int linear_congested(void *data, int bits)
return ret;
}
static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
linear_conf_t *conf = mddev_to_conf(mddev);
WARN_ONCE(sectors || raid_disks,
"%s does not support generic reshape\n", __func__);
return conf->array_sectors;
}
static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
{
linear_conf_t *conf;
......@@ -135,8 +149,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
mddev->queue->max_sectors > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
disk->num_sectors = rdev->size * 2;
conf->array_sectors += rdev->size * 2;
disk->num_sectors = rdev->sectors;
conf->array_sectors += rdev->sectors;
cnt++;
}
......@@ -249,7 +263,7 @@ static int linear_run (mddev_t *mddev)
if (!conf)
return 1;
mddev->private = conf;
mddev->array_sectors = conf->array_sectors;
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
mddev->queue->unplug_fn = linear_unplug;
......@@ -283,7 +297,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
newconf->prev = mddev_to_conf(mddev);
mddev->private = newconf;
mddev->raid_disks++;
mddev->array_sectors = newconf->array_sectors;
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
set_capacity(mddev->gendisk, mddev->array_sectors);
return 0;
}
......@@ -381,6 +395,7 @@ static struct mdk_personality linear_personality =
.stop = linear_stop,
.status = linear_status,
.hot_add_disk = linear_add,
.size = linear_size,
};
static int __init linear_init (void)
......
#ifndef _LINEAR_H
#define _LINEAR_H
#include <linux/raid/md.h>
struct dev_info {
mdk_rdev_t *rdev;
sector_t num_sectors;
......
This diff is collapsed.
......@@ -15,21 +15,8 @@
#ifndef _MD_K_H
#define _MD_K_H
/* and dm-bio-list.h is not under include/linux because.... ??? */
#include "../../../drivers/md/dm-bio-list.h"
#ifdef CONFIG_BLOCK
#define LEVEL_MULTIPATH (-4)
#define LEVEL_LINEAR (-1)
#define LEVEL_FAULTY (-5)
/* we need a value for 'no level specified' and 0
* means 'raid0', so we need something else. This is
* for internal use only
*/
#define LEVEL_NONE (-1000000)
#define MaxSector (~(sector_t)0)
typedef struct mddev_s mddev_t;
......@@ -49,9 +36,9 @@ struct mdk_rdev_s
{
struct list_head same_set; /* RAID devices within the same set */
sector_t size; /* Device size (in blocks) */
sector_t sectors; /* Device size (in 512bytes sectors) */
mddev_t *mddev; /* RAID array if running */
long last_events; /* IO event timestamp */
int last_events; /* IO event timestamp */
struct block_device *bdev; /* block device handle */
......@@ -132,6 +119,8 @@ struct mddev_s
#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */
#define MD_CHANGE_PENDING 2 /* superblock update in progress */
int suspended;
atomic_t active_io;
int ro;
struct gendisk *gendisk;
......@@ -155,8 +144,11 @@ struct mddev_s
char clevel[16];
int raid_disks;
int max_disks;
sector_t size; /* used size of component devices */
sector_t dev_sectors; /* used size of
* component devices */
sector_t array_sectors; /* exported array size */
int external_size; /* size managed
* externally */
__u64 events;
char uuid[16];
......@@ -172,6 +164,13 @@ struct mddev_s
struct mdk_thread_s *thread; /* management thread */
struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */
sector_t curr_resync; /* last block scheduled */
/* As resync requests can complete out of order, we cannot easily track
* how much resync has been completed. So we occasionally pause until
* everything completes, then set curr_resync_completed to curr_resync.
* As such it may be well behind the real resync mark, but it is a value
* we are certain of.
*/
sector_t curr_resync_completed;
unsigned long resync_mark; /* a recent timestamp */
sector_t resync_mark_cnt;/* blocks written at resync_mark */
sector_t curr_mark_cnt; /* blocks scheduled now */
......@@ -315,8 +314,10 @@ struct mdk_personality
int (*spare_active) (mddev_t *mddev);
sector_t (*sync_request)(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster);
int (*resize) (mddev_t *mddev, sector_t sectors);
sector_t (*size) (mddev_t *mddev, sector_t sectors, int raid_disks);
int (*check_reshape) (mddev_t *mddev);
int (*start_reshape) (mddev_t *mddev);
void (*finish_reshape) (mddev_t *mddev);
int (*reconfig) (mddev_t *mddev, int layout, int chunk_size);
/* quiesce moves between quiescence states
* 0 - fully active
......@@ -324,6 +325,16 @@ struct mdk_personality
* others - reserved
*/
void (*quiesce) (mddev_t *mddev, int state);
/* takeover is used to transition an array from one
* personality to another. The new personality must be able
* to handle the data in the current layout.
* e.g. 2drive raid1 -> 2drive raid5
* ndrive raid5 -> degraded n+1drive raid6 with special layout
* If the takeover succeeds, a new 'private' structure is returned.
* This needs to be installed and then ->run used to activate the
* array.
*/
void *(*takeover) (mddev_t *mddev);
};
......@@ -400,3 +411,26 @@ static inline void safe_put_page(struct page *p)
#endif /* CONFIG_BLOCK */
#endif
extern int register_md_personality(struct mdk_personality *p);
extern int unregister_md_personality(struct mdk_personality *p);
extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
mddev_t *mddev, const char *name);
extern void md_unregister_thread(mdk_thread_t *thread);
extern void md_wakeup_thread(mdk_thread_t *thread);
extern void md_check_recovery(mddev_t *mddev);
extern void md_write_start(mddev_t *mddev, struct bio *bi);
extern void md_write_end(mddev_t *mddev);
extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
sector_t sector, int size, struct page *page);
extern void md_super_wait(mddev_t *mddev);
extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
struct page *page, int rw);
extern void md_do_sync(mddev_t *mddev);
extern void md_new_event(mddev_t *mddev);
extern int md_allow_write(mddev_t *mddev);
extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
......@@ -59,7 +59,7 @@ int main(int argc, char *argv[])
uint8_t v;
uint8_t exptbl[256], invtbl[256];
printf("#include \"raid6.h\"\n");
printf("#include <linux/raid/pq.h>\n");
/* Compute multiplication table */
printf("\nconst u8 __attribute__((aligned(256)))\n"
......@@ -76,6 +76,9 @@ int main(int argc, char *argv[])
printf("\t},\n");
}
printf("};\n");
printf("#ifdef __KERNEL__\n");
printf("EXPORT_SYMBOL(raid6_gfmul);\n");
printf("#endif\n");
/* Compute power-of-2 table (exponent) */
v = 1;
......@@ -92,6 +95,9 @@ int main(int argc, char *argv[])
}
}
printf("};\n");
printf("#ifdef __KERNEL__\n");
printf("EXPORT_SYMBOL(raid6_gfexp);\n");
printf("#endif\n");
/* Compute inverse table x^-1 == x^254 */
printf("\nconst u8 __attribute__((aligned(256)))\n"
......@@ -104,6 +110,9 @@ int main(int argc, char *argv[])
}
}
printf("};\n");
printf("#ifdef __KERNEL__\n");
printf("EXPORT_SYMBOL(raid6_gfinv);\n");
printf("#endif\n");
/* Compute inv(2^x + 1) (exponent-xor-inverse) table */
printf("\nconst u8 __attribute__((aligned(256)))\n"
......@@ -115,6 +124,9 @@ int main(int argc, char *argv[])
(j == 7) ? '\n' : ' ');
}
printf("};\n");
printf("#ifdef __KERNEL__\n");
printf("EXPORT_SYMBOL(raid6_gfexi);\n");
printf("#endif\n");
return 0;
}
......@@ -19,7 +19,11 @@
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/raid/multipath.h>
#include <linux/blkdev.h>
#include <linux/raid/md_u.h>
#include <linux/seq_file.h>
#include "md.h"
#include "multipath.h"
#define MAX_WORK_PER_DISK 128
......@@ -402,6 +406,14 @@ static void multipathd (mddev_t *mddev)
spin_unlock_irqrestore(&conf->device_lock, flags);
}
static sector_t multipath_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
WARN_ONCE(sectors || raid_disks,
"%s does not support generic reshape\n", __func__);
return mddev->dev_sectors;
}
static int multipath_run (mddev_t *mddev)
{
multipath_conf_t *conf;
......@@ -498,7 +510,7 @@ static int multipath_run (mddev_t *mddev)
/*
* Ok, everything is just fine now
*/
mddev->array_sectors = mddev->size * 2;
md_set_array_sectors(mddev, multipath_size(mddev, 0, 0));
mddev->queue->unplug_fn = multipath_unplug;
mddev->queue->backing_dev_info.congested_fn = multipath_congested;
......@@ -543,6 +555,7 @@ static struct mdk_personality multipath_personality =
.error_handler = multipath_error,
.hot_add_disk = multipath_add_disk,
.hot_remove_disk= multipath_remove_disk,
.size = multipath_size,
};
static int __init multipath_init (void)
......
#ifndef _MULTIPATH_H
#define _MULTIPATH_H
#include <linux/raid/md.h>
struct multipath_info {
mdk_rdev_t *rdev;
};
......
......@@ -18,7 +18,10 @@
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/raid/raid0.h>
#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include "md.h"
#include "raid0.h"
static void raid0_unplug(struct request_queue *q)
{
......@@ -73,16 +76,15 @@ static int create_strip_zones (mddev_t *mddev)
list_for_each_entry(rdev2, &mddev->disks, same_set) {
printk(KERN_INFO "raid0: comparing %s(%llu)",
bdevname(rdev1->bdev,b),
(unsigned long long)rdev1->size);
(unsigned long long)rdev1->sectors);
printk(KERN_INFO " with %s(%llu)\n",
bdevname(rdev2->bdev,b),
(unsigned long long)rdev2->size);
(unsigned long long)rdev2->sectors);
if (rdev2 == rdev1) {
printk(KERN_INFO "raid0: END\n");
break;
}
if (rdev2->size == rdev1->size)
{
if (rdev2->sectors == rdev1->sectors) {
/*
* Not unique, don't count it as a new
* group
......@@ -145,7 +147,7 @@ static int create_strip_zones (mddev_t *mddev)
mddev->queue->max_sectors > (PAGE_SIZE>>9))
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
if (!smallest || (rdev1->size <smallest->size))
if (!smallest || (rdev1->sectors < smallest->sectors))
smallest = rdev1;
cnt++;
}
......@@ -155,10 +157,10 @@ static int create_strip_zones (mddev_t *mddev)
goto abort;
}
zone->nb_dev = cnt;
zone->sectors = smallest->size * cnt * 2;
zone->sectors = smallest->sectors * cnt;
zone->zone_start = 0;
current_start = smallest->size * 2;
current_start = smallest->sectors;
curr_zone_start = zone->sectors;
/* now do the other zones */
......@@ -177,29 +179,29 @@ static int create_strip_zones (mddev_t *mddev)
rdev = conf->strip_zone[0].dev[j];
printk(KERN_INFO "raid0: checking %s ...",
bdevname(rdev->bdev, b));
if (rdev->size > current_start / 2) {
printk(KERN_INFO " contained as device %d\n",
c);
if (rdev->sectors <= current_start) {
printk(KERN_INFO " nope.\n");
continue;
}
printk(KERN_INFO " contained as device %d\n", c);
zone->dev[c] = rdev;
c++;
if (!smallest || (rdev->size <smallest->size)) {
if (!smallest || rdev->sectors < smallest->sectors) {
smallest = rdev;
printk(KERN_INFO " (%llu) is smallest!.\n",
(unsigned long long)rdev->size);
(unsigned long long)rdev->sectors);
}
} else
printk(KERN_INFO " nope.\n");
}
zone->nb_dev = c;
zone->sectors = (smallest->size * 2 - current_start) * c;
zone->sectors = (smallest->sectors - current_start) * c;
printk(KERN_INFO "raid0: zone->nb_dev: %d, sectors: %llu\n",
zone->nb_dev, (unsigned long long)zone->sectors);
zone->zone_start = curr_zone_start;
curr_zone_start += zone->sectors;
current_start = smallest->size * 2;
current_start = smallest->sectors;
printk(KERN_INFO "raid0: current zone start: %llu\n",
(unsigned long long)current_start);
}
......@@ -261,12 +263,25 @@ static int raid0_mergeable_bvec(struct request_queue *q,
return max;
}
static sector_t raid0_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
sector_t array_sectors = 0;
mdk_rdev_t *rdev;
WARN_ONCE(sectors || raid_disks,
"%s does not support generic reshape\n", __func__);
list_for_each_entry(rdev, &mddev->disks, same_set)
array_sectors += rdev->sectors;
return array_sectors;
}
static int raid0_run (mddev_t *mddev)
{
unsigned cur=0, i=0, nb_zone;
s64 sectors;
raid0_conf_t *conf;
mdk_rdev_t *rdev;
if (mddev->chunk_size == 0) {
printk(KERN_ERR "md/raid0: non-zero chunk size required.\n");
......@@ -291,16 +306,14 @@ static int raid0_run (mddev_t *mddev)
goto out_free_conf;
/* calculate array device size */
mddev->array_sectors = 0;
list_for_each_entry(rdev, &mddev->disks, same_set)
mddev->array_sectors += rdev->size * 2;
md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
printk(KERN_INFO "raid0 : md_size is %llu sectors.\n",
(unsigned long long)mddev->array_sectors);
printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n",
(unsigned long long)conf->spacing);
{
sector_t s = mddev->array_sectors;
sector_t s = raid0_size(mddev, 0, 0);
sector_t space = conf->spacing;
int round;
conf->sector_shift = 0;
......@@ -509,6 +522,7 @@ static struct mdk_personality raid0_personality=
.run = raid0_run,
.stop = raid0_stop,
.status = raid0_status,
.size = raid0_size,
};
static int __init raid0_init (void)
......
#ifndef _RAID0_H
#define _RAID0_H
#include <linux/raid/md.h>
struct strip_zone
{
sector_t zone_start; /* Zone offset in md_dev (in sectors) */
......
......@@ -31,10 +31,13 @@
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "dm-bio-list.h"
#include <linux/delay.h>
#include <linux/raid/raid1.h>
#include <linux/raid/bitmap.h>
#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include "md.h"
#include "dm-bio-list.h"
#include "raid1.h"
#include "bitmap.h"
#define DEBUG 0
#if DEBUG
......@@ -1723,7 +1726,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return 0;
}
max_sector = mddev->size << 1;
max_sector = mddev->dev_sectors;
if (sector_nr >= max_sector) {
/* If we aborted, we need to abort the
* sync on the 'current' bitmap chunk (there will
......@@ -1919,6 +1922,14 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return nr_sectors;
}
static sector_t raid1_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
if (sectors)
return sectors;
return mddev->dev_sectors;
}
static int run(mddev_t *mddev)
{
conf_t *conf;
......@@ -2048,7 +2059,7 @@ static int run(mddev_t *mddev)
/*
* Ok, everything is just fine now
*/
mddev->array_sectors = mddev->size * 2;
md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
mddev->queue->unplug_fn = raid1_unplug;
mddev->queue->backing_dev_info.congested_fn = raid1_congested;
......@@ -2089,6 +2100,9 @@ static int stop(mddev_t *mddev)
/* need to kick something here to make sure I/O goes? */
}
raise_barrier(conf);
lower_barrier(conf);
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
......@@ -2110,15 +2124,17 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
* any io in the removed space completes, but it hardly seems
* worth it.
*/
mddev->array_sectors = sectors;
md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0));
if (mddev->array_sectors > raid1_size(mddev, sectors, 0))
return -EINVAL;
set_capacity(mddev->gendisk, mddev->array_sectors);
mddev->changed = 1;
if (mddev->array_sectors / 2 > mddev->size &&
if (sectors > mddev->dev_sectors &&
mddev->recovery_cp == MaxSector) {
mddev->recovery_cp = mddev->size << 1;
mddev->recovery_cp = mddev->dev_sectors;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
mddev->size = mddev->array_sectors / 2;
mddev->dev_sectors = sectors;
mddev->resync_max_sectors = sectors;
return 0;
}
......@@ -2264,6 +2280,7 @@ static struct mdk_personality raid1_personality =
.spare_active = raid1_spare_active,
.sync_request = sync_request,
.resize = raid1_resize,
.size = raid1_size,
.check_reshape = raid1_reshape,
.quiesce = raid1_quiesce,
};
......
#ifndef _RAID1_H
#define _RAID1_H
#include <linux/raid/md.h>
typedef struct mirror_info mirror_info_t;
struct mirror_info {
......
......@@ -18,10 +18,13 @@
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include "dm-bio-list.h"
#include <linux/delay.h>
#include <linux/raid/raid10.h>
#include <linux/raid/bitmap.h>
#include <linux/blkdev.h>
#include <linux/seq_file.h>
#include "md.h"
#include "dm-bio-list.h"
#include "raid10.h"
#include "bitmap.h"
/*
* RAID10 provides a combination of RAID0 and RAID1 functionality.
......@@ -1695,7 +1698,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return 0;
skipped:
max_sector = mddev->size << 1;
max_sector = mddev->dev_sectors;
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
max_sector = mddev->resync_max_sectors;
if (sector_nr >= max_sector) {
......@@ -2020,6 +2023,25 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
goto skipped;
}
static sector_t
raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
{
sector_t size;
conf_t *conf = mddev_to_conf(mddev);
if (!raid_disks)
raid_disks = mddev->raid_disks;
if (!sectors)
sectors = mddev->dev_sectors;
size = sectors >> conf->chunk_shift;
sector_div(size, conf->far_copies);
size = size * raid_disks;
sector_div(size, conf->near_copies);
return size << conf->chunk_shift;
}
static int run(mddev_t *mddev)
{
conf_t *conf;
......@@ -2076,7 +2098,7 @@ static int run(mddev_t *mddev)
conf->far_offset = fo;
conf->chunk_mask = (sector_t)(mddev->chunk_size>>9)-1;
conf->chunk_shift = ffz(~mddev->chunk_size) - 9;
size = mddev->size >> (conf->chunk_shift-1);
size = mddev->dev_sectors >> conf->chunk_shift;
sector_div(size, fc);
size = size * conf->raid_disks;
sector_div(size, nc);
......@@ -2089,7 +2111,7 @@ static int run(mddev_t *mddev)
*/
stride += conf->raid_disks - 1;
sector_div(stride, conf->raid_disks);
mddev->size = stride << (conf->chunk_shift-1);
mddev->dev_sectors = stride << conf->chunk_shift;
if (fo)
stride = 1;
......@@ -2171,8 +2193,8 @@ static int run(mddev_t *mddev)
/*
* Ok, everything is just fine now
*/
mddev->array_sectors = size << conf->chunk_shift;
mddev->resync_max_sectors = size << conf->chunk_shift;
md_set_array_sectors(mddev, raid10_size(mddev, 0, 0));
mddev->resync_max_sectors = raid10_size(mddev, 0, 0);
mddev->queue->unplug_fn = raid10_unplug;
mddev->queue->backing_dev_info.congested_fn = raid10_congested;
......@@ -2208,6 +2230,9 @@ static int stop(mddev_t *mddev)
{
conf_t *conf = mddev_to_conf(mddev);
raise_barrier(conf, 0);
lower_barrier(conf);
md_unregister_thread(mddev->thread);
mddev->thread = NULL;
blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
......@@ -2255,6 +2280,7 @@ static struct mdk_personality raid10_personality =
.spare_active = raid10_spare_active,
.sync_request = sync_request,
.quiesce = raid10_quiesce,
.size = raid10_size,
};
static int __init raid_init(void)
......
#ifndef _RAID10_H
#define _RAID10_H
#include <linux/raid/md.h>
typedef struct mirror_info mirror_info_t;
struct mirror_info {
......
This diff is collapsed.
#ifndef _RAID5_H
#define _RAID5_H
#include <linux/raid/md.h>
#include <linux/raid/xor.h>
/*
......@@ -199,8 +198,12 @@ struct stripe_head {
struct hlist_node hash;
struct list_head lru; /* inactive_list or handle_list */
struct raid5_private_data *raid_conf;
short generation; /* increments with every
* reshape */
sector_t sector; /* sector of this row */
int pd_idx; /* parity disk index */
short pd_idx; /* parity disk index */
short qd_idx; /* 'Q' disk index for raid6 */
short ddf_layout;/* use DDF ordering to calculate Q */
unsigned long state; /* state flags */
atomic_t count; /* nr of active thread/requests */
spinlock_t lock;
......@@ -238,7 +241,7 @@ struct stripe_head_state {
/* r6_state - extra state data only relevant to r6 */
struct r6_state {
int p_failed, q_failed, qd_idx, failed_num[2];
int p_failed, q_failed, failed_num[2];
};
/* Flags */
......@@ -268,6 +271,8 @@ struct r6_state {
#define READ_MODIFY_WRITE 2
/* not a write method, but a compute_parity mode */
#define CHECK_PARITY 3
/* Additional compute_parity mode -- updates the parity w/o LOCKING */
#define UPDATE_PARITY 4
/*
* Stripe state
......@@ -334,12 +339,21 @@ struct raid5_private_data {
int raid_disks;
int max_nr_stripes;
/* used during an expand */
sector_t expand_progress; /* MaxSector when no expand happening */
sector_t expand_lo; /* from here up to expand_progress it out-of-bounds
* as we haven't flushed the metadata yet
/* reshape_progress is the leading edge of a 'reshape'
* It has value MaxSector when no reshape is happening
* If delta_disks < 0, it is the last sector we started work on,
* else is it the next sector to work on.
*/
sector_t reshape_progress;
/* reshape_safe is the trailing edge of a reshape. We know that
* before (or after) this address, all reshape has completed.
*/
sector_t reshape_safe;
int previous_raid_disks;
int prev_chunk, prev_algo;
short generation; /* increments with every reshape */
unsigned long reshape_checkpoint; /* Time we last updated
* metadata */
struct list_head handle_list; /* stripes needing handling */
struct list_head hold_list; /* preread ready stripes */
......@@ -385,6 +399,11 @@ struct raid5_private_data {
int pool_size; /* number of disks in stripeheads in pool */
spinlock_t device_lock;
struct disk_info *disks;
/* When taking over an array from a different personality, we store
* the new thread here until we fully activate the array.
*/
struct mdk_thread_s *thread;
};
typedef struct raid5_private_data raid5_conf_t;
......@@ -394,9 +413,62 @@ typedef struct raid5_private_data raid5_conf_t;
/*
* Our supported algorithms
*/
#define ALGORITHM_LEFT_ASYMMETRIC 0
#define ALGORITHM_RIGHT_ASYMMETRIC 1
#define ALGORITHM_LEFT_SYMMETRIC 2
#define ALGORITHM_RIGHT_SYMMETRIC 3
#define ALGORITHM_LEFT_ASYMMETRIC 0 /* Rotating Parity N with Data Restart */
#define ALGORITHM_RIGHT_ASYMMETRIC 1 /* Rotating Parity 0 with Data Restart */
#define ALGORITHM_LEFT_SYMMETRIC 2 /* Rotating Parity N with Data Continuation */
#define ALGORITHM_RIGHT_SYMMETRIC 3 /* Rotating Parity 0 with Data Continuation */
/* Define non-rotating (raid4) algorithms. These allow
* conversion of raid4 to raid5.
*/
#define ALGORITHM_PARITY_0 4 /* P or P,Q are initial devices */
#define ALGORITHM_PARITY_N 5 /* P or P,Q are final devices. */
/* DDF RAID6 layouts differ from md/raid6 layouts in two ways.
* Firstly, the exact positioning of the parity block is slightly
* different between the 'LEFT_*' modes of md and the "_N_*" modes
* of DDF.
* Secondly, or order of datablocks over which the Q syndrome is computed
* is different.
* Consequently we have different layouts for DDF/raid6 than md/raid6.
* These layouts are from the DDFv1.2 spec.
* Interestingly DDFv1.2-Errata-A does not specify N_CONTINUE but
* leaves RLQ=3 as 'Vendor Specific'
*/
#define ALGORITHM_ROTATING_ZERO_RESTART 8 /* DDF PRL=6 RLQ=1 */
#define ALGORITHM_ROTATING_N_RESTART 9 /* DDF PRL=6 RLQ=2 */
#define ALGORITHM_ROTATING_N_CONTINUE 10 /*DDF PRL=6 RLQ=3 */
/* For every RAID5 algorithm we define a RAID6 algorithm
* with exactly the same layout for data and parity, and
* with the Q block always on the last device (N-1).
* This allows trivial conversion from RAID5 to RAID6
*/
#define ALGORITHM_LEFT_ASYMMETRIC_6 16
#define ALGORITHM_RIGHT_ASYMMETRIC_6 17
#define ALGORITHM_LEFT_SYMMETRIC_6 18
#define ALGORITHM_RIGHT_SYMMETRIC_6 19
#define ALGORITHM_PARITY_0_6 20
#define ALGORITHM_PARITY_N_6 ALGORITHM_PARITY_N
static inline int algorithm_valid_raid5(int layout)
{
return (layout >= 0) &&
(layout <= 5);
}
static inline int algorithm_valid_raid6(int layout)
{
return (layout >= 0 && layout <= 5)
||
(layout == 8 || layout == 10)
||
(layout >= 16 && layout <= 20);
}
static inline int algorithm_is_DDF(int layout)
{
return layout >= 8 && layout <= 10;
}
#endif
......@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
* Bostom MA 02111-1307, USA; either version 2 of the License, or
* Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
......@@ -16,13 +16,20 @@
* Algorithm list and algorithm selection for RAID-6
*/
#include "raid6.h"
#include <linux/raid/pq.h>
#ifndef __KERNEL__
#include <sys/mman.h>
#include <stdio.h>
#else
#if !RAID6_USE_EMPTY_ZERO_PAGE
/* In .bss so it's zeroed */
const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
EXPORT_SYMBOL(raid6_empty_zero_page);
#endif
#endif
struct raid6_calls raid6_call;
EXPORT_SYMBOL_GPL(raid6_call);
/* Various routine sets */
extern const struct raid6_calls raid6_intx1;
......@@ -79,6 +86,7 @@ const struct raid6_calls * const raid6_algos[] = {
#else
/* Need more time to be stable in userspace */
#define RAID6_TIME_JIFFIES_LG2 9
#define time_before(x, y) ((x) < (y))
#endif
/* Try to pick the best algorithm */
......@@ -152,3 +160,12 @@ int __init raid6_select_algo(void)
return best ? 0 : -EINVAL;
}
static void raid6_exit(void)
{
do { } while (0);
}
subsys_initcall(raid6_select_algo);
module_exit(raid6_exit);
MODULE_LICENSE("GPL");
......@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
* Bostom MA 02111-1307, USA; either version 2 of the License, or
* Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
......@@ -22,7 +22,7 @@
* bracked this with preempt_disable/enable or in a lock)
*/
#include "raid6.h"
#include <linux/raid/pq.h>
#ifdef CONFIG_ALTIVEC
......
......@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
* Bostom MA 02111-1307, USA; either version 2 of the License, or
* Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
......@@ -18,7 +18,7 @@
* This file is postprocessed using unroll.pl
*/
#include "raid6.h"
#include <linux/raid/pq.h>
/*
* This is the C data type to use
......
......@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
* Bostom MA 02111-1307, USA; either version 2 of the License, or
* Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
......@@ -18,7 +18,7 @@
#if defined(__i386__) && !defined(__arch_um__)
#include "raid6.h"
#include <linux/raid/pq.h>
#include "raid6x86.h"
/* Shared with raid6sse1.c */
......
......@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
* Bostom MA 02111-1307, USA; either version 2 of the License, or
* Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
......@@ -18,7 +18,7 @@
* the syndrome.)
*/
#include "raid6.h"
#include <linux/raid/pq.h>
/* Recover two failed data blocks. */
void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
......@@ -63,9 +63,7 @@ void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
p++; q++;
}
}
EXPORT_SYMBOL_GPL(raid6_2data_recov);
/* Recover failure of one data block plus the P block */
void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
......@@ -97,9 +95,10 @@ void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs)
q++; dq++;
}
}
EXPORT_SYMBOL_GPL(raid6_datap_recov);
#ifndef __KERNEL__ /* Testing only */
#ifndef __KERNEL__
/* Testing only */
/* Recover two failed blocks. */
void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs)
......
......@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
* Bostom MA 02111-1307, USA; either version 2 of the License, or
* Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
......@@ -23,7 +23,7 @@
#if defined(__i386__) && !defined(__arch_um__)
#include "raid6.h"
#include <linux/raid/pq.h>
#include "raid6x86.h"
/* Defined in raid6mmx.c */
......
......@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
* Bostom MA 02111-1307, USA; either version 2 of the License, or
* Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
......@@ -19,7 +19,7 @@
#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__)
#include "raid6.h"
#include <linux/raid/pq.h>
#include "raid6x86.h"
static const struct raid6_sse_constants {
......
......@@ -5,7 +5,7 @@
CC = gcc
OPTFLAGS = -O2 # Adjust as desired
CFLAGS = -I.. -g $(OPTFLAGS)
CFLAGS = -I.. -I ../../../include -g $(OPTFLAGS)
LD = ld
PERL = perl
AR = ar
......
......@@ -17,7 +17,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "raid6.h"
#include <linux/raid/pq.h>
#define NDISKS 16 /* Including P and Q */
......
......@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
* Bostom MA 02111-1307, USA; either version 2 of the License, or
* Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
......
......@@ -23,7 +23,7 @@
#include <linux/if.h>
#include <linux/if_bridge.h>
#include <linux/slab.h>
#include <linux/raid/md.h>
#include <linux/raid/md_u.h>
#include <linux/kd.h>
#include <linux/route.h>
#include <linux/in6.h>
......
/*
md.h : Multiple Devices driver for Linux
Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman
Copyright (C) 1994-96 Marc ZYNGIER
<zyngier@ufr-info-p7.ibp.fr> or
<maz@gloups.fdn.fr>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
You should have received a copy of the GNU General Public License
(for example /usr/src/linux/COPYING); if not, write to the Free
Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef _MD_H
#define _MD_H
#include <linux/blkdev.h>
#include <linux/seq_file.h>
/*
* 'md_p.h' holds the 'physical' layout of RAID devices
* 'md_u.h' holds the user <=> kernel API
*
* 'md_k.h' holds kernel internal definitions
*/
#include <linux/raid/md_p.h>
#include <linux/raid/md_u.h>
#include <linux/raid/md_k.h>
#ifdef CONFIG_MD
/*
* Different major versions are not compatible.
* Different minor versions are only downward compatible.
* Different patchlevel versions are downward and upward compatible.
*/
#define MD_MAJOR_VERSION 0
#define MD_MINOR_VERSION 90
/*
* MD_PATCHLEVEL_VERSION indicates kernel functionality.
* >=1 means different superblock formats are selectable using SET_ARRAY_INFO
* and major_version/minor_version accordingly
* >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT
* in the super status byte
* >=3 means that bitmap superblock version 4 is supported, which uses
* little-ending representation rather than host-endian
*/
#define MD_PATCHLEVEL_VERSION 3
extern int mdp_major;
extern int register_md_personality(struct mdk_personality *p);
extern int unregister_md_personality(struct mdk_personality *p);
extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev),
mddev_t *mddev, const char *name);
extern void md_unregister_thread(mdk_thread_t *thread);
extern void md_wakeup_thread(mdk_thread_t *thread);
extern void md_check_recovery(mddev_t *mddev);
extern void md_write_start(mddev_t *mddev, struct bio *bi);
extern void md_write_end(mddev_t *mddev);
extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
extern void md_error(mddev_t *mddev, mdk_rdev_t *rdev);
extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
sector_t sector, int size, struct page *page);
extern void md_super_wait(mddev_t *mddev);
extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
struct page *page, int rw);
extern void md_do_sync(mddev_t *mddev);
extern void md_new_event(mddev_t *mddev);
extern int md_allow_write(mddev_t *mddev);
extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
#endif /* CONFIG_MD */
#endif
......@@ -15,6 +15,24 @@
#ifndef _MD_U_H
#define _MD_U_H
/*
* Different major versions are not compatible.
* Different minor versions are only downward compatible.
* Different patchlevel versions are downward and upward compatible.
*/
#define MD_MAJOR_VERSION 0
#define MD_MINOR_VERSION 90
/*
* MD_PATCHLEVEL_VERSION indicates kernel functionality.
* >=1 means different superblock formats are selectable using SET_ARRAY_INFO
* and major_version/minor_version accordingly
* >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT
* in the super status byte
* >=3 means that bitmap superblock version 4 is supported, which uses
* little-ending representation rather than host-endian
*/
#define MD_PATCHLEVEL_VERSION 3
/* ioctls */
/* status */
......@@ -46,6 +64,12 @@
#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33)
#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34)
/* 63 partitions with the alternate major number (mdp) */
#define MdpMinorShift 6
#ifdef __KERNEL__
extern int mdp_major;
#endif
typedef struct mdu_version_s {
int major;
int minor;
......@@ -85,6 +109,17 @@ typedef struct mdu_array_info_s {
} mdu_array_info_t;
/* non-obvious values for 'level' */
#define LEVEL_MULTIPATH (-4)
#define LEVEL_LINEAR (-1)
#define LEVEL_FAULTY (-5)
/* we need a value for 'no level specified' and 0
* means 'raid0', so we need something else. This is
* for internal use only
*/
#define LEVEL_NONE (-1000000)
typedef struct mdu_disk_info_s {
/*
* configuration/status of one particular disk
......
......@@ -5,7 +5,7 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, Inc., 53 Temple Place Ste 330,
* Bostom MA 02111-1307, USA; either version 2 of the License, or
* Boston MA 02111-1307, USA; either version 2 of the License, or
* (at your option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
......@@ -17,14 +17,7 @@
/* Set to 1 to use kernel-wide empty_zero_page */
#define RAID6_USE_EMPTY_ZERO_PAGE 0
#include <linux/raid/md.h>
#include <linux/raid/raid5.h>
typedef raid5_conf_t raid6_conf_t; /* Same configuration */
/* Additional compute_parity mode -- updates the parity w/o LOCKING */
#define UPDATE_PARITY 4
#include <linux/blkdev.h>
/* We need a pre-zeroed page... if we don't want to use the kernel-provided
one define it here */
......@@ -68,6 +61,10 @@ extern const char raid6_empty_zero_page[PAGE_SIZE];
#define enable_kernel_altivec()
#define disable_kernel_altivec()
#define EXPORT_SYMBOL(sym)
#define MODULE_LICENSE(licence)
#define subsys_initcall(x)
#define module_exit(x)
#endif /* __KERNEL__ */
/* Routine choices */
......@@ -98,9 +95,11 @@ extern const u8 raid6_gfinv[256] __attribute__((aligned(256)));
extern const u8 raid6_gfexi[256] __attribute__((aligned(256)));
/* Recovery routines */
void raid6_2data_recov(int disks, size_t bytes, int faila, int failb, void **ptrs);
void raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
void **ptrs);
void raid6_datap_recov(int disks, size_t bytes, int faila, void **ptrs);
void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs);
void raid6_dual_recov(int disks, size_t bytes, int faila, int failb,
void **ptrs);
/* Some definitions to allow code to be compiled for testing in userspace */
#ifndef __KERNEL__
......@@ -108,8 +107,11 @@ void raid6_dual_recov(int disks, size_t bytes, int faila, int failb, void **ptrs
# define jiffies raid6_jiffies()
# define printk printf
# define GFP_KERNEL 0
# define __get_free_pages(x,y) ((unsigned long)mmap(NULL, PAGE_SIZE << (y), PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0))
# define free_pages(x,y) munmap((void *)(x), (y)*PAGE_SIZE)
# define __get_free_pages(x, y) ((unsigned long)mmap(NULL, PAGE_SIZE << (y), \
PROT_READ|PROT_WRITE, \
MAP_PRIVATE|MAP_ANONYMOUS,\
0, 0))
# define free_pages(x, y) munmap((void *)(x), (y)*PAGE_SIZE)
static inline void cpu_relax(void)
{
......
#ifndef _XOR_H
#define _XOR_H
#include <linux/raid/md.h>
#define MAX_XOR_BLOCKS 4
extern void xor_blocks(unsigned int count, unsigned int bytes,
......
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/init.h>
#include <linux/syscalls.h>
#include <linux/unistd.h>
......
#include <linux/delay.h>
#include <linux/raid/md.h>
#include <linux/raid/md_u.h>
#include <linux/raid/md_p.h>
#include "do_mounts.h"
......@@ -112,8 +113,6 @@ static int __init md_setup(char *str)
return 1;
}
#define MdpMinorShift 6
static void __init md_setup_drive(void)
{
int minor, i, ent, partitioned;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment