Commit 63f33b8d authored by Jonathan Brassow, committed by NeilBrown

DM RAID: Add support for MD RAID10

Support the MD RAID10 personality through dm-raid.c
Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
Signed-off-by: NeilBrown <neilb@suse.de>
parent bb181e2e
...@@ -27,6 +27,10 @@ The target is named "raid" and it accepts the following parameters: ...@@ -27,6 +27,10 @@ The target is named "raid" and it accepts the following parameters:
- rotating parity N (right-to-left) with data restart - rotating parity N (right-to-left) with data restart
raid6_nc RAID6 N continue raid6_nc RAID6 N continue
- rotating parity N (right-to-left) with data continuation - rotating parity N (right-to-left) with data continuation
raid10 Various RAID10 inspired algorithms chosen by additional params
- RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
- RAID1E: Integrated Adjacent Stripe Mirroring
- and other similar RAID10 variants
Reference: Chapter 4 of Reference: Chapter 4 of
http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
...@@ -59,6 +63,28 @@ The target is named "raid" and it accepts the following parameters: ...@@ -59,6 +63,28 @@ The target is named "raid" and it accepts the following parameters:
logical size of the array. The bitmap records the device logical size of the array. The bitmap records the device
synchronisation state for each region. synchronisation state for each region.
[raid10_copies <# copies>]
[raid10_format near]
These two options are used to alter the default layout of
a RAID10 configuration. The number of copies can be
specified, but the default is 2. There are other variations
to how the copies are laid down - the default and only current
option is "near". Near copies are what most people think of
with respect to mirroring. If these options are left
unspecified, or 'raid10_copies 2' and/or 'raid10_format near'
are given, then the layouts for 2, 3 and 4 devices are:
2 drives 3 drives 4 drives
-------- ---------- --------------
A1 A1 A1 A1 A2 A1 A1 A2 A2
A2 A2 A2 A3 A3 A3 A3 A4 A4
A3 A3 A4 A4 A5 A5 A5 A6 A6
A4 A4 A5 A6 A6 A7 A7 A8 A8
.. .. .. .. .. .. .. .. ..
The 2-device layout is equivalent to 2-way RAID1. The 4-device
layout is what a traditional RAID10 would look like. The
3-device layout is what might be called a 'RAID1E - Integrated
Adjacent Stripe Mirroring'.
<#raid_devs>: The number of devices composing the array. <#raid_devs>: The number of devices composing the array.
Each device consists of two entries. The first is the device Each device consists of two entries. The first is the device
containing the metadata (if any); the second is the one containing the containing the metadata (if any); the second is the one containing the
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
#include "md.h" #include "md.h"
#include "raid1.h" #include "raid1.h"
#include "raid5.h" #include "raid5.h"
#include "raid10.h"
#include "bitmap.h" #include "bitmap.h"
#include <linux/device-mapper.h> #include <linux/device-mapper.h>
...@@ -52,7 +53,10 @@ struct raid_dev { ...@@ -52,7 +53,10 @@ struct raid_dev {
#define DMPF_MAX_RECOVERY_RATE 0x20 #define DMPF_MAX_RECOVERY_RATE 0x20
#define DMPF_MAX_WRITE_BEHIND 0x40 #define DMPF_MAX_WRITE_BEHIND 0x40
#define DMPF_STRIPE_CACHE 0x80 #define DMPF_STRIPE_CACHE 0x80
#define DMPF_REGION_SIZE 0X100 #define DMPF_REGION_SIZE 0x100
#define DMPF_RAID10_COPIES 0x200
#define DMPF_RAID10_FORMAT 0x400
struct raid_set { struct raid_set {
struct dm_target *ti; struct dm_target *ti;
...@@ -76,6 +80,7 @@ static struct raid_type { ...@@ -76,6 +80,7 @@ static struct raid_type {
const unsigned algorithm; /* RAID algorithm. */ const unsigned algorithm; /* RAID algorithm. */
} raid_types[] = { } raid_types[] = {
{"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */}, {"raid1", "RAID1 (mirroring)", 0, 2, 1, 0 /* NONE */},
{"raid10", "RAID10 (striped mirrors)", 0, 2, 10, UINT_MAX /* Varies */},
{"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0}, {"raid4", "RAID4 (dedicated parity disk)", 1, 2, 5, ALGORITHM_PARITY_0},
{"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC}, {"raid5_la", "RAID5 (left asymmetric)", 1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC},
{"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC}, {"raid5_ra", "RAID5 (right asymmetric)", 1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC},
...@@ -86,6 +91,17 @@ static struct raid_type { ...@@ -86,6 +91,17 @@ static struct raid_type {
{"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE} {"raid6_nc", "RAID6 (N continue)", 2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE}
}; };
/*
 * raid10_md_layout_to_copies - extract the copy count from a layout word.
 * @layout: layout value; only its least-significant byte is examined.
 *
 * Returns the low 8 bits of @layout, which is the byte in which
 * raid10_format_to_md_layout() places the number of "near" copies.
 */
static unsigned raid10_md_layout_to_copies(int layout)
{
	const unsigned copies_mask = 0xFF;

	return (unsigned)layout & copies_mask;
}
/*
 * raid10_format_to_md_layout - build a layout word from format and copies.
 * @format: layout format name; not consulted by this function.
 * @copies: number of "near" copies; only the low 8 bits are kept.
 *
 * Returns a layout value with a fixed "far" copy count of 1 in bits 8 and
 * up, and the (masked) "near" copy count in the least-significant byte.
 */
static int raid10_format_to_md_layout(char *format, unsigned copies)
{
	const unsigned far_copies = 1;
	const unsigned near_copies = copies & 0xFF;

	return (far_copies << 8) | near_copies;
}
static struct raid_type *get_raid_type(char *name) static struct raid_type *get_raid_type(char *name)
{ {
int i; int i;
...@@ -339,10 +355,16 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) ...@@ -339,10 +355,16 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
* [max_write_behind <sectors>] See '-write-behind=' (man mdadm) * [max_write_behind <sectors>] See '-write-behind=' (man mdadm)
* [stripe_cache <sectors>] Stripe cache size for higher RAIDs * [stripe_cache <sectors>] Stripe cache size for higher RAIDs
* [region_size <sectors>] Defines granularity of bitmap * [region_size <sectors>] Defines granularity of bitmap
*
* RAID10-only options:
* [raid10_copies <# copies>] Number of copies. (Default: 2)
* [raid10_format <near>] Layout algorithm. (Default: near)
*/ */
static int parse_raid_params(struct raid_set *rs, char **argv, static int parse_raid_params(struct raid_set *rs, char **argv,
unsigned num_raid_params) unsigned num_raid_params)
{ {
char *raid10_format = "near";
unsigned raid10_copies = 2;
unsigned i, rebuild_cnt = 0; unsigned i, rebuild_cnt = 0;
unsigned long value, region_size = 0; unsigned long value, region_size = 0;
sector_t sectors_per_dev = rs->ti->len; sector_t sectors_per_dev = rs->ti->len;
...@@ -416,11 +438,28 @@ static int parse_raid_params(struct raid_set *rs, char **argv, ...@@ -416,11 +438,28 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
} }
key = argv[i++]; key = argv[i++];
/* Parameters that take a string value are checked here. */
if (!strcasecmp(key, "raid10_format")) {
if (rs->raid_type->level != 10) {
rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type";
return -EINVAL;
}
if (strcmp("near", argv[i])) {
rs->ti->error = "Invalid 'raid10_format' value given";
return -EINVAL;
}
raid10_format = argv[i];
rs->print_flags |= DMPF_RAID10_FORMAT;
continue;
}
if (strict_strtoul(argv[i], 10, &value) < 0) { if (strict_strtoul(argv[i], 10, &value) < 0) {
rs->ti->error = "Bad numerical argument given in raid params"; rs->ti->error = "Bad numerical argument given in raid params";
return -EINVAL; return -EINVAL;
} }
/* Parameters that take a numeric value are checked here */
if (!strcasecmp(key, "rebuild")) { if (!strcasecmp(key, "rebuild")) {
rebuild_cnt++; rebuild_cnt++;
...@@ -439,6 +478,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv, ...@@ -439,6 +478,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
return -EINVAL; return -EINVAL;
} }
break; break;
case 10:
default: default:
DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name); DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name);
rs->ti->error = "Rebuild not supported for this RAID type"; rs->ti->error = "Rebuild not supported for this RAID type";
...@@ -495,7 +535,8 @@ static int parse_raid_params(struct raid_set *rs, char **argv, ...@@ -495,7 +535,8 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
*/ */
value /= 2; value /= 2;
if (rs->raid_type->level < 5) { if ((rs->raid_type->level != 5) &&
(rs->raid_type->level != 6)) {
rs->ti->error = "Inappropriate argument: stripe_cache"; rs->ti->error = "Inappropriate argument: stripe_cache";
return -EINVAL; return -EINVAL;
} }
...@@ -520,6 +561,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv, ...@@ -520,6 +561,14 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
} else if (!strcasecmp(key, "region_size")) { } else if (!strcasecmp(key, "region_size")) {
rs->print_flags |= DMPF_REGION_SIZE; rs->print_flags |= DMPF_REGION_SIZE;
region_size = value; region_size = value;
} else if (!strcasecmp(key, "raid10_copies") &&
(rs->raid_type->level == 10)) {
if ((value < 2) || (value > 0xFF)) {
rs->ti->error = "Bad value for 'raid10_copies'";
return -EINVAL;
}
rs->print_flags |= DMPF_RAID10_COPIES;
raid10_copies = value;
} else { } else {
DMERR("Unable to parse RAID parameter: %s", key); DMERR("Unable to parse RAID parameter: %s", key);
rs->ti->error = "Unable to parse RAID parameters"; rs->ti->error = "Unable to parse RAID parameters";
...@@ -538,8 +587,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv, ...@@ -538,8 +587,22 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
if (dm_set_target_max_io_len(rs->ti, max_io_len)) if (dm_set_target_max_io_len(rs->ti, max_io_len))
return -EINVAL; return -EINVAL;
if ((rs->raid_type->level > 1) && if (rs->raid_type->level == 10) {
sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) { if (raid10_copies > rs->md.raid_disks) {
rs->ti->error = "Not enough devices to satisfy specification";
return -EINVAL;
}
/* (Len * #mirrors) / #devices */
sectors_per_dev = rs->ti->len * raid10_copies;
sector_div(sectors_per_dev, rs->md.raid_disks);
rs->md.layout = raid10_format_to_md_layout(raid10_format,
raid10_copies);
rs->md.new_layout = rs->md.layout;
} else if ((rs->raid_type->level > 1) &&
sector_div(sectors_per_dev,
(rs->md.raid_disks - rs->raid_type->parity_devs))) {
rs->ti->error = "Target length not divisible by number of data devices"; rs->ti->error = "Target length not divisible by number of data devices";
return -EINVAL; return -EINVAL;
} }
...@@ -566,6 +629,9 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits) ...@@ -566,6 +629,9 @@ static int raid_is_congested(struct dm_target_callbacks *cb, int bits)
if (rs->raid_type->level == 1) if (rs->raid_type->level == 1)
return md_raid1_congested(&rs->md, bits); return md_raid1_congested(&rs->md, bits);
if (rs->raid_type->level == 10)
return md_raid10_congested(&rs->md, bits);
return md_raid5_congested(&rs->md, bits); return md_raid5_congested(&rs->md, bits);
} }
...@@ -884,6 +950,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) ...@@ -884,6 +950,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
case 6: case 6:
redundancy = rs->raid_type->parity_devs; redundancy = rs->raid_type->parity_devs;
break; break;
case 10:
redundancy = raid10_md_layout_to_copies(mddev->layout) - 1;
break;
default: default:
ti->error = "Unknown RAID type"; ti->error = "Unknown RAID type";
return -EINVAL; return -EINVAL;
...@@ -1049,12 +1118,19 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) ...@@ -1049,12 +1118,19 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad; goto bad;
} }
if (ti->len != rs->md.array_sectors) {
ti->error = "Array size does not match requested target length";
ret = -EINVAL;
goto size_mismatch;
}
rs->callbacks.congested_fn = raid_is_congested; rs->callbacks.congested_fn = raid_is_congested;
dm_table_add_target_callbacks(ti->table, &rs->callbacks); dm_table_add_target_callbacks(ti->table, &rs->callbacks);
mddev_suspend(&rs->md); mddev_suspend(&rs->md);
return 0; return 0;
size_mismatch:
md_stop(&rs->md);
bad: bad:
context_free(rs); context_free(rs);
...@@ -1203,6 +1279,13 @@ static int raid_status(struct dm_target *ti, status_type_t type, ...@@ -1203,6 +1279,13 @@ static int raid_status(struct dm_target *ti, status_type_t type,
DMEMIT(" region_size %lu", DMEMIT(" region_size %lu",
rs->md.bitmap_info.chunksize >> 9); rs->md.bitmap_info.chunksize >> 9);
if (rs->print_flags & DMPF_RAID10_COPIES)
DMEMIT(" raid10_copies %u",
raid10_md_layout_to_copies(rs->md.layout));
if (rs->print_flags & DMPF_RAID10_FORMAT)
DMEMIT(" raid10_format near");
DMEMIT(" %d", rs->md.raid_disks); DMEMIT(" %d", rs->md.raid_disks);
for (i = 0; i < rs->md.raid_disks; i++) { for (i = 0; i < rs->md.raid_disks; i++) {
if (rs->dev[i].meta_dev) if (rs->dev[i].meta_dev)
...@@ -1277,7 +1360,7 @@ static void raid_resume(struct dm_target *ti) ...@@ -1277,7 +1360,7 @@ static void raid_resume(struct dm_target *ti)
static struct target_type raid_target = { static struct target_type raid_target = {
.name = "raid", .name = "raid",
.version = {1, 2, 0}, .version = {1, 3, 0},
.module = THIS_MODULE, .module = THIS_MODULE,
.ctr = raid_ctr, .ctr = raid_ctr,
.dtr = raid_dtr, .dtr = raid_dtr,
...@@ -1304,6 +1387,8 @@ module_init(dm_raid_init); ...@@ -1304,6 +1387,8 @@ module_init(dm_raid_init);
module_exit(dm_raid_exit); module_exit(dm_raid_exit);
MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target"); MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target");
MODULE_ALIAS("dm-raid1");
MODULE_ALIAS("dm-raid10");
MODULE_ALIAS("dm-raid4"); MODULE_ALIAS("dm-raid4");
MODULE_ALIAS("dm-raid5"); MODULE_ALIAS("dm-raid5");
MODULE_ALIAS("dm-raid6"); MODULE_ALIAS("dm-raid6");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment