Commit 1797a796 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] md: Allow partitioning of MD devices.

From: NeilBrown <neilb@cse.unsw.edu.au>

With this patch, md uses two major numbers for arrays.

One major is number 9, with name 'md', and has unpartitioned md arrays, one per
minor number.

The other major is allocated dynamically, with name 'mdp', and has one array for
every 64 minors, allowing for up to 63 partitions.

The arrays under one major are completely separate from the arrays under the
other.

The preferred names for devices with the new major are of the form:

  /dev/md/d1p3  # partition 3 of device 1 - minor 67

When a partitioned md device is assembled, the partitions are not recognised
until after the whole-array device is opened again.  A future version of
mdadm will perform this open so that the need will be transparent.
parent 5077fef0
...@@ -52,6 +52,9 @@ ...@@ -52,6 +52,9 @@
#define MAJOR_NR MD_MAJOR #define MAJOR_NR MD_MAJOR
#define MD_DRIVER #define MD_DRIVER
/* 63 partitions with the alternate major number (mdp) */
#define MdpMinorShift 6
#define DEBUG 0 #define DEBUG 0
#define dprintk(x...) ((void)(DEBUG && printk(x))) #define dprintk(x...) ((void)(DEBUG && printk(x)))
...@@ -178,14 +181,14 @@ static void mddev_put(mddev_t *mddev) ...@@ -178,14 +181,14 @@ static void mddev_put(mddev_t *mddev)
spin_unlock(&all_mddevs_lock); spin_unlock(&all_mddevs_lock);
} }
static mddev_t * mddev_find(int unit) static mddev_t * mddev_find(dev_t unit)
{ {
mddev_t *mddev, *new = NULL; mddev_t *mddev, *new = NULL;
retry: retry:
spin_lock(&all_mddevs_lock); spin_lock(&all_mddevs_lock);
list_for_each_entry(mddev, &all_mddevs, all_mddevs) list_for_each_entry(mddev, &all_mddevs, all_mddevs)
if (mdidx(mddev) == unit) { if (mddev->unit == unit) {
mddev_get(mddev); mddev_get(mddev);
spin_unlock(&all_mddevs_lock); spin_unlock(&all_mddevs_lock);
if (new) if (new)
...@@ -206,7 +209,12 @@ static mddev_t * mddev_find(int unit) ...@@ -206,7 +209,12 @@ static mddev_t * mddev_find(int unit)
memset(new, 0, sizeof(*new)); memset(new, 0, sizeof(*new));
new->__minor = unit; new->unit = unit;
if (MAJOR(unit) == MD_MAJOR)
new->md_minor = MINOR(unit);
else
new->md_minor = MINOR(unit) >> MdpMinorShift;
init_MUTEX(&new->reconfig_sem); init_MUTEX(&new->reconfig_sem);
INIT_LIST_HEAD(&new->disks); INIT_LIST_HEAD(&new->disks);
INIT_LIST_HEAD(&new->all_mddevs); INIT_LIST_HEAD(&new->all_mddevs);
...@@ -660,7 +668,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) ...@@ -660,7 +668,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
sb->level = mddev->level; sb->level = mddev->level;
sb->size = mddev->size; sb->size = mddev->size;
sb->raid_disks = mddev->raid_disks; sb->raid_disks = mddev->raid_disks;
sb->md_minor = mddev->__minor; sb->md_minor = mddev->md_minor;
sb->not_persistent = !mddev->persistent; sb->not_persistent = !mddev->persistent;
sb->utime = mddev->utime; sb->utime = mddev->utime;
sb->state = 0; sb->state = 0;
...@@ -1442,13 +1450,16 @@ static int analyze_sbs(mddev_t * mddev) ...@@ -1442,13 +1450,16 @@ static int analyze_sbs(mddev_t * mddev)
return 1; return 1;
} }
static int mdp_major = 0;
static struct kobject *md_probe(dev_t dev, int *part, void *data) static struct kobject *md_probe(dev_t dev, int *part, void *data)
{ {
static DECLARE_MUTEX(disks_sem); static DECLARE_MUTEX(disks_sem);
int unit = *part; mddev_t *mddev = mddev_find(dev);
mddev_t *mddev = mddev_find(unit);
struct gendisk *disk; struct gendisk *disk;
int partitioned = (MAJOR(dev) != MD_MAJOR);
int shift = partitioned ? MdpMinorShift : 0;
int unit = MINOR(dev) >> shift;
if (!mddev) if (!mddev)
return NULL; return NULL;
...@@ -1459,15 +1470,18 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) ...@@ -1459,15 +1470,18 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
mddev_put(mddev); mddev_put(mddev);
return NULL; return NULL;
} }
disk = alloc_disk(1); disk = alloc_disk(1 << shift);
if (!disk) { if (!disk) {
up(&disks_sem); up(&disks_sem);
mddev_put(mddev); mddev_put(mddev);
return NULL; return NULL;
} }
disk->major = MD_MAJOR; disk->major = MAJOR(dev);
disk->first_minor = mdidx(mddev); disk->first_minor = unit << shift;
sprintf(disk->disk_name, "md%d", mdidx(mddev)); if (partitioned)
sprintf(disk->disk_name, "md_d%d", unit);
else
sprintf(disk->disk_name, "md%d", unit);
disk->fops = &md_fops; disk->fops = &md_fops;
disk->private_data = mddev; disk->private_data = mddev;
disk->queue = mddev->queue; disk->queue = mddev->queue;
...@@ -1496,7 +1510,6 @@ static int do_md_run(mddev_t * mddev) ...@@ -1496,7 +1510,6 @@ static int do_md_run(mddev_t * mddev)
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct gendisk *disk; struct gendisk *disk;
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
int unit;
if (list_empty(&mddev->disks)) { if (list_empty(&mddev->disks)) {
MD_BUG(); MD_BUG();
...@@ -1588,8 +1601,7 @@ static int do_md_run(mddev_t * mddev) ...@@ -1588,8 +1601,7 @@ static int do_md_run(mddev_t * mddev)
invalidate_bdev(rdev->bdev, 0); invalidate_bdev(rdev->bdev, 0);
} }
unit = mdidx(mddev); md_probe(mddev->unit, NULL, NULL);
md_probe(0, &unit, NULL);
disk = mddev->gendisk; disk = mddev->gendisk;
if (!disk) if (!disk)
return -ENOMEM; return -ENOMEM;
...@@ -1636,6 +1648,7 @@ static int do_md_run(mddev_t * mddev) ...@@ -1636,6 +1648,7 @@ static int do_md_run(mddev_t * mddev)
mddev->queue->queuedata = mddev; mddev->queue->queuedata = mddev;
mddev->queue->make_request_fn = mddev->pers->make_request; mddev->queue->make_request_fn = mddev->pers->make_request;
mddev->changed = 1;
return 0; return 0;
} }
...@@ -1735,6 +1748,7 @@ static int do_md_stop(mddev_t * mddev, int ro) ...@@ -1735,6 +1748,7 @@ static int do_md_stop(mddev_t * mddev, int ro)
disk = mddev->gendisk; disk = mddev->gendisk;
if (disk) if (disk)
set_capacity(disk, 0); set_capacity(disk, 0);
mddev->changed = 1;
} else } else
printk(KERN_INFO "md: %s switched to read-only mode.\n", printk(KERN_INFO "md: %s switched to read-only mode.\n",
mdname(mddev)); mdname(mddev));
...@@ -1791,6 +1805,7 @@ static void autorun_devices(void) ...@@ -1791,6 +1805,7 @@ static void autorun_devices(void)
printk(KERN_INFO "md: autorun ...\n"); printk(KERN_INFO "md: autorun ...\n");
while (!list_empty(&pending_raid_disks)) { while (!list_empty(&pending_raid_disks)) {
dev_t dev;
rdev0 = list_entry(pending_raid_disks.next, rdev0 = list_entry(pending_raid_disks.next,
mdk_rdev_t, same_set); mdk_rdev_t, same_set);
...@@ -1808,8 +1823,14 @@ static void autorun_devices(void) ...@@ -1808,8 +1823,14 @@ static void autorun_devices(void)
* mostly sane superblocks. It's time to allocate the * mostly sane superblocks. It's time to allocate the
* mddev. * mddev.
*/ */
if (rdev0->preferred_minor < 0 || rdev0->preferred_minor >= MAX_MD_DEVS) {
mddev = mddev_find(rdev0->preferred_minor); printk(KERN_INFO "md: unit number in %s is bad: %d\n",
bdevname(rdev0->bdev, b), rdev0->preferred_minor);
break;
}
dev = MKDEV(MD_MAJOR, rdev0->preferred_minor);
md_probe(dev, NULL, NULL);
mddev = mddev_find(dev);
if (!mddev) { if (!mddev) {
printk(KERN_ERR printk(KERN_ERR
"md: cannot allocate memory for md drive.\n"); "md: cannot allocate memory for md drive.\n");
...@@ -1824,7 +1845,7 @@ static void autorun_devices(void) ...@@ -1824,7 +1845,7 @@ static void autorun_devices(void)
"md: %s already running, cannot run %s\n", "md: %s already running, cannot run %s\n",
mdname(mddev), bdevname(rdev0->bdev,b)); mdname(mddev), bdevname(rdev0->bdev,b));
mddev_unlock(mddev); mddev_unlock(mddev);
} else if (rdev0->preferred_minor >= 0 && rdev0->preferred_minor < MAX_MD_DEVS) { } else {
printk(KERN_INFO "md: created %s\n", mdname(mddev)); printk(KERN_INFO "md: created %s\n", mdname(mddev));
ITERATE_RDEV_GENERIC(candidates,rdev,tmp) { ITERATE_RDEV_GENERIC(candidates,rdev,tmp) {
list_del_init(&rdev->same_set); list_del_init(&rdev->same_set);
...@@ -1833,9 +1854,7 @@ static void autorun_devices(void) ...@@ -1833,9 +1854,7 @@ static void autorun_devices(void)
} }
autorun_array(mddev); autorun_array(mddev);
mddev_unlock(mddev); mddev_unlock(mddev);
} else }
printk(KERN_WARNING "md: %s had invalid preferred minor %d\n",
bdevname(rdev->bdev, b), rdev0->preferred_minor);
/* on success, candidates will be empty, on error /* on success, candidates will be empty, on error
* it won't... * it won't...
*/ */
...@@ -1955,7 +1974,7 @@ static int get_array_info(mddev_t * mddev, void * arg) ...@@ -1955,7 +1974,7 @@ static int get_array_info(mddev_t * mddev, void * arg)
info.size = mddev->size; info.size = mddev->size;
info.nr_disks = nr; info.nr_disks = nr;
info.raid_disks = mddev->raid_disks; info.raid_disks = mddev->raid_disks;
info.md_minor = mddev->__minor; info.md_minor = mddev->md_minor;
info.not_persistent= !mddev->persistent; info.not_persistent= !mddev->persistent;
info.utime = mddev->utime; info.utime = mddev->utime;
...@@ -2326,7 +2345,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) ...@@ -2326,7 +2345,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
mddev->level = info->level; mddev->level = info->level;
mddev->size = info->size; mddev->size = info->size;
mddev->raid_disks = info->raid_disks; mddev->raid_disks = info->raid_disks;
/* don't set __minor, it is determined by which /dev/md* was /* don't set md_minor, it is determined by which /dev/md* was
* openned * openned
*/ */
if (info->state & (1<<MD_SB_CLEAN)) if (info->state & (1<<MD_SB_CLEAN))
...@@ -2366,7 +2385,6 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2366,7 +2385,6 @@ static int md_ioctl(struct inode *inode, struct file *file,
unsigned int cmd, unsigned long arg) unsigned int cmd, unsigned long arg)
{ {
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
unsigned int minor = iminor(inode);
int err = 0; int err = 0;
struct hd_geometry *loc = (struct hd_geometry *) arg; struct hd_geometry *loc = (struct hd_geometry *) arg;
mddev_t *mddev = NULL; mddev_t *mddev = NULL;
...@@ -2374,11 +2392,6 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2374,11 +2392,6 @@ static int md_ioctl(struct inode *inode, struct file *file,
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EACCES; return -EACCES;
if (minor >= MAX_MD_DEVS) {
MD_BUG();
return -EINVAL;
}
/* /*
* Commands dealing with the RAID driver but not any * Commands dealing with the RAID driver but not any
* particular array: * particular array:
...@@ -2620,6 +2633,7 @@ static int md_open(struct inode *inode, struct file *file) ...@@ -2620,6 +2633,7 @@ static int md_open(struct inode *inode, struct file *file)
mddev_get(mddev); mddev_get(mddev);
mddev_unlock(mddev); mddev_unlock(mddev);
check_disk_change(inode->i_bdev);
out: out:
return err; return err;
} }
...@@ -2635,12 +2649,28 @@ static int md_release(struct inode *inode, struct file * file) ...@@ -2635,12 +2649,28 @@ static int md_release(struct inode *inode, struct file * file)
return 0; return 0;
} }
/*
 * media_changed hook for md disks.
 *
 * Returns the 'changed' flag of the mddev_t stored in disk->private_data;
 * the flag is non-zero when partition info might need to be reread.
 */
static int md_media_changed(struct gendisk *disk)
{
	const mddev_t *array = disk->private_data;

	return array->changed;
}
static int md_revalidate(struct gendisk *disk)
{
mddev_t *mddev = disk->private_data;
mddev->changed = 0;
return 0;
}
static struct block_device_operations md_fops = static struct block_device_operations md_fops =
{ {
.owner = THIS_MODULE, .owner = THIS_MODULE,
.open = md_open, .open = md_open,
.release = md_release, .release = md_release,
.ioctl = md_ioctl, .ioctl = md_ioctl,
.media_changed = md_media_changed,
.revalidate_disk= md_revalidate,
}; };
int md_thread(void * arg) int md_thread(void * arg)
...@@ -3505,16 +3535,26 @@ int __init md_init(void) ...@@ -3505,16 +3535,26 @@ int __init md_init(void)
if (register_blkdev(MAJOR_NR, "md")) if (register_blkdev(MAJOR_NR, "md"))
return -1; return -1;
if ((mdp_major=register_blkdev(0, "mdp"))<=0) {
unregister_blkdev(MAJOR_NR, "md");
return -1;
}
devfs_mk_dir("md"); devfs_mk_dir("md");
blk_register_region(MKDEV(MAJOR_NR, 0), MAX_MD_DEVS, THIS_MODULE, blk_register_region(MKDEV(MAJOR_NR, 0), MAX_MD_DEVS, THIS_MODULE,
md_probe, NULL, NULL); md_probe, NULL, NULL);
blk_register_region(MKDEV(mdp_major, 0), MAX_MD_DEVS<<MdpMinorShift, THIS_MODULE,
md_probe, NULL, NULL);
for (minor=0; minor < MAX_MD_DEVS; ++minor) { for (minor=0; minor < MAX_MD_DEVS; ++minor)
devfs_mk_bdev(MKDEV(MAJOR_NR, minor), devfs_mk_bdev(MKDEV(MAJOR_NR, minor),
S_IFBLK|S_IRUSR|S_IWUSR, S_IFBLK|S_IRUSR|S_IWUSR,
"md/%d", minor); "md/%d", minor);
}
for (minor=0; minor < MAX_MD_DEVS; ++minor)
devfs_mk_bdev(MKDEV(mdp_major, minor<<MdpMinorShift),
S_IFBLK|S_IRUSR|S_IWUSR,
"md/d%d", minor);
register_reboot_notifier(&md_notifier); register_reboot_notifier(&md_notifier);
raid_table_header = register_sysctl_table(raid_root_table, 1); raid_table_header = register_sysctl_table(raid_root_table, 1);
...@@ -3576,11 +3616,16 @@ static __exit void md_exit(void) ...@@ -3576,11 +3616,16 @@ static __exit void md_exit(void)
struct list_head *tmp; struct list_head *tmp;
int i; int i;
blk_unregister_region(MKDEV(MAJOR_NR,0), MAX_MD_DEVS); blk_unregister_region(MKDEV(MAJOR_NR,0), MAX_MD_DEVS);
blk_unregister_region(MKDEV(mdp_major,0), MAX_MD_DEVS << MdpMinorShift);
for (i=0; i < MAX_MD_DEVS; i++) for (i=0; i < MAX_MD_DEVS; i++)
devfs_remove("md/%d", i); devfs_remove("md/%d", i);
for (i=0; i < MAX_MD_DEVS; i++)
devfs_remove("md/d%d", i);
devfs_remove("md"); devfs_remove("md");
unregister_blkdev(MAJOR_NR,"md"); unregister_blkdev(MAJOR_NR,"md");
unregister_blkdev(mdp_major, "mdp");
unregister_reboot_notifier(&md_notifier); unregister_reboot_notifier(&md_notifier);
unregister_sysctl_table(raid_table_header); unregister_sysctl_table(raid_table_header);
remove_proc_entry("mdstat", NULL); remove_proc_entry("mdstat", NULL);
......
...@@ -284,7 +284,7 @@ static int grow_stripes(raid5_conf_t *conf, int num) ...@@ -284,7 +284,7 @@ static int grow_stripes(raid5_conf_t *conf, int num)
kmem_cache_t *sc; kmem_cache_t *sc;
int devs = conf->raid_disks; int devs = conf->raid_disks;
sprintf(conf->cache_name, "md/raid5-%d", conf->mddev->__minor); sprintf(conf->cache_name, "raid5/%s", mdname(conf->mddev));
sc = kmem_cache_create(conf->cache_name, sc = kmem_cache_create(conf->cache_name,
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
......
...@@ -303,7 +303,7 @@ static int grow_stripes(raid6_conf_t *conf, int num) ...@@ -303,7 +303,7 @@ static int grow_stripes(raid6_conf_t *conf, int num)
kmem_cache_t *sc; kmem_cache_t *sc;
int devs = conf->raid_disks; int devs = conf->raid_disks;
sprintf(conf->cache_name, "md/raid6-%d", conf->mddev->__minor); sprintf(conf->cache_name, "raid6/%s", mdname(conf->mddev));
sc = kmem_cache_create(conf->cache_name, sc = kmem_cache_create(conf->cache_name,
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
......
...@@ -186,7 +186,8 @@ struct mddev_s ...@@ -186,7 +186,8 @@ struct mddev_s
{ {
void *private; void *private;
mdk_personality_t *pers; mdk_personality_t *pers;
int __minor; dev_t unit;
int md_minor;
struct list_head disks; struct list_head disks;
int sb_dirty; int sb_dirty;
int ro; int ro;
...@@ -235,6 +236,7 @@ struct mddev_s ...@@ -235,6 +236,7 @@ struct mddev_s
struct semaphore reconfig_sem; struct semaphore reconfig_sem;
atomic_t active; atomic_t active;
int changed; /* true if we might need to reread partition info */
int degraded; /* whether md should consider int degraded; /* whether md should consider
* adding a spare * adding a spare
*/ */
...@@ -272,15 +274,6 @@ struct mdk_personality_s ...@@ -272,15 +274,6 @@ struct mdk_personality_s
}; };
/*
* Currently we index md_array directly, based on the minor
* number. This will have to change to dynamic allocation
* once we start supporting partitioning of md devices.
*/
static inline int mdidx (mddev_t * mddev)
{
return mddev->__minor;
}
static inline char * mdname (mddev_t * mddev) static inline char * mdname (mddev_t * mddev)
{ {
return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment