Commit b522adcd authored by Dan Williams

md: 'array_size' sysfs attribute

Allow userspace to set the size of the array according to the following
semantics:

1/ size must be <= the size returned by mddev->pers->size(mddev, 0, 0)
   a) If size is set before the array is running, do_md_run will fail
      if size is greater than the default size
   b) A reshape attempt that reduces the default size to less than the set
      array size should be blocked
2/ once userspace sets the size the kernel will not change it
3/ writing 'default' to this attribute returns control of the size to the
   kernel and reverts to the size reported by the personality

Also, convert locations that need to know the default size from directly
reading ->array_sectors to <pers>_size.  Resync/reshape operations
always follow the default size.

Finally, fixup other locations that read a number of 1k-blocks from
userspace to use strict_blocks_to_sectors() which checks for unsigned
long long to sector_t overflow and blocks to sectors overflow.
Reviewed-by: Andre Noll <maan@systemlinux.org>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
parent 1f403624
...@@ -387,6 +387,11 @@ static inline int mddev_lock(mddev_t * mddev) ...@@ -387,6 +387,11 @@ static inline int mddev_lock(mddev_t * mddev)
return mutex_lock_interruptible(&mddev->reconfig_mutex); return mutex_lock_interruptible(&mddev->reconfig_mutex);
} }
/*
 * Return the result of mutex_is_locked() for the reconfig_mutex of @mddev.
 * Used to sanity-check that callers changing the array hold that mutex.
 */
static inline int mddev_is_locked(mddev_t *mddev)
{
	struct mutex *reconfig = &mddev->reconfig_mutex;

	return mutex_is_locked(reconfig);
}
static inline int mddev_trylock(mddev_t * mddev) static inline int mddev_trylock(mddev_t * mddev)
{ {
return mutex_trylock(&mddev->reconfig_mutex); return mutex_trylock(&mddev->reconfig_mutex);
...@@ -2282,16 +2287,34 @@ static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2) ...@@ -2282,16 +2287,34 @@ static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2)
return 1; return 1;
} }
/*
 * Parse a decimal block count from @buf and store twice that value
 * (the equivalent sector count) in *@sectors.
 *
 * Returns 0 on success.  Returns -EINVAL when the string does not parse,
 * when doubling the block count would overflow unsigned long long, or
 * when the doubled value does not fit in a sector_t.  *@sectors is only
 * written on success.
 */
static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
{
	const unsigned long long top_bit =
		1ULL << (8 * sizeof(unsigned long long) - 1);
	unsigned long long nr_blocks;
	unsigned long long doubled;
	sector_t result;

	if (strict_strtoull(buf, 10, &nr_blocks) < 0)
		return -EINVAL;

	/* top bit set: multiplying by two would wrap (sector conversion overflow) */
	if (nr_blocks & top_bit)
		return -EINVAL;

	doubled = nr_blocks * 2;
	result = doubled;

	/* value changed when narrowed: unsigned long long to sector_t overflow */
	if (result != doubled)
		return -EINVAL;

	*sectors = result;
	return 0;
}
static ssize_t static ssize_t
rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{ {
mddev_t *my_mddev = rdev->mddev; mddev_t *my_mddev = rdev->mddev;
sector_t oldsectors = rdev->sectors; sector_t oldsectors = rdev->sectors;
unsigned long long sectors; sector_t sectors;
if (strict_strtoull(buf, 10, &sectors) < 0) if (strict_blocks_to_sectors(buf, &sectors) < 0)
return -EINVAL; return -EINVAL;
sectors *= 2;
if (my_mddev->pers && rdev->raid_disk >= 0) { if (my_mddev->pers && rdev->raid_disk >= 0) {
if (my_mddev->persistent) { if (my_mddev->persistent) {
sectors = super_types[my_mddev->major_version]. sectors = super_types[my_mddev->major_version].
...@@ -3182,12 +3205,11 @@ size_store(mddev_t *mddev, const char *buf, size_t len) ...@@ -3182,12 +3205,11 @@ size_store(mddev_t *mddev, const char *buf, size_t len)
* not increase it (except from 0). * not increase it (except from 0).
* If array is active, we can try an on-line resize * If array is active, we can try an on-line resize
*/ */
unsigned long long sectors; sector_t sectors;
int err = strict_strtoull(buf, 10, &sectors); int err = strict_blocks_to_sectors(buf, &sectors);
if (err < 0) if (err < 0)
return err; return err;
sectors *= 2;
if (mddev->pers) { if (mddev->pers) {
err = update_size(mddev, sectors); err = update_size(mddev, sectors);
md_update_sb(mddev, 1); md_update_sb(mddev, 1);
...@@ -3627,6 +3649,57 @@ static struct md_sysfs_entry md_reshape_position = ...@@ -3627,6 +3649,57 @@ static struct md_sysfs_entry md_reshape_position =
__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show, __ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
reshape_position_store); reshape_position_store);
/*
 * sysfs read handler for 'array_size'.
 *
 * Writes "default" to @page when the size is not externally managed
 * (mddev->external_size is zero); otherwise writes array_sectors / 2 as
 * a decimal number.  Returns the character count reported by sprintf().
 */
static ssize_t
array_size_show(mddev_t *mddev, char *page)
{
	unsigned long long kblocks;

	if (!mddev->external_size)
		return sprintf(page, "default\n");

	kblocks = (unsigned long long)mddev->array_sectors / 2;
	return sprintf(page, "%llu\n", kblocks);
}
/*
 * sysfs write handler for 'array_size'.
 *
 * Input starting with "default" clears external_size and takes the size
 * from the personality's ->size() if one is attached, otherwise keeps the
 * current array_sectors.  Any other input is parsed with
 * strict_blocks_to_sectors(); it is rejected with -EINVAL if it fails to
 * parse or exceeds what ->size() reports, and otherwise marks the size as
 * externally managed.
 *
 * The chosen size is stored in array_sectors and pushed to the gendisk
 * capacity.  With a personality attached, the block device inode size is
 * updated as well.  Returns @len on success.
 */
static ssize_t
array_size_store(mddev_t *mddev, const char *buf, size_t len)
{
	struct block_device *bdev;
	sector_t new_size;
	int managed_externally;

	if (strncmp(buf, "default", 7) != 0) {
		if (strict_blocks_to_sectors(buf, &new_size) < 0)
			return -EINVAL;
		/* never allow more than the personality can provide */
		if (mddev->pers && mddev->pers->size(mddev, 0, 0) < new_size)
			return -EINVAL;
		managed_externally = 1;
	} else {
		if (mddev->pers)
			new_size = mddev->pers->size(mddev, 0, 0);
		else
			new_size = mddev->array_sectors;
		managed_externally = 0;
	}

	mddev->external_size = managed_externally;
	mddev->array_sectors = new_size;
	set_capacity(mddev->gendisk, new_size);

	/* no personality attached: skip the block device inode update */
	if (!mddev->pers)
		return len;

	bdev = bdget_disk(mddev->gendisk, 0);
	if (!bdev)
		return len;

	/* sectors -> bytes */
	mutex_lock(&bdev->bd_inode->i_mutex);
	i_size_write(bdev->bd_inode, (loff_t)new_size << 9);
	mutex_unlock(&bdev->bd_inode->i_mutex);
	bdput(bdev);

	return len;
}
/*
 * sysfs attribute 'array_size': mode S_IRUGO|S_IWUSR, read through
 * array_size_show() and written through array_size_store().
 */
static struct md_sysfs_entry md_array_size =
__ATTR(array_size, S_IRUGO|S_IWUSR, array_size_show,
array_size_store);
static struct attribute *md_default_attrs[] = { static struct attribute *md_default_attrs[] = {
&md_level.attr, &md_level.attr,
...@@ -3640,6 +3713,7 @@ static struct attribute *md_default_attrs[] = { ...@@ -3640,6 +3713,7 @@ static struct attribute *md_default_attrs[] = {
&md_safe_delay.attr, &md_safe_delay.attr,
&md_array_state.attr, &md_array_state.attr,
&md_reshape_position.attr, &md_reshape_position.attr,
&md_array_size.attr,
NULL, NULL,
}; };
...@@ -4045,7 +4119,17 @@ static int do_md_run(mddev_t * mddev) ...@@ -4045,7 +4119,17 @@ static int do_md_run(mddev_t * mddev)
err = mddev->pers->run(mddev); err = mddev->pers->run(mddev);
if (err) if (err)
printk(KERN_ERR "md: pers->run() failed ...\n"); printk(KERN_ERR "md: pers->run() failed ...\n");
else if (mddev->pers->sync_request) { else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) {
WARN_ONCE(!mddev->external_size, "%s: default size too small,"
" but 'external_size' not in effect?\n", __func__);
printk(KERN_ERR
"md: invalid array_size %llu > default size %llu\n",
(unsigned long long)mddev->array_sectors / 2,
(unsigned long long)mddev->pers->size(mddev, 0, 0) / 2);
err = -EINVAL;
mddev->pers->stop(mddev);
}
if (err == 0 && mddev->pers->sync_request) {
err = bitmap_create(mddev); err = bitmap_create(mddev);
if (err) { if (err) {
printk(KERN_ERR "%s: failed to create bitmap (%d)\n", printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
...@@ -4281,6 +4365,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) ...@@ -4281,6 +4365,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
export_array(mddev); export_array(mddev);
mddev->array_sectors = 0; mddev->array_sectors = 0;
mddev->external_size = 0;
mddev->dev_sectors = 0; mddev->dev_sectors = 0;
mddev->raid_disks = 0; mddev->raid_disks = 0;
mddev->recovery_cp = 0; mddev->recovery_cp = 0;
...@@ -4979,10 +5064,23 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) ...@@ -4979,10 +5064,23 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors) void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors)
{ {
WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
if (mddev->external_size)
return;
mddev->array_sectors = array_sectors; mddev->array_sectors = array_sectors;
} }
EXPORT_SYMBOL(md_set_array_sectors); EXPORT_SYMBOL(md_set_array_sectors);
/*
 * Variant of md_set_array_sectors() that takes the mddev reconfig_mutex
 * around the update, for callers that do not already hold it.
 *
 * mddev_lock() is mutex_lock_interruptible() and can fail.  This function
 * returns void and cannot report that failure; ignoring it would update
 * the size without the lock and then unlock a mutex this task never
 * acquired.  Take the mutex uninterruptibly so the update always happens
 * under the lock and the unlock is always balanced.
 */
void md_set_array_sectors_lock(mddev_t *mddev, sector_t array_sectors)
{
	mutex_lock(&mddev->reconfig_mutex);
	md_set_array_sectors(mddev, array_sectors);
	mddev_unlock(mddev);
}
EXPORT_SYMBOL(md_set_array_sectors_lock);
static int update_size(mddev_t *mddev, sector_t num_sectors) static int update_size(mddev_t *mddev, sector_t num_sectors)
{ {
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
......
...@@ -147,6 +147,8 @@ struct mddev_s ...@@ -147,6 +147,8 @@ struct mddev_s
sector_t dev_sectors; /* used size of sector_t dev_sectors; /* used size of
* component devices */ * component devices */
sector_t array_sectors; /* exported array size */ sector_t array_sectors; /* exported array size */
int external_size; /* size managed
* externally */
__u64 events; __u64 events;
char uuid[16]; char uuid[16];
...@@ -431,3 +433,4 @@ extern void md_new_event(mddev_t *mddev); ...@@ -431,3 +433,4 @@ extern void md_new_event(mddev_t *mddev);
extern int md_allow_write(mddev_t *mddev); extern int md_allow_write(mddev_t *mddev);
extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
extern void md_set_array_sectors_lock(mddev_t *mddev, sector_t array_sectors);
...@@ -313,7 +313,7 @@ static int raid0_run (mddev_t *mddev) ...@@ -313,7 +313,7 @@ static int raid0_run (mddev_t *mddev)
printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n", printk(KERN_INFO "raid0 : conf->spacing is %llu sectors.\n",
(unsigned long long)conf->spacing); (unsigned long long)conf->spacing);
{ {
sector_t s = mddev->array_sectors; sector_t s = raid0_size(mddev, 0, 0);
sector_t space = conf->spacing; sector_t space = conf->spacing;
int round; int round;
conf->sector_shift = 0; conf->sector_shift = 0;
......
...@@ -2125,14 +2125,16 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) ...@@ -2125,14 +2125,16 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
* worth it. * worth it.
*/ */
md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0)); md_set_array_sectors(mddev, raid1_size(mddev, sectors, 0));
if (mddev->array_sectors > raid1_size(mddev, sectors, 0))
return -EINVAL;
set_capacity(mddev->gendisk, mddev->array_sectors); set_capacity(mddev->gendisk, mddev->array_sectors);
mddev->changed = 1; mddev->changed = 1;
if (mddev->array_sectors > mddev->dev_sectors && if (sectors > mddev->dev_sectors &&
mddev->recovery_cp == MaxSector) { mddev->recovery_cp == MaxSector) {
mddev->recovery_cp = mddev->dev_sectors; mddev->recovery_cp = mddev->dev_sectors;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
} }
mddev->dev_sectors = mddev->array_sectors; mddev->dev_sectors = sectors;
mddev->resync_max_sectors = sectors; mddev->resync_max_sectors = sectors;
return 0; return 0;
} }
......
...@@ -2194,7 +2194,7 @@ static int run(mddev_t *mddev) ...@@ -2194,7 +2194,7 @@ static int run(mddev_t *mddev)
* Ok, everything is just fine now * Ok, everything is just fine now
*/ */
md_set_array_sectors(mddev, raid10_size(mddev, 0, 0)); md_set_array_sectors(mddev, raid10_size(mddev, 0, 0));
mddev->resync_max_sectors = mddev->array_sectors; mddev->resync_max_sectors = raid10_size(mddev, 0, 0);
mddev->queue->unplug_fn = raid10_unplug; mddev->queue->unplug_fn = raid10_unplug;
mddev->queue->backing_dev_info.congested_fn = raid10_congested; mddev->queue->backing_dev_info.congested_fn = raid10_congested;
......
...@@ -3703,6 +3703,8 @@ static int make_request(struct request_queue *q, struct bio * bi) ...@@ -3703,6 +3703,8 @@ static int make_request(struct request_queue *q, struct bio * bi)
return 0; return 0;
} }
static sector_t raid5_size(mddev_t *mddev, sector_t sectors, int raid_disks);
static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped) static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped)
{ {
/* reshaping is quite different to recovery/resync so it is /* reshaping is quite different to recovery/resync so it is
...@@ -3781,7 +3783,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped ...@@ -3781,7 +3783,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
j == sh->qd_idx) j == sh->qd_idx)
continue; continue;
s = compute_blocknr(sh, j); s = compute_blocknr(sh, j);
if (s < mddev->array_sectors) { if (s < raid5_size(mddev, 0, 0)) {
skipped = 1; skipped = 1;
continue; continue;
} }
...@@ -4700,6 +4702,9 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) ...@@ -4700,6 +4702,9 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
sectors &= ~((sector_t)mddev->chunk_size/512 - 1); sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
md_set_array_sectors(mddev, raid5_size(mddev, sectors, md_set_array_sectors(mddev, raid5_size(mddev, sectors,
mddev->raid_disks)); mddev->raid_disks));
if (mddev->array_sectors >
raid5_size(mddev, sectors, mddev->raid_disks))
return -EINVAL;
set_capacity(mddev->gendisk, mddev->array_sectors); set_capacity(mddev->gendisk, mddev->array_sectors);
mddev->changed = 1; mddev->changed = 1;
if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
...@@ -4837,7 +4842,7 @@ static void end_reshape(raid5_conf_t *conf) ...@@ -4837,7 +4842,7 @@ static void end_reshape(raid5_conf_t *conf)
if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) {
mddev_t *mddev = conf->mddev; mddev_t *mddev = conf->mddev;
md_set_array_sectors(mddev, raid5_size(mddev, 0, md_set_array_sectors_lock(mddev, raid5_size(mddev, 0,
conf->raid_disks)); conf->raid_disks));
set_capacity(mddev->gendisk, mddev->array_sectors); set_capacity(mddev->gendisk, mddev->array_sectors);
mddev->changed = 1; mddev->changed = 1;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment