Commit 24f27632 authored by Dave Jones's avatar Dave Jones

[PATCH] LVM fixes.

Numerous LVM fixes & cleanups.
Gets things compiling again, and allegedly works according to feedback.
Mostly from Anders in the absence of any maintenance by the LVM people.

There are two critical problems with lvm in 2.5.2-pre11 (and earlier).
* ioctls on the chardev allocates a >4k lv_t on stack. if the ioctl is
LV_CREATE, LV_EXTEND, LV_REDUCE or LV_RENAME data is copied into this
and the task_struct will most certainly be corrupted.
* sizeof(lv_t) differs between user and kernelspace. The userspace version
of the lv_t structure is much smaller than the kernelspace version. This
leads to corruption of memory in the userspace application when an lv_t is
copied from kernelspace to userspace, as in "vgdisplay -v"

The following patch addresses these two issues. It puts the user-space
version of the lv_t into a substructure of the kernelspace version. When
communicating to userspace just the userlv_t part is used. This avoids the
allocation of the lv_t on the stack by allocating just a userlv_t instead.
parent 4d32c6ba
......@@ -170,11 +170,11 @@ static inline const char *_basename(const char *str) {
devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv) {
struct proc_dir_entry *pde;
const char *name = _basename(lv->lv_name);
const char *name = _basename(lv->u.lv_name);
lv_devfs_handle[minor(lv->lv_dev)] = devfs_register(
lv_devfs_handle[minor(lv->u.lv_dev)] = devfs_register(
vg_devfs_handle[vg_ptr->vg_number], name,
DEVFS_FL_DEFAULT, LVM_BLK_MAJOR, minor(lv->lv_dev),
DEVFS_FL_DEFAULT, LVM_BLK_MAJOR, minor(lv->u.lv_dev),
S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
&lvm_blk_dops, NULL);
......@@ -183,15 +183,15 @@ devfs_handle_t lvm_fs_create_lv(vg_t *vg_ptr, lv_t *lv) {
pde->read_proc = _proc_read_lv;
pde->data = lv;
}
return lv_devfs_handle[minor(lv->lv_dev)];
return lv_devfs_handle[minor(lv->u.lv_dev)];
}
void lvm_fs_remove_lv(vg_t *vg_ptr, lv_t *lv) {
devfs_unregister(lv_devfs_handle[minor(lv->lv_dev)]);
lv_devfs_handle[minor(lv->lv_dev)] = NULL;
devfs_unregister(lv_devfs_handle[minor(lv->u.lv_dev)]);
lv_devfs_handle[minor(lv->u.lv_dev)] = NULL;
if(vg_ptr->lv_subdir_pde) {
const char *name = _basename(lv->lv_name);
const char *name = _basename(lv->u.lv_name);
remove_proc_entry(name, vg_ptr->lv_subdir_pde);
}
}
......@@ -269,21 +269,21 @@ static int _proc_read_lv(char *page, char **start, off_t off,
int sz = 0;
lv_t *lv = data;
sz += sprintf(page + sz, "name: %s\n", lv->lv_name);
sz += sprintf(page + sz, "size: %u\n", lv->lv_size);
sz += sprintf(page + sz, "access: %u\n", lv->lv_access);
sz += sprintf(page + sz, "status: %u\n", lv->lv_status);
sz += sprintf(page + sz, "number: %u\n", lv->lv_number);
sz += sprintf(page + sz, "open: %u\n", lv->lv_open);
sz += sprintf(page + sz, "allocation: %u\n", lv->lv_allocation);
if(lv->lv_stripes > 1) {
sz += sprintf(page + sz, "name: %s\n", lv->u.lv_name);
sz += sprintf(page + sz, "size: %u\n", lv->u.lv_size);
sz += sprintf(page + sz, "access: %u\n", lv->u.lv_access);
sz += sprintf(page + sz, "status: %u\n", lv->u.lv_status);
sz += sprintf(page + sz, "number: %u\n", lv->u.lv_number);
sz += sprintf(page + sz, "open: %u\n", lv->u.lv_open);
sz += sprintf(page + sz, "allocation: %u\n", lv->u.lv_allocation);
if(lv->u.lv_stripes > 1) {
sz += sprintf(page + sz, "stripes: %u\n",
lv->lv_stripes);
lv->u.lv_stripes);
sz += sprintf(page + sz, "stripesize: %u\n",
lv->lv_stripesize);
lv->u.lv_stripesize);
}
sz += sprintf(page + sz, "device: %02u:%02u\n",
major(lv->lv_dev), minor(lv->lv_dev));
major(lv->u.lv_dev), minor(lv->u.lv_dev));
return sz;
}
......@@ -350,13 +350,13 @@ static int _proc_read_global(char *page, char **start, off_t pos, int count,
if (vg_ptr->lv_cur > 0) {
for (l = 0; l < vg[v]->lv_max; l++) {
if ((lv_ptr = vg_ptr->lv[l]) != NULL) {
pe_t_bytes += lv_ptr->lv_allocated_le;
pe_t_bytes += lv_ptr->u.lv_allocated_le;
hash_table_bytes += lv_ptr->lv_snapshot_hash_table_size;
if (lv_ptr->lv_block_exception != NULL)
lv_block_exception_t_bytes += lv_ptr->lv_remap_end;
if (lv_ptr->lv_open > 0) {
if (lv_ptr->u.lv_block_exception != NULL)
lv_block_exception_t_bytes += lv_ptr->u.lv_remap_end;
if (lv_ptr->u.lv_open > 0) {
lv_open_counter++;
lv_open_total += lv_ptr->lv_open;
lv_open_total += lv_ptr->u.lv_open;
}
}
}
......@@ -532,16 +532,16 @@ static int _lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf) {
char inactive_flag = 'A', allocation_flag = ' ',
stripes_flag = ' ', rw_flag = ' ', *basename;
if (!(lv_ptr->lv_status & LV_ACTIVE))
if (!(lv_ptr->u.lv_status & LV_ACTIVE))
inactive_flag = 'I';
rw_flag = 'R';
if (lv_ptr->lv_access & LV_WRITE)
if (lv_ptr->u.lv_access & LV_WRITE)
rw_flag = 'W';
allocation_flag = 'D';
if (lv_ptr->lv_allocation & LV_CONTIGUOUS)
if (lv_ptr->u.lv_allocation & LV_CONTIGUOUS)
allocation_flag = 'C';
stripes_flag = 'L';
if (lv_ptr->lv_stripes > 1)
if (lv_ptr->u.lv_stripes > 1)
stripes_flag = 'S';
sz += sprintf(buf+sz,
"[%c%c%c%c",
......@@ -549,29 +549,29 @@ static int _lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf) {
rw_flag,
allocation_flag,
stripes_flag);
if (lv_ptr->lv_stripes > 1)
if (lv_ptr->u.lv_stripes > 1)
sz += sprintf(buf+sz, "%-2d",
lv_ptr->lv_stripes);
lv_ptr->u.lv_stripes);
else
sz += sprintf(buf+sz, " ");
/* FIXME: use _basename */
basename = strrchr(lv_ptr->lv_name, '/');
if ( basename == 0) basename = lv_ptr->lv_name;
basename = strrchr(lv_ptr->u.lv_name, '/');
if ( basename == 0) basename = lv_ptr->u.lv_name;
else basename++;
sz += sprintf(buf+sz, "] %-25s", basename);
if (strlen(basename) > 25)
sz += sprintf(buf+sz,
"\n ");
sz += sprintf(buf+sz, "%9d /%-6d ",
lv_ptr->lv_size >> 1,
lv_ptr->lv_size / vg_ptr->pe_size);
lv_ptr->u.lv_size >> 1,
lv_ptr->u.lv_size / vg_ptr->pe_size);
if (lv_ptr->lv_open == 0)
if (lv_ptr->u.lv_open == 0)
sz += sprintf(buf+sz, "close");
else
sz += sprintf(buf+sz, "%dx open",
lv_ptr->lv_open);
lv_ptr->u.lv_open);
return sz;
}
......
......@@ -93,7 +93,7 @@ lvm_find_exception_table(kdev_t org_dev, unsigned long org_start, lv_t * lv)
{
struct list_head * hash_table = lv->lv_snapshot_hash_table, * next;
unsigned long mask = lv->lv_snapshot_hash_mask;
int chunk_size = lv->lv_chunk_size;
int chunk_size = lv->u.lv_chunk_size;
lv_block_exception_t * ret;
int i = 0;
......@@ -127,7 +127,7 @@ inline void lvm_hash_link(lv_block_exception_t * exception,
{
struct list_head * hash_table = lv->lv_snapshot_hash_table;
unsigned long mask = lv->lv_snapshot_hash_mask;
int chunk_size = lv->lv_chunk_size;
int chunk_size = lv->u.lv_chunk_size;
hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)];
list_add(&exception->hash, hash_table);
......@@ -139,7 +139,7 @@ int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector,
int ret;
unsigned long pe_off, pe_adjustment, __org_start;
kdev_t __org_dev;
int chunk_size = lv->lv_chunk_size;
int chunk_size = lv->u.lv_chunk_size;
lv_block_exception_t * exception;
pe_off = pe_start % chunk_size;
......@@ -164,26 +164,26 @@ void lvm_drop_snapshot(vg_t *vg, lv_t *lv_snap, const char *reason)
/* no exception storage space available for this snapshot
or error on this snapshot --> release it */
invalidate_buffers(lv_snap->lv_dev);
invalidate_buffers(lv_snap->u.lv_dev);
/* wipe the snapshot since it's inconsistent now */
_disable_snapshot(vg, lv_snap);
last_dev = NODEV;
for (i = 0; i < lv_snap->lv_remap_ptr; i++) {
if ( !kdev_same(lv_snap->lv_block_exception[i].rdev_new,
for (i = 0; i < lv_snap->u.lv_remap_ptr; i++) {
if ( !kdev_same(lv_snap->u.lv_block_exception[i].rdev_new,
last_dev)) {
last_dev = lv_snap->lv_block_exception[i].rdev_new;
last_dev = lv_snap->u.lv_block_exception[i].rdev_new;
invalidate_buffers(last_dev);
}
}
lvm_snapshot_release(lv_snap);
lv_snap->lv_status &= ~LV_ACTIVE;
lv_snap->u.lv_status &= ~LV_ACTIVE;
printk(KERN_INFO
"%s -- giving up to snapshot %s on %s: %s\n",
lvm_name, lv_snap->lv_snapshot_org->lv_name, lv_snap->lv_name,
lvm_name, lv_snap->u.lv_snapshot_org->u.lv_name, lv_snap->u.lv_name,
reason);
}
......@@ -234,7 +234,7 @@ static inline void invalidate_snap_cache(unsigned long start, unsigned long nr,
int lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap)
{
uint pvn;
int id = 0, is = lv_snap->lv_remap_ptr;
int id = 0, is = lv_snap->u.lv_remap_ptr;
ulong blksize_snap;
lv_COW_table_disk_t * lv_COW_table = (lv_COW_table_disk_t *)
page_address(lv_snap->lv_COW_table_iobuf->maplist[0]);
......@@ -244,13 +244,13 @@ int lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap)
is--;
blksize_snap =
block_size(lv_snap->lv_block_exception[is].rdev_new);
block_size(lv_snap->u.lv_block_exception[is].rdev_new);
is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t));
memset(lv_COW_table, 0, blksize_snap);
for ( ; is < lv_snap->lv_remap_ptr; is++, id++) {
for ( ; is < lv_snap->u.lv_remap_ptr; is++, id++) {
/* store new COW_table entry */
lv_block_exception_t *be = lv_snap->lv_block_exception + is;
lv_block_exception_t *be = lv_snap->u.lv_block_exception + is;
if(_pv_get_number(vg, be->rdev_org, &pvn))
goto bad;
......@@ -281,7 +281,7 @@ int lvm_write_COW_table_block(vg_t * vg, lv_t *lv_snap)
int r;
const char *err;
if((r = _write_COW_table_block(vg, lv_snap,
lv_snap->lv_remap_ptr - 1, &err)))
lv_snap->u.lv_remap_ptr - 1, &err)))
lvm_drop_snapshot(vg, lv_snap, err);
return r;
}
......@@ -303,13 +303,13 @@ int lvm_snapshot_COW(kdev_t org_phys_dev,
const char * reason;
kdev_t snap_phys_dev;
unsigned long org_start, snap_start, virt_start, pe_off;
int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
int idx = lv_snap->u.lv_remap_ptr, chunk_size = lv_snap->u.lv_chunk_size;
struct kiobuf * iobuf;
int blksize_snap, blksize_org, min_blksize, max_blksize;
int max_sectors, nr_sectors;
/* check if we are out of snapshot space */
if (idx >= lv_snap->lv_remap_end)
if (idx >= lv_snap->u.lv_remap_end)
goto fail_out_of_space;
/* calculate physical boundaries of source chunk */
......@@ -318,8 +318,8 @@ int lvm_snapshot_COW(kdev_t org_phys_dev,
virt_start = org_virt_sector - (org_phys_sector - org_start);
/* calculate physical boundaries of destination chunk */
snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
snap_start = lv_snap->lv_block_exception[idx].rsector_new;
snap_phys_dev = lv_snap->u.lv_block_exception[idx].rdev_new;
snap_start = lv_snap->u.lv_block_exception[idx].rsector_new;
#ifdef DEBUG_SNAPSHOT
printk(KERN_INFO
......@@ -371,20 +371,20 @@ int lvm_snapshot_COW(kdev_t org_phys_dev,
#ifdef DEBUG_SNAPSHOT
/* invalidate the logical snapshot buffer cache */
invalidate_snap_cache(virt_start, lv_snap->lv_chunk_size,
lv_snap->lv_dev);
invalidate_snap_cache(virt_start, lv_snap->u.lv_chunk_size,
lv_snap->u.lv_dev);
#endif
/* the original chunk is now stored on the snapshot volume
so update the exception table */
lv_snap->lv_block_exception[idx].rdev_org = org_phys_dev;
lv_snap->lv_block_exception[idx].rsector_org = org_start;
lv_snap->u.lv_block_exception[idx].rdev_org = org_phys_dev;
lv_snap->u.lv_block_exception[idx].rsector_org = org_start;
lvm_hash_link(lv_snap->lv_block_exception + idx,
lvm_hash_link(lv_snap->u.lv_block_exception + idx,
org_phys_dev, org_start, lv_snap);
lv_snap->lv_remap_ptr = idx + 1;
lv_snap->u.lv_remap_ptr = idx + 1;
if (lv_snap->lv_snapshot_use_rate > 0) {
if (lv_snap->lv_remap_ptr * 100 / lv_snap->lv_remap_end >= lv_snap->lv_snapshot_use_rate)
if (lv_snap->u.lv_remap_ptr * 100 / lv_snap->u.lv_remap_end >= lv_snap->lv_snapshot_use_rate)
wake_up_interruptible(&lv_snap->lv_snapshot_wait);
}
return 0;
......@@ -462,7 +462,7 @@ int lvm_snapshot_alloc_hash_table(lv_t * lv)
unsigned long buckets, max_buckets, size;
struct list_head * hash;
buckets = lv->lv_remap_end;
buckets = lv->u.lv_remap_end;
max_buckets = calc_max_buckets();
buckets = min(buckets, max_buckets);
while (buckets & (buckets-1))
......@@ -531,10 +531,10 @@ int lvm_snapshot_alloc(lv_t * lv_snap)
void lvm_snapshot_release(lv_t * lv)
{
if (lv->lv_block_exception)
if (lv->u.lv_block_exception)
{
vfree(lv->lv_block_exception);
lv->lv_block_exception = NULL;
vfree(lv->u.lv_block_exception);
lv->u.lv_block_exception = NULL;
}
if (lv->lv_snapshot_hash_table)
{
......@@ -578,8 +578,8 @@ static int _write_COW_table_block(vg_t *vg, lv_t *lv_snap,
COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap);
/* get physical addresse of destination chunk */
snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
snap_phys_dev = lv_snap->u.lv_block_exception[idx].rdev_new;
snap_pe_start = lv_snap->u.lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->u.lv_chunk_size;
blksize_snap = block_size(snap_phys_dev);
......@@ -595,7 +595,7 @@ static int _write_COW_table_block(vg_t *vg, lv_t *lv_snap,
blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
/* store new COW_table entry */
be = lv_snap->lv_block_exception + idx;
be = lv_snap->u.lv_block_exception + idx;
if(_pv_get_number(vg, be->rdev_org, &pvn))
goto fail_pv_get_number;
......@@ -620,15 +620,15 @@ static int _write_COW_table_block(vg_t *vg, lv_t *lv_snap,
if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table)
{
/* don't go beyond the end */
if (idx + 1 >= lv_snap->lv_remap_end) goto out;
if (idx + 1 >= lv_snap->u.lv_remap_end) goto out;
memset(lv_COW_table, 0, blksize_snap);
if (end_of_table)
{
idx++;
snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
snap_phys_dev = lv_snap->u.lv_block_exception[idx].rdev_new;
snap_pe_start = lv_snap->u.lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->u.lv_chunk_size;
blksize_snap = block_size(snap_phys_dev);
blocks[0] = snap_pe_start >> (blksize_snap >> 10);
} else blocks[0]++;
......@@ -664,7 +664,7 @@ static int _write_COW_table_block(vg_t *vg, lv_t *lv_snap,
static void _disable_snapshot(vg_t *vg, lv_t *lv) {
const char *err;
lv->lv_block_exception[0].rsector_org = LVM_SNAPSHOT_DROPPED_SECTOR;
lv->u.lv_block_exception[0].rsector_org = LVM_SNAPSHOT_DROPPED_SECTOR;
if(_write_COW_table_block(vg, lv, 0, &err) < 0) {
printk(KERN_ERR "%s -- couldn't disable snapshot: %s\n",
lvm_name, err);
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment