ubd_kern.c 34.5 KB
Newer Older
1
/*
Linus Torvalds's avatar
Linus Torvalds committed
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
 * Licensed under the GPL
 */

/* 2001-09-28...2002-04-17
 * Partition stuff by James_McMechan@hotmail.com
 * old style ubd by setting UBD_SHIFT to 0
 * 2002-09-27...2002-10-18 massive tinkering for 2.5
 * partitions have changed in 2.5
 * 2003-01-29 more tinkering for 2.5.59-1
 * This should now address the sysfs problems and has
 * the symlink for devfs to allow for booting with
 * the common /dev/ubd/discX/... names rather than
 * only /dev/ubdN/discN this version also has lots of
 * clean ups preparing for ubd-many.
 * James McMechan
 */

#define UBD_SHIFT 4

Al Viro's avatar
Al Viro committed
22 23 24 25 26 27 28 29 30 31 32 33 34 35
#include <linux/module.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/ata.h>
#include <linux/hdreg.h>
#include <linux/cdrom.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/platform_device.h>
#include <linux/scatterlist.h>
#include <asm/tlbflush.h>
36
#include <kern_util.h>
Linus Torvalds's avatar
Linus Torvalds committed
37
#include "mconsole_kern.h"
38 39
#include <init.h>
#include <irq_kern.h>
Al Viro's avatar
Al Viro committed
40
#include "ubd.h"
41
#include <os.h>
Linus Torvalds's avatar
Linus Torvalds committed
42 43
#include "cow.h"

44
enum ubd_req { UBD_READ, UBD_WRITE, UBD_FLUSH };
Linus Torvalds's avatar
Linus Torvalds committed
45 46

struct io_thread_req {
47
	struct request *req;
48
	enum ubd_req op;
Linus Torvalds's avatar
Linus Torvalds committed
49 50 51 52 53 54
	int fds[2];
	unsigned long offsets[2];
	unsigned long long offset;
	unsigned long length;
	char *buffer;
	int sectorsize;
55 56 57
	unsigned long sector_mask;
	unsigned long long cow_offset;
	unsigned long bitmap_words[2];
Linus Torvalds's avatar
Linus Torvalds committed
58 59 60
	int error;
};

61
/*
 * Test a single bit in a byte-addressed bitmap.
 *
 * @bit:  zero-based bit index; bit 0 is the least-significant bit of
 *        data[0], bit 8 is the least-significant bit of data[1], etc.
 * @data: bitmap storage; the caller must make sure byte bit/8 exists.
 *
 * Returns 1 if the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	__u64 n;
	int bits, off;

	/* Number of bits held by one bitmap element (one byte). */
	bits = sizeof(data[0]) * 8;
	n = bit / bits;
	off = bit % bits;
	return (data[n] & (1 << off)) != 0;
}

72
/*
 * Set a single bit in a byte-addressed bitmap.
 *
 * @bit:  zero-based bit index, numbered the same way as in ubd_test_bit():
 *        bit 0 is the least-significant bit of data[0].
 * @data: bitmap storage; the caller must make sure byte bit/8 exists.
 *
 * Other bits of the affected byte are left untouched.
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	__u64 n;
	int bits, off;

	/* Number of bits held by one bitmap element (one byte). */
	bits = sizeof(data[0]) * 8;
	n = bit / bits;
	off = bit % bits;
	data[n] |= (1 << off);
}
/*End stuff from ubd_user.h*/

#define DRIVER_NAME "uml-blkdev"

86
static DEFINE_MUTEX(ubd_lock);
Arnd Bergmann's avatar
Arnd Bergmann committed
87
static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
Linus Torvalds's avatar
Linus Torvalds committed
88

Al Viro's avatar
Al Viro committed
89
static int ubd_open(struct block_device *bdev, fmode_t mode);
90
static void ubd_release(struct gendisk *disk, fmode_t mode);
Al Viro's avatar
Al Viro committed
91
static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
Linus Torvalds's avatar
Linus Torvalds committed
92
		     unsigned int cmd, unsigned long arg);
93
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
Linus Torvalds's avatar
Linus Torvalds committed
94

95
#define MAX_DEV (16)
Linus Torvalds's avatar
Linus Torvalds committed
96

97
static const struct block_device_operations ubd_blops = {
Linus Torvalds's avatar
Linus Torvalds committed
98
        .owner		= THIS_MODULE,
Al Viro's avatar
Al Viro committed
99 100 101
        .open		= ubd_open,
        .release	= ubd_release,
        .ioctl		= ubd_ioctl,
102
	.getgeo		= ubd_getgeo,
Linus Torvalds's avatar
Linus Torvalds committed
103 104 105
};

/* Protected by ubd_lock */
106
static int fake_major = UBD_MAJOR;
Linus Torvalds's avatar
Linus Torvalds committed
107 108
static struct gendisk *ubd_gendisk[MAX_DEV];
static struct gendisk *fake_gendisk[MAX_DEV];
109

Linus Torvalds's avatar
Linus Torvalds committed
110 111 112 113 114 115 116 117 118 119
#ifdef CONFIG_BLK_DEV_UBD_SYNC
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
					 .cl = 1 })
#else
#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
					 .cl = 1 })
#endif
static struct openflags global_openflags = OPEN_FLAGS;

struct cow {
120
	/* backing file name */
Linus Torvalds's avatar
Linus Torvalds committed
121
	char *file;
122
	/* backing file fd */
Linus Torvalds's avatar
Linus Torvalds committed
123 124 125 126
	int fd;
	unsigned long *bitmap;
	unsigned long bitmap_len;
	int bitmap_offset;
127
	int data_offset;
Linus Torvalds's avatar
Linus Torvalds committed
128 129
};

Jeff Dike's avatar
Jeff Dike committed
130 131
#define MAX_SG 64

Linus Torvalds's avatar
Linus Torvalds committed
132
struct ubd {
Jeff Dike's avatar
Jeff Dike committed
133
	struct list_head restart;
134 135
	/* name (and fd, below) of the file opened for writing, either the
	 * backing or the cow file. */
Linus Torvalds's avatar
Linus Torvalds committed
136 137 138 139 140 141
	char *file;
	int count;
	int fd;
	__u64 size;
	struct openflags boot_openflags;
	struct openflags openflags;
142 143
	unsigned shared:1;
	unsigned no_cow:1;
Linus Torvalds's avatar
Linus Torvalds committed
144 145
	struct cow cow;
	struct platform_device pdev;
146 147
	struct request_queue *queue;
	spinlock_t lock;
Jeff Dike's avatar
Jeff Dike committed
148 149 150
	struct scatterlist sg[MAX_SG];
	struct request *request;
	int start_sg, end_sg;
151
	sector_t rq_pos;
Linus Torvalds's avatar
Linus Torvalds committed
152 153 154 155
};

#define DEFAULT_COW { \
	.file =			NULL, \
156 157
	.fd =			-1,	\
	.bitmap =		NULL, \
Linus Torvalds's avatar
Linus Torvalds committed
158
	.bitmap_offset =	0, \
159
	.data_offset =		0, \
Linus Torvalds's avatar
Linus Torvalds committed
160 161 162 163 164 165 166 167 168
}

#define DEFAULT_UBD { \
	.file = 		NULL, \
	.count =		0, \
	.fd =			-1, \
	.size =			-1, \
	.boot_openflags =	OPEN_FLAGS, \
	.openflags =		OPEN_FLAGS, \
169
	.no_cow =               0, \
170
	.shared =		0, \
171
	.cow =			DEFAULT_COW, \
172
	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
Jeff Dike's avatar
Jeff Dike committed
173 174 175
	.request =		NULL, \
	.start_sg =		0, \
	.end_sg =		0, \
176
	.rq_pos =		0, \
Linus Torvalds's avatar
Linus Torvalds committed
177 178
}

179
/* Protected by ubd_lock */
180
static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
Linus Torvalds's avatar
Linus Torvalds committed
181 182 183 184 185 186 187 188 189 190 191 192

/* Only changed by fake_ide_setup which is a setup */
static int fake_ide = 0;
static struct proc_dir_entry *proc_ide_root = NULL;
static struct proc_dir_entry *proc_ide = NULL;

/*
 * Create the "ide" procfs directory and an "ide0" directory below it,
 * remembering both entries in proc_ide_root and proc_ide.
 */
static void make_proc_ide(void)
{
	struct proc_dir_entry *root = proc_mkdir("ide", NULL);

	proc_ide_root = root;
	proc_ide = proc_mkdir("ide0", root);
}

193
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
Linus Torvalds's avatar
Linus Torvalds committed
194
{
195 196 197 198 199 200 201
	seq_puts(m, "disk\n");
	return 0;
}

static int fake_ide_media_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, fake_ide_media_proc_show, NULL);
Linus Torvalds's avatar
Linus Torvalds committed
202 203
}

204 205 206 207 208 209 210 211
static const struct file_operations fake_ide_media_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= fake_ide_media_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

WANG Cong's avatar
WANG Cong committed
212
static void make_ide_entries(const char *dev_name)
Linus Torvalds's avatar
Linus Torvalds committed
213 214 215 216 217 218 219 220 221
{
	struct proc_dir_entry *dir, *ent;
	char name[64];

	if(proc_ide_root == NULL) make_proc_ide();

	dir = proc_mkdir(dev_name, proc_ide);
	if(!dir) return;

222
	ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops);
Linus Torvalds's avatar
Linus Torvalds committed
223
	if(!ent) return;
WANG Cong's avatar
WANG Cong committed
224
	snprintf(name, sizeof(name), "ide0/%s", dev_name);
Linus Torvalds's avatar
Linus Torvalds committed
225 226 227 228 229 230
	proc_symlink(dev_name, proc_ide_root, name);
}

static int fake_ide_setup(char *str)
{
	fake_ide = 1;
231
	return 1;
Linus Torvalds's avatar
Linus Torvalds committed
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248
}

__setup("fake_ide", fake_ide_setup);

__uml_help(fake_ide_setup,
"fake_ide\n"
"    Create ide0 entries that map onto ubd devices.\n\n"
);

/*
 * Parse a device unit from the start of *ptr.
 *
 * Two spellings are accepted:
 *   - a number (any base understood by simple_strtoul() with base 0),
 *   - a single lower-case letter, where 'a' is unit 0, 'b' is unit 1, ...
 *
 * On success the unit number is returned and *ptr is advanced past the
 * consumed characters.  On failure a negative value is returned and *ptr
 * is left unchanged.  The result is NOT range-checked against the number
 * of devices; callers must do that.
 */
static int parse_unit(char **ptr)
{
	char *str = *ptr, *end;
	unsigned long val;
	int n = -1;

	if(isdigit(*str)) {
		val = simple_strtoul(str, &end, 0);
		if(end == str)
			return -1;
		/*
		 * Reject values that do not survive the conversion to int,
		 * so that an absurdly large number cannot wrap around and
		 * alias a valid unit.
		 */
		n = (int) val;
		if((n < 0) || ((unsigned long) n != val))
			return -1;
		*ptr = end;
	}
	else if (('a' <= *str) && (*str <= 'z')) {
		n = *str - 'a';
		str++;
		*ptr = str;
	}
	return n;
}

260 261 262 263
/* If *index_out == -1 at exit, the passed option was a general one;
 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
 * should not be freed on exit.
 */
264
static int ubd_setup_common(char *str, int *index_out, char **error_out)
Linus Torvalds's avatar
Linus Torvalds committed
265
{
266
	struct ubd *ubd_dev;
Linus Torvalds's avatar
Linus Torvalds committed
267 268
	struct openflags flags = global_openflags;
	char *backing_file;
269
	int n, err = 0, i;
Linus Torvalds's avatar
Linus Torvalds committed
270 271 272 273 274 275 276 277 278 279

	if(index_out) *index_out = -1;
	n = *str;
	if(n == '='){
		char *end;
		int major;

		str++;
		if(!strcmp(str, "sync")){
			global_openflags = of_sync(global_openflags);
280
			goto out1;
Linus Torvalds's avatar
Linus Torvalds committed
281
		}
282 283

		err = -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
284 285
		major = simple_strtoul(str, &end, 0);
		if((*end != '\0') || (end == str)){
286
			*error_out = "Didn't parse major number";
287
			goto out1;
Linus Torvalds's avatar
Linus Torvalds committed
288 289
		}

290
		mutex_lock(&ubd_lock);
291
		if (fake_major != UBD_MAJOR) {
292 293 294
			*error_out = "Can't assign a fake major twice";
			goto out1;
		}
295

296
		fake_major = major;
Linus Torvalds's avatar
Linus Torvalds committed
297 298 299

		printk(KERN_INFO "Setting extra ubd major number to %d\n",
		       major);
300 301 302 303
		err = 0;
	out1:
		mutex_unlock(&ubd_lock);
		return err;
Linus Torvalds's avatar
Linus Torvalds committed
304 305 306 307
	}

	n = parse_unit(&str);
	if(n < 0){
308 309
		*error_out = "Couldn't parse device number";
		return -EINVAL;
Linus Torvalds's avatar
Linus Torvalds committed
310 311
	}
	if(n >= MAX_DEV){
312 313
		*error_out = "Device number out of range";
		return 1;
Linus Torvalds's avatar
Linus Torvalds committed
314 315
	}

316
	err = -EBUSY;
317
	mutex_lock(&ubd_lock);
Linus Torvalds's avatar
Linus Torvalds committed
318

319 320
	ubd_dev = &ubd_devs[n];
	if(ubd_dev->file != NULL){
321
		*error_out = "Device is already configured";
Linus Torvalds's avatar
Linus Torvalds committed
322 323 324 325 326 327
		goto out;
	}

	if (index_out)
		*index_out = n;

328
	err = -EINVAL;
329
	for (i = 0; i < sizeof("rscd="); i++) {
Linus Torvalds's avatar
Linus Torvalds committed
330 331 332 333 334 335 336 337
		switch (*str) {
		case 'r':
			flags.w = 0;
			break;
		case 's':
			flags.s = 1;
			break;
		case 'd':
338
			ubd_dev->no_cow = 1;
Linus Torvalds's avatar
Linus Torvalds committed
339
			break;
340
		case 'c':
341
			ubd_dev->shared = 1;
342
			break;
Linus Torvalds's avatar
Linus Torvalds committed
343 344 345 346
		case '=':
			str++;
			goto break_loop;
		default:
347 348
			*error_out = "Expected '=' or flag letter "
				"(r, s, c, or d)";
Linus Torvalds's avatar
Linus Torvalds committed
349 350 351 352 353
			goto out;
		}
		str++;
	}

354 355 356 357
	if (*str == '=')
		*error_out = "Too many flags specified";
	else
		*error_out = "Missing '='";
Linus Torvalds's avatar
Linus Torvalds committed
358 359 360 361 362
	goto out;

break_loop:
	backing_file = strchr(str, ',');

363
	if (backing_file == NULL)
Linus Torvalds's avatar
Linus Torvalds committed
364 365
		backing_file = strchr(str, ':');

366 367 368 369 370
	if(backing_file != NULL){
		if(ubd_dev->no_cow){
			*error_out = "Can't specify both 'd' and a cow file";
			goto out;
		}
Linus Torvalds's avatar
Linus Torvalds committed
371 372 373 374 375
		else {
			*backing_file = '\0';
			backing_file++;
		}
	}
376
	err = 0;
377 378 379
	ubd_dev->file = str;
	ubd_dev->cow.file = backing_file;
	ubd_dev->boot_openflags = flags;
Linus Torvalds's avatar
Linus Torvalds committed
380
out:
381
	mutex_unlock(&ubd_lock);
382
	return err;
Linus Torvalds's avatar
Linus Torvalds committed
383 384 385 386
}

/*
 * Boot-time handler for the "ubd" kernel command line option.
 *
 * Hands the option text to ubd_setup_common() and logs its error message
 * if parsing fails.  Always returns 1, whether or not parsing succeeded.
 */
static int ubd_setup(char *str)
{
	char *error;
	int err;

	err = ubd_setup_common(str, NULL, &error);
	if(err)
		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
		       "%s\n", str, error);
	return 1;
}

__setup("ubd", ubd_setup);
/* Help text for "ubd"; the unit range quoted below must match MAX_DEV. */
__uml_help(ubd_setup,
"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
"    This is used to associate a device with a file in the underlying\n"
"    filesystem. When specifying two filenames, the first one is the\n"
"    COW name and the second is the backing file name. As separator you can\n"
"    use either a ':' or a ',': the first one allows writing things like;\n"
"	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
"    while with a ',' the shell would not expand the 2nd '~'.\n"
"    When using only one filename, UML will detect whether to treat it like\n"
"    a COW file or a backing file. To override this detection, add the 'd'\n"
"    flag:\n"
"	ubd0d=BackingFile\n"
"    Usually, there is a filesystem in the file, but \n"
"    that's not required. Swap devices containing swap files can be\n"
"    specified like this. Also, a file which doesn't contain a\n"
"    filesystem can have its contents read in the virtual \n"
"    machine by running 'dd' on the device. <n> must be in the range\n"
"    0 to 15. Appending an 'r' to the number will cause that device\n"
"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
"    an 's' will cause data to be written to disk on the host immediately.\n"
"    'c' will cause the device to be treated as being shared between multiple\n"
"    UMLs and file locking will be turned off - this is appropriate for a\n"
"    cluster filesystem and inappropriate at almost all other times.\n\n"
);

Jeff Dike's avatar
Jeff Dike committed
423
static int udb_setup(char *str)
Linus Torvalds's avatar
Linus Torvalds committed
424 425 426
{
	printk("udb%s specified on command line is almost certainly a ubd -> "
	       "udb TYPO\n", str);
427
	return 1;
Linus Torvalds's avatar
Linus Torvalds committed
428 429 430 431 432
}

__setup("udb", udb_setup);
/* Help text for the "udb" typo-catching option. */
__uml_help(udb_setup,
"udb\n"
"    This option is here solely to catch ubd -> udb typos, which can be\n"
"    impossible to catch visually unless you specifically look for\n"
"    them.  The only result of any option starting with 'udb' is an error\n"
"    in the boot output.\n\n"
);

439
static void do_ubd_request(struct request_queue * q);
440 441

/* Only changed by ubd_init, which is an initcall. */
442
static int thread_fd = -1;
Jeff Dike's avatar
Jeff Dike committed
443 444
static LIST_HEAD(restart);

445
/* XXX - move this inside ubd_intr. */
446
/* Called without dev->lock held, and only in interrupt context. */
447
static void ubd_handler(void)
Linus Torvalds's avatar
Linus Torvalds committed
448
{
449
	struct io_thread_req *req;
Jeff Dike's avatar
Jeff Dike committed
450 451 452
	struct ubd *ubd;
	struct list_head *list, *next_ele;
	unsigned long flags;
453 454
	int n;

Jeff Dike's avatar
Jeff Dike committed
455
	while(1){
456 457
		n = os_read_file(thread_fd, &req,
				 sizeof(struct io_thread_req *));
Jeff Dike's avatar
Jeff Dike committed
458 459 460 461 462 463 464
		if(n != sizeof(req)){
			if(n == -EAGAIN)
				break;
			printk(KERN_ERR "spurious interrupt in ubd_handler, "
			       "err = %d\n", -n);
			return;
		}
465

Tejun Heo's avatar
Tejun Heo committed
466
		blk_end_request(req->req, 0, req->length);
467
		kfree(req);
Jeff Dike's avatar
Jeff Dike committed
468
	}
469
	reactivate_fd(thread_fd, UBD_IRQ);
Jeff Dike's avatar
Jeff Dike committed
470 471 472 473 474 475 476 477

	list_for_each_safe(list, next_ele, &restart){
		ubd = container_of(list, struct ubd, restart);
		list_del_init(&ubd->restart);
		spin_lock_irqsave(&ubd->lock, flags);
		do_ubd_request(ubd->queue);
		spin_unlock_irqrestore(&ubd->lock, flags);
	}
Linus Torvalds's avatar
Linus Torvalds committed
478 479
}

Al Viro's avatar
Al Viro committed
480
static irqreturn_t ubd_intr(int irq, void *dev)
Linus Torvalds's avatar
Linus Torvalds committed
481
{
482
	ubd_handler();
483
	return IRQ_HANDLED;
484
}
485

486 487
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;
488

489
static void kill_io_thread(void)
490
{
491
	if(io_pid != -1)
492
		os_kill_process(io_pid, 1);
493
}
Linus Torvalds's avatar
Linus Torvalds committed
494

495 496
__uml_exitcall(kill_io_thread);

497
/*
 * Determine the size a ubd device should expose.
 *
 * The size is taken from:
 *   - the backing file named in the configuration, when both a COW file
 *     and a backing file were given;
 *   - otherwise, the backing file named in the COW header of
 *     ubd_dev->file, when that file has a readable COW header;
 *   - otherwise (read_cow_header() reports -EINVAL), ubd_dev->file itself.
 *
 * @ubd_dev:  device whose ->file (and optionally ->cow.file) is set.
 * @size_out: receives the size as reported by os_file_size().
 *
 * Returns the result of os_file_size(), or a negative error if the file
 * cannot be opened or its header cannot be read.
 *
 * NOTE(review): the backing file name returned by read_cow_header() is
 * not released here; confirm how it is allocated before adding a free.
 */
static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
{
	char *file;
	int fd;
	int err;

	__u32 version;
	__u32 align;
	char *backing_file = NULL;
	time_t mtime;
	unsigned long long size;
	int sector_size;
	int bitmap_offset;

	if (ubd_dev->file && ubd_dev->cow.file) {
		file = ubd_dev->cow.file;

		goto out;
	}

	fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
	if (fd < 0)
		return fd;

	err = read_cow_header(file_reader, &fd, &version, &backing_file,
		&mtime, &size, &sector_size, &align, &bitmap_offset);
	os_close_file(fd);

	if(err == -EINVAL)
		file = ubd_dev->file;
	else if(err < 0)
		/*
		 * Any other failure leaves backing_file unset; report the
		 * error instead of handing a bogus name to os_file_size().
		 */
		return err;
	else
		file = backing_file;

out:
	return os_file_size(file, size_out);
}

534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719
/*
 * Read the COW bitmap: seek fd to @offset and read @len bytes into @buf.
 *
 * Returns 0 when both the seek and the read report a non-negative result,
 * otherwise the negative value returned by the failing call.  The number
 * of bytes actually read is not checked against @len.
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}

/*
 * Check that a backing file still matches what a COW header recorded.
 *
 * @file:  path of the backing file.
 * @size:  size recorded in the COW header.
 * @mtime: modification time recorded in the COW header.
 *
 * Returns 0 when size and modification time both match, -EINVAL on a
 * mismatch, or the negative error from os_file_modtime()/os_file_size()
 * when the file cannot be examined.  Every non-zero return is logged.
 */
static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
{
	unsigned long modtime;
	unsigned long long actual;
	int err;

	err = os_file_modtime(file, &modtime);
	if (err < 0) {
		printk(KERN_ERR "Failed to get modification time of backing "
		       "file \"%s\", err = %d\n", file, -err);
		return err;
	}

	err = os_file_size(file, &actual);
	if (err < 0) {
		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
		       "err = %d\n", file, -err);
		return err;
	}

	if (actual != size) {
		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
		 * the typecast.*/
		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
		       "vs backing file\n", (unsigned long long) size, actual);
		return -EINVAL;
	}
	if (modtime != mtime) {
		/*
		 * Cast/format each value according to its own type: mtime
		 * is printed as a signed long, modtime is an unsigned long.
		 */
		printk(KERN_ERR "mtime mismatch (%ld vs %lu) of COW header vs "
		       "backing file\n", (long) mtime, modtime);
		return -EINVAL;
	}
	return 0;
}

static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
{
	struct uml_stat buf1, buf2;
	int err;

	if (from_cmdline == NULL)
		return 0;
	if (!strcmp(from_cmdline, from_cow))
		return 0;

	err = os_stat_file(from_cmdline, &buf1);
	if (err < 0) {
		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
		       -err);
		return 0;
	}
	err = os_stat_file(from_cow, &buf2);
	if (err < 0) {
		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
		       -err);
		return 1;
	}
	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
		return 0;

	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
	       "\"%s\" specified in COW header of \"%s\"\n",
	       from_cmdline, from_cow, cow);
	return 1;
}

/*
 * Open (and normally lock) a ubd file, optionally interpreting it as a
 * COW file.
 *
 * @file:              path to open.
 * @openflags:         requested flags; ->w is cleared here if the file
 *                     can only be opened read-only.
 * @shared:            non-zero skips the host file lock.
 * @backing_file_out:  NULL means "do not look for a COW header" and the
 *                     function returns right after opening/locking.
 *                     Otherwise *backing_file_out is the backing file the
 *                     caller asked for (may be NULL) and may be replaced
 *                     by the name found in the COW header.
 * @bitmap_offset_out,
 * @bitmap_len_out,
 * @data_offset_out:   filled in from the COW header when one is found;
 *                     only dereferenced when backing_file_out != NULL.
 * @create_cow_out:    may be NULL; set to 1 when the first open fails
 *                     with -ENOENT.
 *
 * Returns the open file descriptor on success or a negative error.
 */
static int open_ubd_file(char *file, struct openflags *openflags, int shared,
		  char **backing_file_out, int *bitmap_offset_out,
		  unsigned long *bitmap_len_out, int *data_offset_out,
		  int *create_cow_out)
{
	time_t mtime;
	unsigned long long size;
	__u32 version, align;
	char *backing_file;
	int fd, err, sectorsize, asked_switch, mode = 0644;

	fd = os_open_file(file, *openflags, mode);
	if (fd < 0) {
		if ((fd == -ENOENT) && (create_cow_out != NULL))
			*create_cow_out = 1;
		/*
		 * Only a writable open that failed with -EROFS or -EACCES
		 * is retried read-only; everything else is a hard failure.
		 */
		if (!openflags->w ||
		    ((fd != -EROFS) && (fd != -EACCES)))
			return fd;
		openflags->w = 0;
		fd = os_open_file(file, *openflags, mode);
		if (fd < 0)
			return fd;
	}

	if (shared)
		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
	else {
		err = os_lock_file(fd, openflags->w);
		if (err < 0) {
			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
			       file, -err);
			goto out_close;
		}
	}

	/* Successful return case! */
	/* The caller does not want COW handling for this file. */
	if (backing_file_out == NULL)
		return fd;

	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
			      &size, &sectorsize, &align, bitmap_offset_out);
	/*
	 * A header read failure is only fatal when the caller explicitly
	 * named a backing file; otherwise the descriptor is returned as-is
	 * and the bitmap/data offsets are left untouched.
	 */
	if (err && (*backing_file_out != NULL)) {
		printk(KERN_ERR "Failed to read COW header from COW file "
		       "\"%s\", errno = %d\n", file, -err);
		goto out_close;
	}
	if (err)
		return fd;

	asked_switch = path_requires_switch(*backing_file_out, backing_file,
					    file);

	/* Allow switching only if no mismatch. */
	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
						   mtime)) {
		/* Rewrite the header so it names the requested backing file. */
		printk(KERN_ERR "Switching backing file to '%s'\n",
		       *backing_file_out);
		err = write_cow_header(file, fd, *backing_file_out,
				       sectorsize, align, &size);
		if (err) {
			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
			goto out_close;
		}
	} else {
		/*
		 * No switch: use the backing file recorded in the header,
		 * provided it still matches the recorded size and mtime.
		 */
		*backing_file_out = backing_file;
		err = backing_file_mismatch(*backing_file_out, size, mtime);
		if (err)
			goto out_close;
	}

	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
		  bitmap_len_out, data_offset_out);

	return fd;
 out_close:
	os_close_file(fd);
	return err;
}

/*
 * Create a new COW file and write its header.
 *
 * @cow_file:      path of the COW file to create.
 * @backing_file:  backing file name passed on to init_cow_file().
 * @flags:         open flags; passed by value, the "create" bit is forced
 *                 on in the local copy.
 * @sectorsize,
 * @alignment:     passed on to init_cow_file().
 * @bitmap_offset_out,
 * @bitmap_len_out,
 * @data_offset_out: filled in by init_cow_file().
 *
 * Returns the open descriptor of the new COW file, or a non-zero error
 * from open_ubd_file()/init_cow_file().  The descriptor is closed again
 * if initialisation fails.
 */
static int create_cow_file(char *cow_file, char *backing_file,
		    struct openflags flags,
		    int sectorsize, int alignment, int *bitmap_offset_out,
		    unsigned long *bitmap_len_out, int *data_offset_out)
{
	int cow_fd, rc;

	flags.c = 1;
	cow_fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL,
			       NULL);
	if (cow_fd < 0) {
		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
		       cow_file, -cow_fd);
		return cow_fd;
	}

	rc = init_cow_file(cow_fd, cow_file, backing_file, sectorsize,
			   alignment, bitmap_offset_out, bitmap_len_out,
			   data_offset_out);
	if (rc) {
		os_close_file(cow_fd);
		return rc;
	}

	return cow_fd;
}

720
static void ubd_close_dev(struct ubd *ubd_dev)
Linus Torvalds's avatar
Linus Torvalds committed
721
{
722 723
	os_close_file(ubd_dev->fd);
	if(ubd_dev->cow.file == NULL)
Linus Torvalds's avatar
Linus Torvalds committed
724 725
		return;

726 727 728
	os_close_file(ubd_dev->cow.fd);
	vfree(ubd_dev->cow.bitmap);
	ubd_dev->cow.bitmap = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
729 730
}

731
static int ubd_open_dev(struct ubd *ubd_dev)
Linus Torvalds's avatar
Linus Torvalds committed
732 733 734 735
{
	struct openflags flags;
	char **back_ptr;
	int err, create_cow, *create_ptr;
736
	int fd;
Linus Torvalds's avatar
Linus Torvalds committed
737

738
	ubd_dev->openflags = ubd_dev->boot_openflags;
Linus Torvalds's avatar
Linus Torvalds committed
739
	create_cow = 0;
740 741
	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
742 743

	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
744 745
				back_ptr, &ubd_dev->cow.bitmap_offset,
				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
746
				create_ptr);
Linus Torvalds's avatar
Linus Torvalds committed
747

748 749
	if((fd == -ENOENT) && create_cow){
		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
750 751 752 753
					  ubd_dev->openflags, 1 << 9, PAGE_SIZE,
					  &ubd_dev->cow.bitmap_offset,
					  &ubd_dev->cow.bitmap_len,
					  &ubd_dev->cow.data_offset);
754
		if(fd >= 0){
Linus Torvalds's avatar
Linus Torvalds committed
755
			printk(KERN_INFO "Creating \"%s\" as COW file for "
756
			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
Linus Torvalds's avatar
Linus Torvalds committed
757 758 759
		}
	}

760
	if(fd < 0){
761
		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
762 763
		       -fd);
		return fd;
Linus Torvalds's avatar
Linus Torvalds committed
764
	}
765
	ubd_dev->fd = fd;
Linus Torvalds's avatar
Linus Torvalds committed
766

767
	if(ubd_dev->cow.file != NULL){
768
		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
769

Linus Torvalds's avatar
Linus Torvalds committed
770
		err = -ENOMEM;
771
		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
772
		if(ubd_dev->cow.bitmap == NULL){
Linus Torvalds's avatar
Linus Torvalds committed
773 774 775 776 777
			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
			goto error;
		}
		flush_tlb_kernel_vm();

778 779 780
		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
				      ubd_dev->cow.bitmap_offset,
				      ubd_dev->cow.bitmap_len);
Linus Torvalds's avatar
Linus Torvalds committed
781 782 783
		if(err < 0)
			goto error;

784
		flags = ubd_dev->openflags;
Linus Torvalds's avatar
Linus Torvalds committed
785
		flags.w = 0;
786
		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
787
				    NULL, NULL, NULL, NULL);
Linus Torvalds's avatar
Linus Torvalds committed
788
		if(err < 0) goto error;
789
		ubd_dev->cow.fd = err;
Linus Torvalds's avatar
Linus Torvalds committed
790
	}
791
	return 0;
Linus Torvalds's avatar
Linus Torvalds committed
792
 error:
793
	os_close_file(ubd_dev->fd);
794
	return err;
Linus Torvalds's avatar
Linus Torvalds committed
795 796
}

Jeff Dike's avatar
Jeff Dike committed
797 798
static void ubd_device_release(struct device *dev)
{
799
	struct ubd *ubd_dev = dev_get_drvdata(dev);
Jeff Dike's avatar
Jeff Dike committed
800 801 802 803 804

	blk_cleanup_queue(ubd_dev->queue);
	*ubd_dev = ((struct ubd) DEFAULT_UBD);
}

805
static int ubd_disk_register(int major, u64 size, int unit,
806
			     struct gendisk **disk_out)
Linus Torvalds's avatar
Linus Torvalds committed
807 808 809 810 811
{
	struct gendisk *disk;

	disk = alloc_disk(1 << UBD_SHIFT);
	if(disk == NULL)
812
		return -ENOMEM;
Linus Torvalds's avatar
Linus Torvalds committed
813 814 815 816 817

	disk->major = major;
	disk->first_minor = unit << UBD_SHIFT;
	disk->fops = &ubd_blops;
	set_capacity(disk, size / 512);
818
	if (major == UBD_MAJOR)
Linus Torvalds's avatar
Linus Torvalds committed
819
		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
820
	else
Linus Torvalds's avatar
Linus Torvalds committed
821 822 823
		sprintf(disk->disk_name, "ubd_fake%d", unit);

	/* sysfs register (not for ide fake devices) */
824
	if (major == UBD_MAJOR) {
825 826
		ubd_devs[unit].pdev.id   = unit;
		ubd_devs[unit].pdev.name = DRIVER_NAME;
Jeff Dike's avatar
Jeff Dike committed
827
		ubd_devs[unit].pdev.dev.release = ubd_device_release;
828
		dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
829 830
		platform_device_register(&ubd_devs[unit].pdev);
		disk->driverfs_dev = &ubd_devs[unit].pdev.dev;
Linus Torvalds's avatar
Linus Torvalds committed
831 832
	}

833
	disk->private_data = &ubd_devs[unit];
834
	disk->queue = ubd_devs[unit].queue;
Linus Torvalds's avatar
Linus Torvalds committed
835 836 837 838 839 840 841 842
	add_disk(disk);

	*disk_out = disk;
	return 0;
}

/* Round n up to the next multiple of 512 (one sector). */
#define ROUND_BLOCK(n) (((n) + ((1 << 9) - 1)) & ~((1 << 9) - 1))

843
static int ubd_add(int n, char **error_out)
Linus Torvalds's avatar
Linus Torvalds committed
844
{
845
	struct ubd *ubd_dev = &ubd_devs[n];
846
	int err = 0;
Linus Torvalds's avatar
Linus Torvalds committed
847

848
	if(ubd_dev->file == NULL)
Jeff Dike's avatar
Jeff Dike committed
849
		goto out;
Linus Torvalds's avatar
Linus Torvalds committed
850

851
	err = ubd_file_size(ubd_dev, &ubd_dev->size);
852 853
	if(err < 0){
		*error_out = "Couldn't determine size of device's file";
854
		goto out;
855
	}
Linus Torvalds's avatar
Linus Torvalds committed
856

857
	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
Linus Torvalds's avatar
Linus Torvalds committed
858

Jeff Dike's avatar
Jeff Dike committed
859
	INIT_LIST_HEAD(&ubd_dev->restart);
860
	sg_init_table(ubd_dev->sg, MAX_SG);
Jeff Dike's avatar
Jeff Dike committed
861

862 863 864 865
	err = -ENOMEM;
	ubd_dev->queue = blk_init_queue(do_ubd_request, &ubd_dev->lock);
	if (ubd_dev->queue == NULL) {
		*error_out = "Failed to initialize device queue";
866
		goto out;
867 868
	}
	ubd_dev->queue->queuedata = ubd_dev;
869
	blk_queue_flush(ubd_dev->queue, REQ_FLUSH);
870

871
	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
872
	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
873 874 875 876
	if(err){
		*error_out = "Failed to register device";
		goto out_cleanup;
	}
877

878
	if (fake_major != UBD_MAJOR)
879
		ubd_disk_register(fake_major, ubd_dev->size, n,
880
				  &fake_gendisk[n]);
Linus Torvalds's avatar
Linus Torvalds committed
881

Jeff Dike's avatar
Jeff Dike committed
882 883 884 885
	/*
	 * Perhaps this should also be under the "if (fake_major)" above
	 * using the fake_disk->disk_name
	 */
Linus Torvalds's avatar
Linus Torvalds committed
886 887 888
	if (fake_ide)
		make_ide_entries(ubd_gendisk[n]->disk_name);

Jeff Dike's avatar
Jeff Dike committed
889 890 891
	err = 0;
out:
	return err;
892 893 894 895

out_cleanup:
	blk_cleanup_queue(ubd_dev->queue);
	goto out;
Linus Torvalds's avatar
Linus Torvalds committed
896 897
}

898
static int ubd_config(char *str, char **error_out)
Linus Torvalds's avatar
Linus Torvalds committed
899
{
900
	int n, ret;
Linus Torvalds's avatar
Linus Torvalds committed
901

902 903 904 905
	/* This string is possibly broken up and stored, so it's only
	 * freed if ubd_setup_common fails, or if only general options
	 * were set.
	 */
Jeff Dike's avatar
Jeff Dike committed
906
	str = kstrdup(str, GFP_KERNEL);
907
	if (str == NULL) {
908 909
		*error_out = "Failed to allocate memory";
		return -ENOMEM;
Linus Torvalds's avatar
Linus Torvalds committed
910
	}
911 912 913

	ret = ubd_setup_common(str, &n, error_out);
	if (ret)
914
		goto err_free;
915

916 917
	if (n == -1) {
		ret = 0;
918
		goto err_free;
Linus Torvalds's avatar
Linus Torvalds committed
919 920
	}

921
	mutex_lock(&ubd_lock);
922
	ret = ubd_add(n, error_out);
923
	if (ret)
924
		ubd_devs[n].file = NULL;
925
	mutex_unlock(&ubd_lock);
Linus Torvalds's avatar
Linus Torvalds committed
926

927
out:
928
	return ret;
929 930 931 932

err_free:
	kfree(str);
	goto out;
Linus Torvalds's avatar
Linus Torvalds committed
933 934 935 936
}

static int ubd_get_config(char *name, char *str, int size, char **error_out)
{
937
	struct ubd *ubd_dev;
Linus Torvalds's avatar
Linus Torvalds committed
938 939 940 941 942
	int n, len = 0;

	n = parse_unit(&name);
	if((n >= MAX_DEV) || (n < 0)){
		*error_out = "ubd_get_config : device number out of range";
943
		return -1;
Linus Torvalds's avatar
Linus Torvalds committed
944 945
	}

946
	ubd_dev = &ubd_devs[n];
947
	mutex_lock(&ubd_lock);
Linus Torvalds's avatar
Linus Torvalds committed
948

949
	if(ubd_dev->file == NULL){
Linus Torvalds's avatar
Linus Torvalds committed
950 951 952 953
		CONFIG_CHUNK(str, size, len, "", 1);
		goto out;
	}

954
	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
Linus Torvalds's avatar
Linus Torvalds committed
955

956
	if(ubd_dev->cow.file != NULL){
Linus Torvalds's avatar
Linus Torvalds committed
957
		CONFIG_CHUNK(str, size, len, ",", 0);
958
		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
Linus Torvalds's avatar
Linus Torvalds committed
959 960 961 962
	}
	else CONFIG_CHUNK(str, size, len, "", 1);

 out:
963
	mutex_unlock(&ubd_lock);
964
	return len;
Linus Torvalds's avatar
Linus Torvalds committed
965 966
}

967 968
/*
 * mconsole "id" hook: report the valid unit range [0, MAX_DEV - 1]
 * through start_out/end_out and return the unit number parsed from *str.
 */
static int ubd_id(char **str, int *start_out, int *end_out)
{
	*start_out = 0;
	*end_out = MAX_DEV - 1;

	return parse_unit(str);
}

977
static int ubd_remove(int n, char **error_out)
Linus Torvalds's avatar
Linus Torvalds committed
978
{
Jeff Dike's avatar
Jeff Dike committed
979
	struct gendisk *disk = ubd_gendisk[n];
980
	struct ubd *ubd_dev;
981
	int err = -ENODEV;
Linus Torvalds's avatar
Linus Torvalds committed
982

983
	mutex_lock(&ubd_lock);
Linus Torvalds's avatar
Linus Torvalds committed
984

985
	ubd_dev = &ubd_devs[n];
Linus Torvalds's avatar
Linus Torvalds committed
986

987
	if(ubd_dev->file == NULL)
988
		goto out;
Linus Torvalds's avatar
Linus Torvalds committed
989

990 991
	/* you cannot remove a open disk */
	err = -EBUSY;
992
	if(ubd_dev->count > 0)
Linus Torvalds's avatar
Linus Torvalds committed
993 994
		goto out;

995
	ubd_gendisk[n] = NULL;
996 997 998 999
	if(disk != NULL){
		del_gendisk(disk);
		put_disk(disk);
	}
Linus Torvalds's avatar
Linus Torvalds committed
1000 1001 1002 1003 1004 1005 1006 1007

	if(fake_gendisk[n] != NULL){
		del_gendisk(fake_gendisk[n]);
		put_disk(fake_gendisk[n]);
		fake_gendisk[n] = NULL;
	}

	err = 0;
Jeff Dike's avatar
Jeff Dike committed
1008
	platform_device_unregister(&ubd_dev->pdev);
1009
out:
1010
	mutex_unlock(&ubd_lock);
1011
	return err;
Linus Torvalds's avatar
Linus Torvalds committed
1012 1013
}

1014
/* All these are called by mconsole in process context and without
1015
 * ubd-specific locks.  The structure itself is const except for .list.
1016
 */
Linus Torvalds's avatar
Linus Torvalds committed
1017
static struct mc_device ubd_mc = {
Jeff Dike's avatar
Jeff Dike committed
1018
	.list		= LIST_HEAD_INIT(ubd_mc.list),
Linus Torvalds's avatar
Linus Torvalds committed
1019 1020
	.name		= "ubd",
	.config		= ubd_config,
1021
	.get_config	= ubd_get_config,
1022
	.id		= ubd_id,
Linus Torvalds's avatar
Linus Torvalds committed
1023 1024 1025
	.remove		= ubd_remove,
};

1026
/* Initcall: hand the ubd descriptor to mconsole.  Always returns 0. */
static int __init ubd_mc_init(void)
{
	mconsole_register_dev(&ubd_mc);

	return 0;
}

__initcall(ubd_mc_init);

1034 1035 1036 1037
/* Initcall: if device 0 has no file configured, default it to
 * "root_fs".  Always returns 0.
 */
static int __init ubd0_init(void)
{
	mutex_lock(&ubd_lock);
	if (ubd_devs[0].file == NULL)
		ubd_devs[0].file = "root_fs";
	mutex_unlock(&ubd_lock);

	return 0;
}

__initcall(ubd0_init);

1048
/* Used in ubd_init, which is an initcall */
1049 1050 1051 1052
static struct platform_driver ubd_driver = {
	.driver = {
		.name  = DRIVER_NAME,
	},
Linus Torvalds's avatar
Linus Torvalds committed
1053 1054
};

1055
/*
 * Late initcall: register the ubd block major (and the fake major when
 * one is configured), register the platform driver, then try to add
 * every device slot.
 *
 * Returns -1 when a block major cannot be registered, 0 otherwise.
 * Failures to add individual devices are logged, not propagated.
 */
static int __init ubd_init(void)
{
	char *error;
	int i, err;

	if (register_blkdev(UBD_MAJOR, "ubd"))
		return -1;

	if (fake_major != UBD_MAJOR) {
		/* NOTE(review): a "ubd_<major>" name used to be formatted
		 * here but was never passed on; the registration has always
		 * used "ubd".  That dead buffer is dropped; confirm which
		 * name was intended.
		 */
		if (register_blkdev(fake_major, "ubd")) {
			/* Do not leave UBD_MAJOR registered on failure. */
			unregister_blkdev(UBD_MAJOR, "ubd");
			return -1;
		}
	}

	platform_driver_register(&ubd_driver);

	mutex_lock(&ubd_lock);
	for (i = 0; i < MAX_DEV; i++) {
		err = ubd_add(i, &error);
		if (err)
			printk(KERN_ERR "Failed to initialize ubd device %d :"
			       "%s\n", i, error);
	}
	mutex_unlock(&ubd_lock);

	return 0;
}

late_initcall(ubd_init);

1084
/*
 * Device initcall: allocate a stack for the I/O thread, start it, and
 * hook its file descriptor up to UBD_IRQ.
 *
 * Always returns 0.  When the thread cannot be started io_pid is set to
 * -1; an um_request_irq failure is only logged.
 */
static int __init ubd_driver_init(void)
{
	unsigned long stack;
	int err;

	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
	if (global_openflags.s) {
		printk(KERN_INFO "ubd: Synchronous mode\n");
		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
		 * enough. So use anyway the io thread. */
	}

	stack = alloc_stack(0, 0);
	if (stack == 0) {
		/* Never hand a stack pointer derived from a failed
		 * allocation to the I/O thread; treat this like a failed
		 * thread start.
		 */
		printk(KERN_ERR
		       "ubd : Failed to allocate stack for I/O thread\n");
		io_pid = -1;
		return 0;
	}

	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
				 &thread_fd);
	if (io_pid < 0) {
		printk(KERN_ERR
		       "ubd : Failed to start I/O thread (errno = %d) - "
		       "falling back to synchronous I/O\n", -io_pid);
		io_pid = -1;
		return 0;
	}

	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
			     0, "ubd", ubd_devs);
	if (err != 0)
		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);

	return 0;
}

device_initcall(ubd_driver_init);

Al Viro's avatar
Al Viro committed
1113
/*
 * Block device open: on the first open the backing file is opened via
 * ubd_open_dev; every successful open bumps the use count and refreshes
 * the disk's read-only flag from the device's open flags.
 *
 * Returns 0 on success or the error from ubd_open_dev.  'mode' is not
 * consulted: an earlier attempt to reject write opens of read-only
 * devices with -EROFS was disabled because it did not prevent
 * read-write remounts anyway.
 */
static int ubd_open(struct block_device *bdev, fmode_t mode)
{
	struct gendisk *disk = bdev->bd_disk;
	struct ubd *ubd_dev = disk->private_data;
	int err = 0;

	mutex_lock(&ubd_mutex);

	if (ubd_dev->count == 0) {
		err = ubd_open_dev(ubd_dev);
		if (err)
			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
			       disk->disk_name, ubd_dev->file, -err);
	}

	if (!err) {
		ubd_dev->count++;
		set_disk_ro(disk, !ubd_dev->openflags.w);
	}

	mutex_unlock(&ubd_mutex);
	return err;
}

1142
/* Block device release: drop one reference and close the backing file
 * when the last opener goes away.
 */
static void ubd_release(struct gendisk *disk, fmode_t mode)
{
	struct ubd *ubd_dev = disk->private_data;

	mutex_lock(&ubd_mutex);
	ubd_dev->count--;
	if (ubd_dev->count == 0)
		ubd_close_dev(ubd_dev);
	mutex_unlock(&ubd_mutex);
}

1152 1153 1154 1155
/*
 * Mark the 512-byte sectors covered by [io_offset, io_offset + length)
 * in the in-memory bitmap and, if any of them was newly set, work out
 * which two bitmap words must be written back and where.
 *
 * cow_mask (optional) gets one bit set per sector of the request.
 * *cow_offset and bitmap_words[] are only written when at least one
 * bitmap bit changed: bitmap_words receives two consecutive words of
 * the bitmap and *cow_offset their byte offset, including bitmap_offset.
 */
static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
			  __u64 *cow_offset, unsigned long *bitmap,
			  __u64 bitmap_offset, unsigned long *bitmap_words,
			  __u64 bitmap_len)
{
	const int nsectors = length >> 9;
	__u64 first = io_offset >> 9;
	__u64 word;
	int changed = 0;
	int i;

	for (i = 0; i < nsectors; i++) {
		if (cow_mask != NULL)
			ubd_set_bit(i, (unsigned char *) cow_mask);

		if (!ubd_test_bit(first + i, (unsigned char *) bitmap)) {
			ubd_set_bit(first + i, (unsigned char *) bitmap);
			changed = 1;
		}
	}

	if (!changed)
		return;

	word = first / (sizeof(unsigned long) * 8);

	/* This takes care of the case where we're exactly at the end of the
	 * device, and word + 1 is off the end.  So, just back it up
	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
	 * for the original diagnosis.
	 */
	if (word == (DIV_ROUND_UP(bitmap_len, sizeof(unsigned long)) - 1))
		word--;

	bitmap_words[0] = bitmap[word];
	bitmap_words[1] = bitmap[word + 1];

	*cow_offset = word * sizeof(unsigned long) + bitmap_offset;
}

/*
 * Fill in the COW-related parts of an I/O request.
 *
 * For reads, sector_mask gets a bit for every sector of the request
 * whose bit is set in the bitmap.  For everything else the work is
 * delegated to cowify_bitmap.  Panics when the request covers more
 * sectors than sector_mask has bits.
 */
static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
		       __u64 bitmap_offset, __u64 bitmap_len)
{
	__u64 first = req->offset >> 9;
	int i;

	if (req->length > (sizeof(req->sector_mask) * 8) << 9)
		panic("Operation too long");

	if (req->op != UBD_READ) {
		cowify_bitmap(req->offset, req->length, &req->sector_mask,
			      &req->cow_offset, bitmap, bitmap_offset,
			      req->bitmap_words, bitmap_len);
		return;
	}

	for (i = 0; i < req->length >> 9; i++) {
		if (!ubd_test_bit(first + i, (unsigned char *) bitmap))
			continue;
		ubd_set_bit(i, (unsigned char *) &req->sector_mask);
	}
}

1212
/* Called with dev->lock held */
Jeff Dike's avatar
Jeff Dike committed
1213 1214 1215
static void prepare_request(struct request *req, struct io_thread_req *io_req,
			    unsigned long long offset, int page_offset,
			    int len, struct page *page)
Linus Torvalds's avatar
Linus Torvalds committed
1216 1217
{
	struct gendisk *disk = req->rq_disk;
1218
	struct ubd *ubd_dev = disk->private_data;
1219

1220
	io_req->req = req;
Jeff Dike's avatar
Jeff Dike committed
1221 1222
	io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
		ubd_dev->fd;
1223
	io_req->fds[1] = ubd_dev->fd;
1224
	io_req->cow_offset = -1;
Linus Torvalds's avatar
Linus Torvalds committed
1225 1226 1227
	io_req->offset = offset;
	io_req->length = len;
	io_req->error = 0;
1228 1229 1230
	io_req->sector_mask = 0;

	io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
Linus Torvalds's avatar
Linus Torvalds committed
1231
	io_req->offsets[0] = 0;
1232
	io_req->offsets[1] = ubd_dev->cow.data_offset;
Jeff Dike's avatar
Jeff Dike committed
1233
	io_req->buffer = page_address(page) + page_offset;
Linus Torvalds's avatar
Linus Torvalds committed
1234 1235
	io_req->sectorsize = 1 << 9;

1236
	if(ubd_dev->cow.file != NULL)
Jeff Dike's avatar
Jeff Dike committed
1237 1238
		cowify_req(io_req, ubd_dev->cow.bitmap,
			   ubd_dev->cow.bitmap_offset, ubd_dev->cow.bitmap_len);
1239

Linus Torvalds's avatar
Linus Torvalds committed
1240 1241
}

1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254
/* Called with dev->lock held */
static void prepare_flush_request(struct request *req,
				  struct io_thread_req *io_req)
{
	struct gendisk *disk = req->rq_disk;
	struct ubd *ubd_dev = disk->private_data;

	io_req->req = req;
	io_req->fds[0] = (ubd_dev->cow.file != NULL) ? ubd_dev->cow.fd :
		ubd_dev->fd;
	io_req->op = UBD_FLUSH;
}

1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271
/*
 * Hand one prepared request to the I/O thread by writing its pointer to
 * thread_fd.
 *
 * Returns true when the pointer was written completely.  Otherwise the
 * request is freed and false is returned; on -EAGAIN the device is put
 * on the restart list (if not already there), any other result is
 * logged.
 */
static bool submit_request(struct io_thread_req *io_req, struct ubd *dev)
{
	int written = os_write_file(thread_fd, &io_req, sizeof(io_req));

	if (written == sizeof(io_req))
		return true;

	if (written == -EAGAIN) {
		if (list_empty(&dev->restart))
			list_add(&dev->restart, &restart);
	} else {
		printk("write to io thread failed, "
		       "errno = %d\n", -written);
	}

	kfree(io_req);
	return false;
}

1272
/* Called with dev->lock held */
1273
static void do_ubd_request(struct request_queue *q)
Linus Torvalds's avatar
Linus Torvalds committed
1274
{
1275
	struct io_thread_req *io_req;
Linus Torvalds's avatar
Linus Torvalds committed
1276
	struct request *req;
Jeff Dike's avatar
Jeff Dike committed
1277 1278

	while(1){
1279
		struct ubd *dev = q->queuedata;
Jeff Dike's avatar
Jeff Dike committed
1280
		if(dev->end_sg == 0){
1281
			struct request *req = blk_fetch_request(q);
Jeff Dike's avatar
Jeff Dike committed
1282 1283 1284 1285
			if(req == NULL)
				return;

			dev->request = req;
1286
			dev->rq_pos = blk_rq_pos(req);
Jeff Dike's avatar
Jeff Dike committed
1287 1288 1289 1290 1291
			dev->start_sg = 0;
			dev->end_sg = blk_rq_map_sg(q, req, dev->sg);
		}

		req = dev->request;
1292 1293 1294 1295 1296 1297 1298 1299 1300 1301

		if (req->cmd_flags & REQ_FLUSH) {
			io_req = kmalloc(sizeof(struct io_thread_req),
					 GFP_ATOMIC);
			if (io_req == NULL) {
				if (list_empty(&dev->restart))
					list_add(&dev->restart, &restart);
				return;
			}
			prepare_flush_request(req, io_req);
1302
			submit_request(io_req, dev);
1303 1304
		}

Jeff Dike's avatar
Jeff Dike committed
1305 1306 1307
		while(dev->start_sg < dev->end_sg){
			struct scatterlist *sg = &dev->sg[dev->start_sg];

1308
			io_req = kmalloc(sizeof(struct io_thread_req),
1309
					 GFP_ATOMIC);
1310 1311 1312 1313 1314 1315
			if(io_req == NULL){
				if(list_empty(&dev->restart))
					list_add(&dev->restart, &restart);
				return;
			}
			prepare_request(req, io_req,
1316
					(unsigned long long)dev->rq_pos << 9,
1317
					sg->offset, sg->length, sg_page(sg));
Jeff Dike's avatar
Jeff Dike committed
1318

1319
			if (submit_request(io_req, dev) == false)
Jeff Dike's avatar
Jeff Dike committed
1320 1321
				return;

1322
			dev->rq_pos += sg->length >> 9;
Jeff Dike's avatar
Jeff Dike committed
1323
			dev->start_sg++;
Linus Torvalds's avatar
Linus Torvalds committed
1324
		}
Jeff Dike's avatar
Jeff Dike committed
1325 1326
		dev->end_sg = 0;
		dev->request = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
1327 1328 1329
	}
}

1330 1331
static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
1332
	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1333 1334 1335

	geo->heads = 128;
	geo->sectors = 32;
1336
	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1337 1338 1339
	return 0;
}

Al Viro's avatar
Al Viro committed
1340
/*
 * Block device ioctl.  Two commands are handled:
 *
 *  HDIO_GET_IDENTITY - copies out an otherwise zeroed ATA identify
 *	block carrying the same geometry ubd_getgeo reports.
 *  CDROMVOLREAD - copies the caller's cdrom_volctrl in, sets all four
 *	channels to 255 and copies it back.
 *
 * Returns 0 on success, -EFAULT when a user copy fails and -EINVAL for
 * any other command.
 */
static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
		     unsigned int cmd, unsigned long arg)
{
	struct ubd *ubd_dev = bdev->bd_disk->private_data;

	if (cmd == HDIO_GET_IDENTITY) {
		u16 id_words[ATA_ID_WORDS];

		memset(id_words, 0, sizeof(id_words));
		id_words[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512);
		id_words[ATA_ID_HEADS] = 128;
		id_words[ATA_ID_SECTORS] = 32;

		if (copy_to_user((char __user *) arg, (char *) &id_words,
				 sizeof(id_words)))
			return -EFAULT;
		return 0;
	}

	if (cmd == CDROMVOLREAD) {
		struct cdrom_volctrl volume;

		if (copy_from_user(&volume, (char __user *) arg,
				   sizeof(volume)))
			return -EFAULT;

		volume.channel0 = 255;
		volume.channel1 = 255;
		volume.channel2 = 255;
		volume.channel3 = 255;

		if (copy_to_user((char __user *) arg, &volume,
				 sizeof(volume)))
			return -EFAULT;
		return 0;
	}

	return -EINVAL;
}

1372
static int update_bitmap(struct io_thread_req *req)
Linus Torvalds's avatar
Linus Torvalds committed
1373
{
1374
	int n;
Linus Torvalds's avatar
Linus Torvalds committed
1375

1376
	if(req->cow_offset == -1)
1377
		return 0;
Linus Torvalds's avatar
Linus Torvalds committed
1378

1379 1380 1381
	n = os_seek_file(req->fds[1], req->cow_offset);
	if(n < 0){
		printk("do_io - bitmap lseek failed : err = %d\n", -n);
1382
		return 1;
1383
	}
Linus Torvalds's avatar
Linus Torvalds committed
1384

1385 1386
	n = os_write_file(req->fds[1], &req->bitmap_words,
			  sizeof(req->bitmap_words));
1387 1388 1389
	if(n != sizeof(req->bitmap_words)){
		printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
		       req->fds[1]);
1390
		return 1;
1391
	}
Linus Torvalds's avatar
Linus Torvalds committed
1392

1393
	return 0;
1394
}
Linus Torvalds's avatar
Linus Torvalds committed
1395

1396
static void do_io(struct io_thread_req *req)
1397 1398 1399 1400 1401 1402 1403
{
	char *buf;
	unsigned long len;
	int n, nsectors, start, end, bit;
	int err;
	__u64 off;

1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414
	if (req->op == UBD_FLUSH) {
		/* fds[0] is always either the rw image or our cow file */
		n = os_sync_file(req->fds[0]);
		if (n != 0) {
			printk("do_io - sync failed err = %d "
			       "fd = %d\n", -n, req->fds[0]);
			req->error = 1;
		}
		return;
	}

1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440
	nsectors = req->length / req->sectorsize;
	start = 0;
	do {
		bit = ubd_test_bit(start, (unsigned char *) &req->sector_mask);
		end = start;
		while((end < nsectors) &&
		      (ubd_test_bit(end, (unsigned char *)
				    &req->sector_mask) == bit))
			end++;

		off = req->offset + req->offsets[bit] +
			start * req->sectorsize;
		len = (end - start) * req->sectorsize;
		buf = &req->buffer[start * req->sectorsize];

		err = os_seek_file(req->fds[bit], off);
		if(err < 0){
			printk("do_io - lseek failed : err = %d\n", -err);
			req->error = 1;
			return;
		}
		if(req->op == UBD_READ){
			n = 0;
			do {
				buf = &buf[n];
				len -= n;
1441
				n = os_read_file(req->fds[bit], buf, len);
1442 1443 1444 1445 1446 1447 1448 1449 1450
				if (n < 0) {
					printk("do_io - read failed, err = %d "
					       "fd = %d\n", -n, req->fds[bit]);
					req->error = 1;
					return;
				}
			} while((n < len) && (n != 0));
			if (n < len) memset(&buf[n], 0, len - n);
		} else {
1451
			n = os_write_file(req->fds[bit], buf, len);
1452 1453 1454 1455 1456 1457 1458 1459 1460 1461
			if(n != len){
				printk("do_io - write failed err = %d "
				       "fd = %d\n", -n, req->fds[bit]);
				req->error = 1;
				return;
			}
		}

		start = end;
	} while(start < nsectors);
Linus Torvalds's avatar
Linus Torvalds committed
1462

1463
	req->error = update_bitmap(req);
Linus Torvalds's avatar
Linus Torvalds committed
1464
}
1465 1466 1467 1468 1469 1470

/* Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 */
int kernel_fd = -1;

1471 1472
/* Only changed by the io thread. XXX: currently unused. */
static int io_count = 0;
1473 1474 1475

/*
 * Main loop of the I/O thread: read a request pointer from kernel_fd,
 * perform the request with do_io and write the same pointer back to
 * signal completion.  Failed or short reads are logged and the loop
 * simply tries again; the loop never exits.
 */
int io_thread(void *arg)
{
	struct io_thread_req *req;
	int n;

	ignore_sigwinch_sig();
	for (;;) {
		n = os_read_file(kernel_fd, &req, sizeof(req));
		if (n == sizeof(req)) {
			io_count++;
			do_io(req);

			n = os_write_file(kernel_fd, &req, sizeof(req));
			if (n != sizeof(req))
				printk("io_thread - write failed, fd = %d, err = %d\n",
				       kernel_fd, -n);
			continue;
		}

		if (n < 0)
			printk("io_thread - read failed, fd = %d, "
			       "err = %d\n", kernel_fd, -n);
		else
			printk("io_thread - short read, fd = %d, "
			       "length = %d\n", kernel_fd, n);
	}

	return 0;
}