Commit 0925bad3 authored by Linus Torvalds's avatar Linus Torvalds

v2.5.1 -> v2.5.1.1

- me: revert the "kill(-1..)" change.  POSIX isn't that clear on the
issue anyway, and the new behaviour breaks things.
- Jens Axboe: more bio updates
- Al Viro: rd_load cleanups. hpfs mount fix, mount cleanups
- Ingo Molnar: more raid updates
- Jakub Jelinek: fix Linux/x86 confusion about arg passing of "save_v86_state" and "do_signal"
- Trond Myklebust: fix NFS client race conditions
parent 51f4a834
...@@ -18,8 +18,8 @@ ...@@ -18,8 +18,8 @@
</authorgroup> </authorgroup>
<copyright> <copyright>
<year>2000</year> <year>2001</year>
<holder>Paul Russell</holder> <holder>Rusty Russell</holder>
</copyright> </copyright>
<legalnotice> <legalnotice>
...@@ -651,6 +651,29 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress)); ...@@ -651,6 +651,29 @@ printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress));
</para> </para>
</sect1> </sect1>
<sect1 id="routines-endian">
<title><function>cpu_to_be32()</function>/<function>be32_to_cpu()</function>/<function>cpu_to_le32()</function>/<function>le32_to_cpu()</function>
<filename class=headerfile>include/asm/byteorder.h</filename>
</title>
<para>
The <function>cpu_to_be32()</function> family (where the "32" can
be replaced by 64 or 16, and the "be" can be replaced by "le") is
the general way to do endian conversions in the kernel: each
function returns the converted value. All variations supply the
reverse as well: <function>be32_to_cpu()</function>, etc.
</para>
<para>
There are two major variations of these functions: the pointer
variation, such as <function>cpu_to_be32p()</function>, which takes
a pointer to the given type and returns the converted value; and
the "in-situ" family, such as
<function>cpu_to_be32s()</function>, which converts the value
referred to by the pointer in place and returns void.
</para>
</sect1>
<sect1 id="routines-local-irqs"> <sect1 id="routines-local-irqs">
<title><function>local_irq_save()</function>/<function>local_irq_restore()</function> <title><function>local_irq_save()</function>/<function>local_irq_restore()</function>
<filename class=headerfile>include/asm/system.h</filename> <filename class=headerfile>include/asm/system.h</filename>
......
...@@ -1322,8 +1322,10 @@ W: http://www.torque.net/sg ...@@ -1322,8 +1322,10 @@ W: http://www.torque.net/sg
S: Maintained S: Maintained
SCSI SUBSYSTEM SCSI SUBSYSTEM
P: Jens Axboe
M: axboe@suse.de
L: linux-scsi@vger.kernel.org L: linux-scsi@vger.kernel.org
S: Unmaintained S: Maintained
SCSI TAPE DRIVER SCSI TAPE DRIVER
P: Kai Mäkisara P: Kai Mäkisara
......
VERSION = 2 VERSION = 2
PATCHLEVEL = 5 PATCHLEVEL = 5
SUBLEVEL = 1 SUBLEVEL = 2
EXTRAVERSION = EXTRAVERSION =-pre1
KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
asmlinkage int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset)); int FASTCALL(do_signal(struct pt_regs *regs, sigset_t *oldset));
int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from) int copy_siginfo_to_user(siginfo_t *to, siginfo_t *from)
{ {
......
...@@ -62,7 +62,7 @@ ...@@ -62,7 +62,7 @@
( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) ) ( (unsigned)( & (((struct kernel_vm86_regs *)0)->VM86_REGS_PART2) ) )
#define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1) #define VM86_REGS_SIZE2 (sizeof(struct kernel_vm86_regs) - VM86_REGS_SIZE1)
asmlinkage struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs)); struct pt_regs * FASTCALL(save_v86_state(struct kernel_vm86_regs * regs));
struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs) struct pt_regs * save_v86_state(struct kernel_vm86_regs * regs)
{ {
struct tss_struct *tss; struct tss_struct *tss;
......
...@@ -55,12 +55,6 @@ unsigned char __res[sizeof(bd_t)]; ...@@ -55,12 +55,6 @@ unsigned char __res[sizeof(bd_t)];
extern void m8xx_ide_init(void); extern void m8xx_ide_init(void);
#ifdef CONFIG_BLK_DEV_RAM
extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
extern int rd_image_start; /* starting block # of image */
#endif
extern unsigned long find_available_memory(void); extern unsigned long find_available_memory(void);
extern void m8xx_cpm_reset(uint); extern void m8xx_cpm_reset(uint);
......
...@@ -111,12 +111,6 @@ extern unsigned long Hash_size, Hash_mask; ...@@ -111,12 +111,6 @@ extern unsigned long Hash_size, Hash_mask;
extern int probingmem; extern int probingmem;
extern unsigned long loops_per_jiffy; extern unsigned long loops_per_jiffy;
#ifdef CONFIG_BLK_DEV_RAM
extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
extern int rd_image_start; /* starting block # of image */
#endif
#ifdef CONFIG_SOUND_MODULE #ifdef CONFIG_SOUND_MODULE
EXPORT_SYMBOL(ppc_cs4232_dma); EXPORT_SYMBOL(ppc_cs4232_dma);
EXPORT_SYMBOL(ppc_cs4232_dma2); EXPORT_SYMBOL(ppc_cs4232_dma2);
......
...@@ -35,7 +35,7 @@ int blk_do_rq(request_queue_t *q, struct request *rq) ...@@ -35,7 +35,7 @@ int blk_do_rq(request_queue_t *q, struct request *rq)
DECLARE_COMPLETION(wait); DECLARE_COMPLETION(wait);
int err = 0; int err = 0;
rq->flags |= REQ_BARRIER; rq->flags |= REQ_NOMERGE;
rq->waiting = &wait; rq->waiting = &wait;
elv_add_request(q, rq, 1); elv_add_request(q, rq, 1);
generic_unplug_device(q); generic_unplug_device(q);
...@@ -81,3 +81,5 @@ int block_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg) ...@@ -81,3 +81,5 @@ int block_ioctl(kdev_t dev, unsigned int cmd, unsigned long arg)
#endif #endif
return err; return err;
} }
EXPORT_SYMBOL(block_ioctl);
...@@ -53,7 +53,7 @@ inline int bio_rq_in_between(struct bio *bio, struct request *rq, ...@@ -53,7 +53,7 @@ inline int bio_rq_in_between(struct bio *bio, struct request *rq,
* if .next is a valid request * if .next is a valid request
*/ */
next = rq->queuelist.next; next = rq->queuelist.next;
if (next == head) if (unlikely(next == head))
return 0; return 0;
next_rq = list_entry(next, struct request, queuelist); next_rq = list_entry(next, struct request, queuelist);
...@@ -121,20 +121,52 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio) ...@@ -121,20 +121,52 @@ inline int elv_rq_merge_ok(struct request *rq, struct bio *bio)
return 0; return 0;
} }
int elevator_linus_merge(request_queue_t *q, struct request **req, inline int elv_try_merge(struct request *__rq, struct bio *bio)
struct list_head *head, struct bio *bio)
{ {
unsigned int count = bio_sectors(bio); unsigned int count = bio_sectors(bio);
struct list_head *entry = &q->queue_head;
int ret = ELEVATOR_NO_MERGE; if (!elv_rq_merge_ok(__rq, bio))
return ELEVATOR_NO_MERGE;
/*
* we can merge and sequence is ok, check if it's possible
*/
if (__rq->sector + __rq->nr_sectors == bio->bi_sector) {
return ELEVATOR_BACK_MERGE;
} else if (__rq->sector - count == bio->bi_sector) {
__rq->elevator_sequence -= count;
return ELEVATOR_FRONT_MERGE;
}
return ELEVATOR_NO_MERGE;
}
int elevator_linus_merge(request_queue_t *q, struct request **req,
struct bio *bio)
{
struct list_head *entry;
struct request *__rq; struct request *__rq;
int ret;
/*
* give a one-shot try to merging with the last touched
* request
*/
if (q->last_merge) {
__rq = list_entry_rq(q->last_merge);
BUG_ON(__rq->flags & REQ_STARTED);
if ((ret = elv_try_merge(__rq, bio))) {
*req = __rq;
return ret;
}
}
entry = &q->queue_head; entry = &q->queue_head;
while ((entry = entry->prev) != head) { ret = ELEVATOR_NO_MERGE;
while ((entry = entry->prev) != &q->queue_head) {
__rq = list_entry_rq(entry); __rq = list_entry_rq(entry);
prefetch(list_entry_rq(entry->prev));
/* /*
* simply "aging" of requests in queue * simply "aging" of requests in queue
*/ */
...@@ -144,26 +176,15 @@ int elevator_linus_merge(request_queue_t *q, struct request **req, ...@@ -144,26 +176,15 @@ int elevator_linus_merge(request_queue_t *q, struct request **req,
break; break;
if (!(__rq->flags & REQ_CMD)) if (!(__rq->flags & REQ_CMD))
continue; continue;
if (__rq->elevator_sequence < 0)
break;
if (!*req && bio_rq_in_between(bio, __rq, &q->queue_head)) if (!*req && bio_rq_in_between(bio, __rq, &q->queue_head))
*req = __rq; *req = __rq;
if (!elv_rq_merge_ok(__rq, bio))
continue;
if (__rq->elevator_sequence < count) if ((ret = elv_try_merge(__rq, bio))) {
break;
/*
* we can merge and sequence is ok, check if it's possible
*/
if (__rq->sector + __rq->nr_sectors == bio->bi_sector) {
ret = ELEVATOR_BACK_MERGE;
*req = __rq;
break;
} else if (__rq->sector - count == bio->bi_sector) {
ret = ELEVATOR_FRONT_MERGE;
__rq->elevator_sequence -= count;
*req = __rq; *req = __rq;
q->last_merge = &__rq->queuelist;
break; break;
} }
} }
...@@ -183,7 +204,6 @@ void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int c ...@@ -183,7 +204,6 @@ void elevator_linus_merge_cleanup(request_queue_t *q, struct request *req, int c
entry = &req->queuelist; entry = &req->queuelist;
while ((entry = entry->next) != &q->queue_head) { while ((entry = entry->next) != &q->queue_head) {
struct request *tmp; struct request *tmp;
prefetch(list_entry_rq(entry->next));
tmp = list_entry_rq(entry); tmp = list_entry_rq(entry);
tmp->elevator_sequence -= count; tmp->elevator_sequence -= count;
} }
...@@ -199,12 +219,20 @@ void elv_add_request_fn(request_queue_t *q, struct request *rq, ...@@ -199,12 +219,20 @@ void elv_add_request_fn(request_queue_t *q, struct request *rq,
struct list_head *insert_here) struct list_head *insert_here)
{ {
list_add(&rq->queuelist, insert_here); list_add(&rq->queuelist, insert_here);
/*
* new merges must not precede this barrier
*/
if (rq->flags & REQ_BARRIER)
q->last_merge = NULL;
else if (!q->last_merge)
q->last_merge = &rq->queuelist;
} }
struct request *elv_next_request_fn(request_queue_t *q) struct request *elv_next_request_fn(request_queue_t *q)
{ {
if (!blk_queue_empty(q)) if (!blk_queue_empty(q))
return list_entry(q->queue_head.next, struct request, queuelist); return list_entry_rq(q->queue_head.next);
return NULL; return NULL;
} }
...@@ -222,17 +250,24 @@ void elv_linus_exit(request_queue_t *q, elevator_t *e) ...@@ -222,17 +250,24 @@ void elv_linus_exit(request_queue_t *q, elevator_t *e)
* See if we can find a request that this buffer can be coalesced with. * See if we can find a request that this buffer can be coalesced with.
*/ */
int elevator_noop_merge(request_queue_t *q, struct request **req, int elevator_noop_merge(request_queue_t *q, struct request **req,
struct list_head *head, struct bio *bio) struct bio *bio)
{ {
unsigned int count = bio_sectors(bio);
struct list_head *entry = &q->queue_head; struct list_head *entry = &q->queue_head;
struct request *__rq; struct request *__rq;
int ret;
entry = &q->queue_head; if (q->last_merge) {
while ((entry = entry->prev) != head) { __rq = list_entry_rq(q->last_merge);
__rq = list_entry_rq(entry); BUG_ON(__rq->flags & REQ_STARTED);
if ((ret = elv_try_merge(__rq, bio))) {
*req = __rq;
return ret;
}
}
prefetch(list_entry_rq(entry->prev)); while ((entry = entry->prev) != &q->queue_head) {
__rq = list_entry_rq(entry);
if (__rq->flags & (REQ_BARRIER | REQ_STARTED)) if (__rq->flags & (REQ_BARRIER | REQ_STARTED))
break; break;
...@@ -240,18 +275,10 @@ int elevator_noop_merge(request_queue_t *q, struct request **req, ...@@ -240,18 +275,10 @@ int elevator_noop_merge(request_queue_t *q, struct request **req,
if (!(__rq->flags & REQ_CMD)) if (!(__rq->flags & REQ_CMD))
continue; continue;
if (!elv_rq_merge_ok(__rq, bio)) if ((ret = elv_try_merge(__rq, bio))) {
continue;
/*
* we can merge and sequence is ok, check if it's possible
*/
if (__rq->sector + __rq->nr_sectors == bio->bi_sector) {
*req = __rq;
return ELEVATOR_BACK_MERGE;
} else if (__rq->sector - count == bio->bi_sector) {
*req = __rq; *req = __rq;
return ELEVATOR_FRONT_MERGE; q->last_merge = &__rq->queuelist;
return ret;
} }
} }
...@@ -267,6 +294,7 @@ int elevator_init(request_queue_t *q, elevator_t *e, elevator_t type) ...@@ -267,6 +294,7 @@ int elevator_init(request_queue_t *q, elevator_t *e, elevator_t type)
*e = type; *e = type;
INIT_LIST_HEAD(&q->queue_head); INIT_LIST_HEAD(&q->queue_head);
q->last_merge = NULL;
if (e->elevator_init_fn) if (e->elevator_init_fn)
return e->elevator_init_fn(q, e); return e->elevator_init_fn(q, e);
......
...@@ -117,6 +117,11 @@ inline request_queue_t *blk_get_queue(kdev_t dev) ...@@ -117,6 +117,11 @@ inline request_queue_t *blk_get_queue(kdev_t dev)
return &blk_dev[MAJOR(dev)].request_queue; return &blk_dev[MAJOR(dev)].request_queue;
} }
/**
 * blk_queue_prep_rq - set the request-preparation hook of a queue
 * @q:   the request queue to modify
 * @pfn: function pointer stored in @q->prep_rq_fn; any previous value
 *       is overwritten
 *
 * Only stores the pointer: nothing is invoked and no lock is taken here.
 */
void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)
{
q->prep_rq_fn = pfn;
}
/** /**
* blk_queue_make_request - define an alternate make_request function for a device * blk_queue_make_request - define an alternate make_request function for a device
* @q: the request queue for the device to be affected * @q: the request queue for the device to be affected
...@@ -179,7 +184,6 @@ void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr) ...@@ -179,7 +184,6 @@ void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
if (dma_addr == BLK_BOUNCE_ISA) { if (dma_addr == BLK_BOUNCE_ISA) {
init_emergency_isa_pool(); init_emergency_isa_pool();
q->bounce_gfp = GFP_NOIO | GFP_DMA; q->bounce_gfp = GFP_NOIO | GFP_DMA;
printk("isa pfn %lu, max low %lu, max %lu\n", bounce_pfn, blk_max_low_pfn, blk_max_pfn);
} else } else
q->bounce_gfp = GFP_NOHIGHIO; q->bounce_gfp = GFP_NOHIGHIO;
...@@ -319,7 +323,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg) ...@@ -319,7 +323,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
/* /*
* standard prep_rq_fn that builds 10 byte cmds * standard prep_rq_fn that builds 10 byte cmds
*/ */
static int ll_10byte_cmd_build(request_queue_t *q, struct request *rq) int ll_10byte_cmd_build(request_queue_t *q, struct request *rq)
{ {
int hard_sect = get_hardsect_size(rq->rq_dev); int hard_sect = get_hardsect_size(rq->rq_dev);
sector_t block = rq->hard_sector / (hard_sect >> 9); sector_t block = rq->hard_sector / (hard_sect >> 9);
...@@ -477,7 +481,7 @@ int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg ...@@ -477,7 +481,7 @@ int blk_rq_map_sg(request_queue_t *q, struct request *rq, struct scatterlist *sg
sg[nsegs - 1].length += nbytes; sg[nsegs - 1].length += nbytes;
} else { } else {
new_segment: new_segment:
sg[nsegs].address = NULL; memset(&sg[nsegs],0,sizeof(struct scatterlist));
sg[nsegs].page = bvec->bv_page; sg[nsegs].page = bvec->bv_page;
sg[nsegs].length = nbytes; sg[nsegs].length = nbytes;
sg[nsegs].offset = bvec->bv_offset; sg[nsegs].offset = bvec->bv_offset;
...@@ -540,11 +544,11 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req, ...@@ -540,11 +544,11 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
{ {
if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
req->flags |= REQ_NOMERGE; req->flags |= REQ_NOMERGE;
q->last_merge = NULL;
return 0; return 0;
} }
if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio)))
__BVEC_START(bio)))
return ll_new_mergeable(q, req, bio); return ll_new_mergeable(q, req, bio);
return ll_new_hw_segment(q, req, bio); return ll_new_hw_segment(q, req, bio);
...@@ -555,11 +559,11 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req, ...@@ -555,11 +559,11 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req,
{ {
if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) { if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
req->flags |= REQ_NOMERGE; req->flags |= REQ_NOMERGE;
q->last_merge = NULL;
return 0; return 0;
} }
if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)))
__BVEC_START(req->bio)))
return ll_new_mergeable(q, req, bio); return ll_new_mergeable(q, req, bio);
return ll_new_hw_segment(q, req, bio); return ll_new_hw_segment(q, req, bio);
...@@ -568,7 +572,7 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req, ...@@ -568,7 +572,7 @@ static int ll_front_merge_fn(request_queue_t *q, struct request *req,
static int ll_merge_requests_fn(request_queue_t *q, struct request *req, static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
struct request *next) struct request *next)
{ {
int total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; int total_phys_segments = req->nr_phys_segments +next->nr_phys_segments;
int total_hw_segments = req->nr_hw_segments + next->nr_hw_segments; int total_hw_segments = req->nr_hw_segments + next->nr_hw_segments;
/* /*
...@@ -599,6 +603,9 @@ static int ll_merge_requests_fn(request_queue_t *q, struct request *req, ...@@ -599,6 +603,9 @@ static int ll_merge_requests_fn(request_queue_t *q, struct request *req,
return 0; return 0;
/* Merge is OK... */ /* Merge is OK... */
if (q->last_merge == &next->queuelist)
q->last_merge = NULL;
req->nr_phys_segments = total_phys_segments; req->nr_phys_segments = total_phys_segments;
req->nr_hw_segments = total_hw_segments; req->nr_hw_segments = total_hw_segments;
return 1; return 1;
...@@ -799,7 +806,7 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock) ...@@ -799,7 +806,7 @@ int blk_init_queue(request_queue_t *q, request_fn_proc *rfn, spinlock_t *lock)
q->back_merge_fn = ll_back_merge_fn; q->back_merge_fn = ll_back_merge_fn;
q->front_merge_fn = ll_front_merge_fn; q->front_merge_fn = ll_front_merge_fn;
q->merge_requests_fn = ll_merge_requests_fn; q->merge_requests_fn = ll_merge_requests_fn;
q->prep_rq_fn = ll_10byte_cmd_build; q->prep_rq_fn = NULL;
q->plug_tq.sync = 0; q->plug_tq.sync = 0;
q->plug_tq.routine = &generic_unplug_device; q->plug_tq.routine = &generic_unplug_device;
q->plug_tq.data = q; q->plug_tq.data = q;
...@@ -1039,13 +1046,11 @@ static inline void attempt_back_merge(request_queue_t *q, struct request *rq) ...@@ -1039,13 +1046,11 @@ static inline void attempt_back_merge(request_queue_t *q, struct request *rq)
attempt_merge(q, rq); attempt_merge(q, rq);
} }
static inline void attempt_front_merge(request_queue_t *q, static inline void attempt_front_merge(request_queue_t *q, struct request *rq)
struct list_head *head,
struct request *rq)
{ {
struct list_head *prev = rq->queuelist.prev; struct list_head *prev = rq->queuelist.prev;
if (prev != head) if (prev != &q->queue_head)
attempt_merge(q, blkdev_entry_to_request(prev)); attempt_merge(q, blkdev_entry_to_request(prev));
} }
...@@ -1081,7 +1086,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) ...@@ -1081,7 +1086,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
{ {
struct request *req, *freereq = NULL; struct request *req, *freereq = NULL;
int el_ret, latency = 0, rw, nr_sectors, cur_nr_sectors, barrier; int el_ret, latency = 0, rw, nr_sectors, cur_nr_sectors, barrier;
struct list_head *head, *insert_here; struct list_head *insert_here = &q->queue_head;
elevator_t *elevator = &q->elevator; elevator_t *elevator = &q->elevator;
sector_t sector; sector_t sector;
...@@ -1102,24 +1107,18 @@ static int __make_request(request_queue_t *q, struct bio *bio) ...@@ -1102,24 +1107,18 @@ static int __make_request(request_queue_t *q, struct bio *bio)
latency = elevator_request_latency(elevator, rw); latency = elevator_request_latency(elevator, rw);
barrier = test_bit(BIO_RW_BARRIER, &bio->bi_rw); barrier = test_bit(BIO_RW_BARRIER, &bio->bi_rw);
again:
req = NULL;
head = &q->queue_head;
spin_lock_irq(q->queue_lock); spin_lock_irq(q->queue_lock);
insert_here = head->prev;
if (blk_queue_empty(q) || barrier) { if (blk_queue_empty(q) || barrier) {
blk_plug_device(q); blk_plug_device(q);
goto get_rq; goto get_rq;
} else if ((req = __elv_next_request(q))) { }
if (req->flags & REQ_STARTED)
head = head->next;
again:
req = NULL; req = NULL;
} insert_here = q->queue_head.prev;
el_ret = elevator->elevator_merge_fn(q, &req, head, bio); el_ret = elevator->elevator_merge_fn(q, &req, bio);
switch (el_ret) { switch (el_ret) {
case ELEVATOR_BACK_MERGE: case ELEVATOR_BACK_MERGE:
BUG_ON(req->flags & REQ_STARTED); BUG_ON(req->flags & REQ_STARTED);
...@@ -1157,7 +1156,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) ...@@ -1157,7 +1156,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
req->sector = req->hard_sector = sector; req->sector = req->hard_sector = sector;
req->nr_sectors = req->hard_nr_sectors += nr_sectors; req->nr_sectors = req->hard_nr_sectors += nr_sectors;
drive_stat_acct(req, nr_sectors, 0); drive_stat_acct(req, nr_sectors, 0);
attempt_front_merge(q, head, req); attempt_front_merge(q, req);
goto out; goto out;
/* /*
...@@ -1188,7 +1187,6 @@ static int __make_request(request_queue_t *q, struct bio *bio) ...@@ -1188,7 +1187,6 @@ static int __make_request(request_queue_t *q, struct bio *bio)
req = freereq; req = freereq;
freereq = NULL; freereq = NULL;
} else if ((req = get_request(q, rw)) == NULL) { } else if ((req = get_request(q, rw)) == NULL) {
spin_unlock_irq(q->queue_lock); spin_unlock_irq(q->queue_lock);
/* /*
...@@ -1200,6 +1198,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) ...@@ -1200,6 +1198,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
} }
freereq = get_request_wait(q, rw); freereq = get_request_wait(q, rw);
spin_lock_irq(q->queue_lock);
goto again; goto again;
} }
...@@ -1755,3 +1754,6 @@ EXPORT_SYMBOL(submit_bio); ...@@ -1755,3 +1754,6 @@ EXPORT_SYMBOL(submit_bio);
EXPORT_SYMBOL(blk_queue_assign_lock); EXPORT_SYMBOL(blk_queue_assign_lock);
EXPORT_SYMBOL(blk_phys_contig_segment); EXPORT_SYMBOL(blk_phys_contig_segment);
EXPORT_SYMBOL(blk_hw_contig_segment); EXPORT_SYMBOL(blk_hw_contig_segment);
EXPORT_SYMBOL(ll_10byte_cmd_build);
EXPORT_SYMBOL(blk_queue_prep_rq);
...@@ -1007,6 +1007,7 @@ int __init loop_init(void) ...@@ -1007,6 +1007,7 @@ int __init loop_init(void)
goto out_mem; goto out_mem;
blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request); blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request);
blk_queue_bounce_limit(BLK_DEFAULT_QUEUE(MAJOR_NR), BLK_BOUNCE_HIGH);
for (i = 0; i < max_loop; i++) { for (i = 0; i < max_loop; i++) {
struct loop_device *lo = &loop_dev[i]; struct loop_device *lo = &loop_dev[i];
......
...@@ -43,26 +43,12 @@ ...@@ -43,26 +43,12 @@
*/ */
#include <linux/config.h> #include <linux/config.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/hdreg.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/ioctl.h>
#include <linux/fd.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/init.h> #include <linux/init.h>
#include <linux/devfs_fs_kernel.h> #include <linux/devfs_fs_kernel.h>
#include <linux/smp_lock.h>
#include <asm/system.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/byteorder.h>
extern void wait_for_keypress(void);
/* /*
* 35 has been officially registered as the RAMDISK major number, but * 35 has been officially registered as the RAMDISK major number, but
...@@ -79,6 +65,8 @@ extern void wait_for_keypress(void); ...@@ -79,6 +65,8 @@ extern void wait_for_keypress(void);
#ifdef CONFIG_BLK_DEV_INITRD #ifdef CONFIG_BLK_DEV_INITRD
static int initrd_users; static int initrd_users;
static spinlock_t initrd_users_lock = SPIN_LOCK_UNLOCKED; static spinlock_t initrd_users_lock = SPIN_LOCK_UNLOCKED;
unsigned long initrd_start, initrd_end;
int initrd_below_start_ok;
#endif #endif
/* Various static variables go here. Most are used only in the RAM disk code. /* Various static variables go here. Most are used only in the RAM disk code.
...@@ -111,70 +99,6 @@ int rd_size = CONFIG_BLK_DEV_RAM_SIZE; /* Size of the RAM disks */ ...@@ -111,70 +99,6 @@ int rd_size = CONFIG_BLK_DEV_RAM_SIZE; /* Size of the RAM disks */
*/ */
int rd_blocksize = BLOCK_SIZE; /* blocksize of the RAM disks */ int rd_blocksize = BLOCK_SIZE; /* blocksize of the RAM disks */
#ifndef MODULE
int rd_doload; /* 1 = load RAM disk, 0 = don't load */
int rd_prompt = 1; /* 1 = prompt for RAM disk, 0 = don't prompt */
int rd_image_start; /* starting block # of image */
#ifdef CONFIG_BLK_DEV_INITRD
unsigned long initrd_start, initrd_end;
int mount_initrd = 1; /* zero if initrd should not be mounted */
int initrd_below_start_ok;
static int __init no_initrd(char *str)
{
mount_initrd = 0;
return 1;
}
__setup("noinitrd", no_initrd);
#endif
static int __init ramdisk_start_setup(char *str)
{
rd_image_start = simple_strtol(str,NULL,0);
return 1;
}
static int __init load_ramdisk(char *str)
{
rd_doload = simple_strtol(str,NULL,0) & 3;
return 1;
}
static int __init prompt_ramdisk(char *str)
{
rd_prompt = simple_strtol(str,NULL,0) & 1;
return 1;
}
static int __init ramdisk_size(char *str)
{
rd_size = simple_strtol(str,NULL,0);
return 1;
}
static int __init ramdisk_size2(char *str)
{
return ramdisk_size(str);
}
static int __init ramdisk_blocksize(char *str)
{
rd_blocksize = simple_strtol(str,NULL,0);
return 1;
}
__setup("ramdisk_start=", ramdisk_start_setup);
__setup("load_ramdisk=", load_ramdisk);
__setup("prompt_ramdisk=", prompt_ramdisk);
__setup("ramdisk=", ramdisk_size);
__setup("ramdisk_size=", ramdisk_size2);
__setup("ramdisk_blocksize=", ramdisk_blocksize);
#endif
/* /*
* Copyright (C) 2000 Linus Torvalds. * Copyright (C) 2000 Linus Torvalds.
* 2000 Transmeta Corp. * 2000 Transmeta Corp.
...@@ -492,7 +416,7 @@ static void __exit rd_cleanup (void) ...@@ -492,7 +416,7 @@ static void __exit rd_cleanup (void)
} }
/* This is the registration and initialization section of the RAM disk driver */ /* This is the registration and initialization section of the RAM disk driver */
int __init rd_init (void) static int __init rd_init (void)
{ {
int i; int i;
...@@ -548,7 +472,28 @@ int __init rd_init (void) ...@@ -548,7 +472,28 @@ int __init rd_init (void)
module_init(rd_init); module_init(rd_init);
module_exit(rd_cleanup); module_exit(rd_cleanup);
/* loadable module support */ /* options - nonmodular */
#ifndef MODULE
/*
 * Handler for the "ramdisk=" boot option: parses @str with
 * simple_strtol() (base argument 0) and stores the result in rd_size.
 * The value is not range-checked here. Always returns 1.
 */
static int __init ramdisk_size(char *str)
{
rd_size = simple_strtol(str,NULL,0);
return 1;
}
/*
 * Handler for the "ramdisk_size=" boot option. It just forwards to
 * ramdisk_size(), so both option spellings end up setting rd_size.
 */
static int __init ramdisk_size2(char *str) /* kludge */
{
return ramdisk_size(str);
}
/*
 * Handler for the "ramdisk_blocksize=" boot option: parses @str with
 * simple_strtol() (base argument 0) and stores the result in
 * rd_blocksize. The value is not validated here. Always returns 1.
 */
static int __init ramdisk_blocksize(char *str)
{
rd_blocksize = simple_strtol(str,NULL,0);
return 1;
}
__setup("ramdisk=", ramdisk_size);
__setup("ramdisk_size=", ramdisk_size2);
__setup("ramdisk_blocksize=", ramdisk_blocksize);
#endif
/* options - modular */
MODULE_PARM (rd_size, "1i"); MODULE_PARM (rd_size, "1i");
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
MODULE_PARM (rd_blocksize, "i"); MODULE_PARM (rd_blocksize, "i");
......
...@@ -459,6 +459,8 @@ void __init ide_init_amd74xx (ide_hwif_t *hwif) ...@@ -459,6 +459,8 @@ void __init ide_init_amd74xx (ide_hwif_t *hwif)
hwif->tuneproc = &amd74xx_tune_drive; hwif->tuneproc = &amd74xx_tune_drive;
hwif->speedproc = &amd74xx_tune_chipset; hwif->speedproc = &amd74xx_tune_chipset;
hwif->highmem = 1;
#ifndef CONFIG_BLK_DEV_IDEDMA #ifndef CONFIG_BLK_DEV_IDEDMA
hwif->drives[0].autotune = 1; hwif->drives[0].autotune = 1;
hwif->drives[1].autotune = 1; hwif->drives[1].autotune = 1;
......
...@@ -594,7 +594,7 @@ static int cdrom_decode_status (ide_startstop_t *startstop, ide_drive_t *drive, ...@@ -594,7 +594,7 @@ static int cdrom_decode_status (ide_startstop_t *startstop, ide_drive_t *drive,
cdrom_end_request (1, drive); cdrom_end_request (1, drive);
*startstop = ide_error (drive, "request sense failure", stat); *startstop = ide_error (drive, "request sense failure", stat);
return 1; return 1;
} else if (rq->flags & REQ_PC) { } else if (rq->flags & (REQ_PC | REQ_BLOCK_PC)) {
/* All other functions, except for READ. */ /* All other functions, except for READ. */
struct completion *wait = NULL; struct completion *wait = NULL;
pc = (struct packet_command *) rq->special; pc = (struct packet_command *) rq->special;
...@@ -2675,6 +2675,8 @@ int ide_cdrom_setup (ide_drive_t *drive) ...@@ -2675,6 +2675,8 @@ int ide_cdrom_setup (ide_drive_t *drive)
set_blocksize(MKDEV(HWIF(drive)->major, minor), CD_FRAMESIZE); set_blocksize(MKDEV(HWIF(drive)->major, minor), CD_FRAMESIZE);
blk_queue_hardsect_size(&drive->queue, CD_FRAMESIZE); blk_queue_hardsect_size(&drive->queue, CD_FRAMESIZE);
blk_queue_prep_rq(&drive->queue, ll_10byte_cmd_build);
drive->special.all = 0; drive->special.all = 0;
drive->ready_stat = 0; drive->ready_stat = 0;
......
...@@ -336,23 +336,7 @@ typedef struct { ...@@ -336,23 +336,7 @@ typedef struct {
#define IDEFLOPPY_IOCTL_FORMAT_START 0x4602 #define IDEFLOPPY_IOCTL_FORMAT_START 0x4602
#define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS 0x4603 #define IDEFLOPPY_IOCTL_FORMAT_GET_PROGRESS 0x4603
/* #define IDEFLOPPY_RQ (REQ_SPECIAL)
* Special requests for our block device strategy routine.
*/
#define IDEFLOPPY_FIRST_RQ 90
/*
* IDEFLOPPY_PC_RQ is used to queue a packet command in the request queue.
*/
#define IDEFLOPPY_PC_RQ 90
#define IDEFLOPPY_LAST_RQ 90
/*
* A macro which can be used to check if a given request command
* originated in the driver or in the buffer cache layer.
*/
#define IDEFLOPPY_RQ_CMD(cmd) ((cmd >= IDEFLOPPY_FIRST_RQ) && (cmd <= IDEFLOPPY_LAST_RQ))
/* /*
* Error codes which are returned in rq->errors to the higher part * Error codes which are returned in rq->errors to the higher part
...@@ -696,7 +680,7 @@ static void idefloppy_end_request (byte uptodate, ide_hwgroup_t *hwgroup) ...@@ -696,7 +680,7 @@ static void idefloppy_end_request (byte uptodate, ide_hwgroup_t *hwgroup)
/* Why does this happen? */ /* Why does this happen? */
if (!rq) if (!rq)
return; return;
if (!IDEFLOPPY_RQ_CMD (rq->cmd)) { if (rq->flags & IDEFLOPPY_RQ) {
ide_end_request (uptodate, hwgroup); ide_end_request (uptodate, hwgroup);
return; return;
} }
...@@ -776,7 +760,7 @@ static void idefloppy_queue_pc_head (ide_drive_t *drive,idefloppy_pc_t *pc,struc ...@@ -776,7 +760,7 @@ static void idefloppy_queue_pc_head (ide_drive_t *drive,idefloppy_pc_t *pc,struc
{ {
ide_init_drive_cmd (rq); ide_init_drive_cmd (rq);
rq->buffer = (char *) pc; rq->buffer = (char *) pc;
rq->cmd = IDEFLOPPY_PC_RQ; rq->flags = IDEFLOPPY_RQ;
(void) ide_do_drive_cmd (drive, rq, ide_preempt); (void) ide_do_drive_cmd (drive, rq, ide_preempt);
} }
...@@ -1192,6 +1176,7 @@ static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t ...@@ -1192,6 +1176,7 @@ static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t
{ {
int block = sector / floppy->bs_factor; int block = sector / floppy->bs_factor;
int blocks = rq->nr_sectors / floppy->bs_factor; int blocks = rq->nr_sectors / floppy->bs_factor;
int cmd = rq_data_dir(rq);
#if IDEFLOPPY_DEBUG_LOG #if IDEFLOPPY_DEBUG_LOG
printk ("create_rw1%d_cmd: block == %d, blocks == %d\n", printk ("create_rw1%d_cmd: block == %d, blocks == %d\n",
...@@ -1200,18 +1185,18 @@ static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t ...@@ -1200,18 +1185,18 @@ static void idefloppy_create_rw_cmd (idefloppy_floppy_t *floppy, idefloppy_pc_t
idefloppy_init_pc (pc); idefloppy_init_pc (pc);
if (test_bit (IDEFLOPPY_USE_READ12, &floppy->flags)) { if (test_bit (IDEFLOPPY_USE_READ12, &floppy->flags)) {
pc->c[0] = rq->cmd == READ ? IDEFLOPPY_READ12_CMD : IDEFLOPPY_WRITE12_CMD; pc->c[0] = cmd == READ ? IDEFLOPPY_READ12_CMD : IDEFLOPPY_WRITE12_CMD;
put_unaligned (htonl (blocks), (unsigned int *) &pc->c[6]); put_unaligned (htonl (blocks), (unsigned int *) &pc->c[6]);
} else { } else {
pc->c[0] = rq->cmd == READ ? IDEFLOPPY_READ10_CMD : IDEFLOPPY_WRITE10_CMD; pc->c[0] = cmd == READ ? IDEFLOPPY_READ10_CMD : IDEFLOPPY_WRITE10_CMD;
put_unaligned (htons (blocks), (unsigned short *) &pc->c[7]); put_unaligned (htons (blocks), (unsigned short *) &pc->c[7]);
} }
put_unaligned (htonl (block), (unsigned int *) &pc->c[2]); put_unaligned (htonl (block), (unsigned int *) &pc->c[2]);
pc->callback = &idefloppy_rw_callback; pc->callback = &idefloppy_rw_callback;
pc->rq = rq; pc->rq = rq;
pc->b_data = rq->buffer; pc->b_data = rq->buffer;
pc->b_count = rq->cmd == READ ? 0 : rq->bio->bi_size; pc->b_count = cmd == READ ? 0 : rq->bio->bi_size;
if (rq->cmd == WRITE) if (rq->flags & REQ_RW)
set_bit (PC_WRITING, &pc->flags); set_bit (PC_WRITING, &pc->flags);
pc->buffer = NULL; pc->buffer = NULL;
pc->request_transfer = pc->buffer_size = blocks * floppy->block_size; pc->request_transfer = pc->buffer_size = blocks * floppy->block_size;
...@@ -1227,8 +1212,8 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request ...@@ -1227,8 +1212,8 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
idefloppy_pc_t *pc; idefloppy_pc_t *pc;
#if IDEFLOPPY_DEBUG_LOG #if IDEFLOPPY_DEBUG_LOG
printk (KERN_INFO "rq_status: %d, rq_dev: %u, cmd: %d, errors: %d\n",rq->rq_status,(unsigned int) rq->rq_dev,rq->cmd,rq->errors); printk (KERN_INFO "rq_status: %d, rq_dev: %u, flags: %lx, errors: %d\n",rq->rq_status,(unsigned int) rq->rq_dev,rq->flags,rq->errors);
printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %ld\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors); printk (KERN_INFO "sector: %ld, nr_sectors: %ld, current_nr_sectors: %d\n",rq->sector,rq->nr_sectors,rq->current_nr_sectors);
#endif /* IDEFLOPPY_DEBUG_LOG */ #endif /* IDEFLOPPY_DEBUG_LOG */
if (rq->errors >= ERROR_MAX) { if (rq->errors >= ERROR_MAX) {
...@@ -1240,9 +1225,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request ...@@ -1240,9 +1225,7 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
idefloppy_end_request (0, HWGROUP(drive)); idefloppy_end_request (0, HWGROUP(drive));
return ide_stopped; return ide_stopped;
} }
switch (rq->cmd) { if (rq->flags & REQ_CMD) {
case READ:
case WRITE:
if (rq->sector % floppy->bs_factor || rq->nr_sectors % floppy->bs_factor) { if (rq->sector % floppy->bs_factor || rq->nr_sectors % floppy->bs_factor) {
printk ("%s: unsupported r/w request size\n", drive->name); printk ("%s: unsupported r/w request size\n", drive->name);
idefloppy_end_request (0, HWGROUP(drive)); idefloppy_end_request (0, HWGROUP(drive));
...@@ -1250,12 +1233,10 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request ...@@ -1250,12 +1233,10 @@ static ide_startstop_t idefloppy_do_request (ide_drive_t *drive, struct request
} }
pc = idefloppy_next_pc_storage (drive); pc = idefloppy_next_pc_storage (drive);
idefloppy_create_rw_cmd (floppy, pc, rq, block); idefloppy_create_rw_cmd (floppy, pc, rq, block);
break; } else if (rq->flags & IDEFLOPPY_RQ) {
case IDEFLOPPY_PC_RQ:
pc = (idefloppy_pc_t *) rq->buffer; pc = (idefloppy_pc_t *) rq->buffer;
break; } else {
default: blk_dump_rq_flags(rq, "ide-floppy: unsupported command in queue");
printk (KERN_ERR "ide-floppy: unsupported command %x in request queue\n", rq->cmd);
idefloppy_end_request (0,HWGROUP (drive)); idefloppy_end_request (0,HWGROUP (drive));
return ide_stopped; return ide_stopped;
} }
...@@ -1273,7 +1254,7 @@ static int idefloppy_queue_pc_tail (ide_drive_t *drive,idefloppy_pc_t *pc) ...@@ -1273,7 +1254,7 @@ static int idefloppy_queue_pc_tail (ide_drive_t *drive,idefloppy_pc_t *pc)
ide_init_drive_cmd (&rq); ide_init_drive_cmd (&rq);
rq.buffer = (char *) pc; rq.buffer = (char *) pc;
rq.cmd = IDEFLOPPY_PC_RQ; rq.flags = IDEFLOPPY_RQ;
return ide_do_drive_cmd (drive, &rq, ide_wait); return ide_do_drive_cmd (drive, &rq, ide_wait);
} }
......
...@@ -66,7 +66,7 @@ static mdk_personality_t *pers[MAX_PERSONALITY]; ...@@ -66,7 +66,7 @@ static mdk_personality_t *pers[MAX_PERSONALITY];
/* /*
* Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
* is 100 KB/sec, so the extra system load does not show up that much. * is 1000 KB/sec, so the extra system load does not show up that much.
* Increase it if you want to have more _guaranteed_ speed. Note that * Increase it if you want to have more _guaranteed_ speed. Note that
* the RAID driver will use the maximum available bandwith if the IO * the RAID driver will use the maximum available bandwith if the IO
* subsystem is idle. There is also an 'absolute maximum' reconstruction * subsystem is idle. There is also an 'absolute maximum' reconstruction
...@@ -76,8 +76,8 @@ static mdk_personality_t *pers[MAX_PERSONALITY]; ...@@ -76,8 +76,8 @@ static mdk_personality_t *pers[MAX_PERSONALITY];
* you can change it via /proc/sys/dev/raid/speed_limit_min and _max. * you can change it via /proc/sys/dev/raid/speed_limit_min and _max.
*/ */
static int sysctl_speed_limit_min = 100; static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 100000; static int sysctl_speed_limit_max = 200000;
static struct ctl_table_header *raid_table_header; static struct ctl_table_header *raid_table_header;
...@@ -3336,7 +3336,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) ...@@ -3336,7 +3336,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
int md_do_sync(mddev_t *mddev, mdp_disk_t *spare) int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
{ {
mddev_t *mddev2; mddev_t *mddev2;
unsigned int max_sectors, currspeed, unsigned int max_sectors, currspeed = 0,
j, window, err, serialize; j, window, err, serialize;
unsigned long mark[SYNC_MARKS]; unsigned long mark[SYNC_MARKS];
unsigned long mark_cnt[SYNC_MARKS]; unsigned long mark_cnt[SYNC_MARKS];
...@@ -3376,8 +3376,7 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare) ...@@ -3376,8 +3376,7 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
max_sectors = mddev->sb->size << 1; max_sectors = mddev->sb->size << 1;
printk(KERN_INFO "md: syncing RAID array md%d\n", mdidx(mddev)); printk(KERN_INFO "md: syncing RAID array md%d\n", mdidx(mddev));
printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed: %d KB/sec/disc.\n", printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed: %d KB/sec/disc.\n", sysctl_speed_limit_min);
sysctl_speed_limit_min);
printk(KERN_INFO "md: using maximum available idle IO bandwith " printk(KERN_INFO "md: using maximum available idle IO bandwith "
"(but not more than %d KB/sec) for reconstruction.\n", "(but not more than %d KB/sec) for reconstruction.\n",
sysctl_speed_limit_max); sysctl_speed_limit_max);
...@@ -3409,7 +3408,7 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare) ...@@ -3409,7 +3408,7 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
for (j = 0; j < max_sectors;) { for (j = 0; j < max_sectors;) {
int sectors; int sectors;
sectors = mddev->pers->sync_request(mddev, j); sectors = mddev->pers->sync_request(mddev, j, currspeed < sysctl_speed_limit_min);
if (sectors < 0) { if (sectors < 0) {
err = sectors; err = sectors;
goto out; goto out;
......
...@@ -28,7 +28,6 @@ ...@@ -28,7 +28,6 @@
#define MD_DRIVER #define MD_DRIVER
#define MD_PERSONALITY #define MD_PERSONALITY
#define MAX_WORK_PER_DISK 128
/* /*
* Number of guaranteed r1bios in case of extreme VM load: * Number of guaranteed r1bios in case of extreme VM load:
*/ */
...@@ -38,24 +37,6 @@ static mdk_personality_t raid1_personality; ...@@ -38,24 +37,6 @@ static mdk_personality_t raid1_personality;
static spinlock_t retry_list_lock = SPIN_LOCK_UNLOCKED; static spinlock_t retry_list_lock = SPIN_LOCK_UNLOCKED;
static LIST_HEAD(retry_list_head); static LIST_HEAD(retry_list_head);
static inline void check_all_w_bios_empty(r1bio_t *r1_bio)
{
int i;
return;
for (i = 0; i < MD_SB_DISKS; i++)
if (r1_bio->write_bios[i])
BUG();
}
static inline void check_all_bios_empty(r1bio_t *r1_bio)
{
return;
if (r1_bio->read_bio)
BUG();
check_all_w_bios_empty(r1_bio);
}
static void * r1bio_pool_alloc(int gfp_flags, void *data) static void * r1bio_pool_alloc(int gfp_flags, void *data)
{ {
r1bio_t *r1_bio; r1bio_t *r1_bio;
...@@ -69,11 +50,11 @@ static void * r1bio_pool_alloc(int gfp_flags, void *data) ...@@ -69,11 +50,11 @@ static void * r1bio_pool_alloc(int gfp_flags, void *data)
static void r1bio_pool_free(void *r1_bio, void *data) static void r1bio_pool_free(void *r1_bio, void *data)
{ {
check_all_bios_empty(r1_bio);
kfree(r1_bio); kfree(r1_bio);
} }
#define RESYNC_BLOCK_SIZE (64*1024) #define RESYNC_BLOCK_SIZE (64*1024)
#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE) #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
#define RESYNC_WINDOW (2048*1024) #define RESYNC_WINDOW (2048*1024)
...@@ -86,7 +67,6 @@ static void * r1buf_pool_alloc(int gfp_flags, void *data) ...@@ -86,7 +67,6 @@ static void * r1buf_pool_alloc(int gfp_flags, void *data)
int i, j; int i, j;
r1_bio = mempool_alloc(conf->r1bio_pool, gfp_flags); r1_bio = mempool_alloc(conf->r1bio_pool, gfp_flags);
check_all_bios_empty(r1_bio);
bio = bio_alloc(gfp_flags, RESYNC_PAGES); bio = bio_alloc(gfp_flags, RESYNC_PAGES);
if (!bio) if (!bio)
...@@ -131,7 +111,6 @@ static void r1buf_pool_free(void *__r1_bio, void *data) ...@@ -131,7 +111,6 @@ static void r1buf_pool_free(void *__r1_bio, void *data)
r1bio_t *r1bio = __r1_bio; r1bio_t *r1bio = __r1_bio;
struct bio *bio = r1bio->master_bio; struct bio *bio = r1bio->master_bio;
check_all_bios_empty(r1bio);
if (atomic_read(&bio->bi_cnt) != 1) if (atomic_read(&bio->bi_cnt) != 1)
BUG(); BUG();
for (i = 0; i < RESYNC_PAGES; i++) { for (i = 0; i < RESYNC_PAGES; i++) {
...@@ -163,13 +142,25 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio) ...@@ -163,13 +142,25 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
} }
*bio = NULL; *bio = NULL;
} }
check_all_bios_empty(r1_bio);
} }
static inline void free_r1bio(r1bio_t *r1_bio) static inline void free_r1bio(r1bio_t *r1_bio)
{ {
unsigned long flags;
conf_t *conf = mddev_to_conf(r1_bio->mddev); conf_t *conf = mddev_to_conf(r1_bio->mddev);
/*
* Wake up any possible resync thread that waits for the device
* to go idle.
*/
spin_lock_irqsave(&conf->resync_lock, flags);
if (!--conf->nr_pending) {
wake_up(&conf->wait_idle);
wake_up(&conf->wait_resume);
}
spin_unlock_irqrestore(&conf->resync_lock, flags);
put_all_bios(conf, r1_bio); put_all_bios(conf, r1_bio);
mempool_free(r1_bio, conf->r1bio_pool); mempool_free(r1_bio, conf->r1bio_pool);
} }
...@@ -178,7 +169,14 @@ static inline void put_buf(r1bio_t *r1_bio) ...@@ -178,7 +169,14 @@ static inline void put_buf(r1bio_t *r1_bio)
{ {
conf_t *conf = mddev_to_conf(r1_bio->mddev); conf_t *conf = mddev_to_conf(r1_bio->mddev);
struct bio *bio = r1_bio->master_bio; struct bio *bio = r1_bio->master_bio;
unsigned long flags;
spin_lock_irqsave(&conf->resync_lock, flags);
if (!--conf->nr_pending) {
wake_up(&conf->wait_idle);
wake_up(&conf->wait_resume);
}
spin_unlock_irqrestore(&conf->resync_lock, flags);
/* /*
* undo any possible partial request fixup magic: * undo any possible partial request fixup magic:
*/ */
...@@ -222,37 +220,6 @@ static void reschedule_retry(r1bio_t *r1_bio) ...@@ -222,37 +220,6 @@ static void reschedule_retry(r1bio_t *r1_bio)
md_wakeup_thread(conf->thread); md_wakeup_thread(conf->thread);
} }
static void inline raid_request_done(unsigned long sector, conf_t *conf, int phase)
{
unsigned long flags;
spin_lock_irqsave(&conf->segment_lock, flags);
if (sector < conf->start_active)
conf->cnt_done--;
else if (sector >= conf->start_future && conf->phase == phase)
conf->cnt_future--;
else if (!--conf->cnt_pending)
wake_up(&conf->wait_ready);
spin_unlock_irqrestore(&conf->segment_lock, flags);
}
static void inline sync_request_done(sector_t sector, conf_t *conf)
{
unsigned long flags;
spin_lock_irqsave(&conf->segment_lock, flags);
if (sector >= conf->start_ready)
--conf->cnt_ready;
else if (sector >= conf->start_active) {
if (!--conf->cnt_active) {
conf->start_active = conf->start_ready;
wake_up(&conf->wait_done);
}
}
spin_unlock_irqrestore(&conf->segment_lock, flags);
}
/* /*
* raid_end_bio_io() is called when we have finished servicing a mirrored * raid_end_bio_io() is called when we have finished servicing a mirrored
* operation and are ready to return a success/failure code to the buffer * operation and are ready to return a success/failure code to the buffer
...@@ -262,19 +229,29 @@ static int raid_end_bio_io(r1bio_t *r1_bio, int uptodate, int nr_sectors) ...@@ -262,19 +229,29 @@ static int raid_end_bio_io(r1bio_t *r1_bio, int uptodate, int nr_sectors)
{ {
struct bio *bio = r1_bio->master_bio; struct bio *bio = r1_bio->master_bio;
raid_request_done(bio->bi_sector, mddev_to_conf(r1_bio->mddev),
test_bit(R1BIO_SyncPhase, &r1_bio->state));
bio_endio(bio, uptodate, nr_sectors); bio_endio(bio, uptodate, nr_sectors);
free_r1bio(r1_bio); free_r1bio(r1_bio);
return 0; return 0;
} }
/*
* Update disk head position estimator based on IRQ completion info.
*/
static void inline update_head_pos(int disk, r1bio_t *r1_bio)
{
conf_t *conf = mddev_to_conf(r1_bio->mddev);
conf->mirrors[disk].head_position =
r1_bio->sector + (r1_bio->master_bio->bi_size >> 9);
atomic_dec(&conf->mirrors[disk].nr_pending);
}
static int end_request(struct bio *bio, int nr_sectors) static int end_request(struct bio *bio, int nr_sectors)
{ {
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
int i;
/* /*
* this branch is our 'one mirror IO has finished' event handler: * this branch is our 'one mirror IO has finished' event handler:
...@@ -287,20 +264,16 @@ static int end_request(struct bio *bio, int nr_sectors) ...@@ -287,20 +264,16 @@ static int end_request(struct bio *bio, int nr_sectors)
* we will return a good error code for to the higher * we will return a good error code for to the higher
* levels even if IO on some other mirrored buffer fails. * levels even if IO on some other mirrored buffer fails.
* *
* The 'master' represents the complex operation to * The 'master' represents the composite IO operation to
* user-side. So if something waits for IO, then it will * user-side. So if something waits for IO, then it will
* wait for the 'master' bio. * wait for the 'master' bio.
*/ */
set_bit(R1BIO_Uptodate, &r1_bio->state); set_bit(R1BIO_Uptodate, &r1_bio->state);
/*
* We split up the read and write side, imho they are
* conceptually different.
*/
if ((r1_bio->cmd == READ) || (r1_bio->cmd == READA)) { if ((r1_bio->cmd == READ) || (r1_bio->cmd == READA)) {
if (!r1_bio->read_bio) if (!r1_bio->read_bio)
BUG(); BUG();
update_head_pos(r1_bio->read_disk, r1_bio);
/* /*
* we have only one bio on the read side * we have only one bio on the read side
*/ */
...@@ -322,6 +295,14 @@ static int end_request(struct bio *bio, int nr_sectors) ...@@ -322,6 +295,14 @@ static int end_request(struct bio *bio, int nr_sectors)
/* /*
* WRITE: * WRITE:
* *
* First, find the disk this bio belongs to.
*/
for (i = 0; i < MD_SB_DISKS; i++)
if (r1_bio->write_bios[i] == bio) {
update_head_pos(i, r1_bio);
break;
}
/*
* Let's see if all mirrored write operations have finished * Let's see if all mirrored write operations have finished
* already. * already.
*/ */
...@@ -332,44 +313,49 @@ static int end_request(struct bio *bio, int nr_sectors) ...@@ -332,44 +313,49 @@ static int end_request(struct bio *bio, int nr_sectors)
/* /*
* This routine returns the disk from which the requested read should * This routine returns the disk from which the requested read should
* be done. It bookkeeps the last read position for every disk * be done. There is a per-array 'next expected sequential IO' sector
* in array and when new read requests come, the disk which last * number - if this matches on the next IO then we use the last disk.
* position is nearest to the request, is chosen. * There is also a per-disk 'last know head position' sector that is
* maintained from IRQ contexts, both the normal and the resync IO
* completion handlers update this position correctly. If there is no
* perfect sequential match then we pick the disk whose head is closest.
* *
* TODO: now if there are 2 mirrors in the same 2 devices, performance * If there are 2 mirrors in the same 2 devices, performance degrades
* degrades dramatically because position is mirror, not device based. * because position is mirror, not device based.
* This should be changed to be device based. Also atomic sequential
* reads should be somehow balanced.
*/ */
static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio) static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
{ {
const int sectors = bio->bi_size >> 9;
const unsigned long this_sector = r1_bio->sector; const unsigned long this_sector = r1_bio->sector;
unsigned long new_distance, current_distance;
int new_disk = conf->last_used, disk = new_disk; int new_disk = conf->last_used, disk = new_disk;
const int sectors = bio->bi_size >> 9;
sector_t new_distance, current_distance;
/* /*
* Check if it is sane at all to balance * Check if it if we can balance. We can balance on the whole
* device if no resync is going on, or below the resync window.
* We take the first readable disk when above the resync window.
*/ */
if (conf->resync_mirrors && (this_sector + sectors >= conf->next_resync)) {
if (conf->resync_mirrors) /* make sure that disk is operational */
new_disk = 0;
while (!conf->mirrors[new_disk].operational || conf->mirrors[new_disk].write_only) {
new_disk++;
if (new_disk == conf->raid_disks) {
new_disk = 0;
break;
}
}
goto rb_out; goto rb_out;
}
/* make sure that disk is operational */ /* make sure the disk is operational */
while( !conf->mirrors[new_disk].operational) { while (!conf->mirrors[new_disk].operational) {
if (new_disk <= 0) new_disk = conf->raid_disks; if (new_disk <= 0)
new_disk = conf->raid_disks;
new_disk--; new_disk--;
if (new_disk == disk) { if (new_disk == disk) {
/*
* This means no working disk was found
* Nothing much to do, lets not change anything
* and hope for the best...
*/
new_disk = conf->last_used; new_disk = conf->last_used;
goto rb_out; goto rb_out;
} }
} }
...@@ -377,36 +363,16 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio) ...@@ -377,36 +363,16 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
/* now disk == new_disk == starting point for search */ /* now disk == new_disk == starting point for search */
/* /*
* Don't touch anything for sequential reads. * Don't change to another disk for sequential reads:
*/ */
if (this_sector == conf->mirrors[new_disk].head_position) if (conf->next_seq_sect == this_sector)
goto rb_out; goto rb_out;
if (this_sector == conf->mirrors[new_disk].head_position)
/*
* If reads have been done only on a single disk
* for a time, lets give another disk a change.
* This is for kicking those idling disks so that
* they would find work near some hotspot.
*/
if (conf->sect_count >= conf->mirrors[new_disk].sect_limit) {
conf->sect_count = 0;
do {
if (new_disk <= 0)
new_disk = conf->raid_disks;
new_disk--;
if (new_disk == disk)
break;
} while ((conf->mirrors[new_disk].write_only) ||
(!conf->mirrors[new_disk].operational));
goto rb_out; goto rb_out;
}
current_distance = abs(this_sector - current_distance = abs(this_sector - conf->mirrors[disk].head_position);
conf->mirrors[disk].head_position);
/* Find the disk which is closest */ /* Find the disk whose head is closest */
do { do {
if (disk <= 0) if (disk <= 0)
...@@ -417,44 +383,56 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio) ...@@ -417,44 +383,56 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
(!conf->mirrors[disk].operational)) (!conf->mirrors[disk].operational))
continue; continue;
new_distance = abs(this_sector - if (!atomic_read(&conf->mirrors[disk].nr_pending)) {
conf->mirrors[disk].head_position); new_disk = disk;
break;
}
new_distance = abs(this_sector - conf->mirrors[disk].head_position);
if (new_distance < current_distance) { if (new_distance < current_distance) {
conf->sect_count = 0;
current_distance = new_distance; current_distance = new_distance;
new_disk = disk; new_disk = disk;
} }
} while (disk != conf->last_used); } while (disk != conf->last_used);
rb_out: rb_out:
conf->mirrors[new_disk].head_position = this_sector + sectors; r1_bio->read_disk = new_disk;
conf->next_seq_sect = this_sector + sectors;
conf->last_used = new_disk; conf->last_used = new_disk;
conf->sect_count += sectors;
return new_disk; return new_disk;
} }
/* /*
* Wait if the reconstruction state machine puts up a bar for * Throttle resync depth, so that we can both get proper overlapping of
* new requests in this sector range: * requests, but are still able to handle normal requests quickly.
*/ */
static inline void new_request(conf_t *conf, r1bio_t *r1_bio) #define RESYNC_DEPTH 32
static void device_barrier(conf_t *conf, sector_t sect)
{
spin_lock_irq(&conf->resync_lock);
wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume), conf->resync_lock);
if (!conf->barrier++) {
wait_event_lock_irq(conf->wait_idle, !conf->nr_pending, conf->resync_lock);
if (conf->nr_pending)
BUG();
}
wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH, conf->resync_lock);
conf->next_resync = sect;
spin_unlock_irq(&conf->resync_lock);
}
static void resume_device(conf_t *conf)
{ {
spin_lock_irq(&conf->segment_lock); spin_lock_irq(&conf->resync_lock);
wait_event_lock_irq(conf->wait_done, if (!conf->barrier)
r1_bio->sector < conf->start_active || BUG();
r1_bio->sector >= conf->start_future, --conf->barrier;
conf->segment_lock); wake_up(&conf->wait_resume);
if (r1_bio->sector < conf->start_active) wake_up(&conf->wait_idle);
conf->cnt_done++; spin_unlock_irq(&conf->resync_lock);
else {
conf->cnt_future++;
if (conf->phase)
set_bit(R1BIO_SyncPhase, &r1_bio->state);
}
spin_unlock_irq(&conf->segment_lock);
} }
static int make_request(mddev_t *mddev, int rw, struct bio * bio) static int make_request(mddev_t *mddev, int rw, struct bio * bio)
...@@ -465,6 +443,16 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio) ...@@ -465,6 +443,16 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio)
struct bio *read_bio; struct bio *read_bio;
int i, sum_bios = 0, disks = MD_SB_DISKS; int i, sum_bios = 0, disks = MD_SB_DISKS;
/*
* Register the new request and wait if the reconstruction
* thread has put up a bar for new requests.
* Continue immediately if no resync is active currently.
*/
spin_lock_irq(&conf->resync_lock);
wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock);
conf->nr_pending++;
spin_unlock_irq(&conf->resync_lock);
/* /*
* make_request() can abort the operation when READA is being * make_request() can abort the operation when READA is being
* used and no empty request is available. * used and no empty request is available.
...@@ -475,7 +463,6 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio) ...@@ -475,7 +463,6 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio)
rw = READ; rw = READ;
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
check_all_bios_empty(r1_bio);
r1_bio->master_bio = bio; r1_bio->master_bio = bio;
...@@ -483,8 +470,6 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio) ...@@ -483,8 +470,6 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio)
r1_bio->sector = bio->bi_sector; r1_bio->sector = bio->bi_sector;
r1_bio->cmd = rw; r1_bio->cmd = rw;
new_request(conf, r1_bio);
if (rw == READ) { if (rw == READ) {
/* /*
* read balancing logic: * read balancing logic:
...@@ -503,15 +488,13 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio) ...@@ -503,15 +488,13 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio)
read_bio->bi_private = r1_bio; read_bio->bi_private = r1_bio;
generic_make_request(read_bio); generic_make_request(read_bio);
atomic_inc(&conf->mirrors[r1_bio->read_disk].nr_pending);
return 0; return 0;
} }
/* /*
* WRITE: * WRITE:
*/ */
check_all_w_bios_empty(r1_bio);
for (i = 0; i < disks; i++) { for (i = 0; i < disks; i++) {
struct bio *mbio; struct bio *mbio;
if (!conf->mirrors[i].operational) if (!conf->mirrors[i].operational)
...@@ -542,14 +525,13 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio) ...@@ -542,14 +525,13 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio)
/* /*
* We have to be a bit careful about the semaphore above, thats * We have to be a bit careful about the semaphore above, thats
* why we start the requests separately. Since kmalloc() could * why we start the requests separately. Since generic_make_request()
* fail, sleep and make_request() can sleep too, this is the * can sleep, this is the safer solution. Imagine, end_request
* safer solution. Imagine, end_request decreasing the semaphore * decreasing the semaphore before we could have set it up ...
* before we could have set it up ... We could play tricks with * We could play tricks with the semaphore (presetting it and
* the semaphore (presetting it and correcting at the end if * correcting at the end if sum_bios is not 'n' but we have to
* sum_bios is not 'n' but we have to do end_request by hand if * do end_request by hand if all requests finish until we had a
* all requests finish until we had a chance to set up the * chance to set up the semaphore correctly ... lots of races).
* semaphore correctly ... lots of races).
*/ */
for (i = 0; i < disks; i++) { for (i = 0; i < disks; i++) {
struct bio *mbio; struct bio *mbio;
...@@ -558,6 +540,7 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio) ...@@ -558,6 +540,7 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio)
continue; continue;
generic_make_request(mbio); generic_make_request(mbio);
atomic_inc(&conf->mirrors[i].nr_pending);
} }
return 0; return 0;
} }
...@@ -610,8 +593,7 @@ static void mark_disk_bad(mddev_t *mddev, int failed) ...@@ -610,8 +593,7 @@ static void mark_disk_bad(mddev_t *mddev, int failed)
md_wakeup_thread(conf->thread); md_wakeup_thread(conf->thread);
if (!mirror->write_only) if (!mirror->write_only)
conf->working_disks--; conf->working_disks--;
printk(DISK_FAILED, partition_name(mirror->dev), printk(DISK_FAILED, partition_name(mirror->dev), conf->working_disks);
conf->working_disks);
} }
static int error(mddev_t *mddev, kdev_t dev) static int error(mddev_t *mddev, kdev_t dev)
...@@ -644,12 +626,6 @@ static int error(mddev_t *mddev, kdev_t dev) ...@@ -644,12 +626,6 @@ static int error(mddev_t *mddev, kdev_t dev)
return 0; return 0;
} }
#undef LAST_DISK
#undef NO_SPARE_DISK
#undef DISK_FAILED
#undef START_SYNCING
static void print_conf(conf_t *conf) static void print_conf(conf_t *conf)
{ {
int i; int i;
...@@ -674,29 +650,13 @@ static void print_conf(conf_t *conf) ...@@ -674,29 +650,13 @@ static void print_conf(conf_t *conf)
static void close_sync(conf_t *conf) static void close_sync(conf_t *conf)
{ {
mddev_t *mddev = conf->mddev; spin_lock_irq(&conf->resync_lock);
/* wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock);
* If reconstruction was interrupted, we need to close the "active" spin_unlock_irq(&conf->resync_lock);
* and "pending" holes.
* we know that there are no active rebuild requests, if (conf->barrier) BUG();
* os cnt_active == cnt_ready == 0 if (waitqueue_active(&conf->wait_idle)) BUG();
*/ if (waitqueue_active(&conf->wait_resume)) BUG();
spin_lock_irq(&conf->segment_lock);
conf->start_active = conf->start_pending;
conf->start_ready = conf->start_pending;
wait_event_lock_irq(conf->wait_ready, !conf->cnt_pending, conf->segment_lock);
conf->start_active = conf->start_ready = conf->start_pending = conf->start_future;
conf->start_future = mddev->sb->size+1;
conf->cnt_pending = conf->cnt_future;
conf->cnt_future = 0;
conf->phase = conf->phase ^1;
wait_event_lock_irq(conf->wait_ready, !conf->cnt_pending, conf->segment_lock);
conf->start_active = conf->start_ready = conf->start_pending = conf->start_future = 0;
conf->phase = 0;
conf->cnt_future = conf->cnt_done;;
conf->cnt_done = 0;
spin_unlock_irq(&conf->segment_lock);
wake_up(&conf->wait_done);
} }
static int diskop(mddev_t *mddev, mdp_disk_t **d, int state) static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
...@@ -975,9 +935,9 @@ static int end_sync_read(struct bio *bio, int nr_sectors) ...@@ -975,9 +935,9 @@ static int end_sync_read(struct bio *bio, int nr_sectors)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
check_all_w_bios_empty(r1_bio);
if (r1_bio->read_bio != bio) if (r1_bio->read_bio != bio)
BUG(); BUG();
update_head_pos(r1_bio->read_disk, r1_bio);
/* /*
* we have read a block, now it needs to be re-written, * we have read a block, now it needs to be re-written,
* or re-read if the read failed. * or re-read if the read failed.
...@@ -997,13 +957,21 @@ static int end_sync_write(struct bio *bio, int nr_sectors) ...@@ -997,13 +957,21 @@ static int end_sync_write(struct bio *bio, int nr_sectors)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private); r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
mddev_t *mddev = r1_bio->mddev; mddev_t *mddev = r1_bio->mddev;
int i;
if (!uptodate) if (!uptodate)
md_error(mddev, bio->bi_dev); md_error(mddev, bio->bi_dev);
for (i = 0; i < MD_SB_DISKS; i++)
if (r1_bio->write_bios[i] == bio) {
update_head_pos(i, r1_bio);
break;
}
if (atomic_dec_and_test(&r1_bio->remaining)) { if (atomic_dec_and_test(&r1_bio->remaining)) {
sync_request_done(r1_bio->sector, mddev_to_conf(mddev)); conf_t *conf = mddev_to_conf(mddev);
md_done_sync(mddev, r1_bio->master_bio->bi_size >> 9, uptodate); md_done_sync(mddev, r1_bio->master_bio->bi_size >> 9, uptodate);
resume_device(conf);
put_buf(r1_bio); put_buf(r1_bio);
} }
return 0; return 0;
...@@ -1029,11 +997,11 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) ...@@ -1029,11 +997,11 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
*/ */
printk(IO_ERROR, partition_name(bio->bi_dev), r1_bio->sector); printk(IO_ERROR, partition_name(bio->bi_dev), r1_bio->sector);
md_done_sync(mddev, r1_bio->master_bio->bi_size >> 9, 0); md_done_sync(mddev, r1_bio->master_bio->bi_size >> 9, 0);
resume_device(conf);
put_buf(r1_bio);
return; return;
} }
check_all_w_bios_empty(r1_bio);
for (i = 0; i < disks ; i++) { for (i = 0; i < disks ; i++) {
if (!conf->mirrors[i].operational) if (!conf->mirrors[i].operational)
continue; continue;
...@@ -1071,8 +1039,8 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) ...@@ -1071,8 +1039,8 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
* must be done * must be done
*/ */
printk(IO_ERROR, partition_name(bio->bi_dev), r1_bio->sector); printk(IO_ERROR, partition_name(bio->bi_dev), r1_bio->sector);
sync_request_done(r1_bio->sector, conf);
md_done_sync(mddev, r1_bio->master_bio->bi_size >> 9, 0); md_done_sync(mddev, r1_bio->master_bio->bi_size >> 9, 0);
resume_device(conf);
put_buf(r1_bio); put_buf(r1_bio);
return; return;
} }
...@@ -1083,6 +1051,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) ...@@ -1083,6 +1051,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
md_sync_acct(mbio->bi_dev, mbio->bi_size >> 9); md_sync_acct(mbio->bi_dev, mbio->bi_size >> 9);
generic_make_request(mbio); generic_make_request(mbio);
atomic_inc(&conf->mirrors[i].nr_pending);
} }
} }
...@@ -1101,6 +1070,7 @@ static void raid1d(void *data) ...@@ -1101,6 +1070,7 @@ static void raid1d(void *data)
struct bio *bio; struct bio *bio;
unsigned long flags; unsigned long flags;
mddev_t *mddev; mddev_t *mddev;
conf_t *conf;
kdev_t dev; kdev_t dev;
...@@ -1111,9 +1081,9 @@ static void raid1d(void *data) ...@@ -1111,9 +1081,9 @@ static void raid1d(void *data)
r1_bio = list_entry(head->prev, r1bio_t, retry_list); r1_bio = list_entry(head->prev, r1bio_t, retry_list);
list_del(head->prev); list_del(head->prev);
spin_unlock_irqrestore(&retry_list_lock, flags); spin_unlock_irqrestore(&retry_list_lock, flags);
check_all_w_bios_empty(r1_bio);
mddev = r1_bio->mddev; mddev = r1_bio->mddev;
conf = mddev_to_conf(mddev);
if (mddev->sb_dirty) { if (mddev->sb_dirty) {
printk(KERN_INFO "raid1: dirty sb detected, updating.\n"); printk(KERN_INFO "raid1: dirty sb detected, updating.\n");
mddev->sb_dirty = 0; mddev->sb_dirty = 0;
...@@ -1139,13 +1109,12 @@ static void raid1d(void *data) ...@@ -1139,13 +1109,12 @@ static void raid1d(void *data)
bio->bi_rw = r1_bio->cmd; bio->bi_rw = r1_bio->cmd;
generic_make_request(bio); generic_make_request(bio);
atomic_inc(&conf->mirrors[r1_bio->read_disk].nr_pending);
break; break;
} }
} }
spin_unlock_irqrestore(&retry_list_lock, flags); spin_unlock_irqrestore(&retry_list_lock, flags);
} }
#undef IO_ERROR
#undef REDIRECT_SECTOR
/* /*
* Private kernel thread to reconstruct mirrors after an unclean * Private kernel thread to reconstruct mirrors after an unclean
...@@ -1177,101 +1146,27 @@ static int init_resync(conf_t *conf) ...@@ -1177,101 +1146,27 @@ static int init_resync(conf_t *conf)
{ {
int buffs; int buffs;
conf->start_active = 0;
conf->start_ready = 0;
conf->start_pending = 0;
conf->start_future = 0;
conf->phase = 0;
buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE; buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
if (conf->r1buf_pool) if (conf->r1buf_pool)
BUG(); BUG();
conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free, conf); conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free, conf);
if (!conf->r1buf_pool) if (!conf->r1buf_pool)
return -ENOMEM; return -ENOMEM;
conf->window = 2048; conf->next_resync = 0;
conf->cnt_future += conf->cnt_done+conf->cnt_pending;
conf->cnt_done = conf->cnt_pending = 0;
if (conf->cnt_ready || conf->cnt_active)
MD_BUG();
return 0; return 0;
} }
/*
* wait_sync_pending - block until sector_nr lies below conf->start_pending,
* then account for one more request in conf->cnt_ready.
*
* @conf:      per-array raid1 state holding the window boundaries
*             (start_active/ready/pending/future) and matching counters.
* @sector_nr: sector the caller is about to resync.
*
* Runs entirely under conf->segment_lock. Each loop iteration waits for
* cnt_active and then cnt_pending to reach zero and advances every
* boundary by one stage.
* NOTE(review): wait_event_lock_irq() presumably drops and re-takes
* segment_lock while sleeping - confirm against its definition.
*/
static void wait_sync_pending(conf_t *conf, sector_t sector_nr)
{
spin_lock_irq(&conf->segment_lock);
/* Keep rotating the window until sector_nr is below start_pending. */
while (sector_nr >= conf->start_pending) {
// printk("wait .. sect=%lu start_active=%d ready=%d pending=%d future=%d, cnt_done=%d active=%d ready=%d pending=%d future=%d\n", sector_nr, conf->start_active, conf->start_ready, conf->start_pending, conf->start_future, conf->cnt_done, conf->cnt_active, conf->cnt_ready, conf->cnt_pending, conf->cnt_future);
/* Sleep on wait_done until cnt_active has dropped to zero. */
wait_event_lock_irq(conf->wait_done, !conf->cnt_active,
conf->segment_lock);
/* Sleep on wait_ready until cnt_pending has dropped to zero. */
wait_event_lock_irq(conf->wait_ready, !conf->cnt_pending,
conf->segment_lock);
/*
* Shift each boundary down one stage; the statement order matters
* because every assignment reads the not-yet-overwritten next stage.
*/
conf->start_active = conf->start_ready;
conf->start_ready = conf->start_pending;
conf->start_pending = conf->start_future;
conf->start_future = conf->start_future+conf->window;
// Note: falling off the end is not a problem
/* Toggle the phase bit (0 <-> 1) each time start_future moves. */
conf->phase = conf->phase ^1;
/* Shift the counters in step with the boundaries moved above. */
conf->cnt_active = conf->cnt_ready;
conf->cnt_ready = 0;
conf->cnt_pending = conf->cnt_future;
conf->cnt_future = 0;
/* Wake sleepers on wait_done so they re-evaluate their conditions. */
wake_up(&conf->wait_done);
}
/* Count the caller's request in cnt_ready before dropping the lock. */
conf->cnt_ready++;
spin_unlock_irq(&conf->segment_lock);
}
/* /*
* perform a "sync" on one "block" * perform a "sync" on one "block"
* *
* We need to make sure that no normal I/O request - particularly write * We need to make sure that no normal I/O request - particularly write
* requests - conflict with active sync requests. * requests - conflict with active sync requests.
* This is achieved by conceptually dividing the block space into a
* number of sections:
* DONE: 0 .. a-1 These blocks are in-sync
* ACTIVE: a.. b-1 These blocks may have active sync requests, but
* no normal IO requests
* READY: b .. c-1 These blocks have no normal IO requests - sync
* request may be happening
* PENDING: c .. d-1 These blocks may have IO requests, but no new
* ones will be added
* FUTURE: d .. end These blocks are not to be considered yet. IO may
* be happening, but not sync
* *
* We keep a * This is achieved by tracking pending requests and a 'barrier' concept
* phase which flips (0 or 1) each time d moves and * that can be installed to exclude normal IO requests.
* a count of:
* z = active io requests in FUTURE since d moved - marked with
* current phase
* y = active io requests in FUTURE before d moved, or PENDING -
* marked with previous phase
* x = active sync requests in READY
* w = active sync requests in ACTIVE
* v = active io requests in DONE
*
* Normally, a=b=c=d=0 and z= active io requests
* or a=b=c=d=END and v= active io requests
* Allowed changes to a,b,c,d:
* A: c==d && y==0 -> d+=window, y=z, z=0, phase=!phase
* B: y==0 -> c=d
* C: b=c, w+=x, x=0
* D: w==0 -> a=b
* E: a==b==c==d==end -> a=b=c=d=0, z=v, v=0
*
* At start of sync we apply A.
* When y reaches 0, we apply B then A then begin sync requests
* When sync point reaches c-1, we wait for y==0, and w==0, and
* then apply B then A then D then C.
* Finally, we apply E
*
* The sync request simply issues a "read" against a working drive
* This is marked so that on completion the raid1d thread is woken to
* issue suitable write requests
*/ */
static int sync_request(mddev_t *mddev, sector_t sector_nr) static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
{ {
conf_t *conf = mddev_to_conf(mddev); conf_t *conf = mddev_to_conf(mddev);
mirror_info_t *mirror; mirror_info_t *mirror;
...@@ -1283,8 +1178,13 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr) ...@@ -1283,8 +1178,13 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr)
if (!sector_nr) if (!sector_nr)
if (init_resync(conf)) if (init_resync(conf))
return -ENOMEM; return -ENOMEM;
/*
wait_sync_pending(conf, sector_nr); * If there is non-resync activity waiting for us then
* put in a delay to throttle resync.
*/
if (!go_faster && waitqueue_active(&conf->wait_resume))
schedule_timeout(HZ);
device_barrier(conf, sector_nr + RESYNC_SECTORS);
/* /*
* If reconstructing, and >1 working disc, * If reconstructing, and >1 working disc,
...@@ -1302,10 +1202,13 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr) ...@@ -1302,10 +1202,13 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr)
} }
conf->last_used = disk; conf->last_used = disk;
mirror = conf->mirrors+conf->last_used; mirror = conf->mirrors + conf->last_used;
r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO); r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
check_all_bios_empty(r1_bio);
spin_lock_irq(&conf->resync_lock);
conf->nr_pending++;
spin_unlock_irq(&conf->resync_lock);
r1_bio->mddev = mddev; r1_bio->mddev = mddev;
r1_bio->sector = sector_nr; r1_bio->sector = sector_nr;
...@@ -1344,6 +1247,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr) ...@@ -1344,6 +1247,7 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr)
md_sync_acct(read_bio->bi_dev, nr_sectors); md_sync_acct(read_bio->bi_dev, nr_sectors);
generic_make_request(read_bio); generic_make_request(read_bio);
atomic_inc(&conf->mirrors[conf->last_used].nr_pending);
return nr_sectors; return nr_sectors;
} }
...@@ -1447,7 +1351,6 @@ static int run(mddev_t *mddev) ...@@ -1447,7 +1351,6 @@ static int run(mddev_t *mddev)
disk->number = descriptor->number; disk->number = descriptor->number;
disk->raid_disk = disk_idx; disk->raid_disk = disk_idx;
disk->dev = rdev->dev; disk->dev = rdev->dev;
disk->sect_limit = MAX_WORK_PER_DISK;
disk->operational = 0; disk->operational = 0;
disk->write_only = 0; disk->write_only = 0;
disk->spare = 0; disk->spare = 0;
...@@ -1479,7 +1382,6 @@ static int run(mddev_t *mddev) ...@@ -1479,7 +1382,6 @@ static int run(mddev_t *mddev)
disk->number = descriptor->number; disk->number = descriptor->number;
disk->raid_disk = disk_idx; disk->raid_disk = disk_idx;
disk->dev = rdev->dev; disk->dev = rdev->dev;
disk->sect_limit = MAX_WORK_PER_DISK;
disk->operational = 1; disk->operational = 1;
disk->write_only = 0; disk->write_only = 0;
disk->spare = 0; disk->spare = 0;
...@@ -1494,7 +1396,6 @@ static int run(mddev_t *mddev) ...@@ -1494,7 +1396,6 @@ static int run(mddev_t *mddev)
disk->number = descriptor->number; disk->number = descriptor->number;
disk->raid_disk = disk_idx; disk->raid_disk = disk_idx;
disk->dev = rdev->dev; disk->dev = rdev->dev;
disk->sect_limit = MAX_WORK_PER_DISK;
disk->operational = 0; disk->operational = 0;
disk->write_only = 0; disk->write_only = 0;
disk->spare = 1; disk->spare = 1;
...@@ -1507,9 +1408,9 @@ static int run(mddev_t *mddev) ...@@ -1507,9 +1408,9 @@ static int run(mddev_t *mddev)
conf->mddev = mddev; conf->mddev = mddev;
conf->device_lock = SPIN_LOCK_UNLOCKED; conf->device_lock = SPIN_LOCK_UNLOCKED;
conf->segment_lock = SPIN_LOCK_UNLOCKED; conf->resync_lock = SPIN_LOCK_UNLOCKED;
init_waitqueue_head(&conf->wait_done); init_waitqueue_head(&conf->wait_idle);
init_waitqueue_head(&conf->wait_ready); init_waitqueue_head(&conf->wait_resume);
if (!conf->working_disks) { if (!conf->working_disks) {
printk(NONE_OPERATIONAL, mdidx(mddev)); printk(NONE_OPERATIONAL, mdidx(mddev));
...@@ -1611,17 +1512,6 @@ static int run(mddev_t *mddev) ...@@ -1611,17 +1512,6 @@ static int run(mddev_t *mddev)
return -EIO; return -EIO;
} }
#undef INVALID_LEVEL
#undef NO_SB
#undef ERRORS
#undef NOT_IN_SYNC
#undef INCONSISTENT
#undef ALREADY_RUNNING
#undef OPERATIONAL
#undef SPARE
#undef NONE_OPERATIONAL
#undef ARRAY_IS_ACTIVE
static int stop_resync(mddev_t *mddev) static int stop_resync(mddev_t *mddev)
{ {
conf_t *conf = mddev_to_conf(mddev); conf_t *conf = mddev_to_conf(mddev);
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
* Author: Fabrice Bellard (fabrice.bellard@netgem.com) * Author: Fabrice Bellard (fabrice.bellard@netgem.com)
* Copyright (C) 2000 Netgem S.A. * Copyright (C) 2000 Netgem S.A.
* *
* $Id: nftlmount.c,v 1.23 2001/09/19 21:42:32 dwmw2 Exp $ * $Id: nftlmount.c,v 1.25 2001/11/30 16:46:27 dwmw2 Exp $
* *
* This program is free software; you can redistribute it and/or modify * This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
...@@ -39,7 +39,7 @@ ...@@ -39,7 +39,7 @@
#define SECTORSIZE 512 #define SECTORSIZE 512
char nftlmountrev[]="$Revision: 1.23 $"; char nftlmountrev[]="$Revision: 1.25 $";
/* find_boot_record: Find the NFTL Media Header and its Spare copy which contains the /* find_boot_record: Find the NFTL Media Header and its Spare copy which contains the
* various device information of the NFTL partition and Bad Unit Table. Update * various device information of the NFTL partition and Bad Unit Table. Update
...@@ -94,11 +94,11 @@ static int find_boot_record(struct NFTLrecord *nftl) ...@@ -94,11 +94,11 @@ static int find_boot_record(struct NFTLrecord *nftl)
continue; continue;
} }
#if 1 /* Some people seem to have devices without ECC or erase marks #if 0 /* Some people seem to have devices without ECC or erase marks
on the Media Header blocks. There are enough other sanity on the Media Header blocks. There are enough other sanity
checks in here that we can probably do without it. checks in here that we can probably do without it.
*/ */
if (le16_to_cpu ((h1.EraseMark | h1.EraseMark1) != ERASE_MARK)) { if (le16_to_cpu(h1.EraseMark | h1.EraseMark1) != ERASE_MARK) {
printk(KERN_NOTICE "ANAND header found at 0x%x in mtd%d, but erase mark not present (0x%04x,0x%04x instead)\n", printk(KERN_NOTICE "ANAND header found at 0x%x in mtd%d, but erase mark not present (0x%04x,0x%04x instead)\n",
block * nftl->EraseSize, nftl->mtd->index, block * nftl->EraseSize, nftl->mtd->index,
le16_to_cpu(h1.EraseMark), le16_to_cpu(h1.EraseMark1)); le16_to_cpu(h1.EraseMark), le16_to_cpu(h1.EraseMark1));
......
...@@ -467,10 +467,11 @@ Scsi_Cmnd *icmd; ...@@ -467,10 +467,11 @@ Scsi_Cmnd *icmd;
static void do_ql_ihandl(int irq, void *dev_id, struct pt_regs * regs) static void do_ql_ihandl(int irq, void *dev_id, struct pt_regs * regs)
{ {
unsigned long flags; unsigned long flags;
struct Scsi_Host *host = dev_id;
spin_lock_irqsave(&io_request_lock, flags); spin_lock_irqsave(&host->host_lock, flags);
ql_ihandl(irq, dev_id, regs); ql_ihandl(irq, dev_id, regs);
spin_unlock_irqrestore(&io_request_lock, flags); spin_unlock_irqrestore(&host->host_lock, flags);
} }
#endif #endif
......
...@@ -548,10 +548,8 @@ extern void print_status (int status); ...@@ -548,10 +548,8 @@ extern void print_status (int status);
*/ */
struct scsi_device { struct scsi_device {
/* private: */
/* /*
* This information is private to the scsi mid-layer. Wrapping it in a * This information is private to the scsi mid-layer.
* struct private is a way of marking it in a sort of C++ type of way.
*/ */
struct scsi_device *next; /* Used for linked list */ struct scsi_device *next; /* Used for linked list */
struct scsi_device *prev; /* Used for linked list */ struct scsi_device *prev; /* Used for linked list */
...@@ -563,7 +561,6 @@ struct scsi_device { ...@@ -563,7 +561,6 @@ struct scsi_device {
volatile unsigned short device_busy; /* commands actually active on low-level */ volatile unsigned short device_busy; /* commands actually active on low-level */
Scsi_Cmnd *device_queue; /* queue of SCSI Command structures */ Scsi_Cmnd *device_queue; /* queue of SCSI Command structures */
/* public: */
unsigned int id, lun, channel; unsigned int id, lun, channel;
unsigned int manufacturer; /* Manufacturer of device, for using unsigned int manufacturer; /* Manufacturer of device, for using
...@@ -681,11 +678,7 @@ struct scsi_request { ...@@ -681,11 +678,7 @@ struct scsi_request {
*/ */
struct scsi_cmnd { struct scsi_cmnd {
int sc_magic; int sc_magic;
/* private: */
/*
* This information is private to the scsi mid-layer. Wrapping it in a
* struct private is a way of marking it in a sort of C++ type of way.
*/
struct Scsi_Host *host; struct Scsi_Host *host;
unsigned short state; unsigned short state;
unsigned short owner; unsigned short owner;
...@@ -727,8 +720,6 @@ struct scsi_cmnd { ...@@ -727,8 +720,6 @@ struct scsi_cmnd {
struct scsi_cmnd *bh_next; /* To enumerate the commands waiting struct scsi_cmnd *bh_next; /* To enumerate the commands waiting
to be processed. */ to be processed. */
/* public: */
unsigned int target; unsigned int target;
unsigned int lun; unsigned int lun;
unsigned int channel; unsigned int channel;
......
...@@ -59,12 +59,10 @@ ...@@ -59,12 +59,10 @@
*/ */
int scsi_init_io(Scsi_Cmnd *SCpnt) int scsi_init_io(Scsi_Cmnd *SCpnt)
{ {
struct request *req; struct request *req = &SCpnt->request;
struct scatterlist *sgpnt; struct scatterlist *sgpnt;
int count, gfp_mask; int count, gfp_mask;
req = &SCpnt->request;
/* /*
* First we need to know how many scatter gather segments are needed. * First we need to know how many scatter gather segments are needed.
*/ */
...@@ -85,14 +83,13 @@ int scsi_init_io(Scsi_Cmnd *SCpnt) ...@@ -85,14 +83,13 @@ int scsi_init_io(Scsi_Cmnd *SCpnt)
BUG_ON(!sgpnt); BUG_ON(!sgpnt);
SCpnt->request_buffer = (char *) sgpnt; SCpnt->request_buffer = (char *) sgpnt;
SCpnt->request_bufflen = 0; SCpnt->request_bufflen = req->nr_sectors << 9;
req->buffer = NULL; req->buffer = NULL;
/* /*
* Next, walk the list, and fill in the addresses and sizes of * Next, walk the list, and fill in the addresses and sizes of
* each segment. * each segment.
*/ */
SCpnt->request_bufflen = req->nr_sectors << 9;
count = blk_rq_map_sg(req->q, req, SCpnt->request_buffer); count = blk_rq_map_sg(req->q, req, SCpnt->request_buffer);
/* /*
...@@ -142,8 +139,7 @@ void scsi_initialize_merge_fn(Scsi_Device * SDpnt) ...@@ -142,8 +139,7 @@ void scsi_initialize_merge_fn(Scsi_Device * SDpnt)
bounce_limit = BLK_BOUNCE_ANY; bounce_limit = BLK_BOUNCE_ANY;
else else
bounce_limit = SHpnt->pci_dev->dma_mask; bounce_limit = SHpnt->pci_dev->dma_mask;
} } else if (SHpnt->unchecked_isa_dma)
if (SHpnt->unchecked_isa_dma)
bounce_limit = BLK_BOUNCE_ISA; bounce_limit = BLK_BOUNCE_ISA;
blk_queue_bounce_limit(q, bounce_limit); blk_queue_bounce_limit(q, bounce_limit);
......
...@@ -287,9 +287,6 @@ static int sr_init_command(Scsi_Cmnd * SCpnt) ...@@ -287,9 +287,6 @@ static int sr_init_command(Scsi_Cmnd * SCpnt)
return 0; return 0;
} }
if (rq_data_dir(&SCpnt->request) == WRITE && !scsi_CDs[dev].device->writeable)
return 0;
/* /*
* we do lazy blocksize switching (when reading XA sectors, * we do lazy blocksize switching (when reading XA sectors,
* see CDROMREADMODE2 ioctl) * see CDROMREADMODE2 ioctl)
......
...@@ -142,6 +142,7 @@ struct bio *bio_alloc(int gfp_mask, int nr_iovecs) ...@@ -142,6 +142,7 @@ struct bio *bio_alloc(int gfp_mask, int nr_iovecs)
bio->bi_io_vec = bvl; bio->bi_io_vec = bvl;
return bio; return bio;
} }
mempool_free(bio, bio_pool); mempool_free(bio, bio_pool);
return NULL; return NULL;
} }
...@@ -311,28 +312,6 @@ struct bio *bio_copy(struct bio *bio, int gfp_mask, int copy) ...@@ -311,28 +312,6 @@ struct bio *bio_copy(struct bio *bio, int gfp_mask, int copy)
return NULL; return NULL;
} }
#ifdef BIO_PAGEIO
/*
 * bio_end_io_page - completion handler for a bio that carries one page.
 *
 * Records the I/O outcome in the page flags, runs the async-page
 * accounting hook, unlocks the page and drops the bio reference.
 * Always returns 1.
 */
static int bio_end_io_page(struct bio *bio)
{
	struct page *pg;

	pg = bio_page(bio);

	/* A bio that did not finish up to date marks its page as failed. */
	if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		SetPageError(pg);

	/* Only a page with no error flag set may be declared up to date. */
	if (!PageError(pg))
		SetPageUptodate(pg);

	/* Completion hook: async page accounting. */
	if (PageTestandClearDecrAfter(pg))
		atomic_dec(&nr_async_pages);

	UnlockPage(pg);
	bio_put(bio);

	return 1;
}
#endif
static int bio_end_io_kio(struct bio *bio, int nr_sectors) static int bio_end_io_kio(struct bio *bio, int nr_sectors)
{ {
struct kiobuf *kio = (struct kiobuf *) bio->bi_private; struct kiobuf *kio = (struct kiobuf *) bio->bi_private;
......
...@@ -410,6 +410,8 @@ struct super_block *hpfs_read_super(struct super_block *s, void *options, ...@@ -410,6 +410,8 @@ struct super_block *hpfs_read_super(struct super_block *s, void *options,
/*s->s_hpfs_mounting = 1;*/ /*s->s_hpfs_mounting = 1;*/
dev = s->s_dev; dev = s->s_dev;
set_blocksize(dev, 512); set_blocksize(dev, 512);
s->s_blocksize = 512;
s->s_blocksize_bits = 9;
s->s_hpfs_fs_size = -1; s->s_hpfs_fs_size = -1;
if (!(bootblock = hpfs_map_sector(s, 0, &bh0, 0))) goto bail1; if (!(bootblock = hpfs_map_sector(s, 0, &bh0, 0))) goto bail1;
if (!(superblock = hpfs_map_sector(s, 16, &bh1, 1))) goto bail2; if (!(superblock = hpfs_map_sector(s, 16, &bh1, 1))) goto bail2;
...@@ -436,8 +438,6 @@ struct super_block *hpfs_read_super(struct super_block *s, void *options, ...@@ -436,8 +438,6 @@ struct super_block *hpfs_read_super(struct super_block *s, void *options,
/* Fill superblock stuff */ /* Fill superblock stuff */
s->s_magic = HPFS_SUPER_MAGIC; s->s_magic = HPFS_SUPER_MAGIC;
s->s_blocksize = 512;
s->s_blocksize_bits = 9;
s->s_op = &hpfs_sops; s->s_op = &hpfs_sops;
s->s_hpfs_root = superblock->root; s->s_hpfs_root = superblock->root;
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
struct vfsmount *do_kern_mount(char *type, int flags, char *name, void *data); struct vfsmount *do_kern_mount(const char *type, int flags, char *name, void *data);
int do_remount_sb(struct super_block *sb, int flags, void * data); int do_remount_sb(struct super_block *sb, int flags, void * data);
void kill_super(struct super_block *sb); void kill_super(struct super_block *sb);
...@@ -622,9 +622,18 @@ static int do_move_mount(struct nameidata *nd, char *old_name) ...@@ -622,9 +622,18 @@ static int do_move_mount(struct nameidata *nd, char *old_name)
static int do_add_mount(struct nameidata *nd, char *type, int flags, static int do_add_mount(struct nameidata *nd, char *type, int flags,
int mnt_flags, char *name, void *data) int mnt_flags, char *name, void *data)
{ {
struct vfsmount *mnt = do_kern_mount(type, flags, name, data); struct vfsmount *mnt;
int err = PTR_ERR(mnt); int err;
if (!type || !memchr(type, 0, PAGE_SIZE))
return -EINVAL;
/* we need capabilities... */
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
mnt = do_kern_mount(type, flags, name, data);
err = PTR_ERR(mnt);
if (IS_ERR(mnt)) if (IS_ERR(mnt))
goto out; goto out;
......
...@@ -161,15 +161,10 @@ static int nfs_prepare_write(struct file *file, struct page *page, unsigned offs ...@@ -161,15 +161,10 @@ static int nfs_prepare_write(struct file *file, struct page *page, unsigned offs
static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to) static int nfs_commit_write(struct file *file, struct page *page, unsigned offset, unsigned to)
{ {
long status; long status;
loff_t pos = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + to;
struct inode *inode = page->mapping->host;
lock_kernel(); lock_kernel();
status = nfs_updatepage(file, page, offset, to-offset); status = nfs_updatepage(file, page, offset, to-offset);
unlock_kernel(); unlock_kernel();
/* most likely it's already done. CHECKME */
if (pos > inode->i_size)
inode->i_size = pos;
return status; return status;
} }
......
...@@ -107,17 +107,10 @@ nfs_read_inode(struct inode * inode) ...@@ -107,17 +107,10 @@ nfs_read_inode(struct inode * inode)
inode->i_rdev = 0; inode->i_rdev = 0;
/* We can't support UPDATE_ATIME(), since the server will reset it */ /* We can't support UPDATE_ATIME(), since the server will reset it */
inode->i_flags |= S_NOATIME; inode->i_flags |= S_NOATIME;
NFS_FILEID(inode) = 0;
NFS_FSID(inode) = 0;
NFS_FLAGS(inode) = 0;
INIT_LIST_HEAD(&inode->u.nfs_i.read); INIT_LIST_HEAD(&inode->u.nfs_i.read);
INIT_LIST_HEAD(&inode->u.nfs_i.dirty); INIT_LIST_HEAD(&inode->u.nfs_i.dirty);
INIT_LIST_HEAD(&inode->u.nfs_i.commit); INIT_LIST_HEAD(&inode->u.nfs_i.commit);
INIT_LIST_HEAD(&inode->u.nfs_i.writeback); INIT_LIST_HEAD(&inode->u.nfs_i.writeback);
inode->u.nfs_i.nread = 0;
inode->u.nfs_i.ndirty = 0;
inode->u.nfs_i.ncommit = 0;
inode->u.nfs_i.npages = 0;
NFS_CACHEINV(inode); NFS_CACHEINV(inode);
NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
NFS_ATTRTIMEO_UPDATE(inode) = jiffies; NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
...@@ -655,19 +648,6 @@ nfs_fill_inode(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr) ...@@ -655,19 +648,6 @@ nfs_fill_inode(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_op = &nfs_symlink_inode_operations; inode->i_op = &nfs_symlink_inode_operations;
else else
init_special_inode(inode, inode->i_mode, fattr->rdev); init_special_inode(inode, inode->i_mode, fattr->rdev);
/*
* Preset the size and mtime, as there's no need
* to invalidate the caches.
*/
inode->i_size = nfs_size_to_loff_t(fattr->size);
inode->i_mtime = nfs_time_to_secs(fattr->mtime);
inode->i_atime = nfs_time_to_secs(fattr->atime);
inode->i_ctime = nfs_time_to_secs(fattr->ctime);
NFS_CACHE_CTIME(inode) = fattr->ctime;
NFS_CACHE_MTIME(inode) = fattr->mtime;
NFS_CACHE_ISIZE(inode) = fattr->size;
NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh));
} }
nfs_refresh_inode(inode, fattr); nfs_refresh_inode(inode, fattr);
...@@ -697,6 +677,9 @@ nfs_find_actor(struct inode *inode, unsigned long ino, void *opaque) ...@@ -697,6 +677,9 @@ nfs_find_actor(struct inode *inode, unsigned long ino, void *opaque)
return 0; return 0;
if (memcmp(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)) != 0) if (memcmp(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)) != 0)
return 0; return 0;
/* Force an attribute cache update if inode->i_count == 0 */
if (!atomic_read(&inode->i_count))
NFS_CACHEINV(inode);
return 1; return 1;
} }
...@@ -797,7 +780,9 @@ printk("nfs_notify_change: revalidate failed, error=%d\n", error); ...@@ -797,7 +780,9 @@ printk("nfs_notify_change: revalidate failed, error=%d\n", error);
if (!S_ISREG(inode->i_mode)) if (!S_ISREG(inode->i_mode))
attr->ia_valid &= ~ATTR_SIZE; attr->ia_valid &= ~ATTR_SIZE;
filemap_fdatasync(inode->i_mapping);
error = nfs_wb_all(inode); error = nfs_wb_all(inode);
filemap_fdatawait(inode->i_mapping);
if (error) if (error)
goto out; goto out;
...@@ -825,6 +810,8 @@ printk("nfs_notify_change: revalidate failed, error=%d\n", error); ...@@ -825,6 +810,8 @@ printk("nfs_notify_change: revalidate failed, error=%d\n", error);
fattr.pre_ctime = NFS_CACHE_CTIME(inode); fattr.pre_ctime = NFS_CACHE_CTIME(inode);
fattr.valid |= NFS_ATTR_WCC; fattr.valid |= NFS_ATTR_WCC;
} }
/* Force an attribute cache update */
NFS_CACHEINV(inode);
error = nfs_refresh_inode(inode, &fattr); error = nfs_refresh_inode(inode, &fattr);
out: out:
return error; return error;
...@@ -965,6 +952,34 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) ...@@ -965,6 +952,34 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
return status; return status;
} }
/*
* nfs_fattr_obsolete - Test if attribute data is newer than cached data
* @inode: inode
* @fattr: attributes to test
*
* Avoid stuffing the attribute cache with obsolete information.
* We always accept updates if the attribute cache timed out, or if
* fattr->ctime is newer than our cached value.
* If fattr->ctime matches the cached value, we still accept the update
* if it increases the file size.
*/
static inline
int nfs_fattr_obsolete(struct inode *inode, struct nfs_fattr *fattr)
{
s64 cdif;
if (time_after(jiffies, NFS_READTIME(inode)+NFS_ATTRTIMEO(inode)))
goto out_valid;
if ((cdif = (s64)fattr->ctime - (s64)NFS_CACHE_CTIME(inode)) > 0)
goto out_valid;
/* Ugh... */
if (cdif == 0 && fattr->size > NFS_CACHE_ISIZE(inode))
goto out_valid;
return -1;
out_valid:
return 0;
}
/* /*
* Many nfs protocol calls return the new file attributes after * Many nfs protocol calls return the new file attributes after
* an operation. Here we update the inode to reflect the state * an operation. Here we update the inode to reflect the state
...@@ -982,6 +997,7 @@ __nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -982,6 +997,7 @@ __nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
{ {
__u64 new_size, new_mtime; __u64 new_size, new_mtime;
loff_t new_isize; loff_t new_isize;
time_t new_atime;
int invalid = 0; int invalid = 0;
dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d info=0x%x)\n", dfprintk(VFS, "NFS: refresh_inode(%x/%ld ct=%d info=0x%x)\n",
...@@ -1007,6 +1023,11 @@ __nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -1007,6 +1023,11 @@ __nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
new_size = fattr->size; new_size = fattr->size;
new_isize = nfs_size_to_loff_t(fattr->size); new_isize = nfs_size_to_loff_t(fattr->size);
new_atime = nfs_time_to_secs(fattr->atime);
/* Avoid races */
if (nfs_fattr_obsolete(inode, fattr))
goto out_nochange;
/* /*
* Update the read time so we don't revalidate too often. * Update the read time so we don't revalidate too often.
*/ */
...@@ -1056,7 +1077,7 @@ __nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -1056,7 +1077,7 @@ __nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
NFS_CACHE_CTIME(inode) = fattr->ctime; NFS_CACHE_CTIME(inode) = fattr->ctime;
inode->i_ctime = nfs_time_to_secs(fattr->ctime); inode->i_ctime = nfs_time_to_secs(fattr->ctime);
inode->i_atime = nfs_time_to_secs(fattr->atime); inode->i_atime = new_atime;
NFS_CACHE_MTIME(inode) = new_mtime; NFS_CACHE_MTIME(inode) = new_mtime;
inode->i_mtime = nfs_time_to_secs(new_mtime); inode->i_mtime = nfs_time_to_secs(new_mtime);
...@@ -1093,7 +1114,10 @@ __nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) ...@@ -1093,7 +1114,10 @@ __nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
if (invalid) if (invalid)
nfs_zap_caches(inode); nfs_zap_caches(inode);
return 0; return 0;
out_nochange:
if (new_atime - inode->i_atime > 0)
inode->i_atime = new_atime;
return 0;
out_changed: out_changed:
/* /*
* Big trouble! The inode has become a different object. * Big trouble! The inode has become a different object.
......
...@@ -270,14 +270,12 @@ nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res) ...@@ -270,14 +270,12 @@ nfs_xdr_readres(struct rpc_rqst *req, u32 *p, struct nfs_readres *res)
count = ntohl(*p++); count = ntohl(*p++);
hdrlen = (u8 *) p - (u8 *) iov->iov_base; hdrlen = (u8 *) p - (u8 *) iov->iov_base;
recvd = req->rq_rlen - hdrlen; if (iov->iov_len > hdrlen) {
if (p != iov[req->rq_rnr-1].iov_base) { dprintk("NFS: READ header is short. iovec will be shifted.\n");
/* Unexpected reply header size. Punt. xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen);
* XXX: Move iovec contents to align data on page
* boundary and adjust RPC header size guess */
printk(KERN_WARNING "NFS: Odd RPC header size in read reply: %d\n", hdrlen);
return -errno_NFSERR_IO;
} }
recvd = req->rq_rlen - hdrlen;
if (count > recvd) { if (count > recvd) {
printk(KERN_WARNING "NFS: server cheating in read reply: " printk(KERN_WARNING "NFS: server cheating in read reply: "
"count %d > recvd %d\n", count, recvd); "count %d > recvd %d\n", count, recvd);
...@@ -448,27 +446,23 @@ static int ...@@ -448,27 +446,23 @@ static int
nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res) nfs_xdr_readdirres(struct rpc_rqst *req, u32 *p, struct nfs_readdirres *res)
{ {
struct iovec *iov = req->rq_rvec; struct iovec *iov = req->rq_rvec;
int hdrlen;
int status, nr; int status, nr;
u32 *end, *entry, len; u32 *end, *entry, len;
if ((status = ntohl(*p++))) if ((status = ntohl(*p++)))
return -nfs_stat_to_errno(status); return -nfs_stat_to_errno(status);
if ((void *) p != ((u8 *) iov->iov_base+iov->iov_len)) {
/* Unexpected reply header size. Punt. */ hdrlen = (u8 *) p - (u8 *) iov->iov_base;
printk(KERN_WARNING "NFS: Odd RPC header size in readdirres reply\n"); if (iov->iov_len > hdrlen) {
return -errno_NFSERR_IO; dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen);
} }
/* Get start and end address of XDR data */ /* Get start and end address of XDR data */
p = (u32 *) iov[1].iov_base; p = (u32 *) iov[1].iov_base;
end = (u32 *) ((u8 *) p + iov[1].iov_len); end = (u32 *) ((u8 *) p + iov[1].iov_len);
/* Get start and end of dirent buffer */
if (res->buffer != p) {
printk(KERN_ERR "NFS: Bad result buffer in readdir\n");
return -errno_NFSERR_IO;
}
for (nr = 0; *p++; nr++) { for (nr = 0; *p++; nr++) {
entry = p - 1; entry = p - 1;
if (p + 2 > end) if (p + 2 > end)
...@@ -598,13 +592,21 @@ nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args ...@@ -598,13 +592,21 @@ nfs_xdr_readlinkargs(struct rpc_rqst *req, u32 *p, struct nfs_readlinkargs *args
static int static int
nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_readlinkres *res) nfs_xdr_readlinkres(struct rpc_rqst *req, u32 *p, struct nfs_readlinkres *res)
{ {
struct iovec *iov = req->rq_rvec;
u32 *strlen; u32 *strlen;
char *string; char *string;
int hdrlen;
int status; int status;
unsigned int len; unsigned int len;
if ((status = ntohl(*p++))) if ((status = ntohl(*p++)))
return -nfs_stat_to_errno(status); return -nfs_stat_to_errno(status);
hdrlen = (u8 *) p - (u8 *) iov->iov_base;
if (iov->iov_len > hdrlen) {
dprintk("NFS: READLINK header is short. iovec will be shifted.\n");
xdr_shift_iovec(iov, req->rq_rnr, iov->iov_len - hdrlen);
}
strlen = (u32*)res->buffer; strlen = (u32*)res->buffer;
/* Convert length of symlink */ /* Convert length of symlink */
len = ntohl(*strlen); len = ntohl(*strlen);
......
...@@ -397,7 +397,7 @@ nfs_readpage_result(struct rpc_task *task) ...@@ -397,7 +397,7 @@ nfs_readpage_result(struct rpc_task *task)
{ {
struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
struct inode *inode = data->inode; struct inode *inode = data->inode;
int count = data->res.count; unsigned int count = data->res.count;
dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
task->tk_pid, task->tk_status); task->tk_pid, task->tk_status);
...@@ -408,9 +408,15 @@ nfs_readpage_result(struct rpc_task *task) ...@@ -408,9 +408,15 @@ nfs_readpage_result(struct rpc_task *task)
struct page *page = req->wb_page; struct page *page = req->wb_page;
nfs_list_remove_request(req); nfs_list_remove_request(req);
if (task->tk_status >= 0 && count >= 0) { if (task->tk_status >= 0) {
SetPageUptodate(page); if (count < PAGE_CACHE_SIZE) {
char *p = kmap(page);
memset(p + count, 0, PAGE_CACHE_SIZE - count);
kunmap(page);
count = 0;
} else
count -= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE;
SetPageUptodate(page);
} else } else
SetPageError(page); SetPageError(page);
flush_dcache_page(page); flush_dcache_page(page);
......
...@@ -213,6 +213,7 @@ nfs_writepage_async(struct file *file, struct inode *inode, struct page *page, ...@@ -213,6 +213,7 @@ nfs_writepage_async(struct file *file, struct inode *inode, struct page *page,
unsigned int offset, unsigned int count) unsigned int offset, unsigned int count)
{ {
struct nfs_page *req; struct nfs_page *req;
loff_t end;
int status; int status;
req = nfs_update_request(file, inode, page, offset, count); req = nfs_update_request(file, inode, page, offset, count);
...@@ -223,6 +224,10 @@ nfs_writepage_async(struct file *file, struct inode *inode, struct page *page, ...@@ -223,6 +224,10 @@ nfs_writepage_async(struct file *file, struct inode *inode, struct page *page,
req->wb_cred = get_rpccred(NFS_I(inode)->mm_cred); req->wb_cred = get_rpccred(NFS_I(inode)->mm_cred);
nfs_unlock_request(req); nfs_unlock_request(req);
nfs_strategy(inode); nfs_strategy(inode);
end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
if (inode->i_size < end)
inode->i_size = end;
out: out:
return status; return status;
} }
...@@ -781,6 +786,7 @@ nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsign ...@@ -781,6 +786,7 @@ nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsign
struct dentry *dentry = file->f_dentry; struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode; struct inode *inode = dentry->d_inode;
struct nfs_page *req; struct nfs_page *req;
loff_t end;
int status = 0; int status = 0;
dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
...@@ -812,6 +818,10 @@ nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsign ...@@ -812,6 +818,10 @@ nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsign
goto done; goto done;
status = 0; status = 0;
end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
if (inode->i_size < end)
inode->i_size = end;
/* If we wrote past the end of the page. /* If we wrote past the end of the page.
* Call the strategy routine so it can send out a bunch * Call the strategy routine so it can send out a bunch
* of requests. * of requests.
......
...@@ -34,8 +34,6 @@ ...@@ -34,8 +34,6 @@
#define __NO_VERSION__ #define __NO_VERSION__
#include <linux/module.h> #include <linux/module.h>
int do_remount_sb(struct super_block *sb, int flags, void * data);
LIST_HEAD(super_blocks); LIST_HEAD(super_blocks);
spinlock_t sb_lock = SPIN_LOCK_UNLOCKED; spinlock_t sb_lock = SPIN_LOCK_UNLOCKED;
...@@ -377,7 +375,7 @@ static void insert_super(struct super_block *s, struct file_system_type *type) ...@@ -377,7 +375,7 @@ static void insert_super(struct super_block *s, struct file_system_type *type)
get_filesystem(type); get_filesystem(type);
} }
void put_unnamed_dev(kdev_t dev); /* should become static */ static void put_anon_dev(kdev_t dev);
/** /**
* remove_super - makes superblock unreachable * remove_super - makes superblock unreachable
...@@ -407,7 +405,7 @@ static void remove_super(struct super_block *s) ...@@ -407,7 +405,7 @@ static void remove_super(struct super_block *s)
if (bdev) if (bdev)
blkdev_put(bdev, BDEV_FS); blkdev_put(bdev, BDEV_FS);
else else
put_unnamed_dev(dev); put_anon_dev(dev);
} }
struct vfsmount *alloc_vfsmnt(void); struct vfsmount *alloc_vfsmnt(void);
...@@ -530,36 +528,122 @@ asmlinkage long sys_ustat(dev_t dev, struct ustat * ubuf) ...@@ -530,36 +528,122 @@ asmlinkage long sys_ustat(dev_t dev, struct ustat * ubuf)
return err; return err;
} }
/**
* do_remount_sb - asks filesystem to change mount options.
* @sb: superblock in question
* @flags: numeric part of options
* @data: the rest of options
*
* Alters the mount options of a mounted file system.
*/
int do_remount_sb(struct super_block *sb, int flags, void *data)
{
int retval;
if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
return -EACCES;
/*flags |= MS_RDONLY;*/
if (flags & MS_RDONLY)
acct_auto_close(sb->s_dev);
shrink_dcache_sb(sb);
fsync_super(sb);
/* If we are remounting RDONLY, make sure there are no rw files open */
if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
if (!fs_may_remount_ro(sb))
return -EBUSY;
if (sb->s_op && sb->s_op->remount_fs) {
lock_super(sb);
retval = sb->s_op->remount_fs(sb, &flags, data);
unlock_super(sb);
if (retval)
return retval;
}
sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
return 0;
}
/* /*
* Unnamed block devices are dummy devices used by virtual * Unnamed block devices are dummy devices used by virtual
* filesystems which don't use real block-devices. -- jrs * filesystems which don't use real block-devices. -- jrs
*/ */
static unsigned long unnamed_dev_in_use[256/(8*sizeof(unsigned long))]; enum {Max_anon = 256};
static unsigned long unnamed_dev_in_use[Max_anon/(8*sizeof(unsigned long))];
static spinlock_t unnamed_dev_lock = SPIN_LOCK_UNLOCKED;/* protects the above */
kdev_t get_unnamed_dev(void) /**
* put_anon_dev - release anonymous device number.
* @dev: device in question
*/
static void put_anon_dev(kdev_t dev)
{ {
int i; spin_lock(&unnamed_dev_lock);
clear_bit(MINOR(dev), unnamed_dev_in_use);
for (i = 1; i < 256; i++) { spin_unlock(&unnamed_dev_lock);
if (!test_and_set_bit(i,unnamed_dev_in_use))
return MKDEV(UNNAMED_MAJOR, i);
}
return 0;
} }
void put_unnamed_dev(kdev_t dev) /**
* get_anon_super - allocate a superblock for non-device fs
* @type: filesystem type
* @compare: check if existing superblock is what we want
* @data: argument for @compare.
*
* get_anon_super is a helper for non-blockdevice filesystems.
* It either finds and returns one of the superblocks of given type
* (if it can find one that would satisfy caller) or creates a new
* one. In the either case we return an active reference to superblock
* with ->s_umount locked. If superblock is new it gets a new
* anonymous device allocated for it and is inserted into lists -
* other initialization is left to caller.
*
* Rather than duplicating all that logics every time when
* we want something that doesn't fit "nodev" and "single" we pull
* the relevant code into common helper and let get_sb_...() call
* it.
*
* NB: get_sb_...() is going to become an fs type method, with
* current ->read_super() becoming a callback used by common instances.
*/
struct super_block *get_anon_super(struct file_system_type *type,
int (*compare)(struct super_block *,void *), void *data)
{ {
if (!dev || MAJOR(dev) != UNNAMED_MAJOR) struct super_block *s = alloc_super();
return; kdev_t dev;
if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use)) struct list_head *p;
return;
printk("VFS: put_unnamed_dev: freeing unused device %s\n", if (!s)
kdevname(dev)); return ERR_PTR(-ENOMEM);
spin_lock(&unnamed_dev_lock);
dev = find_first_zero_bit(unnamed_dev_in_use, Max_anon);
if (dev == Max_anon) {
spin_unlock(&unnamed_dev_lock);
destroy_super(s);
return ERR_PTR(-EMFILE);
}
set_bit(dev, unnamed_dev_in_use);
spin_unlock(&unnamed_dev_lock);
retry:
spin_lock(&sb_lock);
if (compare) list_for_each(p, &type->fs_supers) {
struct super_block *old;
old = list_entry(p, struct super_block, s_instances);
if (!compare(old, data))
continue;
if (!grab_super(old))
goto retry;
destroy_super(s);
return old;
}
s->s_dev = dev;
insert_super(s, type);
return s;
} }
static struct super_block *get_sb_bdev(struct file_system_type *fs_type, static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
char *dev_name, int flags, void * data) int flags, char *dev_name, void * data)
{ {
struct inode *inode; struct inode *inode;
struct block_device *bdev; struct block_device *bdev;
...@@ -598,17 +682,13 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type, ...@@ -598,17 +682,13 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
goto out; goto out;
check_disk_change(dev); check_disk_change(dev);
error = -EACCES; error = -EACCES;
if (!(flags & MS_RDONLY) && is_read_only(dev)) { if (!(flags & MS_RDONLY) && is_read_only(dev))
blkdev_put(bdev, BDEV_FS); goto out1;
goto out;
}
error = -ENOMEM; error = -ENOMEM;
s = alloc_super(); s = alloc_super();
if (!s) { if (!s)
blkdev_put(bdev, BDEV_FS); goto out1;
goto out;
}
error = -EBUSY; error = -EBUSY;
restart: restart:
...@@ -622,8 +702,7 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type, ...@@ -622,8 +702,7 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
((flags ^ old->s_flags) & MS_RDONLY)) { ((flags ^ old->s_flags) & MS_RDONLY)) {
spin_unlock(&sb_lock); spin_unlock(&sb_lock);
destroy_super(s); destroy_super(s);
blkdev_put(bdev, BDEV_FS); goto out1;
goto out;
} }
if (!grab_super(old)) if (!grab_super(old))
goto restart; goto restart;
...@@ -636,97 +715,103 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type, ...@@ -636,97 +715,103 @@ static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
s->s_bdev = bdev; s->s_bdev = bdev;
s->s_flags = flags; s->s_flags = flags;
insert_super(s, fs_type); insert_super(s, fs_type);
error = -EINVAL;
lock_super(s);
if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0)) if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0))
goto out_fail; goto Einval;
s->s_flags |= MS_ACTIVE; s->s_flags |= MS_ACTIVE;
unlock_super(s);
path_release(&nd); path_release(&nd);
return s; return s;
out_fail: Einval:
unlock_super(s);
deactivate_super(s); deactivate_super(s);
remove_super(s); remove_super(s);
error = -EINVAL;
goto out;
out1:
blkdev_put(bdev, BDEV_FS);
out: out:
path_release(&nd); path_release(&nd);
return ERR_PTR(error); return ERR_PTR(error);
} }
static struct super_block *get_sb_nodev(struct file_system_type *fs_type, static struct super_block *get_sb_nodev(struct file_system_type *fs_type,
int flags, void * data) int flags, char *dev_name, void *data)
{ {
struct super_block *s = alloc_super(); struct super_block *s = get_anon_super(fs_type, NULL, NULL);
if (!s) if (IS_ERR(s))
return ERR_PTR(-ENOMEM);
s->s_dev = get_unnamed_dev();
if (!s->s_dev) {
destroy_super(s);
return ERR_PTR(-EMFILE);
}
s->s_flags = flags;
spin_lock(&sb_lock);
insert_super(s, fs_type);
lock_super(s);
if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0))
goto out_fail;
s->s_flags |= MS_ACTIVE;
unlock_super(s);
return s; return s;
out_fail: s->s_flags = flags;
unlock_super(s); if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0)) {
deactivate_super(s); deactivate_super(s);
remove_super(s); remove_super(s);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
}
s->s_flags |= MS_ACTIVE;
return s;
}
static int compare_single(struct super_block *s, void *p)
{
return 1;
} }
static struct super_block *get_sb_single(struct file_system_type *fs_type, static struct super_block *get_sb_single(struct file_system_type *fs_type,
int flags, void *data) int flags, char *dev_name, void *data)
{ {
struct super_block * s = alloc_super(); struct super_block *s = get_anon_super(fs_type, compare_single, NULL);
if (!s)
return ERR_PTR(-ENOMEM);
/*
* Get the superblock of kernel-wide instance, but
* keep the reference to fs_type.
*/
retry:
spin_lock(&sb_lock);
if (!list_empty(&fs_type->fs_supers)) {
struct super_block *old;
old = list_entry(fs_type->fs_supers.next, struct super_block,
s_instances);
if (!grab_super(old))
goto retry;
destroy_super(s);
do_remount_sb(old, flags, data);
return old;
} else {
s->s_dev = get_unnamed_dev();
if (!s->s_dev) {
spin_unlock(&sb_lock);
destroy_super(s);
return ERR_PTR(-EMFILE);
}
s->s_flags = flags;
insert_super(s, fs_type);
lock_super(s);
if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0))
goto out_fail;
s->s_flags |= MS_ACTIVE;
unlock_super(s);
return s;
out_fail: if (IS_ERR(s))
unlock_super(s); return s;
if (!s->s_root) {
s->s_flags = flags;
if (!fs_type->read_super(s, data, flags & MS_VERBOSE ? 1 : 0)) {
deactivate_super(s); deactivate_super(s);
remove_super(s); remove_super(s);
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
} }
s->s_flags |= MS_ACTIVE;
}
do_remount_sb(s, flags, data);
return s;
}
struct vfsmount *
do_kern_mount(const char *fstype, int flags, char *name, void *data)
{
struct file_system_type *type = get_fs_type(fstype);
struct super_block *sb = ERR_PTR(-ENOMEM);
struct vfsmount *mnt;
if (!type)
return ERR_PTR(-ENODEV);
mnt = alloc_vfsmnt();
if (!mnt)
goto out;
set_devname(mnt, name);
if (type->fs_flags & FS_REQUIRES_DEV)
sb = get_sb_bdev(type, flags, name, data);
else if (type->fs_flags & FS_SINGLE)
sb = get_sb_single(type, flags, name, data);
else
sb = get_sb_nodev(type, flags, name, data);
if (IS_ERR(sb))
goto out_mnt;
if (type->fs_flags & FS_NOMOUNT)
sb->s_flags |= MS_NOUSER;
mnt->mnt_sb = sb;
mnt->mnt_root = dget(sb->s_root);
mnt->mnt_mountpoint = sb->s_root;
mnt->mnt_parent = mnt;
up_write(&sb->s_umount);
put_filesystem(type);
return mnt;
out_mnt:
free_vfsmnt(mnt);
out:
put_filesystem(type);
return (struct vfsmount *)sb;
} }
void kill_super(struct super_block *sb) void kill_super(struct super_block *sb)
...@@ -739,7 +824,6 @@ void kill_super(struct super_block *sb) ...@@ -739,7 +824,6 @@ void kill_super(struct super_block *sb)
return; return;
down_write(&sb->s_umount); down_write(&sb->s_umount);
lock_kernel();
sb->s_root = NULL; sb->s_root = NULL;
/* Need to clean after the sucker */ /* Need to clean after the sucker */
if (fs->fs_flags & FS_LITTER) if (fs->fs_flags & FS_LITTER)
...@@ -748,6 +832,7 @@ void kill_super(struct super_block *sb) ...@@ -748,6 +832,7 @@ void kill_super(struct super_block *sb)
dput(root); dput(root);
fsync_super(sb); fsync_super(sb);
lock_super(sb); lock_super(sb);
lock_kernel();
sb->s_flags &= ~MS_ACTIVE; sb->s_flags &= ~MS_ACTIVE;
invalidate_inodes(sb); /* bad name - it should be evict_inodes() */ invalidate_inodes(sb); /* bad name - it should be evict_inodes() */
if (sop) { if (sop) {
...@@ -768,96 +853,7 @@ void kill_super(struct super_block *sb) ...@@ -768,96 +853,7 @@ void kill_super(struct super_block *sb)
remove_super(sb); remove_super(sb);
} }
/*
* Alters the mount flags of a mounted file system. Only the mount point
* is used as a reference - file system type and the device are ignored.
*/
int do_remount_sb(struct super_block *sb, int flags, void *data)
{
int retval;
if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
return -EACCES;
/*flags |= MS_RDONLY;*/
if (flags & MS_RDONLY)
acct_auto_close(sb->s_dev);
shrink_dcache_sb(sb);
fsync_super(sb);
/* If we are remounting RDONLY, make sure there are no rw files open */
if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
if (!fs_may_remount_ro(sb))
return -EBUSY;
if (sb->s_op && sb->s_op->remount_fs) {
lock_super(sb);
retval = sb->s_op->remount_fs(sb, &flags, data);
unlock_super(sb);
if (retval)
return retval;
}
sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
/*
* We can't invalidate inodes as we can loose data when remounting
* (someone might manage to alter data while we are waiting in lock_super()
* or in foo_remount_fs()))
*/
return 0;
}
struct vfsmount *do_kern_mount(char *type, int flags, char *name, void *data)
{
struct file_system_type * fstype;
struct vfsmount *mnt = NULL;
struct super_block *sb;
if (!type || !memchr(type, 0, PAGE_SIZE))
return ERR_PTR(-EINVAL);
/* we need capabilities... */
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
/* ... filesystem driver... */
fstype = get_fs_type(type);
if (!fstype)
return ERR_PTR(-ENODEV);
/* ... allocated vfsmount... */
mnt = alloc_vfsmnt();
if (!mnt) {
mnt = ERR_PTR(-ENOMEM);
goto fs_out;
}
set_devname(mnt, name);
/* get locked superblock */
if (fstype->fs_flags & FS_REQUIRES_DEV)
sb = get_sb_bdev(fstype, name, flags, data);
else if (fstype->fs_flags & FS_SINGLE)
sb = get_sb_single(fstype, flags, data);
else
sb = get_sb_nodev(fstype, flags, data);
if (IS_ERR(sb)) {
free_vfsmnt(mnt);
mnt = (struct vfsmount *)sb;
goto fs_out;
}
if (fstype->fs_flags & FS_NOMOUNT)
sb->s_flags |= MS_NOUSER;
mnt->mnt_sb = sb;
mnt->mnt_root = dget(sb->s_root);
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
up_write(&sb->s_umount);
fs_out:
put_filesystem(fstype);
return mnt;
}
struct vfsmount *kern_mount(struct file_system_type *type) struct vfsmount *kern_mount(struct file_system_type *type)
{ {
return do_kern_mount((char *)type->name, 0, (char *)type->name, NULL); return do_kern_mount(type->name, 0, (char *)type->name, NULL);
} }
...@@ -20,7 +20,6 @@ extern void add_blkdev_randomness(int major); ...@@ -20,7 +20,6 @@ extern void add_blkdev_randomness(int major);
#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */ #define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */
extern unsigned long initrd_start,initrd_end; extern unsigned long initrd_start,initrd_end;
extern int mount_initrd; /* zero if initrd should not be mounted */
extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */ extern int initrd_below_start_ok; /* 1 if it is not an error if initrd_start < memory_start */
extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */ extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */ extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
...@@ -55,6 +54,9 @@ extern inline struct request *elv_next_request(request_queue_t *q) ...@@ -55,6 +54,9 @@ extern inline struct request *elv_next_request(request_queue_t *q)
while ((rq = __elv_next_request(q))) { while ((rq = __elv_next_request(q))) {
rq->flags |= REQ_STARTED; rq->flags |= REQ_STARTED;
if (&rq->queuelist == q->last_merge)
q->last_merge = NULL;
if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn) if ((rq->flags & REQ_DONTPREP) || !q->prep_rq_fn)
break; break;
......
...@@ -129,17 +129,18 @@ enum blk_queue_state { ...@@ -129,17 +129,18 @@ enum blk_queue_state {
struct request_queue struct request_queue
{ {
/*
* the queue request freelist, one for reads and one for writes
*/
struct request_list rq[2];
/* /*
* Together with queue_head for cacheline sharing * Together with queue_head for cacheline sharing
*/ */
struct list_head queue_head; struct list_head queue_head;
struct list_head *last_merge;
elevator_t elevator; elevator_t elevator;
/*
* the queue request freelist, one for reads and one for writes
*/
struct request_list rq[2];
request_fn_proc *request_fn; request_fn_proc *request_fn;
merge_request_fn *back_merge_fn; merge_request_fn *back_merge_fn;
merge_request_fn *front_merge_fn; merge_request_fn *front_merge_fn;
...@@ -213,27 +214,25 @@ struct request_queue ...@@ -213,27 +214,25 @@ struct request_queue
extern unsigned long blk_max_low_pfn, blk_max_pfn; extern unsigned long blk_max_low_pfn, blk_max_pfn;
#define BLK_BOUNCE_HIGH (blk_max_low_pfn << PAGE_SHIFT) /*
#define BLK_BOUNCE_ANY (blk_max_pfn << PAGE_SHIFT) * standard bounce addresses:
*
* BLK_BOUNCE_HIGH : bounce all highmem pages
* BLK_BOUNCE_ANY : don't bounce anything
* BLK_BOUNCE_ISA : bounce pages above ISA DMA boundary
*/
#define BLK_BOUNCE_HIGH ((blk_max_low_pfn + 1) << PAGE_SHIFT)
#define BLK_BOUNCE_ANY ((blk_max_pfn + 1) << PAGE_SHIFT)
#define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) #define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD)
#ifdef CONFIG_HIGHMEM extern int init_emergency_isa_pool(void);
extern void create_bounce(unsigned long pfn, int gfp, struct bio **bio_orig); extern void create_bounce(unsigned long pfn, int gfp, struct bio **bio_orig);
extern void init_emergency_isa_pool(void);
extern inline void blk_queue_bounce(request_queue_t *q, struct bio **bio) extern inline void blk_queue_bounce(request_queue_t *q, struct bio **bio)
{ {
create_bounce(q->bounce_pfn, q->bounce_gfp, bio); create_bounce(q->bounce_pfn, q->bounce_gfp, bio);
} }
#else /* CONFIG_HIGHMEM */
#define blk_queue_bounce(q, bio) do { } while (0)
#define init_emergency_isa_pool() do { } while (0)
#endif /* CONFIG_HIGHMEM */
#define rq_for_each_bio(bio, rq) \ #define rq_for_each_bio(bio, rq) \
if ((rq->bio)) \ if ((rq->bio)) \
for (bio = (rq)->bio; bio; bio = bio->bi_next) for (bio = (rq)->bio; bio; bio = bio->bi_next)
...@@ -275,9 +274,8 @@ extern void blk_plug_device(request_queue_t *); ...@@ -275,9 +274,8 @@ extern void blk_plug_device(request_queue_t *);
extern void blk_recount_segments(request_queue_t *, struct bio *); extern void blk_recount_segments(request_queue_t *, struct bio *);
extern inline int blk_phys_contig_segment(request_queue_t *q, struct bio *, struct bio *); extern inline int blk_phys_contig_segment(request_queue_t *q, struct bio *, struct bio *);
extern inline int blk_hw_contig_segment(request_queue_t *q, struct bio *, struct bio *); extern inline int blk_hw_contig_segment(request_queue_t *q, struct bio *, struct bio *);
extern void blk_queue_assign_lock(request_queue_t *q, spinlock_t *);
extern int block_ioctl(kdev_t, unsigned int, unsigned long); extern int block_ioctl(kdev_t, unsigned int, unsigned long);
extern int ll_10byte_cmd_build(request_queue_t *, struct request *);
/* /*
* Access functions for manipulating queue properties * Access functions for manipulating queue properties
...@@ -292,6 +290,9 @@ extern void blk_queue_max_hw_segments(request_queue_t *q, unsigned short); ...@@ -292,6 +290,9 @@ extern void blk_queue_max_hw_segments(request_queue_t *q, unsigned short);
extern void blk_queue_max_segment_size(request_queue_t *q, unsigned int); extern void blk_queue_max_segment_size(request_queue_t *q, unsigned int);
extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short); extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short);
extern void blk_queue_segment_boundary(request_queue_t *q, unsigned long); extern void blk_queue_segment_boundary(request_queue_t *q, unsigned long);
extern void blk_queue_assign_lock(request_queue_t *q, spinlock_t *);
extern void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn);
extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
extern void blk_dump_rq_flags(struct request *, char *); extern void blk_dump_rq_flags(struct request *, char *);
extern void generic_unplug_device(void *); extern void generic_unplug_device(void *);
......
#ifndef _LINUX_BLKDEV_H
#define _LINUX_BLKDEV_H
#include <linux/major.h>
#include <linux/sched.h>
#include <linux/genhd.h>
#include <linux/tqueue.h>
#include <linux/list.h>
#include <linux/mm.h>
#include <asm/scatterlist.h>
struct request_queue;
typedef struct request_queue request_queue_t;
struct elevator_s;
typedef struct elevator_s elevator_t;
/*
 * A pool of free request structures. struct request_queue embeds two of
 * these (rq[2]), described there as "one for reads and one for writes".
 */
struct request_list {
/* NOTE(review): presumably the number of requests currently on ->free -- confirm in ll_rw_blk.c */
unsigned int count;
/* list head anchoring the free requests */
struct list_head free;
/* NOTE(review): presumably where callers sleep until a request is freed -- confirm */
wait_queue_head_t wait;
};
/*
 * A single block-layer request. Requests are chained through ->queuelist
 * (see list_entry_rq()), carry a singly linked chain of bios starting at
 * ->bio (walked by rq_for_each_bio() via bi_next), and hold a pointer to a
 * request queue (->q) and to a request_list (->rl).
 */
struct request {
struct list_head queuelist; /* looking for ->queue? you must _not_
* access it directly, use
* blkdev_dequeue_request! */
int elevator_sequence;
/* 16-byte command buffer; NOTE(review): presumably a packet/SCSI CDB -- confirm */
unsigned char cmd[16];
unsigned long flags; /* see REQ_ bits below */
/* NOTE(review): presumably one of the RQ_* values defined later in this header */
int rq_status; /* should split this into a few status bits */
kdev_t rq_dev;
int errors;
sector_t sector;
unsigned long nr_sectors;
unsigned long hard_sector; /* the hard_* are block layer
* internals, no driver should
* touch them
*/
unsigned long hard_nr_sectors;
/* Number of scatter-gather DMA addr+len pairs after
* physical address coalescing is performed.
*/
unsigned short nr_phys_segments;
/* Number of scatter-gather addr+len pairs after
* physical and DMA remapping hardware coalescing is performed.
* This is the number of scatter-gather entries the driver
* will actually have to deal with after DMA mapping is done.
*/
unsigned short nr_hw_segments;
unsigned int current_nr_sectors;
/* covered by the "hard_*" rule above: block layer internal */
unsigned int hard_cur_sectors;
void *special;
char *buffer;
struct completion *waiting;
/* head and (presumably) tail of the bio chain; the chain is linked by bi_next */
struct bio *bio, *biotail;
request_queue_t *q;
/* NOTE(review): presumably the request_list this request is returned to when released */
struct request_list *rl;
};
/*
 * Bit positions within request->flags. The REQ_* masks that follow are
 * built as (1 << __REQ_*), so the order of the enumerators is significant.
 *
 * first three bits match BIO_RW* bits, important
 */
enum rq_flag_bits {
__REQ_RW, /* not set, read. set, write */
__REQ_RW_AHEAD, /* READA */
__REQ_BARRIER, /* may not be passed */
__REQ_CMD, /* is a regular fs rw request */
__REQ_NOMERGE, /* don't touch this for merging */
__REQ_STARTED, /* drive already may have started this one */
__REQ_DONTPREP, /* don't call prep for this one */
/*
* for IDE
*/
__REQ_DRIVE_CMD,
__REQ_DRIVE_TASK,
__REQ_PC, /* packet command (special) */
__REQ_BLOCK_PC, /* queued down pc from block layer */
__REQ_SENSE, /* sense retrieval */
__REQ_SPECIAL, /* driver special command */
__REQ_NR_BITS, /* stops here */
};
/*
 * Masks for request->flags, one per bit position in enum rq_flag_bits.
 * Note that REQ_SENSE and REQ_BLOCK_PC are listed here in the opposite
 * order to their enumerators; the values still come from the enum.
 */
#define REQ_RW (1 << __REQ_RW)
#define REQ_RW_AHEAD (1 << __REQ_RW_AHEAD)
#define REQ_BARRIER (1 << __REQ_BARRIER)
#define REQ_CMD (1 << __REQ_CMD)
#define REQ_NOMERGE (1 << __REQ_NOMERGE)
#define REQ_STARTED (1 << __REQ_STARTED)
#define REQ_DONTPREP (1 << __REQ_DONTPREP)
#define REQ_DRIVE_CMD (1 << __REQ_DRIVE_CMD)
#define REQ_DRIVE_TASK (1 << __REQ_DRIVE_TASK)
#define REQ_PC (1 << __REQ_PC)
#define REQ_SENSE (1 << __REQ_SENSE)
#define REQ_BLOCK_PC (1 << __REQ_BLOCK_PC)
#define REQ_SPECIAL (1 << __REQ_SPECIAL)
#include <linux/elevator.h>
/*
 * Function types for the block-layer hooks. Pointers to most of these are
 * stored in struct request_queue (request_fn, back_merge_fn,
 * front_merge_fn, merge_requests_fn, make_request_fn, prep_rq_fn);
 * queue_proc is the type of blk_dev_struct->queue.
 */
/* type of both back_merge_fn and front_merge_fn: (queue, request, bio) */
typedef int (merge_request_fn) (request_queue_t *, struct request *,
struct bio *);
/* type of merge_requests_fn: (queue, request, request) */
typedef int (merge_requests_fn) (request_queue_t *, struct request *,
struct request *);
typedef void (request_fn_proc) (request_queue_t *q);
/* maps a device number to a request queue */
typedef request_queue_t * (queue_proc) (kdev_t dev);
typedef int (make_request_fn) (request_queue_t *q, struct bio *bio);
/* type of prep_rq_fn; elv_next_request() skips it for REQ_DONTPREP requests */
typedef int (prep_rq_fn) (request_queue_t *, struct request *);
/* same signature as generic_unplug_device(); the argument is an untyped pointer */
typedef void (unplug_device_fn) (void *q);
/*
 * Queue state values.
 * NOTE(review): no user of this enum is visible in this header; the
 * meaning of the two states is only suggested by their names -- confirm.
 */
enum blk_queue_state {
Queue_down,
Queue_up,
};
/*
* Default nr free requests per queue, ll_rw_blk will scale it down
* according to available RAM at init time
*/
#define QUEUE_NR_REQUESTS 8192
/*
 * Per-device request queue: the free request pools, the list of queued
 * requests, the elevator, the driver/merge hooks, and the queue limits.
 * blk_dev_struct embeds one of these as the default queue for a major.
 */
struct request_queue
{
/*
* the queue request freelist, one for reads and one for writes
*/
struct request_list rq[2];
/*
* Together with queue_head for cacheline sharing
*/
struct list_head queue_head;
elevator_t elevator;
/*
* Per-queue hooks; their signatures are given by the function typedefs
* earlier in this header.
*/
request_fn_proc *request_fn;
merge_request_fn *back_merge_fn;
merge_request_fn *front_merge_fn;
merge_requests_fn *merge_requests_fn;
make_request_fn *make_request_fn;
prep_rq_fn *prep_rq_fn;
/*
* The queue owner gets to use this for whatever they like.
* ll_rw_blk doesn't touch it.
*/
void *queuedata;
/*
* queue needs bounce pages for pages above this limit
* (blk_queue_bounce() hands both values to create_bounce())
*/
unsigned long bounce_pfn;
int bounce_gfp;
/*
* This is used to remove the plug when tq_disk runs.
*/
struct tq_struct plug_tq;
/*
* various queue flags, see QUEUE_* below
* (QUEUE_FLAG_* are bit numbers used with test_bit()/set_bit())
*/
unsigned long queue_flags;
/*
* protects queue structures from reentrancy
*/
spinlock_t *queue_lock;
/*
* queue settings
* NOTE(review): presumably set through the blk_queue_*() accessors
* declared later in this header, whose names match these fields.
*/
unsigned short max_sectors;
unsigned short max_phys_segments;
unsigned short max_hw_segments;
unsigned short hardsect_size;
unsigned int max_segment_size;
unsigned long seg_boundary_mask;
wait_queue_head_t queue_wait;
};
#define RQ_INACTIVE (-1)
#define RQ_ACTIVE 1
#define RQ_SCSI_BUSY 0xffff
#define RQ_SCSI_DONE 0xfffe
#define RQ_SCSI_DISCONNECTING 0xffe0
#define QUEUE_FLAG_PLUGGED 0 /* queue is plugged */
#define QUEUE_FLAG_NOSPLIT 1 /* can process bio over several goes */
#define QUEUE_FLAG_CLUSTER 2 /* cluster several segments into 1 */
#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
#define blk_mark_plugged(q) set_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
#define blk_queue_empty(q) elv_queue_empty(q)
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
#define rq_data_dir(rq) ((rq)->flags & 1)
/*
* noop, requests are automagically marked as active/inactive by I/O
* scheduler -- see elv_next_request
*/
#define blk_queue_headactive(q, head_active)
extern unsigned long blk_max_low_pfn, blk_max_pfn;
#define BLK_BOUNCE_HIGH (blk_max_low_pfn << PAGE_SHIFT)
#define BLK_BOUNCE_ANY (blk_max_pfn << PAGE_SHIFT)
#define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD)
#ifdef CONFIG_HIGHMEM
extern void create_bounce(unsigned long pfn, int gfp, struct bio **bio_orig);
extern void init_emergency_isa_pool(void);
/*
 * blk_queue_bounce - run a bio through the bounce logic for queue @q
 * @q:   queue whose bounce_pfn and bounce_gfp are passed to create_bounce()
 * @bio: address of the bio pointer, forwarded unchanged to create_bounce()
 *
 * Only this CONFIG_HIGHMEM variant does any work; without CONFIG_HIGHMEM
 * the name is defined as an empty do { } while (0) macro instead.
 */
extern inline void blk_queue_bounce(request_queue_t *q, struct bio **bio)
{
create_bounce(q->bounce_pfn, q->bounce_gfp, bio);
}
#else /* CONFIG_HIGHMEM */
#define blk_queue_bounce(q, bio) do { } while (0)
#define init_emergency_isa_pool() do { } while (0)
#endif /* CONFIG_HIGHMEM */
/*
 * rq_for_each_bio - iterate over every bio attached to a request
 * @_bio: struct bio * lvalue used as the loop cursor (any name is fine)
 * @rq:   pointer to the request; may be an arbitrary expression
 *
 * The cursor parameter is deliberately NOT called "bio": a macro parameter
 * with that name would also be substituted into the "->bio" member access,
 * so the macro would only compile when the caller's variable happened to
 * be named "bio". @rq is parenthesized everywhere for the same reason
 * that expressions such as "*pp" or "&req" must bind before "->".
 *
 * The expansion is "if (...) for (...)", so the next statement is the loop
 * body. Do not attach an "else" to an unbraced use: it would pair with the
 * hidden "if".
 */
#define rq_for_each_bio(_bio, rq)	\
	if (((rq)->bio))	\
		for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next)
/*
 * Per-major block device entry; blk_dev[] holds MAX_BLKDEV of these.
 */
struct blk_dev_struct {
/*
* queue_proc has to be atomic
* (this remark concerns the ->queue callback below, not request_queue)
*/
/* the default queue for this major, as returned by BLK_DEFAULT_QUEUE() */
request_queue_t request_queue;
/* callback mapping a kdev_t to the request queue to use */
queue_proc *queue;
/* untyped pointer; NOTE(review): presumably private data for the driver -- confirm */
void *data;
};
/*
 * Block size record; blk_sec[] is an array of MAX_BLKDEV pointers to these.
 * NOTE(review): presumably block_size_bits is log2(block_size) -- confirm
 * against the code that fills these in.
 */
struct sec_size {
unsigned block_size;
unsigned block_size_bits;
};
/*
 * Used to indicate the default queue for drivers that don't bother
 * to implement multiple queues.  We have this access macro here
 * so as to eliminate the need for each and every block device
 * driver to know about the internal structure of blk_dev[].
 *
 * @_MAJOR: major number indexing blk_dev[]; may be any expression.
 *
 * Evaluates to a pointer to blk_dev[_MAJOR].request_queue. The whole
 * expansion is parenthesized so that a postfix operator applied to the
 * result, as in BLK_DEFAULT_QUEUE(major)->queuedata, acts on the pointer
 * instead of binding to ".request_queue" before the "&".
 */
#define BLK_DEFAULT_QUEUE(_MAJOR)	(&blk_dev[(_MAJOR)].request_queue)
extern struct sec_size * blk_sec[MAX_BLKDEV];
extern struct blk_dev_struct blk_dev[MAX_BLKDEV];
extern void grok_partitions(kdev_t dev, long size);
extern int wipe_partitions(kdev_t dev);
extern void register_disk(struct gendisk *dev, kdev_t first, unsigned minors, struct block_device_operations *ops, long size);
extern void generic_make_request(struct bio *bio);
extern inline request_queue_t *blk_get_queue(kdev_t dev);
extern void blkdev_release_request(struct request *);
extern void blk_attempt_remerge(request_queue_t *, struct request *);
extern struct request *blk_get_request(request_queue_t *, int, int);
extern void blk_put_request(struct request *);
extern void blk_plug_device(request_queue_t *);
extern void blk_recount_segments(request_queue_t *, struct bio *);
extern inline int blk_phys_contig_segment(request_queue_t *q, struct bio *, struct bio *);
extern inline int blk_hw_contig_segment(request_queue_t *q, struct bio *, struct bio *);
extern void blk_queue_assign_lock(request_queue_t *q, spinlock_t *);
extern int block_ioctl(kdev_t, unsigned int, unsigned long);
/*
* Access functions for manipulating queue properties
*/
extern int blk_init_queue(request_queue_t *, request_fn_proc *, spinlock_t *);
extern void blk_cleanup_queue(request_queue_t *);
extern void blk_queue_make_request(request_queue_t *, make_request_fn *);
extern void blk_queue_bounce_limit(request_queue_t *, u64);
extern void blk_queue_max_sectors(request_queue_t *q, unsigned short);
extern void blk_queue_max_phys_segments(request_queue_t *q, unsigned short);
extern void blk_queue_max_hw_segments(request_queue_t *q, unsigned short);
extern void blk_queue_max_segment_size(request_queue_t *q, unsigned int);
extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short);
extern void blk_queue_segment_boundary(request_queue_t *q, unsigned long);
extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
extern void blk_dump_rq_flags(struct request *, char *);
extern void generic_unplug_device(void *);
extern int * blk_size[MAX_BLKDEV];
extern int * blksize_size[MAX_BLKDEV];
extern int * max_readahead[MAX_BLKDEV];
#define MAX_PHYS_SEGMENTS 128
#define MAX_HW_SEGMENTS 128
#define MAX_SECTORS 255
#define MAX_SEGMENT_SIZE 65536
/* read-ahead in pages.. */
#define MAX_READAHEAD 31
#define MIN_READAHEAD 3
/*
 * Helpers for walking a list of requests linked through their
 * 'queuelist' member.
 *
 * blkdev_entry_to_request(entry): struct request that embeds the list
 *	node 'entry' as its queuelist field.
 * blkdev_entry_{next,prev}_request(entry): same conversion applied to
 *	entry->next / entry->prev.
 * blkdev_{next,prev}_request(req): same conversion applied to the
 *	neighbours of req's own queuelist node.
 *
 * None of these check whether the neighbouring node is a list head
 * rather than a real request; that is left to the caller.
 */
#define blkdev_entry_to_request(entry) list_entry((entry), struct request, queuelist)
#define blkdev_entry_next_request(entry) blkdev_entry_to_request((entry)->next)
#define blkdev_entry_prev_request(entry) blkdev_entry_to_request((entry)->prev)
#define blkdev_next_request(req) blkdev_entry_to_request((req)->queuelist.next)
#define blkdev_prev_request(req) blkdev_entry_to_request((req)->queuelist.prev)
extern void drive_stat_acct(struct request *, int, int);
/*
 * blk_clear - reset the per-major block tables for one major number.
 * @major: index into the per-major tables; it is not range-checked here.
 *
 * Sets the blk_size, blksize_size and max_readahead slots for @major to
 * NULL and resets read_ahead[major] to 0.
 */
extern inline void blk_clear(int major)
{
	max_readahead[major] = NULL;
	blksize_size[major] = NULL;
#if 0
	blk_size_in_bytes[major] = NULL;
#endif
	blk_size[major] = NULL;

	read_ahead[major] = 0;
}
/*
 * get_hardsect_size - hardware sector size for a device.
 * @dev: device whose request queue is looked up via blk_get_queue().
 *
 * Returns the queue's hardsect_size when a queue is found and the value
 * is non-zero; otherwise falls back to 512.
 */
extern inline int get_hardsect_size(kdev_t dev)
{
	request_queue_t *queue = blk_get_queue(dev);

	if (!queue || !queue->hardsect_size)
		return 512;

	return queue->hardsect_size;
}
/*
 * Both macros currently expand to an empty statement; the nsects
 * argument is never evaluated.
 */
#define blk_finished_io(nsects) do { } while (0)
#define blk_started_io(nsects) do { } while (0)
/*
 * blksize_bits - number of shifts needed to reach a block size.
 * @size: block size in bytes.
 *
 * Starts at 9 (i.e. 512 bytes) and adds one for every further halving
 * of @size that still leaves a value above 256. For a power of two of
 * 512 or more this is log2(size); anything below 512 also yields 9.
 */
extern inline unsigned int blksize_bits(unsigned int size)
{
	unsigned int shift = 9;

	for (size >>= 1; size > 256; size >>= 1)
		shift++;

	return shift;
}
/*
 * block_size - block size configured for a device.
 * @dev: device number; split into major/minor with MAJOR()/MINOR().
 *
 * Returns blksize_size[major][minor] when the per-major table exists
 * and the entry is non-zero, otherwise BLOCK_SIZE.
 */
extern inline unsigned int block_size(kdev_t dev)
{
	int *per_minor = blksize_size[MAJOR(dev)];
	int minor = MINOR(dev);

	if (per_minor && per_minor[minor])
		return per_minor[minor];

	return BLOCK_SIZE;
}
#endif
#ifndef _LINUX_ELEVATOR_H #ifndef _LINUX_ELEVATOR_H
#define _LINUX_ELEVATOR_H #define _LINUX_ELEVATOR_H
typedef void (elevator_fn) (struct request *, elevator_t *,
struct list_head *,
struct list_head *, int);
typedef int (elevator_merge_fn) (request_queue_t *, struct request **, typedef int (elevator_merge_fn) (request_queue_t *, struct request **,
struct list_head *, struct bio *); struct bio *);
typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int); typedef void (elevator_merge_cleanup_fn) (request_queue_t *, struct request *, int);
...@@ -21,8 +17,7 @@ typedef void (elevator_exit_fn) (request_queue_t *, elevator_t *); ...@@ -21,8 +17,7 @@ typedef void (elevator_exit_fn) (request_queue_t *, elevator_t *);
struct elevator_s struct elevator_s
{ {
int read_latency; int latency[2];
int write_latency;
elevator_merge_fn *elevator_merge_fn; elevator_merge_fn *elevator_merge_fn;
elevator_merge_cleanup_fn *elevator_merge_cleanup_fn; elevator_merge_cleanup_fn *elevator_merge_cleanup_fn;
...@@ -35,11 +30,11 @@ struct elevator_s ...@@ -35,11 +30,11 @@ struct elevator_s
elevator_exit_fn *elevator_exit_fn; elevator_exit_fn *elevator_exit_fn;
}; };
int elevator_noop_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); int elevator_noop_merge(request_queue_t *, struct request **, struct bio *);
void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int); void elevator_noop_merge_cleanup(request_queue_t *, struct request *, int);
void elevator_noop_merge_req(struct request *, struct request *); void elevator_noop_merge_req(struct request *, struct request *);
int elevator_linus_merge(request_queue_t *, struct request **, struct list_head *, struct bio *); int elevator_linus_merge(request_queue_t *, struct request **, struct bio *);
void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int); void elevator_linus_merge_cleanup(request_queue_t *, struct request *, int);
void elevator_linus_merge_req(struct request *, struct request *); void elevator_linus_merge_req(struct request *, struct request *);
int elv_linus_init(request_queue_t *, elevator_t *); int elv_linus_init(request_queue_t *, elevator_t *);
...@@ -69,32 +64,7 @@ extern void elevator_exit(request_queue_t *, elevator_t *); ...@@ -69,32 +64,7 @@ extern void elevator_exit(request_queue_t *, elevator_t *);
#define ELEVATOR_FRONT_MERGE 1 #define ELEVATOR_FRONT_MERGE 1
#define ELEVATOR_BACK_MERGE 2 #define ELEVATOR_BACK_MERGE 2
/* #define elevator_request_latency(e, rw) ((e)->latency[(rw) & 1])
* This is used in the elevator algorithm. We don't prioritise reads
* over writes any more --- although reads are more time-critical than
* writes, by treating them equally we increase filesystem throughput.
* This turns out to give better overall performance. -- sct
*/
#define IN_ORDER(s1,s2) \
((((s1)->rq_dev == (s2)->rq_dev && \
(s1)->sector < (s2)->sector)) || \
(s1)->rq_dev < (s2)->rq_dev)
#define BHRQ_IN_ORDER(bh, rq) \
((((bh)->b_rdev == (rq)->rq_dev && \
(bh)->b_rsector < (rq)->sector)) || \
(bh)->b_rdev < (rq)->rq_dev)
/*
 * elevator_request_latency - pick the latency setting for a request.
 * @elevator: elevator whose read_latency/write_latency are consulted.
 * @rw: request direction; READ selects read_latency, every other value
 *	selects write_latency.
 */
static inline int elevator_request_latency(elevator_t * elevator, int rw)
{
	if (rw == READ)
		return elevator->read_latency;

	return elevator->write_latency;
}
/* /*
* will change once we move to a more complex data structure than a simple * will change once we move to a more complex data structure than a simple
...@@ -116,9 +86,7 @@ struct elv_linus_data { ...@@ -116,9 +86,7 @@ struct elv_linus_data {
#define ELEVATOR_NOOP \ #define ELEVATOR_NOOP \
((elevator_t) { \ ((elevator_t) { \
0, /* read_latency */ \ { 0, 0}, \
0, /* write_latency */ \
\
elevator_noop_merge, /* elevator_merge_fn */ \ elevator_noop_merge, /* elevator_merge_fn */ \
elevator_noop_merge_cleanup, /* elevator_merge_cleanup_fn */ \ elevator_noop_merge_cleanup, /* elevator_merge_cleanup_fn */ \
elevator_noop_merge_req, /* elevator_merge_req_fn */ \ elevator_noop_merge_req, /* elevator_merge_req_fn */ \
...@@ -130,9 +98,7 @@ struct elv_linus_data { ...@@ -130,9 +98,7 @@ struct elv_linus_data {
#define ELEVATOR_LINUS \ #define ELEVATOR_LINUS \
((elevator_t) { \ ((elevator_t) { \
8192, /* read passovers */ \ { 8192, 16384 }, \
16384, /* write passovers */ \
\
elevator_linus_merge, /* elevator_merge_fn */ \ elevator_linus_merge, /* elevator_merge_fn */ \
elevator_linus_merge_cleanup, /* elevator_merge_cleanup_fn */ \ elevator_linus_merge_cleanup, /* elevator_merge_cleanup_fn */ \
elevator_linus_merge_req, /* elevator_merge_req_fn */ \ elevator_linus_merge_req, /* elevator_merge_req_fn */ \
......
...@@ -240,7 +240,7 @@ struct mdk_personality_s ...@@ -240,7 +240,7 @@ struct mdk_personality_s
int (*stop_resync)(mddev_t *mddev); int (*stop_resync)(mddev_t *mddev);
int (*restart_resync)(mddev_t *mddev); int (*restart_resync)(mddev_t *mddev);
int (*sync_request)(mddev_t *mddev, sector_t sector_nr); int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster);
}; };
......
...@@ -9,8 +9,8 @@ struct mirror_info { ...@@ -9,8 +9,8 @@ struct mirror_info {
int number; int number;
int raid_disk; int raid_disk;
kdev_t dev; kdev_t dev;
int sect_limit; sector_t head_position;
int head_position; atomic_t nr_pending;
/* /*
* State bits: * State bits:
...@@ -31,23 +31,21 @@ struct r1_private_data_s { ...@@ -31,23 +31,21 @@ struct r1_private_data_s {
int raid_disks; int raid_disks;
int working_disks; int working_disks;
int last_used; int last_used;
sector_t next_sect; sector_t next_seq_sect;
int sect_count;
mdk_thread_t *thread, *resync_thread; mdk_thread_t *thread, *resync_thread;
int resync_mirrors; int resync_mirrors;
mirror_info_t *spare; mirror_info_t *spare;
spinlock_t device_lock; spinlock_t device_lock;
/* for use when syncing mirrors: */ /* for use when syncing mirrors: */
unsigned long start_active, start_ready,
start_pending, start_future; spinlock_t resync_lock;
int cnt_done, cnt_active, cnt_ready, int nr_pending;
cnt_pending, cnt_future; int barrier;
int phase; sector_t next_resync;
int window;
wait_queue_head_t wait_done; wait_queue_head_t wait_idle;
wait_queue_head_t wait_ready; wait_queue_head_t wait_resume;
spinlock_t segment_lock;
mempool_t *r1bio_pool; mempool_t *r1bio_pool;
mempool_t *r1buf_pool; mempool_t *r1buf_pool;
...@@ -62,7 +60,8 @@ typedef struct r1_private_data_s conf_t; ...@@ -62,7 +60,8 @@ typedef struct r1_private_data_s conf_t;
#define mddev_to_conf(mddev) ((conf_t *) mddev->private) #define mddev_to_conf(mddev) ((conf_t *) mddev->private)
/* /*
* this is our 'private' 'collective' RAID1 buffer head. * this is our 'private' RAID1 bio.
*
* it contains information about what kind of IO operations were started * it contains information about what kind of IO operations were started
* for this RAID1 operation, and about their status: * for this RAID1 operation, and about their status:
*/ */
...@@ -83,6 +82,7 @@ struct r1bio_s { ...@@ -83,6 +82,7 @@ struct r1bio_s {
* if the IO is in READ direction, then this bio is used: * if the IO is in READ direction, then this bio is used:
*/ */
struct bio *read_bio; struct bio *read_bio;
int read_disk;
/* /*
* if the IO is in WRITE direction, then multiple bios are used: * if the IO is in WRITE direction, then multiple bios are used:
*/ */
...@@ -94,5 +94,5 @@ struct r1bio_s { ...@@ -94,5 +94,5 @@ struct r1bio_s {
/* bits for r1bio.state */ /* bits for r1bio.state */
#define R1BIO_Uptodate 1 #define R1BIO_Uptodate 1
#define R1BIO_SyncPhase 2
#endif #endif
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include <linux/config.h> #include <linux/config.h>
#include <linux/vt.h> #include <linux/vt.h>
#include <linux/kd.h> #include <linux/kd.h>
#include <linux/tty.h>
/* /*
* Presently, a lot of graphics programs do not restore the contents of * Presently, a lot of graphics programs do not restore the contents of
......
...@@ -3,12 +3,8 @@ ...@@ -3,12 +3,8 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/devfs_fs_kernel.h> #include <linux/devfs_fs_kernel.h>
#include <linux/unistd.h> #include <linux/unistd.h>
#include <linux/string.h>
#include <linux/ctype.h> #include <linux/ctype.h>
#include <linux/init.h>
#include <linux/smp_lock.h>
#include <linux/blk.h> #include <linux/blk.h>
#include <linux/tty.h>
#include <linux/fd.h> #include <linux/fd.h>
#include <linux/nfs_fs.h> #include <linux/nfs_fs.h>
...@@ -18,8 +14,6 @@ ...@@ -18,8 +14,6 @@
#include <linux/ext2_fs.h> #include <linux/ext2_fs.h>
#include <linux/romfs_fs.h> #include <linux/romfs_fs.h>
#include <asm/uaccess.h>
#define BUILD_CRAMDISK #define BUILD_CRAMDISK
extern int get_filesystem_list(char * buf); extern int get_filesystem_list(char * buf);
...@@ -38,12 +32,21 @@ asmlinkage long sys_ioctl(int fd, int cmd, unsigned long arg); ...@@ -38,12 +32,21 @@ asmlinkage long sys_ioctl(int fd, int cmd, unsigned long arg);
#ifdef CONFIG_BLK_DEV_INITRD #ifdef CONFIG_BLK_DEV_INITRD
unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */ unsigned int real_root_dev; /* do_proc_dointvec cannot handle kdev_t */
#endif static int __initdata mount_initrd = 1;
#ifdef CONFIG_BLK_DEV_RAM
extern int rd_doload; static int __init no_initrd(char *str)
{
mount_initrd = 0;
return 1;
}
__setup("noinitrd", no_initrd);
#else #else
static int rd_doload = 0; static int __initdata mount_initrd = 0;
#endif #endif
int __initdata rd_doload; /* 1 = load RAM disk, 0 = don't load */
int root_mountflags = MS_RDONLY | MS_VERBOSE; int root_mountflags = MS_RDONLY | MS_VERBOSE;
static char root_device_name[64]; static char root_device_name[64];
...@@ -52,6 +55,13 @@ kdev_t ROOT_DEV; ...@@ -52,6 +55,13 @@ kdev_t ROOT_DEV;
static int do_devfs = 0; static int do_devfs = 0;
/*
 * Parse the value given to the "load_ramdisk=" option and store its two
 * low bits in rd_doload. The base is auto-detected by simple_strtol()
 * (base argument 0). Always returns 1.
 */
static int __init load_ramdisk(char *str)
{
	long requested = simple_strtol(str, NULL, 0);

	rd_doload = requested & 3;
	return 1;
}
__setup("load_ramdisk=", load_ramdisk);
static int __init readonly(char *str) static int __init readonly(char *str)
{ {
if (*str) if (*str)
...@@ -371,6 +381,24 @@ static void __init change_floppy(char *fmt, ...) ...@@ -371,6 +381,24 @@ static void __init change_floppy(char *fmt, ...)
#ifdef CONFIG_BLK_DEV_RAM #ifdef CONFIG_BLK_DEV_RAM
int __initdata rd_prompt = 1; /* 1 = prompt for RAM disk, 0 = don't prompt */
/*
 * Parse the value given to the "prompt_ramdisk=" option and keep only
 * bit 0 of it in rd_prompt. Always returns 1.
 */
static int __init prompt_ramdisk(char *str)
{
	long requested = simple_strtol(str, NULL, 0);

	rd_prompt = requested & 1;
	return 1;
}
__setup("prompt_ramdisk=", prompt_ramdisk);
int __initdata rd_image_start; /* starting block # of image */
/*
 * Parse the value given to the "ramdisk_start=" option and store it,
 * unmasked, in rd_image_start. Always returns 1.
 */
static int __init ramdisk_start_setup(char *str)
{
	long first_block = simple_strtol(str, NULL, 0);

	rd_image_start = first_block;
	return 1;
}
__setup("ramdisk_start=", ramdisk_start_setup);
static int __init crd_load(int in_fd, int out_fd); static int __init crd_load(int in_fd, int out_fd);
/* /*
...@@ -588,7 +616,6 @@ static int __init rd_load_image(char *from) ...@@ -588,7 +616,6 @@ static int __init rd_load_image(char *from)
static int __init rd_load_disk(int n) static int __init rd_load_disk(int n)
{ {
#ifdef CONFIG_BLK_DEV_RAM #ifdef CONFIG_BLK_DEV_RAM
extern int rd_prompt;
if (rd_prompt) if (rd_prompt)
change_floppy("root floppy disk to be loaded into RAM disk"); change_floppy("root floppy disk to be loaded into RAM disk");
create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n), NULL); create_dev("/dev/ram", MKDEV(RAMDISK_MAJOR, n), NULL);
...@@ -715,13 +742,10 @@ static int __init initrd_load(void) ...@@ -715,13 +742,10 @@ static int __init initrd_load(void)
*/ */
void prepare_namespace(void) void prepare_namespace(void)
{ {
int do_initrd = 0;
int is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR; int is_floppy = MAJOR(ROOT_DEV) == FLOPPY_MAJOR;
#ifdef CONFIG_BLK_DEV_INITRD #ifdef CONFIG_BLK_DEV_INITRD
if (!initrd_start) if (!initrd_start)
mount_initrd = 0; mount_initrd = 0;
if (mount_initrd)
do_initrd = 1;
real_root_dev = ROOT_DEV; real_root_dev = ROOT_DEV;
#endif #endif
sys_mkdir("/dev", 0700); sys_mkdir("/dev", 0700);
...@@ -732,7 +756,7 @@ void prepare_namespace(void) ...@@ -732,7 +756,7 @@ void prepare_namespace(void)
#endif #endif
create_dev("/dev/root", ROOT_DEV, NULL); create_dev("/dev/root", ROOT_DEV, NULL);
if (do_initrd) { if (mount_initrd) {
if (initrd_load() && ROOT_DEV != MKDEV(RAMDISK_MAJOR, 0)) { if (initrd_load() && ROOT_DEV != MKDEV(RAMDISK_MAJOR, 0)) {
handle_initrd(); handle_initrd();
goto out; goto out;
......
...@@ -116,11 +116,11 @@ EXPORT_SYMBOL(vmtruncate); ...@@ -116,11 +116,11 @@ EXPORT_SYMBOL(vmtruncate);
EXPORT_SYMBOL(find_vma); EXPORT_SYMBOL(find_vma);
EXPORT_SYMBOL(get_unmapped_area); EXPORT_SYMBOL(get_unmapped_area);
EXPORT_SYMBOL(init_mm); EXPORT_SYMBOL(init_mm);
EXPORT_SYMBOL(create_bounce);
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
EXPORT_SYMBOL(kmap_high); EXPORT_SYMBOL(kmap_high);
EXPORT_SYMBOL(kunmap_high); EXPORT_SYMBOL(kunmap_high);
EXPORT_SYMBOL(highmem_start_page); EXPORT_SYMBOL(highmem_start_page);
EXPORT_SYMBOL(create_bounce);
EXPORT_SYMBOL(kmap_prot); EXPORT_SYMBOL(kmap_prot);
EXPORT_SYMBOL(kmap_pte); EXPORT_SYMBOL(kmap_pte);
#endif #endif
......
...@@ -649,10 +649,8 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid) ...@@ -649,10 +649,8 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
/* /*
* kill_something_info() interprets pid in interesting ways just like kill(2). * kill_something_info() interprets pid in interesting ways just like kill(2).
* *
* POSIX (2001) specifies "If pid is -1, sig shall be sent to all processes * POSIX specifies that kill(-1,sig) is unspecified, but what we have
* (excluding an unspecified set of system processes) for which the process * is probably wrong. Should make it like BSD or SYSV.
* has permission to send that signal."
* So, probably the process should also signal itself.
*/ */
static int kill_something_info(int sig, struct siginfo *info, int pid) static int kill_something_info(int sig, struct siginfo *info, int pid)
...@@ -665,7 +663,7 @@ static int kill_something_info(int sig, struct siginfo *info, int pid) ...@@ -665,7 +663,7 @@ static int kill_something_info(int sig, struct siginfo *info, int pid)
read_lock(&tasklist_lock); read_lock(&tasklist_lock);
for_each_task(p) { for_each_task(p) {
if (p->pid > 1) { if (p->pid > 1 && p != current) {
int err = send_sig_info(sig, info, p); int err = send_sig_info(sig, info, p);
++count; ++count;
if (err != -EPERM) if (err != -EPERM)
......
...@@ -14,8 +14,6 @@ export-objs := shmem.o filemap.o mempool.o ...@@ -14,8 +14,6 @@ export-objs := shmem.o filemap.o mempool.o
obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ obj-y := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \ page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \
shmem.o mempool.o shmem.o highmem.o mempool.o
obj-$(CONFIG_HIGHMEM) += highmem.o
include $(TOPDIR)/Rules.make include $(TOPDIR)/Rules.make
...@@ -19,6 +19,19 @@ ...@@ -19,6 +19,19 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/pagemap.h> #include <linux/pagemap.h>
#include <linux/mempool.h> #include <linux/mempool.h>
#include <linux/blkdev.h>
static mempool_t *page_pool, *isa_page_pool;
static void *page_pool_alloc(int gfp_mask, void *data)
{
return alloc_page(gfp_mask);
}
static void page_pool_free(void *page, void *data)
{
__free_page(page);
}
/* /*
* Virtual_count is not a pure "count". * Virtual_count is not a pure "count".
...@@ -28,6 +41,7 @@ ...@@ -28,6 +41,7 @@
* since the last TLB flush - so we can't use it. * since the last TLB flush - so we can't use it.
* n means that there are (n-1) current users of it. * n means that there are (n-1) current users of it.
*/ */
#ifdef CONFIG_HIGHMEM
static int pkmap_count[LAST_PKMAP]; static int pkmap_count[LAST_PKMAP];
static unsigned int last_pkmap_nr; static unsigned int last_pkmap_nr;
static spinlock_t kmap_lock = SPIN_LOCK_UNLOCKED; static spinlock_t kmap_lock = SPIN_LOCK_UNLOCKED;
...@@ -185,19 +199,6 @@ void kunmap_high(struct page *page) ...@@ -185,19 +199,6 @@ void kunmap_high(struct page *page)
} }
#define POOL_SIZE 64 #define POOL_SIZE 64
#define ISA_POOL_SIZE 16
static mempool_t *page_pool, *isa_page_pool;
static void *page_pool_alloc(int gfp_mask, void *data)
{
return alloc_page(gfp_mask);
}
static void page_pool_free(void *page, void *data)
{
__free_page(page);
}
static __init int init_emergency_pool(void) static __init int init_emergency_pool(void)
{ {
...@@ -211,11 +212,37 @@ static __init int init_emergency_pool(void) ...@@ -211,11 +212,37 @@ static __init int init_emergency_pool(void)
page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL); page_pool = mempool_create(POOL_SIZE, page_pool_alloc, page_pool_free, NULL);
if (!page_pool) if (!page_pool)
BUG(); BUG();
printk("highmem bounce pool size: %d pages and bhs.\n", POOL_SIZE); printk("highmem bounce pool size: %d pages\n", POOL_SIZE);
return 0; return 0;
} }
__initcall(init_emergency_pool);
/*
* highmem version, map in to vec
*/
/*
 * bounce_copy_vec - copy bounce data into the page of a bio_vec.
 * @to: destination vector; bv_page is mapped, bv_offset/bv_len bound
 *	the copy.
 * @vfrom: source address of the data to copy.
 *
 * The destination page is mapped with kmap_atomic() in the
 * KM_BOUNCE_READ slot, and local interrupts stay disabled for as long
 * as that mapping is held.
 */
static inline void bounce_copy_vec(struct bio_vec *to, unsigned char *vfrom)
{
	unsigned long irq_state;
	unsigned char *mapped;

	local_irq_save(irq_state);

	mapped = kmap_atomic(to->bv_page, KM_BOUNCE_READ);
	memcpy(mapped + to->bv_offset, vfrom, to->bv_len);
	kunmap_atomic(mapped, KM_BOUNCE_READ);

	local_irq_restore(irq_state);
}
#else /* CONFIG_HIGHMEM */
#define bounce_copy_vec(to, vfrom) \
memcpy(page_address((to)->bv_page) + (to)->bv_offset, vfrom, (to)->bv_len)
#endif
#define ISA_POOL_SIZE 16
/* /*
* gets called "every" time someone init's a queue with BLK_BOUNCE_ISA * gets called "every" time someone init's a queue with BLK_BOUNCE_ISA
* as the max address, so check if the pool has already been created. * as the max address, so check if the pool has already been created.
...@@ -233,8 +260,6 @@ int init_emergency_isa_pool(void) ...@@ -233,8 +260,6 @@ int init_emergency_isa_pool(void)
return 0; return 0;
} }
__initcall(init_emergency_pool);
/* /*
* Simple bounce buffer support for highmem pages. Depending on the * Simple bounce buffer support for highmem pages. Depending on the
* queue gfp mask set, *to may or may not be a highmem page. kmap it * queue gfp mask set, *to may or may not be a highmem page. kmap it
...@@ -242,8 +267,7 @@ __initcall(init_emergency_pool); ...@@ -242,8 +267,7 @@ __initcall(init_emergency_pool);
*/ */
static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from) static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from)
{ {
unsigned char *vto, *vfrom; unsigned char *vfrom;
unsigned long flags;
struct bio_vec *tovec, *fromvec; struct bio_vec *tovec, *fromvec;
int i; int i;
...@@ -258,11 +282,7 @@ static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from) ...@@ -258,11 +282,7 @@ static inline void copy_to_high_bio_irq(struct bio *to, struct bio *from)
vfrom = page_address(fromvec->bv_page) + fromvec->bv_offset; vfrom = page_address(fromvec->bv_page) + fromvec->bv_offset;
local_irq_save(flags); bounce_copy_vec(tovec, vfrom);
vto = kmap_atomic(tovec->bv_page, KM_BOUNCE_READ);
memcpy(vto + tovec->bv_offset, vfrom, tovec->bv_len);
kunmap_atomic(vto, KM_BOUNCE_READ);
local_irq_restore(flags);
} }
} }
...@@ -336,10 +356,25 @@ void create_bounce(unsigned long pfn, int gfp, struct bio **bio_orig) ...@@ -336,10 +356,25 @@ void create_bounce(unsigned long pfn, int gfp, struct bio **bio_orig)
BUG_ON((*bio_orig)->bi_idx); BUG_ON((*bio_orig)->bi_idx);
/*
* for non-isa bounce case, just check if the bounce pfn is equal
* to or bigger than the highest pfn in the system -- in that case,
* don't waste time iterating over bio segments
*/
if (!(gfp & GFP_DMA)) { if (!(gfp & GFP_DMA)) {
if (pfn >= blk_max_pfn)
return;
#ifndef CONFIG_HIGHMEM
/*
* should not hit for non-highmem case
*/
BUG();
#endif
bio_gfp = GFP_NOHIGHIO; bio_gfp = GFP_NOHIGHIO;
pool = page_pool; pool = page_pool;
} else { } else {
BUG_ON(!isa_page_pool);
bio_gfp = GFP_NOIO; bio_gfp = GFP_NOIO;
pool = isa_page_pool; pool = isa_page_pool;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment