diff --git a/Makefile b/Makefile index 787c8a8b9a5866cb7aa6fa5d3bf5cbe70913a392..f2eda9ca6c0353a65de913108c68451c62053c73 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 2 PATCHLEVEL = 5 -SUBLEVEL = 22 +SUBLEVEL = 23 EXTRAVERSION = # We are using a recursive build, so we need to do a little thinking diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index 4ba5641b271fdf00cbff933e98170ee8813413be..55b8fc43a9bfe68358eb9e3b05b47bd4a2ca17d4 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -221,8 +221,6 @@ void iounmap(void *addr) return; } - BUG_ON(p->phys_addr == 0); /* not allocated with ioremap */ - vmfree_area_pages(VMALLOC_VMADDR(p->addr), p->size); if (p->flags && p->phys_addr < virt_to_phys(high_memory)) { change_page_attr(virt_to_page(__va(p->phys_addr)), diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 16abcb3f5481814810691da07bd73dc8b7f0452a..318ff55529fbde2b58898e34952d7079364a0b09 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -161,6 +161,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); init_waitqueue_head(&q->queue_wait); + INIT_LIST_HEAD(&q->plug_list); } /** diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 44909021aa06e418625ca81ed4031f06ea5c303e..e046885bb67bff284a59cde6e402b99aa90180be 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -129,6 +129,8 @@ struct cardinfo { */ struct bio *bio, *currentbio, **biotail; + request_queue_t queue; + struct mm_page { dma_addr_t page_dma; struct mm_dma_desc *desc; @@ -142,8 +144,6 @@ struct cardinfo { struct tasklet_struct tasklet; unsigned int dma_status; - struct tq_struct plug_tq; - struct { int good; int warned; @@ -293,7 +293,7 @@ static void dump_dmastat(struct cardinfo *card, unsigned int dmastat) * Whenever IO on the active page completes, the Ready page is activated * and the ex-Active page is clean out and made Ready. * Otherwise the Ready page is only activated when it becomes full, or - * when mm_unplug_device is called via run_task_queue(&tq_disk). + * when mm_unplug_device is called via blk_run_queues(). * * If a request arrives while both pages a full, it is queued, and b_rdev is * overloaded to record whether it was a read or a write. @@ -341,8 +341,9 @@ static void mm_start_io(struct cardinfo *card) offset = ((char*)desc) - ((char*)page->desc); writel(cpu_to_le32((page->page_dma+offset)&0xffffffff), card->csr_remap + DMA_DESCRIPTOR_ADDR); - /* if sizeof(dma_addr_t) == 32, this will generate a warning, sorry */ - writel(cpu_to_le32((page->page_dma)>>32), + /* Force the value to u64 before shifting otherwise >> 32 is undefined C + * and on some ports will do nothing ! 
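+ * For example: when dma_addr_t is a 32-bit type, "page->page_dma >> 32"
+ * shifts by the full width of the type, which C leaves undefined; i386
+ * masks shift counts to 5 bits, so the value would pass through
+ * unchanged and the high descriptor word would get the low address
+ * bits again. ((u64)page->page_dma) >> 32 is a well-defined 64-bit
+ * shift and yields 0 on such ports.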
*/ + writel(cpu_to_le32(((u64)page->page_dma)>>32), card->csr_remap + DMA_DESCRIPTOR_ADDR + 4); /* Go, go, go */ @@ -384,10 +385,12 @@ static inline void reset_page(struct mm_page *page) static void mm_unplug_device(void *data) { - struct cardinfo *card = data; + request_queue_t *q = data; + struct cardinfo *card = q->queuedata; spin_lock_bh(&card->lock); - activate(card); + if (blk_remove_plug(q)) + activate(card); spin_unlock_bh(&card->lock); } @@ -565,8 +568,7 @@ static void process_page(unsigned long data) */ static int mm_make_request(request_queue_t *q, struct bio *bio) { - struct cardinfo *card = &cards[DEVICE_NR( - bio->bi_bdev->bd_dev)]; + struct cardinfo *card = q->queuedata; PRINTK("mm_make_request %ld %d\n", bh->b_rsector, bh->b_size); /* set uptodate now, and clear it if there are any errors */ @@ -576,9 +578,9 @@ static int mm_make_request(request_queue_t *q, struct bio *bio) *card->biotail = bio; bio->bi_next = NULL; card->biotail = &bio->bi_next; + blk_plug_device(q); spin_unlock_bh(&card->lock); - queue_task(&card->plug_tq, &tq_disk); return 0; } @@ -1065,11 +1067,12 @@ static int __devinit mm_pci_probe(struct pci_dev *dev, const struct pci_device_i card->bio = NULL; card->biotail = &card->bio; + blk_queue_make_request(&card->queue, mm_make_request); + card->queue.queuedata = card; + card->queue.unplug_fn = mm_unplug_device; + tasklet_init(&card->tasklet, process_page, (unsigned long)card); - card->plug_tq.sync = 0; - card->plug_tq.routine = &mm_unplug_device; - card->plug_tq.data = card; card->check_batteries = 0; mem_present = readb(card->csr_remap + MEMCTRLSTATUS_MEMORY); @@ -1237,6 +1240,17 @@ static struct pci_driver mm_pci_driver = { -- mm_init ----------------------------------------------------------------------------------- */ + +static request_queue_t * mm_queue_proc(kdev_t dev) +{ + int c = DEVICE_NR(kdev_val(dev)); + + if (c < MM_MAXCARDS) + return &cards[c].queue; + else + return BLK_DEFAULT_QUEUE(MAJOR_NR); +} + int __init mm_init(void) { int retval, i; @@ -1276,11 +1290,9 @@ int __init mm_init(void) mm_gendisk.part = mm_partitions; mm_gendisk.nr_real = num_cards; + blk_dev[MAJOR_NR].queue = mm_queue_proc; add_gendisk(&mm_gendisk); - blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), - mm_make_request); - blk_size[MAJOR_NR] = mm_gendisk.sizes; for (i = 0; i < num_cards; i++) { register_disk(&mm_gendisk, mk_kdev(MAJOR_NR, i<<MM_SHIFT), MM_SHIFT, diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 48fb74e50d5cb2c41d937fdeb76bf43fa5c20bf1..d8f29104dacf9b8c57a68123aa05894273e0f7eb 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -1,6 +1,6 @@ /* linear.c : Multiple Devices driver for Linux - Copyright (C) 1994-96 Marc ZYNGIER + Copyright (C) 1994-96 Marc ZYNGIER <zyngier@ufr-info-p7.ibp.fr> or <maz@gloups.fdn.fr> @@ -33,39 +33,45 @@ static int linear_run (mddev_t *mddev) linear_conf_t *conf; struct linear_hash *table; mdk_rdev_t *rdev; - int size, i, j, nb_zone; + int size, i, nb_zone, cnt; unsigned int curr_offset; + struct list_head *tmp; MOD_INC_USE_COUNT; conf = kmalloc (sizeof (*conf), GFP_KERNEL); if (!conf) goto out; + memset(conf, 0, sizeof(*conf)); mddev->private = conf; - if (md_check_ordering(mddev)) { - printk("linear: disks are not ordered, aborting!\n"); - goto out; - } /* * Find the smallest device. 
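* (Illustrative numbers, not from the driver: members of 40, 60 and
* 100 blocks give a 200-block array; with the smallest member at 40
* blocks, the hash table built below covers 200/40 = 5 zones.)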
*/
conf->smallest = NULL;
- curr_offset = 0;
- ITERATE_RDEV_ORDERED(mddev,rdev,j) {
+ cnt = 0;
+ ITERATE_RDEV(mddev,rdev,tmp) {
+ int j = rdev->sb->this_disk.raid_disk;
dev_info_t *disk = conf->disks + j;
+ if (j < 0 || j > mddev->sb->raid_disks || disk->bdev) {
+ printk("linear: disk numbering problem. Aborting!\n");
+ goto out;
+ }
+
disk->dev = rdev->dev;
disk->bdev = rdev->bdev;
atomic_inc(&rdev->bdev->bd_count);
disk->size = rdev->size;
- disk->offset = curr_offset;
-
- curr_offset += disk->size;
if (!conf->smallest || (disk->size < conf->smallest->size))
conf->smallest = disk;
+ cnt++;
+ }
+ if (cnt != mddev->sb->raid_disks) {
+ printk("linear: not enough drives present. Aborting!\n");
+ goto out;
}
nb_zone = conf->nr_zones =
@@ -81,10 +87,13 @@ static int linear_run (mddev_t *mddev)
* Here we generate the linear hash table
*/
table = conf->hash_table;
- i = 0;
size = 0;
- for (j = 0; j < mddev->nb_dev; j++) {
- dev_info_t *disk = conf->disks + j;
+ curr_offset = 0;
+ for (i = 0; i < cnt; i++) {
+ dev_info_t *disk = conf->disks + i;
+
+ disk->offset = curr_offset;
+ curr_offset += disk->size;
if (size < 0) {
table[-1].dev1 = disk;
@@ -130,12 +139,13 @@ static int linear_stop (mddev_t *mddev)
return 0;
}
-static int linear_make_request (mddev_t *mddev, int rw, struct bio *bio)
+static int linear_make_request (request_queue_t *q, struct bio *bio)
{
- linear_conf_t *conf = mddev_to_conf(mddev);
- struct linear_hash *hash;
- dev_info_t *tmp_dev;
- long block;
+ mddev_t *mddev = q->queuedata;
+ linear_conf_t *conf = mddev_to_conf(mddev);
+ struct linear_hash *hash;
+ dev_info_t *tmp_dev;
+ long block;
block = bio->bi_sector >> 1;
hash = conf->hash_table + (block / conf->smallest->size);
@@ -186,7 +196,7 @@ static int linear_status (char *page, mddev_t *mddev)
}
sz += sprintf(page+sz, "\n");
#endif
- sz += sprintf(page+sz, " %dk rounding", mddev->param.chunk_size/1024);
+ sz += sprintf(page+sz, " %dk rounding", mddev->sb->chunk_size/1024);
return sz;
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index d232703228045fd1247d27e8bc32809f50222382..acce321b5938d0cb1c8031590c22658b61c84ad8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -107,7 +107,7 @@ static ctl_table raid_root_table[] = {
* subsystems want to have a pre-defined structure
*/
struct hd_struct md_hd_struct[MAX_MD_DEVS];
-static int md_maxreadahead[MAX_MD_DEVS];
+static void md_recover_arrays(void);
static mdk_thread_t *md_recovery_thread;
int md_size[MAX_MD_DEVS];
@@ -129,93 +129,111 @@ static struct gendisk md_gendisk=
/*
* Enables to iterate over all existing md arrays
+ * all_mddevs_lock protects this list as well as mddev_map.
*/
static LIST_HEAD(all_mddevs);
+static spinlock_t all_mddevs_lock = SPIN_LOCK_UNLOCKED;
+
/*
- * The mapping between kdev and mddev is not necessary a simple
- * one! Eg. HSM uses several sub-devices to implement Logical
- * Volumes. All these sub-devices map to the same mddev.
+ * iterates through all used mddevs in the system.
+ * We take care to grab the all_mddevs_lock whenever navigating
+ * the list, and to always hold a refcount when unlocked.
+ * Any code which breaks out of this loop while owning
+ * a reference to the current mddev must mddev_put it.
*/
-dev_mapping_t mddev_map[MAX_MD_DEVS];
+#define ITERATE_MDDEV(mddev,tmp) \
+ \
+ for (spin_lock(&all_mddevs_lock), \
+ (tmp = all_mddevs.next), \
+ (mddev = NULL); \
+ (void)(tmp != &all_mddevs && \
+ mddev_get(list_entry(tmp, mddev_t, all_mddevs))),\
+ spin_unlock(&all_mddevs_lock), \
+ (mddev ?
mddev_put(mddev):(void)NULL), \ + (mddev = list_entry(tmp, mddev_t, all_mddevs)), \ + (tmp != &all_mddevs); \ + spin_lock(&all_mddevs_lock), \ + (tmp = tmp->next) \ + ) + +static mddev_t *mddev_map[MAX_MD_DEVS]; + +static int md_fail_request (request_queue_t *q, struct bio *bio) +{ + bio_io_error(bio); + return 0; +} -void add_mddev_mapping(mddev_t * mddev, kdev_t dev, void *data) +static inline mddev_t *mddev_get(mddev_t *mddev) { - unsigned int minor = minor(dev); - - if (major(dev) != MD_MAJOR) { - MD_BUG(); - return; - } - if (mddev_map[minor].mddev) { - MD_BUG(); - return; - } - mddev_map[minor].mddev = mddev; - mddev_map[minor].data = data; + atomic_inc(&mddev->active); + return mddev; } -void del_mddev_mapping(mddev_t * mddev, kdev_t dev) +static void mddev_put(mddev_t *mddev) { - unsigned int minor = minor(dev); - - if (major(dev) != MD_MAJOR) { - MD_BUG(); + if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) return; + if (!mddev->sb && list_empty(&mddev->disks)) { + list_del(&mddev->all_mddevs); + mddev_map[mdidx(mddev)] = NULL; + kfree(mddev); + MOD_DEC_USE_COUNT; } - if (mddev_map[minor].mddev != mddev) { - MD_BUG(); - return; - } - mddev_map[minor].mddev = NULL; - mddev_map[minor].data = NULL; + spin_unlock(&all_mddevs_lock); } -static int md_make_request (request_queue_t *q, struct bio *bio) +static mddev_t * mddev_find(int unit) { - mddev_t *mddev = kdev_to_mddev(to_kdev_t(bio->bi_bdev->bd_dev)); + mddev_t *mddev, *new = NULL; - if (mddev && mddev->pers) - return mddev->pers->make_request(mddev, bio_rw(bio), bio); - else { - bio_io_error(bio); - return 0; + retry: + spin_lock(&all_mddevs_lock); + if (mddev_map[unit]) { + mddev = mddev_get(mddev_map[unit]); + spin_unlock(&all_mddevs_lock); + if (new) + kfree(new); + return mddev; } -} - -static mddev_t * alloc_mddev(kdev_t dev) -{ - mddev_t *mddev; - - if (major(dev) != MD_MAJOR) { - MD_BUG(); - return 0; + if (new) { + mddev_map[unit] = new; + list_add(&new->all_mddevs, &all_mddevs); + spin_unlock(&all_mddevs_lock); + MOD_INC_USE_COUNT; + return new; } - mddev = (mddev_t *) kmalloc(sizeof(*mddev), GFP_KERNEL); - if (!mddev) + spin_unlock(&all_mddevs_lock); + + new = (mddev_t *) kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) return NULL; - memset(mddev, 0, sizeof(*mddev)); + memset(new, 0, sizeof(*new)); - mddev->__minor = minor(dev); - init_MUTEX(&mddev->reconfig_sem); - init_MUTEX(&mddev->recovery_sem); - init_MUTEX(&mddev->resync_sem); - INIT_LIST_HEAD(&mddev->disks); - INIT_LIST_HEAD(&mddev->all_mddevs); - atomic_set(&mddev->active, 0); + new->__minor = unit; + init_MUTEX(&new->reconfig_sem); + INIT_LIST_HEAD(&new->disks); + INIT_LIST_HEAD(&new->all_mddevs); + atomic_set(&new->active, 1); - /* - * The 'base' mddev is the one with data NULL. - * personalities can create additional mddevs - * if necessary. 
- */ - add_mddev_mapping(mddev, dev, 0); - list_add(&mddev->all_mddevs, &all_mddevs); + goto retry; +} - MOD_INC_USE_COUNT; +static inline int mddev_lock(mddev_t * mddev) +{ + return down_interruptible(&mddev->reconfig_sem); +} - return mddev; +static inline int mddev_trylock(mddev_t * mddev) +{ + return down_trylock(&mddev->reconfig_sem); +} + +static inline void mddev_unlock(mddev_t * mddev) +{ + up(&mddev->reconfig_sem); } mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) @@ -249,13 +267,12 @@ char * partition_name(kdev_t dev) struct gendisk *hd; static char nomem [] = "<nomem>"; dev_name_t *dname; - struct list_head *tmp = device_names.next; + struct list_head *tmp; - while (tmp != &device_names) { + list_for_each(tmp, &device_names) { dname = list_entry(tmp, dev_name_t, list); if (kdev_same(dname->dev, dev)) return dname->name; - tmp = tmp->next; } dname = (dev_name_t *) kmalloc(sizeof(*dname), GFP_KERNEL); @@ -275,7 +292,6 @@ char * partition_name(kdev_t dev) } dname->dev = dev; - INIT_LIST_HEAD(&dname->list); list_add(&dname->list, &device_names); return dname->name; @@ -326,69 +342,6 @@ static unsigned int zoned_raid_size(mddev_t *mddev) return 0; } -/* - * We check wether all devices are numbered from 0 to nb_dev-1. The - * order is guaranteed even after device name changes. - * - * Some personalities (raid0, linear) use this. Personalities that - * provide data have to be able to deal with loss of individual - * disks, so they do their checking themselves. - */ -int md_check_ordering(mddev_t *mddev) -{ - int i, c; - mdk_rdev_t *rdev; - struct list_head *tmp; - - /* - * First, all devices must be fully functional - */ - ITERATE_RDEV(mddev,rdev,tmp) { - if (rdev->faulty) { - printk(KERN_ERR "md: md%d's device %s faulty, aborting.\n", - mdidx(mddev), partition_name(rdev->dev)); - goto abort; - } - } - - c = 0; - ITERATE_RDEV(mddev,rdev,tmp) { - c++; - } - if (c != mddev->nb_dev) { - MD_BUG(); - goto abort; - } - if (mddev->nb_dev != mddev->sb->raid_disks) { - printk(KERN_ERR "md: md%d, array needs %d disks, has %d, aborting.\n", - mdidx(mddev), mddev->sb->raid_disks, mddev->nb_dev); - goto abort; - } - /* - * Now the numbering check - */ - for (i = 0; i < mddev->nb_dev; i++) { - c = 0; - ITERATE_RDEV(mddev,rdev,tmp) { - if (rdev->desc_nr == i) - c++; - } - if (!c) { - printk(KERN_ERR "md: md%d, missing disk #%d, aborting.\n", - mdidx(mddev), i); - goto abort; - } - if (c > 1) { - printk(KERN_ERR "md: md%d, too many disks #%d, aborting.\n", - mdidx(mddev), i); - goto abort; - } - } - return 0; -abort: - return 1; -} - static void remove_descriptor(mdp_disk_t *disk, mdp_super_t *sb) { if (disk_active(disk)) { @@ -618,8 +571,7 @@ static void bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) list_add(&rdev->same_set, &mddev->disks); rdev->mddev = mddev; - mddev->nb_dev++; - printk(KERN_INFO "md: bind<%s,%d>\n", partition_name(rdev->dev), mddev->nb_dev); + printk(KERN_INFO "md: bind<%s>\n", partition_name(rdev->dev)); } static void unbind_rdev_from_array(mdk_rdev_t * rdev) @@ -628,11 +580,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) MD_BUG(); return; } - list_del(&rdev->same_set); - INIT_LIST_HEAD(&rdev->same_set); - rdev->mddev->nb_dev--; - printk(KERN_INFO "md: unbind<%s,%d>\n", partition_name(rdev->dev), - rdev->mddev->nb_dev); + list_del_init(&rdev->same_set); + printk(KERN_INFO "md: unbind<%s>\n", partition_name(rdev->dev)); rdev->mddev = NULL; } @@ -682,13 +631,11 @@ static void export_rdev(mdk_rdev_t * rdev) MD_BUG(); unlock_rdev(rdev); free_disk_sb(rdev); - 
list_del(&rdev->all); - INIT_LIST_HEAD(&rdev->all); - if (rdev->pending.next != &rdev->pending) { + list_del_init(&rdev->all); + if (!list_empty(&rdev->pending)) { printk(KERN_INFO "md: (%s was pending)\n", partition_name(rdev->dev)); - list_del(&rdev->pending); - INIT_LIST_HEAD(&rdev->pending); + list_del_init(&rdev->pending); } #ifndef MODULE md_autodetect_dev(rdev->dev); @@ -722,7 +669,7 @@ static void export_array(mddev_t *mddev) } kick_rdev_from_array(rdev); } - if (mddev->nb_dev) + if (!list_empty(&mddev->disks)) MD_BUG(); } @@ -736,21 +683,6 @@ static void free_mddev(mddev_t *mddev) export_array(mddev); md_size[mdidx(mddev)] = 0; md_hd_struct[mdidx(mddev)].nr_sects = 0; - - /* - * Make sure nobody else is using this mddev - * (careful, we rely on the global kernel lock here) - */ - while (atomic_read(&mddev->resync_sem.count) != 1) - schedule(); - while (atomic_read(&mddev->recovery_sem.count) != 1) - schedule(); - - del_mddev_mapping(mddev, mk_kdev(MD_MAJOR, mdidx(mddev))); - list_del(&mddev->all_mddevs); - INIT_LIST_HEAD(&mddev->all_mddevs); - kfree(mddev); - MOD_DEC_USE_COUNT; } #undef BAD_CSUM @@ -892,12 +824,10 @@ static mdk_rdev_t * find_rdev_all(kdev_t dev) struct list_head *tmp; mdk_rdev_t *rdev; - tmp = all_raid_disks.next; - while (tmp != &all_raid_disks) { + list_for_each(tmp, &all_raid_disks) { rdev = list_entry(tmp, mdk_rdev_t, all); if (kdev_same(rdev->dev, dev)) return rdev; - tmp = tmp->next; } return NULL; } @@ -993,12 +923,13 @@ static int sync_sbs(mddev_t * mddev) return 0; } -int md_update_sb(mddev_t * mddev) +void __md_update_sb(mddev_t * mddev) { int err, count = 100; struct list_head *tmp; mdk_rdev_t *rdev; + mddev->sb_dirty = 0; repeat: mddev->sb->utime = CURRENT_TIME; if (!(++mddev->sb->events_lo)) @@ -1020,7 +951,7 @@ int md_update_sb(mddev_t * mddev) * nonpersistent superblocks */ if (mddev->sb->not_persistent) - return 0; + return; printk(KERN_INFO "md: updating md%d RAID superblock on device\n", mdidx(mddev)); @@ -1048,9 +979,18 @@ int md_update_sb(mddev_t * mddev) } printk(KERN_ERR "md: excessive errors occurred during superblock update, exiting\n"); } - return 0; } +void md_update_sb(mddev_t *mddev) +{ + if (mddev_lock(mddev)) + return; + if (mddev->sb_dirty) + __md_update_sb(mddev); + mddev_unlock(mddev); +} + + /* * Import a device. 
If 'on_disk', then sanity check the superblock * @@ -1122,6 +1062,7 @@ static int md_import_device(kdev_t newdev, int on_disk) } list_add(&rdev->all, &all_raid_disks); INIT_LIST_HEAD(&rdev->pending); + INIT_LIST_HEAD(&rdev->same_set); if (rdev->faulty && rdev->sb) free_disk_sb(rdev); @@ -1574,7 +1515,6 @@ static int device_size_calculation(mddev_t * mddev) if (sb->level == -3) readahead = 0; } - md_maxreadahead[mdidx(mddev)] = readahead; printk(KERN_INFO "md%d: max total readahead window set to %ldk\n", mdidx(mddev), readahead*(PAGE_SIZE/1024)); @@ -1605,7 +1545,7 @@ static int do_md_run(mddev_t * mddev) mdk_rdev_t *rdev; - if (!mddev->nb_dev) { + if (list_empty(&mddev->disks)) { MD_BUG(); return -EINVAL; } @@ -1630,9 +1570,6 @@ static int do_md_run(mddev_t * mddev) chunk_size = mddev->sb->chunk_size; pnum = level_to_pers(mddev->sb->level); - mddev->param.chunk_size = chunk_size; - mddev->param.personality = pnum; - if ((pnum != MULTIPATH) && (pnum != RAID1)) { if (!chunk_size) { /* @@ -1712,6 +1649,9 @@ static int do_md_run(mddev_t * mddev) } mddev->pers = pers[pnum]; + blk_queue_make_request(&mddev->queue, mddev->pers->make_request); + mddev->queue.queuedata = mddev; + err = mddev->pers->run(mddev); if (err) { printk(KERN_ERR "md: pers->run() failed ...\n"); @@ -1719,9 +1659,15 @@ static int do_md_run(mddev_t * mddev) return -EINVAL; } - mddev->sb->state &= ~(1 << MD_SB_CLEAN); - md_update_sb(mddev); + mddev->in_sync = (mddev->sb->state & (1<<MD_SB_CLEAN)); + /* if personality doesn't have "sync_request", then + * a dirty array doesn't mean anything + */ + if (mddev->pers->sync_request) + mddev->sb->state &= ~(1 << MD_SB_CLEAN); + __md_update_sb(mddev); + md_recover_arrays(); /* * md_size has units of 1K blocks, which are * twice as large as sectors. @@ -1736,21 +1682,21 @@ static int do_md_run(mddev_t * mddev) #undef TOO_BIG_CHUNKSIZE #undef BAD_CHUNKSIZE -#define OUT(x) do { err = (x); goto out; } while (0) - static int restart_array(mddev_t *mddev) { - int err = 0; + int err; /* * Complain if it has no devices */ - if (!mddev->nb_dev) - OUT(-ENXIO); + err = -ENXIO; + if (list_empty(&mddev->disks)) + goto out; if (mddev->pers) { + err = -EBUSY; if (!mddev->ro) - OUT(-EBUSY); + goto out; mddev->ro = 0; set_device_ro(mddev_to_kdev(mddev), 0); @@ -1761,8 +1707,7 @@ static int restart_array(mddev_t *mddev) * Kick recovery or resync if necessary */ md_recover_arrays(); - if (mddev->pers->restart_resync) - mddev->pers->restart_resync(mddev); + err = 0; } else { printk(KERN_ERR "md: md%d has no personality assigned.\n", mdidx(mddev)); @@ -1780,49 +1725,43 @@ static int restart_array(mddev_t *mddev) static int do_md_stop(mddev_t * mddev, int ro) { - int err = 0, resync_interrupted = 0; + int err = 0; kdev_t dev = mddev_to_kdev(mddev); if (atomic_read(&mddev->active)>1) { printk(STILL_IN_USE, mdidx(mddev)); - OUT(-EBUSY); + err = -EBUSY; + goto out; } if (mddev->pers) { - /* - * It is safe to call stop here, it only frees private - * data. Also, it tells us if a device is unstoppable - * (eg. resyncing is in progress) - */ - if (mddev->pers->stop_resync) - if (mddev->pers->stop_resync(mddev)) - resync_interrupted = 1; - - if (mddev->recovery_running) - md_interrupt_thread(md_recovery_thread); - - /* - * This synchronizes with signal delivery to the - * resync or reconstruction thread. It also nicely - * hangs the process if some reconstruction has not - * finished. 
- */ - down(&mddev->recovery_sem); - up(&mddev->recovery_sem); + if (mddev->sync_thread) { + if (mddev->recovery_running > 0) + mddev->recovery_running = -EINTR; + md_unregister_thread(mddev->sync_thread); + mddev->sync_thread = NULL; + if (mddev->spare) { + mddev->pers->diskop(mddev, &mddev->spare, + DISKOP_SPARE_INACTIVE); + mddev->spare = NULL; + } + } invalidate_device(dev, 1); if (ro) { + err = -ENXIO; if (mddev->ro) - OUT(-ENXIO); + goto out; mddev->ro = 1; } else { if (mddev->ro) set_device_ro(dev, 0); if (mddev->pers->stop(mddev)) { + err = -EBUSY; if (mddev->ro) set_device_ro(dev, 1); - OUT(-EBUSY); + goto out; } if (mddev->ro) mddev->ro = 0; @@ -1832,11 +1771,11 @@ static int do_md_stop(mddev_t * mddev, int ro) * mark it clean only if there was no resync * interrupted. */ - if (!mddev->recovery_running && !resync_interrupted) { + if (mddev->in_sync) { printk(KERN_INFO "md: marking sb clean...\n"); mddev->sb->state |= 1 << MD_SB_CLEAN; } - md_update_sb(mddev); + __md_update_sb(mddev); } if (ro) set_device_ro(dev, 1); @@ -1848,15 +1787,13 @@ static int do_md_stop(mddev_t * mddev, int ro) if (!ro) { printk(KERN_INFO "md: md%d stopped.\n", mdidx(mddev)); free_mddev(mddev); - } else printk(KERN_INFO "md: md%d switched to read-only mode.\n", mdidx(mddev)); + err = 0; out: return err; } -#undef OUT - /* * We have to safely support old arrays too. */ @@ -1877,7 +1814,7 @@ static void autorun_array(mddev_t *mddev) struct list_head *tmp; int err; - if (mddev->disks.prev == &mddev->disks) { + if (list_empty(&mddev->disks)) { MD_BUG(); return; } @@ -1912,17 +1849,15 @@ static void autorun_array(mddev_t *mddev) * * If "unit" is allocated, then bump its reference count */ -static void autorun_devices(kdev_t countdev) +static void autorun_devices(void) { struct list_head candidates; struct list_head *tmp; mdk_rdev_t *rdev0, *rdev; mddev_t *mddev; - kdev_t md_kdev; - printk(KERN_INFO "md: autorun ...\n"); - while (pending_raid_disks.next != &pending_raid_disks) { + while (!list_empty(&pending_raid_disks)) { rdev0 = list_entry(pending_raid_disks.next, mdk_rdev_t, pending); @@ -1946,29 +1881,34 @@ static void autorun_devices(kdev_t countdev) * mostly sane superblocks. It's time to allocate the * mddev. 
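* mddev_find() either returns the existing mddev_map[unit] with its
* refcount raised, or kmallocs a candidate outside all_mddevs_lock
* and retries, so it never allocates while holding the spinlock;
* the loser of a race simply kfrees its unused candidate.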
*/ - md_kdev = mk_kdev(MD_MAJOR, rdev0->sb->md_minor); - mddev = kdev_to_mddev(md_kdev); - if (mddev) { - printk(KERN_WARNING "md: md%d already running, cannot run %s\n", - mdidx(mddev), partition_name(rdev0->dev)); - ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) - export_rdev(rdev); - continue; - } - mddev = alloc_mddev(md_kdev); + + mddev = mddev_find(rdev0->sb->md_minor); if (!mddev) { printk(KERN_ERR "md: cannot allocate memory for md drive.\n"); break; } - if (kdev_same(md_kdev, countdev)) - atomic_inc(&mddev->active); - printk(KERN_INFO "md: created md%d\n", mdidx(mddev)); - ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) { - bind_rdev_to_array(rdev, mddev); - list_del(&rdev->pending); - INIT_LIST_HEAD(&rdev->pending); + if (mddev_lock(mddev)) + printk(KERN_WARNING "md: md%d locked, cannot run\n", + mdidx(mddev)); + else if (mddev->sb || !list_empty(&mddev->disks)) { + printk(KERN_WARNING "md: md%d already running, cannot run %s\n", + mdidx(mddev), partition_name(rdev0->dev)); + mddev_unlock(mddev); + } else { + printk(KERN_INFO "md: created md%d\n", mdidx(mddev)); + ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) { + bind_rdev_to_array(rdev, mddev); + list_del_init(&rdev->pending); + } + autorun_array(mddev); + mddev_unlock(mddev); } - autorun_array(mddev); + /* on success, candidates will be empty, on error + * it wont... + */ + ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) + export_rdev(rdev); + mddev_put(mddev); } printk(KERN_INFO "md: ... autorun DONE.\n"); } @@ -2005,7 +1945,7 @@ static void autorun_devices(kdev_t countdev) #define AUTORUNNING KERN_INFO \ "md: auto-running md%d.\n" -static int autostart_array(kdev_t startdev, kdev_t countdev) +static int autostart_array(kdev_t startdev) { int err = -EINVAL, i; mdp_super_t *sb = NULL; @@ -2065,7 +2005,7 @@ static int autostart_array(kdev_t startdev, kdev_t countdev) /* * possibly return codes */ - autorun_devices(countdev); + autorun_devices(); return 0; abort: @@ -2191,7 +2131,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) MD_BUG(); return -EINVAL; } - if (mddev->nb_dev) { + if (!list_empty(&mddev->disks)) { mdk_rdev_t *rdev0 = list_entry(mddev->disks.next, mdk_rdev_t, same_set); if (!uuid_equal(rdev0, rdev)) { @@ -2346,8 +2286,7 @@ static int hot_remove_disk(mddev_t * mddev, kdev_t dev) remove_descriptor(disk, mddev->sb); kick_rdev_from_array(rdev); - mddev->sb_dirty = 1; - md_update_sb(mddev); + __md_update_sb(mddev); return 0; busy: @@ -2458,9 +2397,7 @@ static int hot_add_disk(mddev_t * mddev, kdev_t dev) mddev->sb->spare_disks++; mddev->sb->working_disks++; - mddev->sb_dirty = 1; - - md_update_sb(mddev); + __md_update_sb(mddev); /* * Kick recovery, maybe this spare has to be added to the @@ -2520,36 +2457,6 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) } #undef SET_SB -static int set_disk_info(mddev_t * mddev, void * arg) -{ - printk(KERN_INFO "md: not yet"); - return -EINVAL; -} - -static int clear_array(mddev_t * mddev) -{ - printk(KERN_INFO "md: not yet"); - return -EINVAL; -} - -static int write_raid_info(mddev_t * mddev) -{ - printk(KERN_INFO "md: not yet"); - return -EINVAL; -} - -static int protect_array(mddev_t * mddev) -{ - printk(KERN_INFO "md: not yet"); - return -EINVAL; -} - -static int unprotect_array(mddev_t * mddev) -{ - printk(KERN_INFO "md: not yet"); - return -EINVAL; -} - static int set_disk_faulty(mddev_t *mddev, kdev_t dev) { mdk_rdev_t *rdev; @@ -2595,7 +2502,7 @@ static int md_ioctl(struct inode *inode, struct file *file, case 
PRINT_RAID_DEBUG: err = 0; md_print_devices(); - goto done_unlock; + goto done; #ifndef MODULE case RAID_AUTORUN: @@ -2632,40 +2539,30 @@ static int md_ioctl(struct inode *inode, struct file *file, * Commands creating/starting a new array: */ - mddev = kdev_to_mddev(dev); + mddev = inode->i_bdev->bd_inode->u.generic_ip; - switch (cmd) - { - case SET_ARRAY_INFO: - case START_ARRAY: - if (mddev) { - printk(KERN_WARNING "md: array md%d already exists!\n", - mdidx(mddev)); - err = -EEXIST; - goto abort; - } - default:; + if (!mddev) { + BUG(); + goto abort; } + + err = mddev_lock(mddev); + if (err) { + printk(KERN_INFO "md: ioctl lock interrupted, reason %d, cmd %d\n", + err, cmd); + goto abort; + } + switch (cmd) { case SET_ARRAY_INFO: - mddev = alloc_mddev(dev); - if (!mddev) { - err = -ENOMEM; - goto abort; - } - atomic_inc(&mddev->active); - /* - * alloc_mddev() should possibly self-lock. - */ - err = lock_mddev(mddev); - if (err) { - printk(KERN_WARNING "md: ioctl, reason %d, cmd %d\n", - err, cmd); - goto abort; + if (!list_empty(&mddev->disks)) { + printk(KERN_WARNING "md: array md%d already has disks!\n", + mdidx(mddev)); + err = -EBUSY; + goto abort_unlock; } - if (mddev->sb) { printk(KERN_WARNING "md: array md%d already has a superblock!\n", mdidx(mddev)); @@ -2690,13 +2587,13 @@ static int md_ioctl(struct inode *inode, struct file *file, /* * possibly make it lock the array ... */ - err = autostart_array(val_to_kdev(arg), dev); + err = autostart_array(val_to_kdev(arg)); if (err) { printk(KERN_WARNING "md: autostart %s failed!\n", partition_name(val_to_kdev(arg))); - goto abort; + goto abort_unlock; } - goto done; + goto done_unlock; default:; } @@ -2704,16 +2601,6 @@ static int md_ioctl(struct inode *inode, struct file *file, /* * Commands querying/configuring an existing array: */ - - if (!mddev) { - err = -ENODEV; - goto abort; - } - err = lock_mddev(mddev); - if (err) { - printk(KERN_INFO "md: ioctl lock interrupted, reason %d, cmd %d\n",err, cmd); - goto abort; - } /* if we don't have a superblock yet, only ADD_NEW_DISK or STOP_ARRAY is allowed */ if (!mddev->sb && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY && cmd != RUN_ARRAY) { err = -ENODEV; @@ -2738,8 +2625,7 @@ static int md_ioctl(struct inode *inode, struct file *file, goto done_unlock; case STOP_ARRAY: - if (!(err = do_md_stop (mddev, 0))) - mddev = NULL; + err = do_md_stop (mddev, 0); goto done_unlock; case STOP_ARRAY_RO: @@ -2784,10 +2670,6 @@ static int md_ioctl(struct inode *inode, struct file *file, switch (cmd) { - case CLEAR_ARRAY: - err = clear_array(mddev); - goto done_unlock; - case ADD_NEW_DISK: { mdu_disk_info_t info; @@ -2808,35 +2690,12 @@ static int md_ioctl(struct inode *inode, struct file *file, err = hot_add_disk(mddev, val_to_kdev(arg)); goto done_unlock; - case SET_DISK_INFO: - err = set_disk_info(mddev, (void *)arg); - goto done_unlock; - - case WRITE_RAID_INFO: - err = write_raid_info(mddev); - goto done_unlock; - - case UNPROTECT_ARRAY: - err = unprotect_array(mddev); - goto done_unlock; - - case PROTECT_ARRAY: - err = protect_array(mddev); - goto done_unlock; - case SET_DISK_FAULTY: err = set_disk_faulty(mddev, val_to_kdev(arg)); goto done_unlock; case RUN_ARRAY: { -/* The data is never used.... 
- mdu_param_t param; - err = copy_from_user(¶m, (mdu_param_t *)arg, - sizeof(param)); - if (err) - goto abort_unlock; -*/ err = do_md_run (mddev); /* * we have to clean up the mess if @@ -2845,8 +2704,7 @@ static int md_ioctl(struct inode *inode, struct file *file, */ if (err) { mddev->sb_dirty = 0; - if (!do_md_stop (mddev, 0)) - mddev = NULL; + do_md_stop (mddev, 0); } goto done_unlock; } @@ -2861,8 +2719,7 @@ static int md_ioctl(struct inode *inode, struct file *file, done_unlock: abort_unlock: - if (mddev) - unlock_mddev(mddev); + mddev_unlock(mddev); return err; done: @@ -2875,19 +2732,34 @@ static int md_ioctl(struct inode *inode, struct file *file, static int md_open(struct inode *inode, struct file *file) { /* - * Always succeed, but increment the usage count + * Succeed if we can find or allocate a mddev structure. */ - mddev_t *mddev = kdev_to_mddev(inode->i_rdev); - if (mddev) - atomic_inc(&mddev->active); - return (0); + mddev_t *mddev = mddev_find(minor(inode->i_rdev)); + int err = -ENOMEM; + + if (!mddev) + goto out; + + if ((err = mddev_lock(mddev))) + goto put; + + err = 0; + mddev_unlock(mddev); + inode->i_bdev->bd_inode->u.generic_ip = mddev_get(mddev); + put: + mddev_put(mddev); + out: + return err; } static int md_release(struct inode *inode, struct file * file) { - mddev_t *mddev = kdev_to_mddev(inode->i_rdev); - if (mddev) - atomic_dec(&mddev->active); + mddev_t *mddev = inode->i_bdev->bd_inode->u.generic_ip; + + if (!mddev) + BUG(); + mddev_put(mddev); + return 0; } @@ -2918,6 +2790,7 @@ int md_thread(void * arg) */ daemonize(); + reparent_to_init(); sprintf(current->comm, thread->name); current->exit_signal = SIGCHLD; @@ -2941,17 +2814,10 @@ int md_thread(void * arg) complete(thread->event); while (thread->run) { void (*run)(void *data); - DECLARE_WAITQUEUE(wait, current); - add_wait_queue(&thread->wqueue, &wait); - set_task_state(current, TASK_INTERRUPTIBLE); - if (!test_bit(THREAD_WAKEUP, &thread->flags)) { - dprintk("md: thread %p went to sleep.\n", thread); - schedule(); - dprintk("md: thread %p woke up.\n", thread); - } - current->state = TASK_RUNNING; - remove_wait_queue(&thread->wqueue, &wait); + wait_event_interruptible(thread->wqueue, + test_bit(THREAD_WAKEUP, &thread->flags)); + clear_bit(THREAD_WAKEUP, &thread->flags); run = thread->run; @@ -3026,7 +2892,7 @@ void md_unregister_thread(mdk_thread_t *thread) kfree(thread); } -void md_recover_arrays(void) +static void md_recover_arrays(void) { if (!md_recovery_thread) { MD_BUG(); @@ -3042,7 +2908,7 @@ int md_error(mddev_t *mddev, struct block_device *bdev) kdev_t rdev = to_kdev_t(bdev->bd_dev); dprintk("md_error dev:(%d:%d), rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", - major(dev),minor(dev),major(rdev),minor(rdev), + MD_MAJOR,mdidx(mddev),major(rdev),minor(rdev), __builtin_return_address(0),__builtin_return_address(1), __builtin_return_address(2),__builtin_return_address(3)); @@ -3055,17 +2921,14 @@ int md_error(mddev_t *mddev, struct block_device *bdev) return 0; if (!mddev->pers->error_handler || mddev->pers->error_handler(mddev,rdev) <= 0) { - free_disk_sb(rrdev); rrdev->faulty = 1; } else return 1; /* * if recovery was running, stop it now. 
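* That no longer means interrupting a thread: setting
* recovery_running to -EIO below is the whole signal, and
* md_do_recovery() will reap ->sync_thread and retire any
* half-rebuilt spare.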
*/
- if (mddev->pers->stop_resync)
- mddev->pers->stop_resync(mddev);
- if (mddev->recovery_running)
- md_interrupt_thread(md_recovery_thread);
+ if (mddev->recovery_running)
+ mddev->recovery_running = -EIO;
md_recover_arrays();
return 0;
@@ -3080,7 +2943,7 @@ static int status_unused(char * page)
sz += sprintf(page + sz, "unused devices: ");
ITERATE_RDEV_ALL(rdev,tmp) {
- if (!rdev->same_set.next && !rdev->same_set.prev) {
+ if (list_empty(&rdev->same_set)) {
/*
* The device is not yet used by any array.
*/
@@ -3123,18 +2986,9 @@ static int status_resync(char * page, mddev_t * mddev)
sz += sprintf(page + sz, ".");
sz += sprintf(page + sz, "] ");
}
- if (!mddev->recovery_running)
- /*
- * true resync
- */
- sz += sprintf(page + sz, " resync =%3lu.%lu%% (%lu/%lu)",
- res/10, res % 10, resync, max_blocks);
- else
- /*
- * recovery ...
- */
- sz += sprintf(page + sz, " recovery =%3lu.%lu%% (%lu/%lu)",
- res/10, res % 10, resync, max_blocks);
+ sz += sprintf(page + sz, " %s =%3lu.%lu%% (%lu/%lu)",
+ (mddev->spare ? "recovery" : "resync"),
+ res/10, res % 10, resync, max_blocks);
/*
* We do not want to overflow, so the order of operands and
@@ -3172,7 +3026,7 @@ static int md_status_read_proc(char *page, char **start, off_t off,
sz += sprintf(page+sz, "\n");
- ITERATE_MDDEV(mddev,tmp) {
+ ITERATE_MDDEV(mddev,tmp) if (mddev_lock(mddev)==0) {
sz += sprintf(page + sz, "md%d : %sactive", mdidx(mddev),
mddev->pers ? "" : "in");
if (mddev->pers) {
@@ -3192,7 +3046,7 @@ static int md_status_read_proc(char *page, char **start, off_t off,
size += rdev->size;
}
- if (mddev->nb_dev) {
+ if (!list_empty(&mddev->disks)) {
if (mddev->pers)
sz += sprintf(page + sz, "\n %d blocks",
md_size[mdidx(mddev)]);
@@ -3202,19 +3056,20 @@ static int md_status_read_proc(char *page, char **start, off_t off,
if (!mddev->pers) {
sz += sprintf(page+sz, "\n");
+ mddev_unlock(mddev);
continue;
}
sz += mddev->pers->status (page+sz, mddev);
sz += sprintf(page+sz, "\n ");
- if (mddev->curr_resync) {
+ if (mddev->curr_resync > 1)
sz += status_resync (page+sz, mddev);
- } else {
- if (atomic_read(&mddev->resync_sem.count) != 1)
+ else if (mddev->curr_resync == 1)
sz += sprintf(page + sz, " resync=DELAYED");
- }
+
sz += sprintf(page + sz, "\n");
+ mddev_unlock(mddev);
}
sz += status_unused(page + sz);
@@ -3315,60 +3170,70 @@ static int is_mddev_idle(mddev_t *mddev)
return idle;
}
-DECLARE_WAIT_QUEUE_HEAD(resync_wait);
-
void md_done_sync(mddev_t *mddev, int blocks, int ok)
{
/* another "blocks" (512byte) blocks have been synced */
atomic_sub(blocks, &mddev->recovery_active);
wake_up(&mddev->recovery_wait);
if (!ok) {
+ mddev->recovery_running = -EIO;
+ md_recover_arrays();
// stop recovery, signal do_sync ....
}
}
+
+DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+
#define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ)
-int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
+static void md_do_sync(void *data)
{
+ mddev_t *mddev = data;
mddev_t *mddev2;
unsigned int max_sectors, currspeed = 0,
- j, window, err, serialize;
+ j, window, err;
unsigned long mark[SYNC_MARKS];
unsigned long mark_cnt[SYNC_MARKS];
int last_mark,m;
struct list_head *tmp;
unsigned long last_check;
+ /* just in case thread restarts... */
+ if (mddev->recovery_running <= 0)
+ return;
- err = down_interruptible(&mddev->resync_sem);
- if (err)
- goto out_nolock;
+ /* we overload curr_resync somewhat here.
+ * 0 == not engaged in resync at all
+ * 2 == checking that there is no conflict with another sync
+ * 1 == like 2, but have yielded to allow conflicting resync to
+ * commence
+ * other == active in resync - this many blocks
+ */
+ do {
+ mddev->curr_resync = 2;
-recheck:
- serialize = 0;
- ITERATE_MDDEV(mddev2,tmp) {
- if (mddev2 == mddev)
- continue;
- if (mddev2->curr_resync && match_mddev_units(mddev,mddev2)) {
- printk(KERN_INFO "md: delaying resync of md%d until md%d "
- "has finished resync (they share one or more physical units)\n",
- mdidx(mddev), mdidx(mddev2));
- serialize = 1;
- break;
- }
- }
- if (serialize) {
- interruptible_sleep_on(&resync_wait);
- if (signal_pending(current)) {
- flush_curr_signals();
- err = -EINTR;
- goto out
+ ITERATE_MDDEV(mddev2,tmp) {
+ if (mddev2 == mddev)
+ continue;
+ if (mddev2->curr_resync &&
+ match_mddev_units(mddev,mddev2)) {
+ printk(KERN_INFO "md: delaying resync of md%d until md%d "
+ "has finished resync (they share one or more physical units)\n",
+ mdidx(mddev), mdidx(mddev2));
+ if (mddev < mddev2) /* arbitrarily yield */
+ mddev->curr_resync = 1;
+ if (wait_event_interruptible(resync_wait,
+ mddev2->curr_resync < 2)) {
+ flush_curr_signals();
+ err = -EINTR;
+ mddev_put(mddev2);
+ goto out;
+ }
+ }
}
- goto recheck;
- }
+ } while (mddev->curr_resync < 2);
- mddev->curr_resync = 1;
max_sectors = mddev->sb->size << 1;
printk(KERN_INFO "md: syncing RAID array md%d\n", mdidx(mddev));
@@ -3406,7 +3271,7 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
}
atomic_add(sectors, &mddev->recovery_active);
j += sectors;
- mddev->curr_resync = j;
+ if (j>1) mddev->curr_resync = j;
if (last_check + window > j)
continue;
@@ -3432,7 +3297,6 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
/*
* got a signal, exit.
*/
- mddev->curr_resync = 0;
printk(KERN_INFO "md: md_do_sync() got signal ... exiting\n");
flush_curr_signals();
err = -EINTR;
@@ -3467,106 +3331,116 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
*/
out:
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
- up(&mddev->resync_sem);
-out_nolock:
+ /* tell personality that we are finished */
+ mddev->pers->sync_request(mddev, max_sectors, 1);
+
mddev->curr_resync = 0;
- wake_up(&resync_wait);
- return err;
+ if (err)
+ mddev->recovery_running = err;
+ if (mddev->recovery_running > 0)
+ mddev->recovery_running = 0;
+ if (mddev->recovery_running == 0)
+ mddev->in_sync = 1;
+ md_recover_arrays();
}
/*
- * This is a kernel thread which syncs a spare disk with the active array
- *
- * the amount of foolproofing might seem to be a tad excessive, but an
- * early (not so error-safe) version of raid1syncd synced the first 0.5 gigs
- * of my root partition with the first 0.5 gigs of my /home partition ... so
- * i'm a bit nervous ;)
+ * This is the kernel thread that watches all md arrays for re-sync action
+ * that might be needed.
+ * It does not do any resync itself, but rather "forks" off other threads
+ * to do that as needed.
+ * When it is determined that resync is needed, we set "->recovery_running" and
+ * create a thread at ->sync_thread.
+ * When the thread finishes it clears recovery_running (or sets an error)
+ * and wakes up this thread, which will reap the thread and finish up.
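+ * (Life-cycle, as implemented below:
+ * md_recover_arrays() wakes this thread;
+ * md_do_recovery() picks a spare, registers md_do_sync() as
+ * ->sync_thread and sets recovery_running = 1;
+ * md_do_sync() resyncs, leaves recovery_running <= 0 and calls
+ * md_recover_arrays() again;
+ * md_do_recovery() then reaps the thread and activates or retires
+ * the spare before __md_update_sb().)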
*/ void md_do_recovery(void *data) { - int err; mddev_t *mddev; mdp_super_t *sb; - mdp_disk_t *spare; struct list_head *tmp; - printk(KERN_INFO "md: recovery thread got woken up ...\n"); -restart: - ITERATE_MDDEV(mddev,tmp) { + dprintk(KERN_INFO "md: recovery thread got woken up ...\n"); + + ITERATE_MDDEV(mddev,tmp) if (mddev_lock(mddev)==0) { sb = mddev->sb; - if (!sb) - continue; - if (mddev->recovery_running) - continue; - if (sb->active_disks == sb->raid_disks) - continue; - if (!sb->spare_disks) { - printk(KERN_ERR "md%d: no spare disk to reconstruct array! " - "-- continuing in degraded mode\n", mdidx(mddev)); - continue; - } - /* - * now here we get the spare and resync it. - */ - spare = get_spare(mddev); - if (!spare) - continue; - printk(KERN_INFO "md%d: resyncing spare disk %s to replace failed disk\n", - mdidx(mddev), partition_name(mk_kdev(spare->major,spare->minor))); - if (!mddev->pers->diskop) - continue; - if (mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_WRITE)) - continue; - down(&mddev->recovery_sem); - mddev->recovery_running = 1; - err = md_do_sync(mddev, spare); - if (err == -EIO) { - printk(KERN_INFO "md%d: spare disk %s failed, skipping to next spare.\n", - mdidx(mddev), partition_name(mk_kdev(spare->major,spare->minor))); - if (!disk_faulty(spare)) { - mddev->pers->diskop(mddev,&spare,DISKOP_SPARE_INACTIVE); - mark_disk_faulty(spare); - mark_disk_nonsync(spare); - mark_disk_inactive(spare); - sb->spare_disks--; - sb->working_disks--; - sb->failed_disks++; + if (!sb || !mddev->pers || !mddev->pers->diskop || mddev->ro) + goto unlock; + if (mddev->recovery_running > 0) + /* resync/recovery still happening */ + goto unlock; + if (mddev->sync_thread) { + /* resync has finished, collect result */ + md_unregister_thread(mddev->sync_thread); + mddev->sync_thread = NULL; + if (mddev->recovery_running < 0) { + /* some sort of failure. + * If we were doing a reconstruction, + * we need to retrieve the spare + */ + if (mddev->spare) { + mddev->pers->diskop(mddev, &mddev->spare, + DISKOP_SPARE_INACTIVE); + mddev->spare = NULL; + } + } else { + /* success...*/ + if (mddev->spare) { + mddev->pers->diskop(mddev, &mddev->spare, + DISKOP_SPARE_ACTIVE); + mark_disk_sync(mddev->spare); + mark_disk_active(mddev->spare); + sb->active_disks++; + sb->spare_disks--; + mddev->spare = NULL; + } } - } else - if (disk_faulty(spare)) - mddev->pers->diskop(mddev, &spare, - DISKOP_SPARE_INACTIVE); - if (err == -EINTR || err == -ENOMEM) { - /* - * Recovery got interrupted, or ran out of mem ... - * signal back that we have finished using the array. - */ - mddev->pers->diskop(mddev, &spare, - DISKOP_SPARE_INACTIVE); - up(&mddev->recovery_sem); + __md_update_sb(mddev); mddev->recovery_running = 0; - continue; - } else { + wake_up(&resync_wait); + goto unlock; + } + if (mddev->recovery_running) { + /* that's odd.. */ mddev->recovery_running = 0; - up(&mddev->recovery_sem); + wake_up(&resync_wait); } - if (!disk_faulty(spare)) { - /* - * the SPARE_ACTIVE diskop possibly changes the - * pointer too - */ - mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_ACTIVE); - mark_disk_sync(spare); - mark_disk_active(spare); - sb->active_disks++; - sb->spare_disks--; + + if (sb->active_disks < sb->raid_disks) { + mddev->spare = get_spare(mddev); + if (!mddev->spare) + printk(KERN_ERR "md%d: no spare disk to reconstruct array! 
" + "-- continuing in degraded mode\n", mdidx(mddev)); + else + printk(KERN_INFO "md%d: resyncing spare disk %s to replace failed disk\n", + mdidx(mddev), partition_name(mk_kdev(mddev->spare->major,mddev->spare->minor))); + } + if (!mddev->spare && mddev->in_sync) { + /* nothing we can do ... */ + goto unlock; + } + if (mddev->pers->sync_request) { + mddev->sync_thread = md_register_thread(md_do_sync, + mddev, + "md_resync"); + if (!mddev->sync_thread) { + printk(KERN_ERR "md%d: could not start resync thread...\n", mdidx(mddev)); + if (mddev->spare) + mddev->pers->diskop(mddev, &mddev->spare, DISKOP_SPARE_INACTIVE); + mddev->spare = NULL; + mddev->recovery_running = 0; + } else { + if (mddev->spare) + mddev->pers->diskop(mddev, &mddev->spare, DISKOP_SPARE_WRITE); + mddev->recovery_running = 1; + md_wakeup_thread(mddev->sync_thread); + } } - mddev->sb_dirty = 1; - md_update_sb(mddev); - goto restart; + unlock: + mddev_unlock(mddev); } - printk(KERN_INFO "md: recovery thread finished ...\n"); + dprintk(KERN_INFO "md: recovery thread finished ...\n"); } @@ -3582,7 +3456,8 @@ int md_notify_reboot(struct notifier_block *this, return NOTIFY_DONE; ITERATE_MDDEV(mddev,tmp) - do_md_stop (mddev, 1); + if (mddev_trylock(mddev)==0) + do_md_stop (mddev, 1); /* * certain more exotic SCSI devices are known to be * volatile wrt too early system reboots. While the @@ -3606,7 +3481,6 @@ static void md_geninit(void) for(i = 0; i < MAX_MD_DEVS; i++) { md_size[i] = 0; - md_maxreadahead[i] = 32; } blk_size[MAJOR_NR] = md_size; @@ -3617,6 +3491,18 @@ static void md_geninit(void) #endif } +request_queue_t * md_queue_proc(kdev_t dev) +{ + mddev_t *mddev = mddev_find(minor(dev)); + request_queue_t *q = BLK_DEFAULT_QUEUE(MAJOR_NR); + if (!mddev || atomic_read(&mddev->active)<2) + BUG(); + if (mddev->pers) + q = &mddev->queue; + mddev_put(mddev); /* the caller must hold a reference... */ + return q; +} + int __init md_init(void) { static char * name = "mdrecoveryd"; @@ -3641,8 +3527,9 @@ int __init md_init(void) S_IFBLK | S_IRUSR | S_IWUSR, &md_fops, NULL); } - /* forward all md request to md_make_request */ - blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), md_make_request); + /* all requests on an uninitialised device get failed... */ + blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), md_fail_request); + blk_dev[MAJOR_NR].queue = md_queue_proc; add_gendisk(&md_gendisk); @@ -3720,7 +3607,7 @@ static void autostart_arrays(void) } dev_cnt = 0; - autorun_devices(to_kdev_t(-1)); + autorun_devices(); } static struct { @@ -3859,17 +3746,27 @@ void __init md_setup_drive(void) if (!md_setup_args.device_set[minor]) continue; - if (mddev_map[minor].mddev) { + printk(KERN_INFO "md: Loading md%d: %s\n", minor, md_setup_args.device_names[minor]); + + mddev = mddev_find(minor); + if (!mddev) { + printk(KERN_ERR "md: kmalloc failed - cannot start array %d\n", minor); + continue; + } + if (mddev_lock(mddev)) { printk(KERN_WARNING - "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n", + "md: Ignoring md=%d, cannot lock!\n", minor); + mddev_put(mddev); continue; } - printk(KERN_INFO "md: Loading md%d: %s\n", minor, md_setup_args.device_names[minor]); - mddev = alloc_mddev(mk_kdev(MD_MAJOR,minor)); - if (!mddev) { - printk(KERN_ERR "md: kmalloc failed - cannot start array %d\n", minor); + if (mddev->sb || !list_empty(&mddev->disks)) { + printk(KERN_WARNING + "md: Ignoring md=%d, already autodetected. 
(Use raid=noautodetect)\n", + minor); + mddev_unlock(mddev); + mddev_put(mddev); continue; } if (md_setup_args.pers[minor]) { @@ -3923,6 +3820,8 @@ void __init md_setup_drive(void) do_md_stop(mddev, 0); printk(KERN_WARNING "md: starting md%d failed\n", minor); } + mddev_unlock(mddev); + mddev_put(mddev); } } @@ -3973,9 +3872,10 @@ int init_module(void) static void free_device_names(void) { - while (device_names.next != &device_names) { - struct list_head *tmp = device_names.next; - list_del(tmp); + while (!list_empty(&device_names)) { + struct dname *tmp = list_entry(device_names.next, + dev_name_t, list); + list_del(&tmp->list); kfree(tmp); } } @@ -4006,10 +3906,8 @@ EXPORT_SYMBOL(register_md_personality); EXPORT_SYMBOL(unregister_md_personality); EXPORT_SYMBOL(partition_name); EXPORT_SYMBOL(md_error); -EXPORT_SYMBOL(md_do_sync); EXPORT_SYMBOL(md_sync_acct); EXPORT_SYMBOL(md_done_sync); -EXPORT_SYMBOL(md_recover_arrays); EXPORT_SYMBOL(md_register_thread); EXPORT_SYMBOL(md_unregister_thread); EXPORT_SYMBOL(md_update_sb); @@ -4017,7 +3915,5 @@ EXPORT_SYMBOL(md_wakeup_thread); EXPORT_SYMBOL(md_print_devices); EXPORT_SYMBOL(find_rdev_nr); EXPORT_SYMBOL(md_interrupt_thread); -EXPORT_SYMBOL(mddev_map); -EXPORT_SYMBOL(md_check_ordering); EXPORT_SYMBOL(get_spare); MODULE_LICENSE("GPL"); diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index 6db555317b135bff56226ea9f28189df91b5eff6..32dc200aee669366284a86f619afc34865ef67fa 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c @@ -244,27 +244,19 @@ static int multipath_read_balance (multipath_conf_t *conf) return 0; } -static int multipath_make_request (mddev_t *mddev, int rw, struct bio * bio) +static int multipath_make_request (request_queue_t *q, struct bio * bio) { + mddev_t *mddev = q->queuedata; multipath_conf_t *conf = mddev_to_conf(mddev); struct bio *real_bio; struct multipath_bh * mp_bh; struct multipath_info *multipath; -/* - * make_request() can abort the operation when READA is being - * used and no empty request is available. - * - * Currently, just replace the command with READ/WRITE. 
- */
- if (rw == READA)
- rw = READ;
-
mp_bh = multipath_alloc_mpbh (conf);
mp_bh->master_bio = bio;
mp_bh->mddev = mddev;
- mp_bh->cmd = rw;
+ mp_bh->cmd = bio_data_dir(bio);
/*
* read balancing logic:
*/
@@ -273,7 +265,7 @@ static int multipath_make_request (mddev_t *mddev, int rw, struct bio * bio)
real_bio = bio_clone(bio, GFP_NOIO);
real_bio->bi_bdev = multipath->bdev;
- real_bio->bi_rw = rw;
+ real_bio->bi_rw = bio_data_dir(bio);
real_bio->bi_end_io = multipath_end_request;
real_bio->bi_private = mp_bh;
mp_bh->bio = real_bio;
@@ -708,7 +700,6 @@ static void multipathd (void *data)
mddev = mp_bh->mddev;
if (mddev->sb_dirty) {
printk(KERN_INFO "dirty sb detected, updating.\n");
- mddev->sb_dirty = 0;
md_update_sb(mddev);
}
bio = mp_bh->bio;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 8f149a1efe1b75b56963ff1b0fcfc7b1550fc038..2dd6e9d5f9851fce3bcee2e13764574535bbe295 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -29,21 +29,26 @@
static int create_strip_zones (mddev_t *mddev)
{
- int i, c, j, j1, j2;
+ int i, c, j;
unsigned long current_offset, curr_zone_offset;
raid0_conf_t *conf = mddev_to_conf(mddev);
mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev;
+ struct list_head *tmp1, *tmp2;
+ struct strip_zone *zone;
+ int cnt;
/*
* The number of 'same size groups'
*/
conf->nr_strip_zones = 0;
- ITERATE_RDEV_ORDERED(mddev,rdev1,j1) {
+ ITERATE_RDEV(mddev,rdev1,tmp1) {
printk("raid0: looking at %s\n", partition_name(rdev1->dev));
c = 0;
- ITERATE_RDEV_ORDERED(mddev,rdev2,j2) {
- printk("raid0: comparing %s(%ld) with %s(%ld)\n", partition_name(rdev1->dev), rdev1->size, partition_name(rdev2->dev), rdev2->size);
+ ITERATE_RDEV(mddev,rdev2,tmp2) {
+ printk("raid0: comparing %s(%ld) with %s(%ld)\n",
+ partition_name(rdev1->dev), rdev1->size,
+ partition_name(rdev2->dev), rdev2->size);
if (rdev2 == rdev1) {
printk("raid0: END\n");
break;
@@ -51,7 +56,7 @@ static int create_strip_zones (mddev_t *mddev)
if (rdev2->size == rdev1->size) {
/*
- * Not unique, dont count it as a new
+ * Not unique, don't count it as a new
* group
*/
printk("raid0: EQUAL\n");
@@ -66,29 +71,62 @@ static int create_strip_zones (mddev_t *mddev)
printk("raid0: %d zones\n", conf->nr_strip_zones);
}
}
- printk("raid0: FINAL %d zones\n", conf->nr_strip_zones);
+ printk("raid0: FINAL %d zones\n", conf->nr_strip_zones);
conf->strip_zone = vmalloc(sizeof(struct strip_zone)*
conf->nr_strip_zones);
if (!conf->strip_zone)
return 1;
+ memset(conf->strip_zone, 0,sizeof(struct strip_zone)*
+ conf->nr_strip_zones);
+ /* The first zone must contain all devices, so here we check that
+ * there is a proper alignment of slots to devices and find them all
+ */
+ zone = &conf->strip_zone[0];
+ cnt = 0;
+ smallest = NULL;
+ ITERATE_RDEV(mddev, rdev1, tmp1) {
+ int j = rdev1->sb->this_disk.raid_disk;
+
+ if (j < 0 || j >= mddev->sb->raid_disks) {
+ printk("raid0: bad disk number %d - aborting!\n", j);
+ goto abort;
+ }
+ if (zone->dev[j]) {
+ printk("raid0: multiple devices for %d - aborting!\n", j);
+ goto abort;
+ }
+ zone->dev[j] = rdev1;
+ if (!smallest || (rdev1->size <smallest->size))
+ smallest = rdev1;
+ cnt++;
+ }
+ if (cnt != mddev->sb->raid_disks) {
+ printk("raid0: too few disks (%d of %d) - aborting!\n", cnt,
+ mddev->sb->raid_disks);
+ goto abort;
+ }
+ zone->nb_dev = cnt;
+ zone->size = smallest->size * cnt;
+ zone->zone_offset = 0;
- conf->smallest = NULL;
- current_offset = 0;
- curr_zone_offset = 0;
+ conf->smallest = zone;
+ current_offset = smallest->size;
+ curr_zone_offset = zone->size;
- for (i = 0; i <
conf->nr_strip_zones; i++) + /* now do the other zones */ + for (i = 1; i < conf->nr_strip_zones; i++) { - struct strip_zone *zone = conf->strip_zone + i; + zone = conf->strip_zone + i; printk("raid0: zone %d\n", i); zone->dev_offset = current_offset; smallest = NULL; c = 0; - ITERATE_RDEV_ORDERED(mddev,rdev,j) { - + for (j=0; j<cnt; j++) { + rdev = conf->strip_zone[0].dev[j]; printk("raid0: checking %s ...", partition_name(rdev->dev)); if (rdev->size > current_offset) { @@ -118,6 +156,9 @@ static int create_strip_zones (mddev_t *mddev) } printk("raid0: done.\n"); return 0; + abort: + vfree(conf->strip_zone); + return 1; } static int raid0_run (mddev_t *mddev) @@ -132,11 +173,6 @@ static int raid0_run (mddev_t *mddev) goto out; mddev->private = (void *)conf; - if (md_check_ordering(mddev)) { - printk("raid0: disks are not ordered, aborting!\n"); - goto out_free_conf; - } - if (create_strip_zones (mddev)) goto out_free_conf; @@ -225,8 +261,9 @@ static int raid0_stop (mddev_t *mddev) * Of course, those facts may not be valid anymore (and surely won't...) * Hey guys, there's some work out there ;-) */ -static int raid0_make_request (mddev_t *mddev, int rw, struct bio *bio) +static int raid0_make_request (request_queue_t *q, struct bio *bio) { + mddev_t *mddev = q->queuedata; unsigned int sect_in_chunk, chunksize_bits, chunk_size; raid0_conf_t *conf = mddev_to_conf(mddev); struct raid0_hash *hash; @@ -234,7 +271,7 @@ static int raid0_make_request (mddev_t *mddev, int rw, struct bio *bio) mdk_rdev_t *tmp_dev; unsigned long chunk, block, rsect; - chunk_size = mddev->param.chunk_size >> 10; + chunk_size = mddev->sb->chunk_size >> 10; chunksize_bits = ffz(~chunk_size); block = bio->bi_sector >> 1; hash = conf->hash_table + block / conf->smallest->size; @@ -323,7 +360,7 @@ static int raid0_status (char *page, mddev_t *mddev) conf->strip_zone[j].size); } #endif - sz += sprintf(page + sz, " %dk chunks", mddev->param.chunk_size/1024); + sz += sprintf(page + sz, " %dk chunks", mddev->sb->chunk_size/1024); return sz; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 96ad858cf033681183b218bb91cafad8a0c6a79a..4c855576f9fecf5b9f42a1661332a37d639460d8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -334,7 +334,7 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio) * device if no resync is going on, or below the resync window. * We take the first readable disk when above the resync window. */ - if (conf->resync_mirrors && (this_sector + sectors >= conf->next_resync)) { + if (!conf->mddev->in_sync && (this_sector + sectors >= conf->next_resync)) { /* make sure that disk is operational */ new_disk = 0; while (!conf->mirrors[new_disk].operational || conf->mirrors[new_disk].write_only) { @@ -434,8 +434,9 @@ static void resume_device(conf_t *conf) spin_unlock_irq(&conf->resync_lock); } -static int make_request(mddev_t *mddev, int rw, struct bio * bio) +static int make_request(request_queue_t *q, struct bio * bio) { + mddev_t *mddev = q->queuedata; conf_t *conf = mddev_to_conf(mddev); mirror_info_t *mirror; r1bio_t *r1_bio; @@ -456,20 +457,16 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio) * make_request() can abort the operation when READA is being * used and no empty request is available. * - * Currently, just replace the command with READ. 
*/ - if (rw == READA) - rw = READ; - r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); r1_bio->master_bio = bio; r1_bio->mddev = mddev; r1_bio->sector = bio->bi_sector; - r1_bio->cmd = rw; + r1_bio->cmd = bio_data_dir(bio); - if (rw == READ) { + if (r1_bio->cmd == READ) { /* * read balancing logic: */ @@ -483,7 +480,7 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio) read_bio->bi_sector = r1_bio->sector; read_bio->bi_bdev = mirror->bdev; read_bio->bi_end_io = end_request; - read_bio->bi_rw = rw; + read_bio->bi_rw = r1_bio->cmd; read_bio->bi_private = r1_bio; generic_make_request(read_bio); @@ -507,7 +504,7 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio) mbio->bi_sector = r1_bio->sector; mbio->bi_bdev = conf->mirrors[i].bdev; mbio->bi_end_io = end_request; - mbio->bi_rw = rw; + mbio->bi_rw = r1_bio->cmd; mbio->bi_private = r1_bio; sum_bios++; @@ -656,6 +653,9 @@ static void close_sync(conf_t *conf) if (conf->barrier) BUG(); if (waitqueue_active(&conf->wait_idle)) BUG(); if (waitqueue_active(&conf->wait_resume)) BUG(); + + mempool_destroy(conf->r1buf_pool); + conf->r1buf_pool = NULL; } static int diskop(mddev_t *mddev, mdp_disk_t **d, int state) @@ -772,7 +772,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state) * Deactivate a spare disk: */ case DISKOP_SPARE_INACTIVE: - close_sync(conf); sdisk = conf->mirrors + spare_disk; sdisk->operational = 0; sdisk->write_only = 0; @@ -785,7 +784,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state) * property) */ case DISKOP_SPARE_ACTIVE: - close_sync(conf); sdisk = conf->mirrors + spare_disk; fdisk = conf->mirrors + failed_disk; @@ -919,10 +917,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state) } abort: spin_unlock_irq(&conf->device_lock); - if (state == DISKOP_SPARE_ACTIVE || state == DISKOP_SPARE_INACTIVE) { - mempool_destroy(conf->r1buf_pool); - conf->r1buf_pool = NULL; - } print_conf(conf); return err; @@ -1012,7 +1006,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) * we read from here, no need to write */ continue; - if (i < conf->raid_disks && !conf->resync_mirrors) + if (i < conf->raid_disks && mddev->in_sync) /* * don't need to write this we are just rebuilding */ @@ -1088,7 +1082,6 @@ static void raid1d(void *data) conf = mddev_to_conf(mddev); if (mddev->sb_dirty) { printk(KERN_INFO "raid1: dirty sb detected, updating.\n"); - mddev->sb_dirty = 0; md_update_sb(mddev); } bio = r1_bio->master_bio; @@ -1118,31 +1111,6 @@ static void raid1d(void *data) spin_unlock_irqrestore(&retry_list_lock, flags); } -/* - * Private kernel thread to reconstruct mirrors after an unclean - * shutdown. - */ -static void raid1syncd(void *data) -{ - conf_t *conf = data; - mddev_t *mddev = conf->mddev; - - if (!conf->resync_mirrors) - return; - if (conf->resync_mirrors == 2) - return; - down(&mddev->recovery_sem); - if (!md_do_sync(mddev, NULL)) { - /* - * Only if everything went Ok. - */ - conf->resync_mirrors = 0; - } - - close_sync(conf); - - up(&mddev->recovery_sem); -} static int init_resync(conf_t *conf) { @@ -1177,9 +1145,16 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) sector_t max_sector, nr_sectors; int disk, partial; - if (!sector_nr) + if (sector_nr == 0) if (init_resync(conf)) return -ENOMEM; + + max_sector = mddev->sb->size << 1; + if (sector_nr >= max_sector) { + close_sync(conf); + return 0; + } + /* * If there is non-resync activity waiting for us then * put in a delay to throttle resync. 
@@ -1216,10 +1191,6 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) r1_bio->sector = sector_nr; r1_bio->cmd = SPECIAL; - max_sector = mddev->sb->size << 1; - if (sector_nr >= max_sector) - BUG(); - bio = r1_bio->master_bio; nr_sectors = RESYNC_BLOCK_SIZE >> 9; if (max_sector - sector_nr < nr_sectors) @@ -1302,7 +1273,6 @@ static int run(mddev_t *mddev) mdp_disk_t *descriptor; mdk_rdev_t *rdev; struct list_head *tmp; - int start_recovery = 0; MOD_INC_USE_COUNT; @@ -1454,10 +1424,6 @@ static int run(mddev_t *mddev) conf->last_used = j; - if (conf->working_disks != sb->raid_disks) { - printk(KERN_ALERT "raid1: md%d, not all disks are operational -- trying to recover array\n", mdidx(mddev)); - start_recovery = 1; - } { const char * name = "raid1d"; @@ -1469,20 +1435,6 @@ static int run(mddev_t *mddev) } } - if (!start_recovery && !(sb->state & (1 << MD_SB_CLEAN)) && - (conf->working_disks > 1)) { - const char * name = "raid1syncd"; - - conf->resync_thread = md_register_thread(raid1syncd, conf, name); - if (!conf->resync_thread) { - printk(THREAD_ERROR, mdidx(mddev)); - goto out_free_conf; - } - - printk(START_RESYNC, mdidx(mddev)); - conf->resync_mirrors = 1; - md_wakeup_thread(conf->resync_thread); - } /* * Regenerate the "device is in sync with the raid set" bit for @@ -1499,10 +1451,6 @@ static int run(mddev_t *mddev) } sb->active_disks = conf->working_disks; - if (start_recovery) - md_recover_arrays(); - - printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->active_disks, sb->raid_disks); /* * Ok, everything is just fine now @@ -1522,47 +1470,12 @@ static int run(mddev_t *mddev) return -EIO; } -static int stop_resync(mddev_t *mddev) -{ - conf_t *conf = mddev_to_conf(mddev); - - if (conf->resync_thread) { - if (conf->resync_mirrors) { - conf->resync_mirrors = 2; - md_interrupt_thread(conf->resync_thread); - - printk(KERN_INFO "raid1: mirror resync was not fully finished, restarting next time.\n"); - return 1; - } - return 0; - } - return 0; -} - -static int restart_resync(mddev_t *mddev) -{ - conf_t *conf = mddev_to_conf(mddev); - - if (conf->resync_mirrors) { - if (!conf->resync_thread) { - MD_BUG(); - return 0; - } - conf->resync_mirrors = 1; - md_wakeup_thread(conf->resync_thread); - return 1; - } - return 0; -} - static int stop(mddev_t *mddev) { conf_t *conf = mddev_to_conf(mddev); int i; md_unregister_thread(conf->thread); - if (conf->resync_thread) - md_unregister_thread(conf->resync_thread); if (conf->r1bio_pool) mempool_destroy(conf->r1bio_pool); for (i = 0; i < MD_SB_DISKS; i++) @@ -1583,8 +1496,6 @@ static mdk_personality_t raid1_personality = status: status, error_handler: error, diskop: diskop, - stop_resync: stop_resync, - restart_resync: restart_resync, sync_request: sync_request }; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 62873d89e395c0734759b5540a1dc5c8824ee45f..f19d8d936f44ae2c53bc3b15403f2b9e78dc6fc4 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -634,7 +634,6 @@ static void copy_data(int frombio, struct bio *bio, else page_offset = (signed)(sector - bio->bi_sector) * -512; bio_for_each_segment(bvl, bio, i) { - char *ba = __bio_kmap(bio, i); int len = bio_iovec_idx(bio,i)->bv_len; int clen; int b_offset = 0; @@ -649,13 +648,16 @@ static void copy_data(int frombio, struct bio *bio, clen = STRIPE_SIZE - page_offset; else clen = len; - if (len > 0) { + if (clen > 0) { + char *ba = __bio_kmap(bio, i); if (frombio) memcpy(pa+page_offset, ba+b_offset, clen); else memcpy(ba+b_offset, pa+page_offset, clen); - } - __bio_kunmap(bio, 
i); + __bio_kunmap(bio, i); + } + if (clen < len) /* hit end of page */ + break; page_offset += len; } } @@ -810,6 +812,8 @@ static void add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, spin_unlock_irq(&conf->device_lock); spin_unlock(&sh->lock); + PRINTK("added bi b#%lu to stripe s#%lu, disk %d.\n", bi->bi_sector, sh->sector, dd_idx); + if (forwrite) { /* check if page is covered */ sector_t sector = sh->dev[dd_idx].sector; @@ -823,8 +827,6 @@ static void add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); } - - PRINTK("added bi b#%lu to stripe s#%lu, disk %d.\n", bi->bi_sector, sh->sector, dd_idx); } @@ -1036,7 +1038,7 @@ static void handle_stripe(struct stripe_head *sh) ) && !test_bit(R5_UPTODATE, &dev->flags)) { if (conf->disks[i].operational -/* && !(conf->resync_parity && i == sh->pd_idx) */ +/* && !(!mddev->insync && i == sh->pd_idx) */ ) rmw++; else rmw += 2*disks; /* cannot read it */ @@ -1226,14 +1228,15 @@ static inline void raid5_activate_delayed(raid5_conf_t *conf) } static void raid5_unplug_device(void *data) { - raid5_conf_t *conf = (raid5_conf_t *)data; + request_queue_t *q = data; + mddev_t *mddev = q->queuedata; + raid5_conf_t *conf = mddev_to_conf(mddev); unsigned long flags; spin_lock_irqsave(&conf->device_lock, flags); - raid5_activate_delayed(conf); - - conf->plugged = 0; + if (blk_remove_plug(q)) + raid5_activate_delayed(conf); md_wakeup_thread(conf->thread); spin_unlock_irqrestore(&conf->device_lock, flags); @@ -1242,31 +1245,21 @@ static void raid5_unplug_device(void *data) static inline void raid5_plug_device(raid5_conf_t *conf) { spin_lock_irq(&conf->device_lock); - if (list_empty(&conf->delayed_list)) - if (!conf->plugged) { - conf->plugged = 1; - queue_task(&conf->plug_tq, &tq_disk); - } + blk_plug_device(&conf->mddev->queue); spin_unlock_irq(&conf->device_lock); } -static int make_request (mddev_t *mddev, int rw, struct bio * bi) +static int make_request (request_queue_t *q, struct bio * bi) { - raid5_conf_t *conf = (raid5_conf_t *) mddev->private; + mddev_t *mddev = q->queuedata; + raid5_conf_t *conf = mddev_to_conf(mddev); const unsigned int raid_disks = conf->raid_disks; const unsigned int data_disks = raid_disks - 1; unsigned int dd_idx, pd_idx; sector_t new_sector; sector_t logical_sector, last_sector; - int read_ahead = 0; - struct stripe_head *sh; - if (rw == READA) { - rw = READ; - read_ahead=1; - } - logical_sector = bi->bi_sector & ~(STRIPE_SECTORS-1); last_sector = bi->bi_sector + (bi->bi_size>>9); @@ -1281,10 +1274,10 @@ static int make_request (mddev_t *mddev, int rw, struct bio * bi) PRINTK("raid5: make_request, sector %ul logical %ul\n", new_sector, logical_sector); - sh = get_active_stripe(conf, new_sector, pd_idx, read_ahead); + sh = get_active_stripe(conf, new_sector, pd_idx, (bi->bi_rw&RWA_MASK)); if (sh) { - add_stripe_bio(sh, bi, dd_idx, rw); + add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK)); raid5_plug_device(conf); handle_stripe(sh); @@ -1311,6 +1304,10 @@ static int sync_request (mddev_t *mddev, sector_t sector_nr, int go_faster) int raid_disks = conf->raid_disks; int data_disks = raid_disks-1; + if (sector_nr >= mddev->sb->size <<1) + /* just being told to finish up .. 
nothing to do */ + return 0; + first_sector = raid5_compute_sector(stripe*data_disks*sectors_per_chunk + chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf); sh = get_active_stripe(conf, sector_nr, pd_idx, 0); @@ -1343,17 +1340,15 @@ static void raid5d (void *data) handled = 0; - if (mddev->sb_dirty) { - mddev->sb_dirty = 0; + if (mddev->sb_dirty) md_update_sb(mddev); - } spin_lock_irq(&conf->device_lock); while (1) { struct list_head *first; if (list_empty(&conf->handle_list) && atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD && - !conf->plugged && + !blk_queue_plugged(&mddev->queue) && !list_empty(&conf->delayed_list)) raid5_activate_delayed(conf); @@ -1382,31 +1377,6 @@ static void raid5d (void *data) PRINTK("--- raid5d inactive\n"); } -/* - * Private kernel thread for parity reconstruction after an unclean - * shutdown. Reconstruction on spare drives in case of a failed drive - * is done by the generic mdsyncd. - */ -static void raid5syncd (void *data) -{ - raid5_conf_t *conf = data; - mddev_t *mddev = conf->mddev; - - if (!conf->resync_parity) - return; - if (conf->resync_parity == 2) - return; - down(&mddev->recovery_sem); - if (md_do_sync(mddev,NULL)) { - up(&mddev->recovery_sem); - printk("raid5: resync aborted!\n"); - return; - } - conf->resync_parity = 0; - up(&mddev->recovery_sem); - printk("raid5: resync finished.\n"); -} - static int run (mddev_t *mddev) { raid5_conf_t *conf; @@ -1416,7 +1386,6 @@ static int run (mddev_t *mddev) mdk_rdev_t *rdev; struct disk_info *disk; struct list_head *tmp; - int start_recovery = 0; MOD_INC_USE_COUNT; @@ -1444,10 +1413,7 @@ static int run (mddev_t *mddev) atomic_set(&conf->active_stripes, 0); atomic_set(&conf->preread_active_stripes, 0); - conf->plugged = 0; - conf->plug_tq.sync = 0; - conf->plug_tq.routine = &raid5_unplug_device; - conf->plug_tq.data = conf; + mddev->queue.unplug_fn = raid5_unplug_device; PRINTK("raid5: run(md%d) called.\n", mdidx(mddev)); @@ -1571,9 +1537,10 @@ static int run (mddev_t *mddev) goto abort; } - if (conf->working_disks != sb->raid_disks) { - printk(KERN_ALERT "raid5: md%d, not all disks are operational -- trying to recover array\n", mdidx(mddev)); - start_recovery = 1; + if (conf->failed_disks == 1 && + !(sb->state & (1<<MD_SB_CLEAN))) { + printk(KERN_ERR "raid5: cannot start dirty degraded array for md%d\n", mdidx(mddev)); + goto abort; } { @@ -1587,10 +1554,11 @@ static int run (mddev_t *mddev) } memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + - conf->raid_disks * ((sizeof(struct buffer_head) + PAGE_SIZE))) / 1024; + conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; if (grow_stripes(conf, conf->max_nr_stripes)) { printk(KERN_ERR "raid5: couldn't allocate %dkB for buffers\n", memory); shrink_stripes(conf); + md_unregister_thread(conf->thread); goto abort; } else printk(KERN_INFO "raid5: allocated %dkB for md%d\n", memory, mdidx(mddev)); @@ -1615,23 +1583,6 @@ static int run (mddev_t *mddev) else printk(KERN_ALERT "raid5: raid level %d set md%d active with %d out of %d devices, algorithm %d\n", conf->level, mdidx(mddev), sb->active_disks, sb->raid_disks, conf->algorithm); - if (!start_recovery && !(sb->state & (1 << MD_SB_CLEAN))) { - const char * name = "raid5syncd"; - - conf->resync_thread = md_register_thread(raid5syncd, conf,name); - if (!conf->resync_thread) { - printk(KERN_ERR "raid5: couldn't allocate thread for md%d\n", mdidx(mddev)); - goto abort; - } - - printk("raid5: raid set md%d not clean; reconstructing parity\n", mdidx(mddev)); - 
conf->resync_parity = 1; - md_wakeup_thread(conf->resync_thread); - } - - print_raid5_conf(conf); - if (start_recovery) - md_recover_arrays(); print_raid5_conf(conf); /* Ok, everything is just fine now */ @@ -1650,48 +1601,12 @@ static int run (mddev_t *mddev) return -EIO; } -static int stop_resync (mddev_t *mddev) -{ - raid5_conf_t *conf = mddev_to_conf(mddev); - mdk_thread_t *thread = conf->resync_thread; - - if (thread) { - if (conf->resync_parity) { - conf->resync_parity = 2; - md_interrupt_thread(thread); - printk(KERN_INFO "raid5: parity resync was not fully finished, restarting next time.\n"); - return 1; - } - return 0; - } - return 0; -} - -static int restart_resync (mddev_t *mddev) -{ - raid5_conf_t *conf = mddev_to_conf(mddev); - - if (conf->resync_parity) { - if (!conf->resync_thread) { - MD_BUG(); - return 0; - } - printk("raid5: waking up raid5resync.\n"); - conf->resync_parity = 1; - md_wakeup_thread(conf->resync_thread); - return 1; - } else - printk("raid5: no restart-resync needed.\n"); - return 0; -} static int stop (mddev_t *mddev) { raid5_conf_t *conf = (raid5_conf_t *) mddev->private; - if (conf->resync_thread) - md_unregister_thread(conf->resync_thread); md_unregister_thread(conf->thread); shrink_stripes(conf); free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); @@ -2066,8 +1981,6 @@ static mdk_personality_t raid5_personality= status: status, error_handler: error, diskop: diskop, - stop_resync: stop_resync, - restart_resync: restart_resync, sync_request: sync_request }; diff --git a/drivers/usb/class/usb-midi.c b/drivers/usb/class/usb-midi.c index 4c2b52180638ffbbcc19af4555ffbec4758cc3fc..8aae77591839098d18f85f5ebe404a7b991e46fc 100644 --- a/drivers/usb/class/usb-midi.c +++ b/drivers/usb/class/usb-midi.c @@ -106,9 +106,7 @@ MODULE_PARM_DESC(ulangid, "The optional preferred USB Language ID for all device MODULE_AUTHOR("NAGANO Daisuke <breeze.nagano@nifty.ne.jp>"); MODULE_DESCRIPTION("USB-MIDI driver"); -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,14) MODULE_LICENSE("GPL"); -#endif /* ------------------------------------------------------------------------- */ diff --git a/drivers/usb/host/ohci-dbg.c b/drivers/usb/host/ohci-dbg.c index c3e79562190785218280a86f7f5cfe09b894e745..b6aba6fb9c4940fb177a3b347330746b11be7db6 100644 --- a/drivers/usb/host/ohci-dbg.c +++ b/drivers/usb/host/ohci-dbg.c @@ -74,9 +74,9 @@ static void urb_print (struct urb * urb, char * str, int small) static inline struct ed * dma_to_ed (struct ohci_hcd *hc, dma_addr_t ed_dma); -#ifdef OHCI_VERBOSE_DEBUG /* print non-empty branches of the periodic ed tree */ -void ohci_dump_periodic (struct ohci_hcd *ohci, char *label) +static void __attribute__ ((unused)) +ohci_dump_periodic (struct ohci_hcd *ohci, char *label) { int i, j; u32 *ed_p; @@ -101,7 +101,6 @@ void ohci_dump_periodic (struct ohci_hcd *ohci, char *label) printk (KERN_DEBUG "%s, ohci %s, empty periodic schedule\n", label, ohci->hcd.self.bus_name); } -#endif static void ohci_dump_intr_mask (char *label, __u32 mask) { @@ -241,6 +240,97 @@ static void ohci_dump (struct ohci_hcd *controller, int verbose) ohci_dump_roothub (controller, 1); } +static void ohci_dump_td (char *label, struct td *td) +{ + u32 tmp = le32_to_cpup (&td->hwINFO); + + dbg ("%s td %p; urb %p index %d; hw next td %08x", + label, td, + td->urb, td->index, + le32_to_cpup (&td->hwNextTD)); + if ((tmp & TD_ISO) == 0) { + char *toggle, *pid; + u32 cbp, be; + + switch (tmp & TD_T) { + case TD_T_DATA0: toggle = "DATA0"; break; + case TD_T_DATA1: toggle = "DATA1"; 
break; + case TD_T_TOGGLE: toggle = "(CARRY)"; break; + default: toggle = "(?)"; break; + } + switch (tmp & TD_DP) { + case TD_DP_SETUP: pid = "SETUP"; break; + case TD_DP_IN: pid = "IN"; break; + case TD_DP_OUT: pid = "OUT"; break; + default: pid = "(bad pid)"; break; + } + dbg (" info %08x CC=%x %s DI=%d %s %s", tmp, + TD_CC_GET(tmp), /* EC, */ toggle, + (tmp & TD_DI) >> 21, pid, + (tmp & TD_R) ? "R" : ""); + cbp = le32_to_cpup (&td->hwCBP); + be = le32_to_cpup (&td->hwBE); + dbg (" cbp %08x be %08x (len %d)", cbp, be, + cbp ? (be + 1 - cbp) : 0); + } else { + unsigned i; + dbg (" info %08x CC=%x DI=%d START=%04x", tmp, + TD_CC_GET(tmp), /* FC, */ + (tmp & TD_DI) >> 21, + tmp & 0x0000ffff); + dbg (" bp0 %08x be %08x", + le32_to_cpup (&td->hwCBP) & ~0x0fff, + le32_to_cpup (&td->hwBE)); + for (i = 0; i < MAXPSW; i++) { + dbg (" psw [%d] = %2x", i, + le16_to_cpu (td->hwPSW [i])); + } + } +} + +/* caller MUST own hcd spinlock if verbose is set! */ +static void __attribute__((unused)) +ohci_dump_ed (struct ohci_hcd *ohci, char *label, struct ed *ed, int verbose) +{ + u32 tmp = ed->hwINFO; + char *type = ""; + + dbg ("%s: %s, ed %p state 0x%x type %d; next ed %08x", + ohci->hcd.self.bus_name, label, + ed, ed->state, ed->type, + le32_to_cpup (&ed->hwNextED)); + switch (tmp & (ED_IN|ED_OUT)) { + case ED_OUT: type = "-OUT"; break; + case ED_IN: type = "-IN"; break; + /* else from TDs ... control */ + } + dbg (" info %08x MAX=%d%s%s%s EP=%d%s DEV=%d", le32_to_cpu (tmp), + 0x0fff & (le32_to_cpu (tmp) >> 16), + (tmp & ED_ISO) ? " ISO" : "", + (tmp & ED_SKIP) ? " SKIP" : "", + (tmp & ED_LOWSPEED) ? " LOW" : "", + 0x000f & (le32_to_cpu (tmp) >> 7), + type, + 0x007f & le32_to_cpu (tmp)); + dbg (" tds: head %08x%s%s tail %08x%s", + tmp = le32_to_cpup (&ed->hwHeadP), + (ed->hwHeadP & ED_H) ? " HALT" : "", + (ed->hwHeadP & ED_C) ? " CARRY" : "", + le32_to_cpup (&ed->hwTailP), + verbose ? "" : " (not listing)"); + if (verbose) { + struct list_head *tmp; + + /* use ed->td_list because HC concurrently modifies + * hwNextTD as it accumulates ed_donelist. + */ + list_for_each (tmp, &ed->td_list) { + struct td *td; + td = list_entry (tmp, struct td, td_list); + ohci_dump_td (" ->", td); + } + } +} #endif diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c index 185d1d3cd6d526459362256312f67afeff3f1a13..bfd4bc4315d65b83dd5db041390c5befe65aecef 100644 --- a/drivers/usb/host/ohci-hcd.c +++ b/drivers/usb/host/ohci-hcd.c @@ -100,7 +100,7 @@ * - lots more testing!! */ -#define DRIVER_VERSION "2002-Jun-10" +#define DRIVER_VERSION "2002-Jun-15" #define DRIVER_AUTHOR "Roman Weissgaerber <weissg@vienna.at>, David Brownell" #define DRIVER_DESC "USB 1.1 'Open' Host Controller (OHCI) Driver" @@ -145,8 +145,8 @@ static int ohci_urb_enqueue ( urb_print (urb, "SUB", usb_pipein (pipe)); #endif - /* every endpoint has a ed, locate and fill it */ - if (! (ed = ep_add_ed (urb->dev, pipe, urb->interval, 1, mem_flags))) + /* every endpoint has a ed, locate and maybe (re)initialize it */ + if (! (ed = ed_get (ohci, urb->dev, pipe, urb->interval))) return -ENOMEM; /* for the private part of the URB we need the number of TDs (size) */ @@ -498,6 +498,7 @@ static void ohci_irq (struct usb_hcd *hcd) struct ohci_regs *regs = ohci->regs; int ints; + /* we can eliminate a (slow) readl() if _only_ WDH caused this irq */ if ((ohci->hcca->done_head != 0) && ! 
(le32_to_cpup (&ohci->hcca->done_head) & 0x01)) { ints = OHCI_INTR_WDH; diff --git a/drivers/usb/host/ohci-mem.c b/drivers/usb/host/ohci-mem.c index f2b2df91bc008a2c29d172cc0e232069025db366..c2b0b2ac8be954cd691eb333c86f08b472924d4b 100644 --- a/drivers/usb/host/ohci-mem.c +++ b/drivers/usb/host/ohci-mem.c @@ -221,6 +221,7 @@ ed_alloc (struct ohci_hcd *hc, int mem_flags) ed = pci_pool_alloc (hc->ed_cache, mem_flags, &dma); if (ed) { memset (ed, 0, sizeof (*ed)); + INIT_LIST_HEAD (&ed->td_list); ed->dma = dma; /* hash it for later reverse mapping */ if (!hash_add_ed (hc, ed, mem_flags)) { diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c index 837210e9a0f408a6bdd73675f03056c63141ce26..638c1d3828e2d80035e2cc3bb0199cbf752aa493 100644 --- a/drivers/usb/host/ohci-q.c +++ b/drivers/usb/host/ohci-q.c @@ -131,8 +131,9 @@ static void intr_resub (struct ohci_hcd *hc, struct urb *urb) /* search for the right branch to insert an interrupt ed into the int tree * do some load balancing; - * returns the branch and - * sets the interval to interval = 2^integer (ld (interval)) + * returns the branch + * FIXME allow for failure, when there's no bandwidth left; + * and consider iso loads too */ static int ep_int_balance (struct ohci_hcd *ohci, int interval, int load) { @@ -152,19 +153,6 @@ static int ep_int_balance (struct ohci_hcd *ohci, int interval, int load) /*-------------------------------------------------------------------------*/ -/* 2^int ( ld (inter)) */ - -static int ep_2_n_interval (int inter) -{ - int i; - - for (i = 0; ((inter >> i) > 1 ) && (i < 5); i++) - continue; - return 1 << i; -} - -/*-------------------------------------------------------------------------*/ - /* the int tree is a binary tree * in order to process it sequentially the indexes of the branches have * to be mapped the mapping reverses the bits of a word of num_bits length @@ -230,8 +218,7 @@ static int ep_link (struct ohci_hcd *ohci, struct ed *edi) case PIPE_INTERRUPT: load = ed->intriso.intr_info.int_load; - interval = ep_2_n_interval (ed->intriso.intr_info.int_period); - ed->interval = interval; + interval = ed->interval; int_branch = ep_int_balance (ohci, interval, load); ed->intriso.intr_info.int_branch = int_branch; @@ -301,6 +288,7 @@ static void periodic_unlink ( * just the link to the ed is unlinked. * the link from the ed still points to another operational ed or 0 * so the HC can eventually finish the processing of the unlinked ed + * caller guarantees the ED has no active TDs. */ static int start_ed_unlink (struct ohci_hcd *ohci, struct ed *ed) { @@ -387,84 +375,99 @@ static int start_ed_unlink (struct ohci_hcd *ohci, struct ed *ed) /*-------------------------------------------------------------------------*/ -/* (re)init an endpoint; this _should_ be done once at the - * usb_set_configuration command, but the USB stack is a bit stateless - * so we do it at every transaction. - * if the state of the ed is ED_NEW then a dummy td is added and the - * state is changed to ED_UNLINK - * in all other cases the state is left unchanged - * the ed info fields are set even though most of them should - * not change +/* get and maybe (re)init an endpoint. init _should_ be done only as part + * of usb_set_configuration() or usb_set_interface() ... but the USB stack + * isn't very stateful, so we re-init whenever the HC isn't looking. 
*/ -static struct ed *ep_add_ed ( +static struct ed *ed_get ( + struct ohci_hcd *ohci, struct usb_device *udev, unsigned int pipe, - int interval, - int load, - int mem_flags + int interval ) { - struct ohci_hcd *ohci = hcd_to_ohci (udev->bus->hcpriv); + int is_out = !usb_pipein (pipe); + int type = usb_pipetype (pipe); + int bus_msecs = 0; struct hcd_dev *dev = (struct hcd_dev *) udev->hcpriv; - struct td *td; struct ed *ed; unsigned ep; unsigned long flags; - spin_lock_irqsave (&ohci->lock, flags); - ep = usb_pipeendpoint (pipe) << 1; - if (!usb_pipecontrol (pipe) && usb_pipeout (pipe)) + if (type != PIPE_CONTROL && is_out) ep |= 1; + if (type == PIPE_INTERRUPT) + bus_msecs = usb_calc_bus_time (udev->speed, !is_out, 0, + usb_maxpacket (udev, pipe, is_out)) / 1000; + + spin_lock_irqsave (&ohci->lock, flags); + if (!(ed = dev->ep [ep])) { ed = ed_alloc (ohci, SLAB_ATOMIC); if (!ed) { /* out of memory */ - spin_unlock_irqrestore (&ohci->lock, flags); - return NULL; + goto done; } dev->ep [ep] = ed; } if (ed->state & ED_URB_DEL) { /* pending unlink request */ - spin_unlock_irqrestore (&ohci->lock, flags); - return NULL; + ed = 0; + goto done; } if (ed->state == ED_NEW) { + struct td *td; + ed->hwINFO = ED_SKIP; /* dummy td; end of td list for ed */ td = td_alloc (ohci, SLAB_ATOMIC); if (!td) { /* out of memory */ - spin_unlock_irqrestore (&ohci->lock, flags); - return NULL; + ed = 0; + goto done; } ed->dummy = td; ed->hwTailP = cpu_to_le32 (td->td_dma); ed->hwHeadP = ed->hwTailP; /* ED_C, ED_H zeroed */ ed->state = ED_UNLINK; - ed->type = usb_pipetype (pipe); + ed->type = type; } -// FIXME: don't do this if it's linked to the HC, or without knowing it's -// safe to clobber state/mode info tied to (previous) config/altsetting. -// (but dev0/ep0, used by set_address, must get clobbered) - - ed->hwINFO = cpu_to_le32 (usb_pipedevice (pipe) - | usb_pipeendpoint (pipe) << 7 - | (usb_pipeisoc (pipe)? 0x8000: 0) - | (usb_pipecontrol (pipe) - ? 0: (usb_pipeout (pipe)? 0x800: 0x1000)) - | (udev->speed == USB_SPEED_LOW) << 13 - | usb_maxpacket (udev, pipe, usb_pipeout (pipe)) - << 16); - - if (ed->type == PIPE_INTERRUPT && ed->state == ED_UNLINK) { - ed->intriso.intr_info.int_period = interval; - ed->intriso.intr_info.int_load = load; - } + /* FIXME: Don't do this without knowing it's safe to clobber this + * state/mode info. Currently the upper layers don't support such + * guarantees; we're lucky changing config/altsetting is rare. + */ + if (ed->state == ED_UNLINK) { + u32 info; + + info = usb_pipedevice (pipe); + info |= (ep >> 1) << 7; + info |= usb_maxpacket (udev, pipe, is_out) << 16; + info = cpu_to_le32 (info); + if (udev->speed == USB_SPEED_LOW) + info |= ED_LOWSPEED; + /* control transfers store pids in tds */ + if (type != PIPE_CONTROL) { + info |= is_out ? ED_OUT : ED_IN; + if (type == PIPE_ISOCHRONOUS) + info |= ED_ISO; + if (type == PIPE_INTERRUPT) { + ed->intriso.intr_info.int_load = bus_msecs; + if (interval > 32) + interval = 32; + } + } + ed->hwINFO = info; + /* value ignored except on periodic EDs, where + * we know it's already a power of 2 + */ + ed->interval = interval; + } + +done: spin_unlock_irqrestore (&ohci->lock, flags); return ed; } @@ -736,8 +739,8 @@ static void td_done (struct urb *urb, struct td *td) urb->iso_frame_desc [td->index].status = cc_to_error [cc]; if (cc != 0) - dbg (" urb %p iso TD %d len %d CC %d", - urb, td->index, dlen, cc); + dbg (" urb %p iso TD %p (%d) len %d CC %d", + urb, td, 1 + td->index, dlen, cc); /* BULK, INT, CONTROL ... 
drivers see aggregate length/status, * except that "setup" bytes aren't counted and "short" transfers @@ -776,9 +779,13 @@ static void td_done (struct urb *urb, struct td *td) - td->data_dma; } +#ifdef VERBOSE_DEBUG if (cc != 0) - dbg (" urb %p TD %d CC %d, len=%d", - urb, td->index, cc, urb->actual_length); + dbg (" urb %p TD %p (%d) CC %d, len=%d/%d", + urb, td, 1 + td->index, cc, + urb->actual_length, + urb->transfer_buffer_length); +#endif } } @@ -812,8 +819,8 @@ static struct td *dl_reverse_done_list (struct ohci_hcd *ohci) if (urb_priv && ((td_list->index + 1) < urb_priv->length)) { #ifdef OHCI_VERBOSE_DEBUG - dbg ("urb %p TD %d of %d, patch ED", - td_list->urb, + dbg ("urb %p TD %p (%d/%d), patch ED", + td_list->urb, td_list, 1 + td_list->index, urb_priv->length); #endif diff --git a/drivers/usb/host/ohci.h b/drivers/usb/host/ohci.h index a5bbe43fb75fbe10fb3137f57d711b1673ba51fd..d5fc9517f1329106ef5386675027fdd3088c1ab7 100644 --- a/drivers/usb/host/ohci.h +++ b/drivers/usb/host/ohci.h @@ -19,7 +19,7 @@ struct ed { #define ED_SKIP __constant_cpu_to_le32(1 << 14) #define ED_LOWSPEED __constant_cpu_to_le32(1 << 13) #define ED_OUT __constant_cpu_to_le32(0x01 << 11) -#define ED_IN __constant_cpu_to_le32(0x10 << 11) +#define ED_IN __constant_cpu_to_le32(0x02 << 11) __u32 hwTailP; /* tail of TD list */ __u32 hwHeadP; /* head of TD list */ #define ED_C __constant_cpu_to_le32(0x02) /* toggle carry */ @@ -30,24 +30,24 @@ struct ed { dma_addr_t dma; /* addr of ED */ struct ed *ed_prev; /* for non-interrupt EDs */ struct td *dummy; + struct list_head td_list; /* "shadow list" of our TDs */ + + u8 state; /* ED_{NEW,UNLINK,OPER} */ +#define ED_NEW 0x00 /* unused, no dummy td */ +#define ED_UNLINK 0x01 /* dummy td, maybe linked to hc */ +#define ED_OPER 0x02 /* dummy td, _is_ linked to hc */ +#define ED_URB_DEL 0x08 /* for unlinking; masked in */ u8 type; /* PIPE_{BULK,...} */ - u8 interval; /* interrupt, isochronous */ + u16 interval; /* interrupt, isochronous */ union { struct intr_info { /* interrupt */ - u8 int_period; u8 int_branch; u8 int_load; } intr_info; u16 last_iso; /* isochronous */ } intriso; - u8 state; /* ED_{NEW,UNLINK,OPER} */ -#define ED_NEW 0x00 /* unused, no dummy td */ -#define ED_UNLINK 0x01 /* dummy td, maybe linked to hc */ -#define ED_OPER 0x02 /* dummy td, _is_ linked to hc */ -#define ED_URB_DEL 0x08 /* for unlinking; masked in */ - /* HC may see EDs on rm_list until next frame (frame_no == tick) */ u16 tick; struct ed *ed_rm_list; @@ -108,6 +108,8 @@ struct td { dma_addr_t td_dma; /* addr of this TD */ dma_addr_t data_dma; /* addr of data it points to */ + + struct list_head td_list; /* "shadow list", TDs on same ED */ } __attribute__ ((aligned(32))); /* c/b/i need 16; only iso needs 32 */ #define TD_MASK ((u32)~0x1f) /* strip hw status in low addr bits */ diff --git a/drivers/usb/net/kaweth.c b/drivers/usb/net/kaweth.c index 66102b6f85e759533a95de42362cbe99bb576385..096a8b1aded263e80f0046fef33dbed005f8e877 100644 --- a/drivers/usb/net/kaweth.c +++ b/drivers/usb/net/kaweth.c @@ -220,10 +220,11 @@ struct kaweth_device struct urb *rx_urb; struct urb *tx_urb; struct urb *irq_urb; + + struct sk_buff *tx_skb; __u8 *firmware_buf; __u8 scratch[KAWETH_SCRATCH_SIZE]; - __u8 tx_buf[KAWETH_BUF_SIZE]; __u8 rx_buf[KAWETH_BUF_SIZE]; __u8 intbuffer[INTBUFFERSIZE]; __u16 packet_filter_bitmap; @@ -650,11 +651,13 @@ static int kaweth_ioctl(struct net_device *net, struct ifreq *rq, int cmd) static void kaweth_usb_transmit_complete(struct urb *urb) { struct kaweth_device *kaweth = 
urb->context; + struct sk_buff *skb = kaweth->tx_skb; if (unlikely(urb->status != 0)) kaweth_dbg("%s: TX status %d.", kaweth->net->name, urb->status); netif_wake_queue(kaweth->net); + dev_kfree_skb(skb); } /**************************************************************** @@ -663,7 +666,7 @@ static void kaweth_usb_transmit_complete(struct urb *urb) static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) { struct kaweth_device *kaweth = net->priv; - int count = skb->len; + char *private_header; int res; @@ -679,15 +682,30 @@ static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) kaweth_async_set_rx_mode(kaweth); netif_stop_queue(net); - *((__u16 *)kaweth->tx_buf) = cpu_to_le16(skb->len); + /* We now decide whether we can put our special header into the sk_buff */ + if (skb_cloned(skb) || skb_headroom(skb) < 2) { + /* no such luck - we make our own */ + struct sk_buff *copied_skb; + copied_skb = skb_copy_expand(skb, 2, 0, GFP_ATOMIC); + dev_kfree_skb_any(skb); + skb = copied_skb; + if (!copied_skb) { + kaweth->stats.tx_errors++; + netif_start_queue(net); + spin_unlock(&kaweth->device_lock); + return 0; + } + } - memcpy(kaweth->tx_buf + 2, skb->data, skb->len); + private_header = __skb_push(skb, 2); + *private_header = cpu_to_le16(skb->len); + kaweth->tx_skb = skb; FILL_BULK_URB(kaweth->tx_urb, kaweth->dev, usb_sndbulkpipe(kaweth->dev, 2), - kaweth->tx_buf, - count + 2, + private_header, + skb->len, kaweth_usb_transmit_complete, kaweth); kaweth->end = 0; @@ -699,6 +717,7 @@ static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) kaweth->stats.tx_errors++; netif_start_queue(net); + dev_kfree_skb(skb); } else { @@ -707,8 +726,6 @@ static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) net->trans_start = jiffies; } - dev_kfree_skb(skb); - spin_unlock(&kaweth->device_lock); return 0; diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 17e861fcae9f2f5938212b6f292cb4f256cc0f76..4e9f376c38f59e979ad418ef95f7d4d3a0a6d500 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -51,12 +51,6 @@ #include <linux/slab.h> -/* - * kernel thread actions - */ - -#define US_ACT_COMMAND 1 -#define US_ACT_EXIT 5 /*********************************************************************** * Host functions @@ -204,7 +198,7 @@ static int device_reset( Scsi_Cmnd *srb ) US_DEBUGP("device_reset() called\n" ); /* if the device was removed, then we're already reset */ - if (atomic_read(&us->sm_state) == US_STATE_DETACHED) + if (!test_bit(DEV_ATTACHED, &us->bitflags)) return SUCCESS; scsi_unlock(srb->host); @@ -235,7 +229,7 @@ static int bus_reset( Scsi_Cmnd *srb ) US_DEBUGP("bus_reset() called\n"); /* if the device has been removed, this worked */ - if (atomic_read(&us->sm_state) == US_STATE_DETACHED) { + if (!test_bit(DEV_ATTACHED, &us->bitflags)) { US_DEBUGP("-- device removed already\n"); return SUCCESS; } @@ -337,8 +331,8 @@ static int proc_info (char *buffer, char **start, off_t offset, int length, /* show the GUID of the device */ SPRINTF(" GUID: " GUID_FORMAT "\n", GUID_ARGS(us->guid)); - SPRINTF(" Attached: %s\n", (atomic_read(&us->sm_state) == - US_STATE_DETACHED) ? "Yes" : "No"); + SPRINTF(" Attached: %s\n", (test_bit(DEV_ATTACHED, &us->bitflags) + ? "Yes" : "No")); /* * Calculate start of next buffer, and return value. 
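(A hedged sketch of the attach-state idiom the scsiglue.c hunks above and the usb.c hunks below switch to: one atomic bit in the existing bitflags word instead of a dedicated device_state atomic_t. DEV_ATTACHED is the bit this patch adds to usb.h; the ex_* wrappers are hypothetical.)

#include <asm/bitops.h>

struct ex_us { unsigned long bitflags; };
#define DEV_ATTACHED 2	/* bit number, as defined in drivers/usb/storage/usb.h below */

static void ex_mark_attached(struct ex_us *us)
{
	set_bit(DEV_ATTACHED, &us->bitflags);	/* atomic; no extra lock needed */
}

static void ex_mark_detached(struct ex_us *us)
{
	clear_bit(DEV_ATTACHED, &us->bitflags);
}

static int ex_dev_attached(struct ex_us *us)
{
	return test_bit(DEV_ATTACHED, &us->bitflags);
}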
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c index dfdc30db5489e39623b242e395571ed758bcc9ba..76d70eb5a9bf3ad574b76de7918c59ae94c19d09 100644 --- a/drivers/usb/storage/usb.c +++ b/drivers/usb/storage/usb.c @@ -99,13 +99,6 @@ MODULE_LICENSE("GPL"); static int my_host_number; -/* - * kernel thread actions - */ - -#define US_ACT_COMMAND 1 -#define US_ACT_EXIT 5 - /* The list of structures and the protective lock for them */ struct us_data *us_list; struct semaphore us_list_semaphore; @@ -426,7 +419,7 @@ static int usb_stor_control_thread(void * __us) down(&(us->dev_semaphore)); /* our device has gone - pretend not ready */ - if (atomic_read(&us->device_state) == US_STATE_DETACHED) { + if (!test_bit(DEV_ATTACHED, &us->bitflags)) { US_DEBUGP("Request is for removed device\n"); /* For REQUEST_SENSE, it's the data. But * for anything else, it should look like @@ -450,7 +443,7 @@ static int usb_stor_control_thread(void * __us) sizeof(usb_stor_sense_notready)); us->srb->result = CHECK_CONDITION << 1; } - } else { /* atomic_read(&us->device_state) == STATE_DETACHED */ + } else { /* test_bit(DEV_ATTACHED, &us->bitflags) */ /* Handle those devices which need us to fake * their inquiry data */ @@ -557,9 +550,8 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, unsigned int flags; struct us_unusual_dev *unusual_dev; struct us_data *ss = NULL; -#ifdef CONFIG_USB_STORAGE_SDDR09 int result; -#endif + int new_device = 0; /* these are temporary copies -- we test on these, then put them * in the us-data structure @@ -570,13 +562,13 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, u8 subclass = 0; u8 protocol = 0; - /* the altsettting on the interface we're probing that matched our + /* the altsetting on the interface we're probing that matched our * usb_match_id table */ struct usb_interface *intf = dev->actconfig->interface; struct usb_interface_descriptor *altsetting = intf[ifnum].altsetting + intf[ifnum].act_altsetting; - US_DEBUGP("act_altsettting is %d\n", intf[ifnum].act_altsetting); + US_DEBUGP("act_altsetting is %d\n", intf[ifnum].act_altsetting); /* clear the temporary strings */ memset(mf, 0, sizeof(mf)); @@ -663,7 +655,7 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, return NULL; } - /* At this point, we're committed to using the device */ + /* At this point, we've decided to try to use the device */ usb_get_dev(dev); /* clear the GUID and fetch the strings */ @@ -696,7 +688,8 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, */ ss = us_list; while ((ss != NULL) && - ((ss->pusb_dev) || !GUID_EQUAL(guid, ss->guid))) + (test_bit(DEV_ATTACHED, &ss->bitflags) || + !GUID_EQUAL(guid, ss->guid))) ss = ss->next; if (ss != NULL) { @@ -710,29 +703,23 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, /* establish the connection to the new device upon reconnect */ ss->ifnum = ifnum; ss->pusb_dev = dev; - atomic_set(&ss->device_state, US_STATE_ATTACHED); + set_bit(DEV_ATTACHED, &ss->bitflags); /* copy over the endpoint data */ - if (ep_in) - ss->ep_in = ep_in->bEndpointAddress & - USB_ENDPOINT_NUMBER_MASK; - if (ep_out) - ss->ep_out = ep_out->bEndpointAddress & - USB_ENDPOINT_NUMBER_MASK; + ss->ep_in = ep_in->bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK; + ss->ep_out = ep_out->bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK; ss->ep_int = ep_int; /* allocate an IRQ callback if one is needed */ - if ((ss->protocol == US_PR_CBI) && usb_stor_allocate_irq(ss)) { - 
usb_put_dev(dev); - return NULL; - } + if ((ss->protocol == US_PR_CBI) && usb_stor_allocate_irq(ss)) + goto BadDevice; /* allocate the URB we're going to use */ ss->current_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!ss->current_urb) { - usb_put_dev(dev); - return NULL; - } + if (!ss->current_urb) + goto BadDevice; /* Re-Initialize the device if it needs it */ if (unusual_dev && unusual_dev->initFunction) @@ -752,14 +739,12 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, return NULL; } memset(ss, 0, sizeof(struct us_data)); + new_device = 1; /* allocate the URB we're going to use */ ss->current_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!ss->current_urb) { - kfree(ss); - usb_put_dev(dev); - return NULL; - } + if (!ss->current_urb) + goto BadDevice; /* Initialize the mutexes only when the struct is new */ init_completion(&(ss->notify)); @@ -776,12 +761,10 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ss->unusual_dev = unusual_dev; /* copy over the endpoint data */ - if (ep_in) - ss->ep_in = ep_in->bEndpointAddress & - USB_ENDPOINT_NUMBER_MASK; - if (ep_out) - ss->ep_out = ep_out->bEndpointAddress & - USB_ENDPOINT_NUMBER_MASK; + ss->ep_in = ep_in->bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK; + ss->ep_out = ep_out->bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK; ss->ep_int = ep_int; /* establish the connection to the new device */ @@ -904,12 +887,8 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, #endif default: - ss->transport_name = "Unknown"; - kfree(ss->current_urb); - kfree(ss); - usb_put_dev(dev); - return NULL; - break; + /* ss->transport_name = "Unknown"; */ + goto BadDevice; } US_DEBUGP("Transport: %s\n", ss->transport_name); @@ -959,22 +938,14 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, #endif default: - ss->protocol_name = "Unknown"; - kfree(ss->current_urb); - kfree(ss); - usb_put_dev(dev); - return NULL; - break; + /* ss->protocol_name = "Unknown"; */ + goto BadDevice; } US_DEBUGP("Protocol: %s\n", ss->protocol_name); /* allocate an IRQ callback if one is needed */ - if ((ss->protocol == US_PR_CBI) && usb_stor_allocate_irq(ss)) { - kfree(ss->current_urb); - kfree(ss); - usb_put_dev(dev); - return NULL; - } + if ((ss->protocol == US_PR_CBI) && usb_stor_allocate_irq(ss)) + goto BadDevice; /* * Since this is a new device, we need to generate a scsi @@ -1001,16 +972,13 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, /* start up our control thread */ atomic_set(&ss->sm_state, US_STATE_IDLE); - atomic_set(&ss->device_state, US_STATE_ATTACHED); + set_bit(DEV_ATTACHED, &ss->bitflags); ss->pid = kernel_thread(usb_stor_control_thread, ss, CLONE_VM); if (ss->pid < 0) { printk(KERN_WARNING USB_STORAGE "Unable to start control thread\n"); - kfree(ss->current_urb); - kfree(ss); - usb_put_dev(dev); - return NULL; + goto BadDevice; } /* wait for the thread to start */ @@ -1018,7 +986,17 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, /* now register - our detect function will be called */ ss->htmplt.module = THIS_MODULE; - scsi_register_host(&(ss->htmplt)); + result = scsi_register_host(&(ss->htmplt)); + if (result) { + printk(KERN_WARNING USB_STORAGE + "Unable to register the scsi host\n"); + + /* tell the control thread to exit */ + ss->action = US_ACT_EXIT; + up(&ss->sema); + wait_for_completion(&ss->notify); + goto BadDevice; + } /* lock access to the data structures */ down(&us_list_semaphore); @@ -1038,6 +1016,31 @@ static 
void * storage_probe(struct usb_device *dev, unsigned int ifnum, /* return a pointer for the disconnect function */ return ss; + + /* we come here if there are any problems */ + BadDevice: + US_DEBUGP("storage_probe() failed\n"); + down(&ss->irq_urb_sem); + if (ss->irq_urb) { + usb_unlink_urb(ss->irq_urb); + usb_free_urb(ss->irq_urb); + ss->irq_urb = NULL; + } + up(&ss->irq_urb_sem); + if (ss->current_urb) { + usb_unlink_urb(ss->current_urb); + usb_free_urb(ss->current_urb); + ss->current_urb = NULL; + } + + clear_bit(DEV_ATTACHED, &ss->bitflags); + ss->pusb_dev = NULL; + if (new_device) + kfree(ss); + else + up(&ss->dev_semaphore); + usb_put_dev(dev); + return NULL; } /* Handle a disconnect event from the USB core */ @@ -1078,7 +1081,7 @@ static void storage_disconnect(struct usb_device *dev, void *ptr) /* mark the device as gone */ usb_put_dev(ss->pusb_dev); ss->pusb_dev = NULL; - atomic_set(&ss->sm_state, US_STATE_DETACHED); + clear_bit(DEV_ATTACHED, &ss->bitflags); /* unlock access to the device data structure */ up(&(ss->dev_semaphore)); diff --git a/drivers/usb/storage/usb.h b/drivers/usb/storage/usb.h index 6c90eb638a4b53ed56f46d64c881b7f0c7437ec7..d0f1f24ded28638b952c958ee59d07969e2d7290 100644 --- a/drivers/usb/storage/usb.h +++ b/drivers/usb/storage/usb.h @@ -103,9 +103,10 @@ struct us_unusual_dev { #define US_FL_SCM_MULT_TARG 0x00000020 /* supports multiple targets */ #define US_FL_FIX_INQUIRY 0x00000040 /* INQUIRY response needs fixing */ -/* device attached/detached states */ -#define US_STATE_DETACHED 1 -#define US_STATE_ATTACHED 2 + +/* kernel thread actions */ +#define US_ACT_COMMAND 1 +#define US_ACT_EXIT 5 /* processing state machine states */ #define US_STATE_IDLE 1 @@ -127,10 +128,9 @@ struct us_data { /* The device we're working with * It's important to note: * (o) you must hold dev_semaphore to change pusb_dev - * (o) device_state should change whenever pusb_dev does + * (o) DEV_ATTACHED in bitflags should change whenever pusb_dev does */ struct semaphore dev_semaphore; /* protect pusb_dev */ - atomic_t device_state; /* attached or detached */ struct usb_device *pusb_dev; /* this usb_device */ unsigned int flags; /* from filter initially */ @@ -174,6 +174,7 @@ struct us_data { struct semaphore ip_waitq; /* for CBI interrupts */ unsigned long bitflags; /* single-bit flags: */ #define IP_WANTED 1 /* is an IRQ expected? */ +#define DEV_ATTACHED 2 /* is the dev. attached?*/ /* interrupt communications data */ struct semaphore irq_urb_sem; /* to protect irq_urb */ diff --git a/fs/select.c b/fs/select.c index 6a5909a75677acc1041b1484f3ce3a73c0ea3da1..30c29f1e49f8830973ce0f43ea897fe0e4fd418a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -12,9 +12,6 @@ * 24 January 2000 * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). - * - * Dec 2001 - * Stack allocation and fast path (Andi Kleen) */ #include <linux/slab.h> @@ -29,6 +26,21 @@ #define ROUND_UP(x,y) (((x)+(y)-1)/(y)) #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) +struct poll_table_entry { + struct file * filp; + wait_queue_t wait; + wait_queue_head_t * wait_address; +}; + +struct poll_table_page { + struct poll_table_page * next; + struct poll_table_entry * entry; + struct poll_table_entry entries[0]; +}; + +#define POLL_TABLE_FULL(table) \ + ((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table)) + /* * Ok, Peter made a complicated, but straightforward multiple_wait() function. 
* I have rewritten this, taking some shortcuts: This code may not be easy to @@ -50,39 +62,30 @@ void poll_freewait(poll_table* pt) struct poll_table_page *old; entry = p->entry; - while (entry > p->entries) { + do { entry--; remove_wait_queue(entry->wait_address,&entry->wait); fput(entry->filp); - } + } while (entry > p->entries); old = p; p = p->next; - if (old != &pt->inline_page) - free_page((unsigned long) old); + free_page((unsigned long) old); } } void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) { struct poll_table_page *table = p->table; - struct poll_table_page *new_table = NULL; - int sz; - - if (!table) { - new_table = &p->inline_page; - } else { - sz = (table == &p->inline_page) ? POLL_INLINE_TABLE_LEN : PAGE_SIZE; - if ((char*)table->entry >= (char*)table + sz) { - new_table = (struct poll_table_page *)__get_free_page(GFP_KERNEL); - if (!new_table) { - p->error = -ENOMEM; - __set_current_state(TASK_RUNNING); - return; - } - } - } - if (new_table) { + if (!table || POLL_TABLE_FULL(table)) { + struct poll_table_page *new_table; + + new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL); + if (!new_table) { + p->error = -ENOMEM; + __set_current_state(TASK_RUNNING); + return; + } new_table->entry = new_table->entries; new_table->next = table; p->table = new_table; @@ -110,6 +113,48 @@ void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table #define BITS(fds, n) (*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n)) +static int max_select_fd(unsigned long n, fd_set_bits *fds) +{ + unsigned long *open_fds; + unsigned long set; + int max; + + /* handle last in-complete long-word first */ + set = ~(~0UL << (n & (__NFDBITS-1))); + n /= __NFDBITS; + open_fds = current->files->open_fds->fds_bits+n; + max = 0; + if (set) { + set &= BITS(fds, n); + if (set) { + if (!(set & ~*open_fds)) + goto get_max; + return -EBADF; + } + } + while (n) { + open_fds--; + n--; + set = BITS(fds, n); + if (!set) + continue; + if (set & ~*open_fds) + return -EBADF; + if (max) + continue; +get_max: + do { + max++; + set >>= 1; + } while (set); + max += n * __NFDBITS; + } + + return max; +} + +#define BIT(i) (1UL << ((i)&(__NFDBITS-1))) +#define MEM(i,m) ((m)+(unsigned)(i)/__NFDBITS) #define ISSET(i,m) (((i)&*(m)) != 0) #define SET(i,m) (*(m) |= (i)) @@ -120,71 +165,56 @@ void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table int do_select(int n, fd_set_bits *fds, long *timeout) { poll_table table, *wait; - int retval, off, max, maxoff; + int retval, i, off; long __timeout = *timeout; + read_lock(¤t->files->file_lock); + retval = max_select_fd(n, fds); + read_unlock(¤t->files->file_lock); + + if (retval < 0) + return retval; + n = retval; + poll_initwait(&table); wait = &table; if (!__timeout) wait = NULL; - retval = 0; - maxoff = n/BITS_PER_LONG; - max = 0; for (;;) { set_current_state(TASK_INTERRUPTIBLE); - for (off = 0; off <= maxoff; off++) { - unsigned long val = BITS(fds, off); + for (i = 0 ; i < n; i++) { + unsigned long bit = BIT(i); + unsigned long mask; + struct file *file; - if (!val) + off = i / __NFDBITS; + if (!(bit & BITS(fds, off))) continue; - while (val) { - int k = ffz(~val); - unsigned long mask, bit; - struct file *file; - - if (k > n%BITS_PER_LONG) - break; - - bit = (1UL << k); - val &= ~bit; - - file = fget((off * BITS_PER_LONG) + k); - mask = POLLNVAL; - if (file) { - mask = DEFAULT_POLLMASK; - if (file->f_op && file->f_op->poll) - mask = file->f_op->poll(file, wait); - fput(file); - } else { - /* 
This error will shadow all other results. - * This matches previous linux behaviour */ - retval = -EBADF; - goto out; - } - if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) { - SET(bit, __RES_IN(fds,off)); - retval++; - wait = NULL; - } - if ((mask& POLLOUT_SET) && ISSET(bit,__OUT(fds,off))) { - SET(bit, __RES_OUT(fds,off)); - retval++; - wait = NULL; - } - if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) { - SET(bit, __RES_EX(fds,off)); - retval++; - wait = NULL; - } - - if (!(val &= ~bit)) - break; + file = fget(i); + mask = POLLNVAL; + if (file) { + mask = DEFAULT_POLLMASK; + if (file->f_op && file->f_op->poll) + mask = file->f_op->poll(file, wait); + fput(file); + } + if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) { + SET(bit, __RES_IN(fds,off)); + retval++; + wait = NULL; + } + if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) { + SET(bit, __RES_OUT(fds,off)); + retval++; + wait = NULL; + } + if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) { + SET(bit, __RES_EX(fds,off)); + retval++; + wait = NULL; } } - - - maxoff = max; wait = NULL; if (retval || !__timeout || signal_pending(current)) break; @@ -194,43 +224,25 @@ int do_select(int n, fd_set_bits *fds, long *timeout) } __timeout = schedule_timeout(__timeout); } - -out: current->state = TASK_RUNNING; poll_freewait(&table); /* - * Update the caller timeout. + * Up-to-date the caller timeout. */ *timeout = __timeout; return retval; } -/* - * We do a VERIFY_WRITE here even though we are only reading this time: - * we'll write to it eventually.. - */ +static void *select_bits_alloc(int size) +{ + return kmalloc(6 * size, GFP_KERNEL); +} -static int get_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset) +static void select_bits_free(void *bits, int size) { - unsigned long rounded = FDS_BYTES(nr), mask; - if (ufdset) { - int error = verify_area(VERIFY_WRITE, ufdset, rounded); - if (!error && __copy_from_user(fdset, ufdset, rounded)) - error = -EFAULT; - if (nr % __NFDBITS == 0) - mask = 0; - else { - /* This includes one bit too much according to SU; - but without this some programs hang. */ - mask = ~(~0UL << (nr%__NFDBITS)); - } - fdset[nr/__NFDBITS] &= mask; - return error; - } - memset(fdset, 0, rounded); - return 0; + kfree(bits); } /* @@ -251,7 +263,6 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) char *bits; long timeout; int ret, size, max_fdset; - char stack_bits[FDS_BYTES(FAST_SELECT_MAX) * 6]; timeout = MAX_SCHEDULE_TIMEOUT; if (tvp) { @@ -286,16 +297,11 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) * since we used fdset we need to allocate memory in units of * long-words. 
*/ + ret = -ENOMEM; size = FDS_BYTES(n); - if (n < FAST_SELECT_MAX) { - bits = stack_bits; - } else { - ret = -ENOMEM; - bits = kmalloc(6*size, GFP_KERNEL); - if (!bits) - goto out_nofds; - } - + bits = select_bits_alloc(size); + if (!bits) + goto out_nofds; fds.in = (unsigned long *) bits; fds.out = (unsigned long *) (bits + size); fds.ex = (unsigned long *) (bits + 2*size); @@ -307,7 +313,9 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) (ret = get_fd_set(n, outp, fds.out)) || (ret = get_fd_set(n, exp, fds.ex))) goto out; - memset(fds.res_in, 0, 3*size); + zero_fd_set(n, fds.res_in); + zero_fd_set(n, fds.res_out); + zero_fd_set(n, fds.res_ex); ret = do_select(n, &fds, &timeout); @@ -318,8 +326,8 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) usec = timeout % HZ; usec *= (1000000/HZ); } - __put_user(sec, &tvp->tv_sec); - __put_user(usec, &tvp->tv_usec); + put_user(sec, &tvp->tv_sec); + put_user(usec, &tvp->tv_usec); } if (ret < 0) @@ -336,10 +344,8 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) set_fd_set(n, exp, fds.res_ex); out: - if (n >= FAST_SELECT_MAX) - kfree(bits); + select_bits_free(bits, size); out_nofds: - return ret; } @@ -404,42 +410,12 @@ static int do_poll(unsigned int nfds, unsigned int nchunks, unsigned int nleft, return count; } -static int fast_poll(poll_table *table, poll_table *wait, struct pollfd *ufds, - unsigned int nfds, long timeout) -{ - poll_table *pt = wait; - struct pollfd fds[FAST_POLL_MAX]; - int count, i; - - if (copy_from_user(fds, ufds, nfds * sizeof(struct pollfd))) - return -EFAULT; - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - count = 0; - do_pollfd(nfds, fds, &pt, &count); - pt = NULL; - if (count || !timeout || signal_pending(current)) - break; - count = wait->error; - if (count) - break; - timeout = schedule_timeout(timeout); - } - current->state = TASK_RUNNING; - for (i = 0; i < nfds; i++) - __put_user(fds[i].revents, &ufds[i].revents); - poll_freewait(table); - if (!count && signal_pending(current)) - return -EINTR; - return count; -} - asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout) { - int i, j, err, fdcount; + int i, j, fdcount, err; struct pollfd **fds; poll_table table, *wait; - int nchunks, nleft; + int nchunks, nleft; /* Do a sanity check on nfds ... 
*/ if (nfds > NR_OPEN) @@ -453,45 +429,43 @@ asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout) timeout = MAX_SCHEDULE_TIMEOUT; } - poll_initwait(&table); wait = &table; if (!timeout) wait = NULL; - if (nfds < FAST_POLL_MAX) - return fast_poll(&table, wait, ufds, nfds, timeout); - err = -ENOMEM; - fds = (struct pollfd **)kmalloc( - (1 + (nfds - 1) / POLLFD_PER_PAGE) * sizeof(struct pollfd *), - GFP_KERNEL); - if (fds == NULL) - goto out; - + fds = NULL; + if (nfds != 0) { + fds = (struct pollfd **)kmalloc( + (1 + (nfds - 1) / POLLFD_PER_PAGE) * sizeof(struct pollfd *), + GFP_KERNEL); + if (fds == NULL) + goto out; + } + nchunks = 0; nleft = nfds; - while (nleft > POLLFD_PER_PAGE) { + while (nleft > POLLFD_PER_PAGE) { /* allocate complete PAGE_SIZE chunks */ fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL); if (fds[nchunks] == NULL) goto out_fds; nchunks++; nleft -= POLLFD_PER_PAGE; } - if (nleft) { + if (nleft) { /* allocate last PAGE_SIZE chunk, only nleft elements used */ fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL); if (fds[nchunks] == NULL) goto out_fds; - } - + } + err = -EFAULT; for (i=0; i < nchunks; i++) if (copy_from_user(fds[i], ufds + i*POLLFD_PER_PAGE, PAGE_SIZE)) goto out_fds1; - if (nleft) { if (copy_from_user(fds[nchunks], ufds + nchunks*POLLFD_PER_PAGE, - nleft * sizeof(struct pollfd))) + nleft * sizeof(struct pollfd))) goto out_fds1; } @@ -515,7 +489,8 @@ asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout) out_fds: for (i=0; i < nchunks; i++) free_page((unsigned long)(fds[i])); - kfree(fds); + if (nfds != 0) + kfree(fds); out: poll_freewait(&table); return err; diff --git a/include/linux/poll.h b/include/linux/poll.h index 86b1ee2d3eb319e5c3ec56722247c0c34b39ee82..796aac51388a499a917e855a2cc16d179f0495ee 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -10,32 +10,13 @@ #include <linux/mm.h> #include <asm/uaccess.h> -#define POLL_INLINE_BYTES 256 -#define FAST_SELECT_MAX 128 -#define FAST_POLL_MAX 128 -#define POLL_INLINE_ENTRIES (1+(POLL_INLINE_BYTES / sizeof(struct poll_table_entry))) - -struct poll_table_entry { - struct file * filp; - wait_queue_t wait; - wait_queue_head_t * wait_address; -}; - -struct poll_table_page { - struct poll_table_page * next; - struct poll_table_entry * entry; - struct poll_table_entry entries[0]; -}; +struct poll_table_page; typedef struct poll_table_struct { int error; struct poll_table_page * table; - struct poll_table_page inline_page; - struct poll_table_entry inline_table[POLL_INLINE_ENTRIES]; } poll_table; -#define POLL_INLINE_TABLE_LEN (sizeof(poll_table) - offsetof(poll_table, inline_page)) - extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p); static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) @@ -49,7 +30,6 @@ static inline void poll_initwait(poll_table* pt) pt->error = 0; pt->table = NULL; } - extern void poll_freewait(poll_table* pt); @@ -69,6 +49,27 @@ typedef struct { #define FDS_LONGS(nr) (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG) #define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long)) +/* + * We do a VERIFY_WRITE here even though we are only reading this time: + * we'll write to it eventually.. + * + * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned. 
+ */ +static inline +int get_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset) +{ + nr = FDS_BYTES(nr); + if (ufdset) { + int error; + error = verify_area(VERIFY_WRITE, ufdset, nr); + if (!error && __copy_from_user(fdset, ufdset, nr)) + error = -EFAULT; + return error; + } + memset(fdset, 0, nr); + return 0; +} + static inline void set_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset) { @@ -76,6 +77,12 @@ void set_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset) __copy_to_user(ufdset, fdset, FDS_BYTES(nr)); } +static inline +void zero_fd_set(unsigned long nr, unsigned long *fdset) +{ + memset(fdset, 0, FDS_BYTES(nr)); +} + extern int do_select(int n, fd_set_bits *fds, long *timeout); #endif /* KERNEL */ diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index cb6332482af27cfd0a875f5660da2f67147cb4cc..19ebeb0a4988cb50e69387cea50c793197fe458b 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -63,8 +63,6 @@ extern int md_size[MAX_MD_DEVS]; extern struct hd_struct md_hd_struct[MAX_MD_DEVS]; -extern void add_mddev_mapping (mddev_t *mddev, kdev_t dev, void *data); -extern void del_mddev_mapping (mddev_t *mddev, kdev_t dev); extern char * partition_name (kdev_t dev); extern inline char * bdev_partition_name (struct block_device *bdev) { @@ -77,14 +75,9 @@ extern mdk_thread_t * md_register_thread (void (*run) (void *data), extern void md_unregister_thread (mdk_thread_t *thread); extern void md_wakeup_thread(mdk_thread_t *thread); extern void md_interrupt_thread (mdk_thread_t *thread); -extern int md_update_sb (mddev_t *mddev); -extern int md_do_sync(mddev_t *mddev, mdp_disk_t *spare); +extern void md_update_sb (mddev_t *mddev); extern void md_done_sync(mddev_t *mddev, int blocks, int ok); extern void md_sync_acct(kdev_t dev, unsigned long nr_sectors); -extern void md_recover_arrays (void); -extern int md_check_ordering (mddev_t *mddev); -extern int md_notify_reboot(struct notifier_block *this, - unsigned long code, void *x); extern int md_error (mddev_t *mddev, struct block_device *bdev); extern int md_run_setup(void); diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 7b270a50487ee7b9fb3fb1f11870d48a12e6df5f..e238bd222ea2607d56363d3e5363a3eb3477612f 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -64,24 +64,6 @@ typedef struct mdk_rdev_s mdk_rdev_t; #define MAX_MD_DEVS (1<<MINORBITS) /* Max number of md dev */ -/* - * Maps a kdev to an mddev/subdev. How 'data' is handled is up to - * the personality. (eg. HSM uses this to identify individual LVs) - */ -typedef struct dev_mapping_s { - mddev_t *mddev; - void *data; -} dev_mapping_t; - -extern dev_mapping_t mddev_map [MAX_MD_DEVS]; - -static inline mddev_t * kdev_to_mddev (kdev_t dev) -{ - if (major(dev) != MD_MAJOR) - BUG(); - return mddev_map[minor(dev)].mddev; -} - /* * options passed in raidrun: */ @@ -196,31 +178,38 @@ struct mddev_s mdk_personality_t *pers; int __minor; mdp_super_t *sb; - int nb_dev; struct list_head disks; int sb_dirty; - mdu_param_t param; int ro; + + struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ unsigned long curr_resync; /* blocks scheduled */ unsigned long resync_mark; /* a recent timestamp */ unsigned long resync_mark_cnt;/* blocks written at resync_mark */ - char *name; + /* recovery_running is 0 for no recovery/resync, + * 1 for active recovery + * 2 for active resync + * -error for an error (e.g. 
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index cb6332482af27cfd0a875f5660da2f67147cb4cc..19ebeb0a4988cb50e69387cea50c793197fe458b 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -63,8 +63,6 @@ extern int md_size[MAX_MD_DEVS];
 
 extern struct hd_struct md_hd_struct[MAX_MD_DEVS];
 
-extern void add_mddev_mapping (mddev_t *mddev, kdev_t dev, void *data);
-extern void del_mddev_mapping (mddev_t *mddev, kdev_t dev);
 extern char * partition_name (kdev_t dev);
 extern inline char * bdev_partition_name (struct block_device *bdev)
 {
@@ -77,14 +75,9 @@ extern mdk_thread_t * md_register_thread (void (*run) (void *data),
 extern void md_unregister_thread (mdk_thread_t *thread);
 extern void md_wakeup_thread(mdk_thread_t *thread);
 extern void md_interrupt_thread (mdk_thread_t *thread);
-extern int md_update_sb (mddev_t *mddev);
-extern int md_do_sync(mddev_t *mddev, mdp_disk_t *spare);
+extern void md_update_sb (mddev_t *mddev);
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
 extern void md_sync_acct(kdev_t dev, unsigned long nr_sectors);
-extern void md_recover_arrays (void);
-extern int md_check_ordering (mddev_t *mddev);
-extern int md_notify_reboot(struct notifier_block *this,
-					unsigned long code, void *x);
 extern int md_error (mddev_t *mddev, struct block_device *bdev);
 extern int md_run_setup(void);
 
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 7b270a50487ee7b9fb3fb1f11870d48a12e6df5f..e238bd222ea2607d56363d3e5363a3eb3477612f 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -64,24 +64,6 @@ typedef struct mdk_rdev_s mdk_rdev_t;
 
 #define MAX_MD_DEVS  (1<<MINORBITS)	/* Max number of md dev */
 
-/*
- * Maps a kdev to an mddev/subdev. How 'data' is handled is up to
- * the personality. (eg. HSM uses this to identify individual LVs)
- */
-typedef struct dev_mapping_s {
-	mddev_t *mddev;
-	void	*data;
-} dev_mapping_t;
-
-extern dev_mapping_t mddev_map [MAX_MD_DEVS];
-
-static inline mddev_t * kdev_to_mddev (kdev_t dev)
-{
-	if (major(dev) != MD_MAJOR)
-		BUG();
-	return mddev_map[minor(dev)].mddev;
-}
-
 /*
  * options passed in raidrun:
  */
@@ -196,31 +178,38 @@ struct mddev_s
 	mdk_personality_t		*pers;
 	int				__minor;
 	mdp_super_t			*sb;
-	int				nb_dev;
 	struct list_head		disks;
 	int				sb_dirty;
-	mdu_param_t			param;
 	int				ro;
+
+	struct mdk_thread_s		*sync_thread;	/* doing resync or reconstruct */
 	unsigned long			curr_resync;	/* blocks scheduled */
 	unsigned long			resync_mark;	/* a recent timestamp */
 	unsigned long			resync_mark_cnt;/* blocks written at resync_mark */
-	char				*name;
+	/* recovery_running is 0 for no recovery/resync,
+	 * 1 for active recovery
+	 * 2 for active resync
+	 * -error for an error (e.g. -EINTR)
+	 * it can only be set > 0 under reconfig_sem
+	 */
 	int				recovery_running;
+	int				in_sync;	/* know to not need resync */
 	struct semaphore		reconfig_sem;
-	struct semaphore		recovery_sem;
-	struct semaphore		resync_sem;
 	atomic_t			active;
+	mdp_disk_t			*spare;
 
 	atomic_t			recovery_active; /* blocks scheduled, but not written */
 	wait_queue_head_t		recovery_wait;
 
+	request_queue_t			queue;	/* for plugging ... */
+
 	struct list_head		all_mddevs;
 };
 
 struct mdk_personality_s
 {
 	char *name;
-	int (*make_request)(mddev_t *mddev, int rw, struct bio *bio);
+	int (*make_request)(request_queue_t *q, struct bio *bio);
 	int (*run)(mddev_t *mddev);
 	int (*stop)(mddev_t *mddev);
 	int (*status)(char *page, mddev_t *mddev);
@@ -237,9 +226,6 @@ struct mdk_personality_s
 	 * SPARE_ACTIVE expects such a change)
 	 */
 	int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state);
-
-	int (*stop_resync)(mddev_t *mddev);
-	int (*restart_resync)(mddev_t *mddev);
 	int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster);
 };
 
@@ -279,13 +265,6 @@ extern mdp_disk_t *get_spare(mddev_t *mddev);
 
 #define ITERATE_RDEV(mddev,rdev,tmp)					\
 	ITERATE_RDEV_GENERIC((mddev)->disks,same_set,rdev,tmp)
 
-/*
- * Same as above, but assumes that the device has rdev->desc_nr numbered
- * from 0 to mddev->nb_dev, and iterates through rdevs in ascending order.
- */
-#define ITERATE_RDEV_ORDERED(mddev,rdev,i)				\
-	for (i = 0; rdev = find_rdev_nr(mddev, i), i < mddev->nb_dev; i++)
-
 /*
  * Iterates through all 'RAID managed disks'
@@ -299,26 +278,6 @@ extern mdp_disk_t *get_spare(mddev_t *mddev);
 #define ITERATE_RDEV_PENDING(rdev,tmp)					\
 	ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp)
 
-/*
- * iterates through all used mddevs in the system.
- */
-#define ITERATE_MDDEV(mddev,tmp)					\
-									\
-	for (tmp = all_mddevs.next;					\
-		mddev = list_entry(tmp, mddev_t, all_mddevs),		\
-			tmp = tmp->next, tmp->prev != &all_mddevs	\
-		; )
-
-static inline int lock_mddev (mddev_t * mddev)
-{
-	return down_interruptible(&mddev->reconfig_sem);
-}
-
-static inline void unlock_mddev (mddev_t * mddev)
-{
-	up(&mddev->reconfig_sem);
-}
-
 #define xchg_values(x,y) do { __typeof__(x) __tmp = x; \
 	x = y; y = __tmp; } while (0)
 
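
The new recovery_running comment above encodes a small state convention:
0 idle, 1 active recovery, 2 active resync, a negative errno for a failed
run, and positive values may only be set while reconfig_sem is held.  A
hedged userspace analogue of that convention follows -- a pthread mutex
stands in for reconfig_sem, and the function names are illustrative, not
md code:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t reconfig_sem = PTHREAD_MUTEX_INITIALIZER;
static int recovery_running;	/* 0 idle, 1 recovery, 2 resync, -errno */

static int try_start_resync(void)
{
	int ret = 0;

	pthread_mutex_lock(&reconfig_sem);	/* "only set > 0 under reconfig_sem" */
	if (recovery_running == 0)
		recovery_running = 2;		/* active resync */
	else
		ret = -EBUSY;			/* recovery/resync already running */
	pthread_mutex_unlock(&reconfig_sem);
	return ret;
}

static void finish_resync(int err)
{
	/* clearing, or recording an error such as -EINTR, does not
	 * need the semaphore under this convention */
	recovery_running = err ? err : 0;
}

int main(void)
{
	printf("first start:  %d\n", try_start_resync());
	printf("second start: %d\n", try_start_resync());
	finish_resync(-EINTR);
	printf("after interrupted resync: %d\n", recovery_running);
	return 0;
}
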
diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h
index 3185c754e3fb3b3f9084f80639b1708568df73ea..f9f02ab19bd3a206c97a4623a21cd76d40ba388e 100644
--- a/include/linux/raid/raid1.h
+++ b/include/linux/raid/raid1.h
@@ -33,8 +33,7 @@ struct r1_private_data_s {
 	int			working_disks;
 	int			last_used;
 	sector_t		next_seq_sect;
-	mdk_thread_t		*thread, *resync_thread;
-	int			resync_mirrors;
+	mdk_thread_t		*thread;
 	mirror_info_t		*spare;
 	spinlock_t		device_lock;
 
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 67f7bf47179882068c4ae70c4a0679e1ffa203b1..e14864259ffdbd094ff37debcc245b4065f1c03c 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -177,7 +177,7 @@ struct stripe_head {
  * is put on a "delayed" queue until there are no stripes currently
  * in a pre-read phase.  Further, if the "delayed" queue is empty when
  * a stripe is put on it then we "plug" the queue and do not process it
- * until an unplg call is made. (the tq_disk list is run).
+ * until an unplug call is made. (blk_run_queues is run).
  *
  * When preread is initiated on a stripe, we set PREREAD_ACTIVE and add
  * it to the count of prereading stripes.
@@ -205,12 +205,11 @@ struct disk_info {
 struct raid5_private_data {
 	struct stripe_head	**stripe_hashtbl;
 	mddev_t			*mddev;
-	mdk_thread_t		*thread, *resync_thread;
+	mdk_thread_t		*thread;
 	struct disk_info	disks[MD_SB_DISKS];
 	struct disk_info	*spare;
 	int			chunk_size, level, algorithm;
 	int			raid_disks, working_disks, failed_disks;
-	int			resync_parity;
 	int			max_nr_stripes;
 
 	struct list_head	handle_list; /* stripes needing handling */
@@ -229,9 +228,6 @@ struct raid5_private_data {
 					 * waiting for 25% to be free
 					 */
 	spinlock_t		device_lock;
-
-	int			plugged;
-	struct tq_struct	plug_tq;
 };
 
 typedef struct raid5_private_data raid5_conf_t;
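
With the private plugged flag and plug_tq task gone, raid5 relies on the
generic queue plugging that the stripe_head comment above now references
(blk_run_queues).  A toy single-threaded illustration of the idiom -- the
names are stand-ins, not the block-layer API: submissions queue up behind a
"plug", and nothing is processed until the unplug pass runs, so small
requests can batch:

#include <stdio.h>

#define MAX_PENDING 16

static int pending[MAX_PENDING];
static int npending;
static int plugged;

static void submit(int sector)
{
	if (npending == MAX_PENDING)
		return;			/* sketch: drop instead of blocking */
	pending[npending++] = sector;
	plugged = 1;			/* cf. blk_plug_device() */
}

static void run_queues(void)		/* cf. blk_run_queues() */
{
	int i;

	if (!plugged)
		return;
	plugged = 0;			/* cf. blk_remove_plug() */
	for (i = 0; i < npending; i++)
		printf("handling sector %d\n", pending[i]);
	npending = 0;
}

int main(void)
{
	submit(8);
	submit(16);	/* batches behind the plug */
	run_queues();	/* unplug: both handled together */
	return 0;
}
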