Commit ef6f0bcd authored by Kai Germaschewski

Merge tp1.ruhr-uni-bochum.de:/home/kai/kernel/v2.5/linux-2.5

into tp1.ruhr-uni-bochum.de:/home/kai/kernel/v2.5/linux-2.5.make
parents faf49a92 33e448ef
VERSION = 2 VERSION = 2
PATCHLEVEL = 5 PATCHLEVEL = 5
SUBLEVEL = 22 SUBLEVEL = 23
EXTRAVERSION = EXTRAVERSION =
# We are using a recursive build, so we need to do a little thinking # We are using a recursive build, so we need to do a little thinking
......
...@@ -221,8 +221,6 @@ void iounmap(void *addr) ...@@ -221,8 +221,6 @@ void iounmap(void *addr)
return; return;
} }
BUG_ON(p->phys_addr == 0); /* not allocated with ioremap */
vmfree_area_pages(VMALLOC_VMADDR(p->addr), p->size); vmfree_area_pages(VMALLOC_VMADDR(p->addr), p->size);
if (p->flags && p->phys_addr < virt_to_phys(high_memory)) { if (p->flags && p->phys_addr < virt_to_phys(high_memory)) {
change_page_attr(virt_to_page(__va(p->phys_addr)), change_page_attr(virt_to_page(__va(p->phys_addr)),
......
...@@ -161,6 +161,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn) ...@@ -161,6 +161,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
init_waitqueue_head(&q->queue_wait); init_waitqueue_head(&q->queue_wait);
INIT_LIST_HEAD(&q->plug_list);
} }
/** /**
......
...@@ -129,6 +129,8 @@ struct cardinfo { ...@@ -129,6 +129,8 @@ struct cardinfo {
*/ */
struct bio *bio, *currentbio, **biotail; struct bio *bio, *currentbio, **biotail;
request_queue_t queue;
struct mm_page { struct mm_page {
dma_addr_t page_dma; dma_addr_t page_dma;
struct mm_dma_desc *desc; struct mm_dma_desc *desc;
...@@ -142,8 +144,6 @@ struct cardinfo { ...@@ -142,8 +144,6 @@ struct cardinfo {
struct tasklet_struct tasklet; struct tasklet_struct tasklet;
unsigned int dma_status; unsigned int dma_status;
struct tq_struct plug_tq;
struct { struct {
int good; int good;
int warned; int warned;
...@@ -293,7 +293,7 @@ static void dump_dmastat(struct cardinfo *card, unsigned int dmastat) ...@@ -293,7 +293,7 @@ static void dump_dmastat(struct cardinfo *card, unsigned int dmastat)
* Whenever IO on the active page completes, the Ready page is activated * Whenever IO on the active page completes, the Ready page is activated
* and the ex-Active page is cleaned out and made Ready. * and the ex-Active page is cleaned out and made Ready.
* Otherwise the Ready page is only activated when it becomes full, or * Otherwise the Ready page is only activated when it becomes full, or
* when mm_unplug_device is called via run_task_queue(&tq_disk). * when mm_unplug_device is called via blk_run_queues().
* *
* If a request arrives while both pages are full, it is queued, and b_rdev is * If a request arrives while both pages are full, it is queued, and b_rdev is
* overloaded to record whether it was a read or a write. * overloaded to record whether it was a read or a write.
...@@ -341,8 +341,9 @@ static void mm_start_io(struct cardinfo *card) ...@@ -341,8 +341,9 @@ static void mm_start_io(struct cardinfo *card)
offset = ((char*)desc) - ((char*)page->desc); offset = ((char*)desc) - ((char*)page->desc);
writel(cpu_to_le32((page->page_dma+offset)&0xffffffff), writel(cpu_to_le32((page->page_dma+offset)&0xffffffff),
card->csr_remap + DMA_DESCRIPTOR_ADDR); card->csr_remap + DMA_DESCRIPTOR_ADDR);
/* if sizeof(dma_addr_t) == 32, this will generate a warning, sorry */ /* Force the value to u64 before shifting otherwise >> 32 is undefined C
writel(cpu_to_le32((page->page_dma)>>32), * and on some ports will do nothing ! */
writel(cpu_to_le32(((u64)page->page_dma)>>32),
card->csr_remap + DMA_DESCRIPTOR_ADDR + 4); card->csr_remap + DMA_DESCRIPTOR_ADDR + 4);
/* Go, go, go */ /* Go, go, go */
...@@ -384,10 +385,12 @@ static inline void reset_page(struct mm_page *page) ...@@ -384,10 +385,12 @@ static inline void reset_page(struct mm_page *page)
static void mm_unplug_device(void *data) static void mm_unplug_device(void *data)
{ {
struct cardinfo *card = data; request_queue_t *q = data;
struct cardinfo *card = q->queuedata;
spin_lock_bh(&card->lock); spin_lock_bh(&card->lock);
activate(card); if (blk_remove_plug(q))
activate(card);
spin_unlock_bh(&card->lock); spin_unlock_bh(&card->lock);
} }
...@@ -565,8 +568,7 @@ static void process_page(unsigned long data) ...@@ -565,8 +568,7 @@ static void process_page(unsigned long data)
*/ */
static int mm_make_request(request_queue_t *q, struct bio *bio) static int mm_make_request(request_queue_t *q, struct bio *bio)
{ {
struct cardinfo *card = &cards[DEVICE_NR( struct cardinfo *card = q->queuedata;
bio->bi_bdev->bd_dev)];
PRINTK("mm_make_request %ld %d\n", bh->b_rsector, bh->b_size); PRINTK("mm_make_request %ld %d\n", bh->b_rsector, bh->b_size);
/* set uptodate now, and clear it if there are any errors */ /* set uptodate now, and clear it if there are any errors */
...@@ -576,9 +578,9 @@ static int mm_make_request(request_queue_t *q, struct bio *bio) ...@@ -576,9 +578,9 @@ static int mm_make_request(request_queue_t *q, struct bio *bio)
*card->biotail = bio; *card->biotail = bio;
bio->bi_next = NULL; bio->bi_next = NULL;
card->biotail = &bio->bi_next; card->biotail = &bio->bi_next;
blk_plug_device(q);
spin_unlock_bh(&card->lock); spin_unlock_bh(&card->lock);
queue_task(&card->plug_tq, &tq_disk);
return 0; return 0;
} }
...@@ -1065,11 +1067,12 @@ static int __devinit mm_pci_probe(struct pci_dev *dev, const struct pci_device_i ...@@ -1065,11 +1067,12 @@ static int __devinit mm_pci_probe(struct pci_dev *dev, const struct pci_device_i
card->bio = NULL; card->bio = NULL;
card->biotail = &card->bio; card->biotail = &card->bio;
blk_queue_make_request(&card->queue, mm_make_request);
card->queue.queuedata = card;
card->queue.unplug_fn = mm_unplug_device;
tasklet_init(&card->tasklet, process_page, (unsigned long)card); tasklet_init(&card->tasklet, process_page, (unsigned long)card);
card->plug_tq.sync = 0;
card->plug_tq.routine = &mm_unplug_device;
card->plug_tq.data = card;
card->check_batteries = 0; card->check_batteries = 0;
mem_present = readb(card->csr_remap + MEMCTRLSTATUS_MEMORY); mem_present = readb(card->csr_remap + MEMCTRLSTATUS_MEMORY);
...@@ -1237,6 +1240,17 @@ static struct pci_driver mm_pci_driver = { ...@@ -1237,6 +1240,17 @@ static struct pci_driver mm_pci_driver = {
-- mm_init -- mm_init
----------------------------------------------------------------------------------- -----------------------------------------------------------------------------------
*/ */
/*
 * Map a device number to the request queue that should service it.
 *
 * The card index is derived from the device via DEVICE_NR(). An index
 * below MM_MAXCARDS selects the queue embedded in the matching cards[]
 * entry; anything else falls back to the default queue of MAJOR_NR.
 */
static request_queue_t * mm_queue_proc(kdev_t dev)
{
	int card_nr = DEVICE_NR(kdev_val(dev));

	/* Index outside the cards[] table: use the major's default queue. */
	if (card_nr >= MM_MAXCARDS)
		return BLK_DEFAULT_QUEUE(MAJOR_NR);

	return &cards[card_nr].queue;
}
int __init mm_init(void) int __init mm_init(void)
{ {
int retval, i; int retval, i;
...@@ -1276,11 +1290,9 @@ int __init mm_init(void) ...@@ -1276,11 +1290,9 @@ int __init mm_init(void)
mm_gendisk.part = mm_partitions; mm_gendisk.part = mm_partitions;
mm_gendisk.nr_real = num_cards; mm_gendisk.nr_real = num_cards;
blk_dev[MAJOR_NR].queue = mm_queue_proc;
add_gendisk(&mm_gendisk); add_gendisk(&mm_gendisk);
blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR),
mm_make_request);
blk_size[MAJOR_NR] = mm_gendisk.sizes; blk_size[MAJOR_NR] = mm_gendisk.sizes;
for (i = 0; i < num_cards; i++) { for (i = 0; i < num_cards; i++) {
register_disk(&mm_gendisk, mk_kdev(MAJOR_NR, i<<MM_SHIFT), MM_SHIFT, register_disk(&mm_gendisk, mk_kdev(MAJOR_NR, i<<MM_SHIFT), MM_SHIFT,
......
/* /*
linear.c : Multiple Devices driver for Linux linear.c : Multiple Devices driver for Linux
Copyright (C) 1994-96 Marc ZYNGIER Copyright (C) 1994-96 Marc ZYNGIER
<zyngier@ufr-info-p7.ibp.fr> or <zyngier@ufr-info-p7.ibp.fr> or
<maz@gloups.fdn.fr> <maz@gloups.fdn.fr>
...@@ -33,39 +33,45 @@ static int linear_run (mddev_t *mddev) ...@@ -33,39 +33,45 @@ static int linear_run (mddev_t *mddev)
linear_conf_t *conf; linear_conf_t *conf;
struct linear_hash *table; struct linear_hash *table;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
int size, i, j, nb_zone; int size, i, nb_zone, cnt;
unsigned int curr_offset; unsigned int curr_offset;
struct list_head *tmp;
MOD_INC_USE_COUNT; MOD_INC_USE_COUNT;
conf = kmalloc (sizeof (*conf), GFP_KERNEL); conf = kmalloc (sizeof (*conf), GFP_KERNEL);
if (!conf) if (!conf)
goto out; goto out;
memset(conf, 0, sizeof(*conf));
mddev->private = conf; mddev->private = conf;
if (md_check_ordering(mddev)) {
printk("linear: disks are not ordered, aborting!\n");
goto out;
}
/* /*
* Find the smallest device. * Find the smallest device.
*/ */
conf->smallest = NULL; conf->smallest = NULL;
curr_offset = 0; cnt = 0;
ITERATE_RDEV_ORDERED(mddev,rdev,j) { ITERATE_RDEV(mddev,rdev,tmp) {
int j = rdev->sb->this_disk.raid_disk;
dev_info_t *disk = conf->disks + j; dev_info_t *disk = conf->disks + j;
if (j < 0 || j > mddev->sb->raid_disks || disk->bdev) {
printk("linear: disk numbering problem. Aborting!\n");
goto out;
}
disk->dev = rdev->dev; disk->dev = rdev->dev;
disk->bdev = rdev->bdev; disk->bdev = rdev->bdev;
atomic_inc(&rdev->bdev->bd_count); atomic_inc(&rdev->bdev->bd_count);
disk->size = rdev->size; disk->size = rdev->size;
disk->offset = curr_offset;
curr_offset += disk->size;
if (!conf->smallest || (disk->size < conf->smallest->size)) if (!conf->smallest || (disk->size < conf->smallest->size))
conf->smallest = disk; conf->smallest = disk;
cnt++;
}
if (cnt != mddev->sb->raid_disks) {
printk("linear: not enough drives present. Aborting!\n");
goto out;
} }
nb_zone = conf->nr_zones = nb_zone = conf->nr_zones =
...@@ -81,10 +87,13 @@ static int linear_run (mddev_t *mddev) ...@@ -81,10 +87,13 @@ static int linear_run (mddev_t *mddev)
* Here we generate the linear hash table * Here we generate the linear hash table
*/ */
table = conf->hash_table; table = conf->hash_table;
i = 0;
size = 0; size = 0;
for (j = 0; j < mddev->nb_dev; j++) { curr_offset = 0;
dev_info_t *disk = conf->disks + j; for (i = 0; i < cnt; i++) {
dev_info_t *disk = conf->disks + i;
disk->offset = curr_offset;
curr_offset += disk->size;
if (size < 0) { if (size < 0) {
table[-1].dev1 = disk; table[-1].dev1 = disk;
...@@ -130,12 +139,13 @@ static int linear_stop (mddev_t *mddev) ...@@ -130,12 +139,13 @@ static int linear_stop (mddev_t *mddev)
return 0; return 0;
} }
static int linear_make_request (mddev_t *mddev, int rw, struct bio *bio) static int linear_make_request (request_queue_t *q, struct bio *bio)
{ {
linear_conf_t *conf = mddev_to_conf(mddev); mddev_t *mddev = q->queuedata;
struct linear_hash *hash; linear_conf_t *conf = mddev_to_conf(mddev);
dev_info_t *tmp_dev; struct linear_hash *hash;
long block; dev_info_t *tmp_dev;
long block;
block = bio->bi_sector >> 1; block = bio->bi_sector >> 1;
hash = conf->hash_table + (block / conf->smallest->size); hash = conf->hash_table + (block / conf->smallest->size);
...@@ -186,7 +196,7 @@ static int linear_status (char *page, mddev_t *mddev) ...@@ -186,7 +196,7 @@ static int linear_status (char *page, mddev_t *mddev)
} }
sz += sprintf(page+sz, "\n"); sz += sprintf(page+sz, "\n");
#endif #endif
sz += sprintf(page+sz, " %dk rounding", mddev->param.chunk_size/1024); sz += sprintf(page+sz, " %dk rounding", mddev->sb->chunk_size/1024);
return sz; return sz;
} }
......
...@@ -107,7 +107,7 @@ static ctl_table raid_root_table[] = { ...@@ -107,7 +107,7 @@ static ctl_table raid_root_table[] = {
* subsystems want to have a pre-defined structure * subsystems want to have a pre-defined structure
*/ */
struct hd_struct md_hd_struct[MAX_MD_DEVS]; struct hd_struct md_hd_struct[MAX_MD_DEVS];
static int md_maxreadahead[MAX_MD_DEVS]; static void md_recover_arrays(void);
static mdk_thread_t *md_recovery_thread; static mdk_thread_t *md_recovery_thread;
int md_size[MAX_MD_DEVS]; int md_size[MAX_MD_DEVS];
...@@ -129,93 +129,111 @@ static struct gendisk md_gendisk= ...@@ -129,93 +129,111 @@ static struct gendisk md_gendisk=
/* /*
* Enables to iterate over all existing md arrays * Enables to iterate over all existing md arrays
* all_mddevs_lock protects this list as well as mddev_map.
*/ */
static LIST_HEAD(all_mddevs); static LIST_HEAD(all_mddevs);
static spinlock_t all_mddevs_lock = SPIN_LOCK_UNLOCKED;
/* /*
* The mapping between kdev and mddev is not necessary a simple * iterates through all used mddevs in the system.
* one! Eg. HSM uses several sub-devices to implement Logical * We take care to grab the all_mddevs_lock whenever navigating
* Volumes. All these sub-devices map to the same mddev. * the list, and to always hold a refcount when unlocked.
* Any code which breaks out of this loop will own
* a reference to the current mddev and must mddev_put it.
*/ */
dev_mapping_t mddev_map[MAX_MD_DEVS]; #define ITERATE_MDDEV(mddev,tmp) \
\
for (spin_lock(&all_mddevs_lock), \
(tmp = all_mddevs.next), \
(mddev = NULL); \
(void)(tmp != &all_mddevs && \
mddev_get(list_entry(tmp, mddev_t, all_mddevs))),\
spin_unlock(&all_mddevs_lock), \
(mddev ? mddev_put(mddev):(void)NULL), \
(mddev = list_entry(tmp, mddev_t, all_mddevs)), \
(tmp != &all_mddevs); \
spin_lock(&all_mddevs_lock), \
(tmp = tmp->next) \
)
static mddev_t *mddev_map[MAX_MD_DEVS];
/*
 * Request handler that unconditionally fails the bio it is given.
 *
 * @q:   request queue the bio was submitted to (not used here)
 * @bio: the bio to fail
 *
 * Hands the bio to bio_io_error() and returns 0.
 */
static int md_fail_request (request_queue_t *q, struct bio *bio)
{
/* Nothing is inspected or remapped; the bio is failed as-is. */
bio_io_error(bio);
return 0;
}
void add_mddev_mapping(mddev_t * mddev, kdev_t dev, void *data) static inline mddev_t *mddev_get(mddev_t *mddev)
{ {
unsigned int minor = minor(dev); atomic_inc(&mddev->active);
return mddev;
if (major(dev) != MD_MAJOR) {
MD_BUG();
return;
}
if (mddev_map[minor].mddev) {
MD_BUG();
return;
}
mddev_map[minor].mddev = mddev;
mddev_map[minor].data = data;
} }
void del_mddev_mapping(mddev_t * mddev, kdev_t dev) static void mddev_put(mddev_t *mddev)
{ {
unsigned int minor = minor(dev); if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
if (major(dev) != MD_MAJOR) {
MD_BUG();
return; return;
if (!mddev->sb && list_empty(&mddev->disks)) {
list_del(&mddev->all_mddevs);
mddev_map[mdidx(mddev)] = NULL;
kfree(mddev);
MOD_DEC_USE_COUNT;
} }
if (mddev_map[minor].mddev != mddev) { spin_unlock(&all_mddevs_lock);
MD_BUG();
return;
}
mddev_map[minor].mddev = NULL;
mddev_map[minor].data = NULL;
} }
static int md_make_request (request_queue_t *q, struct bio *bio) static mddev_t * mddev_find(int unit)
{ {
mddev_t *mddev = kdev_to_mddev(to_kdev_t(bio->bi_bdev->bd_dev)); mddev_t *mddev, *new = NULL;
if (mddev && mddev->pers) retry:
return mddev->pers->make_request(mddev, bio_rw(bio), bio); spin_lock(&all_mddevs_lock);
else { if (mddev_map[unit]) {
bio_io_error(bio); mddev = mddev_get(mddev_map[unit]);
return 0; spin_unlock(&all_mddevs_lock);
if (new)
kfree(new);
return mddev;
} }
} if (new) {
mddev_map[unit] = new;
static mddev_t * alloc_mddev(kdev_t dev) list_add(&new->all_mddevs, &all_mddevs);
{ spin_unlock(&all_mddevs_lock);
mddev_t *mddev; MOD_INC_USE_COUNT;
return new;
if (major(dev) != MD_MAJOR) {
MD_BUG();
return 0;
} }
mddev = (mddev_t *) kmalloc(sizeof(*mddev), GFP_KERNEL); spin_unlock(&all_mddevs_lock);
if (!mddev)
new = (mddev_t *) kmalloc(sizeof(*new), GFP_KERNEL);
if (!new)
return NULL; return NULL;
memset(mddev, 0, sizeof(*mddev)); memset(new, 0, sizeof(*new));
mddev->__minor = minor(dev); new->__minor = unit;
init_MUTEX(&mddev->reconfig_sem); init_MUTEX(&new->reconfig_sem);
init_MUTEX(&mddev->recovery_sem); INIT_LIST_HEAD(&new->disks);
init_MUTEX(&mddev->resync_sem); INIT_LIST_HEAD(&new->all_mddevs);
INIT_LIST_HEAD(&mddev->disks); atomic_set(&new->active, 1);
INIT_LIST_HEAD(&mddev->all_mddevs);
atomic_set(&mddev->active, 0);
/* goto retry;
* The 'base' mddev is the one with data NULL. }
* personalities can create additional mddevs
* if necessary.
*/
add_mddev_mapping(mddev, dev, 0);
list_add(&mddev->all_mddevs, &all_mddevs);
MOD_INC_USE_COUNT; static inline int mddev_lock(mddev_t * mddev)
{
return down_interruptible(&mddev->reconfig_sem);
}
return mddev; static inline int mddev_trylock(mddev_t * mddev)
{
return down_trylock(&mddev->reconfig_sem);
}
static inline void mddev_unlock(mddev_t * mddev)
{
up(&mddev->reconfig_sem);
} }
mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr) mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
...@@ -249,13 +267,12 @@ char * partition_name(kdev_t dev) ...@@ -249,13 +267,12 @@ char * partition_name(kdev_t dev)
struct gendisk *hd; struct gendisk *hd;
static char nomem [] = "<nomem>"; static char nomem [] = "<nomem>";
dev_name_t *dname; dev_name_t *dname;
struct list_head *tmp = device_names.next; struct list_head *tmp;
while (tmp != &device_names) { list_for_each(tmp, &device_names) {
dname = list_entry(tmp, dev_name_t, list); dname = list_entry(tmp, dev_name_t, list);
if (kdev_same(dname->dev, dev)) if (kdev_same(dname->dev, dev))
return dname->name; return dname->name;
tmp = tmp->next;
} }
dname = (dev_name_t *) kmalloc(sizeof(*dname), GFP_KERNEL); dname = (dev_name_t *) kmalloc(sizeof(*dname), GFP_KERNEL);
...@@ -275,7 +292,6 @@ char * partition_name(kdev_t dev) ...@@ -275,7 +292,6 @@ char * partition_name(kdev_t dev)
} }
dname->dev = dev; dname->dev = dev;
INIT_LIST_HEAD(&dname->list);
list_add(&dname->list, &device_names); list_add(&dname->list, &device_names);
return dname->name; return dname->name;
...@@ -326,69 +342,6 @@ static unsigned int zoned_raid_size(mddev_t *mddev) ...@@ -326,69 +342,6 @@ static unsigned int zoned_raid_size(mddev_t *mddev)
return 0; return 0;
} }
/*
 * Check whether all devices are numbered from 0 to nb_dev-1. The
 * order is guaranteed even after device name changes.
 *
 * Some personalities (raid0, linear) use this. Personalities that
 * provide data have to be able to deal with loss of individual
 * disks, so they do their checking themselves.
 *
 * Returns 0 if the array passes every check, 1 otherwise.
 */
int md_check_ordering(mddev_t *mddev)
{
	int nr, hits;
	mdk_rdev_t *rdev;
	struct list_head *tmp;

	/* Step 1: refuse to continue if any member device is faulty. */
	ITERATE_RDEV(mddev,rdev,tmp) {
		if (rdev->faulty) {
			printk(KERN_ERR "md: md%d's device %s faulty, aborting.\n",
				mdidx(mddev), partition_name(rdev->dev));
			return 1;
		}
	}

	/* Step 2: the number of listed devices must equal nb_dev. */
	hits = 0;
	ITERATE_RDEV(mddev,rdev,tmp) {
		hits++;
	}
	if (hits != mddev->nb_dev) {
		MD_BUG();
		return 1;
	}

	/* Step 3: nb_dev must equal the superblock's raid_disks. */
	if (mddev->nb_dev != mddev->sb->raid_disks) {
		printk(KERN_ERR "md: md%d, array needs %d disks, has %d, aborting.\n",
			mdidx(mddev), mddev->sb->raid_disks, mddev->nb_dev);
		return 1;
	}

	/*
	 * Step 4: the numbering check - every slot 0..nb_dev-1 must be
	 * claimed by exactly one device's desc_nr.
	 */
	for (nr = 0; nr < mddev->nb_dev; nr++) {
		hits = 0;
		ITERATE_RDEV(mddev,rdev,tmp) {
			if (rdev->desc_nr == nr)
				hits++;
		}
		if (hits == 1)
			continue;

		if (!hits)
			printk(KERN_ERR "md: md%d, missing disk #%d, aborting.\n",
				mdidx(mddev), nr);
		else
			printk(KERN_ERR "md: md%d, too many disks #%d, aborting.\n",
				mdidx(mddev), nr);
		return 1;
	}

	return 0;
}
static void remove_descriptor(mdp_disk_t *disk, mdp_super_t *sb) static void remove_descriptor(mdp_disk_t *disk, mdp_super_t *sb)
{ {
if (disk_active(disk)) { if (disk_active(disk)) {
...@@ -618,8 +571,7 @@ static void bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) ...@@ -618,8 +571,7 @@ static void bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
list_add(&rdev->same_set, &mddev->disks); list_add(&rdev->same_set, &mddev->disks);
rdev->mddev = mddev; rdev->mddev = mddev;
mddev->nb_dev++; printk(KERN_INFO "md: bind<%s>\n", partition_name(rdev->dev));
printk(KERN_INFO "md: bind<%s,%d>\n", partition_name(rdev->dev), mddev->nb_dev);
} }
static void unbind_rdev_from_array(mdk_rdev_t * rdev) static void unbind_rdev_from_array(mdk_rdev_t * rdev)
...@@ -628,11 +580,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) ...@@ -628,11 +580,8 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
MD_BUG(); MD_BUG();
return; return;
} }
list_del(&rdev->same_set); list_del_init(&rdev->same_set);
INIT_LIST_HEAD(&rdev->same_set); printk(KERN_INFO "md: unbind<%s>\n", partition_name(rdev->dev));
rdev->mddev->nb_dev--;
printk(KERN_INFO "md: unbind<%s,%d>\n", partition_name(rdev->dev),
rdev->mddev->nb_dev);
rdev->mddev = NULL; rdev->mddev = NULL;
} }
...@@ -682,13 +631,11 @@ static void export_rdev(mdk_rdev_t * rdev) ...@@ -682,13 +631,11 @@ static void export_rdev(mdk_rdev_t * rdev)
MD_BUG(); MD_BUG();
unlock_rdev(rdev); unlock_rdev(rdev);
free_disk_sb(rdev); free_disk_sb(rdev);
list_del(&rdev->all); list_del_init(&rdev->all);
INIT_LIST_HEAD(&rdev->all); if (!list_empty(&rdev->pending)) {
if (rdev->pending.next != &rdev->pending) {
printk(KERN_INFO "md: (%s was pending)\n", printk(KERN_INFO "md: (%s was pending)\n",
partition_name(rdev->dev)); partition_name(rdev->dev));
list_del(&rdev->pending); list_del_init(&rdev->pending);
INIT_LIST_HEAD(&rdev->pending);
} }
#ifndef MODULE #ifndef MODULE
md_autodetect_dev(rdev->dev); md_autodetect_dev(rdev->dev);
...@@ -722,7 +669,7 @@ static void export_array(mddev_t *mddev) ...@@ -722,7 +669,7 @@ static void export_array(mddev_t *mddev)
} }
kick_rdev_from_array(rdev); kick_rdev_from_array(rdev);
} }
if (mddev->nb_dev) if (!list_empty(&mddev->disks))
MD_BUG(); MD_BUG();
} }
...@@ -736,21 +683,6 @@ static void free_mddev(mddev_t *mddev) ...@@ -736,21 +683,6 @@ static void free_mddev(mddev_t *mddev)
export_array(mddev); export_array(mddev);
md_size[mdidx(mddev)] = 0; md_size[mdidx(mddev)] = 0;
md_hd_struct[mdidx(mddev)].nr_sects = 0; md_hd_struct[mdidx(mddev)].nr_sects = 0;
/*
* Make sure nobody else is using this mddev
* (careful, we rely on the global kernel lock here)
*/
while (atomic_read(&mddev->resync_sem.count) != 1)
schedule();
while (atomic_read(&mddev->recovery_sem.count) != 1)
schedule();
del_mddev_mapping(mddev, mk_kdev(MD_MAJOR, mdidx(mddev)));
list_del(&mddev->all_mddevs);
INIT_LIST_HEAD(&mddev->all_mddevs);
kfree(mddev);
MOD_DEC_USE_COUNT;
} }
#undef BAD_CSUM #undef BAD_CSUM
...@@ -892,12 +824,10 @@ static mdk_rdev_t * find_rdev_all(kdev_t dev) ...@@ -892,12 +824,10 @@ static mdk_rdev_t * find_rdev_all(kdev_t dev)
struct list_head *tmp; struct list_head *tmp;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
tmp = all_raid_disks.next; list_for_each(tmp, &all_raid_disks) {
while (tmp != &all_raid_disks) {
rdev = list_entry(tmp, mdk_rdev_t, all); rdev = list_entry(tmp, mdk_rdev_t, all);
if (kdev_same(rdev->dev, dev)) if (kdev_same(rdev->dev, dev))
return rdev; return rdev;
tmp = tmp->next;
} }
return NULL; return NULL;
} }
...@@ -993,12 +923,13 @@ static int sync_sbs(mddev_t * mddev) ...@@ -993,12 +923,13 @@ static int sync_sbs(mddev_t * mddev)
return 0; return 0;
} }
int md_update_sb(mddev_t * mddev) void __md_update_sb(mddev_t * mddev)
{ {
int err, count = 100; int err, count = 100;
struct list_head *tmp; struct list_head *tmp;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
mddev->sb_dirty = 0;
repeat: repeat:
mddev->sb->utime = CURRENT_TIME; mddev->sb->utime = CURRENT_TIME;
if (!(++mddev->sb->events_lo)) if (!(++mddev->sb->events_lo))
...@@ -1020,7 +951,7 @@ int md_update_sb(mddev_t * mddev) ...@@ -1020,7 +951,7 @@ int md_update_sb(mddev_t * mddev)
* nonpersistent superblocks * nonpersistent superblocks
*/ */
if (mddev->sb->not_persistent) if (mddev->sb->not_persistent)
return 0; return;
printk(KERN_INFO "md: updating md%d RAID superblock on device\n", printk(KERN_INFO "md: updating md%d RAID superblock on device\n",
mdidx(mddev)); mdidx(mddev));
...@@ -1048,9 +979,18 @@ int md_update_sb(mddev_t * mddev) ...@@ -1048,9 +979,18 @@ int md_update_sb(mddev_t * mddev)
} }
printk(KERN_ERR "md: excessive errors occurred during superblock update, exiting\n"); printk(KERN_ERR "md: excessive errors occurred during superblock update, exiting\n");
} }
return 0;
} }
/*
 * Locked front end for __md_update_sb().
 *
 * Tries to take the mddev lock with mddev_lock(); if that returns
 * non-zero nothing is done. With the lock held, __md_update_sb() is
 * called only when mddev->sb_dirty is set, and the lock is then dropped.
 */
void md_update_sb(mddev_t *mddev)
{
	if (!mddev_lock(mddev)) {
		/* Only write out when something flagged the superblock dirty. */
		if (mddev->sb_dirty)
			__md_update_sb(mddev);

		mddev_unlock(mddev);
	}
}
/* /*
* Import a device. If 'on_disk', then sanity check the superblock * Import a device. If 'on_disk', then sanity check the superblock
* *
...@@ -1122,6 +1062,7 @@ static int md_import_device(kdev_t newdev, int on_disk) ...@@ -1122,6 +1062,7 @@ static int md_import_device(kdev_t newdev, int on_disk)
} }
list_add(&rdev->all, &all_raid_disks); list_add(&rdev->all, &all_raid_disks);
INIT_LIST_HEAD(&rdev->pending); INIT_LIST_HEAD(&rdev->pending);
INIT_LIST_HEAD(&rdev->same_set);
if (rdev->faulty && rdev->sb) if (rdev->faulty && rdev->sb)
free_disk_sb(rdev); free_disk_sb(rdev);
...@@ -1574,7 +1515,6 @@ static int device_size_calculation(mddev_t * mddev) ...@@ -1574,7 +1515,6 @@ static int device_size_calculation(mddev_t * mddev)
if (sb->level == -3) if (sb->level == -3)
readahead = 0; readahead = 0;
} }
md_maxreadahead[mdidx(mddev)] = readahead;
printk(KERN_INFO "md%d: max total readahead window set to %ldk\n", printk(KERN_INFO "md%d: max total readahead window set to %ldk\n",
mdidx(mddev), readahead*(PAGE_SIZE/1024)); mdidx(mddev), readahead*(PAGE_SIZE/1024));
...@@ -1605,7 +1545,7 @@ static int do_md_run(mddev_t * mddev) ...@@ -1605,7 +1545,7 @@ static int do_md_run(mddev_t * mddev)
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
if (!mddev->nb_dev) { if (list_empty(&mddev->disks)) {
MD_BUG(); MD_BUG();
return -EINVAL; return -EINVAL;
} }
...@@ -1630,9 +1570,6 @@ static int do_md_run(mddev_t * mddev) ...@@ -1630,9 +1570,6 @@ static int do_md_run(mddev_t * mddev)
chunk_size = mddev->sb->chunk_size; chunk_size = mddev->sb->chunk_size;
pnum = level_to_pers(mddev->sb->level); pnum = level_to_pers(mddev->sb->level);
mddev->param.chunk_size = chunk_size;
mddev->param.personality = pnum;
if ((pnum != MULTIPATH) && (pnum != RAID1)) { if ((pnum != MULTIPATH) && (pnum != RAID1)) {
if (!chunk_size) { if (!chunk_size) {
/* /*
...@@ -1712,6 +1649,9 @@ static int do_md_run(mddev_t * mddev) ...@@ -1712,6 +1649,9 @@ static int do_md_run(mddev_t * mddev)
} }
mddev->pers = pers[pnum]; mddev->pers = pers[pnum];
blk_queue_make_request(&mddev->queue, mddev->pers->make_request);
mddev->queue.queuedata = mddev;
err = mddev->pers->run(mddev); err = mddev->pers->run(mddev);
if (err) { if (err) {
printk(KERN_ERR "md: pers->run() failed ...\n"); printk(KERN_ERR "md: pers->run() failed ...\n");
...@@ -1719,9 +1659,15 @@ static int do_md_run(mddev_t * mddev) ...@@ -1719,9 +1659,15 @@ static int do_md_run(mddev_t * mddev)
return -EINVAL; return -EINVAL;
} }
mddev->sb->state &= ~(1 << MD_SB_CLEAN); mddev->in_sync = (mddev->sb->state & (1<<MD_SB_CLEAN));
md_update_sb(mddev); /* if personality doesn't have "sync_request", then
* a dirty array doesn't mean anything
*/
if (mddev->pers->sync_request)
mddev->sb->state &= ~(1 << MD_SB_CLEAN);
__md_update_sb(mddev);
md_recover_arrays();
/* /*
* md_size has units of 1K blocks, which are * md_size has units of 1K blocks, which are
* twice as large as sectors. * twice as large as sectors.
...@@ -1736,21 +1682,21 @@ static int do_md_run(mddev_t * mddev) ...@@ -1736,21 +1682,21 @@ static int do_md_run(mddev_t * mddev)
#undef TOO_BIG_CHUNKSIZE #undef TOO_BIG_CHUNKSIZE
#undef BAD_CHUNKSIZE #undef BAD_CHUNKSIZE
#define OUT(x) do { err = (x); goto out; } while (0)
static int restart_array(mddev_t *mddev) static int restart_array(mddev_t *mddev)
{ {
int err = 0; int err;
/* /*
* Complain if it has no devices * Complain if it has no devices
*/ */
if (!mddev->nb_dev) err = -ENXIO;
OUT(-ENXIO); if (list_empty(&mddev->disks))
goto out;
if (mddev->pers) { if (mddev->pers) {
err = -EBUSY;
if (!mddev->ro) if (!mddev->ro)
OUT(-EBUSY); goto out;
mddev->ro = 0; mddev->ro = 0;
set_device_ro(mddev_to_kdev(mddev), 0); set_device_ro(mddev_to_kdev(mddev), 0);
...@@ -1761,8 +1707,7 @@ static int restart_array(mddev_t *mddev) ...@@ -1761,8 +1707,7 @@ static int restart_array(mddev_t *mddev)
* Kick recovery or resync if necessary * Kick recovery or resync if necessary
*/ */
md_recover_arrays(); md_recover_arrays();
if (mddev->pers->restart_resync) err = 0;
mddev->pers->restart_resync(mddev);
} else { } else {
printk(KERN_ERR "md: md%d has no personality assigned.\n", printk(KERN_ERR "md: md%d has no personality assigned.\n",
mdidx(mddev)); mdidx(mddev));
...@@ -1780,49 +1725,43 @@ static int restart_array(mddev_t *mddev) ...@@ -1780,49 +1725,43 @@ static int restart_array(mddev_t *mddev)
static int do_md_stop(mddev_t * mddev, int ro) static int do_md_stop(mddev_t * mddev, int ro)
{ {
int err = 0, resync_interrupted = 0; int err = 0;
kdev_t dev = mddev_to_kdev(mddev); kdev_t dev = mddev_to_kdev(mddev);
if (atomic_read(&mddev->active)>1) { if (atomic_read(&mddev->active)>1) {
printk(STILL_IN_USE, mdidx(mddev)); printk(STILL_IN_USE, mdidx(mddev));
OUT(-EBUSY); err = -EBUSY;
goto out;
} }
if (mddev->pers) { if (mddev->pers) {
/* if (mddev->sync_thread) {
* It is safe to call stop here, it only frees private if (mddev->recovery_running > 0)
* data. Also, it tells us if a device is unstoppable mddev->recovery_running = -EINTR;
* (eg. resyncing is in progress) md_unregister_thread(mddev->sync_thread);
*/ mddev->sync_thread = NULL;
if (mddev->pers->stop_resync) if (mddev->spare) {
if (mddev->pers->stop_resync(mddev)) mddev->pers->diskop(mddev, &mddev->spare,
resync_interrupted = 1; DISKOP_SPARE_INACTIVE);
mddev->spare = NULL;
if (mddev->recovery_running) }
md_interrupt_thread(md_recovery_thread); }
/*
* This synchronizes with signal delivery to the
* resync or reconstruction thread. It also nicely
* hangs the process if some reconstruction has not
* finished.
*/
down(&mddev->recovery_sem);
up(&mddev->recovery_sem);
invalidate_device(dev, 1); invalidate_device(dev, 1);
if (ro) { if (ro) {
err = -ENXIO;
if (mddev->ro) if (mddev->ro)
OUT(-ENXIO); goto out;
mddev->ro = 1; mddev->ro = 1;
} else { } else {
if (mddev->ro) if (mddev->ro)
set_device_ro(dev, 0); set_device_ro(dev, 0);
if (mddev->pers->stop(mddev)) { if (mddev->pers->stop(mddev)) {
err = -EBUSY;
if (mddev->ro) if (mddev->ro)
set_device_ro(dev, 1); set_device_ro(dev, 1);
OUT(-EBUSY); goto out;
} }
if (mddev->ro) if (mddev->ro)
mddev->ro = 0; mddev->ro = 0;
...@@ -1832,11 +1771,11 @@ static int do_md_stop(mddev_t * mddev, int ro) ...@@ -1832,11 +1771,11 @@ static int do_md_stop(mddev_t * mddev, int ro)
* mark it clean only if there was no resync * mark it clean only if there was no resync
* interrupted. * interrupted.
*/ */
if (!mddev->recovery_running && !resync_interrupted) { if (mddev->in_sync) {
printk(KERN_INFO "md: marking sb clean...\n"); printk(KERN_INFO "md: marking sb clean...\n");
mddev->sb->state |= 1 << MD_SB_CLEAN; mddev->sb->state |= 1 << MD_SB_CLEAN;
} }
md_update_sb(mddev); __md_update_sb(mddev);
} }
if (ro) if (ro)
set_device_ro(dev, 1); set_device_ro(dev, 1);
...@@ -1848,15 +1787,13 @@ static int do_md_stop(mddev_t * mddev, int ro) ...@@ -1848,15 +1787,13 @@ static int do_md_stop(mddev_t * mddev, int ro)
if (!ro) { if (!ro) {
printk(KERN_INFO "md: md%d stopped.\n", mdidx(mddev)); printk(KERN_INFO "md: md%d stopped.\n", mdidx(mddev));
free_mddev(mddev); free_mddev(mddev);
} else } else
printk(KERN_INFO "md: md%d switched to read-only mode.\n", mdidx(mddev)); printk(KERN_INFO "md: md%d switched to read-only mode.\n", mdidx(mddev));
err = 0;
out: out:
return err; return err;
} }
#undef OUT
/* /*
* We have to safely support old arrays too. * We have to safely support old arrays too.
*/ */
...@@ -1877,7 +1814,7 @@ static void autorun_array(mddev_t *mddev) ...@@ -1877,7 +1814,7 @@ static void autorun_array(mddev_t *mddev)
struct list_head *tmp; struct list_head *tmp;
int err; int err;
if (mddev->disks.prev == &mddev->disks) { if (list_empty(&mddev->disks)) {
MD_BUG(); MD_BUG();
return; return;
} }
...@@ -1912,17 +1849,15 @@ static void autorun_array(mddev_t *mddev) ...@@ -1912,17 +1849,15 @@ static void autorun_array(mddev_t *mddev)
* *
* If "unit" is allocated, then bump its reference count * If "unit" is allocated, then bump its reference count
*/ */
static void autorun_devices(kdev_t countdev) static void autorun_devices(void)
{ {
struct list_head candidates; struct list_head candidates;
struct list_head *tmp; struct list_head *tmp;
mdk_rdev_t *rdev0, *rdev; mdk_rdev_t *rdev0, *rdev;
mddev_t *mddev; mddev_t *mddev;
kdev_t md_kdev;
printk(KERN_INFO "md: autorun ...\n"); printk(KERN_INFO "md: autorun ...\n");
while (pending_raid_disks.next != &pending_raid_disks) { while (!list_empty(&pending_raid_disks)) {
rdev0 = list_entry(pending_raid_disks.next, rdev0 = list_entry(pending_raid_disks.next,
mdk_rdev_t, pending); mdk_rdev_t, pending);
...@@ -1946,29 +1881,34 @@ static void autorun_devices(kdev_t countdev) ...@@ -1946,29 +1881,34 @@ static void autorun_devices(kdev_t countdev)
* mostly sane superblocks. It's time to allocate the * mostly sane superblocks. It's time to allocate the
* mddev. * mddev.
*/ */
md_kdev = mk_kdev(MD_MAJOR, rdev0->sb->md_minor);
mddev = kdev_to_mddev(md_kdev); mddev = mddev_find(rdev0->sb->md_minor);
if (mddev) {
printk(KERN_WARNING "md: md%d already running, cannot run %s\n",
mdidx(mddev), partition_name(rdev0->dev));
ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp)
export_rdev(rdev);
continue;
}
mddev = alloc_mddev(md_kdev);
if (!mddev) { if (!mddev) {
printk(KERN_ERR "md: cannot allocate memory for md drive.\n"); printk(KERN_ERR "md: cannot allocate memory for md drive.\n");
break; break;
} }
if (kdev_same(md_kdev, countdev)) if (mddev_lock(mddev))
atomic_inc(&mddev->active); printk(KERN_WARNING "md: md%d locked, cannot run\n",
printk(KERN_INFO "md: created md%d\n", mdidx(mddev)); mdidx(mddev));
ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) { else if (mddev->sb || !list_empty(&mddev->disks)) {
bind_rdev_to_array(rdev, mddev); printk(KERN_WARNING "md: md%d already running, cannot run %s\n",
list_del(&rdev->pending); mdidx(mddev), partition_name(rdev0->dev));
INIT_LIST_HEAD(&rdev->pending); mddev_unlock(mddev);
} else {
printk(KERN_INFO "md: created md%d\n", mdidx(mddev));
ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) {
bind_rdev_to_array(rdev, mddev);
list_del_init(&rdev->pending);
}
autorun_array(mddev);
mddev_unlock(mddev);
} }
autorun_array(mddev); /* on success, candidates will be empty, on error
* it won't...
*/
ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp)
export_rdev(rdev);
mddev_put(mddev);
} }
printk(KERN_INFO "md: ... autorun DONE.\n"); printk(KERN_INFO "md: ... autorun DONE.\n");
} }
...@@ -2005,7 +1945,7 @@ static void autorun_devices(kdev_t countdev) ...@@ -2005,7 +1945,7 @@ static void autorun_devices(kdev_t countdev)
#define AUTORUNNING KERN_INFO \ #define AUTORUNNING KERN_INFO \
"md: auto-running md%d.\n" "md: auto-running md%d.\n"
static int autostart_array(kdev_t startdev, kdev_t countdev) static int autostart_array(kdev_t startdev)
{ {
int err = -EINVAL, i; int err = -EINVAL, i;
mdp_super_t *sb = NULL; mdp_super_t *sb = NULL;
...@@ -2065,7 +2005,7 @@ static int autostart_array(kdev_t startdev, kdev_t countdev) ...@@ -2065,7 +2005,7 @@ static int autostart_array(kdev_t startdev, kdev_t countdev)
/* /*
* possibly return codes * possibly return codes
*/ */
autorun_devices(countdev); autorun_devices();
return 0; return 0;
abort: abort:
...@@ -2191,7 +2131,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) ...@@ -2191,7 +2131,7 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
MD_BUG(); MD_BUG();
return -EINVAL; return -EINVAL;
} }
if (mddev->nb_dev) { if (!list_empty(&mddev->disks)) {
mdk_rdev_t *rdev0 = list_entry(mddev->disks.next, mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
mdk_rdev_t, same_set); mdk_rdev_t, same_set);
if (!uuid_equal(rdev0, rdev)) { if (!uuid_equal(rdev0, rdev)) {
...@@ -2346,8 +2286,7 @@ static int hot_remove_disk(mddev_t * mddev, kdev_t dev) ...@@ -2346,8 +2286,7 @@ static int hot_remove_disk(mddev_t * mddev, kdev_t dev)
remove_descriptor(disk, mddev->sb); remove_descriptor(disk, mddev->sb);
kick_rdev_from_array(rdev); kick_rdev_from_array(rdev);
mddev->sb_dirty = 1; __md_update_sb(mddev);
md_update_sb(mddev);
return 0; return 0;
busy: busy:
...@@ -2458,9 +2397,7 @@ static int hot_add_disk(mddev_t * mddev, kdev_t dev) ...@@ -2458,9 +2397,7 @@ static int hot_add_disk(mddev_t * mddev, kdev_t dev)
mddev->sb->spare_disks++; mddev->sb->spare_disks++;
mddev->sb->working_disks++; mddev->sb->working_disks++;
mddev->sb_dirty = 1; __md_update_sb(mddev);
md_update_sb(mddev);
/* /*
* Kick recovery, maybe this spare has to be added to the * Kick recovery, maybe this spare has to be added to the
...@@ -2520,36 +2457,6 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) ...@@ -2520,36 +2457,6 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
} }
#undef SET_SB #undef SET_SB
static int set_disk_info(mddev_t * mddev, void * arg)
{
printk(KERN_INFO "md: not yet");
return -EINVAL;
}
static int clear_array(mddev_t * mddev)
{
printk(KERN_INFO "md: not yet");
return -EINVAL;
}
static int write_raid_info(mddev_t * mddev)
{
printk(KERN_INFO "md: not yet");
return -EINVAL;
}
static int protect_array(mddev_t * mddev)
{
printk(KERN_INFO "md: not yet");
return -EINVAL;
}
static int unprotect_array(mddev_t * mddev)
{
printk(KERN_INFO "md: not yet");
return -EINVAL;
}
static int set_disk_faulty(mddev_t *mddev, kdev_t dev) static int set_disk_faulty(mddev_t *mddev, kdev_t dev)
{ {
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
...@@ -2595,7 +2502,7 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2595,7 +2502,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
case PRINT_RAID_DEBUG: case PRINT_RAID_DEBUG:
err = 0; err = 0;
md_print_devices(); md_print_devices();
goto done_unlock; goto done;
#ifndef MODULE #ifndef MODULE
case RAID_AUTORUN: case RAID_AUTORUN:
...@@ -2632,40 +2539,30 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2632,40 +2539,30 @@ static int md_ioctl(struct inode *inode, struct file *file,
* Commands creating/starting a new array: * Commands creating/starting a new array:
*/ */
mddev = kdev_to_mddev(dev); mddev = inode->i_bdev->bd_inode->u.generic_ip;
switch (cmd) if (!mddev) {
{ BUG();
case SET_ARRAY_INFO: goto abort;
case START_ARRAY:
if (mddev) {
printk(KERN_WARNING "md: array md%d already exists!\n",
mdidx(mddev));
err = -EEXIST;
goto abort;
}
default:;
} }
err = mddev_lock(mddev);
if (err) {
printk(KERN_INFO "md: ioctl lock interrupted, reason %d, cmd %d\n",
err, cmd);
goto abort;
}
switch (cmd) switch (cmd)
{ {
case SET_ARRAY_INFO: case SET_ARRAY_INFO:
mddev = alloc_mddev(dev);
if (!mddev) {
err = -ENOMEM;
goto abort;
}
atomic_inc(&mddev->active);
/* if (!list_empty(&mddev->disks)) {
* alloc_mddev() should possibly self-lock. printk(KERN_WARNING "md: array md%d already has disks!\n",
*/ mdidx(mddev));
err = lock_mddev(mddev); err = -EBUSY;
if (err) { goto abort_unlock;
printk(KERN_WARNING "md: ioctl, reason %d, cmd %d\n",
err, cmd);
goto abort;
} }
if (mddev->sb) { if (mddev->sb) {
printk(KERN_WARNING "md: array md%d already has a superblock!\n", printk(KERN_WARNING "md: array md%d already has a superblock!\n",
mdidx(mddev)); mdidx(mddev));
...@@ -2690,13 +2587,13 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2690,13 +2587,13 @@ static int md_ioctl(struct inode *inode, struct file *file,
/* /*
* possibly make it lock the array ... * possibly make it lock the array ...
*/ */
err = autostart_array(val_to_kdev(arg), dev); err = autostart_array(val_to_kdev(arg));
if (err) { if (err) {
printk(KERN_WARNING "md: autostart %s failed!\n", printk(KERN_WARNING "md: autostart %s failed!\n",
partition_name(val_to_kdev(arg))); partition_name(val_to_kdev(arg)));
goto abort; goto abort_unlock;
} }
goto done; goto done_unlock;
default:; default:;
} }
...@@ -2704,16 +2601,6 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2704,16 +2601,6 @@ static int md_ioctl(struct inode *inode, struct file *file,
/* /*
* Commands querying/configuring an existing array: * Commands querying/configuring an existing array:
*/ */
if (!mddev) {
err = -ENODEV;
goto abort;
}
err = lock_mddev(mddev);
if (err) {
printk(KERN_INFO "md: ioctl lock interrupted, reason %d, cmd %d\n",err, cmd);
goto abort;
}
/* if we don't have a superblock yet, only ADD_NEW_DISK, STOP_ARRAY or RUN_ARRAY is allowed */ /* if we don't have a superblock yet, only ADD_NEW_DISK, STOP_ARRAY or RUN_ARRAY is allowed */
if (!mddev->sb && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY && cmd != RUN_ARRAY) { if (!mddev->sb && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY && cmd != RUN_ARRAY) {
err = -ENODEV; err = -ENODEV;
...@@ -2738,8 +2625,7 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2738,8 +2625,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
goto done_unlock; goto done_unlock;
case STOP_ARRAY: case STOP_ARRAY:
if (!(err = do_md_stop (mddev, 0))) err = do_md_stop (mddev, 0);
mddev = NULL;
goto done_unlock; goto done_unlock;
case STOP_ARRAY_RO: case STOP_ARRAY_RO:
...@@ -2784,10 +2670,6 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2784,10 +2670,6 @@ static int md_ioctl(struct inode *inode, struct file *file,
switch (cmd) switch (cmd)
{ {
case CLEAR_ARRAY:
err = clear_array(mddev);
goto done_unlock;
case ADD_NEW_DISK: case ADD_NEW_DISK:
{ {
mdu_disk_info_t info; mdu_disk_info_t info;
...@@ -2808,35 +2690,12 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2808,35 +2690,12 @@ static int md_ioctl(struct inode *inode, struct file *file,
err = hot_add_disk(mddev, val_to_kdev(arg)); err = hot_add_disk(mddev, val_to_kdev(arg));
goto done_unlock; goto done_unlock;
case SET_DISK_INFO:
err = set_disk_info(mddev, (void *)arg);
goto done_unlock;
case WRITE_RAID_INFO:
err = write_raid_info(mddev);
goto done_unlock;
case UNPROTECT_ARRAY:
err = unprotect_array(mddev);
goto done_unlock;
case PROTECT_ARRAY:
err = protect_array(mddev);
goto done_unlock;
case SET_DISK_FAULTY: case SET_DISK_FAULTY:
err = set_disk_faulty(mddev, val_to_kdev(arg)); err = set_disk_faulty(mddev, val_to_kdev(arg));
goto done_unlock; goto done_unlock;
case RUN_ARRAY: case RUN_ARRAY:
{ {
/* The data is never used....
mdu_param_t param;
err = copy_from_user(&param, (mdu_param_t *)arg,
sizeof(param));
if (err)
goto abort_unlock;
*/
err = do_md_run (mddev); err = do_md_run (mddev);
/* /*
* we have to clean up the mess if * we have to clean up the mess if
...@@ -2845,8 +2704,7 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2845,8 +2704,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
*/ */
if (err) { if (err) {
mddev->sb_dirty = 0; mddev->sb_dirty = 0;
if (!do_md_stop (mddev, 0)) do_md_stop (mddev, 0);
mddev = NULL;
} }
goto done_unlock; goto done_unlock;
} }
...@@ -2861,8 +2719,7 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2861,8 +2719,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
done_unlock: done_unlock:
abort_unlock: abort_unlock:
if (mddev) mddev_unlock(mddev);
unlock_mddev(mddev);
return err; return err;
done: done:
...@@ -2875,19 +2732,34 @@ static int md_ioctl(struct inode *inode, struct file *file, ...@@ -2875,19 +2732,34 @@ static int md_ioctl(struct inode *inode, struct file *file,
static int md_open(struct inode *inode, struct file *file) static int md_open(struct inode *inode, struct file *file)
{ {
/* /*
* Always succeed, but increment the usage count * Succeed if we can find or allocate a mddev structure.
*/ */
mddev_t *mddev = kdev_to_mddev(inode->i_rdev); mddev_t *mddev = mddev_find(minor(inode->i_rdev));
if (mddev) int err = -ENOMEM;
atomic_inc(&mddev->active);
return (0); if (!mddev)
goto out;
if ((err = mddev_lock(mddev)))
goto put;
err = 0;
mddev_unlock(mddev);
inode->i_bdev->bd_inode->u.generic_ip = mddev_get(mddev);
put:
mddev_put(mddev);
out:
return err;
} }
static int md_release(struct inode *inode, struct file * file) static int md_release(struct inode *inode, struct file * file)
{ {
mddev_t *mddev = kdev_to_mddev(inode->i_rdev); mddev_t *mddev = inode->i_bdev->bd_inode->u.generic_ip;
if (mddev)
atomic_dec(&mddev->active); if (!mddev)
BUG();
mddev_put(mddev);
return 0; return 0;
} }
...@@ -2918,6 +2790,7 @@ int md_thread(void * arg) ...@@ -2918,6 +2790,7 @@ int md_thread(void * arg)
*/ */
daemonize(); daemonize();
reparent_to_init();
sprintf(current->comm, thread->name); sprintf(current->comm, thread->name);
current->exit_signal = SIGCHLD; current->exit_signal = SIGCHLD;
...@@ -2941,17 +2814,10 @@ int md_thread(void * arg) ...@@ -2941,17 +2814,10 @@ int md_thread(void * arg)
complete(thread->event); complete(thread->event);
while (thread->run) { while (thread->run) {
void (*run)(void *data); void (*run)(void *data);
DECLARE_WAITQUEUE(wait, current);
add_wait_queue(&thread->wqueue, &wait); wait_event_interruptible(thread->wqueue,
set_task_state(current, TASK_INTERRUPTIBLE); test_bit(THREAD_WAKEUP, &thread->flags));
if (!test_bit(THREAD_WAKEUP, &thread->flags)) {
dprintk("md: thread %p went to sleep.\n", thread);
schedule();
dprintk("md: thread %p woke up.\n", thread);
}
current->state = TASK_RUNNING;
remove_wait_queue(&thread->wqueue, &wait);
clear_bit(THREAD_WAKEUP, &thread->flags); clear_bit(THREAD_WAKEUP, &thread->flags);
run = thread->run; run = thread->run;
...@@ -3026,7 +2892,7 @@ void md_unregister_thread(mdk_thread_t *thread) ...@@ -3026,7 +2892,7 @@ void md_unregister_thread(mdk_thread_t *thread)
kfree(thread); kfree(thread);
} }
void md_recover_arrays(void) static void md_recover_arrays(void)
{ {
if (!md_recovery_thread) { if (!md_recovery_thread) {
MD_BUG(); MD_BUG();
...@@ -3042,7 +2908,7 @@ int md_error(mddev_t *mddev, struct block_device *bdev) ...@@ -3042,7 +2908,7 @@ int md_error(mddev_t *mddev, struct block_device *bdev)
kdev_t rdev = to_kdev_t(bdev->bd_dev); kdev_t rdev = to_kdev_t(bdev->bd_dev);
dprintk("md_error dev:(%d:%d), rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", dprintk("md_error dev:(%d:%d), rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
major(dev),minor(dev),major(rdev),minor(rdev), MD_MAJOR,mdidx(mddev),major(rdev),minor(rdev),
__builtin_return_address(0),__builtin_return_address(1), __builtin_return_address(0),__builtin_return_address(1),
__builtin_return_address(2),__builtin_return_address(3)); __builtin_return_address(2),__builtin_return_address(3));
...@@ -3055,17 +2921,14 @@ int md_error(mddev_t *mddev, struct block_device *bdev) ...@@ -3055,17 +2921,14 @@ int md_error(mddev_t *mddev, struct block_device *bdev)
return 0; return 0;
if (!mddev->pers->error_handler if (!mddev->pers->error_handler
|| mddev->pers->error_handler(mddev,rdev) <= 0) { || mddev->pers->error_handler(mddev,rdev) <= 0) {
free_disk_sb(rrdev);
rrdev->faulty = 1; rrdev->faulty = 1;
} else } else
return 1; return 1;
/* /*
* if recovery was running, stop it now. * if recovery was running, stop it now.
*/ */
if (mddev->pers->stop_resync) if (mddev->recovery_running)
mddev->pers->stop_resync(mddev); mddev->recovery_running = -EIO;
if (mddev->recovery_running)
md_interrupt_thread(md_recovery_thread);
md_recover_arrays(); md_recover_arrays();
return 0; return 0;
...@@ -3080,7 +2943,7 @@ static int status_unused(char * page) ...@@ -3080,7 +2943,7 @@ static int status_unused(char * page)
sz += sprintf(page + sz, "unused devices: "); sz += sprintf(page + sz, "unused devices: ");
ITERATE_RDEV_ALL(rdev,tmp) { ITERATE_RDEV_ALL(rdev,tmp) {
if (!rdev->same_set.next && !rdev->same_set.prev) { if (list_empty(&rdev->same_set)) {
/* /*
* The device is not yet used by any array. * The device is not yet used by any array.
*/ */
...@@ -3123,18 +2986,9 @@ static int status_resync(char * page, mddev_t * mddev) ...@@ -3123,18 +2986,9 @@ static int status_resync(char * page, mddev_t * mddev)
sz += sprintf(page + sz, "."); sz += sprintf(page + sz, ".");
sz += sprintf(page + sz, "] "); sz += sprintf(page + sz, "] ");
} }
if (!mddev->recovery_running) sz += sprintf(page + sz, " %s =%3lu.%lu%% (%lu/%lu)",
/* (mddev->spare ? "recovery" : "resync"),
* true resync res/10, res % 10, resync, max_blocks);
*/
sz += sprintf(page + sz, " resync =%3lu.%lu%% (%lu/%lu)",
res/10, res % 10, resync, max_blocks);
else
/*
* recovery ...
*/
sz += sprintf(page + sz, " recovery =%3lu.%lu%% (%lu/%lu)",
res/10, res % 10, resync, max_blocks);
/* /*
* We do not want to overflow, so the order of operands and * We do not want to overflow, so the order of operands and
...@@ -3172,7 +3026,7 @@ static int md_status_read_proc(char *page, char **start, off_t off, ...@@ -3172,7 +3026,7 @@ static int md_status_read_proc(char *page, char **start, off_t off,
sz += sprintf(page+sz, "\n"); sz += sprintf(page+sz, "\n");
ITERATE_MDDEV(mddev,tmp) { ITERATE_MDDEV(mddev,tmp) if (mddev_lock(mddev)==0) {
sz += sprintf(page + sz, "md%d : %sactive", mdidx(mddev), sz += sprintf(page + sz, "md%d : %sactive", mdidx(mddev),
mddev->pers ? "" : "in"); mddev->pers ? "" : "in");
if (mddev->pers) { if (mddev->pers) {
...@@ -3192,7 +3046,7 @@ static int md_status_read_proc(char *page, char **start, off_t off, ...@@ -3192,7 +3046,7 @@ static int md_status_read_proc(char *page, char **start, off_t off,
size += rdev->size; size += rdev->size;
} }
if (mddev->nb_dev) { if (!list_empty(&mddev->disks)) {
if (mddev->pers) if (mddev->pers)
sz += sprintf(page + sz, "\n %d blocks", sz += sprintf(page + sz, "\n %d blocks",
md_size[mdidx(mddev)]); md_size[mdidx(mddev)]);
...@@ -3202,19 +3056,20 @@ static int md_status_read_proc(char *page, char **start, off_t off, ...@@ -3202,19 +3056,20 @@ static int md_status_read_proc(char *page, char **start, off_t off,
if (!mddev->pers) { if (!mddev->pers) {
sz += sprintf(page+sz, "\n"); sz += sprintf(page+sz, "\n");
mddev_unlock(mddev);
continue; continue;
} }
sz += mddev->pers->status (page+sz, mddev); sz += mddev->pers->status (page+sz, mddev);
sz += sprintf(page+sz, "\n "); sz += sprintf(page+sz, "\n ");
if (mddev->curr_resync) { if (mddev->curr_resync > 1)
sz += status_resync (page+sz, mddev); sz += status_resync (page+sz, mddev);
} else { else if (mddev->curr_resync == 1)
if (atomic_read(&mddev->resync_sem.count) != 1)
sz += sprintf(page + sz, " resync=DELAYED"); sz += sprintf(page + sz, " resync=DELAYED");
}
sz += sprintf(page + sz, "\n"); sz += sprintf(page + sz, "\n");
mddev_unlock(mddev);
} }
sz += status_unused(page + sz); sz += status_unused(page + sz);
...@@ -3315,60 +3170,70 @@ static int is_mddev_idle(mddev_t *mddev) ...@@ -3315,60 +3170,70 @@ static int is_mddev_idle(mddev_t *mddev)
return idle; return idle;
} }
DECLARE_WAIT_QUEUE_HEAD(resync_wait);
void md_done_sync(mddev_t *mddev, int blocks, int ok) void md_done_sync(mddev_t *mddev, int blocks, int ok)
{ {
/* another "blocks" (512byte) blocks have been synced */ /* another "blocks" (512byte) blocks have been synced */
atomic_sub(blocks, &mddev->recovery_active); atomic_sub(blocks, &mddev->recovery_active);
wake_up(&mddev->recovery_wait); wake_up(&mddev->recovery_wait);
if (!ok) { if (!ok) {
mddev->recovery_running = -EIO;
md_recover_arrays();
// stop recovery, signal do_sync .... // stop recovery, signal do_sync ....
} }
} }
DECLARE_WAIT_QUEUE_HEAD(resync_wait);
#define SYNC_MARKS 10 #define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ) #define SYNC_MARK_STEP (3*HZ)
int md_do_sync(mddev_t *mddev, mdp_disk_t *spare) static void md_do_sync(void *data)
{ {
mddev_t *mddev = data;
mddev_t *mddev2; mddev_t *mddev2;
unsigned int max_sectors, currspeed = 0, unsigned int max_sectors, currspeed = 0,
j, window, err, serialize; j, window, err;
unsigned long mark[SYNC_MARKS]; unsigned long mark[SYNC_MARKS];
unsigned long mark_cnt[SYNC_MARKS]; unsigned long mark_cnt[SYNC_MARKS];
int last_mark,m; int last_mark,m;
struct list_head *tmp; struct list_head *tmp;
unsigned long last_check; unsigned long last_check;
/* just in case thread restarts... */
if (mddev->recovery_running <= 0)
return;
err = down_interruptible(&mddev->resync_sem); /* we overload curr_resync somewhat here.
if (err) * 0 == not engaged in resync at all
goto out_nolock; * 2 == checking that there is no conflict with another sync
* 1 == like 2, but have yielded to allow conflicting resync to
* commence
* other == active in resync - this many blocks
*/
do {
mddev->curr_resync = 2;
recheck: ITERATE_MDDEV(mddev2,tmp) {
serialize = 0; if (mddev2 == mddev)
ITERATE_MDDEV(mddev2,tmp) { continue;
if (mddev2 == mddev) if (mddev2->curr_resync &&
continue; match_mddev_units(mddev,mddev2)) {
if (mddev2->curr_resync && match_mddev_units(mddev,mddev2)) { printk(KERN_INFO "md: delaying resync of md%d until md%d "
printk(KERN_INFO "md: delaying resync of md%d until md%d " "has finished resync (they share one or more physical units)\n",
"has finished resync (they share one or more physical units)\n", mdidx(mddev), mdidx(mddev2));
mdidx(mddev), mdidx(mddev2)); if (mddev < mddev2) /* arbitrarily yield */
serialize = 1; mddev->curr_resync = 1;
break; if (wait_event_interruptible(resync_wait,
} mddev2->curr_resync < 2)) {
} flush_curr_signals();
if (serialize) { err = -EINTR;
interruptible_sleep_on(&resync_wait); mddev_put(mddev2);
if (signal_pending(current)) { goto out;
flush_curr_signals(); }
err = -EINTR; }
goto out;
} }
goto recheck; } while (mddev->curr_resync < 2);
}
mddev->curr_resync = 1;
max_sectors = mddev->sb->size << 1; max_sectors = mddev->sb->size << 1;
printk(KERN_INFO "md: syncing RAID array md%d\n", mdidx(mddev)); printk(KERN_INFO "md: syncing RAID array md%d\n", mdidx(mddev));
...@@ -3406,7 +3271,7 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare) ...@@ -3406,7 +3271,7 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
} }
atomic_add(sectors, &mddev->recovery_active); atomic_add(sectors, &mddev->recovery_active);
j += sectors; j += sectors;
mddev->curr_resync = j; if (j>1) mddev->curr_resync = j;
if (last_check + window > j) if (last_check + window > j)
continue; continue;
...@@ -3432,7 +3297,6 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare) ...@@ -3432,7 +3297,6 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
/* /*
* got a signal, exit. * got a signal, exit.
*/ */
mddev->curr_resync = 0;
printk(KERN_INFO "md: md_do_sync() got signal ... exiting\n"); printk(KERN_INFO "md: md_do_sync() got signal ... exiting\n");
flush_curr_signals(); flush_curr_signals();
err = -EINTR; err = -EINTR;
...@@ -3467,106 +3331,116 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare) ...@@ -3467,106 +3331,116 @@ int md_do_sync(mddev_t *mddev, mdp_disk_t *spare)
*/ */
out: out:
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active)); wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
up(&mddev->resync_sem); /* tell personality that we are finished */
out_nolock: mddev->pers->sync_request(mddev, max_sectors, 1);
mddev->curr_resync = 0; mddev->curr_resync = 0;
wake_up(&resync_wait); if (err)
return err; mddev->recovery_running = err;
if (mddev->recovery_running > 0)
mddev->recovery_running = 0;
if (mddev->recovery_running == 0)
mddev->in_sync = 1;
md_recover_arrays();
} }
/* /*
* This is a kernel thread which syncs a spare disk with the active array * This is the kernel thread that watches all md arrays for re-sync action
* * that might be needed.
* the amount of foolproofing might seem to be a tad excessive, but an * It does not do any resync itself, but rather "forks" off other threads
* early (not so error-safe) version of raid1syncd synced the first 0.5 gigs * to do that as needed.
* of my root partition with the first 0.5 gigs of my /home partition ... so * When it is determined that resync is needed, we set "->recovery_running" and
* i'm a bit nervous ;) * create a thread at ->sync_thread.
* When the thread finishes is clears recovery_running (or set and error)
* and wakeup up this thread which will reap the thread and finish up.
*/ */
void md_do_recovery(void *data) void md_do_recovery(void *data)
{ {
int err;
mddev_t *mddev; mddev_t *mddev;
mdp_super_t *sb; mdp_super_t *sb;
mdp_disk_t *spare;
struct list_head *tmp; struct list_head *tmp;
printk(KERN_INFO "md: recovery thread got woken up ...\n"); dprintk(KERN_INFO "md: recovery thread got woken up ...\n");
restart:
ITERATE_MDDEV(mddev,tmp) { ITERATE_MDDEV(mddev,tmp) if (mddev_lock(mddev)==0) {
sb = mddev->sb; sb = mddev->sb;
if (!sb) if (!sb || !mddev->pers || !mddev->pers->diskop || mddev->ro)
continue; goto unlock;
if (mddev->recovery_running) if (mddev->recovery_running > 0)
continue; /* resync/recovery still happening */
if (sb->active_disks == sb->raid_disks) goto unlock;
continue; if (mddev->sync_thread) {
if (!sb->spare_disks) { /* resync has finished, collect result */
printk(KERN_ERR "md%d: no spare disk to reconstruct array! " md_unregister_thread(mddev->sync_thread);
"-- continuing in degraded mode\n", mdidx(mddev)); mddev->sync_thread = NULL;
continue; if (mddev->recovery_running < 0) {
} /* some sort of failure.
/* * If we were doing a reconstruction,
* now here we get the spare and resync it. * we need to retrieve the spare
*/ */
spare = get_spare(mddev); if (mddev->spare) {
if (!spare) mddev->pers->diskop(mddev, &mddev->spare,
continue; DISKOP_SPARE_INACTIVE);
printk(KERN_INFO "md%d: resyncing spare disk %s to replace failed disk\n", mddev->spare = NULL;
mdidx(mddev), partition_name(mk_kdev(spare->major,spare->minor))); }
if (!mddev->pers->diskop) } else {
continue; /* success...*/
if (mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_WRITE)) if (mddev->spare) {
continue; mddev->pers->diskop(mddev, &mddev->spare,
down(&mddev->recovery_sem); DISKOP_SPARE_ACTIVE);
mddev->recovery_running = 1; mark_disk_sync(mddev->spare);
err = md_do_sync(mddev, spare); mark_disk_active(mddev->spare);
if (err == -EIO) { sb->active_disks++;
printk(KERN_INFO "md%d: spare disk %s failed, skipping to next spare.\n", sb->spare_disks--;
mdidx(mddev), partition_name(mk_kdev(spare->major,spare->minor))); mddev->spare = NULL;
if (!disk_faulty(spare)) { }
mddev->pers->diskop(mddev,&spare,DISKOP_SPARE_INACTIVE);
mark_disk_faulty(spare);
mark_disk_nonsync(spare);
mark_disk_inactive(spare);
sb->spare_disks--;
sb->working_disks--;
sb->failed_disks++;
} }
} else __md_update_sb(mddev);
if (disk_faulty(spare))
mddev->pers->diskop(mddev, &spare,
DISKOP_SPARE_INACTIVE);
if (err == -EINTR || err == -ENOMEM) {
/*
* Recovery got interrupted, or ran out of mem ...
* signal back that we have finished using the array.
*/
mddev->pers->diskop(mddev, &spare,
DISKOP_SPARE_INACTIVE);
up(&mddev->recovery_sem);
mddev->recovery_running = 0; mddev->recovery_running = 0;
continue; wake_up(&resync_wait);
} else { goto unlock;
}
if (mddev->recovery_running) {
/* that's odd.. */
mddev->recovery_running = 0; mddev->recovery_running = 0;
up(&mddev->recovery_sem); wake_up(&resync_wait);
} }
if (!disk_faulty(spare)) {
/* if (sb->active_disks < sb->raid_disks) {
* the SPARE_ACTIVE diskop possibly changes the mddev->spare = get_spare(mddev);
* pointer too if (!mddev->spare)
*/ printk(KERN_ERR "md%d: no spare disk to reconstruct array! "
mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_ACTIVE); "-- continuing in degraded mode\n", mdidx(mddev));
mark_disk_sync(spare); else
mark_disk_active(spare); printk(KERN_INFO "md%d: resyncing spare disk %s to replace failed disk\n",
sb->active_disks++; mdidx(mddev), partition_name(mk_kdev(mddev->spare->major,mddev->spare->minor)));
sb->spare_disks--; }
if (!mddev->spare && mddev->in_sync) {
/* nothing we can do ... */
goto unlock;
}
if (mddev->pers->sync_request) {
mddev->sync_thread = md_register_thread(md_do_sync,
mddev,
"md_resync");
if (!mddev->sync_thread) {
printk(KERN_ERR "md%d: could not start resync thread...\n", mdidx(mddev));
if (mddev->spare)
mddev->pers->diskop(mddev, &mddev->spare, DISKOP_SPARE_INACTIVE);
mddev->spare = NULL;
mddev->recovery_running = 0;
} else {
if (mddev->spare)
mddev->pers->diskop(mddev, &mddev->spare, DISKOP_SPARE_WRITE);
mddev->recovery_running = 1;
md_wakeup_thread(mddev->sync_thread);
}
} }
mddev->sb_dirty = 1; unlock:
md_update_sb(mddev); mddev_unlock(mddev);
goto restart;
} }
printk(KERN_INFO "md: recovery thread finished ...\n"); dprintk(KERN_INFO "md: recovery thread finished ...\n");
} }
...@@ -3582,7 +3456,8 @@ int md_notify_reboot(struct notifier_block *this, ...@@ -3582,7 +3456,8 @@ int md_notify_reboot(struct notifier_block *this,
return NOTIFY_DONE; return NOTIFY_DONE;
ITERATE_MDDEV(mddev,tmp) ITERATE_MDDEV(mddev,tmp)
do_md_stop (mddev, 1); if (mddev_trylock(mddev)==0)
do_md_stop (mddev, 1);
/* /*
* certain more exotic SCSI devices are known to be * certain more exotic SCSI devices are known to be
* volatile wrt too early system reboots. While the * volatile wrt too early system reboots. While the
...@@ -3606,7 +3481,6 @@ static void md_geninit(void) ...@@ -3606,7 +3481,6 @@ static void md_geninit(void)
for(i = 0; i < MAX_MD_DEVS; i++) { for(i = 0; i < MAX_MD_DEVS; i++) {
md_size[i] = 0; md_size[i] = 0;
md_maxreadahead[i] = 32;
} }
blk_size[MAJOR_NR] = md_size; blk_size[MAJOR_NR] = md_size;
...@@ -3617,6 +3491,18 @@ static void md_geninit(void) ...@@ -3617,6 +3491,18 @@ static void md_geninit(void)
#endif #endif
} }
request_queue_t * md_queue_proc(kdev_t dev)
{
mddev_t *mddev = mddev_find(minor(dev));
request_queue_t *q = BLK_DEFAULT_QUEUE(MAJOR_NR);
if (!mddev || atomic_read(&mddev->active)<2)
BUG();
if (mddev->pers)
q = &mddev->queue;
mddev_put(mddev); /* the caller must hold a reference... */
return q;
}
int __init md_init(void) int __init md_init(void)
{ {
static char * name = "mdrecoveryd"; static char * name = "mdrecoveryd";
...@@ -3641,8 +3527,9 @@ int __init md_init(void) ...@@ -3641,8 +3527,9 @@ int __init md_init(void)
S_IFBLK | S_IRUSR | S_IWUSR, &md_fops, NULL); S_IFBLK | S_IRUSR | S_IWUSR, &md_fops, NULL);
} }
/* forward all md request to md_make_request */ /* all requests on an uninitialised device get failed... */
blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), md_make_request); blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), md_fail_request);
blk_dev[MAJOR_NR].queue = md_queue_proc;
add_gendisk(&md_gendisk); add_gendisk(&md_gendisk);
...@@ -3720,7 +3607,7 @@ static void autostart_arrays(void) ...@@ -3720,7 +3607,7 @@ static void autostart_arrays(void)
} }
dev_cnt = 0; dev_cnt = 0;
autorun_devices(to_kdev_t(-1)); autorun_devices();
} }
static struct { static struct {
...@@ -3859,17 +3746,27 @@ void __init md_setup_drive(void) ...@@ -3859,17 +3746,27 @@ void __init md_setup_drive(void)
if (!md_setup_args.device_set[minor]) if (!md_setup_args.device_set[minor])
continue; continue;
if (mddev_map[minor].mddev) { printk(KERN_INFO "md: Loading md%d: %s\n", minor, md_setup_args.device_names[minor]);
mddev = mddev_find(minor);
if (!mddev) {
printk(KERN_ERR "md: kmalloc failed - cannot start array %d\n", minor);
continue;
}
if (mddev_lock(mddev)) {
printk(KERN_WARNING printk(KERN_WARNING
"md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n", "md: Ignoring md=%d, cannot lock!\n",
minor); minor);
mddev_put(mddev);
continue; continue;
} }
printk(KERN_INFO "md: Loading md%d: %s\n", minor, md_setup_args.device_names[minor]);
mddev = alloc_mddev(mk_kdev(MD_MAJOR,minor)); if (mddev->sb || !list_empty(&mddev->disks)) {
if (!mddev) { printk(KERN_WARNING
printk(KERN_ERR "md: kmalloc failed - cannot start array %d\n", minor); "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
minor);
mddev_unlock(mddev);
mddev_put(mddev);
continue; continue;
} }
if (md_setup_args.pers[minor]) { if (md_setup_args.pers[minor]) {
...@@ -3923,6 +3820,8 @@ void __init md_setup_drive(void) ...@@ -3923,6 +3820,8 @@ void __init md_setup_drive(void)
do_md_stop(mddev, 0); do_md_stop(mddev, 0);
printk(KERN_WARNING "md: starting md%d failed\n", minor); printk(KERN_WARNING "md: starting md%d failed\n", minor);
} }
mddev_unlock(mddev);
mddev_put(mddev);
} }
} }
...@@ -3973,9 +3872,10 @@ int init_module(void) ...@@ -3973,9 +3872,10 @@ int init_module(void)
static void free_device_names(void) static void free_device_names(void)
{ {
while (device_names.next != &device_names) { while (!list_empty(&device_names)) {
struct list_head *tmp = device_names.next; struct dname *tmp = list_entry(device_names.next,
list_del(tmp); dev_name_t, list);
list_del(&tmp->list);
kfree(tmp); kfree(tmp);
} }
} }
...@@ -4006,10 +3906,8 @@ EXPORT_SYMBOL(register_md_personality); ...@@ -4006,10 +3906,8 @@ EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL(unregister_md_personality); EXPORT_SYMBOL(unregister_md_personality);
EXPORT_SYMBOL(partition_name); EXPORT_SYMBOL(partition_name);
EXPORT_SYMBOL(md_error); EXPORT_SYMBOL(md_error);
EXPORT_SYMBOL(md_do_sync);
EXPORT_SYMBOL(md_sync_acct); EXPORT_SYMBOL(md_sync_acct);
EXPORT_SYMBOL(md_done_sync); EXPORT_SYMBOL(md_done_sync);
EXPORT_SYMBOL(md_recover_arrays);
EXPORT_SYMBOL(md_register_thread); EXPORT_SYMBOL(md_register_thread);
EXPORT_SYMBOL(md_unregister_thread); EXPORT_SYMBOL(md_unregister_thread);
EXPORT_SYMBOL(md_update_sb); EXPORT_SYMBOL(md_update_sb);
...@@ -4017,7 +3915,5 @@ EXPORT_SYMBOL(md_wakeup_thread); ...@@ -4017,7 +3915,5 @@ EXPORT_SYMBOL(md_wakeup_thread);
EXPORT_SYMBOL(md_print_devices); EXPORT_SYMBOL(md_print_devices);
EXPORT_SYMBOL(find_rdev_nr); EXPORT_SYMBOL(find_rdev_nr);
EXPORT_SYMBOL(md_interrupt_thread); EXPORT_SYMBOL(md_interrupt_thread);
EXPORT_SYMBOL(mddev_map);
EXPORT_SYMBOL(md_check_ordering);
EXPORT_SYMBOL(get_spare); EXPORT_SYMBOL(get_spare);
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
...@@ -244,27 +244,19 @@ static int multipath_read_balance (multipath_conf_t *conf) ...@@ -244,27 +244,19 @@ static int multipath_read_balance (multipath_conf_t *conf)
return 0; return 0;
} }
static int multipath_make_request (mddev_t *mddev, int rw, struct bio * bio) static int multipath_make_request (request_queue_t *q, struct bio * bio)
{ {
mddev_t *mddev = q->queuedata;
multipath_conf_t *conf = mddev_to_conf(mddev); multipath_conf_t *conf = mddev_to_conf(mddev);
struct bio *real_bio; struct bio *real_bio;
struct multipath_bh * mp_bh; struct multipath_bh * mp_bh;
struct multipath_info *multipath; struct multipath_info *multipath;
/*
* make_request() can abort the operation when READA is being
* used and no empty request is available.
*
* Currently, just replace the command with READ/WRITE.
*/
if (rw == READA)
rw = READ;
mp_bh = multipath_alloc_mpbh (conf); mp_bh = multipath_alloc_mpbh (conf);
mp_bh->master_bio = bio; mp_bh->master_bio = bio;
mp_bh->mddev = mddev; mp_bh->mddev = mddev;
mp_bh->cmd = rw; mp_bh->cmd = bio_data_dir(bio);
/* /*
* read balancing logic: * read balancing logic:
...@@ -273,7 +265,7 @@ static int multipath_make_request (mddev_t *mddev, int rw, struct bio * bio) ...@@ -273,7 +265,7 @@ static int multipath_make_request (mddev_t *mddev, int rw, struct bio * bio)
real_bio = bio_clone(bio, GFP_NOIO); real_bio = bio_clone(bio, GFP_NOIO);
real_bio->bi_bdev = multipath->bdev; real_bio->bi_bdev = multipath->bdev;
real_bio->bi_rw = rw; real_bio->bi_rw = bio_data_dir(bio);
real_bio->bi_end_io = multipath_end_request; real_bio->bi_end_io = multipath_end_request;
real_bio->bi_private = mp_bh; real_bio->bi_private = mp_bh;
mp_bh->bio = real_bio; mp_bh->bio = real_bio;
...@@ -708,7 +700,6 @@ static void multipathd (void *data) ...@@ -708,7 +700,6 @@ static void multipathd (void *data)
mddev = mp_bh->mddev; mddev = mp_bh->mddev;
if (mddev->sb_dirty) { if (mddev->sb_dirty) {
printk(KERN_INFO "dirty sb detected, updating.\n"); printk(KERN_INFO "dirty sb detected, updating.\n");
mddev->sb_dirty = 0;
md_update_sb(mddev); md_update_sb(mddev);
} }
bio = mp_bh->bio; bio = mp_bh->bio;
......
...@@ -29,21 +29,26 @@ ...@@ -29,21 +29,26 @@
static int create_strip_zones (mddev_t *mddev) static int create_strip_zones (mddev_t *mddev)
{ {
int i, c, j, j1, j2; int i, c, j;
unsigned long current_offset, curr_zone_offset; unsigned long current_offset, curr_zone_offset;
raid0_conf_t *conf = mddev_to_conf(mddev); raid0_conf_t *conf = mddev_to_conf(mddev);
mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev; mdk_rdev_t *smallest, *rdev1, *rdev2, *rdev;
struct list_head *tmp1, *tmp2;
struct strip_zone *zone;
int cnt;
/* /*
* The number of 'same size groups' * The number of 'same size groups'
*/ */
conf->nr_strip_zones = 0; conf->nr_strip_zones = 0;
ITERATE_RDEV_ORDERED(mddev,rdev1,j1) { ITERATE_RDEV(mddev,rdev1,tmp1) {
printk("raid0: looking at %s\n", partition_name(rdev1->dev)); printk("raid0: looking at %s\n", partition_name(rdev1->dev));
c = 0; c = 0;
ITERATE_RDEV_ORDERED(mddev,rdev2,j2) { ITERATE_RDEV(mddev,rdev2,tmp2) {
printk("raid0: comparing %s(%ld) with %s(%ld)\n", partition_name(rdev1->dev), rdev1->size, partition_name(rdev2->dev), rdev2->size); printk("raid0: comparing %s(%ld) with %s(%ld)\n",
partition_name(rdev1->dev), rdev1->size,
partition_name(rdev2->dev), rdev2->size);
if (rdev2 == rdev1) { if (rdev2 == rdev1) {
printk("raid0: END\n"); printk("raid0: END\n");
break; break;
...@@ -51,7 +56,7 @@ static int create_strip_zones (mddev_t *mddev) ...@@ -51,7 +56,7 @@ static int create_strip_zones (mddev_t *mddev)
if (rdev2->size == rdev1->size) if (rdev2->size == rdev1->size)
{ {
/* /*
* Not unique, dont count it as a new * Not unique, don't count it as a new
* group * group
*/ */
printk("raid0: EQUAL\n"); printk("raid0: EQUAL\n");
...@@ -66,29 +71,62 @@ static int create_strip_zones (mddev_t *mddev) ...@@ -66,29 +71,62 @@ static int create_strip_zones (mddev_t *mddev)
printk("raid0: %d zones\n", conf->nr_strip_zones); printk("raid0: %d zones\n", conf->nr_strip_zones);
} }
} }
printk("raid0: FINAL %d zones\n", conf->nr_strip_zones); printk("raid0: FINAL %d zones\n", conf->nr_strip_zones);
conf->strip_zone = vmalloc(sizeof(struct strip_zone)* conf->strip_zone = vmalloc(sizeof(struct strip_zone)*
conf->nr_strip_zones); conf->nr_strip_zones);
if (!conf->strip_zone) if (!conf->strip_zone)
return 1; return 1;
memset(conf->strip_zone, 0,sizeof(struct strip_zone)*
conf->nr_strip_zones);
/* The first zone must contain all devices, so here we check that
* there is a proper alignment of slots to devices and find them all
*/
zone = &conf->strip_zone[0];
cnt = 0;
smallest = NULL;
ITERATE_RDEV(mddev, rdev1, tmp1) {
int j = rdev1->sb->this_disk.raid_disk;
if (j < 0 || j >= mddev->sb->raid_disks) {
printk("raid0: bad disk number %d - aborting!\n", j);
goto abort;
}
if (zone->dev[j]) {
printk("raid0: multiple devices for %d - aborting!\n", j);
goto abort;
}
zone->dev[j] = rdev1;
if (!smallest || (rdev1->size <smallest->size))
smallest = rdev1;
cnt++;
}
if (cnt != mddev->sb->raid_disks) {
printk("raid0: too few disks (%d of %d) - aborting!\n", cnt,
mddev->sb->raid_disks);
goto abort;
}
zone->nb_dev = cnt;
zone->size = smallest->size * cnt;
zone->zone_offset = 0;
conf->smallest = NULL; conf->smallest = zone;
current_offset = 0; current_offset = smallest->size;
curr_zone_offset = 0; curr_zone_offset = zone->size;
for (i = 0; i < conf->nr_strip_zones; i++) /* now do the other zones */
for (i = 1; i < conf->nr_strip_zones; i++)
{ {
struct strip_zone *zone = conf->strip_zone + i; zone = conf->strip_zone + i;
printk("raid0: zone %d\n", i); printk("raid0: zone %d\n", i);
zone->dev_offset = current_offset; zone->dev_offset = current_offset;
smallest = NULL; smallest = NULL;
c = 0; c = 0;
ITERATE_RDEV_ORDERED(mddev,rdev,j) { for (j=0; j<cnt; j++) {
rdev = conf->strip_zone[0].dev[j];
printk("raid0: checking %s ...", partition_name(rdev->dev)); printk("raid0: checking %s ...", partition_name(rdev->dev));
if (rdev->size > current_offset) if (rdev->size > current_offset)
{ {
...@@ -118,6 +156,9 @@ static int create_strip_zones (mddev_t *mddev) ...@@ -118,6 +156,9 @@ static int create_strip_zones (mddev_t *mddev)
} }
printk("raid0: done.\n"); printk("raid0: done.\n");
return 0; return 0;
abort:
vfree(conf->strip_zone);
return 1;
} }
static int raid0_run (mddev_t *mddev) static int raid0_run (mddev_t *mddev)
...@@ -132,11 +173,6 @@ static int raid0_run (mddev_t *mddev) ...@@ -132,11 +173,6 @@ static int raid0_run (mddev_t *mddev)
goto out; goto out;
mddev->private = (void *)conf; mddev->private = (void *)conf;
if (md_check_ordering(mddev)) {
printk("raid0: disks are not ordered, aborting!\n");
goto out_free_conf;
}
if (create_strip_zones (mddev)) if (create_strip_zones (mddev))
goto out_free_conf; goto out_free_conf;
...@@ -225,8 +261,9 @@ static int raid0_stop (mddev_t *mddev) ...@@ -225,8 +261,9 @@ static int raid0_stop (mddev_t *mddev)
* Of course, those facts may not be valid anymore (and surely won't...) * Of course, those facts may not be valid anymore (and surely won't...)
* Hey guys, there's some work out there ;-) * Hey guys, there's some work out there ;-)
*/ */
static int raid0_make_request (mddev_t *mddev, int rw, struct bio *bio) static int raid0_make_request (request_queue_t *q, struct bio *bio)
{ {
mddev_t *mddev = q->queuedata;
unsigned int sect_in_chunk, chunksize_bits, chunk_size; unsigned int sect_in_chunk, chunksize_bits, chunk_size;
raid0_conf_t *conf = mddev_to_conf(mddev); raid0_conf_t *conf = mddev_to_conf(mddev);
struct raid0_hash *hash; struct raid0_hash *hash;
...@@ -234,7 +271,7 @@ static int raid0_make_request (mddev_t *mddev, int rw, struct bio *bio) ...@@ -234,7 +271,7 @@ static int raid0_make_request (mddev_t *mddev, int rw, struct bio *bio)
mdk_rdev_t *tmp_dev; mdk_rdev_t *tmp_dev;
unsigned long chunk, block, rsect; unsigned long chunk, block, rsect;
chunk_size = mddev->param.chunk_size >> 10; chunk_size = mddev->sb->chunk_size >> 10;
chunksize_bits = ffz(~chunk_size); chunksize_bits = ffz(~chunk_size);
block = bio->bi_sector >> 1; block = bio->bi_sector >> 1;
hash = conf->hash_table + block / conf->smallest->size; hash = conf->hash_table + block / conf->smallest->size;
...@@ -323,7 +360,7 @@ static int raid0_status (char *page, mddev_t *mddev) ...@@ -323,7 +360,7 @@ static int raid0_status (char *page, mddev_t *mddev)
conf->strip_zone[j].size); conf->strip_zone[j].size);
} }
#endif #endif
sz += sprintf(page + sz, " %dk chunks", mddev->param.chunk_size/1024); sz += sprintf(page + sz, " %dk chunks", mddev->sb->chunk_size/1024);
return sz; return sz;
} }
......
...@@ -334,7 +334,7 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio) ...@@ -334,7 +334,7 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
* device if no resync is going on, or below the resync window. * device if no resync is going on, or below the resync window.
* We take the first readable disk when above the resync window. * We take the first readable disk when above the resync window.
*/ */
if (conf->resync_mirrors && (this_sector + sectors >= conf->next_resync)) { if (!conf->mddev->in_sync && (this_sector + sectors >= conf->next_resync)) {
/* make sure that disk is operational */ /* make sure that disk is operational */
new_disk = 0; new_disk = 0;
while (!conf->mirrors[new_disk].operational || conf->mirrors[new_disk].write_only) { while (!conf->mirrors[new_disk].operational || conf->mirrors[new_disk].write_only) {
...@@ -434,8 +434,9 @@ static void resume_device(conf_t *conf) ...@@ -434,8 +434,9 @@ static void resume_device(conf_t *conf)
spin_unlock_irq(&conf->resync_lock); spin_unlock_irq(&conf->resync_lock);
} }
static int make_request(mddev_t *mddev, int rw, struct bio * bio) static int make_request(request_queue_t *q, struct bio * bio)
{ {
mddev_t *mddev = q->queuedata;
conf_t *conf = mddev_to_conf(mddev); conf_t *conf = mddev_to_conf(mddev);
mirror_info_t *mirror; mirror_info_t *mirror;
r1bio_t *r1_bio; r1bio_t *r1_bio;
...@@ -456,20 +457,16 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio) ...@@ -456,20 +457,16 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio)
* make_request() can abort the operation when READA is being * make_request() can abort the operation when READA is being
* used and no empty request is available. * used and no empty request is available.
* *
* Currently, just replace the command with READ.
*/ */
if (rw == READA)
rw = READ;
r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
r1_bio->master_bio = bio; r1_bio->master_bio = bio;
r1_bio->mddev = mddev; r1_bio->mddev = mddev;
r1_bio->sector = bio->bi_sector; r1_bio->sector = bio->bi_sector;
r1_bio->cmd = rw; r1_bio->cmd = bio_data_dir(bio);
if (rw == READ) { if (r1_bio->cmd == READ) {
/* /*
* read balancing logic: * read balancing logic:
*/ */
...@@ -483,7 +480,7 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio) ...@@ -483,7 +480,7 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio)
read_bio->bi_sector = r1_bio->sector; read_bio->bi_sector = r1_bio->sector;
read_bio->bi_bdev = mirror->bdev; read_bio->bi_bdev = mirror->bdev;
read_bio->bi_end_io = end_request; read_bio->bi_end_io = end_request;
read_bio->bi_rw = rw; read_bio->bi_rw = r1_bio->cmd;
read_bio->bi_private = r1_bio; read_bio->bi_private = r1_bio;
generic_make_request(read_bio); generic_make_request(read_bio);
...@@ -507,7 +504,7 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio) ...@@ -507,7 +504,7 @@ static int make_request(mddev_t *mddev, int rw, struct bio * bio)
mbio->bi_sector = r1_bio->sector; mbio->bi_sector = r1_bio->sector;
mbio->bi_bdev = conf->mirrors[i].bdev; mbio->bi_bdev = conf->mirrors[i].bdev;
mbio->bi_end_io = end_request; mbio->bi_end_io = end_request;
mbio->bi_rw = rw; mbio->bi_rw = r1_bio->cmd;
mbio->bi_private = r1_bio; mbio->bi_private = r1_bio;
sum_bios++; sum_bios++;
...@@ -656,6 +653,9 @@ static void close_sync(conf_t *conf) ...@@ -656,6 +653,9 @@ static void close_sync(conf_t *conf)
if (conf->barrier) BUG(); if (conf->barrier) BUG();
if (waitqueue_active(&conf->wait_idle)) BUG(); if (waitqueue_active(&conf->wait_idle)) BUG();
if (waitqueue_active(&conf->wait_resume)) BUG(); if (waitqueue_active(&conf->wait_resume)) BUG();
mempool_destroy(conf->r1buf_pool);
conf->r1buf_pool = NULL;
} }
static int diskop(mddev_t *mddev, mdp_disk_t **d, int state) static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
...@@ -772,7 +772,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state) ...@@ -772,7 +772,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
* Deactivate a spare disk: * Deactivate a spare disk:
*/ */
case DISKOP_SPARE_INACTIVE: case DISKOP_SPARE_INACTIVE:
close_sync(conf);
sdisk = conf->mirrors + spare_disk; sdisk = conf->mirrors + spare_disk;
sdisk->operational = 0; sdisk->operational = 0;
sdisk->write_only = 0; sdisk->write_only = 0;
...@@ -785,7 +784,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state) ...@@ -785,7 +784,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
* property) * property)
*/ */
case DISKOP_SPARE_ACTIVE: case DISKOP_SPARE_ACTIVE:
close_sync(conf);
sdisk = conf->mirrors + spare_disk; sdisk = conf->mirrors + spare_disk;
fdisk = conf->mirrors + failed_disk; fdisk = conf->mirrors + failed_disk;
...@@ -919,10 +917,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state) ...@@ -919,10 +917,6 @@ static int diskop(mddev_t *mddev, mdp_disk_t **d, int state)
} }
abort: abort:
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
if (state == DISKOP_SPARE_ACTIVE || state == DISKOP_SPARE_INACTIVE) {
mempool_destroy(conf->r1buf_pool);
conf->r1buf_pool = NULL;
}
print_conf(conf); print_conf(conf);
return err; return err;
...@@ -1012,7 +1006,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) ...@@ -1012,7 +1006,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
* we read from here, no need to write * we read from here, no need to write
*/ */
continue; continue;
if (i < conf->raid_disks && !conf->resync_mirrors) if (i < conf->raid_disks && mddev->in_sync)
/* /*
* don't need to write this we are just rebuilding * don't need to write this we are just rebuilding
*/ */
...@@ -1088,7 +1082,6 @@ static void raid1d(void *data) ...@@ -1088,7 +1082,6 @@ static void raid1d(void *data)
conf = mddev_to_conf(mddev); conf = mddev_to_conf(mddev);
if (mddev->sb_dirty) { if (mddev->sb_dirty) {
printk(KERN_INFO "raid1: dirty sb detected, updating.\n"); printk(KERN_INFO "raid1: dirty sb detected, updating.\n");
mddev->sb_dirty = 0;
md_update_sb(mddev); md_update_sb(mddev);
} }
bio = r1_bio->master_bio; bio = r1_bio->master_bio;
...@@ -1118,31 +1111,6 @@ static void raid1d(void *data) ...@@ -1118,31 +1111,6 @@ static void raid1d(void *data)
spin_unlock_irqrestore(&retry_list_lock, flags); spin_unlock_irqrestore(&retry_list_lock, flags);
} }
/*
* Private kernel thread to reconstruct mirrors after an unclean
* shutdown.
*/
static void raid1syncd(void *data)
{
conf_t *conf = data;
mddev_t *mddev = conf->mddev;
if (!conf->resync_mirrors)
return;
if (conf->resync_mirrors == 2)
return;
down(&mddev->recovery_sem);
if (!md_do_sync(mddev, NULL)) {
/*
* Only if everything went Ok.
*/
conf->resync_mirrors = 0;
}
close_sync(conf);
up(&mddev->recovery_sem);
}
static int init_resync(conf_t *conf) static int init_resync(conf_t *conf)
{ {
...@@ -1177,9 +1145,16 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) ...@@ -1177,9 +1145,16 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
sector_t max_sector, nr_sectors; sector_t max_sector, nr_sectors;
int disk, partial; int disk, partial;
if (!sector_nr) if (sector_nr == 0)
if (init_resync(conf)) if (init_resync(conf))
return -ENOMEM; return -ENOMEM;
max_sector = mddev->sb->size << 1;
if (sector_nr >= max_sector) {
close_sync(conf);
return 0;
}
/* /*
* If there is non-resync activity waiting for us then * If there is non-resync activity waiting for us then
* put in a delay to throttle resync. * put in a delay to throttle resync.
...@@ -1216,10 +1191,6 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster) ...@@ -1216,10 +1191,6 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
r1_bio->sector = sector_nr; r1_bio->sector = sector_nr;
r1_bio->cmd = SPECIAL; r1_bio->cmd = SPECIAL;
max_sector = mddev->sb->size << 1;
if (sector_nr >= max_sector)
BUG();
bio = r1_bio->master_bio; bio = r1_bio->master_bio;
nr_sectors = RESYNC_BLOCK_SIZE >> 9; nr_sectors = RESYNC_BLOCK_SIZE >> 9;
if (max_sector - sector_nr < nr_sectors) if (max_sector - sector_nr < nr_sectors)
...@@ -1302,7 +1273,6 @@ static int run(mddev_t *mddev) ...@@ -1302,7 +1273,6 @@ static int run(mddev_t *mddev)
mdp_disk_t *descriptor; mdp_disk_t *descriptor;
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct list_head *tmp; struct list_head *tmp;
int start_recovery = 0;
MOD_INC_USE_COUNT; MOD_INC_USE_COUNT;
...@@ -1454,10 +1424,6 @@ static int run(mddev_t *mddev) ...@@ -1454,10 +1424,6 @@ static int run(mddev_t *mddev)
conf->last_used = j; conf->last_used = j;
if (conf->working_disks != sb->raid_disks) {
printk(KERN_ALERT "raid1: md%d, not all disks are operational -- trying to recover array\n", mdidx(mddev));
start_recovery = 1;
}
{ {
const char * name = "raid1d"; const char * name = "raid1d";
...@@ -1469,20 +1435,6 @@ static int run(mddev_t *mddev) ...@@ -1469,20 +1435,6 @@ static int run(mddev_t *mddev)
} }
} }
if (!start_recovery && !(sb->state & (1 << MD_SB_CLEAN)) &&
(conf->working_disks > 1)) {
const char * name = "raid1syncd";
conf->resync_thread = md_register_thread(raid1syncd, conf, name);
if (!conf->resync_thread) {
printk(THREAD_ERROR, mdidx(mddev));
goto out_free_conf;
}
printk(START_RESYNC, mdidx(mddev));
conf->resync_mirrors = 1;
md_wakeup_thread(conf->resync_thread);
}
/* /*
* Regenerate the "device is in sync with the raid set" bit for * Regenerate the "device is in sync with the raid set" bit for
...@@ -1499,10 +1451,6 @@ static int run(mddev_t *mddev) ...@@ -1499,10 +1451,6 @@ static int run(mddev_t *mddev)
} }
sb->active_disks = conf->working_disks; sb->active_disks = conf->working_disks;
if (start_recovery)
md_recover_arrays();
printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->active_disks, sb->raid_disks); printk(ARRAY_IS_ACTIVE, mdidx(mddev), sb->active_disks, sb->raid_disks);
/* /*
* Ok, everything is just fine now * Ok, everything is just fine now
...@@ -1522,47 +1470,12 @@ static int run(mddev_t *mddev) ...@@ -1522,47 +1470,12 @@ static int run(mddev_t *mddev)
return -EIO; return -EIO;
} }
static int stop_resync(mddev_t *mddev)
{
conf_t *conf = mddev_to_conf(mddev);
if (conf->resync_thread) {
if (conf->resync_mirrors) {
conf->resync_mirrors = 2;
md_interrupt_thread(conf->resync_thread);
printk(KERN_INFO "raid1: mirror resync was not fully finished, restarting next time.\n");
return 1;
}
return 0;
}
return 0;
}
static int restart_resync(mddev_t *mddev)
{
conf_t *conf = mddev_to_conf(mddev);
if (conf->resync_mirrors) {
if (!conf->resync_thread) {
MD_BUG();
return 0;
}
conf->resync_mirrors = 1;
md_wakeup_thread(conf->resync_thread);
return 1;
}
return 0;
}
static int stop(mddev_t *mddev) static int stop(mddev_t *mddev)
{ {
conf_t *conf = mddev_to_conf(mddev); conf_t *conf = mddev_to_conf(mddev);
int i; int i;
md_unregister_thread(conf->thread); md_unregister_thread(conf->thread);
if (conf->resync_thread)
md_unregister_thread(conf->resync_thread);
if (conf->r1bio_pool) if (conf->r1bio_pool)
mempool_destroy(conf->r1bio_pool); mempool_destroy(conf->r1bio_pool);
for (i = 0; i < MD_SB_DISKS; i++) for (i = 0; i < MD_SB_DISKS; i++)
...@@ -1583,8 +1496,6 @@ static mdk_personality_t raid1_personality = ...@@ -1583,8 +1496,6 @@ static mdk_personality_t raid1_personality =
status: status, status: status,
error_handler: error, error_handler: error,
diskop: diskop, diskop: diskop,
stop_resync: stop_resync,
restart_resync: restart_resync,
sync_request: sync_request sync_request: sync_request
}; };
......
...@@ -634,7 +634,6 @@ static void copy_data(int frombio, struct bio *bio, ...@@ -634,7 +634,6 @@ static void copy_data(int frombio, struct bio *bio,
else else
page_offset = (signed)(sector - bio->bi_sector) * -512; page_offset = (signed)(sector - bio->bi_sector) * -512;
bio_for_each_segment(bvl, bio, i) { bio_for_each_segment(bvl, bio, i) {
char *ba = __bio_kmap(bio, i);
int len = bio_iovec_idx(bio,i)->bv_len; int len = bio_iovec_idx(bio,i)->bv_len;
int clen; int clen;
int b_offset = 0; int b_offset = 0;
...@@ -649,13 +648,16 @@ static void copy_data(int frombio, struct bio *bio, ...@@ -649,13 +648,16 @@ static void copy_data(int frombio, struct bio *bio,
clen = STRIPE_SIZE - page_offset; clen = STRIPE_SIZE - page_offset;
else clen = len; else clen = len;
if (len > 0) { if (clen > 0) {
char *ba = __bio_kmap(bio, i);
if (frombio) if (frombio)
memcpy(pa+page_offset, ba+b_offset, clen); memcpy(pa+page_offset, ba+b_offset, clen);
else else
memcpy(ba+b_offset, pa+page_offset, clen); memcpy(ba+b_offset, pa+page_offset, clen);
} __bio_kunmap(bio, i);
__bio_kunmap(bio, i); }
if (clen < len) /* hit end of page */
break;
page_offset += len; page_offset += len;
} }
} }
...@@ -810,6 +812,8 @@ static void add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, ...@@ -810,6 +812,8 @@ static void add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx,
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
spin_unlock(&sh->lock); spin_unlock(&sh->lock);
PRINTK("added bi b#%lu to stripe s#%lu, disk %d.\n", bi->bi_sector, sh->sector, dd_idx);
if (forwrite) { if (forwrite) {
/* check if page is covered */ /* check if page is covered */
sector_t sector = sh->dev[dd_idx].sector; sector_t sector = sh->dev[dd_idx].sector;
...@@ -823,8 +827,6 @@ static void add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx, ...@@ -823,8 +827,6 @@ static void add_stripe_bio (struct stripe_head *sh, struct bio *bi, int dd_idx,
if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
} }
PRINTK("added bi b#%lu to stripe s#%lu, disk %d.\n", bi->bi_sector, sh->sector, dd_idx);
} }
...@@ -1036,7 +1038,7 @@ static void handle_stripe(struct stripe_head *sh) ...@@ -1036,7 +1038,7 @@ static void handle_stripe(struct stripe_head *sh)
) && ) &&
!test_bit(R5_UPTODATE, &dev->flags)) { !test_bit(R5_UPTODATE, &dev->flags)) {
if (conf->disks[i].operational if (conf->disks[i].operational
/* && !(conf->resync_parity && i == sh->pd_idx) */ /* && !(!mddev->insync && i == sh->pd_idx) */
) )
rmw++; rmw++;
else rmw += 2*disks; /* cannot read it */ else rmw += 2*disks; /* cannot read it */
...@@ -1226,14 +1228,15 @@ static inline void raid5_activate_delayed(raid5_conf_t *conf) ...@@ -1226,14 +1228,15 @@ static inline void raid5_activate_delayed(raid5_conf_t *conf)
} }
static void raid5_unplug_device(void *data) static void raid5_unplug_device(void *data)
{ {
raid5_conf_t *conf = (raid5_conf_t *)data; request_queue_t *q = data;
mddev_t *mddev = q->queuedata;
raid5_conf_t *conf = mddev_to_conf(mddev);
unsigned long flags; unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags); spin_lock_irqsave(&conf->device_lock, flags);
raid5_activate_delayed(conf); if (blk_remove_plug(q))
raid5_activate_delayed(conf);
conf->plugged = 0;
md_wakeup_thread(conf->thread); md_wakeup_thread(conf->thread);
spin_unlock_irqrestore(&conf->device_lock, flags); spin_unlock_irqrestore(&conf->device_lock, flags);
...@@ -1242,31 +1245,21 @@ static void raid5_unplug_device(void *data) ...@@ -1242,31 +1245,21 @@ static void raid5_unplug_device(void *data)
static inline void raid5_plug_device(raid5_conf_t *conf) static inline void raid5_plug_device(raid5_conf_t *conf)
{ {
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
if (list_empty(&conf->delayed_list)) blk_plug_device(&conf->mddev->queue);
if (!conf->plugged) {
conf->plugged = 1;
queue_task(&conf->plug_tq, &tq_disk);
}
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
} }
static int make_request (mddev_t *mddev, int rw, struct bio * bi) static int make_request (request_queue_t *q, struct bio * bi)
{ {
raid5_conf_t *conf = (raid5_conf_t *) mddev->private; mddev_t *mddev = q->queuedata;
raid5_conf_t *conf = mddev_to_conf(mddev);
const unsigned int raid_disks = conf->raid_disks; const unsigned int raid_disks = conf->raid_disks;
const unsigned int data_disks = raid_disks - 1; const unsigned int data_disks = raid_disks - 1;
unsigned int dd_idx, pd_idx; unsigned int dd_idx, pd_idx;
sector_t new_sector; sector_t new_sector;
sector_t logical_sector, last_sector; sector_t logical_sector, last_sector;
int read_ahead = 0;
struct stripe_head *sh; struct stripe_head *sh;
if (rw == READA) {
rw = READ;
read_ahead=1;
}
logical_sector = bi->bi_sector & ~(STRIPE_SECTORS-1); logical_sector = bi->bi_sector & ~(STRIPE_SECTORS-1);
last_sector = bi->bi_sector + (bi->bi_size>>9); last_sector = bi->bi_sector + (bi->bi_size>>9);
...@@ -1281,10 +1274,10 @@ static int make_request (mddev_t *mddev, int rw, struct bio * bi) ...@@ -1281,10 +1274,10 @@ static int make_request (mddev_t *mddev, int rw, struct bio * bi)
PRINTK("raid5: make_request, sector %ul logical %ul\n", PRINTK("raid5: make_request, sector %ul logical %ul\n",
new_sector, logical_sector); new_sector, logical_sector);
sh = get_active_stripe(conf, new_sector, pd_idx, read_ahead); sh = get_active_stripe(conf, new_sector, pd_idx, (bi->bi_rw&RWA_MASK));
if (sh) { if (sh) {
add_stripe_bio(sh, bi, dd_idx, rw); add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK));
raid5_plug_device(conf); raid5_plug_device(conf);
handle_stripe(sh); handle_stripe(sh);
...@@ -1311,6 +1304,10 @@ static int sync_request (mddev_t *mddev, sector_t sector_nr, int go_faster) ...@@ -1311,6 +1304,10 @@ static int sync_request (mddev_t *mddev, sector_t sector_nr, int go_faster)
int raid_disks = conf->raid_disks; int raid_disks = conf->raid_disks;
int data_disks = raid_disks-1; int data_disks = raid_disks-1;
if (sector_nr >= mddev->sb->size <<1)
/* just being told to finish up .. nothing to do */
return 0;
first_sector = raid5_compute_sector(stripe*data_disks*sectors_per_chunk first_sector = raid5_compute_sector(stripe*data_disks*sectors_per_chunk
+ chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf); + chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf);
sh = get_active_stripe(conf, sector_nr, pd_idx, 0); sh = get_active_stripe(conf, sector_nr, pd_idx, 0);
...@@ -1343,17 +1340,15 @@ static void raid5d (void *data) ...@@ -1343,17 +1340,15 @@ static void raid5d (void *data)
handled = 0; handled = 0;
if (mddev->sb_dirty) { if (mddev->sb_dirty)
mddev->sb_dirty = 0;
md_update_sb(mddev); md_update_sb(mddev);
}
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
while (1) { while (1) {
struct list_head *first; struct list_head *first;
if (list_empty(&conf->handle_list) && if (list_empty(&conf->handle_list) &&
atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD && atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD &&
!conf->plugged && !blk_queue_plugged(&mddev->queue) &&
!list_empty(&conf->delayed_list)) !list_empty(&conf->delayed_list))
raid5_activate_delayed(conf); raid5_activate_delayed(conf);
...@@ -1382,31 +1377,6 @@ static void raid5d (void *data) ...@@ -1382,31 +1377,6 @@ static void raid5d (void *data)
PRINTK("--- raid5d inactive\n"); PRINTK("--- raid5d inactive\n");
} }
/*
* Private kernel thread for parity reconstruction after an unclean
* shutdown. Reconstruction on spare drives in case of a failed drive
* is done by the generic mdsyncd.
*/
static void raid5syncd (void *data)
{
raid5_conf_t *conf = data;
mddev_t *mddev = conf->mddev;
if (!conf->resync_parity)
return;
if (conf->resync_parity == 2)
return;
down(&mddev->recovery_sem);
if (md_do_sync(mddev,NULL)) {
up(&mddev->recovery_sem);
printk("raid5: resync aborted!\n");
return;
}
conf->resync_parity = 0;
up(&mddev->recovery_sem);
printk("raid5: resync finished.\n");
}
static int run (mddev_t *mddev) static int run (mddev_t *mddev)
{ {
raid5_conf_t *conf; raid5_conf_t *conf;
...@@ -1416,7 +1386,6 @@ static int run (mddev_t *mddev) ...@@ -1416,7 +1386,6 @@ static int run (mddev_t *mddev)
mdk_rdev_t *rdev; mdk_rdev_t *rdev;
struct disk_info *disk; struct disk_info *disk;
struct list_head *tmp; struct list_head *tmp;
int start_recovery = 0;
MOD_INC_USE_COUNT; MOD_INC_USE_COUNT;
...@@ -1444,10 +1413,7 @@ static int run (mddev_t *mddev) ...@@ -1444,10 +1413,7 @@ static int run (mddev_t *mddev)
atomic_set(&conf->active_stripes, 0); atomic_set(&conf->active_stripes, 0);
atomic_set(&conf->preread_active_stripes, 0); atomic_set(&conf->preread_active_stripes, 0);
conf->plugged = 0; mddev->queue.unplug_fn = raid5_unplug_device;
conf->plug_tq.sync = 0;
conf->plug_tq.routine = &raid5_unplug_device;
conf->plug_tq.data = conf;
PRINTK("raid5: run(md%d) called.\n", mdidx(mddev)); PRINTK("raid5: run(md%d) called.\n", mdidx(mddev));
...@@ -1571,9 +1537,10 @@ static int run (mddev_t *mddev) ...@@ -1571,9 +1537,10 @@ static int run (mddev_t *mddev)
goto abort; goto abort;
} }
if (conf->working_disks != sb->raid_disks) { if (conf->failed_disks == 1 &&
printk(KERN_ALERT "raid5: md%d, not all disks are operational -- trying to recover array\n", mdidx(mddev)); !(sb->state & (1<<MD_SB_CLEAN))) {
start_recovery = 1; printk(KERN_ERR "raid5: cannot start dirty degraded array for md%d\n", mdidx(mddev));
goto abort;
} }
{ {
...@@ -1587,10 +1554,11 @@ static int run (mddev_t *mddev) ...@@ -1587,10 +1554,11 @@ static int run (mddev_t *mddev)
} }
memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
conf->raid_disks * ((sizeof(struct buffer_head) + PAGE_SIZE))) / 1024; conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
if (grow_stripes(conf, conf->max_nr_stripes)) { if (grow_stripes(conf, conf->max_nr_stripes)) {
printk(KERN_ERR "raid5: couldn't allocate %dkB for buffers\n", memory); printk(KERN_ERR "raid5: couldn't allocate %dkB for buffers\n", memory);
shrink_stripes(conf); shrink_stripes(conf);
md_unregister_thread(conf->thread);
goto abort; goto abort;
} else } else
printk(KERN_INFO "raid5: allocated %dkB for md%d\n", memory, mdidx(mddev)); printk(KERN_INFO "raid5: allocated %dkB for md%d\n", memory, mdidx(mddev));
...@@ -1615,23 +1583,6 @@ static int run (mddev_t *mddev) ...@@ -1615,23 +1583,6 @@ static int run (mddev_t *mddev)
else else
printk(KERN_ALERT "raid5: raid level %d set md%d active with %d out of %d devices, algorithm %d\n", conf->level, mdidx(mddev), sb->active_disks, sb->raid_disks, conf->algorithm); printk(KERN_ALERT "raid5: raid level %d set md%d active with %d out of %d devices, algorithm %d\n", conf->level, mdidx(mddev), sb->active_disks, sb->raid_disks, conf->algorithm);
if (!start_recovery && !(sb->state & (1 << MD_SB_CLEAN))) {
const char * name = "raid5syncd";
conf->resync_thread = md_register_thread(raid5syncd, conf,name);
if (!conf->resync_thread) {
printk(KERN_ERR "raid5: couldn't allocate thread for md%d\n", mdidx(mddev));
goto abort;
}
printk("raid5: raid set md%d not clean; reconstructing parity\n", mdidx(mddev));
conf->resync_parity = 1;
md_wakeup_thread(conf->resync_thread);
}
print_raid5_conf(conf);
if (start_recovery)
md_recover_arrays();
print_raid5_conf(conf); print_raid5_conf(conf);
/* Ok, everything is just fine now */ /* Ok, everything is just fine now */
...@@ -1650,48 +1601,12 @@ static int run (mddev_t *mddev) ...@@ -1650,48 +1601,12 @@ static int run (mddev_t *mddev)
return -EIO; return -EIO;
} }
/*
 * Ask a running parity resync to stop.
 *
 * Returns 1 when a resync was in progress and has been interrupted
 * (resync_parity is set to 2 so it is picked up again later), and 0
 * when there is no resync thread or no resync is pending.
 */
static int stop_resync (mddev_t *mddev)
{
	raid5_conf_t *conf = mddev_to_conf(mddev);
	mdk_thread_t *resync = conf->resync_thread;

	/* nothing to interrupt: no thread, or the thread is idle */
	if (!resync || !conf->resync_parity)
		return 0;

	conf->resync_parity = 2;
	md_interrupt_thread(resync);
	printk(KERN_INFO "raid5: parity resync was not fully finished, restarting next time.\n");
	return 1;
}
/*
 * Resume a parity resync that was previously flagged via resync_parity.
 *
 * Returns 1 when the resync thread was woken up, 0 when no resync is
 * pending or when the resync thread is unexpectedly missing (reported
 * through MD_BUG()).
 */
static int restart_resync (mddev_t *mddev)
{
	raid5_conf_t *conf = mddev_to_conf(mddev);

	if (!conf->resync_parity) {
		printk("raid5: no restart-resync needed.\n");
		return 0;
	}

	/* a pending resync without a thread to run it is a driver bug */
	if (!conf->resync_thread) {
		MD_BUG();
		return 0;
	}

	printk("raid5: waking up raid5resync.\n");
	conf->resync_parity = 1;
	md_wakeup_thread(conf->resync_thread);
	return 1;
}
static int stop (mddev_t *mddev) static int stop (mddev_t *mddev)
{ {
raid5_conf_t *conf = (raid5_conf_t *) mddev->private; raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
if (conf->resync_thread)
md_unregister_thread(conf->resync_thread);
md_unregister_thread(conf->thread); md_unregister_thread(conf->thread);
shrink_stripes(conf); shrink_stripes(conf);
free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER);
...@@ -2066,8 +1981,6 @@ static mdk_personality_t raid5_personality= ...@@ -2066,8 +1981,6 @@ static mdk_personality_t raid5_personality=
status: status, status: status,
error_handler: error, error_handler: error,
diskop: diskop, diskop: diskop,
stop_resync: stop_resync,
restart_resync: restart_resync,
sync_request: sync_request sync_request: sync_request
}; };
......
...@@ -106,9 +106,7 @@ MODULE_PARM_DESC(ulangid, "The optional preferred USB Language ID for all device ...@@ -106,9 +106,7 @@ MODULE_PARM_DESC(ulangid, "The optional preferred USB Language ID for all device
MODULE_AUTHOR("NAGANO Daisuke <breeze.nagano@nifty.ne.jp>"); MODULE_AUTHOR("NAGANO Daisuke <breeze.nagano@nifty.ne.jp>");
MODULE_DESCRIPTION("USB-MIDI driver"); MODULE_DESCRIPTION("USB-MIDI driver");
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,4,14)
MODULE_LICENSE("GPL"); MODULE_LICENSE("GPL");
#endif
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
......
...@@ -74,9 +74,9 @@ static void urb_print (struct urb * urb, char * str, int small) ...@@ -74,9 +74,9 @@ static void urb_print (struct urb * urb, char * str, int small)
static inline struct ed * static inline struct ed *
dma_to_ed (struct ohci_hcd *hc, dma_addr_t ed_dma); dma_to_ed (struct ohci_hcd *hc, dma_addr_t ed_dma);
#ifdef OHCI_VERBOSE_DEBUG
/* print non-empty branches of the periodic ed tree */ /* print non-empty branches of the periodic ed tree */
void ohci_dump_periodic (struct ohci_hcd *ohci, char *label) static void __attribute__ ((unused))
ohci_dump_periodic (struct ohci_hcd *ohci, char *label)
{ {
int i, j; int i, j;
u32 *ed_p; u32 *ed_p;
...@@ -101,7 +101,6 @@ void ohci_dump_periodic (struct ohci_hcd *ohci, char *label) ...@@ -101,7 +101,6 @@ void ohci_dump_periodic (struct ohci_hcd *ohci, char *label)
printk (KERN_DEBUG "%s, ohci %s, empty periodic schedule\n", printk (KERN_DEBUG "%s, ohci %s, empty periodic schedule\n",
label, ohci->hcd.self.bus_name); label, ohci->hcd.self.bus_name);
} }
#endif
static void ohci_dump_intr_mask (char *label, __u32 mask) static void ohci_dump_intr_mask (char *label, __u32 mask)
{ {
...@@ -241,6 +240,97 @@ static void ohci_dump (struct ohci_hcd *controller, int verbose) ...@@ -241,6 +240,97 @@ static void ohci_dump (struct ohci_hcd *controller, int verbose)
ohci_dump_roothub (controller, 1); ohci_dump_roothub (controller, 1);
} }
/*
 * Debug helper: print one transfer descriptor through dbg().
 *
 * Always prints the TD/urb/index line, then decodes hwINFO either as an
 * isochronous TD (start frame, bp0/be, every PSW slot) or as a general
 * TD (toggle, PID, cbp/be and the length derived from them).
 */
static void ohci_dump_td (char *label, struct td *td)
{
	u32 info = le32_to_cpup (&td->hwINFO);
	const char *toggle;
	const char *pid;
	u32 cbp;
	u32 be;

	dbg ("%s td %p; urb %p index %d; hw next td %08x",
		label, td,
		td->urb, td->index,
		le32_to_cpup (&td->hwNextTD));

	if ((info & TD_ISO) != 0) {
		unsigned slot;

		dbg (" info %08x CC=%x DI=%d START=%04x", info,
			TD_CC_GET(info), /* FC, */
			(info & TD_DI) >> 21,
			info & 0x0000ffff);
		dbg (" bp0 %08x be %08x",
			le32_to_cpup (&td->hwCBP) & ~0x0fff,
			le32_to_cpup (&td->hwBE));
		for (slot = 0; slot < MAXPSW; slot++) {
			dbg (" psw [%d] = %2x", slot,
				le16_to_cpu (td->hwPSW [slot]));
		}
		return;
	}

	/* general (control/bulk/interrupt) TD */
	switch (info & TD_T) {
	case TD_T_DATA0:
		toggle = "DATA0";
		break;
	case TD_T_DATA1:
		toggle = "DATA1";
		break;
	case TD_T_TOGGLE:
		toggle = "(CARRY)";
		break;
	default:
		toggle = "(?)";
		break;
	}

	switch (info & TD_DP) {
	case TD_DP_SETUP:
		pid = "SETUP";
		break;
	case TD_DP_IN:
		pid = "IN";
		break;
	case TD_DP_OUT:
		pid = "OUT";
		break;
	default:
		pid = "(bad pid)";
		break;
	}

	dbg (" info %08x CC=%x %s DI=%d %s %s", info,
		TD_CC_GET(info), /* EC, */ toggle,
		(info & TD_DI) >> 21, pid,
		(info & TD_R) ? "R" : "");

	cbp = le32_to_cpup (&td->hwCBP);
	be = le32_to_cpup (&td->hwBE);
	/* a zero cbp is reported as length 0 */
	dbg (" cbp %08x be %08x (len %d)", cbp, be,
		cbp ? (be + 1 - cbp) : 0);
}
/*
 * Debug helper: print one endpoint descriptor through dbg(), and with
 * "verbose" set also every TD on its shadow td_list.
 *
 * caller MUST own hcd spinlock if verbose is set!
 */
static void __attribute__((unused))
ohci_dump_ed (struct ohci_hcd *ohci, char *label, struct ed *ed, int verbose)
{
	u32 hw_info = ed->hwINFO;		/* as stored in the ED */
	u32 info = le32_to_cpu (hw_info);	/* cpu byte order */
	u32 dir = hw_info & (ED_IN|ED_OUT);
	char *type = "";

	dbg ("%s: %s, ed %p state 0x%x type %d; next ed %08x",
		ohci->hcd.self.bus_name, label,
		ed, ed->state, ed->type,
		le32_to_cpup (&ed->hwNextED));

	/* neither bit alone: direction comes from the TDs ... control */
	if (dir == ED_OUT)
		type = "-OUT";
	else if (dir == ED_IN)
		type = "-IN";

	dbg (" info %08x MAX=%d%s%s%s EP=%d%s DEV=%d", info,
		0x0fff & (info >> 16),
		(hw_info & ED_ISO) ? " ISO" : "",
		(hw_info & ED_SKIP) ? " SKIP" : "",
		(hw_info & ED_LOWSPEED) ? " LOW" : "",
		0x000f & (info >> 7),
		type,
		0x007f & info);

	dbg (" tds: head %08x%s%s tail %08x%s",
		le32_to_cpup (&ed->hwHeadP),
		(ed->hwHeadP & ED_H) ? " HALT" : "",
		(ed->hwHeadP & ED_C) ? " CARRY" : "",
		le32_to_cpup (&ed->hwTailP),
		verbose ? "" : " (not listing)");

	if (!verbose)
		return;

	{
		struct list_head *entry;

		/* use ed->td_list because HC concurrently modifies
		 * hwNextTD as it accumulates ed_donelist.
		 */
		list_for_each (entry, &ed->td_list) {
			ohci_dump_td (" ->",
				list_entry (entry, struct td, td_list));
		}
	}
}
#endif #endif
...@@ -100,7 +100,7 @@ ...@@ -100,7 +100,7 @@
* - lots more testing!! * - lots more testing!!
*/ */
#define DRIVER_VERSION "2002-Jun-10" #define DRIVER_VERSION "2002-Jun-15"
#define DRIVER_AUTHOR "Roman Weissgaerber <weissg@vienna.at>, David Brownell" #define DRIVER_AUTHOR "Roman Weissgaerber <weissg@vienna.at>, David Brownell"
#define DRIVER_DESC "USB 1.1 'Open' Host Controller (OHCI) Driver" #define DRIVER_DESC "USB 1.1 'Open' Host Controller (OHCI) Driver"
...@@ -145,8 +145,8 @@ static int ohci_urb_enqueue ( ...@@ -145,8 +145,8 @@ static int ohci_urb_enqueue (
urb_print (urb, "SUB", usb_pipein (pipe)); urb_print (urb, "SUB", usb_pipein (pipe));
#endif #endif
/* every endpoint has a ed, locate and fill it */ /* every endpoint has a ed, locate and maybe (re)initialize it */
if (! (ed = ep_add_ed (urb->dev, pipe, urb->interval, 1, mem_flags))) if (! (ed = ed_get (ohci, urb->dev, pipe, urb->interval)))
return -ENOMEM; return -ENOMEM;
/* for the private part of the URB we need the number of TDs (size) */ /* for the private part of the URB we need the number of TDs (size) */
...@@ -498,6 +498,7 @@ static void ohci_irq (struct usb_hcd *hcd) ...@@ -498,6 +498,7 @@ static void ohci_irq (struct usb_hcd *hcd)
struct ohci_regs *regs = ohci->regs; struct ohci_regs *regs = ohci->regs;
int ints; int ints;
/* we can eliminate a (slow) readl() if _only_ WDH caused this irq */
if ((ohci->hcca->done_head != 0) if ((ohci->hcca->done_head != 0)
&& ! (le32_to_cpup (&ohci->hcca->done_head) & 0x01)) { && ! (le32_to_cpup (&ohci->hcca->done_head) & 0x01)) {
ints = OHCI_INTR_WDH; ints = OHCI_INTR_WDH;
......
...@@ -221,6 +221,7 @@ ed_alloc (struct ohci_hcd *hc, int mem_flags) ...@@ -221,6 +221,7 @@ ed_alloc (struct ohci_hcd *hc, int mem_flags)
ed = pci_pool_alloc (hc->ed_cache, mem_flags, &dma); ed = pci_pool_alloc (hc->ed_cache, mem_flags, &dma);
if (ed) { if (ed) {
memset (ed, 0, sizeof (*ed)); memset (ed, 0, sizeof (*ed));
INIT_LIST_HEAD (&ed->td_list);
ed->dma = dma; ed->dma = dma;
/* hash it for later reverse mapping */ /* hash it for later reverse mapping */
if (!hash_add_ed (hc, ed, mem_flags)) { if (!hash_add_ed (hc, ed, mem_flags)) {
......
...@@ -131,8 +131,9 @@ static void intr_resub (struct ohci_hcd *hc, struct urb *urb) ...@@ -131,8 +131,9 @@ static void intr_resub (struct ohci_hcd *hc, struct urb *urb)
/* search for the right branch to insert an interrupt ed into the int tree /* search for the right branch to insert an interrupt ed into the int tree
* do some load balancing; * do some load balancing;
* returns the branch and * returns the branch
* sets the interval to interval = 2^integer (ld (interval)) * FIXME allow for failure, when there's no bandwidth left;
* and consider iso loads too
*/ */
static int ep_int_balance (struct ohci_hcd *ohci, int interval, int load) static int ep_int_balance (struct ohci_hcd *ohci, int interval, int load)
{ {
...@@ -152,19 +153,6 @@ static int ep_int_balance (struct ohci_hcd *ohci, int interval, int load) ...@@ -152,19 +153,6 @@ static int ep_int_balance (struct ohci_hcd *ohci, int interval, int load)
/*-------------------------------------------------------------------------*/ /*-------------------------------------------------------------------------*/
/*
 * Round an interrupt interval down to a power of two:
 * returns 2^floor(ld (inter)), never less than 1 and never more than 32.
 */
static int ep_2_n_interval (int inter)
{
	int shift = 0;

	/* stop at the highest set bit, or at 2^5 = 32, whichever is first */
	while (shift < 5 && (inter >> shift) > 1)
		shift++;

	return 1 << shift;
}
/*-------------------------------------------------------------------------*/
/* the int tree is a binary tree /* the int tree is a binary tree
* in order to process it sequentially the indexes of the branches have * in order to process it sequentially the indexes of the branches have
* to be mapped the mapping reverses the bits of a word of num_bits length * to be mapped the mapping reverses the bits of a word of num_bits length
...@@ -230,8 +218,7 @@ static int ep_link (struct ohci_hcd *ohci, struct ed *edi) ...@@ -230,8 +218,7 @@ static int ep_link (struct ohci_hcd *ohci, struct ed *edi)
case PIPE_INTERRUPT: case PIPE_INTERRUPT:
load = ed->intriso.intr_info.int_load; load = ed->intriso.intr_info.int_load;
interval = ep_2_n_interval (ed->intriso.intr_info.int_period); interval = ed->interval;
ed->interval = interval;
int_branch = ep_int_balance (ohci, interval, load); int_branch = ep_int_balance (ohci, interval, load);
ed->intriso.intr_info.int_branch = int_branch; ed->intriso.intr_info.int_branch = int_branch;
...@@ -301,6 +288,7 @@ static void periodic_unlink ( ...@@ -301,6 +288,7 @@ static void periodic_unlink (
* just the link to the ed is unlinked. * just the link to the ed is unlinked.
* the link from the ed still points to another operational ed or 0 * the link from the ed still points to another operational ed or 0
* so the HC can eventually finish the processing of the unlinked ed * so the HC can eventually finish the processing of the unlinked ed
* caller guarantees the ED has no active TDs.
*/ */
static int start_ed_unlink (struct ohci_hcd *ohci, struct ed *ed) static int start_ed_unlink (struct ohci_hcd *ohci, struct ed *ed)
{ {
...@@ -387,84 +375,99 @@ static int start_ed_unlink (struct ohci_hcd *ohci, struct ed *ed) ...@@ -387,84 +375,99 @@ static int start_ed_unlink (struct ohci_hcd *ohci, struct ed *ed)
/*-------------------------------------------------------------------------*/ /*-------------------------------------------------------------------------*/
/* (re)init an endpoint; this _should_ be done once at the /* get and maybe (re)init an endpoint. init _should_ be done only as part
* usb_set_configuration command, but the USB stack is a bit stateless * of usb_set_configuration() or usb_set_interface() ... but the USB stack
* so we do it at every transaction. * isn't very stateful, so we re-init whenever the HC isn't looking.
* if the state of the ed is ED_NEW then a dummy td is added and the
* state is changed to ED_UNLINK
* in all other cases the state is left unchanged
* the ed info fields are set even though most of them should
* not change
*/ */
static struct ed *ep_add_ed ( static struct ed *ed_get (
struct ohci_hcd *ohci,
struct usb_device *udev, struct usb_device *udev,
unsigned int pipe, unsigned int pipe,
int interval, int interval
int load,
int mem_flags
) { ) {
struct ohci_hcd *ohci = hcd_to_ohci (udev->bus->hcpriv); int is_out = !usb_pipein (pipe);
int type = usb_pipetype (pipe);
int bus_msecs = 0;
struct hcd_dev *dev = (struct hcd_dev *) udev->hcpriv; struct hcd_dev *dev = (struct hcd_dev *) udev->hcpriv;
struct td *td;
struct ed *ed; struct ed *ed;
unsigned ep; unsigned ep;
unsigned long flags; unsigned long flags;
spin_lock_irqsave (&ohci->lock, flags);
ep = usb_pipeendpoint (pipe) << 1; ep = usb_pipeendpoint (pipe) << 1;
if (!usb_pipecontrol (pipe) && usb_pipeout (pipe)) if (type != PIPE_CONTROL && is_out)
ep |= 1; ep |= 1;
if (type == PIPE_INTERRUPT)
bus_msecs = usb_calc_bus_time (udev->speed, !is_out, 0,
usb_maxpacket (udev, pipe, is_out)) / 1000;
spin_lock_irqsave (&ohci->lock, flags);
if (!(ed = dev->ep [ep])) { if (!(ed = dev->ep [ep])) {
ed = ed_alloc (ohci, SLAB_ATOMIC); ed = ed_alloc (ohci, SLAB_ATOMIC);
if (!ed) { if (!ed) {
/* out of memory */ /* out of memory */
spin_unlock_irqrestore (&ohci->lock, flags); goto done;
return NULL;
} }
dev->ep [ep] = ed; dev->ep [ep] = ed;
} }
if (ed->state & ED_URB_DEL) { if (ed->state & ED_URB_DEL) {
/* pending unlink request */ /* pending unlink request */
spin_unlock_irqrestore (&ohci->lock, flags); ed = 0;
return NULL; goto done;
} }
if (ed->state == ED_NEW) { if (ed->state == ED_NEW) {
struct td *td;
ed->hwINFO = ED_SKIP; ed->hwINFO = ED_SKIP;
/* dummy td; end of td list for ed */ /* dummy td; end of td list for ed */
td = td_alloc (ohci, SLAB_ATOMIC); td = td_alloc (ohci, SLAB_ATOMIC);
if (!td) { if (!td) {
/* out of memory */ /* out of memory */
spin_unlock_irqrestore (&ohci->lock, flags); ed = 0;
return NULL; goto done;
} }
ed->dummy = td; ed->dummy = td;
ed->hwTailP = cpu_to_le32 (td->td_dma); ed->hwTailP = cpu_to_le32 (td->td_dma);
ed->hwHeadP = ed->hwTailP; /* ED_C, ED_H zeroed */ ed->hwHeadP = ed->hwTailP; /* ED_C, ED_H zeroed */
ed->state = ED_UNLINK; ed->state = ED_UNLINK;
ed->type = usb_pipetype (pipe); ed->type = type;
} }
// FIXME: don't do this if it's linked to the HC, or without knowing it's /* FIXME: Don't do this without knowing it's safe to clobber this
// safe to clobber state/mode info tied to (previous) config/altsetting. * state/mode info. Currently the upper layers don't support such
// (but dev0/ep0, used by set_address, must get clobbered) * guarantees; we're lucky changing config/altsetting is rare.
*/
ed->hwINFO = cpu_to_le32 (usb_pipedevice (pipe) if (ed->state == ED_UNLINK) {
| usb_pipeendpoint (pipe) << 7 u32 info;
| (usb_pipeisoc (pipe)? 0x8000: 0)
| (usb_pipecontrol (pipe) info = usb_pipedevice (pipe);
? 0: (usb_pipeout (pipe)? 0x800: 0x1000)) info |= (ep >> 1) << 7;
| (udev->speed == USB_SPEED_LOW) << 13 info |= usb_maxpacket (udev, pipe, is_out) << 16;
| usb_maxpacket (udev, pipe, usb_pipeout (pipe)) info = cpu_to_le32 (info);
<< 16); if (udev->speed == USB_SPEED_LOW)
info |= ED_LOWSPEED;
if (ed->type == PIPE_INTERRUPT && ed->state == ED_UNLINK) { /* control transfers store pids in tds */
ed->intriso.intr_info.int_period = interval; if (type != PIPE_CONTROL) {
ed->intriso.intr_info.int_load = load; info |= is_out ? ED_OUT : ED_IN;
} if (type == PIPE_ISOCHRONOUS)
info |= ED_ISO;
if (type == PIPE_INTERRUPT) {
ed->intriso.intr_info.int_load = bus_msecs;
if (interval > 32)
interval = 32;
}
}
ed->hwINFO = info;
/* value ignored except on periodic EDs, where
* we know it's already a power of 2
*/
ed->interval = interval;
}
done:
spin_unlock_irqrestore (&ohci->lock, flags); spin_unlock_irqrestore (&ohci->lock, flags);
return ed; return ed;
} }
...@@ -736,8 +739,8 @@ static void td_done (struct urb *urb, struct td *td) ...@@ -736,8 +739,8 @@ static void td_done (struct urb *urb, struct td *td)
urb->iso_frame_desc [td->index].status = cc_to_error [cc]; urb->iso_frame_desc [td->index].status = cc_to_error [cc];
if (cc != 0) if (cc != 0)
dbg (" urb %p iso TD %d len %d CC %d", dbg (" urb %p iso TD %p (%d) len %d CC %d",
urb, td->index, dlen, cc); urb, td, 1 + td->index, dlen, cc);
/* BULK, INT, CONTROL ... drivers see aggregate length/status, /* BULK, INT, CONTROL ... drivers see aggregate length/status,
* except that "setup" bytes aren't counted and "short" transfers * except that "setup" bytes aren't counted and "short" transfers
...@@ -776,9 +779,13 @@ static void td_done (struct urb *urb, struct td *td) ...@@ -776,9 +779,13 @@ static void td_done (struct urb *urb, struct td *td)
- td->data_dma; - td->data_dma;
} }
#ifdef VERBOSE_DEBUG
if (cc != 0) if (cc != 0)
dbg (" urb %p TD %d CC %d, len=%d", dbg (" urb %p TD %p (%d) CC %d, len=%d/%d",
urb, td->index, cc, urb->actual_length); urb, td, 1 + td->index, cc,
urb->actual_length,
urb->transfer_buffer_length);
#endif
} }
} }
...@@ -812,8 +819,8 @@ static struct td *dl_reverse_done_list (struct ohci_hcd *ohci) ...@@ -812,8 +819,8 @@ static struct td *dl_reverse_done_list (struct ohci_hcd *ohci)
if (urb_priv && ((td_list->index + 1) if (urb_priv && ((td_list->index + 1)
< urb_priv->length)) { < urb_priv->length)) {
#ifdef OHCI_VERBOSE_DEBUG #ifdef OHCI_VERBOSE_DEBUG
dbg ("urb %p TD %d of %d, patch ED", dbg ("urb %p TD %p (%d/%d), patch ED",
td_list->urb, td_list->urb, td_list,
1 + td_list->index, 1 + td_list->index,
urb_priv->length); urb_priv->length);
#endif #endif
......
...@@ -19,7 +19,7 @@ struct ed { ...@@ -19,7 +19,7 @@ struct ed {
#define ED_SKIP __constant_cpu_to_le32(1 << 14) #define ED_SKIP __constant_cpu_to_le32(1 << 14)
#define ED_LOWSPEED __constant_cpu_to_le32(1 << 13) #define ED_LOWSPEED __constant_cpu_to_le32(1 << 13)
#define ED_OUT __constant_cpu_to_le32(0x01 << 11) #define ED_OUT __constant_cpu_to_le32(0x01 << 11)
#define ED_IN __constant_cpu_to_le32(0x10 << 11) #define ED_IN __constant_cpu_to_le32(0x02 << 11)
__u32 hwTailP; /* tail of TD list */ __u32 hwTailP; /* tail of TD list */
__u32 hwHeadP; /* head of TD list */ __u32 hwHeadP; /* head of TD list */
#define ED_C __constant_cpu_to_le32(0x02) /* toggle carry */ #define ED_C __constant_cpu_to_le32(0x02) /* toggle carry */
...@@ -30,24 +30,24 @@ struct ed { ...@@ -30,24 +30,24 @@ struct ed {
dma_addr_t dma; /* addr of ED */ dma_addr_t dma; /* addr of ED */
struct ed *ed_prev; /* for non-interrupt EDs */ struct ed *ed_prev; /* for non-interrupt EDs */
struct td *dummy; struct td *dummy;
struct list_head td_list; /* "shadow list" of our TDs */
u8 state; /* ED_{NEW,UNLINK,OPER} */
#define ED_NEW 0x00 /* unused, no dummy td */
#define ED_UNLINK 0x01 /* dummy td, maybe linked to hc */
#define ED_OPER 0x02 /* dummy td, _is_ linked to hc */
#define ED_URB_DEL 0x08 /* for unlinking; masked in */
u8 type; /* PIPE_{BULK,...} */ u8 type; /* PIPE_{BULK,...} */
u8 interval; /* interrupt, isochronous */ u16 interval; /* interrupt, isochronous */
union { union {
struct intr_info { /* interrupt */ struct intr_info { /* interrupt */
u8 int_period;
u8 int_branch; u8 int_branch;
u8 int_load; u8 int_load;
} intr_info; } intr_info;
u16 last_iso; /* isochronous */ u16 last_iso; /* isochronous */
} intriso; } intriso;
u8 state; /* ED_{NEW,UNLINK,OPER} */
#define ED_NEW 0x00 /* unused, no dummy td */
#define ED_UNLINK 0x01 /* dummy td, maybe linked to hc */
#define ED_OPER 0x02 /* dummy td, _is_ linked to hc */
#define ED_URB_DEL 0x08 /* for unlinking; masked in */
/* HC may see EDs on rm_list until next frame (frame_no == tick) */ /* HC may see EDs on rm_list until next frame (frame_no == tick) */
u16 tick; u16 tick;
struct ed *ed_rm_list; struct ed *ed_rm_list;
...@@ -108,6 +108,8 @@ struct td { ...@@ -108,6 +108,8 @@ struct td {
dma_addr_t td_dma; /* addr of this TD */ dma_addr_t td_dma; /* addr of this TD */
dma_addr_t data_dma; /* addr of data it points to */ dma_addr_t data_dma; /* addr of data it points to */
struct list_head td_list; /* "shadow list", TDs on same ED */
} __attribute__ ((aligned(32))); /* c/b/i need 16; only iso needs 32 */ } __attribute__ ((aligned(32))); /* c/b/i need 16; only iso needs 32 */
#define TD_MASK ((u32)~0x1f) /* strip hw status in low addr bits */ #define TD_MASK ((u32)~0x1f) /* strip hw status in low addr bits */
......
...@@ -220,10 +220,11 @@ struct kaweth_device ...@@ -220,10 +220,11 @@ struct kaweth_device
struct urb *rx_urb; struct urb *rx_urb;
struct urb *tx_urb; struct urb *tx_urb;
struct urb *irq_urb; struct urb *irq_urb;
struct sk_buff *tx_skb;
__u8 *firmware_buf; __u8 *firmware_buf;
__u8 scratch[KAWETH_SCRATCH_SIZE]; __u8 scratch[KAWETH_SCRATCH_SIZE];
__u8 tx_buf[KAWETH_BUF_SIZE];
__u8 rx_buf[KAWETH_BUF_SIZE]; __u8 rx_buf[KAWETH_BUF_SIZE];
__u8 intbuffer[INTBUFFERSIZE]; __u8 intbuffer[INTBUFFERSIZE];
__u16 packet_filter_bitmap; __u16 packet_filter_bitmap;
...@@ -650,11 +651,13 @@ static int kaweth_ioctl(struct net_device *net, struct ifreq *rq, int cmd) ...@@ -650,11 +651,13 @@ static int kaweth_ioctl(struct net_device *net, struct ifreq *rq, int cmd)
static void kaweth_usb_transmit_complete(struct urb *urb) static void kaweth_usb_transmit_complete(struct urb *urb)
{ {
struct kaweth_device *kaweth = urb->context; struct kaweth_device *kaweth = urb->context;
struct sk_buff *skb = kaweth->tx_skb;
if (unlikely(urb->status != 0)) if (unlikely(urb->status != 0))
kaweth_dbg("%s: TX status %d.", kaweth->net->name, urb->status); kaweth_dbg("%s: TX status %d.", kaweth->net->name, urb->status);
netif_wake_queue(kaweth->net); netif_wake_queue(kaweth->net);
dev_kfree_skb(skb);
} }
/**************************************************************** /****************************************************************
...@@ -663,7 +666,7 @@ static void kaweth_usb_transmit_complete(struct urb *urb) ...@@ -663,7 +666,7 @@ static void kaweth_usb_transmit_complete(struct urb *urb)
static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net)
{ {
struct kaweth_device *kaweth = net->priv; struct kaweth_device *kaweth = net->priv;
int count = skb->len; char *private_header;
int res; int res;
...@@ -679,15 +682,30 @@ static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) ...@@ -679,15 +682,30 @@ static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net)
kaweth_async_set_rx_mode(kaweth); kaweth_async_set_rx_mode(kaweth);
netif_stop_queue(net); netif_stop_queue(net);
*((__u16 *)kaweth->tx_buf) = cpu_to_le16(skb->len); /* We now decide whether we can put our special header into the sk_buff */
if (skb_cloned(skb) || skb_headroom(skb) < 2) {
/* no such luck - we make our own */
struct sk_buff *copied_skb;
copied_skb = skb_copy_expand(skb, 2, 0, GFP_ATOMIC);
dev_kfree_skb_any(skb);
skb = copied_skb;
if (!copied_skb) {
kaweth->stats.tx_errors++;
netif_start_queue(net);
spin_unlock(&kaweth->device_lock);
return 0;
}
}
memcpy(kaweth->tx_buf + 2, skb->data, skb->len); private_header = __skb_push(skb, 2);
*private_header = cpu_to_le16(skb->len);
kaweth->tx_skb = skb;
FILL_BULK_URB(kaweth->tx_urb, FILL_BULK_URB(kaweth->tx_urb,
kaweth->dev, kaweth->dev,
usb_sndbulkpipe(kaweth->dev, 2), usb_sndbulkpipe(kaweth->dev, 2),
kaweth->tx_buf, private_header,
count + 2, skb->len,
kaweth_usb_transmit_complete, kaweth_usb_transmit_complete,
kaweth); kaweth);
kaweth->end = 0; kaweth->end = 0;
...@@ -699,6 +717,7 @@ static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) ...@@ -699,6 +717,7 @@ static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net)
kaweth->stats.tx_errors++; kaweth->stats.tx_errors++;
netif_start_queue(net); netif_start_queue(net);
dev_kfree_skb(skb);
} }
else else
{ {
...@@ -707,8 +726,6 @@ static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) ...@@ -707,8 +726,6 @@ static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net)
net->trans_start = jiffies; net->trans_start = jiffies;
} }
dev_kfree_skb(skb);
spin_unlock(&kaweth->device_lock); spin_unlock(&kaweth->device_lock);
return 0; return 0;
......
...@@ -51,12 +51,6 @@ ...@@ -51,12 +51,6 @@
#include <linux/slab.h> #include <linux/slab.h>
/*
* kernel thread actions
*/
#define US_ACT_COMMAND 1
#define US_ACT_EXIT 5
/*********************************************************************** /***********************************************************************
* Host functions * Host functions
...@@ -204,7 +198,7 @@ static int device_reset( Scsi_Cmnd *srb ) ...@@ -204,7 +198,7 @@ static int device_reset( Scsi_Cmnd *srb )
US_DEBUGP("device_reset() called\n" ); US_DEBUGP("device_reset() called\n" );
/* if the device was removed, then we're already reset */ /* if the device was removed, then we're already reset */
if (atomic_read(&us->sm_state) == US_STATE_DETACHED) if (!test_bit(DEV_ATTACHED, &us->bitflags))
return SUCCESS; return SUCCESS;
scsi_unlock(srb->host); scsi_unlock(srb->host);
...@@ -235,7 +229,7 @@ static int bus_reset( Scsi_Cmnd *srb ) ...@@ -235,7 +229,7 @@ static int bus_reset( Scsi_Cmnd *srb )
US_DEBUGP("bus_reset() called\n"); US_DEBUGP("bus_reset() called\n");
/* if the device has been removed, this worked */ /* if the device has been removed, this worked */
if (atomic_read(&us->sm_state) == US_STATE_DETACHED) { if (!test_bit(DEV_ATTACHED, &us->bitflags)) {
US_DEBUGP("-- device removed already\n"); US_DEBUGP("-- device removed already\n");
return SUCCESS; return SUCCESS;
} }
...@@ -337,8 +331,8 @@ static int proc_info (char *buffer, char **start, off_t offset, int length, ...@@ -337,8 +331,8 @@ static int proc_info (char *buffer, char **start, off_t offset, int length,
/* show the GUID of the device */ /* show the GUID of the device */
SPRINTF(" GUID: " GUID_FORMAT "\n", GUID_ARGS(us->guid)); SPRINTF(" GUID: " GUID_FORMAT "\n", GUID_ARGS(us->guid));
SPRINTF(" Attached: %s\n", (atomic_read(&us->sm_state) == SPRINTF(" Attached: %s\n", (test_bit(DEV_ATTACHED, &us->bitflags)
US_STATE_DETACHED) ? "Yes" : "No"); ? "Yes" : "No"));
/* /*
* Calculate start of next buffer, and return value. * Calculate start of next buffer, and return value.
......
...@@ -99,13 +99,6 @@ MODULE_LICENSE("GPL"); ...@@ -99,13 +99,6 @@ MODULE_LICENSE("GPL");
static int my_host_number; static int my_host_number;
/*
* kernel thread actions
*/
#define US_ACT_COMMAND 1
#define US_ACT_EXIT 5
/* The list of structures and the protective lock for them */ /* The list of structures and the protective lock for them */
struct us_data *us_list; struct us_data *us_list;
struct semaphore us_list_semaphore; struct semaphore us_list_semaphore;
...@@ -426,7 +419,7 @@ static int usb_stor_control_thread(void * __us) ...@@ -426,7 +419,7 @@ static int usb_stor_control_thread(void * __us)
down(&(us->dev_semaphore)); down(&(us->dev_semaphore));
/* our device has gone - pretend not ready */ /* our device has gone - pretend not ready */
if (atomic_read(&us->device_state) == US_STATE_DETACHED) { if (!test_bit(DEV_ATTACHED, &us->bitflags)) {
US_DEBUGP("Request is for removed device\n"); US_DEBUGP("Request is for removed device\n");
/* For REQUEST_SENSE, it's the data. But /* For REQUEST_SENSE, it's the data. But
* for anything else, it should look like * for anything else, it should look like
...@@ -450,7 +443,7 @@ static int usb_stor_control_thread(void * __us) ...@@ -450,7 +443,7 @@ static int usb_stor_control_thread(void * __us)
sizeof(usb_stor_sense_notready)); sizeof(usb_stor_sense_notready));
us->srb->result = CHECK_CONDITION << 1; us->srb->result = CHECK_CONDITION << 1;
} }
} else { /* atomic_read(&us->device_state) == STATE_DETACHED */ } else { /* test_bit(DEV_ATTACHED, &us->bitflags) */
/* Handle those devices which need us to fake /* Handle those devices which need us to fake
* their inquiry data */ * their inquiry data */
...@@ -557,9 +550,8 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ...@@ -557,9 +550,8 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum,
unsigned int flags; unsigned int flags;
struct us_unusual_dev *unusual_dev; struct us_unusual_dev *unusual_dev;
struct us_data *ss = NULL; struct us_data *ss = NULL;
#ifdef CONFIG_USB_STORAGE_SDDR09
int result; int result;
#endif int new_device = 0;
/* these are temporary copies -- we test on these, then put them /* these are temporary copies -- we test on these, then put them
* in the us-data structure * in the us-data structure
...@@ -570,13 +562,13 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ...@@ -570,13 +562,13 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum,
u8 subclass = 0; u8 subclass = 0;
u8 protocol = 0; u8 protocol = 0;
/* the altsettting on the interface we're probing that matched our /* the altsetting on the interface we're probing that matched our
* usb_match_id table * usb_match_id table
*/ */
struct usb_interface *intf = dev->actconfig->interface; struct usb_interface *intf = dev->actconfig->interface;
struct usb_interface_descriptor *altsetting = struct usb_interface_descriptor *altsetting =
intf[ifnum].altsetting + intf[ifnum].act_altsetting; intf[ifnum].altsetting + intf[ifnum].act_altsetting;
US_DEBUGP("act_altsettting is %d\n", intf[ifnum].act_altsetting); US_DEBUGP("act_altsetting is %d\n", intf[ifnum].act_altsetting);
/* clear the temporary strings */ /* clear the temporary strings */
memset(mf, 0, sizeof(mf)); memset(mf, 0, sizeof(mf));
...@@ -663,7 +655,7 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ...@@ -663,7 +655,7 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum,
return NULL; return NULL;
} }
/* At this point, we're committed to using the device */ /* At this point, we've decided to try to use the device */
usb_get_dev(dev); usb_get_dev(dev);
/* clear the GUID and fetch the strings */ /* clear the GUID and fetch the strings */
...@@ -696,7 +688,8 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ...@@ -696,7 +688,8 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum,
*/ */
ss = us_list; ss = us_list;
while ((ss != NULL) && while ((ss != NULL) &&
((ss->pusb_dev) || !GUID_EQUAL(guid, ss->guid))) (test_bit(DEV_ATTACHED, &ss->bitflags) ||
!GUID_EQUAL(guid, ss->guid)))
ss = ss->next; ss = ss->next;
if (ss != NULL) { if (ss != NULL) {
...@@ -710,29 +703,23 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ...@@ -710,29 +703,23 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum,
/* establish the connection to the new device upon reconnect */ /* establish the connection to the new device upon reconnect */
ss->ifnum = ifnum; ss->ifnum = ifnum;
ss->pusb_dev = dev; ss->pusb_dev = dev;
atomic_set(&ss->device_state, US_STATE_ATTACHED); set_bit(DEV_ATTACHED, &ss->bitflags);
/* copy over the endpoint data */ /* copy over the endpoint data */
if (ep_in) ss->ep_in = ep_in->bEndpointAddress &
ss->ep_in = ep_in->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
USB_ENDPOINT_NUMBER_MASK; ss->ep_out = ep_out->bEndpointAddress &
if (ep_out) USB_ENDPOINT_NUMBER_MASK;
ss->ep_out = ep_out->bEndpointAddress &
USB_ENDPOINT_NUMBER_MASK;
ss->ep_int = ep_int; ss->ep_int = ep_int;
/* allocate an IRQ callback if one is needed */ /* allocate an IRQ callback if one is needed */
if ((ss->protocol == US_PR_CBI) && usb_stor_allocate_irq(ss)) { if ((ss->protocol == US_PR_CBI) && usb_stor_allocate_irq(ss))
usb_put_dev(dev); goto BadDevice;
return NULL;
}
/* allocate the URB we're going to use */ /* allocate the URB we're going to use */
ss->current_urb = usb_alloc_urb(0, GFP_KERNEL); ss->current_urb = usb_alloc_urb(0, GFP_KERNEL);
if (!ss->current_urb) { if (!ss->current_urb)
usb_put_dev(dev); goto BadDevice;
return NULL;
}
/* Re-Initialize the device if it needs it */ /* Re-Initialize the device if it needs it */
if (unusual_dev && unusual_dev->initFunction) if (unusual_dev && unusual_dev->initFunction)
...@@ -752,14 +739,12 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ...@@ -752,14 +739,12 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum,
return NULL; return NULL;
} }
memset(ss, 0, sizeof(struct us_data)); memset(ss, 0, sizeof(struct us_data));
new_device = 1;
/* allocate the URB we're going to use */ /* allocate the URB we're going to use */
ss->current_urb = usb_alloc_urb(0, GFP_KERNEL); ss->current_urb = usb_alloc_urb(0, GFP_KERNEL);
if (!ss->current_urb) { if (!ss->current_urb)
kfree(ss); goto BadDevice;
usb_put_dev(dev);
return NULL;
}
/* Initialize the mutexes only when the struct is new */ /* Initialize the mutexes only when the struct is new */
init_completion(&(ss->notify)); init_completion(&(ss->notify));
...@@ -776,12 +761,10 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ...@@ -776,12 +761,10 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum,
ss->unusual_dev = unusual_dev; ss->unusual_dev = unusual_dev;
/* copy over the endpoint data */ /* copy over the endpoint data */
if (ep_in) ss->ep_in = ep_in->bEndpointAddress &
ss->ep_in = ep_in->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;
USB_ENDPOINT_NUMBER_MASK; ss->ep_out = ep_out->bEndpointAddress &
if (ep_out) USB_ENDPOINT_NUMBER_MASK;
ss->ep_out = ep_out->bEndpointAddress &
USB_ENDPOINT_NUMBER_MASK;
ss->ep_int = ep_int; ss->ep_int = ep_int;
/* establish the connection to the new device */ /* establish the connection to the new device */
...@@ -904,12 +887,8 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ...@@ -904,12 +887,8 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum,
#endif #endif
default: default:
ss->transport_name = "Unknown"; /* ss->transport_name = "Unknown"; */
kfree(ss->current_urb); goto BadDevice;
kfree(ss);
usb_put_dev(dev);
return NULL;
break;
} }
US_DEBUGP("Transport: %s\n", ss->transport_name); US_DEBUGP("Transport: %s\n", ss->transport_name);
...@@ -959,22 +938,14 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ...@@ -959,22 +938,14 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum,
#endif #endif
default: default:
ss->protocol_name = "Unknown"; /* ss->protocol_name = "Unknown"; */
kfree(ss->current_urb); goto BadDevice;
kfree(ss);
usb_put_dev(dev);
return NULL;
break;
} }
US_DEBUGP("Protocol: %s\n", ss->protocol_name); US_DEBUGP("Protocol: %s\n", ss->protocol_name);
/* allocate an IRQ callback if one is needed */ /* allocate an IRQ callback if one is needed */
if ((ss->protocol == US_PR_CBI) && usb_stor_allocate_irq(ss)) { if ((ss->protocol == US_PR_CBI) && usb_stor_allocate_irq(ss))
kfree(ss->current_urb); goto BadDevice;
kfree(ss);
usb_put_dev(dev);
return NULL;
}
/* /*
* Since this is a new device, we need to generate a scsi * Since this is a new device, we need to generate a scsi
...@@ -1001,16 +972,13 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ...@@ -1001,16 +972,13 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum,
/* start up our control thread */ /* start up our control thread */
atomic_set(&ss->sm_state, US_STATE_IDLE); atomic_set(&ss->sm_state, US_STATE_IDLE);
atomic_set(&ss->device_state, US_STATE_ATTACHED); set_bit(DEV_ATTACHED, &ss->bitflags);
ss->pid = kernel_thread(usb_stor_control_thread, ss, ss->pid = kernel_thread(usb_stor_control_thread, ss,
CLONE_VM); CLONE_VM);
if (ss->pid < 0) { if (ss->pid < 0) {
printk(KERN_WARNING USB_STORAGE printk(KERN_WARNING USB_STORAGE
"Unable to start control thread\n"); "Unable to start control thread\n");
kfree(ss->current_urb); goto BadDevice;
kfree(ss);
usb_put_dev(dev);
return NULL;
} }
/* wait for the thread to start */ /* wait for the thread to start */
...@@ -1018,7 +986,17 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ...@@ -1018,7 +986,17 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum,
/* now register - our detect function will be called */ /* now register - our detect function will be called */
ss->htmplt.module = THIS_MODULE; ss->htmplt.module = THIS_MODULE;
scsi_register_host(&(ss->htmplt)); result = scsi_register_host(&(ss->htmplt));
if (result) {
printk(KERN_WARNING USB_STORAGE
"Unable to register the scsi host\n");
/* tell the control thread to exit */
ss->action = US_ACT_EXIT;
up(&ss->sema);
wait_for_completion(&ss->notify);
goto BadDevice;
}
/* lock access to the data structures */ /* lock access to the data structures */
down(&us_list_semaphore); down(&us_list_semaphore);
...@@ -1038,6 +1016,31 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum, ...@@ -1038,6 +1016,31 @@ static void * storage_probe(struct usb_device *dev, unsigned int ifnum,
/* return a pointer for the disconnect function */ /* return a pointer for the disconnect function */
return ss; return ss;
/* we come here if there are any problems */
BadDevice:
US_DEBUGP("storage_probe() failed\n");
down(&ss->irq_urb_sem);
if (ss->irq_urb) {
usb_unlink_urb(ss->irq_urb);
usb_free_urb(ss->irq_urb);
ss->irq_urb = NULL;
}
up(&ss->irq_urb_sem);
if (ss->current_urb) {
usb_unlink_urb(ss->current_urb);
usb_free_urb(ss->current_urb);
ss->current_urb = NULL;
}
clear_bit(DEV_ATTACHED, &ss->bitflags);
ss->pusb_dev = NULL;
if (new_device)
kfree(ss);
else
up(&ss->dev_semaphore);
usb_put_dev(dev);
return NULL;
} }
/* Handle a disconnect event from the USB core */ /* Handle a disconnect event from the USB core */
...@@ -1078,7 +1081,7 @@ static void storage_disconnect(struct usb_device *dev, void *ptr) ...@@ -1078,7 +1081,7 @@ static void storage_disconnect(struct usb_device *dev, void *ptr)
/* mark the device as gone */ /* mark the device as gone */
usb_put_dev(ss->pusb_dev); usb_put_dev(ss->pusb_dev);
ss->pusb_dev = NULL; ss->pusb_dev = NULL;
atomic_set(&ss->sm_state, US_STATE_DETACHED); clear_bit(DEV_ATTACHED, &ss->bitflags);
/* unlock access to the device data structure */ /* unlock access to the device data structure */
up(&(ss->dev_semaphore)); up(&(ss->dev_semaphore));
......
...@@ -103,9 +103,10 @@ struct us_unusual_dev { ...@@ -103,9 +103,10 @@ struct us_unusual_dev {
#define US_FL_SCM_MULT_TARG 0x00000020 /* supports multiple targets */ #define US_FL_SCM_MULT_TARG 0x00000020 /* supports multiple targets */
#define US_FL_FIX_INQUIRY 0x00000040 /* INQUIRY response needs fixing */ #define US_FL_FIX_INQUIRY 0x00000040 /* INQUIRY response needs fixing */
/* device attached/detached states */
#define US_STATE_DETACHED 1 /* kernel thread actions */
#define US_STATE_ATTACHED 2 #define US_ACT_COMMAND 1
#define US_ACT_EXIT 5
/* processing state machine states */ /* processing state machine states */
#define US_STATE_IDLE 1 #define US_STATE_IDLE 1
...@@ -127,10 +128,9 @@ struct us_data { ...@@ -127,10 +128,9 @@ struct us_data {
/* The device we're working with /* The device we're working with
* It's important to note: * It's important to note:
* (o) you must hold dev_semaphore to change pusb_dev * (o) you must hold dev_semaphore to change pusb_dev
* (o) device_state should change whenever pusb_dev does * (o) DEV_ATTACHED in bitflags should change whenever pusb_dev does
*/ */
struct semaphore dev_semaphore; /* protect pusb_dev */ struct semaphore dev_semaphore; /* protect pusb_dev */
atomic_t device_state; /* attached or detached */
struct usb_device *pusb_dev; /* this usb_device */ struct usb_device *pusb_dev; /* this usb_device */
unsigned int flags; /* from filter initially */ unsigned int flags; /* from filter initially */
...@@ -174,6 +174,7 @@ struct us_data { ...@@ -174,6 +174,7 @@ struct us_data {
struct semaphore ip_waitq; /* for CBI interrupts */ struct semaphore ip_waitq; /* for CBI interrupts */
unsigned long bitflags; /* single-bit flags: */ unsigned long bitflags; /* single-bit flags: */
#define IP_WANTED 1 /* is an IRQ expected? */ #define IP_WANTED 1 /* is an IRQ expected? */
#define DEV_ATTACHED 2 /* is the dev. attached?*/
/* interrupt communications data */ /* interrupt communications data */
struct semaphore irq_urb_sem; /* to protect irq_urb */ struct semaphore irq_urb_sem; /* to protect irq_urb */
......
...@@ -12,9 +12,6 @@ ...@@ -12,9 +12,6 @@
* 24 January 2000 * 24 January 2000
* Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation * Changed sys_poll()/do_poll() to use PAGE_SIZE chunk-based allocation
* of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian). * of fds to overcome nfds < 16390 descriptors limit (Tigran Aivazian).
*
* Dec 2001
* Stack allocation and fast path (Andi Kleen)
*/ */
#include <linux/slab.h> #include <linux/slab.h>
...@@ -29,6 +26,21 @@ ...@@ -29,6 +26,21 @@
#define ROUND_UP(x,y) (((x)+(y)-1)/(y)) #define ROUND_UP(x,y) (((x)+(y)-1)/(y))
#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) #define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
/*
 * One registered wait: a wait-queue entry together with the file and the
 * wait-queue head it belongs to. poll_freewait() uses these fields to
 * remove the entry from wait_address and to fput() the file.
 */
struct poll_table_entry {
/* file this wait was registered for; released with fput() */
struct file * filp;
/* the wait-queue entry itself */
wait_queue_t wait;
/* queue head handed to remove_wait_queue() together with &wait */
wait_queue_head_t * wait_address;
};
/*
 * Header of one page of poll_table_entry slots. __pollwait() obtains the
 * page with __get_free_page() and pushes it onto the list headed by
 * poll_table->table.
 */
struct poll_table_page {
/* previously allocated page, or NULL for the first one */
struct poll_table_page * next;
/* one past the last used slot; starts out equal to entries */
struct poll_table_entry * entry;
/* slots stored directly behind the header, up to the end of the page */
struct poll_table_entry entries[0];
};
/*
 * True when one more entry behind (table)->entry would end beyond the
 * PAGE_SIZE bytes that start at table, i.e. the page has no free slot left.
 */
#define POLL_TABLE_FULL(table) \
((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
/* /*
* Ok, Peter made a complicated, but straightforward multiple_wait() function. * Ok, Peter made a complicated, but straightforward multiple_wait() function.
* I have rewritten this, taking some shortcuts: This code may not be easy to * I have rewritten this, taking some shortcuts: This code may not be easy to
...@@ -50,39 +62,30 @@ void poll_freewait(poll_table* pt) ...@@ -50,39 +62,30 @@ void poll_freewait(poll_table* pt)
struct poll_table_page *old; struct poll_table_page *old;
entry = p->entry; entry = p->entry;
while (entry > p->entries) { do {
entry--; entry--;
remove_wait_queue(entry->wait_address,&entry->wait); remove_wait_queue(entry->wait_address,&entry->wait);
fput(entry->filp); fput(entry->filp);
} } while (entry > p->entries);
old = p; old = p;
p = p->next; p = p->next;
if (old != &pt->inline_page) free_page((unsigned long) old);
free_page((unsigned long) old);
} }
} }
void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{ {
struct poll_table_page *table = p->table; struct poll_table_page *table = p->table;
struct poll_table_page *new_table = NULL;
int sz;
if (!table) {
new_table = &p->inline_page;
} else {
sz = (table == &p->inline_page) ? POLL_INLINE_TABLE_LEN : PAGE_SIZE;
if ((char*)table->entry >= (char*)table + sz) {
new_table = (struct poll_table_page *)__get_free_page(GFP_KERNEL);
if (!new_table) {
p->error = -ENOMEM;
__set_current_state(TASK_RUNNING);
return;
}
}
}
if (new_table) { if (!table || POLL_TABLE_FULL(table)) {
struct poll_table_page *new_table;
new_table = (struct poll_table_page *) __get_free_page(GFP_KERNEL);
if (!new_table) {
p->error = -ENOMEM;
__set_current_state(TASK_RUNNING);
return;
}
new_table->entry = new_table->entries; new_table->entry = new_table->entries;
new_table->next = table; new_table->next = table;
p->table = new_table; p->table = new_table;
...@@ -110,6 +113,48 @@ void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table ...@@ -110,6 +113,48 @@ void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table
#define BITS(fds, n) (*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n)) #define BITS(fds, n) (*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n))
/*
 * Check the requested descriptors against current->files->open_fds and
 * work out how far the select scan has to go.
 *
 * n:   number of descriptor bits to consider
 * fds: request bitmaps; BITS() ORs the in/out/ex words together
 *
 * Returns -EBADF as soon as a requested bit is found that is not set in
 * open_fds. Otherwise returns the index of the highest requested bit plus
 * one, or 0 when no bit is requested at all.
 */
static int max_select_fd(unsigned long n, fd_set_bits *fds)
{
unsigned long *open_fds;
unsigned long set;
int max;
/* handle the last, incomplete long-word first */
/* mask of the low (n & (__NFDBITS-1)) bits, the part of that word below n */
set = ~(~0UL << (n & (__NFDBITS-1)));
/* from here on n is a word index rather than a bit count */
n /= __NFDBITS;
open_fds = current->files->open_fds->fds_bits+n;
max = 0;
if (set) {
set &= BITS(fds, n);
if (set) {
/* every requested bit is open: this word holds the highest fd */
if (!(set & ~*open_fds))
goto get_max;
return -EBADF;
}
}
/* walk the complete words from the top down */
while (n) {
open_fds--;
n--;
set = BITS(fds, n);
if (!set)
continue;
if (set & ~*open_fds)
return -EBADF;
/* max was already fixed by a higher word; lower words are only validated */
if (max)
continue;
/* also entered by the goto above, for the partial top word */
get_max:
/* count the bit positions up to and including the highest set bit */
do {
max++;
set >>= 1;
} while (set);
max += n * __NFDBITS;
}
return max;
}
/* single-bit mask for position (i & (__NFDBITS-1)) inside one long-word */
#define BIT(i) (1UL << ((i)&(__NFDBITS-1)))
/* m advanced by i/__NFDBITS; presumably the word of bitmap m holding bit i -- confirm m's type at the call sites */
#define MEM(i,m) ((m)+(unsigned)(i)/__NFDBITS)
#define ISSET(i,m) (((i)&*(m)) != 0) #define ISSET(i,m) (((i)&*(m)) != 0)
#define SET(i,m) (*(m) |= (i)) #define SET(i,m) (*(m) |= (i))
...@@ -120,71 +165,56 @@ void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table ...@@ -120,71 +165,56 @@ void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table
int do_select(int n, fd_set_bits *fds, long *timeout) int do_select(int n, fd_set_bits *fds, long *timeout)
{ {
poll_table table, *wait; poll_table table, *wait;
int retval, off, max, maxoff; int retval, i, off;
long __timeout = *timeout; long __timeout = *timeout;
read_lock(&current->files->file_lock);
retval = max_select_fd(n, fds);
read_unlock(&current->files->file_lock);
if (retval < 0)
return retval;
n = retval;
poll_initwait(&table); poll_initwait(&table);
wait = &table; wait = &table;
if (!__timeout) if (!__timeout)
wait = NULL; wait = NULL;
retval = 0; retval = 0;
maxoff = n/BITS_PER_LONG;
max = 0;
for (;;) { for (;;) {
set_current_state(TASK_INTERRUPTIBLE); set_current_state(TASK_INTERRUPTIBLE);
for (off = 0; off <= maxoff; off++) { for (i = 0 ; i < n; i++) {
unsigned long val = BITS(fds, off); unsigned long bit = BIT(i);
unsigned long mask;
struct file *file;
if (!val) off = i / __NFDBITS;
if (!(bit & BITS(fds, off)))
continue; continue;
while (val) { file = fget(i);
int k = ffz(~val); mask = POLLNVAL;
unsigned long mask, bit; if (file) {
struct file *file; mask = DEFAULT_POLLMASK;
if (file->f_op && file->f_op->poll)
if (k > n%BITS_PER_LONG) mask = file->f_op->poll(file, wait);
break; fput(file);
}
bit = (1UL << k); if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
val &= ~bit; SET(bit, __RES_IN(fds,off));
retval++;
file = fget((off * BITS_PER_LONG) + k); wait = NULL;
mask = POLLNVAL; }
if (file) { if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) {
mask = DEFAULT_POLLMASK; SET(bit, __RES_OUT(fds,off));
if (file->f_op && file->f_op->poll) retval++;
mask = file->f_op->poll(file, wait); wait = NULL;
fput(file); }
} else { if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
/* This error will shadow all other results. SET(bit, __RES_EX(fds,off));
* This matches previous linux behaviour */ retval++;
retval = -EBADF; wait = NULL;
goto out;
}
if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) {
SET(bit, __RES_IN(fds,off));
retval++;
wait = NULL;
}
if ((mask& POLLOUT_SET) && ISSET(bit,__OUT(fds,off))) {
SET(bit, __RES_OUT(fds,off));
retval++;
wait = NULL;
}
if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) {
SET(bit, __RES_EX(fds,off));
retval++;
wait = NULL;
}
if (!(val &= ~bit))
break;
} }
} }
maxoff = max;
wait = NULL; wait = NULL;
if (retval || !__timeout || signal_pending(current)) if (retval || !__timeout || signal_pending(current))
break; break;
...@@ -194,43 +224,25 @@ int do_select(int n, fd_set_bits *fds, long *timeout) ...@@ -194,43 +224,25 @@ int do_select(int n, fd_set_bits *fds, long *timeout)
} }
__timeout = schedule_timeout(__timeout); __timeout = schedule_timeout(__timeout);
} }
out:
current->state = TASK_RUNNING; current->state = TASK_RUNNING;
poll_freewait(&table); poll_freewait(&table);
/* /*
* Update the caller timeout. * Up-to-date the caller timeout.
*/ */
*timeout = __timeout; *timeout = __timeout;
return retval; return retval;
} }
/* static void *select_bits_alloc(int size)
* We do a VERIFY_WRITE here even though we are only reading this time: {
* we'll write to it eventually.. return kmalloc(6 * size, GFP_KERNEL);
*/ }
static int get_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset) static void select_bits_free(void *bits, int size)
{ {
unsigned long rounded = FDS_BYTES(nr), mask; kfree(bits);
if (ufdset) {
int error = verify_area(VERIFY_WRITE, ufdset, rounded);
if (!error && __copy_from_user(fdset, ufdset, rounded))
error = -EFAULT;
if (nr % __NFDBITS == 0)
mask = 0;
else {
/* This includes one bit too much according to SU;
but without this some programs hang. */
mask = ~(~0UL << (nr%__NFDBITS));
}
fdset[nr/__NFDBITS] &= mask;
return error;
}
memset(fdset, 0, rounded);
return 0;
} }
/* /*
...@@ -251,7 +263,6 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) ...@@ -251,7 +263,6 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
char *bits; char *bits;
long timeout; long timeout;
int ret, size, max_fdset; int ret, size, max_fdset;
char stack_bits[FDS_BYTES(FAST_SELECT_MAX) * 6];
timeout = MAX_SCHEDULE_TIMEOUT; timeout = MAX_SCHEDULE_TIMEOUT;
if (tvp) { if (tvp) {
...@@ -286,16 +297,11 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) ...@@ -286,16 +297,11 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
* since we used fdset we need to allocate memory in units of * since we used fdset we need to allocate memory in units of
* long-words. * long-words.
*/ */
ret = -ENOMEM;
size = FDS_BYTES(n); size = FDS_BYTES(n);
if (n < FAST_SELECT_MAX) { bits = select_bits_alloc(size);
bits = stack_bits; if (!bits)
} else { goto out_nofds;
ret = -ENOMEM;
bits = kmalloc(6*size, GFP_KERNEL);
if (!bits)
goto out_nofds;
}
fds.in = (unsigned long *) bits; fds.in = (unsigned long *) bits;
fds.out = (unsigned long *) (bits + size); fds.out = (unsigned long *) (bits + size);
fds.ex = (unsigned long *) (bits + 2*size); fds.ex = (unsigned long *) (bits + 2*size);
...@@ -307,7 +313,9 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) ...@@ -307,7 +313,9 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
(ret = get_fd_set(n, outp, fds.out)) || (ret = get_fd_set(n, outp, fds.out)) ||
(ret = get_fd_set(n, exp, fds.ex))) (ret = get_fd_set(n, exp, fds.ex)))
goto out; goto out;
memset(fds.res_in, 0, 3*size); zero_fd_set(n, fds.res_in);
zero_fd_set(n, fds.res_out);
zero_fd_set(n, fds.res_ex);
ret = do_select(n, &fds, &timeout); ret = do_select(n, &fds, &timeout);
...@@ -318,8 +326,8 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) ...@@ -318,8 +326,8 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
usec = timeout % HZ; usec = timeout % HZ;
usec *= (1000000/HZ); usec *= (1000000/HZ);
} }
__put_user(sec, &tvp->tv_sec); put_user(sec, &tvp->tv_sec);
__put_user(usec, &tvp->tv_usec); put_user(usec, &tvp->tv_usec);
} }
if (ret < 0) if (ret < 0)
...@@ -336,10 +344,8 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp) ...@@ -336,10 +344,8 @@ sys_select(int n, fd_set *inp, fd_set *outp, fd_set *exp, struct timeval *tvp)
set_fd_set(n, exp, fds.res_ex); set_fd_set(n, exp, fds.res_ex);
out: out:
if (n >= FAST_SELECT_MAX) select_bits_free(bits, size);
kfree(bits);
out_nofds: out_nofds:
return ret; return ret;
} }
...@@ -404,42 +410,12 @@ static int do_poll(unsigned int nfds, unsigned int nchunks, unsigned int nleft, ...@@ -404,42 +410,12 @@ static int do_poll(unsigned int nfds, unsigned int nchunks, unsigned int nleft,
return count; return count;
} }
/*
 * Poll a descriptor array that is small enough to live on the stack.
 *
 * nfds entries are copied from ufds into a local array of FAST_POLL_MAX
 * elements (the caller has to make sure nfds fits). do_pollfd() is then
 * run over the copy until it reports at least one event, the timeout is
 * zero, a signal is pending, or wait->error is set. Afterwards every
 * revents field is written back to ufds and the wait table is released
 * with poll_freewait().
 *
 * Returns -EFAULT when the copy-in fails (the wait table is not released
 * on that path), -EINTR when nothing was found and a signal is pending,
 * and otherwise the value left in the counter by the last pass, which is
 * wait->error when that ended the loop.
 */
static int fast_poll(poll_table *table, poll_table *wait, struct pollfd *ufds,
		     unsigned int nfds, long timeout)
{
	struct pollfd local_fds[FAST_POLL_MAX];
	poll_table *cur_wait = wait;
	int ready, idx;

	if (copy_from_user(local_fds, ufds, nfds * sizeof(struct pollfd)))
		return -EFAULT;

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		ready = 0;
		do_pollfd(nfds, local_fds, &cur_wait, &ready);
		/* every pass after the first hands do_pollfd() a NULL table */
		cur_wait = NULL;
		if (ready != 0 || timeout == 0 || signal_pending(current))
			break;
		ready = wait->error;
		if (ready != 0)
			break;
		timeout = schedule_timeout(timeout);
	}
	current->state = TASK_RUNNING;

	/* hand the collected results back to user space */
	idx = 0;
	while (idx < nfds) {
		__put_user(local_fds[idx].revents, &ufds[idx].revents);
		idx++;
	}

	poll_freewait(table);
	return (ready == 0 && signal_pending(current)) ? -EINTR : ready;
}
asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout) asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
{ {
int i, j, err, fdcount; int i, j, fdcount, err;
struct pollfd **fds; struct pollfd **fds;
poll_table table, *wait; poll_table table, *wait;
int nchunks, nleft; int nchunks, nleft;
/* Do a sanity check on nfds ... */ /* Do a sanity check on nfds ... */
if (nfds > NR_OPEN) if (nfds > NR_OPEN)
...@@ -453,45 +429,43 @@ asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout) ...@@ -453,45 +429,43 @@ asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
timeout = MAX_SCHEDULE_TIMEOUT; timeout = MAX_SCHEDULE_TIMEOUT;
} }
poll_initwait(&table); poll_initwait(&table);
wait = &table; wait = &table;
if (!timeout) if (!timeout)
wait = NULL; wait = NULL;
if (nfds < FAST_POLL_MAX)
return fast_poll(&table, wait, ufds, nfds, timeout);
err = -ENOMEM; err = -ENOMEM;
fds = (struct pollfd **)kmalloc( fds = NULL;
(1 + (nfds - 1) / POLLFD_PER_PAGE) * sizeof(struct pollfd *), if (nfds != 0) {
GFP_KERNEL); fds = (struct pollfd **)kmalloc(
if (fds == NULL) (1 + (nfds - 1) / POLLFD_PER_PAGE) * sizeof(struct pollfd *),
goto out; GFP_KERNEL);
if (fds == NULL)
goto out;
}
nchunks = 0; nchunks = 0;
nleft = nfds; nleft = nfds;
while (nleft > POLLFD_PER_PAGE) { while (nleft > POLLFD_PER_PAGE) { /* allocate complete PAGE_SIZE chunks */
fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL); fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
if (fds[nchunks] == NULL) if (fds[nchunks] == NULL)
goto out_fds; goto out_fds;
nchunks++; nchunks++;
nleft -= POLLFD_PER_PAGE; nleft -= POLLFD_PER_PAGE;
} }
if (nleft) { if (nleft) { /* allocate last PAGE_SIZE chunk, only nleft elements used */
fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL); fds[nchunks] = (struct pollfd *)__get_free_page(GFP_KERNEL);
if (fds[nchunks] == NULL) if (fds[nchunks] == NULL)
goto out_fds; goto out_fds;
} }
err = -EFAULT; err = -EFAULT;
for (i=0; i < nchunks; i++) for (i=0; i < nchunks; i++)
if (copy_from_user(fds[i], ufds + i*POLLFD_PER_PAGE, PAGE_SIZE)) if (copy_from_user(fds[i], ufds + i*POLLFD_PER_PAGE, PAGE_SIZE))
goto out_fds1; goto out_fds1;
if (nleft) { if (nleft) {
if (copy_from_user(fds[nchunks], ufds + nchunks*POLLFD_PER_PAGE, if (copy_from_user(fds[nchunks], ufds + nchunks*POLLFD_PER_PAGE,
nleft * sizeof(struct pollfd))) nleft * sizeof(struct pollfd)))
goto out_fds1; goto out_fds1;
} }
...@@ -515,7 +489,8 @@ asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout) ...@@ -515,7 +489,8 @@ asmlinkage long sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout)
out_fds: out_fds:
for (i=0; i < nchunks; i++) for (i=0; i < nchunks; i++)
free_page((unsigned long)(fds[i])); free_page((unsigned long)(fds[i]));
kfree(fds); if (nfds != 0)
kfree(fds);
out: out:
poll_freewait(&table); poll_freewait(&table);
return err; return err;
......
...@@ -10,32 +10,13 @@ ...@@ -10,32 +10,13 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#define POLL_INLINE_BYTES 256 struct poll_table_page;
/* sys_select() with n below this uses an on-stack bitmap instead of kmalloc() */
#define FAST_SELECT_MAX 128
/* sys_poll() with nfds below this takes the on-stack fast_poll() path */
#define FAST_POLL_MAX 128
/* size of poll_table's inline_table: one more entry than fits in POLL_INLINE_BYTES */
#define POLL_INLINE_ENTRIES (1+(POLL_INLINE_BYTES / sizeof(struct poll_table_entry)))
/*
 * One registered wait: a wait-queue entry plus the file and the
 * wait-queue head it was registered with.
 */
struct poll_table_entry {
/* file the wait belongs to */
struct file * filp;
/* the wait-queue entry itself */
wait_queue_t wait;
/* queue head the entry is attached to */
wait_queue_head_t * wait_address;
};
/*
 * Header of one block of poll_table_entry slots; blocks are chained
 * through next.
 */
struct poll_table_page {
/* next block in the chain */
struct poll_table_page * next;
/* current position inside entries[] */
struct poll_table_entry * entry;
/* slots stored directly behind this header */
struct poll_table_entry entries[0];
};
typedef struct poll_table_struct { typedef struct poll_table_struct {
int error; int error;
struct poll_table_page * table; struct poll_table_page * table;
struct poll_table_page inline_page;
struct poll_table_entry inline_table[POLL_INLINE_ENTRIES];
} poll_table; } poll_table;
/* bytes from the start of poll_table's inline_page member to the end of the struct; __pollwait() uses it as the size limit of the inline page */
#define POLL_INLINE_TABLE_LEN (sizeof(poll_table) - offsetof(poll_table, inline_page))
extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p); extern void __pollwait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p);
static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p) static inline void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
...@@ -49,7 +30,6 @@ static inline void poll_initwait(poll_table* pt) ...@@ -49,7 +30,6 @@ static inline void poll_initwait(poll_table* pt)
pt->error = 0; pt->error = 0;
pt->table = NULL; pt->table = NULL;
} }
extern void poll_freewait(poll_table* pt); extern void poll_freewait(poll_table* pt);
...@@ -69,6 +49,27 @@ typedef struct { ...@@ -69,6 +49,27 @@ typedef struct {
#define FDS_LONGS(nr) (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG) #define FDS_LONGS(nr) (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG)
#define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long)) #define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long))
/*
* We do a VERIFY_WRITE here even though we are only reading this time:
* we'll write to it eventually..
*
* Use "unsigned long" accesses to let user-mode fd_set's be long-aligned.
*/
static inline
int get_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset)
{
nr = FDS_BYTES(nr);
if (ufdset) {
int error;
error = verify_area(VERIFY_WRITE, ufdset, nr);
if (!error && __copy_from_user(fdset, ufdset, nr))
error = -EFAULT;
return error;
}
memset(fdset, 0, nr);
return 0;
}
static inline static inline
void set_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset) void set_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset)
{ {
...@@ -76,6 +77,12 @@ void set_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset) ...@@ -76,6 +77,12 @@ void set_fd_set(unsigned long nr, void *ufdset, unsigned long *fdset)
__copy_to_user(ufdset, fdset, FDS_BYTES(nr)); __copy_to_user(ufdset, fdset, FDS_BYTES(nr));
} }
/*
 * Clear FDS_BYTES(nr) bytes of fdset, i.e. every long-word needed to hold
 * nr descriptor bits.
 */
static inline
void zero_fd_set(unsigned long nr, unsigned long *fdset)
{
memset(fdset, 0, FDS_BYTES(nr));
}
extern int do_select(int n, fd_set_bits *fds, long *timeout); extern int do_select(int n, fd_set_bits *fds, long *timeout);
#endif /* KERNEL */ #endif /* KERNEL */
......
...@@ -63,8 +63,6 @@ ...@@ -63,8 +63,6 @@
extern int md_size[MAX_MD_DEVS]; extern int md_size[MAX_MD_DEVS];
extern struct hd_struct md_hd_struct[MAX_MD_DEVS]; extern struct hd_struct md_hd_struct[MAX_MD_DEVS];
extern void add_mddev_mapping (mddev_t *mddev, kdev_t dev, void *data);
extern void del_mddev_mapping (mddev_t *mddev, kdev_t dev);
extern char * partition_name (kdev_t dev); extern char * partition_name (kdev_t dev);
extern inline char * bdev_partition_name (struct block_device *bdev) extern inline char * bdev_partition_name (struct block_device *bdev)
{ {
...@@ -77,14 +75,9 @@ extern mdk_thread_t * md_register_thread (void (*run) (void *data), ...@@ -77,14 +75,9 @@ extern mdk_thread_t * md_register_thread (void (*run) (void *data),
extern void md_unregister_thread (mdk_thread_t *thread); extern void md_unregister_thread (mdk_thread_t *thread);
extern void md_wakeup_thread(mdk_thread_t *thread); extern void md_wakeup_thread(mdk_thread_t *thread);
extern void md_interrupt_thread (mdk_thread_t *thread); extern void md_interrupt_thread (mdk_thread_t *thread);
extern int md_update_sb (mddev_t *mddev); extern void md_update_sb (mddev_t *mddev);
extern int md_do_sync(mddev_t *mddev, mdp_disk_t *spare);
extern void md_done_sync(mddev_t *mddev, int blocks, int ok); extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
extern void md_sync_acct(kdev_t dev, unsigned long nr_sectors); extern void md_sync_acct(kdev_t dev, unsigned long nr_sectors);
extern void md_recover_arrays (void);
extern int md_check_ordering (mddev_t *mddev);
extern int md_notify_reboot(struct notifier_block *this,
unsigned long code, void *x);
extern int md_error (mddev_t *mddev, struct block_device *bdev); extern int md_error (mddev_t *mddev, struct block_device *bdev);
extern int md_run_setup(void); extern int md_run_setup(void);
......
...@@ -64,24 +64,6 @@ typedef struct mdk_rdev_s mdk_rdev_t; ...@@ -64,24 +64,6 @@ typedef struct mdk_rdev_s mdk_rdev_t;
#define MAX_MD_DEVS (1<<MINORBITS) /* Max number of md dev */ #define MAX_MD_DEVS (1<<MINORBITS) /* Max number of md dev */
/*
 * Maps a kdev to an mddev/subdev. How 'data' is handled is up to
 * the personality (e.g. HSM uses this to identify individual LVs).
 */
typedef struct dev_mapping_s {
/* the md device this slot refers to */
mddev_t *mddev;
/* opaque pointer whose meaning is defined by the personality */
void *data;
} dev_mapping_t;
/* MAX_MD_DEVS slots; kdev_to_mddev() indexes this table by minor number */
extern dev_mapping_t mddev_map [MAX_MD_DEVS];
/*
 * Return the mddev stored in mddev_map for dev, using dev's minor number
 * as the index. Calls BUG() when dev's major number is not MD_MAJOR.
 */
static inline mddev_t * kdev_to_mddev (kdev_t dev)
{
if (major(dev) != MD_MAJOR)
BUG();
return mddev_map[minor(dev)].mddev;
}
/* /*
* options passed in raidrun: * options passed in raidrun:
*/ */
...@@ -196,31 +178,38 @@ struct mddev_s ...@@ -196,31 +178,38 @@ struct mddev_s
mdk_personality_t *pers; mdk_personality_t *pers;
int __minor; int __minor;
mdp_super_t *sb; mdp_super_t *sb;
int nb_dev;
struct list_head disks; struct list_head disks;
int sb_dirty; int sb_dirty;
mdu_param_t param;
int ro; int ro;
struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */
unsigned long curr_resync; /* blocks scheduled */ unsigned long curr_resync; /* blocks scheduled */
unsigned long resync_mark; /* a recent timestamp */ unsigned long resync_mark; /* a recent timestamp */
unsigned long resync_mark_cnt;/* blocks written at resync_mark */ unsigned long resync_mark_cnt;/* blocks written at resync_mark */
char *name; /* recovery_running is 0 for no recovery/resync,
* 1 for active recovery
* 2 for active resync
* -error for an error (e.g. -EINTR)
* it can only be set > 0 under reconfig_sem
*/
int recovery_running; int recovery_running;
int in_sync; /* know to not need resync */
struct semaphore reconfig_sem; struct semaphore reconfig_sem;
struct semaphore recovery_sem;
struct semaphore resync_sem;
atomic_t active; atomic_t active;
mdp_disk_t *spare;
atomic_t recovery_active; /* blocks scheduled, but not written */ atomic_t recovery_active; /* blocks scheduled, but not written */
wait_queue_head_t recovery_wait; wait_queue_head_t recovery_wait;
request_queue_t queue; /* for plugging ... */
struct list_head all_mddevs; struct list_head all_mddevs;
}; };
struct mdk_personality_s struct mdk_personality_s
{ {
char *name; char *name;
int (*make_request)(mddev_t *mddev, int rw, struct bio *bio); int (*make_request)(request_queue_t *q, struct bio *bio);
int (*run)(mddev_t *mddev); int (*run)(mddev_t *mddev);
int (*stop)(mddev_t *mddev); int (*stop)(mddev_t *mddev);
int (*status)(char *page, mddev_t *mddev); int (*status)(char *page, mddev_t *mddev);
...@@ -237,9 +226,6 @@ struct mdk_personality_s ...@@ -237,9 +226,6 @@ struct mdk_personality_s
* SPARE_ACTIVE expects such a change) * SPARE_ACTIVE expects such a change)
*/ */
int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state); int (*diskop) (mddev_t *mddev, mdp_disk_t **descriptor, int state);
int (*stop_resync)(mddev_t *mddev);
int (*restart_resync)(mddev_t *mddev);
int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster); int (*sync_request)(mddev_t *mddev, sector_t sector_nr, int go_faster);
}; };
...@@ -279,13 +265,6 @@ extern mdp_disk_t *get_spare(mddev_t *mddev); ...@@ -279,13 +265,6 @@ extern mdp_disk_t *get_spare(mddev_t *mddev);
#define ITERATE_RDEV(mddev,rdev,tmp) \ #define ITERATE_RDEV(mddev,rdev,tmp) \
ITERATE_RDEV_GENERIC((mddev)->disks,same_set,rdev,tmp) ITERATE_RDEV_GENERIC((mddev)->disks,same_set,rdev,tmp)
/*
 * Same as ITERATE_RDEV, but assumes that the devices have rdev->desc_nr
 * numbered from 0 to mddev->nb_dev, and iterates through rdevs in
 * ascending order of that index.
 *
 * mddev: array to walk (any expression yielding an mddev pointer)
 * rdev:  lvalue that receives find_rdev_nr(mddev, i) for each index
 * i:     integer lvalue used as the index
 *
 * All macro parameters are parenthesized so that an argument such as
 * '&md' or 'conf->mddev' binds correctly against '->nb_dev'.
 *
 * Note: the lookup sits before the bound test in the loop condition, so
 * find_rdev_nr() is also evaluated once with i == nb_dev before the loop
 * terminates; 'rdev' holds that result afterwards.
 */
#define ITERATE_RDEV_ORDERED(mddev,rdev,i)				\
	for ((i) = 0;							\
	     (rdev) = find_rdev_nr((mddev), (i)), (i) < (mddev)->nb_dev; \
	     (i)++)
/* /*
* Iterates through all 'RAID managed disks' * Iterates through all 'RAID managed disks'
...@@ -299,26 +278,6 @@ extern mdp_disk_t *get_spare(mddev_t *mddev); ...@@ -299,26 +278,6 @@ extern mdp_disk_t *get_spare(mddev_t *mddev);
#define ITERATE_RDEV_PENDING(rdev,tmp) \ #define ITERATE_RDEV_PENDING(rdev,tmp) \
ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp) ITERATE_RDEV_GENERIC(pending_raid_disks,pending,rdev,tmp)
/*
* iterates through all used mddevs in the system.
*
* 'tmp' is the list cursor (presumably a struct list_head * - confirm at
* the call sites) and 'mddev' receives list_entry(tmp, mddev_t, all_mddevs)
* for each step.
*
* The whole step lives in the loop condition, in this order:
*   1. mddev is computed from the current cursor,
*   2. the cursor is advanced to tmp->next,
*   3. the loop continues only while tmp->prev != &all_mddevs.
* Because the cursor is advanced before the body runs, the body never sees
* 'tmp' pointing at the entry it is handling.
* NOTE(review): advancing first presumably lets the body unlink the current
* entry safely - confirm against the users of this macro.
*/
#define ITERATE_MDDEV(mddev,tmp) \
\
for (tmp = all_mddevs.next; \
mddev = list_entry(tmp, mddev_t, all_mddevs), \
tmp = tmp->next, tmp->prev != &all_mddevs \
; )
/*
 * Take mddev->reconfig_sem with down_interruptible() and hand its return
 * value straight back to the caller.
 */
static inline int lock_mddev (mddev_t * mddev)
{
	struct semaphore *sem = &mddev->reconfig_sem;

	return down_interruptible(sem);
}
/*
 * Release mddev->reconfig_sem with up(); counterpart of lock_mddev().
 */
static inline void unlock_mddev (mddev_t * mddev)
{
	struct semaphore *sem = &mddev->reconfig_sem;

	up(sem);
}
#define xchg_values(x,y) do { __typeof__(x) __tmp = x; \ #define xchg_values(x,y) do { __typeof__(x) __tmp = x; \
x = y; y = __tmp; } while (0) x = y; y = __tmp; } while (0)
......
...@@ -33,8 +33,7 @@ struct r1_private_data_s { ...@@ -33,8 +33,7 @@ struct r1_private_data_s {
int working_disks; int working_disks;
int last_used; int last_used;
sector_t next_seq_sect; sector_t next_seq_sect;
mdk_thread_t *thread, *resync_thread; mdk_thread_t *thread;
int resync_mirrors;
mirror_info_t *spare; mirror_info_t *spare;
spinlock_t device_lock; spinlock_t device_lock;
......
...@@ -177,7 +177,7 @@ struct stripe_head { ...@@ -177,7 +177,7 @@ struct stripe_head {
* is put on a "delayed" queue until there are no stripes currently * is put on a "delayed" queue until there are no stripes currently
* in a pre-read phase. Further, if the "delayed" queue is empty when * in a pre-read phase. Further, if the "delayed" queue is empty when
* a stripe is put on it then we "plug" the queue and do not process it * a stripe is put on it then we "plug" the queue and do not process it
* until an unplg call is made. (the tq_disk list is run). * until an unplug call is made. (blk_run_queues is run).
* *
* When preread is initiated on a stripe, we set PREREAD_ACTIVE and add * When preread is initiated on a stripe, we set PREREAD_ACTIVE and add
* it to the count of prereading stripes. * it to the count of prereading stripes.
...@@ -205,12 +205,11 @@ struct disk_info { ...@@ -205,12 +205,11 @@ struct disk_info {
struct raid5_private_data { struct raid5_private_data {
struct stripe_head **stripe_hashtbl; struct stripe_head **stripe_hashtbl;
mddev_t *mddev; mddev_t *mddev;
mdk_thread_t *thread, *resync_thread; mdk_thread_t *thread;
struct disk_info disks[MD_SB_DISKS]; struct disk_info disks[MD_SB_DISKS];
struct disk_info *spare; struct disk_info *spare;
int chunk_size, level, algorithm; int chunk_size, level, algorithm;
int raid_disks, working_disks, failed_disks; int raid_disks, working_disks, failed_disks;
int resync_parity;
int max_nr_stripes; int max_nr_stripes;
struct list_head handle_list; /* stripes needing handling */ struct list_head handle_list; /* stripes needing handling */
...@@ -229,9 +228,6 @@ struct raid5_private_data { ...@@ -229,9 +228,6 @@ struct raid5_private_data {
* waiting for 25% to be free * waiting for 25% to be free
*/ */
spinlock_t device_lock; spinlock_t device_lock;
int plugged;
struct tq_struct plug_tq;
}; };
typedef struct raid5_private_data raid5_conf_t; typedef struct raid5_private_data raid5_conf_t;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment