Commit 313d90b7 authored by Neil Brown's avatar Neil Brown Committed by Christoph Hellwig

[PATCH] Initial md/raid5 support for 2.5 (with bio)

With this patch raid5 works.  There is still some more
work to do, though.

- uses bio instead of buffer_head
- stripe cache is now a fixed size.
   If read requests are smaller, we read the whole block anyway
   If write requests are smaller, we pre-read.
- stripe_head is now variable sized with an array of structures at
  the end.  We allocate extra space depending on how many devices
  are in the array.
  stripe_head has its very own slab cache.
- store and use bdev for each device in array

Bypassing the cache for reads is currently disabled.  I need to
think through the implications (and implementation) of allowing
large bion that are larger than the stripe cache to go directly
to the device (if it isn't failed, of course).
parent 7d684b93
This diff is collapsed.
...@@ -26,31 +26,30 @@ ...@@ -26,31 +26,30 @@
static struct xor_block_template *active_template; static struct xor_block_template *active_template;
void void
xor_block(unsigned int count, struct buffer_head **bh_ptr) xor_block(unsigned int count, unsigned int bytes, void **ptr)
{ {
unsigned long *p0, *p1, *p2, *p3, *p4; unsigned long *p0, *p1, *p2, *p3, *p4;
unsigned long bytes = bh_ptr[0]->b_size;
p0 = (unsigned long *) bh_ptr[0]->b_data; p0 = (unsigned long *) ptr[0];
p1 = (unsigned long *) bh_ptr[1]->b_data; p1 = (unsigned long *) ptr[1];
if (count == 2) { if (count == 2) {
active_template->do_2(bytes, p0, p1); active_template->do_2(bytes, p0, p1);
return; return;
} }
p2 = (unsigned long *) bh_ptr[2]->b_data; p2 = (unsigned long *) ptr[2];
if (count == 3) { if (count == 3) {
active_template->do_3(bytes, p0, p1, p2); active_template->do_3(bytes, p0, p1, p2);
return; return;
} }
p3 = (unsigned long *) bh_ptr[3]->b_data; p3 = (unsigned long *) ptr[3];
if (count == 4) { if (count == 4) {
active_template->do_4(bytes, p0, p1, p2, p3); active_template->do_4(bytes, p0, p1, p2, p3);
return; return;
} }
p4 = (unsigned long *) bh_ptr[4]->b_data; p4 = (unsigned long *) ptr[4];
active_template->do_5(bytes, p0, p1, p2, p3, p4); active_template->do_5(bytes, p0, p1, p2, p3, p4);
} }
......
...@@ -7,21 +7,21 @@ ...@@ -7,21 +7,21 @@
/* /*
* *
* Each stripe contains one buffer per disc. Each buffer can be in * Each stripe contains one buffer per disc. Each buffer can be in
* one of a number of states determined by bh_state. Changes between * one of a number of states stored in "flags". Changes between
* these states happen *almost* exclusively under a per-stripe * these states happen *almost* exclusively under a per-stripe
* spinlock. Some very specific changes can happen in b_end_io, and * spinlock. Some very specific changes can happen in bi_end_io, and
* these are not protected by the spin lock. * these are not protected by the spin lock.
* *
* The bh_state bits that are used to represent these states are: * The flag bits that are used to represent these states are:
* BH_Uptodate, BH_Lock * R5_UPTODATE and R5_LOCKED
* *
* State Empty == !Uptodate, !Lock * State Empty == !UPTODATE, !LOCK
* We have no data, and there is no active request * We have no data, and there is no active request
* State Want == !Uptodate, Lock * State Want == !UPTODATE, LOCK
* A read request is being submitted for this block * A read request is being submitted for this block
* State Dirty == Uptodate, Lock * State Dirty == UPTODATE, LOCK
* Some new data is in this buffer, and it is being written out * Some new data is in this buffer, and it is being written out
* State Clean == Uptodate, !Lock * State Clean == UPTODATE, !LOCK
* We have valid data which is the same as on disc * We have valid data which is the same as on disc
* *
* The possible state transitions are: * The possible state transitions are:
...@@ -124,24 +124,29 @@ ...@@ -124,24 +124,29 @@
* plus raid5d if it is handling it, plus one for each active request * plus raid5d if it is handling it, plus one for each active request
* on a cached buffer. * on a cached buffer.
*/ */
struct stripe_head { struct stripe_head {
struct stripe_head *hash_next, **hash_pprev; /* hash pointers */ struct stripe_head *hash_next, **hash_pprev; /* hash pointers */
struct list_head lru; /* inactive_list or handle_list */ struct list_head lru; /* inactive_list or handle_list */
struct raid5_private_data *raid_conf; struct raid5_private_data *raid_conf;
struct buffer_head *bh_cache[MD_SB_DISKS]; /* buffered copy */ sector_t sector; /* sector of this row */
struct buffer_head *bh_read[MD_SB_DISKS]; /* read request buffers of the MD device */
struct buffer_head *bh_write[MD_SB_DISKS]; /* write request buffers of the MD device */
struct buffer_head *bh_written[MD_SB_DISKS]; /* write request buffers of the MD device that have been scheduled for write */
struct page *bh_page[MD_SB_DISKS]; /* saved bh_cache[n]->b_page when reading around the cache */
unsigned long sector; /* sector of this row */
int size; /* buffers size */
int pd_idx; /* parity disk index */ int pd_idx; /* parity disk index */
unsigned long state; /* state flags */ unsigned long state; /* state flags */
atomic_t count; /* nr of active thread/requests */ atomic_t count; /* nr of active thread/requests */
spinlock_t lock; spinlock_t lock;
int sync_redone; struct r5dev {
struct bio req;
struct bio_vec vec;
struct page *page;
struct bio *toread, *towrite, *written;
sector_t sector; /* sector of this page */
unsigned long flags;
} dev[1]; /* allocated with extra space depending of RAID geometry */
}; };
/* Flags */
#define R5_UPTODATE 0 /* page contains current data */
#define R5_LOCKED 1 /* IO has been submitted on "req" */
#define R5_OVERWRITE 2 /* towrite covers whole page */
/* /*
* Write method * Write method
...@@ -187,6 +192,7 @@ struct stripe_head { ...@@ -187,6 +192,7 @@ struct stripe_head {
struct disk_info { struct disk_info {
kdev_t dev; kdev_t dev;
struct block_device *bdev;
int operational; int operational;
int number; int number;
int raid_disk; int raid_disk;
...@@ -201,7 +207,6 @@ struct raid5_private_data { ...@@ -201,7 +207,6 @@ struct raid5_private_data {
mdk_thread_t *thread, *resync_thread; mdk_thread_t *thread, *resync_thread;
struct disk_info disks[MD_SB_DISKS]; struct disk_info disks[MD_SB_DISKS];
struct disk_info *spare; struct disk_info *spare;
int buffer_size;
int chunk_size, level, algorithm; int chunk_size, level, algorithm;
int raid_disks, working_disks, failed_disks; int raid_disks, working_disks, failed_disks;
int resync_parity; int resync_parity;
...@@ -210,6 +215,9 @@ struct raid5_private_data { ...@@ -210,6 +215,9 @@ struct raid5_private_data {
struct list_head handle_list; /* stripes needing handling */ struct list_head handle_list; /* stripes needing handling */
struct list_head delayed_list; /* stripes that have plugged requests */ struct list_head delayed_list; /* stripes that have plugged requests */
atomic_t preread_active_stripes; /* stripes with scheduled io */ atomic_t preread_active_stripes; /* stripes with scheduled io */
char cache_name[20];
kmem_cache_t *slab_cache; /* for allocating stripes */
/* /*
* Free stripes pool * Free stripes pool
*/ */
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#define MAX_XOR_BLOCKS 5 #define MAX_XOR_BLOCKS 5
extern void xor_block(unsigned int count, struct buffer_head **bh_ptr); extern void xor_block(unsigned int count, unsigned int bytes, void **ptr);
struct xor_block_template { struct xor_block_template {
struct xor_block_template *next; struct xor_block_template *next;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment