Commit 593060d7 authored by Chris Mason

Btrfs: Implement raid0 when multiple devices are present

Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 8a4b83cc
...@@ -413,6 +413,7 @@ struct btrfs_csum_item { ...@@ -413,6 +413,7 @@ struct btrfs_csum_item {
#define BTRFS_BLOCK_GROUP_DATA (1 << 0) #define BTRFS_BLOCK_GROUP_DATA (1 << 0)
#define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1) #define BTRFS_BLOCK_GROUP_SYSTEM (1 << 1)
#define BTRFS_BLOCK_GROUP_METADATA (1 << 2) #define BTRFS_BLOCK_GROUP_METADATA (1 << 2)
#define BTRFS_BLOCK_GROUP_RAID0 (1 << 3)
struct btrfs_block_group_item { struct btrfs_block_group_item {
...@@ -498,6 +499,8 @@ struct btrfs_fs_info { ...@@ -498,6 +499,8 @@ struct btrfs_fs_info {
u64 delalloc_bytes; u64 delalloc_bytes;
u64 last_alloc; u64 last_alloc;
u64 last_data_alloc; u64 last_data_alloc;
int extra_data_alloc_bits;
int extra_alloc_bits;
}; };
/* /*
......
...@@ -736,6 +736,8 @@ struct btrfs_root *open_ctree(struct super_block *sb, ...@@ -736,6 +736,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->total_pinned = 0; fs_info->total_pinned = 0;
fs_info->last_alloc = 0; fs_info->last_alloc = 0;
fs_info->last_data_alloc = 0; fs_info->last_data_alloc = 0;
fs_info->extra_alloc_bits = 0;
fs_info->extra_data_alloc_bits = 0;
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info); INIT_WORK(&fs_info->trans_work, btrfs_transaction_cleaner, fs_info);
......
...@@ -172,7 +172,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct ...@@ -172,7 +172,7 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{ {
return (cache->flags & bits); return (cache->flags & bits) == bits;
} }
static int noinline find_search_start(struct btrfs_root *root, static int noinline find_search_start(struct btrfs_root *root,
...@@ -1010,6 +1010,35 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, ...@@ -1010,6 +1010,35 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
} }
/*
 * Add total_bytes / bytes_used to the space_info entry that
 * __find_space_info() returns for these flags.  When no entry is
 * found, a new one is allocated, linked onto info->space_info and
 * seeded with the given totals.
 *
 * On success the entry is handed back through *space_info and 0 is
 * returned.  Returns -ENOMEM if the allocation fails; *space_info is
 * not written in that case.
 */
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
			     u64 total_bytes, u64 bytes_used,
			     struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *sinfo = __find_space_info(info, flags);

	if (!sinfo) {
		/* nothing tracked for these flags yet: build a new entry */
		sinfo = kmalloc(sizeof(*sinfo), GFP_NOFS);
		if (!sinfo)
			return -ENOMEM;

		list_add(&sinfo->list, &info->space_info);
		sinfo->flags = flags;
		sinfo->total_bytes = total_bytes;
		sinfo->bytes_used = bytes_used;
		sinfo->bytes_pinned = 0;
		sinfo->full = 0;
	} else {
		/* existing entry: just accumulate the new totals */
		sinfo->total_bytes += total_bytes;
		sinfo->bytes_used += bytes_used;
		WARN_ON(sinfo->total_bytes < sinfo->bytes_used);
	}

	*space_info = sinfo;
	return 0;
}
static int do_chunk_alloc(struct btrfs_trans_handle *trans, static int do_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *extent_root, u64 alloc_bytes, struct btrfs_root *extent_root, u64 alloc_bytes,
u64 flags) u64 flags)
...@@ -1021,6 +1050,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, ...@@ -1021,6 +1050,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
int ret; int ret;
space_info = __find_space_info(extent_root->fs_info, flags); space_info = __find_space_info(extent_root->fs_info, flags);
if (!space_info) {
ret = update_space_info(extent_root->fs_info, flags,
0, 0, &space_info);
BUG_ON(ret);
}
BUG_ON(!space_info); BUG_ON(!space_info);
if (space_info->full) if (space_info->full)
...@@ -1044,6 +1078,17 @@ printk("space info full %Lu\n", flags); ...@@ -1044,6 +1078,17 @@ printk("space info full %Lu\n", flags);
extent_root->fs_info->chunk_root->root_key.objectid, extent_root->fs_info->chunk_root->root_key.objectid,
start, num_bytes); start, num_bytes);
BUG_ON(ret); BUG_ON(ret);
if (flags & BTRFS_BLOCK_GROUP_RAID0) {
if (flags & BTRFS_BLOCK_GROUP_DATA) {
extent_root->fs_info->extra_data_alloc_bits =
BTRFS_BLOCK_GROUP_RAID0;
}
if (flags & BTRFS_BLOCK_GROUP_METADATA) {
extent_root->fs_info->extra_alloc_bits =
BTRFS_BLOCK_GROUP_RAID0;
}
}
return 0; return 0;
} }
...@@ -1655,24 +1700,31 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, ...@@ -1655,24 +1700,31 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct btrfs_extent_ref *ref; struct btrfs_extent_ref *ref;
struct btrfs_path *path; struct btrfs_path *path;
struct btrfs_key keys[2]; struct btrfs_key keys[2];
int extra_chunk_alloc_bits = 0;
if (data) { if (data) {
data = BTRFS_BLOCK_GROUP_DATA; data = BTRFS_BLOCK_GROUP_DATA | info->extra_data_alloc_bits;
} else if (root == root->fs_info->chunk_root) { } else if (root == root->fs_info->chunk_root) {
data = BTRFS_BLOCK_GROUP_SYSTEM; data = BTRFS_BLOCK_GROUP_SYSTEM;
} else { } else {
data = BTRFS_BLOCK_GROUP_METADATA; data = BTRFS_BLOCK_GROUP_METADATA | info->extra_alloc_bits;
} }
if (btrfs_super_num_devices(&info->super_copy) > 1 &&
!(data & BTRFS_BLOCK_GROUP_SYSTEM))
extra_chunk_alloc_bits = BTRFS_BLOCK_GROUP_RAID0;
if (root->ref_cows) { if (root->ref_cows) {
if (data != BTRFS_BLOCK_GROUP_METADATA) { if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
ret = do_chunk_alloc(trans, root->fs_info->extent_root, ret = do_chunk_alloc(trans, root->fs_info->extent_root,
2 * 1024 * 1024, 2 * 1024 * 1024,
BTRFS_BLOCK_GROUP_METADATA); BTRFS_BLOCK_GROUP_METADATA |
info->extra_alloc_bits |
extra_chunk_alloc_bits);
BUG_ON(ret); BUG_ON(ret);
} }
ret = do_chunk_alloc(trans, root->fs_info->extent_root, ret = do_chunk_alloc(trans, root->fs_info->extent_root,
num_bytes + 2 * 1024 * 1024, data); num_bytes + 2 * 1024 * 1024, data |
extra_chunk_alloc_bits);
BUG_ON(ret); BUG_ON(ret);
} }
...@@ -2627,34 +2679,6 @@ int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path, ...@@ -2627,34 +2679,6 @@ int find_first_block_group(struct btrfs_root *root, struct btrfs_path *path,
return ret; return ret;
} }
/*
 * Add total_bytes / bytes_used to the space_info entry that
 * __find_space_info() returns for these flags.  When no entry is
 * found, a new one is allocated, linked onto info->space_info and
 * seeded with the given totals.
 *
 * On success the entry is handed back through *space_info and 0 is
 * returned.  Returns -ENOMEM if the allocation fails; *space_info is
 * not written in that case.
 */
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
			     u64 total_bytes, u64 bytes_used,
			     struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *sinfo = __find_space_info(info, flags);

	if (!sinfo) {
		/* nothing tracked for these flags yet: build a new entry */
		sinfo = kmalloc(sizeof(*sinfo), GFP_NOFS);
		if (!sinfo)
			return -ENOMEM;

		list_add(&sinfo->list, &info->space_info);
		sinfo->flags = flags;
		sinfo->total_bytes = total_bytes;
		sinfo->bytes_used = bytes_used;
		sinfo->bytes_pinned = 0;
		sinfo->full = 0;
	} else {
		/* existing entry: just accumulate the new totals */
		sinfo->total_bytes += total_bytes;
		sinfo->bytes_used += bytes_used;
		WARN_ON(sinfo->total_bytes < sinfo->bytes_used);
	}

	*space_info = sinfo;
	return 0;
}
int btrfs_read_block_groups(struct btrfs_root *root) int btrfs_read_block_groups(struct btrfs_root *root)
{ {
struct btrfs_path *path; struct btrfs_path *path;
...@@ -2712,6 +2736,16 @@ int btrfs_read_block_groups(struct btrfs_root *root) ...@@ -2712,6 +2736,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
} else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) { } else if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
bit = BLOCK_GROUP_METADATA; bit = BLOCK_GROUP_METADATA;
} }
if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) {
if (cache->flags & BTRFS_BLOCK_GROUP_DATA) {
info->extra_data_alloc_bits =
BTRFS_BLOCK_GROUP_RAID0;
}
if (cache->flags & BTRFS_BLOCK_GROUP_METADATA) {
info->extra_alloc_bits =
BTRFS_BLOCK_GROUP_RAID0;
}
}
ret = update_space_info(info, cache->flags, found_key.offset, ret = update_space_info(info, cache->flags, found_key.offset,
btrfs_block_group_used(&cache->item), btrfs_block_group_used(&cache->item),
......
...@@ -317,8 +317,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, ...@@ -317,8 +317,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
map_length = length; map_length = length;
ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev); ret = btrfs_map_block(map_tree, logical, &physical, &map_length, &dev);
if (map_length < length + size) { if (map_length < length + size) {
printk("merge bio hook logical %Lu bio len %Lu physical %Lu "
"len %Lu\n", logical, length, physical, map_length);
return 1; return 1;
} }
return 0; return 0;
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <linux/sched.h> #include <linux/sched.h>
#include <linux/bio.h> #include <linux/bio.h>
#include <linux/buffer_head.h> #include <linux/buffer_head.h>
#include <asm/div64.h>
#include "ctree.h" #include "ctree.h"
#include "extent_map.h" #include "extent_map.h"
#include "disk-io.h" #include "disk-io.h"
...@@ -25,10 +26,24 @@ ...@@ -25,10 +26,24 @@
#include "print-tree.h" #include "print-tree.h"
#include "volumes.h" #include "volumes.h"
struct map_lookup { struct stripe {
struct btrfs_device *dev; struct btrfs_device *dev;
u64 physical; u64 physical;
}; };
/*
 * In-memory description of one chunk's logical->physical mapping.
 * A pointer to this is stashed in the chunk's extent_map (em->bdev)
 * and read back by btrfs_map_block().  The trailing stripes[] array
 * is variable length; allocate with map_lookup_size(num_stripes).
 */
struct map_lookup {
/* chunk type flags (BTRFS_BLOCK_GROUP_*); RAID0 is tested when mapping */
u64 type;
/* io alignment copied from / written to the chunk item */
int io_align;
/* io width copied from / written to the chunk item */
int io_width;
/* stripe length; divisor used to locate a stripe in btrfs_map_block() */
int stripe_len;
/* sector size recorded in the chunk item */
int sector_size;
/* number of valid entries in stripes[] */
int num_stripes;
/* one (device, physical start offset) pair per stripe */
struct stripe stripes[];
};
/* bytes needed for a struct map_lookup carrying n trailing stripes */
#define map_lookup_size(n) (sizeof(struct map_lookup) + (n) * sizeof(struct stripe))
static DEFINE_MUTEX(uuid_mutex); static DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids); static LIST_HEAD(fs_uuids);
...@@ -592,6 +607,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, ...@@ -592,6 +607,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
u64 *num_bytes, u64 type) u64 *num_bytes, u64 type)
{ {
u64 dev_offset; u64 dev_offset;
struct btrfs_fs_info *info = extent_root->fs_info;
struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
struct btrfs_stripe *stripes; struct btrfs_stripe *stripes;
struct btrfs_device *device = NULL; struct btrfs_device *device = NULL;
...@@ -610,10 +626,18 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, ...@@ -610,10 +626,18 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
int looped = 0; int looped = 0;
int ret; int ret;
int index; int index;
int stripe_len = 64 * 1024;
struct btrfs_key key; struct btrfs_key key;
if (list_empty(dev_list)) if (list_empty(dev_list))
return -ENOSPC; return -ENOSPC;
if (type & BTRFS_BLOCK_GROUP_RAID0)
num_stripes = btrfs_super_num_devices(&info->super_copy);
if (type & BTRFS_BLOCK_GROUP_DATA)
stripe_len = 64 * 1024;
if (type & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))
stripe_len = 32 * 1024;
again: again:
INIT_LIST_HEAD(&private_devs); INIT_LIST_HEAD(&private_devs);
cur = dev_list->next; cur = dev_list->next;
...@@ -650,9 +674,15 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, ...@@ -650,9 +674,15 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (!chunk) if (!chunk)
return -ENOMEM; return -ENOMEM;
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
if (!map) {
kfree(chunk);
return -ENOMEM;
}
stripes = &chunk->stripe; stripes = &chunk->stripe;
*num_bytes = calc_size; *num_bytes = calc_size * num_stripes;
index = 0; index = 0;
while(index < num_stripes) { while(index < num_stripes) {
BUG_ON(list_empty(&private_devs)); BUG_ON(list_empty(&private_devs));
...@@ -669,6 +699,8 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); ...@@ -669,6 +699,8 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid);
ret = btrfs_update_device(trans, device); ret = btrfs_update_device(trans, device);
BUG_ON(ret); BUG_ON(ret);
map->stripes[index].dev = device;
map->stripes[index].physical = dev_offset;
btrfs_set_stack_stripe_devid(stripes + index, device->devid); btrfs_set_stack_stripe_devid(stripes + index, device->devid);
btrfs_set_stack_stripe_offset(stripes + index, dev_offset); btrfs_set_stack_stripe_offset(stripes + index, dev_offset);
physical = dev_offset; physical = dev_offset;
...@@ -680,12 +712,18 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); ...@@ -680,12 +712,18 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid);
key.offset = *num_bytes; key.offset = *num_bytes;
key.type = BTRFS_CHUNK_ITEM_KEY; key.type = BTRFS_CHUNK_ITEM_KEY;
btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
btrfs_set_stack_chunk_stripe_len(chunk, 64 * 1024); btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
btrfs_set_stack_chunk_type(chunk, type); btrfs_set_stack_chunk_type(chunk, type);
btrfs_set_stack_chunk_num_stripes(chunk, num_stripes); btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
btrfs_set_stack_chunk_io_align(chunk, extent_root->sectorsize); btrfs_set_stack_chunk_io_align(chunk, stripe_len);
btrfs_set_stack_chunk_io_width(chunk, extent_root->sectorsize); btrfs_set_stack_chunk_io_width(chunk, stripe_len);
btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize); btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
map->sector_size = extent_root->sectorsize;
map->stripe_len = stripe_len;
map->io_align = stripe_len;
map->io_width = stripe_len;
map->type = type;
map->num_stripes = num_stripes;
ret = btrfs_insert_item(trans, chunk_root, &key, chunk, ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
btrfs_chunk_item_size(num_stripes)); btrfs_chunk_item_size(num_stripes));
...@@ -695,25 +733,11 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid); ...@@ -695,25 +733,11 @@ printk("alloc chunk size %Lu from dev %Lu\n", calc_size, device->devid);
em = alloc_extent_map(GFP_NOFS); em = alloc_extent_map(GFP_NOFS);
if (!em) if (!em)
return -ENOMEM; return -ENOMEM;
map = kmalloc(sizeof(*map), GFP_NOFS);
if (!map) {
free_extent_map(em);
return -ENOMEM;
}
em->bdev = (struct block_device *)map; em->bdev = (struct block_device *)map;
em->start = key.objectid; em->start = key.objectid;
em->len = key.offset; em->len = key.offset;
em->block_start = 0; em->block_start = 0;
map->physical = physical;
map->dev = device;
if (!map->dev) {
kfree(map);
free_extent_map(em);
return -EIO;
}
kfree(chunk); kfree(chunk);
em_tree = &extent_root->fs_info->mapping_tree.map_tree; em_tree = &extent_root->fs_info->mapping_tree.map_tree;
...@@ -758,6 +782,9 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, ...@@ -758,6 +782,9 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
struct map_lookup *map; struct map_lookup *map;
struct extent_map_tree *em_tree = &map_tree->map_tree; struct extent_map_tree *em_tree = &map_tree->map_tree;
u64 offset; u64 offset;
u64 stripe_offset;
u64 stripe_nr;
int stripe_index;
spin_lock(&em_tree->lock); spin_lock(&em_tree->lock);
...@@ -767,9 +794,40 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, ...@@ -767,9 +794,40 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree,
BUG_ON(em->start > logical || em->start + em->len < logical); BUG_ON(em->start > logical || em->start + em->len < logical);
map = (struct map_lookup *)em->bdev; map = (struct map_lookup *)em->bdev;
offset = logical - em->start; offset = logical - em->start;
*phys = map->physical + offset;
stripe_nr = offset;
/*
* stripe_nr counts the total number of stripes we have to stride
* to get to this block
*/
do_div(stripe_nr, map->stripe_len);
stripe_offset = stripe_nr * map->stripe_len;
BUG_ON(offset < stripe_offset);
/* stripe_offset is the offset of this block in its stripe*/
stripe_offset = offset - stripe_offset;
/*
* after this do_div call, stripe_nr is the number of stripes
* on this device we have to walk to find the data, and
* stripe_index is the number of our device in the stripe array
*/
stripe_index = do_div(stripe_nr, map->num_stripes);
BUG_ON(stripe_index >= map->num_stripes);
*phys = map->stripes[stripe_index].physical + stripe_offset +
stripe_nr * map->stripe_len;
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
/* we limit the length of each bio to what fits in a stripe */
*length = min_t(u64, em->len - offset,
map->stripe_len - stripe_offset);
} else {
*length = em->len - offset; *length = em->len - offset;
*dev = map->dev; }
*dev = map->stripes[stripe_index].dev;
free_extent_map(em); free_extent_map(em);
spin_unlock(&em_tree->lock); spin_unlock(&em_tree->lock);
return 0; return 0;
...@@ -822,7 +880,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, ...@@ -822,7 +880,9 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
u64 logical; u64 logical;
u64 length; u64 length;
u64 devid; u64 devid;
int num_stripes;
int ret; int ret;
int i;
logical = key->objectid; logical = key->objectid;
length = key->offset; length = key->offset;
...@@ -846,7 +906,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, ...@@ -846,7 +906,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
em = alloc_extent_map(GFP_NOFS); em = alloc_extent_map(GFP_NOFS);
if (!em) if (!em)
return -ENOMEM; return -ENOMEM;
map = kmalloc(sizeof(*map), GFP_NOFS); num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
if (!map) { if (!map) {
free_extent_map(em); free_extent_map(em);
return -ENOMEM; return -ENOMEM;
...@@ -857,14 +918,23 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, ...@@ -857,14 +918,23 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
em->len = length; em->len = length;
em->block_start = 0; em->block_start = 0;
map->physical = btrfs_stripe_offset_nr(leaf, chunk, 0); map->num_stripes = num_stripes;
devid = btrfs_stripe_devid_nr(leaf, chunk, 0); map->io_width = btrfs_chunk_io_width(leaf, chunk);
map->dev = btrfs_find_device(root, devid); map->io_align = btrfs_chunk_io_align(leaf, chunk);
if (!map->dev) { map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
map->type = btrfs_chunk_type(leaf, chunk);
for (i = 0; i < num_stripes; i++) {
map->stripes[i].physical =
btrfs_stripe_offset_nr(leaf, chunk, i);
devid = btrfs_stripe_devid_nr(leaf, chunk, i);
map->stripes[i].dev = btrfs_find_device(root, devid);
if (!map->stripes[i].dev) {
kfree(map); kfree(map);
free_extent_map(em); free_extent_map(em);
return -EIO; return -EIO;
} }
}
spin_lock(&map_tree->map_tree.lock); spin_lock(&map_tree->map_tree.lock);
ret = add_extent_mapping(&map_tree->map_tree, em); ret = add_extent_mapping(&map_tree->map_tree, em);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment