Commit ff7db6e0 authored by David Sterba

Merge branch 'foreign/zhaolei/reada' into for-chris-4.6

parents 23c1a966 7aff8cf4
fs/btrfs/ctree.h

@@ -1825,6 +1825,9 @@ struct btrfs_fs_info {
         spinlock_t reada_lock;
         struct radix_tree_root reada_tree;
 
+        /* readahead works cnt */
+        atomic_t reada_works_cnt;
+
         /* Extent buffer radix tree */
         spinlock_t buffer_lock;
         struct radix_tree_root buffer_radix;
@@ -4563,8 +4566,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
                               struct btrfs_key *start, struct btrfs_key *end);
 int btrfs_reada_wait(void *handle);
 void btrfs_reada_detach(void *handle);
-int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
-                         u64 start, int err);
+int btree_readahead_hook(struct btrfs_fs_info *fs_info,
+                         struct extent_buffer *eb, u64 start, int err);
 
 static inline int is_fstree(u64 rootid)
 {
fs/btrfs/disk-io.c

@@ -612,6 +612,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
         int found_level;
         struct extent_buffer *eb;
         struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+        struct btrfs_fs_info *fs_info = root->fs_info;
         int ret = 0;
         int reads_done;
 
@@ -637,20 +638,20 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
         found_start = btrfs_header_bytenr(eb);
         if (found_start != eb->start) {
-                btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu",
+                btrfs_err_rl(fs_info, "bad tree block start %llu %llu",
                              found_start, eb->start);
                 ret = -EIO;
                 goto err;
         }
-        if (check_tree_block_fsid(root->fs_info, eb)) {
-                btrfs_err_rl(eb->fs_info, "bad fsid on block %llu",
+        if (check_tree_block_fsid(fs_info, eb)) {
+                btrfs_err_rl(fs_info, "bad fsid on block %llu",
                              eb->start);
                 ret = -EIO;
                 goto err;
         }
         found_level = btrfs_header_level(eb);
         if (found_level >= BTRFS_MAX_LEVEL) {
-                btrfs_err(root->fs_info, "bad tree block level %d",
+                btrfs_err(fs_info, "bad tree block level %d",
                           (int)btrfs_header_level(eb));
                 ret = -EIO;
                 goto err;
         }
@@ -659,7 +660,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
         btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
                                        eb, found_level);
 
-        ret = csum_tree_block(root->fs_info, eb, 1);
+        ret = csum_tree_block(fs_info, eb, 1);
         if (ret) {
                 ret = -EIO;
                 goto err;
@@ -680,7 +681,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 err:
         if (reads_done &&
             test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
-                btree_readahead_hook(root, eb, eb->start, ret);
+                btree_readahead_hook(fs_info, eb, eb->start, ret);
 
         if (ret) {
                 /*
@@ -699,14 +700,13 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
 static int btree_io_failed_hook(struct page *page, int failed_mirror)
 {
         struct extent_buffer *eb;
-        struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
 
         eb = (struct extent_buffer *)page->private;
         set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
         eb->read_mirror = failed_mirror;
         atomic_dec(&eb->io_pages);
         if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
-                btree_readahead_hook(root, eb, eb->start, -EIO);
+                btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO);
         return -EIO;    /* we fixed nothing */
 }
 
@@ -2604,6 +2604,7 @@ int open_ctree(struct super_block *sb,
         atomic_set(&fs_info->nr_async_bios, 0);
         atomic_set(&fs_info->defrag_running, 0);
         atomic_set(&fs_info->qgroup_op_seq, 0);
+        atomic_set(&fs_info->reada_works_cnt, 0);
         atomic64_set(&fs_info->tree_mod_seq, 0);
         fs_info->sb = sb;
         fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
fs/btrfs/reada.c

@@ -72,7 +72,7 @@ struct reada_extent {
         spinlock_t              lock;
         struct reada_zone       *zones[BTRFS_MAX_MIRRORS];
         int                     nzones;
-        struct btrfs_device     *scheduled_for;
+        int                     scheduled;
 };
 
 struct reada_zone {
@@ -101,11 +101,12 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info);
 static void __reada_start_machine(struct btrfs_fs_info *fs_info);
 
 static int reada_add_block(struct reada_control *rc, u64 logical,
-                           struct btrfs_key *top, int level, u64 generation);
+                           struct btrfs_key *top, u64 generation);
 
 /* recurses */
 /* in case of err, eb might be NULL */
-static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
-                            u64 start, int err)
+static void __readahead_hook(struct btrfs_fs_info *fs_info,
+                             struct reada_extent *re, struct extent_buffer *eb,
+                             u64 start, int err)
 {
         int level = 0;
@@ -113,55 +114,40 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
         int i;
         u64 bytenr;
         u64 generation;
-        struct reada_extent *re;
-        struct btrfs_fs_info *fs_info = root->fs_info;
         struct list_head list;
-        unsigned long index = start >> PAGE_CACHE_SHIFT;
-        struct btrfs_device *for_dev;
 
         if (eb)
                 level = btrfs_header_level(eb);
 
-        /* find extent */
-        spin_lock(&fs_info->reada_lock);
-        re = radix_tree_lookup(&fs_info->reada_tree, index);
-        if (re)
-                re->refcnt++;
-        spin_unlock(&fs_info->reada_lock);
-        if (!re)
-                return -1;
-
         spin_lock(&re->lock);
         /*
          * just take the full list from the extent. afterwards we
          * don't need the lock anymore
          */
         list_replace_init(&re->extctl, &list);
-        for_dev = re->scheduled_for;
-        re->scheduled_for = NULL;
+        re->scheduled = 0;
         spin_unlock(&re->lock);
 
-        if (err == 0) {
-                nritems = level ? btrfs_header_nritems(eb) : 0;
-                generation = btrfs_header_generation(eb);
-                /*
-                 * FIXME: currently we just set nritems to 0 if this is a leaf,
-                 * effectively ignoring the content. In a next step we could
-                 * trigger more readahead depending from the content, e.g.
-                 * fetch the checksums for the extents in the leaf.
-                 */
-        } else {
-                /*
-                 * this is the error case, the extent buffer has not been
-                 * read correctly. We won't access anything from it and
-                 * just cleanup our data structures. Effectively this will
-                 * cut the branch below this node from read ahead.
-                 */
-                nritems = 0;
-                generation = 0;
-        }
+        /*
+         * this is the error case, the extent buffer has not been
+         * read correctly. We won't access anything from it and
+         * just cleanup our data structures. Effectively this will
+         * cut the branch below this node from read ahead.
+         */
+        if (err)
+                goto cleanup;
 
+        /*
+         * FIXME: currently we just set nritems to 0 if this is a leaf,
+         * effectively ignoring the content. In a next step we could
+         * trigger more readahead depending from the content, e.g.
+         * fetch the checksums for the extents in the leaf.
+         */
+        if (!level)
+                goto cleanup;
+
+        nritems = btrfs_header_nritems(eb);
+        generation = btrfs_header_generation(eb);
         for (i = 0; i < nritems; i++) {
                 struct reada_extctl *rec;
                 u64 n_gen;
@@ -188,7 +174,7 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
                          */
 #ifdef DEBUG
                         if (rec->generation != generation) {
-                                btrfs_debug(root->fs_info,
+                                btrfs_debug(fs_info,
                                             "generation mismatch for (%llu,%d,%llu) %llu != %llu",
                                             key.objectid, key.type, key.offset,
                                             rec->generation, generation);
@@ -197,10 +183,11 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
                         if (rec->generation == generation &&
                             btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
                             btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
-                                reada_add_block(rc, bytenr, &next_key,
-                                                level - 1, n_gen);
+                                reada_add_block(rc, bytenr, &next_key, n_gen);
                 }
         }
+
+cleanup:
         /*
          * free extctl records
          */
@@ -222,26 +209,37 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
                 reada_extent_put(fs_info, re);  /* one ref for each entry */
         }
-        reada_extent_put(fs_info, re);  /* our ref */
-        if (for_dev)
-                atomic_dec(&for_dev->reada_in_flight);
 
-        return 0;
+        return;
 }
 
 /*
  * start is passed separately in case eb in NULL, which may be the case with
  * failed I/O
  */
-int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
-                         u64 start, int err)
+int btree_readahead_hook(struct btrfs_fs_info *fs_info,
+                         struct extent_buffer *eb, u64 start, int err)
 {
-        int ret;
+        int ret = 0;
+        struct reada_extent *re;
 
-        ret = __readahead_hook(root, eb, start, err);
+        /* find extent */
+        spin_lock(&fs_info->reada_lock);
+        re = radix_tree_lookup(&fs_info->reada_tree,
+                               start >> PAGE_CACHE_SHIFT);
+        if (re)
+                re->refcnt++;
+        spin_unlock(&fs_info->reada_lock);
+        if (!re) {
+                ret = -1;
+                goto start_machine;
+        }
 
-        reada_start_machine(root->fs_info);
+        __readahead_hook(fs_info, re, eb, start, err);
+        reada_extent_put(fs_info, re);  /* our ref */
 
+start_machine:
+        reada_start_machine(fs_info);
         return ret;
 }
@@ -260,18 +258,14 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
         spin_lock(&fs_info->reada_lock);
         ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
                                      logical >> PAGE_CACHE_SHIFT, 1);
-        if (ret == 1)
+        if (ret == 1 && logical >= zone->start && logical <= zone->end) {
                 kref_get(&zone->refcnt);
-        spin_unlock(&fs_info->reada_lock);
-
-        if (ret == 1) {
-                if (logical >= zone->start && logical < zone->end)
-                        return zone;
-                spin_lock(&fs_info->reada_lock);
-                kref_put(&zone->refcnt, reada_zone_release);
                 spin_unlock(&fs_info->reada_lock);
+                return zone;
         }
 
+        spin_unlock(&fs_info->reada_lock);
+
         cache = btrfs_lookup_block_group(fs_info, logical);
         if (!cache)
                 return NULL;
@@ -307,8 +301,10 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
                 kfree(zone);
                 ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
                                              logical >> PAGE_CACHE_SHIFT, 1);
-                if (ret == 1)
+                if (ret == 1 && logical >= zone->start && logical <= zone->end)
                         kref_get(&zone->refcnt);
+                else
+                        zone = NULL;
         }
         spin_unlock(&fs_info->reada_lock);
 
@@ -317,7 +313,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
 static struct reada_extent *reada_find_extent(struct btrfs_root *root,
                                               u64 logical,
-                                              struct btrfs_key *top, int level)
+                                              struct btrfs_key *top)
 {
         int ret;
         struct reada_extent *re = NULL;
@@ -330,9 +326,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
         u64 length;
         int real_stripes;
         int nzones = 0;
-        int i;
         unsigned long index = logical >> PAGE_CACHE_SHIFT;
         int dev_replace_is_ongoing;
+        int have_zone = 0;
 
         spin_lock(&fs_info->reada_lock);
         re = radix_tree_lookup(&fs_info->reada_tree, index);
@@ -375,11 +371,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
                 struct reada_zone *zone;
 
                 dev = bbio->stripes[nzones].dev;
+
+                /* cannot read ahead on missing device. */
+                if (!dev->bdev)
+                        continue;
+
                 zone = reada_find_zone(fs_info, dev, logical, bbio);
                 if (!zone)
-                        break;
+                        continue;
 
-                re->zones[nzones] = zone;
+                re->zones[re->nzones++] = zone;
                 spin_lock(&zone->lock);
                 if (!zone->elems)
                         kref_get(&zone->refcnt);
@@ -389,8 +390,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
                 kref_put(&zone->refcnt, reada_zone_release);
                 spin_unlock(&fs_info->reada_lock);
         }
-        re->nzones = nzones;
-        if (nzones == 0) {
+        if (re->nzones == 0) {
                 /* not a single zone found, error and out */
                 goto error;
         }
@@ -415,8 +415,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
         prev_dev = NULL;
         dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
                         &fs_info->dev_replace);
-        for (i = 0; i < nzones; ++i) {
-                dev = bbio->stripes[i].dev;
+        for (nzones = 0; nzones < re->nzones; ++nzones) {
+                dev = re->zones[nzones]->device;
+
                 if (dev == prev_dev) {
                         /*
                          * in case of DUP, just add the first zone. As both
@@ -427,15 +428,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
                          */
                         continue;
                 }
-                if (!dev->bdev) {
-                        /*
-                         * cannot read ahead on missing device, but for RAID5/6,
-                         * REQ_GET_READ_MIRRORS return 1. So don't skip missing
-                         * device for such case.
-                         */
-                        if (nzones > 1)
-                                continue;
-                }
+                if (!dev->bdev)
+                        continue;
+
                 if (dev_replace_is_ongoing &&
                     dev == fs_info->dev_replace.tgtdev) {
                         /*
@@ -447,8 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
                 prev_dev = dev;
                 ret = radix_tree_insert(&dev->reada_extents, index, re);
                 if (ret) {
-                        while (--i >= 0) {
-                                dev = bbio->stripes[i].dev;
+                        while (--nzones >= 0) {
+                                dev = re->zones[nzones]->device;
                                 BUG_ON(dev == NULL);
                                 /* ignore whether the entry was inserted */
                                 radix_tree_delete(&dev->reada_extents, index);
@@ -459,18 +454,21 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
                         btrfs_dev_replace_unlock(&fs_info->dev_replace);
                         goto error;
                 }
+                have_zone = 1;
         }
         spin_unlock(&fs_info->reada_lock);
         btrfs_dev_replace_unlock(&fs_info->dev_replace);
 
+        if (!have_zone)
+                goto error;
+
         btrfs_put_bbio(bbio);
         return re;
 
 error:
-        while (nzones) {
+        for (nzones = 0; nzones < re->nzones; ++nzones) {
                 struct reada_zone *zone;
 
-                --nzones;
                 zone = re->zones[nzones];
                 kref_get(&zone->refcnt);
                 spin_lock(&zone->lock);
@@ -531,8 +529,6 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
                 kref_put(&zone->refcnt, reada_zone_release);
                 spin_unlock(&fs_info->reada_lock);
         }
-        if (re->scheduled_for)
-                atomic_dec(&re->scheduled_for->reada_in_flight);
 
         kfree(re);
 }
@@ -556,13 +552,13 @@ static void reada_control_release(struct kref *kref)
 }
 
 static int reada_add_block(struct reada_control *rc, u64 logical,
-                           struct btrfs_key *top, int level, u64 generation)
+                           struct btrfs_key *top, u64 generation)
 {
         struct btrfs_root *root = rc->root;
         struct reada_extent *re;
         struct reada_extctl *rec;
 
-        re = reada_find_extent(root, logical, top, level); /* takes one ref */
+        re = reada_find_extent(root, logical, top); /* takes one ref */
         if (!re)
                 return -1;
 
@@ -662,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
         u64 logical;
         int ret;
         int i;
-        int need_kick = 0;
 
         spin_lock(&fs_info->reada_lock);
         if (dev->reada_curr_zone == NULL) {
@@ -679,7 +674,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
          */
         ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
                                      dev->reada_next >> PAGE_CACHE_SHIFT, 1);
-        if (ret == 0 || re->logical >= dev->reada_curr_zone->end) {
+        if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
                 ret = reada_pick_zone(dev);
                 if (!ret) {
                         spin_unlock(&fs_info->reada_lock);
@@ -698,6 +693,15 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
         spin_unlock(&fs_info->reada_lock);
 
+        spin_lock(&re->lock);
+        if (re->scheduled || list_empty(&re->extctl)) {
+                spin_unlock(&re->lock);
+                reada_extent_put(fs_info, re);
+                return 0;
+        }
+        re->scheduled = 1;
+        spin_unlock(&re->lock);
+
         /*
          * find mirror num
          */
@@ -709,29 +713,20 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
         }
         logical = re->logical;
 
-        spin_lock(&re->lock);
-        if (re->scheduled_for == NULL) {
-                re->scheduled_for = dev;
-                need_kick = 1;
-        }
-        spin_unlock(&re->lock);
-
-        reada_extent_put(fs_info, re);
-
-        if (!need_kick)
-                return 0;
-
         atomic_inc(&dev->reada_in_flight);
         ret = reada_tree_block_flagged(fs_info->extent_root, logical,
                         mirror_num, &eb);
         if (ret)
-                __readahead_hook(fs_info->extent_root, NULL, logical, ret);
+                __readahead_hook(fs_info, re, NULL, logical, ret);
         else if (eb)
-                __readahead_hook(fs_info->extent_root, eb, eb->start, ret);
+                __readahead_hook(fs_info, re, eb, eb->start, ret);
 
         if (eb)
                 free_extent_buffer(eb);
 
+        atomic_dec(&dev->reada_in_flight);
+        reada_extent_put(fs_info, re);
+
         return 1;
 }
@@ -752,6 +747,8 @@ static void reada_start_machine_worker(struct btrfs_work *work)
         set_task_ioprio(current, BTRFS_IOPRIO_READA);
         __reada_start_machine(fs_info);
         set_task_ioprio(current, old_ioprio);
+
+        atomic_dec(&fs_info->reada_works_cnt);
 }
 
 static void __reada_start_machine(struct btrfs_fs_info *fs_info)
@@ -783,8 +780,12 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
          * enqueue to workers to finish it. This will distribute the load to
          * the cores.
          */
-        for (i = 0; i < 2; ++i)
+        for (i = 0; i < 2; ++i) {
                 reada_start_machine(fs_info);
+                if (atomic_read(&fs_info->reada_works_cnt) >
+                    BTRFS_MAX_MIRRORS * 2)
+                        break;
+        }
 }
 
 static void reada_start_machine(struct btrfs_fs_info *fs_info)
@@ -801,6 +802,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
         rmw->fs_info = fs_info;
 
         btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
+        atomic_inc(&fs_info->reada_works_cnt);
 }
 
 #ifdef DEBUG
@@ -848,10 +850,9 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
                 if (ret == 0)
                         break;
                 printk(KERN_DEBUG
-                        "  re: logical %llu size %u empty %d for %lld",
+                        "  re: logical %llu size %u empty %d scheduled %d",
                         re->logical, fs_info->tree_root->nodesize,
-                        list_empty(&re->extctl), re->scheduled_for ?
-                        re->scheduled_for->devid : -1);
+                        list_empty(&re->extctl), re->scheduled);
 
                 for (i = 0; i < re->nzones; ++i) {
                         printk(KERN_CONT " zone %llu-%llu devs",
@@ -878,19 +879,14 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
                                              index, 1);
                 if (ret == 0)
                         break;
-                if (!re->scheduled_for) {
+                if (!re->scheduled) {
                         index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
                         continue;
                 }
                 printk(KERN_DEBUG
-                        "re: logical %llu size %u list empty %d for %lld",
+                        "re: logical %llu size %u list empty %d scheduled %d",
                         re->logical, fs_info->tree_root->nodesize,
-                        list_empty(&re->extctl),
-                        re->scheduled_for ? re->scheduled_for->devid : -1);
-                for (i = 0; i < re->nzones; ++i) {
-                        printk(KERN_CONT " zone %llu-%llu devs",
-                                re->zones[i]->start,
-                                re->zones[i]->end);
+                        list_empty(&re->extctl), re->scheduled);
                 for (i = 0; i < re->nzones; ++i) {
                         printk(KERN_CONT " zone %llu-%llu devs",
                                 re->zones[i]->start,
@@ -900,7 +896,6 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
                                         re->zones[i]->devs[j]->devid);
                         }
                 }
-        }
                 printk(KERN_CONT "\n");
                 index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
         }
@@ -917,7 +912,6 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
         struct reada_control *rc;
         u64 start;
         u64 generation;
-        int level;
         int ret;
         struct extent_buffer *node;
         static struct btrfs_key max_key = {
@@ -940,11 +934,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
         node = btrfs_root_node(root);
         start = node->start;
-        level = btrfs_header_level(node);
         generation = btrfs_header_generation(node);
         free_extent_buffer(node);
 
-        ret = reada_add_block(rc, start, &max_key, level, generation);
+        ret = reada_add_block(rc, start, &max_key, generation);
         if (ret) {
                 kfree(rc);
                 return ERR_PTR(ret);
@@ -959,8 +952,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
 int btrfs_reada_wait(void *handle)
 {
         struct reada_control *rc = handle;
+        struct btrfs_fs_info *fs_info = rc->root->fs_info;
 
         while (atomic_read(&rc->elems)) {
+                if (!atomic_read(&fs_info->reada_works_cnt))
+                        reada_start_machine(fs_info);
                 wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
                                    5 * HZ);
                 dump_devs(rc->root->fs_info,
@@ -977,9 +973,13 @@ int btrfs_reada_wait(void *handle)
 int btrfs_reada_wait(void *handle)
 {
         struct reada_control *rc = handle;
+        struct btrfs_fs_info *fs_info = rc->root->fs_info;
 
         while (atomic_read(&rc->elems)) {
-                wait_event(rc->wait, atomic_read(&rc->elems) == 0);
+                if (!atomic_read(&fs_info->reada_works_cnt))
+                        reada_start_machine(fs_info);
+                wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
+                                   (HZ + 9) / 10);
         }
 
         kref_put(&rc->refcnt, reada_control_release);
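Taken together, the reada.c changes above replace the per-extent scheduled_for device pointer with a simple scheduled flag that is set under re->lock before a read is submitted and cleared again in __readahead_hook(), and they add a reada_works_cnt counter that __reada_start_machine() uses to stop queuing workers once more than BTRFS_MAX_MIRRORS * 2 are pending, with btrfs_reada_wait() restarting the state machine when the count drops to zero. The sketch below is a minimal, standalone user-space illustration of that claim-then-complete pattern only; the demo_* names, the pthread mutex, and the MAX_WORKS cap are invented for the example and are not the btrfs implementation.

/*
 * Illustration only: a per-extent "scheduled" flag taken under a lock so
 * each extent is submitted at most once until its completion hook runs,
 * plus a global in-flight counter used to cap how much work gets kicked.
 * All names are hypothetical; build with: cc -pthread demo.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define MAX_WORKS 6     /* stands in for BTRFS_MAX_MIRRORS * 2 */

struct demo_extent {
        pthread_mutex_t lock;
        int scheduled;                  /* 1 while a read is in flight */
        unsigned long long logical;
};

static atomic_int works_cnt;            /* stands in for reada_works_cnt */

/* Try to claim the extent; only one caller wins until demo_hook() runs. */
static int demo_try_schedule(struct demo_extent *re)
{
        int claimed = 0;

        pthread_mutex_lock(&re->lock);
        if (!re->scheduled) {
                re->scheduled = 1;
                claimed = 1;
        }
        pthread_mutex_unlock(&re->lock);
        return claimed;
}

/* Completion hook: clear the flag so the extent can be scheduled again. */
static void demo_hook(struct demo_extent *re)
{
        pthread_mutex_lock(&re->lock);
        re->scheduled = 0;
        pthread_mutex_unlock(&re->lock);
}

static void demo_kick_worker(struct demo_extent *re)
{
        if (atomic_load(&works_cnt) > MAX_WORKS)
                return;                 /* enough work queued already */
        if (!demo_try_schedule(re))
                return;                 /* someone else already submitted it */

        atomic_fetch_add(&works_cnt, 1);
        printf("submitting read for extent at %llu\n", re->logical);
        /* a real implementation would queue the read here */
        demo_hook(re);
        atomic_fetch_sub(&works_cnt, 1);
}

int main(void)
{
        struct demo_extent re = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .logical = 4096,
        };

        demo_kick_worker(&re);  /* first call claims and submits */
        demo_kick_worker(&re);  /* skipped if the first were still in flight */
        return 0;
}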