Commit f7b00693 authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "This has our collection of bug fixes.  I missed the last rc because I
  thought our patches were making NFS crash during my xfs test runs.
  Turns out it was an NFS client bug fixed by someone else while I tried
  to bisect it.

  All of these fixes are small, but some are fairly high impact.  The
  biggest are fixes for our mount -o remount handling, a deadlock due to
  GFP_KERNEL allocations in readdir, and a RAID10 error handling bug.

  This was tested against both 3.3 and Linus' master as of this morning."

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (26 commits)
  Btrfs: reduce lock contention during extent insertion
  Btrfs: avoid deadlocks from GFP_KERNEL allocations during btrfs_real_readdir
  Btrfs: Fix space checking during fs resize
  Btrfs: fix block_rsv and space_info lock ordering
  Btrfs: Prevent root_list corruption
  Btrfs: fix repair code for RAID10
  Btrfs: do not start delalloc inodes during sync
  Btrfs: fix that check_int_data mount option was ignored
  Btrfs: don't count CRC or header errors twice while scrubbing
  Btrfs: fix btrfs_ioctl_dev_info() crash on missing device
  btrfs: don't return EINTR
  Btrfs: double unlock bug in error handling
  Btrfs: always store the mirror we read the eb from
  fs/btrfs/volumes.c: add missing free_fs_devices
  btrfs: fix early abort in 'remount'
  Btrfs: fix max chunk size check in chunk allocator
  Btrfs: add missing read locks in backref.c
  Btrfs: don't call free_extent_buffer twice in iterate_irefs
  Btrfs: Make free_ipath() deal gracefully with NULL pointers
  Btrfs: avoid possible use-after-free in clear_extent_bit()
  ...
parents b990f9b3 dc7fdde3
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "ulist.h" #include "ulist.h"
#include "transaction.h" #include "transaction.h"
#include "delayed-ref.h" #include "delayed-ref.h"
#include "locking.h"
/* /*
* this structure records all encountered refs on the way up to the root * this structure records all encountered refs on the way up to the root
...@@ -893,18 +894,22 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, ...@@ -893,18 +894,22 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
s64 bytes_left = size - 1; s64 bytes_left = size - 1;
struct extent_buffer *eb = eb_in; struct extent_buffer *eb = eb_in;
struct btrfs_key found_key; struct btrfs_key found_key;
int leave_spinning = path->leave_spinning;
if (bytes_left >= 0) if (bytes_left >= 0)
dest[bytes_left] = '\0'; dest[bytes_left] = '\0';
path->leave_spinning = 1;
while (1) { while (1) {
len = btrfs_inode_ref_name_len(eb, iref); len = btrfs_inode_ref_name_len(eb, iref);
bytes_left -= len; bytes_left -= len;
if (bytes_left >= 0) if (bytes_left >= 0)
read_extent_buffer(eb, dest + bytes_left, read_extent_buffer(eb, dest + bytes_left,
(unsigned long)(iref + 1), len); (unsigned long)(iref + 1), len);
if (eb != eb_in) if (eb != eb_in) {
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb); free_extent_buffer(eb);
}
ret = inode_ref_info(parent, 0, fs_root, path, &found_key); ret = inode_ref_info(parent, 0, fs_root, path, &found_key);
if (ret > 0) if (ret > 0)
ret = -ENOENT; ret = -ENOENT;
...@@ -919,8 +924,11 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, ...@@ -919,8 +924,11 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
slot = path->slots[0]; slot = path->slots[0];
eb = path->nodes[0]; eb = path->nodes[0];
/* make sure we can use eb after releasing the path */ /* make sure we can use eb after releasing the path */
if (eb != eb_in) if (eb != eb_in) {
atomic_inc(&eb->refs); atomic_inc(&eb->refs);
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
}
btrfs_release_path(path); btrfs_release_path(path);
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
...@@ -931,6 +939,7 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, ...@@ -931,6 +939,7 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
} }
btrfs_release_path(path); btrfs_release_path(path);
path->leave_spinning = leave_spinning;
if (ret) if (ret)
return ERR_PTR(ret); return ERR_PTR(ret);
...@@ -1247,7 +1256,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, ...@@ -1247,7 +1256,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_path *path, struct btrfs_path *path,
iterate_irefs_t *iterate, void *ctx) iterate_irefs_t *iterate, void *ctx)
{ {
int ret; int ret = 0;
int slot; int slot;
u32 cur; u32 cur;
u32 len; u32 len;
...@@ -1259,7 +1268,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, ...@@ -1259,7 +1268,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
struct btrfs_inode_ref *iref; struct btrfs_inode_ref *iref;
struct btrfs_key found_key; struct btrfs_key found_key;
while (1) { while (!ret) {
path->leave_spinning = 1;
ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path,
&found_key); &found_key);
if (ret < 0) if (ret < 0)
...@@ -1275,6 +1285,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, ...@@ -1275,6 +1285,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
eb = path->nodes[0]; eb = path->nodes[0];
/* make sure we can use eb after releasing the path */ /* make sure we can use eb after releasing the path */
atomic_inc(&eb->refs); atomic_inc(&eb->refs);
btrfs_tree_read_lock(eb);
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
btrfs_release_path(path); btrfs_release_path(path);
item = btrfs_item_nr(eb, slot); item = btrfs_item_nr(eb, slot);
...@@ -1288,13 +1300,12 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, ...@@ -1288,13 +1300,12 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root,
(unsigned long long)found_key.objectid, (unsigned long long)found_key.objectid,
(unsigned long long)fs_root->objectid); (unsigned long long)fs_root->objectid);
ret = iterate(parent, iref, eb, ctx); ret = iterate(parent, iref, eb, ctx);
if (ret) { if (ret)
free_extent_buffer(eb);
break; break;
}
len = sizeof(*iref) + name_len; len = sizeof(*iref) + name_len;
iref = (struct btrfs_inode_ref *)((char *)iref + len); iref = (struct btrfs_inode_ref *)((char *)iref + len);
} }
btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb); free_extent_buffer(eb);
} }
...@@ -1414,6 +1425,8 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, ...@@ -1414,6 +1425,8 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
void free_ipath(struct inode_fs_paths *ipath) void free_ipath(struct inode_fs_paths *ipath)
{ {
if (!ipath)
return;
kfree(ipath->fspath); kfree(ipath->fspath);
kfree(ipath); kfree(ipath);
} }
...@@ -1078,7 +1078,7 @@ struct btrfs_fs_info { ...@@ -1078,7 +1078,7 @@ struct btrfs_fs_info {
* is required instead of the faster short fsync log commits * is required instead of the faster short fsync log commits
*/ */
u64 last_trans_log_full_commit; u64 last_trans_log_full_commit;
unsigned long mount_opt:21; unsigned long mount_opt;
unsigned long compress_type:4; unsigned long compress_type:4;
u64 max_inline; u64 max_inline;
u64 alloc_start; u64 alloc_start;
......
...@@ -383,17 +383,16 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, ...@@ -383,17 +383,16 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
break; break;
if (!failed_mirror) {
failed = 1;
printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror);
failed_mirror = eb->failed_mirror;
}
num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
eb->start, eb->len); eb->start, eb->len);
if (num_copies == 1) if (num_copies == 1)
break; break;
if (!failed_mirror) {
failed = 1;
failed_mirror = eb->read_mirror;
}
mirror_num++; mirror_num++;
if (mirror_num == failed_mirror) if (mirror_num == failed_mirror)
mirror_num++; mirror_num++;
...@@ -564,7 +563,7 @@ struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree, ...@@ -564,7 +563,7 @@ struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
} }
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state) struct extent_state *state, int mirror)
{ {
struct extent_io_tree *tree; struct extent_io_tree *tree;
u64 found_start; u64 found_start;
...@@ -589,6 +588,7 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, ...@@ -589,6 +588,7 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
if (!reads_done) if (!reads_done)
goto err; goto err;
eb->read_mirror = mirror;
if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
ret = -EIO; ret = -EIO;
goto err; goto err;
...@@ -652,7 +652,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) ...@@ -652,7 +652,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
eb = (struct extent_buffer *)page->private; eb = (struct extent_buffer *)page->private;
set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
eb->failed_mirror = failed_mirror; eb->read_mirror = failed_mirror;
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(root, eb, eb->start, -EIO); btree_readahead_hook(root, eb, eb->start, -EIO);
return -EIO; /* we fixed nothing */ return -EIO; /* we fixed nothing */
...@@ -2254,9 +2254,9 @@ int open_ctree(struct super_block *sb, ...@@ -2254,9 +2254,9 @@ int open_ctree(struct super_block *sb,
goto fail_sb_buffer; goto fail_sb_buffer;
} }
if (sectorsize < PAGE_SIZE) { if (sectorsize != PAGE_SIZE) {
printk(KERN_WARNING "btrfs: Incompatible sector size " printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) "
"found on %s\n", sb->s_id); "found on %s\n", (unsigned long)sectorsize, sb->s_id);
goto fail_sb_buffer; goto fail_sb_buffer;
} }
......
...@@ -2301,6 +2301,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ...@@ -2301,6 +2301,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
if (ret) { if (ret) {
printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret); printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
spin_lock(&delayed_refs->lock);
return ret; return ret;
} }
...@@ -2331,6 +2332,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, ...@@ -2331,6 +2332,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
if (ret) { if (ret) {
printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret); printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
spin_lock(&delayed_refs->lock);
return ret; return ret;
} }
...@@ -3769,13 +3771,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root, ...@@ -3769,13 +3771,10 @@ static int reserve_metadata_bytes(struct btrfs_root *root,
*/ */
if (current->journal_info) if (current->journal_info)
return -EAGAIN; return -EAGAIN;
ret = wait_event_interruptible(space_info->wait, ret = wait_event_killable(space_info->wait, !space_info->flush);
!space_info->flush); /* Must have been killed, return */
/* Must have been interrupted, return */ if (ret)
if (ret) {
printk(KERN_DEBUG "btrfs: %s returning -EINTR\n", __func__);
return -EINTR; return -EINTR;
}
spin_lock(&space_info->lock); spin_lock(&space_info->lock);
} }
...@@ -4215,8 +4214,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) ...@@ -4215,8 +4214,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
num_bytes = calc_global_metadata_size(fs_info); num_bytes = calc_global_metadata_size(fs_info);
spin_lock(&block_rsv->lock);
spin_lock(&sinfo->lock); spin_lock(&sinfo->lock);
spin_lock(&block_rsv->lock);
block_rsv->size = num_bytes; block_rsv->size = num_bytes;
...@@ -4242,8 +4241,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) ...@@ -4242,8 +4241,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
block_rsv->full = 1; block_rsv->full = 1;
} }
spin_unlock(&sinfo->lock);
spin_unlock(&block_rsv->lock); spin_unlock(&block_rsv->lock);
spin_unlock(&sinfo->lock);
} }
static void init_global_block_rsv(struct btrfs_fs_info *fs_info) static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
......
...@@ -402,20 +402,28 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, ...@@ -402,20 +402,28 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
return 0; return 0;
} }
static struct extent_state *next_state(struct extent_state *state)
{
struct rb_node *next = rb_next(&state->rb_node);
if (next)
return rb_entry(next, struct extent_state, rb_node);
else
return NULL;
}
/* /*
* utility function to clear some bits in an extent state struct. * utility function to clear some bits in an extent state struct.
* it will optionally wake up any one waiting on this state (wake == 1), or * it will optionally wake up any one waiting on this state (wake == 1)
* forcibly remove the state from the tree (delete == 1).
* *
* If no bits are set on the state struct after clearing things, the * If no bits are set on the state struct after clearing things, the
* struct is freed and removed from the tree * struct is freed and removed from the tree
*/ */
static int clear_state_bit(struct extent_io_tree *tree, static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
struct extent_state *state, struct extent_state *state,
int *bits, int wake) int *bits, int wake)
{ {
struct extent_state *next;
int bits_to_clear = *bits & ~EXTENT_CTLBITS; int bits_to_clear = *bits & ~EXTENT_CTLBITS;
int ret = state->state & bits_to_clear;
if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) {
u64 range = state->end - state->start + 1; u64 range = state->end - state->start + 1;
...@@ -427,6 +435,7 @@ static int clear_state_bit(struct extent_io_tree *tree, ...@@ -427,6 +435,7 @@ static int clear_state_bit(struct extent_io_tree *tree,
if (wake) if (wake)
wake_up(&state->wq); wake_up(&state->wq);
if (state->state == 0) { if (state->state == 0) {
next = next_state(state);
if (state->tree) { if (state->tree) {
rb_erase(&state->rb_node, &tree->state); rb_erase(&state->rb_node, &tree->state);
state->tree = NULL; state->tree = NULL;
...@@ -436,8 +445,9 @@ static int clear_state_bit(struct extent_io_tree *tree, ...@@ -436,8 +445,9 @@ static int clear_state_bit(struct extent_io_tree *tree,
} }
} else { } else {
merge_state(tree, state); merge_state(tree, state);
next = next_state(state);
} }
return ret; return next;
} }
static struct extent_state * static struct extent_state *
...@@ -476,7 +486,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -476,7 +486,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state *state; struct extent_state *state;
struct extent_state *cached; struct extent_state *cached;
struct extent_state *prealloc = NULL; struct extent_state *prealloc = NULL;
struct rb_node *next_node;
struct rb_node *node; struct rb_node *node;
u64 last_end; u64 last_end;
int err; int err;
...@@ -528,14 +537,11 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -528,14 +537,11 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
WARN_ON(state->end < start); WARN_ON(state->end < start);
last_end = state->end; last_end = state->end;
if (state->end < end && !need_resched())
next_node = rb_next(&state->rb_node);
else
next_node = NULL;
/* the state doesn't have the wanted bits, go ahead */ /* the state doesn't have the wanted bits, go ahead */
if (!(state->state & bits)) if (!(state->state & bits)) {
state = next_state(state);
goto next; goto next;
}
/* /*
* | ---- desired range ---- | * | ---- desired range ---- |
...@@ -593,16 +599,13 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, ...@@ -593,16 +599,13 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
goto out; goto out;
} }
clear_state_bit(tree, state, &bits, wake); state = clear_state_bit(tree, state, &bits, wake);
next: next:
if (last_end == (u64)-1) if (last_end == (u64)-1)
goto out; goto out;
start = last_end + 1; start = last_end + 1;
if (start <= end && next_node) { if (start <= end && state && !need_resched())
state = rb_entry(next_node, struct extent_state,
rb_node);
goto hit_next; goto hit_next;
}
goto search_again; goto search_again;
out: out:
...@@ -2301,7 +2304,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) ...@@ -2301,7 +2304,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
u64 start; u64 start;
u64 end; u64 end;
int whole_page; int whole_page;
int failed_mirror; int mirror;
int ret; int ret;
if (err) if (err)
...@@ -2340,20 +2343,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err) ...@@ -2340,20 +2343,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
} }
spin_unlock(&tree->lock); spin_unlock(&tree->lock);
mirror = (int)(unsigned long)bio->bi_bdev;
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end, ret = tree->ops->readpage_end_io_hook(page, start, end,
state); state, mirror);
if (ret) if (ret)
uptodate = 0; uptodate = 0;
else else
clean_io_failure(start, page); clean_io_failure(start, page);
} }
if (!uptodate)
failed_mirror = (int)(unsigned long)bio->bi_bdev;
if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
ret = tree->ops->readpage_io_failed_hook(page, failed_mirror); ret = tree->ops->readpage_io_failed_hook(page, mirror);
if (!ret && !err && if (!ret && !err &&
test_bit(BIO_UPTODATE, &bio->bi_flags)) test_bit(BIO_UPTODATE, &bio->bi_flags))
uptodate = 1; uptodate = 1;
...@@ -2368,8 +2369,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) ...@@ -2368,8 +2369,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
* can't handle the error it will return -EIO and we * can't handle the error it will return -EIO and we
* remain responsible for that page. * remain responsible for that page.
*/ */
ret = bio_readpage_error(bio, page, start, end, ret = bio_readpage_error(bio, page, start, end, mirror, NULL);
failed_mirror, NULL);
if (ret == 0) { if (ret == 0) {
uptodate = uptodate =
test_bit(BIO_UPTODATE, &bio->bi_flags); test_bit(BIO_UPTODATE, &bio->bi_flags);
...@@ -4462,7 +4462,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, ...@@ -4462,7 +4462,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
} }
clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
eb->failed_mirror = 0; eb->read_mirror = 0;
atomic_set(&eb->io_pages, num_reads); atomic_set(&eb->io_pages, num_reads);
for (i = start_i; i < num_pages; i++) { for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i); page = extent_buffer_page(eb, i);
......
...@@ -79,7 +79,7 @@ struct extent_io_ops { ...@@ -79,7 +79,7 @@ struct extent_io_ops {
u64 start, u64 end, u64 start, u64 end,
struct extent_state *state); struct extent_state *state);
int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state); struct extent_state *state, int mirror);
int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
struct extent_state *state, int uptodate); struct extent_state *state, int uptodate);
void (*set_bit_hook)(struct inode *inode, struct extent_state *state, void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
...@@ -135,7 +135,7 @@ struct extent_buffer { ...@@ -135,7 +135,7 @@ struct extent_buffer {
spinlock_t refs_lock; spinlock_t refs_lock;
atomic_t refs; atomic_t refs;
atomic_t io_pages; atomic_t io_pages;
int failed_mirror; int read_mirror;
struct list_head leak_list; struct list_head leak_list;
struct rcu_head rcu_head; struct rcu_head rcu_head;
pid_t lock_owner; pid_t lock_owner;
......
...@@ -567,6 +567,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, ...@@ -567,6 +567,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
int extent_type; int extent_type;
int recow; int recow;
int ret; int ret;
int modify_tree = -1;
if (drop_cache) if (drop_cache)
btrfs_drop_extent_cache(inode, start, end - 1, 0); btrfs_drop_extent_cache(inode, start, end - 1, 0);
...@@ -575,10 +576,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, ...@@ -575,10 +576,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
if (!path) if (!path)
return -ENOMEM; return -ENOMEM;
if (start >= BTRFS_I(inode)->disk_i_size)
modify_tree = 0;
while (1) { while (1) {
recow = 0; recow = 0;
ret = btrfs_lookup_file_extent(trans, root, path, ino, ret = btrfs_lookup_file_extent(trans, root, path, ino,
search_start, -1); search_start, modify_tree);
if (ret < 0) if (ret < 0)
break; break;
if (ret > 0 && path->slots[0] > 0 && search_start == start) { if (ret > 0 && path->slots[0] > 0 && search_start == start) {
...@@ -634,7 +638,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, ...@@ -634,7 +638,8 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
} }
search_start = max(key.offset, start); search_start = max(key.offset, start);
if (recow) { if (recow || !modify_tree) {
modify_tree = -1;
btrfs_release_path(path); btrfs_release_path(path);
continue; continue;
} }
......
...@@ -1947,7 +1947,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, ...@@ -1947,7 +1947,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
* extent_io.c will try to find good copies for us. * extent_io.c will try to find good copies for us.
*/ */
static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state) struct extent_state *state, int mirror)
{ {
size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
struct inode *inode = page->mapping->host; struct inode *inode = page->mapping->host;
...@@ -4069,7 +4069,7 @@ static struct inode *new_simple_dir(struct super_block *s, ...@@ -4069,7 +4069,7 @@ static struct inode *new_simple_dir(struct super_block *s,
BTRFS_I(inode)->dummy_inode = 1; BTRFS_I(inode)->dummy_inode = 1;
inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID;
inode->i_op = &simple_dir_inode_operations; inode->i_op = &btrfs_dir_ro_inode_operations;
inode->i_fop = &simple_dir_operations; inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
...@@ -4140,14 +4140,18 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) ...@@ -4140,14 +4140,18 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
static int btrfs_dentry_delete(const struct dentry *dentry) static int btrfs_dentry_delete(const struct dentry *dentry)
{ {
struct btrfs_root *root; struct btrfs_root *root;
struct inode *inode = dentry->d_inode;
if (!dentry->d_inode && !IS_ROOT(dentry)) if (!inode && !IS_ROOT(dentry))
dentry = dentry->d_parent; inode = dentry->d_parent->d_inode;
if (dentry->d_inode) { if (inode) {
root = BTRFS_I(dentry->d_inode)->root; root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0) if (btrfs_root_refs(&root->root_item) == 0)
return 1; return 1;
if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
return 1;
} }
return 0; return 0;
} }
...@@ -4188,7 +4192,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, ...@@ -4188,7 +4192,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
struct btrfs_path *path; struct btrfs_path *path;
struct list_head ins_list; struct list_head ins_list;
struct list_head del_list; struct list_head del_list;
struct qstr q;
int ret; int ret;
struct extent_buffer *leaf; struct extent_buffer *leaf;
int slot; int slot;
...@@ -4279,7 +4282,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, ...@@ -4279,7 +4282,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
while (di_cur < di_total) { while (di_cur < di_total) {
struct btrfs_key location; struct btrfs_key location;
struct dentry *tmp;
if (verify_dir_item(root, leaf, di)) if (verify_dir_item(root, leaf, di))
break; break;
...@@ -4300,35 +4302,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, ...@@ -4300,35 +4302,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
btrfs_dir_item_key_to_cpu(leaf, di, &location); btrfs_dir_item_key_to_cpu(leaf, di, &location);
q.name = name_ptr;
q.len = name_len;
q.hash = full_name_hash(q.name, q.len);
tmp = d_lookup(filp->f_dentry, &q);
if (!tmp) {
struct btrfs_key *newkey;
newkey = kzalloc(sizeof(struct btrfs_key),
GFP_NOFS);
if (!newkey)
goto no_dentry;
tmp = d_alloc(filp->f_dentry, &q);
if (!tmp) {
kfree(newkey);
dput(tmp);
goto no_dentry;
}
memcpy(newkey, &location,
sizeof(struct btrfs_key));
tmp->d_fsdata = newkey;
tmp->d_flags |= DCACHE_NEED_LOOKUP;
d_rehash(tmp);
dput(tmp);
} else {
dput(tmp);
}
no_dentry:
/* is this a reference to our own snapshot? If so /* is this a reference to our own snapshot? If so
* skip it * skip it.
*
* In contrast to old kernels, we insert the snapshot's
* dir item and dir index after it has been created, so
* we won't find a reference to our own snapshot. We
* still keep the following code for backward
* compatibility.
*/ */
if (location.type == BTRFS_ROOT_ITEM_KEY && if (location.type == BTRFS_ROOT_ITEM_KEY &&
location.objectid == root->root_key.objectid) { location.objectid == root->root_key.objectid) {
......
...@@ -2262,7 +2262,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) ...@@ -2262,7 +2262,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
di_args->bytes_used = dev->bytes_used; di_args->bytes_used = dev->bytes_used;
di_args->total_bytes = dev->total_bytes; di_args->total_bytes = dev->total_bytes;
memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
strncpy(di_args->path, dev->name, sizeof(di_args->path)); if (dev->name)
strncpy(di_args->path, dev->name, sizeof(di_args->path));
else
di_args->path[0] = '\0';
out: out:
if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
......
...@@ -250,14 +250,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, ...@@ -250,14 +250,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
struct btrfs_bio *bbio) struct btrfs_bio *bbio)
{ {
int ret; int ret;
int looped = 0;
struct reada_zone *zone; struct reada_zone *zone;
struct btrfs_block_group_cache *cache = NULL; struct btrfs_block_group_cache *cache = NULL;
u64 start; u64 start;
u64 end; u64 end;
int i; int i;
again:
zone = NULL; zone = NULL;
spin_lock(&fs_info->reada_lock); spin_lock(&fs_info->reada_lock);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
...@@ -274,9 +272,6 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, ...@@ -274,9 +272,6 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
spin_unlock(&fs_info->reada_lock); spin_unlock(&fs_info->reada_lock);
} }
if (looped)
return NULL;
cache = btrfs_lookup_block_group(fs_info, logical); cache = btrfs_lookup_block_group(fs_info, logical);
if (!cache) if (!cache)
return NULL; return NULL;
...@@ -307,13 +302,15 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, ...@@ -307,13 +302,15 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
ret = radix_tree_insert(&dev->reada_zones, ret = radix_tree_insert(&dev->reada_zones,
(unsigned long)(zone->end >> PAGE_CACHE_SHIFT), (unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
zone); zone);
spin_unlock(&fs_info->reada_lock);
if (ret) { if (ret == -EEXIST) {
kfree(zone); kfree(zone);
looped = 1; ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
goto again; logical >> PAGE_CACHE_SHIFT, 1);
if (ret == 1)
kref_get(&zone->refcnt);
} }
spin_unlock(&fs_info->reada_lock);
return zone; return zone;
} }
...@@ -323,26 +320,26 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, ...@@ -323,26 +320,26 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
struct btrfs_key *top, int level) struct btrfs_key *top, int level)
{ {
int ret; int ret;
int looped = 0;
struct reada_extent *re = NULL; struct reada_extent *re = NULL;
struct reada_extent *re_exist = NULL;
struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
struct btrfs_bio *bbio = NULL; struct btrfs_bio *bbio = NULL;
struct btrfs_device *dev; struct btrfs_device *dev;
struct btrfs_device *prev_dev;
u32 blocksize; u32 blocksize;
u64 length; u64 length;
int nzones = 0; int nzones = 0;
int i; int i;
unsigned long index = logical >> PAGE_CACHE_SHIFT; unsigned long index = logical >> PAGE_CACHE_SHIFT;
again:
spin_lock(&fs_info->reada_lock); spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index); re = radix_tree_lookup(&fs_info->reada_tree, index);
if (re) if (re)
kref_get(&re->refcnt); kref_get(&re->refcnt);
spin_unlock(&fs_info->reada_lock); spin_unlock(&fs_info->reada_lock);
if (re || looped) if (re)
return re; return re;
re = kzalloc(sizeof(*re), GFP_NOFS); re = kzalloc(sizeof(*re), GFP_NOFS);
...@@ -398,16 +395,31 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, ...@@ -398,16 +395,31 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
/* insert extent in reada_tree + all per-device trees, all or nothing */ /* insert extent in reada_tree + all per-device trees, all or nothing */
spin_lock(&fs_info->reada_lock); spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re); ret = radix_tree_insert(&fs_info->reada_tree, index, re);
if (ret == -EEXIST) {
re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
BUG_ON(!re_exist);
kref_get(&re_exist->refcnt);
spin_unlock(&fs_info->reada_lock);
goto error;
}
if (ret) { if (ret) {
spin_unlock(&fs_info->reada_lock); spin_unlock(&fs_info->reada_lock);
if (ret != -ENOMEM) {
/* someone inserted the extent in the meantime */
looped = 1;
}
goto error; goto error;
} }
prev_dev = NULL;
for (i = 0; i < nzones; ++i) { for (i = 0; i < nzones; ++i) {
dev = bbio->stripes[i].dev; dev = bbio->stripes[i].dev;
if (dev == prev_dev) {
/*
* in case of DUP, just add the first zone. As both
* are on the same device, there's nothing to gain
* from adding both.
* Also, it wouldn't work, as the tree is per device
* and adding would fail with EEXIST
*/
continue;
}
prev_dev = dev;
ret = radix_tree_insert(&dev->reada_extents, index, re); ret = radix_tree_insert(&dev->reada_extents, index, re);
if (ret) { if (ret) {
while (--i >= 0) { while (--i >= 0) {
...@@ -450,9 +462,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, ...@@ -450,9 +462,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
} }
kfree(bbio); kfree(bbio);
kfree(re); kfree(re);
if (looped) return re_exist;
goto again;
return NULL;
} }
static void reada_kref_dummy(struct kref *kr) static void reada_kref_dummy(struct kref *kr)
......
...@@ -1279,7 +1279,9 @@ static int __update_reloc_root(struct btrfs_root *root, int del) ...@@ -1279,7 +1279,9 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
if (rb_node) if (rb_node)
backref_tree_panic(rb_node, -EEXIST, node->bytenr); backref_tree_panic(rb_node, -EEXIST, node->bytenr);
} else { } else {
spin_lock(&root->fs_info->trans_lock);
list_del_init(&root->root_list); list_del_init(&root->root_list);
spin_unlock(&root->fs_info->trans_lock);
kfree(node); kfree(node);
} }
return 0; return 0;
...@@ -3811,7 +3813,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) ...@@ -3811,7 +3813,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5); ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5);
if (ret < 0) { if (ret < 0) {
if (ret != -EAGAIN) { if (ret != -ENOSPC) {
err = ret; err = ret;
WARN_ON(1); WARN_ON(1);
break; break;
......
...@@ -1257,12 +1257,6 @@ static int scrub_checksum_data(struct scrub_block *sblock) ...@@ -1257,12 +1257,6 @@ static int scrub_checksum_data(struct scrub_block *sblock)
if (memcmp(csum, on_disk_csum, sdev->csum_size)) if (memcmp(csum, on_disk_csum, sdev->csum_size))
fail = 1; fail = 1;
if (fail) {
spin_lock(&sdev->stat_lock);
++sdev->stat.csum_errors;
spin_unlock(&sdev->stat_lock);
}
return fail; return fail;
} }
...@@ -1335,15 +1329,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) ...@@ -1335,15 +1329,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size))
++crc_fail; ++crc_fail;
if (crc_fail || fail) {
spin_lock(&sdev->stat_lock);
if (crc_fail)
++sdev->stat.csum_errors;
if (fail)
++sdev->stat.verify_errors;
spin_unlock(&sdev->stat_lock);
}
return fail || crc_fail; return fail || crc_fail;
} }
......
...@@ -815,7 +815,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) ...@@ -815,7 +815,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0; return 0;
} }
btrfs_start_delalloc_inodes(root, 0);
btrfs_wait_ordered_extents(root, 0, 0); btrfs_wait_ordered_extents(root, 0, 0);
trans = btrfs_start_transaction(root, 0); trans = btrfs_start_transaction(root, 0);
...@@ -1148,13 +1147,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) ...@@ -1148,13 +1147,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret) if (ret)
goto restore; goto restore;
} else { } else {
if (fs_info->fs_devices->rw_devices == 0) if (fs_info->fs_devices->rw_devices == 0) {
ret = -EACCES; ret = -EACCES;
goto restore; goto restore;
}
if (btrfs_super_log_root(fs_info->super_copy) != 0) if (btrfs_super_log_root(fs_info->super_copy) != 0) {
ret = -EINVAL; ret = -EINVAL;
goto restore; goto restore;
}
ret = btrfs_cleanup_fs_roots(fs_info); ret = btrfs_cleanup_fs_roots(fs_info);
if (ret) if (ret)
......
...@@ -73,8 +73,10 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail) ...@@ -73,8 +73,10 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
cur_trans = root->fs_info->running_transaction; cur_trans = root->fs_info->running_transaction;
if (cur_trans) { if (cur_trans) {
if (cur_trans->aborted) if (cur_trans->aborted) {
spin_unlock(&root->fs_info->trans_lock);
return cur_trans->aborted; return cur_trans->aborted;
}
atomic_inc(&cur_trans->use_count); atomic_inc(&cur_trans->use_count);
atomic_inc(&cur_trans->num_writers); atomic_inc(&cur_trans->num_writers);
cur_trans->num_joined++; cur_trans->num_joined++;
...@@ -1400,6 +1402,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ...@@ -1400,6 +1402,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
ret = commit_fs_roots(trans, root); ret = commit_fs_roots(trans, root);
if (ret) { if (ret) {
mutex_unlock(&root->fs_info->tree_log_mutex); mutex_unlock(&root->fs_info->tree_log_mutex);
mutex_unlock(&root->fs_info->reloc_mutex);
goto cleanup_transaction; goto cleanup_transaction;
} }
...@@ -1411,6 +1414,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ...@@ -1411,6 +1414,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
ret = commit_cowonly_roots(trans, root); ret = commit_cowonly_roots(trans, root);
if (ret) { if (ret) {
mutex_unlock(&root->fs_info->tree_log_mutex); mutex_unlock(&root->fs_info->tree_log_mutex);
mutex_unlock(&root->fs_info->reloc_mutex);
goto cleanup_transaction; goto cleanup_transaction;
} }
......
...@@ -3324,12 +3324,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, ...@@ -3324,12 +3324,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
stripe_size = devices_info[ndevs-1].max_avail; stripe_size = devices_info[ndevs-1].max_avail;
num_stripes = ndevs * dev_stripes; num_stripes = ndevs * dev_stripes;
if (stripe_size * num_stripes > max_chunk_size * ncopies) { if (stripe_size * ndevs > max_chunk_size * ncopies) {
stripe_size = max_chunk_size * ncopies; stripe_size = max_chunk_size * ncopies;
do_div(stripe_size, num_stripes); do_div(stripe_size, ndevs);
} }
do_div(stripe_size, dev_stripes); do_div(stripe_size, dev_stripes);
/* align to BTRFS_STRIPE_LEN */
do_div(stripe_size, BTRFS_STRIPE_LEN); do_div(stripe_size, BTRFS_STRIPE_LEN);
stripe_size *= BTRFS_STRIPE_LEN; stripe_size *= BTRFS_STRIPE_LEN;
...@@ -3805,10 +3807,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, ...@@ -3805,10 +3807,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
else if (mirror_num) else if (mirror_num)
stripe_index += mirror_num - 1; stripe_index += mirror_num - 1;
else { else {
int old_stripe_index = stripe_index;
stripe_index = find_live_mirror(map, stripe_index, stripe_index = find_live_mirror(map, stripe_index,
map->sub_stripes, stripe_index + map->sub_stripes, stripe_index +
current->pid % map->sub_stripes); current->pid % map->sub_stripes);
mirror_num = stripe_index + 1; mirror_num = stripe_index - old_stripe_index + 1;
} }
} else { } else {
/* /*
...@@ -4350,8 +4353,10 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) ...@@ -4350,8 +4353,10 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
ret = __btrfs_open_devices(fs_devices, FMODE_READ, ret = __btrfs_open_devices(fs_devices, FMODE_READ,
root->fs_info->bdev_holder); root->fs_info->bdev_holder);
if (ret) if (ret) {
free_fs_devices(fs_devices);
goto out; goto out;
}
if (!fs_devices->seeding) { if (!fs_devices->seeding) {
__btrfs_close_devices(fs_devices); __btrfs_close_devices(fs_devices);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment