Commit 925baedd authored by Chris Mason

Btrfs: Start btree concurrency work.

The allocation trees and the chunk trees are serialized via their own
dedicated mutexes.  This means allocation location is still not very
fine grained.

The main FS btree is protected by locks on each block in the btree.  Locks
are taken top / down, and as processing finishes on a given level of the
tree, the lock is released after locking the lower level.

The end result of a search is now a path where only the lowest level
is locked.  Releasing or freeing the path drops any locks held.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
parent 1cc127b5
......@@ -6,7 +6,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
hash.o file-item.o inode-item.o inode-map.o disk-io.o \
transaction.o bit-radix.o inode.o file.o tree-defrag.o \
extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
extent_io.o volumes.o async-thread.o ioctl.o
extent_io.o volumes.o async-thread.o ioctl.o locking.o
btrfs-$(CONFIG_FS_POSIX_ACL) += acl.o
else
......
This diff is collapsed.
......@@ -330,8 +330,13 @@ struct btrfs_node {
/*
 * State carried through a btree search: one extent buffer, one slot and
 * one lock flag per tree level, each array sized by BTRFS_MAX_LEVEL.
 */
struct btrfs_path {
struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
int slots[BTRFS_MAX_LEVEL];
/* if there is real range locking, this locks field will change */
int locks[BTRFS_MAX_LEVEL];
/* NOTE(review): presumably a readahead setting for the search; confirm in ctree.c */
int reada;
/* keep some upper locks as we walk down */
int keep_locks;
/* NOTE(review): presumably the lowest tree level a search descends to; confirm in ctree.c */
int lowest_level;
/* NOTE(review): presumably tells the search not to take block locks; confirm in ctree.c */
int skip_locking;
};
/*
......@@ -515,6 +520,8 @@ struct btrfs_fs_info {
spinlock_t hash_lock;
struct mutex trans_mutex;
struct mutex fs_mutex;
struct mutex alloc_mutex;
struct mutex chunk_mutex;
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
......@@ -576,6 +583,10 @@ struct btrfs_fs_info {
*/
struct btrfs_root {
struct extent_buffer *node;
/* the node lock is held while changing the node pointer */
spinlock_t node_lock;
struct extent_buffer *commit_root;
struct btrfs_root_item root_item;
struct btrfs_key root_key;
......@@ -1353,13 +1364,7 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
struct btrfs_block_group_cache
*hint, u64 search_start,
int data, int owner);
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 owner_objectid);
struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 size,
u64 root_objectid,
u64 hint, u64 empty_size);
struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u32 blocksize,
u64 root_objectid,
......@@ -1368,8 +1373,6 @@ struct extent_buffer *__btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
int level,
u64 hint,
u64 empty_size);
int btrfs_grow_extent_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 new_size);
int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size);
int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
......@@ -1409,6 +1412,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
int type);
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
int btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *parent, int parent_slot,
......
......@@ -32,6 +32,7 @@
#include "volumes.h"
#include "print-tree.h"
#include "async-thread.h"
#include "locking.h"
#if 0
static int check_tree_block(struct btrfs_root *root, struct extent_buffer *buf)
......@@ -681,9 +682,11 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
{
struct inode *btree_inode = root->fs_info->btree_inode;
if (btrfs_header_generation(buf) ==
root->fs_info->running_transaction->transid)
root->fs_info->running_transaction->transid) {
WARN_ON(!btrfs_tree_locked(buf));
clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
buf);
}
return 0;
}
......@@ -720,6 +723,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->in_sysfs = 0;
INIT_LIST_HEAD(&root->dirty_list);
spin_lock_init(&root->node_lock);
memset(&root->root_key, 0, sizeof(root->root_key));
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
......@@ -1196,6 +1200,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
mutex_init(&fs_info->trans_mutex);
mutex_init(&fs_info->fs_mutex);
mutex_init(&fs_info->alloc_mutex);
mutex_init(&fs_info->chunk_mutex);
#if 0
ret = add_hasher(fs_info, "crc32c");
......@@ -1274,7 +1280,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
mutex_lock(&fs_info->fs_mutex);
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_sys_array(tree_root);
mutex_unlock(&fs_info->chunk_mutex);
if (ret) {
printk("btrfs: failed to read the system array on %s\n",
sb->s_id);
......@@ -1296,7 +1304,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
(unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
BTRFS_UUID_SIZE);
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_chunk_tree(chunk_root);
mutex_unlock(&fs_info->chunk_mutex);
BUG_ON(ret);
btrfs_close_extra_devices(fs_devices);
......@@ -1654,6 +1664,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
u64 transid = btrfs_header_generation(buf);
struct inode *btree_inode = root->fs_info->btree_inode;
WARN_ON(!btrfs_tree_locked(buf));
if (transid != root->fs_info->generation) {
printk(KERN_CRIT "transid mismatch buffer %llu, found %Lu running %Lu\n",
(unsigned long long)buf->start,
......
This diff is collapsed.
......@@ -2889,7 +2889,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
for (i = 0; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
lock_page(page);
if (i == 0)
set_page_extent_head(page, eb->len);
else
......@@ -2907,7 +2906,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
end = start + PAGE_CACHE_SIZE - 1;
if (test_range_bit(tree, start, end,
EXTENT_DIRTY, 0)) {
unlock_page(page);
continue;
}
}
......@@ -2919,7 +2917,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
PAGECACHE_TAG_DIRTY);
}
read_unlock_irq(&page->mapping->tree_lock);
unlock_page(page);
}
return 0;
}
......@@ -2948,17 +2945,12 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
* on us if the page isn't already dirty.
*/
if (i == 0) {
lock_page(page);
set_page_extent_head(page, eb->len);
} else if (PagePrivate(page) &&
page->private != EXTENT_PAGE_PRIVATE) {
lock_page(page);
set_page_extent_mapped(page);
unlock_page(page);
}
__set_page_dirty_nobuffers(extent_buffer_page(eb, i));
if (i == 0)
unlock_page(page);
}
return set_extent_dirty(tree, eb->start,
eb->start + eb->len - 1, GFP_NOFS);
......
......@@ -115,6 +115,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
trans = btrfs_start_transaction(root, 1);
BUG_ON(!trans);
btrfs_set_trans_block_group(trans, inode);
mutex_unlock(&root->fs_info->fs_mutex);
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
num_bytes = max(blocksize, num_bytes);
......@@ -159,6 +160,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
btrfs_add_ordered_inode(inode);
btrfs_update_inode(trans, root, inode);
out:
mutex_lock(&root->fs_info->fs_mutex);
btrfs_end_transaction(trans, root);
return ret;
}
......@@ -349,10 +351,12 @@ int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
mutex_lock(&root->fs_info->fs_mutex);
trans = btrfs_start_transaction(root, 1);
mutex_unlock(&root->fs_info->fs_mutex);
btrfs_set_trans_block_group(trans, inode);
btrfs_csum_file_blocks(trans, root, inode, bio, sums);
mutex_lock(&root->fs_info->fs_mutex);
ret = btrfs_end_transaction(trans, root);
BUG_ON(ret);
mutex_unlock(&root->fs_info->fs_mutex);
......@@ -807,6 +811,7 @@ static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
goto err;
}
ret = btrfs_delete_one_dir_name(trans, root, path, di);
btrfs_release_path(root, path);
dentry->d_inode->i_ctime = dir->i_ctime;
ret = btrfs_del_inode_ref(trans, root, name, name_len,
......@@ -881,8 +886,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
struct btrfs_trans_handle *trans;
unsigned long nr = 0;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
return -ENOTEMPTY;
}
mutex_lock(&root->fs_info->fs_mutex);
ret = btrfs_check_free_space(root, 1, 1);
......
......@@ -43,6 +43,7 @@
#include "ioctl.h"
#include "print-tree.h"
#include "volumes.h"
#include "locking.h"
......@@ -75,9 +76,9 @@ static noinline int create_subvol(struct btrfs_root *root, char *name,
if (ret)
goto fail;
leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
objectid, trans->transid, 0, 0,
0, 0);
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
objectid, trans->transid, 0, 0,
0, 0);
if (IS_ERR(leaf))
return PTR_ERR(leaf);
......@@ -108,6 +109,7 @@ static noinline int create_subvol(struct btrfs_root *root, char *name,
memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
root_item.drop_level = 0;
btrfs_tree_unlock(leaf);
free_extent_buffer(leaf);
leaf = NULL;
......
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <linux/bug.h>
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"
/*
 * Lock a tree block.  The lock used is the page lock of the extent
 * buffer's first page, taken with lock_page().
 *
 * Always returns 0.
 */
int btrfs_tree_lock(struct extent_buffer *eb)
{
	struct page *first = eb->first_page;

	lock_page(first);
	return 0;
}
/*
 * Attempt to lock a tree block through TestSetPageLocked() on the extent
 * buffer's first page instead of lock_page().
 *
 * Returns the value of TestSetPageLocked() unchanged.
 * NOTE(review): presumably that is nonzero when the page was already
 * locked (attempt failed) and 0 when the lock was taken, which is the
 * reverse of the mutex_trylock() convention -- confirm against callers.
 */
int btrfs_try_tree_lock(struct extent_buffer *eb)
{
	struct page *first = eb->first_page;
	int ret;

	ret = TestSetPageLocked(first);
	return ret;
}
/*
 * Unlock a tree block by unlocking the extent buffer's first page.
 * Emits a warning first if that page is not currently locked, then
 * unlocks it regardless.
 *
 * Always returns 0.
 */
int btrfs_tree_unlock(struct extent_buffer *eb)
{
	struct page *first = eb->first_page;

	WARN_ON(!PageLocked(first));
	unlock_page(first);
	return 0;
}
/*
 * Report whether a tree block is locked, i.e. whether the extent
 * buffer's first page has its page lock set.  This does not say who
 * holds the lock.
 *
 * Returns the result of PageLocked() on the first page.
 */
int btrfs_tree_locked(struct extent_buffer *eb)
{
	struct page *first = eb->first_page;

	return PageLocked(first);
}
/*
* Copyright (C) 2008 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
/*
 * Per-block btree locking helpers.  Each one operates on the page lock
 * of the extent buffer's first page (eb->first_page).
 */
#ifndef __BTRFS_LOCKING_
#define __BTRFS_LOCKING_
/* lock the block with lock_page(); always returns 0 */
int btrfs_tree_lock(struct extent_buffer *eb);
/* warn if the block is not locked, then unlock it; always returns 0 */
int btrfs_tree_unlock(struct extent_buffer *eb);
/* returns PageLocked() for the block's first page */
int btrfs_tree_locked(struct extent_buffer *eb);
/*
 * returns TestSetPageLocked() for the block's first page
 * NOTE(review): presumably 0 means the lock was taken; confirm
 */
int btrfs_try_tree_lock(struct extent_buffer *eb);
#endif
......@@ -23,6 +23,7 @@
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
static int total_trans = 0;
extern struct kmem_cache *btrfs_trans_handle_cachep;
......@@ -96,8 +97,7 @@ static noinline int record_root_in_trans(struct btrfs_root *root)
radix_tree_tag_set(&root->fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
BTRFS_ROOT_DEFRAG_TAG);
root->commit_root = root->node;
extent_buffer_get(root->node);
root->commit_root = btrfs_root_node(root);
} else {
WARN_ON(1);
}
......@@ -559,6 +559,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_root *root = pending->root;
struct extent_buffer *tmp;
struct extent_buffer *old;
int ret;
int namelen;
u64 objectid;
......@@ -578,16 +579,18 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
key.offset = 1;
btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
extent_buffer_get(root->node);
btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
free_extent_buffer(tmp);
old = btrfs_lock_root_node(root);
btrfs_cow_block(trans, root, old, NULL, 0, &old);
btrfs_copy_root(trans, root, root->node, &tmp, objectid);
btrfs_copy_root(trans, root, old, &tmp, objectid);
btrfs_tree_unlock(old);
free_extent_buffer(old);
btrfs_set_root_bytenr(new_root_item, tmp->start);
btrfs_set_root_level(new_root_item, btrfs_header_level(tmp));
ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
new_root_item);
btrfs_tree_unlock(tmp);
free_extent_buffer(tmp);
if (ret)
goto fail;
......
......@@ -181,6 +181,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
if (root->fs_info->extent_root == root)
is_extent = 1;
goto out;
if (root->ref_cows == 0 && !is_extent)
goto out;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment