Commit e2aed8df authored by Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull large btrfs update from Chris Mason:
 "This pull request is very large, and the two main features in here
  have been under testing/devel for quite a while.

  We have subvolume quotas from the Strato developers.  This enables
  full tracking of how many blocks are allocated to each subvolume (and
  all snapshots) and you can set limits on a per-subvolume basis.  You
  can also create quota groups and toss multiple subvolumes into a big
  group.  It's everything you need to be a web hosting company and give
  each user their own subvolume.

  The userland side of the quotas is being refreshed; they'll send out
  details on where to grab it soon.
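
  As a rough, hedged sketch only (not the btrfs-progs code, with error
  handling trimmed), the new quota ioctls added later in this diff can be
  driven from userland roughly like this; the function name and the
  "ioctl.h" include path are illustrative assumptions:

        /* sketch: enable quota tracking and create a quota group */
        #include <sys/ioctl.h>
        #include <string.h>
        #include <linux/types.h>
        #include "ioctl.h"  /* local copy of the btrfs ioctl ABI header (assumed) */

        /* fs_fd is an open fd on any file or directory in the mounted fs */
        static int enable_quota_and_create_group(int fs_fd, __u64 qgroupid)
        {
                struct btrfs_ioctl_quota_ctl_args ctl;
                struct btrfs_ioctl_qgroup_create_args create;

                memset(&ctl, 0, sizeof(ctl));
                ctl.cmd = BTRFS_QUOTA_CTL_ENABLE;       /* turn quota tracking on */
                if (ioctl(fs_fd, BTRFS_IOC_QUOTA_CTL, &ctl) < 0)
                        return -1;

                memset(&create, 0, sizeof(create));
                create.create = 1;                      /* 1 = create, 0 = remove */
                create.qgroupid = qgroupid;
                return ioctl(fs_fd, BTRFS_IOC_QGROUP_CREATE, &create);
        }

  Per-group limits would then go through BTRFS_IOC_QGROUP_LIMIT and
  struct btrfs_ioctl_qgroup_limit_args, also added in this series.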

  Next is the kernel side of btrfs send/receive from Alexander Block.
  This leverages the same infrastructure as the quota code to figure out
  relationships between blocks and their owners.  It can then compute
  the difference between two snapshots and send the diffs in a neutral
  format to userland.

  The basic model:

        create a snapshot
        send that snapshot as the initial backup
        make changes
        create a second snapshot
        send the incremental as a backup
        delete the first snapshot
        (use the second snapshot for the next incremental)

  The receive portion is all in userland, and in the 'next' branch of my
  btrfs-progs repo.
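
  To make the model above concrete, here is a hedged sketch (not the
  btrfs-progs implementation) of driving the new BTRFS_IOC_SEND ioctl
  with the btrfs_ioctl_send_args layout added in this series; reading
  parent_root as "root id of the parent snapshot" is an assumption:

        /* sketch: stream one read-only snapshot, optionally incrementally */
        #include <sys/ioctl.h>
        #include <string.h>
        #include <linux/types.h>
        #include "ioctl.h"  /* local copy of the btrfs ioctl ABI header (assumed) */

        /*
         * snap_fd:   open fd on the read-only snapshot to send
         * out_fd:    pipe or file that receives the send stream
         * parent_id: root id of the previous snapshot, 0 for a full send
         */
        static int send_snapshot(int snap_fd, int out_fd, __u64 parent_id)
        {
                struct btrfs_ioctl_send_args args;

                memset(&args, 0, sizeof(args));
                args.send_fd = out_fd;
                args.parent_root = parent_id;
                args.clone_sources = NULL;      /* no extra clone sources */
                args.clone_sources_count = 0;
                return ioctl(snap_fd, BTRFS_IOC_SEND, &args);
        }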

  There's still some work to do in terms of optimizing the send side
  from kernel to userland.  The really important part is figuring out
  how two snapshots are different, and this is where we are
  concentrating right now.  The initial send of a dataset is a little
  slower than tar, but the incremental sends are dramatically faster
  than what rsync can do.

  On top of all of that, we have a nice queue of fixes, cleanups and
  optimizations."

Fix up trivial modify/del conflict in fs/btrfs/ioctl.c

Also fix up semantic conflict in fs/btrfs/send.c: the interface to
dentry_open() changed in commit 765927b2 ("switch dentry_open() to
struct path, make it grab references itself"), and since it now grabs
whatever references it needs, we should no longer do the mntget() on the
mnt (and we need to dput() the dentry reference we took).
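
For reference, the shape of that interface change, as a hedged sketch rather
than the exact send.c hunk:

        /* before 765927b2: dentry_open() consumed the references handed to it */
        filp = dentry_open(dentry, mntget(mnt), O_RDONLY, current_cred());

        /* after 765927b2: dentry_open() takes a struct path and grabs its own
         * references, so no mntget() is needed, and the dentry reference we
         * took ourselves must be dropped when we are done with the file */
        struct path path = { .mnt = mnt, .dentry = dentry };
        filp = dentry_open(&path, O_RDONLY, current_cred());
        dput(dentry);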

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (65 commits)
  Btrfs: uninit variable fixes in send/receive
  Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive
  Btrfs: add btrfs_compare_trees function
  Btrfs: introduce subvol uuids and times
  Btrfs: make iref_to_path non static
  Btrfs: add a barrier before a waitqueue_active check
  Btrfs: call the ordered free operation without any locks held
  Btrfs: Check INCOMPAT flags on remount and add helper function
  Btrfs: add helper for tree enumeration
  btrfs: allow cross-subvolume file clone
  Btrfs: improve multi-thread buffer read
  Btrfs: make btrfs's allocation smoothly with preallocation
  Btrfs: lock the transition from dirty to writeback for an eb
  Btrfs: fix potential race in extent buffer freeing
  Btrfs: don't return true in releasepage unless we actually freed the eb
  Btrfs: suppress printk() if all device I/O stats are zero
  Btrfs: remove unwanted printk() for btrfs device I/O stats
  Btrfs: rewrite BTRFS_SETGET_FUNCS
  Btrfs: zero unused bytes in inode item
  Btrfs: kill free_space pointer from inode structure
  ...

Conflicts:
	fs/btrfs/ioctl.c
parents 47652500 b24baf69
......@@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
export.o tree-log.o free-space-cache.o zlib.o lzo.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o
reada.o backref.o ulist.o qgroup.o send.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
......@@ -206,10 +206,17 @@ static noinline void run_ordered_completions(struct btrfs_workers *workers,
work->ordered_func(work);
/* now take the lock again and call the freeing code */
/* now take the lock again and drop our item from the list */
spin_lock(&workers->order_lock);
list_del(&work->order_list);
spin_unlock(&workers->order_lock);
/*
* we don't want to call the ordered free functions
* with the lock held though
*/
work->ordered_free(work);
spin_lock(&workers->order_lock);
}
spin_unlock(&workers->order_lock);
......
......@@ -773,9 +773,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
*/
static int find_parent_nodes(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist *refs, struct ulist *roots,
const u64 *extent_item_pos)
u64 time_seq, struct ulist *refs,
struct ulist *roots, const u64 *extent_item_pos)
{
struct btrfs_key key;
struct btrfs_path *path;
......@@ -837,7 +836,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
btrfs_put_delayed_ref(&head->node);
goto again;
}
ret = __add_delayed_refs(head, delayed_ref_seq,
ret = __add_delayed_refs(head, time_seq,
&prefs_delayed);
mutex_unlock(&head->mutex);
if (ret) {
......@@ -981,8 +980,7 @@ static void free_leaf_list(struct ulist *blocks)
*/
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist **leafs,
u64 time_seq, struct ulist **leafs,
const u64 *extent_item_pos)
{
struct ulist *tmp;
......@@ -997,7 +995,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
return -ENOMEM;
}
ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
ret = find_parent_nodes(trans, fs_info, bytenr,
time_seq, *leafs, tmp, extent_item_pos);
ulist_free(tmp);
......@@ -1024,8 +1022,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
*/
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist **roots)
u64 time_seq, struct ulist **roots)
{
struct ulist *tmp;
struct ulist_node *node = NULL;
......@@ -1043,7 +1040,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
ULIST_ITER_INIT(&uiter);
while (1) {
ret = find_parent_nodes(trans, fs_info, bytenr, delayed_ref_seq,
ret = find_parent_nodes(trans, fs_info, bytenr,
time_seq, tmp, *roots, NULL);
if (ret < 0 && ret != -ENOENT) {
ulist_free(tmp);
......@@ -1125,10 +1122,10 @@ static int inode_ref_info(u64 inum, u64 ioff, struct btrfs_root *fs_root,
* required for the path to fit into the buffer. in that case, the returned
* value will be smaller than dest. callers must check this!
*/
static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
struct btrfs_inode_ref *iref,
struct extent_buffer *eb_in, u64 parent,
char *dest, u32 size)
char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
struct btrfs_inode_ref *iref,
struct extent_buffer *eb_in, u64 parent,
char *dest, u32 size)
{
u32 len;
int slot;
......@@ -1376,11 +1373,9 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
struct ulist *roots = NULL;
struct ulist_node *ref_node = NULL;
struct ulist_node *root_node = NULL;
struct seq_list seq_elem = {};
struct seq_list tree_mod_seq_elem = {};
struct ulist_iterator ref_uiter;
struct ulist_iterator root_uiter;
struct btrfs_delayed_ref_root *delayed_refs = NULL;
pr_debug("resolving all inodes for extent %llu\n",
extent_item_objectid);
......@@ -1391,16 +1386,11 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
trans = btrfs_join_transaction(fs_info->extent_root);
if (IS_ERR(trans))
return PTR_ERR(trans);
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
btrfs_get_delayed_seq(delayed_refs, &seq_elem);
spin_unlock(&delayed_refs->lock);
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
}
ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
seq_elem.seq, tree_mod_seq_elem.seq, &refs,
tree_mod_seq_elem.seq, &refs,
&extent_item_pos);
if (ret)
goto out;
......@@ -1408,8 +1398,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
ULIST_ITER_INIT(&ref_uiter);
while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
seq_elem.seq,
tree_mod_seq_elem.seq, &roots);
tree_mod_seq_elem.seq, &roots);
if (ret)
break;
ULIST_ITER_INIT(&root_uiter);
......@@ -1431,7 +1420,6 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
out:
if (!search_commit_root) {
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
btrfs_put_delayed_seq(delayed_refs, &seq_elem);
btrfs_end_transaction(trans, fs_info->extent_root);
}
......@@ -1543,7 +1531,7 @@ static int inode_to_path(u64 inum, struct btrfs_inode_ref *iref,
ipath->fspath->bytes_left - s_ptr : 0;
fspath_min = (char *)ipath->fspath->val + (i + 1) * s_ptr;
fspath = iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb,
fspath = btrfs_iref_to_path(ipath->fs_root, ipath->btrfs_path, iref, eb,
inum, fspath_min, bytes_left);
if (IS_ERR(fspath))
return PTR_ERR(fspath);
......
......@@ -21,6 +21,7 @@
#include "ioctl.h"
#include "ulist.h"
#include "extent_io.h"
#define BTRFS_BACKREF_SEARCH_COMMIT_ROOT ((struct btrfs_trans_handle *)0)
......@@ -58,8 +59,10 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 delayed_ref_seq, u64 time_seq,
struct ulist **roots);
u64 time_seq, struct ulist **roots);
char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
struct btrfs_inode_ref *iref, struct extent_buffer *eb,
u64 parent, char *dest, u32 size);
struct btrfs_data_container *init_data_container(u32 total_bytes);
struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root,
......
......@@ -87,9 +87,6 @@ struct btrfs_inode {
/* node for the red-black tree that links inodes in subvolume root */
struct rb_node rb_node;
/* the space_info for where this inode's data allocations are done */
struct btrfs_space_info *space_info;
unsigned long runtime_flags;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
......@@ -191,11 +188,14 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
BTRFS_I(inode)->disk_i_size = size;
}
static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
struct inode *inode)
static inline bool btrfs_is_free_space_inode(struct inode *inode)
{
if (root == root->fs_info->tree_root ||
BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
struct btrfs_root *root = BTRFS_I(inode)->root;
if (root == root->fs_info->tree_root &&
btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
return true;
if (BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
return true;
return false;
}
......
......@@ -1032,6 +1032,7 @@ static int btrfsic_process_metablock(
struct btrfs_disk_key *disk_key;
u8 type;
u32 item_offset;
u32 item_size;
if (disk_item_offset + sizeof(struct btrfs_item) >
sf->block_ctx->len) {
......@@ -1047,6 +1048,7 @@ static int btrfsic_process_metablock(
disk_item_offset,
sizeof(struct btrfs_item));
item_offset = le32_to_cpu(disk_item.offset);
item_size = le32_to_cpu(disk_item.size);
disk_key = &disk_item.key;
type = disk_key->type;
......@@ -1057,14 +1059,13 @@ static int btrfsic_process_metablock(
root_item_offset = item_offset +
offsetof(struct btrfs_leaf, items);
if (root_item_offset +
sizeof(struct btrfs_root_item) >
if (root_item_offset + item_size >
sf->block_ctx->len)
goto leaf_item_out_of_bounce_error;
btrfsic_read_from_block_data(
sf->block_ctx, &root_item,
root_item_offset,
sizeof(struct btrfs_root_item));
item_size);
next_bytenr = le64_to_cpu(root_item.bytenr);
sf->error =
......
......@@ -62,6 +62,7 @@ static inline void btrfs_init_delayed_node(
INIT_LIST_HEAD(&delayed_node->n_list);
INIT_LIST_HEAD(&delayed_node->p_list);
delayed_node->bytes_reserved = 0;
memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item));
}
static inline int btrfs_is_continuous_delayed_item(
......@@ -1113,8 +1114,8 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
* Returns < 0 on error and returns with an aborted transaction with any
* outstanding delayed items cleaned up.
*/
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int nr)
{
struct btrfs_root *curr_root = root;
struct btrfs_delayed_root *delayed_root;
......@@ -1122,6 +1123,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct btrfs_block_rsv *block_rsv;
int ret = 0;
bool count = (nr > 0);
if (trans->aborted)
return -EIO;
......@@ -1137,7 +1139,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
delayed_root = btrfs_get_delayed_root(root);
curr_node = btrfs_first_delayed_node(delayed_root);
while (curr_node) {
while (curr_node && (!count || (count && nr--))) {
curr_root = curr_node->root;
ret = btrfs_insert_delayed_items(trans, path, curr_root,
curr_node);
......@@ -1149,6 +1151,7 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
path, curr_node);
if (ret) {
btrfs_release_delayed_node(curr_node);
curr_node = NULL;
btrfs_abort_transaction(trans, root, ret);
break;
}
......@@ -1158,12 +1161,26 @@ int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
btrfs_release_delayed_node(prev_node);
}
if (curr_node)
btrfs_release_delayed_node(curr_node);
btrfs_free_path(path);
trans->block_rsv = block_rsv;
return ret;
}
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
return __btrfs_run_delayed_items(trans, root, -1);
}
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int nr)
{
return __btrfs_run_delayed_items(trans, root, nr);
}
static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *node)
{
......
......@@ -107,6 +107,8 @@ int btrfs_inode_delayed_dir_index_count(struct inode *inode);
int btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int nr);
void btrfs_balance_delayed_items(struct btrfs_root *root);
......
......@@ -233,22 +233,26 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
return 0;
}
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq)
{
struct seq_list *elem;
assert_spin_locked(&delayed_refs->lock);
if (list_empty(&delayed_refs->seq_head))
return 0;
elem = list_first_entry(&delayed_refs->seq_head, struct seq_list, list);
if (seq >= elem->seq) {
pr_debug("holding back delayed_ref %llu, lowest is %llu (%p)\n",
seq, elem->seq, delayed_refs);
return 1;
int ret = 0;
spin_lock(&fs_info->tree_mod_seq_lock);
if (!list_empty(&fs_info->tree_mod_seq_list)) {
elem = list_first_entry(&fs_info->tree_mod_seq_list,
struct seq_list, list);
if (seq >= elem->seq) {
pr_debug("holding back delayed_ref %llu, lowest is "
"%llu (%p)\n", seq, elem->seq, delayed_refs);
ret = 1;
}
}
return 0;
spin_unlock(&fs_info->tree_mod_seq_lock);
return ret;
}
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
......@@ -525,8 +529,8 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
if (is_fstree(ref_root))
seq = inc_delayed_seq(delayed_refs);
if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_tree_ref(ref);
......@@ -584,8 +588,8 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
ref->is_head = 0;
ref->in_tree = 1;
if (is_fstree(ref_root))
seq = inc_delayed_seq(delayed_refs);
if (need_ref_seq(for_cow, ref_root))
seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
ref->seq = seq;
full_ref = btrfs_delayed_node_to_data_ref(ref);
......@@ -658,10 +662,12 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
add_delayed_tree_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, level, action,
for_cow);
if (!is_fstree(ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
if (!need_ref_seq(for_cow, ref_root) &&
waitqueue_active(&fs_info->tree_mod_seq_wait))
wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0;
}
......@@ -707,10 +713,12 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
add_delayed_data_ref(fs_info, trans, &ref->node, bytenr,
num_bytes, parent, ref_root, owner, offset,
action, for_cow);
if (!is_fstree(ref_root) &&
waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
if (!need_ref_seq(for_cow, ref_root) &&
waitqueue_active(&fs_info->tree_mod_seq_wait))
wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, &ref->node, extent_op);
return 0;
}
......@@ -736,8 +744,8 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
extent_op->is_data);
if (waitqueue_active(&delayed_refs->seq_wait))
wake_up(&delayed_refs->seq_wait);
if (waitqueue_active(&fs_info->tree_mod_seq_wait))
wake_up(&fs_info->tree_mod_seq_wait);
spin_unlock(&delayed_refs->lock);
return 0;
}
......
......@@ -139,26 +139,6 @@ struct btrfs_delayed_ref_root {
int flushing;
u64 run_delayed_start;
/*
* seq number of delayed refs. We need to know if a backref was being
* added before the currently processed ref or afterwards.
*/
u64 seq;
/*
* seq_list holds a list of all seq numbers that are currently being
* added to the list. While walking backrefs (btrfs_find_all_roots,
* qgroups), which might take some time, no newer ref must be processed,
* as it might influence the outcome of the walk.
*/
struct list_head seq_head;
/*
* when the only refs we have in the list must not be processed, we want
* to wait for more refs to show up or for the end of backref walking.
*/
wait_queue_head_t seq_wait;
};
static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
......@@ -195,34 +175,28 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
struct list_head *cluster, u64 search_start);
static inline u64 inc_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs)
{
assert_spin_locked(&delayed_refs->lock);
++delayed_refs->seq;
return delayed_refs->seq;
}
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
static inline void
btrfs_get_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
struct seq_list *elem)
/*
* delayed refs with a ref_seq > 0 must be held back during backref walking.
* this only applies to items in one of the fs-trees. for_cow items never need
* to be held back, so they won't get a ref_seq number.
*/
static inline int need_ref_seq(int for_cow, u64 rootid)
{
assert_spin_locked(&delayed_refs->lock);
elem->seq = delayed_refs->seq;
list_add_tail(&elem->list, &delayed_refs->seq_head);
}
if (for_cow)
return 0;
static inline void
btrfs_put_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
struct seq_list *elem)
{
spin_lock(&delayed_refs->lock);
list_del(&elem->list);
wake_up(&delayed_refs->seq_wait);
spin_unlock(&delayed_refs->lock);
}
if (rootid == BTRFS_FS_TREE_OBJECTID)
return 1;
int btrfs_check_delayed_seq(struct btrfs_delayed_ref_root *delayed_refs,
u64 seq);
if ((s64)rootid >= (s64)BTRFS_FIRST_FREE_OBJECTID)
return 1;
return 0;
}
/*
* a node might live in a head or a regular ref, this lets you
......
......@@ -407,7 +407,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
break;
}
if (failed && !ret)
if (failed && !ret && failed_mirror)
repair_eb_io_failure(root, eb, failed_mirror);
return ret;
......@@ -1182,6 +1182,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->defrag_running = 0;
root->root_key.objectid = objectid;
root->anon_dev = 0;
spin_lock_init(&root->root_times_lock);
}
static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
......@@ -1225,6 +1227,82 @@ static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
return root;
}
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 objectid)
{
struct extent_buffer *leaf;
struct btrfs_root *tree_root = fs_info->tree_root;
struct btrfs_root *root;
struct btrfs_key key;
int ret = 0;
u64 bytenr;
root = btrfs_alloc_root(fs_info);
if (!root)
return ERR_PTR(-ENOMEM);
__setup_root(tree_root->nodesize, tree_root->leafsize,
tree_root->sectorsize, tree_root->stripesize,
root, fs_info, objectid);
root->root_key.objectid = objectid;
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
root->root_key.offset = 0;
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, objectid, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
goto fail;
}
bytenr = leaf->start;
memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
btrfs_set_header_bytenr(leaf, leaf->start);
btrfs_set_header_generation(leaf, trans->transid);
btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(leaf, objectid);
root->node = leaf;
write_extent_buffer(leaf, fs_info->fsid,
(unsigned long)btrfs_header_fsid(leaf),
BTRFS_FSID_SIZE);
write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
(unsigned long)btrfs_header_chunk_tree_uuid(leaf),
BTRFS_UUID_SIZE);
btrfs_mark_buffer_dirty(leaf);
root->commit_root = btrfs_root_node(root);
root->track_dirty = 1;
root->root_item.flags = 0;
root->root_item.byte_limit = 0;
btrfs_set_root_bytenr(&root->root_item, leaf->start);
btrfs_set_root_generation(&root->root_item, trans->transid);
btrfs_set_root_level(&root->root_item, 0);
btrfs_set_root_refs(&root->root_item, 1);
btrfs_set_root_used(&root->root_item, leaf->len);
btrfs_set_root_last_snapshot(&root->root_item, 0);
btrfs_set_root_dirid(&root->root_item, 0);
root->root_item.drop_level = 0;
key.objectid = objectid;
key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = 0;
ret = btrfs_insert_root(trans, tree_root, &key, &root->root_item);
if (ret)
goto fail;
btrfs_tree_unlock(leaf);
fail:
if (ret)
return ERR_PTR(ret);
return root;
}
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
......@@ -1326,6 +1404,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
u64 generation;
u32 blocksize;
int ret = 0;
int slot;
root = btrfs_alloc_root(fs_info);
if (!root)
......@@ -1352,9 +1431,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
if (ret == 0) {
l = path->nodes[0];
read_extent_buffer(l, &root->root_item,
btrfs_item_ptr_offset(l, path->slots[0]),
sizeof(root->root_item));
slot = path->slots[0];
btrfs_read_root_item(tree_root, l, slot, &root->root_item);
memcpy(&root->root_key, location, sizeof(*location));
}
btrfs_free_path(path);
......@@ -1396,6 +1474,9 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
return fs_info->dev_root;
if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
return fs_info->csum_root;
if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
return fs_info->quota_root ? fs_info->quota_root :
ERR_PTR(-ENOENT);
again:
spin_lock(&fs_info->fs_roots_radix_lock);
root = radix_tree_lookup(&fs_info->fs_roots_radix,
......@@ -1823,6 +1904,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
free_extent_buffer(info->extent_root->commit_root);
free_extent_buffer(info->csum_root->node);
free_extent_buffer(info->csum_root->commit_root);
if (info->quota_root) {
free_extent_buffer(info->quota_root->node);
free_extent_buffer(info->quota_root->commit_root);
}
info->tree_root->node = NULL;
info->tree_root->commit_root = NULL;
......@@ -1832,6 +1917,10 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
info->extent_root->commit_root = NULL;
info->csum_root->node = NULL;
info->csum_root->commit_root = NULL;
if (info->quota_root) {
info->quota_root->node = NULL;
info->quota_root->commit_root = NULL;
}
if (chunk_root) {
free_extent_buffer(info->chunk_root->node);
......@@ -1862,6 +1951,7 @@ int open_ctree(struct super_block *sb,
struct btrfs_root *csum_root;
struct btrfs_root *chunk_root;
struct btrfs_root *dev_root;
struct btrfs_root *quota_root;
struct btrfs_root *log_tree_root;
int ret;
int err = -EINVAL;
......@@ -1873,9 +1963,10 @@ int open_ctree(struct super_block *sb,
csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info);
quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info);
if (!tree_root || !extent_root || !csum_root ||
!chunk_root || !dev_root) {
!chunk_root || !dev_root || !quota_root) {
err = -ENOMEM;
goto fail;
}
......@@ -1944,6 +2035,8 @@ int open_ctree(struct super_block *sb,
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
init_waitqueue_head(&fs_info->tree_mod_seq_wait);
/* readahead state */
INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
spin_lock_init(&fs_info->reada_lock);
......@@ -2032,6 +2125,13 @@ int open_ctree(struct super_block *sb,
init_rwsem(&fs_info->cleanup_work_sem);
init_rwsem(&fs_info->subvol_sem);
spin_lock_init(&fs_info->qgroup_lock);
fs_info->qgroup_tree = RB_ROOT;
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
fs_info->qgroup_seq = 1;
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
......@@ -2244,7 +2344,7 @@ int open_ctree(struct super_block *sb,
ret |= btrfs_start_workers(&fs_info->caching_workers);
ret |= btrfs_start_workers(&fs_info->readahead_workers);
if (ret) {
ret = -ENOMEM;
err = -ENOMEM;
goto fail_sb_buffer;
}
......@@ -2356,6 +2456,17 @@ int open_ctree(struct super_block *sb,
goto recovery_tree_root;
csum_root->track_dirty = 1;
ret = find_and_setup_root(tree_root, fs_info,
BTRFS_QUOTA_TREE_OBJECTID, quota_root);
if (ret) {
kfree(quota_root);
quota_root = fs_info->quota_root = NULL;
} else {
quota_root->track_dirty = 1;
fs_info->quota_enabled = 1;
fs_info->pending_quota_state = 1;
}
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
......@@ -2415,6 +2526,9 @@ int open_ctree(struct super_block *sb,
" integrity check module %s\n", sb->s_id);
}
#endif
ret = btrfs_read_qgroup_config(fs_info);
if (ret)
goto fail_trans_kthread;
/* do not make disk changes in broken FS */
if (btrfs_super_log_root(disk_super) != 0 &&
......@@ -2425,7 +2539,7 @@ int open_ctree(struct super_block *sb,
printk(KERN_WARNING "Btrfs log replay required "
"on RO media\n");
err = -EIO;
goto fail_trans_kthread;
goto fail_qgroup;
}
blocksize =
btrfs_level_size(tree_root,
......@@ -2434,7 +2548,7 @@ int open_ctree(struct super_block *sb,
log_tree_root = btrfs_alloc_root(fs_info);
if (!log_tree_root) {
err = -ENOMEM;
goto fail_trans_kthread;
goto fail_qgroup;
}
__setup_root(nodesize, leafsize, sectorsize, stripesize,
......@@ -2466,15 +2580,15 @@ int open_ctree(struct super_block *sb,
if (!(sb->s_flags & MS_RDONLY)) {
ret = btrfs_cleanup_fs_roots(fs_info);
if (ret) {
}
if (ret)
goto fail_trans_kthread;
ret = btrfs_recover_relocation(tree_root);
if (ret < 0) {
printk(KERN_WARNING
"btrfs: failed to recover relocation\n");
err = -EINVAL;
goto fail_trans_kthread;
goto fail_qgroup;
}
}
......@@ -2484,10 +2598,10 @@ int open_ctree(struct super_block *sb,
fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
if (!fs_info->fs_root)
goto fail_trans_kthread;
goto fail_qgroup;
if (IS_ERR(fs_info->fs_root)) {
err = PTR_ERR(fs_info->fs_root);
goto fail_trans_kthread;
goto fail_qgroup;
}
if (sb->s_flags & MS_RDONLY)
......@@ -2511,6 +2625,8 @@ int open_ctree(struct super_block *sb,
return 0;
fail_qgroup:
btrfs_free_qgroup_config(fs_info);
fail_trans_kthread:
kthread_stop(fs_info->transaction_kthread);
fail_cleaner:
......@@ -3109,6 +3225,8 @@ int close_ctree(struct btrfs_root *root)
fs_info->closing = 2;
smp_mb();
btrfs_free_qgroup_config(root->fs_info);
if (fs_info->delalloc_bytes) {
printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
(unsigned long long)fs_info->delalloc_bytes);
......@@ -3128,6 +3246,10 @@ int close_ctree(struct btrfs_root *root)
free_extent_buffer(fs_info->dev_root->commit_root);
free_extent_buffer(fs_info->csum_root->node);
free_extent_buffer(fs_info->csum_root->commit_root);
if (fs_info->quota_root) {
free_extent_buffer(fs_info->quota_root->node);
free_extent_buffer(fs_info->quota_root->commit_root);
}
btrfs_free_block_groups(fs_info);
......@@ -3258,7 +3380,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
}
static int btree_lock_page_hook(struct page *page, void *data,
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *))
{
struct inode *inode = page->mapping->host;
......
......@@ -89,6 +89,12 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
int btrfs_cleanup_transaction(struct btrfs_root *root);
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
struct btrfs_root *root);
void btrfs_abort_devices(struct btrfs_root *root);
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info,
u64 objectid);
int btree_lock_page_hook(struct page *page, void *data,
void (*flush_fn)(void *));
#ifdef CONFIG_DEBUG_LOCK_ALLOC
void btrfs_init_lockdep(void);
......
......@@ -1919,7 +1919,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
return -EIO;
}
printk_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
printk_ratelimited_in_rcu(KERN_INFO "btrfs read error corrected: ino %lu off %llu "
"(dev %s sector %llu)\n", page->mapping->host->i_ino,
start, rcu_str_deref(dev->name), sector);
......@@ -3078,8 +3078,15 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
}
}
/*
* We need to do this to prevent races in people who check if the eb is
* under IO since we can end up having no IO bits set for a short period
* of time.
*/
spin_lock(&eb->refs_lock);
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
spin_unlock(&eb->refs_lock);
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
spin_lock(&fs_info->delalloc_lock);
if (fs_info->dirty_metadata_bytes >= eb->len)
......@@ -3088,6 +3095,8 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
WARN_ON(1);
spin_unlock(&fs_info->delalloc_lock);
ret = 1;
} else {
spin_unlock(&eb->refs_lock);
}
btrfs_tree_unlock(eb);
......@@ -3558,19 +3567,38 @@ int extent_readpages(struct extent_io_tree *tree,
struct bio *bio = NULL;
unsigned page_idx;
unsigned long bio_flags = 0;
struct page *pagepool[16];
struct page *page;
int i = 0;
int nr = 0;
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_entry(pages->prev, struct page, lru);
page = list_entry(pages->prev, struct page, lru);
prefetchw(&page->flags);
list_del(&page->lru);
if (!add_to_page_cache_lru(page, mapping,
if (add_to_page_cache_lru(page, mapping,
page->index, GFP_NOFS)) {
__extent_read_full_page(tree, page, get_extent,
&bio, 0, &bio_flags);
page_cache_release(page);
continue;
}
page_cache_release(page);
pagepool[nr++] = page;
if (nr < ARRAY_SIZE(pagepool))
continue;
for (i = 0; i < nr; i++) {
__extent_read_full_page(tree, pagepool[i], get_extent,
&bio, 0, &bio_flags);
page_cache_release(pagepool[i]);
}
nr = 0;
}
for (i = 0; i < nr; i++) {
__extent_read_full_page(tree, pagepool[i], get_extent,
&bio, 0, &bio_flags);
page_cache_release(pagepool[i]);
}
BUG_ON(!list_empty(pages));
if (bio)
return submit_one_bio(READ, bio, 0, bio_flags);
......@@ -4124,11 +4152,10 @@ static void check_buffer_tree_ref(struct extent_buffer *eb)
* So bump the ref count first, then set the bit. If someone
* beat us to it, drop the ref we added.
*/
if (!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags)) {
spin_lock(&eb->refs_lock);
if (!test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
atomic_inc(&eb->refs);
if (test_and_set_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags))
atomic_dec(&eb->refs);
}
spin_unlock(&eb->refs_lock);
}
static void mark_extent_buffer_accessed(struct extent_buffer *eb)
......@@ -4240,9 +4267,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
goto free_eb;
}
/* add one reference for the tree */
spin_lock(&eb->refs_lock);
check_buffer_tree_ref(eb);
spin_unlock(&eb->refs_lock);
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
......@@ -4301,7 +4326,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
}
/* Expects to have eb->eb_lock already held */
static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
{
WARN_ON(atomic_read(&eb->refs) == 0);
if (atomic_dec_and_test(&eb->refs)) {
......@@ -4322,9 +4347,11 @@ static void release_extent_buffer(struct extent_buffer *eb, gfp_t mask)
btrfs_release_extent_buffer_page(eb, 0);
call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
return;
return 1;
}
spin_unlock(&eb->refs_lock);
return 0;
}
void free_extent_buffer(struct extent_buffer *eb)
......@@ -4963,7 +4990,6 @@ int try_release_extent_buffer(struct page *page, gfp_t mask)
spin_unlock(&eb->refs_lock);
return 0;
}
release_extent_buffer(eb, mask);
return 1;
return release_extent_buffer(eb, mask);
}
......@@ -183,7 +183,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
* read from the commit root and sidestep a nasty deadlock
* between reading the free space cache and updating the csum tree.
*/
if (btrfs_is_free_space_inode(root, inode)) {
if (btrfs_is_free_space_inode(inode)) {
path->search_commit_root = 1;
path->skip_locking = 1;
}
......@@ -690,6 +690,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
return -ENOMEM;
sector_sum = sums->sums;
trans->adding_csums = 1;
again:
next_offset = (u64)-1;
found_next = 0;
......@@ -853,6 +854,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
goto again;
}
out:
trans->adding_csums = 0;
btrfs_free_path(path);
return ret;
......
......@@ -1968,7 +1968,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
for (n = rb_first(&ctl->free_space_offset); n; n = rb_next(n)) {
info = rb_entry(n, struct btrfs_free_space, offset_index);
if (info->bytes >= bytes)
if (info->bytes >= bytes && !block_group->ro)
count++;
printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
(unsigned long long)info->offset,
......
......@@ -825,7 +825,7 @@ static noinline int cow_file_range(struct inode *inode,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
BUG_ON(btrfs_is_free_space_inode(root, inode));
BUG_ON(btrfs_is_free_space_inode(inode));
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
extent_clear_unlock_delalloc(inode,
......@@ -1010,7 +1010,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
if (atomic_read(&root->fs_info->async_delalloc_pages) <
5 * 1042 * 1024 &&
5 * 1024 * 1024 &&
waitqueue_active(&root->fs_info->async_submit_wait))
wake_up(&root->fs_info->async_submit_wait);
......@@ -1035,7 +1035,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
struct btrfs_root *root = BTRFS_I(inode)->root;
unsigned long nr_pages;
u64 cur_end;
int limit = 10 * 1024 * 1042;
int limit = 10 * 1024 * 1024;
clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED,
1, 0, NULL, GFP_NOFS);
......@@ -1153,7 +1153,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
return -ENOMEM;
}
nolock = btrfs_is_free_space_inode(root, inode);
nolock = btrfs_is_free_space_inode(inode);
if (nolock)
trans = btrfs_join_transaction_nolock(root);
......@@ -1466,7 +1466,7 @@ static void btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
bool do_list = !btrfs_is_free_space_inode(root, inode);
bool do_list = !btrfs_is_free_space_inode(inode);
if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
......@@ -1501,7 +1501,7 @@ static void btrfs_clear_bit_hook(struct inode *inode,
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
bool do_list = !btrfs_is_free_space_inode(root, inode);
bool do_list = !btrfs_is_free_space_inode(inode);
if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
......@@ -1612,7 +1612,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
if (btrfs_is_free_space_inode(root, inode))
if (btrfs_is_free_space_inode(inode))
metadata = 2;
if (!(rw & REQ_WRITE)) {
......@@ -1869,7 +1869,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
int ret;
bool nolock;
nolock = btrfs_is_free_space_inode(root, inode);
nolock = btrfs_is_free_space_inode(inode);
if (test_bit(BTRFS_ORDERED_IOERR, &ordered_extent->flags)) {
ret = -EIO;
......@@ -2007,7 +2007,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
ordered_extent->work.func = finish_ordered_fn;
ordered_extent->work.flags = 0;
if (btrfs_is_free_space_inode(root, inode))
if (btrfs_is_free_space_inode(inode))
workers = &root->fs_info->endio_freespace_worker;
else
workers = &root->fs_info->endio_write_workers;
......@@ -2732,8 +2732,10 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
* The data relocation inode should also be directly updated
* without delay
*/
if (!btrfs_is_free_space_inode(root, inode)
if (!btrfs_is_free_space_inode(inode)
&& root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
btrfs_update_root_times(trans, root);
ret = btrfs_delayed_update_inode(trans, root, inode);
if (!ret)
btrfs_set_inode_last_trans(trans, inode);
......@@ -2833,7 +2835,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
inode_inc_iversion(inode);
inode_inc_iversion(dir);
inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
btrfs_update_inode(trans, root, dir);
ret = btrfs_update_inode(trans, root, dir);
out:
return ret;
}
......@@ -3743,7 +3745,7 @@ void btrfs_evict_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0);
if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
btrfs_is_free_space_inode(root, inode)))
btrfs_is_free_space_inode(inode)))
goto no_delete;
if (is_bad_inode(inode)) {
......@@ -4082,7 +4084,6 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
struct btrfs_iget_args *args = p;
inode->i_ino = args->ino;
BTRFS_I(inode)->root = args->root;
btrfs_set_inode_space_info(args->root, inode);
return 0;
}
......@@ -4457,7 +4458,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (test_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags))
return 0;
if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(inode))
nolock = true;
if (wbc->sync_mode == WB_SYNC_ALL) {
......@@ -4518,6 +4519,11 @@ int btrfs_dirty_inode(struct inode *inode)
static int btrfs_update_time(struct inode *inode, struct timespec *now,
int flags)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
if (btrfs_root_readonly(root))
return -EROFS;
if (flags & S_VERSION)
inode_inc_iversion(inode);
if (flags & S_CTIME)
......@@ -4662,7 +4668,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->root = root;
BTRFS_I(inode)->generation = trans->transid;
inode->i_generation = BTRFS_I(inode)->generation;
btrfs_set_inode_space_info(root, inode);
if (S_ISDIR(mode))
owner = 0;
......@@ -4690,6 +4695,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
memset_extent_buffer(path->nodes[0], 0, (unsigned long)inode_item,
sizeof(*inode_item));
fill_inode_item(trans, path->nodes[0], inode_item, inode);
ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
......@@ -4723,6 +4730,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
trace_btrfs_inode_new(inode);
btrfs_set_inode_last_trans(trans, inode);
btrfs_update_root_times(trans, root);
return inode;
fail:
if (dir)
......@@ -6939,7 +6948,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
return NULL;
ei->root = NULL;
ei->space_info = NULL;
ei->generation = 0;
ei->last_trans = 0;
ei->last_sub_trans = 0;
......@@ -7046,7 +7054,7 @@ int btrfs_drop_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0 &&
!btrfs_is_free_space_inode(root, inode))
!btrfs_is_free_space_inode(inode))
return 1;
else
return generic_drop_inode(inode);
......
......@@ -32,15 +32,46 @@ struct btrfs_ioctl_vol_args {
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
#define BTRFS_FSID_SIZE 16
#define BTRFS_UUID_SIZE 16
#define BTRFS_QGROUP_INHERIT_SET_LIMITS (1ULL << 0)
struct btrfs_qgroup_limit {
__u64 flags;
__u64 max_rfer;
__u64 max_excl;
__u64 rsv_rfer;
__u64 rsv_excl;
};
struct btrfs_qgroup_inherit {
__u64 flags;
__u64 num_qgroups;
__u64 num_ref_copies;
__u64 num_excl_copies;
struct btrfs_qgroup_limit lim;
__u64 qgroups[0];
};
struct btrfs_ioctl_qgroup_limit_args {
__u64 qgroupid;
struct btrfs_qgroup_limit lim;
};
#define BTRFS_SUBVOL_NAME_MAX 4039
struct btrfs_ioctl_vol_args_v2 {
__s64 fd;
__u64 transid;
__u64 flags;
__u64 unused[4];
union {
struct {
__u64 size;
struct btrfs_qgroup_inherit __user *qgroup_inherit;
};
__u64 unused[4];
};
char name[BTRFS_SUBVOL_NAME_MAX + 1];
};
......@@ -285,9 +316,13 @@ enum btrfs_dev_stat_values {
BTRFS_DEV_STAT_VALUES_MAX
};
/* Reset statistics after reading; needs SYS_ADMIN capability */
#define BTRFS_DEV_STATS_RESET (1ULL << 0)
struct btrfs_ioctl_get_dev_stats {
__u64 devid; /* in */
__u64 nr_items; /* in/out */
__u64 flags; /* in/out */
/* out values: */
__u64 values[BTRFS_DEV_STAT_VALUES_MAX];
......@@ -295,6 +330,48 @@ struct btrfs_ioctl_get_dev_stats {
__u64 unused[128 - 2 - BTRFS_DEV_STAT_VALUES_MAX]; /* pad to 1k */
};
#define BTRFS_QUOTA_CTL_ENABLE 1
#define BTRFS_QUOTA_CTL_DISABLE 2
#define BTRFS_QUOTA_CTL_RESCAN 3
struct btrfs_ioctl_quota_ctl_args {
__u64 cmd;
__u64 status;
};
struct btrfs_ioctl_qgroup_assign_args {
__u64 assign;
__u64 src;
__u64 dst;
};
struct btrfs_ioctl_qgroup_create_args {
__u64 create;
__u64 qgroupid;
};
struct btrfs_ioctl_timespec {
__u64 sec;
__u32 nsec;
};
struct btrfs_ioctl_received_subvol_args {
char uuid[BTRFS_UUID_SIZE]; /* in */
__u64 stransid; /* in */
__u64 rtransid; /* out */
struct btrfs_ioctl_timespec stime; /* in */
struct btrfs_ioctl_timespec rtime; /* out */
__u64 flags; /* in */
__u64 reserved[16]; /* in */
};
struct btrfs_ioctl_send_args {
__s64 send_fd; /* in */
__u64 clone_sources_count; /* in */
__u64 __user *clone_sources; /* in */
__u64 parent_root; /* in */
__u64 flags; /* in */
__u64 reserved[4]; /* in */
};
#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
......@@ -339,6 +416,8 @@ struct btrfs_ioctl_get_dev_stats {
#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
struct btrfs_ioctl_vol_args_v2)
#define BTRFS_IOC_SUBVOL_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 24, \
struct btrfs_ioctl_vol_args_v2)
#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
#define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \
......@@ -359,9 +438,19 @@ struct btrfs_ioctl_get_dev_stats {
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
struct btrfs_ioctl_received_subvol_args)
#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct btrfs_ioctl_send_args)
#define BTRFS_IOC_DEVICES_READY _IOR(BTRFS_IOCTL_MAGIC, 39, \
struct btrfs_ioctl_vol_args)
#define BTRFS_IOC_QUOTA_CTL _IOWR(BTRFS_IOCTL_MAGIC, 40, \
struct btrfs_ioctl_quota_ctl_args)
#define BTRFS_IOC_QGROUP_ASSIGN _IOW(BTRFS_IOCTL_MAGIC, 41, \
struct btrfs_ioctl_qgroup_assign_args)
#define BTRFS_IOC_QGROUP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 42, \
struct btrfs_ioctl_qgroup_create_args)
#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
struct btrfs_ioctl_qgroup_limit_args)
#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
struct btrfs_ioctl_get_dev_stats)
#define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \
struct btrfs_ioctl_get_dev_stats)
#endif
......@@ -78,13 +78,15 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw)
write_lock(&eb->lock);
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_inc(&eb->spinning_writers);
if (atomic_dec_and_test(&eb->blocking_writers))
if (atomic_dec_and_test(&eb->blocking_writers) &&
waitqueue_active(&eb->write_lock_wq))
wake_up(&eb->write_lock_wq);
} else if (rw == BTRFS_READ_LOCK_BLOCKING) {
BUG_ON(atomic_read(&eb->blocking_readers) == 0);
read_lock(&eb->lock);
atomic_inc(&eb->spinning_readers);
if (atomic_dec_and_test(&eb->blocking_readers))
if (atomic_dec_and_test(&eb->blocking_readers) &&
waitqueue_active(&eb->read_lock_wq))
wake_up(&eb->read_lock_wq);
}
return;
......@@ -199,7 +201,8 @@ void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb)
}
btrfs_assert_tree_read_locked(eb);
WARN_ON(atomic_read(&eb->blocking_readers) == 0);
if (atomic_dec_and_test(&eb->blocking_readers))
if (atomic_dec_and_test(&eb->blocking_readers) &&
waitqueue_active(&eb->read_lock_wq))
wake_up(&eb->read_lock_wq);
atomic_dec(&eb->read_locks);
}
......@@ -247,8 +250,9 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
if (blockers) {
WARN_ON(atomic_read(&eb->spinning_writers));
atomic_dec(&eb->blocking_writers);
smp_wmb();
wake_up(&eb->write_lock_wq);
smp_mb();
if (waitqueue_active(&eb->write_lock_wq))
wake_up(&eb->write_lock_wq);
} else {
WARN_ON(atomic_read(&eb->spinning_writers) != 1);
atomic_dec(&eb->spinning_writers);
......
......@@ -1239,10 +1239,11 @@ static int __must_check __add_reloc_root(struct btrfs_root *root)
node->bytenr, &node->rb_node);
spin_unlock(&rc->reloc_root_tree.lock);
if (rb_node) {
kfree(node);
btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found "
"for start=%llu while inserting into relocation "
"tree\n");
kfree(node);
return -EEXIST;
}
list_add_tail(&root->root_list, &rc->reloc_roots);
......
......@@ -16,11 +16,54 @@
* Boston, MA 021110-1307, USA.
*/
#include <linux/uuid.h>
#include "ctree.h"
#include "transaction.h"
#include "disk-io.h"
#include "print-tree.h"
/*
* Read a root item from the tree. In case we detect a root item smaller than
* sizeof(root_item), we know it's an old version of the root structure and
* initialize all new fields to zero. The same happens if we detect mismatching
* generation numbers as then we know the root was once mounted with an older
* kernel that was not aware of the root item structure change.
*/
void btrfs_read_root_item(struct btrfs_root *root,
struct extent_buffer *eb, int slot,
struct btrfs_root_item *item)
{
uuid_le uuid;
int len;
int need_reset = 0;
len = btrfs_item_size_nr(eb, slot);
read_extent_buffer(eb, item, btrfs_item_ptr_offset(eb, slot),
min_t(int, len, (int)sizeof(*item)));
if (len < sizeof(*item))
need_reset = 1;
if (!need_reset && btrfs_root_generation(item)
!= btrfs_root_generation_v2(item)) {
if (btrfs_root_generation_v2(item) != 0) {
printk(KERN_WARNING "btrfs: mismatching "
"generation and generation_v2 "
"found in root item. This root "
"was probably mounted with an "
"older kernel. Resetting all "
"new fields.\n");
}
need_reset = 1;
}
if (need_reset) {
memset(&item->generation_v2, 0,
sizeof(*item) - offsetof(struct btrfs_root_item,
generation_v2));
uuid_le_gen(&uuid);
memcpy(item->uuid, uuid.b, BTRFS_UUID_SIZE);
}
}
/*
* lookup the root with the highest offset for a given objectid. The key we do
* find is copied into 'key'. If we find something return 0, otherwise 1, < 0
......@@ -61,10 +104,10 @@ int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
goto out;
}
if (item)
read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
sizeof(*item));
btrfs_read_root_item(root, l, slot, item);
if (key)
memcpy(key, &found_key, sizeof(found_key));
ret = 0;
out:
btrfs_free_path(path);
......@@ -91,16 +134,15 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
int ret;
int slot;
unsigned long ptr;
int old_len;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
if (ret < 0) {
btrfs_abort_transaction(trans, root, ret);
goto out;
}
if (ret < 0)
goto out_abort;
if (ret != 0) {
btrfs_print_leaf(root, path->nodes[0]);
......@@ -113,16 +155,56 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
l = path->nodes[0];
slot = path->slots[0];
ptr = btrfs_item_ptr_offset(l, slot);
old_len = btrfs_item_size_nr(l, slot);
/*
* If this is the first time we update the root item which originated
* from an older kernel, we need to enlarge the item size to make room
* for the added fields.
*/
if (old_len < sizeof(*item)) {
btrfs_release_path(path);
ret = btrfs_search_slot(trans, root, key, path,
-1, 1);
if (ret < 0)
goto out_abort;
ret = btrfs_del_item(trans, root, path);
if (ret < 0)
goto out_abort;
btrfs_release_path(path);
ret = btrfs_insert_empty_item(trans, root, path,
key, sizeof(*item));
if (ret < 0)
goto out_abort;
l = path->nodes[0];
slot = path->slots[0];
ptr = btrfs_item_ptr_offset(l, slot);
}
/*
* Update generation_v2 so at the next mount we know the new root
* fields are valid.
*/
btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
write_extent_buffer(l, item, ptr, sizeof(*item));
btrfs_mark_buffer_dirty(path->nodes[0]);
out:
btrfs_free_path(path);
return ret;
out_abort:
btrfs_abort_transaction(trans, root, ret);
goto out;
}
int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_key *key, struct btrfs_root_item *item)
{
/*
* Make sure generation v1 and v2 match. See update_root for details.
*/
btrfs_set_root_generation_v2(item, btrfs_root_generation(item));
return btrfs_insert_item(trans, root, key, item, sizeof(*item));
}
......@@ -454,3 +536,16 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
root_item->byte_limit = 0;
}
}
void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_root_item *item = &root->root_item;
struct timespec ct = CURRENT_TIME;
spin_lock(&root->root_times_lock);
item->ctransid = trans->transid;
item->ctime.sec = cpu_to_le64(ct.tv_sec);
item->ctime.nsec = cpu_to_le64(ct.tv_nsec);
spin_unlock(&root->root_times_lock);
}
/*
* Copyright (C) 2012 Alexander Block. All rights reserved.
* Copyright (C) 2012 STRATO. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include "ctree.h"
#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
#define BTRFS_SEND_STREAM_VERSION 1
#define BTRFS_SEND_BUF_SIZE (1024 * 64)
#define BTRFS_SEND_READ_SIZE (1024 * 48)
enum btrfs_tlv_type {
BTRFS_TLV_U8,
BTRFS_TLV_U16,
BTRFS_TLV_U32,
BTRFS_TLV_U64,
BTRFS_TLV_BINARY,
BTRFS_TLV_STRING,
BTRFS_TLV_UUID,
BTRFS_TLV_TIMESPEC,
};
struct btrfs_stream_header {
char magic[sizeof(BTRFS_SEND_STREAM_MAGIC)];
__le32 version;
} __attribute__ ((__packed__));
struct btrfs_cmd_header {
/* len excluding the header */
__le32 len;
__le16 cmd;
/* crc including the header with zero crc field */
__le32 crc;
} __attribute__ ((__packed__));
struct btrfs_tlv_header {
__le16 tlv_type;
/* len excluding the header */
__le16 tlv_len;
} __attribute__ ((__packed__));
/* commands */
enum btrfs_send_cmd {
BTRFS_SEND_C_UNSPEC,
BTRFS_SEND_C_SUBVOL,
BTRFS_SEND_C_SNAPSHOT,
BTRFS_SEND_C_MKFILE,
BTRFS_SEND_C_MKDIR,
BTRFS_SEND_C_MKNOD,
BTRFS_SEND_C_MKFIFO,
BTRFS_SEND_C_MKSOCK,
BTRFS_SEND_C_SYMLINK,
BTRFS_SEND_C_RENAME,
BTRFS_SEND_C_LINK,
BTRFS_SEND_C_UNLINK,
BTRFS_SEND_C_RMDIR,
BTRFS_SEND_C_SET_XATTR,
BTRFS_SEND_C_REMOVE_XATTR,
BTRFS_SEND_C_WRITE,
BTRFS_SEND_C_CLONE,
BTRFS_SEND_C_TRUNCATE,
BTRFS_SEND_C_CHMOD,
BTRFS_SEND_C_CHOWN,
BTRFS_SEND_C_UTIMES,
BTRFS_SEND_C_END,
__BTRFS_SEND_C_MAX,
};
#define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)
/* attributes in send stream */
enum {
BTRFS_SEND_A_UNSPEC,
BTRFS_SEND_A_UUID,
BTRFS_SEND_A_CTRANSID,
BTRFS_SEND_A_INO,
BTRFS_SEND_A_SIZE,
BTRFS_SEND_A_MODE,
BTRFS_SEND_A_UID,
BTRFS_SEND_A_GID,
BTRFS_SEND_A_RDEV,
BTRFS_SEND_A_CTIME,
BTRFS_SEND_A_MTIME,
BTRFS_SEND_A_ATIME,
BTRFS_SEND_A_OTIME,
BTRFS_SEND_A_XATTR_NAME,
BTRFS_SEND_A_XATTR_DATA,
BTRFS_SEND_A_PATH,
BTRFS_SEND_A_PATH_TO,
BTRFS_SEND_A_PATH_LINK,
BTRFS_SEND_A_FILE_OFFSET,
BTRFS_SEND_A_DATA,
BTRFS_SEND_A_CLONE_UUID,
BTRFS_SEND_A_CLONE_CTRANSID,
BTRFS_SEND_A_CLONE_PATH,
BTRFS_SEND_A_CLONE_OFFSET,
BTRFS_SEND_A_CLONE_LEN,
__BTRFS_SEND_A_MAX,
};
#define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)
#ifdef __KERNEL__
long btrfs_ioctl_send(struct file *mnt_file, void __user *arg);
#endif
......@@ -17,15 +17,27 @@
*/
#include <linux/highmem.h>
#include <asm/unaligned.h>
/* this is some deeply nasty code. ctree.h has a different
* definition for this BTRFS_SETGET_FUNCS macro, behind a #ifndef
#include "ctree.h"
static inline u8 get_unaligned_le8(const void *p)
{
return *(u8 *)p;
}
static inline void put_unaligned_le8(u8 val, void *p)
{
*(u8 *)p = val;
}
/*
* this is some deeply nasty code.
*
* The end result is that anyone who #includes ctree.h gets a
* declaration for the btrfs_set_foo functions and btrfs_foo functions
*
* This file declares the macros and then #includes ctree.h, which results
* in cpp creating the function here based on the template below.
* declaration for the btrfs_set_foo functions and btrfs_foo functions,
which are wrappers of btrfs_set_token_#bits functions and
* btrfs_get_token_#bits functions, which are defined in this file.
*
* These setget functions do all the extent_buffer related mapping
* required to efficiently read and write specific fields in the extent
......@@ -33,103 +45,93 @@
* an unsigned long offset into the extent buffer which has been
* cast to a specific type. This gives us all the gcc type checking.
*
* The extent buffer api is used to do all the kmapping and page
* spanning work required to get extent buffers in highmem and have
* a metadata blocksize different from the page size.
*
* The macro starts with a simple function prototype declaration so that
* sparse won't complain about it being static.
* The extent buffer api is used to do the page spanning work required to
* have a metadata blocksize different from the page size.
*/
#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
u##bits btrfs_##name(struct extent_buffer *eb, type *s); \
void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); \
void btrfs_set_token_##name(struct extent_buffer *eb, type *s, u##bits val, struct btrfs_map_token *token); \
u##bits btrfs_token_##name(struct extent_buffer *eb, \
type *s, struct btrfs_map_token *token) \
#define DEFINE_BTRFS_SETGET_BITS(bits) \
u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
unsigned long off, \
struct btrfs_map_token *token) \
{ \
unsigned long part_offset = (unsigned long)s; \
unsigned long offset = part_offset + offsetof(type, member); \
type *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
unsigned long mem_len = sizeof(((type *)0)->member); \
u##bits res; \
if (token && token->kaddr && token->offset <= offset && \
token->eb == eb && \
(token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \
kaddr = token->kaddr; \
p = (type *)(kaddr + part_offset - token->offset); \
res = le##bits##_to_cpu(p->member); \
return res; \
} \
err = map_private_extent_buffer(eb, offset, \
mem_len, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits leres; \
read_eb_member(eb, s, type, member, &leres); \
return le##bits##_to_cpu(leres); \
} \
p = (type *)(kaddr + part_offset - map_start); \
res = le##bits##_to_cpu(p->member); \
if (token) { \
token->kaddr = kaddr; \
token->offset = map_start; \
token->eb = eb; \
} \
return res; \
unsigned long part_offset = (unsigned long)ptr; \
unsigned long offset = part_offset + off; \
void *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
int size = sizeof(u##bits); \
u##bits res; \
\
if (token && token->kaddr && token->offset <= offset && \
token->eb == eb && \
(token->offset + PAGE_CACHE_SIZE >= offset + size)) { \
kaddr = token->kaddr; \
p = kaddr + part_offset - token->offset; \
res = get_unaligned_le##bits(p + off); \
return res; \
} \
err = map_private_extent_buffer(eb, offset, size, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits leres; \
\
read_extent_buffer(eb, &leres, offset, size); \
return le##bits##_to_cpu(leres); \
} \
p = kaddr + part_offset - map_start; \
res = get_unaligned_le##bits(p + off); \
if (token) { \
token->kaddr = kaddr; \
token->offset = map_start; \
token->eb = eb; \
} \
return res; \
} \
void btrfs_set_token_##name(struct extent_buffer *eb, \
type *s, u##bits val, struct btrfs_map_token *token) \
void btrfs_set_token_##bits(struct extent_buffer *eb, \
void *ptr, unsigned long off, u##bits val, \
struct btrfs_map_token *token) \
{ \
unsigned long part_offset = (unsigned long)s; \
unsigned long offset = part_offset + offsetof(type, member); \
type *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
unsigned long mem_len = sizeof(((type *)0)->member); \
if (token && token->kaddr && token->offset <= offset && \
token->eb == eb && \
(token->offset + PAGE_CACHE_SIZE >= offset + mem_len)) { \
kaddr = token->kaddr; \
p = (type *)(kaddr + part_offset - token->offset); \
p->member = cpu_to_le##bits(val); \
return; \
} \
err = map_private_extent_buffer(eb, offset, \
mem_len, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits val2; \
val2 = cpu_to_le##bits(val); \
write_eb_member(eb, s, type, member, &val2); \
return; \
} \
p = (type *)(kaddr + part_offset - map_start); \
p->member = cpu_to_le##bits(val); \
if (token) { \
token->kaddr = kaddr; \
token->offset = map_start; \
token->eb = eb; \
} \
} \
void btrfs_set_##name(struct extent_buffer *eb, \
type *s, u##bits val) \
{ \
btrfs_set_token_##name(eb, s, val, NULL); \
} \
u##bits btrfs_##name(struct extent_buffer *eb, \
type *s) \
{ \
return btrfs_token_##name(eb, s, NULL); \
} \
unsigned long part_offset = (unsigned long)ptr; \
unsigned long offset = part_offset + off; \
void *p; \
int err; \
char *kaddr; \
unsigned long map_start; \
unsigned long map_len; \
int size = sizeof(u##bits); \
\
if (token && token->kaddr && token->offset <= offset && \
token->eb == eb && \
(token->offset + PAGE_CACHE_SIZE >= offset + size)) { \
kaddr = token->kaddr; \
p = kaddr + part_offset - token->offset; \
put_unaligned_le##bits(val, p + off); \
return; \
} \
err = map_private_extent_buffer(eb, offset, size, \
&kaddr, &map_start, &map_len); \
if (err) { \
__le##bits val2; \
\
val2 = cpu_to_le##bits(val); \
write_extent_buffer(eb, &val2, offset, size); \
return; \
} \
p = kaddr + part_offset - map_start; \
put_unaligned_le##bits(val, p + off); \
if (token) { \
token->kaddr = kaddr; \
token->offset = map_start; \
token->eb = eb; \
} \
}
#include "ctree.h"
DEFINE_BTRFS_SETGET_BITS(8)
DEFINE_BTRFS_SETGET_BITS(16)
DEFINE_BTRFS_SETGET_BITS(32)
DEFINE_BTRFS_SETGET_BITS(64)
void btrfs_node_key(struct extent_buffer *eb,
struct btrfs_disk_key *disk_key, int nr)
......
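
With the bit-width-generic helpers above compiled once per word size, the per-field BTRFS_SETGET_FUNCS macro that ctree.h expands for every on-disk field no longer has to emit the mapping code itself; each named accessor collapses to an offsetof() plus a call into one of the four btrfs_get_token_*/btrfs_set_token_* functions. Roughly (a paraphrase for illustration, not the exact ctree.h text):

	/*
	 * Illustration only: what each per-field accessor boils down to
	 * after the rewrite.  All kmap and page-spanning work lives in
	 * btrfs_get_token_##bits / btrfs_set_token_##bits above.
	 */
	#define EXAMPLE_BTRFS_SETGET_FUNCS(name, type, member, bits)		\
	static inline u##bits btrfs_##name(struct extent_buffer *eb, type *s)	\
	{									\
		return btrfs_get_token_##bits(eb, s, offsetof(type, member),	\
					      NULL);				\
	}									\
	static inline void btrfs_set_##name(struct extent_buffer *eb,		\
					    type *s, u##bits val)		\
	{									\
		btrfs_set_token_##bits(eb, s, offsetof(type, member), val,	\
				       NULL);					\
	}

Callers that touch several fields of the same extent buffer can pass a struct btrfs_map_token instead of NULL: the token caches the kaddr, offset and eb of the last mapping, so map_private_extent_buffer() is only called again when an access falls outside the cached page.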
......@@ -396,15 +396,23 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
strcmp(args[0].from, "zlib") == 0) {
compress_type = "zlib";
info->compress_type = BTRFS_COMPRESS_ZLIB;
btrfs_set_opt(info->mount_opt, COMPRESS);
} else if (strcmp(args[0].from, "lzo") == 0) {
compress_type = "lzo";
info->compress_type = BTRFS_COMPRESS_LZO;
btrfs_set_opt(info->mount_opt, COMPRESS);
btrfs_set_fs_incompat(info, COMPRESS_LZO);
} else if (strncmp(args[0].from, "no", 2) == 0) {
compress_type = "no";
info->compress_type = BTRFS_COMPRESS_NONE;
btrfs_clear_opt(info->mount_opt, COMPRESS);
btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS);
compress_force = false;
} else {
ret = -EINVAL;
goto out;
}
btrfs_set_opt(info->mount_opt, COMPRESS);
if (compress_force) {
btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
pr_info("btrfs: force %s compression\n",
......@@ -1455,6 +1463,13 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
ret = btrfs_scan_one_device(vol->name, FMODE_READ,
&btrfs_fs_type, &fs_devices);
break;
case BTRFS_IOC_DEVICES_READY:
ret = btrfs_scan_one_device(vol->name, FMODE_READ,
&btrfs_fs_type, &fs_devices);
if (ret)
break;
ret = !(fs_devices->num_devices == fs_devices->total_devices);
break;
}
kfree(vol);
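
The new BTRFS_IOC_DEVICES_READY case lets userspace ask, through the btrfs control device, whether every device of the filesystem containing the named device has already been scanned: the ioctl returns 0 when num_devices equals total_devices and 1 otherwise. A minimal sketch of a caller, e.g. for a mount helper or udev rule (assuming struct btrfs_ioctl_vol_args and BTRFS_IOC_DEVICES_READY come from a copy of the btrfs ioctl header, as btrfs-progs ships; btrfs_devices_ready is a made-up name):

	#include <fcntl.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include "ioctl.h"	/* struct btrfs_ioctl_vol_args, BTRFS_IOC_DEVICES_READY */

	/* Returns 0 if all devices of the fs containing `dev` have been seen,
	 * 1 if some are still missing, negative on error. */
	static int btrfs_devices_ready(const char *dev)
	{
		struct btrfs_ioctl_vol_args args;
		int fd, ret;

		fd = open("/dev/btrfs-control", O_RDWR);
		if (fd < 0)
			return -1;

		memset(&args, 0, sizeof(args));
		strncpy(args.name, dev, sizeof(args.name) - 1);

		ret = ioctl(fd, BTRFS_IOC_DEVICES_READY, &args);
		close(fd);
		return ret;
	}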
......@@ -1477,16 +1492,6 @@ static int btrfs_unfreeze(struct super_block *sb)
return 0;
}
static void btrfs_fs_dirty_inode(struct inode *inode, int flags)
{
int ret;
ret = btrfs_dirty_inode(inode);
if (ret)
printk_ratelimited(KERN_ERR "btrfs: fail to dirty inode %Lu "
"error %d\n", btrfs_ino(inode), ret);
}
static int btrfs_show_devname(struct seq_file *m, struct dentry *root)
{
struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb);
......@@ -1526,7 +1531,6 @@ static const struct super_operations btrfs_super_ops = {
.show_options = btrfs_show_options,
.show_devname = btrfs_show_devname,
.write_inode = btrfs_write_inode,
.dirty_inode = btrfs_fs_dirty_inode,
.alloc_inode = btrfs_alloc_inode,
.destroy_inode = btrfs_destroy_inode,
.statfs = btrfs_statfs,
......
......@@ -22,6 +22,7 @@
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
......@@ -38,7 +39,6 @@ void put_transaction(struct btrfs_transaction *transaction)
if (atomic_dec_and_test(&transaction->use_count)) {
BUG_ON(!list_empty(&transaction->list));
WARN_ON(transaction->delayed_refs.root.rb_node);
WARN_ON(!list_empty(&transaction->delayed_refs.seq_head));
memset(transaction, 0, sizeof(*transaction));
kmem_cache_free(btrfs_transaction_cachep, transaction);
}
......@@ -100,8 +100,8 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
kmem_cache_free(btrfs_transaction_cachep, cur_trans);
cur_trans = fs_info->running_transaction;
goto loop;
} else if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
spin_unlock(&root->fs_info->trans_lock);
} else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
spin_unlock(&fs_info->trans_lock);
kmem_cache_free(btrfs_transaction_cachep, cur_trans);
return -EROFS;
}
......@@ -126,7 +126,6 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
cur_trans->delayed_refs.num_heads = 0;
cur_trans->delayed_refs.flushing = 0;
cur_trans->delayed_refs.run_delayed_start = 0;
cur_trans->delayed_refs.seq = 1;
/*
* although the tree mod log is per file system and not per transaction,
......@@ -145,10 +144,8 @@ static noinline int join_transaction(struct btrfs_root *root, int nofail)
}
atomic_set(&fs_info->tree_mod_seq, 0);
init_waitqueue_head(&cur_trans->delayed_refs.seq_wait);
spin_lock_init(&cur_trans->commit_lock);
spin_lock_init(&cur_trans->delayed_refs.lock);
INIT_LIST_HEAD(&cur_trans->delayed_refs.seq_head);
INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
......@@ -299,6 +296,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
struct btrfs_transaction *cur_trans;
u64 num_bytes = 0;
int ret;
u64 qgroup_reserved = 0;
if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
return ERR_PTR(-EROFS);
......@@ -317,6 +315,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
* the appropriate flushing if need be.
*/
if (num_items > 0 && root != root->fs_info->chunk_root) {
if (root->fs_info->quota_enabled &&
is_fstree(root->root_key.objectid)) {
qgroup_reserved = num_items * root->leafsize;
ret = btrfs_qgroup_reserve(root, qgroup_reserved);
if (ret)
return ERR_PTR(ret);
}
num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
ret = btrfs_block_rsv_add(root,
&root->fs_info->trans_block_rsv,
......@@ -349,11 +355,16 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
h->transaction = cur_trans;
h->blocks_used = 0;
h->bytes_reserved = 0;
h->root = root;
h->delayed_ref_updates = 0;
h->use_count = 1;
h->adding_csums = 0;
h->block_rsv = NULL;
h->orig_rsv = NULL;
h->aborted = 0;
h->qgroup_reserved = qgroup_reserved;
h->delayed_ref_elem.seq = 0;
INIT_LIST_HEAD(&h->qgroup_ref_list);
smp_mb();
if (cur_trans->blocked && may_wait_transaction(root, type)) {
......@@ -473,7 +484,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_block_rsv *rsv = trans->block_rsv;
int updates;
int err;
......@@ -481,12 +491,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
return 1;
/*
* We need to do this in case we're deleting csums so the global block
* rsv get's used instead of the csum block rsv.
*/
trans->block_rsv = NULL;
updates = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (updates) {
......@@ -495,8 +499,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
return err;
}
trans->block_rsv = rsv;
return should_end_transaction(trans, root);
}
......@@ -513,8 +515,24 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
return 0;
}
/*
* do the qgroup accounting as early as possible
*/
err = btrfs_delayed_refs_qgroup_accounting(trans, info);
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
/*
* the same root has to be passed to start_transaction and
* end_transaction. Subvolume quota depends on this.
*/
WARN_ON(trans->root != root);
if (trans->qgroup_reserved) {
btrfs_qgroup_free(root, trans->qgroup_reserved);
trans->qgroup_reserved = 0;
}
while (count < 2) {
unsigned long cur = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
......@@ -527,6 +545,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
}
count++;
}
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
should_end_transaction(trans, root)) {
......@@ -567,6 +587,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
err = -EIO;
}
assert_qgroups_uptodate(trans);
memset(trans, 0, sizeof(*trans));
kmem_cache_free(btrfs_trans_handle_cachep, trans);
......@@ -785,6 +806,13 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
ret = btrfs_run_dev_stats(trans, root->fs_info);
BUG_ON(ret);
ret = btrfs_run_qgroups(trans, root->fs_info);
BUG_ON(ret);
/* run_qgroups might have added some more refs */
ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
BUG_ON(ret);
while (!list_empty(&fs_info->dirty_cowonly_roots)) {
next = fs_info->dirty_cowonly_roots.next;
list_del_init(next);
......@@ -926,11 +954,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct dentry *dentry;
struct extent_buffer *tmp;
struct extent_buffer *old;
struct timespec cur_time = CURRENT_TIME;
int ret;
u64 to_reserve = 0;
u64 index = 0;
u64 objectid;
u64 root_flags;
uuid_le new_uuid;
rsv = trans->block_rsv;
......@@ -957,6 +987,14 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
}
}
ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid,
objectid, pending->inherit);
kfree(pending->inherit);
if (ret) {
pending->error = ret;
goto fail;
}
key.objectid = objectid;
key.offset = (u64)-1;
key.type = BTRFS_ROOT_ITEM_KEY;
......@@ -1016,6 +1054,20 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
btrfs_set_root_flags(new_root_item, root_flags);
btrfs_set_root_generation_v2(new_root_item,
trans->transid);
uuid_le_gen(&new_uuid);
memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
memcpy(new_root_item->parent_uuid, root->root_item.uuid,
BTRFS_UUID_SIZE);
new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
btrfs_set_root_otransid(new_root_item, trans->transid);
memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
btrfs_set_root_stransid(new_root_item, 0);
btrfs_set_root_rtransid(new_root_item, 0);
old = btrfs_lock_root_node(root);
ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
if (ret) {
......@@ -1269,9 +1321,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
btrfs_run_ordered_operations(root, 0);
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
if (cur_trans->aborted)
goto cleanup_transaction;
......@@ -1282,6 +1331,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
if (ret)
goto cleanup_transaction;
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
cur_trans = trans->transaction;
/*
......@@ -1330,7 +1382,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
spin_unlock(&root->fs_info->trans_lock);
}
if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
if (!btrfs_test_opt(root, SSD) &&
(now < cur_trans->start_time || now - cur_trans->start_time < 1))
should_grow = 1;
do {
......@@ -1351,6 +1404,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
if (ret)
goto cleanup_transaction;
/*
* running the delayed items may have added new refs. account
* them now so that they hinder processing of more delayed refs
* as little as possible.
*/
btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);
/*
* rename don't use btrfs_join_transaction, so, once we
* set the transaction to blocked above, we aren't going
......@@ -1463,6 +1523,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
root->fs_info->chunk_root->node);
switch_commit_root(root->fs_info->chunk_root);
assert_qgroups_uptodate(trans);
update_super_roots(root);
if (!root->fs_info->log_root_recovering) {
......@@ -1532,6 +1593,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
return ret;
cleanup_transaction:
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
// WARN_ON(1);
if (current->journal_info == trans)
......
......@@ -1551,6 +1551,8 @@ void touch_atime(struct path *path)
* Btrfs), but since we touch atime while walking down the path we
* really don't care if we failed to update the atime of the file,
* so just ignore the return value.
* We may also fail on filesystems that have the ability to make parts
* of the fs read only, e.g. subvolumes in Btrfs.
*/
update_time(inode, &now, S_ATIME);
mnt_drop_write(mnt);
......